def iterate_handle_threaded(handle, ref, database='data/gsaid.db'):
    """
    Iterate through (header, raw sequence) records passed by the Chunky
    bot, pairwise-align each against the reference in worker threads, and
    write the aligned sequences to the database.

    :param handle: list of tuples (header, raw sequence)
    :param ref: path to a FASTA file containing the reference sequence,
                default -> NC_04552.fa
    :param database: path to the sqlite database file
    """
    # BUG FIX: the original leaked the reference file handle; close it
    # deterministically with a context manager.
    with open(ref) as ref_file:
        _, refseq = gotoh2.convert_fasta(ref_file)[0]

    in_queue = queue.Queue()
    out_queue = queue.Queue()

    # stream records into the work queue
    for h, s in handle:
        in_queue.put((h, s, database, refseq))

    # pairwise align sequences in queue — one worker thread per queued
    # record, matching the original behavior (loop variable was unused)
    for _ in range(in_queue.qsize()):
        AlignerThread(in_queue, out_queue).start()

    # block until every queued record has been processed
    in_queue.join()

    # update database with aligned sequences
    cursor, conn = open_connection(database)
    while not out_queue.empty():
        header, seq, alignedseq = out_queue.get()
        insert_seq(cursor, seq, header, alignedseq)
        conn.commit()
        out_queue.task_done()
    conn.close()
    out_queue.join()
def iterate_fasta(fasta, ref, database='data/gsaid.db'):
    """
    Iterate through a FASTA file, pairwise-aligning each sequence against
    the reference in worker threads and inserting the results into the
    database.

    NOTE(review): this definition shares its name with the non-threaded
    ``iterate_fasta`` defined later in the file, which shadows this one at
    import time; one of the two should be renamed.

    :param fasta: path to a FASTA file containing query sequences
    :param ref: path to a FASTA file containing the reference sequence,
                default -> NC_04552.fa
    :param database: path to the sqlite database file
    """
    # close both input file handles instead of leaking them
    with open(ref) as ref_file:
        _, refseq = gotoh2.convert_fasta(ref_file)[0]

    in_queue = queue.Queue()
    out_queue = queue.Queue()
    with open(fasta) as fasta_file:
        for h, s in gotoh2.iter_fasta(fasta_file):
            in_queue.put((h, s, database, refseq))

    # BUG FIX: the original called alignerThread (lower-case), a
    # NameError at runtime — the worker class is AlignerThread, as used
    # by the threaded handle iterator.
    for _ in range(in_queue.qsize()):
        AlignerThread(in_queue, out_queue).start()
    print(threading.active_count())
    in_queue.join()

    cursor, conn = open_connection(database)
    while not out_queue.empty():
        header, seq, alignedseq = out_queue.get()
        insert_seq(cursor, seq, header, alignedseq)
        conn.commit()
        out_queue.task_done()
    conn.close()
    out_queue.join()
def iterate_fasta(fasta, ref, database='data/gsaid.db'):
    """
    Iterate through a FASTA file *non threaded*: align each sequence
    against the reference and insert it into the database.

    :param fasta: path to a FASTA file containing query sequences
    :param ref: path to a FASTA file containing the reference sequence,
                default -> NC_04552.fa
    :param database: path to the sqlite database file
    """
    # BUG FIX: the original leaked both file handles and skipped
    # conn.close() if alignment or insertion raised.
    with open(ref) as ref_file:
        _, refseq = gotoh2.convert_fasta(ref_file)[0]

    cursor, conn = open_connection(database)
    try:
        with open(fasta) as fasta_file:
            for header, seq in gotoh2.iter_fasta(fasta_file):
                alignedseq = find_seq(conn, seq, refseq)
                insert_seq(cursor, seq, header, alignedseq)
                conn.commit()
    finally:
        # release the connection even on failure
        conn.close()
def iterate_handle(handle, ref, database='data/gsaid.db'):
    """
    Iterate through a list [(header, seq), ...] passed by ChunkyBot
    ##NON THREADED##: align each raw sequence against the reference and
    insert it into the database.

    :param handle: list of tuples (header, raw sequence)
    :param ref: path to a FASTA file containing the reference sequence,
                default -> NC_04552.fa
    :param database: path to the sqlite database file
    """
    # close the reference file handle instead of leaking it
    with open(ref) as ref_file:
        _, refseq = gotoh2.convert_fasta(ref_file)[0]

    cursor, conn = open_connection(database)
    try:
        # align and insert into database
        for header, seq in handle:
            alignedseq = find_seq(conn, seq, refseq)
            # per the original comment, find_seq returns 0 when the raw
            # sequence is too short (< 5,000 nt) to be aligned
            if alignedseq == 0:
                print('Sequence {} with a length of {} cannot be aligned.'.format(
                    header, len(seq)))
            else:
                print('Aligning {}.'.format(header))
                insert_seq(cursor, seq, header, alignedseq)
                conn.commit()
    finally:
        # BUG FIX: the original never closed the connection; release it
        # the way the other iterators in this file do
        conn.close()
maxScore = 0 maxIndex = -1 # get the max probability score and its assosciated index for i in range(len(predictions[index])): if predictions[index][i] > maxScore: maxScore = predictions[index][i] maxIndex = i score = maxScore prediction = loaded_model.classes_[maxIndex] accession = idList[index].split('|')[1] payload = [accession, prediction, score, pangoLEARN.__version__, 'passed_qc', ''] insert_lineage(cursor, payload) conn.commit() if __name__ == '__main__': args = parse_args() sequence_handle = gotoh2.convert_fasta(open(args.sequences_file, 'r')) filtered_handle = filter_seqs(sequence_handle, args.filterout, max_prop_n=0.05, minlen=29000) if args.header-file == None and args.model-file == None: classify_and_insert(args.pangolindir+ 'decisionTreeHeaders_v1.joblib', args.pangolindir+'decisionTree_v1.joblib', filtered_handle, args.indicies, args.db) else: classify_and_insert(args.header_file, args.model_file, filtered_handle, args.indicies, args.db)