def load_all_sequences(filename, file_format):
    """Parse a sequence file and store one SequenceDocument per record.

    Each record's sequence is upper-cased before being stored. When a record
    carries a feature of type ``'source'``, that feature's start/end
    positions and the first value of its ``country`` qualifier are stored as
    the document's ``location`` and ``region``; if there are several such
    features, the last one wins. Records without a ``'source'`` feature get
    an empty location list and an empty region string.

    After all records have been saved, the elapsed wall-clock time is saved
    as an ExecutionTime entry named "Sequence Load Time".

    Args:
        filename: passed through to ``SeqIO.parse``.
        file_format: passed through to ``SeqIO.parse``.
    """
    started_at = time.time()

    for record in SeqIO.parse(filename, file_format):
        upper_seq = record.seq.upper()

        # Defaults used when the record has no 'source' feature.
        span = []
        origin = ''
        for feat in record.features:
            if feat.type != 'source':
                continue
            countries = feat.qualifiers.get('country', ['undefined: UND'])
            span = [feat.location.nofuzzy_start, feat.location.nofuzzy_end]
            origin = countries[0]

        # NOTE: per-sequence nucleotide statistics are not computed here; the
        # count_nucleoid_stats(seq) call was left disabled in the original.
        SequenceDocument(
            version=str(record.id),
            length=len(upper_seq),
            fasta=record.format('fasta'),
            sequence=str(upper_seq),
            location=span,
            region=origin,
        ).save()

    elapsed = time.time() - started_at
    ExecutionTime(name="Sequence Load Time", seconds=elapsed).save()
def parce_base_sequence(filePath, file_format):
    """Store every record of a base-sequence file and return the last saved id.

    Each record's sequence is upper-cased and saved as a SequenceDocument
    together with its FASTA rendering, its id (as ``version``) and its name.

    Args:
        filePath: passed through to ``SeqIO.parse``.
        file_format: passed through to ``SeqIO.parse``.

    Returns:
        The ``id`` of the last SequenceDocument saved, or ``None`` when the
        file yields no records.
    """
    # Track the id explicitly so that an empty file does not end in an
    # unbound-variable error when the function returns.
    last_document_id = None

    for record in SeqIO.parse(filePath, file_format):
        sequence_text = str(record.seq.upper())
        document = SequenceDocument(
            version=str(record.id),
            length=len(sequence_text),
            fasta=record.format('fasta'),
            sequence=sequence_text,
            name=record.name,
        )
        document.save()
        last_document_id = document.id

    return last_document_id
def query_not_base_sequences():
    """Return a list of all SequenceDocument entries whose length is not 16569.

    NOTE(review): 16569 is presumably the length of a full base (reference)
    sequence -- confirm against the data set.
    """
    base_length = 16569
    return [
        document
        for document in SequenceDocument.objects(length__ne=base_length)
    ]
def query_normal_length():
    """Return a list of all SequenceDocument entries whose length is exactly 377.

    NOTE(review): 377 equals the width of the [16023:16400] slice that
    save_stats_db compares against -- presumably the same region; confirm.
    """
    expected_length = 377
    return [
        document
        for document in SequenceDocument.objects(length=expected_length)
    ]
def save_stats_db(sequence):
    """Compute and store the distances from ``sequence`` to both base sequences.

    The base sequences named 'NC_012920' and 'RSRS' are looked up, and
    ``sequence.sequence`` is compared (via ``count_distance``) against
    positions 16023..16399 of each. The results are written to the
    ``distance_to_rCRS`` and ``distance_to_RSRS`` fields of every stored
    document whose ``version`` equals ``sequence.version``.

    Args:
        sequence: object exposing ``sequence`` and ``version`` attributes.
    """
    # Both references are fetched up front, before anything is written.
    references = (
        ('distance_to_rCRS', query_base_sequence('NC_012920')),
        ('distance_to_RSRS', query_base_sequence('RSRS')),
    )
    targets = SequenceDocument.objects(version=sequence.version)

    # 377-character window of the base sequence used for the comparison.
    window = slice(16023, 16400)

    for field_name, reference in references:
        distance = count_distance(sequence.sequence, reference.sequence[window])
        targets.update(**{field_name: distance})
def query_base_sequence(name):
    """Return the first SequenceDocument whose ``name`` equals ``name``.

    Indexing the query result with ``[0]`` means a missing document raises
    instead of returning ``None``.
    """
    matches = SequenceDocument.objects(name=name)
    return matches[0]
def reset_sequence_documents_regions():
    """Delete every stored SequenceDocument whose length is 377.

    NOTE(review): despite the name, this removes the documents themselves
    rather than clearing their ``region`` field.
    """
    doomed = SequenceDocument.objects(length=377)
    # Documents are removed one at a time through their own delete() method.
    for stored in doomed:
        stored.delete()
def get_sequences_by_region(region):
    """Query the documents whose ``region`` matches ``region`` via ``contains``.

    Args:
        region: value used for the ``region__contains`` lookup.

    Returns:
        The query result restricted with ``.only('sequence')``; it is returned
        as-is, not converted to a list.
    """
    in_region = SequenceDocument.objects(region__contains=region)
    sequences_only = in_region.only('sequence')
    return sequences_only