예제 #1
0
def load_all_sequences(filename, file_format):
    """Save every record of a sequence file as a ``SequenceDocument``.

    Each record produced by ``SeqIO.parse`` is upper-cased and saved together
    with its id, length, FASTA rendering, and the span and country taken from
    its ``'source'`` feature. If a record has several ``'source'`` features,
    the last one wins; if it has none, the location is ``[]`` and the region
    is ``''``.

    After all records have been handled, the elapsed wall-clock time is saved
    as an ``ExecutionTime`` named ``"Sequence Load Time"``.

    Args:
        filename: Source passed straight to ``SeqIO.parse``.
        file_format: Format name passed straight to ``SeqIO.parse``.

    Returns:
        None.
    """
    started_at = time.time()

    for entry in SeqIO.parse(filename, file_format):
        bases = entry.seq.upper()

        span, origin = [], ''
        for feat in entry.features:
            if feat.type != 'source':
                continue
            # NOTE(review): nofuzzy_start / nofuzzy_end appear to be
            # deprecated in recent Biopython releases -- confirm against the
            # version this project pins before upgrading.
            span = [feat.location.nofuzzy_start, feat.location.nofuzzy_end]
            # A missing 'country' qualifier falls back to a placeholder value.
            origin = feat.qualifiers.get('country', ['undefined: UND'])[0]

        # Nucleotide statistics are not computed here; the
        # count_nucleoid_stats(seq) call is currently disabled.
        fields = {
            'version': str(entry.id),
            'length': len(bases),
            'fasta': entry.format('fasta'),
            'sequence': str(bases),
            'location': span,
            'region': origin,
        }
        SequenceDocument(**fields).save()

    timing = ExecutionTime(
        name="Sequence Load Time",
        seconds=time.time() - started_at,
    )
    timing.save()
예제 #2
0
def parce_base_sequence(filePath, file_format):
    """Save the first record of a sequence file and return its document id.

    Only the first record produced by ``SeqIO.parse`` is handled; any further
    records in the file are ignored.

    Args:
        filePath: Source passed straight to ``SeqIO.parse``.
        file_format: Format name passed straight to ``SeqIO.parse``.

    Returns:
        The ``id`` attribute of the saved ``SequenceDocument``, or ``None``
        when the parser yields no records.
    """
    first_record = next(iter(SeqIO.parse(filePath, file_format)), None)
    if first_record is None:
        return None

    version = str(first_record.id)
    bases = str(first_record.seq.upper())
    fields = {
        'version': version,
        'length': len(bases),
        'fasta': first_record.format('fasta'),
        'sequence': bases,
        'name': first_record.name,
    }

    stored = SequenceDocument(**fields)
    stored.save()
    return stored.id