def _record_with_genes_only(reference):
    """
    Build a record for the reference that only carries gene entries.

    The transcript table is queried for the gene column of every row that
    belongs to the reference. For each returned row a gene is created that
    holds a single locus with empty mRNA and CDS position lists. The
    record sequence is set to an empty DNA sequence.

    :param reference: A database reference entry (its ``id`` is used to
                      filter the transcripts).
    :return: The record with ``geneList`` filled in and an empty ``seq``.
    """
    rows = Transcript.query \
        .with_entities(Transcript.gene) \
        .filter_by(reference_id=reference.id) \
        .all()

    record = _bare_record(reference)

    def _gene_with_empty_locus(name):
        # One gene holding a single locus whose mRNA/CDS lists are empty.
        gene = Gene(name)
        locus = Locus(gene.newLocusTag())
        locus.mRNA = PList()
        locus.CDS = PList()
        gene.transcriptList.append(locus)
        return gene

    # One gene per returned row, in query order.
    record.geneList = [_gene_with_empty_locus(row.gene) for row in rows]
    record.seq = Seq('', generic_dna)
    return record
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries
    retrieved from the gbparser database.

    Transcripts that have both a protein accession and a protein version
    get a CDS and are marked as translatable; all other transcripts are
    only marked as transcribable.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file
             of the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
                            db_transcript.transcript_version,
        }
        # The protein related entries are only present for transcripts
        # that have a complete protein identifier.
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % \
                (db_transcript.protein_accession,
                 db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. Real
        # lists are built (instead of using map) because the values are
        # tested for emptiness and passed to len() below, which does not
        # work on the iterator that map returns on Python 3.
        starts = stops = None
        if db_transcript.exons_start:
            starts = [int(position)
                      for position in db_transcript.exons_start.split(',')]
        if db_transcript.exons_stop:
            stops = [int(position)
                     for position in db_transcript.exons_stop.split(',')]

        # Exons are only used when both lists are present and pair up.
        if starts and stops and len(starts) == len(stops):
            transcript['exons'] = [{'start': start, 'stop': stop}
                                   for start, stop in zip(starts, stops)]
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for entry in transcripts:
        if entry['gene'] in gene_dict:
            gene = gene_dict[entry['gene']]
        else:
            gene = Gene(entry['gene'])

        # The orientation is (re)assigned for every transcript of the gene.
        if entry['strand'] == '+':
            gene.orientation = 1
        elif entry['strand'] == '-':
            gene.orientation = -1

        locus = Locus(gene.newLocusTag())

        locus.mRNA = PList()
        locus.mRNA.location = [entry['transcript_start'],
                               entry['transcript_stop']]
        locus.transcriptID = entry['transcriptID']

        locus.exon = PList()
        if entry['exons']:
            exon_list = []
            for exon in entry['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            locus.exon.positionList = exon_list
        else:
            # No usable exons: fall back to the transcript boundaries.
            locus.exon.positionList = locus.mRNA.location

        # mRNA.positionList is the same list object as exon.positionList
        # (and, in the fallback case, as mRNA.location), so the in-place
        # sort below is visible through all of these names.
        locus.mRNA.positionList = locus.exon.positionList
        locus.mRNA.positionList.sort()

        if entry.get('proteinID'):
            locus.CDS = PList()
            locus.CDS.location = [entry['cds_start'], entry['cds_stop']]
            locus.CDS.positionList = cds_position_list(
                locus.mRNA.positionList, locus.CDS.location)
            locus.proteinID = entry['proteinID']
            locus.transcriptProduct = entry['transcript_product']
            locus.proteinProduct = entry['protein_product']
            locus.linkMethod = 'ncbi'
            locus.transcribe = True
            locus.translate = True
        else:
            locus.linkMethod = None
            locus.transcribe = True
            locus.translate = False
            locus.locusTag = ''

        gene.transcriptList.append(locus)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        return None
    else:
        record.seq = seq

    return record
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries
    retrieved from the gbparser database.

    Transcripts that have both a protein accession and a protein version
    get a CDS and are marked as translatable; all other transcripts are
    only marked as transcribable.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file
             of the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
                            db_transcript.transcript_version,
        }
        # The protein related entries are only present for transcripts
        # that have a complete protein identifier.
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % \
                (db_transcript.protein_accession,
                 db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. Real
        # lists are built (instead of using map) because the values are
        # tested for emptiness and passed to len() below, which does not
        # work on the iterator that map returns on Python 3.
        starts = None
        stops = None
        if db_transcript.exons_start:
            starts = [int(value)
                      for value in db_transcript.exons_start.split(',')]
        if db_transcript.exons_stop:
            stops = [int(value)
                     for value in db_transcript.exons_stop.split(',')]

        # Exons are only used when both lists are present and pair up.
        if starts and stops and len(starts) == len(stops):
            for start, stop in zip(starts, stops):
                transcript['exons'].append({'start': start, 'stop': stop})
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for entry in transcripts:
        gene_name = entry['gene']
        if gene_name in gene_dict:
            gene = gene_dict[gene_name]
        else:
            gene = Gene(gene_name)

        # The orientation is (re)assigned for every transcript of the gene.
        strand = entry['strand']
        if strand == '+':
            gene.orientation = 1
        elif strand == '-':
            gene.orientation = -1

        locus = Locus(gene.newLocusTag())

        locus.mRNA = PList()
        locus.mRNA.location = [entry['transcript_start'],
                               entry['transcript_stop']]
        locus.transcriptID = entry['transcriptID']

        locus.exon = PList()
        if entry['exons']:
            positions = []
            for exon in entry['exons']:
                positions.extend([exon['start'], exon['stop']])
            locus.exon.positionList = positions
        else:
            # No usable exons: fall back to the transcript boundaries.
            locus.exon.positionList = locus.mRNA.location

        # mRNA.positionList is the same list object as exon.positionList
        # (and, in the fallback case, as mRNA.location), so the in-place
        # sort below is visible through all of these names.
        locus.mRNA.positionList = locus.exon.positionList
        locus.mRNA.positionList.sort()

        if entry.get('proteinID'):
            locus.CDS = PList()
            locus.CDS.location = [entry['cds_start'], entry['cds_stop']]
            locus.CDS.positionList = cds_position_list(
                locus.mRNA.positionList, locus.CDS.location)
            locus.proteinID = entry['proteinID']
            locus.transcriptProduct = entry['transcript_product']
            locus.proteinProduct = entry['protein_product']
            locus.linkMethod = 'ncbi'
            locus.transcribe = True
            locus.translate = True
        else:
            locus.linkMethod = None
            locus.transcribe = True
            locus.translate = False
            locus.locusTag = ''

        gene.transcriptList.append(locus)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        return None
    else:
        record.seq = seq

    return record
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries
    retrieved from the gbparser database.

    Transcripts that have both a protein accession and a protein version
    get a CDS and are marked as translatable. Transcripts without a
    complete protein identifier are only marked as transcribable instead
    of failing while the protein identifier is assembled.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file
             of the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for transcript in db_transcripts:
        my_transcript = {
            'gene': transcript.gene,
            'strand': transcript.strand,
            'transcript_start': transcript.transcript_start,
            'transcript_stop': transcript.transcript_stop,
            'exons': [],
            'exons_start': transcript.exons_start,
            'exons_stop': transcript.exons_stop,
            'transcriptID': transcript.transcript_accession + '.' +
                            transcript.transcript_version,
        }
        # Concatenating a missing (None) accession or version would raise
        # a TypeError, so the protein entries are only added when both
        # parts of the protein identifier are available.
        if transcript.protein_accession is not None \
                and transcript.protein_version is not None:
            my_transcript['cds_start'] = transcript.cds_start
            my_transcript['cds_stop'] = transcript.cds_stop
            my_transcript['proteinID'] = transcript.protein_accession + \
                '.' + transcript.protein_version
            my_transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. Real
        # lists are built (instead of using map) because the values are
        # tested for emptiness and passed to len() below, which does not
        # work on the iterator that map returns on Python 3.
        starts = None
        stops = None
        if transcript.exons_start:
            starts = [int(value)
                      for value in transcript.exons_start.split(',')]
        if transcript.exons_stop:
            stops = [int(value)
                     for value in transcript.exons_stop.split(',')]

        # Exons are only used when both lists are present and pair up.
        if starts and stops and len(starts) == len(stops):
            for start, stop in zip(starts, stops):
                my_transcript['exons'].append({'start': start,
                                               'stop': stop})
        transcripts.append(my_transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for transcript in transcripts:
        if transcript['gene'] in gene_dict:
            gene = gene_dict[transcript['gene']]
        else:
            gene = Gene(transcript['gene'])

        # The orientation is (re)assigned for every transcript of the gene.
        if transcript['strand'] == '+':
            gene.orientation = 1
        elif transcript['strand'] == '-':
            gene.orientation = -1

        my_transcript = Locus(gene.newLocusTag())

        my_transcript.mRNA = PList()
        my_transcript.mRNA.location = [transcript['transcript_start'],
                                       transcript['transcript_stop']]

        my_transcript.exon = PList()
        if transcript['exons']:
            exon_list = []
            for exon in transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            my_transcript.exon.positionList = exon_list
        else:
            # No usable exons: fall back to the transcript boundaries.
            my_transcript.exon.positionList = my_transcript.mRNA.location

        # mRNA.positionList is the same list object as exon.positionList
        # (and, in the fallback case, as mRNA.location), so the in-place
        # sort below is visible through all of these names.
        my_transcript.mRNA.positionList = my_transcript.exon.positionList
        my_transcript.mRNA.positionList.sort()

        my_transcript.transcriptID = transcript['transcriptID']

        if transcript.get('proteinID'):
            my_transcript.CDS = PList()
            my_transcript.CDS.location = [transcript['cds_start'],
                                          transcript['cds_stop']]
            my_transcript.CDS.positionList = cds_position_list(
                my_transcript.mRNA.positionList,
                my_transcript.CDS.location)
            my_transcript.proteinID = transcript['proteinID']
            my_transcript.linkMethod = 'ncbi'
            my_transcript.transcribe = True
            my_transcript.translate = True
        else:
            # No protein: nothing to translate and no protein link.
            my_transcript.linkMethod = None
            my_transcript.transcribe = True
            my_transcript.translate = False

        gene.transcriptList.append(my_transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        return None
    else:
        record.seq = seq

    return record