Example #1
0
def _record_with_genes_only(reference):
    """
    Build a record for the reference that only lists its genes.

    Every ``Transcript`` row selected for the reference yields one ``Gene``
    holding a single locus whose ``mRNA`` and ``CDS`` are empty ``PList``
    instances. The record sequence is set to an empty DNA sequence.

    :param reference: Database reference entry; its ``id`` is used to filter
        the ``Transcript`` rows.
    :return: The record created by ``_bare_record`` with ``geneList`` and
        ``seq`` filled in.
    """
    rows = (Transcript.query
            .with_entities(Transcript.gene)
            .filter_by(reference_id=reference.id)
            .all())
    record = _bare_record(reference)

    def _gene_with_empty_locus(name):
        # One gene with one locus that carries empty mRNA/CDS lists.
        gene = Gene(name)
        locus = Locus(gene.newLocusTag())
        locus.mRNA = PList()
        locus.CDS = PList()
        gene.transcriptList.append(locus)
        return gene

    record.geneList = [_gene_with_empty_locus(row.gene) for row in rows]
    record.seq = Seq('', generic_dna)
    return record
Example #2
0
def _record_with_genes_only(reference):
    """
    Create a gene-only record for the given reference.

    The gene names are read from the ``Transcript`` rows that belong to the
    reference. For each row a ``Gene`` is created with one locus that has
    empty ``mRNA`` and ``CDS`` lists; no coordinates are filled in and the
    sequence of the record is an empty DNA sequence.

    :param reference: Database reference entry (its ``id`` selects the rows).
    :return: The bare record of the reference extended with the gene list.
    """
    gene_query = Transcript.query.with_entities(Transcript.gene)
    gene_rows = gene_query.filter_by(reference_id=reference.id).all()

    record = _bare_record(reference)

    gene_list = []
    for row in gene_rows:
        current_gene = Gene(row.gene)

        # A single locus per gene, with empty mRNA and CDS lists.
        locus = Locus(current_gene.newLocusTag())
        locus.mRNA, locus.CDS = PList(), PList()
        current_gene.transcriptList.append(locus)

        gene_list.append(current_gene)

    record.geneList = gene_list
    record.seq = Seq('', generic_dna)
    return record
Example #3
0
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file of
        the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
            db_transcript.transcript_version,
        }
        # Protein related fields are only present for transcripts that have
        # both a protein accession and a protein version.
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % (
                db_transcript.protein_accession,
                db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. They are
        # materialised as lists because ``map`` is lazy on Python 3: the
        # iterator is always truthy and does not support ``len``.
        starts = [int(pos) for pos in db_transcript.exons_start.split(',')] \
            if db_transcript.exons_start else None
        stops = [int(pos) for pos in db_transcript.exons_stop.split(',')] \
            if db_transcript.exons_stop else None
        # Exons are only used when every start has a matching stop.
        if starts and stops and len(starts) == len(stops):
            transcript['exons'] = [{'start': start, 'stop': stop}
                                   for start, stop in zip(starts, stops)]
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for db_transcript in transcripts:
        # Transcripts of the same gene share one Gene instance.
        if db_transcript['gene'] in gene_dict:
            gene = gene_dict[db_transcript['gene']]
        else:
            gene = Gene(db_transcript['gene'])

        if db_transcript['strand'] == '+':
            gene.orientation = 1
        elif db_transcript['strand'] == '-':
            gene.orientation = -1

        transcript = Locus(gene.newLocusTag())

        transcript.mRNA = PList()
        transcript.mRNA.location = [
            db_transcript['transcript_start'], db_transcript['transcript_stop']
        ]

        transcript.transcriptID = db_transcript['transcriptID']
        transcript.exon = PList()
        if db_transcript.get('exons') \
                and isinstance(db_transcript.get('exons'), list):
            exon_list = []
            for exon in db_transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            transcript.exon.positionList = exon_list
        else:
            # Without exons the transcript boundaries are used instead.
            transcript.exon.positionList = transcript.mRNA.location

        # NOTE: exon.positionList and mRNA.positionList are the same list
        # object (and, when no exons are available, so is mRNA.location), so
        # the in-place sort below affects all of them.
        transcript.mRNA.positionList = transcript.exon.positionList
        transcript.mRNA.positionList.sort()

        if db_transcript.get('proteinID'):
            transcript.CDS = PList()
            transcript.CDS.location = [
                db_transcript['cds_start'], db_transcript['cds_stop']
            ]

            transcript.CDS.positionList = cds_position_list(
                transcript.mRNA.positionList, transcript.CDS.location)

            transcript.proteinID = db_transcript['proteinID']

            transcript.transcriptProduct = db_transcript['transcript_product']
            transcript.proteinProduct = db_transcript['protein_product']
            transcript.linkMethod = 'ncbi'
            transcript.transcribe = True
            transcript.translate = True
        else:
            # No protein is linked to this transcript: no CDS is attached.
            transcript.linkMethod = None
            transcript.transcribe = True
            transcript.translate = False
            transcript.locusTag = ''

        # transcript.molType = db_transcript['molType']

        gene.transcriptList.append(transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # The sequence could not be read, so no record is returned.
        return None
    else:
        record.seq = seq

    return record
Example #4
0
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file of
        the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for db_transcript in db_transcripts:
        transcript = {
            'gene': db_transcript.gene,
            'strand': db_transcript.strand,
            'transcript_start': db_transcript.transcript_start,
            'transcript_stop': db_transcript.transcript_stop,
            'transcript_product': db_transcript.transcript_product,
            'exons': [],
            'exons_start': db_transcript.exons_start,
            'exons_stop': db_transcript.exons_stop,
            'transcriptID': db_transcript.transcript_accession + '.' +
                            db_transcript.transcript_version,
        }
        # The protein (CDS) fields are added only when both the protein
        # accession and the protein version are available.
        if db_transcript.protein_accession is not None \
                and db_transcript.protein_version is not None:
            transcript['cds_start'] = db_transcript.cds_start
            transcript['cds_stop'] = db_transcript.cds_stop
            transcript['protein_product'] = db_transcript.protein_product
            transcript['proteinID'] = '%s.%s' % \
                                      (db_transcript.protein_accession,
                                       db_transcript.protein_version)
            transcript['linkMethod'] = 'ncbi'

        # Build real lists instead of using ``map``: on Python 3 ``map``
        # returns an iterator, which is always truthy and has no ``len``.
        starts = [int(value)
                  for value in db_transcript.exons_start.split(',')] \
            if db_transcript.exons_start else None
        stops = [int(value)
                 for value in db_transcript.exons_stop.split(',')] \
            if db_transcript.exons_stop else None
        # Mismatching numbers of starts and stops leave the exon list empty.
        if (starts and stops) and (len(starts) == len(stops)):
            for start, stop in zip(starts, stops):
                exon = {'start': start,
                        'stop': stop}
                transcript['exons'].append(exon)
        transcripts.append(transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for db_transcript in transcripts:
        # Reuse the Gene created for an earlier transcript of the same gene.
        if db_transcript['gene'] in gene_dict:
            gene = gene_dict[db_transcript['gene']]
        else:
            gene = Gene(db_transcript['gene'])

        if db_transcript['strand'] == '+':
            gene.orientation = 1
        elif db_transcript['strand'] == '-':
            gene.orientation = -1

        transcript = Locus(gene.newLocusTag())

        transcript.mRNA = PList()
        transcript.mRNA.location = [db_transcript['transcript_start'],
                                    db_transcript['transcript_stop']]

        transcript.transcriptID = db_transcript['transcriptID']
        transcript.exon = PList()
        if db_transcript.get('exons') \
                and isinstance(db_transcript.get('exons'), list):
            exon_list = []
            for exon in db_transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            transcript.exon.positionList = exon_list
        else:
            # Fall back to the transcript boundaries when there are no exons.
            transcript.exon.positionList = transcript.mRNA.location

        # NOTE: both position lists refer to the same list object (which is
        # also mRNA.location in the fallback case above); sorting in place
        # therefore changes every one of them.
        transcript.mRNA.positionList = transcript.exon.positionList
        transcript.mRNA.positionList.sort()

        if db_transcript.get('proteinID'):
            transcript.CDS = PList()
            transcript.CDS.location = [db_transcript['cds_start'],
                                       db_transcript['cds_stop']]

            transcript.CDS.positionList = cds_position_list(
                transcript.mRNA.positionList,
                transcript.CDS.location)

            transcript.proteinID = db_transcript['proteinID']

            transcript.transcriptProduct = db_transcript['transcript_product']
            transcript.proteinProduct = db_transcript['protein_product']
            transcript.linkMethod = 'ncbi'
            transcript.transcribe = True
            transcript.translate = True
        else:
            # Transcript without a linked protein: no CDS is attached.
            transcript.linkMethod = None
            transcript.transcribe = True
            transcript.translate = False
            transcript.locusTag = ''

        # transcript.molType = db_transcript['molType']

        gene.transcriptList.append(transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # The sequence file could not be read; signal this with None.
        return None
    else:
        record.seq = seq

    return record
Example #5
0
def _get_mutalyzer_record(reference, db_transcripts):
    """
    Creates a Mutalyzer specific record from the transcript entries retrieved
    from the gbparser database.

    Transcripts without a protein accession or protein version are added as
    non-coding transcripts: they get no CDS and are not translated.

    :param reference: A gbparser database reference entry.
    :param db_transcripts: A gbparser database list of transcripts.
    :return: The Mutalyzer record, or None when reading the sequence file of
        the reference raises an IOError.
    """
    record = _bare_record(reference)

    # Extracting the transcripts from the DB entries.
    transcripts = []
    for transcript in db_transcripts:
        my_transcript = {
            'gene': transcript.gene,
            'strand': transcript.strand,
            'transcript_start': transcript.transcript_start,
            'transcript_stop': transcript.transcript_stop,
            'cds_start': transcript.cds_start,
            'cds_stop': transcript.cds_stop,
            'exons': [],
            'exons_start': transcript.exons_start,
            'exons_stop': transcript.exons_stop,
            'transcriptID': transcript.transcript_accession + '.' +
            transcript.transcript_version,
        }
        # Concatenating a missing (None) accession or version would raise a
        # TypeError, so the protein fields are only set when both are known.
        if transcript.protein_accession is not None \
                and transcript.protein_version is not None:
            my_transcript['proteinID'] = \
                transcript.protein_accession + '.' + transcript.protein_version
            my_transcript['linkMethod'] = 'ncbi'

        # The exon boundaries are stored as comma separated strings. Lists
        # are built explicitly because ``map`` is lazy on Python 3: the
        # iterator is always truthy and does not support ``len``.
        starts = [int(pos) for pos in transcript.exons_start.split(',')] \
            if transcript.exons_start else None
        stops = [int(pos) for pos in transcript.exons_stop.split(',')] \
            if transcript.exons_stop else None
        # Exons are only used when every start has a matching stop.
        if (starts and stops) and (len(starts) == len(stops)):
            for start, stop in zip(starts, stops):
                exon = {'start': start, 'stop': stop}
                my_transcript['exons'].append(exon)
        transcripts.append(my_transcript)

    # Generating the actual record entries in the Mutalyzer format.
    gene_dict = {}
    for transcript in transcripts:
        # Transcripts of the same gene share one Gene instance.
        if transcript['gene'] in gene_dict:
            gene = gene_dict[transcript['gene']]
        else:
            gene = Gene(transcript['gene'])

        if transcript['strand'] == '+':
            gene.orientation = 1
        elif transcript['strand'] == '-':
            gene.orientation = -1

        my_transcript = Locus(gene.newLocusTag())

        my_transcript.mRNA = PList()
        my_transcript.mRNA.location = [
            transcript['transcript_start'], transcript['transcript_stop']
        ]

        my_transcript.exon = PList()
        if transcript.get('exons') and isinstance(transcript.get('exons'),
                                                  list):
            exon_list = []
            for exon in transcript['exons']:
                exon_list.extend([exon['start'], exon['stop']])
            my_transcript.exon.positionList = exon_list
        else:
            # Without exons the transcript boundaries are used instead.
            my_transcript.exon.positionList = my_transcript.mRNA.location

        # NOTE: exon.positionList and mRNA.positionList are the same list
        # object (and, when no exons are available, so is mRNA.location), so
        # the in-place sort below affects all of them.
        my_transcript.mRNA.positionList = my_transcript.exon.positionList
        my_transcript.mRNA.positionList.sort()

        my_transcript.transcriptID = transcript['transcriptID']
        my_transcript.transcribe = True

        if transcript.get('proteinID'):
            my_transcript.CDS = PList()
            my_transcript.CDS.location = [
                transcript['cds_start'], transcript['cds_stop']
            ]
            my_transcript.CDS.positionList = cds_position_list(
                my_transcript.mRNA.positionList, my_transcript.CDS.location)

            my_transcript.proteinID = transcript['proteinID']
            my_transcript.linkMethod = 'ncbi'
            my_transcript.translate = True
        else:
            # No protein is linked to this transcript: no CDS is attached.
            my_transcript.linkMethod = None
            my_transcript.translate = False

        gene.transcriptList.append(my_transcript)
        gene_dict[gene.name] = gene

    record.geneList = list(gene_dict.values())

    # Get the sequence.
    seq_path = settings.SEQ_PATH + reference.checksum_sequence + '.sequence'
    try:
        seq = Seq(_get_sequence_mmap(seq_path, 1, reference.length + 1),
                  generic_dna)
    except IOError:
        # The sequence could not be read, so no record is returned.
        return None
    else:
        record.seq = seq

    return record