def fetch_pairwise(mapping):
    """
    Collect the pairwise alignment records available for a mapping.

    The alignments in ``mapping.alignments.all()`` are visited in order:

    * The first alignment whose run has ``score1_type == 'identity'`` is
      rebuilt through ``pairwise_alignment`` and ends the scan.
    * An alignment whose run has ``score1_type == 'perfect_match'`` and whose
      ``score1`` equals 1 is reported with the sequence returned by
      ``ensembl_sequence`` on both sides and a match string made only of
      ``'|'`` characters; the scan continues afterwards.
    * Anything else is skipped.

    Parameters
    ----------
    mapping
        Object exposing ``transcript.enst_id``, ``uniprot.uniprot_acc``,
        ``alignments.all()`` and ``mapping_id``.

    Returns
    -------
    dict
        ``{'mapping_id': ..., 'alignments': [...]}`` where each list entry is
        one alignment record.
    """
    transcript_id = mapping.transcript.enst_id
    accession = mapping.uniprot.uniprot_acc
    records = []

    def make_record(aln, release, protein_id, uniprot_side, ensembl_side,
                    matches, kind):
        # Single place where the record layout is spelled out.
        return dict(
            uniprot_alignment=uniprot_side,
            ensembl_alignment=ensembl_side,
            match_str=matches,
            alignment_id=aln.alignment_id,
            ensembl_release=release,
            ensembl_id=protein_id,
            uniprot_id=accession,
            alignment_type=kind,
        )

    for aln in mapping.alignments.all():
        score_type = aln.alignment_run.score1_type

        if score_type == 'identity':
            cigar = aln.pairwise.cigarplus
            md_tag = aln.pairwise.mdz
            # Drop the 'MD:Z:' prefix when the stored value carries it.
            if md_tag.startswith('MD:Z:'):
                md_tag = md_tag[len('MD:Z:'):]

            release = aln.alignment_run.ensembl_release
            protein_id = ensembl_protein(transcript_id, release)
            protein_seq = ensembl_sequence(protein_id, release)
            uniprot_side, matches, ensembl_side = pairwise_alignment(
                protein_seq, cigar, md_tag)

            records.append(make_record(
                aln, release, protein_id, uniprot_side, ensembl_side,
                matches, 'identity'))

            # One identity alignment is all that is needed; stop scanning.
            break

        if not (score_type == 'perfect_match' and aln.score1 == 1):
            continue

        release = aln.alignment_run.ensembl_release
        protein_id = ensembl_protein(transcript_id, release)
        protein_seq = ensembl_sequence(protein_id, release)
        records.append(make_record(
            aln, release, protein_id, protein_seq, protein_seq,
            '|' * len(protein_seq), 'perfect_match'))

    return {
        'mapping_id': mapping.mapping_id,
        'alignments': records
    }
def _fetch_alignment(alignment, enst, uniprot_id):
    """
    Build the pairwise-alignment record for a single alignment.

    The record starts out describing a perfect match (the sequence returned
    by ``ensembl_sequence`` on both sides, a match string of ``'|'``
    characters). When the alignment's run has ``score1_type == 'identity'``
    those values are replaced by the output of ``pairwise_alignment``.

    Parameters
    ----------
    alignment
        Object exposing ``alignment_run`` (``ensembl_release``,
        ``score1_type``), ``alignment_id`` and, for identity runs,
        ``pairwise`` (``cigarplus``, ``mdz``).
    enst : str
        Passed to ``ensembl_protein`` together with the run's release.
    uniprot_id : str
        Copied unchanged into the record.

    Returns
    -------
    pw_alignment : dict
        Alignment object with the keys ``uniprot_alignment``,
        ``ensembl_alignment``, ``match_str``, ``alignment_id``,
        ``ensembl_release``, ``ensembl_id``, ``uniprot_id`` and
        ``alignment_type``.
    """
    release = alignment.alignment_run.ensembl_release
    protein_id = ensembl_protein(enst, release)
    protein_seq = ensembl_sequence(protein_id, release)

    # Perfect-match defaults; overwritten below for identity runs.
    ensembl_side = protein_seq
    uniprot_side = protein_seq
    matches = '|' * len(protein_seq)
    kind = 'perfect_match'

    if alignment.alignment_run.score1_type == 'identity':
        cigar = alignment.pairwise.cigarplus
        md_tag = alignment.pairwise.mdz
        # Drop the 'MD:Z:' prefix when the stored value carries it.
        if md_tag.startswith('MD:Z:'):
            md_tag = md_tag[len('MD:Z:'):]

        # NOTE(review): the FIRST value returned by pairwise_alignment is
        # stored under 'ensembl_alignment' and the THIRD under
        # 'uniprot_alignment'. fetch_pairwise in this file wires them the
        # other way round -- confirm against pairwise_alignment's actual
        # return order which of the two is intended.
        ensembl_side, matches, uniprot_side = pairwise_alignment(
            protein_seq, cigar, md_tag)
        kind = 'identity'

    return dict(
        uniprot_alignment=uniprot_side,
        ensembl_alignment=ensembl_side,
        match_str=matches,
        alignment_id=alignment.alignment_id,
        ensembl_release=release,
        ensembl_id=protein_id,
        uniprot_id=uniprot_id,
        alignment_type=kind,
    )
def build_mapping(cls, mapping, fetch_sequence=False, authenticated=False):
    """
    Serialise a mapping, together with its latest history entry, to a dict.

    The history entry used is the one returned by
    ``mapping.mapping_history...latest('mapping_history_id')``; release
    information is read from its ``release_mapping_history`` and that
    object's ``ensembl_species_history``.

    Parameters
    ----------
    cls
        Unused in the body.
    mapping
        Mapping object; must expose ``mapping_history``, ``status``,
        ``uniprot``, ``transcript`` (with ``gene``), ``mapping_id``,
        ``alignment_difference`` and ``statuses()``.
    fetch_sequence : bool
        When true, ``ensembl_sequence`` is called for the transcript and the
        result is stored under ``ensemblTranscript.sequence``. Any failure is
        logged and the sequence is left as ``None``.
    authenticated : bool
        Forwarded to ``mapping.statuses(usernames=...)``.

    Returns
    -------
    dict
        The serialised mapping.
    """
    import logging

    mapping_history = mapping.mapping_history.select_related(
        'release_mapping_history').select_related(
        'release_mapping_history__ensembl_species_history').latest(
        'mapping_history_id')

    release_mapping_history = mapping_history.release_mapping_history
    ensembl_history = release_mapping_history.ensembl_species_history

    status = mapping.status.id

    sequence = None
    if fetch_sequence:
        try:
            sequence = ensembl_sequence(mapping.transcript.enst_id,
                                        ensembl_history.ensembl_release)
        except Exception:
            # Best effort: the sequence is optional, so a failed lookup must
            # not break serialisation. Record it with its traceback instead
            # of printing to stdout.
            logging.getLogger(__name__).exception(
                "Failed to fetch Ensembl sequence for mapping %s",
                mapping.mapping_id)
            sequence = None

    uniprot = mapping.uniprot
    transcript = mapping.transcript
    gene = transcript.gene

    mapping_obj = {
        'mappingId': mapping.mapping_id,
        'timeMapped': release_mapping_history.time_mapped,
        'ensemblRelease': ensembl_history.ensembl_release,
        'uniprotRelease': release_mapping_history.uniprot_release,
        'uniprotEntry': {
            'uniprotAccession': uniprot.uniprot_acc,
            'entryType': Mapping.entry_type(mapping_history.entry_type_id),
            'sequenceVersion': uniprot.sequence_version,
            'upi': uniprot.upi,
            'md5': uniprot.md5,
            # NOTE(review): a truthy canonical_uniprot_id is reported as
            # isCanonical=True -- confirm this polarity is intended.
            'isCanonical': bool(uniprot.canonical_uniprot_id),
            'alias': uniprot.alias,
            'ensemblDerived': uniprot.ensembl_derived,
            'gene_symbol': uniprot.gene_symbol,
            'gene_accession': uniprot.gene_accession,
            'length': uniprot.length
        },
        'ensemblTranscript': {
            'enstId': transcript.enst_id,
            'enstVersion': transcript.enst_version,
            'upi': transcript.uniparc_accession,
            'biotype': transcript.biotype,
            'deleted': transcript.deleted,
            'chromosome': gene.chromosome,
            'seqRegionStart': transcript.seq_region_start,
            'seqRegionEnd': transcript.seq_region_end,
            'seqRegionStrand': gene.seq_region_strand,
            'ensgId': gene.ensg_id,
            'ensgName': gene.gene_name,
            'ensgSymbol': gene.gene_symbol,
            'ensgAccession': gene.gene_accession,
            'sequence': sequence,
            'enspId': transcript.ensp_id,
            'enspLen': transcript.ensp_len,
            'select': transcript.select
        },
        'alignment_difference': mapping.alignment_difference,
        'status': Mapping.status_type(status),
        'status_history': mapping.statuses(usernames=authenticated)
    }

    return mapping_obj
def test_ensembl_sequence(self):
    """Check the first seven characters returned for ENST00000382038 at release 95."""
    expected_prefix = 'GCTTGCC'
    fetched = external.ensembl_sequence('ENST00000382038', 95)
    self.assertEqual(fetched[:7], expected_prefix)
def build_mapping(cls, mapping_view, fetch_sequence=False, authenticated=False):
    """
    Serialise a mapping-view row to a dict.

    Every value is read directly from attributes of ``mapping_view``; the
    entry type and status are translated through
    ``MappingView.entry_description`` and ``MappingView.status_description``.

    Parameters
    ----------
    cls
        Unused in the body.
    mapping_view
        Row object exposing the attributes read below and ``statuses()``.
    fetch_sequence : bool
        When true, ``ensembl_sequence`` is called with the row's ``enst_id``
        and ``ensembl_release`` and the result is stored under
        ``ensemblTranscript.sequence``. Any failure is logged and the
        sequence is left as ``None``.
    authenticated : bool
        Forwarded to ``mapping_view.statuses(usernames=...)``.

    Returns
    -------
    dict
        The serialised mapping.
    """
    import logging

    status = mapping_view.status

    sequence = None
    if fetch_sequence:
        try:
            sequence = ensembl_sequence(mapping_view.enst_id,
                                        mapping_view.ensembl_release)
        except Exception:
            # Best effort: the sequence is optional, so a failed lookup must
            # not break serialisation. Record it with its traceback instead
            # of printing to stdout.
            logging.getLogger(__name__).exception(
                "Failed to fetch Ensembl sequence for mapping %s",
                mapping_view.mapping_id)
            sequence = None

    mapping_obj = {
        'id': mapping_view.id,
        'mappingId': mapping_view.mapping_id,
        'groupingId': mapping_view.grouping_id,
        'timeMapped': mapping_view.time_mapped,
        'ensemblRelease': mapping_view.ensembl_release,
        'uniprotRelease': mapping_view.uniprot_release,
        'uniprotEntry': {
            'uniprot_id': mapping_view.uniprot_id,
            'uniprotAccession': mapping_view.uniprot_acc,
            'entryType': MappingView.entry_description(mapping_view.entry_type),
            'sequenceVersion': mapping_view.sequence_version,
            'upi': mapping_view.upi,
            'md5': mapping_view.md5,
            # An entry is reported canonical when canonical_uniprot_id is
            # falsy.
            'isCanonical': not mapping_view.canonical_uniprot_id,
            'alias': mapping_view.alias,
            'ensemblDerived': mapping_view.ensembl_derived,
            'gene_symbol': mapping_view.gene_symbol_up,
            # NOTE(review): 'gene_accession' is filled from chromosome_line
            # -- confirm this is the intended source column.
            'gene_accession': mapping_view.chromosome_line,
            'length': mapping_view.length,
            'protein_existence_id': mapping_view.protein_existence_id
        },
        'ensemblTranscript': {
            'transcript_id': mapping_view.transcript_id,
            'enstId': mapping_view.enst_id,
            'enstVersion': mapping_view.enst_version,
            'upi': mapping_view.uniparc_accession,
            'biotype': mapping_view.biotype,
            'deleted': mapping_view.deleted,
            'chromosome': mapping_view.chromosome,
            'regionAccession': mapping_view.region_accession,
            'seqRegionStart': mapping_view.seq_region_start,
            'seqRegionEnd': mapping_view.seq_region_end,
            'seqRegionStrand': mapping_view.seq_region_strand,
            'ensgId': mapping_view.ensg_id,
            'ensgName': mapping_view.gene_name,
            'ensgSymbol': mapping_view.gene_symbol_eg,
            'ensgAccession': mapping_view.gene_accession,
            # NOTE(review): same source attribute as 'regionAccession' above
            # -- confirm both keys are meant to carry region_accession.
            'ensgRegionAccession': mapping_view.region_accession,
            'sequence': sequence,
            'enspId': mapping_view.ensp_id,
            'enspLen': mapping_view.ensp_len,
            'source': mapping_view.source,
            'select': mapping_view.select
        },
        'alignment_difference': mapping_view.alignment_difference,
        'status': MappingView.status_description(status),
        'status_history': mapping_view.statuses(usernames=authenticated)
    }

    return mapping_obj