def create_facets(self, queryset):
    """
    Build the facet descriptors for a mapping queryset.

    Each facet is an OrderedDict with the keys 'name', 'label' and 'items'.
    The status, organism and divergence facets are always returned; a
    chromosomes facet is appended only when the queryset spans exactly one
    species.

    queryset must provide species(), statuses() and divergences(), plus
    chromosomes() for the single-species case.
    """
    statuses = OrderedDict([('name', 'status'), ('label', 'Status'), ('items', [])])
    organism = OrderedDict([('name', 'organism'), ('label', 'Organism'), ('items', [])])
    sequence = OrderedDict([('name', 'divergence'), ('label', 'Divergence'), ('items', [])])

    # kept in a local because its length decides below whether the
    # chromosomes facet is emitted
    species_set = queryset.species()
    for species in species_set:
        organism["items"].append({"name": species[0], "label": species[1]})

    for query_status in queryset.statuses():
        # resolve the status name once and derive the label from it
        status_name = Mapping.status_type(query_status)
        statuses["items"].append({
            "name": status_name,
            "label": status_name.replace("_", " ").capitalize()
        })

    # divergences() is indexed positionally: 0 -> identical, 1 -> small,
    # 2 -> large; categories with a falsy count are left out
    differences = queryset.divergences()
    for index, category in enumerate(("identical", "small", "large")):
        count = differences[index]
        if count:
            sequence["items"].append({
                "label": category,
                "name": category,
                "count": count
            })

    facets = [statuses, organism, sequence]

    if len(species_set) == 1:
        chromosomes = OrderedDict([('name', 'chromosomes'), ('label', 'Chromosomes'), ('items', [])])
        for chromosome in queryset.chromosomes():
            chromosomes["items"].append({
                'name': chromosome.lower(),
                'label': chromosome.upper()
            })
        facets.append(chromosomes)

    return facets
def get_status(mapping):
    """
    Return the status type of the given mapping, as resolved by
    Mapping.status_type, or None when the lookup raises IndexError or
    CvUeStatus.DoesNotExist.
    """
    try:
        return Mapping.status_type(mapping.status)
    except (IndexError, CvUeStatus.DoesNotExist):
        # TODO: should log this anomaly or do something else
        return None
def create_facets(self, queryset):
    """
    Build the status, organism and sequence facet descriptors for a
    mapping queryset.

    Each facet is an OrderedDict with the keys 'name', 'label' and 'items'.
    queryset must provide species(), statuses() and divergences().
    """
    statuses = OrderedDict([('name', 'status'), ('label', 'Status'), ('items', [])])
    organism = OrderedDict([('name', 'organism'), ('label', 'Organism'), ('items', [])])
    sequence = OrderedDict([('name', 'sequence'), ('label', 'Sequence'), ('items', [])])

    # TODO: add a 'mappings' facet (name 'mappings', label 'Type') and
    # return it after 'sequence'; it is not built until it can be populated.

    for species in queryset.species():
        organism["items"].append({"name": species[0], "label": species[1]})

    for status in queryset.statuses():
        # resolve the status name once and derive the label from it
        status_name = Mapping.status_type(status)
        statuses["items"].append({
            "name": status_name,
            "label": status_name.replace("_", " ").capitalize()
        })

    # divergences() is indexed positionally: 0 -> identical, 1 -> small,
    # 2 -> large; categories with a falsy count are left out
    differences = queryset.divergences()
    for index, category in enumerate(("identical", "small", "large")):
        count = differences[index]
        if count:
            sequence["items"].append({
                "label": category,
                "name": category,
                "count": count
            })

    return [statuses, organism, sequence]
def get(self, request):
    """
    Return statistics on all the mappings.

    The payload is rendered through MappingStatsSerializer and contains:
    - 'mapping': the total number of mappings plus UniProt specific
      ('mapped', 'not_mapped_sp') and Ensembl specific ('gene_mapped',
      'gene_not_mapped_sp', 'transcript_mapped') counts;
    - 'status': the number of mappings for each status;
    - 'label': the number of UeMappingLabel rows for each CvUeLabel.
    """
    #
    # Mapping stats: general and Uniprot/Ensembl specific
    #
    mappings_count = Mapping.objects.count()  # tot mappings

    # tot mapped uniprot entries
    uniprot_mapped_count = Mapping.objects.values('uniprot').distinct().count()

    # tot non mapped Swiss-Prot entries
    # NOTE: no way to get it at the moment, reported as None
    uniprot_not_mapped_sp_count = None

    # NOTE: the number of mapped Swiss-Prot entries is not requested and is
    # therefore not computed (it would need a filter on
    # mapping_history__entry_type over the Swiss-Prot entry types).

    #
    # tot non mapped genes
    #
    # NOTE: as discussed with UniProt, genes not mapped to Swiss-Prot entries
    # are counted among the non mapped genes rather than among the mapped
    # ones, which coincides with counting the non mapped genes in general.
    #
    # Only the id column is fetched: there is no need to instantiate every
    # EnsemblGene just to read its gene_id.
    gene_ids = set(EnsemblGene.objects.values_list('gene_id', flat=True))
    gene_mapped_ids = {
        item['transcript__gene']
        for item in Mapping.objects.values('transcript__gene').distinct()
    }

    gene_mapped_count = len(gene_mapped_ids)  # tot mapped Ensembl genes
    gene_not_mapped_sp_count = len(gene_ids.difference(gene_mapped_ids))

    # tot mapped Ensembl transcripts
    transcript_mapped_count = Mapping.objects.values('transcript').distinct().count()

    #
    # Stats relative to mapping labels
    #
    # one count query per label, so labels without any mapping still
    # appear with a count of 0
    label_counts = [
        {
            'label': label.description,
            'count': UeMappingLabel.objects.filter(label=label).count()
        }
        for label in CvUeLabel.objects.all()
    ]

    #
    # Stats for mapping status
    #
    status_totals = Mapping.objects.values('status').annotate(total=Count('status'))
    status_counts = [
        {
            'status': Mapping.status_type(status_total['status']),
            'count': status_total['total']
        }
        for status_total in status_totals
    ]

    serializer = MappingStatsSerializer({
        'mapping': {
            'total': mappings_count,
            'uniprot': {
                'mapped': uniprot_mapped_count,
                'not_mapped_sp': uniprot_not_mapped_sp_count
            },
            'ensembl': {
                'gene_mapped': gene_mapped_count,
                'gene_not_mapped_sp': gene_not_mapped_sp_count,
                'transcript_mapped': transcript_mapped_count
            }
        },
        'status': status_counts,
        'label': label_counts
    })

    return Response(serializer.data)
def build_mapping(cls, mapping, fetch_sequence=False, authenticated=False):
    """
    Build the dictionary describing a mapping.

    mapping: the Mapping instance to describe; the release information is
        taken from its mapping history entry with the latest
        mapping_history_id.
    fetch_sequence: when true, the transcript sequence is retrieved with
        ensembl_sequence(); any failure is logged and the sequence is
        reported as None.
    authenticated: forwarded to mapping.statuses() as its 'usernames'
        argument.

    Returns a dict with the mapping identifiers and releases, the
    'uniprotEntry' and 'ensemblTranscript' sub-dicts, the alignment
    difference, the current status and the status history.
    """
    import logging

    mapping_history = mapping.mapping_history.select_related(
        'release_mapping_history',
        'release_mapping_history__ensembl_species_history'
    ).latest('mapping_history_id')

    release_mapping_history = mapping_history.release_mapping_history
    ensembl_history = release_mapping_history.ensembl_species_history

    status = mapping.status.id

    sequence = None
    if fetch_sequence:
        try:
            sequence = ensembl_sequence(mapping.transcript.enst_id,
                                        ensembl_history.ensembl_release)
        except Exception:
            # best effort: a failed sequence retrieval must not prevent the
            # mapping from being returned, so record it and carry on
            logging.getLogger(__name__).exception(
                "Unable to fetch the transcript sequence for mapping %s",
                mapping.mapping_id)
            sequence = None

    uniprot = mapping.uniprot
    transcript = mapping.transcript
    gene = transcript.gene

    mapping_obj = {
        'mappingId': mapping.mapping_id,
        'timeMapped': release_mapping_history.time_mapped,
        'ensemblRelease': ensembl_history.ensembl_release,
        'uniprotRelease': release_mapping_history.uniprot_release,
        'uniprotEntry': {
            'uniprotAccession': uniprot.uniprot_acc,
            'entryType': Mapping.entry_type(mapping_history.entry_type_id),
            'sequenceVersion': uniprot.sequence_version,
            'upi': uniprot.upi,
            'md5': uniprot.md5,
            # same rule as build_related_unmapped_entries_data: an entry is
            # reported as canonical when it has no canonical_uniprot_id set
            # NOTE(review): presumably canonical_uniprot_id references the
            # canonical entry of an isoform - confirm against the data model
            'isCanonical': not uniprot.canonical_uniprot_id,
            'alias': uniprot.alias,
            'ensemblDerived': uniprot.ensembl_derived,
            'gene_symbol': uniprot.gene_symbol,
            'gene_accession': uniprot.gene_accession,
            'length': uniprot.length
        },
        'ensemblTranscript': {
            'enstId': transcript.enst_id,
            'enstVersion': transcript.enst_version,
            'upi': transcript.uniparc_accession,
            'biotype': transcript.biotype,
            'deleted': transcript.deleted,
            'chromosome': gene.chromosome,
            'seqRegionStart': transcript.seq_region_start,
            'seqRegionEnd': transcript.seq_region_end,
            'seqRegionStrand': gene.seq_region_strand,
            'ensgId': gene.ensg_id,
            'ensgName': gene.gene_name,
            'ensgSymbol': gene.gene_symbol,
            'ensgAccession': gene.gene_accession,
            'sequence': sequence,
            'enspId': transcript.ensp_id,
            'enspLen': transcript.ensp_len,
            'select': transcript.select
        },
        'alignment_difference': mapping.alignment_difference,
        'status': Mapping.status_type(status),
        'status_history': mapping.statuses(usernames=authenticated)
    }

    return mapping_obj
def build_related_unmapped_entries_data(mapping):
    """
    Return the unmapped entries related to the mapping (via grouping_id).

    The reference point is the mapping history entry with the latest
    release_mapping_history__time_mapped. Related UniProt entries are the
    UniprotEntryHistory rows sharing its grouping_id and UniProt release;
    related transcripts are the TranscriptHistory rows sharing its
    grouping_id and Ensembl species history.

    Returns a dict with two lists of dicts: 'ensembl' (transcripts) and
    'uniprot' (UniProt entries).
    """
    mapping_mh = mapping.mapping_history.latest(
        'release_mapping_history__time_mapped')
    mapping_mh_rmh = mapping_mh.release_mapping_history
    mapping_grouping_id = mapping_mh.grouping_id

    # related unmapped entries share the same grouping_id within the same
    # UniProt release
    # NOTE(review): an earlier comment here also mentioned the tax id, but
    # no tax id filter is applied - confirm whether one is needed
    # select_related avoids one extra query per history row for .uniprot
    related_unmapped_ue_histories = UniprotEntryHistory.objects.filter(
        release_version=mapping_mh_rmh.uniprot_release,
        grouping_id=mapping_grouping_id).select_related('uniprot')

    related_unmapped_ue_entries = []
    for ueh in related_unmapped_ue_histories:
        up_entry = ueh.uniprot
        related_unmapped_ue_entries.append({
            'uniprot_id': up_entry.uniprot_id,
            'uniprotAccession': up_entry.uniprot_acc,
            'entryType': Mapping.entry_type(up_entry.entry_type_id),
            'sequenceVersion': up_entry.sequence_version,
            'upi': up_entry.upi,
            'md5': up_entry.md5,
            'isCanonical': not up_entry.canonical_uniprot_id,
            'alias': up_entry.alias,
            'ensemblDerived': up_entry.ensembl_derived,
            'gene_symbol': up_entry.gene_symbol,
            # NOTE(review): build_mapping reads uniprot.gene_accession for
            # this key - confirm chromosome_line is the intended source
            'gene_accession': up_entry.chromosome_line,
            'length': up_entry.length,
            'protein_existence_id': up_entry.protein_existence_id
        })

    # select_related avoids extra queries per history row for .transcript
    # and for the transcript's .gene
    related_unmapped_transcript_histories = TranscriptHistory.objects.filter(
        ensembl_species_history=mapping_mh_rmh.ensembl_species_history,
        grouping_id=mapping_grouping_id).select_related('transcript__gene')

    related_unmapped_transcripts = []
    for t_hist in related_unmapped_transcript_histories:
        transcript = t_hist.transcript
        gene = transcript.gene
        related_unmapped_transcripts.append({
            'transcript_id': transcript.transcript_id,
            'enstId': transcript.enst_id,
            'enstVersion': transcript.enst_version,
            'upi': transcript.uniparc_accession,
            'biotype': transcript.biotype,
            'deleted': transcript.deleted,
            'chromosome': gene.chromosome,
            'regionAccession': gene.region_accession,
            'seqRegionStart': transcript.seq_region_start,
            'seqRegionEnd': transcript.seq_region_end,
            'seqRegionStrand': gene.seq_region_strand,
            'ensgId': gene.ensg_id,
            'ensgName': gene.gene_name,
            'ensgSymbol': gene.gene_symbol,
            'ensgAccession': gene.gene_accession,
            'ensgRegionAccession': gene.region_accession,
            'sequence': None,
            'enspId': transcript.ensp_id,
            'enspLen': transcript.ensp_len,
            'source': transcript.source,
            'select': transcript.select
        })

    return {
        'ensembl': related_unmapped_transcripts,
        'uniprot': related_unmapped_ue_entries
    }