def get(self, request, proteins=None, segments=None, statistics=False):
    """Align the requested proteins and return the alignment as a dict.

    Args:
        request: Incoming request object; not read by this method.
        proteins: Comma-separated protein entry names. Only entries whose
            sequence type slug is ``'wt'`` are loaded, and the residue
            numbering scheme of the first listed entry is used to resolve
            generic number labels.
        segments: Optional comma-separated mix of segment slugs and generic
            number labels. When omitted, every non-partial segment is used.
        statistics: When equal to ``True``, statistics are calculated and
            added to the result under the ``"statistics"`` key.

    Returns:
        A ``Response`` wrapping a dict that maps each FASTA header (without
        the leading ``>``) to the row that follows it, or ``None`` when
        ``proteins`` is ``None``.
    """
    # NOTE(review): this guard assumes the whole original body was
    # conditional on `proteins` being supplied -- confirm against the URL conf.
    if proteins is None:
        return None

    def flatten_stats(stats):
        """Keep element 0 of every entry and flatten the groups into one list."""
        return [entry[0] for group in stats for entry in group]

    protein_list = proteins.split(",")
    ps = Protein.objects.filter(sequence_type__slug='wt',
                                entry_name__in=protein_list)

    # take the numbering scheme from the first protein
    s_slug = Protein.objects.get(
        entry_name=protein_list[0]).residue_numbering_scheme_id

    gen_list = []
    if segments is not None:
        # slugs of all complete segments, materialised once for cheap lookups
        known_slugs = set(ProteinSegment.objects.filter(
            partial=False).values_list('slug', flat=True))
        segment_list = []
        for s in segments.split(","):
            if s in known_slugs:
                segment_list.append(s)
            else:
                # anything that is not a segment slug is looked up as a
                # generic number label in the first protein's scheme
                gen_object = ResidueGenericNumberEquivalent.objects.get(
                    label=s, scheme__id=s_slug)
                gen_object.properties = {}
                gen_list.append(gen_object)
        # fetch all complete protein segments that were asked for
        ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                           partial=False)
    else:
        ss = ProteinSegment.objects.filter(partial=False)

    # create an alignment object and load the selection into it
    a = Alignment()
    a.show_padding = False
    a.load_proteins(ps)

    # load generic numbers and segments separately
    if gen_list:
        a.load_segments(gen_list)
    a.load_segments(ss)

    # build the alignment data matrix
    a.build_alignment()

    # Equality (not truthiness) is kept on purpose so that non-bool values
    # such as strings behave exactly as before.
    with_statistics = statistics == True  # noqa: E712
    if with_statistics:
        a.calculate_statistics()

    # render the fasta template as a list of rows
    rows = render_to_string('alignment/alignment_fasta.html',
                            {'a': a}).split("\n")

    # pair every header row with the row that follows it
    ali_dict = {}
    header = None
    for row in rows:
        if row.startswith(">"):
            header = row[1:]
        elif header:
            ali_dict[header] = row
            header = None

    # render statistics for output
    if with_statistics:
        feat = {}
        for i, feature in enumerate(AMINO_ACID_GROUPS):
            feat[feature] = flatten_stats(a.feature_stats[i])
        for i, AA in enumerate(AMINO_ACIDS):
            feat[AA] = flatten_stats(a.amino_acid_stats[i])
        ali_dict["statistics"] = feat

    return Response(ali_dict)
def main_func(self, positions, iteration):
    """Build and store a consensus protein for each family in a slice.

    For every family in the slice an alignment of its proteins is built, the
    forced consensus is stored through ``self.create_protein`` and residues
    are created for each aligned segment.

    Args:
        positions: Indexable pair ``(start, end)`` into ``self.families``; a
            falsy ``end`` means "until the end of the list".
        iteration: Not used by this method.
    """
    start, end = positions[0], positions[1]
    families = self.families[start:end] if end else self.families[start:]

    for family in families:
        # get proteins in this family
        proteins = Protein.objects.filter(
            family__slug__startswith=family.slug,
            sequence_type__slug='wt',
            species__common_name="Human").prefetch_related(
                'species', 'residue_numbering_scheme')

        # families with fewer than two proteins are skipped
        if proteins.count() <= 1:
            continue

        self.logger.info('Building alignment for {}'.format(family))

        # create alignment
        a = Alignment()
        a.load_proteins(proteins)
        a.load_segments(self.segments)
        a.build_alignment()
        a.calculate_statistics()

        self.logger.info(
            'Completed building alignment for {}'.format(family))

        # get (forced) consensus sequence from alignment object
        family_consensus = ''.join(
            aa
            for segment_residues in a.forced_consensus.values()
            for aa in segment_residues.values())

        # create sequence type 'consensus'
        sequence_type, created = ProteinSequenceType.objects.get_or_create(
            slug='consensus', defaults={'name': 'Consensus'})
        if created:
            self.logger.info('Created protein sequence type {}'.format(
                sequence_type.name))

        # create a protein record
        consensus_name = family.name + " consensus"
        # The queryset has no explicit ordering here, so index it once and
        # reuse the row instead of risking a different "first" per lookup.
        reference_protein = proteins[0]
        residue_numbering_scheme = reference_protein.residue_numbering_scheme

        up = {'entry_name': slugify(consensus_name)}
        if Protein.objects.filter(entry_name=up['entry_name']).exists():
            up['entry_name'] += "-" + family.slug.split('_')[0]
        up['source'] = "OTHER"
        up['species_latin_name'] = reference_protein.species.latin_name
        up['species_common_name'] = reference_protein.species.common_name
        up['sequence'] = family_consensus
        # separate lists, so appending to one can never leak into the other
        up['names'] = []
        up['genes'] = []
        self.create_protein(consensus_name, family, sequence_type,
                            residue_numbering_scheme, False, up)

        # get protein anomalies in family
        all_constrictions = []
        constriction_freq = {}
        consensus_pas = {}
        pcs = ProteinConformation.objects.filter(
            protein__in=proteins,
            state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related(
                'protein_anomalies')
        sequence_count = 0  # number of conformations inspected
        for pc in pcs:
            sequence_count += 1
            pas = pc.protein_anomalies.all().prefetch_related(
                'generic_number__protein_segment', 'anomaly_type')
            for pa in pas:
                pa_label = pa.generic_number.label
                pa_type = pa.anomaly_type.slug
                pa_segment_slug = pa.generic_number.protein_segment.slug

                if pa_type == 'bulge':
                    # bulges are directly added to the consensus list
                    segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                    if pa not in segment_pas:
                        segment_pas.append(pa)
                else:
                    # a constriction's frequency is counted per label
                    if pa not in all_constrictions:
                        all_constrictions.append(pa)
                    constriction_freq[pa_label] = (
                        constriction_freq.get(pa_label, 0) + 1)

        # A constriction has to be in all sequences to be included in the
        # consensus. The frequency is therefore compared with the number of
        # conformations inspected; comparing it with the number of distinct
        # constrictions (as before) dropped a constriction shared by every
        # sequence whenever the two counts differed.
        for pa in all_constrictions:
            pa_label = pa.generic_number.label
            pa_segment_slug = pa.generic_number.protein_segment.slug
            if constriction_freq[pa_label] == sequence_count:
                consensus_pas.setdefault(pa_segment_slug, []).append(pa)

        # create residues
        pc = ProteinConformation.objects.get(
            protein__entry_name=up['entry_name'],
            state__slug=settings.DEFAULT_PROTEIN_STATE)
        segment_info = self.get_segment_residue_information(
            a.forced_consensus)
        (ref_positions, segment_starts, segment_aligned_starts,
         segment_ends, segment_aligned_ends) = segment_info

        for segment_slug in a.forced_consensus:
            segment = ProteinSegment.objects.get(slug=segment_slug)
            protein_anomalies = consensus_pas.get(segment_slug, [])

            if segment_slug in segment_starts:
                create_or_update_residues_in_segment(
                    pc, segment,
                    segment_starts[segment_slug],
                    segment_aligned_starts[segment_slug],
                    segment_ends[segment_slug],
                    segment_aligned_ends[segment_slug],
                    self.schemes, ref_positions, protein_anomalies, True)
def get(self, request, proteins=None, segments=None, statistics=False):
    """Align the requested proteins and return the alignment as a dict.

    Args:
        request: Incoming request object; not read by this method.
        proteins: Comma-separated protein entry names. Only entries whose
            sequence type slug is ``'wt'`` are loaded, and the residue
            numbering scheme of the first listed entry is used to resolve
            generic number labels.
        segments: Optional comma-separated mix of segment slugs and generic
            number labels. When omitted, every non-partial segment is used.
        statistics: When equal to ``True``, statistics are calculated and
            added to the result under the ``"statistics"`` key.

    Returns:
        A ``Response`` wrapping a dict that maps each FASTA header (without
        the leading ``>``) to the row that follows it, or ``None`` when
        ``proteins`` is ``None``.
    """
    # NOTE(review): this guard assumes the whole original body was
    # conditional on `proteins` being supplied -- confirm against the URL conf.
    if proteins is None:
        return None

    def flatten_stats(stats):
        """Keep element 0 of every entry and flatten the groups into one list."""
        return [entry[0] for group in stats for entry in group]

    protein_list = proteins.split(",")
    ps = Protein.objects.filter(sequence_type__slug='wt',
                                entry_name__in=protein_list)

    # take the numbering scheme from the first protein
    first_entry = Protein.objects.get(entry_name=protein_list[0])
    s_slug = first_entry.residue_numbering_scheme_id

    gen_list = []
    if segments is None:
        ss = ProteinSegment.objects.filter(partial=False)
    else:
        # slugs of all complete segments, materialised once for cheap lookups
        complete_slugs = set(ProteinSegment.objects.filter(
            partial=False).values_list('slug', flat=True))
        segment_list = []
        for s in segments.split(","):
            if s in complete_slugs:
                segment_list.append(s)
                continue
            # anything that is not a segment slug is looked up as a generic
            # number label in the first protein's scheme
            gen_object = ResidueGenericNumberEquivalent.objects.get(
                label=s, scheme__id=s_slug)
            gen_object.properties = {}
            gen_list.append(gen_object)
        # fetch all complete protein segments that were asked for
        ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                           partial=False)

    # create an alignment object and load the selection into it
    a = Alignment()
    a.show_padding = False
    a.load_proteins(ps)

    # load generic numbers and segments separately
    if gen_list:
        a.load_segments(gen_list)
    a.load_segments(ss)

    # build the alignment data matrix
    a.build_alignment()

    # Equality (not truthiness) is kept on purpose so that non-bool values
    # such as strings behave exactly as before.
    with_statistics = statistics == True  # noqa: E712
    if with_statistics:
        a.calculate_statistics()

    # render the fasta template as a list of rows
    rendered = render_to_string('alignment/alignment_fasta.html', {'a': a})

    # pair every header row with the row that follows it
    ali_dict = {}
    header = None
    for row in rendered.split("\n"):
        if row.startswith(">"):
            header = row[1:]
        elif header:
            ali_dict[header] = row
            header = None

    # render statistics for output
    if with_statistics:
        feat = {}
        for i, feature in enumerate(AMINO_ACID_GROUPS):
            feat[feature] = flatten_stats(a.feature_stats[i])
        for i, AA in enumerate(AMINO_ACIDS):
            feat[AA] = flatten_stats(a.amino_acid_stats[i])
        ali_dict["statistics"] = feat

    return Response(ali_dict)
def main_func(self, positions, iteration, count, lock):
    """Claim families from a shared counter and build a consensus for each.

    The family list comes from ``self.signprot`` (sub-families of the named
    family), from ``self.families``, or from ``self.input_slug``. Each claimed
    family is aligned, the alignment is pickled into ``AlignmentConsensus``,
    the forced consensus is stored through ``self.create_protein`` and
    residues are created for every aligned segment.

    Args:
        positions: Not used by this method.
        iteration: Not used by this method.
        count: Shared counter exposing a ``value`` attribute; it holds the
            index of the next family to process.
        lock: Context manager guarding ``count``.
    """
    if self.signprot:
        signprot_fam = ProteinFamily.objects.get(name=self.signprot)
        # The '_' at the end is needed to skip the Alpha and Arrestin
        # consensus sequences
        families = ProteinFamily.objects.filter(
            slug__startswith=signprot_fam.slug + '_').all()
        self.segments = ProteinSegment.objects.filter(
            partial=False, proteinfamily=self.signprot)
    else:
        families = self.families
    # NOTE(review): assumed to apply in both branches above, i.e. an input
    # slug always overrides the family list -- confirm.
    if self.input_slug:
        families = ProteinFamily.objects.filter(
            slug__startswith=self.input_slug)

    family_count = len(families)
    while True:
        # The bounds check and the claim happen under the same lock. With
        # the check outside the lock two workers could both pass it and the
        # slower one would index past the end of the list.
        with lock:
            if count.value >= family_count:
                break
            family = families[count.value]
            count.value += 1

        # get proteins in this family
        proteins = Protein.objects.filter(
            family__slug__startswith=family.slug,
            sequence_type__slug='wt',
            species__common_name="Human").prefetch_related(
                'species', 'residue_numbering_scheme')

        # if family does not have human equivalents, like Class D1
        if not proteins:
            proteins = Protein.objects.filter(
                family__slug__startswith=family.slug,
                sequence_type__slug='wt').prefetch_related(
                    'species', 'residue_numbering_scheme')

        # families with fewer than two proteins are skipped
        if proteins.count() <= 1:
            continue

        self.logger.info('Building alignment for {}'.format(family))

        # create alignment
        a = Alignment()
        a.load_proteins(proteins)
        a.load_segments(self.segments)
        a.build_alignment()
        a.calculate_statistics()

        try:
            # Save alignment
            AlignmentConsensus.objects.create(slug=family.slug,
                                              alignment=pickle.dumps(a))
            # Load alignment to ensure it works.
            # NOTE(review): pickle.loads can execute arbitrary code; this is
            # acceptable only while the table holds blobs written by this
            # build step.
            a = pickle.loads(
                AlignmentConsensus.objects.get(slug=family.slug).alignment)
            self.logger.info('Succesfully pickled {}'.format(family))
        except Exception:
            # Best effort: the build continues with the in-memory alignment.
            # A bare `except:` would also have swallowed KeyboardInterrupt
            # and SystemExit.
            self.logger.error('Failed pickle for {}'.format(family))

        self.logger.info(
            'Completed building alignment for {}'.format(family))

        # get (forced) consensus sequence from alignment object
        family_consensus = ''.join(
            aa
            for segment_residues in a.forced_consensus.values()
            for aa in segment_residues.values())

        # create sequence type 'consensus'
        sequence_type, created = ProteinSequenceType.objects.get_or_create(
            slug='consensus', defaults={'name': 'Consensus'})
        if created:
            self.logger.info('Created protein sequence type {}'.format(
                sequence_type.name))

        # create a protein record
        consensus_name = family.name + " consensus"
        # index the queryset once and reuse the row for all three lookups
        reference_protein = proteins[0]
        residue_numbering_scheme = reference_protein.residue_numbering_scheme

        up = {'entry_name': slugify(consensus_name)}
        if Protein.objects.filter(entry_name=up['entry_name']).exists():
            up['entry_name'] += "-" + family.slug.split('_')[0]
        up['source'] = "OTHER"
        up['species_latin_name'] = reference_protein.species.latin_name
        up['species_common_name'] = reference_protein.species.common_name
        up['sequence'] = family_consensus
        # separate lists, so appending to one can never leak into the other
        up['names'] = []
        up['genes'] = []
        self.create_protein(consensus_name, family, sequence_type,
                            residue_numbering_scheme, False, up)

        # get protein anomalies in family
        all_constrictions = []
        constriction_freq = {}
        consensus_pas = {}
        pcs = ProteinConformation.objects.filter(
            protein__in=proteins,
            state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related(
                'protein_anomalies')
        sequence_count = 0  # number of conformations inspected
        for pc in pcs:
            sequence_count += 1
            pas = pc.protein_anomalies.all().prefetch_related(
                'generic_number__protein_segment', 'anomaly_type')
            for pa in pas:
                pa_label = pa.generic_number.label
                pa_type = pa.anomaly_type.slug
                pa_segment_slug = pa.generic_number.protein_segment.slug

                if pa_type == 'bulge':
                    # bulges are directly added to the consensus list
                    segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                    if pa not in segment_pas:
                        segment_pas.append(pa)
                else:
                    # a constriction's frequency is counted per label
                    if pa not in all_constrictions:
                        all_constrictions.append(pa)
                    constriction_freq[pa_label] = (
                        constriction_freq.get(pa_label, 0) + 1)

        # A constriction has to be in all sequences to be included in the
        # consensus, so its frequency is compared with the number of
        # conformations inspected rather than with the number of distinct
        # constrictions.
        for pa in all_constrictions:
            pa_label = pa.generic_number.label
            pa_segment_slug = pa.generic_number.protein_segment.slug
            if constriction_freq[pa_label] == sequence_count:
                consensus_pas.setdefault(pa_segment_slug, []).append(pa)

        # create residues
        pc = ProteinConformation.objects.get(
            protein__entry_name=up['entry_name'],
            state__slug=settings.DEFAULT_PROTEIN_STATE)
        segment_info = self.get_segment_residue_information(
            a.forced_consensus)
        (ref_positions, segment_starts, segment_aligned_starts,
         segment_ends, segment_aligned_ends) = segment_info

        for segment_slug in a.forced_consensus:
            if self.signprot:
                segment = ProteinSegment.objects.get(
                    slug=segment_slug, proteinfamily=self.signprot)
            else:
                segment = ProteinSegment.objects.get(slug=segment_slug)

            protein_anomalies = consensus_pas.get(segment_slug, [])

            if segment_slug in segment_starts:
                residue_args = [
                    pc, segment,
                    segment_starts[segment_slug],
                    segment_aligned_starts[segment_slug],
                    segment_ends[segment_slug],
                    segment_aligned_ends[segment_slug],
                    self.schemes, ref_positions, protein_anomalies, True,
                ]
                # the signalling-protein family is passed as an extra
                # trailing argument only when one is configured
                if self.signprot:
                    residue_args.append(self.signprot)
                create_or_update_residues_in_segment(*residue_args)
def get(self, request, slug=None, segments=None, latin_name=None, statistics=False):
    """Align all proteins of a family and return the alignment plus consensus.

    Args:
        request: Incoming request object; not read by this method.
        slug: Family slug prefix. Proteins are selected when their family
            slug starts with it, their sequence type slug is ``'wt'`` and
            their source id is 1.
        segments: Optional comma-separated mix of segment slugs and generic
            number labels. When omitted, every non-partial segment is used.
        latin_name: Optional species latin name to restrict the selection.
        statistics: When equal to ``True``, statistics are added to the
            result under the ``"statistics"`` key.

    Returns:
        A ``Response`` wrapping an ``OrderedDict`` that maps each FASTA
        header (without the leading ``>``) to the row that follows it, plus
        a ``"CONSENSUS"`` entry, or ``None`` when ``slug`` is ``None``.

    Raises:
        IndexError: When no protein matches the selection.
    """
    # NOTE(review): this guard assumes the whole original body was
    # conditional on `slug` being supplied -- confirm against the URL conf.
    if slug is None:
        return None

    def flatten_stats(stats):
        """Keep element 0 of every entry and flatten the groups into one list."""
        return [entry[0] for group in stats for entry in group]

    # Check for specific species
    filters = {
        'sequence_type__slug': 'wt',
        'source__id': 1,
        'family__slug__startswith': slug,
    }
    if latin_name is not None:
        filters['species__latin_name'] = latin_name
    ps = Protein.objects.filter(**filters)

    # Take the numbering scheme and family prefix from the first protein.
    # The queryset is indexed once so both values come from the same row.
    first_protein = ps[0]
    s_slug = first_protein.residue_numbering_scheme_id
    protein_family = first_protein.family.slug[:3]

    gen_list = []
    if segments is not None:
        # slugs of all complete segments, materialised once for cheap lookups
        known_slugs = set(ProteinSegment.objects.filter(
            partial=False).values_list('slug', flat=True))
        segment_list = []
        for s in segments.split(","):
            if s in known_slugs:
                segment_list.append(s)
            else:
                # anything that is not a segment slug is looked up as a
                # generic number label in the first protein's scheme
                gen_object = ResidueGenericNumberEquivalent.objects.get(
                    label=s, scheme__id=s_slug)
                gen_object.properties = {}
                gen_list.append(gen_object)
        # fetch all complete protein segments that were asked for
        ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                           partial=False)
    else:
        ss = ProteinSegment.objects.filter(partial=False)

    # Keep only the segments belonging to the family's protein class.
    # NOTE(review): assumed to apply whether or not `segments` was given.
    if int(protein_family) < 100:
        ss = [s for s in ss if s.proteinfamily == 'GPCR']
    elif protein_family == "100":
        ss = [s for s in ss if s.proteinfamily == 'Gprotein']
    elif protein_family == "200":
        ss = [s for s in ss if s.proteinfamily == 'Arrestin']

    # create an alignment object and load the selection into it
    a = Alignment()
    a.show_padding = False
    a.load_proteins(ps)

    # load generic numbers and segments separately
    if gen_list:
        a.load_segments(gen_list)
    a.load_segments(ss)

    # build the alignment data matrix
    a.build_alignment()
    a.calculate_statistics()

    consensus = ''.join([aa.amino_acid for aa in a.full_consensus])

    # render the fasta template as a list of rows
    rows = render_to_string('alignment/alignment_fasta.html',
                            {'a': a}).split("\n")

    # Pair every header row with the row that follows it. Rows that do not
    # follow a header (leading or trailing blank lines) are skipped; without
    # this guard they raised a NameError or were stored under the key False.
    ali_dict = OrderedDict()
    header = None
    for row in rows:
        if row.startswith(">"):
            header = row[1:]
        elif header:
            ali_dict[header] = row
            header = None
    ali_dict['CONSENSUS'] = consensus

    # Render statistics for output. Equality (not truthiness) is kept on
    # purpose so that non-bool values behave exactly as before.
    if statistics == True:  # noqa: E712
        feat = {}
        for i, feature in enumerate(AMINO_ACID_GROUPS):
            feat[feature] = flatten_stats(a.feature_stats[i])
        for i, AA in enumerate(AMINO_ACIDS):
            feat[AA] = flatten_stats(a.amino_acid_stats[i])
        ali_dict["statistics"] = feat

    return Response(ali_dict)
def get(self, request, slug=None, segments=None, latin_name=None, statistics=False):
    """Align all proteins of a family and return the alignment plus consensus.

    Args:
        request: Incoming request object; not read by this method.
        slug: Family slug prefix. Proteins are selected when their family
            slug starts with it, their sequence type slug is ``'wt'`` and
            their source id is 1.
        segments: Optional comma-separated mix of segment slugs and generic
            number labels. When omitted, every non-partial segment is used.
        latin_name: Optional species latin name to restrict the selection.
        statistics: When equal to ``True``, statistics are added to the
            result under the ``"statistics"`` key.

    Returns:
        A ``Response`` wrapping an ``OrderedDict`` that maps each FASTA
        header (without the leading ``>``) to the row that follows it, plus
        a ``"CONSENSUS"`` entry, or ``None`` when ``slug`` is ``None``.

    Raises:
        IndexError: When no protein matches the selection.
    """
    # NOTE(review): this guard assumes the whole original body was
    # conditional on `slug` being supplied -- confirm against the URL conf.
    if slug is None:
        return None

    def flatten_stats(stats):
        """Keep element 0 of every entry and flatten the groups into one list."""
        return [entry[0] for group in stats for entry in group]

    # Check for specific species
    selection = dict(sequence_type__slug='wt', source__id=1,
                     family__slug__startswith=slug)
    if latin_name is not None:
        selection['species__latin_name'] = latin_name
    ps = Protein.objects.filter(**selection)

    # Take the numbering scheme and family prefix from the first protein.
    # The queryset is indexed once so both values come from the same row.
    reference = ps[0]
    s_slug = reference.residue_numbering_scheme_id
    protein_family = reference.family.slug[:3]

    gen_list = []
    if segments is None:
        ss = ProteinSegment.objects.filter(partial=False)
    else:
        # slugs of all complete segments, materialised once for cheap lookups
        complete_slugs = set(ProteinSegment.objects.filter(
            partial=False).values_list('slug', flat=True))
        segment_list = []
        for s in segments.split(","):
            if s in complete_slugs:
                segment_list.append(s)
                continue
            # anything that is not a segment slug is looked up as a generic
            # number label in the first protein's scheme
            gen_object = ResidueGenericNumberEquivalent.objects.get(
                label=s, scheme__id=s_slug)
            gen_object.properties = {}
            gen_list.append(gen_object)
        # fetch all complete protein segments that were asked for
        ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                           partial=False)

    # Keep only the segments belonging to the family's protein class.
    # NOTE(review): assumed to apply whether or not `segments` was given.
    if int(protein_family) < 100:
        ss = [s for s in ss if s.proteinfamily == 'GPCR']
    elif protein_family == "100":
        ss = [s for s in ss if s.proteinfamily == 'Gprotein']
    elif protein_family == "200":
        ss = [s for s in ss if s.proteinfamily == 'Arrestin']

    # create an alignment object and load the selection into it
    a = Alignment()
    a.show_padding = False
    a.load_proteins(ps)

    # load generic numbers and segments separately
    if gen_list:
        a.load_segments(gen_list)
    a.load_segments(ss)

    # build the alignment data matrix
    a.build_alignment()
    a.calculate_statistics()

    consensus = ''.join([aa.amino_acid for aa in a.full_consensus])

    # render the fasta template as string
    rendered = render_to_string('alignment/alignment_fasta.html', {'a': a})

    # Pair every header row with the row that follows it. Rows that do not
    # follow a header (leading or trailing blank lines) are skipped; without
    # this guard they raised a NameError or were stored under the key False.
    ali_dict = OrderedDict()
    header = None
    for row in rendered.split("\n"):
        if row.startswith(">"):
            header = row[1:]
        elif header:
            ali_dict[header] = row
            header = None
    ali_dict['CONSENSUS'] = consensus

    # Render statistics for output. Equality (not truthiness) is kept on
    # purpose so that non-bool values behave exactly as before.
    if statistics == True:  # noqa: E712
        feat = {}
        for i, feature in enumerate(AMINO_ACID_GROUPS):
            feat[feature] = flatten_stats(a.feature_stats[i])
        for i, AA in enumerate(AMINO_ACIDS):
            feat[AA] = flatten_stats(a.amino_acid_stats[i])
        ali_dict["statistics"] = feat

    return Response(ali_dict)
def main_func(self, positions, iteration):
    """Build and store a consensus protein for each family in a slice.

    For every family in the slice an alignment of its proteins (species id
    1) is built, the forced consensus is stored through
    ``self.create_protein`` and residues are created for each aligned
    segment.

    Args:
        positions: Indexable pair ``(start, end)`` into ``self.families``; a
            falsy ``end`` means "until the end of the list".
        iteration: Not used by this method.
    """
    first, last = positions[0], positions[1]
    if last:
        families = self.families[first:last]
    else:
        families = self.families[first:]

    for family in families:
        # get proteins in this family
        proteins = Protein.objects.filter(
            family__slug__startswith=family.slug,
            sequence_type__slug='wt',
            species__id=1).prefetch_related(
                'species', 'residue_numbering_scheme')

        # families with fewer than two proteins are skipped
        if proteins.count() <= 1:
            continue

        self.logger.info('Building alignment for {}'.format(family))

        # create alignment
        a = Alignment()
        a.load_proteins(proteins)
        a.load_segments(self.segments)
        a.build_alignment()
        a.calculate_statistics()

        self.logger.info(
            'Completed building alignment for {}'.format(family))

        # get (forced) consensus sequence from alignment object
        family_consensus = ''.join(
            aa
            for segment_residues in a.forced_consensus.values()
            for aa in segment_residues.values())

        # create sequence type 'consensus'
        sequence_type, created = ProteinSequenceType.objects.get_or_create(
            slug='consensus', defaults={'name': 'Consensus'})
        if created:
            self.logger.info('Created protein sequence type {}'.format(
                sequence_type.name))

        # create a protein record
        consensus_name = family.name + " consensus"
        # The queryset has no explicit ordering here, so index it once and
        # reuse the row instead of risking a different "first" per lookup.
        reference_protein = proteins[0]
        residue_numbering_scheme = reference_protein.residue_numbering_scheme

        up = {'entry_name': slugify(consensus_name)}
        if Protein.objects.filter(entry_name=up['entry_name']).exists():
            up['entry_name'] += "-" + family.slug.split('_')[0]
        up['source'] = "OTHER"
        up['species_latin_name'] = reference_protein.species.latin_name
        up['species_common_name'] = reference_protein.species.common_name
        up['sequence'] = family_consensus
        # separate lists, so appending to one can never leak into the other
        up['names'] = []
        up['genes'] = []
        self.create_protein(consensus_name, family, sequence_type,
                            residue_numbering_scheme, False, up)

        # get protein anomalies in family
        all_constrictions = []
        constriction_freq = {}
        consensus_pas = {}
        pcs = ProteinConformation.objects.filter(
            protein__in=proteins,
            state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related(
                'protein_anomalies')
        sequence_count = 0  # number of conformations inspected
        for pc in pcs:
            sequence_count += 1
            pas = pc.protein_anomalies.all().prefetch_related(
                'generic_number__protein_segment', 'anomaly_type')
            for pa in pas:
                pa_label = pa.generic_number.label
                pa_type = pa.anomaly_type.slug
                pa_segment_slug = pa.generic_number.protein_segment.slug

                if pa_type == 'bulge':
                    # bulges are directly added to the consensus list
                    segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                    if pa not in segment_pas:
                        segment_pas.append(pa)
                else:
                    # a constriction's frequency is counted per label
                    if pa not in all_constrictions:
                        all_constrictions.append(pa)
                    constriction_freq[pa_label] = (
                        constriction_freq.get(pa_label, 0) + 1)

        # A constriction has to be in all sequences to be included in the
        # consensus. The frequency is therefore compared with the number of
        # conformations inspected; comparing it with the number of distinct
        # constrictions (as before) dropped a constriction shared by every
        # sequence whenever the two counts differed.
        for pa in all_constrictions:
            pa_label = pa.generic_number.label
            pa_segment_slug = pa.generic_number.protein_segment.slug
            if constriction_freq[pa_label] == sequence_count:
                consensus_pas.setdefault(pa_segment_slug, []).append(pa)

        # create residues
        pc = ProteinConformation.objects.get(
            protein__entry_name=up['entry_name'],
            state__slug=settings.DEFAULT_PROTEIN_STATE)
        segment_info = self.get_segment_residue_information(
            a.forced_consensus)
        (ref_positions, segment_starts, segment_aligned_starts,
         segment_ends, segment_aligned_ends) = segment_info

        for segment_slug in a.forced_consensus:
            segment = ProteinSegment.objects.get(slug=segment_slug)
            protein_anomalies = consensus_pas.get(segment_slug, [])

            if segment_slug in segment_starts:
                create_or_update_residues_in_segment(
                    pc, segment,
                    segment_starts[segment_slug],
                    segment_aligned_starts[segment_slug],
                    segment_ends[segment_slug],
                    segment_aligned_ends[segment_slug],
                    self.schemes, ref_positions, protein_anomalies, True)