Python Alignment.calculate_statisticsの例

プログラミング言語: Python

名前空間/パッケージ名: common.alignment

クラス/型: Alignment

メソッド/関数: calculate_statistics

hotexamples.comのコード掲載数: 7

Python Alignment.calculate_statistics - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcommon.alignment.Alignment.calculate_statisticsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Alignment(11)

build_alignment(11)

load_proteins(11)

load_segments(11)

calculate_similarity(6)

load_reference_protein(5)

show_padding(5)

calculate_statistics(4)

remove_non_generic_numbers_from_alignment(1)

コード例 #1

ファイルを表示

ファイル: views.py プロジェクト: simomounir/protwis

    def get(self, request, proteins=None, segments=None, statistics=False):
        """Align the requested proteins and return the alignment as a dict.

        Args:
            request: Incoming request object; not read by this method.
            proteins: Comma-separated protein entry names.
            segments: Optional comma-separated selection. Entries that match
                the slug of a non-partial ``ProteinSegment`` are used as
                segments; every other entry is looked up as a generic number
                label in the numbering scheme of the first listed protein.
                When omitted, all non-partial segments are aligned.
            statistics: When equal to ``True``, alignment statistics are
                calculated and returned under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping a dict that maps each FASTA header
            (without the leading ``>``) to the row following it, plus the
            optional ``"statistics"`` dict. ``None`` when ``proteins`` is
            ``None``.

        Note:
            The ``.get()`` lookups are not guarded, so an unknown entry name
            or generic number label propagates the ORM's lookup error.
        """
        if proteins is None:
            # NOTE(review): the original fell through without a return value
            # here; that behaviour is kept. Confirm the URL routing always
            # supplies `proteins`.
            return None

        protein_list = proteins.split(",")
        ps = Protein.objects.filter(sequence_type__slug='wt',
                                    entry_name__in=protein_list)

        # take the numbering scheme from the first protein
        s_slug = Protein.objects.get(
            entry_name=protein_list[0]).residue_numbering_scheme_id

        gen_list = []
        if segments is not None:
            # Materialise the slugs of all complete segments once; a set makes
            # every membership test in the loop below O(1).
            complete_slugs = set(ProteinSegment.objects.filter(
                partial=False).values_list('slug', flat=True))

            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    segment_list.append(s)
                else:
                    # not a segment slug: treat it as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(
                        label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments that were requested
            ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                               partial=False)
        else:
            ss = ProteinSegment.objects.filter(partial=False)

        # create an alignment object and load the selection into it
        a = Alignment()
        a.show_padding = False
        a.load_proteins(ps)

        # load generic numbers and segments separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        # The explicit comparison is kept on purpose: a truthy value that is
        # not equal to True (e.g. a non-empty string) must not enable this.
        include_statistics = statistics == True  # noqa: E712
        if include_statistics:
            a.calculate_statistics()

        # render the fasta template as string and split it into rows
        rows = render_to_string('alignment/alignment_fasta.html', {
            'a': a
        }).split("\n")

        # Pair every ">header" row with the single row that follows it; rows
        # that do not directly follow a header are ignored.
        ali_dict = {}
        header = False
        for row in rows:
            if row.startswith(">"):
                header = row[1:]
            elif header:
                ali_dict[header] = row
                header = False

        # render statistics for output
        if include_statistics:
            feat = {}
            # Keep only the first item of every entry (this drops the
            # frequencies) and flatten the per-segment lists into one list.
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = [x[0] for d in a.feature_stats[i] for x in d]
            for i, aa in enumerate(AMINO_ACIDS):
                feat[aa] = [x[0] for d in a.amino_acid_stats[i] for x in d]
            ali_dict["statistics"] = feat

        return Response(ali_dict)

コード例 #2

ファイルを表示

    def main_func(self, positions, iteration):
        """Build a consensus protein for each family in a slice of families.

        For every family with more than one human wild-type protein an
        alignment is built, its forced consensus is stored as a new protein
        via ``self.create_protein`` and residues are created per segment via
        ``create_or_update_residues_in_segment``.

        Args:
            positions: Two-item sequence ``(start, stop)`` used to slice
                ``self.families``. A falsy ``stop`` means "to the end".
            iteration: Not used by this method.
        """
        # A falsy stop (None, 0) selects everything from the start position on.
        stop = positions[1] or None
        families = self.families[positions[0]:stop]

        for family in families:
            # get proteins in this family
            proteins = Protein.objects.filter(
                family__slug__startswith=family.slug,
                sequence_type__slug='wt',
                species__common_name="Human").prefetch_related(
                    'species', 'residue_numbering_scheme')

            # a consensus needs at least two sequences
            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))

            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()
            self.logger.info(
                'Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = "".join(
                aa
                for s in a.forced_consensus.values()
                for aa in s.values())

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(
                slug='consensus', defaults={
                    'name': 'Consensus',
                })
            if created:
                self.logger.info('Created protein sequence type {}'.format(
                    sequence_type.name))

            # create a protein record, borrowing metadata from the first
            # protein of the family (looked up once instead of per attribute)
            first_protein = proteins[0]
            consensus_name = family.name + " consensus"
            residue_numbering_scheme = first_protein.residue_numbering_scheme
            up = dict()
            up['entry_name'] = slugify(consensus_name)
            if Protein.objects.filter(entry_name=up['entry_name']).exists():
                up['entry_name'] += "-" + family.slug.split('_')[0]
            up['source'] = "OTHER"
            up['species_latin_name'] = first_protein.species.latin_name
            up['species_common_name'] = first_protein.species.common_name
            up['sequence'] = family_consensus

            # Two separate lists: the original bound one list object to both
            # keys, so a mutation through one key would show up in the other.
            up['names'] = []
            up['genes'] = []
            self.create_protein(consensus_name, family, sequence_type,
                                residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = dict()
            # a constriction has to be in all sequences to be included in the
            # consensus
            consensus_pas = dict()
            pcs = ProteinConformation.objects.filter(
                protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related(
                    'protein_anomalies')
            for pc in pcs:
                pas = pc.protein_anomalies.all().prefetch_related(
                    'generic_number__protein_segment', 'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(
                            pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = (
                            constriction_freq.get(pa_label, 0) + 1)

            # Frequencies are counted once per conformation, so "present in
            # all sequences" means the count equals the number of
            # conformations examined. The original compared against
            # len(all_constrictions), i.e. the number of distinct
            # constrictions, which does not implement the stated rule.
            sequence_count = len(pcs)

            # go through constrictions to see which ones should be included in
            # the consensus
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug

                # is the constriction in all sequences?
                if constriction_freq[pa_label] == sequence_count:
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(
                protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(
                a.forced_consensus)
            (ref_positions, segment_starts, segment_aligned_starts,
             segment_ends, segment_aligned_ends) = segment_info
            for segment_slug in a.forced_consensus:
                segment = ProteinSegment.objects.get(slug=segment_slug)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                if segment_slug in segment_starts:
                    create_or_update_residues_in_segment(
                        pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug],
                        segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes,
                        ref_positions, protein_anomalies, True)

コード例 #3

ファイルを表示

ファイル: views.py プロジェクト: protwis/protwis

    def get(self, request, proteins=None, segments=None, statistics=False):
        """Align the requested proteins and return the alignment as a dict.

        Args:
            request: Incoming request object; not read by this method.
            proteins: Comma-separated protein entry names.
            segments: Optional comma-separated selection. Entries that match
                the slug of a non-partial ``ProteinSegment`` are used as
                segments; every other entry is looked up as a generic number
                label in the numbering scheme of the first listed protein.
                When omitted, all non-partial segments are aligned.
            statistics: When equal to ``True``, alignment statistics are
                calculated and returned under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping a dict that maps each FASTA header
            (without the leading ``>``) to the row following it, plus the
            optional ``"statistics"`` dict. ``None`` when ``proteins`` is
            ``None``.

        Note:
            The ``.get()`` lookups are not guarded, so an unknown entry name
            or generic number label propagates the ORM's lookup error.
        """
        if proteins is None:
            # NOTE(review): the original fell through without a return value
            # here; that behaviour is kept. Confirm the URL routing always
            # supplies `proteins`.
            return None

        protein_list = proteins.split(",")
        ps = Protein.objects.filter(sequence_type__slug='wt', entry_name__in=protein_list)

        # take the numbering scheme from the first protein
        s_slug = Protein.objects.get(entry_name=protein_list[0]).residue_numbering_scheme_id

        gen_list = []
        if segments is not None:
            # Materialise the slugs of all complete segments once; a set makes
            # every membership test in the loop below O(1).
            complete_slugs = set(
                ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))

            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    segment_list.append(s)
                else:
                    # not a segment slug: treat it as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments that were requested
            ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
        else:
            ss = ProteinSegment.objects.filter(partial=False)

        # create an alignment object and load the selection into it
        a = Alignment()
        a.show_padding = False
        a.load_proteins(ps)

        # load generic numbers and segments separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        # The explicit comparison is kept on purpose: a truthy value that is
        # not equal to True (e.g. a non-empty string) must not enable this.
        include_statistics = statistics == True  # noqa: E712
        if include_statistics:
            a.calculate_statistics()

        # render the fasta template as string and split it into rows
        rows = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

        # Pair every ">header" row with the single row that follows it; rows
        # that do not directly follow a header are ignored.
        ali_dict = {}
        header = False
        for row in rows:
            if row.startswith(">"):
                header = row[1:]
            elif header:
                ali_dict[header] = row
                header = False

        # render statistics for output
        if include_statistics:
            feat = {}
            # Keep only the first item of every entry (this drops the
            # frequencies) and flatten the per-segment lists into one list.
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = [x[0] for d in a.feature_stats[i] for x in d]
            for i, aa in enumerate(AMINO_ACIDS):
                feat[aa] = [x[0] for d in a.amino_acid_stats[i] for x in d]
            ali_dict["statistics"] = feat

        return Response(ali_dict)

コード例 #4

ファイルを表示

ファイル: build_consensus_sequences.py プロジェクト: magdchat/protwis

    def main_func(self, positions, iteration, count, lock):
        """Build consensus proteins for families claimed from a shared counter.

        Each call repeatedly claims the next family index from ``count``
        (guarded by ``lock``), builds an alignment for that family, stores a
        pickled copy of it, creates the consensus protein via
        ``self.create_protein`` and creates its residues per segment.

        Args:
            positions: Not used by this method.
            iteration: Not used by this method.
            count: Shared counter object exposing a ``value`` attribute with
                the index of the next unclaimed family (presumably a
                ``multiprocessing.Value`` - confirm against the caller).
            lock: Context manager serialising access to ``count``.
        """
        if self.signprot:
            signprot_fam = ProteinFamily.objects.get(name=self.signprot)
            # The '_' at the end is needed to skip the Alpha and Arrestin consensus sequences
            families = ProteinFamily.objects.filter(slug__startswith=signprot_fam.slug + '_').all()
            self.segments = ProteinSegment.objects.filter(partial=False, proteinfamily=self.signprot)
        else:
            families = self.families

        if self.input_slug:
            families = ProteinFamily.objects.filter(slug__startswith=self.input_slug)

        family_total = len(families)
        while True:
            # Test and claim the next index under the same lock. The original
            # tested `count.value` outside the lock, so another worker could
            # advance the counter in between and push the index out of range.
            with lock:
                if count.value >= family_total:
                    break
                family = families[count.value]
                count.value += 1

            # get proteins in this family
            proteins = Protein.objects.filter(family__slug__startswith=family.slug, sequence_type__slug='wt',
                species__common_name="Human").prefetch_related('species', 'residue_numbering_scheme')

            # if family does not have human equivalents, like Class D1
            if not proteins:
                proteins = Protein.objects.filter(family__slug__startswith=family.slug,
                    sequence_type__slug='wt').prefetch_related('species', 'residue_numbering_scheme')

            # a consensus needs at least two sequences
            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))

            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()

            try:
                # Save alignment
                AlignmentConsensus.objects.create(slug=family.slug, alignment=pickle.dumps(a))

                # Load alignment to ensure it works.
                # NOTE: unpickling is only acceptable here because the bytes
                # are the ones this method has just stored; never apply
                # pickle.loads to untrusted data.
                a = pickle.loads(AlignmentConsensus.objects.get(slug=family.slug).alignment)
                self.logger.info('Succesfully pickled {}'.format(family))
            except Exception:
                # Best effort: keep going with the in-memory alignment. Catching
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                self.logger.error('Failed pickle for {}'.format(family))

            self.logger.info('Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = "".join(
                aa
                for s in a.forced_consensus.values()
                for aa in s.values())

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(slug='consensus',
                defaults={'name': 'Consensus',})
            if created:
                self.logger.info('Created protein sequence type {}'.format(sequence_type.name))

            # create a protein record, borrowing metadata from the first
            # protein of the family (looked up once instead of per attribute)
            first_protein = proteins[0]
            consensus_name = family.name + " consensus"
            residue_numbering_scheme = first_protein.residue_numbering_scheme
            up = dict()
            up['entry_name'] = slugify(consensus_name)
            if Protein.objects.filter(entry_name=up['entry_name']).exists():
                up['entry_name'] += "-" + family.slug.split('_')[0]
            up['source'] = "OTHER"
            up['species_latin_name'] = first_protein.species.latin_name
            up['species_common_name'] = first_protein.species.common_name
            up['sequence'] = family_consensus

            # Two separate lists: the original bound one list object to both
            # keys, so a mutation through one key would show up in the other.
            up['names'] = []
            up['genes'] = []
            self.create_protein(consensus_name, family, sequence_type, residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = dict()
            # a constriction has to be in all sequences to be included in the consensus
            consensus_pas = dict()
            pcs = ProteinConformation.objects.filter(protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related('protein_anomalies')
            for pc in pcs:
                pas = pc.protein_anomalies.all().prefetch_related('generic_number__protein_segment', 'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = constriction_freq.get(pa_label, 0) + 1

            # Frequencies are counted once per conformation, so "present in
            # all sequences" means the count equals the number of
            # conformations examined. The original compared against
            # len(all_constrictions), i.e. the number of distinct
            # constrictions, which does not implement the stated rule.
            sequence_count = len(pcs)

            # go through constrictions to see which ones should be included in the consensus
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug

                # is the constriction in all sequences?
                if constriction_freq[pa_label] == sequence_count:
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(a.forced_consensus)
            (ref_positions, segment_starts, segment_aligned_starts,
             segment_ends, segment_aligned_ends) = segment_info

            # With a signalling protein set, the segment lookup is narrowed by
            # it and it is passed on as one extra trailing argument.
            segment_lookup = {}
            trailing_args = ()
            if self.signprot:
                segment_lookup['proteinfamily'] = self.signprot
                trailing_args = (self.signprot,)

            for segment_slug in a.forced_consensus:
                segment = ProteinSegment.objects.get(slug=segment_slug, **segment_lookup)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                if segment_slug in segment_starts:
                    create_or_update_residues_in_segment(pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug], segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes, ref_positions,
                        protein_anomalies, True, *trailing_args)

コード例 #5

ファイルを表示

ファイル: views.py プロジェクト: jermayioni/protwis

    def get(self,
            request,
            slug=None,
            segments=None,
            latin_name=None,
            statistics=False):
        """Align all proteins of a family and return the alignment as a dict.

        Args:
            request: Incoming request object; not read by this method.
            slug: Prefix of the protein family slug to select proteins by.
            segments: Optional comma-separated selection. Entries that match
                the slug of a non-partial ``ProteinSegment`` are used as
                segments; every other entry is looked up as a generic number
                label in the numbering scheme of the first selected protein.
                When omitted, all non-partial segments are considered.
            latin_name: Optional species latin name to restrict the proteins.
            statistics: When equal to ``True``, the calculated statistics are
                also returned under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping an ordered dict that maps each FASTA
            header (without the leading ``>``) to the row following it, a
            ``"CONSENSUS"`` entry and the optional ``"statistics"`` dict.
            ``None`` when ``slug`` is ``None``.

        Note:
            The first selected protein is read without an emptiness check and
            the ``.get()`` lookup is not guarded, so an unmatched slug or
            label propagates the underlying error.
        """
        if slug is None:
            # NOTE(review): the original fell through without a return value
            # here; that behaviour is kept. Confirm the URL routing always
            # supplies `slug`.
            return None

        # Check for specific species
        criteria = {
            'sequence_type__slug': 'wt',
            'source__id': 1,
            'family__slug__startswith': slug,
        }
        if latin_name is not None:
            criteria['species__latin_name'] = latin_name
        ps = Protein.objects.filter(**criteria)

        # take the numbering scheme and family prefix from the first protein
        # (fetched once instead of once per attribute)
        first_protein = ps[0]
        s_slug = first_protein.residue_numbering_scheme_id
        protein_family = first_protein.family.slug[:3]

        gen_list = []
        if segments is not None:
            # Materialise the slugs of all complete segments once; a set makes
            # every membership test in the loop below O(1).
            complete_slugs = set(ProteinSegment.objects.filter(
                partial=False).values_list('slug', flat=True))

            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    segment_list.append(s)
                else:
                    # not a segment slug: treat it as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(
                        label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments that were requested
            ss = ProteinSegment.objects.filter(slug__in=segment_list,
                                               partial=False)
        else:
            ss = ProteinSegment.objects.filter(partial=False)

        # keep only the segments that belong to the protein family class
        if int(protein_family) < 100:
            ss = [s for s in ss if s.proteinfamily == 'GPCR']
        elif protein_family == "100":
            ss = [s for s in ss if s.proteinfamily == 'Gprotein']
        elif protein_family == "200":
            ss = [s for s in ss if s.proteinfamily == 'Arrestin']

        # create an alignment object and load the selection into it
        a = Alignment()
        a.show_padding = False
        a.load_proteins(ps)

        # load generic numbers and segments separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        # always calculated here: the consensus below is read after this call
        a.calculate_statistics()

        consensus = ''.join(aa.amino_acid for aa in a.full_consensus)

        # render the fasta template as string and split it into rows
        rows = render_to_string('alignment/alignment_fasta.html', {
            'a': a
        }).split("\n")

        # Pair every ">header" row with the single row that follows it. The
        # original never initialised the pending header and used a plain
        # `else`, so a row without a pending header (e.g. a trailing blank
        # line) was stored under the key False, or raised UnboundLocalError
        # when it came first. Such rows are now skipped.
        ali_dict = OrderedDict()
        header = False
        for row in rows:
            if row.startswith(">"):
                header = row[1:]
            elif header:
                ali_dict[header] = row
                header = False
        ali_dict['CONSENSUS'] = consensus

        # render statistics for output; the explicit comparison is kept so
        # that a truthy value not equal to True does not enable this
        if statistics == True:  # noqa: E712
            feat = {}
            # Keep only the first item of every entry (this drops the
            # frequencies) and flatten the per-segment lists into one list.
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = [x[0] for d in a.feature_stats[i] for x in d]
            for i, aa in enumerate(AMINO_ACIDS):
                feat[aa] = [x[0] for d in a.amino_acid_stats[i] for x in d]
            ali_dict["statistics"] = feat

        return Response(ali_dict)

コード例 #6

ファイルを表示

ファイル: views.py プロジェクト: pszgaspar/protwis

    def get(self, request, slug=None, segments=None, latin_name=None, statistics=False):
        """Align all proteins of a family and return the alignment as a dict.

        Args:
            request: Incoming request object; not read by this method.
            slug: Prefix of the protein family slug to select proteins by.
            segments: Optional comma-separated selection. Entries that match
                the slug of a non-partial ``ProteinSegment`` are used as
                segments; every other entry is looked up as a generic number
                label in the numbering scheme of the first selected protein.
                When omitted, all non-partial segments are considered.
            latin_name: Optional species latin name to restrict the proteins.
            statistics: When equal to ``True``, the calculated statistics are
                also returned under the ``"statistics"`` key.

        Returns:
            A ``Response`` wrapping an ordered dict that maps each FASTA
            header (without the leading ``>``) to the row following it, a
            ``"CONSENSUS"`` entry and the optional ``"statistics"`` dict.
            ``None`` when ``slug`` is ``None``.

        Note:
            The first selected protein is read without an emptiness check and
            the ``.get()`` lookup is not guarded, so an unmatched slug or
            label propagates the underlying error.
        """
        if slug is None:
            # NOTE(review): the original fell through without a return value
            # here; that behaviour is kept. Confirm the URL routing always
            # supplies `slug`.
            return None

        # Check for specific species
        criteria = {
            'sequence_type__slug': 'wt',
            'source__id': 1,
            'family__slug__startswith': slug,
        }
        if latin_name is not None:
            criteria['species__latin_name'] = latin_name
        ps = Protein.objects.filter(**criteria)

        # take the numbering scheme and family prefix from the first protein
        # (fetched once instead of once per attribute)
        first_protein = ps[0]
        s_slug = first_protein.residue_numbering_scheme_id
        protein_family = first_protein.family.slug[:3]

        gen_list = []
        if segments is not None:
            # Materialise the slugs of all complete segments once; a set makes
            # every membership test in the loop below O(1).
            complete_slugs = set(
                ProteinSegment.objects.filter(partial=False).values_list('slug', flat=True))

            segment_list = []
            for s in segments.split(","):
                if s in complete_slugs:
                    segment_list.append(s)
                else:
                    # not a segment slug: treat it as a generic number label
                    gen_object = ResidueGenericNumberEquivalent.objects.get(label=s, scheme__id=s_slug)
                    gen_object.properties = {}
                    gen_list.append(gen_object)

            # fetch all complete protein_segments that were requested
            ss = ProteinSegment.objects.filter(slug__in=segment_list, partial=False)
        else:
            ss = ProteinSegment.objects.filter(partial=False)

        # keep only the segments that belong to the protein family class
        if int(protein_family) < 100:
            ss = [s for s in ss if s.proteinfamily == 'GPCR']
        elif protein_family == "100":
            ss = [s for s in ss if s.proteinfamily == 'Gprotein']
        elif protein_family == "200":
            ss = [s for s in ss if s.proteinfamily == 'Arrestin']

        # create an alignment object and load the selection into it
        a = Alignment()
        a.show_padding = False
        a.load_proteins(ps)

        # load generic numbers and segments separately
        if gen_list:
            a.load_segments(gen_list)
        a.load_segments(ss)

        # build the alignment data matrix
        a.build_alignment()

        # always calculated here: the consensus below is read after this call
        a.calculate_statistics()

        consensus = ''.join(aa.amino_acid for aa in a.full_consensus)

        # render the fasta template as string and split it into rows
        rows = render_to_string('alignment/alignment_fasta.html', {'a': a}).split("\n")

        # Pair every ">header" row with the single row that follows it. The
        # original never initialised the pending header and used a plain
        # `else`, so a row without a pending header (e.g. a trailing blank
        # line) was stored under the key False, or raised UnboundLocalError
        # when it came first. Such rows are now skipped.
        ali_dict = OrderedDict()
        header = False
        for row in rows:
            if row.startswith(">"):
                header = row[1:]
            elif header:
                ali_dict[header] = row
                header = False
        ali_dict['CONSENSUS'] = consensus

        # render statistics for output; the explicit comparison is kept so
        # that a truthy value not equal to True does not enable this
        if statistics == True:  # noqa: E712
            feat = {}
            # Keep only the first item of every entry (this drops the
            # frequencies) and flatten the per-segment lists into one list.
            for i, feature in enumerate(AMINO_ACID_GROUPS):
                feat[feature] = [x[0] for d in a.feature_stats[i] for x in d]
            for i, aa in enumerate(AMINO_ACIDS):
                feat[aa] = [x[0] for d in a.amino_acid_stats[i] for x in d]
            ali_dict["statistics"] = feat

        return Response(ali_dict)

コード例 #7

ファイルを表示

ファイル: build_consensus_sequences.py プロジェクト: 25352697/protwis

    def main_func(self, positions, iteration):
        """Build a consensus protein record for each family in a slice of ``self.families``.

        For every family in the slice that has more than one matching protein
        (wild type, ``species__id=1``), this builds an alignment, joins its
        forced consensus into one sequence, creates a protein record of
        sequence type 'consensus' through ``self.create_protein``, selects the
        protein anomalies that belong in the consensus, and creates the
        residues segment by segment.

        Args:
            positions: Two-item sequence ``(start, end)`` used to slice
                ``self.families``. A falsy ``end`` means "until the end".
            iteration: Not used by this method (presumably required by the
                caller's worker interface -- verify against caller).
        """
        # families
        start, end = positions[0], positions[1]
        if not end:
            families = self.families[start:]
        else:
            families = self.families[start:end]

        for family in families:
            # get proteins in this family
            proteins = Protein.objects.filter(family__slug__startswith=family.slug, sequence_type__slug='wt',
                species__id=1).prefetch_related('species', 'residue_numbering_scheme')

            # a consensus needs at least two sequences
            if proteins.count() <= 1:
                continue
            self.logger.info('Building alignment for {}'.format(family))
            # create alignment
            a = Alignment()
            a.load_proteins(proteins)
            a.load_segments(self.segments)
            a.build_alignment()
            a.calculate_statistics()
            self.logger.info('Completed building alignment for {}'.format(family))

            # get (forced) consensus sequence from alignment object
            family_consensus = ''.join(
                aa
                for segment_residues in a.forced_consensus.values()
                for aa in segment_residues.values()
            )

            # create sequence type 'consensus'
            sequence_type, created = ProteinSequenceType.objects.get_or_create(slug='consensus',
                defaults={'name': 'Consensus',})
            if created:
                self.logger.info('Created protein sequence type {}'.format(sequence_type.name))

            # create a protein record
            consensus_name = family.name + " consensus"
            # fetch the first protein once and reuse it for scheme and species
            first_protein = proteins[0]
            residue_numbering_scheme = first_protein.residue_numbering_scheme
            entry_name = slugify(consensus_name)
            # disambiguate the entry name if it is already taken
            if Protein.objects.filter(entry_name=entry_name).exists():
                entry_name += "-" + family.slug.split('_')[0]
            up = {
                'entry_name': entry_name,
                'source': "OTHER",
                'species_latin_name': first_protein.species.latin_name,
                'species_common_name': first_protein.species.common_name,
                'sequence': family_consensus,
                # two distinct lists, so that mutating one cannot affect the other
                'names': [],
                'genes': [],
            }
            self.create_protein(consensus_name, family, sequence_type, residue_numbering_scheme, False, up)

            # get protein anomalies in family
            all_constrictions = []
            constriction_freq = {}
            consensus_pas = {}  # a constriction has to be in all sequences to be included in the consensus
            pcs = ProteinConformation.objects.filter(protein__in=proteins,
                state__slug=settings.DEFAULT_PROTEIN_STATE).prefetch_related('protein_anomalies')
            # materialise the conformations so their number can be used as the
            # "all sequences" threshold below
            conformations = list(pcs)
            for conformation in conformations:
                pas = conformation.protein_anomalies.all().prefetch_related(
                    'generic_number__protein_segment', 'anomaly_type')
                for pa in pas:
                    pa_label = pa.generic_number.label
                    pa_type = pa.anomaly_type.slug
                    pa_segment_slug = pa.generic_number.protein_segment.slug

                    # bulges are directly added to the consensus list
                    if pa_type == 'bulge':
                        segment_pas = consensus_pas.setdefault(pa_segment_slug, [])
                        if pa not in segment_pas:
                            segment_pas.append(pa)

                    # a constriction's frequency is counted
                    else:
                        if pa not in all_constrictions:
                            all_constrictions.append(pa)
                        constriction_freq[pa_label] = constriction_freq.get(pa_label, 0) + 1

            # go through constrictions to see which ones should be included in the consensus
            num_sequences = len(conformations)
            for pa in all_constrictions:
                pa_label = pa.generic_number.label
                pa_segment_slug = pa.generic_number.protein_segment.slug
                freq = constriction_freq[pa_label]

                # is the constriction in all sequences? Compare against the
                # number of conformations examined, not the number of distinct
                # constrictions found.
                if freq == num_sequences:
                    consensus_pas.setdefault(pa_segment_slug, []).append(pa)

            # create residues
            pc = ProteinConformation.objects.get(protein__entry_name=up['entry_name'],
                state__slug=settings.DEFAULT_PROTEIN_STATE)
            segment_info = self.get_segment_residue_information(a.forced_consensus)
            ref_positions, segment_starts, segment_aligned_starts, segment_ends, segment_aligned_ends = segment_info
            for segment_slug in a.forced_consensus:
                segment = ProteinSegment.objects.get(slug=segment_slug)
                protein_anomalies = consensus_pas.get(segment_slug, [])
                if segment_slug in segment_starts:
                    create_or_update_residues_in_segment(pc, segment, segment_starts[segment_slug],
                        segment_aligned_starts[segment_slug], segment_ends[segment_slug],
                        segment_aligned_ends[segment_slug], self.schemes, ref_positions, protein_anomalies, True)