コード例 #1
0
    def handle(self, *args, **options):
        """Build Protein, ProteinConformation and Residue rows for the alpha
        subunit of every SignprotComplex.

        When ``options['purge']`` is truthy, the Residue, ProteinConformation
        and Protein rows whose protein entry name ends with ``'_a'`` and whose
        family grandparent is named ``'Alpha'`` are deleted first.

        For each complex:

        1. the ``<pdb code>_a`` Protein and its ProteinConformation are fetched
           or created;
        2. the alpha chain is parsed from the stored PDB text and every residue
           that has a CA atom is paired with the residue of ``sc.protein`` that
           has the same sequence number;
        3. pairs whose amino acids differ and that are not explained by a
           SEQADV record are re-paired through a pairwise alignment;
        4. one Residue per pair is bulk-created for the new conformation.

        A failure while processing one complex is printed and logged, and the
        loop continues with the next complex.
        """
        self.options = options
        if self.options['purge']:
            Residue.objects.filter(
                protein_conformation__protein__entry_name__endswith='_a',
                protein_conformation__protein__family__parent__parent__name=
                'Alpha').delete()
            ProteinConformation.objects.filter(
                protein__entry_name__endswith='_a',
                protein__family__parent__parent__name='Alpha').delete()
            Protein.objects.filter(
                entry_name__endswith='_a',
                family__parent__parent__name='Alpha').delete()

        # Compiled once for all complexes.  Raw string: the pattern is full of
        # regex escapes (\s, \d, \S) that are invalid *string* escapes.
        # Groups: 1 = PDB residue name, 2 = chain id, 3 = PDB sequence number,
        # 4 = 12-character database/accession field, 5 = database residue name,
        # 6 = database sequence number, 7 = start of the trailing comment.
        seqadv_pattern = re.compile(
            r'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)'
        )

        # Building protein and protconf objects for g protein structure in complex
        for sc in SignprotComplex.objects.all():
            self.logger.info(
                'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
                .format(sc))
            try:
                pdb_code = sc.structure.pdb_code.index
                entry_name = pdb_code.lower() + '_a'
                pdb_text = sc.structure.pdb_data.pdb

                # Alpha subunit protein: reuse an existing row, otherwise
                # create one derived from the complex's reference protein.
                try:
                    alpha_protein = Protein.objects.get(entry_name=entry_name)
                except Protein.DoesNotExist:
                    alpha_protein = Protein()
                    alpha_protein.entry_name = entry_name
                    alpha_protein.accession = None
                    alpha_protein.name = entry_name
                    alpha_protein.sequence = sc.protein.sequence
                    alpha_protein.family = sc.protein.family
                    alpha_protein.parent = sc.protein
                    alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                    alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                        slug='mod')
                    alpha_protein.source = ProteinSource.objects.get(
                        name='OTHER')
                    alpha_protein.species = sc.protein.species
                    alpha_protein.save()

                try:
                    alpha_protconf = ProteinConformation.objects.get(
                        protein__entry_name=entry_name)
                except ProteinConformation.DoesNotExist:
                    alpha_protconf = ProteinConformation()
                    alpha_protconf.protein = alpha_protein
                    alpha_protconf.state = ProteinState.objects.get(
                        slug='active')
                    alpha_protconf.save()

                pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                structure = pdbp.get_structure('struct', StringIO(pdb_text))
                chain = structure[0][sc.alpha]

                # Sequence numbers of the chain residues that have a CA atom.
                nums = []
                for res in chain:
                    try:
                        res['CA']
                    except KeyError:
                        continue
                    nums.append(res.get_id()[1])

                resis = Residue.objects.filter(
                    protein_conformation__protein=sc.protein)

                # Create first alignment based on sequence numbers.
                # Values are [PDB residue, reference Residue] pairs.
                pdb_num_dict = OrderedDict()
                for n in nums:
                    # Hard-coded +6 numbering offset for the start of 6OIJ.
                    if pdb_code == '6OIJ' and n < 30:
                        nr = n + 6
                    else:
                        nr = n
                    pdb_num_dict[n] = [chain[n], resis.get(sequence_number=nr)]

                # Find mismatches
                mismatches = [
                    pair for pair in pdb_num_dict.values()
                    if AA[pair[0].get_resname()] != pair[1].amino_acid
                ]

                # Search for annotated engineered mutations in pdb SEQADV
                mutations, shifted_mutations = OrderedDict(), OrderedDict()
                for line in pdb_text.split('\n'):
                    if not line.startswith('SEQADV'):
                        continue
                    line_search = seqadv_pattern.search(line)
                    if line_search is None:
                        continue
                    if line_search.group(2) != sc.alpha:
                        continue
                    pdb_seqnum = int(line_search.group(3))
                    same_accession = (line_search.group(4).strip() ==
                                      sc.protein.accession)
                    if same_accession and line_search.group(
                            3) != line_search.group(6):
                        shifted_mutations[pdb_seqnum] = [
                            line_search.group(1),
                            line_search.group(5),
                            int(line_search.group(6))
                        ]
                    else:
                        # Either the numbering agrees with the reference
                        # accession, or the record names another accession
                        # (the original special cases for 6G79 and 5G53);
                        # both are registered as plain mutations.
                        mutations[pdb_seqnum] = [
                            line_search.group(1),
                            line_search.group(5)
                        ]

                # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                remaining_mismatches = []
                for m in mismatches:
                    num = m[0].get_id()[1]
                    if num in mutations:
                        if m[0].get_resname() != mutations[num][0] and m[
                                1].amino_acid != AA[mutations[num][1]]:
                            remaining_mismatches.append(m)
                    else:
                        remaining_mismatches.append(m)

                # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                if remaining_mismatches and pdb_code not in (
                        '6OIJ', '6OY9', '6OYA'):
                    ppb = PPBuilder()
                    seq = ''.join(
                        str(pp.get_sequence())
                        for pp in ppb.build_peptides(chain, aa_only=False))
                    pw2 = pairwise2.align.localms(sc.protein.sequence, seq, 2,
                                                  -1, -.5, -.1)
                    ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])

                    # Maps a PDB residue (or the alignment column index when
                    # the structure has a gap) to the aligned reference
                    # residue (or the column index when the reference has a
                    # gap).
                    pdb_wt_dict = OrderedDict()
                    j, k = 0, 0
                    for i, (ref, temp) in enumerate(zip(ref_seq, temp_seq)):
                        if ref != '-' and temp != '-':
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = resis[j]
                            j += 1
                            k += 1
                        elif ref == '-':
                            pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                            k += 1
                        elif temp == '-':
                            pdb_wt_dict[i] = resis[j]
                            j += 1

                    for i, r in enumerate(remaining_mismatches):
                        res_num = r[0].get_id()[1]
                        # NOTE(review): for i == 0 the index i - 1 wraps
                        # around to the last mismatch; kept as in the
                        # original - confirm this is intended.
                        prev_num = remaining_mismatches[i - 1][0].get_id()[1]
                        # Adjust for shifted residue when residue is a match
                        if res_num - prev_num > 1:
                            pdb_num_dict[res_num - 1][1] = pdb_wt_dict[chain[
                                res_num - 1]]
                        # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                        if res_num in shifted_mutations:
                            pdb_num_dict[res_num][1] = resis.get(
                                sequence_number=shifted_mutations[res_num][2])
                        # Adjust for shift
                        else:
                            pdb_num_dict[res_num][1] = pdb_wt_dict[r[0]]

                # Number and amino acid come from the structure; generic
                # numbering and segment come from the paired reference residue.
                bulked_residues = []
                for val in pdb_num_dict.values():
                    res_obj = Residue()
                    res_obj.sequence_number = val[0].get_id()[1]
                    res_obj.amino_acid = AA[val[0].get_resname()]
                    res_obj.display_generic_number = val[
                        1].display_generic_number
                    res_obj.generic_number = val[1].generic_number
                    res_obj.protein_conformation = alpha_protconf
                    res_obj.protein_segment = val[1].protein_segment
                    bulked_residues.append(res_obj)
                Residue.objects.bulk_create(bulked_residues)
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                    .format(sc))
            except Exception as msg:
                # Per-complex boundary: report and continue with the next one.
                print(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                    .format(sc))
                print(msg)
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                    .format(sc))
コード例 #2
0
ファイル: build_arrestins.py プロジェクト: luhangzhi/protwis
    def can_create_arrestins(self, family, residue_numbering_scheme, accession,
                             uniprot):
        """Create a Protein from parsed UniProt data, with its aliases, genes
        and structures.

        Args:
            family: value assigned to ``Protein.family``.
            residue_numbering_scheme: value assigned to
                ``Protein.residue_numbering_scheme``.
            accession: stored as ``Protein.accession`` when truthy.
            uniprot: mapping read for the keys ``'source'``,
                ``'species_latin_name'``, ``'species_common_name'``,
                ``'entry_name'``, ``'names'``, ``'sequence'``, ``'genes'`` and
                ``'structures'``.  Each structure entry is indexed as
                ``[0]`` (PDB code) and ``[1]`` (resolution, or ``'-'``).

        Raises:
            Protein.DoesNotExist: when saving the protein failed and no protein
                with the same entry name is stored.
        """
        # get/create protein source
        try:
            source, created = ProteinSource.objects.get_or_create(
                name=uniprot['source'], defaults={'name': uniprot['source']})
            if created:
                self.logger.info('Created protein source ' + source.name)
        except IntegrityError:
            source = ProteinSource.objects.get(name=uniprot['source'])

        # get/create species
        try:
            species, created = Species.objects.get_or_create(
                latin_name=uniprot['species_latin_name'],
                defaults={
                    'common_name': uniprot['species_common_name'],
                })
            if created:
                self.logger.info('Created species ' + species.latin_name)
        except IntegrityError:
            species = Species.objects.get(
                latin_name=uniprot['species_latin_name'])

        # get/create protein sequence type
        # Wild-type for all sequences from source file
        try:
            sequence_type, created = ProteinSequenceType.objects.get_or_create(
                slug='wt', defaults={
                    'slug': 'wt',
                    'name': 'Wild-type',
                })
            if created:
                self.logger.info('Created protein sequence type Wild-type')
        except IntegrityError:
            # Same fallback as for source and species.  The former bare
            # ``except`` only logged and left ``sequence_type`` unbound, which
            # crashed with a NameError a few lines further down.
            sequence_type = ProteinSequenceType.objects.get(slug='wt')

        # create protein
        entry_name = uniprot['entry_name'].lower()
        p = Protein()
        p.family = family
        p.species = species
        p.source = source
        p.residue_numbering_scheme = residue_numbering_scheme
        p.sequence_type = sequence_type

        if accession:
            p.accession = accession
        p.entry_name = entry_name
        p.name = uniprot['names'][0]
        p.sequence = uniprot['sequence']

        try:
            p.save()
            self.logger.info('Created protein {}'.format(p.entry_name))
        except Exception:
            self.logger.error('Failed creating protein {}'.format(
                p.entry_name))

        # Look the stored row up once instead of once per alias, gene and
        # structure.  When the save above failed this still finds a protein
        # that already exists under the same entry name.
        pcan = Protein.objects.get(entry_name=entry_name)

        # protein aliases
        for i, alias in enumerate(uniprot['names']):
            a = ProteinAlias()
            a.protein = pcan
            a.name = alias
            a.position = i

            try:
                a.save()
                self.logger.info('Created protein alias ' + a.name +
                                 ' for protein ' + p.name)
            except Exception:
                self.logger.error('Failed creating protein alias ' + a.name +
                                  ' for protein ' + p.name)

        # genes
        for i, gene in enumerate(uniprot['genes']):
            try:
                g, created = Gene.objects.get_or_create(name=gene,
                                                        species=species,
                                                        position=i)
                if created:
                    self.logger.info('Created gene ' + g.name +
                                     ' for protein ' + p.name)
            except IntegrityError:
                g = Gene.objects.get(name=gene, species=species, position=i)

            g.proteins.add(pcan)

        # structures
        for structure_row in uniprot['structures']:
            pdb_code = structure_row[0]
            resolution = structure_row[1]
            if resolution == '-':
                resolution = 0

            try:
                structure, created = SignprotStructure.objects.get_or_create(
                    PDB_code=pdb_code, resolution=resolution)
                if created:
                    self.logger.info('Created structure ' +
                                     structure.PDB_code + ' for protein ' +
                                     p.name)
            except IntegrityError:
                # No structure object exists here, so report the raw PDB code
                # and skip the linking step for this entry.
                self.logger.error('Failed creating structure ' + pdb_code +
                                  ' for protein ' + p.name)
                continue

            # The link used to be guarded by ``g``, a leftover of the genes
            # loop that is unbound when the protein has no genes.
            structure.origin.add(pcan)
            structure.save()
コード例 #3
0
ファイル: build_g_proteins.py プロジェクト: protwis/protwis
    def cgn_creat_gproteins(self, family, residue_numbering_scheme, accession, uniprot):
        """Create a G protein Protein from parsed UniProt data, with its
        aliases, genes and structures.

        The protein name is the first UniProt name with the leading
        ``'Guanine nucleotide-binding protein '`` removed.

        Args:
            family: value assigned to ``Protein.family``.
            residue_numbering_scheme: value assigned to
                ``Protein.residue_numbering_scheme``.
            accession: stored as ``Protein.accession`` when truthy.
            uniprot: mapping read for the keys ``'source'``,
                ``'species_latin_name'``, ``'species_common_name'``,
                ``'entry_name'``, ``'names'``, ``'sequence'``, ``'genes'`` and
                ``'structures'``.  Each structure entry is indexed as
                ``[0]`` (PDB code) and ``[1]`` (resolution, or ``'-'``).

        Raises:
            Protein.DoesNotExist: when saving the protein failed and no protein
                with the same entry name is stored.
        """
        # get/create protein source
        try:
            source, created = ProteinSource.objects.get_or_create(name=uniprot['source'],
                defaults={'name': uniprot['source']})
            if created:
                self.logger.info('Created protein source ' + source.name)
        except IntegrityError:
            source = ProteinSource.objects.get(name=uniprot['source'])

        # get/create species
        try:
            species, created = Species.objects.get_or_create(latin_name=uniprot['species_latin_name'],
                defaults={
                'common_name': uniprot['species_common_name'],
                })
            if created:
                self.logger.info('Created species ' + species.latin_name)
        except IntegrityError:
            species = Species.objects.get(latin_name=uniprot['species_latin_name'])

        # get/create protein sequence type
        # Wild-type for all sequences from source file
        try:
            sequence_type, created = ProteinSequenceType.objects.get_or_create(slug='wt',
                defaults={
                'slug': 'wt',
                'name': 'Wild-type',
                })
            if created:
                self.logger.info('Created protein sequence type Wild-type')
        except IntegrityError:
            # Same fallback as for source and species.  The former bare
            # ``except`` only logged and left ``sequence_type`` unbound, which
            # crashed with a NameError a few lines further down.
            sequence_type = ProteinSequenceType.objects.get(slug='wt')

        # create protein
        entry_name = uniprot['entry_name'].lower()
        p = Protein()
        p.family = family
        p.species = species
        p.source = source
        p.residue_numbering_scheme = residue_numbering_scheme
        p.sequence_type = sequence_type

        if accession:
            p.accession = accession
        p.entry_name = entry_name
        # Drop the common prefix; names that do not contain it are kept whole
        # instead of raising IndexError.
        full_name = uniprot['names'][0]
        name_parts = full_name.split('Guanine nucleotide-binding protein ')
        p.name = name_parts[1] if len(name_parts) > 1 else full_name
        p.sequence = uniprot['sequence']

        try:
            p.save()
            self.logger.info('Created protein {}'.format(p.entry_name))
        except Exception:
            self.logger.error('Failed creating protein {}'.format(p.entry_name))

        # Look the stored row up once instead of once per alias, gene and
        # structure.  When the save above failed this still finds a protein
        # that already exists under the same entry name.
        pcgn = Protein.objects.get(entry_name=entry_name)

        # protein aliases
        for i, alias in enumerate(uniprot['names']):
            a = ProteinAlias()
            a.protein = pcgn
            a.name = alias
            a.position = i

            try:
                a.save()
                self.logger.info('Created protein alias ' + a.name + ' for protein ' + p.name)
            except Exception:
                self.logger.error('Failed creating protein alias ' + a.name + ' for protein ' + p.name)

        # genes
        for i, gene in enumerate(uniprot['genes']):
            try:
                g, created = Gene.objects.get_or_create(name=gene, species=species, position=i)
                if created:
                    self.logger.info('Created gene ' + g.name + ' for protein ' + p.name)
            except IntegrityError:
                g = Gene.objects.get(name=gene, species=species, position=i)

            g.proteins.add(pcgn)

        # structures
        for structure_row in uniprot['structures']:
            pdb_code = structure_row[0]
            resolution = structure_row[1]
            if resolution == '-':
                resolution = 0

            try:
                structure, created = SignprotStructure.objects.get_or_create(PDB_code=pdb_code, resolution=resolution)
                if created:
                    self.logger.info('Created structure ' + structure.PDB_code + ' for protein ' + p.name)
            except IntegrityError:
                # No structure object exists here, so report the raw PDB code
                # and skip the linking step for this entry.
                self.logger.error('Failed creating structure ' + pdb_code + ' for protein ' + p.name)
                continue

            # The link used to be guarded by ``g``, a leftover of the genes
            # loop that is unbound when the protein has no genes.
            structure.origin.add(pcgn)
            structure.save()
コード例 #4
0
    def create_protein(self, name, family, sequence_type,
                       residue_numbering_scheme, accession, uniprot):
        """Create a Protein with its default conformation, aliases and genes.

        Args:
            name: value assigned to ``Protein.name``.
            family: value assigned to ``Protein.family``.
            sequence_type: value assigned to ``Protein.sequence_type``.
            residue_numbering_scheme: value assigned to
                ``Protein.residue_numbering_scheme``.
            accession: stored as ``Protein.accession`` when truthy.
            uniprot: mapping read for the keys ``'source'``,
                ``'species_latin_name'``, ``'species_common_name'``,
                ``'entry_name'``, ``'sequence'``, ``'names'`` and ``'genes'``.
        """
        # get/create protein source
        try:
            source, created = ProteinSource.objects.get_or_create(
                name=uniprot['source'], defaults={'name': uniprot['source']})
            if created:
                self.logger.info('Created protein source ' + source.name)
        except IntegrityError:
            source = ProteinSource.objects.get(name=uniprot['source'])

        # get/create species
        try:
            species, created = Species.objects.get_or_create(
                latin_name=uniprot['species_latin_name'],
                defaults={
                    'common_name': uniprot['species_common_name'],
                })
            if created:
                self.logger.info('Created species ' + species.latin_name)
        except IntegrityError:
            species = Species.objects.get(
                latin_name=uniprot['species_latin_name'])

        # create protein
        p = Protein()
        p.family = family
        p.species = species
        p.source = source
        p.residue_numbering_scheme = residue_numbering_scheme
        p.sequence_type = sequence_type
        if accession:
            p.accession = accession
        p.entry_name = uniprot['entry_name']
        p.name = name
        p.sequence = uniprot['sequence']

        try:
            p.save()
            self.logger.info('Created protein {}'.format(p.entry_name))
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop the build.
            self.logger.error('Failed creating protein {}'.format(
                p.entry_name))

        # protein conformations
        try:
            ps, created = ProteinState.objects.get_or_create(
                slug=settings.DEFAULT_PROTEIN_STATE,
                defaults={'name': settings.DEFAULT_PROTEIN_STATE.title()})
        except IntegrityError:
            ps = ProteinState.objects.get(slug=settings.DEFAULT_PROTEIN_STATE)

        ProteinConformation.objects.create(protein=p, state=ps)

        # protein aliases
        for i, alias in enumerate(uniprot['names']):
            a = ProteinAlias()
            a.protein = p
            a.name = alias
            a.position = i

            try:
                a.save()
                self.logger.info('Created protein alias ' + a.name +
                                 ' for protein ' + p.name)
            except Exception:
                self.logger.error('Failed creating protein alias ' + a.name +
                                  ' for protein ' + p.name)

        # genes
        for i, gene in enumerate(uniprot['genes']):
            # Either branch binds ``g`` or raises, so no sentinel is needed.
            try:
                g, created = Gene.objects.get_or_create(name=gene,
                                                        species=species,
                                                        position=i)
                if created:
                    self.logger.info('Created gene ' + g.name +
                                     ' for protein ' + p.name)
            except IntegrityError:
                g = Gene.objects.get(name=gene, species=species, position=i)

            g.proteins.add(p)
コード例 #5
0
    def create_protein(self, name, family, sequence_type, residue_numbering_scheme, accession, uniprot):
        """Create a Protein with its default conformation, aliases and genes.

        Args:
            name: value assigned to ``Protein.name``.
            family: value assigned to ``Protein.family``.
            sequence_type: value assigned to ``Protein.sequence_type``.
            residue_numbering_scheme: value assigned to
                ``Protein.residue_numbering_scheme``.
            accession: stored as ``Protein.accession`` when truthy.
            uniprot: mapping read for the keys ``'source'``,
                ``'species_latin_name'``, ``'species_common_name'``,
                ``'entry_name'``, ``'sequence'``, ``'names'`` and ``'genes'``.
        """
        # get/create protein source
        try:
            source, created = ProteinSource.objects.get_or_create(name=uniprot['source'],
                defaults={'name': uniprot['source']})
            if created:
                self.logger.info('Created protein source ' + source.name)
        except IntegrityError:
            source = ProteinSource.objects.get(name=uniprot['source'])

        # get/create species
        try:
            species, created = Species.objects.get_or_create(latin_name=uniprot['species_latin_name'],
                defaults={
                'common_name': uniprot['species_common_name'],
                })
            if created:
                self.logger.info('Created species ' + species.latin_name)
        except IntegrityError:
            species = Species.objects.get(latin_name=uniprot['species_latin_name'])

        # create protein
        p = Protein()
        p.family = family
        p.species = species
        p.source = source
        p.residue_numbering_scheme = residue_numbering_scheme
        p.sequence_type = sequence_type
        if accession:
            p.accession = accession
        p.entry_name = uniprot['entry_name']
        p.name = name
        p.sequence = uniprot['sequence']

        try:
            p.save()
            self.logger.info('Created protein {}'.format(p.entry_name))
        except Exception as e:
            self.logger.error('Failed creating protein {} {}'.format(p.entry_name, str(e)))

        # protein conformations
        try:
            ps, created = ProteinState.objects.get_or_create(slug=settings.DEFAULT_PROTEIN_STATE,
                defaults={'name': settings.DEFAULT_PROTEIN_STATE.title()})
        except IntegrityError:
            ps = ProteinState.objects.get(slug=settings.DEFAULT_PROTEIN_STATE)

        ProteinConformation.objects.create(protein=p, state=ps)

        # protein aliases
        for i, alias in enumerate(uniprot['names']):
            a = ProteinAlias()
            a.protein = p
            a.name = alias
            a.position = i

            try:
                a.save()
                self.logger.info('Created protein alias ' + a.name + ' for protein ' + p.name)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit still stop the build.
                self.logger.error('Failed creating protein alias ' + a.name + ' for protein ' + p.name)

        # genes
        for i, gene in enumerate(uniprot['genes']):
            # Either branch binds ``g`` or raises, so no sentinel is needed.
            try:
                g, created = Gene.objects.get_or_create(name=gene, species=species, position=i)
                if created:
                    self.logger.info('Created gene ' + g.name + ' for protein ' + p.name)
            except IntegrityError:
                g = Gene.objects.get(name=gene, species=species, position=i)

            g.proteins.add(p)
コード例 #6
0
    def handle(self, *args, **options):
        self.options = options
        if self.options['purge']:
            Residue.objects.filter(
                protein_conformation__protein__entry_name__endswith='_a',
                protein_conformation__protein__family__parent__parent__name=
                'Alpha').delete()
            ProteinConformation.objects.filter(
                protein__entry_name__endswith='_a',
                protein__family__parent__parent__name='Alpha').delete()
            Protein.objects.filter(
                entry_name__endswith='_a',
                family__parent__parent__name='Alpha').delete()
            SignprotStructureExtraProteins.objects.all().delete()
            SignprotStructure.objects.all().delete()

        if not options['only_signprot_structures']:
            # Building protein and protconf objects for g protein structure in complex
            scs = SignprotComplex.objects.all()
            for sc in scs:
                self.logger.info(
                    'Protein, ProteinConformation and Residue build for alpha subunit of {} is building'
                    .format(sc))
                try:
                    # Alpha subunit
                    try:
                        alpha_protein = Protein.objects.get(
                            entry_name=sc.structure.pdb_code.index.lower() +
                            '_a')
                    except:
                        alpha_protein = Protein()
                        alpha_protein.entry_name = sc.structure.pdb_code.index.lower(
                        ) + '_a'
                        alpha_protein.accession = None
                        alpha_protein.name = sc.structure.pdb_code.index.lower(
                        ) + '_a'
                        alpha_protein.sequence = sc.protein.sequence
                        alpha_protein.family = sc.protein.family
                        alpha_protein.parent = sc.protein
                        alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                        alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                            slug='mod')
                        alpha_protein.source = ProteinSource.objects.get(
                            name='OTHER')
                        alpha_protein.species = sc.protein.species
                        alpha_protein.save()

                    try:
                        alpha_protconf = ProteinConformation.objects.get(
                            protein__entry_name=sc.structure.pdb_code.index.
                            lower() + '_a')
                    except:
                        alpha_protconf = ProteinConformation()
                        alpha_protconf.protein = alpha_protein
                        alpha_protconf.state = ProteinState.objects.get(
                            slug='active')
                        alpha_protconf.save()

                    pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                    s = pdbp.get_structure('struct',
                                           StringIO(sc.structure.pdb_data.pdb))
                    chain = s[0][sc.alpha]
                    nums = []
                    for res in chain:
                        try:
                            res['CA']
                            nums.append(res.get_id()[1])
                        except:
                            pass

                    resis = Residue.objects.filter(
                        protein_conformation__protein=sc.protein)
                    num_i = 0
                    temp_seq2 = ''
                    pdb_num_dict = OrderedDict()
                    # Create first alignment based on sequence numbers
                    for n in nums:
                        if sc.structure.pdb_code.index == '6OIJ' and n < 30:
                            nr = n + 6
                        else:
                            nr = n
                        pdb_num_dict[n] = [
                            chain[n], resis.get(sequence_number=nr)
                        ]
                    # Find mismatches
                    mismatches = []
                    for n, res in pdb_num_dict.items():
                        if AA[res[0].get_resname()] != res[1].amino_acid:
                            mismatches.append(res)

                    pdb_lines = sc.structure.pdb_data.pdb.split('\n')
                    seqadv = []
                    for l in pdb_lines:
                        if l.startswith('SEQADV'):
                            seqadv.append(l)
                    mutations, shifted_mutations = OrderedDict(), OrderedDict()
                    # Search for annotated engineered mutations in pdb SEQADV
                    for s in seqadv:
                        line_search = re.search(
                            'SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)',
                            s)
                        if line_search != None:
                            if line_search.group(2) == sc.alpha:
                                if line_search.group(
                                        4).strip() == sc.protein.accession:
                                    if line_search.group(
                                            3) == line_search.group(6):
                                        mutations[int(
                                            line_search.group(3))] = [
                                                line_search.group(1),
                                                line_search.group(5)
                                            ]
                                    else:
                                        shifted_mutations[int(
                                            line_search.group(3))] = [
                                                line_search.group(1),
                                                line_search.group(5),
                                                int(line_search.group(6))
                                            ]
                                else:
                                    # Exception for 6G79
                                    if line_search.group(
                                            3
                                    ) != line_search.group(
                                            6
                                    ) and 'CONFLICT' in line_search.group(7):
                                        mutations[int(
                                            line_search.group(3))] = [
                                                line_search.group(1),
                                                line_search.group(5)
                                            ]
                                    # Exception for 5G53
                                    if line_search.group(
                                            4).strip() != sc.protein.accession:
                                        mutations[int(
                                            line_search.group(3))] = [
                                                line_search.group(1),
                                                line_search.group(5)
                                            ]
                    remaining_mismatches = []

                    # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                    for m in mismatches:
                        num = m[0].get_id()[1]
                        if num in mutations:
                            if m[0].get_resname() != mutations[num][0] and m[
                                    1].amino_acid != AA[mutations[num][1]]:
                                remaining_mismatches.append(m)
                        elif num in shifted_mutations:
                            remaining_mismatches.append(m)
                        else:
                            remaining_mismatches.append(m)

                    ### sanity check
                    # print(sc)
                    # print(mutations)
                    # print(shifted_mutations)
                    # print(mismatches)
                    # print('======')
                    # print(remaining_mismatches)
                    # pprint.pprint(pdb_num_dict)

                    # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                    if len(remaining_mismatches
                           ) > 0 and sc.structure.pdb_code.index not in [
                               '6OIJ', '6OY9', '6OYA', '6LPB', '6WHA'
                           ]:
                        ppb = PPBuilder()
                        seq = ''
                        for pp in ppb.build_peptides(chain, aa_only=False):
                            seq += str(pp.get_sequence())
                        pw2 = pairwise2.align.localms(sc.protein.sequence, seq,
                                                      2, -1, -.5, -.1)
                        ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                        wt_pdb_dict = OrderedDict()
                        pdb_wt_dict = OrderedDict()
                        j, k = 0, 0
                        for i, ref, temp in zip(range(0, len(ref_seq)),
                                                ref_seq, temp_seq):
                            # print(i, ref, temp) # alignment check
                            if ref != '-' and temp != '-':
                                wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                                pdb_wt_dict[pdb_num_dict[nums[k]]
                                            [0]] = resis[j]
                                j += 1
                                k += 1
                            elif ref == '-':
                                wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                                pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                                k += 1
                            elif temp == '-':
                                wt_pdb_dict[resis[j]] = i
                                pdb_wt_dict[i] = resis[j]
                                j += 1
                        for i, r in enumerate(remaining_mismatches):
                            # Adjust for shifted residue when residue is a match
                            if r[0].get_id()[1] - remaining_mismatches[
                                    i - 1][0].get_id()[1] > 1:
                                pdb_num_dict[r[0].get_id()[1] -
                                             1][1] = pdb_wt_dict[chain[
                                                 r[0].get_id()[1] - 1]]
                            # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                            if r[0].get_id()[1] in shifted_mutations:
                                pdb_num_dict[r[0].get_id()[1]][1] = resis.get(
                                    sequence_number=shifted_mutations[
                                        r[0].get_id()[1]][2])
                            # Adjust for shift
                            else:
                                pdb_num_dict[r[0].get_id()
                                             [1]][1] = pdb_wt_dict[r[0]]
                    # Custom alignment fix for 6WHA mini-Gq/Gi2/Gs chimera
                    # elif sc.structure.pdb_code.index=='6WHA':
                    #     ref_seq  = 'MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMY--SHLVDYFPEYDGP----QRDAQAAREFILKMFVDL---NPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                    #     temp_seq = '----------VSAEDKAAAERSKMIDKNLREDGEKARRTLRLLLLGADNSGKSTIVK----------------------------------------------------------------------------------------------------------------------------------GIFETKFQVDKVNFHMFDVG-----RRKWIQCFNDVTAIIFVVDSSDYNR----------LQEALNDFKSIWNNRWLRTISVILFLNKQDLLAEKVLAGKSKIEDYFPEFARYTTPDPRVTRAKY-FIRKEFVDISTASGDGRHICYPHFTC-VDTENARRIFNDCKDIILQMNLREYNLV'
                    #     for i, ref, temp in zip(range(0,len(ref_seq)), ref_seq, temp_seq):
                    #         print(i, ref, temp)
                    #     pprint.pprint(pdb_num_dict)

                    bulked_residues = []
                    for key, val in pdb_num_dict.items():
                        # print(key, val) # sanity check
                        if not isinstance(val[1], int):
                            res_obj = Residue()
                            res_obj.sequence_number = val[0].get_id()[1]
                            res_obj.amino_acid = AA[val[0].get_resname()]
                            res_obj.display_generic_number = val[
                                1].display_generic_number
                            res_obj.generic_number = val[1].generic_number
                            res_obj.protein_conformation = alpha_protconf
                            res_obj.protein_segment = val[1].protein_segment
                            bulked_residues.append(res_obj)
                        else:
                            self.logger.info(
                                'Skipped {} as no annotation was present, while building for alpha subunit of {}'
                                .format(val[1], sc))

                    Residue.objects.bulk_create(bulked_residues)
                    self.logger.info(
                        'Protein, ProteinConformation and Residue build for alpha subunit of {} is finished'
                        .format(sc))
                except Exception as msg:
                    #print('Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'.format(sc))
                    #print(msg)
                    #print(traceback.format_exc())
                    #exit(0)
                    self.logger.info(
                        'Protein, ProteinConformation and Residue build for alpha subunit of {} has failed'
                        .format(sc))

        ### Build SignprotStructure objects from non-complex signprots
        g_prot_alphas = Protein.objects.filter(
            family__slug__startswith='100_001',
            accession__isnull=False)  #.filter(entry_name='gnai1_human')
        complex_structures = SignprotComplex.objects.all().values_list(
            'structure__pdb_code__index', flat=True)
        for a in g_prot_alphas:
            pdb_list = get_pdb_ids(a.accession)
            for pdb in pdb_list:
                if pdb not in complex_structures:
                    try:
                        data = self.fetch_gprot_data(pdb, a)
                        if data:
                            self.build_g_prot_struct(a, pdb, data)
                    except Exception as msg:
                        self.logger.error(
                            'SignprotStructure of {} {} failed\n{}: {}'.format(
                                a.entry_name, pdb, type(msg), msg))
コード例 #7
0
    def handle(self, *args, **options):
        """Build alpha subunit residues/rotamers for complexes and SignprotStructures.

        Options read from ``options``:
            purge: delete the existing "_a" Residue, ProteinConformation and
                Protein objects of the Alpha family, plus every
                SignprotStructureExtraProteins and SignprotStructure object.
            only_signprot_structures: skip the per-complex alpha subunit build.
            s: optional iterable of PDB codes; restricts the complex build to
                these structures and skips the non-complex SignprotStructure
                build.
            debug: print intermediate mappings, alignments and the run time.

        A failure while building one complex or one SignprotStructure is
        logged (with the exception type and message) and does not stop the
        remaining builds.
        """
        startTime = datetime.datetime.now()
        self.options = options
        debug = options["debug"]
        if self.options["purge"]:
            Residue.objects.filter(
                protein_conformation__protein__entry_name__endswith="_a",
                protein_conformation__protein__family__parent__parent__name=
                "Alpha").delete()
            ProteinConformation.objects.filter(
                protein__entry_name__endswith="_a",
                protein__family__parent__parent__name="Alpha").delete()
            Protein.objects.filter(
                entry_name__endswith="_a",
                family__parent__parent__name="Alpha").delete()
            SignprotStructureExtraProteins.objects.all().delete()
            SignprotStructure.objects.all().delete()

        if not options["only_signprot_structures"]:
            # Raw string (no invalid escape sequences), compiled once for all
            # SEQADV records of all structures.
            seqadv_pattern = re.compile(
                r"SEQADV\s{1}[A-Z\s\d]{4}\s{1}([A-Z]{3})\s{1}([A-Z]{1})\s+(\d+)[\s\S\d]{5}([\s\S\d]{12})([A-Z]{3})\s+(\d+)(\s\S+)"
            )

            # Building protein and protconf objects for g protein structure in complex
            if options["s"]:
                scs = SignprotComplex.objects.filter(
                    structure__pdb_code__index__in=[
                        i.upper() for i in options["s"]
                    ])
            else:
                scs = SignprotComplex.objects.all()
            for sc in scs:
                self.logger.info(
                    "Protein, ProteinConformation and Residue build for alpha subunit of {} is building"
                    .format(sc))
                try:
                    pdb_code = sc.structure.pdb_code.index
                    alpha_entry_name = pdb_code.lower() + "_a"

                    # Alpha subunit: reuse the existing protein or create it
                    try:
                        alpha_protein = Protein.objects.get(
                            entry_name=alpha_entry_name)
                    except Protein.DoesNotExist:
                        alpha_protein = Protein()
                        alpha_protein.entry_name = alpha_entry_name
                        alpha_protein.accession = None
                        alpha_protein.name = alpha_entry_name
                        alpha_protein.sequence = sc.protein.sequence
                        alpha_protein.family = sc.protein.family
                        alpha_protein.parent = sc.protein
                        alpha_protein.residue_numbering_scheme = sc.protein.residue_numbering_scheme
                        alpha_protein.sequence_type = ProteinSequenceType.objects.get(
                            slug="mod")
                        alpha_protein.source = ProteinSource.objects.get(
                            name="OTHER")
                        alpha_protein.species = sc.protein.species
                        alpha_protein.save()

                    try:
                        alpha_protconf = ProteinConformation.objects.get(
                            protein__entry_name=alpha_entry_name)
                    except ProteinConformation.DoesNotExist:
                        alpha_protconf = ProteinConformation()
                        alpha_protconf.protein = alpha_protein
                        alpha_protconf.state = ProteinState.objects.get(
                            slug="active")
                        alpha_protconf.save()

                    pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
                    structure = pdbp.get_structure(
                        "struct", StringIO(sc.structure.pdb_data.pdb))
                    chain = structure[0][sc.alpha]
                    # Residue numbers of non-hetero residues that have a CA atom
                    nums = [
                        res.get_id()[1] for res in chain
                        if "CA" in res and res.id[0] == " "
                    ]

                    resis = Residue.objects.filter(
                        protein_conformation__protein=sc.protein)
                    pdb_num_dict = OrderedDict()
                    # Create first alignment based on sequence numbers
                    for n in nums:
                        if pdb_code == "6OIJ" and n < 30:
                            nr = n + 6
                        else:
                            nr = n
                        pdb_num_dict[n] = [
                            chain[n], resis.get(sequence_number=nr)
                        ]
                    # Find mismatches
                    mismatches = [
                        res for res in pdb_num_dict.values()
                        if AA[res[0].get_resname()] != res[1].amino_acid
                    ]

                    seqadv = [
                        line
                        for line in sc.structure.pdb_data.pdb.split("\n")
                        if line.startswith("SEQADV")
                    ]
                    mutations, shifted_mutations = OrderedDict(), OrderedDict()
                    # Search for annotated engineered mutations in pdb SEQADV
                    for record in seqadv:
                        line_search = seqadv_pattern.search(record)
                        if line_search is None or line_search.group(
                                2) != sc.alpha:
                            continue
                        pdb_res, pdb_num = line_search.group(
                            1), line_search.group(3)
                        ref_res, ref_num = line_search.group(
                            5), line_search.group(6)
                        if line_search.group(4).strip(
                        ) == sc.protein.accession and pdb_num != ref_num:
                            shifted_mutations[int(pdb_num)] = [
                                pdb_res, ref_res,
                                int(ref_num)
                            ]
                        else:
                            # Same accession with identical numbering, or a
                            # record for another accession (originally added
                            # as exceptions for 6G79 and 5G53): both cases
                            # are stored as plain mutations.
                            mutations[int(pdb_num)] = [pdb_res, ref_res]

                    # Check and clear mismatches that are registered in pdb SEQADV as engineered mutation
                    remaining_mismatches = []
                    for m in mismatches:
                        num = m[0].get_id()[1]
                        if num in mutations:
                            if m[0].get_resname() != mutations[num][0] and m[
                                    1].amino_acid != AA[mutations[num][1]]:
                                remaining_mismatches.append(m)
                        else:
                            remaining_mismatches.append(m)

                    if debug:
                        print(sc)
                        print(mutations)
                        print(shifted_mutations)
                        print(mismatches)
                        print("======")
                        print(remaining_mismatches)
                        pprint.pprint(pdb_num_dict)

                    no_seqnum_shift = [
                        '6OY9', '6OYA', '6LPB', '6WHA', '7D77', '6XOX', '7L1U',
                        '7L1V'
                    ]

                    # Check if HN is mutated to GNAI1 for the scFv16 stabilizer
                    if sc.protein.entry_name != 'gnai1_human' and remaining_mismatches:
                        target_HN = resis.filter(protein_segment__slug='HN')
                        gnai1_HN = Residue.objects.filter(
                            protein_conformation__protein__entry_name=
                            'gnai1_human',
                            protein_segment__slug='HN')
                        # Looked up once instead of once per structure residue
                        hn_end = target_HN.reverse()[0].sequence_number
                        pdb_HN_seq = ''.join(
                            Polypeptide.three_to_one(val[0].get_resname())
                            for num, val in pdb_num_dict.items()
                            if num <= hn_end)
                        gnai1_HN_seq = ''.join(
                            gnai1_HN.values_list('amino_acid', flat=True))
                        if debug:
                            print('Checking if HN is gnai1_human')
                            print(pdb_HN_seq)
                            print(gnai1_HN_seq)
                        pw2 = pairwise2.align.localms(gnai1_HN_seq, pdb_HN_seq,
                                                      3, -4, -3, -1)
                        ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])
                        length, match = 0, 0
                        for r, t in zip(ref_seq, temp_seq):
                            if debug:
                                print(r, t)
                            if t != '-':
                                if r == t:
                                    match += 1
                                length += 1
                        # No aligned structure residue means no identity
                        identity = match / length * 100 if length else 0
                        if debug:
                            print(identity)
                        if identity > 85:
                            if pdb_code not in ['7DFL']:
                                no_seqnum_shift.append(pdb_code)
                            if debug:
                                print(
                                    'INFO: HN has {}% with gnai1_human HN, skipping seqnum shift correction'
                                    .format(round(identity)))

                    # Mismatches remained possibly to seqnumber shift, making pairwise alignment to try and fix alignment
                    if remaining_mismatches and pdb_code not in no_seqnum_shift:
                        ppb = PPBuilder()
                        seq = "".join(
                            str(pp.get_sequence())
                            for pp in ppb.build_peptides(chain, aa_only=False))
                        if pdb_code in ['7JVQ', '7L1U', '7L1V']:
                            pw2 = pairwise2.align.localms(
                                sc.protein.sequence, seq, 3, -4, -3, -1)
                        else:
                            pw2 = pairwise2.align.localms(
                                sc.protein.sequence, seq, 2, -1, -.5, -.1)
                        ref_seq, temp_seq = str(pw2[0][0]), str(pw2[0][1])

                        # Custom fix for A->G mutation at pos 18
                        if pdb_code == '7JJO':
                            ref_seq = ref_seq[:18] + ref_seq[19:]
                            temp_seq = temp_seq[:17] + temp_seq[18:]
                        # Custom alignment fixes
                        elif pdb_code == '7DFL':
                            ref_seq = 'MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMYSHLVDYFPEYDGPQRDAQAAREFILKMFVDLNPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                            temp_seq = '--------CTLSAEDKAAVERSKMIDRNLREDGEKARRELKLLLLGTGESGKSTFIKQMRIIHG--------------------------------------------------------------------------------------------------------------------------TGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQV----DNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIMYSHLVDYFPEYDGPQRDAQAAREFILKMFVDLNPDSDKILYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV'
                        elif pdb_code == '7JOZ':
                            temp_seq = temp_seq[:67] + (
                                '-' * 14) + 'FNGDS' + temp_seq[86:]
                        elif pdb_code == '7AUE':
                            ref_seq = ref_seq[:31].replace('-',
                                                           '') + ref_seq[31:]
                            temp_seq = (
                                9 *
                                '-') + temp_seq[2:5] + temp_seq[5:54].replace(
                                    '-', '') + temp_seq[54:]

                        # Walk the alignment columns; j indexes the reference
                        # residues, k the structure residue numbers. Gapped
                        # columns are stored under the integer column index.
                        wt_pdb_dict = OrderedDict()
                        pdb_wt_dict = OrderedDict()
                        j, k = 0, 0
                        for i, (ref, temp) in enumerate(zip(ref_seq,
                                                            temp_seq)):
                            if debug:
                                print(i, ref, temp)  # alignment check
                            if ref != "-" and temp != "-":
                                wt_pdb_dict[resis[j]] = pdb_num_dict[nums[k]]
                                pdb_wt_dict[pdb_num_dict[nums[k]]
                                            [0]] = resis[j]
                                j += 1
                                k += 1
                            elif ref == "-":
                                wt_pdb_dict[i] = pdb_num_dict[nums[k]]
                                pdb_wt_dict[pdb_num_dict[nums[k]][0]] = i
                                k += 1
                            elif temp == "-":
                                wt_pdb_dict[resis[j]] = i
                                pdb_wt_dict[i] = resis[j]
                                j += 1
                        # Custom fix for 7JJO isoform difference
                        if pdb_code in ['7JJO', '7JOZ', '7AUE']:
                            pdb_num_dict = OrderedDict()
                            for wt_res, st_res in wt_pdb_dict.items():
                                if isinstance(st_res, list):
                                    pdb_num_dict[wt_res.sequence_number] = [
                                        st_res[0], wt_res
                                    ]
                        else:
                            for i, r in enumerate(remaining_mismatches):
                                r_num = r[0].get_id()[1]
                                # Adjust for shifted residue when residue is a match.
                                # The first mismatch has no predecessor; without the
                                # i > 0 guard, index -1 would compare it with the last one.
                                if i > 0 and r_num - remaining_mismatches[
                                        i - 1][0].get_id()[1] > 1:
                                    pdb_num_dict[r_num -
                                                 1][1] = pdb_wt_dict[chain[
                                                     r_num - 1]]
                                # Adjust for shifted residue when residue is mutated and it's logged in SEQADV
                                if r_num in shifted_mutations:
                                    pdb_num_dict[r_num][1] = resis.get(
                                        sequence_number=shifted_mutations[
                                            r_num][2])
                                # Adjust for shift
                                else:
                                    pdb_num_dict[r_num][1] = pdb_wt_dict[r[0]]
                            if pdb_code == '7JVQ':
                                pdb_num_dict[198][1] = Residue.objects.get(
                                    protein_conformation__protein=sc.protein,
                                    sequence_number=346)
                                pdb_num_dict[235][1] = Residue.objects.get(
                                    protein_conformation__protein=sc.protein,
                                    sequence_number=383)
                            elif pdb_code == '6PB0':
                                pdb_num_dict[205][1] = Residue.objects.get(
                                    protein_conformation__protein=sc.protein,
                                    sequence_number=205)
                    ### Custom alignment fix for 6WHA mini-Gq/Gi2/Gs chimera
                    elif pdb_code == "6WHA":
                        ref_seq = "MTLESIMACCLSEEAKEARRINDEIERQLRRDKRDARRELKLLLLGTGESGKSTFIKQMRIIHGSGYSDEDKRGFTKLVYQNIFTAMQAMIRAMDTLKIPYKYEHNKAHAQLVREVDVEKVSAFENPYVDAIKSLWNDPGIQECYDRRREYQLSDSTKYYLNDLDRVADPAYLPTQQDVLRVRVPTTGIIEYPFDLQSVIFRMVDVGGQRSERRKWIHCFENVTSIMFLVALSEYDQVLVESDNENRMEESKALFRTIITYPWFQNSSVILFLNKKDLLEEKIM--YSHLVDYFPEYDGP----QRDAQAAREFILKMFVDL---NPDSDKIIYSHFTCATDTENIRFVFAAVKDTILQLNLKEYNLV"
                        temp_seq = "----------VSAEDKAAAERSKMIDKNLREDGEKARRTLRLLLLGADNSGKSTIVK----------------------------------------------------------------------------------------------------------------------------------GIFETKFQVDKVNFHMFDVG-----RRKWIQCFNDVTAIIFVVDSSDYNR----------LQEALNDFKSIWNNRWLRTISVILFLNKQDLLAEKVLAGKSKIEDYFPEFARYTTPDPRVTRAKY-FIRKEFVDISTASGDGRHICYPHFTC-VDTENARRIFNDCKDIILQMNLREYNLV"
                        pdb_num_dict = OrderedDict()
                        temp_i = 0
                        mapped_cgns = []
                        for i, aa in enumerate(temp_seq):
                            if aa != "-":
                                # 1-based reference position: alignment column
                                # minus the gaps in the reference up to it
                                ref_seqnum = i + 1 - ref_seq[:i + 1].count("-")
                                res = resis.get(sequence_number=ref_seqnum)
                                if res.display_generic_number.label in mapped_cgns:
                                    next_presumed_cgn = self.get_next_presumed_cgn(
                                        res)
                                    if next_presumed_cgn:
                                        res = next_presumed_cgn
                                        # NOTE(review): res may end up falsy here,
                                        # which fails this structure below
                                        while res and res.display_generic_number.label in mapped_cgns:
                                            res = self.get_next_presumed_cgn(
                                                res)
                                    else:
                                        print(
                                            "Error: {} CGN does not exist. Incorrect mapping of {} in {}"
                                            .format(next_presumed_cgn,
                                                    chain[nums[temp_i]],
                                                    sc.structure))
                                mapped_cgns.append(
                                    res.display_generic_number.label)
                                pdb_num_dict[nums[temp_i]] = [
                                    chain[nums[temp_i]], res
                                ]
                                temp_i += 1

                    # Save one Residue per mapped structure residue and collect
                    # its rotamer; integer placeholders carry no annotation.
                    bulked_rotamers = []
                    for key, val in pdb_num_dict.items():
                        if not isinstance(val[1], int):
                            res_obj = Residue()
                            res_obj.sequence_number = val[0].get_id()[1]
                            res_obj.amino_acid = AA[val[0].get_resname()]
                            res_obj.display_generic_number = val[
                                1].display_generic_number
                            res_obj.generic_number = val[1].generic_number
                            res_obj.protein_conformation = alpha_protconf
                            res_obj.protein_segment = val[1].protein_segment
                            res_obj.save()
                            rot = self.create_structure_rotamer(
                                val[0], res_obj, sc.structure)
                            bulked_rotamers.append(rot)
                        else:
                            self.logger.info(
                                "Skipped {} as no annotation was present, while building for alpha subunit of {}"
                                .format(val[1], sc))
                    if debug:
                        pprint.pprint(pdb_num_dict)
                    Rotamer.objects.bulk_create(bulked_rotamers)
                    self.logger.info(
                        "Protein, ProteinConformation and Residue build for alpha subunit of {} is finished"
                        .format(sc))
                except Exception as msg:
                    # Best effort per structure: record why it failed and go on
                    if debug:
                        print("Error: ", sc, msg)
                    self.logger.error(
                        "Protein, ProteinConformation and Residue build for alpha subunit of {} has failed\n{}: {}"
                        .format(sc, type(msg), msg))

        if not options["s"]:
            ### Build SignprotStructure objects from non-complex signprots
            g_prot_alphas = Protein.objects.filter(
                family__slug__startswith="100_001",
                accession__isnull=False)  #.filter(entry_name="gnai1_human")
            # Set for constant-time membership tests in the loop below
            complex_structures = set(
                SignprotComplex.objects.all().values_list(
                    "structure__pdb_code__index", flat=True))
            for a in g_prot_alphas:
                pdb_list = get_pdb_ids(a.accession)
                for pdb in pdb_list:
                    if pdb not in complex_structures:
                        try:
                            data = self.fetch_gprot_data(pdb, a)
                            if data:
                                self.build_g_prot_struct(a, pdb, data)
                        except Exception as msg:
                            self.logger.error(
                                "SignprotStructure of {} {} failed\n{}: {}".
                                format(a.entry_name, pdb, type(msg), msg))

        if debug:
            print(datetime.datetime.now() - startTime)