Example #1
0
def add_hydrogens(config, path, inp, out, dummy_dir=None):
    """Add hydrogen atoms to a PDB model and write the cleaned result.

    Runs 'reduce' (when hydrogens == "full") or 'hbplus' on the input PDB,
    optionally relaxes the hydrogenated model with Rosetta, then cleans the
    structure and writes it to 'out'.

    config    -- ConfigParser with 'Paths' and 'Parameters' sections.
    path      -- working directory that contains 'inp'.
    inp       -- input PDB file name (relative to 'path').
    out       -- output file name for the cleaned, hydrogenated model.
    dummy_dir -- accepted for caller compatibility; currently unused.

    Raises ValueError when no hydrogenated file is produced.
    """
    #Initialize
    from SBI.structure import PDB
    import shutil
    hbplus = config.get('Paths', 'hbplus_path')
    reduce_exe = config.get('Paths', 'reduce_path')
    reduce_db = config.get('Paths', 'reduce_db_path')
    relax_exe = config.get('Paths', 'relax_exe')
    hydrogen_type = config.get('Parameters', 'hydrogens')
    relax = config.get('Parameters', 'relax')
    cwd = os.getcwd()
    os.chdir(path)
    try:
        if fileExist(inp):
            # Derive the ".h" output name by swapping the input extension.
            # BUGFIX: the old test was "> 0", which is always true, so an
            # extensionless input produced the bare file name ".h".
            if len(inp.split('.')) > 1:
                output_hbplus = ".".join(inp.split('.')[:-1]) + ".h"
            else:
                output_hbplus = inp.strip() + ".h"
            if hydrogen_type == "full":
                os.system("%s -Quiet %s -DB %s> %s" %
                          (reduce_exe, inp, reduce_db, output_hbplus))
            else:
                os.system("%s -o %s >& hbplus.log" % (hbplus, inp))
            if relax == "yes":
                sys.stdout.write(
                    "\t\t\t-- Relaxing the hydrogen-intermediate model %s (see Rosetta output in relax.log and score.sc)...\n"
                    % output_hbplus)
                os.system(
                    "%s -s %s -in:file:fullatom -nstruct 1  -packing:repack_only >& relax.log"
                    % (relax_exe, output_hbplus))
                opt_model = ".".join(output_hbplus.split('.')[:-1]) + "_0001.pdb"
                old_model = ".".join(
                    output_hbplus.split('.')[:-1]) + "_non_optimized.pdb"
                shutil.move(output_hbplus, old_model)
                if fileExist(opt_model):
                    check_pdb = PDB(opt_model)
                    if check_pdb.has_protein:
                        check_pdb.clean()
                        check_pdb.write(output_hbplus)
                        # BUGFIX: this previously called sys.path.remove(),
                        # which always raised and left the Rosetta file behind.
                        try:
                            os.remove(opt_model)
                        except OSError:
                            sys.stdout.write("\t\t\t-- Keeping old file %s ...\n" %
                                             opt_model)
                    else:
                        # Relaxed model has no protein: fall back to the original.
                        shutil.copy(old_model, output_hbplus)
                else:
                    # Rosetta produced no model: fall back to the original.
                    shutil.copy(old_model, output_hbplus)
            if not fileExist(output_hbplus):
                raise ValueError("Cannot find file with hydrogen atoms")
            else:
                pdb = PDB(output_hbplus)
                pdb.clean()
                pdb.write(out, force=True)
    finally:
        # Always restore the caller's working directory, even on error.
        os.chdir(cwd)
Example #2
0
def main():
    """Build domain-domain interaction PDB files from a 3did flat file.

    Parses the 3did database, extracts each domain-domain interaction as a
    two-chain (A/B) ".brk" PDB file under the 3did directory, then writes a
    list of interacting chain pairs and a FASTA file with the sequence of
    every extracted chain.
    """
    #Initialize
    options = parse_user_arguments()
    verbose = options.show
    pdb_path = os.path.join(config.get('Paths', 'modppi_path'),
                            config.get('Paths', 'pdb_path'))
    dummy_dir = options.dummy_dir
    try:
        did_path = os.path.join(config.get('Paths', 'modppi_path'),
                                config.get('Paths', '3did_path'))
        data_path = os.path.join(config.get('Paths', 'modppi_path'),
                                 config.get('Paths', 'data_path'))
    # Narrowed from a bare "except:"; fall back to the output directory
    # when the config is missing these paths.
    except Exception:
        did_path = options.outdir
        data_path = options.outdir

    if not os.path.exists(did_path):
        os.makedirs(did_path)
    if not os.path.exists(dummy_dir):
        os.makedirs(dummy_dir)
    if not os.path.exists(data_path):
        # NOTE(review): execution continues after this warning; the open()
        # calls below will fail if data_path really is missing.
        sys.stderr.write(
            "No DATA directory, please check your installation or INPUT\n")

    #Parse did flat file
    did = parse_3did(options)

    #Create PDB files of 3DiD interactions
    for dd, cases in did.iteritems():
        for label in xrange(0, len(cases)):
            #Define the name of the PDB output file with domain-domain interactions
            did_file = os.path.join(
                did_path, dd[0] + ":" + dd[1] + "#" + str(label) + ".brk.gz")
            if not os.path.exists(did_file.lower()):
                did_file = os.path.join(
                    did_path, dd[0] + ":" + dd[1] + "#" + str(label) + ".brk")
            if not os.path.exists(did_file.lower()):
                if verbose:
                    sys.stderr.write("\t\t--Create %s\n" % (did_file.lower()))
                pdb_code, d1, d2 = cases[label]
                # PDB files are sharded by the middle two characters of the code;
                # try the plain entry first, then the gzipped one.
                pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(),
                                        "pdb" + pdb_code + ".ent")
                if not os.path.exists(pdb_file):
                    pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(),
                                            "pdb" + pdb_code + ".ent.gz")
                if not os.path.exists(pdb_file):
                    if verbose:
                        sys.stderr.write("\t\t\t-- %s not found\n" % pdb_file)
                    continue
                try:
                    # Extract the two interacting domains and relabel them A/B.
                    pdb = PDB(pdb_file)
                    brk = PDB()
                    pdb_chain_A = pdb.get_chain_by_id(d1[0])
                    start_A = d1[1]
                    end_A = d1[2]
                    pdb_chain_B = pdb.get_chain_by_id(d2[0])
                    start_B = d2[1]
                    end_B = d2[2]
                    brk_chain_A = pdb_chain_A.extract(init=start_A, end=end_A)
                    brk_chain_A.chain = "A"
                    brk.add_chain(brk_chain_A)
                    brk_chain_B = pdb_chain_B.extract(init=start_B, end=end_B)
                    brk_chain_B.chain = "B"
                    brk.add_chain(brk_chain_B)
                    brk.clean()
                    brk.write(did_file.lower())
                except Exception as e:
                    if verbose: sys.stderr.write("\t\t\t  Error: %s\n" % e)
                    continue

    #Create list of interactions and FASTA sequences of 3DiD
    did_interactions = open(os.path.join(data_path, options.interactions_file),
                            "w")
    did_fasta = open(os.path.join(data_path, options.seq_file), "w")
    for brk in os.listdir(did_path):
        if verbose:
            sys.stderr.write("\t\t-- Reading %s  \n" %
                             os.path.join(did_path, brk))
        try:
            pdb = PDB(os.path.join(did_path, brk))
            id_chain = []
            for c in pdb.chain_identifiers:
                pdb_chain = pdb.get_chain_by_id(c)
                id_chain.append(pdb.id + "_" + c)
                printfasta(did_fasta, pdb.id + "_" + c,
                           pdb_chain.gapped_protein_sequence)
            did_interactions.write("%s\t%s\n" % (id_chain[0], id_chain[1]))
        except Exception as e:
            if verbose:
                sys.stderr.write(
                    "\t\t-- %s cannot be read\n\t\t   Error: %s\n" %
                    (os.path.join(did_path, brk), e))
            continue
    did_interactions.close()
    did_fasta.close()
Example #3
0
            ofile.parent.mkdir(parents=True, exist_ok=True)
            structure, _ = TButil.build_pdb_object(self.log,
                                                   case.ordered_structures, 2)

            self.log.notice(f'Writing structure {ofile}')
            structure.write(output_file=str(ofile),
                            format='pdb',
                            clean=True,
                            force=TBcore.get_option('system', 'overwrite'))

            if self.motif and binder is not None:
                bfile.parent.mkdir(parents=True, exist_ok=True)
                self.log.notice(f'Writing binder {bfile}')
                fl_binder_ali.write(output_file=str(bfile),
                                    format='pdb',
                                    clean=True,
                                    force=TBcore.get_option(
                                        'system', 'overwrite'))

                ifile.parent.mkdir(parents=True, exist_ok=True)
                self.log.notice(f'Writing input {ifile}')
                initial.write(output_file=str(ifile),
                              format='pdb',
                              clean=True,
                              force=TBcore.get_option('system', 'overwrite'))

        return case

    def make_structure(self, sse: Dict, pick_aa: Optional[str] = None) -> Case:
        """
        """
Example #4
0
        #full_structure = [pdb,]
        binders = []
        for key in case.data['metadata']['binder']:
            binder = case.data['metadata']['binder'][key]
            binders.append(binder)
            binderfile = os.path.dirname(str(pdb_file)) + f'/binder_{key}.pdb'
            binder_chains.extend(
                binder['auth_asym_id'].drop_duplicates().tolist())
            #full_structure.append(binder)
            log.debug(f'Adding binder chains: {binder_chains}')

        bindersfile = os.path.dirname(str(pdb_file)) + f'/binders.pdb'
        log.notice(f'Writing structure {bindersfile}')
        binders = PDB(pd.concat(binders, sort=False))
        binders.write(bindersfile,
                      format='pdb',
                      clean=True,
                      force=TBcore.get_option('system', 'overwrite'))

        full_structure = PDB(
            pd.concat([pdb[columns], binders[columns]], sort=False))
        log.notice(f'Writing structure {full_file}')
        full_structure.write(str(full_file),
                             format='pdb',
                             clean=True,
                             force=TBcore.get_option('system', 'overwrite'))
    #else:
    #    pdb.write(str(pdb_file), format='pdb', clean=True, force=TBcore.get_option('system', 'overwrite'))

    # Push back hotspots, motif seq num and binder chain
    if binder_chains == []: binder_chains = None
    if res_attach == []: res_attach = None
Example #5
0
def modelling(queriesA_original,queriesB_original,queriesA, queriesB, hit_items_A, hit_items_B, sections_modeled, remaining_sections_A, remaining_sections_B, options):
    """Build homology models of a putative interaction between two queries.

    Creates a PIR alignment of the two query sequences against a two-chain
    template complex, runs MODELLER via 'modpy.sh' in a per-PID dummy folder,
    optionally adds hydrogens and renumbers residues, and copies the models
    that show inter-chain contacts into the output directory.

    Returns the updated (sections_modeled, remaining_sections_A,
    remaining_sections_B) bookkeeping structures.

    Raises ModelException when the template file cannot be found, when the
    template chains do not form a complex, or when MODELLER fails.
    """
    # Initialize
    verbose = options.show
    output_dir = options.outdir
    dummy_dir = options.dummy_dir
    hydrogens = options.hbplus
    force_model =options.force
    python_path = config.get('Paths', 'python_path')
    src_path = config.get('Paths','modppi_path')
    modeller_path = os.path.join(config.get('Paths', 'modeller_path'))
    modpy_path = os.path.join(src_path, config.get('Paths', 'functions_path'),"modpy")
    numMod= options.nmodels
    renumerate = options.renumerate


    # Name the dummy modelling folder with the PID plus a random suffix to
    # avoid overwriting files when several processes run concurrently.

    modelling_dummy_name = 'modelling_' + str(os.getpid()) + str(random.randint(0,os.getpid()))
    #modelling_dummy_name = 'modelling_' + str(os.getpid())
    make_subdirs(dummy_dir, subdirs = [modelling_dummy_name])
    modelling_dir = os.path.join(dummy_dir, modelling_dummy_name)


    # Get items from the hits
    # hit_items_*[0]=query name, [1]=template chain name, [2]=aligned query
    # fragment, [3]=aligned template fragment, [4]=query positions,
    # [5]=template positions (as used throughout below).
    query_A_orig = queriesA_original.get(hit_items_A[0])
    query_B_orig = queriesB_original.get(hit_items_B[0])
    query_A = queriesA.get(hit_items_A[0]).get_sequence()
    query_B = queriesB.get(hit_items_B[0]).get_sequence()
    query_name_A = hit_items_A[0]
    query_name_B = hit_items_B[0]
    query_id_A = query_name_A.split(':')[0]
    query_start = hit_items_A[4][0]
    query_end = int(hit_items_A[4][-1]) + int(hit_items_B[4][-1])
    template_name_A_chain = hit_items_A[1]
    template_name_B_chain = hit_items_B[1]
    template_chain_A_chain = template_name_A_chain.split('_')[-1]
    template_chain_B_chain = template_name_B_chain.split('_')[-1]
    template_A_chain_start = hit_items_A[5][0]
    template_B_chain_start = hit_items_B[5][0]
    template_id_A = "_".join(template_name_A_chain.split('_')[:-1])
    template_id_B = "_".join(template_name_B_chain.split('_')[:-1])
    sequences_complex = {}
    sequences_complex.setdefault("A",query_A_orig)
    sequences_complex.setdefault("B",query_B_orig)

    # Get the positions of the current section
    extension_threshold = int(config.get('Parameters', 'extension_threshold'))
    current_A_section = [hit_items_A[4][0], hit_items_A[4][-1]]
    current_B_section = [hit_items_B[4][0], hit_items_B[4][-1]]
    current_sections = [current_A_section, current_B_section]
    current_interaction = '%s::%s' %(query_name_A, query_name_B)

    # Initialize 'sections_modeled' dictionary
    if not sections_modeled.get(current_interaction):
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)

    # Check if the segments of the current interaction belong to a previous group
    for section_pair in sections_modeled.get(current_interaction):
        # The segments must be within a given interval
        if (section_pair[0][0] - extension_threshold <= current_sections[0][0] <= section_pair[0][0] + extension_threshold and
            section_pair[0][1] - extension_threshold <= current_sections[0][1] <= section_pair[0][1] + extension_threshold and
            section_pair[1][0] - extension_threshold <= current_sections[1][0] <= section_pair[1][0] + extension_threshold and
            section_pair[1][1] - extension_threshold <= current_sections[1][1] <= section_pair[1][1] + extension_threshold):
                current_sections = section_pair
                break
    # If the segments are not within the interval, create a new group
    # (for/else: this branch runs only when no existing group matched).
    else:
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)

    # Get the sections that have not been used in the alignment
    query_A_fragment_used = hit_items_A[2].replace('-', '')
    query_B_fragment_used = hit_items_B[2].replace('-', '')
    remaining_terminus_A = query_A.split(query_A_fragment_used)
    remaining_terminus_B = query_B.split(query_B_fragment_used)
    Nterminus_name_A = '%s_1-%s' %(query_name_A, hit_items_A[4][0] - 1)
    Cterminus_name_A = '%s_%s-%s' %(query_name_A, hit_items_A[4][-1] + 1, len(query_A))
    Nterminus_name_B = '%s_1-%s' %(query_name_B, hit_items_B[4][0] - 1)
    Cterminus_name_B = '%s_%s-%s' %(query_name_B, hit_items_B[4][-1] + 1, len(query_B))

    # If there are remaining sections, store them in the dictionary
    if hit_items_A[4][0] > 1:
        remaining_sections_A[Nterminus_name_A] = ProteinSequence(Nterminus_name_A, remaining_terminus_A[0])
    if hit_items_A[4][-1] < len(query_A):
        remaining_sections_A[Cterminus_name_A] = ProteinSequence(Cterminus_name_A, remaining_terminus_A[-1])
    if hit_items_B[4][0] > 1:
        remaining_sections_B[Nterminus_name_B] = ProteinSequence(Nterminus_name_B, remaining_terminus_B[0])
    if hit_items_B[4][-1] < len(query_B):
        remaining_sections_B[Cterminus_name_B] = ProteinSequence(Cterminus_name_B, remaining_terminus_B[-1])

    # Create LOG for tests
    # NOTE(review): dummy_log only exists when verbose is set; every later
    # write is guarded by "if verbose", which keeps this safe.
    if verbose:
     dummy_log_file="%s/%s.log"%(modelling_dir, template_id_A)
     dummy_log=open(dummy_log_file,"a")

    # Create PDB file
    if verbose: sys.stdout.write('\t\t-- Using templates %s and %s...\n' %(template_name_A_chain, template_name_B_chain))
    pdb_name = template_id_A
    dummy_pdb_file = '%s/%s.pdb' %(modelling_dir, pdb_name.replace(":","-"))
    # Initialize PDB object
    pdb_obj = PDB()


    # Check template in PDB files (plain entry first, then gzipped)
    src_path = config.get('Paths','modppi_path')
    pdb_path = os.path.join(src_path, config.get('Paths', 'pdb_path'), template_id_A[1:3].lower())
    pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent')
    if not os.path.exists(pdb_file):
      sys.stderr.write('WARNING: PDB file %s was not found, try compressed\n' %(pdb_file))
      pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent.gz')

    # Check now template in 3DiD files
    if not os.path.exists(pdb_file):
      sys.stderr.write('WARNING: PDB file %s was not found, try 3DiD ".brk" suffix\n' %(pdb_file))
      pdb_path = os.path.join(src_path, config.get('Paths', '3did_path'))
      pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk')
      if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, try 3DiD ".brk" suffix compressed\n' %(pdb_file))
        pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk.gz')

    # If the PDB file is not found in the database, skips to the next interaction
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found\n' %(pdb_file))
        raise ModelException
    pdb_chain_obj = PDB(pdb_file)
    pdb_chain_obj.clean()

    # Add only the chains present in the alignment
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_A_chain))
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_B_chain))

    # Get sequences from PDB, where 'x' are gaps and 'X' are heteroatoms
    pdb_seqA = pdb_obj.chains[0].gapped_protein_sequence.replace('x', '-').replace('X', '.')
    pdb_seqB = pdb_obj.chains[1].gapped_protein_sequence.replace('x', '-').replace('X', '.')

    # Create the dummy PDB file
    pdb_obj.clean()
    pdb_obj.write(output_file = dummy_pdb_file,force=True)

    # Check contacts
    PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
    PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
    protein_complex = Complex(pdb_obj, PPI_type = PPI_threshold_type, PPI_distance = PPI_distance_threshold)
    # If the proteins don't form a complex, avoids modelling
    if len(protein_complex.PPInterfaces[0].contacts) == 0:
        sys.stderr.write('WARNING: No interaction between %s and %s ( for %s %s)\n' %(template_name_A_chain, template_name_B_chain, query_name_A, query_name_B))
        remove_files([dummy_pdb_file])
        raise ModelException
    else:
        if verbose: sys.stdout.write('\t\t\t-- Accepted interaction between %s and %s ( for %s %s)...\n' %(template_name_A_chain, template_name_B_chain, query_name_A, query_name_B))

    # Correct possible discrepancies between the template sequence found in the FASTA file of the nodes in the PIN and the sequence found in the PDB file
    # e.g. The sequence of a protein can have an 'X' in the FASTA file and an 'M' in the newest version of the PDB file
    # Each 'X'/'x' in the alignment is replaced (one at a time, via count=1)
    # by the residue at the same position in the PDB-derived sequence.
    template_seqA = hit_items_A[3]
    template_seqA_ungapped = re.sub('-', '', template_seqA)
    pdbA_section = pdb_seqA[hit_items_A[5][0]-1:hit_items_A[5][-1]]
    for pair in itertools.izip(template_seqA_ungapped, pdbA_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqA = re.sub('[xX]', pair[1], template_seqA, 1)
    template_seqB = hit_items_B[3]
    template_seqB_ungapped = re.sub('-', '', template_seqB)
    pdbB_section = pdb_seqB[hit_items_B[5][0]-1:hit_items_B[5][-1]]
    for pair in itertools.izip(template_seqB_ungapped, pdbB_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqB = re.sub('[xX]', pair[1], template_seqB, 1)

    if verbose: dummy_log.write("Hits_items_A: %s\n"%([str(x) for x in hit_items_A]))
    if verbose: dummy_log.write("Hits_items_B: %s\n"%([str(x) for x in hit_items_B]))
    if verbose: dummy_log.write("pdbA_section %s\n"%pdbA_section)
    if verbose: dummy_log.write("pdbB_section %s\n"%pdbB_section)
    if verbose: dummy_log.write("length PDB A: %d\n"%len(pdb_seqA))
    if verbose: dummy_log.write("length PDB B: %d\n"%len(pdb_seqB))

    # Add the remaining residues at the beginning or at the end of the template sequences, if needed
    template_seqA = re.sub('[xX]', '-', template_seqA)
    if template_A_chain_start > 1:
        template_A_first_residues = ''.join(pdb_seqA[:hit_items_A[5][0]-1])
        template_seqA = template_A_first_residues + template_seqA
    if hit_items_A[5][-1] < len(pdb_seqA):
        template_seqA += ''.join(pdb_seqA[hit_items_A[5][-1]:])
    template_seqB = re.sub('[xX]', '-', template_seqB)
    if template_B_chain_start > 1:
        template_B_first_residues = ''.join(pdb_seqB[:hit_items_B[5][0]-1])
        template_seqB = template_B_first_residues + template_seqB
    if hit_items_B[5][-1] < len(pdb_seqB):
        template_seqB += ''.join(pdb_seqB[hit_items_B[5][-1]:])


    if verbose: dummy_log.write("FINAL template_seqA %s\n"%template_seqA)
    if verbose: dummy_log.write("FINAL template_seqB %s\n"%template_seqB)

    # Add gaps at the beginning of the query sequences, if needed
    gaps_number_A_chain_beginning = 0
    gaps_number_B_chain_beginning = 0
    if template_A_chain_start > 1:
        gaps_number_A_chain_beginning = int(template_A_chain_start) - 1
    if template_B_chain_start > 1:
        gaps_number_B_chain_beginning = int(template_B_chain_start) - 1
    A_chain_query_seq = ''.join(['-' for i in range(gaps_number_A_chain_beginning)]) + re.sub('[xX]', '-', hit_items_A[2])
    B_chain_query_seq = ''.join(['-' for i in range(gaps_number_B_chain_beginning)]) + re.sub('[xX]', '-', hit_items_B[2])

    # Add gaps at the end of the query sequences, if needed
    # (izip_longest pads the shorter sequence with None, so each None in the
    # query slot appends one '-' to equalize the lengths)
    for pair in itertools.izip_longest(A_chain_query_seq, template_seqA):
        if pair[0] == None:
            A_chain_query_seq += '-'
    for pair in itertools.izip_longest(B_chain_query_seq, template_seqB):
        if pair[0] == None:
            B_chain_query_seq += '-'


    # Create PIR alignment ('/' separates chains, '*' ends the entry)
    query_whole_seq = A_chain_query_seq + '/' + B_chain_query_seq + '*'
    template_whole_seq = template_seqA + '/' + template_seqB + '*'

    header1 = '>P1;%s\nsequence:%s:%s:.:%s:.:.:.:.:.' %(query_id_A, query_id_A, query_start, query_end)
    header2 = '>P1;%s\nstructureX:%s:1:%s:.:%s:.:.:.:.' %(template_id_A.replace(":","-"), template_id_A.replace(":","-"), template_chain_A_chain, template_chain_B_chain)

    # PIR sequences are wrapped at 60 characters per line
    lines = []
    lines.append(header1)
    lines.extend([query_whole_seq[i:i+60] for i in range(0, len(query_whole_seq), 60)])
    lines.append(header2)
    lines.extend([template_whole_seq[i:i+60] for i in range(0, len(template_whole_seq), 60)])

    pir_alignment = '\n'.join(lines)
    pir_file = open('%s/alignment.pir' %(modelling_dir), 'w+')
    for line in lines:
        pir_file.write('%s\n' %(line))
    pir_file.close()


    # Model
    # Create a folder for the models of each type of interaction
    if '-' in query_name_A:
        query_name_A = query_name_A.rsplit('_', 1)[0]
    if '-' in query_name_B:
        query_name_B = query_name_B.rsplit('_', 1)[0]
    interaction_dir = os.path.join(output_dir , '%s::%s' %(query_name_A, query_name_B))
    if not os.path.exists(interaction_dir):
        make_subdirs(output_dir, subdirs = ['./%s::%s' %(query_name_A, query_name_B)])

    # If the models do not yet exist, proceed and add in the list of MODELS
    do_model=False
    model_path = os.path.abspath(interaction_dir)
    for imodel in xrange(1,numMod+1):
     model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' %(template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1],imodel)
     model_path_model = os.path.join(model_path , model_name)
     #print "CHECK %s %s\n"%(do_model,model_path_model)
     with open(interaction_dir + '/%s.list' %(current_interaction), 'a+') as paths_to_models_file:
        if model_path_model not in paths_to_models_file.read(): paths_to_models_file.write(model_path_model + '\n')
     if not do_model and not fileExist( model_path_model ): do_model=True

    # Complete the set of models
    if do_model or force_model:
        # Keep the current working directory, then change to the modelling folder
        cwd = os.getcwd()
        os.chdir(modelling_dir)
        try:
         if options.optimize:
            process = subprocess.check_output([os.path.join(modeller_path, 'modpy.sh'), os.path.join(python_path, 'python'), os.path.join(modpy_path, 'simpleModel.py'), '--pir=' + './alignment.pir', '--out=%s-%s' %(template_name_A_chain, template_name_B_chain), '--models=%d'%(numMod), '--optimize'], stderr = subprocess.STDOUT)
         else:
            process = subprocess.check_output([os.path.join(modeller_path, 'modpy.sh'), os.path.join(python_path, 'python'), os.path.join(modpy_path, 'simpleModel.py'), '--pir=' + './alignment.pir', '--out=%s-%s' %(template_name_A_chain, template_name_B_chain), '--models=%d'%(numMod)], stderr = subprocess.STDOUT)
        except Exception as e:
         sys.stderr.write("ERROR: %s\n"%(e))
         sys.stderr.write("LOCATION; %s\n"%modelling_dir)
         if verbose: os.system("grep get_ran %s"%(template_name_A_chain+"-"+template_name_B_chain+".log"))
         if verbose: sys.stderr.write("\t\tSkip models with template %s\n"%(model_name))
         os.chdir(cwd)
         raise ModelException
        # Clean and rename all models
        # MODELLER names its outputs <query>.B9999NNNN.pdb; map each to the
        # final model name and keep only models that show inter-chain contacts.
        for imodel in xrange(1,numMod+1):
          label_model=99990000+imodel
          input_model = '%s.B%s.pdb' %(query_id_A,str(label_model))
          model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' %(template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1],imodel)
          model_path_model = os.path.join(model_path , model_name)
          if fileExist(os.path.abspath('%s' %(input_model))):
            # Check contacts
            check_pdb_obj=PDB(os.path.abspath('%s' %(input_model)))
            PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
            PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
            check_protein_complex = Complex(check_pdb_obj, PPI_type = PPI_threshold_type, PPI_distance = PPI_distance_threshold)
            if len(check_protein_complex.PPInterfaces[0].contacts) == 0:
              if verbose: sys.stdout.write("\t\t\t-- Skip model without contacts %s\n"%model_name)
              continue
            else:
              if verbose: sys.stdout.write("\t\t\t-- Accepted model %s\n"%model_name)
            if hydrogens:
              if verbose: sys.stdout.write("\t\t\t-- Adding hydrogens and relaxing the model %s\n"%model_name)
              output_model=model_name
              # NOTE(review): add_hydrogens is called here with five arguments;
              # confirm the definition in use accepts the trailing dummy_dir.
              try:
               add_hydrogens(config,os.path.abspath("./"),input_model, output_model,dummy_dir)
              except ValueError as e:
               sys.stderr.write("WARNING %s\n"%e)
               os.rename(input_model, output_model)
            else:
              output_model=model_name
              os.rename(input_model, output_model)
            if renumerate:
              if verbose: sys.stdout.write("\t\t\t-- Renumerate residues as original sequence\n")
              output_model_renumber=model_name+".re"
              try:
                pdb_renumber=PDB()
                pdb_renumber=renumber_pdb(config,os.path.abspath("./"),output_model,sequences_complex,os.path.abspath("./"))
                pdb_renumber.write(output_model_renumber)
                os.rename(output_model_renumber,output_model)
              except Exception as e:
                sys.stderr.write("WARNING %s\n"%e)
            shutil.copy(output_model, model_path_model)
        os.chdir(cwd)
    # Best-effort cleanup of the per-PID dummy folder; fall back to "rm -r"
    try:
     shutil.rmtree(modelling_dir)
    except Exception as e:
     sys.stderr.write("WARNING first attempt to remove folder %s\n"%e)
     try:
       os.system("\\rm -r %s"%(modelling_dir))
     except Exception as ee:
       sys.stderr.write("WARNING last attempt %s\n"%ee)



    return sections_modeled, remaining_sections_A, remaining_sections_B
Example #6
0
def build_pdb_object(
        log: Logger,
        sses: List[Dict],
        loops: Union[List[int], int],
        concat: Optional[bool] = True,
        outfile: Optional[Union[str,
                                Path]] = None) -> Tuple[Frame3D, List[int]]:
    """Make the parametrically build atoms in a :class:`.Case` into a PDB file.

    :param log: Job logger.
    :param sses: List of the secondary structures to build. Each SSE dictionary must contain the
        ``metadata.atoms`` keys, already in the final expected position.
    :param loops: Number of residues between SSE. It can be one less than the number of structures,
        which assumes no N- or C-terminal, or one more, which assumes N- and C-terminal residues.
    :param concat: When :data:`True`, return the full stucture as a single object, otherwise
        return a list of the individual parts (no per-piece numbering list).
    :param outfile: If provided, write the structure to file.

    :raises ValueError: If the number of loops does not equal the number of
        SSEs minus one.
    """
    if isinstance(loops, int):
        loops = [
            loops,
        ] * (len(sses) - 1)

    # NOTE(review): the docstring allows one MORE loop than SSEs (explicit
    # N/C termini), but this check rejects that case, so the loops.pop(0)
    # branch below is unreachable — confirm the intended contract.
    if len(loops) != len(sses) - 1:
        raise ValueError(
            'Number of loops should equal number of SSE minus one.')

    pieces = []
    columns = [
        'auth_comp_id', 'auth_atom_id', 'auth_seq_id', 'Cartn_x', 'Cartn_y',
        'Cartn_z'
    ]
    start = 1 if len(loops) < len(sses) else loops.pop(0)
    log.debug(f'starting numbering with: {start}')
    for i, sse in enumerate(sses):
        # Each SSE starts right after the previous SSE plus its loop gap.
        start = start if i == 0 else int(
            sses[i - 1]['length']) + loops[i - 1] + start
        # Keep the original PDB numbering so it can be attached as 'pdb_num'.
        pdb_numbering = pd.DataFrame(sse['metadata']['atoms'],
                                     columns=columns)['auth_seq_id'].values
        try:
            structure = PDB(
                pd.DataFrame(sse['metadata']['atoms'],
                             columns=columns)).renumber(start)
        except Exception:
            # Narrowed from a bare "except:"; if renumber() is unavailable or
            # fails, shift 'auth_seq_id' manually to the same effect.
            structure = PDB(
                pd.DataFrame(sse['metadata']['atoms'], columns=columns))
            structure['auth_seq_id'] += (start -
                                         structure['auth_seq_id'].values[0])

        structure = structure.assign(sse_id=[sse["id"]] * len(structure),
                                     pdb_num=pdb_numbering)
        pieces.append(structure)

    structure = pd.concat(pieces, sort=False).reset_index()
    # Re-assign a global, contiguous atom id over the concatenated pieces.
    structure['id'] = list(range(1, structure.shape[0] + 1))

    if outfile is not None:
        structure.write(output_file=str(outfile),
                        format='pdb',
                        clean=True,
                        force=TBcore.get_option('system', 'overwrite'))

    if not concat:
        # NOTE(review): this returns only the pieces list, not the annotated
        # Tuple[Frame3D, List[int]] — callers of concat=False rely on that.
        return pieces

    return structure, [int(p.iloc[-1]['auth_seq_id']) for p in pieces]