def get_loop_length(log: Logger, sse1: Frame3D, sse2: Frame3D, loop_step: int,
                    loop_range: int) -> Tuple[int, int]:
    """Calculate the expected number of residues needed to join two SSE.

    :param log: Job Logger.
    :param sse1: N-SSE.
    :param sse2: C-SSE.
    :param loop_step: Assumption on how much distance a residue can cover.
    :param loop_range: Plus-minus range of residue length.
    """
    from SBI.structure import ChainFrame
    from SBI.structure.geometry.basics import distance

    res1 = ChainFrame(PDB(sse1)).last_compound
    res2 = ChainFrame(PDB(sse2)).first_compound
    # Distance between the C atom of the last N-SSE residue and the N atom of
    # the first C-SSE residue (the peptide bond the loop has to close).
    gap = distance(res1[res1['label_atom_id'] == 'C'].coordinates,
                   res2[res2['label_atom_id'] == 'N'].coordinates)
    log.debug(f'Distance between SSE is {gap} Angstroms.')
    residues = math.ceil(gap / loop_step)
    log.debug(f'Assuming the need of {residues} residues with a {loop_range} residue range.')
    candidates = [x for x in range(residues - loop_range - 1, residues + loop_range + 1)
                  if x > 0]
    return max(candidates), min(candidates)
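# A minimal, hypothetical usage sketch for get_loop_length. The fragment file
# paths, the logger setup and the loop_step/loop_range values are illustrative
# assumptions, not values taken from the snippets in this collection.
#
# import logging
# from SBI.structure import PDB  # assumed import path, matching the snippets above
#
# logging.basicConfig(level=logging.DEBUG)
# log = logging.getLogger('loop_length_demo')
#
# # Hypothetical fragment files for the N- and C-side secondary structures.
# sse_n = PDB('fragments/helix_n.pdb')
# sse_c = PDB('fragments/strand_c.pdb')
#
# # Assume a residue covers ~3 Angstroms, with a +/-2 residue margin.
# max_len, min_len = get_loop_length(log, sse_n, sse_c, loop_step=3, loop_range=2)
# print(f'Try loops of {min_len} to {max_len} residues.')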
def make_structure(sse1: dict, sse2: dict, outfile: Path) -> Tuple[PDBFrame, PDBFrame]:
    """Join two SSE dictionaries into a single structure and write it as PDB.

    :param sse1: N-SSE, with its atom records under ``metadata.atoms``.
    :param sse2: C-SSE, with its atom records under ``metadata.atoms``.
    :param outfile: File to which the combined structure is written.
    """
    columns = ['auth_comp_id', 'auth_atom_id', 'auth_seq_id',
               'Cartn_x', 'Cartn_y', 'Cartn_z']
    sse1 = PDB(pd.DataFrame(sse1['metadata']['atoms'], columns=columns)).renumber(1)
    sse2 = PDB(pd.DataFrame(sse2['metadata']['atoms'],
                            columns=columns)).renumber(sse1.iloc[-1]['auth_seq_id'] + 5)
    structure = pd.concat([sse1, sse2])
    structure['id'] = list(range(1, structure.shape[0] + 1))
    if TBcore.get_option('system', 'verbose'):
        sys.stdout.write('-> generating structure {}\n'.format(outfile.resolve()))
    structure.write(output_file=str(outfile), format='pdb', clean=True,
                    force=TBcore.get_option('system', 'overwrite'))
    return sse1, sse2
def assemble(pdb_id):
    struct = PDB(os.path.join(masif_opts['raw_pdb_dir'], '{}.pdb'.format(pdb_id)),
                 header=True)
    try:
        struct_assembly = struct.apply_biomolecule_matrices()[0]
    except Exception:
        return 0
    struct_assembly.write(
        os.path.join(masif_opts['ligand']['assembly_dir'], '{}.pdb'.format(pdb_id)))
    return 1
def assemble(pdb_id):
    # Reads and builds the biological assembly of a structure
    struct = PDB(os.path.join(masif_opts["raw_pdb_dir"], "{}.pdb".format(pdb_id)),
                 header=True)
    try:
        struct_assembly = struct.apply_biomolecule_matrices()[0]
    except Exception:
        return 0
    struct_assembly.write(
        os.path.join(masif_opts["ligand"]["assembly_dir"], "{}.pdb".format(pdb_id)))
    return 1
def make_PDBseq(self, log_file, resolution_threshold=None):
    if not self.has_local:
        raise NameError('A local PDB database must be defined to create a PDBseq database.')
    outdir = self.PDBseq if self.PDBseq is not None else os.curdir
    Path(outdir).mkdir(parents=True, exist_ok=True)
    fasta_file = File(file_name=os.path.join(outdir, 'PDBseq.fa'), action='w', overwrite=True)
    fasta_fd = fasta_file.descriptor
    idx_file = File(file_name=os.path.join(outdir, 'PDBseq.fa.idx'), action='w', overwrite=True)
    idx_fd = idx_file.descriptor
    if resolution_threshold is not None:
        filtered_file_name = self.get_PDBseq_filtered(resolution_threshold)
        filtered_file = File(file_name=filtered_file_name, action='w', overwrite=True)
        filtered_fd = filtered_file.descriptor
        resolutions = self.get_resolutions(resolution_threshold=resolution_threshold)
    log_file = File(file_name=log_file, action='w', overwrite=True)
    log_idx = log_file.descriptor
    for pdb_file in self.localPDBs:
        log_idx.write("Reading File: {0}\n".format(pdb_file))
        newPDB = PDB(pdb_file=pdb_file, dehydrate=True)
        fasta_idx = newPDB.FASTA_IDX(nucleotide=False)
        if len(fasta_idx['FASTA']) != len(fasta_idx['IDX']):
            log_idx.write('ERROR: Number of FASTA sequences and indexes differ for pdb {0}!\n'.format(newPDB.id))
        if len(fasta_idx['FASTA']) > 0:
            log_idx.write('\tPrinting FASTA and IDX...\n')
        else:
            log_idx.write('\tProbably just a nucleotide PDB...\n')
        for c in range(len(fasta_idx['FASTA'])):
            sequence = fasta_idx['FASTA'][c].split('\n')[1]
            sequence = sequence.replace('X', '').replace('x', '')
            if len(sequence) > 0:
                fasta_fd.write(fasta_idx['FASTA'][c] + "\n")
                if (resolution_threshold is not None and newPDB.id in resolutions
                        and not newPDB.is_all_ca):
                    filtered_fd.write(fasta_idx['FASTA'][c] + "\n")
                idx_fd.write(fasta_idx['IDX'][c] + "\n")
        del newPDB
    # Close & end
    fasta_file.close()
    idx_file.close()
    if resolution_threshold is not None:
        filtered_file.close()
def get_loop_length(sse1: PDB, sse2: PDB, loop_step: int,
                    loop_range: int) -> Tuple[int, int]:
    """Calculate the expected number of residues needed to join two SSE.

    :param sse1: N-SSE.
    :param sse2: C-SSE.
    :param loop_step: Assumption on how much distance a residue can cover.
    :param loop_range: Plus-minus range of residue length.
    """
    res1 = ChainFrame(PDB(sse1)).last_compound
    res2 = ChainFrame(PDB(sse2)).first_compound
    gap = SBIgeo.point_distance(res1[res1['label_atom_id'] == 'N'].coordinates,
                                res2[res2['label_atom_id'] == 'N'].coordinates)
    residues = math.ceil(gap / loop_step)
    candidates = [x for x in range(residues - loop_range - 1, residues + loop_range + 1)
                  if x > 0]
    return max(candidates), min(candidates)
def main():
    # Initialize
    options = parse_user_arguments()
    verbose = options.show
    pdb_path = os.path.join(config.get('Paths', 'modppi_path'),
                            config.get('Paths', 'pdb_path'))
    try:
        did_path = os.path.join(config.get('Paths', 'modppi_path'),
                                config.get('Paths', '3did_path'))
        data_path = os.path.join(config.get('Paths', 'modppi_path'),
                                 config.get('Paths', 'data_path'))
    except Exception:
        did_path = options.outdir
        data_path = options.outdir
    if not os.path.exists(did_path):
        sys.stderr.write("No 3DID directory, please check your installation or INPUT\n")
    if not os.path.exists(data_path):
        sys.stderr.write("No DATA directory, please check your installation or INPUT\n")
    # Create the list of interactions and FASTA sequences of 3DID
    did_interactions = open(os.path.join(data_path, options.interactions_file), "w")
    did_fasta = open(os.path.join(data_path, options.seq_file), "w")
    for brk in os.listdir(did_path):
        if verbose:
            sys.stderr.write("\t\t-- Reading %s \n" % os.path.join(did_path, brk))
        try:
            pdb = PDB(os.path.join(did_path, brk))
            id_chain = []
            for c in pdb.chain_identifiers:
                pdb_chain = pdb.get_chain_by_id(c)
                id_chain.append(pdb.id + "_" + c)
                printfasta(did_fasta, pdb.id + "_" + c, pdb_chain.gapped_protein_sequence)
            did_interactions.write("%s\t%s\n" % (id_chain[0], id_chain[1]))
        except Exception as e:
            if verbose:
                sys.stderr.write("\t\t-- %s cannot be read\n\t\t Error: %s\n"
                                 % (os.path.join(did_path, brk), e))
            continue
    did_interactions.close()
    did_fasta.close()
def build_pdb_object(sses: List[Dict],
                     loops: Union[List[int], int]) -> Tuple[Frame3D, List[int]]:
    """Make the parametrically built atoms of the secondary structures into a PDB object.

    :param sses: List of the secondary structures to build. Each SSE dictionary must
        contain the ``metadata.atoms`` keys, already in the final expected position.
    :param loops: Number of residues between SSE.
    """
    if isinstance(loops, int):
        loops = [loops, ] * (len(sses) - 1)
    if len(sses) != len(loops) + 1:
        raise ValueError('Number of loops should equal number of SSE minus one.')
    pieces = []
    columns = ['auth_comp_id', 'auth_atom_id', 'auth_seq_id',
               'Cartn_x', 'Cartn_y', 'Cartn_z']
    start = 1
    for i, sse in enumerate(sses):
        start = 1 if i == 0 else int(sses[i - 1]['length']) + loops[i - 1] + start
        if TBcore.get_option('system', 'verbose'):
            sys.stdout.write('PDB: Building SSE {:02d}:{} starting at {}\n'.format(
                i + 1, sse['id'], start))
        pieces.append(PDB(pd.DataFrame(sse['metadata']['atoms'],
                                       columns=columns)).renumber(start))
    structure = pd.concat(pieces, sort=False).reset_index()
    structure['id'] = list(range(1, structure.shape[0] + 1))
    return structure, [int(p.iloc[-1]['auth_seq_id']) for p in pieces]
def pdb2pin(options):
    filelist = options.listfile
    rootname = options.out
    verbose = options.show
    PPI_distance = options.PPI_distance
    PPI_type = options.PPI_type
    edges = []
    nodes = []
    if fileExist(filelist):
        for line in parse_list_file(filelist):
            pdb = PDB(line)
            complexes = cn.Complex(pdb, PPI_distance=PPI_distance, PPI_type=PPI_type)
            for pair in complexes.PPInterfaces:
                # if not pair.is_empty:
                if len(pair.contacts) > 0:
                    edges.append((pair.interactor_id, pair.protein_id))
                    if verbose:
                        sys.stdout.write("Add {0:s}\t{1:s}\n".format(pair.interactor_id,
                                                                     pair.protein_id))
        nodes = get_nodes(edges)
    else:
        sys.stderr.write("Missing list of PDB files\n")
        exit(1)
    ppi = open(rootname + ".ppi", "w")
    nds = open(rootname + ".dat", "w")
    for (x, y) in edges:
        ppi.write("%s\t%s\n" % (x, y))
    for x in nodes:
        nds.write("%s\n" % (x))
    ppi.close()
    nds.close()
def get_sequence(pdb):
    seqs = []
    sys.stdout.write(pdb[0] + '\n')
    if not os.path.isdir(pdb[0]):
        os.mkdir(pdb[0])
    pdbf = os.path.join(pdb[0], '{}.pdb'.format(pdb[0]))
    if not os.path.isfile(pdbf):
        wget.download('http://files.rcsb.org/view/{}.pdb'.format(pdb[0]), out=pdbf)
    pdbstr = PDB(pdbf)
    qchains = pdb[1] if len(pdb) > 1 else pdbstr.chain_identifiers
    for chain in qchains:
        print('\t--' + chain + '--')
        seqs.append(Sequence('{}_{}'.format(pdb[0], chain),
                             pdbstr.get_chain_by_id(chain).protein_sequence))
    return seqs
def on_ppi(self, receptor_path, ligand_path, fold_name=None):
    '''
    Compute the split potentials for a PPI.
    receptor_path = Complete path to the structure of the receptor
    ligand_path   = Complete path to the structure of the ligand
    fold_name     = Optional parameter, name given to the PPI
    '''
    # Get PDB structures (fold structures)
    receptor = PDB.read_pdb(receptor_path)
    ligand = PDB.read_pdb(ligand_path)
    receptor_name = self.get_fold_name_from_fold_path(receptor_path)
    ligand_name = self.get_fold_name_from_fold_path(ligand_path)
    if not fold_name:
        fold_name = '{}-{}'.format(receptor_name, ligand_name)
    # If read_pdb returns a list, the structure is not a single chain but a
    # complex of several chains; re-read it with the chains merged.
    if isinstance(receptor, list):
        receptor = PDB.read_pdb(receptor_path, merge_chains=True)
    if isinstance(ligand, list):
        ligand = PDB.read_pdb(ligand_path, merge_chains=True)
    receptor.set_dssp()
    receptor.clean()
    receptor.normalize_residues()
    ligand.set_dssp()
    ligand.clean()
    ligand.normalize_residues()
    ppi = Interaction(receptor, ligand)
    # Compute split potentials (CB potentials use a longer cutoff)
    cutoff = 12 if self.pot_type == 'CB' else 5
    split_potentials = SplitPotentialsPPI(c_type=self.pot_type, cutoff=cutoff)  # defined in BioLib.Docking
    # Global energies, with Z-scores
    global_energies = split_potentials.calculate_global_energies(ppi, Zscores=True)
    print('\nGLOBAL ENERGIES')
    # Per-residue energies, with Z-scores
    residues_energies = split_potentials.calculate_residue_energies_between_pairs(ppi, 'R', Zscores=True)
    print('\nRESIDUE ENERGIES')
    # Create the XML results file
    self.ppi_xml(fold_name, global_energies, residues_energies, receptor, ligand)
    return
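# A minimal, hypothetical call sketch for on_ppi. `SplitPotentialsRunner` is a
# stand-in name for the class that owns this method (the real class name is not
# shown in this collection), and the paths and pot_type value are assumptions.
#
# runner = SplitPotentialsRunner(pot_type='CB')  # hypothetical owning class
# runner.on_ppi('structures/receptor.pdb', 'structures/ligand.pdb',
#               fold_name='demo-complex')
# # Results are written by ppi_xml to the runner's configured XML output file.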
def check_pdb_format(self, structure_path):
    file_name = self.get_fold_name_from_fold_path(structure_path)
    try:
        struct = PDB.read_pdb(structure_path)
    except Exception:
        self.print_error(8, file_name)
        self.write_output_file(self.xml_errors, self.xml_err_file)
        # Output an empty results file
        self.write_output_file(self.xml_result, self.xml_out_file)
        sys.exit(10)
    return
def pdb_geometry_from_rules(pdb_file: Union[Path, str, Frame3D],
                            rules: List[Tuple],
                            log: Optional[Logger] = None) -> pd.DataFrame:
    """Calculate the geometry statistics from a PDB.

    :param log: Job Logger.
    :param pdb_file: The pdb file to calculate the geometry from.
    :param rules: The rules to be applied.
    """
    if isinstance(pdb_file, (Path, str)):
        pdb_file = Path(pdb_file)
        if not pdb_file.is_file():
            raise IOError('PDB structure {} not found.'.format(pdb_file))
        pdb3d = PDB(str(pdb_file), format='pdb', clean=True,
                    dehydrate=True, hetatms=False)['AtomTask:PROTEINBACKBONE']
    elif isinstance(pdb_file, Frame3D):
        pdb3d = pdb_file['AtomTask:PROTEINBACKBONE']
    else:
        raise ValueError('Unexpected type for pdb_file.')

    if log:
        log.info(f'PDB:Analyzing geometry of {pdb3d.id}')
        log.debug(f'PDB:Available secondary structures {",".join([x[0] for x in rules])}')
        log.debug(f'PDB:With ranges {",".join(["{}-{}".format(*x[1]) for x in rules])}')
        log.debug(f'PDB:With flip policy {",".join([str(x[2]) for x in rules])}')
    else:
        sys.stdout.write(f'PDB:Analyzing geometry of {pdb3d.id}\n')
        sys.stdout.write(f'PDB:Available secondary structures {",".join([x[0] for x in rules])}\n')
        sys.stdout.write(f'PDB:With ranges {",".join(["{}-{}".format(*x[1]) for x in rules])}\n')
        sys.stdout.write(f'PDB:With flip policy {",".join([str(x[2]) for x in rules])}\n')

    pieces = make_pieces(pdb3d, rules)
    pieces = make_vectors(pieces, rules)
    pieces = make_planes(pieces)
    df = make_angles_and_distances(pieces)
    df = df.assign(pdb_path=[str(pdb_file) if not isinstance(pdb_file, Frame3D)
                             else pdb3d.id, ] * df.shape[0])
    return df
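# A short, hypothetical call sketch for pdb_geometry_from_rules. The rule tuples
# follow the shape the function unpacks above (x[0] = SSE name, x[1] = residue
# range, x[2] = flip policy), but the concrete path and values are assumptions.
#
# from pathlib import Path
#
# rules = [('H1', (5, 22), False),
#          ('E1', (30, 36), True)]
#
# # Without a logger, progress messages fall back to stdout.
# df = pdb_geometry_from_rules(Path('structures/design_0001.pdb'), rules)
# print(df.head())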
def reverse_motif(
    log: Logger,
    source: Union[str, Path],
    selection: List[str],
    attach: List[str],
    hotspot: str,
    identifier: str,
    binder: Optional[str] = None,
) -> Tuple:
    """Process a provided motif so that it can be attached to a :term:`FORM`.

    :param log: Logger from the calling :class:`.Node` to keep requested verbosity.
    :param source: File containing the structural data.
    :param selection: Selection defining the motif of interest.
    :param attach: Selection defining the SSEs the motif attaches to.
    :param hotspot: Single position defining the exposed side.
    :param identifier: Identifier given to the motif.
    :param binder: Selection defining the binder.
    """
    # Load Structure
    pdbSTR = PDB(source, header=False, dehydrate=True)
    # Get the full motif to define its planes
    motif, hotspots = pick_motif(log, pdbSTR, selection, attach, hotspot)
    # Pick Binder
    binder = pick_binder(log, pdbSTR, binder)
    # # Find motif's orientation
    # eigens = dict(map(reversed, zip(motif['AtomType:CA'].eigenvectors(10),
    #                                 ('perpendicular', 'side', 'major'))))
    # edist = [np.linalg.norm(hotspot.coordinates - eigens['perpendicular'][0]),
    #          np.linalg.norm(hotspot.coordinates - eigens['perpendicular'][-1]),
    #          np.linalg.norm(hotspot.coordinates - eigens['side'][0]),
    #          np.linalg.norm(hotspot.coordinates - eigens['side'][-1])]
    # mdist = edist.index(min(edist))
    # if mdist > 1:  # swap side and perpendicular
    #     tmp = eigens['side']
    #     eigens['side'] = eigens['perpendicular']
    #     eigens['perpendicular'] = tmp
    # if mdist == 2:  # Change perpendicular orientation
    #     eigens['perpendicular'] = np.flip(eigens['perpendicular'], axis=0)
    # if mdist == 0:  # Change perpendicular orientation
    #     eigens['perpendicular'] = np.flip(eigens['perpendicular'], axis=0)
    #
    # # Try to identify the orientation of the motif.
    # for k in eigens:
    #     pymol_arrow(f'arrow_{k}', eigens[k][0], eigens[k][-1],
    #                 'white' if k == 'major' else 'red' if k == 'side' else 'green')
    return motif, binder, hotspots, attach, selection, identifier
def get_resolutions(self):
    # Resolutions of -1 are for methods that do not define resolution.
    resolutions = {}
    ftp = ftplib.FTP(PDBftp['address'])
    ftp.login()
    ftp.cwd(PDBftp['derived'])
    resoluIDX = []
    ftp.retrlines('RETR ' + PDBftp['resolution'], resoluIDX.append)
    ftp.quit()
    SBIglobals.alert('debug', self, 'Retrieving resolution data from PDB FTP...')
    active = False
    for line in resoluIDX:
        if line.startswith('-'):
            active = True
            continue
        if active and len(line.strip()) > 0:
            data = [x.strip() for x in line.split(';')]
            if len(data[1]) > 0:
                SBIglobals.alert('debug', self,
                                 '\tResolution for {0[0]} is {0[1]}...'.format(data))
                resolutions[data[0]] = data[1]
    # rsync is accumulative; we might still hold structures that are no longer
    # in residu.idx, so their resolution must be read from the header.
    for pdb_file in self.localPDBs:
        newfile = File(file_name=pdb_file, action='r')
        pdbid = newfile.prefix.lstrip('pdb').upper()
        if pdbid not in resolutions:
            pdbobj = PDB(pdb_file=pdb_file, header=True, onlyheader=True)
            SBIglobals.alert('debug', self,
                             '\tGrabbing resolution for {0}: {1}...'.format(
                                 pdbid, pdbobj.header.resolution))
            resolutions[pdbid] = pdbobj.header.resolution
    return resolutions
def pdb_geometry_from_rules(pdb_file: Union[Path, str, Frame3D],
                            rules: List[Tuple]) -> pd.DataFrame:
    """Calculate the geometry statistics from a PDB.

    :param pdb_file: The pdb file to calculate the geometry from.
    :param rules: The rules to be applied.
    """
    if isinstance(pdb_file, (Path, str)):
        pdb_file = Path(pdb_file)
        if not pdb_file.is_file():
            raise IOError('PDB structure {} not found.'.format(pdb_file))
        pdb3d = PDB(str(pdb_file), format='pdb', clean=True,
                    dehydrate=True, hetatms=False)['AtomTask:PROTEINBACKBONE']
    elif isinstance(pdb_file, Frame3D):
        pdb3d = pdb_file['AtomTask:PROTEINBACKBONE']
    else:
        raise ValueError('Unexpected type for pdb_file.')

    if TBcore.get_option('system', 'verbose'):
        sys.stdout.write('PDB:Analyzing geometry of {}\n'.format(pdb3d.id))
    if TBcore.get_option('system', 'debug'):
        sys.stdout.write('PDB:Available secondary structures {}\n'.format(
            ','.join([x[0] for x in rules])))
        sys.stdout.write('PDB:With ranges {}\n'.format(
            ','.join(['{}-{}'.format(*x[1]) for x in rules])))
        sys.stdout.write('PDB:With flip policy {}\n'.format(
            ','.join([str(x[2]) for x in rules])))

    pieces = make_pieces(pdb3d, rules)
    pieces = make_vectors(pieces, rules)
    pieces = make_planes(pieces)
    df = make_angles_and_distances(pieces)
    df = df.assign(pdb_path=[str(pdb_file) if not isinstance(pdb_file, Frame3D)
                             else pdb3d.id, ] * df.shape[0])
    return df
def toSQL(self):
    pdbheader = self.header
    # if pdbheader.valid_resolution and pdbheader.resolution != 'NULL':
    if pdbheader.experiment.resolution > 0:
        command = "INSERT INTO {0} VALUES ('{1.id}','{2.date}','{2.header}','{2.xpdta}',{2.resolution:.2f},{2.rfactor:.3f},{2.freeR:.3f});\n".format(
            tables['main'], self, pdbheader)
    else:
        command = "INSERT INTO {0} (pdb,date,header,method) VALUES ('{1.id}','{2.date}','{2.header}','{2.xpdta}');\n".format(
            tables['main'], self, pdbheader)
    for deprec in pdbheader.deprecated:
        command += "INSERT INTO {0} VALUES ('{1}','{2.id}');\n".format(
            tables['old'], deprec, self)
    for chain in self.chains:
        m = pdbheader.get_molecule4chain(chain.chain)
        f = chain.first_structure
        l = chain.last_structure
        command += "INSERT INTO {0}(pdb,chain,name,type,start,idxs,end,idxe) VALUES ('{1.pdb}','{1.chain}','{2.name}','{1.chaintype}',{3.number},'{3.version}',{4.number},'{4.version}');\n".format(
            tables['chain'], chain, m, f, l)
        command += "SET @chain = LAST_INSERT_ID();\n"
        for ec in m.ec:
            command += PDB._choose_ec(ec)
            command += "INSERT INTO {0} VALUES (@chain,@ec);\n".format(tables['ec'])
        for tx in m.taxid:
            command += "SET @taxid = (SELECT COALESCE((SELECT taxid FROM {0} WHERE oldid={2}),(SELECT id FROM {1} WHERE id={2})));\n".format(
                tables['taxidold'], tables['taxid'], tx)
            command += "INSERT INTO {0} VALUES (@chain,@taxid);\n".format(tables['ctaxid'])
    for dbr in pdbheader.dbrefs:
        if dbr.chain in self._chain_id:
            command += "SET @chain = (SELECT nid FROM {0} WHERE pdb='{1}' AND chain='{2}');\n".format(
                tables['chain'], self.id, dbr.chain)
            command += "SET @uniprot = (COALESCE((SELECT entry FROM {0} WHERE accession='{1}' LIMIT 1), 'FAKE_PROTEIN'));\n".format(
                tables['ent2acc'], dbr.uniprot)
            command += "INSERT INTO {0} VALUES (@chain,@uniprot,{1.start},'{1.idxs}',{1.end},'{1.idxe}');\n".format(
                tables['pdbuniprot'], dbr)
    for h in pdbheader.hetero:
        if h.chain in self._chain_id:
            command += "INSERT IGNORE INTO {0} VALUES ('{1.id}','{1._name}','{1.form}', 0, 0);\n".format(
                tables['hetatm'], h)
            command += "SET @chain = (SELECT nid FROM {0} WHERE pdb='{1}' AND chain='{2}');\n".format(
                tables['chain'], self.id, h.chain)
            dataset = 0
            if int(h.pos) <= self.get_chain_by_id(h.chain).last_structure.number:
                dataset = 1
            command += "INSERT INTO {0}(chain,position,hetero,inchain) VALUES (@chain, {1.pos}, '{1.id}', {2});\n".format(
                tables['hetchn'], h, dataset)
    for m in pdbheader.molecules:
        command += "INSERT INTO {0} VALUES ();\n".format(tables['repeidx'])
        for c in pdbheader.molecules[m].chains:
            # Sometimes a chain described in the header has no relation in the coordinates.
            if c in self._chain_id:
                command += "SET @chain = (SELECT nid FROM {0} WHERE pdb='{1}' AND chain='{2}');\n".format(
                    tables['chain'], self.id, c)
                command += "INSERT INTO {0} VALUES (LAST_INSERT_ID(), @chain);\n".format(
                    tables['repe'])
    for s in pdbheader.sites:
        if pdbheader.sites[s].bind is not None:
            bdata = pdbheader.sites[s].bind
            if not isinstance(bdata[1], list):
                bdata[1] = bdata[1].split()
                if len(bdata[1]) == 1:
                    bdata[1].append(bdata[1][0][1:].strip())
                    bdata[1][0] = bdata[1][0][0]
            if len(bdata[1]) == 0:  # only one het of this type in the pdb
                for e in pdbheader.hetero:
                    if bdata[0] == e.id:
                        bdata[1].append(e.chain)
                        bdata[1].append(e.pos)
            import re
            posidx = re.compile(r'(\-*\d+)(\w*)')
            m = posidx.match(bdata[1][1])
            pos = m.group(1)
            idx = m.group(2)
            command += "SET @chain = (SELECT nid FROM {0} WHERE pdb='{1}' AND chain='{2[1][0]}');\n".format(
                tables['chain'], self.id, bdata)
            command += "SET @bind = (SELECT nid FROM {0} WHERE chain=@chain AND position={3} AND hetero='{2[0]}');\n".format(
                tables['hetchn'], self.id, bdata, pos)
            command += "INSERT INTO {0}(pdb,name,description,bind) VALUES ('{1}','{2.id}','{2.desc}', @bind);\n".format(
                tables['site'], self.id, pdbheader.sites[s])
        else:
            command += "INSERT INTO {0}(pdb,name,description) VALUES ('{1}','{2.id}','{2.desc}');\n".format(
                tables['site'], self.id, pdbheader.sites[s])
        for p in pdbheader.sites[s].spec:
            command += "SET @chain = (SELECT nid FROM {0} WHERE pdb='{1}' AND chain='{2}');\n".format(
                tables['chain'], self.id, p[1])
            num = p[2]
            try:
                int(num[-1])
                idx = ' '
            except ValueError:
                idx = num[-1]
                num = num[:-1]
            command += "INSERT IGNORE INTO {0} VALUES(LAST_INSERT_ID(),@chain,{1},'{2}','{3}');\n".format(
                tables['sitepos'], num, idx, p[0])
    command += self.innercontacts.toSQL()
    command += self.interfaces.toSQL()
    return command
        'auth_seq_id', 'auth_asym_id', 'sse_id', 'internal_num',
        'Cartn_x', 'Cartn_y', 'Cartn_z']).reset_index(drop=True)
binder = pd.DataFrame(binder, columns=[
    'auth_comp_id', 'auth_atom_id', 'auth_seq_id', 'auth_asym_id',
    'Cartn_x', 'Cartn_y', 'Cartn_z']).reset_index(drop=True)
self.log.debug(f'processing motif id: {identifier}')
mcolumns, bcolumns = motif.columns.tolist(), binder.columns.tolist()
initial = PDB(pd.concat([motif[bcolumns], binder[bcolumns]], sort=False))
# Get segments of interest
segments = []
for i, j, sse in case:
    if sse['id'] in attach:
        segment = pd.DataFrame(sse['metadata']['atoms'],
                               columns=['residue', 'auth_atom_id', 'resi_id',
                                        'Cartn_x', 'Cartn_y', 'Cartn_z'])
        segment = segment.assign(sse_id=[sse['id']] * len(segment))
def build(self, pick_aa: Optional[str] = None):
    """Build the parametric structure."""
    if self._MONO is None or self._PERIODE is None:
        raise NotImplementedError()

    # 1. Locate the center point for each residue we need to build.
    vector_module = float(self._PERIODE * (self.desc['length'] - 1))
    upper_bound = np.copy(np.array([0., 0., 0.], dtype='float64')) + np.array([0, vector_module / 2, 0])
    points = [np.copy(upper_bound) - np.array([0, self._PERIODE * x, 0])
              for x in range(self.desc['length'])]

    # 2. Build. For each point, build one periode at [0, 0, 0]; then rotate and shift.
    self.pdb = []
    _MONO = pd.DataFrame(self._MONO).T
    for i, p in enumerate(points):
        coords = rotate_degrees(_MONO.values, y=self._ROTATION * i)
        coords = translate(coords, p)
        self.pdb.append(coords)
    self.pdb = np.vstack(self.pdb)

    # We want undirected structures to always start looking up.
    self.pdb = rotate_degrees(self.pdb, x=180)

    # Apply the case-defined placements for each structure.
    if TBcore.get_option('system', 'debug'):
        sys.stdout.write('tilt: ' + str(self.desc['tilt']) + '\n')
        sys.stdout.write('move: ' + str(self.desc['coordinates']) + '\n')
    self.pdb = rotate_degrees(self.pdb, x=self.desc['tilt']['x'],
                              y=self.desc['tilt']['y'], z=self.desc['tilt']['z'])
    self.pdb = translate(self.pdb, [self.desc['coordinates']['x'],
                                    self.desc['coordinates']['y'],
                                    self.desc['coordinates']['z']])

    # Prepare other data to create a coordinate entity.
    resis = np.repeat(list(range(1, i + 2)), _MONO.shape[0])
    atoms = np.asarray([_MONO.index.values, ] * (i + 1)).flatten()

    # Prepare the sequence.
    sequence = []
    if pick_aa is not None:
        pick_aa = pick_aa if len(pick_aa) == 3 else alphabet.aminoacids1to3(pick_aa)
        sequence = [pick_aa, ] * self.desc['length']
    else:
        for _ in range(self.desc['length']):
            sequence.append(alphabet.aminoacids1to3(weighted_choice(self._AA_STAT)))
    sequence = np.repeat(np.asarray(sequence), _MONO.shape[0])

    self.pdb = PDB(pd.DataFrame(self.pdb, columns=["Cartn_x", "Cartn_y", "Cartn_z"])
                   .assign(auth_comp_id=sequence)
                   .assign(auth_atom_id=atoms)
                   .assign(auth_seq_id=resis)
                   .assign(id=list(range(1, self.pdb.shape[0] + 1))))
def renumber_pdb(config, path, pdb_name, sequences, dummy_dir):
    '''
    Renumber a PDB file located in the 'path' folder with the real sequences.
    path       Folder where the PDB file is located
    pdb_name   PDB file
    sequences  Dictionary of sequences (ProteinSequence class from SeqIO) that
               define the amino-acid numbering; the chain identifier is the key
    dummy_dir  Dummy directory to create files
    '''
    # Initialize
    from SBI.structure.chain import Chain
    from SBI.sequence import Sequence
    from SBI.structure import PDB
    from Bio import SeqIO
    from Bio import ExPASy
    from Bio import AlignIO
    from Bio.Align import Applications

    clustal_exe = os.path.join(config.get('Paths', 'clustal_path'), 'clustalw2')
    name_pdb = ".".join(pdb_name.split('/')[-1].split('.')[:-1])
    new_pdb = PDB()
    pdb_file = os.path.join(path, pdb_name)
    pdb = PDB(pdb_file)
    pdb.clean()
    for chain_id, chain_seq in sequences.iteritems():
        name_chain = name_pdb + "_" + chain_id
        name_seq = chain_seq.get_identifier()
        pdb_chain = pdb.get_chain_by_id(chain_id)
        new_chain = Chain(name_pdb, chain_id)
        # Define/create files
        infile = dummy_dir + "/tmp_" + name_chain + "_" + name_seq + ".fa"
        outfile = dummy_dir + "/tmp_" + name_chain + "_" + name_seq + ".aln"
        dndfile = dummy_dir + "/tmp_" + name_chain + "_" + name_seq + ".dnd"
        fd = open(infile, "w")
        fd.write(">{0:s}\n{1:s}\n".format(name_chain, pdb_chain.protein_sequence))
        fd.write(">{0:s}\n{1:s}\n".format(name_seq, chain_seq.get_sequence()))
        fd.close()
        try:
            # Run clustalw2
            msa_cline = Applications.ClustalwCommandline(clustal_exe, infile=infile,
                                                         outfile=outfile)
            child = subprocess.Popen(str(msa_cline), stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE, shell=True)
            child.communicate()
            # Store the alignment to compare
            alignment = AlignIO.read(outfile, 'clustal')
            structure = alignment[0].seq
            reference = alignment[1].seq
            try:
                len_3d = len(structure)
                len_ref = len(reference)
            except Exception as e:
                sys.stderr.write("ERROR: %s\n" % e)
                return e
        except Exception as e:
            sys.stderr.write("ERROR: %s\n" % e)
            return e
        # Remove temporary fasta and alignment files
        remove_files([infile, outfile, dndfile])
        # Mapping of residues to the original sequence
        mapping = create_mapping(pdb_chain.protein_idx.split(";"), structure, reference)
        # Fill the new chain with the correct numbering of residues
        for residue in pdb_chain.aminoacids:
            pair = (str(residue.number), residue.version)
            number, version = mapping.get(pair)
            residue.number = number
            residue.version = version
            new_chain.add_residue(residue)
        # Fill the new pdb
        new_pdb.add_chain(new_chain)
    return new_pdb
from default_config.masif_opts import masif_opts

in_fields = sys.argv[1].split('_')
pdb_id = in_fields[0]

if not os.path.exists(masif_opts['ligand']['ligand_coords_dir']):
    os.mkdir(masif_opts['ligand']['ligand_coords_dir'])

# Ligands of interest
ligands = ['ADP', 'COA', 'FAD', 'HEM', 'NAD', 'NAP', 'SAM']
structure_ligands_type = []
structure_ligands_coords = []
try:
    structure = PDB(os.path.join(masif_opts['ligand']['assembly_dir'],
                                 '{}.pdb'.format(pdb_id)))
except Exception:
    print('Problem with opening structure', pdb_id)
for chain in structure.chains:
    for het in chain.heteroatoms:
        # Check all ligands in the structure and save the coordinates of those of interest
        if het.type in ligands:
            structure_ligands_type.append(het.type)
            structure_ligands_coords.append(het.all_coordinates)

np.save(
    os.path.join(masif_opts['ligand']['ligand_coords_dir'],
                 '{}_ligand_types.npy'.format(pdb_id)),
    structure_ligands_type)
np.save(
    os.path.join(masif_opts['ligand']['ligand_coords_dir'],
                 '{}_ligand_coords.npy'.format(pdb_id)),
# Include rotools without adding them to the PYTHONPATH
scrdir = os.path.dirname(os.path.realpath(__file__))
rotools = os.path.join(scrdir, "../..")
sys.path.append(rotools)
from rotools.constraints import ConstraintSet


def similar_to(d, mind, ca, cb, geo, bck):
    # Report which measured distance falls within +/-3 of the expected one.
    if mind >= float(d) - 3 and mind <= float(d) + 3:
        return "MIN"
    if ca >= float(d) - 3 and ca <= float(d) + 3:
        return "CA"
    if cb >= float(d) - 3 and cb <= float(d) + 3:
        return "CB"
    if geo >= float(d) - 3 and geo <= float(d) + 3:
        return "GEO"
    if bck >= float(d) - 3 and bck <= float(d) + 3:
        return "BCK"
    return "NONE"


cs = ConstraintSet.parse(sys.argv[1])
for f in os.listdir(os.getcwd()):
    if f.endswith('pdb'):
        data = []
        inc = InnerContacts(PDB(f), AA=True, AA_distance=35, HT=False)
        for cont in inc.AAcontacts[0].contacts:
            if cs.has_contact(cont.aminoacid1.number, cont.aminoacid2.number):
                d = cs.get_contact(cont.aminoacid1.number, cont.aminoacid2.number).value
                s = similar_to(d, cont.min_distance, cont.ca_distance,
                               cont.cb_distance, cont.geometric_distance,
                               cont.backbone_distance)
                data.append(s)
                print f, cont.aminoacid1.number, cont.aminoacid2.number, d, cont.min_distance, cont.ca_distance, cont.cb_distance, cont.geometric_distance, cont.backbone_distance, s
        cc = Counter(data)
        print "SUMMARY", f, cc, len(data)
# if case.data['metadata']['binder']:
# Binder
if 'binder' in case.data['metadata']:
    # full_structure = [pdb, ]
    binders = []
    for key in case.data['metadata']['binder']:
        binder = case.data['metadata']['binder'][key]
        binders.append(binder)
        binderfile = os.path.dirname(str(pdb_file)) + f'/binder_{key}.pdb'
        binder_chains.extend(binder['auth_asym_id'].drop_duplicates().tolist())
        # full_structure.append(binder)
    log.debug(f'Adding binder chains: {binder_chains}')
    bindersfile = os.path.dirname(str(pdb_file)) + '/binders.pdb'
    log.notice(f'Writing structure {bindersfile}')
    binders = PDB(pd.concat(binders, sort=False))
    binders.write(bindersfile, format='pdb', clean=True,
                  force=TBcore.get_option('system', 'overwrite'))
    full_structure = PDB(pd.concat([pdb[columns], binders[columns]], sort=False))
    log.notice(f'Writing structure {full_file}')
    full_structure.write(str(full_file), format='pdb', clean=True,
                         force=TBcore.get_option('system', 'overwrite'))
# else:
#     pdb.write(str(pdb_file), format='pdb', clean=True,
#               force=TBcore.get_option('system', 'overwrite'))
def main():
    # Initialize
    options = parse_user_arguments()
    verbose = options.show
    pdb_path = os.path.join(config.get('Paths', 'modppi_path'),
                            config.get('Paths', 'pdb_path'))
    dummy_dir = options.dummy_dir
    try:
        did_path = os.path.join(config.get('Paths', 'modppi_path'),
                                config.get('Paths', '3did_path'))
        data_path = os.path.join(config.get('Paths', 'modppi_path'),
                                 config.get('Paths', 'data_path'))
    except Exception:
        did_path = options.outdir
        data_path = options.outdir
    if not os.path.exists(did_path):
        os.makedirs(did_path)
    if not os.path.exists(dummy_dir):
        os.makedirs(dummy_dir)
    if not os.path.exists(data_path):
        sys.stderr.write("No DATA directory, please check your installation or INPUT\n")

    # Parse the 3DID flat file
    did = parse_3did(options)

    # Create PDB files of 3DID interactions
    for dd, cases in did.iteritems():
        for label in xrange(0, len(cases)):
            # Define the name of the PDB output file with domain-domain interactions
            did_file = os.path.join(did_path,
                                    dd[0] + ":" + dd[1] + "#" + str(label) + ".brk.gz")
            if not os.path.exists(did_file.lower()):
                did_file = os.path.join(did_path,
                                        dd[0] + ":" + dd[1] + "#" + str(label) + ".brk")
            if not os.path.exists(did_file.lower()):
                if verbose:
                    sys.stderr.write("\t\t--Create %s\n" % (did_file.lower()))
                pdb_code, d1, d2 = cases[label]
                pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(),
                                        "pdb" + pdb_code + ".ent")
                if not os.path.exists(pdb_file):
                    pdb_file = os.path.join(pdb_path, pdb_code[1:3].lower(),
                                            "pdb" + pdb_code + ".ent.gz")
                if not os.path.exists(pdb_file):
                    if verbose:
                        sys.stderr.write("\t\t\t-- %s not found\n" % pdb_file)
                    continue
                try:
                    pdb = PDB(pdb_file)
                    brk = PDB()
                    pdb_chain_A = pdb.get_chain_by_id(d1[0])
                    start_A = d1[1]
                    end_A = d1[2]
                    pdb_chain_B = pdb.get_chain_by_id(d2[0])
                    start_B = d2[1]
                    end_B = d2[2]
                    brk_chain_A = pdb_chain_A.extract(init=start_A, end=end_A)
                    brk_chain_A.chain = "A"
                    brk.add_chain(brk_chain_A)
                    brk_chain_B = pdb_chain_B.extract(init=start_B, end=end_B)
                    brk_chain_B.chain = "B"
                    brk.add_chain(brk_chain_B)
                    brk.clean()
                    brk.write(did_file.lower())
                except Exception as e:
                    if verbose:
                        sys.stderr.write("\t\t\t Error: %s\n" % e)
                    continue

    # Create the list of interactions and FASTA sequences of 3DID
    did_interactions = open(os.path.join(data_path, options.interactions_file), "w")
    did_fasta = open(os.path.join(data_path, options.seq_file), "w")
    for brk in os.listdir(did_path):
        if verbose:
            sys.stderr.write("\t\t-- Reading %s \n" % os.path.join(did_path, brk))
        try:
            pdb = PDB(os.path.join(did_path, brk))
            id_chain = []
            for c in pdb.chain_identifiers:
                pdb_chain = pdb.get_chain_by_id(c)
                id_chain.append(pdb.id + "_" + c)
                printfasta(did_fasta, pdb.id + "_" + c, pdb_chain.gapped_protein_sequence)
            did_interactions.write("%s\t%s\n" % (id_chain[0], id_chain[1]))
        except Exception as e:
            if verbose:
                sys.stderr.write("\t\t-- %s cannot be read\n\t\t Error: %s\n"
                                 % (os.path.join(did_path, brk), e))
            continue
    did_interactions.close()
    did_fasta.close()
def add_hydrogens(config, path, inp, out, dummy_dir):
    # Initialize
    from SBI.structure import PDB
    import shutil

    src_path = config.get('Paths', 'modppi_path')
    hbplus = config.get('Paths', 'hbplus_path')
    reduce_exe = config.get('Paths', 'reduce_path')
    reduce_db = config.get('Paths', 'reduce_db_path')
    relax_exe = config.get('Paths', 'relax_exe')
    hydrogen_type = config.get('Parameters', 'hydrogens')
    relax = config.get('Parameters', 'relax')
    cwd = os.getcwd()
    os.chdir(path)
    if fileExist(inp):
        if len(inp.split('.')) > 0:
            output_hbplus = ".".join(inp.split('.')[:-1]) + ".h"
        else:
            output_hbplus = inp.strip() + ".h"
        if hydrogen_type == "full":
            os.system("%s -Quiet %s -DB %s > %s" % (reduce_exe, inp, reduce_db, output_hbplus))
        else:
            os.system("%s -o %s >& hbplus.log" % (hbplus, inp))
        if relax == "yes":
            sys.stdout.write("\t\t\t-- Relaxing the hydrogen-intermediate model %s "
                             "(see Rosetta output in relax.log and score.sc)...\n" % output_hbplus)
            os.system("%s -s %s -in:file:fullatom -nstruct 1 -packing:repack_only "
                      "-relax:jump_move false >& relax.log" % (relax_exe, output_hbplus))
            opt_model = ".".join(output_hbplus.split('.')[:-1]) + "_0001.pdb"
            old_model = ".".join(output_hbplus.split('.')[:-1]) + "_non_optimized.pdb"
            shutil.move(output_hbplus, old_model)
            if fileExist(opt_model):
                check_pdb = PDB(opt_model)
                if check_pdb.has_protein:
                    check_pdb.clean()
                    check_pdb.write(output_hbplus)
                    try:
                        os.remove(opt_model)
                    except OSError:
                        sys.stdout.write("\t\t\t-- Keeping old file %s ...\n" % opt_model)
                else:
                    shutil.copy(old_model, output_hbplus)
            else:
                shutil.copy(old_model, output_hbplus)
            # Clean files
            if fileExist(opt_model):
                if fileExist(os.path.join(dummy_dir, opt_model)):
                    os.remove(opt_model)
                else:
                    shutil.move(opt_model, dummy_dir)
            if fileExist(old_model):
                if fileExist(os.path.join(dummy_dir, old_model)):
                    os.remove(old_model)
                else:
                    shutil.move(old_model, dummy_dir)
        if not fileExist(output_hbplus):
            raise ValueError("Cannot find file with hydrogen atoms")
        else:
            pdb = PDB(output_hbplus)
            pdb.clean()
            pdb.write(out, force=True)
    os.chdir(cwd)
def build_pdb_object(
        log: Logger,
        sses: List[Dict],
        loops: Union[List[int], int],
        concat: Optional[bool] = True,
        outfile: Optional[Union[str, Path]] = None) -> Tuple[Frame3D, List[int]]:
    """Make the parametrically built atoms in a :class:`.Case` into a PDB file.

    :param log: Job logger.
    :param sses: List of the secondary structures to build. Each SSE dictionary must
        contain the ``metadata.atoms`` keys, already in the final expected position.
    :param loops: Number of residues between SSE. It can be one less than the number
        of structures, which assumes no N- or C-terminal, or one more, which assumes
        N- and C-terminal residues.
    :param concat: When :data:`True`, return the full structure as a single object,
        otherwise return a list of the individual parts.
    :param outfile: If provided, write the structure to file.
    """
    if isinstance(loops, int):
        loops = [loops, ] * (len(sses) - 1)
    if len(loops) != len(sses) - 1:
        raise ValueError('Number of loops should equal number of SSE minus one.')

    pieces = []
    columns = ['auth_comp_id', 'auth_atom_id', 'auth_seq_id',
               'Cartn_x', 'Cartn_y', 'Cartn_z']
    start = 1 if len(loops) < len(sses) else loops.pop(0)
    log.debug(f'starting numbering with: {start}')

    for i, sse in enumerate(sses):
        start = start if i == 0 else int(sses[i - 1]['length']) + loops[i - 1] + start
        pdb_numbering = pd.DataFrame(sse['metadata']['atoms'],
                                     columns=columns)['auth_seq_id'].values
        try:
            structure = PDB(pd.DataFrame(sse['metadata']['atoms'],
                                         columns=columns)).renumber(start)
        except Exception:
            structure = PDB(pd.DataFrame(sse['metadata']['atoms'], columns=columns))
            structure['auth_seq_id'] += (start - structure['auth_seq_id'].values[0])
        structure = structure.assign(sse_id=[sse["id"]] * len(structure),
                                     pdb_num=pdb_numbering)
        pieces.append(structure)

    structure = pd.concat(pieces, sort=False).reset_index()
    structure['id'] = list(range(1, structure.shape[0] + 1))
    if outfile is not None:
        structure.write(output_file=str(outfile), format='pdb', clean=True,
                        force=TBcore.get_option('system', 'overwrite'))
    if not concat:
        return pieces
    return structure, [int(p.iloc[-1]['auth_seq_id']) for p in pieces]
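# A minimal, hypothetical usage sketch for build_pdb_object. The SSE dictionaries
# follow the metadata.atoms layout the function reads above, but the atom record
# lists (h1_atoms, h2_atoms), the lengths and the logger are illustrative
# assumptions built elsewhere.
#
# import logging
#
# log = logging.getLogger('build_demo')
#
# # Each SSE dict needs 'id', 'length' and 'metadata.atoms' rows shaped like
# # (auth_comp_id, auth_atom_id, auth_seq_id, Cartn_x, Cartn_y, Cartn_z).
# helix1 = {'id': 'H1', 'length': 10, 'metadata': {'atoms': h1_atoms}}
# helix2 = {'id': 'H2', 'length': 10, 'metadata': {'atoms': h2_atoms}}
#
# # Place 4 loop residues between the two helices and write the result to file.
# structure, sse_ends = build_pdb_object(log, [helix1, helix2], loops=4,
#                                        outfile='h1_h2.pdb')
# log.info(f'SSE terminal residues: {sse_ends}')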
from default_config.masif_opts import masif_opts

in_fields = sys.argv[1].split("_")
pdb_id = in_fields[0]

if not os.path.exists(masif_opts["ligand"]["ligand_coords_dir"]):
    os.mkdir(masif_opts["ligand"]["ligand_coords_dir"])

# Ligands of interest
ligands = ["ADP", "COA", "FAD", "HEM", "NAD", "NAP", "SAM"]
structure_ligands_type = []
structure_ligands_coords = []
try:
    structure = PDB(
        os.path.join(masif_opts["ligand"]["assembly_dir"], "{}.pdb".format(pdb_id))
    )
except Exception:
    print("Problem with opening structure", pdb_id)
for chain in structure.chains:
    for het in chain.heteroatoms:
        # Check all ligands in the structure and save the coordinates of those of interest
        if het.type in ligands:
            structure_ligands_type.append(het.type)
            structure_ligands_coords.append(het.all_coordinates)

np.save(
    os.path.join(
        masif_opts["ligand"]["ligand_coords_dir"], "{}_ligand_types.npy".format(pdb_id)
    ),
    structure_ligands_type,
)
def modelling(queriesA_original, queriesB_original, queriesA, queriesB,
              hit_items_A, hit_items_B, sections_modeled,
              remaining_sections_A, remaining_sections_B, options):
    # Initialize
    verbose = options.show
    output_dir = options.outdir
    dummy_dir = options.dummy_dir
    hydrogens = options.hbplus
    force_model = options.force
    python_path = config.get('Paths', 'python_path')
    src_path = config.get('Paths', 'modppi_path')
    modeller_path = os.path.join(config.get('Paths', 'modeller_path'))
    modpy_path = os.path.join(src_path, config.get('Paths', 'functions_path'), "modpy")
    numMod = options.nmodels
    renumerate = options.renumerate

    # Assign the PID to the dummy modelling folder to avoid overwriting files
    modelling_dummy_name = 'modelling_' + str(os.getpid()) + str(random.randint(0, os.getpid()))
    make_subdirs(dummy_dir, subdirs=[modelling_dummy_name])
    modelling_dir = os.path.join(dummy_dir, modelling_dummy_name)

    # Get items from the hits
    query_A_orig = queriesA_original.get(hit_items_A[0])
    query_B_orig = queriesB_original.get(hit_items_B[0])
    query_A = queriesA.get(hit_items_A[0]).get_sequence()
    query_B = queriesB.get(hit_items_B[0]).get_sequence()
    query_name_A = hit_items_A[0]
    query_name_B = hit_items_B[0]
    query_id_A = query_name_A.split(':')[0]
    query_start = hit_items_A[4][0]
    query_end = int(hit_items_A[4][-1]) + int(hit_items_B[4][-1])
    template_name_A_chain = hit_items_A[1]
    template_name_B_chain = hit_items_B[1]
    template_chain_A_chain = template_name_A_chain.split('_')[-1]
    template_chain_B_chain = template_name_B_chain.split('_')[-1]
    template_A_chain_start = hit_items_A[5][0]
    template_B_chain_start = hit_items_B[5][0]
    template_id_A = "_".join(template_name_A_chain.split('_')[:-1])
    template_id_B = "_".join(template_name_B_chain.split('_')[:-1])
    sequences_complex = {}
    sequences_complex.setdefault("A", query_A_orig)
    sequences_complex.setdefault("B", query_B_orig)

    # Get the positions of the current section
    extension_threshold = int(config.get('Parameters', 'extension_threshold'))
    current_A_section = [hit_items_A[4][0], hit_items_A[4][-1]]
    current_B_section = [hit_items_B[4][0], hit_items_B[4][-1]]
    current_sections = [current_A_section, current_B_section]
    current_interaction = '%s::%s' % (query_name_A, query_name_B)

    # Initialize the 'sections_modeled' dictionary
    if not sections_modeled.get(current_interaction):
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)
    # Check if the segments of the current interaction belong to a previous group
    for section_pair in sections_modeled.get(current_interaction):
        # The segments must be within a given interval
        if (section_pair[0][0] - extension_threshold <= current_sections[0][0] <= section_pair[0][0] + extension_threshold
                and section_pair[0][1] - extension_threshold <= current_sections[0][1] <= section_pair[0][1] + extension_threshold
                and section_pair[1][0] - extension_threshold <= current_sections[1][0] <= section_pair[1][0] + extension_threshold
                and section_pair[1][1] - extension_threshold <= current_sections[1][1] <= section_pair[1][1] + extension_threshold):
            current_sections = section_pair
            break
    else:
        # If the segments are not within the interval, create a new group
        section_group = sections_modeled.setdefault(current_interaction, [])
        section_group.append(current_sections)

    # Get the sections that have not been used in the alignment
    query_A_fragment_used = hit_items_A[2].replace('-', '')
    query_B_fragment_used = hit_items_B[2].replace('-', '')
    remaining_terminus_A = query_A.split(query_A_fragment_used)
    remaining_terminus_B = query_B.split(query_B_fragment_used)
    Nterminus_name_A = '%s_1-%s' % (query_name_A, hit_items_A[4][0] - 1)
    Cterminus_name_A = '%s_%s-%s' % (query_name_A, hit_items_A[4][-1] + 1, len(query_A))
    Nterminus_name_B = '%s_1-%s' % (query_name_B, hit_items_B[4][0] - 1)
    Cterminus_name_B = '%s_%s-%s' % (query_name_B, hit_items_B[4][-1] + 1, len(query_B))
    # If there are remaining sections, store them in the dictionary
    if hit_items_A[4][0] > 1:
        remaining_sections_A[Nterminus_name_A] = ProteinSequence(Nterminus_name_A, remaining_terminus_A[0])
    if hit_items_A[4][-1] < len(query_A):
        remaining_sections_A[Cterminus_name_A] = ProteinSequence(Cterminus_name_A, remaining_terminus_A[-1])
    if hit_items_B[4][0] > 1:
        remaining_sections_B[Nterminus_name_B] = ProteinSequence(Nterminus_name_B, remaining_terminus_B[0])
    if hit_items_B[4][-1] < len(query_B):
        remaining_sections_B[Cterminus_name_B] = ProteinSequence(Cterminus_name_B, remaining_terminus_B[-1])

    # Create a LOG for tests
    if verbose:
        dummy_log_file = "%s/%s.log" % (modelling_dir, template_id_A)
        dummy_log = open(dummy_log_file, "a")

    # Create the PDB file
    if verbose:
        sys.stdout.write('\t\t-- Using templates %s and %s...\n'
                         % (template_name_A_chain, template_name_B_chain))
    pdb_name = template_id_A
    dummy_pdb_file = '%s/%s.pdb' % (modelling_dir, pdb_name.replace(":", "-"))
    # Initialize the PDB object
    pdb_obj = PDB()
    # Check the template among the PDB files
    src_path = config.get('Paths', 'modppi_path')
    pdb_path = os.path.join(src_path, config.get('Paths', 'pdb_path'), template_id_A[1:3].lower())
    pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent')
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, trying compressed\n' % (pdb_file))
        pdb_file = os.path.join(pdb_path, 'pdb' + template_id_A.lower() + '.ent.gz')
    # Now check the template among the 3DID files
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, trying 3DID ".brk" suffix\n' % (pdb_file))
        pdb_path = os.path.join(src_path, config.get('Paths', '3did_path'))
        pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk')
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found, trying 3DID ".brk" suffix compressed\n' % (pdb_file))
        pdb_file = os.path.join(pdb_path, template_id_A.lower() + '.brk.gz')
    # If the PDB file is not found in the database, skip to the next interaction
    if not os.path.exists(pdb_file):
        sys.stderr.write('WARNING: PDB file %s was not found\n' % (pdb_file))
        raise ModelException
    pdb_chain_obj = PDB(pdb_file)
    pdb_chain_obj.clean()
    # Add only the chains present in the alignment
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_A_chain))
    pdb_obj.add_chain(pdb_chain_obj.get_chain_by_id(template_chain_B_chain))
    # Get sequences from the PDB, where 'x' are gaps and 'X' are heteroatoms
    pdb_seqA = pdb_obj.chains[0].gapped_protein_sequence.replace('x', '-').replace('X', '.')
    pdb_seqB = pdb_obj.chains[1].gapped_protein_sequence.replace('x', '-').replace('X', '.')
    # Create the dummy PDB file
    pdb_obj.clean()
    pdb_obj.write(output_file=dummy_pdb_file, force=True)

    # Check contacts
    PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
    PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
    protein_complex = Complex(pdb_obj, PPI_type=PPI_threshold_type,
                              PPI_distance=PPI_distance_threshold)
    # If the proteins do not form a complex, avoid modelling
    if len(protein_complex.PPInterfaces[0].contacts) == 0:
        sys.stderr.write('WARNING: No interaction between %s and %s (for %s %s)\n'
                         % (template_name_A_chain, template_name_B_chain,
                            query_name_A, query_name_B))
        remove_files([dummy_pdb_file])
        raise ModelException
    else:
        if verbose:
            sys.stdout.write('\t\t\t-- Accepted interaction between %s and %s (for %s %s)...\n'
                             % (template_name_A_chain, template_name_B_chain,
                                query_name_A, query_name_B))

    # Correct possible discrepancies between the template sequence found in the FASTA
    # file of the nodes in the PIN and the sequence found in the PDB file,
    # e.g. a protein can have an 'X' in the FASTA file and an 'M' in the newest PDB file
    template_seqA = hit_items_A[3]
    template_seqA_ungapped = re.sub('-', '', template_seqA)
    pdbA_section = pdb_seqA[hit_items_A[5][0] - 1:hit_items_A[5][-1]]
    for pair in itertools.izip(template_seqA_ungapped, pdbA_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqA = re.sub('[xX]', pair[1], template_seqA, 1)
    template_seqB = hit_items_B[3]
    template_seqB_ungapped = re.sub('-', '', template_seqB)
    pdbB_section = pdb_seqB[hit_items_B[5][0] - 1:hit_items_B[5][-1]]
    for pair in itertools.izip(template_seqB_ungapped, pdbB_section):
        if pair[0] == 'X' or pair[0] == 'x':
            template_seqB = re.sub('[xX]', pair[1], template_seqB, 1)
    if verbose:
        dummy_log.write("Hits_items_A: %s\n" % ([str(x) for x in hit_items_A]))
        dummy_log.write("Hits_items_B: %s\n" % ([str(x) for x in hit_items_B]))
        dummy_log.write("pdbA_section %s\n" % pdbA_section)
        dummy_log.write("pdbB_section %s\n" % pdbB_section)
        dummy_log.write("length PDB A: %d\n" % len(pdb_seqA))
        dummy_log.write("length PDB B: %d\n" % len(pdb_seqB))

    # Add the remaining residues at the beginning or end of the template sequences, if needed
    template_seqA = re.sub('[xX]', '-', template_seqA)
    if template_A_chain_start > 1:
        template_A_first_residues = ''.join(pdb_seqA[:hit_items_A[5][0] - 1])
        template_seqA = template_A_first_residues + template_seqA
    if hit_items_A[5][-1] < len(pdb_seqA):
        template_seqA += ''.join(pdb_seqA[hit_items_A[5][-1]:])
    template_seqB = re.sub('[xX]', '-', template_seqB)
    if template_B_chain_start > 1:
        template_B_first_residues = ''.join(pdb_seqB[:hit_items_B[5][0] - 1])
        template_seqB = template_B_first_residues + template_seqB
    if hit_items_B[5][-1] < len(pdb_seqB):
        template_seqB += ''.join(pdb_seqB[hit_items_B[5][-1]:])
    if verbose:
        dummy_log.write("FINAL template_seqA %s\n" % template_seqA)
        dummy_log.write("FINAL template_seqB %s\n" % template_seqB)

    # Add gaps at the beginning of the query sequences, if needed
    gaps_number_A_chain_beginning = 0
    gaps_number_B_chain_beginning = 0
    if template_A_chain_start > 1:
        gaps_number_A_chain_beginning = int(template_A_chain_start) - 1
    if template_B_chain_start > 1:
        gaps_number_B_chain_beginning = int(template_B_chain_start) - 1
    A_chain_query_seq = ''.join(['-' for i in range(gaps_number_A_chain_beginning)]) + re.sub('[xX]', '-', hit_items_A[2])
    B_chain_query_seq = ''.join(['-' for i in range(gaps_number_B_chain_beginning)]) + re.sub('[xX]', '-', hit_items_B[2])
    # Add gaps at the end of the query sequences, if needed
    for pair in itertools.izip_longest(A_chain_query_seq, template_seqA):
        if pair[0] is None:
            A_chain_query_seq += '-'
    for pair in itertools.izip_longest(B_chain_query_seq, template_seqB):
        if pair[0] is None:
            B_chain_query_seq += '-'

    # Create the PIR alignment
    query_whole_seq = A_chain_query_seq + '/' + B_chain_query_seq + '*'
    template_whole_seq = template_seqA + '/' + template_seqB + '*'
    header1 = '>P1;%s\nsequence:%s:%s:.:%s:.:.:.:.:.' % (query_id_A, query_id_A, query_start, query_end)
    header2 = '>P1;%s\nstructureX:%s:1:%s:.:%s:.:.:.:.' % (template_id_A.replace(":", "-"), template_id_A.replace(":", "-"), template_chain_A_chain, template_chain_B_chain)
    lines = []
    lines.append(header1)
    lines.extend([query_whole_seq[i:i + 60] for i in range(0, len(query_whole_seq), 60)])
    lines.append(header2)
    lines.extend([template_whole_seq[i:i + 60] for i in range(0, len(template_whole_seq), 60)])
    pir_alignment = '\n'.join(lines)
    pir_file = open('%s/alignment.pir' % (modelling_dir), 'w+')
    for line in lines:
        pir_file.write('%s\n' % (line))
    pir_file.close()

    # Model
    # Create a folder for the models of each type of interaction
    if '-' in query_name_A:
        query_name_A = query_name_A.rsplit('_', 1)[0]
    if '-' in query_name_B:
        query_name_B = query_name_B.rsplit('_', 1)[0]
    interaction_dir = os.path.join(output_dir, '%s::%s' % (query_name_A, query_name_B))
    if not os.path.exists(interaction_dir):
        make_subdirs(output_dir, subdirs=['./%s::%s' % (query_name_A, query_name_B)])
    # If the models do not yet exist, proceed and add them to the list of MODELS
    do_model = False
    model_path = os.path.abspath(interaction_dir)
    for imodel in xrange(1, numMod + 1):
        model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' % (template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1], imodel)
        model_path_model = os.path.join(model_path, model_name)
        with open(interaction_dir + '/%s.list' % (current_interaction), 'a+') as paths_to_models_file:
            if model_path_model not in paths_to_models_file.read():
                paths_to_models_file.write(model_path_model + '\n')
        if not do_model and not fileExist(model_path_model):
            do_model = True
    # Complete the set of models
    if do_model or force_model:
        # Keep the current working directory, then change to the modelling folder
        cwd = os.getcwd()
        os.chdir(modelling_dir)
        try:
            if options.optimize:
                process = subprocess.check_output(
                    [os.path.join(modeller_path, 'modpy.sh'),
                     os.path.join(python_path, 'python'),
                     os.path.join(modpy_path, 'simpleModel.py'),
                     '--pir=' + './alignment.pir',
                     '--out=%s-%s' % (template_name_A_chain, template_name_B_chain),
                     '--models=%d' % (numMod), '--optimize'],
                    stderr=subprocess.STDOUT)
            else:
                process = subprocess.check_output(
                    [os.path.join(modeller_path, 'modpy.sh'),
                     os.path.join(python_path, 'python'),
                     os.path.join(modpy_path, 'simpleModel.py'),
                     '--pir=' + './alignment.pir',
                     '--out=%s-%s' % (template_name_A_chain, template_name_B_chain),
                     '--models=%d' % (numMod)],
                    stderr=subprocess.STDOUT)
        except Exception as e:
            sys.stderr.write("ERROR: %s\n" % (e))
            sys.stderr.write("LOCATION: %s\n" % modelling_dir)
            if verbose:
                os.system("grep get_ran %s" % (template_name_A_chain + "-" + template_name_B_chain + ".log"))
                sys.stderr.write("\t\tSkip models with template %s\n" % (model_name))
            os.chdir(cwd)
            raise ModelException
        # Clean and rename all models
        for imodel in xrange(1, numMod + 1):
            label_model = 99990000 + imodel
            input_model = '%s.B%s.pdb' % (query_id_A, str(label_model))
            model_name = '%s_%s_%d-%d::%s_%s_%d-%d#%d.pdb' % (template_id_A, template_chain_A_chain, current_sections[0][0], current_sections[0][1], template_id_B, template_chain_B_chain, current_sections[1][0], current_sections[1][1], imodel)
            model_path_model = os.path.join(model_path, model_name)
            if fileExist(os.path.abspath('%s' % (input_model))):
                # Check contacts
                check_pdb_obj = PDB(os.path.abspath('%s' % (input_model)))
                PPI_threshold_type = config.get('Parameters', 'PPI_threshold_type')
                PPI_distance_threshold = float(config.get('Parameters', 'PPI_distance_threshold'))
                check_protein_complex = Complex(check_pdb_obj, PPI_type=PPI_threshold_type,
                                                PPI_distance=PPI_distance_threshold)
                if len(check_protein_complex.PPInterfaces[0].contacts) == 0:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Skip model without contacts %s\n" % model_name)
                    continue
                else:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Accepted model %s\n" % model_name)
                if hydrogens:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Adding hydrogens and relaxing the model %s\n" % model_name)
                    output_model = model_name
                    try:
                        add_hydrogens(config, os.path.abspath("./"), input_model, output_model, dummy_dir)
                    except ValueError as e:
                        sys.stderr.write("WARNING %s\n" % e)
                        os.rename(input_model, output_model)
                else:
                    output_model = model_name
                    os.rename(input_model, output_model)
                if renumerate:
                    if verbose:
                        sys.stdout.write("\t\t\t-- Renumbering residues as in the original sequence\n")
                    output_model_renumber = model_name + ".re"
                    try:
                        pdb_renumber = PDB()
                        pdb_renumber = renumber_pdb(config, os.path.abspath("./"), output_model, sequences_complex, os.path.abspath("./"))
                        pdb_renumber.write(output_model_renumber)
                        os.rename(output_model_renumber, output_model)
                    except Exception as e:
                        sys.stderr.write("WARNING %s\n" % e)
                shutil.copy(output_model, model_path_model)
        os.chdir(cwd)
    try:
        shutil.rmtree(modelling_dir)
    except Exception as e:
        sys.stderr.write("WARNING: first attempt to remove folder failed: %s\n" % e)
        try:
            os.system("\\rm -r %s" % (modelling_dir))
        except Exception as ee:
            sys.stderr.write("WARNING: last attempt failed: %s\n" % ee)
    return sections_modeled, remaining_sections_A, remaining_sections_B