예제 #1
0
파일: oligomer.py 프로젝트: le-yuan/ssbio
def write_merged_bioassembly(inpath, outdir, outname, force_rerun=False):
    """Utility to take as input a bioassembly file and merge all its models into multiple chains in a single model.

    Args:
        inpath (str): Path to input PDB file with multiple models that represent an oligomeric form of a structure.
        outdir (str): Path to output directory
        outname (str): New filename of structure file, without the ``.pdb`` extension
        force_rerun (bool): If a new PDB should be written if the file exists

    Returns:
        str: Path to the merged PDB file. When the file is (re)written this is the path reported by
        ``write_pdb``; otherwise it is ``<outdir>/<outname>.pdb``.

    """
    outpath = op.join(outdir, outname + '.pdb')

    # ssbio.utils.force_rerun decides whether the output has to be (re)generated
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        s = StructProp('Model merging', structure_path=inpath, file_type='pdb')
        ss = s.parse_structure()
        merge_all_models_into_first_model(ss.structure)
        outpath = ss.write_pdb(custom_name=outname,
                               out_dir=outdir,
                               force_rerun=force_rerun)

    # Return on both paths: previously the freshly written path was dropped and None was returned
    return outpath
예제 #2
0
 def load_structure_path(self, structure_path, file_type='pdb'):
     """Load a structure file and also record when that file was last modified.

     Args:
         structure_path (str): Path to the structure file, passed on to ``StructProp.load_structure_path``
         file_type (str): Type of the structure file, passed on to ``StructProp.load_structure_path``

     """
     StructProp.load_structure_path(self, structure_path=structure_path, file_type=file_type)

     # Model creation date: the file's modification time, as a UTC ``YYYY-MM-DD`` string
     modified_utc = time.gmtime(os.path.getmtime(self.structure_path))
     self.model_date = time.strftime('%Y-%m-%d', modified_utc)
예제 #3
0
def get_metalpdb_info(metalpdb_lig_file):
    """Parse a MetalPDB .lig file and return a tuple of the chain ID it represents, along with metal binding information.

    The chain ID and metal identifier are read from the file name: the chain ID is the sixth character of the
    base name, and the metal identifier is the tuple of the third and fourth ``_``-separated fields of the base name.

    Args:
        metalpdb_lig_file (str): Path to .lig file

    Returns:
        tuple: (str, dict) of the chain ID and the parsed metal binding site information. The dict has the form
        ``{metal_id: {'endogenous_ligands': list, 'exogenous_ligands': list, 'coordination_number': int}}``,
        where each ligand is a ``(residue_id, residue_name)`` tuple.

    Raises:
        IndexError: If the base name is shorter than six characters or has fewer than four ``_``-separated fields.

    """

    pdb_metals = frozenset([
        'CU', 'ZN', 'MN', 'FE', 'MG', 'CO', 'SE', 'YB', 'SF4', 'FES', 'F3S',
        'NI', 'FE2'
    ])

    # Information to collect
    coordination_number = 0
    endogenous_ligands = []
    exogenous_ligands = []

    # Load the structure
    ss = StructProp(ident='metalpdb',
                    structure_path=metalpdb_lig_file,
                    file_type='pdb')

    # This lig file should just be for one chain
    basename = op.basename(metalpdb_lig_file)
    name_fields = basename.split('_')
    chain_id = basename[5]
    metal_id = (name_fields[2], name_fields[3])

    for r in ss.parse_structure().first_model.get_residues():
        res_id = r.get_id()
        resname = r.get_resname()
        return_id = (res_id, resname)

        # Binding partners
        # A non-blank first field of the residue ID marks a hetero residue: those are exogenous ligands unless
        # they are a metal or water. Residues with a blank first field are endogenous ligands.
        if res_id[0] != ' ':
            # Strip once so the metal and water comparisons treat padded residue names the same way
            stripped_resname = resname.strip()
            if stripped_resname not in pdb_metals and stripped_resname != 'HOH':
                exogenous_ligands.append(return_id)
        else:
            endogenous_ligands.append(return_id)

        # Coordination number: count every atom whose element is not in the metal list.
        # Iterating the residue yields its atoms and avoids the deprecated Residue.get_atom() accessor.
        for a in r:
            if a.element not in pdb_metals:
                coordination_number += 1

    infodict = {
        metal_id: {
            'endogenous_ligands': endogenous_ligands,
            'exogenous_ligands': exogenous_ligands,
            'coordination_number': coordination_number
        }
    }

    return chain_id, infodict
예제 #4
0
파일: oligomer.py 프로젝트: le-yuan/ssbio
 def __init__(self, ident, description=None, is_experimental=False, structure_path=None, file_type=None):
     """Initialize through ``StructProp.__init__`` without chains and start with no subunit composition.

     Args:
         ident: Identifier, forwarded to ``StructProp.__init__``
         description: Description, forwarded to ``StructProp.__init__``
         is_experimental (bool): Forwarded to ``StructProp.__init__``
         structure_path: Path to the structure file, forwarded to ``StructProp.__init__``
         file_type: Type of the structure file, forwarded to ``StructProp.__init__``

     """
     # chains and mapped_chains are deliberately fixed to None for this class
     base_kwargs = {
         'ident': ident,
         'description': description,
         'chains': None,
         'mapped_chains': None,
         'is_experimental': is_experimental,
         'structure_path': structure_path,
         'file_type': file_type,
     }
     StructProp.__init__(self, **base_kwargs)

     self.subunits = None
     """dict: Subunit composition defined as ``{chain_id: number_of_times_used_in_bioassembly}``"""
예제 #5
0
    def get_dict(self,
                 only_attributes=None,
                 exclude_attributes=None,
                 df_format=False):
        """Summarize the I-TASSER run in a dictionary containing modeling results and top predictions from COACH

        The raw COACH result lists are always excluded from the summary. Instead, the first entry of each
        non-empty list is flattened into the summary under prefixed keys (``top_bsite_``, ``top_ec_``,
        ``top_go_mf_``, ``top_go_bp_``, ``top_go_cc_``).

        Args:
            only_attributes (str, list): Attributes that should be returned. If not provided, all are returned.
            exclude_attributes (str, list): Attributes that should be excluded. The passed object is not modified.
            df_format (bool): If dictionary values should be formatted for a dataframe
                (everything possible is transformed into strings, int, or float -
                if something can't be transformed it is excluded)

        Returns:
            dict: Dictionary of attributes

        """

        # (attribute holding a list of COACH results, key prefix for its top entry)
        coach_results = (
            ('coach_bsites', 'top_bsite_'),
            ('coach_ec', 'top_ec_'),
            ('coach_go_mf', 'top_go_mf_'),
            ('coach_go_bp', 'top_go_bp_'),
            ('coach_go_cc', 'top_go_cc_'),
        )

        excluder = [attr for attr, _ in coach_results]
        if exclude_attributes:
            # Build a new list instead of extending in place: force_list may hand back the caller's own
            # list (or a non-list sequence), which must not be mutated here.
            excluder = list(ssbio.utils.force_list(exclude_attributes)) + excluder

        summary_dict = StructProp.get_dict(self,
                                           only_attributes=only_attributes,
                                           exclude_attributes=excluder,
                                           df_format=df_format)

        for attr, prefix in coach_results:
            results = getattr(self, attr)
            if results:
                # Only the first entry of each result list is reported
                summary_dict.update(
                    {prefix + k: v for k, v in results[0].items()})

        return summary_dict
예제 #6
0
 def __init__(self, ident, description=None, chains=None, mapped_chains=None, structure_path=None, file_type=None):
     """Initialize through ``StructProp.__init__`` as an experimental structure with empty metadata fields.

     Args:
         ident: Identifier, forwarded positionally to ``StructProp.__init__``
         description: Description, forwarded to ``StructProp.__init__``
         chains: Forwarded to ``StructProp.__init__``
         mapped_chains: Forwarded to ``StructProp.__init__``
         structure_path: Path to the structure file, forwarded to ``StructProp.__init__``
         file_type: Type of the structure file, forwarded to ``StructProp.__init__``

     """
     # is_experimental is always True for this class
     base_kwargs = {
         'description': description,
         'chains': chains,
         'mapped_chains': mapped_chains,
         'is_experimental': True,
         'structure_path': structure_path,
         'file_type': file_type,
     }
     StructProp.__init__(self, ident, **base_kwargs)

     # Metadata fields start out unset
     for unset_attr in ('experimental_method', 'resolution', 'date', 'taxonomy_name'):
         setattr(self, unset_attr, None)
예제 #7
0
 def __init__(self, ident, description=None, chains=None, mapped_chains=None, structure_path=None, file_type=None):
     """Initialize through ``StructProp.__init__`` as an experimental structure with empty metadata fields.

     Also creates an empty ``DictList`` for the bioassemblies related to this structure.

     Args:
         ident: Identifier, forwarded positionally to ``StructProp.__init__``
         description: Description, forwarded to ``StructProp.__init__``
         chains: Forwarded to ``StructProp.__init__``
         mapped_chains: Forwarded to ``StructProp.__init__``
         structure_path: Path to the structure file, forwarded to ``StructProp.__init__``
         file_type: Type of the structure file, forwarded to ``StructProp.__init__``

     """
     # is_experimental is always True for this class
     base_kwargs = {
         'description': description,
         'chains': chains,
         'mapped_chains': mapped_chains,
         'is_experimental': True,
         'structure_path': structure_path,
         'file_type': file_type,
     }
     StructProp.__init__(self, ident, **base_kwargs)

     # Metadata fields start out unset
     for unset_attr in ('experimental_method', 'resolution', 'date', 'taxonomy_name'):
         setattr(self, unset_attr, None)

     self.biological_assemblies = DictList()
     """DictList: A list for storing Bioassembly objects related to this PDB ID"""
예제 #8
0
    def __init__(self, ident, original_results_path, coach_results_folder='model1/coach', model_to_use='model1'):
        """Load a modeling results folder: the chosen model file, the modeling result files and COACH results.

        Args:
            ident: Identifier, forwarded to ``StructProp.__init__``
            original_results_path (str): Path to the folder holding the modeling results
            coach_results_folder (str): Location of the COACH results, relative to ``original_results_path``
            model_to_use (str): Base name (without ``.pdb``) of the model file to load from the results folder

        Raises:
            OSError: If ``original_results_path`` does not exist
            IOError: If ``<model_to_use>.pdb`` does not exist in the results folder

        """
        if not op.exists(original_results_path):
            raise OSError('{}: folder does not exist'.format(original_results_path))

        StructProp.__init__(self, ident, is_experimental=False)

        # Names of the path attributes whose files were found (and, for COACH, parsed to something non-empty)
        self._attrs_to_copy = []

        self.original_results_path = original_results_path
        self.model_to_use = model_to_use

        ### MODEL
        self.model_date = None
        model_path = op.join(original_results_path, '{}.pdb'.format(model_to_use))
        if not op.exists(model_path):
            raise IOError('{}: no homology model generated'.format(model_path))
        self.load_structure_path(structure_path=model_path, file_type='pdb')

        ### MODELING RESULTS
        # Defaults are assigned before each parse so that self.update() can overwrite them
        self.difficulty = None
        self.top_template_pdb = None
        self.top_template_chain = None

        # init.dat
        # NOTE(review): unlike the other result files, this path is not added to _attrs_to_copy - confirm intended
        init_dat = op.join(original_results_path, 'init.dat')
        self._init_dat_path = init_dat
        if op.exists(init_dat):
            self.update(parse_init_dat(init_dat))

        # seq.dat
        seq_dat = op.join(original_results_path, 'seq.dat')
        self._seq_dat_path = seq_dat
        if op.exists(seq_dat):
            self._attrs_to_copy.append('_seq_dat_path')
            self.update(parse_seq_dat(seq_dat))

        # exp.dat
        exp_dat = op.join(original_results_path, 'exp.dat')
        self._exp_dat_path = exp_dat
        if op.exists(exp_dat):
            self._attrs_to_copy.append('_exp_dat_path')
            self.update(parse_exp_dat(exp_dat))

        # BFP.dat
        bfp_dat = op.join(original_results_path, 'BFP.dat')
        self._bfp_dat_path = bfp_dat
        if op.exists(bfp_dat):
            self._attrs_to_copy.append('_bfp_dat_path')
            self.update(parse_bfp_dat(bfp_dat))

        # cscore
        self.c_score = None
        self.tm_score = None
        self.tm_score_err = None
        self.rmsd = None
        self.rmsd_err = None
        cscore_file = op.join(original_results_path, 'cscore')
        self._cscore_path = cscore_file
        if op.exists(cscore_file):
            self._attrs_to_copy.append('_cscore_path')
            self.update(parse_cscore(cscore_file))

        ### COACH RESULTS
        # (result attribute, path attribute, file name, parser) for every COACH output file
        coach_outputs = (
            ('coach_bsites', '_coach_bsites_inf_path', 'Bsites.inf', parse_coach_bsites_inf),
            ('coach_ec', '_coach_ec_dat_path', 'EC.dat', parse_coach_ec),
            ('coach_go_mf', '_coach_go_mf_dat_path', 'GO_MF.dat', parse_coach_go),
            ('coach_go_bp', '_coach_go_bp_dat_path', 'GO_BP.dat', parse_coach_go),
            ('coach_go_cc', '_coach_go_cc_dat_path', 'GO_CC.dat', parse_coach_go),
        )

        # Every result list starts out empty
        for result_attr, _, _, _ in coach_outputs:
            setattr(self, result_attr, [])

        # The path attributes are always set, whether or not the COACH folder exists
        coach_dir = op.join(original_results_path, coach_results_folder)
        for _, path_attr, filename, _ in coach_outputs:
            setattr(self, path_attr, op.join(coach_dir, filename))

        if op.isdir(coach_dir):
            for result_attr, path_attr, _, parser in coach_outputs:
                coach_file = getattr(self, path_attr)
                if not op.exists(coach_file):
                    continue
                parsed = parser(coach_file)
                # Empty parse results leave the default [] in place and are not marked for copying
                if parsed:
                    self._attrs_to_copy.append(path_attr)
                    setattr(self, result_attr, parsed)