def write_merged_bioassembly(inpath, outdir, outname, force_rerun=False):
    """Utility to take as input a bioassembly file and merge all its models into multiple chains in a single model.

    Args:
        inpath (str): Path to input PDB file with multiple models that represent an oligomeric form of a structure.
        outdir (str): Path to output directory
        outname (str): New filename of structure file (``.pdb`` is appended to it)
        force_rerun (bool): If a new PDB should be written if the file exists

    Returns:
        str: Path to the merged PDB file. When a file is written this is whatever ``write_pdb`` returns;
        otherwise it is ``<outdir>/<outname>.pdb``.

    """
    outpath = op.join(outdir, outname + '.pdb')

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        s = StructProp('Model merging', structure_path=inpath, file_type='pdb')
        ss = s.parse_structure()
        merge_all_models_into_first_model(ss.structure)
        outpath = ss.write_pdb(custom_name=outname, out_dir=outdir, force_rerun=force_rerun)

    # Return on both paths: previously the path was only returned when nothing
    # was written, so a fresh write silently returned None.
    return outpath
def load_structure_path(self, structure_path, file_type='pdb'):
    """Load a structure file through ``StructProp`` and record the file's modification date.

    Args:
        structure_path (str): Path to the structure file
        file_type (str): Type of the structure file, ``pdb`` by default

    """
    StructProp.load_structure_path(self, structure_path=structure_path, file_type=file_type)

    # Additionally save model creation date, taken from the file's
    # modification time (UTC) of the path stored on this object
    modified_at = os.path.getmtime(self.structure_path)
    modified_utc = time.gmtime(modified_at)
    self.model_date = time.strftime('%Y-%m-%d', modified_utc)
def get_metalpdb_info(metalpdb_lig_file):
    """Parse a MetalPDB .lig file and return a tuple of the chain ID it represents, along with metal binding information.

    The chain ID is the 6th character of the file's basename, and the metal identifier is built from the
    3rd and 4th underscore-separated fields of the basename.

    Args:
        metalpdb_lig_file (str): Path to .lig file

    Returns:
        tuple: (str, dict) of the chain ID and the parsed metal binding site information. The dict has the form
        ``{metal_id: {'endogenous_ligands': [...], 'exogenous_ligands': [...], 'coordination_number': int}}``

    """
    pdb_metals = ['CU', 'ZN', 'MN', 'FE', 'MG', 'CO', 'SE', 'YB', 'SF4', 'FES', 'F3S', 'NI', 'FE2']

    # Information to collect
    coordination_number = 0
    endogenous_ligands = []
    exogenous_ligands = []

    # Load the structure
    ss = StructProp(ident='metalpdb', structure_path=metalpdb_lig_file, file_type='pdb')

    # This lig file should just be for one chain
    lig_basename = op.basename(metalpdb_lig_file)
    chain_id = lig_basename[5]
    name_fields = lig_basename.split('_')
    metal_id = (name_fields[2], name_fields[3])

    for residue in ss.parse_structure().first_model.get_residues():
        ligand_entry = (residue.get_id(), residue.get_resname())

        # Binding partners: a blank hetero flag marks a normal residue, which is
        # recorded as endogenous. Hetero residues are recorded as exogenous
        # unless they are one of the listed metals or water.
        if residue.get_id()[0] == ' ':
            endogenous_ligands.append(ligand_entry)
        elif residue.resname.strip() not in pdb_metals and residue.resname != 'HOH':
            exogenous_ligands.append(ligand_entry)

        # Coordination number: every atom whose element is not a listed metal counts
        coordination_number += sum(1 for atom in residue.get_atom() if atom.element not in pdb_metals)

    binding_info = {
        'endogenous_ligands': endogenous_ligands,
        'exogenous_ligands': exogenous_ligands,
        'coordination_number': coordination_number,
    }

    return chain_id, {metal_id: binding_info}
def __init__(self, ident, description=None, is_experimental=False, structure_path=None, file_type=None):
    """Initialize the underlying ``StructProp`` without chain information and start with no subunit composition.

    Args:
        ident: Identifier passed through to ``StructProp``
        description: Optional description passed through to ``StructProp``
        is_experimental (bool): Passed through to ``StructProp``
        structure_path: Optional path to a structure file, passed through to ``StructProp``
        file_type: Optional structure file type, passed through to ``StructProp``

    """
    # Chains are deliberately left unset for this object
    base_kwargs = dict(ident=ident,
                       description=description,
                       chains=None,
                       mapped_chains=None,
                       is_experimental=is_experimental,
                       structure_path=structure_path,
                       file_type=file_type)
    StructProp.__init__(self, **base_kwargs)

    self.subunits = None
    """dict: Subunit composition defined as ``{chain_id: number_of_times_used_in_bioassembly}``"""
def get_dict(self, only_attributes=None, exclude_attributes=None, df_format=False):
    """Summarize the I-TASSER run in a dictionary containing modeling results and top predictions from COACH

    The raw COACH prediction lists are always excluded from the summary. Instead, the first entry of each
    non-empty list is flattened into the result under ``top_bsite_*``, ``top_ec_*``, ``top_go_mf_*``,
    ``top_go_bp_*`` and ``top_go_cc_*`` keys.

    Args:
        only_attributes (str, list): Attributes that should be returned. If not provided, all are returned.
        exclude_attributes (str, list): Attributes that should be excluded. The passed object is not modified.
        df_format (bool): If dictionary values should be formatted for a dataframe
            (everything possible is transformed into strings, int, or float -
            if something can't be transformed it is excluded)

    Returns:
        dict: Dictionary of attributes

    """
    # (attribute holding the COACH predictions, key prefix used in the summary)
    coach_summaries = (
        ('coach_bsites', 'top_bsite_'),
        ('coach_ec', 'top_ec_'),
        ('coach_go_mf', 'top_go_mf_'),
        ('coach_go_bp', 'top_go_bp_'),
        ('coach_go_cc', 'top_go_cc_'),
    )
    to_exclude = [attr for attr, _ in coach_summaries]

    if not exclude_attributes:
        excluder = to_exclude
    else:
        # Build a new list instead of extending the result of force_list in place:
        # that result may be the very list the caller passed in, which must not be
        # altered as a side effect of asking for a summary.
        excluder = list(ssbio.utils.force_list(exclude_attributes)) + to_exclude

    summary_dict = StructProp.get_dict(self, only_attributes=only_attributes,
                                       exclude_attributes=excluder,
                                       df_format=df_format)

    for attr, prefix in coach_summaries:
        predictions = getattr(self, attr)
        if predictions:
            # Only the first (top) prediction is reported
            summary_dict.update({prefix + k: v for k, v in predictions[0].items()})

    return summary_dict
def __init__(self, ident, description=None, chains=None, mapped_chains=None, structure_path=None, file_type=None):
    """Initialize the underlying ``StructProp`` as an experimental structure with empty metadata fields.

    Args:
        ident: Identifier passed through to ``StructProp``
        description: Optional description passed through to ``StructProp``
        chains: Optional chains passed through to ``StructProp``
        mapped_chains: Optional mapped chains passed through to ``StructProp``
        structure_path: Optional path to a structure file, passed through to ``StructProp``
        file_type: Optional structure file type, passed through to ``StructProp``

    """
    # This object always represents an experimental structure
    StructProp.__init__(
        self,
        ident,
        description=description,
        chains=chains,
        mapped_chains=mapped_chains,
        is_experimental=True,
        structure_path=structure_path,
        file_type=file_type,
    )

    # Metadata placeholders, all unset until filled in elsewhere
    for metadata_attr in ('experimental_method', 'resolution', 'date', 'taxonomy_name'):
        setattr(self, metadata_attr, None)
def __init__(self, ident, description=None, chains=None, mapped_chains=None, structure_path=None, file_type=None):
    """Initialize the underlying ``StructProp`` as an experimental structure with empty metadata fields and
    an empty container for biological assemblies.

    Args:
        ident: Identifier passed through to ``StructProp``
        description: Optional description passed through to ``StructProp``
        chains: Optional chains passed through to ``StructProp``
        mapped_chains: Optional mapped chains passed through to ``StructProp``
        structure_path: Optional path to a structure file, passed through to ``StructProp``
        file_type: Optional structure file type, passed through to ``StructProp``

    """
    # This object always represents an experimental structure
    StructProp.__init__(
        self,
        ident,
        description=description,
        chains=chains,
        mapped_chains=mapped_chains,
        is_experimental=True,
        structure_path=structure_path,
        file_type=file_type,
    )

    # Metadata placeholders, all unset until filled in elsewhere
    for metadata_attr in ('experimental_method', 'resolution', 'date', 'taxonomy_name'):
        setattr(self, metadata_attr, None)

    self.biological_assemblies = DictList()
    """DictList: A list for storing Bioassembly objects related to this PDB ID"""
def __init__(self, ident, original_results_path, coach_results_folder='model1/coach', model_to_use='model1'):
    """Load an I-TASSER results folder: the chosen model, the modeling result files and the COACH predictions.

    Result files that are missing are skipped; only the results folder itself and the model PDB are required.

    Args:
        ident: Identifier passed through to ``StructProp``
        original_results_path (str): Path to the folder holding the I-TASSER results
        coach_results_folder (str): Path of the COACH results folder, relative to ``original_results_path``
        model_to_use (str): Name of the model to load; ``<model_to_use>.pdb`` must exist in the results folder

    Raises:
        OSError: If ``original_results_path`` does not exist
        IOError: If the model PDB file does not exist in the results folder

    """
    if not op.exists(original_results_path):
        raise OSError('{}: folder does not exist'.format(original_results_path))

    StructProp.__init__(self, ident, is_experimental=False)

    def _in_results(filename):
        """Path of a file that lives directly in the results folder."""
        return op.join(original_results_path, filename)

    self._attrs_to_copy = []

    self.original_results_path = original_results_path
    self.model_to_use = model_to_use

    ### MODEL
    self.model_date = None

    original_model_path = _in_results('{}.pdb'.format(model_to_use))
    if not op.exists(original_model_path):
        raise IOError('{}: no homology model generated'.format(original_model_path))
    self.load_structure_path(structure_path=original_model_path, file_type='pdb')

    ### MODELING RESULTS
    # Defaults are assigned before parsing so that self.update() calls below can fill them in
    self.difficulty = None
    self.top_template_pdb = None
    self.top_template_chain = None

    # Parse init.dat (its path is not registered in _attrs_to_copy)
    self._init_dat_path = _in_results('init.dat')
    if op.exists(self._init_dat_path):
        self.update(parse_init_dat(self._init_dat_path))

    # Parse seq.dat
    self._seq_dat_path = _in_results('seq.dat')
    if op.exists(self._seq_dat_path):
        self._attrs_to_copy.append('_seq_dat_path')
        # TODO: parse seq.dat and store in modeling_results
        self.update(parse_seq_dat(self._seq_dat_path))

    # Parse exp.dat
    self._exp_dat_path = _in_results('exp.dat')
    if op.exists(self._exp_dat_path):
        self._attrs_to_copy.append('_exp_dat_path')
        # TODO: parse exp.dat and store in modeling_results
        self.update(parse_exp_dat(self._exp_dat_path))

    # Parse BFP.dat
    self._bfp_dat_path = _in_results('BFP.dat')
    if op.exists(self._bfp_dat_path):
        self._attrs_to_copy.append('_bfp_dat_path')
        # TODO: parse BFP.dat and store in modeling_results
        self.update(parse_bfp_dat(self._bfp_dat_path))

    # Parse cscore
    self.c_score = None
    self.tm_score = None
    self.tm_score_err = None
    self.rmsd = None
    self.rmsd_err = None

    self._cscore_path = _in_results('cscore')
    if op.exists(self._cscore_path):
        self._attrs_to_copy.append('_cscore_path')
        self.update(parse_cscore(self._cscore_path))

    ### COACH RESULTS
    # Each prediction list starts out empty and is only replaced by a non-empty parse result
    self.coach_bsites = []
    self.coach_ec = []
    self.coach_go_mf = []
    self.coach_go_bp = []
    self.coach_go_cc = []

    coach_folder = _in_results(coach_results_folder)

    # The paths are stored even when the COACH folder is absent
    self._coach_bsites_inf_path = op.join(coach_folder, 'Bsites.inf')
    self._coach_ec_dat_path = op.join(coach_folder, 'EC.dat')
    self._coach_go_mf_dat_path = op.join(coach_folder, 'GO_MF.dat')
    self._coach_go_bp_dat_path = op.join(coach_folder, 'GO_BP.dat')
    self._coach_go_cc_dat_path = op.join(coach_folder, 'GO_CC.dat')

    if op.isdir(coach_folder):
        # (attribute holding the file path, attribute receiving the result, parser)
        coach_outputs = (
            ('_coach_bsites_inf_path', 'coach_bsites', parse_coach_bsites_inf),  # Bsites.inf
            ('_coach_ec_dat_path', 'coach_ec', parse_coach_ec),  # EC.dat
            ('_coach_go_mf_dat_path', 'coach_go_mf', parse_coach_go),  # GO_MF.dat
            ('_coach_go_bp_dat_path', 'coach_go_bp', parse_coach_go),  # GO_BP.dat
            ('_coach_go_cc_dat_path', 'coach_go_cc', parse_coach_go),  # GO_CC.dat
        )
        for path_attr, result_attr, parser in coach_outputs:
            coach_file = getattr(self, path_attr)
            if not op.exists(coach_file):
                continue

            parsed = parser(coach_file)
            if parsed:
                # Only files that yielded predictions are registered for copying
                self._attrs_to_copy.append(path_attr)
                setattr(self, result_attr, parsed)