def _try_as_cif(self):
  """Try to load ``self.file_name`` as one of the CIF flavours.

  The attempts are made in this order, and the first that works wins:

  1. ``any_reflection_file``: if it reports a file type, the reader is kept
     and ``self._file_type`` becomes ``"hkl"``.
  2. ``iotbx.pdb.mmcif.cif_input``: the input object and its hierarchy are
     wrapped in ``input_hierarchy_pair`` and ``self._file_type`` becomes
     ``"pdb"``.
  3. ``iotbx.cif.reader`` with ``strict=False``: ``self._file_type``
     becomes ``"cif"``.

  Raises:
    AssertionError: if the file extension is one of ``.ccp4``, ``.img``,
      ``.osc`` or ``.mccd``.
    Exception: errors from step 2 whose message starts with
      "Space group is incompatible" or "The space group" are re-raised
      instead of falling through to step 3; anything raised by step 3
      propagates as well.
  """
  # XXX hack to avoid choking on CCP4 maps and images
  file_ext = os.path.splitext(self.file_name)[1]
  assert file_ext not in (".ccp4", ".img", ".osc", ".mccd")
  # The iotbx imports stay below the extension guard so rejected files
  # never trigger them.
  import iotbx.cif
  from iotbx.reflection_file_reader import any_reflection_file
  cif_file = any_reflection_file(str(self.file_name))
  if cif_file.file_type() is not None:
    self._file_object = cif_file
    self._file_type = "hkl"
    return
  from iotbx.pdb.mmcif import cif_input
  from iotbx.pdb.hierarchy import input_hierarchy_pair
  try:
    cif_in = cif_input(file_name=self.file_name)
    self._file_object = input_hierarchy_pair(cif_in, cif_in.hierarchy)
    self._file_type = "pdb"
  except Exception as e:
    # Space group problems are reported to the caller rather than being
    # masked by the generic CIF fallback.
    if str(e).startswith(("Space group is incompatible", "The space group")):
      raise
    self._file_object = iotbx.cif.reader(
      file_path=self.file_name, strict=False)
    self._file_type = "cif"
def old_run(args, out=None):
  """Combine an mmCIF model with sequence data and write a new CIF file.

  Command-line ``args`` are processed against ``master_phil``. The model is
  read with ``mmcif.cif_input``, the sequence file with ``any_file``, and a
  CIF block produced by ``as_cif_block_with_sequence`` is merged into the
  first data block of the input CIF model. When
  ``params.input.unmerged_data`` is set, the block returned by
  ``cc_star.run`` is merged in as well.

  Args:
    args: command-line arguments (phil assignments and file names).
    out: stream for progress messages; ``sys.stdout`` when None.

  The output path defaults to the input model path with its extension
  replaced by ``.deposit.cif``.

  NOTE: this function uses Python 2 print-chevron statements.
  """
  import iotbx.phil
  if out is None:
    out = sys.stdout
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="input.pdb_file",
    seq_file_def="input.seq_file")
  params = cmdline.work.extract()
  cmdline.work.show()
  if params.output.cif_file is None:
    params.output.cif_file = os.path.splitext(
      params.input.pdb_file)[0] + ".deposit.cif"
  model_vs_sequence.validate_params(params)
  pdb_input = mmcif.cif_input(file_name=params.input.pdb_file)
  pdb_hierarchy = pdb_input.construct_hierarchy()
  cif_model = pdb_input.cif_model
  # NOTE(review): this value is replaced a few lines below before it is
  # ever read, so the resolution lookups use the sequence block, not the
  # input block -- confirm that is intended.
  cif_block = pdb_input.cif_block
  seq_in = any_file(params.input.seq_file, force_type="seq")
  seq_in.check_file_type("seq")
  sequences = seq_in.file_object
  cif_block = pdb_hierarchy.as_cif_block_with_sequence(
    sequences,
    crystal_symmetry=pdb_input.crystal_symmetry(),
    alignment_params=params)
  # list() keeps the indexing valid whether keys() returns a list or a view.
  block_name = list(cif_model.keys())[0]

  def float_or_none(string):
    # None (missing item) raises TypeError; non-numeric text such as the
    # CIF placeholders '?' and '.' raises ValueError.
    try:
      return float(string)
    except (TypeError, ValueError):
      return None

  d_min_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_high'))
  d_max_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_low'))
  # XXX maybe the values from the CIF (i.e. those actually used in the
  # refinement) should override the input params?
  # NOTE(review): as written, a user-supplied limit is replaced by the CIF
  # value (possibly None) and an unset limit stays unset; "is None" may
  # have been intended -- confirm before changing.
  if params.high_resolution is not None:
    params.high_resolution = d_min_from_cif
  if params.low_resolution is not None:
    params.low_resolution = d_max_from_cif
  if params.input.unmerged_data is not None:
    from mmtbx.command_line import cc_star
    result = cc_star.run(params=params.input, out=out)
    cif_block.update(result.as_cif_block())
  cif_model[block_name].update(cif_block)
  cif_model[block_name].sort(key=category_sort_function)
  print >> out, "Writing updated CIF file:"
  print >> out, "  " + params.output.cif_file
  with open(params.output.cif_file, "wb") as f:
    print >> f, cif_model
  return
def run(args, out=None):
  """Combine an mmCIF model with sequence data and write a new CIF file.

  Command-line ``args`` are processed against ``master_phil``. The model is
  read with ``mmcif.cif_input``, the sequence file with ``any_file``, and a
  CIF block produced by ``as_cif_block_with_sequence`` is merged into the
  first data block of the input CIF model. When
  ``params.input.unmerged_data`` is set, the block returned by
  ``cc_star.run`` is merged in as well.

  Args:
    args: command-line arguments (phil assignments and file names).
    out: stream for progress messages; ``sys.stdout`` when None.

  The output path defaults to the input model path with its extension
  replaced by ``.deposit.cif``.

  NOTE: this function uses Python 2 print-chevron statements.
  """
  import iotbx.phil
  if out is None:
    out = sys.stdout
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="input.pdb_file",
    seq_file_def="input.seq_file"
  )
  params = cmdline.work.extract()
  cmdline.work.show()
  if params.output.cif_file is None:
    model_root = os.path.splitext(params.input.pdb_file)[0]
    params.output.cif_file = model_root + ".deposit.cif"
  model_vs_sequence.validate_params(params)
  pdb_input = mmcif.cif_input(file_name=params.input.pdb_file)
  pdb_hierarchy = pdb_input.construct_hierarchy()
  cif_model = pdb_input.cif_model
  # NOTE(review): this value is replaced a few lines below before it is
  # ever read, so the resolution lookups use the sequence block, not the
  # input block -- confirm that is intended.
  cif_block = pdb_input.cif_block
  seq_in = any_file(params.input.seq_file, force_type="seq")
  seq_in.check_file_type("seq")
  sequences = seq_in.file_object
  cif_block = pdb_hierarchy.as_cif_block_with_sequence(
    sequences,
    crystal_symmetry=pdb_input.crystal_symmetry(),
    alignment_params=params)
  # list() keeps the indexing valid whether keys() returns a list or a view.
  block_name = list(cif_model.keys())[0]

  def float_or_none(string):
    # None (missing item) raises TypeError; non-numeric text such as the
    # CIF placeholders '?' and '.' raises ValueError.
    try:
      return float(string)
    except (TypeError, ValueError):
      return None

  d_min_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_high'))
  d_max_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_low'))
  # XXX maybe the values from the CIF (i.e. those actually used in the
  # refinement) should override the input params?
  # NOTE(review): as written, a user-supplied limit is replaced by the CIF
  # value (possibly None) and an unset limit stays unset; "is None" may
  # have been intended -- confirm before changing.
  if params.high_resolution is not None:
    params.high_resolution = d_min_from_cif
  if params.low_resolution is not None:
    params.low_resolution = d_max_from_cif
  if params.input.unmerged_data is not None:
    from mmtbx.command_line import cc_star
    result = cc_star.run(params=params.input, out=out)
    cif_block.update(result.as_cif_block())
  cif_model[block_name].update(cif_block)
  cif_model[block_name].sort(key=category_sort_function)
  print >> out, "Writing updated CIF file:"
  print >> out, "  " + params.output.cif_file
  with open(params.output.cif_file, "wb") as f:
    print >> f, cif_model
  return
def exercise_extract_header_misc():
  """Check header values read from the 3orl mmCIF regression file.

  The file is looked up with ``libtbx.env.find_in_repositories``; when it is
  not found the function returns without checking anything. Otherwise the
  reported file type, wavelength and experiment type are asserted and the
  R/R-free/sigma summary is shown.
  """
  path_3orl = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/3orl.cif",
    test=os.path.isfile)
  if path_3orl is None:
    # Regression data not available: nothing to exercise.
    return
  reader = mmcif.cif_input(file_name=path_3orl)
  assert reader.file_type() == "mmcif"
  observed_wavelength = reader.extract_wavelength()
  assert approx_equal(observed_wavelength, 1.8927)
  observed_method = reader.get_experiment_type()
  assert observed_method == 'X-RAY DIFFRACTION'
  summary = reader.get_r_rfree_sigma(path_3orl)
  summary.show()
def exercise_extract_header_misc():
  """Exercise header extraction on the 3orl mmCIF regression file.

  Returns immediately when ``libtbx.env.find_in_repositories`` cannot
  locate the file. Otherwise asserts the file type, the wavelength and the
  experiment type reported by ``mmcif.cif_input`` and shows the
  R/R-free/sigma summary.
  """
  regression_cif = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/3orl.cif",
    test=os.path.isfile)
  if regression_cif is None:
    return
  parsed = mmcif.cif_input(file_name=regression_cif)
  assert parsed.file_type() == "mmcif"
  lam = parsed.extract_wavelength()
  assert approx_equal(lam, 1.8927)
  method = parsed.get_experiment_type()
  assert method == 'X-RAY DIFFRACTION'
  stats = parsed.get_r_rfree_sigma(regression_cif)
  stats.show()
def _try_as_cif(self):
  """Try to load ``self.file_name`` as one of the CIF flavours.

  The attempts are made in this order, and the first that works wins:

  1. ``any_reflection_file``: if it reports a file type, the reader is kept
     and ``self._file_type`` becomes ``"hkl"``.
  2. ``iotbx.pdb.mmcif.cif_input``: the input object and its hierarchy are
     wrapped in ``input_hierarchy_pair`` and ``self._file_type`` becomes
     ``"pdb"``.
  3. ``iotbx.cif.reader`` with ``strict=False``: used when step 2 raises
     any ``Exception``; ``self._file_type`` becomes ``"cif"``.

  Raises:
    AssertionError: if the file extension is one of ``.ccp4``, ``.img``,
      ``.osc`` or ``.mccd``.
    Exception: whatever step 3 raises propagates to the caller.
  """
  # XXX hack to avoid choking on CCP4 maps and images
  file_ext = os.path.splitext(self.file_name)[1]
  assert file_ext not in (".ccp4", ".img", ".osc", ".mccd")
  # The iotbx imports stay below the extension guard so rejected files
  # never trigger them.
  import iotbx.cif
  from iotbx.reflection_file_reader import any_reflection_file
  cif_file = any_reflection_file(str(self.file_name))
  if cif_file.file_type() is not None:
    self._file_object = cif_file
    self._file_type = "hkl"
    return
  from iotbx.pdb.mmcif import cif_input
  from iotbx.pdb.hierarchy import input_hierarchy_pair
  try:
    cif_in = cif_input(file_name=self.file_name)
    self._file_object = input_hierarchy_pair(cif_in, cif_in.hierarchy)
    self._file_type = "pdb"
  except Exception:
    # Deliberate best-effort: anything that is not a readable model is
    # handed to the generic, non-strict CIF reader.
    self._file_object = iotbx.cif.reader(
      file_path=self.file_name, strict=False)
    self._file_type = "cif"
def get_pdb_info(cif_file, data_file, dres, matthews=True, chains=True):
    """Get residue counts and Phaser results from a PDB or mmCIF file.

    The structure is read with ``iotbx_mmcif.cif_input`` when the path ends
    in "cif" (case-insensitive) and with ``iotbx_pdb.input`` otherwise. Only
    the first model is examined.

    Args:
        cif_file: path of the structure file.
        data_file: passed through to ``run_phaser_module``.
        dres: passed to ``run_phaser_module`` when ``matthews`` is true.
        matthews: when true, ``run_phaser_module`` is called with
            ``cca=True`` and the result also supplies 'NMol' and 'SC'.
        chains: when true, each chain that has residues is written to its
            own mmCIF file next to ``cif_file`` and gets its own entry.

    Returns:
        dict keyed by chain id, plus the key 'all' for the whole file. Each
        value holds 'file', 'NRes', 'MWna', 'MWaa', 'MW' and 'res', and
        additionally 'NMol' and 'SC' when ``matthews`` is true.
    """
    # Fallbacks used when the run_phaser_module result lacks a key.
    default_nmol = 1
    default_sc = 0.55
    default_res = 0.0

    def summarize(path, n_aa, n_na):
        # 110 and 330 are presumably mean residue masses (Da) for amino
        # acids and nucleotides -- TODO confirm.
        return {'file': path,
                'NRes': n_aa + n_na,
                'MWna': n_na * 330,
                'MWaa': n_aa * 110,
                'MW': n_na * 330 + n_aa * 110}

    def phaser_summary(path, n_aa, n_na):
        # Only the Matthews run needs the residue counts and resolution.
        if matthews:
            phaser_return = run_phaser_module(data_file=data_file,
                                              ellg=True,
                                              cca=True,
                                              mmcif=path,
                                              dres=dres,
                                              np=n_aa,
                                              na=n_na)
            return {'NMol': phaser_return.get("z", default_nmol),
                    'SC': phaser_return.get("solvent_content", default_sc),
                    'res': phaser_return.get("target_resolution",
                                             default_res)}
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          mmcif=path)
        return {'res': phaser_return.get("target_resolution", default_res)}

    # Read in the file
    cif_file = convert_unicode(cif_file)
    if cif_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(file_name=cif_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(cif_file).construct_hierarchy()

    # Per-chain files are named "<stem>_<chain id>.cif", where the stem is
    # the base name up to its first '.'. partition() keeps the whole name
    # when there is no '.', where slicing with find() would drop the last
    # character.
    out_dir = os.path.dirname(cif_file)
    stem = os.path.basename(cif_file).partition('.')[0]

    info = {}
    seen_ids = set()
    total_aa = 0
    total_na = 0

    # Go through the chains
    for chain in root.models()[0].chains():
        # Sometimes Hetatoms are AA with same segid.
        repeat = chain.id in seen_ids
        seen_ids.add(chain.id)

        # Count the number of AA and NA in this chain.
        n_aa = 0
        n_na = 0
        for rg in chain.residue_groups():
            resname = rg.atoms()[0].parent().resname
            if resname in iotbx_pdb.common_residue_names_amino_acid:
                n_aa += 1
            if resname in iotbx_pdb.common_residue_names_rna_dna:
                n_na += 1
            # Not sure if I get duplicates?
            if resname in iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                n_na += 1

        # Per-chain output is limited to the first nine distinct chain ids,
        # skipped for repeated ids, and skipped for chains with no counted
        # residues.
        if len(seen_ids) < 10 and chains and not repeat and (n_aa or n_na):
            chain_file = os.path.join(out_dir,
                                      "%s_%s.cif" % (stem, chain.id))
            temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)
            temp.write_mmcif_file(file_name=chain_file)
            info[chain.id] = summarize(chain_file, n_aa, n_na)
            info[chain.id].update(phaser_summary(chain_file, n_aa, n_na))

        # Add up residue count
        total_aa += n_aa
        total_na += n_na

    # Run on entire PDB
    info['all'] = summarize(cif_file, total_aa, total_na)
    info['all'].update(phaser_summary(cif_file, total_aa, total_na))
    return info
def get_pdb_info(struct_file, data_file, dres, matthews=True, chains=True):
    """Get residue counts and Phaser results from a PDB or mmCIF file.

    The structure is read with ``iotbx_mmcif.cif_input`` when the path ends
    in "cif" (case-insensitive) and with ``iotbx_pdb.input`` otherwise. Only
    the first model is examined.

    Args:
        struct_file: path of the structure file.
        data_file: passed through to ``run_phaser_module``.
        dres: passed to ``run_phaser_module`` when ``matthews`` is true.
        matthews: when true, ``run_phaser_module`` is called with
            ``cca=True`` and the result also supplies 'NMol' and 'SC'.
        chains: when true, each chain that has residues is written to its
            own mmCIF file next to ``struct_file`` and gets its own entry.

    Returns:
        dict keyed by chain id, plus the key 'all' for the whole file. Each
        value holds 'file', 'NRes', 'MWna', 'MWaa', 'MW' and 'res', and
        additionally 'NMol' and 'SC' when ``matthews`` is true.
    """
    # Fallbacks used when the run_phaser_module result lacks a key.
    default_nmol = 1
    default_sc = 0.55
    default_res = 0.0

    def summarize(path, n_aa, n_na):
        # 110 and 330 are presumably mean residue masses (Da) for amino
        # acids and nucleotides -- TODO confirm.
        return {
            'file': path,
            'NRes': n_aa + n_na,
            'MWna': n_na * 330,
            'MWaa': n_aa * 110,
            'MW': n_na * 330 + n_aa * 110
        }

    def phaser_summary(path, n_aa, n_na):
        # Only the Matthews run needs the residue counts and resolution.
        if matthews:
            phaser_return = run_phaser_module(data_file=data_file,
                                              ellg=True,
                                              cca=True,
                                              struct_file=path,
                                              dres=dres,
                                              np=n_aa,
                                              na=n_na)
            return {
                'NMol': phaser_return.get("z", default_nmol),
                'SC': phaser_return.get("solvent_content", default_sc),
                'res': phaser_return.get("target_resolution", default_res)
            }
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          struct_file=path)
        return {'res': phaser_return.get("target_resolution", default_res)}

    # Read in the file
    struct_file = convert_unicode(struct_file)
    if struct_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(
            file_name=struct_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(struct_file).construct_hierarchy()

    # Per-chain files are named "<stem>_<chain id>.cif", where the stem is
    # the base name up to its first '.'. partition() keeps the whole name
    # when there is no '.', where slicing with find() would drop the last
    # character.
    out_dir = os.path.dirname(struct_file)
    stem = os.path.basename(struct_file).partition('.')[0]

    info = {}
    seen_ids = set()
    total_aa = 0
    total_na = 0

    # Go through the chains
    for chain in root.models()[0].chains():
        # Sometimes Hetatoms are AA with same segid.
        repeat = chain.id in seen_ids
        seen_ids.add(chain.id)

        # Count the number of AA and NA in this chain.
        n_aa = 0
        n_na = 0
        for rg in chain.residue_groups():
            resname = rg.atoms()[0].parent().resname
            if resname in iotbx_pdb.common_residue_names_amino_acid:
                n_aa += 1
            if resname in iotbx_pdb.common_residue_names_rna_dna:
                n_na += 1
            # Not sure if I get duplicates?
            if resname in iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                n_na += 1

        # Per-chain output is limited to the first nine distinct chain ids,
        # skipped for repeated ids, and skipped for chains with no counted
        # residues.
        if len(seen_ids) < 10 and chains and not repeat and (n_aa or n_na):
            chain_file = os.path.join(out_dir,
                                      "%s_%s.cif" % (stem, chain.id))
            # Write chain as mmCIF file.
            temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)
            temp.write_mmcif_file(file_name=chain_file)
            info[chain.id] = summarize(chain_file, n_aa, n_na)
            info[chain.id].update(phaser_summary(chain_file, n_aa, n_na))

        # Add up residue count
        total_aa += n_aa
        total_na += n_na

    # Run on entire PDB
    info['all'] = summarize(struct_file, total_aa, total_na)
    info['all'].update(phaser_summary(struct_file, total_aa, total_na))
    return info