예제 #1
0
 def _try_as_cif(self):
     """Read ``self.file_name`` as CIF-formatted input and record what it is.

     The file is first handed to ``any_reflection_file``; when that reader
     reports a file type the result is stored with type ``"hkl"``.
     Otherwise the file is parsed as an mmCIF model (type ``"pdb"``), and
     if that parse raises, the file is opened with the non-strict generic
     CIF reader instead (type ``"cif"``).  Exceptions whose message starts
     with "Space group is incompatible" or "The space group" are re-raised
     rather than triggering the generic fallback.

     Sets ``self._file_object`` and ``self._file_type``; returns None.
     Raises AssertionError for the map/image extensions listed below.
     """
     # XXX hack to avoid choking on CCP4 maps and images
     extension = os.path.splitext(self.file_name)[1]
     assert extension not in [".ccp4", ".img", ".osc", ".mccd"]
     import iotbx.cif
     from iotbx.reflection_file_reader import any_reflection_file
     reflections = any_reflection_file(str(self.file_name))
     if reflections.file_type() is not None:
         self._file_object = reflections
         self._file_type = "hkl"
         return

     from iotbx.pdb.mmcif import cif_input
     from iotbx.pdb.hierarchy import input_hierarchy_pair
     try:
         model_in = cif_input(file_name=self.file_name)
         self._file_object = input_hierarchy_pair(model_in, model_in.hierarchy)
         self._file_type = "pdb"
     except Exception as err:
         # Symmetry problems must reach the caller; anything else means
         # "not a model", so fall back to the generic CIF reader.
         symmetry_prefixes = ("Space group is incompatible", "The space group")
         if str(err).startswith(symmetry_prefixes):
             raise
         self._file_object = iotbx.cif.reader(
             file_path=self.file_name, strict=False)
         self._file_type = "cif"
def old_run(args, out=None):
    """Merge sequence information into an mmCIF model and write it out.

    Args:
        args: command-line arguments, passed to
            ``iotbx.phil.process_command_line_with_files``.
        out: writable stream for progress messages; ``sys.stdout`` when None.

    Side effects:
        Shows the working parameters via ``cmdline.work.show()`` and writes
        the updated CIF model to ``params.output.cif_file``.  When that
        parameter is None it defaults to the input model path with its
        extension replaced by ".deposit.cif".

    Returns:
        None.
    """
    import iotbx.phil
    if out is None:
        out = sys.stdout
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="input.pdb_file",
        seq_file_def="input.seq_file")
    params = cmdline.work.extract()
    cmdline.work.show()
    if params.output.cif_file is None:
        params.output.cif_file = os.path.splitext(
            params.input.pdb_file)[0] + ".deposit.cif"
    model_vs_sequence.validate_params(params)
    pdb_input = mmcif.cif_input(file_name=params.input.pdb_file)
    pdb_hierarchy = pdb_input.construct_hierarchy()
    cif_model = pdb_input.cif_model
    seq_in = any_file(params.input.seq_file, force_type="seq")
    seq_in.check_file_type("seq")
    sequences = seq_in.file_object

    cif_block = pdb_hierarchy.as_cif_block_with_sequence(
        sequences,
        crystal_symmetry=pdb_input.crystal_symmetry(),
        alignment_params=params)
    # list() keeps this working when keys() returns a view instead of a list.
    block_name = list(cif_model.keys())[0]

    def float_or_none(string):
        """Return float(string), or None if it is missing or not numeric."""
        try:
            return float(string)
        except (TypeError, ValueError):
            # TypeError: item absent (None).  ValueError: non-numeric text,
            # e.g. the CIF placeholders '?' and '.'.
            return None

    # These lookups read the block built just above, not the block parsed
    # from the input file.
    d_min_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_high'))
    d_max_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_low'))
    # XXX maybe the values from the CIF (i.e. those actually used in the refinement)
    # should override the input params?
    # NOTE(review): as written, a user-supplied limit is replaced by the CIF
    # value (possibly None) and an unset limit stays unset -- confirm that
    # this is the intended direction.
    if params.high_resolution is not None:
        params.high_resolution = d_min_from_cif
    if params.low_resolution is not None:
        params.low_resolution = d_max_from_cif

    if params.input.unmerged_data is not None:
        from mmtbx.command_line import cc_star
        result = cc_star.run(params=params.input, out=out)
        cif_block.update(result.as_cif_block())

    cif_model[block_name].update(cif_block)
    cif_model[block_name].sort(key=category_sort_function)
    # write() instead of the Python-2-only "print >>" statement.
    out.write("Writing updated CIF file:\n")
    out.write("  " + params.output.cif_file + "\n")
    # Text mode: the payload is str(cif_model), not bytes.
    with open(params.output.cif_file, "w") as f:
        f.write(str(cif_model))
        f.write("\n")
def run(args, out=None):
  """Merge sequence information into an mmCIF model and write it out.

  Args:
    args: command-line arguments, passed to
      ``iotbx.phil.process_command_line_with_files``.
    out: writable stream for progress messages; ``sys.stdout`` when None.

  Side effects:
    Shows the working parameters via ``cmdline.work.show()`` and writes the
    updated CIF model to ``params.output.cif_file``.  When that parameter is
    None it defaults to the input model path with its extension replaced by
    ".deposit.cif".

  Returns:
    None.
  """
  import iotbx.phil
  if out is None:
    out = sys.stdout
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="input.pdb_file",
    seq_file_def="input.seq_file"
  )
  params = cmdline.work.extract()
  cmdline.work.show()
  if params.output.cif_file is None:
    params.output.cif_file = os.path.splitext(params.input.pdb_file)[0] + ".deposit.cif"
  model_vs_sequence.validate_params(params)
  pdb_input = mmcif.cif_input(file_name=params.input.pdb_file)
  pdb_hierarchy = pdb_input.construct_hierarchy()
  cif_model = pdb_input.cif_model
  seq_in = any_file(params.input.seq_file, force_type="seq")
  seq_in.check_file_type("seq")
  sequences = seq_in.file_object

  cif_block = pdb_hierarchy.as_cif_block_with_sequence(
    sequences, crystal_symmetry=pdb_input.crystal_symmetry(),
    alignment_params=params)
  # list() keeps this working when keys() returns a view instead of a list.
  block_name = list(cif_model.keys())[0]

  def float_or_none(string):
    """Return float(string), or None if it is missing or not numeric."""
    try:
      return float(string)
    except (TypeError, ValueError):
      # TypeError: item absent (None).  ValueError: non-numeric text,
      # e.g. the CIF placeholders '?' and '.'.
      return None

  # These lookups read the block built just above, not the block parsed
  # from the input file.
  d_min_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_high'))
  d_max_from_cif = float_or_none(cif_block.get('_refine.ls_d_res_low'))
  # XXX maybe the values from the CIF (i.e. those actually used in the refinement)
  # should override the input params?
  # NOTE(review): as written, a user-supplied limit is replaced by the CIF
  # value (possibly None) and an unset limit stays unset -- confirm that this
  # is the intended direction.
  if params.high_resolution is not None:
    params.high_resolution = d_min_from_cif
  if params.low_resolution is not None:
    params.low_resolution = d_max_from_cif

  if params.input.unmerged_data is not None:
    from mmtbx.command_line import cc_star
    result = cc_star.run(params=params.input, out=out)
    cif_block.update(result.as_cif_block())

  cif_model[block_name].update(cif_block)
  cif_model[block_name].sort(key=category_sort_function)
  # write() instead of the Python-2-only "print >>" statement.
  out.write("Writing updated CIF file:\n")
  out.write("  " + params.output.cif_file + "\n")
  # Text mode: the payload is str(cif_model), not bytes.
  with open(params.output.cif_file, "w") as f:
    f.write(str(cif_model))
    f.write("\n")
예제 #4
0
def exercise_extract_header_misc():
    """Exercise header extraction on the 3orl mmCIF regression file.

    Returns without checking anything when the regression file cannot be
    found in the repositories.  Otherwise asserts the reported file type,
    wavelength and experiment type, then shows the R/R-free/sigma summary.
    """
    path_3orl = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/3orl.cif",
        test=os.path.isfile)
    if path_3orl is None:
        # Regression data not installed: nothing to exercise.
        return

    model_in = mmcif.cif_input(file_name=path_3orl)
    assert model_in.file_type() == "mmcif"

    observed_wavelength = model_in.extract_wavelength()
    assert approx_equal(observed_wavelength, 1.8927)

    method = model_in.get_experiment_type()
    assert method == 'X-RAY DIFFRACTION'

    summary = model_in.get_r_rfree_sigma(path_3orl)
    summary.show()
예제 #5
0
def exercise_extract_header_misc():
  """Exercise header extraction on the 3orl mmCIF regression file.

  Does nothing when the regression file is not found.  Otherwise asserts
  the reported file type, wavelength and experiment type, and finally
  shows the R/R-free/sigma summary.
  """
  regression_cif = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/3orl.cif", test=os.path.isfile)
  if regression_cif is None:
    # Regression data not installed: nothing to exercise.
    return

  parsed = mmcif.cif_input(file_name=regression_cif)
  assert parsed.file_type() == "mmcif"

  lam = parsed.extract_wavelength()
  assert approx_equal(lam, 1.8927)

  experiment = parsed.get_experiment_type()
  assert experiment == 'X-RAY DIFFRACTION'

  stats = parsed.get_r_rfree_sigma(regression_cif)
  stats.show()
예제 #6
0
 def _try_as_cif(self):
   """Read ``self.file_name`` as CIF-formatted input and record what it is.

   The file is first handed to ``any_reflection_file``; when that reader
   reports a file type the result is stored with type ``"hkl"``.  Otherwise
   the file is parsed as an mmCIF model (type ``"pdb"``); if that parse
   raises any ``Exception`` the file is opened with the non-strict generic
   CIF reader instead (type ``"cif"``).

   Sets ``self._file_object`` and ``self._file_type``; returns None.
   Raises AssertionError for the map/image extensions listed below.
   """
   # XXX hack to avoid choking on CCP4 maps and images
   file_ext = os.path.splitext(self.file_name)[1]
   assert file_ext not in [".ccp4", ".img", ".osc", ".mccd"]
   import iotbx.cif
   from iotbx.reflection_file_reader import any_reflection_file
   cif_file = any_reflection_file(str(self.file_name))
   if cif_file.file_type() is not None:
     self._file_object = cif_file
     self._file_type = "hkl"
   else:
     from iotbx.pdb.mmcif import cif_input
     from iotbx.pdb.hierarchy import input_hierarchy_pair
     try:
       cif_in = cif_input(file_name=self.file_name)
       self._file_object = input_hierarchy_pair(cif_in, cif_in.hierarchy)
       self._file_type = "pdb"
     except Exception:
       # Deliberate best-effort fallback: whatever stopped the model parse,
       # retry with the generic CIF reader.  The exception object is not
       # needed, so it is not bound (the old "except Exception, e" form is
       # a syntax error on Python 3).
       self._file_object = iotbx.cif.reader(file_path=self.file_name,
         strict=False)
       self._file_type = "cif"
예제 #7
0
파일: rapd_cctbx.py 프로젝트: RAPD/RAPD
def get_pdb_info(cif_file, data_file, dres, matthews=True, chains=True):
    """Get residue counts, mass estimates and Phaser results for a model.

    The model is read with the mmCIF reader when ``cif_file`` ends in "cif"
    (case-insensitive) and with the PDB reader otherwise.  Only the first
    model of the hierarchy is examined.

    Args:
        cif_file: path to the PDB or mmCIF model file.
        data_file: forwarded to ``run_phaser_module`` as ``data_file``.
        dres: forwarded to ``run_phaser_module`` as ``dres`` when
            ``matthews`` is true.
        matthews: when true, ``run_phaser_module`` is called with
            ``cca=True`` and the residue counts, and 'NMol', 'SC' and 'res'
            are stored; otherwise only 'res' is stored.
        chains: when true, each chain is also written to its own mmCIF file
            next to ``cif_file`` and reported under its chain id.

    Returns:
        dict keyed by chain id plus the key 'all'.  Every entry holds
        'file', 'NRes', 'MWna', 'MWaa', 'MW' and 'res'; 'NMol' and 'SC' are
        present only when ``matthews`` is true.  A chain gets its own entry
        only the first time its id is seen, while fewer than 10 distinct
        ids have been seen, and if it has at least one counted residue.
    """
    # Fallbacks used when the run_phaser_module() result lacks a key.
    nmol = 1
    sc = 0.55
    res1 = 0.0

    # Totals over all chains (protein / nucleic-acid residues).
    np = 0
    na = 0
    nchains = 0
    d = {}
    seen_ids = []

    # Read in the file
    cif_file = convert_unicode(cif_file)
    if cif_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(file_name=cif_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(cif_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Sometimes Hetatoms are AA with same segid, so a chain id can
        # appear more than once; only its first occurrence is split out.
        repeat = chain.id in seen_ids
        if not repeat:
            seen_ids.append(chain.id)
            nchains += 1

        # Count the protein (np1) and nucleic acid (na1) residues.
        np1 = 0
        na1 = 0
        for rg in chain.residue_groups():
            resname = rg.atoms()[0].parent().resname
            if resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if resname in iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1

        # Limit to 10 chains?!?  Do not split up the model if run from cell
        # analysis (chains=False), and skip chains with nothing counted.
        if nchains < 10 and chains and not repeat and (np1 or na1):
            # Write a new mmCIF file for this chain.
            temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)

            # Use the text before the first '.' of the file name as the
            # stem.  partition() keeps the whole name when it has no dot;
            # slicing with find() dropped the last character in that case.
            stem = os.path.basename(cif_file).partition('.')[0]
            n = os.path.join(os.path.dirname(cif_file),
                             "%s_%s.cif" % (stem, chain.id))
            temp.write_mmcif_file(file_name=n)

            # 330 / 110 per residue: presumably approximate average masses
            # in Da for nucleotides / amino acids -- confirm.
            d[chain.id] = {'file': n,
                           'NRes': np1+na1,
                           'MWna': na1*330,
                           'MWaa': np1*110,
                           'MW': na1*330+np1*110}
            if matthews:
                # Run Matthews Calc. on chain
                phaser_return = run_phaser_module(data_file=data_file,
                                                  ellg=True,
                                                  cca=True,
                                                  mmcif=n,
                                                  dres=dres,
                                                  np=np1,
                                                  na=na1)
                d[chain.id].update({'NMol': phaser_return.get("z", nmol),
                                    'SC': phaser_return.get("solvent_content", sc),
                                    'res': phaser_return.get("target_resolution", res1)})
            else:
                phaser_return = run_phaser_module(data_file=data_file,
                                                  ellg=True,
                                                  mmcif=n)
                d[chain.id].update({'res': phaser_return.get("target_resolution", res1)})

        # Add up residue count
        np += np1
        na += na1

    d['all'] = {'file': cif_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110}
    # Run on the entire model
    if matthews:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          mmcif=cif_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({'NMol': phaser_return.get("z", nmol),
                         'SC': phaser_return.get("solvent_content", sc),
                         'res': phaser_return.get("target_resolution", res1)})
    else:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          mmcif=cif_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})
    return d
예제 #8
0
def get_pdb_info(struct_file, data_file, dres, matthews=True, chains=True):
    """Get residue counts, mass estimates and Phaser results for a model.

    The model is read with the mmCIF reader when ``struct_file`` ends in
    "cif" (case-insensitive) and with the PDB reader otherwise.  Only the
    first model of the hierarchy is examined.

    Args:
        struct_file: path to the PDB or mmCIF model file.
        data_file: forwarded to ``run_phaser_module`` as ``data_file``.
        dres: forwarded to ``run_phaser_module`` as ``dres`` when
            ``matthews`` is true.
        matthews: when true, ``run_phaser_module`` is called with
            ``cca=True`` and the residue counts, and 'NMol', 'SC' and 'res'
            are stored; otherwise only 'res' is stored.
        chains: when true, each chain is also written to its own mmCIF file
            next to ``struct_file`` and reported under its chain id.

    Returns:
        dict keyed by chain id plus the key 'all'.  Every entry holds
        'file', 'NRes', 'MWna', 'MWaa', 'MW' and 'res'; 'NMol' and 'SC' are
        present only when ``matthews`` is true.  A chain gets its own entry
        only the first time its id is seen, while fewer than 10 distinct
        ids have been seen, and if it has at least one counted residue.
    """
    # Fallbacks used when the run_phaser_module() result lacks a key.
    nmol = 1
    sc = 0.55
    res1 = 0.0

    # Totals over all chains (protein / nucleic-acid residues).
    np = 0
    na = 0
    nchains = 0
    d = {}
    seen_ids = []

    # Read in the file
    struct_file = convert_unicode(struct_file)
    if struct_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(
            file_name=struct_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(struct_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Sometimes Hetatoms are AA with same segid, so a chain id can
        # appear more than once; only its first occurrence is split out.
        repeat = chain.id in seen_ids
        if not repeat:
            seen_ids.append(chain.id)
            nchains += 1

        # Count the protein (np1) and nucleic acid (na1) residues.
        np1 = 0
        na1 = 0
        for rg in chain.residue_groups():
            resname = rg.atoms()[0].parent().resname
            if resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if resname in iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1

        # Limit to 10 chains?!?  Do not split up the model if run from cell
        # analysis (chains=False), and skip chains with nothing counted.
        if nchains < 10 and chains and not repeat and (np1 or na1):
            # Write a new mmCIF file for this chain.
            temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)

            # Use the text before the first '.' of the file name as the
            # stem.  partition() keeps the whole name when it has no dot;
            # slicing with find() dropped the last character in that case.
            stem = os.path.basename(struct_file).partition('.')[0]
            n = os.path.join(os.path.dirname(struct_file),
                             "%s_%s.cif" % (stem, chain.id))
            temp.write_mmcif_file(file_name=n)

            # 330 / 110 per residue: presumably approximate average masses
            # in Da for nucleotides / amino acids -- confirm.
            d[chain.id] = {
                'file': n,
                'NRes': np1 + na1,
                'MWna': na1 * 330,
                'MWaa': np1 * 110,
                'MW': na1 * 330 + np1 * 110,
            }
            if matthews:
                # Run Matthews Calc. on chain
                phaser_return = run_phaser_module(data_file=data_file,
                                                  ellg=True,
                                                  cca=True,
                                                  struct_file=n,
                                                  dres=dres,
                                                  np=np1,
                                                  na=na1)
                d[chain.id].update({
                    'NMol': phaser_return.get("z", nmol),
                    'SC': phaser_return.get("solvent_content", sc),
                    'res': phaser_return.get("target_resolution", res1),
                })
            else:
                phaser_return = run_phaser_module(data_file=data_file,
                                                  ellg=True,
                                                  struct_file=n)
                d[chain.id].update({
                    'res': phaser_return.get("target_resolution", res1),
                })

        # Add up residue count
        np += np1
        na += na1

    d['all'] = {
        'file': struct_file,
        'NRes': np + na,
        'MWna': na * 330,
        'MWaa': np * 110,
        'MW': na * 330 + np * 110,
    }
    # Run on the entire model
    if matthews:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          struct_file=struct_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({
            'NMol': phaser_return.get("z", nmol),
            'SC': phaser_return.get("solvent_content", sc),
            'res': phaser_return.get("target_resolution", res1),
        })
    else:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          struct_file=struct_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})
    return d