Beispiel #1
0
 def __init__(self, cell, symmetry, sg_number, dmin, dmax, columns,
              filename):
     assert isinstance(columns[0], tuple)
     Cell.__init__(self, cell, symmetry)
     self.sg_number = sg_number
     self.dmin = dmin
     self.dmax = dmax
     assert dmin >= dmax  # yes, min > max here
     self.columns = OrderedDict(columns)
     self.filename = filename
Beispiel #2
0
 def __init__(self, cell, symmetry, sg_number, dmin, dmax, columns,
              filename):
     assert isinstance(columns[0], tuple)
     Cell.__init__(self, cell, symmetry)
     self.sg_number = sg_number
     self.dmin = dmin
     self.dmax = dmax
     assert dmin >= dmax # yes, min > max here
     self.columns = OrderedDict(columns)
     self.filename = filename
Beispiel #3
0
 def __init__(self, cryst1_line):
     assert cryst1_line.startswith("CRYST1")
     a = float(cryst1_line[6:15])
     b = float(cryst1_line[15:24])
     c = float(cryst1_line[24:33])
     alpha = float(cryst1_line[33:40])
     beta = float(cryst1_line[40:47])
     gamma = float(cryst1_line[47:54])
     symmetry = cryst1_line[55:66].strip()
     Cell.__init__(self, (a, b, c, alpha, beta, gamma), symmetry)
     self.has_hetatm_x = None
Beispiel #4
0
 def __init__(self, cryst1_line):
     assert cryst1_line.startswith('CRYST1')
     a = float(cryst1_line[6:15])
     b = float(cryst1_line[15:24])
     c = float(cryst1_line[24:33])
     alpha = float(cryst1_line[33:40])
     beta = float(cryst1_line[40:47])
     gamma = float(cryst1_line[47:54])
     symmetry = cryst1_line[55:66].strip()
     Cell.__init__(self, (a, b, c, alpha, beta, gamma), symmetry)
     self.has_hetatm_x = None
Beispiel #5
0
class TestCell(unittest.TestCase):
    def setUp(self):
        self.cell = Cell((22.84, 32.84, 42.84, 80.84, 90.84, 100.84), None)
    def test_orth(self):
        orth = self.cell.get_orth_matrix()
        expected = np.array([[22.84, -6.17612, -0.628045],
                             [0,     32.254,    6.82343],
                             [0,      0,       42.2884]])
        assert_allclose(to_np(orth), expected, rtol=1e-6)

    def test_frac(self):
        frac = self.cell.get_frac_matrix()
        orth = self.cell.get_orth_matrix()
        assert_allclose(to_np(frac.dot(orth)), np.identity(3), atol=1e-12)
Beispiel #6
0
def dimple(wf, opt):
    comment('     ### Dimple v%s. Problems and suggestions:'
            ' ccp4.github.io/dimple ###' % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line('MTZ (%.1fA)' % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment('\nNo non-empty pdb files given. Nothing to do.')
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment('\nPDBs in order of similarity (using the first one):')
        opt.pdbs.sort(
            key=lambda x: calculate_difference(wf.file_info[x], mtz_meta))
    utils.log_value('data_file', opt.mtz)
    utils.log_value('pdb_files', opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = 'ini.pdb'
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error('PDB file missing CRYST1 record, starting from MR')
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment('\nHETATM marked as element X would choke many programs.')
        rb_xyzin = 'prepared.pdb'
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb,
                                 xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment('\nRemoved %d HETATM atoms' % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error('rwcontents could not interpret %s' % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment('\nHmm... %.1f%% of solvent or incomplete model' %
                rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment('\ndebug: problem when calculating volume?')

    ####### pointless - reindexing #######
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = 'pointless.mtz'
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz,
                     xyzin=rb_xyzin,
                     hklout=reindexed_mtz,
                     keys='TOLERANCE 5').run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment('\n    %-10s CC: %-8.3f cell diff: %.1fA' %
                        (ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment('\n    no good indexing')
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = 'amplit.mtz'
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            colano = None
            if opt.anode and all(
                    col in mtz_meta.columns
                    for col in ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']):
                colano = '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'
            wf.ctruncate(hklin=reindexed_mtz,
                         hklout=f_mtz,
                         colin='/*/*/[%s,%s]' % i_sigi_cols,
                         colano=colano).run()
        else:
            wf.truncate(hklin=reindexed_mtz,
                        hklout=f_mtz,
                        labin='IMEAN=%s SIGIMEAN=%s' % i_sigi_cols,
                        labout='F=F SIGF=SIGF').run()
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = 'FP=%s SIGFP=%s' % (opt.fcolumn, opt.sigfcolumn)
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment('\nMR requested unconditionally.')
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment('\nDifferent unit cells.')
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment('\nDifferent space groups.')
    else:
        comment('\nRigid-body refinement with resolution 3.5 A, %d cycles.' %
                opt.rigid_cycles)
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            if rw_data['aa_count'] != 0:
                comment(' %.1f waters/aa.' %
                        (rw_data['water_count'] / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {'refmacRB.pdb', 'refmacRB.mtz'}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz,
                   xyzin=rb_xyzin,
                   hklout='refmacRB.mtz',
                   xyzout='refmacRB.pdb',
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle %d""" %
                   opt.rigid_cycles).run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment('\nTry MR.')
        elif not wf.jobs[-1].data.get('overall_r'):
            comment('\nWARNING: unknown R factor, something went wrong.\n')
            refmac_xyzin = 'refmacRB.pdb'
        elif wf.jobs[-1].data['overall_r'] > opt.mr_when_r:
            comment('\nRun MR for R > %g.' % opt.mr_when_r)
        else:
            comment('\nNo MR for R < %g.' % opt.mr_when_r)
            refmac_xyzin = 'refmacRB.pdb'

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(' Volume of asu: %.1f%% of model asu.' % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment('\nWould try MR, but it is disabled.')
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = 'ens_merged.pdb'
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {
                'molrep.pdb', 'molrep_dimer.pdb', 'molrep.crd'
            }
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = 'molrep.pdb'
        else:
            wf.temporary_files |= {'phaser.1.pdb', 'phaser.1.mtz'}
            wf.phaser_auto(hklin=f_mtz,
                           labin='F=%s SIGF=%s' %
                           (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin,
                                      identity=100,
                                      num=mr_num,
                                      mw=mw),
                           sg_alt='ALL',
                           opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                raise RuntimeError('No phaser solution.')
            refmac_xyzin = 'phaser.1.pdb'
            f_mtz = 'phaser.1.mtz'

    if False:
        wf.findwaters(pdbin=refmac_xyzin,
                      hklin=f_mtz,
                      f='FC',
                      phi='PHIC',
                      pdbout='prepared_wat.pdb',
                      sigma=2)
        refmac_xyzin = 'prepared_wat.pdb'

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment('\nFree-R flags from the %s file, column %s.' %
                (('reference' if free_mtz != opt.mtz else 'input'), free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment('\nReplace free-R flags')
        else:
            comment('\nGenerate free-R flags')
        free_mtz = 'free.mtz'
        wf.temporary_files |= {'unique.mtz', free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout='unique.mtz', ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(' (repeatably)')
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout='unique.mtz', ref=pdb_meta, resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin='unique.mtz',
                     hklout=free_mtz,
                     keys=('SEED' if opt.seed_freerflag else '')).run()

    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = 'prepared.mtz'
        wf.temporary_files.add(prepared_mtz)
        wf.cad(
            data_in=[(f_mtz, [c for c in f_mtz_meta.columns if c != free_col]),
                     (free_mtz, [free_col])],
            hklout=prepared_mtz,
            keys=[
                'sysab_keep',  # does it matter?
                'reso overall 1000.0 %g' % cad_reso
            ]).run()
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz,
                     hklout='prepared2.mtz',
                     keys='COMPLETE FREE=' + free_col,
                     parser=' (again, for %d refl. more)' %
                     freerflag_missing).run()
        prepared_mtz = 'prepared2.mtz'
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = 'matrix %f' % opt.weight
    else:
        refmac_weight = 'auto'
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += 'free %s\n' % opt.freecolumn_val
    refmac_labin = '%s FREE=%s' % (refmac_labin_nofree, free_col)
    comment('\nRestrained refinement, %d+%d cycles.' %
            (opt.jelly, opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {'jelly.pdb', 'jelly.mtz'}
        wf.refmac5(hklin=prepared_mtz,
                   xyzin=refmac_xyzin,
                   hklout='jelly.mtz',
                   xyzout='jelly.pdb',
                   labin=refmac_labin,
                   libin=opt.libin,
                   keys=restr_ref_keys + 'ridge distance sigma 0.01\n'
                   'make hydrogen no\n'
                   'ncycle %d' % opt.jelly + opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = 'jelly.pdb'
    restr_job = wf.refmac5(
        hklin=prepared_mtz,
        xyzin=refmac_xyzin,
        hklout=opt.hklout,
        xyzout=opt.xyzout,
        labin=refmac_labin,
        libin=opt.libin,
        keys=(restr_ref_keys + 'ncycle %d' % opt.restr_cycles +
              opt.extra_ref_keys)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and 'free_r' in prev[0].data:
            comment('\nPreviously:  R/Rfree %.4f/%.4f  Rfree change: %+.4f' %
                    (prev[0].data['overall_r'], prev[0].data['free_r'],
                     restr_job.data['free_r'] - prev[0].data['free_r']))

    ####### check blobs #######
    if opt.blob_search:
        if restr_job.data['free_r'] <= BAD_FINAL_RFREE:
            fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
            coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
            if coot_script:
                comment('\nTo see it in Coot run %s' % coot_script)
        else:
            comment('\nNo blob search for Rfree > %g.' % BAD_FINAL_RFREE)
            _generate_scripts_and_pictures(wf, opt, None)

    if opt.anode:
        # check if mtz contains I+/- and SIGI+/-
        column_types = list(reindexed_mtz_meta.columns.values())
        if column_types.count('K') != 2 and column_types.count('M') != 2:
            comment('\nColumns I+/- and SIG+/- not found. Skipping AnoDe.')
            return
        anode_name = 'anode'
        # convert to sca for input to shelxc
        scaout = anode_name + '.sca'
        wf.mtz2sca(prepared_mtz, scaout).run()

        wf.shelxc(scaout, reindexed_mtz_meta.cell,
                  reindexed_mtz_meta.symmetry).run()

        wf.copy_uncompressed(opt.xyzout, anode_name + '.pdb')
        anode_job = wf.anode(anode_name).run()
        wf.temporary_files |= {
            scaout, anode_name + '.pdb', anode_name + '.hkl',
            anode_name + '.pha', anode_name + '_sad.cif',
            anode_name + '_fa.hkl'
        }
        cell = Cell(reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry)
        # need orthogonal not fractional coordinates to generate coot script
        anode_job.data['blobs'] = cell.orthogonalize(anode_job.data['xyz'])
        comment(_anode_anom_peak_lines(anode_job.data))
        coot_script = _generate_scripts_and_pictures(wf,
                                                     opt,
                                                     anode_job.data,
                                                     pha=anode_name + '.pha')
Beispiel #7
0
 def __init__(self, entry):
     Cell.__init__(self, entry[2:8], symmetry=entry[1])
     self.pdb_id = entry[0]
     self.uniprot_src = entry[8]  # we may have a homolog of uniprot entry
Beispiel #8
0
 def __init__(self, entry):
     Cell.__init__(self, entry[2:8], symmetry=entry[1])
     self.pdb_id = entry[0]
     self.uniprot_src = entry[8]  # we may have a homolog of uniprot entry
Beispiel #9
0
 def setUp(self):
     self.cell = Cell((22.84, 32.84, 42.84, 80.84, 90.84, 100.84), None)