Example #1
def run_shelxe_local(_settings):
    '''Run shelxe locally with settings given in a dictionary, containing:

    nsite - number of sites
    solv - solvent fraction
    resol - high resolution limit (defaults to 0.0 if absent)
    hand - original or inverted
    nrefl - reflection space to allocate
    wd - working directory'''

    if 'resol' not in _settings:
        _settings['resol'] = 0.0

    nsite = _settings['nsite']
    solv = _settings['solv']
    hand = _settings['hand']
    resol = _settings['resol']
    nrefl = _settings['nrefl']
    wd = _settings['wd']

    if hand == 'original':
        job_output = run_job('shelxe', ['sad', 'sad_fa', '-h%d' % nsite, '-l%d' % nrefl,
                                        '-s%f' % solv, '-d%f' % resol, '-m20'], [], wd)
    else:
        job_output = run_job('shelxe', ['sad', 'sad_fa', '-h%d' % nsite, '-l%d' % nrefl,
                                        '-s%f' % solv, '-d%f' % resol, '-m20', '-i'], [], wd)

    return
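A minimal call might look like the sketch below; the dictionary keys mirror the ones read in the function body, while the concrete values and the working directory are purely illustrative (run_job is assumed to be importable from the surrounding module).

settings = {
    'nsite': 10,         # number of heavy-atom sites
    'solv': 0.55,        # solvent fraction
    'hand': 'original',  # 'inverted' adds the -i flag
    'resol': 2.0,        # high resolution limit (0.0 if omitted)
    'nrefl': 2,          # reflection space to allocate
    'wd': '/tmp/fast_ep/0.55',  # directory containing sad.hkl and sad_fa.*
}
run_shelxe_local(settings)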
Example #2
def run_shelxd_local(_settings):
    '''Run shelxd locally with settings given in a dictionary, containing:

    nrefl = 1 + floor(nref / 100000) - space to allocate
    ncpu - number of cpus to use
    wd - working directory'''

    nrefl = _settings['nrefl']
    ncpu = _settings['ncpu']
    wd = _settings['wd']

    job_output = run_job(
        'shelxd', ['-L%d' % nrefl, 'sad_fa', '-t%d' % ncpu], [], wd)

    open(os.path.join(wd, 'shelxd.log'), 'w').write(''.join(job_output))

    return
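The docstring's nrefl formula can be applied directly; a hedged usage sketch in which the reflection count and the working directory are made-up values:

import math

nref = 180000  # total reflections in the data set (illustrative)
settings = {
    'nrefl': 1 + int(math.floor(nref / 100000)),  # space to allocate, per the docstring
    'ncpu': 8,                                    # CPUs passed via -t
    'wd': '/tmp/fast_ep',                         # directory containing sad_fa.ins / sad_fa.hkl
}
run_shelxd_local(settings)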
Example #3
def ctruncate_anomalous_signal(hklin):
    '''Estimate sensible anomalous resolution limits from ctruncate, run as

      ctruncate -mtzin ../AUTOMATIC_DEFAULT_scaled.mtz -mtzout truncated.mtz \
          -colano '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'

    whose log reports, for example:

      Estimated limits of anomalous signal
        Wang limit (deltaI/I) > 0.6% : 1.3 A
        anomalous limit (deltaI/sig) > 1.3 : 2.1 A
        measurability limit (Nanon/Nov) > 5% : 1.8 A

    Use the measurability limit (Nanon/Nov) > 5%, here 1.8 A, +/- 0.2 A,
    clamped at the maximum resolution of the data.'''
    ctruncate_output = run_job(
        ' '.join([
            'ctruncate', '-mtzin', hklin, '-mtzout', 'truncated.mtz',
            '-colano', '"/*/*/[I(+),SIGI(+),I(-),SIGI(-)]"'
        ]), [], [])
    open('ctruncate.log', 'w').write(''.join(ctruncate_output))

    maximum_resolution = float('nan')
    rlimit = float('nan')

    for record in ctruncate_output:
        if 'Maximum resolution =' in record:
            maximum_resolution = float(record.split()[-2])
        if "measurability limit (Nanon/Nov)" in record:
            rlimit = float(record.split()[-2])
            break

    if not isinf(rlimit) and not isnan(rlimit) and \
        not isinf(maximum_resolution) and not isnan(maximum_resolution):
        return [max(maximum_resolution, rlimit - 0.2), rlimit, rlimit + 0.2]

    return None
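The returned list brackets the measurability limit by +/- 0.2 A (clamped at the maximum resolution), or is None if either value could not be parsed; a sketch of how a caller might scan it (the input MTZ name and the loop body are assumptions, not part of the source):

limits = ctruncate_anomalous_signal('scaled.mtz')
if limits is None:
    print('could not estimate anomalous signal limits')
else:
    for rlimit in limits:  # e.g. [1.6, 1.8, 2.0] for a 1.8 A measurability limit
        print('trying anomalous resolution limit %.2f A' % rlimit)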
Example #4
def main():
    """
    main function - creates session and runs jobs
    """
    time_start = time.time()
    dx_session_obj = get_session.GetSession()
    dx_logging.logging_est(ARGUMENTS['--logdir'])
    config_file_path = ARGUMENTS['--config']
    single_thread = ARGUMENTS['--single_thread']
    engine = ARGUMENTS['--engine']
    try:
        dx_session_obj.get_config(config_file_path)
        # This is the function that will handle processing main_workflow for
        # all the servers.
        for each in run_job.run_job(main_workflow, dx_session_obj, engine,
                                    single_thread):
            # join them back together so that we wait for all threads to
            # complete
            each.join()
        elapsed_minutes = run_job.time_elapsed(time_start)
        dx_logging.print_info(f'script took {elapsed_minutes} minutes to '
                              f'get this far.')
    # Here we handle what we do when the unexpected happens
    except SystemExit as err:
        # This is what we use to handle our sys.exit(#)
        sys.exit(err)

    except dlpx_exceptions.DlpxException as err:
        # We use this exception handler when an error occurs in a function
        # call.
        dx_logging.print_exception(f'ERROR: Please check the ERROR message '
                                   f'below:\n {err.error}')
        sys.exit(2)

    except exceptions.HttpError as err:
        # We use this exception handler when our connection to Delphix fails
        dx_logging.print_exception(
            f'ERROR: Connection failed to the Delphix DDP. Please check '
            f'the ERROR message below:\n{err.status}')
        sys.exit(2)

    except KeyError as err:
        dx_logging.print_exception(f'ERROR: Key not found:\n{err}')
        sys.exit(2)

    except exceptions.JobError as err:
        # We use this exception handler when a job fails in Delphix so that we
        # have actionable data
        elapsed_minutes = run_job.time_elapsed(time_start)
        dx_logging.print_exception(
            f'A job failed in the Delphix Engine:\n{err.job}.'
            f'{basename(__file__)} took {elapsed_minutes} minutes to get '
            f'this far')
        sys.exit(3)

    except KeyboardInterrupt:
        # We use this exception handler to gracefully handle ctrl+c exits
        dx_logging.print_debug('You sent a CTRL+C to interrupt the process')
        elapsed_minutes = run_job.time_elapsed(time_start)
        dx_logging.print_info(f'{basename(__file__)} took {elapsed_minutes} '
                              f'minutes to get this far.')
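ARGUMENTS is indexed with docopt-style option names ('--config', '--logdir', '--engine', '--single_thread'); a minimal sketch of how such a dictionary could be built, assuming the docopt package and a made-up usage string (neither is shown in the source):

from docopt import docopt

USAGE = """Usage:
    dx_script.py [--config <path>] [--logdir <path>] [--engine <name>] [--single_thread <bool>]
"""

ARGUMENTS = docopt(USAGE)  # hypothetical: the source does not show how ARGUMENTS is created

if __name__ == '__main__':
    main()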
Example #5
    def phase(self):
        '''Perform the phasing following from the substructure determination,
        using the best solution found, using shelxe. This will be run for a
        range of sensible solvent fractions between 25% and 75% and for
        both hands of the substructure. N.B. for a chiral screw axis (e.g. P41)
        this will also invert the spacegroup (to e.g. P43) which needs to
        be remembered in transforming the output.'''

        t0 = time.time()

        cluster = self._cluster
        njobs = self._machines
        ncpu = self._cpu

        solvent_fractions = [0.25 + 0.05 * j for j in range(11)]
        timeout = 600 + self._ncycle

        jobs = [ ]

        for solvent_fraction in solvent_fractions:
            wd = os.path.join(self._wd, '%.2f' % solvent_fraction)
            if not os.path.exists(wd):
                os.makedirs(wd)
            shutil.copyfile(os.path.join(self._wd, 'sad.hkl'),
                            os.path.join(wd, 'sad.hkl'))
            for ending in 'lst', 'pdb', 'res', 'hkl':
                shutil.copyfile(os.path.join(self._wd, 'sad_fa.%s' % ending),
                                os.path.join(wd, 'sad_fa.%s' % ending))

            jobs.append({'nsite':self._best_nsite, 'solv':solvent_fraction,
                         'ncycle':self._ncycle,
                         'hand':'original', 'wd':wd})
            jobs.append({'nsite':self._best_nsite, 'solv':solvent_fraction,
                         'ncycle':self._ncycle,
                         'hand':'inverted', 'wd':wd})

        logging.info('Running %d x shelxe jobs' % len(jobs))


        if cluster:
            run_shelxe_drmaa_array(self._wd, njobs, jobs, timeout, self._sge_project)
        else:
            pool = Pool(min(njobs * ncpu, len(jobs)))
            pool.map(run_shelxe_local, jobs)

        shelxe_stats = read_shelxe_log(self._wd, solvent_fractions)
        skey = lambda s: '%.2f' % s

        best_solvent, best_hand, best_fom = max(((solv, hand, shelxe_stats['mean_fom_cc'][skey(solv)][hand]['mean_fom'])
                                                for solv, hand in product(solvent_fractions, ['original', 'inverted'])),
                                                key=lambda v: v[-1])

        try:
            plot_shelxe_contrast({best_solvent: shelxe_stats['contrast'][skey(best_solvent)]},
                                 os.path.join(self._wd, 'sad_best.png'), True)
            plot_shelxe_contrast(shelxe_stats['contrast'],
                                 os.path.join(self._wd, 'sad.png'))
            plot_shelxe_fom_mapcc(shelxe_stats['fom_mapcc'],
                                 os.path.join(self._wd, 'fom_mapcc.png'))
            plot_shelxe_mean_fom_cc(shelxe_stats['mean_fom_cc'],
                                 os.path.join(self._wd, 'mean_fom_cc.png'))
        except Exception:
            logging.warning("WARNING: Exception thrown while plotting SHELXE results.")

        self._best_fom = best_fom
        self._best_solvent = best_solvent
        self._best_hand = best_hand

        logging.info('Solv. Orig. Inv.')
        for solvent_fraction in solvent_fractions:
            fom_orig, fom_inv = [shelxe_stats['mean_fom_cc'][skey(solvent_fraction)][hand]['pseudo_cc']
                                 for hand in ['original', 'inverted']]
            if solvent_fraction == best_solvent:
                logging.info(
                    '%.2f %.3f %.3f (best)' % (solvent_fraction, fom_orig,
                                               fom_inv))
            else:
                logging.info('%.2f %.3f %.3f' % (solvent_fraction, fom_orig,
                                              fom_inv))

        logging.info('Best solvent: %.2f' % best_solvent)
        logging.info('Best hand:    %s' % best_hand)

        wd = os.path.join(self._wd, skey(best_solvent))

        best_fom_mapcc = shelxe_stats['fom_mapcc'][skey(best_solvent)][best_hand]
        parse_pairs = [([self._dmax,] + best_fom_mapcc['resol'][:-1], 'RESOLUTION_LOW'),
                       (best_fom_mapcc['resol'], 'RESOLUTION_HIGH'),
                       (best_fom_mapcc['fom'], 'FOM'),
                       (best_fom_mapcc['mapcc'], 'MAPCC'),
                       (best_fom_mapcc['nrefl'], 'NREFLECTIONS')]
        for field_values, field_name in parse_pairs:
            store_string_xml(self._xml_results, field_values, field_name)
        self._xml_results['FOM'] = best_fom
        self._xml_results['SOLVENTCONTENT'] = best_solvent
        self._xml_results['ENANTIOMORPH'] = (best_hand=='inverted')

        # copy the result files from the most successful shelxe run into the
        # working directory, before converting to mtz format for inspection with
        # e.g. coot.

        # FIXME in here map correct site file to ASU

        from fast_ep_helpers import map_sites_to_asu
        if best_hand == 'original':
            map_sites_to_asu(self._best_spacegroup,
                             os.path.join(wd, 'sad_fa.pdb'),
                             os.path.join(self._wd, 'sites.pdb'))
        else:
            map_sites_to_asu(self._best_spacegroup,
                             os.path.join(wd, 'sad_fa.pdb'),
                             os.path.join(self._wd, 'sites.pdb'),
                             invert=True)

        if best_hand == 'original':
            for ending in ['phs', 'pha', 'lst', 'hat']:
                shutil.copyfile(os.path.join(wd, 'sad.%s' % ending),
                                os.path.join(self._wd, 'sad.%s' % ending))
        else:
            for ending in ['phs', 'pha', 'lst', 'hat']:
                shutil.copyfile(os.path.join(wd, 'sad_i.%s' % ending),
                                os.path.join(self._wd, 'sad.%s' % ending))
            self._best_spacegroup = spacegroup_enantiomorph(
                self._best_spacegroup)

        logging.info('Best spacegroup: %s' % self._best_spacegroup)

        if self._trace:
            # rerun shelxe to trace the chain
            self._nres_trace = 0
            arguments = ['sad', 'sad_fa', '-h%d' % self._best_nsite,
                         '-s%.2f' % best_solvent, '-d%.2f' % self._best_ano_rlimit, '-a3', '-m20']
            if best_hand != 'original':
                arguments.append('-i')
            output = run_job('shelxe', arguments, [], self._wd)
            for record in output:
                if 'residues left after pruning' in record:
                    self._nres_trace = int(record.split()[0])
            pdb_org = os.path.join(self._wd, 'sad.pdb')
            pdb_inv = os.path.join(self._wd, 'sad_i.pdb')
            pdb_final = os.path.join(self._wd, 'sad_trace.pdb')
            try:
                if best_hand == 'inverted':
                    shutil.copyfile(pdb_inv, pdb_final)
                else:
                    shutil.copyfile(pdb_org, pdb_final)
                logging.info('Traced:       %d residues' % self._nres_trace)
            except IOError:
                logging.info('Chain tracing was unsuccessful.')

        # convert sites to pdb, inverting if needed

        xs = pdb.input(os.path.join(
            self._wd, 'sad_fa.pdb')).xray_structure_simple()
        if best_hand == 'inverted':
            open('sad.pdb', 'w').write(xs.change_hand().as_pdb_file())
        else:
            open('sad.pdb', 'w').write(xs.as_pdb_file())

        o = run_job('convert2mtz', ['-hklin', 'sad.phs', '-mtzout', 'sad.mtz',
                                   '-colin', 'F FOM PHI SIGF',
                                   '-cell', '%f %f %f %f %f %f' % self._unit_cell,
                                   '-spacegroup',
                                   spacegroup_full(self._best_spacegroup)],
            [], self._wd)

        open('convert2mtz.log', 'w').write('\n'.join(o))

        t1 = time.time()
        logging.info('Time: %.2f' % (t1 - t0))

        return
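The best (solvent fraction, hand) combination above is chosen by maximising the mean FOM over the Cartesian product of fractions and hands; the same idiom applied to a toy stats dictionary with made-up FOM values (the real numbers come from read_shelxe_log):

from itertools import product

stats = {'0.45': {'original': 0.51, 'inverted': 0.48},
         '0.50': {'original': 0.55, 'inverted': 0.49}}
solvent_fractions = [0.45, 0.50]
skey = lambda s: '%.2f' % s

best_solvent, best_hand, best_fom = max(
    ((solv, hand, stats[skey(solv)][hand])
     for solv, hand in product(solvent_fractions, ['original', 'inverted'])),
    key=lambda v: v[-1])
# best_solvent, best_hand, best_fom == (0.5, 'original', 0.55)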
Example #6
    def fa_values(self):

        logging.info('Input:       %s' % self._hklin)
        logging.info('N try:       %d' % self._ntry)
        dataset_names = ['sad',] if len(self._all_data) == 1 else ['peak', 'infl', 'hrem', 'lrem']
        if 'sad' in dataset_names:
            self._xml_results['SUBSTRUCTURE_METHOD'] = 'SAD'
        else:
            self._xml_results['SUBSTRUCTURE_METHOD'] = 'MAD'
        # materialise the zip so it can be extended and iterated more than once
        zip_dataset_names = list(zip(dataset_names, self._all_data))
        if self._native:
            zip_dataset_names.append(('nat', self._native))

        # write out a summary of the data set properties and what columns
        # were selected for analysis
        # TODO: if unmerged, make a custom copy of the merged data with pairs
        # separated => get dF/F etc.
        self._dataset_table = []
        for dtname, data in zip_dataset_names:
            logging.info('Dataset:     %s' % dtname)
            logging.info('Columns:     %s' % data.info().label_string())
            self._unit_cell = data.unit_cell().parameters()
            logging.info('Unit cell:   %.2f %.2f %.2f %.2f %.2f %.2f' % \
                      self._unit_cell)
            logging.info('Pointgroup:  %s' % data.crystal_symmetry().space_group().type().lookup_symbol())
            logging.info('Resolution:  %.2f - %.2f' % data.resolution_range())
            if data.is_unmerged_intensity_array():
                indices = self._file_content.extract_original_index_miller_indices()
                adata = data.customized_copy(indices=indices, info=data.info(),
                                             anomalous_flag=True)
                merger = adata.merge_equivalents(use_internal_variance=False)
                merged = merger.array()
                logging.info('Rmeas%%:      %.2f' % (100*merger.r_meas()))
                logging.info('Rpim%%:       %.2f' % (100*merger.r_pim()))
                logging.info('Nrefl:       %d / %d / %d' %
                    (data.size(), merged.size(), merged.n_bijvoet_pairs()))
                logging.info('DF/F:        %.3f' % merged.anomalous_signal())

                differences = merged.anomalous_differences()

                logging.info('dI/sig(dI):  %.3f' % (sum(abs(differences.data())) /
                                                 sum(differences.sigmas())))

            else:
                logging.info('Nrefl:       %d / %d' % (data.size(),
                                                    data.n_bijvoet_pairs()))
                logging.info('DF/F:        %.3f' % data.anomalous_signal())

                differences = data.anomalous_differences()

                logging.info('dI/sig(dI):  %.3f' % (sum(abs(differences.data())) /
                                                 sum(differences.sigmas())))

            table_vals = {'dtname': dtname,
                          'col_labels': data.info().label_string(),
                          'unit_cell': self._unit_cell,
                          'pg': data.crystal_symmetry().space_group().type().lookup_symbol(),
                          'resol_range': data.resolution_range(),
                          'nrefl': data.size(),
                          'n_pairs': data.n_bijvoet_pairs() if data.anomalous_flag() else 0,
                          'anom_flg': data.anomalous_flag()
                         }
            if data.anomalous_flag():
                table_vals.update({'anom_signal': data.anomalous_signal(),
                                   'anom_diff': (sum(abs(data.anomalous_differences().data())) /
                                                 sum(data.anomalous_differences().sigmas())),
                                  })

            self._dataset_table.append(table_vals)


            # Now set up the job - run shelxc, assess anomalous signal, compute
            # possible spacegroup options, generate scalepack format reflection
            # file etc.

            if self._is_merged:
                intensities = data
            else:
                indices = self._file_content.extract_original_index_miller_indices()
                intensities = data.customized_copy(indices=indices, info=data.info())

            merge_scalepack.write(file_name = '.'.join([dtname, 'sca']),
                                  miller_array = intensities)

        # in here run shelxc to generate the ins file (which will need to be
        # modified) and the hkl files, which will need to be copied.

        if not self._spacegroups:
            self._spacegroups = generate_chiral_spacegroups_unique(self._pointgroup)

        logging.info('Spacegroups: %s' % ' '.join(self._spacegroups))

        if not self._nsites:
            self._nsites = useful_number_sites(self._unit_cell, self._pointgroup)

        spacegroup = self._spacegroups[0]
        nsite = self._nsites[0]
        ntry = self._ntry

        self._xml_results['SHELXC_SPACEGROUP_ID'] = space_group_symbols(spacegroup).number()

        shelxc_input_files = ['%s %s.sca' % (v[0],v[0]) for v in zip_dataset_names]
        shelxc_stdin = ['cell %.3f %.3f %.3f %.3f %.3f %.3f' % self._unit_cell,
                 'spag %s' % sanitize_spacegroup(spacegroup),
                 'sfac %s' % self._atom.upper(),
                 'find %d' % nsite,
                 'mind -3.5',
                 'ntry %d' % ntry]
        shelxc_output = run_job('shelxc', ['sad'],
                                shelxc_input_files +
                                shelxc_stdin)

        # FIXME in here perform some analysis of the shelxc output - how much
        # anomalous signal was reported?

        open('shelxc.log', 'w').write(''.join(shelxc_output))

        table = { }

        # wrap map() in list() so the columns can be padded and measured below
        for record in shelxc_output:
            if record.strip().startswith('Resl.'):
                resolutions = list(map(float, record.replace(' - ', ' ').split()[2:]))
                table['dmin'] = resolutions
            if record.strip().startswith('<I/sig>'):
                table['isig'] = list(map(float, record.split()[1:]))
            if record.strip().startswith('%Complete'):
                table['comp'] = list(map(float, record.split()[1:]))
            if record.strip().startswith('<d"/sig>'):
                table['dsig'] = list(map(float, record.split()[1:]))
            if record.strip().startswith('Chi-sq'):
                table['chi2'] = list(map(float, record.split()[1:]))
            if record.strip().startswith('CC(1/2)'):
                table['cc12'] = list(map(float, record.split()[1:]))

        for row in ['isig', 'comp', 'dsig', 'chi2', 'cc12']:
            try:
                pad = len(table['dmin']) - len(table[row])
            except KeyError:
                continue
            if pad > 0:
                table[row] += [float('nan')] * pad

        shells = len(table['dmin'])

        logging.info('SHELXC summary:')
        if 'cc12' in table and 'chi2' in table:
            logging.info('Dmin  <I/sig>  Chi^2  %comp  CC(anom) <d"/sig>')
            for j in range(shells):
                logging.info('%5.2f  %6.2f %6.2f  %6.2f  %6.2f  %5.2f' %
                        (table['dmin'][j], table['isig'][j], table['chi2'][j],
                         table['comp'][j], table['cc12'][j], table['dsig'][j]))
            plot_anom_shelxc(table['dmin'], table['isig'], table['dsig'], table['chi2'], table['cc12'], 'shelxc_anom.png')
        else:
            logging.info('Dmin  <I/sig>  %comp  <d"/sig>')
            for j in range(shells):
                logging.info('%5.2f  %6.2f  %6.2f  %5.2f' %
                        (table['dmin'][j], table['isig'][j],
                        table['comp'][j], table['dsig'][j]))
            plot_anom_shelxc(table['dmin'], table['isig'], table['dsig'], None, None, 'shelxc_anom.png')

        # FIXME conventionally dmax is the *low* resolution limit!
        if self._ano_rlimits == [0]:
            self._ano_rlimits = [self._dmin]
        elif not self._ano_rlimits:
            if self._mode == 'basic':
                self._ano_rlimits = [self._dmin]
            else:
                self._ano_rlimits = [self._dmin, self._dmin + 0.25, self._dmin + 0.5]

        logging.info('Anomalous limits: %s' %  ' '.join(["%.2f" % v for v in self._ano_rlimits]))

        # store the ins file text - will need to modify this when we come to
        # run shelxd...

        self._ins_text = open('sad_fa.ins', 'r').readlines()

        return