def run_shelxe_local(_settings):
    '''Run shelxe locally with settings given in a dictionary, containing:

    nsite - number of sites
    solv  - solvent fraction
    resol - high resolution limit (optional; defaults to 0.0 i.e. no cutoff)
    nrefl - reflection space to allocate (optional; -l flag omitted if absent)
    hand  - original or inverted
    wd    - working directory'''

    if 'resol' not in _settings:
        _settings['resol'] = 0.0

    nsite = _settings['nsite']
    solv = _settings['solv']
    hand = _settings['hand']
    resol = _settings['resol']
    wd = _settings['wd']

    arguments = ['sad', 'sad_fa', '-h%d' % nsite]

    # only request extra reflection space when the caller supplied nrefl -
    # the job dictionaries built in phase() do not always include it
    if 'nrefl' in _settings:
        arguments.append('-l%d' % _settings['nrefl'])

    arguments.extend(['-s%f' % solv, '-d%f' % resol, '-m20'])

    # the inverted hand uses the same command line plus the -i flag
    if hand != 'original':
        arguments.append('-i')

    run_job('shelxe', arguments, [], wd)

    return
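# Hedged usage sketch, not part of the pipeline: the dictionary below shows
# the keys run_shelxe_local() expects, with purely illustrative values - in
# practice the dictionaries are built by the phasing loop in phase() below.
def _example_shelxe_settings():
    '''Return an illustrative settings dictionary for run_shelxe_local().'''
    return {'nsite': 8,           # number of heavy atom sites
            'solv': 0.45,         # solvent fraction
            'resol': 2.1,         # high resolution limit (optional)
            'nrefl': 2,           # space to allocate, 1 + nref // 100000
            'hand': 'original',   # or 'inverted'
            'wd': '/tmp/fast_ep_example'}  # hypothetical working directory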
def run_shelxd_local(_settings):
    '''Run shelxd locally with settings given in a dictionary, containing:

    nrefl = 1 + floor(nref / 100000) - space to allocate
    ncpu - number of cpus to use
    wd - working directory'''

    nrefl = _settings['nrefl']
    ncpu = _settings['ncpu']
    wd = _settings['wd']

    job_output = run_job(
        'shelxd', ['-L%d' % nrefl, 'sad_fa', '-t%d' % ncpu], [], wd)

    open(os.path.join(wd, 'shelxd.log'), 'w').write(''.join(job_output))

    return
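# Small sketch, assuming nref is the total number of reflections: this is how
# the nrefl value described in the docstring above (1 + floor(nref / 100000))
# would typically be derived before calling run_shelxd_local().
def _nrefl_for_shelxd(nref):
    '''Return the -L space allocation factor for a given reflection count.'''
    return 1 + int(nref) // 100000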
def ctruncate_anomalous_signal(hklin):
    '''Parse the estimated limits of anomalous signal reported by ctruncate,
    e.g.

      Estimated limits of anomalous signal
        Wang limit (deltaI/I) > 0.6% : 1.3 A
        anomalous limit (deltaI/sig) > 1.3 : 2.1 A
        measurability limit (Nanon/Nov) > 5% : 1.8 A

    Use measurability limit (Nanon/Nov) > 5% : 1.8 A +/- 0.2 A, from

      ctruncate -mtzin ../AUTOMATIC_DEFAULT_scaled.mtz -mtzout truncated.mtz
                -colano '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'
    '''

    ctruncate_output = run_job(
        ' '.join(['ctruncate', '-mtzin', hklin, '-mtzout', 'truncated.mtz',
                  '-colano', '"/*/*/[I(+),SIGI(+),I(-),SIGI(-)]"']),
        [], [])

    open('ctruncate.log', 'w').write(''.join(ctruncate_output))

    maximum_resolution = float('nan')
    rlimit = float('nan')

    for record in ctruncate_output:
        if 'Maximum resolution =' in record:
            maximum_resolution = float(record.split()[-2])
        if "measurability limit (Nanon/Nov)" in record:
            rlimit = float(record.split()[-2])
            break

    if not isinf(rlimit) and not isnan(rlimit) and \
       not isinf(maximum_resolution) and not isnan(maximum_resolution):
        return [max(maximum_resolution, rlimit - 0.2), rlimit, rlimit + 0.2]

    return None
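# Minimal sketch of the parsing rule used above, applied to the example
# record quoted in the docstring: the resolution is the second-to-last
# whitespace-separated token, i.e. record.split()[-2].
def _example_measurability_parse():
    record = '  measurability limit (Nanon/Nov) > 5% : 1.8 A '
    return float(record.split()[-2])   # -> 1.8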
def main():
    """
    main function - creates session and runs jobs
    """
    time_start = time.time()
    dx_session_obj = get_session.GetSession()
    dx_logging.logging_est(ARGUMENTS['--logdir'])
    config_file_path = ARGUMENTS['--config']
    single_thread = ARGUMENTS['--single_thread']
    engine = ARGUMENTS['--engine']
    try:
        dx_session_obj.get_config(config_file_path)
        # This is the function that will handle processing main_workflow for
        # all the servers.
        for each in run_job.run_job(main_workflow, dx_session_obj, engine,
                                    single_thread):
            # join them back together so that we wait for all threads to
            # complete
            each.join()
        elapsed_minutes = run_job.time_elapsed(time_start)
        dx_logging.print_info(f'script took {elapsed_minutes} minutes to '
                              f'get this far.')
    # Here we handle what we do when the unexpected happens
    except SystemExit as err:
        # This is what we use to handle our sys.exit(#)
        sys.exit(err)
    except dlpx_exceptions.DlpxException as err:
        # We use this exception handler when an error occurs in a function
        # call.
        dx_logging.print_exception(f'ERROR: Please check the ERROR message '
                                   f'below:\n {err.error}')
        sys.exit(2)
    except exceptions.HttpError as err:
        # We use this exception handler when our connection to Delphix fails
        dx_logging.print_exception(
            f'ERROR: Connection failed to the Delphix DDP. Please check '
            f'the ERROR message below:\n{err.status}')
        sys.exit(2)
    except KeyError as err:
        dx_logging.print_exception(f'ERROR: Key not found:\n{err}')
        sys.exit(2)
    except exceptions.JobError as err:
        # We use this exception handler when a job fails in Delphix so that we
        # have actionable data
        elapsed_minutes = run_job.time_elapsed(time_start)
        dx_logging.print_exception(
            f'A job failed in the Delphix Engine:\n{err.job}.'
            f'{basename(__file__)} took {elapsed_minutes} minutes to get '
            f'this far')
        sys.exit(3)
    except KeyboardInterrupt:
        # We use this exception handler to gracefully handle ctrl+c exits
        dx_logging.print_debug('You sent a CTRL+C to interrupt the process')
    elapsed_minutes = run_job.time_elapsed(time_start)
    dx_logging.print_info(f'{basename(__file__)} took {elapsed_minutes} '
                          f'minutes to get this far.')
def phase(self):
    '''Perform the phasing following from the substructure determination,
    using the best solution found, using shelxe. This will be run for a
    range of sensible solvent fractions between 25% and 75% and for both
    hands of the substructure. N.B. for a chiral screw axis (e.g. P41) this
    will also invert the spacegroup (to e.g. P43), which needs to be
    remembered in transforming the output.'''

    t0 = time.time()

    cluster = self._cluster
    njobs = self._machines
    ncpu = self._cpu

    solvent_fractions = [0.25 + 0.05 * j for j in range(11)]

    timeout = 600 + self._ncycle

    jobs = [ ]

    for solvent_fraction in solvent_fractions:
        wd = os.path.join(self._wd, '%.2f' % solvent_fraction)
        if not os.path.exists(wd):
            os.makedirs(wd)
        shutil.copyfile(os.path.join(self._wd, 'sad.hkl'),
                        os.path.join(wd, 'sad.hkl'))
        for ending in 'lst', 'pdb', 'res', 'hkl':
            shutil.copyfile(os.path.join(self._wd, 'sad_fa.%s' % ending),
                            os.path.join(wd, 'sad_fa.%s' % ending))

        jobs.append({'nsite': self._best_nsite, 'solv': solvent_fraction,
                     'ncycle': self._ncycle, 'hand': 'original', 'wd': wd})
        jobs.append({'nsite': self._best_nsite, 'solv': solvent_fraction,
                     'ncycle': self._ncycle, 'hand': 'inverted', 'wd': wd})

    logging.info('Running %d x shelxe jobs' % len(jobs))

    if cluster:
        run_shelxe_drmaa_array(self._wd, njobs, jobs, timeout,
                               self._sge_project)
    else:
        pool = Pool(min(njobs * ncpu, len(jobs)))
        pool.map(run_shelxe_local, jobs)

    shelxe_stats = read_shelxe_log(self._wd, solvent_fractions)

    skey = lambda s: '%.2f' % s

    best_solvent, best_hand, best_fom = max(
        ((solv, hand,
          shelxe_stats['mean_fom_cc'][skey(solv)][hand]['mean_fom'])
         for solv, hand in product(solvent_fractions,
                                   ['original', 'inverted'])),
        key=lambda v: v[-1])

    try:
        plot_shelxe_contrast(
            {best_solvent: shelxe_stats['contrast'][skey(best_solvent)]},
            os.path.join(self._wd, 'sad_best.png'), True)
        plot_shelxe_contrast(shelxe_stats['contrast'],
                             os.path.join(self._wd, 'sad.png'))
        plot_shelxe_fom_mapcc(shelxe_stats['fom_mapcc'],
                              os.path.join(self._wd, 'fom_mapcc.png'))
        plot_shelxe_mean_fom_cc(shelxe_stats['mean_fom_cc'],
                                os.path.join(self._wd, 'mean_fom_cc.png'))
    except Exception:
        logging.warning(
            'WARNING: Exception thrown while plotting SHELXE results.')

    self._best_fom = best_fom
    self._best_solvent = best_solvent
    self._best_hand = best_hand

    logging.info('Solv. Orig. Inv.')

    for solvent_fraction in solvent_fractions:
        fom_orig, fom_inv = [
            shelxe_stats['mean_fom_cc'][skey(solvent_fraction)][hand]['pseudo_cc']
            for hand in ['original', 'inverted']]
        if solvent_fraction == best_solvent:
            logging.info('%.2f %.3f %.3f (best)' %
                         (solvent_fraction, fom_orig, fom_inv))
        else:
            logging.info('%.2f %.3f %.3f' %
                         (solvent_fraction, fom_orig, fom_inv))

    logging.info('Best solvent: %.2f' % best_solvent)
    logging.info('Best hand: %s' % best_hand)

    wd = os.path.join(self._wd, skey(best_solvent))

    best_fom_mapcc = shelxe_stats['fom_mapcc'][skey(best_solvent)][best_hand]

    parse_pairs = [([self._dmax, ] + best_fom_mapcc['resol'][:-1],
                    'RESOLUTION_LOW'),
                   (best_fom_mapcc['resol'], 'RESOLUTION_HIGH'),
                   (best_fom_mapcc['fom'], 'FOM'),
                   (best_fom_mapcc['mapcc'], 'MAPCC'),
                   (best_fom_mapcc['nrefl'], 'NREFLECTIONS')]

    for field_values, field_name in parse_pairs:
        store_string_xml(self._xml_results, field_values, field_name)

    self._xml_results['FOM'] = best_fom
    self._xml_results['SOLVENTCONTENT'] = best_solvent
    self._xml_results['ENANTIOMORPH'] = (best_hand == 'inverted')

    # copy the result files from the most successful shelxe run into the
    # working directory, before converting to mtz format for inspection
    # with e.g. coot.

    # FIXME in here map correct site file to ASU
    from fast_ep_helpers import map_sites_to_asu
    if best_hand == 'original':
        map_sites_to_asu(self._best_spacegroup,
                         os.path.join(wd, 'sad_fa.pdb'),
                         os.path.join(self._wd, 'sites.pdb'))
    else:
        map_sites_to_asu(self._best_spacegroup,
                         os.path.join(wd, 'sad_fa.pdb'),
                         os.path.join(self._wd, 'sites.pdb'),
                         invert=True)

    if best_hand == 'original':
        for ending in ['phs', 'pha', 'lst', 'hat']:
            shutil.copyfile(os.path.join(wd, 'sad.%s' % ending),
                            os.path.join(self._wd, 'sad.%s' % ending))
    else:
        for ending in ['phs', 'pha', 'lst', 'hat']:
            shutil.copyfile(os.path.join(wd, 'sad_i.%s' % ending),
                            os.path.join(self._wd, 'sad.%s' % ending))
        self._best_spacegroup = spacegroup_enantiomorph(
            self._best_spacegroup)

    logging.info('Best spacegroup: %s' % self._best_spacegroup)

    if self._trace:
        # rerun shelxe to trace the chain
        self._nres_trace = 0
        arguments = ['sad', 'sad_fa', '-h%d' % self._best_nsite,
                     '-s%.2f' % best_solvent,
                     '-d%.2f' % self._best_ano_rlimit,
                     '-a3', '-m20']
        if best_hand != 'original':
            arguments.append('-i')
        output = run_job('shelxe', arguments, [], self._wd)
        for record in output:
            if 'residues left after pruning' in record:
                self._nres_trace = int(record.split()[0])

        pdb_org = os.path.join(self._wd, 'sad.pdb')
        pdb_inv = os.path.join(self._wd, 'sad_i.pdb')
        pdb_final = os.path.join(self._wd, 'sad_trace.pdb')
        try:
            if best_hand == 'inverted':
                shutil.copyfile(pdb_inv, pdb_final)
            else:
                shutil.copyfile(pdb_org, pdb_final)
            logging.info('Traced: %d residues' % self._nres_trace)
        except IOError:
            logging.info('Chain tracing was unsuccessful.')

    # convert sites to pdb, inverting if needed
    xs = pdb.input(os.path.join(
        self._wd, 'sad_fa.pdb')).xray_structure_simple()
    if best_hand == 'inverted':
        open('sad.pdb', 'w').write(xs.change_hand().as_pdb_file())
    else:
        open('sad.pdb', 'w').write(xs.as_pdb_file())

    o = run_job('convert2mtz',
                ['-hklin', 'sad.phs', '-mtzout', 'sad.mtz',
                 '-colin', 'F FOM PHI SIGF',
                 '-cell', '%f %f %f %f %f %f' % self._unit_cell,
                 '-spacegroup', spacegroup_full(self._best_spacegroup)],
                [], self._wd)

    open('convert2mtz.log', 'w').write('\n'.join(o))

    t1 = time.time()
    logging.info('Time: %.2f' % (t1 - t0))

    return
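# Illustrative sketch only: spacegroup_enantiomorph() (imported elsewhere in
# fast_ep) is what actually swaps a chiral screw axis for its enantiomorph
# when the inverted hand wins. The pairs below show the kind of mapping
# involved (e.g. P41 <-> P43, as noted in the phase() docstring); this dict
# is not the pipeline's implementation.
_ENANTIOMORPH_EXAMPLES = {'P41': 'P43', 'P43': 'P41',
                          'P31': 'P32', 'P32': 'P31',
                          'P61': 'P65', 'P65': 'P61',
                          'P62': 'P64', 'P64': 'P62'}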
def fa_values(self):

    logging.info('Input: %s' % self._hklin)
    logging.info('N try: %d' % self._ntry)

    dataset_names = ['sad', ] if len(self._all_data) == 1 else \
        ['peak', 'infl', 'hrem', 'lrem']

    if 'sad' in dataset_names:
        self._xml_results['SUBSTRUCTURE_METHOD'] = 'SAD'
    else:
        self._xml_results['SUBSTRUCTURE_METHOD'] = 'MAD'

    # materialise the zip so it can be appended to and iterated more than once
    zip_dataset_names = list(zip(dataset_names, self._all_data))
    if self._native:
        zip_dataset_names.append(('nat', self._native))

    # write out a nice summary of the data set properties and what columns
    # were selected for analysis
    # todo: if unmerged make custom copy of merged data with pairs separated
    # => get dF/F etc.

    self._dataset_table = []

    for dtname, data in zip_dataset_names:
        logging.info('Dataset: %s' % dtname)
        logging.info('Columns: %s' % data.info().label_string())

        self._unit_cell = data.unit_cell().parameters()

        logging.info('Unit cell: %.2f %.2f %.2f %.2f %.2f %.2f' %
                     self._unit_cell)
        logging.info('Pointgroup: %s' %
                     data.crystal_symmetry().space_group().type().lookup_symbol())
        logging.info('Resolution: %.2f - %.2f' % data.resolution_range())

        if data.is_unmerged_intensity_array():
            indices = self._file_content.extract_original_index_miller_indices()
            adata = data.customized_copy(indices=indices, info=data.info(),
                                         anomalous_flag=True)
            merger = adata.merge_equivalents(use_internal_variance=False)
            merged = merger.array()

            logging.info('Rmeas%%: %.2f' % (100 * merger.r_meas()))
            logging.info('Rpim%%: %.2f' % (100 * merger.r_pim()))
            logging.info('Nrefl: %d / %d / %d' %
                         (data.size(), merged.size(),
                          merged.n_bijvoet_pairs()))
            logging.info('DF/F: %.3f' % merged.anomalous_signal())

            differences = merged.anomalous_differences()
            logging.info('dI/sig(dI): %.3f' %
                         (sum(abs(differences.data())) /
                          sum(differences.sigmas())))
        else:
            logging.info('Nrefl: %d / %d' % (data.size(),
                                             data.n_bijvoet_pairs()))
            logging.info('DF/F: %.3f' % data.anomalous_signal())

            differences = data.anomalous_differences()
            logging.info('dI/sig(dI): %.3f' %
                         (sum(abs(differences.data())) /
                          sum(differences.sigmas())))

        table_vals = {
            'dtname': dtname,
            'col_labels': data.info().label_string(),
            'unit_cell': self._unit_cell,
            'pg': data.crystal_symmetry().space_group().type().lookup_symbol(),
            'resol_range': data.resolution_range(),
            'nrefl': data.size(),
            'n_pairs': data.n_bijvoet_pairs() if data.anomalous_flag() else 0,
            'anom_flg': data.anomalous_flag()
        }

        if data.anomalous_flag():
            table_vals.update({
                'anom_signal': data.anomalous_signal(),
                'anom_diff': (sum(abs(data.anomalous_differences().data())) /
                              sum(data.anomalous_differences().sigmas())),
            })

        self._dataset_table.append(table_vals)

        # Now set up the job - run shelxc, assess anomalous signal, compute
        # possible spacegroup options, generate scalepack format reflection
        # file etc.

        if self._is_merged:
            intensities = data
        else:
            indices = self._file_content.extract_original_index_miller_indices()
            intensities = data.customized_copy(indices=indices,
                                               info=data.info())

        merge_scalepack.write(file_name='.'.join([dtname, 'sca']),
                              miller_array=intensities)

    # in here run shelxc to generate the ins file (which will need to be
    # modified) and the hkl files, which will need to be copied.

    if not self._spacegroups:
        self._spacegroups = generate_chiral_spacegroups_unique(
            self._pointgroup)

    logging.info('Spacegroups: %s' % ' '.join(self._spacegroups))

    if not self._nsites:
        self._nsites = useful_number_sites(self._unit_cell, self._pointgroup)

    spacegroup = self._spacegroups[0]
    nsite = self._nsites[0]
    ntry = self._ntry

    self._xml_results['SHELXC_SPACEGROUP_ID'] = \
        space_group_symbols(spacegroup).number()

    shelxc_input_files = ['%s %s.sca' % (v[0], v[0])
                          for v in zip_dataset_names]
    shelxc_stdin = ['cell %.3f %.3f %.3f %.3f %.3f %.3f' % self._unit_cell,
                    'spag %s' % sanitize_spacegroup(spacegroup),
                    'sfac %s' % self._atom.upper(),
                    'find %d' % nsite,
                    'mind -3.5',
                    'ntry %d' % ntry]

    shelxc_output = run_job('shelxc', ['sad'],
                            shelxc_input_files + shelxc_stdin)

    # FIXME in here perform some analysis of the shelxc output - how much
    # anomalous signal was reported?

    open('shelxc.log', 'w').write(''.join(shelxc_output))

    table = { }

    for record in shelxc_output:
        if record.strip().startswith('Resl.'):
            table['dmin'] = list(map(float,
                                     record.replace(' - ', ' ').split()[2:]))
        if record.strip().startswith('<I/sig>'):
            table['isig'] = list(map(float, record.split()[1:]))
        if record.strip().startswith('%Complete'):
            table['comp'] = list(map(float, record.split()[1:]))
        if record.strip().startswith('<d"/sig>'):
            table['dsig'] = list(map(float, record.split()[1:]))
        if record.strip().startswith('Chi-sq'):
            table['chi2'] = list(map(float, record.split()[1:]))
        if record.strip().startswith('CC(1/2)'):
            table['cc12'] = list(map(float, record.split()[1:]))

    # pad any short rows with NaN so every column spans all resolution shells
    for row in ['isig', 'comp', 'dsig', 'chi2', 'cc12']:
        try:
            pad = len(table['dmin']) - len(table[row])
        except KeyError:
            continue
        if pad > 0:
            table[row] += [float('nan')] * pad

    shells = len(table['dmin'])

    logging.info('SHELXC summary:')
    if 'cc12' in table and 'chi2' in table:
        logging.info('Dmin <I/sig> Chi^2 %comp CC(anom) <d"/sig>')
        for j in range(shells):
            logging.info('%5.2f %6.2f %6.2f %6.2f %6.2f %5.2f' %
                         (table['dmin'][j], table['isig'][j],
                          table['chi2'][j], table['comp'][j],
                          table['cc12'][j], table['dsig'][j]))
        plot_anom_shelxc(table['dmin'], table['isig'], table['dsig'],
                         table['chi2'], table['cc12'], 'shelxc_anom.png')
    else:
        logging.info('Dmin <I/sig> %comp <d"/sig>')
        for j in range(shells):
            logging.info('%5.2f %6.2f %6.2f %5.2f' %
                         (table['dmin'][j], table['isig'][j],
                          table['comp'][j], table['dsig'][j]))
        plot_anom_shelxc(table['dmin'], table['isig'], table['dsig'],
                         None, None, 'shelxc_anom.png')

    # FIXME conventionally dmax is the *low* resolution limit!

    if self._ano_rlimits == [0]:
        self._ano_rlimits = [self._dmin]
    elif not self._ano_rlimits:
        if self._mode == 'basic':
            self._ano_rlimits = [self._dmin]
        else:
            self._ano_rlimits = [self._dmin, self._dmin + 0.25,
                                 self._dmin + 0.5]

    logging.info('Anomalous limits: %s' %
                 ' '.join(['%.2f' % v for v in self._ano_rlimits]))

    # store the ins file text - will need to modify this when we come to
    # run shelxd...

    self._ins_text = open('sad_fa.ins', 'r').readlines()

    return
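# Hedged illustration, not pipeline code: for a hypothetical orthorhombic
# cell this is roughly what the shelxc_stdin list constructed in fa_values()
# would contain once the %-formats are filled in; all values here are
# assumptions for demonstration only.
def _example_shelxc_stdin():
    unit_cell = (57.0, 66.0, 78.0, 90.0, 90.0, 90.0)  # hypothetical cell
    return ['cell %.3f %.3f %.3f %.3f %.3f %.3f' % unit_cell,
            'spag P212121',   # hypothetical spacegroup
            'sfac SE',        # hypothetical anomalous scatterer
            'find 8',         # hypothetical number of sites
            'mind -3.5',
            'ntry 200']       # hypothetical number of trials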