def exercise_int():
    fa = flex.int_range(1, 7)
    na = fa.as_numpy_array()
    assert na.tolist() == list(fa)
    fna = flex.int(na)
    assert fna.all() == (6,)
    assert fna.origin() == (0,)
    assert fna.focus() == (6,)
    assert fna.all_eq(fa)
    fa[0] = 99
    assert na[0] == 1
    fa[0] = 1
    #
    fa.reshape(flex.grid(2, 3))
    na = fa.as_numpy_array()
    assert na.tolist() == [[1, 2, 3], [4, 5, 6]]
    fna = flex.int(na)
    assert fna.all() == (2, 3)
    assert fna.origin() == (0, 0)
    assert fna.focus() == (2, 3)
    assert fna.all_eq(fa)
    #
    fa = flex.int_range(4 * 2 * 3) + 1
    fa.reshape(flex.grid(4, 2, 3))
    na = fa.as_numpy_array()
    assert na.tolist() == [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
        [[13, 14, 15], [16, 17, 18]],
        [[19, 20, 21], [22, 23, 24]]]
    fna = flex.int(na)
    assert fna.all() == (4, 2, 3)
    assert fna.origin() == (0, 0, 0)
    assert fna.focus() == (4, 2, 3)
    assert fna.all_eq(fa)
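A minimal usage sketch of the flex/numpy bridge the test above exercises (assuming a working cctbx installation providing scitbx.array_family.flex): as_numpy_array() returns a copy rather than a view, and the flex grid maps onto the numpy shape in both directions.

from scitbx.array_family import flex

fa = flex.int_range(1, 7)      # flex array holding [1, 2, 3, 4, 5, 6]
na = fa.as_numpy_array()       # copies into a 1-d numpy array
fa[0] = 99                     # the copy is unaffected by later writes
assert na[0] == 1
fa[0] = 1
fa.reshape(flex.grid(2, 3))    # impose a 2x3 grid on the flex array
nb = fa.as_numpy_array()       # the copy comes back with shape (2, 3)
assert nb.shape == (2, 3)
fb = flex.int(nb)              # converting back preserves the grid
assert fb.all() == (2, 3) and fb.all_eq(fa)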
def test_unit_cell():
    # generate some random unit cells
    sgi = sgtbx.space_group_info("P1")
    crystal_symmetries = [
        sgi.any_compatible_crystal_symmetry(volume=random.uniform(990, 1010))
        for i in range(10)
    ]
    lattice_ids = flex.int_range(0, len(crystal_symmetries)).as_string()
    ucs = UnitCellCluster.from_crystal_symmetries(
        crystal_symmetries, lattice_ids=lattice_ids)
    clusters, dendrogram, _ = ucs.ab_cluster(write_file_lists=False, doplot=False)
def test_scipy_dendrogram_to_plotly_json():
    # generate some random unit cells
    sgi = sgtbx.space_group_info("P1")
    crystal_symmetries = [
        sgi.any_compatible_crystal_symmetry(volume=random.uniform(990, 1010))
        for i in range(10)
    ]
    lattice_ids = flex.int_range(0, len(crystal_symmetries)).as_string()
    ucs = UnitCellCluster.from_crystal_symmetries(
        crystal_symmetries, lattice_ids=lattice_ids)
    _, dendrogram, _ = ucs.ab_cluster(write_file_lists=False, doplot=False)
    d = plots.scipy_dendrogram_to_plotly_json(
        dendrogram, title="Unit cell clustering")
    assert set(d) == {"layout", "data"}
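Both tests share the same fixture. A standalone sketch of that setup (assuming cctbx is importable) shows what one randomly generated P1 crystal symmetry looks like:

import random
from cctbx import sgtbx

# One random triclinic crystal symmetry with volume near 1000 A^3,
# as generated ten times over in the test fixtures above.
sgi = sgtbx.space_group_info("P1")
cs = sgi.any_compatible_crystal_symmetry(volume=random.uniform(990, 1010))
print(cs.unit_cell())         # random cell parameters compatible with P1
print(cs.space_group_info())  # P 1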
def _label_clusters_first_pass(self, n_datasets, n_sym_ops):
    """First pass labelling of clusters.

    Labels points into clusters such that each cluster contains exactly one
    copy of each dataset.

    Args:
        n_datasets (int): The number of datasets.
        n_sym_ops (int): The number of symmetry operations.

    Returns:
        cluster_labels (scitbx.array_family.flex.int): A label for each
        coordinate, labelled from 0 .. n_sym_ops - 1.
    """

    # initialise cluster labels: -1 signifies doesn't belong to a cluster
    cluster_labels = flex.int(self.coords.all()[0], -1)
    X_orig = self.coords.as_numpy_array()

    cluster_id = 0
    while cluster_labels.count(-1) > 0:
        dataset_ids = (
            flex.int_range(n_datasets * n_sym_ops) % n_datasets
        ).as_numpy_array()
        coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

        # select only those points that don't already belong to a cluster
        sel = np.where(cluster_labels == -1)
        X = X_orig[sel]
        dataset_ids = dataset_ids[sel]
        coord_ids = coord_ids[sel]

        # choose a high density point as seed for cluster
        nbrs = NearestNeighbors(
            n_neighbors=min(11, len(X)), algorithm="brute", metric="cosine"
        ).fit(X)
        distances, indices = nbrs.kneighbors(X)
        average_distance = flex.double([dist[1:].mean() for dist in distances])
        i = flex.min_index(average_distance)

        d_id = dataset_ids[i]
        cluster = np.array([coord_ids[i]])
        cluster_dataset_ids = np.array([d_id])
        xis = np.array([X[i]])

        for j in range(n_datasets - 1):
            # select only those rows that don't correspond to a dataset already
            # present in current cluster
            sel = np.where(dataset_ids != d_id)
            X = X[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            assert len(X) > 0

            # Find nearest neighbour in cosine-space to the current cluster centroid
            nbrs = NearestNeighbors(
                n_neighbors=min(1, len(X)), algorithm="brute", metric="cosine"
            ).fit(X)
            distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
            k = indices[0][0]
            d_id = dataset_ids[k]
            cluster = np.append(cluster, coord_ids[k])
            cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
            xis = np.append(xis, [X[k]], axis=0)

        # label this cluster
        cluster_labels.set_selected(flex.size_t(cluster.tolist()), cluster_id)
        cluster_id += 1

    return cluster_labels
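A self-contained sketch of the seed-selection heuristic used above (numpy and scikit-learn only; the data here are synthetic stand-ins for self.coords): each new cluster is seeded at the point whose mean cosine distance to its ten nearest neighbours is smallest, i.e. the point sitting in the densest remaining region.

import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 4))   # synthetic stand-in for the coordinates

nbrs = NearestNeighbors(
    n_neighbors=min(11, len(X)), algorithm="brute", metric="cosine").fit(X)
distances, _ = nbrs.kneighbors(X)
# column 0 is each point's zero distance to itself, so average columns 1..10
average_distance = distances[:, 1:].mean(axis=1)
seed = int(np.argmin(average_distance))
print("cluster seed:", seed)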
def seed_clustering(self):
    eps = 1e-6
    X_orig = self.coords.as_numpy_array()

    import numpy as np
    from scipy.cluster import hierarchy
    import scipy.spatial.distance as ssd
    from sklearn.neighbors import NearestNeighbors
    from sklearn import metrics

    # initialise cluster labels: -1 signifies doesn't belong to a cluster
    self.cluster_labels = flex.int(self.coords.all()[0], -1)

    cluster_id = 0
    while self.cluster_labels.count(-1) > 0:
        dataset_ids = (
            flex.int_range(len(self.datasets) * len(self.target.get_sym_ops()))
            % len(self.datasets)).as_numpy_array()
        coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

        # select only those points that don't already belong to a cluster
        sel = np.where(self.cluster_labels == -1)
        X = X_orig[sel]
        dataset_ids = dataset_ids[sel]
        coord_ids = coord_ids[sel]

        # choose a high density point as seed for cluster
        nbrs = NearestNeighbors(
            n_neighbors=min(11, len(X)), algorithm='brute',
            metric='cosine').fit(X)
        distances, indices = nbrs.kneighbors(X)
        average_distance = flex.double([dist[1:].mean() for dist in distances])
        i = flex.min_index(average_distance)

        d_id = dataset_ids[i]
        cluster = np.array([coord_ids[i]])
        cluster_dataset_ids = np.array([d_id])
        xis = np.array([X[i]])

        for j in range(len(self.datasets) - 1):
            # select only those rows that don't correspond to a dataset already
            # present in current cluster
            sel = np.where(dataset_ids != d_id)
            X = X[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            assert len(X) > 0

            # Find nearest neighbour in cosine-space to the current cluster centroid
            nbrs = NearestNeighbors(
                n_neighbors=min(1, len(X)), algorithm='brute',
                metric='cosine').fit(X)
            distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
            k = indices[0][0]
            d_id = dataset_ids[k]
            cluster = np.append(cluster, coord_ids[k])
            cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
            xis = np.append(xis, [X[k]], axis=0)

        # label this cluster
        self.cluster_labels.set_selected(
            flex.size_t(cluster.tolist()), cluster_id)
        cluster_id += 1

    if flex.max(self.cluster_labels) == 0:
        # assume single cluster
        return self.cluster_labels

    cluster_centroids = []
    X = self.coords.as_numpy_array()
    for i in set(self.cluster_labels):
        sel = self.cluster_labels == i
        cluster_centroids.append(
            X[(self.cluster_labels == i).iselection().as_numpy_array()].mean(
                axis=0))

    # hierarchical clustering of cluster centroids, using cosine metric
    dist_mat = ssd.pdist(cluster_centroids, metric='cosine')
    linkage_matrix = hierarchy.linkage(dist_mat, method='average')

    # compare valid equal-sized clusterings using silhouette scores
    # https://en.wikipedia.org/wiki/Silhouette_(clustering)
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
    distances = linkage_matrix[::, 2]
    distances = np.insert(distances, 0, 0)
    silhouette_scores = flex.double()
    thresholds = flex.double()
    n_clusters = flex.size_t()
    for threshold in distances[1:]:
        cluster_labels = self.cluster_labels.deep_copy()
        labels = hierarchy.fcluster(
            linkage_matrix, threshold - eps, criterion='distance').tolist()
        counts = [labels.count(l) for l in set(labels)]
        if len(set(counts)) > 1:
            # only equal-sized clusters are valid
            continue

        n = len(set(labels))
        if n == 1:
            continue

        for i in range(len(labels)):
            cluster_labels.set_selected(
                self.cluster_labels == i, int(labels[i] - 1))
        silhouette_avg = metrics.silhouette_score(
            X, cluster_labels.as_numpy_array(), metric='cosine')
        # Compute the silhouette scores for each sample
        sample_silhouette_values = metrics.silhouette_samples(
            X, cluster_labels.as_numpy_array(), metric='cosine')
        silhouette_avg = sample_silhouette_values.mean()
        silhouette_scores.append(silhouette_avg)
        thresholds.append(threshold)
        n_clusters.append(n)

        count_negative = (sample_silhouette_values < 0).sum()
        logger.info('Clustering:')
        logger.info('  Number of clusters: %i' % n)
        logger.info('  Threshold score: %.3f (%.1f deg)' % (
            threshold, math.degrees(math.acos(1 - threshold))))
        logger.info('  Silhouette score: %.3f' % silhouette_avg)
        logger.info('  -ve silhouette scores: %.1f%%' % (
            100 * count_negative / sample_silhouette_values.size))

        if self.params.save_plot:
            plot_silhouette(
                sample_silhouette_values, cluster_labels.as_numpy_array(),
                file_name='%ssilhouette_%i.png' % (self.params.plot_prefix, n))

    if self.params.cluster.seed.n_clusters is Auto:
        idx = flex.max_index(silhouette_scores)
    else:
        idx = flex.first_index(n_clusters, self.params.cluster.seed.n_clusters)
        if idx is None:
            raise Sorry('No valid clustering with %i clusters' %
                        self.params.cluster.seed.n_clusters)

    if (self.params.cluster.seed.n_clusters is Auto and
            silhouette_scores[idx] <
            self.params.cluster.seed.min_silhouette_score):
        # assume single cluster
        self.cluster_labels = flex.int(self.cluster_labels.size(), 0)
    else:
        threshold = thresholds[idx] - eps
        labels = hierarchy.fcluster(
            linkage_matrix, threshold, criterion='distance')
        cluster_labels = flex.int(self.cluster_labels.size(), -1)
        for i in range(len(labels)):
            cluster_labels.set_selected(
                self.cluster_labels == i, int(labels[i] - 1))
        self.cluster_labels = cluster_labels

    if self.params.save_plot:
        plot_matrix(
            1 - ssd.squareform(dist_mat), linkage_matrix,
            '%sseed_clustering_cos_angle_matrix.png' % self.params.plot_prefix,
            color_threshold=threshold)
        plot_dendrogram(
            linkage_matrix,
            '%sseed_clustering_cos_angle_dendrogram.png' %
            self.params.plot_prefix,
            color_threshold=threshold)

    return self.cluster_labels
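A reduced sketch of the threshold search in the second half of seed_clustering (synthetic data; the real code above additionally discards cuts that give unequal-sized clusters and applies a minimum-score fallback): every merge height in the linkage matrix is tried as a cut, and the cut with the best average silhouette score wins.

import numpy as np
import scipy.spatial.distance as ssd
from scipy.cluster import hierarchy
from sklearn import metrics

rng = np.random.default_rng(1)
# three tight synthetic clusters around orthogonal directions
X = np.concatenate([
    rng.normal(loc, 0.1, size=(20, 3))
    for loc in ([1, 0, 0], [0, 1, 0], [0, 0, 1])
])

dist_mat = ssd.pdist(X, metric="cosine")
linkage_matrix = hierarchy.linkage(dist_mat, method="average")

eps = 1e-6
best = None
for threshold in linkage_matrix[:, 2]:
    labels = hierarchy.fcluster(
        linkage_matrix, threshold - eps, criterion="distance")
    n = len(set(labels))
    if n < 2 or n > len(X) - 1:
        continue  # silhouette scores need 2 <= n_clusters <= n_samples - 1
    score = metrics.silhouette_score(X, labels, metric="cosine")
    if best is None or score > best[0]:
        best = (score, n, threshold)

score, n, threshold = best
print("best cut: %i clusters, silhouette %.3f" % (n, score))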
def generate_xia2_html(xinfo, filename="xia2.html", params=None, args=[]):
    assert params is None or len(args) == 0
    if params is None:
        from xia2.Modules.Analysis import phil_scope

        interp = phil_scope.command_line_argument_interpreter()
        params, unhandled = interp.process_and_fetch(
            args, custom_processor="collect_remaining")
        params = params.extract()

    xia2_txt = os.path.join(os.path.abspath(os.path.curdir), "xia2.txt")
    assert os.path.isfile(xia2_txt), xia2_txt

    with open(xia2_txt, "r") as f:
        xia2_output = html.escape(f.read())

    styles = {}

    columns = []
    columns.append([
        "",
        "Wavelength (Å)",
        "Resolution range (Å)",
        "Completeness (%)",
        "Multiplicity",
        "CC-half",
        "I/sigma",
        "Rmerge(I)",
        # anomalous statistics
        "Anomalous completeness (%)",
        "Anomalous multiplicity",
    ])

    individual_dataset_reports = {}

    for cname, xcryst in xinfo.get_crystals().items():
        reflection_files = xcryst.get_scaled_merged_reflections()
        for wname, unmerged_mtz in reflection_files["mtz_unmerged"].items():
            xwav = xcryst.get_xwavelength(wname)

            from xia2.Modules.MultiCrystalAnalysis import batch_phil_scope

            scope = phil.parse(batch_phil_scope)
            scaler = xcryst._scaler
            try:
                for si in scaler._sweep_information.values():
                    batch_params = scope.extract().batch[0]
                    batch_params.id = si["sname"]
                    batch_params.range = si["batches"]
                    params.batch.append(batch_params)
            except AttributeError:
                for si in scaler._sweep_handler._sweep_information.values():
                    batch_params = scope.extract().batch[0]
                    batch_params.id = si.get_sweep_name()
                    batch_params.range = si.get_batch_range()
                    params.batch.append(batch_params)

            report_path = xinfo.path.joinpath(cname, "report")
            report_path.mkdir(parents=True, exist_ok=True)
            report = Report.from_unmerged_mtz(
                unmerged_mtz, params, report_dir=str(report_path))

            xtriage_success, xtriage_warnings, xtriage_danger = None, None, None
            if params.xtriage_analysis:
                try:
                    (
                        xtriage_success,
                        xtriage_warnings,
                        xtriage_danger,
                    ) = report.xtriage_report()
                except Exception as e:
                    params.xtriage_analysis = False
                    logger.debug("Exception running xtriage:")
                    logger.debug(e, exc_info=True)

            (
                overall_stats_table,
                merging_stats_table,
                stats_plots,
            ) = report.resolution_plots_and_stats()

            d = {}
            d["merging_statistics_table"] = merging_stats_table
            d["overall_statistics_table"] = overall_stats_table

            individual_dataset_reports[wname] = d

            json_data = {}
            if params.xtriage_analysis:
                json_data["xtriage"] = (
                    xtriage_success + xtriage_warnings + xtriage_danger)
            json_data.update(stats_plots)
            json_data.update(report.batch_dependent_plots())
            json_data.update(report.intensity_stats_plots(run_xtriage=False))
            json_data.update(report.pychef_plots())
            json_data.update(report.pychef_plots(n_bins=1))

            from scitbx.array_family import flex

            max_points = 500
            for g in (
                "scale_rmerge_vs_batch",
                "completeness_vs_dose",
                "rcp_vs_dose",
                "scp_vs_dose",
                "rd_vs_batch_difference",
            ):
                for i, data in enumerate(json_data[g]["data"]):
                    x = data["x"]
                    n = len(x)
                    if n > max_points:
                        step = n // max_points
                        sel = (flex.int_range(n) % step) == 0
                        data["x"] = list(flex.int(data["x"]).select(sel))
                        data["y"] = list(flex.double(data["y"]).select(sel))

            resolution_graphs = OrderedDict(
                (k + "_" + wname, json_data[k])
                for k in (
                    "cc_one_half",
                    "i_over_sig_i",
                    "second_moments",
                    "wilson_intensity_plot",
                    "completeness",
                    "multiplicity_vs_resolution",
                )
                if k in json_data)

            if params.include_radiation_damage:
                batch_graphs = OrderedDict(
                    (k + "_" + wname, json_data[k])
                    for k in (
                        "scale_rmerge_vs_batch",
                        "i_over_sig_i_vs_batch",
                        "completeness_vs_dose",
                        "rcp_vs_dose",
                        "scp_vs_dose",
                        "rd_vs_batch_difference",
                    ))
            else:
                batch_graphs = OrderedDict(
                    (k + "_" + wname, json_data[k])
                    for k in ("scale_rmerge_vs_batch", "i_over_sig_i_vs_batch"))

            misc_graphs = OrderedDict(
                (k, json_data[k])
                for k in (
                    "cumulative_intensity_distribution",
                    "l_test",
                    "multiplicities",
                )
                if k in json_data)

            for k, v in report.multiplicity_plots().items():
                misc_graphs[k + "_" + wname] = {"img": v}

            d["resolution_graphs"] = resolution_graphs
            d["batch_graphs"] = batch_graphs
            d["misc_graphs"] = misc_graphs
            d["xtriage"] = {
                "success": xtriage_success,
                "warnings": xtriage_warnings,
                "danger": xtriage_danger,
            }

            merging_stats = report.merging_stats
            merging_stats_anom = report.merging_stats_anom
            overall = merging_stats.overall
            overall_anom = merging_stats_anom.overall
            outer_shell = merging_stats.bins[-1]
            outer_shell_anom = merging_stats_anom.bins[-1]

            column = [
                wname,
                str(xwav.get_wavelength()),
                "%.2f - %.2f (%.2f - %.2f)" % (
                    overall.d_max, overall.d_min,
                    outer_shell.d_max, outer_shell.d_min),
                "%.2f (%.2f)" % (
                    overall.completeness * 100, outer_shell.completeness * 100),
                f"{overall.mean_redundancy:.2f} ({outer_shell.mean_redundancy:.2f})",
                f"{overall.cc_one_half:.4f} ({outer_shell.cc_one_half:.4f})",
                "%.2f (%.2f)" % (
                    overall.i_over_sigma_mean, outer_shell.i_over_sigma_mean),
                f"{overall.r_merge:.4f} ({outer_shell.r_merge:.4f})",
                # anomalous statistics
                "%.2f (%.2f)" % (
                    overall_anom.anom_completeness * 100,
                    outer_shell_anom.anom_completeness * 100,
                ),
                "%.2f (%.2f)" % (
                    overall_anom.mean_redundancy,
                    outer_shell_anom.mean_redundancy),
            ]
            columns.append(column)

        table = [[c[i] for c in columns] for i in range(len(columns[0]))]

        from cctbx import sgtbx

        space_groups = xcryst.get_likely_spacegroups()
        space_groups = [
            sgtbx.space_group_info(symbol=str(symbol))
            for symbol in space_groups
        ]
        space_group = space_groups[0].symbol_and_number()
        alternative_space_groups = [
            sg.symbol_and_number() for sg in space_groups[1:]
        ]
        unit_cell = str(report.intensities.unit_cell())

    # reflection files
    for cname, xcryst in xinfo.get_crystals().items():
        # hack to replace path to reflection files with DataFiles directory
        data_dir = os.path.join(os.path.abspath(os.path.curdir), "DataFiles")
        g = glob.glob(os.path.join(data_dir, "*"))
        reflection_files = xcryst.get_scaled_merged_reflections()
        for k, rfile in reflection_files.items():
            if isinstance(rfile, str):
                for datafile in g:
                    if os.path.basename(datafile) == os.path.basename(rfile):
                        reflection_files[k] = datafile
                        break
            else:
                for kk in rfile:
                    for datafile in g:
                        if os.path.basename(datafile) == os.path.basename(
                                rfile[kk]):
                            reflection_files[k][kk] = datafile
                            break

        headers = ["Dataset", "File name"]

        merged_mtz = reflection_files["mtz"]
        mtz_files = [
            headers,
            [
                "All datasets",
                '<a href="%s">%s</a>' % (
                    os.path.relpath(merged_mtz), os.path.basename(merged_mtz)),
            ],
        ]
        for wname, unmerged_mtz in reflection_files["mtz_unmerged"].items():
            mtz_files.append([
                wname,
                '<a href="%s">%s</a>' % (
                    os.path.relpath(unmerged_mtz),
                    os.path.basename(unmerged_mtz)),
            ])

        sca_files = [headers]
        if "sca" in reflection_files:
            for wname, merged_sca in reflection_files["sca"].items():
                sca_files.append([
                    wname,
                    '<a href="%s">%s</a>' % (
                        os.path.relpath(merged_sca),
                        os.path.basename(merged_sca)),
                ])

        unmerged_sca_files = [headers]
        if "sca_unmerged" in reflection_files:
            for wname, unmerged_sca in reflection_files["sca_unmerged"].items():
                unmerged_sca_files.append([
                    wname,
                    '<a href="%s">%s</a>' % (
                        os.path.relpath(unmerged_sca),
                        os.path.basename(unmerged_sca),
                    ),
                ])

    # other files
    other_files = []
    other_files.append(["File name", "Description"])
    for other_file, description in sorted(
        [
            ("xia2.cif", "Crystallographic information file"),
            ("xia2.mmcif", "Macromolecular crystallographic information file"),
            ("shelxt.hkl", "merged structure factors for SHELXT"),
            ("shelxt.ins", "SHELXT instruction file"),
        ]
        + [
            (fn, "XPREP input file")
            for fn in os.listdir(os.path.join(data_dir))
            if fn.endswith(".p4p")
        ]
    ):
        if os.path.exists(os.path.join(data_dir, other_file)):
            other_files.append([
                '<a href="DataFiles/{filename}">{filename}</a>'.format(
                    filename=other_file),
                description,
            ])

    # log files
    log_files_table = []
    log_dir = os.path.join(os.path.abspath(os.path.curdir), "LogFiles")
    g = glob.glob(os.path.join(log_dir, "*.log"))
    for logfile in g:
        html_file = make_logfile_html(logfile)
        html_file = os.path.splitext(logfile)[0] + ".html"
        if os.path.exists(html_file):
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                '<a href="%s">html</a>' % os.path.relpath(html_file),
            ])
        else:
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                " ",
            ])

    references = {
        cdict["acta"]: cdict.get("url")
        for cdict in Citations.get_citations_dicts()
    }

    from jinja2 import Environment, ChoiceLoader, PackageLoader

    loader = ChoiceLoader([
        PackageLoader("xia2", "templates"),
        PackageLoader("dials", "templates"),
    ])
    env = Environment(loader=loader)

    template = env.get_template("xia2.html")
    html_source = template.render(
        page_title="xia2 processing report",
        xia2_output=xia2_output,
        space_group=space_group,
        alternative_space_groups=alternative_space_groups,
        unit_cell=unit_cell,
        overall_stats_table=table,
        cc_half_significance_level=params.cc_half_significance_level,
        mtz_files=mtz_files,
        sca_files=sca_files,
        unmerged_sca_files=unmerged_sca_files,
        other_files=other_files,
        log_files_table=log_files_table,
        individual_dataset_reports=individual_dataset_reports,
        references=references,
        styles=styles,
    )

    with open("%s-report.json" % os.path.splitext(filename)[0], "w") as fh:
        json.dump(json_data, fh, indent=2)

    with open(filename, "wb") as f:
        f.write(html_source.encode("utf-8", "xmlcharrefreplace"))
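The plot-thinning loop in the function above keeps each batch plot to roughly max_points points. A numpy-only sketch of the same modulo selection (variable names match the code above, the data are made up):

import numpy as np

max_points = 500
x = np.arange(1673)           # e.g. one data point per batch
y = np.sin(x / 50.0)

n = len(x)
if n > max_points:
    step = n // max_points    # here 1673 // 500 == 3
    sel = (np.arange(n) % step) == 0
    x, y = x[sel], y[sel]
print(len(x))                 # 558: every 3rd point, close to the budget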
def generate_xia2_html(xinfo, filename='xia2.html', params=None, args=[]):
    assert params is None or len(args) == 0
    if params is None:
        from xia2.Modules.Analysis import phil_scope
        interp = phil_scope.command_line_argument_interpreter()
        params, unhandled = interp.process_and_fetch(
            args, custom_processor='collect_remaining')
        params = params.extract()

    from xia2.command_line.report import xia2_report

    crystal = xinfo.get_crystals().values()[0]

    xia2_txt = os.path.join(os.path.abspath(os.path.curdir), 'xia2.txt')
    assert os.path.isfile(xia2_txt), xia2_txt

    with open(xia2_txt, 'rb') as f:
        xia2_output = f.read().encode('ascii', 'xmlcharrefreplace')
    xia2_output = cgi.escape(xia2_output)

    styles = {}

    reports = []

    columns = []
    columns.append([
        '',
        u'Wavelength (Å)',
        u'Resolution range (Å)',
        'Completeness (%)',
        'Multiplicity',
        'CC-half',
        'I/sigma',
        'Rmerge(I)',
        # anomalous statistics
        'Anomalous completeness (%)',
        'Anomalous multiplicity',
    ])

    individual_dataset_reports = {}

    for cname, xcryst in xinfo.get_crystals().iteritems():
        reflection_files = xcryst.get_scaled_merged_reflections()
        for wname, unmerged_mtz in reflection_files['mtz_unmerged'].iteritems():
            xwav = xcryst.get_xwavelength(wname)
            report = xia2_report(unmerged_mtz, params)
            reports.append(report)

            merging_stats = report.merging_stats
            merging_stats_anom = report.merging_stats_anom

            overall = merging_stats.overall
            overall_anom = merging_stats_anom.overall
            outer_shell = merging_stats.bins[-1]
            outer_shell_anom = merging_stats_anom.bins[-1]

            column = [
                wname,
                str(xwav.get_wavelength()),
                '%.2f - %.2f (%.2f - %.2f)' % (
                    overall.d_max, overall.d_min,
                    outer_shell.d_max, outer_shell.d_min),
                '%.2f (%.2f)' % (
                    overall.completeness * 100, outer_shell.completeness * 100),
                '%.2f (%.2f)' % (
                    overall.mean_redundancy, outer_shell.mean_redundancy),
                '%.4f (%.4f)' % (overall.cc_one_half, outer_shell.cc_one_half),
                '%.2f (%.2f)' % (
                    overall.i_over_sigma_mean, outer_shell.i_over_sigma_mean),
                '%.4f (%.4f)' % (overall.r_merge, outer_shell.r_merge),
                # anomalous statistics
                '%.2f (%.2f)' % (
                    overall_anom.anom_completeness * 100,
                    outer_shell_anom.anom_completeness * 100),
                '%.2f (%.2f)' % (
                    overall_anom.mean_redundancy,
                    outer_shell_anom.mean_redundancy),
            ]
            columns.append(column)

            xtriage_success, xtriage_warnings, xtriage_danger = None, None, None
            if params.xtriage_analysis:
                try:
                    xtriage_success, xtriage_warnings, xtriage_danger = \
                        report.xtriage_report()
                except Exception as e:
                    from xia2.Handlers.Phil import PhilIndex
                    if PhilIndex.params.xia2.settings.small_molecule == True:
                        print("Xtriage output not available: %s" % str(e))
                    else:
                        raise

            d = {}
            d['merging_statistics_table'] = report.merging_statistics_table()
            d['overall_statistics_table'] = report.overall_statistics_table()

            individual_dataset_reports[wname] = d

            json_data = {}
            json_data.update(report.multiplicity_vs_resolution_plot())
            json_data.update(report.multiplicity_histogram())
            json_data.update(report.completeness_plot())
            json_data.update(report.scale_rmerge_vs_batch_plot())
            json_data.update(report.cc_one_half_plot())
            json_data.update(report.i_over_sig_i_plot())
            json_data.update(report.i_over_sig_i_vs_batch_plot())
            json_data.update(report.second_moments_plot())
            json_data.update(report.cumulative_intensity_distribution_plot())
            json_data.update(report.l_test_plot())
            json_data.update(report.wilson_plot())
            json_data.update(report.pychef_plots(n_bins=1))

            from scitbx.array_family import flex
            max_points = 500
            for g in ('scale_rmerge_vs_batch', 'completeness_vs_dose',
                      'rcp_vs_dose', 'scp_vs_dose', 'rd_vs_batch_difference'):
                for i, data in enumerate(json_data[g]['data']):
                    x = data['x']
                    n = len(x)
                    if n > max_points:
                        step = n // max_points
                        sel = (flex.int_range(n) % step) == 0
                        data['x'] = list(flex.int(data['x']).select(sel))
                        data['y'] = list(flex.double(data['y']).select(sel))

            resolution_graphs = collections.OrderedDict(
                (k + '_' + wname, json_data[k])
                for k in ('cc_one_half', 'i_over_sig_i', 'second_moments',
                          'wilson_intensity_plot', 'completeness',
                          'multiplicity_vs_resolution') if k in json_data)

            if params.include_radiation_damage:
                batch_graphs = collections.OrderedDict(
                    (k + '_' + wname, json_data[k])
                    for k in ('scale_rmerge_vs_batch', 'i_over_sig_i_vs_batch',
                              'completeness_vs_dose', 'rcp_vs_dose',
                              'scp_vs_dose', 'rd_vs_batch_difference'))
            else:
                batch_graphs = collections.OrderedDict(
                    (k + '_' + wname, json_data[k])
                    for k in ('scale_rmerge_vs_batch',
                              'i_over_sig_i_vs_batch'))

            misc_graphs = collections.OrderedDict(
                (k + '_' + wname, json_data[k])
                for k in ('cumulative_intensity_distribution', 'l_test',
                          'multiplicities') if k in json_data)

            for k, v in report.multiplicity_plots().iteritems():
                misc_graphs[k + '_' + wname] = {'img': v}

            d['resolution_graphs'] = resolution_graphs
            d['batch_graphs'] = batch_graphs
            d['misc_graphs'] = misc_graphs
            d['xtriage'] = {
                'success': xtriage_success,
                'warnings': xtriage_warnings,
                'danger': xtriage_danger,
            }

        table = [[c[i] for c in columns] for i in range(len(columns[0]))]

        cell = xcryst.get_cell()

        from cctbx import sgtbx
        space_groups = xcryst.get_likely_spacegroups()
        space_groups = [
            sgtbx.space_group_info(symbol=str(symbol))
            for symbol in space_groups
        ]
        space_group = space_groups[0].symbol_and_number()
        alternative_space_groups = [
            sg.symbol_and_number() for sg in space_groups[1:]
        ]
        unit_cell = str(report.intensities.unit_cell())

        #twinning_score = xcryst._get_scaler()._scalr_twinning_score
        #twinning_conclusion = xcryst._get_scaler()._scalr_twinning_conclusion
        #if twinning_score is not None:
            #table.append(['','',''])
            #table.append(['Twinning score', '%.2f' %twinning_score, ''])
            #if twinning_conclusion is not None:
                #table.append(['', twinning_conclusion, ''])

        for row in table:
            for i in range(len(row)):
                row[i] = row[i].encode('ascii', 'xmlcharrefreplace')

    #from libtbx import table_utils
    #print table_utils.format(rows=table, has_header=True)

    # reflection files
    for cname, xcryst in xinfo.get_crystals().iteritems():
        # hack to replace path to reflection files with DataFiles directory
        data_dir = os.path.join(os.path.abspath(os.path.curdir), 'DataFiles')
        g = glob.glob(os.path.join(data_dir, '*'))
        reflection_files = xcryst.get_scaled_merged_reflections()
        for k, rfile in reflection_files.iteritems():
            if isinstance(rfile, basestring):
                for datafile in g:
                    if os.path.basename(datafile) == os.path.basename(rfile):
                        reflection_files[k] = datafile
                        break
            else:
                for kk in rfile.keys():
                    for datafile in g:
                        if os.path.basename(datafile) == os.path.basename(
                                rfile[kk]):
                            reflection_files[k][kk] = datafile
                            break

        headers = ['Dataset', 'File name']

        merged_mtz = reflection_files['mtz']
        mtz_files = [
            headers,
            [
                'All datasets',
                '<a href="%s">%s</a>' % (
                    os.path.relpath(merged_mtz), os.path.basename(merged_mtz))
            ],
        ]
        for wname, unmerged_mtz in reflection_files['mtz_unmerged'].iteritems():
            mtz_files.append([
                wname,
                '<a href="%s">%s</a>' % (
                    os.path.relpath(unmerged_mtz),
                    os.path.basename(unmerged_mtz))
            ])

        sca_files = [headers]
        for wname, merged_sca in reflection_files['sca'].iteritems():
            sca_files.append([
                wname,
                '<a href="%s">%s</a>' % (
                    os.path.relpath(merged_sca), os.path.basename(merged_sca))
            ])

        unmerged_sca_files = [headers]
        for wname, unmerged_sca in reflection_files['sca_unmerged'].iteritems():
            unmerged_sca_files.append([
                wname,
                '<a href="%s">%s</a>' % (
                    os.path.relpath(unmerged_sca),
                    os.path.basename(unmerged_sca))
            ])

    # other files
    other_files = []
    other_files.append(['File name', 'Description'])
    for other_file, description in sorted([
        ('xia2.cif', 'Crystallographic information file'),
        ('xia2.mmcif', 'Macromolecular crystallographic information file'),
        ('shelxt.hkl', 'merged structure factors for SHELXT'),
        ('shelxt.ins', 'SHELXT instruction file'),
    ] + [
        (fn, 'XPREP input file')
        for fn in os.listdir(os.path.join(data_dir)) if fn.endswith('.p4p')
    ]):
        if os.path.exists(os.path.join(data_dir, other_file)):
            other_files.append([
                '<a href="DataFiles/{filename}">{filename}</a>'.format(
                    filename=other_file),
                description,
            ])

    # log files
    log_files_table = []
    log_dir = os.path.join(os.path.abspath(os.path.curdir), 'LogFiles')
    g = glob.glob(os.path.join(log_dir, '*.log'))
    for logfile in g:
        html_file = make_logfile_html(logfile)
        html_file = os.path.splitext(logfile)[0] + '.html'
        if os.path.exists(html_file):
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                '<a href="%s">html</a>' % os.path.relpath(html_file),
            ])
        else:
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                ' ',
            ])

    # references
    references = {}
    for cdict in Citations.get_citations_dicts():
        references[cdict['acta']] = cdict.get('url')

    from jinja2 import Environment, ChoiceLoader, PackageLoader
    loader = ChoiceLoader([
        PackageLoader('xia2', 'templates'),
        PackageLoader('dials', 'templates'),
    ])
    env = Environment(loader=loader)

    template = env.get_template('xia2.html')
    html = template.render(
        page_title='xia2 processing report',
        xia2_output=xia2_output,
        space_group=space_group,
        alternative_space_groups=alternative_space_groups,
        unit_cell=unit_cell,
        xtriage_success=xtriage_success,
        xtriage_warnings=xtriage_warnings,
        xtriage_danger=xtriage_danger,
        overall_stats_table=table,
        cc_half_significance_level=params.cc_half_significance_level,
        mtz_files=mtz_files,
        sca_files=sca_files,
        unmerged_sca_files=unmerged_sca_files,
        other_files=other_files,
        log_files_table=log_files_table,
        individual_dataset_reports=individual_dataset_reports,
        references=references,
        styles=styles)

    with open(filename, 'wb') as f:
        f.write(html.encode('ascii', 'xmlcharrefreplace'))
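Both versions of generate_xia2_html share the same template lookup. A minimal sketch (assuming the xia2 and dials packages are installed with their templates directories) of how ChoiceLoader resolves the template:

from jinja2 import ChoiceLoader, Environment, PackageLoader

# ChoiceLoader tries each loader in order, so a template named
# "xia2.html" shipped with xia2 takes precedence over one of the
# same name shipped with dials.
loader = ChoiceLoader([
    PackageLoader("xia2", "templates"),
    PackageLoader("dials", "templates"),
])
env = Environment(loader=loader)
template = env.get_template("xia2.html")
# render() accepts any subset of the template's variables; omitted
# ones are simply undefined inside the template.
html_source = template.render(page_title="xia2 processing report")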