def exercise_int():
  fa = flex.int_range(1,7)
  na = fa.as_numpy_array()
  assert na.tolist() == list(fa)
  fna = flex.int(na)
  assert fna.all() == (6,)
  assert fna.origin() == (0,)
  assert fna.focus() == (6,)
  assert fna.all_eq(fa)
  fa[0] = 99
  assert na[0] == 1
  fa[0] = 1
  #
  fa.reshape(flex.grid(2,3))
  na = fa.as_numpy_array()
  assert na.tolist() == [[1, 2, 3], [4, 5, 6]]
  fna = flex.int(na)
  assert fna.all() == (2,3)
  assert fna.origin() == (0,0)
  assert fna.focus() == (2,3)
  assert fna.all_eq(fa)
  #
  fa = flex.int_range(4*2*3) + 1
  fa.reshape(flex.grid(4,2,3))
  na = fa.as_numpy_array()
  assert na.tolist() == [
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18]],
    [[19, 20, 21], [22, 23, 24]]]
  fna = flex.int(na)
  assert fna.all() == (4,2,3)
  assert fna.origin() == (0,0,0)
  assert fna.focus() == (4,2,3)
  assert fna.all_eq(fa)
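
# A minimal sketch of the flex <-> numpy interop exercised above (assumes a
# working cctbx/scitbx installation): as_numpy_array() returns an independent
# copy of the data, and flex.int(numpy_array) adopts the array's shape as its grid.
from scitbx.array_family import flex

fa = flex.int_range(1, 7)            # flex.int: [1, 2, 3, 4, 5, 6]
na = fa.as_numpy_array()             # numpy copy of the data
na[0] = 99                           # modifying the copy leaves fa untouched
assert fa[0] == 1

fa.reshape(flex.grid(2, 3))          # give the flex array a 2x3 accessor
fb = flex.int(fa.as_numpy_array())   # round trip back to flex
assert fb.all() == (2, 3) and fb.all_eq(fa)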
def test_unit_cell():
    # generate some random unit cells
    sgi = sgtbx.space_group_info("P1")
    crystal_symmetries = [
        sgi.any_compatible_crystal_symmetry(volume=random.uniform(990, 1010))
        for i in range(10)
    ]
    lattice_ids = flex.int_range(0, len(crystal_symmetries)).as_string()
    ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                  lattice_ids=lattice_ids)
    clusters, dendrogram, _ = ucs.ab_cluster(write_file_lists=False,
                                             doplot=False)
Example #3
def test_scipy_dendrogram_to_plotly_json():
    # generate some random unit cells
    sgi = sgtbx.space_group_info("P1")
    crystal_symmetries = [
        sgi.any_compatible_crystal_symmetry(volume=random.uniform(990, 1010))
        for i in range(10)
    ]
    lattice_ids = flex.int_range(0, len(crystal_symmetries)).as_string()
    ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                  lattice_ids=lattice_ids)
    _, dendrogram, _ = ucs.ab_cluster(write_file_lists=False, doplot=False)

    d = plots.scipy_dendrogram_to_plotly_json(dendrogram,
                                              title="Unit cell clustering")
    assert set(d) == {"layout", "data"}
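
# For reference, scipy's dendrogram rendered with no_plot=True is just a dict
# of plot coordinates; this is the kind of structure that
# scipy_dendrogram_to_plotly_json converts to plotly data.
# Standalone sketch with toy data, not dials/xia2 code.
import numpy as np
from scipy.cluster import hierarchy
import scipy.spatial.distance as ssd

rng = np.random.default_rng(0)
points = rng.normal(size=(6, 3))
linkage_matrix = hierarchy.linkage(ssd.pdist(points), method="average")
dendrogram = hierarchy.dendrogram(linkage_matrix, no_plot=True)
print(sorted(dendrogram))  # includes 'icoord', 'dcoord', 'ivl', 'leaves', ...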
Example #4
    def _label_clusters_first_pass(self, n_datasets, n_sym_ops):
        """First pass labelling of clusters.

        Labels points into clusters such that cluster contains exactly one copy
        of each dataset.

        Args:
          n_datasets (int): The number of datasets.
          n_sym_ops (int): The number of symmetry operations.

        Returns:
          cluster_labels (scitbx.array_family.flex.int): A label for each coordinate, labelled from
          0 .. n_sym_ops.
        """
        # initialise cluster labels: -1 signifies not yet assigned to a cluster
        cluster_labels = flex.int(self.coords.all()[0], -1)
        X_orig = self.coords.as_numpy_array()

        cluster_id = 0
        while cluster_labels.count(-1) > 0:
            dataset_ids = (flex.int_range(n_datasets * n_sym_ops) %
                           n_datasets).as_numpy_array()
            coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

            # select only those points that don't already belong to a cluster
            sel = np.where(cluster_labels == -1)
            X = X_orig[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            # choose a high-density point as the seed for a new cluster
            nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                    algorithm="brute",
                                    metric="cosine").fit(X)
            distances, indices = nbrs.kneighbors(X)
            average_distance = flex.double(
                [dist[1:].mean() for dist in distances])
            i = flex.min_index(average_distance)

            d_id = dataset_ids[i]
            cluster = np.array([coord_ids[i]])
            cluster_dataset_ids = np.array([d_id])
            xis = np.array([X[i]])

            for j in range(n_datasets - 1):
                # select only those rows that don't correspond to a dataset
                # already present in the current cluster
                sel = np.where(dataset_ids != d_id)
                X = X[sel]
                dataset_ids = dataset_ids[sel]
                coord_ids = coord_ids[sel]

                assert len(X) > 0

                # Find nearest neighbour in cosine-space to the current cluster centroid
                nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                        algorithm="brute",
                                        metric="cosine").fit(X)
                distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
                k = indices[0][0]
                d_id = dataset_ids[k]
                cluster = np.append(cluster, coord_ids[k])
                cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
                xis = np.append(xis, [X[k]], axis=0)

            # label this cluster
            cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                        cluster_id)
            cluster_id += 1
        return cluster_labels
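
# Toy illustration of the seed-point selection used in the first pass above
# (standalone sketch, not dials code): fit cosine nearest neighbours and take
# the point with the smallest mean distance to its neighbours as the densest
# point, i.e. the seed of the next cluster.
import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))                  # stand-in for self.coords

nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                        algorithm="brute",
                        metric="cosine").fit(X)
distances, indices = nbrs.kneighbors(X)       # column 0 is the self-distance
average_distance = distances[:, 1:].mean(axis=1)
seed = int(np.argmin(average_distance))       # highest-density point
print("seed index:", seed)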
Example #5
    def seed_clustering(self):
        eps = 1e-6
        X_orig = self.coords.as_numpy_array()

        import numpy as np
        from scipy.cluster import hierarchy
        import scipy.spatial.distance as ssd
        from sklearn.neighbors import NearestNeighbors
        from sklearn import metrics

        # initialise cluster labels: -1 signifies not yet assigned to a cluster
        self.cluster_labels = flex.int(self.coords.all()[0], -1)

        cluster_id = 0
        while self.cluster_labels.count(-1) > 0:
            dataset_ids = (flex.int_range(
                len(self.datasets) * len(self.target.get_sym_ops())) %
                           len(self.datasets)).as_numpy_array()
            coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

            # select only those points that don't already belong to a cluster
            sel = np.where(self.cluster_labels == -1)
            X = X_orig[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            # choose a high-density point as the seed for a new cluster
            nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                    algorithm='brute',
                                    metric='cosine').fit(X)
            distances, indices = nbrs.kneighbors(X)
            average_distance = flex.double(
                [dist[1:].mean() for dist in distances])
            i = flex.min_index(average_distance)

            d_id = dataset_ids[i]
            cluster = np.array([coord_ids[i]])
            cluster_dataset_ids = np.array([d_id])
            xis = np.array([X[i]])

            for j in range(len(self.datasets) - 1):
                # select only those rows that don't correspond to a dataset
                # already present in the current cluster
                sel = np.where(dataset_ids != d_id)
                X = X[sel]
                dataset_ids = dataset_ids[sel]
                coord_ids = coord_ids[sel]

                assert len(X) > 0

                # Find nearest neighbour in cosine-space to the current cluster centroid
                nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                        algorithm='brute',
                                        metric='cosine').fit(X)
                distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
                k = indices[0][0]
                d_id = dataset_ids[k]
                cluster = np.append(cluster, coord_ids[k])
                cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
                xis = np.append(xis, [X[k]], axis=0)

            # label this cluster
            self.cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                             cluster_id)
            cluster_id += 1

        if flex.max(self.cluster_labels) == 0:
            # assume single cluster
            return self.cluster_labels

        cluster_centroids = []
        X = self.coords.as_numpy_array()
        for i in set(self.cluster_labels):
            sel = (self.cluster_labels == i).iselection()
            cluster_centroids.append(X[sel.as_numpy_array()].mean(axis=0))

        # hierarchical clustering of cluster centroids, using cosine metric
        dist_mat = ssd.pdist(cluster_centroids, metric='cosine')
        linkage_matrix = hierarchy.linkage(dist_mat, method='average')

        # compare valid equal-sized clustering using silhouette scores
        # https://en.wikipedia.org/wiki/Silhouette_(clustering)
        # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
        distances = linkage_matrix[::, 2]
        distances = np.insert(distances, 0, 0)
        silhouette_scores = flex.double()
        thresholds = flex.double()
        n_clusters = flex.size_t()
        for threshold in distances[1:]:
            cluster_labels = self.cluster_labels.deep_copy()
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold - eps,
                                        criterion='distance').tolist()
            counts = [labels.count(l) for l in set(labels)]
            if len(set(counts)) > 1:
                # only equal-sized clusters are valid
                continue

            n = len(set(labels))
            if n == 1: continue
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            int(labels[i] - 1))
            # Compute the silhouette score for each sample and their average
            sample_silhouette_values = metrics.silhouette_samples(
                X, cluster_labels.as_numpy_array(), metric='cosine')
            silhouette_avg = sample_silhouette_values.mean()
            silhouette_scores.append(silhouette_avg)
            thresholds.append(threshold)
            n_clusters.append(n)

            count_negative = (sample_silhouette_values < 0).sum()
            logger.info('Clustering:')
            logger.info('  Number of clusters: %i' % n)
            logger.info('  Threshold score: %.3f (%.1f deg)' %
                        (threshold, math.degrees(math.acos(1 - threshold))))
            logger.info('  Silhouette score: %.3f' % silhouette_avg)
            logger.info('  -ve silhouette scores: %.1f%%' %
                        (100 * count_negative / sample_silhouette_values.size))

            if self.params.save_plot:
                plot_silhouette(sample_silhouette_values,
                                cluster_labels.as_numpy_array(),
                                file_name='%ssilhouette_%i.png' %
                                (self.params.plot_prefix, n))

        if self.params.cluster.seed.n_clusters is Auto:
            idx = flex.max_index(silhouette_scores)
        else:
            idx = flex.first_index(n_clusters,
                                   self.params.cluster.seed.n_clusters)
            if idx is None:
                raise Sorry('No valid clustering with %i clusters' %
                            self.params.cluster.seed.n_clusters)

        if (self.params.cluster.seed.n_clusters is Auto
                and silhouette_scores[idx] <
                self.params.cluster.seed.min_silhouette_score):
            # assume single cluster
            self.cluster_labels = flex.int(self.cluster_labels.size(), 0)
        else:
            threshold = thresholds[idx] - eps
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold,
                                        criterion='distance')
            cluster_labels = flex.double(self.cluster_labels.size(), -1)
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            labels[i] - 1)
            self.cluster_labels = cluster_labels

        if self.params.save_plot:
            plot_matrix(1 - ssd.squareform(dist_mat),
                        linkage_matrix,
                        '%sseed_clustering_cos_angle_matrix.png' %
                        self.params.plot_prefix,
                        color_threshold=threshold)
            plot_dendrogram(linkage_matrix,
                            '%sseed_clustering_cos_angle_dendrogram.png' %
                            self.params.plot_prefix,
                            color_threshold=threshold)

        return self.cluster_labels
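
# Standalone sketch (toy data, not the class above) of the threshold scan in
# seed_clustering: cut the linkage matrix at each merge height and keep the
# cut with the best average silhouette score. The real code additionally
# restricts the scan to equal-sized clusterings.
import numpy as np
from scipy.cluster import hierarchy
import scipy.spatial.distance as ssd
from sklearn import metrics

rng = np.random.default_rng(1)
centres = np.array([[1.0, 0.0], [0.0, 1.0], [-1.0, 1.0]])
X = np.vstack([rng.normal(loc=c, scale=0.05, size=(20, 2)) for c in centres])

dist_mat = ssd.pdist(X, metric="cosine")
linkage_matrix = hierarchy.linkage(dist_mat, method="average")

eps = 1e-6
best = None
for threshold in linkage_matrix[:, 2]:        # merge heights
    labels = hierarchy.fcluster(linkage_matrix, threshold - eps,
                                criterion="distance")
    n = len(set(labels))
    if n < 2 or n >= len(X):                  # silhouette needs 2 <= n < n_samples
        continue
    score = metrics.silhouette_score(X, labels, metric="cosine")
    if best is None or score > best[0]:
        best = (score, n, threshold)

print("best silhouette %.3f with %i clusters" % best[:2])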
Example #6
def generate_xia2_html(xinfo, filename="xia2.html", params=None, args=[]):
    assert params is None or len(args) == 0
    if params is None:
        from xia2.Modules.Analysis import phil_scope

        interp = phil_scope.command_line_argument_interpreter()
        params, unhandled = interp.process_and_fetch(
            args, custom_processor="collect_remaining")
        params = params.extract()

    xia2_txt = os.path.join(os.path.abspath(os.path.curdir), "xia2.txt")
    assert os.path.isfile(xia2_txt), xia2_txt

    with open(xia2_txt, "r") as f:
        xia2_output = html.escape(f.read())

    styles = {}

    columns = []
    columns.append([
        "",
        "Wavelength (Å)",
        "Resolution range (Å)",
        "Completeness (%)",
        "Multiplicity",
        "CC-half",
        "I/sigma",
        "Rmerge(I)",
        # anomalous statistics
        "Anomalous completeness (%)",
        "Anomalous multiplicity",
    ])

    individual_dataset_reports = {}

    for cname, xcryst in xinfo.get_crystals().items():
        reflection_files = xcryst.get_scaled_merged_reflections()
        for wname, unmerged_mtz in reflection_files["mtz_unmerged"].items():
            xwav = xcryst.get_xwavelength(wname)

            from xia2.Modules.MultiCrystalAnalysis import batch_phil_scope

            scope = phil.parse(batch_phil_scope)
            scaler = xcryst._scaler
            try:
                for si in scaler._sweep_information.values():
                    batch_params = scope.extract().batch[0]
                    batch_params.id = si["sname"]
                    batch_params.range = si["batches"]
                    params.batch.append(batch_params)
            except AttributeError:
                for si in scaler._sweep_handler._sweep_information.values():
                    batch_params = scope.extract().batch[0]
                    batch_params.id = si.get_sweep_name()
                    batch_params.range = si.get_batch_range()
                    params.batch.append(batch_params)

            report_path = xinfo.path.joinpath(cname, "report")
            report_path.mkdir(parents=True, exist_ok=True)
            report = Report.from_unmerged_mtz(unmerged_mtz,
                                              params,
                                              report_dir=str(report_path))

            xtriage_success, xtriage_warnings, xtriage_danger = None, None, None
            if params.xtriage_analysis:
                try:
                    (
                        xtriage_success,
                        xtriage_warnings,
                        xtriage_danger,
                    ) = report.xtriage_report()
                except Exception as e:
                    params.xtriage_analysis = False
                    logger.debug("Exception running xtriage:")
                    logger.debug(e, exc_info=True)

            (
                overall_stats_table,
                merging_stats_table,
                stats_plots,
            ) = report.resolution_plots_and_stats()

            d = {}
            d["merging_statistics_table"] = merging_stats_table
            d["overall_statistics_table"] = overall_stats_table

            individual_dataset_reports[wname] = d

            json_data = {}

            if params.xtriage_analysis:
                json_data["xtriage"] = (xtriage_success + xtriage_warnings +
                                        xtriage_danger)

            json_data.update(stats_plots)
            json_data.update(report.batch_dependent_plots())
            json_data.update(report.intensity_stats_plots(run_xtriage=False))
            json_data.update(report.pychef_plots())
            json_data.update(report.pychef_plots(n_bins=1))

            from scitbx.array_family import flex

            max_points = 500
            for g in (
                    "scale_rmerge_vs_batch",
                    "completeness_vs_dose",
                    "rcp_vs_dose",
                    "scp_vs_dose",
                    "rd_vs_batch_difference",
            ):
                for i, data in enumerate(json_data[g]["data"]):
                    x = data["x"]
                    n = len(x)
                    if n > max_points:
                        step = n // max_points
                        sel = (flex.int_range(n) % step) == 0
                        data["x"] = list(flex.int(data["x"]).select(sel))
                        data["y"] = list(flex.double(data["y"]).select(sel))

            resolution_graphs = OrderedDict((k + "_" + wname, json_data[k])
                                            for k in (
                                                "cc_one_half",
                                                "i_over_sig_i",
                                                "second_moments",
                                                "wilson_intensity_plot",
                                                "completeness",
                                                "multiplicity_vs_resolution",
                                            ) if k in json_data)

            if params.include_radiation_damage:
                batch_graphs = OrderedDict((k + "_" + wname, json_data[k])
                                           for k in (
                                               "scale_rmerge_vs_batch",
                                               "i_over_sig_i_vs_batch",
                                               "completeness_vs_dose",
                                               "rcp_vs_dose",
                                               "scp_vs_dose",
                                               "rd_vs_batch_difference",
                                           ))
            else:
                batch_graphs = OrderedDict((k + "_" + wname, json_data[k])
                                           for k in ("scale_rmerge_vs_batch",
                                                     "i_over_sig_i_vs_batch"))

            misc_graphs = OrderedDict((k, json_data[k]) for k in (
                "cumulative_intensity_distribution",
                "l_test",
                "multiplicities",
            ) if k in json_data)

            for k, v in report.multiplicity_plots().items():
                misc_graphs[k + "_" + wname] = {"img": v}

            d["resolution_graphs"] = resolution_graphs
            d["batch_graphs"] = batch_graphs
            d["misc_graphs"] = misc_graphs
            d["xtriage"] = {
                "success": xtriage_success,
                "warnings": xtriage_warnings,
                "danger": xtriage_danger,
            }

            merging_stats = report.merging_stats
            merging_stats_anom = report.merging_stats_anom

            overall = merging_stats.overall
            overall_anom = merging_stats_anom.overall
            outer_shell = merging_stats.bins[-1]
            outer_shell_anom = merging_stats_anom.bins[-1]

            column = [
                wname,
                str(xwav.get_wavelength()),
                "%.2f - %.2f (%.2f - %.2f)" %
                (overall.d_max, overall.d_min, outer_shell.d_max,
                 outer_shell.d_min),
                "%.2f (%.2f)" %
                (overall.completeness * 100, outer_shell.completeness * 100),
                f"{overall.mean_redundancy:.2f} ({outer_shell.mean_redundancy:.2f})",
                f"{overall.cc_one_half:.4f} ({outer_shell.cc_one_half:.4f})",
                "%.2f (%.2f)" %
                (overall.i_over_sigma_mean, outer_shell.i_over_sigma_mean),
                f"{overall.r_merge:.4f} ({outer_shell.r_merge:.4f})",
                # anomalous statistics
                "%.2f (%.2f)" % (
                    overall_anom.anom_completeness * 100,
                    outer_shell_anom.anom_completeness * 100,
                ),
                "%.2f (%.2f)" % (overall_anom.mean_redundancy,
                                 outer_shell_anom.mean_redundancy),
            ]
            columns.append(column)

    table = [[c[i] for c in columns] for i in range(len(columns[0]))]

    from cctbx import sgtbx

    space_groups = xcryst.get_likely_spacegroups()
    space_groups = [
        sgtbx.space_group_info(symbol=str(symbol)) for symbol in space_groups
    ]
    space_group = space_groups[0].symbol_and_number()
    alternative_space_groups = [
        sg.symbol_and_number() for sg in space_groups[1:]
    ]
    unit_cell = str(report.intensities.unit_cell())

    # reflection files

    for cname, xcryst in xinfo.get_crystals().items():
        # hack to replace path to reflection files with DataFiles directory
        data_dir = os.path.join(os.path.abspath(os.path.curdir), "DataFiles")
        g = glob.glob(os.path.join(data_dir, "*"))
        reflection_files = xcryst.get_scaled_merged_reflections()
        for k, rfile in reflection_files.items():
            if isinstance(rfile, str):
                for datafile in g:
                    if os.path.basename(datafile) == os.path.basename(rfile):
                        reflection_files[k] = datafile
                        break
            else:
                for kk in rfile:
                    for datafile in g:
                        if os.path.basename(datafile) == os.path.basename(
                                rfile[kk]):
                            reflection_files[k][kk] = datafile
                            break

        headers = ["Dataset", "File name"]
        merged_mtz = reflection_files["mtz"]
        mtz_files = [
            headers,
            [
                "All datasets",
                '<a href="%s">%s</a>' %
                (os.path.relpath(merged_mtz), os.path.basename(merged_mtz)),
            ],
        ]

        for wname, unmerged_mtz in reflection_files["mtz_unmerged"].items():
            mtz_files.append([
                wname,
                '<a href="%s">%s</a>' % (os.path.relpath(unmerged_mtz),
                                         os.path.basename(unmerged_mtz)),
            ])

        sca_files = [headers]
        if "sca" in reflection_files:
            for wname, merged_sca in reflection_files["sca"].items():
                sca_files.append([
                    wname,
                    '<a href="%s">%s</a>' % (os.path.relpath(merged_sca),
                                             os.path.basename(merged_sca)),
                ])

        unmerged_sca_files = [headers]
        if "sca_unmerged" in reflection_files:
            for wname, unmerged_sca in reflection_files["sca_unmerged"].items(
            ):
                unmerged_sca_files.append([
                    wname,
                    '<a href="%s">%s</a>' % (
                        os.path.relpath(unmerged_sca),
                        os.path.basename(unmerged_sca),
                    ),
                ])

    # other files
    other_files = []
    other_files.append(["File name", "Description"])
    for other_file, description in sorted([
        ("xia2.cif", "Crystallographic information file"),
        ("xia2.mmcif", "Macromolecular crystallographic information file"),
        ("shelxt.hkl", "merged structure factors for SHELXT"),
        ("shelxt.ins", "SHELXT instruction file"),
    ] + [(fn, "XPREP input file")
         for fn in os.listdir(os.path.join(data_dir)) if fn.endswith(".p4p")]):
        if os.path.exists(os.path.join(data_dir, other_file)):
            other_files.append([
                '<a href="DataFiles/{filename}">{filename}</a>'.format(
                    filename=other_file),
                description,
            ])

    # log files
    log_files_table = []
    log_dir = os.path.join(os.path.abspath(os.path.curdir), "LogFiles")
    g = glob.glob(os.path.join(log_dir, "*.log"))
    for logfile in g:
        html_file = make_logfile_html(logfile)
        html_file = os.path.splitext(logfile)[0] + ".html"
        if os.path.exists(html_file):
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                '<a href="%s">html</a>' % os.path.relpath(html_file),
            ])
        else:
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                " ",
            ])

    references = {
        cdict["acta"]: cdict.get("url")
        for cdict in Citations.get_citations_dicts()
    }

    from jinja2 import Environment, ChoiceLoader, PackageLoader

    loader = ChoiceLoader([
        PackageLoader("xia2", "templates"),
        PackageLoader("dials", "templates")
    ])
    env = Environment(loader=loader)

    template = env.get_template("xia2.html")
    html_source = template.render(
        page_title="xia2 processing report",
        xia2_output=xia2_output,
        space_group=space_group,
        alternative_space_groups=alternative_space_groups,
        unit_cell=unit_cell,
        overall_stats_table=table,
        cc_half_significance_level=params.cc_half_significance_level,
        mtz_files=mtz_files,
        sca_files=sca_files,
        unmerged_sca_files=unmerged_sca_files,
        other_files=other_files,
        log_files_table=log_files_table,
        individual_dataset_reports=individual_dataset_reports,
        references=references,
        styles=styles,
    )

    with open("%s-report.json" % os.path.splitext(filename)[0], "w") as fh:
        json.dump(json_data, fh, indent=2)

    with open(filename, "wb") as f:
        f.write(html_source.encode("utf-8", "xmlcharrefreplace"))
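
# Sketch of the down-sampling idiom used above for long per-batch plot series
# (assumes cctbx/scitbx): a flex.int_range modulo mask keeps every step-th
# point so that roughly max_points samples are passed to the plots. The x/y
# values below are hypothetical stand-ins.
from scitbx.array_family import flex

max_points = 500
x = list(range(2000))        # e.g. batch numbers
y = [0.05] * 2000            # e.g. Rmerge per batch

n = len(x)
if n > max_points:
    step = n // max_points
    sel = (flex.int_range(n) % step) == 0   # flex.bool selection mask
    x = list(flex.int(x).select(sel))
    y = list(flex.double(y).select(sel))

print(len(x))  # 500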
Example #7
def generate_xia2_html(xinfo, filename='xia2.html', params=None, args=[]):

    assert params is None or len(args) == 0
    if params is None:
        from xia2.Modules.Analysis import phil_scope
        interp = phil_scope.command_line_argument_interpreter()
        params, unhandled = interp.process_and_fetch(
            args, custom_processor='collect_remaining')
        params = params.extract()

    from xia2.command_line.report import xia2_report
    crystal = xinfo.get_crystals().values()[0]

    xia2_txt = os.path.join(os.path.abspath(os.path.curdir), 'xia2.txt')
    assert os.path.isfile(xia2_txt), xia2_txt

    with open(xia2_txt, 'rb') as f:
        xia2_output = f.read().encode('ascii', 'xmlcharrefreplace')

    xia2_output = cgi.escape(xia2_output)

    styles = {}
    reports = []

    columns = []
    columns.append([
        '',
        u'Wavelength (Å)',
        u'Resolution range (Å)',
        'Completeness (%)',
        'Multiplicity',
        'CC-half',
        'I/sigma',
        'Rmerge(I)',
        # anomalous statistics
        'Anomalous completeness (%)',
        'Anomalous multiplicity'
    ])

    individual_dataset_reports = {}

    for cname, xcryst in xinfo.get_crystals().iteritems():
        reflection_files = xcryst.get_scaled_merged_reflections()
        for wname, unmerged_mtz in reflection_files['mtz_unmerged'].iteritems(
        ):
            xwav = xcryst.get_xwavelength(wname)
            report = xia2_report(unmerged_mtz, params)
            reports.append(report)

            merging_stats = report.merging_stats
            merging_stats_anom = report.merging_stats_anom

            overall = merging_stats.overall
            overall_anom = merging_stats_anom.overall
            outer_shell = merging_stats.bins[-1]
            outer_shell_anom = merging_stats_anom.bins[-1]

            column = [
                wname,
                str(xwav.get_wavelength()),
                '%.2f - %.2f (%.2f - %.2f)' %
                (overall.d_max, overall.d_min, outer_shell.d_max,
                 outer_shell.d_min),
                '%.2f (%.2f)' %
                (overall.completeness * 100, outer_shell.completeness * 100),
                '%.2f (%.2f)' %
                (overall.mean_redundancy, outer_shell.mean_redundancy),
                '%.4f (%.4f)' % (overall.cc_one_half, outer_shell.cc_one_half),
                '%.2f (%.2f)' %
                (overall.i_over_sigma_mean, outer_shell.i_over_sigma_mean),
                '%.4f (%.4f)' % (overall.r_merge, outer_shell.r_merge),
                # anomalous statistics
                '%.2f (%.2f)' % (overall_anom.anom_completeness * 100,
                                 outer_shell_anom.anom_completeness * 100),
                '%.2f (%.2f)' % (overall_anom.mean_redundancy,
                                 outer_shell_anom.mean_redundancy),
            ]
            columns.append(column)

            xtriage_success, xtriage_warnings, xtriage_danger = None, None, None
            if params.xtriage_analysis:
                try:
                    xtriage_success, xtriage_warnings, xtriage_danger = report.xtriage_report(
                    )
                except Exception as e:
                    from xia2.Handlers.Phil import PhilIndex
                    if PhilIndex.params.xia2.settings.small_molecule == True:
                        print("Xtriage output not available: %s" % str(e))
                    else:
                        raise

            d = {}
            d['merging_statistics_table'] = report.merging_statistics_table()
            d['overall_statistics_table'] = report.overall_statistics_table()

            individual_dataset_reports[wname] = d

            json_data = {}

            json_data.update(report.multiplicity_vs_resolution_plot())
            json_data.update(report.multiplicity_histogram())
            json_data.update(report.completeness_plot())
            json_data.update(report.scale_rmerge_vs_batch_plot())
            json_data.update(report.cc_one_half_plot())
            json_data.update(report.i_over_sig_i_plot())
            json_data.update(report.i_over_sig_i_vs_batch_plot())
            json_data.update(report.second_moments_plot())
            json_data.update(report.cumulative_intensity_distribution_plot())
            json_data.update(report.l_test_plot())
            json_data.update(report.wilson_plot())
            json_data.update(report.pychef_plots(n_bins=1))

            from scitbx.array_family import flex
            max_points = 500
            for g in ('scale_rmerge_vs_batch', 'completeness_vs_dose',
                      'rcp_vs_dose', 'scp_vs_dose', 'rd_vs_batch_difference'):
                for i, data in enumerate(json_data[g]['data']):
                    x = data['x']
                    n = len(x)
                    if n > max_points:
                        step = n // max_points
                        sel = (flex.int_range(n) % step) == 0
                        data['x'] = list(flex.int(data['x']).select(sel))
                        data['y'] = list(flex.double(data['y']).select(sel))

            resolution_graphs = collections.OrderedDict(
                (k + '_' + wname, json_data[k])
                for k in ('cc_one_half', 'i_over_sig_i', 'second_moments',
                          'wilson_intensity_plot', 'completeness',
                          'multiplicity_vs_resolution') if k in json_data)

            if params.include_radiation_damage:
                batch_graphs = collections.OrderedDict(
                    (k + '_' + wname, json_data[k])
                    for k in ('scale_rmerge_vs_batch', 'i_over_sig_i_vs_batch',
                              'completeness_vs_dose', 'rcp_vs_dose',
                              'scp_vs_dose', 'rd_vs_batch_difference'))
            else:
                batch_graphs = collections.OrderedDict(
                    (k + '_' + wname, json_data[k])
                    for k in ('scale_rmerge_vs_batch',
                              'i_over_sig_i_vs_batch'))

            misc_graphs = collections.OrderedDict(
                (k + '_' + wname, json_data[k])
                for k in ('cumulative_intensity_distribution', 'l_test',
                          'multiplicities') if k in json_data)

            for k, v in report.multiplicity_plots().iteritems():
                misc_graphs[k + '_' + wname] = {'img': v}

            d['resolution_graphs'] = resolution_graphs
            d['batch_graphs'] = batch_graphs
            d['misc_graphs'] = misc_graphs
            d['xtriage'] = {
                'success': xtriage_success,
                'warnings': xtriage_warnings,
                'danger': xtriage_danger
            }

    table = [[c[i] for c in columns] for i in range(len(columns[0]))]

    cell = xcryst.get_cell()
    from cctbx import sgtbx
    space_groups = xcryst.get_likely_spacegroups()
    space_groups = [
        sgtbx.space_group_info(symbol=str(symbol)) for symbol in space_groups
    ]
    space_group = space_groups[0].symbol_and_number()
    alternative_space_groups = [
        sg.symbol_and_number() for sg in space_groups[1:]
    ]
    unit_cell = str(report.intensities.unit_cell())

    #twinning_score = xcryst._get_scaler()._scalr_twinning_score
    #twinning_conclusion = xcryst._get_scaler()._scalr_twinning_conclusion
    #if twinning_score is not None:
    #table.append(['','',''])
    #table.append(['Twinning score', '%.2f' %twinning_score, ''])
    #if twinning_conclusion is not None:
    #table.append(['', twinning_conclusion, ''])

    for row in table:
        for i in range(len(row)):
            row[i] = row[i].encode('ascii', 'xmlcharrefreplace')

    #from libtbx import table_utils
    #print table_utils.format(rows=table, has_header=True)

    # reflection files

    for cname, xcryst in xinfo.get_crystals().iteritems():

        # hack to replace path to reflection files with DataFiles directory
        data_dir = os.path.join(os.path.abspath(os.path.curdir), 'DataFiles')
        g = glob.glob(os.path.join(data_dir, '*'))
        reflection_files = xcryst.get_scaled_merged_reflections()
        for k, rfile in reflection_files.iteritems():
            if isinstance(rfile, basestring):
                for datafile in g:
                    if os.path.basename(datafile) == os.path.basename(rfile):
                        reflection_files[k] = datafile
                        break
            else:
                for kk in rfile.keys():
                    for datafile in g:
                        if os.path.basename(datafile) == os.path.basename(
                                rfile[kk]):
                            reflection_files[k][kk] = datafile
                            break

        headers = ['Dataset', 'File name']
        merged_mtz = reflection_files['mtz']
        mtz_files = [
            headers,
            [
                'All datasets',
                '<a href="%s">%s</a>' %
                (os.path.relpath(merged_mtz), os.path.basename(merged_mtz))
            ]
        ]

        for wname, unmerged_mtz in reflection_files['mtz_unmerged'].iteritems(
        ):
            mtz_files.append([
                wname,
                '<a href="%s">%s</a>' %
                (os.path.relpath(unmerged_mtz), os.path.basename(unmerged_mtz))
            ])

        sca_files = [headers]
        for wname, merged_sca in reflection_files['sca'].iteritems():
            sca_files.append([
                wname,
                '<a href="%s">%s</a>' %
                (os.path.relpath(merged_sca), os.path.basename(merged_sca))
            ])

        unmerged_sca_files = [headers]
        for wname, unmerged_sca in reflection_files['sca_unmerged'].iteritems(
        ):
            unmerged_sca_files.append([
                wname,
                '<a href="%s">%s</a>' %
                (os.path.relpath(unmerged_sca), os.path.basename(unmerged_sca))
            ])

    # other files
    other_files = []
    other_files.append(['File name', 'Description'])
    for other_file, description in sorted([
          ('xia2.cif', 'Crystallographic information file'),
          ('xia2.mmcif', 'Macromolecular crystallographic information file'),
          ('shelxt.hkl', 'merged structure factors for SHELXT'),
          ('shelxt.ins', 'SHELXT instruction file'),
        ] + [
          (fn, 'XPREP input file') for fn in os.listdir(os.path.join(data_dir)) \
                                   if fn.endswith('.p4p')
        ]):
        if os.path.exists(os.path.join(data_dir, other_file)):
            other_files.append([
                '<a href="DataFiles/{filename}">{filename}</a>'.format(
                    filename=other_file), description
            ])

    # log files
    log_files_table = []
    log_dir = os.path.join(os.path.abspath(os.path.curdir), 'LogFiles')
    g = glob.glob(os.path.join(log_dir, '*.log'))
    for logfile in g:
        html_file = make_logfile_html(logfile)
        html_file = os.path.splitext(logfile)[0] + '.html'
        if os.path.exists(html_file):
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                '<a href="%s">html</a>' % os.path.relpath(html_file),
            ])
        else:
            log_files_table.append([
                os.path.basename(logfile),
                '<a href="%s">original</a>' % os.path.relpath(logfile),
                ' ',
            ])

    # references

    references = {}
    for cdict in Citations.get_citations_dicts():
        references[cdict['acta']] = cdict.get('url')

    from jinja2 import Environment, ChoiceLoader, PackageLoader
    loader = ChoiceLoader([
        PackageLoader('xia2', 'templates'),
        PackageLoader('dials', 'templates')
    ])
    env = Environment(loader=loader)

    template = env.get_template('xia2.html')
    html = template.render(
        page_title='xia2 processing report',
        xia2_output=xia2_output,
        space_group=space_group,
        alternative_space_groups=alternative_space_groups,
        unit_cell=unit_cell,
        xtriage_success=xtriage_success,
        xtriage_warnings=xtriage_warnings,
        xtriage_danger=xtriage_danger,
        overall_stats_table=table,
        cc_half_significance_level=params.cc_half_significance_level,
        mtz_files=mtz_files,
        sca_files=sca_files,
        unmerged_sca_files=unmerged_sca_files,
        other_files=other_files,
        log_files_table=log_files_table,
        individual_dataset_reports=individual_dataset_reports,
        references=references,
        styles=styles)

    with open(filename, 'wb') as f:
        f.write(html.encode('ascii', 'xmlcharrefreplace'))