Beispiel #1
0
def scrape_results(project: Project, dataset) -> Optional[ProcStats]:
    """
    Collect autoPROC processing results for a dataset.

    Returns None when ``_get_summary_report`` yields no report for the
    dataset.  Otherwise returns a ``ProcStats`` labelled "autoproc" whose
    status is whatever ``_scrape_summary_html`` reports; the statistics
    are parsed from the report only when that status is SUCCESS.
    """
    report = _get_summary_report(project, dataset)
    if report is None:
        return None

    result = ProcStats("autoproc")
    result.status = _scrape_summary_html(report)

    # statistics are only looked for in reports of successful runs
    succeeded = result.status == ToolStatus.SUCCESS
    if succeeded:
        _parse_statistics(result, report)

    return result
Beispiel #2
0
def scrape_results(project: Project, logs_dir: Path) -> Optional[ProcStats]:
    """
    Collect xia2 processing results from a logs directory.

    Returns None when ``_scrape_outcome`` gives no status.  Otherwise
    returns a ``ProcStats`` object: if ``xia2.html`` is missing from
    ``logs_dir`` the status is forced to FAILURE, else the statistics
    are parsed out of that file.
    """
    stats = ProcStats()
    stats.status = _scrape_outcome(project, logs_dir)
    if stats.status is None:
        return None

    html_report = Path(logs_dir) / "xia2.html"
    if html_report.is_file():
        _parse_xia2_html(project, html_report, stats)
    else:
        # without the HTML report there is nothing to parse
        stats.status = ToolStatus.FAILURE

    return stats
Beispiel #3
0
def scrape_results(project: Project, dataset) -> Optional[ProcStats]:
    """
    Check the auto-processing folder to guess whether EDNA
    successfully processed the specified dataset.

    Returns None when ``_find_results`` reports no EDNA directory.
    Otherwise returns a ``ProcStats`` labelled "edna": FAILURE when no
    MTZ file was found, else SUCCESS with the parsed statistics and the
    ISa value filled in.
    """
    results_dir, mtz = _find_results(project, dataset)
    if results_dir is None:
        return None

    stats = ProcStats("edna")

    if mtz is not None:
        stats.status = ToolStatus.SUCCESS
        _parse_statistics(project, results_dir, dataset, stats)
        stats.isa = _scrape_isa(project, dataset)
    else:
        # no MTZ file was produced, treat the run as failed
        stats.status = ToolStatus.FAILURE

    return stats
Beispiel #4
0
def scrape_results(project: Project, dataset) -> Optional[ProcStats]:
    """
    Collect XDSAPP processing results for a dataset.

    Returns None when the XDSAPP directory does not exist.  Otherwise
    returns a ``ProcStats`` labelled "xdsapp".  The run counts as a
    SUCCESS only when both a ``*F.mtz`` file is present in the directory
    and ``_get_results_log`` returns a results log; in that case the log
    is parsed into the statistics.  In every other case the status is
    FAILURE and no statistics are parsed.
    """
    xdsapp_dir = _get_xdsapp_dir(project, dataset)
    if not xdsapp_dir.is_dir():
        return None

    stats = ProcStats("xdsapp")

    # a missing MTZ file or a missing results log both mean failure
    mtz_found = next(xdsapp_dir.glob("*F.mtz"), None) is not None
    results_log = _get_results_log(project, xdsapp_dir, dataset)

    if mtz_found and results_log is not None:
        stats.status = ToolStatus.SUCCESS
        _parse_results_log(project, results_log, stats)
    else:
        stats.status = ToolStatus.FAILURE

    return stats
Beispiel #5
0
def _parse_xia2_html(project: Project, xia2_html: Path, stats: ProcStats):
    """
    Fill ``stats`` with the values found in a xia2 HTML report.

    Each line returned by ``read_text_lines`` is tested against a set of
    marker strings.  The checks are independent of each other, so a line
    containing several markers updates several attributes.  Values are
    stored as the raw text tokens; no numeric conversion is done.
    """
    # (marker, ((attribute, index into the whitespace-split line), ...)),
    # listed in the order the checks are applied
    token_rules = (
        (
            "High resolution limit  ",
            (("high_resolution_average", -3), ("high_resolution_out", -1)),
        ),
        (
            "Low resolution limit  ",
            (("low_resolution_average", -3), ("low_resolution_out", -1)),
        ),
        (
            "Completeness  ",
            (("completeness_average", -3), ("completeness_out", -1)),
        ),
        ("Multiplicity  ", (("multiplicity", -3),)),
        ("Rmeas(I+/-) ", (("r_meas_average", -3), ("r_meas_out", -1))),
        ("Total unique", (("unique_reflections", -3),)),
        ("Total observations", (("reflections", -3),)),
        ("Mosaic spread", (("mosaicity", -1),)),
        ("I/sigma  ", (("i_sig_average", -3), ("i_sig_out", -1))),
    )

    for text in read_text_lines(project, xia2_html):
        tokens = text.split()

        for marker, targets in token_rules:
            if marker in text:
                for attribute, index in targets:
                    setattr(stats, attribute, tokens[index])

        if "Space group:  " in text:
            # everything after the two leading tokens, glued together
            stats.space_group = "".join(tokens[2:])

        if "Unit cell: " in text:
            # the six cell parameters follow the first colon, separated by ", "
            cell_text = text.split(":")[1].strip()
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = cell_text.split(", ")
Beispiel #6
0
    def setUp(self):
        """
        Prepare the fixtures shared by the tests: a mock project whose
        ``encrypted`` attribute is False, and a ``ProcStats`` object
        labelled "xdsapp" with its status set to SUCCESS.
        """
        self.project = Mock(encrypted=False)

        xdsapp_stats = ProcStats("xdsapp")
        xdsapp_stats.status = ToolStatus.SUCCESS
        self.stats = xdsapp_stats
Beispiel #7
0
def _parse_statistics(project: Project, edna_dir: Path, dataset, stats: ProcStats):
    """
    Fill ``stats`` from the ``ep_<dataset name>_aimless_anom.log`` file
    found in ``edna_dir`` and return it.

    Every line of the log is tested against a set of marker strings; the
    checks are independent, so one line may update several attributes.
    Values are stored as the raw text tokens.  The ``project`` argument
    is not used by this function.
    """
    aimless_log = Path(edna_dir) / f"ep_{dataset.name}_aimless_anom.log"

    with open(aimless_log, "r", encoding="utf-8") as handle:
        entries = handle.readlines()

    for entry in entries:
        tokens = entry.split()

        if "Space group:" in entry:
            stats.space_group = "".join(tokens[2:])

        if "Number of unique reflections" in entry:
            stats.unique_reflections = tokens[-1]

        if "Total number of observations" in entry:
            stats.reflections = tokens[-3]

        if "Low resolution limit" in entry:
            stats.low_resolution_average = tokens[3]
            stats.low_resolution_out = tokens[-1]

        if "High resolution limit" in entry:
            stats.high_resolution_average = tokens[3]
            stats.high_resolution_out = tokens[-1]

        if entry.startswith(UNIT_CELL):
            # six whitespace separated cell parameters follow the prefix
            cell_values = entry[len(UNIT_CELL) :].split()
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = cell_values

        if "Multiplicity" in entry:
            stats.multiplicity = tokens[1]

        if "Mean((I)/sd(I))" in entry:
            stats.i_sig_average = tokens[1]
            stats.i_sig_out = tokens[-1]

        if "Rmeas (all I+ & I-)" in entry:
            stats.r_meas_average = tokens[5]
            stats.r_meas_out = tokens[-1]

        if "completeness" in entry:
            stats.completeness_average = tokens[-3]
            stats.completeness_out = tokens[-1]

        if "mosaicity" in entry:
            stats.mosaicity = tokens[-1]

    return stats
Beispiel #8
0
def _parse_results_log(project: Project, results_file: Path, stats: ProcStats):
    """
    Fill ``stats`` from an XDSAPP results log and return it.

    Only lines indented with four spaces are considered.  Each such line
    is stripped and compared against the known prefixes in a fixed
    order; the first prefix that matches decides how the remainder of
    the line is parsed, and at most one prefix is applied per line.
    """

    def _first_number(text):
        return OPTIONAL_PAIR_RE.match(text).groups()[0]

    def _set_space_group(text):
        # drop every space character from the space group string
        stats.space_group = text.replace(" ", "")

    def _set_unit_cell(text):
        (
            stats.unit_cell_a,
            stats.unit_cell_b,
            stats.unit_cell_c,
            stats.unit_cell_alpha,
            stats.unit_cell_beta,
            stats.unit_cell_gamma,
        ) = text.split()

    def _set_resolution(text):
        (
            stats.low_resolution_average,
            stats.high_resolution_average,
            stats.low_resolution_out,
            stats.high_resolution_out,
        ) = RESOLUTION_RE.match(text).groups()

    def _set_reflections(text):
        stats.reflections = _first_number(text)

    def _set_unique_reflections(text):
        stats.unique_reflections = _first_number(text)

    def _set_i_sig(text):
        stats.i_sig_average, stats.i_sig_out = PAIR_RE.match(text).groups()

    def _set_multiplicity(text):
        stats.multiplicity = _first_number(text)

    def _set_r_meas(text):
        stats.r_meas_average, stats.r_meas_out = PAIR_RE.match(text).groups()

    def _set_completeness(text):
        (
            stats.completeness_average,
            stats.completeness_out,
        ) = PAIR_RE.match(text).groups()

    def _set_mosaicity(text):
        # a lone dash stands for "no value"
        stats.mosaicity = None if text == "-" else _first_number(text)

    def _set_isa(text):
        stats.isa = text

    # the order matters: the first prefix a line starts with is the one used
    handlers = (
        (SPACE_GROUP, _set_space_group),
        (UNIT_CELL, _set_unit_cell),
        (RESOLUTION, _set_resolution),
        (REFLECTIONS, _set_reflections),
        (UNIQUE_REFLECTIONS, _set_unique_reflections),
        (I_SIGI, _set_i_sig),
        (MULTIPLICITY, _set_multiplicity),
        (R_MEAS, _set_r_meas),
        (COMPLETENESS, _set_completeness),
        (MOSAICITY, _set_mosaicity),
        (ISA, _set_isa),
    )

    for raw_line in read_text_lines(project, results_file):
        # all lines we want to parse are indented with 4 spaces,
        # every other line is skipped
        if not raw_line.startswith("    "):
            continue

        entry = raw_line.strip()

        for prefix, apply in handlers:
            if entry.startswith(prefix):
                apply(entry[len(prefix):].strip())
                break

    return stats
Beispiel #9
0
def _parse_statistics(stats: ProcStats, report):
    """
    Fill ``stats`` with the values found in the report file and return it.

    ``report`` is opened as UTF-8 text.  Every line is tested against a
    set of marker strings; the checks are independent, so one line may
    update several attributes.  Values are stored as the raw text
    tokens, no numeric conversion is done.  The ISa value is taken from
    the last token of the line that follows the "ISa (" marker line.
    """
    with open(report, "r", encoding="utf-8") as handle:
        lines = handle.readlines()

    for index, text in enumerate(lines):
        fields = text.split()

        if "Unit cell and space group:" in text:
            # tokens 5..10 are the cell parameters, the rest is the
            # space group, possibly wrapped in single quotes
            stats.space_group = "".join(fields[11:]).replace("'", "")
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = fields[5:11]

        if "Low resolution limit  " in text:
            fourth, sixth = fields[3], fields[5]
            stats.low_resolution_average = fourth
            stats.low_resolution_out = sixth

        if "High resolution limit  " in text:
            # NOTE(review): unlike the low resolution row, token 3 goes
            # to *_out and token 5 to *_average here -- confirm intended
            fourth, sixth = fields[3], fields[5]
            stats.high_resolution_out = fourth
            stats.high_resolution_average = sixth

        if "Total number of observations  " in text:
            stats.reflections = fields[-3]

        if "Total number unique  " in text:
            stats.unique_reflections = fields[-3]

        if "Multiplicity  " in text:
            stats.multiplicity = fields[1]

        if "Mean(I)/sd(I)" in text:
            stats.i_sig_average = fields[1]
            stats.i_sig_out = fields[-1]

        if "Completeness (ellipsoidal)" in text or "Completeness (spherical)" in text:
            stats.completeness_average = fields[2]
            stats.completeness_out = fields[-1]

        # any line mentioning "Rmeas" is used, which includes the
        # "Rmeas   (all I+ & I-)" row; the last such line wins
        if "Rmeas" in text:
            stats.r_meas_average = fields[-3]
            stats.r_meas_out = fields[-1]

        if "CRYSTAL MOSAICITY (DEGREES)" in text:
            stats.mosaicity = fields[-1]

        if "ISa (" in text:
            # the value sits on the line after the marker
            stats.isa = lines[index + 1].split()[-1]

    return stats