def scrape_results(project: Project, dataset) -> Optional[ProcStats]:
    summary_report = _get_summary_report(project, dataset)
    if summary_report is None:
        return None

    stats = ProcStats("autoproc")
    stats.status = _scrape_summary_html(summary_report)

    if stats.status == ToolStatus.SUCCESS:
        _parse_statistics(stats, summary_report)

    return stats

def scrape_results(project: Project, logs_dir: Path) -> Optional[ProcStats]:
    stats = ProcStats()
    stats.status = _scrape_outcome(project, logs_dir)
    if stats.status is None:
        return None

    xia2_html = Path(logs_dir, "xia2.html")
    if not xia2_html.is_file():
        stats.status = ToolStatus.FAILURE
        return stats

    _parse_xia2_html(project, xia2_html, stats)

    return stats

def scrape_results(project: Project, dataset) -> Optional[ProcStats]: """ check auto-processing folder, to try to guesstimate if enda was successful processing the specified dataset """ edna_dir, mtz_file = _find_results(project, dataset) if edna_dir is None: return None stats = ProcStats("edna") if mtz_file is None: stats.status = ToolStatus.FAILURE return stats stats.status = ToolStatus.SUCCESS _parse_statistics(project, edna_dir, dataset, stats) stats.isa = _scrape_isa(project, dataset) return stats
def scrape_results(project: Project, dataset) -> Optional[ProcStats]:
    xdsapp_dir = _get_xdsapp_dir(project, dataset)
    if not xdsapp_dir.is_dir():
        return None

    stats = ProcStats("xdsapp")
    stats.status = ToolStatus.SUCCESS

    mtz = next(xdsapp_dir.glob("*F.mtz"), None)
    if mtz is None:
        # no MTZ file was generated, assume processing failed
        stats.status = ToolStatus.FAILURE

    results_log = _get_results_log(project, xdsapp_dir, dataset)
    if results_log is None:
        stats.status = ToolStatus.FAILURE

    if stats.status == ToolStatus.SUCCESS:
        _parse_results_log(project, results_log, stats)  # type: ignore

    return stats

def _parse_xia2_html(project: Project, xia2_html: Path, stats: ProcStats):
    for line in read_text_lines(project, xia2_html):
        if "High resolution limit " in line:
            stats.high_resolution_average = line.split()[-3]
            stats.high_resolution_out = line.split()[-1]
        if "Low resolution limit " in line:
            stats.low_resolution_average = line.split()[-3]
            stats.low_resolution_out = line.split()[-1]
        if "Completeness " in line:
            stats.completeness_average = line.split()[-3]
            stats.completeness_out = line.split()[-1]
        if "Multiplicity " in line:
            stats.multiplicity = line.split()[-3]
        if "Rmeas(I+/-) " in line:
            stats.r_meas_average = line.split()[-3]
            stats.r_meas_out = line.split()[-1]
        if "Total unique" in line:
            stats.unique_reflections = line.split()[-3]
        if "Total observations" in line:
            stats.reflections = line.split()[-3]
        if "Mosaic spread" in line:
            stats.mosaicity = line.split()[-1]
        if "I/sigma " in line:
            stats.i_sig_average = line.split()[-3]
            stats.i_sig_out = line.split()[-1]
        if "Space group: " in line:
            stats.space_group = "".join(line.split()[2:])
        if "Unit cell: " in line:
            vals = line.split(":")[1].strip()
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = vals.split(", ")

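# Illustrative sketch only (hypothetical row, not taken from the project):
# _parse_xia2_html above assumes xia2's summary rows carry three numeric
# columns in the order overall / inner shell / outer shell, so split()[-3]
# picks the overall value and split()[-1] the outer-shell value.
_example_row = "High resolution limit                 1.45    6.48    1.48"
assert _example_row.split()[-3] == "1.45"  # overall value
assert _example_row.split()[-1] == "1.48"  # outer-shell value
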
def setUp(self):
    self.stats = ProcStats("xdsapp")
    self.stats.status = ToolStatus.SUCCESS

    self.project = Mock()
    self.project.encrypted = False

def _parse_statistics(project: Project, edna_dir: Path, dataset, stats: ProcStats):
    log_file = Path(edna_dir, f"ep_{dataset.name}_aimless_anom.log")
    with open(log_file, "r", encoding="utf-8") as r:
        log = r.readlines()

    for line in log:
        if "Space group:" in line:
            stats.space_group = "".join(line.split()[2:])
        if "Number of unique reflections" in line:
            stats.unique_reflections = line.split()[-1]
        if "Total number of observations" in line:
            stats.reflections = line.split()[-3]
        if "Low resolution limit" in line:
            stats.low_resolution_average = line.split()[3]
            stats.low_resolution_out = line.split()[-1]
        if "High resolution limit" in line:
            stats.high_resolution_average = line.split()[3]
            stats.high_resolution_out = line.split()[-1]
        if line.startswith(UNIT_CELL):
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = line[len(UNIT_CELL):].split()
        if "Multiplicity" in line:
            stats.multiplicity = line.split()[1]
        if "Mean((I)/sd(I))" in line:
            stats.i_sig_average = line.split()[1]
            stats.i_sig_out = line.split()[-1]
        if "Rmeas (all I+ & I-)" in line:
            stats.r_meas_average = line.split()[5]
            stats.r_meas_out = line.split()[-1]
        if "completeness" in line:
            stats.completeness_average = line.split()[-3]
            stats.completeness_out = line.split()[-1]
        if "mosaicity" in line:
            stats.mosaicity = line.split()[-1]

    return stats

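# The UNIT_CELL prefix referenced above is defined elsewhere in the module; a
# hypothetical sketch of what it might look like, assuming aimless reports the
# averaged cell on a single line followed by the six cell parameters:
UNIT_CELL = "Average unit cell:"
# e.g. "Average unit cell:   45.1   65.3   78.2   90.0   90.0   90.0"
# -> line[len(UNIT_CELL):].split() yields a, b, c, alpha, beta, gamma
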
def _parse_results_log(project: Project, results_file: Path, stats: ProcStats):
    def _parse_line(line, prefix, parser_func):
        text = line[len(prefix):].strip()
        return parser_func(text)

    def _space_group(text):
        # remove all spaces in space group string
        spg = "".join(text.split(" "))
        return spg

    def _unit_cell(text):
        return text.split()

    def _resolution(text):
        return RESOLUTION_RE.match(text).groups()

    def _pair(text):
        return PAIR_RE.match(text).groups()

    def _first_number(text):
        return OPTIONAL_PAIR_RE.match(text).groups()[0]

    def _mosaicity(text):
        if text == "-":
            return None
        return _first_number(text)

    for line in read_text_lines(project, results_file):
        if not line.startswith(" "):
            # all lines we want to parse are indented with 4 spaces,
            # ignore all other lines
            continue

        line = line.strip()

        if line.startswith(SPACE_GROUP):
            stats.space_group = _parse_line(line, SPACE_GROUP, _space_group)
        elif line.startswith(UNIT_CELL):
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = _parse_line(line, UNIT_CELL, _unit_cell)
        elif line.startswith(RESOLUTION):
            (
                stats.low_resolution_average,
                stats.high_resolution_average,
                stats.low_resolution_out,
                stats.high_resolution_out,
            ) = _parse_line(line, RESOLUTION, _resolution)
        elif line.startswith(REFLECTIONS):
            stats.reflections = _parse_line(line, REFLECTIONS, _first_number)
        elif line.startswith(UNIQUE_REFLECTIONS):
            stats.unique_reflections = _parse_line(
                line, UNIQUE_REFLECTIONS, _first_number
            )
        elif line.startswith(I_SIGI):
            stats.i_sig_average, stats.i_sig_out = _parse_line(line, I_SIGI, _pair)
        elif line.startswith(MULTIPLICITY):
            stats.multiplicity = _parse_line(line, MULTIPLICITY, _first_number)
        elif line.startswith(R_MEAS):
            stats.r_meas_average, stats.r_meas_out = _parse_line(line, R_MEAS, _pair)
        elif line.startswith(COMPLETENESS):
            stats.completeness_average, stats.completeness_out = _parse_line(
                line, COMPLETENESS, _pair
            )
        elif line.startswith(MOSAICITY):
            stats.mosaicity = _parse_line(line, MOSAICITY, _mosaicity)
        elif line.startswith(ISA):
            stats.isa = _parse_line(line, ISA, lambda x: x)

    return stats

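# The SPACE_GROUP, UNIT_CELL, RESOLUTION, ... prefixes used above are plain
# string literals matching the start of the indented result-log lines, and the
# three regular expressions are defined elsewhere in the module. A hypothetical
# sketch of what the regexes might look like, for illustration only:
import re

# "12.3 (4.5)" -> overall value plus outer-shell value in parentheses
PAIR_RE = re.compile(r"([\d.]+)\s*\(([\d.]+)\)")
# like PAIR_RE, but the bracketed outer-shell part may be missing
OPTIONAL_PAIR_RE = re.compile(r"([\d.]+)(?:\s*\(([\d.]+)\))?")
# "48.00 - 1.60 (1.70 - 1.60)" -> low/high overall, then low/high outer shell
RESOLUTION_RE = re.compile(r"([\d.]+)\s*-\s*([\d.]+)\s*\(([\d.]+)\s*-\s*([\d.]+)\)")
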
def _parse_statistics(stats: ProcStats, report):
    with open(report, "r", encoding="utf-8") as r:
        log = r.readlines()

    for n, line in enumerate(log):
        if "Unit cell and space group:" in line:
            parts = line.split()
            stats.space_group = "".join(parts[11:]).replace("'", "")
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = parts[5:11]
        if "Low resolution limit " in line:
            stats.low_resolution_average, stats.low_resolution_out = (
                line.split()[3],
                line.split()[5],
            )
        if "High resolution limit " in line:
            stats.high_resolution_average, stats.high_resolution_out = (
                line.split()[3],
                line.split()[5],
            )
        if "Total number of observations " in line:
            stats.reflections = line.split()[-3]
        if "Total number unique " in line:
            stats.unique_reflections = line.split()[-3]
        if "Multiplicity " in line:
            stats.multiplicity = line.split()[1]
        if "Mean(I)/sd(I)" in line:
            stats.i_sig_average = line.split()[1]
            stats.i_sig_out = line.split()[-1]
        if "Completeness (ellipsoidal)" in line or "Completeness (spherical)" in line:
            stats.completeness_average = line.split()[2]
            stats.completeness_out = line.split()[-1]
        if "Rmeas (all I+ & I-)" in line:
            stats.r_meas_average = line.split()[-3]
            stats.r_meas_out = line.split()[-1]
        elif "Rmeas" in line:
            stats.r_meas_average = line.split()[-3]
            stats.r_meas_out = line.split()[-1]
        if "CRYSTAL MOSAICITY (DEGREES)" in line:
            stats.mosaicity = line.split()[-1]
        if "ISa (" in line:
            stats.isa = log[n + 1].split()[-1]

    return stats
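
# Illustrative check only (hypothetical line, not from a real report): the
# summary's "Unit cell and space group:" line is assumed to carry the six cell
# parameters in columns 5..10, followed by the quoted space-group symbol.
_example = "Unit cell and space group:   57.8  57.8  150.0  90.0  90.0  90.0  'P 41 21 2'"
_parts = _example.split()
assert _parts[5:11] == ["57.8", "57.8", "150.0", "90.0", "90.0", "90.0"]
assert "".join(_parts[11:]).replace("'", "") == "P41212"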