def _scrape_outcome(project: Project, logs_dir: Path) -> Optional[ToolStatus]:
    """
    examine xia2 logs, to try to figure out the results of the processing run
    """
    if not logs_dir.is_dir():
        return None

    log_file = Path(logs_dir, "xia2.txt")
    if not log_file.exists():
        return None

    #
    # if the log file contains one of the lines:
    #
    # 'Scaled reflections:'
    # 'Status: normal termination'
    #
    # then, most likely, processing was successful
    #
    for line in read_text_lines(project, log_file):
        if line.startswith("Scaled reflections:"):
            return ToolStatus.SUCCESS
        if line.startswith("Status: normal termination"):
            return ToolStatus.SUCCESS

    # magic lines not found, probably something went wrong
    return ToolStatus.FAILURE
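# A minimal sketch of the read_text_lines() helper all of these scrapers rely
# on, inferred from its call sites and the tests further down: it yields
# decoded text lines, decrypting the file first when the project is encrypted.
# Everything here (the 'encryption_key' check, EncryptedFile supporting
# read()) is an assumption for illustration, not the repo's actual
# implementation.
from pathlib import Path
from typing import Iterator


def _read_text_lines_sketch(project, file_path: Path) -> Iterator[str]:
    if getattr(project, "encryption_key", None):
        # encrypted project: decrypt the whole file, then split into lines
        with encryption.EncryptedFile(project.encryption_key, file_path) as f:
            yield from f.read().decode().splitlines()
    else:
        # plain-text project: stream lines straight from disk
        with file_path.open() as f:
            for line in f:
                yield line.rstrip("\n")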
def pandda_to_fragmax_html(project: Project, method: str, date: str):
    pandda_analyse_html = get_analysis_html_file(project, method, date)

    pandda_html = ""
    for line in read_text_lines(project, pandda_analyse_html):
        if '<th class="text-nowrap" scope="row">' in line:
            dt = line.split('scope="row">')[-1].split("<")[0]
            line = (
                f'<td class="sorting_1" style="text-align: center;" >'
                f'<a href="/pandda_densityA/{method}/{dt}" target="_blank" class="btn">'
                f"Open</a></td>{line}"
            )
        pandda_html += f"{line}\n"

    pandda_html = pandda_html.replace(
        '<th class="text-nowrap">Dataset</th>',
        '<th class="text-nowrap">Open</th><th class="text-nowrap">Dataset</th>',
    )
    pandda_html = pandda_html.replace(
        'class="table table-bordered table-striped"',
        'class="table table-bordered table-striped" data-page-length="50"',
    )
    pandda_html = pandda_html.replace(
        "PANDDA Processing Output", "PANDDA Processing Output for " + method
    )

    return pandda_html
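# Illustration of the row rewrite above, using a made-up dataset name. Given a
# pandda-analyse table row such as:
#
#   <th class="text-nowrap" scope="row">MyProt-x0001</th>
#
# the loop extracts 'MyProt-x0001' as 'dt' and prepends an "Open" link cell
# pointing at the density view, producing (for a hypothetical method):
#
#   <td class="sorting_1" style="text-align: center;" >
#   <a href="/pandda_densityA/<method>/MyProt-x0001" target="_blank" class="btn">
#   Open</a></td><th class="text-nowrap" scope="row">MyProt-x0001</th>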
def _scrape_blobs(project, logs_dir):
    for line in read_text_lines(project, Path(logs_dir, "dimple.log")):
        if line.startswith(BLOBS_LINE):
            return _cut_prefix_strip(BLOBS_LINE, line)

    # no blobs list found in the log file,
    # dimple probably could not find any 'Unmodelled blobs'
    return "[]"
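# A minimal sketch of the two helpers assumed above: BLOBS_LINE is the prefix
# of dimple.log's blobs line, and _cut_prefix_strip() drops that prefix and
# surrounding whitespace. The names come from the calls above; the constant's
# value and the function body are illustrative guesses.
BLOBS_LINE = "blobs: "


def _cut_prefix_strip(prefix: str, line: str) -> str:
    # drop the known prefix, then trim whitespace around the remainder
    return line[len(prefix):].strip()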
def test_read_text_lines(self):
    """
    test read_text_lines() on un-encrypted project
    """
    # write test file
    _write_file(self.file_path)

    # read file's lines
    lines = read_text_lines(self.proj, self.file_path)

    # check that we get expected lines
    self.assertListEqual(list(lines), _expected_lines())
def test_read_text_lines(self):
    """
    test read_text_lines() on encrypted project
    """
    # write test file
    with encryption.EncryptedFile(self.project.encryption_key, self.file_path) as f:
        f.write(DUMMY_DATA)

    # read file's lines
    lines = read_text_lines(self.project, self.file_path)

    # check that we get expected lines
    self.assertListEqual(list(lines), _expected_lines())
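# The tests above lean on a few module-level fixtures. A plausible sketch of
# them, assuming DUMMY_DATA is a small multi-line byte string; the real
# contents and helpers in the repo may differ.
DUMMY_DATA = b"first line\nsecond line\nthird line\n"


def _write_file(file_path):
    # write the dummy data as a plain, un-encrypted file
    with open(file_path, "wb") as f:
        f.write(DUMMY_DATA)


def _expected_lines():
    # the lines read_text_lines() should yield for DUMMY_DATA
    return DUMMY_DATA.decode().splitlines()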
def _scrape_isa(project: Project, dataset):
    edna_dir, _ = _find_results(project, dataset)

    isa = None
    for log in _get_xscale_logs(edna_dir):
        log_lines = list(read_text_lines(project, log))
        for n, line in enumerate(log_lines):
            if "ISa" in line:
                # the ISa value is expected on the line after the header
                if log_lines[n + 1].split():
                    isa = log_lines[n + 1].split()[-2]
                    if isa == "b":
                        # we grabbed the 'a  b  ISa' column-header line
                        # rather than a values row
                        isa = ""
    return isa
def _scrape_isa(project, xds_dir: Path) -> Optional[str]:
    log_file = Path(xds_dir, "LogFiles", "AUTOMATIC_DEFAULT_XSCALE.log")
    if not log_file.is_file():
        # log file not found, treat as unknown ISa
        return None

    logfile = list(read_text_lines(project, log_file))

    isa = None
    for n, line in enumerate(logfile):
        if "ISa" in line:
            if logfile[n + 3].split():
                isa = logfile[n + 3].split()[-2]
    return isa
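# The 'n + 3' lookup above can raise IndexError if the 'ISa' marker happens to
# sit within the last three lines of the log. A slightly hardened sketch of
# the same lookup (behaviour otherwise unchanged; read_text_lines and the
# fixed log path are as above):
def _scrape_isa_safe(project, xds_dir: Path) -> Optional[str]:
    log_file = Path(xds_dir, "LogFiles", "AUTOMATIC_DEFAULT_XSCALE.log")
    if not log_file.is_file():
        return None

    logfile = list(read_text_lines(project, log_file))

    isa = None
    for n, line in enumerate(logfile):
        # only look three lines ahead when those lines actually exist
        if "ISa" in line and n + 3 < len(logfile):
            values = logfile[n + 3].split()
            if values:
                isa = values[-2]
    return isa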
def _scrape_blobs(project, results_dir: Path) -> str:
    blobs_log = Path(results_dir, "blobs.log")
    if not blobs_log.is_file():
        return "[]"

    blobs = []

    #
    # look for 'INFO:: cluster at xyz ...' lines in the blobs.log file,
    # and parse out blob coordinates
    #
    for line in read_text_lines(project, blobs_log):
        match = CLUSTER_RE.match(line)
        if match is None:
            continue

        x, y, z = match.groups()
        blobs.append([float(x), float(y), float(z)])

    return str(blobs)
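# A plausible sketch of the CLUSTER_RE pattern used above, assuming blobs.log
# contains find-blobs style lines such as
# 'INFO:: cluster at xyz = (  12.3,  45.6,   7.8)'; the exact pattern in the
# repo may differ.
import re

CLUSTER_RE = re.compile(
    r"INFO:: cluster at xyz = \(\s*"
    r"(-?\d+\.?\d*),\s*(-?\d+\.?\d*),\s*(-?\d+\.?\d*)"
)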
def _parse_xia2_html(project: Project, xia2_html: Path, stats: ProcStats):
    # pick per-statistic values out of xia2's summary table
    for line in read_text_lines(project, xia2_html):
        if "High resolution limit " in line:
            stats.high_resolution_average = line.split()[-3]
            stats.high_resolution_out = line.split()[-1]
        if "Low resolution limit " in line:
            stats.low_resolution_average = line.split()[-3]
            stats.low_resolution_out = line.split()[-1]
        if "Completeness " in line:
            stats.completeness_average = line.split()[-3]
            stats.completeness_out = line.split()[-1]
        if "Multiplicity " in line:
            stats.multiplicity = line.split()[-3]
        if "Rmeas(I+/-) " in line:
            stats.r_meas_average = line.split()[-3]
            stats.r_meas_out = line.split()[-1]
        if "Total unique" in line:
            stats.unique_reflections = line.split()[-3]
        if "Total observations" in line:
            stats.reflections = line.split()[-3]
        if "Mosaic spread" in line:
            stats.mosaicity = line.split()[-1]
        if "I/sigma " in line:
            stats.i_sig_average = line.split()[-3]
            stats.i_sig_out = line.split()[-1]
        if "Space group: " in line:
            stats.space_group = "".join(line.split()[2:])
        if "Unit cell: " in line:
            vals = line.split(":")[1].strip()
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = vals.split(", ")
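# The [-3]/[-1] indexing above assumes xia2's summary-table layout of three
# numeric columns per statistic (overall, inner shell, outer shell), i.e. a
# line shaped roughly like this (values illustrative):
#
#   High resolution limit    1.45    6.48    1.45
#
# so split()[-3] picks the overall value and split()[-1] the outer-shell one,
# matching the *_average and *_out fields of ProcStats.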
def _parse_results_log(project: Project, results_file: Path, stats: ProcStats):
    def _parse_line(line, prefix, parser_func):
        text = line[len(prefix):].strip()
        return parser_func(text)

    def _space_group(text):
        # remove all spaces in space group string
        spg = "".join(text.split(" "))
        return spg

    def _unit_cell(text):
        return text.split()

    def _resolution(text):
        return RESOLUTION_RE.match(text).groups()

    def _pair(text):
        return PAIR_RE.match(text).groups()

    def _first_number(text):
        return OPTIONAL_PAIR_RE.match(text).groups()[0]

    def _mosaicity(text):
        if text == "-":
            return None
        return _first_number(text)

    for line in read_text_lines(project, results_file):
        if not line.startswith("    "):
            # all lines we want to parse are indented with 4 spaces,
            # ignore all other lines
            continue

        line = line.strip()
        if line.startswith(SPACE_GROUP):
            stats.space_group = _parse_line(line, SPACE_GROUP, _space_group)
        elif line.startswith(UNIT_CELL):
            (
                stats.unit_cell_a,
                stats.unit_cell_b,
                stats.unit_cell_c,
                stats.unit_cell_alpha,
                stats.unit_cell_beta,
                stats.unit_cell_gamma,
            ) = _parse_line(line, UNIT_CELL, _unit_cell)
        elif line.startswith(RESOLUTION):
            (
                stats.low_resolution_average,
                stats.high_resolution_average,
                stats.low_resolution_out,
                stats.high_resolution_out,
            ) = _parse_line(line, RESOLUTION, _resolution)
        elif line.startswith(REFLECTIONS):
            stats.reflections = _parse_line(line, REFLECTIONS, _first_number)
        elif line.startswith(UNIQUE_REFLECTIONS):
            stats.unique_reflections = _parse_line(
                line, UNIQUE_REFLECTIONS, _first_number
            )
        elif line.startswith(I_SIGI):
            stats.i_sig_average, stats.i_sig_out = _parse_line(line, I_SIGI, _pair)
        elif line.startswith(MULTIPLICITY):
            stats.multiplicity = _parse_line(line, MULTIPLICITY, _first_number)
        elif line.startswith(R_MEAS):
            stats.r_meas_average, stats.r_meas_out = _parse_line(line, R_MEAS, _pair)
        elif line.startswith(COMPLETENESS):
            stats.completeness_average, stats.completeness_out = _parse_line(
                line, COMPLETENESS, _pair
            )
        elif line.startswith(MOSAICITY):
            stats.mosaicity = _parse_line(line, MOSAICITY, _mosaicity)
        elif line.startswith(ISA):
            stats.isa = _parse_line(line, ISA, lambda x: x)

    return stats
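# A sketch of the prefix constants and regular expressions _parse_results_log()
# is assumed to use, inferred from the parser functions above. All values here
# are guesses for illustration; the repo's actual constants may differ.
import re

SPACE_GROUP = "Space group:"
UNIT_CELL = "Unit cell:"
RESOLUTION = "Resolution:"
REFLECTIONS = "Total observations:"
UNIQUE_REFLECTIONS = "Unique reflections:"
I_SIGI = "I/sigI:"
MULTIPLICITY = "Multiplicity:"
R_MEAS = "Rmeas:"
COMPLETENESS = "Completeness:"
MOSAICITY = "Mosaicity:"
ISA = "ISa:"

# 'low - high (low_out - high_out)' style resolution ranges,
# groups in the order the RESOLUTION branch unpacks them
RESOLUTION_RE = re.compile(
    r"([\d.]+)\s*-\s*([\d.]+)\s*\(([\d.]+)\s*-\s*([\d.]+)\)"
)
# 'average (outer shell)' value pairs
PAIR_RE = re.compile(r"([\d.]+)\s*\(([\d.]+)\)")
# a value with an optional '(outer shell)' part; groups()[0] is the first number
OPTIONAL_PAIR_RE = re.compile(r"([\d.]+)(?:\s*\(([\d.]+)\))?")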