def run_waters_pipeline(raw_folder,
                        fasta_file,
                        out_folder="C:/Symphony/Temp/test",
                        parameters_file="C:/Symphony/Search/251.xml",
                        apex_kwds=None,
                        pep3d_kwds=None,
                        iadbs_kwds=None):
    """Run Waters pipeline.

    A convenience wrapper that chains apex3d, peptide3d, and iaDBs: the
    '.bin' output of apex3d is fed to peptide3d, and the '.xml' output of
    peptide3d is fed to iaDBs.

    Args:
        raw_folder (str): a path to the input folder with raw Waters data.
        fasta_file (str): Path to the fasta file used in iaDBs peptide search.
        out_folder (str): Path to where to place the output.
        parameters_file (str): Path to the search parameters used in iaDBs peptide search.
        apex_kwds (dict): Other named arguments to apex3d (None means none).
        pep3d_kwds (dict): Other named arguments to peptide3d (None means none).
        iadbs_kwds (dict): Other named arguments to iaDBs (None means none).

    Note:
        This wrapper already passes ``write_binary`` and ``capture_output``
        to apex3d, ``write_binary``, ``min_LEMHPlus`` and ``capture_output``
        to peptide3d, and ``fasta_file``, ``parameters_file`` and
        ``capture_output`` to iaDBs. Repeating any of these keys in the
        corresponding ``*_kwds`` dict raises ``TypeError`` (duplicate
        keyword argument).

    Returns:
        tuple: Paths to Apex3D, Peptide3D and iaDBs output files, i.e. the
        apex3d output path with a '.bin' suffix, the peptide3d output path
        with an '.xml' suffix, and the first value returned by iaDBs.
    """
    # Use None instead of `{}` as the default: a dict literal in the signature
    # is created once and shared by every call of the function.
    apex_kwds = {} if apex_kwds is None else apex_kwds
    pep3d_kwds = {} if pep3d_kwds is None else pep3d_kwds
    iadbs_kwds = {} if iadbs_kwds is None else iadbs_kwds

    raw_folder = Path(raw_folder)
    out_folder = Path(out_folder)

    # Each step returns a pair; only the output path (first item) is used
    # here, the second item is intentionally discarded.
    apex_out_path, _ = apex3d(raw_folder,
                              out_folder,
                              write_binary=True,
                              capture_output=True,
                              **apex_kwds)
    apex_out_bin = apex_out_path.with_suffix('.bin')

    pep3d_out_path, _ = peptide3d(apex_out_bin,
                                  out_folder,
                                  write_binary=True,
                                  min_LEMHPlus=350.0,
                                  capture_output=True,
                                  **pep3d_kwds)
    pep3d_out_xml = pep3d_out_path.with_suffix('.xml')

    iadbs_out_xml, _ = iadbs(pep3d_out_xml,
                             out_folder,
                             fasta_file=Path(fasta_file),
                             parameters_file=Path(parameters_file),
                             capture_output=True,
                             **iadbs_kwds)
    return apex_out_bin, pep3d_out_xml, iadbs_out_xml
raw_folder = rawdatapath.stem out_folder = Path( "D:/projects/proteome_tools/RES/pool2") / raw_folder[0:5] / raw_folder if debug: print('rawdatapath', '\n\t', rawdatapath, '\n\t', str(rawdatapath)) print('fastapath', '\n\t', fastapath, '\n\t', str(fastapath)) print('raw_folder', '\n\t', raw_folder) print('out_folder', '\n\t', out_folder) print('parameters_file', '\n\t', parameters_file) print('timeout\n\t{} h'.format(timeout / 3600)) timings[raw_folder] = {} try: t_apex = time() apexOut, apex_proc = apex3d(rawdatapath, out_folder, write_binary=True, capture_output=capture_output, debug=debug, timeout=timeout) timings[raw_folder]['apex3d'] = t_apex = time() - t_apex if debug: print(apexOut, apex_proc) except subprocess.TimeoutExpired: print("apex3d reached a timeout of {} hour(s).".format(timeout / 3600)) OK = False except StdErr as e: print("Sometimes the errors are not reflected in the output.") print(e.err) apexOut = out_folder / (out_folder.name + "_Apex3D.bin") exceptions[rawdatapath].append(e) OK = False if OK:
# Scratch notes: launching an external command, killing it, and handling
# timeouts around apex3d / Popen.communicate.
# NOTE(review): `subprocess`, `cmd`, `make_log` and `algo` are not defined in
# the visible part of this file - confirm where they come from before running.
# NOTE(review): the original whitespace was lost; the nesting below is the
# minimal one that parses. In particular, confirm whether the final `kill = ...`
# line belongs inside the `if make_log:` branch.

# Start the command with its stdout captured through a pipe.
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
# Windows `Taskkill`: /IM selects processes by image name, /F forces termination.
subprocess.Popen("Taskkill /IM Apex3D64.exe /F")

from pathlib import Path
from vodkas import apex3d
from subprocess import TimeoutExpired

raw_folder = "C:/ms_soft/MasterOfPipelines/Data/O190302_01.raw"
output_dir = "C:/SYMPHONY_VODKAS/temp/subproc_test"
try:
    # Call apex3d with a very short timeout and print the TimeoutExpired
    # exception if one is raised (timeout unit presumably seconds - TODO confirm).
    apex3d(raw_folder, output_dir, timeout=2, make_log=True)
except TimeoutExpired as e:
    print(e)

try:
    # Wait for `proc` to finish, collecting its output, for at most 15 seconds.
    outs, errs = proc.communicate(timeout=15)
except TimeoutExpired:
    # The child is not killed automatically on timeout: kill it, then call
    # communicate() again to collect whatever output remains.
    proc.kill()
    outs, errs = proc.communicate()

if make_log:
    # NOTE(review): `output_dir` is assigned a plain str above, and str does not
    # support the `/` operator; this line presumably expects a Path - confirm.
    log = output_dir/"apex3d.log"
# Same Taskkill command as above, with the image name taken from `algo.name`.
kill = "Taskkill /IM {} /F".format(algo.name)
"C:/SYMPHONY_VODKAS/temp/O1903/O190302_01_IA_workflow.xml") pprint(xml_params['apex3d']) pprint(xml_params['apex3d']) proj_tag = "O1903" p = Path() len(list(p.parent.glob(proj_tag))) # there is a problem raw_folder = 'C:/SYMPHONY_VODKAS/temp/O1903' out_folder = 'C:/SYMPHONY_VODKAS/temp' network_out_folder = 'J:/test_RES' raw_folder = 'C:/SYMPHONY_VODKAS/temp/O1904' out_folder = 'C:/SYMPHONY_VODKAS/temp' network_out_folder = 'J:/test_RES' raw = Path(raw_folder) out = Path(out_folder) net_out = Path(network_out_folder) proj_tag = raw.name[:5] from vodkas import apex3d from pathlib import Path pr = apex3d('a', 'b') pr.communicate() pr.poll() pr.stderr
log.info(f"analyzing folders: {dump2json(raw_folders)}") for raw_folder in raw_folders: try: if not raw_folder.is_dir(): log.error(f"missing: {raw_folder}") continue log.info(f"analyzing: {raw_folder}") sender.update_group(raw_folder) # wtf??? change name .... acquired_name = raw_folder.stem header_txt = parse_header_txt(raw_folder / '_HEADER.TXT') sample_set = header_txt['Sample Description'][:8] # C:/SYMPHONY_PIPELINE/2019-008/O191017-04 local_folder = local_output_folder / sample_set / acquired_name a = apex3d(raw_folder, local_folder, **apex3d_kwds) if peptide3d_kwds['timeout'] >= 0: p = peptide3d(a.with_suffix('.bin'), local_folder, **peptide3d_kwds) if iadbs_kwds['timeout'] >= 0: i = iadbs(p, local_folder, fasta_file, parameters_file, **iadbs_kwds) if i is not None: params = create_params_file(a, p, i) # for projectizer2.0 with open(a.parent / "params.json", 'w') as f: json.dump(params, f) search_stats = get_search_stats(i) rows2csv(i.parent / 'stats.csv', [list(search_stats), list(search_stats.values())]) if net_folder:
from pathlib import Path
from vodkas import apex3d, peptide3d, iadbs
from vodkas.fs import cp

# Manual run of the three-step chain apex3d -> peptide3d -> iadbs on a fixed
# local data set. The statements run at import time (the `__main__` guard in
# the original script was commented out).

# A big alternative input, kept for reference:
#   //MSSERVER/restoredData/proteome_tools/net/idefix/WIRD_GESICHERT/T1707/T170722_03.raw
raw = Path("C:/ms_soft/MasterOfPipelines/RAW/O1903/O190302_01.raw")  # small
temp = Path("C:/Symphony/Temp/test")
# TODO: what should happen if this file already existed?

# Keyword arguments of each step, gathered in one place.
_apex_opts = dict(write_binary=True, capture_output=True)
_pep3d_opts = dict(write_binary=True, min_LEMHPlus=350.0, capture_output=True)
_iadbs_opts = dict(fasta_file="C:/Symphony/Search/wheat.fasta",
                   parameters_file="C:/Symphony/Search/251.xml",
                   capture_output=True)

# Step 1: apex3d on the raw folder; the next step reads its '.bin' output.
# (An earlier, commented-out variant built this path by hand as
# temp/(raw.stem + "_Apex3D").)
apexOutPath, apex_proc = apex3d(raw, temp, **_apex_opts)
apexOutBIN = apexOutPath.with_suffix('.bin')

# Step 2: peptide3d on the apex3d binary; the next step reads its '.xml' output.
# (An earlier, commented-out variant built this path by hand as
# temp/(raw.stem + "_Pep3D_Spectrum").)
pep3dOutPath, pep_proc = peptide3d(apexOutBIN, temp, **_pep3d_opts)
pep3dOutXML = pep3dOutPath.with_suffix('.xml')

# Step 3: iadbs search on the peptide3d XML.
iadbsOutPath, iadbs_proc = iadbs(pep3dOutXML, temp, **_iadbs_opts)