def plgs(fastas, raw_folder, out_folder,
         apex3d_kwds={}, peptide3d_kwds={}, iadbs_kwds={}):
    """Run PLGS.

    Run a complete PLGS analysis.

    Args:
        fastas (str): Path to the fasta file.
        raw_folder (str): Path to the raw folder with the acquired Waters data.
        out_folder (str): Path to the folder for the output.
        apex3d_kwds (dict): Arguments for apex3d.
        peptide3d_kwds (dict): Arguments for peptide3d.
        iadbs_kwds (dict): Arguments for iadbs.

    Returns:
        dict: Parsed parameters from the xml files.
    """
    a, _ = apex3d(raw_folder, out_folder, **apex3d_kwds)  # this will make .bin only
    p, _ = peptide3d(a.with_suffix('.bin'), out_folder, **peptide3d_kwds)  # this will make .xml only
    i, _ = iadbs(p, out_folder, fastas, **iadbs_kwds)
    create_params_file(a, p, i)  # for projectizer2.0
    search_stats = get_search_stats(i)
    rows2csv(i.parent / 'stats.csv',
             [list(search_stats), list(search_stats.values())])
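
# A minimal usage sketch of plgs(), assuming apex3d, peptide3d, iadbs,
# create_params_file, get_search_stats and rows2csv are available in this
# module's scope; every path below is a hypothetical example, not real data.
if __name__ == "__main__":
    plgs(
        fastas="C:/SYMPHONY_PIPELINE/fastas/human.fasta",           # hypothetical fasta
        raw_folder="C:/SYMPHONY_PIPELINE/2019-008/O191017-04.raw",  # hypothetical raw folder
        out_folder="C:/SYMPHONY_PIPELINE/2019-008/O191017-04",      # hypothetical output folder
        apex3d_kwds={},      # extra keyword arguments forwarded to apex3d
        peptide3d_kwds={},   # ... to peptide3d
        iadbs_kwds={},       # ... to iadbs
    )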
# C:/SYMPHONY_PIPELINE/2019-008/O191017-04
local_folder = local_output_folder / sample_set / acquired_name
a = apex3d(raw_folder, local_folder, **apex3d_kwds)
if peptide3d_kwds['timeout'] >= 0:
    p = peptide3d(a.with_suffix('.bin'), local_folder, **peptide3d_kwds)
    if iadbs_kwds['timeout'] >= 0:
        i = iadbs(p, local_folder, fasta_file, parameters_file, **iadbs_kwds)
        if i is not None:
            params = create_params_file(a, p, i)  # for projectizer2.0
            with open(a.parent / "params.json", 'w') as f:
                json.dump(params, f)
            search_stats = get_search_stats(i)
            rows2csv(i.parent / 'stats.csv',
                     [list(search_stats), list(search_stats.values())])
if net_folder:  # Y:/RES/2019-008
    net_set_folder = Path(net_folder) / sample_set
    net_set_folder.mkdir(parents=True, exist_ok=True)
    # if reanalysing, the old folder is preserved,
    # and a version number is appended to the new one,
    # e.g. Y:/RES/2019-008/O191017-04
    # is replaced with: Y:/RES/2019-008/O191017-04__v1
    final_net_folder = find_free_path(net_set_folder / acquired_name)
    try:
        # replace this with the general safe moving routine with check-sums
        move_folder(local_folder, final_net_folder)
        if local_folder.parent.exists() and not any(local_folder.parent.glob('*')):
            local_folder.parent.rmdir()
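
# A minimal sketch of the versioning behaviour expected from find_free_path
# (the real helper is defined elsewhere in this codebase; this illustration
# only mirrors the "__v1", "__v2", ... suffixing described in the comments
# above and is an assumption, not the actual implementation).
from pathlib import Path

def find_free_path_sketch(path: Path) -> Path:
    """Return `path` if it does not exist yet, else append '__v1', '__v2', ..."""
    if not path.exists():
        return path
    version = 1
    while True:
        candidate = path.with_name(f"{path.name}__v{version}")
        if not candidate.exists():
            return candidate
        version += 1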
help= "Paths to outputs of the iaDBs. If ending with '.xml', will use directly. If supplied folders, these will be searched recursively for files like '**/*_IA_workflow.xml'." ) args = p.parse_args() try: print('Welcome to stats.csv maker') from fs_ops.csv import rows2csv from fs_ops.paths import find_suffixed_files from waters.parsers import iaDBsXMLparser xmls = list( find_suffixed_files(args.paths, ['**/*_IA_workflow.xml'], ['.xml'])) print('Supplied paths:') pprint(xmls) for xml in xmls: XML = iaDBsXMLparser(xml) info = XML.info() pprint(info) print('dumping to csv') rows2csv(xml.parent / 'stats.csv', [list(info), list(info.values())]) except Exception as e: print(e) print() print('Let Thor enlight your path with lightnings!') print('And have a nice day..') input('press ENTER')
import os
from pathlib import Path

from fs_ops.csv import rows2csv
from waters.parsers import iaDBsXMLparser

# explore a single iaDBs result file
data_path = Path("~/Projects/WatersData/O190303_78/O190303_78_IA_workflow.xml").expanduser()
assert data_path.exists()

iaDBsXML = iaDBsXMLparser(data_path)
prots = iaDBsXML.prot_ids()
iaDBsXML.get_tag_counts()
iaDBsXML.proteins()
iaDBsXML.products()
iaDBsXML.get_tag_counts()
iaDBsXML.parameters()
iaDBsXML.query_masses()
iaDBsXML.count_proteins_per_hit()

info = iaDBsXML.info()
rows2csv(Path("~/Projects/waters/data/info.csv").expanduser(),
         [list(info), list(info.values())])

# hard-link the console scripts into the Windows "Send To" menu
scripts_loc = Path(r"C:\Users\stefan\AppData\Local\Programs\Python\Python38\Scripts")
sendto_loc = Path(r"C:\Users\stefan\AppData\Roaming\Microsoft\Windows\SendTo")
os.link(scripts_loc / "iadbs2csv.py", sendto_loc / "_iadbs2csv.py")
os.link(scripts_loc / "iadbs2stats.py", sendto_loc / "_iadbs2stats.py")

prots.columns
tree = iaDBsXML.tree

# parse a second result file
data_path2 = Path("~/Projects/waters/data/T180222_10/T180222_10_IA_workflow.xml").expanduser()
iaDBsXML2 = iaDBsXMLparser(data_path2)
print(iaDBsXML.get_tag_counts())