import pandas as pd
import numpy as np
import pathlib
import pickle
import matplotlib

# Select the 'Agg' backend for matplotlib.
matplotlib.use('Agg')

# Disabled debugging aid: flip the condition to substitute a canned argument
# vector for the real command line.
if 0:
    sys.argv = [
        "%(prog)s",
        "./RT_align",
        "./iproph",
        "/data/dattam/PROJECTS/CoreFacility/PDLC/dda-lib-atcc-mm-R1/workdir/iproph",
        "/data/teog/tpp5/bin/",
    ]

# Positional arguments:
#   argv[1]  directory that will receive the RT-alignment data
#   argv[2]  DIA pepXML directory
#   argv[3]  DDA pepXML directory, or the literal string "none" when absent
#   argv[4]  TPP binaries directory
has_DDA = sys.argv[3] != "none"
rtalign_data_directory = str_to_path(sys.argv[1])
dia_pepxml_directory = str_to_path(sys.argv[2])
if has_DDA:
    dda_pepxml_directory = str_to_path(sys.argv[3])
else:
    dda_pepxml_directory = None
TPP_BIN = str_to_path(sys.argv[4])

# exist_ok=False: raises FileExistsError when the directory already exists.
rtalign_data_directory.mkdir(parents=True, exist_ok=False)
rt_dicts_file = rtalign_data_directory / "RT_dicts.pickle"
PEPTIDE_PROB = 0.9

# Resolved forms of every configured path; an unset (None) entry stays None.
abs_paths = [
    path.resolve() if path is not None else None
    for path in [
        rtalign_data_directory,
        dia_pepxml_directory,
        dda_pepxml_directory,
        TPP_BIN / 'indexmzXML',
    ]
]

# strict=True makes resolve() raise FileNotFoundError if indexmzXML is missing.
print((TPP_BIN / 'indexmzXML').resolve(strict=True))
# if __name__=='__main__':
# def main():
# NOTE(review): the original indentation of this section was lost; the layout
# below is reconstructed from syntax alone -- confirm block boundaries.

# Run any doctests defined in this module before doing real work.
import doctest
doctest.testmod(verbose=False)

# Directory holding this script; when that directory's name ends in '.pyz',
# step one level further up (presumably the script is running from inside a
# zipapp archive -- confirm).
script_dir = pathlib.Path(__file__).resolve().parent
if script_dir.suffix == '.pyz':
    script_dir = script_dir.parent

# philosopher_path = str_to_path(sys.argv[1]).resolve(strict=True)

# Bundled SpectraST executable for the current platform. Only 'linux' and
# 'win32' are mapped, so any other sys.platform raises KeyError; strict=True
# raises FileNotFoundError when the executable is missing.
SPECTRAST_PATH = (
    script_dir
    / {
        'linux': 'linux/spectrast',
        'win32': 'win/spectrast.exe',
    }[sys.platform]
).resolve(strict=True)

# Positional arguments; the first three must already exist on disk
# (strict=True), the output directory is not checked here.
fasta = str_to_path(sys.argv[1]).resolve(strict=True)
# msproteomicstools_bin_path = str_to_path(sys.argv[3]).resolve(strict=True)
# msproteomicstools_bin_path = str_to_path(pathlib.Path(sys.executable).parent).resolve(strict=True)
iproph_RT_aligned = str_to_path(sys.argv[2]).resolve(strict=True)
prot_xml_file = str_to_path(sys.argv[3]).resolve(strict=True)
output_directory = str_to_path(sys.argv[4])

# When PATHEXT is set, put '.py' at the front of it.
if 'PATHEXT' in os.environ:
    os.environ['PATHEXT'] = os.pathsep.join(['.py', os.environ['PATHEXT']])
# Put the current working directory at the front of PATH.
# NOTE(review): assumed to be unconditional (outside the PATHEXT check) -- confirm.
os.environ['PATH'] = os.pathsep.join([os.getcwd(), os.environ['PATH']])

# shutil.which returns None when 'philosopher' is not on PATH, in which case
# pathlib.Path raises TypeError here.
philosopher = pathlib.Path(shutil.which('philosopher'))
# philosopher = philosopher_path
# msproteomicstools_path = pathlib.Path('/storage/teog/anaconda3/bin')

align_with_iRT: bool = True
# spectrast2spectrast_irt_py_path = msproteomicstools_bin_path / 'spectrast2spectrast_irt.py'
# "/data/dattam/PROJECTS/CoreFacility/PDLC/dda-lib-atcc-mm-singleInjection-R1/workdir/con_lib.tsv" # "/data/teog/LFQbench/TTOF6600_SWATH_1ug_64windows_subset/workdir_tpp5/libgen/con_lib.tsv", # "/data/teog/LFQbench/TTOF6600_SWATH_1ug_64windows_subset/workdir_tpp5/libgen_DDA/con_lib.tsv" # "/data/teog/LFQbench/TTOF6600_SWATH_1ug_32windows/workdir/con_lib.tsv", # "/data/teog/LFQbench/DDA/workdir/lib_TTOF6600_32win/con_lib.tsv", # "/data/teog/LFQbench/TTOF6600_SWATH_1ug_64windows/workdir/con_lib.tsv", # "/data/teog/LFQbench/DDA/workdir/lib_TTOF6600_64win/con_lib.tsv", # "/data/dattam/PROJECTS/CoreFacility/PDLC/dia-atcc-mm-R1/workdir/con_lib.tsv", # "/data/dattam/PROJECTS/CoreFacility/PDLC/dda-lib-atcc-mm-R1/workdir/con_lib.tsv" #"/data/dattam/PROJECTS/CoreFacility/PDLC/dda-lib-atcc-mm-singleInjection-R1/workdir/con_lib.tsv" ] dia_con_lib_path = str_to_path(sys.argv[1]) dda_con_lib_path = str_to_path(sys.argv[2]) assert dia_con_lib_path.exists() assert dda_con_lib_path.exists() dia = pd.read_csv(dia_con_lib_path, sep='\t') dia_transition_group_id = dia["transition_group_id"].values # last int of the trans group id dia_trans_grp_id_end = int(dia_transition_group_id[-1].split("_")[0]) dia_startidx = np.fromiter((next(v) for k, v in itertools.groupby( dia.index, lambda x: dia_transition_group_id[x])), dtype=dia.index.dtype) dia_pep_name_charge = dia[["FullUniModPeptideName", "PrecursorCharge"]].values dia_pep_name_charge_set = { tuple(dia_pep_name_charge[lo])