Exemple #1
0
def run_waters_pipeline(raw_folder,
                        fasta_file,
                        out_folder="C:/Symphony/Temp/test",
                        parameters_file="C:/Symphony/Search/251.xml",
                        apex_kwds=None,
                        pep3d_kwds=None,
                        iadbs_kwds=None):
    """Run Waters pipeline.

    A convenience wrapper around apex3d, peptide3d, and iaDBs, run in that
    order, each step consuming the output of the previous one.

    The wrapper always passes ``write_binary=True`` and ``capture_output=True``
    to apex3d and peptide3d (plus ``min_LEMHPlus=350.0`` to peptide3d), and
    ``fasta_file``, ``parameters_file`` and ``capture_output=True`` to iaDBs.
    Repeating any of those names in the corresponding ``*_kwds`` dict raises
    ``TypeError`` (duplicate keyword argument).

    Args:
        raw_folder (str): a path to the input folder with raw Waters data.
        fasta_file (str): Path to the fasta file used in iaDBs peptide search.
        out_folder (str): Path to where to place the output.
        parameters_file (str): Path to the search parameters used in iaDBs peptide search.
        apex_kwds (dict): Other named arguments to apex3d (None means none).
        pep3d_kwds (dict): Other named arguments to peptide3d (None means none).
        iadbs_kwds (dict): Other named arguments to iaDBs (None means none).
    Returns:
        tuple: the Apex3D output path with a '.bin' suffix, the Peptide3D
        output path with an '.xml' suffix, and the output path returned by
        iaDBs.
    """
    raw_folder = Path(raw_folder)
    out_folder = Path(out_folder)

    # Each tool returns an (output path, process) pair; only the path is used.
    apex_out, _ = apex3d(raw_folder,
                         out_folder,
                         write_binary=True,
                         capture_output=True,
                         **(apex_kwds or {}))
    apex_bin = apex_out.with_suffix('.bin')

    pep3d_out, _ = peptide3d(apex_bin,
                             out_folder,
                             write_binary=True,
                             min_LEMHPlus=350.0,
                             capture_output=True,
                             **(pep3d_kwds or {}))
    pep3d_xml = pep3d_out.with_suffix('.xml')

    iadbs_xml, _ = iadbs(pep3d_xml,
                         out_folder,
                         fasta_file=Path(fasta_file),
                         parameters_file=Path(parameters_file),
                         capture_output=True,
                         **(iadbs_kwds or {}))
    return apex_bin, pep3d_xml, iadbs_xml
         if debug:
             print(pep3dOut, pep_proc)
     except Exception as e:
         print(e)
         OK = False
         exceptions[rawdatapath].append(e)
     except subprocess.TimeoutExpired:
         print("pep3d reached a timeout of {} hour(s).".format(timeout /
                                                               3600))
         OK = False
 if OK:
     # Run the iaDBs search on the Peptide3D XML, timing the call.
     try:
         t_iadbs = time()
         iadbsOut, iadbs_proc = iadbs(pep3dOut.with_suffix('.xml'),
                                      out_folder,
                                      fasta_file=fastapath,
                                      parameters_file=parameters_file,
                                      capture_output=capture_output,
                                      debug=debug)
         # Store the elapsed wall-clock time (seconds) for this raw folder.
         timings[raw_folder]['iadbs'] = t_iadbs = time() - t_iadbs
         if debug:
             print(iadbsOut, iadbs_proc)
     except subprocess.TimeoutExpired:
         # Must come before `except Exception`: TimeoutExpired is a subclass
         # of Exception, so the generic handler would otherwise shadow it.
         print("iadbs reached a timeout of {} hour(s).".format(timeout /
                                                               3600))
         OK = False
     except Exception as e:
         # Any other failure is printed, recorded, and stops the pipeline.
         print(e)
         exceptions[rawdatapath].append(e)
         OK = False
 # iadbsOut = Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03/T170722_03_IA_workflow")
 # out_folder= Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03")
Exemple #3
0
from pathlib import Path

from vodkas import iadbs, peptide3d
from vodkas.fastas import get_fastas
from vodkas.xml_parser import create_params_file

# Folder holding an existing Apex3D binary output for one acquisition.
p = Path(r'C:\SYMPHONY_VODKAS\temp\2019-095\O190920_21')
a = p/'O190920_21_Apex3D.bin'
# peptide3d returns a pair; only the first element (the output path) is used.
o, _ = peptide3d(a, p)

fastas = get_fastas('human')
# iaDBs is run on the '.xml' sibling of the Peptide3D output.
i, _ = iadbs(o.with_suffix('.xml'), p, fastas)

create_params_file(a, o, i)


U:\Matteo\20191211_2019-015_reprocessing_with_Matteos_pipeline
#                                    write_binary=True,
#                                    min_LEMHPlus=350.0,
#                                    capture_output=True,
#                                    debug=True)
# pep3dOutXML = pep3dOut.with_suffix('.xml')
# Peptide3D spectrum XML for this raw folder, looked up in the temp folder
# (raw_folder is concatenated with a str here, so it is expected to be a str).
pep3dOutXML = temp_folder / (raw_folder + "_Pep3D_Spectrum.xml")
fasta_file = Path(settings[raw_folder])
if debug:
    print(pep3dOutXML, fasta_file, str(fasta_file))

# One iaDBs search, followed by a CSV export, per entry of <proj_folder>/params.
for parameters_file in (proj_folder / "params").iterdir():
    # Each parameters file gets its own output folder named after its stem.
    search_folder = temp_folder / parameters_file.stem
    if debug:
        print(parameters_file)
        print(search_folder)
    iadbsOut, iadbs_proc = iadbs(
        pep3dOutXML,
        search_folder,
        fasta_file=fasta_file,
        parameters_file=parameters_file,
        capture_output=True,
        debug=True,
    )
    if debug:
        print(iadbsOut, iadbs_proc)
    report, wx2csv_proc = wx2csv(
        iadbsOut.with_suffix('.xml'),
        search_folder / "report.csv",
        debug=debug,
    )
# cp(apexOutBIN,  final_folder)
# cp(pep3dOutXML, final_folder)
# cp(iadbsOutXML, final_folder)
# cp(temp_folder/'apex3d.log', final_folder)
# cp(temp_folder/'peptide3d.log', final_folder)
# cp(temp_folder/'iadbs.log', final_folder)
#TODO: add file removal!!!!
Exemple #5
0
            log.error(f"missing: {raw_folder}")
            continue
        log.info(f"analyzing: {raw_folder}")

        sender.update_group(raw_folder)  # wtf??? change name ....
        acquired_name = raw_folder.stem
        header_txt = parse_header_txt(raw_folder / '_HEADER.TXT')
        sample_set = header_txt['Sample Description'][:8]
        #                   C:/SYMPHONY_PIPELINE/2019-008/O191017-04
        local_folder = local_output_folder / sample_set / acquired_name
        a = apex3d(raw_folder, local_folder, **apex3d_kwds)
        if peptide3d_kwds['timeout'] >= 0:
            p = peptide3d(a.with_suffix('.bin'), local_folder,
                          **peptide3d_kwds)
            if iadbs_kwds['timeout'] >= 0:
                i = iadbs(p, local_folder, fasta_file, parameters_file,
                          **iadbs_kwds)
                if i is not None:
                    params = create_params_file(a, p, i)  # for projectizer2.0
                    with open(a.parent / "params.json", 'w') as f:
                        json.dump(params, f)
                    search_stats = get_search_stats(i)
                    rows2csv(i.parent / 'stats.csv',
                             [list(search_stats),
                              list(search_stats.values())])
        if net_folder:
            #                     Y:/RES/2019-008
            net_set_folder = Path(net_folder) / sample_set
            net_set_folder.mkdir(parents=True, exist_ok=True)
            # if reanalysing, the old folder is preserved,
            # and a version number appended to the new one
            # e.g.              Y:/RES/2019-008/O191017-04
from pathlib import Path

from vodkas import apex3d, peptide3d, iadbs
from vodkas.fs import cp

# if __name__ == "__main__":
# raw = Path("//MSSERVER/restoredData/proteome_tools/net/idefix/WIRD_GESICHERT/T1707/T170722_03.raw")#big
# Small raw acquisition used for this manual end-to-end run.
raw = Path("C:/ms_soft/MasterOfPipelines/RAW/O1903/O190302_01.raw")
# TODO: decide what should happen if this output location already exists.
temp = Path("C:/Symphony/Temp/test")

# Step 1: Apex3D on the raw folder.
apexOutPath, apex_proc = apex3d(
    raw,
    temp,
    write_binary=True,
    capture_output=True,
)
# apexOutPath = temp/(raw.stem + "_Apex3D")
apexOutBIN = apexOutPath.with_suffix('.bin')

# Step 2: Peptide3D on the Apex3D '.bin' file.
pep3dOutPath, pep_proc = peptide3d(
    apexOutBIN,
    temp,
    write_binary=True,
    min_LEMHPlus=350.0,
    capture_output=True,
)
# pep3dOutPath = temp/(raw.stem + "_Pep3D_Spectrum")
pep3dOutXML = pep3dOutPath.with_suffix('.xml')

# Step 3: iaDBs search on the Peptide3D '.xml' file.
iadbsOutPath, iadbs_proc = iadbs(
    pep3dOutXML,
    temp,
    fasta_file="C:/Symphony/Search/wheat.fasta",
    parameters_file="C:/Symphony/Search/251.xml",
    capture_output=True,
)
# raw, fasta = pool1fix[0]
# For every (raw, fasta) pair: copy the FASTA next to the results, write a
# reversed version of it, and rerun iaDBs against that reversed FASTA.
for raw, fasta in pool1fix:
    raw = Path(raw)
    fasta = Path(fasta)
    res_folder = res_path / raw.parent.stem / raw.stem
    # Keep a copy of the FASTA that was used alongside the results.
    shutil.copy(str(fasta), str(res_folder / fasta.name))
    # Build and save the reversed FASTA.
    FF = Fastas(reformulate_fasta(f) for f in fastas(fasta))
    FF.reverse()
    rev_fasta_path = res_folder / f"{fasta.stem}_reversed.fasta"
    FF.write(rev_fasta_path)
    # Rerun iaDBs; output goes to the 'reversed_search' subfolder.
    try:
        outfile, _ = iadbs(res_folder / f"{raw.stem}_Pep3D_Spectrum.xml",
                           res_folder / 'reversed_search', rev_fasta_path)
    except Exception as e:
        # Record the failing raw path. The generator variable `f` above is
        # not visible here, so `raw` is what identifies this iteration.
        problems.append((str(raw), repr(e)))
        logger.warning(repr(e))

# troubles = []
# # raw,_ = pool1fix[0]
# for raw,_ in pool1fix:
#     raw = Path(raw)
#     res_folder = res_path/raw.parent.stem/raw.stem
#     try:
#         df,_ = wx2csv(res_folder/'reversed_search'/f"{raw.stem}_IA_workflow.xml",
#                       res_folder/'reversed_search'/f"{raw.stem}_report.csv")
#     except Exception as e:
#         print(e)
#         troubles.append(e)
Exemple #8
0
# Base folder; the per-acquisition "O190920_NN" subfolders are built from it.
fold = Path(r"Y:\RES\2019-095")


def zeropad(x, k=2):
    """Return ``x`` as a string, left-padded with zeros to at least ``k`` characters.

    Args:
        x: Value to format (typically a non-negative int).
        k (int): Minimal width of the result.
    Returns:
        str: ``str(x)`` padded on the left with '0'; values already at least
        ``k`` characters wide are returned unpadded.
    """
    # str.zfill honours the requested width and keeps a leading sign in front.
    return str(x).zfill(k)


# Acquisition folders to reprocess (numbers 2..8, minus the excluded ones).
folders = [
    fold / f"O190920_{zeropad(i)}" for i in range(2, 9)
    if i not in (8, 15, 22, 29)
]
# Full-range variant:
# folders = [fold/f"O190920_{zeropad(i)}"
#            for i in range(2,29) if i not in (8,15,22,29)]
fastas = get_fastas('human')

problems = []
# NOTE(review): the first folder is skipped by the slice below -- confirm
# this is intentional (e.g. already processed by hand via `f = folders[0]`).
for f in folders[1:]:
    try:
        # Named input_xml so the builtin `input` is not shadowed.
        input_xml = f / f"{f.stem}_Pep3D_Spectrum.xml"
        outfile, _ = iadbs(input_xml, f, fastas)
    except Exception as e:
        # Best effort: remember the failure and continue with the next folder.
        problems.append((str(f), repr(e)))

if problems:
    # Open for writing: json.dump needs a writable handle (the default mode
    # of open() is read-only).
    with open(Path('C:/SYMPHONY_VODKAS/problems'), 'w') as h:
        json.dump(problems, h, indent=4)