print(iadbsOut, iadbs_proc)
        except Exception as e:
            print(e)
            exceptions[rawdatapath].append(e)
            OK = False
        except subprocess.TimeoutExpired:
            print("iadbs reached a timeout of {} hour(s).".format(timeout /
                                                                  3600))
            OK = False
    # iadbsOut = Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03/T170722_03_IA_workflow")
    # out_folder= Path(r"D:/projects/proteome_tools/RES/T1707/T170722_03")
    if OK:
        # Convert the iadbs workflow XML into a CSV report and record how long
        # the conversion took for this raw folder.
        try:
            t_wx2csv = time()
            report, wx2csv_proc = wx2csv(iadbsOut.with_suffix('.xml'),
                                         out_folder / "report.csv",
                                         debug=debug)
            # After this line t_wx2csv holds the elapsed time, not the start.
            timings[raw_folder]['wx2csv'] = t_wx2csv = time() - t_wx2csv
            if debug:
                print(report, wx2csv_proc)
                print("Finished")
        except Exception as e:
            print(e)
            # The iadbs handler in this loop body stores errors with
            # `exceptions[rawdatapath].append(e)`, i.e. `exceptions` is keyed
            # by raw data path. Calling `.append` on the container itself
            # would fail inside this handler, so use the same keyed form.
            exceptions[rawdatapath].append(e)

# Persist the collected timings as indented JSON, then show every recorded
# exception in one go.
timings_path = Path("D:/projects/proteome_tools/RES/pool2/timings.json")
with timings_path.open('w') as f:
    json.dump(timings, f, indent=4)

print("And now all the exceptions:", exceptions, sep="\n")
# Run iadbs followed by wx2csv once for every file in <proj_folder>/params,
# writing each run's output under a temp sub-path named after the parameters
# file.
# pep3dOutXML = pep3dOut.with_suffix('.xml')
pep3dOutXML = temp_folder / (raw_folder + "_Pep3D_Spectrum.xml")
fasta_file = Path(settings[raw_folder])
if debug:
    print(pep3dOutXML, fasta_file, str(fasta_file))

params_dir = proj_folder / "params"
for parameters_file in params_dir.iterdir():
    # Output location shared by the iadbs call and the CSV report below.
    search_out = temp_folder / parameters_file.stem
    if debug:
        print(parameters_file)
        print(search_out)
    # NOTE(review): debug is hard-coded to True here, unlike the wx2csv call
    # below which forwards `debug` -- confirm whether that is intended.
    iadbsOut, iadbs_proc = iadbs(
        pep3dOutXML,
        search_out,
        fasta_file=fasta_file,
        parameters_file=parameters_file,
        capture_output=True,
        debug=True,
    )
    if debug:
        print(iadbsOut, iadbs_proc)
    report, wx2csv_proc = wx2csv(
        iadbsOut.with_suffix('.xml'),
        search_out / "report.csv",
        debug=debug,
    )

# Copy steps that are currently disabled:
# cp(apexOutBIN,  final_folder)
# cp(pep3dOutXML, final_folder)
# cp(iadbsOutXML, final_folder)
# cp(temp_folder/'apex3d.log', final_folder)
# cp(temp_folder/'peptide3d.log', final_folder)
# cp(temp_folder/'iadbs.log', final_folder)
# TODO: add file removal!!!!
if debug:
    print("Finished")
# --- Example no. 3 ---
# 0
# Entries of pool2 that are not in `existing` (presumably sets of raw-folder
# stems -- confirm), and the (raw data, fasta) pairs from `settings` whose
# raw-data stem is among them.
missing = pool2 - existing
missing_files = [(raw, fasta) for raw, fasta in settings
                 if Path(raw).stem in missing]

# Send INFO-and-above log records to pool2.log inside the output folder.
log_format = 'PLGS %(asctime)s:%(name)s:%(levelname)s:%(message)s:'
logging.basicConfig(filename=out_folder / "pool2.log",
                    format=log_format,
                    level=logging.INFO)
logger = logging.getLogger('PLGS')
timeout = 8 * 60  # 8 hours timeout [in minutes]

# Process every missing raw folder. Each one is tried with the low-energy
# thresholds 300, 400 and 500 in turn, stopping at the first threshold for
# which both plgs and the CSV conversion finish without raising.
for raw_entry, fasta_entry in missing_files:
    rawdatapath = Path(raw_entry)
    fastapath = Path(fasta_entry)
    raw_folder = rawdatapath.stem
    # Results are grouped by the first five characters of the folder name.
    final_out_folder = out_folder / raw_folder[:5] / raw_folder
    for low_energy_thr in (300, 400, 500):
        try:
            ok = plgs(
                rawdatapath,
                final_out_folder,
                low_energy_thr=low_energy_thr,
                fastas=fastapath,
                timeout_apex3d=timeout,
                timeout_peptide3d=timeout,
                timeout_iadbs=timeout,
            )

            # next() raises StopIteration when no workflow XML was produced;
            # that is handled like any other failure below.
            workflow_xml = next(final_out_folder.glob('*_IA_workflow.xml'))
            df, _ = wx2csv(workflow_xml, final_out_folder / 'report.csv')
        except Exception as e:
            logger.warning(repr(e))
        else:
            break
#     try:
#         df,_ = wx2csv(res_folder/'reversed_search'/f"{raw.stem}_IA_workflow.xml",
#                       res_folder/'reversed_search'/f"{raw.stem}_report.csv")
#     except Exception as e:
#         print(e)
#         troubles.append(e)

res = Path(r"D:/projects/proteome_tools/RES")

# Recalculate every report: for each project folder under pool*/*/*, rebuild
# the reversed-search CSV report from its IA workflow XML. Failures are
# printed and collected in `troubles` so one bad folder does not stop the run.
troubles = []
# proj_folder = next(res.glob("pool*/*/*"))
for proj_folder in res.glob("pool*/*/*"):
    try:
        search_dir = proj_folder / 'reversed_search'
        workflow_xml = search_dir / f"{proj_folder.stem}_IA_workflow.xml"
        report_csv = search_dir / f"{proj_folder.stem}_report.csv"
        df, _ = wx2csv(workflow_xml, report_csv)
    except Exception as e:
        print(e)
        troubles.append(e)


# proj_folder = next(res.glob("pool*/*/*"))
def iter_results(res):
    """Compute coverage results for each project folder under ``res``.

    For every folder matching ``pool*/*/*`` below ``res``, the reversed-search
    report CSV path and the first ``*_reversed.fasta`` file in the folder are
    passed to ``get_input_for_coverages``; the result is fed to
    ``get_coverages`` and tagged with the FASTA file stem under the key
    ``'fasta'``.

    Args:
        res: object providing ``glob`` (presumably a ``pathlib.Path`` of the
            results root -- confirm against the caller).

    Raises:
        StopIteration: from ``next`` when a folder has no ``*_reversed.fasta``.

    NOTE(review): as shown here the function neither yields nor returns ``r``.
    The name suggests a generator, so the rest of the body may be missing from
    this excerpt -- confirm against the full file.
    """
    for proj_folder in res.glob("pool*/*/*"):
        report_path = proj_folder / "reversed_search" / f"{proj_folder.stem}_report.csv"
        # Only the first matching FASTA file is used.
        fasta_path = next(proj_folder.glob("*_reversed.fasta"))
        # all_peptides_no is unpacked but not used in the visible body.
        peptides, target, qcs, all_peptides_no = get_input_for_coverages(
            report_path, fasta_path)
        r = get_coverages(peptides, target, qcs)
        r['fasta'] = fasta_path.stem
# --- Example no. 5 ---
# 0
# Re-run PLGS for the raw folders in zero_cover_raw_folders and collect their
# reports.
#   troubles: (project name, exception) pairs for every recorded failure.
#   dfs:      project name -> first value returned by wx2csv.
troubles = []
dfs = {}
# proj = next(iter(zero_cover_raw_folders))
for proj in zero_cover_raw_folders:
    FA = fastas[proj]
    RF = raw_folders[proj]
    # The pool name is taken from three directory levels above the FASTA file.
    pool = FA.parent.parent.parent.stem
    OF = output_path / pool / proj[:5] / proj
    OK = False
    try:
        OK = plgs(raw_folder=RF, out_folder=OF, fastas=FA, timeout_apex3d=120)
    except TimeoutExpired:
        # Only a timeout triggers the retry with explicit energy thresholds;
        # any other exception from the first attempt propagates.
        logger.warning('Trying out higher energies.')
        try:
            OK = plgs(raw_folder=RF,
                      out_folder=OF,
                      fastas=FA,
                      timeout_apex3d=120,
                      low_energy_thr=600,
                      high_energy_thr=60)
        except Exception as e:
            logger.warning('Failed again.')
            # Record under the loop variable `proj`; the former `p` is not
            # defined anywhere in this script section and would raise
            # NameError inside the handler.
            troubles.append((proj, e))
    if OK:
        try:
            dfs[proj], _ = wx2csv(OF / f"{proj}_IA_workflow.xml",
                                  OF / f"{proj}_report.csv")
        except Exception as e:
            troubles.append((proj, e))