def validate_run(self, log, info, exit_code, stdout):
    """Validate a feature-alignment run and secure its .tr transformation files.

    Detects the aligner's no-shared-peptides failure in stdout, runs the
    generic stdout/exitcode/file checks, persists stdout to WORKDIR, and
    moves each pyprophet .tr file into WORKDIR so they survive a rescue.

    Raises RuntimeError when no shared peptides were found or when a run
    does not yield exactly one .tr file.
    """
    # This stdout line is what the aligner prints right before dying when
    # no peptides are shared between all runs.
    if 'max_rt_diff = self._stdev_max_rt_per_run * tr_data.getStdev(source, target)' in stdout:
        raise RuntimeError("No peptides found which are shared between all runs. Try to increase 'alignment_score'.")
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['ALIGNMENT_TSV'])
    validation.check_file(log, info['ALIGNMENT_YAML'])
    out2log = os.path.join(info[Keys.WORKDIR], "feature_alignment.out.txt")
    # context manager closes the handle even if write() fails
    with open(out2log, "w") as f:
        f.write(stdout)
    info["ALIGNER_STDOUT"] = out2log
    # Move out .tr files of pyprophet to be rescue safe
    info["TRAFO_FILES"] = []
    for fil in info["MPROPHET_TSV"]:
        trfile = glob.glob(os.path.dirname(fil) + "/*.tr")
        # zero and multiple matches are both fatal; say which one happened
        if len(trfile) != 1:
            raise RuntimeError("Expected exactly one .tr file for %s, found %d" % (fil, len(trfile)))
        basename = os.path.basename(trfile[0])
        tgt = os.path.join(info['WORKDIR'], basename)
        log.debug("Moved tr file %s into WORKDIR" % basename)
        shutil.move(trfile[0], tgt)
        info["TRAFO_FILES"].append(tgt)
    return info
def validate_run(self, log, info, run_code, out):
    """Validate a search run; the fasta must contain DECOY_-labelled entries."""
    decoys_missing = "No decoys with label DECOY_ were found" in out
    if decoys_missing:
        raise RuntimeError("No DECOY_s found in fasta. Please use other fasta!")
    validation.check_stdout(log, out)
    validation.check_exitcode(log, run_code)
    validation.check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an APMS run: exit code plus existence of every produced file."""
    validation.check_exitcode(log, exit_code)
    for produced in info['APMS_OUT']:
        validation.check_file(log, produced)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate spectral-library generation: stdout, exit code and the three outputs."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    for file_key in ('SPLIB', 'TSV'):
        validation.check_file(log, info[file_key])
    validation.check_xml(log, info['TRAML'])
    return info
def validate_run(self, log, info, run_code, out):
    """Validate an LFQ feature-finder run; reject centroided (non-profile) input."""
    # the tool logs this line when it detects centroid rather than profile spectra
    centroid_marker = "OpenMS peak type estimation indicates that this is not profile data!"
    for candidate in out.splitlines():
        if centroid_marker in candidate:
            raise RuntimeError("Found centroid data but LFQ must be run on profile mode data!")
    validation.check_stdout(log, out)
    validation.check_exitcode(log, run_code)
    validation.check_xml(log, info['FEATUREXML'])
    return info
def validate_run(self, log, info, run_code, out):
    """Validate a decoy search run, requiring DECOY_ entries in the fasta."""
    if out.find("No decoys with label DECOY_ were found") != -1:
        raise RuntimeError(
            "No DECOY_s found in fasta. Please use other fasta!")
    validation.check_stdout(log, out)
    validation.check_exitcode(log, run_code)
    validation.check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, out):
    """Validate a prophet-style run: exit code, model fitting, and the pepXML."""
    validation.check_exitcode(log, exit_code)
    no_model_fitted = 'Valid models = 0' in out
    if no_model_fitted:
        raise RuntimeError('No valid model found')
    validation.check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an iProphet run, mapping known failure signatures to clear errors."""
    # exit code -8 has been observed when the preceding search yielded too few peptides
    if exit_code == -8:
        raise RuntimeError("iProphet failed most probably because too few peptides were found in the search before")
    for outline in stdout.splitlines():
        if 'fin: error opening' in outline:
            raise RuntimeError("Could not read the input file " + outline)
    validation.check_exitcode(log, exit_code)
    validation.check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a ProteinProphet run: exit code, known error strings, protXML."""
    validation.check_exitcode(log, exit_code)
    known_failures = ('Error:',
                      'did not find any InterProphet results in input data!',
                      'no data - quitting',
                      'WARNING: No database referenced')
    for msg in known_failures:
        if msg in stdout:
            raise RuntimeError('ProteinProphet error [%s]' % msg)
    validation.check_xml(log, info[Keys.PROTXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a Comet search run.

    Translates known Comet failure strings in stdout into RuntimeErrors
    before running the generic stdout/exitcode/pepXML checks.
    """
    if "Warning - no spectra searched" in stdout:
        raise RuntimeError("No spectra in mzXML!")
    if "CometMemAlloc" in stdout:
        # print to stdout to reach gUSE rescue functionality. ugly, no?
        # parenthesized so the statement is valid on both Python 2 and 3
        print("MemoryError")
        raise RuntimeError("The job run out of RAM!")
    check_stdout(log, stdout)
    check_exitcode(log, exit_code)
    check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a cp-based copy step: map cp's diagnostics onto readable errors."""
    log.debug("Cp validation")
    # self checked
    if stdout.find("No such file") >= 0:
        raise RuntimeError("Inputfile not found")
    if stdout.find("Permission denied") >= 0:
        raise RuntimeError("Was not allowed to read inputfile. Need more rights")
    # validation util
    validation.check_file(log, info["COPY"])
    validation.check_exitcode(log, exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a file-copy step by inspecting cp's output and the copied file."""
    log.debug("Cp validation")
    # self checked: translate cp's own diagnostics in order
    diagnostics = (("No such file", "Inputfile not found"),
                   ("Permission denied", "Was not allowed to read inputfile. Need more rights"))
    for marker, message in diagnostics:
        if marker in stdout:
            raise RuntimeError(message)
    # validation util
    validation.check_file(log, info['COPY'])
    validation.check_exitcode(log, exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a ProteinProphet run against its known fatal log messages."""
    validation.check_exitcode(log, exit_code)
    for fatal in ['Error:',
                  'did not find any InterProphet results in input data!',
                  'no data - quitting',
                  'WARNING: No database referenced']:
        if stdout.find(fatal) != -1:
            raise RuntimeError('ProteinProphet error [%s]' % fatal)
    validation.check_xml(log, info[Keys.PROTXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an mzXML processing run and collect md5 checksums.

    Rejects input whose MS2 spectra lack precursor information, checks
    stdout/exit code, verifies every Q_FILE and stores each file's md5
    (read from its '<file>.md5' sidecar) in info["MD5_SUMS"].
    """
    if "ERROR [root] Parsing </precursorMz> failed. scan number" in stdout:
        raise ValueError("the chosen mzXML %s file contains "
                         "MS2 spectra without precursor information" % info.get("MZXML"))
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    info["MD5_SUMS"] = []
    for p in info["Q_FILES"]:
        validation.check_file(log, p)
        # the .md5 sidecar holds "<hash> <filename>"; keep only the hash.
        # 'with' closes the handle (the original leaked it)
        with open(p + ".md5", "r") as md5file:
            md5sum = md5file.read().split(" ")[0].strip()
        info["MD5_SUMS"].append(md5sum)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the run, then strip spectrumNativeID attributes from the pepXML.

    The attribute breaks downstream TPP tools, see
    https://groups.google.com/forum/#!topic/spctools-discuss/dV8LSaE60ao
    The original pepXML is preserved next to the cleaned one as '<name>.broken'.
    """
    check_exitcode(log, exit_code)
    check_xml(log, info[Keys.PEPXML])
    broken = info[Keys.PEPXML] + '.broken'
    shutil.move(info[Keys.PEPXML], broken)
    # rewrite line by line; context managers close both handles even on error
    # (the original never closed the input file)
    with open(info[Keys.PEPXML], 'w') as fout, open(broken) as fin:
        for line in fin:
            if 'spectrumNativeID' in line:
                line = re.sub('spectrumNativeID="[^"]*"', '', line)
            fout.write(line)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an mzXML quantification run and record md5 sums of its outputs.

    Raises ValueError when the input mzXML has MS2 spectra without
    precursor information; otherwise checks stdout/exit code, verifies
    each Q_FILE and appends its md5 checksum to info["MD5_SUMS"].
    """
    if "ERROR [root] Parsing </precursorMz> failed. scan number" in stdout:
        raise ValueError("the chosen mzXML %s file contains "
                         "MS2 spectra without precursor information" % info.get("MZXML"))
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    info["MD5_SUMS"] = []
    for p in info["Q_FILES"]:
        validation.check_file(log, p)
        # first whitespace-separated token of the .md5 sidecar is the hash;
        # context manager fixes the leaked file handle
        with open(p + ".md5", "r") as md5file:
            md5sum = md5file.read().split(" ")[0].strip()
        info["MD5_SUMS"].append(md5sum)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a Comet run; on a nonzero exit code hint that the input may be gone.

    Known Comet failure strings raise immediately; otherwise the generic
    stdout/exitcode/pepXML checks run.
    """
    if "Warning - no spectra searched" in stdout:
        raise RuntimeError("No spectra in mzXML!")
    if "CometMemAlloc" in stdout:
        # print to stdout to reach gUSE rescue functionality. ugly, no?
        # parenthesized so the statement is valid on both Python 2 and 3
        print("MemoryError")
        raise RuntimeError("The job run out of RAM!")
    check_stdout(log, stdout)
    if exit_code:
        log.warn("exit_code is %s", exit_code)
        mzxml = info[Keys.MZXML]
        log.warn("maybe the input file %s does not exist any more. check this !" % mzxml)
    check_exitcode(log, exit_code)
    check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a SpectraST library-generation run, including iRT calibration.

    When info['RUNRT'] is 'True', three failure modes are collected before
    raising a single combined error (so all problems are logged at once):
    samples lacking iRT peptides in the .splib, an unreadable RT-landmark
    (rtkit) table, and regression fits whose R^2 falls below
    info['RSQ_THRESHOLD']. Finally checks that SpectraST reported success,
    the exit code, and the produced .splib file.
    """
    if info['RUNRT'] == 'True':
        # Spectrast imports sample *without error* when no iRTs are found. Thus look for "Comment:" entries without
        # iRT= attribute in splib
        notenough = set()
        for line in open(info['SPLIB']).readlines():
            if "Comment:" in line and not "iRT=" in line:
                samplename = re.search("RawSpectrum=([^\.]*)\.", line).group(1)
                notenough.add(samplename)
        if notenough:
            log.error("No/not enough iRT peptides found in sample(s): " + ", ".join(notenough))
        # when irt.txt not readable: PEPXML IMPORT: Cannot read landmark table. No RT normalization will be performed.
        rtcalibfailed = False
        for line in open(info['SPLOG']).readlines():
            if "Cannot read landmark table" in line:
                log.error("Problem with reading rtkit file %s!"%info['RTKIT'])
                rtcalibfailed = True
        # Parse logfile to see whether R^2 is high enough. Example log for failed calibration (line 3 only when <0.9):
        # PEPXML IMPORT: RT normalization by linear regression. Found 10 landmarks in MS run "CHLUD_L110830_21".
        # PEPXML_IMPORT: Final fitted equation: iRT = (rRT - 1758) / (8.627); R^2 = 0.5698; 5 outliers removed.
        # ERROR PEPXML_IMPORT: R^2 still too low at required coverage. No RT normalization performed. Consider...
        rsqlow = False
        for line in open(info['SPLOG']).readlines():
            if "Final fitted equation:" in line:
                # the preceding log line names the MS run the fit belongs to
                # (prevline is set below on every non-matching line)
                samplename = prevline.strip().split(" ")[-1]
                rsq = line.split()[-4].replace(";", "")
                if float(rsq) < float(info['RSQ_THRESHOLD']):
                    log.error(
                        "R^2 of %s is below threshold of %s for %s" % (rsq, info['RSQ_THRESHOLD'], samplename))
                    rsqlow = True
                else:
                    log.debug("R^2 of %s is OK for %s" % (rsq, samplename))
            else:
                prevline = line
        # Raise only here to have all errors shown
        if rsqlow or rtcalibfailed or notenough:
            raise RuntimeError("Error in iRT calibration.")
    # Double check "Spectrast finished ..."
    if not " without error." in stdout:
        raise RuntimeError("SpectraST finished with some error!")
    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['SPLIB'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a Comet run and, on a nonzero exit code, flag a possibly missing input.

    Known Comet failure strings in stdout raise immediately; the generic
    stdout/exitcode/pepXML checks run afterwards.
    """
    if "Warning - no spectra searched" in stdout:
        raise RuntimeError("No spectra in mzXML!")
    if "CometMemAlloc" in stdout:
        # print to stdout to reach gUSE rescue functionality. ugly, no?
        # parenthesized so the statement is valid on both Python 2 and 3
        print("MemoryError")
        raise RuntimeError("The job run out of RAM!")
    check_stdout(log, stdout)
    if exit_code:
        log.warn("exit_code is %s", exit_code)
        mzxml = info[Keys.MZXML]
        log.warn(
            "maybe the input file %s does not exist any more. check this !" % mzxml)
    check_exitcode(log, exit_code)
    check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a pyprophet run and collect its optional statistics files."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    stem = os.path.splitext(os.path.basename(info['FEATURETSV']))[0]
    base = os.path.join(info[Keys.WORKDIR], stem)
    info['MPROPHET_TSV'] = base + "_with_dscore_filtered.csv"
    validation.check_file(log, info['MPROPHET_TSV'])
    suffixes = ["_full_stat.csv", "_scorer.bin", "_weights.txt", "_report.pdf",
                "_dscores_top_target_peaks.txt", "_dscores_top_decoy_peaks.txt"]
    # the stats files are optional; keep only those actually produced
    prophet_stats = [base + suffix for suffix in suffixes if os.path.exists(base + suffix)]
    if prophet_stats:
        info['MPROPHET_STATS'] = prophet_stats
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an OpenSWATH extraction run and its feature TSV output."""
    for outline in stdout.splitlines():
        # e.g. "Determined there to be 35792 SWATH windows and in total 6306 MS1 spectra"
        if 'Determined there to be' in outline:
            window_count = float(outline.split()[4])
            if window_count > 128:
                raise RuntimeError('This is a DDA sample, not SWATH!')
        if 'is below limit of ' in outline:
            raise RuntimeError("iRT calibration failed: " + outline)
    # validation.check_stdout(log,stdout)
    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['FEATURETSV'])
    if os.path.getsize(info['FEATURETSV']) < 1000:
        raise RuntimeError("No peak found, output is empty!")
    if 'CHROM_MZML' in info:
        # don't use check_xml() because of .gz
        validation.check_file(log, info['CHROM_MZML'])
    return info
def validate_run(self, log, info, exit_code, out):
    """Validate an openbis download and register the fetched paths in info.

    Raises on archived datasets, tracebacks from openbis, and on files
    listed in self.rfile that are missing on disk. The downloaded paths
    matching this dataset/experiment are stored under the executable's key.
    """
    if "TypeError: expected str or unicode but got <type 'NoneType'>" in out:
        raise RuntimeError("Dataset is archived. Please unarchive first!")
    if "traceback" in out.lower():
        raise RuntimeError("traceback when talking to openbis: %s" % out)
    validation.check_exitcode(log, exit_code)
    missing = []
    # result-file lines end with the downloaded path; ensure each exists.
    # 'with' closes the handle (the original leaked it)
    with open(self.rfile) as rf:
        for line in rf:
            fields = line.strip().rsplit(None, 1)
            if len(fields) == 2:
                path = fields[1]
                if not os.path.exists(path):
                    missing.append(path)
    executable = info[Keys.EXECUTABLE]
    if missing:
        for p in missing:
            log.error("%s failed for %s" % (executable, p))
        raise Exception("files which should be extracted from openbis are missing")
    #KEY where to store downloaded file paths
    key = self.default_keys[executable]
    #VALUE is a list of files or the mzXMLlink
    dsfls = []
    with open(self.rfile) as f:
        for downloaded in [line.strip() for line in f.readlines()]:
            ds, fl = downloaded.split("\t")
            if ds == info[Keys.DATASET_CODE] or ds == info['EXPERIMENT']:
                dsfls.append(fl)
    #MZXML is expected only 1
    if key == 'MZXML':
        dsfls = dsfls[0]
    log.debug("Adding %s to %s" % (dsfls, key))
    info[key] = dsfls
    return info
def validate_run(self, log, info, exit_code, out):
    """Validate an openbis fetch and record the downloaded file paths into info."""
    archived_marker = "TypeError: expected str or unicode but got <type 'NoneType'>"
    if archived_marker in out:
        raise RuntimeError("Dataset is archived. Please unarchive first!")
    if "traceback" in out.lower():
        raise RuntimeError("traceback when talking to openbis: %s" % out)
    validation.check_exitcode(log, exit_code)
    missing = []
    for line in open(self.rfile):
        fields = line.strip().rsplit(None, 1)
        if len(fields) == 2 and not os.path.exists(fields[1]):
            missing.append(fields[1])
    executable = info[Keys.EXECUTABLE]
    if missing:
        for absent in missing:
            log.error("%s failed for %s" % (executable, absent))
        raise Exception(
            "files which should be extracted from openbis are missing")
    # KEY where to store downloaded file paths
    key = self.default_keys[executable]
    # VALUE is a list of files or the mzXMLlink
    dsfls = []
    with open(self.rfile) as f:
        for downloaded in (entry.strip() for entry in f.readlines()):
            ds, fl = downloaded.split("\t")
            if ds == info[Keys.DATASET_CODE] or ds == info['EXPERIMENT']:
                dsfls.append(fl)
    # MZXML is expected only 1
    if key == 'MZXML':
        dsfls = dsfls[0]
    log.debug("Adding %s to %s" % (dsfls, key))
    info[key] = dsfls
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a pyprophet scoring run and pick up any optional report files."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    tsv_name = os.path.basename(info['FEATURETSV'])
    base = os.path.join(info[Keys.WORKDIR], os.path.splitext(tsv_name)[0])
    info['MPROPHET_TSV'] = base + "_with_dscore_filtered.csv"
    validation.check_file(log, info['MPROPHET_TSV'])
    prophet_stats = []
    for suffix in ("_full_stat.csv", "_scorer.bin", "_weights.txt", "_report.pdf",
                   "_dscores_top_target_peaks.txt", "_dscores_top_decoy_peaks.txt"):
        candidate = base + suffix
        if os.path.exists(candidate):
            prophet_stats.append(candidate)
    if prophet_stats:
        info['MPROPHET_STATS'] = prophet_stats
    return info
def validate_run(self, log, info, run_code, out):
    """Locate a FASTA or TraML database among the dataset files listed in self.rfile."""
    validation.check_exitcode(log, run_code)
    if info["DB_SOURCE"] == "BioDB":
        log.info("Database remains " + info["DBASE"])
    else:
        found = False
        with open(self.rfile) as handle:
            for entry in handle.readlines():
                # if info['DB_TYPE'].lower in line.lower():
                lowered = entry.lower()
                if '.fasta' in lowered or '.txt' in lowered:
                    info['DBASE'] = entry.split()[1]
                    log.info("FASTA database file found " + info["DBASE"])
                    found = True
                if '.traml' in lowered:
                    info['TRAML'] = entry.split()[1]
                    log.info("TraML database file found " + info["TRAML"])
                    found = True
        if not found:
            raise RuntimeError("No FASTA ('*.fasta') or TraML ('*.traml') database file found in the dataset files: %s" % self.rfile)
    return info
def validate_run(self, log, info, exit_code, out):
    """Validate an openbis download and store the fetched paths under the executable's key."""
    if "TypeError: expected str or unicode but got <type 'NoneType'>" in out:
        raise RuntimeError("Dataset is archived. Please unarchive first!")
    validation.check_exitcode(log, exit_code)
    # KEY where to store downloaded file paths
    key_by_executable = {"getmsdata": "MZXML",
                         "getexperiment": "SEARCH",
                         "getdataset": "DSSOUT"}
    key = key_by_executable[info[Keys.EXECUTABLE]]
    # VALUE is a list of files or the mzXMLlink
    dsfls = []
    with open(self.rfile) as f:
        for entry in f.readlines():
            ds, fl = entry.strip().split("\t")
            if ds == info[Keys.DATASET_CODE] or ds == info["EXPERIMENT"]:
                dsfls.append(fl)
    # MZXML is expected only 1
    if key == "MZXML":
        dsfls = dsfls[0]
    log.debug("Adding %s to %s" % (dsfls, key))
    info[key] = dsfls
    return info
def validate_run(self, log, info, run_code, out):
    """Scan the dataset file listing for a FASTA or TraML database and store its path.

    Unlike the stricter variant elsewhere, a missing database is only
    logged as an error, not raised.
    """
    validation.check_exitcode(log, run_code)
    if info["DB_SOURCE"] == "BioDB":
        log.info("Database remains " + info["DBASE"])
        return info
    found = False
    f = open(self.rfile)
    for line in f.readlines():
        # if info['DB_TYPE'].lower in line.lower():
        lowered = line.lower()
        if ".fasta" in lowered or ".txt" in lowered:
            info["DBASE"] = line.split()[1]
            log.info("Database found " + info["DBASE"])
            found = True
        if ".traml" in lowered:
            info["TRAML"] = line.split()[1]
            log.info("TraML found " + info["TRAML"])
            found = True
    f.close()
    if not found:
        log.error("No matching database (.fasta or .traml) found in dataset!")
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an alignment run: stdout, exit code, and the matrix output file."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    matrix = info['ALIGNMENT_MATRIX']
    validation.check_file(log, matrix)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check exit code and stdout, then validate the produced pepXML document."""
    validation.check_exitcode(log, exit_code)
    validation.check_stdout(log, stdout)
    pepxml = info[Keys.PEPXML]
    validation.check_xml(log, pepxml)
    return info
def validate_run(self, log, info, run_code, stdout):
    """Minimal validation: only assert that the tool exited successfully.

    Returns the info dict unchanged so the pipeline can continue.
    """
    validation.check_exitcode(log, run_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate exit code and the produced protXML document."""
    validation.check_exitcode(log, exit_code)
    protxml = info['PROTXML']
    validation.check_xml(log, protxml)
    return info
def validate_run(self, log, info, run_code, out):
    """Validate exit code, the protein/peptide CSVs, and the consensusXML."""
    validation.check_exitcode(log, run_code)
    for csv_key in ('PROTCSV', 'PEPCSV'):
        validation.check_file(log, info[csv_key])
    validation.check_xml(log, info['CONSENSUSXML'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the run; the requant TSV is only checked unless requant was disabled."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    # anything but the literal string "false" (including a missing key) enables the check
    requant_enabled = info.get('DO_CHROMML_REQUANT', "") != "false"
    if requant_enabled:
        validation.check_file(log, info['REQUANT_TSV'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate exit code and the Mayu output file."""
    validation.check_exitcode(log, exit_code)
    mayu_out = info['MAYUOUT']
    validation.check_file(log, mayu_out)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Generic validation: exit code and stdout only; no output files to check.

    Returns the info dict unchanged.
    """
    validation.check_exitcode(log, exit_code)
    validation.check_stdout(log, stdout)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the tool's exit code and stdout, then validate the pepXML result."""
    validation.check_exitcode(log, exit_code)
    validation.check_stdout(log, stdout)
    result_xml = info[Keys.PEPXML]
    validation.check_xml(log, result_xml)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check that every dumped mzXML file exists, then the exit code."""
    for dumped in info['DUMP_MZXML']:
        validation.check_file(log, dumped)
    validation.check_exitcode(log, exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate stdout and exit code, then confirm the alignment matrix was written."""
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    matrix_file = info['ALIGNMENT_MATRIX']
    validation.check_file(log, matrix_file)
    return info