def validate_run(self, log, info, run_code, out):
    """Validate the finished run and return ``info`` unchanged.

    A missing-decoy message in ``out`` is reported as a RuntimeError first.
    Afterwards stdout, the exit code and the pepXML file referenced by
    ``info[Keys.PEPXML]`` are handed to the ``validation`` helpers.

    Raises:
        RuntimeError: if ``out`` reports that no DECOY_ entries were found.
    """
    decoy_marker = "No decoys with label DECOY_ were found"
    if decoy_marker in out:
        raise RuntimeError("No DECOY_s found in fasta. Please use other fasta!")

    validation.check_stdout(log, out)
    validation.check_exitcode(log, run_code)
    pepxml = info[Keys.PEPXML]
    validation.check_xml(log, pepxml)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the feature alignment run and collect its artefacts.

    Steps, in order:
      1. Raise early when stdout contains the aligner's traceback line that
         shows up when no peptides are shared between all runs.
      2. Delegate stdout / exit code / output file checks to ``validation``.
      3. Persist stdout to ``feature_alignment.out.txt`` in the work
         directory and record the path in ``info["ALIGNER_STDOUT"]``.
      4. Warn when less than half of the features survived the alignment.
      5. Move the single ``.tr`` file found next to every entry of
         ``info["MPROPHET_TSV"]`` into the work directory and list the new
         paths in ``info["TRAFO_FILES"]``.

    Returns:
        The updated ``info`` mapping.

    Raises:
        RuntimeError: if no shared peptides were found, or if a directory
            does not contain exactly one ``.tr`` file.
    """
    if 'max_rt_diff = self._stdev_max_rt_per_run * tr_data.getStdev(source, target)' in stdout:
        raise RuntimeError("No peptides found which are shared between all runs. Try to increase FDR.")

    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['ALIGNMENT_TSV'])
    validation.check_file(log, info['ALIGNMENT_YAML'])

    out2log = os.path.join(info[Keys.WORKDIR], "feature_alignment.out.txt")
    # Context manager guarantees the handle is closed even if the write fails.
    with open(out2log, "w") as handle:
        handle.write(stdout)
    info["ALIGNER_STDOUT"] = out2log

    for line in stdout.splitlines():
        if "We were able to quantify " in line:
            fields = line.split()
            # Token positions 13 and 19 carry the counts after/before alignment.
            aligned = int(fields[13])
            before = int(fields[19])
            if aligned < before / 2:
                log.warning("Much less features after alignment than before!")

    # Move out .tr files of pyprophet to be rescue safe
    info["TRAFO_FILES"] = []
    for fil in info["MPROPHET_TSV"]:
        trfiles = glob.glob(os.path.dirname(fil) + "/*.tr")
        if not trfiles:
            # Previously reported as "More than one", which was misleading.
            raise RuntimeError("No .tr file found for " + fil)
        if len(trfiles) > 1:
            raise RuntimeError("More than one .tr file for " + fil)
        source = trfiles[0]
        basename = os.path.basename(source)
        tgt = os.path.join(info['WORKDIR'], basename)
        shutil.move(source, tgt)
        # Log only after the move actually succeeded.
        log.debug("Moved tr file %s into WORKDIR" % basename)
        info["TRAFO_FILES"].append(tgt)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check stdout, exit code and the three expected output files.

    The files named by ``info['SPLIB']`` and ``info['TSV']`` go through
    ``validation.check_file``; ``info['TRAML']`` goes through
    ``validation.check_xml``. Returns ``info`` unchanged.
    """
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    for key in ('SPLIB', 'TSV'):
        validation.check_file(log, info[key])
    validation.check_xml(log, info['TRAML'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the search run and return ``info`` unchanged.

    Raises:
        RuntimeError: if stdout warns that no spectra were searched.
    """
    no_spectra = "Warning - no spectra searched" in stdout
    if no_spectra:
        raise RuntimeError("No spectra in mzXML!")

    check_stdout(log, stdout)
    check_exitcode(log, exit_code)
    pepxml = info[Keys.PEPXML]
    check_xml(log, pepxml)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code, then every path listed in ``info['APMS_OUT']``.

    Returns ``info`` unchanged; ``stdout`` is not inspected.
    """
    validation.check_exitcode(log, exit_code)
    for produced in info['APMS_OUT']:
        validation.check_file(log, produced)
    return info
def validate_run(self, log, info, run_code, out):
    """Validate the feature finding run and return ``info`` unchanged.

    Raises:
        RuntimeError: if any line of ``out`` carries the OpenMS message that
            the input is not profile data.
    """
    centroid_marker = "OpenMS peak type estimation indicates that this is not profile data!"
    if any(centroid_marker in row for row in out.splitlines()):
        raise RuntimeError("Found centroid data but LFQ must be run on profile mode data!")

    validation.check_stdout(log, out)
    validation.check_exitcode(log, run_code)
    validation.check_xml(log, info['FEATUREXML'])
    return info
def validate_run(self, log, info, exit_code, out):
    """Check the exit code, reject runs without valid models, check the pepXML.

    Returns ``info`` unchanged.

    Raises:
        RuntimeError: if ``out`` contains 'Valid models = 0'.
    """
    validation.check_exitcode(log, exit_code)

    no_model = 'Valid models = 0' in out
    if no_model:
        raise RuntimeError('No valid model found')

    pepxml = info[Keys.PEPXML]
    validation.check_xml(log, pepxml)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a SpectraST run and return ``info`` unchanged.

    Raises:
        RuntimeError: if stdout lacks the " without error." marker.
    """
    # Double check "Spectrast finished ..."
    finished_cleanly = " without error." in stdout
    if not finished_cleanly:
        raise RuntimeError("SpectraST finished with some error!")

    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['SPLIB'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate an iProphet run and return ``info`` unchanged.

    Raises:
        RuntimeError: if the exit code is -8, or if a stdout line reports
            'fin: error opening' (the offending line is appended to the
            message).
    """
    if exit_code == -8:
        raise RuntimeError("iProphet failed most probably because too few peptides were found in the search before")

    unreadable = next(
        (row for row in stdout.splitlines() if 'fin: error opening' in row),
        None,
    )
    if unreadable is not None:
        raise RuntimeError("Could not read the input file " + unreadable)

    validation.check_exitcode(log, exit_code)
    validation.check_xml(log, info[Keys.PEPXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a ProteinProphet run and return ``info`` unchanged.

    After the exit code check, stdout is searched for a fixed list of
    failure messages; the first one found (in list order) is reported.

    Raises:
        RuntimeError: if one of the known failure messages is in stdout.
    """
    validation.check_exitcode(log, exit_code)

    failure_messages = (
        'Error:',
        'did not find any InterProphet results in input data!',
        'no data - quitting',
        'WARNING: No database referenced',
    )
    hit = next((text for text in failure_messages if text in stdout), None)
    if hit is not None:
        raise RuntimeError('ProteinProphet error [%s]' % hit)

    validation.check_xml(log, info[Keys.PROTXML])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a copy run and return ``info`` unchanged.

    Known error texts in stdout are reported first; afterwards the copied
    file (``info["COPY"]``) and the exit code go through ``validation``.

    Raises:
        RuntimeError: if stdout mentions a missing file or denied permission.
    """
    log.debug("Cp validation")

    # Messages recognised directly in stdout.
    missing_input = "No such file" in stdout
    if missing_input:
        raise RuntimeError("Inputfile not found")
    access_denied = "Permission denied" in stdout
    if access_denied:
        raise RuntimeError("Was not allowed to read inputfile. Need more rights")

    # Generic checks from the validation utilities.
    validation.check_file(log, info["COPY"])
    validation.check_exitcode(log, exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code, then merge, plot and export the MHC results.

    The merged table comes from ``pso.concat_all_MHC_outputs`` applied to
    ``self.outfiles`` and ``self.df``. A heatmap is drawn to ``heatmap.png``
    and the table is written tab-separated into the work directory; the
    table path is stored in ``info['NETMHC_OUT']``.

    Returns:
        The updated ``info`` mapping.
    """
    check_exitcode(log, exit_code)

    # Collect all generated outputs into one table.
    merged = pso.concat_all_MHC_outputs(self.outfiles, self.df)

    workdir = info[Keys.WORKDIR]
    heatmap_path = os.path.join(workdir, 'heatmap.png')
    self.plot_heatmap(merged, heatmap_path)

    table_path = os.path.join(workdir, 'netmhccons.output.tsvh')
    info['NETMHC_OUT'] = table_path
    merged.to_csv(table_path, sep="\t")
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code, then run WebLogo on the converted output files.

    Every path in ``self.outfiles`` is passed through
    ``fixStupidGibbsclusterFile`` (target name: the path plus "out"); the
    collected results are turned into commands by ``runweblogo_on_files``
    and executed via ``self.execute_run``. Returns ``info``.
    """
    check_exitcode(log, exit_code)

    weblogoexe = info['WEBLOGO']
    weblogoinputs = []
    for path in self.outfiles:
        weblogoinputs.extend(fixStupidGibbsclusterFile(path, path + "out"))

    self.execute_run(log, info, runweblogo_on_files(weblogoexe, weblogoinputs))
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code and write one merged MHC table per output folder.

    The paths in ``self.outfiles`` are grouped by their directory. For each
    directory the group is merged with ``pso.concat_all_MHC_outputs`` (using
    ``self.iprophet``) and written tab-separated to
    ``netmhccons.output.csv`` inside that directory.

    Returns:
        ``info`` with ``info['NETMHC_OUT']`` set to the list of written
        table paths (one per directory).
    """
    check_exitcode(log, exit_code)

    # Sorted so that the order of NETMHC_OUT is deterministic.
    folders = sorted(set(os.path.dirname(path) for path in self.outfiles))

    info['NETMHC_OUT'] = []
    for fold in folders:
        # Exact directory match: a substring test would also pull in files
        # from any directory whose name is contained in ``fold``.
        files = [path for path in self.outfiles if os.path.dirname(path) == fold]
        respiv = pso.concat_all_MHC_outputs(files, self.iprophet)
        outfile = os.path.join(fold, 'netmhccons.output.csv')
        respiv.to_csv(outfile, sep="\t")
        # append, not +=: "list += str" would add the path character by character.
        info['NETMHC_OUT'].append(outfile)
    return info
def validate_run(self, info, exit_code, stdout):
    """Validate a copy run and return ``info`` unchanged.

    Known error texts in stdout are reported first; afterwards the copied
    file (``info['COPY']``) and the exit code go through ``validation``.

    Raises:
        RuntimeError: if stdout mentions a missing file or denied permission.
    """
    logging.debug("Cp validation")

    # Messages recognised directly in stdout.
    missing_input = "No such file" in stdout
    if missing_input:
        raise RuntimeError("Inputfile not found")
    access_denied = "Permission denied" in stdout
    if access_denied:
        raise RuntimeError("Was not allowed to read inputfile. Need more rights")

    # Generic checks from the validation utilities.
    validation.check_file(info['COPY'])
    validation.check_exitcode(exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate a SpectraST run, including the optional iRT calibration.

    When ``info['RUNRT']`` equals the string 'True', the spectral library
    (``info['SPLIB']``) and the SpectraST log (``info['SPLOG']``) are scanned
    for calibration problems. Every problem is logged first; one
    RuntimeError is raised afterwards so that all of them are visible.
    Finally stdout, the exit code and the library file are checked.

    Returns:
        ``info`` unchanged.

    Raises:
        RuntimeError: if an iRT calibration problem was found, or if stdout
            lacks the " without error." marker.
    """
    if info['RUNRT'] == 'True':
        # SpectraST imports samples *without error* when no iRTs are found.
        # Thus look for "Comment:" entries without iRT= attribute in the splib.
        notenough = set()
        with open(info['SPLIB']) as splib:
            for line in splib:
                if "Comment:" in line and "iRT=" not in line:
                    match = re.search(r"RawSpectrum=([^\.]*)\.", line)
                    # Still report the entry when the sample name is not parseable.
                    notenough.add(match.group(1) if match else "<unknown>")
        if notenough:
            log.error("No/not enough iRT peptides found in sample(s): " + ", ".join(sorted(notenough)))

        # Read the log once; it is scanned twice below so that the order of
        # the emitted messages stays the same as with two separate passes.
        with open(info['SPLOG']) as splog:
            log_lines = splog.readlines()

        # When irt.txt is not readable:
        # PEPXML IMPORT: Cannot read landmark table. No RT normalization will be performed.
        rtcalibfailed = False
        for line in log_lines:
            if "Cannot read landmark table" in line:
                log.error("Problem with reading rtkit file %s!" % info['RTKIT'])
                rtcalibfailed = True

        # Parse logfile to see whether R^2 is high enough. Example log for failed
        # calibration (line 3 only when <0.9):
        # PEPXML IMPORT: RT normalization by linear regression. Found 10 landmarks in MS run "CHLUD_L110830_21".
        # PEPXML_IMPORT: Final fitted equation: iRT = (rRT - 1758) / (8.627); R^2 = 0.5698; 5 outliers removed.
        # ERROR PEPXML_IMPORT: R^2 still too low at required coverage. No RT normalization performed. Consider...
        rsqlow = False
        # Initialised so that an equation on the very first line cannot hit an
        # unbound name; the sample name is then simply empty.
        prevline = ""
        for line in log_lines:
            if "Final fitted equation:" in line:
                samplename = prevline.strip().split(" ")[-1]
                rsq = line.split()[-4].replace(";", "")
                threshold = info['RSQ_THRESHOLD']
                if float(rsq) < float(threshold):
                    log.error("R^2 of %s is below threshold of %s for %s" % (rsq, threshold, samplename))
                    rsqlow = True
                else:
                    log.debug("R^2 of %s is OK for %s" % (rsq, samplename))
            else:
                prevline = line

        # Raise only here to have all errors shown
        if rsqlow or rtcalibfailed or notenough:
            raise RuntimeError("Error in iRT calibration.")

    # Double check "Spectrast finished ..."
    if " without error." not in stdout:
        raise RuntimeError("SpectraST finished with some error!")

    validation.check_exitcode(log, exit_code)
    validation.check_file(log, info['SPLIB'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the run and register its result and statistics files.

    The base name is the work directory joined with the extension-less file
    name of ``info['FEATURETSV']``. The filtered result
    (``<base>_with_dscore_filtered.csv``) is stored in
    ``info['MPROPHET_TSV']`` and checked. Optional statistics files that
    exist on disk are listed in ``info['MPROPHET_STATS']``; the key is only
    set when at least one of them exists.

    Returns:
        The updated ``info`` mapping.
    """
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)

    workdir = info[Keys.WORKDIR]
    stem = os.path.splitext(os.path.basename(info['FEATURETSV']))[0]
    base = os.path.join(workdir, stem)

    info['MPROPHET_TSV'] = base + "_with_dscore_filtered.csv"
    validation.check_file(log, info['MPROPHET_TSV'])

    optional_suffixes = (
        "_full_stat.csv",
        "_scorer.bin",
        "_weights.txt",
        "_report.pdf",
        "_dscores_top_target_peaks.txt",
        "_dscores_top_decoy_peaks.txt",
    )
    candidates = (base + suffix for suffix in optional_suffixes)
    prophet_stats = [path for path in candidates if os.path.exists(path)]
    if prophet_stats:
        info['MPROPHET_STATS'] = prophet_stats
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Validate the run and return ``info`` unchanged.

    stdout is scanned line by line for two failure indicators before the
    exit code and the output files are checked. stdout itself is not passed
    to ``validation.check_stdout`` here.

    Raises:
        RuntimeError: if more than 128 SWATH windows are reported, if a line
            reports a value 'below limit of', or if the feature TSV is
            smaller than 1000 bytes.
    """
    for row in stdout.splitlines():
        # Example: Determined there to be 35792 SWATH windows and in total 6306 MS1 spectra
        if 'Determined there to be' in row and float(row.split()[4]) > 128:
            raise RuntimeError('This is a DDA sample, not SWATH!')
        if 'is below limit of ' in row:
            raise RuntimeError("iRT calibration failed: " + row)

    validation.check_exitcode(log, exit_code)

    validation.check_file(log, info['FEATURETSV'])
    too_small = os.path.getsize(info['FEATURETSV']) < 1000
    if too_small:
        raise RuntimeError("No peak found, output is empty!")

    if 'CHROM_MZML' in info:
        # check_xml() is not used here because of .gz
        validation.check_file(log, info['CHROM_MZML'])
    return info
def validate_run(self, log, info, run_code, out):
    """Check the exit code and pick up database / TraML paths from the result file.

    When ``info["DB_SOURCE"]`` is "BioDB" the configured database is kept.
    Otherwise ``self.rfile`` is read line by line: the second
    whitespace-separated token of a line mentioning '.fasta' or '.txt'
    becomes ``info['DBASE']``, and that of a line mentioning '.traml'
    becomes ``info['TRAML']`` (later lines overwrite earlier ones). If no
    line matches, an error is logged but no exception is raised.

    Returns:
        The (possibly updated) ``info`` mapping.
    """
    validation.check_exitcode(log, run_code)

    if info["DB_SOURCE"] == "BioDB":
        log.info("Database remains " + info["DBASE"])
        return info

    found = False
    # Context manager closes the handle even if parsing a line fails.
    with open(self.rfile) as handle:
        for line in handle:
            lowered = line.lower()
            if '.fasta' in lowered or '.txt' in lowered:
                info['DBASE'] = line.split()[1]
                log.info("Database found " + info["DBASE"])
                found = True
            if '.traml' in lowered:
                info['TRAML'] = line.split()[1]
                log.info("TraML found " + info["TRAML"])
                found = True

    if not found:
        log.error("No matching database (.fasta or .traml) found in dataset!")
    return info
def validate_run(self, log, info, exit_code, out):
    """Validate the download and store the downloaded paths in ``info``.

    ``self.rfile`` is expected to hold one tab-separated
    "<code>\\t<path>" pair per line. Paths whose code equals
    ``info[Keys.DATASET_CODE]`` or ``info['EXPERIMENT']`` are collected and
    stored under a key chosen by ``info[Keys.EXECUTABLE]``
    ('MZXML', 'SEARCH' or 'DSSOUT'). For 'MZXML' only the first path is
    stored instead of the list.

    Returns:
        The updated ``info`` mapping.

    Raises:
        RuntimeError: if ``out`` carries the TypeError text that indicates
            an archived dataset.
    """
    archived_marker = "TypeError: expected str or unicode but got <type 'NoneType'>"
    if archived_marker in out:
        raise RuntimeError("Dataset is archived. Please unarchive first!")

    validation.check_exitcode(log, exit_code)

    # Key under which the downloaded file paths are stored.
    key_by_executable = {'getmsdata': 'MZXML', 'getexperiment': 'SEARCH', 'getdataset': 'DSSOUT'}
    key = key_by_executable[info[Keys.EXECUTABLE]]

    # Value is a list of files or the single mzXML link.
    dsfls = []
    with open(self.rfile) as result_file:
        entries = [raw.strip() for raw in result_file.readlines()]
        for entry in entries:
            code, path = entry.split("\t")
            if code == info[Keys.DATASET_CODE] or code == info['EXPERIMENT']:
                dsfls.append(path)

    # Exactly one file is expected for MZXML.
    if key == 'MZXML':
        dsfls = dsfls[0]

    log.debug("Adding %s to %s" % (dsfls, key))
    info[key] = dsfls
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code and the protXML file; return ``info`` unchanged.

    ``stdout`` is accepted for interface compatibility but not inspected.
    """
    validation.check_exitcode(log, exit_code)
    protxml = info['PROTXML']
    validation.check_xml(log, protxml)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check stdout and exit code, plus the requant TSV unless disabled.

    The file check is skipped only when ``info['DO_CHROMML_REQUANT']`` is
    exactly the string "false"; a missing key counts as enabled.
    Returns ``info`` unchanged.
    """
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)

    requant_disabled = info.get('DO_CHROMML_REQUANT', "") == "false"
    if requant_disabled:
        return info

    validation.check_file(log, info['REQUANT_TSV'])
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check exit code, stdout and the pepXML file; return ``info`` unchanged."""
    validation.check_exitcode(log, exit_code)
    validation.check_stdout(log, stdout)
    pepxml = info[Keys.PEPXML]
    validation.check_xml(log, pepxml)
    return info
def validate_run(cls, info, exit_code, out):
    """Check the exit code and return ``info`` unchanged.

    Non-empty ``out`` is only written to the debug log; it does not
    influence the validation result.
    """
    has_output = bool(out)
    if has_output:
        logging.debug("out set, not used: %s", out)

    validation.check_exitcode(exit_code)
    return info
def validate_run(self, log, info, run_code, stdout):
    """Check only the exit code; return ``info`` unchanged.

    ``stdout`` is accepted for interface compatibility but not inspected.
    """
    validation.check_exitcode(log, run_code)

    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code and the TraML CSV file; return ``info`` unchanged.

    ``stdout`` is accepted for interface compatibility but not inspected.
    """
    validation.check_exitcode(log, exit_code)
    traml_csv = info['TRAML_CSV']
    validation.check_file(log, traml_csv)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the exit code first, then stdout; return ``info`` unchanged."""
    validation.check_exitcode(log, exit_code)
    validation.check_stdout(log, stdout)

    return info
def validate_run(self, info, exit_code, stdout):
    """Check the file at ``info['FILE']``, then the exit code.

    Returns ``info`` unchanged; ``stdout`` is not inspected.
    """
    produced = info['FILE']
    validation.check_file(produced)
    validation.check_exitcode(exit_code)
    return info
def validate_run(cls, info, exit_code, stdout):
    """Check the exit code first, then stdout; return ``info`` unchanged."""
    validation.check_exitcode(exit_code)
    validation.check_stdout(stdout)

    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check the file at ``info['FILE']``, then the exit code.

    Returns ``info`` unchanged; ``stdout`` is not inspected.
    """
    produced = info['FILE']
    validation.check_file(log, produced)
    validation.check_exitcode(log, exit_code)
    return info
def validate_run(self, log, info, exit_code, stdout):
    """Check stdout, exit code and the alignment matrix file.

    Returns ``info`` unchanged.
    """
    validation.check_stdout(log, stdout)
    validation.check_exitcode(log, exit_code)
    matrix_path = info['ALIGNMENT_MATRIX']
    validation.check_file(log, matrix_path)
    return info