def scan(language, file_manifest, source_file_names):
    """Parse the given sources, detect duplicated code and write a clone report.

    language -- key selecting the AST supplier for the sources.
    file_manifest -- optional manifest of files to scan; when neither explicit
        file names nor a manifest are given, a default manifest for the
        language is used.
    source_file_names -- iterable of file names to scan.

    Side effect: writes the report to ".orphanblack".
    """
    # Determine the files to scan. If no files are given, use a default manifest.
    if len(source_file_names) == 0 and file_manifest is None:
        file_manifest = manifest.default_manifest(language)
    source_file_names = set(source_file_names)
    if file_manifest is not None:
        source_file_names.update(set(manifest.contents(file_manifest)))
    supplier = ast_suppliers.abstract_syntax_tree_suppliers[language]
    # TODO: Configuration files!
    parameters = Parameters()
    parameters.distance_threshold = supplier.distance_threshold
    parameters.size_threshold = supplier.size_threshold
    source_files = []
    report = Report(parameters)

    def parse_file(file_name):
        # Best-effort parse: a file that cannot be parsed is logged and
        # skipped rather than aborting the whole scan.
        try:
            logging.info('Parsing %s...', file_name)
            source_file = supplier(file_name, parameters)
            source_file.getTree().propagateCoveredLineNumbers()
            source_file.getTree().propagateHeight()
            source_files.append(source_file)
            report.addFileName(file_name)
            logging.info('done')
        except Exception:
            # Fixed: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            logging.warning('Can\'t parse "%s" \n: %s', file_name, traceback.format_exc())

    for file_name in source_file_names:
        parse_file(file_name)
    duplicates = clone_detection_algorithm.findDuplicateCode(source_files, report)
    for n, duplicate in enumerate(duplicates, 1):
        distance = duplicate.calcDistance()
        # TODO: This is a mess! Most of this info should be assembled on the
        # fly and in member functions.
        snippets = [Snippet(duplicate[i].getSourceFile()._file_name,
                            duplicate[i].getCoveredLineNumbers(),
                            '\n'.join(duplicate[i].getSourceLines()))
                    for i in [0, 1]]
        report.addClone(CloneSummary("Clone #" + str(n), snippets, distance))
    report.sortByCloneSize()
    save_report(".orphanblack", report)
def generate_report(param):
    """Assemble report items from request parameters, then save or generate.

    param -- CGI-style mapping of field name to list of values (each value is
        wrapped in a list; missing fields default to ['']).

    Returns pdf(param) for action 'save', ('file', (content, title)) for
    action 'generate', and None for any other action.
    """
    def first(key):
        # Single-valued form field; '' when absent.
        return param.get(key, [''])[0]

    def many(key):
        # Multi-valued form field; [''] when absent.
        return param.get(key, [''])

    def date(prefix):
        # Assemble 'YYYY-MM-DD' from the three separate date form fields.
        return first(prefix + '_year') + '-' + first(prefix + '_month') + '-' + first(prefix + '_day')

    number = int(first('number'))
    title = first('title')
    items = []
    for i in range(1, number + 1):
        suffix = str(i)
        stat_type = first('type' + suffix)
        if stat_type == 'count':
            items.append({'type': 'count',
                          'start': date('countstart' + suffix),
                          'end': date('countend' + suffix),
                          'cutoff': many('countcutoff' + suffix),
                          'calculation': many('countcalc' + suffix),
                          'group': many('countgroup' + suffix),
                          'chart_type': first('counttype' + suffix)})
        elif stat_type == 'scatter':
            items.append({'type': 'scatter',
                          'variables': many('scattervariables' + suffix),
                          'calculation': many('scattercalc' + suffix)})
        elif stat_type == 'compare':
            items.append({'type': 'compare',
                          'variable': first('comparevariables' + suffix),
                          'group': many('comparegroup' + suffix),
                          'calcvariable': first('comparecalcvariable' + suffix),
                          'calculation': many('comparecalc' + suffix),
                          'cutoff': many('comparecutoff' + suffix)})
    action = first('action')
    if action == 'save':
        report.save_report(title, items)
        return pdf(param)
    elif action == 'generate':
        content = report.generate_report(title, items)
        return ('file', (content, title))
def save_results(self, result, extra_writes, out_pathes): summ_stat_path = extra_writes.get("summ_stat_path") if summ_stat_path is not None: result.summary_statistics.to_csv(summ_stat_path, self.delim_out, index=False) print "WRITTEN: ", summ_stat_path full_stat_path = extra_writes.get("full_stat_path") if full_stat_path is not None: result.final_statistics.to_csv(full_stat_path, sep=self.delim_out, index=False) print "WRITTEN: ", full_stat_path for scored_table, out_path in zip(result.scored_tables, out_pathes): cutoff = CONFIG.get("d_score.cutoff") scored_table.to_csv(out_path.scored_table, out_path.filtered_table, cutoff, sep=self.delim_out, index=False) print "WRITTEN: ", out_path.scored_table print "WRITTEN: ", out_path.filtered_table if result.final_statistics is not None: cutoffs = result.final_statistics["cutoff"].values svalues = result.final_statistics["svalue"].values qvalues = result.final_statistics["qvalue"].values decoys, targets, top_decoys, top_targets = scored_table.scores() plot_data = save_report( out_path.report, self.prefix, decoys, targets, top_decoys, top_targets, cutoffs, svalues, qvalues) print "WRITTEN: ", out_path.report cutoffs, svalues, qvalues, top_targets, top_decoys = plot_data for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues), ("d_scores_top_target_peaks", top_targets), ("d_scores_top_decoy_peaks", top_decoys)]: path = out_path[name] with open(path, "w") as fp: fp.write(" ".join("%e" % v for v in values)) print "WRITTEN: ", path if CONFIG.get("export.mayu"): if result.final_statistics: export_mayu(out_pathes.mayu_cutoff, out_pathes.mayu_fasta, out_pathes.mayu_csv, scored_table, result.final_statistics) print "WRITTEN: ", out_pathes.mayu_cutoff print "WRITTEN: ", out_pathes.mayu_fasta print "WRITTEN: ", out_pathes.mayu_csv else: logging.warn("can not write mayu table in this case")
def save_results(self, result, extra_writes, out_pathes, pvalues): summ_stat_path = extra_writes.get("summ_stat_path") if summ_stat_path is not None: result.summary_statistics.to_csv(summ_stat_path, self.delim_out, index=False) print "WRITTEN: ", summ_stat_path full_stat_path = extra_writes.get("full_stat_path") if full_stat_path is not None: result.final_statistics.to_csv(full_stat_path, sep=self.delim_out, index=False) print "WRITTEN: ", full_stat_path for input_path, scored_table, out_path in zip(self.pathes, result.scored_tables, out_pathes): cutoff = CONFIG.get("d_score.cutoff") scored_table.to_csv(out_path.scored_table, out_path.filtered_table, cutoff, sep=self.delim_out, index=False) print "WRITTEN: ", out_path.scored_table print "WRITTEN: ", out_path.filtered_table if CONFIG.get("rewrite_sqmass"): # get basepath basepath = input_path.split(".tsv")[0] basepath = basepath.split(".txt")[0] basepath = basepath.split(".csv")[0] # try to find a matching sqMass file sqmass_file = None if os.path.exists(basepath + ".chrom.sqMass"): sqmass_file = basepath + ".chrom.sqMass" elif os.path.exists(basepath + ".sqMass"): sqmass_file = basepath + ".sqMass" # get selected chromatograms on the filtered table df = scored_table.df[scored_table.df.d_score > cutoff] fragment_anno = df.aggr_Fragment_Annotation.unique() prec_anno = df.aggr_prec_Fragment_Annotation.unique() labels = [] for l in fragment_anno: labels.extend(l.split(";")) for l in prec_anno: labels.extend(l.split(";")) filterChromByLabels(sqmass_file, out_path.filtered_chroms, labels) if result.final_statistics is not None: cutoffs = result.final_statistics["cutoff"].values svalues = result.final_statistics["svalue"].values qvalues = result.final_statistics["qvalue"].values # pvalues = result.final_statistics["pvalue"].values decoys, targets, top_decoys, top_targets = scored_table.scores( ) lambda_ = CONFIG.get("final_statistics.lambda") plot_data = save_report(out_path.report, self.prefix, decoys, targets, 
top_decoys, top_targets, cutoffs, svalues, qvalues, pvalues, lambda_) print "WRITTEN: ", out_path.report cutoffs, svalues, qvalues, top_targets, top_decoys = plot_data for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues), ("d_scores_top_target_peaks", top_targets), ("d_scores_top_decoy_peaks", top_decoys)]: path = out_path[name] with open(path, "w") as fp: fp.write(" ".join("%e" % v for v in values)) print "WRITTEN: ", path if CONFIG.get("export.mayu"): if result.final_statistics is not None: export_mayu(out_pathes[0]['mayu_cutoff'], out_pathes[0]['mayu_fasta'], out_pathes[0]['mayu_csv'], scored_table, result.final_statistics) print "WRITTEN: ", out_pathes[0]['mayu_cutoff'] print "WRITTEN: ", out_pathes[0]['mayu_fasta'] print "WRITTEN: ", out_pathes[0]['mayu_csv'] else: logging.warn("can not write mayu table in this case")
def _main(args): options = dict() path = None if "--help" in args: print_help() return if "--version" in args: print_version() return for arg in args: if arg.startswith("--"): if "=" in arg: pre, __, post = arg.partition("=") options[pre[2:]] = post else: options[arg[2:]] = True else: if path is not None: print_help() raise Exception("duplicate input file argument") path = arg if path is None: print_help() raise Exception("no input file given") CONFIG, info = standard_config() CONFIG.update(options) fix_config_types(CONFIG) dump_config(CONFIG) delim_in = CONFIG.get("delim.in", ",") delim_out = CONFIG.get("delim.out", ",") dirname = CONFIG.get("target.dir", None) if dirname is None: dirname = os.path.dirname(path) basename = os.path.basename(path) prefix, __ = os.path.splitext(basename) persisted_scorer = None apply_scorer = CONFIG.get("apply_scorer") if apply_scorer: if not os.path.exists(apply_scorer): raise Exception("scorer file %s does not exist" % apply_scorer) try: persisted_scorer = cPickle.loads(zlib.decompress(open(apply_scorer, "rb").read())) except: import traceback traceback.print_exc() raise apply_existing_scorer = persisted_scorer is not None persisted_weights = None apply_weights = CONFIG.get("apply_weights") if apply_weights: if not os.path.exists(apply_weights): raise Exception("weights file %s does not exist" % apply_weights) try: persisted_weights = np.loadtxt(apply_weights) except: import traceback traceback.print_exc() raise apply_existing_weights = persisted_weights is not None class Pathes(dict): def __init__(self, prefix=prefix, dirname=dirname, **kw): for k, postfix in kw.items(): self[k] = os.path.join(dirname, prefix + postfix) __getattr__ = dict.__getitem__ pathes = Pathes(scored_table="_with_dscore.csv", filtered_table="_with_dscore_filtered.csv", final_stat="_full_stat.csv", summ_stat="_summary_stat.csv", report="_report.pdf", cutoffs="_cutoffs.txt", svalues="_svalues.txt", qvalues="_qvalues.txt", 
d_scores_top_target_peaks="_dscores_top_target_peaks.txt", d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt", mayu_cutoff="_mayu.cutoff", mayu_fasta="_mayu.fasta", mayu_csv="_mayu.csv", ) if not apply_existing_scorer: pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin") if not apply_existing_weights: trained_weights_path = os.path.join(dirname, prefix + "_weights.txt") if not CONFIG.get("target.overwrite", False): found_exsiting_file = False to_check = list(pathes.keys()) if not apply_existing_scorer: to_check.append(pickled_scorer_path) if not apply_existing_weights: to_check.append(trained_weights_path) for p in to_check: if os.path.exists(p): found_exsiting_file = True print "ERROR: %s already exists" % p if found_exsiting_file: print print "please use --target.overwrite option" print return format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s" logging.basicConfig(level=logging.INFO, format=format_) logging.info("config settings:") for k, v in sorted(CONFIG.items()): logging.info(" %s: %s" % (k, v)) start_at = time.time() with warnings.catch_warnings(): warnings.simplefilter("ignore") result, needed_to_persist, trained_weights = PyProphet().process_csv(path, delim_in, persisted_scorer, persisted_weights) (summ_stat, final_stat, scored_table) = result needed = time.time() - start_at print print "=" * 78 print print summ_stat print print "=" * 78 print if summ_stat is not None: summ_stat.to_csv(pathes.summ_stat, sep=delim_out, index=False) print "WRITTEN: ", pathes.summ_stat if final_stat is not None: final_stat.to_csv(pathes.final_stat, sep=delim_out, index=False) print "WRITTEN: ", pathes.final_stat plot_data = save_report(pathes.report, basename, scored_table, final_stat) print "WRITTEN: ", pathes.report cutoffs, svalues, qvalues, top_target, top_decoys = plot_data for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues), ("d_scores_top_target_peaks", top_target), 
("d_scores_top_decoy_peaks", top_decoys)]: path = pathes[name] with open(path, "w") as fp: fp.write(" ".join("%e" % v for v in values)) print "WRITTEN: ", path scored_table.to_csv(pathes.scored_table, sep=delim_out, index=False) print "WRITTEN: ", pathes.scored_table filtered_table = scored_table[scored_table.d_score > CONFIG.get("d_score.cutoff")] filtered_table.to_csv(pathes.filtered_table, sep=delim_out, index=False) print "WRITTEN: ", pathes.filtered_table if not apply_existing_scorer: bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2)) with open(pickled_scorer_path, "wb") as fp: fp.write(bin_data) print "WRITTEN: ", pickled_scorer_path if not apply_existing_weights: np.savetxt(trained_weights_path,trained_weights,delimiter="\t") print "WRITTEN: ", trained_weights_path if CONFIG.get("export.mayu", True): export_mayu(pathes.mayu_cutoff, pathes.mayu_fasta, pathes.mayu_csv, scored_table, final_stat) print "WRITTEN: ", pathes.mayu_cutoff print "WRITTEN: ", pathes.mayu_fasta print "WRITTEN: ", pathes.mayu_csv print seconds = int(needed) msecs = int(1000 * (needed - seconds)) minutes = int(needed / 60.0) print "NEEDED", if minutes: print minutes, "minutes and", print "%d seconds and %d msecs wall time" % (seconds, msecs) print
def _main(args): options = dict() path = None print "PyProphet, unified edition" if "--help" in args: print_help() return if "--version" in args: print_version() return def USER_ERROR(str): print "USER ERROR:", str for arg in args: if arg.startswith("--"): if "=" in arg: pre, __, post = arg.partition("=") options[pre[2:]] = post else: options[arg[2:]] = True else: if path is not None: print_help() USER_ERROR("duplicate input file argument") sys.exit(EX_USAGE) path = arg if path is None: print_help() USER_ERROR("no input file given") sys.exit(EX_USAGE) CONFIG, info = standard_config() invalid_params = get_invalid_params(CONFIG, options) if len(invalid_params) > 0: print_help() for p in invalid_params: USER_ERROR("invalid parameter '%s'" % p) sys.exit(EX_CONFIG) CONFIG.update(options) fix_config_types(CONFIG) dump_config(CONFIG) delim_in = CONFIG.get("delim.in", ",") delim_out = CONFIG.get("delim.out", ",") dirname = CONFIG.get("target.dir", None) if dirname is None: dirname = os.path.dirname(path) basename = os.path.basename(path) prefix, __ = os.path.splitext(basename) persisted_scorer = None apply_scorer = CONFIG.get("apply_scorer") if apply_scorer: if not os.path.exists(apply_scorer): USER_ERROR("scorer file %s does not exist" % apply_scorer) sys.exit(EX_CONFIG) try: persisted_scorer = cPickle.loads(zlib.decompress(open(apply_scorer, "rb").read())) except: import traceback traceback.print_exc() raise # print "## SCORER PATH: ", apply_scorer # print "## PERSISTED SCORER: ", persisted_scorer apply_existing_scorer = persisted_scorer is not None if not apply_existing_scorer: pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin") persisted_weights = None apply_weights = CONFIG.get("apply_weights") if apply_weights: if not os.path.exists(apply_weights): USER_ERROR("weights file %s does not exist" % apply_weights) sys.exit(EX_CONFIG) try: persisted_weights = np.loadtxt(apply_weights) except: import traceback traceback.print_exc() raise 
apply_existing_weights = persisted_weights is not None if not apply_existing_weights: trained_weights_path = os.path.join(dirname, prefix + "_weights.txt") class Paths(dict): def __init__(self, prefix=prefix, dirname=dirname, **kw): for k, postfix in kw.items(): self[k] = os.path.join(dirname, prefix + postfix) __getattr__ = dict.__getitem__ paths = Paths(scored_table="_with_dscore.csv", filtered_table="_with_dscore_filtered.csv", output="_output.csv", final_stat="_full_stat.csv", summ_stat="_summary_stat.csv", report="_report.pdf", cutoffs="_cutoffs.txt", svalues="_svalues.txt", d_scores_top_target_peaks="_dscores_top_target_peaks.txt", d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt", mayu_cutoff="_mayu.cutoff", mayu_fasta="_mayu.fasta", mayu_csv="_mayu.csv", ) if not CONFIG.get("target.overwrite", False): found_existing_file = False to_check = list(paths.keys()) if not apply_existing_scorer: to_check.append(pickled_scorer_path) if not apply_existing_weights: to_check.append(trained_weights_path) for p in to_check: if os.path.exists(p): found_existing_file = True print "OUTPUT ERROR: %s already exists" % p if found_existing_file: print print "please use --target.overwrite option" print sys.exit(EX_CANTCREAT) format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s" logging.basicConfig(level=logging.INFO, format=format_) logging.info("config settings:") for k, v in sorted(CONFIG.items()): logging.info(" %s: %s" % (k, v)) start_at = time.time() with warnings.catch_warnings(): warnings.simplefilter("ignore") classifierType = CONFIG.get("classifier.type") if classifierType == "LDA": classifier = LDALearner elif classifierType == "SGD": classifier = SGDLearner elif classifierType == "linSVM": classifier = LinearSVMLearner elif classifierType == "rbfSVM": classifier = RbfSVMLearner elif classifierType == "polySVM": classifier = PolySVMLearner elif classifierType == "logit": classifier = LogitLearner else: USER_ERROR("classifier '%s' is not 
supported" % classifierType) sys.exit(EX_CONFIG) method = HolyGostQuery(StandardSemiSupervisedTeacher(classifier)) result_tables, clfs_df, needed_to_persist, trained_weights = method.process_csv(path, delim_in, persisted_scorer, persisted_weights) needed = time.time() - start_at train_frac = CONFIG.get("train.fraction") def printSumTable(str, df): with warnings.catch_warnings(): warnings.filterwarnings("ignore",category=DeprecationWarning) if df is not None: print str print df[df.qvalue < 0.21][['qvalue', 'TP', 'cutoff']] print print "=" * 78 print "%d%% of data used for training" % (train_frac*100) print "'" * 78 print #for k in result_dict.iterkeys(): printSumTable(k, result_tables[0]) print print "=" * 78 print if not CONFIG.get("no.file.output"): summ_stat, final_stat, scored_table = result_tables #if 'true_normal' in result_tables: # summ_statT, final_statT, scored_tableT = result_tables['true_normal'] # summ_stat.to_csv(paths.summ_stat, sep=delim_out, index=False) # print "WRITTEN: ", paths.summ_stat # plot_data = save_report(paths.reportT, basename, scored_tableT, final_statT) # print "WRITTEN: ", paths.report if summ_stat is not None: summ_stat.to_csv(paths.summ_stat, sep=delim_out, index=False) print "WRITTEN: ", paths.summ_stat if final_stat is not None: plot_data = save_report(paths.report, basename, scored_table, final_stat) print "WRITTEN: ", paths.report if True: #CONFIG.get("all.output"): final_stat.to_csv(paths.final_stat, sep=delim_out, index=False) print "WRITTEN: ", paths.final_stat cutoffs, svalues, qvalues, top_target, top_decoys = plot_data for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues), ("d_scores_top_target_peaks", top_target), ("d_scores_top_decoy_peaks", top_decoys)]: path = paths[name] with open(path, "w") as fp: fp.write(" ".join("%e" % v for v in values)) print "WRITTEN: ", path if clfs_df is not None and CONFIG.get("all.output"): clfs_df.to_csv("clfs.csv", sep=delim_out, index=False) print 
"WRITTEN: ", "clfs.csv" scored_table.to_csv(paths.scored_table, sep=delim_out, index=False) print "WRITTEN: ", paths.scored_table output = scored_table.rename(columns = {"d_score" : "pyProph_score", "m_score" : "qvalue"}) output.to_csv(paths.output, sep=delim_out, index=False) print "WRITTEN: ", paths.output filtered_table = scored_table[scored_table.d_score > CONFIG.get("d_score.cutoff")] filtered_table.to_csv(paths.filtered_table, sep=delim_out, index=False) print "WRITTEN: ", paths.filtered_table if not apply_existing_scorer: # and CONFIG.get("all.output"): bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2)) with open(pickled_scorer_path, "wb") as fp: fp.write(bin_data) print "WRITTEN: ", pickled_scorer_path if not apply_existing_weights: np.savetxt(trained_weights_path,trained_weights,delimiter="\t") print "WRITTEN: ", trained_weights_path if CONFIG.get("export.mayu", True): export_mayu(paths.mayu_cutoff, paths.mayu_fasta, paths.mayu_csv, scored_table, final_stat) print "WRITTEN: ", paths.mayu_cutoff print "WRITTEN: ", paths.mayu_fasta print "WRITTEN: ", paths.mayu_csv print print "NEEDED %s wall time" % (nice_time(needed)) print
def _main(args): options = dict() path = None if "--help" in args: print_help() return if "--version" in args: print_version() return for arg in args: if arg.startswith("--"): if "=" in arg: pre, __, post = arg.partition("=") options[pre[2:]] = post else: options[arg[2:]] = True else: if path is not None: print_help() raise Exception("duplicate input file argument") path = arg if path is None: print_help() raise Exception("no input file given") CONFIG, info = standard_config() CONFIG.update(options) fix_config_types(CONFIG) dump_config(CONFIG) delim_in = CONFIG.get("delim.in", ",") delim_out = CONFIG.get("delim.out", ",") dirname = CONFIG.get("target.dir", None) if dirname is None: dirname = os.path.dirname(path) basename = os.path.basename(path) prefix, __ = os.path.splitext(basename) persisted = None apply_ = CONFIG.get("apply") if apply_: if not os.path.exists(apply_): raise Exception("scorer file %s does not exist" % apply_) try: persisted = cPickle.loads(zlib.decompress(open(apply_, "rb").read())) except: import traceback traceback.print_exc() raise apply_existing_scorer = persisted is not None class Pathes(dict): def __init__(self, prefix=prefix, dirname=dirname, **kw): for k, postfix in kw.items(): self[k] = os.path.join(dirname, prefix + postfix) __getattr__ = dict.__getitem__ pathes = Pathes(scored_table="_with_dscore.csv", final_stat="_full_stat.csv", summ_stat="_summary_stat.csv", report="_report.pdf", cutoffs="_cutoffs.txt", svalues="_svalues.txt", qvalues="_qvalues.txt", d_scores_top_target_peaks="_dscores_top_target_peaks.txt", d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt", mayu_cutoff="_mayu.cutoff", mayu_fasta="_mayu.fasta", mayu_csv="_mayu.csv", ) if not apply_existing_scorer: pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin") if not CONFIG.get("target.overwrite", False): found_exsiting_file = False to_check = list(pathes.keys()) if not apply_existing_scorer: to_check.append(pickled_scorer_path) for p in to_check: if 
os.path.exists(p): found_exsiting_file = True print "ERROR: %s already exists" % p if found_exsiting_file: print print "please use --target.overwrite option" print return format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s" logging.basicConfig(level=logging.INFO, format=format_) logging.info("config settings:") for k, v in sorted(CONFIG.items()): logging.info(" %s: %s" % (k, v)) start_at = time.time() with warnings.catch_warnings(): warnings.simplefilter("ignore") result, needed_to_persist = PyProphet().process_csv(path, delim_in, persisted) (summ_stat, final_stat, scored_table) = result needed = time.time() - start_at print print "=" * 78 print print summ_stat print print "=" * 78 print if summ_stat is not None: summ_stat.to_csv(pathes.summ_stat, sep=delim_out, index=False) print "WRITTEN: ", pathes.summ_stat if final_stat is not None: final_stat.to_csv(pathes.final_stat, sep=delim_out, index=False) print "WRITTEN: ", pathes.final_stat plot_data = save_report(pathes.report, basename, scored_table, final_stat) print "WRITTEN: ", pathes.report cutoffs, svalues, qvalues, top_target, top_decoys = plot_data for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues), ("d_scores_top_target_peaks", top_target), ("d_scores_top_decoy_peaks", top_decoys)]: path = pathes[name] with open(path, "w") as fp: fp.write(" ".join("%e" % v for v in values)) print "WRITTEN: ", path scored_table.to_csv(pathes.scored_table, sep=delim_out, index=False) print "WRITTEN: ", pathes.scored_table if not apply_existing_scorer: bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2)) with open(pickled_scorer_path, "wb") as fp: fp.write(bin_data) print "WRITTEN: ", pickled_scorer_path if CONFIG.get("export.mayu", True): export_mayu(pathes.mayu_cutoff, pathes.mayu_fasta, pathes.mayu_csv, scored_table, final_stat) print "WRITTEN: ", pathes.mayu_cutoff print "WRITTEN: ", pathes.mayu_fasta print "WRITTEN: ", pathes.mayu_csv print seconds 
= int(needed) msecs = int(1000 * (needed - seconds)) minutes = int(needed / 60.0) print "NEEDED", if minutes: print minutes, "minutes and", print "%d seconds and %d msecs wall time" % (seconds, msecs) print