Example #1
0
def scan(language, file_manifest, source_file_names):
  """Parse the given sources, detect duplicated code and save a report.

  language -- key into ast_suppliers.abstract_syntax_tree_suppliers
  file_manifest -- optional manifest file listing additional sources
  source_file_names -- explicit source files to scan; if empty and no
    manifest is given, the default manifest for the language is used
  """
  # Determine the files to scan. If no files are given, use a default manifest.
  if len(source_file_names) == 0 and file_manifest is None:
    file_manifest = manifest.default_manifest(language)

  source_file_names = set(source_file_names)
  if file_manifest is not None:
    source_file_names.update(set(manifest.contents(file_manifest)))

  supplier = ast_suppliers.abstract_syntax_tree_suppliers[language]

  # TODO: Configuration files!
  parameters = Parameters()
  parameters.distance_threshold = supplier.distance_threshold
  parameters.size_threshold = supplier.size_threshold

  source_files = []

  report = Report(parameters)

  def parse_file(file_name):
    # Best effort: a file that cannot be parsed is logged and skipped
    # instead of aborting the whole scan.
    try:
      logging.info('Parsing ' + file_name + '...')
      source_file = supplier(file_name, parameters)
      source_file.getTree().propagateCoveredLineNumbers()
      source_file.getTree().propagateHeight()
      source_files.append(source_file)
      report.addFileName(file_name)
      logging.info('done')
    except Exception:
      # Was a bare "except:"; narrowed so KeyboardInterrupt/SystemExit
      # still propagate. logging.warn is a deprecated alias of warning.
      logging.warning('Can\'t parse "%s" \n: ' % (file_name,) + traceback.format_exc())

  for file_name in source_file_names:
    parse_file(file_name)

  duplicates = clone_detection_algorithm.findDuplicateCode(source_files, report)
  for n, duplicate in enumerate(duplicates, start=1):
    distance = duplicate.calcDistance()
    summary = CloneSummary(
      "Clone #" + str(n),
      [  # TODO: This is a mess! Most of this info should be assembled on the fly and in member functions.
       Snippet(
        duplicate[i].getSourceFile()._file_name,
        duplicate[i].getCoveredLineNumbers(),
        '\n'.join(duplicate[i].getSourceLines())
        ) for i in [0, 1]], distance)
    report.addClone(summary)
  report.sortByCloneSize()

  save_report(".orphanblack", report)
Example #2
0
def generate_report(param):
    """
    Either generates a report or saves the report.

    Pulls the form variables out of *param* (a cgi/urlparse-style mapping
    of names to lists of values) and passes them along to the report
    module. Returns pdf(param) for 'save', a ('file', (content, title))
    tuple for 'generate', and None for any other action.
    """
    def first(key):
        # Form values arrive as lists; the first entry is the value,
        # with '' as the default when the field is absent.
        return param.get(key, [''])[0]

    number = int(first('number'))
    title = first('title')
    items = []
    for i in range(1, number + 1):
        suffix = str(i)
        stat_type = first('type' + suffix)
        if stat_type == 'count':
            chart_type = first('counttype' + suffix)
            # Dates arrive split across _year/_month/_day fields;
            # reassemble them as "Y-M-D" strings.
            start = '-'.join(first('countstart' + suffix + part)
                             for part in ('_year', '_month', '_day'))
            end = '-'.join(first('countend' + suffix + part)
                           for part in ('_year', '_month', '_day'))
            items.append({'type': 'count',
                          'start': start,
                          'end': end,
                          'cutoff': param.get('countcutoff' + suffix, ['']),
                          'calculation': param.get('countcalc' + suffix, ['']),
                          'group': param.get('countgroup' + suffix, ['']),
                          'chart_type': chart_type})
        elif stat_type == 'scatter':
            items.append({'type': 'scatter',
                          'variables': param.get('scattervariables' + suffix, ['']),
                          'calculation': param.get('scattercalc' + suffix, [''])})
        elif stat_type == 'compare':
            items.append({'type': 'compare',
                          'variable': first('comparevariables' + suffix),
                          'group': param.get('comparegroup' + suffix, ['']),
                          'calcvariable': first('comparecalcvariable' + suffix),
                          'calculation': param.get('comparecalc' + suffix, ['']),
                          'cutoff': param.get('comparecutoff' + suffix, [''])})

    action = first('action')
    if action == 'save':
        report.save_report(title, items)
        return pdf(param)
    elif action == 'generate':
        content = report.generate_report(title, items)
        return ('file', (content, title))
Example #3
0
    def save_results(self, result, extra_writes, out_pathes):
        summ_stat_path = extra_writes.get("summ_stat_path")
        if summ_stat_path is not None:
            result.summary_statistics.to_csv(summ_stat_path, self.delim_out, index=False)
            print "WRITTEN: ", summ_stat_path

        full_stat_path = extra_writes.get("full_stat_path")
        if full_stat_path is not None:
            result.final_statistics.to_csv(full_stat_path, sep=self.delim_out, index=False)
            print "WRITTEN: ", full_stat_path

        for scored_table, out_path in zip(result.scored_tables, out_pathes):

            cutoff = CONFIG.get("d_score.cutoff")
            scored_table.to_csv(out_path.scored_table, out_path.filtered_table, cutoff, sep=self.delim_out, index=False)
            print "WRITTEN: ", out_path.scored_table
            print "WRITTEN: ", out_path.filtered_table

            if result.final_statistics is not None:

                cutoffs = result.final_statistics["cutoff"].values
                svalues = result.final_statistics["svalue"].values
                qvalues = result.final_statistics["qvalue"].values
                decoys, targets, top_decoys, top_targets = scored_table.scores()
                plot_data = save_report(
                    out_path.report, self.prefix, decoys, targets, top_decoys, top_targets, cutoffs, svalues, qvalues)
                print "WRITTEN: ", out_path.report

                cutoffs, svalues, qvalues, top_targets, top_decoys = plot_data
                for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues),
                                       ("d_scores_top_target_peaks", top_targets),
                                       ("d_scores_top_decoy_peaks", top_decoys)]:
                    path = out_path[name]
                    with open(path, "w") as fp:
                        fp.write(" ".join("%e" % v for v in values))
                    print "WRITTEN: ", path

            if CONFIG.get("export.mayu"):
                if result.final_statistics:
                    export_mayu(out_pathes.mayu_cutoff, out_pathes.mayu_fasta,
                                out_pathes.mayu_csv, scored_table, result.final_statistics)
                    print "WRITTEN: ", out_pathes.mayu_cutoff
                    print "WRITTEN: ", out_pathes.mayu_fasta
                    print "WRITTEN: ", out_pathes.mayu_csv
                else:
                    logging.warn("can not write mayu table in this case")
Example #4
0
    def save_results(self, result, extra_writes, out_pathes, pvalues):
        """Write all result artifacts to disk.

        result -- holds summary_statistics, final_statistics, scored_tables
        extra_writes -- dict-like with optional "summ_stat_path" /
            "full_stat_path" entries
        out_pathes -- per-input output path bundles; element 0 also carries
            the mayu output paths
        pvalues -- p-values forwarded to save_report for the report plots
        """
        summ_stat_path = extra_writes.get("summ_stat_path")
        if summ_stat_path is not None:
            result.summary_statistics.to_csv(summ_stat_path,
                                             self.delim_out,
                                             index=False)
            print "WRITTEN: ", summ_stat_path

        full_stat_path = extra_writes.get("full_stat_path")
        if full_stat_path is not None:
            result.final_statistics.to_csv(full_stat_path,
                                           sep=self.delim_out,
                                           index=False)
            print "WRITTEN: ", full_stat_path

        # One scored table per input file; out_pathes is parallel to both.
        for input_path, scored_table, out_path in zip(self.pathes,
                                                      result.scored_tables,
                                                      out_pathes):

            cutoff = CONFIG.get("d_score.cutoff")
            scored_table.to_csv(out_path.scored_table,
                                out_path.filtered_table,
                                cutoff,
                                sep=self.delim_out,
                                index=False)
            print "WRITTEN: ", out_path.scored_table
            print "WRITTEN: ", out_path.filtered_table

            if CONFIG.get("rewrite_sqmass"):

                # get basepath by stripping a known tabular extension
                basepath = input_path.split(".tsv")[0]
                basepath = basepath.split(".txt")[0]
                basepath = basepath.split(".csv")[0]

                # try to find a matching sqMass file
                sqmass_file = None
                if os.path.exists(basepath + ".chrom.sqMass"):
                    sqmass_file = basepath + ".chrom.sqMass"
                elif os.path.exists(basepath + ".sqMass"):
                    sqmass_file = basepath + ".sqMass"

                # get selected chromatograms on the filtered table
                df = scored_table.df[scored_table.df.d_score > cutoff]
                fragment_anno = df.aggr_Fragment_Annotation.unique()
                prec_anno = df.aggr_prec_Fragment_Annotation.unique()

                # Annotations are ";"-separated lists of chromatogram labels.
                labels = []
                for l in fragment_anno:
                    labels.extend(l.split(";"))
                for l in prec_anno:
                    labels.extend(l.split(";"))

                # NOTE(review): sqmass_file stays None when no matching
                # .sqMass file was found -- confirm filterChromByLabels
                # tolerates None before relying on this path.
                filterChromByLabels(sqmass_file, out_path.filtered_chroms,
                                    labels)

            if result.final_statistics is not None:

                cutoffs = result.final_statistics["cutoff"].values
                svalues = result.final_statistics["svalue"].values
                qvalues = result.final_statistics["qvalue"].values
                # pvalues = result.final_statistics["pvalue"].values
                decoys, targets, top_decoys, top_targets = scored_table.scores(
                )
                lambda_ = CONFIG.get("final_statistics.lambda")
                plot_data = save_report(out_path.report, self.prefix, decoys,
                                        targets, top_decoys, top_targets,
                                        cutoffs, svalues, qvalues, pvalues,
                                        lambda_)
                print "WRITTEN: ", out_path.report

                # Persist exactly the (possibly reduced) data that was plotted.
                cutoffs, svalues, qvalues, top_targets, top_decoys = plot_data
                for (name,
                     values) in [("cutoffs", cutoffs), ("svalues", svalues),
                                 ("qvalues", qvalues),
                                 ("d_scores_top_target_peaks", top_targets),
                                 ("d_scores_top_decoy_peaks", top_decoys)]:
                    path = out_path[name]
                    with open(path, "w") as fp:
                        fp.write(" ".join("%e" % v for v in values))
                    print "WRITTEN: ", path

            if CONFIG.get("export.mayu"):
                if result.final_statistics is not None:
                    export_mayu(out_pathes[0]['mayu_cutoff'],
                                out_pathes[0]['mayu_fasta'],
                                out_pathes[0]['mayu_csv'], scored_table,
                                result.final_statistics)
                    print "WRITTEN: ", out_pathes[0]['mayu_cutoff']
                    print "WRITTEN: ", out_pathes[0]['mayu_fasta']
                    print "WRITTEN: ", out_pathes[0]['mayu_csv']
                else:
                    logging.warn("can not write mayu table in this case")
Example #5
0
def _main(args):
    """Command line entry point: parse args, run PyProphet on one input CSV
    and write statistics, report, scorer/weights and export files.
    """
    options = dict()
    path = None

    if "--help" in args:
        print_help()
        return

    if "--version" in args:
        print_version()
        return

    # Split args into "--key=value" / "--flag" options and the one input file.
    for arg in args:
        if arg.startswith("--"):
            if "=" in arg:
                pre, __, post = arg.partition("=")
                options[pre[2:]] = post
            else:
                options[arg[2:]] = True
        else:
            if path is not None:
                print_help()
                raise Exception("duplicate input file argument")
            path = arg

    if path is None:
        print_help()
        raise Exception("no input file given")

    CONFIG, info = standard_config()
    CONFIG.update(options)
    fix_config_types(CONFIG)
    dump_config(CONFIG)

    delim_in = CONFIG.get("delim.in", ",")
    delim_out = CONFIG.get("delim.out", ",")

    dirname = CONFIG.get("target.dir", None)
    if dirname is None:
        dirname = os.path.dirname(path)

    basename = os.path.basename(path)
    prefix, __ = os.path.splitext(basename)

    # Optionally load a previously persisted scorer (zlib-compressed pickle).
    persisted_scorer = None
    apply_scorer = CONFIG.get("apply_scorer")
    if apply_scorer:
        if not os.path.exists(apply_scorer):
            raise Exception("scorer file %s does not exist" % apply_scorer)
        try:
            persisted_scorer = cPickle.loads(zlib.decompress(open(apply_scorer, "rb").read()))
        except:
            import traceback
            traceback.print_exc()
            raise

    apply_existing_scorer = persisted_scorer is not None

    # Optionally load previously trained classifier weights.
    persisted_weights = None
    apply_weights = CONFIG.get("apply_weights")
    if apply_weights:
        if not os.path.exists(apply_weights):
            raise Exception("weights file %s does not exist" % apply_weights)
        try:
            persisted_weights = np.loadtxt(apply_weights)
        except:
            import traceback
            traceback.print_exc()
            raise

    apply_existing_weights = persisted_weights is not None

    class Pathes(dict):
        # Maps logical output names to "<dirname>/<prefix><postfix>";
        # attribute access is an alias for item access.

        def __init__(self, prefix=prefix, dirname=dirname, **kw):
            for k, postfix in kw.items():
                self[k] = os.path.join(dirname, prefix + postfix)
        __getattr__ = dict.__getitem__

    pathes = Pathes(scored_table="_with_dscore.csv",
                    filtered_table="_with_dscore_filtered.csv",
                    final_stat="_full_stat.csv",
                    summ_stat="_summary_stat.csv",
                    report="_report.pdf",
                    cutoffs="_cutoffs.txt",
                    svalues="_svalues.txt",
                    qvalues="_qvalues.txt",
                    d_scores_top_target_peaks="_dscores_top_target_peaks.txt",
                    d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt",
                    mayu_cutoff="_mayu.cutoff",
                    mayu_fasta="_mayu.fasta",
                    mayu_csv="_mayu.csv",
                    )

    if not apply_existing_scorer:
        pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin")

    if not apply_existing_weights:
        trained_weights_path = os.path.join(dirname, prefix + "_weights.txt")

    # Refuse to clobber existing output files unless explicitly allowed.
    if not CONFIG.get("target.overwrite", False):
        found_existing_file = False  # was misspelled "found_exsiting_file"
        to_check = list(pathes.keys())
        if not apply_existing_scorer:
            to_check.append(pickled_scorer_path)
        if not apply_existing_weights:
            to_check.append(trained_weights_path)
        for p in to_check:
            if os.path.exists(p):
                found_existing_file = True
                print "ERROR: %s already exists" % p
        if found_existing_file:
            print
            print "please use --target.overwrite option"
            print
            return

    format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=format_)
    logging.info("config settings:")
    for k, v in sorted(CONFIG.items()):
        logging.info("    %s: %s" % (k, v))
    start_at = time.time()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result, needed_to_persist, trained_weights = PyProphet().process_csv(path, delim_in, persisted_scorer, persisted_weights)
        (summ_stat, final_stat, scored_table) = result
    needed = time.time() - start_at

    print
    print "=" * 78
    print
    print summ_stat
    print
    print "=" * 78

    print
    if summ_stat is not None:
        summ_stat.to_csv(pathes.summ_stat, sep=delim_out, index=False)
        print "WRITTEN: ", pathes.summ_stat
    if final_stat is not None:
        final_stat.to_csv(pathes.final_stat, sep=delim_out, index=False)
        print "WRITTEN: ", pathes.final_stat
        plot_data = save_report(pathes.report, basename, scored_table, final_stat)
        print "WRITTEN: ", pathes.report
        # Persist exactly the (possibly reduced) data that was plotted.
        cutoffs, svalues, qvalues, top_target, top_decoys = plot_data
        for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues),
                               ("d_scores_top_target_peaks", top_target),
                               ("d_scores_top_decoy_peaks", top_decoys)]:
            path = pathes[name]
            with open(path, "w") as fp:
                fp.write(" ".join("%e" % v for v in values))
            print "WRITTEN: ", path
    scored_table.to_csv(pathes.scored_table, sep=delim_out, index=False)
    print "WRITTEN: ", pathes.scored_table

    # Keep only peaks above the d-score cutoff for the filtered table.
    filtered_table = scored_table[scored_table.d_score > CONFIG.get("d_score.cutoff")]

    filtered_table.to_csv(pathes.filtered_table, sep=delim_out, index=False)
    print "WRITTEN: ", pathes.filtered_table

    if not apply_existing_scorer:
        bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2))
        with open(pickled_scorer_path, "wb") as fp:
            fp.write(bin_data)
        print "WRITTEN: ", pickled_scorer_path

    if not apply_existing_weights:
        np.savetxt(trained_weights_path, trained_weights, delimiter="\t")
        print "WRITTEN: ", trained_weights_path

    if CONFIG.get("export.mayu", True):
        export_mayu(pathes.mayu_cutoff, pathes.mayu_fasta, pathes.mayu_csv, scored_table, final_stat)
        print "WRITTEN: ", pathes.mayu_cutoff
        print "WRITTEN: ", pathes.mayu_fasta
        print "WRITTEN: ", pathes.mayu_csv
    print

    # Break the wall time into minutes / seconds / msecs for display.
    total_seconds = int(needed)
    msecs = int(1000 * (needed - total_seconds))
    minutes = total_seconds // 60
    # BUGFIX: previously the full second count was printed after the minute
    # count (e.g. "1 minutes and 90 seconds"); print the remainder instead.
    seconds = total_seconds % 60

    print "NEEDED",
    if minutes:
        print minutes, "minutes and",

    print "%d seconds and %d msecs wall time" % (seconds, msecs)
    print
Example #6
0
def _main(args):

	options = dict()
	path = None
	
	print "PyProphet, unified edition"
	
	if "--help" in args:
		print_help()
		return

	if "--version" in args:
		print_version()
		return

	def USER_ERROR(str):
		print "USER ERROR:", str

	for arg in args:
		if arg.startswith("--"):
			if "=" in arg:
				pre, __, post = arg.partition("=")
				options[pre[2:]] = post
			else:
				options[arg[2:]] = True
		else:
			if path is not None:
				print_help()
				USER_ERROR("duplicate input file argument")
				sys.exit(EX_USAGE)
			path = arg

	if path is None:
		print_help()
		USER_ERROR("no input file given")
		sys.exit(EX_USAGE)

	CONFIG, info = standard_config()
	invalid_params = get_invalid_params(CONFIG, options)
	if len(invalid_params) > 0:
		print_help()
		for p in invalid_params:
			USER_ERROR("invalid parameter '%s'" % p)
		sys.exit(EX_CONFIG)

	CONFIG.update(options)
	fix_config_types(CONFIG)
	dump_config(CONFIG)

	delim_in = CONFIG.get("delim.in", ",")
	delim_out = CONFIG.get("delim.out", ",")

	dirname = CONFIG.get("target.dir", None)
	if dirname is None:
		dirname = os.path.dirname(path)

	basename = os.path.basename(path)
	prefix, __ = os.path.splitext(basename)



	persisted_scorer = None
	apply_scorer = CONFIG.get("apply_scorer")
	if apply_scorer:
		if not os.path.exists(apply_scorer):
			USER_ERROR("scorer file %s does not exist" % apply_scorer)
			sys.exit(EX_CONFIG)
		try:
			persisted_scorer = cPickle.loads(zlib.decompress(open(apply_scorer, "rb").read()))
		except:
			import traceback
			traceback.print_exc()
			raise

#	print "## SCORER PATH: ", apply_scorer	
#	print "## PERSISTED SCORER: ", persisted_scorer	
	apply_existing_scorer = persisted_scorer is not None
	if not apply_existing_scorer:
		pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin")



	persisted_weights = None
	apply_weights = CONFIG.get("apply_weights")
	if apply_weights:
		if not os.path.exists(apply_weights):
			USER_ERROR("weights file %s does not exist" % apply_weights)
			sys.exit(EX_CONFIG)
		try:
			persisted_weights = np.loadtxt(apply_weights)
		except:
			import traceback
			traceback.print_exc()
			raise

	apply_existing_weights = persisted_weights is not None
	if not apply_existing_weights:
		trained_weights_path = os.path.join(dirname, prefix + "_weights.txt")


	class Paths(dict):
		def __init__(self, prefix=prefix, dirname=dirname, **kw):
			for k, postfix in kw.items():
				self[k] = os.path.join(dirname, prefix + postfix)
		__getattr__ = dict.__getitem__

	paths = Paths(scored_table="_with_dscore.csv",
					filtered_table="_with_dscore_filtered.csv",
					output="_output.csv",
					final_stat="_full_stat.csv",
					summ_stat="_summary_stat.csv",
					report="_report.pdf",
					cutoffs="_cutoffs.txt",
					svalues="_svalues.txt",
					d_scores_top_target_peaks="_dscores_top_target_peaks.txt",
					d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt",
					mayu_cutoff="_mayu.cutoff",
					mayu_fasta="_mayu.fasta",
					mayu_csv="_mayu.csv",
					)


	
	if not CONFIG.get("target.overwrite", False):
		found_existing_file = False
		to_check = list(paths.keys())
		if not apply_existing_scorer:
			to_check.append(pickled_scorer_path)
		if not apply_existing_weights:
			to_check.append(trained_weights_path)
		for p in to_check:
			if os.path.exists(p):
				found_existing_file = True
				print "OUTPUT ERROR: %s already exists" % p
		if found_existing_file:
			print
			print "please use --target.overwrite option"
			print
			sys.exit(EX_CANTCREAT)

	format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s"
	logging.basicConfig(level=logging.INFO, format=format_)
	logging.info("config settings:")
	for k, v in sorted(CONFIG.items()):
		logging.info("	%s: %s" % (k, v))
	start_at = time.time()
	with warnings.catch_warnings():
		warnings.simplefilter("ignore")
		
		classifierType = CONFIG.get("classifier.type")
		if classifierType == "LDA":
			classifier = LDALearner
		elif classifierType == "SGD":
			classifier = SGDLearner
		elif classifierType == "linSVM":
			classifier = LinearSVMLearner
		elif classifierType == "rbfSVM":
			classifier = RbfSVMLearner
		elif classifierType == "polySVM":
			classifier = PolySVMLearner
		elif classifierType == "logit":
			classifier = LogitLearner
		else:
			USER_ERROR("classifier '%s' is not supported" % classifierType)
			sys.exit(EX_CONFIG)
		
		method = HolyGostQuery(StandardSemiSupervisedTeacher(classifier))
		result_tables, clfs_df, needed_to_persist, trained_weights = method.process_csv(path, delim_in, persisted_scorer, persisted_weights)
	
	needed = time.time() - start_at

	train_frac 	= CONFIG.get("train.fraction")
	def printSumTable(str, df):
		with warnings.catch_warnings():
			warnings.filterwarnings("ignore",category=DeprecationWarning)
			if df is not None:
				print str
				print df[df.qvalue < 0.21][['qvalue', 'TP', 'cutoff']]
	print
	print "=" * 78
	print "%d%% of data used for training" % (train_frac*100)
	print "'" * 78
	print
	#for k in result_dict.iterkeys():
	printSumTable(k, result_tables[0])
	print
	print "=" * 78
	print
	
	if not CONFIG.get("no.file.output"):
		summ_stat, final_stat, scored_table = result_tables
		#if 'true_normal' in result_tables:
		#	summ_statT, final_statT, scored_tableT = result_tables['true_normal']
		#	summ_stat.to_csv(paths.summ_stat, sep=delim_out, index=False)
		#	print "WRITTEN: ", paths.summ_stat
		#	plot_data = save_report(paths.reportT, basename, scored_tableT, final_statT)
		#	print "WRITTEN: ", paths.report
		if summ_stat is not None:
			summ_stat.to_csv(paths.summ_stat, sep=delim_out, index=False)
			print "WRITTEN: ", paths.summ_stat

		if final_stat is not None:
			plot_data = save_report(paths.report, basename, scored_table, final_stat)
			print "WRITTEN: ", paths.report
			
			if True: #CONFIG.get("all.output"):
				final_stat.to_csv(paths.final_stat, sep=delim_out, index=False)
				print "WRITTEN: ", paths.final_stat
				
				cutoffs, svalues, qvalues, top_target, top_decoys = plot_data
				for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues),
								   ("d_scores_top_target_peaks", top_target),
								   ("d_scores_top_decoy_peaks", top_decoys)]:
					path = paths[name]
					with open(path, "w") as fp:
						fp.write(" ".join("%e" % v for v in values))
					print "WRITTEN: ", path
		
		if clfs_df is not None and CONFIG.get("all.output"):
			clfs_df.to_csv("clfs.csv", sep=delim_out, index=False)
			print "WRITTEN: ", "clfs.csv"
		
		scored_table.to_csv(paths.scored_table, sep=delim_out, index=False)
		print "WRITTEN: ", paths.scored_table

		output = scored_table.rename(columns = {"d_score" : "pyProph_score", "m_score" : "qvalue"})
		output.to_csv(paths.output, sep=delim_out, index=False)
		print "WRITTEN: ", paths.output

		filtered_table = scored_table[scored_table.d_score > CONFIG.get("d_score.cutoff")]
		filtered_table.to_csv(paths.filtered_table, sep=delim_out, index=False)
		print "WRITTEN: ", paths.filtered_table
		
		if not apply_existing_scorer: # and CONFIG.get("all.output"):
			bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2))
			with open(pickled_scorer_path, "wb") as fp:
				fp.write(bin_data)
			print "WRITTEN: ", pickled_scorer_path

		if not apply_existing_weights:
			np.savetxt(trained_weights_path,trained_weights,delimiter="\t")
			print "WRITTEN: ", trained_weights_path

		if CONFIG.get("export.mayu", True):
			export_mayu(paths.mayu_cutoff, paths.mayu_fasta, paths.mayu_csv, scored_table, final_stat)
			print "WRITTEN: ", paths.mayu_cutoff
			print "WRITTEN: ", paths.mayu_fasta
			print "WRITTEN: ", paths.mayu_csv
		print


	print "NEEDED %s wall time" % (nice_time(needed))
	print
Example #7
0
def _main(args):
    """Command line entry point: parse args, score one input CSV with
    PyProphet and write statistics, report and export files.
    """
    options = dict()
    path = None

    if "--help" in args:
        print_help()
        return

    if "--version" in args:
        print_version()
        return

    # Split args into "--key=value" / "--flag" options and the one input file.
    for arg in args:
        if arg.startswith("--"):
            if "=" in arg:
                pre, __, post = arg.partition("=")
                options[pre[2:]] = post
            else:
                options[arg[2:]] = True
        else:
            if path is not None:
                print_help()
                raise Exception("duplicate input file argument")
            path = arg

    if path is None:
        print_help()
        raise Exception("no input file given")

    CONFIG, info = standard_config()
    CONFIG.update(options)
    fix_config_types(CONFIG)
    dump_config(CONFIG)

    delim_in = CONFIG.get("delim.in", ",")
    delim_out = CONFIG.get("delim.out", ",")

    dirname = CONFIG.get("target.dir", None)
    if dirname is None:
        dirname = os.path.dirname(path)

    basename = os.path.basename(path)
    prefix, __ = os.path.splitext(basename)

    # Optionally load a previously persisted scorer (zlib-compressed pickle).
    persisted = None
    apply_ = CONFIG.get("apply")
    if apply_:
        if not os.path.exists(apply_):
            raise Exception("scorer file %s does not exist" % apply_)
        try:
            persisted = cPickle.loads(zlib.decompress(open(apply_, "rb").read()))
        except:
            import traceback
            traceback.print_exc()
            raise

    apply_existing_scorer = persisted is not None

    class Pathes(dict):
        # Maps logical output names to "<dirname>/<prefix><postfix>";
        # attribute access is an alias for item access.
        def __init__(self, prefix=prefix, dirname=dirname, **kw):
            for k, postfix in kw.items():
                self[k] = os.path.join(dirname, prefix + postfix)
        __getattr__ = dict.__getitem__

    pathes = Pathes(scored_table="_with_dscore.csv",
                    final_stat="_full_stat.csv",
                    summ_stat="_summary_stat.csv",
                    report="_report.pdf",
                    cutoffs="_cutoffs.txt",
                    svalues="_svalues.txt",
                    qvalues="_qvalues.txt",
                    d_scores_top_target_peaks="_dscores_top_target_peaks.txt",
                    d_scores_top_decoy_peaks="_dscores_top_decoy_peaks.txt",
                    mayu_cutoff="_mayu.cutoff",
                    mayu_fasta="_mayu.fasta",
                    mayu_csv="_mayu.csv",
    )

    if not apply_existing_scorer:
        pickled_scorer_path = os.path.join(dirname, prefix + "_scorer.bin")

    # Refuse to clobber existing output files unless explicitly allowed.
    if not CONFIG.get("target.overwrite", False):
        found_existing_file = False  # was misspelled "found_exsiting_file"
        to_check = list(pathes.keys())
        if not apply_existing_scorer:
            to_check.append(pickled_scorer_path)
        for p in to_check:
            if os.path.exists(p):
                found_existing_file = True
                print "ERROR: %s already exists" % p
        if found_existing_file:
            print
            print "please use --target.overwrite option"
            print
            return

    format_ = "%(levelname)s -- [pid=%(process)s] : %(asctime)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=format_)
    logging.info("config settings:")
    for k, v in sorted(CONFIG.items()):
        logging.info("    %s: %s" % (k, v))
    start_at = time.time()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result, needed_to_persist = PyProphet().process_csv(path, delim_in, persisted)
        (summ_stat, final_stat, scored_table) = result
    needed = time.time() - start_at

    print
    print "=" * 78
    print
    print summ_stat
    print
    print "=" * 78

    print
    if summ_stat is not None:
        summ_stat.to_csv(pathes.summ_stat, sep=delim_out, index=False)
        print "WRITTEN: ", pathes.summ_stat
    if final_stat is not None:
        final_stat.to_csv(pathes.final_stat, sep=delim_out, index=False)
        print "WRITTEN: ", pathes.final_stat
        plot_data = save_report(pathes.report, basename, scored_table, final_stat)
        print "WRITTEN: ", pathes.report
        # Persist exactly the (possibly reduced) data that was plotted.
        cutoffs, svalues, qvalues, top_target, top_decoys = plot_data
        for (name, values) in [("cutoffs", cutoffs), ("svalues", svalues), ("qvalues", qvalues),
                               ("d_scores_top_target_peaks", top_target),
                               ("d_scores_top_decoy_peaks", top_decoys)]:
            path = pathes[name]
            with open(path, "w") as fp:
                fp.write(" ".join("%e" % v for v in values))
            print "WRITTEN: ", path
    scored_table.to_csv(pathes.scored_table, sep=delim_out, index=False)
    print "WRITTEN: ", pathes.scored_table

    if not apply_existing_scorer:
        bin_data = zlib.compress(cPickle.dumps(needed_to_persist, protocol=2))
        with open(pickled_scorer_path, "wb") as fp:
            fp.write(bin_data)
        print "WRITTEN: ", pickled_scorer_path

    if CONFIG.get("export.mayu", True):
        export_mayu(pathes.mayu_cutoff, pathes.mayu_fasta, pathes.mayu_csv, scored_table, final_stat)
        print "WRITTEN: ", pathes.mayu_cutoff
        print "WRITTEN: ", pathes.mayu_fasta
        print "WRITTEN: ", pathes.mayu_csv
    print

    # Break the wall time into minutes / seconds / msecs for display.
    total_seconds = int(needed)
    msecs = int(1000 * (needed - total_seconds))
    minutes = total_seconds // 60
    # BUGFIX: previously the full second count was printed after the minute
    # count (e.g. "1 minutes and 90 seconds"); print the remainder instead.
    seconds = total_seconds % 60

    print "NEEDED",
    if minutes:
        print minutes, "minutes and",

    print "%d seconds and %d msecs wall time" % (seconds, msecs)
    print