Example #1
0
def adjust_scores(dataset_name, *batchscore_ids, **kwargs):
    """
    Create normalized versions of batchscore runs in `batchscore_ids`.
    Note that zscores with an absolute of higher than 5 are chopped off (set to 5).
    """
    if 'overwrite' in kwargs:
        overwrite = kwargs['overwrite']
    else:
        overwrite = False

    # Determine adjustment type
    adj_type = None
    for k,v in kwargs.iteritems():
        if k in ['norm', 'window', 'neg']:
            if adj_type:
                raise Exception("Cannot adjust in multiple ways: %s, %s" % (adj_type, k))
            if k == 'window':
                window_size = int(v)
                adj_type = '%s_%d' % (k, window_size)
            else:
                adj_type = k

    # Perform adjustments on all scores..
    for batchscore_id in batchscore_ids:
        dc = DATASET_CONFIGS[dataset_name]

        ds_dir = get_batchscore_dir(dataset_name)
        sc_dir = os.path.join(ds_dir, batchscore_id)
        sc_dir_adj = "%s-%s" % (sc_dir, adj_type)
        if not overwrite:
            resp = raw_input("Creating %s'd version of\n  %s\nat\n  %s\nContinue? y/[n]: "
                    % (adj_type, sc_dir, sc_dir_adj))
            if resp != 'y':
                continue

        if not os.path.exists(sc_dir_adj):
            os.mkdir(sc_dir_adj)
        else:
            for fname in os.listdir(sc_dir_adj):
                os.remove(os.path.join(sc_dir_adj, fname))

        align_files = dc.get_align_files()
        for align_file in align_files:
            out_file = dc.get_out_file(align_file, sc_dir)
            if not os.path.exists(out_file):
                continue
            scores = read_batchscores(out_file)

            if adj_type is 'norm':
                scores_adj = norm_scores(scores, filter=5)
            elif adj_type is 'neg':
                scores_adj = [-s for s in scores]
            elif adj_type.startswith('window'):
                scores_adj = window_scores(scores, window_size)

            out_file_adj = os.path.join(sc_dir_adj, os.path.split(out_file)[-1])
            write_batchscores(out_file_adj, scores_adj)

        print "Created %s" % sc_dir_adj
Example #2
0
 def run_experiment(align_file):
     """
     Run scorers on one aln file.  This is a helper for multithreading the
     scoring of each aln file.
     """
     alignment = Alignment(align_file)
     for scorer in scorers:
         # Score.
         try:
             scores = scorer.score(alignment)
         except Exception, e:
             import traceback
             sys.stderr.write("\nError scoring %s via %s\n" %
                 (alignment.align_file, type(scorer).__name__))
             traceback.print_exc()
             continue
         # Write scores.
         out_file = dataset_config.get_out_file(align_file, scorer.output_dir)
         write_batchscores(out_file, scores)