예제 #1
0
def db_results_printJSON(ssea_dir, ss_id):
    '''
    prints every result stored in <ssea_dir>/results.json to stdout,
    with its ss_id replaced by the given sample set id

    ssea_dir: directory containing 'results.json' (one json document
              per line)
    ss_id: sample set id; converted with int() before being assigned
    '''
    results_json_file = os.path.join(ssea_dir, 'results.json')
    with open(results_json_file, 'r') as fin:
        for line in fin:
            # load json document (one per line)
            result = Result.from_json(line.strip())
            result.ss_id = int(ss_id)
            # parenthesized single-argument form prints identically under
            # Python 2 and is valid syntax under Python 3
            print(result.to_json())
예제 #2
0
def db_results_printJSON(ssea_dir, ss_id):
    '''
    prints every result stored in <ssea_dir>/results.json to stdout,
    with its ss_id replaced by the given sample set id

    ssea_dir: directory containing 'results.json' (one json document
              per line)
    ss_id: sample set id; converted with int() before being assigned
    '''
    results_json_file = os.path.join(ssea_dir, 'results.json')
    with open(results_json_file, 'r') as fin:
        for line in fin:
            # load json document (one per line)
            result = Result.from_json(line.strip())
            result.ss_id = int(ss_id)
            # parenthesized single-argument form prints identically under
            # Python 2 and is valid syntax under Python 3
            print(result.to_json())
예제 #3
0
def parse_results(filename):
    '''
    generator over the results stored in a file

    filename: path to a text file holding one json document per line

    yields the object built by Result.from_json() for each line, after
    stripping surrounding whitespace from the line
    '''
    with open(filename, 'r') as handle:
        for raw_line in handle:
            yield Result.from_json(raw_line.strip())
예제 #4
0
def _cmp_json_nes(line):
    '''
    comparison function for batch_sort

    line: one json document; it is stripped and parsed with
          Result.from_json()

    returns the absolute value of the parsed result's nes attribute
    '''
    nes = Result.from_json(line.strip()).nes
    return abs(nes)
예제 #5
0
def compute_qvalues(json_iterator, hists_file):
    '''
    computes fdr q values from json Result objects sorted
    by abs(NES) (low to high)

    json_iterator: iterator that yields json objects in sorted order
    hists_file: contains histogram data from null distribution

    this is a generator: for every input line it yields the updated
    result serialized with to_json(), followed by os.linesep as a
    separate item. results whose es is zero are passed through without
    any fdr/rank fields being set.
    '''
    # load histogram data and compute cumulative sums for fdr
    # interpolation; each cdf has a leading zero, so it holds one more
    # entry than the histogram it was built from
    hists = np.load(hists_file)
    try:
        cdfs = {}
        for k in ('null_nes_neg', 'null_nes_pos',
                  'obs_nes_neg', 'obs_nes_pos'):
            h = hists[k]
            # builtin float is used because the np.float alias was
            # removed from NumPy
            cdf = np.zeros(h.shape[0] + 1, dtype=float)
            cdf[1:] = h.cumsum()
            cdfs[k] = cdf
    finally:
        # the histograms are not needed once the cdfs exist; closing here
        # releases the file even when the caller stops iterating early
        hists.close()
    # keep track of minimum FDR and rank for positive
    # and negative NES separately
    NEG = 0
    POS = 1
    null_keys = ['null_nes_neg', 'null_nes_pos']
    obs_keys = ['obs_nes_neg', 'obs_nes_pos']
    tot_obs = [cdfs['obs_nes_neg'][-1], cdfs['obs_nes_pos'][-1]]
    # ranks count down from the total number of observations per sign
    cur_ranks = [tot_obs[0], tot_obs[1]]
    min_fdrs = [1.0, 1.0]
    # perform merge of sorted json files
    for line in json_iterator:
        # load json document (one per line)
        res = Result.from_json(line.strip())
        es = res.es
        log_nes_clip = np.log10(np.clip(abs(res.nes), NES_MIN, NES_MAX))
        if es != 0:
            if es < 0:
                sign_ind = NEG
                sign = -1.0
            else:
                sign_ind = POS
                sign = 1.0
            # For a given NES(S) = NES* >= 0, the FDR is the ratio of the
            # percentage of all permutations NES(S,null) >= 0, whose
            # NES(S,null) >= NES*, divided by the percentage of observed S with
            # NES(S) >= 0, whose NES(S) >= NES*, and similarly for
            # NES(S) = NES* <= 0.
            # to compute a sample set specific FDR q value we look at the
            # aggregated enrichment scores for all tests of that sample set
            # compute the cumulative sums of NES histograms use interpolation
            # to find fraction NES(null) >= NES* and account for the observed
            # permutation in the null set interpolate NES in log space
            null_nes_cumsum = cdfs[null_keys[sign_ind]]
            null_n = interp(log_nes_clip, LOG_NES_BINS, null_nes_cumsum)
            obs_nes_cumsum = cdfs[obs_keys[sign_ind]]
            obs_n = interp(log_nes_clip, LOG_NES_BINS, obs_nes_cumsum)
            n = 1.0 - (null_n / null_nes_cumsum[-1])
            d = 1.0 - (obs_n / obs_nes_cumsum[-1])
            # update json dict; a non-positive numerator or denominator
            # is reported as a q value of zero
            if (n <= 0.0) or (d <= 0.0):
                res.ss_fdr_q_value = 0.0
            else:
                res.ss_fdr_q_value = n / d
            # compare with minimum FDR and adjust minimum FDR if necessary,
            # so the reported q value never rises above one seen earlier
            # for the same sign
            if res.ss_fdr_q_value < min_fdrs[sign_ind]:
                min_fdrs[sign_ind] = res.ss_fdr_q_value
            else:
                res.ss_fdr_q_value = min_fdrs[sign_ind]
            res.ss_rank = cur_ranks[sign_ind]
            res.ss_frac = sign * (1.0 - (
                (res.ss_rank - 1) / float(tot_obs[sign_ind])))
            cur_ranks[sign_ind] -= 1
        # convert back to json
        yield res.to_json()
        yield os.linesep
예제 #6
0
def parse_results(filename):
    '''
    generator over the results stored in a file

    filename: path to a text file holding one json document per line

    yields the object built by Result.from_json() for each line, after
    stripping surrounding whitespace from the line
    '''
    with open(filename, 'r') as handle:
        for raw_line in handle:
            yield Result.from_json(raw_line.strip())
예제 #7
0
파일: algo.py 프로젝트: BioXiao/ssea
def _cmp_json_nes(line):
    '''
    comparison function for batch_sort

    line: one json document; it is stripped and parsed with
          Result.from_json()

    returns the absolute value of the parsed result's nes attribute
    '''
    nes = Result.from_json(line.strip()).nes
    return abs(nes)
예제 #8
0
파일: algo.py 프로젝트: BioXiao/ssea
def compute_qvalues(json_iterator, hists_file):
    '''
    computes fdr q values from json Result objects sorted
    by abs(NES) (low to high)

    json_iterator: iterator that yields json objects in sorted order
    hists_file: contains histogram data from null distribution

    this is a generator: for every input line it yields the updated
    result serialized with to_json(), followed by os.linesep as a
    separate item. results whose es is zero are passed through without
    any fdr/rank fields being set.
    '''
    # load histogram data and compute cumulative sums for fdr
    # interpolation; each cdf has a leading zero, so it holds one more
    # entry than the histogram it was built from
    hists = np.load(hists_file)
    try:
        cdfs = {}
        for k in ('null_nes_neg', 'null_nes_pos',
                  'obs_nes_neg', 'obs_nes_pos'):
            h = hists[k]
            # builtin float is used because the np.float alias was
            # removed from NumPy
            cdf = np.zeros(h.shape[0] + 1, dtype=float)
            cdf[1:] = h.cumsum()
            cdfs[k] = cdf
    finally:
        # the histograms are not needed once the cdfs exist; closing here
        # releases the file even when the caller stops iterating early
        hists.close()
    # keep track of minimum FDR and rank for positive
    # and negative NES separately
    NEG = 0
    POS = 1
    null_keys = ['null_nes_neg', 'null_nes_pos']
    obs_keys = ['obs_nes_neg', 'obs_nes_pos']
    tot_obs = [cdfs['obs_nes_neg'][-1], cdfs['obs_nes_pos'][-1]]
    # ranks count down from the total number of observations per sign
    cur_ranks = [tot_obs[0], tot_obs[1]]
    min_fdrs = [1.0, 1.0]
    # perform merge of sorted json files
    for line in json_iterator:
        # load json document (one per line)
        res = Result.from_json(line.strip())
        es = res.es
        log_nes_clip = np.log10(np.clip(abs(res.nes), NES_MIN, NES_MAX))
        if es != 0:
            if es < 0:
                sign_ind = NEG
                sign = -1.0
            else:
                sign_ind = POS
                sign = 1.0
            # For a given NES(S) = NES* >= 0, the FDR is the ratio of the
            # percentage of all permutations NES(S,null) >= 0, whose
            # NES(S,null) >= NES*, divided by the percentage of observed S with
            # NES(S) >= 0, whose NES(S) >= NES*, and similarly for
            # NES(S) = NES* <= 0.
            # to compute a sample set specific FDR q value we look at the
            # aggregated enrichment scores for all tests of that sample set
            # compute the cumulative sums of NES histograms use interpolation
            # to find fraction NES(null) >= NES* and account for the observed
            # permutation in the null set interpolate NES in log space
            null_nes_cumsum = cdfs[null_keys[sign_ind]]
            null_n = interp(log_nes_clip, LOG_NES_BINS, null_nes_cumsum)
            obs_nes_cumsum = cdfs[obs_keys[sign_ind]]
            obs_n = interp(log_nes_clip, LOG_NES_BINS, obs_nes_cumsum)
            n = 1.0 - (null_n / null_nes_cumsum[-1])
            d = 1.0 - (obs_n / obs_nes_cumsum[-1])
            # update json dict; a non-positive numerator or denominator
            # is reported as a q value of zero
            if (n <= 0.0) or (d <= 0.0):
                res.ss_fdr_q_value = 0.0
            else:
                res.ss_fdr_q_value = n / d
            # compare with minimum FDR and adjust minimum FDR if necessary,
            # so the reported q value never rises above one seen earlier
            # for the same sign
            if res.ss_fdr_q_value < min_fdrs[sign_ind]:
                min_fdrs[sign_ind] = res.ss_fdr_q_value
            else:
                res.ss_fdr_q_value = min_fdrs[sign_ind]
            res.ss_rank = cur_ranks[sign_ind]
            res.ss_frac = sign * (1.0 - (
                (res.ss_rank - 1) / float(tot_obs[sign_ind])))
            cur_ranks[sign_ind] -= 1
        # convert back to json
        yield res.to_json()
        yield os.linesep