コード例 #1
0
def extract_features(df):
    '''
    Split the '01' samples of a matrix into binary and real-valued features.

    df: DataFrame with two-level columns whose second level holds the
        sample code. Only the '01' columns end up in the returned frames
        (presumably the tumor samples -- TODO confirm against caller).

    Returns a (binary, singles, real) tuple:
        binary:  boolean frame (value > -1) restricted to rows whose number
                 of True entries is in [20, df.shape[1] // 2) and whose
                 `p` from a row-wise exp_change is below .05.
        singles: '01' values of the remaining rows that have a range
                 above 1, a standard deviation above .25 and an
                 exp_change `p` below .01.
        real:    '01' values of every row that was not kept in binary.

    NOTE(review): true_index is assumed to return the index labels at which
    the boolean Series is True -- confirm against its definition.
    '''
    df_n = df.xs('01', level=1, axis=1)

    binary = df_n > -1
    # Floor division keeps the bound an int; true division yields a float
    # on Python 3 and range() refuses it.
    upper = df.shape[1] // 2
    binary = binary[binary.sum(axis=1).isin(range(20, upper))]
    rr = df.loc[binary.index].apply(exp_change, axis=1)
    binary = binary.loc[true_index(rr.p < .05)]

    # Index.difference is the set difference that the old Index.diff alias
    # provided; recent pandas reuses the name `diff` for something else.
    real = df_n.loc[df_n.index.difference(binary.index)]
    singles = real[(real.max(axis=1) - real.min(axis=1)) > 1]
    singles = singles[singles.std(axis=1) > .25]
    ch = df.loc[singles.index].apply(exp_change, axis=1)
    singles = df_n.loc[true_index(ch.p < .01)]
    return binary, singles, real
コード例 #2
0
def extract_features(df):
    '''
    Derive binary and real-valued feature sets from the '01' samples.

    df: DataFrame with two-level columns; the sample code sits in the
        second level and only '01' columns are returned (presumably the
        tumor samples -- TODO confirm against caller).

    Returns a (binary, singles, real) tuple:
        binary:  boolean frame (value > -1) for rows with a True count in
                 [20, df.shape[1] // 2) and a row-wise exp_change `p`
                 below .05.
        singles: '01' values for rows outside binary whose range exceeds 1,
                 whose standard deviation exceeds .25 and whose
                 exp_change `p` is below .01.
        real:    '01' values for all rows outside binary.

    NOTE(review): true_index is assumed to return the index labels at which
    the boolean Series is True -- confirm against its definition.
    '''
    df_n = df.xs('01', level=1, axis=1)

    binary = df_n > -1
    # range() needs ints; `/` produces a float on Python 3, `//` does not.
    true_counts = binary.sum(axis=1)
    binary = binary[true_counts.isin(range(20, df.shape[1] // 2))]
    rr = df.loc[binary.index].apply(exp_change, axis=1)
    binary = binary.loc[true_index(rr.p < .05)]

    # Set difference of row labels; Index.diff no longer has this meaning
    # in current pandas.
    real = df_n.loc[df_n.index.difference(binary.index)]
    spread = real.max(axis=1) - real.min(axis=1)
    singles = real[spread > 1]
    singles = singles[singles.std(axis=1) > .25]
    ch = df.loc[singles.index].apply(exp_change, axis=1)
    singles = df_n.loc[true_index(ch.p < .01)]
    return binary, singles, real
コード例 #3
0
def extract_pc_filtered(df, pc_threshold=.2, filter_down=True):
    '''
    Standardize the '01' samples row by row and pass them to extract_pc.

    When the second column level contains '11' (the matched normals) and
    filter_down is true, every row of the '01' data is centered and scaled
    with the mean and standard deviation of that row's '11' values, and
    only rows whose `p` from a row-wise exp_change is below .05 are kept.
    Otherwise each row is standardized with its own '01' mean and standard
    deviation and no rows are dropped.

    df: DataFrame with two-level columns, sample code in the second level.
    pc_threshold: forwarded unchanged to extract_pc.
    filter_down: pass False to skip the normal-based branch even when '11'
        columns are present.

    Returns the result of extract_pc on the standardized matrix.

    NOTE(review): true_index is assumed to return the labels at which the
    boolean Series is True, in the p-sorted order of rr -- confirm.
    '''
    has_normals = '11' in df.columns.levels[1]
    tumor = df.xs('01', axis=1, level=1)
    if has_normals and filter_down:
        normal = df.xs('11', axis=1, level=1)
        # DataFrame.sort no longer exists; sort_values is its replacement.
        rr = df.apply(exp_change, axis=1).sort_values('p')
        m, s = normal.mean(axis=1), normal.std(axis=1)
        # Transpose so the per-row statistics align on the row labels.
        df_n = ((tumor.T - m) / s).T
        df_n = df_n.loc[true_index(rr.p < .05)]
    else:  # No matched normals (or filtering disabled)
        df_n = ((tumor.T - tumor.mean(axis=1)) / tumor.std(axis=1)).T
    pc = extract_pc(df_n, pc_threshold, standardize=False)
    return pc
コード例 #4
0
ファイル: Intermediate.py プロジェクト: Krysia/TCGA
def rna_filter(cn, val, rna):
    '''
    Filter copy number events with rna expression data.

    For every row of cn whose gene is present in rna, the indicator
    (copy number == val) is tested against that gene's expression with
    kruskal_pandas. The resulting `p` values are passed through
    bhCorrection and rows with a corrected value below .1 are returned.

    cn: copy number matrix, should have a MultiIndex, with the gene name
        in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix whose index holds gene names

    Returns the rows of cn that pass the filter. If none of the genes in
    cn appear in rna.index, an empty slice of cn is returned.
    Raises AssertionError when val is not one of the allowed values.

    NOTE(review): kruskal_pandas / bhCorrection are presumably a
    Kruskal-Wallis test and a Benjamini-Hochberg correction, and
    true_index presumably returns the labels where the mask is True --
    confirm against their definitions.
    '''
    assert val in [-2, -1, 1, 2]
    tests = {g: kruskal_pandas(vec == val, rna.loc[g[-1]])
             for g, vec in cn.iterrows()
             if g[-1] in rna.index}
    if not tests:
        # Nothing could be tested: an empty frame has no `p` column, so
        # return an empty selection instead of failing on change.p.
        return cn.iloc[:0]
    change = pd.DataFrame(tests).T
    q_vals = bhCorrection(change.p)
    filtered = cn.loc[true_index(q_vals < .1)]
    return filtered
コード例 #5
0
def extract_pc_filtered(df, pc_threshold=.2, filter_down=True):
    '''
    Row-standardize the '01' samples and extract a component via extract_pc.

    Two modes:
      * '11' columns (the matched normals) exist and filter_down is true:
        each '01' row is z-scored against the mean and standard deviation
        of the same row's '11' values, then only rows with a row-wise
        exp_change `p` below .05 are retained.
      * otherwise: each '01' row is z-scored against its own mean and
        standard deviation; every row is retained.

    df: DataFrame with two-level columns, sample code in the second level.
    pc_threshold: handed to extract_pc as-is.
    filter_down: set False to force the second mode.

    Returns whatever extract_pc returns for the standardized matrix.

    NOTE(review): true_index is assumed to return the labels at which the
    boolean Series is True, in the p-sorted order of rr -- confirm.
    '''
    use_normals = ('11' in df.columns.levels[1]) and filter_down
    df_n = df.xs('01', axis=1, level=1)
    if use_normals:
        tt = df.xs('11', axis=1, level=1)
        # sort_values replaces DataFrame.sort, which pandas has removed.
        rr = df.apply(exp_change, axis=1).sort_values('p')
        center, scale = tt.mean(axis=1), tt.std(axis=1)
        keep = true_index(rr.p < .05)
    else:  # No matched normals
        center, scale = df_n.mean(axis=1), df_n.std(axis=1)
        keep = None
    # Transposing lets the per-row statistics align on the row labels.
    df_n = ((df_n.T - center) / scale).T
    if keep is not None:
        df_n = df_n.loc[keep]
    pc = extract_pc(df_n, pc_threshold, standardize=False)
    return pc
コード例 #6
0
def rna_filter(cn, val, rna):
    '''
    Keep the copy number events that are associated with expression.

    Each row of cn whose gene (last index level) is found in rna.index is
    tested: kruskal_pandas receives the boolean vector (row == val) and
    the gene's row of rna. The `p` values of all tests go through
    bhCorrection, and rows with a corrected value below .1 are kept.

    cn: copy number matrix, should have a MultiIndex, with the gene name
        in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix indexed by gene name

    Returns the selected rows of cn; an empty slice of cn when no gene of
    cn is present in rna.index.
    Raises AssertionError when val is not one of the allowed values.

    NOTE(review): kruskal_pandas / bhCorrection are presumably a
    Kruskal-Wallis test and a Benjamini-Hochberg correction, and
    true_index presumably returns the labels where the mask is True --
    confirm against their definitions.
    '''
    assert val in [-2, -1, 1, 2]
    results = {
        g: kruskal_pandas(vec == val, rna.loc[g[-1]])
        for g, vec in cn.iterrows() if g[-1] in rna.index
    }
    if not results:
        # With no testable gene the results frame would lack a `p` column;
        # report "nothing passed" rather than raising on change.p.
        return cn.iloc[:0]
    change = pd.DataFrame(results).T
    q_vals = bhCorrection(change.p)
    filtered = cn.loc[true_index(q_vals < .1)]
    return filtered