Ejemplo n.º 1
0
def network_lasso(data, response_type='level', ground_truth=None, inhib_targets=None, perfect=True, group_stimuli=False):
    '''
    Estimate a weighted adjacency matrix for each training set with lasso.
    The regularisation strength (alpha) is chosen automatically by LassoCV.

    input:
        data : table passed to prepare_markov_data (after introduce_inhibs
            when inhib_targets is supplied)
        response_type : (level, rate), forwarded to prepare_markov_data
        ground_truth : adjacency matrix; when not None every inferred
            network is scored against it with score_network
        inhib_targets : optional dict forwarded to introduce_inhibs; an
            empty dict or None disables the intervention modelling
        perfect : forwarded unchanged to introduce_inhibs
        group_stimuli : binary, forwarded to prepare_markov_data

    output:
        A : dict mapping each key of the training dict to a DataFrame
            indexed (rows and columns) by the predictor columns; column
            `col` holds |lasso coefficients| for response `col`, divided by
            their maximum so the largest entry is 1
        (A, auc) when ground_truth is given, where auc maps the same keys
            to the value returned by score_network
    '''
    # Import only what is used: sklearn.cross_validation no longer exists in
    # current scikit-learn releases, so importing it here would make the
    # whole function fail even though it was never referenced.
    from sklearn import preprocessing, linear_model

    # model interventions if supplied an inhib_targets dict
    if inhib_targets:
        source = introduce_inhibs(data, inhib_targets=inhib_targets, perfect=perfect)
    else:
        source = data
    training_dict = prepare_markov_data(source, response_type, group_stimuli)

    # fit lasso for each (X,Y) pair
    A = {}
    for key in training_dict:
        X = training_dict[key][0]
        Y = training_dict[key][1]

        # fit_transform returns a new array and leaves its input untouched,
        # so the result has to be kept; rebuild a DataFrame so the column
        # labels survive for the adjacency matrix below.
        X = pd.DataFrame(preprocessing.StandardScaler().fit_transform(X),
                         index=X.index, columns=X.columns)

        n_predictors = X.shape[1]
        A[key] = pd.DataFrame(np.zeros((n_predictors, n_predictors)),
                              columns=X.columns, index=X.columns)

        for col in Y.columns:
            # a response that never varies carries no information to fit
            if len(set(Y[col])) > 1:
                rgn = linear_model.LassoCV(verbose=False).fit(X, Y[col])
                weights = np.abs(rgn.coef_)
                largest = weights.max()
                # Test the largest magnitude, not the largest signed value:
                # coefficients that are all <= 0 with some negative still
                # describe edges and must not be dropped.
                if largest != 0:
                    A[key].loc[:, col] = weights / largest
            else:
                A[key].loc[:, col] = np.zeros((n_predictors,))

    # An adjacency matrix (DataFrame / ndarray) has no single truth value,
    # so compare against None explicitly instead of relying on truthiness.
    if ground_truth is not None:
        auc = {}
        for key in training_dict:
            auc[key] = score_network(A[key], ground_truth)
        return A, auc
    return A
Ejemplo n.º 2
0
                        'TestInhib3' : ['mTOR_pS2448'],
                        'TestInhib4' : ['EGFR_pY1068', 'EGFR_pY1173', 'EGFR_pY992'],
                        'TestInhib5' : []}

# Banner for the cell line being processed. A single parenthesised argument
# prints identically under Python 2 and Python 3.
print('----------- ' + cell_line + ' ------------')
data = pd.read_csv('data/{0}_main.csv'.format(cell_line), header=0)
inhibs = set(data['Inhibitor'])
stims = set(data['Stimulus'])

# every column after the first four is used as a network node
node_list = data.columns[4:]
inhib_targets = {'GSK690693' : ['AKT_pT308','AKT_pS473'],
                 'GSK690693_GSK1120212' : ['AKT_pT308','AKT_pS473','MEK1_pS217_S221']}

regGBR = {}
scalar = {}
td = utilities.prepare_markov_data(utilities.introduce_inhibs(data, inhib_targets=inhib_targets, perfect=True), 'level', group_stimuli=False)

# hyperparameters are the same for every stimulus, so set them once
n_estimators = 100
max_depth = 3

for stim in stims:
    X, Y = td[stim]
    # NOTE(review): the scaler is only fitted here; X itself is NOT scaled
    # (the original discarded the output of fit_transform, which has the same
    # effect). Confirm whether the models were meant to see scaled inputs
    # before changing this, since later code may rely on scalar[stim].
    scalar[stim] = preprocessing.StandardScaler().fit(X)

    # force the inhibitor indicator columns to 1 (positive) / 0 (negative);
    # exact zeros and missing values are left as they are
    for inhib_col in ('Inhib_GSK690693', 'Inhib_GSK690693_GSK1120212'):
        X.loc[X.loc[:, inhib_col] > 0, inhib_col] = 1
        X.loc[X.loc[:, inhib_col] < 0, inhib_col] = 0

    regGBR[stim] = do_gbr(X, Y, n_estimators=n_estimators, max_depth=max_depth)


# predict
Ejemplo n.º 3
0
uacc812 = pd.read_csv('data/UACC812_main.csv')

# load in the network prior curated from the literature
prior = pd.read_csv('data/experimental_prior.csv', index_col=0, header=0)

## BT20
##############################################
# A single parenthesised argument prints identically under Python 2 and 3.
print('----------- ' + 'BT20' + ' ------------')
inhibs = set(bt20['Inhibitor'])
stims = set(bt20['Stimulus'])
# every column after the first four is used as a network node
node_list = bt20.columns[4:]
inhib_targets = {'GSK690693' : ['AKT_pT308','AKT_pS473'],
                 'GSK690693_GSK1120212' : ['AKT_pT308','AKT_pS473','MEK1_pS217_S221']}
regGBR = {}
scalar = {}
td_bt20 = utilities.prepare_markov_data(utilities.introduce_inhibs(bt20, inhib_targets=inhib_targets, perfect=True), 'level', group_stimuli=False)

# hyperparameters are the same for every stimulus, so set them once
n_estimators = 100
max_depth = 3

for stim in td_bt20:
    X, Y = td_bt20[stim]
    # NOTE(review): the scaler is only fitted here; X itself is NOT scaled
    # (the original discarded the output of fit_transform, which has the same
    # effect). Confirm whether the models were meant to see scaled inputs
    # before changing this, since later code may rely on scalar[stim].
    scalar[stim] = preprocessing.StandardScaler().fit(X)

    # force the inhibitor indicator columns to 1 (positive) / 0 (negative);
    # exact zeros and missing values are left as they are
    for inhib_col in ('Inhib_GSK690693', 'Inhib_GSK690693_GSK1120212'):
        X.loc[X.loc[:, inhib_col] > 0, inhib_col] = 1
        X.loc[X.loc[:, inhib_col] < 0, inhib_col] = 0

    regGBR[stim] = do_gbr(X, Y, n_estimators=n_estimators, max_depth=max_depth)

adj = build_adj_matrix(regGBR, node_list, stims)
# the literature prior doubles as the reference network for BT20
A_true_bt20 = prior
for stim in adj: