コード例 #1
0
def perform_test(X_matr,
                 Y_matr,
                 lag,
                 fit_method,
                 replace_row,
                 has_reps=False,
                 **kwargs):
    """
    X_matr: n x T (x r) matrix of input genes
    Y_matr: 1 x T matrix of output gene
    lag: lag
    fit_method: one of the fit_ methods, e.g. fit_lasso
    hyper: hyperparams for calling test
    replace_rows: which row Y_matr is in in X

    return: X_t, Y_t, Y_pred, coef, intercept, fit_result
    """

    ## Get X and Y lagged

    X_t, Y_t = get_XY_lagged(X_matr,
                             Y_matr,
                             lag,
                             replace_row=replace_row,
                             has_reps=has_reps)

    t = time.time()
    Y_pred, coef, intercept, fit_result = fit_method(X_t, Y_t, **kwargs)
    fit_result["time"] = round(time.time() - t, 3)

    return X_t, Y_t, Y_pred, coef, intercept, fit_result
コード例 #2
0
def perform_loto_cv(X_matr,
                    Y_matr,
                    lag,
                    fit_method,
                    replace_row,
                    verbose=False,
                    has_reps=False,
                    **kwargs):
    """
    Perform leave-one-timepoint-out cross-validation.

    X_matr: n x T (x r) matrix of input genes, where r is # reps
    Y_matr: 1 x T (x r) matrix of output gene
    lag: lag
    fit_method: one of the fit_ methods, e.g. fit_lasso
    hyper: hyperparams for calling test
    replace_row: which row Y_matr is in in X

    return: fit_result
    """

    X_t, Y_t = get_XY_lagged(X_matr,
                             Y_matr,
                             lag,
                             replace_row=replace_row,
                             has_reps=has_reps)

    T_test = X_t.shape[0]

    loo = LeaveOneOut(T_test)

    Y_tests = np.zeros(T_test)
    Y_preds = np.zeros(T_test)
    dfs = np.zeros(T_test)

    for train_index, test_index in loo:
        X_train = X_t[train_index]
        Y_train = Y_t[train_index]
        X_test = X_t[test_index]
        Y_test = Y_t[test_index]

        _, coef, intercept, _ = fit_method(X_train, Y_train, **kwargs)

        Y_pred = predict(X_test, coef, intercept)

        Y_tests[test_index] = Y_test
        Y_preds[test_index] = Y_pred
        dfs[test_index] = len(np.nonzero(coef)[0])

    mse = mean_squared_error(Y_tests, Y_preds)
    sse = np.sum((Y_tests - Y_preds)**2)
    avg_df = np.average(dfs)
    r2 = r2_score(Y_tests, Y_preds)

    fit_result = collections.OrderedDict()
    fit_result["n"] = T_test
    fit_result["lag"] = lag
    fit_result["mse"] = mse
    fit_result["sse"] = sse
    fit_result["avg_df"] = avg_df
    fit_result["r2"] = r2

    if verbose:
        print "Y_tests: ", Y_tests
        print "Y_preds: ", Y_preds
        print fit_result

    return fit_result
コード例 #3
0
def perform_test_random(X_matr,
                        rand_X_matr,
                        Y_matr,
                        lag,
                        fit_method,
                        replace_row,
                        has_reps=False,
                        verbose=False,
                        **kwargs):
    """
    Perform a single fit.

    X_matr: n x T (x r) matrix of input genes
    rand_X_matr: n x T (x r) matrix of randomized input genes, where randomized across time.
    Y_matr: 1 x T (x r) matrix of output gene
    lag: lag
    fit_method: one of the fit_ methods, e.g. fit_lasso
    hyper: hyperparams for calling test
    replace_rows: which row Y_matr is in in X. Set to None, otherwise. If it is inside, set the coefficients to zero.

    return: coef, intercept, fit_result. Note the 0, i * lag indices of the coef are for the output genes.
    """

    ## Get X and Y lagged

    # iterate through all possible predictors

    n = X_matr.shape[0]
    T = X_matr.shape[1]

    coef = np.zeros(n * lag)

    coef_temps = np.zeros((n * lag, n))
    intercept_temps = np.zeros((1, n))

    # There will be m different fits. fit_result should just average all of the individual fit_results
    fit_result_temps = []

    X_t_orig, Y_t_orig = get_XY_lagged(X_matr,
                                       Y_matr,
                                       lag,
                                       replace_row=replace_row,
                                       has_reps=has_reps)
    Y_pred_orig, coef_orig, intercept_orig, fit_result_orig = fit_method(
        X_t_orig, Y_t_orig, **kwargs)

    for p in range(n):

        if p != replace_row:

            X_matr_temp = X_matr.copy()

            # replace the predictor row with the randomized row
            X_matr_temp[p] = rand_X_matr[p]

            # X_t_temp is a matrix of T - lag replace_rows where for each row j, i = j + lag-1
            #[A_i, B_i, .... Z_i, A_{i-1}, B_{i-1}, .... Z_{i-1}......  A_{i-lag+1}, B_{i-lag+1}, .... Z_{i-lag+1}

            X_t_temp, Y_t_temp = get_XY_lagged(X_matr_temp,
                                               Y_matr,
                                               lag,
                                               replace_row=replace_row,
                                               has_reps=has_reps)

            # coef_temp is (A_i, B_i, .... Z_i, A_{i-1}, B_{i-1}, .... Z_{i-1}......  A_{i-lag+1}, B_{i-lag+1}, .... Z_{i-lag+1}, 1)

            # we only want the p, p + n,... p + (lag - 1) * n indices.

            t = time.time()
            Y_pred_temp, coef_temp, intercept_temp, fit_result_temp = fit_method(
                X_t_temp, Y_t_temp, **kwargs)

            fit_result_temp["time"] = round(time.time() - t, 3)

            # Not
            coef[p:p + (lag - 1) * n + 1:n] = coef_temp[p:p + (lag - 1) * n +
                                                        1:n].flatten()

            # Store in the list of all coefs
            coef_temps[:, p] = coef_temp.flatten()

            intercept_temps[:, p] = intercept_temp

            fit_result_temps.append(fit_result_temp)

            if verbose:

                print "Original TS: ", X_matr[p]
                print "Randomized TS: ", X_matr_temp[p]

                print "Original X_t: ", X_t_orig[:T - lag, p:-1:n]
                print "Randomized X_t", X_t_temp[:T - lag, p:-1:n]

                print "Same Y_t?", (Y_t_orig == Y_t_temp).all()

                print "Orig coefs: ", coef_orig[p:lag * n + 1:n]
                print "Right around: "
                print coef_orig[p - 1:lag * n + 1:n]
                print coef_orig[p + 1:lag * n + 1:n]

                print "Updated coefs: ", coef_temp[p:lag * n + 1:n]
                print "Right around: "
                print coef_temp[p - 1:lag * n + 1:n]
                print coef_temp[p + 1:lag * n + 1:n]

                print "Updated coefs:", coef
                print "Y_pred_temp", Y_pred_temp

    coef = coef.reshape(n * lag, 1)

    fit_result_temps_df = pd.DataFrame(fit_result_temps)

    fit_result_std_dict = fit_result_temps_df.std()

    fit_result = fit_result_temps_df.mean().to_dict()

    keys = fit_result.keys()

    for key in keys:
        fit_result[key + "_mean"] = fit_result[key]  # LEFT OFF HERE 1/25
        del fit_result[key]
        fit_result[key + "_std"] = fit_result_std_dict[key]

    return coef, fit_result, coef_temp, coef_temps, intercept_temps