def GSLR(pathway_id_and_filepath_and_nodes_and_edges_and_costs):
    """Cross-validate graph-sparse logistic regression on one pathway dataset.

    The single argument is a 5-tuple
    ``(pathway_id, filepath, nodes, edges, costs)``:

    * ``pathway_id`` is handed back unchanged in the result.
    * ``filepath`` is a CSV file read with its first column as the index;
      the index values are label-encoded and used as the targets.
    * ``nodes`` is the column order the data is reindexed to.
    * ``edges`` is forwarded to ``gslr.GraphOptions`` and ``costs`` is
      forwarded to ``gslr.gslr`` as ``edge_costs``.

    Returns ``(pathway_id, accuracies, features)``: ``accuracies`` holds one
    held-out accuracy per fold of a 10-fold stratified split, and
    ``features`` lists the columns whose coefficient in the first row of the
    fitted weights was non-zero in at least one fold.
    """
    (pathway_id, filepath, nodes, edges,
     costs) = pathway_id_and_filepath_and_nodes_and_edges_and_costs

    # The file was written with dataset.to_csv(filename, index=True,
    # header=True), so the first column is the row index.
    dataset = pd.read_csv(filepath, index_col=0)
    labels = LabelEncoder().fit_transform(dataset.index.tolist())

    # Line the columns up with `nodes` by reindexing the transposed frame.
    dataset = dataset.T.reindex(index=nodes).T
    samples = dataset.values

    n_classes = 2
    n_features = len(nodes)

    options = gslr.GraphOptions(
        edges=edges, root=-1, num_clusters=1, pruning='strong')

    # Solver hyperparameters.
    min_sparsity, max_sparsity = 50, 100
    verbosity = 0
    n_iterations = 35
    step_choices = np.array([0.03, 0.1, 0.3])
    # The same row of candidate step sizes, repeated once per iteration.
    step_schedule = np.tile(step_choices, (n_iterations, 1))

    fold_weights = []
    fold_accuracies = []

    splitter = StratifiedKFold(n_splits=10)
    for train_idx, test_idx in splitter.split(samples, labels):
        # Every fold starts from a fresh all-zero weight matrix.
        initial_weights = np.zeros((n_classes, n_features))

        fitted, _losses = gslr.gslr(
            samples[train_idx], labels[train_idx], initial_weights,
            min_sparsity, max_sparsity, options, step_schedule, verbosity,
            edge_costs=costs, edge_costs_multiplier=2)

        predictions = gslr.predict(samples[test_idx], fitted)
        n_right = gslr.num_correct(labels[test_idx], predictions)

        fold_accuracies.append(n_right / float(len(test_idx)))
        fold_weights.append(fitted[0])

    # One row per fold; keep the columns that are non-zero in any fold.
    weight_table = pd.DataFrame(fold_weights, columns=dataset.columns)
    nonzero_in_any_fold = weight_table.ne(0).any()
    selected = weight_table.columns[nonzero_in_any_fold].tolist()

    return pathway_id, fold_accuracies, selected
Example #2
0
# Hard-coded location of the gslr repository checkout.
repo_path = '/scratch/users/lenail/gslr/'
# Location of the inbiomap_temp.tsv file inside that checkout.
interactome_path = repo_path + 'experiments/algorithms/pcsf/inbiomap_temp.tsv'

# Put the checkout's gslr/ directory on the import path so that the
# `import gslr` below can resolve; the append must come before the import.
# NOTE(review): `sys` is not imported in this excerpt -- confirm it is
# imported earlier in the file.
sys.path.append(repo_path + 'gslr/')
import gslr



### V. Graph-Sparse Logistic Regression

def GSLR(X, y):
	"""Fit graph-sparse logistic regression on (X, y) and predict on X.

	X and y are passed straight to ``gslr.gslr``; the fitted weights are
	then used to predict on the same X.

	Reads the module-level names ``nodes``, ``edges`` and
	``inbiomap_experimentally``, none of which are defined in this excerpt.

	NOTE(review): the visible code ends after computing ``yhat`` with no
	return statement -- confirm whether the function continues below.
	"""

	d = len(nodes)
	c = 2

	graph_opts = gslr.GraphOptions(edges=edges, root=-1, num_clusters=1, pruning='strong')

	sparsity_low = 150
	sparsity_high = 400

	verbosity_level = 1

	num_steps = 100
	possible_steps = np.array([0.03, 0.1, 0.3])
	# The same row of candidate step sizes, repeated once per step.
	steps = np.tile(possible_steps, (num_steps, 1))

	# Start from an all-zero (c, d) weight matrix.
	W0 = np.zeros((c, d))

	W_hat, losses = gslr.gslr(
		X,
		y,
		W0,
		sparsity_low,
		sparsity_high,
		graph_opts,
		steps,
		verbosity_level,
		edge_costs=inbiomap_experimentally.cost.values,
		edge_costs_multiplier=6,
	)

	yhat = gslr.predict(X, W_hat)