def GSLR(pathway_id_and_filepath_and_nodes_and_edges_and_costs):
    """Run graph-sparse logistic regression for one pathway under 10-fold CV.

    Parameters
    ----------
    pathway_id_and_filepath_and_nodes_and_edges_and_costs : tuple
        ``(pathway_id, filepath, nodes, edges, costs)`` packed into a single
        argument — presumably so this function can be mapped over a pool of
        workers; TODO confirm against the caller.

    Returns
    -------
    tuple
        ``(pathway_id, accuracies, features)`` where ``accuracies`` is the
        per-fold test accuracy and ``features`` the column names with a
        nonzero learned weight in at least one fold.
    """
    pathway_id, filepath, nodes, edges, costs = (
        pathway_id_and_filepath_and_nodes_and_edges_and_costs
    )

    # The file was written via dataset.to_csv(filename, index=True, header=True),
    # so the index column carries the class labels.
    dataset = pd.read_csv(filepath, index_col=0)
    y = LabelEncoder().fit_transform(dataset.index.tolist())

    # Align the columns to the pathway's node list (in node order).
    dataset = dataset.transpose().reindex(index=nodes).transpose()
    X = dataset.values

    num_features = len(nodes)
    num_classes = 2
    graph_opts = gslr.GraphOptions(edges=edges, root=-1,
                                   num_clusters=1, pruning='strong')

    sparsity_low = 50
    sparsity_high = 100
    verbosity_level = 0

    # Step-size schedule: each of the 35 iterations may try these 3 step sizes.
    num_steps = 35
    possible_steps = np.array([0.03, 0.1, 0.3])
    steps = np.tile(possible_steps, (num_steps, 1))

    fold_weights = []
    accuracies = []
    for train, test in StratifiedKFold(n_splits=10).split(X, y):
        W0 = np.zeros((num_classes, num_features))
        W_hat, losses = gslr.gslr(X[train], y[train], W0,
                                  sparsity_low, sparsity_high,
                                  graph_opts, steps, verbosity_level,
                                  edge_costs=costs, edge_costs_multiplier=2)
        yhat = gslr.predict(X[test], W_hat)
        num_cor = gslr.num_correct(y[test], yhat)
        fold_weights.append(W_hat[0])
        accuracies.append(num_cor / float(len(test)))

    # Collect the per-fold weight vectors and keep the names of every
    # feature that was selected (nonzero) in at least one fold.
    weight_matrix = pd.DataFrame(fold_weights, columns=dataset.columns)
    features = weight_matrix.columns[(weight_matrix != 0).any()].tolist()
    return pathway_id, accuracies, features
# Make the gslr package importable from the repository checkout.
repo_path = '/scratch/users/lenail/gslr/'
interactome_path = repo_path + 'experiments/algorithms/pcsf/inbiomap_temp.tsv'
sys.path.append(repo_path + 'gslr/')

import gslr

### V. Graph-Sparse Logistic Regression

def GSLR(X, y):
    """Fit graph-sparse logistic regression on (X, y) and return predictions.

    Fixes over the original: the ``def`` line was missing its trailing
    colon (a syntax error), and the computed predictions were discarded —
    they are now returned.

    NOTE(review): this reads the module-level names ``nodes``, ``edges``
    and ``inbiomap_experimentally`` (a DataFrame with a ``cost`` column),
    which must be defined elsewhere in the file — confirm. It also shadows
    an earlier, tuple-argument definition of ``GSLR``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)

    Returns
    -------
    Predicted labels for ``X`` from ``gslr.predict``.
    """
    d = len(nodes)   # number of features, one per interactome node
    c = 2            # binary classification: weight matrix has 2 rows
    graph_opts = gslr.GraphOptions(edges=edges, root=-1,
                                   num_clusters=1, pruning='strong')

    sparsity_low = 150
    sparsity_high = 400
    verbosity_level = 1

    # Step-size schedule: each of the 100 iterations may try these 3 step sizes.
    num_steps = 100
    possible_steps = np.array([0.03, 0.1, 0.3])
    steps = np.tile(possible_steps, (num_steps, 1))

    W0 = np.zeros((c, d))
    W_hat, losses = gslr.gslr(X, y, W0,
                              sparsity_low, sparsity_high,
                              graph_opts, steps, verbosity_level,
                              edge_costs=inbiomap_experimentally.cost.values,
                              edge_costs_multiplier=6)

    yhat = gslr.predict(X, W_hat)
    return yhat