def load_data(self):
    """Load the StringDB interaction graph into ``self.nx_graph``.

    A cached adjacency-list file under ``<datastore>/graphs/`` is used when
    one exists for ``self.graph_type``. Otherwise the graph is built from the
    detailed protein-links table: only rows whose
    ``self.name_to_edge[self.graph_type]`` column is non-zero are kept, and
    only when both protein ids (with their first five characters dropped) are
    present in the mapping returned by ``ensp_to_hugo_map``. The freshly built
    graph is then written to the cache file.

    Both paths produce an ``nx.OrderedGraph``. When ``self.randomize`` is
    truthy the nodes are finally relabelled with the mapping returned by
    ``randmap``.

    Side effects: sets ``self.proteinlinks`` and ``self.nx_graph``, may create
    the cache file, and prints progress messages.
    """
    print("Building StringDB Graph. It can take a while the first time...")
    self.proteinlinks = self.datastore + "/graphs/9606.protein.links.detailed.v11.0.txt"
    savefile = self.datastore + "/graphs/stringdb_graph_" + self.graph_type + "_edges.adjlist"

    if os.path.isfile(savefile):
        # read_adjlist returns a plain Graph; wrap it so a cache hit yields the
        # same graph class as a fresh build below.
        self.nx_graph = nx.OrderedGraph(nx.read_adjlist(savefile))
    else:
        print(" ensp_to_hugo_map")
        ensmap = ensp_to_hugo_map(self.datastore)

        print(" reading self.proteinlinks")
        edges = pd.read_csv(self.proteinlinks, sep=' ')
        selected_edges = edges[self.name_to_edge[self.graph_type]] != 0
        protein_pairs = edges[selected_edges][["protein1", "protein2"]].values.tolist()

        # Drop the first five characters of each id once (presumably the
        # "9606." species prefix -- confirm against the input file), then keep
        # only the pairs where both ids can be translated.
        stripped_pairs = ((first[5:], second[5:]) for first, second in protein_pairs)
        edgelist = [
            [ensmap[first], ensmap[second]]
            for first, second in stripped_pairs
            if first in ensmap and second in ensmap
        ]

        print(" creating OrderedGraph")
        self.nx_graph = nx.OrderedGraph(edgelist)

        print(" writing graph")
        nx.write_adjlist(self.nx_graph, savefile)

    # Randomize (applies to both the cached and the freshly built graph).
    if self.randomize:
        self.nx_graph = nx.relabel.relabel_nodes(
            self.nx_graph, randmap(self.nx_graph.nodes))
    print("Graph built !")
def load_data(self):
    """Populate ``self.nx_graph`` from the pickled graph located by ``at.get``.

    ``at.get`` is called with ``self.at_hash`` and ``self.datastore``; its
    result is read as a gpickle and copied into an ``nx.OrderedGraph``.
    When ``self.randomize`` is truthy the nodes are then relabelled using
    the mapping returned by ``randmap``.
    """
    pickle_source = at.get(self.at_hash, datastore=self.datastore)
    # NOTE: read_gpickle unpickles the file, so the fetched artifact must come
    # from a trusted source.
    unpickled_graph = nx.readwrite.gpickle.read_gpickle(pickle_source)
    self.nx_graph = nx.OrderedGraph(unpickled_graph)

    # Randomize
    if not self.randomize:
        return
    node_mapping = randmap(self.nx_graph.nodes)
    self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, node_mapping)
def load_data(self):
    """Populate ``self.nx_graph`` from a pickle under ``<datastore>/graphs/``.

    Sets ``self.location`` to the ``graphs/`` directory inside
    ``self.datastore``. If ``self.filename`` is not present there,
    ``self._process_and_pickle`` is asked to create it first. The pickle is
    then read and copied into an ``nx.OrderedGraph``. When ``self.randomize``
    is truthy the nodes are relabelled using the mapping returned by
    ``randmap``.
    """
    self.location = os.path.join(self.datastore, 'graphs/')
    pickle_path = os.path.join(self.location, self.filename)

    # Build the pickle on first use; later calls reuse the file on disk.
    already_pickled = os.path.isfile(pickle_path)
    if not already_pickled:
        self._process_and_pickle(save_name=pickle_path)

    unpickled_graph = nx.read_gpickle(pickle_path)
    self.nx_graph = nx.OrderedGraph(unpickled_graph)

    # Randomize
    if not self.randomize:
        return
    node_mapping = randmap(self.nx_graph.nodes)
    self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, node_mapping)
def __init__(self, relabel_genes=True, datastore=None, randomize=False):
    """Set up the datastore, load the graph and post-process its node labels.

    Args:
        relabel_genes: when true (the default), relabel the loaded graph's
            nodes with the mapping returned by ``symbol_map``. When false the
            labels produced by ``load_data`` are kept as they are.
        datastore: directory stored as ``self.datastore``; defaults to the
            directory containing this module.
        randomize: stored as ``self.randomize``; when true, the nodes are
            relabelled with the mapping returned by ``randmap``.
    """
    if datastore is None:
        datastore = os.path.dirname(os.path.abspath(__file__))
    self.datastore = datastore

    self.load_data()

    # Honour the flag: previously it was accepted but the relabelling always ran.
    if relabel_genes:
        self.nx_graph = nx.relabel.relabel_nodes(
            self.nx_graph, symbol_map(self.nx_graph.nodes))

    # Randomize. NOTE: self.randomize is assigned only after load_data() has
    # run, exactly as before.
    self.randomize = randomize
    if self.randomize:
        print("Randomizing the graph")
        self.nx_graph = nx.relabel.relabel_nodes(
            self.nx_graph, randmap(self.nx_graph.nodes))
def load_data(self):
    """Populate ``self.nx_graph`` from the HumanNet-XN edge table.

    Reads ``<datastore>/graphs/HumanNet-XN.tsv`` (tab separated, first line
    skipped), uses its first two columns as the edge list, relabels the nodes
    with the mapping returned by ``ncbi_to_hugo_map`` and removes every node
    whose label is a ``float`` afterwards. When ``self.randomize`` is truthy
    the nodes are finally relabelled using the mapping returned by
    ``randmap``.

    Side effects: sets ``self.benchmark`` and ``self.nx_graph``.
    """
    self.benchmark = self.datastore + "/graphs/HumanNet-XN.tsv"

    table = pd.read_csv(self.benchmark, header=None, sep="\t", skiprows=1)
    endpoint_columns = table.values[:, :2]
    self.nx_graph = nx.OrderedGraph(endpoint_columns.tolist())

    # Map nodes from ncbi to hugo names
    hugo_names = ncbi_to_hugo_map(self.nx_graph.nodes, datastore=self.datastore)
    self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, hugo_names)

    # Remove nodes which are not covered by the map (they show up as floats)
    unmapped = [label for label in self.nx_graph.nodes if isinstance(label, float)]
    self.nx_graph.remove_nodes_from(unmapped)

    # Randomize
    if not self.randomize:
        return
    node_mapping = randmap(self.nx_graph.nodes)
    self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, node_mapping)
"train_size": train_size, "seed": seed, } print(experiment) X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(features, target.to_numpy(), stratify=target.to_numpy(), train_size=train_size, test_size=test_size, shuffle=True, random_state=seed ) if gene_graph.randomize: print("Randomizing the graph") gene_graph.nx_graph = nx.relabel.relabel_nodes(gene_graph.nx_graph, randmap(gene_graph.nx_graph.nodes)) if num_genes == 'all': if 'MLP' in model_name: x_train = X_train.copy() x_test = X_test.copy() adj = None else: neighbors = gene_graph.nx_graph intersection_nodes = np.intersect1d(X_train.columns, neighbors.nodes) x_train = X_train[list(intersection_nodes)].copy() x_test = X_test[list(intersection_nodes)].copy() toremove = set(neighbors.nodes) toremove = toremove.difference(intersection_nodes) neighbors.remove_nodes_from(toremove)
"seed": seed, } print(experiment) X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( features, target.to_numpy(), stratify=target.to_numpy(), train_size=train_size, test_size=test_size, shuffle=True, random_state=seed) if gene_graph.randomize: print("Randomizing the graph") gene_graph.nx_graph = nx.relabel.relabel_nodes( gene_graph.nx_graph, randmap(gene_graph.nx_graph.nodes)) if num_genes == 'all': if 'MLP' in model_name: x_train = X_train.copy() x_test = X_test.copy() adj = None else: neighbors = gene_graph.nx_graph intersection_nodes = np.intersect1d(X_train.columns, neighbors.nodes) x_train = X_train[list(intersection_nodes)].copy() x_test = X_test[list(intersection_nodes)].copy() toremove = set(neighbors.nodes) toremove = toremove.difference(intersection_nodes)