def _run(self):
    """Run the experiment over every disease and collect the results.

    Loads the PPI network, its precomputed matrices, and a degree ordering
    of the nodes, then evaluates each disease either in parallel (when
    params.n_processes > 1) or serially. Per-disease results are gathered
    into self.results as a pandas DataFrame.
    """
    logging.info("Loading Network...")
    self.ppi_networkx, self.ppi_adj, self.protein_to_node = load_network(
        self.params.ppi_network)

    logging.info("Loading PPI Matrices...")
    self.ppi_matrices = load_ppi_matrices(self.params.ppi_matrices)

    logging.info("Sorting Nodes by Degree...")
    self.nodes_sorted_by_deg, self.nodes_ranked_by_deg = sort_by_degree(
        self.ppi_networkx)

    logging.info("Running Experiment...")
    self.results = []
    if self.params.n_processes > 1:
        # Context manager guarantees the worker processes are reclaimed
        # even if a disease raises (the original never closed the Pool).
        with Pool(self.params.n_processes) as p, \
                tqdm(total=len(self.diseases)) as t:
            for results in p.imap(process_disease_wrapper,
                                  self.diseases.values()):
                self.results.append(results)
                t.update()
    else:
        with tqdm(total=len(self.diseases)) as t:
            for disease in self.diseases.values():
                self.results.append(self.process_disease(disease))
                t.update()
    self.results = pd.DataFrame(self.results)
def _run(self):
    """Run the experiment over every disease and collect per-protein rows.

    Loads the PPI network and matrices, builds degree buckets (for
    degree-matched comparisons), then evaluates each disease in parallel
    (params.n_processes > 1) or serially. Each disease yields parallel
    lists of (disease, protein) index tuples and result rows; the final
    self.results DataFrame is indexed by that MultiIndex.
    """
    logging.info("Loading Network...")
    self.ppi_networkx, self.ppi_adj, self.protein_to_node = load_network(
        self.params.ppi_network)
    # inverse mapping: node index -> protein id
    self.node_to_protein = {node: protein
                            for protein, node in self.protein_to_node.items()}

    logging.info("Loading PPI Matrices...")
    self.ppi_matrices = load_ppi_matrices(self.params.ppi_matrices)

    logging.info("Building Degree Buckets...")
    self.degree_to_bucket = build_degree_buckets(
        self.ppi_networkx, min_len=self.params.min_bucket_len)
    for degree, bucket in self.degree_to_bucket.items():
        # log instead of print so bucket sizes are captured in
        # experiment.log alongside the other progress messages
        logging.info("Degree: %s, Size: %s", degree, len(bucket))

    logging.info("Running Experiment...")
    self.results = []
    self.indices = []
    if self.params.n_processes > 1:
        # Context manager guarantees the worker processes are reclaimed
        # even if a disease raises (the original never closed the Pool).
        with Pool(self.params.n_processes) as p, \
                tqdm(total=len(self.diseases)) as t:
            for indices, results in p.imap(process_disease_wrapper,
                                           self.diseases.values()):
                self.indices.extend(indices)
                self.results.extend(results)
                t.update()
    else:
        with tqdm(total=len(self.diseases)) as t:
            for disease in self.diseases.values():
                indices, results = self.process_disease(disease)
                self.indices.extend(indices)
                self.results.extend(results)
                t.update()
    index = pd.MultiIndex.from_tuples(self.indices,
                                      names=['disease', 'protein'])
    self.results = pd.DataFrame(self.results, index=index)
def __init__(self, dir):
    """Set up the co-disease probability experiment.

    Configures logging, loads the PPI network and disease associations,
    unpacks the experiment parameters, and obtains the codisease matrix —
    loaded from disk when params provides a path, otherwise built fresh.

    Args:
        dir (string) The directory where the experiment should be run
    """
    super(CodiseaseProbExp, self).__init__(dir)

    # Set the logger
    set_logger(os.path.join(self.dir, 'experiment.log'),
               level=logging.INFO, console=True)

    # Log title
    logging.info("Co-disease probability in the PPI Network")
    logging.info("Sabri Eyuboglu -- SNAP Group")
    logging.info("======================================")

    logging.info("Loading Network...")
    network = load_network(self.params.ppi_network)
    self.ppi_networkx, self.ppi_adj, self.protein_to_node = network

    logging.info("Loading Disease Associations...")
    self.diseases = load_diseases(self.params.diseases_path,
                                  self.params.disease_subset)

    # unpack params
    self.ppi_matrices = {}
    for matrix_name, matrix_path in self.params.ppi_matrices.items():
        self.ppi_matrices[matrix_name] = np.load(matrix_path)
    self.top_k = self.params.top_k
    self.n_buckets = self.params.n_buckets
    self.window_length = self.params.window_length
    self.smooth = self.params.smooth
    self.plots = self.params.plots

    if hasattr(self.params, 'codisease_matrix'):
        logging.info("Loading Codisease Matrix...")
        self.codisease_matrix = np.load(self.params.codisease_matrix)
    else:
        logging.info("Building Codisease Matrix...")
        self.codisease_matrix = self.build_codisease_matrix()
# Read the experiment parameters from the params json
params = Params(json_path)
params.update(json_path)

# Set the logger
set_logger(os.path.join(args.experiment_dir, 'experiment.log'),
           level=logging.INFO, console=True)

# Log Title
logging.info("Complementarity Sharing in Disease Pathways")
logging.info("Sabri Eyuboglu -- SNAP Group")
logging.info("======================================")

# Load data from params file
logging.info("Loading PPI Network...")
_, _, protein_to_node = load_network(params.ppi_network)
logging.info("Loading Disease Associations...")
diseases_dict = load_diseases(params.diseases_path, params.disease_subset)
logging.info("Loading PPI Matrices...")
ppi_matrices = {name: np.load(path)
                for name, path in params.ppi_matrices.items()}

# Run Experiment
logging.info("Running Experiment...")
# Accumulators for the per-matrix mean and standard deviation
means = {}
stds = {}
# Set the logger set_logger(os.path.join(args.prediction_dir, 'experiment.log'), level=logging.INFO, console=True) # Log Title logging.info("Disease protein Prediction") logging.info("Sabri Eyuboglu -- SNAP Group -- Stanford University") logging.info("======================================") # Load Data logging.info("Loading Diseases...") diseases = load_diseases(params.diseases_path, params.disease_subset) logging.info("Loading PPI Network...") ppi_networkx, ppi_network_adj, protein_to_node = load_network( params.ppi_network) node_to_protein = { node: protein for protein, node in protein_to_node.items() } if (params.method == "ppi_matrix"): logging.info("Loading PPI Matrix...") ppi_matrix = np.load(params.ppi_matrix) elif (params.method == 'gcn'): gcn_method = GCN(params, ppi_network_adj) # Run Predictions if MULTIPROCESS: p = Pool(N_PROCESSES)
def __init__(self, dir):
    """Initialize the disease protein prediction experiment.

    Configures logging, loads the PPI network and disease associations,
    then performs method-specific setup selected by params.method.

    Args:
        dir (string) The directory where the experiment should be run
    """
    super(DPPExperiment, self).__init__(dir)

    # Set the logger. Use self.dir (this experiment's directory) rather
    # than the module-level `args` namespace — consistent with the other
    # experiment classes in this file.
    set_logger(os.path.join(self.dir, 'experiment.log'),
               level=logging.INFO, console=True)

    # Log Title
    logging.info("Disease Protein Prediction in the PPI Network")
    logging.info("Sabri Eyuboglu -- SNAP Group")
    logging.info("======================================")

    # Load data from params file
    logging.info("Loading PPI Network...")
    (self.ppi_networkx,
     self.ppi_network_adj,
     self.protein_to_node) = load_network(self.params.ppi_network)
    # inverse mapping: node index -> protein id
    self.node_to_protein = {node: protein
                            for protein, node in self.protein_to_node.items()}

    logging.info("Loading Disease Associations...")
    self.diseases_dict = load_diseases(self.params.diseases_path,
                                       self.params.disease_subset,
                                       ['none'])

    # Load method specific data
    # TODO: Build class for each method
    if self.params.method == "ppi_matrix":
        logging.info("Loading PPI Matrix...")
        self.ppi_matrix = np.load(self.params.ppi_matrix)
        if self.params.normalize:
            # normalize columns of ppi_matrix; z-score is the default
            # when no norm_type is given. An unrecognized norm_type
            # leaves the matrix unnormalized, matching prior behavior.
            norm_type = getattr(self.params, "norm_type", "zscore")
            if norm_type == "frac":
                self.ppi_matrix = (self.ppi_matrix /
                                   np.sum(self.ppi_matrix, axis=0))
            elif norm_type == "zscore":
                self.ppi_matrix = ((self.ppi_matrix -
                                    np.mean(self.ppi_matrix, axis=0)) /
                                   np.std(self.ppi_matrix, axis=0))
        # zero out the diagonal so a protein never scores itself
        np.fill_diagonal(self.ppi_matrix, 0)
    elif self.params.method == 'lr':
        logging.info("Loading Feature Matrices...")
        self.feature_matrices = []
        for features_filename in self.params.features:
            self.feature_matrices.append(
                build_embedding_feature_matrix(self.protein_to_node,
                                               features_filename))
    elif self.params.method == 'l2_rw':
        self.method = L2RandomWalk(self.params)
    elif self.params.method == 'pathway_expansion':
        self.method = PathwayExpansion(self.params, self.ppi_networkx,
                                       self.ppi_network_adj)
    elif self.params.method == "learned_cn":
        self.method = LearnedCN(self.dir, self.params,
                                self.ppi_network_adj, self.diseases_dict,
                                self.protein_to_node)
    elif self.params.method == 'gcn':
        self.method = GCN(self.params, self.ppi_network_adj)