def __init__(self, dir, params):
    """Set up the experiment: logger, associations, network, weights, drugs.

    Args:
        dir (string): directory of the experiment to be run. The log file
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset``, ``ppi_network``,
            ``model_path`` and ``drug_targets_path``.
    """
    # The base constructor is expected to populate self.dir and self.params,
    # both of which are read below.
    super().__init__(dir, params)

    # set the logger
    set_logger(os.path.join(self.dir, 'experiment.log'),
               level=logging.INFO, console=True)

    logging.info("Loading disease associations...")
    self.diseases_dict = load_diseases(self.params["associations_path"],
                                       self.params["disease_subset"],
                                       exclude_splits=['none'])

    logging.info("Loading network...")
    self.network = Network(self.params["ppi_network"])
    # One degree value per node, in the graph's own iteration order.
    # NOTE(review): presumably this order matches the indexing of the
    # learned weights below -- confirm against Network.
    self.degrees = np.array(list(dict(self.network.nx.degree()).values()))

    logging.info("Loading weights...")
    models_path = os.path.join(params["model_path"], "models", "models.tar")
    # NOTE(review): despite the ".tar" suffix the file is read with pickle.
    # pickle can execute arbitrary code on load, so model_path must only
    # ever point at trusted artefacts.
    with open(models_path, "rb") as f:
        split_to_model = pickle.load(f)

    # Average the 'ci_weight'[0, 0] entry over the models of all splits.
    self.ci_weights = np.mean(
        [model['ci_weight'][0, 0].numpy()
         for model in split_to_model.values()],
        axis=0)
    # Scale by 1 / sqrt(degree); a node of degree 0 would produce inf/nan.
    self.ci_weights_norm = self.ci_weights / np.sqrt(self.degrees)

    logging.info("Loading drugs...")
    self.drug_to_targets = load_drug_targets(params["drug_targets_path"])
def __init__(self, dir, params):
    """Initialize the disease protein prediction experiment.

    Args:
        dir (string): the directory where the experiment should be run;
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``ppi_network``, ``associations_path``, ``disease_subset``,
            ``method_class``, ``method_params`` and, optionally,
            ``remove_nodes`` / ``remove_edges`` (both default to 0).
    """
    super().__init__(dir, params)

    # Route log records to <dir>/experiment.log and to the console.
    log_file = os.path.join(dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    # Title banner.
    banner = (
        "Node set expansion evaluation",
        "Sabri Eyuboglu, Marinka Zitnik and Jure Leskovec -- SNAP Group",
        "======================================",
    )
    for banner_line in banner:
        logging.info(banner_line)

    # Data described by the params file.
    logging.info("Loading Network...")
    ppi_path = self.params["ppi_network"]
    nodes_to_remove = self.params.get("remove_nodes", 0)
    edges_to_remove = self.params.get("remove_edges", 0)
    self.network = Network(ppi_path,
                           remove_nodes=nodes_to_remove,
                           remove_edges=edges_to_remove)

    logging.info("Loading Associations...")
    self.diseases_dict = load_diseases(
        self.params["associations_path"],
        self.params["disease_subset"],
        exclude_splits=['none'],
    )

    # The method receives the experiment directory through its own params.
    self.params["method_params"]["dir"] = dir
    # The method class is resolved by name among this module's globals.
    method_cls = globals()[self.params["method_class"]]
    self.method = method_cls(self.network,
                             self.diseases_dict,
                             self.params["method_params"])
def __init__(self, dir, params):
    """Set up the experiment: logger, associations, network, weights, GO study.

    Args:
        dir (string): directory of the experiment to be run. The log file
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset``, ``ppi_network`` and
            ``model_path``. Optional keys ``gene_to_go_path`` and
            ``go_path`` override the default GO annotation / ontology
            files (``data/go/gene2go.txt`` and ``data/go/go-basic.obo``).
    """
    # The base constructor is expected to populate self.dir and self.params,
    # both of which are read below.
    super().__init__(dir, params)

    set_logger(os.path.join(self.dir, 'experiment.log'),
               level=logging.INFO, console=True)

    logging.info("Loading disease associations...")
    self.diseases_dict = load_diseases(self.params["associations_path"],
                                       self.params["disease_subset"],
                                       exclude_splits=['none'])

    logging.info("Loading network...")
    self.network = Network(self.params["ppi_network"])
    # One degree value per node, in the graph's own iteration order.
    # NOTE(review): presumably this order matches the indexing of the
    # learned weights below -- confirm against Network.
    self.degrees = np.array(list(dict(self.network.nx.degree()).values()))

    logging.info("Loading weights...")
    models_path = os.path.join(params["model_path"], "models", "models.tar")
    # NOTE(review): despite the ".tar" suffix the file is read with pickle.
    # pickle can execute arbitrary code on load, so model_path must only
    # ever point at trusted artefacts.
    with open(models_path, "rb") as f:
        split_to_model = pickle.load(f)

    # Average the 'ci_weight'[0, 0] entry over the models of all splits.
    self.ci_weights = np.mean(
        [model['ci_weight'][0, 0].numpy()
         for model in split_to_model.values()],
        axis=0)
    # Scale by 1 / sqrt(degree); a node of degree 0 would produce inf/nan.
    self.ci_weights_norm = self.ci_weights / np.sqrt(self.degrees)

    logging.info("Loading enrichment study...")
    # File locations may be overridden through params; the defaults are the
    # paths that used to be hard-coded here.
    gene_to_go_path = self.params.get("gene_to_go_path",
                                      "data/go/gene2go.txt")
    go_path = self.params.get("go_path", "data/go/go-basic.obo")
    # 9606 is the NCBI taxonomy id for Homo sapiens.
    geneid2go = read_ncbi_gene2go(gene_to_go_path, taxids=[9606])
    obodag = GODag(go_path)
    self.go_study = GOEnrichmentStudy(self.network.get_names(),
                                      geneid2go,
                                      obodag,
                                      propagate_counts=True,
                                      alpha=0.05,
                                      methods=['fdr_bh'])
def __init__(self, dir, params):
    """Build the disease protein prediction experiment.

    Args:
        dir (string): directory of the experiment to be run;
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset``, ``ppi_network``,
            ``method_class`` and ``method_params``.
    """
    super().__init__(dir, params)

    # Route log records to <self.dir>/experiment.log and to the console.
    log_file = os.path.join(self.dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    # Title banner.
    for banner_line in ("Disease Protein Prediction",
                        "Sabri Eyuboglu -- SNAP Group",
                        "======================================"):
        logging.info(banner_line)

    logging.info("Loading Disease Associations...")
    self.diseases_dict = load_diseases(
        self.params["associations_path"],
        self.params["disease_subset"],
        exclude_splits=['none'],
    )

    logging.info("Loading Network...")
    self.network = Network(self.params["ppi_network"])

    # The method receives the experiment directory through its own params.
    self.params["method_params"]["dir"] = dir
    # The method class is resolved by name among this module's globals.
    method_cls = globals()[self.params["method_class"]]
    self.method = method_cls(self.network,
                             self.diseases_dict,
                             self.params["method_params"])
def _load_data(self):
    """Load the disease associations and the metrics table of every method.

    Reads ``associations_path`` and ``method_exp_dirs`` from ``self.params``.
    Stores the associations in ``self.diseases_dict`` and a mapping from
    method name to the DataFrame read from ``<exp_dir>/metrics.csv`` in
    ``self.method_to_metrics``. The size of each loaded object is printed.
    """
    logging.info("Loading Disease Associations...")
    associations_path = self.params["associations_path"]
    self.diseases_dict = load_diseases(associations_path,
                                       exclude_splits=["none"])
    print(len(self.diseases_dict))

    logging.info("Loading Results...")
    loaded = {}
    for method_name, experiment_dir in self.params["method_exp_dirs"].items():
        metrics_file = os.path.join(experiment_dir, "metrics.csv")
        # The first CSV column is used as the index.
        frame = pd.read_csv(metrics_file, index_col=0)
        print(len(frame))
        loaded[method_name] = frame

    self.method_to_metrics = loaded
def _run(self):
    """Run the experiment.

    Builds a Jaccard similarity matrix from the association data, then
    compares the Jaccard values of the ``k`` node pairs with the largest
    "mi" matrix values against those of the ``k`` pairs with the largest
    adjacency values, where ``k`` is the sum of the upper-triangular
    adjacency entries. The two samples are compared with
    ``scipy.stats.ttest_rel`` and the outcome is written to
    ``<self.dir>/results.json``.

    Reads ``ppi_network``, ``association_paths`` and ``mi_dir`` from
    ``self.params``.
    """
    logging.info("Loading network...")
    network = Network(self.params["ppi_network"])

    logging.info("Loading molecule associations...")
    # Later files overwrite entries of earlier ones that share a key.
    associations = {}
    for association_path in self.params["association_paths"]:
        associations.update(load_diseases(association_path))

    association_matrix, _ = build_disease_matrix(associations, network)
    association_jaccard = compute_jaccard(association_matrix.T)

    mi_matrix = load_network_matrices(
        {"mi": self.params["mi_dir"]}, network=network)["mi"]

    def _upper_triangle(matrix):
        # Entries strictly above the diagonal, as a flat selection.
        return matrix[np.triu_indices(matrix.shape[0], k=1)]

    mi_values = _upper_triangle(mi_matrix)
    adj_values = _upper_triangle(network.adj_matrix)
    jaccard_values = _upper_triangle(association_jaccard)

    # NOTE(review): if the adjacency sum is 0 then k == 0 and the ``[-k:]``
    # slices below select every pair instead of none.
    k = adj_values.sum().astype(int)

    # Select each top-k sample once and reuse it for the test and the means.
    mi_top = jaccard_values[np.argpartition(mi_values, -k)[-k:]]
    adj_top = jaccard_values[np.argpartition(adj_values, -k)[-k:]]

    statistic, pvalue = ttest_rel(mi_top, adj_top)

    # Cast to built-in floats so json.dump does not depend on the NumPy
    # scalar type of the inputs.
    metrics = {
        "test": "ttest_rel",
        "statistic": float(statistic),
        "pvalue": float(pvalue),
        "mi_mean": float(mi_top.mean()),
        "adj_mean": float(adj_top.mean()),
    }

    with open(os.path.join(self.dir, "results.json"), "w") as f:
        json.dump(metrics, f, indent=4)
def __init__(self, dir, params):
    """Build the experiment: associations, network, GO study, predictions.

    Args:
        dir (string): directory of the experiment to be run;
            ``experiment.log`` is written inside it and a previously saved
            ``outputs.pkl`` is picked up from it when present.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset``, ``ppi_network``,
            ``go_path``, ``gene_to_go_path``, ``enrichment_params`` and
            ``method_to_preds``.
    """
    super().__init__(dir, params)

    # Route log records to <self.dir>/experiment.log and to the console.
    log_file = os.path.join(self.dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    # Title banner.
    for banner_line in ("Disease Protein Prediction",
                        "Sabri Eyuboglu -- SNAP Group",
                        "======================================"):
        logging.info(banner_line)

    logging.info("Loading Disease Associations...")
    self.diseases_dict = load_diseases(
        self.params["associations_path"],
        self.params["disease_subset"],
        exclude_splits=['none'],
    )

    logging.info("Loading Network...")
    self.network = Network(self.params["ppi_network"])

    logging.info("Loading enrichment study...")
    go_dag = GODag(self.params["go_path"])
    # Gene-to-GO annotations restricted to taxonomy id 9606.
    gene_to_go = read_ncbi_gene2go(self.params["gene_to_go_path"],
                                   taxids=[9606])
    population = self.network.get_names()
    self.enrichment_study = GOEnrichmentStudy(
        population,
        gene_to_go,
        go_dag,
        log=None,
        **self.params["enrichment_params"],
    )

    logging.info("Loading predictions...")
    # One predictions table per method, read from <dir>/predictions.csv.
    predictions = {}
    for method_name, preds_dir in self.params["method_to_preds"].items():
        csv_path = os.path.join(preds_dir, "predictions.csv")
        predictions[method_name] = pd.read_csv(csv_path, index_col=0)
    self.method_to_preds = predictions

    # Start from an empty outputs dict unless a saved one already exists.
    outputs_path = os.path.join(self.dir, "outputs.pkl")
    if not os.path.exists(outputs_path):
        self.outputs = {}
    else:
        logging.info("Loading outputs...")
        with open(outputs_path, 'rb') as f:
            self.outputs = pickle.load(f)
def __init__(self, dir, params):
    """Configure logging and load the disease associations.

    Args:
        dir (string): directory of the experiment to be run;
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path`` and ``disease_subset``.
    """
    super().__init__(dir, params)

    # Route log records to <self.dir>/experiment.log and to the console.
    log_file = os.path.join(self.dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    # Title banner.
    for banner_line in ("Metric Significance of Diseases in the PPI Network",
                        "Sabri Eyuboglu -- SNAP Group",
                        "======================================"):
        logging.info(banner_line)

    logging.info("Loading Disease Associations...")
    associations_path = self.params["associations_path"]
    subset = self.params["disease_subset"]
    self.diseases = load_diseases(associations_path,
                                  subset,
                                  exclude_splits=['none'])
def __init__(self, dir, params):
    """Configure logging, then load the associations and the network.

    Args:
        dir (string): directory of the experiment to be run;
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset`` and ``ppi_network``.
    """
    super().__init__(dir, params)

    # Route log records to <self.dir>/experiment.log and to the console.
    log_file = os.path.join(self.dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    logging.info("Loading disease associations...")
    associations_path = self.params["associations_path"]
    subset = self.params["disease_subset"]
    self.diseases_dict = load_diseases(associations_path,
                                       subset,
                                       exclude_splits=['none'])

    logging.info("Loading network...")
    self.network = Network(self.params["ppi_network"])

    # One degree value per node, in the graph's own iteration order.
    node_to_degree = dict(self.network.nx.degree())
    self.degrees = np.array(list(node_to_degree.values()))
def __init__(self, dir, params):
    """Load associations, network, per-method predictions and protein data.

    Args:
        dir (string): directory of the experiment to be run;
            ``experiment.log`` is written inside it.
        params (dict): experiment parameters. Keys read here:
            ``associations_path``, ``disease_subset``, ``ppi_network``,
            ``method_to_preds`` and ``field_to_protein_data`` (each entry
            of the latter provides a ``path`` and an ``args`` mapping).
    """
    super().__init__(dir, params)

    # Route log records to <self.dir>/experiment.log and to the console.
    log_file = os.path.join(self.dir, 'experiment.log')
    set_logger(log_file, level=logging.INFO, console=True)

    # Title banner.
    for banner_line in ("Metric Significance of Diseases in the PPI Network",
                        "Sabri Eyuboglu -- SNAP Group",
                        "======================================"):
        logging.info(banner_line)

    logging.info("Loading Disease Associations...")
    self.diseases = load_diseases(
        self.params["associations_path"],
        self.params["disease_subset"],
        exclude_splits=['none'],
    )

    logging.info("Loading Network...")
    self.network = Network(self.params["ppi_network"])

    logging.info("Loading Predictions...")
    # One predictions table per method, read from <dir>/predictions.csv.
    predictions = {}
    for method_name, preds_dir in self.params["method_to_preds"].items():
        csv_path = os.path.join(preds_dir, "predictions.csv")
        predictions[method_name] = pd.read_csv(csv_path, index_col=0)
    self.method_to_preds = predictions

    logging.info("Loading Protein Data...")
    # Each field is loaded with its own path and extra keyword arguments.
    protein_data = {}
    for field_name, spec in self.params["field_to_protein_data"].items():
        protein_data[field_name] = load_mapping(path=spec["path"],
                                                **spec["args"])
    self.field_to_protein_data = protein_data
def __init__(self, dir, params):
    """Prepare the aggregation of several experiments.

    Args:
        dir (string): directory of the experiment to be run.
        params (dict): experiment parameters. Keys read here:
            ``experiments``, ``groups_columns``, ``associations_path``
            and ``disease_subset``.
    """
    super(Aggregate, self).__init__(dir, params)

    # NOTE(review): no logger is configured in this constructor, so the
    # logging calls below rely on whatever logging setup already exists.
    self.params = params

    # Unpack the parameters used by the aggregation.
    self.experiments = self.params["experiments"]
    self.groups_columns = self.params["groups_columns"]

    # Title banner.
    for banner_line in ("Aggregating Experiments",
                        "Sabri Eyuboglu -- SNAP Group",
                        "======================================"):
        logging.info(banner_line)

    logging.info("Loading Disease Associations...")
    associations_path = self.params["associations_path"]
    subset = self.params["disease_subset"]
    self.diseases = load_diseases(associations_path,
                                  subset,
                                  exclude_splits=['none'])
    # Number of loaded diseases, written to stdout.
    print(len(self.diseases))
def _load_data(self):
    """Load the disease associations into ``self.diseases_dict``.

    The file location is taken from ``self.params["associations_path"]``.
    """
    logging.info("Loading Disease Associations...")
    associations_path = self.params["associations_path"]
    self.diseases_dict = load_diseases(associations_path)