def modevalworker(setting, scores, baseline):
    """Score one setting's detected modules against every known-modules gold standard.

    Parameters
    ----------
    setting : dict
        Needs "settingid", "output_folder" and "dataset_location" keys;
        paths are resolved relative to the parent directory ("../").
    scores : mapping
        Shared results store; the list of score dicts is written under
        setting["settingid"].
    baseline : bool
        When True, baseline scores are read from
        ../results/modeval_knownmodules/baseline.tsv and handed to the scorer.
    """
    if baseline:
        # index levels: (baseline_name, regnet_name, knownmodules_name)
        baselines = pd.read_csv(
            "../results/modeval_knownmodules/baseline.tsv",
            index_col=[0, 1, 2])

    modules = Modules(
        json.load(open("../" + setting["output_folder"] + "modules.json")))
    runinfo = json.load(
        open("../" + setting["output_folder"] + "runinfo.json"))
    dataset = json.load(open("../" + setting["dataset_location"]))

    settingscores = []
    for regnet_name in dataset["knownmodules"].keys():
        for knownmodules_name in dataset["knownmodules"][regnet_name].keys():
            if baseline:
                # One sub-frame per baseline_name. The loop variable is named
                # baseline_group (not `baseline`) so it does not shadow the
                # boolean `baseline` parameter.
                baselineoi = {
                    baseline_name: baseline_group.loc[
                        (baseline_name, regnet_name,
                         knownmodules_name)].to_dict()
                    for baseline_name, baseline_group in baselines.groupby(
                        level="baseline_name")
                }
            else:
                baselineoi = None

            knownmodules_location = dataset["knownmodules"][regnet_name][
                knownmodules_name]
            knownmodules = Modules(
                json.load(open("../" + knownmodules_location)))

            settingscores_goldstandard = modevalscorer(modules, knownmodules,
                                                       baselineoi)
            settingscores_goldstandard["settingid"] = setting["settingid"]
            settingscores_goldstandard["knownmodules_name"] = knownmodules_name
            settingscores_goldstandard["regnet_name"] = regnet_name
            settingscores_goldstandard["goldstandard"] = (
                regnet_name + "#" + knownmodules_name)
            settingscores_goldstandard["runningtime"] = runinfo["runningtime"]
            settingscores.append(settingscores_goldstandard)

    scores[setting["settingid"]] = settingscores
def modevalworker(setting, scores):
    """Score one setting's modules against the known-modules gold standards,
    normalising against the three null-model baselines.

    NOTE(review): this definition shadows the earlier 3-argument
    `modevalworker` at module level — confirm which one callers expect.

    Parameters
    ----------
    setting : dict
        Needs "settingid", "output_folder" and "dataset_location" keys.
    scores : mapping
        Shared results store; results go under setting["settingid"].
    """
    baseline_names = ["permuted", "sticky", "scalefree"]
    # One table per null model, indexed by
    # (baselinename, regnet_name, knownmodules_name).
    # pd.read_table is deprecated -> pd.read_csv(sep="\t").
    baselines = {
        baseline_name: pd.read_csv(
            "../results/modeval_knownmodules/baselines_" + baseline_name +
            ".tsv",
            sep="\t",
            index_col=[0, 1, 2])
        for baseline_name in baseline_names
    }

    modules = Modules(
        json.load(open("../" + setting["output_folder"] + "modules.json")))
    runinfo = json.load(
        open("../" + setting["output_folder"] + "runinfo.json"))
    dataset = json.load(open("../" + setting["dataset_location"]))

    settingscores = []
    for regnet_name in dataset["knownmodules"].keys():
        for knownmodules_name in dataset["knownmodules"][regnet_name].keys():
            # .loc replaces DataFrame.ix, which was removed in pandas 1.0
            baselinesoi = {
                baseline_name: baseline.loc[
                    (dataset["baselinename"], regnet_name,
                     knownmodules_name)].to_dict()
                for baseline_name, baseline in baselines.items()
            }

            knownmodules_location = dataset["knownmodules"][regnet_name][
                knownmodules_name]
            knownmodules = Modules(
                json.load(open("../" + knownmodules_location)))

            settingscores_goldstandard = modevalscorer(modules, knownmodules,
                                                       baselinesoi)
            settingscores_goldstandard["settingid"] = setting["settingid"]
            settingscores_goldstandard["knownmodules_name"] = knownmodules_name
            settingscores_goldstandard["regnet_name"] = regnet_name
            settingscores_goldstandard["goldstandard"] = (
                regnet_name + "#" + knownmodules_name)
            settingscores_goldstandard["runningtime"] = runinfo["runningtime"]
            settingscores.append(settingscores_goldstandard)

    scores[setting["settingid"]] = settingscores
def blockcluster(E, ngenes=10, nconditions=10, **kwargs):
    """Bicluster the expression matrix E using the R `blockcluster` package.

    Parameters
    ----------
    E : pandas.DataFrame
        Expression matrix; columns are genes and rows are conditions
        (inferred from the use of E.columns for gene labels — TODO confirm).
    ngenes : int
        Number of gene (column) co-clusters.
    nconditions : int
        Number of condition (row) co-clusters.
    **kwargs
        Ignored; accepted for interface compatibility with other methods.

    Returns
    -------
    Modules
        Gene modules built from the column cluster labels. The row
        (condition) clusters computed by R are discarded.
    """
    importr("blockcluster")
    rresults = ro.r["coclusterContinuous"](
        ro.r["as.matrix"](standardize(E)),
        nbcocluster=ro.IntVector([nconditions, ngenes]))
    # Only the column (gene) classes are turned into modules; the row
    # classes ("rowclass") are intentionally not returned.
    modules = Modules(
        convert_labels2modules(rresults.slots["colclass"], E.columns))
    return modules
def modeval_coverage_worker(setting, scores, verbose=False):
    """Evaluate how well one setting's modules cover regulator binding
    ("regcircuit" gold standard), normalised against null-model baselines.

    Parameters
    ----------
    setting : dict
        Needs "settingid", "output_folder" and "dataset_location" keys.
    scores : mapping
        Shared results store; a one-element list of score dicts is written
        under setting["settingid"].
    verbose : bool
        When True, print start/stop markers for this setting.
    """
    dataset = json.load(open("../" + setting["dataset_location"]))

    baseline_names = ["permuted", "sticky", "scalefree"]
    # pd.read_table is deprecated -> pd.read_csv(sep="\t")
    baselines = {
        baseline_name: pd.read_csv(
            "../results/modeval_coverage/baselines_" + baseline_name + ".tsv",
            sep="\t",
            index_col=[0, 1])
        for baseline_name in baseline_names
    }
    # Restrict each baseline table to this dataset's rows.
    # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
    baselines = {
        baseline_name: baseline.loc[dataset["baselinename"]]
        for baseline_name, baseline in baselines.items()
    }

    runinfo = json.load(
        open("../" + setting["output_folder"] + "runinfo.json"))
    modules = Modules(
        json.load(open("../" + setting["output_folder"] + "modules.json")))

    if verbose:
        print("▶ " + str(setting["settingid"]))

    subscores = []
    for bound_name, bound_location in dataset["binding"].items():
        if bound_location.endswith(".pkl"):
            bound = pd.read_pickle("../" + bound_location)
        else:
            bound = pd.read_table(
                "../" + bound_location, index_col=0, header=[0, 1])
        subscores.append(modbindevalscorer(modules, bound))

    # average over the different regulatory-circuit cutoffs
    settingscores = pd.DataFrame(subscores).mean().to_dict()

    for baseline_name, baseline in baselines.items():
        settingscores["aucodds_" + baseline_name] = (
            settingscores["aucodds"] / baseline["aucodds"].mean())

    settingscores["settingid"] = setting["settingid"]
    settingscores["goldstandard"] = "regcircuit"
    settingscores["runningtime"] = runinfo["runningtime"]

    scores[setting["settingid"]] = [settingscores]

    if verbose:
        print("◼ " + str(setting["settingid"]))
def modenrichevalworker(setting, scores):
    """Score enrichment of one setting's modules in every gene-set collection.

    Parameters
    ----------
    setting : dict
        Needs "settingid", "output_folder" and "dataset_location" keys.
    scores : mapping
        Shared results store; the score dicts are written under
        setting["settingid"].
    """
    dataset = json.load(open("../" + setting["dataset_location"]))
    modules = Modules(
        json.load(open("../" + setting["output_folder"] + "modules.json")))

    settingscores = []
    for gsets_name, gsets_location in dataset["gsets"].items():
        membership = pd.read_pickle("../" + gsets_location)
        # companion pickle: same path with "_connectivity" inserted
        # before the ".pkl" extension
        connectivity = pd.read_pickle(
            "../" + gsets_location[:-4] + "_connectivity.pkl")

        gsets_scores = modenrichevalscorer(modules, membership, connectivity)
        for scorename, score in gsets_scores.items():
            settingscores.append({
                "settingid": setting["settingid"],
                "scorename": scorename + "#" + gsets_name,
                "score": score,
            })

    scores[setting["settingid"]] = settingscores
def baseline_permuted(modules, **kwargs):
    """Return a permuted-module baseline: wrap `modules` and shuffle it.

    Parameters
    ----------
    modules
        Module assignments accepted by the Modules constructor.
    **kwargs
        Ignored; accepted for interface compatibility with the other
        baseline generators.
    """
    return Modules(modules).shuffle()