def _ica_fdrtool_signed(E, source, qvalcutoff):
    """Build two q-value filtered modules (above / below mean) per component.

    Every row of ``source.T`` is passed to the R ``fdrtool`` function and the
    returned ``qval`` vector is compared against ``qvalcutoff``.  For each row
    two modules are appended, in this order:

    1. columns of ``E`` with q-value < ``qvalcutoff`` and value > row mean;
    2. columns of ``E`` with q-value < ``qvalcutoff`` and value < row mean.

    Values exactly equal to the row mean end up in neither module.

    Args:
        E: object exposing ``columns`` (presumably an expression DataFrame
            with genes as columns -- TODO confirm against callers).
        source: 2-D array; each row of ``source.T`` is assumed to have one
            entry per column of ``E`` -- verify against caller.
        qvalcutoff: exclusive upper bound on the q-value.

    Returns:
        List of ``Module`` objects, two per row of ``source.T``.
    """
    importr("fdrtool")
    fdrtool = ro.r["fdrtool"]

    print("qvalcutoff: " + str(qvalcutoff))

    modules = []
    for component in source.T:
        fit = fdrtool(
            ro.FloatVector(component),
            plot=False,
            cutoff_method="fndr",
            verbose=False,
        )
        significant = np.array(fit.rx2("qval")) < qvalcutoff
        center = component.mean()

        above = E.columns[significant & (component > center)]
        below = E.columns[significant & (component < center)]
        modules.extend((Module(above), Module(below)))
    return modules
def _ica_perccutoff(E, source, perccutoff):
    """Build two modules per component from the extreme ends of its ranking.

    For every row of ``source.T`` the columns of ``E`` are ordered by
    ascending value.  The first ``int(round(len(E.columns) * perccutoff))``
    columns form one module and the last that many columns form a second
    module, appended in that order.

    Args:
        E: object exposing ``columns`` (presumably an expression DataFrame
            with genes as columns -- TODO confirm against callers).
        source: 2-D array; each row of ``source.T`` is assumed to have one
            entry per column of ``E`` -- verify against caller.
        perccutoff: fraction of the columns of ``E`` taken from each end.

    Returns:
        List of ``Module`` objects, two per row of ``source.T``.
    """
    # The tail size does not depend on the component, so compute it once.
    n_selected = int(round(len(E.columns) * perccutoff))

    modules = []
    for source_row in source.T:
        sortedgenes = E.columns[source_row.argsort()]

        modules.append(Module(sortedgenes[:n_selected]))

        # ``sortedgenes[-0:]`` is the same as ``sortedgenes[0:]`` and would
        # select *every* column, so a tail size of zero needs an explicit
        # empty slice to stay symmetric with the lower tail above.
        if n_selected:
            upper_tail = sortedgenes[-n_selected:]
        else:
            upper_tail = sortedgenes[:0]
        modules.append(Module(upper_tail))
    return modules
def _ica_zscore(E, source, stdcutoff):
    """Build one module per component from values beyond a std-based bound.

    For every row of ``source.T`` the bound is ``row.std() * stdcutoff``.
    Values are compared directly against ``-bound`` and ``+bound`` (the row
    mean is not subtracted).  The resulting module lists the columns of ``E``
    below ``-bound`` first, followed by the columns above ``+bound``.

    Args:
        E: object exposing ``columns`` (presumably an expression DataFrame
            with genes as columns -- TODO confirm against callers).
        source: 2-D array; each row of ``source.T`` is assumed to have one
            entry per column of ``E`` -- verify against caller.
        stdcutoff: multiplier applied to the standard deviation of each row.

    Returns:
        List of ``Module`` objects, one per row of ``source.T``.
    """
    modules = []
    for component in source.T:
        bound = component.std() * stdcutoff
        lower_tail = E.columns[component < -bound].tolist()
        upper_tail = E.columns[component > bound].tolist()
        modules.append(Module(lower_tail + upper_tail))
    return modules
def flame(E, knn=10, threshold=-1, threshold2=-3.0, steps=500, **kwargs):
    """Run the external ``flame/sample`` binary on ``E`` and parse its clusters.

    ``standardize(E).T`` is written as a space-separated file (preceded by a
    header line holding the number of columns and rows of ``E``) into a
    temporary directory.  The binary found under
    ``$PERSOFTWARELOCATION/flame/sample`` is then invoked through the shell
    with the file path followed by ``knn``, ``threshold2``, ``steps`` and
    ``threshold``, in that order, and its standard output is parsed.

    Args:
        E: object exposing ``shape`` and ``columns`` and accepted by
            ``standardize`` (presumably an expression DataFrame -- TODO
            confirm against callers).
        knn, threshold, threshold2, steps: forwarded verbatim on the command
            line of the binary.
        **kwargs: accepted and ignored.

    Returns:
        List of ``Module`` objects, one for each output line that starts with
        ``"Cluster"``, does not contain ``"outliers"`` and lists at least one
        id after the first ``":"``.  Ids are read as integer positions into
        ``E.columns``.

    Raises:
        KeyError: if the ``PERSOFTWARELOCATION`` environment variable is unset.
    """
    with TemporaryDirectory() as tmpdir:
        with open(tmpdir + "/E.csv", "w") as handle:
            header = str(E.shape[1]) + " " + str(E.shape[0]) + "\n"
            handle.write(header)
            standardize(E).T.to_csv(handle, index=False, header=False, sep=" ")

        binary = os.environ["PERSOFTWARELOCATION"] + "/flame/sample"
        command = "{binary} {tmpdir}/E.csv {knn} {threshold2} {steps} {threshold}".format(
            binary=binary,
            tmpdir=tmpdir,
            knn=knn,
            threshold2=threshold2,
            steps=steps,
            threshold=threshold,
        )

        # Only stdout is captured; stderr is left attached to the parent.
        stdout_bytes, _ = sp.Popen(command, shell=True, stdout=sp.PIPE).communicate()

    modules = []
    for row in stdout_bytes.decode().split("\n"):
        if not row.startswith("Cluster"):
            continue
        if "outliers" in row:
            continue

        members_start = row.index(":") + 1
        gids = row[members_start:].split(",")
        if gids[0] == "":
            # Cluster line without any member ids.
            continue

        modules.append(Module([E.columns[int(gid)] for gid in gids]))
    return modules
def genomica(E, R, n=100, **kwargs):
    """Run the external Genomica Java program and parse modules and regulators.

    The output of ``scale(E)`` is stacked below two rows filled with the
    string ``"desc"``, transposed (so the columns of ``E`` become the rows,
    with the index named ``"genes"``) and written tab-separated to a
    temporary directory.  The positions of the rows that are both in ``R``
    and in ``E.columns`` are written, one per line, to a regulators file.
    ``ExampleProgram`` is then started from
    ``$PERSOFTWARELOCATION/Genomica/new/`` through the shell with its output
    redirected to a file, and that file is parsed line by line.

    Args:
        E: object exposing ``columns`` and ``index`` and accepted by ``scale``
            (presumably an expression DataFrame with genes as columns -- TODO
            confirm against callers).
        R: collection of regulator names; also used as the column labels of
            the returned frame.
        n: converted with ``int`` and passed on the command line.
        **kwargs: accepted and ignored.

    Returns:
        Tuple ``(modules, modulenet)``: ``modules`` is a list of ``Module``
        objects and ``modulenet`` a DataFrame with one row per module and
        columns ``R`` holding the summed regulator scores, with missing
        entries filled with 0.

    Raises:
        KeyError: if the ``PERSOFTWARELOCATION`` environment variable is unset.
    """
    n_genes = len(E.columns)
    desc_rows = np.array([["desc"] * n_genes, ["desc"] * n_genes])
    stacked = np.vstack([desc_rows, scale(E)])
    E_genomica = pd.DataFrame(
        stacked,
        index=["desc", "desc"] + E.index.tolist(),
        columns=E.columns,
    ).T
    E_genomica.index.name = "genes"

    members_prefix = "Module members: "

    with TemporaryDirectory() as tmpdir:
        E_genomica.to_csv(tmpdir + "/E.csv", sep="\t")

        # Map row position in the written table -> regulator name.
        R_genomica = {}
        for position, gene in enumerate(E_genomica.index):
            if gene in R and gene in E.columns:
                R_genomica[position] = gene

        with open(tmpdir + "/regulators.csv", "w") as regulators_file:
            regulators_file.write("\n".join(str(position) for position in R_genomica))

        # PERSOFTWARELOCATION is the location in which the software is installed
        genomica_loc = os.environ["PERSOFTWARELOCATION"] + "/Genomica/new/"
        launcher = "cd {genomica_loc};java -XX:ParallelGCThreads=1 -Xmx12G -cp .:../Genomica.jar ExampleProgram ".format(
            genomica_loc=genomica_loc
        )

        n = int(n)

        arguments = "{tmpdir}/E.csv {tmpdir}/regulators.csv {n} 10 > {tmpdir}/output.txt".format(
            tmpdir=tmpdir, n=n
        )
        sp.call(launcher + arguments, shell=True)

        # Post-process the output file.  The checks below are tried in a
        # fixed priority order; the first one that matches handles the line.
        modules = []
        modulenet = []
        with open(tmpdir + "/output.txt") as results:
            for raw_line in results:
                line = raw_line.rstrip()

                if line.startswith(members_prefix):
                    # A new module starts: reset the per-module score table.
                    moduleregulator_scores = defaultdict(float)
                    member_ids = line[len(members_prefix):].split(" ")
                    module = Module([E.columns[int(geneid)] for geneid in member_ids])
                    print("----")
                    print(module)
                    print(len(modules))
                    continue

                if line.startswith("<<<<<PROGRAM"):
                    # Nothing is collected from this line.
                    continue

                if "Regulator: " in line:
                    fields = line.split(",")
                    regulatorid = int(fields[0].split(" ")[-1])
                    regulator = R_genomica[regulatorid]
                    moduleregulator_scores[regulator] += float(fields[-1].split(" ")[-1])
                    continue

                if line.startswith(">>>>>MODULE"):
                    # The current module is complete: store it with its scores.
                    modules.append(module)
                    modulenet.append(moduleregulator_scores)

    modulenet = pd.DataFrame(modulenet, columns=R).fillna(0)
    return modules, modulenet
def _ica_tail(E, source, tailcutoff):
    """Build modules from values above a single global percentile cutoff.

    The cutoff is the ``(1 - tailcutoff) * 100``-th percentile of all values
    in ``source``.  For every row of ``source.T`` the columns of ``E`` whose
    value is strictly above that cutoff form a module; rows without any such
    column contribute no module.

    Args:
        E: object exposing ``columns`` (presumably an expression DataFrame
            with genes as columns -- TODO confirm against callers).
        source: 2-D array; each row of ``source.T`` is assumed to have one
            entry per column of ``E`` -- verify against caller.
        tailcutoff: fraction used to derive the percentile.

    Returns:
        List of ``Module`` objects, at most one per row of ``source.T``.
    """
    cutoff = np.percentile(source.flatten(), (1 - tailcutoff) * 100)

    selections = (E.columns[component > cutoff].tolist() for component in source.T)
    return [Module(genes) for genes in selections if genes]
def _ica_fdrtool(E, source, qvalcutoff):
    """Build one q-value filtered module per component.

    Every row of ``source.T`` is passed to the R ``fdrtool`` function; the
    columns of ``E`` whose returned ``qval`` is strictly below ``qvalcutoff``
    form the module for that row.

    Args:
        E: object exposing ``columns`` (presumably an expression DataFrame
            with genes as columns -- TODO confirm against callers).
        source: 2-D array; each row of ``source.T`` is assumed to have one
            entry per column of ``E`` -- verify against caller.
        qvalcutoff: exclusive upper bound on the q-value.

    Returns:
        List of ``Module`` objects, one per row of ``source.T``.
    """
    # Load the R package before looking up its function.
    importr("fdrtool")
    fdrtool = ro.r["fdrtool"]

    def significant_columns(component):
        # Columns of E whose q-value for this component passes the cutoff.
        fit = fdrtool(
            ro.FloatVector(component),
            plot=False,
            cutoff_method="fndr",
            verbose=False,
        )
        return E.columns[np.array(fit.rx2("qval")) < qvalcutoff]

    return [Module(significant_columns(component)) for component in source.T]
def cmeans(E, k=100, m="auto", cutoff=0.5, cluster_all=True, **kwargs):
    """Cluster ``E`` with R's ``mfuzz`` and threshold the membership matrix.

    ``standardize(E).T`` is converted to an R matrix and wrapped in an
    ``ExpressionSet``.  ``mfuzz`` is called with that object, ``k`` and ``m``;
    its ``membership`` output is then split by column, and for each column
    the columns of ``E`` with membership >= ``cutoff`` form a module.

    Args:
        E: object exposing ``columns`` and accepted by ``standardize``
            (presumably an expression DataFrame -- TODO confirm against
            callers).
        k: second positional argument to ``mfuzz``.
        m: third positional argument to ``mfuzz``; when equal to ``"auto"``
            it is replaced by the result of R's ``mestimate`` on the
            ``ExpressionSet``.
        cutoff: inclusive lower bound on the membership value.
        cluster_all: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        List of ``Module`` objects, one per column of the membership matrix.
    """
    for package in ("Mfuzz", "Biobase"):
        importr(package)

    make_expression_set = ro.r["ExpressionSet"]
    as_matrix = ro.r["as.matrix"]
    Exprs = make_expression_set(as_matrix(standardize(E).T))

    if m == "auto":
        m = ro.r["mestimate"](Exprs)

    fit = ro.r["mfuzz"](Exprs, k, m)
    membership = np.array(fit.rx2("membership"))

    return [Module(E.columns[weights >= cutoff]) for weights in membership.T]