def convert_biclustermatrix2biclusters(rows, columns, E):
    """Build ``Bicluster`` objects from paired membership vectors.

    ``columns`` and ``rows`` are walked in lockstep.  For every pair, the
    result of ``column != 0`` is used to index ``E.columns`` and the result
    of ``row != 0`` is used to index ``E.index``; the two selections are
    handed to ``Bicluster``.  Only biclusters whose ``size()`` is greater
    than zero are returned.

    Each column and row vector is also printed to stdout as it is processed.
    """
    kept = []
    for col_vector, row_vector in zip(columns, rows):
        print(col_vector)
        print(row_vector)

        selected_columns = E.columns[col_vector != 0]
        selected_index = E.index[row_vector != 0]
        candidate = Bicluster(selected_columns, selected_index)

        # Drop biclusters that report a non-positive size.
        if candidate.size() > 0:
            kept.append(candidate)
    return kept
def opsm(E, l=2, **kwargs):
    """Run the external OPSM jar on ``E`` and parse the biclusters it writes.

    The standardized matrix is written transposed (space separated, without
    index or header) into a temporary directory, ``OPSM.jar`` is started via
    ``java``, and the output file is read back in groups of three lines: the
    first line of a group holds positions into ``E.columns``, the second
    holds positions into ``E.index``, and the third is ignored.

    Parameters
    ----------
    E
        Matrix exposing ``columns`` and ``index``; passed through
        ``standardize`` before being written.
    l
        Forwarded as the last command-line argument to OPSM.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    list
        One ``Bicluster`` per non-empty group in the output file.

    Raises
    ------
    KeyError
        If the ``PERSOFTWARELOCATION`` environment variable is not set.
    """
    with TemporaryDirectory() as tmpdir:
        # Both files must live *inside* tmpdir so they are removed on exit;
        # plain string concatenation without a separator would place the
        # input file next to the directory and leak it.
        input_location = os.path.abspath(os.path.join(tmpdir, "E.csv"))
        output_location = os.path.abspath(os.path.join(tmpdir, "output.txt"))

        pd.DataFrame(standardize(E)).T.to_csv(
            input_location, index=0, header=0, sep=" ")

        # PERSOFTWARELOCATION is the location in which the software is installed
        jar_location = os.environ["PERSOFTWARELOCATION"] + "/OPSM/OPSM.jar"

        # An argument list (no shell) keeps paths containing spaces intact.
        command = [
            "java", "-XX:ParallelGCThreads=1", "-Xmx1G", "-jar", jar_location,
            input_location,
            str(len(E.columns)),
            str(len(E.index)),
            output_location,
            str(l),
        ]
        print(" ".join(command))
        sp.call(command)

        with open(output_location, "r") as infile:
            lines = infile.readlines()

    bics = []
    for line1, line2, _ in zip(lines[::3], lines[1::3], lines[2::3]):
        # Lines still carry their newline, so test the split tokens rather
        # than the raw length to really skip blank lines.
        gene_tokens = line1.split()
        condition_tokens = line2.split()
        if not gene_tokens or not condition_tokens:
            continue

        gids = [int(gid) for gid in gene_tokens]
        cids = [int(cid) for cid in condition_tokens]
        bics.append(
            Bicluster(E.columns[gids].tolist(), E.index[cids].tolist()))

    return bics
def msbe(E, alpha=0.4, beta=0.5, gamma=1.2, refgene="random 500", refcond="random 20", **kwargs):
    """Run the external MSBE ``additiveBi`` script on ``E`` and parse its output.

    The standardized matrix is written tab separated (with index and header)
    into a temporary directory and ``additiveBi`` is run through ``sh``.  The
    results file is read in groups of three lines: the first line of a group
    is ignored, the second holds 1-based positions into ``E.index`` and the
    third holds 1-based positions into ``E.columns``.

    Parameters
    ----------
    E
        Matrix exposing ``columns`` and ``index``; passed through
        ``standardize`` before being written.
    alpha, beta, gamma
        Forwarded unchanged as command-line arguments.
    refgene, refcond
        Forwarded as command-line arguments.  Whitespace inside a value
        separates arguments (``"random 500"`` becomes ``random`` and
        ``500``), as it did when the command was a shell string.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    list
        One ``Bicluster`` per non-empty group in the results file.

    Raises
    ------
    KeyError
        If the ``PERSOFTWARELOCATION`` environment variable is not set.
    """
    with TemporaryDirectory() as tmpdir:
        input_location = os.path.join(tmpdir, "E.csv")
        results_location = os.path.join(tmpdir, "results.txt")

        standardize(E).to_csv(input_location, sep="\t")

        script_location = (
            os.environ["PERSOFTWARELOCATION"] + "/MSBE_linux_1.0.5/additiveBi")

        # Explicit argument list instead of a shell string built from
        # locals(): paths with spaces survive and nothing is shell-interpreted.
        command = (
            ["sh", script_location, input_location,
             str(alpha), str(beta), str(gamma)]
            + str(refgene).split()
            + str(refcond).split()
            + [results_location]
        )
        sp.call(command)

        with open(results_location, "r") as infile:
            lines = infile.readlines()

    bics = []
    for _, line1, line2 in zip(lines[::3], lines[1::3], lines[2::3]):
        # Lines still carry their newline, so test the split tokens rather
        # than the raw length to really skip blank lines.
        condition_tokens = line1.split()
        gene_tokens = line2.split()
        if not condition_tokens or not gene_tokens:
            continue

        # The tool reports 1-based positions; convert to 0-based.
        cids = [int(cid) - 1 for cid in condition_tokens]
        gids = [int(gid) - 1 for gid in gene_tokens]
        bics.append(
            Bicluster(E.columns[gids].tolist(), E.index[cids].tolist()))

    return bics
def spectral_biclust(E, ngenes=3, nconditions=1, spectral_method="bistochastic", n=6, n_best_ratio=0.5, **kwargs):
    """Fit ``SpectralBiclustering`` on the standardized matrix and wrap the result.

    The model is created with ``n_clusters=(nconditions, ngenes)``,
    ``method=spectral_method``, ``n_components=n`` and
    ``n_best=int(n * n_best_ratio)`` (at least 1).  After fitting, the
    entries of ``columns_`` and ``rows_`` are paired up; each pair indexes
    ``E.columns`` and ``E.index`` respectively to form one ``Bicluster``.

    Extra keyword arguments are accepted and ignored.
    """
    # n_best is never allowed to drop below one.
    n_best = int(n * n_best_ratio)
    if n_best < 1:
        n_best = 1

    model = SpectralBiclustering(
        n_clusters=(nconditions, ngenes),
        method=spectral_method,
        n_components=n,
        n_best=n_best,
    )
    model.fit(standardize(E))

    return [
        Bicluster(E.columns[column_selector], E.index[row_selector])
        for column_selector, row_selector in zip(model.columns_, model.rows_)
    ]
def fabia(E, n=13, alpha=0.01, cyc=500, spl=0., spz=0.5, non_negative=0, random=1, center=2, norm=1, scale=0, lap=1, nL=0, lL=0, bL=0, thresZ=0.5, thresL=None, **kwargs):
    """Run the R ``fabia`` function on ``E`` and convert its biclusters.

    The standardized matrix is transposed and passed to R's ``fabia``
    together with the tuning parameters, in the positional order given in
    the call below.  ``extractBic`` is then applied with ``thresZ`` and
    ``thresL``, and every row of its ``bic`` component whose ``binp`` entries
    are all positive becomes one ``Bicluster``.

    Parameters
    ----------
    E
        pandas DataFrame-like matrix exposing ``columns`` and ``index``.
    n, alpha, cyc, spl, spz, non_negative, random, center, norm, scale, lap, nL, lL, bL
        Forwarded positionally to R's ``fabia``.
    thresZ
        Forwarded to ``extractBic``.
    thresL
        Forwarded to ``extractBic``; ``None`` or the string ``"None"`` is
        translated to R ``NULL``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    list
        ``Bicluster`` objects built from ``E.columns`` / ``E.index`` labels.
    """
    importr("fabia")
    rfabia = ro.r["fabia"]

    if thresL is None or thresL == "None":
        thresL = ro.NULL

    # ``.values`` replaces ``DataFrame.as_matrix()``, which is gone from
    # current pandas releases.
    # NOTE(review): handing a numpy array to R presumably relies on rpy2's
    # numpy conversion being activated elsewhere in this module - confirm.
    rresults = rfabia(
        standardize(E).values.T, n, alpha, cyc, spl, spz, non_negative,
        random, center, norm, scale, lap, nL, lL, bL)
    rresults_extracted = ro.r["extractBic"](rresults, thresZ, thresL).rx2("bic")

    bics = []
    # R rows are 1-based, so the last valid row is ``nrow`` itself; stopping
    # at ``nrow - 1`` would silently drop the final bicluster.
    for i in range(1, rresults_extracted.nrow + 1):
        if np.min(rresults_extracted.rx2(i, "binp")) > 0:  # number of rows and columns > 0
            # Names look like "gene<k>" / "sample<k>" with 1-based k.
            gids = [
                int(g[len("gene"):]) - 1
                for g in rresults_extracted.rx2(i, "bixn")
            ]
            cids = [
                int(g[len("sample"):]) - 1
                for g in rresults_extracted.rx2(i, "biypn")
            ]
            bics.append(
                Bicluster(E.columns[gids].tolist(), E.index[cids].tolist()))

    return bics
def biforce(E, m="o", t=0, **kwargs):
    """Run the external BiForce jar on ``E`` and parse its biclusters.

    The standardized matrix is written transposed (tab separated, without
    index or header) into a temporary directory.  ``mbiforce.jar`` is started
    via ``java`` with the BiForceV2 installation directory as its working
    directory.  The first and last lines of the output file are skipped;
    every other line is a comma-separated list of tokens where ``R<k>``
    selects ``E.columns[k - 1]`` and ``C<k>`` selects ``E.index[k - 1]``.

    Parameters
    ----------
    E
        Matrix exposing ``columns`` and ``index``; passed through
        ``standardize`` before being written.
    m
        Forwarded as ``-m=<m>``.
    t
        Forwarded as ``-t=<t>``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    list
        One ``Bicluster`` per non-blank result line.

    Raises
    ------
    KeyError
        If the ``PERSOFTWARELOCATION`` environment variable is not set.
    """
    with TemporaryDirectory() as tmpdir:
        E_location = os.path.abspath(os.path.join(tmpdir, "E.csv"))
        output_location = os.path.abspath(os.path.join(tmpdir, "output.txt"))

        standardize(E).T.to_csv(
            E_location, index=False, header=False, sep="\t")

        command = [
            "java", "-XX:ParallelGCThreads=1", "-Xmx12G", "-jar", "mbiforce.jar",
            "-i=" + E_location,
            "-o=" + output_location,
            "-m={}".format(m),
            "-h=false",
            "-t={}".format(t),
        ]

        # The tool only looks for its parameter.ini file in the current
        # working directory.  Setting ``cwd`` for the child process achieves
        # that without changing (and having to restore) the working
        # directory of the calling Python process.
        sp.call(command, cwd=os.environ["PERSOFTWARELOCATION"] + "/BiForceV2/")

        with open(output_location) as infile:
            result_lines = infile.readlines()[1:-1]

    bics = []
    for line in result_lines:
        line = line.strip()
        if not line:
            # A blank line carries no bicluster.
            continue

        genes = []
        conditions = []
        for rowcol in line.split(","):
            # startswith() is safe on empty tokens (e.g. a trailing comma).
            if rowcol.startswith("R"):
                genes.append(E.columns[int(rowcol[1:]) - 1])
            elif rowcol.startswith("C"):
                conditions.append(E.index[int(rowcol[1:]) - 1])
        bics.append(Bicluster(genes, conditions))

    return bics