Beispiel #1
0
def _ica_fdrtool_signed(E, source, qvalcutoff):
    """Build two modules per column of ``source`` from fdrtool q-values.

    The R function ``fdrtool`` is called on every column of ``source``
    (``cutoff_method="fndr"``), and its ``qval`` entry is compared against
    ``qvalcutoff``. For each column two modules are appended, in this order:
    the significant genes whose value lies above the column mean, then the
    significant genes whose value lies below it.

    Args:
        E: object whose ``columns`` supply the gene labels; it is indexed with
            boolean masks as long as one column of ``source``.
        source: 2-D array; iterated column by column via ``source.T``.
        qvalcutoff: genes are kept when their q-value is strictly below this.

    Returns:
        A list of ``Module`` objects, two per column of ``source``.
    """
    importr("fdrtool")
    fdrtool = ro.r["fdrtool"]

    print("qvalcutoff: " + str(qvalcutoff))

    modules = []
    for component in source.T:
        fit = fdrtool(
            ro.FloatVector(component),
            plot=False,
            cutoff_method="fndr",
            verbose=False,
        )
        significant = np.array(fit.rx2("qval")) < qvalcutoff
        center = component.mean()

        # Positive side first, then negative side, matching the output order
        # callers already rely on.
        for side in (component > center, component < center):
            modules.append(Module(E.columns[significant & side]))
    return modules
def _ica_perccutoff(E, source, perccutoff):
    """Build two modules per column of ``source`` from its extreme genes.

    For every column of ``source`` the genes are ordered by their value in that
    column; the first ``round(len(E.columns) * perccutoff)`` genes (lowest
    values) form one module and the same number of genes from the end
    (highest values) form a second module.

    Args:
        E: object whose ``columns`` supply the gene labels.
        source: 2-D array; iterated column by column via ``source.T``.
        perccutoff: fraction of all genes to take from each end.

    Returns:
        A list of ``Module`` objects, two per column of ``source`` (low-end
        module first, then high-end module).
    """
    # The count does not depend on the column, so compute it once.
    n_selected = int(round(len(E.columns) * perccutoff))

    modules = []
    for source_row in source.T:
        sortedgenes = E.columns[source_row.argsort()]

        modules.append(Module(sortedgenes[:n_selected]))

        # ``sortedgenes[-0:]`` is ``sortedgenes[0:]``, i.e. *every* gene, so a
        # count that rounds to zero must be handled explicitly to yield an
        # empty high-end module (mirroring the empty low-end one).
        if n_selected:
            top_genes = sortedgenes[-n_selected:]
        else:
            top_genes = sortedgenes[:0]
        modules.append(Module(top_genes))
    return modules
def _ica_zscore(E, source, stdcutoff):
    """Build one module per column of ``source`` from values beyond a std band.

    For every column, genes whose value is below ``-std * stdcutoff`` are
    listed first, followed by genes whose value is above ``+std * stdcutoff``,
    where ``std`` is that column's standard deviation.

    Args:
        E: object whose ``columns`` supply the gene labels.
        source: 2-D array; iterated column by column via ``source.T``.
        stdcutoff: multiplier applied to each column's standard deviation.

    Returns:
        A list with one ``Module`` per column of ``source``.
    """

    def tail_genes(component):
        threshold = component.std() * stdcutoff
        lower_tail = E.columns[component < -threshold].tolist()
        upper_tail = E.columns[component > threshold].tolist()
        return lower_tail + upper_tail

    return [Module(tail_genes(component)) for component in source.T]
Beispiel #4
0
def flame(E, knn=10, threshold=-1, threshold2=-3.0, steps=500, **kwargs):
    """Run the external FLAME ``sample`` binary on ``E`` and parse its clusters.

    ``E`` is standardized, transposed and written to a temporary
    space-separated file whose first line is ``"<n columns> <n rows>"`` of
    ``E``. The binary ``$PERSOFTWARELOCATION/flame/sample`` is then invoked
    with the arguments ``<file> knn threshold2 steps threshold`` and its
    standard output is parsed: every line that starts with ``"Cluster"`` and
    does not contain ``"outliers"`` is read as a comma-separated list of
    integer positions (after the first ``":"``) into ``E.columns``.

    Args:
        E: DataFrame-like expression matrix; ``E.columns`` supply gene labels.
        knn: passed to the binary as its second argument.
        threshold: passed to the binary as its fifth argument.
        threshold2: passed to the binary as its third argument.
        steps: passed to the binary as its fourth argument.
        **kwargs: accepted but not used by this function.

    Returns:
        A list of ``Module`` objects, one per non-empty cluster line.

    Raises:
        KeyError: if the ``PERSOFTWARELOCATION`` environment variable is unset.
        OSError: if the binary cannot be executed.
    """
    with TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, "E.csv")
        with open(input_path, "w") as outfile:
            outfile.write(str(E.shape[1]) + " " + str(E.shape[0]) + "\n")
            standardize(E).T.to_csv(outfile,
                                    index=False,
                                    header=False,
                                    sep=" ")

        binary = os.environ["PERSOFTWARELOCATION"] + "/flame/sample"

        # Pass an argument list instead of an interpolated shell string so
        # that paths containing spaces or shell metacharacters are handled
        # verbatim and no shell is involved.
        command = [
            binary,
            input_path,
            str(knn),
            str(threshold2),
            str(steps),
            str(threshold),
        ]

        process = sp.Popen(command, stdout=sp.PIPE)
        out, _ = process.communicate()

    modules = []
    for row in out.decode().split("\n"):
        if row.startswith("Cluster") and "outliers" not in row:
            gids = row[row.index(":") + 1:].split(",")
            # An empty cluster yields a single empty token; skip it.
            if gids[0] != "":
                module = Module([E.columns[int(gid)] for gid in gids])
                modules.append(module)

    return modules
def genomica(E, R, n=100, **kwargs):
    """Run the external Genomica Java program on ``E`` and parse its output.

    The scaled expression matrix and a list of regulator row positions are
    written to a temporary directory, the Java ``ExampleProgram`` is executed
    from ``$PERSOFTWARELOCATION/Genomica/new/`` with its standard output
    redirected to a file, and that file is parsed into modules and
    per-module regulator scores.

    Args:
        E: expression DataFrame; ``E.columns`` supply the gene labels that
            module members are looked up in.
        R: collection of regulator labels; used for membership tests and as
            the columns of the returned frame.
        n: converted with ``int`` and passed to the program as its third
            argument (presumably the number of modules -- confirm against
            the Genomica documentation).
        **kwargs: accepted but not used by this function.

    Returns:
        A tuple ``(modules, modulenet)``: ``modules`` is a list of ``Module``
        objects and ``modulenet`` is a DataFrame with one row per module and
        columns ``R``, holding the summed regulator scores (missing entries
        filled with 0).

    Raises:
        KeyError: if the ``PERSOFTWARELOCATION`` environment variable is unset.
    """
    # Stack two rows of "desc" placeholders on top of the scaled matrix, then
    # transpose so that each row is one column (gene) of E and the first two
    # columns are the "desc" placeholders.
    E_genomica = pd.DataFrame(np.vstack([np.array([["desc" for i in range(len(E.columns))], ["desc" for i in range(len(E.columns))]]), scale(E)]), index=["desc", "desc"]  + E.index.tolist(), columns=E.columns).T
    E_genomica.index.name = "genes"

    with TemporaryDirectory() as tmpdir:
        E_genomica.to_csv(tmpdir + "/E.csv", sep="\t")
        
        # Map row position in E_genomica -> gene label, restricted to labels
        # that appear in R; the positions are what gets written to disk.
        R_genomica = {gid:g for gid, g in enumerate(E_genomica.index) if g in R and g in E.columns}
        with open(tmpdir + "/regulators.csv", "w") as outfile:
            outfile.write("\n".join([str(gid) for gid in R_genomica.keys()]))
        
        # PERSOFTWARELOCATION is the location in which the software is installed
        genomica_loc = os.environ["PERSOFTWARELOCATION"] + "/Genomica/new/"
        genomica_command = "cd {genomica_loc};java -XX:ParallelGCThreads=1 -Xmx12G -cp .:../Genomica.jar ExampleProgram ".format(**locals())
        
        n = int(n)

        # NOTE(review): the meaning of the literal "10" argument is not
        # visible here -- confirm against the Genomica documentation.
        command = genomica_command + "{tmpdir}/E.csv {tmpdir}/regulators.csv {n} 10 > {tmpdir}/output.txt".format(**locals())
        
        # The command relies on the shell for "cd", ";" and ">" redirection.
        sp.call(command, shell=True)
        
        # postprocess output file
        # NOTE(review): `state` is assigned here and below but never read
        # anywhere in this function.
        state = "members"
        modules = []
        modulenet = []
        with open(tmpdir + "/output.txt") as infile:
            for line in infile.readlines():
                line = line.rstrip()

                if line.startswith("Module members: "):
                    # A new module starts: reset the score accumulator and
                    # resolve the space-separated integer ids via E.columns.
                    moduleregulator_scores = defaultdict(float)
                    
                    state = "members"
                    module = Module([E.columns[int(geneid)] for geneid in line[len("Module members: "):].split(" ")])
                    print("----")
                    print(module)
                    print(len(modules))
                elif line.startswith("<<<<<PROGRAM"):
                    state = "program"
                elif line.find("Regulator: ") > -1:
                    
                    line = line.split(",")

                    # The regulator id is the last space-separated token of
                    # the first comma-separated field.
                    regulatorid = int(line[0].split(" ")[-1])

                    # The score is the last space-separated token of the last
                    # comma-separated field; scores for the same regulator
                    # are summed within a module.
                    # NOTE(review): assumes a "Module members: " line has
                    # already been seen, otherwise the accumulator is unbound.
                    moduleregulator_scores[R_genomica[regulatorid]] += float(line[-1].split(" ")[-1])
                elif line.startswith(">>>>>MODULE"):
                    # This marker commits the module read so far together
                    # with its accumulated regulator scores.
                    # NOTE(review): assumes a "Module members: " line has
                    # already been seen, otherwise `module` is unbound.
                    
                    modules.append(module)
                    modulenet.append(moduleregulator_scores)

    # One row per committed module, one column per entry of R.
    modulenet = pd.DataFrame(modulenet, columns=R).fillna(0)

    return modules, modulenet
def _ica_tail(E, source, tailcutoff):
    """Build modules from values above a single global percentile of ``source``.

    One threshold is computed over all values of ``source`` at the
    ``(1 - tailcutoff) * 100`` percentile. For every column of ``source`` the
    genes whose value exceeds that threshold form a module; columns with no
    such gene produce no module.

    Args:
        E: object whose ``columns`` supply the gene labels.
        source: 2-D array; iterated column by column via ``source.T``.
        tailcutoff: fraction used to derive the percentile threshold.

    Returns:
        A list of ``Module`` objects, at most one per column of ``source``.
    """
    threshold = np.percentile(source.flatten(), (1-tailcutoff)*100)

    per_component_genes = (
        E.columns[component > threshold].tolist() for component in source.T
    )
    # Empty gene lists are dropped rather than turned into empty modules.
    return [Module(genes) for genes in per_component_genes if len(genes) > 0]
def _ica_fdrtool(E, source, qvalcutoff):
    """Build one module per column of ``source`` from fdrtool q-values.

    The R function ``fdrtool`` is called on every column of ``source``
    (``cutoff_method="fndr"``); genes whose ``qval`` is strictly below
    ``qvalcutoff`` form that column's module.

    Args:
        E: object whose ``columns`` supply the gene labels.
        source: 2-D array; iterated column by column via ``source.T``.
        qvalcutoff: q-value threshold (exclusive).

    Returns:
        A list with one ``Module`` per column of ``source``.
    """
    # Load the R package before looking the function up.
    importr("fdrtool")
    fdrtool = ro.r["fdrtool"]

    def significant_genes(component):
        fit = fdrtool(
            ro.FloatVector(component),
            plot=False,
            cutoff_method="fndr",
            verbose=False,
        )
        return E.columns[np.array(fit.rx2("qval")) < qvalcutoff]

    return [Module(significant_genes(component)) for component in source.T]
def cmeans(E, k=100, m="auto", cutoff=0.5, cluster_all=True, **kwargs):
    """Cluster the columns of ``E`` with the R ``mfuzz`` function.

    ``E`` is standardized, transposed and wrapped in an R ``ExpressionSet``.
    ``mfuzz`` is then run with ``k`` and ``m``, and every column of the
    resulting ``membership`` matrix is turned into a module containing the
    genes whose membership is at least ``cutoff``.

    Args:
        E: DataFrame-like expression matrix; ``E.columns`` supply gene labels.
        k: passed to ``mfuzz`` as its second argument.
        m: passed to ``mfuzz`` as its third argument; when equal to
            ``"auto"`` it is replaced by the result of R's ``mestimate``.
        cutoff: minimum membership (inclusive) for a gene to join a module.
        cluster_all: accepted but not used by this function.
        **kwargs: accepted but not used by this function.

    Returns:
        A list with one ``Module`` per column of the membership matrix.
    """
    importr("Mfuzz")
    importr("Biobase")

    r = ro.r
    expression_set = r["ExpressionSet"](r["as.matrix"](standardize(E).T))
    fuzzifier = r["mestimate"](expression_set) if m == "auto" else m

    fit = r["mfuzz"](expression_set, k, fuzzifier)
    membership = np.array(fit.rx2("membership"))

    return [Module(E.columns[column >= cutoff]) for column in membership.T]