def merlin(E, R, h=0.6, p=5, r=4, initial_number=300, **kwargs):
    """Run the external ``merlin`` binary on ``E`` and parse its output.

    The expression matrix, the regulator list and an initial cluster
    assignment (obtained from ``agglom``) are written to a temporary
    directory, the binary is executed on these files, and the files it
    writes under ``fold0`` are read back.

    Parameters
    ----------
    E : pandas.DataFrame
        Expression matrix. Its columns are the items that get clustered
        (presumably genes -- confirm with the caller). If the smallest value
        is <= 0, a shifted copy is used so that the smallest value is 0.1.
    R : iterable of str
        Candidate regulators. Only those that are also columns of ``E`` are
        used, in sorted order.
    h, p, r
        Forwarded verbatim to the binary's ``-h``, ``-p`` and ``-r`` options.
    initial_number : int
        Passed as ``number`` to ``agglom`` for the initial clustering.
    **kwargs
        Accepted for call compatibility and ignored.

    Returns
    -------
    modules
        Result of ``convert_labels2modules`` applied to the labels read from
        ``fold0/modules.txt``.
    wnet : pandas.DataFrame
        Frame indexed by ``E.columns`` with one column per regulator, holding
        the score of each (target, regulator) pair listed in
        ``fold0/prediction_k300.txt`` and 0 everywhere else.

    Raises
    ------
    KeyError
        If the ``PERSOFTWARELOCATION`` environment variable is not set.
    """
    # PERSOFTWARELOCATION is the location in which the software is installed.
    # Looked up first so a missing variable fails before any clustering work.
    binary = os.environ["PERSOFTWARELOCATION"] + "/gpdream/modules/Merlin/src/merlin"

    # Shift the matrix so that every value is strictly positive.
    lowest = E.min().min()
    if lowest <= 0:
        E = E + np.abs(lowest) + 0.1
        print(E.min().min())

    R = sorted(set(R) & set(E.columns))

    with TemporaryDirectory() as tmpdir:
        expression_path = os.path.join(tmpdir, "E.csv")
        regulators_path = os.path.join(tmpdir, "R.csv")
        clusters_path = os.path.join(tmpdir, "clusterassign.csv")
        fold_dir = os.path.join(tmpdir, "fold0")

        E.to_csv(expression_path, sep="\t", index=False)

        with open(regulators_path, "w") as outfile:
            outfile.write("\n".join(R))

        # Initial cluster assignment handed to the binary through -c.
        initial_modules = agglom(E, number=initial_number)
        initial_labels = convert_modules2labels(initial_modules, E.columns)
        with open(clusters_path, "w") as outfile:
            outfile.write(
                "\n".join(
                    g + "\t" + str(label) for g, label in initial_labels.items()
                )
            )

        # An argument list (no shell) keeps paths and option values from
        # being re-interpreted by a shell.
        command = [
            binary,
            "-d", expression_path,
            "-o", tmpdir,
            "-l", regulators_path,
            "-c", clusters_path,
            "-v", "1",
            "-h", str(h),
            "-k", "300",
            "-p", str(p),
            "-r", str(r),
        ]
        print(" ".join(command))
        sp.call(command)

        # ``read_csv(squeeze=True)`` no longer exists in pandas >= 2.0;
        # squeezing the single data column afterwards yields the same Series.
        labels = pd.read_csv(
            os.path.join(fold_dir, "modules.txt"),
            sep="\t",
            index_col=0,
            header=None,
        ).squeeze("columns")
        modules = convert_labels2modules(labels, labels.index)
        print(labels)

        # The file name carries the same 300 that is passed to -k above.
        netable = pd.read_csv(
            os.path.join(fold_dir, "prediction_k300.txt"),
            sep="\t",
            names=["regulator", "target", "score"],
        )

    # Float zeros so that assigning scores never needs a dtype upcast.
    wnet = pd.DataFrame(0.0, columns=R, index=E.columns)
    for regulator, target, score in netable.itertuples(index=False, name=None):
        # ``.ix`` was removed from pandas; ``.loc`` is the label-based accessor.
        wnet.loc[target, regulator] = score

    return modules, wnet
def test_noiselabels(self):
    """Passing -1 as the third argument leaves the -1 entries out of all modules."""
    labels = [0, -1, 0, 1, 1, 2, 2, -1]
    members = range(len(labels))

    modules = clustering.convert_labels2modules(labels, members, -1)
    module_sizes = [len(module) for module in modules]

    # Labels 0, 1 and 2 each occur twice; the two -1 entries form no module.
    self.assertSequenceEqual(module_sizes, [2, 2, 2])
def test_functionality(self):
    """Each distinct label yields one module sized by how often the label occurs."""
    labels = [0, 1, 0, 2, 2, 3, 4]
    expected_sizes = [2, 1, 2, 1, 1]

    modules = clustering.convert_labels2modules(labels, range(len(labels)))
    observed_sizes = list(map(len, modules))

    self.assertSequenceEqual(observed_sizes, expected_sizes)