def main(datafile, threshold):
    """Build or reload the cluster hierarchy for ``datafile`` and print it.

    The hierarchy is cached as XML in ``out/<basename of datafile>.hrc``.
    When that file already exists it is parsed instead of re-fitting.

    Args:
        datafile: Open text-mode file object exposing ``.name``. Its first
            line is read as a comma-separated row; a column is skipped when
            its entry in that row is exactly ``"0"``.
        threshold: Value handed to ``hierarchy.cut``. When ``None`` the
            hierarchy is printed but not cut, and no graph is dumped.
    """
    filename = os.path.join("out", os.path.basename(datafile.name) + ".hrc")

    if not os.path.isfile(filename):
        header = datafile.readline()
        # Strip every field: the last one still carries the line terminator,
        # so an unstripped "0\n" would never compare equal to "0" and a
        # disabled final column would be loaded anyway.
        collist = [
            i
            for i, toggle in enumerate(header.split(","))
            if toggle.strip() != "0"
        ]
        # Rewind so read_csv sees the file from the start again.
        datafile.seek(0)
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray on both old and new pandas releases.
        data = pd.read_csv(datafile, usecols=collist).values

        pipeline = Pipeline([("clf", Hierarchical())])
        pipeline.fit(data)
        clf = pipeline.get_params()["clf"]
        hierarchy = clf.hierarchy_

        # Make sure the cache directory exists so the fitted result is not
        # lost to a missing "out" folder.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, "wb") as fh:
            fh.write(ET.tostring(hierarchy.to_xml()))
    else:
        with open(filename, "rb") as fh:
            hierarchy = Cluster.from_xml(ET.parse(fh).getroot())

    print(ET.tostring(hierarchy.to_xml()).decode("utf-8"))

    if threshold is not None:
        clusters = hierarchy.cut(threshold)
        print("\n".join(c.to_str(i) for i, c in enumerate(clusters)))
        dump_graph(clusters)
def test_from_xml_small2(self):
    # A <tree> element with two <leaf> children must parse into a Cluster
    # equal to one built from two Clusters, each wrapping one leaf array.
    leaf_arrays = (np.array([3, 0, 4]), np.array([0, 0, 0]))
    expected = Cluster([Cluster([points]) for points in leaf_arrays])

    root = ET.fromstring('<tree height="5.0"><leaf data="[3, 0, 4]"/><leaf data="[0, 0, 0]"/></tree>')
    actual = Cluster.from_xml(root)

    self.assertEqual(actual, expected)
def test_from_xml(self):
    # Parsing the fixture XML (self.xml) must reproduce the fixture tree
    # (self.tree); both attributes are set up outside this method.
    self.assertEqual(Cluster.from_xml(self.xml), self.tree)
def test_from_xml_small(self):
    # A <tree> element with a single <leaf> must parse into a Cluster equal
    # to one built directly from that leaf's array.
    single_point = np.array([7, 4, 8])
    expected = Cluster([single_point])

    root = ET.fromstring('<tree height="0.0"><leaf data="[7, 4, 8]"/></tree>')
    actual = Cluster.from_xml(root)

    self.assertEqual(actual, expected)