def main():
    """
    Just runs some example code.
    """

    # locate and load the iris dataset
    arff_path = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + arff_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(arff_path)
    dataset.class_is_last()

    # cluster on a copy with the class attribute stripped off
    train = Instances.copy_instances(dataset)
    train.no_class()
    train.delete_last_attribute()

    # train a 3-cluster SimpleKMeans model
    helper.print_title("Training SimpleKMeans clusterer")
    kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    kmeans.build_clusterer(train)
    print("done")

    # classes-to-clusters evaluation against the original (class-bearing) data
    evaluation = ClusterEvaluation()
    evaluation.set_model(kmeans)
    evaluation.test_model(dataset)
    helper.print_title("Cluster results")
    print(evaluation.cluster_results)
    helper.print_title("Classes to clusters")
    print(evaluation.classes_to_clusters)
def main():
    """
    Just runs some example code.
    """

    # load the iris data
    arff_path = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + arff_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(arff_path)

    # drop the class attribute before clustering
    dataset.delete_last_attribute()

    # batch SimpleKMeans: train, print model, evaluate on the training data
    helper.print_title("Training SimpleKMeans clusterer")
    kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    kmeans.build_clusterer(dataset)
    print(kmeans)
    helper.print_info("Evaluating on data")
    evl = ClusterEvaluation()
    evl.set_model(kmeans)
    evl.test_model(dataset)
    print("# clusters: " + str(evl.num_clusters))
    print("log likelihood: " + str(evl.log_likelihood))
    print("cluster assignments:\n" + str(evl.cluster_assignments))
    plc.plot_cluster_assignments(evl, dataset, inst_no=True)

    # FilteredClusterer: Remove filter + SimpleKMeans as one meta-clusterer
    helper.print_title("Filtered clusterer")
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(arff_path)
    kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    rm = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    meta = FilteredClusterer()
    meta.clusterer = kmeans
    meta.filter = rm
    meta.build_clusterer(dataset)
    print(meta)

    # incremental: stream instances through the Remove filter into Cobweb
    helper.print_title("Incremental clusterer")
    arff_loader = Loader("weka.core.converters.ArffLoader")
    stream = arff_loader.load_file(arff_path, incremental=True)
    cobweb = Clusterer("weka.clusterers.Cobweb")
    rm = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    rm.inputformat(stream)
    filtered_header = rm.outputformat()
    cobweb.build_clusterer(filtered_header)
    for inst in arff_loader:
        rm.input(inst)
        cobweb.update_clusterer(rm.output())
    cobweb.update_finished()
    print(cobweb.to_commandline())
    print(cobweb)
    print(cobweb.graph)
    plg.plot_dot_graph(cobweb.graph)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    path = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info(f"Loading dataset: {path}")
    full_data = Loader("weka.core.converters.ArffLoader").load_file(path)
    full_data.class_is_last()

    # clustering input: a copy without the class attribute
    clustering_data = Instances.copy_instances(full_data)
    clustering_data.no_class()
    clustering_data.delete_last_attribute()

    # fit SimpleKMeans with three clusters
    helper.print_title("Training SimpleKMeans clusterer")
    model = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    model.build_clusterer(clustering_data)
    print("done")

    # classes-to-clusters evaluation needs the class attribute back
    ce = ClusterEvaluation()
    ce.set_model(model)
    ce.test_model(full_data)
    helper.print_title("Cluster results")
    print(ce.cluster_results)
    helper.print_title("Classes to clusters")
    print(ce.classes_to_clusters)
# Exemple #4
# 0
    def evaluation_data(self, model):
        """
        Evaluate a trained clusterer on the test dataset and write a labelled
        copy of every instance to ``result_data.txt``.

        Each instance assigned to cluster 0 is labelled "anomaly", all others
        "normal" (cluster 0 is assumed to be the anomalous one — TODO confirm
        against the trained model).

        :param model: the trained clusterer to evaluate
        :return: the textual cluster results of the evaluation
        :raises Exception: re-raises any failure after logging the traceback
        """
        try:
            loader = Loader(classname="weka.core.converters.ArffLoader")
            # self.dataTestName is presumably the path to the test ARFF file
            data_test = loader.load_file(self.dataTestName)
            evaluation = ClusterEvaluation()
            evaluation.set_model(model)
            evaluation.test_model(data_test)
            cluster_ass = evaluation.cluster_assignments
            # write "<instance>,<label>" per line; 'with' guarantees the file
            # is closed even if labelling fails midway
            with open("result_data.txt", "w+") as f:
                for i, ins in enumerate(data_test):
                    stt = "anomaly" if cluster_ass[i] == 0 else "normal"
                    f.write(str(ins) + "," + stt + "\n")
            return evaluation.cluster_results
        except Exception:
            # BUG FIX: original used Python-2 'except Exception, e' syntax and
            # printed the traceback AFTER 'raise e', which was unreachable
            print(traceback.format_exc())
            raise
# Exemple #5
# 0
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_path = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_path)
    data = Loader("weka.core.converters.ArffLoader").load_file(iris_path)

    # clustering is unsupervised: drop the class attribute
    data.delete_last_attribute()

    # train SimpleKMeans (3 clusters) and report on the training data
    helper.print_title("Training SimpleKMeans clusterer")
    km = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    km.build_clusterer(data)
    print(km)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(km)
    evaluation.test_model(data)
    print(f"# clusters: {evaluation.num_clusters}")
    print(f"log likelihood: {evaluation.log_likelihood}")
    print(f"cluster assignments:\n{evaluation.cluster_assignments}")
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # wrap the Remove filter and SimpleKMeans in a FilteredClusterer
    helper.print_title("Filtered clusterer")
    data = Loader("weka.core.converters.ArffLoader").load_file(iris_path)
    fc = FilteredClusterer()
    fc.clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    fc.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fc.build_clusterer(data)
    print(fc)

    # build Cobweb incrementally, filtering each instance on the fly
    helper.print_title("Incremental clusterer")
    inc_loader = Loader("weka.core.converters.ArffLoader")
    inc_data = inc_loader.load_file(iris_path, incremental=True)
    cobweb = Clusterer("weka.clusterers.Cobweb")
    rm = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    rm.inputformat(inc_data)
    cobweb.build_clusterer(rm.outputformat())
    for inst in inc_loader:
        rm.input(inst)
        cobweb.update_clusterer(rm.output())
    cobweb.update_finished()
    print(cobweb.to_commandline())
    print(cobweb)
    print(cobweb.graph)
    plg.plot_dot_graph(cobweb.graph)
# Exemple #6
# 0
    def run_clustering_task7_manual(self,
                                    output_directory,
                                    clusterer_name,
                                    num_clusters,
                                    seed=10):
        """
        Build the named clusterer on a class-free copy of the training data,
        run a classes-to-clusters evaluation on the original training data,
        and save the combined transcript.

        :param output_directory: directory passed to self.save_results
        :param clusterer_name: fully qualified weka clusterer classname
        :param num_clusters: number of clusters (-N option)
        :param seed: random seed (-S option)
        """
        # copy so dropping the class / first attribute does not mutate
        # self.training_data, which is reused for the evaluation below
        data = Instances.copy_instances(self.training_data)
        data.no_class()
        data.delete_first_attribute()

        clusterer_name_short = clusterer_name.replace("weka.clusterers.", "")

        # build a clusterer and output model
        print("\nBuilding " + clusterer_name_short +
              " Clusterer on training data.")
        build_start = time.time()
        # note: '"" + str(x)' from the original was redundant
        clusterer = Clusterer(classname=clusterer_name,
                              options=["-N", str(num_clusters), "-S", str(seed)])
        clusterer.build_clusterer(data)

        results = self.print_both(str(clusterer), "")
        results = self.print_both(
            "Clusterer Built in " + str(time.time() - build_start) + " secs.\n",
            results)

        # evaluate clusterer (classes to clusters, on the untouched data)
        results = self.print_both("\nClustering data.", results)
        cluster_start = time.time()
        evl = ClusterEvaluation()
        evl.set_model(clusterer)
        evl.test_model(self.training_data)

        results = self.print_both("\nCluster results\n", results)
        results = self.print_both(str(evl.cluster_results), results)
        results = self.print_both("\nClasses to clusters\n", results)
        results = self.print_both(str(evl.classes_to_clusters), results)
        results = self.print_both(
            "\nClustered data in " + str(time.time() - cluster_start) + " secs.\n",
            results)

        # save results and cleanup
        self.save_results(
            clusterer_name_short + "_" + "N" + str(num_clusters) + "_S" +
            str(seed), results, output_directory)
# Exemple #7
# 0
    def run_SKMeans_137(self):
        """
        Run SimpleKMeans with 137 clusters over self.data_loaded and write
        three date-stamped reports under <output_dir>/SKMeans: the cluster
        evaluation, the per-instance cluster description, and the row-wise
        cluster assignments.

        :return: an empty tuple (kept for backward compatibility)
        """
        # construct output paths
        output_prefix = os.path.split(self.input_path)[-1].split(".")[0]
        print(output_prefix)
        write_date = output_prefix + "." + str(datetime.now().date())
        skmeans_dir = os.path.join(self.output_dir, "SKMeans")
        eval_path = os.path.join(skmeans_dir, write_date + ".cl_eval.txt")
        clust_desc_path = os.path.join(skmeans_dir, write_date + ".cl_descr.txt")
        clust_assign_path = os.path.join(skmeans_dir, write_date + ".cl_assign.txt")

        # create output dir if it doesn't already exist
        if not os.path.exists(skmeans_dir):
            os.makedirs(skmeans_dir)

        # build clusters on the loaded data (not deep-copied in the original
        # either; presumably SimpleKMeans leaves the instances untouched)
        data_clone = self.data_loaded
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                              options=["-N", "137"])
        clusterer.build_clusterer(data_clone)

        # cluster evaluation
        evaluation = ClusterEvaluation()
        evaluation.set_model(clusterer)
        evaluation.test_model(data_clone)
        with open(eval_path, 'w') as outfile:
            outfile.write("number of clusters: \t" + str(evaluation.num_clusters) + "\n")
            # BUG FIX: original wrote num_clusters under the
            # "log likelihood" label
            outfile.write("log likelihood: \t" + str(evaluation.log_likelihood) + "\n")
            outfile.write("cluster assignments: \t" + str(evaluation.cluster_assignments) + "\n")
            outfile.write("***********************\n")
            outfile.write("\t".join(["SKmeans Cluster Evaluation Results\n"]))  # header
            outfile.write(str(evaluation.cluster_results) + "\n")

        # per-instance cluster index and membership distribution
        with open(clust_desc_path, 'w') as outfile:
            outfile.write(",".join(["cluster_num", "distribution\n"]))  # header
            for inst in data_clone:
                cl = clusterer.cluster_instance(inst)  # 0-based cluster index
                dist = clusterer.distribution_for_instance(inst)
                outfile.write(",".join([str(cl), str(dist)]))
                outfile.write("\n")

        # cluster assignment by row
        with open(clust_assign_path, 'w') as outfile:
            outfile.write(",".join(["row_num", "SKMeans\n"]))  # header
            for i, assignment in enumerate(evaluation.cluster_assignments):
                outfile.write(",".join([str(i), str(assignment)]))
                outfile.write("\n")

        return ()
        
def run_clusterer(file):
    """
    Cluster an ARFF file with SimpleKMeans and write the evaluation results
    to <dir>/cluster_results_optimum/<name>_cl_res.txt.

    Uses 2 clusters by default, 7 for files whose name starts with
    "fer2018_". Non-ARFF files are skipped.

    :param file: pathlib.Path to the input file
    """
    # Get filename from Pathlib object
    filename = file.parts[-1]
    dir = file.parents[0]

    print("Running Clusterer on %s" % filename)

    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return

    # Removes '.arff' from filename
    filename_base = filename[:-5]

    # Load data with class as first attr; 'full' is kept for the
    # classes-to-clusters evaluation below.
    # (The original loaded the file a second time into an unused
    # 'full_withoutclass' variable -- removed.)
    full = load_Arff_file(file)
    full.class_is_first()

    # clustering input: copy without the class attribute
    data = Instances.copy_instances(full)
    data.no_class()
    data.delete_first_attribute()

    dir = dir / "cluster_results_optimum"
    dir.mkdir(parents=True, exist_ok=True)

    # number of clusters: 7 for the fer2018_* sets, otherwise 2
    n = "2"
    if filename_base.startswith("fer2018_"):
        print("Changing number of clusters to 7")
        n = "7"

    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                          options=["-S", "10", "-N", n])
    clusterer.build_clusterer(data)

    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(full)

    str1 = str(filename_base) + "_cl_res.txt"
    output_results = dir / str1
    output_cluster(evaluation, output_results)
# Exemple #9
# 0
    def run_cluster_simplek(self,
                            output_directory,
                            exc_class=False,
                            num_clusters=7):
        """
        Build a SimpleKMeans clusterer on a class-free copy of the training
        data, evaluate it, and save the combined transcript.

        :param output_directory: directory passed to self.save_results
        :param exc_class: if True, evaluate on the class-free copy; otherwise
            run a classes-to-clusters evaluation on the original training data
        :param num_clusters: number of clusters (-N option)
        """
        # copy so dropping the class / first attribute does not mutate
        # self.training_data, which may be reused for the evaluation below
        data = Instances.copy_instances(self.training_data)
        data.no_class()
        data.delete_first_attribute()

        # build a clusterer and output model
        print("\nBuilding Clusterer on training data.")
        build_start = time.time()
        # note: '"" + str(x)' from the original was redundant
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                              options=["-N", str(num_clusters)])
        clusterer.build_clusterer(data)

        results = self.print_both(str(clusterer), "")
        results = self.print_both(
            "Clusterer Built in " + str(time.time() - build_start) + " secs.\n",
            results)

        # evaluate clusterer
        results = self.print_both("\nClustering data.", results)
        cluster_start = time.time()

        evl = ClusterEvaluation()
        evl.set_model(clusterer)
        if exc_class:
            # no class attribute: evaluate on the stripped copy
            clsexc = "_NO_Class"
            evl.test_model(data)
        else:
            # classes to clusters: evaluate on the original training data
            clsexc = ""
            evl.test_model(self.training_data)

        results = self.print_both("\nCluster results\n", results)
        results = self.print_both(str(evl.cluster_results), results)
        results = self.print_both("\nClasses to clusters\n", results)
        results = self.print_both(str(evl.classes_to_clusters), results)
        results = self.print_both(
            "\nClustered data in " + str(time.time() - cluster_start) + " secs.\n",
            results)

        # save results and cleanup
        self.save_results("SimpleKM" + clsexc + "_", results, output_directory)
# Exemple #10
# 0
# --- SimpleKMeans on iris, written against the OLDER python-weka-wrapper
# API (set_inputformat / get_cluster_results).
# NOTE(review): relies on `data_dir`, `os`, Loader, Filter, Clusterer,
# ClusterEvaluation and `plc` being defined/imported earlier in the file --
# confirm.
# load iris
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# remove class attribute (Remove filter drops the last attribute)
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
flt.set_inputformat(data)
filtered = flt.filter(data)

# build KMeans (3 clusters) and evaluate on the filtered data
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.get_cluster_results())
# plot assignments against the ORIGINAL data (all attributes, instance numbers)
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)

# use AddCluster filter: appends the assigned cluster as a new attribute
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
             options=["-W", "weka.clusterers.SimpleKMeans -N 3"])
flt.set_inputformat(filtered)
addcl = flt.filter(filtered)
print(addcl)

# classes-to-clusters evaluation
print("\n--> Classes to clusters\n")
# NOTE(review): the snippet appears truncated -- the classes-to-clusters
# evaluation itself is missing after this header.
# Exemple #11
# 0
# --- SimpleKMeans on iris, written against the NEWER python-weka-wrapper
# API (inputformat / cluster_results property); otherwise the same flow as
# the previous fragment.
# NOTE(review): relies on `data_dir`, `os`, Loader, Filter, Clusterer,
# ClusterEvaluation and `plc` being defined/imported earlier -- confirm.
# load iris
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# remove class attribute (Remove filter drops the last attribute)
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
flt.inputformat(data)
filtered = flt.filter(data)

# build KMeans (3 clusters) and evaluate on the filtered data
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.cluster_results)
# plot assignments against the ORIGINAL data (all attributes, instance numbers)
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)

# use AddCluster filter: appends the assigned cluster as a new attribute
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
             options=["-W", "weka.clusterers.SimpleKMeans -N 3"])
flt.inputformat(filtered)
addcl = flt.filter(filtered)
print(addcl)

# classes-to-clusters evaluation
print("\n--> Classes to clusters\n")
# NOTE(review): the snippet appears truncated -- the classes-to-clusters
# evaluation itself is missing after this header.
# Exemple #12
# 0
# --- SimpleKMeans with several seeds, then XMeans ---
# NOTE(review): `fname` is defined by an earlier fragment of this file;
# Loader/Filter/Clusterer/ClusterEvaluation come from python-weka-wrapper.
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# build KMeans once per seed; -1 is a sentinel meaning "use the default seed"
seeds = [-1, 11, 12]
for seed in seeds:
    if seed == -1:
        seedStr = "default"
    else:
        seedStr = str(seed)
    print("\n--> SimpleKMeans - seed " + seedStr + "\n")
    cl = Clusterer("weka.clusterers.SimpleKMeans")
    if seed != -1:
        # only override -S when an explicit seed was requested
        cl.options = ["-S", str(seed)]
    cl.build_clusterer(data)
    evl = ClusterEvaluation()
    evl.set_model(cl)
    evl.test_model(data)
    print(evl.cluster_results)

# build XMeans on a RemoveType-filtered copy
# (-T numeric -V: presumably keeps only the numeric attributes -- confirm)
print("\n--> XMeans\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.RemoveType",
             options=["-T", "numeric", "-V"])
flt.inputformat(data)
filtered = flt.filter(data)
cl = Clusterer(classname="weka.clusterers.XMeans")
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
# NOTE(review): the XMeans evaluation result is never printed -- the
# fragment appears truncated here.
# --- SimpleKMeans (6 clusters) plus WEKA GUI/CLI helpers via subprocess ---
# NOTE(review): relies on `data` and the `subprocess` import from earlier in
# the file -- confirm; `data.delete_attribute(2)` drops the attribute at
# index 2 (0-based).
data.delete_attribute(2)

#saver.save_file(data,"data.arff")
num_clusters = "6"   # number of clusters for k-means

# perform clustering
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", num_clusters])
clusterer.build_clusterer(data)

# per-instance cluster index and membership distribution (computed but only
# printed when the debug line below is uncommented)
for inst in data:
    cl = clusterer.cluster_instance(inst)  # 0-based cluster index
    dist = clusterer.distribution_for_instance(inst)   # cluster membership distribution
    #print("cluster=" + str(cl) + ", distribution=" + str(dist))

# report on the clustered instances
evaluation = ClusterEvaluation()
evaluation.set_model(clusterer)
evaluation.test_model(data)
# BUG FIX: Python-2 print statement replaced with a Python-3 call
print(evaluation.cluster_results)
#print("# clusters: " + str(evaluation.num_clusters))
#print("log likelihood: " + str(evaluation.log_likelihood))
#print("cluster assignments:\n" + str(evaluation.cluster_assignments))
#plc.plot_cluster_assignments(evaluation, data,[],True)

# calling external WEKA tools to get the required results

# launch the WEKA GUI to display the clusters
subprocess.call(["java" ,"-classpath", ".:weka.jar", "VisualizeClusterAssignments" ,"-t", "data.arff" ,"-W", "weka.clusterers.SimpleKMeans -N 6"]) ## Change the num_clusters here

# accuracy for clustering when target is serviceID
subprocess.call(["python", "clusterers.py", "-t", "data_with_class_serviceID.arff", "-c", "last", "weka.clusterers.SimpleKMeans", "-N", num_clusters])
# Exemple #14
# 0
# --- SimpleKMeans on iris + AddCluster filter (newer wrapper API) ---
# NOTE(review): relies on `data_dir`, `os`, Loader, Filter, Clusterer,
# ClusterEvaluation and `plc` being defined/imported earlier -- confirm.
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# remove class attribute (Remove filter drops the last attribute)
flt = Filter(classname="weka.filters.unsupervised.attribute.Remove",
             options=["-R", "last"])
flt.inputformat(data)
filtered = flt.filter(data)

# build KMeans (3 clusters) and evaluate on the filtered data
print("\n--> SimpleKMeans\n")
cl = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.cluster_results)
# plot assignments against the ORIGINAL data (all attributes, instance numbers)
plc.plot_cluster_assignments(evl, data, atts=[], inst_no=True, wait=True)

# use AddCluster filter: appends the assigned cluster as a new attribute
print("\n--> AddCluster filter\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.AddCluster",
             options=["-W", "weka.clusterers.SimpleKMeans -N 3"])
flt.inputformat(filtered)
addcl = flt.filter(filtered)
print(addcl)

# classes-to-clusters evaluation
print("\n--> Classes to clusters\n")
# NOTE(review): the snippet appears truncated -- the classes-to-clusters
# evaluation itself is missing after this header.
# Exemple #15
# 0
# --- SimpleKMeans with several seeds, then XMeans, using the OLDER wrapper
# API (set_options / get_cluster_results / set_inputformat) ---
# NOTE(review): `fname` is defined by an earlier fragment of this file --
# confirm.
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# build KMeans once per seed; -1 is a sentinel meaning "use the default seed"
seeds = [-1, 11, 12]
for seed in seeds:
    if seed == -1:
        seedStr = "default"
    else:
        seedStr = str(seed)
    print("\n--> SimpleKMeans - seed " + seedStr + "\n")
    cl = Clusterer("weka.clusterers.SimpleKMeans")
    if seed != -1:
        # only override -S when an explicit seed was requested
        cl.set_options(["-S", str(seed)])
    cl.build_clusterer(data)
    evl = ClusterEvaluation()
    evl.set_model(cl)
    evl.test_model(data)
    print(evl.get_cluster_results())

# build XMeans on a RemoveType-filtered copy
# (-T numeric -V: presumably keeps only the numeric attributes -- confirm)
print("\n--> XMeans\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.RemoveType", options=["-T", "numeric", "-V"])
flt.set_inputformat(data)
filtered = flt.filter(data)
cl = Clusterer(classname="weka.clusterers.XMeans")
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.get_cluster_results())
# Exemple #16
# 0
# --- SimpleKMeans with several seeds, then XMeans, using the NEWER wrapper
# API (options property / cluster_results / inputformat) ---
# NOTE(review): `fname` is defined by an earlier fragment of this file --
# confirm.
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# build KMeans once per seed; -1 is a sentinel meaning "use the default seed"
seeds = [-1, 11, 12]
for seed in seeds:
    if seed == -1:
        seedStr = "default"
    else:
        seedStr = str(seed)
    print("\n--> SimpleKMeans - seed " + seedStr + "\n")
    cl = Clusterer("weka.clusterers.SimpleKMeans")
    if seed != -1:
        # only override -S when an explicit seed was requested
        cl.options = ["-S", str(seed)]
    cl.build_clusterer(data)
    evl = ClusterEvaluation()
    evl.set_model(cl)
    evl.test_model(data)
    print(evl.cluster_results)

# build XMeans on a RemoveType-filtered copy
# (-T numeric -V: presumably keeps only the numeric attributes -- confirm)
print("\n--> XMeans\n")
flt = Filter(classname="weka.filters.unsupervised.attribute.RemoveType", options=["-T", "numeric", "-V"])
flt.inputformat(data)
filtered = flt.filter(data)
cl = Clusterer(classname="weka.clusterers.XMeans")
cl.build_clusterer(filtered)
evl = ClusterEvaluation()
evl.set_model(cl)
evl.test_model(filtered)
print(evl.cluster_results)