Python Clusterer.cluster_instance Examples, weka.clusterers.Clusterer.cluster_instance Python Examples

Example #1

0

Show file

File: cluster.py Project: whaleforever/carilogo

def assign_cluster(file_location, file_out="clustered.csv", model="kmeans.model", last_filename=False):
    """
    Assign every row of a CSV file to a cluster and write the result to a file.

    Each output line has the form ``<label>,<cluster>,<feature>,...``.

    :param file_location: path handed to ``read_csv_file``
    :param file_out: path of the file to write (overwritten)
    :param model: path of the serialized clusterer to load
    :param last_filename: if True the label is the last column and the
        features are every column except the last two; otherwise the label
        is the first column and the features are all remaining columns
    """
    rows = read_csv_file(file_location)
    check_jvm()
    # restore the previously trained clusterer from disk
    clusterer = Clusterer(jobject=serialization.read(model))

    # one output line per input row
    with open(file_out, 'w') as output:
        for position, row in enumerate(rows, start=1):
            # NOTE(review): with last_filename the second-to-last column is
            # neither a feature nor written out - confirm this is intended
            features = row[:-2] if last_filename else row[1:]
            inst = Instance.create_instance(features)

            pred = clusterer.cluster_instance(inst)
            dist = clusterer.distribution_for_instance(inst)

            label = row[-1] if last_filename else row[0]
            record = [label, pred]
            record.extend(features)

            print(str(position) + ": label index=" +
                  str(pred) + ", class distribution=" + str(dist))
            output.write('%s\n' % ','.join(str(field) for field in record))

Example #2

0

Show file

File: dweka.py Project: DIPET-UVT/EDE-Dipet

 def simpleKMeansTrain(self, dataf, options, mname, temp=True):
     """
     Train a Weka SimpleKMeans clusterer on ``dataf``, print the cluster of
     every instance and save the model under ``mname``.

     The JVM is started at the beginning and always stopped at the end.
     Any exception is caught and its traceback printed.

     :param dataf: -> data to be clustered (passed to ``self.loadData``)
     :param options: -> SimpleKMeans options
                   N -> number of clusters
                   A -> Distance function to use (ex: default is "weka.core.EuclideanDistance -R first-last")
                   I -> maximum number of iterations default 500
           num-slots -> number of execution slots, 1 means no parallelism
                   S -> Random number seed (default 10)
           example => ["-N", "10", "-S", "10"]
     :param mname: -> model name passed to ``self.saveModel``
     :param temp: -> NOTE(review): currently not forwarded, ``loadData`` is
                     always called with ``temp=True`` - confirm intent
     :return: None
     """
     try:
         jvm.start(max_heap_size=self.wHeap)
         dataset = self.loadData(dataf, temp=True)
         kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=options)
         kmeans.build_clusterer(dataset)
         print(kmeans)
         # report cluster index and membership distribution per instance
         for instance in dataset:
             cluster_index = kmeans.cluster_instance(instance)
             membership = kmeans.distribution_for_instance(instance)
             print("cluster=%s, distribution=%s" % (str(cluster_index), str(membership)))
         self.saveModel(kmeans, 'skm', mname)
     except Exception:
         print(traceback.format_exc())
     finally:
         jvm.stop()

Example #3

0

Show file

File: cluster_data.py Project: xiangyu-sun-789/python-weka-wrapper3-examples

def main():
    """
    Cluster the iris dataset with SimpleKMeans and print, for every
    instance, its cluster index and membership distribution.
    """

    # locate and load the dataset
    iris_file = os.sep.join([helper.get_data_dir(), "iris.arff"])
    helper.print_info("Loading dataset: " + iris_file)
    data = Loader("weka.core.converters.ArffLoader").load_file(iris_file)

    # clustering is unsupervised, so drop the class attribute
    data.delete_last_attribute()

    # train the clusterer and show the resulting model
    helper.print_title("Training SimpleKMeans clusterer")
    kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    kmeans.build_clusterer(data)
    print(kmeans)

    # assign each instance to a cluster (output is numbered from 1)
    helper.print_info("Clustering data")
    for number, instance in enumerate(data, start=1):
        assigned = kmeans.cluster_instance(instance)
        membership = kmeans.distribution_for_instance(instance)
        print(str(number) + ": cluster=" + str(assigned) + ", distribution=" + str(membership))

Example #4

0

Show file

File: dweka.py Project: igabriel85/dmon-adp

 def simpleKMeansTrain(self, dataf, options, mname, temp=True):
     '''
     Train a Weka SimpleKMeans clusterer on ``dataf``, print the cluster of
     every instance and save the model under ``mname``.

     Any exception is caught and its traceback printed. The JVM started
     here is not stopped by this method.

     :param dataf: -> data to be clustered (passed to ``self.loadData``)
     :param options: -> SimpleKMeans options
                   N -> number of clusters
                   A -> Distance function to use (ex: default is "weka.core.EuclideanDistance -R first-last")
                   I -> maximum number of iterations default 500
           num-slots -> number of execution slots, 1 means no parallelism
                   S -> Random number seed (default 10)
           example => ["-N", "10", "-S", "10"]
     :param mname: -> model name passed to ``self.saveModel``
     :param temp: -> NOTE(review): currently not forwarded, ``loadData`` is
                     always called with ``temp=True`` - confirm intent
     :return: None
     '''
     try:
         jvm.start(max_heap_size=self.wHeap)
         data = self.loadData(dataf, temp=True)
         clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=options)
         clusterer.build_clusterer(data)
         # print() with a single argument behaves the same on Python 2 and 3
         print(clusterer)
         # cluster the data
         for inst in data:
             cl = clusterer.cluster_instance(inst)  # 0-based cluster index
             dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
             print("cluster=" + str(cl) + ", distribution=" + str(dist))
         self.saveModel(clusterer, 'skm', mname)
     except Exception:
         # "except X, e" is Python-2-only syntax; the bound name was unused
         print(traceback.format_exc())

Example #5

0

Show file

    def run_SKMeans_137(self):
        """
        Cluster ``self.data_loaded`` with SimpleKMeans (137 clusters) and
        write three report files into ``<self.output_dir>/SKMeans``.

        The files are named ``<input file stem>.<today's date>`` followed by:

        * ``.cl_eval.txt``   - evaluation summary
        * ``.cl_descr.txt``  - cluster index and distribution per instance
        * ``.cl_assign.txt`` - cluster assignment per row number

        :return: an empty tuple
        """
        # construct output paths
        output_prefix = os.path.split(self.input_path)[-1].split(".")[0]
        print(output_prefix)
        write_date = output_prefix + "." + str(datetime.now().date())
        SKMeans_dir = os.path.join(self.output_dir, "SKMeans")
        eval_path = os.path.join(SKMeans_dir, write_date + ".cl_eval.txt")
        clust_desc_path = os.path.join(SKMeans_dir, write_date + ".cl_descr.txt")
        clust_assign_path = os.path.join(SKMeans_dir, write_date + ".cl_assign.txt")

        # create output dir if it doesn't already exist
        if not os.path.exists(SKMeans_dir):
            os.makedirs(SKMeans_dir)

        # build clusters (the loaded data is used directly, not cloned)
        data_clone = self.data_loaded
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "137"])
        clusterer.build_clusterer(data_clone)

        # cluster evaluation
        evaluation = ClusterEvaluation()
        evaluation.set_model(clusterer)
        evaluation.test_model(data_clone)
        with open(eval_path, 'w') as outfile:
            outfile.write("number of clusters: \t" + str(evaluation.num_clusters) + "\n")
            # report the log likelihood itself (previously num_clusters was
            # written a second time under this label)
            outfile.write("log likelihood: \t" + str(evaluation.log_likelihood) + "\n")
            outfile.write("cluster assignments: \t" + str(evaluation.cluster_assignments) + "\n")
            outfile.write("***********************\n")
            outfile.write("SKmeans Cluster Evaluation Results\n")  # header
            outfile.write(str(evaluation.cluster_results) + "\n")

        # per-instance cluster index and membership distribution
        with open(clust_desc_path, 'w') as outfile:
            outfile.write("cluster_num,distribution\n")  # header
            for inst in data_clone:
                cl = clusterer.cluster_instance(inst)  # 0-based cluster index
                dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
                outfile.write(",".join([str(cl), str(dist)]))
                outfile.write("\n")

        # cluster assignment by row
        with open(clust_assign_path, 'w') as outfile:
            outfile.write("row_num,SKMeans\n")  # header
            for i, assignment in enumerate(evaluation.cluster_assignments):
                outfile.write(",".join([str(i), str(assignment)]))
                outfile.write("\n")

        return ()

Example #6

0

Show file

def command():
    """
    Handle the clustering form: load the CSV matching the selected column
    pair, run SimpleKMeans with the requested number of clusters, write the
    model and the per-instance assignments to ``filename.txt`` and render
    ``output.html``.

    Form fields read: ``clusternum``, ``firstcol``, ``secondcol``.

    :raises ValueError: if the column pair has no associated data file
    """
    jvm.start()

    import weka.core.converters as converters
    from weka.clusterers import Clusterer

    clusters = request.form['clusternum']
    a1 = request.form['firstcol']
    a2 = request.form['secondcol']

    # data file for each supported (first column, second column) pair
    datasets = {
        ('B', 'C'): "Data.csv",
        ('B', 'D'): "Data1.csv",
        ('C', 'D'): "Data2.csv",
        ('C', 'E'): "Data3.csv",
        ('D', 'E'): "Data4.csv",
    }
    data_file = datasets.get((a1, a2))
    if data_file is None:
        # previously this fell through and failed later on an unbound name
        raise ValueError(
            "Unsupported column combination: %r, %r" % (a1, a2))
    data = converters.load_any_file(data_file)

    print(data)

    # the context manager guarantees the file is closed; the old
    # f.close() was placed after the return and never executed
    with open("filename.txt", "w") as f:
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                              options=["-N", "{}".format(clusters)])
        clusterer.build_clusterer(data)

        print(clusterer)
        f.write(str(clusterer))
        # cluster the data
        for inst in data:
            cl = clusterer.cluster_instance(inst)  # 0-based cluster index
            dist = clusterer.distribution_for_instance(
                inst)  # cluster membership distribution
            line = "cluster=" + str(cl) + ", distribution=" + str(dist)
            print(line)
            # NOTE(review): no newline is written between entries, as
            # before - confirm whether one line per instance is wanted
            f.write(line)

    return render_template("output.html")

Example #7

0

Show file

File: cluster.py Project: whaleforever/carilogo

def query_instance(attributes, model="kmeans.model"):
    """
        Look up the cluster a single set of attribute values belongs to.

        :params attributes: array or list of attribute values
        :params model: path of the serialized clusterer to load
        :returns: cluster id
    """
    check_jvm()
    # build the instance first, then restore the clusterer from disk
    instance = Instance.create_instance(attributes)
    clusterer = Clusterer(jobject=serialization.read(model))
    return clusterer.cluster_instance(instance)

Example #8

0

Show file

def predicaoCluster(matricula, curso, tipo_predicao):
    """
    Predict the cluster of one student and return that cluster's
    characteristic.

    The student's first record is written to ``aluno_temp.csv``, loaded as
    Weka instances and assigned to a cluster by the serialized k-means
    model ``model/kmeans_<curso>_<tipo_predicao>.model``.

    :param matricula: student id matched against the ``MATRICULA`` column
    :param curso: course key, ``'si'`` or ``'eca'``
    :param tipo_predicao: prediction type, ``'reprovacao'`` or ``'evasao'``
    :return: result of ``retornarCaracteristicaCluster`` for the cluster
    :raises ValueError: on an unsupported ``curso``/``tipo_predicao`` or
        when no record exists for ``matricula``
    """
    # fail early with a clear message; previously an unsupported value left
    # the model variable unbound and crashed further down
    if curso not in ('si', 'eca'):
        raise ValueError("Unsupported curso: %r" % (curso,))
    if tipo_predicao not in ('reprovacao', 'evasao'):
        raise ValueError("Unsupported tipo_predicao: %r" % (tipo_predicao,))

    dados = retornarDadosCurso(curso)
    # select the student's features: drop the id and outcome columns and
    # keep only the first matching record
    aluno = dados.loc[dados['MATRICULA'] == matricula]
    aluno = aluno.drop(
        ['MATRICULA', 'APROVADO', 'COD_DISCIPLINA', 'SIT_MATRICULA'],
        axis=1).head(1)
    if aluno.empty:
        raise ValueError("No record found for matricula %r" % (matricula,))

    aluno.to_csv('aluno_temp.csv', index=False)

    from weka.clusterers import Clusterer
    import weka.core.jvm as jvm
    from weka.core.converters import Loader
    import weka.core.serialization as serialization

    jvm.start()

    model = serialization.read_all(
        "model/kmeans_%s_%s.model" % (curso, tipo_predicao))
    # the clusterer is the first object stored in the model file
    cluster = Clusterer(jobject=model[0])

    loader = Loader(classname="weka.core.converters.CSVLoader")
    dado_aluno = loader.load_file("aluno_temp.csv")
    cluster_aluno_pertence = None
    for instancia in dado_aluno:
        cluster_aluno_pertence = cluster.cluster_instance(instancia)

    # NOTE(review): the JVM is deliberately left running here (the stop
    # call was disabled in the original) - confirm who shuts it down

    caracteristica = retornarCaracteristicaCluster(curso, tipo_predicao,
                                                   cluster_aluno_pertence)

    return caracteristica

Example #9

0

Show file

File: dwekaclusterer.py Project: DIPET-UVT/EDE-Dipet

#     cl2 = clusterEM.cluster_instance(inst)
#     dist2 = clusterEM.distribution_for_instance(inst)
#     print ("cluster=" + str(cl2) + ", distribution=" + str(dist2))
#     print inst
#
# Build a DBSCAN clusterer on `data`.
# NOTE(review): -E / -M are presumably DBSCAN's epsilon and minimum-points
# settings, -I / -D the database and data-object classes - confirm against
# the Weka DBSCAN documentation.
clusterDBSCAN = Clusterer(
    classname="weka.clusterers.DBSCAN",
    options=[
        "-E", "0.9", "-M", "6", "-I",
        "weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase",
        "-D",
        "weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclideanDataObject"
    ])
clusterDBSCAN.build_clusterer(data)

# Persist the trained model, then read it back so that the predictions
# below are made with the deserialized copy rather than the in-memory one.
serialization.write(os.path.join(modelDir, "dbscan.model"), clusterDBSCAN)
cluster = Clusterer(
    jobject=serialization.read(os.path.join(modelDir, "dbscan.model")))
# print clusterDBSCAN
# print clusterDBSCAN.number_of_clusters
# Print cluster index and membership distribution for every instance.
for inst in data:
    cl3 = cluster.cluster_instance(inst)
    dist3 = cluster.distribution_for_instance(inst)
    print(("cluster=" + str(cl3) + ", distribution=" + str(dist3)))

# Same loop using the in-memory clusterer instead of the reloaded one
# (disabled):
# for inst in data:
#     cl3 = clusterDBSCAN.cluster_instance(inst)
#     dist3 = clusterDBSCAN.distribution_for_instance(inst)
#     print ("cluster=" + str(cl3) + ", distribution=" + str(dist3))
# Shut the JVM down once all Weka work is finished.
jvm.stop()

Example #10

0

Show file

File: clustering.py Project: umidha/Clustering_Uday_Midha

### Delete the attributes that are not required.
### NOTE(review): index 2 is deleted three times in total; presumably the
### remaining attributes shift down after each deletion - confirm.
data.delete_attribute(2)
data.delete_attribute(2)
#####Uncomment to save the file which has serviceId as class, forkV and ForkW as attributes
###saver.save_file(data, "data_with_class_serviceID.arff")
data.delete_attribute(2)

#saver.save_file(data,"data.arff")
num_clusters = "6"   # number of clusters for k-means (a string, passed as a Weka option)

## Perform the clustering
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", num_clusters])
clusterer.build_clusterer(data)

# Per-instance results are computed but only printed when the line below
# is uncommented.
for inst in data:
    cl = clusterer.cluster_instance(inst)  # 0-based cluster index
    dist = clusterer.distribution_for_instance(inst)   # cluster membership distribution
    #print("cluster=" + str(cl) + ", distribution=" + str(dist))

######### Get the summary data about the clustered instances
evaluation = ClusterEvaluation()
evaluation.set_model(clusterer)
evaluation.test_model(data)
# print() with a single argument works on both Python 2 and Python 3
print(evaluation.cluster_results)
#print("# clusters: " + str(evaluation.num_clusters))
#print("log likelihood: " + str(evaluation.log_likelihood))
#print("cluster assignments:\n" + str(evaluation.cluster_assignments))
#plc.plot_cluster_assignments(evaluation, data,[],True)

####Using WEKA files to get the required results by calling them through this script

Example #11

0

Show file

class WekaCluster(BaseEstimator, OptionHandler, ClusterMixin):
    """
    Wraps a Weka cluster within the scikit-learn framework.
    """
    def __init__(self,
                 jobject=None,
                 cluster=None,
                 classname=None,
                 options=None,
                 nominal_input_vars=None,
                 num_nominal_input_labels=None):
        """
        Initializes the estimator. Can be either instantiated via the following priority of parameters:
        1. JB_Object representing a Java Clusterer object
        2. Clusterer pww3 wrapper
        3. classname/options

        :param jobject: the JB_Object representing a Weka cluster to use
        :type jobject: JB_Object
        :param cluster: the cluster wrapper to use
        :type cluster: Clusterer
        :param classname: the classname of the Weka cluster to instantiate
        :type classname: str
        :param options: the command-line options of the Weka cluster to instantiate
        :type options: list
        :param nominal_input_vars: the input variables to convert to nominal, handed to to_nominal_attributes before fitting/predicting
        :param num_nominal_input_labels: the dictionary with the number of labels for the nominal input variables (key is 0-based attribute index)
        :type num_nominal_input_labels: dict
        :raises Exception: if none of jobject/cluster/classname is given or the Java object is not a Weka clusterer
        """
        if jobject is not None:
            _jobject = jobject
        elif cluster is not None:
            _jobject = cluster.jobject
        elif classname is not None:
            if options is None:
                options = []
            cluster = Clusterer(classname=classname, options=options)
            _jobject = cluster.jobject
        else:
            raise Exception("At least Java classname must be provided!")

        if not is_instance_of(_jobject, "weka.clusterers.Clusterer"):
            raise Exception(
                "Java object does not implement weka.clusterers.Clusterer!")

        super(WekaCluster, self).__init__(_jobject)
        self._cluster = Clusterer(jobject=_jobject)
        self.header_ = None
        # the following references are required for get_params/set_params
        self._classname = classname
        self._options = options
        self._nominal_input_vars = nominal_input_vars
        self._num_nominal_input_labels = num_nominal_input_labels

    @property
    def cluster(self):
        """
        Returns the underlying cluster object, if any.

        :return: the cluster object
        :rtype: Clusterer
        """
        return self._cluster

    @property
    def header(self):
        """
        Returns the underlying dataset header, if any.

        :return: the dataset structure
        :rtype: Instances
        """
        return self.header_

    def fit(self, data, targets=None):
        """
        Trains the cluster.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster
        :rtype: WekaCluster
        """
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        d = to_instances(data,
                         num_nominal_labels=self._num_nominal_input_labels)
        self._cluster.build_clusterer(d)
        # keep an empty copy of the dataset structure for predict()
        self.header_ = d.template_instances(d, 0)
        return self

    def predict(self, data, targets=None):
        """
        Predicts cluster labels.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster labels (of type int)
        :rtype: ndarray
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        result = []
        for d in data:
            inst = to_instance(self.header_, d)
            result.append(int(self._cluster.cluster_instance(inst)))
        return np.array(result)

    def fit_predict(self, data, targets=None):
        """
        Trains the cluster and returns the cluster labels.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster labels (of type int)
        :rtype: ndarray
        """
        self.fit(data)
        return self.predict(data)

    def get_params(self, deep=True):
        """
        Returns the parameters for this cluster: classname and options list,
        plus the nominal settings when they are set.

        :param deep: ignored
        :type deep: bool
        :return: the dictionary with options
        :rtype: dict
        """
        result = dict()
        result["classname"] = self._classname
        result["options"] = self._options
        if self._nominal_input_vars is not None:
            result["nominal_input_vars"] = self._nominal_input_vars
        if self._num_nominal_input_labels is not None:
            result["num_nominal_input_labels"] = self._num_nominal_input_labels
        return result

    def set_params(self, **params):
        """
        Sets the options for the cluster, expects 'classname' and 'options'.
        The optional nominal settings are reset to None when not supplied.
        Does nothing when called without parameters.

        :param params: the parameter dictionary
        :type params: dict
        :return: the estimator itself (scikit-learn convention)
        :rtype: WekaCluster
        :raises Exception: if 'classname' or 'options' is missing
        """
        if len(params) == 0:
            return self
        if "classname" not in params:
            raise Exception("Cannot find 'classname' in parameters!")
        if "options" not in params:
            raise Exception("Cannot find 'options' in parameters!")
        self._classname = params["classname"]
        self._options = params["options"]
        self._cluster = Clusterer(classname=self._classname,
                                  options=self._options)
        self._nominal_input_vars = params.get("nominal_input_vars")
        self._num_nominal_input_labels = params.get("num_nominal_input_labels")
        return self

    def __str__(self):
        """
        For printing the model.

        :return: the model representation, if any
        :rtype: str
        """
        if self._cluster is None:
            # %-formatting also copes with a classname of None
            return "%s: No model built yet" % self._classname
        else:
            return str(self._cluster)

    def __copy__(self):
        """
        Creates a deep copy of itself.

        :return: the copy
        :rtype: WekaCluster
        """
        result = WekaCluster(jobject=deepcopy(self.jobject))
        result._classname = self._classname
        # options are None when the estimator was built from a jobject/cluster
        result._options = None if self._options is None else list(self._options)
        # carry the nominal settings over so the copy pre-processes its
        # input the same way as the original
        result._nominal_input_vars = self._nominal_input_vars
        result._num_nominal_input_labels = self._num_nominal_input_labels
        return result

    def __repr__(self, N_CHAR_MAX=700):
        """
        Returns a valid Python string using its classname and options.

        :param N_CHAR_MAX: ignored
        :type N_CHAR_MAX: int
        :return: the representation
        :rtype: str
        """
        # string values are quoted so the output stays valid Python
        if isinstance(self._nominal_input_vars, str):
            return "WekaCluster(classname='%s', options=%s, nominal_input_vars='%s')" % (
                self._cluster.classname, str(
                    self._cluster.options), str(self._nominal_input_vars))
        else:
            return "WekaCluster(classname='%s', options=%s, nominal_input_vars=%s)" % (
                self._cluster.classname, str(
                    self._cluster.options), str(self._nominal_input_vars))

Example #12

0

Show file

File: bustersAgents.py Project: DavidGomezCervera/AA_P3

class ClusterAgent (BustersAgent):
    """
    Agent that picks its move from k-means clusters: every cluster is mapped
    to the action class seen most often among its training instances, and
    each turn the current state is clustered to select the move.
    """

    def registerInitialState(self, gameState):
        """
        Set up the agent: read the labelled training instances, start the
        JVM, train the SimpleKMeans model and compute the per-cluster policy.

        :param gameState: the initial game state
        """
        BustersAgent.registerInitialState(self, gameState)
        self.distancer = Distancer(gameState.data.layout, False)

        # Whether the distance attribute is used (True for v1 and v2, False for v3)
        self.dis = True

        # Per-cluster counts of each class value, used by the policies.
        self.clusters = 8
        self.classes = 4
        self.classCounts = [[0 for i in range(self.classes)]for j in range(self.clusters)]

        # Column positions of the class and cluster values in the agent file.
        self.classIndex = 2
        self.clusterIndex = 3

        self.readInstances()

        # Storage for the training instances.
        self.numInstances = 52
        self.numAttributes = 4
        self.ins = [" " for i in range(self.numInstances)]

        # The Weka library runs inside the Java virtual machine.
        jvm.start()

        # Build the model
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file("/home/dot/Escritorio/Universidad/Machine Learning/practica 2/Outputs/agent_header.arff")

        self.clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", str(self.clusters)])
        self.clusterer.build_clusterer(data)

        print(self.clusterer)

        # Apply the policy
        self.politicaMax()


    def readInstances(self):
        """
        Read the agent file (instances without header) and count, for every
        cluster, how often each class value occurs (``self.classCounts``).
        """
        # Location of the agent file (instances without header).
        path = os.getcwd() + "/Outputs/agent.arff"

        # the context manager closes the file even if a line fails to parse
        with open(path, 'r') as f:
            # Read each instance
            for line in f:

                # Attribute values of the instance (strings)
                values = line.split(",")

                # Class value, North to West (0 - 3); anything that is not
                # East/South/West counts as North (0)
                classValue = 0
                classAtt = values[self.classIndex]
                if (classAtt == "East"):
                    classValue = 1
                elif (classAtt == "South"):
                    classValue = 2
                elif (classAtt == "West"):
                    classValue = 3

                # Cluster value of the instance
                cluster = values[self.clusterIndex]

                # Increment the class count for the cluster; the cluster
                # number is parsed from the last two characters and is
                # 1-based, hence the "- 1"
                self.classCounts[int(cluster[-2:]) - 1][classValue] += 1

    def politicaMax(self):
        """
        Compute the majority class of each cluster into ``self.max``.
        Ties go to the lowest class index; a cluster without any counts
        maps to class 0.
        """
        self.max = [0 for i in range(self.clusters)]

        for i in range(self.clusters):

            temp_max = 0
            class_index = 0

            for j in range(self.classes):

                if (self.classCounts[i][j] > temp_max):

                    temp_max = self.classCounts[i][j]
                    class_index = j

            self.max[i] = class_index

    def chooseAction(self, gameState):
        """
        Describe the current state as a one-instance ARFF file, cluster it
        and return the direction associated with that cluster.

        :param gameState: the current game state
        :return: one of Directions.NORTH/EAST/SOUTH/WEST, or Directions.STOP
                 when no instance could be clustered
        """
        path = os.getcwd() + "/Outputs/newInstance.arff"

        if (self.dis):
            data = "@RELATION pacman\n" \
                    + "@ATTRIBUTE dis NUMERIC\n" \
                    + "@ATTRIBUTE relPos {-1,0,1,2,3,4,5,6,7,8}\n\n" \
                    + "@DATA\n"
        else:
            data = "@RELATION pacman\n" \
                   + "@ATTRIBUTE relPos {-1,0,1,2,3,4,5,6,7,8}\n\n" \
                   + "@DATA\n"

        # Position of the pacman (x,y)
        pos_pac = gameState.data.agentStates[0].getPosition()

        # Distances to the ghosts
        for i in range(1, gameState.getNumAgents()):

            # Real (maze) distance to ghost i
            pos_ghost = gameState.data.agentStates[i].getPosition()

            distance = self.distancer.getDistance(pos_pac, pos_ghost)

            # Normalization: (distance - min)/(max - min): min = 1, max = 21
            distance = (distance - 1) / (21 - 1)

            # A distance above 1000 means the ghost has already been eaten.
            # NOTE(review): this test runs on the normalized value, so it
            # only triggers for raw distances above 20001 - confirm whether
            # it should be applied before normalizing
            if (self.dis):
                if (distance > 1000):
                    data = data + ("-1,")
                else:
                    data = data + str(distance) + ","

        # Relative positions of the ghosts with respect to the pacman
        for i in range(1, gameState.getNumAgents()):

            pos_ghost = gameState.data.agentStates[i].getPosition()

            if (pos_ghost[1] < 3):
                data = data + "-1,"
                continue

            # A ghost on the pacman's own square is reported as 0
            if (pos_ghost == pos_pac):
                data = data + "0,"
                # skip the direction checks, which would otherwise add a
                # second value ("5,") for the same ghost
                continue

            # Determine the relative position
            # {NORTH = 1, NORTH_EAST = 2, EAST = 3, SOUTH_EAST = 4, SOUTH = 5, SOUTH_WEST = 6, WEST = 7, NORTH_WEST = 8}.
            if (pos_ghost[0] > pos_pac[0]):
                if (pos_ghost[1] > pos_pac[1]):
                    data = data + "2,"
                elif (pos_ghost[1] < pos_pac[1]):
                    data = data + "4,"
                else:
                    data = data + "3,"
            elif (pos_ghost[0] < pos_pac[0]):
                if (pos_ghost[1] > pos_pac[1]):
                    data = data + "8,"
                elif (pos_ghost[1] < pos_pac[1]):
                    data = data + "6,"
                else:
                    data = data + "7,"
            else:
                if (pos_ghost[1] > pos_pac[1]):
                    data = data + "1,"
                else:
                    data = data + "5,"

        data = data + "\n"

        # write and close the file before Weka reads it
        with open(path, 'w') as f:
            f.write(data)

        # NOTE(review): the file is written below the current working
        # directory but loaded from an absolute path - these only match
        # when the agent runs from that directory
        loader = Loader(classname="weka.core.converters.ArffLoader")
        newData = loader.load_file("/home/dot/Escritorio/Universidad/Machine Learning/practica 2/Outputs/newInstance.arff")

        # 4 matches no direction, so the agent stops by default
        chosen_class = 4

        for inst in newData:
            cl = self.clusterer.cluster_instance(inst)
            chosen_class = self.max[cl]

        # class values follow the order used in readInstances
        directions_by_class = {
            0: Directions.NORTH,
            1: Directions.EAST,
            2: Directions.SOUTH,
            3: Directions.WEST,
        }
        return directions_by_class.get(chosen_class, Directions.STOP)

Example #13

0

Show file

class ClusteredAgent(BustersAgent):
    """Busters agent that chooses moves by looking up clustered game states.

    The constructor builds a Weka SimpleKMeans clusterer from
    data/game_toCluster.arff and groups the cluster-tagged training rows.
    On each turn the current game state is written out as an ARFF instance,
    assigned to a cluster, and the move recorded in the most similar stored
    row of that cluster is used.
    """

    def __init__(self, index = 0, inference = "ExactInference", ghostAgents = None):
        """Train the clusterer and index the training rows by cluster.

        Steps performed:
          1. Start the JVM and load data/game_toCluster.arff.
          2. Drop the last attribute and build a SimpleKMeans clusterer
             (options -N 10 -S 4 -I 500) on the remaining attributes.
          3. Reload the full data set, run the AddCluster filter over it and
             write the result to data/addCluster.arff.
          4. Parse that file into per-cluster row lists (self.clustered_data).

        :param index: forwarded to BustersAgent.__init__.
        :param inference: forwarded to BustersAgent.__init__.
        :param ghostAgents: forwarded to BustersAgent.__init__.
        """
        BustersAgent.__init__(self, index, inference, ghostAgents)
        # Distances written by the previous getInstance() call; they are
        # emitted as the prev-distance-ghost* attributes of the next one.
        self.previousDistances = [0,0,0,0]
        jvm.start(max_heap_size="512m")
        self.loader = Loader(classname="weka.core.converters.ArffLoader")

        # The clusterer is trained without the last attribute of the file.
        self.data = self.loader.load_file("data/game_toCluster.arff")
        self.data.delete_last_attribute()
        self.clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "10", "-S", "4", "-I", "500"])
        self.clusterer.build_clusterer(self.data)
        # Last instance clustered by getInstance(); read by getSimilarity().
        self.inst = ""

        # Reload the untouched data so the filtered output keeps every
        # attribute, then let AddCluster append the cluster label.
        # NOTE(review): "-I last" presumably makes the filter ignore the last
        # attribute while clustering, mirroring delete_last_attribute() above
        # -- confirm against the AddCluster documentation.
        self.data = self.loader.load_file("data/game_toCluster.arff")
        addCluster = Filter(classname="weka.filters.unsupervised.attribute.AddCluster", options=["-W", "weka.clusterers.SimpleKMeans -N 10 -S 4 -I 500", "-I", "last"])
        addCluster.inputformat(self.data)
        filtered = addCluster.filter(self.data)

        # The file is read back immediately below, so it must be closed
        # (and therefore flushed) first; otherwise buffered output may not
        # have reached disk yet and rows would be missing.
        self.f = open('data/addCluster.arff', 'w+')
        try:
            self.f.write(str(filtered))
        finally:
            self.f.close()
        self.clustered_data = self.classifyData('data/addCluster.arff')


    def classifyData(self, filename):
        self.data_clust = [[],[],[],[],[],[],[],[],[],[]]
        with open(filename, "r") as f:
            for line in f:
                if "@" not in line or line != "\n":
                    cluster_name = line.split(",")[-1]
                    if cluster_name == "cluster1\n":
                        self.data_clust[0].append(line)
                    elif cluster_name == "cluster2\n":
                        self.data_clust[1].append(line)
                    elif cluster_name == "cluster3\n":
                        self.data_clust[2].append(line)
                    elif cluster_name == "cluster4\n":
                        self.data_clust[3].append(line)
                    elif cluster_name == "cluster5\n":
                        self.data_clust[4].append(line)
                    elif cluster_name == "cluster6\n":
                        self.data_clust[5].append(line)
                    elif cluster_name == "cluster7\n":
                        self.data_clust[6].append(line)
                    elif cluster_name == "cluster8\n":
                        self.data_clust[7].append(line)
                    elif cluster_name == "cluster9\n":
                        self.data_clust[8].append(line)
                    elif cluster_name == "cluster10\n":
                        self.data_clust[9].append(line)
        return self.data_clust

    def registerInitialState(self, gameState):
        """Forward the initial game state to BustersAgent.registerInitialState.

        No agent-specific setup is done here.
        NOTE(review): the previous docstring said this pre-computes the
        distance between every two points; any such work would happen in the
        base class, which is not visible here -- confirm.
        """
        BustersAgent.registerInitialState(self, gameState)

    def getInstance(self, gameState):
        """Describe the current game state as an ARFF instance and cluster it.

        The state is written to data/instances.arff (header plus one data
        row), loaded back through Weka, and passed to the trained clusterer.
        As side effects, self.previousDistances is updated with this turn's
        distances and self.inst is set to the loaded instance.

        :param gameState: current game state.
        :return: value returned by self.clusterer.cluster_instance for the
                 loaded instance.
        :raises ValueError: if the loader yields no instance.
        """
        header = "".join([
            "@relation prueba\n\n",
            "@attribute score NUMERIC\n",
            "@attribute ghosts-living NUMERIC\n",
            "@attribute distance-ghost1 NUMERIC \n",
            "@attribute distance-ghost2 NUMERIC \n",
            "@attribute distance-ghost3 NUMERIC \n",
            "@attribute distance-ghost4 NUMERIC \n",
            "@attribute prev-distance-ghost1 NUMERIC \n",
            "@attribute prev-distance-ghost2 NUMERIC \n",
            "@attribute prev-distance-ghost3 NUMERIC \n",
            "@attribute prev-distance-ghost4 NUMERIC \n",
            "@attribute posX NUMERIC\n",
            "@attribute posY NUMERIC\n",
            "@attribute direction {North, South, East, West, Stop}\n",
            "@attribute wall-east {True, False}\n",
            "@attribute wall-south {True, False}\n",
            "@attribute wall-west {True, False}\n",
            "@attribute wall-north {True, False}\n",
            "@data\n\n\n",
        ])

        pacman_pos = gameState.getPacmanPosition()

        # NOTE(review): this is the number of entries after index 0, whatever
        # their value, not the number of True entries -- confirm this is what
        # "ghosts-living" meant in the training data.
        slot_count = len(gameState.livingGhosts[1:])

        # Distances for this turn, computed once and used both for the data
        # row and for next turn's "previous" values.
        # NOTE(review): the loop runs over indices 0..slot_count-1 of
        # livingGhosts although the length was taken from livingGhosts[1:],
        # so index 0 is included and the last entry is not. Kept as-is because
        # the trained model may depend on features generated this way.
        current = []
        for i in range(slot_count):
            if gameState.livingGhosts[i] is False:
                current.append(0)
            else:
                current.append(self.distancer.getDistance(pacman_pos, gameState.getGhostPosition(i)))

        fields = [str(gameState.data.score), str(slot_count)]
        fields.extend(str(d) for d in current)
        # Previous-turn distances go out before they are overwritten below.
        fields.extend(str(d) for d in self.previousDistances)
        for i, d in enumerate(current):
            self.previousDistances[i] = d

        pacman_state = gameState.data.agentStates[0]
        position = pacman_state.getPosition()
        x, y = pacman_pos[0], pacman_pos[1]
        # NOTE(review): wall flags are emitted for (x-1, y), (x, y-1),
        # (x+1, y), (x, y+1) under the headers wall-east, wall-south,
        # wall-west, wall-north -- confirm this matches the training data.
        fields.extend([
            str(position[0]),
            str(position[1]),
            str(pacman_state.getDirection()),
            str(gameState.hasWall(x - 1, y)),
            str(gameState.hasWall(x, y - 1)),
            str(gameState.hasWall(x + 1, y)),
            str(gameState.hasWall(x, y + 1)),
            # NOTE(review): with four distance slots this trailing "?" gives
            # the row one more value than the header declares attributes.
            "?",
        ])
        line = ",".join(fields)

        # "with" guarantees the file is closed (and flushed) before Weka
        # reads it, even if a write fails.
        with open('data/instances.arff', 'w+') as arff:
            arff.write(header)
            arff.write(line)

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file("data/instances.arff")
        data.class_is_last()   # set class attribute
        pred = None
        for inst in data:
            pred = self.clusterer.cluster_instance(inst)
            self.inst = inst
        if pred is None:
            raise ValueError("no instance could be loaded from data/instances.arff")
        return pred

    def closeMove(self, move, option):
        """Return an alternative direction for ``move``.

        For a vertical move (NORTH/SOUTH) option 0 yields EAST and option 1
        yields WEST; for a horizontal move (EAST/WEST) option 0 yields NORTH
        and option 1 yields SOUTH. Any other option yields the opposite of
        ``move``. A ``move`` that is none of the four directions yields
        SOUTH.

        :param move: direction to find an alternative for.
        :param option: 0 or 1 for a sideways alternative, anything else for
                       the reverse direction.
        :return: the chosen alternative direction.
        """
        # (heading, (option 0, option 1, reverse)), checked in this order.
        alternatives = (
            (Directions.NORTH, (Directions.EAST, Directions.WEST, Directions.SOUTH)),
            (Directions.SOUTH, (Directions.EAST, Directions.WEST, Directions.NORTH)),
            (Directions.EAST, (Directions.NORTH, Directions.SOUTH, Directions.WEST)),
            (Directions.WEST, (Directions.NORTH, Directions.SOUTH, Directions.EAST)),
        )
        for heading, (first, second, reverse) in alternatives:
            if move == heading:
                if option == 0:
                    return first
                if option == 1:
                    return second
                return reverse
        return Directions.SOUTH

    def chooseAction(self, gameState):
        """Pick the action for this turn.

        The move suggested by the clustered data is returned when it is
        legal. Otherwise the two sideways alternatives from closeMove are
        tried in random order, and as a last resort the reverse direction is
        returned without a legality check. The time spent computing the
        suggested move is appended to self.f_stats.

        :param gameState: current game state.
        :return: the direction to play.
        """
        started = self.startMeasuring(gameState)
        move = self.getMove(ClusteredAgent.getInstance(self, gameState))
        finished = self.endMeasuring()
        self.f_stats.write(str(finished - started) + "\n")

        if move in gameState.getLegalActions(0):
            return move

        # The suggested move is blocked: try to go around the obstacle,
        # choosing at random which side to attempt first.
        first_side = random.randint(0,1)
        for side in (first_side, (first_side+1)%2):
            sidestep = self.closeMove(move, side)
            if sidestep in gameState.getLegalActions(0):
                return sidestep

        # Neither side is open, so turn back.
        return self.closeMove(move, 2)

    def getMove(self, clusterNum):
        # get the closest instance
        values = []
        for instance in self.clustered_data[clusterNum]:
            values.append(self.getSimilarity(instance))

        inst = values.index(min(values))
        # return the movement
        return self.clustered_data[clusterNum][inst].split(",")[-2]

    def similarityFunc(self, attrs):
        """Collapse an instance's attribute strings into a single score.

        The score is the sum of five terms, each weighted by 0.2:
        attrs[1] (ghosts-living), the sum of attrs[2:6] (ghost distances),
        attrs[10] + attrs[11] (posX and posY), the numeric code of attrs[12]
        (direction, via move_to_num) and the number of attrs[13:17] (wall
        flags) equal to "True". The other attributes are not used.

        :param attrs: list of attribute values as strings, in file order.
        :return: the score as a float.
        """
        # ghosts-living
        a = float(attrs[1]) * 0.2

        # distance-ghosts
        dist = 0
        for i in attrs[2:6]:
            dist += float(i)
        a += dist * 0.2

        # posX and posY (float() also accepts values written as "3.0")
        a += (float(attrs[10]) + float(attrs[11])) * 0.2

        # direction
        a += float(move_to_num[attrs[12]]) * 0.2

        # walls: the flags are the strings "True"/"False", and any non-empty
        # string is truthy, so they must be compared rather than passed to
        # bool() -- otherwise every wall counts as present.
        wall = 0
        for i in attrs[13:17]:
            if i.strip() == "True":
                wall += 1
        a += wall * 0.2
        return a

    def getSimilarity(self, instance):
        attrs_known_inst = instance.split(",")
        attrs_new_inst = str(self.inst).split(",")

        a = self.similarityFunc(attrs_known_inst)
        b = self.similarityFunc(attrs_new_inst)

        return abs(a - b)