def assign_cluster(file_location, file_out="clustered.csv", model="kmeans.model", last_filename=False):
    """
    Assign every row of a CSV file to a cluster and write the result to disk.

    Each output line has the form ``<label>,<cluster>,<features...>`` and a
    progress line is printed for every row.

    :param file_location: path handed to ``read_csv_file``
    :param file_out: path of the CSV file to write
    :param model: path of the serialized clusterer to load
    :param last_filename: if true, the label is the last column and the
        features are every column except the last two; otherwise the label
        is the first column and the features are the remaining columns
    """
    rows = read_csv_file(file_location)
    check_jvm()

    # Restore the previously trained clusterer from its serialized form.
    clusterer = Clusterer(jobject=serialization.read(model))

    with open(file_out, 'w') as output:
        for row_number, attrs in enumerate(rows, 1):
            features = attrs[:-2] if last_filename else attrs[1:]
            inst = Instance.create_instance(features)
            pred = clusterer.cluster_instance(inst)
            dist = clusterer.distribution_for_instance(inst)

            # Output record: label first, then the cluster, then the features.
            label = attrs[-1] if last_filename else attrs[0]
            record = [label, pred]
            record.extend(features)

            print(str(row_number) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
            output.write('%s\n' % (','.join(map(str, record))))
def simpleKMeansTrain(self, dataf, options, mname, temp=True):
    '''
    Train a Weka SimpleKMeans clusterer, print the cluster assignment of
    every instance and save the model.

    The JVM is started at the beginning and always stopped at the end.
    Any exception is caught and printed as a traceback rather than raised.

    :param dataf: -> data to be clustered, passed to ``self.loadData``
    :param options: -> SimpleKMeans options
        N -> number of clusters
        A -> Distance function to use (ex: default is "weka.core.EuclideanDistance -R first-last")
        l -> maximum number of iterations default 500
        num-slots -> number of execution slots, 1 means no parallelism
        S -> Random number seed (default 10)
        example => ["-N", "10", "-S", "10"]
    :param mname: -> model name, passed to ``self.saveModel``
    :param temp: -> forwarded to ``self.loadData`` (default True)
    :return: None
    '''
    try:
        jvm.start(max_heap_size=self.wHeap)
        # Forward the caller's flag; it was previously hard-coded to True,
        # which silently ignored the ``temp`` argument.
        data = self.loadData(dataf, temp=temp)
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=options)
        clusterer.build_clusterer(data)
        print(clusterer)
        # cluster the data
        for inst in data:
            cl = clusterer.cluster_instance(inst)  # 0-based cluster index
            dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
            print(("cluster=" + str(cl) + ", distribution=" + str(dist)))
        self.saveModel(clusterer, 'skm', mname)
    except Exception:
        # Best-effort: report the failure but do not propagate it.
        print((traceback.format_exc()))
    finally:
        jvm.stop()
def main():
    """
    Example run: cluster the iris dataset with SimpleKMeans (3 clusters) and
    print the cluster and membership distribution of every instance.
    """
    # Locate and load the dataset.
    iris_file = os.sep.join([helper.get_data_dir(), "iris.arff"])
    helper.print_info("Loading dataset: " + iris_file)
    data = Loader("weka.core.converters.ArffLoader").load_file(iris_file)

    # The last attribute is the class; drop it before clustering.
    data.delete_last_attribute()

    # Train the clusterer and show the resulting model.
    helper.print_title("Training SimpleKMeans clusterer")
    kmeans = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    kmeans.build_clusterer(data)
    print(kmeans)

    # Report the assignment of each instance (1-based position).
    helper.print_info("Clustering data")
    for position, inst in enumerate(data, 1):
        assigned = kmeans.cluster_instance(inst)
        membership = kmeans.distribution_for_instance(inst)
        print(str(position) + ": cluster=" + str(assigned) + ", distribution=" + str(membership))
def simpleKMeansTrain(self, dataf, options, mname, temp=True):
    '''
    Train a Weka SimpleKMeans clusterer, print the cluster assignment of
    every instance and save the model.

    Any exception is caught and printed as a traceback rather than raised.

    :param dataf: -> data to be clustered, passed to ``self.loadData``
    :param options: -> SimpleKMeans options
        N -> number of clusters
        A -> Distance function to use (ex: default is "weka.core.EuclideanDistance -R first-last")
        l -> maximum number of iterations default 500
        num-slots -> number of execution slots, 1 means no parallelism
        S -> Random number seed (default 10)
        example => ["-N", "10", "-S", "10"]
    :param mname: -> model name, passed to ``self.saveModel``
    :param temp: -> forwarded to ``self.loadData`` (default True)
    :return: None
    '''
    # NOTE(review): the JVM started here is not stopped by this method;
    # confirm that the caller is responsible for shutting it down.
    try:
        jvm.start(max_heap_size=self.wHeap)
        # Forward the caller's flag; it was previously hard-coded to True,
        # which silently ignored the ``temp`` argument.
        data = self.loadData(dataf, temp=temp)
        clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=options)
        clusterer.build_clusterer(data)
        # print() call form works on both Python 2 and Python 3.
        print(clusterer)
        # cluster the data
        for inst in data:
            cl = clusterer.cluster_instance(inst)  # 0-based cluster index
            dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
            print("cluster=" + str(cl) + ", distribution=" + str(dist))
        self.saveModel(clusterer, 'skm', mname)
    except Exception:
        # Best-effort: report the failure but do not propagate it.
        print(traceback.format_exc())
def run_SKMeans_137(self):
    """
    Cluster ``self.data_loaded`` with SimpleKMeans using 137 clusters and
    write three report files into ``<self.output_dir>/SKMeans``.

    File names are ``<input basename>.<today's date><suffix>`` where the
    basename is taken from ``self.input_path`` up to its first dot:

    * ``.cl_eval.txt``   -- cluster evaluation summary
    * ``.cl_descr.txt``  -- cluster index and membership distribution per instance
    * ``.cl_assign.txt`` -- cluster assignment per row

    :return: an empty tuple (unchanged from the original ``return()``)
    """
    # construct output paths
    output_prefix = os.path.split(self.input_path)[-1].split(".")[0]
    print(output_prefix)
    write_date = output_prefix + "." + str(datetime.now().date())
    SKMeans_dir = os.path.join(self.output_dir, "SKMeans")
    eval_path = os.path.join(SKMeans_dir, write_date + ".cl_eval.txt")
    clust_desc_path = os.path.join(SKMeans_dir, write_date + ".cl_descr.txt")
    clust_assign_path = os.path.join(SKMeans_dir, write_date + ".cl_assign.txt")

    # create output dir if it doesn't already exist
    if not os.path.exists(SKMeans_dir):
        os.makedirs(SKMeans_dir)

    # build clusters; the loaded data is used directly, it is not cloned
    data_clone = self.data_loaded
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "137"])
    clusterer.build_clusterer(data_clone)

    # cluster evaluation
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data_clone)
    with open(eval_path, 'w') as outfile:
        outfile.write("number of clusters: \t" + str(evaluation.num_clusters) + "\n")
        # Report the log likelihood itself; this line used to repeat the
        # number of clusters.
        outfile.write("log likelihood: \t" + str(evaluation.log_likelihood) + "\n")
        outfile.write("cluster assignments: \t" + str(evaluation.cluster_assignments) + "\n")
        outfile.write("***********************\n")
        outfile.write("\t".join(["SKmeans Cluster Evaluation Results\n"]))  # header
        outfile.write(str(evaluation.cluster_results) + "\n")

    # description of clusters: one line per instance
    with open(clust_desc_path, 'w') as outfile:
        outfile.write(",".join(["cluster_num", "distribution\n"]))  # header
        for inst in data_clone:
            cl = clusterer.cluster_instance(inst)  # 0-based cluster index
            dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
            outfile.write(",".join([str(cl), str(dist)]))
            outfile.write("\n")

    # cluster assignment by row
    with open(clust_assign_path, 'w') as outfile:
        outfile.write(",".join(["row_num", "SKMeans\n"]))  # header
        for i, inst in enumerate(evaluation.cluster_assignments):
            outfile.write(",".join([str(i), str(inst)]))
            outfile.write("\n")

    return ()
def command():
    """
    Run SimpleKMeans on the dataset selected by the submitted form.

    Reads ``clusternum``, ``firstcol`` and ``secondcol`` from
    ``request.form``, loads the CSV file associated with the column pair,
    builds the clusterer, prints the model and the per-instance assignments
    and also writes them to ``filename.txt``.

    :return: the rendered ``output.html`` template
    :raises ValueError: if the column pair has no associated data file
    """
    jvm.start()
    import weka.core.converters as converters
    from weka.clusterers import Clusterer

    clusters = request.form['clusternum']
    a1 = request.form['firstcol']
    a2 = request.form['secondcol']

    # Column pair -> data file prepared for that pair.
    data_files = {
        ('B', 'C'): "Data.csv",
        ('B', 'D'): "Data1.csv",
        ('C', 'D'): "Data2.csv",
        ('C', 'E'): "Data3.csv",
        ('D', 'E'): "Data4.csv",
    }
    data_file = data_files.get((a1, a2))
    if data_file is None:
        # Fail with a clear message instead of hitting an unbound ``data``.
        raise ValueError("Unsupported column combination: %s, %s" % (a1, a2))
    data = converters.load_any_file(data_file)
    print(data)

    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "{}".format(clusters)])
    clusterer.build_clusterer(data)
    print(clusterer)

    # The context manager guarantees the report file is closed; the old
    # close() call sat after the return statement and never ran.
    with open("filename.txt", "w") as f:
        f.write(str(clusterer))
        # cluster the data
        for inst in data:
            cl = clusterer.cluster_instance(inst)  # 0-based cluster index
            dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
            print("cluster=" + str(cl) + ", distribution=" + str(dist))
            # NOTE(review): entries are written without a separator, exactly
            # as before; confirm whether a newline was intended.
            f.write("cluster=" + str(cl) + ", distribution=" + str(dist))
    return render_template("output.html")
def query_instance(attributes, model="kmeans.model"):
    """
    Look up the cluster that a single set of attribute values belongs to.

    :params attributes: array or list of attribute values
    :params model: path of the serialized clusterer to load
    :returns: cluster id
    """
    check_jvm()
    # Build the instance first, then restore the clusterer and query it.
    candidate = Instance.create_instance(attributes)
    clusterer = Clusterer(jobject=serialization.read(model))
    return clusterer.cluster_instance(candidate)
def predicaoCluster(matricula, curso, tipo_predicao):
    """
    Predict the cluster of a student and return that cluster's characteristic.

    The student's first row is written to ``aluno_temp.csv``, loaded through
    Weka's CSV loader and assigned to a cluster by the serialized k-means
    model that matches the course and prediction type.

    :param matricula: value matched against the ``MATRICULA`` column
    :param curso: course key, ``'si'`` or ``'eca'``
    :param tipo_predicao: prediction type, ``'reprovacao'`` or ``'evasao'``
    :return: the result of ``retornarCaracteristicaCluster`` for the cluster
    :raises ValueError: if the course/prediction pair has no model, or no
        row exists for ``matricula``
    """
    dados = retornarDadosCurso(curso)

    model_paths = {
        ('si', 'reprovacao'): "model/kmeans_si_reprovacao.model",
        ('si', 'evasao'): "model/kmeans_si_evasao.model",
        ('eca', 'reprovacao'): "model/kmeans_eca_reprovacao.model",
        ('eca', 'evasao'): "model/kmeans_eca_evasao.model",
    }
    model_path = model_paths.get((curso, tipo_predicao))
    if model_path is None:
        # Previously this surfaced later as an unbound ``model`` variable.
        raise ValueError("No model for curso=%r, tipo_predicao=%r" % (curso, tipo_predicao))

    # Select the student's features: drop the columns that are not model
    # inputs and keep only the first matching row.
    aluno = dados.loc[dados['MATRICULA'] == matricula]
    aluno = aluno.drop(['MATRICULA', 'APROVADO', 'COD_DISCIPLINA', 'SIT_MATRICULA'], axis=1)
    aluno = aluno.head(1)
    if aluno.empty:
        raise ValueError("No data found for matricula %r" % (matricula,))
    aluno.to_csv('aluno_temp.csv', index=False)

    from weka.clusterers import Clusterer
    import weka.core.jvm as jvm
    from weka.core.converters import Loader
    import weka.core.serialization as serialization

    jvm.start()

    model = serialization.read_all(model_path)
    cluster = Clusterer(jobject=model[0])

    loader = Loader(classname="weka.core.converters.CSVLoader")
    dado_aluno = loader.load_file("aluno_temp.csv")

    cluster_aluno_pertence = None
    for instancia in dado_aluno:
        cluster_aluno_pertence = cluster.cluster_instance(instancia)
    if cluster_aluno_pertence is None:
        raise ValueError("No instance could be loaded for matricula %r" % (matricula,))

    # The JVM is left running; the stop call was already disabled here.
    return retornarCaracteristicaCluster(curso, tipo_predicao, cluster_aluno_pertence)
# Build a DBSCAN clusterer, round-trip it through serialization and print the
# cluster and membership distribution of every instance using the reloaded
# model, then shut the JVM down.
dbscan_options = [
    "-E", "0.9",
    "-M", "6",
    "-I", "weka.clusterers.forOPTICSAndDBScan.Databases.SequentialDatabase",
    "-D", "weka.clusterers.forOPTICSAndDBScan.DataObjects.EuclideanDataObject",
]
clusterDBSCAN = Clusterer(classname="weka.clusterers.DBSCAN", options=dbscan_options)
clusterDBSCAN.build_clusterer(data)

# Persist the trained model and read it straight back.
dbscan_model_path = os.path.join(modelDir, "dbscan.model")
serialization.write(dbscan_model_path, clusterDBSCAN)
cluster = Clusterer(jobject=serialization.read(dbscan_model_path))

for inst in data:
    cl3 = cluster.cluster_instance(inst)
    dist3 = cluster.distribution_for_instance(inst)
    print("cluster=" + str(cl3) + ", distribution=" + str(dist3))

jvm.stop()
# Delete the attributes that are not required (the attribute at index 2 is
# removed three times in total).
data.delete_attribute(2)
data.delete_attribute(2)
# Uncomment to save the file that has serviceId as class, forkV and ForkW as attributes:
# saver.save_file(data, "data_with_class_serviceID.arff")
data.delete_attribute(2)
# saver.save_file(data, "data.arff")

num_clusters = "6"  # number of clusters for k-means

# Perform the clustering.
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", num_clusters])
clusterer.build_clusterer(data)
# NOTE(review): the per-instance results are only used by the disabled print
# below; the loop is kept in case later code reads ``cl``/``dist`` -- confirm.
for inst in data:
    cl = clusterer.cluster_instance(inst)  # 0-based cluster index
    dist = clusterer.distribution_for_instance(inst)  # cluster membership distribution
    # print("cluster=" + str(cl) + ", distribution=" + str(dist))

# Collect the evaluation data about the clustered instances.
evaluation = ClusterEvaluation()
evaluation.set_model(clusterer)
evaluation.test_model(data)
# print() call form works on both Python 2 and Python 3.
print(evaluation.cluster_results)
# print("# clusters: " + str(evaluation.num_clusters))
# print("log likelihood: " + str(evaluation.log_likelihood))
# print("cluster assignments:\n" + str(evaluation.cluster_assignments))
# plc.plot_cluster_assignments(evaluation, data, [], True)

# Using WEKA files to get the required results by calling them through this script
class WekaCluster(BaseEstimator, OptionHandler, ClusterMixin):
    """
    Wraps a Weka cluster within the scikit-learn framework.
    """

    def __init__(self, jobject=None, cluster=None, classname=None, options=None,
                 nominal_input_vars=None, num_nominal_input_labels=None):
        """
        Initializes the estimator. Can be either instantiated via the following priority of parameters:
        1. JB_Object representing a Java Clusterer object
        2. Clusterer wrapper
        3. classname/options

        :param jobject: the JB_Object representing a Weka cluster to use
        :type jobject: JB_Object
        :param cluster: the cluster wrapper to use
        :type cluster: Clusterer
        :param classname: the classname of the Weka cluster to instantiate
        :type classname: str
        :param options: the command-line options of the Weka cluster to instantiate
        :type options: list
        :param nominal_input_vars: the input variables to convert to nominal ones;
                                   passed on to ``to_nominal_attributes`` when not None
        :param num_nominal_input_labels: the dictionary with the number of labels for the nominal input variables (key is 0-based attribute index)
        :type num_nominal_input_labels: dict
        :raises Exception: if none of jobject/cluster/classname is given or the
                           Java object is not a weka.clusterers.Clusterer
        """
        if jobject is not None:
            _jobject = jobject
        elif cluster is not None:
            _jobject = cluster.jobject
        elif classname is not None:
            if options is None:
                options = []
            cluster = Clusterer(classname=classname, options=options)
            _jobject = cluster.jobject
        else:
            raise Exception("At least Java classname must be provided!")

        if not is_instance_of(_jobject, "weka.clusterers.Clusterer"):
            raise Exception(
                "Java object does not implement weka.clusterers.Clusterer!")

        super(WekaCluster, self).__init__(_jobject)
        self._cluster = Clusterer(jobject=_jobject)
        self.header_ = None
        # the following references are required for get_params/set_params
        self._classname = classname
        self._options = options
        self._nominal_input_vars = nominal_input_vars
        self._num_nominal_input_labels = num_nominal_input_labels

    @property
    def cluster(self):
        """
        Returns the underlying cluster object, if any.

        :return: the cluster object
        :rtype: Clusterer
        """
        return self._cluster

    @property
    def header(self):
        """
        Returns the underlying dataset header, if any.

        :return: the dataset structure
        :rtype: Instances
        """
        return self.header_

    def fit(self, data, targets=None):
        """
        Trains the cluster.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster
        :rtype: WekaCluster
        """
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        d = to_instances(data, num_nominal_labels=self._num_nominal_input_labels)
        self._cluster.build_clusterer(d)
        # keep an empty copy of the dataset structure for converting rows in predict()
        self.header_ = d.template_instances(d, 0)
        return self

    def predict(self, data, targets=None):
        """
        Predicts cluster labels.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster labels (of type int)
        :rtype: ndarray
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        result = []
        for d in data:
            inst = to_instance(self.header_, d)
            result.append(int(self._cluster.cluster_instance(inst)))
        return np.array(result)

    def fit_predict(self, data, targets=None):
        """
        Trains the cluster and returns the cluster labels.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: ignored
        :type targets: ndarray
        :return: the cluster labels (of type int)
        :rtype: ndarray
        """
        self.fit(data)
        return self.predict(data)

    def get_params(self, deep=True):
        """
        Returns the parameters for this cluster, basically classname and options list.
        The nominal input settings are only included when they are set.

        :param deep: ignored
        :type deep: bool
        :return: the dictionary with options
        :rtype: dict
        """
        result = dict()
        result["classname"] = self._classname
        result["options"] = self._options
        if self._nominal_input_vars is not None:
            result["nominal_input_vars"] = self._nominal_input_vars
        if self._num_nominal_input_labels is not None:
            result["num_nominal_input_labels"] = self._num_nominal_input_labels
        return result

    def set_params(self, **params):
        """
        Sets the options for the cluster, expects 'classname' and 'options'.
        A new, untrained Clusterer is instantiated from them. The nominal
        input settings are reset unless they are part of the parameters.

        :param params: the parameter dictionary
        :type params: dict
        :return: the estimator itself (scikit-learn convention)
        :rtype: WekaCluster
        :raises Exception: if 'classname' or 'options' is missing from non-empty parameters
        """
        if len(params) == 0:
            return self
        if "classname" not in params:
            raise Exception("Cannot find 'classname' in parameters!")
        if "options" not in params:
            raise Exception("Cannot find 'options' in parameters!")
        self._classname = params["classname"]
        self._options = params["options"]
        self._cluster = Clusterer(classname=self._classname, options=self._options)
        self._nominal_input_vars = None
        if "nominal_input_vars" in params:
            self._nominal_input_vars = params["nominal_input_vars"]
        self._num_nominal_input_labels = None
        if "num_nominal_input_labels" in params:
            self._num_nominal_input_labels = params["num_nominal_input_labels"]
        return self

    def __str__(self):
        """
        For printing the model.

        :return: the model representation, if any
        :rtype: str
        """
        if self._cluster is None:
            return str(self._classname) + ": No model built yet"
        else:
            return str(self._cluster)

    def __copy__(self):
        """
        Creates a deep copy of itself, including the nominal input settings
        and the dataset header.

        :return: the copy
        :rtype: WekaCluster
        """
        # copy the clusterer currently in use (set_params may have replaced it)
        result = WekaCluster(jobject=deepcopy(self._cluster.jobject))
        result._classname = self._classname
        # options are None when the estimator was built from a jobject/wrapper
        result._options = None if self._options is None else self._options[:]
        result._nominal_input_vars = self._nominal_input_vars
        result._num_nominal_input_labels = self._num_nominal_input_labels
        result.header_ = self.header_
        return result

    def __repr__(self, N_CHAR_MAX=700):
        """
        Returns a valid Python string using its classname and options.

        :param N_CHAR_MAX: ignored
        :type N_CHAR_MAX: int
        :return: the representation
        :rtype: str
        """
        # string values get quoted so that the output stays valid Python
        if isinstance(self._nominal_input_vars, str):
            template = "WekaCluster(classname='%s', options=%s, nominal_input_vars='%s')"
        else:
            template = "WekaCluster(classname='%s', options=%s, nominal_input_vars=%s)"
        return template % (
            self._cluster.classname,
            str(self._cluster.options),
            str(self._nominal_input_vars))
class ClusterAgent(BustersAgent):
    """
    Agent that clusters the current game state with SimpleKMeans and plays
    the majority action recorded for that cluster.
    """

    def registerInitialState(self, gameState):
        """
        Set up the distancer, count the recorded actions per cluster, train
        the clusterer and derive the majority-action policy.

        :param gameState: initial game state handed over by the framework
        """
        BustersAgent.registerInitialState(self, gameState)
        self.distancer = Distancer(gameState.data.layout, False)
        # Whether the distance attribute is used (True for v1 and v2, False for v3).
        self.dis = True

        # Per-cluster counts of each class value, used by the policies.
        self.clusters = 8
        self.classes = 4
        self.classCounts = [[0 for i in range(self.classes)] for j in range(self.clusters)]
        self.classIndex = 2
        self.clusterIndex = 3
        self.readInstances()

        # Storage for the training instances.
        self.numInstances = 52
        self.numAttributes = 4
        self.ins = [" " for i in range(self.numInstances)]

        # The Weka wrapper needs a running Java virtual machine.
        jvm.start()

        # Build the model. The file is resolved against the working
        # directory, like every other file this agent reads or writes.
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(os.getcwd() + "/Outputs/agent_header.arff")
        self.clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", str(self.clusters)])
        self.clusterer.build_clusterer(data)
        print(self.clusterer)

        # Apply the policy.
        self.politicaMax()

    def readInstances(self):
        """
        Read the header-less instance file and count, for every cluster, how
        often each class value (North=0, East=1, South=2, West=3) occurs.
        Any class value other than East/South/West counts as 0.
        """
        path = os.getcwd() + "/Outputs/agent.arff"
        class_values = {"East": 1, "South": 2, "West": 3}
        with open(path, 'r') as f:
            for line in f:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                values = line.split(",")
                classValue = class_values.get(values[self.classIndex], 0)
                cluster = values[self.clusterIndex]
                # The cluster number is parsed from the last two characters
                # of the field and converted to a 0-based index.
                self.classCounts[int(cluster[-2:]) - 1][classValue] += 1

    def politicaMax(self):
        """
        Compute the majority class of every cluster and store it in
        ``self.max``. Ties go to the lowest class index; a cluster without
        any counts maps to class 0.
        """
        self.max = [0 for i in range(self.clusters)]
        for i in range(self.clusters):
            counts = self.classCounts[i][:self.classes]
            self.max[i] = counts.index(max(counts))

    def _relativePosition(self, pos_pac, pos_ghost):
        """
        Encode where a ghost is relative to Pacman.

        :return: "-1" if the ghost's y coordinate is below 3, "0" if it
            shares Pacman's position, otherwise NORTH = 1, NORTH_EAST = 2,
            EAST = 3, SOUTH_EAST = 4, SOUTH = 5, SOUTH_WEST = 6, WEST = 7,
            NORTH_WEST = 8
        :rtype: str
        """
        if pos_ghost[1] < 3:
            return "-1"
        if pos_ghost == pos_pac:
            # Return here: falling through used to emit a second value ("5").
            return "0"
        if pos_ghost[0] > pos_pac[0]:
            if pos_ghost[1] > pos_pac[1]:
                return "2"
            if pos_ghost[1] < pos_pac[1]:
                return "4"
            return "3"
        if pos_ghost[0] < pos_pac[0]:
            if pos_ghost[1] > pos_pac[1]:
                return "8"
            if pos_ghost[1] < pos_pac[1]:
                return "6"
            return "7"
        if pos_ghost[1] > pos_pac[1]:
            return "1"
        return "5"

    def chooseAction(self, gameState):
        """
        Write the current state as a one-row ARFF file, cluster it and
        return the majority action of the assigned cluster.

        :param gameState: current game state
        :return: the chosen direction; Directions.STOP if no instance was loaded
        """
        path = os.getcwd() + "/Outputs/newInstance.arff"
        if self.dis:
            header = "@RELATION pacman\n" \
                + "@ATTRIBUTE dis NUMERIC\n" \
                + "@ATTRIBUTE relPos {-1,0,1,2,3,4,5,6,7,8}\n\n" \
                + "@DATA\n"
        else:
            header = "@RELATION pacman\n" \
                + "@ATTRIBUTE relPos {-1,0,1,2,3,4,5,6,7,8}\n\n" \
                + "@DATA\n"

        fields = []
        # Pacman position (x, y).
        pos_pac = gameState.data.agentStates[0].getPosition()

        # Distances to the ghosts.
        if self.dis:
            for i in range(1, gameState.getNumAgents()):
                pos_ghost = gameState.data.agentStates[i].getPosition()
                distance = self.distancer.getDistance(pos_pac, pos_ghost)
                # Normalization: (distance - min)/(max - min): min = 1, max = 21
                distance = (distance - 1) / (21 - 1)
                # A distance above 1000 is treated as "ghost already eaten".
                # NOTE(review): the test runs on the normalized value;
                # confirm whether it was meant for the raw distance.
                if distance > 1000:
                    fields.append("-1")
                else:
                    fields.append(str(distance))

        # Positions of the ghosts relative to Pacman.
        for i in range(1, gameState.getNumAgents()):
            pos_ghost = gameState.data.agentStates[i].getPosition()
            fields.append(self._relativePosition(pos_pac, pos_ghost))

        # Every value is followed by a comma, then the row is terminated.
        row = "".join(field + "," for field in fields) + "\n"
        with open(path, 'w') as f:
            f.write(header + row)

        # Load exactly the file that was just written.
        loader = Loader(classname="weka.core.converters.ArffLoader")
        newData = loader.load_file(path)

        actions = (Directions.NORTH, Directions.EAST, Directions.SOUTH, Directions.WEST)
        direction = Directions.STOP
        for inst in newData:
            cl = self.clusterer.cluster_instance(inst)
            majority = self.max[cl]
            if 0 <= majority < len(actions):
                direction = actions[majority]
        return direction
class ClusteredAgent(BustersAgent):
    """
    Agent that clusters the current game state with SimpleKMeans and replays
    the move of the most similar recorded instance in the same cluster.
    """

    def __init__(self, index = 0, inference = "ExactInference", ghostAgents = None):
        """
        Train the clusterer on ``data/game_toCluster.arff`` (without its last
        attribute) and group the recorded instances by assigned cluster.
        """
        BustersAgent.__init__(self, index, inference, ghostAgents)
        self.previousDistances = [0, 0, 0, 0]
        jvm.start(max_heap_size="512m")
        self.loader = Loader(classname="weka.core.converters.ArffLoader")

        # The clusterer is trained without the last attribute.
        self.data = self.loader.load_file("data/game_toCluster.arff")
        self.data.delete_last_attribute()
        self.clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "10", "-S", "4", "-I", "500"])
        self.clusterer.build_clusterer(self.data)
        self.inst = ""

        # Reload the full data and append the assigned cluster as a new attribute.
        self.data = self.loader.load_file("data/game_toCluster.arff")
        addCluster = Filter(classname="weka.filters.unsupervised.attribute.AddCluster", options=["-W", "weka.clusterers.SimpleKMeans -N 10 -S 4 -I 500", "-I", "last"])
        addCluster.inputformat(self.data)
        filtered = addCluster.filter(self.data)

        # The file must be flushed and closed before it is read back below,
        # otherwise classifyData may see an empty or truncated file.
        with open('data/addCluster.arff', 'w+') as out:
            out.write(str(filtered))
        self.f = out  # attribute kept for backward compatibility (handle is closed)
        self.clustered_data = self.classifyData('data/addCluster.arff')

    def classifyData(self, filename):
        """
        Group the data lines of an ARFF file by the cluster named in their
        last field (``cluster1`` .. ``cluster10``).

        :param filename: path of the ARFF file to read
        :return: list of ten lists with the raw lines of each cluster
        """
        self.data_clust = [[] for _ in range(10)]
        cluster_slots = dict(("cluster%d" % (number + 1), number) for number in range(10))
        with open(filename, "r") as f:
            for line in f:
                # skip header declarations and blank lines
                if "@" in line or not line.strip():
                    continue
                # strip() also matches a final line without trailing newline
                slot = cluster_slots.get(line.split(",")[-1].strip())
                if slot is not None:
                    self.data_clust[slot].append(line)
        return self.data_clust

    def registerInitialState(self, gameState):
        "Delegates the initialisation to BustersAgent."
        BustersAgent.registerInitialState(self, gameState)

    def getInstance(self, gameState):
        """
        Write the current game state to ``data/instances.arff``, load it and
        return the cluster assigned to it. The loaded instance is kept in
        ``self.inst``.

        Uses ``self.distancer``, which is not assigned in this class.

        :param gameState: current game state
        :return: the cluster index returned by the clusterer
        """
        headers = "".join([
            "@relation prueba\n\n",
            "@attribute score NUMERIC\n",
            "@attribute ghosts-living NUMERIC\n",
            "@attribute distance-ghost1 NUMERIC \n",
            "@attribute distance-ghost2 NUMERIC \n",
            "@attribute distance-ghost3 NUMERIC \n",
            "@attribute distance-ghost4 NUMERIC \n",
            "@attribute prev-distance-ghost1 NUMERIC \n",
            "@attribute prev-distance-ghost2 NUMERIC \n",
            "@attribute prev-distance-ghost3 NUMERIC \n",
            "@attribute prev-distance-ghost4 NUMERIC \n",
            "@attribute posX NUMERIC\n",
            "@attribute posY NUMERIC\n",
            "@attribute direction {North, South, East, West, Stop}\n",
            "@attribute wall-east {True, False}\n",
            "@attribute wall-south {True, False}\n",
            "@attribute wall-west {True, False}\n",
            "@attribute wall-north {True, False}\n",
            "@data\n\n\n",
        ])

        ghost_flags = gameState.livingGhosts[1:]
        fields = [str(gameState.data.score)]
        # NOTE(review): this is the number of ghost slots, not the number of
        # ghosts that are still alive; kept as-is, confirm against the
        # training data.
        fields.append(str(len(ghost_flags)))

        # Distances to the ghosts in the current turn.
        # NOTE(review): the flag and the position are looked up with index i
        # starting at 0 although the flags were sliced from index 1; kept
        # as-is, confirm against the code that produced the training data.
        current_distances = []
        for i in range(len(ghost_flags)):
            if gameState.livingGhosts[i] is False:
                current_distances.append(0)
            else:
                current_distances.append(
                    self.distancer.getDistance(gameState.getPacmanPosition(), gameState.getGhostPosition(i)))
        fields.extend(str(distance) for distance in current_distances)

        # Distances of the previous turn, then remember the current ones.
        fields.extend(str(distance) for distance in self.previousDistances)
        for i, distance in enumerate(current_distances):
            self.previousDistances[i] = distance

        pacman_state = gameState.data.agentStates[0]
        pac_x = gameState.getPacmanPosition()[0]
        pac_y = gameState.getPacmanPosition()[1]
        fields.extend([
            str(pacman_state.getPosition()[0]),
            str(pacman_state.getPosition()[1]),
            str(pacman_state.getDirection()),
            str(gameState.hasWall(pac_x - 1, pac_y)),
            str(gameState.hasWall(pac_x, pac_y - 1)),
            str(gameState.hasWall(pac_x + 1, pac_y)),
            str(gameState.hasWall(pac_x, pac_y + 1)),
            "?",
        ])
        line = ",".join(fields)

        with open('data/instances.arff', 'w+') as instance_file:
            instance_file.write(headers)
            instance_file.write(line)

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file("data/instances.arff")
        data.class_is_last()  # set class attribute
        for inst in data:
            pred = self.clusterer.cluster_instance(inst)
            self.inst = inst
        return pred

    def closeMove(self, move, option):
        """
        Return an alternative to ``move``: option 0 and 1 are the two
        perpendicular directions, any other option is the opposite one.
        Unknown moves yield Directions.SOUTH.
        """
        alternatives = {
            Directions.NORTH: (Directions.EAST, Directions.WEST, Directions.SOUTH),
            Directions.SOUTH: (Directions.EAST, Directions.WEST, Directions.NORTH),
            Directions.EAST: (Directions.NORTH, Directions.SOUTH, Directions.WEST),
            Directions.WEST: (Directions.NORTH, Directions.SOUTH, Directions.EAST),
        }
        choices = alternatives.get(move)
        if choices is None:
            return Directions.SOUTH
        if option == 0:
            return choices[0]
        if option == 1:
            return choices[1]
        return choices[2]

    def chooseAction(self, gameState):
        """
        Pick the move suggested by the clustered data; if it is illegal, try
        the perpendicular moves in random order and finally the opposite
        move. The time spent choosing is written to ``self.f_stats``.
        """
        start = self.startMeasuring(gameState)
        move = self.getMove(ClusteredAgent.getInstance(self, gameState))
        end = self.endMeasuring()
        self.f_stats.write(str(end - start) + "\n")

        legal = gameState.getLegalActions(0)
        if move in legal:
            return move

        # When an illegal action was chosen, try to get around the obstacle.
        rand = random.randint(0, 1)
        for option in (rand, (rand + 1) % 2):
            closemove = self.closeMove(move, option)
            if closemove in legal:
                return closemove

        # When this is not possible, we can only backtrack.
        return self.closeMove(move, 2)

    def getMove(self, clusterNum):
        """
        Return the move (second to last field) of the recorded instance in
        the given cluster that is most similar to the current instance.

        :raises ValueError: if the cluster has no recorded instances
        """
        # min() keeps the first of several equally similar instances
        closest = min(self.clustered_data[clusterNum], key=self.getSimilarity)
        return closest.split(",")[-2]

    def similarityFunc(self, attrs):
        """
        Collapse an instance into a single score: ghosts-living, the summed
        ghost distances, posX + posY, the direction and the number of walls,
        each weighted with 0.2.

        :param attrs: the attribute values of one instance as strings
        :return: the score
        :rtype: float
        """
        # ghosts-living
        a = float(attrs[1]) * 0.2
        # distance-ghosts
        dist = 0
        for value in attrs[2:6]:
            dist += float(value)
        a += dist * 0.2
        # posX and posY
        a += float(int(attrs[10]) + int(attrs[11])) * 0.2
        # direction
        a += float(move_to_num[attrs[12]]) * 0.2
        # walls: compare the text, since bool() of any non-empty string
        # (including "False") is True
        wall = sum(1 for flag in attrs[13:17] if flag.strip() == "True")
        a += wall * 0.2
        return a

    def getSimilarity(self, instance):
        """
        Return the absolute difference between the scores of a recorded
        instance line and the current instance (``self.inst``).
        """
        attrs_known_inst = instance.split(",")
        attrs_new_inst = str(self.inst).split(",")
        a = self.similarityFunc(attrs_known_inst)
        b = self.similarityFunc(attrs_new_inst)
        return abs(a - b)