def handle_my_custom_event(data):
    """Socket event handler: run clustering on the payload and emit the result.

    Expects `data` to carry 'id', 'numClusters' and 'clusterLen' keys.  The
    response object is emitted on a per-request channel derived from the id,
    so concurrent clients do not receive each other's results.

    NOTE(review): the 'recieve' misspelling in the event name is part of the
    client contract and must not be fixed server-side alone.
    """
    out = get_clustering(data)

    obj = {}
    obj['id'] = data['id']
    # NOTE(review): numClusters sent back is the SUM of the two request
    # fields — confirm this offset is intended by the client.
    obj['numClusters'] = int(data['numClusters']) + int(data['clusterLen'])
    obj['data'] = out['data']

    # Serialise cluster centres and column headers into JSON for the client.
    cen_df = pd.DataFrame(out['cluster_cen'])
    obj['clusterCen'] = cen_df.to_json()
    obj['colHeaders'] = json.dumps(out['col_headers'].tolist())

    emit('on_clustering_recieve' + str(obj['id']), obj)
def BOVW_create(self, ims, im_labels):
    """Create and attach a Bag-of-Visual-Words (BOVW) clustering model.

    Preprocesses the images, extracts keypoints and descriptors, clusters
    the pooled descriptors into a visual vocabulary, and stores the model
    plus all per-image artifacts on ``self.BOVW``.
    """
    logging.debug('Total images to process for BOVW: %d' % len(ims))

    # Preprocess the images with the instance's configured parameters.
    preprocessed = utils.preprocess_images(ims, **self.preprocess_params)

    # Extract keypoints/descriptors from every preprocessed image.
    extractor = self.get_descriptor_extractor()
    keypoints, descriptors = utils.get_kp_and_des(preprocessed, extractor)

    # Pool all descriptors and cluster them to form the vocabulary.
    pooled = np.concatenate(descriptors)
    model = clustering.get_clustering(
        pooled, self.cluster_model_type, self.cluster_model_params)

    # Attach the model and the data it was built from for later use.
    self.BOVW = model
    self.BOVW.ims = preprocessed
    self.BOVW.im_labels = im_labels
    self.BOVW.kp = keypoints
    self.BOVW.des = descriptors
    # Per-image cluster assignments under the freshly fitted model.
    self.BOVW.clusters = [self.BOVW.predict(des) for des in descriptors]

    logging.debug('BOVW (k=%d) created.' % self.BOVW.n_clusters)
def main():
    """Compare clustering- and generative-model coalition proposals.

    Loads the processed test set, builds coalitions from clustering models
    and from generative models, scores each coalition with the
    Davies-Bouldin index (lower is better), and reports the best model.
    """
    data = pd.read_csv("new_test_processed.csv")
    X, y = generative.target_features_split(data, "Vote")

    # cluster models
    print('Doing clustering coalitions')
    cluster_models = clustering.get_clustering(X, y)
    cluster_coalitions = clustering.create_cluster_coalitions(
        cluster_models, X, y, threshold=0.3, col_thresh=0.75)

    # generative models
    print('Doing generative coalitions')
    gen_models = generative.train_generative(data)
    gen_coalitions = generative.create_gen_coalitions(gen_models, X, y)

    coalitions = cluster_coalitions + gen_coalitions
    model_names = ['MiniBatchKMeans', 'BayesianGMM', 'LDA', 'QDA']
    for model, name in zip(cluster_models + gen_models, model_names):
        clustering.show_clusters(X, model, f'{name} Clusters In 3D PCA Values')

    # check how good the coalitions are
    scores = []
    for coalition, name in zip(coalitions, model_names):
        # np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented drop-in replacement.
        col = y.isin(coalition).astype(int)
        scores.append(davies_bouldin_score(X, col))
        print('')
        print('=========================================')
        print(f'{name} Coalition')
        print(f'Score is {str(scores[-1])[:5]}')
        print(f'Completeness is {str(completeness_score(y, col))[:4]}')
        show_col_parties(pd.Series(col), y)
        clustering.show_labels(X, pd.Series(col), f'{name} Coalition')

    # Davies-Bouldin: lower is better, so the minimum wins.
    best_idx = np.argmin(scores)
    print(f'The best model is {model_names[best_idx]}')
def main():
    """Compare clustering- and generative-model coalition proposals.

    Concatenates the processed train/valid/test splits into one dataset,
    builds coalitions from clustering models and from generative models,
    scores each with the Davies-Bouldin index (lower is better), and
    reports the best model.
    """
    train = pd.read_csv('train_processed.csv')
    valid = pd.read_csv('valid_processed.csv')
    test = pd.read_csv('test_processed.csv')
    data = pd.concat([train, valid, test], ignore_index=True)
    X, Y = generative.target_features_split(data, "Vote")

    # cluster models
    print('Doing clustering coalitions')
    cluster_models = clustering.get_clustering(X, Y)
    cluster_coalitions = clustering.create_cluster_coalitions(
        cluster_models, X, Y, threshold=0.3)

    # generative models
    print('Doing generative coalitions')
    gen_models = generative.train_generative(data)
    gen_coalitions = generative.create_gen_coalitions(gen_models, X, Y)

    coalitions = cluster_coalitions + gen_coalitions
    model_names = ['MiniBatchKMeans', 'BayesianGMM', 'LDA', 'QDA']
    for model, name in zip(cluster_models + gen_models, model_names):
        clustering.show_clusters(X, model, f'{name} Clusters In 3D PCA Values')

    # check how good the coalitions are
    scores = []
    for coalition, name in zip(coalitions, model_names):
        # np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented drop-in replacement.
        col = Y.isin(coalition).astype(int)
        scores.append(davies_bouldin_score(X, col))
        print('')
        print('=========================================')
        print(f'{name} Coalition')
        print(f'Score is {scores[-1]}')
        print(f'Completeness is {completeness_score(Y, col)}')
        show_col_parties(pd.Series(col), Y)
        clustering.show_labels(X, pd.Series(col), f'{name} Coalition')

    # Davies-Bouldin: lower is better, so the minimum wins.
    best_idx = np.argmin(scores)
    print(f'The best model is {model_names[best_idx]}')
def handle_my_custom_event(data):
    """Socket event handler: run clustering on the payload and emit the result.

    The raw payload is forwarded to get_clustering and the result is pushed
    back to the client on the 'on_recommend_recieve' channel.

    NOTE(review): the 'recieve' misspelling in the event name is part of the
    client contract and must not be fixed server-side alone.
    """
    out = get_clustering(data)
    emit('on_recommend_recieve', out)
#!/usr/bin/python
"""Scan the yournews-tdt4 history table and look up opened/annotated docs
in the clustering index.

Replays 'open_doc' and 'save_note' actions for the selected study users in
chronological order, resolving each action to a document number and checking
it against the clustering lookup table.
"""
from clustering import get_clustering
# from tdt4rel import Judge
import MySQLdb

cl, cli = get_clustering()
# j = Judge()

# NOTE(review): database credentials are hard-coded; move them to a config
# file or environment variables before wider reuse.
con = MySQLdb.connect('localhost', 'root', '!!berkeley', 'yournews-tdt4')
cur = con.cursor()

# Static query — no external input is interpolated, so plain execute is safe.
# Excludes the test user 'vst' and several date-coded user-id patterns.
q = """
select userid, action, object, info
from history
where action in ('open_doc', 'save_note')
and userid like 'vs%'
and userid != 'vst'
and userid not like '%-01-%'
and userid not like '%-02-%'
and userid not like '%-29-%'
order by datetime asc
"""
cur.execute(q)

for row in cur:
    userid, action, obj, info = row
    if action == 'open_doc':
        # For open_doc rows the object column IS the document number.
        docno = obj
    if action == 'save_note':
        # For save_note rows the docno is encoded in the first key=value
        # token of the info column.
        docno = info.split()[0].split("=")[1]
    # dict.has_key() is Python-2-only and was removed in Python 3;
    # the `in` operator is equivalent and works in both versions.
    if docno in cli:
        rc = cli[docno]
        # NOTE(review): this rebinds docno from the userid parts, clobbering
        # the document number resolved above — confirm this is intended.
        system, docno, topicid = userid.split("-")