Esempio n. 1
0
def handle_my_custom_event(data):
    """Cluster the request payload and emit the result back to the client.

    ``data`` is indexed with the keys ``'id'``, ``'numClusters'`` and
    ``'clusterLen'`` and is passed unchanged to ``get_clustering``.  The
    reply is emitted on the event ``'on_clustering_recieve' + str(id)`` and
    is a dict with these entries:

    * ``id``          -- echoed from the request
    * ``numClusters`` -- ``int(numClusters) + int(clusterLen)``
    * ``data``        -- ``out['data']`` exactly as returned by the clustering
    * ``clusterCen``  -- ``out['cluster_cen']`` wrapped in a DataFrame and
      serialised with ``to_json()``
    * ``colHeaders``  -- ``out['col_headers'].tolist()`` as a JSON string
    """
    # A single pre-formatted argument makes print(...) produce the same text
    # under the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (" gotten item ++++++++++++++++++++++++++++++++++++++++++ ", data['id']))
    out = get_clustering(data)

    # Convert once; both values are needed for the payload and the debug line.
    num_clusters = int(data['numClusters'])
    cluster_len = int(data['clusterLen'])

    obj = {
        'id': data['id'],
        'numClusters': num_clusters + cluster_len,
        'data': out['data'],
    }
    centers = pd.DataFrame(out['cluster_cen'])

    print("%s %s %s %s" % ("numcluster sneding ", obj['numClusters'], num_clusters, cluster_len))
    obj['clusterCen'] = centers.to_json()
    obj['colHeaders'] = json.dumps(out['col_headers'].tolist())

    # The event name (including its spelling) is part of the client contract.
    emit('on_clustering_recieve' + str(obj['id']), obj)
    def BOVW_create(self, ims, im_labels):
        """Fit the bag-of-visual-words model and store it on ``self.BOVW``.

        The images are preprocessed with ``self.preprocess_params``, their
        keypoints and descriptors are extracted, and the concatenation of all
        descriptors is handed to ``clustering.get_clustering`` together with
        ``self.cluster_model_type`` and ``self.cluster_model_params``.  The
        returned model becomes ``self.BOVW`` and is annotated with the
        preprocessed images, ``im_labels``, the keypoints, the descriptors
        and, per image, the result of ``predict`` on its descriptors.
        """
        logging.debug('Total images to process for BOVW: %d' % len(ims))

        processed = utils.preprocess_images(ims, **self.preprocess_params)
        extractor = self.get_descriptor_extractor()
        keypoints, descriptors = utils.get_kp_and_des(processed, extractor)

        # One model is fitted on the descriptors of every image at once.
        self.BOVW = clustering.get_clustering(
            np.concatenate(descriptors),
            self.cluster_model_type,
            self.cluster_model_params,
        )

        model = self.BOVW
        model.ims = processed
        model.im_labels = im_labels
        model.kp = keypoints
        model.des = descriptors
        model.clusters = [model.predict(image_des) for image_des in descriptors]

        logging.debug('BOVW (k=%d) created.' % model.n_clusters)
Esempio n. 3
0
def main():
    """Build coalitions from clustering and generative models and rank them.

    Reads ``new_test_processed.csv`` and splits it into features ``X`` and
    the ``"Vote"`` target ``y``.  Coalitions are created from the clustering
    models and from the generative models, and are paired positionally with
    the names in ``model_names``.  For every coalition the membership of
    each sample (``y.isin(coalition)``) is used as a binary labelling, which
    is scored with ``davies_bouldin_score`` and ``completeness_score`` and
    plotted.  The name whose coalition has the lowest Davies-Bouldin score
    is printed as the best model.
    """
    data = pd.read_csv("new_test_processed.csv")
    X, y = generative.target_features_split(data, "Vote")

    # cluster models
    print('Doing clustering coalitions')
    cluster_models = clustering.get_clustering(X, y)
    cluster_coalitions = clustering.create_cluster_coalitions(
        cluster_models, X, y, threshold=0.3, col_thresh=0.75)

    # generative models
    print('Doing generative coalitions')
    gen_models = generative.train_generative(data)
    gen_coalitions = generative.create_gen_coalitions(gen_models, X, y)

    coalitions = cluster_coalitions + gen_coalitions

    model_names = ['MiniBatchKMeans', 'BayesianGMM', 'LDA', 'QDA']
    for model, name in zip(cluster_models + gen_models, model_names):
        clustering.show_clusters(X, model, f'{name} Clusters In 3D PCA Values')

    # check how good the coalitions are
    scores = []
    for coalition, name in zip(coalitions, model_names):
        # ``np.int`` was only an alias of the builtin ``int`` and was removed
        # in NumPy 1.24; the builtin gives the same dtype on every version.
        col = y.isin(coalition).astype(int)

        score = davies_bouldin_score(X, col)
        scores.append(score)
        print('')
        print('=========================================')
        print(f'{name} Coalition')
        print(f'Score is {str(score)[:5]}')
        print(f'Completeness is {str(completeness_score(y, col))[:4]}')
        show_col_parties(pd.Series(col), y)
        clustering.show_labels(X, pd.Series(col), f'{name} Coalition')

    # The lowest score wins.
    best_idx = np.argmin(scores)
    print(f'The best model is {model_names[best_idx]}')
Esempio n. 4
0
def main():
    """Build coalitions on the combined data set and rank them.

    Reads ``train_processed.csv``, ``valid_processed.csv`` and
    ``test_processed.csv``, concatenates them with a fresh index and splits
    the result into features ``X`` and the ``"Vote"`` target ``Y``.
    Coalitions are created from the clustering models and from the
    generative models, and are paired positionally with the names in
    ``model_names``.  For every coalition the membership of each sample
    (``Y.isin(coalition)``) is used as a binary labelling, which is scored
    with ``davies_bouldin_score`` and ``completeness_score`` and plotted.
    The name whose coalition has the lowest Davies-Bouldin score is printed
    as the best model.
    """
    frames = [
        pd.read_csv('train_processed.csv'),
        pd.read_csv('valid_processed.csv'),
        pd.read_csv('test_processed.csv'),
    ]
    data = pd.concat(frames, ignore_index=True)
    X, Y = generative.target_features_split(data, "Vote")

    # cluster models
    print('Doing clustering coalitions')
    cluster_models = clustering.get_clustering(X, Y)
    cluster_coalitions = clustering.create_cluster_coalitions(cluster_models, X, Y, threshold=0.3)

    # generative models
    print('Doing generative coalitions')
    gen_models = generative.train_generative(data)
    gen_coalitions = generative.create_gen_coalitions(gen_models, X, Y)

    coalitions = cluster_coalitions + gen_coalitions

    model_names = ['MiniBatchKMeans', 'BayesianGMM', 'LDA', 'QDA']
    for model, name in zip(cluster_models + gen_models, model_names):
        clustering.show_clusters(X, model, f'{name} Clusters In 3D PCA Values')

    # check how good the coalitions are
    scores = []
    for coalition, name in zip(coalitions, model_names):
        # ``np.int`` was only an alias of the builtin ``int`` and was removed
        # in NumPy 1.24; the builtin gives the same dtype on every version.
        col = Y.isin(coalition).astype(int)

        scores.append(davies_bouldin_score(X, col))
        print('')
        print('=========================================')
        print(f'{name} Coalition')
        print(f'Score is {scores[-1]}')
        print(f'Completeness is {completeness_score(Y, col)}')
        show_col_parties(pd.Series(col), Y)
        clustering.show_labels(X, pd.Series(col), f'{name} Coalition')

    # The lowest score wins.
    best_idx = np.argmin(scores)
    print(f'The best model is {model_names[best_idx]}')
Esempio n. 5
0
def handle_my_custom_event(data):
    """Run ``get_clustering`` on ``data`` and emit the result.

    The clustering output is sent unchanged on the
    ``'on_recommend_recieve'`` event.
    """
    emit('on_recommend_recieve', get_clustering(data))
#!/usr/bin/python

from clustering import get_clustering
# from tdt4rel import Judge
import MySQLdb

# get_clustering() returns a pair; the second element (`cli`) is used by the
# loop below as a mapping that is looked up by `docno`.
cl, cli = get_clustering()

# j = Judge()

# NOTE(review): the connection parameters, including the password, are
# hard-coded here -- presumably (host, user, password, database); confirm
# against the MySQLdb docs and consider moving them out of the source.
con = MySQLdb.connect('localhost', 'root', '!!berkeley', 'yournews-tdt4')
cur = con.cursor()


# Select the 'open_doc' and 'save_note' history rows of users whose id starts
# with 'vs', excluding the user 'vst' and any id containing '-01-', '-02-' or
# '-29-', ordered by datetime ascending.
q = """
select userid, action, object, info from history where action in ('open_doc', 'save_note') and userid like 'vs%' and userid != 'vst' and userid not like '%-01-%' and userid not like '%-02-%' and userid not like '%-29-%' order by datetime asc
"""

cur.execute(q)
for row in cur:
	userid, action, obj, info = row
	
	if action == 'open_doc':
		docno = obj
	
	if action == 'save_note':
		docno = info.split()[0].split("=")[1]
	
	if cli.has_key(docno):
		rc = cli[docno]
		system, docno, topicid = userid.split("-")