def stats():
    """Build the data for the stats page and hand it to ``templ8``.

    Returns:
        An empty string when the saved configuration has no ``user`` or
        ``dir`` entry; otherwise the result of
        ``templ8("stats.html", info)`` where ``info`` holds the keys
        ``exportees``, ``exporters``, ``backup_progress`` and
        ``restore_progress``.
    """
    conf = data.get_conf()
    if 'user' not in conf or 'dir' not in conf:
        return ""

    user = conf['user']
    info = {}
    info['exportees'] = requests.post(
        sync.root + '/export_breakdown/' + user).json()['export_capacities']

    # The guard above does not cover 'friends', so a configuration without
    # that key must not raise KeyError here. Blank ids are skipped.
    info['exporters'] = [
        {"name": get_friend_name(friend_id),
         "capacity": capacity_for_friend(friend_id)}
        for friend_id in conf.get('friends', [])
        if len(friend_id) > 0
    ]

    progress = requests.get(sync.root + '/backup_progress/' + user).json()
    info['backup_progress'] = progress['backup_progress']

    data_size = size_for_dir(conf['dir'])
    remaining_to_restore = progress['capacity_left_to_restore']
    total = data_size + remaining_to_restore
    # Share of the total that is no longer waiting to be restored; reported
    # as 0 when the total itself is 0 so that we never divide by zero.
    if total:
        info['restore_progress'] = 1 - remaining_to_restore * 1.0 / total
    else:
        info['restore_progress'] = 0
    return templ8("stats.html", info)
def settings():
    """Show the settings page on GET and store submitted settings on POST.

    When the saved configuration has no ``user`` yet, an account is first
    requested from the ``/sign_up`` endpoint under ``sync.root`` and the
    initial configuration (user, secret, friends, dir, empty email) is
    saved, creating ``data.default_dir`` if it does not exist.

    Returns:
        GET: the result of ``templ8("ui.html", ...)`` called with a copy of
        the configuration plus the ``msg`` query argument.
        POST: a redirect to ``/ui?msg=Saved`` after saving ``dir`` and
        ``email`` and posting the e-mail to ``/set_email``.
        Any other method: the plain string ``"Saved!"``.
    """
    conf = data.get_conf()

    if 'user' not in conf:
        signup = requests.post(
            sync.root + "/sign_up", {"name": platform.node()}).json()
        conf['user'] = signup['user']
        conf['secret'] = signup['secret']
        conf['friends'] = conf.get('friends', [])
        default_dir = data.default_dir
        conf['dir'] = default_dir
        if not os.path.exists(default_dir):
            os.mkdir(default_dir)
        conf['email'] = ""
        data.save_conf(conf)

    method = flask.request.method

    if method == 'GET':
        # Work on a copy so the transient message never ends up in the
        # saved configuration.
        page_args = copy.copy(conf)
        page_args['msg'] = flask.request.args.get('msg', None)
        return templ8("ui.html", page_args)

    if method == 'POST':
        form = flask.request.form
        conf['dir'] = form.get('dir')
        conf['email'] = form.get('email')
        payload = {
            "user": conf['user'],
            "secret": conf['secret'],
            "email": conf['email'],
        }
        sync.postjson('/set_email', payload)
        data.save_conf(conf)
        return flask.redirect('/ui?msg=Saved')

    return "Saved!"
def restore(user, secret):
    """Switch the local configuration to an existing account and request a restore.

    Args:
        user: Account identifier stored under ``conf['user']``.
        secret: Account secret stored under ``conf['secret']``.

    The friends list is reset to empty, the credentials are posted to
    ``/restore`` and the configuration is saved afterwards.

    Returns:
        A redirect to ``/ui`` carrying a message that asks the user to pick
        a folder.
    """
    conf = data.get_conf()
    conf['user'], conf['secret'] = user, secret
    conf['friends'] = []

    credentials = {"user": user, "secret": secret}
    postjson('/restore', credentials)
    data.save_conf(conf)

    target = '/ui?msg=Now%20pick%20a%20folder%20and%20wait%20for%20your%20files:'
    return flask.redirect(target)
def sync_now():
    """Run ``sync`` with the saved configuration.

    The ``chunks`` and ``backups`` directories under ``data.data_dir`` are
    created first when they are missing.

    Returns:
        ``False`` when the configuration has no ``user`` or ``dir`` entry;
        otherwise whatever ``sync(...)`` returns.

    Raises:
        OSError: if one of the working directories cannot be created.
    """
    conf = data.get_conf()
    if 'user' not in conf or 'dir' not in conf:
        return False

    chunk_dir = os.path.join(data.data_dir, "chunks")
    backup_dir = os.path.join(data.data_dir, "backups")
    for path in (chunk_dir, backup_dir):
        # makedirs also creates a missing data_dir. Attempting the creation
        # and tolerating "already there" avoids the window between an
        # exists() check and mkdir(); any other failure is re-raised.
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    # 'friends' is not covered by the guard above; use an empty list rather
    # than raising KeyError when it has never been set.
    return sync(conf['user'], conf['secret'], conf.get('friends', []),
                conf['dir'], chunk_dir, backup_dir)
def sync_now():
    """Run ``sync.sync`` with the saved configuration.

    The ``chunks`` and ``backups`` directories under ``data.data_dir`` are
    created first when they are missing.

    Returns:
        ``"Conf. incomplete"`` when the configuration has no ``user`` or
        ``dir`` entry; ``"Okay!"`` once ``sync.sync`` has returned.

    Raises:
        OSError: if one of the working directories cannot be created.
    """
    conf = data.get_conf()
    if 'user' not in conf or 'dir' not in conf:
        return "Conf. incomplete"

    chunk_dir = os.path.join(data.data_dir, "chunks")
    backup_dir = os.path.join(data.data_dir, "backups")
    for path in (chunk_dir, backup_dir):
        # makedirs also creates a missing data_dir. Attempting the creation
        # and tolerating "already there" avoids the window between an
        # exists() check and mkdir(); any other failure is re-raised.
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    # Argument order: sync(user, secret, friends, user_dir, chunk_dir, backup_dir).
    # 'friends' is not covered by the guard above; use an empty list rather
    # than raising KeyError when it has never been set.
    sync.sync(conf['user'], conf['secret'], conf.get('friends', []),
              conf['dir'], chunk_dir, backup_dir)
    return "Okay!"
def settings():
    """Show the settings page on GET and store the submitted settings on POST.

    When the saved configuration has no ``user`` yet, an account is first
    requested from the ``/sign_up`` endpoint under ``sync.root`` and the
    returned ``user``/``secret`` are saved.

    Returns:
        GET: the result of ``templ8("settings.html", conf)``.
        POST: the string ``"Saved!"`` after storing ``friends`` (one id per
        line of the submitted field) and ``dir``.
    """
    conf = data.get_conf()
    if 'user' not in conf:
        info = requests.post(sync.root + "/sign_up").json()
        conf['user'] = info['user']
        conf['secret'] = info['secret']
        data.save_conf(conf)

    if flask.request.method == 'GET':
        return templ8("settings.html", conf)
    elif flask.request.method == 'POST':
        form = flask.request.form
        # A missing field must not crash on None.split(). Browsers submit
        # textarea line breaks as CRLF, so splitting on '\n' alone would
        # leave a trailing '\r' on every id; splitlines() handles both and
        # strip() drops stray surrounding whitespace.
        raw_friends = form.get('friends') or ''
        friend_ids = [line.strip() for line in raw_friends.splitlines()]
        conf['friends'] = [friend_id for friend_id in friend_ids if friend_id]
        conf['dir'] = form.get('dir')
        data.save_conf(conf)
        return "Saved!"
def stats():
    """Build the data for the stats page and hand it to ``templ8``.

    Returns:
        An empty string when the saved configuration has no ``user`` or
        ``dir`` entry; otherwise the result of
        ``templ8("stats.html", info)`` where ``info`` holds the keys
        ``exportees``, ``exporters``, ``backup_progress`` and
        ``restore_progress``.
    """
    conf = data.get_conf()
    if "user" not in conf or "dir" not in conf:
        return ""

    user = conf["user"]
    info = {}
    info["exportees"] = requests.post(
        sync.root + "/export_breakdown/" + user).json()["export_capacities"]

    # The guard above does not cover "friends", so a configuration without
    # that key must not raise KeyError here. Blank ids are skipped.
    info["exporters"] = [
        {"name": get_friend_name(friend_id),
         "capacity": capacity_for_friend(friend_id)}
        for friend_id in conf.get("friends", [])
        if len(friend_id) > 0
    ]

    progress = requests.get(sync.root + "/backup_progress/" + user).json()
    info["backup_progress"] = progress["backup_progress"]

    data_size = size_for_dir(conf["dir"])
    remaining_to_restore = progress["capacity_left_to_restore"]
    total = data_size + remaining_to_restore
    # Share of the total that is no longer waiting to be restored; reported
    # as 0 when the total itself is 0 so that we never divide by zero.
    if total:
        info["restore_progress"] = 1 - remaining_to_restore * 1.0 / total
    else:
        info["restore_progress"] = 0
    return templ8("stats.html", info)
#coding: utf-8 ''' 用来创建数据库和删除数据库,插入表等操作 ''' import mysql.connector import sys import logging from data import one_tran, get_conf cf = get_conf() host = cf["host"] user = cf["user"] passwd = cf["passwd"] if passwd == "null": passwd = "" conn = mysql.connector.connect(host=host, user=user, passwd=passwd) def create(): cur = conn.cursor() cur.execute("CREATE DATABASE IF NOT EXISTS %s DEFAULT CHARSET utf8" % ("tmall")) cur.execute("use tmall") cur.execute( "CREATE table trans (user char(13),item char(13),beh tinyint,geo char(10),category char(10),dt date,tm tinyint, \ index ind1(user(8)),index ind2(item(8)),index ind3(geo),index ind4(category),index ind5(dt))"
存储并计算用户相似度矩阵 ''' import time from data import get_conf,one_tran from multiprocessing import Process,Queue from util.ds import user_vector from util.sim_function import user_sim as Us #这个队列是进程队列,用户名都放在了里面 gqueue = Queue() #这个队列的作用是文件结果都放在这里,最后取出来然后写 oqueue = Queue() cf = get_conf() #每个进程执行的函数 def handle(umatrix): print("process start") count = 0 us = Us() while gqueue.qsize() > 0: user = gqueue.get() users = umatrix.keys() print "进程执行进度",count user_vec = umatrix[user] for u1 in users:
def predict(clf): pred_test = "true" rd = 400000 temp_dev = load_data("dev",rd) dev = [] for pdev in temp_dev : dev.extend(pdev) dev_label = load_label("dev") print "dev样本大小",len(dev),len(dev_label) print "dev特征数",len(dev[0]) result = clf.predict(dev) print "dev正样本预测数",sum(result) f1_s = f1_score(dev_label, result, average='binary') * 100.0 p_s = pscore(dev_label, result, average = 'binary') * 100.0 r_s = rscore(dev_label, result, average = 'binary') * 100.0 print "f1值", f1_s print "准确率",p_s print "召回率",r_s print "手算", 2 * p_s * r_s/(p_s + r_s) if pred_test == "false": sys.exit(1) cf = get_conf() f = open(cf["pred_dir"]) f.readline() test_data = open(cf["pred129"]) final = set() t = open(cf["final"],"w") t.write("user_id,item_id\n") count = 0 rd = 200000 ui_list = [] for tran in test_data: tran = tran.split(',') user,item = tran[0],tran[1] ui_list.append("%s,%s\n"%(user,item)) feature_list = [] for line in f: sp = line.split(',') sp = [int(i) for i in sp] feature_list.append(sp) count += 1 if count % rd == 0: res = clf.predict(feature_list) for i in range(len(res)): if res[i] == 1: final.add(ui_list[i]) feature_list = [] ui_list = ui_list[rd:] print count res = clf.predict(feature_list) for i in range(len(res)): if res[i] == 1: final.add(ui_list[i]) print "剩余东西长度",len(res),len(ui_list) print "test预测结果",len(final) for i in final: t.write(i)
def predict(clf): pred_test = "true" rd = 400000 temp_dev = load_data("dev", rd) dev = [] for pdev in temp_dev: dev.extend(pdev) dev_label = load_label("dev") print "dev样本大小", len(dev), len(dev_label) print "dev特征数", len(dev[0]) result = clf.predict(dev) print "dev正样本预测数", sum(result) f1_s = f1_score(dev_label, result, average='binary') * 100.0 p_s = pscore(dev_label, result, average='binary') * 100.0 r_s = rscore(dev_label, result, average='binary') * 100.0 print "f1值", f1_s print "准确率", p_s print "召回率", r_s print "手算", 2 * p_s * r_s / (p_s + r_s) if pred_test == "false": sys.exit(1) cf = get_conf() f = open(cf["pred_dir"]) f.readline() test_data = open(cf["pred129"]) final = set() t = open(cf["final"], "w") t.write("user_id,item_id\n") count = 0 rd = 200000 ui_list = [] for tran in test_data: tran = tran.split(',') user, item = tran[0], tran[1] ui_list.append("%s,%s\n" % (user, item)) feature_list = [] for line in f: sp = line.split(',') sp = [int(i) for i in sp] feature_list.append(sp) count += 1 if count % rd == 0: res = clf.predict(feature_list) for i in range(len(res)): if res[i] == 1: final.add(ui_list[i]) feature_list = [] ui_list = ui_list[rd:] print count res = clf.predict(feature_list) for i in range(len(res)): if res[i] == 1: final.add(ui_list[i]) print "剩余东西长度", len(res), len(ui_list) print "test预测结果", len(final) for i in final: t.write(i)
def add(id):
    """Add a friend id to the saved configuration, without duplicates.

    Args:
        id: Friend identifier to add.

    Returns:
        A redirect to ``/ui`` carrying the message ``Added.``.
    """
    conf = data.get_conf()
    known_friends = conf.get('friends', [])
    # Passing through a set removes duplicates; the resulting order is
    # whatever the set yields.
    unique_friends = set(known_friends + [id])
    conf['friends'] = list(unique_friends)
    data.save_conf(conf)
    return flask.redirect('/ui?msg=Added.')