def tags_filter():
    """Count every tag in the ``tags`` table and keep the frequent ones.

    Connects to the MySQL server described by the ``write_mysql`` config
    section, reads the ``tag`` column of every row in ``tags``, counts how
    often each tag occurs, and saves the tags that occur more than 25 times
    to ``./tags.npy``.

    Returns:
        tuple: ``(tags_train, tags_count)``. ``tags_train`` is the list of
        tags whose count is greater than 25; ``tags_count`` is a dict
        mapping every tag to its number of occurrences.
    """
    conn = pymysql.connect(
        host=config.get('write_mysql', 'host'),
        user=config.get('write_mysql', 'user'),
        passwd=config.get('write_mysql', 'passwd'),
        port=int(config.get('write_mysql', 'port')),
        charset="utf8",
        binary_prefix=True)
    try:
        conn.select_db(config.get('write_mysql', 'db'))
        cur = conn.cursor()
        cur.execute('SELECT tags.tag FROM tags')
        tags_all = cur.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Each fetched row is a 1-tuple holding the tag value.
    tags_count = dict(Counter(row[0] for row in tags_all))
    tags_train = [tag for tag, count in tags_count.items() if count > 25]

    np.save('./tags.npy', tags_train)
    return tags_train, tags_count
def write_on_redis(querysql, type):
    """Compute the hot posts for each key and store them in Redis as JSON.

    Rows are fetched with ``db.get_data(querysql, type)``, which is expected
    to return a mapping of key -> list of post rows.  For every key with
    more than 10 rows, each row is scored with ``rank_algorithm.hot`` (fed
    columns 13, 3, 9 and 7 of the row); rows scoring at least
    ``rank_algorithm.standard_mark()`` are grouped by score, the scores are
    passed through ``quick_max_k.qselect(scores, MAX_K)``, and at most 10
    rows are converted with ``convert_to_json``.  The resulting list is
    written to Redis under ``<key>_hot_post``.

    NOTE(review): the original indentation of this function was lost.  The
    Redis write is placed at per-key level (so keys with 10 rows or fewer
    get an empty list) because ``hotPost`` is initialised before the length
    check -- confirm against the deployed version.

    Args:
        querysql: SQL text forwarded unchanged to ``db.get_data``.
        type: Post type selector forwarded unchanged to ``db.get_data``.

    A ``redis.ConnectionError`` is caught and reported on stdout; nothing is
    returned.
    """
    try:
        redisConnection = redis.ConnectionPool(
            host=config.get('redis', 'redis_ip'),
            port=int(config.get('redis', 'redis_port')),
            db=int(config.get('redis', 'redis_db')))
        redisContext = redis.Redis(connection_pool=redisConnection)

        minHotMark = rank_algorithm.standard_mark()
        print("最小的值为:" + str(minHotMark))

        dataset = db.get_data(querysql, type)
        for k, v in dataset.items():
            hotPost = []
            if len(v) > 10:
                # Group the qualifying rows by their score.
                score = {}
                for post in v:
                    scoreNum = rank_algorithm.hot(post[13], post[3], post[9], post[7])
                    if scoreNum >= minHotMark:
                        score.setdefault(scoreNum, []).append(post)

                sortdata = quick_max_k.qselect(list(score.keys()), MAX_K)

                # Walk the selected scores in reverse and take up to 10 rows.
                count = 0
                for sortk in sortdata[::-1]:
                    for p in score[sortk][::-1]:
                        if count >= 10:
                            break
                        hotPost.append(convert_to_json(p))
                        count = count + 1

            payload = json.dumps(hotPost, separators=(',', ':'))
            print(k + '_hot_post' + payload)
            redisContext.set(k + '_hot_post', payload)
    except redis.ConnectionError as err:
        print("connect redis' failed.")
        # redis exceptions carry no ``msg`` attribute; format the exception
        # itself so the handler cannot fail with AttributeError.
        print("Error: {}".format(err))
def write_on_redis(querysql, type):
    """Compute the hot posts for each key and store them in Redis as JSON.

    Rows come from ``db.get_data(querysql, type)``, expected to be a mapping
    of key -> list of post rows.  For a key with more than 10 rows, every
    row is scored by ``rank_algorithm.hot`` using columns 13, 3, 9 and 7;
    rows whose score reaches ``rank_algorithm.standard_mark()`` are bucketed
    by score, the distinct scores go through
    ``quick_max_k.qselect(scores, MAX_K)``, and up to 10 rows are serialised
    with ``convert_to_json``.  The list is stored under ``<key>_hot_post``.

    NOTE(review): the source indentation was lost.  The Redis write is kept
    at per-key level (keys with 10 rows or fewer receive an empty list)
    because ``hotPost`` is created before the length check -- confirm.

    Args:
        querysql: SQL text passed straight to ``db.get_data``.
        type: Post type selector passed straight to ``db.get_data``.

    A ``redis.ConnectionError`` is caught and printed; there is no return
    value.
    """
    try:
        redisConnection = redis.ConnectionPool(
            host=config.get('redis', 'redis_ip'),
            port=int(config.get('redis', 'redis_port')),
            db=int(config.get('redis', 'redis_db')))
        redisContext = redis.Redis(connection_pool=redisConnection)

        minHotMark = rank_algorithm.standard_mark()
        print("最小的值为:" + str(minHotMark))

        dataset = db.get_data(querysql, type)
        for k, v in dataset.items():
            hotPost = []
            if len(v) > 10:
                # Bucket qualifying rows by score.
                score = {}
                for post in v:
                    scoreNum = rank_algorithm.hot(post[13], post[3], post[9], post[7])
                    if scoreNum >= minHotMark:
                        score.setdefault(scoreNum, []).append(post)

                sortdata = quick_max_k.qselect(list(score.keys()), MAX_K)

                # Take at most 10 rows, walking scores and rows in reverse.
                count = 0
                for sortk in sortdata[::-1]:
                    for p in score[sortk][::-1]:
                        if count >= 10:
                            break
                        hotPost.append(convert_to_json(p))
                        count = count + 1

            payload = json.dumps(hotPost, separators=(',', ':'))
            print(k + '_hot_post' + payload)
            redisContext.set(k + '_hot_post', payload)
    except redis.ConnectionError as err:
        print("connect redis' failed.")
        # The exception has no ``msg`` attribute; printing ``err.msg`` would
        # raise AttributeError and hide the real connection error.
        print("Error: {}".format(err))
# -*- coding: UTF-8 -*-
__author__ = 'li'

import time
import os
import sched
import hot_post_redis
from datetime import datetime
from read_config import config

# Task execution period, read from the [timertask] section of the config.
taskCycle = int(config.get('timertask', 'task_cycle'))

# Scheduler driven by wall-clock time and blocking sleeps.
schedule = sched.scheduler(time.time, time.sleep)

# Query for posts.
queryPostSql = "SELECT * FROM post where post.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and post.status!=0"

# Query for city posts.
queryCitySql = "SELECT * FROM citypost where citypost.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and citypost.status!=0"


def perform_command(inc):
    """Refresh both hot-post sets in Redis, then reschedule itself.

    Prints the elapsed time of the refresh and queues the next run ``inc``
    seconds later, which makes the task periodic.
    """
    started_at = datetime.now()
    for sql, post_type in ((queryPostSql, 1), (queryCitySql, 2)):
        hot_post_redis.write_on_redis(sql, post_type)
    elapsed = datetime.now() - started_at
    print(elapsed)
    # Run again after ``inc`` seconds.
    schedule.enter(inc, 0, perform_command, (inc,))


def timming_exe(inc=60):
    """Queue the first run ``inc`` seconds from now and start the scheduler."""
    first_run_args = (inc,)
    schedule.enter(inc, 0, perform_command, first_run_args)
    schedule.run()
# -*- coding: UTF-8 -*- __author__ = 'li' import MySQLdb from read_config import config user = config.get('database', 'db_user') pwd = config.get('database', 'db_password') host = config.get('database', 'db_ip') db = config.get('database', 'db_name') port = int(config.get('database', 'db_port')) """ get data from db type:帖子类型 1:代表帖子 2:代表城市帖子 """ def get_data(select_sql, type): cnx = MySQLdb.connect(user=user, passwd=pwd, port=port, host=host, db=db, charset="utf8") cursor = cnx.cursor() d = {} try: cursor.execute(select_sql) rows = cursor.fetchall() for row in rows: if type == 1:
# -*- coding: UTF-8 -*- __author__ = 'li' import MySQLdb from read_config import config user = config.get('database', 'db_user') pwd = config.get('database', 'db_password') host = config.get('database', 'db_ip') db = config.get('database', 'db_name') port = int(config.get('database', 'db_port')) select_sql = 'select * from filehash' """ get data from db """ def get_data(): cnx = MySQLdb.connect(user=user, passwd=pwd, port=port, host=host, db=db, charset="utf8") cursor = cnx.cursor() d = {} try: cursor.execute(select_sql) rows = cursor.fetchall() for row in rows: d[row[1]] = row[2] return d except Exception as err: print("query database' failed.") print("Error: {}".format(err.msg)) finally:
# -*- coding: UTF-8 -*-
__author__ = 'li'

import time
import os
import sched
import hot_post_redis
from datetime import datetime
from read_config import config

# Period of the task, taken from the [timertask] config section.
taskCycle = int(config.get('timertask', 'task_cycle'))

# Scheduler that uses time.time as its clock and time.sleep to wait.
schedule = sched.scheduler(time.time, time.sleep)

# SQL selecting posts.
queryPostSql = "SELECT * FROM post where post.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and post.status!=0"

# SQL selecting city posts.
queryCitySql = "SELECT * FROM citypost where citypost.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and citypost.status!=0"


def perform_command(inc):
    """Write the post and city-post hot lists to Redis and re-arm the timer.

    The duration of the two writes is printed, then the function schedules
    itself again ``inc`` seconds later so it keeps running periodically.
    """
    begin = datetime.now()
    hot_post_redis.write_on_redis(queryPostSql, 1)
    hot_post_redis.write_on_redis(queryCitySql, 2)
    duration = datetime.now() - begin
    print(duration)
    # Re-arm: call this function again in ``inc`` seconds.
    next_args = (inc,)
    schedule.enter(inc, 0, perform_command, next_args)


def timming_exe(inc=60):
    """Schedule the first ``perform_command`` run in ``inc`` seconds and block in the scheduler loop."""
    # ``enter`` registers an event to fire a given number of seconds from now.
    schedule.enter(inc, 0, perform_command, (inc,))
    schedule.run()