Ejemplo n.º 1
0
def tags_filter():
    """Count every tag in the ``tags`` table and keep the frequent ones.

    Connects to the MySQL server described by the ``write_mysql`` config
    section, reads the ``tag`` column of every row in ``tags``, counts how
    often each value occurs, and saves the tags that occur more than 25
    times to ``./tags.npy``.

    Returns:
        A ``(tags_train, tags_count)`` tuple: the list of tags occurring
        more than 25 times, and a dict mapping every tag to its count.
    """
    conn = pymysql.connect(host=config.get('write_mysql', 'host'),
                           user=config.get('write_mysql', 'user'),
                           passwd=config.get('write_mysql', 'passwd'),
                           port=int(config.get('write_mysql', 'port')),
                           charset="utf8",
                           binary_prefix=True)
    try:
        conn.select_db(config.get('write_mysql', 'db'))
        cur = conn.cursor()
        cur.execute('SELECT tags.tag FROM tags')
        tags_all = cur.fetchall()
    finally:
        # Release the connection even when selecting the db or querying fails.
        conn.close()

    # Each fetched row is a 1-tuple holding the tag value.
    tags_count = dict(Counter(row[0] for row in tags_all))
    tags_train = [tag for tag, count in tags_count.items() if count > 25]
    np.save('./tags.npy', tags_train)
    return tags_train, tags_count
Ejemplo n.º 2
0
def write_on_redis(querysql, type):
    """Rank the posts returned by a query and store the hottest in Redis.

    Rows are loaded with ``db.get_data(querysql, type)``, whose result is
    iterated as a mapping of key -> sequence of post rows. For every key
    with more than 10 rows, each row is scored with ``rank_algorithm.hot``;
    rows scoring at least ``rank_algorithm.standard_mark()`` are grouped by
    score, the scores are passed through ``quick_max_k.qselect(..., MAX_K)``,
    and up to 10 rows are serialized to JSON and written to the Redis key
    ``<key>_hot_post``.

    Args:
        querysql: SQL text forwarded unchanged to ``db.get_data``.
        type: Post-type selector forwarded unchanged to ``db.get_data``.
            (The name shadows the builtin but is kept for callers.)

    A ``redis.ConnectionError`` is reported on stdout and not re-raised.
    """
    try:
        redisConnection = redis.ConnectionPool(
            host=config.get('redis', 'redis_ip'),
            port=int(config.get('redis', 'redis_port')),
            db=int(config.get('redis', 'redis_db')))
        redisContext = redis.Redis(connection_pool=redisConnection)

        minHotMark = rank_algorithm.standard_mark()
        print("最小的值为:" + str(minHotMark))

        dataset = db.get_data(querysql, type)

        for k, v in dataset.items():
            # Keys with 10 or fewer rows are left untouched in Redis.
            if len(v) <= 10:
                continue

            # Group the qualifying rows by their score.
            # NOTE(review): columns 13, 3, 9 and 7 are the inputs to the
            # hotness formula; their meaning is not visible here.
            score = {}
            for post in v:
                scoreNum = rank_algorithm.hot(post[13], post[3], post[9],
                                              post[7])
                if scoreNum >= minHotMark:
                    score.setdefault(scoreNum, []).append(post)

            hotPost = []
            count = 0
            sortdata = quick_max_k.qselect(list(score), MAX_K)
            # Walk the selected scores, and each score's rows, in reverse.
            for sortk in sortdata[::-1]:
                for p in score[sortk][::-1]:
                    if count >= 10:
                        break
                    hotPost.append(convert_to_json(p))
                    count = count + 1

            payload = json.dumps(hotPost, separators=(',', ':'))
            print(k + '_hot_post' + payload)
            redisContext.set(k + '_hot_post', payload)

    except redis.ConnectionError as err:
        print("connect redis' failed.")
        # The exception has no ``msg`` attribute; format the exception itself
        # so the handler does not raise AttributeError.
        print("Error: {}".format(err))
Ejemplo n.º 3
0
def write_on_redis(querysql, type):
    """Score queried posts and cache the top entries per key in Redis.

    ``db.get_data(querysql, type)`` supplies a mapping whose values are
    sequences of post rows. Keys with 10 or fewer rows are skipped. For the
    rest, every row is scored via ``rank_algorithm.hot``; rows below
    ``rank_algorithm.standard_mark()`` are dropped, the remaining scores are
    narrowed with ``quick_max_k.qselect(..., MAX_K)``, and at most 10 rows
    are JSON-encoded and stored under the Redis key ``<key>_hot_post``.

    Args:
        querysql: SQL statement handed to ``db.get_data`` as-is.
        type: Post-type selector handed to ``db.get_data`` as-is. The name
            shadows the builtin; it is kept so existing callers still work.

    Connection failures to Redis are printed and swallowed.
    """
    try:
        redisConnection = redis.ConnectionPool(host=config.get('redis', 'redis_ip'),
                                               port=int(config.get('redis', 'redis_port')),
                                               db=int(config.get('redis', 'redis_db')))
        redisContext = redis.Redis(connection_pool=redisConnection)

        minHotMark = rank_algorithm.standard_mark()
        print("最小的值为:" + str(minHotMark))

        dataset = db.get_data(querysql, type)

        for k, v in dataset.items():
            if len(v) > 10:
                # Bucket the rows that reach the minimum mark by score.
                # NOTE(review): the meaning of row columns 13/3/9/7 is not
                # shown in this file; confirm against the table schema.
                score = {}
                for post in v:
                    scoreNum = rank_algorithm.hot(post[13], post[3], post[9], post[7])
                    if scoreNum >= minHotMark:
                        score.setdefault(scoreNum, []).append(post)

                hotPost = []
                sortdata = quick_max_k.qselect(list(score.keys()), MAX_K)
                # Selected scores and their rows are both visited in reverse;
                # collection stops adding once 10 rows have been gathered.
                for sortk in sortdata[::-1]:
                    for p in score[sortk][::-1]:
                        if len(hotPost) >= 10:
                            break
                        hotPost.append(convert_to_json(p))

                serialized = json.dumps(hotPost, separators=(',', ':'))
                print(k + '_hot_post' + serialized)
                redisContext.set(k + '_hot_post', serialized)

    except redis.ConnectionError as err:
        print("connect redis' failed.")
        # ``err.msg`` does not exist on this exception and would raise
        # AttributeError here, so report the exception object directly.
        print("Error: {}".format(err))
Ejemplo n.º 4
0
# -*- coding: UTF-8 -*-
__author__ = 'li'

import time, os, sched, hot_post_redis
from datetime import datetime
from read_config import config
# Task execution period, read from the 'timertask' section of the config.
# NOTE(review): not referenced in the visible code; timming_exe() falls back
# to its own default of 60 - confirm which value callers actually pass.
taskCycle = int(config.get('timertask', 'task_cycle'))

# Scheduler driven by time.time for the clock and time.sleep for waiting.
schedule = sched.scheduler(time.time, time.sleep)
# Query for posts from the last 7 days whose status is not 0.
queryPostSql = "SELECT * FROM post where post.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and post.status!=0"
# Query for city posts from the last 7 days whose status is not 0.
queryCitySql = "SELECT * FROM citypost where citypost.postTime > DATE_SUB(now(),INTERVAL 7 DAY)  and citypost.status!=0"


def perform_command(inc):
    """Refresh both hot-post caches once, print the duration, and re-arm.

    Args:
        inc: Delay in seconds before this function is scheduled again.
    """
    began = datetime.now()
    # Post query uses type 1, city-post query uses type 2.
    for sql, post_type in ((queryPostSql, 1), (queryCitySql, 2)):
        hot_post_redis.write_on_redis(sql, post_type)
    print(datetime.now() - began)
    # Queue the next run ``inc`` seconds from now so the task keeps repeating.
    schedule.enter(inc, 0, perform_command, (inc, ))


def timming_exe(inc=60):
    """Queue the first run of ``perform_command`` and start the scheduler.

    Args:
        inc: Seconds to wait before the first run. The same value is passed
            to ``perform_command``, which uses it to re-schedule itself.

    ``schedule.run()`` blocks for as long as events remain queued.
    """
    first_run_args = (inc, )
    schedule.enter(inc, 0, perform_command, first_run_args)
    schedule.run()
Ejemplo n.º 5
0
# -*- coding: UTF-8 -*-
__author__ = 'li'
import MySQLdb
from read_config import config

# MySQL connection settings, read once at import time from the 'database'
# section of the project config. The port is the only value converted to int.
user = config.get('database', 'db_user')
pwd = config.get('database', 'db_password')
host = config.get('database', 'db_ip')
db = config.get('database', 'db_name')
port = int(config.get('database', 'db_port'))
"""
get data from db
type:帖子类型 1:代表帖子 2:代表城市帖子
"""


def get_data(select_sql, type):
    cnx = MySQLdb.connect(user=user,
                          passwd=pwd,
                          port=port,
                          host=host,
                          db=db,
                          charset="utf8")
    cursor = cnx.cursor()
    d = {}
    try:
        cursor.execute(select_sql)
        rows = cursor.fetchall()
        for row in rows:

            if type == 1:
Ejemplo n.º 6
0
# -*- coding: UTF-8 -*-
__author__ = 'li'
import MySQLdb
from read_config import config

# MySQL connection settings, read once at import time from the 'database'
# section of the project config. The port is the only value converted to int.
user = config.get('database', 'db_user')
pwd = config.get('database', 'db_password')
host = config.get('database', 'db_ip')
db = config.get('database', 'db_name')
port = int(config.get('database', 'db_port'))
# Fixed query executed by get_data(): every row of the filehash table.
select_sql = 'select * from filehash'
"""
get data from db

"""


def get_data():
    cnx = MySQLdb.connect(user=user, passwd=pwd, port=port, host=host, db=db, charset="utf8")
    cursor = cnx.cursor()
    d = {}
    try:
        cursor.execute(select_sql)
        rows = cursor.fetchall()
        for row in rows:
            d[row[1]] = row[2]
        return d
    except Exception as err:
        print("query database' failed.")
        print("Error: {}".format(err.msg))
    finally:
Ejemplo n.º 7
0
# -*- coding: UTF-8 -*-
__author__ = 'li'

import time, os, sched, hot_post_redis
from datetime import datetime
from read_config import config
# Task execution period, read from the 'timertask' section of the config.
# NOTE(review): not referenced in the visible code; timming_exe() falls back
# to its own default of 60 - confirm which value callers actually pass.
taskCycle = int(config.get('timertask', 'task_cycle'))

# Scheduler driven by time.time for the clock and time.sleep for waiting.
schedule = sched.scheduler(time.time, time.sleep)
# Query for posts from the last 7 days whose status is not 0.
queryPostSql = "SELECT * FROM post where post.postTime > DATE_SUB(now(),INTERVAL 7 DAY) and post.status!=0"
# Query for city posts from the last 7 days whose status is not 0.
queryCitySql = "SELECT * FROM citypost where citypost.postTime > DATE_SUB(now(),INTERVAL 7 DAY)  and citypost.status!=0"


def perform_command(inc):
    """Run the post and city-post refreshes, time them, then re-schedule.

    Args:
        inc: Number of seconds after which this function is queued again.
    """
    t0 = datetime.now()
    jobs = [(queryPostSql, 1), (queryCitySql, 2)]
    for statement, post_type in jobs:
        hot_post_redis.write_on_redis(statement, post_type)
    elapsed = datetime.now() - t0
    print(elapsed)
    # Re-arm the scheduler so the task runs again ``inc`` seconds from now.
    schedule.enter(inc, 0, perform_command, (inc,))


def timming_exe(inc=60):
    """Schedule the first ``perform_command`` run and enter the scheduler loop.

    Args:
        inc: Delay in seconds before the first run; also forwarded to
            ``perform_command`` as its re-scheduling interval.

    The call blocks inside ``schedule.run()`` while events remain queued.
    """
    schedule.enter(delay=inc, priority=0, action=perform_command,
                   argument=(inc,))
    schedule.run()