コード例 #1
0
ファイル: zhihu_question.py プロジェクト: shuhuai007/sda
 def persist_questions(self, question_list_per_topic):
     """Bulk-insert question rows into the ZHIHU_QUESTION table.

     The statement uses ``INSERT IGNORE``, so MySQL skips rows that would
     otherwise fail (e.g. on a duplicate key) instead of aborting the batch.

     :param question_list_per_topic: sequence of parameter rows; each row
         supplies QUESTION_ID, QUESTION_TITLE, ANSWER, IS_TOP_QUESTION and
         CREATED_TIME, in that order.
     :return: None
     """
     insert_sql = "INSERT IGNORE INTO ZHIHU_QUESTION " \
                  "(QUESTION_ID, QUESTION_TITLE, ANSWER, IS_TOP_QUESTION, CREATED_TIME) " \
                  "VALUES (%s, %s, %s, %s, %s)"
     tm = TransactionManager()
     try:
         tm.execute_many_sql(insert_sql, question_list_per_topic)
     finally:
         # Release the connection even when the insert raises.
         tm.close_connection()
コード例 #2
0
ファイル: zhihu_answer.py プロジェクト: shuhuai007/sda
    def generate_question_id_list(self, last_visit):
        """Collect the ids of questions last visited before ``last_visit``.

        In develop mode the result of ``get_question_id_list()`` is returned
        directly. Otherwise the candidate row numbers produced by
        ``generate_available_ids`` are queried in batches of
        ``AVAIL_ID_SIZE_THRESHOLD`` so that no single ``IN (...)`` list grows
        beyond that size.

        :param last_visit: timestamp value interpolated into the query; only
            rows with an earlier LAST_VISIT are selected.
        :return: list of question ids, each converted to ``str``.
        """
        if self.is_develop_mode():
            return get_question_id_list()

        available_ids = generate_available_ids(MAX_QUESTION_TABLE_ID, QUESTION_ID_STEP)
        # Drop blank entries so an empty id string cannot produce "ID IN ()".
        available_id_list = [entry for entry in available_ids.split(',') if entry.strip()]

        import math
        loop = int(math.ceil(float(len(available_id_list)) / AVAIL_ID_SIZE_THRESHOLD))
        print("......loop:%s" % loop)

        # The subquery numbers rows with @index:=@index+1, so the counter has
        # to be reset for every batch -- including the first one, where an
        # unset MySQL user variable would otherwise be NULL and no row number
        # could match the IN list.
        # NOTE(review): presumably execute_sql runs pre_sql before sql --
        # confirm against TransactionManager.
        pre_sql = "SET @index=0;"

        question_id_list = []
        tm = TransactionManager()
        try:
            for batch_no in range(loop):
                begin_index = batch_no * AVAIL_ID_SIZE_THRESHOLD
                end_index = begin_index + AVAIL_ID_SIZE_THRESHOLD

                sql = "SELECT QUESTION_ID FROM (select @index:=@index+1 as ID, QUESTION_ID, LAST_VISIT from ZHIHU_QUESTION_ID) AS q  WHERE timestamp(q.LAST_VISIT) < timestamp('%s')" % last_visit
                sql += " AND ID IN (%s) " % ",".join(available_id_list[begin_index:end_index])

                results = tm.execute_sql(sql, pre_sql)
                question_id_list.extend(str(row[0]) for row in results)
        finally:
            # Release the connection even when a batch query raises.
            tm.close_connection()
        return question_id_list
コード例 #3
0
ファイル: zhihu_util.py プロジェクト: shuhuai007/sda
def get_proxy_from_db():
    """Return one randomly selected proxy from the ZHIHU_PROXY table.

    :return: the PROXY_IP value of the selected row as ``str``, or an empty
        string when the query yields no rows.
    """
    select_sql = "SELECT PROXY_IP FROM ZHIHU_PROXY ORDER BY RAND() LIMIT 1"
    tm = TransactionManager()
    try:
        results = tm.execute_sql(select_sql)
    finally:
        # Release the connection even when the query raises.
        tm.close_connection()

    if not results:
        return ""
    # LIMIT 1 means there is at most one row; take it without assuming the
    # result container supports indexing.
    first_row = next(iter(results))
    return str(first_row[0])
コード例 #4
0
ファイル: zhihu_topic.py プロジェクト: shuhuai007/sda
def persist_topics(topic_list):
    """
    Persist topics into mysql.

    The statement uses ``INSERT IGNORE``, so rows MySQL would otherwise
    reject (e.g. duplicate keys) are skipped rather than failing the batch.

    :param topic_list: all the topics including level 1 and level 2; each
        entry supplies TOPIC_ID, NAME and PARENT_ID, in that order.
    :return: None
    """
    insert_sql = "INSERT IGNORE INTO ZHIHU_TOPIC (TOPIC_ID, NAME, PARENT_ID) \
                  VALUES (%s, %s, %s)"
    print("insert sql:%s" % insert_sql)
    tm = TransactionManager()
    try:
        tm.execute_many_sql(insert_sql, topic_list)
    finally:
        # Release the connection even when the insert raises.
        tm.close_connection()
コード例 #5
0
ファイル: zhihu_question.py プロジェクト: shuhuai007/sda
def get_level2_topic_id_list(last_visit_date, is_develop=False):
    """Return ids of topics that were last visited before ``last_visit_date``.

    Only rows whose TOPIC_ID differs from their PARENT_ID are selected, and
    only among the row ids produced by ``generate_available_topic_ids``.

    :param last_visit_date: value compared against the LAST_VISIT column;
        rows with an earlier LAST_VISIT are returned.
    :param is_develop: when true, the query is capped with ``LIMIT 2``.
    :return: list of topic ids, each converted to ``str``.
    """
    # NOTE(review): the query is assembled by string interpolation; if
    # last_visit_date can come from untrusted input, switch to a
    # parameterized query once TransactionManager's API allows it.
    sql = "SELECT TOPIC_ID FROM ZHIHU_TOPIC WHERE TOPIC_ID != PARENT_ID AND LAST_VISIT < '%s'" \
          % last_visit_date
    available_topic_ids = generate_available_topic_ids(MAX_TOPIC_TABLE_ID, TOPIC_ID_STEP)
    sql += " AND ID IN (%s) " % available_topic_ids

    if is_develop:
        sql += " LIMIT 2"

    print("......execute sql:%s" % sql)
    tm = TransactionManager()
    try:
        results = tm.execute_sql(sql)
    finally:
        # Release the connection even when the query raises.
        tm.close_connection()

    return [str(row[0]) for row in results]
コード例 #6
0
ファイル: zhihu_question.py プロジェクト: shuhuai007/sda
def update_level2_topic_timestamp(level2_topic_id):
    """Set LAST_VISIT of the given topic to the current timestamp.

    :param level2_topic_id: TOPIC_ID of the ZHIHU_TOPIC row to update; it is
        interpolated into the statement unquoted.
    :return: None
    """
    # NOTE(review): the statement is assembled by string interpolation; if
    # level2_topic_id can come from untrusted input, switch to a
    # parameterized query once TransactionManager's API allows it.
    sql = "UPDATE ZHIHU_TOPIC SET LAST_VISIT = '%s' WHERE TOPIC_ID = %s" % \
          (get_current_timestamp(), level2_topic_id)
    tm = TransactionManager()
    try:
        tm.execute_sql(sql)
    finally:
        # Release the connection even when the update raises.
        tm.close_connection()