def persist_questions(self, question_list_per_topic):
    """Bulk-insert question rows into the ZHIHU_QUESTION table.

    Rows are written with ``INSERT IGNORE``.

    :param question_list_per_topic: sequence of parameter sets handed to
        ``TransactionManager.execute_many_sql``; presumably one 5-item tuple
        per row in the order (QUESTION_ID, QUESTION_TITLE, ANSWER,
        IS_TOP_QUESTION, CREATED_TIME) -- confirm against the caller.
    :return: None
    """
    insert_sql = (
        "INSERT IGNORE INTO ZHIHU_QUESTION "
        "(QUESTION_ID, QUESTION_TITLE, ANSWER, IS_TOP_QUESTION, CREATED_TIME) "
        "VALUES (%s, %s, %s, %s, %s)"
    )
    tm = TransactionManager()
    try:
        tm.execute_many_sql(insert_sql, question_list_per_topic)
    finally:
        # Release the connection even when the insert raises.
        tm.close_connection()
def generate_question_id_list(self, last_visit):
    """Collect the IDs of questions whose LAST_VISIT is older than *last_visit*.

    In develop mode the result of ``get_question_id_list()`` is returned
    directly. Otherwise the candidate row numbers produced by
    ``generate_available_ids`` are split into batches of
    ``AVAIL_ID_SIZE_THRESHOLD`` entries and one SELECT is issued per batch,
    all on the same connection.

    :param last_visit: value interpolated into ``timestamp('...')`` in the
        query; rows with an earlier LAST_VISIT are selected.
    :return: list of question IDs as strings (or whatever
        ``get_question_id_list()`` returns in develop mode).
    """
    if self.is_develop_mode():
        return get_question_id_list()

    import math

    available_ids = generate_available_ids(MAX_QUESTION_TABLE_ID, QUESTION_ID_STEP)
    available_id_list = available_ids.split(',')
    loop = int(math.ceil(float(len(available_id_list)) / AVAIL_ID_SIZE_THRESHOLD))
    print("......loop:%s" % loop)

    # The part of the statement that does not depend on the batch.
    base_sql = "SELECT QUESTION_ID FROM (select @index:=@index+1 as ID, QUESTION_ID, LAST_VISIT from ZHIHU_QUESTION_ID) AS q WHERE timestamp(q.LAST_VISIT) < timestamp('%s')" % last_visit

    question_id_list = []
    pre_sql = None
    tm = TransactionManager()
    try:
        for i in range(loop):
            begin_index = i * AVAIL_ID_SIZE_THRESHOLD
            end_index = (i + 1) * AVAIL_ID_SIZE_THRESHOLD
            batch = available_id_list[begin_index:end_index]
            sql = base_sql + " AND ID IN (%s) " % ",".join(batch)
            # NOTE(review): the "SET @index=0;" statement is first attached on
            # the second batch (i == 1) and stays attached for every later
            # batch; the first batch runs without it. Behavior is kept as-is --
            # confirm that skipping the reset on the first batch is intended.
            if i == 1:
                pre_sql = "SET @index=0;"
            results = tm.execute_sql(sql, pre_sql)
            question_id_list.extend(str(row[0]) for row in results)
    finally:
        # Release the connection even when one of the batch queries raises.
        tm.close_connection()
    return question_id_list
def get_proxy_from_db():
    """Pick one proxy address at random from the ZHIHU_PROXY table.

    :return: the PROXY_IP value of the selected row converted with ``str``,
        or an empty string when the query yields no rows.
    """
    select_sql = "SELECT PROXY_IP FROM ZHIHU_PROXY ORDER BY RAND() LIMIT 1"
    tm = TransactionManager()
    try:
        results = tm.execute_sql(select_sql)
    finally:
        # Release the connection even when the query raises.
        tm.close_connection()

    if not results:
        return ""
    # The query is limited to one row, so the first row is the answer.
    for row in results:
        return str(row[0])
def persist_topics(topic_list):
    """
    Persist topics into mysql.

    Rows are written with ``INSERT IGNORE`` and the statement is printed
    before it is executed.

    :param topic_list: all the topics including level 1 and level 2;
        presumably one (TOPIC_ID, NAME, PARENT_ID) tuple per topic --
        confirm against the caller.
    :return: None
    """
    # Adjacent literals are used instead of a backslash continuation inside
    # the string so that source indentation does not end up in the SQL text.
    insert_sql = (
        "INSERT IGNORE INTO ZHIHU_TOPIC (TOPIC_ID, NAME, PARENT_ID) "
        "VALUES (%s, %s, %s)"
    )
    print("insert sql:%s" % insert_sql)
    tm = TransactionManager()
    try:
        tm.execute_many_sql(insert_sql, topic_list)
    finally:
        # Release the connection even when the insert raises.
        tm.close_connection()
def get_level2_topic_id_list(last_visit_date, is_develop=False):
    """Return the IDs of level-2 topics last visited before *last_visit_date*.

    A topic is selected when its TOPIC_ID differs from its PARENT_ID, its
    LAST_VISIT is earlier than *last_visit_date*, and its ID column is among
    the values produced by ``generate_available_topic_ids``. The final
    statement is printed before it is executed.

    :param last_visit_date: value interpolated into the ``LAST_VISIT < '...'``
        condition of the query.
    :param is_develop: when true, ``LIMIT 2`` is appended to the query.
    :return: list of topic IDs as strings.
    """
    sql = "SELECT TOPIC_ID FROM ZHIHU_TOPIC WHERE TOPIC_ID != PARENT_ID AND LAST_VISIT < '%s'" \
        % last_visit_date
    available_topic_ids = generate_available_topic_ids(MAX_TOPIC_TABLE_ID, TOPIC_ID_STEP)
    sql += " AND ID IN (%s) " % available_topic_ids
    if is_develop:
        sql += " LIMIT 2"
    print("......execute sql:%s" % sql)

    tm = TransactionManager()
    try:
        results = tm.execute_sql(sql)
    finally:
        # Release the connection even when the query raises.
        tm.close_connection()
    return [str(row[0]) for row in results]
def update_level2_topic_timestamp(level2_topic_id):
    """Set LAST_VISIT of the given topic to the current timestamp.

    :param level2_topic_id: value interpolated into the ``TOPIC_ID = ...``
        condition of the UPDATE statement.
    :return: None
    """
    sql = "UPDATE ZHIHU_TOPIC SET LAST_VISIT = '%s' WHERE TOPIC_ID = %s" % (
        get_current_timestamp(),
        level2_topic_id,
    )
    tm = TransactionManager()
    try:
        tm.execute_sql(sql)
    finally:
        # Release the connection even when the update raises.
        tm.close_connection()