def find_how_many_is_spider():
    """Return the number of rows in ZHUBO_LIVE_TABLE matching ``is_live = 1``."""
    db = MYSQL()
    live_rows = db.select_from_table(ZHUBO_LIVE_TABLE, "is_live = 1")
    # Count by iterating, exactly as the row-by-row copy did before.
    return sum(1 for _ in live_rows)
def get_zhubo_id_list():
    """Return the ``zhubo_id`` value of every row selected from ZHUBO_INFO_TABLE.

    The table is queried with an empty-list condition.
    """
    db = MYSQL()
    records = db.select_from_table(ZHUBO_INFO_TABLE, [])
    return [record["zhubo_id"] for record in records]
def get_goods_id_list():
    """Return the ``goods_id`` value of every row selected from LIVE_GOODS_TABLE.

    The table is queried with an empty-list condition.
    """
    db = MYSQL()
    records = db.select_from_table(LIVE_GOODS_TABLE, [])
    return [record["goods_id"] for record in records]
def get_goods_id_list_from_temp(live_id):
    """Return the ``goods_id`` values stored in LIVE_GOODS_TEMP_TABLE for one live_id.

    live_id: value interpolated into the ``live_id=...`` select condition.
    """
    db = MYSQL()
    condition = "live_id={}".format(live_id)
    records = db.select_from_table(LIVE_GOODS_TEMP_TABLE, condition)
    return [record["goods_id"] for record in records]
def get_rows():
    """Yield the ``goods_id`` of each row in ``live_taobao_webstar_crawl_live_goods``.

    This is a generator: the connection is opened on the first ``next()`` and
    closed with ``close_db()`` when iteration ends. The close is done in a
    ``finally`` clause so it also happens when the consumer stops early
    (generator closed or garbage-collected) or when reading a row raises;
    previously the connection was only closed after full exhaustion.
    """
    conn = MYSQL()
    try:
        # No condition argument is passed, matching the original call.
        for row in conn.select_from_table("live_taobao_webstar_crawl_live_goods"):
            yield row["goods_id"]
    finally:
        conn.close_db()
def update_zhubo_from_db():
    """Yield the ``zhubo_id`` of rows not marked live whose live id is not "0".

    Rows are read from ZHUBO_LIVE_TABLE with the condition ``is_live != 1``.
    For each one ``get_live_id(zhubo_id)`` is called; the id is yielded unless
    the result equals the string "0". Both outcomes are logged at INFO level.
    """
    db = MYSQL()
    for record in db.select_from_table(ZHUBO_LIVE_TABLE, "is_live != 1"):
        streamer_id = record["zhubo_id"]
        current_live_id = get_live_id(streamer_id)
        if current_live_id == str(0):
            logging.info("{} is not living!".format(streamer_id))
            continue
        logging.info("{} is living!.........".format(streamer_id))
        yield streamer_id
def from_live_goods_to_temp(live_id):
    """Copy the (goods_id, live_id) pairs of one live_id into LIVE_GOODS_TEMP_TABLE.

    Rows are selected from LIVE_GOODS_TABLE with the condition
    ``live_id=<live_id>``; each becomes a ``{"goods_id": ..., "live_id": ...}``
    dict, and the resulting list is handed to ``insert_to_db``.
    """
    db = MYSQL()
    condition = "live_id={}".format(live_id)
    pairs = [
        {"goods_id": record["goods_id"], "live_id": record["live_id"]}
        for record in db.select_from_table(LIVE_GOODS_TABLE, condition)
    ]
    insert_to_db(pairs, LIVE_GOODS_TEMP_TABLE)
class TASK_OBJECT(object):
    '''
    Read rows from a table, run a worker over one value of each row in a
    thread pool, and optionally store the worker results in another table.

    Constructor parameters (8):
    from_table: table the task reads its input rows from
    from_table_condition: condition passed to select_from_table for that read
        (the original notes describe an empty list as the "no condition" value)
    need_to_update: when true, each row is first passed through
        get_update_state and skipped if the result is falsy
    which_module: callable the pool runs on every extracted value
    into_table: table the worker results are inserted into
    need_to_return: when true, the worker results are collected and inserted
        into into_table; otherwise they are discarded
    which_need_in_row: key of the row whose value is handed to which_module
    update_into_table: when true, results are written with
        insert_into_table_with_replace instead of insert_into_table
    '''

    def __init__(self, from_table, from_table_condition, need_to_update,
                 which_module, into_table, need_to_return, which_need_in_row,
                 update_into_table):
        super(TASK_OBJECT, self).__init__()
        self.MYSQL_CONN = MYSQL()
        self.from_table = from_table
        self.from_table_condition = from_table_condition
        self.need_to_update = need_to_update
        self.which_module = which_module
        self.into_table = into_table
        self.need_to_return = need_to_return
        self.which_need_in_row = which_need_in_row
        self.update_into_table = update_into_table

    def get_rows(self):
        '''Yield the truthy input rows, refreshed through get_update_state
        first when need_to_update is set.'''
        rows = self.MYSQL_CONN.select_from_table(self.from_table,
                                                 self.from_table_condition)
        for row in rows:
            if self.need_to_update:
                row = get_update_state(row)
            if row:
                yield row

    def multiprocess_task(self, new_list):
        '''Run which_module over new_list in a pool of THREAD_NUM threads.

        Returns the list of results when need_to_return is set, else None.
        The pool is closed and joined even if a worker raises, so a failing
        batch no longer leaves its threads behind.
        '''
        pool = ThreadPool(THREAD_NUM)
        try:
            results = pool.map(self.which_module, new_list)
        finally:
            pool.close()
            pool.join()
        if self.need_to_return:
            return results
        return None

    def insert_to_db(self, results):
        '''Insert each result into into_table, one row at a time.

        A failing insert is logged (the offending result, then the error) and
        skipped so the remaining results are still written.
        '''
        if self.update_into_table:
            insert = self.MYSQL_CONN.insert_into_table_with_replace
        else:
            insert = self.MYSQL_CONN.insert_into_table
        for each_result in results:
            try:
                insert(each_result, self.into_table)
            except Exception as e:
                # Deliberate best effort: one bad row must not abort the batch.
                logging.error(str(each_result))
                logging.error(e)

    def _run_batch(self, batch):
        '''Process one batch and store its results when need_to_return is set.'''
        results = self.multiprocess_task(batch)
        if self.need_to_return:
            self.insert_to_db(results)

    def task_main(self):
        '''Feed the selected value of every input row to the worker in
        batches of THREAD_NUM, flushing the final partial batch at the end.'''
        batch = []
        for row in self.get_rows():
            batch.append(row[self.which_need_in_row])
            if len(batch) >= THREAD_NUM:
                self._run_batch(batch)
                batch = []
        if batch:
            self._run_batch(batch)