Beispiel #1
0
def find_how_many_is_spider():
    """Return how many rows of ZHUBO_LIVE_TABLE match "is_live = 1"."""
    connection = MYSQL()
    live_rows = connection.select_from_table(ZHUBO_LIVE_TABLE, "is_live = 1")
    # Materialise the result so len() works on any iterable of rows.
    return len([live_row for live_row in live_rows])
Beispiel #2
0
def get_zhubo_id_list():
    """Return the "zhubo_id" value of every row selected from ZHUBO_INFO_TABLE.

    The table is queried with an empty list as the condition argument.
    """
    connection = MYSQL()
    info_rows = connection.select_from_table(ZHUBO_INFO_TABLE, [])
    return [info_row["zhubo_id"] for info_row in info_rows]
Beispiel #3
0
def get_goods_id_list():
    """Return the "goods_id" value of every row selected from LIVE_GOODS_TABLE.

    The table is queried with an empty list as the condition argument.
    """
    connection = MYSQL()
    return [
        goods_row["goods_id"]
        for goods_row in connection.select_from_table(LIVE_GOODS_TABLE, [])
    ]
Beispiel #4
0
def get_goods_id_list_from_temp(live_id):
    """Return the "goods_id" values stored in LIVE_GOODS_TEMP_TABLE for live_id.

    live_id is interpolated into the condition string "live_id=<value>" that
    is passed to select_from_table.
    """
    connection = MYSQL()
    condition = "live_id={}".format(live_id)
    temp_rows = connection.select_from_table(LIVE_GOODS_TEMP_TABLE, condition)
    return [temp_row["goods_id"] for temp_row in temp_rows]
Beispiel #5
0
def get_rows():
    """Yield the "goods_id" of each row in live_taobao_webstar_crawl_live_goods.

    This is a generator: the connection is opened on the first iteration and
    closed via close_db() when iteration ends, including when the consumer
    stops early or an exception is raised while reading rows.
    """
    MYSQL_COON = MYSQL()
    try:
        # No condition argument is passed; only the table name is given.
        rows = MYSQL_COON.select_from_table(
            "live_taobao_webstar_crawl_live_goods")
        for row in rows:
            yield row["goods_id"]
    finally:
        # Runs on exhaustion, on generator.close(), and on errors, so the
        # connection is not leaked when the caller abandons the generator.
        MYSQL_COON.close_db()
Beispiel #6
0
def update_zhubo_from_db():
    """Yield zhubo ids whose get_live_id() result differs from the string "0".

    Rows are selected from ZHUBO_LIVE_TABLE with the condition
    "is_live != 1".  For each row, get_live_id() is called with the row's
    "zhubo_id"; the outcome is logged either way, and the id is yielded only
    when the result is not equal to "0".
    """
    connection = MYSQL()
    candidates = connection.select_from_table(ZHUBO_LIVE_TABLE, "is_live != 1")

    for candidate in candidates:
        zhubo_id = candidate["zhubo_id"]
        is_living = str(0) != get_live_id(zhubo_id)
        if not is_living:
            logging.info("{} is not living!".format(zhubo_id))
            continue
        logging.info("{} is living!.........".format(zhubo_id))
        yield zhubo_id
Beispiel #7
0
def from_live_goods_to_temp(live_id):
    """Copy the goods/live id pairs for live_id into LIVE_GOODS_TEMP_TABLE.

    Rows are selected from LIVE_GOODS_TABLE with the condition
    "live_id=<value>".  Each row is reduced to a dict holding only its
    "goods_id" and "live_id", and the resulting list is handed to
    insert_to_db() together with LIVE_GOODS_TEMP_TABLE.
    """
    connection = MYSQL()
    condition = "live_id={}".format(live_id)
    source_rows = connection.select_from_table(LIVE_GOODS_TABLE, condition)
    pairs = [
        {"goods_id": source_row["goods_id"], "live_id": source_row["live_id"]}
        for source_row in source_rows
    ]
    insert_to_db(pairs, LIVE_GOODS_TEMP_TABLE)
Beispiel #8
0
class TASK_OBJECT(object):
    """Select rows from a table, run a worker over them in batches, store results.

    The constructor takes 8 parameters, none of which has a default:

    from_table: table the rows are selected from
    from_table_condition: condition passed to select_from_table together
        with from_table
    need_to_update: when true, every selected row is replaced by the value
        returned from get_update_state(row) before it is used
    which_module: callable that is mapped over each batch in a thread pool
    into_table: table the worker results are inserted into
    need_to_return: when true, the worker results are collected and passed
        to insert_to_db(); otherwise they are discarded
    which_need_in_row: key used to pick, from each row, the value that is
        handed to which_module
    update_into_table: when true, results are written with
        insert_into_table_with_replace(); otherwise with insert_into_table()
    """

    def __init__(self, from_table, from_table_condition, need_to_update,
                 which_module, into_table, need_to_return, which_need_in_row,
                 update_into_table):
        super(TASK_OBJECT, self).__init__()

        self.MYSQL_CONN = MYSQL()
        self.from_table = from_table
        self.from_table_condition = from_table_condition
        self.need_to_update = need_to_update
        self.which_module = which_module
        self.into_table = into_table
        self.need_to_return = need_to_return
        self.which_need_in_row = which_need_in_row
        self.update_into_table = update_into_table

    def get_rows(self):
        """Yield the selected rows, skipping any that are falsy.

        With need_to_update set, each row is first replaced by the result of
        get_update_state(row); a falsy result drops the row.
        """
        rows = self.MYSQL_CONN.select_from_table(self.from_table,
                                                 self.from_table_condition)

        for row in rows:
            if self.need_to_update:
                row = get_update_state(row)
            if row:
                yield row

    def multiprocess_task(self, new_list):
        """Map which_module over new_list in a pool of THREAD_NUM threads.

        Returns the list of results when need_to_return is true, otherwise
        None.  The pool is closed and joined even if the mapping raises, so
        worker threads are not left behind on failure.
        """
        pool = ThreadPool(THREAD_NUM)
        try:
            results = pool.map(self.which_module, new_list)
        finally:
            pool.close()
            pool.join()
        if self.need_to_return:
            return results
        return None

    def insert_to_db(self, results):
        """Write each item of results into into_table, one insert per item.

        A failing insert is logged (the item, then the exception) and does
        not stop the remaining items from being written.
        """
        for each_result in results:
            try:
                if self.update_into_table:
                    self.MYSQL_CONN.insert_into_table_with_replace(
                        each_result, self.into_table)
                else:
                    self.MYSQL_CONN.insert_into_table(each_result,
                                                      self.into_table)
            except Exception as e:
                # Best effort: record the failure and carry on with the rest.
                logging.error(str(each_result))
                logging.error(e)

    def _run_batch(self, batch):
        """Run the worker over one batch and store its results if requested."""
        results = self.multiprocess_task(batch)
        if self.need_to_return:
            self.insert_to_db(results)

    def task_main(self):
        """Process all rows from get_rows() in batches of THREAD_NUM values.

        From every row the value under which_need_in_row is collected; each
        full batch is processed immediately and any remainder is processed
        after the rows are exhausted.
        """
        batch = []

        for row in self.get_rows():
            batch.append(row[self.which_need_in_row])
            if len(batch) % THREAD_NUM == 0:
                self._run_batch(batch)
                batch = []

        # Flush the final, partially filled batch.
        if batch:
            self._run_batch(batch)