Exemple #1
0
 def queue_items(self):
     """Claim pending rows from ``hainiu_queue`` and wrap them as consumers.

     Selects up to ``self.limit`` rows with ``type=1``, ``is_work=0`` and
     ``fail_times <= self.fail_times``, builds one ``OGCConsumer`` per row
     and then marks the selected rows with ``is_work=1``.

     Any exception is logged through ``self.rl`` and the connection is
     rolled back; nothing is re-raised.

     Returns:
         list: the ``OGCConsumer`` objects built so far. Empty when no row
         matched or the select failed.
     """
     select_queue_sql = """
     select id,action,params from hainiu_queue where 
     type=1 and is_work=0 and fail_times<=%s
     limit 0,%s for update;
     """
     update_queue_sql = """
     update hainiu_queue set is_work=1 where id in (%s);
     """
     return_list = []
     # Bound before the try block so the except/finally clauses never touch
     # an unbound name when DBUtil() itself raises.
     d = None
     sql = None
     try:
         d = DBUtil(config._OGC_DB)
         sql = select_queue_sql % (self.fail_times, self.limit)
         query_ids = []
         for record in d.read_dict(sql):
             row_id = record['id']
             action = record['action']
             params = record['params']
             query_ids.append(str(row_id))
             return_list.append(OGCConsumer(row_id, action, params))
         if query_ids:
             sql = update_queue_sql % ','.join(query_ids)
             d.execute(sql)
     except Exception:
         # NOTE(review): consumers already appended are still returned even
         # if the update above failed and was rolled back - confirm intended.
         self.rl.exception()
         if sql is not None:
             # Log the statement that was being run, like the sibling producers.
             self.rl.error(sql)
         if d is not None:
             d.rollback()
     finally:
         if d is not None:
             d.close()
     return return_list
 def queue_items(self):
     """Claim crawlable seeds from ``hainiu_web_seed`` and wrap them as consumers.

     Selects up to ``self.limit`` rows with ``status=0``,
     ``fail_times <= self.fail_times`` and whose ``fail_ip`` does not contain
     the local IP. A row is kept when its ``last_crawl_time`` is NULL or is
     not later than one hour ago (compared at hour granularity). Kept rows
     are wrapped in ``NewsFindActionConsumer(id, url, category)`` and then
     updated to ``status=1`` with ``last_crawl_time`` set to the current time.

     Any exception is logged through ``self.rl`` and the connection is
     rolled back; nothing is re-raised.

     Returns:
         list: the ``NewsFindActionConsumer`` objects built so far. Empty
         when no row qualified or the select failed.
     """
     ip = Util().get_local_ip()
     select_seed_sql = """
     select id,url,category,domain,host,last_crawl_time from hainiu_web_seed where 
     fail_times<=%s and locate('%s',fail_ip)=0 and status=0
     limit 0,%s for update;
     """
     update_queue_sql = """
     update hainiu_web_seed set status=1,last_crawl_time='%s' where id in (%s);
     """
     return_list = []
     # Bound before the try block so the except/finally clauses never touch
     # an unbound name when DBUtil() itself raises.
     d = None
     sql = None
     try:
         d = DBUtil(config._OGC_DB)
         sql = select_seed_sql % (self.fail_times, ip, self.limit)
         select_dict = d.read_dict(sql)
         query_ids = []
         t = TimeUtil()
         hour_format = '%Y-%m-%d %H'
         # "One hour ago" at hour granularity; loop-invariant, so computed once.
         cutoff = int(t.str2timestamp(
             t.get_dif_time(hour=-1, format=hour_format), format=hour_format))
         for each in select_dict:
             last_crawl_time = each['last_crawl_time']
             # Test for NULL on the raw value: str(None) is 'None', which is
             # never None and is not a parseable time string.
             if last_crawl_time is not None:
                 crawled_at = int(t.str2timestamp(
                     str(last_crawl_time)[:13], hour_format))
                 if crawled_at > cutoff:
                     # Crawled within the last hour: leave it for later.
                     continue
             # Rows reaching here were last crawled at least an hour ago,
             # or have never been crawled.
             seed_id = each['id']
             query_ids.append(str(seed_id))
             return_list.append(
                 NewsFindActionConsumer(seed_id, each['url'], each['category']))
         if query_ids:
             ids = ','.join(query_ids)
             # Read the clock once so the printed value matches the stored one.
             now = t.now_time()
             sql = update_queue_sql % (now, ids)
             print("%s %s" % (now, ids))
             d.execute(sql)
     except Exception:
         # NOTE(review): consumers already appended are still returned even
         # if the update above failed and was rolled back - confirm intended.
         self.rl.exception()
         if sql is not None:
             self.rl.error(sql)
         if d is not None:
             d.rollback()
     finally:
         if d is not None:
             d.close()
     return return_list
Exemple #3
0
 def queue_items(self):
     """Claim unassigned rows from ``hainiu_queue`` and wrap them as consumers.

     Selects up to ``self.limit`` rows with ``type=0``,
     ``fail_times <= self.fail_times`` and whose ``fail_ip`` does not contain
     the local IP. Each row becomes a
     ``NewsFindQueueConsumer(id, action, params)`` and the selected rows are
     then updated to ``type=1``.

     Any exception is logged through ``self.rl`` and the connection is
     rolled back; nothing is re-raised.

     Returns:
         list: the ``NewsFindQueueConsumer`` objects built so far. Empty
         when no row matched or the select failed.
     """
     ip = Util().get_local_ip()
     select_queue_sql = """
     select id,action,params from hainiu_queue where 
     type=0 and fail_times<=%s and locate('%s',fail_ip)=0
     limit 0,%s for update;
     """
     # type=1 means the URL has already been assigned to a consumer.
     update_queue_sql = """
     update hainiu_queue set type=1 where id in (%s);
     """
     return_list = []
     # Bound before the try block so the except/finally clauses never touch
     # an unbound name when DBUtil() itself raises.
     d = None
     sql = None
     try:
         d = DBUtil(config._OGC_DB)
         sql = select_queue_sql % (self.fail_times, ip, self.limit)
         select_dict = d.read_dict(sql)
         # Debug output of the raw rows; single-argument form prints the same
         # text under both the print statement and the print function.
         print(select_dict)
         query_ids = []
         for each in select_dict:
             row_id = each['id']
             url = each['action']
             category = each['params']
             query_ids.append(str(row_id))
             return_list.append(NewsFindQueueConsumer(row_id, url, category))
         if query_ids:
             sql = update_queue_sql % ','.join(query_ids)
             d.execute(sql)
     except Exception:
         # NOTE(review): consumers already appended are still returned even
         # if the update above failed and was rolled back - confirm intended.
         self.rl.exception()
         if sql is not None:
             self.rl.error(sql)
         if d is not None:
             d.rollback()
     finally:
         if d is not None:
             d.close()
     return return_list
 def queue_items(self):
     """Claim download rows from ``hainiu_queue`` and wrap them as consumers.

     Selects up to ``self.limit`` rows with ``type=2``,
     ``fail_times <= self.fail_times`` and whose ``fail_ip`` does not contain
     the local IP. Each row becomes a
     ``DownloadActionConsumer(id, action, params)`` and the selected rows are
     then updated to ``type=3``.

     Any exception is logged through ``self.rl`` and the connection is
     rolled back; nothing is re-raised.

     Returns:
         list: the ``DownloadActionConsumer`` objects built so far. Empty
         when no row matched or the select failed.
     """
     ip = Util().get_local_ip()
     select_queue_sql = """
     select id,action,params from hainiu_queue where 
     fail_times<=%s and locate('%s',fail_ip)=0 and type=2
     limit 0,%s for update;
     """
     # type=3: the row has already been picked up by a consumer process.
     update_queue_sql = """
     update hainiu_queue set type=3 where id in (%s);
     """
     return_list = []
     # Bound before the try block so the except/finally clauses never touch
     # an unbound name when DBUtil() itself raises.
     d = None
     sql = None
     try:
         d = DBUtil(config._OGC_DB)
         sql = select_queue_sql % (self.fail_times, ip, self.limit)
         query_ids = []
         for each in d.read_dict(sql):
             row_id = each['id']
             action = each['action']
             params = each['params']
             query_ids.append(str(row_id))
             return_list.append(DownloadActionConsumer(row_id, action, params))
         if query_ids:
             sql = update_queue_sql % ','.join(query_ids)
             d.execute(sql)
     except Exception:
         # NOTE(review): consumers already appended are still returned even
         # if the update above failed and was rolled back - confirm intended.
         self.rl.exception()
         if sql is not None:
             self.rl.error(sql)
         if d is not None:
             d.rollback()
     finally:
         if d is not None:
             d.close()
     return return_list