def fetch_update_data(self, data_list=None, proxy=None, **kwargs):
    """Fetch update data.

    @return: status codes appended to data_list (these are not HTTP status codes):
        0    empty (ignore)
        -401 error (retry; program error such as a syntax error, or an error caused by
             deletion after an exception -- check the program)
        -402 data exception (retry; verify that the data was fetched correctly)
        -400 proxy exception (retry; can be ignored)
        -200 non-200 response, proxy or data exception (retry; watch out for infinite
             retry loops in this case)
        200  normal status
        404  product does not exist or has been deleted
    """
    if data_list is None:
        data_list = []
    # Decide which site the URL belongs to, then dispatch to that site's crawler module.
    update_url = kwargs.get('update_url', '')
    if not update_url:
        return
    if '360' in update_url:
        return
    supplier_name = update_url.split('.')[1]
    if not supplier_name:
        return None
    headers = {
        'user-agent': random.choice(config.USER_AGENT_LIST),
    }
    try:
        if not hasattr(supplier, supplier_name):
            module_name = 'supplier.{0}'.format(supplier_name)
            if module_name not in sys.modules:
                __import__(module_name)
            obj = sys.modules[module_name]
        else:
            obj = getattr(supplier, supplier_name)
        if 'fetch_update_data' in dir(obj):
            _fetch_update_data = getattr(obj, 'fetch_update_data')
        else:
            kwargs['status'] = -401
            data_list.append(kwargs)
            return None
    except Exception as e:
        config.LOG.exception('STATUS: -401, ID: {0} import error, will retry: {1}'.format(kwargs['id'], e))
        kwargs['status'] = -401
        data_list.append(kwargs)
        return None
    try:
        kwargs['headers'] = headers
        kwargs['proxy'] = proxy
        data_list.append(_fetch_update_data(**kwargs))
    except Exception as e:
        kwargs['status'] = -402
        if 'headers' in kwargs:
            del kwargs['headers']
        if 'proxy' in kwargs:
            del kwargs['proxy']
        data_list.append(kwargs)
        config.LOG.exception('STATUS: -402, ID: %(id)s error: %(e)s',
                             {'id': util.u2b(kwargs['id']), 'e': util.traceback_info(e)})
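# Usage sketch (an assumption, not part of the original codebase): the dynamic dispatch
# above resolves a crawler module under the `supplier` package from the second label of
# the URL's hostname and calls its fetch_update_data. The instance name `crawler`, the
# supplier name 'example_site', the URL, and the id below are all hypothetical.
#
#   results = []
#   crawler.fetch_update_data(
#       data_list=results,
#       proxy=None,
#       id='12345',                                     # hypothetical record id
#       update_url='http://www.example_site.com/p/1',   # split('.')[1] -> 'example_site'
#   )
#   # On failure, results holds the original kwargs with 'status' set to -401/-402;
#   # on success it holds whatever supplier.example_site.fetch_update_data returned.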
def fetch_search_data(self, data_list=None, err_list=None, proxy=None, supp=None, **kwargs):
    """Fetch product data for a search keyword (may be URLs or full detail records)."""
    if data_list is None:
        data_list = []
    if err_list is None:
        err_list = []
    if not supp or 'keyword' not in kwargs:
        return None
    headers = {
        'user-agent': random.choice(config.USER_AGENT_LIST),
    }
    keyword = util.u2b(kwargs['keyword'])
    supplier_name = config.DB_KEY[supp]
    try:
        if not hasattr(supplier, supplier_name):
            module_name = 'supplier.{0}'.format(supplier_name)
            if module_name not in sys.modules:
                __import__(module_name)
            obj = sys.modules[module_name]
        else:
            obj = getattr(supplier, supplier_name)
        if hasattr(obj, 'api_search_data'):
            _fetch_function = getattr(obj, 'api_search_data')
        else:
            _fetch_function = getattr(obj, 'fetch_search_data')
    except Exception as e:
        config.LOG.exception('STATUS: -401, Keyword: %(keyword)s', {'keyword': keyword})
        if kwargs.get('count', 1) < self.exception_threshold:
            kwargs['status'] = -401
            kwargs['count'] = kwargs.get('count', 1) + 1
            err_list.append(kwargs)
        return None
    data_dict = {
        'detail': [],
        'list': [],
        'url': []
    }
    if self.optype == 'hot' and self.use:
        kwargs['hot_search'] = True
    del kwargs['keyword']
    try:
        _fetch_function(keyword, supp, data_dict, headers, **kwargs)
    except Exception as e:
        config.LOG.exception('STATUS: -402, Keyword: %(keyword)s', {'keyword': keyword})
        if kwargs.get('count', 1) < self.exception_threshold:
            kwargs['status'] = -402
            kwargs['count'] = kwargs.get('count', 1) + 1
            kwargs['keyword'] = keyword
            err_list.append(kwargs)
        return None
    if data_dict['list']:
        try:
            _fetch_function = getattr(obj, 'fetch_search_list')
        except Exception as e:
            _fetch_function = None
            print(util.traceback_info(e, return_all=1))
        if _fetch_function:
            res = self._crawl(_fetch_function, data_dict['list'], headers, proxy)
            if 'url' in res:
                for url in res['url']:
                    data_dict['url'].append(url)
            if 'detail' in res:
                for data in res['detail']:
                    data_dict['detail'].append(data)
    if data_dict['url']:
        try:
            _fetch_function = getattr(obj, 'fetch_data')
        except Exception as e:
            _fetch_function = None
            print(util.traceback_info(e, return_all=1))
        if _fetch_function:
            res = self._crawl(_fetch_function, data_dict['url'], headers, proxy)
            if 'detail' in res:
                for data in res['detail']:
                    data_dict['detail'].append(data)
    for data in data_dict['detail']:
        # This is where each record would be cleaned and normalized.
        data_list.append(data)
    return data_list
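# Usage sketch (an assumption, not part of the original codebase): fetch_search_data
# runs the keyword search through the supplier module's api_search_data/fetch_search_data,
# then expands list pages via fetch_search_list and URLs via fetch_data, accumulating
# detail records into data_list and retryable failures into err_list. The instance name
# `crawler`, the supplier key and the keyword below are hypothetical.
#
#   found, failed = [], []
#   crawler.fetch_search_data(
#       data_list=found,
#       err_list=failed,
#       proxy=None,
#       supp=1,                  # hypothetical key into config.DB_KEY
#       keyword='LM358',         # hypothetical search keyword
#   )
#   # found  -> detail records gathered from the supplier module
#   # failed -> kwargs dicts with 'status' -401/-402 and an incremented 'count',
#   #           re-queued only while 'count' stays below self.exception_threshold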