def get_basic_hidden_info(shop_id):
    """
    Fetch basic hidden info (name, address, phone number, cityid)
    @param shop_id:
    @return:
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \
          'shopId=' + str(shop_id) + \
          '&_token=' + str(get_token(shop_url)) + \
          '&tcv=' + str(spider_config.TCV) + \
          '&uuid=' + str(spider_config.UUID) + \
          '&platform=1' \
          '&partner=150' \
          '&optimusCode=10' \
          '&originUrl=' + str(shop_url)
    # Retry loop: the request occasionally fails or returns unparseable text
    retry_time = 5
    while True:
        retry_time -= 1
        r = requests_util.get_requests(url, request_type='proxy, no cookie')
        r_text = requests_util.replace_json_text(r.text, get_font_msg())
        try:
            r_json = json.loads(r_text)
            # Pre-filter: leave the loop once the captcha gate is passed
            if r_json['code'] == 200:
                break
        except Exception:
            pass
        if retry_time <= 0:
            logger.warning('Replace tcv and uuid in the config')
            exit()
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        print('Solve the captcha, then press Enter to continue:', verify_page_url)
        input()
    elif r_json['code'] == 200:
        msg = r_json['msg']['shopInfo']
        shop_name = msg['shopName']
        # Parentheses keep each conditional expression separate before concatenation
        shop_address = (BeautifulSoup(msg['address'], 'lxml').text if msg['address'] is not None else '') + \
                       (BeautifulSoup(msg['crossRoad'], 'lxml').text if msg['crossRoad'] is not None else '')
        shop_number = (BeautifulSoup(msg['phoneNo'], 'lxml').text if msg['phoneNo'] is not None else '') + ', ' + \
                      (BeautifulSoup(msg['phoneNo2'], 'lxml').text if msg['phoneNo2'] is not None else '')
        return {
            '店铺id': shop_id,
            '店铺名': shop_name,
            '店铺地址': shop_address,
            '店铺电话': shop_number
        }
    else:
        logger.warning('Unexpected JSON response code; consider opening a PR or an issue')
def get_proxy(self):
    """ Fetch a proxy """
    try:
        repeat_nub = int(global_config.getRaw('proxy', 'repeat_nub'))
    except Exception:
        logger.warning('repeat_nub is malformed; it must be a positive integer')
        sys.exit()
    # HTTP extraction mode
    if global_config.getRaw('proxy', 'http_extract') == '1':
        # Pool is empty: fetch a fresh batch of proxies
        if len(self.proxy_pool) == 0:
            proxy_url = global_config.getRaw('proxy', 'http_link')
            r = requests.get(proxy_url)
            r_json = r.json()
            for proxy in r_json:
                # Append each proxy repeat_nub times so it gets reused
                for _ in range(repeat_nub):
                    self.proxy_pool.append([proxy['ip'], proxy['port']])
        # Take one proxy from the head of the pool
        proxies = self.http_proxy_utils(self.proxy_pool[0][0], self.proxy_pool[0][1])
        self.proxy_pool.pop(0)
        return proxies
    # Key-based extraction mode (not implemented yet)
    elif global_config.getRaw('proxy', 'key_extract') == '1':
        pass
def send_webhook_message(channel_webhook_url, json_content, retries=3):
    """
    Send a message to the specified channel via a webhook.
    :param channel_webhook_url: full URL for the receiving webhook
    :param json_content: dictionary containing data to send (usually "content" or "embed" keys)
    :param retries: number of times to attempt to send message again if it fails
    :return: True if message was successfully sent, False otherwise
    """
    if not config_loader.DISCORD["enabled"]:
        return True
    attempt = 0
    while attempt <= retries:
        try:
            response = requests.post(channel_webhook_url, json=json_content)
            if response.status_code in (200, 204):
                return True
            logger.warning(
                f"Webhook response {response.status_code}: {response.text}")
        except Exception:
            logger.exception(
                "Unexpected error while attempting to send webhook message.")
        time.sleep(5)
        attempt += 1
    logger.error(f"Unable to send webhook message, content: {json_content}")
    return False
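# Usage sketch for send_webhook_message (the URL and payload below are
# placeholders, not real values): posting a plain-text notification.
# config_loader.DISCORD['enabled'] must be True for the message to go out.
webhook_url = 'https://discord.com/api/webhooks/<id>/<token>'  # placeholder
payload = {'content': 'Nightly job finished successfully.'}
if not send_webhook_message(webhook_url, payload, retries=2):
    logger.error('Notification could not be delivered.')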
def post_view(request):
    post_id = request.GET.get('id')
    user_info = get_user_info(request)
    post = Post.objects.filter(id=post_id).values().last()
    if post is None:
        error_msg = '[post_id:{}] invalid access.'.format(post_id)
        logger.warning(error_msg)
        return redirect('/post')
    # filter().first() returns None instead of raising DoesNotExist
    social_profile = SocialAccount.objects.filter(user_id=post.get('user_id')).first()
    if social_profile:
        user_id = social_profile.user_id
        account_data = social_profile.extra_data
        account_property = account_data.get('properties')
        post['user_id'] = user_id
        post['nickname'] = account_property.get('nickname')
        if account_property.get('profile_image'):
            post['profile_image'] = account_property.get('profile_image').replace('http', 'https')
        else:
            post['profile_image'] = '/static/images/none_profile.png'
    # Comments
    comment_list = list(Comment.objects.filter(post_id=post.get('id')).order_by('created_at').values())
    post['comment_list'] = get_comment_list(comment_list)
    return render(request, 'main/view.html', dict(post=post, user_info=user_info))
def run(self):
    """Main loop of the class. Calls a brain action every TIME_CYCLE"""
    # TODO: clean up the iterations-per-second measurement
    it = 0
    ss = time.time()
    while not self.kill_event.is_set():
        start_time = datetime.now()
        if not self.stop_event.is_set():
            try:
                self.brains.active_brain.execute()
            except AttributeError as e:
                logger.warning('No Brain selected')
                logger.error(e)

        dt = datetime.now() - start_time
        ms = (dt.days * 24 * 60 * 60 + dt.seconds) * 1000 + dt.microseconds / 1000.0
        elapsed = time.time() - ss
        if elapsed < 1:
            it += 1
        else:
            ss = time.time()
            # print(it)
            it = 0
        if ms < TIME_CYCLE:
            time.sleep((TIME_CYCLE - ms) / 1000.0)
    logger.debug('Pilot: pilot killed.')
def do(self, action_name, *args, **kwargs):
    if hasattr(self, 'action_' + action_name):
        getattr(self, 'action_' + action_name)(*args, **kwargs)
    else:
        logger.warning(
            f'no action named "{action_name}" for action manager of type {type(self).__name__}'
        )
def send_payments_details(payments: List[Payment], recipients: List[str]) -> None:
    if not recipients:
        raise Exception(
            '[BATCH][PAYMENTS] Missing PASS_CULTURE_PAYMENTS_DETAILS_RECIPIENTS in environment variables'
        )
    if all(map(lambda x: x.currentStatus.status == TransactionStatus.ERROR, payments)):
        logger.warning(
            '[BATCH][PAYMENTS] Not sending payments details as all payments have an ERROR status'
        )
    else:
        details = create_all_payments_details(payments)
        csv = generate_payment_details_csv(details)
        logger.info('[BATCH][PAYMENTS] Sending %s details of %s payments' %
                    (len(details), len(payments)))
        logger.info('[BATCH][PAYMENTS] Recipients of email : %s' % recipients)
        try:
            send_payment_details_email(csv, recipients, send_raw_email)
        except MailServiceException as e:
            logger.error(
                '[BATCH][PAYMENTS] Error while sending payment details email to MailJet: %s', e)
def get_promo_info(shop_id):
    """
    Fetch coupon/promotion info
    @param shop_id:
    @return:
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str(
        shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token(
        shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \
        '&originUrl=' + shop_url
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        logger.warning('Solve the captcha, then press Enter to continue: %s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        msg = r_json['msg']['shopInfo']
        shop_name = msg['shopName']
        shop_address = BeautifulSoup(msg['address'], 'lxml').text + BeautifulSoup(
            msg['crossRoad'], 'lxml').text
        shop_number = BeautifulSoup(msg['phoneNo'], 'lxml').text + BeautifulSoup(
            msg['phoneNo2'], 'lxml').text
        return [shop_name, shop_address, shop_number]
    else:
        logger.warning('Unexpected JSON response code; consider opening a PR or an issue')
def go_to_next_page(self):
    try:
        self.driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        next_btn = find_next_button(self.driver)
        next_btn.click()
        logger.info('[{}] [UrlCrawler] Click next button'.format(self.city))
    except Exception:
        # Next button not found: fall back to clicking the page index
        try:
            logger.warning(
                '[{}] [UrlCrawler] Next button not found, try to click page index'
                .format(self.city))
            current_page_elm = find_paging_elm(self.driver)
            total_page = current_page_elm.get_attribute('data-totalpage')
            current_page = current_page_elm.get_attribute('data-curpage')
            if total_page == current_page:
                logger.warning(
                    '[{}] [UrlCrawler] Already at last page'.format(self.city))
                return
            current_page = int(current_page)
            elm = find_paging_elm_index(self.driver, current_page + 1)
            elm.click()
            logger.info(
                '[{}] [UrlCrawler] Click page index success'.format(self.city))
        except Exception:
            # logger.info('BLOCKED, CHANGE PROXY')
            raise ProxyBlockedException()
def array_min_max_norm(arr_src: np.ndarray, prm_file_name: str, do_fit: bool = True) -> np.ndarray:
    '''
    arr_scaled = array_min_max_norm(arr_src, 'test1.pkl', True)
    arr_scaled = array_min_max_norm(arr_src, 'test2.pkl', False)
    Params
        arr_src: source np.array
        prm_file_name: parameter file name
        do_fit: params are obtained by fit if True, else params are loaded from file
    Returns
        arr_scaled: np.array scaled between 0 and 1
    '''
    min_max_norm = MinMaxNorm(prm_file_name)
    if not os.path.exists(prm_file_name):
        logger.warning('param file %s does not exist; fitting instead', prm_file_name)
        do_fit = True
    if do_fit:
        logger.info('fit and transform')
        arr_scaled = min_max_norm.fit_transform(arr_src)
        min_max_norm.save_param()
    else:
        logger.info('transform using parameters from file')
        min_max_norm.load_param()
        arr_scaled = min_max_norm.transform(arr_src)
    return arr_scaled
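# Minimal usage sketch for array_min_max_norm, assuming MinMaxNorm persists its
# fitted min/max to the given pickle path: fit once on training data, then reuse
# the stored parameters for unseen data.
import numpy as np

train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
test = np.array([[1.5, 15.0]])
train_scaled = array_min_max_norm(train, 'scaler_params.pkl', do_fit=True)
test_scaled = array_min_max_norm(test, 'scaler_params.pkl', do_fit=False)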
def popUp(self, text=None, move=True):
    if self._fit_to_content['row']:
        self.labelList.setMinimumHeight(
            self.labelList.sizeHintForRow(0) * self.labelList.count() + 2
        )
    if self._fit_to_content['column']:
        self.labelList.setMinimumWidth(
            self.labelList.sizeHintForColumn(0) + 2
        )
    # if text is None, the previous label in self.edit is kept
    if text is None:
        text = self.edit.text()
    self.edit.setText(text)
    self.edit.setSelection(0, len(text))
    items = self.labelList.findItems(text, QtCore.Qt.MatchFixedString)
    if items:
        if len(items) != 1:
            logger.warning("Label list has duplicate '{}'".format(text))
        self.labelList.setCurrentItem(items[0])
        row = self.labelList.row(items[0])
        self.edit.completer().setCurrentRow(row)
    self.edit.setFocus(QtCore.Qt.PopupFocusReason)
    if move:
        self.move(QtGui.QCursor.pos())
    return self.edit.text() if self.exec_() else None
def save(self, param: Any):
    '''
    save param by pickle
    Params
        param: variable to save
    '''
    if param is None:
        logger.error('param is None')
        return
    if self._param_file_name is None:
        logger.warning('file name is None')
        return
    if os.path.exists(self._param_file_name):
        logger.warning('%s exists. overwriting.' % self._param_file_name)
    dir_name = os.path.dirname(self._param_file_name)
    dir_name = '.' if dir_name == '' else dir_name
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
        logger.info('%s created.' % dir_name)
    try:
        with open(self._param_file_name, 'wb') as f:
            pickle.dump(param, f)
        logger.info('%s saved.' % self._param_file_name)
    except IOError as exc:
        raise exc
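# Usage sketch for save() above. The wrapper class name ParamStore and its
# constructor are hypothetical; the real class presumably sets
# _param_file_name in its __init__.
store = ParamStore('model/params.pkl')   # hypothetical constructor
store.save({'mean': 0.5, 'std': 0.1})    # creates model/ if missing, then pickles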
def build_jobs_table(self) -> Union[None, pd.DataFrame]:
    """ Builds the table of jobs according to search terms criteria """
    df = self._get_dataframe()
    if df is None:
        logger.warning('dataframe is None')
        return None
    logger.info('Got the dataframe')
    if self._terms is not None:
        df['text_processed'] = df['text'].apply(self._text_process)
        df['contains_terms'] = df['text_processed'].apply(
            lambda x: self._search_terms(terms=self._terms, text=x))
        filtered_df = df.loc[df['contains_terms']].reset_index(drop=True)
        filtered_df.drop(['contains_terms', 'text_processed'], axis=1, inplace=True)
    else:
        # reset_index returns a new frame; the result must be assigned back
        filtered_df = df.reset_index(drop=True)
    del df
    hide_index = [''] * len(filtered_df)
    filtered_df.index = hide_index
    return filtered_df
def post(self, request):
    try:
        body = json.loads(request.body)
        slug = body['slug']
        username = body['username']
        email = body['email']
        website = body['website']
        content = body['comment']
        ip, browser, os, _ = get_client_info(request)
    except (KeyError, json.JSONDecodeError):
        logger.warning('param invalid|%s', request.body.decode('utf-8'))
        return ParamInvalidResponse()
    try:
        article = Article.objects.get(slug=slug)
    except Article.DoesNotExist:
        logger.warning('article not exist|%s', slug)
        return ObjectNotExistResponse()
    comment = Comment.objects.create(article=article,
                                     username=username,
                                     email=email,
                                     avatar=Gravatar(email).get_image(),
                                     website=website,
                                     content=content,
                                     publish_dt=datetime.now(),
                                     ipv4=ip,
                                     browser=browser,
                                     os=os)
    logger.info('add comment|%s|%s|%s', article.slug, article.title, comment.username)
    return SuccessResponse()
def get_auth_token():
    # Validate payload: every required key must be present
    if (not request.json or 'user' not in request.json
            or 'password' not in request.json or 'app' not in request.json):
        logger.debug(
            'Something is missing from your payload. Double check everything is there'
        )
        logger.debug(str(request.json))
        abort(400)
    # Validate credentials
    content = request.json
    user_dict = {'user': content['user'], 'password': content['password']}
    response = app_creds.validate_application_credentials(content['app'], user_dict)
    status_code = response['code']
    # Generate token
    if response['success']:
        logger.info('User %s authenticated, generating token' % content['user'])
        token = auth_token.generate_auth_token(content['user'])
        response = {
            'code': status_code,
            'success': True,
            'message': {
                'token': token.decode('ascii')
            }
        }
        return success('/auth/token/generate', response)
    else:
        logger.warning('User %s not authenticated, reporting error...' % content['user'])
        return warn('authGen', response)
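# Client-side sketch for the handler above (illustrative only): the host and
# credentials are placeholders, and the route plus the response shape are
# assumptions based on the success() call and the dict built in the handler.
import requests

resp = requests.post(
    'http://localhost:5000/auth/token/generate',  # placeholder host/route
    json={'user': 'alice', 'password': 'secret', 'app': 'demo-app'},
)
if resp.ok:
    token = resp.json().get('message', {}).get('token')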
def _build_urls(self) -> None:
    """ Gets the links for num_jobs of jobs """
    if self._sort_by == 'relevance':
        base_url = f'https://il.indeed.com/jobs?q={self._query}&l=israel&start='
    else:  # by date
        base_url = f'https://il.indeed.com/jobs?q={self._query}&l=israel&sort=date&start='
    if self._num_recent_jobs is not None:
        num_jobs = (self._num_recent_jobs // 15) * 10  # whole numbers of 15
    else:
        num_jobs = self._get_num_jobs(self._get_main_soup(base_url + '0'))
    # get the links
    for i in tqdm.tqdm(range(0, num_jobs, 10), desc='Getting links:', ascii=True):
        soup = self._get_job_soup(base_url + f'{str(i)}')
        self._urls.extend(self._get_jobs_links(soup))
        if i == (num_jobs - 10):  # finished getting all the links
            logger.info('Got all the links for the jobs searched')
        else:
            logger.warning(f'Got {len(self._urls)} links for the jobs searched')
def update_component_token(request):
    """Periodic refresh of component_access_token, which is valid for 2 hours.
    Scheduled to run every 10 minutes.
    """
    api = 'https://api.weixin.qq.com/cgi-bin/component/api_component_token'
    ttl = global_store.ttl_component_access_token()
    if ttl > 11 * 60:
        # No refresh needed; 11 rather than 10 leaves headroom for one failed run
        logger.debug('component_token still valid|%d', ttl)
        return HttpResponse('component_token still valid, no refresh needed')
    ticket = global_store.get_component_verify_ticket()
    if not ticket:
        logger.warning('component_verify_ticket not found')
        return HttpResponse('component_verify_ticket not found')
    data = {
        'component_appid': config.AppID,
        'component_appsecret': config.AppSecret,
        'component_verify_ticket': ticket
    }
    resp = requests.post(api, data=json.dumps(data), timeout=2)
    resp_data = json.loads(resp.text)
    token = resp_data['component_access_token']
    ex = int(resp_data['expires_in'])
    global_store.set_component_access_token(token, ex)
    logger.info('updated component_access_token|%s|%d', token, ex)
    return HttpResponse('success')
def alter_book_state(self, row, slot, num):
    if self.shelf[row][slot] != num:
        logger.warning('slot state mismatch at row: {} slot: {}'.format(row, slot))
        raise Exception("you cannot do this")
    else:
        logger.info('successfully altered state of row: {} slot: {}'.format(row, slot))
        self.shelf[row][slot] = 1 if num == 0 else 0
def load_word_freq_dict(path):
    """
    Load a word-frequency dictionary for the tokenizer
    :param path:
    :return:
    """
    word_freq = {}
    if path:
        if not os.path.exists(path):
            logger.warning('file not found: %s' % path)
            return word_freq
        else:
            with codecs.open(path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('#'):
                        continue
                    info = line.split()
                    if len(info) < 1:
                        continue
                    word = info[0]
                    # frequency column is optional; default is 1
                    freq = int(info[1]) if len(info) > 1 else 1
                    word_freq[word] = freq
    return word_freq
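# Usage sketch for load_word_freq_dict. The expected file format, inferred from
# the parser: one word per line, an optional frequency column, '#' for comments.
#
#     # dict.txt
#     apple 3
#     banana
#
word_freq = load_word_freq_dict('dict.txt')  # -> {'apple': 3, 'banana': 1}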
def get_review_and_star(shop_id):
    """
    Fetch the rating, average price, and review count
    @param shop_id:
    @return:
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?shopId=' + str(
        shop_id) + '&cityId=19&mainCategoryId=2821&_token=' + str(get_token(
        shop_url)) + '&uuid=38af1c67-4a50-3220-06f6-bf9f16e71c41.1611146098&platform=1&partner=150&optimusCode=10' \
        '&originUrl=' + shop_url
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        logger.warning('Solve the captcha, then press Enter to continue: %s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        shop_base_score = r_json['fiveScore']
        score_title_list = r_json['shopScoreTitleList']
        avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text
        review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text
        score_list = []
        for each in r_json['shopRefinedScoreValueList']:
            score_list.append(BeautifulSoup(each, 'lxml').text)
        scores = ''
        for i, score in enumerate(score_list):
            scores = scores + ' ' + score_title_list[i] + score_list[i]
        return [shop_base_score, scores, avg_price, review_count]
    else:
        logger.warning('Unexpected JSON response code; consider opening a PR or an issue')
def get_basic_hidden_info(shop_id):
    """
    Fetch basic hidden info (name, address, phone number, cityid)
    @param shop_id:
    @return:
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/basicHideInfo?' \
          'shopId=' + str(shop_id) + '&_token=' + str(get_token(
        shop_url)) + '&tcv=ck9rmnrofg&uuid=6ca1f51a-7653-b987-3cd6-95f3aadb13b8.1619854599&platform=1' \
          '&partner=150&optimusCode=10&originUrl=' + str(shop_url)
    r = requests_util.get_requests(url, request_type='json')
    r_text = requests_util.replace_json_text(r.text, get_font_msg())
    r_json = json.loads(r_text)
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        logger.warning('Solve the captcha, then press Enter to continue: %s', verify_page_url)
        input()
    elif r_json['code'] == 200:
        msg = r_json['msg']['shopInfo']
        shop_name = msg['shopName']
        shop_address = BeautifulSoup(msg['address'], 'lxml').text + BeautifulSoup(
            msg['crossRoad'], 'lxml').text
        shop_number = BeautifulSoup(msg['phoneNo'], 'lxml').text + BeautifulSoup(
            msg['phoneNo2'], 'lxml').text
        return [shop_name, shop_address, shop_number]
    else:
        logger.warning('Unexpected JSON response code; consider opening a PR or an issue')
def plot_all(
    self,
    num_chains: int = 0,
    therm_frac: float = 0.,
    title: str = None,
    outdir: str = None,
    subplots_kwargs: dict[str, Any] = None,
    plot_kwargs: dict[str, Any] = None,
):
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    subplots_kwargs = {} if subplots_kwargs is None else subplots_kwargs
    dataset = self.get_dataset()
    for idx, (key, val) in enumerate(dataset.data_vars.items()):
        color = f'C{idx % 9}'
        plot_kwargs['color'] = color
        try:
            _, subfigs, ax = self.plot(
                val=val.values.T,
                key=str(key),
                title=title,
                outdir=outdir,
                therm_frac=therm_frac,
                num_chains=num_chains,
                plot_kwargs=plot_kwargs,
                subplots_kwargs=subplots_kwargs,
            )
        except Exception:
            logger.warning(f'Unable to plot {key}, skipping')
            continue  # subfigs is unbound here, so move on to the next variable
        if subfigs is not None:
            # Draw the heatmap into the left subfigure with the frame hidden
            edgecolor = plt.rcParams['axes.edgecolor']
            plt.rcParams['axes.edgecolor'] = plt.rcParams['axes.facecolor']
            ax = subfigs[0].subplots(1, 1)
            cbar_kwargs = {}
            im = val.plot(ax=ax, cbar_kwargs=cbar_kwargs)
            im.colorbar.set_label(f'{key}', fontsize='large')
            sns.despine(subfigs[0], top=True, right=True, left=True, bottom=True)
            plt.rcParams['axes.edgecolor'] = edgecolor
    return dataset
def delete_book_from_shelf(self, row, slot):
    logger.info('attempting to remove book from row: {} slot: {}'.format(row, slot))
    try:
        self.alter_book_state(row, slot, 1)
        return True
    except Exception as e:
        logger.warning('FAILURE REMOVING BOOK: message: {} row: {} slot: {}'.format(e, row, slot))
        return False
def put_book_on_shelf(self, row, slot):
    logger.info('attempting to add book to row: {} slot: {}'.format(row, slot))
    try:
        self.alter_book_state(row, slot, 0)
        return True
    except Exception as e:
        logger.warning('FAILURE ADDING BOOK: message: {} row: {} slot: {}'.format(e, row, slot))
        return False
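# Usage sketch for the shelf helpers above, assuming a hypothetical Shelf class
# whose self.shelf is a 2D list where 0 marks an empty slot and 1 an occupied one.
shelf = Shelf(rows=10, slots_per_row=20)   # hypothetical constructor
if shelf.put_book_on_shelf(2, 5):          # requires shelf[2][5] == 0, flips it to 1
    shelf.delete_book_from_shelf(2, 5)     # requires shelf[2][5] == 1, flips it to 0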
def get_review_and_star(shop_id):
    """
    Fetch the rating, average price, and review count
    @param shop_id:
    @return:
    """
    assert len(shop_id) == len('H2noKWCDigM0H9c1')
    shop_url = get_shop_url(shop_id)
    url = 'http://www.dianping.com/ajax/json/shopDynamic/reviewAndStar?' \
          'shopId=' + str(shop_id) + \
          '&cityId=19' \
          '&mainCategoryId=2821' \
          '&_token=' + str(get_token(shop_url)) + \
          '&uuid=' + str(spider_config.UUID) + \
          '&platform=1' \
          '&partner=150' \
          '&optimusCode=10' \
          '&originUrl=' + shop_url
    # Retry loop: the request occasionally fails or returns unparseable text
    while True:
        r = requests_util.get_requests(url, request_type='proxy, no cookie')
        r_text = requests_util.replace_json_text(r.text, get_font_msg())
        try:
            r_json = json.loads(r_text)
            # Pre-filter: leave the loop once the captcha gate is passed
            if r_json['code'] == 200:
                break
        except Exception:
            pass
    # Captcha handling
    if r_json['code'] == 406:
        verify_page_url = r_json['customData']['verifyPageUrl']
        print('Solve the captcha, then press Enter to continue:', verify_page_url)
        input()
    elif r_json['code'] == 200:
        shop_base_score = r_json['fiveScore']
        score_title_list = r_json['shopScoreTitleList']
        avg_price = BeautifulSoup(r_json['avgPrice'], 'lxml').text
        review_count = BeautifulSoup(r_json['defaultReviewCount'], 'lxml').text
        score_list = []
        for each in r_json['shopRefinedScoreValueList']:
            score_list.append(BeautifulSoup(each, 'lxml').text)
        scores = {}
        for i, score in enumerate(score_list):
            scores[score_title_list[i]] = score_list[i]
        return {
            '店铺id': shop_id,
            '店铺总分': shop_base_score,
            '店铺评分': scores,
            '人均价格': avg_price,
            '评论总数': review_count
        }
    else:
        logger.warning('Unexpected JSON response code; consider opening a PR or an issue')
def call_action(self, action_name, args=(), stop=False):
    if self.action_manager is not None:
        if stop:
            self.action_manager.stop(action_name, *args)
        else:
            self.action_manager.do(action_name, *args)
    else:
        logger.warning(f'entity of type {type(self).__name__} has no action manager')
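# Minimal self-contained sketch of the action-manager convention that do() and
# call_action() rely on: any method named action_<name> is dispatchable by
# string. The class below is illustrative, not part of the original codebase.
import logging

logger = logging.getLogger(__name__)

class DoorActionManager:
    def do(self, action_name, *args, **kwargs):
        if hasattr(self, 'action_' + action_name):
            getattr(self, 'action_' + action_name)(*args, **kwargs)
        else:
            logger.warning(f'no action named "{action_name}"')

    def action_open(self, speed=1.0):
        logger.info(f'opening door at speed {speed}')

DoorActionManager().do('open', speed=0.5)   # dispatches to action_open
DoorActionManager().do('close')             # logs a warning: no action_close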
def load_motd(self):
    if not path.exists(self.server.name + ".motd"):
        logger.warning("No motd file for " + self.server.name)
        return ""
    with open(self.server.name + ".motd") as motd_f:
        motd = motd_f.read()
    return motd
def setup_proxy_for_driver(driver: webdriver, test_url=None, times=0):
    if times > 9:
        logger.warning('setup_proxy_for_driver no available proxy')
        raise TooManyTimesException('setup_proxy_for_driver')
    try:
        try:
            # Clean cookies and close the current session
            driver.delete_all_cookies()
            driver.quit()
        except Exception:
            pass
        proxy_url = get_proxy().get('proxy')
        logger.info('proxy get {}'.format(proxy_url))
        capabilities = get_capabilities(proxy_url)
        logger.info('start new session')
        driver.start_session(capabilities=capabilities)
        logger.info('start testing proxy')
        ok = test_proxy(driver, test_url, proxy_url)
        if not ok:
            logger.warning('proxy checking failed for {} times'.format(times + 1))
            return setup_proxy_for_driver(driver, test_url, times=times + 1)
        logger.info('proxy works')
        return driver
    except SessionNotCreatedException:
        logger.error('Failed to start a new session')
        return setup_proxy_for_driver(connect_to_driver(), test_url, times=times + 1)
    except InvalidSessionIdException as e2:
        logger.error('Session id invalid {}'.format(e2))
        return setup_proxy_for_driver(driver, test_url, times=times + 1)
    except WebDriverException as e3:
        logger.error('No active session with ID: {}'.format(e3))
        return setup_proxy_for_driver(driver, test_url, times=times + 1)
    except NoProxyAvailableException:
        logger.error('No proxy')
        mongo._report_error({
            'error_source': 'proxy',
            'message': 'no_proxy_available',
            'url': None,
            'payload': {}
        })
        return setup_proxy_for_driver(driver, test_url, times=times + 1)
    except Exception as e:
        logger.error(f'setup_proxy_for_driver {e}')
        raise e
def get(self, request, id):
    try:
        snippet = Snippet.objects.get(id=id)
    except Snippet.DoesNotExist:
        logger.warning('snippet not exist|%d', id)
        return ObjectNotExistResponse()
    logger.debug('query snippet|%d', id)
    return SuccessResponse(snippet.to_dict())
def get_ip():
    while True:
        netloc = None  # keep the name bound for the error handler below
        try:
            raw_target_item = raw_target_queue.get()
            raw_target_domain_ip_map = {}
            raw_target_domain_list = []
            raw_target_ip_list = []
            # extract domains
            main_domain = raw_target_item['domain']
            if not (main_domain.startswith('http://') or main_domain.startswith('https://')):
                main_domain = 'http://' + main_domain
            if main_domain not in raw_target_domain_list:
                raw_target_domain_list.append(main_domain)
            for sub, sub_ips in raw_target_item['subdomains'].items():
                subdomain = 'http://' + sub
                if subdomain not in raw_target_domain_list:
                    raw_target_domain_list.append(subdomain)
                    raw_target_domain_ip_map[subdomain] = []
                for ip in sub_ips:
                    if ip not in raw_target_ip_list:
                        raw_target_ip_list.append(ip)
                    if ip not in raw_target_domain_ip_map[subdomain]:
                        raw_target_domain_ip_map[subdomain].append(ip)
            # extract ips
            for domain in raw_target_domain_list:
                if domain not in raw_target_domain_ip_map:
                    raw_target_domain_ip_map[domain] = []
                parsed = parse.urlparse(domain)
                netloc = parsed[1]
                ip = socket.gethostbyname(netloc)
                if ip:
                    if ip not in raw_target_ip_list:
                        raw_target_ip_list.append(ip)
                    raw_target_domain_ip_map[domain].append(ip)
            for ip in raw_target_ip_list:
                ips_result.put(ip)
            for domain in raw_target_domain_list:
                domains_result.put(domain)
            item = {}
            item['name'] = raw_target_item['name']
            item['url'] = raw_target_item['url']
            item['domain2ip'] = raw_target_domain_ip_map
            targets_result.put(item)
        except socket.gaierror:
            pass
        except Exception:
            logger.warning('Error while resolving IP for %s', netloc)
        finally:
            pbar.update(1)
            raw_target_queue.task_done()