def remove_proxy_server_ip_list(self):
    """Drop the first cached proxy entry and persist what is left.

    Nothing happens when ``self.proxy_list_from_file`` is empty or ``None``.
    Otherwise the first entry is deleted in place and the (possibly now
    empty) list is written to the ``'proxy'`` log file.
    """
    proxies = self.proxy_list_from_file
    if not proxies:
        return

    del proxies[0]
    # The list is saved whether or not any entries remain.
    filewriter.save_log_file('proxy', proxies)
def save(self, id, text):
    """Send ``text`` through ``telegrambot`` and remember ``id`` in ``self.log``.

    Args:
        id: Identifier of the item; the call is ignored when it is falsy.
        text: Message body; the call is ignored when it is falsy.

    Each accepted call increments ``self.count`` and sends the message.
    The id is appended to ``self.log`` and the log is saved under
    ``self.name`` only when the id has not been seen before.
    """
    if not (text and id):
        return

    self.count += 1
    telegrambot.send_message(text)

    if id in self.log:
        return
    self.log.append(id)
    filewriter.save_log_file(self.name, self.log)
def send_messge_and_save(self, id=None, text=None, bot_name=None):
    """Record a new item id and announce it, skipping ids seen before.

    Args:
        id: Identifier of the item.
        text: Message body to send.
        bot_name: Second argument passed to ``telegrambot.send_message``.

    The call is a no-op unless all three arguments are truthy and ``id``
    is not yet in ``self.log``. For a new id the log is extended and saved
    under ``self.name``, an info line is logged, and the message is sent.
    """
    if not (text and id and bot_name):
        return
    if id in self.log:
        return

    self.log.append(id)
    filewriter.save_log_file(self.name, self.log)
    log.logger.info('New hotdeal has just been registerd. (%s)' % (id))
    telegrambot.send_message(text, bot_name)
def record_success_log(self):
    """Mark this collector as successful for today in the ``'success'`` log.

    The success log is a JSON mapping ``{date: {collector_name: 1}}`` with
    dates formatted as ``%Y_%m_%d``.

    Returns:
        ``False`` when ``self.is_record_call`` is already ``True`` or when
        today's entry for ``self.name`` is already truthy; ``None`` after a
        new entry has been written.
    """
    # The attribute is created on the first call; it is only set when it
    # does not exist yet, exactly as before.
    if not hasattr(self, 'is_record_call'):
        self.is_record_call = True
    elif self.is_record_call is True:
        return False

    # A missing or empty log used to raise TypeError (``None[date] = {}``)
    # from inside the ``except`` handler; start from an empty mapping.
    log_success = filewriter.get_log_file('success', is_json=True)
    if not log_success:
        log_success = {}

    date = datetime.now().strftime('%Y_%m_%d')
    if not log_success.get(date):
        log_success[date] = {}

    if log_success[date].get(self.name):
        return False

    log_success[date][self.name] = 1
    filewriter.save_log_file('success', log_success)
def end_collect_keyword(self):
    """Persist the collected keywords, report how many there are, and exit.

    Prints and logs ``len(self.keywords)``, saves ``self.keywords`` under
    ``self.name``, calls ``self.destroy()`` and finally ``exit()``.
    """
    keyword_total = len(self.keywords)
    print(keyword_total)

    filewriter.save_log_file(self.name, self.keywords)
    log.logger.info('(%d) related keywords has just updated.' % keyword_total)

    self.destroy()
    exit()
def remove_proxy_server_ip_list(self):
    """Discard the first proxy stored in the ``'proxy'`` log file.

    Does nothing when the stored list is empty or missing. When removing
    the first entry empties the list, a fresh list is requested through
    ``self.request_proxy_server_ip_list()``; otherwise the shortened list
    is written back to the ``'proxy'`` log file.
    """
    remaining = filewriter.get_log_file('proxy')
    if not remaining:
        return

    del remaining[0]
    if len(remaining) != 0:
        filewriter.save_log_file('proxy', remaining)
    else:
        self.request_proxy_server_ip_list()
def start(self):
    """Run one Instagram pass: load state, log in, clean up, scan tags.

    Steps, in order:
      1. Load the stored security code; when its first entry is
         ``'blocked'`` the run is logged, torn down and terminated.
      2. Load the working copy of the tag list, recreating it from the
         master tag file when the copy is missing or empty.
      3. Shuffle the tags, log in, run ``follower()`` (and ``following()``
         when it returns ``True``), then ``scan_page()`` and
         ``end_report()``.

    Any ``Exception`` is logged and followed by ``end_report()``.
    """
    try:
        # Security-block state persisted by earlier runs.
        self.security_code = filewriter.get_log_file('instagram_security_code')

        # While blocked, stop until a security code has been entered.
        if self.security_code[0] == 'blocked':
            log.logger.info('Instagram is blocked.')
            self.destroy()
            exit()

        # Working copy of the tags.
        self.tag = filewriter.get_log_file('instagramcollecttag_copied')

        # No usable copy: rebuild it from the master tag file.
        if self.tag is None or len(self.tag) == 0:
            self.tag = filewriter.get_log_file('instagramcollecttag')
            filewriter.save_log_file('instagramcollecttag_copied', self.tag)

        # Without any tags there is nothing to do.
        if self.tag is None:
            self.destroy()
            exit()

        # Visit the tags in random order.
        random.shuffle(self.tag)

        self.login()

        # Clean up followers first; followings only when that succeeded.
        if self.follower() is True:
            self.following()

        # Main work.
        self.scan_page()

        self.end_report()
    except Exception as err:
        log.logger.error(err, exc_info=True)
        self.end_report()
def start(self):
    """Collect tags from the result table and update the stored tag list.

    Loads the stored tags for ``self.name``, opens
    ``self.DETAIL_URL + self.KEYWORD``, adds every new tag found in
    ``table#table_result`` (``#`` stripped), removes tags containing an
    excluded word, saves the list and logs the add/remove/total counts.
    ``self.ADD_COUNT`` and ``self.DEL_COUNT`` are incremented accordingly.

    Any ``Exception`` tears the driver down via ``self.destroy()`` and is
    logged; nothing is raised to the caller.
    """
    excluded_words = ('화장품', '남자', '남성', '피부', '운동화', '태그',
                      '가방', '치마', '스커트')
    try:
        # A missing log file would make len(None) raise; start empty instead.
        self.log = filewriter.get_log_file(self.name) or []
        log.logger.info('before tags (%d)' % (len(self.log)))

        if self.connect(site_url=self.DETAIL_URL + self.KEYWORD,
                        is_proxy=False,
                        default_driver='selenium',
                        is_chrome=True) is False:
            raise Exception('site connect fail')

        # Check for the table before parsing the page source, so the parsed
        # snapshot is taken after the table is known to be present.
        if self.selenium_extract_by_xpath(
                xpath='//*[@id="table_result"]') is False:
            raise Exception('selenium_extract_by_xpath fail.')

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        element = soup.find('table', id="table_result")

        if element:
            for row in element.find_all('tr'):
                try:
                    cell = row.find('td', class_='result_td01')
                    if cell is None:
                        # Rows without a tag cell (e.g. headers) are skipped.
                        continue
                    tag = cell.getText().strip().replace("#", "")
                    if tag and tag not in self.log:
                        log.logger.info(tag)
                        self.log.append(tag)
                        self.ADD_COUNT = self.ADD_COUNT + 1
                except Exception:
                    # Best effort: one malformed row must not abort the run.
                    continue

        # Drop tags containing an excluded word. The filtered list is built
        # separately because removing from a list while iterating over it
        # skips the element that follows each removal.
        kept = [
            tag for tag in self.log
            if not any(word in tag for word in excluded_words)
        ]
        self.DEL_COUNT = self.DEL_COUNT + (len(self.log) - len(kept))
        self.log[:] = kept

        filewriter.save_log_file(self.name, self.log)
        log.logger.info(
            'add(%d), remove(%d), total(%d) tags has just updated.' %
            (self.ADD_COUNT, self.DEL_COUNT, len(self.log)))
        self.destroy()
    except Exception as e:
        self.destroy()
        log.logger.error(e, exc_info=True)
def scan_page(self, url):
    """Read the lowest price on the current page and alert when it drops.

    Args:
        url: Product URL; used as the key in ``self.log`` and included in
            the alert message.

    The price text of the first listed seller is reduced to its digits and
    compared numerically with the price stored for ``url``. When the new
    price is lower, a message is sent to the ``'lowdeal'`` bot and the
    stored price is replaced. A URL seen for the first time (or whose
    stored value is not a number) just records the price. ``self.log`` is
    saved under ``self.name`` afterwards.

    Prices are now handled as ``int``. Previously they stayed strings, so
    the comparison was lexicographic and ``format(price, ',')`` always
    raised ``ValueError``, which silently suppressed the alert.

    Any ``Exception`` is logged; nothing is raised to the caller.
    """
    try:
        if self.selenium_extract_by_xpath(tag={
                'tag': 'table',
                'attr': 'class',
                'name': 'tbl_lst'
        }) is False:
            raise Exception('selenium_extract_by_xpath fail.')

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        element = soup.find('table', class_="tbl_lst").find(
            'tr', class_="_itemSection").find_all('td', recursive=False)

        if element:
            price_str = element[1].find('a').getText().strip()
            # int() raises ValueError when the text has no digits; the outer
            # handler logs it and nothing is stored.
            price = int(re.sub(r"\D", "", price_str))

            # Record that collection succeeded.
            self.record_success_log()

            try:
                # Values saved by earlier versions may be digit strings.
                previous = int(self.log[url])
            except (KeyError, TypeError, ValueError):
                previous = None

            if previous is None:
                self.log[url] = price
            elif previous > price:
                try:
                    title = soup.find(
                        'div', class_="h_area").find('h2').getText().strip()
                    service = element[0].find('img')['alt']
                    price_before = format(previous, ',')
                    price_new = format(price, ',')
                    message = '[네이버쇼핑] 최저가가 갱신되었습니다.\n[%s]\n%s\n이전 가격: %s원\n최저 가격: %s원\n%s' % (
                        service, title, price_before, price_new, url)
                    telegrambot.send_message(message, 'lowdeal')
                except Exception as e:
                    # A failed notification must not lose the new price.
                    log.logger.error(e, exc_info=True)
                self.log[url] = price

            filewriter.save_log_file(self.name, self.log)
    except Exception as e:
        log.logger.error(e, exc_info=True)
def save_file(self, log):
    """Save ``log`` under ``self.name`` via ``filewriter.save_log_file``.

    Note: the ``log`` parameter shadows any module-level name ``log``
    inside this method.
    """
    filewriter.save_log_file(self.name, log)
def scan_page(self):
    """Process the popular posts of the first tag in ``self.tag``.

    Opens ``self.TAG_URL + self.tag[0]``, then for each popular post opens
    its layer, stores the channel name in ``self.TARGET_NAME`` and either
    only collects replies (when ``self.REPLY`` is empty) or runs
    like / follow / reply. Afterwards the tag is popped, the working tag
    copy is saved, and the method calls itself for the next tag unless more
    than 10 minutes have passed since ``self.starttime``.

    Returns:
        ``True`` when the time budget is exceeded; otherwise ``None``.

    On any ``Exception`` the current tag is removed from both tag files and
    ``self.end_report()`` is called.
    """
    try:
        # Give up after repeated per-post failures.
        if self.FAIL_CNT > 3:
            raise Exception('Block error')

        if self.connect(site_url=self.TAG_URL + self.tag[0] + '/', is_proxy=False, default_driver='selenium', is_chrome=True) is False:
            raise Exception('site connect fail')

        if self.selenium_extract_by_xpath(tag={'tag': 'div', 'attr': 'class', 'name': 'EZdmt'}) is False:
            raise Exception('selenium_extract_by_xpath fail.')

        # Popular posts at the top of the page (up to 9).
        # NOTE(review): `list` shadows the builtin for the rest of the method.
        list = self.driver.find_element_by_xpath("//div[@class='EZdmt']").find_elements_by_xpath('.//div[contains(@class,"v1Nh3")]/a')

        for li in list:
            try:
                self.is_need_sleep = False

                # Open the post layer.
                li.click()

                # Wait for the layer.
                if self.selenium_extract_by_xpath(xpath='//article[contains(@class,"M9sTE")]') is False:
                    raise Exception('selenium_extract_by_xpath fail.')

                # Channel name.
                target_name = self.driver.find_element_by_xpath('//article[contains(@class,"M9sTE")]/header/div[2]/div[1]/div[1]/h2/a')
                if target_name:
                    self.TARGET_NAME = target_name.text

                # No replies available to send yet: only collect, close the
                # layer and move on.
                if len(self.REPLY) == 0:
                    self.reply_collect()
                    self.selenium_click_by_xpath(xpath='//button[contains(@class,"ckWGn")]')
                    continue

                if self.like() is True:
                    self.follow()
                    self.reply_collect()
                    self.reply_send()

                # If something was done, sleep to avoid being blocked.
                # NOTE(review): is_need_sleep is set to False above;
                # presumably like()/follow()/reply_send() set it - confirm.
                if self.is_need_sleep is True:
                    sleep_second = random.randint(50, 60)
                    sleep(sleep_second)
                    # NOTE(review): the original indentation of this
                    # assignment could not be recovered; confirm placement.
                    self.is_need_sleep = True

                # Close the layer.
                self.selenium_click_by_xpath(xpath='//button[contains(@class,"ckWGn")]')
            except Exception as e:
                log.logger.error(e, exc_info=True)
                self.FAIL_CNT = self.FAIL_CNT + 1
                break

        # The tag is done: drop it and persist the remaining working copy.
        self.tag.pop(0)
        filewriter.save_log_file('instagramcollecttag_copied', self.tag)
        self.CRITICAL_CNT = 0

        # Minutes elapsed since the run started.
        duration = int((datetime.now() - self.starttime).total_seconds() / 60)

        # Stop once more than 10 minutes of work have been done.
        if duration > 10:
            return True

        # Continue with the next tag (recursive call).
        if len(self.tag) > 0:
            self.scan_page()
    except Exception as e:
        self.CRITICAL_CNT = self.CRITICAL_CNT + 1
        log.logger.error(e, exc_info=True)

        # Remove the failing tag from both the working copy and the master.
        # NOTE(review): list.remove raises ValueError when the tag is absent
        # and self.tag[0] raises IndexError when self.tag is empty; neither
        # is handled here.
        tag_copy = filewriter.get_log_file('instagramcollecttag_copied')
        if tag_copy:
            tag_copy.remove(self.tag[0])
            filewriter.save_log_file('instagramcollecttag_copied', tag_copy)
        tag = filewriter.get_log_file('instagramcollecttag')
        if tag:
            tag.remove(self.tag[0])
            filewriter.save_log_file('instagramcollecttag', tag)
        self.end_report()
def login(self):
    """Log in to Instagram, handling the security-code challenge.

    Restores cookies and returns early when the session already looks
    logged in. Otherwise opens ``self.LOGIN_URL``, fills in the credentials
    stored under ``self.name + '_account'`` and submits the form. It then
    handles the "this was me" confirmation and the e-mailed security code,
    which is read from the ``'instagram_security_code'`` log file.

    Returns:
        ``True`` on the success paths; ``False`` after an ``Exception``
        (which is logged and followed by ``self.end_report()``).
    """
    try:
        # Check whether we are already logged in.
        if self.connect(site_url=self.UNFOLLOW_URL, is_proxy=False, default_driver='selenium', is_chrome=True) is False:
            raise Exception('site connect fail')

        self.get_cookie()

        if self.connect(site_url=self.UNFOLLOW_URL, is_proxy=False, default_driver='selenium', is_chrome=True) is False:
            raise Exception('site connect fail')

        try:
            # Absence of this nav button is treated as "already logged in".
            if self.selenium_exist_by_xpath(xpath='//*[@id="react-root"]/section/nav/div[2]/div/div/div[3]/div/span/a[1]/button') is False:
                log.logger.info('Already loggined.')
                return True
        except:
            pass

        # Not logged in: go to the login page.
        if self.connect(site_url=self.LOGIN_URL, is_proxy=False, default_driver='selenium', is_chrome=True) is False:
            raise Exception('site connect fail')

        # Load the account data (index 0: user name, index 1: password).
        # NOTE(review): the nesting of everything below under
        # `if account_data:` was reconstructed from a collapsed line; confirm.
        account_data = filewriter.get_log_file(self.name + '_account')
        if account_data:
            if self.selenium_extract_by_xpath(tag={'tag': 'input', 'attr': 'name', 'name': 'username'}) is False:
                raise Exception('selenium_extract_by_xpath fail.')

            # Enter the user name.
            if self.selenium_input_text_by_xpath(text=account_data[0], tag={'tag': 'input', 'attr': 'name', 'name': 'username'}) is False:
                raise Exception('selenium_input_text_by_xpath fail. username')

            # Enter the password.
            if self.selenium_input_text_by_xpath(text=account_data[1], tag={'tag': 'input', 'attr': 'name', 'name': 'password'}) is False:
                raise Exception('selenium_input_text_by_xpath fail. password')

            # Enter the user name (the code repeats both inputs a second time).
            if self.selenium_input_text_by_xpath(text=account_data[0], tag={'tag': 'input', 'attr': 'name', 'name': 'username'}) is False:
                raise Exception('selenium_input_text_by_xpath fail. username')

            # Enter the password (second time).
            if self.selenium_input_text_by_xpath(text=account_data[1], tag={'tag': 'input', 'attr': 'name', 'name': 'password'}) is False:
                raise Exception('selenium_input_text_by_xpath fail. password')

            # Click the login button.
            if self.selenium_click_by_xpath(tag={'tag': 'button', 'attr': 'type', 'name': 'submit'}) is False:
                raise Exception('selenium_click_by_xpath fail. submit')

            sleep(3)

            # Unusual-login-attempt prompt: confirm "this was me".
            try:
                if self.selenium_exist_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[3]/form/div[2]/span/button') is True:
                    self.selenium_click_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[3]/form/div[2]/span/button')
            except:
                pass

            # Security-code challenge.
            try:
                if self.selenium_exist_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[1]/div/p') is True:
                    if self.selenium_exist_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[3]/form/span/button') is True:
                        # No security code stored: mark the account as
                        # blocked and stop the Instagram process.
                        if self.security_code[0] == '':
                            self.security_code[0] = 'blocked'
                            filewriter.save_log_file('instagram_security_code', self.security_code)
                            log.logger.info('Instagram has just blocked.')
                            telegrambot.send_message('Instagram has just blocked.', 'instagram')
                            self.destroy()
                            exit()

                        # Request that the code be sent.
                        self.selenium_click_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[3]/form/span/button')

                        # Telegram notification.
                        telegrambot.send_message('Please check instagram security code from your email in 1 minutes.', 'instagram')
                        log.logger.info('Please check instagram security code from your email in 1 minutes.')

                        # Wait 50 seconds for the stored code to be updated.
                        sleep(50)

                        # Reload the newly entered data.
                        self.security_code = filewriter.get_log_file('instagram_security_code')

                        # Type the security code.
                        if self.selenium_input_text_by_xpath(text=self.security_code[0], xpath='//*[@id="security_code"]') is False:
                            raise Exception('selenium_input_text_by_xpath fail. security_code')

                        # Submit.
                        self.selenium_click_by_xpath(xpath='//*[@id="react-root"]/section/div/div/div[2]/form/span/button')

                        log.logger.info('security_code. (%s)' % (self.security_code[0]))

                        # Clear the used code.
                        self.security_code[0] = ''
                        filewriter.save_log_file('instagram_security_code', self.security_code)

                        sleep(5)
            except Exception as e:
                log.logger.error(e, exc_info=True)
                pass

            # NOTE(review): the 'login fail error' raised here is swallowed by
            # the bare `except` right below, so execution always continues to
            # 'login success!' - confirm whether that is intended.
            try:
                if self.selenium_exist_by_xpath(xpath='//*[@id="react-root"]/section/nav/div[2]/div/div/div[3]/div/div/div/div/div[3]/div[1]/a') is True:
                    log.logger.info('login fail.')
                    raise Exception('login fail error')
            except:
                pass

            log.logger.info('login success!')

            self.set_cookie()

            sleep(2)

            return True
    except Exception as e:
        log.logger.error(e, exc_info=True)
        self.end_report()

    return False
def start(self):
    """Collect today's price data for every apartment in ``self.DETAIL_URL``.

    Loads proxies (unless ``self.TEST``), the run log and the stored
    ``'yoonaazzi_data'`` JSON. For each ``(apt, id)`` pair it skips
    apartments that already have an entry for today, resets the
    per-apartment counters, seeds ``self.prices_filter`` from yesterday's
    prices, collects and filters prices and stores them via
    ``self.set_log``. Finally the data file is saved and the process exits.

    The method always ends with ``exit()``, also after a logged exception.
    """
    try:
        # Proxy setup.
        count = 0
        if self.TEST == False:
            proxy = Proxy()
            self.ips = proxy.get()
            self.ips_index = 0
            if self.ips == False:
                log.logger.info('proxy ip empty')
                exit()
            # NOTE(review): placement inside the TEST branch was
            # reconstructed from a collapsed line; confirm.
            log.logger.info(', '.join(self.ips))

        self.log = filewriter.get_log_file(self.name)

        # "Today" and "yesterday" are computed in the Asia/Seoul timezone.
        date_now = datetime.now(timezone('Asia/Seoul'))
        self.today = date_now.strftime('%Y-%m-%d')
        self.yesterday = (date_now - timedelta(days=1)).strftime('%Y-%m-%d')

        self.data = filewriter.get_log_file('yoonaazzi_data', is_json=True)

        for apt, id in self.DETAIL_URL.items():
            count = count + 1

            # Reset the page counter for each apartment.
            self.page = 1

            exists = False

            # Create an empty entry the first time an apartment is seen.
            for apted in self.data.keys():
                if apted == apt:
                    apt = apted
                    exists = True

            if exists == False:
                print('없음')
                self.data[apt] = {}

            # Skip apartments that already have data for today.
            try:
                if self.today in self.data[apt].keys():
                    log.logger.info('%s today exists.' % (apt))
                    continue
            except:
                pass

            log.logger.info('%s collecting start...(%d/%d)' % (apt, count, len(self.DETAIL_URL)))

            self.total_prices_complete = 0
            self.total_jeonses_complete = 0
            self.total_prices = 0
            self.total_jeonses = 0
            self.prices = {}
            self.prices_filter = {}
            self.jeonses = {}
            self.jeonses_filter = {}

            # Seed with yesterday's data when available (to avoid 0 values).
            if self.yesterday in self.data[apt]:
                if 'prices' in self.data[apt][self.yesterday]:
                    # Copy the data so the stored entry is not referenced.
                    self.prices_filter = self.data[apt][
                        self.yesterday]['prices'].copy()

            try:
                # Collect prices until collect_price() returns False.
                while 1:
                    if self.collect_price(apt=apt, id=id) == False:
                        break

                # Nothing collected: move on to the next apartment.
                # NOTE(review): this check is assumed to follow the loop
                # rather than sit inside it; confirm against the original.
                if self.total_prices == 0 and self.total_jeonses == 0:
                    continue

                # Filter the prices.
                self.filter_price(apt=apt, id=id)

                # Store the log for this apartment.
                self.set_log(apt=apt)

                sleep(round(uniform(1.0, 3.0), 1))
            except:
                # NOTE(review): a bare except also swallows SystemExit and
                # KeyboardInterrupt raised while collecting.
                pass

        # ** All apartments collected **
        # Save today's data.
        filewriter.save_log_file('yoonaazzi_data', self.data)
        log.logger.info('yoona_azzi complete.')

        exit()
    except Exception as e:
        log.logger.error(e, exc_info=True)
        exit()
def scan_page(self):
    """Tally ordered quantities per item option and save them.

    Steps:
      1. If any inquiry counter on the dashboard is above zero, notify the
         ``'jshk'`` bot (best effort; failures are ignored).
      2. Open the new-orders page, switch into the ``__naverpay`` iframe and
         add up the amount (cell 18) per item option (cell 16) for every
         table row. Rows whose destination (cell 40) is in Jeju trigger a
         notification.
      3. Open the awaiting-shipment page and add its rows to the same tally.
      4. Save the tally to the ``'jshk_order_data'`` log file and print it.

    Returns:
        ``True`` on success. On any error a screenshot is saved, the error
        is logged, the driver is destroyed and the process exits.

    Changes from the previous version: the Jeju check had its operands
    reversed (``destination in '제주특별자치도'``), so a full address never
    matched and an empty destination always did; the always-true
    ``if dict:`` guard is replaced by the unconditional save it amounted
    to; the duplicated row parsing is shared by one helper.
    """
    table_xpath = '//*[@id="__app_root__"]/div/div[2]/div[3]/div[4]/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/table'
    jeju = '제주특별자치도'

    try:
        sleep(5)
        self.remove_layer()

        # -- Inquiry check (best effort) --
        try:
            ask = self.driver.find_elements_by_xpath(
                '//*[@name="inquery"]/div/div[2]/ul/li')
            if ask and any(
                    int(ask_li.find_element_by_xpath(
                        './/p[@class="text-number"]').text) > 0
                    for ask_li in ask):
                telegrambot.send_message('정성한끼 고객이 상담을 기다리고 있습니다.', 'jshk')
        except Exception:
            # A missing or unparsable inquiry widget must not stop the scan.
            pass

        order_list = {}

        def tally_rows(rows, notify_jeju):
            # Add each row's amount to order_list; any row error is fatal.
            for row in rows:
                try:
                    if not row:
                        continue
                    cells = BeautifulSoup(
                        row.get_attribute('innerHTML'),
                        'html.parser').find_all('td')
                    if not cells:
                        continue

                    item_option = cells[16].getText().strip()
                    item_amount = int(cells[18].getText().strip())
                    destination = ''
                    if notify_jeju:
                        destination = cells[40].getText().strip()

                    order_list[item_option] = (
                        order_list.get(item_option, 0) + item_amount)

                    # Notify for Jeju orders. Both a full address containing
                    # the province name and a non-empty abbreviation of it
                    # are accepted; an empty cell no longer matches.
                    if destination and (jeju in destination
                                        or destination in jeju):
                        telegrambot.send_message(
                            '제주도 주문건을 확인해주세요.', 'jshk')
                except Exception as e:
                    log.logger.error(e, exc_info=True)
                    self.destroy()
                    exit()

        # -- Go to the new-orders page --
        if self.selenium_click_by_xpath(tag={
                'tag': 'a',
                'attr': 'data-nclicks-code',
                'name': 'orddel.new'
        }) is False:
            raise Exception('selenium_click_by_xpath fail. orddel.new')

        sleep(10)
        self.remove_layer()

        # The order table is inside an iframe.
        self.driver.switch_to.frame(
            frame_reference=self.driver.find_element_by_xpath(
                '//iframe[@id="__naverpay"]'))

        new_rows = self.driver.find_element_by_xpath(
            table_xpath).find_elements_by_xpath('.//tbody/tr')
        tally_rows(new_rows, notify_jeju=True)

        # -- Go to the awaiting-shipment page --
        if self.selenium_click_by_xpath(
                xpath='//*[@id="__app_root__"]/div/div[2]/div[1]/div/div[2]/ul/li[4]/div/a[1]'
        ) is False:
            raise Exception('selenium_click_by_xpath fail. orddel.wait')

        sleep(5)

        waiting_rows = self.driver.find_element_by_xpath(
            table_xpath).find_elements_by_xpath('.//tbody/tr')
        tally_rows(waiting_rows, notify_jeju=False)

        # -- Save the data --
        # Saved even when empty, matching the old always-true `if dict:`.
        filewriter.save_log_file('jshk_order_data', order_list)
        print(order_list)

        return True
    except Exception as e:
        self.driver.save_screenshot('smartstore_screenshot.png')
        log.logger.error(e, exc_info=True)
        self.destroy()
        exit()

    # Only reached if exit() above does not terminate the process.
    return False
def start(self):
    """Search fares for the destination at ``self.process_count``.

    Opens the Jeju Air main page, logs in, selects ICN as departure, loads
    the destination list when ``self.result_list`` is empty, selects the
    current destination and two adults, starts the search and calls
    ``self.collect_price()``. After a successful collection the progress
    ``[self.process_count, self.result_list]`` is saved under ``self.name``
    and the method calls itself for the next destination.

    An ``IndexError`` (raised once ``self.process_count`` runs past
    ``self.result_list``) ends the run: results are sent, the driver quits
    and the process exits. Any other ``Exception`` is logged and the method
    calls itself again.
    """
    try:
        log.logger.info('[%s] collection start.' % self.name)

        self.count_collect_fail = 0

        if self.connect(
                site_url='http://www.jejuair.net/jejuair/kr/main.do',
                is_proxy=False,
                default_driver='selenium',
                is_chrome=False) is False:
            raise Exception('site connect fail')

        # Log in.
        # NOTE(review): this calls login() on a global `jejuair`, not on
        # `self` - confirm that is intended.
        jejuair.login()

        # Open the departure selector.
        if self.selenium_click_by_xpath(tag={
                'tag': 'div',
                'attr': 'id',
                'name': 'divDepStn1'
        }) is False:
            raise Exception('selenium_click_by_xpath fail.')

        # Select Incheon (ICN).
        if self.selenium_click_by_xpath(tag={
                'tag': 'button',
                'attr': 'aircode',
                'name': 'ICN'
        }) is False:
            raise Exception('selenium_click_by_xpath fail.')

        # Load the destination list once.
        if len(self.result_list) == 0:
            if self.get_return_country_list() is False:
                raise Exception('get_return_country_list fail.')

        # Destination to search in this pass; raises IndexError when all
        # destinations have been processed.
        return_country = self.result_list[self.process_count]

        log.logger.info(
            'searching start %s %s' %
            (return_country['title'], return_country['airport']))

        # Select the destination airport.
        if self.selenium_click_by_xpath(
                tag={
                    'tag': 'button',
                    'attr': 'aircode',
                    'name': self.result_list[self.process_count]['airport']
                }) is False:
            raise Exception('selenium_click_by_xpath fail.')

        # Confirm the date selection.
        if self.selenium_click_by_xpath(tag={
                'tag': 'button',
                'attr': 'id',
                'name': 'btnDoubleOk'
        }) is False:
            raise Exception('selenium_click_by_xpath fail. btnDoubleOk')

        # Open the adult passenger count selector.
        # NOTE(review): this and the next error message say 'btnDoubleOk'
        # although the elements are divSelADT / ulADT.
        if self.selenium_click_by_xpath(tag={
                'tag': 'div',
                'attr': 'id',
                'name': 'divSelADT'
        }) is False:
            raise Exception('selenium_click_by_xpath fail. btnDoubleOk')

        # Choose 2 adults.
        if self.selenium_click_by_xpath(tag={
                'tag': 'ul',
                'attr': 'id',
                'name': 'ulADT'
        }, etc='/li[2]') is False:
            raise Exception('selenium_click_by_xpath fail. btnDoubleOk')

        # Click "reserve".
        if self.selenium_click_by_xpath(tag={
                'tag': 'button',
                'attr': 'id',
                'name': 'btnReservation'
        }) is False:
            raise Exception('selenium_click_by_xpath fail. btnReservation')

        # Click "search flights"; when that fails, accept the agreement
        # layer and try once more.
        if self.selenium_click_by_xpath(tag={
                'tag': 'div',
                'attr': 'id',
                'name': 'btnSearch'
        }) is False:
            # Tick the layer's checkbox label.
            if self.selenium_click_by_xpath(tag={
                    'tag': 'label',
                    'attr': 'for',
                    'name': 'svch3'
            }) is False:
                raise Exception('selenium_click_by_xpath fail. svch3')
            else:
                # Click the layer's confirm button.
                if self.selenium_click_by_xpath(tag={
                        'tag': 'div',
                        'attr': 'id',
                        'name': 'divAgreeConfirm'
                }) is False:
                    raise Exception(
                        'selenium_click_by_xpath fail. divAgreeConfirm')
                else:
                    # Click "search flights" again.
                    if self.selenium_click_by_xpath(tag={
                            'tag': 'div',
                            'attr': 'id',
                            'name': 'btnSearch'
                    }) is False:
                        raise Exception(
                            'selenium_click_by_xpath fail. btnSearch')

        # Start extracting prices.
        # NOTE(review): this iterates over the keys of the `return_country`
        # dict and `country` is unused; the `else` is assumed to pair with
        # the `if` (not the `for`) - confirm against the original layout.
        for country in return_country:
            if self.collect_price() is False:
                raise Exception('collect_price fail.')
            else:
                self.process_count = self.process_count + 1

                log.logger.info(
                    'searching complete %s %s' %
                    (return_country['title'], return_country['airport']))
                log.logger.info(return_country)

                # After a successful collection, record the current
                # progress in the log file.
                filewriter.save_log_file(
                    self.name, [self.process_count, self.result_list])

                # Start collecting the next destination (recursive call).
                self.start()
    except IndexError:
        # All destinations processed: finish.
        log.logger.info('[%s] collection complete.' % self.name)
        log.logger.info(self.result_list)
        self.send_message()
        self.driver.quit()
        exit()
    except Exception as e:
        log.logger.error(e, exc_info=True)

        # Restart the collection for the current destination.
        # NOTE(review): this retry recurses without any limit, so a
        # persistent failure ends in RecursionError.
        self.start()