def fetch_price(self, soup):
    """Extract the sale price from the item page and store it in item_info.

    The price node (span.item-price.bold) contains text like "¥1,234,567";
    thousands separators are stripped so the stored value is a plain digit
    string.

    Args:
        soup: BeautifulSoup document of the item page.
    """
    price_text = soup.select_one('span.item-price.bold').get_text(strip=True)
    # Raw string avoids invalid-escape warnings; matches digit runs with
    # optional comma groups, e.g. "1,234,567" or "1234".
    match = re.search(r'\d+(?:,\d+)*', price_text)
    if match is None:
        # Layout changed or price missing — log and skip instead of
        # crashing with AttributeError on .group().
        logger.debug(f'販売価格の抽出に失敗:{price_text}')
        return
    price = match.group().replace(',', '')
    self.item_info['販売価格'] = price
    logger.debug(f'販売価格:{price}円')
def main():
    """Open Instagram in a Selenium-driven browser and dump the parsed HTML.

    Exits the process if the driver cannot be configured; otherwise leaves
    the browser window open for inspection via keep_open_driver.
    """
    url = 'https://www.instagram.com/'
    # Configure the Selenium driver (visible browser, webdriver-manager,
    # extensions enabled, reusing the saved Chrome profile).
    driver = set_driver(isHeadless=False, isManager=True, isExtension=True,
                        profile_path=CHROME_PROFILE_PATH)
    if driver is None:
        # set_driver returns None on invalid configuration — abort.
        sys.exit()
    get_with_wait(driver, url, isWait=True)  # navigate with explicit wait
    soup = parse_html_selenium(driver)
    logger.debug(soup)
    # NOTE(review): a large commented-out copy of the search()/export
    # pipeline was removed here — search() is the live implementation.
    keep_open_driver(driver)
def fetch_info(self, driver):
    """Navigate to this item's page and scrape title, price, and detail table.

    Args:
        driver: Selenium WebDriver used to load self.url.
    """
    get_with_wait(driver, self.url, isWait=True)
    # Wait until the 11th row of the detail table is present, which signals
    # the page has finished rendering; a timeout is logged but not fatal.
    row_selector = 'table[class="item-detail-table"] tr:nth-of-type(11)'
    try:
        WebDriverWait(driver, timeout=10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, row_selector))
        )
    except TimeoutException as err:
        logger.debug(err)
    page = parse_html_selenium(driver)
    self.fetch_title(page)
    self.fetch_price(page)
    self.fetch_table(page)
def fetch_time(self, time1, time2):
    """Record listing/sale timestamps (text and UNIX) and the elapsed hours.

    Args:
        time1: node whose .text is the listing time ('%Y/%m/%d %H:%M:%S').
        time2: node whose .text is the sale time, same format.
    """
    fmt = '%Y/%m/%d %H:%M:%S'
    created = time1.text
    updated = time2.text
    self.item_info['出品時刻'] = created
    self.item_info['売却時刻'] = updated
    created_unix = dt.strptime(created, fmt).timestamp()
    updated_unix = dt.strptime(updated, fmt).timestamp()
    self.item_info['出品時刻(UNIX)'] = created_unix
    self.item_info['売却時刻(UNIX)'] = updated_unix
    # Elapsed time between listing and sale, in hours, rounded to 2 places.
    delta = round((updated_unix - created_unix) / 3600, 2)
    self.item_info['売却時刻-出品時刻(hours)'] = delta
    logger.debug(f'出品時刻:{created}, UNIX:{created_unix}')
    logger.debug(f'売却時刻:{updated}, UNIX:{updated_unix}')
    logger.debug(f'売却時刻-出品時刻(hours):{delta}')
def fetch_seller(self, seller):
    """Store the seller's name and like/bad rating counts in item_info.

    Args:
        seller: BeautifulSoup node for the seller section of the page.
    """
    name = seller.select_one('a').get_text(strip=True)
    rating_nodes = seller.select('div.item-user-ratings span')
    # First span holds the positive count, second the negative count.
    like_count = int(rating_nodes[0].get_text(strip=True))
    bad_count = int(rating_nodes[1].get_text(strip=True))
    self.item_info['出品者名'] = name
    self.item_info['出品者評価数(like)'] = like_count
    self.item_info['出品者評価数(bad)'] = bad_count
    logger.debug(f'出品者名:{name}')
    logger.debug(f'Like:{like_count}')
    logger.debug(f'Bad:{bad_count}')
def search():
    """Scrape sold Mercari listings for a hard-coded search and save to Excel.

    Drives a Selenium browser through the fixed search URL, scrapes each
    item's details, pivots them into a DataFrame, and writes a styled
    .xlsx next to the executable (or the CWD when not frozen).

    Returns:
        'Success' once the workbook has been written.
    """
    front_logger('検索中・・・')
    url = 'https://www.mercari.com/jp/search/?sort_order=&keyword=%E3%83%8A%E3%82%A4%E3%82%AD&category_root=2&category_child=&brand_name=&brand_id=&size_group=&price_min=3000&price_max=5000&item_condition_id%5B1%5D=1&status_trading_sold_out=1'
    # Configure the Selenium driver.
    driver = set_driver(isHeadless=False, isManager=False, isExtension=True,
                        profile_path=CHROME_PROFILE_PATH)
    if driver is None:
        # set_driver returns None on invalid configuration — abort.
        sys.exit()
    get_with_wait(driver, url, isWait=True)  # navigate with explicit wait
    soup = parse_html_selenium(driver)

    # Absolute links to every item card on the results page.
    links = [TOP_URL + node.attrs['href']
             for node in soup.select('section.items-box a')]

    items = []
    start = dt.now().strftime('%Y%m%d_%H%M%S')
    for link in links:
        logger.debug(f'No.{Item.count + 1}')
        item = Item(link)
        items.append(item)
        item.fetch_info(driver)
        logger.debug('')
        # NOTE(review): deliberate demo limiter — only the first item is
        # scraped; remove this break to process every result.
        break
    end = dt.now().strftime('%Y%m%d_%H%M%S')
    logger.debug(f'開始時間:{start}, 終了時間:{end}')

    # Build the output path; when frozen by PyInstaller on macOS, climb out
    # of the .app bundle so the file lands next to the application.
    filename = dt.now().strftime('%Y%m%d_%H%M') + '_mercari_demo' + '.xlsx'
    if getattr(sys, 'frozen', False):
        directory_path = os.path.dirname(sys.executable)
        if '.app' in directory_path:
            idx = directory_path.find('.app')
            directory_path = directory_path[:idx]
            idx = directory_path.rfind('/')
            directory_path = directory_path[:idx]
    else:
        directory_path = os.getcwd()
    file_path = join(directory_path, filename)

    # Pivot per-item dicts into column lists keyed by field name.
    keys = items[0].item_info
    item_dict = {k: [] for k in keys}
    for item in items:
        for k, v in item.item_info.items():
            item_dict[k].append(v)

    df = pd.DataFrame(item_dict)
    df.index += 1               # 1-based row numbering in the sheet
    excel_save(df, file_path)   # save workbook
    set_font(file_path)         # set font to Meiryo
    set_border(file_path)       # add cell borders
    return 'Success'
def front_logger(message):
    """Log *message* locally and forward it, timestamped, to the Eel UI."""
    stamped = dt.now().strftime('%m/%d %H:%M:%S: ') + message
    logger.debug(message)
    eel.logger(stamped)
def fetch_title(self, soup):
    """Store the item title (h1.item-name) in item_info and log it.

    Args:
        soup: BeautifulSoup document of the item page.
    """
    item_title = soup.select_one('h1.item-name').get_text(strip=True)
    self.item_info['商品タイトル'] = item_title
    logger.debug(f'商品タイトル:{item_title}')