Ejemplo n.º 1
0
    def fetch_price(self, soup):
        """Extract the sale price from the item page and store it.

        Reads the price text from ``span.item-price.bold``, keeps only the
        leading numeric run (digits plus thousands separators), strips the
        commas, and records the result in ``self.item_info['販売価格']``.

        Args:
            soup: BeautifulSoup object of the parsed item detail page.
        """
        price = soup.select_one('span.item-price.bold').get_text(strip=True)
        # Raw string: the original non-raw '(\d)...' literal relies on
        # invalid escape sequences, which raise SyntaxWarning on
        # Python 3.12+ (and will eventually become errors).
        pattern = r'(\d)*(,)*(\d)*(,)*(\d)+'
        price = re.search(pattern, price).group()
        price = price.replace(',', '')  # drop thousands separators
        self.item_info['販売価格'] = price
        logger.debug(f'販売価格:{price}円')
Ejemplo n.º 2
0
def main():
    """Open the Instagram top page in a Selenium-driven browser.

    Configures the Chrome driver, navigates to the page with an explicit
    wait, dumps the parsed HTML to the debug log, and keeps the browser
    window open. Exits the process if the driver cannot be created.
    """
    url = 'https://www.instagram.com/'

    # Selenium driver setup: visible browser, webdriver-manager enabled,
    # extensions enabled, persistent Chrome profile.
    driver = set_driver(isHeadless=False, isManager=True, isExtension=True, profile_path=CHROME_PROFILE_PATH)

    # set_driver returns None on an invalid configuration -> abort.
    if driver is None:
        sys.exit()

    get_with_wait(driver, url, isWait=True)  # navigate with explicit wait
    soup = parse_html_selenium(driver)
    logger.debug(soup)

    # NOTE(review): a large commented-out scrape/export flow was removed
    # here — the live version of that flow lives in search().

    keep_open_driver(driver)
Ejemplo n.º 3
0
    def fetch_info(self, driver):
        """Load this item's detail page and scrape its fields."""
        # Navigate to the item page, then wait for the detail table row
        # to render before parsing.
        get_with_wait(driver, self.url, isWait=True)
        wait_selector = 'table[class="item-detail-table"] tr:nth-of-type(11)'
        locator = (By.CSS_SELECTOR, wait_selector)
        waiter = WebDriverWait(driver, timeout=10)
        try:
            waiter.until(EC.presence_of_element_located(locator))
        except TimeoutException as err:
            # Best effort: log the timeout and parse whatever has loaded.
            logger.debug(err)
        soup = parse_html_selenium(driver)

        for extract in (self.fetch_title, self.fetch_price, self.fetch_table):
            extract(soup)
Ejemplo n.º 4
0
    def fetch_time(self, time1, time2):
        """Record listing/sale timestamps and the hours between them."""
        fmt = '%Y/%m/%d %H:%M:%S'
        listed_at = time1.text
        sold_at = time2.text
        self.item_info['出品時刻'] = listed_at
        self.item_info['売却時刻'] = sold_at

        # Convert both timestamps to UNIX epoch seconds.
        listed_epoch = dt.strptime(listed_at, fmt).timestamp()
        sold_epoch = dt.strptime(sold_at, fmt).timestamp()
        self.item_info['出品時刻(UNIX)'] = listed_epoch
        self.item_info['売却時刻(UNIX)'] = sold_epoch

        # Time-to-sale in hours, rounded to two decimals.
        hours_to_sell = round((sold_epoch - listed_epoch) / 3600, 2)
        self.item_info['売却時刻-出品時刻(hours)'] = hours_to_sell

        logger.debug(f'出品時刻:{listed_at}, UNIX:{listed_epoch}')
        logger.debug(f'売却時刻:{sold_at}, UNIX:{sold_epoch}')
        logger.debug(f'売却時刻-出品時刻(hours):{hours_to_sell}')
Ejemplo n.º 5
0
    def fetch_seller(self, seller):
        """Record the seller's name and like/bad rating counts."""
        name = seller.select_one('a').get_text(strip=True)
        # First two spans under the ratings div are the like/bad counts.
        rating_nodes = seller.select('div.item-user-ratings span')
        likes = int(rating_nodes[0].get_text(strip=True))
        bads = int(rating_nodes[1].get_text(strip=True))

        self.item_info['出品者名'] = name
        self.item_info['出品者評価数(like)'] = likes
        self.item_info['出品者評価数(bad)'] = bads

        logger.debug(f'出品者名:{name}')
        logger.debug(f'Like:{likes}')
        logger.debug(f'Bad:{bads}')
Ejemplo n.º 6
0
def search():
    """Scrape sold items from a fixed Mercari search and export to Excel.

    Opens a hard-coded search URL (Nike, ¥3000-¥5000, sold-out, new) in a
    Selenium browser, collects the item links on the results page, scrapes
    the first item only (demo limit), pivots the collected info into a
    DataFrame, and saves a formatted .xlsx next to the executable (or in
    the CWD when not running frozen).

    Returns:
        str: 'Success' when the export completes.
    """
    front_logger('検索中・・・')

    url = 'https://www.mercari.com/jp/search/?sort_order=&keyword=%E3%83%8A%E3%82%A4%E3%82%AD&category_root=2&category_child=&brand_name=&brand_id=&size_group=&price_min=3000&price_max=5000&item_condition_id%5B1%5D=1&status_trading_sold_out=1'

    # Selenium driver setup (visible browser, extensions, Chrome profile).
    driver = set_driver(isHeadless=False,
                        isManager=False,
                        isExtension=True,
                        profile_path=CHROME_PROFILE_PATH)

    # set_driver returns None on an invalid configuration -> abort.
    if driver is None:
        sys.exit()

    get_with_wait(driver, url, isWait=True)  # navigate with explicit wait
    soup = parse_html_selenium(driver)

    # Absolute links to every item on the results page.
    links = [TOP_URL + node.attrs['href'] for node in soup.select('section.items-box a')]

    items = []
    start = dt.now().strftime('%Y%m%d_%H%M%S')
    for link in links:
        logger.debug(f'No.{Item.count + 1}')
        item = Item(link)
        items.append(item)
        item.fetch_info(driver)
        logger.debug('')
        break  # demo limit: scrape only the first item

    end = dt.now().strftime('%Y%m%d_%H%M%S')
    logger.debug(f'開始時間:{start}, 終了時間:{end}')

    # Output file name / directory. When frozen inside a macOS .app
    # bundle, walk up out of the bundle to its parent directory.
    filename = dt.now().strftime('%Y%m%d_%H%M') + '_mercari_demo' + '.xlsx'
    if getattr(sys, 'frozen', False):
        directory_path = os.path.dirname(sys.executable)
        if '.app' in directory_path:
            idx = directory_path.find('.app')
            directory_path = directory_path[:idx]
            idx = directory_path.rfind('/')
            directory_path = directory_path[:idx]
    else:
        directory_path = os.getcwd()
    file_path = join(directory_path, filename)

    # Pivot per-item dicts into {column: [values, ...]} for DataFrame.
    keys = items[0].item_info  # column names come from the first item
    item_dict = {k: [] for k in keys}
    for item in items:
        for k, v in item.item_info.items():
            item_dict[k].append(v)

    df = pd.DataFrame(item_dict)
    df.index += 1  # 1-based row index for the Excel output
    excel_save(df, file_path)  # save workbook
    set_font(file_path)        # set font to Meiryo
    set_border(file_path)      # add cell borders

    return 'Success'
Ejemplo n.º 7
0
def front_logger(message):
    """Log *message* to the debug log and mirror it to the eel frontend."""
    timestamp = dt.now().strftime('%m/%d %H:%M:%S: ')
    logger.debug(message)
    # Frontend line is prefixed with the timestamp; backend log is not.
    eel.logger(timestamp + message)
Ejemplo n.º 8
0
    def fetch_title(self, soup):
        """Extract the item title from the page, store it, and log it."""
        node = soup.select_one('h1.item-name')
        title = node.get_text(strip=True)
        self.item_info['商品タイトル'] = title
        logger.debug(f'商品タイトル:{title}')