import os
import csv
import time

import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")

logger = Logging_process("crawler_selenium")


class Crawler_BS4(object):
    """Crawler component."""

    def __init__(self, driver):
        """Build a BeautifulSoup object from the Selenium driver's page source."""
        self.html = driver.page_source
        self.soup = BeautifulSoup(self.html, "html.parser")
        self.html_save(self.soup)

    def find_index(self):
        all_index = self.soup.find('div', {'class': 'info'})
import os
import time

import pandas as pd
from pandas import DataFrame
from urllib.request import urlopen
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

logger = Logging_process('scrap_macro_economics_test')


class Naver_finance_crawler(object):
    """Used to crawl items on Naver Finance; crawls without using Selenium."""

    def __init__(self, df, url):
        self.df = df
        self.url = url

    def get_page_html(self, page_num):
        url = self.url + "&page={}".format(page_num)
        logger.info(url)
        try:
            html = urlopen(url)
        except Exception as error:
            logger.info(error)
            return None, None
import os
import time
import datetime

import pandas as pd
from pandas import DataFrame
from urllib.request import urlopen
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process

logger = Logging_process('doller')


class Naver_finance_crawler(object):
    def __init__(self, url, df):
        self.url = url
        self.df = df

    def get_page_html(self, url, n):
        url = url + "&page={}".format(n)
        logger.info(url)
        try:
            html = urlopen(url)
        except Exception as error:
            logger.info(error)
            return None, None
        soup = BeautifulSoup(html.read(), 'html.parser')
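# Hedged usage sketch (not in the source): how this crawler is likely driven.
# The URL and the empty DataFrame are illustrative assumptions; the excerpt
# above cuts off before the success-path return of get_page_html.
if __name__ == "__main__":
    example_url = ("https://finance.naver.com/marketindex/"
                   "exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW")  # hypothetical
    crawler = Naver_finance_crawler(example_url, DataFrame())
    crawler.get_page_html(example_url, 1)  # fetch page 1; logs and returns (None, None) on failure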
import json

from libs.logging_process import Logging_process
from naver_finance_ver2 import scrap_macro_economics_test as sme

logger = Logging_process('scrap_all')


class scrap_all(object):
    def __init__(self):
        self.sme = sme.scrap_macro_economics()

    def read_config_json(self):
        """Read the JSON file that stores (name, url, page) entries."""
        config_json_path = './setting_files/macro_economics_config.json'
        try:
            config_f = open(config_json_path, encoding="utf-8")
            logger.info("macro_economics_config.json loaded")
        except Exception as error:
            logger.info(error)
            return None
        config_f_data = json.load(config_f)
        config_f.close()
        return config_f_data
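# Hedged sketch of the expected shape of macro_economics_config.json, inferred
# only from the "(name, url, page)" docstring above; the field names and
# values are illustrative assumptions:
#
#     [
#         {"name": "usd_krw",
#          "url": "https://finance.naver.com/marketindex/...",
#          "page": 10}
#     ]
#
# Example call, assuming the file exists at the hard-coded path and holds a
# list of such entries:
if __name__ == "__main__":
    config = scrap_all().read_config_json()
    if config is not None:
        logger.info("loaded {} config entries".format(len(config)))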
import naver_finance_ver2.scrap_all
from libs.logging_process import Logging_process

logger = Logging_process('qubot_test')
logger.info("qubot_test start")

scrapper = naver_finance_ver2.scrap_all.scrap_all()
try:
    scrapper.scrap_check()
    logger.info("qubot_test end")
except Exception as error:
    logger.info("qubot_test error !! : {}".format(error))
import os
import time

import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

logger = Logging_process('nasdaq')


class Crawler_BS4(object):
    """Stock index crawler component that uses a Selenium driver."""

    def __init__(self, driver):
        """Build a BeautifulSoup object from the driver's page source."""
        self.html = driver.page_source
        self.soup = BeautifulSoup(self.html, "html.parser")
        logger.info("soup created")

    def get_page_html(self):
        try:
            soup = self.soup
            thead = soup.find('thead')
            tbody = soup.findAll('tbody')[1]
        except Exception as error:
            logger.info(error)
            return None, None
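# Hedged sketch (the file's real parsing code is cut off above): one plausible
# way to turn the <thead>/<tbody> pair located by get_page_html into a
# DataFrame. The helper name and the th/td extraction are assumptions, not the
# module's actual logic.
def _table_to_dataframe(thead, tbody):
    columns = [th.get_text(strip=True) for th in thead.find_all('th')]
    rows = [[td.get_text(strip=True) for td in tr.find_all('td')]
            for tr in tbody.find_all('tr')]
    return DataFrame(rows, columns=columns)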
# import logging
import sys
import os

import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process

logger = Logging_process("InvstgCorp_test")


class InvstgCorp_Test(object):
    def __init__(self):
        self.html = self.load_html()
        self.soup = BeautifulSoup(self.html, "html.parser")

    def load_html(self):
        load_html_txt = ""
        load_path = os.path.join("data", "html_file", "html_file.txt")
        with open(load_path, 'r', encoding='utf-8') as f:
            # readlines() keeps each trailing newline, so nothing extra is appended
            for line in f.readlines():
                load_html_txt += line
        return load_html_txt
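# Hedged usage sketch: this test fixture just parses a previously saved HTML
# snapshot, so instantiating it is enough to exercise load_html. The log
# message below is an illustrative assumption.
if __name__ == "__main__":
    test = InvstgCorp_Test()
    logger.info("parsed saved HTML ({} characters)".format(len(test.html)))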
import os

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from libs.logging_process import Logging_process

logger = Logging_process("selenium_process")


class Selenium_process(object):
    def __init__(self, url):
        self.url = url
        self.chromedriver_path = os.path.join("libs", "chromedriver_win32", "chromedriver.exe")
        logger.info(self.chromedriver_path)

    def run_chromedriver(self):
        try:
            driver = webdriver.Chrome(self.chromedriver_path)
            driver.get(self.url)
            logger.info("Selenium driver created")
        except Exception as error:
            logger.info(error)
            return None  # without this, the return below would hit an unbound name
        return driver

    def down_chromedriver(self, driver):
        driver.quit()
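# Hedged usage sketch: driving Selenium_process end to end. The URL is an
# illustrative assumption; run_chromedriver returns None when Chrome fails to
# start, so the guard below is required.
if __name__ == "__main__":
    proc = Selenium_process("https://finance.naver.com/world/")  # hypothetical URL
    driver = proc.run_chromedriver()
    if driver is not None:
        html = driver.page_source  # hand this to a BeautifulSoup-based crawler
        proc.down_chromedriver(driver)  # quit the browser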