import os
import csv
import time
import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString
from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Absolute directory of this file; used as the anchor for all data paths.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Output directory for crawled artifacts (created/used elsewhere in the file).
DATA_DIR = os.path.join(BASE_DIR, "data")

# Project logger (channel name "crawler_selenium").
logger = Logging_process("crawler_selenium")


class Crawler_BS4(object):
    """BeautifulSoup-based crawler built from a Selenium driver's rendered page."""
    def __init__(self, driver):
        """Take a Selenium driver and build a BeautifulSoup object from its page source."""

        # Snapshot of the rendered HTML at construction time.
        self.html = driver.page_source
        self.soup = BeautifulSoup(self.html, "html.parser")

        # NOTE(review): html_save is not defined in this visible snippet —
        # presumably it persists the parsed HTML to disk; confirm against the
        # full original source.
        self.html_save(self.soup)

    def find_index(self):

        # NOTE(review): the snippet appears truncated here — only the lookup of
        # the <div class="info"> node survives; the rest of the method (and any
        # return) is missing from this scrape.
        all_index = self.soup.find('div', {'class': 'info'})
import os
import time
import pandas as pd
from pandas import DataFrame

from urllib.request import urlopen
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

logger = Logging_process('scrap_macro_economics_test')


class Naver_finance_crawler(object):
    """Crawl items listed on Naver Finance using plain urllib (no Selenium)."""
    def __init__(self, df, url):
        # df:  accumulator DataFrame the scraped rows are merged into
        # url: base listing URL; get_page_html appends the page number
        self.df = df
        self.url = url

    def get_page_html(self, page_num):
        """Fetch page `page_num` of self.url.

        Returns the open HTTP response on success, or None when the
        request fails (the failure is logged, not raised).
        """
        url = self.url + "&page={}".format(page_num)
        logger.info(url)

        try:
            html = urlopen(url)

        except Exception as error:
            # Fix: the scraped snippet was truncated inside this handler
            # (a bodyless `except` is a syntax error). Completed minimally:
            # log the failure and signal it with None, matching the
            # error-handling style of the sibling crawler in this file.
            logger.info(error)
            return None

        return html
# ==== Example #3 (next scraped snippet; original marker "Exemple #3", score 0) ====
import os
import time
import datetime
import pandas as pd
from pandas import DataFrame

from urllib.request import urlopen
from bs4 import BeautifulSoup, NavigableString

from libs.logging_process import Logging_process

logger = Logging_process('doller')


class Naver_finance_crawler(object):
    """Crawl Naver Finance pages with plain urllib and parse them with BeautifulSoup."""
    def __init__(self, url, df):
        # url: base listing URL; df: DataFrame accumulating scraped rows
        self.url = url
        self.df = df

    def get_page_html(self, url, n):
        """Fetch page `n` of `url` and parse the response into a soup.

        Returns (None, None) when the request fails.
        NOTE(review): the snippet is truncated right after the soup is
        built — the success-path return (presumably a pair, to match the
        (None, None) error return) is missing from this scrape.
        """
        # Shadow the parameter with the paginated URL.
        url = url + "&page={}".format(n)
        logger.info(url)
        try:
            html = urlopen(url)
        except Exception as error:
            logger.info(error)
            return None, None

        soup = BeautifulSoup(html.read(), 'html.parser')
# ==== Example #4 (next scraped snippet; original marker "Exemple #4", score 0) ====
import json
from libs.logging_process import Logging_process

from naver_finance_ver2 import scrap_macro_economics_test as sme

logger = Logging_process('scrap_all')


class scrap_all(object):
    """Driver that wires the macro-economics scraper to its JSON config."""
    def __init__(self):

        # Underlying scraper instance (project module imported at file top).
        self.sme = sme.scrap_macro_economics()

    def read_config_json(self):
        """Read the (name, url, page) entries stored in the config JSON.

        Returns the parsed config data, or None when the file cannot be
        opened. JSON parse errors still propagate, as in the original.
        """

        config_json_path = './setting_files/macro_economics_config.json'

        try:
            config_f = open(config_json_path, encoding="utf-8")
            logger.info("macro_economics_config.json 읽어들임")
        except Exception as error:
            logger.info(error)
            return None

        # Fix: close the handle even when json.load raises — the original
        # leaked the file descriptor in that case.
        try:
            return json.load(config_f)
        finally:
            config_f.close()
# ==== Example #5 (next scraped snippet; original marker "Exemple #5", score 0) ====
import naver_finance_ver2.scrap_all

from libs.logging_process import Logging_process

logger = Logging_process('qubot_test')

# Entry-point script: run one full scrape pass, logging start/end/errors.
logger.info("qubot_test start")

# Build the scraper facade (project module imported above).
scrapper = naver_finance_ver2.scrap_all.scrap_all()

try:
    # scrap_check drives the whole scrape pipeline.
    scrapper.scrap_check()

    logger.info("qubot_test end")

except Exception as error:
    # Broad catch is deliberate for a top-level cron-style script:
    # log the failure and exit cleanly instead of crashing.
    logger.info("qubot_test error !! : {}".format(error))
    
# ==== Example #6 (next scraped snippet; original marker "Exemple #6", score 0) ====
import os
import time
import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString
from libs.logging_process import Logging_process
from libs.selenium_process import Selenium_process

logger = Logging_process('nasdaq')


class Crawler_BS4(object):
    """Stock-index crawler: parses the page already rendered by a Selenium driver."""
    def __init__(self, driver):
        """Build a BeautifulSoup object from the driver's current page source."""

        self.html = driver.page_source
        self.soup = BeautifulSoup(self.html, "html.parser")
        logger.info("soup 생성")

    def get_page_html(self):
        """Locate the header and body of the index table in the parsed page.

        Returns:
            (thead, tbody) BeautifulSoup nodes on success, or (None, None)
            when the expected table structure is absent.
        """
        try:
            soup = self.soup

            thead = soup.find('thead')
            # The second <tbody> on the page holds the data rows.
            # NOTE(review): assumes at least two <tbody> elements — confirm
            # against the live page layout.
            tbody = soup.findAll('tbody')[1]
        except Exception as error:
            logger.info(error)
            return None, None

        # Fix: the scraped snippet was truncated before the success-path
        # return; the (None, None) error return implies a pair is expected.
        return thead, tbody
# import logging
import sys, os
import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup, NavigableString
from libs.logging_process import Logging_process

logger = Logging_process("InvstgCorp_test")


class InvstgCorp_Test(object):
    """Offline test helper: parses a previously saved HTML snapshot."""
    def __init__(self):

        # Load the saved HTML and parse it once at construction time.
        self.html = self.load_html()

        self.soup = BeautifulSoup(self.html, "html.parser")

    def load_html(self):
        """Read the saved HTML snapshot and return it as one string.

        Returns the file content with an extra newline appended after each
        line — readlines() keeps each trailing '\\n' and another is added.
        NOTE(review): the doubling may be unintentional, but it is preserved
        here since downstream parsing tolerates it.
        """

        load_html_txt = ""

        load_path = "data\\html_file\\html_file.txt"

        with open(load_path, 'r', encoding='utf-8') as f:

            lines = f.readlines()

            for line in lines:

                load_html_txt += line
                load_html_txt += '\n'

        # Fix: the scraped snippet was truncated before returning the text;
        # __init__ feeds this value to BeautifulSoup, so it must not be None.
        return load_html_txt
import os 
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from libs.logging_process import Logging_process

logger = Logging_process("selenium_process")

class Selenium_process(object):
    """Thin wrapper around a Chrome WebDriver for the crawlers in this project."""

    def __init__(self, url):

        # url: page the driver will open in run_chromedriver
        self.url = url
        self.chromedriver_path = os.path.join("libs", "chromedriver_win32", "chromedriver.exe")
        logger.info(self.chromedriver_path)

    def run_chromedriver(self):
        """Start Chrome and navigate to self.url.

        Returns the live driver, or None when startup fails. (The original
        raised NameError on `return driver` in the failure case, because
        `driver` was never assigned.)
        """
        try:
            driver = webdriver.Chrome(self.chromedriver_path)
            driver.get(self.url)
            logger.info("Selenium driver 생성")
        except Exception as error:
            logger.info(error)
            # Fix: signal failure explicitly instead of crashing with
            # NameError on the unassigned `driver` below.
            return None

        return driver

    def down_chromedriver(self, driver):
        """Shut the driver down and close all its windows."""
        driver.quit()