def dump_index(event: dict, context,
               index_group_factory: IndexGroupFactory = IndexGroupFactory,
               downloader_factory: DownloaderFactory = DownloaderFactory,
               scraper_factory: ScraperFactory = ScraperFactory):
    """Download, scrape and archive one index described by an incoming event.

    The event is decoded with ``decode_pubsub_data``; the resulting mapping
    must contain the keys ``"source"`` and ``"index"``. The date and the
    "scrape stocks" flag are derived from the same mapping by
    ``date_or_now`` and ``should_scrap_stocks``.

    Args:
        event: Raw event handed to the function by the runtime.
        context: Not used by this function.
        index_group_factory: Provides ``createFor(source, index)``.
        downloader_factory: Provides ``create(source)`` for the downloader.
        scraper_factory: Provides ``create(source)`` for the scraper.
    """
    payload = decode_pubsub_data(event)

    source, index = payload["source"], payload["index"]
    date = date_or_now(payload)
    scrap_stocks = should_scrap_stocks(payload)

    logging.info("Dump index for '{}' from '{}' on {}".format(index, source, date))

    index_group = index_group_factory.createFor(source, index)
    repository = S3Repository(BUCKET_NAME)
    index_storage = IndexStorage(DUMP_FOLDER, index_group, date=date, storage_repository=repository)

    # Fetch the raw pages first, then parse what was stored.
    downloader_factory.create(source).dump_index(index_group, index_storage)

    scraper = scraper_factory.create(source)
    scraper.read_stocks(index_group, index_storage)
    scraper.scrap_index(index_group, index_storage)

    if not scrap_stocks:
        logging.info("don't scape stocks because of pub/sub message")
    else:
        send_scrap_messages(index_group, date)

    index_storage.compress()
Example #2
0
def read_stocks(indexGroup, index_storage: IndexStorage):
    """Register every stock found on the stored list pages with ``indexGroup``.

    All entries returned by the storage repository for the "list" storage
    path are loaded and parsed as HTML. In each page the table inside
    ``<article class="top-flop-box">`` is scanned; the first cell of every
    row is expected to hold a relative link of the form
    ``/aktien/<name>-Aktie-<stock_id>`` plus a ``<span>`` whose text is passed
    on as the third argument of ``indexGroup.add_stock``.

    Pages without the expected table and links that do not match the stock
    URL pattern are skipped instead of raising ``AttributeError``.

    Args:
        indexGroup: Object receiving ``add_stock(stock_id, name, field)``.
        index_storage: Storage describing where the list pages live.
    """
    import logging  # local import: only needed for the skip warning below

    repository = index_storage.storage_repository
    # Compiled once; the same pattern is applied to every row of every page.
    stock_link = re.compile(r'\/aktien\/(.*)-Aktie-(.*)')

    pages = repository.list(index_storage.getStoragePath("list", ""))

    for page in pages:
        content = repository.load(index_storage.getDatedPath() + page)

        if not content:
            continue

        soup = BeautifulSoup(content, 'html.parser')

        # find() returns None when the element is absent, so check each step.
        article = soup.find("article", {"class": "top-flop-box"})
        table = article.find("table") if article is not None else None

        if table is None:
            logging.warning("no stock table found in list page '%s'", page)
            continue

        for row in table.findAll("tr"):
            columns = row.findAll("td")

            if len(columns) == 0:
                continue

            firstCol = columns[0]

            link = firstCol.find("a")
            href = link.get("href") if link is not None else None

            if not href or not href.startswith("/"):
                continue

            matches = stock_link.search(href)

            if matches is None:
                # Relative link that does not point to a stock page.
                continue

            name = matches.group(1)
            stock_id = matches.group(2)

            # NOTE(review): assumes every stock cell contains a <span>;
            # a missing one still raises AttributeError as before.
            field = firstCol.find("span").get_text().strip()

            indexGroup.add_stock(stock_id, name, field)
Example #3
0
    def test_base_path_of_index(self):
        # Checks that the dated path is "<base>/<index name>/<ISO date>/".

        # given:
        group = IndexGroup("isin", "index_name", "source_id", "source")
        reference_date = datetime(2018, 1, 1)
        expected = "/tests/dump/index_name/2018-01-01/"

        # when:
        storage = IndexStorage("/tests/dump", group, reference_date,
                               get_history=False)
        actual = storage.getDatedPath()

        # then:
        self.assertEqual(expected, actual)
Example #4
0
    def test_storage_path_of_index(self):
        # Checks that a storage path is the dated path followed by
        # "<index name>.<source>.<suffix>.<extension>".

        # given:
        group = IndexGroup("isin", "index_name", "source_id", "source")
        reference_date = datetime(2018, 1, 1)
        expected = "/tests/dump/index_name/2018-01-01/index_name.source.profile.html"

        # when:
        storage = IndexStorage("/tests/dump", group, reference_date,
                               get_history=False)
        actual = storage.getStoragePath("profile", "html")

        # then:
        self.assertEqual(expected, actual)
def dump_stock(event: dict, context):
    """Download, scrape and persist a single stock described by an event.

    The event is decoded with ``decode_pubsub_data``; the resulting mapping
    must contain the keys ``"source"``, ``"index_group"`` and ``"stock"``.
    The date is derived from the same mapping by ``date_or_now``.

    Args:
        event: Raw event handed to the function by the runtime.
        context: Not used by this function.
    """
    payload = decode_pubsub_data(event)

    logging.info("data: {}".format(payload))

    source = payload["source"]
    index_group = new_index_group(payload["index_group"])
    stock = new_stock(payload["stock"], index_group)
    date = date_or_now(payload)

    # Log each decoded input on its own line.
    decoded_inputs = (
        ("source: {}", source),
        ("index_group: {}", index_group),
        ("stock: {}", stock),
        ("date: {}", date),
    )
    for template, value in decoded_inputs:
        logging.info(template.format(value))

    # Index and stock storage each get their own repository instance.
    index_storage = IndexStorage(DUMP_FOLDER, index_group, date=date,
                                 storage_repository=S3Repository(BUCKET_NAME))
    stock_storage = StockStorage(index_storage, stock,
                                 storage_repository=S3Repository(BUCKET_NAME))

    DownloaderFactory.create(source).dump_stock(stock, stock_storage)
    ScraperFactory.create(source).scrap(stock, stock_storage)

    stock_storage.store()
    stock_storage.compress()
def get_allianz_stock_storage(get_history=False, date=None):
    """Build a ``StockStorage`` for the Allianz stock in the DAX index group.

    Args:
        get_history: Forwarded to ``IndexStorage`` as ``get_history``.
        date: Date forwarded to ``IndexStorage``. When omitted (``None``) the
            current time is taken at call time; a ``datetime.now()`` default
            in the signature would be frozen at import time instead.

    Returns:
        A ``StockStorage`` rooted in the "resources" folder.
    """
    if date is None:
        date = datetime.now()

    indexGroup = IndexGroup("DE0008469008", "DAX", "DAX", "onvista")
    index_storage = IndexStorage("resources", indexGroup,
                                 date=date,
                                 get_history=get_history)
    stock = Stock("DE0008404005", "Allianz", indexGroup)

    return StockStorage(index_storage, stock)
def get_vw_stock_storage(get_history=False, date=None):
    """Build a ``StockStorage`` for the Volkswagen-VZ stock in the DAX index group.

    Args:
        get_history: Forwarded to ``IndexStorage`` as ``get_history``.
        date: Date forwarded to ``IndexStorage``. When omitted (``None``) the
            current time is taken at call time; a ``datetime.now()`` default
            in the signature would be frozen at import time instead.

    Returns:
        A ``StockStorage`` rooted in the "resources" folder.
    """
    if date is None:
        date = datetime.now()

    indexGroup = IndexGroup("DE0008469008", "DAX", "DAX", "onvista")
    index_storage = IndexStorage("resources", indexGroup,
                                 date=date,
                                 get_history=get_history)
    stock = Stock("DE0007664039", "Volkswagen-VZ", indexGroup)

    return StockStorage(index_storage, stock)
Example #8
0
def write_index_report(index_group: IndexGroup, index_storage: IndexStorage,
                       rating_entities: list) -> None:
    """Render the index rating overview and store it as an HTML report.

    The template ``libs/templates/index-rating-overview.html`` is rendered
    with the index group, the rating entities, and the source and date string
    taken from ``index_storage``. The result is written through the storage
    repository to the storage path with an empty suffix and the ``html``
    extension.

    Args:
        index_group: Index group exposed to the template as ``index_group``.
        index_storage: Supplies ``source``, ``date_str``, the target path and
            the storage repository.
        rating_entities: Entities exposed to the template as
            ``rating_entities``.
    """
    template = Template(filename="libs/templates/index-rating-overview.html")

    report = template.render(index_group=index_group,
                             rating_entities=rating_entities,
                             source=index_storage.source,
                             report_date=index_storage.date_str)

    report_path = index_storage.getStoragePath("", "html")
    index_storage.storage_repository.store(report_path, report)
Example #9
0
def dump_index(index_group: IndexGroup, index_storage: IndexStorage):
    """Download the index profile page and the data reachable from it.

    The profile page for ``index_group.isin`` is stored at the "profil" HTML
    storage path. The stored page is then read twice: once via
    ``get_notation`` to drive ``download_history_by_notation``, and once via
    ``get_links`` to obtain the "Einzelwerte" link handed to
    ``download_stock_list``.

    Args:
        index_group: Index whose ``isin`` identifies the page to download.
        index_storage: Supplies the target path and the storage repository.
    """
    repository = index_storage.storage_repository
    profile_path = index_storage.getStoragePath("profil", "html")
    profile_url = WEBSITE + "/index/" + index_group.isin

    dl.download(profile_url, profile_path, repository)

    # The history download needs the notation parsed from the stored profile.
    download_history_by_notation(get_notation(profile_path, repository),
                                 index_storage)

    page_links = get_links(profile_path, repository)
    download_stock_list(page_links["Einzelwerte"], index_storage)
Example #10
0
def read_stocks(indexGroup, index_storage: IndexStorage):
    """Register every stock found in the stored list page with ``indexGroup``.

    The file at the "list" HTML storage path is opened from the local file
    system and parsed. The table inside ``<div id="index-list-container">``
    is scanned; the first cell of every row is expected to hold a relative
    link of the form ``/aktien/<name>-aktie...``, and the second line of the
    cell's stripped text is used as the stock id.

    A missing table, links that do not match the stock URL pattern, and cells
    without a second text line are skipped instead of raising
    ``AttributeError`` / ``IndexError``.

    Args:
        indexGroup: Object receiving ``add_stock(stock_id, name)``.
        index_storage: Storage describing where the list page lives.
    """
    import logging  # local import: only needed for the skip warning below

    list_path = index_storage.getStoragePath("list", "html")

    with open(list_path, mode="r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, 'html.parser')

        # find() returns None when the element is absent, so check each step.
        article = soup.find("div", {"id": "index-list-container"})
        table = article.find("table") if article is not None else None

        if table is None:
            logging.warning("no stock table found in list page '%s'", list_path)
            return

        for row in table.findAll("tr"):
            columns = row.findAll("td")

            if len(columns) == 0:
                continue

            firstCol = columns[0]

            link = firstCol.find("a")
            href = link.get("href") if link is not None else None

            if not href or not href.startswith("/"):
                continue

            matches = re.search(r'\/aktien\/(.*)-aktie', href)

            if matches is None:
                # Relative link that does not point to a stock page.
                continue

            text_lines = firstCol.get_text().strip().split("\n")

            if len(text_lines) < 2:
                # No second line in the cell, so no stock id to read.
                continue

            indexGroup.add_stock(text_lines[1], matches.group(1))
Example #11
0
def dump_index(indexGroup: IndexGroup, indexStorage: IndexStorage):
    """Download the index profile page and its constituent list page.

    Both pages are addressed by ``indexGroup.sourceId``: the profile at
    ``<WEBSITE>/index/<sourceId>`` goes to the "profil" HTML storage path, and
    ``<WEBSITE>/index/<sourceId>/werte`` goes to the "list" HTML storage path.

    Args:
        indexGroup: Index whose ``sourceId`` identifies the pages.
        indexStorage: Supplies the target paths and the storage repository.
    """
    main_file = indexStorage.getStoragePath("profil", "html")
    storage_repository = indexStorage.storage_repository

    # Plain f-string; the original combined an f-prefix with %-formatting.
    index_url = f"{WEBSITE}/index/{indexGroup.sourceId}"

    dl.download(index_url, main_file, storage_repository)
    dl.download(f"{index_url}/werte",
                indexStorage.getStoragePath("list", "html"),
                storage_repository)
Example #12
0
# Driver script: dumps and scrapes a single index into the local "dump" folder.
#
# Alternative index groups, kept for manual switching.
# NOTE(review): every assignment visible here is commented out, so
# `indexGroup` (and `SOURCE`) must be defined earlier in the file — confirm.
# indexGroup = IndexGroupFactory.createFor(SOURCE, "NASDAQ")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "S&P 500")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "Nikkei")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "Hang-Seng")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "S&P-TSX-Composite")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "AEX")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "OBX")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "PTX")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "RTS")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "OMXS-30")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "IBEX-35")
# indexGroup = IndexGroupFactory.createFor(SOURCE, "SOLACTIVE-ORGANIC-FOOD")

# Uncomment to run against a fixed date instead of the current time.
#date = datetime.strptime("22.11.2020", "%d.%m.%Y")
date = datetime.now()
index_storage = IndexStorage("dump", indexGroup, date=date)

# Uncompress the storage before downloading into it.
index_storage.uncompress()

# Download the raw index pages for the configured source.
downloader = DownloaderFactory.create(SOURCE)
downloader.dump_index(indexGroup, index_storage)

# Read the stock list and scrape the index from the stored pages.
scraper = ScraperFactory.create(SOURCE)
scraper.read_stocks(indexGroup, index_storage)
scraper.scrap_index(indexGroup, index_storage)

# Compress the storage again once all work is done.
index_storage.compress()


def thread_body(queue: Queue):
    while True: