Exemple #1
0
def main():
    """Run the scraping pipeline: chains, downloads, stores, then prices.

    Command-line flags:
        --processes / -p: number of worker processes in the pool (default 1).
        --no-download / -nd: skip the initial data download step.
        --parse-chains / -c: run GovDataScraper.parse_chains_to_db() before
            the chains are queried from the database.

    Elapsed wall-clock time is printed for each step and for the whole run.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--processes',
        '-p',
        help='run data scraping and parsing in X parallel processes',
        default=1,
        type=int)
    arg_parser.add_argument(
        '--no-download',
        '-nd',
        help="don't download data at start (assumes data already downloaded)",
        default=False,
        action='store_true')
    arg_parser.add_argument(
        '--parse-chains',
        '-c',
        help="parse chains login data from the government webpage",
        default=False,
        action='store_true')

    args = arg_parser.parse_args()

    start = time.time()
    # Use the pool as a context manager so its worker processes are always
    # released, including when a step raises. Every map/starmap call below
    # blocks until its results are ready, so nothing is pending at exit.
    # The pool is still created before the DB session, as before.
    with Pool(processes=args.processes) as pool:
        db = SessionController()

        # 1) get all chains (and subchains)
        if args.parse_chains:
            gov = web_scraper.GovDataScraper(db)
            gov.parse_chains_to_db()

        chains = list(db.query(Chain))

        # 2) download all data before starting
        if not args.no_download:
            s = time.time()
            print('Downloading all chains data')
            pool.map(download_chain_data, chains)
            print('data download: {}'.format(time.time() - s))

        # 3) parse all chain stores
        s = time.time()
        print('parsing all chains stores')
        pool.map(parse_chain_stores, chains)
        print('stores parsing: {}'.format(time.time() - s))

        # 4) parse stores daily prices and promos
        for chain in chains:
            s = time.time()
            print('parsing prices for chain {}'.format(chain.name))
            stores = list(
                db.query(Store).filter(Store.chain_id == chain.id))
            # One task per store, each paired with the same chain.
            pool.starmap(parse_chain_prices, zip(repeat(chain), stores))
            print('chain parsing ended: {}'.format(time.time() - s))

        # ChainXmlParser.set_products_item_id(db)
        print('total time: {}'.format(time.time() - start))
Exemple #2
0
def main():
    """Parse promos for the first store of the chain named 'סופר דוש'.

    Walks every chain in the database; for each chain whose name matches,
    builds a ChainXmlParser and runs parse_store_promos on the first store
    returned for that chain (if there is one).
    """
    session = SessionController(db_logging=False)
    wanted_name = 'סופר דוש'

    for chain in session.query(Chain):
        if chain.name == wanted_name:
            xml_parser = ChainXmlParser(chain, session)
            chain_stores = session.query(Store).filter(
                Store.chain_id == chain.id)
            # Only the first store is processed; the rest are ignored.
            first_store = next(iter(chain_stores), None)
            if first_store is not None:
                xml_parser.parse_store_promos(first_store)
Exemple #3
0
def main():
    """Print scraper output for every chain in the database.

    For each chain, builds a scraper with db_chain_factory and prints the
    chain name followed by the results of get_stores_xml(),
    get_prices_xml(1) and get_promos_xml(1).
    NOTE(review): the meaning of the argument 1 is not visible here;
    presumably a store identifier -- confirm against the scraper class.

    Any exception propagates to the caller unchanged.
    """
    db = SessionController()
    for chain in db.query(Chain):
        scraper = db_chain_factory(chain)
        print(chain.name)
        # print(scraper.get_chain_full_id())
        print(scraper.get_stores_xml())
        print(scraper.get_prices_xml(1))
        print(scraper.get_promos_xml(1))