コード例 #1
0
ファイル: main.py プロジェクト: elikochva/openprices
def main():
    """Run the whole scrape-and-parse pipeline from the command line.

    Steps, in order:

    1. With ``--parse-chains``: load chain login data through
       ``GovDataScraper.parse_chains_to_db``.
    2. Unless ``--no-download`` is given: run ``download_chain_data`` for
       every chain.
    3. Run ``parse_chain_stores`` for every chain.
    4. For each chain, run ``parse_chain_prices`` for each of its stores.

    Steps 2-4 are distributed over a worker ``Pool`` whose size is set by
    ``--processes`` (default 1). The elapsed time of each step and of the
    whole run is printed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--processes',
        '-p',
        help='run data scraping and parsing in X parallel processes',
        default=1,
        type=int)
    arg_parser.add_argument(
        '--no-download',
        '-nd',
        help="don't download data at start (assumes data already downloaded)",
        default=False,
        action='store_true')
    arg_parser.add_argument(
        '--parse-chains',
        '-c',
        help="parse chains login data from the government webpage",
        default=False,
        action='store_true')

    args = arg_parser.parse_args()

    start = time.time()
    # The pool is created before the DB session is opened.
    # NOTE(review): presumably so that worker processes do not inherit the
    # session - confirm before reordering these two statements.
    # The ``with`` block makes sure the workers are shut down even when one
    # of the steps below raises, instead of leaving them running.
    with Pool(processes=args.processes) as p:
        db = SessionController()

        # 1) get all chains (and subchains)
        if args.parse_chains:
            gov = web_scraper.GovDataScraper(db)
            gov.parse_chains_to_db()

        chains = list(db.query(Chain))

        # 2) download all data before starting
        if not args.no_download:
            s = time.time()
            print('Downloading all chains data')
            p.map(download_chain_data, chains)
            print('data download: {}'.format(time.time() - s))

        # 3) parse all chain stores
        s = time.time()
        print('parsing all chains stores')
        p.map(parse_chain_stores, chains)
        print('stores parsing: {}'.format(time.time() - s))

        # 4) parse stores daily prices and promos
        for chain in chains:
            s = time.time()
            print('parsing prices for chain {}'.format(chain.name))
            stores = list(
                db.query(Store).filter(Store.chain_id == chain.id))
            # One task per store; every task receives the same chain.
            p.starmap(parse_chain_prices, zip(repeat(chain), stores))
            print('chain parsing ended: {}'.format(time.time() - s))

        # ChainXmlParser.set_products_item_id(db)
        print('total time: {}'.format(time.time() - start))
コード例 #2
0
ファイル: xml_parser.py プロジェクト: sagiben/openprices
def main():
    """Parse the promos of the first store of one hard-coded chain.

    Opens a session with DB logging disabled, walks over all chains and,
    for every chain whose name equals the target name, builds a
    ``ChainXmlParser`` and calls ``parse_store_promos`` on the first store
    returned for that chain. Nothing is parsed when the chain has no
    stores.
    """
    target_chain_name = 'סופר דוש'
    session = SessionController(db_logging=False)

    for chain in session.query(Chain):
        if chain.name == target_chain_name:
            chain_parser = ChainXmlParser(chain, session)
            chain_stores = session.query(Store).filter(
                Store.chain_id == chain.id)
            for store in chain_stores:
                chain_parser.parse_store_promos(store)
                # Only the first store is processed.
                break
コード例 #3
0
ファイル: web_scraper.py プロジェクト: sagiben/openprices
def main():
    """Print the stores, prices and promos XML for every chain in the DB.

    For each chain a scraper is obtained from ``db_chain_factory`` and the
    chain name is printed, followed by the results of ``get_stores_xml()``,
    ``get_prices_xml(1)`` and ``get_promos_xml(1)``.

    Any exception propagates to the caller unchanged. No handler is
    installed because catching ``BaseException`` only to re-raise it has
    no effect other than adding a traceback frame.
    """
    db = SessionController()
    for chain in db.query(Chain):
        scraper = db_chain_factory(chain)
        print(chain.name)
        # print(scraper.get_chain_full_id())
        print(scraper.get_stores_xml())
        # NOTE(review): the argument 1 is presumably a store id - confirm.
        print(scraper.get_prices_xml(1))
        print(scraper.get_promos_xml(1))
コード例 #4
0
    def __init__(self, city, db=None, logger=None):
        """Collect the stores of a city and index their items by store.

        :param city: stored as ``self.city`` before ``get_city_stores()``
            is called.
        :param db: session to use; a new ``SessionController`` is created
            when this is omitted or falsy.
        :param logger: logger for progress messages; defaults to this
            module's logger.

        After construction ``self.stores_items`` maps each item to the
        list of stores for which ``get_store_items`` returned it.
        """
        logging.basicConfig(level=logging.INFO)
        logger = logger or logging.getLogger(__name__)
        self.db = db or SessionController()
        # Hand the parser the resolved session (not the raw ``db``
        # argument, which may be None) so both objects use the same one.
        self.parser = xml_parser.ChainXmlParser(self.db)
        self.city = city

        logger.info('getting city stores')
        self.stores = self.get_city_stores()
        logger.info(self.stores)
        self.basket = Basket()

        self.stores_items = {}
        for store in self.stores:
            logger.info('getting store {} items'.format(store))
            items = self.get_store_items(store)
            if not items:
                # Nothing available for this store yet: parse its prices
                # and look the items up once more.
                self.parser.parse_store_prices(store.chain, store)
                items = self.get_store_items(store)

            for item in items:
                self.stores_items.setdefault(item, []).append(store)
コード例 #5
0
ファイル: xml_parser.py プロジェクト: sagiben/openprices
 def __init__(self, db_chain, db=None):
     """Store the chain and the DB session this instance works with.

     :param db_chain: chain object, kept as ``self.chain``.
     :param db: session to use; a new ``SessionController`` is created
         when this is omitted or falsy.
     """
     self.chain = db_chain
     # NOTE(review): presumably the number of records handled per page -
     # confirm against the code that reads ``page_size``.
     self.page_size = 100000
     self.db = db if db else SessionController()
コード例 #6
0
ファイル: web_scraper.py プロジェクト: sagiben/openprices
 def __init__(self, db=None):
     """Store the DB session and the URL of the chain table page.

     :param db: session to use; a new ``SessionController`` is created
         when this is omitted or falsy.
     """
     self.chain_table_url = "http://www.economy.gov.il/Trade/ConsumerProtection/Pages/PriceTransparencyRegulations.aspx"
     self.db = db if db else SessionController()