Code example #1
0
# Download images
for url, pdt in products.items():
    # Only fetch an image when the product has a non-empty main-image URL
    # and its resolved brand is one of the tracked brands.
    # (Short-circuit order matters: brm.find_brand is only called once the
    # URL key is known to exist.)
    if 'pdct_img_main_url' in pdt and pdt[
            'pdct_img_main_url'] and brm.find_brand(
                pdt['pdct_name_on_eretailer'])['brand'] in mh_brands:
        img_url = pdt['pdct_img_main_url']
        print(pdt['pdct_name_on_eretailer'] + "." + img_url.split('.')[-1])
        # NOTE(review): verify=False disables TLS certificate checking —
        # presumably deliberate for scraping; confirm this is intended.
        response = requests.get(img_url,
                                stream=True,
                                verify=False,
                                headers=headers)
        # response.raw.decode_content = True
        # Temp path keyed on the URL hash; stable within one interpreter
        # run (str hashes are salted across runs, but that is fine here).
        tmp_file_path = '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
            abs(hash(img_url)))
        img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer'])
        # Stream the raw HTTP body straight into the temp file.
        with open(tmp_file_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        # Sniff the real image type from the file contents; skip products
        # whose download is not a recognizable image. (Original recomputed
        # the temp path and re-ran imghdr.what — hoisted both.)
        img_type = imghdr.what(tmp_file_path)
        if img_type is not None:
            img_path = img_path.split('.')[0] + '.' + img_type
            shutil.copyfile(tmp_file_path, img_path)
            products[url].update({
                'img_path': img_path,
                'img_hash': file_hash(img_path)
            })

create_csvs(products, categories, searches, shop_id,
Code example #2
0
                    )), root_url),
            'ctg_denom_txt':
            ' '.join(' '.join(
                tree.xpath('//ul[@id="breadcrumb"]//text()')).split()),
        })
        print(products[url])

# Download images
for url, pdt in products.items():
    # Guard clauses: skip products with no usable main-image URL, then
    # products whose resolved brand is not tracked. The checks run in the
    # same order as the original short-circuit condition.
    if 'pdct_img_main_url' not in pdt or not pdt['pdct_img_main_url']:
        continue
    if brm.find_brand(pdt['pdct_name_on_eretailer'])['brand'] not in mh_brands:
        continue

    extension = pdt['pdct_img_main_url'].split('.')[-1]
    file_label = pdt['pdct_name_on_eretailer'] + "." + extension
    print(file_label)
    print(pdt['pdct_img_main_url'])

    # Destination path is derived from the product name plus the URL's
    # trailing "extension" token.
    img_path = img_path_namer(shop_id, file_label)

    # Fetch the image bytes and write them out in one shot.
    download = requests.get(pdt['pdct_img_main_url'])
    with open(img_path, 'wb') as fh:
        fh.write(download.content)

    products[url].update({
        'img_path': img_path,
        'img_hash': file_hash(img_path)
    })

# Dump the scraped products/categories/searches to raw CSV files for this
# shop, stamped with the collection date.
create_csvs(products, categories, searches, shop_id,
            fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE)
# Sanity-check the CSV files that were just written.
validate_raw_files(fpath_namer(shop_id, 'raw_csv'))
# Shut down the Selenium browser session.
driver.quit()