    def load_loblaws(self, product_json_dir: Path, scrape_date: str):
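        """
        Load a batch of scraped Loblaws product JSON files (one file per product code)
        from product_json_dir into the database: create a ScrapeBatch, then Product,
        LoblawsProduct and NutritionFacts records, and finish with the category,
        variety pack and Atwater post-processing steps.
        """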

        CHANGE_REASON = 'New Loblaws Scrape Batch'
        json_files = list(product_json_dir.glob("*.json"))

        if len(json_files) < 1:
            self.stdout.write(
                self.style.ERROR(
                    f'Could not find any JSON files in {product_json_dir}, quitting!'
                ))
            quit()

        self.stdout.write(
            self.style.SUCCESS(
                f'Found {len(json_files)} JSON files in {product_json_dir}'))

        # Total valid products scraped
        filtered_json_files = [
            f for f in json_files if f.stat().st_size > 1000
        ]
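        # Files of ~1 KB or less are assumed to be failed/empty scrapes and are skipped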
        total_products = len(filtered_json_files)

        # All product codes
        product_codes = [f.with_suffix("").name for f in filtered_json_files]

        # All Loblaws products in the DB
        existing_products = [
            x.product.product_code for x in LoblawsProduct.objects.all()
        ]

        # Codes in the DB but absent from this scrape (missing) vs. codes not yet in the DB (new)
        missing_products = len(
            list(set(existing_products) - set(product_codes)))
        new_products = len(
            [x for x in product_codes if x not in existing_products])
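        # e.g. DB codes {A, B, C} and scraped codes {B, C, D} -> 1 missing (A), 1 new (D)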

        # Create scrape batch
        scrape = ScrapeBatch.objects.create(missing_products=missing_products,
                                            new_products=new_products,
                                            total_products=total_products,
                                            scrape_date=scrape_date,
                                            store='LOBLAWS')

        # Iterate over product json files
        existing_codes_dict = Product.generate_existing_product_codes_dict(
            store='LOBLAWS')
        for j in tqdm(filtered_json_files, desc="Uploading JSON"):
            product_code = j.with_suffix("").name  # Files are named after their product code

            try:
                data = read_json(j)
            except json.decoder.JSONDecodeError as e:
                # self.stdout.write(self.style.ERROR(f'Skipping product JSON {j} due to exception:\n{e}'))
                continue

            # Create the generic Product record for this scrape (objects.create already saves it)
            obj = Product.objects.create(product_code=product_code)
            obj.changeReason = CHANGE_REASON

            # Create the Loblaws-specific product record linked to the generic Product
            product = LoblawsProduct.objects.create(product=obj)

            # Generic fields for Product model
            obj.store = 'LOBLAWS'

            # Normalize the apostrophes for name and brand
            obj.name = normalize_apostrophe(get_name(data))
            obj.brand = normalize_apostrophe(get_brand(data))

            price_float, price_units = get_price(data)
            obj.price_float, obj.price_units = price_float, price_units
            obj.price = f'${price_float} {price_units}'
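            # e.g. price_float 3.99 with price_units 'ea' -> obj.price '$3.99 ea' (illustrative values)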

            upc_list = get_upc_list(data)
            if upc_list is not None:
                obj.upc_code = upc_list[0]  # The representative UPC code is the first entry in the list
                obj.upc_array = upc_list

            obj.manufacturer = None  # Not sure if we have this
            obj.nielsen_product = None  # TODO: Probably populate this post-hoc. Ask Adrian about this.
            obj.url = get_url(data)
            obj.scrape_date = timezone.now()
            obj.nutrition_available = None
            breadcrumbs = get_breadcrumbs(data)  # Avoid calling get_breadcrumbs twice
            obj.breadcrumbs_array = breadcrumbs
            obj.breadcrumbs_text = ",".join(breadcrumbs)
            obj.description = get_description(data)
            obj.batch = scrape

            # Update most_recent flag of older duplicate products if necessary
            if product_code in existing_codes_dict.values():
                ids_to_demote = Product.test_if_most_recent(
                    product_code=product_code,
                    existing_codes_dict=existing_codes_dict)
                Product.demote_most_recent_product_list(ids_to_demote)
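                # Presumably clears the most_recent flag on the older rows for this
                # product code so that only the record from this batch stays current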

            # Loblaws fields for LoblawsProduct model
            product.api_data = data
            nutrition_facts_json = get_nutrition_facts(data)
            product.changeReason = CHANGE_REASON

            # Populate NutritionFacts model
            nutrition_facts, c = NutritionFacts.objects.get_or_create(
                product=obj)
            nutrition_facts.load_total_size(data)
            nutrition_facts.ingredients = get_ingredients(data)

            if nutrition_facts_json is not None:
                nutrition_facts.load_loblaws_nutrition_facts_json(
                    nutrition_facts_json)
                obj.nutrition_available = True
            else:
                obj.nutrition_available = False

            if not c:
                nutrition_facts.changeReason = CHANGE_REASON

            # Commit to DB
            obj.save()
            product.save()
            nutrition_facts.save()

        self.stdout.write(
            self.style.SUCCESS(
                f'Done loading Loblaws-{str(scrape_date)} products to database!'
            ))

        self.stdout.write(
            self.style.SUCCESS(f'Conducting category assignment step'))
        assign_categories()

        self.stdout.write(
            self.style.SUCCESS(f'Conducting variety pack assignment step'))
        assign_variety_pack_flag()

        self.stdout.write(
            self.style.SUCCESS(f'Calculating Atwater result for products'))
        calculate_atwater()

        self.stdout.write(self.style.SUCCESS(f'Loading complete!'))
Example #2
    def handle(self, *args, **options):
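        """
        Load a batch of scraped Walmart product JSON (a single JSON file plus an image
        directory inside options['input_dir']) into the database as a new ScrapeBatch;
        options['delete_products'] instead wipes all Walmart products, and
        options['date'] sets the scrape date for the batch.
        """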
        if options['delete_products']:
            self.stdout.write(
                self.style.WARNING(
                    f'Deleting all Walmart products in the database...'))
            product_records = Product.objects.filter(store="WALMART")
            product_records.delete()
            self.stdout.write(
                self.style.ERROR(
                    f'Deleted all Walmart records in the database!'))
            quit()

        scrape_date = parse_date(options['date'])

        # Read main json file
        f = list(Path(options['input_dir']).glob('*.json'))[0]
        assert f.exists()
        j = read_json(str(f))

        # Get image directory
        tmp = list(Path(options['input_dir']).glob('*'))
        image_dir = [x for x in tmp if x.is_dir()][0]
        assert image_dir.exists()

        self.stdout.write(
            self.style.SUCCESS(
                f'Started loading Walmart products to database'))

        # Create scrape batch
        # All Walmart products in the DB
        existing_products = [
            x.product.product_code for x in WalmartProduct.objects.all()
        ]
        product_codes = [x['product_code'] for x in j]
        missing_products = len(
            list(set(existing_products) - set(product_codes)))
        new_products = len(
            [x for x in product_codes if x not in existing_products])
        total_products = len(j)

        # TODO: If the script fails this will still be created, should probably clean up after itself
        scrape = ScrapeBatch.objects.create(missing_products=missing_products,
                                            new_products=new_products,
                                            total_products=total_products,
                                            scrape_date=scrape_date,
                                            store='WALMART')

        self.stdout.write(
            self.style.SUCCESS(
                f'Created new scrape batch for {scrape.scrape_date}'))

        # Iterate over all products
        existing_codes_dict = Product.generate_existing_product_codes_dict(
            store='WALMART')
        for p in tqdm(j, desc="Loading Walmart JSON"):
            # Make sure all of the expected keys are present, defaulting missing ones to None
            # (the carbohydrate* keys are renamed to totalcarbohydrate* further below to match the DB)
            for k in EXPECTED_KEYS:
                if k not in p:
                    p[k] = None

            product = Product.objects.create(product_code=p['product_code'])

            # Product fields
            product.name = normalize_apostrophe(p['product_name'])
            product.brand = normalize_apostrophe(p['Brand'])
            product.store = 'WALMART'

            # TODO: Make sure the UPC code is just the first entry
            if p['UPC'] is not None:
                first_upc_code = str(p['UPC']).split(',')[0]
                product.upc_code = first_upc_code
            product.url = p['url']

            product.description = p['long_desc']
            if p['breadcrumbs'] is not None:
                product.breadcrumbs_text = p['breadcrumbs'].strip()
                product.breadcrumbs_array = [
                    x.strip() for x in p['breadcrumbs'].strip().split('>')
                ]

            if p['price'] == 'price unavailable':
                product.price = 'price unavailable'
            elif p['price'] is not None:
                # '¢' is U+00A2, so a single check covers cent-denominated prices
                if '¢' in p['price']:
                    product.price_float = float(p['price'].replace('¢', '')) / 100
                elif '$' in p['price']:
                    product.price_float = float(p['price'].replace('$', ''))
                product.price = p['price']
                product.price_units = "ea"  # TODO: Assumed, since the Walmart scraper provides no price units

            product.nutrition_available = p['nft_present']
            product.nielsen_product = p['nielsen_product']
            product.unidentified_nft_format = p['nft_american']
            product.batch = scrape

            # Update most_recent flag of older duplicate products if necessary
            if product.product_code in existing_codes_dict.values():
                ids_to_demote = Product.test_if_most_recent(
                    product_code=product.product_code,
                    existing_codes_dict=existing_codes_dict)
                Product.demote_most_recent_product_list(ids_to_demote)

            # Change reason
            product.changeReason = CHANGE_REASON

            product.save()

            # Walmart fields
            walmart = WalmartProduct.objects.create(product=product)
            walmart.nutrition_facts_json = p['nutrition']
            if len(p['images']['image_paths']) > 0:
                walmart.image_directory = str(
                    Path(p['images']['image_paths'][0]).parent)
            else:
                walmart.image_directory = None
            walmart.dietary_info = p['Lifestyle & Dietary Need']
            walmart.bullets = p['bullets']
            walmart.sku = p['SKU']
            walmart.changeReason = CHANGE_REASON
            walmart.save()

            # Nutrition fields
            # Pass over the nutrition dict to replace 'absent' with 0 and 'conflict' with None
            nutrition_dict = p['nutrition'].copy()

            # Rename the carbohydrate keys to totalcarbohydrate* so the fields match the database
            if "carbohydrate" in nutrition_dict.keys():
                nutrition_dict['totalcarbohydrate'] = nutrition_dict[
                    'carbohydrate']
            if "carbohydrate_dv" in nutrition_dict.keys():
                nutrition_dict['totalcarbohydrate_dv'] = nutrition_dict[
                    'carbohydrate_dv']
            if "carbohydrate_unit" in nutrition_dict.keys():
                nutrition_dict['totalcarbohydrate_unit'] = nutrition_dict[
                    'carbohydrate_unit']

            for key, val in nutrition_dict.items():
                # Override bad values with 0 or None
                if val == 'absent':
                    nutrition_dict[key] = 0
                if val == 'conflict':
                    nutrition_dict[key] = None

                # Fix OCR errors in daily value fields: a letter 'o' becomes 0,
                # and any other non-numeric string is discarded as unusable
                if '_dv' in key and val is not None:
                    if isinstance(val, str) and val.lower() == 'o':
                        nutrition_dict[key] = 0
                    elif isinstance(val, str):
                        nutrition_dict[key] = None

            # Convert mg values to grams and scale daily values (dv) down to fractions (dv / 100)
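            # e.g. sodium 230 with sodium_unit 'mg' -> 0.23 g; sodium_dv 15 -> 0.15 (illustrative values)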
            for key, val in nutrition_dict.items():
                if '_unit' in key and val is not None:
                    if str(val).lower() == 'mg':
                        nutrient_to_adjust = key.replace('_unit', '')
                        if nutrition_dict[nutrient_to_adjust] is None:
                            continue
                        nutrition_dict[nutrient_to_adjust] = nutrition_dict[
                            nutrient_to_adjust] / 1000
                if '_dv' in key and val is not None:
                    nutrition_dict[key] = val / 100

            nutrition, c = NutritionFacts.objects.get_or_create(
                product=product)
            nutrition.ingredients = p['ingredients_txt']

            # TODO: The scraped size value is occasionally a garbage string;
            #  as a temporary workaround, discard anything longer than 100 characters.
            if p['size'] is not None:
                if len(p['size']) < 100:
                    nutrition.total_size = p['size']
                else:
                    nutrition.total_size = None

            if not c:
                nutrition.changeReason = CHANGE_REASON

            # TODO: Verify Adrian's keys correspond with mine
            for key, val in nutrition_dict.items():
                if val is not None:
                    setattr(nutrition, key, val)

            # Serving size keys are not consistently present in the nutrition dict, so set these defensively
            nutrition.serving_size_raw = None
            if 'serving_size' in nutrition_dict and 'serving_size_unit' in nutrition_dict:
                nutrition.serving_size_raw = f'{nutrition_dict["serving_size"]} {nutrition_dict["serving_size_unit"]}'
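                # e.g. serving_size 30 and serving_size_unit 'g' -> '30 g' (illustrative values)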
            else:
                pass
                # self.stdout.write(self.style.WARNING(f'Issues detected with serving size values'))

            nutrition.serving_size = None
            if 'serving_size' in nutrition_dict.keys():
                nutrition.serving_size = nutrition_dict["serving_size"]

            nutrition.serving_size_units = None
            if 'serving_size_unit' in nutrition_dict.keys():
                nutrition.serving_size_units = nutrition_dict[
                    "serving_size_unit"]

            nutrition.save()

            # Images
            image_paths = p['images']['image_paths']
            image_labels = p['images']['image_labels']

            # Check if the product already has images associated with it
            existing_images = ProductImage.objects.filter(
                product__product_code=product.product_code)
            if len(existing_images) > 0:
                # print(f'Already have image records for {product}; skipping!')
                continue

            # Upload images if there are any
            if len(image_paths) > 0:
                for i, val in enumerate(image_paths):
                    # Note image_dir is the absolute path to the image directory
                    image_path = image_dir.parent / val
                    assert image_path.exists()
                    # Strip out media root so images behave correctly
                    image_path = str(image_path).replace(
                        settings.MEDIA_ROOT, '')
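                    # e.g. assuming MEDIA_ROOT '/app/media/', '/app/media/walmart/images/1234.jpg'
                    # becomes 'walmart/images/1234.jpg' (hypothetical paths, for illustration only)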
                    try:
                        image = ProductImage.objects.create(
                            product=product,
                            image_path=image_path,
                            image_label=image_labels[i],
                            image_number=i)
                        image.save()
                    # Skip if the file path already exists
                    except IntegrityError as e:
                        pass
        self.stdout.write(
            self.style.SUCCESS(
                f'Done loading Walmart-{str(scrape_date)} products to database!'
            ))

        self.stdout.write(
            self.style.SUCCESS(f'Conducting category assignment step'))
        assign_categories()

        self.stdout.write(
            self.style.SUCCESS(f'Conducting variety pack assignment step'))
        assign_variety_pack_flag()

        self.stdout.write(
            self.style.SUCCESS(f'Calculating Atwater result for products'))
        calculate_atwater()

        self.stdout.write(self.style.SUCCESS(f'Loading complete!'))