def to_mongo(self, value):
    """Convert ``value`` to a ``Timestamp`` for storage.

    Accepted inputs:

    * a ``Timestamp`` -- returned unchanged;
    * a ``datetime.datetime`` -- wrapped as ``Timestamp(value, 0)``;
    * a string -- passed through ``parse_datetime`` and the result wrapped
      as ``Timestamp(parsed, 0)``.

    Raises:
        ValidationError: if ``value`` is of any other type, or if parsing /
            wrapping a string raises ``ValueError`` or ``TypeError``.
    """
    if isinstance(value, Timestamp):
        return value
    if isinstance(value, datetime.datetime):
        return Timestamp(value, 0)
    if isinstance(value, string_types):
        try:
            return Timestamp(parse_datetime(value), 0)
        except (ValueError, TypeError):
            # Unusable string: fall through to the ValidationError below.
            pass
    # Wrap the operand in a 1-tuple: a bare tuple ``value`` would otherwise
    # be unpacked as the ``%`` argument list and raise TypeError (or print
    # only its single element) instead of producing this message.
    raise ValidationError(
        '%r cannot be converted to a Timestamp.' % (value,))
def to_mongo(self, value):
    """Convert ``value`` to a ``datetime.datetime`` for storage.

    Accepted inputs:

    * a ``datetime.datetime`` -- returned unchanged;
    * a ``datetime.date`` -- converted to a datetime at midnight of that day;
    * a string -- passed through ``parse_datetime``; the result is returned
      when it is not ``None``.

    Raises:
        ValidationError: if ``value`` is of any other type, or is a string
            for which ``parse_datetime`` returns ``None``.
    """
    # datetime.datetime is a subclass of datetime.date, so it must be
    # tested first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime(value.year, value.month, value.day)
    if isinstance(value, string_types):
        parsed = parse_datetime(value)
        if parsed is not None:
            return parsed
    # Wrap the operand in a 1-tuple: a bare tuple ``value`` would otherwise
    # be unpacked as the ``%`` argument list and raise TypeError (or print
    # only its single element) instead of producing this message.
    raise ValidationError(
        '%r cannot be converted to a datetime object.' % (value,))
def build_tr_filter(args: dict) -> dict:
    """Return a dict for filtering transactions according to the given arguments."""
    # Exact match on the departure / arrival point.
    filter_ = {
        name: args[name]
        for name in ('from_flight', 'to_flight')
        if name in args
    }

    # Flight date range: each supplied bound is parsed and stored under
    # its comparison operator.
    bounds = (
        ('date_flight__lte', '$lte'),
        ('date_flight__gte', '$gte'),
    )
    date_range = {}
    for arg_name, operator in bounds:
        if arg_name in args:
            date_range[operator] = parse_datetime(args[arg_name])

    # Only add the date condition when at least one bound was given.
    if date_range:
        filter_['date_flight'] = date_range
    return filter_
def to_mongo(self, value):
    """Convert ``value`` to a ``datetime.datetime`` for storage.

    Accepted inputs:

    * a ``datetime.datetime`` -- returned unchanged;
    * a ``datetime.date`` -- converted to a datetime at midnight of that day;
    * a string -- passed through ``parse_datetime``; the result is returned
      when it is not ``None``;
    * anything else (including a string that did not parse) -- handed to
      ``datetime.datetime.utcfromtimestamp`` as a last resort.

    Raises:
        ValidationError: if none of the conversions above succeeds.
    """
    # datetime.datetime is a subclass of datetime.date, so it must be
    # tested first.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime(value.year, value.month, value.day)
    if isinstance(value, string_types):
        parsed = parse_datetime(value)
        if parsed is not None:
            return parsed
    try:
        return datetime.datetime.utcfromtimestamp(value)
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError: value is not a number. The other three are what
        # utcfromtimestamp raises for NaN / out-of-range numbers; report
        # them as a validation failure too rather than leaking them raw.
        # The operand is wrapped in a 1-tuple so a tuple ``value`` is
        # formatted instead of being unpacked as the ``%`` argument list.
        raise ValidationError(
            '%r cannot be converted to a datetime object.' % (value,))
def process_entry(entry, website_pk):
    """Process entry data.

    The entry is handled according to its ``page_type``:
    ``product_detail`` or ``product_listing``.

    params:
        - entry: parsed dataset line
        - website_pk: website Mongo <ObjectId> reference

    Returns ``False`` when ``entry['extract_ok']`` is falsy, when a
    product detail already exists (``models.DuplicateKeyError``), or when
    the page type is unknown; ``True`` otherwise.

    Any other exception raised while storing the brand, the product or
    the listing page is propagated to the caller.
    """
    if not entry['extract_ok']:
        return False

    # Looked up before dispatching, so a missing key fails the same way
    # for every page type.
    extracted_data = entry['extracted_data']

    page_type = entry['page_type']
    if page_type == 'product_detail':
        return _process_product_detail(entry, extracted_data, website_pk)
    if page_type == 'product_listing':
        return _process_product_listing(entry, extracted_data, website_pk)

    logger.error("Unknown page_type")
    return False


def _process_product_detail(entry, extracted_data, website_pk):
    """Store the product described by a ``product_detail`` entry."""
    item = extracted_data['item']

    brand = item['brand_name']
    if brand:
        brand = models.Brand(brand=brand).ensure()

    # "path" and "listings" are not populated at this point.
    props = {
        "brand": brand,
        "crawled_at": parse_datetime(entry['crawled_at']),
        "discount_percentage": item['discount_percentage'],
        "name": item['article_name'],
        "on_sale": item['on_sale'],
        "price": item['sale_price'],
        "product_type": item['article_type'],
        "properties": item['extra_props'],
        "sku": item['sku'],
        "url": entry['page_url'],
        "website": website_pk,
    }
    # Clean None values
    props = utils.removeNoneValuesFromDict(props)

    product = models.Product(**props)
    try:
        product.ensure()
    except models.DuplicateKeyError:
        logger.debug("Item already exists: %s - %s - %s [%s]" % (
            props.get("sku"),
            props.get("name"),
            props.get("url"),
            props.get("crawled_at"),
        ))
        return False
    except Exception:
        # Hand the raw body to writeErrorFile, then let the failure
        # propagate with its original traceback.
        writeErrorFile('detail-%s' % (website_pk,), entry['body'])
        raise
    return True


def _process_product_listing(entry, extracted_data, website_pk):
    """Store a ``product_listing`` page and attach it to known products."""
    props = {
        "page_number": entry['page_number'],
        "page_listing_size": extracted_data['number_of_items'],
        "category": entry['product_category'],
        "sorted_by": entry['ordering'],
        "url": entry['page_url'],
        "crawled_at": parse_datetime(entry['crawled_at']),
        "website": website_pk,
    }
    props = utils.removeNoneValuesFromDict(props)

    listing_page = models.ProductListingPage(**props)
    try:
        listing_page.ensure()
    except models.DuplicateKeyError:
        # The page is already stored: fetch it by url + crawl time.
        listing_page = models.ProductListingPage.objects.get(
            {k: v for k, v in props.items() if k in ('url', 'crawled_at')})
    pl_pk = listing_page.pk

    # ---------------------------------------------------------------------
    # Assign Items
    # ---------------------------------------------------------------------
    total_items = 0
    not_found_products = 0
    listing_added_total = 0
    insufficient_data = 0
    for index, item in enumerate(extracted_data['items']):
        detail_page_url = item.get('detail_page_url')
        if not detail_page_url:
            # Skipped items are not counted in total_items.
            continue
        total_items += 1

        # Find matching Product based on detail_page_url.
        # NOTE(review): products are matched on "path", which the
        # product_detail handling never sets explicitly -- presumably the
        # model derives it; confirm.
        try:
            product = models.Product.objects.get({'path': detail_page_url})
        except models.Product.DoesNotExist:
            logger.debug(
                "No Product match found for %s" % (detail_page_url,))
            not_found_products += 1
            continue

        # Create Listing Item. Any failure here (including a missing key
        # in ``item``) is counted as insufficient data.
        try:
            li_props = {
                "position": index + 1,
                "price": item['sale_price'],
                "on_sale": item['on_sale'],
                "discount_percentage": item['discount_percentage'],
                "listing_props": item['listing_props'],
                "listing": pl_pk,
            }
            listing_item = models.ProductListingItem(**li_props)
        except Exception as e:
            writeErrorFile('listing-%s' % (pl_pk,), entry['body'])
            logger.error(e)
            insufficient_data += 1
            continue

        # A listing that is already attached still counts as "ok".
        if any(existing.listing._id == pl_pk
               for existing in product.listings):
            listing_added_total += 1
            continue

        # Add New Listing to Product listings
        product.listings.append(listing_item)
        try:
            product.save()
        except Exception as e:
            logger.error(e)
            writeErrorFile('listing-%s-%s' % (pl_pk, index), entry['body'])
        else:
            listing_added_total += 1

    # ---------------------------------------------------------------------
    # Debug stats
    # ---------------------------------------------------------------------
    logger.debug("%s: stats (ok:%s/missing:%s/nodata:%s/total:%s)" % (
        utils.get_url_path(entry['page_url']),
        listing_added_total,
        not_found_products,
        insufficient_data,
        total_items,
    ))
    return True