Esempio n. 1
0
 def to_mongo(self, value):
     """Convert ``value`` to a ``Timestamp``.

     ``Timestamp`` instances are returned unchanged; ``datetime.datetime``
     instances and strings accepted by ``parse_datetime`` are wrapped as
     ``Timestamp(<datetime>, 0)``.

     Raises:
         ValidationError: if ``value`` has any other type, or is a string
             whose parsing/wrapping raises ``ValueError`` or ``TypeError``.
     """
     if isinstance(value, Timestamp):
         return value
     if isinstance(value, datetime.datetime):
         return Timestamp(value, 0)
     if isinstance(value, string_types):
         try:
             return Timestamp(parse_datetime(value), 0)
         except (ValueError, TypeError):
             # Deliberate fall-through: a bad string is reported by the
             # ValidationError below, like any other unsupported value.
             pass
     # Format with a 1-tuple: a bare tuple ``value`` would otherwise be
     # unpacked as the format arguments and raise TypeError instead.
     raise ValidationError(
         '%r cannot be converted to a Timestamp.' % (value,))
Esempio n. 2
0
 def to_mongo(self, value):
     """Coerce ``value`` into a ``Timestamp``.

     A ``Timestamp`` is passed through as-is. A ``datetime.datetime``, or a
     string that ``parse_datetime`` can handle, is wrapped as
     ``Timestamp(<datetime>, 0)``.

     Raises:
         ValidationError: for any other input, including strings for which
             parsing or wrapping raises ``ValueError`` or ``TypeError``.
     """
     if isinstance(value, Timestamp):
         return value
     if isinstance(value, datetime.datetime):
         return Timestamp(value, 0)
     if isinstance(value, string_types):
         try:
             return Timestamp(parse_datetime(value), 0)
         except (ValueError, TypeError):
             # Intentionally ignored here; the generic ValidationError
             # raised below covers unparseable strings too.
             pass
     # ``(value,)`` keeps a tuple-valued ``value`` from being spread over
     # the format string, which would raise TypeError instead.
     raise ValidationError(
         '%r cannot be converted to a Timestamp.' % (value,))
Esempio n. 3
0
 def to_mongo(self, value):
     """Convert ``value`` to a ``datetime.datetime``.

     ``datetime.datetime`` instances are returned unchanged,
     ``datetime.date`` instances become midnight of that day, and strings
     are handed to ``parse_datetime``.

     Raises:
         ValidationError: if ``value`` is none of the above, or is a string
             for which ``parse_datetime`` returns ``None``.
     """
     # datetime.datetime subclasses datetime.date, so it has to be tested
     # first to keep its time-of-day component.
     if isinstance(value, datetime.datetime):
         return value
     if isinstance(value, datetime.date):
         return datetime.datetime(value.year, value.month, value.day)
     if isinstance(value, string_types):
         parsed = parse_datetime(value)
         if parsed is not None:
             return parsed
     # Format with a 1-tuple: a bare tuple ``value`` would otherwise be
     # unpacked as the format arguments and raise TypeError instead.
     raise ValidationError(
         '%r cannot be converted to a datetime object.' % (value,))
Esempio n. 4
0
def build_tr_filter(args: dict) -> dict:
    """Return the dict used to filter transactions for the given arguments.

    Recognised keys of ``args``:

    * ``from_flight`` / ``to_flight`` -- copied verbatim (exact match on the
      departure / arrival point).
    * ``date_flight__lte`` / ``date_flight__gte`` -- parsed with
      ``parse_datetime`` and combined under ``date_flight`` as ``$lte`` /
      ``$gte`` bounds.

    Keys that are absent from ``args`` contribute nothing; other keys are
    ignored.
    """
    # Exact match on departure / arrival point.
    query = {
        name: args[name]
        for name in ('from_flight', 'to_flight')
        if name in args
    }

    # Flight date range: one comparison operator per supplied bound.
    bounds = (
        ('date_flight__lte', '$lte'),
        ('date_flight__gte', '$gte'),
    )
    date_range = {
        operator: parse_datetime(args[arg_name])
        for arg_name, operator in bounds
        if arg_name in args
    }

    # Only emit the date clause when at least one bound was given.
    if date_range:
        query['date_flight'] = date_range

    return query
Esempio n. 5
0
 def to_mongo(self, value):
     """Convert ``value`` to a ``datetime.datetime``.

     ``datetime.datetime`` instances are returned unchanged,
     ``datetime.date`` instances become midnight of that day, and strings
     are handed to ``parse_datetime``. Anything else (including a string
     for which ``parse_datetime`` returns ``None``) is passed to
     ``datetime.datetime.utcfromtimestamp``.

     Raises:
         ValidationError: if none of the conversions succeeds.
     """
     # datetime.datetime subclasses datetime.date, so it has to be tested
     # first to keep its time-of-day component.
     if isinstance(value, datetime.datetime):
         return value
     if isinstance(value, datetime.date):
         return datetime.datetime(value.year, value.month, value.day)
     if isinstance(value, string_types):
         parsed = parse_datetime(value)
         if parsed is not None:
             return parsed
     try:
         return datetime.datetime.utcfromtimestamp(value)
     except (TypeError, ValueError, OverflowError, OSError):
         # Besides TypeError for non-numeric input, out-of-range numbers
         # make utcfromtimestamp raise ValueError/OverflowError/OSError;
         # all of them mean "not convertible" for the caller.
         # ``(value,)`` keeps a tuple-valued ``value`` from being unpacked
         # as the format arguments.
         raise ValidationError(
             '%r cannot be converted to a datetime object.' % (value,))
Esempio n. 6
0
def process_entry(entry, website_pk):
    """Process one parsed dataset entry and persist it.

    The work is dispatched on ``entry['page_type']``:

    * ``'product_detail'`` -- a ``models.Product`` is built from the
      extracted item and ``ensure()`` is called on it.
    * ``'product_listing'`` -- a ``models.ProductListingPage`` is ensured
      and a ``models.ProductListingItem`` is appended to every product
      matched by its detail page URL.

    Args:
        entry: parsed dataset line.
        website_pk: website Mongo <ObjectId> reference.

    Returns:
        ``True`` when the entry was processed. ``False`` when
        ``entry['extract_ok']`` is falsy, when the product already exists
        (``models.DuplicateKeyError``), or when the page type is unknown.

    Raises:
        Exception: unrecoverable failures (for example any other error
            from the product ``ensure()``) propagate to the caller.
    """
    if not entry['extract_ok']:
        return False

    extracted_data = entry['extracted_data']
    page_type = entry['page_type']

    if page_type == 'product_detail':
        return _process_product_detail(
            entry, extracted_data['item'], website_pk)
    if page_type == 'product_listing':
        return _process_product_listing(entry, extracted_data, website_pk)

    logger.error("Unknown page_type")
    return False


def _process_product_detail(entry, item, website_pk):
    """Ensure the product of a ``product_detail`` entry; False if it exists."""
    brand = item['brand_name']
    if brand:
        brand = models.Brand(brand=brand).ensure()

    props = {
        "brand": brand,
        "crawled_at": parse_datetime(entry['crawled_at']),
        "discount_percentage": item['discount_percentage'],
        "name": item['article_name'],
        "on_sale": item['on_sale'],
        "price": item['sale_price'],
        "product_type": item['article_type'],
        "properties": item['extra_props'],
        "sku": item['sku'],
        "url": entry['page_url'],
        "website": website_pk,
    }
    # Drop None values before building the model.
    props = utils.removeNoneValuesFromDict(props)

    product = models.Product(**props)
    try:
        product.ensure()
    except models.DuplicateKeyError:
        logger.debug(
            "Item already exists: %s - %s - %s [%s]",
            props.get("sku"),
            props.get("name"),
            props.get("url"),
            props.get("crawled_at"),
        )
        return False
    except Exception:
        # Record the entry body via writeErrorFile, then re-raise.
        writeErrorFile('detail-%s' % (website_pk,), entry['body'])
        raise

    return True


def _process_product_listing(entry, extracted_data, website_pk):
    """Ensure the listing page and attach a listing item to matched products."""
    number_of_items = extracted_data['number_of_items']

    props = {
        "page_number": entry['page_number'],
        "page_listing_size": number_of_items,
        "category": entry['product_category'],
        "sorted_by": entry['ordering'],
        "url": entry['page_url'],
        "crawled_at": parse_datetime(entry['crawled_at']),
        "website": website_pk,
    }
    props = utils.removeNoneValuesFromDict(props)

    listing_page = models.ProductListingPage(**props)
    try:
        listing_page.ensure()
    except models.DuplicateKeyError:
        # Already stored: fetch the existing page by url + crawled_at
        # (whichever of the two survived the None clean-up).
        lookup = {
            key: val for key, val in props.items()
            if key in ('url', 'crawled_at')
        }
        listing_page = models.ProductListingPage.objects.get(lookup)
    pl_pk = listing_page.pk

    total_items = 0
    not_found_products = 0
    listing_added_total = 0
    insufficient_data = 0

    for index, item in enumerate(extracted_data['items']):
        detail_page_url = item.get('detail_page_url')
        if not detail_page_url:
            continue
        total_items += 1

        # Find the matching Product by detail_page_url.
        # NOTE(review): the lookup uses 'path', which the product_detail
        # branch of this function never sets -- confirm it is populated
        # elsewhere.
        try:
            product = models.Product.objects.get({'path': detail_page_url})
        except models.Product.DoesNotExist:
            logger.debug("No Product match found for %s", detail_page_url)
            not_found_products += 1
            continue

        # Build the listing item; missing keys or a constructor failure
        # count as insufficient data for this item only.
        try:
            listing_item = models.ProductListingItem(
                position=index + 1,
                price=item['sale_price'],
                on_sale=item['on_sale'],
                discount_percentage=item['discount_percentage'],
                listing_props=item['listing_props'],
                listing=pl_pk,
            )
        except Exception as exc:
            writeErrorFile('listing-%s' % (pl_pk,), entry['body'])
            logger.error(exc)
            insufficient_data += 1
            continue

        # A product already linked to this page is counted as ok and
        # left unchanged.
        if any(existing.listing._id == pl_pk
               for existing in product.listings):
            listing_added_total += 1
            continue

        product.listings.append(listing_item)
        try:
            product.save()
        except Exception as exc:
            # A failed save is logged and recorded; the loop carries on.
            logger.error(exc)
            writeErrorFile('listing-%s-%s' % (pl_pk, index), entry['body'])
        else:
            listing_added_total += 1

    logger.debug(
        """%s: stats (ok:%s/missing:%s/nodata:%s/total:%s)""",
        utils.get_url_path(entry['page_url']),
        listing_added_total,
        not_found_products,
        insufficient_data,
        total_items,
    )

    return True