def _get_search_products(search_term: str) -> List[Dict[str, Any]]:
    """
    Get the products matching a search term as a list of display-ready rows.

    One row is produced per (product, price entry) pair, so a product with
    several price entries appears several times and a product without any
    price entry does not appear at all.

    The "shop_link" and "image" values are HTML snippets; the database values
    interpolated into them are HTML-escaped so that a crawled string containing
    quotes or angle brackets cannot break out of the attribute/tag.

    :param search_term: product to search
    :return: list of dicts with the keys "name", "price (€)", "unit price (€)",
        "unit measure", "quantity", "shop_link" and "image"
    """
    # Function-scope import: keeps the block self-contained without touching
    # the module-level import section.
    from html import escape

    # Search products in the db with the given tag.
    products = Product.Product.objects(mongo_visitor.Q(search_term=search_term))
    # Diagnostic output only, so it must not be reported at ERROR level.
    logging.debug("Found %d products for search term %r", len(products), search_term)

    res_list = []
    for product in products:
        # Price entries reference the product through its stringified id.
        product_prices = ProductPrice.ProductPrice.objects(
            mongo_visitor.Q(product_id=str(product.id))
        )
        for price in product_prices:
            shop_url = escape(str(price.shop_link), quote=True)
            shop_label = escape(str(price.shop))
            image_url = escape(str(price.image_link), quote=True)
            # NOTE(review): "name" and "quantity" are returned as plain text;
            # presumably the renderer is responsible for them -- confirm.
            res_list.append(
                {
                    "name": str(product.name),
                    "price (€)": price.price,
                    "unit price (€)": price.unit_price,
                    "unit measure": str(price.unit_measure),
                    "quantity": str(product.quantity),
                    "shop_link": f'<a href="{shop_url}">{shop_label}</a>',
                    "image": f'<img src="{image_url}" width="100" height="100">',
                }
            )
    return res_list
def _process_arg_filters(self, args):
    """
    Rebuild positional query arguments with their filters fixed up.

    Plain ``Q`` objects get their query dict passed through the datetime-range
    and null filter processors; ``QCombination`` objects are rebuilt with the
    same operation and recursively processed children.

    :param args: iterable of ``Q`` / ``QCombination`` objects
    :return: tuple with one rebuilt object per input argument
    :raises TypeError: if an argument is neither ``Q`` nor ``QCombination``
    """
    rebuilt = []
    for arg in args:
        # mongoengine publicly exposes only Q, which is why QCombination is
        # looked up on the visitor module directly.
        if isinstance(arg, visitor.Q):
            # Order matters: both processors rewrite the same "filters" dict,
            # and the null processor consumes the datetime processor's output.
            filters, _ = self._process_datetime_range_filters(filters=arg.query)
            filters = self._process_null_filters(filters=filters)
            rebuilt.append(visitor.Q(**filters))
            continue

        if isinstance(arg, visitor.QCombination):
            # Nested combination: fix the children first, keep the operation.
            fixed_children = self._process_arg_filters(arg.children)
            rebuilt.append(visitor.QCombination(arg.operation, fixed_children))
            continue

        raise TypeError("Unknown argument type '%s' of argument '%s'" % (type(arg), repr(arg)))

    return tuple(rebuilt)
def present_in_db(search_term, time_delta):
    """
    Check whether the database holds a recent price entry for a search term.

    The newest price entry stored for ``search_term`` is looked up, and its
    age is compared against ``time_delta``.

    :param search_term: user search term
    :param time_delta: maximum age (``datetime.timedelta``) for an entry to
        still count as recent
    :return: True if the newest entry related to the search term is at most
        ``time_delta`` old, False if it is older or no entry exists
    """
    # Sort descending so the first document is the most recent insertion.
    # An ascending sort would pick the oldest entry and report stale data
    # even right after a fresh crawl. first() returns None on an empty result
    # and avoids loading every matching document just to count them.
    latest_price = (
        ProductPrice.ProductPrice.objects(mongo_visitor.Q(search_term=search_term))
        .order_by("-date")
        .first()
    )
    if latest_price is None:
        return False

    # NOTE(review): compares against naive local time; confirm that the stored
    # "date" values are written with the same clock (local vs. UTC).
    age = datetime.datetime.now() - latest_price.date
    return age <= time_delta
def filter_queryset(self, request, queryset, view):
    """
    Narrow ``queryset`` to the documents matching the request's search terms.

    Every search term must match at least one of the view's ``search_fields``:
    the per-field lookups for a term are OR-ed together, and the resulting
    per-term conditions are AND-ed.

    :param request: incoming request, handed to ``self.get_search_terms``
    :param queryset: queryset to filter
    :param view: view object; its optional ``search_fields`` attribute lists
        the fields to search
    :return: the filtered queryset, or ``queryset`` unchanged when there are
        no search fields, no search terms or no lookups
    """
    search_fields = getattr(view, 'search_fields', None)
    search_terms = self.get_search_terms(request)
    if not (search_fields and search_terms):
        return queryset

    orm_lookups = [
        self.construct_search(six.text_type(field_name))
        for field_name in search_fields
    ]
    if not orm_lookups:
        return queryset

    # One condition per term: "term matches field A or field B or ...".
    per_term_conditions = (
        reduce(operator.or_, (visitor.Q(**{lookup: term}) for lookup in orm_lookups))
        for term in search_terms
    )
    # All terms have to match.
    return queryset.filter(reduce(operator.and_, per_term_conditions))
def process_item(self, item, spider):
    """
    Update the database with item info.

    Pack the item info as product and product price database instances,
    insert them if they do not exist yet, or update them if needed. Three
    cases are handled:

    1. The product is already stored for the item's shop (matched on the
       shop-specific id): refresh the price and the shop ranking if changed.
    2. The product is stored, but matched only by name/quantity (exactly, or
       via ``product_sim.same_product``): attach this shop's info and price.
    3. The product is unknown: create it and insert its first price.

    :param item: item crawled from spider
    :param spider: spider who crawled the item (not used in this method)
    :return: dict with a deep copy of the product under "product" and the
        shop name under "shop", for the next pipeline
    """
    # Check if the product already exists.
    # Only the "coop" and "ah" shops are looked up by id; for any other shop,
    # or an item without an "id" field, the empty list is kept.
    product = []
    if "id" in item.fields:
        if item["shop"] == "coop":
            product = Product.Product.objects(
                (mongo_visitor.Q(coop_id=item["id"])))
        elif item["shop"] == "ah":
            product = Product.Product.objects(
                (mongo_visitor.Q(ah_id=item["id"])))
    print(product)
    # Case 1: product already exists in the db.
    if len(product) > 0:
        print("Already exists!")
        product = product[0]
        # Check if the price is up to date.
        # Newest price first ("-date"). Indexing with [0] raises IndexError
        # when no price entry is stored for this product/shop pair.
        # NOTE(review): product_id is queried with the raw id here; confirm it
        # matches how update_price stores it.
        last_price = ProductPrice.ProductPrice.objects(
            (mongo_visitor.Q(product_id=product.id) & mongo_visitor.Q(shop=item["shop"]))).order_by("-date")[0]
        if item["price"] != last_price.price:
            # Update the price.
            product.last_update = datetime.datetime.utcnow()
            product.save()
            print("Updating the price..")
            self.update_price(item, product)
        # The ranking lives in a shop-prefixed field, e.g. "<shop>_ranking".
        if item["shop_ranking"] != product[item["shop"] + "_" + "ranking"]:
            print("Updating the shop ranking..")
            product[item["shop"] + "_" + "ranking"] = item["shop_ranking"]
            product.last_update = datetime.datetime.utcnow()
            product.save()
    else:
        # Check for other shops.
        #
        # Quick check first: exact match on name and quantity.
        product = Product.Product.objects(
            (mongo_visitor.Q(name=item["name"]) & mongo_visitor.Q(quantity=item["quantity"])))
        if not product:
            # Check if the product with slightly different name or quantity format already
            # exists in the db.
            # Every stored product is compared; there is no break, so the last
            # matching product wins when several are considered the same.
            if len(Product.Product.objects) > 0:
                for product2 in Product.Product.objects:
                    if product_sim.same_product(product2, item):
                        print(
                            "already exists, with a slightly different name / quantity!"
                        )
                        product = product2
        else:
            product = product[0]
        # Case 2: product already exists in another shop.
        # When neither lookup matched, "product" is still the empty query
        # result and this test falls through to case 3.
        if product:
            print("already exists, inserting new shop info..")
            # Shop-specific fields are addressed through a "<shop>_" prefix.
            # NOTE(review): item["id"] is read without the '"id" in item.fields'
            # guard used at the top -- confirm every item carries an id.
            shop = item["shop"] + "_"
            product[shop + "id"] = item["id"]
            product[shop + "link"] = item["link"]
            product[shop + "image"] = item["image"].tobytes()
            product[shop + "name"] = item["name"]
            product[shop + "ranking"] = item["shop_ranking"]
            # Remember every search term that led to this product.
            if item["search_term"] not in product.search_term:
                product.search_term.append(item["search_term"])
            product.last_update = datetime.datetime.utcnow()
            product.save()
            # Insert new price instance.
            self.update_price(item, product)
        # Case 3: product does not exist in the db.
        else:
            print("inserting new object..")
            product = Product.Product()
            product.name = item["name"]
            shop = item["shop"] + "_"
            # NOTE(review): this stores the prefix string under a field named
            # after the prefix itself (e.g. "coop_"); looks unintended -- confirm.
            product[shop] = shop
            product["link"] = item["link"]
            product[shop + "id"] = item["id"]
            product[shop + "link"] = item["link"]
            product[shop + "image"] = item["image"].tobytes()
            product[shop + "ranking"] = item["shop_ranking"]
            # NOTE(review): unlike case 2, "<shop>_name" is not set here -- confirm.
            product.search_term = [item["search_term"]]
            product.quantity = item["quantity"]
            product.comparison_quantity = item["comparison_quantity"]
            product.comparison_measure = item["comparison_measure"]
            # The product is saved before the price is inserted.
            product.save()
            # Insert new price instance.
            self.update_price(item, product)
    # A deep copy is handed on, not the product object used above.
    return {"product": copy.deepcopy(product), "shop": item["shop"]}