def generate_annotation_id(self, collection_uri):
    """Return a fresh, unique annotation URI under *collection_uri*.

    The URI has the form ``<collection_uri>/annotation/<n>`` where *n* is a
    monotonically increasing integer cached on the class in
    ``API.last_generated_ann_id``.  On first use the cache is seeded by
    scanning the collection's triple store for existing annotation IDs.

    :param collection_uri: URI of the collection the annotation belongs to.
    :return: the newly generated annotation URI (str).
    """
    collection_id = self._get_id(collection_uri)
    prefix = collection_uri + "/annotation/"
    if API.last_generated_ann_id is None:
        # Seed the counter from whatever annotations already exist on disk.
        query = prepareQuery('select ?annotation where {?annotation a ?type}')
        g = Store()
        g.attach_directory(os.path.join(self.basedir, collection_id))
        results = g.graph.query(query, initBindings={'type': DADA.Annotation})
        # NOTE(review): assumes every annotation URI ends in a numeric suffix
        # after `prefix` is stripped — int() will raise otherwise.
        annotations = [
            int(result["annotation"].toPython().replace(prefix, ""))
            for result in results.bindings
        ]
        # default=0 avoids ValueError when the store has no annotations yet
        # (the original max(annotations) crashed on a fresh collection).
        API.last_generated_ann_id = max(annotations, default=0)
    API.last_generated_ann_id += 1
    return prefix + str(API.last_generated_ann_id)
def search_sparql(self, collection_uri, query):
    """Run a SPARQL *query* against a collection's graph and return the
    results in a SPARQL-JSON-like structure.

    :param collection_uri: URI of the collection to query.
    :param query: SPARQL query string.
    :return: dict of the form
        ``{"head": {"vars": [...]}, "results": {"bindings": [...]}}``
        where each binding maps a variable name to
        ``{"type": <rdflib class name>, "value": <python value>}``.
    """
    collection_id = self._get_id(collection_uri)
    output = {"head": {"vars": []}, "results": {"bindings": []}}
    # Load the collection's annotation graphs plus its collection-level
    # metadata file (<collection_id>.n3) into one queryable graph.
    subgraph = Store()
    subgraph.attach_directory(os.path.join(self.basedir, collection_id))
    subgraph.graph.parse(
        os.path.join(self.basedir, collection_id + ".n3"), format='n3'
    )
    result = subgraph.graph.query(query)
    for var in result.vars:
        output["head"]["vars"].append(str(var))
    for binding in result.bindings:
        temp = {}
        for var in output["head"]["vars"]:
            # Skip variables left unbound by OPTIONAL patterns — the
            # original indexed unconditionally and raised KeyError.
            if var in binding:
                temp[var] = {
                    "type": type(binding[var]).__name__,
                    "value": binding[var].toPython(),
                }
        output["results"]["bindings"].append(temp)
    return output
def get_products_from_france(self):
    """Fetch products sold in France from the OpenFoodFacts API, one page
    per configured category, and persist the valid ones to the database.

    A product is persisted only if it carries every field listed in
    ``config.FILTER`` and has a non-empty quantity, nutriscore and store
    list.  Each kept product is committed individually.
    """
    for category in self.categories:
        params = {
            "action": "process",
            "tagtype_0": "categories",
            "tag_contains_0": "contains",
            "tag_0": category,
            "tagtype_1": "countries",
            "tag_contains_1": "contains",
            "tag_1": "france",
            "page": 1,
            "page_size": config.PAGE_SIZE,
            "json": 1,
        }
        res = requests.get(
            "https://fr.openfoodfacts.org/cgi/search.pl", params=params
        )
        self.result = res.json()
        self.products = self.result["products"]

        # Tag every product with the category it was fetched under, ONCE.
        # The original did this inside the per-product loop, redoing the
        # work n times and rebinding self.products to a list of None
        # (dict.update returns None).
        for product in self.products:
            product["categories"] = category

        for product in self.products:
            # Skip products missing required fields or with empty values.
            if not all(tag in product for tag in config.FILTER):
                continue
            if not product["quantity"]:
                continue
            if not product["nutrition_grade_fr"]:
                continue
            if not product["stores"]:
                continue

            code = product["code"]
            # Build the ORM rows; the product barcode is reused as the id
            # for the category and store rows, mirroring the original.
            c1 = Category(id=code, category_name=product["categories"])
            p1 = Product(
                id=code,
                product_name=product["product_name"],
                brands=product["brands"],
                category=c1,
                nutriscore_fr=product["nutrition_grade_fr"],
                quantity=product["quantity"],
                product_url=product["url"],
            )
            p3 = Store(id=code, store_name=product["stores"])
            p1.stores.append(p3)

            # Insert and commit this product (per-product commit, as before).
            self.session.add(p1)
            self.session.commit()
def search(self, item):
    """Search Walmart for *item* and return the in-stock results as JSON.

    Scrapes the grid-view search results page, skipping products marked
    out of stock, and collects title, image, product URL and price for
    each remaining item.

    :param item: search term (will be URL-quoted).
    :return: JSON produced by ``Store.generate_json()``.
    """
    item = quote(item)
    my_url = self.url + "search/?query=" + item
    # Fetch the search results page.
    response = requests.get(my_url)
    page_html = response.content

    # Parse the HTML and grab each product tile.
    page_soup = soup(page_html)
    containers = page_soup.findAll(
        "div", {"class": "search-result-gridview-item-wrapper"})

    storeObj = Store()
    for container in containers:
        # BUG FIX: the original tested containers[0] on every iteration,
        # so the stock status of the first result was applied to all.
        out_of_stock = len(container.findAll(
            "div",
            {"class": "product-sub-title-block product-out-of-stock"})) != 0
        if out_of_stock:
            continue
        store = Store()
        store.store_name = 'Walmart'
        store.title = container.img["alt"]
        store.image_url = container.img["data-image-src"]
        store.product_url = self.url + container.a["href"]
        # Last visually-hidden span on the tile carries the price text.
        store.price = container.findAll(
            "span", {"class": "visuallyhidden"})[-1].text
        storeObj.add_item(store)
    return storeObj.generate_json()