Example #1
0
def test_generate_category_pistol_returns_correct_category():
    """An absolute 2alfa.no pistol URL is categorised as ``Category.HANDGUN``."""
    url = "https://www.2alfa.no/ammunisjon/ladet-ammo/pistol.hml"

    result = Category.extract(url)

    # Compare to None by identity; ``!= None`` goes through ``__ne__``.
    assert result is not None
    assert result == Category.HANDGUN
Example #2
0
def test_generate_category_rifle():
    """A relative URL ending in ``rifle`` is categorised as ``Category.RIFLE``."""
    url = "/produkter/ammunisjon/rifle"

    result = Category.extract(url)

    # Compare to None by identity; ``!= None`` goes through ``__ne__``.
    assert result is not None
    assert result == Category.RIFLE
Example #3
0
def test_generate_category_shotgun():
    """A relative URL ending in ``haandvaapen`` maps to ``Category.HANDGUN``.

    NOTE(review): despite the ``shotgun`` in its name, this test exercises the
    handgun path. Another function with this exact name is defined further
    down in this file; if both live in one module the later definition
    rebinds the name and this test is never collected. Consider renaming it
    (e.g. ``test_generate_category_handgun``).
    """
    url = "/produkter/ammunisjon/haandvaapen"

    result = Category.extract(url)

    # Compare to None by identity; ``!= None`` goes through ``__ne__``.
    assert result is not None
    assert result == Category.HANDGUN
Example #4
0
def test_generate_category_shotgun():
    """A relative URL ending in ``hagle`` is categorised as ``Category.SHOTGUN``."""
    url = "/produkter/ammunisjon/hagle"

    result = Category.extract(url)

    # Compare to None by identity; ``!= None`` goes through ``__ne__``.
    assert result is not None
    assert result == Category.SHOTGUN
Example #5
0
    def fetch(self):
        """Scrape every listing page in ``self.urls`` and return its products.

        Each entry of ``self.urls`` is appended to ``self.root_url``, the page
        is downloaded, and every ``div.ut2-gl__body`` container on it is
        converted into a ``Product``.

        Returns:
            list: one ``Product`` per container, in page order, accumulated
            across all URLs.

        Raises:
            requests.exceptions.Timeout: if a page does not answer within the
                timeout below.
            AttributeError, TypeError, KeyError: if a container is missing one
                of the expected child elements or attributes (``find`` returns
                ``None`` when nothing matches).
        """
        elements = []
        for url in self.urls:
            compounded_url = f"{self.root_url}{url}"
            # Without a timeout requests waits indefinitely on a stalled
            # server, which would hang the whole scrape.
            response = requests.get(compounded_url, timeout=30)

            # 2Alfa has a somewhat more cryptic site setup.
            soup = BeautifulSoup(response.content, "html.parser")
            data = soup.find_all("div", class_="ut2-gl__body")

            for container in data:
                # Extract image info
                image_div = container.find("div", class_="ut2-gl__image")
                image_url = image_div.find("img", class_="ty-pict")["src"]

                # The first anchor inside the image block carries the
                # details link.
                details_url = image_div.find("a")["href"]

                # Extract name
                name_div = container.find("div", class_="ut2-gl__name")
                name = name_div.find("a")["title"]

                # Extract price (kept as the raw text of the price span)
                price_div = container.find("div", class_="ut2-gl__price")
                price_str = price_div.find("span", class_="ty-price-num").text

                product = Product(cat=Category.extract(compounded_url),
                                  img_url=image_url,
                                  price=price_str,
                                  name=name,
                                  details_url=details_url)
                elements.append(product)
        return elements
Example #6
0
    def fetch(self):
        """Scrape every listing page in ``urls`` and return its products.

        Each entry of ``urls`` is appended to ``root_url``, the page is
        downloaded, and every ``div.ProdItem`` container on it is converted
        into a ``Product``. The image ``src`` is prefixed with ``root_url``;
        the details link is passed through as found.

        NOTE(review): ``urls`` and ``root_url`` are read as bare names, not as
        attributes of ``self`` -- presumably module-level values; confirm.

        Returns:
            list: one ``Product`` per container, in page order, accumulated
            across all URLs.

        Raises:
            requests.exceptions.Timeout: if a page does not answer within the
                timeout below.
            AttributeError, TypeError, KeyError: if a container is missing one
                of the expected child elements or attributes (``find`` returns
                ``None`` when nothing matches).
        """
        elements = []
        for url in urls:
            compounded_url = f"{root_url}{url}"
            print(f"Fetching data @ {compounded_url}")
            # Without a timeout requests waits indefinitely on a stalled
            # server, which would hang the whole scrape.
            response = requests.get(compounded_url, timeout=30)

            soup = BeautifulSoup(response.content, "html.parser")
            data = soup.find_all("div", class_="ProdItem")

            for container in data:
                image_link = container.find("img")["src"]

                # The title anchor carries both the product name and the
                # details link, so look it up once.
                title_anchor = container.find("a", class_="ItemTitleLink")
                name = title_anchor["title"]
                details_url = title_anchor["href"]

                price_string = container.find("span", class_="Price").text

                product = Product(cat=Category.extract(compounded_url),
                                  img_url=f"{root_url}{image_link}",
                                  price=price_string,
                                  name=name,
                                  details_url=details_url)

                elements.append(product)

        return elements
Example #7
0
    def add_sentence(self, sentence, categoryName):
        """Add the words of ``sentence`` to the category ``categoryName``.

        The sentence is first passed through ``self.preprocess_sentence`` and
        split with ``get_words``. Every word is added to the category and,
        if not yet present, appended to ``self._words``. A category that is
        not yet in ``self._categories`` is created and registered there.

        Counters kept on the category:
          * value N -- set to the word count for a new category, otherwise
            increased by the word count of this sentence;
          * number of sentences -- set to 1 for a new category, otherwise
            increased by 1.

        Args:
            sentence: raw sentence to register.
            categoryName: key of the category in ``self._categories``.
        """
        sentence = self.preprocess_sentence(sentence)

        category = self._categories.get(categoryName)
        is_new = category is None
        if is_new:
            category = Category(categoryName)

        # Tokenise once instead of re-running get_words() for the loop and
        # again for each length computation.
        words = get_words(sentence)

        for word in words:
            category.add_words(word)
            if word not in self._words:
                self._words.append(word)

        if is_new:
            # Registered only after its words were added, so a failure while
            # adding words never leaves a half-filled category behind.
            self._categories[categoryName] = category
            category.setValueN(len(words))
            category.setNumberOfSentence(1)
        else:
            category.setValueN(len(words) + category.getValueN())
            category.setNumberOfSentence(category.getNumberOfSentence() + 1)