Ejemplo n.º 1
0
 def process_item(self, good_item, spider):
     """Look up each ingredient fragment of ``good_item`` in ``self.agrovoc_graph``.

     When the item has an ``'ingredients'`` entry, that string is passed
     through ``string_processor.remove_substring_in_paranthesis`` and
     ``string_processor.split_ingredients``.  Every resulting fragment for
     which ``string_processor.parse_e_additives`` returns a falsy value is
     run through ``remove_weight`` and ``remove_percents`` and then looked
     up (stripped) with ``self.agrovoc_graph.find_ingredient_by_name``.

     A truthy lookup result is appended to ``good_item['agrovoc_ingredients']``;
     otherwise the fragment's counter in ``self.not_parsed_fragments`` is
     incremented.

     ``spider`` is accepted but not used here.  The (possibly updated)
     ``good_item`` is always returned.
     """
     # Nothing to do for items that carry no ingredient text.
     if 'ingredients' not in good_item:
         return good_item

     cleaned_text = string_processor.remove_substring_in_paranthesis(
         good_item['ingredients'])
     for piece in string_processor.split_ingredients(cleaned_text):
         # Fragments for which parse_e_additives is truthy are skipped.
         if string_processor.parse_e_additives(piece):
             continue

         piece = string_processor.remove_percents(
             string_processor.remove_weight(piece))
         match = self.agrovoc_graph.find_ingredient_by_name(piece.strip())

         if not match:
             # Count the fragment (as-is, not stripped) as unmatched.
             seen_so_far = self.not_parsed_fragments.get(piece, 0)
             self.not_parsed_fragments[piece] = seen_so_far + 1
             continue

         # Build a new list rather than mutating whatever is stored.
         already_matched = good_item.get('agrovoc_ingredients', [])
         good_item['agrovoc_ingredients'] = already_matched + [match]
     return good_item
Ejemplo n.º 2
0
 def process_item(self, good_item, spider):
     """Annotate ``good_item`` with matches from ``self.agrovoc_graph``.

     Items without an ``'ingredients'`` key are returned untouched.
     Otherwise the ingredient string is cleaned with
     ``string_processor.remove_substring_in_paranthesis``, split with
     ``string_processor.split_ingredients``, and each fragment that
     ``string_processor.parse_e_additives`` does not accept (falsy result)
     has ``remove_weight`` and ``remove_percents`` applied before being
     looked up, stripped, via ``find_ingredient_by_name``.

     Truthy lookup results are collected under
     ``good_item['agrovoc_ingredients']``; fragments with a falsy result
     are counted in ``self.not_parsed_fragments``.

     ``spider`` is not used by this method.  Returns ``good_item``.
     """
     if 'ingredients' in good_item:
         without_parens = string_processor.remove_substring_in_paranthesis(
             good_item['ingredients'])
         parts = string_processor.split_ingredients(without_parens)
         for part in parts:
             if string_processor.parse_e_additives(part):
                 # Truthy parse_e_additives result: skip this fragment.
                 continue
             part = string_processor.remove_weight(part)
             part = string_processor.remove_percents(part)
             lookup_name = part.strip()
             hit = self.agrovoc_graph.find_ingredient_by_name(lookup_name)
             if hit:
                 previous_hits = good_item.get('agrovoc_ingredients', [])
                 good_item['agrovoc_ingredients'] = previous_hits + [hit]
             else:
                 # The counter key is the unstripped fragment.
                 occurrences = self.not_parsed_fragments.get(part, 0) + 1
                 self.not_parsed_fragments[part] = occurrences
     return good_item
Ejemplo n.º 3
0
 def test_nested_paranthesis(self):
     """Nested parentheses: ``"abc(d(e)f)"`` is expected to yield ``"abcf)"``.

     The expected value keeps everything after the first ``)``, so the
     outer closing parenthesis remains in the result.
     """
     source = "abc(d(e)f)"
     result = remove_substring_in_paranthesis(source)
     self.assertEqual(result, "abcf)")
Ejemplo n.º 4
0
 def test_brakets(self):
     """Square brackets: ``"abc[ddd]"`` is expected to come back unchanged."""
     source = "abc[ddd]"
     result = remove_substring_in_paranthesis(source)
     self.assertEqual(result, "abc[ddd]")
Ejemplo n.º 5
0
 def test_not_oppened_paranthesis(self):
     """A ``)`` with no preceding ``(``: ``"abcdd)d"`` is expected unchanged."""
     source = "abcdd)d"
     result = remove_substring_in_paranthesis(source)
     self.assertEqual(result, "abcdd)d")
Ejemplo n.º 6
0
 def test_not_closed_paranthesis(self):
     """A ``(`` that is never closed: ``"abc(ddd"`` is expected unchanged."""
     source = "abc(ddd"
     result = remove_substring_in_paranthesis(source)
     self.assertEqual(result, "abc(ddd")
Ejemplo n.º 7
0
 def test_one_paranthesis(self):
     """One parenthesised group: ``"abc(ddd)"`` is expected to yield ``"abc"``."""
     source = "abc(ddd)"
     result = remove_substring_in_paranthesis(source)
     self.assertEqual(result, "abc")
Ejemplo n.º 8
0
 def test_nested_paranthesis(self):
     """Check the result for the nested input ``"abc(d(e)f)"``.

     The asserted output is ``"abcf)"``: the text following the first
     ``)`` (including the outer ``)``) is still present.
     """
     self.assertEqual(
         remove_substring_in_paranthesis("abc(d(e)f)"),
         "abcf)")
Ejemplo n.º 9
0
 def test_brakets(self):
     """Check that the bracketed input ``"abc[ddd]"`` is returned as-is."""
     self.assertEqual(
         remove_substring_in_paranthesis("abc[ddd]"),
         "abc[ddd]")
Ejemplo n.º 10
0
 def test_not_oppened_paranthesis(self):
     """Check that ``"abcdd)d"`` (closing parenthesis only) is returned as-is."""
     self.assertEqual(
         remove_substring_in_paranthesis("abcdd)d"),
         "abcdd)d")
Ejemplo n.º 11
0
 def test_not_closed_paranthesis(self):
     """Check that ``"abc(ddd"`` (opening parenthesis only) is returned as-is."""
     self.assertEqual(
         remove_substring_in_paranthesis("abc(ddd"),
         "abc(ddd")
Ejemplo n.º 12
0
 def test_one_paranthesis(self):
     """Check that ``"abc(ddd)"`` is reduced to ``"abc"``."""
     self.assertEqual(
         remove_substring_in_paranthesis("abc(ddd)"),
         "abc")