Example #1
0
class PersistentRDFPipeline(object):
    """Scrapy item pipeline that collects items in a Sleepycat-backed RDF graph.

    Every processed item is added to the graph and committed immediately.
    When the spider closes, the graph is serialized as Turtle into the file
    named by the ``OUTPUT_FILENAME`` setting (``data.ttl`` when the setting is
    missing or empty) and the temporary store directory is removed.
    """

    def __init__(self):
        # NOTE(review): assuming this is tempfile.mktemp, it only returns an
        # unused path name without creating anything, which is race-prone.
        # The open/create sequence below relies on the path not existing yet,
        # so it is kept as is -- confirm before switching to mkdtemp().
        self.tempdir = mktemp()
        self.graph = FoodpediaGraph(ConjunctiveGraph(store="Sleepycat"))

        rt = self.graph.open(self.tempdir, create=False)
        if rt == NO_STORE:
            # There is no underlying Sleepycat infrastructure, create it
            self.graph.open(self.tempdir, create=True)
            self.graph.bind_default_namespaces()
        elif rt != VALID_STORE:
            # Raised explicitly (same exception type and message as before) so
            # the check is not stripped when Python runs with -O.
            raise AssertionError("The underlying store is corrupt")
        log.msg("Temporal directory for persistent storage of parsed items: {0}".format(self.tempdir), level=log.INFO)
        log.msg("Triples in graph before add: {0}".format(len(self.graph)), level=log.INFO)

    def process_item(self, item, spider):
        """Add ``item`` to the graph, commit the store and pass the item on.

        Returns the item unchanged so that subsequent pipeline components
        (and feed exporters) still receive it.
        """
        self.graph.add_good_item(item)
        self.graph.commit()
        return item

    def close_spider(self, spider):
        """Serialize the graph as Turtle, then close the store and clean up.

        The store is closed and the temporary directory is removed even when
        serialization fails; the original error still propagates.
        """
        # `or` also covers an empty string, not only a missing setting.
        output_filename = spider.settings.get("OUTPUT_FILENAME") or "data.ttl"

        try:
            log.msg("Triples in graph after add: {0}".format(len(self.graph)), level=log.INFO)
            with open(output_filename, 'w') as output_file:
                log.msg("serialize the graph to {0}".format(output_filename))
                self.graph.serialize(output_file, format='turtle')
        finally:
            try:
                self.graph.close()
            finally:
                self._remove_tempdir()

    def _remove_tempdir(self):
        """Delete the temporary directory holding the Sleepycat database files."""
        # Imported locally so this block does not depend on a file-level import.
        import shutil

        log.msg("Clean up the temp directory '{0}' to remove the Sleepycat database files".format(self.tempdir),
                level=log.INFO)
        shutil.rmtree(self.tempdir)
Example #2
0
class InMemoryRDFPipeline(object):
    """Scrapy item pipeline that collects items in an in-memory RDF graph.

    Items are added to the graph as they are processed. When the spider
    closes, the graph is serialized as Turtle into the file named by the
    ``OUTPUT_FILENAME`` setting (``data.ttl`` when the setting is missing or
    empty).
    """

    def __init__(self):
        self.graph = FoodpediaGraph(ConjunctiveGraph(store="IOMemory"))
        self.graph.bind_default_namespaces()

    def process_item(self, item, spider):
        """Add ``item`` to the graph and pass the item on.

        Returns the item unchanged so that subsequent pipeline components
        (and feed exporters) still receive it.
        """
        self.graph.add_good_item(item)
        return item

    def close_spider(self, spider):
        """Serialize the graph as Turtle and close it.

        The graph is closed even when opening the output file or serializing
        fails; the original error still propagates.
        """
        # `or` also covers an empty string, not only a missing setting.
        output_filename = spider.settings.get("OUTPUT_FILENAME") or "data.ttl"

        try:
            with open(output_filename, 'w') as output_file:
                log.msg("serialize the graph to {0}".format(output_filename))
                self.graph.serialize(output_file, format='turtle')
        finally:
            self.graph.close()
class TestFoodpediaGraph(TestCase):
    """Unit tests for FoodpediaGraph, each run against a fresh rdflib Graph.

    The tests add data through the FoodpediaGraph API and then check that the
    expected (subject, predicate, object) triples are present in the graph.
    Test methods are documented with comments rather than docstrings so that
    unittest's verbose output keeps showing the method names.
    """

    def setUp(self):
        # A new wrapper around an empty graph for every test.
        self.foodpedia_graph = FoodpediaGraph(Graph())

    def test_bind_default_namespaces(self):
        self.foodpedia_graph.bind_default_namespaces()

        self.assertIn(("food", Namespace("http://purl.org/foodontology#")), self.foodpedia_graph.get_namespaces())
        self.assertIn(("foodpedia-owl", Namespace("http://foodpedia.tk/ontology#")), self.foodpedia_graph.get_namespaces())
        self.assertIn(("gr", Namespace("http://purl.org/goodrelations/v1#")), self.foodpedia_graph.get_namespaces())

    def test_add_good_item_added_uri(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111"))

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"), RDF.type, URIRef("http://purl.org/foodontology#Food")
        )

    def test_add_good_item_added_barcode(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111"))

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#hasEAN_UCC-13"),
            Literal("1111")
        )

    def test_add_name_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111"))

        self.foodpedia_graph.add_name_to_good("1111", "test")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("test", lang="ru")
        )

    def test_add_name_to_not_existing_good(self):
        # No add_good_item() call first: the name must still be recorded.
        self.foodpedia_graph.add_name_to_good("2222", "test")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/2222"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("test", lang="ru")
        )

    def test_add_name_to_good_with_existing_name(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111", name="existing_name"))

        self.foodpedia_graph.add_name_to_good("1111", "second_name")

        # Both names are expected to coexist; the second must not replace the first.
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("existing_name", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("second_name", lang="ru")
        )

    def test_add_english_name_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111"))

        self.foodpedia_graph.add_name_to_good("1111", "test", "en")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("test", lang="en")
        )

    def test_add_best_before_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="333"))

        self.foodpedia_graph.add_best_before_to_good("333", "6 мес.")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/333"),
            URIRef("http://foodpedia.tk/ontology#best_before"),
            Literal("6 мес.", lang="ru")
        )

    def test_add_comment_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="333"))

        self.foodpedia_graph.add_comment_to_good("333", "кусочки мяса птицы на костях от разных частей тушки")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/333"),
            URIRef("http://purl.org/goodrelations/v1#description"),
            Literal("кусочки мяса птицы на костях от разных частей тушки", lang="ru")
        )

    def test_add_ingridienta_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="333"))

        self.foodpedia_graph.add_ingridients_to_good("333", "мясо птицы")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/333"),
            URIRef("http://purl.org/foodontology#ingredientsListAsText"),
            Literal("мясо птицы", lang="ru")
        )

    def test_add_netto_weight_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="4444"))

        self.foodpedia_graph.add_netto_weight_to_good("4444", "1,00 кг")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/4444"),
            URIRef("http://foodpedia.tk/ontology#netto_mass"),
            Literal("1,00 кг", lang="ru")
        )

    def test_add_standart_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="4444"))

        self.foodpedia_graph.add_standart_to_good("4444", "14192-96, ТУ 9214-212-2347684-10")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/4444"),
            URIRef("http://foodpedia.tk/ontology#standart"),
            Literal("14192-96, ТУ 9214-212-2347684-10", lang="ru")
        )

    def test_add_store_conditions_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="5678"))

        self.foodpedia_graph.add_store_conditions_to_good("5678", "Условия хранения от -18 градусов С")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/5678"),
            URIRef("http://foodpedia.tk/ontology#store_cond"),
            Literal("Условия хранения от -18 градусов С", lang="ru")
        )

    def test_add_esl_as_string_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="9"))

        self.foodpedia_graph.add_esl_as_string_to_good("9", "Белки: не менее 19,00 г Жиры: не более 11,50 г Энергетическая ценность:  179,50 ккал")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/9"),
            URIRef("http://foodpedia.tk/ontology#esl"),
            Literal("Белки: не менее 19,00 г Жиры: не более 11,50 г Энергетическая ценность:  179,50 ккал", lang="ru")
        )

    def test_add_pack_type_to_good(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="0"))

        self.foodpedia_graph.add_pack_type_to_good("0", "Пакет пластиковый, металлизированный, многослойный")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/0"),
            URIRef("http://foodpedia.tk/ontology#pack_type"),
            Literal("Пакет пластиковый, металлизированный, многослойный", lang="ru")
        )

    def test_add_string_object_ru(self):
        self.foodpedia_graph.add_string_object(
            URIRef("http://foodpedia.tk/resource/999"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            "bla-bla-bla",
            "ru"
        )

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/999"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("bla-bla-bla", lang="ru")
        )

    def test_add_string_object_en(self):
        self.foodpedia_graph.add_string_object(
            URIRef("http://foodpedia.tk/resource/888"),
            URIRef("http://purl.org/goodrelations/v1#"),
            "bla-bla-bla",
            "en"
        )

        # Mirrors the "ru" variant: without this check the test could only
        # fail if add_string_object() raised.
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/888"),
            URIRef("http://purl.org/goodrelations/v1#"),
            Literal("bla-bla-bla", lang="en")
        )

    def test_add_calories_as_double(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="10"))

        self.foodpedia_graph.add_calories_as_double_to_good("10", 179.5)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/10"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            Literal(179.5, datatype=XSD.double)
        )

    def test_add_calories_as_double_passed_as_string(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="10"))

        # The value is given as text and is expected to be stored as a double.
        self.foodpedia_graph.add_calories_as_double_to_good("10", "179.5")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/10"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            Literal(179.5, datatype=XSD.double)
        )

    def test_add_calories_as_double_passed_as_integer(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="10"))

        self.foodpedia_graph.add_calories_as_double_to_good("10", 179)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/10"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            Literal(179, datatype=XSD.double)
        )

    def test_add_fats_as_double(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="999999999999999999"))

        self.foodpedia_graph.add_fats_as_double_to_good("999999999999999999", 11.5)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/999999999999999999"),
            URIRef("http://purl.org/foodontology#fatPer100gAsDouble"),
            Literal(11.5, datatype=XSD.double)
        )

    def test_add_proteins_as_double(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="8"))

        self.foodpedia_graph.add_proteins_as_double_to_good("8", 19.00)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/8"),
            URIRef("http://purl.org/foodontology#proteinsPer100gAsDouble"),
            Literal(19.0, datatype=XSD.double)
        )

    def test_add_carbohydrates_as_double(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="8"))

        self.foodpedia_graph.add_carbohydrates_as_double_to_good("8", 19.00)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/8"),
            URIRef("http://purl.org/foodontology#carbohydratesPer100gAsDouble"),
            Literal(19.0, datatype=XSD.double)
        )

    def test_add_double_object(self):
        self.foodpedia_graph.add_double_object(
            URIRef("http://foodpedia.tk/resource/678"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            179.5
        )

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/678"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            Literal(179.5, datatype=XSD.double)
        )

    def test_add_eadditive(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="100500"))

        self.foodpedia_graph.add_eadditive_to_good("100500", "E401")

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/100500"),
            URIRef("http://purl.org/foodontology#containsIngredient"),
            URIRef("http://foodpedia.tk/resource/E401")
        )

    def test_convert_eadditive_name_to_uri(self):
        eadditive_name = "E401"
        uri = FoodpediaGraph.convert_eadditive_name_to_uri(eadditive_name)
        self.assertEqual(URIRef("http://foodpedia.tk/resource/E401"), uri)

    def test_convert_string_barcode_to_uri(self):
        barcode = "1234"
        uri = FoodpediaGraph.convert_barcode_to_uri(barcode)
        self.assertEqual(URIRef("http://foodpedia.tk/resource/1234"), uri)

    def test_convert_int_barcode_to_uri(self):
        barcode = 1234
        uri = FoodpediaGraph.convert_barcode_to_uri(barcode)
        self.assertEqual(URIRef("http://foodpedia.tk/resource/1234"), uri)

    def test_add_good_item_added_properties(self):
        # Populate every supported field, then check one triple per field.
        good_item = GoodItem()
        good_item["goodsmatrix_url"] = "http://www.goodsmatrix.ru/goods/4600605021002.html"
        good_item["name"] = "supergood"
        good_item["barcode"] = "1111"
        good_item["best_before"] = "24 hours"
        good_item["comment"] = "description?"
        good_item["ingredients"] = "salt"
        good_item["netto_weight"] = "100500"
        good_item["standart"] = "TU-TU-TU"
        good_item["store_conditions"] = "dark side of the World"
        good_item["esl_as_string"] = "bla-bla-bla"
        good_item["proteins_as_double"] = 123.4
        good_item["fats_as_double"] = 56.7
        good_item["carbohydrates_as_double"] = 8.9
        good_item["calories_as_double"] = 0
        good_item["pack_type"] = "do not know"
        self.foodpedia_graph.add_good_item(good_item)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#name"),
            Literal("supergood", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#hasEAN_UCC-13"),
            Literal("1111")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#best_before"),
            Literal("24 hours", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#description"),
            Literal("description?", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#ingredientsListAsText"),
            Literal("salt", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#netto_mass"),
            Literal("100500", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#standart"),
            Literal("TU-TU-TU", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#store_cond"),
            Literal("dark side of the World", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#esl"),
            Literal("bla-bla-bla", lang="ru")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#proteinsPer100gAsDouble"),
            Literal(123.4, datatype=XSD.double)
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#fatPer100gAsDouble"),
            Literal(56.7, datatype=XSD.double)
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#carbohydratesPer100gAsDouble"),
            Literal(8.9, datatype=XSD.double)
        )
        # A zero value is falsy but must still be stored.
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#energyPer100gAsDouble"),
            Literal(0, datatype=XSD.double)
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://foodpedia.tk/ontology#pack_type"),
            Literal("do not know", lang="ru")
        )

    def test_add_good_item_does_not_add_missed_properties(self):
        self.foodpedia_graph.add_good_item(GoodItem(barcode="1111"))

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"), RDF.type, URIRef("http://purl.org/foodontology#Food")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/goodrelations/v1#hasEAN_UCC-13"),
            Literal("1111")
        )
        # Only the type and barcode triples may exist for a barcode-only item.
        self.assertEqual(len(self.foodpedia_graph), 2)

    def test_add_good_item_throws_key_error_on_missed_barcode(self):
        good_item = GoodItem()

        with self.assertRaises(KeyError):
            self.foodpedia_graph.add_good_item(good_item)

    def test_add_good_item_does_not_add_not_correct_property(self):
        good_item_like_dict = {"barcode": "1111", "not_correct_property": "not_correct_property"}

        self.foodpedia_graph.add_good_item(good_item_like_dict)

        # any() instead of filter(): on Python 3 filter() returns an iterator
        # that is always truthy, which would make assertFalse fail every time.
        self.assertFalse(
            any("not_correct_property" in obj for obj in self.foodpedia_graph.objects())
        )

    def test_add_good_item_adds_eadditives_to_graph(self):
        good_item = GoodItem(barcode="1111", e_additives=["E100", "E101"])

        self.foodpedia_graph.add_good_item(good_item)

        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#containsIngredient"),
            URIRef("http://foodpedia.tk/resource/E100")
        )
        self.assertTripleInGraph(
            URIRef("http://foodpedia.tk/resource/1111"),
            URIRef("http://purl.org/foodontology#containsIngredient"),
            URIRef("http://foodpedia.tk/resource/E101")
        )

    def assertTripleInGraph(self, *triple):
        """Assert that the (subject, predicate, object) ``triple`` is in the graph.

        On failure the message lists every triple currently in the graph.
        """
        def graph_to_string(graph):
            return ",\n".join(str((s, p, o)) for s, p, o in graph)

        self.assertIn(triple, self.foodpedia_graph,
                      msg="the triple \n{0}\n not found in the graph \n{1}\n".format(
                          triple, graph_to_string(self.foodpedia_graph)
                          )
                      )

    def tearDown(self):
        # Release the graph created in setUp().
        self.foodpedia_graph.close()