Example #1
def test_default_with_datatype():
    csvw = CSVW(
        csv_path='tests/virtual1.csv',
        metadata_path='tests/virtual1.default.datatype.csv-metadata.json')
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    ns = Namespace("http://example.org/")

    for x in [1, 2]:
        active_vals = list(
            g.triples((ns['sub-{}'.format(x)], ns['active'], None)))
        assert len(active_vals) == 1
        active_val = active_vals[0][2]
        assert isinstance(active_val, Literal)
        assert active_val.datatype == XSD.boolean
        assert active_val.value

        string_vals = list(
            g.triples((ns['sub-{}'.format(x)], ns['stringprop1'], None)))
        assert len(string_vals) == 1
        string_val = string_vals[0][2]
        assert isinstance(string_val, Literal)
        assert string_val.value == "some string"

        string_vals = list(
            g.triples((ns['sub-{}'.format(x)], ns['stringprop2'], None)))
        assert len(string_vals) == 1
        string_val = string_vals[0][2]
        assert isinstance(string_val, Literal)
        assert "%20" not in string_val.value
    def test(self):
        metadata = None
        if 'metadata' in option:
            metadata = option['metadata']

        try:
            csvw = CSVW(csv_file, metadata_url=metadata)
        except Exception as e:
            # this should be a negative test
            if TYPES[type]:
                traceback.print_exc()
            self.assertFalse(TYPES[type])
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce some embedded metadata
        self.assertNotEqual(csvw.metadata, None)
        # and the result should exist
        self.assertNotEqual(result_url, None)


        gr = Graph()
        result = gr.parse(result_url)
        converted_result = csvw.to_rdf()
    
        result.serialize('output_rdf/' + name + '.ttl', format='turtle')
        converted_result.serialize('output_rdf/generated' + name + '.ttl', format='turtle')
        
        self.assertTrue(compare.isomorphic(result, converted_result))
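    # NOTE: these harness-style test methods rely on scaffolding defined
    # elsewhere (a W3C CSVW test-suite runner). Roughly, and as an assumption
    # rather than the actual harness code, they expect something like:
    #
    #     TYPES = {'to_rdf': True, 'to_rdf_negative': False}  # hypothetical
    #     option = {...}       # per-test options, may contain a 'metadata' URL
    #     csv_file = '...'     # URL/path of the CSV under test
    #     result_url = '...'   # URL of the expected result to compare against
    #     name = '...'         # test case name, used for output file names
    #     type = '...'         # key into TYPES for the current test
    #
    # plus imports such as json, traceback, urllib2 (Python 2), rdflib.Graph
    # and rdflib.compare.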
    def test(self):
        metadata = None
        if 'metadata' in option:
            metadata = option['metadata']

        try:
            csvw = CSVW(csv_file, metadata_url=metadata)
        except Exception as e:
            # this should be a negative test
            if TYPES[type]:
                traceback.print_exc()
            self.assertFalse(TYPES[type])
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce some embedded metadata
        self.assertNotEqual(csvw.metadata, None)
        # and the result should exist
        self.assertNotEqual(result_url, None)

        # test the json result

        resp = urllib2.urlopen(result_url)
        result = json.loads(resp.read())
        self.assertEqual(csvw.to_json(), result)
def test_null_values_with_single_string():
    csvw = CSVW(csv_path="tests/null1.csv",
                metadata_path="tests/null1.single.csv-metadata.json")
    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    # There should be no subject NA
    all_subjects = {x for x in g.subjects()}
    assert subj_ns['null_key'] not in all_subjects
    assert subj_ns['1'] in all_subjects
    assert len(all_subjects) == 4

    # Null valued objects should not be created
    all_objects = {x for x in g.objects()}
    assert Literal('null_key', datatype=XSD.token) not in all_objects
    assert Literal('null_sector') not in all_objects
    assert Literal('null_id', datatype=XSD.token) not in all_objects
    assert Literal('PUBLIC') in all_objects
    assert Literal('12', datatype=XSD.token) in all_objects

    # Spot check some triples do not exist but other do from the same row
    null_key_lit = Literal('null_id', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['2'], id_uri, null_key_lit)))) == 0

    priv_lit = Literal('PRIVATE')
    assert len(list(g.triples((subj_ns['2'], sect_uri, priv_lit)))) == 1

    null_sector_lit = Literal('null_sector')
    assert len(list(g.triples((subj_ns['3'], sect_uri, null_sector_lit)))) == 0

    twelve_lit = Literal('12', datatype=XSD.token)
    assert len(list(g.triples((subj_ns['3'], id_uri, twelve_lit)))) == 1
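# The null-value tests above and below reference module-level helpers that are
# not part of this snippet. A plausible sketch based on how they are used; the
# exact URIs are assumptions:
from rdflib import Namespace, URIRef

subj_ns = Namespace("http://example.org/subject/")  # assumed subject namespace
id_uri = URIRef("http://example.org/id")             # assumed predicate for the "id" column
sect_uri = URIRef("http://example.org/sector")       # assumed predicate for the "sector" column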
Example #6
def test_literals_with_new_lines():
    csv_path = "tests/parsing.quoted_newlines.csv"
    metadata_path = "tests/parsing.quoted_newlines.csv-metadata.json"
    csvw = CSVW(csv_path=csv_path, metadata_path=metadata_path)

    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert len(taxi_desc.value.splitlines()) == 2

    flight = URIRef("http://example.org/expense/multi-hop%20flight")
    flight_triples = list(g.triples((flight, desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert len(flight_desc.value.splitlines()) == 4

    dinner_triples = list(g.triples((ns['dinner'], desc, None)))
    assert len(dinner_triples) == 1
    dinner_desc = dinner_triples[0][2]
    assert isinstance(dinner_desc, Literal)
    assert u'\u2019' in dinner_desc, "Expected to read unicode characters"
    assert u"('')" in dinner_desc, "Expected to read apostrophes"
Example #7
def test_json_generation():
    """Will remove this test when we add json generation support."""
    csvw = CSVW(csv_path="tests/simple.csv",
                metadata_path="tests/simple.csv-metadata.json")
    with pytest.raises(NotImplementedError) as exc:
        csvw.to_json()
    assert "JSON generation" in str(exc.value)
def test_single_table_using_path():
    csv_path = "tests/simple.csv"
    metadata_path = "tests/simple.csv-metadata.json"

    csvw = CSVW(csv_path=csv_path, metadata_path=metadata_path)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
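# verify_rdf() is a shared helper in the test module that is not shown in this
# listing. A minimal sketch of the kind of check it could perform, assuming it
# receives the Turtle string returned by CSVW.to_rdf():
from rdflib import ConjunctiveGraph

def verify_rdf(rdf_contents):
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")
    # The real helper presumably asserts the specific triples expected from
    # simple.csv; here we only check that the output parses and is non-empty.
    assert len(g) > 0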
def test_empty():

    csvw = CSVW(csv_path="tests/empty.csv",
                metadata_path="tests/empty.csv-metadata.json")
    rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    assert len(g) == 0
Example #10
def test_multiple_tables_through_paths():
    metadata_path = "tests/multiple_tables.csv-metadata.json"
    csv1_path = "tests/multiple_tables.Name-ID.csv"
    csv2_path = "tests/multiple_tables.ID-Age.csv"

    with open(metadata_path, 'r') as metadata_f:
        metadata = io.StringIO(text(metadata_f.read()))

    csvw = CSVW(csv_path=(csv1_path, csv2_path), metadata_handle=metadata)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
def test_single_table_using_handles():
    csv_path = "tests/simple.csv"
    metadata_path = "tests/simple.csv-metadata.json"

    with io.open(csv_path) as csv1_f, io.open(metadata_path,
                                              'r') as metadata_f:
        csv_handle = io.StringIO(csv1_f.read())
        metadata = io.StringIO(metadata_f.read())

    csvw = CSVW(csv_handle=csv_handle, metadata_handle=metadata)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
Example #12
def test_tmp_files():
    tmp_dir = tempfile.mkdtemp(dir="/tmp")
    assert len(os.listdir(tmp_dir)) == 0
    csvw = CSVW(csv_path="./tests/books.csv",
                metadata_path="./tests/books.csv-metadata.json",
                temp_dir=tmp_dir)
    assert len(os.listdir(tmp_dir)) == 0

    csvw.to_rdf(fmt="nt")
    created_files = os.listdir(tmp_dir)
    assert len(created_files) == 1, \
        "nt serialization should generate only 1 temp file"
    assert created_files[0].endswith(".nt")

    os.remove(os.path.join(tmp_dir, created_files[0]))
    assert len(os.listdir(tmp_dir)) == 0

    csvw.to_rdf(fmt="turtle")
    created_files = os.listdir(tmp_dir)
    assert len(created_files) == 2, \
        "ttl serialization should generate two temp files"
    assert any([f.endswith(".nt") for f in created_files])
    assert any([f.endswith(".ttl") for f in created_files])
    # Check permissions
    expected_flags = [stat.S_IRUSR, stat.S_IRGRP, stat.S_IROTH]
    unexpected_flags = [stat.S_IWUSR, stat.S_IWGRP, stat.S_IWOTH]
    for f in created_files:
        st = os.stat(os.path.join(tmp_dir, f))
        for flag, non_flag in zip(expected_flags, unexpected_flags):
            assert bool(st.st_mode & flag)
            assert not bool(st.st_mode & non_flag)

    csvw.close()
    assert len(os.listdir(tmp_dir)) == 0
Example #13
def test_multiple_tables_through_handles():
    metadata_path = "tests/multiple_tables.csv-metadata.json"
    csv1_path = "tests/multiple_tables.Name-ID.csv"
    csv2_path = "tests/multiple_tables.ID-Age.csv"

    with io.open(metadata_path, 'r') as metadata_f, io.open(
            csv1_path) as csv1_f, io.open(csv2_path) as csv2_f:
        metadata = io.StringIO(metadata_f.read())
        csv1 = io.StringIO(csv1_f.read())
        csv2 = io.StringIO(csv2_f.read())

    csvw = CSVW(csv_handle=[csv1, csv2], metadata_handle=metadata)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
Example #14
def test_default():
    csvw = CSVW(csv_path='tests/virtual1.csv',
                metadata_path='tests/virtual1.default.csv-metadata.json')
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    all_subjects = {x for x in g.subjects()}
    assert len(all_subjects) == 4

    ns = Namespace("http://example.org/")
    assert ns['sub-1'] in all_subjects
    assert ns['sub-2'] in all_subjects
    assert len(list(g.triples((ns['sub-1'], ns['obj-1'], ns['myvalue'])))) == 1
    assert len(list(g.triples((ns['sub-2'], ns['obj-2'], ns['myvalue'])))) == 1
Example #15
def test_negative_both_default_or_value():
    with pytest.raises(BothDefaultAndValueUrlError):
        print(
            CSVW(csv_path='tests/virtual1.csv',
                 metadata_path=
                 'tests/virtual1.BothDefaultAndValueUrlError.csv-metadata.json'
                 ))
def test_single_table_using_url(mock_urlopen):
    csv_path = "tests/simple.csv"
    metadata_path = "tests/simple.csv-metadata.json"
    csv_url = "http://example.org/simple.csv"

    with io.open(csv_path) as csv1_f:
        csv1 = text(csv1_f.read())

    reader = Mock()
    reader.read.side_effect = [csv1]
    mock_urlopen.return_value = reader

    csvw = CSVW(csv_url=csv_url, metadata_path=metadata_path)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
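# mock_urlopen is taken as an argument by the URL-based tests, presumably a
# pytest fixture that patches the urlopen call used inside pycsvw. A sketch of
# such a fixture; the patch target is an assumption, not the library's actual
# module path:
import pytest
from unittest.mock import patch  # the test module itself appears to use the external 'mock' package

@pytest.fixture
def mock_urlopen():
    with patch("pycsvw.csvw.urlopen") as mocked:  # assumed location of urlopen
        yield mocked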
Example #17
def test_time():

    with CSVW(csv_path="tests/datatypes.time.csv",
              metadata_path="tests/datatypes.time.csv-metadata.json") as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    NS = Namespace('https://www.example.org/')

    time1_lit = Literal("19:30:00", datatype=XSD.time)
    assert len(list(g.triples((NS['event/1'], NS['time1'], time1_lit)))) == 1

    time2_lit = Literal("09:30:10.5", datatype=XSD.time)
    assert len(list(g.triples((NS['event/1'], NS['time2'], time2_lit)))) == 1

    time3_lit = Literal("10:30:10Z", datatype=XSD.time)
    assert len(list(g.triples((NS['event/1'], NS['time3'], time3_lit)))) == 1

    time4_lit = Literal("11:30:10-06:00", datatype=XSD.time)
    assert len(list(g.triples((NS['event/1'], NS['time4'], time4_lit)))) == 1

    time5_lit = Literal("04:30:10+04:00", datatype=XSD.time)
    assert len(list(g.triples((NS['event/1'], NS['time5'], time5_lit)))) == 1
Example #18
def test_datetime():
    with CSVW(csv_path="tests/datatypes.datetime.csv",
              metadata_path="tests/datatypes.datetime.csv-metadata.json"
              ) as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    dt1_lit = Literal("2002-05-30T09:00:00", datatype=XSD.dateTime)
    assert len(list(g.triples((NS['event/1'], NS['datetime1'], dt1_lit)))) == 1

    dt2_lit = Literal("2002-05-30T09:30:10.5", datatype=XSD.dateTime)
    assert len(list(g.triples((NS['event/1'], NS['datetime2'], dt2_lit)))) == 1

    dt3_lit = Literal("2002-05-30T09:30:10Z", datatype=XSD.dateTime)
    assert len(list(g.triples((NS['event/1'], NS['datetime3'], dt3_lit)))) == 1

    dt4_lit = Literal("2002-05-30T09:30:10-06:00", datatype=XSD.dateTime)
    assert len(list(g.triples((NS['event/1'], NS['datetime4'], dt4_lit)))) == 1

    dt5_lit = Literal("2002-05-30T09:30:10+04:00", datatype=XSD.dateTime)
    assert len(list(g.triples((NS['event/1'], NS['datetime5'], dt5_lit)))) == 1

    datestamp = Literal("2004-04-12T13:20:00-05:00",
                        datatype=XSD.dateTimeStamp)
    assert len(list(g.triples(
        (NS['event/1'], NS['datetimestamp'], datestamp)))) == 1
Example #19
def test_context_mgr():
    tmp_dir = tempfile.mkdtemp(dir="/tmp")
    assert len(os.listdir(tmp_dir)) == 0

    with CSVW(csv_path="./tests/books.csv",
              metadata_path="./tests/books.csv-metadata.json",
              temp_dir=tmp_dir) as csvw:
        assert len(os.listdir(tmp_dir)) == 0

        csvw.to_rdf(fmt="nt")
        created_files = os.listdir(tmp_dir)
        assert len(created_files) == 1, \
            "nt serialization should generate only 1 temp file"
        assert created_files[0].endswith(".nt")

        os.remove(os.path.join(tmp_dir, created_files[0]))
        assert len(os.listdir(tmp_dir)) == 0

        csvw.to_rdf(fmt="turtle")
        created_files = os.listdir(tmp_dir)
        assert len(created_files) == 2, \
            "ttl serialization should generate two temp files"
        assert any([f.endswith(".nt") for f in created_files])
        assert any([f.endswith(".ttl") for f in created_files])

    assert len(os.listdir(tmp_dir)) == 0
Example #20
def test_encoding_rdf():
    # With encoding specified
    encoding = "ISO-8859-1"
    csvw = CSVW(csv_path="./tests/iso_encoding.csv",
                metadata_path="./tests/iso_encoding.csv-metadata.json",
                csv_encoding=encoding)
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    units = Namespace('http://example.org/units/')
    cars = Namespace('http://example.org/cars/')
    meta = Namespace("http://example.org/properties/")

    expected_unit = units[quote(u"\xb5100".encode('utf-8'))]
    assert (cars['1'], meta['UnitOfMeasurement'], expected_unit) in g
    assert expected_unit in list(g.objects())
Example #21
def test_metadata_mismatch():
    csv_path = "tests/negative.metadata_mismatch.csv"

    csvw1 = CSVW(
        csv_path=csv_path,
        metadata_path=
        "tests/negative.NumberOfNonVirtualColumnsMismatch1.csv-metadata.json")
    csvw2 = CSVW(
        csv_path=csv_path,
        metadata_path=
        "tests/negative.NumberOfNonVirtualColumnsMismatch2.csv-metadata.json")

    with pytest.raises(NumberOfNonVirtualColumnsMismatch) as exc:
        print(csvw1.to_rdf())
    assert "metadata, 2" in str(exc.value)
    assert "row 1, 3" in str(exc.value)

    with pytest.raises(NumberOfNonVirtualColumnsMismatch) as exc:
        print(csvw2.to_rdf())
    assert "metadata, 4" in str(exc.value)
    assert "row 1, 3" in str(exc.value)

    with pytest.raises(VirtualColumnPrecedesNonVirtualColumn) as exc:
        CSVW(
            csv_path=csv_path,
            metadata_path=
            'tests/negative.VirtualColumnPrecedesNonVirtualColumn.csv-metadata.json'
        )
    assert "t2" in str(exc.value)
def process_csv(csv_file, metadata_file, riotpath, g=None):
    """Generate RDF from a CSV file and metadata file pair.

    csv_file and metadata_file are path strings. If an existing Graph g is
    passed, the generated triples are merged into it; otherwise a new Graph
    is created. The populated graph is returned.
    """
    if g is None:
        g = Graph()

    csvw = CSVW(csv_path=csv_file, metadata_path=metadata_file,
                riot_path=riotpath)

    with NamedTemporaryFile() as f:
        csvw.to_rdf_files([(f, 'turtle')])
        f.seek(0)
        g.parse(f, format='ttl')

    csvw.close()

    return g
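# Example use of process_csv(), merging two CSV/metadata pairs into a single
# graph; the file paths and riot location are illustrative only:
g = process_csv("data/first.csv", "data/first.csv-metadata.json",
                riotpath="/usr/local/bin/riot")
g = process_csv("data/second.csv", "data/second.csv-metadata.json",
                riotpath="/usr/local/bin/riot", g=g)
g.serialize("merged.ttl", format="turtle")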
Example #23
def test_others(metadata_file):
    with CSVW(csv_path="tests/datatypes.others.csv",
              metadata_path=metadata_file) as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    triples_to_look_for = [
        (NS['custom_pred'], "someformatteddata",
         URIRef("https://www.datatypes.org#mycustomdatatypedefinition")),
        (NS["anyURI"], "https://www.sampleuri.org", XSD.anyURI),
        (NS["base64Binary"], "0FB8", XSD.base64Binary),
        (NS['boolean1'], True, XSD.boolean),
        (NS['boolean2'], False, XSD.boolean),
        (NS['boolean3'], True, XSD.boolean),
        (NS['boolean4'], False, XSD.boolean),
        (NS['integer'], -3, XSD.integer),
        (NS['long'], -1231235555, XSD.long),
        (NS['int'], 3, XSD.int),
        (NS['short'], -1231, XSD.short),
        (NS['byte'], 45, XSD.byte),
        (NS['nonNegativeInteger'], 111, XSD.nonNegativeInteger),
        (NS['positiveInteger'], 123456, XSD.positiveInteger),
        (NS['unsignedLong'], 3456, XSD.unsignedLong),
        (NS['unsignedInt'], 7890000, XSD.unsignedInt),
        (NS['unsignedShort'], 65000, XSD.unsignedShort),
        (NS['unsignedByte'], 254, XSD.unsignedByte),
        (NS['nonPositiveInteger'], -123, XSD.nonPositiveInteger),
        (NS['negativeInteger'], -34500000, XSD.negativeInteger),
        (NS['decimal'], "+3.5", XSD.decimal),
        (NS['double'], "4268.22752E11", XSD.double),
        (NS['float'], "+24.3e-3", XSD.float),
        (NS['duration'], "P2Y6M5DT12H35M30S", XSD.duration),
        (NS['dayTimeDuration'], "P1DT2H", XSD.dayTimeDuration),
        (NS['yearMonthDuration'], "P0Y20M", XSD.yearMonthDuration),
        (NS['gDay'], "---02", XSD.gDay),
        (NS['gMonth'], "--04", XSD.gMonth),
        (NS['gMonthDay'], "--04-12", XSD.gMonthDay),
        (NS['gYear'], "2004", XSD.gYear),
        (NS['gYearMonth'], "2004-04", XSD.gYearMonth),
        (NS['hexBinary'], "0FB8", XSD.hexBinary),
        (NS['QName'], "myElement", XSD.QName),
        (NS['normalizedString'], "This is a normalized string!",
         XSD.normalizedString),
        (NS['token'], "token", XSD.token),
        (NS['language'], "en", XSD.language),
        (NS['Name'], "_my.Element", XSD.Name),
        (NS['NMTOKEN'], "123_456", XSD.NMTOKEN),
        (NS['xml'], "<a>bla</a>", RDF.XMLLiteral),
        (NS['html'], "<div><p>xyz</p></div>", RDF.HTML),
        (NS['json'], "{}", CSVW_NS.JSON),
    ]
    for pred, lit_val, lit_type in triples_to_look_for:
        lit = Literal(lit_val, datatype=lit_type)
        assert len(list(g.triples(
            (NS['event/1'], pred, lit)))) == 1, "Failed for {}".format(pred)
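# NS and CSVW_NS are module-level constants shared by the datatype tests but
# not included in this listing. Presumably they are defined along these lines
# (NS matches the namespace used in test_time above; the CSVW namespace URI is
# the standard one):
NS = Namespace("https://www.example.org/")
CSVW_NS = Namespace("http://www.w3.org/ns/csvw#")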
Example #24
def test_bool_with_format():
    csvw = CSVW(csv_path="tests/datatypes.bool.csv",
                metadata_path="tests/datatypes.bool.csv-metadata.json")
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    true_lit = Literal(True, datatype=XSD.boolean)
    false_lit = Literal(False, datatype=XSD.boolean)

    assert len(list(g.triples((NS['event/1'], NS['bool1'], true_lit)))) == 1
    assert len(list(g.triples((NS['event/1'], NS['bool2'], true_lit)))) == 1
    assert len(list(g.triples((NS['event/1'], NS['bool3'], true_lit)))) == 1
    assert len(list(g.triples((NS['event/2'], NS['bool1'], false_lit)))) == 1
    assert len(list(g.triples((NS['event/2'], NS['bool2'], false_lit)))) == 1
    assert len(list(g.triples((NS['event/2'], NS['bool3'], false_lit)))) == 1
    assert len(list(g.triples((NS['event/3'], NS['bool1'], false_lit)))) == 1
    assert len(list(g.triples((NS['event/3'], NS['bool2'], false_lit)))) == 1
    assert len(list(g.triples((NS['event/3'], NS['bool3'], false_lit)))) == 1
def test_null_values_with_multiple_strings():
    csvw = CSVW(csv_path="tests/null1.csv",
                metadata_path="tests/null1.multiple.csv-metadata.json")
    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    all_objects = {x for x in g.objects()}

    assert Literal('null_key', datatype=XSD.token) not in all_objects
    assert Literal('null_sector') not in all_objects
    assert Literal('null_id', datatype=XSD.token) not in all_objects
    for id in ['10', '11', '12', '13']:
        assert Literal(id, datatype=XSD.token) not in all_objects

    all_preds = {x for x in g.predicates()}
    assert id_uri not in all_preds

    assert Literal('1', datatype=XSD.token) not in all_objects
Example #26
def test_multiple_tables_through_urls(mock_urlopen):
    metadata_path = "tests/multiple_tables.csv-metadata.json"
    csv1_url = "multiple_tables.Name-ID.csv"
    csv2_url = "multiple_tables.ID-Age.csv"
    csv1_path = "tests/multiple_tables.Name-ID.csv"
    csv2_path = "tests/multiple_tables.ID-Age.csv"

    with io.open(metadata_path, 'r') as metadata_f, io.open(
            csv1_path) as csv1_f, io.open(csv2_path) as csv2_f:
        metadata = io.StringIO(text(metadata_f.read()))
        csv1 = text(csv1_f.read())
        csv2 = text(csv2_f.read())

    reader = Mock()
    reader.read.side_effect = [csv1, csv2]
    mock_urlopen.return_value = reader

    csvw = CSVW(csv_url=(csv1_url, csv2_url), metadata_handle=metadata)
    rdf = csvw.to_rdf()

    verify_rdf(rdf)
    def test(self):
        metadata = None
        if 'metadata' in option:
            metadata = option['metadata']

        try:
            csvw = CSVW(csv_file, metadata_url=metadata)
        except Exception as e:
            # this should be a negative test
            if TYPES[type]:
                traceback.print_exc()
            self.assertFalse(TYPES[type])
            return

        # if we get here this should be a positive test
        self.assertTrue(TYPES[type])

        # if we can parse it we should at least produce some embedded metadata
        self.assertNotEqual(csvw.metadata, None)
        # and the result should exist
        self.assertNotEqual(result_url, None)

        # test the json result
        resp = urllib2.urlopen(result_url)
        result = json.loads(resp.read())
        generated_result = json.loads(csvw.to_json())

        with open('output_json/' + name + '.json', 'w') as outfile:
            json.dump(result, outfile, indent=2)

        with open('output_json/generated_' + name + '.json', 'w') as outfile:
            json.dump(generated_result, outfile, indent=2)

        self.assertEqual(ordered(generated_result), ordered(result))
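# ordered() is an external helper used above to compare JSON structures
# independently of key and list order. A common way to implement that idea,
# offered here as an assumption about what the harness provides:
def ordered(obj):
    """Recursively sort dicts and lists so two JSON documents compare equal."""
    if isinstance(obj, dict):
        return sorted((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return sorted(ordered(x) for x in obj)
    return obj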
Example #28
def test_all_formats(mock_find_executable):
    mock_find_executable.return_value = True
    csvw = CSVW(csv_path="./tests/books.csv",
                metadata_path="./tests/books.csv-metadata.json")
    assert mock_find_executable.call_count == 0
    # Create file like objects
    input_val = []
    for f in TEST_PARAMS:
        input_val.append((tempfile.TemporaryFile(), f[0]))
    csvw.to_rdf_files(input_val)
    # Check each output
    for fmt in zip(input_val, TEST_PARAMS):
        file_obj = fmt[0][0]
        validate_func = fmt[1][1]
        rdflib_input = fmt[1][2]
        # Rewind and read
        file_obj.seek(0)
        contents = file_obj.read().decode('utf-8')
        # Validate
        validate_func(contents)
        verify_rdf_contents(contents, rdflib_input)

    assert mock_find_executable.call_count == 1
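# TEST_PARAMS is defined elsewhere in the test module. Judging from its use
# above (f[0] is the serialization format, f[1] a validation callable, f[2]
# the rdflib parser format), each entry presumably looks something like:
#
#     TEST_PARAMS = [
#         ("nt", validate_nt, "nt"),              # names are assumptions
#         ("turtle", validate_turtle, "turtle"),
#     ]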
Example #29
def test_literals_with_escaped_quotes():
    csv_path = "tests/parsing.escaped_quotes.csv"
    metadata_path = "tests/parsing.escaped_quotes.csv-metadata.json"
    csvw = CSVW(csv_path=csv_path, metadata_path=metadata_path)

    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert taxi_desc.value == "go from x to y"

    quoted_expense_triples = list(
        g.triples((URIRef("http://example.org/expense/quoted%20expense"), desc,
                   None)))
    assert len(quoted_expense_triples) == 1
    quoted_expense_desc = quoted_expense_triples[0][2]
    assert isinstance(quoted_expense_desc, Literal)
    assert quoted_expense_desc.value == "for some reason it came with quotes in it"

    flight_triples = list(g.triples((ns['flight'], desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert flight_desc.value == "had to fly \"escaped quotes business\" for this trip"

    car_triples = list(g.triples((ns['car'], desc, None)))
    assert len(car_triples) == 1
    car_desc = car_triples[0][2]
    assert isinstance(car_desc, Literal)
    assert car_desc.value == " some \\ in it to be escaped"
Example #30
def test_single_csv():

    metadata_path = "tests/simple.csv-metadata.json"
    metadata_url = "http://example.org/simple.metadata"
    csv_url = "http://example.org/simple.csv"
    csv_path = "tests/simple.csv"

    # Both metadata url and path
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path,
             metadata_url=metadata_url,
             csv_path=csv_path)
    assert "only one argument of metadata_url and metadata_path" in str(
        exc.value)

    # No metadata
    with pytest.raises(ValueError) as exc:
        CSVW(csv_path=csv_path)
    assert "No metadata" in str(exc.value)

    # Only url or path for csv
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path, csv_path=csv_path, csv_url=csv_url)
    assert "Only one of csv_url, csv_path or csv_handle" in str(exc.value)

    # No csv
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path)
    assert "csv_url or csv_path or csv_handle argument required" in str(
        exc.value)

    # Table not specifying url
    with pytest.raises(ValueError) as exc:
        CSVW(csv_path=csv_path,
             metadata_path='tests/simple.no_url.csv-metadata.json')
    assert "should specify 'url'" in str(exc.value)
Example #31
def test_date():
    with CSVW(csv_path="tests/datatypes.date.csv",
              metadata_path="tests/datatypes.date.csv-metadata.json") as csvw:
        rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    date1_lit = Literal("2017-01-09", datatype=XSD.date)
    assert len(list(g.triples((NS['event/1'], NS['date1'], date1_lit)))) == 1

    date2_lit = Literal("2017-01-10Z", datatype=XSD.date)
    assert len(list(g.triples((NS['event/1'], NS['date2'], date2_lit)))) == 1

    date3_lit = Literal("2017-01-11", datatype=XSD.date)
    assert len(list(g.triples((NS['event/1'], NS['date3'], date3_lit)))) == 1

    date4_lit = Literal("2002-09-24-06:00", datatype=XSD.date)
    assert len(list(g.triples((NS['event/1'], NS['date4'], date4_lit)))) == 1

    date5_lit = Literal("2002-09-24+04:00", datatype=XSD.date)
    assert len(list(g.triples((NS['event/1'], NS['date5'], date5_lit)))) == 1
Example #32
def main(csv_url, csv_path, metadata_url, metadata_path, json_dest, rdf_dest,
         temp_dir, riot_path):
    """ Command line interface for pycsvw."""
    # Handle no csv_path, single one and multiple ones
    if csv_path == ():
        csv_path = None
    elif len(csv_path) == 1:
        csv_path = csv_path[0]

    with CSVW(csv_url=csv_url if csv_url else None,
              csv_path=csv_path,
              metadata_url=metadata_url,
              metadata_path=metadata_path,
              temp_dir=temp_dir,
              riot_path=riot_path) as csvw:

        for form, dest in rdf_dest:
            rdf_output = csvw.to_rdf(form)
            with io.open(dest, "wb") as rdf_file:
                rdf_file.write(rdf_output.encode('utf-8'))
        if json_dest:
            json_output = csvw.to_json()
            with open(json_dest, "w") as json_file:
                json.dump(json_output, json_file, indent=2)
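# main() reads like the body of a click command where csv_path and rdf_dest
# accept multiple values. Called directly, a minimal invocation could look
# like this (paths are illustrative):
#
#     main(csv_url=None,
#          csv_path=("tests/simple.csv",),
#          metadata_url=None,
#          metadata_path="tests/simple.csv-metadata.json",
#          json_dest=None,
#          rdf_dest=[("turtle", "simple.ttl")],
#          temp_dir=None,
#          riot_path=None)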
def test_empty_boolean():
    csvw = CSVW(csv_path="tests/empty.csv",
                metadata_path="tests/empty.bool.csv-metadata.json")
    rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    assert len(g) == 2
    assert len(list(g.triples((None, None, Literal(False))))) == 2

    csvw = CSVW(csv_path="tests/empty.csv",
                metadata_path="tests/empty.invalid_base.csv-metadata.json")
    rdf_output = csvw.to_rdf()

    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    assert len(g) == 0
Example #34
def test():

    t1 = 'GID,On Street,Species,Trim Cycle,Diameter at Breast Ht,Inventory Date,Comments,Protected,KML\n' \
         '1,ADDISON AV,Celtis australis,Large Tree Routine Prune,11,10/18/2010,,,"<Point><coordinates>-122.156485,37.440963</coordinates></Point>"\n' \
         '2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,11,6/2/2010,,,"<Point><coordinates>-122.156749,37.440958</coordinates></Point>"\n' \
         '6,ADDISON AV,Robinia pseudoacacia,Large Tree Routine Prune,29,6/1/2010,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay;  beware of BEES,YES,"<Point><coordinates>-122.156299,37.441151</coordinates></Point>"'

    m1_dict = {
          "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
          "@id": "http://example.org/tree-ops-ext",
          "url": "tree-ops-ext.csv",
          "dc:title": "Tree Operations",
          "dcat:keyword": ["tree", "street", "maintenance"],
          "dc:publisher": [{
            "schema:name": "Example Municipality",
            "schema:url": {"@id": "http://example.org"}
          }],
          "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"},
          "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"},
          "notes": [{
            "@type": "oa:Annotation",
            "oa:hasTarget": {"@id": "http://example.org/tree-ops-ext"},
            "oa:hasBody": {
              "@type": "oa:EmbeddedContent",
              "rdf:value": "This is a very interesting comment about the table; it's a table!",
              "dc:format": {"@value": "text/plain"}
            }
          }],
          "dialect": {"trim": True},
          "tableSchema": {
            "columns": [{
              "name": "GID",
              "titles": [
                "GID",
                "Generic Identifier"
              ],
              "dc:description": "An identifier for the operation on a tree.",
              "datatype": "string",
              "required": True,
              "suppressOutput": True
            }, {
              "name": "on_street",
              "titles": "On Street",
              "dc:description": "The street that the tree is on.",
              "datatype": "string"
            }, {
              "name": "species",
              "titles": "Species",
              "dc:description": "The species of the tree.",
              "datatype": "string"
            }, {
              "name": "trim_cycle",
              "titles": "Trim Cycle",
              "dc:description": "The operation performed on the tree.",
              "datatype": "string",
              "lang": "en"
            }, {
              "name": "dbh",
              "titles": "Diameter at Breast Ht",
              "dc:description": "Diameter at Breast Height (DBH) of the tree (in feet), measured 4.5ft above ground.",
              "datatype": "integer"
            }, {
              "name": "inventory_date",
              "titles": "Inventory Date",
              "dc:description": "The date of the operation that was performed.",
              "datatype": {"base": "date", "format": "M/d/yyyy"}
            }, {
              "name": "comments",
              "titles": "Comments",
              "dc:description": "Supplementary comments relating to the operation or tree.",
              "datatype": "string",
              "separator": ";"
            }, {
              "name": "protected",
              "titles": "Protected",
              "dc:description": "Indication (YES / NO) whether the tree is subject to a protection order.",
              "datatype": {"base": "boolean", "format": "YES|NO"},
              "default": "NO"
            }, {
              "name": "kml",
              "titles": "KML",
              "dc:description": "KML-encoded description of tree location.",
              "datatype": "xml"
            }],
            "primaryKey": "GID",
            "aboutUrl": "http://example.org/tree-ops-ext#gid-{GID}"
          }
        }
    m1 = StringIO(json.dumps(m1_dict))
    f = StringIO(t1)
    csvw = CSVW(handle=f, metadata_handle=m1, url='http://example.org/tree-ops-ext.csv')
    for col in csvw.table.columns:
        pprint.pprint(col.name)
        pprint.pprint(col.titles)
        pprint.pprint(col.cells)
        for c in col.cells:
            pprint.pprint(c.value)
    pprint.pprint(csvw.table.rows)

    pprint.pprint(csvw.metadata.json())

    csvw.to_json()