def test_default_with_datatype():
    """Virtual columns with defaults should emit typed literals for every row."""
    csvw = CSVW(csv_path='tests/virtual1.csv',
                metadata_path='tests/virtual1.default.datatype.csv-metadata.json')
    graph = ConjunctiveGraph()
    graph.parse(data=csvw.to_rdf(), format="turtle")

    ns = Namespace("http://example.org/")
    for row in (1, 2):
        subject = ns['sub-{}'.format(row)]

        # Boolean-typed default value.
        actives = list(graph.triples((subject, ns['active'], None)))
        assert len(actives) == 1
        lit = actives[0][2]
        assert isinstance(lit, Literal)
        assert lit.datatype == XSD.boolean
        assert lit.value

        # Plain string default.
        strings = list(graph.triples((subject, ns['stringprop1'], None)))
        assert len(strings) == 1
        lit = strings[0][2]
        assert isinstance(lit, Literal)
        assert lit.value == "some string"

        # A default containing a space must not stay percent-encoded.
        strings = list(graph.triples((subject, ns['stringprop2'], None)))
        assert len(strings) == 1
        lit = strings[0][2]
        assert isinstance(lit, Literal)
        assert "%20" not in lit.value
def test(self):
    """Convert the CSV and compare the generated RDF with the expected graph.

    Negative test cases (TYPES[type] falsy) are expected to raise during
    construction; positive cases must parse and produce an isomorphic graph.
    """
    metadata = option['metadata'] if 'metadata' in option else None
    try:
        csvw = CSVW(csv_file, metadata_url=metadata)
    except Exception:
        # Parsing failed: acceptable only for a negative test case.
        # (Unused exception binding removed; traceback captures the details.)
        if TYPES[type]:
            traceback.print_exc()
        self.assertFalse(TYPES[type])
        return
    # If we get here this should be a positive test.
    self.assertTrue(TYPES[type])
    # If we can parse it we should at least produce some embedded metadata.
    self.assertNotEqual(csvw.metadata, None)
    # And the expected result should exist.
    self.assertNotEqual(result_url, None)
    gr = Graph()
    result = gr.parse(result_url)
    converted_result = csvw.to_rdf()
    # Persist both graphs for manual inspection before comparing.
    result.serialize('output_rdf/' + name + '.ttl', format='turtle')
    converted_result.serialize('output_rdf/generated' + name + '.ttl',
                               format='turtle')
    self.assertTrue(compare.isomorphic(result, converted_result))
def test(self):
    """Convert the CSV and compare the generated JSON to the published result.

    Negative test cases (TYPES[type] falsy) are expected to raise during
    construction; positive cases must parse and match the expected JSON.
    """
    metadata = option['metadata'] if 'metadata' in option else None
    try:
        csvw = CSVW(csv_file, metadata_url=metadata)
    except Exception:
        # Parsing failed: acceptable only for a negative test case.
        # (Unused exception binding removed; traceback captures the details.)
        if TYPES[type]:
            traceback.print_exc()
        self.assertFalse(TYPES[type])
        return
    # If we get here this should be a positive test.
    self.assertTrue(TYPES[type])
    # If we can parse it we should at least produce some embedded metadata.
    self.assertNotEqual(csvw.metadata, None)
    # And the expected result should exist.
    self.assertNotEqual(result_url, None)
    # Test the JSON result.
    resp = urllib2.urlopen(result_url)
    result = json.loads(resp.read())
    self.assertEqual(csvw.to_json(), result)
def test_null_values_with_single_string():
    """A single null marker in metadata suppresses matching subjects/objects."""
    csvw = CSVW(csv_path="tests/null1.csv",
                metadata_path="tests/null1.single.csv-metadata.json")
    graph = ConjunctiveGraph()
    graph.parse(data=csvw.to_rdf(), format="turtle")

    # No subject should be generated from the null-valued key.
    subjects = set(graph.subjects())
    assert subj_ns['null_key'] not in subjects
    assert subj_ns['1'] in subjects
    assert len(subjects) == 4

    # Null-valued objects should not be created.
    objects = set(graph.objects())
    assert Literal('null_key', datatype=XSD.token) not in objects
    assert Literal('null_sector') not in objects
    assert Literal('null_id', datatype=XSD.token) not in objects
    assert Literal('PUBLIC') in objects
    assert Literal('12', datatype=XSD.token) in objects

    # Spot-check: null triples are absent while siblings from the same row exist.
    missing_id = Literal('null_id', datatype=XSD.token)
    assert len(list(graph.triples((subj_ns['2'], id_uri, missing_id)))) == 0
    assert len(list(graph.triples((subj_ns['2'], sect_uri,
                                   Literal('PRIVATE'))))) == 1
    missing_sector = Literal('null_sector')
    assert len(list(graph.triples((subj_ns['3'], sect_uri,
                                   missing_sector)))) == 0
    assert len(list(graph.triples((subj_ns['3'], id_uri,
                                   Literal('12', datatype=XSD.token))))) == 1
def test_literals_with_new_lines():
    """Quoted CSV values containing newlines must survive into RDF literals."""
    csvw = CSVW(csv_path="tests/parsing.quoted_newlines.csv",
                metadata_path="tests/parsing.quoted_newlines.csv-metadata.json")
    graph = ConjunctiveGraph()
    graph.parse(data=csvw.to_rdf(), format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    def single_description(subject):
        # Fetch the one-and-only description literal attached to `subject`.
        matches = list(graph.triples((subject, desc, None)))
        assert len(matches) == 1
        literal = matches[0][2]
        assert isinstance(literal, Literal)
        return literal

    assert len(single_description(ns['taxi']).value.splitlines()) == 2

    flight = URIRef("http://example.org/expense/multi-hop%20flight")
    assert len(single_description(flight).value.splitlines()) == 4

    dinner_desc = single_description(ns['dinner'])
    assert u'\u2019' in dinner_desc, "Expected to read unicode characters"
    assert u"('')" in dinner_desc, "Expected to read apostrophes"
def test_json_generation():
    """Will remove this test when we add json generation support."""
    converter = CSVW(csv_path="tests/simple.csv",
                     metadata_path="tests/simple.csv-metadata.json")
    # JSON output is not implemented yet; the call must raise.
    with pytest.raises(NotImplementedError) as exc:
        converter.to_json()
    assert "JSON generation" in str(exc.value)
def test_single_table_using_path():
    """End-to-end conversion of a single table given file-system paths."""
    converter = CSVW(csv_path="tests/simple.csv",
                     metadata_path="tests/simple.csv-metadata.json")
    verify_rdf(converter.to_rdf())
def test_empty():
    """An empty CSV should produce an empty graph."""
    converter = CSVW(csv_path="tests/empty.csv",
                     metadata_path="tests/empty.csv-metadata.json")
    graph = ConjunctiveGraph()
    graph.parse(data=converter.to_rdf(), format="turtle")
    assert len(graph) == 0
def test_multiple_tables_through_paths():
    """Multiple CSV paths may be combined with a single metadata handle."""
    with open("tests/multiple_tables.csv-metadata.json", 'r') as metadata_f:
        metadata = io.StringIO(text(metadata_f.read()))
    converter = CSVW(csv_path=("tests/multiple_tables.Name-ID.csv",
                               "tests/multiple_tables.ID-Age.csv"),
                     metadata_handle=metadata)
    verify_rdf(converter.to_rdf())
def test_single_table_using_handles():
    """Conversion works when both CSV and metadata come in as file handles."""
    with io.open("tests/simple.csv") as csv_f, \
            io.open("tests/simple.csv-metadata.json", 'r') as metadata_f:
        csv_handle = io.StringIO(csv_f.read())
        metadata = io.StringIO(metadata_f.read())
    converter = CSVW(csv_handle=csv_handle, metadata_handle=metadata)
    verify_rdf(converter.to_rdf())
def test_tmp_files():
    """Serializations manage their temp files and close() cleans them all up."""
    tmp_dir = tempfile.mkdtemp(dir="/tmp")
    assert len(os.listdir(tmp_dir)) == 0
    csvw = CSVW(csv_path="./tests/books.csv",
                metadata_path="./tests/books.csv-metadata.json",
                temp_dir=tmp_dir)
    # Construction alone should not create any temp file.
    assert len(os.listdir(tmp_dir)) == 0

    csvw.to_rdf(fmt="nt")
    created = os.listdir(tmp_dir)
    assert len(created) == 1, "nt serialization should generate only 1 temp file"
    assert created[0].endswith(".nt")

    os.remove(os.path.join(tmp_dir, created[0]))
    assert len(os.listdir(tmp_dir)) == 0

    csvw.to_rdf(fmt="turtle")
    created = os.listdir(tmp_dir)
    assert len(created) == 2, "ttl serialization should generate two temps file"
    assert any(name.endswith(".nt") for name in created)
    assert any(name.endswith(".ttl") for name in created)

    # Temp files should be readable but not writable by anyone.
    expected_flags = [stat.S_IRUSR, stat.S_IRGRP, stat.S_IROTH]
    unexpected_flags = [stat.S_IWUSR, stat.S_IWGRP, stat.S_IWOTH]
    for name in created:
        mode = os.stat(os.path.join(tmp_dir, name)).st_mode
        for flag, non_flag in zip(expected_flags, unexpected_flags):
            assert bool(mode & flag)
            assert not bool(mode & non_flag)

    csvw.close()
    assert len(os.listdir(tmp_dir)) == 0
def test_multiple_tables_through_handles():
    """Multiple CSV handles may be combined with one metadata handle."""
    with io.open("tests/multiple_tables.csv-metadata.json", 'r') as metadata_f, \
            io.open("tests/multiple_tables.Name-ID.csv") as csv1_f, \
            io.open("tests/multiple_tables.ID-Age.csv") as csv2_f:
        metadata = io.StringIO(metadata_f.read())
        handles = [io.StringIO(csv1_f.read()), io.StringIO(csv2_f.read())]
    converter = CSVW(csv_handle=handles, metadata_handle=metadata)
    verify_rdf(converter.to_rdf())
def test_default():
    """Virtual columns should fall back to their configured default values."""
    csvw = CSVW(csv_path='tests/virtual1.csv',
                metadata_path='tests/virtual1.default.csv-metadata.json')
    rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    all_subjects = {x for x in g.subjects()}
    assert len(all_subjects) == 4

    ns = Namespace("http://example.org/")
    assert ns['sub-1'] in all_subjects
    assert ns['sub-2'] in all_subjects
    # BUG FIX: the asserts previously wrapped the triples() generator in a
    # list literal -- `len([g.triples(...)])` is always 1 regardless of the
    # graph contents. Count the actual matching triples instead.
    assert len(list(g.triples((ns['sub-1'], ns['obj-1'], ns['myvalue'])))) == 1
    assert len(list(g.triples((ns['sub-2'], ns['obj-2'], ns['myvalue'])))) == 1
def test_negative_both_default_or_value():
    """Metadata specifying both a default and a valueUrl must raise."""
    bad_metadata = 'tests/virtual1.BothDefaultAndValueUrlError.csv-metadata.json'
    with pytest.raises(BothDefaultAndValueUrlError):
        print(CSVW(csv_path='tests/virtual1.csv', metadata_path=bad_metadata))
def test_single_table_using_url(mock_urlopen):
    """CSV content fetched from a URL converts the same as a local file."""
    with io.open("tests/simple.csv") as csv_f:
        csv_contents = text(csv_f.read())

    # Serve the file contents through the mocked urlopen.
    reader = Mock()
    reader.read.side_effect = [csv_contents]
    mock_urlopen.return_value = reader

    converter = CSVW(csv_url="http://example.org/simple.csv",
                     metadata_path="tests/simple.csv-metadata.json")
    verify_rdf(converter.to_rdf())
def test_time():
    """xsd:time values in various lexical forms should round-trip."""
    with CSVW(csv_path="tests/datatypes.time.csv",
              metadata_path="tests/datatypes.time.csv-metadata.json") as csvw:
        rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    NS = Namespace('https://www.example.org/')
    # (predicate, lexical form) pairs expected exactly once for event/1.
    cases = [
        ('time1', "19:30:00"),        # plain local time
        ('time2', "09:30:10.5"),      # fractional seconds
        ('time3', "10:30:10Z"),       # UTC
        ('time4', "11:30:10-06:00"),  # negative UTC offset
        ('time5', "04:30:10+04:00"),  # positive UTC offset
    ]
    for pred, lexical in cases:
        lit = Literal(lexical, datatype=XSD.time)
        assert len(list(g.triples((NS['event/1'], NS[pred], lit)))) == 1
def test_datetime():
    """xsd:dateTime and xsd:dateTimeStamp values should round-trip."""
    with CSVW(csv_path="tests/datatypes.datetime.csv",
              metadata_path="tests/datatypes.datetime.csv-metadata.json"
              ) as csvw:
        rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    # (predicate, lexical form, datatype) triples expected once for event/1.
    cases = [
        ('datetime1', "2002-05-30T09:00:00", XSD.dateTime),
        ('datetime2', "2002-05-30T09:30:10.5", XSD.dateTime),
        ('datetime3', "2002-05-30T09:30:10Z", XSD.dateTime),
        ('datetime4', "2002-05-30T09:30:10-06:00", XSD.dateTime),
        ('datetime5', "2002-05-30T09:30:10+04:00", XSD.dateTime),
        ('datetimestamp', "2004-04-12T13:20:00-05:00", XSD.dateTimeStamp),
    ]
    for pred, lexical, dtype in cases:
        lit = Literal(lexical, datatype=dtype)
        assert len(list(g.triples((NS['event/1'], NS[pred], lit)))) == 1
def test_context_mgr():
    """Using CSVW as a context manager removes all temp files on exit."""
    tmp_dir = tempfile.mkdtemp(dir="/tmp")
    assert len(os.listdir(tmp_dir)) == 0
    with CSVW(csv_path="./tests/books.csv",
              metadata_path="./tests/books.csv-metadata.json",
              temp_dir=tmp_dir) as csvw:
        # Construction alone should not create any temp file.
        assert len(os.listdir(tmp_dir)) == 0

        csvw.to_rdf(fmt="nt")
        created = os.listdir(tmp_dir)
        assert len(created) == 1, \
            "nt serialization should generate only 1 temp file"
        assert created[0].endswith(".nt")

        os.remove(os.path.join(tmp_dir, created[0]))
        assert len(os.listdir(tmp_dir)) == 0

        csvw.to_rdf(fmt="turtle")
        created = os.listdir(tmp_dir)
        assert len(created) == 2, \
            "ttl serialization should generate two temps file"
        assert any(name.endswith(".nt") for name in created)
        assert any(name.endswith(".ttl") for name in created)
    # Leaving the `with` block must clean up every temp file.
    assert len(os.listdir(tmp_dir)) == 0
def test_encoding_rdf():
    """Non-UTF-8 CSV input is handled when csv_encoding is specified."""
    converter = CSVW(csv_path="./tests/iso_encoding.csv",
                     metadata_path="./tests/iso_encoding.csv-metadata.json",
                     csv_encoding="ISO-8859-1")
    g = ConjunctiveGraph()
    g.parse(data=converter.to_rdf(), format="turtle")

    units = Namespace('http://example.org/units/')
    cars = Namespace('http://example.org/cars/')
    meta = Namespace("http://example.org/properties/")

    # The micro sign must survive decoding and be percent-encoded in the IRI.
    expected_unit = units[quote(u"\xb5100".encode('utf-8'))]
    assert (cars['1'], meta['UnitOfMeasurement'], expected_unit) in g
    assert expected_unit in list(g.objects())
def test_metadata_mismatch():
    """Column-count mismatches between metadata and CSV rows should raise."""
    csv_path = "tests/negative.metadata_mismatch.csv"
    too_few = CSVW(
        csv_path=csv_path,
        metadata_path=
        "tests/negative.NumberOfNonVirtualColumnsMismatch1.csv-metadata.json")
    too_many = CSVW(
        csv_path=csv_path,
        metadata_path=
        "tests/negative.NumberOfNonVirtualColumnsMismatch2.csv-metadata.json")

    # Metadata declares fewer columns than the row provides.
    with pytest.raises(NumberOfNonVirtualColumnsMismatch) as exc:
        print(too_few.to_rdf())
    assert "metadata, 2" in str(exc.value)
    assert "row 1, 3" in str(exc.value)

    # Metadata declares more columns than the row provides.
    with pytest.raises(NumberOfNonVirtualColumnsMismatch) as exc:
        print(too_many.to_rdf())
    assert "metadata, 4" in str(exc.value)
    assert "row 1, 3" in str(exc.value)

    # Virtual columns must come after all non-virtual columns.
    with pytest.raises(VirtualColumnPrecedesNonVirtualColumn) as exc:
        CSVW(
            csv_path=csv_path,
            metadata_path=
            'tests/negative.VirtualColumnPrecedesNonVirtualColumn.csv-metadata.json'
        )
    assert "t2" in str(exc.value)
def process_csv(csv_file, metadata_file, riotpath, g=None):
    """Generate RDF from a CSV/metadata file pair, optionally merging into `g`.

    Args:
        csv_file: path to the CSV file.
        metadata_file: path to the CSVW metadata file.
        riotpath: path to the riot executable used for serialization.
        g: optional Graph to merge into; a new Graph is created when None.

    Returns:
        The Graph containing the parsed triples.
    """
    if g is None:
        g = Graph()
    csvw = CSVW(csv_path=csv_file, metadata_path=metadata_file,
                riot_path=riotpath)
    try:
        # Serialize through a temp file, then parse it back into the graph.
        # (Previously the unused return of to_rdf_files was captured.)
        with NamedTemporaryFile() as f:
            csvw.to_rdf_files([(f, 'turtle')])
            f.seek(0)
            g.parse(f, format='ttl')
    finally:
        # Release CSVW temp resources even if serialization/parsing fails.
        csvw.close()
    return g
def test_others(metadata_file):
    """Exercise the remaining XSD (plus custom, XML, HTML, JSON) datatypes.

    `metadata_file` is a parametrized metadata path; every
    (predicate, value, datatype) entry below must appear exactly once
    for subject event/1.
    """
    with CSVW(csv_path="tests/datatypes.others.csv",
              metadata_path=metadata_file) as csvw:
        rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")
    # (predicate, literal value, literal datatype) triples expected in output.
    triples_to_look_for = [
        (NS['custom_pred'], "someformatteddata",
         URIRef("https://www.datatypes.org#mycustomdatatypedefinition")),
        (NS["anyURI"], "https://www.sampleuri.org", XSD.anyURI),
        (NS["base64Binary"], "0FB8", XSD.base64Binary),
        (NS['boolean1'], True, XSD.boolean),
        (NS['boolean2'], False, XSD.boolean),
        (NS['boolean3'], True, XSD.boolean),
        (NS['boolean4'], False, XSD.boolean),
        (NS['integer'], -3, XSD.integer),
        (NS['long'], -1231235555, XSD.long),
        (NS['int'], 3, XSD.int),
        (NS['short'], -1231, XSD.short),
        (NS['byte'], 45, XSD.byte),
        (NS['nonNegativeInteger'], 111, XSD.nonNegativeInteger),
        (NS['positiveInteger'], 123456, XSD.positiveInteger),
        (NS['unsignedLong'], 3456, XSD.unsignedLong),
        (NS['unsignedInt'], 7890000, XSD.unsignedInt),
        (NS['unsignedShort'], 65000, XSD.unsignedShort),
        (NS['unsignedByte'], 254, XSD.unsignedByte),
        (NS['nonPositiveInteger'], -123, XSD.nonPositiveInteger),
        (NS['negativeInteger'], -34500000, XSD.negativeInteger),
        (NS['decimal'], "+3.5", XSD.decimal),
        (NS['double'], "4268.22752E11", XSD.double),
        (NS['float'], "+24.3e-3", XSD.float),
        (NS['duration'], "P2Y6M5DT12H35M30S", XSD.duration),
        (NS['dayTimeDuration'], "P1DT2H", XSD.dayTimeDuration),
        (NS['yearMonthDuration'], "P0Y20M", XSD.yearMonthDuration),
        (NS['gDay'], "---02", XSD.gDay),
        (NS['gMonth'], "--04", XSD.gMonth),
        (NS['gMonthDay'], "--04-12", XSD.gMonthDay),
        (NS['gYear'], "2004", XSD.gYear),
        (NS['gYearMonth'], "2004-04", XSD.gYearMonth),
        (NS['hexBinary'], "0FB8", XSD.hexBinary),
        (NS['QName'], "myElement", XSD.QName),
        (NS['normalizedString'], "This is a normalized string!",
         XSD.normalizedString),
        (NS['token'], "token", XSD.token),
        (NS['language'], "en", XSD.language),
        (NS['Name'], "_my.Element", XSD.Name),
        (NS['NMTOKEN'], "123_456", XSD.NMTOKEN),
        (NS['xml'], "<a>bla</a>", RDF.XMLLiteral),
        (NS['html'], "<div><p>xyz</p></div>", RDF.HTML),
        (NS['json'], "{}", CSVW_NS.JSON),
    ]
    for pred, lit_val, lit_type in triples_to_look_for:
        lit = Literal(lit_val, datatype=lit_type)
        assert len(list(g.triples(
            (NS['event/1'], pred, lit)))) == 1, "Failed for {}".format(pred)
def test_bool_with_format():
    """Custom boolean formats map to xsd:boolean true/false per row."""
    converter = CSVW(csv_path="tests/datatypes.bool.csv",
                     metadata_path="tests/datatypes.bool.csv-metadata.json")
    g = ConjunctiveGraph()
    g.parse(data=converter.to_rdf(), format="turtle")

    # event/1 is all-true; events 2 and 3 are all-false.
    expectations = [('1', True), ('2', False), ('3', False)]
    for event, value in expectations:
        lit = Literal(value, datatype=XSD.boolean)
        for pred in ('bool1', 'bool2', 'bool3'):
            matches = list(g.triples((NS['event/' + event], NS[pred], lit)))
            assert len(matches) == 1
def test_null_values_with_multiple_strings():
    """Multiple null markers should suppress every matching literal."""
    converter = CSVW(csv_path="tests/null1.csv",
                     metadata_path="tests/null1.multiple.csv-metadata.json")
    g = ConjunctiveGraph()
    g.parse(data=converter.to_rdf(), format="turtle")

    objects = set(g.objects())
    assert Literal('null_key', datatype=XSD.token) not in objects
    assert Literal('null_sector') not in objects
    assert Literal('null_id', datatype=XSD.token) not in objects
    # `ident` instead of `id` to avoid shadowing the builtin.
    for ident in ('10', '11', '12', '13'):
        assert Literal(ident, datatype=XSD.token) not in objects

    # The id predicate itself should have been nulled out entirely.
    predicates = set(g.predicates())
    assert id_uri not in predicates
    assert Literal('1', datatype=XSD.token) not in objects
def test_multiple_tables_through_urls(mock_urlopen):
    """Multiple CSV URLs are fetched and combined with one metadata handle."""
    with io.open("tests/multiple_tables.csv-metadata.json", 'r') as metadata_f, \
            io.open("tests/multiple_tables.Name-ID.csv") as csv1_f, \
            io.open("tests/multiple_tables.ID-Age.csv") as csv2_f:
        metadata = io.StringIO(text(metadata_f.read()))
        responses = [text(csv1_f.read()), text(csv2_f.read())]

    # Serve each file's contents on successive mocked urlopen reads.
    reader = Mock()
    reader.read.side_effect = responses
    mock_urlopen.return_value = reader

    converter = CSVW(csv_url=("multiple_tables.Name-ID.csv",
                              "multiple_tables.ID-Age.csv"),
                     metadata_handle=metadata)
    verify_rdf(converter.to_rdf())
def test(self):
    """Convert the CSV, compare generated JSON to the published result, and
    persist both documents for manual inspection.

    Negative test cases (TYPES[type] falsy) are expected to raise during
    construction; positive cases must parse and match order-insensitively.
    """
    metadata = option['metadata'] if 'metadata' in option else None
    try:
        csvw = CSVW(csv_file, metadata_url=metadata)
    except Exception:
        # Parsing failed: acceptable only for a negative test case.
        # (Unused exception binding removed; traceback captures the details.)
        if TYPES[type]:
            traceback.print_exc()
        self.assertFalse(TYPES[type])
        return
    # If we get here this should be a positive test.
    self.assertTrue(TYPES[type])
    # If we can parse it we should at least produce some embedded metadata.
    self.assertNotEqual(csvw.metadata, None)
    # And the expected result should exist.
    self.assertNotEqual(result_url, None)
    # Fetch the published JSON result and generate ours.
    resp = urllib2.urlopen(result_url)
    result = json.loads(resp.read())
    generated_result = json.loads(csvw.to_json())
    # Persist both documents for manual inspection before comparing.
    with open('output_json/' + name + '.json', 'w') as outfile:
        json.dump(result, outfile, indent=2)
    with open('output_json/generated_' + name + '.json', 'w') as outfile:
        json.dump(generated_result, outfile, indent=2)
    # ordered() normalizes list/dict ordering so the comparison is stable.
    self.assertEqual(ordered(generated_result), ordered(result))
def test_all_formats(mock_find_executable):
    """All serialization formats are written in one pass and each validates."""
    mock_find_executable.return_value = True
    converter = CSVW(csv_path="./tests/books.csv",
                     metadata_path="./tests/books.csv-metadata.json")
    assert mock_find_executable.call_count == 0

    # One temporary file per requested serialization format.
    outputs = [(tempfile.TemporaryFile(), params[0]) for params in TEST_PARAMS]
    converter.to_rdf_files(outputs)

    # Check each output against its validator and rdflib parse format.
    for (file_obj, _), params in zip(outputs, TEST_PARAMS):
        file_obj.seek(0)
        contents = file_obj.read().decode('utf-8')
        params[1](contents)
        verify_rdf_contents(contents, params[2])

    # The riot executable should have been located exactly once overall.
    assert mock_find_executable.call_count == 1
def test_literals_with_escaped_quotes():
    """Escaped quotes and backslashes in CSV fields must survive into RDF."""
    csvw = CSVW(csv_path="tests/parsing.escaped_quotes.csv",
                metadata_path="tests/parsing.escaped_quotes.csv-metadata.json")
    rdf_contents = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_contents, format="turtle")

    ns = Namespace("http://example.org/expense/")
    desc = URIRef("http://example.org/desc")

    taxi_triples = list(g.triples((ns['taxi'], desc, None)))
    assert len(taxi_triples) == 1
    taxi_desc = taxi_triples[0][2]
    assert isinstance(taxi_desc, Literal)
    assert taxi_desc.value == "go from x to y"

    quoted_expense_triples = list(g.triples(
        (URIRef("http://example.org/expense/quoted%20expense"), desc, None)))
    assert len(quoted_expense_triples) == 1
    quoted_expense_desc = quoted_expense_triples[0][2]
    assert isinstance(quoted_expense_desc, Literal)
    assert quoted_expense_desc.value == \
        "for some reason it came with quotes in it"

    flight_triples = list(g.triples((ns['flight'], desc, None)))
    assert len(flight_triples) == 1
    flight_desc = flight_triples[0][2]
    assert isinstance(flight_desc, Literal)
    assert flight_desc.value == \
        "had to fly \"escaped quotes business\" for this trip"

    car_triples = list(g.triples((ns['car'], desc, None)))
    assert len(car_triples) == 1
    car_desc = car_triples[0][2]
    assert isinstance(car_desc, Literal)
    # BUG FIX: the original used "\ " -- an invalid escape sequence
    # (DeprecationWarning today, SyntaxError in a future Python). "\\" yields
    # the same runtime string (a single backslash) without the warning.
    assert car_desc.value == " some \\ in it to be escaped"
def test_single_csv():
    """Constructor argument validation for metadata and CSV sources."""
    metadata_path = "tests/simple.csv-metadata.json"
    csv_path = "tests/simple.csv"

    # Both metadata url and path given.
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path,
             metadata_url="http://example.org/simple.metadata",
             csv_path=csv_path)
    assert "only one argument of metadata_url and metadata_path" in str(
        exc.value)

    # No metadata at all.
    with pytest.raises(ValueError) as exc:
        CSVW(csv_path=csv_path)
    assert "No metadata" in str(exc.value)

    # Both url and path given for the csv.
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path, csv_path=csv_path,
             csv_url="http://example.org/simple.csv")
    assert "Only one of csv_url, csv_path or csv_handle" in str(exc.value)

    # No csv at all.
    with pytest.raises(ValueError) as exc:
        CSVW(metadata_path=metadata_path)
    assert "csv_url or csv_path or csv_handle argument required" in str(
        exc.value)

    # Table metadata missing its mandatory url entry.
    with pytest.raises(ValueError) as exc:
        CSVW(csv_path=csv_path,
             metadata_path='tests/simple.no_url.csv-metadata.json')
    assert "should specify 'url'" in str(exc.value)
def test_date():
    """xsd:date values in various lexical forms should round-trip."""
    with CSVW(csv_path="tests/datatypes.date.csv",
              metadata_path="tests/datatypes.date.csv-metadata.json") as csvw:
        rdf_output = csvw.to_rdf()
    g = ConjunctiveGraph()
    g.parse(data=rdf_output, format="turtle")

    # (predicate, lexical form) pairs expected exactly once for event/1.
    cases = [
        ('date1', "2017-01-09"),        # plain date
        ('date2', "2017-01-10Z"),       # UTC
        ('date3', "2017-01-11"),        # plain date
        ('date4', "2002-09-24-06:00"),  # negative UTC offset
        ('date5', "2002-09-24+04:00"),  # positive UTC offset
    ]
    for pred, lexical in cases:
        lit = Literal(lexical, datatype=XSD.date)
        assert len(list(g.triples((NS['event/1'], NS[pred], lit)))) == 1
def main(csv_url, csv_path, metadata_url, metadata_path, json_dest, rdf_dest,
         temp_dir, riot_path):
    """ Command line interface for pycsvw."""
    # Normalize csv_path: an empty tuple means "not provided"; a singleton
    # tuple is unwrapped to the bare path.
    if csv_path == ():
        csv_path = None
    elif len(csv_path) == 1:
        csv_path = csv_path[0]

    with CSVW(csv_url=csv_url if csv_url else None,
              csv_path=csv_path,
              metadata_url=metadata_url,
              metadata_path=metadata_path,
              temp_dir=temp_dir,
              riot_path=riot_path) as csvw:
        # Write every requested RDF serialization as UTF-8 bytes.
        for form, dest in rdf_dest:
            serialized = csvw.to_rdf(form)
            with io.open(dest, "wb") as rdf_file:
                rdf_file.write(serialized.encode('utf-8'))
        # Optionally write the JSON output.
        if json_dest:
            json_output = csvw.to_json()
            with open(json_dest, "w") as json_file:
                json.dump(json_output, json_file, indent=2)
def test_empty_boolean():
    """Empty boolean cells default to false; an invalid base yields nothing."""
    converter = CSVW(csv_path="tests/empty.csv",
                     metadata_path="tests/empty.bool.csv-metadata.json")
    g = ConjunctiveGraph()
    g.parse(data=converter.to_rdf(), format="turtle")
    assert len(g) == 2
    assert len(list(g.triples((None, None, Literal(False))))) == 2

    # With an invalid base datatype no triples should be produced at all.
    converter = CSVW(csv_path="tests/empty.csv",
                     metadata_path="tests/empty.invalid_base.csv-metadata.json")
    g = ConjunctiveGraph()
    g.parse(data=converter.to_rdf(), format="turtle")
    assert len(g) == 0
def test():
    """Smoke-test CSVW against the W3C tree-ops-ext example, inline.

    Builds the CSV and its CSVW metadata in memory, parses them, dumps the
    resulting table structure for inspection, and exercises to_json().
    """
    # Raw CSV: tree maintenance records with embedded KML point geometries.
    t1 = 'GID,On Street,Species,Trim Cycle,Diameter at Breast Ht,Inventory Date,Comments,Protected,KML\n' \
         '1,ADDISON AV,Celtis australis,Large Tree Routine Prune,11,10/18/2010,,,"<Point><coordinates>-122.156485,37.440963</coordinates></Point>"\n' \
         '2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,11,6/2/2010,,,"<Point><coordinates>-122.156749,37.440958</coordinates></Point>"\n' \
         '6,ADDISON AV,Robinia pseudoacacia,Large Tree Routine Prune,29,6/1/2010,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay; beware of BEES,YES,"<Point><coordinates>-122.156299,37.441151</coordinates></Point>"'
    # CSVW metadata mirroring the W3C tree-ops-ext metadata document.
    m1_dict = {
        "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
        "@id": "http://example.org/tree-ops-ext",
        "url": "tree-ops-ext.csv",
        "dc:title": "Tree Operations",
        "dcat:keyword": ["tree", "street", "maintenance"],
        "dc:publisher": [{
            "schema:name": "Example Municipality",
            "schema:url": {"@id": "http://example.org"}
        }],
        "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"},
        "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"},
        "notes": [{
            "@type": "oa:Annotation",
            "oa:hasTarget": {"@id": "http://example.org/tree-ops-ext"},
            "oa:hasBody": {
                "@type": "oa:EmbeddedContent",
                "rdf:value": "This is a very interesting comment about the table; it's a table!",
                "dc:format": {"@value": "text/plain"}
            }
        }],
        "dialect": {"trim": True},
        "tableSchema": {
            "columns": [{
                "name": "GID",
                "titles": [
                    "GID",
                    "Generic Identifier"
                ],
                "dc:description": "An identifier for the operation on a tree.",
                "datatype": "string",
                "required": True,
                "suppressOutput": True
            }, {
                "name": "on_street",
                "titles": "On Street",
                "dc:description": "The street that the tree is on.",
                "datatype": "string"
            }, {
                "name": "species",
                "titles": "Species",
                "dc:description": "The species of the tree.",
                "datatype": "string"
            }, {
                "name": "trim_cycle",
                "titles": "Trim Cycle",
                "dc:description": "The operation performed on the tree.",
                "datatype": "string",
                "lang": "en"
            }, {
                "name": "dbh",
                "titles": "Diameter at Breast Ht",
                "dc:description": "Diameter at Breast Height (DBH) of the tree (in feet), measured 4.5ft above ground.",
                "datatype": "integer"
            }, {
                "name": "inventory_date",
                "titles": "Inventory Date",
                "dc:description": "The date of the operation that was performed.",
                "datatype": {"base": "date", "format": "M/d/yyyy"}
            }, {
                "name": "comments",
                "titles": "Comments",
                "dc:description": "Supplementary comments relating to the operation or tree.",
                "datatype": "string",
                "separator": ";"
            }, {
                "name": "protected",
                "titles": "Protected",
                "dc:description": "Indication (YES / NO) whether the tree is subject to a protection order.",
                "datatype": {"base": "boolean", "format": "YES|NO"},
                "default": "NO"
            }, {
                "name": "kml",
                "titles": "KML",
                "dc:description": "KML-encoded description of tree location.",
                "datatype": "xml"
            }],
            "primaryKey": "GID",
            "aboutUrl": "http://example.org/tree-ops-ext#gid-{GID}"
        }
    }
    m1 = StringIO(json.dumps(m1_dict))
    f = StringIO(t1)
    csvw = CSVW(handle=f, metadata_handle=m1,
                url='http://example.org/tree-ops-ext.csv')
    # Dump the parsed column/cell/row structure for manual inspection.
    for col in csvw.table.columns:
        pprint.pprint(col.name)
        pprint.pprint(col.titles)
        pprint.pprint(col.cells)
        for c in col.cells:
            pprint.pprint(c.value)
    pprint.pprint(csvw.table.rows)
    pprint.pprint(csvw.metadata.json())
    # Smoke-check JSON generation runs without error.
    csvw.to_json()