def test_round_trip(self):
    '''
    check the csv round trip: list of dicts -> csv file -> list of dicts

    Note:
        the input records are deliberately incomplete; the restored
        records are expected to carry every field, with None as the
        value of the fields a record originally lacked
    '''
    csvPath = f"{self.testFolder}/{self.test_round_trip.__name__}.csv"
    records = [
        {"pageTitle": "page_1", "name": "Test Page 1", "label": "1"},
        {"name": "Test Page 2", "label": "2"},
        {"pageTitle": "page_3", "label": "3"},
        {"pageTitle": "page_4", "name": "Test Page 4"},
    ]
    CSV.storeToCSVFile(records, csvPath, withPostfix=True)
    restored = CSV.restoreFromCSVFile(csvPath, withPostfix=True)
    # the expectation is the input with all missing fields filled with None
    expected = list(records)
    LOD.setNone4List(expected, LOD.getFields(expected))
    self.assertEqual(expected, restored)
def test_store_to_csvfile(self):
    '''
    store the fixture LoD as a csv file and compare the file content
    with the fixture csv string
    '''
    csvPath = f"{self.testFolder}/test_store_to_csvfile.csv"
    CSV.storeToCSVFile(self.csvLOD, csvPath, True)
    written = CSV.readFile(csvPath)
    # the comparison is done against the fixture without carriage returns
    self.assertEqual(self.csvStr.replace("\r", ""), written)
def test_to_csv_delimiter_in_value(self):
    '''
    make sure values that contain the delimiter survive a
    toCSV / fromCSV cycle unchanged
    '''
    records = [
        {
            "pageTitle": "page_1",
            "name": "Test Page 1, delimiter in value",
            "label": "1,000",
        }
    ]
    restored = CSV.fromCSV(CSV.toCSV(records))
    self.assertEqual(records, restored)
def testRoyals(self): ''' test conversion of royals ''' return # TODO - fix me inlod = Sample.getRoyals() csv = CSV.toCSV(inlod) if self.debug: print(csv) # https://stackoverflow.com/questions/3717785/what-is-a-convenient-way-to-store-and-retrieve-boolean-values-in-a-csv-file outlod = CSV.fromCSV(csv) if self.debug: print(outlod)
def test_from_csv(self):
    '''
    tests if the csv is correctly parsed to an LoD

    The fixture csv string is parsed; the result must consist of two
    records and the first record must contain every key/value pair of
    the first fixture record.
    '''
    expRecord = self.csvLOD[0]
    lod = CSV.fromCSV(self.csvStr)
    # assertEqual/assertIn report the offending values on failure,
    # assertTrue on a comparison would only report "False is not true"
    self.assertEqual(2, len(lod))
    actualRecord = lod[0]
    for key, value in expRecord.items():
        self.assertIn(key, actualRecord)
        self.assertEqual(value, actualRecord[key])
def test_to_csv_incomplete_dicts(self):
    '''
    check the csv conversion of an LoD whose dicts do not all have the same keys

    Note:
        incomplete dicts can change the column order of the csv string
    '''
    records = [
        {"pageTitle": "page_1", "label": "1"},
        {"pageTitle": "page_2", "name": "Test Page 2", "label": "2"},
    ]
    # one literal per csv row: header, then the two records
    expected = (
        '"pageTitle","label","name"\r\n'
        '"page_1","1",""\r\n'
        '"page_2","2","Test Page 2"\r\n'
    )
    self.assertEqual(expected, CSV.toCSV(records))
def testStackoverflow71444069(self):
    '''
    https://stackoverflow.com/questions/71444069/create-csv-from-result-of-a-for-google-colab/71548650#71548650

    Runs a SPARQL query against the Wikidata query service and converts
    the result to csv; the csv is only printed when debugging is on.
    '''
    from lodstorage.sparql import SPARQL
    from lodstorage.csv import CSV
    # a SPARQL comment runs from '#' to the end of the line, so every
    # commented triple pattern must sit on a line of its own - otherwise
    # the first comment swallows the rest of the query
    sparqlQuery = """SELECT ?org ?orgLabel
WHERE
{
  ?org wdt:P31 wd:Q4830453. #instance of organizations
  ?org wdt:P17 wd:Q96. #Mexico country

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en"}
}"""
    sparql = SPARQL("https://query.wikidata.org/sparql")
    qlod = sparql.queryAsListOfDicts(sparqlQuery)
    csv = CSV.toCSV(qlod)
    if self.debug:
        print(csv)
def testCsvFromJSONAble(self):
    '''
    check the csv generated from a list of JSONAble objects
    '''
    records = [
        {"name": "Test", "label": 1},
        {"name": "Test 2", "label": 2},
        {"name": "Different", "location": "Munich"},
    ]
    entities = JSONAbleList(clazz=JSONAble)
    entities.fromLoD(records)
    # one literal per csv row: header, then the three records
    expected = (
        '"name","label","location"\r\n'
        '"Test",1,""\r\n'
        '"Test 2",2,""\r\n'
        '"Different","","Munich"\r\n'
    )
    self.assertEqual(CSV.toCSV(entities.getList()), expected)
def testCsvFromJSONAbleIncludeFields(self):
    '''
    check the csv generated from a list of JSONAble objects when
    only the specified fields are included (positive list)
    '''
    records = [
        {"name": "Test", "label": 1},
        {"name": "Test 2", "label": 2},
        {"name": "Different", "location": "Munich"},
    ]
    entities = JSONAbleList(clazz=JSONAble)
    entities.fromLoD(records)
    wantedFields = ['name', 'location']
    # one literal per csv row: header, then the three records
    expected = (
        '"name","location"\r\n'
        '"Test",""\r\n'
        '"Test 2",""\r\n'
        '"Different","Munich"\r\n'
    )
    self.assertEqual(CSV.toCSV(entities.getList(), includeFields=wantedFields), expected)
def test_restore_from_csvfile(self):
    '''
    write the fixture csv string to a file and check that restoring
    the file yields the fixture LoD
    '''
    csvPath = f"{self.testFolder}/test_restore_from_csvfile.csv"
    CSV.writeFile(self.csvStr, csvPath)
    restored = CSV.restoreFromCSVFile(csvPath, withPostfix=True)
    self.assertEqual(self.csvLOD, restored)
def test_to_csv(self):
    '''
    check that converting the fixture LoD yields the fixture csv string
    '''
    self.assertEqual(self.csvStr, CSV.toCSV(self.csvLOD))
def test_from_csv_without_header(self):
    '''
    check that a csv string without a header row is parsed correctly
    when the column names are supplied separately
    '''
    headerless = '"page_1","Test Page 1","1"\r\n"page_2","Test Page 2","2"\r\n'
    columns = ["pageTitle", "name", "label"]
    self.assertEqual(self.csvLOD, CSV.fromCSV(headerless, columns))
def main(cls, args):
    '''
    command line activation with parsed args

    Depending on the arguments this either lists the named queries,
    lists the endpoints configured for the selected language, or runs a
    query (named, given inline or read from a query file) and prints
    the result in the requested format.

    Args:
        args: the parsed command line arguments, read via attribute
            access (args.debug, args.query, args.format, ...)

    Raises:
        Exception: if the named query is not available, the query
            language is not known/supported or the output format is
            not supported
    '''
    debug = args.debug
    endpoints = EndpointManager.getEndpoints(args.endpointPath)
    qm = QueryManager(lang=args.language, debug=debug, queriesPath=args.queriesPath)
    query = None
    queryCode = args.query
    endpointConf = None
    formats = None
    # preload ValueFormatter
    ValueFormatter.getFormats(args.formatsPath)
    if args.list:
        # NOTE(review): this loop rebinds `query`, so after listing the
        # `query is None` check below only fires when there are no named
        # queries - kept as is
        for name, query in qm.queriesByName.items():
            print(f"{name}:{query.title}")
    elif args.listEndpoints:
        # list endpoints
        for endpoint in endpoints.values():
            if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                print(endpoint)
    elif args.queryName is not None:
        if debug or args.showQuery:
            print(f"named query {args.queryName}:")
        if args.queryName not in qm.queriesByName:
            raise Exception(f"named query {args.queryName} not available")
        query = qm.queriesByName[args.queryName]
        formats = query.formats
        queryCode = query.query
        if debug or args.showQuery:
            if hasattr(query, "description") and query.description is not None:
                print(query.description)
    if query is None:
        # ad hoc query: inline code wins, otherwise fall back to the query file
        name = "?"
        if queryCode is None and args.queryFile is not None:
            queryFilePath = Path(args.queryFile)
            queryCode = queryFilePath.read_text()
            name = queryFilePath.stem
        # use the derived name (file stem) instead of always "?"
        query = Query(name=name, query=queryCode, lang=args.language)
    if queryCode:
        if debug or args.showQuery:
            print(f"{args.language}:\n{queryCode}")
        if args.endpointName:
            endpointConf = endpoints.get(args.endpointName)
        if args.language == "sparql":
            method = 'POST'
            if args.endpointName:
                endPointUrl = endpointConf.endpoint
                method = endpointConf.method
                query.tryItUrl = endpointConf.website
                query.database = endpointConf.database
            else:
                endPointUrl = query.endpoint
            if args.method:
                # an explicitly given method overrides the default and
                # the endpoint configuration
                method = args.method
            sparql = SPARQL(endPointUrl, method=method)
            if args.prefixes and endpointConf is not None:
                queryCode = f"{endpointConf.prefixes}\n{queryCode}"
            if args.raw:
                qres = cls.rawQuery(endPointUrl,
                                    query=query.query,
                                    resultFormat=args.format,
                                    mimeType=args.mimeType)
                print(qres)
                return
            # endpointName may be None when the endpoint is taken from the
            # query itself - guard the substring test against that
            if args.endpointName and "wikidata" in args.endpointName and formats is None:
                formats = ["*:wikidata"]
            qlod = sparql.queryAsListOfDicts(queryCode)
        elif args.language == "sql":
            sqlDB = SQLDB(endpointConf.endpoint)
            qlod = sqlDB.query(queryCode)
        else:
            raise Exception(
                f"language {args.language} not known/supported")
        if args.format is Format.csv:
            csv = CSV.toCSV(qlod)
            print(csv)
        elif args.format in [
                Format.latex, Format.github, Format.mediawiki
        ]:
            doc = query.documentQueryResult(qlod,
                                            tablefmt=str(args.format),
                                            floatfmt=".0f")
            docstr = doc.asText()
            print(docstr)
        elif args.format in [Format.json
                             ] or args.format is None:  # set as default
            # https://stackoverflow.com/a/36142844/1497139
            print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
        elif args.format in [Format.xml]:
            lod2xml = Lod2Xml(qlod)
            xml = lod2xml.asXml()
            print(xml)
        else:
            raise Exception(f"format {args.format} not supported yet")