Example #1
0
 def test_round_trip(self):
     '''
     round trip check of the csv handling: list of dicts -> csv file -> list of dicts
     Note: the initial records have missing values; the restored records are
     expected to contain the missing keys with None as value
     '''
     csvPath = f"{self.testFolder}/{self.test_round_trip.__name__}.csv"
     pageRecords = [
         {"pageTitle": "page_1", "name": "Test Page 1", "label": "1"},
         {"name": "Test Page 2", "label": "2"},
         {"pageTitle": "page_3", "label": "3"},
         {"pageTitle": "page_4", "name": "Test Page 4"},
     ]
     CSV.storeToCSVFile(pageRecords, csvPath, withPostfix=True)
     restoredRecords = CSV.restoreFromCSVFile(csvPath, withPostfix=True)
     # derive the expectation from the input records (shallow copy of the
     # list) by applying LOD.setNone4List over all fields found in them
     expectedRecords = list(pageRecords)
     LOD.setNone4List(expectedRecords, LOD.getFields(expectedRecords))
     self.assertEqual(expectedRecords, restoredRecords)
Example #2
0
 def test_store_to_csvfile(self):
     '''checks that storing the LoD yields the expected csv file content'''
     csvPath = f"{self.testFolder}/test_store_to_csvfile.csv"
     # the reference text is the fixture csv string without carriage returns
     referenceText = self.csvStr.replace("\r", "")
     CSV.storeToCSVFile(self.csvLOD, csvPath, True)
     self.assertEqual(referenceText, CSV.readFile(csvPath))
Example #3
0
 def test_to_csv_delimiter_in_value(self):
     '''checks that a delimiter inside a dict value does not corrupt the converted values'''
     pageRecord = {
         "pageTitle": "page_1",
         "name": "Test Page 1, delimiter in value",
         "label": "1,000",
     }
     originalLOD = [pageRecord]
     # convert to a csv string and parse it back again
     reparsedLOD = CSV.fromCSV(CSV.toCSV(originalLOD))
     self.assertEqual(originalLOD, reparsedLOD)
Example #4
0
 def testRoyals(self):
     '''
     test conversion of the royals sample records to csv and back

     NOTE: this test is currently disabled - the unconditional return
     below exits immediately, so all following statements are
     unreachable and the test always passes without checking anything
     '''
     return
     # TODO - fix me (everything below this line is dead code for now)
     inlod = Sample.getRoyals()
     csv = CSV.toCSV(inlod)
     if self.debug:
         print(csv)
     # NOTE(review): the link below is about storing boolean values in csv -
     # presumably the reason this test is disabled; confirm before re-enabling
     # https://stackoverflow.com/questions/3717785/what-is-a-convenient-way-to-store-and-retrieve-boolean-values-in-a-csv-file
     outlod = CSV.fromCSV(csv)
     if self.debug:
         print(outlod)
Example #5
0
 def test_from_csv(self):
     '''
     tests if the csv is correctly parsed to an LoD

     The fixture csv string is expected to yield two records; every
     key/value pair of the first fixture record must be present in the
     first parsed record.
     '''
     expRecord = self.csvLOD[0]
     lod = CSV.fromCSV(self.csvStr)
     # assertEqual/assertIn report the offending values on failure,
     # assertTrue would only report "False is not true"
     self.assertEqual(2, len(lod))
     actualRecord = lod[0]
     for key, value in expRecord.items():
         self.assertIn(key, actualRecord)
         self.assertEqual(value, actualRecord[key])
Example #6
0
 def test_to_csv_incomplete_dicts(self):
     '''
     checks the csv conversion of an LoD in which some dicts are incomplete
     Note: incomplete dicts can lead to changes of the column order of the csv string
     '''
     firstPage = {"pageTitle": "page_1", "label": "1"}
     secondPage = {"pageTitle": "page_2", "name": "Test Page 2", "label": "2"}
     generatedCsv = CSV.toCSV([firstPage, secondPage])
     # "name" only shows up in the second record and therefore is the last column
     referenceCsv = '"pageTitle","label","name"\r\n"page_1","1",""\r\n"page_2","2","Test Page 2"\r\n'
     self.assertEqual(referenceCsv, generatedCsv)
Example #7
0
    def testStackoverflow71444069(self):
        '''
        https://stackoverflow.com/questions/71444069/create-csv-from-result-of-a-for-google-colab/71548650#71548650
        '''
        from lodstorage.sparql import SPARQL
        from lodstorage.csv import CSV
        sparqlQuery = """SELECT ?org ?orgLabel
WHERE
{
  ?org wdt:P31 wd:Q4830453. #instance of organizations
  ?org wdt:P17 wd:Q96. #Mexico country

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en"}
}"""
        sparql = SPARQL("https://query.wikidata.org/sparql")
        qlod = sparql.queryAsListOfDicts(sparqlQuery)
        csv = CSV.toCSV(qlod)
        if self.debug:
            print(csv)
Example #8
0
 def testCsvFromJSONAble(self):
     '''
     checks the csv generation from a list of JSONAble objects
     '''
     records = [
         {"name": "Test", "label": 1},
         {"name": "Test 2", "label": 2},
         {"name": "Different", "location": "Munich"},
     ]
     jsonAbles = JSONAbleList(clazz=JSONAble)
     jsonAbles.fromLoD(records)
     expectedCsvString = '"name","label","location"\r\n"Test",1,""\r\n"Test 2",2,""\r\n"Different","","Munich"\r\n'
     actualCsvString = CSV.toCSV(jsonAbles.getList())
     self.assertEqual(actualCsvString, expectedCsvString)
Example #9
0
 def testCsvFromJSONAbleIncludeFields(self):
     '''
     checks the csv generation from a list of JSONAble objects when only
     the specified fields are included (positive list)
     '''
     records = [
         {"name": "Test", "label": 1},
         {"name": "Test 2", "label": 2},
         {"name": "Different", "location": "Munich"},
     ]
     jsonAbles = JSONAbleList(clazz=JSONAble)
     jsonAbles.fromLoD(records)
     # only these columns may appear in the generated csv
     wantedFields = ['name', 'location']
     expectedCsvString = '"name","location"\r\n"Test",""\r\n"Test 2",""\r\n"Different","Munich"\r\n'
     actualCsvString = CSV.toCSV(jsonAbles.getList(), includeFields=wantedFields)
     self.assertEqual(actualCsvString, expectedCsvString)
Example #10
0
 def test_restore_from_csvfile(self):
     '''checks that the LoD read back from a csv file equals the fixture LoD'''
     csvPath = f"{self.testFolder}/test_restore_from_csvfile.csv"
     # write the fixture csv string first, then restore the records from that file
     CSV.writeFile(self.csvStr, csvPath)
     restoredLOD = CSV.restoreFromCSVFile(csvPath, withPostfix=True)
     self.assertEqual(self.csvLOD, restoredLOD)
Example #11
0
 def test_to_csv(self):
     '''checks that the fixture LoD converts to the fixture csv string'''
     generatedCsv = CSV.toCSV(self.csvLOD)
     self.assertEqual(self.csvStr, generatedCsv)
Example #12
0
 def test_from_csv_without_header(self):
     '''checks the parsing of a csv string that has no embedded header line'''
     # the column names are handed over separately since the csv has no header
     columnNames = ["pageTitle", "name", "label"]
     headerlessCsv = '"page_1","Test Page 1","1"\r\n"page_2","Test Page 2","2"\r\n'
     parsedLOD = CSV.fromCSV(headerlessCsv, columnNames)
     self.assertEqual(self.csvLOD, parsedLOD)
Example #13
0
    def main(cls, args):
        '''
        command line activation with parsed args

        Depending on the arguments either the available queries or the
        available endpoints are listed, or a query (named, given inline
        or read from a query file) is executed and the result is printed
        in the requested format.

        Args:
            args: the parsed command line arguments; they are accessed by
                attribute (e.g. args.query, args.language, args.format)

        Raises:
            Exception: if the named query or the endpoint is not available,
                an sql query is requested without an endpoint, or the query
                language / the result format is not supported
        '''
        debug = args.debug
        endpoints = EndpointManager.getEndpoints(args.endpointPath)
        qm = QueryManager(lang=args.language,
                          debug=debug,
                          queriesPath=args.queriesPath)
        query = None
        queryCode = args.query
        endpointConf = None
        formats = None
        # preload ValueFormatter
        ValueFormatter.getFormats(args.formatsPath)
        if args.list:
            # NOTE(review): this loop rebinds "query" - after listing it holds
            # the last listed query (if any); kept to preserve the existing flow
            for name, query in qm.queriesByName.items():
                print(f"{name}:{query.title}")
        elif args.listEndpoints:
            # list the endpoints that declare the selected query language
            for endpoint in endpoints.values():
                if hasattr(endpoint,
                           "lang") and endpoint.lang == args.language:
                    print(endpoint)

        elif args.queryName is not None:
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            query = qm.queriesByName[args.queryName]
            formats = query.formats
            queryCode = query.query
            if debug or args.showQuery:
                if hasattr(query,
                           "description") and query.description is not None:
                    print(query.description)
        if query is None:
            name = "?"
            if queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            # use the derived name (file stem or "?") instead of always "?"
            query = Query(name=name, query=queryCode, lang=args.language)
        if queryCode:
            if debug or args.showQuery:
                print(f"{args.language}:\n{queryCode}")
            if args.endpointName:
                endpointConf = endpoints.get(args.endpointName)
                if endpointConf is None:
                    # fail with a clear message instead of an AttributeError
                    # on None further down
                    raise Exception(
                        f"endpoint {args.endpointName} not available")
            if args.language == "sparql":
                method = 'POST'
                if args.endpointName:
                    endPointUrl = endpointConf.endpoint
                    method = endpointConf.method
                    query.tryItUrl = endpointConf.website
                    query.database = endpointConf.database
                else:
                    endPointUrl = query.endpoint
                if args.method:
                    # an explicitly given method overrides the default and
                    # the endpoint configuration
                    method = args.method
                sparql = SPARQL(endPointUrl, method=method)
                if args.prefixes and endpointConf is not None:
                    queryCode = f"{endpointConf.prefixes}\n{queryCode}"
                if args.raw:
                    qres = cls.rawQuery(endPointUrl,
                                        query=query.query,
                                        resultFormat=args.format,
                                        mimeType=args.mimeType)
                    print(qres)
                    return
                # guard against a missing endpoint name before the substring test
                if (args.endpointName and "wikidata" in args.endpointName
                        and formats is None):
                    formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(queryCode)
            elif args.language == "sql":
                if endpointConf is None:
                    # the database location is taken from the endpoint configuration
                    raise Exception(
                        f"language {args.language} needs an endpoint to be specified"
                    )
                sqlDB = SQLDB(endpointConf.endpoint)
                qlod = sqlDB.query(queryCode)
            else:
                raise Exception(
                    f"language {args.language} not known/supported")
            if args.format is Format.csv:
                csv = CSV.toCSV(qlod)
                print(csv)
            elif args.format in [
                    Format.latex, Format.github, Format.mediawiki
            ]:
                doc = query.documentQueryResult(qlod,
                                                tablefmt=str(args.format),
                                                floatfmt=".0f")
                docstr = doc.asText()
                print(docstr)
            elif args.format in [Format.json
                                 ] or args.format is None:  # set as default
                # https://stackoverflow.com/a/36142844/1497139
                print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
            elif args.format in [Format.xml]:
                lod2xml = Lod2Xml(qlod)
                xml = lod2xml.asXml()
                print(xml)

            else:
                raise Exception(f"format {args.format} not supported yet")