def test_shex_from_csv_languages_delim_bar(self):
    """Generate ShEx from the '|'-delimited language example and check it.

    The expected shape text and the xsd/wd/wdt PREFIX declarations must all
    be contained in the generated output.
    """
    shexstatement = CSV.generate_shex_from_csv(
        "examples/languagedelimbar.csv", delim="|")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<language> <language> { wdt:P31 [ wd:Q34770 ] ;# instance of a language wdt:P1705 LITERAL ;# native name wdt:P17 .+ ;# spoken in country wdt:P2989 .+ ;# grammatical cases wdt:P282 .+ ;# writing system wdt:P1098 .+ ;# speakers wdt:P1999 .* ;# UNESCO language status wdt:P2341 .+ ;# indigenous to } '''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
            "PREFIX wd: <http://www.wikidata.org/entity/>",
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"):
        self.assertIn(prefix, shexstatement)
def test_shex_from_csv_algorithm(self):
    """Generate ShEx from the ';'-delimited algorithm example and check it.

    The expected shape text and the xsd/wd/wdt PREFIX declarations must all
    be contained in the generated output.
    """
    shexstatement = CSV.generate_shex_from_csv(
        "examples/wikidata/algorithm.csv", delim=";")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<algorithm> <algorithm> { wdt:P31 [ wd:Q8366 ] ;#instance of a algorithm wdt:P138 .* ;#named after wdt:P61 .* ;#discoverer or inventor wdt:P3752 .+ ;#worst-case time complexity wdt:P3753 .+ ;#best-case time complexity wdt:P3754 .+ ;#average time complexity wdt:P3755 .+ ;#worst-case space complexity wdt:P3756 .+ ;#best-case space complexity wdt:P3757 .+ ;#average space complexity wdt:P575 .{0,1} ;#time of discovery or invention } '''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
            "PREFIX wd: <http://www.wikidata.org/entity/>",
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"):
        self.assertIn(prefix, shexstatement)
def test_shex_from_csv_tvseriesextra(self):
    """Generate ShEx from the tvseries EXTRA example and check it.

    The expected shape text (two shapes, each with ``EXTRA wdt:P31``) and the
    xsd/wd/wdt PREFIX declarations must all be contained in the output.
    """
    shexstatement = CSV.generate_shex_from_csv(
        "examples/tvseriesextra.csv")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<tvseries> <tvseries> EXTRA wdt:P31 { wdt:P31 [ wd:Q5398426 ] ;# instance of a tvseries wdt:P136 @<genre>* ;# genre wdt:P495 .+ ;#country of origin wdt:P57 .+ ;#director wdt:P58 .+ ;#screenwriter } <genre> EXTRA wdt:P31 { wdt:P31 [ wd:Q201658 wd:Q15961987 ] ;#instance of genre } '''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
            "PREFIX wd: <http://www.wikidata.org/entity/>",
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"):
        self.assertIn(prefix, shexstatement)
def generate_shexj_from_csv(filepath, delim=",", skip_header=False):
    """
    Generate ShExJ from a ShExStatements CSV input.

    The input is first turned into ShEx through
    ``CSV.generate_shex_from_csv`` and that result is then handed to
    ``ShExJCSV.generate_shexj_from_shexstament``.

    Parameters
    ----------
      filepath : str
        Forwarded unchanged to ``CSV.generate_shex_from_csv``.
      delim : str
        Delimiter forwarded to ``CSV.generate_shex_from_csv``
        (default ',').
      skip_header : bool
        Forwarded to ``CSV.generate_shex_from_csv``; set it to True when
        the first line is a header. Defaults to False.

    Returns
    -------
      shexj
        Whatever ``ShExJCSV.generate_shexj_from_shexstament`` returns, or
        an empty string when either step raised an exception (the error
        is printed rather than propagated).
    """
    try:
        shex_text = CSV.generate_shex_from_csv(
            filepath, delim=delim, skip_header=skip_header)
        return ShExJCSV.generate_shexj_from_shexstament(shex_text)
    except Exception as error:
        # Best-effort contract: report the problem and hand back "".
        print("Unable to parse. Error: {}".format(str(error)))
        return ""
def test_shex_from_csvstring(self):
    """Generate ShEx from an in-memory CSV string and compare it exactly.

    ``filename=False`` tells the generator that the first argument is CSV
    text rather than a path.
    """
    # NOTE(review): both literals reproduce the reviewed source character
    # for character (records separated by single spaces); confirm the
    # intended separators against the real fixture/output.
    csv_text = (
        "@painting,P31,Q3305213 "
        "@painting,P571,xsd:dateTime,#date of creation "
        "@painting,P572,xsd:dateTime "
        "@painting,P276,.,+ "
        "@painting,P1476,.,+ "
        "@painting,P195,.,+ "
        "@painting,P170,@creator,+,#creator of painting "
        "@creator,P2561,LITERAL,#name"
    )
    expected = (
        "start = @<painting> "
        "<painting> { "
        "P31 [ Q3305213 ] ; "
        "P571 [ xsd:dateTime ] ;#date of creation "
        "P572 [ xsd:dateTime ] ; "
        "P276 .+ ; "
        "P1476 .+ ; "
        "P195 .+ ; "
        "P170 @<creator>+ ;#creator of painting "
        "} "
        "<creator> { "
        "P2561 LITERAL ;#name "
        "} "
    )
    generated = CSV.generate_shex_from_csv(csv_text, filename=False)
    self.assertEqual(generated, expected)
def test_shex_from_csv_tvseries_negative_prop(self):
    """Generate ShEx from the negative-property tvseries example.

    The expected shape text (including the ``.{0}`` cardinalities and the
    ``^``-prefixed property) and the xsd/wd/wdt PREFIX declarations must all
    be contained in the generated output.
    """
    shexstatement = CSV.generate_shex_from_csv(
        "examples/tests/tvseriesnegativeprop.csv", delim="|")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<tvseries> <tvseries> { wdt:P31 [ wd:Q5398426 ] ;# instance of a tvseries wdt:P136 @<genre>* ;# genre wdt:P495 .+ ;#country of origin wdt:P57 .+ ;#director wdt:P58 .+ ;#screenwriter wdt:P279 .{0} ;#no subclass values ^wdt:P279 .{0} ;#no such statements } <genre> { wdt:P31 [ wd:Q201658 wd:Q15961987 ] ;#instance of genre } '''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
            "PREFIX wd: <http://www.wikidata.org/entity/>",
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"):
        self.assertIn(prefix, shexstatement)
def generateshex():
    """Handle a ShEx generation request and build the response.

    Behaviour depends on the request's ``Accept`` header:

    * ``text/html``: on a POST carrying a ``shexstatements`` form field the
      input is converted to ShEx and the ``shexstatements.html`` template is
      rendered with ``data["input"]`` / ``data["output"]``; otherwise the
      template is rendered with an empty ``data`` dict.
    * ``application/json``: the first form key is parsed as JSON, element 0
      is used as the delimiter and element 1 as the CSV text; the generated
      ShEx is returned as a JSON string.
    * anything else: an empty string is returned.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed view of the file; confirm which ``if`` the
    text-input ``else`` branch belongs to.
    """
    data = {}
    if ("text/html" in request.headers["Accept"]):
        if request.method == "POST" and "shexstatements" in request.form:
            shexstatements = request.form['shexstatements']
            delim = request.form['delim']
            shex = ""
            # NOTE(review): the upload is read under "csvfileupload" below,
            # so it is unclear when a 'file' key would be present - confirm.
            if 'file' not in request.files:
                filepath = request.files["csvfileupload"].filename
                filename, file_extension = splitext(filepath)
                if ".csv" == file_extension.lower():
                    # The submitted text (not the upload) is parsed as CSV.
                    shex = CSV.generate_shex_from_csv(shexstatements,
                                                      delim=delim,
                                                      filename=False)
                elif file_extension.lower() in {".ods", ".xls", ".xlsx"}:
                    # Spreadsheets: the raw uploaded bytes replace the text
                    # input and are later echoed back as data["input"].
                    shexstatements = request.files[
                        "csvfileupload"].stream.read()
                    shex = Spreadsheet.generate_shex_from_spreadsheet(
                        stream=shexstatements, filepath=filepath)
                else:
                    # Any other (or empty) extension: treat the form text as
                    # CSV, exactly like the ".csv" branch.
                    shex = CSV.generate_shex_from_csv(shexstatements,
                                                      delim=delim,
                                                      filename=False)
            data["input"] = shexstatements
            data["output"] = shex
            return render_template('shexstatements.html', data=data)
        else:
            return render_template('shexstatements.html', data=data)
    elif ("application/json" in request.headers["Accept"]):
        # The JSON payload is expected as the first key of the form data.
        jsonstr = next(iter(request.form.to_dict().keys()))
        jsonval = json.loads(jsonstr)
        # jsonval[0] is used as the delimiter, jsonval[1] as the CSV text.
        shex = CSV.generate_shex_from_csv(jsonval[1],
                                          delim=jsonval[0],
                                          filename=False)
        return json.dumps(shex)
    # Currently shexstatements does not handle any other formats
    else:
        return ""
def test_shex_from_csv_empty_values(self):
    """Generate ShEx from examples/emptyvalues.csv and compare it exactly.

    NOTE(review): a later definition in this file reuses this exact name;
    if both live in the same class this one is shadowed and never runs -
    confirm.
    """
    # NOTE(review): the literal reproduces the reviewed source character
    # for character (parts separated by single spaces); confirm the
    # intended separators against real generator output.
    expected = (
        "start = @<painting> "
        "<painting> { "
        "P31 [ Q3305213 ] ; "
        "P571 [ xsd:dateTime ] ;#date of creation "
        "P572 [ xsd:dateTime ] ; "
        "P276 .+ ; "
        "P1476 .+ ; "
        "P195 .+ ; "
        "P170 @<creator>+ ;#creator of painting "
        "} "
        "<creator> { "
        "P2561 LITERAL ;#name "
        "} "
    )
    generated = CSV.generate_shex_from_csv("examples/emptyvalues.csv")
    self.assertEqual(generated, expected)
def test_shex_from_csv_empty_values(self):
    """Generate ShEx from examples/emptyvalues.csv and check the shapes.

    The expected (wd/wdt-prefixed) shape text must be contained in the
    generated output.

    NOTE(review): an earlier definition in this file uses this exact name;
    if both live in the same class only this later one runs - confirm.
    """
    shexstatement = CSV.generate_shex_from_csv("examples/emptyvalues.csv")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<painting> <painting> { wdt:P31 [ wd:Q3305213 ] ; wdt:P571 xsd:dateTime ;#date of creation wdt:P572 xsd:dateTime ; wdt:P276 .+ ; wdt:P1476 .+ ; wdt:P195 .+ ; wdt:P170 @<creator>+ ;#creator of painting } <creator> { wdt:P2561 LITERAL ;#name } '''
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
def test_shex_from_csv_foaf_person(self):
    """Generate ShEx from the FOAF person example and check it.

    The expected shape text and the rdf/foaf PREFIX declarations must all be
    contained in the generated output.
    """
    shexstatement = CSV.generate_shex_from_csv("examples/foaf.csv")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<person> <person> { rdf:type foaf:Person ;#should be a person foaf:name Literal ;#name foaf:mbox IRI* ;#mail foaf:homepage IRI* ;#URL foaf:nick Literal* ;#Nickname foaf:depiction IRI* ;#photograph foaf:interest IRI* ;#topics of interest foaf:knows @<person>* ;#person knows another person } '''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
            "PREFIX foaf: <http://xmlns.com/foaf/0.1/>"):
        self.assertIn(prefix, shexstatement)
def test_shex_from_csv_os(self):
    """Generate ShEx from the ';'-delimited operating system example.

    The expected shape text and the xsd/wd/wdt PREFIX declarations must all
    be contained in the generated output.
    """
    shexstatement = CSV.generate_shex_from_csv(
        "examples/wikidata/operatingsystem.csv", delim=";")
    # NOTE(review): the expected literal is kept exactly as it appears in
    # the reviewed source; confirm its whitespace against real output.
    desired = '''start = @<operatingsystem> <operatingsystem> EXTRA wdt:P31 { wdt:P31 [ wd:Q9135 ] ;#instance of a operating system wdt:P138 .* ;#named after wdt:P178 .* ;#developer wdt:P277 .* ;# programming language wdt:P571 .{0,1} ;#inception wdt:P1448 .* ;#official name wdt:P737 .* ;#influenced by wdt:P856 .* ;#official website }'''
    self.maxDiff = None
    # assertIn reports both operands on failure, unlike comparing a
    # boolean with True, which only ever says "False != True".
    self.assertIn(desired, shexstatement)
    for prefix in (
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
            "PREFIX wd: <http://www.wikidata.org/entity/>",
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"):
        self.assertIn(prefix, shexstatement)
def handle_cli_arguments(arguments):
    """Parse command-line arguments and run the requested action.

    Parameters
    ----------
      arguments : list of str
        Full argument vector; element 0 (the program name) is skipped.

    Behaviour
    ---------
      * ``--version`` prints the version read from
        ``./shexstatements/version.py`` and returns.
      * ``--run`` starts ``shexstatements.application``.
      * otherwise every positional input file is converted (application
        profile, CSV, or spreadsheet depending on the flags and the file
        extension) and the result is written to ``--output`` or printed.
        If no input file is given, a message and the usage are printed.

    Returns
    -------
      None
    """
    parser = argparse.ArgumentParser(prog='shexstatements')
    parser.add_argument('-o', '--output', type=str, help='output file')
    parser.add_argument('-ap', '--applicationprofile', action='store_true',
                        help='input is application profile')
    # The help text previously read 'output file' (copy-paste slip).
    parser.add_argument('-d', '--delimiter', type=str,
                        help='CSV delimiter (default: ",")')
    parser.add_argument('-s', '--skipheader', action='store_true',
                        help='Skip CSV header')
    parser.add_argument('-j', '--shexj', action='store_true',
                        help='Generate ShExJ')
    parser.add_argument('-r', '--run', action='store_true',
                        help='run web application')
    parser.add_argument('-v', '--version', action='store_true',
                        help='get version of shexstatements')
    parser.add_argument('csvfile', nargs="*", type=str,
                        help='path of CSV file')

    args = parser.parse_args(args=arguments[1:])

    if args.version:
        # The path is relative to the current working directory.
        version_meta = runpy.run_path("./shexstatements/version.py")
        version = version_meta["__version__"]
        print("shexstatements " + version)
        return

    if args.run:
        shexstatements.application.run()
        return

    if len(args.csvfile) < 1:
        print("CSV file missing")
        parser.print_usage()
        return

    # These do not change between input files, so resolve them once.
    skipheader = bool(args.skipheader)
    delimiter = args.delimiter if args.delimiter else ","

    for index, csvfile in enumerate(args.csvfile):
        if args.applicationprofile:
            shexstatement = ApplicationProfile.generate_shex_from_csv(
                csvfile, delim=delimiter, skip_header=skipheader)
            if args.shexj:
                shexstatement = ShExJCSV.generate_shexj_from_shexstament(
                    shexstatement)
        else:
            filename, file_extension = splitext(csvfile)
            if ".csv" == file_extension.lower():
                if args.shexj:
                    shexstatement = ShExJCSV.generate_shexj_from_csv(
                        csvfile, delim=delimiter, skip_header=skipheader)
                else:
                    shexstatement = CSV.generate_shex_from_csv(
                        csvfile, delim=delimiter, skip_header=skipheader)
            else:
                # NOTE(review): --shexj, --delimiter and --skipheader are
                # not applied to spreadsheet input - confirm intended.
                shexstatement = Spreadsheet.generate_shex_from_spreadsheet(
                    filepath=csvfile)

        if args.output:
            # Truncate for the first input only; later inputs append so
            # that one result no longer overwrites the previous one.
            mode = 'w' if index == 0 else 'a'
            with open(args.output, mode) as shexfile:
                shexfile.write(shexstatement)
        else:
            print(shexstatement)
def generate_shex_from_csv(filepath, delim=",", skip_header=False):
    """
    Generate ShEx from an application profile CSV file.

    Every row with exactly 8 columns is rewritten as one '|'-separated
    ShExStatements line, and the collected lines are passed to
    ``CSV.generate_shex_from_data_string``. Rows with any other number of
    columns are ignored.

    Columns as used by this function (0-based):
      0: shape name (sticky: reused by following rows that leave it empty)
      1: property
      2: not read
      3: "yes" (any case) marks the property as mandatory
      4: "yes" (any case) marks the property as repeatable
      5: value, used when column 6 or column 1 is empty
      6: type; when set together with column 1, the value becomes a
         reference to an ``@<type>type`` shape and an ``rdf:type`` line for
         that shape is emitted once
      7: comment, appended after '#'

    Parameters
    ----------
      filepath : str
        Path of the application profile CSV file.
      delim : str
        Delimiter handed to ``csv.reader`` (default ',').
      skip_header : bool
        If the first line is a header, set this value to True. By
        default, the value is False.

    Returns
    -------
      shex
        Shape expression, or an empty string when reading or conversion
        raised an exception (the error is printed rather than propagated).
    """
    shexstatement = ""
    try:
        lines = []
        shapename = ""
        # A dict serves as an insertion-ordered set: joining a plain set
        # made the order of the generated type shapes vary between runs.
        typelines = {}
        # newline='' is what the csv module documents for reader input.
        with open(filepath, 'r', newline='') as csvfile:
            csvreader = csv.reader(csvfile, delimiter=delim)
            for rowno, row in enumerate(csvreader, start=1):
                if skip_header and rowno == 1:
                    continue
                # Ignore lines with incorrect number of values
                if len(row) != 8:
                    continue
                if row[0]:
                    shapename = "@" + row[0]
                if row[6] and row[1]:
                    typeshape = "@" + row[6] + "type"
                    typelines[typeshape + "|rdf:type|" + row[6] + "\n"] = None
                    line = shapename + "|" + row[1] + "|" + typeshape
                else:
                    line = shapename + "|" + row[1] + "|" + row[5]

                mand = row[3].lower() == "yes"
                repeat = row[4].lower() == "yes"
                if mand and repeat:
                    cardinality = "+"
                elif mand:
                    cardinality = "1"
                elif repeat:
                    cardinality = "*"
                else:
                    cardinality = "0,1"
                line = line + "|" + cardinality

                if row[7]:
                    line = line + "|#" + row[7]
                lines.append(line + "\n")

        data = "".join(lines)
        if typelines:
            data = data + "".join(typelines) + "\n"
        shexstatement = CSV.generate_shex_from_data_string(data)
    except Exception as e:
        # Best-effort contract: report the problem and return "".
        print("Unable to parse. Error: " + str(e))
    return shexstatement
import argparse
from shexstatements.shexfromcsv import CSV

# Command-line script: convert one ShExStatements CSV file to ShEx and
# either write the result to --output or print it.
parser = argparse.ArgumentParser(prog='shexstatements')
parser.add_argument('-o', '--output', type=str, help='output file')
# The next two help texts previously both read 'output file'.
parser.add_argument('-d', '--delimiter', type=str,
                    help='CSV delimiter (default: ",")')
parser.add_argument('-s', '--skipheader', action='store_true',
                    help='Skip CSV header')
parser.add_argument('csvfile', type=str, help='path of CSV file')

skipheader = False
delimiter = ","
args = parser.parse_args()
if args.skipheader:
    skipheader = args.skipheader
if args.delimiter:
    delimiter = args.delimiter

shexstatement = CSV.generate_shex_from_csv(args.csvfile,
                                           delim=delimiter,
                                           skip_header=skipheader)
if args.output:
    with open(args.output, 'w') as shexfile:
        shexfile.write(shexstatement)
else:
    print(shexstatement)
def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None):
    """
    Generate ShEx from a spreadsheet file.

    Each spreadsheet row is turned into one line of '|'-joined cell values
    (empty cells are dropped), and the collected lines are passed to
    ``CSV.generate_shex_from_data_string``.

    Supported extensions (matched case-insensitively): .xlsx, .xlsm, .xltx,
    .xltm, .xls and .ods. Any other extension yields no rows.

    Parameters
    ----------
      filepath : str
        Path of the spreadsheet file. When ``stream`` is given, only the
        extension of this value is used.
      skip_header : bool
        Accepted for interface compatibility; this function currently does
        not use it.
      stream : bytes, optional
        Raw content of the spreadsheet. When given, the content is used
        instead of reading ``filepath`` (for .xlsx-family and .ods files it
        is first written to a temporary file that is always removed
        afterwards).

    Returns
    -------
      shex
        Shape expression, or an empty string when reading or conversion
        raised an exception (the error is printed rather than propagated).
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    xlsx_family = {".xlsx", ".xlsm", ".xltx", ".xltm"}
    shexstatement = ""
    tmp_path = None
    try:
        rows_out = []
        filename, file_extension = splitext(filepath)
        # Accept upper-case extensions such as ".XLSX" as well.
        file_extension = file_extension.lower()

        if stream is not None and (file_extension in xlsx_family
                                   or file_extension == ".ods"):
            # A private temporary file replaces the former
            # "tmp" + filepath in the working directory, which depended on
            # a caller-supplied name and could collide or leak on errors.
            fd, tmp_path = tempfile.mkstemp(suffix=file_extension)
            with os.fdopen(fd, "wb") as sf:
                sf.write(stream)
            filepath = tmp_path

        if file_extension in xlsx_family:
            wb = load_workbook(filepath)
            for ws in wb.worksheets:
                for i in range(1, ws.max_row + 1):
                    line = []
                    for j in range(1, ws.max_column + 1):
                        cell = ws.cell(row=i, column=j).value
                        if cell is not None:
                            # str() so numeric/date cells can be joined.
                            line.append(str(cell))
                    rows_out.append("|".join(line))
        elif file_extension == ".xls":
            if stream is not None:
                wb = open_workbook(file_contents=stream)
            else:
                wb = open_workbook(filepath)
            for sheet in wb.sheets():
                # Use each sheet's own dimensions, not the first sheet's.
                for i in range(sheet.nrows):
                    line = []
                    for j in range(sheet.ncols):
                        cell = str(sheet.cell(i, j).value)
                        if len(cell) > 0:
                            line.append(cell)
                    rows_out.append("|".join(line))
        elif file_extension == ".ods":
            wb = load(filepath)
            wb = wb.spreadsheet
            for row in wb.getElementsByType(TableRow):
                line = []
                for cell in row.getElementsByType(TableCell):
                    text = str(cell)
                    if len(text) > 0:
                        line.append(text)
                rows_out.append("|".join(line))

        data = "".join(row + "\n" for row in rows_out)
        shexstatement = CSV.generate_shex_from_data_string(data)
    except Exception as e:
        # Best-effort contract: report the problem and return "".
        print("Unable to read file. Error: " + str(e))
    finally:
        # Remove the temporary copy even when loading or parsing failed.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
    return shexstatement