Ejemplo n.º 1
0
    def test_shex_from_csv_languages_delim_bar(self):
        shexstatement = CSV.generate_shex_from_csv(
            "examples/languagedelimbar.csv", delim="|")
        desired = '''start = @<language>
<language> {
  wdt:P31 [ wd:Q34770  ] ;# instance of a language
  wdt:P1705 LITERAL ;# native name
  wdt:P17 .+ ;# spoken in country
  wdt:P2989 .+ ;# grammatical cases
  wdt:P282 .+ ;# writing system
  wdt:P1098 .+ ;# speakers
  wdt:P1999 .* ;# UNESCO language status
  wdt:P2341 .+ ;# indigenous to
}
'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wd: <http://www.wikidata.org/entity/>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"
            in shexstatement, True)
Ejemplo n.º 2
0
    def test_shex_from_csv_algorithm(self):
        shexstatement = CSV.generate_shex_from_csv(
            "examples/wikidata/algorithm.csv", delim=";")
        desired = '''start = @<algorithm>
<algorithm> {
  wdt:P31 [ wd:Q8366  ] ;#instance of a algorithm
  wdt:P138 .* ;#named after
  wdt:P61 .* ;#discoverer or inventor
  wdt:P3752 .+ ;#worst-case time complexity
  wdt:P3753 .+ ;#best-case time complexity
  wdt:P3754 .+ ;#average time complexity
  wdt:P3755 .+ ;#worst-case space complexity
  wdt:P3756 .+ ;#best-case space complexity 
  wdt:P3757 .+ ;#average space complexity 
  wdt:P575 .{0,1} ;#time of discovery or invention
}
'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" in shexstatement, True)
        self.assertEqual(
            "PREFIX wd: <http://www.wikidata.org/entity/>" in shexstatement, True)
        self.assertEqual(
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>" in shexstatement, True)
Ejemplo n.º 3
0
    def test_shex_from_csv_tvseriesextra(self):
        shexstatement = CSV.generate_shex_from_csv(
            "examples/tvseriesextra.csv")
        desired = '''start = @<tvseries>
<tvseries>  EXTRA wdt:P31  {
  wdt:P31 [ wd:Q5398426  ] ;# instance of a tvseries
  wdt:P136 @<genre>* ;# genre
  wdt:P495 .+ ;#country of origin
  wdt:P57 .+ ;#director
  wdt:P58 .+ ;#screenwriter
}
<genre>  EXTRA wdt:P31  {
  wdt:P31 [ wd:Q201658 wd:Q15961987  ] ;#instance of genre
}
'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wd: <http://www.wikidata.org/entity/>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"
            in shexstatement, True)
Ejemplo n.º 4
0
    def generate_shexj_from_csv(filepath, delim=",", skip_header=False):
        """
        Convert ShExStatements CSV input into ShExJ.

        The input is first turned into ShEx by ``CSV.generate_shex_from_csv``
        and that result is then handed to
        ``ShExJCSV.generate_shexj_from_shexstament``.

        Parameters
        ----------
          filepath : str
            forwarded unchanged as the first argument of
            ``CSV.generate_shex_from_csv`` (a CSV file path, or shexstatements
            in CSV format where that function supports it).
          delim : str
            a delimiter, forwarded as ``delim``. Allowed values include ',',
            '|' and ';'
          skip_header : bool
            forwarded as ``skip_header``; set it to True if the first line is
            a header. By default, the value is False.

        Returns
        -------
          shexj
            shape expression in JSON format (ShExJ), or an empty string when
            any exception was raised during the conversion

        """
        try:
            shex_text = CSV.generate_shex_from_csv(
                filepath, delim=delim, skip_header=skip_header)
            return ShExJCSV.generate_shexj_from_shexstament(shex_text)
        except Exception as error:
            # Best effort: report the problem and fall back to an empty result.
            print("Unable to parse. Error: " + str(error))
            return ""
Ejemplo n.º 5
0
    def test_shex_from_csvstring(self):
        csvstring = '''@painting,P31,Q3305213
@painting,P571,xsd:dateTime,#date of creation
@painting,P572,xsd:dateTime
@painting,P276,.,+
@painting,P1476,.,+
@painting,P195,.,+
@painting,P170,@creator,+,#creator of painting
@creator,P2561,LITERAL,#name'''
        shexstatement = CSV.generate_shex_from_csv(csvstring, filename=False)
        desired = '''start = @<painting>
<painting> {
  P31 [ Q3305213  ] ;
  P571 [ xsd:dateTime  ] ;#date of creation
  P572 [ xsd:dateTime  ] ;
  P276 .+ ;
  P1476 .+ ;
  P195 .+ ;
  P170 @<creator>+ ;#creator of painting
}
<creator> {
  P2561 LITERAL ;#name
}
'''
        self.assertEqual(shexstatement, desired)
Ejemplo n.º 6
0
    def test_shex_from_csv_tvseries_negative_prop(self):
        shexstatement = CSV.generate_shex_from_csv(
            "examples/tests/tvseriesnegativeprop.csv", delim="|")
        desired = '''start = @<tvseries>
<tvseries> {
  wdt:P31 [ wd:Q5398426  ] ;# instance of a tvseries
  wdt:P136 @<genre>* ;# genre
  wdt:P495 .+ ;#country of origin
  wdt:P57 .+ ;#director
  wdt:P58 .+ ;#screenwriter
  wdt:P279 .{0} ;#no subclass values
  ^wdt:P279 .{0} ;#no such statements
}
<genre> {
  wdt:P31 [ wd:Q201658 wd:Q15961987  ] ;#instance of genre
}
'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wd: <http://www.wikidata.org/entity/>" in shexstatement,
            True)
        self.assertEqual(
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>"
            in shexstatement, True)
Ejemplo n.º 7
0
def generateshex():
    """Handle a ShEx generation request for HTML or JSON clients.

    For clients accepting ``text/html``: on a POST carrying a
    ``shexstatements`` form field the input is converted (from the uploaded
    spreadsheet if one with an .ods/.xls/.xlsx name was sent, otherwise from
    the form text) and the ``shexstatements.html`` template is rendered with
    the input and output; any other request renders the template with empty
    data.

    For clients accepting ``application/json``: the first form key is parsed
    as a JSON array whose item 0 is the delimiter and item 1 the CSV text;
    the generated ShEx is returned as a JSON string.

    Any other (or missing) Accept header yields an empty response body.
    """
    data = {}
    # A request without an Accept header is treated like an unsupported
    # format instead of failing on the missing key.
    accept = request.headers.get("Accept", "")
    if "text/html" in accept:
        if request.method == "POST" and "shexstatements" in request.form:
            shexstatements = request.form['shexstatements']
            delim = request.form['delim']
            shex = ""
            # NOTE(review): when a part named 'file' is present nothing is
            # converted and the output stays empty - confirm this is intended.
            if 'file' not in request.files:
                # The upload part may be absent (e.g. non-browser clients);
                # then only the submitted text is converted.
                upload = request.files.get("csvfileupload")
                filepath = ""
                if upload is not None:
                    filepath = upload.filename or ""
                file_extension = splitext(filepath)[1].lower()
                if file_extension in {".ods", ".xls", ".xlsx"}:
                    shexstatements = upload.stream.read()
                    shex = Spreadsheet.generate_shex_from_spreadsheet(
                        stream=shexstatements, filepath=filepath)
                else:
                    # .csv uploads and everything else: use the form text.
                    shex = CSV.generate_shex_from_csv(shexstatements,
                                                      delim=delim,
                                                      filename=False)

            data["input"] = shexstatements
            data["output"] = shex
        return render_template('shexstatements.html', data=data)
    if "application/json" in accept:
        # The JSON payload arrives as the first key of the form data.
        jsonstr = next(iter(request.form.to_dict().keys()))
        jsonval = json.loads(jsonstr)
        shex = CSV.generate_shex_from_csv(jsonval[1],
                                          delim=jsonval[0],
                                          filename=False)
        return json.dumps(shex)
    # Currently shexstatements does not handle any other formats
    return ""
Ejemplo n.º 8
0
    def test_shex_from_csv_empty_values(self):
        shexstatement = CSV.generate_shex_from_csv("examples/emptyvalues.csv")
        desired = '''start = @<painting>
<painting> {
  P31 [ Q3305213  ] ;
  P571 [ xsd:dateTime  ] ;#date of creation
  P572 [ xsd:dateTime  ] ;
  P276 .+ ;
  P1476 .+ ;
  P195 .+ ;
  P170 @<creator>+ ;#creator of painting
}
<creator> {
  P2561 LITERAL ;#name
}
'''
        self.assertEqual(shexstatement, desired)
Ejemplo n.º 9
0
    def test_shex_from_csv_empty_values(self):
        shexstatement = CSV.generate_shex_from_csv("examples/emptyvalues.csv")
        desired = '''start = @<painting>
<painting> {
  wdt:P31 [ wd:Q3305213  ] ;
  wdt:P571 xsd:dateTime  ;#date of creation
  wdt:P572 xsd:dateTime  ;
  wdt:P276 .+ ;
  wdt:P1476 .+ ;
  wdt:P195 .+ ;
  wdt:P170 @<creator>+ ;#creator of painting
}
<creator> {
  wdt:P2561 LITERAL ;#name
}
'''
        self.assertEqual(desired in shexstatement, True)
Ejemplo n.º 10
0
    def test_shex_from_csv_foaf_person(self):
        shexstatement = CSV.generate_shex_from_csv("examples/foaf.csv")
        desired = '''start = @<person>
<person> {
  rdf:type foaf:Person  ;#should be a person
  foaf:name Literal ;#name
  foaf:mbox IRI* ;#mail
  foaf:homepage IRI* ;#URL
  foaf:nick Literal* ;#Nickname
  foaf:depiction IRI* ;#photograph
  foaf:interest IRI* ;#topics of interest
  foaf:knows @<person>* ;#person knows another person 
}
'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"
            in shexstatement, True)
        self.assertEqual(
            "PREFIX foaf: <http://xmlns.com/foaf/0.1/>" in shexstatement, True)
Ejemplo n.º 11
0
    def test_shex_from_csv_os(self):
        shexstatement = CSV.generate_shex_from_csv(
            "examples/wikidata/operatingsystem.csv", delim=";")
        desired = '''start = @<operatingsystem>
<operatingsystem>  EXTRA wdt:P31  {
  wdt:P31 [ wd:Q9135  ] ;#instance of a operating system
  wdt:P138 .* ;#named after
  wdt:P178 .* ;#developer
  wdt:P277 .* ;# programming language
  wdt:P571 .{0,1} ;#inception
  wdt:P1448 .* ;#official name
  wdt:P737 .* ;#influenced by
  wdt:P856 .* ;#official website
}'''
        self.maxDiff = None
        self.assertEqual(desired in shexstatement, True)
        self.assertEqual(
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>" in shexstatement, True)
        self.assertEqual(
            "PREFIX wd: <http://www.wikidata.org/entity/>" in shexstatement, True)
        self.assertEqual(
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>" in shexstatement, True)
Ejemplo n.º 12
0
def handle_cli_arguments(arguments):
    """Parse command-line arguments and perform the requested action.

    Depending on the options this prints the version, runs the web
    application, or converts every given input file to ShEx (or ShExJ with
    ``-j``) and prints the result or writes it to the ``-o`` file.

    Parameters
    ----------
      arguments : list of str
        full argument vector; the first item (program name) is skipped.

    Returns
    -------
      None

    Notes
    -----
      * The version is read from ``./shexstatements/version.py``, i.e.
        relative to the current working directory.
      * NOTE(review): for non-.csv inputs (spreadsheets) the ``-j`` flag is
        not applied unless ``-ap`` is also given - confirm this is intended.
    """
    parser = argparse.ArgumentParser(prog='shexstatements')
    parser.add_argument('-o', '--output', type=str, help='output file')
    parser.add_argument('-ap',
                        '--applicationprofile',
                        action='store_true',
                        help='input is application profile')
    parser.add_argument('-d',
                        '--delimiter',
                        type=str,
                        help="CSV delimiter (default: ',')")
    parser.add_argument('-s',
                        '--skipheader',
                        action='store_true',
                        help='Skip CSV header')
    parser.add_argument('-j',
                        '--shexj',
                        action='store_true',
                        help='Generate ShExJ')
    parser.add_argument('-r',
                        '--run',
                        action='store_true',
                        help='run web application')
    parser.add_argument('-v',
                        '--version',
                        action='store_true',
                        help='get version of shexstatements')
    parser.add_argument('csvfile',
                        nargs="*",
                        type=str,
                        help='path of CSV file')

    args = parser.parse_args(args=arguments[1:])
    if args.version:
        version_meta = runpy.run_path("./shexstatements/version.py")
        version = version_meta["__version__"]
        print("shexstatements " + version)
        return
    if args.run:
        shexstatements.application.run()
        return
    if not args.csvfile:
        print("CSV file missing")
        parser.print_usage()
        return

    # These options are the same for every input file, so resolve them once.
    skipheader = args.skipheader
    delimiter = args.delimiter or ","

    for position, csvfile in enumerate(args.csvfile):
        if args.applicationprofile:
            shexstatement = ApplicationProfile.generate_shex_from_csv(
                csvfile, delim=delimiter, skip_header=skipheader)
            if args.shexj:
                shexstatement = ShExJCSV.generate_shexj_from_shexstament(
                    shexstatement)
        else:
            file_extension = splitext(csvfile)[1]
            if ".csv" == file_extension.lower():
                if args.shexj:
                    shexstatement = ShExJCSV.generate_shexj_from_csv(
                        csvfile, delim=delimiter, skip_header=skipheader)
                else:
                    shexstatement = CSV.generate_shex_from_csv(
                        csvfile, delim=delimiter, skip_header=skipheader)
            else:
                shexstatement = Spreadsheet.generate_shex_from_spreadsheet(
                    filepath=csvfile)

        if args.output:
            # Truncate the output file for the first input only; later
            # inputs are appended so they do not overwrite earlier results.
            mode = 'w' if position == 0 else 'a'
            with open(args.output, mode) as shexfile:
                shexfile.write(shexstatement)
        else:
            print(shexstatement)
    def generate_shex_from_csv(filepath, delim=",", skip_header=False):
        """
        Generate ShEx from an application profile CSV file.

        Every row with exactly eight columns is rewritten into one
        '|' separated ShExStatements line; rows with any other number of
        columns are ignored. The rewritten lines are passed to
        ``CSV.generate_shex_from_data_string``.

        Columns as used by this method (0-based):
          * 0 - shape name; when empty, the last non-empty one is reused
          * 1 - second field of the generated line
          * 3 - "yes" (any case) marks the entry as mandatory
          * 4 - "yes" (any case) marks the entry as repeatable
          * 5 - value, used when column 6 or column 1 is empty
          * 6 - when set together with column 1, the value becomes a
                reference to a generated "@<column 6>type" shape that
                requires rdf:type <column 6>
          * 7 - comment, emitted with a leading '#'
        Column 2 is not used.

        Parameters
        ----------
          filepath : str
            path of the application profile CSV file.
          delim : str
            a delimiter. Allowed values include ',', '|' and ';'
          skip_header : bool
            if the first line is a header, set this value to True. By default, the value is False.

        Returns
        -------
          shex
            shape expression, or an empty string if an exception occurred

        """
        # (mandatory, repeatable) -> cardinality field of the generated line
        cardinalities = {
            (True, True): "+",
            (True, False): "1",
            (False, True): "*",
            (False, False): "0,1",
        }
        shexstatement = ""
        try:
            lines = []
            # A dict is used as an insertion-ordered set so the generated
            # type lines come out in a deterministic order.
            typelines = {}
            shapename = ""
            # newline='' lets the csv module handle line endings itself.
            with open(filepath, 'r', newline='') as csvfile:
                csvreader = csv.reader(csvfile, delimiter=delim)
                for rowno, row in enumerate(csvreader, start=1):
                    if skip_header and rowno == 1:
                        continue
                    # Ignore lines with incorrect number of values
                    if len(row) != 8:
                        continue
                    if row[0]:
                        shapename = "@" + row[0]
                    if row[6] and row[1]:
                        typeshape = "@" + row[6] + "type"
                        typelines[typeshape + "|rdf:type|" + row[6] +
                                  "\n"] = None
                        line = shapename + "|" + row[1] + "|" + typeshape
                    else:
                        line = shapename + "|" + row[1] + "|" + row[5]
                    mand = row[3].lower() == "yes"
                    repeat = row[4].lower() == "yes"
                    line = line + "|" + cardinalities[(mand, repeat)]
                    if row[7]:
                        line = line + "|#" + row[7]
                    lines.append(line + "\n")
            data = "".join(lines)
            if typelines:
                data = data + "".join(typelines) + "\n"
            shexstatement = CSV.generate_shex_from_data_string(data)
        except Exception as e:
            # Best effort: report the problem and return the empty default.
            print("Unable to parse. Error: " + str(e))
        return shexstatement
Ejemplo n.º 14
0
import argparse
from shexstatements.shexfromcsv import CSV

# Command-line script: convert one ShExStatements CSV file to ShEx and either
# write the result to the -o file or print it.
parser = argparse.ArgumentParser(prog='shexstatements')
parser.add_argument('-o', '--output', type=str, help='output file')
parser.add_argument('-d',
                    '--delimiter',
                    type=str,
                    help="CSV delimiter (default: ',')")
parser.add_argument('-s',
                    '--skipheader',
                    action='store_true',
                    help='skip the first line (header) of the CSV file')
parser.add_argument('csvfile', type=str, help='path of CSV file')

args = parser.parse_args()
# store_true already yields a bool; a missing or empty -d falls back to ','.
skipheader = args.skipheader
delimiter = args.delimiter or ","

shexstatement = CSV.generate_shex_from_csv(args.csvfile,
                                           delim=delimiter,
                                           skip_header=skipheader)

if args.output:
    with open(args.output, 'w') as shexfile:
        shexfile.write(shexstatement)
else:
    print(shexstatement)
Ejemplo n.º 15
0
    def generate_shex_from_spreadsheet(filepath,
                                       skip_header=False,
                                       stream=None):
        """
        Generate ShEx from a spreadsheet file or from spreadsheet content.

        Every spreadsheet row is turned into one line of '|' separated cell
        values (empty cells are left out) and the resulting text is passed to
        ``CSV.generate_shex_from_data_string``.

        Parameters
        ----------
          filepath : str
            path of a spreadsheet file; when ``stream`` is given only its
            extension is used. The extension (compared case-insensitively)
            selects the reader: .xlsx/.xlsm/.xltx/.xltm, .xls or .ods. Any
            other extension contributes no rows.
          skip_header : bool
            NOTE(review): accepted but not used by this method.
          stream : bytes
            optional content of the spreadsheet. For .xls it is parsed
            directly; for the other formats it is written to a temporary
            file that is removed again before returning.

        Returns
        -------
          shex
            shape expression, or an empty string if an exception occurred

        """
        import tempfile  # only needed to spool an uploaded stream to disk

        shexstatement = ""
        temp_path = None
        try:
            lines = []
            extension = splitext(filepath)[1].lower()

            # These readers are given a path, so content passed as a stream
            # is spooled to a private temporary file first. The name is not
            # derived from the caller-supplied file name.
            path_based = {".xlsx", ".xlsm", ".xltx", ".xltm", ".ods"}
            if stream is not None and extension in path_based:
                with tempfile.NamedTemporaryFile(suffix=extension,
                                                 delete=False) as sf:
                    temp_path = sf.name
                    sf.write(stream)
                filepath = temp_path

            if extension in {".xlsx", ".xlsm", ".xltx", ".xltm"}:
                wb = load_workbook(filepath)
                for ws in wb.worksheets:
                    for i in range(1, ws.max_row + 1):
                        cells = (ws.cell(row=i, column=j).value
                                 for j in range(1, ws.max_column + 1))
                        # str(): cell values need not be strings.
                        lines.append("|".join(
                            str(cell) for cell in cells if cell is not None))

            elif extension == ".xls":
                if stream is not None:
                    wb = open_workbook(file_contents=stream)
                else:
                    wb = open_workbook(filepath)
                for sheet in wb.sheets():
                    # Use each sheet's own dimensions, not the first sheet's.
                    for i in range(sheet.nrows):
                        values = (str(sheet.cell(i, j).value)
                                  for j in range(sheet.ncols))
                        lines.append("|".join(
                            value for value in values if value))

            elif extension == ".ods":
                document = load(filepath)
                rows = document.spreadsheet.getElementsByType(TableRow)
                for row in rows:
                    texts = (str(cell)
                             for cell in row.getElementsByType(TableCell))
                    lines.append("|".join(text for text in texts if text))

            data = "".join(line + "\n" for line in lines)
            shexstatement = CSV.generate_shex_from_data_string(data)
        except Exception as e:
            # Best effort: report the problem and return the empty default.
            print("Unable to read file. Error: " + str(e))
        finally:
            # Remove the temporary copy even when reading failed.
            if temp_path is not None:
                try:
                    remove(temp_path)
                except OSError:
                    # Cleanup is best effort; the result is already decided.
                    pass
        return shexstatement