def generate_shex_from_csv(filepath, delim=",", skip_header=False):
    """
    Generate ShEx from an application profile CSV file.

    Only rows with exactly eight columns are used; any other row is
    silently skipped. The columns read from each row are:

    * 0 - shape name; when empty, the last non-empty shape name is reused
    * 1 - property
    * 3 - "yes" (case-insensitive) if the property is mandatory
    * 4 - "yes" (case-insensitive) if the property is repeatable
    * 5 - value, used when column 6 or column 1 is empty
    * 6 - value type; when set together with column 1, the value becomes a
      reference to the shape "@<type>type" and one
      "@<type>type|rdf:type|<type>" line is emitted for that type
    * 7 - optional comment, appended as "|#<comment>"

    Column 2 is not read. The mandatory/repeatable flags are mapped to a
    cardinality: "+" (both), "1" (mandatory only), "*" (repeatable only)
    or "0,1" (neither). The resulting '|' separated lines are handed to
    CSV.generate_shex_from_data_string.

    Parameters
    ----------
    filepath : str
      Path of the CSV file to read.
    delim : str
      Field delimiter passed to csv.reader, for example ',', '|' or ';'.
    skip_header : bool
      If the first line is a header, set this value to True. By default,
      the value is False.

    Returns
    -------
    shex
      The value returned by CSV.generate_shex_from_data_string, or an empty
      string when any error occurs (the error is printed, not raised).

    """
    shexstatement = ""
    try:
        lines = []
        typelines = set()
        shapename = ""
        # newline='' is what the csv module asks for: it lets the reader
        # interpret line endings itself, so quoted fields containing line
        # breaks are parsed correctly.
        with open(filepath, 'r', newline='') as csvfile:
            csvreader = csv.reader(csvfile, delimiter=delim)
            for rowno, row in enumerate(csvreader, start=1):
                if skip_header and rowno == 1:
                    continue
                # Ignore lines with incorrect number of values
                if len(row) != 8:
                    continue
                if row[0]:
                    # A shape name stays in effect for the following rows
                    # until another non-empty name is seen.
                    shapename = "@" + row[0]
                if row[6] and row[1]:
                    typeshape = "@" + row[6] + "type"
                    typelines.add(typeshape + "|rdf:type|" + row[6] + "\n")
                    line = shapename + "|" + row[1] + "|" + typeshape
                else:
                    line = shapename + "|" + row[1] + "|" + row[5]

                mand = row[3].lower() == "yes"
                repeat = row[4].lower() == "yes"
                if mand and repeat:
                    line += "|+"
                elif mand:
                    line += "|1"
                elif repeat:
                    line += "|*"
                else:
                    line += "|0,1"

                if row[7]:
                    line += "|#" + row[7]
                lines.append(line + "\n")

        data = "".join(lines)
        if typelines:
            # Sorted so the generated input (and therefore the output) does
            # not depend on set iteration order.
            data += "".join(sorted(typelines)) + "\n"
        shexstatement = CSV.generate_shex_from_data_string(data)
    except Exception as e:
        # Best-effort API: report the problem and return an empty string.
        print("Unable to parse. Error: " + str(e))
    return shexstatement
def generate_shex_from_spreadsheet(filepath, skip_header=False, stream=None):
    """
    Generate ShEx from a spreadsheet file.

    Every row of every sheet is turned into one line made of the row's
    non-empty cell values joined with '|'. All lines are then handed to
    CSV.generate_shex_from_data_string. Supported extensions are
    .xlsx/.xlsm/.xltx/.xltm, .xls and .ods; for any other extension no rows
    are read and an empty data string is passed on.

    Parameters
    ----------
    filepath : str
      Path of the spreadsheet file. When ``stream`` is given, only the file
      extension of this value is used (to select the reader).
    skip_header : bool
      Accepted for interface compatibility.
      NOTE(review): this value is not read anywhere in this function, so a
      header row is currently not skipped here - confirm intended handling.
    stream : bytes, optional
      Raw content of the spreadsheet. When given, the content is read from
      this value instead of from ``filepath``. For the .xlsx family and .ods
      it is first written to a temporary file, which is removed afterwards.

    Returns
    -------
    shex
      The value returned by CSV.generate_shex_from_data_string, or an empty
      string when any error occurs (the error is printed, not raised).

    """
    # Local import: only needed when a stream must be spooled to disk.
    import tempfile

    xlsx_extensions = {".xlsx", ".xlsm", ".xltx", ".xltm"}
    shexstatement = ""
    tmppath = None
    try:
        lines = []
        _, file_extension = splitext(filepath)

        if stream is not None and (file_extension in xlsx_extensions
                                   or file_extension == ".ods"):
            # These readers are given a path, so write the stream to a real
            # temporary file. Using tempfile avoids building a name from
            # filepath, which fails when filepath has a directory part.
            with tempfile.NamedTemporaryFile(suffix=file_extension,
                                             delete=False) as sf:
                tmppath = sf.name
                sf.write(stream)
            filepath = tmppath

        if file_extension in xlsx_extensions:
            wb = load_workbook(filepath)
            for ws in wb.worksheets:
                for i in range(1, ws.max_row + 1):
                    cells = []
                    for j in range(1, ws.max_column + 1):
                        cell = ws.cell(row=i, column=j).value
                        if cell is not None:
                            # Cell values may be numbers or dates; join()
                            # only accepts strings.
                            cells.append(str(cell))
                    lines.append("|".join(cells))
        elif file_extension == ".xls":
            if stream is not None:
                wb = open_workbook(file_contents=stream)
            else:
                wb = open_workbook(filepath)
            for sheet in wb.sheets():
                # Use this sheet's own dimensions, not those of the first
                # sheet, so sheets of different sizes are read completely.
                for i in range(sheet.nrows):
                    cells = []
                    for j in range(sheet.ncols):
                        text = str(sheet.cell(i, j).value)
                        if len(text) > 0:
                            cells.append(text)
                    lines.append("|".join(cells))
        elif file_extension == ".ods":
            document = load(filepath).spreadsheet
            for row in document.getElementsByType(TableRow):
                cells = []
                for cell in row.getElementsByType(TableCell):
                    text = str(cell)
                    if len(text) > 0:
                        cells.append(text)
                lines.append("|".join(cells))

        data = "".join(line + "\n" for line in lines)
        shexstatement = CSV.generate_shex_from_data_string(data)
    except Exception as e:
        # Best-effort API: report the problem and return an empty string.
        print("Unable to read file. Error: " + str(e))
    finally:
        # Remove the temporary file even when reading or parsing failed.
        if tmppath is not None:
            try:
                remove(tmppath)
            except OSError:
                # Cleanup is best effort; a leftover temp file must not
                # turn a successful conversion into a failure.
                pass
    return shexstatement