def handleAnnotation(self, cell, annotation):
    """
    Add to the graph the triples describing an annotation attached to a cell.

    The triples follow the Open Annotation model: an annotation resource
    (cell URI suffixed with "-oa") targets the cell and points to a body
    resource (annotation URI suffixed with "-body") carrying the cleaned
    annotation text. When present, the first dc.Creator and the first
    dc.Date element of the annotation are attached to the body resource.

    Arguments:
    cell       -- dictionary describing the cell; only cell['URI'] is read
    annotation -- the annotation element attached to the cell
    """
    add = self.graph.add
    target_uri = cell['URI']
    oa_uri = target_uri + "-oa"
    body_uri = oa_uri + '-body'

    # Skeleton of the annotation: the annotation node, its target (the
    # cell) and its body node
    for triple in (
        (oa_uri, RDF.type, OA.Annotation),
        (oa_uri, OA.hasTarget, target_uri),
        (oa_uri, OA.hasBody, body_uri),
        (body_uri, RDF.type, RDFS.Resource),
    ):
        add(triple)

    # The textual content of the annotation is stored on the body
    body_text = clean_string(getText(annotation))
    add((body_uri, TABLINKER.value, Literal(body_text)))

    # Author: only the first creator element is used
    creators = annotation.getElementsByType(dc.Creator)
    if creators:
        creator_name = clean_string(str(creators[0]))
        add((body_uri, OA.annotatedBy, Literal(creator_name)))

    # Date: only the first date element is used, typed as xsd:date
    dates = annotation.getElementsByType(dc.Date)
    if dates:
        date_text = str(dates[0])
        add((body_uri, OA.serializedAt,
             Literal(date_text, datatype=XSD.date)))
def parseSheet(self, n, sheet):
    """
    Parse one sheet of the workbook and produce the relevant RDF triples.

    Iterates over all the cells of the sheet. Each cell is described by a
    dictionary (coordinates, name, style, value, URI, ...) which is passed
    to the handler matching the cell style. The first annotation attached
    to a cell, if any, is passed to handleAnnotation. Once every cell has
    been visited, the values collected in the row dimensions are linked to
    their parent cell with TABLINKER.parentCell.

    Arguments:
    n     -- identifier of the sheet, used to build the sheet URI
    sheet -- the sheet element whose TableRow children are parsed

    Returns a tuple (sheetURI, marked_count) where marked_count is the
    number of cells styled as data, row header, hierarchical row header,
    column header or row property.
    """
    # Define a sheetURI for the current sheet
    sheetURI = self.data_ns["{0}-S{1}".format(self.basename, n)]

    columnDimensions = {}
    row_dims = {}
    rowProperties = {}

    # Styles counted as "marked" cells ('TL Title' is handled but not
    # counted)
    marked_types = ('TL Data', 'TL RowHeader', 'TL HRowHeader',
                    'TL ColHeader', 'TL RowProperty')
    marked_count = 0

    rows = sheet.getElementsByType(TableRow)
    for rowIndex, row in enumerate(rows):
        for colIndex, cell_obj in enumerate(getColumns(row)):
            # Positions for which getColumns returned None are skipped
            if cell_obj is None:
                continue

            # Get the cell name (column name followed by 1-based row number)
            cellName = colName(colIndex) + str(rowIndex + 1)

            # A cell without any text paragraph is considered empty
            if not cell_obj.getElementsByType(text.P):
                literal = ''
            else:
                literal = getText(cell_obj)

            # Render whole-number floats without a decimal part
            if isinstance(literal, float):
                if literal.is_integer():
                    literal = str(int(literal))
                else:
                    literal = str(float(literal))
            value = str(literal)

            cell = {
                # Coordinates
                'i': rowIndex,
                'j': colIndex,
                # The cell itself
                'cell': cell_obj,
                # The sheet
                'sheet': sheet,
                # The name of the cell
                'name': cellName,
                # The type of the cell
                'type': self.getStyle(cell_obj),
                # The (cleaned) value of the cell
                'value': value,
                # Is empty ?
                'isEmpty': value == '',
                # Compose a resource name for the cell
                'URI': URIRef("{0}-{1}".format(sheetURI, cellName)),
                # Pass on the URI of the data set
                'sheetURI': sheetURI,
            }
            cellType = cell['type']

            # logger.debug("({},{}) {}/{}: \"{}\"".format(
            #     rowIndex, colIndex, cellType, cellName, value))

            # Increase the counter of marked cells
            if cellType in marked_types:
                marked_count += 1

            # Parse cell content
            if cellType == 'TL Data':
                self.handleData(cell, columnDimensions, row_dims)
            elif cellType == 'TL RowHeader':
                self.handleRowHeader(cell, row_dims, rowProperties)
            elif cellType == 'TL HRowHeader':
                self.handleHRowHeader(cell, row_dims, rowProperties)
            elif cellType == 'TL ColHeader':
                self.handleColHeader(cell, columnDimensions)
            elif cellType == 'TL RowProperty':
                self.handleRowProperty(cell, rowProperties)
            elif cellType == 'TL Title':
                self.handleTitle(cell)

            # Parse the annotation, if any (only the first one is used)
            annotations = cell_obj.getElementsByType(office.Annotation)
            if annotations:
                self.handleAnnotation(cell, annotations[0])

    # Relate all the row properties to their row headers
    for parents in row_dims.values():
        # .items() instead of .iteritems() so this runs on Python 2 and 3
        for (p, vs) in parents.items():
            for v in vs:
                try:
                    self.graph.add((v, TABLINKER.parentCell, p))
                except AssertionError:
                    # The graph refused the triple; skip it and carry on
                    logger.debug('Ignore {}'.format(p))

    # Add additional information about the hierarchy of column headers
    # for value in columnDimensions.values():
    #     for index in range(1, len(value)):
    #         uri_sub = self.getColHeaderValueURI(value[:index + 1])
    #         uri_top = self.getColHeaderValueURI(value[:index])
    #         self.graph.add((uri_sub, self.namespaces['tablink']['subColHeaderOf'], uri_top))

    return (sheetURI, marked_count)