def properties_from_row(
    self, row: t.List[openpyxl.cell.Cell]
) -> t.Optional[RowObject]:
    """Build a cognate set from the property cells of one spreadsheet row.

    The cells ``row[: self.left - 1]`` are cleaned and paired, in order, with
    ``self.row_header``. A value whose CognatesetTable column declares a
    separator is split on that separator; an empty cell in such a column
    becomes an empty list (``"".split(sep)`` would give the truthy ``[""]``
    and defeat the emptiness check below).

    Side effect: ``self.row_prop_separators`` is recomputed from the
    CognatesetTable column definitions on every call.

    Returns:
        ``None`` if none of the row's properties has a truthy value,
        otherwise a ``CogSet`` holding the properties (without any ``None``
        header entry) plus the tab-joined, stripped comments of the property
        cells under the CognatesetTable comment column.
    """
    self.row_prop_separators = [
        self.db.dataset["CognatesetTable", k].separator for k in self.row_header
    ]
    property_cells = row[: self.left - 1]
    data = [clean_cell_value(cell) for cell in property_cells]
    properties: t.Dict[t.Optional[str], t.Any] = {
        # Only non-empty values are split, so blank list-valued cells stay falsy.
        n: (v if sep is None else (v.split(sep) if v else []))
        for n, sep, v in zip(self.row_header, self.row_prop_separators, data)
    }
    if not any(properties.values()):
        return None
    # delete all possible None entries coming from row_header
    cogset: t.Dict[str, t.Any] = {
        key: value for key, value in properties.items() if key is not None
    }
    comments: t.List[str] = [
        c for c in map(get_cell_comment, property_cells) if c is not None
    ]
    comment = "\t".join(comments).strip()
    cogset[self.db.dataset["CognatesetTable", "comment"].name] = comment
    return CogSet(cogset)
def language_from_column(self, column: t.List[openpyxl.cell.Cell]) -> Language:
    """Create a Language from the header cells of one language column.

    The cells ``column[: self.top - 1]`` are cleaned; the first cleaned value
    is stored under the LanguageTable name column. Cell comments on the
    language header are not read here.

    Raises:
        IndexError: if the header slice of the column is empty.
    """
    header_cells = column[: self.top - 1]
    cleaned = [clean_cell_value(cell) for cell in header_cells]
    name_column = self.db.dataset["LanguageTable", "name"].name
    return Language({name_column: cleaned[0]})
def language_from_column(self, column: t.List[openpyxl.cell.Cell]) -> Language:
    """Create a Language from the header cells of one language column.

    The first cleaned header value serves as the language name; the ID passed
    on is ``string_to_id`` applied to that name, and the comment is taken
    from the column's first cell.

    Raises:
        IndexError: if the header slice ``column[: self.top - 1]`` is empty.
    """
    header_values = [clean_cell_value(cell) for cell in column[: self.top - 1]]
    comment = get_cell_comment(column[0])
    name = header_values[0]
    # An id candidate must be provided, which is transformed into a unique id.
    id_candidate = string_to_id(name)
    return Language(ID=id_candidate, Name=name, Comment=comment)
def parse(
    self, cell: openpyxl.cell.Cell, language_id: str, cell_identifier: str = ""
) -> t.Iterable[Form]:
    """Yield the properties of every form found in the cell.

    The cleaned cell text is split with ``self.separate`` and each element is
    handed to ``self.parse_form``. Elements for which ``parse_form`` raises
    ``KeyError`` are skipped, as are falsy parse results. A cell without text
    yields nothing.
    """
    text = clean_cell_value(cell)
    if not text:
        # This is a generator, so returning here simply ends the iteration.
        return []
    # The context prefix is only built when a cell identifier was given.
    context = cell_identifier and f"{cell_identifier}: "
    for element in self.separate(text, context=context):
        try:
            form = self.parse_form(element, language_id, cell_identifier)
        except KeyError:
            continue
        else:
            if form:
                yield form
def properties_from_row(
    self, row: t.List[openpyxl.cell.Cell]
) -> t.Optional[RowObject]:
    """Create a row object from the property cells of one spreadsheet row.

    The cells ``row[: self.left - 1]`` are cleaned and paired with
    ``self.row_header``; a ``None`` header entry is dropped. The comment of
    the first cell is stored under the table's comment column, and the value
    of the name column is reused as the id candidate.

    Raises:
        KeyError: if the name column is not among the row's properties.
    """
    table = self.row_object().__table__
    dataset = self.db.dataset
    c_id = dataset[table, "id"].name
    c_comment = dataset[table, "comment"].name
    c_name = dataset[table, "name"].name

    values = [clean_cell_value(cell) for cell in row[: self.left - 1]]
    properties = dict(zip(self.row_header, values))
    # A dict holds at most one None key; drop it if row_header produced one.
    properties.pop(None, None)

    # The comment of the row's first cell becomes the row comment.
    properties[c_comment] = get_cell_comment(row[0])
    # cldf_name serves as cldf_id candidate
    properties[c_id] = properties[c_name]
    return self.row_object(properties)
def import_data_from_sheet(
    sheet,
    sheet_header,
    implicit: t.Mapping[Literal["languageReference", "id", "value"], str] = {},
    entries_to_concepts: t.Mapping[str, str] = KeyKeyDict(),
    concept_column: t.Tuple[str, str] = ("Concept_ID", "Concept_ID"),
) -> t.Iterable[Form]:
    """Yield one Form per data row of ``sheet``.

    The first row of the sheet is skipped as a header; every following row is
    zipped with ``sheet_header`` and cleaned cell by cell.

    Args:
        sheet: Worksheet to read; must offer ``iter_rows()`` and ``title``.
        sheet_header: Column names, in sheet order, used as keys of each form.
        implicit: Maps the roles ``"value"``, ``"id"`` and
            ``"languageReference"`` to the form columns to fill in: the
            tab-joined row values, ``None``, and the sheet title respectively.
        entries_to_concepts: Maps concept cell entries to concept ids.
        concept_column: ``(column in the yielded form, column in the sheet)``.

    Yields:
        The forms whose concept entry is found in ``entries_to_concepts``;
        other rows are skipped with a warning.

    Raises:
        AssertionError: if the sheet's concept column is not in
            ``sheet_header``.
    """
    target_column = concept_column[0]
    source_column = concept_column[1]

    row_iter = sheet.iter_rows()
    # TODO?: compare header of this sheet to format of given data set process
    # row. Maybe unnecessary.
    # Skip the header row. The default keeps an empty sheet from raising
    # StopIteration inside this generator, which would become RuntimeError.
    next(row_iter, None)

    # The column that is looked up is the sheet's one, so name it in the message.
    assert (
        source_column in sheet_header
    ), f"Could not find concept column {source_column} in your excel sheet {sheet.title}."

    for row in row_iter:
        data = Form({k: clean_cell_value(cell) for k, cell in zip(sheet_header, row)})
        if "value" in implicit:
            data[implicit["value"]] = "\t".join(map(str, data.values()))

        try:
            concept_entry = data.pop(source_column)
        except KeyError:
            # zip() stops at the shorter input, so a row narrower than the
            # header may not contain the concept column at all.
            logger.warning(
                f"A row in sheet {sheet.title} has no cell for the concept column {source_column}. The corresponding form was ignored and not added to the dataset."
            )
            continue

        try:
            data[target_column] = entries_to_concepts[concept_entry]
        except KeyError:
            logger.warning(
                f"Concept {concept_entry} was not found. Please add it to the concepts table manually. The corresponding form was ignored and not added to the dataset."
            )
            continue

        if "id" in implicit:
            data[implicit["id"]] = None
        if "languageReference" in implicit:
            data[implicit["languageReference"]] = sheet.title
        yield data
def parse(
    self, cell: openpyxl.cell.Cell, language_id: str, cell_identifier: str = ""
) -> t.Iterable[Judgement]:
    """Yield at most one Judgement described by the cell.

    The cognate set id is the last ``/``-separated part of the cell's
    hyperlink target. The cell text, wrapped in braces if it contains no
    ``{``, is parsed by ``alignment_from_braces`` into segment slices and an
    alignment. Properties whose column name is missing from ``self.c`` are
    left out.

    Any AttributeError raised along the way is swallowed and nothing is
    yielded — presumably this covers cells without a hyperlink.
    """
    try:
        url = cell.hyperlink.target
        text = clean_cell_value(cell)
        comment = get_cell_comment(cell)
        # alignment_from_braces is always given a string containing braces.
        braced = text if "{" in text else "{" + text + "}"
        segment_slice, alignment = alignment_from_braces(braced)
        properties = {
            self.c["c_id"]: url.split("/")[-1],
            self.c.get("c_segments"): ",".join(
                f"{start}:{end}" for start, end in segment_slice
            ),
            self.c.get("c_alignment"): alignment,
            self.c.get("c_comment"): comment,
        }
        # Columns unknown to self.c all ended up under the None key.
        properties.pop(None, None)
        yield Judgement(properties)
    except AttributeError:
        pass
def cells_are_empty(cells: t.Iterable[openpyxl.cell.Cell]) -> bool:
    """Return True if no cell in ``cells`` has a truthy cleaned value.

    An empty iterable counts as empty. Evaluation stops at the first cell
    with a truthy value instead of cleaning every cell up front.
    """
    return not any(clean_cell_value(cell) for cell in cells)