def properties_from_row(
    self, row: t.List[openpyxl.cell.Cell]
) -> t.Optional[RowObject]:
    """Build a ``CogSet`` from the leading metadata cells of ``row``.

    The first ``self.left - 1`` cells of ``row`` are cleaned with
    ``clean_cell_value`` and paired with ``self.row_header``. A value whose
    CognatesetTable column declares a separator is split on that separator;
    other values are stored unchanged.

    Side effect: ``self.row_prop_separators`` is recomputed from the dataset
    on every call.

    Args:
        row: The cells of one spreadsheet row.

    Returns:
        ``None`` if no property value is truthy; otherwise a ``CogSet``
        holding every property with a non-``None`` header, plus the
        tab-joined, stripped comments of the metadata cells stored under
        the name of the CognatesetTable "comment" column.
    """
    self.row_prop_separators = [
        self.db.dataset["CognatesetTable", k].separator for k in self.row_header
    ]

    # Slice once; the same cells supply both the values and the comments.
    cells = row[: self.left - 1]
    data = [clean_cell_value(cell) for cell in cells]
    properties: t.Dict[t.Optional[str], t.Any] = {
        n: (v if sep is None else v.split(sep))
        for n, sep, v in zip(self.row_header, self.row_prop_separators, data)
    }
    # NOTE(review): if a cleaned value is "", "".split(sep) yields [""],
    # which is truthy -- so a row with an empty separator-valued cell is
    # presumably not treated as empty here. Confirm this is intended.
    if not any(properties.values()):
        return None

    # Drop the entry keyed by None that comes from unnamed header columns.
    cogset: t.Dict[str, t.Any] = {
        key: value for key, value in properties.items() if key is not None
    }

    comments: t.List[str] = [
        c for c in (get_cell_comment(cell) for cell in cells) if c is not None
    ]
    comment = "\t".join(comments).strip()
    cogset[self.db.dataset["CognatesetTable", "comment"].name] = comment
    return CogSet(cogset)
def language_from_column(self, column: t.List[openpyxl.cell.Cell]) -> Language:
    """Create a ``Language`` from the header cells of one spreadsheet column.

    The first ``self.top - 1`` cells of ``column`` are cleaned with
    ``clean_cell_value``. The first cleaned value becomes the language name
    and, passed through ``string_to_id``, the ID candidate. The comment is
    read from the first cell of the column only.

    Args:
        column: The cells of one spreadsheet column.

    Returns:
        A ``Language`` with ``ID``, ``Name`` and ``Comment`` set.
    """
    header_values = [clean_cell_value(cell) for cell in column[: self.top - 1]]
    comment = get_cell_comment(column[0])
    name = header_values[0]
    # An id candidate must be provided; per the original author's note it is
    # transformed into a unique id.
    id_candidate = string_to_id(name)
    return Language(ID=id_candidate, Name=name, Comment=comment)
def properties_from_row(
    self, row: t.List[openpyxl.cell.Cell]
) -> t.Optional[RowObject]:
    """Build a row object from the leading metadata cells of ``row``.

    The first ``self.left - 1`` cells are cleaned with ``clean_cell_value``
    and paired with ``self.row_header``; pairs whose header is ``None`` are
    discarded. The comment of the first cell is stored under the table's
    "comment" column name, and the value of the "name" column is copied to
    the "id" column as an ID candidate.

    Args:
        row: The cells of one spreadsheet row.

    Returns:
        ``self.row_object(properties)``. This body never returns ``None``
        itself, although the annotation allows it.

    Raises:
        KeyError: If the header provides no value for the "name" column.
    """
    table = self.row_object().__table__
    dataset = self.db.dataset
    c_id = dataset[table, "id"].name
    c_comment = dataset[table, "comment"].name
    c_name = dataset[table, "name"].name

    cleaned = [clean_cell_value(cell) for cell in row[: self.left - 1]]
    # Skip unnamed header columns (header is None) while pairing.
    properties = {
        header: value
        for header, value in zip(self.row_header, cleaned)
        if header is not None
    }

    # Only the first cell's comment is kept.
    properties[c_comment] = get_cell_comment(row[0])
    # The name serves as the id candidate.
    properties[c_id] = properties[c_name]
    return self.row_object(properties)
def parse(
    self, cell: openpyxl.cell.Cell, language_id: str, cell_identifier: str = ""
) -> t.Iterable[Judgement]:
    """Yield at most one ``Judgement`` parsed from a hyperlinked cell.

    The text after the last "/" of the cell's hyperlink target is stored
    under ``self.c["c_id"]``. The cleaned cell text, wrapped in braces when
    it contains no "{", is passed to ``alignment_from_braces``, whose result
    supplies the segment slices and the alignment. The optional columns
    "c_segments", "c_alignment" and "c_comment" are included only when
    ``self.c`` maps them.

    ``language_id`` and ``cell_identifier`` are accepted but not used in
    this body.

    Any ``AttributeError`` raised while processing (for instance when the
    cell has no hyperlink) makes the generator finish without yielding.
    """
    # NOTE(review): the try body is broad, so an AttributeError raised by any
    # helper called below is also swallowed silently. Confirm that is
    # intended before narrowing it.
    try:
        url = cell.hyperlink.target
        text = clean_cell_value(cell)
        comment = get_cell_comment(cell)

        braced = text if "{" in text else "{" + text + "}"
        segment_slices, alignment = alignment_from_braces(braced)
        segments = ",".join(
            "{:}:{:}".format(i, j) for i, j in segment_slices
        )

        properties = {
            self.c["c_id"]: url.split("/")[-1],
            self.c.get("c_segments"): segments,
            self.c.get("c_alignment"): alignment,
            self.c.get("c_comment"): comment,
        }
        # Unmapped optional columns all collapse onto the key None.
        properties.pop(None, None)
        yield Judgement(properties)
    except AttributeError:
        pass