Exemple #1
0
    def properties_from_row(
        self, row: t.List[openpyxl.cell.Cell]
    ) -> t.Optional[RowObject]:
        """Build a cognate set from the leading cells of one spreadsheet row.

        The first ``self.left - 1`` cells of *row* are cleaned and paired, in
        order, with the column names in ``self.row_header``. A value whose
        ``CognatesetTable`` column declares a separator is split into a list;
        a blank value in such a column becomes an empty list. Comments
        attached to those cells are joined with tabs, stripped, and stored
        under the dataset's cognate set comment column.

        As a side effect, ``self.row_prop_separators`` is recomputed from the
        dataset on every call.

        Returns:
            A ``CogSet`` built from the collected properties, or ``None`` when
            none of the row's properties has a truthy value.
        """
        self.row_prop_separators = [
            self.db.dataset["CognatesetTable", k].separator for k in self.row_header
        ]
        header_cells = row[: self.left - 1]
        data = [clean_cell_value(cell) for cell in header_cells]

        def split_value(value: t.Any, separator: t.Optional[str]) -> t.Any:
            """Split *value* on *separator*, mapping blank values to ``[]``."""
            if separator is None:
                return value
            # ``"".split(sep)`` is ``[""]``, which is truthy: left alone, one
            # blank list-valued cell would make every row look non-empty.
            if value is None or value == "":
                return []
            return value.split(separator)

        properties: t.Dict[t.Optional[str], t.Any] = {
            name: split_value(value, separator)
            for name, separator, value in zip(
                self.row_header, self.row_prop_separators, data
            )
        }
        if not any(properties.values()):
            return None

        # Drop the entry for unnamed columns (``None`` in the row header).
        cogset: t.Dict[str, t.Any] = {
            key: value for key, value in properties.items() if key is not None
        }

        comments: t.List[str] = [
            c for c in map(get_cell_comment, header_cells) if c is not None
        ]
        comment = "\t".join(comments).strip()
        cogset[self.db.dataset["CognatesetTable", "comment"].name] = comment
        return CogSet(cogset)
Exemple #2
0
 def language_from_column(self, column: t.List[openpyxl.cell.Cell]) -> Language:
     """Create a ``Language`` from the header cells of a spreadsheet column.

     The first ``self.top - 1`` cells of *column* are cleaned; the first
     cleaned value is stored under the dataset's ``LanguageTable`` name
     column.
     """
     header_values = list(map(clean_cell_value, column[: self.top - 1]))
     # Open question kept from the original author: should the comment on
     # the first cell (``get_cell_comment(column[0])``) be recorded as well?
     name_column = self.db.dataset["LanguageTable", "name"].name
     return Language({name_column: header_values[0]})
Exemple #3
0
 def language_from_column(self,
                          column: t.List[openpyxl.cell.Cell]) -> Language:
     """Create a ``Language`` from the header cells of a spreadsheet column.

     The first ``self.top - 1`` cells of *column* are cleaned. The first
     cleaned value becomes the language name and, passed through
     ``string_to_id``, the ID candidate. The comment of the first cell is
     stored as the language comment.
     """
     header_values = list(map(clean_cell_value, column[:self.top - 1]))
     first_cell_comment = get_cell_comment(column[0])
     language_name = header_values[0]
     # Only an ID *candidate* is supplied here; per the original note it is
     # transformed into a unique id later on.
     id_candidate = string_to_id(language_name)
     return Language(
         ID=id_candidate,
         Name=language_name,
         Comment=first_cell_comment,
     )
Exemple #4
0
    def parse(
        self, cell: openpyxl.cell.Cell, language_id: str, cell_identifier: str = ""
    ) -> t.Iterable[Form]:
        """Yield the form parsed from each element of the cell's text.

        The cleaned cell value is split with ``self.separate`` and every
        element is handed to ``self.parse_form``. Elements for which
        ``parse_form`` raises ``KeyError`` or returns a falsy result are
        skipped. A cell with a falsy cleaned value yields nothing.
        """
        content = clean_cell_value(cell)
        if not content:
            # This is a generator, so returning here simply ends iteration.
            return []

        # Prefix for the separator's context: "<identifier>: " when an
        # identifier was given, otherwise the empty identifier itself.
        context_prefix = cell_identifier and f"{cell_identifier}: "
        for chunk in self.separate(content, context=context_prefix):
            try:
                parsed = self.parse_form(chunk, language_id, cell_identifier)
            except KeyError:
                continue
            if not parsed:
                continue
            yield parsed
Exemple #5
0
    def properties_from_row(
            self, row: t.List[openpyxl.cell.Cell]) -> t.Optional[RowObject]:
        """Create a row object from the leading cells of one spreadsheet row.

        The first ``self.left - 1`` cells are cleaned and paired with the
        column names in ``self.row_header``; unnamed (``None``) columns are
        discarded. The comment of the first cell is stored in the table's
        comment column, and the value of the name column is copied into the
        id column as an id candidate.
        """
        template = self.row_object()
        table = template.__table__
        id_column = self.db.dataset[table, "id"].name
        comment_column = self.db.dataset[table, "comment"].name
        name_column = self.db.dataset[table, "name"].name

        values = list(map(clean_cell_value, row[:self.left - 1]))
        # Skip headers that are None: those columns carry no property.
        properties = {
            header: value
            for header, value in zip(self.row_header, values)
            if header is not None
        }

        # The comment attached to the first cell documents the whole row.
        properties[comment_column] = get_cell_comment(row[0])

        # The name doubles as the candidate for the id.
        properties[id_column] = properties[name_column]

        return self.row_object(properties)
Exemple #6
0
def import_data_from_sheet(
    sheet,
    sheet_header,
    implicit: t.Mapping[Literal["languageReference", "id", "value"], str] = {},
    entries_to_concepts: t.Mapping[str, str] = KeyKeyDict(),
    concept_column: t.Tuple[str, str] = ("Concept_ID", "Concept_ID"),
) -> t.Iterable[Form]:
    """Yield one ``Form`` for every data row of *sheet*.

    The first row of the sheet is skipped as the header. Each following row
    is zipped with *sheet_header* into a ``Form`` of cleaned cell values.

    Args:
        sheet: Worksheet-like object providing ``iter_rows()`` and ``title``.
        sheet_header: Column names, in sheet order, used as the form's keys.
        implicit: Maps the properties that are not read from a column of
            their own to the form key they are stored under. ``"value"``
            receives all cell values of the row joined by tabs, ``"id"`` is
            set to ``None``, and ``"languageReference"`` receives the sheet
            title. The mapping is only read, never modified.
        entries_to_concepts: Maps the concept entry found in the sheet to the
            concept stored in the form.
        concept_column: ``(key the concept is stored under in the form, name
            of the concept column in sheet_header)``.

    Yields:
        The forms whose concept entry could be resolved. Rows whose concept
        lookup raises ``KeyError`` are logged and skipped.

    Raises:
        AssertionError: If ``concept_column[1]`` is not in *sheet_header*.
    """
    row_iter = sheet.iter_rows()

    # TODO?: compare header of this sheet to format of given data set process
    # row. Maybe unnecessary.
    # Skip the header row. The default matters: a bare ``next()`` on a sheet
    # without rows would raise StopIteration, which Python converts into a
    # RuntimeError inside a generator.
    next(row_iter, None)

    # Report the column that is actually looked up in the sheet header.
    assert (
        concept_column[1] in sheet_header
    ), f"Could not find concept column {concept_column[1]} in your excel sheet {sheet.title}."

    for row in row_iter:
        data = Form({k: clean_cell_value(cell) for k, cell in zip(sheet_header, row)})
        if "value" in implicit:
            data[implicit["value"]] = "\t".join(map(str, data.values()))
        # Bound up front so the warning below never sees an unbound name or
        # the previous row's entry when the pop itself raises KeyError.
        concept_entry = None
        try:
            concept_entry = data.pop(concept_column[1])
            data[concept_column[0]] = entries_to_concepts[concept_entry]
        except KeyError:
            logger.warning(
                f"Concept {concept_entry} was not found. Please add it to the concepts table manually. The corresponding form was ignored and not added to the dataset."
            )
            continue
        if "id" in implicit:
            data[implicit["id"]] = None
        if "languageReference" in implicit:
            data[implicit["languageReference"]] = sheet.title
        yield data
Exemple #7
0
    def parse(
        self, cell: openpyxl.cell.Cell, language_id: str, cell_identifier: str = ""
    ) -> t.Iterable[Judgement]:
        """Yield at most one ``Judgement`` described by a hyperlinked cell.

        The last path component of the cell's hyperlink target is stored in
        the ``c_id`` column. The cleaned cell text is parsed with
        ``alignment_from_braces``; text without any ``{`` is wrapped in braces
        first. The resulting slices, the alignment and the cell comment are
        stored in the ``c_segments``, ``c_alignment`` and ``c_comment``
        columns when ``self.c`` defines them. *language_id* and
        *cell_identifier* are not used here.

        Any ``AttributeError`` raised along the way (for example if
        ``cell.hyperlink`` is ``None``) ends the generator silently.
        """
        try:
            target = cell.hyperlink.target
            text = clean_cell_value(cell)
            comment = get_cell_comment(cell)
            braced = text if "{" in text else "{" + text + "}"
            segment_slices, alignment = alignment_from_braces(braced)
            properties = {
                self.c["c_id"]: target.split("/")[-1],
                self.c.get("c_segments"): ",".join(
                    f"{start}:{end}" for start, end in segment_slices
                ),
                self.c.get("c_alignment"): alignment,
                self.c.get("c_comment"): comment,
            }
            # Optional columns missing from self.c all end up under the key
            # None; discard that placeholder entry.
            properties.pop(None, None)
            yield Judgement(properties)

        except AttributeError:
            pass
Exemple #8
0
def cells_are_empty(cells: t.Iterable[openpyxl.cell.Cell]) -> bool:
    """Return ``True`` when no cell in *cells* has a truthy cleaned value.

    An empty iterable counts as empty. Cells are cleaned lazily, so
    evaluation stops at the first cell that has content.
    """
    return not any(clean_cell_value(cell) for cell in cells)