Ejemplo n.º 1
0
def clean_mapping(
        rows: t.Mapping[str, t.Mapping[str, str]]) -> t.Mapping[str, str]:
    """Create unique normalized IDs.

    For every key of ``rows`` an ID candidate is derived with
    ``string_to_id``: from the row's values joined by ``"_"`` if the row is
    non-empty, otherwise from the key itself. Candidates that collide get a
    numeric suffix ``_x2``, ``_x3``, … until they are unique.

    The lower-cased original keys are reserved: such an ID is only handed out
    unsuffixed, to the first row whose candidate equals it exactly.

    >>> clean_mapping({"A": {}, "B": {}})
    {'A': 'a', 'B': 'b'}

    >>> clean_mapping({"A": {}, "a": {}})
    {'A': 'a', 'a': 'a_x2'}
    """
    avoid = {key.lower() for key in rows}

    # IDs handed out so far. Keeping them in a set makes each uniqueness
    # check O(1) instead of scanning ``mapping.values()`` every time.
    taken: t.Set[str] = set()
    mapping: t.Dict[str, str] = {}
    for key, row in rows.items():
        if row:
            base = string_to_id("_".join(row.values()))
        else:
            base = string_to_id(key)

        candidate = base
        if base not in avoid or base in taken:
            # Either the base is not a reserved ID (then the loop only runs
            # if it is already taken), or the reserved spot has been used up:
            # append counters until the ID is unique.
            i = 1
            while candidate in avoid or candidate in taken:
                i += 1
                candidate = "{:}_x{:}".format(base, i)
        # else: the base is a reserved ID that nobody has claimed yet.

        mapping[key] = candidate
        taken.add(candidate)

    return mapping
Ejemplo n.º 2
0
    def source_from_source_string(
        self,
        source_string: str,
        language_id: t.Optional[str],
        logger: cli.logging.Logger = cli.logger,
    ) -> str:
        """Parse a string referencing a language-specific source.

        ``source_string`` is expected to look like ``{source}`` or
        ``{source:context}``. Everything after the first ``:`` is treated as
        the context (e.g. pages); colons and commas are removed from it.

        The source part is turned into an ID with ``string_to_id``; if
        ``language_id`` is given, the ID is built from
        ``{language_id}_s{source}`` instead.

        Returns
        =======
        ``source_id[context]`` if there is a non-empty context, otherwise
        just ``source_id``.

        A warning is logged when a context is present but the closing ``}``
        is missing. In that case the context is kept as it is: only an actual
        closing bracket is stripped, never the last character of the context
        itself.
        """
        context: t.Optional[str]
        if ":" in source_string:
            source_part, context = source_string.split(":", maxsplit=1)
            if context.endswith("}"):
                context = context[:-1]
            else:
                logger.warning(
                    f"In source {source_string}: Closing bracket '}}' is missing, split into source and page/context may be wrong"
                )
            # Restore the bracket that went to the context half of the split.
            source_string = source_part + "}"
            context = context.strip().replace(":", "").replace(",", "")
        else:
            context = None

        if source_string.startswith("{") and source_string.endswith("}"):
            source_string = source_string[1:-1]
        if language_id is None:
            source_id = string_to_id(source_string)
        else:
            source_id = string_to_id(f"{language_id:}_s{source_string:}")

        source_id = source_id.replace(":", "").replace(",", "")

        if context:
            return f"{source_id}[{context}]"
        else:
            return source_id
Ejemplo n.º 3
0
    def parse_cells(
        self,
        sheet: openpyxl.worksheet.worksheet.Worksheet,
        status_update: t.Optional[str] = None,
    ) -> None:
        """Import all rows of ``sheet``, starting at row ``self.top``.

        Each row is split at column ``self.left - 1`` into a header part and
        a forms part. The header is turned into a row object (looked up in
        the database, or inserted if ``self.on_row_not_found`` allows it). A
        row whose header yields no properties re-uses the previous row
        object. Every form cell that belongs to a known language column is
        parsed by ``self.cell_parser`` and each result is passed to
        ``self.handle_form``. The database is committed at the end.

        Raises
        ======
        AssertionError: When a row contains forms, but neither it nor any
            earlier row provided a row object.
        """
        languages = self.parse_all_languages(sheet)
        nothing_found = object()
        current_row_object = None
        progress = tqdm(sheet.iter_rows(min_row=self.top),
                        total=sheet.max_row - self.top)
        for row in progress:
            row_header = row[:self.left - 1]
            row_forms = row[self.left - 1:]

            # Create or retrieve the row object (i.e. a concept or a
            # cognateset) described by the row header.
            properties = self.properties_from_row(row_header)
            if properties:
                table = properties.__table__
                c_r_id = self.db.dataset[table, "id"].name
                c_r_name = self.db.dataset[table, "name"].name
                similar = self.db.find_db_candidates(properties,
                                                     self.check_for_row_match)
                # Only the first candidate, if any, is of interest.
                existing_id = next(iter(similar), nothing_found)
                if existing_id is not nothing_found:
                    properties[c_r_id] = existing_id
                elif not self.on_row_not_found(properties, row[0]):
                    continue
                else:
                    if c_r_id not in properties:
                        name = properties.get(c_r_name, "")
                        properties[c_r_id] = string_to_id(str(name))
                    self.db.make_id_unique(properties)
                    self.db.insert_into_db(properties)
                current_row_object = properties

            if current_row_object is None:
                if not any(c.value for c in row_forms):
                    continue
                raise AssertionError(
                    "Empty first row: Row had no properties, "
                    "and there was no previous row to copy")

            # Go through the row cell by cell; columns without a language
            # are skipped.
            for cell_with_forms in row_forms:
                try:
                    this_lan = languages[cell_with_forms.column]
                except KeyError:
                    continue

                # One cell may contain several forms.
                parsed = self.cell_parser.parse(
                    cell_with_forms,
                    this_lan,
                    f"{sheet.title}.{cell_with_forms.coordinate}",
                )
                for params in parsed:
                    self.handle_form(params, current_row_object,
                                     cell_with_forms, this_lan, status_update)
        self.db.commit()
Ejemplo n.º 4
0
 def language_from_column(self,
                          column: t.List[openpyxl.cell.Cell]) -> Language:
     """Build a Language from the header cells of one column.

     The cells above row ``self.top`` are cleaned; the first of them provides
     the language name, and the ID candidate derived from that name. The
     comment is read from the first cell of the column.
     """
     header_cells = column[:self.top - 1]
     data = [clean_cell_value(cell) for cell in header_cells]
     comment = get_cell_comment(column[0])
     name = data[0]
     # This is only an ID candidate, it is made unique later on.
     candidate_id = string_to_id(name)
     return Language(ID=candidate_id, Name=name, Comment=comment)
Ejemplo n.º 5
0
        def language_from_column(
                self, column: t.List[openpyxl.cell.Cell]) -> Language:
            """Parse a language column, according to regexes from the metadata.

            The cells of the column are paired with
            ``dialect.lang_cell_regexes`` and ``dialect.lang_comment_regexes``.
            The named groups of all matches are collected into one
            dictionary; a group name that occurs more than once has its
            values concatenated. If no language ID was captured, one is
            derived from the captured language name.

            Raises
            ======
            ValueError: When a cell or a cell comment cannot be parsed with
                the specified regex.

            """
            fields: t.Dict[str, str] = {}

            def merge_groups(found) -> None:
                # Add the named groups of a match to the collected fields.
                for key, value in found.groupdict().items():
                    fields[key] = fields[key] + value if key in fields else value

            specification = zip(column, dialect.lang_cell_regexes,
                                dialect.lang_comment_regexes)
            for cell, cell_regex, comment_regex in specification:
                if cell.value:
                    cell_match = re.fullmatch(cell_regex, cell.value.strip(),
                                              re.DOTALL)
                    if cell_match is None:
                        raise ValueError(
                            f"In cell {cell.coordinate}: Expected to encounter match "
                            f"for {cell_regex}, but found {cell.value}")
                    merge_groups(cell_match)
                if cell.comment:
                    comment_match = re.fullmatch(comment_regex,
                                                 cell.comment.content,
                                                 re.DOTALL)
                    if comment_match is None:
                        raise ValueError(
                            f"In cell {cell.coordinate}: Expected to encounter match "
                            f"for {comment_regex}, but found {cell.comment.content}"
                        )
                    merge_groups(comment_match)

            language_table = "LanguageTable"
            c_l_id = self.db.dataset[language_table, "id"].name
            c_l_name = self.db.dataset[language_table, "name"].name
            if c_l_id not in fields:
                fields[c_l_id] = string_to_id(fields[c_l_name])
            return Language(fields)
Ejemplo n.º 6
0
def read_single_excel_sheet(
    dataset: pycldf.Dataset,
    sheet: openpyxl.worksheet.worksheet.Worksheet,
    match_form: t.Optional[t.List[str]] = None,
    entries_to_concepts: t.Mapping[str, str] = KeyKeyDict(),
    concept_column: t.Optional[str] = None,
    ignore_missing: bool = False,
    ignore_superfluous: bool = False,
    status_update: t.Optional[str] = None,
):
    """Import the forms listed in one Excel sheet into ``dataset``.

    The sheet title is taken as the language: if it is the name of a
    language in the LanguageTable, that language's ID is used, otherwise the
    title itself serves as language ID.

    Parameters
    ==========
    dataset: The dataset to import into; it is written back at the end.
    sheet: The worksheet to read.
    match_form: Names of the form columns used to decide whether a form is
        already in the dataset. Defaults to the form and language columns.
        If the concept column has no separator, the concept column is added
        to the criteria. The list passed in is not modified.
    entries_to_concepts: Lookup of known concepts; forms whose concept is
        not a key of this mapping are skipped.
    concept_column: Name of the sheet column that contains the concept, if
        it differs from the dataset's #parameterReference column.
    ignore_missing: Only warn, instead of raising, when the sheet lacks
        columns of the FormTable.
    ignore_superfluous: Only warn, instead of raising, when the sheet has
        columns that the FormTable does not have.
    status_update: If given, stored in the "Status_Column" of each newly
        inserted form.

    Raises
    ======
    ValueError: When the columns of the sheet do not fit the FormTable and
        the corresponding ``ignore_…`` flag is not set.
    """
    concept_columns: t.Tuple[str, str]
    if concept_column is None:
        concept_columns = (
            dataset["FormTable", "parameterReference"].name,
            dataset["FormTable", "parameterReference"].name,
        )
    else:
        concept_columns = (
            dataset["FormTable", "parameterReference"].name,
            concept_column,
        )
    db = DB(dataset)
    db.cache_dataset()
    # required cldf fields of a form
    c_f_id = db.dataset["FormTable", "id"].name
    c_f_language = db.dataset["FormTable", "languageReference"].name
    c_f_form = db.dataset["FormTable", "form"].name
    c_f_value = db.dataset["FormTable", "value"].name
    c_f_concept = db.dataset["FormTable", "parameterReference"].name
    # Work on a copy, so that appending the concept column below does not
    # change the list the caller handed in.
    if match_form:
        match_form = list(match_form)
    else:
        match_form = [c_f_form, c_f_language]
    if not db.dataset["FormTable", c_f_concept].separator:
        match_form.append(c_f_concept)

    sheet_header = get_headers_from_excel(sheet)
    form_header = list(db.dataset["FormTable"].tableSchema.columndict.keys())

    # These columns don't need to be given, we can infer them from the sheet
    # title and from the other data:
    implicit: t.Dict[Literal["languageReference", "id", "value"], str] = {}
    if c_f_language not in sheet_header:
        implicit["languageReference"] = c_f_language
    if c_f_id not in sheet_header:
        implicit["id"] = c_f_id
    if c_f_value not in sheet_header:
        implicit["value"] = c_f_value

    found_columns = set(sheet_header) - {concept_column} - set(implicit.values())
    expected_columns = set(form_header) - {c_f_concept} - set(implicit.values())
    if not found_columns >= expected_columns:
        message = f"Your Excel sheet {sheet.title} is missing columns {expected_columns - found_columns}."
        if ignore_missing:
            logger.warning(message)
        else:
            raise ValueError(message)
    if not found_columns <= expected_columns:
        message = f"Your Excel sheet {sheet.title} contained unexpected columns {found_columns - expected_columns}."
        if ignore_superfluous:
            logger.warning(message)
        else:
            raise ValueError(message)

    # Look up the language by name; fall back to the sheet title as ID.
    c_l_name = db.dataset["LanguageTable", "name"].name
    c_l_id = db.dataset["LanguageTable", "id"].name
    language_name_to_language_id = {
        row[c_l_name]: row[c_l_id] for row in db.cache["LanguageTable"].values()
    }
    language_name = sheet.title
    language_id = language_name_to_language_id.get(language_name, language_name)

    # read new data from sheet
    for form in import_data_from_sheet(
        sheet,
        sheet_header=sheet_header,
        implicit=implicit,
        entries_to_concepts=entries_to_concepts,
        concept_column=concept_columns,
    ):
        # if concept not in dataset, don't add form
        try:
            entries_to_concepts[form[c_f_concept]]
        except KeyError:
            continue
        # Split the values of all list-valued columns at their separator.
        for item, value in form.items():
            try:
                sep = db.dataset["FormTable", item].separator
            except KeyError:
                continue
            if sep is None:
                continue
            form[item] = value.split(sep)
        # Look for candidates, link to existing form or add new form. Only
        # the first candidate is considered.
        form_candidates = db.find_db_candidates(form, match_form)
        for form_id in form_candidates:
            logger.info(f"Form {form[c_f_value]} was already in data set.")

            if db.dataset["FormTable", c_f_concept].separator:
                # The cache is keyed by table name first, then by row ID.
                cached_form = db.cache["FormTable"][form_id]
                for new_concept in form[c_f_concept]:
                    if new_concept not in cached_form[c_f_concept]:
                        cached_form[c_f_concept].append(new_concept)
                        logger.info(
                            f"Existing form {form_id} was added to concept {form[c_f_concept]}. "
                            f"If this was not intended (because it was a homophonous form, not a polysemy), "
                            f"you need to manually remove that concept "
                            f"from the old form and create a separate new form."
                        )
            break
        else:
            # No candidate found: this is a new form.
            form[c_f_language] = language_id
            if "id" in implicit:
                # TODO: check for type of form id column
                form_concept = form[c_f_concept]
                concept_reference = (
                    form_concept[0] if isinstance(form_concept, list) else form_concept
                )
                form[c_f_id] = string_to_id(f"{form[c_f_language]}_{concept_reference}")
            db.make_id_unique(form)
            if status_update:
                form["Status_Column"] = status_update
            db.insert_into_db(form)
    # write to cldf
    db.write_dataset_from_cache()
Ejemplo n.º 7
0
def import_interleaved(
    ws: openpyxl.worksheet.worksheet.Worksheet,
    logger: logging.Logger = cli.logger,
    ids: t.Optional[t.Set[types.Cognateset_ID]] = None,
) -> t.Iterable[
    t.Tuple[
        types.Form_ID,
        types.Language_ID,
        types.Parameter_ID,
        str,
        None,
        types.Cognateset_ID,
    ]
]:
    """Yield the forms of a worksheet in interleaved layout.

    As read by this function, the first column contains, from row 2 on,
    alternating concept cells and cells that are ignored. Every further
    column starts with a language name, followed by alternating cells of
    forms and cells of the cognate sets of those forms. A cell can list
    several forms or cognate sets, separated by commas or semicolons;
    separators inside parentheses do not split a form.

    Parameters
    ==========
    ws: The worksheet to read.
    logger: Receives warnings about inconsistent cells.
    ids: IDs that are already taken. Every ID generated here is added to
        this set. NOTE(review): the annotation says Cognateset_ID, but the
        set is used for the generated form IDs.

    Yields
    ======
    Tuples ``(id, language name, concept, form, None, cognate set)``. Forms
    or cognate sets that are ``"?"`` are skipped.
    """
    if ids is None:
        ids = set()

    # A comma or semicolon, together with any non-word characters after it.
    comma_or_semicolon = re.compile("[,;]\\W*")

    # Collect the concepts from every other cell of the first column.
    concepts = []
    for concept_metadata in ws.iter_cols(min_col=1, max_col=1, min_row=2):
        for entry, cogset in zip(concept_metadata[::2], concept_metadata[1::2]):
            try:
                concepts.append(clean_cell_value(entry))
            except AttributeError:
                # Stop collecting at the first cell that cannot be cleaned.
                break

    for language in cli.tq(
        ws.iter_cols(min_col=2), task="Parsing cells", total=ws.max_column
    ):
        language_name = clean_cell_value(language[0])
        # c counts the form/cognate set cell pairs, and so indexes concepts.
        for c, (entry, cogset) in enumerate(zip(language[1::2], language[2::2])):
            if not entry.value:
                if cogset.value:
                    logger.warning(
                        f"Cell {entry.coordinate} was empty, but cognatesets {cogset.value} were given in {cogset.coordinate}."
                    )
                continue
            bracket_level = 0
            i = 0
            f = clean_cell_value(entry)
            forms = []

            # Check that the cell content is something with a length.
            try:
                len(f)
            except TypeError:
                # NOTE(review): presumably this reports the error and aborts
                # the program; confirm against cli.Exit.
                cli.Exit.INVALID_INPUT(
                    "I expected one or more forms (so, text) in cell {}, but found {}. Do you have more than one header row?".format(
                        entry.coordinate, f
                    )
                )

            # Split f at separators outside of parentheses. After each split
            # f is cut down to the unparsed rest, and i starts again at 0.
            while i < len(f):
                match = comma_or_semicolon.match(f[i:])
                if f[i] == "(":
                    bracket_level += 1
                    i += 1
                    continue
                elif f[i] == ")":
                    bracket_level -= 1
                    i += 1
                    continue
                elif bracket_level:
                    # Inside parentheses nothing is split.
                    i += 1
                    continue
                elif match:
                    forms.append(f[:i].strip())
                    i += match.span()[1]
                    f = f[i:]
                    i = 0
                else:
                    i += 1

            # What remains after the last separator is the last form.
            forms.append(f.strip())

            # A single integer is one cognate set, anything else is split at
            # commas and semicolons.
            if isinstance(clean_cell_value(cogset), int):
                cogsets = [str(clean_cell_value(cogset))]
            else:
                cogset = clean_cell_value(cogset)
                cogsets = comma_or_semicolon.split(cogset.strip())

            # Either one cognate set or one cognate set per form is fine,
            # anything else is reported.
            if len(cogsets) == 1 or len(cogsets) == len(forms):
                True
            else:
                logger.warning(
                    "{:}: Forms ({:}) did not match cognates ({:})".format(
                        entry.coordinate, ", ".join(forms), ", ".join(cogsets)
                    )
                )
            # zip stops at the shorter sequence: at most one form more than
            # there are cognate sets is yielded, that one with None.
            for form, cogset in zip(forms, cogsets + [None]):
                if form == "?" or cogset == "?":
                    continue
                base_id = util.string_to_id(f"{language_name}_{concepts[c]}")
                id = base_id
                # Append _s2, _s3, … until the ID is not yet taken.
                synonym = 1
                while id in ids:
                    synonym += 1
                    id = f"{base_id}_s{synonym:d}"
                yield (id, language_name, concepts[c], form, None, cogset)
                ids.add(id)
Ejemplo n.º 8
0
def add_cognate_table(
    dataset: pycldf.Wordlist,
    split: bool = True,
    logger: cli.logging.Logger = cli.logger,
) -> None:
    """Move the cognate judgements from the FormTable to a new CognateTable.

    Nothing happens if the dataset already has a CognateTable. Otherwise the
    component is added, and every form with a cognate set reference is
    turned into one judgement, which has the ID of its form. The cognate set
    reference, segment slice and alignment columns are then removed from the
    FormTable, and both tables are written.

    Parameters
    ==========
    dataset: The word list to modify.
    split: If true, the cognate set ID is built from the concept and the
        cognate set reference of the form. If false, the cognate set
        reference is used as it is.
    logger: Receives warnings about forms without segments.
    """
    if "CognateTable" in dataset:
        return
    dataset.add_component("CognateTable")

    # TODO: Check if that cognatesetReference is already a foreign key to
    # elsewhere (could be a CognatesetTable, could be whatever), because then
    # we need to transfer that knowledge.

    # Whatever the FormTable has of the columns useful for a cognate table:
    # form IDs, segments, segment slices, cognateset references, alignments
    columns = {
        "id": dataset["FormTable", "id"].name,
        "concept": dataset["FormTable", "parameterReference"].name,
        "form": dataset["FormTable", "form"].name,
    }
    optional_terms = ("segments", "segmentSlice", "cognatesetReference",
                      "alignment")
    for term in optional_terms:
        try:
            columns[term] = dataset["FormTable", term].name
        except KeyError:
            continue

    forms = cache_table(dataset, columns=columns)
    cognate_judgements = []
    forms_without_segments = 0
    for f, form in cli.tq(forms.items(),
                          task="Extracting cognate judgements from forms…"):
        reference = form.get("cognatesetReference")
        if not reference:
            continue

        if split:
            cogset = util.string_to_id(f"{form['concept']}-{reference}")
        else:
            cogset = reference
        judgement = {"ID": f, "Form_ID": f, "Cognateset_ID": cogset}

        try:
            judgement["Segment_Slice"] = form["segmentSlice"]
        except KeyError:
            # No slice given, so the judgement covers all segments.
            try:
                segments = form["segments"]
                if not segments:
                    raise ValueError("No segments")
                if ("+" in segments
                        and dataset["FormTable",
                                    "cognatesetReference"].separator):
                    logger.warning(
                        "You seem to have morpheme annotations in your cognates. I will probably mess them up a bit, because I have not been taught properly how to deal with them. Sorry!"
                    )
                judgement["Segment_Slice"] = [f"1:{len(segments):d}"]
            except (KeyError, TypeError, ValueError):
                forms_without_segments += 1
                # Only the first few forms are reported one by one.
                if forms_without_segments < 5:
                    logger.warning(
                        f"No segments found for form {f} ({form['form']})."
                    )

        # What does an alignment mean without segments or their slices?
        # Doesn't matter, if we were given one, we take it.
        judgement["Alignment"] = form.get("alignment")
        cognate_judgements.append(judgement)

    if forms_without_segments >= 5:
        logger.warning(
            "No segments found for %d forms. You can generate segments using `lexedata.edit.segment_using_clts`.",
            forms_without_segments,
        )

    # Delete the columns that have moved to the CognateTable, first from the
    # rows, then from the table schema.
    cols = dataset["FormTable"].tableSchema.columns
    remove = {
        dataset["FormTable", c].name
        for c in ["cognatesetReference", "segmentSlice", "alignment"]
        if ("FormTable", c) in dataset
    }

    forms = []
    for form in dataset["FormTable"]:
        for c in remove:
            form.pop(c, None)
        forms.append(form)
    for c in remove:
        del cols[cols.index(dataset["FormTable", c])]

    dataset.write(FormTable=forms)

    dataset.write(CognateTable=cognate_judgements)
Ejemplo n.º 9
0
def read_single_excel_sheet(
    dataset: pycldf.Dataset,
    sheet: openpyxl.worksheet.worksheet.Worksheet,
    logger: cli.logging.Logger = cli.logger,
    match_form: t.Optional[t.List[str]] = None,
    entries_to_concepts: t.Mapping[str, str] = KeyKeyDict(),
    concept_column: t.Optional[str] = None,
    ignore_missing: bool = False,
    ignore_superfluous: bool = False,
    status_update: t.Optional[str] = None,
) -> t.Mapping[str, ImportLanguageReport]:
    """Import the forms listed in one Excel sheet into ``dataset``.

    The normalized sheet title is taken as the language: if it is the name
    of a language in the LanguageTable, that language's ID is used,
    otherwise the title itself serves as language ID.

    Parameters
    ==========
    dataset: The dataset to import into; it is written back at the end.
    sheet: The worksheet to read.
    logger: Receives the messages about the import.
    match_form: Names of the form columns used to decide whether a form is
        already in the dataset. Defaults to the form and language columns.
        If the concept column has no separator, the concept column is added
        to the criteria. The list passed in is not modified.
    entries_to_concepts: Lookup of known concepts; forms whose concept is
        not a key of this mapping are skipped with a warning.
    concept_column: Name of the sheet column that contains the concept, if
        it differs from the dataset's #parameterReference column.
    ignore_missing: Only log, instead of raising, when the sheet lacks
        columns of the FormTable.
    ignore_superfluous: Only log, instead of raising, when the sheet has
        columns that the FormTable does not have.
    status_update: If given, stored in the "Status_Column" of each newly
        inserted form.

    Returns
    =======
    A mapping from language ID to a report, which says whether the language
    is new and counts the skipped forms, the existing forms, the existing
    forms that got a new concept, and the new forms.

    Raises
    ======
    ValueError: When the columns of the sheet do not fit the FormTable and
        the corresponding ``ignore_…`` flag is not set.
    """
    report: t.Dict[str, ImportLanguageReport] = defaultdict(ImportLanguageReport)

    concept_columns: t.Tuple[str, str]
    if concept_column is None:
        concept_columns = (
            dataset["FormTable", "parameterReference"].name,
            dataset["FormTable", "parameterReference"].name,
        )
    else:
        concept_columns = (
            dataset["FormTable", "parameterReference"].name,
            concept_column,
        )
    db = DB(dataset)
    db.cache_dataset()
    # required cldf fields of a form
    c_f_id = db.dataset["FormTable", "id"].name
    c_f_language = db.dataset["FormTable", "languageReference"].name
    c_f_form = db.dataset["FormTable", "form"].name
    c_f_value = db.dataset["FormTable", "value"].name
    c_f_concept = db.dataset["FormTable", "parameterReference"].name
    # Work on a copy, so that appending the concept column below does not
    # change the list the caller handed in.
    if match_form:
        match_form = list(match_form)
    else:
        match_form = [c_f_form, c_f_language]
    if not db.dataset["FormTable", c_f_concept].separator:
        logger.warning(
            "Your metadata does not allow polysemous forms. According to your specifications, "
            "identical forms with different concepts will always be considered homophones, not a single "
            "polysemous form. To include polysemous forms, add a separator to your FormTable #parameterReference "
            "in the Metadata.json To find potential polysemies, run lexedata.report.list_homophones."
        )
        match_form.append(c_f_concept)
    else:
        if c_f_concept in match_form:
            logger.info(
                "Matching by concept enabled: To find potential polysemies, run lexedata.report.list_homophones."
            )

    sheet_header = get_headers_from_excel(sheet)
    form_header = list(db.dataset["FormTable"].tableSchema.columndict.keys())

    # These columns don't need to be given, we can infer them from the sheet
    # title and from the other data:
    implicit: t.Dict[Literal["languageReference", "id", "value"], str] = {}
    if c_f_language not in sheet_header:
        implicit["languageReference"] = c_f_language
    if c_f_id not in sheet_header:
        implicit["id"] = c_f_id
    if c_f_value not in sheet_header:
        implicit["value"] = c_f_value

    found_columns = set(sheet_header) - {concept_column} - set(implicit.values())
    expected_columns = set(form_header) - {c_f_concept} - set(implicit.values())
    if not found_columns >= expected_columns:
        if ignore_missing:
            logger.info(
                f"Your Excel sheet {sheet.title} is missing columns {expected_columns - found_columns}. "
                f"For the newly imported forms, these columns will be left empty in the dataset."
            )
        else:
            raise ValueError(
                f"Your Excel sheet {sheet.title} is missing columns {expected_columns - found_columns}. "
                f"Clean up your data, or use --ignore-missing-excel-columns to import anyway and leave these "
                f"columns empty in the dataset for the newly imported forms."
            )
    if not found_columns <= expected_columns:
        if ignore_superfluous:
            logger.info(
                f"Your Excel sheet {sheet.title} contained unexpected columns "
                f"{found_columns - expected_columns}. These columns will be ignored."
            )
        else:
            raise ValueError(
                f"Your Excel sheet {sheet.title} contained unexpected columns "
                f"{found_columns - expected_columns}. Clean up your data, or use "
                f"--ignore-superfluous-excel-columns to import the data anyway and ignore these columns."
            )
    # check if language exist
    c_l_name = db.dataset["LanguageTable", "name"].name
    c_l_id = db.dataset["LanguageTable", "id"].name
    language_name_to_language_id = {
        row[c_l_name]: row[c_l_id] for row in db.cache["LanguageTable"].values()
    }
    language_name = normalize_string(sheet.title)
    if language_name in language_name_to_language_id:
        language_id = language_name_to_language_id[language_name]
        report[language_id].is_new_language = False
    else:
        language_id = language_name
        report[language_id].is_new_language = True

    # read new data from sheet
    for form in cli.tq(
        import_data_from_sheet(
            sheet,
            sheet_header=sheet_header,
            implicit=implicit,
            language_id=language_id,
            concept_column=concept_columns,
        ),
        task=f"Parsing cells of sheet {sheet.title}",
        total=sheet.max_row,
    ):
        # if concept not in dataset, don't add form
        # Reset for every form: if the form has no concept entry at all, the
        # warning below must not fail on an unbound name, or show the
        # concept of the previous form.
        concept_entry = None
        try:
            concept_entry = form[c_f_concept]
            entries_to_concepts[concept_entry]
        except KeyError:
            logger.warning(
                f"Concept {concept_entry} was not found. Please add it to the concepts.csv file manually. "
                f"The corresponding form was ignored and not added to the dataset."
            )
            report[language_id].skipped += 1
            continue
        # Split the values of all list-valued columns at their separator.
        for item, value in form.items():
            try:
                sep = db.dataset["FormTable", item].separator
            except KeyError:
                continue
            if sep is None:
                continue
            form[item] = value.split(sep)
        # Look for candidates, link to existing form or add new form.
        form_candidates = db.find_db_candidates(form, match_form)
        if form_candidates:
            new_concept_added = False
            # Only the first candidate is considered.
            for form_id in form_candidates:
                logger.info(f"Form {form[c_f_value]} was already in dataset.")

                if db.dataset["FormTable", c_f_concept].separator:
                    for new_concept in form[c_f_concept]:
                        if (
                            new_concept
                            not in db.cache["FormTable"][form_id][c_f_concept]
                        ):
                            db.cache["FormTable"][form_id][c_f_concept].append(
                                new_concept
                            )
                            logger.info(
                                f"New form-concept association: Concept {form[c_f_concept]} was added to existing form "
                                f"{form_id}. If this was not intended "
                                f"(because it is a homophonous form, not a polysemy), "
                                f"you need to manually remove that concept from the old form in forms.csv "
                                f"and create a separate new form. If you want to treat identical forms "
                                f"as homophones in general, add  "
                                f"--match-forms={' '.join(match_form)}, "
                                f"{db.dataset['FormTable', 'parameterReference']} "
                                f"when you run this script."
                            )
                            new_concept_added = True
                break

            if new_concept_added:
                report[language_id].concepts += 1
            else:
                report[language_id].existing += 1
        else:
            # No candidate found: this is a new form.
            form[c_f_language] = language_id
            if "id" in implicit:
                # TODO: check for type of form id column
                form_concept = form[c_f_concept]
                concept_reference = (
                    form_concept[0] if isinstance(form_concept, list) else form_concept
                )
                form[c_f_id] = string_to_id(f"{form[c_f_language]}_{concept_reference}")
            db.make_id_unique(form)
            if status_update:
                form["Status_Column"] = status_update
            db.insert_into_db(form)
            report[language_id].new += 1
    # write to cldf
    db.write_dataset_from_cache()
    return report
Ejemplo n.º 10
0
    def parse_cells(
        self,
        sheet: openpyxl.worksheet.worksheet.Worksheet,
        status_update: t.Optional[str] = None,
    ) -> None:
        """Import all rows of ``sheet``, starting at row ``self.top``.

        Each row is split at column ``self.left - 1`` into a header part and
        a forms part. The header is turned into a row object (looked up in
        the database, or inserted if ``self.on_row_not_found`` allows it). A
        row without properties, or with only empty ones, re-uses the
        previous row object. Every form cell that belongs to a known
        language column is parsed by ``self.cell_parser`` and each result is
        passed to ``self.handle_form``. The database is committed at the end.

        Raises
        ======
        AssertionError: When a row contains forms, but neither it nor any
            earlier row provided a row object.
        NotImplementedError: When the row objects are rows of the FormTable.
        """
        languages = self.parse_all_languages(sheet)
        row_object: t.Optional[R] = None
        for row in cli.tq(
                sheet.iter_rows(min_row=self.top),
                task="Parsing cells",
                total=sheet.max_row - self.top,
        ):
            row_header, row_forms = row[:self.left - 1], row[self.left - 1:]
            # Parse the row header, creating or retrieving the associated row
            # object (i.e. a concept or a cognateset)
            properties = self.properties_from_row(row_header)
            if properties:
                c_r_id = self.db.dataset[properties.__table__, "id"].name
                try:
                    c_r_name = self.db.dataset[properties.__table__,
                                               "name"].name
                except KeyError:
                    # Without a name column, the lookup below falls back to
                    # the empty string.
                    c_r_name = None
                similar = self.db.find_db_candidates(properties,
                                                     self.check_for_row_match)
                for row_id in similar:
                    # Only the first candidate is of interest.
                    properties[c_r_id] = row_id
                    break
                else:
                    if self.on_row_not_found(
                            properties, cell_identifier=row[0].coordinate):
                        if c_r_id not in properties:
                            properties[c_r_id] = string_to_id(
                                str(properties.get(c_r_name, "")))
                        self.db.make_id_unique(properties)
                        self.db.insert_into_db(properties)
                    else:
                        continue
                # check the fields of properties are not empty, if so, set row
                # object to properties. This means that if there is no
                # properties object, or if it is empty, the previous row object
                # is re-used. This is intentional.
                if any(properties.values()):
                    row_object = properties

            if row_object is None:
                if any(c.value for c in row_forms):
                    raise AssertionError(
                        "Your first data row didn't have a name. "
                        "Please check your format specification or ensure the first row has a name."
                    )
                else:
                    continue
            # Parse the row, cell by cell
            for cell_with_forms in row_forms:
                try:
                    this_lan = languages[cell_with_forms.column]
                except KeyError:
                    continue

                # Rows of the FormTable are not supported as row objects.
                # The code that used to follow this check could never run
                # and has been removed.
                if row_object.__table__ == "FormTable":
                    raise NotImplementedError(
                        "TODO: I am confused why what I'm doing right now ever landed on my agenda, but you seem to have gotten me to attempt it. Please contact the developers and tell them what you did, so they can implement the thing you tried to do properly!"
                    )
                # Parse the cell, which results (potentially) in multiple forms
                for params in self.cell_parser.parse(
                        cell_with_forms,
                        this_lan,
                        f"{sheet.title}.{cell_with_forms.coordinate}",
                ):
                    self.handle_form(params, row_object, cell_with_forms,
                                     this_lan, status_update)
        self.db.commit()