def form_to_cell_value(self, form: types.Form) -> str:
    """Build a string describing the form itself.

    Provide the best transcription and all translations of the form strung
    together, in the shape ``transcription ‘concept, concept’``. If the form
    has a non-empty ``Comment`` entry, the module-level ``WARNING`` marker is
    appended after a space.

    The concept column is looked up through the dataset's
    ``("FormTable", "parameterReference")`` entry. Its value may be a single
    concept or a list of concepts; a list is joined with ``", "``.

    :param form: The form row to describe; it is read with ``.get`` and by
        key, so it is expected to behave like a mapping.
    :return: The text for the cell.
    """
    transcription = self.get_best_transcription(form)

    # TODO: Use CLDF terms instead of column names, like the c_ elsewhere
    suffix = f" {WARNING}" if form.get("Comment") else ""

    # Corresponding concepts. The reference column holds a list when a form
    # can be linked to several concepts, and a single value otherwise.
    # Appending a list as one item would make the join below fail, so lists
    # are flattened here.
    c_concept = self.dataset["FormTable", "parameterReference"].name
    concepts = form[c_concept]
    translations = list(concepts) if isinstance(concepts, list) else [concepts]

    joined = ", ".join(translations)
    return f"{transcription} ‘{joined}’{suffix}"
def form_to_cell_value(self, form: types.Form) -> str:
    """Build a string describing the form itself

    Provide the best transcription and all translations of the form strung
    together. If the form has segments, the segments selected by the form's
    segment slice are wrapped in ``{ … }``; without a usable segment slice,
    all segments are treated as selected. A non-empty ``formComment`` adds the
    module-level ``WARNING`` marker at the end.

    >>> ds = util.fs.new_wordlist(FormTable=[], CognatesetTable=[], CognateTable=[])
    >>> E = ExcelWriter(dataset=ds)
    >>> E.form_to_cell_value({"form": "f", "parameterReference": "c"})
    'f ‘c’'
    >>> E.form_to_cell_value(
    ...   {"form": "f", "parameterReference": "c", "formComment": "Not empty"})
    'f ‘c’ ⚠'
    >>> E.form_to_cell_value(
    ...   {"form": "fo", "parameterReference": "c", "segments": ["f", "o"]})
    '{ f o } ‘c’'
    >>> E.form_to_cell_value(
    ...   {"form": "fo",
    ...    "parameterReference": "c",
    ...    "segments": ["f", "o"],
    ...    "segmentSlice": ["1:1"]})
    '{ f }o ‘c’'

    TODO: This function should at some point support alignments, so that
    the following call will return '{ - f - }o ‘c’' instead.

    >>> E.form_to_cell_value(
    ...   {"form": "fo",
    ...    "parameterReference": "c",
    ...    "segments": ["f", "o"],
    ...    "segmentSlice": ["1:1"],
    ...    "alignment": ["", "f", ""]})
    '{ f }o ‘c’'

    :param form: The (judgement-augmented) form row; read with ``.get`` and by
        key, so it is expected to behave like a mapping.
    :return: The text for the cell.
    """
    segments = form.get("segments")
    if not segments:
        transcription = form["form"]
    else:
        # TODO: use CLDF property instead of column name
        included_segments: t.Iterable[int]
        try:
            # Look the slice up first, so that a missing key is reported as
            # KeyError before any parsing is attempted.
            raw_slices = form["segmentSlice"]
            included_segments = set(
                parse_segment_slices(raw_slices, enforce_ordered=True)
            )
        except TypeError:
            # The identifying columns are read with .get: logging the problem
            # must not itself fail when the row lacks them.
            self.logger.warning(
                "In judgement %s, for form %s, there was no segment slice. I will use the whole form.",
                form.get("cognateReference"),
                form.get("id"),
            )
            included_segments = range(len(segments))
        except KeyError:
            included_segments = range(len(segments))
        except ValueError:
            # What if segments overlap or cross? Overlap shouldn't happen,
            # but we don't check here. Crossing might happen, but this
            # serialization cannot reflect it, so we enforce order,
            # expecting that an error message here will be more useful than
            # silently messing with data. If the check fails, we take the
            # whole segment and warn.
            self.logger.warning(
                "In judgement %s, for form %s, segment slice %s is invalid. I will use the whole form.",
                form.get("cognateReference"),
                form.get("id"),
                ",".join(raw_slices),
            )
            included_segments = range(len(segments))

        # Walk the segments once, opening a brace when entering a selected
        # run and closing it when leaving one. Segments inside a run are
        # space-separated; segments outside are concatenated directly.
        pieces = []
        inside = False
        for i, segment in enumerate(segments):
            selected = i in included_segments
            if inside and not selected:
                pieces.append(" }" + segment)
            elif selected and not inside:
                pieces.append("{ " + segment)
            elif selected:
                pieces.append(" " + segment)
            else:
                pieces.append(segment)
            inside = selected
        if inside:
            pieces.append(" }")
        transcription = "".join(pieces).strip()

    suffix = f" {WARNING}" if form.get("formComment") else ""

    # Corresponding concepts: a list (multiple concepts) is flattened, any
    # other value (single concept) is used as-is.
    concepts = form["parameterReference"]
    translations = list(concepts) if isinstance(concepts, list) else [concepts]

    joined = ", ".join(translations)
    return f"{transcription} ‘{joined}’{suffix}"