Beispiel #1
0
    def save_detector_settings_csv(
            self, detectors_by_value: Dict[str, List[str]]) -> None:
        """
        Store value/pattern pairs as the CSV content of the multiline regex
        detector that belongs to ``self.document_field``.

        The pairs are flattened into a two-column table (``value``,
        ``pattern``); when the same pattern occurs more than once only its
        first occurrence is kept. Then, depending on what is already stored:

        * no detector exists for the field - a new one is created with the
          table as its CSV content;
        * a detector exists and ``self.drop_previous_field_detectors`` is
          truthy - its CSV content is replaced by the table;
        * otherwise the table is merged into the existing detector through
          ``combine_with_dataframe``.

        :param detectors_by_value: mapping of a field value to the regexp
            patterns that should detect this value. An empty mapping
            produces a table without data rows.
        """
        # Collect every row first and build the frame in one go:
        # DataFrame.append() was removed in pandas 2.0 and copied the whole
        # frame on each call. Column order and the 0..n-1 index are the same
        # as the append-based version produced, so to_csv() output is
        # unchanged for non-empty input.
        rows = [
            (field_val, include_reg_value)
            for field_val, patterns in detectors_by_value.items()
            for include_reg_value in patterns
        ]
        df = pd.DataFrame(rows, columns=['value', 'pattern'])
        # 'pattern' stays a regular column, so this also works for an empty
        # table
        df.drop_duplicates(subset='pattern', inplace=True)

        try:
            existing = DocumentFieldMultilineRegexDetector.objects.get(
                document_field_id=self.document_field.uid
            )  # type: DocumentFieldMultilineRegexDetector
        except DocumentFieldMultilineRegexDetector.DoesNotExist:
            # nothing stored for this field yet - create a new detector
            detector = DocumentFieldMultilineRegexDetector()
            detector.document_field = self.document_field
            detector.csv_content = df.to_csv()
            detector.update_checksum()
            detector.save()
            return

        # just update CSV content and hashsum
        if self.drop_previous_field_detectors:
            existing.csv_content = df.to_csv()
            existing.update_checksum()
            existing.save()
            return

        # join these options with existing one
        # overwriting duplicates by detected_value or regexp pattern
        # NOTE(review): update_checksum() is not called on this path -
        # presumably combine_with_dataframe() refreshes the checksum; confirm.
        existing.combine_with_dataframe(df)
        existing.save()
 def save_detector_settings(
         cls, detector: DocumentFieldMultilineRegexDetector):
     """
     Persist the passed detector by calling its ``save()`` method.

     :param detector: detector object to be saved.
     """
     # NOTE(review): the first parameter is named ``cls`` but no
     # @classmethod decorator is visible in this fragment - confirm.
     detector.save()