def save_detector_settings_csv(
        self,
        detectors_by_value: Dict[str, List[str]]) -> None:
    """
    Store a value -> regexp patterns mapping as CSV settings of the
    multiline regexp detector linked to ``self.document_field``.

    The mapping is flattened into a two-column table (``value``,
    ``pattern``); when the same pattern occurs more than once only its
    first occurrence is kept. Then:

    * if no detector exists for the field, a new one is created with the
      table as its CSV content;
    * if ``self.drop_previous_field_detectors`` is truthy, the existing
      detector's CSV content is replaced by the table;
    * otherwise the table is handed to the existing detector's
      ``combine_with_dataframe`` and the detector is saved.

    :param detectors_by_value: detected value -> list of regexp patterns
        that should produce this value. May be empty, in which case a
        header-only CSV is stored.
    """
    # Build the whole table in one go: DataFrame.append was removed in
    # pandas 2.0 and copied the frame on every call. Passing explicit
    # columns keeps 'value' / 'pattern' present even for an empty mapping,
    # so drop_duplicates(subset='pattern') below cannot fail on a missing
    # column.
    rows = [
        {'value': field_val, 'pattern': pattern}
        for field_val, patterns in detectors_by_value.items()
        for pattern in patterns
    ]
    df = pd.DataFrame(rows, columns=['value', 'pattern'])
    df.drop_duplicates(subset='pattern', inplace=True)

    try:
        existing = DocumentFieldMultilineRegexDetector.objects.get(
            document_field_id=self.document_field.uid
        )  # type: DocumentFieldMultilineRegexDetector
    except DocumentFieldMultilineRegexDetector.DoesNotExist:
        # no detector for this field yet - create one from scratch
        detector = DocumentFieldMultilineRegexDetector()
        detector.document_field = self.document_field
        detector.csv_content = df.to_csv()
        detector.update_checksum()
        detector.save()
        return

    # just update CSV content and hashsum
    if self.drop_previous_field_detectors:
        existing.csv_content = df.to_csv()
        existing.update_checksum()
        existing.save()
        return

    # join these options with existing one
    # overwriting duplicates by detected_value or regexp pattern
    # NOTE(review): update_checksum() is not called in this branch -
    # presumably combine_with_dataframe() refreshes it; confirm.
    existing.combine_with_dataframe(df)
    existing.save()
def save_detector_settings(cls, detector: DocumentFieldMultilineRegexDetector):
    """
    Persist the passed detector by calling its ``save()`` method.

    ``cls`` is not used by the body.

    :param detector: detector instance to be saved.
    """
    detector.save()