Python DocumentFieldDetector.include_regexps 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: apps.document.models

메소드/함수: include_regexps

hotexamples.com에서의 예제들: 5

Python DocumentFieldDetector.include_regexps - 예제 5개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python apps.document.models.DocumentFieldDetector.include_regexps의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

DocumentFieldDetector(6)

detected_value(5)

include_regexps(5)

exclude_regexps(3)

extraction_hint(3)

field(3)

regexps_pre_process_lower(3)

validate_detected_value(3)

category(2)

save(2)

compile_regexps_string(1)

definition_words(1)

document_type(1)

text_part(1)

예제 #1

파일 보기

def apply_simple_config(log: ProcessLogger, document_field: DocumentField,
                        csv: bytes, drop_previous_field_detectors: bool,
                        update_field_choice_values: bool):
    """Create one naive field detector per data row of a CSV config.

    For every row, the first cell becomes the detector's detected value and
    all non-empty cells of the row (the first one included), joined with
    newlines and lower-cased, become its include regexps.

    Args:
        log: Logger used for info messages and progress reporting.
        document_field: Field the created detectors are attached to.
        csv: Raw CSV content; parsed with ``pandas.read_csv`` defaults, every
            column read as ``str``.
        drop_previous_field_detectors: When true, detectors of this field
            with category ``FD_CATEGORY_IMPORTED_SIMPLE_CONFIG`` are deleted
            before the new ones are created.
        update_field_choice_values: When true, ``document_field.choices`` is
            overwritten with the sorted, de-duplicated, non-empty values of
            the first column and the field is saved.

    Raises:
        ValueError: If the parsed CSV has no rows or no columns.
    """
    df = pd.read_csv(io.BytesIO(csv), dtype=str)
    row_num, col_num = df.shape
    if row_num < 1 or col_num < 1:
        raise ValueError('Config csv contains no data')

    if update_field_choice_values:
        choices = df[
            df.columns[0]].dropna().drop_duplicates().sort_values().tolist()
        document_field.choices = '\n'.join(choices)
        document_field.save()

    log.info(
        'Creating {2} naive field detectors for document field {0} and document type {1}...'
        .format(document_field, document_field.document_type, row_num))
    # One progress step per started block of ten rows.
    log.set_progress_steps_number(int(row_num / 10) + 1)
    if drop_previous_field_detectors:
        DocumentFieldDetector.objects.filter(
            field=document_field,
            category=FD_CATEGORY_IMPORTED_SIMPLE_CONFIG).delete()
    for index, row in df.iterrows():
        detector = DocumentFieldDetector()
        detector.category = FD_CATEGORY_IMPORTED_SIMPLE_CONFIG
        detector.field = document_field
        detector.regexps_pre_process_lower = True
        # The row is labelled by column names (strings), so use explicit
        # positional access; ``row[0]`` relies on a deprecated pandas
        # fallback from label lookup to position lookup.
        detector.detected_value = row.iloc[0]
        detector.include_regexps = '\n'.join(row.dropna()).lower()
        detector.save()
        if index % 10 == 0:
            log.step_progress()
    log.info('Done.')

예제 #2

파일 보기

파일: test_regexps_field_detection.py 프로젝트: francisjervis/lexpredict-contraxsuite

 def make_doc_field_detector(self) -> DocumentFieldDetector:
     """Return a new, unsaved detector populated with fixed test settings."""
     field_detector = DocumentFieldDetector()
     # Applied in declaration order.
     fixed_settings = (
         ('exclude_regexps', 'cushion'),
         ('include_regexps', r'(?<=\D{3,3}\s\D{5,5}\s)\D+'),
         ('detected_value', 'shall'),
         ('extraction_hint', None),
     )
     for attr_name, attr_value in fixed_settings:
         setattr(field_detector, attr_name, attr_value)
     return field_detector

예제 #3

파일 보기

 def make_doc_field_detector(
         self,
         exclude_regexps: Optional[str] = None,
         include_regexps: Optional[str] = None,
         detected_value: Optional[str] = None) -> DocumentFieldDetector:
     """Return a new, unsaved detector, using defaults for omitted settings.

     Any argument left as ``None`` falls back to its default: ``'cushion'``
     for the exclude regexps, a look-behind pattern for the include regexps
     and ``'shall'`` for the detected value. The extraction hint is always
     ``'detected'``.
     """
     field_detector = DocumentFieldDetector()

     if exclude_regexps is None:
         exclude_regexps = 'cushion'
     field_detector.exclude_regexps = exclude_regexps

     if include_regexps is None:
         include_regexps = r'(?<=\D{3,3}\s\D{5,5}\s)\D+'
     field_detector.include_regexps = include_regexps

     if detected_value is None:
         detected_value = 'shall'
     field_detector.detected_value = detected_value

     field_detector.extraction_hint = 'detected'
     return field_detector

예제 #4

파일 보기

파일: test_regexps_field_detection.py 프로젝트: fagan2888/lexpredict-contraxsuite

 def make_doc_field_detector(exclude_regexps: Optional[str] = None,
                             include_regexps: Optional[str] = None,
                             detected_value: Optional[str] = None,
                             regexps_pre_process_lower: bool = True,
                             definition_words: Optional[str] = None) -> DocumentFieldDetector:
     """Return a new, unsaved detector configured for regexp detection tests.

     ``exclude_regexps`` and ``include_regexps`` fall back to ``'cushion'``
     and a look-behind pattern when ``None``. ``detected_value`` is only
     assigned when it is not ``None``. The extraction hint is always
     ``'TAKE_FIRST'`` and the text part ``'INSIDE_REGEXP'``.
     """
     field_detector = DocumentFieldDetector()

     if exclude_regexps is None:
         exclude_regexps = 'cushion'
     field_detector.exclude_regexps = exclude_regexps

     if include_regexps is None:
         include_regexps = r'(?<=\D{3,3}\s\D{5,5}\s)\D+'
     field_detector.include_regexps = include_regexps

     # Leave the detector's own detected_value untouched when none is given.
     if detected_value is not None:
         field_detector.detected_value = detected_value

     trailing_settings = (
         ('extraction_hint', 'TAKE_FIRST'),  # alternative hint: 'detected'
         ('text_part', 'INSIDE_REGEXP'),
         ('regexps_pre_process_lower', regexps_pre_process_lower),
         ('definition_words', definition_words),
     )
     for attr_name, attr_value in trailing_settings:
         setattr(field_detector, attr_name, attr_value)
     return field_detector

예제 #5

파일 보기

    def save_detector_settings(
            self, detectors_by_value: Dict[str, List[str]]) -> None:
        """Persist the detected-value -> patterns mapping.

        When ``self.save_in_csv_format`` is set, the whole mapping is handed
        to ``self.save_detector_settings_csv`` and nothing else happens.
        Otherwise one ``DocumentFieldDetector`` is saved per mapping key, its
        include regexps being that key's patterns joined with newlines.
        """
        # CSV mode: save [all pattern: value] records into
        # DocumentFieldMultilineRegexDetector
        if self.save_in_csv_format:
            self.save_detector_settings_csv(detectors_by_value)
            return

        # Optionally remove the previously imported detectors of this field
        # before writing the new ones.
        if self.drop_previous_field_detectors:
            previous_detectors = DocumentFieldDetector.objects.filter(
                field=self.document_field,
                category=self.FD_CATEGORY_IMPORTED_SIMPLE_CONFIG)
            previous_detectors.delete()

        # One DocumentFieldDetector record per detected value.
        for detected_value, patterns in detectors_by_value.items():
            new_detector = DocumentFieldDetector()
            new_detector.category = self.FD_CATEGORY_IMPORTED_SIMPLE_CONFIG
            new_detector.field = self.document_field
            new_detector.regexps_pre_process_lower = True
            new_detector.detected_value = detected_value
            new_detector.include_regexps = '\n'.join(patterns)
            new_detector.save()