コード例 #1
0
 def get_values(
         self, doc: Document,
         text: str) -> Optional[List[Tuple[Any, Optional[int], Optional[int]]]]:
     """Extract the employee name from ``text`` as a single located value.

     Args:
         doc: Document the text belongs to; not used by this implementation.
         text: Text passed to ``get_employee_name``.

     Returns:
         A one-element list ``[(name, 0, len(text))]`` whose span covers the
         whole text when ``get_employee_name`` returns a truthy value,
         otherwise ``None``.
     """
     name = get_employee_name(text)
     if not name:
         # Callers receive None (not an empty list) when nothing was found.
         return None
     return [(name, 0, len(text))]
コード例 #2
0
 def find_value_in_text_unit(self, log: ProcessLogger, field: DocumentField, doc: Document, text_unit: TextUnit) -> \
         Tuple[bool, Any]:
     """Look for an employee name in the text of a single text unit.

     Args:
         log: Not used by this implementation.
         field: Not used by this implementation.
         doc: Not used by this implementation.
         text_unit: Text unit whose ``text`` is passed to
             ``get_employee_name``.

     Returns:
         A ``(found, name)`` pair: ``found`` is True when
         ``get_employee_name`` returned something other than ``None``, and
         ``name`` is that returned value.
     """
     employee_name = get_employee_name(text_unit.text)
     found = employee_name is not None
     return found, employee_name
コード例 #3
0
    def parse_document_for_employee(document_id: int, no_detect: bool,
                                    task_id):
        """Extract employment data from a document and persist it.

        Iterates over the document's paragraph text units, splits each one
        into sentences and keeps the first value found for each employee
        attribute (name, employer, annual salary and currency, effective
        date, vacation, governing geography). The last cleaned sentence of
        each paragraph is then scored against the non-compete, termination,
        benefits and severance similarity functions; every score above 0.5
        is recorded as a provision for that paragraph. Finally the collected
        data is stored as ``Employee``, ``Provision`` and ``Employer`` rows.

        Args:
            document_id: Primary key of the ``Document`` to process.
            no_detect: When true, the ``is_employment_doc`` check is skipped
                and the document is processed unconditionally.
            task_id: Passed to ``log`` as ``task``.

        Returns:
            None. Returns early, without writing anything, when detection is
            enabled and the document is not recognised as an employment
            document.
        """
        detect = not no_detect
        document = Document.objects.get(pk=document_id)

        log('Process employment document: #{}. {}'.format(
            document_id, document.name),
            task=task_id)

        if detect and not is_employment_doc(document.full_text
                                            or document.text):
            log('Not an employment document: #{}. {}'.format(
                document_id, document.name),
                task=task_id)
            return

        # Provision type -> similarity function, in the order provisions are
        # recorded for a paragraph.
        provision_scorers = (
            ("noncompete", get_similar_to_non_compete),
            ("termination", get_similar_to_termination),
            ("benefits", get_similar_to_benefits),
            ("severance", get_similar_to_severance),
        )

        employee_dict = {}
        provisions = []

        for t in TextUnit.objects.filter(document_id=document_id,
                                         unit_type="paragraph").all():
            paragraph_text = t.text
            # skip if all text in uppercase
            if paragraph_text == paragraph_text.upper():
                continue
            try:
                sentences = segment_sentences(paragraph_text)
            except Exception:
                # Treat the paragraph as a single sentence if the segmenter
                # errors out. Only Exception is caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                sentences = [paragraph_text]

            # Reset per paragraph so a paragraph with no sentences is never
            # scored using a sentence left over from the previous paragraph.
            text = None
            for text in sentences:

                # clean
                text = text.replace('[', '(').replace(']', ')')

                # get values not yet found. This logic assumes only one of each
                # of these values found per document.
                # if there is more than one it will only pick up the first (except effective date)
                if employee_dict.get('name') is None:
                    employee_dict['name'] = get_employee_name(text)
                if employee_dict.get('employer') is None:
                    employee_dict['employer'] = get_employer_name(text)
                if employee_dict.get('annual_salary') is None:
                    salary_result = get_salary(text)
                    if salary_result is not None:
                        # salary_result[0] is indexed as (amount, currency)
                        # and salary_result[1] multiplies the amount.
                        employee_dict['annual_salary'] = \
                            salary_result[0][0] * salary_result[1]
                        employee_dict['salary_currency'] = salary_result[0][1]
                if employee_dict.get('effective_date') is None:
                    employee_dict['effective_date'] = get_effective_date(text)
                if employee_dict.get('vacation') is None:
                    vacation_result = get_vacation_duration(text)
                    if vacation_result is not None:
                        yearly_amount = \
                            vacation_result[0][1] * vacation_result[1]
                        employee_dict['vacation'] = str(
                            yearly_amount) + " " + str(
                                vacation_result[0][0]) + "s"
                if employee_dict.get('governing_geo') is None:
                    employee_dict['governing_geo'] = get_governing_geo(text)

            if text is None:
                # Segmenter produced no sentences: nothing to score.
                continue

            # Score the last cleaned sentence of the paragraph against every
            # provision type independently; one paragraph may match several.
            for provision_type, scorer in provision_scorers:
                similarity = scorer(text)
                if similarity > .5:
                    provisions.append({
                        "text_unit": t,
                        "similarity": similarity,
                        "type": provision_type
                    })

        employee = employer = None
        # create Employee only if his/her name exists
        if employee_dict.get('name') is not None:
            # All extracted fields take part in the get_or_create lookup.
            employee, _ = Employee.objects.get_or_create(
                name=employee_dict['name'],
                annual_salary=employee_dict.get('annual_salary'),
                salary_currency=employee_dict.get('salary_currency'),
                effective_date=employee_dict.get('effective_date'),
                vacation_yearly=employee_dict.get('vacation'),
                governing_geo=employee_dict.get('governing_geo'),
                document=document)

        if provisions and employee is not None:
            for provision in provisions:
                Provision.objects.get_or_create(
                    text_unit=provision["text_unit"],
                    similarity=provision["similarity"],
                    employee=employee,
                    document=document,
                    type=provision["type"])

            found_types = {provision["type"] for provision in provisions}
            employee.has_noncompete = "noncompete" in found_types
            employee.has_termination = "termination" in found_types
            employee.has_benefits = "benefits" in found_types
            employee.has_severance = "severance" in found_types
            employee.save()

        # create Employer
        if employee and employee_dict.get('employer') is not None:
            employer, _ = Employer.objects.get_or_create(
                name=employee_dict['employer'])

        # Link the employer only when the employee does not have one yet.
        if employee and employer and not employee.employer:
            employee.employer = employer
            employee.save()
コード例 #4
0
 def get_values(self, text: str) -> List[Tuple[Any, int, int]]:
     """Extract the employee name from ``text`` as a single located value.

     Args:
         text: Text passed to ``get_employee_name``.

     Returns:
         ``[(name, 0, len(text))]`` when ``get_employee_name`` returns a
         truthy value. NOTE: despite the annotation, ``None`` is returned
         when no name is found.
     """
     employee_name = get_employee_name(text)
     if not employee_name:
         return None
     whole_text_span = (employee_name, 0, len(text))
     return [whole_text_span]