Beispiel #1
0
    def analyse_header(self, s, sentences, index):
        """Classify one header sentence and hand it to the matching handler.

        The checks run in a fixed order and only the first one that matches
        is acted upon.

        Args:
            s: the sentence being examined.
            sentences: the full sentence sequence, forwarded to the helpers
                that consume several sentences at once.
            index: position associated with ``s``; forwarded to the helpers.

        Returns:
            A ``(is_end, index)`` tuple. ``is_end`` is True only when
            ``is_end_header`` matched. ``index`` is the value returned by a
            multi-sentence helper when one ran, otherwise the input index.
        """
        # A sentence that yields the contract date may also carry the
        # society/employer declaration.
        if self.set_contract_date(s, index):
            if self._is_society_employer_sentence(s):
                self._add_society_employer(s)
            return False, index

        # Only the first title found is kept.
        if not self._title and self._is_title(s):
            self._title = s
            # add to extraction
            crf_utils.add_clause_string(self.CONTRACT_NAME, self._title,
                                        self._results)
            return False, index

        if self._is_society_employer_sentence(s):
            return False, self._add_society_employer_sentences(sentences,
                                                               index)

        if self._is_society(s):
            return False, self._add_society_info(sentences, index)

        if self._is_employer(s):
            return False, self._add_employer_info(sentences, index)

        if not self._has_table_contents and self.is_table_contents(s):
            return False, self.pass_table_contents(sentences, index + 1)

        reached_end = bool(self.is_end_header(s, sentences, index + 1))
        return reached_end, index
Beispiel #2
0
    def _parse_output1(self, output, results):
        sentence = ''
        for s in output:
            s = s.decode('utf-8')
            clause_name = ''
            sentences = {}
            for line in s.splitlines():
                if line.strip():
                    pieces = line.split('\t')
                    key = pieces[-1]
                    n = 1
                    clause_name = crf_utils.get_tagging_name(
                        key, self.keywords, self._focus)
                    if (clause_name in sentences.keys()):
                        n += sentences[clause_name]
                    sentences[clause_name] = n
                    sentence += pieces[0]
                else:
                    clause_name = self._get_clause_name_for_line(sentences)
                    crf_utils.add_clause_string(clause_name, sentence, results)
                    sentences = {}
                    sentence = ''

            if len(sentence) > 0:
                clause_name = self._get_clause_name_for_line(sentences)
                crf_utils.add_clause_string(clause_name, sentence, results)
Beispiel #3
0
 def set_contract_date(self, s, index):
     """Try to capture the contract date from ``s`` if none is stored yet.

     Args:
         s: sentence passed to ``get_contract_time``.
         index: position passed to ``get_contract_time``.

     Returns:
         True when this call found a date and recorded it under
         ``CONTRACT_DATE`` in ``self._results``; False when a date was
         already stored or ``get_contract_time`` returned a falsy value.
     """
     if self._contract_date:
         return False

     # The lookup result is stored even when it is falsy.
     self._contract_date = self.get_contract_time(s, index)
     if not self._contract_date:
         return False

     # add to extraction
     crf_utils.add_clause_string(self.CONTRACT_DATE,
                                 self._contract_date, self._results)
     return True
Beispiel #4
0
 def _add_employer_info(self, sentences, index):
     """Record the employer sentence at ``index`` and its follow-up lines.

     ``sentences[index]`` becomes the first entry of ``self.employer`` and
     is stored under ``CONTRACT_AB`` in ``self._results``. The following
     sentences are added the same way for as long as ``_is_employer_info``
     accepts them.

     Args:
         sentences: indexable sequence of sentences.
         index: position of the sentence that opens the employer block.

     Returns:
         When a follow-up sentence is rejected, or the sentences run out,
         the index of the sentence just before the stopping point. When the
         loop ends because the current sentence is falsy, the index just
         after that sentence.
     """
     s = sentences[index]
     self.employer = [s]
     crf_utils.add_clause_string(self.CONTRACT_AB, s, self._results)
     index += 1
     total = len(sentences)
     while s:
         if index >= total:
             # Ran off the end of the document: step back to the last
             # sentence consumed instead of raising IndexError.
             index -= 1
             break
         s = sentences[index]
         if self._is_employer_info(s):
             self.employer.append(s)
             crf_utils.add_clause_string(self.CONTRACT_AB, s, self._results)
         else:
             # Step back so the returned index is the sentence before
             # the rejected one.
             index -= 1
             break
         index += 1
     return index
 def _add_assignee(self, s, sentences, index):
     if len(s) < 6 :
         d = 1
     else :
         d = 0
     ss = sentences[index+d]
     self.assignee = [ss]
     crf_utils.add_clause_string(self.CONTRACT_AB, ss, self._results)
     d += 1
     for i in range(d, 5) :
         ss = sentences[index + i]
         if self._is_transfor_info(ss) :
             self.assignee.append(ss)
         else:
             break
Beispiel #6
0
    def _add_society_employer_sentences(self, sentences, index):
        self.employer = []
        self.society = []

        index += 1
        while True:
            s = sentences[index]
            if self._is_employer_sentence(s):
                self.employer.append(s)
                crf_utils.add_clause_string(self.CONTRACT_AB, s, self._results)
            elif self._is_society_sentence(s):
                self.society.append(s)
                crf_utils.add_clause_string(self.CONTRACT_AB, s, self._results)
            else:
                break
            index += 1

        return index
    def analyse_header(self, s, sentences, index):
        """Classify one header sentence and hand it to the matching handler.

        The checks run in a fixed order and only the first one that matches
        is acted upon.

        Args:
            s: the sentence being examined.
            sentences: the full sentence sequence, forwarded to helpers.
            index: position associated with ``s``.

        Returns:
            A ``(is_end, index)`` tuple. ``is_end`` is True only when
            ``is_end_header`` matched. ``index`` is replaced by the result
            of ``pass_table_contents`` when that branch ran, otherwise it
            is the input index.
        """
        not_finished = False

        # Capturing the contract date consumes the sentence.
        if self.set_contract_date(s, index):
            return not_finished, index

        # Only the first title found is kept.
        if not self._title and self._find_in_info(s, self.TITLE_NAME):
            self._title = s
            crf_utils.add_clause_string(self.CONTRACT_NAME, self._title,
                                        self._results)
            return not_finished, index

        # Each party is collected once; the helpers' results are unused.
        if not self.transfor and self._is_transfor(s):
            self._add_transfor(s, sentences, index)
            return not_finished, index

        if not self.assignee and self._is_assignee(s):
            self._add_assignee(s, sentences, index)
            return not_finished, index

        if not self._has_table_contents and self.is_table_contents(s):
            return not_finished, self.pass_table_contents(sentences,
                                                          index + 1)

        return bool(self.is_end_header(s)), index
Beispiel #8
0
    def _parse_output(self, document, results):
        sentence = ''
        clause_name = ''
        sentences = {}
        for line in document.splitlines():
            if line.strip():
                n = 1
                pieces = line.split('\t')
                key = pieces[-1]
                clause_name = crf_utils.get_tagging_name(
                    key, self.dtn_doc._keywords, self.dtn_doc._focus_points)
                if (clause_name in sentences.keys()):
                    n += sentences[clause_name]
                sentences[clause_name] = n
                sentence += pieces[0]
            else:
                clause_name = self._get_clause_name_for_line(sentences)
                crf_utils.add_clause_string(clause_name, sentence, results)
                sentences = {}
                sentence = ''

        if len(sentence) > 0:
            clause_name = self._get_clause_name_for_line(sentences)
            crf_utils.add_clause_string(clause_name, sentence, results)
Beispiel #9
0
    def _add_society_employer(self, sentence):
        start = sentence.find(self.SOC_START)
        if start < 0:
            start = 0
        end = sentence.find(self.EMP_START)
        if end < 0:
            end = len(sentence) - 1
        nb = sentence.find(self.TIME_START)
        if nb < end:
            nb = len(sentence) - 1

        soc = sentence[start + 1:end]
        tab = soc.split(',')
        self.society = []
        if tab and len(tab):
            for s in tab:
                s = s.strip()
                if len(s) > 0:
                    self.society.append(s)
                    crf_utils.add_clause_string(self.CONTRACT_AB, s,
                                                self._results)
        else:
            self.society.append(soc)
            crf_utils.add_clause_string(self.CONTRACT_AB, soc, self._results)

        emp = sentence[end + 1:nb]
        tab = emp.split(',')
        self.employer = []
        if tab and len(tab):
            for s in tab:
                s = s.strip()
                if len(s) > 0:
                    self.employer.append(s)
                    crf_utils.add_clause_string(self.CONTRACT_AB, s,
                                                self._results)
        else:
            self.employer.append(emp)
            crf_utils.add_clause_string(self.CONTRACT_AB, emp, self._results)
Beispiel #10
0
 def _add_society_info(self, sentences, index):
     """Record the society sentence at ``index`` and its follow-up lines.

     ``sentences[index]`` becomes the first entry of ``self.society`` and
     is stored under ``CONTRACT_AB`` in ``self._results``. A following
     sentence is added the same way when ``_is_society_info`` accepts it,
     or when the sentence before it ends with a colon.

     Args:
         sentences: indexable sequence of sentences.
         index: position of the sentence that opens the society block.

     Returns:
         When a follow-up sentence is rejected, or the sentences run out,
         the index of the sentence just before the stopping point. When the
         loop ends because the current sentence is falsy, the index just
         after that sentence.
     """
     s = sentences[index]
     self.society = [s]
     crf_utils.add_clause_string(self.CONTRACT_AB, s, self._results)
     index += 1
     prev = s
     total = len(sentences)
     while s:
         if index >= total:
             # Ran off the end of the document: step back to the last
             # sentence consumed instead of raising IndexError.
             index -= 1
             break
         s = sentences[index]
         # A line that follows a colon-terminated line is kept even when
         # it does not look like society info by itself.
         # NOTE(review): this test used to be written twice with the same
         # ':' literal; the second was probably meant for another
         # character (e.g. a full-width colon) - confirm and extend.
         if self._is_society_info(s) or prev.endswith(':'):
             self.society.append(s)
             crf_utils.add_clause_string(self.CONTRACT_AB, s,
                                         self._results)
         else:
             # Step back so the returned index is the sentence before
             # the rejected one.
             index -= 1
             break
         prev = s
         index += 1
     return index