コード例 #1
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Select the start line of this segment from the whole line list.

            This variant is not evaluated per line. According to the original
            author's note it is invoked later with the arguments
            ``lines, lines, self.index_field, feats, len(lines), file_info, None``,
            so the parameter names do not describe what is actually passed:

            * ``line_text``  -- sequence of line dicts, each with a ``'text'`` key
            * ``line_index`` -- the index field, one entry per line
            * ``prev_line``  -- file info object exposing ``dbname``

            ``int(file_info.dbname)`` is treated as the year. From 1960 on, the
            first line with non-empty text is selected, scanning only the
            leading entries whose index-field value ``is False``. For earlier
            years the last line is selected.

            Returns:
                True if a line was selected and handed to
                ``self.do_match_work``; False otherwise (including when the
                index field is empty).

            Raises:
                ValueError: if ``file_info.dbname`` cannot be converted by
                    ``int``.
            """
            file_info = prev_line
            current_year = int(file_info.dbname)

            selected_start_index = None
            if current_year >= 1960:
                # Walk the leading lines that can resemble the title; stop at
                # the first entry whose index-field value is not False.
                for index, value in enumerate(line_index):
                    if value is not False:
                        break
                    if line_text[index]['text'].strip(",.; ") != "":
                        selected_start_index = index
                        break
            elif line_index:
                # Early years: just take the last line. The emptiness guard
                # avoids reporting index -1 when there are no lines at all.
                selected_start_index = len(line_index) - 1

            if selected_start_index is None:
                return False

            # do_match_work is given a match object; create one from an
            # empty pattern on an empty text to serve as a placeholder.
            placeholder_match, _ = regu.fuzzy_search(r"", "")
            self.do_match_work(True, placeholder_match,
                               selected_start_index, 0)
            return True
コード例 #2
0
 def match_start_condition(self, line, line_text, line_index, features,
                           num_lines, prev_line, combined_texts):
     """Start condition for a ``Hauptsitz:`` or ``Sitz:`` line.

     ``line_text`` is searched with ``regu.fuzzy_search``; on a hit the
     match, ``line_index`` and the reported error value are passed to
     ``self.do_match_work`` with ``True`` as first argument.

     Returns:
         True on a hit, otherwise None.
     """
     pattern = r"^Hauptsitz\s?:|^Sitz\s?:"
     found, err_count = regu.fuzzy_search(pattern, line_text)
     if found is None:
         return None

     self.do_match_work(True, found, line_index, err_count)
     return True
コード例 #3
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Verwaltung:`` / ``Verw.:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=0``; on a hit the match, ``line_index`` and the
            reported error value go to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Verwaltung:?|Verw\.\s?):"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=0)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #4
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Aufsichtsrat:`` or ``Kontrollstelle:``.

            ``line_text`` is searched with ``regu.fuzzy_search``; on a hit the
            match, ``line_index`` and the reported error value go to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Aufsichtsrat|Kontrollstelle)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #5
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a board heading.

            Looks for ``Vorstand:``, ``Verwaltungsrat:`` or
            ``Verwaltungsbeirat:`` in ``line_text`` via ``regu.fuzzy_search``
            and forwards a hit to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Vorstand|Verwaltungsrat|Verwaltungsbeirat)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #6
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Fernschreiber:`` or ``Telex:``.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``line_index`` and the
            reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(?:Fernschreiber|Telex)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #7
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Geschäftsinhaber:`` / ``Geschäftsleitung:``.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``line_index`` and the
            reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Geschäfts(inhaber|leitung)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #8
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for an ``Aus ... konsolidiert ... Bilanzen:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``line_index`` and the
            reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Aus.+konsolidiert.+Bilanzen\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #9
0
        def match_stop_condition(self, line, line_text, line_index, features,
                                 num_lines, prev_line, combined_texts):
            """Stop condition triggered by a ``Geschäftsjahr:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``False`` as first
            argument, together with ``line_index`` and the reported error
            value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Geschäftsjahr\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(False, found, line_index, err_count)
            return True
コード例 #10
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Niederlassungen:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=1``; a hit is forwarded to ``self.do_match_work``
            with ``line_index`` and the reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"(^Niederlassungen\s?:)"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=1)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #11
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Kommandite ... und ... Bank ...:`` heading.

            Unlike the single-line variants, the search runs on
            ``combined_texts`` rather than ``line_text``. A hit is forwarded
            to ``self.do_match_work`` with ``line_index`` and the reported
            error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Kommandite.+und.+Bank.+:"
            found, err_count = regu.fuzzy_search(pattern, combined_texts)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #12
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Aus der/den Gewinn- und Verlust- ...``.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``line_index`` and the
            reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Aus (der|den) Gewinn- und Verlust- "
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #13
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Stimmrecht der Aktien:`` heading.

            The pattern also accepts the abbreviated ``Stimmrecht d. Aktien``
            form followed by further text before the colon. ``line_text`` is
            searched with ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Stimmrecht der Aktien|Stimmrecht d\.[.\s]*Aktien.+)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #14
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Dividenden...aktien:`` heading.

            The non-capturing group in the pattern allows arbitrary text
            between ``Dividenden`` and ``aktien``. ``line_text`` is searched
            with ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^Dividenden(?:.+)aktien\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #15
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for consolidated profit-and-loss statements.

            Searches ``combined_texts`` (not ``line_text``) for
            ``Aus ... konsolidiert ... Gewinn ... Verlustrechnungen``; the
            pattern is not anchored to the start of the text. A hit is
            forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"Aus.+konsolidiert.+(?:G|g)ewinn.+(?:V|v)erlustrechnungen"
            found, err_count = regu.fuzzy_search(pattern, combined_texts)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #16
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Zahlstellen`` heading.

            Accepts ``Zahlstellen:``, ``Hinterlegungs- und/u. Zahlstellen:``
            and ``Zahlstellen bzw. Hinterlegungsstellen:``. ``line_text`` is
            searched with ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"(^Zahlstellen|^Hinterlegungs\- u(nd|\.)\s?Zahlstellen|^Zahlstellen\sbzw.\sHinterlegungsstellen)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #17
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for subsidiary / main storage headings.

            Accepts text ending in ``bung der Tochtergesellschaften:``
            (unanchored), ``Tochtergesellschaften und sonstige
            Beteiligungen:`` and ``Hauptlagerplatz:``. ``line_text`` is
            searched with ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"(bung der Tochtergesellschaften|^Tochtergesellschaften\sund\ssonstige\sBeteiligungen|^Hauptlagerplatz)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #18
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Gesellschafter:`` heading.

            The optional prefix ``Persönlich haftender`` and the truncated
            form ``schafter:`` are accepted as well. ``line_text`` is
            searched with ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Persönlich\shaftender\s)?(Gesellschafter|schafter)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #19
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for product / business-purpose headings.

            Accepts ``Es werden erzeugt:``, ``Erzeugnisse:``,
            ``Gegenstand des Unternehmens:`` and ``Produktionsprogramm:``.
            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=1``; a hit is forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Es werden erzeugt|Erzeugnisse|Gegenstand\sdes\sUnternehmens|Produktionsprogramm)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=1)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #20
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for shareholder headings.

            Accepts ``Großaktionär(e):`` spelled with either ``ss`` or ``ß``
            (non-capturing alternative) as well as ``Aktionär(e):``.
            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^((Gro(?:ss|ß)aktionär(?:\s?|e\s?))|Aktionäre?)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #21
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for plant / operating-company headings.

            Accepts ``Betriebsanlagen:``, ``Betriebsgesellschaften:``,
            ``Vertriebsgesellschaften:`` and ``Besitzangaben:``.
            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=1``; a hit is forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Betriebsanlagen|Betriebsgesellschaften|Vertriebsgesellschaften|Besitzangaben)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=1)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #22
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for works / site headings.

            Accepts ``Werke in`` (no colon required), ``Werke:``,
            ``Betriebsstätten:``, ``Eigenwerke:`` and ``Zechen:``.
            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=1``; a hit is forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(Werke in|Werke\s?:|Betriebsstätten\s?:|Eigenwerke\s?:|Zechen\s?:)"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=1)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #23
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for an ``Aktienkurse ...:`` heading.

            The pattern is unanchored and allows arbitrary text between
            ``Aktienkurse`` and the colon, e.g. ``Aktienkurse (Düsseldorf):``.
            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"Aktienkurse\s?.*:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #24
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Beteiligung(en):`` headings.

            Accepts the bare heading as well as variants prefixed with
            ``Namhafte``, ``Wesentliche``, ``Maßgebliche``, ``Wichtigste``,
            ``Sonstige`` or ``Direkte``. ``line_text`` is searched with
            ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"(((?:Namhafte|Wesentliche|Maßgebliche|Wichtigste|Sonstige|Direkte)\s?Beteiligung(en)?)|\s?Beteiligung(en)?)\s?:"
            # The error number is reduced to 0 to prevent confusion with
            # "Beteiligung:".
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=0)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #25
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Fernruf:`` or ``Telefon:``.

            ``line_text`` is searched with ``regu.fuzzy_search``; a hit is
            forwarded to ``self.do_match_work`` with ``line_index`` and the
            reported error value.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"^(?:Fernruf|Telefon)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #26
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for branch-office / head-office headings.

            Accepts ``Zweigniederlassungen und Büros:``,
            ``Zweigniederlassungen in`` (colon optional),
            ``Zweigniederlassungen:``, ``Vertreten in :`` and
            ``Hauptverwaltung:``. ``line_text`` is searched with
            ``regu.fuzzy_search``; a hit is forwarded to
            ``self.do_match_work``.

            Returns:
                True on a hit, otherwise None.
            """
            pattern = (r"(^Zweigniederlassungen und Büros\s?:|"
                       r"^Zweigniederlassungen in\s?:?|"
                       r"^Zweigniederlassungen\s?:|"
                       r"^Vertreten\sin\s:|"
                       r"^Hauptverwaltung\s?:)")
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #27
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for a ``Grundkapital:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search`` using
            ``err_number=0``. A hit is rejected when ``combined_texts``
            contains ``"Bezugsrechte:"``, which the original author marked
            as a special case.

            Returns:
                True on an accepted hit, False for the special case,
                otherwise None.
            """
            pattern = r"^Grundkapital\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text,
                                                 err_number=0)
            if found is None:
                return None

            # Special case: do not start here if the combined text also
            # carries a "Bezugsrechte:" heading.
            if "Bezugsrechte:" in combined_texts:
                return False

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #28
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for subscription-right style headings.

            The pattern is searched in ``combined_texts`` first. On a hit,
            the same pattern is searched in ``line_text`` to decide which
            index is reported: ``line_index`` if the current line matches
            on its own, otherwise ``line_index - 1``. The match object and
            error value passed to ``self.do_match_work`` are those of the
            ``combined_texts`` search.

            Returns:
                True on a hit, otherwise None.
            """
            # NOTE(review): "Berichtigunsaktien" looks like a typo for
            # "Berichtigungsaktien" -- confirm before changing the pattern.
            regex_string = r"(^Bezugsrechtabschläge insgesamt\s?:|^Umtauschrechte\s?:|^Berichtigunsaktien\s?:|^Bezugsrechte und Berichtigungsaktien\s?:|^Bezugsrechte\s?:)"
            combined_hit, err_count = regu.fuzzy_search(
                regex_string, combined_texts)
            if combined_hit is None:
                return None

            line_hit, _ = regu.fuzzy_search(regex_string, line_text)
            # If only the combined text matches, attribute the start to the
            # previous index.
            pass_index = line_index if line_hit else line_index - 1

            self.do_match_work(True, combined_hit, pass_index, err_count)
            return True
コード例 #29
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for ``Rechte ... Vorzugsaktien ...:`` headings.

            Searches ``combined_texts`` (not ``line_text``) with
            ``err_number=1``. Examples recorded by the original author:

            * mismatch with 2 errors:
              'rechtslose Vorzugsaktien. Aktienkurse:'
            * match with 0 errors: 'Besondere Rechte der an der Börse
              Hamburg gehandelten Vorzugs-Aktien:'
            * match with 0 errors: 'Besondere Rechte der Vorzugsaktien:'

            Returns:
                True on a hit, otherwise None.
            """
            pattern = r"Rechte.+Vorzugs.*(?:a|A)ktien.*:"
            found, err_count = regu.fuzzy_search(pattern, combined_texts,
                                                 err_number=1)
            if found is None:
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True
コード例 #30
0
        def match_start_condition(self, line, line_text, line_index, features,
                                  num_lines, prev_line, combined_texts):
            """Start condition for an ``Aktionärvertreter:`` line.

            ``line_text`` is searched with ``regu.fuzzy_search``. A hit whose
            matched text contains ``"Aktionären"`` is discarded, since the
            original author flagged it as a possible false positive.

            Returns:
                True on an accepted hit, otherwise None.
            """
            pattern = r"^(Aktionärvertreter)\s?:"
            found, err_count = regu.fuzzy_search(pattern, line_text)
            if found is None:
                return None

            # Possible false positive of the pattern above.
            if "Aktionären" in found.group():
                return None

            self.do_match_work(True, found, line_index, err_count)
            return True