NEWS_EMBEDDING = os.path.join(sys._MEIPASS, "navec_news_v1_1B_250K_300d_100q.tar") NEWS_MORPH = os.path.join(sys._MEIPASS, "slovnet_morph_news_v1.tar") NEWS_SYNTAX = os.path.join(sys._MEIPASS, "slovnet_syntax_news_v1.tar") NEWS_NER = os.path.join(sys._MEIPASS, "slovnet_ner_news_v1.tar") DICTS = os.path.join(sys._MEIPASS, "dicts") else: NEWS_EMBEDDING = os.path.join("navec_news_v1_1B_250K_300d_100q.tar") NEWS_MORPH = os.path.join("slovnet_morph_news_v1.tar") NEWS_SYNTAX = os.path.join("slovnet_syntax_news_v1.tar") NEWS_NER = os.path.join("slovnet_ner_news_v1.tar") DICTS = "dicts" emb = NewsEmbedding(path=NEWS_EMBEDDING) morph_tagger = NewsMorphTagger(emb, path=NEWS_MORPH) segmenter = Segmenter() syntax_parser = NewsSyntaxParser(emb, path=NEWS_SYNTAX) ner_tagger = NewsNERTagger(emb, path=NEWS_NER) NARRATOR = -1 DETPRON = { "Fem": { '3': ["ее", "её"], '1': [ 'мой', 'моя', 'моё', 'мое', 'мои', 'моего', 'моей', 'моих', 'моему', 'моим', 'мою', 'моим', 'моею', 'моими', 'моем', 'моём' ] }, "Masc": { '3': ['его'], '1': [
def segmenter(): return Segmenter()
def Main(docType, text): status = 1 res = {} segmenter = Segmenter() emb = NewsEmbedding() morph_tagger = NewsMorphTagger(emb) syntax_parser = NewsSyntaxParser(emb) ner_tagger = NewsNERTagger(emb) morph_vocab = MorphVocab() names_extractor = NamesExtractor(morph_vocab) money_extractor = MoneyExtractor(morph_vocab) doc = Doc(text) doc.segment(segmenter) doc.tag_morph(morph_tagger) doc.parse_syntax(syntax_parser) doc.tag_ner(ner_tagger) for span in doc.spans: span.normalize(morph_vocab) #для судебного приказа if docType == 'coast': #фио for span in doc.spans: if span.type == PER: span.extract_fact(names_extractor) x = [_.fact.as_dict for _ in doc.spans if _.type == PER] if x: res['ФИО'] = x else: status = 0 #инн y = myextractors.findINN(text) if y: res['ИНН'] = y else: status = 0 #номер судебного приказа y = myextractors.findNCOASTCASE(text) if y: res['номер судебного приказа'] = y else: status = 0 #дата с п y = myextractors.findDATECOAST(text) if y: res['дата судебного приказа'] = y else: status = 0 #организации y = [] for span in doc.spans: if span.type == ORG: d = {} d['name'] = span.text y = y + [d] if y: res['организации'] = y else: status = 0 #для письма if docType == 'mail': #фио for span in doc.spans: if span.type == PER: span.extract_fact(names_extractor) x = [_.fact.as_dict for _ in doc.spans if _.type == PER] if x: res['ФИО'] = x else: status = 0 #инн y = myextractors.findINN(text) if y: res['ИНН'] = y else: status = 0 #номер дог y = myextractors.findNCONTRACT(text) if y: res['номер договора'] = y else: status = 0 #дата дог y = myextractors.findDATECONT(text) if y: res['дата договора'] = y else: status = 0 #для платежного поручения if docType == 'order': #фио for span in doc.spans: if span.type == PER: span.extract_fact(names_extractor) x = [_.fact.as_dict for _ in doc.spans if _.type == PER] if x: res['ФИО'] = x else: status = 0 #инн y = myextractors.findINN(text) if y: res['ИНН'] = y else: status = 0 #организации y = [] for span in doc.spans: if span.type == ORG: d = {} d['name'] = span.text y = y + [d] if y: res['организации'] = y else: status = 0 #номер дог y = myextractors.findNCONTRACT(text) if y: res['номер договора'] = y else: status = 0 #дата дог y = myextractors.findNCONTRACT(text) if y: res['номер договора'] = y else: status = 0 #сумма matches = list(money_extractor(text)) y = [_.fact for _ in matches] ret = [] for i in y: z = {} z['amount'] = i.amount z['currency'] = i.currency ret = ret + [z] if ret: res['сумма'] = ret else: status = 0 returning = {} if status == 1: returning['status'] = 'успех' else: returning['status'] = 'не успех' returning['entities'] = res return returning