Beispiel #1
0
    NEWS_EMBEDDING = os.path.join(sys._MEIPASS,
                                  "navec_news_v1_1B_250K_300d_100q.tar")
    NEWS_MORPH = os.path.join(sys._MEIPASS, "slovnet_morph_news_v1.tar")
    NEWS_SYNTAX = os.path.join(sys._MEIPASS, "slovnet_syntax_news_v1.tar")
    NEWS_NER = os.path.join(sys._MEIPASS, "slovnet_ner_news_v1.tar")
    DICTS = os.path.join(sys._MEIPASS, "dicts")
else:
    NEWS_EMBEDDING = os.path.join("navec_news_v1_1B_250K_300d_100q.tar")
    NEWS_MORPH = os.path.join("slovnet_morph_news_v1.tar")
    NEWS_SYNTAX = os.path.join("slovnet_syntax_news_v1.tar")
    NEWS_NER = os.path.join("slovnet_ner_news_v1.tar")
    DICTS = "dicts"

emb = NewsEmbedding(path=NEWS_EMBEDDING)
morph_tagger = NewsMorphTagger(emb, path=NEWS_MORPH)
segmenter = Segmenter()
syntax_parser = NewsSyntaxParser(emb, path=NEWS_SYNTAX)
ner_tagger = NewsNERTagger(emb, path=NEWS_NER)
NARRATOR = -1

DETPRON = {
    "Fem": {
        '3': ["ее", "её"],
        '1': [
            'мой', 'моя', 'моё', 'мое', 'мои', 'моего', 'моей', 'моих',
            'моему', 'моим', 'мою', 'моим', 'моею', 'моими', 'моем', 'моём'
        ]
    },
    "Masc": {
        '3': ['его'],
        '1': [
def segmenter():
    return Segmenter()
Beispiel #3
0
def Main(docType, text):
    status = 1
    res = {}

    segmenter = Segmenter()

    emb = NewsEmbedding()
    morph_tagger = NewsMorphTagger(emb)
    syntax_parser = NewsSyntaxParser(emb)
    ner_tagger = NewsNERTagger(emb)
    morph_vocab = MorphVocab()

    names_extractor = NamesExtractor(morph_vocab)
    money_extractor = MoneyExtractor(morph_vocab)

    doc = Doc(text)
    doc.segment(segmenter)
    doc.tag_morph(morph_tagger)
    doc.parse_syntax(syntax_parser)
    doc.tag_ner(ner_tagger)

    for span in doc.spans:
        span.normalize(morph_vocab)

    #для судебного приказа
    if docType == 'coast':
        #фио
        for span in doc.spans:
            if span.type == PER:
                span.extract_fact(names_extractor)
        x = [_.fact.as_dict for _ in doc.spans if _.type == PER]
        if x:
            res['ФИО'] = x
        else:
            status = 0
        #инн
        y = myextractors.findINN(text)
        if y:
            res['ИНН'] = y
        else:
            status = 0
        #номер судебного приказа
        y = myextractors.findNCOASTCASE(text)
        if y:
            res['номер судебного приказа'] = y
        else:
            status = 0
        #дата с п
        y = myextractors.findDATECOAST(text)
        if y:
            res['дата судебного приказа'] = y
        else:
            status = 0
        #организации
        y = []
        for span in doc.spans:
            if span.type == ORG:
                d = {}
                d['name'] = span.text
                y = y + [d]
        if y:
            res['организации'] = y
        else:
            status = 0

    #для письма
    if docType == 'mail':
        #фио
        for span in doc.spans:
            if span.type == PER:
                span.extract_fact(names_extractor)
        x = [_.fact.as_dict for _ in doc.spans if _.type == PER]
        if x:
            res['ФИО'] = x
        else:
            status = 0
        #инн
        y = myextractors.findINN(text)
        if y:
            res['ИНН'] = y
        else:
            status = 0
        #номер дог
        y = myextractors.findNCONTRACT(text)
        if y:
            res['номер договора'] = y
        else:
            status = 0
        #дата дог
        y = myextractors.findDATECONT(text)
        if y:
            res['дата договора'] = y
        else:
            status = 0

    #для платежного поручения
    if docType == 'order':
        #фио
        for span in doc.spans:
            if span.type == PER:
                span.extract_fact(names_extractor)
        x = [_.fact.as_dict for _ in doc.spans if _.type == PER]
        if x:
            res['ФИО'] = x
        else:
            status = 0
        #инн
        y = myextractors.findINN(text)
        if y:
            res['ИНН'] = y
        else:
            status = 0
        #организации
        y = []
        for span in doc.spans:
            if span.type == ORG:
                d = {}
                d['name'] = span.text
                y = y + [d]
        if y:
            res['организации'] = y
        else:
            status = 0
        #номер дог
        y = myextractors.findNCONTRACT(text)
        if y:
            res['номер договора'] = y
        else:
            status = 0
        #дата дог
        y = myextractors.findNCONTRACT(text)
        if y:
            res['номер договора'] = y
        else:
            status = 0
        #сумма
        matches = list(money_extractor(text))
        y = [_.fact for _ in matches]
        ret = []
        for i in y:
            z = {}
            z['amount'] = i.amount
            z['currency'] = i.currency
            ret = ret + [z]
        if ret:
            res['сумма'] = ret
        else:
            status = 0

    returning = {}

    if status == 1:
        returning['status'] = 'успех'
    else:
        returning['status'] = 'не успех'

    returning['entities'] = res
    return returning