Example #1
0
def main():
    """Scrape accident news from the configured sources, extract addresses
    with Natasha, and persist previously-unseen items to the database.

    Side effects: adds ``News`` rows for new links and commits a single
    transaction at the end. No return value.
    """
    # Site name -> scraper class (classes are defined elsewhere in the project).
    news_sites = {'m24.ru': M24_accidents,
                  'mosday.ru': Mosday_accidents,
                  'vm.ru': VM_accidents}

    # Initialize Natasha's address extractor.
    morph_vocab = MorphVocab()
    extractor = AddrExtractor(morph_vocab)

    # Collect news items (with detected addresses) into a temporary list.
    # Iterate items() directly instead of keys() + get() double lookup.
    news_list = []
    for site_name, scraper_cls in news_sites.items():
        try:
            source = scraper_cls()
            news_list += get_news(source, extractor)
        except TypeError:
            # Scraper could not be instantiated / parsed; report and move on.
            print("Источник {} недоступен.".format(site_name))

    for item in news_list:
        # Deduplicate by link BEFORE building the ORM object — avoids
        # constructing a record that would only be thrown away.
        if News.query.filter_by(link=item['link']).first():
            continue

        # Source gives time and date separately, e.g. "18:45 03.02.2021".
        published = datetime.strptime(
            item['time'] + ' ' + item['date'], '%H:%M %d.%m.%Y')

        db.session.add(News(
            title=item['title'],
            link=item['link'],
            date_and_time=published,
            text=item['text'],
            address=item['location']['address'],
            street=item['location']['street'],
            lat=item['location']['coordinates'][0],
            lon=item['location']['coordinates'][1],
        ))
    # One commit for the whole batch.
    db.session.commit()

    '''
from natasha import (Segmenter, MorphVocab, NewsEmbedding, NewsMorphTagger,
                     NewsSyntaxParser, NewsNERTagger, PER, NamesExtractor,
                     DatesExtractor, MoneyExtractor, AddrExtractor, Doc)

# Natasha NLP pipeline components (module-level singletons so they are
# built once at import time — construction is relatively expensive).
segmenter = Segmenter()
morph_vocab = MorphVocab()

# Shared news-domain embedding feeds the taggers/parser below.
emb = NewsEmbedding()
morph_tagger = NewsMorphTagger(emb)
syntax_parser = NewsSyntaxParser(emb)
ner_tagger = NewsNERTagger(emb)

# Rule-based fact extractors, all backed by the same morphological vocabulary.
names_extractor = NamesExtractor(morph_vocab)
dates_extractor = DatesExtractor(morph_vocab)
money_extractor = MoneyExtractor(morph_vocab)
addr_extractor = AddrExtractor(morph_vocab)

# Root directory for the TSV datasets read by read_all_data() below.
data_path = '../data/'


# train_file='paraphrases.tsv', test_file='paraphrases_gold.tsv'
# train_file='paraphrase_framebank.tsv'
def read_all_data(train_file=data_path + 'paraphrases.tsv',
                  test_file=data_path + 'paraphrases_gold.tsv'):
    """Load the paraphrase training data from a tab-separated file.

    NOTE(review): this definition is truncated in this chunk — the visible
    body only reads ``train_file``; ``test_file`` is presumably consumed in
    the part of the function that continues past this view. Confirm against
    the full file before relying on the return value.
    """
    with open(train_file, encoding='utf-8') as f:
        # Tab-delimited file with a header row; each row becomes a dict.
        reader = csv.DictReader(f, delimiter='\t')
        data = list(reader)
        # Wrap in a numpy array (of dict objects) for downstream indexing.
        data = numpy.asarray(data)

        # numpy.random.seed(123)
        # numpy.random.shuffle(data)
Example #3
0
def addr_extractor(morph_vocab):
    """Build and return a Natasha ``AddrExtractor`` backed by *morph_vocab*."""
    extractor = AddrExtractor(morph_vocab)
    return extractor
Example #4
0
    Doc
)
# NOTE(review): this span contained an unresolved git merge conflict
# (<<<<<<< HEAD ... ======= ... >>>>>>> 48053ea). The non-HEAD side was
# empty, so the conflict is resolved in favour of HEAD, keeping the
# Natasha pipeline objects — this preserves all content and restores
# valid Python syntax.
segmenter = Segmenter()
morph_vocab = MorphVocab()

# Shared news-domain embedding used by the taggers/parser below.
emb = NewsEmbedding()
morph_tagger = NewsMorphTagger(emb)
syntax_parser = NewsSyntaxParser(emb)
ner_tagger = NewsNERTagger(emb)

# Fact extractors share one morphological vocabulary.
names_extractor = NamesExtractor(morph_vocab)
dates_extractor = DatesExtractor(morph_vocab)
money_extractor = MoneyExtractor(morph_vocab)
addr_extractor = AddrExtractor(morph_vocab)

from loguru import logger
from classificator import preproccesor
from clasterizator import main_patrol_func

import sys
sys.path.append('./models/tensorflow1/models/research/object_detection')
from Object_detection_image import get_image


# Natasha segmenter and morphological vocabulary for the pipeline
# (this fragment continues past the end of the visible chunk).
segmenter = Segmenter()
morph_vocab = MorphVocab()