Example #1
def news():
    """Show News Feed"""
    # Get articles
    articles = get_articles()

    # send articles for rendering
    return render_template("news.html", articles=articles)
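The view only calls a get_articles() helper and hands the result to render_template. A minimal, self-contained sketch of how such a view could be wired into a Flask app; the /news route, the stub get_articles() and the templates/news.html file are assumptions, not part of the original example:

from flask import Flask, render_template

app = Flask(__name__)


def get_articles():
    # hypothetical stub standing in for the real data source
    return [{"title": "Example headline", "text": "Example body"}]


@app.route("/news")
def news():
    """Show the news feed."""
    # fetch articles and pass them to the template for rendering
    articles = get_articles()
    return render_template("news.html", articles=articles)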
Example #2
import pandas as pd  # used below to build the DataFrame

# ElasticsearchClient, get_articles, extract_ne and get_scientific are
# project-specific helpers assumed to be defined or imported elsewhere
# in this module.

if __name__ == '__main__':

    print("Starting...")
    # create the Elasticsearch client
    es = ElasticsearchClient('10.0.0.35')
    print("Connected")

    # set the time window to fetch from Elasticsearch
    from_ = '2021-02-18T00:00:00.000'
    to_ = '2021-02-18T23:59:00.000'
    timeframe = (from_, to_)

    # get articles
    print("Fetching articles...")
    articles = get_articles(es, timeframe)
    print('Total articles fetched: {}'.format(len(articles)))

    # transform the fetched articles into a DataFrame and expand the nested
    # article fields into their own columns
    data = pd.DataFrame(articles).rename(columns={0: 'datetime', 1: 'article'})
    data = data.join(data['article'].apply(pd.Series))
    cleaned_data = get_scientific(data)

    network = list()
    for index, row in cleaned_data.iterrows():
        extracted_entities = extract_ne(row['text'])
        print('\tFor article with title: `{}` found {} entities'.format(
            row['title'], len(extracted_entities)))

        for entity in extracted_entities:
            pair = {
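The listing breaks off just after `pair = {` inside the inner loop. Purely as a hedged guess at the intent, each extracted entity would be wrapped in a small record and appended to network; the keys title, datetime and entity below are hypothetical, chosen only to match the columns visible earlier in the example:

        for entity in extracted_entities:
            pair = {
                'title': row['title'],        # hypothetical keys; the
                'datetime': row['datetime'],  # original dict contents
                'entity': entity,             # are not shown
            }
            network.append(pair)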
Example #3
        TRAIN_DEVICE = args.device
    else:
        TRAIN_DEVICE = DEFAULT_TRAIN_DEVICE

    torch.manual_seed(SEED)
    device = torch.device(TRAIN_DEVICE)

    theme_folders = helpers.get_article_themes(ARTICLE_FOLDER)

    TEXT = data.Field(sequential=True, tokenize='spacy', batch_first=True)
    THEME = data.LabelField(batch_first=True, use_vocab=False)
    fields = {'text': TEXT}
    for theme in theme_folders:
        fields[theme] = THEME

    df = helpers.get_articles(ARTICLE_FOLDER, theme_folders)

    training_data = helpers.DataFrameDataset(df, fields)

    train_data, valid_data = training_data.split(
        split_ratio=0.9, random_state=random.seed(SEED))

    # build the vocabulary and attach pretrained GloVe (6B, 200d) embeddings
    TEXT.build_vocab(train_data, max_size=1000, vectors='glove.6B.200d')

    train_iter, val_iter = data.BucketIterator.splits(
        (train_data, valid_data),
        batch_sizes=(BATCH_SIZE, BATCH_SIZE),
        device=device,
        sort_key=lambda x: len(x.text),
        sort_within_batch=False,
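The example stops partway through the data.BucketIterator.splits call, so the remaining arguments are unknown. Two notes on the shown part: Field, LabelField and BucketIterator come from the classic torchtext data API (moved to torchtext.legacy in later releases), and after TEXT.build_vocab(..., vectors='glove.6B.200d') the pretrained vectors are available as TEXT.vocab.vectors. A hedged sketch of the step that usually follows, copying those vectors into an embedding layer; it assumes the TEXT field from the example above and is not part of the original code:

import torch.nn as nn

# assumes TEXT.build_vocab(...) from the example above has already run;
# the vocabulary holds at most max_size tokens plus the <unk>/<pad> specials
vocab_size, embedding_dim = TEXT.vocab.vectors.shape   # embedding_dim == 200
embedding = nn.Embedding(vocab_size, embedding_dim,
                         padding_idx=TEXT.vocab.stoi[TEXT.pad_token])
embedding.weight.data.copy_(TEXT.vocab.vectors)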
Example #4
def archives(request, tag=None):
    return render_to_response(request, 'ladypenh/archives.html',
                              dict(theme_name=helpers.get_theme(helpers.today()),
                                   articles=helpers.get_articles(helpers.today(), tag),
                                   tags=helpers.get_tags()))
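Here render_to_response takes the request as its first argument, so it is presumably a project-local wrapper rather than the old django.shortcuts.render_to_response. With stock Django the same view can be written with django.shortcuts.render; a hedged sketch, assuming the helpers module is importable as shown:

from django.shortcuts import render

from ladypenh import helpers  # assumed import path for the helpers module


def archives(request, tag=None):
    today = helpers.today()
    context = {
        'theme_name': helpers.get_theme(today),
        'articles': helpers.get_articles(today, tag),
        'tags': helpers.get_tags(),
    }
    # render() is the standard Django shortcut: (request, template_name, context)
    return render(request, 'ladypenh/archives.html', context)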