def get(self, request):
    username = request.user.username
    labeled_articles = Article.objects(seen_by=username).count()
    interesting_articles = Article.objects(interesting_to=username).count()

    return render(
        request, "news_selection/done.html", {
            "labeled_articles": labeled_articles,
            "interesting_articles": interesting_articles,
        })
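# The views and tests in this section lean on an Article document (with an
# embedded Comment) whose definition is not shown here. The following is a
# minimal sketch reconstructed from the fields this code touches; field types
# and anything beyond those fields are assumptions, not the project's actual
# model.
from mongoengine import (Document, EmbeddedDocument, EmbeddedDocumentListField,
                         StringField, LongField, DateTimeField, FloatField,
                         ListField, BooleanField)


class Comment(EmbeddedDocument):
    # Fields used by load_replies() and create_samples() below
    tweet_id = LongField()
    user_id = LongField()
    text = StringField()
    created_at = DateTimeField()
    hateful_value = FloatField()


class Article(Document):
    # Fields used by the tests, views and scripts in this section
    tweet_id = LongField()
    text = StringField()
    title = StringField()
    body = StringField()
    html = StringField()
    url = StringField()
    slug = StringField()
    user = StringField()
    first_paragraphs = ListField(StringField())
    created_at = DateTimeField()
    dummy = BooleanField(default=False)
    comments = EmbeddedDocumentListField(Comment)
    # Per-user labelling state used by the views
    seen_by = ListField(StringField())
    interesting_to = ListField(StringField())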
def test_create_with_class_method():
    creator = user(screen_name="LANACION")
    commenter_1 = user()
    commenter_2 = user()

    tweet = {
        "_id": 123456,
        "text": "Esto es una noticia muy triste",
        "article": {
            "title": "Python 2 ya no tiene mantenimiento",
            "body": "Desde el 1ro de Enero de 2020, Python 2 ya no tiene mantenimiento",
            "html": "Algo de html",
            "url": "unaurl",
        },
        "created_at": datetime.utcnow(),
        "user": creator,
        "replies": [
            comment_tweet(text="Aguante Python 3"),
            comment_tweet(text="Aguante NodeJS"),
        ],
    }

    art = Article.from_tweet(tweet)
    art.save()

    assert len(art.comments) == 2
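# The tests rely on the helpers user(), comment_tweet() and comment(), which
# are not shown in this section. A hypothetical sketch of such fixtures
# follows, assuming user() and comment_tweet() build Twitter-API-like dicts
# and comment() builds a Comment embedded document; every field beyond what
# the tests actually read is an assumption.
import random
from datetime import datetime


def user(screen_name=None):
    # A minimal Twitter-like user payload
    uid = random.randint(1, 10**9)
    return {
        "id": uid,
        "screen_name": screen_name or f"user_{uid}",
    }


def comment_tweet(text="Un comentario", **kwargs):
    # A minimal reply-tweet payload
    reply = {
        "_id": random.randint(1, 10**9),
        "text": text,
        "user": user(),
        "created_at": datetime.utcnow(),
    }
    reply.update(kwargs)
    return reply


def comment(text="Un comentario"):
    # A Comment embedded document, as attached to Article.comments in the tests
    return Comment(
        tweet_id=random.randint(1, 10**9),
        user_id=random.randint(1, 10**9),
        text=text,
        created_at=datetime.utcnow(),
    )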
def test_create_article_with_slug():
    art = Article(
        tweet_id=123,
        text="This is a tweet",
        title="This is a unique title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=datetime.utcnow() - timedelta(days=1),
    )

    comments = [comment(), comment()]
    art.comments = comments
    art.save()

    art = Article.objects.get(tweet_id=123)

    assert art.slug is not None
def merge_articles(database="hatespeech-selection"):
    """
    Merges articles with the same title (per user), keeping the oldest one
    """
    client = connect(database)

    print("Checking whether any articles have empty first paragraphs")
    empty_pars = Article.objects(first_paragraphs=None)
    for art in tqdm(empty_pars, total=empty_pars.count()):
        if not art.first_paragraphs:
            # Re-save; presumably a save hook repopulates first_paragraphs
            art.save()

    first_count = Article.objects.count()
    print(f"We have {first_count} articles")

    users = Article.objects.distinct('user')
    for user in users:
        distinct_titles = Article.objects(user=user).distinct('title')
        total_count = Article.objects(user=user).count()
        print(
            f"{user:<15} --> {len(distinct_titles):<5} distinct, {total_count} total"
        )

        deleted_articles = 0
        for title in tqdm(list(distinct_titles)):
            articles = Article.objects(title=title, user=user).order_by('created_at')
            count = articles.count()
            if count >= 2:
                # Keep the oldest article, fold the duplicates' comments into
                # it, and delete the duplicates
                first_article = articles[0]
                for art in articles[1:]:
                    first_article.comments += art.comments
                    art.delete()
                    deleted_articles += 1
                first_article.save()

        print(f"Articles deleted for {user:<15}: {deleted_articles}")
def test_create_article():
    art = Article(
        tweet_id=12345,
        text="This is a tweet",
        title="This is a title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=datetime.utcnow() - timedelta(days=1),
    )

    comments = [comment(), comment()]
    art.comments = comments
    art.save()

    art = Article.objects.get(tweet_id=12345)

    assert len(art.comments) == 2
    assert art.comments[0].text == comments[0].text
    assert art.comments[1].text == comments[1].text
def get(self, request):
    # TODO: CHANGE THIS
    """
    We pick one random article out of the next 100 (at most) articles
    to be labelled
    """
    username = request.user.username

    next_articles = Article.next_articles_to_be_labelled(username)
    num_articles = next_articles.count()

    if num_articles == 0:
        return redirect('news_selection:done')

    to_be_considered = min(num_articles, 100)
    idx = random.randint(0, to_be_considered - 1)
    article = next_articles[idx]

    return redirect('news_selection:label', article.slug)
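# The view above calls Article.next_articles_to_be_labelled(username), whose
# implementation is not part of this section. Since the "done" view counts
# Article.objects(seen_by=username) as already labelled, a plausible sketch is
# a queryset of non-dummy articles the user has not seen yet; the dummy filter
# and the ordering are assumptions. Shown as a plain function here, although
# the project exposes it as a classmethod on Article.
def next_articles_to_be_labelled(username):
    # Articles this user has not labelled yet
    return Article.objects(dummy__ne=True,
                           seen_by__ne=username).order_by('created_at')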
def test_create_articles_with_different_slugs():
    art1 = Article(
        tweet_id=1919,
        text="This is a tweet",
        title="My title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=datetime.utcnow() - timedelta(days=1),
    )
    art2 = Article(
        tweet_id=19191,
        text="This is a tweet",
        title="My title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=datetime.utcnow() - timedelta(days=1),
    )

    art1.save()
    art2.save()

    assert art1.slug != art2.slug
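# The two slug tests only pin down the slug's behaviour (non-null, and unique
# even when titles collide), not how it is built. One way to get that
# behaviour is to slugify the title and append a short random suffix on save;
# this is an illustrative sketch, not necessarily how the project actually
# generates slugs.
import uuid
from django.utils.text import slugify


def make_slug(title):
    # Slugified title plus a random suffix, so two articles with the same
    # title (as in the test above) still get different slugs
    return f"{slugify(title)}-{uuid.uuid4().hex[:8]}"


# e.g. wired into the model as a save hook:
#     def save(self, *args, **kwargs):
#         if not self.slug:
#             self.slug = make_slug(self.title)
#         return super().save(*args, **kwargs)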
def load_replies(database, drop_replies=True):
    """
    Loads each article's comments into the Reply collection

    Arguments:

    database: string
        Name of mongo database
    drop_replies: boolean (default: True)
        Whether to drop existing replies first
    """
    client = connect(database)

    if drop_replies:
        deleted = Reply.objects.delete()
        print(f"Dropped {deleted} replies")

    articles = Article.objects(dummy__ne=True)

    for art in tqdm(articles, total=articles.count()):
        for comm in art.comments:
            reply = Reply(
                article=art,
                text=comm.text,
                tweet_id=comm.tweet_id,
                user_id=comm.user_id,
                created_at=comm.created_at,
            )
            reply.save()
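# load_replies() only shows how Reply documents are created, not how Reply is
# declared. A minimal sketch consistent with the fields used above; the field
# types are assumptions.
from mongoengine import Document, ReferenceField, StringField, LongField, DateTimeField


class Reply(Document):
    article = ReferenceField('Article')
    tweet_id = LongField()
    user_id = LongField()
    text = StringField()
    created_at = DateTimeField()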
def create_samples(database,
                   drop_groups=True,
                   drop_articles=True,
                   num_articles=30,
                   min_comments=20,
                   sampled_comments=50,
                   clone_and_sample=False):
    """
    Create samples of articles to be labelled

    Arguments:

    database: string
        Name of mongo database
    drop_groups: boolean (default: True)
        Whether to drop existing groups of samples
    drop_articles: boolean (default: True)
        Whether to drop existing dummy articles
    num_articles: int (default: 30)
        Number of articles to sample
    min_comments: int (default: 20)
        Minimum number of comments an article needs to be taken into account
    sampled_comments: int (default: 50)
        Number of comments per article, passed through to create_group
    clone_and_sample: boolean (default: False)
        Passed through to create_group
    """
    client = connect(database)
    db = client[database]

    if drop_articles:
        deleted = Article.objects(dummy=True).delete()
        print(f"Dropped {deleted} dummy articles")
    print(f"Number of articles: {Article.objects.count()}")

    groups = Group.objects
    if drop_groups:
        print(f"Dropping {groups.count()} groups")
        Group.objects.delete()
    else:
        print("Not dropping groups")

    # Only consider articles with at least min_comments comments
    initial_query = {
        f"comments__{min_comments-1}__exists": True,
    }
    articles = Article.objects(**initial_query).as_pymongo()
    print(f"Articles with at least {min_comments} comments: {articles.count()}\n\n")

    articles = list(articles)
    for article in articles:
        hateful_comments = [
            c for c in article["comments"] if c["hateful_value"] > 0.5
        ]
        article["num_hateful_comments"] = len(hateful_comments)
        article["avg_hate_value"] = sum(
            c["hateful_value"] for c in article["comments"]) / len(article["comments"])

    # Create groups of hateful articles, one per avg_hate_value threshold
    thresholded_articles = {
        k: [art for art in articles if art["avg_hate_value"] > k]
        for k in [0.15, 0.20, 0.25, 0.30]
    }

    random.seed(2020)

    print("Creating hateful groups")
    for key, hateful_articles in tqdm(thresholded_articles.items()):
        #selected_articles = random.sample(hateful_articles, num_articles)
        selected_articles = hateful_articles
        selected_articles = Article.objects(
            id__in=[t["_id"] for t in selected_articles]).order_by('created_at')

        group_name = f"Comments {key:.2f}"
        group = create_group(group_name,
                             selected_articles,
                             sampled_comments,
                             clone_and_sample=clone_and_sample)

        print(f"Created {group.name} group with {len(group.articles)} articles")
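# create_samples() delegates to create_group(), which is not shown here. A
# purely illustrative sketch follows, assuming Group holds a name and a list
# of articles (both attributes are read above), and that clone_and_sample=True
# means "store dummy copies of the articles with at most sampled_comments
# randomly sampled comments", which would explain why dummy articles are
# dropped at the top of create_samples. The real helper may behave
# differently.
import random


def create_group(name, articles, sampled_comments, clone_and_sample=False):
    group_articles = []
    for art in articles:
        if clone_and_sample and len(art.comments) > sampled_comments:
            # Store a dummy copy of the article that only keeps a random
            # sample of its comments (unique fields such as tweet_id are
            # omitted here for simplicity)
            clone = Article(
                text=art.text,
                title=art.title,
                body=art.body,
                html=art.html,
                url=art.url,
                user=art.user,
                created_at=art.created_at,
                dummy=True,
                comments=random.sample(list(art.comments), sampled_comments),
            )
            clone.save()
            group_articles.append(clone)
        else:
            group_articles.append(art)

    group = Group(name=name, articles=group_articles)
    group.save()
    return group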