def get(self, request):
        """Render the "done" page with the requesting user's article totals.

        The template context carries two counts for the logged-in username:
        the articles whose ``seen_by`` matches it and the articles whose
        ``interesting_to`` matches it.
        """
        current_user = request.user.username

        context = {
            "labeled_articles":
            Article.objects(seen_by=current_user).count(),
            "interesting_articles":
            Article.objects(interesting_to=current_user).count(),
        }
        return render(request, "news_selection/done.html", context)
Exemple #2
0
def test_create_with_class_method():
    """Article.from_tweet on a tweet with two replies yields two comments."""
    author = user(screen_name="LANACION")

    # Two further users are created, as in the original test, although
    # nothing below refers to them.
    first_commenter = user()
    second_commenter = user()

    article_payload = {
        "title": "Python 2 ya no tiene mantenimiento",
        "body":
        "Desde el 1ro de Enero de 2020, Python 2 ya no tiene mantenimiento",
        "html": "Algo de html",
        "url": "unaurl",
    }
    posted_at = datetime.utcnow()
    reply_tweets = [
        comment_tweet(text="Aguante Python 3"),
        comment_tweet(text="Aguante NodeJS"),
    ]
    raw_tweet = {
        "_id": 123456,
        "text": "Esto es una noticia muy triste",
        "article": article_payload,
        "created_at": posted_at,
        "user": author,
        "replies": reply_tweets,
    }

    article = Article.from_tweet(raw_tweet)
    article.save()

    assert len(article.comments) == 2
Exemple #3
0
def test_create_article_with_slug():
    """An article fetched back by tweet_id after saving has a non-null slug."""
    fields = dict(
        tweet_id=123,
        text="This is a tweet",
        title="This is a unique title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=datetime.utcnow() - timedelta(days=1),
    )
    article = Article(**fields)
    article.comments = [comment(), comment()]
    article.save()

    fetched = Article.objects.get(tweet_id=123)
    assert fetched.slug is not None
def merge_articles(database="hatespeech-selection"):
    """
    Collapse articles of the same user that share a title into one.

    For every (user, title) pair with two or more articles, the earliest one
    by ``created_at`` is kept, the comments of the later ones are appended to
    it, and the later ones are deleted.

    Arguments:

    database: string (default: "hatespeech-selection")
        Name of the mongo database to connect to
    """
    connect(database)

    print("Viendo si hay alguno con búsqueda vacía")

    # Articles without first_paragraphs are saved again before merging.
    # NOTE(review): presumably save() fills first_paragraphs in -- confirm.
    missing_paragraphs = Article.objects(first_paragraphs=None)
    for article in tqdm(missing_paragraphs, total=missing_paragraphs.count()):
        if article.first_paragraphs:
            continue
        article.save()

    print(f"Tenemos {Article.objects.count()} artículos")

    for owner in Article.objects.distinct('user'):
        titles = Article.objects(user=owner).distinct('title')
        owned_total = Article.objects(user=owner).count()
        print(
            f"{owner:<15} --> {len(titles):<5} distintos, {owned_total} total"
        )

        removed = 0

        for title in tqdm(list(titles)):
            same_title = Article.objects(
                title=title, user=owner).order_by('created_at')

            # A title held by a single article needs no merging.
            if same_title.count() < 2:
                continue

            keeper = same_title[0]
            for duplicate in same_title[1:]:
                keeper.comments += duplicate.comments
                duplicate.delete()
                removed += 1
            keeper.save()

        print(f"Artículos borrados de {owner:<15}: {removed}")
Exemple #5
0
def test_create_article():
    """Comments attached to an article survive a save / reload round trip."""
    yesterday = datetime.utcnow() - timedelta(days=1)
    article = Article(
        tweet_id=12345,
        text="This is a tweet",
        title="This is a title",
        body="This is a detailed explanation of the news",
        url="http://clarin.com/url",
        html="algodehtml",
        created_at=yesterday,
    )
    expected_comments = [comment(), comment()]
    article.comments = expected_comments
    article.save()

    stored = Article.objects.get(tweet_id=12345)

    assert len(stored.comments) == 2
    for position in (0, 1):
        assert (stored.comments[position].text ==
                expected_comments[position].text)
    def get(self, request):
        """Redirect the user to a randomly chosen article awaiting a label.

        One article is picked at random out of the next 100 (at most)
        articles returned by ``Article.next_articles_to_be_labelled`` for the
        requesting username, and the user is redirected to its label page.
        When there is no such article the user is redirected to the "done"
        page instead.
        """
        # TODO: CHANGE THIS
        username = request.user.username

        next_articles = Article.next_articles_to_be_labelled(username)
        num_articles = next_articles.count()

        if num_articles == 0:
            return redirect('news_selection:done')

        # Reuse the count that was just checked against zero. Counting again
        # costs an extra query and, if the set emptied in between, would make
        # randint(0, -1) raise ValueError.
        to_be_considered = min(num_articles, 100)
        idx = random.randint(0, to_be_considered - 1)

        article = next_articles[idx]

        return redirect('news_selection:label', article.slug)
Exemple #7
0
def test_create_article_with_differents_slug():
    """Two saved articles that share a title end up with different slugs."""

    def make_article(tweet_id):
        # Everything except the tweet id is the same for both articles.
        return Article(
            tweet_id=tweet_id,
            text="This is a tweet",
            title="My title",
            body="This is a detailed explanation of the news",
            url="http://clarin.com/url",
            html="algodehtml",
            created_at=datetime.utcnow() - timedelta(days=1),
        )

    first = make_article(1919)
    second = make_article(19191)

    first.save()
    second.save()
    assert first.slug != second.slug
def load_replies(database, drop_replies=True):
    """
    Create one Reply document per comment of every non-dummy article

    Arguments:

    database: string
        Name of mongo database

    drop_replies: boolean (default: True)
        Whether to delete all existing replies before loading
    """
    # Only the connection side effect is needed; the returned client is not
    # used by this function.
    connect(database)

    if drop_replies:
        deleted = Reply.objects.delete()
        print(f"Dropped {deleted} replies")

    # Articles flagged as dummy are left out.
    articles = Article.objects(dummy__ne=True)
    for art in tqdm(articles, total=articles.count()):
        for comm in art.comments:
            reply = Reply(
                article=art,
                text=comm.text,
                tweet_id=comm.tweet_id,
                user_id=comm.user_id,
                created_at=comm.created_at,
            )

            reply.save()
Exemple #9
0
def create_samples(database,
                   drop_groups=True,
                   drop_articles=True,
                   num_articles=30,
                   min_comments=20,
                   sampled_comments=50,
                   clone_and_sample=False):
    """
    Create groups of articles to be labelled, one per hate threshold

    Articles with at least `min_comments` comments are scored by the mean
    `hateful_value` of their comments. For each threshold in
    0.15, 0.20, 0.25, 0.30 a group named "Comments <threshold>" is created
    from the articles whose mean is above that threshold.

    Arguments:

    database: string
        Name of mongo database

    drop_groups: boolean (default: True)
        Whether to drop existing groups of samples

    drop_articles: boolean (default: True)
        Whether to delete the articles flagged as dummy before sampling

    num_articles: int (default: 30)
        Number of articles to sample. Not used by the code visible here:
        the sampling line is commented out.

    min_comments: int (default: 20)
        Minimum number of comments to take it into account

    sampled_comments: int (default: 50)
        Passed through to create_group

    clone_and_sample: boolean (default: False)
        Passed through to create_group
    """
    client = connect(database)
    # NOTE(review): `db` is not used in the code visible here.
    db = client[database]

    if drop_articles:
        deleted = Article.objects(dummy=True).delete()
        print(f"Dropped {deleted} dummy articles")

    print(f"Number of articles: {Article.objects.count()}")

    groups = Group.objects
    if drop_groups:
        print(f"Dropping {groups.count()} groups")
        Group.objects.delete()
    else:
        print("Not dropping groups ")

    # An element at index min_comments-1 exists only when the comments list
    # holds at least min_comments entries.
    initial_query = {
        f"comments__{min_comments-1}__exists": True,
    }
    # as_pymongo() yields plain dicts, so the extra keys below can be attached.
    articles = Article.objects(**initial_query).as_pymongo()
    print(f"Articles with at least {min_comments}: {articles.count()}\n\n")

    articles = list(articles)

    # Annotate each article dict with its number of comments scored above 0.5
    # and with the mean hateful_value over all of its comments.
    for article in articles:
        hateful_comments = [
            c for c in article["comments"] if c["hateful_value"] > 0.5
        ]

        article["num_hateful_comments"] = len(hateful_comments)
        article["avg_hate_value"] = sum(c["hateful_value"]
                                        for c in article["comments"]) / len(
                                            article["comments"])
    """
    Create hateful articles
    """
    # Maps each threshold to the articles whose mean is above it; an article
    # above a higher threshold also appears under every lower one.
    thresholded_articles = {
        k: [art for art in articles if art["avg_hate_value"] > k]
        for k in [0.15, 0.20, 0.25, 0.30]
    }
    # NOTE(review): fixed seed, presumably so that any sampling done inside
    # create_group is reproducible -- confirm.
    random.seed(2020)

    print("Creating hateful groups")
    for key, hateful_articles in tqdm(thresholded_articles.items()):
        # Sampling is disabled: every article above the threshold is used.
        #selected_articles = random.sample(hateful_articles, num_articles)
        selected_articles = hateful_articles
        # Re-fetch the chosen articles as documents, oldest first.
        selected_articles = Article.objects(
            id__in=[t["_id"]
                    for t in selected_articles]).order_by('created_at')
        group_name = f"Comments {key:.2f}"
        group = create_group(group_name,
                             selected_articles,
                             sampled_comments,
                             clone_and_sample=clone_and_sample)
        print(
            f"Created {group.name} group with {len(group.articles)} articles")