コード例 #1
0
ファイル: classifier_test.py プロジェクト: masonshuler/MuDimA
 def test_keywordless_articles(self):
     """Check that articles without keywords end up in their own grouping."""
     keywordless = [
         models.Article(url=address, keywords=[])
         for address in ("example.com", "test.com")
     ]
     candidates = keywordless + list(test_utils.SIMILAR_ARTICLES)
     groups = classifier.group_articles(candidates)
     # One grouping for the similar articles, one for the keywordless ones.
     self.assertEqual(2, len(groups))
     for group in groups:
         first_article = group.get_articles()[0]
         if not len(first_article.get_keywords()):
             # The keywordless grouping is expected to report in_database().
             self.assertTrue(group.in_database())
コード例 #2
0
 def create_article(article_text):
     """Build a models.Article from a record mapping.

     The article is created from the 'record_lens_id', 'title', 'authors'
     and 'volume' entries of ``article_text``; the volume is passed as the
     string form of its integer value. If building the article that way
     raises for any reason, the article is built again with '-1' as volume.
     """
     def build(volume):
         # Single place that knows the positional layout of models.Article.
         return models.Article(article_text['record_lens_id'],
                               article_text['title'],
                               article_text['authors'],
                               volume)

     try:
         return build(str(int(article_text['volume'])))
     except Exception:
         return build('-1')
コード例 #3
0
    def find_unprocessed_articles(self, cap=10000):
        """Return articles that do not have author data recorded yet.

        First collects every article id present in prod.article_authors, then
        walks prod.articles (rows with a non-null url, ordered by id) and
        builds a models.Article(id, url) for each id that was not collected.

        Args:
            cap: the scan stops and returns as soon as this many articles
                have been collected.

        Returns:
            A list of models.Article objects still to be processed.
        """
        # A set keeps the "already done?" check O(1) per row; the original
        # list made the second loop quadratic in the number of articles.
        done = set()
        # NOTE(review): this query goes through self.connection while the
        # one below uses self.PROD; confirm both are meant to differ.
        with self.connection.db.cursor() as cursor:
            cursor.execute(
                'SELECT DISTINCT(article) FROM prod.article_authors ORDER BY article;'
            )
            for record in cursor:
                if len(record) > 0:
                    done.add(record[0])
        self.log.record(f'Found {len(done)} done already')

        todo = []
        with self.PROD.db.cursor() as cursor:
            cursor.execute(
                'SELECT id, url FROM prod.articles WHERE url IS NOT NULL ORDER BY id;'
            )
            for record in cursor:
                if len(record) == 0:
                    self.log.record('Empty entry', 'error')
                    continue
                if record[0] in done:
                    continue
                todo.append(models.Article(record[0], record[1]))
                if len(todo) >= cap:
                    self.log.record(
                        f'Found max of {cap} entries to do; returning')
                    return todo
        self.log.record(f'GOT {len(todo)} TO DO')
        return todo
コード例 #4
0
ファイル: storage.py プロジェクト: lsuanet/renews
def create_article(db: Session, minio: Minio, article: schemas.ArticleCreate):
    """Store a new article: body text via minio_service, metadata in the DB.

    The body is written under '<news_source_id>/<random uuid>.txt', a
    models.Article row pointing at that path is committed, and the returned
    row carries the body re-read from storage in ``article_body``.
    """
    object_path = '{}/{}.txt'.format(str(article.news_source_id),
                                     str(uuid.uuid4()))

    # The body goes to object storage before the row is written.
    minio_service.store_string(minio,
                               body=article.article_body,
                               minio_file=object_path)

    record = models.Article(
        site_article_id=article.site_article_id,
        title=article.title,
        body_file_path=object_path,
        url=article.url,
        category=article.category,
        published=article.published,
        article_last_updated=article.article_last_updated,
        news_source_id=article.news_source_id,
    )
    db.add(record)
    db.commit()
    db.refresh(record)

    # Attach the stored body so callers get it together with the row.
    stored_body = minio_service.get_string(minio,
                                           minio_file=record.body_file_path)
    record.article_body = stored_body
    return record
コード例 #5
0
ファイル: views.py プロジェクト: zongyimin/py_training
def new_article(request):
    """Render the article creation page and handle its form submission.

    GET renders the page with all categories. A valid POST stores the
    uploaded head image, saves a new models.Article and renders the page
    with it; an invalid POST re-renders the page with the bound form.
    """
    categorys = models.Category.objects.all()
    if request.method != 'POST':
        return render(request, 'create_article.html', {'categorys': categorys})

    print(request.POST)
    form = ArticleForm(request.POST, request.FILES)
    if not form.is_valid():
        print(form.errors)
        context = {'categorys': categorys, 'form': form}
        return render(request, 'create_article.html', context)

    print("form is valid")
    print(request.FILES)
    data = form.cleaned_data
    # head_img is set from the stored upload below, not from the form data.
    del data['head_img']
    uploaded_filename = handle_uploaded_file(request,
                                             request.FILES['head_img'])
    data['author_id'] = request.user.userprofile.id
    try:
        new_article_obj = models.Article(**data)
        new_article_obj.head_img = uploaded_filename
        new_article_obj.save()
    except Exception as e:
        # Any failure while saving is reported back as the response body.
        return HttpResponse(e)
    return render(request, 'create_article.html',
                  {'new_article_obj': new_article_obj})
コード例 #6
0
def post_article():
    """URL - /api/v1.0/article
    Method - POST

    Create an article from the URL in the JSON body and return its
    dictionary form. Aborts with 400 when the body is missing or has no
    'url'. An article already stored for that URL is returned as is.
    """
    payload = request.get_json()
    if not (payload and 'url' in payload):
        abort(400)
    url = payload['url']

    # Reuse the stored article when this URL is already known.
    existing = models.Article.query.filter_by(url=url).first()
    if existing:
        return jsonify(existing.dictionary()), 201

    # Otherwise parse the page and persist the result.
    parsed = ParsedArticle(url)
    fields = dict(
        url=url,
        title=parsed.get_title(),
        content=parsed.get_content(),
        author=parsed.get_author(),
        excerpt=parsed.get_excerpt(),
        date=parsed.get_date(),
        dek=parsed.get_dek(),
        lead_image=parsed.get_lead_image(),
    )
    article = models.Article(**fields)
    db.session.add(article)
    db.session.commit()
    return jsonify(article.dictionary()), 201
コード例 #7
0
def _add_standalone_article(raw_url, added_by):
    """Register a user-submitted URL and return its models.Article.

    The URL is normalised (query string dropped, scheme prepended, leading
    and trailing '/', '<', '>' and whitespace stripped, non-ASCII characters
    removed). A StandaloneArticle is looked up for it, also trying the
    http/https-swapped form, and created when neither exists. The matching
    models.Article is then fetched, or created with a fresh git repo.

    Args:
        raw_url: the URL as typed or pasted by the user.
        added_by: stored on a newly created StandaloneArticle.

    Returns:
        The existing or newly saved models.Article.
    """
    url = raw_url.split('?')[0]  # For if user copy-pastes from news site
    url = prepend_http(url)
    url = url.strip('/')
    url = url.strip('<>')
    url = url.strip()

    # This is a hack to deal with unicode passed in the URL.
    # Otherwise gives an error, since our table character set is latin1.
    # (Why not encode the table as unicode?)
    url = url.encode('ascii', 'ignore')

    decoded_url = decode_scheme_colon(url)
    try:
        try:
            article = StandaloneArticle.objects.get(url=decoded_url)
        except StandaloneArticle.DoesNotExist:
            # Same page may have been stored under the other scheme.
            article = StandaloneArticle.objects.get(
                url=swap_http_https(decoded_url))
    except StandaloneArticle.DoesNotExist:
        article = StandaloneArticle(url=decoded_url, added_by=added_by)
        article.save()

    # Trigger a scraper call
    try:
        a = models.Article.objects.get(url=article.url)
    except models.Article.DoesNotExist:
        # Use the same qualified name as the lookup above, so the handler
        # does not depend on a separate bare `Article` import.
        a = models.Article(url=decoded_url, git_dir=get_and_make_git_repo())
        a.save()

    return a
コード例 #8
0
ファイル: test.py プロジェクト: bluecatchbird/sms
async def createNewArticle(name: str,
                           project: models.Project = Depends(getProject),
                           db: Session = Depends(get_db)):
    """Create an article called ``name``, attach it to ``project`` and commit.

    Returns the newly created models.Article.
    """
    article = models.Article(name=name)
    db.add(article)
    # Link the article to the project before the single commit.
    project.articles.append(article)
    db.commit()
    return article
コード例 #9
0
 def create(self):
     """Create an article from the parsed request data and its head image.

     The article is saved once from ``self.parse_data()``, then saved again
     after the uploaded 'head_img' file has been stored and its path set.
     """
     self.data = self.parse_data()
     article = models.Article(**self.data)
     article.save()

     upload = self.request.FILES["head_img"]
     stored_name = handle_upload_file(self.request, upload)
     article.head_img = "imgs/upload/%s" % stored_name
     article.save()
     return article
コード例 #10
0
def add_art(req):
    """Render the add-article page and handle its form submission.

    On a valid POST: up to three TextRank keywords are extracted from the
    title, joined with ',' into the article's 'keywords', and each one has
    its models.Tags counter bumped (or a new tag created with num=1). The
    first 200 characters of the content, with HTML stripped, become the
    description. The article is saved and the page rendered again.

    On an invalid POST the form errors are passed to the template.
    """
    errs = ''

    if req.method == "POST":
        form = ArticleForm(req.POST, req.FILES)
        if form.is_valid():
            form_data = form.cleaned_data
            form_data['author_id'] = req.user.userprofile.id

            # Let jieba's TextRank extract keywords from the title.
            keywords = analyse.textrank(form_data['title'])

            # Keep the first three keywords and record each one as a tag.
            picked = []
            for position, keyword in enumerate(keywords, 1):
                picked.append(keyword)
                try:
                    # Existing tag: bump its usage counter.
                    have_tag = models.Tags.objects.get(tagname=keyword)
                    num = int(have_tag.num) + 1
                    models.Tags.objects.filter(tagname=keyword).update(num=num)
                except Exception:
                    # Lookup/update failed (typically: no such tag yet), so
                    # create it. Narrowed from a bare except so that
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    models.Tags(tagname=keyword, num=1).save()
                if position == 3:
                    break
            if picked:
                form_data['keywords'] = ','.join(picked)

            # Derive the article description from the start of the content.
            description = form_data['content']
            form_data['description'] = mvhtml.strip_tags(description[0:200])

            new_article_obj = models.Article(**form_data)
            new_article_obj.save()
            return render(req, 'addarticle.html')
        else:
            errs = form.errors

    # NOTE(review): when userprofile.id is falsy this view returns None.
    if req.user.userprofile.id:
        parent_category = models.Category.objects.filter(
            parent_category_id=None)
        category = models.Category.objects.all()

        return render(req, 'addarticle.html', {
            'parent_category': parent_category,
            'category': category,
            'errs': errs
        })
コード例 #11
0
 def create(self):
     """Create an article from the parsed request data and its head image.

     The uploaded 'head_img' file is stored first; the article is saved
     once, after its head_img path has been set.
     """
     self.data = self.parse_data()
     article = models.Article(**self.data)

     upload = self.request.FILES['head_img']
     stored_name = handle_upload_file(self.request, upload)
     article.head_img = 'static/imgs/upload/%s' % stored_name
     article.save()
     return article
コード例 #12
0
ファイル: database_reader.py プロジェクト: masonshuler/MuDimA
def get_ungrouped_articles():
    """Return an Article for every stored row that has text but no topic."""
    query = ("SELECT name, link, article_text FROM article "
             "WHERE article_text != '' AND topic_id IS NULL;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        return [
            models.Article(url=url, title=name, text=article_text)
            for name, url, article_text in cursor.fetchall()
        ]
コード例 #13
0
ファイル: app.py プロジェクト: drakulavich/hexlet-crud
def add_article():
    """Store the submitted 'name'/'body' form fields as a new article.

    Flashes a confirmation and redirects to the index page.
    """
    form = request.form
    entry = models.Article(form['name'], form['body'])
    db.session.add(entry)
    db.session.commit()

    flash('New entry was successfully created')
    return redirect(url_for('index'))
コード例 #14
0
def get_articles(source=None, distance=0):
    """Collect articles whose versions changed within a one-day window.

    The window ends ``distance`` days before now and is one day long. Only
    non-boring versions are loaded, and only for articles that have more
    than one of them and whose latest non-boring version date falls in the
    window.

    Args:
        source: optional host suffix; when given, only URLs matching
            ``^https?://<something.>source/`` are kept. The value is
            interpolated into the regex without escaping.
        distance: how many days back the window ends.

    Returns:
        A list of (article, last version, rowinfo) tuples, sorted by
        ``rowinfo[0][1].date`` in descending order.
    """
    articles = []
    # With no source the pattern is '' and therefore matches every URL.
    rx = re.compile(r'^https?://(?:[^/]*\.)%s/' % source if source else '')

    pagelength = datetime.timedelta(days=1)
    end_date = datetime.datetime.now() - distance * pagelength
    start_date = end_date - pagelength

    print 'Asking query'
    version_query = '''SELECT
    version.id, version.article_id, version.v, version.title,
      version.byline, version.date, version.boring, version.diff_json,
      T.age as age,
      Articles.url as a_url, Articles.initial_date as a_initial_date,
      Articles.last_update as a_last_update, Articles.last_check as a_last_check
    FROM version,
     (SELECT Articles.id as article_id, MAX(T3.date) AS age, COUNT(T3.id) AS num_vs
      FROM Articles LEFT OUTER JOIN version T3 ON (Articles.id = T3.article_id)
      WHERE (T3.boring=0) GROUP BY Articles.id
      HAVING (age > %s  AND age < %s  AND num_vs > 1 )) T, Articles
    WHERE (version.article_id = Articles.id) and
          (version.article_id = T.article_id) and
          NOT version.boring
    ORDER BY date'''

    # The two %s placeholders receive start_date and end_date, in that order.
    all_versions = models.Version.objects.raw(version_query,
                                              (start_date, end_date))
    article_dict = {}
    for v in all_versions:
        # Build the Article from the joined columns of the same row.
        a = models.Article(id=v.article_id,
                           url=v.a_url,
                           initial_date=v.a_initial_date,
                           last_update=v.a_last_update,
                           last_check=v.a_last_check)
        v.article = a
        # NOTE(review): grouping relies on Article instances with the same
        # id hashing/comparing equal; confirm against the model.
        article_dict.setdefault(v.article, []).append(v)

    for article, versions in article_dict.items():
        url = article.url
        if not rx.match(url):
            print 'REJECTING', url
            continue
        if 'blogs.nytimes.com' in url:  #XXX temporary
            continue

        # A single version has nothing to be compared with.
        if len(versions) < 2:
            continue
        rowinfo = get_rowinfo(article, versions)
        articles.append((article, versions[-1], rowinfo))
    print 'Queries:', len(
        django.db.connection.queries), django.db.connection.queries
    # x[-1] is rowinfo; sort newest first on the date of rowinfo[0][1].
    articles.sort(key=lambda x: x[-1][0][1].date, reverse=True)
    return articles
コード例 #15
0
    def post(self, user):
        """Create an article from the submitted title, body and tag keys.

        Each submitted 'tags' value is an encoded key that is turned into a
        db.Key. After storing, redirects to the article admin page.
        """
        encoded_tags = self.request.get_all('tags')
        new_article = models.Article(
            title=self.request.get('title'),
            body=self.request.get('body'),
        )
        new_article.tags.extend([db.Key(encoded=tag) for tag in encoded_tags])

        new_article.put()
        # Short pause before redirecting, presumably so the listing page
        # already sees the new entity; confirm.
        time.sleep(0.10)
        return self.redirect('/admin/article')
コード例 #16
0
ファイル: test_utils.py プロジェクト: masonshuler/MuDimA
 def setUp(self):
     """Patch the database name to a test database and build the fixtures."""
     name_patch = mock.patch("server.database_utils.database_name",
                             return_value="mudima_test.db")
     self._database_name_mock = name_patch
     name_patch.start()

     # Resolve the path after patching so it uses the patched name.
     patched_name = database_utils.database_name()
     self._database_location = database_utils.database_path(patched_name)
     self._delete_database()

     self.article = models.Article("example.com",
                                   title="Example",
                                   keywords=["0", "1"])
     self.grouping = models.Grouping(self.article)
コード例 #17
0
    def add(self, message):
        """Fetch the page behind ``message.url`` and store it as an article.

        Source metadata is copied from the HTML parser, the poster and
        timestamp from the message when present, and every lower-cased
        token from the parser's tokenizer is counted into article.tokens.

        Returns the saved models.Article.
        """
        source_parser = HTMLSource(message.url)
        source_parser.fetch()

        article = models.Article()
        for field in ('source', 'author', 'published', 'publisher'):
            setattr(article, field, getattr(source_parser, field))
        if message.timestamp:
            article.posted = message.timestamp
        if message.author:
            article.posted_by = message.author

        tokens = article.tokens
        tokenizer = source_parser.tokenizer()
        count = 0
        # `count` ends up as the total number of tokens seen.
        for count, raw_token in enumerate(tokenizer, 1):
            token = raw_token.lower()
            if token not in tokens:
                tokens[token] = 0
            tokens[token] += 1

        # NOTE(review): `context` is not defined in this method; it must
        # come from an enclosing scope.
        article.save(context)

        if DEBUG:
            debug_stats = {
                'tokenizer': tokenizer.stats(),
                'count': count,
            }
            self.__stats.update(debug_stats)

        return article
コード例 #18
0
 def search_name(self, value):
     """Collect a Magazine for every article whose source title has ``value``.

     For each record in ``self.articles`` whose 'Source Title' contains
     ``value``, a models.Article and a models.Magazine wrapping it are built
     and the magazine is appended to ``self.list_magazine``. Records with no
     'Source Title' are skipped.

     Args:
         value: text to look for inside the 'Source Title' field.
     """
     for article in self.articles:
         source_title = article.get('Source Title')
         # A missing title used to raise TypeError on the `in` test.
         if source_title is None or value not in source_title:
             continue
         article_object = models.Article(article.get('Lens ID'),
                                         article.get('Title'),
                                         source_title,
                                         article.get('Date Published'),
                                         article.get('Author'),
                                         article.get('Publisher'))
         # TODO: article_object should be appended to a list so that
         # articles accumulate per magazine.
         magazine_object = models.Magazine(source_title,
                                           article.get('ISSNs'),
                                           article_object)
         self.list_magazine.append(magazine_object)
コード例 #19
0
ファイル: views.py プロジェクト: RonaldHauw/G1932Y
def Submitarticle(request):
    """Store an article submitted through the article form.

    On POST the form's name, description, article and link values are
    handed to models.Article.__addarticle__ (an empty link becomes the
    string "None") and the client is redirected to '/L/#article'.
    """
    if request.method != 'POST':
        # NOTE(review): non-POST requests get no response object.
        return None

    form = article(request.POST)
    cur = models.Article()

    description = form['description'].value()
    name = form['name'].value()
    art = form['article'].value()
    link = form['link'].value()
    link = "None" if str(link) == "" else link
    cur.__addarticle__(name, description, art, link)
    return HttpResponseRedirect('/L/#article')
コード例 #20
0
 def test_clean_database(self):
     """Cleaning keeps the fixture grouping but drops the 2016 one."""
     def stored_url_count():
         return len(database_reader.get_urls())

     database_writer.write_groups([self.grouping])
     self.assertEqual(1, stored_url_count())
     database_writer.clean_database()
     self.assertEqual(1, stored_url_count())

     dated_article = models.Article(url="google.com",
                                    publishedAt="2016-10-11T23:41:34Z",
                                    keywords=["a"])
     database_writer.write_groups([models.Grouping(dated_article)])
     self.assertEqual(2, stored_url_count())
     database_writer.clean_database()
     self.assertEqual(1, stored_url_count())
コード例 #21
0
def group_articles(article_list=None, debug=False):
    """Assign articles to groupings by keyword similarity.

    Args:
        article_list: articles (or url strings, which are wrapped in
            models.Article) to group. When None, the ungrouped articles
            are read through database_reader.
        debug: when true, print progress for every article.

    Returns:
        The list of groupings: those loaded through
        database_reader.get_grouped_articles() (possibly merged), any newly
        created ones, and last a single grouping holding all articles
        without keywords, if there were any.
    """
    if article_list is None:
        article_list = database_reader.get_ungrouped_articles()
    else:
        # Accept plain url strings as well as Article objects.
        article_list = [
            models.Article(url=a) if isinstance(a, (str, unicode)) else a
            for a in article_list
        ]
    groupings = database_reader.get_grouped_articles()
    no_keyword_grouping = None
    for index, article in enumerate(article_list):
        if debug:
            print "Grouping", index, "out of", len(article_list)
        if not article.get_keywords():
            if no_keyword_grouping is None:
                # in_database is set to True here because we do not want a no keyword grouping in the database.
                no_keyword_grouping = models.Grouping(article,
                                                      in_database=True)
            else:
                no_keyword_grouping.add_article(article)
            continue  # Skip the article if the keywords cannot be gotten from it.
        best_grouping, best_grouping_similarity = None, 0

        # Need to make a shallow copy of list for the possibility of combining two of the items in the list.
        for grouping in groupings[:]:
            similarity = grouping.best_similarity(article)
            if similarity > best_grouping_similarity:
                # If this article has a high similarity with two separate groups, then combine the groups.
                if best_grouping_similarity > constants.MIN_COMBINE_GROUP_PERCENTAGE:
                    if best_grouping.in_database():
                        # Keep the stored grouping; the other one is merged
                        # into it and dropped from the database if stored.
                        if grouping.in_database():
                            database_writer.remove_grouping_from_database(
                                grouping)
                        best_grouping.combine_group(grouping)
                        groupings.remove(grouping)
                        # NOTE(review): `grouping` was just removed from
                        # the list, yet it becomes best_grouping below;
                        # confirm this is intended.
                    else:
                        grouping.combine_group(best_grouping)
                        groupings.remove(best_grouping)
                best_grouping = grouping
                best_grouping_similarity = similarity
        if best_grouping is not None and best_grouping_similarity > constants.MIN_GROUPING_PERCENTAGE:
            best_grouping.add_article(article)
        else:
            # Nothing similar enough: the article starts its own grouping.
            groupings.append(models.Grouping(article))
    if no_keyword_grouping:
        groupings.append(no_keyword_grouping)
    return groupings
コード例 #22
0
ファイル: views.py プロジェクト: gpython/JZ_Worker
def new_article(request):
  if request.method == 'POST':
    print request.POST
    form = ArticleForm(request.POST)
    if form.is_valid():
      print "form data: ", form.cleaned_data
      form_data = form.cleaned_data
      form_data['author_id'] = request.user.userprofile.id
      new_article_obj = models.Article(**form_data)
      new_article_obj.save()

      return render(request, 'web/new_article.html', {'title': 'New Article Published', 'new_article_obj': new_article_obj })
    else:
      print "Error: ", form.errors

  category_list = models.Category.objects.all()
  return render(request, 'web/new_article.html', {'title': 'New Article', 'category_list':category_list})
コード例 #23
0
ファイル: endpoints.py プロジェクト: dineshresearch/rxivist
def paper_downloads(a_id, connection):
  """Return the monthly traffic entries recorded for one paper.

  Arguments:
    - a_id: the ID of the paper being queried.
    - connection: a database Connection object.
  Returns:
    - A dict echoing the queried id under "query" and, under "results",
      one entry per traffic record with its month, year, downloads and
      views.

  """
  paper = models.Article(a_id)
  paper.GetTraffic(connection)

  monthly = []
  for entry in paper.traffic:
    monthly.append({
      "month": entry.month,
      "year": entry.year,
      "downloads": entry.downloads,
      "views": entry.views,
    })
  return {"query": {"id": a_id}, "results": monthly}
コード例 #24
0
def new_article(request):
    if request.method == "POST":
        form = ArticleForm(request.POST, request.FILES)
        if form.is_valid():
            print "--form data:", form.cleaned_data
            form_data = form.cleaned_data
            form_data['author_id'] = request.user.userprofiles.id
            new_img_path = handle_uploaded_file(request,
                                                request.FILES['head_img'])
            form_data['head_img'] = new_img_path
            new_article_obj = models.Article(**form_data)
            new_article_obj.save()
            return render(request, 'new_article.html',
                          {'new_article_obj': new_article_obj})
        else:
            print "err:", form.errors

    category_list = models.Category.objects.all()
    return render(request, 'new_article.html',
                  {'category_list': category_list})
コード例 #25
0
def create_article(user, ffile, title, abstract=None, language='en'):
    """Create and save an article owned by ``user`` with ``ffile`` attached.

    ``ffile`` may be a File (assigned directly) or any other file object,
    which is wrapped in File and stored under the name 'untitled'. The
    user's profile is added to the article's authors.

    Returns the saved models.Article.
    """
    article = models.Article(
        owner=user,
        title=title,
        slug=slugify(title),
        abstract=abstract,
        language=language,
    )
    if isinstance(ffile, File):
        # Already a File (e.g. an upload): assign it as is.
        article.article = ffile
    else:
        # Regular file object, see
        # http://stackoverflow.com/questions/3501588/how-to-assign-a-local-file-to-the-filefield-in-django
        article.article.save('untitled', File(ffile))
    article.save()
    article.authors.add(user.get_profile())
    article.save()  # necessary?
    return article
コード例 #26
0
def editor_action(db):
    """Create or update a post from the editor form, then redirect.

    Requires an authenticated session; otherwise redirects to the login
    page. The query parameter ``m`` selects the mode:

    - "new": a models.Article is added with the submitted fields.
    - "edit": the post with the submitted id owned by the current author
      is updated. If the id is empty or no such post exists, the client is
      sent back to the editor.

    Args:
        db: the database session used to add, query and commit.
    """
    auth = check_session()
    if auth:
        title = request.forms.title
        subtitle = request.forms.subtitle
        img_url = request.forms.imgurl
        article = request.forms.article
        draft = int(request.forms.btnval)
        mode = request.query.m

        if mode == "new":
            new_post = models.Article(
                title=title,
                subtitle=subtitle,
                article=article,
                header_image=img_url,
                draft=draft,
                author_id=auth[0]
            )
            db.add(new_post)
        elif mode == "edit":
            post_id = request.forms.id
            # Compare by value; `len(id) is 0` was an identity test.
            if len(post_id) == 0:
                redirect("/admin/editor")

            post = db.query(models.Article).filter(
                and_(models.Article.id == post_id,
                     models.Article.author_id == auth[0])).first()
            if post is None:
                # Unknown id or not this author's post: nothing to edit.
                redirect("/admin/editor")
            # A post leaving draft state gets a fresh creation time.
            if post.draft == True and post.draft != draft:
                post.created_on = datetime.now()
            post.title = title
            post.subtitle = subtitle
            post.header_image = img_url
            post.article = article
            post.draft = draft

        db.commit()
        redirect("/admin/view?mode=post")
    else:
        redirect("/admin/login")
コード例 #27
0
ファイル: database_reader.py プロジェクト: masonshuler/MuDimA
def get_grouped_articles():
    """Load stored articles that have a topic and return their Groupings.

    Every row with non-empty text and a topic id becomes an Article
    (flagged as in the database, with its stored keywords); articles that
    share a topic id are collected in one Grouping.
    """
    query = (
        "SELECT name, topic_id, link, article_text, image_url FROM article "
        "WHERE article_text != '' AND topic_id IS NOT NULL;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        groups = {}
        for name, topic_id, url, article_text, image_url in cursor.fetchall():
            article = models.Article(url=url,
                                     title=name,
                                     text=article_text,
                                     urlToImage=image_url,
                                     in_database=True)
            article.set_keywords(_get_article_keywords(url, cursor))
            if topic_id not in groups:
                # First article of this topic starts its grouping.
                groups[topic_id] = models.Grouping(article,
                                                   uuid=topic_id,
                                                   in_database=True,
                                                   has_new_articles=False)
            else:
                groups[topic_id].add_article(article, new_article=False)
        return list(groups.values())
コード例 #28
0
ファイル: app.py プロジェクト: AurelVU/Conference-Site-VSU
def article():
    """Show the article list and handle the upload of a new article.

    Counts the current user's articles from the last 365 days (per status
    1, 2, 3 and overall). When the upload form was submitted, the file is
    uploaded and a new article with status 1 is stored, unless the user is
    blocked from adding articles or uploading files. Otherwise the page is
    rendered with the user's articles and one status form per article.
    """
    updform = UpdateArticle()
    form = UploadArticle()
    fromDate = datetime.now() - timedelta(days=365)
    na_rass = _count_recent_articles_owned_by_current_user(fromDate, stat=1)
    otclon = _count_recent_articles_owned_by_current_user(fromDate, stat=2)
    prin = _count_recent_articles_owned_by_current_user(fromDate, stat=3)
    alll = _count_recent_articles_owned_by_current_user(fromDate)

    if form.submit.data:
        block = models.BlockUser.query.filter_by(id_user=current_user.id).first()
        if block is not None and block.block_article:
            return 'Блокировка добавления статей'
        if block is not None and block.block_file:
            return 'Блокировка загрузки файлов'

        uploaded = models.File.upload(form.file.data)
        db.session.add(uploaded)
        db.session.commit()
        # Re-read the stored file row to obtain its id.
        idfile = models.File.query.filter_by(
            drive_file_id=uploaded.drive_file_id).first_or_404()
        new_article = models.Article(file=idfile.id, name=form.name.data, stat=1)
        db.session.add(new_article)
        db.session.commit()

        return redirect(url_for('article'))

    articles = models.Article.query.join(
        models.File, (models.File.id == models.Article.file)).all()
    files = models.File.query.filter(models.File.owner == current_user.id).all()

    # One Status query feeds both the form choices and the name lookup.
    status_rows = models.Status.query.all()
    ChangeArticleStatus.setStatuses([(s.id, s.name) for s in status_rows])
    status_names = {s.id: s.name for s in status_rows}

    # Only articles whose file belongs to the current user are listed.
    files_by_id = {f.id: f for f in files}
    forms = {}
    articlesss = []
    for art in articles:
        forms[art.id] = ChangeArticleStatus(id=art.id, stat=art.stat)
        owned_file = files_by_id.get(art.file)
        if owned_file is None:
            continue
        articlesss.append({
            'article': art,
            'file': owned_file,
            'owner': current_user.username,
            'owner_id': current_user.id,
            'id': art.id,
            'stat_id': art.stat,
            'stat': status_names[art.stat],
            'timestamp': art.timestamp.strftime("%d.%m.%Y %H:%M:%S"),
        })

    return render_template('articles.html', form=form, updform=updform,
                           forms=forms, na_rass=na_rass, otclon=otclon,
                           prin=prin, all=alll, articles=articlesss)


def _count_recent_articles_owned_by_current_user(from_date, stat=None):
    """Count articles since from_date whose file is owned by current_user.

    When ``stat`` is given, only articles with that status are counted.
    """
    query = models.Article.query.filter(models.Article.timestamp >= from_date)
    if stat is not None:
        query = query.filter_by(stat=stat)
    owned = 0
    for row in query.all():
        f = models.File.query.filter_by(id=row.file).first()
        if f.owner == current_user.id:
            owned += 1
    return owned
コード例 #29
0
ファイル: models_test.py プロジェクト: masonshuler/MuDimA
 def test_get_keywords_bad_url(self):
     """An empty url yields no keywords and empty text instead of raising."""
     bad_article = models.Article("")
     # Patch out traceback.print_exc for the duration of the calls.
     with mock.patch("traceback.print_exc"):
         keywords = bad_article.get_keywords()
         self.assertEqual(0, len(keywords))
         text = bad_article.get_text()
         self.assertEqual("", text)
コード例 #30
0
ファイル: test_utils.py プロジェクト: masonshuler/MuDimA
"""Various utilities for tests."""

import database_utils
import mock
import models
import os
import unittest

# Two articles with largely overlapping keyword sets, used as fixtures.
# Keywords are passed in explicitly as part of each fixture.
SIMILAR_ARTICLES = (
    models.Article(
        "https://www.nytimes.com/2017/09/25/us/politics/obamacare-repeal-susan-collins-dead.html",
        keywords={
            u'senators', u'repeal', u'support', u'bill', u'dead', u'gop',
            u'pivotal', u'health', u'declares', u'opposition', u'mr', u'vote',
            u'senator', u'republicans', u'republican', u'appears', u'care'
        }),
    models.Article(
        "http://thehill.com/policy/healthcare/352342-third-gop-senator-opposes-new-obamacare-"
        "repeal-killing-bill-ahead-of",
        keywords={
            u'bill', u'trump', u'republicans', u'obamacare', u'dead',
            u'hearing', u'appears', u'lastditch', u'vote', u'collins',
            u'repeal', u'effort', u'gop'
        }))

DISSIMILAR_ARTICLES = (
    models.Article(
        "https://www.washingtonpost.com/opinions/cassidy-is-sorry-about-the-cassidy-graham-"
        "process-he-should-be/2017/09/25/0cd234f0-a243-11e7-ade1-76d061d56efa_story.html",
        keywords={
            u'cassidygraham', u'votes', u'republicans', u'room', u'process',