Example #1
0
 def parse_feed(self, feed):
     """Build, save and return Article objects for the entries of ``feed``.

     Only the first entry (``feed.entries[:1]``) is read; the limit is a
     debugging aid. The page behind each entry's link is run through
     ``HtmlParser`` and a trailing `` -...`` suffix is removed from the
     parsed text before the article is saved.

     Returns:
         The list of saved articles.
     """
     parser = HtmlParser()
     collected = []
     # Read just the first entry while debugging.
     for entry in feed.entries[:1]:
         item = Article(source=entry.author, title=entry.title, link=entry.link)
         body = parser.parse(entry.link)
         # Cut the text at a " -" suffix that runs to the end of the content.
         item.content = re.sub(r' -.*$', '', body)
         item.save()  # and associated word frequencies
         collected.append(item)
     return collected
def update_article(id):
    """Update the article identified by ``id`` from the JSON request body.

    Reads ``title``, ``tagId`` and ``content`` from ``request.json`` and hands
    the resulting ``Article`` / ``ArticleContent`` pair to
    ``art_services.update_article``.

    Returns:
        ``ResultResp(...).to_resp()`` with code 20000 on success, or with code
        50001 when any exception is raised (the exception is printed).
    """
    try:
        payload = request.json
        article = Article(id, payload['title'], None, None, None, None, '0',
                          payload['tagId'], None)
        body = ArticleContent(None, payload['content'])
        art_services.update_article(article, body)
        return ResultResp(20000, '修改成功', None).to_resp()
    except Exception as err:
        print(err)
        return ResultResp(50001, '修改失败!', None).to_resp()
def insert_article():
    """Create a new article from the JSON request body.

    Reads ``title``, ``tagId`` and ``content`` from ``request.json`` and the
    author's ``id`` and ``nickname`` from ``session['login_user']``, then
    hands the resulting ``Article`` / ``ArticleContent`` pair to
    ``art_services.insert_article``.

    Returns:
        ``ResultResp(...).to_resp()`` with code 20000 on success, or with code
        50001 when any exception is raised (the exception is printed).
    """
    try:
        payload = request.json
        title = payload['title']
        author = session['login_user']
        article = Article(None, title, author['id'], author['nickname'],
                          None, None, '0', payload['tagId'], None)
        body = ArticleContent(None, payload['content'])
        art_services.insert_article(article, body)
        return ResultResp(20000, '添加成功', None).to_resp()
    except Exception as err:
        print(err)
        return ResultResp(50001, '添加失败!', None).to_resp()
Example #4
0
 def parse_feed(self, entry):
     """Fetch one feed and store an Article for each of its entries.

     Args:
         entry: A ``(url, publisher, publisher_location)`` tuple.

     Returns:
         The list of stored articles. The list is empty when the feed URL
         cannot be fetched.
     """
     articles = []
     (url, publisher, publisher_location) = entry
     try:
         c = urlopen(url)
     except URLError:
         # Without a response there is nothing to parse; falling through
         # would fail on the unbound ``c``.
         print('Failed to fetch ' + url)
         return articles
     try:
         feed = feedparser.parse(c)
     finally:
         # Release the connection once the feed has been read.
         c.close()

     # Both patterns are loop-invariant, so compile them once.
     # A trailing "- Some Source Name" (up to seven words) names the source.
     source_re = re.compile(r'-\s*([a-zA-Z]+(,?\s+[a-zA-Z]+){0,6})$')
     # Same trailer, optionally preceded by a literal backslash-n sequence.
     trailer_re = re.compile(
         r'(\\n)?\s*-\s*([a-zA-Z]+(,?\s+[a-zA-Z]+){0,6})$')

     for e in feed.entries:
         image_link = None
         image_type = None
         # The last enclosure link wins; links lacking rel/type are tolerated.
         for link in e.links:
             if link.get('rel') == 'enclosure':
                 image_link = link.get('href')
                 image_type = link.get('type')
         article = Article(
             publisher=publisher,
             publisher_location=publisher_location,
             published_date=e.updated_parsed,
             title=e.title,
             link=e.link,
             image_link=image_link,
             image_type=image_type)
         content = self.htmlparser.parse(e.link)
         m = source_re.search(content)
         if m:
             article.source = m.group(1)
         article.content = trailer_re.sub('', content)
         article.store(self.db)  # put article and word frequencies into couchdb
         articles.append(article)
     return articles
Example #5
0
 def post(self):
     """Create an Article from the JSON request body and persist it.

     The ``title``, ``slug``, ``author_id``, ``description``,
     ``short_description`` and ``img`` keys are read with ``.get``, so a
     missing key is passed to ``Article`` as ``None``.

     Returns:
         ``article.serialize`` for the newly committed article.
     """
     payload = request.json
     field_names = ('title', 'slug', 'author_id', 'description',
                    'short_description', 'img')
     article = Article(**{name: payload.get(name) for name in field_names})
     db.session.add(article)
     db.session.commit()
     return article.serialize
def article_store():
    """Create an Article from the submitted form and optional image upload.

    Form fields ``title``, ``slug``, ``description`` and ``short_description``
    are read with ``.get``; ``author_id`` is fixed to 1. When an image is
    uploaded it is saved under ``Config.UPLOAD_PATH`` and its path (with a
    leading "/") is stored on the article; otherwise ``img`` is ``None``.

    Returns:
        A redirect to "/".
    """
    data = request.form
    img = request.files['img']

    # Default to None so the Article can still be built when the form is
    # submitted without a file (previously ``path`` was unbound in that case).
    path = None
    if img:
        # NOTE(review): img.filename is client-supplied and used unsanitised;
        # consider werkzeug.utils.secure_filename to block path traversal.
        saved_to = os.path.join(Config.UPLOAD_PATH, img.filename)
        img.save(saved_to)
        # Derive the stored path from the location actually written, so the
        # two agree even when UPLOAD_PATH has no trailing separator.
        path = "/" + saved_to

    article = Article(title=data.get('title'),
                      slug=data.get('slug'),
                      author_id=1,
                      description=data.get('description'),
                      short_description=data.get('short_description'),
                      img=path)

    db.session.add(article)
    db.session.commit()
    return redirect("/")
Example #7
0
    def process_item(self, item, spider):
        """Copy a scraped item into an ``Article`` and save it.

        The item's content has its tags removed, is stripped, and has
        "\\r\\n" and tab characters deleted. ``title_suggest`` is produced
        by ``self.gen_suggests`` from the title and tags.

        Returns:
            The unchanged ``item``.
        """
        doc = Article()
        for field in ("title", "create_date"):
            setattr(doc, field, item[field])

        stripped = remove_tags(item["content"]).strip()
        doc.content = stripped.replace("\r\n","").replace("\t","")

        # front_image_path is not copied here.
        for field in ("front_image_url", "praise_nums", "comment_nums",
                      "fav_nums", "url", "tags"):
            setattr(doc, field, item[field])
        doc.id = item["url_object_id"]

        doc.title_suggest = self.gen_suggests(doc.title, doc.tags)
        doc.save()
        return item
Example #8
0
 def from_crawler(cls, crawler):
     """Build an instance bound to the crawler's settings.

     Also calls ``Article.init()`` before returning the new instance.
     """
     instance = cls()
     instance.settings = crawler.settings
     Article.init()
     return instance
Example #9
0
    def save_to_es(self):
        """Copy this item's fields into an ``Article``, save it and count it.

        ``front_image_path`` is copied only when the item contains it. The
        document id (``meta.id``) is taken from ``url_object_id``. After the
        save, the ``jobbole_count`` counter is incremented via ``redis_cli``.
        """
        doc = Article()
        for field in ("title", "create_date"):
            setattr(doc, field, self[field])
        doc.content = remove_tags(self["content"])
        doc.front_image_url = self["front_image_url"]
        if "front_image_path" in self:
            doc.front_image_path = self["front_image_path"]
        for field in ("praise_nums", "fav_nums", "comment_nums", "url", "tags"):
            setattr(doc, field, self[field])
        doc.meta.id = self["url_object_id"]

        # Each (text, number) pair is passed through to gen_suggests as-is.
        weighted_texts = ((doc.title, 7), (doc.tags, 8))
        doc.title_suggest = gen_suggests(Article._doc_type.index, weighted_texts)

        doc.save()

        redis_cli.incr("jobbole_count")
# -*- coding: utf-8 -*-

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Integer, Keyword, Text
from elasticsearch_dsl.connections import connections

# Define a default Elasticsearch client pointing at 'localhost'. This runs at
# import time, before the model import and the query further down.
connections.create_connection(hosts=['localhost'])

from models.models import Article

# Request completion suggestions for the text 'python' from the
# 'title_suggest' field (fuzziness 2, at most 10 options), then print the
# title and score of every option returned.
s = Article.search().suggest(
    'title_suggestion',
    'python',
    completion=dict(field='title_suggest', fuzzy=dict(fuzziness=2), size=10),
)
suggestions = s.execute_suggest()
for match in suggestions.title_suggestion[0].options:
    source = match._source
    print(source['title'], match._score)
# Display cluster health
# print(connections.get_connection().cluster.health())

# from elasticsearch_dsl import Keyword, Mapping, Nested, Text
#
# m = Mapping('article')