Example #1
    def __scrape_rss(self, source, full=False):
        """ scrape an RSS source, return a list of saved Article objects """
        result = []
        document = feedparser.parse(source['target'])
        for article in document['entries']:
            try:
                if full:
                    entry = Article(title=article['title'],
                                    date=datetime.fromtimestamp(
                                        mktime(article['published_parsed'])),
                                    content=article['content'][0]['value'],
                                    summary=article['summary'],
                                    link=article['link'],
                                    thematic=source['thematic'],
                                    type='common_rss')
                    entry.save()
                    result.append(entry)
                else:
                    # delegate full-page retrieval to the background worker
                    worker.run('retrieve_page',
                               article['link'],
                               thematic=source['thematic'],
                               source='common_rss')
            except KeyError:
                # TODO: log malformed entries instead of skipping them silently
                pass
        return result
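A minimal calling sketch for the method above; the `Scraper` class name and the feed values are hypothetical, and the name-mangled attribute is simply how Python exposes a double-underscore method outside its class:

    # hypothetical caller: the class name and feed values are illustrative only
    scraper = Scraper()
    saved = scraper._Scraper__scrape_rss(
        {'target': 'https://example.org/feed.xml', 'thematic': 'news'},
        full=True)
    print("%d articles saved" % len(saved))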
Example #2
def get_content_from_duration(duration, thematics=None, user_id=None):
	words      = utils.how_many_words(int(duration))
	thematics  = thematics.split(',') if thematics else None
	# floor division keeps the word budgets integral under both Python 2 and 3
	articles   = {
		"one"   : Article.get_closest(count_words=words,      limit=5, thematics=thematics, user=user_id), # FIXME
		"two"   : Article.get_closest(count_words=words // 2, limit=2, thematics=thematics, user=user_id),
		"three" : Article.get_closest(count_words=words // 3, limit=3, thematics=thematics, user=user_id),
	}
	return dumps({'articles': articles, 'delta': duration})
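A hedged usage sketch for `get_content_from_duration`, assuming `dumps` is `json.dumps` and that `duration` is expressed in whatever unit `utils.how_many_words` expects:

	import json

	# hypothetical call: 600 is an arbitrary duration, thematics are illustrative
	payload = json.loads(get_content_from_duration(600, thematics="sport,culture"))
	print(payload['delta'])                 # echoes the requested duration
	for bucket in ("one", "two", "three"):  # the three word-budget buckets
		print(bucket, len(payload['articles'][bucket]))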
Example #3
def get_content_from_itineraire(src, tgt, thematics=None, user_id=None):
	itineraire = utils.get_itineraire(src, tgt)
	duration   = itineraire['delta']
	words      = utils.how_many_words(duration)
	thematics  = thematics.split(',') if thematics else None
	# floor division keeps the word budgets integral under both Python 2 and 3
	articles   = {
		"one"   : Article.get_closest(count_words=words,      limit=5, thematics=thematics, user=user_id), # FIXME
		"two"   : Article.get_closest(count_words=words // 2, limit=2, thematics=thematics, user=user_id),
		"three" : Article.get_closest(count_words=words // 3, limit=3, thematics=thematics, user=user_id),
	}
	itineraire["articles"] = articles
	return dumps(itineraire)
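Examples #2 and #3 build the same three-bucket dict around `Article.get_closest`; a small helper would keep the word-budget splits in one place. This refactoring is a sketch, not part of the original code:

	def _article_buckets(words, thematics=None, user_id=None):
		# hypothetical helper: same splits as above (full, half and third budgets)
		return {
			"one"   : Article.get_closest(count_words=words,      limit=5, thematics=thematics, user=user_id),
			"two"   : Article.get_closest(count_words=words // 2, limit=2, thematics=thematics, user=user_id),
			"three" : Article.get_closest(count_words=words // 3, limit=3, thematics=thematics, user=user_id),
		}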
Example #5
    def run(self, url, thematic=None, user_id=None, source=None):
        if not (url.startswith("http://") or url.startswith("https://")):
            url = "http://%s" % url
        # parse the web page; passing params lets requests URL-encode the
        # target address instead of splicing it raw into the query string
        res = requests.get(
            "http://www.readability.com/api/content/v1/parser",
            params={'url': url,
                    'token': app.config['READABILITY_PARSER_TOKEN']})
        parsed = res.json()
        # save the article
        article = Article()
        article.title = parsed['title']
        article.date = parsed['date_published']
        article.content = parsed['content']
        article.summary = parsed['excerpt']
        article.link = parsed['url']
        article.domain = parsed['domain']
        article.count_words = parsed['word_count']
        article.user = user_id
        article.thematic = thematic
        article.type = source
        article.save()
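The method above assumes the parser call always succeeds and returns every field; a defensive variant (a sketch with a hypothetical helper name, using only standard `requests` calls) would fail fast on HTTP errors and tolerate missing keys:

    import requests

    def fetch_parsed(url, token):
        # hypothetical helper around the same Readability endpoint as above
        res = requests.get("http://www.readability.com/api/content/v1/parser",
                           params={'url': url, 'token': token})
        res.raise_for_status()             # surface 4xx/5xx instead of failing later
        parsed = res.json()
        return {field: parsed.get(field)   # None for any field the parser omitted
                for field in ('title', 'date_published', 'content', 'excerpt',
                              'url', 'domain', 'word_count')}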


# EOF
Example #6
	def run(self):
		# NOTE: verify=False disables TLS certificate verification for this host
		response = requests.get("https://api.paris.fr:3000/data/1.1/QueFaire/get_activities/?token={token}&created={created}&offset={offset}&limit={limit}"
			.format(
				token   = app.config['API_QUEFAIREAPARIS_TOKEN'],
				created = "0",
				offset  = "0",
				limit   = "100"),
			verify=False
		)
		results = response.json()
		for result in results['data']:
			article = Article()
			article.title       = result['nom']
			article.date        = datetime.datetime.strptime(result['created'], '%Y-%m-%dT%H:%M:%S.%fZ')
			article.content     = result['description']
			article.summary     = result['small_description']
			article.thematic    = "quefaireaparis" # FIXME
			article.type        = "quefaireaparis"
			# special fields kept from the QueFaire payload
			article.occurences  = result['occurences']
			article.thematics   = [_['rubrique'] for _ in result['rubriques']]
			article.location    = dict(lat=result['lat'], lon=result['lon'])
			article.save()

# EOF
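The request above fetches only the first 100 activities (`offset=0`, `limit=100`); if the endpoint pages its results, as those parameters suggest, a loop along these lines would walk every page. The helper name and the stopping condition are assumptions:

	import requests

	def fetch_all_activities(token, page_size=100):
		# hypothetical pagination loop; assumes 'data' comes back empty
		# once the offset runs past the last activity
		offset, activities = 0, []
		while True:
			response = requests.get(
				"https://api.paris.fr:3000/data/1.1/QueFaire/get_activities/"
				"?token={token}&created=0&offset={offset}&limit={limit}".format(
					token=token, offset=offset, limit=page_size),
				verify=False)
			batch = response.json().get('data', [])
			if not batch:
				break
			activities.extend(batch)
			offset += page_size
		return activities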
Example #9
def api_content(id):
	article = Article.get(id=id)
	if article:
		return article['content']
	# the literal string "false" signals a missing article to the caller
	return "false"
Example #10
def reset_content():
	# drop every stored article, then queue a full re-scrape of the sources
	articles_collection = Article.get_collection()
	articles_collection.remove()
	worker.run('retrieve_common_articles', app.config['SOURCE_CONTENT'])
	return "ok"