Example #1
    def __scrape_rss(self, source, full=False):
        """Scrape an RSS source and return the list of saved Article objects."""
        # Relies on feedparser, time.mktime, datetime.datetime, and the
        # application's Article model and worker queue being importable.
        result = []
        document = feedparser.parse(source['target'])
        for article in document['entries']:
            try:
                if full:
                    # The feed carries the full content: store it directly.
                    entry = Article(title=article['title'],
                                    date=datetime.fromtimestamp(
                                        mktime(article['published_parsed'])),
                                    content=article['content'][0]['value'],
                                    summary=article['summary'],
                                    link=article['link'],
                                    thematic=source['thematic'],
                                    type='common_rss')
                    entry.save()
                    result.append(entry)
                else:
                    # Only a link is available: defer full-page retrieval
                    # to a background worker task.
                    worker.run('retrieve_page',
                               article['link'],
                               thematic=source['thematic'],
                               source='common_rss')
            except KeyError:
                # TODO: log the malformed entry instead of silently skipping it
                continue
        return result
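The least obvious line above is the date handling: feedparser exposes `published_parsed` as a `time.struct_time`, which the snippet converts through `time.mktime` into a `datetime`. A self-contained sketch of just that conversion (the feed URL is a placeholder, not from the original code):

import feedparser
from time import mktime
from datetime import datetime

# Placeholder feed URL; any RSS/Atom feed works here.
document = feedparser.parse('https://example.com/feed.xml')
for entry in document['entries']:
    # Not every feed provides a parsed publication date.
    if entry.get('published_parsed'):
        # struct_time -> POSIX timestamp -> naive datetime,
        # mirroring the conversion in __scrape_rss above.
        published = datetime.fromtimestamp(mktime(entry['published_parsed']))
        print(entry.get('title', '(untitled)'), published)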
    def run(self):
        # NOTE: verify=False skips TLS certificate validation for the
        # api.paris.fr endpoint; only the first page (offset=0, limit=100)
        # of activities is fetched.
        response = requests.get(
            "https://api.paris.fr:3000/data/1.1/QueFaire/get_activities/?token={token}&created={created}&offset={offset}&limit={limit}"
            .format(token=app.config['API_QUEFAIREAPARIS_TOKEN'],
                    created="0",
                    offset="0",
                    limit="100"),
            verify=False)
        results = response.json()
        for result in results['data']:
            article = Article()
            article.title = result['nom']
            article.date = datetime.datetime.strptime(result['created'],
                                                      '%Y-%m-%dT%H:%M:%S.%fZ')
            article.content = result['description']
            article.summary = result['small_description']
            article.thematic = "quefaireaparis"  # FIXME
            article.type = "quefaireaparis"
            # special fields
            article.occurences = result['occurences']
            article.thematics = [_['rubrique'] for _ in result['rubriques']]
            article.location = dict(lat=result['lat'], lon=result['lon'])
            article.save()


# EOF
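As written, run() fetches a single page of results (offset=0, limit=100). If the whole catalogue is needed, an offset/limit loop is the natural extension. A minimal sketch, where the endpoint, query parameters, and the `data` response key are taken from the snippet itself; the stop condition (an empty `data` list once the catalogue is exhausted) is an assumption:

import requests

API_URL = ("https://api.paris.fr:3000/data/1.1/QueFaire/get_activities/"
           "?token={token}&created={created}&offset={offset}&limit={limit}")

def fetch_all_activities(token, page_size=100):
    """Paginate over the QueFaire endpoint until no data comes back."""
    activities = []
    offset = 0
    while True:
        response = requests.get(
            API_URL.format(token=token, created="0",
                           offset=offset, limit=page_size),
            verify=False)  # mirrors the snippet; skips TLS verification
        page = response.json().get('data', [])
        if not page:  # assumed end-of-catalogue signal
            break
        activities.extend(page)
        offset += page_size
    return activities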
Example #4
    def run(self, url, thematic=None, user_id=None, source=None):
        # Default to http:// when the caller passes a bare domain.
        if not (url.startswith("http://") or url.startswith("https://")):
            url = "http://%s" % url
        # Parse the web page through the Readability content API; passing
        # the URL via `params` lets requests percent-encode it, which the
        # original string interpolation did not.
        res = requests.get(
            "http://www.readability.com/api/content/v1/parser",
            params={'url': url,
                    'token': app.config['READABILITY_PARSER_TOKEN']})
        parsed = res.json()
        # Save the parsed article.
        article = Article()
        article.title = parsed['title']
        article.date = parsed['date_published']
        article.content = parsed['content']
        article.summary = parsed['excerpt']
        article.link = parsed['url']
        article.domain = parsed['domain']
        article.count_words = parsed['word_count']
        article.user = user_id
        article.thematic = thematic
        article.type = source
        article.save()


# EOF
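The task above assumes every Readability response is a 200 carrying the full field set. A defensive variant might fail loudly instead. A minimal sketch, where the endpoint and token usage mirror run() above but the error-payload check (`'error' in parsed`) is purely hypothetical:

import requests

def parse_with_readability(url, token):
    """Call the Readability parser API and surface failures explicitly."""
    res = requests.get("http://www.readability.com/api/content/v1/parser",
                       params={'url': url, 'token': token})
    res.raise_for_status()  # raise on HTTP-level errors
    parsed = res.json()
    if 'error' in parsed:  # hypothetical error-payload shape
        raise ValueError("Readability parser failed for %s: %s" % (url, parsed))
    return parsed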