Example #1
0
    def collect_with_feedzilla():
        """Store every item returned by ``Feedzilla.collect()`` as a Cluster plus one Article.

        For each item a ``Cluster`` is filled from the item's title, search
        position, topic and publish date and saved.  When the item's
        ``check_url`` value is truthy, the result of ``extractFromURL(url)`` is
        passed to Placemaker and the returned countries and continents are
        attached to the cluster (missing ``Location`` / ``Continent`` rows are
        created on the fly).  An ``Article`` linked to the cluster is saved for
        every item, whether or not the URL was checked.

        Returns nothing; all results are written through the model layer.
        """
        for item in Feedzilla.collect():
            cluster = Cluster()
            cluster.display_name = item["title"]
            # Relevancy shrinks as the search position grows.
            # NOTE(review): a search_place of 0 would raise ZeroDivisionError;
            # presumably it is a 1-based rank -- confirm against Feedzilla.
            cluster.relevancy = 1.0 / item["search_place"] * 100
            cluster.topic = Topic.objects.get(short_name=str(item["topic"]))
            cluster.date = format_date(item["publish_date"])
            # Saved before the location relations are added below.
            cluster.save()

            if item["check_url"]:
                try:
                    content = extractFromURL(item["url"])
                    countries = Placemaker.get_countries_from_string(content)
                    for country in countries:
                        # get_or_create replaces the manual
                        # get / except DoesNotExist / save sequence.
                        location, _created = Location.objects.get_or_create(
                            name=country)
                        cluster.location.add(location)

                    continents = Placemaker.get_continents_from_countries(
                        countries)
                    for continent_name in continents:
                        continent, _created = Continent.objects.get_or_create(
                            name=continent_name)
                        cluster.continent_location.add(continent)
                except TypeError:
                    # Best effort: a TypeError anywhere in the lookup above is
                    # reported and the item is still stored without (some of)
                    # its locations.  The parenthesised form prints the same
                    # text on Python 2 and is valid syntax on Python 3.
                    print("webarticle2text Error")

            article = Article()
            article.title = item["title"]
            article.url = item["url"]
            article.publisher = item["source"]
            # Only the first 999 characters of the summary are kept
            # (presumably the content column's limit -- verify on the model).
            article.content = item["summary"][:999]
            article.published_date = format_date(item["publish_date"])
            article.cluster = cluster
            article.save()
Example #2
0
    def collect_with_feedzilla():
        """Persist the items from ``Feedzilla.collect()`` as Clusters, each with one Article.

        Per item: a ``Cluster`` is built from the title, search position,
        topic and publish date, then saved.  If the item's ``check_url`` value
        is truthy, ``extractFromURL(url)`` is called and its result handed to
        Placemaker; the countries and continents it returns are linked to the
        cluster, creating ``Location`` / ``Continent`` rows that do not exist
        yet.  Finally an ``Article`` referencing the cluster is saved -- this
        happens for every item, independent of ``check_url``.

        Returns nothing; the effect is the rows written through the models.
        """
        for entry in Feedzilla.collect():
            cluster = Cluster()
            cluster.display_name = entry["title"]
            # Higher search positions yield lower relevancy.
            # NOTE(review): raises ZeroDivisionError if search_place is 0;
            # presumably positions start at 1 -- confirm with the collector.
            cluster.relevancy = 1.0 / entry["search_place"] * 100
            cluster.topic = Topic.objects.get(short_name=str(entry["topic"]))
            cluster.date = format_date(entry["publish_date"])
            # The cluster is saved before relations are attached to it.
            cluster.save()

            if entry["check_url"]:
                try:
                    page_text = extractFromURL(entry["url"])
                    country_names = Placemaker.get_countries_from_string(page_text)
                    for country_name in country_names:
                        # One call instead of get / except DoesNotExist / save.
                        location, _created = Location.objects.get_or_create(
                            name=country_name)
                        cluster.location.add(location)

                    continent_names = Placemaker.get_continents_from_countries(
                        country_names)
                    for continent_name in continent_names:
                        continent, _created = Continent.objects.get_or_create(
                            name=continent_name)
                        cluster.continent_location.add(continent)
                except TypeError:
                    # Deliberately best effort: report the failure and go on to
                    # store the article.  print(...) with a single string emits
                    # the same output on Python 2 and also parses on Python 3.
                    print("webarticle2text Error")

            article = Article()
            article.title = entry["title"]
            article.url = entry["url"]
            article.publisher = entry["source"]
            # Summary is cut to its first 999 characters
            # (presumably to fit the model field -- verify).
            article.content = entry["summary"][:999]
            article.published_date = format_date(entry["publish_date"])
            article.cluster = cluster
            article.save()
Example #3
0
	def collect_with_google():
		"""Store today's stories from ``GoogleNews.collect()`` as Clusters with Articles.

		Stories whose ``publishedDate`` does not satisfy ``is_today`` are
		skipped.  For each remaining story a ``Cluster`` is saved, one
		``Article`` is saved for the story itself and one for every entry in
		its optional ``relatedStories`` list.  The story title and every
		available ``content`` value are concatenated (space separated) and
		passed to Placemaker; the returned countries and continents are
		attached to the cluster, creating missing ``Location`` / ``Continent``
		rows.

		Returns nothing; the effect is the rows written through the models.
		"""
		def save_article(story, cluster):
			"""Save an Article for ``story`` under ``cluster``.

			Returns the text this story contributes to the cluster content:
			its ``content`` followed by a space, or "" when it has none.
			"""
			has_content = "content" in story
			article = Article()
			article.title = story["titleNoFormatting"]
			article.url = story["unescapedUrl"]
			article.location = story["location"]
			article.publisher = story["publisher"]
			article.content = story["content"] if has_content else ""
			article.published_date = format_date(story["publishedDate"])
			article.cluster = cluster
			article.save()
			return story["content"] + " " if has_content else ""

		for story in GoogleNews.collect():
			if not is_today(story["publishedDate"]):
				continue

			cluster = Cluster()
			if "image" in story:
				cluster.image = story["image"]["url"]
			else:
				# Fallback image used when the story carries none.
				cluster.image = "http://panhandletickets.com/images/not_available.jpg"

			# An absent "relatedStories" key behaves like an empty list:
			# (0 + 1) * 100 / page equals the original 100 / page branch.
			related_stories = story["relatedStories"] if "relatedStories" in story else []
			# NOTE(review): on Python 2 without `from __future__ import
			# division` this is integer division when page is an int; the
			# operator is left untouched to keep the stored values unchanged.
			# A page of 0 would raise ZeroDivisionError -- confirm it cannot occur.
			cluster.relevancy = (len(related_stories) + 1) * 100 / story["page"]
			cluster.topic = Topic.objects.get(name=story["topic"])
			cluster.is_local = False
			cluster.date = format_date(story["publishedDate"])
			# Saved before articles and locations are linked to it.
			cluster.save()

			# Text handed to Placemaker: the main title, then each content.
			cluster_content = story["titleNoFormatting"] + " "
			cluster_content += save_article(story, cluster)
			for related in related_stories:
				cluster_content += save_article(related, cluster)

			# NOTE(review): no cluster field has changed since the previous
			# save; kept in case save() has side effects (signals,
			# auto-updated fields) -- confirm and drop if not.
			cluster.save()

			countries = Placemaker.get_countries_from_string(cluster_content)
			for country in countries:
				# get_or_create replaces the manual
				# get / except DoesNotExist / save sequence.
				location, _created = Location.objects.get_or_create(name=country)
				cluster.location.add(location)

			continents = Placemaker.get_continents_from_countries(countries)
			for continent_name in continents:
				continent, _created = Continent.objects.get_or_create(name=continent_name)
				cluster.continent_location.add(continent)

			# NOTE(review): same as above -- retained from the original in
			# case save() side effects are relied upon.
			cluster.save()