def collect_with_feedzilla():
    """Store one Cluster and one Article per item from Feedzilla.collect().

    For every news item a Cluster is populated from the item and saved.
    When the item's ``check_url`` entry is truthy, the text returned by
    ``extractFromURL`` for the item's URL is handed to Placemaker; each
    resulting country and continent is attached to the cluster, and
    Location / Continent rows that do not exist yet are created.
    Finally an Article linked to the cluster is saved.

    Returns None; the function works only through its side effects.
    """
    # NOTE(review): this module defines collect_with_feedzilla a second time
    # right after this definition; the later definition replaces this one
    # when the module is imported.
    for item in Feedzilla.collect():
        cluster = Cluster()
        cluster.display_name = item["title"]
        # Inverse of the search position, scaled so that position 1 -> 100.
        cluster.relevancy = 1.0 / item["search_place"] * 100
        cluster.topic = Topic.objects.get(short_name=str(item["topic"]))
        cluster.date = format_date(item["publish_date"])
        # Saved before locations/continents are added and before the
        # Article refers to it.
        cluster.save()

        if item["check_url"]:
            try:
                cluster_content = extractFromURL(item["url"])
                cluster_countries = Placemaker.get_countries_from_string(
                    cluster_content)
                for country in cluster_countries:
                    # get_or_create replaces the manual
                    # get / DoesNotExist / create / save sequence.
                    location, _created = Location.objects.get_or_create(
                        name=country)
                    cluster.location.add(location)

                cluster_continents = (
                    Placemaker.get_continents_from_countries(
                        cluster_countries))
                for continent_name in cluster_continents:
                    continent, _created = Continent.objects.get_or_create(
                        name=continent_name)
                    cluster.continent_location.add(continent)
            except TypeError:
                # Best effort: the article is still stored below even when
                # geo-tagging fails.  Parenthesised form prints the same
                # text on Python 2 and Python 3.
                print("webarticle2text Error")

        article = Article()
        article.title = item["title"]
        article.url = item["url"]
        article.publisher = item["source"]
        # Only the first 999 characters of the summary are kept
        # (presumably a column length limit - TODO confirm).
        article.content = item["summary"][:999]
        article.published_date = format_date(item["publish_date"])
        article.cluster = cluster
        article.save()
def collect_with_feedzilla():
    """Persist a Cluster and an Article for each Feedzilla news item.

    Steps performed per item returned by ``Feedzilla.collect()``:

    1. Build and save a Cluster (title, relevancy, topic, date).
    2. If ``item["check_url"]`` is truthy, extract text from the item's
       URL, ask Placemaker for the countries and continents it mentions,
       and attach them to the cluster, creating missing Location and
       Continent rows on the way.  A TypeError raised anywhere in this
       step is reported on stdout and otherwise ignored.
    3. Save an Article that points at the cluster.

    Returns None.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the module; this later definition is the one that takes effect.
    news_items = Feedzilla.collect()
    for item in news_items:
        cluster = Cluster()
        cluster.display_name = item["title"]
        # Position 1 gives 100, position 2 gives 50, and so on.
        cluster.relevancy = 1.0 / item["search_place"] * 100
        cluster.topic = Topic.objects.get(short_name=str(item["topic"]))
        cluster.date = format_date(item["publish_date"])
        # The cluster is stored before anything is attached to it.
        cluster.save()

        if item["check_url"]:
            try:
                page_text = extractFromURL(item["url"])
                countries = Placemaker.get_countries_from_string(page_text)
                for country in countries:
                    # Idiomatic replacement for get() + DoesNotExist +
                    # manual creation.
                    location, _created = Location.objects.get_or_create(
                        name=country)
                    cluster.location.add(location)

                continents = Placemaker.get_continents_from_countries(
                    countries)
                for continent_name in continents:
                    continent, _created = Continent.objects.get_or_create(
                        name=continent_name)
                    cluster.continent_location.add(continent)
            except TypeError:
                # Geo-tagging is best effort; the Article below is saved
                # regardless.  print(...) with one string argument behaves
                # the same on Python 2 and Python 3.
                print("webarticle2text Error")

        article = Article()
        article.title = item["title"]
        article.url = item["url"]
        article.publisher = item["source"]
        # Summary is cut to 999 characters
        # (presumably to fit the content field - TODO confirm).
        article.content = item["summary"][:999]
        article.published_date = format_date(item["publish_date"])
        article.cluster = cluster
        article.save()
def collect_with_google():
    """Store today's Google News stories as Clusters with their Articles.

    Every entry returned by ``GoogleNews.collect()`` whose
    ``publishedDate`` passes ``is_today`` becomes one Cluster.  The entry
    itself and each of its ``relatedStories`` (when present) are saved as
    Articles of that cluster.  The titles/contents gathered along the way
    are passed to Placemaker, and the resulting countries and continents
    are attached to the cluster, creating missing Location / Continent
    rows.

    Returns None; the function works only through its side effects.
    """
    for story in GoogleNews.collect():
        # Guard clause: entries not published today are skipped entirely.
        if not is_today(story["publishedDate"]):
            continue

        cluster_content = ""
        has_related = "relatedStories" in story

        cluster = Cluster()
        if "image" in story:
            cluster.image = story["image"]["url"]
        else:
            # Placeholder picture for stories that ship without an image.
            cluster.image = "http://panhandletickets.com/images/not_available.jpg"

        # NOTE(review): with integer operands "/" truncates on Python 2 but
        # yields a float on Python 3 - confirm the intended behaviour before
        # porting.
        if has_related:
            # The main story counts as one in addition to its related ones.
            cluster.relevancy = (
                (len(story["relatedStories"]) + 1) * 100 / story["page"])
        else:
            cluster.relevancy = 100 / story["page"]
        cluster.topic = Topic.objects.get(name=story["topic"])
        cluster.is_local = False
        cluster.date = format_date(story["publishedDate"])
        # Stored before Articles and locations are linked to it.
        cluster.save()

        # Main article of the cluster.
        article = Article()
        article.title = story["titleNoFormatting"]
        article.url = story["unescapedUrl"]
        article.location = story["location"]
        article.publisher = story["publisher"]
        cluster_content += story["titleNoFormatting"] + " "
        if "content" in story:
            article.content = story["content"]
            cluster_content += story["content"] + " "
        else:
            article.content = ""
        article.published_date = format_date(story["publishedDate"])
        article.cluster = cluster
        article.save()

        if has_related:
            for related in story["relatedStories"]:
                article = Article()
                article.title = related["titleNoFormatting"]
                article.url = related["unescapedUrl"]
                article.location = related["location"]
                article.publisher = related["publisher"]
                # Only the content (not the title) of related stories
                # contributes to the text used for geo-tagging.
                if "content" in related:
                    article.content = related["content"]
                    cluster_content += related["content"] + " "
                else:
                    article.content = ""
                article.published_date = format_date(
                    related["publishedDate"])
                article.cluster = cluster
                article.save()

        # NOTE(review): no cluster field changed since the previous save;
        # kept in case save() has side effects that callers rely on.
        cluster.save()

        cluster_countries = Placemaker.get_countries_from_string(
            cluster_content)
        for country in cluster_countries:
            # get_or_create replaces the manual
            # get / DoesNotExist / create / save sequence.
            location, _created = Location.objects.get_or_create(
                name=country)
            cluster.location.add(location)

        cluster_continents = Placemaker.get_continents_from_countries(
            cluster_countries)
        for continent_name in cluster_continents:
            continent, _created = Continent.objects.get_or_create(
                name=continent_name)
            cluster.continent_location.add(continent)

        cluster.save()