def put_article_in_db(self, story_url):
    """Scrape the article at ``story_url`` and store it unless it is known.

    The URL is first looked up with ``self.db.in_set``; when that returns a
    truthy value nothing else happens.  Otherwise a ``NewsArticle`` is built
    for the URL, its metadata is gathered into one document and the document
    is passed to ``self.db.CollectionSubmitOne``.

    Any ``Exception`` raised along the way is caught and printed to stdout,
    so callers never see scraping or storage errors.

    Args:
        story_url: URL of the story to scrape.

    Returns:
        None.
    """
    try:
        if self.db.in_set({'url': story_url}):
            # Already stored: skip the scrape entirely.
            return

        current_article = NewsArticle(story_url)

        # The accessors are invoked in the same order as before, in case
        # NewsArticle computes some fields lazily on top of earlier calls.
        article_published = current_article.date_made()  # datetime object
        article_title = current_article.get_title()  # string
        # NOTE(review): the result of goodArticle() is ignored here; confirm
        # whether it is called only for side effects or should gate storage.
        current_article.goodArticle()
        article_key_words = current_article.getKeywords()  # list of strings
        article_videos = current_article.get_videos()  # list of video URLs
        article_summary = current_article.getSummary()  # string
        article_authors = current_article.getAuthors()  # list of strings
        article_thumbnaillink = current_article.thumbnail_url()  # image URL
        article_url = current_article.get_url()

        res = get_tld(article_url, as_object=True)

        new_entry = {
            'title': article_title,
            'sum': article_summary,
            'auth': article_authors,
            'thumb': article_thumbnaillink,
            'pub': article_published,
            'keywords': article_key_words,
            'vids': article_videos,
            'likes': 0,
            'dislikes': 0,
            'comments': [],
            'url': article_url,
            'publisher': res.domain,
            # Only the UTC timestamp is stored; the earlier local-time
            # assignment to this key was always overwritten by this one.
            'creationtime': datetime.datetime.utcnow(),
            'companycreator': res.domain,
        }
        self.db.CollectionSubmitOne(new_entry)
    except Exception as e:
        # Best-effort: report the failure and carry on.  The single-argument
        # call form prints the same text on Python 2 and Python 3.
        print("------")
        print("its f****d emma")
        print(e)
        print("------")
def put_article_in_db(self):
    """Scrape and store the hot submissions of every subreddit in ``self.sublist``.

    For each subreddit the first 30 hot submissions are fetched through
    ``self.reddit``.  A submission whose URL is already known to
    ``self.db.in_set`` is skipped; otherwise a ``NewsArticle`` is built for
    the URL, its metadata is gathered into one document and the document is
    passed to ``self.db.CollectionSubmitOne``.

    Failures are reported on stdout and never propagate.  An error while
    handling one submission only skips that submission; an error while
    listing a subreddit ends the run.

    Returns:
        None.
    """
    # NOTE(review): counter only advances for duplicates and is only used in
    # the "Already have it" message; confirm that is the intended meaning.
    counter = 0
    # One Mongo client per run, created on first use and closed at the end,
    # instead of a fresh never-closed client for every stored article.
    mongo_client = None

    def report_failure(error):
        # The single-argument call form prints the same text on Python 2
        # and Python 3.
        print("------")
        print("its f****d emma")
        print(error)
        print("------")

    try:
        for x in self.sublist:
            submissions = self.reddit.get_subreddit(x).get_hot(limit=30)
            for submission in submissions:
                # Isolate each submission so one bad URL or failed scrape
                # does not abort the remaining submissions and subreddits.
                try:
                    story_url = submission.url.encode('ascii', 'ignore')
                    if self.db.in_set({'url': story_url}):
                        print("Already have it " + str(counter))
                        counter = counter + 1
                        continue

                    print(str(story_url))
                    current_article = NewsArticle(story_url)

                    # Accessors are invoked in the same order as before, in
                    # case NewsArticle computes some fields lazily.
                    article_published = current_article.date_made()  # datetime object
                    article_title = current_article.get_title()  # string
                    # NOTE(review): the result of goodArticle() is ignored;
                    # confirm whether it should gate storage.
                    current_article.goodArticle()
                    article_key_words = current_article.getKeywords()  # list of strings
                    article_videos = current_article.get_videos()  # list of video URLs
                    article_summary = current_article.getSummary()  # string
                    article_authors = current_article.getAuthors()  # list of strings
                    article_thumbnaillink = current_article.thumbnail_url()  # image URL

                    if mongo_client is None:
                        mongo_client = pymongo.MongoClient()
                    articles = mongo_client.lyket.articles

                    res = get_tld(story_url, as_object=True)

                    new_entry = {
                        'title': article_title,
                        'sum': article_summary,
                        'author': article_authors,
                        'thumb': article_thumbnaillink,
                        'pub': article_published,
                        'keywords': article_key_words,
                        'vids': article_videos,
                        'likes': 0,
                        'dislikes': 0,
                        'comments': [],
                        'url': story_url,
                        '_id': uuid.uuid4().hex,
                        # Number of articles present before this insert.
                        'postnum': articles.count(),
                        'publisher': res.domain,
                        # Only the UTC timestamp is stored; the earlier
                        # local-time assignment was always overwritten.
                        'creationtime': datetime.datetime.utcnow(),
                        'companycreator': res.domain,
                    }
                    self.db.CollectionSubmitOne(new_entry)
                    print("Done with article " + str(articles.count()))
                except Exception as e:
                    report_failure(e)
    except Exception as e:
        # Errors outside a single submission (e.g. listing a subreddit).
        report_failure(e)
    finally:
        if mongo_client is not None:
            mongo_client.close()
def put_article_in_db(self, story_url):
    """Store the article found at ``story_url`` if it is not stored yet.

    ``self.db.in_set`` is asked whether a document with this URL exists.
    If so the method returns immediately.  If not, a ``NewsArticle`` is
    created for the URL, its fields are collected into a single document
    and the document is submitted through ``self.db.CollectionSubmitOne``.

    Every ``Exception`` raised during lookup, scraping or storage is caught
    and printed to stdout rather than propagated.

    Args:
        story_url: URL of the story to scrape.

    Returns:
        None.
    """
    try:
        already_stored = self.db.in_set({'url': story_url})
        if already_stored:
            return

        current_article = NewsArticle(story_url)

        # Keep the original accessor order: NewsArticle may derive later
        # fields from work done by earlier calls.
        published_on = current_article.date_made()  # datetime object
        title = current_article.get_title()  # string
        # NOTE(review): goodArticle()'s return value is discarded; confirm
        # whether it is a side-effecting step or a check that should be used.
        current_article.goodArticle()
        keywords = current_article.getKeywords()  # list of strings
        videos = current_article.get_videos()  # list of video URLs
        summary = current_article.getSummary()  # string
        authors = current_article.getAuthors()  # list of strings
        thumbnail_link = current_article.thumbnail_url()  # image URL
        article_url = current_article.get_url()

        # The registered domain is stored under two keys below.
        res = get_tld(article_url, as_object=True)

        new_entry = {
            'title': title,
            'sum': summary,
            'auth': authors,
            'thumb': thumbnail_link,
            'pub': published_on,
            'keywords': keywords,
            'vids': videos,
            'likes': 0,
            'dislikes': 0,
            'comments': [],
            'url': article_url,
            'publisher': res.domain,
            # The local-time value previously written to this key was dead:
            # it was replaced by the UTC value before the document was used.
            'creationtime': datetime.datetime.utcnow(),
            'companycreator': res.domain,
        }
        self.db.CollectionSubmitOne(new_entry)
    except Exception as e:
        # Best-effort reporting; the call form of print yields the same
        # output on Python 2 and Python 3.
        print("------")
        print("its f****d emma")
        print(e)
        print("------")