def add_website(self, website_meta):
    '''
    Return the Website matching website_meta, creating it if necessary.

    website_meta -- dict-like object read with .get(); the "name" and
                    "homepage_url" entries identify the website.

    Returns the existing Website when one with the same name and
    homepage_url is already stored, the newly saved Website when the save
    succeeds, and None when validation fails or save() returns a falsy value.
    '''
    name = website_meta.get("name")
    homepage_url = website_meta.get("homepage_url")

    # Reuse the stored document if this website is already in the database
    existing = Website.objects(name=name, homepage_url=homepage_url).first()
    if existing:
        return existing

    # This website object is used to add to the database
    web = Website(name=name, homepage_url=homepage_url)
    try:
        status = web.save()
    except ValidationError:
        self.logger.warn('Save/Validate Website Failed! url: {0}'
                         .format(homepage_url))
        # Return here: `status` was never bound, so falling through to the
        # check below would raise UnboundLocalError instead of reporting
        # the failure to the caller.
        return None

    return web if status else None
def generate_relation_dict(self, news_sources, news_targets):
    '''
    Count, for every news source, how often its articles cite each target.

    news_sources -- mapping of source name to the source's homepage url
    news_targets -- mapping whose keys are the target names (the values are
                    not used here)

    Returns a dictionary of string/list(int) in the format
    {source : target_count}
    ie. {s1 : [tc1, tc2, ... tcn], ... sn : [tc1, tc2, ... tcn]}
    where sn is the source name and tcn is the number of citations whose
    target_name equals, ignoring case, the n-th key of news_targets (in the
    iteration order of news_targets).
    '''
    # Lower-case the target names once and remember which positions each
    # name occupies, so every citation is resolved with a single lookup
    # instead of rescanning (and re-lowercasing) the whole target list.
    target_names = [name.lower() for name in news_targets]
    positions_by_name = {}
    for position, name in enumerate(target_names):
        positions_by_name.setdefault(name, []).append(position)

    relation_dict = {}
    # items() exists on both Python 2 and Python 3 dicts (iteritems() is
    # Python 2 only).
    for source_name, source_url in news_sources.items():
        # One counter per target, in the same order as news_targets
        target_count = [0] * len(target_names)

        # Find the articles of this source website that carry citations
        website = Website.objects(
            homepage_url=source_url).only('homepage_url').first()
        articles = Article.objects(
            Q(website=website) & Q(citations__exists=True)).only('citations')

        for article in articles:
            for citation in article.citations:
                # Entries that are plain ints are skipped, as before; only
                # the remaining entries are read for target_name.
                if isinstance(citation, int):
                    continue
                cited_name = citation.target_name.lower()
                for position in positions_by_name.get(cited_name, ()):
                    target_count[position] += 1

        relation_dict[source_name] = target_count
    return relation_dict
def get_articles(self, number=None):
    '''
    Render get_articles.html with the articles of the current user's sites.

    Articles are collected for every name in the user's news_sources first,
    then for every name in news_targets; only the 'title' and 'url' fields
    are loaded.

    number -- maximum number of articles to render; any falsy value
              (None, 0, '') renders all of them. It is passed through
              int() before slicing.

    Returns the rendered template.
    '''
    show_article_template = Template(filename='get_articles.html')

    # `username` is a module-level name that is only read here, so no
    # `global` declaration is needed. Fetch the user once instead of
    # running the same query for each attribute.
    # NOTE(review): first() returns None when no such user exists, which
    # makes the attribute access below raise AttributeError - confirm that
    # callers guarantee the user is present.
    user = User.objects(name=username).first()

    articles = []
    for site_names in (user.news_sources, user.news_targets):
        for site_name in site_names:
            website = Website.objects(name=site_name).first()
            articles.extend(
                Article.objects(website=website).only('title', 'url').all())

    if not number:
        number = len(articles)
    return show_article_template.render(articles=articles[:int(number)])
def generate_relation_dict_beta(self, news_sources, news_targets):
    '''
    Build {source_name : [count_1, ... count_n]} for the given sources.

    news_sources -- iterable of source website names
    news_targets -- sequence of target names; count_k is the number of
                    citations, over the source's articles, whose
                    target_name equals news_targets[k] ignoring case
    '''
    counts_by_source = {}
    for source_name in news_sources:
        # One slot per target, all starting at zero
        tally = [0] * len(news_targets)

        # Articles of the website with this name that have citations
        source_site = Website.objects(name=source_name).only('name').first()
        has_citations = Q(citations__exists=True)
        cited_articles = Article.objects(
            Q(website=source_site) & has_citations).only('citations')

        for cited_article in cited_articles:
            for reference in cited_article.citations:
                # Integer entries are not inspected
                if isinstance(reference, int):
                    continue
                # Bump the slot of every target this citation names
                for slot, target in enumerate(news_targets):
                    if reference.target_name.lower() == target.lower():
                        tally[slot] += 1

        counts_by_source[source_name] = tally
    return counts_by_source