Esempio n. 1
0
    def add_website(self, website_meta):
        '''Return the Website described by website_meta, creating it if needed.

        website_meta is read with .get(); its "name" and "homepage_url"
        entries identify the website.

        Returns the already stored Website when one matches both fields,
        otherwise the newly saved Website. Returns None when the new document
        fails validation (a warning is logged) or when save() gives back a
        falsy result.
        '''
        name = website_meta.get("name")
        homepage_url = website_meta.get("homepage_url")

        # Reuse the stored document when one already matches both fields.
        existing = Website.objects(name=name, homepage_url=homepage_url).first()
        if existing:
            return existing

        # No match: build a fresh document and try to persist it.
        web = Website(name=name, homepage_url=homepage_url)
        try:
            status = web.save()
        except ValidationError:
            self.logger.warn('Save/Validate Website Failed! url: {0}'\
                                      .format(homepage_url))
            # Stop here: `status` was never assigned, so falling through to
            # the check below would raise UnboundLocalError.
            return None

        return web if status else None
Esempio n. 2
0
    def generate_relation_dict(self, news_sources, news_targets):
        '''
        Count, for every news source, how often its articles cite each target.

        news_sources maps a source name to its homepage url. news_targets is a
        mapping whose keys are the target names; its values are not read here.

        Returns a dictionary of string/list(int) in the format
        {source : target_count}
        ie. {s1 : [tc1, tc2, ... tcn],
        s2 : [tc1, tc2, ... tcn], ...
        sn : [tc1, tc2, ... tcn]}
        where sn is the source, tcn is the citation count of each target,
        listed in the iteration order of the keys of news_targets.
        Target names are compared case-insensitively.
        '''
        # Lower-case the target names once instead of rebuilding the key list
        # and re-lowering a key on every comparison.
        target_names = [name.lower() for name in news_targets.keys()]

        relation_dict = {}
        # .items() (rather than .iteritems()) works on Python 2 and Python 3.
        for source_name, source_url in news_sources.items():
            # One counter per target, in the same order as target_names.
            target_count = [0] * len(target_names)
            website = Website.objects(
                homepage_url=source_url).only('homepage_url').first()
            # Articles of this website that have a citations field; only
            # that field is loaded.
            articles = Article.objects(
                Q(website=website) & Q(citations__exists=True)).only('citations')
            for article in articles:
                for citation in article.citations:
                    # Integer entries carry no target_name and are skipped.
                    if isinstance(citation, int):
                        continue
                    cited_name = citation.target_name.lower()
                    for index, target_name in enumerate(target_names):
                        if cited_name == target_name:
                            target_count[index] += 1
            relation_dict[source_name] = target_count
        return relation_dict
Esempio n. 3
0
    def get_articles(self, number=None):
        '''Render get_articles.html with the current user's articles.

        The user is looked up by the module-level name `username` (only read
        here, so no `global` declaration is needed). Articles are collected
        for each of the user's news_sources and then for each of the
        news_targets, loading only the 'title' and 'url' fields.

        number -- upper bound on how many articles are rendered. Any falsy
                  value, including the default None, renders all of them.
                  The value goes through int(), so a numeric string works.

        Returns whatever the template's render() returns. When no user with
        that name exists the template is rendered with an empty article list.
        '''
        show_article_template = Template(filename='get_articles.html')

        # A single lookup serves both lists; the guard avoids an
        # AttributeError on None when the user does not exist.
        user = User.objects(name=username).first()
        website_names = []
        if user is not None:
            website_names = list(user.news_sources) + list(user.news_targets)

        articles = []
        for website_name in website_names:
            website = Website.objects(name=website_name).first()
            articles += Article.objects(website=website).only('title', 'url').all()

        if not number:
            number = len(articles)

        return show_article_template.render(articles=articles[:int(number)])
Esempio n. 4
0
 def generate_relation_dict_beta(self, news_sources, news_targets):
     '''
     Build {source_name: [count_1, ..., count_n]} for the given source names.

     count_i is the number of citations, over all articles of the website
     named source_name, whose target_name equals news_targets[i] when both
     are lower-cased.
     '''
     counts_by_source = {}
     for source_name in news_sources:
         # one tally slot per target, in news_targets order
         tallies = [0] * len(news_targets)
         site = Website.objects(name=source_name).only('name').first()
         # restrict to this website's articles that have a citations field
         with_citations = Q(website=site) & Q(citations__exists=True)
         for article in Article.objects(with_citations).only('citations'):
             for citation in article.citations:
                 # integer entries are ignored
                 if isinstance(citation, int):
                     continue
                 for slot, target in enumerate(news_targets):
                     if citation.target_name.lower() == target.lower():
                         tallies[slot] += 1
         counts_by_source[source_name] = tallies
     return counts_by_source