Esempio n. 1
0
#--------------------------------------------------------------------------------
# Filter articles with Article.filter_articles()
#--------------------------------------------------------------------------------

from sourcenet.models import Article, Temp_Section, Newspaper

# Criteria shared by both queries: the newspaper (by NewsBank code), the
#    news section names, and the custom in-house-author Q filter.
article_filter_params = {
    Article.PARAM_NEWSPAPER_NEWSBANK_CODE : "GRPB",
    Article.PARAM_SECTION_NAME_LIST : Article.GRP_NEWS_SECTION_NAME_LIST,
    Article.PARAM_CUSTOM_ARTICLE_Q : Article.Q_GRP_IN_HOUSE_AUTHOR,
}

# Date window before the layoffs (July 2009).
article_filter_params.update( {
    Article.PARAM_START_DATE : "2009-07-01",
    Article.PARAM_END_DATE : "2009-07-31",
} )
before_qs = Article.filter_articles( **article_filter_params )

# Date window after the layoffs (June 2010) - same criteria, only the
#    start and end dates are overwritten.
article_filter_params.update( {
    Article.PARAM_START_DATE : "2010-06-01",
    Article.PARAM_END_DATE : "2010-06-30",
} )
after_qs = Article.filter_articles( **article_filter_params )


#================================================================================
# NLTK stuff
#================================================================================

#--------------------------------------------------------------------------------
# Get article to test on
#--------------------------------------------------------------------------------
Esempio n. 2
0
    def create_article_query_set( self, param_prefix_IN = '' ):

        '''
        Builds an Article QuerySet from the filter parameters stored in this
            instance's param container.

        Each parameter is looked up under the name
            param_prefix_IN + <SourcenetBase.PARAM_* constant>, the values are
            collected into a dictionary keyed by the matching Article.PARAM_*
            constants, and that dictionary is handed to
            Article.filter_articles() along with Article.objects.all().

        Parameters:
        - param_prefix_IN - string prepended to every parameter name before it
            is looked up.  Defaults to '' (no prefix).

        Returns:
        - whatever Article.filter_articles() returns, or None when
            get_param_container() returns a value that is not truthy.
        '''

        # used to label log messages.
        log_prefix = "In " + "create_article_query_set"

        # grab the logger first, then the param container.
        log = self.get_logger()
        param_container = self.get_param_container()

        # no param container present - error, nothing to build.
        if not param_container:

            return None

        #-- END check to make sure we have a param container. --#

        # date criteria are read as strings and default to None.
        start_date = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_START_DATE, None )
        end_date = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_END_DATE, None )
        date_range = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_DATE_RANGE, None )

        # list criteria default to an empty list.
        publication_list = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_PUBLICATION_LIST, [] )
        tag_list = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_TAG_LIST, [] )
        unique_id_list = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_UNIQUE_ID_LIST, [] )
        article_id_list = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_ARTICLE_ID_LIST, [] )
        section_list = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_SECTION_LIST, [] )

        log.info( log_prefix + ": unique_id_list_IN = " + str( unique_id_list ) )

        # start from every article; filter_articles() narrows it down.
        all_articles_qs = Article.objects.all()

        # translate the request parameters into filter_articles() parameters.
        filter_params = {
            Article.PARAM_START_DATE : start_date,
            Article.PARAM_END_DATE : end_date,
            Article.PARAM_DATE_RANGE : date_range,
            Article.PARAM_NEWSPAPER_ID_IN_LIST : publication_list,
            Article.PARAM_TAGS_IN_LIST : tag_list,
            Article.PARAM_UNIQUE_ID_IN_LIST : unique_id_list,
            Article.PARAM_ARTICLE_ID_IN_LIST : article_id_list,
            Article.PARAM_SECTION_NAME_IN_LIST : section_list,
        }

        log.info( log_prefix + ": filter_articles_params = " + str( filter_params ) )

        # apply the filters and hand back the result.
        return Article.filter_articles( qs_IN = all_articles_qs, params_IN = filter_params )