Example #1
0
#--------------------------------------------------------------------------------
# Filter articles with Article.filter_articles()
#--------------------------------------------------------------------------------

from sourcenet.models import Article, Temp_Section, Newspaper

# Filter criteria shared by both queries: newspaper NewsBank code "GRPB",
#     the GRP news section name list, and the GRP in-house author Q.
article_filter_params = {
    Article.PARAM_NEWSPAPER_NEWSBANK_CODE : "GRPB",
    Article.PARAM_SECTION_NAME_LIST : Article.GRP_NEWS_SECTION_NAME_LIST,
    Article.PARAM_CUSTOM_ARTICLE_Q : Article.Q_GRP_IN_HOUSE_AUTHOR,
}

# dates before layoffs - July, 2009.
article_filter_params.update( {
    Article.PARAM_START_DATE : "2009-07-01",
    Article.PARAM_END_DATE : "2009-07-31",
} )
before_qs = Article.filter_articles( **article_filter_params )

# dates after layoffs - June, 2010.  The same dictionary is re-used, so only
#     the two date entries change between the calls.
article_filter_params.update( {
    Article.PARAM_START_DATE : "2010-06-01",
    Article.PARAM_END_DATE : "2010-06-30",
} )
after_qs = Article.filter_articles( **article_filter_params )


#================================================================================
# NLTK stuff
#================================================================================

#--------------------------------------------------------------------------------
# Get article to test on
#--------------------------------------------------------------------------------
Example #2
0
# end_date - stored under the name held in my_article.PARAM_END_DATE.
options_parser.add_option(
    "-e",
    "--end_date",
    dest = my_article.PARAM_END_DATE,
    default = None,
    help = "End date of date range to collect, in YYYY-MM-DD format."
)

# single_date - stored under the name held in my_article.PARAM_SINGLE_DATE.
options_parser.add_option(
    "-d",
    "--single_date",
    dest = my_article.PARAM_SINGLE_DATE,
    default = None,
    help = "Single date to collect, in YYYY-MM-DD format."
)

# process_all - boolean flag, False unless present on the command line.
options_parser.add_option(
    "-a",
    "--process_all",
    dest = "autoproc_all",
    action = "store_true",
    default = False,
    help = "If present, runs all possible processing for each article."
)

# process_bylines - boolean flag, False unless present on the command line.
options_parser.add_option(
    "-b",
    "--process_bylines",
    dest = "autoproc_authors",
    action = "store_true",
    default = False,
    help = "If present, runs author string processing routines."
)

# parse the command line: option values land in options, leftover positional
#     arguments in args.
options, args = options_parser.parse_args()


#================================================================================
# Do work
#================================================================================


# turn on debug flag.
DEBUG = True

# vars() hands back the options object's attribute dictionary, keyed by each
#     option's "dest" name.
options_dict = vars( options )

# pass every parsed option to Article.process_articles() as a keyword
#     argument, and hold on to what it returns.
status_message = Article.process_articles( **options_dict )
Example #3
0
    def create_article_query_set( self, param_prefix_IN = '' ):

        """
        Builds an Article QuerySet from the filter parameters stored in this
            instance's param container.

        Reads the start date, end date, date range, publication list, tag
            list, unique ID list, article ID list and section list
            parameters (each looked up by param_prefix_IN + the matching
            SourcenetBase.PARAM_* name), places them in a dictionary keyed by
            the corresponding Article.PARAM_* constants, then passes that
            dictionary and Article.objects.all() to Article.filter_articles().

        Parameters:
        - param_prefix_IN - string prepended to each parameter name before it
            is looked up.  Defaults to empty string.

        Returns whatever Article.filter_articles() returns, or None when
            get_param_container() returns something falsy.
        """

        me = "create_article_query_set"

        # logger is fetched first, then the param container.
        log = self.get_logger()
        param_container = self.get_param_container()

        # no param container present - nothing to filter on, so bail out.
        if not param_container:

            return None

        #-- END check to make sure we have a param container. --#

        # pull in the incoming parameters - strings default to None, lists
        #     default to an empty list.
        start_date = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_START_DATE, None )
        end_date = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_END_DATE, None )
        date_range = self.get_param_as_str( param_prefix_IN + SourcenetBase.PARAM_DATE_RANGE, None )
        publication_ids = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_PUBLICATION_LIST, [] )
        tags = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_TAG_LIST, [] )
        unique_ids = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_UNIQUE_ID_LIST, [] )
        article_ids = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_ARTICLE_ID_LIST, [] )
        section_names = self.get_param_as_list( param_prefix_IN + SourcenetBase.PARAM_SECTION_LIST, [] )

        log.info( "In " + me + ": unique_id_list_IN = " + str( unique_ids ) )

        # start from every article; Article.filter_articles() narrows it.
        all_articles = Article.objects.all()

        # map the incoming values onto the names Article.filter_articles()
        #     expects.  Key order matches the log output below.
        filter_params = {
            Article.PARAM_START_DATE : start_date,
            Article.PARAM_END_DATE : end_date,
            Article.PARAM_DATE_RANGE : date_range,
            Article.PARAM_NEWSPAPER_ID_IN_LIST : publication_ids,
            Article.PARAM_TAGS_IN_LIST : tags,
            Article.PARAM_UNIQUE_ID_IN_LIST : unique_ids,
            Article.PARAM_ARTICLE_ID_IN_LIST : article_ids,
            Article.PARAM_SECTION_NAME_IN_LIST : section_names,
        }

        log.info( "In " + me + ": filter_articles_params = " + str( filter_params ) )

        # let Article.filter_articles() do the actual filtering.
        return Article.filter_articles( qs_IN = all_articles, params_IN = filter_params )