#--------------------------------------------------------------------------------
# Filter articles with Article.filter_articles()
#--------------------------------------------------------------------------------

from sourcenet.models import Article, Temp_Section, Newspaper

# Criteria shared by both date windows: the newspaper whose NewsBank code is
#    "GRPB", the GRP news section name list, and the custom Q for GRP in-house
#    authors (all taken from constants on Article).
article_filter_params = {
    Article.PARAM_NEWSPAPER_NEWSBANK_CODE: "GRPB",
    Article.PARAM_SECTION_NAME_LIST: Article.GRP_NEWS_SECTION_NAME_LIST,
    Article.PARAM_CUSTOM_ARTICLE_Q: Article.Q_GRP_IN_HOUSE_AUTHOR,
}

# dates before layoffs (July 2009)
article_filter_params.update({
    Article.PARAM_START_DATE: "2009-07-01",
    Article.PARAM_END_DATE: "2009-07-31",
})
before_qs = Article.filter_articles(**article_filter_params)

# dates after layoffs (June 2010) - the same dictionary is reused, so only the
#    two date entries are overwritten before the second call.
article_filter_params.update({
    Article.PARAM_START_DATE: "2010-06-01",
    Article.PARAM_END_DATE: "2010-06-30",
})
after_qs = Article.filter_articles(**article_filter_params)

#================================================================================
# NLTK stuff
#================================================================================

#--------------------------------------------------------------------------------
# Get article to test on
#--------------------------------------------------------------------------------
# end_date - last day of the date range to collect.
options_parser.add_option(
    "-e",
    "--end_date",
    dest=my_article.PARAM_END_DATE,
    default=None,
    help="End date of date range to collect, in YYYY-MM-DD format.",
)

# single_date - collect one day only.
options_parser.add_option(
    "-d",
    "--single_date",
    dest=my_article.PARAM_SINGLE_DATE,
    default=None,
    help="Single date to collect, in YYYY-MM-DD format.",
)

# process_all - boolean flag, stored under "autoproc_all".
options_parser.add_option(
    "-a",
    "--process_all",
    dest="autoproc_all",
    action="store_true",
    default=False,
    help="If present, runs all possible processing for each article.",
)

# process_bylines - boolean flag, stored under "autoproc_authors".
options_parser.add_option(
    "-b",
    "--process_bylines",
    dest="autoproc_authors",
    action="store_true",
    default=False,
    help="If present, runs author string processing routines.",
)

# parse the command line; positional arguments land in args.
options, args = options_parser.parse_args()

#================================================================================
# Do work
#================================================================================

# set debug flag
DEBUG = True

# Each option's dest becomes a key in this dictionary, and the dictionary is
#    expanded into keyword arguments below.
# NOTE(review): this relies on the dest names matching the keyword names
#    Article.process_articles() accepts - confirm against that method.
options_dict = vars(options)

# hand the parsed options to the article processing routine.
status_message = Article.process_articles(**options_dict)
def create_article_query_set(self, param_prefix_IN=''):
    """
    Build an Article QuerySet from the filter values held in this
    instance's parameter container.

    Each value is looked up under param_prefix_IN + the corresponding
    SourcenetBase.PARAM_* name.  The values are collected into a dictionary
    keyed by Article.PARAM_* names, and that dictionary is handed to
    Article.filter_articles() together with Article.objects.all() as the
    starting QuerySet.

    Parameters:
    - param_prefix_IN - string prepended to every parameter name before it
        is looked up.  Defaults to '' (no prefix).

    Returns:
    - whatever Article.filter_articles() returns, or None when
        get_param_container() returns nothing (or something falsy).
    """

    me = "create_article_query_set"
    log_prefix = "In " + me + ": "

    # logger first, then the parameter container (same order as always).
    log = self.get_logger()

    # no param container present.  Error - nothing to build a query from.
    if not self.get_param_container():
        return None
    #-- END check to make sure we have a param container. --#

    def prefixed(name):
        # apply the caller-supplied prefix to a parameter name.
        return param_prefix_IN + name
    #-- END nested function prefixed() --#

    # date filters come back as strings, None when absent.
    start_date = self.get_param_as_str(prefixed(SourcenetBase.PARAM_START_DATE), None)
    end_date = self.get_param_as_str(prefixed(SourcenetBase.PARAM_END_DATE), None)
    date_range = self.get_param_as_str(prefixed(SourcenetBase.PARAM_DATE_RANGE), None)

    # list filters, [] when absent.
    publications = self.get_param_as_list(prefixed(SourcenetBase.PARAM_PUBLICATION_LIST), [])
    tags = self.get_param_as_list(prefixed(SourcenetBase.PARAM_TAG_LIST), [])
    unique_ids = self.get_param_as_list(prefixed(SourcenetBase.PARAM_UNIQUE_ID_LIST), [])
    article_ids = self.get_param_as_list(prefixed(SourcenetBase.PARAM_ARTICLE_ID_LIST), [])
    sections = self.get_param_as_list(prefixed(SourcenetBase.PARAM_SECTION_LIST), [])

    log.info(log_prefix + "unique_id_list_IN = " + str(unique_ids))

    # start from every article, then let Article.filter_articles() narrow it.
    all_articles = Article.objects.all()

    # keys are the names Article.filter_articles() is given; entry order
    #    matches the order the values were retrieved in.
    filter_articles_params = {
        Article.PARAM_START_DATE: start_date,
        Article.PARAM_END_DATE: end_date,
        Article.PARAM_DATE_RANGE: date_range,
        Article.PARAM_NEWSPAPER_ID_IN_LIST: publications,
        Article.PARAM_TAGS_IN_LIST: tags,
        Article.PARAM_UNIQUE_ID_IN_LIST: unique_ids,
        Article.PARAM_ARTICLE_ID_IN_LIST: article_ids,
        Article.PARAM_SECTION_NAME_IN_LIST: sections,
    }

    log.info(log_prefix + "filter_articles_params = " + str(filter_articles_params))

    # call Article.filter_articles()
    return Article.filter_articles(qs_IN=all_articles, params_IN=filter_articles_params)
#-- END method create_article_query_set() --#