# Got something? if ( ( author_organization ) and ( author_organization != "" ) ): # save it. print( " - org. string: " + author_organization ) current_article_author.organization_string = author_organization current_article_author.save() #-- END check to see if there is an organization. --# #-- END check to see if we have an author string --# #-- END loop over authros with no organization_string --# # set stop time. my_summary_helper.set_stop_time() # add info. to summary outputter. my_summary_helper.set_prop_value( "org_string_counter", org_string_counter ) my_summary_helper.set_prop_desc( "org_string_counter", "Found org. string" ) my_summary_helper.set_prop_value( "article_source_counter", article_source_counter ) my_summary_helper.set_prop_desc( "article_source_counter", "Using Article Source" ) my_summary_helper.set_prop_value( "bad_author_string_counter", bad_author_string_counter ) my_summary_helper.set_prop_desc( "bad_author_string_counter", "Bad Author String" ) # generate summary string. summary_string += my_summary_helper.create_summary_string( item_prefix_IN = "==> " ) print( summary_string )
def code_article_data( self, query_set_IN ):

    """
    Runs the article coder over every Article in the query set passed in
        and returns a summary of what happened.

    What this method does:
    - gets a coder from self.get_coder_instance() and initializes it with
        the parameter dictionary from self.get_param_container().
    - for each article: logs a header line, calls the coder's
        code_article(), and stores the returned status with
        self.record_article_status().
    - counts articles processed, non-success statuses ("errors"), and
        exceptions raised while coding an individual article.  An
        exception in one article is logged and recorded as that article's
        status; it does not stop the batch.
    - if self.do_manage_time is True, calls self.start_request() before
        each article and self.may_i_continue() after it, and stops
        processing as soon as may_i_continue() does not return True.

    Whatever records the coder creates for an article (the original notes
        for this method mention Article_Data, and skipping attributions
        that were already detected) are created inside the coder's
        code_article() - nothing in this method creates or de-duplicates
        them.

    Preconditions: query_set_IN should be an iterable of Article instances
        (each needs at least "id" and "headline").  If it is empty or
        None, no articles are coded, and the counters in the summary are
        all -1.

    Parameters:
    - query_set_IN - query set (or other iterable) of Articles to code.

    Returns:
    - String - status message: the coder's debug output (only if
        ArticleCoder.DEBUG_FLAG is True and there was a query set),
        followed by the summary string from SummaryHelper.
    """

    # return reference
    status_OUT = ''

    # declare variables
    me = "code_article_data"

    # auditing variables - these only move off of -1 if we actually have a
    #     query set to process.
    article_counter = -1
    exception_counter = -1
    error_counter = -1

    # grab a logger.
    my_logger = self.get_logger()

    # do I print some status?
    do_i_print_updates = self.do_print_updates

    # initialize summary helper
    my_summary_helper = SummaryHelper()

    # init rate-limiting
    # NOTE(review): the flags below are compared with "== True", as in the
    #     original, rather than tested for truthiness, because they come
    #     from outside this method and might not be strict booleans.
    am_i_rate_limited = self.do_manage_time

    # do we have a query set?
    if ( query_set_IN ):

        # create instance of ArticleCoder and initialize it from the
        #     dictionary nested inside our parameter container.
        article_coder = self.get_coder_instance()
        my_params = self.get_param_container()
        param_dict = my_params.get_parameters()
        article_coder.initialize_from_params( param_dict )

        # loop on the article list, passing each to the ArticleCoder for
        #     processing.
        article_counter = 0
        exception_counter = 0
        error_counter = 0
        continue_work = True
        for current_article in query_set_IN:

            # OK to continue work?  If not, stop looping - nothing is done
            #     for the remaining articles, so there is no reason to
            #     walk through them.
            if not ( continue_work == True ):

                break

            #-- END check to see if OK to continue. --#

            # increment article counter
            article_counter += 1

            # rate-limited?
            if ( am_i_rate_limited == True ):

                # yes - start timer.
                self.start_request()

            #-- END pre-request check for rate-limiting --#

            # a little debugging to start
            # NOTE(review): this is built outside the try below, so an
            #     article whose headline is not a string would abort the
            #     whole batch - confirm headline can never be None.
            logging_message = "\n\n============================================================\n==> article " + str( article_counter ) + ": " + str( current_article.id ) + " - " + current_article.headline
            my_logger.info( logging_message )

            # print?
            if ( do_i_print_updates == True ):

                print( logging_message )

            #-- END check to see if we print a message. --#

            # per-article exception handling, so one bad article does not
            #     stop the batch and we get a count of problem articles.
            try:

                # code the article.
                current_status = article_coder.code_article( current_article )

                # record status
                self.record_article_status( current_article.id, current_status )

                # success?
                if ( current_status != ArticleCoder.STATUS_SUCCESS ):

                    # nope.  Error.
                    error_counter += 1

                    logging_message = "======> In " + me + "(): ERROR - " + current_status + "; article = " + str( current_article )
                    my_logger.debug( logging_message )

                    # print?
                    if ( do_i_print_updates == True ):

                        print( logging_message )

                    #-- END check to see if we print a message. --#

                #-- END check to see if success --#

            except Exception as e:

                # increment exception_counter
                exception_counter += 1

                # get exception helper.
                my_exception_helper = self.get_exception_helper()

                # log exception, no email or anything.
                exception_message = "Exception caught for article " + str( current_article.id )
                my_exception_helper.process_exception( e, exception_message )

                logging_message = "======> " + exception_message + " - " + str( e )
                my_logger.debug( logging_message )

                # print?
                if ( do_i_print_updates == True ):

                    print( logging_message )

                #-- END check to see if we print a message. --#

                # record the exception message as this article's status.
                self.record_article_status( current_article.id, logging_message )

            #-- END exception handling around individual article processing. --#

            # rate-limited?
            if ( am_i_rate_limited == True ):

                # yes - check if we may continue.
                continue_work = self.may_i_continue()

            #-- END post-request check for rate-limiting --#

        #-- END loop over articles --#

        # add some debug?
        if ( ArticleCoder.DEBUG_FLAG == True ):

            # yup.
            status_OUT += "\n\n" + article_coder.debug + "\n\n"

        #-- END check to see if we have debug to output. --#

    #-- END check to make sure we have a query set. --#

    # add stuff to summary and print the results.

    # set stop time
    my_summary_helper.set_stop_time()

    # add stuff to summary
    my_summary_helper.set_prop_value( "article_counter", article_counter )
    my_summary_helper.set_prop_desc( "article_counter", "Articles processed" )

    my_summary_helper.set_prop_value( "error_counter", error_counter )
    my_summary_helper.set_prop_desc( "error_counter", "Error count" )

    my_summary_helper.set_prop_value( "exception_counter", exception_counter )
    my_summary_helper.set_prop_desc( "exception_counter", "Exception count" )

    # output - set prefix if you want.
    summary_string = my_summary_helper.create_summary_string( item_prefix_IN = "==> " )
    my_logger.info( summary_string )

    # output summary string as status.
    status_OUT += summary_string

    return status_OUT

#-- END method code_article_data() --#