def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Run every configured Num/Cat model over all query blocks.

    Reads the ``io_info`` and ``model_info`` elements of the configuration,
    builds one model per ``model_info`` element, then repeatedly queries a
    block of rows, asks each model for predictions, applies them to the rows
    and inserts the test rows into the output table.

    Args:
        xml_config_address: Path handed to ``minidom.parse`` when
            ``usingConfig`` is true; otherwise it is used directly as the
            already-parsed document.
        logCB: Optional callable receiving log strings; also forwarded to the
            models and the query manager.
        progressCB: Optional progress callback forwarded to the models and
            the query manager.
        usingConfig: When false, ``xml_config_address`` is treated as the
            document object itself and is not parsed.

    Returns:
        0 on completion, or -1 when the output manager reports no table or
        the log manager reports no folder address (see the error messages).
    """
    start_time = time.time()

    # Load the xml configuration file (or use the document handed in).
    xmldoc = xml_config_address
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)

    io_info = xmldoc.getElementsByTagName('io_info')[0]

    # Seeing whether to display all predictions.
    make_all_preds = False
    if io_info.hasAttribute('display_all_predictions'):
        make_all_preds = (
            str(io_info.attributes['display_all_predictions'].value) == 'True')

    # Loading tests from the xml file. Elements whose type is neither "Num"
    # nor "Cat" are skipped, exactly as before.
    model_list = []
    for model_info in xmldoc.getElementsByTagName('model_info'):
        model_type = model_info.attributes["type"].value
        if model_type == "Num":
            model_list.append(
                Num_model(model_info, make_all_preds, logCB, progressCB))
        elif model_type == "Cat":
            model_list.append(
                Cat_model(model_info, make_all_preds, logCB, progressCB))

    # Setting up query manager.
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists(model_list)

    # Setting up output table.
    columns_list = get_columns_list(query_manager, model_list, make_all_preds)
    if io_info.hasAttribute("output_table_name"):
        output_manager = Output_manager(io_info, None, columns_list)
        if output_manager.table is None:
            print("ERROR: ordered not to overwrite table, but existing table is uncompatible with current test")
            return -1
    else:
        output_manager = Output_manager(None, query_manager, columns_list)

    # Setting up output log (not used if the attribute doesn't exist).
    log_manager = None
    if io_info.hasAttribute("log_folder_address"):
        log_manager = Log_manager(io_info, xml_config_address)
        if log_manager.folder_address is None:
            print("ERROR: ordered to not overide the log folder and a log folder exists")
            return -1

    # Setting up parcel drawer (use_drawing is a module-level switch).
    if use_drawing:
        parcel_block_vis = Parcel_block_vis(query_manager,
                                            query_manager.table_name)

    # Keeping track of tests that are actually run.
    ran_test = dict.fromkeys(model_list, False)

    # Impute values for all rows for each block.
    while query_manager.number_remaining_blocks() > 0:
        query_manager.query_rows()

        # Making sure there are some test rows in the blocked set of rows.
        if query_manager.is_test_list.count(True) > 0:

            # When all predictions are made none of the original values
            # will be changed, so the "_imputed" flags are only initialised
            # in the normal mode.
            if not make_all_preds:
                for row in query_manager.current_rows:
                    for model in model_list:
                        row[model.test_attribute + "_imputed"] = 0

            # Running all given tests.
            for model in model_list:
                # Making predictions for a given attribute.
                test_result = model.get_predictions(query_manager)

                # Updating null values.
                if test_result is not None:
                    ran_test[model] = True
                    update_rows(query_manager, test_result, make_all_preds)

                    # Recording accuracy estimation.
                    # NOTE(review): nesting reconstructed from a
                    # whitespace-collapsed source; stats are assumed to be
                    # recorded only when a result exists -- confirm.
                    if log_manager is not None:
                        log_manager.get_block_stats(query_manager, test_result)

            # Only add test rows, not training rows.
            test_rows = [
                row for row, is_test in zip(query_manager.current_rows,
                                            query_manager.is_test_list)
                if is_test
            ]
            output_manager.insert_rows(test_rows)

            # Record parcels visually.
            # NOTE(review): assumed to run only for blocks that contain
            # test rows -- confirm against the original layout.
            if use_drawing:
                parcel_block_vis.print_parcels(query_manager)

    # Close parcel block visual.
    if use_drawing and log_manager is not None:
        parcel_block_vis.close_image(log_manager)

    # Print all log information to logs.
    if log_manager is not None:
        log_manager.store_stats(query_manager, output_manager, model_list,
                                start_time, ran_test)

    # Posting information about time taken.
    elapsed = int(time.time() - start_time)
    # Single-argument print() behaves identically on Python 2 and 3; the
    # format reproduces the output of the former `print a, b` statement.
    print("%s %s" % ("Overall Time taken: ", elapsed))
    if logCB is not None:
        logCB("Overall Time taken: " + str(elapsed))

    return 0
def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Impute values block by block with the Num/Cat models from the config.

    One model is created per ``model_info`` element whose ``type`` attribute
    is ``"Num"`` or ``"Cat"``. For each block returned by the query manager
    that contains at least one test row, every model is asked for
    predictions, the rows are updated and the test rows are written through
    the output manager.

    Args:
        xml_config_address: Configuration path parsed with ``minidom.parse``
            when ``usingConfig`` is true; otherwise the document object
            itself.
        logCB: Optional callable that receives log strings.
        progressCB: Optional progress callback passed on to the models and
            the query manager.
        usingConfig: Whether ``xml_config_address`` must be parsed first.

    Returns:
        0 when the run completes; -1 if ``output_manager.table`` or
        ``log_manager.folder_address`` is None after construction.
    """
    start_time = time.time()

    # Load the xml configuration file unless a document was passed in.
    xmldoc = xml_config_address
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)

    io_info = xmldoc.getElementsByTagName('io_info')[0]

    # Seeing whether to display all predictions.
    make_all_preds = False
    if io_info.hasAttribute('display_all_predictions'):
        flag_text = str(io_info.attributes['display_all_predictions'].value)
        make_all_preds = flag_text == 'True'

    # Loading tests from the xml file; unknown model types are ignored.
    model_classes = {"Num": Num_model, "Cat": Cat_model}
    model_list = []
    for model_info in xmldoc.getElementsByTagName('model_info'):
        model_class = model_classes.get(model_info.attributes["type"].value)
        if model_class is not None:
            model_list.append(
                model_class(model_info, make_all_preds, logCB, progressCB))

    # Setting up query manager.
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists(model_list)

    # Setting up output table.
    columns_list = get_columns_list(query_manager, model_list, make_all_preds)
    if io_info.hasAttribute("output_table_name"):
        output_manager = Output_manager(io_info, None, columns_list)
        if output_manager.table is None:
            print("ERROR: ordered not to overwrite table, but existing table is uncompatible with current test")
            return -1
    else:
        output_manager = Output_manager(None, query_manager, columns_list)

    # Setting up output log (not used if the attribute doesn't exist).
    log_manager = None
    if io_info.hasAttribute("log_folder_address"):
        log_manager = Log_manager(io_info, xml_config_address)
        if log_manager.folder_address is None:
            print("ERROR: ordered to not overide the log folder and a log folder exists")
            return -1

    # Setting up parcel drawer (use_drawing is a module-level switch).
    if use_drawing:
        parcel_block_vis = Parcel_block_vis(query_manager,
                                            query_manager.table_name)

    # Keeping track of tests that are actually run.
    ran_test = dict.fromkeys(model_list, False)

    # Impute values for all rows for each block.
    while query_manager.number_remaining_blocks() > 0:
        query_manager.query_rows()

        # Nothing to do for a block without any test rows.
        if query_manager.is_test_list.count(True) <= 0:
            continue

        # When all predictions are made none of the original values will be
        # changed, so the "_imputed" columns are only reset otherwise.
        if not make_all_preds:
            for row in query_manager.current_rows:
                for model in model_list:
                    row[model.test_attribute + "_imputed"] = 0

        # Running all given tests.
        for model in model_list:
            test_result = model.get_predictions(query_manager)
            if test_result is None:
                continue

            # Updating null values.
            ran_test[model] = True
            update_rows(query_manager, test_result, make_all_preds)

            # Recording accuracy estimation.
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; assumed to apply only when a result exists -- confirm.
            if log_manager is not None:
                log_manager.get_block_stats(query_manager, test_result)

        # Only add test rows, not training rows.
        test_rows = [
            row for row, is_test in zip(query_manager.current_rows,
                                        query_manager.is_test_list)
            if is_test
        ]
        output_manager.insert_rows(test_rows)

        # Record parcels visually.
        # NOTE(review): assumed to run only for blocks that contain test
        # rows -- confirm against the original layout.
        if use_drawing:
            parcel_block_vis.print_parcels(query_manager)

    # Close parcel block visual.
    if use_drawing and log_manager is not None:
        parcel_block_vis.close_image(log_manager)

    # Print all log information to logs.
    if log_manager is not None:
        log_manager.store_stats(query_manager, output_manager, model_list,
                                start_time, ran_test)

    # Posting information about time taken.
    elapsed = int(time.time() - start_time)
    # Single-argument print() works on Python 2 and 3 alike and reproduces
    # the output of the former `print a, b` statement.
    print("%s %s" % ("Overall Time taken: ", elapsed))
    if logCB is not None:
        logCB("Overall Time taken: " + str(elapsed))

    return 0
def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Run the LDOF model over every query block and store the test rows.

    Builds a single ``LDOF_model`` from the first ``model_info`` element,
    configures a ``Query_manager`` from the first ``io_info`` element, then
    for each block with at least one test row profiles the data, obtains
    predictions, updates the rows and inserts the test rows into the output
    table.

    Args:
        xml_config_address: Path handed to ``minidom.parse`` when
            ``usingConfig`` is true; otherwise the already-parsed document.
        logCB: Optional callable receiving log strings; also forwarded to
            the model and the query manager.
        progressCB: Optional progress callback forwarded to the model and
            the query manager.
        usingConfig: When false, ``xml_config_address`` is used as the
            document without parsing.

    Returns:
        0 once all blocks have been processed.

    Raises:
        IndexError: If the ``model_info``/``io_info`` lookups or
            ``query_manager.numeric_list`` are empty when indexed with
            ``[0]`` (assuming they are list-like).
    """
    start = time.time()

    # Load the xml configuration file (or use the document handed in).
    xmldoc = xml_config_address
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)

    # Loading the test from the xml file.
    model_info = xmldoc.getElementsByTagName('model_info')[0]
    model = LDOF_model(model_info, logCB, progressCB)

    # Setting up query manager.
    io_info = xmldoc.getElementsByTagName('io_info')[0]
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists([model])
    query_manager.group_max = 1
    query_manager.group_count = 1

    # Pick an arbitrary target attribute (it isn't used so it doesn't
    # matter). Fails if no numeric attributes are configured.
    model.test_attribute = query_manager.numeric_list[0]

    # Setting up output table, adding extra columns to view if supplied.
    extra_columns = []
    if io_info.hasAttribute('extra_attributes'):
        extra_columns = util_get_attribute_list(
            io_info.attributes['extra_attributes'].value)
    columns_list = get_columns_list(query_manager, model.attributes,
                                    extra_columns, model)
    output_manager = Output_manager(io_info, None, columns_list)

    # Process every block of rows.
    while query_manager.number_remaining_blocks() > 0:
        query_manager.query_rows()

        # Making sure there are some test rows in the blocked set of rows.
        if query_manager.is_test_list.count(True) > 0:
            # Get ranges of values for all the columns in the queried rows.
            data_profiler = Data_profiler(query_manager)

            # Setup the data that is going to be used for the test.
            data_profiler.prepare_test_data(model)

            # Making predictions for a given attribute.
            test_result = model.get_predictions(data_profiler)

            # Updating null values.
            update_rows(query_manager, data_profiler, test_result, model)

            # Add rows to the new table (only addition is the ldof value).
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; insertion is assumed to happen only for blocks that
            # contain test rows -- confirm.
            test_rows = [
                row for row, is_test in zip(query_manager.current_rows,
                                            query_manager.is_test_list)
                if is_test
            ]
            output_manager.insert_rows(test_rows)

    # Posting information about time taken.
    elapsed = int(time.time() - start)
    # Single-argument print() behaves identically on Python 2 and 3; the
    # format reproduces the output of the former `print a, b` statement.
    print("%s %s" % ("Overall Time taken: ", elapsed))
    if logCB is not None:
        logCB("Overall Time taken: " + str(elapsed))

    return 0
def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Compute LDOF results for each query block and write the test rows.

    A single ``LDOF_model`` is created from the first ``model_info`` element
    and a ``Query_manager`` from the first ``io_info`` element. Blocks that
    contain no test rows are skipped; for the others the data is profiled,
    predictions are made, rows are updated and the test rows are inserted
    through the output manager.

    Args:
        xml_config_address: Configuration path parsed with ``minidom.parse``
            when ``usingConfig`` is true; otherwise the document object
            itself.
        logCB: Optional callable that receives log strings.
        progressCB: Optional progress callback passed on to the model and
            the query manager.
        usingConfig: Whether ``xml_config_address`` must be parsed first.

    Returns:
        0 once all blocks have been processed.

    Raises:
        IndexError: If the ``model_info``/``io_info`` lookups or
            ``query_manager.numeric_list`` are empty when indexed with
            ``[0]`` (assuming they are list-like).
    """
    start = time.time()

    # Load the xml configuration file unless a document was passed in.
    xmldoc = xml_config_address
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)

    # Loading the test from the xml file.
    model_info = xmldoc.getElementsByTagName('model_info')[0]
    model = LDOF_model(model_info, logCB, progressCB)

    # Setting up query manager.
    io_info = xmldoc.getElementsByTagName('io_info')[0]
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists([model])
    query_manager.group_max = 1
    query_manager.group_count = 1

    # Pick an arbitrary target attribute (it isn't used so it doesn't
    # matter). Fails if no numeric attributes are configured.
    model.test_attribute = query_manager.numeric_list[0]

    # Setting up output table, adding extra columns to view if supplied.
    extra_columns = []
    if io_info.hasAttribute('extra_attributes'):
        extra_columns = util_get_attribute_list(
            io_info.attributes['extra_attributes'].value)
    columns_list = get_columns_list(query_manager, model.attributes,
                                    extra_columns, model)
    output_manager = Output_manager(io_info, None, columns_list)

    # Process every block of rows.
    while query_manager.number_remaining_blocks() > 0:
        query_manager.query_rows()

        # Nothing to do for a block without any test rows.
        if query_manager.is_test_list.count(True) <= 0:
            continue

        # Get ranges of values for all the columns in the queried rows.
        data_profiler = Data_profiler(query_manager)

        # Setup the data that is going to be used for the test.
        data_profiler.prepare_test_data(model)

        # Making predictions for a given attribute.
        test_result = model.get_predictions(data_profiler)

        # Updating null values.
        update_rows(query_manager, data_profiler, test_result, model)

        # Add rows to the new table (only addition is the ldof value).
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; insertion is assumed to happen only for blocks that
        # contain test rows -- confirm.
        test_rows = [
            row for row, is_test in zip(query_manager.current_rows,
                                        query_manager.is_test_list)
            if is_test
        ]
        output_manager.insert_rows(test_rows)

    # Posting information about time taken.
    elapsed = int(time.time() - start)
    # Single-argument print() works on Python 2 and 3 alike and reproduces
    # the output of the former `print a, b` statement.
    print("%s %s" % ("Overall Time taken: ", elapsed))
    if logCB is not None:
        logCB("Overall Time taken: " + str(elapsed))

    return 0