Beispiel #1
0
def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Run every configured Num/Cat model over the data, one block at a time.

    The configuration supplies one ``io_info`` element and any number of
    ``model_info`` elements.  For each block produced by the
    ``Query_manager`` every model is asked for predictions, the results are
    applied through ``update_rows`` and the block's test rows are handed to
    the ``Output_manager``.  Statistics are gathered by a ``Log_manager``
    only when ``io_info`` carries a ``log_folder_address`` attribute.

    Args:
        xml_config_address: What ``minidom.parse`` should read when
            ``usingConfig`` is true; otherwise an already parsed document
            offering ``getElementsByTagName``.
        logCB: Optional callable taking one message string.  It is forwarded
            to the models and the query manager and also receives the error
            and timing messages printed here.
        progressCB: Optional callback that is only forwarded to the models
            and the query manager.
        usingConfig: Whether ``xml_config_address`` still has to be parsed.

    Returns:
        0 after all blocks were processed, -1 when the output manager
        reports no table or the log manager reports no folder address.
    """

    def report_error(message):
        # Errors used to reach stdout only.  Mirror them to logCB, as is
        # already done for the timing message, so callers that rely on the
        # callback can see why -1 was returned.
        print(message)
        if logCB is not None:
            logCB(message)

    start_time = time.time()

    # Either parse the configuration or use the document that was passed in.
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)
    else:
        xmldoc = xml_config_address
    io_info = xmldoc.getElementsByTagName('io_info')[0]

    # Only the exact text 'True' switches on "display all predictions".
    make_all_preds = False
    if io_info.hasAttribute('display_all_predictions'):
        flag_text = str(io_info.attributes['display_all_predictions'].value)
        make_all_preds = flag_text == 'True'

    # Build one model per <model_info>; types other than Num/Cat are skipped.
    model_list = []
    for model_info in xmldoc.getElementsByTagName('model_info'):
        model_type = model_info.attributes["type"].value
        if model_type == "Num":
            model_list.append(
                Num_model(model_info, make_all_preds, logCB, progressCB))
        elif model_type == "Cat":
            model_list.append(
                Cat_model(model_info, make_all_preds, logCB, progressCB))

    # Setting up query manager
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists(model_list)

    # Setting up output table
    columns_list = get_columns_list(query_manager, model_list, make_all_preds)
    if io_info.hasAttribute("output_table_name"):
        output_manager = Output_manager(io_info, None, columns_list)
        if output_manager.table is None:
            report_error("ERROR: ordered not to overwrite table, but existing table is uncompatible with current test")
            return -1
    else:
        output_manager = Output_manager(None, query_manager, columns_list)

    # Setting up output log (stays None when the attribute is absent)
    log_manager = None
    if io_info.hasAttribute("log_folder_address"):
        log_manager = Log_manager(io_info, xml_config_address)
        if log_manager.folder_address is None:
            report_error("ERROR: ordered to not overide the log folder and a log folder exists")
            return -1

    # Setting up parcel drawer
    if use_drawing:
        parcel_block_vis = Parcel_block_vis(query_manager,
                                            query_manager.table_name)

    # Keeping track of which models actually produced a result
    ran_test = dict.fromkeys(model_list, False)

    # Impute values for all rows for each block
    while query_manager.number_remaining_blocks() > 0:

        query_manager.query_rows()

        # A block without any test row has nothing to predict or to output.
        if query_manager.is_test_list.count(True) == 0:
            continue

        # When all predictions are made none of the original values will be
        # changed, so the "<attribute>_imputed" flags are only initialised
        # in the other case.
        if not make_all_preds:
            for row in query_manager.current_rows:
                for model in model_list:
                    row[model.test_attribute + "_imputed"] = 0

        # Running all given tests
        for model in model_list:
            test_result = model.get_predictions(query_manager)

            # Identity check: "!= None" would go through the result object's
            # own comparison operators.
            if test_result is None:
                continue

            ran_test[model] = True
            update_rows(query_manager, test_result, make_all_preds)

            # Recording accuracy estimation
            if log_manager is not None:
                log_manager.get_block_stats(query_manager, test_result)

        # Only test rows are written out, never training rows.  The flag is
        # still looked up by index so mismatched list lengths keep failing
        # the way they did before.
        test_rows = [
            row
            for index, row in enumerate(query_manager.current_rows)
            if query_manager.is_test_list[index]
        ]
        output_manager.insert_rows(test_rows)

        # Record parcels visually
        if use_drawing:
            parcel_block_vis.print_parcels(query_manager)

    # Close parcel block visual (close_image needs the log manager)
    if use_drawing and log_manager is not None:
        parcel_block_vis.close_image(log_manager)

    # Print all log information to logs
    if log_manager is not None:
        log_manager.store_stats(query_manager, output_manager, model_list,
                                start_time, ran_test)

    # Posting information about time taken
    elapsed = int(time.time() - start_time)
    timing_label = "Overall Time taken: "
    # A single-argument print(...) gives the same output on Python 2 and 3;
    # the extra " " is the separator the old two-item print statement wrote.
    print(timing_label + " " + str(elapsed))
    if logCB is not None:
        logCB(timing_label + str(elapsed))

    return 0
Beispiel #2
0
def run_test(xml_config_address,
             logCB=None,
             progressCB=None,
             usingConfig=True):
    """Run every configured Num/Cat model over the data, one block at a time.

    The configuration supplies one ``io_info`` element and any number of
    ``model_info`` elements.  For each block produced by the
    ``Query_manager`` every model is asked for predictions, the results are
    applied through ``update_rows`` and the block's test rows are handed to
    the ``Output_manager``.  Statistics are gathered by a ``Log_manager``
    only when ``io_info`` carries a ``log_folder_address`` attribute.

    Args:
        xml_config_address: What ``minidom.parse`` should read when
            ``usingConfig`` is true; otherwise an already parsed document
            offering ``getElementsByTagName``.
        logCB: Optional callable taking one message string.  It is forwarded
            to the models and the query manager and also receives the error
            and timing messages printed here.
        progressCB: Optional callback that is only forwarded to the models
            and the query manager.
        usingConfig: Whether ``xml_config_address`` still has to be parsed.

    Returns:
        0 after all blocks were processed, -1 when the output manager
        reports no table or the log manager reports no folder address.
    """

    def report_error(message):
        # Errors used to reach stdout only.  Mirror them to logCB, as is
        # already done for the timing message, so callers that rely on the
        # callback can see why -1 was returned.
        print(message)
        if logCB is not None:
            logCB(message)

    start_time = time.time()

    # Either parse the configuration or use the document that was passed in.
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)
    else:
        xmldoc = xml_config_address
    io_info = xmldoc.getElementsByTagName('io_info')[0]

    # Only the exact text 'True' switches on "display all predictions".
    make_all_preds = False
    if io_info.hasAttribute('display_all_predictions'):
        flag_text = str(io_info.attributes['display_all_predictions'].value)
        make_all_preds = flag_text == 'True'

    # Build one model per <model_info>; types other than Num/Cat are skipped.
    model_list = []
    for model_info in xmldoc.getElementsByTagName('model_info'):
        model_type = model_info.attributes["type"].value
        if model_type == "Num":
            model_list.append(
                Num_model(model_info, make_all_preds, logCB, progressCB))
        elif model_type == "Cat":
            model_list.append(
                Cat_model(model_info, make_all_preds, logCB, progressCB))

    # Setting up query manager
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists(model_list)

    # Setting up output table
    columns_list = get_columns_list(query_manager, model_list, make_all_preds)
    if io_info.hasAttribute("output_table_name"):
        output_manager = Output_manager(io_info, None, columns_list)
        if output_manager.table is None:
            report_error("ERROR: ordered not to overwrite table, but existing table is uncompatible with current test")
            return -1
    else:
        output_manager = Output_manager(None, query_manager, columns_list)

    # Setting up output log (stays None when the attribute is absent)
    log_manager = None
    if io_info.hasAttribute("log_folder_address"):
        log_manager = Log_manager(io_info, xml_config_address)
        if log_manager.folder_address is None:
            report_error("ERROR: ordered to not overide the log folder and a log folder exists")
            return -1

    # Setting up parcel drawer
    if use_drawing:
        parcel_block_vis = Parcel_block_vis(query_manager,
                                            query_manager.table_name)

    # Keeping track of which models actually produced a result
    ran_test = dict.fromkeys(model_list, False)

    # Impute values for all rows for each block
    while query_manager.number_remaining_blocks() > 0:

        query_manager.query_rows()

        # A block without any test row has nothing to predict or to output.
        if query_manager.is_test_list.count(True) == 0:
            continue

        # When all predictions are made none of the original values will be
        # changed, so the "<attribute>_imputed" flags are only initialised
        # in the other case.
        if not make_all_preds:
            for row in query_manager.current_rows:
                for model in model_list:
                    row[model.test_attribute + "_imputed"] = 0

        # Running all given tests
        for model in model_list:
            test_result = model.get_predictions(query_manager)

            # Identity check: "!= None" would go through the result object's
            # own comparison operators.
            if test_result is None:
                continue

            ran_test[model] = True
            update_rows(query_manager, test_result, make_all_preds)

            # Recording accuracy estimation
            if log_manager is not None:
                log_manager.get_block_stats(query_manager, test_result)

        # Only test rows are written out, never training rows.  The flag is
        # still looked up by index so mismatched list lengths keep failing
        # the way they did before.
        test_rows = [
            row
            for index, row in enumerate(query_manager.current_rows)
            if query_manager.is_test_list[index]
        ]
        output_manager.insert_rows(test_rows)

        # Record parcels visually
        if use_drawing:
            parcel_block_vis.print_parcels(query_manager)

    # Close parcel block visual (close_image needs the log manager)
    if use_drawing and log_manager is not None:
        parcel_block_vis.close_image(log_manager)

    # Print all log information to logs
    if log_manager is not None:
        log_manager.store_stats(query_manager, output_manager, model_list,
                                start_time, ran_test)

    # Posting information about time taken
    elapsed = int(time.time() - start_time)
    timing_label = "Overall Time taken: "
    # A single-argument print(...) gives the same output on Python 2 and 3;
    # the extra " " is the separator the old two-item print statement wrote.
    print(timing_label + " " + str(elapsed))
    if logCB is not None:
        logCB(timing_label + str(elapsed))

    return 0
def run_test(xml_config_address, logCB=None, progressCB=None, usingConfig=True):
    """Run the single configured LDOF model over the data, block by block.

    The first ``model_info`` element of the configuration is turned into an
    ``LDOF_model`` and the first ``io_info`` element configures the
    ``Query_manager`` and the ``Output_manager``.  For each queried block
    that contains test rows, a ``Data_profiler`` prepares the data, the
    model produces its result, ``update_rows`` applies it and the block's
    test rows are inserted into the output table.

    Args:
        xml_config_address: What ``minidom.parse`` should read when
            ``usingConfig`` is true; otherwise an already parsed document
            offering ``getElementsByTagName``.
        logCB: Optional callable taking one message string.  It is forwarded
            to the model and the query manager and receives the final timing
            message.
        progressCB: Optional callback that is only forwarded to the model
            and the query manager.
        usingConfig: Whether ``xml_config_address`` still has to be parsed.

    Returns:
        Always 0.

    Raises:
        IndexError: If ``query_manager.numeric_list`` has no first element,
            i.e. no numeric attribute is available.
    """
    start = time.time()

    # Either parse the configuration or use the document that was passed in.
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)
    else:
        xmldoc = xml_config_address

    # Loading the (single) test from the xml file
    model_info = xmldoc.getElementsByTagName('model_info')[0]
    model = LDOF_model(model_info, logCB, progressCB)

    # Setting up query manager
    io_info = xmldoc.getElementsByTagName('io_info')[0]
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists([model])
    query_manager.group_max = 1
    query_manager.group_count = 1

    # The target attribute is not used by this test, so the first numeric
    # attribute is taken.  Without any numeric attribute the lookup fails;
    # keep the exception type but say what is actually wrong.
    try:
        model.test_attribute = query_manager.numeric_list[0]
    except IndexError:
        raise IndexError(
            "run_test needs at least one numeric attribute, "
            "but query_manager.numeric_list is empty")

    # Setting up output table, with extra view columns when configured
    extra_columns = []
    if io_info.hasAttribute('extra_attributes'):
        extra_columns = util_get_attribute_list(
            io_info.attributes['extra_attributes'].value)

    columns_list = get_columns_list(query_manager, model.attributes,
                                    extra_columns, model)
    output_manager = Output_manager(io_info, None, columns_list)

    # Process every remaining block
    while query_manager.number_remaining_blocks() > 0:

        query_manager.query_rows()

        # A block without any test row has nothing to compute or to output.
        if query_manager.is_test_list.count(True) == 0:
            continue

        # Get ranges of values for all the columns in the queried rows and
        # set up the data that is going to be used for the test.
        data_profiler = Data_profiler(query_manager)
        data_profiler.prepare_test_data(model)

        # Making predictions and writing them back into the rows
        test_result = model.get_predictions(data_profiler)
        update_rows(query_manager, data_profiler, test_result, model)

        # Add rows to the new table (only addition is the ldof value).  The
        # flag is still looked up by index so mismatched list lengths keep
        # failing the way they did before.
        test_rows = [
            row
            for index, row in enumerate(query_manager.current_rows)
            if query_manager.is_test_list[index]
        ]
        output_manager.insert_rows(test_rows)

    # Posting information about time taken
    elapsed = int(time.time() - start)
    timing_label = "Overall Time taken: "
    # A single-argument print(...) gives the same output on Python 2 and 3;
    # the extra " " is the separator the old two-item print statement wrote.
    print(timing_label + " " + str(elapsed))
    if logCB is not None:
        logCB(timing_label + str(elapsed))
    return 0
Beispiel #4
0
def run_test(xml_config_address,
             logCB=None,
             progressCB=None,
             usingConfig=True):
    """Run the single configured LDOF model over the data, block by block.

    The first ``model_info`` element of the configuration is turned into an
    ``LDOF_model`` and the first ``io_info`` element configures the
    ``Query_manager`` and the ``Output_manager``.  For each queried block
    that contains test rows, a ``Data_profiler`` prepares the data, the
    model produces its result, ``update_rows`` applies it and the block's
    test rows are inserted into the output table.

    Args:
        xml_config_address: What ``minidom.parse`` should read when
            ``usingConfig`` is true; otherwise an already parsed document
            offering ``getElementsByTagName``.
        logCB: Optional callable taking one message string.  It is forwarded
            to the model and the query manager and receives the final timing
            message.
        progressCB: Optional callback that is only forwarded to the model
            and the query manager.
        usingConfig: Whether ``xml_config_address`` still has to be parsed.

    Returns:
        Always 0.

    Raises:
        IndexError: If ``query_manager.numeric_list`` has no first element,
            i.e. no numeric attribute is available.
    """
    start = time.time()

    # Either parse the configuration or use the document that was passed in.
    if usingConfig:
        xmldoc = minidom.parse(xml_config_address)
    else:
        xmldoc = xml_config_address

    # Loading the (single) test from the xml file
    model_info = xmldoc.getElementsByTagName('model_info')[0]
    model = LDOF_model(model_info, logCB, progressCB)

    # Setting up query manager
    io_info = xmldoc.getElementsByTagName('io_info')[0]
    query_manager = Query_manager(io_info, logCB, progressCB)
    query_manager.update_att_lists([model])
    query_manager.group_max = 1
    query_manager.group_count = 1

    # The target attribute is not used by this test, so the first numeric
    # attribute is taken.  Without any numeric attribute the lookup fails;
    # keep the exception type but say what is actually wrong.
    try:
        model.test_attribute = query_manager.numeric_list[0]
    except IndexError:
        raise IndexError(
            "run_test needs at least one numeric attribute, "
            "but query_manager.numeric_list is empty")

    # Setting up output table, with extra view columns when configured
    extra_columns = []
    if io_info.hasAttribute('extra_attributes'):
        extra_columns = util_get_attribute_list(
            io_info.attributes['extra_attributes'].value)

    columns_list = get_columns_list(query_manager, model.attributes,
                                    extra_columns, model)
    output_manager = Output_manager(io_info, None, columns_list)

    # Process every remaining block
    while query_manager.number_remaining_blocks() > 0:

        query_manager.query_rows()

        # A block without any test row has nothing to compute or to output.
        if query_manager.is_test_list.count(True) == 0:
            continue

        # Get ranges of values for all the columns in the queried rows and
        # set up the data that is going to be used for the test.
        data_profiler = Data_profiler(query_manager)
        data_profiler.prepare_test_data(model)

        # Making predictions and writing them back into the rows
        test_result = model.get_predictions(data_profiler)
        update_rows(query_manager, data_profiler, test_result, model)

        # Add rows to the new table (only addition is the ldof value).  The
        # flag is still looked up by index so mismatched list lengths keep
        # failing the way they did before.
        test_rows = [
            row
            for index, row in enumerate(query_manager.current_rows)
            if query_manager.is_test_list[index]
        ]
        output_manager.insert_rows(test_rows)

    # Posting information about time taken
    elapsed = int(time.time() - start)
    timing_label = "Overall Time taken: "
    # A single-argument print(...) gives the same output on Python 2 and 3;
    # the extra " " is the separator the old two-item print statement wrote.
    print(timing_label + " " + str(elapsed))
    if logCB is not None:
        logCB(timing_label + str(elapsed))
    return 0