Esempio n. 1
0
 time_left_over = 0
 for basename in datanames:  # one pass per dataset

     # Banner announcing which dataset is being processed.
     vprint(verbose, "************************************************")
     vprint(
         verbose,
         "******** Processing dataset " + basename.capitalize() + " ********",
     )
     vprint(verbose, "************************************************")

     # ======== Learning on a time budget:
     # Start the per-dataset clock; the loading done below is charged to the budget.
     start = time.time()

     # ======== Build the data object (the data plus information about it)
     vprint(verbose, "========= Reading and converting data ==========")
     D = DataManager(
         basename,
         input_dir,
         replace_missing=True,
         filter_features=True,
         max_samples=max_samples,
         verbose=verbose,
     )
     print(D)
     vprint(
         verbose,
         "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D),
     )

     # ======== Keeping track of time
     # The budget comes from the dataset's info unless debug mode is on,
     # in which case max_time is used instead.
     time_budget = D.info['time_budget'] if debug_mode < 1 else max_time
     overall_time_budget = overall_time_budget + time_budget
     vprint(
         verbose,
         "[+] Cumulated time budget (all tasks so far)  %5.2f sec" % overall_time_budget,
     )
     # Time left over from the previous dataset is deliberately not added
     # (i.e. no: time_budget += time_left_over).
     vprint(verbose, "[+] Time budget for this task %5.2f sec" % time_budget)
     time_spent = time.time() - start
     vprint(
         verbose,
         "[+] Remaining time after reading data %5.2f sec" % (time_budget - time_spent),
     )
     if time_spent >= time_budget:
         # Reading alone used up the whole budget: flag the run as failed.
         vprint(verbose, "[-] Sorry, time budget exceeded, skipping this task")
         execution_success = False
Esempio n. 2
0
            pass
        try:
            os.remove(filename_test)
        except:
            pass

        logger.info("************************************************")
        logger.info("******** Processing dataset " + basename.capitalize() + "********")
        logger.info("************************************************")
        
        # ======== Creating a data object with data, informations about it
        logger.info("========= Reading and converting data ==========")
        D = DataManager(basename, data_dir, max_samples=max_samples)
        logger.info(str(D))
        logger.info("[+] Size of uploaded data  %5.2f bytes" %
               data_io.total_size(D))
        overall_time_budget = min(max_time, D.info['time_budget'])
        
        # ======== Create auto-sklearn model
        new_info_object = {}
        new_info_object['is_sparse'] = D.info['is_sparse']
        new_info_object['task'] = STRING_TO_TASK_TYPES[D.info['task']]
        new_info_object['metric'] = STRING_TO_METRIC[D.info['metric']]

        configuration_space = get_configuration_space(new_info_object)
        try:
            config = ConfigSpace.Configuration(configuration_space, configuration)
        except Exception as inst:
            execution_success = False
            logger.critical(inst)
            continue
Esempio n. 3
0
        # ======== Learning on a time budget:
        # Start the clock here so that the time spent loading the data below
        # is counted against this task's budget.
        start = time.time()

        # ======== Creating a data object with the data and information about it
        vprint(verbose, "========= Reading and converting data ==========")
        D = DataManager(basename,
                        input_dir,
                        replace_missing=True,
                        filter_features=True,
                        max_samples=max_samples,
                        verbose=verbose)
        # print(...) with parentheses is valid on both Python 2 and Python 3;
        # the former `print D` statement is a SyntaxError on Python 3.
        print(D)
        vprint(
            verbose,
            "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D))

        # ======== Keeping track of time
        # The budget is read from the dataset's info unless debug mode is on,
        # in which case max_time is used instead.
        if debug_mode < 1:
            time_budget = D.info['time_budget']  # <== HERE IS THE TIME BUDGET!
        else:
            time_budget = max_time
        overall_time_budget = overall_time_budget + time_budget
        vprint(
            verbose,
            "[+] Cumulated time budget (all tasks so far)  %5.2f sec" %
            (overall_time_budget))
        # We do not add the time left over from the previous dataset:
        # time_budget += time_left_over
        vprint(verbose,
               "[+] Time budget for this task %5.2f sec" % time_budget)
        time_spent = time.time() - start
Esempio n. 4
0
    #### DEBUG MODE (debug_mode >= 3): call data_io.show_version / show_io, then exit with status 0
    if debug_mode>=3:
        data_io.show_version()
        data_io.show_io(input_dir, output_dir)
        exit(0)
          
    vprint( verbose,  "****************************************************")
    vprint( verbose,  "******** Processing spatio-temporal dataset ********")
    vprint( verbose,  "****************************************************")

    #### Instantiate the input data manager and load data from input_dir
    vprint( verbose,  "========= Reading and converting data ==========")
    Din = DataManager(datatype="input", verbose=verbose) 
    Din.loadData(input_dir)
    vprint( verbose, Din)
    vprint( verbose,  "[+] Size of uploaded data  {:5.2f} bytes".format(data_io.total_size(Din)))
    
    #### Instantiate the output data manager and copy its settings from the input manager
    Dout = DataManager(datatype="output", verbose=verbose)
    # Output columns are the input columns from index Din.ycol0 onwards.
    Dout.col_names = Din.col_names[Din.ycol0:]
    Dout.horizon = Din.horizon
    # NOTE(review): stride is set from Din.horizon, not from a stride attribute of Din -- confirm this is intended.
    Dout.stride = Din.horizon
    
    #### In debug mode (debug_mode > 1), cheat and get the solution too
    # NOTE(review): Dsol is built with datatype="input" and loaded from input_dir, exactly like Din -- confirm this really yields the solution.
    if debug_mode>1:
        Dsol = DataManager(datatype="input", verbose=verbose) 
        Dsol.loadData(input_dir)
    
    #### Instantiate the predictive model
    vprint( verbose,  "======== Creating model ==========")
    M = Model()
Esempio n. 5
0
 for basename in datanames:  # one pass per dataset

     # Version line followed by a banner naming the dataset being processed.
     vprint(
         verbose,
         "\n========== Ingestion program version " + str(version) + " ==========\n",
     )
     vprint(verbose, "************************************************")
     vprint(
         verbose,
         "******** Processing dataset " + basename.capitalize() + " ********",
     )
     vprint(verbose, "************************************************")

     # ======== Learning on a time budget:
     # Start the per-dataset clock; the loading done below is charged to the budget.
     start = time.time()

     # ======== Build the data object (the data plus information about it)
     vprint(verbose, "========= Reading and converting data ==========")
     D = DataManager(
         basename,
         input_dir,
         replace_missing=True,
         filter_features=True,
         max_samples=max_samples,
         verbose=verbose,
     )
     print(D)
     vprint(
         verbose,
         "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D),
     )

     # ======== Keeping track of time
     # The budget comes from the dataset's info unless debug mode is on,
     # in which case max_time is used instead.
     time_budget = D.info['time_budget'] if debug_mode < 1 else max_time
     overall_time_budget = overall_time_budget + time_budget
     vprint(
         verbose,
         "[+] Cumulated time budget (all tasks so far)  %5.2f sec" % overall_time_budget,
     )
     # Time left over from the previous dataset is deliberately not added
     # (i.e. no: time_budget += time_left_over).
     vprint(verbose, "[+] Time budget for this task %5.2f sec" % time_budget)
     time_spent = time.time() - start
     vprint(
         verbose,
         "[+] Remaining time after reading data %5.2f sec" % (time_budget - time_spent),
     )
     if time_spent >= time_budget:
         # Reading alone used up the whole budget: flag the run as failed.
         vprint(verbose, "[-] Sorry, time budget exceeded, skipping this task")
         execution_success = False
Esempio n. 6
0
     continue
 # Train the model within the remaining time budget, then predict on the
 # validation set (if its file exists) and on the test set.
 time_predict_value = time_to_predict(D)
 time_budget = time_budget - time_spent  # Remove time spent so far
 start = time.time()                     # Reset the counter
 time_spent = 0                          # Initialize time spent learning
 # Training limit: remaining budget scaled by time_predict_value and by 0.9.
 M.time_limit = time_budget * time_predict_value * 0.9
 # NOTE(review): the limit is assigned to M.time_limit but read back as M._time_limit --
 # presumably time_limit is a property backed by _time_limit; confirm in the model class.
 vprint(verbose,  "[+] Time budget to train the model %5.2f sec" % M._time_limit)
 # The test set is only materialized up front (and handed to fit) when the
 # dataset's info reports fewer than 1000 test samples.
 Xtest = None
 if D.info['test_num'] < 1000:
     Xtest = np.array([x.hy.full_array() for x in read_data(test_fname)]).T
 M.fit(X, y, test_set=Xtest)
 # log_reg = LogisticRegression(random_state=0, class_weight='balanced')
 # log_reg.fit(M.raw_decision_function(X), y)
 vprint(verbose, "=========== " + basename.capitalize() + " Training cycle " + " ================")
 vprint(verbose, "[+] Fitting success, time spent so far %5.2f sec" % (time.time() - start))
 vprint(verbose, "[+] Size of trained model  %5.2f bytes" % data_io.total_size(M))
 # Make predictions
 # -----------------
 # Column 1 of predict_proba's output is kept for both validation and test predictions.
 if os.path.isfile(valid_fname):
     Y_valid = M.predict_proba(read_data(valid_fname))[:, 1]
     # Y_valid = log_reg.predict_proba(M.raw_decision_function(read_data(valid_fname)))[:, 1]
 else:
     Y_valid = None
 # If the test data was not loaded before fitting, read it now.
 if Xtest is None:
     Xtest = read_data(test_fname)
 Y_test = M.predict_proba(Xtest)[:, 1]
 # Y_test = log_reg.predict_proba(M.raw_decision_function(read_data(test_fname)))[:, 1]
 vprint(verbose,  "[+] Prediction success, time spent so far %5.2f sec" % (time.time() - start))
 # Write results
 # -------------
 filename_valid = basename + '_valid.predict'
Esempio n. 7
0
 time_left_over = 0
 for basename in datanames: # Loop over datasets

     vprint( verbose,  "************************************************")
     vprint( verbose,  "******** Processing dataset " + basename.capitalize() + " ********")
     vprint( verbose,  "************************************************")

     # ======== Learning on a time budget:
     # Start the clock here so that the time spent loading the data below
     # is counted against this task's budget.
     start = time.time()

     # ======== Creating a data object with the data and information about it
     vprint( verbose,  "========= Reading and converting data ==========")
     D = DataManager(basename, input_dir, replace_missing=True, filter_features=True, max_samples=max_samples, verbose=verbose)
     # print(...) with parentheses is valid on both Python 2 and Python 3;
     # the former `print D` statement is a SyntaxError on Python 3.
     print(D)
     vprint( verbose,  "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D))

     # ======== Keeping track of time
     # The budget is read from the dataset's info unless debug mode is on,
     # in which case max_time is used instead.
     if debug_mode<1:
         time_budget = D.info['time_budget']        # <== HERE IS THE TIME BUDGET!
     else:
         time_budget = max_time
     overall_time_budget = overall_time_budget + time_budget
     vprint( verbose,  "[+] Cumulated time budget (all tasks so far)  %5.2f sec" % (overall_time_budget))
     # We do not add the time left over from the previous dataset: time_budget += time_left_over
     vprint( verbose,  "[+] Time budget for this task %5.2f sec" % time_budget)
     time_spent = time.time() - start
     vprint( verbose,  "[+] Remaining time after reading data %5.2f sec" % (time_budget-time_spent))
     if time_spent >= time_budget:
         # Reading alone used up the whole budget: flag the run as failed.
         vprint( verbose,  "[-] Sorry, time budget exceeded, skipping this task")
         execution_success = False
Esempio n. 8
0
        # ======== Learning on a time budget:
        # Start the clock now so that data loading counts against the budget.
        start = time.time()

        # ======== Build the data object (the data plus information about it)
        vprint(verbose, "========= Reading and converting data ==========")
        D = DataManager(basename, input_dir, replace_missing=True,
                        filter_features=True, max_samples=max_samples,
                        verbose=verbose)
        print(D)
        vprint(verbose,
               "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D))

        # ======== Keeping track of time
        # Outside debug mode the budget is read from the dataset's info;
        # otherwise max_time is used. Either way it is coerced to float
        # before being accumulated.
        time_budget = D.info['time_budget'] if debug_mode < 1 else max_time
        time_budget = float(time_budget)
        overall_time_budget = overall_time_budget + time_budget
        vprint(verbose,
               "[+] Cumulated time budget (all tasks so far)  %5.2f sec" % overall_time_budget)
        # Time left over from the previous dataset is deliberately not added
        # (i.e. no: time_budget += time_left_over).