vprint( verbose, "Using input_dir: " + input_dir) vprint( verbose, "Using output_dir: " + output_dir) # Move old results and create a new output directory if not(running_on_codalab) and save_previous_results: data_io.mvdir(output_dir, output_dir+'_'+the_date) data_io.mkdir(output_dir) #### INVENTORY DATA (and sort dataset names alphabetically) datanames = data_io.inventory_data(input_dir) # Overwrite the "natural" order #### DEBUG MODE: Show dataset list and STOP if debug_mode>=3: data_io.show_io(input_dir, output_dir) print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n') data_io.write_list(datanames) datanames = [] # Do not proceed with learning and testing # ==================== @RESULT SUBMISSION (KEEP THIS) ===================== # Always keep this code to enable result submission of pre-calculated results # deposited in the res/ subdirectory. if len(datanames)>0: vprint( verbose, "************************************************************************") vprint( verbose, "****** Attempting to copy files (from res/) for RESULT submission ******") vprint( verbose, "************************************************************************") datanames = data_io.copy_results(datanames, res_dir, output_dir, verbose) # DO NOT REMOVE! if not datanames: vprint( verbose, "[+] Results copied to output directory, no model trained/tested") else:
sys.path.append("libs") default_input_dir="C:\\Users\\vmkocheg\\Documents\\MLContest\\Phase2\\input" default_output_dir="C:\\Users\\vmkocheg\\Documents\\MLContest\\Phase2\\output" if len(argv)==1: # Use the default input and output directories if no arguments are provided input_dir = default_input_dir output_dir = default_output_dir else: input_dir = argv[1] output_dir = os.path.abspath(argv[2]); #### INVENTORY DATA (and sort dataset names alphabetically) datanames = data_io.inventory_data(input_dir) #### DEBUG MODE: Show dataset list and STOP if debug_mode>=3: data_io.show_io(input_dir, output_dir) data_io.write_list(datanames) datanames = [] # Do not proceed with learning and testing for basename in datanames: # Loop over datasets if basename not in ["robert"]: continue vprint( verbose, "************************************************") vprint( verbose, "******** Processing dataset " + basename.capitalize() + " ********") vprint( verbose, "************************************************") # ======== Learning on a time budget: # Keep track of time not to exceed your time budget. Time spent to inventory data neglected. start = time.time()
def predictSpatioTemporal(step_num, input_dir, output_dir, code_dir,
                          ext='.h5', verbose=True, debug_mode=0,
                          time_budget=300, max_samples=0,
                          AR_order=1, I_order=0, MA_order=0,
                          num_predicted_frames=8,
                          save_model=False, cache_data=False,
                          cache_dir="",
                          version=0.1):
    '''Main spatio-temporal prediction function.

    step_num
        Current file number n being processed (Xn.h5).
    input_dir
        Input directory in which the training/adaptation data are found,
        in two subdirectories train/ and adapt/.
    output_dir
        Output directory in which we expect the Yn+1.h5 predictions to be
        deposited. The next num_predicted_frames frames must be predicted.
    code_dir
        The directory to which the participant submissions are unzipped.
    ext
        The file extension of input and output data.
    verbose
        If True, debug messages are printed.
    debug_mode
        0: run the code normally, using the time budget of the task
        1: run the code normally, but limit the time to max_time
        2: run everything, but do not train, use persistence
        3: just list the directories and program version
    time_budget
        Maximum total running time in seconds. The code should keep track
        of the time spent and NOT exceed the time limit.
    max_samples
        Maximum number of training samples loaded. Allows you to limit the
        number of training samples read, for speed-up.
    AR_order, I_order, MA_order
        The orders of an ARIMA model. Your training algorithm may be slow,
        so you may want to limit the window of past frames used.
        AR_order = 1  # Persistence is order 1
        I_order = 0
        MA_order = 0
    num_predicted_frames
        Number of frames to be predicted in the future.
    save_model
        Models can eventually be pre-trained and re-loaded.
    cache_data
        Data that were loaded in the past can be cached in some binary
        format for faster reload.
    cache_dir
        A directory where to cache data.
    version
        This code's version.
    '''
    #### Check whether everything went well (no time exceeded)
    execution_success = True
    start_time = time.time()  # <== Mark starting time

    if not(cache_dir):
        cache_dir = code_dir  # For the moment it is the code directory

    # Our libraries
    path.append(code_dir)
    path.append(os.path.join(code_dir, 'sample_code'))
    import data_io
    from data_io import vprint
    from data_manager import DataManager  # load/save data and get info about them
    from model import Model               # example model implementing persistence

    vprint(verbose, "\n====> STEP: " + str(step_num))
    vprint(verbose, "Using input_dir: " + input_dir)
    vprint(verbose, "Using output_dir: " + output_dir)
    vprint(verbose, "Using code_dir: " + code_dir)
    vprint(verbose, "Using cache_dir: " + cache_dir)

    # Make a result directory and cache_dir if they do not exist
    data_io.mkdir(output_dir)
    data_io.mkdir(cache_dir)

    # List various directories
    if debug_mode >= 3:
        vprint(verbose, "This code version is " + str(version))
        data_io.show_version()
        data_io.show_dir(os.getcwd())  # Run directory
        data_io.show_io(input_dir, output_dir)
        data_io.show_dir(output_dir)

    #### START WORKING ####
    vprint(verbose, "************************************************")
    vprint(verbose, "******** Processing data chunk number " + str(step_num) + " ********")
    vprint(verbose, "************************************************")

    # Instantiate data and model objects
    if cache_data:
        cache_file = os.path.join(cache_dir, "Din.pickle")
    else:
        cache_file = ""
    Din = DataManager(datatype="input", verbose=verbose, cache_file=cache_file)
    Dout = DataManager(datatype="output", verbose=verbose)
    M = Model(hyper_param=(AR_order, I_order, MA_order), path=code_dir, verbose=verbose)

    # Read the training frames and train
    if step_num == 0:
        # First time around we read the training data.
        train_data_dir = os.path.join(input_dir, "train")
        Din.loadTrainData(train_data_dir, max_samples=max_samples)
        # Train the model: the X matrix is the time series, the t vector
        # holds the (optional) time indices.
        M.train(Din.X, Din.t)
    else:
        # Reload the already trained model and data (warm start)
        if save_model:
            M.load(path=cache_dir)
        if cache_data:
            Din.reloadData('Din', data_dir=cache_dir, format='pickle')
        # Read additional frames and append them.
        adapt_data_dir = os.path.join(input_dir, "adapt")
        Din.appendSamples(step_num, adapt_data_dir)
        # Save data for future re-use (we do not forget anything at the moment,
        # but this may be wasteful in time and memory; in particular we may not
        # need to keep the training data).
        if cache_data:
            Din.saveData('Din', data_dir=cache_dir, format='pickle')
        # Adapt the model. We pass all the data we have; the model is supposed to
        # know how to use a window of data in the past.
        M.adapt(Din.X, Din.t)
        # To save the effort of re-computing the predictions made by the old model
        # in order to correct it, we could re-load past predictions (still available
        # in the output directory). For simplicity we do not do it here.
    # Eventually save the model for future re-use (warm start)
    if save_model:
        M.save(path=cache_dir)

    # Make predictions
    Dout.X = M.predict(Din.X, num_predicted_frames=num_predicted_frames)
    Dout.t = np.array(range(1, Dout.X.shape[0] + 1))

    # Save predictions
    Dout.saveData('Y' + str(step_num), data_dir=output_dir, format="h5")

    # Check the time budget
    time_spent = time.time() - start_time
    time_left_over = time_budget - time_spent
    if time_left_over > 0:
        vprint(verbose, "[+] Done")
        vprint(verbose, "[+] Time spent %5.2f sec " % time_spent +
                        ":: Time budget %5.2f sec" % time_budget)
    else:
        execution_success = False
        vprint(verbose, "[-] Time exceeded")
        vprint(verbose, "[-] Time spent %5.2f sec " % time_spent +
                        " > Time budget %5.2f sec" % time_budget)

    return execution_success
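
# --- Illustrative usage sketch (not part of the original sample code) ---
# A minimal driver, assuming the directory layout described in the docstring
# above (train/ and adapt/ under input_dir); the directory names and the
# number of steps are hypothetical placeholders. It walks through the data
# chunks X0.h5, X1.h5, ... and stops as soon as one step exceeds its budget.
if __name__ == "__main__":
    num_steps = 10  # hypothetical number of adaptation chunks
    for step in range(num_steps):
        ok = predictSpatioTemporal(step,
                                   input_dir="sample_data",
                                   output_dir="sample_result_submission",
                                   code_dir="sample_code_submission",
                                   time_budget=300,
                                   cache_data=True,
                                   save_model=True)
        if not ok:
            break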
except Exception:
    pass

# Move old results and create a new output directory
if not(running_on_codalab) and save_previous_results:
    data_io.mvdir(res_dir, res_dir + '_' + the_date)
data_io.mkdir(res_dir)

#### INVENTORY DATA (and sort dataset names alphabetically)
datanames = data_io.inventory_data(data_dir)
# Overwrite the "natural" order

#### DEBUG MODE: Show dataset list and STOP
if debug_mode >= 3:
    data_io.show_io(data_dir, res_dir)
    logger.info('****** Sample code version ' + str(version) + ' ******')
    logger.info('========== DATASETS ==========')
    data_io.write_list(datanames)
    datanames = []  # Do not proceed with learning and testing

# ====== NEW AutoSKLEARN challenge: @RESULT SUBMISSION (KEEP THIS) section REMOVED ======
# ================ @CODE SUBMISSION (SUBSTITUTE YOUR CODE) =================
time_left_over = 0
for basename in datanames:  # Loop over datasets
    filename_valid = basename + '_valid.predict'
    filename_test = basename + '_test.predict'
    try:
if __name__ == "__main__": print("****** The ingestion program generates the result ******\n") datanames = data_io.inventory_data(input_directory) if len(datanames) == 0: print("****** No data found ******") # Loop over datasets for basename in datanames: print("****** Processing " + basename.capitalize() + " ******") # Here it should process the participant submission (but we don't bother) X = data_io.data( path.join(input_directory, basename, basename + '_valid.data')) Yvalid = random.rand(X.shape[0]) X = data_io.data( path.join(input_directory, basename, basename + '_test.data')) # This should be where we call the participant submission and use his code: Ytest = random.rand(X.shape[0]) # Write results to files data_io.write(path.join(output_directory, basename + '_valid.predict'), Yvalid) data_io.write(path.join(output_directory, basename + '_test.predict'), Ytest) # Lots of debug code... data_io.show_io(input_directory, output_directory) data_io.show_version() exit(0)