コード例 #1
ファイル: run.py プロジェクト: tomMoral/autoML
if __name__=="__main__" and debug_mode<4:   
    #### Check whether everything went well (no time exceeded)
    execution_success = True

    #### INPUT/OUTPUT: Get input and output directory names
    if len(argv)==1: # Use the default input and output directories if no arguments are provided
        input_dir = default_input_dir
        output_dir = default_output_dir
        input_dir = argv[1]
        output_dir = os.path.abspath(argv[2])
    # Move old results and create a new output directory
    if not(running_on_codalab):
        data_io.mvdir(output_dir, '../'+output_dir+'_'+the_date)

    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)

    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')
        datanames = [] # Do not proceed with learning and testing

    # ==================== @RESULT SUBMISSION (KEEP THIS) =====================
    # Always keep this code to enable result submission of pre-calculated results
    # deposited in the res/ subdirectory.
    if len(datanames)>0:
コード例 #2
    # Our libraries
    path.append (program_dir)
    path.append (submission_dir)
    path.append (submission_dir + '/AutoML3_sample_code_submission') #IG: to allow submitting the starting kit as sample submission
    import data_io
    from AutoML3_ingestion_program.data_io import vprint
    from AutoML3_sample_code_submission.model import Model
    from AutoML3_ingestion_program.data_manager import DataManager

    if debug_mode >= 4: # Show library version and directory structure

    # Move old results and create a new output directory (useful if you run locally)
    if save_previous_results:
        data_io.mvdir(output_dir, output_dir+'_'+the_date)


    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)
    # Overwrite the "natural" order

    #### Delete zip files and metadata file, if present
    datanames = [x for x in datanames
      if x!='metadata' and not x.endswith('.zip')]

    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
コード例 #3
    import data_io                       # general purpose input/output functions
    from data_io import vprint           # print only in verbose mode
    from data_manager import DataManager # load/save data and get info about them
    from complexity import complexity # complexity measure

    if debug_mode >= 4:
      print('File structure')

    if debug_mode >= 4: # Show library version and directory structure
    # Move old results and create a new output directory (useful if you run locally)
    if save_previous_results:
        data_io.mvdir(output_dir, output_dir+'_'+the_date) 
    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = os.listdir(input_dir)
    # change input dir to compensate for the single file unzipping
    if 'input_data' in datanames:
        input_dir = os.path.join(input_dir, 'input_data')
        datanames = os.listdir(input_dir)
    # Overwrite the "natural" order
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Ingestion program version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
コード例 #4
def predictSpatioTemporal(step_num, input_dir, output_dir, code_dir,
                          ext='.h5', verbose=True, debug_mode=0,
                          time_budget=300, max_samples=0,
                          AR_order=1, I_order=0, MA_order=0,
                          num_predicted_frames=8,
                          save_model=False, cache_data=False,
                          cache_dir="",
                          version=0.1):
    '''Main spatio-temporal prediction function.

    Trains (on step 0) or adapts a model on the frames available so far,
    predicts the next frames and saves them to output_dir.

    Args:
        step_num: Current file number n being processed (Xn.h5).
        input_dir: Input directory in which the training/adaptation data
            are found, in two subdirectories train/ and adapt/.
        output_dir: Output directory in which predictions are deposited.
            This function saves them under the name 'Y' + str(step_num).
        code_dir: The directory to which the participant submissions are
            unzipped. It is appended to the import path.
        ext: The file extension of input and output data. Not read by this
            function body; kept for interface compatibility.
        verbose: If True, debug messages are printed.
        debug_mode: Documented levels are
            0: run the code normally, using the time budget of the task;
            1: run the code normally, but limit the time to max_time;
            2: run everything, but do not train, use persistence;
            3: just list the directories and program version.
            NOTE(review): this body only tests debug_mode >= 3 (extra
            directory listing); levels 1 and 2 are not acted upon here.
        time_budget: Maximum total running time in seconds. The time spent
            is measured and reported; exceeding it clears the return flag.
        max_samples: Maximum number of training samples loaded. Allows you
            to limit the number of training samples read for speed-up.
        AR_order, I_order, MA_order: Model order (the order of an ARIMA
            model), passed to the model as hyper_param. Training may be
            slow, so you may want to limit the window of past frames used.
            Persistence is AR_order = 1, I_order = 0, MA_order = 0.
        num_predicted_frames: Number of frames to be predicted in the future.
        save_model: Models can eventually be pre-trained and re-loaded.
            NOTE(review): the model load/save calls are absent from this
            excerpt, so the flag currently has no effect (see below).
        cache_data: If True, data loaded in the past are cached in pickle
            format in cache_dir for faster reload on later steps.
        cache_dir: A directory where to cache data. Defaults to code_dir
            when empty.
        version: This code's version.

    Returns:
        True if the run finished within time_budget, 0 otherwise.
    '''
    #### Check whether everything went well (no time exceeded)
    execution_success = True
    start_time = time.time()         # <== Mark starting time
    if not cache_dir:
        cache_dir = code_dir         # For the moment it is the code directory

    # Our libraries: make the submission importable before importing from it.
    path.append(code_dir)
    path.append(os.path.join(code_dir, 'sample_code'))
    import data_io
    from data_io import vprint
    from data_manager import DataManager  # load/save data and get info about them
    from model_diviyan_new import Model   # example model implementing persistence

    vprint( verbose,  "\n====> STEP: " + str(step_num))
    vprint( verbose,  "Using input_dir: " + input_dir)
    vprint( verbose,  "Using output_dir: " + output_dir)
    vprint( verbose,  "Using code_dir: " + code_dir)
    vprint( verbose,  "Using cache_dir: " + cache_dir)

    # Make a result directory and cache_dir if they do not exist
    for directory in (output_dir, cache_dir):
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)

    # List various directories
    if debug_mode >= 3:
        # Plain concatenation: the former literal "%d" was never interpolated.
        vprint( verbose,  "This code version is " + str(version))
        data_io.show_dir(os.getcwd()) # Run directory
        data_io.show_io(input_dir, output_dir)

    #### START WORKING ####  ####  ####  ####  ####  ####  ####  ####  ####
    vprint( verbose,  "************************************************")
    vprint( verbose,  "******** Processing data chunk number " + str(step_num) + " ********")
    vprint( verbose,  "************************************************")

    # Instantiate data and model objects
    # An empty cache_file is passed when caching is disabled.
    cache_file = os.path.join(cache_dir, "Din.pickle") if cache_data else ""
    Din = DataManager(datatype="input", verbose=verbose, cache_file=cache_file)
    Dout = DataManager(datatype="output", verbose=verbose)
    M = Model(hyper_param=(AR_order, I_order, MA_order), path=code_dir, verbose=verbose)

    # Read data training frames and train
    if step_num == 0:
        # First time we read the training data.
        train_data_dir = os.path.join(input_dir, "train")
        Din.loadTrainData(train_data_dir, max_samples=max_samples)
        # Train the model
        # The X matrix is the time series, the t vector are the (optional) time indices
        M.train(Din.X, Din.t)
    else:
        # Reload the already trained model and data (warm start)
        if save_model:
            # NOTE(review): the model re-load call is missing from this
            # excerpt; restore it from the original file. No-op until then.
            pass
        if cache_data:
            Din.reloadData('Din', data_dir=cache_dir, format='pickle')

    # Read additional frames and append them.
    adapt_data_dir = os.path.join(input_dir, "adapt")
    Din.appendSamples(step_num, adapt_data_dir)

    # Save data for future re-use (we do not forget anything at the moment,
    # but this may be wasteful in time and memory). We especially may not need
    # the training data.
    if cache_data:
        Din.saveData('Din', data_dir=cache_dir, format='pickle')

    # Adapt the model. We pass all the data we have, the model is supposed to
    # know how to use a window of data in the past.
    M.adapt(Din.X, Din.t)
    # To save the effort of re-computing predictions made by the old model to
    # correct it we could re-load past predictions (still available in the output directory).
    # For simplicity we do not do it here.

    # Eventually save the model for future re-use (warm start)
    if save_model:
        # NOTE(review): the model save call is missing from this excerpt;
        # restore it from the original file. No-op until then.
        pass

    # Make predictions
    Dout.X = M.predict(Din.X, num_predicted_frames=num_predicted_frames)
    # Time indices of the predicted frames, numbered from 1.
    Dout.t = np.array(range(1, Dout.X.shape[0]+1))

    # Save predictions
    Dout.saveData('Y' + str(step_num), data_dir=output_dir, format="h5")

    time_spent = time.time() - start_time
    time_left_over = time_budget - time_spent
    if time_left_over>0:
        vprint( verbose,  "[+] Done")
        vprint( verbose,  "[+] Time spent %5.2f sec " % time_spent + "::  Time budget %5.2f sec" % time_budget)
    else:
        # Budget exceeded: report failure to the caller (0 kept as the failure value).
        execution_success = 0
        vprint( verbose,  "[-] Time exceeded")
        vprint( verbose,  "[-] Time spent %5.2f sec " % time_spent + " > Time budget %5.2f sec" % time_budget)

    return execution_success