Example #1
  def loadLabel (self, filename, verbose=True):
      ''' Get the solution/truth values'''
      if verbose:  print("========= Reading " + filename)
      start = time.time()
      if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
          with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
              vprint (verbose, "Loading pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
              return pickle.load(pickle_file)
      if 'task' not in self.info.keys():
          self.getTypeProblem(filename)
  
      # IG: Here change to accommodate the new multiclass label format
      if self.info['task'] == 'multilabel.classification':
          label = data_io.data(filename)
      elif self.info['task'] == 'multiclass.classification':
          label = data_io.data(filename)
          # IG: I changed that because it was too confusing.
          #label = data_converter.convert_to_num(data_io.data(filename))              
      else:
          label = np.ravel(data_io.data(filename)) # flatten to a 1-D vector
          #label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector
 
      if self.use_pickle:
          with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
              vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
              p = pickle.Pickler(pickle_file) 
              p.fast = True 
              p.dump(label)
      end = time.time()
      if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
      return label
	def loadData (self, filename, verbose=True, replace_missing=True):
		''' Get the data from a text file in one of 3 formats: matrix, sparse, sparse_binary'''
		if verbose:  print("========= Reading " + filename)
		start = time.time()
		if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
				vprint (verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
				return pickle.load(pickle_file)
		if 'format' not in self.info.keys():
			self.getFormatData(filename)
		if 'feat_num' not in self.info.keys():
			self.getNbrFeatures(filename)

		data_func = {'dense':data_io.data, 'sparse':data_io.data_sparse, 'sparse_binary':data_io.data_binary_sparse}

		data = data_func[self.info['format']](filename, self.info['feat_num'])

		# IMPORTANT: when we replace missing values we double the number of variables

		if self.info['format']=='dense' and replace_missing and np.any(np.isnan(data)): # map() is lazy in Python 3, so test the array directly
			pass
			#vprint (verbose, "Replace missing values by 0 (slow, sorry)")
			#data = data_converter.replace_missing(data)
		if self.use_pickle:
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
				vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
				p = pickle.Pickler(pickle_file)
				p.fast = True
				p.dump(data)
		end = time.time()
		if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
		return data
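
Note: both loaders in this example share the same pickle-caching idiom: look for <basename>.pickle in tmp_dir, return it if present, otherwise parse the text file and cache the result. A minimal self-contained sketch of that idiom, assuming only the standard library (cached_load and the loader argument are illustrative names, not part of the original DataManager):

import os
import pickle

def cached_load(filename, tmp_dir, loader):
    '''Return loader(filename), caching the result as a pickle in tmp_dir.'''
    cache = os.path.join(tmp_dir, os.path.basename(filename) + ".pickle")
    if os.path.exists(cache):
        with open(cache, "rb") as f:  # binary mode: pickle files are not text
            return pickle.load(f)
    data = loader(filename)
    with open(cache, "wb") as f:
        p = pickle.Pickler(f)
        p.fast = True  # same speed flag the examples above set
        p.dump(data)
    return data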
Example #3
 def __init__(self,
              datatype="input",
              data_file="",
              verbose=False,
              cache_file=""):
     '''Constructor'''
     self.version = "1"
     self.datatype = datatype
     self.verbose = verbose
     self.cache_file = cache_file  # To save/reload data in binary format (only if not empty)
     if not cache_file:
         self.use_pickle = False
     else:
         self.use_pickle = True
     self.X = np.array([])
     self.t = np.array([])
     self.col_names = []
     self.ycol0 = 0
     self.t0 = 0
     self.now = 0
     self.stride = 0
     self.horizon = 0
     vprint(self.verbose, "DataManager :: Version = " + self.version)
     if data_file:
         self.loadData(data_file)
Example #4
 def load(self, path=""):
     ''' Reload model.'''
     if not path:
         path = self.model_dir
     vprint(self.verbose, "Model :: ========= Loading model from " + path)
     self = pickle.load(open(os.path.join(path, '_model.pickle'), "rb"))  # "rb", not "w": opening for write would truncate the model file
     return self
Example #5
	def loadLabel (self, filename, verbose=True):
		''' Get the solution/truth values'''
		if verbose:  print("========= Reading " + filename)
		start = time.time()
		if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
				vprint (verbose, "Loading pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
				return pickle.load(pickle_file)
		if 'task' not in self.info.keys():
			self.getTypeProblem(filename)
	
		# IG: Here change to accommodate the new multiclass label format
		if self.info['task'] == 'multilabel.classification':
			label = data_io.data(filename)
		elif self.info['task'] == 'multiclass.classification':
			label = data_converter.convert_to_num(data_io.data(filename))              
		else:
			label = np.ravel(data_io.data(filename)) # flatten to a 1-D vector
			#label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector
   
		if self.use_pickle:
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
				vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
				p = pickle.Pickler(pickle_file) 
				p.fast = True 
				p.dump(label)
		end = time.time()
		if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
		return label
Example #6
 def reloadData(self, filename="", data_dir=""):
     ''' Reload data in pickle format.'''
     success = True
     vprint(self.verbose, "Data Manager :: ========= Reloading data from " + filename)
     start = time.time()
     # Write some code to reload the data
     temp =[]
     try:
         if filename.endswith('h5'): 
             with h5py.File(os.path.join(data_dir, filename), 'r') as f:
                 self.X = f['X'][:]
                 self.t = f['t'][:]
         elif filename.endswith('pickle'):
             with open(os.path.join(data_dir, filename), 'rb') as f:
                 temp = pickle.load(f)
                 self.X = temp['X']
                 self.t = temp['t']
                 vprint(self.verbose, filename)
         else:
             success = False
             vprint(self.verbose, "[-] No such file extension." + filename)            
     except Exception as e: 
         vprint (self.verbose, e)
         success = False 
     end = time.time()
     if success:
         vprint(self.verbose, "[+] Success in %5.2f sec" % (end - start))
     return success
Example #7
 def __init__(self, hyper_param=[], path="", verbose=True):
     ''' Define whatever data member you need (model parameters and hyper-parameters).
     hyper_param is a tuple.
     path specifies the directory where models are saved/loaded.'''
     self.version = "Persitent"
     self.hyper_param = hyper_param
     self.model_dir = path
     self.verbose = verbose
     vprint(self.verbose, "Version = " + self.version)
Example #8
 def predict(self, Xtest, num_predicted_frames=8):
     ''' Make predictions of the next num_predicted_frames frames.
     For this example we predict persistence of the last frame.'''
     vprint(self.verbose, "Model :: ========= Making predictions =========")
     start = time.time()
     Ytest = np.array([Xtest[-1]] * num_predicted_frames)
     end = time.time()
     vprint(self.verbose, "[+] Success, predictions made in %5.2f sec" % (end - start))
     return Ytest
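
The persistence baseline above just repeats the last observed frame num_predicted_frames times. A quick check of the np.array([Xtest[-1]] * n) construction on toy values (illustrative, not from the original code):

import numpy as np

Xtest = np.arange(12).reshape(3, 4)  # 3 frames, 4 variables
Ytest = np.array([Xtest[-1]] * 8)    # repeat the last frame 8 times
assert Ytest.shape == (8, 4)
assert (Ytest == Xtest[-1]).all()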
Example #9
 def train(self, Xtrain, Ttrain=[]):
     '''  Adjust parameters with training data.
     Xtrain is a matrix of frames (frames in lines, features/variables in columns)
     Ttrain is the optional time index. The index may not be continuous (e.g. jumps or resets)
     Typically Xtrain has thousands of lines.''' 
     vprint(self.verbose, "Model :: ========= Training model =========")
     start = time.time()
     # Do something
     end = time.time()
     vprint(self.verbose, "[+] Success, model trained in %5.2f sec" % (end - start))
Example #10
    def getInfo(self, filename, verbose=True):
        ''' Get all information {attribute = value} pairs from the filename (public.info file), 
              if it exists, otherwise, output default values'''
        #print "os.path.exist"
        #print os.path.exists(filename)
        if filename is None:
            basename = self.basename
            input_dir = self.input_dir
        else:
            basename = os.path.basename(filename).split('_')[0]
            input_dir = os.path.dirname(filename)

        if os.path.exists(filename):
            self.getInfoFromFile(filename)
            vprint(verbose, "Info file found : " + os.path.abspath(filename))
            # Finds the data format ('dense', 'sparse', or 'sparse_binary')
            self.getFormatData(
                os.path.join(input_dir, basename + '_train1.data'))
        #else:
        #    self.info={}
        else:
            vprint(verbose,
                   "Info file NOT found : " + os.path.abspath(filename))
            # Hopefully this never happens because this is done in a very inefficient way
            # reading the data multiple times...
            self.info['usage'] = 'No Info File'
            self.info['name'] = basename
            # Get the data format and sparsity
            self.getFormatData(
                os.path.join(input_dir, basename + '_train1.data'))
            # Assume no categorical variable and no missing value (we'll deal with that later)
            self.info['has_categorical'] = 0
            self.info['has_missing'] = 0
            # Get the target number, label number, target type and task
            self.getTypeProblem(
                os.path.join(input_dir, basename + '_train1.solution'))
            if self.info['task'] == 'regression':
                self.info['metric'] = 'r2_metric'
            else:
                self.info['metric'] = 'auc_metric'
            # Feature type: Numerical, Categorical, or Binary
            # Can also be determined from [filename].type
            self.info['feat_type'] = 'Mixed'
            # Get the number of features and patterns
            self.getNbrFeatures(
                os.path.join(input_dir, basename + '_train.data'),
                os.path.join(input_dir, basename + '_test.data'),
                os.path.join(input_dir, basename + '_valid.data'))
            self.getNbrPatterns(basename, input_dir, 'train')
            self.getNbrPatterns(basename, input_dir, 'valid')
            self.getNbrPatterns(basename, input_dir, 'test')
            # Set default time budget
            self.info['time_budget'] = 600
        return self.info
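
For reference, when no info file is found the fallback branch above amounts to filling self.info with the following defaults (a condensed restatement of the code above, not a separate API):

defaults = {
    'usage': 'No Info File',
    'has_categorical': 0,
    'has_missing': 0,
    'feat_type': 'Mixed',
    'time_budget': 600,
}
# plus 'metric': 'r2_metric' for regression tasks and 'auc_metric' otherwise,
# while format, task, feature and pattern counts are probed from the data files.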
Example #11
 def adapt(self, Xadapt, Tadapt=[]):
     ''' Adjust parameters and hyper-paramaters with short-term adaptation data.
     Xadapt is a matrix of frames (frames in lines, features/variables in columns)
     Tadapt is the optional time index. 
     Typically the time index has no cuts/jumps and the number of frames is of
     the order of 100.''' 
     vprint(self.verbose, "Model :: ========= Adapting model =========")
     start = time.time()
     # Do something
     end = time.time()
     vprint(self.verbose, "[+] Success, model adapted in %5.2f sec" % (end - start))
Example #12
 def predict(self, Xtest, num_predicted_frames=8, ycol0=0):
     ''' Make predictions of the next num_predicted_frames frames.
     Start at variable ycol0 only (do not predict the values of the first
     0 to ycol0-1 variables).
     For this example we predict persistence of the last frame.'''
     vprint(self.verbose, "Model :: ========= Making predictions =========")
     start = time.time()
     Ytest = np.array([Xtest[-1, ycol0:]] * num_predicted_frames)
     end = time.time()
     vprint(self.verbose,
            "[+] Success, predictions made in %5.2f sec" % (end - start))
     return Ytest
Example #13
 def reloadData(self, filename, format="pickle"):
     ''' Reload data in pickle or csv format.
          Warning: csv format will not reload metadata, suitable only for predictions.
     '''
     if not filename.endswith(format):
         filename = filename + '.' + format
     vprint(self.verbose, "DataManager :: ========= Attempting to reload data from " + filename)
     start = time.time()
     success = True
     temp =[]
     try:
         if format=='pickle':
             with open(filename, 'rb') as f:
                 temp = pickle.load(f)
                 for key in self.__dict__.keys():
                     self.__dict__[key] = temp[key]
         elif format=='csv' and self.datatype=='output':
             data = np.genfromtxt(filename, delimiter=',', skip_header=1)
             self.t = data[:,0]
             self.X = data[:,1:]
         else: 
             vprint(self.verbose, "[-] Wrong file format " + format + " for " + self.datatype)
             success = False
     except Exception as e: 
         vprint (self.verbose, e)
         success = False     
     if success:
         end = time.time()
         vprint(self.verbose, "[+] Success in %5.2f sec" % (end - start))
     self.resetTime()
     return success    
Example #14
 def appendData(self, X, t): 
     ''' Append a data sample (useful for predictions). '''
     vprint(self.verbose, "DataManager :: ========= Appending {} frame(s)".format(X.shape[0]))
     if X.shape[0] != t.shape[0]:
         vprint(self.verbose, "[-] Inconsistent dimensions X.len={} t.len={}".format(X.shape[0], t.shape[0]))
     self.t = np.append(self.t, t)
     if self.datatype=='output':
         rng=range(self.ycol0,X.shape[1])
     else:
         rng=range(X.shape[1])
     if self.X.shape[0]==0:
         self.X = X[:,rng]
     else:
         self.X = np.append(self.X[:,rng], X[:,rng], axis=0) 
     return 
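
The rng slicing above keeps only the target columns (from ycol0 on) when the manager holds 'output' data. A toy illustration of the same indexing (values are illustrative):

import numpy as np

X = np.arange(6).reshape(2, 3)  # 2 frames, 3 variables
rng = range(1, X.shape[1])      # as with ycol0 = 1
print(X[:, rng])                # [[1 2]
                                #  [4 5]]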
Example #15
 def __init__(self, datatype="unknown", data_file="", verbose=False, max_samples=float('inf'), cache_file=""):
     '''Constructor'''
     self.version = "1"
     self.datatype = datatype 
     self.verbose = verbose
     self.max_samples=max_samples
     self.cache_file=cache_file # To save/reload data in binary format (only if not empty)
     if not cache_file: 
         self.use_pickle = False
     else:
         self.use_pickle = True
     self.X = np.array([])
     self.t = np.array([])
     vprint(self.verbose, "Data Manager :: Version = " + self.version)
     if data_file:
         self.loadData(data_file)
Example #16
    def loadData(self, filename, feat_type, verbose=True):
        ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
        if verbose: print("========= Reading " + filename)
        start = time.time()

        if self.use_pickle and os.path.exists(
                os.path.join(self.tmp_dir,
                             os.path.basename(filename) + ".pickle")):
            with open(
                    os.path.join(self.tmp_dir,
                                 os.path.basename(filename) + ".pickle"),
                    "r") as pickle_file:
                vprint(
                    verbose, "Loading pickle file : " +
                    os.path.join(self.tmp_dir,
                                 os.path.basename(filename) + ".pickle"))
                return pickle.load(pickle_file)

        if 'format' not in self.info:
            self.getFormatData(filename)
        if 'feat_num' not in self.info:
            self.getNbrFeatures(filename)

        data_func = {
            'dense': input_routines.convert_file_to_array,
            'sparse': data_io.data_sparse,
            'sparse_binary': data_io.data_binary_sparse
        }

        data = data_func[self.info['format']](filename, feat_type)

        if self.use_pickle:
            with open(
                    os.path.join(self.tmp_dir,
                                 os.path.basename(filename) + ".pickle"),
                    "wb") as pickle_file:
                vprint(
                    verbose, "Saving pickle file : " +
                    os.path.join(self.tmp_dir,
                                 os.path.basename(filename) + ".pickle"))
                p = pickle.Pickler(pickle_file)
                p.fast = True
                p.dump(data)
        end = time.time()
        if verbose: print("[+] Success in %5.2f sec" % (end - start))
        return data
Example #17
 def getInfo(self, filename, verbose=True):
     ''' Get all information {attribute = value} pairs from the filename (public.info file),
               if it exists, otherwise, output default values'''
     if filename is None:
         basename = self.basename
         input_dir = self.input_dir
     else:
         basename = os.path.basename(filename).split('_')[0]
         input_dir = os.path.dirname(filename)
     if os.path.exists(filename):
         self.getInfoFromFile(filename)
         vprint(verbose, "Info file found : " + os.path.abspath(filename))
         # Finds the data format ('dense', 'sparse', or 'sparse_binary')
         self.getFormatData(os.path.join(input_dir, basename + '_train.data'))
     else:
         vprint(verbose, "Info file NOT found : " + os.path.abspath(filename))
         # Hopefully this never happens because this is done in a very inefficient way
         # reading the data multiple times...
         self.info['usage'] = 'No Info File'
         self.info['name'] = basename
         # Get the data format and sparsity
         self.getFormatData(os.path.join(input_dir, basename + '_train.data'))
         # Assume no categorical variable and no missing value (we'll deal with that later)
         self.info['has_categorical'] = 0
         self.info['has_missing'] = 0
         # Get the target number, label number, target type and task
         self.getTypeProblem(os.path.join(input_dir, basename + '_train.solution'))
         if self.info['task'] == 'regression':
             self.info['metric'] = 'r2_metric'
         else:
             self.info['metric'] = 'auc_metric'
         # Feature type: Numerical, Categorical, or Binary
         # Can also be determined from [filename].type
         self.info['feat_type'] = 'Mixed'
         # Get the number of features and patterns
         self.getNbrFeatures(os.path.join(input_dir, basename + '_train.data'),
                             os.path.join(input_dir, basename + '_test.data'),
                             os.path.join(input_dir, basename + '_valid.data'))
         self.getNbrPatterns(basename, input_dir, 'train')
         self.getNbrPatterns(basename, input_dir, 'valid')
         self.getNbrPatterns(basename, input_dir, 'test')
         # Set default time budget
         self.info['time_budget'] = 600
     return self.info
Example #18
	def loadData (self, filename, verbose=True, replace_missing=True):
		''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
		if verbose:  print("========= Reading " + filename)
		start = time.time()
		if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
				vprint (verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
				return pickle.load(pickle_file)
		if 'format' not in self.info.keys():
			self.getFormatData(filename)
		if 'feat_num' not in self.info.keys():
			self.getNbrFeatures(filename)
			
		data_func = {'dense':data_io.data, 'sparse':data_io.data_sparse, 'sparse_binary':data_io.data_binary_sparse}
		
		data = data_func[self.info['format']](filename, self.info['feat_num'])
  
		# IMPORTANT: when we replace missing values we double the number of variables
  
		if self.info['format']=='dense' and replace_missing and np.any(np.isnan(data)): # map() is lazy in Python 3, so test the array directly
			vprint (verbose, "Replace missing values by 0 (slow, sorry)")
			data = data_converter.replace_missing(data)
		if self.use_pickle:
			with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
				vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
				p = pickle.Pickler(pickle_file) 
				p.fast = True 
				p.dump(data)
		end = time.time()
		if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
		return data
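
The IMPORTANT comment above refers to the usual trick of appending one missing-value indicator per column, which doubles the variable count. A hedged sketch of that idea (data_converter.replace_missing itself is not shown in these examples, so this is an assumption about its effect, not its actual code):

import numpy as np

data = np.array([[1.0, np.nan], [np.nan, 4.0]])
mask = np.isnan(data).astype(float)    # one indicator column per variable
filled = np.nan_to_num(data)           # missing values replaced by 0
augmented = np.hstack([filled, mask])  # shape (2, 4): variables doubled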
Example #19
 def loadTrainData(self, data_dir="", max_samples=float('inf')):
     ''' Get the data from hdf5 files.'''
     success = True
     data_reloaded = False
     vprint(self.verbose, "Data Manager :: ========= Reading training data from " + data_dir)
     start = time.time()
     vid=0
     if self.use_pickle and self.reloadData(self.cache_file):
         # Try to reload the file from a pickle
         data_reloaded = True # Turn "success" to false if there is a problem.
     else:
         # Load the data into X and t.
         dir_list = []
         for dir in os.listdir(data_dir):
             if os.path.isdir(os.path.join(data_dir, dir)):
                 dir_list.append(dir)
         # sort dir in decreasing order of n for n in Xmn        
         dir_list = sorted(dir_list, key=lambda i: i.split('m')[-1], reverse=True)
         vprint(self.verbose, dir_list)
         self.X=np.array([]) # Re-initialize from scratch
         self.t=np.array([])
         for dir in dir_list:
             for data_file in sorted([h5file for h5file in os.listdir(os.path.join(data_dir, dir)) if h5file.endswith('h5')],key=lambda i:int(i.split('.')[0].split('X')[-1])):
                 self.appendSamples(data_file, os.path.join(data_dir, dir), verbose=False)
                 vid=vid+1
         #self.X = np.reshape(self.X, (-1, self.X[0].shape[-2],self.X[0].shape[-1]))          
                
     if self.use_pickle and not data_reloaded:
         # Save data as a pickle for "faster" later reload
         self.saveData(self.cache_file, format='pickle')
         
     end = time.time()
     if len(self.X)==0:
         success = False 
         vprint(self.verbose, "[-] Loading failed")
     else:
         vprint(self.verbose, "[+] Success, loaded %d videos in %5.2f sec" % (vid, end - start))
         #vprint(self.verbose, self.X.shape)
         #vprint(self.verbose, self.t.shape)
     return success
Example #20
    def saveData(self, data_file, data_dir="", frames=[], format='pickle'):
        ''' Save data in pickle / h5 format.
            Parameters:
            data_file: save data under this filename (no extension)
            data_dir: where to save data
            frames: which lines in the video matrix to save,
                    e.g. frames=(start_frame, end_frame)=(10,15);
                    default = entire video matrix
            format: 'pickle' or 'h5', default = 'pickle'
        '''
        if not data_file.endswith(format):
            data_file = data_file + '.' + format
        success = True
        try:
            filename = os.path.join(data_dir, data_file)
            vprint(self.verbose, "Data Manager :: ========= Saving data to " + filename)

            start = time.time()
            # Write some code to save the data
            if frames: 
                if format=='h5': 
                    with h5py.File(filename, 'w') as f:
                        f.create_dataset(name='X', shape=self.X[frames[0]:frames[1]].shape, \
                            data=self.X[frames[0]:frames[1]])
                        f.create_dataset(name='t', shape=self.t[frames[0]:frames[1]].shape, \
                            data=self.t[frames[0]:frames[1]])
                else: 
                    with open(filename, 'wb') as f:
                        dict_to_save = {key:self.__dict__[key] for key in self.__dict__.keys() if not key in ['X', 't']}
                        dict_to_save['X'] = self.X[frames[0]:frames[1]]
                        dict_to_save['t'] = self.t[frames[0]:frames[1]]
                        pickle.dump(dict_to_save, f, 2)
            else: #save the entire matrix
                if format=='h5':
                    with h5py.File(filename, 'w') as f:
                        f.create_dataset(name='X', shape=self.__dict__['X'].shape, data=self.__dict__['X'])
                        f.create_dataset(name='t', shape=self.__dict__['t'].shape, data=self.__dict__['t'])
                else: 
                    with open(filename, 'wb') as f:
                        pickle.dump(self.__dict__, f, 2) 
        except Exception as e: 
            vprint(self.verbose, e)  # vprint expects the verbosity flag as first argument
            success = False
        end = time.time()
        vprint(self.verbose, "[+] Success in %5.2f sec" % (end - start))
        return success
Example #21
 def loadData(self, data_file, data_dir=""):
     ''' Erase previous data and load data from a given data file.
     data_file: Number n of the 'chunk' or 'step' (appearing in the file name)
     Alternatively, the full file name Xn can be supplied as a string instead of the chunk number.
     '''
     success = True
     start = time.time() 
     if isinstance(data_file, int):
         data_file = "X" + str(data_file)
     vprint(self.verbose, "Data Manager :: ========= Loading data from " + data_file)          
     self.X, self.t = self.getOneSample(data_file, data_dir)
     end = time.time()
     if len(self.X)==0:
         success = False 
         vprint(self.verbose, "[-] Loading failed")
     else:
         vprint(self.verbose, "[+] Success in %5.2f sec" % (end - start))
     return success
Example #22
    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)
    
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
        data_io.write_list(datanames)      
        datanames = [] # Do not proceed with learning and testing
        
    # ==================== @RESULT SUBMISSION (KEEP THIS) =====================
    # Always keep this code to enable result submission of pre-calculated results
    # deposited in the res/ subdirectory.
    if len(datanames)>0:
        vprint( verbose,  "************************************************************************")
        vprint( verbose,  "****** Attempting to copy files (from res/) for RESULT submission ******")
        vprint( verbose,  "************************************************************************")
        OK = data_io.copy_results(datanames, res_dir, output_dir, verbose) # DO NOT REMOVE!
        if OK: 
            vprint( verbose,  "[+] Success")
            datanames = [] # Do not proceed with learning and testing
        else:
            vprint( verbose, "======== Some missing results on current datasets!")
            vprint( verbose, "======== Proceeding to train/test:\n")
    # =================== End @RESULT SUBMISSION (KEEP THIS) ==================

    # ================ @CODE SUBMISSION (SUBSTITUTE YOUR CODE) =================
    overall_time_budget = 0
    for basename in datanames: # Loop over datasets
        vprint( verbose,  "************************************************")
Example #24
 def saveData(self, filename, format="pickle"):
     ''' Save data in pickle or csv format.
     '''
     if not filename.endswith(format):
         filename = filename + '.' + format
         vprint(self.verbose, "[-] filename must end with " + format)
     vprint(self.verbose,
            "DataManager :: ========= Saving data to " + filename)
     start = time.time()
     try:
         if format == 'pickle':
             with open(filename, 'wb') as f:
                 vprint(self.verbose, "DataManager :: Saving as pickle")
                 dict_to_save = {
                     key: self.__dict__[key]
                     for key in self.__dict__.keys()
                     if not key in ['X', 't']
                 }
                 dict_to_save['X'] = self.X
                 dict_to_save['t'] = self.t
                 pickle.dump(dict_to_save, f, 2)
         else:
             with open(filename, 'w') as f:
                 vprint(self.verbose, "DataManager :: Saving as csv")
                 f.write("Date")
                 for nm in self.col_names:
                     f.write("," + nm)
                 f.write("\n")
                 for i in range(self.t.shape[0]):
                     f.write("{:g}".format(self.t[i]))
                     for j in range(self.X.shape[1]):
                         f.write(",{:g}".format(self.X[i, j]))
                     f.write("\n")
         success = True
     except Exception as e:
         vprint(self.verbose, e)
         success = False
     end = time.time()
     vprint(self.verbose, "[+] Success in %5.2f sec" % (end - start))
     return success
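
The csv branch above writes a 'Date' column followed by the named variables, one row per time step. A hedged sketch of the layout it produces, with toy stand-ins for t, X and col_names:

col_names = ["v1", "v2"]
t = [0.0, 1.0]
X = [[1.5, 2.5], [3.5, 4.5]]
lines = ["Date," + ",".join(col_names)]
for ti, row in zip(t, X):
    lines.append("{:g}".format(ti) + "".join(",{:g}".format(v) for v in row))
print("\n".join(lines))
# Date,v1,v2
# 0,1.5,2.5
# 1,3.5,4.5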
Example #25
    output_dir = os.path.abspath(argv[2])

#### INVENTORY DATA (and sort dataset names alphabetically)
datanames = data_io.inventory_data(input_dir)
#### DEBUG MODE: Show dataset list and STOP
if debug_mode>=3:
    data_io.show_io(input_dir, output_dir)
    data_io.write_list(datanames)
    datanames = [] # Do not proceed with learning and testing


for basename in datanames: # Loop over datasets
    if basename not in ["robert"]:
        continue

    vprint( verbose,  "************************************************")
    vprint( verbose,  "******** Processing dataset " + basename.capitalize() + " ********")
    vprint( verbose,  "************************************************")

    # ======== Learning on a time budget:
    # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
    start = time.time()

    # ======== Creating a data object with data, informations about it
    vprint( verbose,  "======== Reading and converting data ==========")
    D = DataManager(basename, input_dir, replace_missing=True, filter_features=True, verbose=verbose)
    print(D)

    # ======== Keeping track of time
    time_spent = time.time() - start
    vprint( verbose,  "time spent %5.2f sec" %time_spent)
Example #26
    def loadData(self, data_dir=""):
        ''' Get the data from csv files.'''
        success = True
        data_reloaded = False
        vprint(
            self.verbose,
            "DataManager :: ========= Reading training data from " + data_dir)
        start = time.time()
        if self.use_pickle and self.reloadData(self.cache_file):
            # Try to reload the file from a pickle
            data_reloaded = True  # Turn "success" to false if there is a problem.
        else:
            # Load metadata
            metadata = yaml.safe_load(open(join(data_dir, 'metadata'), 'r'))  # safe_load: plain yaml.load needs a Loader in PyYAML 5+
            self.stride = metadata['stride']
            self.horizon = metadata['horizon']
            self.ycol0 = metadata['ycol0']
            # Load the training data into X and t.
            data_file_list = sorted(ls(join(data_dir, "training", "*.csv")))
            vprint(self.verbose,
                   "DataManager :: ========= Load data from files:")
            vprint(self.verbose, data_file_list)
            header = np.genfromtxt(data_file_list[0],
                                   delimiter=',',
                                   max_rows=1,
                                   names=True)
            self.col_names = header.dtype.names[1:]
            for data_file in data_file_list:
                data = np.genfromtxt(data_file, delimiter=',', skip_header=1)
                self.t = np.append(self.t, data[:, 0])
                if self.X.shape[0] == 0:
                    self.X = data[:, 1:]
                else:
                    self.X = np.append(self.X, data[:, 1:], axis=0)
            self.t0 = self.t.shape[0]
            # Append the evaluation data to X and t.
            data_file_list = sorted(ls(join(data_dir, "evaluation", "*.csv")))
            vprint(self.verbose, data_file_list)
            for data_file in data_file_list:
                data = np.genfromtxt(data_file, delimiter=',', skip_header=1)
                self.t = np.append(self.t, data[:, 0])
                self.X = np.append(self.X, data[:, 1:], axis=0)

        if self.use_pickle and not data_reloaded:
            # Save data as a pickle for "faster" later reload
            self.saveData(self.cache_file)

        end = time.time()
        if len(self.X) == 0:
            success = False
            vprint(self.verbose, "[-] Loading failed")
        else:
            vprint(
                self.verbose, "[+] Success, loaded %d samples in %5.2f sec" %
                (self.t.shape[0], end - start))
        self.resetTime()
        return success
Example #27
 # Overwrite the "natural" order
 
 #### DEBUG MODE: Show dataset list and STOP
 if debug_mode>=3:
     data_io.show_version()
     data_io.show_io(input_dir, output_dir)
     print('\n****** Ingestion program version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
     data_io.write_list(datanames)      
     datanames = [] # Do not proceed with learning and testing
     
 #### MAIN LOOP OVER DATASETS: 
 overall_time_budget = 0
 time_left_over = 0
 for basename in datanames: # Loop over datasets
     
     vprint( verbose,  "\n========== Ingestion program version " + str(version) + " ==========\n") 
     vprint( verbose,  "************************************************")
     vprint( verbose,  "******** Processing dataset " + basename.capitalize() + " ********")
     vprint( verbose,  "************************************************")
     
     # ======== Learning on a time budget:
     # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
     start = time.time()
     
     # ======== Creating a data object with data, informations about it
     vprint( verbose,  "========= Reading and converting data ==========")
     D = DataManager(basename, input_dir, replace_missing=True, filter_features=True, max_samples=max_samples, verbose=verbose)
     print(D)
     vprint( verbose,  "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D))
     
     # ======== Keeping track of time
Example #28
    datanames = data_io.inventory_data(input_dir)
    # Overwrite the "natural" order

    # DEBUG MODE: Show dataset list and STOP
    if debug_mode >= 3:
        data_io.show_version()
        data_io.show_io(input_dir, output_dir)
        print('\n****** Ingestion program version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')
        data_io.write_list(datanames)
        datanames = []  # Do not proceed with learning and testing

    # MAIN LOOP OVER DATASETS:
    overall_time_budget = 0
    time_left_over = 0
    for basename in datanames:  # Loop over datasets
        vprint(verbose,  "\n========== Ingestion program version " + str(version) + " ==========\n")
        vprint(verbose,  "************************************************")
        vprint(verbose,  "******** Processing dataset " + basename.capitalize() + " ********")
        vprint(verbose,  "************************************************")
        tmp_valid = os.path.join(program_dir, 'output', basename + '_valid.predict')
        if os.path.isfile(tmp_valid):
            os.link(tmp_valid, os.path.join(output_dir, basename + '_valid.predict'))
        tmp_test = os.path.join(program_dir, 'output', basename + '_test.predict')
        if os.path.isfile(tmp_test):
            os.link(tmp_test, os.path.join(output_dir, basename + '_test.predict'))
            vprint(verbose,  "[+] Results saved using cache")
            continue

        # ======== Learning on a time budget:
        # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
        start = time.time()
Example #30
    datanames = data_io.inventory_data(input_dir)

    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode >= 3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' +
              '========== DATASETS ==========\n')
        data_io.write_list(datanames)
        datanames = []  # Do not proceed with learning and testing

    # ==================== @RESULT SUBMISSION (KEEP THIS) =====================
    # Always keep this code to enable result submission of pre-calculated results
    # deposited in the res/ subdirectory.
    if len(datanames) > 0:
        vprint(
            verbose,
            "************************************************************************"
        )
        vprint(
            verbose,
            "****** Attempting to copy files (from res/) for RESULT submission ******"
        )
        vprint(
            verbose,
            "************************************************************************"
        )
        OK = data_io.copy_results(datanames, res_dir, output_dir,
                                  verbose)  # DO NOT REMOVE!
        if OK:
            vprint(verbose, "[+] Success")
            datanames = []  # Do not proceed with learning and testing
        else:
Example #31
    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)    

    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')          
        data_io.write_list(datanames)      
        datanames = [] # Do not proceed with learning and testing

    # ================ @CODE SUBMISSION (SUBSTITUTE YOUR CODE) =================
    overall_time_budget = 0
    time_left_over = 0
    for basename in datanames:

        vprint( verbose,  "************************************************")
        vprint( verbose,  "******** Processing dataset " + basename.capitalize() + " ********")
        vprint( verbose,  "************************************************")
        
        # ======== Learning on a time budget:
        # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
        start = time.time()

        # ======== Creating a data object with data, informations about it
        vprint( verbose,  "========= Reading and converting data ==========")
        D = DataManager(basename, input_dir, replace_missing=True, filter_features=True, max_samples=max_samples, verbose=verbose)
        print(D)
        vprint( verbose,  "[+] Size of uploaded data  %5.2f bytes" % data_io.total_size(D))

        # ======== Keeping track of time
        if debug_mode<1:    
Example #32
 def save(self, path=""):
     ''' Save model.'''
     if not path: path = self.model_dir
     vprint(self.verbose, "Model :: ========= Saving model to " + path)
     pickle.dump(self, open(os.path.join(path, '_model.pickle'), "wb"))  # binary write mode for pickle
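
Together with the load() in Example #4, this forms a pickle save/load round trip. A minimal hedged sketch of that round trip with a plain object standing in for the Model class (Toy and its field are illustrative):

import os
import pickle
import tempfile

class Toy:
    def __init__(self):
        self.w = [1.0, 2.0]

path = tempfile.mkdtemp()
with open(os.path.join(path, '_model.pickle'), "wb") as f:
    pickle.dump(Toy(), f)
with open(os.path.join(path, '_model.pickle'), "rb") as f:
    restored = pickle.load(f)
assert restored.w == [1.0, 2.0]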
Example #33
    path.append (program_dir)
    path.append (submission_dir)
    import data_io                       # general purpose input/output functions
    from data_io import vprint           # print only in verbose mode
    from data_manager import DataManager # load/save data and get info about them
    from model import Model              # example model, in scikit-learn style

    if debug_mode >= 4: # Show library version and directory structure
        data_io.show_dir(".")
        
    # Move old results and create a new output directory (useful if you run locally)
    if save_previous_results:
        data_io.mvdir(output_dir, output_dir+'_'+the_date) 
    data_io.mkdir(output_dir) 
        
    vprint( verbose,  "\n========== Ingestion program version " + str(version) + " ==========\n") 
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_version()
        data_io.show_io(input_dir, output_dir)
        exit(0)
          
    vprint( verbose,  "****************************************************")
    vprint( verbose,  "******** Processing spatio-temporal dataset ********")
    vprint( verbose,  "****************************************************")

    #### Instantiate input data manager and load data
    vprint( verbose,  "========= Reading and converting data ==========")
    Din = DataManager(datatype="input", verbose=verbose) 
    Din.loadData(input_dir)
    vprint( verbose, Din)
Example #34
    def predict(self, Xtest, num_predicted_frames=8, ycol0=0):
        ''' Make predictions of the next num_predicted_frames frames.
        Start at variable ycol0 only (do not predict the values of the first
        0 to ycol0-1 variables).
        For this example we predict persistence of the last frame.'''
        vprint(self.verbose, "Model :: ========= Making predictions =========")
        vprint(self.verbose, "===============================================")
        start = time.time()
        #Ytest = np.array([Xtest[random.randint(0,10),ycol0:]] * num_predicted_frames)

        ######################
        # import rpy2's package module
        import rpy2
        import rpy2.robjects as robjects
        import rpy2.robjects.packages as rpackages
        from rpy2.robjects.packages import importr

        # import R's "base" package
        base = rpackages.importr('base')

        # import R's utility package
        utils = rpackages.importr('utils')

        # select a mirror for R packages
        utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

        if not rpackages.isinstalled('forecast', lib_loc=rpy2.__path__[0]):
            utils.install_packages('forecast', lib=rpy2.__path__[0])
        forecast = importr('forecast', lib_loc=rpy2.__path__[0])

        ts = robjects.r('ts')

        #from rpy2.robjects.vectors import FloatVector
        #from rpy2.robjects.vectors import IntVector
        #from rpy2.robjects.vectors import BoolVector

        #from rpy2.robjects import pandas2ri

        from rpy2.robjects import pandas2ri
        from rpy2.robjects import vectors

        pandas2ri.activate()
        ######################

        Ytest = np.zeros((7, 57))

        # Code assumes daily data (not aggregated); Arima will break if it is run on aggregated data.
        # The code below undoes aggregation in the model inputs and redoes
        # aggregation to return the predictions (Ytest).

        # undo aggregation:
        future_starts = []
        for col in range(ycol0, Xtest.shape[1]):
            init = Xtest[0, col]
            for row in range(1, Xtest.shape[0]):
                Xtest[row, col] -= init
                init += Xtest[row, col]
            future_starts.append(init)

        for col in range(ycol0, Xtest.shape[1]):
            #print(col)
            dtp = num_predicted_frames - 1  # days to predict
            ndpat = num_predicted_frames  # number days to predict at a time
            dat = Xtest[1:, col]
            #print(dat)
            #print(len(dat))
            sum_RMSE = 0
            f = ts(dat, frequency=1, start=1, end=len(dat))
            best_params = robjects.IntVector([0, 0, 0])
            best_RMSE = 1000000

            for p in range(1, 5):
                for q in range(0, 5):
                    for d in range(0, 3):
                        try:
                            t_order = robjects.IntVector([p, d, q])
                            fit2 = forecast.Arima(f,
                                                  order=t_order,
                                                  xreg=robjects.r("NULL"),
                                                  include_mean=True,
                                                  include_drift=False,
                                                  biasadj=False,
                                                  method="ML",
                                                  model=robjects.r("NULL"))
                            RMSE = forecast.accuracy(fit2)[0][2]  #RMSE
                            if RMSE < best_RMSE:
                                best_RMSE = RMSE
                                best_params = robjects.IntVector([p, d, q])
                        except Exception:
                            continue

            best_opts = robjects.BoolVector([True, False])
            possible_opts = robjects.BoolVector([True, False])
            for mi in range(len(possible_opts)):       # try include_mean = True and False
                for di in range(len(possible_opts)):   # try include_drift = True and False
                    mean_opt = possible_opts[mi]
                    drift_opt = possible_opts[di]
                    fit2 = forecast.Arima(f,
                                          order=best_params,
                                          xreg=robjects.r("NULL"),
                                          include_mean=mean_opt,
                                          include_drift=drift_opt,
                                          biasadj=False,
                                          method="ML",
                                          model=robjects.r("NULL"))
                    RMSE = forecast.accuracy(fit2)[0][2]  #RMSE
                    if (RMSE < best_RMSE):
                        #print(paste("Reset best_params to (p,d,q) = (", p, ",", d, ",", q , ")", sep = ""))
                        best_RMSE = RMSE
                        best_opts = robjects.BoolVector([mean_opt, drift_opt])

            #print("best params = ", best_params)
            #print("best opts = ", best_opts)
            fit2 = forecast.Arima(f,
                                  order=best_params,
                                  xreg=robjects.r("NULL"),
                                  include_mean=best_opts[0],
                                  include_drift=best_opts[1],
                                  biasadj=False,
                                  method="ML",
                                  model=robjects.r("NULL"))
            #    print(forecast.forecast(fit2, ndpat))
            #    print(forecast.forecast(fit2, ndpat)[0])
            #    print(forecast.forecast(fit2, ndpat)[1])
            #    print(forecast.forecast(fit2, ndpat)[2])
            #    print(forecast.forecast(fit2, ndpat)[3])
            Ytest[:, col] = forecast.forecast(fit2, ndpat)[3]
            #print(Ytest)

        #print(Xtest.shape)    # (78, 57)
        #print(Xtest.shape[0]) # 78
        #print(Ytest.shape)    # typically (7, 57)

        # reconstruct aggregated predictions
        for col in range(ycol0, Xtest.shape[1]):
            init = future_starts[col]
            for row in range(0, num_predicted_frames - 1):
                tinc = init
                init += Ytest[row, col]
                Ytest[row, col] += tinc

        end = time.time()
        vprint(self.verbose,
               "[+] Success, predictions made in %5.2f sec" % (end - start))
        vprint(self.verbose, "Model :: ======== Predictions finished ========")
        return Ytest
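
The per-column "undo aggregation" loop in this example is first differencing, and the final loop re-accumulates the predictions on top of the saved level in future_starts. A hedged numpy restatement of both steps on a toy series (illustrative, not from the original code):

import numpy as np

x = np.array([2., 5., 9., 14.])        # cumulative (aggregated) series
steps = np.diff(x)                     # [3. 4. 5.]: the de-aggregated values
level = x[-1]                          # what the loop stores in future_starts
preds = np.array([6., 7.])             # toy per-step predictions
aggregated = level + np.cumsum(preds)  # [20. 27.]: re-aggregated predictions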
Example #35
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_version()
        data_io.show_io(input_dir, output_dir)
        print('\n****** Ingestion program version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
        data_io.write_list(datanames)      
        datanames = [] # Do not proceed with learning and testing
        
    #### MAIN LOOP OVER DATASETS: 
    overall_time_budget = 0
    time_left_over = 0
        
    vprint( verbose,  "\n========== Ingestion program version " + str(version) + " ==========\n") 
    vprint( verbose,  "************************************************")
    vprint( verbose,  "******** Processing dataset " + data_name.capitalize() + " ********")
    vprint( verbose,  "************************************************")

    # ======== Learning on a time budget:
    # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
    start = time.time()

    # ======== Creating a data object with data, informations about it
    vprint( verbose,  "========= Reading and converting data ==========")
    
    #Read DATA 
    data = read_images(input_dir, numerical_labels=True)
    
    
Example #36
    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)
    # Overwrite the "natural" order
    
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
        data_io.write_list(datanames)      
        datanames = [] # Do not proceed with learning and testing
        
    # ==================== @RESULT SUBMISSION (KEEP THIS) =====================
    # Always keep this code to enable result submission of pre-calculated results
    # deposited in the res/ subdirectory.
    if len(datanames)>0:
        vprint( verbose,  "************************************************************************")
        vprint( verbose,  "****** Attempting to copy files (from res/) for RESULT submission ******")
        vprint( verbose,  "************************************************************************")
        datanames = data_io.copy_results(datanames, res_dir, output_dir, verbose) # DO NOT REMOVE!
        if not datanames: 
            vprint( verbose,  "[+] Success")
        else:
            vprint( verbose, "======== Some missing results on current datasets!")
            vprint( verbose, "======== Proceeding to train/test:\n")
    # =================== End @RESULT SUBMISSION (KEEP THIS) ==================

    # ================ @CODE SUBMISSION (SUBSTITUTE YOUR CODE) =================
    overall_time_budget = 0
    time_left_over = 0
    for basename in datanames: # Loop over datasets
        
Example #37
# =========================== BEGIN PROGRAM ================================

if __name__=="__main__" and debug_mode<4:	
    #### Check whether everything went well (no time exceeded)
    execution_success = True
    
    #### INPUT/OUTPUT: Get input and output directory names
    if len(argv)==1: # Use the default input and output directories if no arguments are provided
        input_dir = default_input_dir
        output_dir = default_output_dir
    else:
        input_dir = argv[1]
        output_dir = os.path.abspath(argv[2])
        
    vprint( verbose,  "Using input_dir: " + input_dir)
    vprint( verbose,  "Using output_dir: " + output_dir)
        
    # Move old results and create a new output directory 
    if not(running_on_codalab) and save_previous_results:
        data_io.mvdir(output_dir, output_dir+'_'+the_date) 
    data_io.mkdir(output_dir) 
    
    #### INVENTORY DATA (and sort dataset names alphabetically)
    datanames = data_io.inventory_data(input_dir)
    # Overwrite the "natural" order
    
    #### DEBUG MODE: Show dataset list and STOP
    if debug_mode>=3:
        data_io.show_io(input_dir, output_dir)
        print('\n****** Sample code version ' + str(version) + ' ******\n\n' + '========== DATASETS ==========\n')        	
Example #38
        data_io.show_io(input_dir, output_dir)
        print('\n****** Ingestion program version ' + str(version) +
              ' ******\n\n' + '========== DATASETS ==========\n')
        data_io.write_list(datanames)
        datanames = []  # Do not proceed with learning and testing

    #### MAIN LOOP OVER DATASETS:
    overall_time_budget = 0
    time_left_over = 0
    #for basename in datanames: # Loop over datasets

    ######################################################### CLASSIFICATION #########################################################################################
    basename = datanames[0]

    vprint(
        verbose,
        "************************************************************************"
    )
    vprint(
        verbose, "******** Processing dataset " + basename.capitalize() +
        " for patch classification ********")
    vprint(
        verbose,
        "************************************************************************"
    )

    # ======== Learning on a time budget:
    # Keep track of time not to exceed your time budget. Time spent to inventory data neglected.
    start = time.time()

    # ======== Creating a data object with data, informations about it
    vprint(verbose, "========= Reading and converting data ==========")