def read_batch():
    x = HDF5Matrix(data_path, x_name)
    y = HDF5Matrix(data_path, y_name)
    assert x.end == y.end
    for i in range(0, x.end, batch_size):
        # Clamp the final batch to the end of the dataset
        batch_end = x.end if i + batch_size > x.end else i + batch_size
        yield x[i:batch_end], y[i:batch_end]

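# A minimal consumption sketch (an assumption, not from the original project):
# read_batch() above yields one pass over the data, so steps_per_epoch must
# match the number of batches it produces, and the generator is restarted for
# each epoch. data_path, x_name and batch_size are the same module-level names
# read_batch() itself relies on; the model argument is a compiled Keras model.
import math

def demo_train_from_read_batch(model, epochs=5):
    n_samples = HDF5Matrix(data_path, x_name).end  # total rows in the dataset
    steps = int(math.ceil(n_samples / float(batch_size)))
    for _ in range(epochs):
        model.fit_generator(read_batch(), steps_per_epoch=steps, epochs=1)
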
def generate_arrays(train_filename, batch_size, max_sample, new_size):
    batch_features = np.zeros((batch_size, new_size, new_size, 3))
    batch_labels = np.zeros((batch_size, 1))
    current_sample_idx = 0
    combined_num = 0
    print('GENERATOR: Train file = {}, batch = {}, total samples = {}'.format(
        train_filename, batch_size, max_sample))
    while 1:
        reached_end = False
        start_idx = current_sample_idx
        end_idx = batch_size + start_idx
        if end_idx > max_sample:
            end_idx = max_sample  # clamp the last batch to the dataset size
            reached_end = True
        print('GENERATOR: Start idx = {}, end_idx = {}, total samples = {}'.format(
            start_idx, end_idx, max_sample))
        x = HDF5Matrix(train_filename, 'data', start=start_idx, end=end_idx)
        y = HDF5Matrix(train_filename, 'labels', start=start_idx, end=end_idx)
        x = np.array(x)
        y = np.array(y)
        y = np_utils.to_categorical(y, NUMBER_OF_CLASSES)
        current_sample_idx = end_idx
        if reached_end:
            current_sample_idx = 0
        print("Shapes. x = {}, y = {}".format(x.shape, y.shape))
        # batch_labels = np_utils.to_categorical(batch_labels, NUMBER_OF_CLASSES)
        yield (x, y)

def load5hpyTestData(data_name):
    """Load h5py data and return numpy arrays for the test set.

    Returns:
        dataX_test (np.ndarray): test inputs, loaded via HDF5Matrix
        dataY_test (np.ndarray): test labels, loaded via HDF5Matrix
        dataZ_test (np.ndarray): extra test data, loaded via HDF5Matrix
    """
    data_dir = '/home/KODAI/MATLAB_vis_master/'
    data_file = data_dir + data_name  # data_name = 'TopAngle100_dataX_dataY.h5' by default

    # Open the file once to print shape information
    with h5py.File(data_file, 'r') as hf:
        print("Reading test data from file..")
        dataX_test = hf['dataX_test']
        dataY_test = hf['dataY_test']
        dataZ_test = hf['dataZ_test']
        print("dataX_test.shape:", dataX_test.shape)
        print("dataY_test.shape:", dataY_test.shape)
        print("dataZ_test.shape:", dataZ_test.shape)

    # Load data into HDF5Matrix objects, which read the file from disk and do not put it into RAM
    dataX_test = HDF5Matrix(data_file, 'dataX_test')
    dataY_test = HDF5Matrix(data_file, 'dataY_test')
    dataZ_test = HDF5Matrix(data_file, 'dataZ_test')

    print("converting h5py to numpy...")
    dataX_test = np.array(dataX_test)
    dataY_test = np.array(dataY_test)
    dataZ_test = np.array(dataZ_test)

    return dataX_test, dataY_test, dataZ_test

def load5hpyTrainData(data_name):
    """Load h5py data and return numpy arrays for the training set.

    Returns:
        dataX_train (np.ndarray): training inputs, loaded via HDF5Matrix
        dataY_train (np.ndarray): training labels, loaded via HDF5Matrix
    """
    data_dir = '/home/KODAI/MATLAB_vis_master/'
    data_file = data_dir + data_name  # data_name = 'TopAngle100_dataX_dataY.h5' by default

    with h5py.File(data_file, 'r') as hf:
        print("Reading train data from file..")
        # dataX_train = hf['dataX_train']  # Adding the [:] actually loads it into memory
        # dataY_train = hf['dataY_train']
        # dataZ_train = hf['dataZ_train']
        # print("dataX_train.shape:", dataX_train.shape)
        # print("dataY_train.shape:", dataY_train.shape)
        # print("dataZ_train.shape:", dataZ_train.shape)

    # Load data into HDF5Matrix objects, which read the file from disk and do not put it into RAM
    dataX_train = HDF5Matrix(data_file, 'dataX_train', start=0, end=60000)
    dataY_train = HDF5Matrix(data_file, 'dataY_train', start=0, end=60000)
    # dataZ_train = HDF5Matrix(data_file, 'dataZ_train', start=0, end=60000)

    print("converting h5py to numpy... (only dataX and dataY)")
    dataX_train = np.array(dataX_train)
    dataY_train = np.array(dataY_train)
    # dataZ_train = np.array(dataZ_train)

    return dataX_train, dataY_train

def index():
    msg = request.json['msg']
    img_data = msg.encode('utf-8')
    with open("test.jpg", "wb") as fh:
        fh.write(base64.decodebytes(img_data))

    model = load_model(
        'C:\\Users/HP/Desktop/Flask_Example/venv/app/food_model-1.h5')
    train_h5_path = 'C:\\Users/HP/Desktop/Flask_Example/venv/app/food_c101_n10099_r32x32x1.h5'
    test_h5_path = 'C:\\Users/HP/Desktop/Flask_Example/venv/app/food_test_c101_n1000_r32x32x1.h5'

    X_train = HDF5Matrix(train_h5_path, 'images')
    y_train = HDF5Matrix(train_h5_path, 'category')

    sample_imgs = 25
    with h5py.File(train_h5_path, 'r') as n_file:
        total_imgs = n_file['images'].shape[0]
        read_idxs = slice(0, sample_imgs)
        im_data = n_file['images'][read_idxs]
        im_label = n_file['category'][read_idxs]
        label_names = [x.decode() for x in n_file['category_names'][()]]

    img = Image.open("test.jpg")
    arr = np.array(img)
    import cv2
    arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)
    arr = cv2.resize(arr, (32, 32))
    nparray = np.asarray(arr)
    nparray = nparray.reshape((1, 32, 32, 1))
    prediction = model.predict_classes(nparray)
    print(label_names[prediction[0]])
    return jsonify({"result": label_names[prediction[0]]}), 200

def generator(data_list, batch_size):
    while True:
        # Visit the files in a new random order each epoch
        numlab = numpy.arange(len(data_list))
        numpy.random.shuffle(numlab)
        for curnum in range(len(data_list)):
            num = numlab[curnum]
            hdf5_name = data_list[num] + 'margin' + str(args.margin) + 'dim' + str(args.dim) + '.h5'
            hdf5_file = os.path.join(SAVE_DIR, hdf5_name)
            hdf5_name2 = data_list[num] + '.h5'
            hdf5_file2 = os.path.join(DATA_DIR, hdf5_name2)
            wgt = HDF5Matrix(hdf5_file, 'data')
            if args.normals:
                pts = HDF5Matrix(hdf5_file2, 'data')
                nms = HDF5Matrix(hdf5_file2, 'normal')
            size = wgt.end
            y = HDF5Matrix(hdf5_file2, 'segment')
            # Shuffle the batch order within the file
            batchnm = int(numpy.ceil(size / batch_size))
            blab = numpy.arange(batchnm)
            numpy.random.shuffle(blab)
            for itt in range(batchnm):
                idx = blab[itt] * batch_size
                if idx + batch_size >= size:
                    end = size
                else:
                    end = idx + batch_size
                x1 = wgt[idx:end]
                if args.normals:
                    x2 = pts[idx:end]
                    x3 = nms[idx:end]
                    xbatch = [x1, x2, x3]
                else:
                    xbatch = x1
                yield xbatch, to_categorical(y[idx:end], num_classes=num_classes)

def __init__(self, indexes, batch_size, data_path, num_tx_beams,
             num_blocks_per_frame, input_size, num_samples_per_block,
             how_many_blocks_per_frame, shuffle=False, is_2d=False):
    'Initialization'
    self.indexes = indexes
    self.batch_size = batch_size
    self.data_path = data_path
    self.shuffle = shuffle
    self.cache = {}
    self.iq = HDF5Matrix(self.data_path, "iq")
    self.tx_beam = HDF5Matrix(self.data_path, "tx_beam")
    self.rx_beam = HDF5Matrix(self.data_path, "rx_beam")
    self.gain = HDF5Matrix(self.data_path, "gain")
    self.num_blocks_per_frame = num_blocks_per_frame
    self.input_size = input_size
    self.num_samples_per_block = num_samples_per_block
    self.how_many_blocks_per_frame = how_many_blocks_per_frame
    self.num_tx_beam = num_tx_beams
    self.num_rx_beam = 1
    self.is_2d = is_2d
    self.cache_rate = 10

def train(self, training_h5):
    # Read Dataset
    X_t = HDF5Matrix(training_h5, 'X_train')
    y_t = HDF5Matrix(training_h5, 'y_train')
    X_v = HDF5Matrix(training_h5, 'X_val')
    y_v = HDF5Matrix(training_h5, 'y_val')

    # Create Model
    model = self.Model3()

    # Compile Model
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizers.Adam(lr=1e-4),
                  metrics=['acc'])

    # Launch Training
    model_checkpoint = ModelCheckpoint(
        os.path.join(self.output_folder,
                     'modelqc-{epoch:03d}-{acc:03f}-{val_acc:03f}.h5'),
        verbose=1, monitor='val_loss', save_best_only=True, mode='auto')
    earlyStopping = EarlyStopping(monitor='val_loss', patience=10,
                                  verbose=0, mode='min')
    history = model.fit(X_t, y_t,
                        shuffle='batch',  # required when fitting on HDF5Matrix
                        batch_size=32,
                        epochs=20,
                        validation_data=(np.array(X_v), y_v),
                        callbacks=[model_checkpoint, earlyStopping])

def main():
    print("INFO: this notebook has been tested under keras.version=2.2.2, tensorflow.version=1.8.0")
    print("INFO: here is the info for your local environment")
    print("  keras.version={}".format(keras.__version__))
    print("  tensorflow.version={}".format(tf.__version__))
    print("INFO: consider using the suggested versions if you can't run the following code properly.")

    busternet = create_linear_BusterNet(weight_file='models/pretrained_busterNet.hd5')
    for layer in busternet.layers:
        layer.trainable = False
    model = create_RobusterNet(busternet, weight_file='models/robusternet_weights.hdf5')
    print(model.summary())

    X = HDF5Matrix('data/CASIA-CMFD/CASIA-CMFD-Pos.hd5', 'X')
    Y = HDF5Matrix('data/CASIA-CMFD/CASIA-CMFD-Pos.hd5', 'Y')
    Z = model.predict(X, verbose=1, batch_size=1)
    evaluate_protocol_B(Y, Z)

def test6():
    train_filename = '../build/overlapping/stupid_simple_1s_full_5s_window_check.h5'
    input_group = 'input_matrices'
    output_group = 'output_matrices'
    x_train = HDF5Matrix(train_filename, input_group)
    y_train = HDF5Matrix(train_filename, output_group)
    print(x_train[0])
    print(len(x_train))

def network_trainer(gen):
    fname = get_ready_path(gen)

    # Data loading: estimate whether everything fits in RAM
    with h5.File(fname) as hf:
        memory_cost = 122 * 4  # Buffer for creating np array
        memory_cost += get_memory_size(hf['train/x'])
        memory_cost += get_memory_size(hf['val/x'])
        memory_cost += 2 * get_memory_size(hf['train/y'])
        memory_cost += 2 * get_memory_size(hf['val/y'])

    if memory_cost < psutil.virtual_memory()[1]:  # available memory
        with h5.File(fname) as hf:
            x_train = hf['train/x'][()]
            y_train = to_categorical(hf['train/y'], 2)
            x_val = hf['val/x'][()]
            y_val = to_categorical(hf['val/y'], 2)
        print("Using data loaded into memory")
    else:
        x_train = HDF5Matrix(fname, 'train/x')
        y_train = to_categorical(HDF5Matrix(fname, 'train/y'), 2)
        x_val = HDF5Matrix(fname, 'val/x')
        y_val = to_categorical(HDF5Matrix(fname, 'val/y'), 2)
        print("Using data from HDF5Matrix")

    # Model loading
    model = net()
    model.summary()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    calls = [
        LearningRateScheduler(lambda i: float(0.001 * (0.98 ** i))),
        EarlyStopping(monitor='val_loss', min_delta=0., patience=10,
                      verbose=2, mode='auto'),
        ModelCheckpoint('{0}{1}.h5'.format(model_path, gen),
                        monitor='val_loss', verbose=2,
                        save_best_only=True, mode='auto')
    ]

    hist = model.fit(x=x_train, y=y_train,
                     validation_data=(x_val, y_val),
                     batch_size=100, epochs=100,
                     shuffle='batch', verbose=2, callbacks=calls)

    save_history(hist.history, gen)
    clear_session()

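# A minimal sketch (an assumption, not part of the original module) of the
# get_memory_size() helper used above: an h5py dataset's element count times
# its per-element byte size gives its in-memory footprint in bytes.
def get_memory_size(dataset):
    return dataset.size * dataset.dtype.itemsize
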
def train_model(args, data_size):
    '''
    Loads in training data and trains on the given neural network architecture.
    Also saves the trained model each time the model improves.
    '''
    # Load training and validation data
    print("Loading training data.")
    X_train = HDF5Matrix('data' + data_size + '.h5', 'images')
    X_val = HDF5Matrix('challenge_pics' + data_size + '.h5', 'images')

    # Load image labels
    y_train = HDF5Matrix('labels112.h5', 'labels')
    y_val = HDF5Matrix('challenge_lane_labels112.h5', 'labels')
    print("Training data loaded.")

    # Get input_shape to feed into model
    input_shape = X_train.shape[1:]

    # Load "module" containing our neural network architecture
    m = importlib.import_module(args.a)

    # Load model from that "module"
    print("Loading model.")
    model = m.get_model(input_shape, args.fa)
    print("Model loaded.")

    # Compile model
    print("Compiling model.")
    model.compile(optimizer='Adam', loss=args.l, metrics=['accuracy'])
    print("Model compiled, training initializing.")

    # Save down only the best result
    # Uses architecture name, output activation and loss
    save_path = args.a.replace('.', '/') + '-' + args.fa[0] + args.l[0] + '.h5'
    checkpoint = ModelCheckpoint(filepath=save_path, monitor='val_loss',
                                 save_best_only=True)

    # Stop early when improvement ends
    stopper = EarlyStopping(monitor='val_acc', min_delta=0.0003, patience=5)

    # Using a generator to help the model use less data
    datagen, val_datagen = get_generators(args)

    # Train the model
    args.b = int(args.b)
    args.e = int(args.e)
    model.fit_generator(datagen.flow(X_train, y_train, batch_size=args.b),
                        steps_per_epoch=len(X_train) / args.b,
                        epochs=args.e, verbose=1,
                        callbacks=[checkpoint, stopper],
                        validation_data=val_datagen.flow(X_val, y_val, batch_size=args.b),
                        validation_steps=len(X_val) / args.b)

    # Show summary of model at conclusion of training
    model.summary()

def loadFromHdf5(start, num, fileNum):
    global input_shape
    img_rows, img_cols = 128, 128
    X_train = HDF5Matrix('/export/home/lparcala/dataFace/celebA{}.h5'.format(fileNum),
                         'images', start, start + num)  #, normalizer=normalize_data)
    Y_train = HDF5Matrix('/export/home/lparcala/dataFace/celebA{}.h5'.format(fileNum),
                         'labels', start, start + num)
    print("Done reading data from disk from {} to {}.".format(start, start + num))
    return X_train, np.array(Y_train)

def load_from_hdf5(dir, type, start=0, end=None, labels_only=False):
    X_train, y_train = 0, 0
    if type == "training" or type == "validation":
        if labels_only:
            y_train = HDF5Matrix(dir, 'training_labels', start=start, end=end)
        else:
            X_train = HDF5Matrix(dir, 'training_input', start=start, end=end)
            y_train = HDF5Matrix(dir, 'training_labels', start=start, end=end)
    elif type == "development":
        if labels_only:
            y_train = HDF5Matrix(dir, 'development_labels', start=start, end=end)
        else:
            X_train = HDF5Matrix(dir, 'development_input', start=start, end=end)
            y_train = HDF5Matrix(dir, 'development_labels', start=start, end=end)
    elif type == "test":
        if labels_only:
            y_train = HDF5Matrix(dir, 'test_labels', start=start, end=end)
        else:
            X_train = HDF5Matrix(dir, 'test_input', start=start, end=end)
            y_train = HDF5Matrix(dir, 'test_labels', start=start, end=end)
    return X_train, y_train

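# Hedged usage example (an assumption, not from the original project): pull the
# first 10000 training pairs and the full development split with the loader
# above; 'dataset.h5' is a placeholder path.
X_train, y_train = load_from_hdf5('dataset.h5', 'training', start=0, end=10000)
X_dev, y_dev = load_from_hdf5('dataset.h5', 'development')
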
def load_and_predict(is_checkpoint, filename, output_dir, use_train, use_bas=False):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model = get_model(is_checkpoint, filename, use_bas)
    batch_size = 4
    data_file = "./data/new_heart_l131k.h5"

    train_small = batch_size * 10
    X_train = HDF5Matrix(data_file, 'train_in', start=0, end=train_small)
    y_train = HDF5Matrix(data_file, 'train_out', start=0, end=train_small)
    # X_train = HDF5Matrix(data_file, 'train_in')
    # y_train = HDF5Matrix(data_file, 'train_out')

    test_small = batch_size * 10
    X_test = HDF5Matrix(data_file, 'test_in', start=0, end=test_small)
    y_test = HDF5Matrix(data_file, 'test_out', start=0, end=test_small)
    # X_test = HDF5Matrix(data_file, 'test_in')
    # y_test = HDF5Matrix(data_file, 'test_out')

    X, Y = None, None
    if use_train:
        print("using train")
        X = X_train
        Y = y_train
    else:
        print("using test")
        X = X_test
        Y = y_test
    print(X.shape)
    print(Y.shape)

    y_predictions = None
    if os.path.exists(output_dir + "/y_predictions.npy"):
        print("loading prediction")
        y_predictions = np.load(output_dir + "/y_predictions.npy")
    else:
        print("predicting")
        y_predictions = model.predict(X, batch_size=batch_size)
        # write predictions to file
        np.save(output_dir + "/y_predictions.npy", y_predictions)

    print("creating plots")
    create_prediction_histograms(y_predictions, Y, output_dir)
    print("bar graphs done")
    create_scatterplot(y_predictions, Y, output_dir)
    print("scatters done")
    # create_pdf_graph(y_predictions, Y, output_dir)
    print('finished successfully!')

def read_batch(test=False):
    x = HDF5Matrix(data_path, state_name)
    y = HDF5Matrix(data_path, action_name)
    assert x.end == y.end
    train_len, total_len, *_ = get_sizes()
    start = train_len if test else 0
    end = total_len if test else train_len
    while True:
        for i in range(start, end, batch_size):
            # Clamp the final batch to the split boundary
            batch_end = end if i + batch_size > end else i + batch_size
            yield x[i:batch_end], y[i:batch_end]

def load_data(data_path, start, num_examples):
    X = HDF5Matrix(datapath=data_path, dataset='images',
                   start=start, end=start + num_examples,
                   normalizer=normalize_data)
    Y = HDF5Matrix(datapath=data_path, dataset='labels',
                   start=start, end=start + num_examples)
    return (X[:], Y[:])

def load_training_set(df_train, rescaled_dim):
    """Returns HDF5Matrix objects for the training set.

    Attempts to load data from cache. If the data doesn't exist in the cache,
    loads it from source."""
    training_file_path = training_set_file_path_format.format(rescaled_dim)
    if not os.path.exists(training_file_path):
        load_training_set_from_source(df_train, rescaled_dim)
    train_x_hdf5 = HDF5Matrix(training_file_path, "training-x")
    train_y_hdf5 = HDF5Matrix(training_file_path, "training-y")
    return train_x_hdf5, train_y_hdf5

def read_from_h5(base_path, train_h5_file, test_h5_file):
    train_h5_path = os.path.join(base_path, train_h5_file)
    test_h5_path = os.path.join(base_path, test_h5_file)
    X_train = HDF5Matrix(train_h5_path, 'images')
    y_train = HDF5Matrix(train_h5_path, 'category')
    print('In Data', X_train.shape, '=>', y_train.shape)
    X_test = HDF5Matrix(test_h5_path, 'images')
    y_test = HDF5Matrix(test_h5_path, 'category')
    print('In Data', X_test.shape, '=>', y_test.shape)
    return X_train, y_train, X_test, y_test

def dataset_xy_hdf5matrix_keras(h5file_path, start_ratio, end_ratio):
    s = None
    e = None
    # Translate the ratio bounds into absolute row indices
    with h5py.File(h5file_path, 'r') as h5file:
        Y = h5file['Y']
        s = int(Y.shape[0] * start_ratio)
        e = int(Y.shape[0] * end_ratio)
    X = HDF5Matrix(h5file_path, 'X', start=s, end=e)
    Y = HDF5Matrix(h5file_path, 'Y', start=s, end=e)
    return X, Y

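# Hedged usage sketch (an assumption, not from the original project): a 90/10
# train/validation split carved out of one file with the ratio helper above;
# 'data.h5' is a placeholder path. Note that Keras requires shuffle='batch'
# (or False) when fitting directly on HDF5Matrix inputs.
X_train, Y_train = dataset_xy_hdf5matrix_keras('data.h5', 0.0, 0.9)
X_val, Y_val = dataset_xy_hdf5matrix_keras('data.h5', 0.9, 1.0)
# model.fit(X_train, Y_train, validation_data=(X_val, Y_val), shuffle='batch')
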
def __init__(self, indexes, batch_size, data_path, shuffle=False, is_2d=False,
             models_path=None, model_name='FIR_model', taps_name='phi',
             FIR_layer_name='FIR_layer', num_classes=24):
    'Initialization'
    self.indexes = indexes
    self.batch_size = batch_size
    self.data_path = data_path
    self.shuffle = shuffle
    self.cache = {}
    self.is_2d = is_2d

    # Load FIR taps; these are expected to be saved as a FIR layer named
    # FIR_layer_name, with the taps stored as taps_name:0
    for d in range(num_classes):
        f = h5py.File(
            os.path.join(models_path, model_name + '_' + str(d) + '.hdf5'), 'r')
        if d == 0:
            shape_var = f['model_weights'][FIR_layer_name][FIR_layer_name][
                taps_name + ':0'].shape
            trivial_dimension = np.argwhere(np.array(shape_var) == 1)[0]
            if len(trivial_dimension):  # this is only if we need to remove dummy dimensions
                print('Dropping one input dimension')
                self.shape_FIR = np.delete(shape_var, trivial_dimension)
            self.fir_taps = np.zeros(np.append(self.shape_FIR, num_classes))
        # SALVO: the second FIR layer name changes with every model; find a way
        # to grab only that one via keys.
        var_temp = f['model_weights'][FIR_layer_name]
        key = [key for key in var_temp.keys()][0]
        if trivial_dimension == 0:
            self.fir_taps[:, :, d] = var_temp[key][taps_name + ':0'][0, :, :]
        elif trivial_dimension == 1:
            self.fir_taps[:, :, d] = var_temp[key][taps_name + ':0'][:, 0, :]
        else:
            self.fir_taps[:, :, d] = var_temp[key][taps_name + ':0'][:, :, 0]
        f.close()

    self.X = HDF5Matrix(self.data_path, 'X')
    self.Y = HDF5Matrix(self.data_path, 'Y')

def lp_test_generator():
    i = 0
    img_data = HDF5Matrix('lp_test.h5', 'images')
    lbl_data = HDF5Matrix('lp_test.h5', 'labels')
    # size = 339
    size = 160
    while 1:
        img_single = img_data[i % size].reshape((1, 320, 240, 3))
        lbl_single = lbl_data[i % size].reshape((1, 4))
        # img_single = img_data[i % size].reshape((320, 240, 3))
        # lbl_single = lbl_data[i % size].reshape(4)
        yield (img_single, lbl_single)
        i += 1

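# Hedged usage sketch (an assumption, not from the original project): the
# generator above loops forever one sample at a time, so evaluation is capped
# at one pass over the 160 stored samples; model is a compiled Keras model.
def demo_predict_lp(model):
    return model.predict_generator(lp_test_generator(), steps=160)
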
def finetune_top_model(model, save_weights='top_half_weights.h5', epochs=10,
                       batch_size=64, mini_batch=True):
    # If you want to load into memory do the following:
    # h5f = h5py.File('bottleneck_features.h5', 'r')
    # train_data = h5f['training'][:5000]
    # train_labels = h5f['train_labels'][:5000]
    train_data = HDF5Matrix("bottleneck_features.h5", 'training')
    train_labels = HDF5Matrix("bottleneck_features.h5", 'train_labels')
    val_data = HDF5Matrix("bottleneck_features.h5", 'val')
    val_labels = HDF5Matrix("bottleneck_features.h5", 'val_labels')

    checkpointer = ModelCheckpoint(filepath=save_weights, verbose=1,
                                   save_best_only=True)
    history = History()
    early_stopping = EarlyStopping(patience=1)

    # If mini_batch, split the datasets into 10 parts and pick one tenth of the
    # training set and one tenth of the validation set
    if mini_batch:
        pick = np.random.randint(0, 10)
        train_data = HDF5Matrix("bottleneck_features.h5", 'training',
                                start=10000 * pick, end=10000 * (pick + 1))
        train_labels = HDF5Matrix("bottleneck_features.h5", 'train_labels',
                                  start=10000 * pick, end=10000 * (pick + 1))
        pick = np.random.randint(0, 10)
        val_data = HDF5Matrix("bottleneck_features.h5", 'val',
                              start=1000 * pick, end=1000 * (pick + 1))
        val_labels = HDF5Matrix("bottleneck_features.h5", 'val_labels',
                                start=1000 * pick, end=1000 * (pick + 1))

    try:
        history_returned = model.fit(
            train_data, train_labels,
            validation_data=(val_data, val_labels),
            epochs=epochs, batch_size=batch_size,
            shuffle='batch',
            callbacks=[checkpointer, history, early_stopping])
        return history_returned
    except KeyboardInterrupt:
        print("keyboard interrupted, saving to history.json and history.txt")
        if hasattr(history, "history"):
            json.dump(history.history, open("history.json", 'w'))
            with open("history.txt", 'w') as txt:
                txt.write(str(history.history))
        raise

def __init__(self, indexes, batch_size, data_path, shuffle=False, is_2d=False):
    'Initialization'
    self.indexes = indexes
    self.batch_size = batch_size
    self.data_path = data_path
    self.shuffle = shuffle
    self.cache = {}
    self.is_2d = is_2d
    self.X = HDF5Matrix(self.data_path, 'X')
    self.Y = HDF5Matrix(self.data_path, 'Y')

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model')
    args = parser.parse_args()

    model = models.get_unet(input_shape=(1280, 1280, 3), pool_cnt=7, filter_cnt=8)
    if args.model:
        model.load_weights(args.model, by_name=True)

    # First 4080 samples for training, the rest for validation
    train_x = HDF5Matrix(DATA_FILE, "train/x", end=4080)
    train_y = HDF5Matrix(DATA_FILE, "train/y", end=4080)
    train_it = HDF5MatrixIterator(train_x, train_y, batch_size=BATCH_SIZE,
                                  preprocess=preprocess_train, shuffle=True)
    train_cnt = len(train_x)

    val_x = HDF5Matrix(DATA_FILE, "train/x", start=4080)
    val_y = HDF5Matrix(DATA_FILE, "train/y", start=4080)
    val_it = HDF5MatrixIterator(val_x, val_y, batch_size=BATCH_SIZE,
                                preprocess=preprocess_val)
    val_cnt = len(val_x)

    chkpt = ModelCheckpoint(filepath='model.hdf5', monitor='val_dice_coef',
                            verbose=1, save_best_only=True,
                            save_weights_only=True, mode='max')
    early_stop = EarlyStopping(monitor='val_dice_coef', patience=5,
                               verbose=1, mode='max')

    model.fit_generator(generator=train_it,
                        steps_per_epoch=math.ceil(train_cnt / BATCH_SIZE),
                        epochs=200,
                        callbacks=[chkpt, early_stop],
                        validation_data=val_it,
                        validation_steps=math.ceil(val_cnt / BATCH_SIZE),
                        max_q_size=20,
                        workers=4)

def _get_nb_total_batches(self):
    h5_data = HDF5Matrix(
        os.path.join(
            self._data_dir, 'folds_h5',
            '{}_metadata_{}.hdf5'.format(self._run_type, self._n_fold + 1)),
        'seq_labels')
    return int(np.floor(h5_data.shape[0] / self._batch_size))

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model', required=True)
    parser.add_argument('-i', '--input', required=True)
    args = parser.parse_args()

    model = models.get_seq_unet_1024()
    if args.model:
        model.load_weights(args.model)

    x = HDF5Matrix(args.input, "train/x", start=255)
    y = HDF5Matrix(args.input, "train/y", start=255)
    it = HDF5MatrixIterator(x, y, batch_size=4, preprocess=preprocess_seq)
    score = evaluate_seq_generator(model, it, 5)
    print(score)

def test_model(self):
    optimizer = Adam(lr=0.0001)
    self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                       metrics=['accuracy'])
    # score_eval = self.model.evaluate_generator(self.test_generator, verbose=1, use_multiprocessing=False)
    # print(score_eval)
    score_predict = self.model.predict_generator(self.test_generator,
                                                 verbose=1,
                                                 use_multiprocessing=False)
    label_predict = np.argmax(score_predict, 1)

    Y_real = HDF5Matrix(self.args.data_path, 'Y')
    label_true = np.zeros(label_predict.shape)
    for i in range(label_predict.shape[0]):
        # print('Index : ', self.test_indexes[i])
        # print(Y_real[self.test_indexes[i]])
        # print(np.argmax(Y_real[self.test_indexes[i]]))
        label_true[i] = np.argmax(Y_real[self.test_indexes[i]])

    con_matrix = confusion_matrix(label_true, label_predict)
    con_matrix_perc = con_matrix / con_matrix.astype(np.float).sum(axis=1)
    example_accuracy = np.mean(np.diag(con_matrix_perc))

    # Compute batch accuracy
    num_batches = int(len(self.test_indexes) / self.args.batch_size)
    batch_prediction_indicator = np.zeros([num_batches, ])
    for b in range(num_batches):
        # The label is the same for each batch, so just take the first element
        true_batch_label = label_true[b * self.args.batch_size]
        predicted_batch_label = self.get_predicted_label(
            label_predict[b * self.args.batch_size:(b + 1) * self.args.batch_size])
        if true_batch_label == predicted_batch_label:
            batch_prediction_indicator[b] = 1
    batch_accuracy = np.mean(batch_prediction_indicator)

    my_dict = {
        'example_accuracy': example_accuracy,
        'batch_accuracy': batch_accuracy,
        'confusion_matrix': con_matrix_perc
    }
    print('Example Accuracy: ', example_accuracy)
    print('Batch Accuracy: ', batch_accuracy)

    # Saving the objects:
    save_name = os.path.join(self.args.save_path, self.args.save_file_name + '.pkl')
    with open(save_name, 'wb') as f:  # Python 3: open(..., 'wb')
        pkl.dump(my_dict, f)

def test_io_utils(in_tmpdir):
    '''Tests the HDF5Matrix code using the sample from @jfsantos at
    https://gist.github.com/jfsantos/e2ef822c744357a4ed16ec0c885100a3
    '''
    h5_path = 'test.h5'
    create_dataset(h5_path)

    # Instantiating HDF5Matrix for the training set, which is a slice of the first 150 elements
    X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150)
    y_train = HDF5Matrix(h5_path, 'my_labels', start=0, end=150)

    # Likewise for the test set
    X_test = HDF5Matrix(h5_path, 'my_data', start=150, end=200)
    y_test = HDF5Matrix(h5_path, 'my_labels', start=150, end=200)

    # HDF5Matrix objects behave more or less like Numpy matrices with regards to indexing
    assert y_train.shape == (150, 1), 'HDF5Matrix shape should match input array'
    # But they do not support negative indices, so don't try print(X_train[-1])
    assert y_train.dtype == np.dtype('i'), 'HDF5Matrix dtype should match input array'
    assert y_train.ndim == 2, 'HDF5Matrix ndim should match input array'
    assert y_train.size == 150, 'HDF5Matrix size should match input array'

    model = Sequential()
    model.add(Dense(64, input_shape=(10,), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='sgd')

    # Note: you have to use shuffle='batch' or False with HDF5Matrix
    model.fit(X_train, y_train, batch_size=32, shuffle='batch', verbose=False)
    # Test that evaluation and prediction don't crash and return reasonable results
    out_pred = model.predict(X_test, batch_size=32, verbose=False)
    out_eval = model.evaluate(X_test, y_test, batch_size=32, verbose=False)

    assert out_pred.shape == (50, 1), 'Prediction shape does not match'
    assert out_eval.shape == (), 'Shape of evaluation does not match'
    assert out_eval > 0, 'Evaluation value does not meet criteria: {}'.format(out_eval)

    os.remove(h5_path)

def load5hpyData(USE_TITANX=True, data_name='TopAngle100_dataX_dataY.h5'):
    """Load h5py data and return HDF5 objects corresponding to X_train, Y_train, X_test, Y_test

    Args:
        USE_TITANX (boolean): set True if using the Linux computer with the TITAN X
        data_name (string): name of the dataset, e.g. 'TopAngle100_dataX_dataY.h5'

    Returns:
        dataX_train (HDF5Matrix object): keras object for loading h5py datasets
        dataY_train (HDF5Matrix object): keras object for loading h5py datasets
        dataX_test (HDF5Matrix object): keras object for loading h5py datasets
        dataY_test (HDF5Matrix object): keras object for loading h5py datasets
    """
    if USE_TITANX:
        data_dir = '/home/zanoi/ZANOI/auditory_hallucinations_data/'
    else:
        data_dir = '/Volumes/SAMSUNG_SSD_256GB/ADV_CV/data/'
    data_file = data_dir + data_name  # data_name = 'TopAngle100_dataX_dataY.h5' by default

    # Open the file once to print shape information
    with h5py.File(data_file, 'r') as hf:
        print("Reading full data from file..")
        dataX_train = hf['dataX_train']  # Adding [:] here would actually load it into memory
        dataY_train = hf['dataY_train']
        dataX_test = hf['dataX_test']
        dataY_test = hf['dataY_test']
        print("dataX_train.shape:", dataX_train.shape)
        print("dataY_train.shape:", dataY_train.shape)
        print("dataX_test.shape:", dataX_test.shape)
        print("dataY_test.shape:", dataY_test.shape)

    # Load data into HDF5Matrix objects, which read the file from disk and do not put it into RAM
    dataX_train = HDF5Matrix(data_file, 'dataX_train')
    dataY_train = HDF5Matrix(data_file, 'dataY_train')
    dataX_test = HDF5Matrix(data_file, 'dataX_test')
    dataY_test = HDF5Matrix(data_file, 'dataY_test')

    return dataX_train, dataY_train, dataX_test, dataY_test