def fit(self, X_train, y_train, X_test, y_test, batch_size, num_epochs, optimizer):
    loss_history = []
    train_accuracy = []
    test_accuracy = []
    self.init()
    data_gen = utils.DataGenerator(X_train, y_train, batch_size)
    itr = 0
    for epoch in range(num_epochs):
        epoch_iter = 0
        epoch_accuracy = []
        for X, Y in data_gen:
            optimizer.zeroGrad()
            probabilities = self.forward(X)
            loss = utils.cross_entropy_loss(probabilities, Y)
            self.backward(Y)
            loss_history += [loss]
            itr += 1
            epoch_iter += 1
            optimizer.step()
            epoch_acc = self.evaluate(X, Y)
            epoch_accuracy.append(epoch_acc)
        train_acc = np.array(epoch_accuracy).sum() / epoch_iter
        train_accuracy.append(train_acc)
        test_acc = self.evaluate(X_test, y_test)
        test_accuracy.append(test_acc)
        print("epoch = {}, train accuracy = {} test accuracy = {}".format(
            epoch, train_acc, test_acc))
    return loss_history, train_accuracy, test_accuracy
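# A minimal sketch of the in-memory utils.DataGenerator assumed by the fit() loop
# above (an assumption: the real implementation may shuffle differently or drop
# the last partial batch). It simply yields (X, Y) mini-batches from NumPy arrays
# so the loop can iterate over it once per epoch.
import numpy as np


class DataGenerator:
    def __init__(self, X, y, batch_size, shuffle=True):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        indices = np.arange(len(self.X))
        if self.shuffle:
            np.random.shuffle(indices)
        # Yield one mini-batch per step; the final batch may be smaller.
        for start in range(0, len(indices), self.batch_size):
            batch = indices[start:start + self.batch_size]
            yield self.X[batch], self.y[batch]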
def segmentation(name, path, model):
    test_df = []
    sub_df = pd.DataFrame(
        data={
            'Image_Label': [
                name + "_Fish", name + "_Flower", name + "_Gravel",
                name + "_Sugar"
            ],
            'EncodedPixels': ['1 1', '1 1', '1 1', '1 1'],
            'ImageId': [name, name, name, name]
        })
    test_imgs = pd.DataFrame(data={'ImageId': [name]})
    test_generator = utils.DataGenerator([0],
                                         df=test_imgs,
                                         shuffle=False,
                                         mode='predict',
                                         dim=(350, 525),
                                         reshape=(320, 480),
                                         n_channels=3,
                                         base_path=path,
                                         target_df=sub_df,
                                         batch_size=1,
                                         n_classes=4)
    batch_pred_masks = model.predict_generator(test_generator,
                                               workers=1,
                                               verbose=1)
    for j, b in enumerate([0]):
        filename = test_imgs['ImageId'].iloc[b]
        image_df = sub_df[sub_df['ImageId'] == filename].copy()
        pred_masks = batch_pred_masks[j, ].round().astype(int)
        pred_rles = utils.build_rles(pred_masks, reshape=(350, 525))
        image_df['EncodedPixels'] = pred_rles
        test_df.append(image_df)
    test_df[0].iloc[:, :2].to_csv('./mask_test.csv')
    return test_df
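# A minimal sketch of what utils.build_rles above could look like (an assumption;
# the project's helper may differ). It run-length encodes each of the four
# predicted class masks in the column-major "EncodedPixels" format used by the
# submission DataFrame, optionally resizing the masks back to (350, 525) first.
import cv2
import numpy as np


def mask_to_rle(mask):
    # Column-major flattening into 'start length' pairs (1-indexed starts).
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def build_rles(masks, reshape=None):
    rles = []
    for c in range(masks.shape[-1]):
        mask = masks[..., c]
        if reshape is not None:
            # cv2.resize expects (width, height); reshape is given as (height, width).
            mask = cv2.resize(mask.astype(np.float32),
                              (reshape[1], reshape[0]),
                              interpolation=cv2.INTER_NEAREST).astype(int)
        rles.append(mask_to_rle(mask))
    return rles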
def run_experiment(model_path, net_name, data_path, results_path, lr=0.0001, balanced=True):
    nb_epochs = 200  # adjust the number of epochs
    # results_path = f'./results_up/{data_path}/{net_name}/'
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if balanced:
        data_generator = utils.BalancedGenerator(images_path=data_path, batch_size=32)
    else:
        data_generator = utils.DataGenerator(images_path=data_path, batch_size=32)

    csv_logger_callback = CSVLogger(os.path.join(results_path, 'results_file.csv'),
                                    append=True, separator=';')
    early_stopping_callback = EarlyStopping(monitor='val_loss', min_delta=0, patience=5,
                                            verbose=0, mode='auto',
                                            restore_best_weights=True)

    train_generator = data_generator.get_training_generator()
    valid_generator = data_generator.get_validation_generator()
    test_generator = data_generator.get_testing_generator()
    print("Classes:", test_generator.class_indices)
    print("Number of examples:", train_generator.n)

    pre_trained_model = utils.get_pre_trained_model(model_path, net_name)
    pre_trained_output = pre_trained_model.output
    predictions = Dense(len(test_generator.class_indices),
                        activation=tf.nn.softmax,
                        name='final_output')(pre_trained_output)
    # Keras 2 expects `inputs`/`outputs`; the old `input`/`output` keywords are deprecated.
    model = Model(inputs=pre_trained_model.input, outputs=predictions)

    adam = Adam(lr)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    model.fit_generator(
        generator=train_generator,
        steps_per_epoch=train_generator.n // train_generator.batch_size,
        validation_data=valid_generator,
        validation_steps=valid_generator.n // valid_generator.batch_size,
        epochs=nb_epochs,
        callbacks=[csv_logger_callback, early_stopping_callback])

    # `pickle_safe` was renamed to `use_multiprocessing` in Keras 2.
    output = model.predict_generator(generator=test_generator,
                                     steps=test_generator.n // test_generator.batch_size,
                                     use_multiprocessing=True)
    output = np.argmax(output, axis=1)

    accuracy = accuracy_score(test_generator.classes, output)
    f1_macro = f1_score(test_generator.classes, output, average='macro')
    f1_micro = f1_score(test_generator.classes, output, average='micro')
    error_matrix = confusion_matrix(test_generator.classes, output)
    results = {
        'accuracy': accuracy,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        'error_matrix': error_matrix,
        'class_indices': test_generator.class_indices
    }

    with open(os.path.join(results_path, 'results.pickle'), 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    model.save(os.path.join(results_path, 'model.h5'))
    model.save_weights(os.path.join(results_path, 'weights.h5'))
def train(self, X, y, **kwargs):
    """Train model on labelled data.

    Arguments:
        X {list} -- List of paths to training images.
        y {list} -- List of paths to training labels.

    Keyword Arguments:
        epochs (int): Number of training epochs. default=10
        batch_size (int): Mini-batch size. default=128
        optimizer (str, keras.optimizers.Optimizer): Network optimizer. default='rmsprop'
        save_dir (str): Directory to save checkpoints to. default='saved/models'
        valid_portion (float): Validation split, a fraction between 0 & 1. default=0.
        save_best_only (bool): Save only the best recorded accuracy during training. default=True
        steps_per_epoch (int): Number of iterations per epoch. default=1000
    """
    # Extract keyword arguments.
    epochs = kwargs.get('epochs', 10)
    batch_size = kwargs.get('batch_size', 128)
    optimizer = kwargs.get('optimizer', 'rmsprop')
    save_dir = kwargs.get('save_dir', 'saved/models')
    valid_portion = kwargs.get('valid_portion', 0.)
    save_best_only = kwargs.get('save_best_only', True)
    steps_per_epoch = kwargs.get('steps_per_epoch', 1000)

    # Make sure valid_portion is between 0 & 1.
    assert 0 <= valid_portion < 1, '`valid_portion` must be between 0. & 1.'

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # Split data into training & validation sets.
    size = int(valid_portion * len(X))
    if size > 0:
        X_train, y_train = X[:-size], y[:-size]
        X_valid, y_valid = X[-size:], y[-size:]
        # Create data generators for the train & validation sets.
        train_gen = utils.DataGenerator(X_train, y_train, batch_size)
        val_gen = utils.DataGenerator(X_valid, y_valid, batch_size)
    else:
        # Create a data generator for the training set only.
        train_gen = utils.DataGenerator(X, y, batch_size)
        val_gen = None

    # Checkpoint callback.
    checkpoint = keras.callbacks.ModelCheckpoint(
        os.path.join(save_dir, 'model.{epoch:03d}.h5'),
        save_best_only=save_best_only,
        verbose=self._verbose)

    # Compile model with the chosen optimizer & cross-entropy loss.
    self._model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    # Train model.
    try:
        self._model.fit_generator(generator=train_gen,
                                  epochs=epochs,
                                  validation_data=val_gen,
                                  steps_per_epoch=steps_per_epoch,
                                  callbacks=[checkpoint],
                                  verbose=self._verbose)
    except KeyboardInterrupt:
        print('\n{}'.format('-' * 65))
        print('Interrupted by user! \nSaving model...')

        # Save model.
        keras.models.save_model(model=self._model,
                                filepath=os.path.join(save_dir, 'model.interrupt.h5'))
        print('{}\n'.format('-' * 65))
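# A minimal sketch of a keras.utils.Sequence-style DataGenerator compatible with
# the fit_generator() call above. This is an assumption about utils.DataGenerator,
# not the project's actual implementation; the `loader` hook (np.load here) is a
# placeholder for whatever image/label decoding the real class performs.
import math
import numpy as np
import keras


class DataGenerator(keras.utils.Sequence):
    """Yields (X, y) batches from parallel lists of sample paths."""

    def __init__(self, image_paths, label_paths, batch_size, loader=np.load):
        self.image_paths = image_paths
        self.label_paths = label_paths
        self.batch_size = batch_size
        self.loader = loader

    def __len__(self):
        # Number of batches per epoch.
        return math.ceil(len(self.image_paths) / self.batch_size)

    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = start + self.batch_size
        X = np.stack([self.loader(p) for p in self.image_paths[start:end]])
        y = np.stack([self.loader(p) for p in self.label_paths[start:end]])
        return X, y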
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(dest="data_path", metavar="DATA_PATH",
                        help="Path to read examples from.")
    parser.add_argument("-sW", "--save_weights_path", metavar="SAVE_WEIGHTS_PATH", default=None,
                        help="Path to save trained weights to. If no path is specified checkpoints are not saved.")
    parser.add_argument("-sM", "--save_model_path", metavar="SAVE_MODEL_PATH", default=None,
                        help="Path to save trained model to.")
    parser.add_argument("-l", "--load_path", metavar="LOAD_PATH", default=None,
                        help="Path to load trained model from. If no path is specified model is trained from scratch.")
    parser.add_argument("-m", "--metrics-path", metavar="METRICS_PATH", default=None,
                        help="Path to save additional performance metrics to (for debugging purposes).")
    parser.add_argument("-b", "--read_batches", metavar="READ_BATCHES", default=False,
                        help="If true, data is read incrementally in batches during training.")
    parser.add_argument("--PCA", metavar="PCA", default=False,
                        help="If true, a PCA plot is saved.")
    parser.add_argument("--TSNE", metavar="TSNE", default=False,
                        help="If true, a TSNE plot is saved.")
    parser.add_argument("--output_loss_threshold", metavar="OUTPUT_LOSS_THRESHOLD", default=None,
                        help="Value between 0.0-1.0. Main function will return loss value of triplet at set percentage.")
    args = parser.parse_args()
    parse_args(args)

    X_shape, y_shape = utils.get_shapes(args.data_path, "train_anchors")

    # Build model
    input_shape = X_shape[1:]
    tower_model = build_tower_cnn_model(input_shape)  # single input model
    triplet_model = build_triplet_model(input_shape, tower_model)  # siamese model
    if args.load_path is not None:
        triplet_model.load_weights(args.load_path)

    # Setup callbacks for early stopping and model saving
    callback_list = setup_callbacks(args.save_weights_path)

    # Compile model
    adam = Adam(lr=LEARNING_RATE)
    triplet_model.compile(optimizer=adam, loss='mean_squared_error')

    if not args.read_batches:  # Read all data at once
        # Load training triplets and validation triplets
        X_train_anchors, y_train_anchors = utils.load_examples(args.data_path, "train_anchors")
        X_train_positives, _ = utils.load_examples(args.data_path, "train_positives")
        X_train_negatives, _ = utils.load_examples(args.data_path, "train_negatives")
        X_valid_anchors, y_valid_anchors = utils.load_examples(args.data_path, "valid_anchors")
        X_valid_positives, _ = utils.load_examples(args.data_path, "valid_positives")
        X_valid_negatives, _ = utils.load_examples(args.data_path, "valid_negatives")

        # Create dummy y = 0 (since output of siamese model is triplet loss)
        y_train_dummy = np.zeros((X_shape[0],))
        y_valid_dummy = np.zeros((X_valid_anchors.shape[0],))

        # Train the model
        triplet_model.fit([X_train_anchors, X_train_positives, X_train_negatives],
                          y_train_dummy,
                          validation_data=([X_valid_anchors, X_valid_positives, X_valid_negatives],
                                           y_valid_dummy),
                          epochs=EPOCHS,
                          batch_size=BATCH_SIZE,
                          callbacks=callback_list)

        global training_complete
        training_complete = True
    else:  # Read data in batches
        training_batch_generator = utils.DataGenerator(args.data_path, "train", batch_size=1000)
        validation_batch_generator = utils.DataGenerator(args.data_path, "valid", batch_size=1000)

        triplet_model.fit_generator(generator=training_batch_generator,
                                    validation_data=validation_batch_generator,
                                    callbacks=callback_list,
                                    epochs=EPOCHS)

    # Save weights
    if args.save_weights_path is not None:
        triplet_model.save_weights(args.save_weights_path + "final_weights.hdf5")

    # Save model
    if args.save_model_path is not None:
        tower_model.save(args.save_model_path + "tower_model.hdf5")
        triplet_model.save(args.save_model_path + "triplet_model.hdf5")

    # Plot PCA/TSNE
    # For now, read all the valid anchors to do PCA
    # TODO: add function in util that reads a specified number of random samples from a dataset.
    if args.PCA is not False or args.TSNE is not False:
        X_valid_anchors, y_valid_anchors = utils.load_examples(args.data_path, "valid_anchors")
        X, Y = utils.shuffle_data(X_valid_anchors[:, :, :], y_valid_anchors[:, :], one_hot_labels=True)
        X = X[:5000, :, :]
        Y = Y[:5000, :]
        X = tower_model.predict(X)
        if args.PCA:
            utils.plot_with_PCA(X, Y)
        if args.TSNE:
            utils.plot_with_TSNE(X, Y)

    # Calculate loss value of triplet at a certain threshold
    if args.output_loss_threshold is not None:
        if not args.read_batches:  # Read all data at once
            # Load training triplets
            X_train_anchors, _ = utils.load_examples(args.data_path, "train_anchors")
            X_train_positives, _ = utils.load_examples(args.data_path, "train_positives")
            X_train_negatives, _ = utils.load_examples(args.data_path, "train_negatives")

            # Get abs(distance) of embeddings
            X_train = triplet_model.predict([X_train_anchors, X_train_positives, X_train_negatives])
        else:  # Read data in batches
            training_batch_generator = utils.DataGenerator(args.data_path, "train",
                                                           batch_size=100, stop_after_batch=10)

            # Get abs(distance) of embeddings (one batch at a time)
            X_train = triplet_model.predict_generator(generator=training_batch_generator, verbose=1)

        X_train = np.sort(X_train, axis=None)
        print(X_train[int(float(args.output_loss_threshold) * X_train.shape[0])])
import tensorflow as tf
import utils, yolo, loss
import json
import os

from train import param, test_list, classes

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

images = '/home/cvlab09/kyung-taek/detection/YOLOv1_darknet/VOCdevkit/VOC2007/JPEGImages'

# test_generator = utils.DataGenerator(test_list, 4, images, param['grid_size'], param['num_bboxes'], param['num_classes'])
test_generator = utils.DataGenerator(test_list[:16], 4, images, param['grid_size'],
                                     param['num_bboxes'], param['num_classes'])

output_dir = r'/home/cvlab09/kyung-taek/detection/YOLOv1/output'

model = yolo.YOLOv1()
weight = r'/home/cvlab09/kyung-taek/detection/YOLOv1/weights/YOLOv1_t1.h5'
model.load_weights(weight)

pred = utils.Prediction(test_generator, output_dir, model, classes)
def Train(self, model):
    print('-' * 30 + 'Begin: training ' + '-' * 30)
    # Provide the same seed and keyword arguments to the fit and flow methods
    seed = 1

    # Train
    # Callbacks
    print("Proc: Prepare the callbacks...")
    log = callbacks.CSVLogger(configuration.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=configuration.save_dir + '/tensorboard-logs',
                               batch_size=configuration.batch_size,
                               histogram_freq=self.args.debug)
    callback = [log, tb] + configuration.list_for_callbacks
    print("Done: callbacks are created...")

    # Compile the model
    print("Proc: Compile the model...")
    model.compile(optimizer=configuration.optimizer,
                  loss_weights=configuration.loss_weights,
                  loss=configuration.loss,  # e.g. self.loss_sigmoid_cross_entropy_with_logits or util.margin_loss
                  metrics=configuration.metrics)
    print("Done: the model was compiled...")

    print("Proc: Training the model...")
    # Training with data augmentation
    total_number = int(round(len(configuration.training_set_index) *
                             configuration.augmentation_factor_for_training))
    train_steps_per_epoch = int(round(total_number // configuration.batch_size))
    valid_steps_per_epoch = (len(configuration.validation_set_index) *
                             configuration.augmentation_factor_for_validation) // configuration.batch_size
    test_steps_per_epoch = len(configuration.test_set_index) // configuration.batch_size
    print('train_steps_per_epoch', train_steps_per_epoch)
    print('valid_steps_per_epoch', valid_steps_per_epoch)

    if configuration.test_mode:
        # Sanity-check the generator output without training.
        for x, y in utils.DataGenerator(
                list_IDs=configuration.training_set_index,
                hdf5_path=configuration.dataset_hdf5_path,
                batch_size=configuration.batch_size,
                dim=configuration.input_shape,
                n_channels=configuration.number_input_channel,
                n_classes=configuration.n_class,
                shuffle=configuration.shuffle,
                run_augmentations=configuration.run_augmentations,
                mode="training",
                convert_to_categorical=configuration.convert_to_categorical,
                binarize=configuration.binarize,
                threshold_to_binary=configuration.threshold_to_binary,
                Normalization=configuration.Normalization,
                Two_output=configuration.Two_output):
            print(x.shape)
            print('max', np.max(x))
            print('min', np.min(x))
            print('mean', np.mean(x))
            print('median', np.median(x))
            print(y[0].shape)
            print(y[1].shape)
    else:
        train_generator = utils.DataGenerator(
            list_IDs=configuration.training_set_index,
            hdf5_path=configuration.dataset_hdf5_path,
            batch_size=configuration.batch_size,
            dim=configuration.input_shape,
            n_channels=configuration.number_input_channel,
            n_classes=configuration.n_class,
            shuffle=configuration.shuffle,
            run_augmentations=configuration.run_augmentations,
            mode="training",
            convert_to_categorical=configuration.convert_to_categorical,
            binarize=configuration.binarize,
            threshold_to_binary=configuration.threshold_to_binary,
            Normalization=configuration.Normalization,
            Two_output=configuration.Two_output)

        validation_input_generator = utils.DataGenerator(
            list_IDs=configuration.validation_set_index,
            hdf5_path=configuration.dataset_hdf5_path,
            batch_size=configuration.batch_size,
            dim=configuration.input_shape,
            n_channels=configuration.number_input_channel,
            n_classes=configuration.n_class,
            shuffle=configuration.shuffle,
            run_augmentations=False,
            mode="training",
            convert_to_categorical=configuration.convert_to_categorical,
            binarize=configuration.binarize,
            threshold_to_binary=configuration.threshold_to_binary,
            Normalization=configuration.Normalization,
            Two_output=configuration.Two_output)

        test_input_generator = utils.DataGenerator(
            list_IDs=configuration.test_set_index,
            hdf5_path=configuration.dataset_hdf5_path,
            batch_size=configuration.batch_size,
            dim=configuration.input_shape,
            n_channels=configuration.number_input_channel,
            n_classes=configuration.n_class,
            shuffle=configuration.shuffle,
            run_augmentations=False,
            mode="prediction",
            convert_to_categorical=configuration.convert_to_categorical,
            binarize=configuration.binarize,
            threshold_to_binary=configuration.threshold_to_binary,
            Normalization=configuration.Normalization,
            Two_output=configuration.Two_output)

        model.fit_generator(
            train_generator,
            steps_per_epoch=train_steps_per_epoch,
            epochs=configuration.epochs,
            use_multiprocessing=configuration.use_multiprocessing,
            max_queue_size=configuration.max_queue_size,
            workers=configuration.workers,
            class_weight=configuration.class_weight,
            validation_steps=valid_steps_per_epoch,
            validation_data=validation_input_generator,
            callbacks=callback)

        print("Run: test phase")
        print('test_steps_per_epoch', test_steps_per_epoch)
        print("Number of test cases", len(configuration.test_set_index))
        # predict_generator only accepts prediction arguments; the training-only
        # keywords (epochs, class_weight, validation_*) are not valid here.
        y_predict = model.predict_generator(
            test_input_generator,
            steps=test_steps_per_epoch,
            use_multiprocessing=configuration.use_multiprocessing,
            max_queue_size=configuration.max_queue_size,
            workers=configuration.workers)

        y_true_ = keras.utils.HDF5Matrix(configuration.dataset_hdf5_path, 'label')
        y_true = np.asarray(y_true_ >= configuration.threshold_to_binary, dtype=np.uint8)

        from sklearn.metrics import auc, average_precision_score, precision_recall_curve, \
            classification_report, f1_score, confusion_matrix, brier_score_loss
        from sklearn.metrics import roc_auc_score, roc_curve, fowlkes_mallows_score

        title = "ROC Curve for case detection with prostate cancer"
        utils.plotROCCurveMultiCall(plt, y_true, y_predict, title)
        plt.savefig("%s/PCA_MRI_DETECTION_roc_curve.eps" % configuration.save_dir, transparent=True)
        plt.savefig("%s/PCA_MRI_DETECTION_roc_curve.pdf" % configuration.save_dir, transparent=True)
        plt.savefig("%s/PCA_MRI_DETECTION_roc_curve.png" % configuration.save_dir, transparent=True)
        plt.show()
        plt.close()

        fpr, tpr, threshold = roc_curve(y_true, y_predict)
        roc_auc = auc(fpr, tpr)
        print('roc_auc', roc_auc)

        threshold = utils.cutoff_youdens(fpr, tpr, threshold)
        print('threshold', threshold)

        print("Confusion matrix")
        print(confusion_matrix(y_true, y_predict > threshold))
        print("Classification report")
        print(classification_report(y_true, y_predict > threshold))

        print("fowlkes_mallows_score")
        fms = fowlkes_mallows_score(y_true, y_predict > threshold)
        print(fms)

        brier_score = brier_score_loss(y_true, y_predict)
        print("brier score")
        print(brier_score)

        print("END: TESTING THE MODEL")

    print('-' * 30 + 'End: training ' + '-' * 30)
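# A minimal sketch of utils.cutoff_youdens as used above (an assumption about the
# helper, not its actual source). Youden's J statistic is J = sensitivity +
# specificity - 1 = tpr - fpr, and the chosen cutoff is the ROC threshold that
# maximizes J.
import numpy as np


def cutoff_youdens(fpr, tpr, thresholds):
    j_scores = tpr - fpr
    return thresholds[np.argmax(j_scores)]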
    'trn_val_rate': 0.3,
    'test_len': len(info_list) - 5000,
    'lr': 1e-4,
    'lr_schedule': utils.LR_SCHEDULE,
    'momentum': 9,
    'weight_decay': 5e-4,
}

##############
# Split Data #
##############
train_valid_list = info_list[:5000]
test_list = info_list[5000:]
train_data_list, valid_data_list = utils.train_test_split(train_valid_list)  # 3500, 1500

train_generator = utils.DataGenerator(train_data_list, batch_size, images, param['grid_size'],
                                      param['num_bboxes'], param['num_classes'])
valid_generator = utils.DataGenerator(valid_data_list, batch_size, images, param['grid_size'],
                                      param['num_bboxes'], param['num_classes'])

step_size_train = train_generator.n // train_generator.batch_size
step_size_valid = valid_generator.n // valid_generator.batch_size

#############
# Callbacks #
#############
custom_scheduler = utils.custom_lr_scheduler(utils.lr_schedule)

save_file_name = "{}_t{}".format(param['model_name'], param['trial'])
history = tf.keras.callbacks.CSVLogger(
    '/home/cvlab09/kyung-taek/detection/YOLOv1/history/' + save_file_name + '_history.txt',
    separator="\t", append=True)
checkpoint = ModelCheckpoint(
    '/home/cvlab09/kyung-taek/detection/YOLOv1/weights/' + save_file_name + '.h5',
    monitor='val_loss', verbose=1, save_best_only=True)
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(dest="data_path", metavar="DATA_PATH",
                        help="Path to read examples from.")
    parser.add_argument("-sW", "--save_weights_path", metavar="SAVE_WEIGHTS_PATH", default=None,
                        help="Path to save trained weights to. If no path is specified checkpoints are not saved.")
    parser.add_argument("-sM", "--save_model_path", metavar="SAVE_MODEL_PATH", default=None,
                        help="Path to save trained model to.")
    parser.add_argument("-l", "--load_path", metavar="LOAD_PATH", default=None,
                        help="Path to load trained model from. If no path is specified model is trained from scratch.")
    args = parser.parse_args()
    parse_args(args)

    X_shape, y_shape = utils.get_shapes(args.data_path, "train")

    # Build model
    input_shape = X_shape[1:]
    model = nn_model.Model(input_shape, EMB_SIZE, ALPHA, REG_WEIGHT)
    if args.load_path is not None:
        model.triplet_model.load_weights(args.load_path)

    # Setup callbacks for early stopping and model saving
    callback_list = setup_callbacks(args.save_weights_path)

    # Compile model
    adam = Adam(lr=LEARNING_RATE)
    model.triplet_model.compile(optimizer=adam, loss=custom_loss)

    # Predict on some dummy data to build/activate predict()
    model.tower.predict(np.zeros((1,) + input_shape))

    # Initialize online triplet generators
    training_batch_generator = nn_tripletGeneration.OnlineTripletGenerator(
        args.data_path, "train", model.tower, batch_size=BATCH_SIZE, alpha=ALPHA)
    validation_batch_generator = utils.DataGenerator(args.data_path, "valid", batch_size=BATCH_SIZE)

    model.triplet_model.fit_generator(generator=training_batch_generator,
                                      validation_data=validation_batch_generator,
                                      callbacks=callback_list,
                                      epochs=EPOCHS)

    # Save weights
    if args.save_weights_path is not None:
        model.triplet_model.save_weights(args.save_weights_path + "final_weights.hdf5")

    # Save model
    if args.save_model_path is not None:
        model.tower.save(args.save_model_path + "tower_model.hdf5")
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(dest="data_path", metavar="DATA_PATH",
                        help="Path to read examples from.")
    parser.add_argument("-sW", "--save_weights_path", metavar="SAVE_WEIGHTS_PATH", default=None,
                        help="Path to save trained weights to. If no path is specified checkpoints are not saved.")
    parser.add_argument("-sM", "--save_model_path", metavar="SAVE_MODEL_PATH", default=None,
                        help="Path to save trained model to.")
    parser.add_argument("-l", "--load_path", metavar="LOAD_PATH", default=None,
                        help="Path to load trained model from. If no path is specified model is trained from scratch.")
    parser.add_argument("--PCA", metavar="PCA", default=False,
                        help="If true, a PCA plot is saved.")
    parser.add_argument("--TSNE", metavar="TSNE", default=False,
                        help="If true, a TSNE plot is saved.")
    args = parser.parse_args()
    parse_args(args)

    X_shape, y_shape = utils.get_shapes(args.data_path, "train")

    # Build model
    input_shape = X_shape[1:]
    tower_model = build_tower_cnn_model(input_shape)  # single input model
    triplet_model = build_triplet_model(input_shape, tower_model)  # siamese model
    if args.load_path is not None:
        triplet_model.load_weights(args.load_path)

    # Setup callbacks for early stopping and model saving
    callback_list = setup_callbacks(args.save_weights_path)

    # Compile model
    adam = Adam(lr=LEARNING_RATE)
    triplet_model.compile(optimizer=adam, loss=custom_loss)

    # Predict on some dummy data to build/activate predict()
    tower_model.predict(np.zeros((1,) + input_shape))

    # Initialize online triplet generators
    training_batch_generator = OnlineTripletGenerator(args.data_path, "train", tower_model,
                                                      batch_size=BATCH_SIZE,
                                                      triplet_mode="batch_all")
    validation_batch_generator = utils.DataGenerator(args.data_path, "valid", batch_size=BATCH_SIZE)

    triplet_model.fit_generator(generator=training_batch_generator,
                                validation_data=validation_batch_generator,
                                callbacks=callback_list,
                                epochs=EPOCHS)

    # Save weights
    if args.save_weights_path is not None:
        triplet_model.save_weights(args.save_weights_path + "final_weights.hdf5")

    # Save model
    if args.save_model_path is not None:
        tower_model.save(args.save_model_path + "tower_model.hdf5")

    # Plot PCA/TSNE
    # TODO: add function in util that reads a specified number of random samples from a dataset.
    if args.PCA is not False or args.TSNE is not False:
        X_valid, y_valid = utils.load_examples(args.data_path, "train")
        X, Y = utils.shuffle_data(X_valid[:, :, :], y_valid[:, :], one_hot_labels=True)
        X = X[:5000, :, :]
        Y = Y[:5000, :]
        X = tower_model.predict(X)
        if args.PCA:
            utils.plot_with_PCA(X, Y)
        if args.TSNE:
            utils.plot_with_TSNE(X, Y)
def testing(q_table, job_type_arg, latencies_filename=utils.TMP_DIR + '/latency{0}.csv'):
    print("TST: Starting everything")
    utils.start_everything()

    dg = utils.DataGenerator()
    p = Process(target=dg.play_pattern, args=(job_type_arg, pattern, pattern_file, pattern_timing))
    p.start()

    job_prefix = job_type_arg[2:]
    topic_name = job_prefix + '_bdapro'

    aggregator = utils.FlinkJob(argument='--agg', parallelism=1)
    job = utils.FlinkJob(argument=job_type_arg, parallelism=1)

    print("TST: Sleeping for 3*60 seconds")
    time.sleep(3 * 60)

    latencies = []
    times = []
    paralls = []
    state = get_current_state(topic_name)

    while p.is_alive():
        current_latency = get_current_latency(job_prefix)
        latencies.append(current_latency)
        times.append(time.time())
        paralls.append(job.parallelism)

        if current_latency > rescaling_latency:
            print("TST: We have latency higher than omega latency of", rescaling_latency)
            temp_state = get_current_state(topic_name)
            if optimal_parallelism(q_table, temp_state) != job.parallelism:
                print("TST: Getting stabilized state")
                state = get_stabilized_state(topic_name, 10)
                print("TST: Current state:", state)
                opt_par = optimal_parallelism(q_table, state)
                print("TST: Optimal parallelism:", opt_par)

                # add output results for plotting just before changing parallelism
                latencies.append(get_current_latency(job_prefix))
                times.append(time.time())
                paralls.append(job.parallelism)

                job.set_parallelism(opt_par)
            time.sleep(rescaling_interval)
        else:
            print("TST: Latency less than omega latency of", rescaling_latency)
            # check if parallelism is too high for the current state
            temp_state = get_current_state(topic_name)
            if optimal_parallelism(q_table, temp_state) < job.parallelism:
                while True:
                    # let kafka metrics stabilize for grace_period seconds
                    temp_state = get_current_state(topic_name)
                    time.sleep(stability_period)
                    current_state = get_current_state(topic_name)
                    if current_state >= temp_state:
                        break
                state = current_state
                print("TST: Current state:", current_state)
                opt_par = optimal_parallelism(q_table, current_state)
                print("TST: Optimal parallelism:", opt_par)
                if opt_par < job.parallelism:
                    job.set_parallelism(opt_par)
            else:
                print("TST: Sleeping for 30 seconds")
                time.sleep(rescaling_interval)

    print(dg)
    df = pd.DataFrame({
        'latency': latencies,
        'parallelism': paralls,
        'time': times
    })
    df.to_csv(latencies_filename.format(job_type_arg), index=False)

    dg.stop_all()
    aggregator.kill()
    job.kill()
    utils.stop_everything()
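# A minimal sketch of optimal_parallelism as used by testing() above (an
# assumption, not the project's actual helper). Given the Q-table layout from
# init_Q below (one list of Q-values per state, indexed by action), it greedily
# returns the parallelism whose Q-value is highest for the observed state.
def optimal_parallelism(q_table, state):
    q_values = q_table[state]
    best_index = q_values.index(max(q_values))
    return actions[best_index]  # `actions` is the parallelism list from config, imported below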
import utils
import os
import sys
import time
import json
import subprocess
import math
from multiprocessing import Process
import atexit

import pandas as pd

# This file contains a q-learning based scaler
from config import all_jobs, actions, states, train_sleep_time, alpha, cutoff, \
    rescaling_latency, rescaling_interval, stability_period, pattern, pattern_timing, qtablefile, pattern_file

generator = utils.DataGenerator()


@atexit.register
def exit_function():
    print("EXIT: Stopping Generators")
    generator.stop_all()
    print("EXIT: Stopping Everything")
    utils.stop_everything()


def init_Q(policy='zeroes'):
    if policy == 'zeroes':
        Q = {}
        for state in states:
            Q[state] = [0 for x in actions]