def __init__(self, config, env, state_dim, action_dim):
    """Set up the DQN agent: config, environment, replay memory and networks."""
    # Configuration object holding all hyperparameters.
    self.cf = config

    # Environment and its state/action dimensions.
    self.env = env
    self.state_dim = state_dim
    self.action_dim = action_dim

    # Experience replay buffer.
    self.rm = ReplayMemory(self.cf.REPLAY_MEMORY_SIZE,
                           self.cf.FRAME_SIZE,
                           self.cf.AGENT_HISTORY_LENGHTH)

    # Online and target Q-networks share the same architecture.
    self.q = build_model(self.cf.FRAME_SIZE, self.action_dim,
                         self.cf.AGENT_HISTORY_LENGHTH)
    self.target_q = build_model(self.cf.FRAME_SIZE, self.action_dim,
                                self.cf.AGENT_HISTORY_LENGHTH)

    # Adam with gradient-norm clipping; Huber loss for stable Q-learning.
    self.optimizer = tf.keras.optimizers.Adam(
        learning_rate=self.cf.LEARNING_RATE, clipnorm=10.)
    self.loss = tf.keras.losses.Huber()
    self.q.summary()
def train_process():
    """Train the EN->CN seq2seq model, validating every `summary_steps` steps.

    Returns:
        (train_losses, val_losses, bleu_scores) collected over the whole run.
    """
    # Prepare training data (English/Chinese sentence pairs).
    train_dataset = EN2CNDataset(data_path, max_output_len, 'training')
    train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    train_iter = infinite_iter(train_loader)

    # Prepare validation data; batch size 1 for per-sentence BLEU scoring.
    val_dataset = EN2CNDataset(data_path, max_output_len, 'validation')
    val_loader = data.DataLoader(val_dataset, batch_size=1)

    # Build the model; index 0 is padding and is ignored by the loss.
    model, optimizer = build_model(train_dataset.en_vocab_size, train_dataset.cn_vocab_size)
    loss_function = nn.CrossEntropyLoss(ignore_index=0)

    train_losses, val_losses, bleu_scores = [], [], []
    total_steps = 0
    while total_steps < num_steps:
        # Train for `summary_steps` steps, then validate once.
        model, optimizer, loss = train(model, optimizer, train_iter,
                                       loss_function, total_steps, summary_steps)
        train_losses += loss

        val_loss, bleu_score, result = test(model, val_loader, loss_function)
        val_losses.append(val_loss)
        bleu_scores.append(bleu_score)

        total_steps = total_steps + summary_steps
        # BUG FIX: the original string was missing the f-prefix, so the
        # placeholders were printed literally instead of interpolated
        # (and "blue score" was a typo for "bleu score").
        print(f"val [{total_steps}] loss: {val_loss:.3f}, "
              f"Perplexity: {np.exp(val_loss):.3f}, "
              f"bleu score: {bleu_score:.3f}")
    return train_losses, val_losses, bleu_scores
def compile(self, learning_rate=None):
    """Build the chosen architecture and compile it with the CTC loss.

    Note: `learning_rate` is accepted but not used here; the optimizer
    comes fully configured from `network.build_model`.
    """
    # The network module supplies inputs, outputs and optimizer for
    # the selected architecture.
    inputs, outputs, optimizer = network.build_model(
        self.input_height, self.input_width, self.input_channels,
        self.vocab_size + 1)  # +1 presumably for the CTC blank token — TODO confirm

    self.model = Model(inputs=inputs, outputs=outputs)
    self.model.compile(optimizer=optimizer, loss=self.ctc_loss_lambda_func)
def get_model(load):
    """Return a model: restored from `load` if given, otherwise freshly built."""
    if not load:
        logging.info("Building a new model")
        return network.build_model()
    logging.info("Loading model from %s", load)
    return network.load(load)
def __init__(self, cfg, use_tensorboardx=True):
    """Set up output paths, model, loss and (optionally) a TensorBoardX writer."""
    self.cfg = cfg
    self.output_dir = os.path.join(cfg.output_dir, cfg.desc)
    self.desc = cfg.desc

    self.model = build_model(cfg).float()
    self.loss = build_loss(cfg)
    self._init_model_device()

    # Skip the summary writer for debug runs or when explicitly disabled.
    if use_tensorboardx and self.desc != 'debug':
        self.summary_writer = tensorboardX.SummaryWriter(
            logdir=os.path.join(cfg.output_dir, 'tf_logs'))
    else:
        self.summary_writer = None
def __init__(self):
    """Load the pre-trained POS, chunk and NER taggers and their configs."""
    # Hyperparameters shared by all three networks.
    self.char_embedd_dim = 30
    self.dropout = True
    self.num_filters = 30
    self.num_units = 300
    self.grad_clipping = 5.0
    self.peepholes = True

    # Per-task configurations: max lengths, alphabets and embedding tables.
    self.max_sent_length_pos, self.max_char_length_pos, self.num_labels_pos, \
        self.embedd_dim_pos, self.alphabet_char_pos, self.alphabet_label_pos, \
        self.char_embedd_table_pos = \
        load_config_pos(current_dir + '/pre-trained-model/pos', self.char_embedd_dim)

    self.max_sent_length_chunk, self.max_char_length_chunk, self.num_labels_chunk, \
        self.embedd_dim_chunk, self.alphabet_char_chunk, self.alphabet_label_chunk, \
        self.alphabet_pos_chunk, self.char_embedd_table_chunk = \
        load_config_chunk(current_dir + '/pre-trained-model/chunk', self.char_embedd_dim)

    self.max_sent_length_ner, self.max_char_length_ner, self.num_labels_ner, \
        self.embedd_dim_ner, self.alphabet_char_ner, self.alphabet_label_ner, \
        self.alphabet_pos_ner, self.alphabet_chunk_ner, self.char_embedd_table_ner = \
        load_config_ner(current_dir + '/pre-trained-model/ner', self.char_embedd_dim)

    # Build each network and restore its saved weights.
    self.pos_model, self.predict_fn_pos = network.build_model(
        self.embedd_dim_pos, self.max_sent_length_pos, self.max_char_length_pos,
        self.alphabet_char_pos.size(), self.char_embedd_dim, self.num_labels_pos,
        self.dropout, self.num_filters, self.num_units, self.grad_clipping,
        self.peepholes, self.char_embedd_table_pos)
    set_weights(current_dir + '/pre-trained-model/pos/weights.npz', self.pos_model)

    self.chunk_model, self.predict_fn_chunk = network.build_model(
        self.embedd_dim_chunk, self.max_sent_length_chunk, self.max_char_length_chunk,
        self.alphabet_char_chunk.size(), self.char_embedd_dim, self.num_labels_chunk,
        self.dropout, self.num_filters, self.num_units, self.grad_clipping,
        self.peepholes, self.char_embedd_table_chunk)
    set_weights(current_dir + '/pre-trained-model/chunk/weights.npz', self.chunk_model)

    self.ner_model, self.predict_fn_ner = network.build_model(
        self.embedd_dim_ner, self.max_sent_length_ner, self.max_char_length_ner,
        self.alphabet_char_ner.size(), self.char_embedd_dim, self.num_labels_ner,
        self.dropout, self.num_filters, self.num_units, self.grad_clipping,
        self.peepholes, self.char_embedd_table_ner)
    set_weights(current_dir + '/pre-trained-model/ner/weights.npz', self.ner_model)

    # Use the minimum limits across the three tasks.
    self.max_sent_length = min(self.max_sent_length_pos,
                               self.max_sent_length_chunk,
                               self.max_sent_length_ner)
    self.max_char_length = min(self.max_char_length_pos,
                               self.max_char_length_chunk,
                               self.max_char_length_ner)
def test_label_encoder(self):
    """Smoke-test LabelEncoder.encode_batch and a forward pass of the model."""
    dataset, _ = load_datasets(1)
    for sample in dataset.take(1):
        print(sample[0].shape)
        print(sample[1].shape)

        # Encode one dummy ground-truth box with class id 1.
        encoder = LabelEncoder()
        _, labels = encoder.encode_batch(
            sample[0],
            tf.constant([[0, 1, 0, 1]], shape=[1, 1, 4], dtype=tf.float32),
            tf.constant([[1]], shape=[1, 1]))
        print(labels.shape)

        # NOTE(review): looks like a class-prior bias initializer (pi=0.01);
        # it is created but never passed to the model — kept as in the original.
        b_init = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
        noise = tf.random.normal((sample[0].shape))
        model = build_model(2)
        out = model(noise)
        print(out.shape)
form=form, ious=ious) image_precision += precision_at_threshold / n_threshold return image_precision validation_image_precisions = [] iou_thresholds = [x for x in np.arange(0.5, 0.76, 0.05)] config = cfg.get_args() train_data_loader, valid_data_loader = dh.get_data_loaders(config) model, optimizer, history = network.build_model(config) model.eval() for images, targets, image_ids in valid_data_loader: # ... # outputs = model(images) sample = images[0].permute(1, 2, 0).cpu().numpy() preds = outputs[0]['boxes'].data.cpu().numpy() scores = outputs[0]['scores'].data.cpu().numpy() gt_boxes = targets[0]['boxes'].data.cpu().numpy() # preds = ... # shape: (#predicted box, 4) # scores = ... # shape: (#predicted box, ) # gt_boxes = ... # shape: (#ground-truth box, 4) preds_sorted_idx = np.argsort(scores)[::-1]
def main():
    """Load the dataset, build the model from it, then print a blank line."""
    print("loading data")
    dataset = load_data()
    model = build_model(dataset)
    print("")
# Test set: fetch features/labels and reshape them for the CNN input.
test_files = [filenames[i] for i in test_fnms]
X_test, Y_test = fetchDataAndlabels(test_files, annotation, classes)
data_test = np.asarray(X_test)
label_test = to_categorical(Y_test)
data_test.resize(len(data_test), 250, 16, 1)


def get_session(gpu_fraction=0.333):
    """Create a TF session that claims only a fraction of GPU memory."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction,
                                allow_growth=True)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))


K.set_session(get_session())

# Build and compile the classifier.
inhaler_model = build_model(data_train.shape[1:], num_classes)
inhaler_model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
inhaler_model.summary()

# Either train from scratch (optionally saving the result)
# or load previously stored weights.
if fromScratch:
    inhaler_train = inhaler_model.fit(data_train, label_train,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=1)
    if doStore == True:
        inhaler_model.save(trainedModel)
else:
    inhaler_model.load_weights(trainedModel)
def __init__(self, cfg):
    """Keep the config and unpack the three components built from it."""
    self.cfg = cfg
    (self.features,
     self.bottleneck_layer,
     self.classifier) = build_model(self.cfg)
def main():
    """Parse CLI arguments, build the CTC model and run the training loop."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True, help='training data')
    parser.add_argument('--valid', type=str, required=True, help='validation data')
    parser.add_argument('--feat-dim', default=40, type=int, help='feats dim')
    parser.add_argument('--n-labels', default=1024, type=int, required=True,
                        help='number of output labels')
    parser.add_argument('--batch-size', default=64, type=int, help='mini-batch size')
    parser.add_argument('--epochs', default=10, type=int, help='training epochs')
    parser.add_argument('--snapshot', type=str, default='./', help='snapshot directory')
    parser.add_argument('--snapshot-prefix', type=str, default='snapshot',
                        help='snapshot file prefix')
    parser.add_argument('--learn-rate', type=float, default=1.0e-3,
                        help='initial learn rate')
    # parser.add_argument('--log-dir', type=str, default='./',
    #                     help='tensorboard log directory')
    parser.add_argument('--units', type=int, default=16, help='number of LSTM cells')
    parser.add_argument('--lstm-depth', type=int, default=2,
                        help='number of LSTM layers')
    parser.add_argument('--factor', type=float, default=0.5,
                        help='lerarning rate decaying factor')
    parser.add_argument('--min-lr', type=float, default=1.0e-6,
                        help='minimum learning rate')
    parser.add_argument('--dropout', type=float, default=0.0, help='dropout')
    parser.add_argument('--filters', type=int, default=64,
                        help='number of filters for CNNs')
    parser.add_argument('--max-patience', type=int, default=5, help='max patient')
    parser.add_argument('--optim', type=str, default='adam',
                        help='optimizer [adam|adadelta]')
    parser.add_argument('--weights', type=str, default=None, help='network werigts')
    args = parser.parse_args()

    # Build the network; optionally warm-start from saved weights.
    inputs = Input(shape=(None, args.feat_dim))
    curr_lr = args.learn_rate
    model = network.build_model(inputs, args.units, args.lstm_depth, args.n_labels,
                                args.feat_dim, curr_lr, args.dropout, args.filters,
                                args.optim)
    if args.weights is not None:
        model.load_weights(args.weights, by_name=True)

    # TensorBoard logging.
    tensorboard = keras.callbacks.TensorBoard(
        log_dir='./tensorboard',
        histogram_freq=0,
        batch_size=args.batch_size,
        write_graph=True,
        write_grads=True)
    tensorboard.set_model(model)

    training_generator = generator.DataGenerator(
        args.data, args.batch_size, args.feat_dim, args.n_labels, shuffle=True)
    valid_generator = generator.DataGenerator(
        args.valid, args.batch_size, args.feat_dim, args.n_labels, shuffle=False)

    ep = 0
    train_bt = 0
    val_bt = 0
    while ep < args.epochs:
        start_time = time.time()
        curr_loss = 0.0
        curr_samples = 0

        # Training pass.
        for bt in range(training_generator.__len__()):
            batch = training_generator.__getitem__(bt)
            # batch = [input_sequences, label_sequences, input_lengths,
            # label_lengths]. The true labels are not used by TF's CTC
            # training; train_on_batch returns the CTC loss value.
            logs = model.train_on_batch(x=batch, y=batch[1])
            _logs = [logs]
            # NOTE(review): accumulating the batch *index* (not a count)
            # looks odd but is kept exactly as in the original.
            train_bt += bt
            write_log(tensorboard, ['loss'], _logs, train_bt)

        # Validation pass: evaluate returns loss, label error rate (cer,
        # not normalized by true lengths) and sequence error rate (ser).
        curr_val_cer = []
        for bt in range(valid_generator.__len__()):
            batch = valid_generator.__getitem__(bt)
            _logs = []
            loss, cer, ser = model.evaluate(batch)
            _logs.append(loss[0])
            _logs.append(np.mean(np.array(cer)))
            _logs.append(np.mean(np.array(ser)))
            val_bt += bt
            write_log(tensorboard, ['val_loss', 'val_cer', 'val_ser'], _logs, val_bt)
            curr_val_cer.append(cer)

        curr_val_cer = np.mean(curr_val_cer) * 100.0
        print('Epoch %d (valid) cer=%.4f' % (ep + 1, curr_val_cer))

        # Snapshot the weights after every epoch.
        path = os.path.join(args.snapshot, args.snapshot_prefix + '.h5')
        model.save_weights(path)
        msg = "save the model epoch %d" % (ep + 1)
        ep += 1
    print("Training End.")
import keras as k
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import sgd
from keras.metrics import binary_accuracy as acc

import network
import utils
import config

# Build and compile the binary classifier.
model = network.build_model()
model.summary()

opt = sgd(lr=0.01)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[acc])

# Callbacks: stop early, keep only the best checkpoint, and halve the
# learning rate on plateau — all driven by validation accuracy.
early_stopping = EarlyStopping(patience=10, verbose=1)
model_checkpoint = ModelCheckpoint(config.SAVE_MODEL_PATH,
                                   save_best_only=True,
                                   monitor='val_acc',
                                   mode='max',
                                   verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc',
                              factor=0.5,
                              patience=5,
                              min_delta=0.005,
                              mode='max',
                              verbose=1)

train_gen = utils.data_generator(config.BATCH_SIZE)
val_gen = utils.data_generator(config.BATCH_SIZE, is_train=False)
# vis.eval_and_plot_on_train(train_data_loader, model,detection_threshold) vis.PlotLoss([0, 1, 2, 4], param='learning_rate') exit(1) #choose the relevant device ChooseDevice = 1 # 0=CPU ,1=Check for GPE if ChooseDevice: device = torch.device('cuda:' + str(config.gpu_id)) if torch.cuda.is_available( ) else torch.device('cpu') else: device = torch.device('cpu') print(device) #creating the model, optimizer and documentation model, optimizer, history, lr_scheduler = network.build_model(config, device) #creating split train and validation datasets train_data_loader, valid_data_loader = dh.get_data_loaders(config) class Averager: def __init__(self): self.current_total = 0.0 self.iterations = 0.0 def send(self, value): self.current_total += value self.iterations += 1 @property def value(self):
# Checkpoints allow training to resume; exports serve inference later.
checkpoint_dir = "./checkpoints/"
export_dir = "./exported"

# Training logs (loss/accuracy curves) are written here.
log_dir = "./logs"

# A sample image used to log the model's behavior during training.
log_image = "./docs/family.jpg"

# Build the model, then restore the newest checkpoint if one exists.
num_classes = 2
model = build_model(num_classes)

if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
    print("Checkpoint directory created: {}".format(checkpoint_dir))

latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint:
    print("Checkpoint found: {}, restoring...".format(latest_checkpoint))
    model.load_weights(latest_checkpoint)
    print("Checkpoint restored: {}".format(latest_checkpoint))
else:
    print(
        "Checkpoint not found. Model weights will be initialized randomly."
    )
def set_weights(filename, model):
    """Restore all lasagne layer parameters of `model` from an .npz archive."""
    with np.load(filename) as f:
        # Arrays are stored as arr_0, arr_1, ... in save order.
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(model, param_values)


if __name__ == '__main__':
    start_time = datetime.now()

    print('Loading data...')
    word_train, word_dev, word_test, char_train, char_dev, char_test, \
        mask_train, mask_dev, mask_test, label_train, label_dev, label_test, \
        alphabet_label, alphabet_char, max_sent_length, max_char_length, \
        char_embedd_table, num_labels, num_data = \
        create_data_2_train(train_dir, dev_dir, test_dir, char_embedd_dim)

    print('Building model...')
    pos_model, input_var, target_var, mask_var, char_input_var, prediction_fn = \
        network.build_model(embedd_dim, max_sent_length, max_char_length,
                            alphabet_char.size(), char_embedd_dim, num_labels,
                            dropout, num_filters, num_units, grad_clipping,
                            peepholes, char_embedd_table)

    print('Training model...')
    network.train_model(num_data, batch_size, learning_rate, patience, decay_rate,
                        word_train, label_train, mask_train, char_train,
                        word_dev, label_dev, mask_dev, char_dev,
                        word_test, label_test, mask_test, char_test,
                        input_var, target_var, mask_var, char_input_var,
                        pos_model, 'pos', alphabet_label, 'output/pos')

    print('Saving parameter...')
    save_config('pre-trained-model/pos/config.ini')

    end_time = datetime.now()
    print("Running time:")
    print(end_time - start_time)
def test(cf, env_name, weights_url, render=False, check_input_frames=False,
         check_log_plot=False, check_saliency_map=False):
    """Run a trained DQN greedily in `env_name`, with optional debug views.

    Optional flags show the raw network input, a Grad-CAM saliency overlay,
    or a live Q-value plot while the episode runs.
    """
    # Environment.
    env = gym.make(env_name)
    action_dim = env.action_space.n

    # Build the network and restore the trained weights.
    model = build_model(cf.FRAME_SIZE, action_dim, cf.AGENT_HISTORY_LENGHTH)
    model.summary()
    model.load_weights(weights_url)

    # Initial state: the first preprocessed frame stacked 4 times.
    frames, action, done = 0, 0, 0
    initial_state = env.reset()
    state = np.stack([preprocess(initial_state, frame_size=cf.FRAME_SIZE)] * 4,
                     axis=3)
    state = np.reshape(state, state.shape[:-1])

    while not done:
        frames += 1

        if render:
            env.render()

        # Greedy action from the Q-network; rewards clipped to [-1, 1].
        action = np.argmax(model(normalize(state)))
        next_state, reward, done, _ = env.step(action)
        reward = np.clip(reward, -1, 1)
        # Slide the frame history: drop the oldest, append the newest.
        next_state = np.append(state[..., 1:],
                               preprocess(next_state, frame_size=cf.FRAME_SIZE),
                               axis=3)
        state = next_state

        # Optionally display the raw network input.
        if check_input_frames:
            test_img = np.reshape(
                next_state,
                (cf.FRAME_SIZE, cf.FRAME_SIZE, cf.AGENT_HISTORY_LENGHTH))
            test_img = cv2.resize(test_img, dsize=(150, 200),
                                  interpolation=cv2.INTER_AREA)
            cv2.imshow('input image', test_img)
            # NOTE(review): the comparison result is discarded; waitKey(0)
            # only blocks until a key is pressed — kept as in the original.
            cv2.waitKey(0) != ord('l')
            if cv2.waitKey(25) == ord('q') or done:
                cv2.destroyAllWindows()

        # Optionally overlay a Grad-CAM saliency map on the rendered screen.
        if check_saliency_map:
            cm = plt.get_cmap('jet')
            grad_img = generate_grad_cam(
                model, state, 'conv2d_6',
                output_layer='global_average_pooling2d')
            grad_img = np.reshape(grad_img, (cf.FRAME_SIZE, cf.FRAME_SIZE))
            grad_img = cm(grad_img)[:, :, :3]
            screen = env.render(mode='rgb_array')
            screen = cv2.resize(screen, dsize=(150, 200)) / 255.
            grad_img = cv2.resize(grad_img, dsize=(150, 200))
            test_img = cv2.addWeighted(screen, 0.5, grad_img, 0.5, 0,
                                       dtype=cv2.CV_32F)
            test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
            cv2.imshow('saliency map Value', test_img)
            if cv2.waitKey(25) == ord('q') or done:
                cv2.destroyAllWindows()

        # Optionally log and plot the Q-values live.
        if check_log_plot:
            # Matplotlib interactive setup for Jupyter notebooks.
            is_ipython = 'inline' in matplotlib.get_backend()
            if is_ipython:
                from IPython import display
            plt.ion()
            q = np.array(model(normalize(state))[0])
            fig = plot_durations(q, is_ipython)
            # print(action, q, end='\r')
            print(action, max(q), min(q), sum(q) / action_dim, end='\r')
            plt.ioff()
# Data preprocessing (kept for reference, currently disabled):
# process_dir = '/home/data/sfz/data_typhoon/npy_test_0923/' # dataset/npy_testset/
# output_dir = '/home/data/sfz/data_typhoon/png_test_0923/' # ./img_data/test/
# HEADS = ['U', 'V', 'W', 'X', 'Y', 'Z']
# print('Searching for missing data of train dataset...')
# filling(process_dir, HEADS)
# print('Transfering...')
# save2img(process_dir, output_dir, HEADS, 500)
# print('Save images OK')
# process_dir = '/home/data/sfz/data_typhoon/npy_trainset_all0923/' # dataset/npy_trainset/
# output_dir = '/home/data/sfz/data_typhoon/png_train_all0923/' # img_data/train/
# HEADS = ['A', 'B', 'C', 'D']
# print('Searching for missing data of test dataset...')
# filling(process_dir, HEADS)
# print('Transfering...')
# save2img(process_dir, output_dir, HEADS, 500)
# print('Save images OK')

# Generate the training sequences.
n_step = 6
row = 55  # 100
col = 55  # 100
channel = 3
# train_data_dir = "/home/data/sfz/data_typhoon/png_train_small"  # "data/train1" png_train_small
# train_data_dir = "/home/data/sfz/data_typhoon/png_train_all0923"
train_data_dir = "/home/data/sfz/data_typhoon/png_train_allA"
trainX, trainY, validX, validY = generate.generate_seq(train_data_dir,
                                                       row=row, col=col)

# Build the network and train the model.
model = network.build_model(n_step=n_step, row=row, col=col, channel=channel)
history = network.train_model(trainX, trainY, validX, validY, model)

time2 = time.time()
print('time use:' + str(time2 - time1) + 's')
if __name__ == '__main__':
    # 0. Hyperparameters.
    num_epochs = 100
    batch_size = 64
    shape = 100
    folder_path = 'cs-ioc5008-hw1'

    # 1. Load and preprocess the data.
    classes = utilities.load_classes(folder_path)
    generators = utilities.load_data(folder_path, batch_size, shape, classes)
    testX = utilities.load_test(folder_path, shape)
    # Class weights bias training toward the minority classes.
    class_weight = utilities.weight_classes(folder_path, classes)

    # 2. Build the model via transfer learning and fit it.
    model = network.build_model(len(classes), shape)
    model = network.fit_model(generators, model, num_epochs, batch_size,
                              class_weight)

    # 3. Restore the best weights found and write the Kaggle predictions.
    model = network.load_weights(model, 'weights.best.hdf5')
    predictions = model.predict(testX)
    utilities.output_predictions(predictions, classes, testX)
def main():
    """K-fold train/evaluate the network on the dataset named by argv[1].

    BUG FIX: the original used Python-2-only syntax (`print` statements and
    `xrange`), which is a SyntaxError/NameError under Python 3; converted to
    the Python 3 equivalents with identical output.
    """
    filename = sys.argv[1]
    X = data.load_dataset('{}_X.npy'.format(filename))
    Y = data.load_dataset('{}_Y.npy'.format(filename))

    model = network.build_model()
    # Visualize the model architecture.
    network.vizualize_model(model, filename)

    # K-Fold cross-validation.
    val_error = []
    losses = []
    kf = KFold(Y.shape[0], n_folds=FOLDS, shuffle=True, random_state=None)
    for train_index, val_index in kf:
        # Generate the dataset for this fold.
        X_train, X_val = X[train_index], X[val_index]
        Y_train, Y_val = Y[train_index], Y[val_index]
        print(X_train.shape, X_val.shape)
        print(Y_train.shape, Y_val.shape)

        # Train the model on this fold.
        train_history, loss_history = network.train_model(
            model, (X_train, Y_train), (X_val, Y_val))
        # TODO: save the losses to a file.
        losses.append(loss_history.losses)

        # Evaluate the model.
        val_error = model.evaluate(X_val, Y_val, verbose=0)
        print('Validation error:', val_error)
        # NOTE: hack to run only one split.
        break

    # Print final K-Fold error.
    print("K-Fold Error: %0.2f (+/- %0.2f)" % (val_error.mean(),
                                               val_error.std() * 2))

    # Predict some labels.
    # TODO: modify this to suit our image needs.
    counter = 0
    while counter < 1:
        idx = random.choice(range(Y.shape[0]))
        prediction = network.predict_model(model,
                                           np.expand_dims(X[idx, :], axis=0))
        print('Testing: sample={}, prediction={}, actual={}'.format(
            idx, prediction, Y[idx, :]))
        # Save the predicted image.
        data.generate_image(prediction)
        counter += 1

    # Dump the model to the file.
    network.save_model(model, filename)
# Make a grid from a batch and show it with class-name titles.
out = torchvision.utils.make_grid(inputs)
util.imshow(out, title=[class_names[x] for x in classes])
plt.show()

# %%
# Load an existing model ...
model = torch.load('results/models/full_model-resnet18_v1.pt')

# %%
# ... or create a new one from a ResNet-18 backbone.
base_model = models.resnet18
model = network.build_model(base_model, class_names, True, False)

# %%
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Decay the learning rate by 10x every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# %%
# Train the model.
model = network.train_model(model, criterion, optimizer, scheduler,
                            dataloaders, dataset_sizes, num_epochs=1)

# %%