def add_to_training_data(posts):
    """
    Given posts (a list of dicts extracted from StackExchange JSON data),
    add posts to the training data stored in the database. The model is then
    retrained using all available data.

    Note: If a post ID is already in the training database, it is updated
    with the newly-extracted measurements.
    """
    query = "INSERT INTO trainingdata ("
    query += ', '.join(fields) + ") VALUES "
    datavecs = [str(tuple(extract_data_vector(item, True, True)))
                for item in posts]
    query += ",\n".join(datavecs)
    query += " ON DUPLICATE KEY UPDATE "
    query += ','.join(["{0}=VALUES({0})".format(field) for field in fields[1:]])
    query += ';\n'

    with open('dbase.conf', 'r') as f:
        dbase, user, passwd = f.readline().rstrip().split(',')

    conn = pymysql.connect(user=user, passwd=passwd, db=dbase)
    cur = conn.cursor()
    count = cur.execute(query)
    conn.commit()
    print("Successfully merged {} entries!".format(count))
    cur.close()
    conn.close()

    model.build_model()
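# Hedged usage sketch (not part of the original code): feeding posts pulled
# from the StackExchange API into add_to_training_data() above. The file name
# 'posts.json' and the 'items' key are assumptions about how the JSON response
# was saved; the function itself supplies `fields`, `extract_data_vector`, and
# the one-line `dbase.conf` credentials file it reads.
if __name__ == '__main__':
    import json
    with open('posts.json') as fh:
        posts = json.load(fh)['items']
    add_to_training_data(posts)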
def main(): import model models = [model.build_model('cpu') for _ in range(5)] for i, m in enumerate(models): m.load_weights('weights-' + str(i) + '.09.hdf5') cluster_to_data = load_testing_data() for cluster, data in cluster_to_data.iteritems(): X, flops, y = data new_flops = [np.zeros((flop.shape[0], INPUT_LENGTH, flop.shape[1])) for flop in flops] # flops: (player, hand, board) # new_flops: (player, hand, actions, board) # X: (player, hand, actions, action) for i, player in enumerate(zip(flops, new_flops, X)): for j, (flop, new_flop, X_hand) in enumerate(zip(*player)): for k, v in enumerate(X_hand): if v[15] == 1: # determine if flop has been reached break new_flops[i][j] = np.concatenate((np.zeros((k, flop.shape[0])), np.tile(np.expand_dims(flop, 0), (INPUT_LENGTH - k, 1)))) flops = [x.astype(int) for x in new_flops] losses, individual_losses = evaluate(models, X, flops, y) print ("Total Cluster Loss for {n} players: {val}" .format(n=len(X), val=losses)) print "Total Individual Losses:", individual_losses
def load_model(model_dir, model_weights=None): config_json = json.load(open(model_dir + '/config.json')) model_json = json.load(open(model_dir + '/model.json')) model_json.update(config_json) if 'model_cfg' in model_json: for k,v in model_json['model_cfg']: model_json[k] = v if model_weights: if isinstance(model_weights, bool) and model_weights is True: # Original behavior: only one weights file exists and it has # the weights from the best epoch. model_json['model_weights'] = model_dir + '/model.h5' elif isinstance(model_weights, str): # New behavior: multiple weights files may exist. if os.path.exists(model_dir + '/' + model_weights): model_json['model_weights'] = model_dir + '/' + model_weights else: model_json['model_weights'] = model_weights else: raise ValueError('unexpected type for "model_weights" %s (%s)' % (model_weights, type(model_weights))) # Re-instantiate ModelConfig using the updated JSON. sys.path.append(model_dir) from model import build_model model_cfg = ModelConfig(**model_json) model = build_model(model_cfg) return model, model_cfg
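# Hedged usage sketch (not part of the original code) showing the two
# `model_weights` modes accepted by load_model() above; the run directory and
# checkpoint file name are placeholders.
model, model_cfg = load_model('runs/exp01', model_weights=True)
model_alt, model_cfg_alt = load_model('runs/exp01', model_weights='model-0007.h5')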
def main(): print 'Loading model...' labels_df = pd.read_csv('labels.csv') num_labels = len(labels_df['label'].unique()) mdl = build_model(num_labels) mdl.initialize() mdl.load_params_from('models/cnn_handle_frac_last.pkl') test_df = pd.read_csv('test_imgs.csv') preds = [] for filename in test_df['filename']: latex = Latex2Code(mdl, labels_df, verbose=False) print 'Predicting', filename img = cv2.imread(filename) preds.append(latex.to_latex(img)) y_true = [s.replace(' ', '') for s in test_df['equation']] preds_new = [s.replace(' ', '') for s in preds] y_true = np.array(y_true) preds_new = np.array(preds_new) print 'Accuracy =', 1. * sum(y_true == preds_new) / len(y_true) test_df['preds'] = preds test_df.to_csv('out.csv')
def joblib_wrapper(historical_games_trunc, all_stats, bet_info, historical_games_by_tuple, tunable_param_list): (moving_averages, transform_params, n_estimators, min_samples_split, min_samples_leaf, bet_threshold) = tunable_param_list print 'Building model...' X, y = model.build_model_inputs(historical_games_trunc, all_stats, moving_averages, transform_params) the_model = model.build_model(X, y, n_estimators=n_estimators, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf) print 'Evaluating model...' winnings = evaluator.evaluate_model(the_model, all_stats, bet_info, historical_games_by_tuple, moving_averages, transform_params, bet_threshold) return winnings
def work(): config_dict = yaml.load(open(sys.argv[1], 'r')) print config_dict if config_dict['working_mode'] == 'train_new': train, valid, alphabet = build_datasets(config_dict) generator, cost = build_model(len(alphabet), config_dict) algorithm = build_algorithm(generator, cost, config_dict) extensions = build_extensions(cost, algorithm, valid, config_dict) main_loop = MainLoop(algorithm=algorithm, data_stream=train, model=Model(cost), extensions=extensions) main_loop.run() elif config_dict['working_mode'] == 'train_resume': # TODO pass
def __init__(self, config_dict): print config_dict train, valid, alphabet = build_datasets(config_dict) generator, cost = build_model(len(alphabet), config_dict) algorithm = build_algorithm(generator, cost, config_dict) extensions = build_extensions(cost, algorithm, valid, config_dict) main_loop = MainLoop(algorithm=algorithm, data_stream=train, model=Model(cost), extensions=extensions) ml = Load(config_dict['checkpoint_path'], load_log=True) ml.load_to(main_loop) generator = main_loop.model.get_top_bricks()[-1] self.numbers_from_text = pickle.load(open(config_dict['dict_path'])) x = tensor.lmatrix('sample') cost_cg = generator.cost(x) self.cost_f = theano.function([x], cost_cg)
def test(X_data, Y_data, activation, job_dir, device_name):
    with tf.Graph().as_default() as v_graph:
        (x, y_), _, cost, error_rate, saver = build_model(
            activation, is_learning=False, enable_bn=True,
            device_name=device_name)
        with tf.Session(graph=v_graph) as sess:
            saver.restore(sess, job_dir)
            cost, acc = sess.run(fetches=[cost, error_rate],
                                 feed_dict={x: X_data, y_: Y_data})
            print("Test error_rate: %g" % (acc))
            print("Test cost: %g" % (cost))
def test_learning_rate(lrs=[0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]):
    X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets()
    results = {}
    for i in range(len(lrs)):
        temp_results = []
        for j in range(20):
            model = build_model(layers_neurons=[32, 4, 1], lr=lrs[i])
            history = model.fit(X_train, y_train, epochs=20, batch_size=10)
            temp_results.append(history.history["loss"][-1])
        results[lrs[i]] = mean(temp_results)
    with open('results.json', "r") as file:
        data = json.load(file)
    data["learning_rate"] = results
    with open('results.json', "w") as file:
        json.dump(data, file, indent=4)
    return results
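# Hedged usage sketch (not part of the original code): run the sweep defined
# above and print the mean final loss per learning rate. Note that
# test_learning_rate() reads results.json before writing the new key, so a
# JSON object is assumed to already exist in that file.
if __name__ == '__main__':
    sweep = test_learning_rate()
    for lr, loss in sorted(sweep.items()):
        print("lr={}: mean final loss={:.4f}".format(lr, loss))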
def test_second_layer_size(max_size=6): X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets() results = {} for i in range(max_size): temp_results = [] for j in range(20): model = build_model(layers_neurons=[2**(max_size - 1), 2**i, 1]) history = model.fit(X_train, y_train, epochs=20, batch_size=10) temp_results.append(history.history["loss"][-1]) results[2**i] = mean(temp_results) with open('results.json', "r") as file: data = json.load(file) data["second_layer_size"] = results with open('results.json', "w") as file: json.dump(data, file, indent=4) return results
def plot_latent_space(weightsfile): print('building model') layers = model.build_model() batch_size = 128 decoder_func = theano_funcs.create_decoder_func(layers) print('loading weights from %s' % (weightsfile)) model.load_weights([ layers['l_decoder_out'], layers['l_discriminator_out'], ], weightsfile) # regularly-spaced grid of points sampled from p(z) Z = np.mgrid[2:-2.2:-0.2, -2:2.2:0.2].reshape(2, -1).T[:, ::-1].astype(np.float32) reconstructions = [] print('generating samples') for idx in get_batch_idx(Z.shape[0], batch_size): Z_batch = Z[idx] X_batch = decoder_func(Z_batch) reconstructions.append(X_batch) X = np.vstack(reconstructions) X = X.reshape(X.shape[0], 28, 28) fig = plt.figure(1, (12., 12.)) ax1 = plt.axes(frameon=False) ax1.get_xaxis().set_visible(False) ax1.get_yaxis().set_visible(False) plt.title('samples generated from latent space of autoencoder') grid = ImageGrid( fig, 111, nrows_ncols=(21, 21), share_all=True) print('plotting latent space') for i, x in enumerate(X): img = (x * 255).astype(np.uint8) grid[i].imshow(img, cmap='Greys_r') grid[i].get_xaxis().set_visible(False) grid[i].get_yaxis().set_visible(False) grid[i].set_frame_on(False) plt.savefig('latent_train_val.png', bbox_inches='tight')
def test():
    logger = logging.getLogger('MobileNetReID.test')

    # prepare dataloader
    train_loader, val_loader, num_query, num_class = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_class)

    # load parameters
    ckpt_path = cfg.OUTPUT.ROOT_DIR + cfg.OUTPUT.CKPT_DIR + cfg.TEST.BEST_CKPT
    if os.path.isfile(ckpt_path):
        model.load_param(ckpt_path)
    else:
        logger.info("file: {} is not found".format(ckpt_path))
        exit(1)

    use_gpu = cfg.MODEL.DEVICE == 'cuda'
    device = cfg.MODEL.DEVICE_ID

    if use_gpu:
        model = nn.DataParallel(model)
        model.to(device)

    model.eval()
    metrics = R1_mAP(num_query, use_gpu=use_gpu)

    with torch.no_grad():
        for batch in val_loader:
            imgs, pids, camids = batch
            if use_gpu:
                imgs = imgs.to(device)
            feats = model(imgs)
            metrics.update(feats, pids, camids)

    cmc, mAP = metrics.compute()
    logger.info("test result as follows")
    logger.info("mAP: {:.2%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}: {:.2%}".format(r, cmc[r - 1]))

    print("test has ended")
def train(cfg):
    # prepare dataset
    train_loader, val_loader, test_loader, classes_list = make_data_loader(
        cfg, for_train=True)

    # build model and load parameters
    model = build_model(cfg)
    if cfg.SOLVER.SCHEDULER.RETRAIN_FROM_HEAD:
        if cfg.TRAIN.TRICK.PRETRAINED:
            model.load_param("Base", cfg.TRAIN.TRICK.PRETRAIN_PATH)
    else:
        if cfg.TRAIN.TRICK.PRETRAINED:
            model.load_param("Overall", cfg.TRAIN.TRICK.PRETRAIN_PATH)

    train_loader.dataset.batch_converter = model.backbone_batch_converter
    val_loader.dataset.batch_converter = model.backbone_batch_converter
    test_loader.dataset.batch_converter = model.backbone_batch_converter

    # build loss function
    loss_func, loss_class = build_loss(cfg)
    print('Train with losses:', cfg.LOSS.TYPE)

    # build optimizer (based on model)
    optimizer = build_optimizer(cfg, model,
                                bias_free=cfg.MODEL.BIAS_FREE)  # the loss may also carry trainable parameters
    print("Model Bias-Free: {}".format(cfg.MODEL.BIAS_FREE))
    print('Train with optimizer type:', cfg.SOLVER.OPTIMIZER.NAME)

    # build scheduler (based on optimizer)
    scheduler, start_epoch = build_scheduler(cfg, optimizer)

    # build and launch engine for training
    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        classes_list,
        optimizer,
        scheduler,
        loss_func,
        start_epoch,
    )
def translate(src_text, config, model_path, beam=5): """ Translate from a source language to a target language using the model at `model_path` whose config is described by the config at `config`. The translation uses a beam search of width `beam`. """ params, project_dir = \ parse_config(config, batch_size=1) # Tokenize the sentence. p = Popen([ 'perl', 'data/moses/tokenizer/tokenizer.perl', '-threads', '8', '-a', '-l', 'fr' ], stdin=PIPE, stdout=PIPE, stderr=PIPE) stdout, stderr = p.communicate(src_text.encode('utf-8')) stdout = stdout.decode('utf-8') # Build PyTorch model. model, src_vocab, tgt_vocab = build_model(params, project_dir) # Load saved model. if params['cpu']: device = torch.device('cpu') else: device = torch.device(params['gpu_ids'][0]) load_model(model, model_path, device) # Prepare input vector. src_toks = src_vocab.to_ints(stdout)[:MAX_LENGTH] src_data = torch.tensor([src_toks]).to(device) max_tgt_length = min(MAX_LENGTH, int(max(len(src_toks) * 1.5, len(src_toks) + 3))) # Beam search. out_data = beam_search(model, src_data, beam, max_tgt_length) out_text = tgt_vocab.to_text(out_data)[0] print(out_text)
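# Hedged usage sketch (not part of the original code): translate a single
# French sentence with the function above (the Moses tokenizer is invoked with
# '-l fr', so French source text is assumed). The config and checkpoint paths
# are placeholders for whatever the project actually produces.
if __name__ == '__main__':
    translate("Le chat est sur la table.",
              config='experiments/fr-en/config.yaml',
              model_path='experiments/fr-en/best_model.pt',
              beam=5)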
def run(item_fp, bs=512, save_dir=None, fold=None, model_path=None, num_epochs=10, st_epoch=0, stop_window=3, test=False, gene_em=False): items = pd.read_csv(item_fp) num_class = items['label'].nunique() lg.info('item shape: %s, num_class:%s', items.shape, num_class) if save_dir is None: save_dir = '{}_{}'.format( 'result', datetime.datetime.now().strftime('%Y%m%d-%H%M%S')) if not os.path.exists(save_dir): os.mkdir(save_dir) seed = 42 random.seed(seed) np.random.seed(seed) model_ft = build_model(num_class, embedding_dim) if model_path is not None: model_ft.load_state_dict(torch.load('{}'.format(model_path))) train_model(items, model_ft, save_dir, num_epochs=num_epochs, st_epoch=st_epoch, stop_window=stop_window, bs=bs, test=test) # reset for next fold model_path = None st_epoch = 0 if test: lg.info('just test, return')
def plot_latent_space(weightsfile): print('building model') layers = model.build_model() batch_size = 128 decoder_func = theano_funcs.create_decoder_func(layers) print('loading weights from %s' % (weightsfile)) model.load_weights([ layers['l_decoder_out'], layers['l_discriminator_out'], ], weightsfile) # regularly-spaced grid of points sampled from p(z) Z = np.mgrid[2:-2.2:-0.2, -2:2.2:0.2].reshape(2, -1).T[:, ::-1].astype(np.float32) reconstructions = [] print('generating samples') for idx in get_batch_idx(Z.shape[0], batch_size): Z_batch = Z[idx] X_batch = decoder_func(Z_batch) reconstructions.append(X_batch) X = np.vstack(reconstructions) X = X.reshape(X.shape[0], 28, 28) fig = plt.figure(1, (12., 12.)) ax1 = plt.axes(frameon=False) ax1.get_xaxis().set_visible(False) ax1.get_yaxis().set_visible(False) plt.title('samples generated from latent space of autoencoder') grid = ImageGrid(fig, 111, nrows_ncols=(21, 21), share_all=True) print('plotting latent space') for i, x in enumerate(X): img = (x * 255).astype(np.uint8) grid[i].imshow(img, cmap='Greys_r') grid[i].get_xaxis().set_visible(False) grid[i].get_yaxis().set_visible(False) grid[i].set_frame_on(False) plt.savefig('latent_train_val.png', bbox_inches='tight')
def query(image_name, username="******", dataset="market1501", model_name="ssnetv4"):
    path = data_root + "{}/{}/query/".format(username, dataset)

    # merge the config file
    config_file = root + "/configs/{}".format(configs[model_name])
    cfg.merge_from_file(config_file)

    # update the checkpoint path
    cfg.MODEL.PRETRAIN_PATH = cfg.MODEL.PRETRAIN_PATH + "{}/{}.pth".format(
        dataset, model_name)

    img_path = path + image_name
    print("query:", img_path)

    # 1. prepare the data
    query = make_batch_data(cfg, [img_path])  # returns a list of data

    # 2. prepare the model
    model = build_model(cfg, numids[dataset])

    imgs, pids, camids, paths = query
    # img = img.unsqueeze(0)
    img = imgs[0]

    # switch to eval mode
    model.eval()
    result = model(img)
    query[0] = result

    # create the ranker
    ranker = Top5(username, data_root)

    # set the data
    ranker.set_gallery(dataset, model_name)
    ranker.set_query(query)
    result = ranker.compute()

    return result
def main(cfg): torch.cuda.empty_cache() torch.manual_seed(cfg.param.seed) # Training settings cwd = Path(hydra.utils.get_original_cwd()) wsi_dir = cwd/cfg.dir.wsi patch_dir = cwd/cfg.dir.patch ckpt = Checkpoint( cwd, cfg.gpus, cfg.dir.resume, cfg.dir.save_to, cfg.log.save_model) device = torch.device( f"cuda:{cfg.gpus[0]}" if cfg.gpus[0] != -1 else "cpu") model = build_model(gpus=cfg.gpus) optimizer = RAdam(model.parameters(), lr=cfg.param.lr) scheduler = StepLR(optimizer, step_size=1, gamma=cfg.param.gamma) if cfg.dir.resume: model, optimizer, scheduler = ckpt.load_state( model, optimizer, scheduler) criterion = get_loss_fn() train_wsi, test_wsi = split_wsi( wsi_dir, ckpt.save_to, cwd, ratio=cfg.data.ratio, projects=cfg.data.projects, strategies=cfg.data.strategies, limit=cfg.data.limit) for epoch in range(ckpt.start_epoch, cfg.param.epochs + 1): split_data( patch_dir, ckpt.save_to, train_wsi, test_wsi, cfg.data.chunks, epoch, cfg.dir.resume) for chunk in range(ckpt.start_chunk, cfg.data.chunks): data_loader = get_loaders( cfg.param.batch_size, ckpt.save_to, chunk, cfg.gpus) train( model, device, data_loader, optimizer, scheduler, criterion, epoch, cfg.param.epochs, chunk, cfg.data.chunks, ckpt) ckpt.start_chunk = 0 scheduler.step() ckpt.save(model, optimizer, scheduler, epoch, chunk, loss=False) ckpt.close_writer()
def model_train(bertvec, y): model = build_model(maxlen) model.summary() best_model_path = 'model/keras_bert.h5' adlearningRate = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='min', epsilon=0.0001, cooldown=0, min_lr=0) earlyStopping = EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='max') saveBestModel = ModelCheckpoint(best_model_path, save_weights_only=True, monitor='val_acc', verbose=1, save_best_only=True, mode='max') tensorboard = TensorBoard(log_dir='tensorboard', histogram_freq=0, write_graph=True, write_grads=False, write_images=True) model.fit( bertvec, y, batch_size=64, epochs=1, validation_split=0.2, shuffle=True, callbacks=[tensorboard, earlyStopping, saveBestModel, adlearningRate]) # weight to json model_json = model.to_json() with open("model/weight/model.json", "w") as json_file: json_file.write(model_json) # serialize weights to HDF5 model.save_weights("model/weight/model.h5") print("Saved model to disk")
def evaluation(cfg, dataset='val'):
    model = build_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    # load the last checkpoint
    assert cfg.MODEL.WEIGHTS != ""
    model.load_state_dict(torch.load(cfg.MODEL.WEIGHTS))

    # build the dataloader
    dataloader = make_data_loader(cfg, dataset)

    # start the inference procedure
    do_evaluation(
        cfg,
        model,
        dataloader,
        device,
        verbose=True
    )
def pre_load_model(self, ml_mode=ML_MODE_COLORIZE):
    if ml_mode == ML_MODE_COLORIZE:
        model_weights_path = 'models/model.06-2.5489.hdf5'
        self.model = build_model()
        self.model.load_weights(model_weights_path)
        print(self.model.summary())

        # Load the array of quantized ab values
        self.q_ab = np.load("data/pts_in_hull.npy")
        self.nb_q = self.q_ab.shape[0]

        # Fit a NN to q_ab
        self.nn_finder = nn.NearestNeighbors(
            n_neighbors=nb_neighbors, algorithm='ball_tree').fit(self.q_ab)
    elif ml_mode == ML_MODE_SUPER_RES:
        print("Super res not yet implemented in TF_COLORISE class")
    else:
        print("Invalid ML Mode: ", ml_mode)
def train(posFastaFile, negFastaFile, posValFasta, negValFasta, parameters): print "Reading input files..." positives = fasta.load_fasta(posFastaFile, parameters['min_length']) negatives = fasta.load_fasta(negFastaFile, parameters['min_length']) valpos = fasta.load_fasta(posValFasta, parameters['min_length']) valneg = fasta.load_fasta(negValFasta, parameters['min_length']) train = positives, negatives val = valpos, valneg print "Building new model..." mRNN = model.build_model(parameters['weights'], parameters['embedding_size'], parameters['recurrent_gate_size'], 5, parameters['dropout']) print inspect.getmodule(mRNN.__class__) print "Training model..." mRNN = model.train_model(mRNN, train, val, parameters['epochs'], parameters['output'], parameters['max_length'], parameters['save_freq'], parameters['early_stopping']) return mRNN
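# Hedged usage sketch (not part of the original code): a parameters dict with
# every key that train() above reads; the values shown are placeholders, not
# the project's defaults.
parameters = {
    'min_length': 200, 'weights': None, 'embedding_size': 128,
    'recurrent_gate_size': 256, 'dropout': 0.5, 'epochs': 10,
    'output': 'mRNN_run1', 'max_length': 1000, 'save_freq': 1,
    'early_stopping': True,
}
mRNN = train('pos.fa', 'neg.fa', 'pos_val.fa', 'neg_val.fa', parameters)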
def main():
    # select the GPU
    set_gpu()

    # load the data
    train, valid = load_train_data()
    test_images, test_visits = load_test_data()

    # build the model
    model = build_model(num_classes)

    # train the model
    callbacks = set_callbacks(model_path)
    train_model(model, train, valid, callbacks, batch_size, epochs)

    # evaluate the model
    eval_model(model, valid)

    # predict the results
    predict_model(model, test_images, test_visits, test_file_pre_npy_path,
                  result_data_path)
def main(): args = get_arguments() model_weight_path = Path(args.path) if not model_weight_path.exists(): raise FileExistsError(model_weight_path) output_path = Path(args.output_path) if not output_path.exists(): raise FileExistsError(output_path) width, height = 224, 224 num_channels = 3 num_classes = len(USE_LABELS) input_shapes = (height, width, num_channels) base_model = resnet_v2.ResNet101V2(include_top=False, weights='imagenet', input_shape=input_shapes) model = build_model(base_model, n_classes=num_classes) model.load_weights(str(model_weight_path)) tf.keras.backend.clear_session() model.save(str(output_path), save_format="tf")
def build_model(self): self.net_bone = build_model(base_model_cfg) if self.config.cuda: self.net_bone = self.net_bone.cuda() self.net_bone.eval() # use_global_stats = True self.net_bone.apply(weights_init) if self.config.mode == 'train': if self.config.load_bone == '': if base_model_cfg == 'vgg': self.net_bone.base.load_pretrained_model(torch.load(self.config.vgg)) elif base_model_cfg == 'resnet': self.net_bone.base.load_state_dict(torch.load(self.config.resnet)) if self.config.load_bone != '': self.net_bone.load_state_dict(torch.load(self.config.load_bone)) self.lr_bone = p['lr_bone'] self.lr_branch = p['lr_branch'] self.optimizer_bone = Adam(filter(lambda p: p.requires_grad, self.net_bone.parameters()), lr=self.lr_bone, weight_decay=p['wd']) self.print_network(self.net_bone, 'trueUnify bone part')
def main(): args = parse_args() callbacks = None if args.save: logdir = 'logdir/{}_{:03d}'.format("yelp_photos", len(glob.glob('logdir/*'))) print('Saving to {}'.format(logdir)) callbacks = [ keras.callbacks.ModelCheckpoint( os.path.join(logdir, 'mobilenetv2.h5')), keras.callbacks.TensorBoard(log_dir=logdir) ] ds_train, train_info = build_yelp_dataset(split='train', image_shape=(args.res, args.res), rotate=True, batch_size=args.batch_size) ds_test, test_info = build_yelp_dataset(split='test', image_shape=(args.res, args.res), rotate=True, batch_size=args.batch_size) model = build_model(base_weights=args.base_model, classes=train_info["classes"], input_shape=(args.res, args.res, 3), full_weights=args.full_model) model.compile(optimizer=keras.optimizers.Adam(learning_rate=args.lr), loss='sparse_categorical_crossentropy', metrics=['accuracy']) model.fit(ds_train, callbacks=callbacks, epochs=args.epochs, steps_per_epoch=train_info["length"], validation_data=ds_test, validation_steps=test_info["length"]) if args.save: model.layers[0].save(os.path.join(logdir, 'mobilenetv2_base.h5'))
def eval(cfg, target_set_name="test"): # prepare dataset train_loader, val_loader, test_loader, classes_list = make_data_loader(cfg, for_train=False) num_classes = len(classes_list) # build model and load parameter model = build_model(cfg) model.load_param("Overall", cfg.TEST.WEIGHT) # build loss function loss_func, loss_class = build_loss(cfg) print('Eval with losses:', cfg.LOSS.TYPE) # input data_loader if target_set_name == "train": input_data_loader = train_loader elif target_set_name == "valid": input_data_loader = val_loader elif target_set_name == "test": input_data_loader = test_loader else: raise Exception("Wrong Dataset Name!") # build and launch engine for evaluation metrics = do_inference(cfg, model, input_data_loader, classes_list, loss_func, target_set_name=target_set_name, plotFlag=True) # logging with tensorboard summaryWriter model_epoch = cfg.TEST.WEIGHT.split('/')[-1].split('.')[0].split('_')[-1] model_iteration = len(train_loader) * int(model_epoch) if model_epoch.isdigit() == True else 0 writer_test = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/eval_" + target_set_name) writer_test.add_scalar("MSE", metrics["mse"], model_iteration) writer_test.close()
def train(config, experiment_name=None): num_classes = config.MODEL.NUM_CLASSES # dataloader for training train_period = 'train' train_loader = build_dataloader(cfg=config, period=train_period, loader_type='train') val_loader = build_dataloader(cfg=config, period=train_period, loader_type='val') # prepare model model = build_model(cfg=config) print('The loss type is', config.MODEL.LOSS_TYPE) loss_func = build_loss(config, num_classes) optimizer = build_optimizer(config, model) # Add for using self trained model if config.MODEL.PRETRAIN_CHOICE == 'self': start_epoch = eval( config.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_') [-1]) print('Start epoch:', start_epoch) path_to_optimizer = config.MODEL.PRETRAIN_PATH.replace( 'model', 'optimizer') print('Path to the checkpoint of optimizer:', path_to_optimizer) model.load_state_dict(torch.load(config.MODEL.PRETRAIN_PATH)) optimizer.load_state_dict(torch.load(path_to_optimizer)) scheduler = WarmUpMultiStepLR(optimizer, config.SOLVER.STEPS, config.SOLVER.GAMMA, config.SOLVER.WARMUP_FACTOR, config.SOLVER.WARMUP_ITERS, config.SOLVER.WARMUP_METHOD) print('------------------ Start Training -------------------') do_train(config, model, train_loader, val_loader, optimizer, scheduler, loss_func, experiment_name) print('---------------- Training Completed ---------------- ')
def main(): # set mode try: mode = sys.argv[1] assert (mode == 'dnn' or mode == 'cnn') except: print('Error: Model mode not found') exit() # set parameters # TODO choose good numbers of batch size and epoch batch = 32 epoch = 20 # load data tr_feats, te_feats, tr_labels, te_labels = read_dataset() # data augmentation # TODO set up the parameters for 'ImageDataGenerator' augment_gen = ImageDataGenerator() origin_gen = ImageDataGenerator() # build model emotion_classifier = model.build_model(mode) # start training emotion_classifier.fit_generator(augment_gen.flow(tr_feats, tr_labels, batch_size=batch, seed=0), steps_per_epoch=len(tr_feats) // batch, validation_data=origin_gen.flow( te_feats, te_labels, batch_size=batch, seed=0), validation_steps=len(te_feats) // batch, epochs=epoch) # save model emotion_classifier.save_weights(mode + '.h5')
def train(notes, char_to_idx, uniqueNotesLen, epochs=100, save_freq=10): #model_architecture model = build_model(BATCH_SIZE, SEQ_LENGTH, vocab_size) model.summary() model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) #Train data generation T = np.asarray( [char_to_idx[c] for c in notes], dtype=np.int32) #convert complete text into numerical indices #T_norm = T / float(uniqueNotesLen) print("Length of text:" + str(T.size)) print("Length of unique test: ,", uniqueNotesLen) steps_per_epoch = (len(notes) / BATCH_SIZE - 1) / SEQ_LENGTH print('Steps per epoch : ', steps_per_epoch) log = TrainLogger('training_log.csv') for epoch in range(epochs): print('\nEpoch {}/{}'.format(epoch + 1, epochs)) losses, accs = [], [] msg = "" for i, (X, Y) in enumerate(read_batches(T, vocab_size)): print(X) loss, acc = model.train_on_batch(X, Y) print('Batch {}: loss = {}, acc = {}'.format(i + 1, loss, acc)) losses.append(loss) accs.append(acc) log.add_entry(np.average(losses), np.average(accs)) if (epoch + 1) % save_freq == 0: save_weights(epoch + 1, model) print('Saved checkpoint to', 'weights.{}.h5'.format(epoch + 1))
def main(): rmtree('out') makedirs('out') parser = argparse.ArgumentParser() parser.add_argument('content_image') parser.add_argument('style_image') args = parser.parse_args() content_input = matrix_from_image_file(args.content_image, 0.1) style_input = matrix_from_image_file(args.style_image) model = build_model(layers, content_input, style_input) wp = WeightProvider() for i in range(10000): content_weights, style_weights = wp.get_weights() print('Using weights:\n{}\n{}'.format(content_weights, style_weights)) outfile = f'out/run_{i:0>3}.png' rel_content_weights = [ w * CONTENT_WEIGHT_MULTIPLIER for w in content_weights ] img, losses = run_model(model, RUNS_PER_EPOCH, content_input.shape, rel_content_weights, style_weights) img.save(outfile) r = { 'img': outfile, 'layers': package_layers( zip(layers, content_weights, losses[:8], style_weights, losses[8:])), 'parts': ['content', 'style'], } with open(RESULTS_FILE, 'w') as fh: dump(r, fh)
def plot_autoencoder(weightsfile): print('building model') layers = model.build_model() batch_size = 128 print('compiling theano function') encoder_func = theano_funcs.create_encoder_func(layers) print('loading weights from %s' % (weightsfile)) model.load_weights([ layers['l_decoder_out'], layers['l_discriminator_out'], ], weightsfile) print('loading data') X_train, y_train, X_test, y_test = utils.load_mnist() train_datapoints = [] print('transforming training data') for train_idx in get_batch_idx(X_train.shape[0], batch_size): X_train_batch = X_train[train_idx] train_batch_codes = encoder_func(X_train_batch) train_datapoints.append(train_batch_codes) test_datapoints = [] print('transforming test data') for test_idx in get_batch_idx(X_test.shape[0], batch_size): X_test_batch = X_test[test_idx] test_batch_codes = encoder_func(X_test_batch) test_datapoints.append(test_batch_codes) Z_train = np.vstack(train_datapoints) Z_test = np.vstack(test_datapoints) plot(Z_train, y_train, Z_test, y_test, filename='adversarial_train_val.png', title='projected onto latent space of autoencoder')
def test_eval():
    data_root = "data_dir"
    dataset = AudiobookDataset(data_root)

    if hp.input_type == 'raw':
        collate_fn = raw_collate
    elif hp.input_type == 'bits':
        collate_fn = discrete_collate
    else:
        raise ValueError("input_type:{} not supported".format(hp.input_type))

    data_loader = DataLoader(dataset, collate_fn=collate_fn, shuffle=True,
                             num_workers=0, batch_size=hp.batch_size)

    device = torch.device("cuda" if use_cuda else "cpu")
    print("using device:{}".format(device))

    # build model, create optimizer
    model = build_model().to(device)

    evaluate_model(model, data_loader)
def main(f=None): args = parser.parse_args() if args.cmd_type == "embeddings": build_embeddings(args.data, args.output) elif args.cmd_type == "train": print("Loading embeddings ...") embeddings = EmbeddingsData.load(args.embeddings_data_file) print("Loading the data ...") the_data = Data.make_data(args.train_file, args.dev_file, args.batch_size) print("Building the model ...") model = build_model(embeddings, args.batch_size) train_sess = Training.make_training(model, the_data, args.epoch_count) while train_sess.has_more_epochs(): print("Next epoch ...") train_sess.next_epoch() # TODO save model else: # ? exit(1) return 0
def test_space(spaces, remove_bad_topologies=True):
    pp = pprint.PrettyPrinter(indent=4)
    good_spaces = []
    for i, space in enumerate(spaces):
        logm(f'Testing space [{i+1} of {len(spaces)}]',
             cur_frame=currentframe(), mtype='I')
        pp.pprint(space)
        try:
            K.clear_session()
            model = build_model(conf, space,
                                input_shape=(SPEC_SHAPE_HEIGTH,
                                             SPEC_SHAPE_WIDTH,
                                             CHANNELS))
        except ValueError as err:
            logm(f'Failed when building the model: {str(err)}',
                 cur_frame=currentframe(), mtype='I')
            # `del space` only unbinds the loop variable and leaves `spaces`
            # untouched, so bad topologies are dropped by excluding them from
            # the returned list instead.
            if remove_bad_topologies:
                continue
        good_spaces.append(space)
    return good_spaces
def test_momentum(m=[0.99, 0.97, 0.95, 0.93, 0.91]): X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets() results = {} for i in range(len(m)): temp_results = [] for j in range(20): model = build_model(layers_neurons=[32, 4, 1], lr=0.001, momentum=m[i]) history = model.fit(X_train, y_train, epochs=20, batch_size=10) temp_results.append(history.history["loss"][-1]) results[m[i]] = mean(temp_results) with open('results.json', "r") as file: data = json.load(file) data["momentum"] = results with open('results.json', "w") as file: json.dump(data, file, indent=4) return results
def train(self):
    '''Train the model.'''
    # number_of_epoch = len(self.files_content) // self.config.batch_size
    number_of_epoch = 50

    built_model = model.build_model(self.config, self.num2word, self.words)
    self.model = built_model
    self.model.summary()

    history = self.model.fit_generator(
        generator=self.data_generator(),
        verbose=True,
        steps_per_epoch=self.config.batch_size,
        epochs=number_of_epoch,
        callbacks=[
            keras.callbacks.ModelCheckpoint(self.config.weight_file,
                                            save_weights_only=False),
            LambdaCallback(on_epoch_end=self.generate_sample_result)
        ])

    utils.result_image(history)
def test_train_model(self): """ Test if function returns trained model """ texts, labels = preprocess_labels(data_dir_path="data/mock_aclImdb", dataset="train") vectorized_texts, word_index = tokenize_data(texts) mock_X_train, mock_y_train, mock_X_val, mock_y_val = split_data( vectorized_texts, labels) mock_embedding_matrix = pickle.load( open("models/mock_glove.6B/mock_embedding_matrix.p", "rb")) mock_model = build_model(mock_embedding_matrix) mock_trained_model = train_model(mock_model, (mock_X_train, mock_y_train), (mock_X_val, mock_y_val)) self.assertIsNotNone(mock_trained_model[1], "no model trained") self.assertIsNotNone(mock_trained_model[0], "history dict doesn't exist")
def main(args): model_id = build_model_id(args) model_path = build_model_path(args, model_id) setup_model_dir(args, model_path) rng = np.random.RandomState(args.seed) json_cfg = load_model_json(args, x_train=None, n_classes=None) model_cfg = ModelConfig(**json_cfg) if args.verbose: print("model_cfg " + str(model_cfg)) sys.path.append(args.model_dir) import model from model import build_model, fit_model, load_train, load_validation train_data = load_train(args, model_cfg) validation_data = load_validation(args, model_cfg) if args.verbose: print("loading model") model = build_model(model_cfg, train_data, validation_data) fit_model(model, train_data, validation_data, args)
def get_prediction_function(feature_layer = None): ''' Get prediction function (C3D and Video2GIF combined) @param feature_layer: a layer name (see model.py). If provided, pred_fn returns (score, and the activations at feature_layer) @return: theano function that scores sniplets ''' print('Load weights and compile model...') # Build model net= model.build_model(batch_size=2) # Set the weights (takes some time) model.set_weights(net['score'],config.get('paths','c3d_weight_file'),config.get('paths','video2gif_weight_file')) layer='score' prediction = lasagne.layers.get_output(net[layer], deterministic=True) if feature_layer: features = lasagne.layers.get_output(net[feature_layer], deterministic=True) pred_fn = theano.function([net['input'].input_var], [prediction, features], allow_input_downcast = True) else: pred_fn = theano.function([net['input'].input_var], prediction, allow_input_downcast = True) return pred_fn
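# Hedged usage sketch (not part of the original code): scoring snippets with
# the prediction function above. C3D-style networks commonly take 16-frame RGB
# clips at 112x112 resolution, and the model above is built with batch_size=2,
# so the exact input shape used here is an assumption.
import numpy as np
pred_fn = get_prediction_function()
clips = np.random.rand(2, 3, 16, 112, 112).astype('float32')  # placeholder clips
scores = pred_fn(clips)
print(scores)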
def trainer(data='coco', #f8k, f30k, coco margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size = 128, saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz', validFreq=100, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl'%saveto, 'rb') as f: models_options = pkl.load(f) # Load training and development sets print 'Loading dataset' train, dev = load_dataset(data)[:2] # Create and save dictionary print 'Creating dictionary' worddict = build_dictionary(train[0]+dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl'%saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Building sentence encoder' trng, inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr = 0. 
n_samples = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x == None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc ls = encode_sentences(curr_model, dev[0]) lim = encode_images(curr_model, dev[1]) (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print 'Saving...', params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl'%saveto, 'wb')) print 'Done' print 'Seen %d samples'%n_samples
import torchfile import numpy as np import time, sys from model import build_model from util import * # constants width = 128 loss_lambda = 0.1 checkpoint_dir = sys.argv[1] # model # grasp_class_prediction, depth_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width) grasp_class_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width) depth_image_ph = tf.placeholder('float', [None, width, width, 1]) grasp_class_ph = tf.placeholder('int64', [None]) # loss grasp_class_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit, grasp_class_ph), name='grasp_class_loss') # depth_loss = tf.reduce_mean(tf.square(depth_image_ph - depth_prediction), name='depth_loss') # combined_loss = (1. - loss_lambda) * grasp_class_loss + loss_lambda * depth_loss combined_loss = grasp_class_loss # evaluation batch = int(sys.argv[2]) correct_prediction = tf.equal(tf.argmax(grasp_class_prediction, 1), grasp_class_ph) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)
def train_autoencoder(): print('building model') layers = model.build_model() max_epochs = 5000 batch_size = 128 weightsfile = join('weights', 'weights_train_val.pickle') print('compiling theano functions for training') print(' encoder/decoder') encoder_decoder_update = theano_funcs.create_encoder_decoder_func( layers, apply_updates=True) print(' discriminator') discriminator_update = theano_funcs.create_discriminator_func( layers, apply_updates=True) print(' generator') generator_update = theano_funcs.create_generator_func( layers, apply_updates=True) print('compiling theano functions for validation') print(' encoder/decoder') encoder_decoder_func = theano_funcs.create_encoder_decoder_func(layers) print(' discriminator') discriminator_func = theano_funcs.create_discriminator_func(layers) print(' generator') generator_func = theano_funcs.create_generator_func(layers) print('loading data') X_train, y_train, X_test, y_test = utils.load_mnist() try: for epoch in range(1, max_epochs + 1): print('epoch %d' % (epoch)) # compute loss on training data and apply gradient updates train_reconstruction_losses = [] train_discriminative_losses = [] train_generative_losses = [] for train_idx in get_batch_idx(X_train.shape[0], batch_size): X_train_batch = X_train[train_idx] # 1.) update the encoder/decoder to min. reconstruction loss train_batch_reconstruction_loss =\ encoder_decoder_update(X_train_batch) # sample from p(z) pz_train_batch = np.random.uniform( low=-2, high=2, size=(X_train_batch.shape[0], 2)).astype( np.float32) # 2.) update discriminator to separate q(z|x) from p(z) train_batch_discriminative_loss =\ discriminator_update(X_train_batch, pz_train_batch) # 3.) update generator to output q(z|x) that mimic p(z) train_batch_generative_loss = generator_update(X_train_batch) train_reconstruction_losses.append( train_batch_reconstruction_loss) train_discriminative_losses.append( train_batch_discriminative_loss) train_generative_losses.append( train_batch_generative_loss) # average over minibatches train_reconstruction_losses_mean = np.mean( train_reconstruction_losses) train_discriminative_losses_mean = np.mean( train_discriminative_losses) train_generative_losses_mean = np.mean( train_generative_losses) print(' train: rec = %.6f, dis = %.6f, gen = %.6f' % ( train_reconstruction_losses_mean, train_discriminative_losses_mean, train_generative_losses_mean, )) # compute loss on test data test_reconstruction_losses = [] test_discriminative_losses = [] test_generative_losses = [] for test_idx in get_batch_idx(X_test.shape[0], batch_size): X_test_batch = X_test[test_idx] test_batch_reconstruction_loss =\ encoder_decoder_func(X_test_batch) # sample from p(z) pz_test_batch = np.random.uniform( low=-2, high=2, size=(X_test.shape[0], 2)).astype( np.float32) test_batch_discriminative_loss =\ discriminator_func(X_test_batch, pz_test_batch) test_batch_generative_loss = generator_func(X_test_batch) test_reconstruction_losses.append( test_batch_reconstruction_loss) test_discriminative_losses.append( test_batch_discriminative_loss) test_generative_losses.append( test_batch_generative_loss) test_reconstruction_losses_mean = np.mean( test_reconstruction_losses) test_discriminative_losses_mean = np.mean( test_discriminative_losses) test_generative_losses_mean = np.mean( test_generative_losses) print(' test: rec = %.6f, dis = %.6f, gen = %.6f' % ( test_reconstruction_losses_mean, test_discriminative_losses_mean, test_generative_losses_mean, )) except KeyboardInterrupt: print('caught ctrl-c, stopped training') weights 
= get_all_param_values([ layers['l_decoder_out'], layers['l_discriminator_out'], ]) print('saving weights to %s' % (weightsfile)) model.save_weights(weights, weightsfile)
def main(): rospy.init_node('execute') rs = baxter_interface.RobotEnable(baxter_interface.CHECK_VERSION) rs.enable() # retrieve images global current_image def update_image(msg): global current_image current_image = PIL_Image.frombytes('RGBA', (msg.width, msg.height), msg.data) # print msg.width, msg.height, msg.is_bigendian, msg.step, msg.encoding rospy.Subscriber('/cameras/left_hand_camera/image', Image, update_image) # model width = 128 checkpoint_dir = 'checkpoints-dev-rgb-4-max' grasp_class_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width) saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1) arm = baxter_interface.Limb('left') arm.move_to_neutral() gripper = baxter_interface.Gripper('left') gripper.calibrate() # grasp crop crop_center_x = 330 crop_center_y = 160 grasp_class_threashold = 0.5 scale = 1.0 crop_width = width * scale crop_box = (crop_center_x - crop_width/2, crop_center_y - crop_width/2, crop_center_x + crop_width/2, crop_center_y + crop_width/2) # grasp workspace x0 = 0.81 y0 = 0.25 delta = 0.04 initial_z = 0.1 bound_z = -0.165 grasp_class_threashold = 0.5 pub = rospy.Publisher('/robot/xdisplay', Image, queue_size=1) global force def display_gripper_state(msg): global force force = msg.force rospy.Subscriber('/robot/end_effector/left_gripper/state', EndEffectorState, display_gripper_state) with tf.Session() as sess: restore_vars(saver, sess, checkpoint_dir) attemp = 0 while True: # sample a grasp dx = np.random.rand() * (2. * delta) - delta dy = np.random.rand() * (2. * delta) - delta target_theta = (np.random.rand() * 2. - 1.) * 3.059 target_x = x0 + dx target_y = y0 + dy # move to the grasp location execute_linear(arm, target_x, target_y, initial_z, target_theta) # predict grasp crop = np.array(current_image.crop(crop_box).resize((width, width)))[:,:,:3] grasp_pred = grasp_class_prediction.eval(session=sess, feed_dict={ grasp_image_ph: crop.reshape((1, width, width, 3)), keep_prob_ph: 1., }) # display image draw = PIL_ImageDraw.Draw(current_image) draw.text(crop_box[:2], 'prob: %.5f' % grasp_pred[0, 1]) draw.text((20, 20), 'grasp force: %.5f' % force) if grasp_pred[0, 1] > grasp_class_threashold: draw.rectangle(crop_box, outline=(0, 255, 0)) else: draw.rectangle(crop_box, outline=(0, 0, 255)) msg = Image( header=Header( stamp=rospy.Time.now(), frame_id='base', ), width=640, height=400, step=640 * 4, encoding='bgra8', is_bigendian=0, data=current_image.tobytes(), ) pub.publish(msg) if grasp_pred[0, 1] > grasp_class_threashold: execute_planar_grasp(arm, gripper, initial_z, bound_z, target_theta, lower_to_drop=0.05) attemp += 1
N = args.cube_size # The number of permutations from a finished cube to get the initial cube rand_nb = args.rand_nb eps = args.epsilon # The probability of taking a random moves gamma = args.gamma # The discount lr = args.learning_rate mb_size = args.mini_batch_size # The minibatch size # Initialize the Replay_Memory: replay_memory = [] # Initialize Q: function of the Neural Network Q, gradient_descent_step, params = build_model(args) max_action = max_action_Q(N, Q) # Printing current_episode_century = 0 count = 0 for episode in range(M): # Initialize a random cube env = Environment(N) moves = env.suffle(rand_nb=rand_nb) # Show good examples in the replay memory with probability # "good_examples" r = np.random.uniform(0., 1., 1) if r < args.good_examples:
def main(args): model_id = build_model_id(args) model_path = build_model_path(args, model_id) setup_model_dir(args, model_path) sys.stdout, sys.stderr = setup_logging(args, model_path) x_train, y_train = load_model_data(args.train_file, args.data_name, args.target_name) x_validation, y_validation = load_model_data( args.validation_file, args.data_name, args.target_name) rng = np.random.RandomState(args.seed) if args.n_classes > -1: n_classes = args.n_classes else: n_classes = max(y_train)+1 n_classes, target_names, class_weight = load_target_data(args, n_classes) if class_weight is None and args.class_weight_auto: n_samples = len(y_train) weights = float(n_samples) / (n_classes * np.bincount(y_train)) if args.class_weight_exponent: weights = weights**args.class_weight_exponent class_weight = dict(zip(range(n_classes), weights)) if args.verbose: logging.debug("n_classes {0} min {1} max {2}".format( n_classes, min(y_train), max(y_train))) y_train_one_hot = np_utils.to_categorical(y_train, n_classes) y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes) if args.verbose: logging.debug("y_train_one_hot " + str(y_train_one_hot.shape)) logging.debug("x_train " + str(x_train.shape)) min_vocab_index = np.min(x_train) max_vocab_index = np.max(x_train) if args.verbose: logging.debug("min vocab index {0} max vocab index {1}".format( min_vocab_index, max_vocab_index)) json_cfg = load_model_json(args, x_train, n_classes) if args.verbose: logging.debug("loading model") sys.path.append(args.model_dir) import model from model import build_model ####################################################################### # Subsetting ####################################################################### if args.subsetting_function: subsetter = getattr(M, args.subsetting_function) else: subsetter = None def take_subset(subsetter, path, x, y, y_one_hot, n): if subsetter is None: return x[0:n], y[0:n], y_one_hot[0:n] else: mask = subsetter(path) idx = np.where(mask)[0] idx = idx[0:n] return x[idx], y[idx], y_one_hot[idx] x_train, y_train, y_train_one_hot = take_subset( subsetter, args.train_file, x_train, y_train, y_train_one_hot, n=args.n_train) x_validation, y_validation, y_validation_one_hot = take_subset( subsetter, args.validation_file, x_validation, y_validation, y_validation_one_hot, n=args.n_validation) ####################################################################### # Preprocessing ####################################################################### if args.preprocessing_class: preprocessor = getattr(M, args.preprocessing_class)(seed=args.seed) else: preprocessor = modeling.preprocess.NullPreprocessor() if args.verbose: logging.debug("y_train_one_hot " + str(y_train_one_hot.shape)) logging.debug("x_train " + str(x_train.shape)) model_cfg = ModelConfig(**json_cfg) if args.verbose: logging.info("model_cfg " + str(model_cfg)) net = build_model(model_cfg) setattr(net, 'stop_training', False) marshaller = None if isinstance(net, keras.models.Graph): marshaller = getattr(model, args.graph_marshalling_class)() logging.info('model has {n_params} parameters'.format( n_params=count_parameters(net))) if len(args.extra_train_file) > 1: callbacks = keras.callbacks.CallbackList() else: callbacks = [] save_model_info(args, model_path, model_cfg) callback_logger = logging.info if args.log else callable_print ####################################################################### # Callbacks that need validation set predictions. 
####################################################################### pc = PredictionCallback(x_validation, callback_logger, marshaller=marshaller, batch_size=model_cfg.batch_size) callbacks.append(pc) if args.classification_report: cr = ClassificationReport(x_validation, y_validation, callback_logger, target_names=target_names) pc.add(cr) if args.confusion_matrix: cm = ConfusionMatrix(x_validation, y_validation, callback_logger) pc.add(cm) def get_mode(metric_name): return { 'val_loss': 'min', 'val_acc': 'max', 'val_f1': 'max', 'val_f2': 'max', 'val_f0.5': 'max' }[metric_name] if args.early_stopping or args.early_stopping_metric is not None: es = EarlyStopping(monitor=args.early_stopping_metric, mode=get_mode(args.early_stopping_metric), patience=model_cfg.patience, verbose=1) cb = DelegatingMetricCallback( x_validation, y_validation, callback_logger, delegate=es, metric_name=args.early_stopping_metric, marshaller=marshaller) pc.add(cb) if not args.no_save: if args.save_all_checkpoints: filepath = model_path + '/model-{epoch:04d}.h5' else: filepath = model_path + '/model.h5' mc = ModelCheckpoint( filepath=filepath, mode=get_mode(args.checkpoint_metric), verbose=1, monitor=args.checkpoint_metric, save_best_only=not args.save_every_epoch) cb = DelegatingMetricCallback( x_validation, y_validation, callback_logger, delegate=mc, metric_name=args.checkpoint_metric, marshaller=marshaller) pc.add(cb) if model_cfg.optimizer == 'SGD': callbacks.append(SingleStepLearningRateSchedule(patience=10)) if len(args.extra_train_file) > 1: args.extra_train_file.append(args.train_file) logging.info("Using the following files for training: " + ','.join(args.extra_train_file)) train_file_iter = itertools.cycle(args.extra_train_file) current_train = args.train_file callbacks._set_model(net) callbacks.on_train_begin(logs={}) epoch = batch = 0 while True: x_train, y_train_one_hot = preprocessor.fit_transform( x_train, y_train_one_hot) x_validation, y_validation_one_hot = preprocessor.transform( x_validation, y_validation_one_hot) iteration = batch % len(args.extra_train_file) logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format( epoch=epoch, iteration=iteration, train_file=current_train)) callbacks.on_epoch_begin(epoch, logs={}) n_train = x_train.shape[0] callbacks.on_batch_begin(batch, logs={'size': n_train}) index_array = np.arange(n_train) if args.shuffle: rng.shuffle(index_array) batches = keras.models.make_batches(n_train, model_cfg.batch_size) logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format( epoch=epoch, iteration=iteration, n_batches=len(batches))) avg_train_loss = avg_train_accuracy = 0. for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if isinstance(net, keras.models.Graph): train_data = marshaller.marshal( x_train[batch_ids], y_train_one_hot[batch_ids]) train_loss = net.train_on_batch( train_data, class_weight=class_weight) # It looks like train_on_batch returns a different # type for graph than sequential models. train_loss = train_loss[0] train_accuracy = 0. 
else: train_loss, train_accuracy = net.train_on_batch( x_train[batch_ids], y_train_one_hot[batch_ids], accuracy=True, class_weight=class_weight) batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy} avg_train_loss = (avg_train_loss * batch_index + train_loss)/(batch_index + 1) avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy)/(batch_index + 1) callbacks.on_batch_end(batch, logs={'loss': train_loss, 'accuracy': train_accuracy}) logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format( epoch=epoch, iteration=iteration, n_batches=len(batches))) logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format( epoch=epoch, iteration=iteration, loss=avg_train_loss, acc=avg_train_accuracy)) batch += 1 # Validation frequency (this if-block) doesn't necessarily # occur in the same iteration as beginning of an epoch # (next if-block), so net.evaluate appears twice here. kwargs = { 'batch_size': model_cfg.batch_size, 'verbose': 0 if args.log else 1 } pargs = [] validation_data = {} if isinstance(net, keras.models.Graph): validation_data = marshaller.marshal( x_validation, y_validation_one_hot) pargs = [validation_data] else: pargs = [x_validation, y_validation_one_hot] kwargs['show_accuracy'] = True if (iteration + 1) % args.validation_freq == 0: if isinstance(net, keras.models.Graph): val_loss = net.evaluate(*pargs, **kwargs) y_hat = net.predict(validation_data, batch_size=model_cfg.batch_size) val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)) else: val_loss, val_acc = net.evaluate( *pargs, **kwargs) logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format( epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc)) epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc} callbacks.on_epoch_end(epoch, epoch_end_logs) if batch % len(args.extra_train_file) == 0: if isinstance(net, keras.models.Graph): val_loss = net.evaluate(*pargs, **kwargs) y_hat = net.predict(validation_data, batch_size=model_cfg.batch_size) val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)) else: val_loss, val_acc = net.evaluate( *pargs, **kwargs) logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format( epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc)) epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc} epoch += 1 callbacks.on_epoch_end(epoch, epoch_end_logs) if net.stop_training: logging.info("epoch {epoch} iteration {iteration} - done training".format( epoch=epoch, iteration=iteration)) break current_train = next(train_file_iter) x_train, y_train = load_model_data(current_train, args.data_name, args.target_name) y_train_one_hot = np_utils.to_categorical(y_train, n_classes) if epoch > args.n_epochs: break callbacks.on_train_end(logs={}) else: x_train, y_train_one_hot = preprocessor.fit_transform( x_train, y_train_one_hot) x_validation, y_validation_one_hot = preprocessor.transform( x_validation, y_validation_one_hot) if isinstance(net, keras.models.Graph): train_data = marshaller.marshal( x_train, y_train_one_hot) validation_data = marshaller.marshal( x_validation, y_validation_one_hot) net.fit(train_data, shuffle=args.shuffle, nb_epoch=args.n_epochs, batch_size=model_cfg.batch_size, validation_data=validation_data, callbacks=callbacks, class_weight=class_weight, verbose=2 if args.log else 1) else: net.fit(x_train, 
y_train_one_hot, shuffle=args.shuffle, nb_epoch=args.n_epochs, batch_size=model_cfg.batch_size, show_accuracy=True, validation_data=(x_validation, y_validation_one_hot), callbacks=callbacks, class_weight=class_weight, verbose=2 if args.log else 1)
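# A minimal sketch (not part of the script above) of the incremental-mean update the
# training loop above uses to track average batch loss/accuracy without storing every
# value: after batch i (0-based), avg_new = (avg_old * i + value_i) / (i + 1).
def running_mean(values):
    avg = 0.0
    for i, v in enumerate(values):
        avg = (avg * i + v) / (i + 1)  # same update rule as avg_train_loss above
    return avg

# Example: the mean of three per-batch losses.
assert abs(running_mean([0.9, 0.6, 0.3]) - 0.6) < 1e-12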
help='sum the integers (default: find the max)') parser.add_argument('--batch-size', type=int, help='Batch size') parser.add_argument('--max-training-files', type=int, help='Maximum number of training files to use.') args = parser.parse_args() model.MAX_TRAINING_FILES = args.max_training_files if args.max_training_files else 3000 cluster_to_data = model.load_training_data() logging.info("Finished loading training data. Finalizing training data.") for model_num in range(5): print "Testing with model from cluster " + str(model_num) weights_file = "weights-" + str(model_num) + ".09.hdf5" my_model = model.build_model(args.gpu) my_model.load_weights(weights_file) for cluster, data in cluster_to_data.iteritems(): X, flops, y = data new_flops = np.zeros((flops.shape[0], model.INPUT_LENGTH, flops.shape[1])) # Zero out the flop features for timesteps before the flop is dealt for i, X_hand in enumerate(X): for j, v in enumerate(X_hand): # first post-flop action for this hand if v[15] == 1: break new_flops[i] = np.concatenate((np.zeros((j, flops.shape[1])),\ np.tile(np.expand_dims(flops[i], 0),\ (model.INPUT_LENGTH - j, 1)))) flops = new_flops.astype(int)
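# A minimal, hypothetical sketch (the names and sizes below are illustrative, not taken
# from the script above) of the array construction used there: the board ("flop") vector
# is hidden (all zeros) for the first k timesteps and repeated for the remaining ones.
import numpy as np

INPUT_LENGTH = 6                # assumed sequence length
flop = np.array([3, 7, 11])     # assumed 3-feature board encoding
k = 2                           # first timestep at which the flop is visible

expanded = np.concatenate((
    np.zeros((k, flop.shape[0])),                              # pre-flop timesteps
    np.tile(np.expand_dims(flop, 0), (INPUT_LENGTH - k, 1)),   # flop repeated afterwards
))
print(expanded.shape)   # (6, 3)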
def trainer(X, C, stmodel, dimctx=4800, #vector dimensionality dim_word=620, # word vector dimensionality dim=1600, # the number of GRU units encoder='gru', decoder='gru', doutput=False, max_epochs=5, dispFreq=1, decay_c=0., grad_clip=5., n_words=40000, maxlen_w=100, optimizer='adam', batch_size = 16, saveto='/u/rkiros/research/semhash/models/toy.npz', dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl', embeddings=None, saveFreq=1000, sampleFreq=100, reload_=False): # Model options model_options = {} model_options['dimctx'] = dimctx model_options['dim_word'] = dim_word model_options['dim'] = dim model_options['encoder'] = encoder model_options['decoder'] = decoder model_options['doutput'] = doutput model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['n_words'] = n_words model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['dictionary'] = dictionary model_options['embeddings'] = embeddings model_options['saveFreq'] = saveFreq model_options['sampleFreq'] = sampleFreq model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl'%saveto, 'rb') as f: models_options = pkl.load(f) # load dictionary print 'Loading dictionary...' worddict = load_dictionary(dictionary) # Load pre-trained embeddings, if applicable if embeddings != None: print 'Loading embeddings...' with open(embeddings, 'rb') as f: embed_map = pkl.load(f) dim_word = len(embed_map.values()[0]) model_options['dim_word'] = dim_word preemb = norm_weight(n_words, dim_word) pz = defaultdict(lambda : 0) for w in embed_map.keys(): pz[w] = 1 for w in worddict.keys()[:n_words-2]: if pz[w] > 0: preemb[worddict[w]] = embed_map[w] else: preemb = None # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print 'Building model' params = init_params(model_options, preemb=preemb) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) print 'Building sampler' f_init, f_next = build_sampler(tparams, model_options, trng) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Done' print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. 
for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 lrate = 0.01 for eidx in xrange(max_epochs): n_samples = 0 print 'Epoch ', eidx for x, c in train_iter: n_samples += len(x) uidx += 1 x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words) if x == None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue ud_start = time.time() cost = f_grad_shared(x, mask, ctx) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud if numpy.mod(uidx, saveFreq) == 0: print 'Saving...', params = unzip(tparams) numpy.savez(saveto, history_errs=[], **params) pkl.dump(model_options, open('%s.pkl'%saveto, 'wb')) print 'Done' if numpy.mod(uidx, sampleFreq) == 0: x_s = x mask_s = mask ctx_s = ctx for jj in xrange(numpy.minimum(10, len(ctx_s))): sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options, trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False) print 'Truth ',jj,': ', for vv in x_s[:,jj]: if vv == 0: break if vv in word_idict: print word_idict[vv], else: print 'UNK', print for kk, ss in enumerate([sample[0]]): print 'Sample (', kk,') ', jj, ': ', for vv in ss: if vv == 0: break if vv in word_idict: print word_idict[vv], else: print 'UNK', print print 'Seen %d samples'%n_samples
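# A minimal NumPy sketch (plain arrays standing in for the Theano gradient tensors) of
# the global-norm gradient clipping performed in the trainer above: if the total gradient
# norm exceeds grad_clip, every gradient is rescaled by grad_clip / ||g||.
import numpy as np

def clip_by_global_norm(grads, grad_clip):
    g2 = sum(float((g ** 2).sum()) for g in grads)   # squared global norm
    if g2 > grad_clip ** 2:
        scale = grad_clip / np.sqrt(g2)
        return [g * scale for g in grads]
    return grads

grads = [np.array([3.0, 4.0]), np.array([12.0])]      # global norm is 13
clipped = clip_by_global_norm(grads, grad_clip=5.0)
print(np.sqrt(sum((g ** 2).sum() for g in clipped)))  # ~5.0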
def main(): MAX_VOCAB = 6000 WINDOW_SIZE = 4 LEVEL = 'char' EMBED_DIM = 100 MAX_TOKEN_LEN = 15 NB_LAYERS = 1 NB_EPOCHS = 3 cutoff = 10000000 words = codecs.open('../data/Austen_Sense.txt', 'r', encoding='utf8') \ .read().lower().split()[:cutoff] print('Loaded', len(words), 'words') cnt = Counter(words) most_comm = [k for k, v in cnt.most_common(500)] print('Most frequent:', most_comm[:50]) word_to_int = {'UNK': 0} for w, c in cnt.most_common(MAX_VOCAB): word_to_int[w] = len(word_to_int) int_to_word = [None] * len(word_to_int) for k, v in word_to_int.items(): int_to_word[v] = k if LEVEL == 'char': char_vector_dict, char_idx = index_characters(int_to_word) print(char_vector_dict.keys()) model = build_model(vocab_size=len(word_to_int), embed_dim=EMBED_DIM, level=LEVEL, token_len=MAX_TOKEN_LEN, token_char_vector_dict=char_vector_dict, nb_recurrent_layers=NB_LAYERS) most_comm_X = vectorize_tokens(tokens=most_comm, char_vector_dict=char_vector_dict, max_len=MAX_TOKEN_LEN) print(most_comm_X.shape, '!!!') elif LEVEL == 'word': model = build_model(vocab_size=len(word_to_int), embed_dim=50, level=LEVEL, token_len=None, token_char_vector_dict=None, nb_recurrent_layers=None) model.summary() sampling_table = make_sampling_table(size=len(word_to_int)) for e in range(NB_EPOCHS): idx = 0 losses = [] for idx in range(WINDOW_SIZE, len(words)-WINDOW_SIZE): seq = [] for w in words[(idx - WINDOW_SIZE): (idx + WINDOW_SIZE)]: try: seq.append(word_to_int[w]) except KeyError: seq.append(0) couples, labels = skipgrams(seq, len(word_to_int), window_size=4, negative_samples=1., shuffle=True, categorical=False, sampling_table=sampling_table) if len(couples) > 1: couples = np.array(couples, dtype='int32') c_inp = couples[:, 1] c_inp = c_inp[:, np.newaxis] if LEVEL == 'word': p_inp = couples[:, 0] p_inp = p_inp[:, np.newaxis] elif LEVEL == 'char': tokens = [int_to_word[i] for i in couples[:, 0]] p_inp = vectorize_tokens(tokens=tokens, char_vector_dict=char_vector_dict, max_len=MAX_TOKEN_LEN) else: raise ValueError('Wrong level param: word or char') labels = np.array(labels, dtype='int32') loss = model.train_on_batch({'pivot': p_inp, 'context': c_inp}, {'label': labels}) losses.append(loss) if idx % 5000 == 0: print(np.mean(losses)) if idx % 10000 == 0: print(np.mean(losses)) print('Compiling repr func') get_activations = K.function([model.layers[0].input, K.learning_phase()], [model.layers[6].output, ]) activations = get_activations([most_comm_X, 0])[0] activations = np.array(activations, dtype='float32') print(activations.shape, '-----') norm_weights = np_utils.normalize(activations) # dimension reduction: tsne = TSNE(n_components=2) coor = tsne.fit_transform(norm_weights) plt.clf() sns.set_style('dark') sns.plt.rcParams['axes.linewidth'] = 0.4 fig, ax1 = sns.plt.subplots() labels = most_comm # first plot slices: x1, x2 = coor[:, 0], coor[:, 1] ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none') # clustering on top (add some colouring): clustering = AgglomerativeClustering(linkage='ward', affinity='euclidean', n_clusters=10) clustering.fit(coor) # add names: axes = zip(x1, x2, most_comm, clustering.labels_) for x, y, name, cluster_label in axes: ax1.text(x, y, name, ha='center', va="center", color=plt.cm.spectral(cluster_label / 10.), fontdict={'family': 'Arial', 'size': 8}) # control aesthetics: ax1.set_xlabel('') ax1.set_ylabel('') ax1.set_xticklabels([]) ax1.set_xticks([]) ax1.set_yticklabels([]) ax1.set_yticks([]) sns.plt.savefig('embeddings.pdf', bbox_inches=0)
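# A minimal sketch of the vocabulary construction used in main() above: the MAX_VOCAB
# most frequent words get indices 1..MAX_VOCAB and everything else maps to UNK (index 0).
from collections import Counter

def build_vocab(words, max_vocab):
    word_to_int = {'UNK': 0}
    for w, _ in Counter(words).most_common(max_vocab):
        word_to_int[w] = len(word_to_int)
    int_to_word = [None] * len(word_to_int)
    for w, i in word_to_int.items():
        int_to_word[i] = w
    return word_to_int, int_to_word

w2i, i2w = build_vocab("the cat sat on the mat the cat".split(), max_vocab=3)
print(w2i)                 # {'UNK': 0, 'the': 1, 'cat': 2, ...} (tie order may vary)
print(w2i.get('dog', 0))   # unseen words fall back to the UNK index 0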
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print 'reloading...' + load_from with open('%s.pkl'%load_from, 'rb') as f: curr_model = pkl.load(f) else: curr_model['options'] = {} for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name from logger import Log log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1) print curr_model['options'] # Load training and development sets print 'Loading dataset' dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True) train = dataset['train'] dev = dataset['dev'] # Create dictionary print 'Creating dictionary' worddict = build_dictionary(train['caps']+dev['caps']) print 'Dictionary size: ' + str(len(worddict)) curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb')) print 'Loading data' train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print 'Building model' params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print 'Building sentence encoder' inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Building errors..' inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) print 'Optimization' uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, model_options['dispFreq']) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud log.update({'Error': float(cost)}, n_samples) if numpy.mod(uidx, model_options['validFreq']) == 0: print 'Computing results...' 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2Caption_R@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print 'Saving...', numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams)) print 'Done' vis_details['hyperparams'] = model_options # Save visualization details with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f: json.dump(vis_details, f) # Add the new model to the index index = json.load(open('vis/roc/index.json', 'r')) models = index[model_options['data']] if timestampedName not in models: models.append(timestampedName) with open('vis/roc/index.json', 'w') as f: json.dump(index, f) print 'Seen %d samples'%n_samples
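# A minimal sketch (assuming errs[i, j] holds the model error between caption i and
# image j, with matching pairs on the diagonal) of the ranking metrics reported above:
# R@K is the fraction of captions whose true image ranks in the top K, plus median and
# mean rank. This is an illustration, not the project's own t2i/i2t implementation.
import numpy as np

def t2i_metrics(errs, ks=(1, 5, 10)):
    order = errs.argsort(axis=1)                            # images sorted best-first
    ranks = np.array([np.where(order[i] == i)[0][0] + 1     # 1-based rank of true image
                      for i in range(errs.shape[0])])
    recalls = [100.0 * np.mean(ranks <= k) for k in ks]
    return recalls, np.median(ranks), ranks.mean()

errs = np.array([[0.1, 0.9, 0.8],
                 [0.7, 0.2, 0.6],
                 [0.3, 0.1, 0.2]])
print(t2i_metrics(errs))   # ranks are [1, 1, 2] for this toy matrix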
def setup_to_train(self, train_data=None, dev_data=None, test_data=None): # create a model directory: if os.path.isdir(self.model_dir): shutil.rmtree(self.model_dir) os.mkdir(self.model_dir) self.train_tokens = train_data['token'] if self.include_test: self.test_tokens = test_data['token'] if self.include_dev: self.dev_tokens = dev_data['token'] idx_cnt = 0 if self.include_lemma: self.lemma_out_idx = idx_cnt idx_cnt += 1 self.train_lemmas = train_data['lemma'] self.known_lemmas = set(self.train_lemmas) if self.include_dev: self.dev_lemmas = dev_data['lemma'] if self.include_test: self.test_lemmas = test_data['lemma'] if self.include_pos: self.pos_out_idx = idx_cnt idx_cnt += 1 self.train_pos = train_data['pos'] if self.include_dev: self.dev_pos = dev_data['pos'] if self.include_test: self.test_pos = test_data['pos'] if self.include_morph: self.morph_out_idx = idx_cnt self.train_morph = train_data['morph'] if self.include_dev: self.dev_morph = dev_data['morph'] if self.include_test: self.test_morph = test_data['morph'] self.preprocessor = Preprocessor().fit(tokens=self.train_tokens, lemmas=self.train_lemmas, pos=self.train_pos, morph=self.train_morph, include_lemma=self.include_lemma, include_morph=self.include_morph, max_token_len=self.max_token_len, focus_repr=self.focus_repr, min_lem_cnt=self.min_lem_cnt, ) self.pretrainer = Pretrainer(nb_left_tokens=self.nb_left_tokens, nb_right_tokens=self.nb_right_tokens, size=self.nb_embedding_dims, minimum_count=self.min_token_freq_emb) self.pretrainer.fit(tokens=self.train_tokens) train_transformed = self.preprocessor.transform(tokens=self.train_tokens, lemmas=self.train_lemmas, pos=self.train_pos, morph=self.train_morph) if self.include_dev: dev_transformed = self.preprocessor.transform(tokens=self.dev_tokens, lemmas=self.dev_lemmas, pos=self.dev_pos, morph=self.dev_morph) if self.include_test: test_transformed = self.preprocessor.transform(tokens=self.test_tokens, lemmas=self.test_lemmas, pos=self.test_pos, morph=self.test_morph) self.train_X_focus = train_transformed['X_focus'] if self.include_dev: self.dev_X_focus = dev_transformed['X_focus'] if self.include_test: self.test_X_focus = test_transformed['X_focus'] if self.include_lemma: self.train_X_lemma = train_transformed['X_lemma'] if self.include_dev: self.dev_X_lemma = dev_transformed['X_lemma'] if self.include_test: self.test_X_lemma = test_transformed['X_lemma'] if self.include_pos: self.train_X_pos = train_transformed['X_pos'] if self.include_dev: self.dev_X_pos = dev_transformed['X_pos'] if self.include_test: self.test_X_pos = test_transformed['X_pos'] if self.include_morph: self.train_X_morph = train_transformed['X_morph'] if self.include_dev: self.dev_X_morph = dev_transformed['X_morph'] if self.include_test: self.test_X_morph = test_transformed['X_morph'] self.train_contexts = self.pretrainer.transform(tokens=self.train_tokens) if self.include_dev: self.dev_contexts = self.pretrainer.transform(tokens=self.dev_tokens) if self.include_test: self.test_contexts = self.pretrainer.transform(tokens=self.test_tokens) print('Building model...') nb_tags = None try: nb_tags = len(self.preprocessor.pos_encoder.classes_) except AttributeError: pass nb_morph_cats = None try: nb_morph_cats = self.preprocessor.nb_morph_cats except AttributeError: pass max_token_len, token_char_dict = None, None try: max_token_len = self.preprocessor.max_token_len token_char_dict = self.preprocessor.token_char_dict except AttributeError: pass max_lemma_len, lemma_char_dict = None, None try: max_lemma_len = 
self.preprocessor.max_lemma_len lemma_char_dict = self.preprocessor.lemma_char_dict except AttributeError: pass nb_lemmas = None try: nb_lemmas = len(self.preprocessor.lemma_encoder.classes_) except AttributeError: pass self.model = build_model(token_len=max_token_len, token_char_vector_dict=token_char_dict, lemma_len=max_lemma_len, nb_tags=nb_tags, nb_morph_cats=nb_morph_cats, lemma_char_vector_dict=lemma_char_dict, nb_encoding_layers=self.nb_encoding_layers, nb_dense_dims=self.nb_dense_dims, nb_embedding_dims=self.nb_embedding_dims, nb_train_tokens=len(self.pretrainer.train_token_vocab), nb_context_tokens=self.nb_context_tokens, pretrained_embeddings=self.pretrainer.pretrained_embeddings, include_token=self.include_token, include_context=self.include_context, include_lemma=self.include_lemma, include_pos=self.include_pos, include_morph=self.include_morph, nb_filters = self.nb_filters, filter_length = self.filter_length, focus_repr = self.focus_repr, dropout_level = self.dropout_level, nb_lemmas = nb_lemmas, ) self.save() self.setup = True
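# A minimal sketch (Preprocessorish is a stand-in class, not part of the code above) of
# the pattern setup_to_train uses for optional preprocessor attributes: probing with
# try/except AttributeError is equivalent to getattr() with a default.
class Preprocessorish(object):
    max_token_len = 12            # only some attributes exist, depending on the config

p = Preprocessorish()

# try/except form, as used above
try:
    max_lemma_len = p.max_lemma_len
except AttributeError:
    max_lemma_len = None

# equivalent getattr form
max_token_len = getattr(p, 'max_token_len', None)
print(max_token_len, max_lemma_len)   # -> 12 and None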
def main(args): model_id = build_model_id(args) model_path = build_model_path(args, model_id) setup_model_dir(args, model_path) sys.stdout, sys.stderr = setup_logging(args, model_path) x_train, y_train = load_model_data(args.train_file, args.data_name, args.target_name) x_validation, y_validation = load_model_data( args.validation_file, args.data_name, args.target_name) rng = np.random.RandomState(args.seed) if args.n_classes > -1: n_classes = args.n_classes else: n_classes = max(y_train)+1 n_classes, target_names, class_weight = load_target_data(args, n_classes) if len(class_weight) == 0: n_samples = len(y_train) print('n_samples', n_samples) print('classes', range(n_classes)) print('weights', n_samples / (n_classes * np.bincount(y_train))) class_weight = dict(zip(range(n_classes), n_samples / (n_classes * np.bincount(y_train)))) print('class_weight', class_weight) logging.debug("n_classes {0} min {1} max {2}".format( n_classes, min(y_train), max(y_train))) y_train_one_hot = np_utils.to_categorical(y_train, n_classes) y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes) logging.debug("y_train_one_hot " + str(y_train_one_hot.shape)) logging.debug("x_train " + str(x_train.shape)) min_vocab_index = np.min(x_train) max_vocab_index = np.max(x_train) logging.debug("min vocab index {0} max vocab index {1}".format( min_vocab_index, max_vocab_index)) json_cfg = load_model_json(args, x_train, n_classes) logging.debug("loading model") sys.path.append(args.model_dir) import model from model import build_model ####################################################################### # Subsetting ####################################################################### if args.subsetting_function: subsetter = getattr(model, args.subsetting_function) else: subsetter = None def take_subset(subsetter, path, x, y, y_one_hot, n): if subsetter is None: return x[0:n], y[0:n], y_one_hot[0:n] else: mask = subsetter(path) idx = np.where(mask)[0] idx = idx[0:n] return x[idx], y[idx], y_one_hot[idx] x_train, y_train, y_train_one_hot = take_subset( subsetter, args.train_file, x_train, y_train, y_train_one_hot, n=args.n_train) x_validation, y_validation, y_validation_one_hot = take_subset( subsetter, args.validation_file, x_validation, y_validation, y_validation_one_hot, n=args.n_validation) ####################################################################### # Preprocessing ####################################################################### if args.preprocessing_class: preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed) else: preprocessor = modeling.preprocess.NullPreprocessor() logging.debug("y_train_one_hot " + str(y_train_one_hot.shape)) logging.debug("x_train " + str(x_train.shape)) model_cfg = ModelConfig(**json_cfg) logging.info("model_cfg " + str(model_cfg)) model = build_model(model_cfg) setattr(model, 'stop_training', False) logging.info('model has {n_params} parameters'.format( n_params=count_parameters(model))) if len(args.extra_train_file) > 1: callbacks = keras.callbacks.CallbackList() else: callbacks = [] save_model_info(args, model_path, model_cfg) if not args.no_save: if args.save_all_checkpoints: filepath = model_path + '/model-{epoch:04d}.h5' else: filepath = model_path + '/model.h5' callbacks.append(ModelCheckpoint( filepath=filepath, verbose=1, save_best_only=not args.save_every_epoch)) callback_logger = logging.info if args.log else callable_print if args.n_epochs < sys.maxsize: # Number of epochs overrides patience. 
If the number of epochs # is specified on the command line, the model is trained for # exactly that number; otherwise, the model is trained with # early stopping using the patience specified in the model # configuration. callbacks.append(EarlyStopping( monitor='val_loss', patience=model_cfg.patience, verbose=1)) if args.classification_report: cr = ClassificationReport(x_validation, y_validation, callback_logger, target_names=target_names) callbacks.append(cr) if model_cfg.optimizer == 'SGD': callbacks.append(SingleStepLearningRateSchedule(patience=10)) if len(args.extra_train_file) > 1: args.extra_train_file.append(args.train_file) logging.info("Using the following files for training: " + ','.join(args.extra_train_file)) train_file_iter = itertools.cycle(args.extra_train_file) current_train = args.train_file callbacks._set_model(model) callbacks.on_train_begin(logs={}) epoch = batch = 0 while True: x_train, y_train_one_hot = preprocessor.fit_transform( x_train, y_train_one_hot) x_validation, y_validation_one_hot = preprocessor.transform( x_validation, y_validation_one_hot) iteration = batch % len(args.extra_train_file) logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format( epoch=epoch, iteration=iteration, train_file=current_train)) callbacks.on_epoch_begin(epoch, logs={}) n_train = x_train.shape[0] callbacks.on_batch_begin(batch, logs={'size': n_train}) index_array = np.arange(n_train) if args.shuffle: rng.shuffle(index_array) batches = keras.models.make_batches(n_train, model_cfg.batch_size) logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format( epoch=epoch, iteration=iteration, n_batches=len(batches))) avg_train_loss = avg_train_accuracy = 0. for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if isinstance(model, keras.models.Graph): data = { 'input': x_train[batch_ids], 'output': y_train_one_hot[batch_ids] } train_loss = model.train_on_batch(data, class_weight=class_weight) train_accuracy = 0. else: train_loss, train_accuracy = model.train_on_batch( x_train[batch_ids], y_train_one_hot[batch_ids], accuracy=True, class_weight=class_weight) batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy} avg_train_loss = (avg_train_loss * batch_index + train_loss)/(batch_index + 1) avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy)/(batch_index + 1) callbacks.on_batch_end(batch, logs={'loss': train_loss, 'accuracy': train_accuracy}) logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format( epoch=epoch, iteration=iteration, n_batches=len(batches))) logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format( epoch=epoch, iteration=iteration, loss=avg_train_loss, acc=avg_train_accuracy)) batch += 1 # Validation frequency (this if-block) doesn't necessarily # occur in the same iteration as beginning of an epoch # (next if-block), so model.evaluate appears twice here. 
kwargs = { 'verbose': 0 if args.log else 1 } pargs = [] validation_data = {} if isinstance(model, keras.models.Graph): validation_data = { 'input': x_validation, 'output': y_validation_one_hot } pargs = [validation_data] else: pargs = [x_validation, y_validation_one_hot] kwargs['show_accuracy'] = True if (iteration + 1) % args.validation_freq == 0: if isinstance(model, keras.models.Graph): val_loss = model.evaluate(*pargs, **kwargs) y_hat = model.predict(validation_data) val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)) else: val_loss, val_acc = model.evaluate( *pargs, **kwargs) logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format( epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc)) epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc} callbacks.on_epoch_end(epoch, epoch_end_logs) if batch % len(args.extra_train_file) == 0: if isinstance(model, keras.models.Graph): val_loss = model.evaluate(*pargs, **kwargs) y_hat = model.predict(validation_data) val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)) else: val_loss, val_acc = model.evaluate( *pargs, **kwargs) logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format( epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc)) epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc} epoch += 1 callbacks.on_epoch_end(epoch, epoch_end_logs) if model.stop_training: logging.info("epoch {epoch} iteration {iteration} - done training".format( epoch=epoch, iteration=iteration)) break current_train = next(train_file_iter) x_train, y_train = load_model_data(current_train, args.data_name, args.target_name) y_train_one_hot = np_utils.to_categorical(y_train, n_classes) if epoch > args.n_epochs: break callbacks.on_train_end(logs={}) else: x_train, y_train_one_hot = preprocessor.fit_transform( x_train, y_train_one_hot) x_validation, y_validation_one_hot = preprocessor.transform( x_validation, y_validation_one_hot) if isinstance(model, keras.models.Graph): data = { 'input': x_train, 'output': y_train_one_hot } validation_data = { 'input': x_validation, 'output': y_validation_one_hot } model.fit(data, shuffle=args.shuffle, nb_epoch=args.n_epochs, batch_size=model_cfg.batch_size, validation_data=validation_data, callbacks=callbacks, class_weight=class_weight, verbose=2 if args.log else 1) y_hat = model.predict(validation_data) print('val_acc %.04f' % accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))) else: model.fit(x_train, y_train_one_hot, shuffle=args.shuffle, nb_epoch=args.n_epochs, batch_size=model_cfg.batch_size, show_accuracy=True, validation_data=(x_validation, y_validation_one_hot), callbacks=callbacks, class_weight=class_weight, verbose=2 if args.log else 1)
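# A minimal sketch of the "balanced" class-weight formula used in main() above:
# weight_c = n_samples / (n_classes * count_c), so under-represented classes are
# weighted up. The toy labels below are illustrative.
import numpy as np

y_train = np.array([0, 0, 0, 1, 1, 2])        # class 2 is rare
n_classes = 3
n_samples = float(len(y_train))
counts = np.bincount(y_train, minlength=n_classes)
class_weight = dict(zip(range(n_classes), n_samples / (n_classes * counts)))
print(class_weight)   # {0: 0.667, 1: 1.0, 2: 2.0} (approximately)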
def trainer(X, dim_word=620, # word vector dimensionality dim=2400, # the number of GRU units encoder='gru', decoder='gru', max_epochs=5, dispFreq=1, decay_c=0., grad_clip=5., n_words=20000, maxlen_w=30, optimizer='adam', batch_size = 64, saveto='/u/rkiros/research/semhash/models/toy.npz', dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl', saveFreq=1000, reload_=False): # Model options model_options = {} model_options['dim_word'] = dim_word model_options['dim'] = dim model_options['encoder'] = encoder model_options['decoder'] = decoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['n_words'] = n_words model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['dictionary'] = dictionary model_options['saveFreq'] = saveFreq model_options['reload_'] = reload_ print model_options # reload options # TODO: if loading old parameters you need to make sure you are using them # in the rest of the code # if reload_ and os.path.exists(saveto): # print 'reloading...' + saveto # with open('%s.pkl'%saveto, 'rb') as f: # model_options = pkl.load(f) # load dictionary print 'Loading dictionary...' worddict = load_dictionary(dictionary) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto + '.npz', params) tparams = init_tparams(params) trng, x, x_mask, y, y_mask, z, z_mask, \ opt_ret, \ cost = \ build_model(tparams, model_options) inps = [x, x_mask, y, y_mask, z, z_mask] # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Done' print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. 
for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # Each sentence in the minibatch have same length (for encoder) trainX = homogeneous_data.grouper(X) train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w) uidx = 0 lrate = 0.01 for eidx in xrange(max_epochs): n_samples = 0 print 'Epoch ', eidx for x, y, z in train_iter: n_samples += len(x) uidx += 1 x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words) if x == None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue ud_start = time.time() cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud if numpy.mod(uidx, saveFreq) == 0: print 'Saving...', params = unzip(tparams) numpy.savez(saveto, history_errs=[], **params) pkl.dump(model_options, open('%s.pkl'%saveto, 'wb')) print 'Done' print 'Seen %d samples'%n_samples
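# A minimal NumPy sketch (plain arrays standing in for the Theano shared parameters) of
# the weight-decay regularizer added to the cost in the trainer above: decay_c times the
# sum of squared parameter values.
import numpy as np

def l2_penalty(params, decay_c):
    return decay_c * sum(float((p ** 2).sum()) for p in params)

params = [np.array([[1.0, -2.0]]), np.array([0.5])]
cost = 1.25                                    # assumed data term
cost += l2_penalty(params, decay_c=0.01)       # 0.01 * (1 + 4 + 0.25) = 0.0525
print(cost)                                    # 1.3025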
import sys import yaml from data import build_datasets from model import build_model, build_algorithm from monitor import build_extensions from blocks.main_loop import MainLoop from blocks.model import Model from blocks.extensions.saveload import Load import cPickle as pickle from blocks.graph import ComputationGraph config_dict = yaml.load(open(sys.argv[1], 'r')) print config_dict train, valid, alphabet = build_datasets(config_dict) generator, cost = build_model(len(alphabet), config_dict) algorithm = build_algorithm(generator, cost, config_dict) extensions = build_extensions(cost, algorithm, valid, config_dict) main_loop = MainLoop(algorithm=algorithm, data_stream=train, model=Model(cost), extensions=extensions) ml = Load(config_dict['checkpoint_path'], load_log=True) ml.load_to(main_loop) generator = main_loop.model.get_top_bricks()[-1] sampler = ComputationGraph(generator.generate( n_steps=1000, batch_size=10, iterate=True)).get_theano_function() samples = sampler() outputs = samples[-2]
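# A minimal sketch of the YAML-driven configuration used above; the keys are illustrative
# assumptions, and yaml.safe_load is shown as the safer alternative to yaml.load for
# configuration files.
import yaml

config_text = """
checkpoint_path: checkpoints/model.tar
batch_size: 10
n_steps: 1000
"""
config_dict = yaml.safe_load(config_text)
print(config_dict['checkpoint_path'], config_dict['batch_size'])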
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print "reloading..." + load_from with open("%s.pkl" % load_from, "rb") as f: curr_model = pkl.load(f) else: curr_model["options"] = {} for k, v in kwargs.iteritems(): curr_model["options"][k] = v model_options = curr_model["options"] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name from logger import Log log = Log( name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1 ) print curr_model["options"] # Load training and development sets print "Loading dataset" dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True) train = dataset["train"] dev = dataset["dev"] # Create dictionary print "Creating dictionary" worddict = build_dictionary(train["caps"] + dev["caps"]) print "Dictionary size: " + str(len(worddict)) curr_model["worddict"] = worddict curr_model["options"]["n_words"] = len(worddict) + 2 # save model pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb")) print "Loading data" train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print "Building model" params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print "Building sentence encoder" inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print "Building image encoder" inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print "Building f_grad...", grads = tensor.grad(cost, wrt=itemlist(tparams)) print "Building errors.." inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model["f_senc"] = f_senc curr_model["f_ienc"] = f_ienc curr_model["f_err"] = f_err if model_options["grad_clip"] > 0.0: grads = [maxnorm(g, model_options["grad_clip"]) for g in grads] lr = tensor.scalar(name="lr") print "Building optimizers...", # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost) print "Optimization" uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options["max_epochs"]): print "Epoch ", eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options["lrate"]) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print "NaN detected" return 1.0, 1.0, 1.0 if numpy.mod(uidx, model_options["dispFreq"]) == 0: print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud log.update({"Error": float(cost)}, n_samples) if numpy.mod(uidx, model_options["validFreq"]) == 0: print "Computing results..." 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"]) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples) print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update( { "Image2Caption_R@1": r1i, "Image2Caption_R@5": r5i, "Image2CaptionR@10": r10i, "Image2Caption_median_rank": medri, "Image2Caption_mean_rank": meanri, }, n_samples, ) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print "Saving...", numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams)) print "Done" vis_details["hyperparams"] = model_options # Save visualization details with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f: json.dump(vis_details, f) # Add the new model to the index try: index = json.load(open("vis/roc/index.json", "r")) except IOError: index = {model_options["data"]: []} models = index[model_options["data"]] if timestampedName not in models: models.append(timestampedName) with open("vis/roc/index.json", "w") as f: json.dump(index, f) print "Seen %d samples" % n_samples
def train(dim_word_desc=400,# word vector dimensionality dim_word_q=400, dim_word_ans=600, dim_proj=300, dim=400,# the number of LSTM units encoder_desc='lstm', encoder_desc_word='lstm', encoder_desc_sent='lstm', use_dq_sims=False, eyem=None, learn_h0=False, use_desc_skip_c_g=False, debug=False, encoder_q='lstm', patience=10, max_epochs=5000, dispFreq=100, decay_c=0., alpha_c=0., clip_c=-1., lrate=0.01, n_words_q=49145, n_words_desc=115425, n_words_ans=409, pkl_train_files=None, pkl_valid_files=None, maxlen=2000, # maximum length of the description optimizer='rmsprop', batch_size=2, vocab=None, valid_batch_size=16, use_elu_g=False, saveto='model.npz', model_dir=None, ms_nlayers=3, validFreq=1000, saveFreq=1000, # save the parameters after every saveFreq updates datasets=[None], truncate=400, momentum=0.9, use_bidir=False, cost_mask=None, valid_datasets=['/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5', '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'], dropout_rate=0.5, use_dropout=True, reload_=True, **opt_ds): ensure_dir_exists(model_dir) mpath = os.path.join(model_dir, saveto) mpath_best = os.path.join(model_dir, prfx("best", saveto)) mpath_last = os.path.join(model_dir, prfx("last", saveto)) mpath_stats = os.path.join(model_dir, prfx("stats", saveto)) # Model options model_options = locals().copy() model_options['use_sent_reps'] = opt_ds['use_sent_reps'] stats = defaultdict(list) del model_options['eyem'] del model_options['cost_mask'] if cost_mask is not None: cost_mask = sharedX(cost_mask) # reload options and parameters if reload_: print "Reloading the model." if os.path.exists(mpath_best): print "Reloading the best model from %s." % mpath_best with open(os.path.join(mpath_best, '%s.pkl' % mpath_best), 'rb') as f: models_options = pkl.load(f) params = init_params(model_options) params = load_params(mpath_best, params) elif os.path.exists(mpath): print "Reloading the model from %s." % mpath with open(os.path.join(mpath, '%s.pkl' % mpath), 'rb') as f: models_options = pkl.load(f) params = init_params(model_options) params = load_params(mpath, params) else: raise IOError("Couldn't open the file.") else: print "Couldn't reload the models initializing from scratch." 
params = init_params(model_options) if datasets[0]: print "Short dataset", datasets[0] print 'Loading data' print 'Building model' if pkl_train_files is None or pkl_valid_files is None: train, valid, test = load_data(path=datasets[0], valid_path=valid_datasets[0], test_path=valid_datasets[1], batch_size=batch_size, **opt_ds) else: train, valid, test = load_pkl_data(train_file_paths=pkl_train_files, valid_file_paths=pkl_valid_files, batch_size=batch_size, vocab=vocab, eyem=eyem, **opt_ds) tparams = init_tparams(params) trng, use_noise, inps_d, \ opt_ret, \ cost, errors, ent_errors, ent_derrors, probs = \ build_model(tparams, model_options, prepare_data if not opt_ds['use_sent_reps'] \ else prepare_data_sents, valid, cost_mask=cost_mask) alphas = opt_ret['dec_alphas'] if opt_ds['use_sent_reps']: inps = [inps_d["desc"], \ inps_d["word_mask"], \ inps_d["q"], \ inps_d['q_mask'], \ inps_d['ans'], \ inps_d['wlen'], inps_d['slen'], inps_d['qlen'],\ inps_d['ent_mask'] ] else: inps = [inps_d["desc"], \ inps_d["word_mask"], \ inps_d["q"], \ inps_d['q_mask'], \ inps_d['ans'], \ inps_d['wlen'], \ inps_d['qlen'], \ inps_d['ent_mask']] outs = [cost, errors, probs, alphas] if ent_errors: outs += [ent_errors] if ent_derrors: outs += [ent_derrors] # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, outs, profile=profile) print 'Done' # Apply weight decay on the feed-forward connections if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): if "logit" in kk or "ff" in kk: weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Computing gradient...', grads = safe_grad(cost, itemlist(tparams)) print 'Done' # Gradient clipping: if clip_c > 0.: g2 = get_norms(grads) for p, g in grads.iteritems(): grads[p] = tensor.switch(g2 > (clip_c**2), (g / tensor.sqrt(g2 + 1e-8)) * clip_c, g) inps.pop() if optimizer.lower() == "adasecant": learning_rule = Adasecant(delta_clip=25.0, use_adagrad=True, grad_clip=0.25, gamma_clip=0.) elif optimizer.lower() == "rmsprop": learning_rule = RMSPropMomentum(init_momentum=momentum) elif optimizer.lower() == "adam": learning_rule = Adam() elif optimizer.lower() == "adadelta": learning_rule = AdaDelta() lr = tensor.scalar(name='lr') print 'Building optimizers...', learning_rule = None if learning_rule: f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr, grads=grads, inp=inps, cost=cost, errors=errors) else: f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost, errors) print 'Done' print 'Optimization' history_errs = [] # reload history if reload_ and os.path.exists(mpath): history_errs = list(numpy.load(mpath)['history_errs']) best_p = None bad_count = 0 if validFreq == -1: validFreq = len(train[0]) / batch_size if saveFreq == -1: saveFreq = len(train[0]) / batch_size best_found = False uidx = 0 estop = False train_cost_ave, train_err_ave, \ train_gnorm_ave = reset_train_vals() for eidx in xrange(max_epochs): n_samples = 0 if train.done: train.reset() for d_, q_, a, em in train: n_samples += len(a) uidx += 1 use_noise.set_value(1.) if opt_ds['use_sent_reps']: # To mask the description and the question. 
d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(d_, q_) if d is None: print 'Minibatch with zero sample under length ', maxlen uidx -= 1 continue ud_start = time.time() cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask, q, q_mask, a, dlen, slen, qlen) else: d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_) if d is None: print 'Minibatch with zero sample under length ', maxlen uidx -= 1 continue ud_start = time.time() cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask, q, q_mask, a, dlen, qlen) upnorm = f_update(lrate) ud = time.time() - ud_start # Collect the running ave train stats. train_cost_ave = running_ave(train_cost_ave, cost) train_err_ave = running_ave(train_err_ave, errors) train_gnorm_ave = running_ave(train_gnorm_ave, gnorm) if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' import ipdb; ipdb.set_trace() if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, ' Update ', uidx, \ ' Cost ', cost, ' UD ', ud, \ ' UpNorm ', upnorm[0].tolist(), \ ' GNorm ', gnorm, \ ' Pnorm ', pnorm, 'Terrors ', errors if numpy.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p is not None and best_found: numpy.savez(mpath_best, history_errs=history_errs, **best_p) pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb')) else: params = unzip(tparams) numpy.savez(mpath, history_errs=history_errs, **params) pkl.dump(model_options, open('%s.pkl' % mpath, 'wb')) pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb')) print 'Done' print_param_norms(tparams) if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) if valid.done: valid.reset() valid_costs, valid_errs, valid_probs, \ valid_alphas, error_ent, error_dent = eval_model(f_log_probs, prepare_data if not opt_ds['use_sent_reps'] \ else prepare_data_sents, model_options, valid, use_sent_rep=opt_ds['use_sent_reps']) valid_alphas_ = numpy.concatenate([va.argmax(0) for va in valid_alphas.tolist()], axis=0) valid_err = valid_errs.mean() valid_cost = valid_costs.mean() valid_alpha_ent = -negentropy(valid_alphas) mean_valid_alphas = valid_alphas_.mean() std_valid_alphas = valid_alphas_.std() mean_valid_probs = valid_probs.argmax(1).mean() std_valid_probs = valid_probs.argmax(1).std() history_errs.append([valid_cost, valid_err]) stats['train_err_ave'].append(train_err_ave) stats['train_cost_ave'].append(train_cost_ave) stats['train_gnorm_ave'].append(train_gnorm_ave) stats['valid_errs'].append(valid_err) stats['valid_costs'].append(valid_cost) stats['valid_err_ent'].append(error_ent) stats['valid_err_desc_ent'].append(error_dent) stats['valid_alphas_mean'].append(mean_valid_alphas) stats['valid_alphas_std'].append(std_valid_alphas) stats['valid_alphas_ent'].append(valid_alpha_ent) stats['valid_probs_mean'].append(mean_valid_probs) stats['valid_probs_std'].append(std_valid_probs) if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 1].min(): best_p = unzip(tparams) bad_counter = 0 best_found = True else: bst_found = False if numpy.isnan(valid_err): import ipdb; ipdb.set_trace() print "============================" print '\t>>>Valid error: ', valid_err, \ ' Valid cost: ', valid_cost print '\t>>>Valid pred mean: ', mean_valid_probs, \ ' Valid pred std: ', std_valid_probs print '\t>>>Valid alphas mean: ', mean_valid_alphas, \ ' Valid alphas std: ', std_valid_alphas, \ ' Valid alpha negent: ', valid_alpha_ent, \ ' Valid error ent: ', error_ent, \ ' Valid error desc ent: ', error_dent print "============================" print "Running average train stats " print '\t>>>Train error: ', train_err_ave, \ ' Train cost: 
', train_cost_ave, \ ' Train grad norm: ', train_gnorm_ave print "============================" train_cost_ave, train_err_ave, \ train_gnorm_ave = reset_train_vals() print 'Seen %d samples' % n_samples if estop: break if best_p is not None: zipp(best_p, tparams) use_noise.set_value(0.) valid.reset() valid_cost, valid_error, valid_probs, \ valid_alphas, error_ent, error_dent = eval_model(f_log_probs, prepare_data if not opt_ds['use_sent_reps'] \ else prepare_data_sents, model_options, valid, use_sent_rep=opt_ds['use_sent_reps']) print " Final eval results: " print 'Valid error: ', valid_error.mean() print 'Valid cost: ', valid_cost.mean() print '\t>>>Valid pred mean: ', valid_probs.mean(), \ ' Valid pred std: ', valid_probs.std(), \ ' Valid error ent: ', error_ent params = copy.copy(best_p) numpy.savez(mpath_last, zipped_params=best_p, history_errs=history_errs, **params) return valid_error.mean(), valid_cost.mean()
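# A minimal, pure-Python sketch (names are illustrative) of the best-model bookkeeping
# in the training loop above: keep the parameters from the validation with the lowest
# error and count how many validations have passed without improvement.
def update_best(history_errs, valid_err, params, best_p, bad_counter):
    if not history_errs or valid_err <= min(history_errs):
        return params, 0               # new best: snapshot parameters, reset counter
    return best_p, bad_counter + 1     # no improvement: keep old best, bump counter

best_p, bad_counter, history = None, 0, []
for valid_err, params in [(0.9, 'p0'), (0.7, 'p1'), (0.8, 'p2')]:
    best_p, bad_counter = update_best(history, valid_err, params, best_p, bad_counter)
    history.append(valid_err)
print(best_p, bad_counter)   # 'p1' and 1: the second snapshot is best, one bad round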
def __init__(self, alpha, batch_size, n_epochs, wordVecLen, flag_dropout, datapath, random_seed, dropoutRates, optimizer, dispFreq, beam_size, flag_random_lookup_table, flag_toy_data, size_hidden_layer, dataset, result_path, sentence_modeling, CNN_filter_length, LSTM_go_backwards ): model_options = locals().copy() model_options['rng'] = np.random.RandomState(random_seed) print 'Loading data' src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = load_data(path=datapath) if flag_toy_data == True: src_valid = src_valid[:10] src_test = src_test[:10] #src_train = copy.copy(src_valid) src_train = src_train[:10] elif flag_toy_data != False: valid_l = len(src_valid) * flag_toy_data test_l = len(src_test) * flag_toy_data train_l = len(src_train) * flag_toy_data src_valid = src_valid[:int(valid_l)] src_test = src_test[:int(test_l)] src_train = src_train[:int(train_l)] train,pairdict_train = prepare_data(src_train) valid,pairdict_valid = prepare_data(src_valid) test,pairdict_test = prepare_data(src_test) model_options['embedding'] = embedding (sentence1,sentence1_mask,sentence2,sentence2_mask,y,cost,f_pred,tparams,f_debug) = build_model(model_options) #f_cost = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, name='f_cost') #grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams.values()) grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams) # grads = tensor.grad(cost, wrt=tparams.values()) #f_grad = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], grads, name='f_grad') lr = tensor.scalar(name='lr') if model_options['optimizer'] == 'sgd': optimizer = sgd elif model_options['optimizer'] == 'rmsprop': optimizer = rmsprop else: optimizer = adadelta f_grad_shared, f_update = optimizer(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost) print 'Optimization' kf_valid = get_minibatches_idx(len(valid), model_options['batch_size']) kf_test = get_minibatches_idx(len(test), model_options['batch_size']) print "%d train examples" % len(train) print "%d valid examples" % len(valid) print "%d test examples" % len(test) sys.stdout.flush() best_validation_score = -np.inf best_iter = 0 uidx = 0 # the number of update done for epoch in xrange(model_options['n_epochs']): print ('Training on %d epoch' % epoch) sys.stdout.flush() kf = get_minibatches_idx(len(train), batch_size, shuffle=True) start_time = time.time() samples_seen = 0 for _, train_index in kf: uidx += 1 batch_samples = [train[t] for t in train_index] samples_seen += len(batch_samples) #print batch_samples sentence1,sentence1_mask,sentence2,sentence2_mask,y = data_padding(batch_samples) #print sentence1,sentence1_mask,sentence2,sentence2_mask,y #print sentence1.shape,sentence1_mask.shape,sentence2.shape,sentence2_mask.shape,y.shape #o = f_debug(sentence1,sentence1_mask,sentence2,sentence2_mask,y) #print o #print o[0].shape,o[1].shape,o[2].shape,o[3].shape cost = f_grad_shared(sentence1,sentence1_mask,sentence2,sentence2_mask,y) f_update(model_options['alpha']) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. 
if np.mod(uidx, dispFreq) == 0: print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen sys.stdout.flush() print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen sys.stdout.flush() ''' if epoch % 5 == 0: kf_train = get_minibatches_idx(len(train), batch_size) print ('Train_score:') self.eva(f_pred, src_train, train, pairdict_train, kf_train, model_options) sys.stdout.flush() ''' print ('Valid_score:') top1_res = self.eva(f_pred, src_valid, valid, pairdict_valid, kf_valid, model_options) self.save_result(model_options['result_path'] + 'dev.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res) sys.stdout.flush() print ('Test_score:') top1_res = self.eva(f_pred, src_test, test, pairdict_test, kf_test, model_options) self.save_result(model_options['result_path'] + 'test.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res) sys.stdout.flush() print ('%d epoch completed.' % epoch) sys.stdout.flush() ''' if(best_validation_score < valid_score): best_iter = epoch best_validation_score = valid_score print ('Current best_dev_F is %.2f, at %d epoch'%(best_validation_score,best_iter)) ''' end_time = time.time() minu = int((end_time - start_time)/60) sec = (end_time - start_time) - 60 * minu print ('Time: %d min %.2f sec' % (minu, sec)) sys.stdout.flush() print('Training completed!') sys.stdout.flush()
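# A minimal sketch of what get_minibatches_idx presumably does above (an assumption based
# on how it is called): split example indices into fixed-size batches, optionally
# shuffled once per epoch.
import numpy as np

def get_minibatches_idx(n, batch_size, shuffle=False):
    idx = np.arange(n)
    if shuffle:
        np.random.shuffle(idx)
    return [(i // batch_size, idx[i:i + batch_size])
            for i in range(0, n, batch_size)]

for batch_num, batch_idx in get_minibatches_idx(10, 4, shuffle=True):
    print(batch_num, batch_idx)   # e.g. 0 [7 2 9 4] / 1 [...] / 2 [...]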
def metdraw(filename,count_mets=None,met_file=None,show=False, engine='fdp',output='svg',quiet=False,q='1',Ln='1000', json=False,norun=False,status=False,dotcmd='dot',no_gpr=False, defaults=defaults): sbml_filename = filename if filename.endswith('.xml'): filename = filename[:-4] dot_filename = filename + '.dot' mets_filename = filename + '.mets' gpr_filename = filename + '.gpr' output_filename = filename + '.' + output if not quiet: print 'Loading model file', sbml_filename if filename.endswith('.json'): model = Model.build_model(*model_json.parse_json_file(file=sbml_filename)) else: pieces = sbml.parse_sbml_file(file=sbml_filename) model = Model.build_model(**pieces) if not no_gpr: gpr.write_gpr_file(gpr.Gpr(pieces['reactions']),gpr_filename) if not quiet: print 'GPR written to file', gpr_filename model.name = filename model.set_param(**defaults) if count_mets: if not quiet: print 'Writing metabolite counts to file', filename+'.mets' Minors.write_met_file(Minors.count_species(model), filename=mets_filename, json=json) return if met_file: minors = Minors.read_met_file(filename=met_file) if not quiet: print len(minors), "minors loaded from file '{0}'".format(met_file) else: # find the minors in the model; for now, we create a temporary mets # file that is deleted after loading the minors temp_filename = mets_filename + '.TEMP' Minors.write_met_file(Minors.count_species(model),filename=temp_filename) minors = Minors.read_met_file(temp_filename) os.remove(temp_filename) if not quiet: print len(minors), "minors found in model" model.set_param(name="minors",value=minors) if show: model.display() display_parameters(defaults) if not quiet: print 'Creating reaction layout' g = layout.model_to_dot(model) if not quiet: print 'Creating DOT file', dot_filename g.to_file(dot_filename) # run graphviz if not quiet: print 'Preparing Graphviz call:' cmdstr = '{dot} -q{q} -Ln{Ln} -K{engine} -T{fmt} -o {outfile} {file}' cmd = cmdstr.format(dot=dotcmd, q=q,Ln=Ln, engine=engine, fmt=output, outfile=output_filename, file=dot_filename) if not quiet: print ' ' + cmd if not norun: print 'Running Graphviz' error = os.system(cmd) if error: print "Error running dot:", error else: print 'ok' # clean up intermediate DOT file os.remove(dot_filename)
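# A minimal sketch of the Graphviz call assembled in metdraw() above, using subprocess
# instead of os.system (an alternative shown for illustration, not the script's own
# choice) so a missing binary or non-zero exit status surfaces as an exception.
import subprocess

cmd = ['dot', '-q1', '-Ln1000', '-Kfdp', '-Tsvg', '-o', 'example.svg', 'example.dot']
try:
    subprocess.check_call(cmd)
except (OSError, subprocess.CalledProcessError) as e:
    print('Error running dot:', e)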
theano.In(X_batch), ], outputs=generator_loss, updates=generator_updates, givens={ X: X_batch, }, ) return generator_func if __name__ == '__main__': import model print('building model') layers = model.build_model() print('compiling theano functions') encoder_decoder_func = create_encoder_decoder_func(layers) discriminator_func = create_discriminator_func(layers) generator_func = create_generator_func(layers) import numpy as np X = np.random.random((16, 28 * 28)).astype(np.float32) pz = np.random.uniform(-2, 2, size=(16, 2)).astype(np.float32) print('X.shape = %r' % (X.shape,)) print('pz.shape = %r' % (pz.shape,)) print('running the three forward passes') print encoder_decoder_func(X)
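# A minimal sketch of the theano.function pattern used above: the symbolic input X is
# substituted with X_batch via `givens`, which is how the compiled functions above feed
# minibatches into a fixed computation graph. The toy loss below is an assumption, not
# the model's real objective.
import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
X_batch = T.matrix('X_batch')
loss = ((X - 0.5) ** 2).mean()            # stand-in for the real reconstruction loss

func = theano.function(
    inputs=[theano.In(X_batch)],
    outputs=loss,
    givens={X: X_batch},
)
print(func(np.random.random((4, 3)).astype(theano.config.floatX)))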