def setUp(self):
    config = read_py_config('./configs/config.py')
    self.config = config
    self.model = build_model(config, device='cpu', strict=True, mode='convert')
    self.img_size = tuple(map(int, config.resize.values()))
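# A hedged companion test for the fixture above; the NCHW float input layout, the
# 3-channel assumption, and the idea that build_model returns a callable torch module
# are guesses for illustration, not taken from the original suite.
import torch

def test_forward_shape(self):
    # assumes self.img_size holds (height, width) as parsed in setUp
    dummy = torch.rand(1, 3, *self.img_size)
    with torch.no_grad():
        output = self.model(dummy)
    # only checks that the batch dimension survives a forward pass in 'convert' mode
    self.assertEqual(output.shape[0], 1)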
def synthesize(text):
    input = text + "|00-" + lang + "|" + lang
    # Change to Multi_TTS path
    sys.path.append(
        os.path.join(os.path.dirname(__file__), "dependencies/Multilingual_Text_to_Speech"))
    if "utils" in sys.modules:
        del sys.modules["utils"]
    from synthesize import synthesize
    from utils import build_model
    # Load Multilingual pretrained model
    model = build_model(
        os.path.abspath("./dependencies/checkpoints/generated_switching.pyt"))
    model.eval()
    # generate spectrogram
    spectrogram = synthesize(model, "|" + input)
    # Change to WaveRNN Path
    sys.path.append(
        os.path.join(os.path.dirname(__file__), "dependencies/WaveRNN"))
    if "utils" in sys.modules:
        del sys.modules["utils"]
    from models.fatchord_version import WaveRNN
    from utils import hparams as hp
    from gen_wavernn import generate
    import torch
    # Load WaveRNN pretrained model
    hp.configure("hparams.py")
    model = WaveRNN(
        rnn_dims=hp.voc_rnn_dims, fc_dims=hp.voc_fc_dims, bits=hp.bits,
        pad=hp.voc_pad, upsample_factors=hp.voc_upsample_factors,
        feat_dims=hp.num_mels, compute_dims=hp.voc_compute_dims,
        res_out_dims=hp.voc_res_out_dims, res_blocks=hp.voc_res_blocks,
        hop_length=hp.hop_length, sample_rate=hp.sample_rate,
        mode=hp.voc_mode).to(
            torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    model.load(
        os.path.join(os.path.dirname(__file__), "dependencies/checkpoints/wavernn_weight.pyt"))
    waveform = generate(model, spectrogram, hp.voc_gen_batched, hp.voc_target, hp.voc_overlap)
    # write the generated waveform to disk
    with open("./temp/result.wav", "wb") as f:
        f.write(waveform)
def classify(**args): """ Main method that prepares dataset, builds model, executes training and displays results. :param args: keyword arguments passed from cli parser """ # only allow print-outs if execution has no repetitions allow_print = args['repetitions'] == 1 # determine classification targets and parameters to construct datasets properly cls_target, cls_str = set_classification_targets(args['cls_choice']) d = prepare_dataset(args['dataset_choice'], cls_target, args['batch_size']) print('\n\tTask: Classify «{}» using «{}»\n'.format( cls_str, d['data_str'])) print_dataset_info(d) # build and train inputs = Input(shape=(7810, )) models = [ build_model(i, d['num_classes'], inputs=inputs) for i in range(args['num_models']) ] # combine outputs of all models y = Average()([m.outputs[0] for m in models]) model = Model(inputs, outputs=y, name='multiple') model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) if allow_print: model.summary() print('') plot_model(model, to_file='img/multiple_mlp.png') model.fit(d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'], verbose=1, class_weight=d['class_weights']) # evaluation model print('Evaluate ...') model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) # predict on testset and calculate classification report and confusion matrix for diagnosis print('Test ...') pred = model.predict(d['test_data'], steps=d['test_steps']) if allow_print: diagnose_output(d['test_labels'], pred.argmax(axis=1), d['classes_trans']) return balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
def main(): if not os.path.exists(train_label_path): print('loading training labels...') train_label_file = "data/dev_label.txt" if args.task == 1 else "data/train_label.txt" train_label = read_label_from_file(train_label_file, frame_size=frame_size, frame_shift=frame_shift) save_json(train_label, train_label_path) else: print('lazy loading training labels...') train_label = read_json(train_label_path) features_train, target_train = sklearn_dataset( train_label, task=args.task, mode='train', frame_size=frame_size, frame_shift=frame_shift, features_path=train_features_path, target_path=train_target_path) '''optional''' # from sklearn.manifold import TSNE # import matplotlib # matplotlib.use('Agg') # import matplotlib.pyplot as plt # X_embedded = TSNE(n_components=2).fit_transform(features_train[0::500,:]) # plt.scatter(X_embedded[:,0], X_embedded[:,1],c=target_train[0::500]) # plt.savefig('vis.png') '''optional''' if args.task == 2: if not os.path.exists(val_label_path): print('loading validation labels...') val_label_file = "data/dev_label.txt" val_label = read_label_from_file(val_label_file, frame_size=frame_size, frame_shift=frame_shift) save_json(val_label, val_label_path) else: print('lazy loading validation labels...') val_label = read_json(val_label_path) features_val, target_val = sklearn_dataset( val_label, task=args.task, mode='val', frame_size=frame_size, frame_shift=frame_shift, features_path=val_features_path, target_path=val_target_path) else: features_val, target_val = None, None m = build_model(args) exp(m, features_train, target_train, features_val, target_val, exp_id)
def main(): # parsing arguments parser = argparse.ArgumentParser(description='antispoofing training') parser.add_argument('--draw_graph', default=False, type=bool, required=False, help='whether or not to draw graphics') parser.add_argument('--GPU', default=0, type=int, required=False, help='specify which GPU to use') parser.add_argument('--config', type=str, default=None, required=True, help='path to configuration file') parser.add_argument('--device', type=str, default='cuda', help='if you want to eval model on cpu, pass "cpu" param') args = parser.parse_args() # reading config and manage device path_to_config = args.config config = read_py_config(path_to_config) device = args.device + f':{args.GPU}' if args.device == 'cuda' else 'cpu' # building model model = build_model(config, device, strict=True, mode='eval') model.to(device) if config.data_parallel.use_parallel: model = nn.DataParallel(model, **config.data_parallel.parallel_params) # load snapshot path_to_experiment = os.path.join(config.checkpoint.experiment_path, config.checkpoint.snapshot_name) epoch_of_checkpoint = load_checkpoint(path_to_experiment, model, map_location=device, optimizer=None) # preprocessing, making dataset and loader normalize = A.Normalize(**config.img_norm_cfg) test_transform = A.Compose([ A.Resize(**config.resize, interpolation=cv.INTER_CUBIC), normalize ]) test_transform = Transform(val=test_transform) test_dataset = make_dataset(config, val_transform=test_transform, mode='eval') test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=True, num_workers=2) # computing metrics auc_, eer, accur, apcer, bpcer, acer, fpr, tpr = evaluate(model, test_loader, config, device, compute_accuracy=True) print((f'eer = {round(eer*100,2)}\n' + f'accuracy on test data = {round(np.mean(accur),3)}\n' + f'auc = {round(auc_,3)}\n' + f'apcer = {round(apcer*100,2)}\n' + f'bpcer = {round(bpcer*100,2)}\n' + f'acer = {round(acer*100,2)}\n' + f'checkpoint made on {epoch_of_checkpoint} epoch')) # draw graphics if needed if args.draw_graph: fnr = 1 - tpr plot_roc_curve(fpr, tpr, config) det_curve(fpr, fnr, eer, config)
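# Note on the flags above: argparse calls bool() on the raw string, so passing
# "--draw_graph False" still parses as True (bool of any non-empty string is True).
# A minimal sketch of the usual store_true alternative, shown only as an illustration:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--draw_graph', action='store_true',
                    help='whether or not to draw graphics')
args = parser.parse_args(['--draw_graph'])
assert args.draw_graph is True  # omitting the flag would give False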
def forecast_plot(tickername, steps):
    data = fetch_data(tickername).reset_index()
    data['Type'] = "HISTORICAL"
    model = build_model(tickername)
    fcast = model.forecast(int(steps))
    new_series = pd.date_range(data['Date'].iloc[-1], periods=int(steps))
    fcast_df = pd.DataFrame({'Date': new_series, 'Close': fcast[0], 'Type': "FORECAST"})
    final_df = pd.concat([data[['Date', 'Close', 'Type']], fcast_df])
    fig = px.line(final_df, x='Date', y='Close', color='Type')
    return fig.to_html()
def main(): set_random_seed(C.seed) summary_writer = SummaryWriter(C.log_dpath) train_iter, val_iter, test_iter, vocab = build_loaders(C) model = build_model(C, vocab) print("#params: ", count_parameters(model)) model = model.cuda() optimizer = torch.optim.Adamax(model.parameters(), lr=C.lr, weight_decay=1e-5) lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, C.epochs, eta_min=0, last_epoch=-1) best_val_scores = {'CIDEr': -1.} for e in range(1, C.epochs + 1): print() ckpt_fpath = C.ckpt_fpath_tpl.format(e) """ Train """ teacher_forcing_ratio = get_teacher_forcing_ratio( C.decoder.max_teacher_forcing_ratio, C.decoder.min_teacher_forcing_ratio, e, C.epochs) train_loss = train(e, model, optimizer, train_iter, vocab, teacher_forcing_ratio, C.CA_lambda, C.gradient_clip) log_train(C, summary_writer, e, train_loss, get_lr(optimizer), teacher_forcing_ratio) lr_scheduler.step() """ Validation """ val_loss = evaluate(model, val_iter, vocab, C.CA_lambda) val_scores, _, _, _ = score(model, val_iter, vocab) log_val(C, summary_writer, e, val_loss, val_scores) if val_scores['CIDEr'] > best_val_scores['CIDEr']: best_val_scores = val_scores best_epoch = e best_model = model print("Saving checkpoint at epoch={} to {}".format(e, ckpt_fpath)) save_checkpoint(ckpt_fpath, e, model, optimizer) """ Test """ test_scores, _, _, _ = score(best_model, test_iter, vocab) for metric in C.metrics: summary_writer.add_scalar("BEST SCORE/{}".format(metric), test_scores[metric], best_epoch) best_ckpt_fpath = C.ckpt_fpath_tpl.format("best") save_checkpoint(best_ckpt_fpath, best_epoch, best_model, optimizer)
def classify(**args): """ Main method that prepares dataset, builds model, executes training and displays results. :param args: keyword arguments passed from cli parser """ # only allow print-outs if execution has no repetitions allow_print = args['repetitions'] == 1 # determine classification targets and parameters to construct datasets properly cls_target, cls_str = set_classification_targets(args['cls_choice']) d = prepare_dataset(args['dataset_choice'], cls_target, args['batch_size'], args['norm_choice']) print('\n\tTask: Classify «{}» using «{}»\n'.format( cls_str, d['data_str'])) print_dataset_info(d) model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) if allow_print: model.summary() print('') # callback to log data for TensorBoard # tb_callback = TensorBoard(log_dir='./results', histogram_freq=0, write_graph=True, write_images=True) # train and evaluate model.fit( d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'], # callbacks=[tb_callback], verbose=1, class_weight=d['class_weights']) model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) # predict on testset and calculate classification report and confusion matrix for diagnosis pred = model.predict(d['test_data'], steps=d['test_steps']) if allow_print: diagnose_output(d['test_labels'], pred.argmax(axis=1), d['classes_trans']) return balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
def main(): """Prepares data for the antispoofing recognition demo""" parser = argparse.ArgumentParser(description='antispoofing recognition live demo script') parser.add_argument('--video', type=str, default=None, help='Input video') parser.add_argument('--cam_id', type=int, default=-1, help='Input cam') parser.add_argument('--config', type=str, default=None, required=False, help='Configuration file') parser.add_argument('--fd_model', type=str, required=True) parser.add_argument('--fd_thresh', type=float, default=0.6, help='Threshold for FD') parser.add_argument('--spoof_thresh', type=float, default=0.4, help='Threshold for predicting spoof/real. The lower the more model oriented on spoofs') parser.add_argument('--spf_model', type=str, default=None, help='path to .pth checkpoint of model or .xml IR OpenVINO model', required=True) parser.add_argument('--device', type=str, default='CPU') parser.add_argument('--GPU', type=int, default=0, help='specify which GPU to use') parser.add_argument('-l', '--cpu_extension', help='MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels ' 'impl.', type=str, default=None) parser.add_argument('--write_video', type=bool, default=False, help='if you set this arg to True, the video of the demo will be recoreded') args = parser.parse_args() device = args.device + f':{args.GPU}' if args.device == 'cuda' else 'cpu' write_video = args.write_video if args.cam_id >= 0: log.info('Reading from cam {}'.format(args.cam_id)) cap = cv.VideoCapture(args.cam_id) cap.set(cv.CAP_PROP_FRAME_WIDTH, 1280) cap.set(cv.CAP_PROP_FRAME_HEIGHT, 720) cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG')) else: assert args.video log.info('Reading from {}'.format(args.video)) cap = cv.VideoCapture(args.video) cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG')) assert cap.isOpened() face_detector = FaceDetector(args.fd_model, args.fd_thresh, args.device, args.cpu_extension) if args.spf_model.endswith('pth.tar'): if not args.config: raise ValueError('You should pass config file to work with a Pytorch model') config = utils.read_py_config(args.config) spoof_model = utils.build_model(config, args, strict=True, mode='eval') spoof_model = TorchCNN(spoof_model, args.spf_model, config, device=device) else: assert args.spf_model.endswith('.xml') spoof_model = VectorCNN(args.spf_model) # running demo run(args, cap, face_detector, spoof_model, write_video)
def train_process(config):
    start = time.time()
    # prepare training data
    train_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'training')
    train_loader = data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    train_iter = infinit_iter(train_loader)
    # prepare validation data
    val_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'validation')
    val_loader = data.DataLoader(val_dataset, batch_size=1)
    # build the model
    model, optimizer = build_model(config, train_dataset.en_vocab_size,
                                   train_dataset.cn_vocab_size)
    loss_function = nn.CrossEntropyLoss(
        ignore_index=0)  # ??? does this ignore the gradient of the bias?
    # training loop
    train_loss, val_losses, bleu_scores = [], [], []
    total_steps = 0
    while total_steps < config.num_steps:
        # train the model
        model, optimizer, losses = train(model, optimizer, train_iter, loss_function,
                                         total_steps, config.summary_steps)
        train_loss += losses
        # validate the model
        val_loss, bleu_score, result = test(model, val_loader, loss_function)
        val_losses.append(val_loss)
        bleu_scores.append(bleu_score)
        total_steps += config.summary_steps
        print(
            '\r', 'val [{}] loss {:.3f}, Perplexity: {:.3f}, bleu score: {:.3f}, used {} seconds '
            .format(total_steps, val_loss, np.exp(val_loss), bleu_score,
                    int(time.time() - start)))
        # save the model and results
        if total_steps % config.store_steps == 0 or total_steps >= config.num_steps:
            save_model(model, config.store_model_path, total_steps)
            with open(f'{config.store_model_path}/output_{total_steps}.txt', 'w') as f:
                for l in result:
                    print(l, file=f)
    return train_loss, val_losses, bleu_scores
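# The "???" comment above asks whether ignore_index=0 ignores the bias gradient;
# it does not. CrossEntropyLoss(ignore_index=0) skips target positions whose label
# is 0 (the padding index here), so padded time steps contribute nothing to the loss.
# A small self-contained illustration (shapes are arbitrary):
import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss(ignore_index=0)
logits = torch.randn(4, 10)             # 4 positions, vocabulary of 10 tokens
targets = torch.tensor([3, 0, 7, 0])    # labels equal to 0 are treated as padding
loss = loss_fn(logits, targets)         # averaged over the 2 non-padding positions only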
def forecast_plot(tickername, steps):
    data = fetch_data(tickername).reset_index()
    data['Type'] = 'HISTORICAL'
    model = build_model(tickername)
    fcast = model.forecast(int(steps))
    new_series = pd.date_range(data['Date'].iloc[-1], periods=int(steps))
    fcast_df = pd.DataFrame({
        'Date': new_series,
        'Close': fcast[0],
        'Type': 'FORECAST'
    })
    final_df = pd.concat([data[['Date', 'Close', 'Type']], fcast_df])
    fig = px.line(final_df, x='Date', y='Close', color='Type')
    # fig = go.Figure([go.Scatter(x=data['Date'], y=data['Close'])])
    # fig.add_trace(go.Scatter(x=fcast['Date'], y=fcast[0]))
    return fig.to_html()
def main(): """Main workflow""" args = utils.build_args(argparse.ArgumentParser()) utils.init_logger(args.model_file) assert torch.cuda.is_available() torch.cuda.set_device(args.gpuid) utils.init_random(args.seed) utils.set_params(args) logger.info("Config:\n%s", pformat(vars(args))) fields = utils.build_fields() logger.info("Fields: %s", fields.keys()) logger.info("Load %s", args.train_file) train_data = LMDataset(fields, args.train_file, args.sent_length_trunc) logger.info("Training sentences: %d", len(train_data)) logger.info("Load %s", args.valid_file) val_data = LMDataset(fields, args.valid_file, args.sent_length_trunc) logger.info("Validation sentences: %d", len(val_data)) fields["sent"].build_vocab(train_data) train_iter = utils.build_dataset_iter(train_data, args) val_iter = utils.build_dataset_iter(val_data, args, train=False) if args.resume and os.path.isfile(args.checkpoint_file): logger.info("Resume training") logger.info("Load checkpoint %s", args.checkpoint_file) checkpoint = torch.load(args.checkpoint_file, map_location=lambda storage, loc: storage) es_stats = checkpoint["es_stats"] args = utils.set_args(args, checkpoint) else: checkpoint = None es_stats = ESStatistics(args) model = utils.build_model(fields, args, checkpoint) logger.info("Model:\n%s", model) optimizer = utils.build_optimizer(model, args, checkpoint) try_train_val(fields, model, optimizer, train_iter, val_iter, es_stats, args)
def __init__(self, train_loader, test_loader, real_loader, config): self.train_loader = train_loader self.test_loader = test_loader self.real_loader = real_loader self.z_dim = config.z_dim self.c_dim = config.c_dim self.image_size = config.image_size self.g_conv_dim = config.g_conv_dim self.d_conv_dim = config.d_conv_dim self.g_repeat_num = config.g_repeat_num self.d_repeat_num = config.d_repeat_num self.lambda_gan = config.lambda_gan self.batch_size = config.batch_size self.num_epoch = config.num_epoch self.lr_decay_start = config.lr_decay_start self.g_lr = config.g_lr self.d_lr = config.d_lr self.n_critic = config.n_critic self.resume_epoch = config.resume_epoch # Miscellaneous. self.use_tensorboard = config.use_tensorboard self.device = torch.device( 'cuda' if torch.cuda.is_available() else 'cpu') self.use_numpy_fid = config.use_numpy_fid # Directories. self.log_dir = config.log_dir self.sample_dir = config.sample_dir self.model_save_dir = config.model_save_dir self.result_dir = config.result_dir self.real_incep_stat_dir = config.real_incep_stat_dir self.real_fid_stat_dir = config.real_fid_stat_dir # Step size. self.log_step = config.log_step self.sample_step = config.sample_step self.model_save_step = config.model_save_step # Build the model and tensorboard. self.KDLoss = CMPDisLoss() self.G, self.D, self.g_optimizer, self.d_optimizer = utils.build_model( config) if self.use_tensorboard: self.logger = utils.build_tensorboard(self.log_dir)
def classify(**args):
    """
    Main method that prepares dataset, builds model, executes training and displays results.

    :param args: keyword arguments passed from cli parser
    """
    with open('config/datasets.yaml') as cnf:
        dataset_configs = yaml.safe_load(cnf)
    try:
        repo_path = dataset_configs['repo_path']
    except KeyError as e:
        print(f'Missing dataset config key: {e}')
        sys.exit(1)

    batch_size = 64
    repetitions = args['repetitions']

    # determine classification targets and parameters to construct datasets properly
    cls_target, cls_str = set_classification_targets(args['cls_choice'])

    # list of 5% increments ranging from 0% to 100%
    mixture_range = np.arange(0, 1.01, .05)
    results = np.zeros((len(mixture_range), repetitions))

    for i, cut in enumerate(mixture_range):
        print(f'cut: {cut}')
        d = prepare_mixture_dataset(
            cls_target,
            args['batch_size'],
            mixture_pct=cut,
            normalisation=args['norm_choice'])

        # perform #repetitions per 5% dataset mixture
        for j in range(repetitions):
            model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True)
            model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

            # train and evaluate
            model.fit(
                d['train_data'],
                steps_per_epoch=d['train_steps'],
                epochs=args['epochs'],
                verbose=0,
                class_weight=d['class_weights'])
            results[i, j] = balanced_accuracy_score(
                d['test_labels'],
                model.predict(d['test_data'](), steps=d['test_steps']).argmax(axis=1))

    print(results)
    np.save(join(repo_path, f'data/synthetic_influence_target_{cls_target}'), results)
def __init__(self, config, device, resume=False):
    self.config = config
    self.cfg_stg = config['strategy']
    self.device = device
    self.model = utils.build_model(config['model'])
    self.model.to(device)
    self.logger = utils.create_logger(self.cfg_stg['save_path'])
    self.tb_logger = SummaryWriter(
        join(self.cfg_stg['save_path'], 'events'))
    self.start_epoch = 1
    if resume:
        self.load_model()
    self.optimizer = utils.build_optimizer(config['strategy'], self.model,
                                           self.start_epoch)
def main(): # Load checkpoint if we resume from a previous training. if opt.train_from: print('Loading checkpoint from %s' % opt.train_from) checkpoint = torch.load(opt.train_from, map_location=lambda storage, loc: storage) model_opt = checkpoint['opt'] # I don't like reassigning attributes of opt: it's not clear. opt.start_epoch = checkpoint['epoch'] + 1 elif opt.init_with: print('Loading checkpoint from %s' % opt.init_with) checkpoint = torch.load(opt.init_with, map_location=lambda storage, loc: storage) model_opt = opt elif opt.eval_with: print('Loading checkpoint from %s' % opt.eval_with) checkpoint = torch.load(opt.eval_with, map_location=lambda storage, loc: storage) model_opt = checkpoint["opt"] model_opt.eval_only = 1 else: checkpoint = None model_opt = opt for k, v in vars(model_opt).items(): print("{}: {}".format(k, v)) first_dataset = next(lazily_load_dataset("train")) data_type = first_dataset.data_type fields = load_fields(first_dataset, data_type, checkpoint) collect_report_features(fields) model = build_model(model_opt, opt, fields, checkpoint) tally_parameters(model) check_save_model_path() optim = build_optim(model, checkpoint) train_model(model, fields, optim, data_type, model_opt) if opt.tensorboard: writer.close()
def classify(**args): """ Main method that prepares dataset, builds model, executes training and displays results. :param args: keyword arguments passed from cli parser """ # only allow print-outs if execution has no repetitions allow_print = args['repetitions'] == 1 # determine classification targets and parameters to construct datasets properly cls_target, cls_str = set_classification_targets(args['cls_choice']) d = prepare_dataset( args['dataset_choice'], cls_target, args['batch_size'], args['norm_choice'], mp_heatmap=True) print('\n\tTask: Classify «{}» using «{}»\n'.format(cls_str, d['data_str'])) print_dataset_info(d) model = build_model(0, d['num_classes'], name='64shot_mlp', new_input=True) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) if allow_print: model.summary() print('') # train and evaluate model.fit( x=d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'], verbose=1, class_weight=d['class_weights']) model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) # predict on testset and calculate classification report and confusion matrix for diagnosis pred = model.predict(d['test_data'], steps=d['test_steps'], verbose=1) # instead of argmax, reduce list to only on-target predictions to see how accurate the model judged each shot target_preds = [pred[i][l] for i,l in enumerate(d['test_labels'])] pred = pred.argmax(axis=1) compute_accuracy_heatmaps(d, target_preds, cls_target, args['epochs']) return balanced_accuracy_score(d['test_labels'], pred)
def _construct_model_from_theta(self, theta):
    # print('type of theta: {}'.format(type(theta)))
    theta = nn.Parameter(theta)
    target_net_new = utils.build_model(self.args)
    # .state_dict() stores all the persistent buffers (e.g. running averages),
    # which are not included in .parameters()
    model_dict = self.target_net.state_dict()

    params, offset = {}, 0
    for k, v in self.target_net.named_parameters():
        v_length = np.prod(v.size())
        params[k] = theta[offset:offset + v_length].view(v.size())
        # print('type of params[k]: {}'.format(type(params[k])))
        offset += v_length

    assert offset == len(theta)
    model_dict.update(params)
    target_net_new.load_state_dict(model_dict)
    return target_net_new.cuda()
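# The snippet above unflattens a single theta vector back into named parameters;
# a minimal sketch of the companion flattening step, assuming the same
# named_parameters() ordering (the helper name is made up for illustration):
import torch

def _flatten_params(model):
    # concatenate every parameter tensor into one 1-D vector,
    # in the order named_parameters() yields them
    return torch.cat([v.detach().reshape(-1) for _, v in model.named_parameters()])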
def test_process(config):
    # prepare test data
    test_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'testing')
    test_loader = data.DataLoader(test_dataset, batch_size=1)
    # build the model
    model, optimizer = build_model(config, test_dataset.en_vocab_size,
                                   test_dataset.cn_vocab_size)
    print('Finish build model')
    loss_function = nn.CrossEntropyLoss(ignore_index=0)
    model.eval()
    # test the model
    test_loss, bleu_score, result = test(model, test_loader, loss_function)
    # save the results
    with open('./test_output.txt', 'w') as f:
        for line in result:
            print(line, file=f)
    return test_loss, bleu_score
def main(_):
    if FLAGS.gpu == -1:
        device = '/cpu:0'
    else:
        device = '/gpu:{}'.format(FLAGS.gpu)

    with tf.device(device):
        tf.random.set_seed(1234)

        # Load the dataset and process features and adj matrix
        print('Loading {} dataset...'.format(FLAGS.dataset))
        adj, features, labels, idx_train, idx_val, idx_test = load_dataset(
            FLAGS.dataset)
        num_classes = max(labels) + 1

        print('Build model...')
        model = build_model(FLAGS.model, FLAGS.num_layers, FLAGS.hidden_dim,
                            num_classes, FLAGS.dropout_rate)

        print('Start Training...')
        train(model, adj, features, labels, idx_train, idx_val, idx_test)
def run(corpus, ckpt_fpath):
    C.corpus = corpus
    if corpus == 'MSVD':
        C.loader = MSVDLoaderConfig
    elif corpus == 'MSR-VTT':
        C.loader = MSRVTTLoaderConfig
    else:
        raise NotImplementedError('Unknown corpus: {}'.format(corpus))

    checkpoint = torch.load(ckpt_fpath)

    train_iter, val_iter, test_iter, vocab = build_loaders(C)

    model = build_model(C, vocab)
    # reuse the checkpoint that was already loaded above
    model.load_state_dict(checkpoint)
    model.cuda()
    model.eval()

    scores, _, _, _ = score(model, test_iter, vocab)
    print(scores)
def main():
    """Prepares data for the conversion accuracy checker"""
    parser = argparse.ArgumentParser(description='antispoofing recognition live demo script')
    parser.add_argument('--config', type=str, default=None, required=True,
                        help='Configuration file')
    parser.add_argument('--spf_model_openvino', type=str, default=None,
                        help='path to .xml IR OpenVINO model', required=True)
    parser.add_argument('--spf_model_torch', type=str, default=None,
                        help='path to .pth.tar checkpoint', required=True)
    parser.add_argument('--device', type=str, default='CPU')
    args = parser.parse_args()
    config = utils.read_py_config(args.config)
    assert args.spf_model_openvino.endswith('.xml') and args.spf_model_torch.endswith('.pth.tar')
    spoof_model_torch = utils.build_model(config, args.device.lower(), strict=True, mode='eval')
    spoof_model_torch = TorchCNN(spoof_model_torch, args.spf_model_torch, config,
                                 device=args.device.lower())
    spoof_model_openvino = VectorCNN(args.spf_model_openvino)
    # running checker
    avg_diff = run(spoof_model_torch, spoof_model_openvino)
    print((f'mean difference on the first predicted class : {avg_diff[0]}\n' +
           f'mean difference on the second predicted class : {avg_diff[1]}'))
def main(length=40, num_epochs=20):
    ''' Build and train LSTM network to solve XOR problem '''
    X_train, y_train, X_test, y_test = generate_samples(length=length)
    model = build_model()
    history = model.fit(X_train, y_train, epochs=num_epochs, batch_size=32,
                        validation_split=0.10, shuffle=False)

    # Evaluate model on test set
    preds = model.predict(X_test)
    preds = np.round(preds[:, 0]).astype('float32')
    acc = (np.sum(preds == y_test) / len(y_test)) * 100
    print('Accuracy: {:.2f}%'.format(acc))

    # Plotting loss and accuracy
    model_plot(history)
    return
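# generate_samples is not shown here; a common formulation of the sequence-XOR task,
# offered only as a guess at what it might produce, is random bit strings labelled
# by their parity (the function signature and split sizes are assumptions):
import numpy as np

def generate_samples(length=40, n_train=100000, n_test=10000):
    # random binary sequences shaped (samples, timesteps, 1); label = XOR of all bits
    X = np.random.randint(0, 2, size=(n_train + n_test, length, 1)).astype('float32')
    y = (X.sum(axis=(1, 2)) % 2).astype('float32')
    return X[:n_train], y[:n_train], X[n_train:], y[n_train:]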
def main(): """Runs the script.""" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Running on the: " + str(device)) args = get_parsed_arguments() #Loading the data imagedatasets, dataloader = utils.load_data(path=args.data_dir, pin_memory=args.pin_mem) #Build the model model = utils.build_model(arch=args.arch, dropout=args.dropout, con_check=args.con_check) #Train model print("Training Model...") utils.train_model(model, dataloader["training"], dataloader["validation"], epoch=args.epoch, device=args.device) #Save model print("Saving Model") utils.save_checkpoint(model = model, train_data = imagedatasets["training"], check_name=args.check_name) print("Process Complete, you can now start predicting!")
def classify(**args): """ Main method that prepares dataset, builds model, executes training and displays results. :param args: keyword arguments passed from cli parser """ batch_size = 64 # determine classification targets and parameters to construct datasets properly cls_target, cls_str = set_classification_targets(args['cls_choice']) d = prepare_dataset(args['dataset_choice'], cls_target, batch_size) print('\n\tTask: Classify «{}» using «{}»\n'.format( cls_str, d['data_str'])) print_dataset_info(d) model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # train and evaluate model.fit(d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'], verbose=1, class_weight=d['class_weights']) print('Evaluate ...') model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) # predict on testset and calculate classification report and confusion matrix for diagnosis print('Test ...') pred = model.predict(d['test_data'], steps=d['test_steps']) diagnose_output(d['test_labels'], pred.argmax(axis=1), d['classes_trans'])
def run_roberta(strategy: tf.distribute.TPUStrategy, x_train: np.array, x_valid: np.array, _y_train: np.array, y_valid: np.array, train_dataset: tf.data.Dataset, valid_dataset: tf.data.Dataset, test_dataset: tf.data.Dataset, max_len: int, epochs: int, batch_size: int) -> tf.keras.models.Model: """ create and run distilibert on training and testing data """ logger.info('build roberta') with strategy.scope(): transformer_layer = TFAutoModel.from_pretrained(MODEL) model = build_model(transformer_layer, max_len=max_len) model.summary() # run model train n_steps = x_train.shape[0] // batch_size history = model.fit( train_dataset, steps_per_epoch=n_steps, validation_data=valid_dataset, epochs=epochs ) plot_train_val_loss(history, 'xlm_roberta') n_steps = x_valid.shape[0] // batch_size _train_history_2 = model.fit( valid_dataset.repeat(), steps_per_epoch=n_steps, epochs=epochs ) scores = model.predict(test_dataset, verbose=1) logger.info(f"AUC: {roc_auc(scores, y_valid):.4f}") return model
def main(): args = active_args.get_arg_parser().parse_args() # determine device device = 'cuda' if torch.cuda.is_available() and args.cuda else 'cpu' print("using device {} ...".format(device)) model_type = 'bilstm_crf' if args.train_bi_lstm else 'elmo_bilstm_crf' model_type = 'dictionary' if args.train_dictionary else model_type model_type = 'cached' if args.train_cached else model_type model_type = 'phrase_dictionary' if args.train_phrase_dictionary else model_type out = dataset_utils.load_dataset(args, force_load=True) train_dataset, valid_dataset, train_vocab, output_categories = out if args.binary_classifier: b_class = args.binary_classifier print('converting to a binary problem for class: {}'.format(b_class)) output_categories = BinaryVocab(output_categories, select_class=b_class) # phrase: 69 F1 Drug 791 examples # phrase: 58 F1 ADR 791 examples # word: 69 F1 Drug 791 examples # word: 59 F1 ADR 791 examples # build unlabeled corpus unlabeled_corpus = conlldataloader.ConllDataSetUnlabeled(train_dataset) model = utils.build_model( model_type=model_type, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim, batch_size=args.batch_size, vocab=train_vocab, tag_vocab=output_categories, ).to(device) if model_type == 'cached': model.embedder.cache_dataset(unlabeled_corpus, verbose=True, device=device) # created a simulated oracle with all the ground truth values sim_oracle = oracle.SimulatedOracle(train_dataset) # heuristic if args.heuristic == constants.ACTIVE_LEARNING_RANDOM_H: h = active_heuristic.Random(train_vocab, output_categories) elif args.heuristic == constants.ACTIVE_LEARNING_UNCERTAINTY_H: h = active_heuristic.Uncertantiy(train_vocab, output_categories) elif args.heuristic == constants.ACTIVE_LEARNING_KNN: h = active_heuristic.KNNEmbeddings(train_vocab, output_categories) h.prepare( model=model, dataset=unlabeled_corpus, device=device, ) else: raise Exception("Unknown heurisitc: {}".format(args.heuristic)) active_train( log_dir=args.log_dir, model=model, model_path=args.model_path, unlabeled_dataset=unlabeled_corpus, test_dataset=valid_dataset, # active learning parameters iterations=args.iterations, heuritic=h, oracle=sim_oracle, sample_size=args.sample_size, sampling_strategy=args.sampling_strategy, # train parameters vocab=train_vocab, tag_vocab=output_categories, batch_size=args.batch_size, shuffle=args.shuffle, num_workers=args.num_workers, num_epochs=args.num_epochs, learning_rate=args.learning_rate, weight_decay=args.weight_decay, momentum=args.momentum, optimizer_type=args.optimizer_type, # Other parameters device=device, summary_file=args.summary_file, )
def classify(**args): """ Main method that prepares dataset, builds model, executes training and displays results. :param args: keyword arguments passed from cli parser """ # only allow print-outs if execution has no repetitions allow_print = args['repetitions'] == 1 # determine classification targets and parameters to construct datasets properly cls_target, cls_str = set_classification_targets(args['cls_choice']) d = prepare_dataset(0, cls_target, args['batch_size'], args['norm_choice']) print('\n\tTask: Classify «{}» using «{}»'.format(cls_str, d['data_str'])) print_dataset_info(d) model = build_model(0, d['num_classes'], name='baseline_mlp', new_input=True) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # train and evaluate - pre-transfer model.fit(d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'], verbose=1, class_weight=d['class_weights']) print('Evaluate ...') model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) del d d = prepare_dataset( 1, # HH12 cls_target, args['batch_size'], args['norm_choice']) print_dataset_info(d) # make layers untrainable and remove classification layer, then train new last layer on handheld data for l in model.layers[:-1]: l.trainable = False if allow_print: plot_model(model, to_file='img/transfer_mlp_pre.png') new_layer = Dense(d['num_classes'], activation='softmax', name='dense_transfer')(model.layers[-2].output) model = Model(inputs=model.inputs, outputs=new_layer, name='transfer_model') model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) if allow_print: model.summary() print('') plot_model(model, to_file='img/transfer_mlp_post.png') # train and evaluate - post-transfer model.fit(d['train_data'], steps_per_epoch=d['train_steps'], epochs=args['epochs'] * 2, verbose=1, class_weight=d['class_weights']) print('Evaluate ...') model.evaluate(d['eval_data'], steps=d['test_steps'], verbose=1) # predict on testset and calculate classification report and confusion matrix for diagnosis print('Test ...') pred = model.predict(d['test_data'], steps=d['test_steps']) diagnose_output( d['test_labels'], pred.argmax(axis=1), d['classes_trans'], show=False, file_name= f'heatmap_transfer_{datetime.now().hour}_{datetime.now().minute}') return balanced_accuracy_score(d['test_labels'], pred.argmax(axis=1))
"delay_dur": 100, "resp_dur": 25, "kappa": 2.0, "spon_rate": 0.1, "tr_max_iter": 25001, "test_max_iter": 2501 } # Build task generators generator, test_generator = build_generators(ExptDict) # Define the input and expected output variable input_var, target_var = T.tensor3s('input', 'target') # Build the model l_out, l_rec = build_model(input_var, ExptDict) # The generated output variable and the loss function if ExptDict["task"]["task_id"] in ['DE1', 'DE2', 'GDE2', 'VDE1', 'SINE']: pred_var = lasagne.layers.get_output(l_out) elif ExptDict["task"]["task_id"] in [ 'CD1', 'CD2', 'Harvey2012', 'Harvey2012Dynamic', 'Harvey2016', 'COMP' ]: pred_var = T.clip(lasagne.layers.get_output(l_out), 1e-6, 1.0 - 1e-6) # Build loss rec_act = lasagne.layers.get_output(l_rec) l2_penalty = T.mean( lasagne.objectives.squared_error(rec_act[:, -5:, :], 0.0)) * 1e-4 l2_params = regularize_network_params(l_out, l2, tags={'trainable': True})
splitPoint = int(np.ceil(len(minified_data) * 0.95))
train_data = ''.join(minified_data[:splitPoint])
test_data = ''.join(minified_data[splitPoint:])

char_to_idx = {ch: i for (i, ch) in enumerate(sorted(list(set(train_data + test_data))))}
idx_to_char = {i: ch for (ch, i) in char_to_idx.items()}
vocab_size = len(char_to_idx)

with open('../data/github_test_chars', 'r') as f:
    project_seed = pickle.load(f)

initial_seed = ''.join(project_seed)
initial_seed = initial_seed.replace('\x1b', '\x0a')
missingKeys = set(initial_seed) - set(char_to_idx)

print 'Working on %d characters (%d unique).' % (len(train_data + test_data), vocab_size)

model = build_model(True, 1024, 1, 1, 3, vocab_size)
model.load_weights(path)
model.reset_states()

start_time = time.time()
for c in [char_to_idx[c] for c in initial_seed]:
    batch = np.zeros((1, 1, vocab_size))
    batch[0, 0, c] = 1
    model.predict_on_batch(batch)
print("--- %s seconds ---" % (time.time() - start_time))

sampled = [char_to_idx[c] for c in seed]
for c in seed:
    batch = np.zeros((1, 1, vocab_size))
    batch[0, 0, char_to_idx[c]] = 1
def main(reps, pretrained_w_path, do_module1, init_seed=0, load_t=0, num_epochs=200, batchsize=96, fine_tune=0, patience=500, lr_init = 1e-3, optim='adagrad', toy=0, num_classes=23): res_root = '/home/hoa/Desktop/projects/resources' X_path=osp.join(res_root, 'datasets/msrcv2/Xaug_b01c.npy') Y_path=osp.join(res_root, 'datasets/msrcv2/Y.npy') MEAN_IMG_PATH=osp.join(res_root, 'models/ilsvrc_2012_mean.npy') snapshot=50 # save model after every `snapshot` epochs drop_p=0.5 # drop out prob. lambda2=0.0005/2 # l2-regularizer constant # step=patience/4 # decay learning after every `step` epochs lr_patience=60 # for learning rate schedule, if optim=='momentum' if toy: # unit testing num_epochs=10 data_multi=3 reps = 2 #drop_p=0 #lambda2=0 # Create name tag for the experiment if fine_tune: full_or_tune = 'tune' # description tag for storing associated files else: full_or_tune = 'full' time_stamp=time.strftime("%y%m%d%H%M%S", time.localtime()) snapshot_root = '../snapshot_models/' snapshot_name = str(num_classes)+'alex'+time_stamp+full_or_tune # LOADING DATA print 'LOADING DATA ...' X = np.load(X_path) Y = np.load(Y_path) if X.shape[1]!=3: X = b01c_to_bc01(X) N = len(Y) print 'Raw X,Y shape', X.shape, Y.shape if len(X) != len(Y): print 'Inconsistent number of input images and labels. X is possibly augmented.' MEAN_IMG = np.load(MEAN_IMG_PATH) MEAN_IMG_227 = skimage.transform.resize( np.swapaxes(np.swapaxes(MEAN_IMG,0,1),1,2), (227,227), mode='nearest', preserve_range=True) MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227,1,2),0,1).reshape((1,3,227,227)) all_metrics = [] # store metrics in each run time_profiles = { 'train_module1': [], 'train_module1_eff': [], 'train_module2': [], 'test': [] } # record training and testing time # PREPARE THEANO EXPRESSION FOR BOTH MODULES print 'COMPILING THEANO EXPRESSION ...' 
input_var = T.tensor4('inputs') target_var = T.imatrix('targets') network = build_model(num_classes=num_classes, input_var=input_var) # Create a loss expression for training prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.binary_crossentropy(prediction, target_var) weights = lasagne.layers.get_all_params(network, regularizable=True) l2reg = theano.shared(floatX(lambda2))*T.sum([T.sum(w ** 2) for w in weights]) loss = loss.mean() + l2reg lr = theano.shared(np.array(lr_init, dtype=theano.config.floatX)) lr_decay = np.array(1./3, dtype=theano.config.floatX) # Create update expressions for training params = lasagne.layers.get_all_params(network, trainable=True) # last-layer case is actually very simple: # `params` above is a list of all (W,b)-pairs # Therefore last layer's (W,b) is params[-2:] if fine_tune == 7: # tuning params from fc7 to fc8 params = params[-2:] # elif fine_tune == 6: # tuning params from fc6 to fc8 # params = params[-4:] # TODO adjust for per-layer training with local_lr if optim=='momentum': updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9) elif optim=='rmsprop': updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr, rho=0.9, epsilon=1e-06) elif optim=='adam': updates = lasagne.updates.adam( loss, params, learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-08) elif optim=='adagrad': updates = lasagne.updates.adagrad(loss, params, learning_rate=lr, epsilon=1e-06) # Create a loss expression for validation/testing test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() + l2reg # zero-one loss with threshold t = 0.5 for reference # zero_one_loss = T.abs_((test_prediction > theano.shared(floatX(0.5))) - target_var).sum(axis=1) #zero_one_loss /= target_var.shape[1].astype(theano.config.floatX) #zero_one_loss = zero_one_loss.mean() # Compile a function performing a backward pass (training step) on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: bwd_fn = theano.function([input_var, target_var], loss, updates=updates,) # Compile a second function performing a forward pass, # returns validation loss, 0/1 Error, score i.e. Xout: fwd_fn = theano.function([input_var, target_var], test_loss) # Create a theano function for computing score score = lasagne.layers.get_output(network, deterministic=True) score_fn = theano.function([input_var], score) def compute_score(X, Y, batchsize=batchsize, shuffle=False): out = np.zeros(Y.shape) batch_id = 0 for batch in iterate_minibatches(X, Y, batchsize, shuffle=False): inputs, _ = batch # Flip random half of the batch flip_idx = np.random.choice(len(inputs),size=len(inputs)/2,replace=False) if len(flip_idx)>1: inputs[flip_idx] = inputs[flip_idx,:,:,::-1] # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead if len(inputs)==batchsize: out[batch_id*batchsize : (batch_id+1)*batchsize] = score_fn(inputs) batch_id += 1 else: out[batch_id*batchsize : ] = score_fn(inputs) return out try: # MAIN LOOP FOR EACH RUN for seed in np.arange(reps)+init_seed: # reset learning rate lr.set_value(lr_init) print '\nRUN', seed, '...' 
# Split train/val/test set indicies = np.arange(len(Y)) Y_train_val, Y_test, idx_train_val, idx_test = train_test_split( Y, indicies, random_state=seed, train_size=float(2)/3) Y_train, Y_val, idx_train, idx_val = train_test_split( Y_train_val, idx_train_val, random_state=seed) print "Train/val/test set size:",len(idx_train),len(idx_val),len(idx_test) idx_aug_train = data_aug(idx_train, mode='aug', isMat='idx', N=N) Xaug_train = X[idx_aug_train] Yaug_train = data_aug(Y_train, mode='aug', isMat='Y', N=N) idx_aug_val = data_aug(idx_val, mode='aug', isMat='idx', N=N) Xaug_val = X[idx_aug_val] Yaug_val = data_aug(Y_val, mode='aug', isMat='Y', N=N) # Module 2 training set is composed of module 1 training and validation set idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx', N=N) Xaug_train_val = X[idx_aug_train_val] Yaug_train_val = data_aug(Y_train_val, mode='aug', isMat='Y', N=N) # Test set X_test = X[idx_test] # Y_test is already returned in the first train_test_split print "Augmented train/val/test set size:",len(Xaug_train),len(Yaug_val), len(X_test) print "Augmented (X,Y) dtype:", Xaug_train.dtype, Yaug_val.dtype print "Processed Mean image:",MEAN_IMG.dtype,MEAN_IMG.shape if toy: # try to overfit a tiny subset of the data Xaug_train = Xaug_train[:batchsize*data_multi + batchsize/2] Yaug_train = Yaug_train[:batchsize*data_multi + batchsize/2] Xaug_val = Xaug_val[:batchsize + batchsize/2] Yaug_val = Yaug_val[:batchsize + batchsize/2] # Init by pre-trained weights, if any if len(pretrained_w_path)>0: layer_list = lasagne.layers.get_all_layers(network) # 22 layers if pretrained_w_path.endswith('pkl'): # load reference_net # use case: weights initialized from pre-trained reference nets f = open(pretrained_w_path, 'r') w_list = pickle.load(f) # list of 11 (W,b)-pairs f.close() lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2]) # exclude (W,b) of fc8 # BIG NOTE: don't be confused, it's pure coincident that layer_list # and w_list have the same index here. The last element of layer_list are # [.., fc6, drop6, fc7, drop7, fc8], while w_list are # [..., W, b, W, b, W, b] which, eg w_list[-4] and w_list[-3] correspond to # params that are associated with fc7 i.e. 
params that connect drop6 to fc7 elif pretrained_w_path.endswith('npz'): # load self-trained net # use case: continue training from a snapshot model with np.load(pretrained_w_path) as f: # NOTE: only load snapshot of the same `seed` # w_list = [f['arr_%d' % i] for i in range(len(f.files))] w_list = [f.items()['arr_%d' % i] for i in range(len(f.files))] # load from bkviz, one-time use lasagne.layers.set_all_param_values(network, w_list) elif pretrained_w_path.endswith('/'): # init from 1 of the 30 snapshots from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) print snapshot_name with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) # START MODULE 1 module1_time = 0 if do_module1: print 'MODULE 1' training_history={} training_history['iter_training_loss'] = [] training_history['iter_validation_loss'] = [] training_history['training_loss'] = [] training_history['validation_loss'] = [] training_history['learning_rate'] = [] # http://deeplearning.net/tutorial/gettingstarted.html#early-stopping # early-stopping parameters n_train_batches = Xaug_train.shape[0] / batchsize if Xaug_train.shape[0] % batchsize != 0: n_train_batches += 1 patience = patience # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found lr_patience_increase = 1.01 improvement_threshold = 0.995 # a relative improvement of this much is # considered significant; a significant test # MIGHT be better validation_frequency = min(n_train_batches, patience/2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_params = None epoch_validation_loss = 0 # indicates that valid_loss has not been computed yet best_validation_loss = np.inf best_iter = -1 lr_iter = -1 test_score = 0. start_time = time.time() done_looping = False epoch = 0 # Finally, launch the training loop. 
print("Starting training...") # We iterate over epochs: print("\nEpoch\tTrain Loss\tValid Loss\tBest-ValLoss-and-Iter\tTime\tL.Rate") sys.setrecursionlimit(10000) try: # Early-stopping implementation while (not done_looping) and (epoch<num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(Xaug_train, Yaug_train, batchsize, shuffle=True): inputs, targets = batch # Horizontal flip half of the images bs = inputs.shape[0] indices = np.random.choice(bs, bs / 2, replace=False) inputs[indices] = inputs[indices, :, :, ::-1] # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead train_err_batch = bwd_fn(inputs, targets) train_err += train_err_batch train_batches += 1 iter_now = epoch*n_train_batches + train_batches training_history['iter_training_loss'].append(train_err_batch) training_history['iter_validation_loss'].append(epoch_validation_loss) if (iter_now+1) % validation_frequency == 0: # a full pass over the validation data: val_err = 0 #zero_one_err = 0 val_batches = 0 for batch in iterate_minibatches(Xaug_val, Yaug_val, batchsize, shuffle=False): inputs, targets = batch # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead val_err_batch = fwd_fn(inputs, targets) val_err += val_err_batch val_batches += 1 epoch_validation_loss = val_err / val_batches if epoch_validation_loss < best_validation_loss: if epoch_validation_loss < best_validation_loss*improvement_threshold: patience = max(patience, iter_now * patience_increase) # lr_patience *= lr_patience_increase best_params = lasagne.layers.get_all_param_values(network) best_validation_loss = epoch_validation_loss best_iter = iter_now lr_iter = best_iter else: # decay learning rate if optim=='momentum' if optim=='momentum' and (iter_now - lr_iter) > lr_patience: lr.set_value(lr.get_value() * lr_decay) lr_iter = iter_now if patience <= iter_now: done_looping = True break # Record training history training_history['training_loss'].append(train_err / train_batches) training_history['validation_loss'].append(epoch_validation_loss) training_history['learning_rate'].append(lr.get_value()) epoch_time = time.time() - start_time module1_time += epoch_time # Then we print the results for this epoch: print("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{}\t{:.3f}\t{}".format( epoch+1, training_history['training_loss'][-1], training_history['validation_loss'][-1], best_validation_loss, best_iter+1, epoch_time, training_history['learning_rate'][-1] )) if (epoch+1)%snapshot==0: # TODO try to save weights at best_iter snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1) try: # use case: terminate experiment before reaching `reps` np.savez(snapshot_path_string+'.npz', *best_params) np.savez(snapshot_path_string+'_history.npz', training_history) plot_loss(training_history, snapshot_path_string+'_loss.png') # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], # snapshot_path_string+'_conv1weights_') except KeyboardInterrupt, TypeError: print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1) pass epoch += 1 except KeyboardInterrupt, MemoryError: # Sadly this can only catch KeyboardInterrupt pass print 'Training finished or KeyboardInterrupt (Training is never finished, only abandoned)' module1_time_eff = module1_time / 
iter_now * best_iter print('Total and Effective training time are {:.0f} and {:.0f}').format( module1_time, module1_time_eff) time_profiles['train_module1'].append(module1_time) time_profiles['train_module1_eff'].append(module1_time_eff) # Save model after num_epochs or KeyboardInterrupt if (epoch+1)%snapshot!=0: # to avoid duplicate save snapshot_path_string = snapshot_root+snapshot_name+str(seed)+'_'+str(iter_now+1) if not toy: try: # use case: terminate experiment before reaching `reps` print 'Saving model...' np.savez(snapshot_path_string+'.npz', *best_params) np.savez(snapshot_path_string+'_history.npz', training_history) plot_loss(training_history, snapshot_path_string+'_loss.png') # plot_conv_weights(lasagne.layers.get_all_layers(network)[1], # snapshot_path_string+'_conv1weights_') except KeyboardInterrupt, TypeError: print 'Did not save', snapshot_name+str(seed)+'_'+str(iter_now+1) pass # And load them again later on like this: #with np.load('../snapshot_models/23alex16042023213910.npz') as f: # param_values = [f['arr_%d' % i] for i in range(len(f.files))] # or # training_history = f['arr_0'].items() # lasagne.layers.set_all_param_values(network, param_values) # END OF MODULE 1 # START MODULE 2 print '\nMODULE 2' if not do_module1: if pretrained_w_path.endswith('pkl'): snapshot_name = str(num_classes)+'alexOTS' # short for "off-the-shelf init" elif pretrained_w_path.endswith('npz'): # Resume from a SINGLE snapshot # extract name pattern, e.g. '23alex16042023213910full10' # from string '../snapshot_models/23alex16042023213910full10_100.npz' import re regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+" match = re.search(regex, pretrained_w_path) snapshot_name = match.group(0) elif pretrained_w_path.endswith('/'): # RESUMED FROM TRAINED MODULE 1 (ONE-TIME USE) from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) print snapshot_name with np.load(osp.join(pretrained_w_path,snapshot_name)+'.npz') as f: w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) else: # MAIN BRANCH - assume do_module1 is True AND have run `snapshot` epochs if (epoch+1)>snapshot: with np.load(snapshot_path_string+'.npz') as f: # reload the best params for module 1 w_list = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, w_list) score_train = compute_score(Xaug_train_val, Yaug_train_val) start_time = time.time() if load_t: # Server failed at the wrong time. We only have t backed-up if pretrained_w_path.endswith('/'): from os import listdir import re files = [f for f in listdir(pretrained_w_path) if osp.isfile(osp.join(pretrained_w_path, f))] for file_name in files: regex_seed = 'full%d_' %seed match_seed = re.search(regex_seed, file_name) if match_seed: regex = r"\d+[a-zA-Z]+\d+[a-zA-Z]+\d+\_\d+" match = re.search(regex, file_name) snapshot_name = match.group(0) t_train = np.load(osp.join('t','{0}.npy'.format(snapshot_name))) else: # MAIN BRANCH thresholds = Threshold(score_train, Yaug_train_val) thresholds.find_t_for() # determine t_train for each score_train. 
It will take a while t_train = np.asarray(thresholds.t) print 't_train is in ', t_train.min(), '..', t_train.max() # `thresholds` holds t_train vector in .t attribute print('t_train produced in {:.3f}s').format(time.time()-start_time) np.save('t/'+snapshot_name+str(seed)+'.npy', t_train) # Predictive model for t regr = linear_model.RidgeCV(cv=5) # Ridge() is LinearClassifier() with L2-reg regr.fit(score_train, t_train) time_profiles['train_module2'].append(time.time()-start_time) # END OF MODULE 2 # TESTING PHASE start_time = time.time() score_test = compute_score(X_test, Y_test) t_test = regr.predict(score_test) print 'original t_test is in ', min(t_test), '..', max(t_test) t_test[t_test>1] = max(t_test[t_test<1]) t_test[t_test<0] = min(t_test[t_test>0]) # ! Keep t_test in [0,1] print 'corrected t_test is in ', min(t_test), '..', max(t_test) # Predict label metrics = predict_label(score_test, Y_test, t_test, seed, num_classes, verbose=1) time_profiles['test'].append(time.time()-start_time) all_metrics.append(metrics)
def main(reps, pretrained_w_path, batchsize, init_seed=0, verbose=1, num_classes=374, mode='ots', load_t=0, save_clf=1): res_root = '/home/hoa/Desktop/projects/resources' X_path=osp.join(res_root, 'datasets/corel5k/X_train_rgb.npy') Y_path=osp.join(res_root, 'datasets/corel5k/Y_train.npy') MEAN_IMG_PATH=osp.join(res_root, 'models/ilsvrc_2012_mean.npy') # baseline_msrcv2_net = build_model(pretrained_w_path, num_classes) ### LOADING DATA print 'LOADING DATA ...' X = np.load(X_path) Y = np.load(Y_path) N = len(Y) print 'Raw X,Y shape', X.shape, Y.shape if len(X) != len(Y): print 'Inconsistent number of input images and labels. X is possibly augmented.' MEAN_IMG = np.load(MEAN_IMG_PATH) MEAN_IMG_227 = skimage.transform.resize( np.swapaxes(np.swapaxes(MEAN_IMG,0,1),1,2), (227,227), mode='nearest', preserve_range=True) MEAN_IMG = np.swapaxes(np.swapaxes(MEAN_IMG_227,1,2),0,1).reshape((1,3,227,227)) # Prepare Theano variables for inputs input_var = T.tensor4('inputs') network = build_model(num_classes=num_classes, input_var=input_var) layer_list = lasagne.layers.get_all_layers(network) # 22 layers features = lasagne.layers.get_output(layer_list[-3], # get 'fc7' in network deterministic=True) feat_fn = theano.function([input_var], features) def compute_feature(X, Y, batchsize=batchsize, shuffle=False): out = np.zeros((len(Y), 4096)) batch_id = 0 for batch in iterate_minibatches(X, Y, batchsize, shuffle=False): inputs, _ = batch # Flip random half of the batch flip_idx = np.random.choice(len(inputs),size=len(inputs)/2,replace=False) if len(flip_idx)>1: inputs[flip_idx] = inputs[flip_idx,:,:,::-1] # Substract mean image inputs = (inputs - MEAN_IMG).astype(theano.config.floatX) # MEAN_IMG is broadcasted numpy-way, take note if want theano expression instead if len(inputs)==batchsize: out[batch_id*batchsize : (batch_id+1)*batchsize] = feat_fn(inputs) batch_id += 1 else: out[batch_id*batchsize : ] = feat_fn(inputs) return out all_metrics = [] # store all evaluation metrics for seed in np.arange(reps)+init_seed: print '\nRUN', seed, '...' 
# Split train/val/test set # indicies = np.arange(len(Y)) # Y_train_val, Y_test, idx_train_val, idx_test = train_test_split( # Y, indicies, random_state=seed, train_size=float(2)/3) # # Y_train, Y_val, idx_train, idx_val = train_test_split( # Y_train_val, idx_train_val, random_state=seed) # print "Train/val/test set size:",len(idx_train),len(idx_val),len(idx_test) # idx_aug_train = data_aug(idx_train, mode='aug', isMat='idx') # Xaug_train = X[idx_aug_train] # Yaug_train = data_aug(Y_train, mode='aug', isMat='Y') # idx_aug_val = data_aug(idx_val, mode='aug', isMat='idx') # Xaug_val = X[idx_aug_val] # Yaug_val = data_aug(Y_val, mode='aug', isMat='Y') # Module 2 training set is composed of module 1 training and validation set idx_train_val = np.arange(len(Y)) # idx_aug_train_val = data_aug(idx_train_val, mode='aug', isMat='idx') # Xaug_train_val = X[idx_aug_train_val] # Yaug_train_val = data_aug(Y, mode='aug', isMat='Y') Xaug_train_val = data_aug(X, mode='noaug', isMat='X', N=N) if Xaug_train_val.shape[1]!=3: Xaug_train_val = b01c_to_bc01(Xaug_train_val) Yaug_train_val = Y # Test set X_test = np.load(osp.join(res_root,'datasets/corel5k/X_test_rgb.npy')) if X_test.shape[1]!=3: X_test = b01c_to_bc01(X_test) Y_test = np.load(osp.join(res_root,'datasets/corel5k/Y_test.npy')) # load reference_net f = open(pretrained_w_path, 'r') w_list = pickle.load(f) # list of 11 (W,b)-pairs f.close() # Reset init weights lasagne.layers.set_all_param_values(layer_list[-3], w_list[:-2]) # exclude (W,b) of fc8 # BIG NOTE: don't be confused, it's pure coincident that layer_list # and w_list have the same index here. The last element of layer_list are # [.., fc6, drop6, fc7, drop7, fc8], while w_list are # [..., W, b, W, b, W, b] which, eg w_list[-4] and w_list[-3] correspond to # params that are associated with fc7 i.e. params that connect drop6 to fc7 ### Extracting features on fc7 feats_train = compute_feature(Xaug_train_val, Yaug_train_val) if mode=="ots": # OvR linear SVM classifier start_time = time.time() clf_path = '../snapshot_models/{0}{1}{2}.pkl'.format(num_classes,mode,seed) if osp.exists(clf_path): save_clf = 0 with open(clf_path, 'rb') as fid: clf = pickle.load(fid) print 'Loaded', clf_path else: clf = OneVsRestClassifier(LinearSVC()) clf.fit(feats_train, Yaug_train_val) if save_clf: with open(clf_path, 'wb') as fid: # save classifier pickle.dump(clf, fid) # Prediction on test set start_time = time.time() # Feature extraction on test set feats_test = compute_feature(X_test, Y_test) y_pred = clf.predict(feats_test) print('Prediction on test set: {:.1f}s').format(time.time()-start_time) elif mode=="tune": # Module 2 of CNN-AT, only train the label scorer print "MODULE 2" clf = OneVsRestClassifier(LogisticRegression(C=2000)) # C=1/5e-4 clf.fit(feats_train, Yaug_train_val) score_train = clf.predict_proba(feats_train) # LABEL THRESHOLDER if not load_t: start_time = time.time() thresholds = Threshold(score_train, Yaug_train_val) thresholds.find_t_for() # determine t_train for each score_train. 
It will take a while t_train = np.asarray(thresholds.t) print 't_train is in ', t_train.min(), '..', t_train.max() # `thresholds` holds t_train vector in .t attribute print('t_train produced in {:.3f}s').format(time.time()-start_time) np.save(osp.join('t', "{0}tune{1}.npy".format(num_classes,seed)), t_train) else: print 'Loading t_train in {0}tune{1}.npy'.format(num_classes,seed) t_train = np.load(osp.join('t', "{0}tune{1}.npy".format(num_classes,seed))) # ## Ridge regression for predicting t regr = RidgeCV(cv=5) # Ridge() is LinearClassifier() with L2-reg regr.fit(score_train, t_train) # TESTING PHASE start_time = time.time() feats_test = compute_feature(X_test, Y_test) score_test = clf.predict_proba(feats_test) t_test = regr.predict(score_test) print 'original t_test is in ', min(t_test), '..', max(t_test) epsilon = 1e-6 t_test[t_test>1] = max(t_test[t_test<1]) - epsilon t_test[t_test<0] = 0 # ! Keep t_test in [0,1] print 'corrected t_test is in ', min(t_test), '..', max(t_test) y_pred = score_test > t_test.reshape((len(t_test),1)) # Evaluate k=5 if k: # Evaluate@k idx_k = np.where(y_pred.sum(1)==k) # Extract examples annotated by exactly k labels Y_test = Y_test[idx_k] y_pred = y_pred[idx_k] print "Nr. of test images: %d" %len(idx_k[0]) metrics = produce_metrics(Y_test, y_pred, seed, num_classes, verbose=verbose) all_metrics.append(metrics) print '\nFINAL ESTIMATES FOR {0} IN {1} RUNS'.format(mode, len(all_metrics)) estimate_metrics(all_metrics) np.save(osp.join('metrics',"{0}{1}_allmetrics.npy".format(num_classes,mode)), all_metrics)
LAYERS = 3
NUM_EPOCHS = 100

# Data loading
with open('../data/npm_chars_shuf', 'rb') as f:
    minified_data = pickle.load(f)

splitPoint = int(np.ceil(len(minified_data) * 0.90))
train_data = ''.join(minified_data[:splitPoint])
test_data = ''.join(minified_data[splitPoint:])

char_to_idx = {ch: i for (i, ch) in enumerate(sorted(list(set(train_data + test_data))))}
idx_to_char = {i: ch for (ch, i) in char_to_idx.items()}
vocab_size = len(char_to_idx)

print 'Working on %d characters (%d unique).' % (len(train_data + test_data), vocab_size)

training_model = build_model(False, LSTM_SIZE, BATCH_SIZE, SEQ_LEN, LAYERS, vocab_size)
test_model = build_model(True, LSTM_SIZE, BATCH_SIZE, SEQ_LEN, LAYERS, vocab_size)
print training_model.summary()

starting_epoch = 0
avg_train_loss = 0
avg_train_acc = 0
avg_test_loss = 0
avg_test_acc = 0
prev_loss = 100

if path_to_model:
    training_model.load_weights(path_to_model)
    # TODO: Fix double digit epoch numbers
    starting_epoch = int(path_to_model[-4])  # Conventionally take the number before the extension as an epoch to start