def main(cfg):
    """Dispatch to training, testing, or realtime inference based on cfg.mode.

    Args:
        cfg: configuration object with at least a `mode` attribute;
            `cfg.pretrained_model` is overwritten for the test/realtime paths.
    """
    # TODO: implement device selection (auto / cpu / gpu) and move the network
    # accordingly before dispatching.  (Removed a broken commented-out sketch
    # of this logic that contained a stray `else:q` token.)
    if cfg.mode.startswith("train"):
        run_train(cfg)
    elif cfg.mode.startswith("test"):
        # Evaluate the checkpoint with the best (minimum) ACER on validation.
        cfg.pretrained_model = r'global_min_acer_model.pth'
        run_test(cfg, dir='global_test_36_TTA')
    elif cfg.mode == 'realtime':
        cfg.pretrained_model = r'global_min_acer_model.pth'
        run_realtime(cfg)
    return
def train_prefv_naggn(s=2):
    """Train NAggN (l2 aggregation) on stage `s`, warm-starting the feature
    extractor from a pretrained resnet_si checkpoint when no NAggN
    checkpoint exists yet."""
    train_dataset = dataset.DrosophilaDataset(mode='train', stage=s)
    val_dataset = dataset.DrosophilaDataset(mode='val', stage=s)
    test_dataset = dataset.DrosophilaDataset(mode='test', stage=s)
    cfg = util.default_cfg()
    cfg['train'] = train_dataset
    cfg['val'] = val_dataset
    cfg['test'] = test_dataset
    cfg['batch'] = 32
    cfg['lr'] = 0.0001
    cfg['model'] = 'prefv_naggn_l2'
    cfg['model_dir'] = 'modeldir/stage%d/prefv_naggn_l2' % s
    cfg['collate'] = dataset.fly_collate_fn
    cfg['instance'] = train._train_mi
    model_pth = os.path.join(cfg['model_dir'], 'model.pth')
    model = nn.DataParallel(naggn.NAggN(agg='l2').cuda())
    if os.path.exists(model_pth):
        # Resume a previous NAggN run.
        ckp = torch.load(model_pth)
        model.load_state_dict(ckp['model'])
        cfg['step'] = ckp['epoch'] + 1
        print("load pretrained model", model_pth, "start epoch:", cfg['step'])
    else:
        fv_model_dir = 'modeldir/stage%d/resnet_si' % s
        fv_model_pth = os.path.join(fv_model_dir, 'model.pth')
        ckp = torch.load(fv_model_pth)
        # BUG FIX: the original `model.state_dict().update(ckp['model'])`
        # mutated a detached copy of the state dict and never changed the
        # model's parameters, so the resnet_si weights were silently ignored.
        # strict=False loads the matching fv-extractor weights while the
        # NAggN-only parameters keep their fresh initialization.
        model.load_state_dict(ckp['model'], strict=False)
        print("load fvextractor from pretrained resnet_si")
    # for p in model.module.fvextractor.parameters():
    #     p.requires_grad = False
    train.run_train(model, cfg)
def train_transformer(s=2):
    """Train the end-to-end transformer on stage `s` data, resuming from
    `model.pth` in the model directory when one exists."""
    cfg = util.default_cfg()
    cfg['train'] = dataset.DrosophilaDataset(mode='train', stage=s)
    cfg['val'] = dataset.DrosophilaDataset(mode='val', stage=s)
    cfg['test'] = dataset.DrosophilaDataset(mode='test', stage=s)
    cfg.update({
        'batch': 32,
        'lr': 0.00001,
        'model': 'transformer',
        'model_dir': 'modeldir/stage%d/transformer' % s,
        'collate': dataset.fly_collate_fn,
        'instance': train._train_mi,
    })
    net = nn.DataParallel(transformer.E2ETransformer().cuda())
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_sequence_si():
    """Sweep single-image (si) training over sequence lengths `ks` and FEC
    loss weights; fec == 0 is the plain-BCE baseline, odd weights are
    skipped."""
    ks = [10, 20, 30]
    fecs = [[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]]
    logdir = 'modeldir/stage_all/seq_si/'
    from loss import FECLoss
    for i, k in enumerate(ks):
        for fec in fecs[i]:
            # FIX: only even FEC weights are trained; the guard used to run
            # AFTER _allrun_config_si(k) built a full config that was then
            # thrown away.  Hoisted so skipped combinations do no work.
            if fec % 2 == 1:
                continue
            cfg = _allrun_config_si(k)
            cfg['lr'] = 0.00001
            cfg['batch'] = 64
            cfg['epochs'] = 200
            cfg['scheduler'] = True
            cfg['patience'] = 30
            cfg['step'] = 0
            if fec == 0:
                # Plain BCE baseline (no FEC regularization).
                cfg['criterion'] = torch.nn.BCELoss()
                cfg['model'] = 'resnet18b4_si_k%d' % (k)
            else:
                # FEC strength scales with batch size.
                cfg['criterion'] = FECLoss(alpha=cfg['batch'] * fec)
                cfg['model'] = 'resnet18b4_si_k%d_fec%d' % (k, fec)
            cfg['model_dir'] = '%s/%s' % (logdir, cfg['model'])
            model = nn.DataParallel(sinet.SiNet(nblock=4, k=k).cuda())
            train.run_train(model, cfg)
def train_sequence_pre_dragn():
    """Grid-train PreDRAGN over sequence length, aggregation level, and
    with/without batch norm (currently only k=10, level=3)."""
    # Full sweeps were ks = [10, 20, 30] and levels = [1, 2, 3].
    ks = [10]
    levels = [3]
    withbns = [True, False]
    for k in ks:
        for l in levels:
            for withbn in withbns:
                cfg = _allrun_config_agg(k)
                if withbn:
                    cfg['model'] = 'pre_dragn_k%d_%dl-BN' % (k, l)
                else:
                    cfg['model'] = 'pre_dragn_k%d_%dl' % (k, l)
                cfg['model_dir'] = 'modeldir/agg_stage_all/%s' % cfg['model']
                cfg['lr'] = 0.0001
                cfg['batch'] = 32
                cfg['patience'] = 10
                cfg['epochs'] = 80
                net = nn.DataParallel(
                    dragn.PreDRAGN(k, agglevel=l, withbn=withbn).cuda())
                resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
                if os.path.exists(resume_pth):
                    # Resume from the last saved epoch for this config.
                    state = torch.load(resume_pth)
                    net.load_state_dict(state['model'])
                    cfg['step'] = state['epoch'] + 1
                    print("load pretrained:", resume_pth, "start:", cfg['step'])
                train.run_train(net, cfg)
def run_MATCH_with_PeTaL_data(cnf,
                              verbose=False,
                              do_split=True,
                              do_augment=True,
                              do_transform=True,
                              do_preprocess=True,
                              do_train=True,
                              do_eval=True,
                              infer_mode=False,
                              remake_vocab_file=False):
    """Run the MATCH-on-PeTaL pipeline: preprocessing, then optionally
    training and evaluation.

    Args:
        cnf (Dict): dictionary matching the structure of config.yaml.
        verbose (bool): verbose output.
        do_split (bool): perform the train-dev-test split.
        do_augment (bool): augment the training set.
        do_transform (bool): transform json to txt.
        do_preprocess (bool): preprocess txt into npy.
        do_train (bool): run training.
        do_eval (bool): run inference/evaluation.
        infer_mode (bool): run in inference mode.
        remake_vocab_file (bool): force vocab.npy / emb_init.npy recompute.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s:%(name)s] %(message)s")
    logger = logging.getLogger("main")
    if verbose:
        msg = ("Begin run_MATCH_with_PeTaL_data pipeline in inference mode."
               if infer_mode else
               "Begin run_MATCH_with_PeTaL_data pipeline.")
        logger.info(msg)
    # Preprocessing: split, augment, json->txt, txt->npy (see preprocess.py).
    preprocess(cnf, verbose, do_split, do_augment, do_transform,
               do_preprocess, infer_mode, remake_vocab_file)
    # Training (train.py), then testing/inference/evaluation (eval.py).
    if do_train:
        run_train(cnf, infer_mode, verbose)
    if do_eval:
        run_eval(cnf, infer_mode, verbose)
    if verbose:
        logger.info("End run_MATCH_with_PeTaL_data pipeline.")
def train_resnet_pj(s=2, k=10):
    """Train a 4-block SiNet on projection (pj) data for stage `s`."""
    cfg = train._config_pj_dataset(util.default_cfg(), s, k)
    cfg['model'] = 'resnet_pj_k%d' % (k)
    cfg['model_dir'] = 'modeldir/stage%d/resnet_pj_k%d' % (s, k)
    net = nn.DataParallel(sinet.SiNet(nblock=4, k=k).cuda())
    cfg = train._train_config_pj(net, cfg)
    train.run_train(net, cfg)
def train_resnet_si(s=2, k=10, val_index=4):
    """Train a 4-block SiNet on single-image (si) data for stage `s`,
    validating on fold `val_index`.

    NOTE(review): a second, differently-parameterized `train_resnet_si`
    is defined later in this module and shadows this one at import time.
    """
    cfg = util.default_cfg()
    cfg = train._config_si_dataset(cfg, s, k)
    cfg['model'] = 'resnet_si_k%d_val%d' % (k, val_index)
    cfg['model_dir'] = 'modeldir/stage%d/resnet_si_k%d_val%d' % (s, k, val_index)
    net = nn.DataParallel(sinet.SiNet(nblock=4, k=k).cuda())
    cfg = train._train_config_si(net, cfg)
    train.run_train(net, cfg)
def main():
    """Build hparams, then run either the inference test or training."""
    hparams = initialize()
    if not hparams.run_inference_test:
        # Normal path: build the training graph and train.
        inputs, embedding_info, loss, loss_summary, summaries = (
            graph.build_train_graph(hparams))
        train.run_train(hparams, inputs, embedding_info, loss, loss_summary,
                        summaries)
        return
    # Inference-test path uses a tiny fixed batch.
    hparams.batch_size = 2
    X_mixtures, phases, inference_summaries = graph.build_inference_graph(
        hparams)
    inference.run_inference(hparams, X_mixtures, phases, inference_summaries)
def train_smallnet_stratify_si(s=2, k=10):
    """Train SmallNet on stratified si data with FEC loss (alpha=64),
    cosine-ish scheduling enabled."""
    import loss
    cfg = util.default_cfg()
    cfg = train._config_stratify_si_dataset(cfg, s, k)
    cfg['criterion'] = loss.FECLoss(alpha=64)
    net = nn.DataParallel(sinet.SmallNet(k=k).cuda())
    cfg['model'] = 'smallnet_si_k%d_fec1' % (k)
    cfg['model_dir'] = 'modeldir/stage%d/smallnet_si_k%d_fec1' % (s, k)
    cfg = train._train_config_si(net, cfg)
    cfg['scheduler'] = True
    cfg['lr'] = 0.0001
    train.run_train(net, cfg)
def train_smallnet_stratify_pj_fecq(s=2, k=10):
    """Train SmallNet on stratified pj data with a weak FEC loss
    (alpha=8, i.e. 0.25x of a batch of 32), long schedule-free run."""
    from loss import FECLoss
    cfg = util.default_cfg()
    cfg = train._config_stratify_pj_dataset(cfg, s, k)
    net = nn.DataParallel(sinet.SmallNet(k=k).cuda())
    cfg['criterion'] = FECLoss(alpha=8)
    cfg['model'] = 'smallnet_pj_k%d_fec0.25' % (k)
    cfg['model_dir'] = 'modeldir/stage%d/smallnet_pj_k%d_fec0.25' % (s, k)
    cfg = train._train_config_pj(net, cfg)
    cfg.update({'scheduler': False, 'lr': 0.0001, 'epochs': 1000})
    train.run_train(net, cfg)
def train_resnet_stratify_si(s=2, k=10):
    """Train a 2-block SiNet on stratified si data with the default
    criterion.

    Earlier experiments with FECLoss(alpha=48) and SFocalLoss(gamma=1)
    as the criterion were tried and are disabled.
    """
    cfg = util.default_cfg()
    cfg = train._config_stratify_si_dataset(cfg, s, k)
    net = nn.DataParallel(sinet.SiNet(nblock=2, k=k).cuda())
    cfg['model'] = 'resnet18b2_si_k%d' % (k)
    cfg['model_dir'] = 'modeldir/stage%d/resnet18b2_si_k%d' % (s, k)
    cfg = train._train_config_si(net, cfg)
    cfg['scheduler'] = False
    train.run_train(net, cfg)
def main():
    """Print a training banner with the key hyperparameters, build the
    graph, and start training."""
    hparams = hyperparameters.hparams
    banner = "*" * 36
    print(banner)
    print("*********** Begin Train ************")
    print(banner)
    print("Model: %s" % hparams.model_name)
    print("Optimizer: %s" % hparams.optimizer_name)
    print("Data directory: %s" % hparams.data_dir)
    print("Log directory: %s" % hparams.log_dir)
    inputs, loss, train_op = model.build_graph(hparams)
    train.run_train(hparams, inputs, loss, train_op)
def train_naggn_agg(k=10):
    """Train NAggN with l1 aggregation on the all-stage agg dataset,
    resuming from a saved checkpoint when present."""
    cfg = _allrun_config_agg(k)
    cfg['model'] = 'naggn-l1'
    cfg['model_dir'] = 'modeldir/agg_stage_all/naggn-l1_k%d' % k
    cfg['lr'] = 0.0001
    cfg['nworker'] = 8
    net = nn.DataParallel(naggn.NAggN(agg='l1').cuda())
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_tinynet_stratify_si(s=2, k=10):
    """Train TinyNet on stratified si data with FEC loss (alpha=32,
    i.e. 0.5x of a batch of 64)."""
    from loss import FECLoss
    cfg = util.default_cfg()
    cfg = train._config_stratify_si_dataset(cfg, s, k)
    cfg['criterion'] = FECLoss(alpha=32)
    cfg['model'] = 'tinynet_si_k%d_fec0.5' % (k)
    cfg['model_dir'] = 'modeldir/stage%d/tinynet_si_k%d_fec0.5' % (s, k)
    # Plain per-sample collation and the single-image training step.
    cfg['collate'] = default_collate
    cfg['instance'] = train._train_si
    net = nn.DataParallel(sinet.TinyNet(k=k).cuda())
    cfg = train._train_config_si(net, cfg)
    train.run_train(net, cfg)
def train_transformer_agg(k=10):
    """Train the end-to-end transformer on the all-stage agg dataset,
    resuming from a saved checkpoint when present."""
    cfg = _allrun_config_agg(k)
    cfg['model'] = 'transformer'
    cfg['model_dir'] = 'modeldir/agg_stage_all/transformer_k%d' % k
    cfg.update({'lr': 0.0001, 'nworker': 8, 'batch': 16})
    net = nn.DataParallel(transformer.E2ETransformer().cuda())
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_senet_pj(k=10):
    """Train FlySENet on pj data (all stages), resuming when a
    checkpoint exists."""
    cfg = _allrun_config_pj(k)
    net = nn.DataParallel(sinet.FlySENet(k=k).cuda())
    cfg['model'] = 'senet_pj_k%d' % (k)
    cfg['model_dir'] = 'modeldir/stage_all/senet_pj_k%d' % (k)
    cfg['lr'] = 0.0001
    cfg['scheduler'] = True
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_smallnet_si(k=10):
    """Train SmallNet on si data (all stages) with the default criterion;
    a FECLoss(alpha=48) variant was tried and is disabled.  Resumes from
    a saved checkpoint when present."""
    cfg = _allrun_config_si(k)
    net = nn.DataParallel(sinet.SmallNet(k=k).cuda())
    cfg['model'] = 'smallnet_si_k%d' % (k)
    cfg['model_dir'] = 'modeldir/stage_all/smallnet_si_k%d' % (k)
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_pre_dragn_agg(k=10):
    """Train a 1-level PreDRAGN aggregator, resuming when a checkpoint
    exists."""
    cfg = _allrun_config_agg(k)
    cfg['model'] = 'pre_dragn'
    cfg['model_dir'] = 'modeldir/agg_stage_all/pre_dragn_k%d-1layer' % k
    cfg.update({'lr': 0.0001, 'batch': 32, 'patience': 10, 'epochs': 120})
    net = nn.DataParallel(dragn.PreDRAGN(k, agglevel=1).cuda())
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def train_resnet_si(k=10):
    """Train a 4-block SiNet on si data (all stages) with FEC loss
    (alpha=64), resuming when a checkpoint exists.

    NOTE(review): this redefines `train_resnet_si` from earlier in the
    module (different signature) and shadows it at import time.
    """
    from loss import FECLoss
    cfg = _allrun_config_si(k)
    cfg['criterion'] = FECLoss(alpha=64)
    net = nn.DataParallel(sinet.SiNet(nblock=4, k=k).cuda())
    cfg['model'] = 'resnet18b4_si_k%d_fec1' % (k)
    cfg['model_dir'] = 'modeldir/stage_all/resnet18b4_si_k%d_fec1' % (k)
    cfg['lr'] = 0.0001
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def training(load_path, save_path, save):
    """Load the pickled train/test split from `load_path`, run training,
    and (optionally) persist the model and stacked test predictions."""
    split = tuple(
        joblib.load(load_path + name)
        for name in ('X_train.pkl', 'X_test.pkl', 'y_train.pkl', 'y_test.pkl'))
    model, S_test, y_test = run_train(split)
    if save:
        joblib.dump(model, save_path + 'model.pkl')
        joblib.dump(S_test, save_path + 'S_test.pkl')
        joblib.dump(y_test, save_path + 'y_test.pkl')
def train_resnet50_pj(k=10):
    """Train a 4-block ResNet-50 on pj data (all stages) with the default
    criterion (a FECLoss(alpha=48) variant was tried and is disabled),
    resuming when a checkpoint exists."""
    cfg = _allrun_config_pj(k)
    net = nn.DataParallel(sinet.Resnet50(nblock=4, k=k).cuda())
    cfg['model'] = 'resnet50b4_pj_k%d' % (k)
    cfg['model_dir'] = 'modeldir/stage_all/resnet50b4_pj_k%d' % (k)
    cfg['lr'] = 0.0001
    cfg['scheduler'] = True
    resume_pth = os.path.join(cfg['model_dir'], 'model.pth')
    if os.path.exists(resume_pth):
        state = torch.load(resume_pth)
        net.load_state_dict(state['model'])
        cfg['step'] = state['epoch'] + 1
        print("load pretrained model", resume_pth, "start epoch:", cfg['step'])
    train.run_train(net, cfg)
def main():
    """Dispatch to training or inference based on sys.argv[1]."""
    run_type = sys.argv[1]
    separator = '*' * 120
    print(separator)
    print(separator)
    print(separator)
    print(run_type)
    if run_type == 'train':
        import train
        train.run_train()
    elif run_type == 'test':
        import infer
        infer.infer(sys.argv[2])
        infer.get_activations(sys.argv[2])
    else:
        print(
            "To run this script please enter either: 'train' or 'test <x>.png'"
        )
import argparse

from azureml.core import Workspace, Dataset, Experiment, Run
from azureml.core.model import Model

# local imports
from train import run_train
import config as cfg

if __name__ == "__main__":
    # Both paths are required on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_path', type=str, required=True)
    parser.add_argument('--input_path', type=str, required=True)
    args = parser.parse_args()

    run = Run.get_context()
    best_model_path, best_rmse, best_alpha = run_train(args.input_path,
                                                       args.output_path)
    # Log metrics and the model artifact back to the AzureML run.
    run.log('rmse', best_rmse)
    run.log('alpha', best_alpha)
    run.upload_file(name='diabetes_model.pkl', path_or_stream=best_model_path)
    # TODO register model only if better than previous models?
    # Local ("offline") runs cannot register models; idiom fix:
    # `x not in y` instead of `not x in y`.
    if 'offline' not in run.id.lower():
        run.register_model(cfg.model_name, best_model_path)
    run.complete()
import time

from train import run_train
from eval import run_test
from calibration import run_calibration, plot_calibration
from params import get_parser
from gradcam import run_gradcam

if __name__ == "__main__":
    # FIX: `time` is used below but was not imported in this snippet;
    # importing it here is harmless if it is also imported elsewhere.
    start_time = time.time()
    parser = get_parser()
    args_dict, unknown = parser.parse_known_args()

    assert args_dict.model in [
        'covidnet', 'resnet'
    ], 'incorrect model selection! --model best be covidnet or resnet'

    if args_dict.mode == 'train':
        run_train(args_dict)
    elif args_dict.mode == 'test':
        run_test(args_dict)
    elif args_dict.mode == 'gradcam':
        run_gradcam(args_dict)
    elif args_dict.mode == 'calibration':
        run_calibration(args_dict)

    # FIX: the formatted elapsed time was computed and then discarded;
    # print it so the timing is actually reported.
    elapsed_time = time.time() - start_time
    print("Elapsed time:", time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
def main() -> None:
    """
    main entry point for program

    Trains and evaluates two seq2seq models on the same data:
    model_1 with embedding_dim=256 / units=1024, and model_2 with
    embedding_dim=512 / units=2048 and gru=True for encoder/decoder.
    """
    strategy = initialize()  # tf.distribute strategy (replicas scale BATCH_SIZE)
    dataset_size: int = 30000
    # read_data returns tokenized tensors, the two tokenizers, max sequence
    # lengths, and held-out values for testing.
    input_tensor_train, target_tensor_train, input_language, target_language, \
        max_length_target, max_length_input, input_vals, target_vals = read_data(
            dataset_size)
    BUFFER_SIZE = len(input_tensor_train)
    BATCH_SIZE = 64 * strategy.num_replicas_in_sync  # global batch size
    EPOCHS = 15
    steps_per_epoch = len(input_tensor_train) // BATCH_SIZE
    embedding_dim = 256
    units = 1024
    # +1 to reserve index 0 for padding in the vocabulary sizes.
    vocab_input_size = len(input_language.word_index) + 1
    vocab_target_size = len(target_language.word_index) + 1
    model_name: str = 'model_1'
    checkpoint_dir = file_path_relative(f'{model_folder}/{model_name}')
    # Variables must be created under the strategy scope for distribution.
    with strategy.scope():
        optimizer = tf.keras.optimizers.Adam()
        encoder = Encoder(vocab_input_size, embedding_dim, units, BATCH_SIZE)
        decoder = Decoder(vocab_target_size, embedding_dim, units, BATCH_SIZE)
        checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                         encoder=encoder,
                                         decoder=decoder)
    run_train(input_tensor_train, target_tensor_train, target_language,
              checkpoint, checkpoint_dir, encoder, optimizer, decoder,
              steps_per_epoch, BUFFER_SIZE, BATCH_SIZE, EPOCHS, model_name)
    # restoring the latest checkpoint in checkpoint_dir
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    # run tests and get score
    run_tests(max_length_target, max_length_input, input_language,
              target_language, units, encoder, decoder, input_vals,
              target_vals, model_name)
    # second model: larger capacity, GRU cells (gru=True)
    embedding_dim = 512
    units = 2048
    model_name: str = 'model_2'
    checkpoint_dir = file_path_relative(f'{model_folder}/{model_name}')
    with strategy.scope():
        optimizer_2 = tf.keras.optimizers.Adam()
        encoder_2 = Encoder(vocab_input_size, embedding_dim, units,
                            BATCH_SIZE, gru=True)
        decoder_2 = Decoder(vocab_target_size, embedding_dim, units,
                            BATCH_SIZE, gru=True)
        checkpoint_2 = tf.train.Checkpoint(optimizer=optimizer_2,
                                           encoder=encoder_2,
                                           decoder=decoder_2)
    run_train(input_tensor_train, target_tensor_train, target_language,
              checkpoint_2, checkpoint_dir,
              encoder_2, optimizer_2, decoder_2, steps_per_epoch,
              BUFFER_SIZE, BATCH_SIZE, EPOCHS, model_name)
    # restoring the latest checkpoint in checkpoint_dir
    checkpoint_2.restore(tf.train.latest_checkpoint(checkpoint_dir))
    # run tests and get score
    run_tests(max_length_target, max_length_input, input_language,
              target_language, units, encoder_2, decoder_2, input_vals,
              target_vals, model_name)
def main():
    """Parse CLI args, prepare/cache the dataset, and run training.

    Skips retraining (and just reports accuracy) when a model with the
    same hyper-parameter hash already exists, unless --force is set.
    """
    main_start = time.time()
    tracemalloc.start()  # track memory allocations for the run
    parser = argparse.ArgumentParser()
    # data specific parameters
    parser.add_argument("-trp", "--train_path", type=str, required=True,
                        help="path to train file", default="")
    parser.add_argument("-tp", "--test_path", type=str,
                        help="path to test file", default="")
    parser.add_argument("-lp", "--label_prefix", type=str,
                        help="label prefix", default="__label__")
    parser.add_argument("-df", "--data_fraction", type=float, default=1,
                        help="data fraction")
    parser.add_argument("-seed", "--seed", type=int, default=17)
    # hyper-parameters
    parser.add_argument("-dim", "--embedding_dim", type=int, default=100,
                        help="length of embedding vector")
    parser.add_argument("-nep", "--num_epochs", type=int, default=5,
                        help="number of epochs")
    parser.add_argument("-wng", "--word_ngrams", type=int, default=1,
                        help="word ngrams")
    parser.add_argument("-sng", "--sort_ngrams", type=int, default=0,
                        help="sort n-grams alphabetically")
    parser.add_argument("-bs", "--batch_size", type=int, default=4096,
                        help="batch size for train")
    parser.add_argument("-bn", "--use_batch_norm", type=int, default=0,
                        help="use batch norm")
    parser.add_argument(
        "-mwc", "--min_word_count", type=int, default=1,
        help="discard words which appear less than this number")
    parser.add_argument("-lr", "--learning_rate", type=float, default=0.3,
                        help="learning rate")
    parser.add_argument("-lrm", "--learning_rate_multiplier", type=float,
                        default=0.8, help="learning rate multiplier")
    parser.add_argument("-dr", "--dropout", type=float, default=0.5,
                        help="train dropout keep rate")
    parser.add_argument("-l2", "--l2_reg_weight", type=float, default=1e-6,
                        help="regularization weight")
    # parameters
    parser.add_argument("-bsi", "--batch_size_inference", type=int,
                        default=4096, help="batch size for test")
    parser.add_argument("-k", "--top_k", type=int, default=3,
                        help="report results for top k predictions")
    parser.add_argument(
        "-ck", "--compare_top_k", type=int, default=0,
        help="compare top k accuracies for determining the best model")
    parser.add_argument("-sm", "--save_all_models", type=int, default=0,
                        help="save model after each epoch")
    parser.add_argument("-ut", "--use_test", type=int, default=1,
                        help="evaluate on test data")
    parser.add_argument("-gpu", "--use_gpu", type=int, default=0,
                        help="use gpu for training")
    parser.add_argument("-gpu_fr", "--gpu_fraction", type=float, default=0.5,
                        help="what fraction of gpu to allocate")
    parser.add_argument("-utb", "--use_tensorboard", type=int, default=0,
                        help="use tensorboard")
    parser.add_argument("-cd", "--cache_dir", type=str,
                        help="cache directory", default="./cache/")
    parser.add_argument("-ld", "--log_dir", type=str,
                        help="log directory", default="./results/")
    parser.add_argument("-f", "--force", type=int, default=0,
                        help="force retraining")
    parser.add_argument("-pb", "--progress_bar", type=int, default=1,
                        help="show progress bar")
    parser.add_argument("-fl", "--flush", type=int, default=0,
                        help="flush after print")
    args = parser.parse_args()
    # Boolean flags are passed as ints; validate they are 0/1.
    for bool_param in [
            args.use_batch_norm, args.save_all_models, args.use_test,
            args.sort_ngrams, args.use_gpu, args.use_tensorboard, args.force,
            args.flush, args.compare_top_k, args.progress_bar
    ]:
        if bool_param not in [0, 1]:
            raise ValueError("{} should be 0 or 1.".format(bool_param))
    train_path = os.path.abspath(args.train_path)
    sort_ngrams = bool(args.sort_ngrams)
    progress_bar = bool(args.progress_bar)
    flush = bool(args.flush)
    # Test evaluation requires both a test path and --use_test.
    use_test = False
    if args.test_path:
        args.test_path = os.path.abspath(args.test_path)
        if bool(args.use_test):
            use_test = True
    print("\n\nTraining with arguments:\n{}\n".format(args))
    cache_dir = validate(args.cache_dir)
    log_dir = validate(args.log_dir)
    train_history_path = os.path.join(log_dir, "history.json")
    np.random.seed(args.seed)
    train_descriptions, train_labels, max_words = \
        parse_txt(train_path, as_tokens=True, return_max_len=True,
                  fraction=args.data_fraction, seed=args.seed,
                  label_prefix=args.label_prefix)
    # Parameters that define the cached dataset identity.
    data_specific = {
        "seed": args.seed,
        "data_fraction": args.data_fraction,
        "min_word_count": args.min_word_count,
        "word_ngrams": args.word_ngrams,
        "sort_ngrams": sort_ngrams,
    }
    data_hash = get_cache_hash(list_of_texts=train_descriptions,
                               data_specific_params=data_specific)
    cache_dir = os.path.abspath(validate(os.path.join(cache_dir, data_hash)))
    # Parameters that define the training run identity (hashed below).
    train_specific = {
        "embedding_dim": args.embedding_dim,
        "num_epochs": args.num_epochs,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "learning_rate_multiplier": args.learning_rate_multiplier,
        "use_batch_norm": bool(args.use_batch_norm),
        "l2_reg_weight": args.l2_reg_weight,
        "dropout": args.dropout,
        "cache_dir": cache_dir
    }
    for k, v in data_specific.items():
        train_specific[k] = v
    model_params = {
        "word_ngrams": args.word_ngrams,
        "sort_ngrams": sort_ngrams,
        "word_dict_path":
        os.path.abspath(os.path.join(cache_dir, "word_dict.json")),
        "label_dict_path":
        os.path.abspath(os.path.join(cache_dir, "label_dict.json"))
    }
    # NOTE: hashes dict *values* only, in insertion order.
    hyperparams_hashed = hash_("".join(
        [str(i) for i in train_specific.values()]))
    current_log_dir = validate(os.path.join(log_dir, hyperparams_hashed))
    data_specific["train_path"], train_specific[
        "train_path"] = train_path, train_path
    train_params = {
        "use_gpu": bool(args.use_gpu),
        "gpu_fraction": args.gpu_fraction,
        "use_tensorboard": bool(args.use_tensorboard),
        "top_k": args.top_k,
        "save_all_models": bool(args.save_all_models),
        "compare_top_k": bool(args.compare_top_k),
        "use_test": use_test,
        "log_dir": current_log_dir,
        "batch_size_inference": args.batch_size_inference,
        "progress_bar": progress_bar,
        "flush": flush,
    }
    # If this exact configuration was already trained and the model files
    # are present, just report accuracy and exit (unless --force).
    if os.path.exists(train_history_path):
        with open(train_history_path) as infile:
            train_history = json.load(infile)
        if hyperparams_hashed in train_history and check_model_presence(
                current_log_dir):
            if not bool(args.force):
                if args.test_path:
                    get_accuracy(current_log_dir, train_params,
                                 train_history_path, hyperparams_hashed,
                                 train_history, args.test_path,
                                 args.label_prefix)
                else:
                    get_accuracy(current_log_dir, train_params,
                                 train_history_path, hyperparams_hashed,
                                 train_history, train_path,
                                 args.label_prefix)
                print("The model is stored at {}".format(current_log_dir))
                exit()
            else:
                print("Forced retraining")
                print("Training hyper-parameters hashed: {}".format(
                    hyperparams_hashed))
        else:
            print("Training hyper-parameters hashed: {}".format(
                hyperparams_hashed))
    else:
        train_history = dict()
    clean_directory(current_log_dir)
    max_words_with_ng = get_max_words_with_ngrams(max_words, args.word_ngrams)
    print("Preparing dataset")
    print("Total number of datapoints: {}".format(len(train_descriptions)))
    print("Max number of words in description: {}".format(max_words))
    print("Max number of words with n-grams in description: {}".format(
        max_words_with_ng))
    word_vocab, label_vocab = get_word_label_vocabs(train_descriptions,
                                                    train_labels,
                                                    args.word_ngrams,
                                                    args.min_word_count,
                                                    sort_ngrams,
                                                    cache_dir,
                                                    bool(args.force),
                                                    show_progress=progress_bar,
                                                    flush=flush)
    with open(os.path.join(current_log_dir, "model_params.json"),
              "w+") as outfile:
        json.dump(model_params, outfile)
    num_words_in_train = len(word_vocab)
    # Cache tokenized descriptions; frees the raw text afterwards.
    train_description_hashes, train_labels, cache = \
        cache_data(train_descriptions, train_labels, word_vocab, label_vocab,
                   args.word_ngrams, sort_ngrams, show_progress=progress_bar,
                   progress_desc="Cache train descriptions", flush=flush)
    del train_descriptions
    test_description_hashes, test_labels = [], []
    initial_test_len = 0
    if use_test:
        test_descriptions, test_labels, max_words_test = parse_txt(
            args.test_path,
            as_tokens=True,
            return_max_len=True,
            label_prefix=args.label_prefix)
        initial_test_len = len(test_descriptions)
        print("Total number of test datapoints: {}".format(
            len(test_descriptions)))
        test_description_hashes, test_labels, cache = \
            cache_data(test_descriptions, test_labels, word_vocab, label_vocab,
                       args.word_ngrams, sort_ngrams, cache=cache,
                       is_test_data=True, show_progress=progress_bar,
                       progress_desc="Cache test descriptions", flush=flush)
        del test_descriptions
    data = {
        "train_description_hashes": train_description_hashes,
        "train_labels": train_labels,
        "test_description_hashes": test_description_hashes,
        "test_labels": test_labels,
        "cache": cache,
        "label_vocab": label_vocab,
        "num_words_in_train": num_words_in_train,
        "test_path": args.test_path,
        "initial_test_len": initial_test_len,
    }
    run_train(data, train_specific, train_params, data_specific,
              train_history, train_history_path)
    print("All process took {} seconds".format(
        round(time.time() - main_start, 0)),
          flush=flush)
def train(dataset, outdir):
    """Initialize `outdir` (without removing existing contents) and launch
    training on `dataset`."""
    # NOTE(review): `initilize` [sic] is the helper's actual (misspelled)
    # name; renaming it must happen at its definition site.
    initilize(outdir, remove=False)
    run_train(dataset, outdir)
import argparse
import sys

sys.path.append(".")

import train

if __name__ == '__main__':
    # Training hyper-parameters; unrecognized CLI args are ignored.
    parser = argparse.ArgumentParser()
    arg_specs = [
        ('--z_dim', int, 20),
        ('--learning_rate', float, 0.0001),
        ('--beta_1', float, 0.9),
        ('--beta_2', float, 0.999),
        ('--epsilon', float, 1e-08),
        ('--training_epoch', int, 100),
        ('--batch_size', int, 64),
    ]
    for flag, arg_type, default in arg_specs:
        parser.add_argument(flag, type=arg_type, default=default, help='-')
    args, unknown = parser.parse_known_args()
    train.run_train(args)
def run(options=None, load_model=None, mode_interactive=True):
    """Set up options, data, and model state, then train (non-interactive)
    or return the pieces for interactive use.

    Args:
        options: partial options dict; missing keys are filled from
            `default_options` (or from a checkpoint's saved options).
        load_model: path to a saved model to resume from, or None.
        mode_interactive: when True, skip all file/logging setup and
            return (modelstate, loaders, options) instead of training.
    """
    if options is None:
        options = {}
    if not mode_interactive:
        # Setup save paths and redirect stdout to file
        ctime = time.strftime("%c")
        run_path, run_name = get_run_path(options, ctime)
        # Create folder
        os.makedirs(run_path, exist_ok=True)
    if load_model is not None:
        # Load the saved model's options (stored next to the checkpoint).
        options = load_checkpoint_options_dict(
            os.path.join(os.path.dirname(load_model), "options.txt"),
            options, default_options)
    else:
        # Use option and fill in with default values
        options = create_full_options_dict(
            options, default_options=default_options)
    if not mode_interactive:
        if load_model is None:
            save_sourcecode(options, run_path, options['save_code'])
        logger.init(False, True, {
            "project": "MSVAE",
            "run_name": run_name,
            'options': options
        }, {'logdir': run_path})
        # Set stdout to print to file and console
        logger.set_redirects(run_path)
    # Sanity-check that the model dimensionality matches the dataset.
    if options["dataset"] == "mnist":
        if options["model_options"]["dimensions"] != 2:
            raise Exception("Dataset " + options["dataset"] +
                            " requires 2 dimensions!")
    elif options["dataset"] == "pianoroll":
        if options["model_options"]["dimensions"] != 1:
            raise Exception("Dataset " + options["dataset"] +
                            " requires 1 dimensions!")
    # Load datasets
    loaders, statistics = \
        loader.load_dataset(dataset_basepath=options["dataset_basepath"],
                            dataset=options["dataset"],
                            dataset_options=options["dataset_options"],
                            train_batch_size=options["train_options"]["batch_size"],
                            val_batch_size=options["test_options"]["batch_size"])
    # Define Model
    model = Model(n_channels=loaders["train"].nc,
                  statistics=statistics,
                  **options["model_options"])
    # Setup Modelstate (seed, optimizer, LR, free-bits and KL annealing).
    modelstate = ModelState(seed=options["seed"],
                            model=model,
                            optimizer=options["optimizer"],
                            optimizer_options=options["optimizer_options"],
                            init_lr=options["train_options"]["init_lr"],
                            init_freebits=options["train_options"]
                            ["freebits_options"]["init_threshold"],
                            init_annealing=options["train_options"]
                            ["annealing_options"]["init_annealing"])
    if options["cuda"]:
        modelstate.model.cuda()
    if load_model is not None:
        # Restore model weights; resume from the saved epoch.
        current_epoch = modelstate.load_model(load_model)
    else:
        current_epoch = 0
    if not mode_interactive:
        print("Training starting at: " + ctime)
        # Run model
        train.run_train(start_epoch=current_epoch,
                        cuda=options["cuda"],
                        ll_normalization=options["ll_normalization"],
                        ind_latentstate=options["ind_latentstate"],
                        modelstate=modelstate,
                        logdir=run_path,
                        loaders=loaders,
                        train_options=options["train_options"],
                        test_options=options["test_options"])
    else:
        return modelstate, loaders, options