def main():
    """Train an ESIM model from command-line options and save the result.

    The options are parsed with argparse, the selected dataset (SNLI or
    Quora) is loaded, a handful of dataset-derived values are attached to
    the argument namespace, and ``train`` is run.  ``train`` is expected to
    hand back a ``(best_model, max_dev_acc)`` pair; the model's state dict is
    written under ``saved_models/`` with the dev accuracy in the file name.

    Raises
    ------
    NotImplementedError
        If ``--data-type`` is neither ``SNLI`` nor ``Quora``.
    """
    print('add projection-layer, use dropout without bn')

    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('--batch-size', default=32, type=int)
    add('--char-dim', default=20, type=int)
    add('--char-hidden-size', default=50, type=int)
    add('--data-type', default='SNLI', help='available: SNLI or Quora')
    add('--dropout', default=0.5, type=float)
    add('--epochs', default=15, type=int)
    add('--gpu', default=0, type=int)
    add('--hidden-size', default=300, type=int)
    add('--learning-rate', default=0.0004, type=float)
    add('--max-sent-len',
        default=-1,
        type=int,
        help='max length of input sentences model can accept, '
        'if -1, it accepts any length')
    # parser.add_argument('--num-perspective', default=20, type=int)
    add('--print-freq', default=1000, type=int)
    add('--use-char-emb', default=False, action='store_true')
    add('--word-dim', default=300, type=int)
    add('--patience', default=3, type=int)
    # Passing --train_embed flips args.fix_emb from its default True to False.
    add('--train_embed', action='store_false', dest='fix_emb')
    args = parser.parse_args()

    # Use the requested GPU when CUDA is available, otherwise the CPU.
    device_name = 'cpu'
    if torch.cuda.is_available():
        device_name = 'cuda:{}'.format(args.gpu)
    args.device = torch.device(device_name)
    print(args.use_char_emb)

    # Reject unknown datasets up front, then load the requested one.
    if args.data_type not in ('SNLI', 'Quora'):
        raise NotImplementedError('only SNLI or Quora data is possible')
    if args.data_type == 'SNLI':
        print('loading SNLI data...')
        data = SNLI(args)
    else:
        print('loading Quora data...')
        data = Quora(args)

    # Dataset-derived settings travel with the namespace.
    # setattr(args, 'char_vocab_size', len(data.char_vocab))
    args.word_vocab_size = len(data.TEXT.vocab)
    args.class_size = len(data.LABEL.vocab)
    args.max_word_len = data.max_word_len
    args.model_time = strftime('%H:%M:%S', localtime())

    print('training start!')
    best_model, max_dev_acc = train(args, data)

    if not os.path.exists('saved_models'):
        os.makedirs('saved_models')
    weights = best_model.state_dict()
    torch.save(weights,
               f'saved_models/ESIM_{args.data_type}_{max_dev_acc:.3f}.pt')
    print('training finished!')
def main():
    """Train a BiMPM-style model with built-in defaults and save it.

    ``sys.argv`` is overwritten with a placeholder before parsing, so every
    option takes its default value regardless of how the process was
    started.  The selected dataset is loaded, dataset-derived sizes are
    attached to the argument namespace, ``train`` is run and the returned
    model's state dict is written under ``saved_models/``.

    Raises
    ------
    NotImplementedError
        If ``--data-type`` is neither ``SNLI`` nor ``Quora``.
    """
    import sys

    # NOTE(review): this discards the real command line (looks like a
    # notebook workaround); kept as-is because removing it would change
    # which option values are used.
    sys.argv = ['foo']

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=32, type=int)
    parser.add_argument('--char-dim', default=20, type=int)
    parser.add_argument('--char-hidden-size', default=50, type=int)
    parser.add_argument('--data-type',
                        default='Quora',
                        help='available: SNLI or Quora')
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--epoch', default=15, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=300, type=int)
    parser.add_argument('--learning-rate', default=0.001, type=float)
    parser.add_argument(
        '--max-sent-len',
        default=100,
        type=int,
        help='max length of input sentences model can accept, '
        'if -1, it accepts any length')
    parser.add_argument('--num-perspective', default=20, type=int)
    parser.add_argument('--print-freq', default=1, type=int)
    parser.add_argument('--use-char-emb', default=False, action='store_true')
    parser.add_argument('--word-dim', default=300, type=int)
    parser.add_argument('--training', default=0, type=int)
    args = parser.parse_args()
    print(args.training)

    if args.data_type == 'SNLI':
        print('loading SNLI data...')
        data = SNLI(args)
    elif args.data_type == 'Quora':
        print('loading Quora data...')
        data = Quora(args)
    else:
        raise NotImplementedError('only SNLI or Quora data is possible')

    setattr(args, 'char_vocab_size', len(data.char_vocab))
    setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
    setattr(args, 'class_size', len(data.LABEL.vocab))
    setattr(args, 'max_word_len', data.max_word_len)
    setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))

    print('training start!')
    best_model = train(args, data)

    if not os.path.exists('saved_models'):
        os.makedirs('saved_models')
    # args.training is an int; the original '+' concatenation raised
    # TypeError here, after training had already completed.  Formatting it
    # into the path produces the file name the concatenation intended.
    checkpoint_path = (f'saved_models/BIBPM_{args.data_type}_'
                       f'{args.model_time}train{args.training}.pt')
    torch.save(best_model.state_dict(), checkpoint_path)
    print('training finished!')
def main():
    """Parse options, configure logging, load a dataset and run training.

    Log records go to the console and, when ``--log-file`` is non-empty, to
    that file as well (opened in ``'w'`` mode, so an existing file is
    truncated).  Four datasets are accepted: SNLI, Quora, Searchqa and
    Quasart.  Dataset-derived sizes and a UTC timestamp are attached to the
    argument namespace before ``train`` is called.

    Raises
    ------
    NotImplementedError
        If ``--data-type`` is not one of the four supported datasets.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=64, type=int)
    parser.add_argument('--char-dim', default=20, type=int)
    parser.add_argument('--char-hidden-size', default=50, type=int)
    # The help text now lists every dataset the dispatch below accepts.
    parser.add_argument('--data-type',
                        default='SNLI',
                        help='available: SNLI, Quora, Searchqa or Quasart')
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--epoch', default=10, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=100, type=int)
    parser.add_argument('--learning-rate', default=0.001, type=float)
    parser.add_argument(
        '--max-sent-len',
        default=200,
        type=int,
        help='max length of input sentences model can accept, '
        'if -1, it accepts any length')
    parser.add_argument('--num-perspective', default=20, type=int)
    parser.add_argument('--print-freq', default=500, type=int)
    parser.add_argument('--use-char-emb', default=False, action='store_true')
    parser.add_argument('--word-dim', default=300, type=int)
    parser.add_argument('--loss-curve', default='default_loss', type=str)
    parser.add_argument('--title', default='default', type=str)
    parser.add_argument('--acc-curve', default='default_acc', type=str)
    parser.add_argument('--auc-curve', default='default_auc', type=str)
    parser.add_argument('--line-suffix', default='tmp', type=str)
    parser.add_argument('--log-file',
                        default='output.log',
                        type=str,
                        help='log file path')
    args = parser.parse_args()

    # ------------------------------------------------------------------
    # Set logging: one console handler plus an optional file handler.
    logger.setLevel(logging.INFO)
    fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                            '%m/%d/%Y %I:%M:%S %p')
    console = logging.StreamHandler()
    console.setFormatter(fmt)
    logger.addHandler(console)
    if args.log_file:
        logfile = logging.FileHandler(args.log_file, 'w')
        logfile.setFormatter(fmt)
        logger.addHandler(logfile)
    # Lazy %-style arguments: the message is only built if it is emitted.
    logger.info('COMMAND: %s', ' '.join(sys.argv))

    # ------------------------------------------------------------------
    # Load dataset
    if args.data_type == 'SNLI':
        print('loading SNLI data...')
        data = SNLI(args)
    elif args.data_type == 'Quora':
        print('loading Quora data...')
        data = Quora(args)
    elif args.data_type == 'Searchqa':
        print('loading Searchqa data...')
        data = Searchqa(args)
    elif args.data_type == 'Quasart':
        print('loading Quasart data...')
        data = Quasart(args)
    else:
        # The original message claimed only SNLI/Quora were possible, which
        # contradicted the branches above.
        raise NotImplementedError(
            'only SNLI, Quora, Searchqa or Quasart data is possible')

    setattr(args, 'char_vocab_size', len(data.char_vocab))
    setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
    setattr(args, 'class_size', len(data.LABEL.vocab))
    setattr(args, 'max_word_len', data.max_word_len)
    setattr(args, 'model_time', strftime('%Y%m%d-%H-%M-%S', gmtime()))

    logger.info('training start!')
    train(args, data)
    logger.info('training finished!')
# Evaluation options.  `parser` is expected to be an
# argparse.ArgumentParser that already exists when these statements run.
parser.add_argument('--data-type',
                    default='SNLI',
                    help='available: SNLI or Quora')
parser.add_argument('--epoch', default=10, type=int)
parser.add_argument('--gpu', default=0, type=int)
parser.add_argument('--hidden-size', default=100, type=int)
parser.add_argument('--learning-rate', default=0.001, type=float)
parser.add_argument('--num-perspective', default=20, type=int)
# NOTE(review): with default=True and action='store_true' this flag is
# always True and cannot be switched off from the command line.  Left
# unchanged because flipping the default would alter the model that is
# built; confirm the intent.
parser.add_argument('--use-char-emb', default=True, action='store_true')
parser.add_argument('--word-dim', default=300, type=int)
parser.add_argument('--model-path', required=True)
args = parser.parse_args()

if args.data_type == 'SNLI':
    print('loading SNLI data...')
    data = SNLI(args)
elif args.data_type == 'Quora':
    print('loading Quora data...')
    data = Quora(args)
else:
    # Without this branch an unsupported value fell through and crashed
    # below with a NameError on `data`.
    raise NotImplementedError('only SNLI or Quora data is possible')

# Dataset-derived sizes travel with the argument namespace.
setattr(args, 'char_vocab_size', len(data.char_vocab))
setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
setattr(args, 'class_size', len(data.LABEL.vocab))
setattr(args, 'max_word_len', data.max_word_len)

print('loading model...')
model = load_model(args, data)

# `test` returns a pair; only the second element (accuracy) is reported.
_, acc = test(model, args, data)
print(f'test acc: {acc:.3f}')
def _str2bool(value):
    """Convert a command-line token such as 'true' or 'False' to a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


def main():
    """Train a CBIMPM model, logging its configuration and wall-clock time.

    A run directory ``saved_models/<model_time>/`` is created.  The parsed
    options (plus dataset-derived sizes) are dumped as JSON to ``log.txt``
    in that directory, the elapsed training time is appended afterwards,
    and the state dict of the model returned by ``train`` is saved next to
    the log.

    Raises
    ------
    NotImplementedError
        If ``--data-type`` is neither ``SNLI`` nor ``Quora``.
    """
    os.makedirs('saved_models', exist_ok=True)

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=64, type=int)
    parser.add_argument('--char-dim', default=20, type=int)
    parser.add_argument('--char-hidden-size', default=50, type=int)
    parser.add_argument('--data-type',
                        default='SNLI',
                        help='available: SNLI or Quora')
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--epoch', default=1, type=int)
    parser.add_argument('--gpu', default=0, type=int)
    parser.add_argument('--hidden-size', default=100, type=int)
    parser.add_argument('--learning-rate', default=0.001, type=float)
    parser.add_argument(
        '--max-sent-len',
        default=-1,
        type=int,
        help='max length of input sentences model can accept, '
        'if -1, it accepts any length')
    parser.add_argument('--num-perspective', default=20, type=int)
    parser.add_argument('--print-freq', default=500, type=int)
    parser.add_argument('--use-char-emb', default=False, action='store_true')
    parser.add_argument('--word-dim', default=300, type=int)
    parser.add_argument('--n_fm', default=50, type=int)
    # argparse's type=bool calls bool() on the raw string, so "--conv False"
    # used to yield True.  _str2bool parses the text instead.
    parser.add_argument('--conv', default=True, type=_str2bool)
    parser.add_argument('--kernel_size', default=3, type=int)
    parser.add_argument('--use_my_model', default=False, type=_str2bool)
    parser.add_argument('--use_only_conv', default=False, type=_str2bool)
    args = parser.parse_args()

    if args.data_type == 'SNLI':
        print('loading SNLI data...')
        data = SNLI(args)
    elif args.data_type == 'Quora':
        print('loading Quora data...')
        data = Quora(args)
    else:
        raise NotImplementedError('only SNLI or Quora data is possible')

    setattr(args, 'char_vocab_size', len(data.char_vocab))
    setattr(args, 'word_vocab_size', len(data.TEXT.vocab))
    setattr(args, 'class_size', len(data.LABEL.vocab))
    setattr(args, 'max_word_len', data.max_word_len)
    setattr(args, 'model_time', strftime('%H:%M:%S', gmtime()))

    # One directory per run, named after the start time.
    run_dir = 'saved_models/' + args.model_time
    os.makedirs(run_dir, exist_ok=True)
    log_path = run_dir + "/log.txt"

    # Record the full configuration before training starts.
    with codecs.open(log_path, "w+", "utf-8") as output:
        output.write(json.dumps(args.__dict__))

    start = time.time()
    print("Timer Start at " + str(start))
    print('training start!')
    best_model = train(args, data)
    end = time.time()
    print("Timer Stop at " + str(end) + " Time cost: " + str(end - start))

    with codecs.open(log_path, "a+", "utf-8") as output:
        output.write("\nTime cost: " + str(end - start))

    torch.save(
        best_model.state_dict(),
        run_dir + "/CBIMPM_" + str(args.data_type) + "_" +
        str(args.model_time))
    print('training finished!')
def main(shutdown: ("shutdown system after training", 'flag', 's'),
         travis: ("use small testing dataset", 'flag', 't'),
         app: ("evaluate user queries from app", 'flag', 'a'),
         model_path,
         batch_size: (None, 'option', None, int) = 64,
         char_input_size: (None, 'option', None, int) = 20,
         char_hidden_size: (None, 'option', None, int) = 50,
         data_type: ("use quora, snli, or app", 'option', None, str,
                     ['quora', 'snli', 'app']) = 'quora',
         dropout: (None, 'option', None, float) = 0.1,
         epoch: (None, 'option', None, int) = 10,
         hidden_size: (None, 'option', None, int) = 100,
         lr: (None, 'option', None, float) = 0.001,
         num_perspectives: (None, 'option', None, int) = 20,
         print_interval: (None, 'option', None, int) = 500,
         word_dim: (None, 'option', None, int) = 300):
    """Print the best BiMPM model accuracy for the test set in a cycle.

    In app mode two queries are read from
    ``./app_data/sample_queries.csv``, the arguments are pickled to
    ``./app_data/args.pkl`` and a similarity prediction is printed.
    Otherwise the model is evaluated on the selected dataset and the
    accuracy is printed.

    Parameters
    ----------
    shutdown : bool, flag
        Shutdown system after training (default is False).  Not read
        directly by this function.
    travis : bool, flag
        Run tests on small dataset (default is False)
    app : bool, flag
        Whether to evaluate queries from bimpm app (default is False).
    model_path : str
        A path to the location of the BiMPM trained model.
    batch_size : int, optional
        Number of examples in one iteration (default is 64).
    char_input_size : int, optional
        Size of character embedding (default is 20).
    char_hidden_size : int, optional
        Size of hidden layer in char lstm (default is 50).
    data_type : {'quora', 'snli', 'app'}, optional
        Dataset to use; compared case-insensitively (default is 'quora').
    dropout : float, optional
        Applied to each layer (default is 0.1).
    epoch : int, optional
        Number of passes through full dataset (default is 10).
    hidden_size : int, optional
        Size of hidden layer for all BiLSTM layers (default is 100).
    lr : float, optional
        Learning rate (default is 0.001).
    num_perspectives : int, optional
        Number of perspectives in matching layer (default is 20).
    print_interval : int, optional
        Reporting interval (default is 500); forced to 1 in travis mode.
    word_dim : int, optional
        Size of word embeddings (default is 300).

    Raises
    ------
    RuntimeError
        If travis mode is combined with SNLI data, or if any data source
        other than SNLI or Quora is requested outside app mode.
    AssertionError
        If the sample query file does not contain exactly two queries.

    """
    # Store local namespace dict in Args() object.  This must remain the
    # first statement so that locals() holds only the parameters.
    args = Args(locals())

    args.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # Handle travis mode
    if args.travis and args.data_type.lower() == 'snli':
        raise RuntimeError("Invalid dataset size specified for SNLI data.")

    if args.travis:
        print('Travis mode detected. Adjusting parameters...')
        args.epoch = 2
        args.batch_size = 2
        args.print_interval = 1

    if app:
        # Load sample queries and model_data for app mode
        help_message = ("\nPlease create a csv file "
                        "`./app_data/sample_queries.csv` with two queries."
                        " For example:"
                        "\n\t$ cat sample_queries.csv"
                        "\n\tHow can I run faster?"
                        "\n\tHow do I get better at running?\n")

        app_data = []
        try:
            with open('./app_data/sample_queries.csv', 'r') as f:
                # Flatten every CSV row into a single list of queries.
                for row in csv.reader(f):
                    app_data.extend(row)
        except FileNotFoundError as e:
            print(e)
            print(help_message)
            return

        # Raised explicitly (not via `assert`) so the check survives
        # `python -O`; the exception type is unchanged.  The message now
        # reports the actual count rather than always saying "too many".
        if len(app_data) != 2:
            raise AssertionError(
                f"Expected exactly 2 queries, found {len(app_data)}. "
                f"{help_message}")

        print("Loading App data...")
        model_data = AppData(args, app_data)
    elif args.data_type.lower() == 'snli':
        print("Loading SNLI data...")
        model_data = SNLI(args)
    elif args.data_type.lower() == 'quora':
        print("Loading Quora data...")
        model_data = Quora(args)
    else:
        raise RuntimeError(
            'Data source other than SNLI or Quora was provided.')

    # Create a few more parameters based on chosen dataset
    args.word_vocab_size = len(model_data.TEXT.vocab)
    args.char_vocab_size = len(model_data.char_vocab)
    args.class_size = len(model_data.LABEL.vocab)
    args.max_word_len = model_data.max_word_len

    print("Loading model...")
    model = load_model(args, model_data)

    if app:
        # Store args for use in app
        pickle_dir = './app_data/'
        args_pickle = 'args.pkl'

        if not os.path.exists(pickle_dir):
            os.makedirs(pickle_dir)
        # A context manager closes (and flushes) the pickle file; the
        # original left the handle open.
        with open(f'{pickle_dir}{args_pickle}', 'wb') as pickle_file:
            pickle.dump(args, pickle_file)

        preds = evaluate(model, args, model_data, mode='app')

        print('\nQueries:\n',
              f'\n{app_data[0]}\n',
              f'{app_data[1]}\n',
              sep='')
        print('\nPrediction:')
        # The largest score sitting at index 1 is reported as "similar".
        if max(preds) == preds.data[1]:
            print('\nSIMILAR based on max value at index 1:',
                  f'\npreds:  {preds.data}\n')
        else:
            print('\nNOT SIMILAR based on max value at index 0',
                  f'\npreds:  {preds.data}\n')
    else:
        _, eval_acc = evaluate(model, args, model_data, mode='eval')
        print(f'\neval_acc:  {eval_acc:.3f}\n')
def main(shutdown: ("shutdown system after training", 'flag', 's'),
         travis: ("use small testing dataset", 'flag', 't'),
         experiment: ("name of experiment", 'option', 'e', str) = '0.0',
         grad_clip: (None, 'option', None, int) = 100,
         batch_size: (None, 'option', None, int) = 64,
         char_input_size: (None, 'option', None, int) = 20,
         char_hidden_size: (None, 'option', None, int) = 50,
         data_type: ("use quora or snli", 'option', None, str,
                     ['quora', 'snli']) = 'quora',
         dropout: (None, 'option', None, float) = 0.1,
         epoch: (None, 'option', None, int) = 10,
         hidden_size: (None, 'option', None, int) = 100,
         lr: (None, 'option', None, float) = 0.001,
         num_perspectives: (None, 'option', None, int) = 20,
         print_interval: (None, 'option', None, int) = 500,
         word_dim: (None, 'option', None, int) = 300):
    """Train and store the best BiMPM model in a cycle.

    Parameters
    ----------
    shutdown : bool, flag
        Shutdown system after training (default is False).
    travis : bool, flag
        Run tests on small dataset (default is False).
    experiment : str, optional
        Name of the current experiment (default is '0.0').
    grad_clip : int, optional
        Amount by which to clip the gradient (default is 100).
    batch_size : int, optional
        Number of examples in one iteration (default is 64).
    char_input_size : int, optional
        Size of character embedding (default is 20).
    char_hidden_size : int, optional
        Size of hidden layer in char lstm (default is 50).
    data_type : {'quora', 'snli'}, optional
        Choose either SNLI or Quora (default is 'quora').
    dropout : float, optional
        Applied to each layer (default is 0.1).
    epoch : int, optional
        Number of passes through full dataset (default is 10).
    hidden_size : int, optional
        Size of hidden layer for all BiLSTM layers (default is 100).
    lr : float, optional
        Learning rate (default is 0.001).
    num_perspectives : int, optional
        Number of perspectives in matching layer (default is 20).
    print_interval : int, optional
        How often to write to tensorboard (default is 500).
    word_dim : int, optional
        Size of word embeddings (default is 300).

    Raises
    ------
    RuntimeError
        If travis mode is combined with SNLI data, or if any data source
        other than SNLI or Quora is requested.

    """
    # Snapshot the parameters first: locals() must not see later locals.
    args = Args(locals())

    args.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    args.app = False  # Training never runs in app mode

    # Travis mode: refuse SNLI, then shrink the run to a smoke test.
    if args.travis:
        if args.data_type.lower() == 'snli':
            raise RuntimeError(
                "Invalid dataset size specified for SNLI data.")
        print('Travis mode detected. Adjusting parameters...')
        args.epoch = 2
        args.batch_size = 2
        args.print_interval = 1

    # Pick the dataset (the name is matched case-insensitively).
    source = args.data_type.lower()
    if source == 'snli':
        print("Loading SNLI data...")
        dataset = SNLI(args)
    elif source == 'quora':
        print("Loading Quora data...")
        dataset = Quora(args)
    else:
        raise RuntimeError(
            'Data source other than SNLI or Quora was provided.')

    # Parameters that depend on the loaded dataset.
    args.char_vocab_size = len(dataset.char_vocab)
    args.word_vocab_size = len(dataset.TEXT.vocab)
    args.class_size = len(dataset.LABEL.vocab)
    args.max_word_len = dataset.max_word_len
    # Seconds since the epoch (UTC), as a string; used in the file name.
    args.model_time = str(calendar.timegm(gmtime()))

    # Persist the hyperparameters for reproducibility (skipped on travis).
    config_dir = 'research/configs'
    if not os.path.exists(config_dir):
        os.makedirs(config_dir)
    if not args.travis:
        args.store_params()

    print("Starting training...")
    trained = train(args, dataset)

    checkpoint_dir = 'saved_models'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # Travis runs train but never write a checkpoint.
    if not args.travis:
        torch.save(
            trained.state_dict(),
            f'saved_models/bimpm_{args.data_type}_{args.model_time}.pt')

    print("Finished training...")

    if args.shutdown:
        print("Shutting system down...")
        os.system("sudo shutdown now -h")