def validate_input_args():
    global model_path
    validate((args.b, int, 1, 100000000))
    validate((args.e, int, 1, 100000000))
    validate((args.sentence_length, int, 1, 10000))
    validate((args.token_emb_size, int, 1, 10000))
    validate((args.intent_hidden_size, int, 1, 10000))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.save_epochs, int, 1, 1000))
    validate((args.tagger_dropout, float, 0, 1))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_path))
    validate_parent_exists(model_path)
def validate_input_args():
    global model_path, settings_path
    validate((args.sentence_len, int, 1, 1000))
    validate((args.lstm_depth, int, 1, 10))
    validate((args.lstm_hidden_size, int, 1, 10000))
    validate((args.token_embedding_size, int, 1, 10000))
    validate((args.pos_embedding_size, int, 1, 1000))
    validate((args.vocab_size, int, 1, 100000000))
    validate((args.char_hidden_size, int, 1, 1000))
    validate((args.max_char_word_length, int, 1, 100))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_name))
    settings_path = path.join(path.dirname(path.realpath(__file__)), str(args.settings))
    validate_parent_exists(model_path)
    validate_parent_exists(settings_path)
def validate_input_args(args):
    validate((args.b, int, 1, 100000))
    validate((args.e, int, 1, 100000))
    validate((args.tag_num, int, 1, 1000))
    validate((args.sentence_length, int, 1, 10000))
    validate((args.word_length, int, 1, 100))
    validate((args.word_embedding_dims, int, 1, 10000))
    validate((args.character_embedding_dims, int, 1, 1000))
    validate((args.char_features_lstm_dims, int, 1, 10000))
    validate((args.entity_tagger_lstm_dims, int, 1, 10000))
    validate((args.dropout, float, 0, 1))
    model_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_path))
    validate_parent_exists(model_path)
    model_info_path = path.join(path.dirname(path.realpath(__file__)), str(args.model_info_path))
    validate_parent_exists(model_info_path)
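# The validate_input_args variants above all lean on the same validate() helper,
# which accepts one or more (value, expected_type, lower_bound, upper_bound) tuples.
# The sketch below illustrates that assumed contract only; it is NOT the project's
# actual implementation, which may raise different exception types or messages.
def validate(*fields):
    for value, expected_type, lower, upper in fields:
        if not isinstance(value, expected_type):
            raise TypeError('expected {!r} to be of type {}'.format(value, expected_type.__name__))
        if not lower <= value <= upper:
            raise ValueError('{!r} is outside the allowed range [{}, {}]'.format(value, lower, upper))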
def validate_parent_exists(file_path):
    """Validate that the parent directory exists when file_path is not None or empty."""
    if file_path is not None and file_path:
        io.validate_parent_exists(fix_path(file_path))
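# For context, a self-contained stand-in with the same intent (fail fast when an
# output path's parent directory is missing). fix_path and io.validate_parent_exists
# above are project helpers whose exact behavior is assumed, not shown here.
import os

def validate_parent_exists_sketch(file_path):
    if file_path:  # skip None and empty strings, as in the guard above
        parent = os.path.dirname(os.path.abspath(os.path.expanduser(file_path)))
        if not os.path.isdir(parent):
            raise ValueError('parent directory does not exist: {}'.format(parent))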
tf.flags.DEFINE_boolean("use_match_type", False, "use match type features") tf.flags.DEFINE_boolean("cache_match_type", False, "cache match type answers") tf.flags.DEFINE_boolean("cache_vectorized", False, "cache vectorized data") tf.flags.DEFINE_boolean("use_oov", False, "use OOV test set") tf.flags.DEFINE_string("data_dir", "data/", "File to save model weights to.") tf.flags.DEFINE_string("weights_save_path", "saved_tf/", "File to save model weights to.") FLAGS = tf.flags.FLAGS validate((FLAGS.task, int, 1, 7), (FLAGS.nhops, int, 1, 100), (FLAGS.emb_size, int, 1, 10000)) # Validate inputs current_dir = os.path.dirname(os.path.realpath(__file__)) weights_save_path = os.path.join(current_dir, FLAGS.weights_save_path) validate_parent_exists(weights_save_path) data_dir = os.path.join(current_dir, FLAGS.data_dir) validate_parent_exists(data_dir) babi = BABI_Dialog( path=data_dir, task=FLAGS.task, oov=FLAGS.use_oov, use_match_type=FLAGS.use_match_type, cache_match_type=FLAGS.cache_match_type, cache_vectorized=FLAGS.cache_vectorized, ) with tf.Session() as sess: memn2n = MemN2N_Dialog( 32,
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=validate_existing_filepath, default='./',
                        help='file_path where the files to parse are located')
    parser.add_argument('--data_type', type=str, default='amazon', choices=['amazon'])
    parser.add_argument('--output_file', type=validate_parent_exists, default='./opt_trials.pkl',
                        help='file_path where the output of the trials will be located')
    parser.add_argument('--new_trials', type=int, default=20, action=check_size(1, 20000))
    args_in = parser.parse_args()

    # Check inputs
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)
    if args_in.output_file:
        validate_parent_exists(args_in.output_file)

    if args_in.data_type == 'amazon':
        data_in = Amazon_Reviews(args_in.file_path)

    try:
        if args_in.output_file.endswith('.pkl'):
            with open(args_in.output_file, 'rb') as read_f:
                trials_to_keep = pickle.load(read_f)
            print("Utilizing existing trial files")
        else:
            trials_to_keep = Trials()
    # If the file does not already exist we will start with a new set of trials
    except FileNotFoundError:
        trials_to_keep = Trials()
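# The Trials object loaded (or freshly created) above is what lets the search resume
# from earlier runs: it is normally handed to hyperopt.fmin via its trials= argument.
# A minimal sketch of that hand-off follows, with a hypothetical objective and search
# space standing in for the script's real ones; only the trials= wiring is the point.
from hyperopt import Trials, fmin, hp, tpe

def objective(params):
    # placeholder objective purely for illustration
    return (params['lr'] - 0.01) ** 2

space = {'lr': hp.loguniform('lr', -10, 0)}

trials_to_keep = Trials()  # in the script, this would be the trials_to_keep loaded above
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=len(trials_to_keep.trials) + 20, trials=trials_to_keep)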
    model.save(model_path)


if __name__ == '__main__':
    # read input args and validate
    parser = create_argument_parser()
    args = parser.parse_args()
    validate((args.sentence_length, int, 1, 1000))
    validate((args.feature_size, int, 1, 10000))
    validate((args.b, int, 1, 100000))
    validate((args.e, int, 1, 100000))
    model_path = path.join(path.dirname(path.realpath(__file__)),
                           '{}.h5'.format(str(args.model_name)))
    settings_path = path.join(path.dirname(path.realpath(__file__)),
                              '{}.params'.format(str(args.model_name)))
    validate_parent_exists(model_path)

    # load dataset and get tokens/chunks/pos tags
    dataset = CONLL2000(data_path=args.data_dir,
                        sentence_length=args.sentence_length,
                        extract_chars=args.char_features,
                        max_word_length=args.max_word_length)
    train_set = dataset.train_set
    test_set = dataset.test_set
    words_train, pos_train, chunk_train = train_set[:3]
    words_test, pos_test, chunk_test = test_set[:3]

    # get label sizes, transform y's into 1-hot encoding
    chunk_labels = len(dataset.chunk_vocab) + 1
    pos_labels = len(dataset.pos_vocab) + 1
    word_vocab_size = len(dataset.word_vocab) + 2
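    # The one-hot conversion mentioned in the comment above happens later in the
    # script. A rough sketch of what that step could look like with Keras'
    # to_categorical; the exact call used by the original may differ.
    from keras.utils import to_categorical  # or tensorflow.keras.utils

    y_chunk_train = to_categorical(chunk_train, num_classes=chunk_labels)
    y_chunk_test = to_categorical(chunk_test, num_classes=chunk_labels)
    y_pos_train = to_categorical(pos_train, num_classes=pos_labels)
    y_pos_test = to_categorical(pos_test, num_classes=pos_labels)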
                    help='default behavior merges lines from the same example--set this flag ' +
                         'to disable and only consider windows from the same line in the file')
parser.add_argument('-w', '--window_size', type=str, default='0',
                    help='sizes of windows PER SIDE around words to generate. eg 1 or ' +
                         '1,2,3. ie "-w 1" for "hey world hey" produces "hey <NULL> hey"')
parser.add_argument('-d', '--double_dict', type=str, default='3',
                    help='specifies whether to use a second dictionary for words within ' +
                         'specified extended window. ie for "-w 1 -d 2", the ' +
                         'sentence "hello world how are things" creates a window of "2:hello ' +
                         '1:world <NULL> 1:are 2:things"')
parser.add_argument('-t', '--num_threads', type=int, default=4,
                    help='number of threads to use', action=check_size(1, 10))
args = vars(parser.parse_args())

validate_parent_exists(args['data_dir'])
if args['entities']:
    validate_parent_exists(args['entities'])
validate((args['window_size'], str, 1, 100), (args['double_dict'], str, 1, 100))

beg = time.time()

if args['data_dir']:
    # also set the entities and input file here
    args['entities'] = os.path.expanduser(
        args['data_dir'] + '/movieqa/knowledge_source/entities.txt')
    args['input_file'] = [
        os.path.expanduser(args['data_dir'] + '/movieqa/knowledge_source/wiki.txt')]
else:
    raise ValueError("No data_dir given.")
parser.add_argument(
    '--test',
    default=False,
    action='store_true',
    help='evaluate on the test set at the end of training.')
parser.set_defaults(batch_size=32, epochs=200)
args = parser.parse_args()

validate((args.emb_size, int, 1, 10000),
         (args.eps, float, 1e-15, 1e-2),
         (args.lr, float, 1e-8, 10),
         (args.grad_clip_norm, float, 1e-3, 1e5))

# Validate inputs
validate_parent_exists(args.log_file)
log_file = args.log_file
validate_parent_exists(args.weights_save_path)
weights_save_path = args.weights_save_path
validate_parent_exists(args.data_dir)
data_dir = args.data_dir
assert weights_save_path.endswith('.npz')
assert log_file.endswith('.txt')
gradient_clip_norm = args.grad_clip_norm

babi = BABI_Dialog(
    path=data_dir,
    task=args.task,
    oov=args.use_oov,
    use_match_type=args.use_match_type,
action="store_true", help="Run Inference with loaded weight") parser.add_argument('--restore', action="store_true", help="Run the model restoring weights from model_file") parser.add_argument( '--interactive', action="store_true", help="Run Inference on User-supplied text either after training or \ with saved weights") parser.set_defaults() args = parser.parse_args() if args.model_file: validate_parent_exists(args.model_file) if (args.inference is True) and (args.model_file is None): print("Need to set --model_file for Inference problem") quit() if args.model_file is not None: model_file = os.path.expanduser(args.model_file) else: model_file = None wikimovies = WIKIMOVIES(args.data_dir, subset=args.subset, reparse=args.reparse, mem_source=args.mem_mode)
                    action=check_size(1e-100, 1e-2))
parser.add_argument('--model_file', default='memn2n_weights.npz',
                    help='File to load model weights from.', type=str)
parser.set_defaults(batch_size=32, epochs=200)
args = parser.parse_args()

validate((args.emb_size, int, 1, 10000), (args.eps, float, 1e-15, 1e-2))

# Sanitize inputs
validate_existing_filepath(args.model_file)
model_file = args.model_file
assert model_file.endswith('.npz')
validate_parent_exists(args.data_dir)
data_dir = args.data_dir

babi = BABI_Dialog(path=data_dir, task=args.task, oov=args.use_oov,
                   use_match_type=args.use_match_type,
                   cache_match_type=args.cache_match_type,
                   cache_vectorized=args.cache_vectorized)

weight_saver = Saver()

# Set num iterations to 1 epoch since we loop over epochs & shuffle
ndata = babi.data_dict['train']['memory']['data'].shape[0]
num_iterations = ndata // args.batch_size