                        required=True, help="Data file root (eg. flickr30k_train)")
    parser.add_argument("--eval_data_root", type=str,
                        help="Data file root for eval data (eg. flickr30k_dev)")
    parser.add_argument("--train", action='store_true', help='Trains a model')
    parser.add_argument("--predict", action='store_true',
                        help='Predicts using pre-trained model')
    parser.add_argument("--model_file", type=str, required=True,
                        help="Model file to save/load")
    args = parser.parse_args()
    arg_dict = vars(args)
    util.dump_args(arg_dict, log)

    # Construct data files from the root directory and filename
    data_dir = arg_dict['data_dir'] + "/"
    data_root = arg_dict['data_root']
    eval_data_root = arg_dict['eval_data_root']
    train_file = data_dir + "feats/" + data_root + "_card_classifier.feats"
    eval_file = data_dir + "feats/" + eval_data_root + "_card_classifier.feats"
    scores_file = data_dir + "scores/" + eval_data_root + "_card_classifier.scores"
    meta_file = data_dir + "feats/" + data_root + "_card_classifier_meta.json"
    meta_dict = json.load(open(meta_file, 'r'))
    model_file = arg_dict['model_file']
    if model_file is not None:
        model_file = abspath(expanduser(model_file))
    ablation_file = arg_dict['ablation_file']
    ablation_groups = None
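    # Illustrative example (an assumption, not part of the original source): with
    # --data_dir /data/tacl and --data_root flickr30k_train, the paths above resolve to
    #   train_file  -> /data/tacl/feats/flickr30k_train_card_classifier.feats
    #   meta_file   -> /data/tacl/feats/flickr30k_train_card_classifier_meta.json
    # and, with --eval_data_root flickr30k_dev,
    #   eval_file   -> /data/tacl/feats/flickr30k_dev_card_classifier.feats
    #   scores_file -> /data/tacl/scores/flickr30k_dev_card_classifier.scores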
def __init__():
    # Set up the global logger
    log = Logger('debug', 180)

    # Parse arguments
    parser = ArgumentParser(
        "ImageCaptionLearn_py: Neural Network for Nonvisual "
        "Prediction; Bidirectional LSTM to hidden layer "
        "to softmax over (v)isual and (n)onvisual labels")
    parser.add_argument(
        "--epochs", type=int, default=20,
        help="train opt; number of times to iterate over the dataset")
    parser.add_argument(
        "--batch_size", type=int, default=512,
        help="train opt; number of random mention pairs per batch")
    parser.add_argument("--lstm_hidden_width", type=int, default=200,
                        help="train opt; number of hidden units within "
                             "the LSTM cells")
    parser.add_argument("--start_hidden_width", type=int, default=512,
                        help="train opt; number of hidden units in the "
                             "layer after the LSTM")
    parser.add_argument("--hidden_depth", type=int, default=2,
                        help="train opt; number of hidden layers after the "
                             "lstm, where each is last_width/2 units wide, "
                             "starting with start_hidden_width")
    parser.add_argument("--weighted_classes", action="store_true",
                        help="Whether to inversely weight the classes "
                             "in the loss")
    parser.add_argument("--learn_rate", type=float, default=0.001,
                        help="train opt; optimizer learning rate")
    parser.add_argument("--adam_epsilon", type=float, default=1e-08,
                        help="train opt; Adam optimizer epsilon value")
    parser.add_argument("--clip_norm", type=float, default=5.0,
                        help='train opt; global clip norm value')
    parser.add_argument(
        "--data_norm", action='store_true',
        help="train opt; Whether to L2-normalize the w2v word vectors")
    parser.add_argument("--lstm_input_dropout", type=float, default=0.5,
                        help="train opt; probability to keep lstm input nodes")
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="train opt; probability to keep all other nodes")
    parser.add_argument(
        "--data_dir", required=True,
        type=lambda f: util.arg_path_exists(parser, f),
        help="Directory containing raw/, feats/, and scores/ directories")
    parser.add_argument(
        "--data", choices=["flickr30k", "mscoco", "coco30k", "flickr30k_v1"],
        required=True, help="Dataset to use")
    parser.add_argument("--split", choices=["train", "dev", "test", "trainDev"],
                        required=True, help="Dataset split")
    parser.add_argument("--eval_data", choices=["flickr30k", "mscoco", "coco30k"],
                        help="Evaluation dataset to use")
    parser.add_argument("--eval_split", choices=["train", "dev", "test", "trainDev"],
                        help="Evaluation dataset split")
    parser.add_argument(
        "--encoding_scheme",
        choices=["first_last_sentence", 'first_last_mention'],
        default="first_last_mention",
        help="train opt; specifies how lstm outputs are transformed")
    parser.add_argument("--train", action='store_true', help='Trains a model')
    parser.add_argument(
        "--activation", choices=['sigmoid', 'tanh', 'relu', 'leaky_relu'],
        default='relu',
        help='train opt; which nonlinear activation function to use')
    parser.add_argument("--predict", action='store_true',
                        help='Predicts using pre-trained model')
    parser.add_argument("--model_file", type=str, help="Model file to save/load")
    parser.add_argument("--embedding_type", choices=['w2v', 'glove'], default='w2v',
                        help="Word embedding type to use")
    parser.add_argument(
        "--early_stopping", action='store_true',
        help="Whether to implement early stopping based on the "
             "evaluation performance")
    args = parser.parse_args()
    arg_dict = vars(args)
    if arg_dict['train'] and arg_dict['model_file'] is None:
        arg_dict['model_file'] = "/home/ccervan2/models/tacl201801/" + \
            nn_data.build_model_filename(arg_dict, "affinity_lstm")
    model_file = arg_dict['model_file']
    util.dump_args(arg_dict, log)

    # Construct data files from the root directory and filename
    data_dir = arg_dict['data_dir'] + "/"
    data_root = arg_dict['data'] + "_" + arg_dict['split']
    eval_data_root = None
    if arg_dict['train']:
        eval_data_root = arg_dict['eval_data'] + "_" + arg_dict['eval_split']
    sentence_file = data_dir + "raw/" + data_root + "_captions.txt"
    mention_idx_file = data_dir + "raw/" + data_root + "_mentions_affinity.txt"
    feature_file = data_dir + "feats/" + data_root + "_affinity_neural.feats"
    feature_meta_file = data_dir + "feats/" + data_root + "_affinity_neural_meta.json"
    box_dir = data_dir + "feats/" + arg_dict['data'] + "_boxes/" + arg_dict["split"] + "/"
    mention_box_label_file = data_dir + "raw/" + data_root + "_affinity_labels.txt"
    box_category_file = None
    #if "coco" in data_root:
    #    box_category_file = data_dir + "raw/" + data_root + "_box_cats.txt"
    if eval_data_root is not None:
        eval_box_dir = data_dir + "feats/" + arg_dict['eval_data'] + "_boxes/" + arg_dict["eval_split"] + "/"
        eval_sentence_file = data_dir + "raw/" + eval_data_root + "_captions.txt"
        eval_mention_idx_file = data_dir + "raw/" + eval_data_root + "_mentions_affinity.txt"
        eval_feature_file = data_dir + "feats/" + eval_data_root + "_affinity_neural.feats"
        eval_feature_meta_file = data_dir + "feats/" + eval_data_root + "_affinity_neural_meta.json"
        eval_mention_box_label_file = data_dir + "raw/" + eval_data_root + "_affinity_labels.txt"
        eval_box_category_file = None
        # if "coco" in eval_data_root:
        #     eval_box_category_file = data_dir + "raw/" + eval_data_root + "_box_cats.txt"
    #endif

    # Load the appropriate word embeddings
    embedding_type = arg_dict['embedding_type']
    if embedding_type == 'w2v':
        log.info("Initializing word2vec")
        nn_data.init_w2v()
    elif embedding_type == 'glove':
        log.info("Initializing glove")
        nn_data.init_glove()
    #endif

    # Set the random seeds identically every run
    nn_util.set_random_seeds()

    # Set up the minimum tensorflow logging level
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Train, if training was specified
    if arg_dict['train']:
        train(encoding_scheme=arg_dict['encoding_scheme'],
              embedding_type=embedding_type,
              sentence_file=sentence_file,
              mention_idx_file=mention_idx_file,
              feature_file=feature_file,
              feature_meta_file=feature_meta_file,
              box_dir=box_dir,
              mention_box_label_file=mention_box_label_file,
              epochs=arg_dict['epochs'],
              batch_size=arg_dict['batch_size'],
              lstm_hidden_width=arg_dict['lstm_hidden_width'],
              start_hidden_width=arg_dict['start_hidden_width'],
              hidden_depth=arg_dict['hidden_depth'],
              weighted_classes=arg_dict['weighted_classes'],
              input_dropout=arg_dict['lstm_input_dropout'],
              other_dropout=arg_dict['dropout'],
              lrn_rate=arg_dict['learn_rate'],
              clip_norm=arg_dict['clip_norm'],
              data_norm=arg_dict['data_norm'],
              adam_epsilon=arg_dict['adam_epsilon'],
              activation=arg_dict['activation'],
              model_file=model_file,
              eval_sentence_file=eval_sentence_file,
              eval_mention_idx_file=eval_mention_idx_file,
              eval_feature_file=eval_feature_file,
              eval_feature_meta_file=eval_feature_meta_file,
              eval_box_dir=eval_box_dir,
              eval_mention_box_label_file=eval_mention_box_label_file,
              early_stopping=arg_dict['early_stopping'],
              box_category_file=box_category_file,
              eval_box_category_file=eval_box_category_file,
              log=log)
    elif arg_dict['predict']:
        scores_file = data_dir + "scores/" + data_root + "_affinity.scores"

        # Restore our variables
        tf.reset_default_graph()
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(model_file + ".meta")
            saver.restore(sess, model_file)
            predict(encoding_scheme=arg_dict['encoding_scheme'],
                    embedding_type=embedding_type,
                    tf_session=sess,
                    batch_size=arg_dict['batch_size'],
                    sentence_file=sentence_file,
                    mention_idx_file=mention_idx_file,
                    feature_file=feature_file,
                    feature_meta_file=feature_meta_file,
                    box_dir=box_dir,
                    mention_box_label_file=mention_box_label_file,
                    scores_file=scores_file,
                    box_category_file=box_category_file,
                    log=log)
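# Example invocations for the affinity driver above (illustrative only; the script
# name and all paths are placeholders, not taken from the original source):
#
#   # Train on the flickr30k train split, evaluating against dev
#   python neural_affinity_lstm.py --data_dir /data/tacl --data flickr30k \
#       --split train --eval_data flickr30k --eval_split dev --train
#
#   # Score the dev split with a previously trained model
#   python neural_affinity_lstm.py --data_dir /data/tacl --data flickr30k \
#       --split dev --predict --model_file /path/to/affinity_lstm.model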
def __init__():
    # Set up the logger
    log = Logger('debug', 180)

    # Parse arguments
    parser = ArgumentParser("ImageCaptionLearn_py: Core Neural "
                            "Network classification architecture; "
                            "used for nonvis and cardinality prediction")
    parser.add_argument("--epochs", type=int, default=20,
                        help="train opt; number of times to "
                             "iterate over the dataset")
    parser.add_argument("--batch_size", type=int, default=512,
                        help="train opt; number of random mention "
                             "pairs per batch")
    parser.add_argument("--lstm_hidden_width", type=int, default=200,
                        help="train opt; number of hidden units "
                             "within the LSTM cells")
    parser.add_argument("--start_hidden_width", type=int, default=512,
                        help="train opt; number of hidden units "
                             "in the layer after the LSTM")
    parser.add_argument("--hidden_depth", type=int, default=2,
                        help="train opt; number of hidden layers "
                             "after the lstm, where each is "
                             "last_width/2 units wide, starting "
                             "with start_hidden_width")
    parser.add_argument("--weighted_classes", action="store_true",
                        help="Whether to inversely weight the "
                             "classes in the loss")
    parser.add_argument("--learn_rate", type=float, default=0.001,
                        help="train opt; optimizer learning rate")
    parser.add_argument("--adam_epsilon", type=float, default=1e-08,
                        help="train opt; Adam optimizer epsilon value")
    parser.add_argument("--clip_norm", type=float, default=5.0,
                        help='train opt; global clip norm value')
    parser.add_argument(
        "--data_norm", action='store_true',
        help="train opt; Whether to L2-normalize the w2v word vectors")
    parser.add_argument("--lstm_input_dropout", type=float, default=0.5,
                        help="train opt; probability to keep lstm input nodes")
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="train opt; probability to keep all other nodes")
    parser.add_argument(
        "--data_dir", required=True,
        type=lambda f: util.arg_path_exists(parser, f),
        help="Directory containing raw/, feats/, and scores/ directories")
    parser.add_argument("--data_root", type=str, required=True,
                        help="Data file root (eg. flickr30k_train)")
    parser.add_argument(
        "--eval_data_root", type=str,
        help="Data file root for eval data (eg. flickr30k_dev)")
    parser.add_argument("--train", action='store_true', help='Trains a model')
    parser.add_argument(
        "--activation", choices=['sigmoid', 'tanh', 'relu', 'leaky_relu'],
        default='relu',
        help='train opt; which nonlinear activation function to use')
    parser.add_argument("--predict", action='store_true',
                        help='Predicts using pre-trained model')
    parser.add_argument("--model_file", type=str, help="Model file to save/load")
    parser.add_argument("--embedding_type", choices=['w2v', 'glove'], default='w2v',
                        help="Word embedding type to use")
    parser.add_argument(
        "--early_stopping", action='store_true',
        help="Whether to implement early stopping based on the " +
             "evaluation performance")
    parser.add_argument("--skip_epoch_eval", action='store_true',
                        help='Skips evaluation each epoch during training')
    parser.add_argument("--encoding_scheme",
                        choices=['first_last_sentence', 'first_last_mention'],
                        default='first_last_mention')
    parser.add_argument("--task", required=True, choices=['nonvis', 'card'])
    args = parser.parse_args()
    arg_dict = vars(args)
    task = arg_dict['task']
    if arg_dict['train']:
        arg_dict['model_file'] = "/home/ccervan2/models/tacl201801/" + \
            nn_data.build_model_filename(arg_dict, task + "_lstm")
    model_file = arg_dict['model_file']
    util.dump_args(arg_dict, log)

    # Construct data files from the root directory and filename
    data_dir = arg_dict['data_dir'] + "/"
    data_root = arg_dict['data_root']
    eval_data_root = arg_dict['eval_data_root']
    sentence_file = data_dir + "raw/" + data_root + "_captions.txt"
    mention_idx_file = data_dir + "raw/" + data_root + "_mentions_" + task + ".txt"
    feature_file = data_dir + "feats/" + data_root + "_" + task + "_neural.feats"
    feature_meta_file = data_dir + "feats/" + data_root + "_" + task + "_neural_meta.json"
    if eval_data_root is not None:
        eval_sentence_file = data_dir + "raw/" + eval_data_root + "_captions.txt"
        eval_mention_idx_file = data_dir + "raw/" + eval_data_root + "_mentions_" + task + ".txt"
        eval_feature_file = data_dir + "feats/" + eval_data_root + "_" + task + "_neural.feats"
        eval_feature_meta_file = data_dir + "feats/" + eval_data_root + "_" + task + "_neural_meta.json"
    #endif

    # Load the appropriate word embeddings
    embedding_type = arg_dict['embedding_type']
    if embedding_type == 'w2v':
        log.info("Initializing word2vec")
        nn_data.init_w2v()
    elif embedding_type == 'glove':
        log.info("Initializing glove")
        nn_data.init_glove()
    #endif

    # Set the random seeds identically every run
    nn_util.set_random_seeds()

    # Set up the minimum tensorflow logging level
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Train, if training was specified
    if arg_dict['train']:
        train(task=task,
              encoding_scheme=arg_dict['encoding_scheme'],
              embedding_type=embedding_type,
              sentence_file=sentence_file,
              mention_idx_file=mention_idx_file,
              feature_file=feature_file,
              feature_meta_file=feature_meta_file,
              epochs=arg_dict['epochs'],
              batch_size=arg_dict['batch_size'],
              lstm_hidden_width=arg_dict['lstm_hidden_width'],
              start_hidden_width=arg_dict['start_hidden_width'],
              hidden_depth=arg_dict['hidden_depth'],
              weighted_classes=arg_dict['weighted_classes'],
              lstm_input_dropout=arg_dict['lstm_input_dropout'],
              dropout=arg_dict['dropout'],
              lrn_rate=arg_dict['learn_rate'],
              clip_norm=arg_dict['clip_norm'],
              data_norm=arg_dict['data_norm'],
              adam_epsilon=arg_dict['adam_epsilon'],
              activation=arg_dict['activation'],
              model_file=model_file,
              eval_sentence_file=eval_sentence_file,
              eval_mention_idx_file=eval_mention_idx_file,
              eval_feature_file=eval_feature_file,
              eval_feature_meta_file=eval_feature_meta_file,
              early_stopping=arg_dict['early_stopping'],
              log=log)
    elif arg_dict['predict']:
        scores_file = data_dir + "scores/" + data_root + "_" + task + ".scores"

        # Restore our variables
        tf.reset_default_graph()
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(model_file + ".meta")
            saver.restore(sess, model_file)
            predict(task=task,
                    encoding_scheme=arg_dict['encoding_scheme'],
                    embedding_type=embedding_type,
                    tf_session=sess,
                    batch_size=arg_dict['batch_size'],
                    sentence_file=sentence_file,
                    mention_idx_file=mention_idx_file,
                    feature_file=feature_file,
                    feature_meta_file=feature_meta_file,
                    scores_file=scores_file,
                    log=log)
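# Example invocations for the nonvis/cardinality driver above (illustrative only;
# the script name and paths are placeholders, not taken from the original source):
#
#   # Train a nonvisual-mention classifier, evaluating against the dev split
#   python neural_core_lstm.py --data_dir /data/tacl --data_root flickr30k_train \
#       --eval_data_root flickr30k_dev --task nonvis --train
#
#   # Score the dev split for cardinality with a previously trained model
#   python neural_core_lstm.py --data_dir /data/tacl --data_root flickr30k_dev \
#       --task card --predict --model_file /path/to/card_lstm.model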
def __init__():
    """
    :return:
    """
    global TASKS

    # Set up the global logger
    log = Logger('debug', 180)

    # Parse arguments
    parser = ArgumentParser("ImageCaptionLearn_py: Neural Network for "
                            "multitask learning; shared bidirectional "
                            "LSTM to hidden layers to softmax over "
                            "labels")
    parser.add_argument(
        "--epochs", type=int, default=20,
        help="train opt; number of times to iterate over the dataset")
    parser.add_argument("--batch_size", type=int, default=512,
                        help="train opt; number of random examples per batch")
    parser.add_argument("--lstm_hidden_width", type=int, default=200,
                        help="train opt; number of hidden units within "
                             "the LSTM cells")
    parser.add_argument("--start_hidden_width", type=int, default=512,
                        help="train opt; number of hidden units in the "
                             "layer after the LSTM")
    parser.add_argument("--hidden_depth", type=int, default=2,
                        help="train opt; number of hidden layers after the "
                             "lstm, where each is last_width/2 units wide, "
                             "starting with start_hidden_width")
    parser.add_argument("--weighted_classes", action="store_true",
                        help="Whether to inversely weight the classes "
                             "in the loss")
    parser.add_argument("--learn_rate", type=float, default=0.001,
                        help="train opt; optimizer learning rate")
    parser.add_argument("--adam_epsilon", type=float, default=1e-08,
                        help="train opt; Adam optimizer epsilon value")
    parser.add_argument("--clip_norm", type=float, default=5.0,
                        help='train opt; global clip norm value')
    parser.add_argument(
        "--data_norm", action='store_true',
        help="train opt; Whether to L2-normalize the w2v word vectors")
    parser.add_argument("--lstm_input_dropout", type=float, default=0.5,
                        help="train opt; probability to keep lstm input nodes")
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="train opt; probability to keep all other nodes")
    parser.add_argument(
        "--encoding_scheme",
        choices=["first_last_sentence", 'first_last_mention'],
        default="first_last_mention",
        help="train opt; specifies how lstm outputs are transformed")
    parser.add_argument(
        "--data_dir", required=True,
        type=lambda f: util.arg_path_exists(parser, f),
        help="Directory containing raw/, feats/, and scores/ directories")
    parser.add_argument("--data", choices=["flickr30k", "mscoco", "coco30k"],
                        required=True, help="Dataset to use")
    parser.add_argument("--split", choices=["train", "dev", "test", "trainDev"],
                        required=True, help="Dataset split")
    parser.add_argument("--eval_data", choices=["flickr30k", "mscoco", "coco30k"],
                        required=True, help="Evaluation dataset to use")
    parser.add_argument("--eval_split", choices=["train", "dev", "test", "trainDev"],
                        required=True, help="Evaluation dataset split")
    parser.add_argument("--train", action='store_true', help='Trains a model')
    parser.add_argument(
        "--activation", choices=['sigmoid', 'tanh', 'relu', 'leaky_relu'],
        default='relu',
        help='train opt; which nonlinear activation function to use')
    parser.add_argument("--predict", action='store_true',
                        help='Predicts using pre-trained model')
    parser.add_argument("--model_file", type=str, help="Model file to save/load")
    parser.add_argument("--embedding_type", choices=['w2v', 'glove'], default='w2v',
                        help="Word embedding type to use")
    parser.add_argument(
        "--multitask_scheme",
        choices=["simple_joint", "weighted_joint", "alternate"],
        default="simple_joint", help="Multitask learning scheme")
    parser.add_argument(
        "--mention_box_label_file",
        type=lambda f: util.arg_path_exists(parser, f),
        help="Label file; overrides the default path formed by combining "
             "data_dir, data, and split arguments")
    parser.add_argument("--eval_mention_box_label_file",
                        type=lambda f: util.arg_path_exists(parser, f),
                        help="Label file for eval data; overrides default")
    parser.add_argument("--box_category_file",
                        type=lambda f: util.arg_path_exists(parser, f),
                        help="File containing box category one-hots, which are "
                             "added to bounding box representations")
    parser.add_argument("--eval_box_category_file",
                        type=lambda f: util.arg_path_exists(parser, f),
                        help="Box category one-hot file for evaluation data")
    args = parser.parse_args()
    arg_dict = vars(args)
    train_model = arg_dict['train']
    predict_scores = arg_dict['predict']
    multitask_scheme = arg_dict['multitask_scheme']
    if train_model:
        arg_dict['model_file'] = "/home/ccervan2/models/tacl201712/" + \
            nn_data.build_model_filename(arg_dict,
                                         "multitask_" + multitask_scheme + "_lstm")
    model_file = arg_dict['model_file']
    util.dump_args(arg_dict, log)

    # Initialize the word embeddings
    embedding_type = arg_dict['embedding_type']
    if embedding_type == 'w2v':
        log.info("Initializing word2vec")
        nn_data.init_w2v()
    elif embedding_type == 'glove':
        log.info("Initializing glove")
        nn_data.init_glove()
    #endif

    # Override the label files, if specified
    mention_box_label_file = arg_dict['mention_box_label_file']
    eval_mention_box_label_file = arg_dict['eval_mention_box_label_file']

    # Get the category file, if specified
    box_category_file = arg_dict['box_category_file']
    eval_box_category_file = arg_dict['eval_box_category_file']

    # Load the data
    task_data_dicts = load_data(arg_dict['data_dir'] + "/", arg_dict['data'],
                                arg_dict['split'], embedding_type,
                                mention_box_label_file, box_category_file, log)
    eval_task_data_dicts = dict()
    if train_model:
        eval_task_data_dicts = load_data(arg_dict['data_dir'] + "/",
                                         arg_dict['eval_data'],
                                         arg_dict['eval_split'], embedding_type,
                                         eval_mention_box_label_file,
                                         eval_box_category_file, log)

    # Set the random seeds identically every run
    nn_util.set_random_seeds()

    # Set up the minimum tensorflow logging level
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # DEBUG: mixing ratios
    task_batch_sizes = {
        'rel_intra': 512, 'rel_cross': 512, 'nonvis': 512,
        'card': 512, 'affinity': 512
    }

    # Retrieve our sample IDs, by task
    task_ids = dict()
    for task in TASKS:
        if task == 'affinity':
            task_ids[task] = get_valid_mention_box_pairs(task_data_dicts[task])
        else:
            task_ids[task] = list(
                task_data_dicts[task]['mention_indices'].keys())
    eval_task_ids = dict()
    if train_model:
        for task in TASKS:
            if task == 'affinity':
                eval_task_ids[task] = get_valid_mention_box_pairs(
                    eval_task_data_dicts[task])
            else:
                eval_task_ids[task] = list(
                    eval_task_data_dicts[task]['mention_indices'].keys())
        #endfor
    #endif

    if train_model:
        # Set up the shared network
        task_vars = setup(multitask_scheme, arg_dict['lstm_hidden_width'],
                          arg_dict['data_norm'],
                          arg_dict['start_hidden_width'],
                          arg_dict['hidden_depth'],
                          arg_dict['weighted_classes'],
                          arg_dict['activation'],
                          arg_dict['encoding_scheme'],
                          task_data_dicts, arg_dict['batch_size'],
                          task_batch_sizes)

        # Train the model
        if 'joint' in multitask_scheme:
            train_jointly(multitask_scheme, arg_dict['epochs'],
                          arg_dict['batch_size'],
                          arg_dict['lstm_input_dropout'],
                          arg_dict['dropout'], arg_dict['learn_rate'],
                          arg_dict['adam_epsilon'], arg_dict['clip_norm'],
                          arg_dict['encoding_scheme'], task_vars,
                          task_data_dicts, eval_task_data_dicts,
                          task_ids, eval_task_ids, model_file, log)
        elif multitask_scheme == 'alternate':
            train_alternately(arg_dict['epochs'], task_batch_sizes,
                              arg_dict['lstm_input_dropout'],
                              arg_dict['dropout'], arg_dict['learn_rate'],
                              arg_dict['adam_epsilon'], arg_dict['clip_norm'],
                              arg_dict['encoding_scheme'], task_vars,
                              task_data_dicts, eval_task_data_dicts,
                              task_ids, eval_task_ids, model_file, log)
        #endif
    elif predict_scores:
        for task in TASKS:
            task_data_dicts[task]['scores_file'] = \
                arg_dict['data_dir'] + "/scores/" + arg_dict['data'] + "_" + \
                arg_dict['split'] + "_" + task + "_multi_" + multitask_scheme + \
                "_lstm.scores"
        #endfor

        # Restore our variables
        tf.reset_default_graph()
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(model_file + ".meta")
            saver.restore(sess, model_file)
            predict(sess, multitask_scheme, task_data_dicts, task_ids,
                    batch_size=arg_dict['batch_size'],
                    task_batch_sizes=task_batch_sizes, log=log)
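# Hypothetical entry point and usage (an assumption; this excerpt does not show how
# __init__() is invoked). If the module is run directly, start the multitask driver
# above. The script name and paths in the example are placeholders:
#
#   python neural_multitask_lstm.py --data_dir /data/tacl --data flickr30k \
#       --split train --eval_data flickr30k --eval_split dev \
#       --multitask_scheme simple_joint --train
if __name__ == "__main__":
    __init__()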