def exp_concat_with_glove():
    """Launch the experiment that concatenates visual embeddings with GloVe.

    Parses the command line, hands ``embed_dim``, ``xform`` and ``glove_dim``
    to ``manage_required_args`` as required arguments, builds the experiment
    and data constants, and calls ``concat_with_glove.main``.
    """
    args = parser.parse_args()
    unspecified = manage_required_args(
        args,
        parser,
        required_args=['embed_dim', 'xform', 'glove_dim'])

    # Experiment lives under <cwd>/symlinks/exp/multi_sense_cooccur/.
    run_name = f'concat_with_glove_{args.glove_dim}'  # alt. xformed_
    base_dir = os.path.join(
        os.getcwd(),
        f'symlinks/exp/multi_sense_cooccur/{args.xform}_{args.embed_dim}')
    exp_const = ExpConstants(run_name, base_dir)

    # The visual embedding files are looked up in the experiment base dir.
    embed_dir = exp_const.out_base_dir
    data_const = Constants()
    data_const.visual_word_to_idx = os.path.join(embed_dir, 'word_to_idx.json')
    data_const.visual_embeddings_npy = os.path.join(
        embed_dir,
        'visual_embeddings.npy')  # alt. _xformed.npy

    # GloVe paths for the requested dimensionality (passed as a string).
    glove = GloveConstantsFactory.create(dim=str(args.glove_dim))
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    concat_with_glove.main(exp_const, data_const)
def exp_concat_random_with_glove():
    """Launch the experiment that concatenates random vectors with GloVe.

    Uses fixed settings: experiment name ``concat_with_glove_100``, output
    under ``symlinks/exp/multi_sense_cooccur/linear_100``, ``random_dim`` of
    100 and the GloVe constants created with ``dim='100'``. Hands the result
    to ``concat_random_with_glove.main``.
    """
    base_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/multi_sense_cooccur/linear_100')
    exp_const = ExpConstants(
        'concat_with_glove_100',  # alt. xformed_
        base_dir)
    exp_const.random_dim = 100

    data_const = Constants()
    glove = GloveConstantsFactory.create(dim='100')
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    concat_random_with_glove.main(exp_const, data_const)
def main():
    """Write random baseline embeddings matching the GloVe matrix statistics.

    Reads the ``'embeddings'`` dataset from the GloVe h5py file, then writes
    two files into ``glove_const.proc_dir``, each holding a single
    ``'embeddings'`` dataset of the same shape as the GloVe matrix:

    * ``random_normal_embeddings.h5py`` -- samples from a normal distribution
      with the GloVe mean and standard deviation, clipped to the GloVe
      [min, max] range.
    * ``random_uniform_embeddings.h5py`` -- samples drawn uniformly between
      the GloVe min and max.

    All h5py files are opened through context managers so the handles are
    released even if reading, sampling or writing raises.
    """
    glove_const = GloveConstantsFactory.create()

    # Pull the full matrix into memory, then release the input file at once.
    with h5py.File(glove_const.embeddings_h5py, 'r') as glove_file:
        glove_embeddings = glove_file['embeddings'][()]

    mean = np.mean(glove_embeddings)
    std = np.std(glove_embeddings)
    min_ = np.min(glove_embeddings)
    max_ = np.max(glove_embeddings)
    num_words, dim = glove_embeddings.shape

    # Create random normal embeddings (drawn first, so the order of draws
    # from the global NumPy RNG is unchanged).
    random_normal_embeddings = np.random.normal(
        loc=mean,
        scale=std,
        size=(num_words, dim))
    # Same result as np.maximum(np.minimum(x, max_), min_) since min_ <= max_.
    random_normal_embeddings = np.clip(random_normal_embeddings, min_, max_)
    normal_path = os.path.join(
        glove_const.proc_dir,
        'random_normal_embeddings.h5py')
    with h5py.File(normal_path, 'w') as normal_file:
        normal_file.create_dataset(
            'embeddings',
            data=random_normal_embeddings)

    # Create random uniform embeddings
    random_uniform_embeddings = np.random.uniform(
        low=min_,
        high=max_,
        size=(num_words, dim))
    uniform_path = os.path.join(
        glove_const.proc_dir,
        'random_uniform_embeddings.h5py')
    with h5py.File(uniform_path, 'w') as uniform_file:
        uniform_file.create_dataset(
            'embeddings',
            data=random_uniform_embeddings)
def exp_combine_glove_with_visual_features():
    """Launch the experiment combining GloVe with autoencoder visual features.

    Builds the experiment constants for ``concat_glove_and_ae_visual``, points
    the data constants at the visual feature index / h5py files and at the
    default GloVe files, and calls ``combine_glove_with_visual_features.main``.
    """
    base_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/'
        'normalized_resnet_embeddings_recon_loss_trained_on_google')
    exp_const = ExpConstants('concat_glove_and_ae_visual', base_dir)

    # Directory holding the visual feature index and feature file.
    feat_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/'
        'normalized_resnet_features_recon_loss_trained_on_google/'
        'ae_visual_features')

    data_const = Constants()
    data_const.visual_features_idx = os.path.join(feat_dir, 'word_to_idx.json')
    data_const.visual_features_h5py = os.path.join(
        feat_dir,
        'word_features.h5py')

    glove = GloveConstantsFactory.create()
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    combine_glove_with_visual_features.main(exp_const, data_const)
def __getitem__(self,i):
    """Return sample ``i`` as a dict of raw fields plus their embeddings.

    Each entry of ``self.samples`` unpacks to ``(word1, word2, feature,
    label)``. The returned dict holds those four values (label converted to
    ``np.float32``) followed by ``word1_embedding``, ``word2_embedding`` and
    ``feature_embedding``, each obtained from ``self.get_embedding``.
    """
    word1, word2, feature, label = self.samples[i]
    item = {
        'word1': word1,
        'word2': word2,
        'feature': feature,
        'label': np.float32(label),
    }
    # Embeddings are looked up in the same order as the original literal.
    for field in ('word1', 'word2', 'feature'):
        item[f'{field}_embedding'] = self.get_embedding(item[field])
    # Disabled entries kept for reference:
    #'mean_embedding': self.embeddings['mean'].value.astype(np.float32),
    #'std_embedding': self.embeddings['std'].value.astype(np.float32),
    return item


if __name__=='__main__':
    # Manual smoke test: iterate the dataset with the default GloVe files and
    # drop into the debugger on every batch.
    from data.glove.constants import GloveConstantsFactory

    glove_paths = GloveConstantsFactory.create()
    semeval_const = SemEval201810DatasetConstants()
    semeval_const.embeddings_h5py = glove_paths.embeddings_h5py
    semeval_const.word_to_idx_json = glove_paths.word_to_idx_json

    dataset = SemEval201810Dataset(semeval_const)
    dataloader = DataLoader(dataset, batch_size=4)
    for data in dataloader:
        import pdb; pdb.set_trace()
def exp_train_concat_svm():
    """Configure and launch ConcatSVM training on the SemEval201810 dataset.

    Command-line arguments passed to ``manage_required_args`` as required:
    ``lr``, ``l2_weight``, ``batch_size``, ``glove_dim``, the four feature
    toggles and ``visual_only``. Passed as optional: ``exp_name`` (falls back
    to ``'trial'``), ``out_base_dir`` (falls back to
    ``<cwd>/symlinks/exp/semeval_2018_10/concat_svm``), ``embeddings_h5py``
    and ``word_to_idx_json`` (when ``embeddings_h5py`` is not given, both fall
    back to the default GloVe files).

    The embedding dimension is read from the second axis of the
    ``'embeddings'`` dataset in the chosen h5py file; the file is closed as
    soon as the shape has been read.
    """
    args = parser.parse_args()
    manage_required_args(
        args,
        parser,
        required_args=[
            'lr',
            'l2_weight',
            'batch_size',
            'glove_dim',
            'embed_linear_feat',
            'embed_quadratic_feat',
            'distance_linear_feat',
            'distance_quadratic_feat',
            'visual_only',
        ],
        optional_args=[
            'exp_name',
            'out_base_dir',
            'embeddings_h5py',
            'word_to_idx_json'
        ])

    exp_name = 'trial' if args.exp_name is None else args.exp_name
    if args.out_base_dir is None:
        out_base_dir = os.path.join(
            os.getcwd(),
            'symlinks/exp/semeval_2018_10/concat_svm')
    else:
        out_base_dir = args.out_base_dir

    exp_const = ExpConstants(exp_name=exp_name, out_base_dir=out_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.num_epochs = 20
    exp_const.batch_size = args.batch_size
    exp_const.lr = args.lr

    data_const = SemEval201810DatasetConstants()
    if args.embeddings_h5py is None:
        glove_const = GloveConstantsFactory.create()
        data_const.embeddings_h5py = glove_const.embeddings_h5py
        data_const.word_to_idx_json = glove_const.word_to_idx_json
    else:
        # NOTE(review): word_to_idx_json is taken from the CLI as-is here and
        # may be None if only embeddings_h5py was supplied -- confirm callers
        # always pass both together.
        data_const.embeddings_h5py = args.embeddings_h5py
        data_const.word_to_idx_json = args.word_to_idx_json

    # Only the dataset shape is needed; do not keep the file handle open.
    with h5py.File(data_const.embeddings_h5py, 'r') as embeddings_file:
        embed_dim = embeddings_file['embeddings'].shape[1]

    model_const = Constants()
    model_const.concat_svm = ConcatSVMConstants()
    model_const.concat_svm.l2_weight = args.l2_weight
    model_const.concat_svm.embedding_dim = embed_dim
    model_const.concat_svm.glove_dim = args.glove_dim
    model_const.concat_svm.layer_units = []
    model_const.concat_svm.use_embedding_linear_feats = args.embed_linear_feat
    model_const.concat_svm.use_embedding_quadratic_feats = \
        args.embed_quadratic_feat
    model_const.concat_svm.use_distance_linear_feats = args.distance_linear_feat
    model_const.concat_svm.use_distance_quadratic_feats = \
        args.distance_quadratic_feat
    model_const.concat_svm.visual_only = args.visual_only

    train_concat_svm.main(exp_const, data_const, model_const)
def exp_eval_concat_svm():
    """Configure and launch ConcatSVM evaluation on the SemEval201810 test set.

    Command-line arguments passed to ``manage_required_args`` as required:
    ``batch_size``, ``glove_dim``, the four feature toggles, ``visual_only``
    and ``visual_vocab_json``. Passed as optional: ``exp_name`` (falls back to
    ``'trial'``), ``out_base_dir`` (falls back to
    ``<cwd>/symlinks/exp/semeval_2018_10/concat_svm``), ``embeddings_h5py``
    and ``word_to_idx_json``.

    When ``embeddings_h5py`` is not given, the default GloVe files are used
    and the GloVe word index doubles as the visual vocabulary; otherwise the
    embeddings, word index and visual vocabulary all come from the CLI.

    The embedding dimension is read from the second axis of the
    ``'embeddings'`` dataset in the chosen h5py file; the file is closed as
    soon as the shape has been read.
    """
    args = parser.parse_args()
    manage_required_args(
        args,
        parser,
        required_args=[
            'batch_size',
            'glove_dim',
            'embed_linear_feat',
            'embed_quadratic_feat',
            'distance_linear_feat',
            'distance_quadratic_feat',
            'visual_only',
            'visual_vocab_json',
        ],
        optional_args=[
            'exp_name',
            'out_base_dir',
            'embeddings_h5py',
            'word_to_idx_json'
        ])

    exp_name = 'trial' if args.exp_name is None else args.exp_name
    if args.out_base_dir is None:
        out_base_dir = os.path.join(
            os.getcwd(),
            'symlinks/exp/semeval_2018_10/concat_svm')
    else:
        out_base_dir = args.out_base_dir

    exp_const = ExpConstants(exp_name=exp_name, out_base_dir=out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    # NOTE(review): 'batch_size' is passed as a required argument above but the
    # evaluation batch size is hard-coded here -- confirm this is intentional.
    exp_const.batch_size = 2560

    data_const = SemEval201810DatasetConstants()
    data_const.subset = 'test'
    if args.embeddings_h5py is None:
        glove_const = GloveConstantsFactory.create()
        data_const.embeddings_h5py = glove_const.embeddings_h5py
        data_const.word_to_idx_json = glove_const.word_to_idx_json
        data_const.visual_vocab_json = data_const.word_to_idx_json
    else:
        data_const.embeddings_h5py = args.embeddings_h5py
        data_const.word_to_idx_json = args.word_to_idx_json
        data_const.visual_vocab_json = args.visual_vocab_json
    # data_const.vocab_json = os.path.join(
    #     os.getcwd(),
    #     'symlinks/data/visualgenome/proc/all_word_freqs.json')
    # data_const.visual_vocab_json = os.path.join(
    #     os.getcwd(),
    #     'symlinks/exp/combine_glove_visual_reps/concat_glove_visual_avg_reps/visual_words.json')

    # Only the dataset shape is needed; do not keep the file handle open.
    with h5py.File(data_const.embeddings_h5py, 'r') as embeddings_file:
        embed_dim = embeddings_file['embeddings'].shape[1]

    model_const = Constants()
    model_const.concat_svm = ConcatSVMConstants()
    model_const.concat_svm.embedding_dim = embed_dim
    model_const.concat_svm.glove_dim = args.glove_dim
    model_const.concat_svm.layer_units = []
    model_const.concat_svm.use_embedding_linear_feats = args.embed_linear_feat
    model_const.concat_svm.use_embedding_quadratic_feats = \
        args.embed_quadratic_feat
    model_const.concat_svm.use_distance_linear_feats = args.distance_linear_feat
    model_const.concat_svm.use_distance_quadratic_feats = \
        args.distance_quadratic_feat
    model_const.concat_svm.visual_only = args.visual_only

    eval_concat_svm.main(exp_const, data_const, model_const)