Exemple #1
0
def exp_concat_with_glove():
    """Configure and launch the ``concat_with_glove`` experiment.

    Arguments are read from the module-level ``parser``; ``embed_dim``,
    ``xform`` and ``glove_dim`` are passed to ``manage_required_args`` as
    required.  The experiment is named ``concat_with_glove_<glove_dim>`` and
    rooted at ``<cwd>/symlinks/exp/multi_sense_cooccur/<xform>_<embed_dim>``.
    The visual word index and embeddings are read from the experiment's
    ``out_base_dir``; GloVe paths come from ``GloveConstantsFactory``.
    """
    args = parser.parse_args()
    # The return value is not used here; the call is kept for whatever
    # checking ``manage_required_args`` performs on the listed arguments.
    manage_required_args(
        args, parser, required_args=['embed_dim', 'xform', 'glove_dim'])

    run_name = f'concat_with_glove_{args.glove_dim}'  # alt. xformed_
    run_root = os.path.join(
        os.getcwd(),
        f'symlinks/exp/multi_sense_cooccur/{args.xform}_{args.embed_dim}')
    exp_const = ExpConstants(run_name, run_root)

    # Visual inputs live directly in the experiment's base directory.
    embed_dir = exp_const.out_base_dir
    data_const = Constants()
    data_const.visual_word_to_idx = os.path.join(embed_dir, 'word_to_idx.json')
    data_const.visual_embeddings_npy = os.path.join(
        embed_dir, 'visual_embeddings.npy')  # alt. _xformed.npy

    # The factory is called with the dimension as a string.
    glove = GloveConstantsFactory.create(dim=str(args.glove_dim))
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    concat_with_glove.main(exp_const, data_const)
Exemple #2
0
def exp_concat_random_with_glove():
    """Configure and launch the ``concat_random_with_glove`` experiment.

    Uses fixed settings: experiment name ``concat_with_glove_100``, base
    directory ``<cwd>/symlinks/exp/multi_sense_cooccur/linear_100``,
    ``random_dim`` of 100, and GloVe constants created with ``dim='100'``.
    """
    base_dir = os.path.join(
        os.getcwd(), 'symlinks/exp/multi_sense_cooccur/linear_100')
    exp_const = ExpConstants('concat_with_glove_100', base_dir)  # alt. xformed_
    exp_const.random_dim = 100

    data_const = Constants()
    glove = GloveConstantsFactory.create(dim='100')
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    concat_random_with_glove.main(exp_const, data_const)
def main():
    """Write random embedding tables shaped like the GloVe embeddings.

    The ``'embeddings'`` dataset of ``glove_const.embeddings_h5py`` is loaded,
    and its global mean, standard deviation, minimum and maximum (over all
    entries) are computed.  Two HDF5 files are then written to
    ``glove_const.proc_dir``, each holding an ``'embeddings'`` dataset with
    the same two-dimensional shape as the source:

    * ``random_normal_embeddings.h5py`` -- samples from a normal distribution
      with the source mean/std, clamped to the source ``[min, max]`` range.
    * ``random_uniform_embeddings.h5py`` -- samples from
      ``np.random.uniform(low=min, high=max)``.

    Sampling uses NumPy's global random state, which is not seeded here.
    All HDF5 files are opened with ``with`` so that handles are released even
    if reading or writing fails.
    """
    glove_const = GloveConstantsFactory.create()

    # ``[()]`` reads the whole dataset into memory, so the source file can be
    # closed before any further work is done.
    with h5py.File(glove_const.embeddings_h5py, 'r') as glove_embeddings_h5py:
        glove_embeddings = glove_embeddings_h5py['embeddings'][()]

    mean = np.mean(glove_embeddings)
    std = np.std(glove_embeddings)
    min_ = np.min(glove_embeddings)
    max_ = np.max(glove_embeddings)
    num_words, dim = glove_embeddings.shape

    # Create random normal embeddings (drawn first to keep the original
    # order of calls into the global RNG).
    random_normal_embeddings = np.random.normal(
        loc=mean,
        scale=std,
        size=(num_words, dim))
    # Clamp samples to the value range observed in the source embeddings.
    random_normal_embeddings = np.minimum(random_normal_embeddings, max_)
    random_normal_embeddings = np.maximum(random_normal_embeddings, min_)
    random_normal_path = os.path.join(
        glove_const.proc_dir, 'random_normal_embeddings.h5py')
    with h5py.File(random_normal_path, 'w') as random_normal_embeddings_h5py:
        random_normal_embeddings_h5py.create_dataset(
            'embeddings',
            data=random_normal_embeddings)

    # Create random uniform embeddings
    random_uniform_embeddings = np.random.uniform(
        low=min_,
        high=max_,
        size=(num_words, dim))
    random_uniform_path = os.path.join(
        glove_const.proc_dir, 'random_uniform_embeddings.h5py')
    with h5py.File(random_uniform_path, 'w') as random_uniform_embeddings_h5py:
        random_uniform_embeddings_h5py.create_dataset(
            'embeddings',
            data=random_uniform_embeddings)
Exemple #4
0
def exp_combine_glove_with_visual_features():
    """Configure and launch ``combine_glove_with_visual_features``.

    The experiment ``concat_glove_and_ae_visual`` is rooted under
    ``<cwd>/symlinks/exp/google_images/``.  Visual features (word index and
    HDF5 feature file) are read from the ``ae_visual_features`` directory
    configured below, and GloVe paths come from the default
    ``GloveConstantsFactory.create()``.
    """
    exp_root = os.path.join(
        os.getcwd(),
        ('symlinks/exp/google_images/'
         'normalized_resnet_embeddings_recon_loss_trained_on_google'))
    exp_const = ExpConstants('concat_glove_and_ae_visual', exp_root)

    # Note: this path uses "..._features_..." whereas the experiment root
    # above uses "..._embeddings_..."; both are kept exactly as configured.
    feat_dir = os.path.join(
        os.getcwd(),
        ('symlinks/exp/google_images/'
         'normalized_resnet_features_recon_loss_trained_on_google/'
         'ae_visual_features'))

    data_const = Constants()
    data_const.visual_features_idx = os.path.join(feat_dir, 'word_to_idx.json')
    data_const.visual_features_h5py = os.path.join(
        feat_dir, 'word_features.h5py')

    glove = GloveConstantsFactory.create()
    data_const.glove_idx = glove.word_to_idx_json
    data_const.glove_h5py = glove.embeddings_h5py

    combine_glove_with_visual_features.main(exp_const, data_const)
Exemple #5
0
    def __getitem__(self,i):
        """Return sample ``i`` as a dict of raw fields, label and embeddings.

        ``self.samples[i]`` is unpacked as ``(word1, word2, feature, label)``.
        The returned dict holds the three strings unchanged, the label
        converted with ``np.float32``, and one ``*_embedding`` entry per
        string obtained from ``self.get_embedding``.
        """
        word1, word2, feature, label = self.samples[i]

        item = dict(word1=word1, word2=word2, feature=feature)
        item['label'] = np.float32(label)

        # Embeddings are looked up in the same order as the raw fields.
        item['word1_embedding'] = self.get_embedding(word1)
        item['word2_embedding'] = self.get_embedding(word2)
        item['feature_embedding'] = self.get_embedding(feature)

        # Disabled entries, kept for reference:
        #'mean_embedding': self.embeddings['mean'].value.astype(np.float32),
        #'std_embedding': self.embeddings['std'].value.astype(np.float32),
        return item


if __name__ == '__main__':
    # Manual check: build the dataset on top of the default GloVe embeddings
    # and drop into the debugger on every batch of 4 samples.
    from data.glove.constants import GloveConstantsFactory

    glove_const = GloveConstantsFactory.create()

    semeval_const = SemEval201810DatasetConstants()
    semeval_const.embeddings_h5py = glove_const.embeddings_h5py
    semeval_const.word_to_idx_json = glove_const.word_to_idx_json

    dataset = SemEval201810Dataset(semeval_const)
    dataloader = DataLoader(dataset, batch_size=4)
    for data in dataloader:
        import pdb
        pdb.set_trace()

Exemple #6
0
def exp_train_concat_svm():
    """Build experiment, data and model constants from CLI args and train.

    Arguments are read from the module-level ``parser``.  ``exp_name``
    defaults to ``'trial'`` and ``out_base_dir`` to
    ``<cwd>/symlinks/exp/semeval_2018_10/concat_svm`` when not supplied.  If
    ``embeddings_h5py`` is not supplied, the embeddings file and word index
    come from ``GloveConstantsFactory.create()``; otherwise both are taken
    from the arguments.  The embedding dimension is read from the second axis
    of the ``'embeddings'`` dataset in the chosen HDF5 file.  The resulting
    constants are passed to ``train_concat_svm.main``.
    """
    args = parser.parse_args()
    # The return value is not used here; the call is kept for whatever
    # checking ``manage_required_args`` performs on the listed arguments.
    manage_required_args(args,
                         parser,
                         required_args=[
                             'lr',
                             'l2_weight',
                             'batch_size',
                             'glove_dim',
                             'embed_linear_feat',
                             'embed_quadratic_feat',
                             'distance_linear_feat',
                             'distance_quadratic_feat',
                             'visual_only',
                         ],
                         optional_args=[
                             'exp_name', 'out_base_dir',
                             'embeddings_h5py',
                             'word_to_idx_json'
                         ])

    exp_name = 'trial' if args.exp_name is None else args.exp_name

    if args.out_base_dir is None:
        out_base_dir = os.path.join(os.getcwd(),
                                    'symlinks/exp/semeval_2018_10/concat_svm')
    else:
        out_base_dir = args.out_base_dir

    exp_const = ExpConstants(exp_name=exp_name, out_base_dir=out_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.num_epochs = 20
    exp_const.batch_size = args.batch_size
    exp_const.lr = args.lr

    data_const = SemEval201810DatasetConstants()
    if args.embeddings_h5py is None:
        # No custom embeddings given: fall back to the default GloVe files.
        glove_const = GloveConstantsFactory.create()
        data_const.embeddings_h5py = glove_const.embeddings_h5py
        data_const.word_to_idx_json = glove_const.word_to_idx_json
    else:
        # NOTE(review): ``word_to_idx_json`` is an optional argument and is
        # passed through as-is here -- confirm callers always supply it
        # together with ``embeddings_h5py``.
        data_const.embeddings_h5py = args.embeddings_h5py
        data_const.word_to_idx_json = args.word_to_idx_json

    # Only the dataset shape is needed; open the file in a ``with`` block so
    # the handle is closed instead of being left to garbage collection.
    with h5py.File(data_const.embeddings_h5py, 'r') as embeddings_file:
        embed_dim = embeddings_file['embeddings'].shape[1]

    model_const = Constants()
    model_const.concat_svm = ConcatSVMConstants()
    model_const.concat_svm.l2_weight = args.l2_weight
    model_const.concat_svm.embedding_dim = embed_dim
    model_const.concat_svm.glove_dim = args.glove_dim
    model_const.concat_svm.layer_units = []
    model_const.concat_svm.use_embedding_linear_feats = args.embed_linear_feat
    model_const.concat_svm.use_embedding_quadratic_feats = \
        args.embed_quadratic_feat
    model_const.concat_svm.use_distance_linear_feats = args.distance_linear_feat
    model_const.concat_svm.use_distance_quadratic_feats = \
        args.distance_quadratic_feat
    model_const.concat_svm.visual_only = args.visual_only

    train_concat_svm.main(exp_const, data_const, model_const)
Exemple #7
0
def exp_eval_concat_svm():
    """Build experiment, data and model constants from CLI args and evaluate.

    Arguments are read from the module-level ``parser``.  ``exp_name``
    defaults to ``'trial'`` and ``out_base_dir`` to
    ``<cwd>/symlinks/exp/semeval_2018_10/concat_svm`` when not supplied.  The
    dataset subset is set to ``'test'``.  If ``embeddings_h5py`` is not
    supplied, the embeddings file, word index and visual vocabulary all come
    from ``GloveConstantsFactory.create()``; otherwise they are taken from the
    arguments.  The embedding dimension is read from the second axis of the
    ``'embeddings'`` dataset in the chosen HDF5 file.  The resulting constants
    are passed to ``eval_concat_svm.main``.
    """
    args = parser.parse_args()
    # The return value is not used here; the call is kept for whatever
    # checking ``manage_required_args`` performs on the listed arguments.
    manage_required_args(args,
                         parser,
                         required_args=[
                             'batch_size',
                             'glove_dim',
                             'embed_linear_feat',
                             'embed_quadratic_feat',
                             'distance_linear_feat',
                             'distance_quadratic_feat',
                             'visual_only',
                             'visual_vocab_json',
                         ],
                         optional_args=[
                             'exp_name', 'out_base_dir',
                             'embeddings_h5py',
                             'word_to_idx_json'
                         ])

    exp_name = 'trial' if args.exp_name is None else args.exp_name

    if args.out_base_dir is None:
        out_base_dir = os.path.join(os.getcwd(),
                                    'symlinks/exp/semeval_2018_10/concat_svm')
    else:
        out_base_dir = args.out_base_dir

    exp_const = ExpConstants(exp_name=exp_name, out_base_dir=out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    # NOTE(review): ``batch_size`` is listed as a required argument above but
    # a fixed value is used here -- confirm this is intended.
    exp_const.batch_size = 2560

    data_const = SemEval201810DatasetConstants()
    data_const.subset = 'test'
    if args.embeddings_h5py is None:
        # No custom embeddings given: fall back to the default GloVe files.
        # ``args.visual_vocab_json`` is not consulted in this branch; the
        # GloVe word index is used as the visual vocabulary instead.
        glove_const = GloveConstantsFactory.create()
        data_const.embeddings_h5py = glove_const.embeddings_h5py
        data_const.word_to_idx_json = glove_const.word_to_idx_json
        data_const.visual_vocab_json = data_const.word_to_idx_json
    else:
        data_const.embeddings_h5py = args.embeddings_h5py
        data_const.word_to_idx_json = args.word_to_idx_json
        data_const.visual_vocab_json = args.visual_vocab_json
    # Alternative vocabulary sources, currently disabled:
    # data_const.vocab_json = os.path.join(
    #     os.getcwd(),
    #     'symlinks/data/visualgenome/proc/all_word_freqs.json')
    # data_const.visual_vocab_json = os.path.join(
    #     os.getcwd(),
    #     'symlinks/exp/combine_glove_visual_reps/concat_glove_visual_avg_reps/visual_words.json')

    # Only the dataset shape is needed; open the file in a ``with`` block so
    # the handle is closed instead of being left to garbage collection.
    with h5py.File(data_const.embeddings_h5py, 'r') as embeddings_file:
        embed_dim = embeddings_file['embeddings'].shape[1]

    model_const = Constants()
    model_const.concat_svm = ConcatSVMConstants()
    model_const.concat_svm.embedding_dim = embed_dim
    model_const.concat_svm.glove_dim = args.glove_dim
    model_const.concat_svm.layer_units = []
    model_const.concat_svm.use_embedding_linear_feats = args.embed_linear_feat
    model_const.concat_svm.use_embedding_quadratic_feats = \
        args.embed_quadratic_feat
    model_const.concat_svm.use_distance_linear_feats = args.distance_linear_feat
    model_const.concat_svm.use_distance_quadratic_feats = \
        args.distance_quadratic_feat
    model_const.concat_svm.visual_only = args.visual_only

    eval_concat_svm.main(exp_const, data_const, model_const)