Code example #1
0
# Configuration for the word-level multimodal (MOSI) experiment.
# NOTE(review): this fragment references names defined elsewhere
# (`loader`, `tr_split`, `args`, `pickle`) — presumably imported/parsed
# earlier in the full file; verify against the original source.
max_segment_len = 20  #The max length of a segment in dataset is 114
min_word_frequency = 1
embedding_vecor_length = 300  # GloVe 300-d vectors ("vecor" typo preserved)
use_pretrained_word_embedding = True
use_pretrained_single_model = True
use_cartesian_fusion = True
end_to_end = True
feature_selection = True
# NOTE(review): purpose of `a`/`b` not evident from this fragment — confirm.
a, b = 4.0, 8.0

# Tag used to label the fusion strategy ('Cartesion' spelling is intentional
# here only in the sense that downstream code matches it — do not "fix" it
# without checking consumers).
fusion_method = 'Cartesion' if use_cartesian_fusion else 'Concat'

#word2ix = loader.load_word2ix()
# Keras' Embedding layer expects `weights` as a list of arrays, hence the list.
word_embedding = [loader.load_word_embedding()
                  ] if use_pretrained_word_embedding else None
train, valid, test = loader.load_word_level_features(max_segment_len, tr_split)

feature_str = ''
if args.feature_selection:
    # Pre-computed feature-selection masks: indices of COVAREP (acoustic)
    # and FACET (visual) feature columns to keep.
    with open('/media/bighdd5/Paul/mosi/fs_mask.pkl') as f:
        [covarep_ix, facet_ix] = pickle.load(f)
    # Apply the masks along the feature axis (samples x timesteps x features).
    facet_train = train['facet'][:, :, facet_ix]
    facet_valid = valid['facet'][:, :, facet_ix]
    facet_test = test['facet'][:, :, facet_ix]
    covarep_train = train['covarep'][:, :, covarep_ix]
    covarep_valid = valid['covarep'][:, :, covarep_ix]
    covarep_test = test['covarep'][:, :, covarep_ix]
    # Suffix encoding the per-modality feature dimensions, e.g. '_t300_c34_f43'.
    feature_str = '_t' + str(embedding_vecor_length) + '_c' + str(
        covarep_test.shape[2]) + '_f' + str(facet_test.shape[2])
else:
    # NOTE(review): this fragment appears truncated here — the complete
    # version of this branch (see the fuller example below on the page) also
    # assigns facet_valid/facet_test and the covarep_* splits.
    facet_train = train['facet']
Code example #2
0
def get_data(args, config):
    tr_split = 2.0 / 3  # fixed. 62 training & validation, 31 test
    val_split = 0.1514  # fixed. 52 training 10 validation
    use_pretrained_word_embedding = True  # fixed. use glove 300d
    embedding_vecor_length = 300  # fixed. use glove 300d
    # 115                   # fixed for MOSI. The max length of a segment in MOSI dataset is 114
    max_segment_len = config['seqlength']
    end_to_end = True  # fixed

    word2ix = loader.load_word2ix()
    word_embedding = [loader.load_word_embedding()
                      ] if use_pretrained_word_embedding else None
    train, valid, test = loader.load_word_level_features(
        max_segment_len, tr_split)

    ix2word = inv_map = {v: k for k, v in word2ix.iteritems()}
    print len(word2ix)
    print len(ix2word)
    print word_embedding[0].shape

    feature_str = ''
    if args.feature_selection:
        with open('/media/bighdd5/Paul/mosi/fs_mask.pkl') as f:
            [covarep_ix, facet_ix] = pickle.load(f)
        facet_train = train['facet'][:, :, facet_ix]
        facet_valid = valid['facet'][:, :, facet_ix]
        facet_test = test['facet'][:, :, facet_ix]
        covarep_train = train['covarep'][:, :, covarep_ix]
        covarep_valid = valid['covarep'][:, :, covarep_ix]
        covarep_test = test['covarep'][:, :, covarep_ix]
        feature_str = '_t' + str(embedding_vecor_length) + '_c' + str(
            covarep_test.shape[2]) + '_f' + str(facet_test.shape[2])
    else:
        facet_train = train['facet']
        facet_valid = valid['facet']
        covarep_train = train['covarep'][:, :, 1:35]
        covarep_valid = valid['covarep'][:, :, 1:35]
        facet_test = test['facet']
        covarep_test = test['covarep'][:, :, 1:35]

    text_train = train['text']
    text_valid = valid['text']
    text_test = test['text']
    y_train = train['label']
    y_valid = valid['label']
    y_test = test['label']

    lengths_train = train['lengths']
    lengths_valid = valid['lengths']
    lengths_test = test['lengths']

    #f = h5py.File("out/mosi_lengths_test.hdf5", "w")
    #f.create_dataset('d1',data=lengths_test)
    #f.close()
    #assert False

    facet_train_max = np.max(np.max(np.abs(facet_train), axis=0), axis=0)
    facet_train_max[facet_train_max == 0] = 1
    #covarep_train_max =  np.max(np.max(np.abs(covarep_train), axis =0),axis=0)
    #covarep_train_max[covarep_train_max==0] = 1

    facet_train = facet_train / facet_train_max
    facet_valid = facet_valid / facet_train_max
    #covarep_train = covarep_train / covarep_train_max
    facet_test = facet_test / facet_train_max
    #covarep_test = covarep_test / covarep_train_max

    text_input = Input(shape=(max_segment_len, ),
                       dtype='int32',
                       name='text_input')
    text_eb_layer = Embedding(word_embedding[0].shape[0],
                              embedding_vecor_length,
                              input_length=max_segment_len,
                              weights=word_embedding,
                              name='text_eb_layer',
                              trainable=False)(text_input)
    model = Model(text_input, text_eb_layer)
    text_train_emb = model.predict(text_train)
    print text_train_emb.shape  # n x seq x 300
    print covarep_train.shape  # n x seq x 5/34
    print facet_train.shape  # n x seq x 20/43
    X_train = np.concatenate((text_train_emb, covarep_train, facet_train),
                             axis=2)

    text_valid_emb = model.predict(text_valid)
    print text_valid_emb.shape  # n x seq x 300
    print covarep_valid.shape  # n x seq x 5/34
    print facet_valid.shape  # n x seq x 20/43
    X_valid = np.concatenate((text_valid_emb, covarep_valid, facet_valid),
                             axis=2)

    text_test_emb = model.predict(text_test)
    print text_test_emb.shape  # n x seq x 300
    print covarep_test.shape  # n x seq x 5/34
    print facet_test.shape  # n x seq x 20/43
    X_test = np.concatenate((text_test_emb, covarep_test, facet_test), axis=2)

    return X_train, y_train, X_valid, y_valid, X_test, y_test