Example #1
def validate():
    print('Loading dataset...')
    dataset = load_data()
    word_index = dataset['word_index']
    x_val = np.load('data/x_val-L2X.npy')
    pred_val = np.load('data/pred_val.npy')

    print('Creating model...')
    model = create_original_model(word_index)
    model.load_weights('./models/original.hdf5', by_name=True)

    print('Making prediction with selected sentences...')
    new_pred_val = model.predict(x_val, verbose=1, batch_size=1000)
    val_acc = np.mean(
        np.argmax(new_pred_val, axis=-1) == np.argmax(pred_val, axis=-1))

    print('The validation accuracy is {}.'.format(val_acc))
    np.save('data/pred_val-{}.npy'.format('L2X'), new_pred_val)
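
# Hypothetical driver (not part of the original snippet): validate() assumes
# 'models/original.hdf5', 'data/x_val-L2X.npy' and 'data/pred_val.npy' already
# exist, i.e. generate_original_preds() and the L2X selection step ran first.
if __name__ == '__main__':
    validate()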
Example #2
def generate_original_preds(train=True):
    """
	Generate the predictions of the original model on training
	and validation datasets. 

	The original model is also trained if train = True. 

	"""
    print('Loading data...')
    dataset = load_data()
    word_index = dataset['word_index']
    x_train, x_val, y_train, y_val = (dataset['x_train'], dataset['x_val'],
                                      dataset['y_train'], dataset['y_val'])

    print('Creating model...')
    model = create_original_model(word_index)

    if train:
        if 'models' not in os.listdir('.'):
            os.mkdir('models')

        filepath = "models/original.hdf5"
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        callbacks_list = [checkpoint]
        model.fit(x_train,
                  y_train,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks_list,
                  epochs=5,
                  batch_size=BATCHSIZE)

    weights_name = 'original.hdf5'
    model.load_weights('./models/' + weights_name, by_name=True)

    pred_train = model.predict(x_train, verbose=1, batch_size=1000)
    pred_val = model.predict(x_val, verbose=1, batch_size=1000)

    np.save('data/pred_train.npy', pred_train)
    np.save('data/pred_val.npy', pred_val)
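
# Hypothetical driver (not part of the original snippet): train the original
# model once and cache the soft predictions that the L2X step fits against;
# on later runs the saved 'models/original.hdf5' can be reused with train=False.
if __name__ == '__main__':
    generate_original_preds(train=True)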
Example #3
def pipeline(modeltype, k, points, tune=False, fe=False):
    '''
    Specify (i) the model type ('RF' or 'NN'), (ii) whether to do some
    hyperparameter tuning (tune=True), (iii) whether to perform the feature
    engineering step (fe=True; it takes time) or simply reuse what has already
    been done, (iv) k, the number of clusters determined from the elbow curve,
    and (v) points, the specified length of the bounding box around a tile.
    Output: a prediction file.
    '''
    if fe:
        finaldata = datapipeline(k, points)
    else:
        # finaldata = load_pickle('./data/finaldata.p')
        finaldata = kmeans(load_data(), k)

    X_train, y_train, X_test, y_test = onehotencoding_and_standardize(
        finaldata)

    start_time = timeit.default_timer()
    model = train_models(X_train, y_train, modeltype, tune=tune)
    elapsed = timeit.default_timer() - start_time
    print("Elapsed time: " + str(elapsed) + " (s)")
    results = predict_evaluate(X_test, y_test, model)
    results.to_csv('./result/prediction_%s.csv' % modeltype)
    return results
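
# Hypothetical call (not part of the original snippet): the k and points values
# below are illustrative placeholders, not recommended settings. Judging from
# the .to_csv() call above, predict_evaluate() returns a pandas DataFrame.
results = pipeline('RF', k=5, points=10, tune=False, fe=False)
print(results)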
Example #4
def L2X(train=True):
    """
	Generate scores on features on validation by L2X.

	Train the L2X model with variational approaches 
	if train = True. 

	"""
    print('Loading dataset...')
    dataset = load_data()
    word_index = dataset['word_index']
    x_train, x_val, y_train, y_val = (dataset['x_train'], dataset['x_val'],
                                      dataset['y_train'], dataset['y_val'])
    # Reload the vocabulary from disk (this overrides the word_index above).
    with open('./data/word_index.pkl', 'rb') as f:
        word_index = pkl.load(f)

    print('Creating model...')

    # P(S|X)
    with tf.variable_scope('selection_model'):

        review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
        logits_T = construct_gumbel_selector(review_input, MAX_SENT_LENGTH,
                                             EMBEDDING_DIM, MAX_SENTS,
                                             word_index)
        tau = 0.5
        T = Sample_Concrete(tau, k)(logits_T)

    # q(X_S)
    with tf.variable_scope('prediction_model'):
        sentence_input = Input(shape=(MAX_SENT_LENGTH, ), dtype='int32')

        embedding_layer = Embedding(MAX_NUM_WORDS + 1,
                                    EMBEDDING_DIM,
                                    input_length=MAX_SENT_LENGTH,
                                    name='embedding',
                                    trainable=True)

        embedded_sequences = embedding_layer(sentence_input)
        net = Dropout(0.2)(embedded_sequences)
        net = Conv1D(250, 3, padding='valid', activation='relu',
                     strides=1)(net)
        net = GlobalMaxPooling1D()(net)
        sentEncoder2 = Model(sentence_input, net)

        review_encoder2 = TimeDistributed(sentEncoder2)(review_input)
        selected_encoding = Multiply()([review_encoder2, T])
        net = Mean(selected_encoding)
        net = Dense(250)(net)
        net = Activation('relu')(net)
        preds = Dense(2, activation='softmax', name='new_dense')(net)

    model = Model(inputs=review_input, outputs=preds)

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])

    pred_train = np.load('data/pred_train.npy')
    pred_val = np.load('data/pred_val.npy')

    val_acc = np.mean(np.argmax(pred_val, axis=1) == np.argmax(y_val, axis=1))
    print(
        'The validation accuracy of the original model is {}'.format(val_acc))

    if train:
        filepath = "models/l2x.hdf5"
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        callbacks_list = [checkpoint]

        model.fit(x_train,
                  pred_train,
                  validation_data=(x_val, pred_val),
                  callbacks=callbacks_list,
                  epochs=10,
                  batch_size=BATCHSIZE)

    weights_name = 'l2x.hdf5'
    model.load_weights('models/{}'.format(weights_name), by_name=True)

    pred_model = Model(review_input, [T, logits_T, preds])

    pred_model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['acc'])

    st = time.time()
    selections, scores, interp_val = pred_model.predict(x_val,
                                                        verbose=1,
                                                        batch_size=BATCHSIZE)

    print('Time spent is {}'.format(time.time() - st))
    return scores, x_val
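
# Hypothetical post-processing (not part of the original snippet): rank the
# sentences of each review by their L2X score and keep the k highest-scoring
# ones. Assumes scores has one score per sentence, i.e. shape
# (n_reviews, MAX_SENTS) after squeezing, and that k is the same global
# used inside L2X().
scores, x_val = L2X(train=False)
scores = np.squeeze(scores)
top_sentences = np.argsort(scores, axis=-1)[:, -k:]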
Example #5
    def forward(self, x):
        # Flatten the conv feature maps before the fully connected classifier.
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x


def alexnet(pretrained=False, **kwargs):
    model = AlexNet(**kwargs)
    if pretrained:
        model.load_state_dict(torch.load('./model/alexnet_model_para.pkl'))
    return model


model = AlexNet().cuda(device)
#print(model)
train_load, train_data = load_data('./data/')

optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.001,
                            momentum=0.9,
                            dampening=0,
                            weight_decay=0.0001)

loss_func = torch.nn.CrossEntropyLoss()

train_epoch = 100


def train(epoch):
    model.train()