Example #1
    ## Append sample to test list
    x_test.append(line)

## Convert list to array
x_test = np.asarray(x_test)

## Correct classification (first half positive, second half negative)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

## Allow 8000 words + 1 for the unknown token
vocab_size += 1

## Define model with 500 hidden units
model = BOW_model(vocab_size, 500)
model.cuda()

## Define optimizer
# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.01
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

## Hyper-parameters
batch_size = 200
no_of_epochs = 40
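
Every snippet on this page assumes a BOW_model class defined elsewhere; it is not shown here. Purely as a hedged sketch, a two-argument variant like the one above might look as follows, with forward returning the (loss, logits) pair the training loops expect:

import torch
import torch.nn as nn

class BOW_model(nn.Module):
    # Hypothetical reconstruction -- the real class is defined elsewhere.
    def __init__(self, vocab_size, no_of_hidden_units):
        super(BOW_model, self).__init__()
        self.embedding = nn.Embedding(vocab_size, no_of_hidden_units)
        self.fc_hidden = nn.Linear(no_of_hidden_units, no_of_hidden_units)
        self.fc_output = nn.Linear(no_of_hidden_units, 1)
        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, x, t):
        # x: LongTensor of token ids, shape (batch, seq_len); id 0 is unknown
        h = self.embedding(x).mean(dim=1)      # bag of words: average embeddings
        h = torch.relu(self.fc_hidden(h))
        logits = self.fc_output(h).squeeze(1)  # raw scores; >= 0.0 means positive
        return self.loss(logits, t), logits

The one-argument variants further down (Examples #2 and #6) skip the embedding layer and feed pre-averaged GloVe vectors straight into the hidden layer.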
Example #2
def main(input_optimizer, input_batch_size, input_hidden_units, input_epochs):

    glove_embeddings = np.load('../preprocessed_data/glove_embeddings.npy')
    vocab_size = 100000

    x_train = []
    with io.open('../preprocessed_data/imdb_train_glove.txt',
                 'r',
                 encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        line = line.split(' ')
        line = np.asarray(line, dtype=np.int64)

        line[line > vocab_size] = 0
        line = line[line != 0]

        line = np.mean(glove_embeddings[line], axis=0)

        x_train.append(line)
    x_train = np.asarray(x_train)
    x_train = x_train[0:25000]
    y_train = np.zeros((25000, ))
    y_train[0:12500] = 1

    x_test = []
    with io.open('../preprocessed_data/imdb_test_glove.txt',
                 'r',
                 encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        line = line.split(' ')
        line = np.asarray(line, dtype=np.int64)

        line[line > vocab_size] = 0
        line = line[line != 0]

        line = np.mean(glove_embeddings[line], axis=0)

        x_test.append(line)
    x_test = np.asarray(x_test)
    y_test = np.zeros((25000, ))
    y_test[0:12500] = 1

    vocab_size += 1
    # no_hidden_units = 500 # try 300 as well
    no_hidden_units = input_hidden_units

    model = BOW_model(no_hidden_units)
    model.cuda()

    # opt = 'sgd'
    # LR = 0.01
    # opt = 'adam'
    opt = input_optimizer
    LR = 0.001
    if (opt == 'adam'):
        optimizer = optim.Adam(model.parameters(), lr=LR)
    elif (opt == 'sgd'):
        optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

    # batch_size = 200
    batch_size = input_batch_size
    # no_of_epochs = 6
    no_of_epochs = input_epochs

    L_Y_train = len(y_train)
    L_Y_test = len(y_test)

    model.train()

    train_loss = []
    train_accu = []
    test_accu = []

    print("Optmizer: %s" % opt, "LR: %.6f" % LR,
          "EpochSize: %d" % no_of_epochs, "BatchSize: %d" % batch_size,
          "VocalSize: %d" % (vocab_size - 1),
          "HidenSize: %d" % no_hidden_units)

    for epoch in range(no_of_epochs):

        # training
        model.train()

        epoch_acc = 0.0
        epoch_loss = 0.0

        epoch_counter = 0

        time1 = time.time()

        I_permutation = np.random.permutation(L_Y_train)

        for i in range(0, L_Y_train, batch_size):

            x_input = x_train[I_permutation[i:i + batch_size]]
            y_input = y_train[I_permutation[i:i + batch_size]]

            data = Variable(torch.FloatTensor(x_input)).cuda()
            target = Variable(torch.FloatTensor(y_input)).cuda()

            optimizer.zero_grad()
            loss, pred = model(data, target)
            loss.backward()

            optimizer.step()  # update weights

            prediction = pred >= 0.0
            truth = target >= 0.5
            acc = prediction.eq(truth).sum().cpu().data.numpy()
            epoch_acc += acc
            epoch_loss += loss.data.item()
            epoch_counter += batch_size

        epoch_acc /= epoch_counter
        epoch_loss /= (epoch_counter / batch_size)

        train_loss.append(epoch_loss)
        train_accu.append(epoch_acc)

        print(epoch, "%.2f" % (epoch_acc * 100.0), "%.4f" % epoch_loss,
              "%.4f" % float(time.time() - time1))

        ## test
        model.eval()

        epoch_acc = 0.0
        epoch_loss = 0.0

        epoch_counter = 0

        time1 = time.time()

        I_permutation = np.random.permutation(L_Y_test)

        for i in range(0, L_Y_test, batch_size):

            x_input = x_test[I_permutation[i:i + batch_size]]
            y_input = y_test[I_permutation[i:i + batch_size]]

            data = Variable(torch.FloatTensor(x_input)).cuda()
            target = Variable(torch.FloatTensor(y_input)).cuda()

            with torch.no_grad():
                loss, pred = model(data, target)

            prediction = pred >= 0.0
            truth = target >= 0.5
            acc = prediction.eq(truth).sum().cpu().data.numpy()
            epoch_acc += acc
            epoch_loss += loss.data.item()
            epoch_counter += batch_size

        epoch_acc /= epoch_counter
        epoch_loss /= (epoch_counter / batch_size)

        test_accu.append(epoch_acc)

        time2 = time.time()
        time_elapsed = time2 - time1

        print("  ", "%.2f" % (epoch_acc * 100.0), "%.4f" % epoch_loss)

    torch.save(model, 'BOW.model')
    data = [train_loss, train_accu, test_accu]
    data = np.asarray(data)
    np.save('data.npy', data)
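
Example #2's main() saves its learning curves as a (3, no_of_epochs) array. As a quick usage sketch (assuming the data.npy layout written above), they can be read back with np.load:

import numpy as np

train_loss, train_accu, test_accu = np.load('data.npy')
print("final test accuracy: %.2f%%" % (100.0 * test_accu[-1]))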
Example #3
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0
    line = line[line != 0]

    line = np.mean(glove_embeddings[line], axis=0)

    x_test.append(line)
x_test = np.asarray(x_test)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

vocab_size += 1

model = BOW_model(1000)  # try 300 as well

model.cuda()

####################################################################

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
Example #4
	line = np.asarray(line, dtype=np.int64)

	line[line > vocab_size] = 0
	line = line[line != 0]

	line = np.mean(glove_embeddings[line], axis = 0)
	x_test.append(line)

x_test = np.asarray(x_test)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

#-----------------------------------------------------------------
# Load model
vocab_size += 1
model = BOW_model(no_of_hidden_units = 100)
model.cuda()

# Optimizer and learning rate
# opt = "SGD"
# LR = 0.01

opt = "Adam"
LR = 0.001

if opt == "SGD":
	optimizer = optim.SGD(model.parameters(),
			   			  lr = LR,
			   			  momentum = 0.9)
elif opt == "Adam":
	optimizer = optim.Adam(model.parameters(),
						   lr = LR)
Example #5
             encoding='utf-8') as f:
    lines = f.readlines()
for line in lines:
    line = line.strip()
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0

    x_test.append(line)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

vocab_size += 1

model = BOW_model(vocab_size, 1000)
model.cuda()

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=10**-6)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
no_of_epochs = 100
L_Y_train = len(y_train)
L_Y_test = len(y_test)
Example #6
for line in lines:
    line = line.strip()
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0
    line = line[line != 0]

    line = np.mean(glove_embeddings[line], axis=0)

    x_test.append(line)
x_test = np.asarray(x_test)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

model = BOW_model(500)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
no_of_epochs = 6
L_Y_train = len(y_train)
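
The np.mean(glove_embeddings[line], axis=0) step above is what turns a variable-length review into a single fixed-size feature vector. A toy illustration with a made-up 4-row, 3-dimensional embedding table:

import numpy as np

glove_embeddings = np.array([[0.0, 0.0, 0.0],   # row 0: padding / unknown
                             [1.0, 0.0, 0.0],
                             [0.0, 1.0, 0.0],
                             [0.0, 0.0, 1.0]])
line = np.array([1, 2, 3])                      # token ids for one short review
doc_vector = np.mean(glove_embeddings[line], axis=0)
print(doc_vector)                               # [0.333... 0.333... 0.333...]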
Example #7
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0
    line = line[line != 0]

    line = np.mean(glove_embeddings[line], axis=0)

    x_test.append(line)
x_test = np.asarray(x_test)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

vocab_size += 1

model = BOW_model(300)

model.cuda()

####################################################################

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=10**-4)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
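
Examples #5 and #7 are the only variants that pass weight_decay to Adam, which applies an L2 penalty inside the optimizer step. Roughly equivalent, as an illustration only (not part of the original snippet):

# weight_decay=1e-4 behaves like adding an L2 term to the loss:
l2_penalty = sum((p ** 2).sum() for p in model.parameters())
loss = loss + 0.5 * 1e-4 * l2_penalty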
Example #8
    lines = f.readlines()
for line in lines:
    line = line.strip()
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0

    x_test.append(line)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

vocab_size += 1

for idx, no_of_hidden_units in enumerate(no_of_hidden_units_list):
    model = BOW_model(vocab_size, no_of_hidden_units)
    model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=LR)

    L_Y_train = len(y_train)
    L_Y_test = len(y_test)

    model.train()

    train_loss = []
    train_accu = []
    test_accu = []

    for epoch in range(no_of_epochs[idx]):
Example #9
with io.open('../preprocessed_data/imdb_test.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
for line in lines:
    line = line.strip()
    line = line.split(' ')
    line = np.asarray(line, dtype=np.int64)

    line[line > vocab_size] = 0

    x_test.append(line)
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

vocab_size += 1

model = BOW_model(vocab_size, 500)
model.cuda()

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
no_of_epochs = 6
L_Y_train = len(y_train)
L_Y_test = len(y_test)
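
All of the accuracy checks above threshold the raw model output at 0.0, which works because the model returns logits. If probabilities are wanted instead (a hypothetical addition, not in the snippets), a sigmoid gives the equivalent 0.5 threshold:

probs = torch.sigmoid(pred)      # logit >= 0.0  <=>  probability >= 0.5
prediction = probs >= 0.5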