예제 #1
0
    data_sh = theano.shared(np.asarray(data, dtype=theano.config.floatX),
                            borrow=True)
    data_t_sh = theano.shared(np.asarray(data_t, dtype=theano.config.floatX),
                              borrow=True)
    return data_sh, T.cast(data_t_sh, 'int32')


# Pass each (inputs, targets) split through make_theano_dataset.
train_data_sh, train_data_t_sh = make_theano_dataset((train_data, train_data_t))
valid_data_sh, valid_data_t_sh = make_theano_dataset((valid_data, valid_data_t))
test_data_sh, test_data_t_sh = make_theano_dataset((test_data, test_data_t))

# Pair every inputs variable with its targets: train, valid, test.
datasets = list(zip(
    (train_data_sh, valid_data_sh, test_data_sh),
    (train_data_t_sh, valid_data_t_sh, test_data_t_sh),
))

# RBMStack built from num_dims and the list [500]; DBN built on top of it.
rbm_stack = RBMStack(num_dims, [500])
dbn = DBN(rbm_stack, 2)

batch_size = 100
max_epoch = 10
train_params = dict(
    batch_size=batch_size,
    learning_rate=0.01,
    cd_steps=2,
    max_epoch=max_epoch,
    persistent=True,
    finetune_learning_rate=0.1,
)

# Ask the DBN for its pretraining function over the training inputs.
pre_fn = dbn.pretrain_fun(train_data_sh, train_params)
예제 #2
0
# Targets wrapped in a Theano shared variable, converted to Theano's
# configured float dtype (theano.config.floatX).
data_target_sh = theano.shared(np.asarray(data_target,
                                          dtype=theano.config.floatX),
                               borrow=True)

batch_size = 70
# Explicit floor division: the count feeds xrange() below, which needs an
# integer, and `/` would yield a float under `from __future__ import division`.
# Cases left over after the last full batch are not counted.
num_batches = num_cases // batch_size
max_epoch = 50
train_params = {
    'batch_size': batch_size,
    'learning_rate': 0.01,
    'cd_steps': 5,
    'max_epoch': max_epoch,
    'persistent': True,
    'learning_rate_line': 0.0001,
    'finetune_learning_rate': 0.1
}

# Pretrain the RBM stack and save it, unless load_from_file reports success
# (a truthy return) for these parameters.
stack_rbm = RBMStack(num_vis=num_vis, hid_layers_size=[500])
if not load_from_file(stack_rbm, train_params):
    stack_rbm.pretrain(data_sh, train_params)
    save_to_file(stack_rbm, train_params)

# NOTE(review): sent_ae is not defined in the visible code; presumably it is
# built from stack_rbm elsewhere -- confirm.
print_top_to_file(sent_ae, "pre_train", data_sh, data_target, range(0, 999))
fine_tune = sent_ae.finetune_fun(data_sh, train_params)
for ep in xrange(0, max_epoch):
    for b in xrange(0, num_batches):
        cost = fine_tune(b)
        # Single parenthesised argument: prints identically as a Python 2
        # print statement and as a print() call.
        print("Epoch # %d:%d cost: %f" % (ep, b, cost))

print_top_to_file(sent_ae, "fine_tune", data_sh, data_target, range(0, 999))
예제 #3
0
    start = time.time()   
    for k in watches:
        name = "%i:%s" % (iter, k)
        load_bin(name, watches[k])
    db_redis.r0.set("last_it", iter)
    iter+=1
    end = time.time()
    print "Inserted in %s" % (end - start,)

# Module-level histories that train() appends to on every iteration.
free_en_acc, free_en_valid_acc = [], []
cost_acc, cost_valid_acc = [], []

#rbm.need_train = True
# Wrap the single existing rbm in an RBMStack.
rbms = RBMStack(rbms=[rbm])

def train(rbms, data_sh, data_valid_sh, train_params):
    """Run ``rbms.pretrain`` and record each ``watches`` mapping it produces.

    For every ``watches`` mapping obtained by iterating over
    ``rbms.pretrain(data_sh, data_valid_sh, train_params)``, the values under
    'free_en', 'free_en_valid', 'cost' and 'cost_valid' are appended to the
    module-level lists ``free_en_acc``, ``free_en_valid_acc``, ``cost_acc``
    and ``cost_valid_acc``.  The full histories collected so far are then
    stored back into ``watches`` under the matching '*_acc' keys as arrays of
    dtype ``theano.config.floatX``, and the 'free_en' and 'free_en_valid'
    entries are deleted from ``watches``.

    NOTE(review): the visible body stops after the two ``del`` statements;
    the function may continue beyond this excerpt -- confirm.
    """
    for watches in rbms.pretrain(data_sh, data_valid_sh, train_params):
        # Extend the running histories with this iteration's values.
        free_en_acc.append(watches['free_en'])
        free_en_valid_acc.append(watches['free_en_valid'])
        cost_acc.append(watches['cost'])
        cost_valid_acc.append(watches['cost_valid'])

        # Expose the complete histories (not just the latest value) in watches.
        watches['free_en_acc'] = np.asarray(free_en_acc, dtype=theano.config.floatX)
        watches['free_en_valid_acc'] = np.asarray(free_en_valid_acc, dtype=theano.config.floatX)
        watches['cost_acc'] = np.asarray(cost_acc, dtype=theano.config.floatX)
        watches['cost_valid_acc'] = np.asarray(cost_valid_acc, dtype=theano.config.floatX)

        # Drop the per-iteration free-energy entries; their values are
        # already contained in the '*_acc' arrays stored above.
        del watches['free_en'] 
        del watches['free_en_valid']
예제 #4
0
    return data_sh, T.cast(data_t_sh, 'int32')

# Pass each (inputs, targets) split through make_theano_dataset.
train_data_sh, train_data_t_sh = make_theano_dataset((train_data, train_data_t))
valid_data_sh, valid_data_t_sh = make_theano_dataset((valid_data, valid_data_t))
# The test split is currently disabled; None placeholders keep `datasets`
# three entries long.
test_data_sh, test_data_t_sh = (None, None) #make_theano_dataset((test_data, test_data_t))

train_data_wp_sh, train_data_wp_t_sh = make_theano_dataset((data_wo_p, data_target_wo_p))

datasets = [
    (train_data_sh, train_data_t_sh),
    (valid_data_sh, valid_data_t_sh),
    (test_data_sh, test_data_t_sh),
]

batch_size = 100
max_epoch = 30
train_params = {
    'batch_size': batch_size,
    'learning_rate': 0.1,
    'cd_steps': 10,
    'max_epoch': max_epoch,
    'persistent': True,
    'cd_steps_line': 1,
    'learning_rate_line': 0.001,
    'finetune_learning_rate': 0.1,
    'validation_frequency': batch_size,
}


stack_rbm = RBMStack(num_vis=num_vis, hid_layers_size=hid_layers_size)

# Pretrain and save the stack unless load_from_file returns a truthy value.
# NOTE(review): data_sh is not assigned in the visible code (only
# train_data_sh and friends are) -- confirm it is defined earlier.
if not load_from_file(stack_rbm, train_params):
    stack_rbm.pretrain(data_sh, train_params)
    save_to_file(stack_rbm, train_params)


dbn = DBN(stack_rbm, 2)
#dbn.finetune(datasets, train_params)
#ae = AutoEncoder(stack_rbm, 100)
#ae.pretrain(train_data_sh, train_params)
#ae.finetune(train_data_sh, train_params)

# Compile a no-argument function that evaluates the output of the last
# element of dbn.stack, with train_data_sh substituted for dbn.input.
get_output = theano.function([], dbn.stack[-1].output, givens=[(dbn.input, train_data_sh)])
out = get_output()
# Write the computed output.  The original passed train_data here, which left
# `out` unused and put the raw training data into out.tsv.
np.savetxt("/home/alexeyche/prog/sentiment/out.tsv", out, delimiter="\t")
예제 #5
0
                continue
            case_arr = np.concatenate(hist, axis=1)
            l = case_arr.shape[0]
            data_train[i,0:l] = case_arr
            i+=1
            del hist[:]
            if i == num_cases:
                break
        else:
            hist.append(case[:-1])
    return data_train

# Build the training matrix and place it in a Theano shared variable using
# Theano's configured float dtype.
data_train = preprocess()
data_train_sh = theano.shared(
    np.asarray(data_train, dtype=theano.config.floatX),
    borrow=True,
)

# RBM stack with hid_layers_size [15]; the bottom RBM type is RBMMultSoftmax
# and receives num_units / unit_size through add_opts.
rbms = RBMStack(
    hid_layers_size=[15],
    bottomRBMtype=RBMMultSoftmax,
    add_opts=dict(num_units=num_units, unit_size=unit_size),
)

train_params = dict(
    max_epoch=50,
    batch_size=50,
    learning_rate=0.001,
    cd_steps=15,
    persistent=True,
)
rbms.pretrain(data_train_sh, train_params)

# Keep a handle on the first RBM in the stack and save it.
rbm = rbms.stack[0]
rbm.save_model()

def evaluate(data):
    def softmax(w):
        e = np.exp(w)
        dist = e / np.sum(e)
        return dist

    w = rbm.W.get_value(borrow=True)
    vbias = rbm.vbias.get_value(borrow=True)
예제 #6
0
            del hist[:]
            if i == num_cases:
                break
        else:
            hist.append(case[:-1])
    return data_train


# Training matrix from preprocess(), wrapped in a Theano shared variable with
# Theano's configured float dtype.
data_train = preprocess()
data_train_sh = theano.shared(
    np.asarray(data_train, dtype=theano.config.floatX), borrow=True)

# Stack with hid_layers_size [15]; the bottom RBM type is RBMMultSoftmax,
# configured through add_opts.
rbms = RBMStack(
    hid_layers_size=[15],
    bottomRBMtype=RBMMultSoftmax,
    add_opts={'num_units': num_units, 'unit_size': unit_size})

train_params = dict(
    max_epoch=50,
    batch_size=50,
    learning_rate=0.001,
    cd_steps=15,
    persistent=True)
rbms.pretrain(data_train_sh, train_params)

# Save the first RBM of the stack.
rbm = rbms.stack[0]
rbm.save_model()