Example #1
File: Driver.py Project: atulkum/ml
def main():
     
    algo = sys.argv[1]
    trainfile = sys.argv[2]
    weightfile = sys.argv[3]
    testfilename = sys.argv[4]
    isCr = sys.argv[5]
    
    if(algo == 'LR'):
        D = 2**20   
        lr = LR(D)
        lr.train(trainfile)
        lr.saveWeight(weightfile)
        #lr.readWeight(weightfile)
        if(isCr == 'cr'):
            Util.cross_validation(testfilename, lr.get_features, lr.get_prediction)
    elif(algo == 'LR_CONV'):
        D = 2**30   
        lr = LR(D)
        lr.train_conjecture(trainfile)
        lr.saveWeight(weightfile)
        if(isCr == 'cr'):
            Util.cross_validation(testfilename, lr.get_features_conjectured, lr.get_prediction)
        else:
            Util.test(testfilename, 'LR_CONV_TEST', lr.get_features_conjectured, lr.get_prediction)
    elif(algo == 'PROBIT'):
        D = 2**20   
        pr = PROBIT_R(D, 0.3, 0.05, 0.01)
        pr.train(trainfile)
        pr.saveWeight(weightfile)
        if(isCr == 'cr'):
            Util.cross_validation(testfilename, pr.get_features, pr.predict)
        else:
            Util.test(testfilename, 'PROBIT_TEST',pr.get_features, pr.predict)
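A minimal sketch of how this driver might be invoked, assuming main() lives in Driver.py as stated above; the file names are hypothetical placeholders, and the argument order follows the sys.argv reads in main():

import sys
import Driver  # hypothetical: assumes the module above is importable

# argv order per main(): algo, trainfile, weightfile, testfilename, isCr
sys.argv = ['Driver.py', 'LR', 'train.tsv', 'lr.weights', 'test.tsv', 'cr']
Driver.main()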
Example #2
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = np.random.RandomState(1234)

        # construct hidden layer
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=tanh)

        # construct log_layer

        self.log_layer = LR(input=self.hidden_layer.output,
                            label=self.y,
                            n_in=n_hidden,
                            n_out=n_out)

    def train(self):
        # forward hidden_layer
        layer_input = self.hidden_layer.forward()

        # forward & backward log_layer
        self.log_layer.train(input=layer_input)

        # backward hidden_layer
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(input=x)
        return self.log_layer.predict(x)
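A minimal usage sketch for this class, assuming the HiddenLayer and LR implementations it relies on are importable; the toy data is made up for illustration:

import numpy as np

# Four 2-d samples with one-hot labels for two classes (illustrative only).
x = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([[1., 0.], [0., 1.], [0., 1.], [1., 0.]])

mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2)
for _ in range(500):   # each train() call is one forward/backward pass
    mlp.train()
print(mlp.predict(x))  # per-sample output of the logistic layer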
Example #3
def DBN_JIT(train_features, train_labels, test_features, test_labels, hidden_units=[20, 12, 12], num_epochs_LR=200):
    # training DBN model
    #################################################################################################
    starttime = time.time()
    dbn_model = DBN(visible_units=train_features.shape[1],
                    hidden_units=hidden_units,
                    use_gpu=False)
    dbn_model.train_static(train_features, train_labels, num_epochs=10)
    # Finishing the training DBN model
    # print('---------------------Finishing the training DBN model---------------------')
    # using DBN model to construct features
    DBN_train_features, _ = dbn_model.forward(train_features)
    DBN_test_features, _ = dbn_model.forward(test_features)
    DBN_train_features = DBN_train_features.numpy()
    DBN_test_features = DBN_test_features.numpy()

    train_features = np.hstack((train_features, DBN_train_features))
    test_features = np.hstack((test_features, DBN_test_features))


    if len(train_labels.shape) == 1:
        num_classes = 1
    else:
        num_classes = train_labels.shape[1]
    # lr_model = LR(input_size=hidden_units, num_classes=num_classes)
    lr_model = LR(input_size=train_features.shape[1], num_classes=num_classes)
    optimizer = torch.optim.Adam(lr_model.parameters(), lr=0.00001)
    steps = 0
    batches_test = mini_batches(X=test_features, Y=test_labels)
    for epoch in range(1, num_epochs_LR + 1):
        # building batches for training model
        batches_train = mini_batches_update(X=train_features, Y=train_labels)
        for batch in batches_train:
            x_batch, y_batch = batch
            x_batch, y_batch = torch.tensor(x_batch).float(), torch.tensor(y_batch).float()

            optimizer.zero_grad()
            predict = lr_model.forward(x_batch)
            criterion = nn.BCELoss()
            loss = criterion(predict, y_batch)
            loss.backward()
            optimizer.step()

            # steps += 1
            # if steps % 100 == 0:
            #     print('\rEpoch: {} step: {} - loss: {:.6f}'.format(epoch, steps, loss.item()))

    endtime = time.time()
    dtime = endtime - starttime
    print("Train Time: %.8s s" % dtime)  #显示到微秒 

    starttime = time.time()
    y_pred, labels = lr_model.predict(data=batches_test)
    endtime = time.time()
    dtime = endtime - starttime
    print("Eval Time: %.8s s" % dtime)  #显示到微秒 
    return y_pred
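For reference, a sketch of a call site, assuming dense NumPy feature matrices with binary labels; the shapes and data are hypothetical, and whether it runs end-to-end depends on the DBN, LR, and mini-batch helpers the snippet imports:

import numpy as np

train_features = np.random.rand(100, 30).astype(np.float32)  # hypothetical
train_labels = np.random.randint(0, 2, 100).astype(np.float32)
test_features = np.random.rand(20, 30).astype(np.float32)
test_labels = np.random.randint(0, 2, 20).astype(np.float32)

y_pred = DBN_JIT(train_features, train_labels, test_features, test_labels)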
Example #4
    def __init__(self,
                 input=None,
                 label=None,
                 n_ins=2,
                 hidden_layer_sizes=[3, 3],
                 n_outs=2,
                 rng=None):
        self.x = input
        self.y = label
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)

            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LR(input=self.sigmoid_layers[-1].sample_h_given_v(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_outs)

        self.finetune_cost = self.log_layer.negative_log_likelihood()
Example #5
def main():
    # Randomly generate data
    x = np.random.random((50, 1))
    x.sort()
    y = np.random.random((50))
    y.sort()
    xMat = np.mat(x)
    yMat = np.mat(y)
    w = LR().train(x, y)
    xMat_copy = xMat.copy()
    y_pred = LR().predict(xMat_copy, w)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter([xMat.flatten()], [yMat.flatten()])
    ax.plot(xMat_copy, y_pred)
    plt.show()
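Note that LR here appears to implement ordinary linear regression rather than a classifier: train() returns a weight vector w that predict() takes explicitly, unlike the LR objects in the other snippets.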
Example #6
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = np.random.RandomState(1234)

        # construct hidden layer
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=tanh)

        # construct log_layer

        self.log_layer = LR(input=self.hidden_layer.output,
                            label=self.y,
                            n_in=n_hidden,
                            n_out=n_out)
Example #7
    def __init__(self,
                 input,
                 label,
                 n_in,
                 hidden_layer_sizes,
                 n_out,
                 rng=None,
                 activation=ReLU):
        self.x = input
        self.y = label
        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            self.hidden_layers.append(hidden_layer)

        self.log_layer = LR(input=self.hidden_layers[-1].output(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_out)
Example #8
def main():

    algo = sys.argv[1]
    trainfile = sys.argv[2]
    weightfile = sys.argv[3]
    testfilename = sys.argv[4]
    isCr = sys.argv[5]

    if (algo == 'LR'):
        D = 2**20
        lr = LR(D)
        lr.train(trainfile)
        lr.saveWeight(weightfile)
        #lr.readWeight(weightfile)
        if (isCr == 'cr'):
            Util.cross_validation(testfilename, lr.get_features,
                                  lr.get_prediction)
    elif (algo == 'LR_CONV'):
        D = 2**30
        lr = LR(D)
        lr.train_conjecture(trainfile)
        lr.saveWeight(weightfile)
        if (isCr == 'cr'):
            Util.cross_validation(testfilename, lr.get_features_conjectured,
                                  lr.get_prediction)
        else:
            Util.test(testfilename, 'LR_CONV_TEST',
                      lr.get_features_conjectured, lr.get_prediction)
    elif (algo == 'PROBIT'):
        D = 2**20
        pr = PROBIT_R(D, 0.3, 0.05, 0.01)
        pr.train(trainfile)
        pr.saveWeight(weightfile)
        if (isCr == 'cr'):
            Util.cross_validation(testfilename, pr.get_features, pr.predict)
        else:
            Util.test(testfilename, 'PROBIT_TEST', pr.get_features, pr.predict)
Example #9
from LR import LR
# parameters
name = 'stdev2'
print '======Training======'
# load data from csv files
train = loadtxt('newData-2/data_' + name + '_train.csv')
#train = loadtxt('data/data_'+name+'_train.csv')
X = train[:, 0:2]
Y = train[:, 2:3]

#X = np.array([[1.0,2.0],[2.0,2.0],[0.0,0.0],[-2.0,3.0]])
#Y = np.array([[1.0],[1.0],[-1.0],[-1.0]])

# Carry out training.
L = .01
lr = LR(X, Y, L)
lr.train()
'''
[[ 0.89444823  0.19756899]]
[-0.24464889]
model = lr.train_gold()
print model.coef_
print model.intercept_
'''


# Define the predictLR(x) function, which uses trained parameters
def predictLR(x):
    return lr.test(x)

Example #10
	if save_model and mode == "train":
		os.mkdir(model_path)

	print(tag)

	X_dim = camp_info["dim"]
	X_field = camp_info["field"]

	seeds = [0x0123, 0x4567, 0x3210, 0x7654, 0x89AB, 0xCDEF, 0xBA98, 0xFEDC,
	         0x0123, 0x4567, 0x3210, 0x7654, 0x89AB, 0xCDEF, 0xBA98, 0xFEDC]

	if "LR" in algo:
		batch_size = 10000
		buf_size = 1000000
		model = LR([X_dim, X_field], batch_size,
		           data_path + camp + "/urp-model/lr.pickle",  # or None
		           [('uniform', -0.001, 0.001, seeds[4])],
		           ['sgd', 1e-3, 'sum'], 0)  # 1e-3

	print("batch size={0}, buf size={1}".format(batch_size, buf_size))
	print(model.log)

	if mode == "train":
		if save_model:
			utility.write_log(log_path, model.log)

		# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
		# sess_config = tf.ConfigProto(gpu_options=gpu_options)
		# with tf.Session(graph=model.graph, config=sess_config) as sess:
		with tf.Session(graph=model.graph) as sess:
			tf.initialize_all_variables().run()
			print("model initialized")
Example #11
File: EXEC.py Project: bianc2018/ms
        else:
            print "ERROR:变量",p,"未定义!"
            self.qcode=-1
            return None
    def LE(self,p):
        if self.qcode!=None:
            if debug:
                print "qcode:",qcode
            return None
        if debug:
            print "CONST:",p,"qcode:",self.qcode
        print ""
if __name__ == "__main__":
    lex = LEX()  # lexer instance
    lex.init("s","data")
    lr = LR()
    lr.init("s","data")
    #lr.print_table()
    ex = EXEC()
    attr = {}  # data stack
    s = file("1",'r+')
    lines = s.readlines()
    s.close()
    line_no = 1
    Tokens = []
    for line in lines:
        T = lex.getToken(line,line_no)
        if T==None:
            print "ERROR:dont KNOW"
            break
        Tokens+=T
Example #12
def train():
    eval_labels = []
    eval_cols = []
    eval_wts = []
    eval_cnt = 0
    with open(eval_path, 'r') as eval_set:
        buf = []
        for line in eval_set:
            buf.append(line)
            eval_cnt += 1
            if eval_cnt == eval_buf_size:
                break
        np.random.shuffle(buf)
        for line in buf:
            y, f, x = get_fxy(line)
            eval_cols.append(f)
            eval_wts.append(x)
            eval_labels.append(y)
    eval_cols = np.array(eval_cols)
    eval_wts = np.float32(np.array(eval_wts))
    eval_labels = np.array(eval_labels)

    if 'LR' in algo:
        model = LR(batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train', eval_size)
    elif 'FM' in algo:
        model = FM(batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train', eval_size)
    elif 'FNN_IP_L3' in algo:
        model = FNN_IP_L3(cat_sizes, offsets, batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_OP_L3' in algo:
        model = FNN_OP_L3(cat_sizes, offsets, batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_IP_L5' in algo:
        model = FNN_IP_L5(cat_sizes, offsets, batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_IP_L7' in algo:
        model = FNN_IP_L7(cat_sizes, offsets, batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN' in algo:
        model = FNN(cat_sizes, offsets, batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv, 'train', eval_size)

    write_log(model.log, echo=True)

    # with tf.Session(graph=model.graph, config=sess_config) as sess:
    with tf.Session(graph=model.graph) as sess:
        tf.initialize_all_variables().run()
        print 'model initialized'
        start_time = time.time()
        step = 0
        batch_preds = []
        batch_labels = []
        err_rcds = []
        while True:
            labels, _, cols, vals = get_batch_xy(buffer_size)
            for _i in range(labels.shape[0] / batch_size):
                step += 1
                _labels = labels[_i * batch_size: (_i + 1) * batch_size]
                _cols = cols[_i * batch_size: (_i + 1) * batch_size, :]
                _vals = vals[_i * batch_size: (_i + 1) * batch_size, :]

                if 'LR' in algo or 'FM' in algo:
                    feed_dict = {model.sp_id_hldr: _cols.flatten(), model.sp_wt_hldr: _vals.flatten(),
                                 model.lbl_hldr: _labels}
                elif 'FNN' in algo:
                    feed_dict = {model.v_wt_hldr: _vals[:, :13], model.c_id_hldr: _cols[:, 13:] - offsets,
                                 model.c_wt_hldr: _vals[:, 13:], model.lbl_hldr: _labels}

                _, l, p = sess.run([model.ptmzr, model.loss, model.train_preds], feed_dict=feed_dict)
                batch_preds.extend(p)
                batch_labels.extend(_labels)
                if step % epoch == 0:
                    print 'step: %d\tloss: %g\ttime: %d' % (step * batch_size, l, time.time() - start_time)
                    start_time = time.time()
                    eval_preds = []
                    for _i in range(eval_buf_size / eval_size):
                        eval_inds = eval_cols[_i * eval_size:(_i + 1) * eval_size]
                        eval_vals = eval_wts[_i * eval_size:(_i + 1) * eval_size]
                        if 'LR' in algo or 'FM' in algo:
                            feed_dict = {model.eval_id_hldr: eval_inds.flatten(),
                                         model.eval_wts_hldr: eval_vals.flatten()}
                        elif 'FNN' in algo or 'FPNN' in algo:
                            feed_dict = {model.eval_id_hldr: eval_inds, model.eval_wts_hldr: eval_vals}
                        eval_preds.extend(model.eval_preds.eval(feed_dict=feed_dict))
                    eval_preds = np.array(eval_preds)
                    if re_calibration:
                        eval_preds /= eval_preds + (1 - eval_preds) / nds_rate
                    metrics = watch_train(step, batch_labels, batch_preds, eval_preds, eval_labels)
                    print metrics
                    err_rcds.append(metrics['eval_auc'])
                    err_rcds = err_rcds[-2 * skip_window * (stop_window + smooth_window):]
                    batch_preds = []
                    batch_labels = []
                    if step % ckpt == 0:
                        model.dump(model_path + '_%d' % step)
                    if early_stop(step, err_rcds):
                        return
Example #13
import numpy as np
import pandas as pd
from get_data import DataLoader
from LR import LR
if __name__ == "__main__":
    file_name = 'data/train.csv'
    test_file = 'data/test.csv'
    data_loader = DataLoader(file_name)
    raw_data = data_loader.get_data_as_df()
    data_dict = data_loader.get_data_by_month(raw_data)
    train_data, labels, mean_x, std_x = data_loader.get_final_data(data_dict)
    #x_train_set,label_train_set,\
    #x_validation,label_validation = data_loader.split_train_and_valid(train_data,labels)
    test_data = pd.read_csv(test_file, header=None, encoding='big5')
    test_data = test_data.iloc[:, 2:]
    test_data[test_data == 'NR'] = 0
    test_data = test_data.to_numpy()
    test_x = np.empty([240, 18 * 9], dtype=float)
    for i in range(240):
        test_x[i] = test_data[18 * i:18 * (i + 1), :].reshape(1, -1)

    for i in range(len(test_x)):
        for j in range(len(test_x[0])):
            if std_x[j] != 0:
                test_x[i][j] = (test_x[i][j] - mean_x[j]) / std_x[j]
    test_x = np.concatenate((np.ones([240, 1]), test_x), axis=1).astype(float)

    linear_model = LR(train_data, labels)
    linear_model.train()
    linear_model.get_predict_csv(test_x)
Example #14
def train():
    eval_labels = []
    eval_cols = []
    eval_wts = []
    eval_cnt = 0
    with open(eval_path, 'r') as eval_set:
        buf = []
        for line in eval_set:
            buf.append(line)
            eval_cnt += 1
            if eval_cnt == eval_buf_size:
                break
        np.random.shuffle(buf)
        for line in buf:
            y, f, x = get_fxy(line)
            eval_cols.append(f)
            eval_wts.append(x)
            eval_labels.append(y)
    eval_cols = np.array(eval_cols)
    eval_wts = np.float32(np.array(eval_wts))
    eval_labels = np.array(eval_labels)

    if 'LR' in algo:
        model = LR(batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv,
                   'train', eval_size)
    elif 'FM' in algo:
        model = FM(batch_size, _rch_argv, _init_argv, _ptmzr_argv, _reg_argv,
                   'train', eval_size)
    elif 'FNN_IP_L3' in algo:
        model = FNN_IP_L3(cat_sizes, offsets, batch_size, _rch_argv,
                          _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_OP_L3' in algo:
        model = FNN_OP_L3(cat_sizes, offsets, batch_size, _rch_argv,
                          _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_IP_L5' in algo:
        model = FNN_IP_L5(cat_sizes, offsets, batch_size, _rch_argv,
                          _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN_IP_L7' in algo:
        model = FNN_IP_L7(cat_sizes, offsets, batch_size, _rch_argv,
                          _init_argv, _ptmzr_argv, _reg_argv, 'train',
                          eval_size)
    elif 'FNN' in algo:
        model = FNN(cat_sizes, offsets, batch_size, _rch_argv, _init_argv,
                    _ptmzr_argv, _reg_argv, 'train', eval_size)

    write_log(model.log, echo=True)

    # with tf.Session(graph=model.graph, config=sess_config) as sess:
    with tf.Session(graph=model.graph) as sess:
        tf.initialize_all_variables().run()
        print 'model initialized'
        start_time = time.time()
        step = 0
        batch_preds = []
        batch_labels = []
        err_rcds = []
        while True:
            labels, _, cols, vals = get_batch_xy(buffer_size)
            for _i in range(labels.shape[0] / batch_size):
                step += 1
                _labels = labels[_i * batch_size:(_i + 1) * batch_size]
                _cols = cols[_i * batch_size:(_i + 1) * batch_size, :]
                _vals = vals[_i * batch_size:(_i + 1) * batch_size, :]

                if 'LR' in algo or 'FM' in algo:
                    feed_dict = {
                        model.sp_id_hldr: _cols.flatten(),
                        model.sp_wt_hldr: _vals.flatten(),
                        model.lbl_hldr: _labels
                    }
                elif 'FNN' in algo:
                    feed_dict = {
                        model.v_wt_hldr: _vals[:, :13],
                        model.c_id_hldr: _cols[:, 13:] - offsets,
                        model.c_wt_hldr: _vals[:, 13:],
                        model.lbl_hldr: _labels
                    }

                _, l, p = sess.run(
                    [model.ptmzr, model.loss, model.train_preds],
                    feed_dict=feed_dict)
                batch_preds.extend(p)
                batch_labels.extend(_labels)
                if step % epoch == 0:
                    print 'step: %d\tloss: %g\ttime: %d' % (
                        step * batch_size, l, time.time() - start_time)
                    start_time = time.time()
                    eval_preds = []
                    for _i in range(eval_buf_size / eval_size):
                        eval_inds = eval_cols[_i * eval_size:(_i + 1) *
                                              eval_size]
                        eval_vals = eval_wts[_i * eval_size:(_i + 1) *
                                             eval_size]
                        if 'LR' in algo or 'FM' in algo:
                            feed_dict = {
                                model.eval_id_hldr: eval_inds.flatten(),
                                model.eval_wts_hldr: eval_vals.flatten()
                            }
                        elif 'FNN' in algo or 'FPNN' in algo:
                            feed_dict = {
                                model.eval_id_hldr: eval_inds,
                                model.eval_wts_hldr: eval_vals
                            }
                        eval_preds.extend(
                            model.eval_preds.eval(feed_dict=feed_dict))
                    eval_preds = np.array(eval_preds)
                    if re_calibration:
                        eval_preds /= eval_preds + (1 - eval_preds) / nds_rate
                    metrics = watch_train(step, batch_labels, batch_preds,
                                          eval_preds, eval_labels)
                    print metrics
                    err_rcds.append(metrics['eval_auc'])
                    err_rcds = err_rcds[-2 * skip_window *
                                        (stop_window + smooth_window):]
                    batch_preds = []
                    batch_labels = []
                    if step % ckpt == 0:
                        model.dump(model_path + '_%d' % step)
                    if early_stop(step, err_rcds):
                        return
Example #15
            loss_history.append(cost)
            accuracy_history.append(acc)
        if epoch % 2 == 0:
            print('eps: {}, loss: {}, acc: {}'.format(epoch, loss_history[-1],
                                                      accuracy_history[-1]))


if __name__ == "__main__":
    N_CLASSES = 2
    BATCH_SIZE = 128
    EPOCHS = 10

    numerical_cols = ['C1', 'C15', 'C16', 'C18']
    dummy_cols = ['banner_pos', 'device_conn_type']
    target_colname = 'click'
    train_x, train_y, train_xv, train_yv, test_x, test_y, test_xv, test_yv = \
                        load_data(dummy_cols, numerical_cols, target_colname,
                                  train_file='../train_df.csv', test_file='../test_df.csv')
    num_feat = train_x.shape[1]
    model = LR(num_feat=num_feat, num_class=N_CLASSES)
    print(dir(model))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        train(sess,
              model,
              train_xv,
              train_yv,
              batch_size=BATCH_SIZE,
              epochs=EPOCHS)
Example #16
class DBN(object):
    def __init__(self,
                 input=None,
                 label=None,
                 n_ins=2,
                 hidden_layer_sizes=[3, 3],
                 n_outs=2,
                 rng=None):
        self.x = input
        self.y = label
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)

            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LR(input=self.sigmoid_layers[-1].sample_h_given_v(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_outs)

        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)

            lr *= 0.95

            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)

        return out
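A minimal end-to-end sketch of the three-phase API above (pretrain, finetune, predict), assuming the HiddenLayer, RBM, and LR classes from the same codebase are importable; the toy data is illustrative:

import numpy as np

x = np.array([[1., 1.], [1., 0.], [0., 1.], [0., 0.]])
y = np.array([[1., 0.], [1., 0.], [0., 1.], [0., 1.]])

dbn = DBN(input=x, label=y, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2)
dbn.pretrain(lr=0.1, k=1, epochs=100)  # layer-wise contrastive divergence
dbn.finetune(lr=0.1, epochs=100)       # supervised training of log_layer
print(dbn.predict(x))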
Example #17
# coding=utf-8
from LR import LR
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

if __name__ == '__main__':
    # load data
    iris = pd.read_csv('data/iris.csv')
    iris.loc[iris['Species'] == 'setosa', 'Species'] = 1
    iris.loc[iris['Species'] == 'versicolor', 'Species'] = 0
    # print(iris.head())
    X = iris.drop(['Species', 'id'], axis=1)
    y = iris['Species']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
    lr = LR(0.1, 100)
    lr.gradient_descent(X_train.values,
                        y_train.values.reshape(y_train.shape[0], 1))
    y_pred = lr.predict(X_test)
    print(f1_score(y_test.values.astype(int), y_pred))
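The two positional arguments to LR here are presumably a learning rate and an iteration count for gradient_descent; the snippet does not show the class itself.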
Example #18
class SdA(object):
    def __init__(self,
                 input=None,
                 label=None,
                 n_ins=2,
                 hidden_layer_sizes=[3, 3],
                 n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []

        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        self.log_layer = LR(input=self.sigmoid_layers[-1].sample_h_given_v(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_outs)

        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)

            da = self.dA_layers[i]

            for epoch in xrange(epochs):
                da.train(lr=lr,
                         corruption_level=corruption_level,
                         input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()
        # train log_layer
        epoch = 0

        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        return self.log_layer.predict(layer_input)
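The SdA exposes the same pretrain/finetune/predict flow as the DBN above; a sketch with illustrative data, assuming the supporting dA, HiddenLayer, and LR classes are available:

import numpy as np

x = np.array([[1., 1.], [1., 0.], [0., 1.], [0., 0.]])
y = np.array([[1., 0.], [1., 0.], [0., 1.], [0., 1.]])

sda = SdA(input=x, label=y, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2)
sda.pretrain(lr=0.1, corruption_level=0.3, epochs=100)  # denoising pre-training
sda.finetune(lr=0.1, epochs=100)
print(sda.predict(x))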
Example #19
    X_dim_test, X_feas_test = stat(test_path)
    X_dim = max(X_dim_train, X_dim_test) + 2
    X_feas = max(X_feas_train, X_feas_test)

    algo = 'FM'

    tag = (str(cam) + ' ' + time.strftime('%c') + ' ' + algo).replace(' ', '_')
    log_path = '../log/%s' % tag
    print log_path

    if 'LR' in algo:
        batch_size = 1
        epoch = 100000
        eval_size = 100000
        model = LR(batch_size, [X_dim, X_feas],
                   ['uniform', -0.001, 0.001, [0x89AB], None], ['sgd', 1e-3],
                   [1e-3], 'train', eval_size)
    elif 'FM' in algo:
        batch_size = 1
        epoch = 10000
        eval_size = 100000
        model = FM(batch_size, [X_dim, X_feas, 10],
                   ['uniform', -0.001, 0.001, [0x3210, 0x7654], None],
                   ['sgd', 1e-3], [1e-2], 'train', eval_size)

    print batch_size, epoch, eval_size

    write_log(model.log, True)

    with tf.Session(graph=model.graph) as sess:
        tf.initialize_all_variables().run()
Example #20
class Experiments:

    def __init__(self, start_date_data, end_date_data, start_date_cohort, end_date_cohort, file_path_mp, nr_top_ch,
                 train_prop, ratio_maj_min_class, use_time, simulate, cohort_size, sim_time, epochs, batch_size,
                 learning_rate, ctrl_var, ctrl_var_value, eval_fw, total_budget, custom_attr_eval):

        self.start_date_data = start_date_data
        self.end_date_data = end_date_data
        self.start_date_cohort = start_date_cohort
        self.end_date_cohort = end_date_cohort
        self.file_path_mp = file_path_mp
        self.ratio_maj_min_class = ratio_maj_min_class
        self.cohort_size = cohort_size
        self.sim_time = sim_time
        self.ctrl_var = ctrl_var
        self.ctrl_var_value = ctrl_var_value

        self.data_loader = None
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.SP_model = None
        self.LTA_model = None
        self.LR_model = None
        self.LSTM_model = None
        self.use_time = use_time
        self.simulate = simulate
        self.idx_to_ch = {}
        self.ch_to_idx = {}
        self.attributions = {}
        self.attribution_hist = {}
        self.attributions_std = {}
        self.attributions_mean_std = {}
        self.attributions_roi = {}
        self.train_prop = train_prop
        self.nr_top_ch = nr_top_ch
        self.model_stats = {}
        self.eval_fw = eval_fw
        self.custom_attr_eval = custom_attr_eval
        self.total_budget = total_budget

    def init_data_loader(self, nr_pos_sim=None, nr_neg_sim=None):
        self.data_loader = ModelDataLoader(self.start_date_data, self.end_date_data, self.start_date_cohort, self.end_date_cohort,
                                           self.file_path_mp, self.nr_top_ch, self.ratio_maj_min_class, self.simulate, self.cohort_size,
                                           self.sim_time, self.ctrl_var, self.ctrl_var_value, self.eval_fw, nr_pos_sim, nr_neg_sim)
        self.idx_to_ch = self.data_loader.get_idx_to_ch_map()
        self.ch_to_idx = self.data_loader.get_ch_to_idx_map()

    def init_models(self):
        self.SP_model = SP()
        self.LTA_model = LTA()
        self.LR_model = LR()
        self.LSTM_model = LSTM(self.epochs, self.batch_size, self.learning_rate)

    def get_path_len(self, seq_lists):
        path_lengths = []
        for seq in seq_lists:
            path_lengths.append(len(seq))
        return path_lengths

    def plot_path_lengths(self, seq_lists_train_real, seq_lists_train_sim):
        path_lengths_real = self.get_path_len(seq_lists_train_real)
        path_lengths_sim = self.get_path_len(seq_lists_train_sim)

        real_df = pd.DataFrame({'real len': path_lengths_real})
        sim_df = pd.DataFrame({'sim len': path_lengths_sim})

        real_df = real_df.groupby('real len', as_index=False).size()
        real_df.rename(columns={"size": "Real data"}, inplace=True)
        sim_df = sim_df.groupby('sim len', as_index=False).size()
        sim_df.rename(columns={"size": "Synthetic data"}, inplace=True)

        real_sim = pd.concat([real_df, sim_df], axis=1).fillna(0)
        real_sim['len'] = list(range(1, len(real_sim)+1))
        real_sim.plot(y=["Real data", "Synthetic data"], x='len', kind="bar")
        plt.title('Path lengths')
        plt.xlabel('Length [clicks]')
        plt.ylabel('Customer Journeys')
        plt.show()

    def validate_sim(self):
        self.simulate = False
        self.init_data_loader()
        clients_data_train_real, clients_data_test_real = self.data_loader.get_clients_dict_split(self.train_prop)
        x_train_real, y_train_real, x_test_real, y_test_real = self.data_loader.get_feature_matrix_split(self.train_prop, self.use_time)
        seq_lists_train_real, labels_train_real, seq_lists_test_real, labels_test_real = self.data_loader.get_seq_lists_split(self.train_prop)

        self.simulate = True
        nr_pos_sim = sum(labels_train_real) + sum(labels_test_real)
        nr_neg_sim = len(labels_train_real) + len(labels_test_real) - nr_pos_sim
        self.init_data_loader(nr_pos_sim, nr_neg_sim)
        clients_data_train_sim, _ = self.data_loader.get_clients_dict_split(self.train_prop)
        x_train_sim, y_train_sim, _, _ = self.data_loader.get_feature_matrix_split(self.train_prop, self.use_time)
        seq_lists_train_sim, labels_train_sim, _, _ = self.data_loader.get_seq_lists_split(self.train_prop)

        self.plot_path_lengths(seq_lists_train_real, seq_lists_train_sim)

        # Train on sim, test on real
        self.init_models()
        self.load_models(clients_data_train_sim, clients_data_test_real, x_train_sim, y_train_sim, x_test_real, y_test_real,
                         seq_lists_train_sim, labels_train_sim, seq_lists_test_real, labels_test_real)
        self.train_all()
        self.validate()

        # Train on real, test on real
        self.init_models()
        self.load_models(clients_data_train_real, clients_data_test_real, x_train_real, y_train_real, x_test_real, y_test_real,
                         seq_lists_train_real, labels_train_real, seq_lists_test_real, labels_test_real)
        self.train_all()
        self.validate()

    def cv(self, nr_splits=5):
        self.init_data_loader()
        train_prop = 1
        clients_data, _ = self.data_loader.get_clients_dict_split(train_prop)
        x, y, _, _ = self.data_loader.get_feature_matrix_split(train_prop, self.use_time)
        seq_lists, labels, _, _ = self.data_loader.get_seq_lists_split(train_prop)

        tot_nr_samples = len(clients_data)
        nr_samples_test = tot_nr_samples // nr_splits
        for split_idx in range(nr_splits):
            self.init_models()
            test_start_idx = int(nr_samples_test * split_idx)
            test_end_idx = int(nr_samples_test * (split_idx + 1))
            clients_data_train, clients_data_test = self.get_train_test_dicts(clients_data, test_start_idx, test_end_idx)
            x_train, y_train, x_test, y_test = self.get_train_test_arr(x, y, test_start_idx, test_end_idx)
            seq_lists_train, labels_train, seq_lists_test, labels_test = self.get_train_test_list(seq_lists, labels, test_start_idx, test_end_idx)
            self.load_models(clients_data_train, clients_data_test, x_train, y_train, x_test, y_test,
                    seq_lists_train, labels_train, seq_lists_test, labels_test)

            self.train_all()
            self.collect_models_pred_stats()
            self.collect_models_attr(nr_splits, split_idx)
        self.show_cv_results()
        if self.eval_fw:
            self.init_eval()

    def init_eval(self):
        if self.custom_attr_eval is not None:
            self.add_custom_attr()
        self.profit_eval()

    def show_cv_results(self):
        models_res = self.calc_mean_and_std()
        self.show_pred_res(models_res, cv=True)
        self.plot_attributions(self.attributions, print_sum_attr=False, cv=True)

    def calc_mean_and_std(self):
        models_res = self.calc_mean_pred()
        self.calc_mean_and_std_attr()
        return models_res

    def calc_mean_pred(self):
        models_res = []
        for model_name in self.model_stats:
            model_stats_filt = self.model_stats[model_name].copy()
            model_res_means, model_stats_filt = self.calc_metrics(model_stats_filt)
            for model_stat in model_stats_filt:
                stat_list = model_stats_filt[model_stat]
                model_res_means[model_stat] = sum(stat_list) / len(stat_list)
            model_res_means['model'] = model_name
            models_res.append(model_res_means)
        return models_res

    def calc_metrics(self, model_stats_filt):
        model_res_means = {}
        model_res_means['precision'] = (np.array(model_stats_filt['tp']) / (np.array(model_stats_filt['tp']) + np.array(model_stats_filt['fp']))).mean()
        model_res_means['recall'] = (np.array(model_stats_filt['tp']) / (np.array(model_stats_filt['tp']) + np.array(model_stats_filt['fn']))).mean()

        model_res_means['F1'] = 2 * model_res_means['precision'] * model_res_means['recall'] / (
                    model_res_means['precision'] + model_res_means['recall'])

        model_res_means['accuracy'] = ((np.array(model_stats_filt['tp']) + np.array(model_stats_filt['tn'])) / (
                    np.array(model_stats_filt['tn']) + np.array(model_stats_filt['tp']) + np.array(model_stats_filt['fp']) + np.array(model_stats_filt['fn']))).mean()

        model_stats_filt.pop('tn')
        model_stats_filt.pop('fp')
        model_stats_filt.pop('fn')
        model_stats_filt.pop('tp')
        model_stats_filt.pop('model')
        return model_res_means, model_stats_filt

    def calc_mean_and_std_attr(self):
        for model_name in self.attribution_hist:
            self.attributions[model_name] = self.attribution_hist[model_name].mean(axis=0).tolist()
            self.attributions_std[model_name] = self.attribution_hist[model_name].std(axis=0).tolist()
            self.attributions_mean_std[model_name] = sum(self.attributions_std[model_name]) / len(self.attributions_std[model_name])

    def collect_models_attr(self, nr_splits, split_idx):
        models_attr_dict = self.load_attributions(output=True)
        for model_name in models_attr_dict:
            if model_name not in self.attribution_hist:
                self.attribution_hist[model_name] = np.zeros((nr_splits, len(self.ch_to_idx)))
            self.attribution_hist[model_name][split_idx] = np.array(models_attr_dict[model_name])

    def collect_models_pred_stats(self):
        models_res = self.validate(output=True)
        for model_res in models_res:
            self.collect_model_pred_stats(model_res['model'], model_res)

    def collect_model_pred_stats(self, model_name, model_res):
        if model_name not in self.model_stats:
            self.model_stats[model_name] = {}
            for model_stat in model_res:
                self.model_stats[model_name][model_stat] = [model_res[model_stat]]
            return
        self.add_model_stats(model_name, model_res)

    def add_model_stats(self, model_name, model_stats):
        for model_stat in model_stats:
            self.model_stats[model_name][model_stat].append(model_stats[model_stat])

    def get_train_test_arr(self, x, y, test_start_idx, test_end_idx):
        if test_start_idx == 0:
            x_train = x[test_end_idx:]
            y_train = y[test_end_idx:]
        elif test_end_idx == len(x)-1:
            x_train = x[:test_start_idx]
            y_train = y[:test_start_idx]
        else:
            x_train = np.concatenate((x[:test_start_idx], x[test_end_idx:]), axis=0)
            y_train = np.concatenate((y[:test_start_idx], y[test_end_idx:]), axis=0)
        x_test = x[test_start_idx:test_end_idx]
        y_test = y[test_start_idx:test_end_idx]
        return x_train, y_train, x_test, y_test

    def get_train_test_list(self, seq_lists, labels, test_start_idx, test_end_idx):
        seq_lists_train = seq_lists[:test_start_idx] + seq_lists[test_end_idx:]
        labels_train = labels[:test_start_idx] + labels[test_end_idx:]
        seq_lists_test = seq_lists[test_start_idx:test_end_idx]
        labels_test = labels[test_start_idx:test_end_idx]
        return seq_lists_train, labels_train, seq_lists_test, labels_test

    def get_train_test_dicts(self, clients_data, test_start_idx, test_end_idx):
        all_client_ids = list(clients_data.keys())
        train_client_ids = all_client_ids[:test_start_idx] + all_client_ids[test_end_idx:]
        test_client_ids = all_client_ids[test_start_idx:test_end_idx]
        clients_data_train = {client_id: clients_data[client_id] for client_id in train_client_ids}
        clients_data_test = {client_id: clients_data[client_id] for client_id in test_client_ids}
        return clients_data_train, clients_data_test

    def load_data(self):
        self.init_data_loader()
        clients_data_train, clients_data_test = self.data_loader.get_clients_dict_split(self.train_prop)
        x_train, y_train, x_test, y_test = self.data_loader.get_feature_matrix_split(self.train_prop, self.use_time)
        seq_lists_train, labels_train, seq_lists_test, labels_test = self.data_loader.get_seq_lists_split(self.train_prop)

        self.load_models(clients_data_train, clients_data_test, x_train, y_train, x_test, y_test,
                        seq_lists_train, labels_train, seq_lists_test, labels_test)

    def load_models(self, clients_data_train, clients_data_test, x_train, y_train, x_test, y_test,
                    seq_lists_train, labels_train, seq_lists_test, labels_test):
        self.SP_model.load_train_test_data(clients_data_train, clients_data_test)
        self.LTA_model.load_train_test_data(clients_data_train, clients_data_test)
        self.LR_model.load_train_test_data(x_train, y_train, x_test, y_test)
        self.LSTM_model.load_data(seq_lists_train, labels_train, seq_lists_test, labels_test)

    def train_all(self):
        self.SP_model.train()
        self.LTA_model.train()
        self.LR_model.train()
        self.LSTM_model.train()

    def validate(self, output=False):
        LTA_res = self.LTA_model.get_results()
        LR_res = self.LR_model.get_results()
        SP_res = self.SP_model.get_results()
        LSTM_res = self.LSTM_model.get_results()
        LTA_res['model'] = 'LTA'
        LR_res['model'] = 'LR'
        SP_res['model'] = 'SP'
        LSTM_res['model'] = 'LSTM'
        models_res = [LTA_res, LR_res, SP_res, LSTM_res]
        if output:
            return models_res
        self.show_pred_res(models_res)
        if self.eval_fw:
            self.init_eval()

    def show_pred_res(self, models_res, cv=False):
        results_df = pd.DataFrame()
        for model_res in models_res:
            results_df = results_df.append(model_res, ignore_index=True)

        if not cv:
            results_df['precision'] = results_df['tp'] / (results_df['tp'] + results_df['fp'])
            results_df['recall'] = results_df['tp'] / (results_df['tp'] + results_df['fn'])
            results_df['F1'] = 2 * results_df['precision'] * results_df['recall'] / (results_df['precision'] + results_df['recall'])
            results_df['accuracy'] = (results_df['tp'] + results_df['tn']) / (results_df['tn'] + results_df['tp'] + results_df['fp'] + results_df['fn'])

        print('Theoretical max accuracy on all data is: ', self.data_loader.get_theo_max_accuracy())
        print(results_df)

    def load_attributions(self, output=False):
        SP_attr = self.SP_model.get_normalized_attributions()
        LTA_attr = self.LTA_model.get_normalized_attributions()
        LR_attr = self.LR_model.get_normalized_attributions()
        LSTM_attr_shap = self.LSTM_model.get_normalized_attributions('shap')
        LSTM_attr_atten = self.LSTM_model.get_normalized_attributions('attention')
        LSTM_attr_frac = self.LSTM_model.get_normalized_attributions('fractional')

        attributions = {'SP': SP_attr, 'LTA': LTA_attr, 'LR': LR_attr, 'LSTM SHAP': LSTM_attr_shap,
                        'LSTM Attention': LSTM_attr_atten, 'LSTM Fractional': LSTM_attr_frac}
        if output:
            return attributions
        self.attributions = attributions

    def load_non_norm_attributions(self):
        SP_non_norm = self.SP_model.get_non_normalized_attributions()
        LTA_non_norm = self.LTA_model.get_non_normalized_attributions()
        LR_non_norm = self.LR_model.get_coefs()
        LSTM_non_norm_shap = self.LSTM_model.get_non_normalized_attributions('shap')
        LSTM_non_norm_atten = self.LSTM_model.get_non_normalized_attributions('attention')
        LSTM_non_norm_frac = self.LSTM_model.get_non_normalized_attributions('fractional')

        return {'SP': sum(SP_non_norm), 'LTA': sum(LTA_non_norm), 'LR': sum(LR_non_norm),
                'LSTM SHAP': sum(LSTM_non_norm_shap), 'LSTM Attention': sum(LSTM_non_norm_atten),
                'LSTM Fractional': sum(LSTM_non_norm_frac)}

    def plot_attributions(self, attributions=None, print_sum_attr=True, cv=False):
        channel_names = []
        for ch_idx in range(len(self.idx_to_ch)):
            channel_names.append(self.idx_to_ch[ch_idx])

        if attributions is None:
            attributions = self.attributions

        df_means = pd.DataFrame({'Channel': channel_names,
                           'LTA': attributions['LTA'],
                           'SP': attributions['SP'],
                           'LR': attributions['LR'],
                           'LSTM SHAP': attributions['LSTM SHAP'],
                           'LSTM Attention': attributions['LSTM Attention'],
                           'LSTM Fractional': attributions['LSTM Fractional']})
        if self.simulate:
            true_attr = self.data_loader.get_true_norm_attributions()
            df_means['True Attribution'] = true_attr
            self.print_RSS(true_attr, attributions)
        if cv:
            print('Mean attribution std: ', self.attributions_mean_std)
            df_std = pd.DataFrame({'LTA': self.attributions_std['LTA'],
                                   'SP': self.attributions_std['SP'],
                                   'LR': self.attributions_std['LR'],
                                   'LSTM SHAP': self.attributions_std['LSTM SHAP'],
                                   'LSTM Attention': self.attributions_std['LSTM Attention'],
                                   'LSTM Fractional': self.attributions_std['LSTM Fractional']})
            if self.simulate:
                df_std['True Attribution'] = [0] * self.nr_top_ch

            df_std.to_csv('df_std.csv')
            yerr = df_std.values.T
        else:
            yerr = 0

        ax = df_means.plot.bar(x='Channel', rot=90, yerr=yerr, capsize=3)
        if print_sum_attr and not cv:
            ax.legend(['LTA (sum ' + str(round(self.load_non_norm_attributions()['LTA'], 2)) + ')',
                       'SP (sum ' + str(round(self.load_non_norm_attributions()['SP'], 2)) + ')',
                       'LR (sum ' + str(round(self.load_non_norm_attributions()['LR'], 2)) + ')',
                       'LSTM SHAP (sum ' + str(round(self.load_non_norm_attributions()['LSTM SHAP'], 2)) + ')',
                       'LSTM Attention (sum ' + str(round(self.load_non_norm_attributions()['LSTM Attention'], 2)) + ')',
                       'LSTM Fractional (sum ' + str(round(self.load_non_norm_attributions()['LSTM Fractional'], 2)) + ')'])
        ax.set_xlabel("Channel")
        df_means.to_csv('df_means.csv')

        plt.tight_layout()
        plt.title('Attributions', fontsize=16)
        plt.show()
        self.plot_touchpoint_attributions()

    def print_RSS(self, true_attr, attributions):
        RSS = {}
        for model_name in attributions:
            RSS[model_name] = np.square(np.array(true_attr) - np.array(attributions[model_name])).sum()
        print('Attributions RSS: ', RSS)

    def plot_touchpoint_attributions(self, max_seq_len=5):
        for seq_len in range(2, max_seq_len+1):
            touchpoint_attr = self.LSTM_model.get_touchpoint_attr(seq_len)
            print(touchpoint_attr)
            plt.plot(touchpoint_attr, marker='.', linewidth=2, markersize=12)
            plt.title('Touchpoint attention attributions')
            plt.xlabel('Touchpoint Index')
            plt.ylabel('Normalized Attention')
        plt.show()

    def add_custom_attr(self):
        attr = np.zeros(self.nr_top_ch)
        for ch_name in self.custom_attr_eval:
            if ch_name not in self.ch_to_idx:
                print('Oops! Could not match', ch_name, '. Custom attribution not possible.')
                return
            idx = self.ch_to_idx[ch_name]
            attr[idx] = self.custom_attr_eval[ch_name]
        attr = attr / attr.sum()
        self.attributions['custom'] = attr.tolist()

    def profit_eval(self):
        if self.simulate:
            print('Oops... Can\'t run eval FW with simulated data')
            return
        GA_unbalanced_df = self.data_loader.get_GA_unbalanced_df()
        converted_clients_df = self.data_loader.get_converted_clients_df()

        eval_results_df = pd.DataFrame()
        for model_name in self.attributions:
            evaluation = Evaluation(GA_unbalanced_df, converted_clients_df, self.total_budget, self.attributions[model_name], self.ch_to_idx)
            results = evaluation.evaluate()
            results['model'] = model_name
            eval_results_df = eval_results_df.append(results, ignore_index=True)
            self.attributions_roi[model_name] = evaluation.get_channels_roi()
        print(eval_results_df)
        self.plot_attributions(self.attributions_roi, print_sum_attr=False)
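A hypothetical driver for this class, wiring the flow its own methods suggest (init_models, load_data, train_all, validate); every constructor argument below is a placeholder:

exp = Experiments(
    start_date_data='2021-01-01', end_date_data='2021-06-30',
    start_date_cohort='2021-01-01', end_date_cohort='2021-03-31',
    file_path_mp='data/mp.csv', nr_top_ch=10, train_prop=0.8,
    ratio_maj_min_class=1.0, use_time=True, simulate=False,
    cohort_size=10000, sim_time=30, epochs=20, batch_size=64,
    learning_rate=0.001, ctrl_var=None, ctrl_var_value=None,
    eval_fw=False, total_budget=1000.0, custom_attr_eval=None)
exp.init_models()   # instantiate the SP, LTA, LR and LSTM models
exp.load_data()     # build the data loader and feed all four models
exp.train_all()
exp.validate()      # or exp.cv(nr_splits=5) for cross-validation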
Example #21
def test():
    if 'LR' in algo:
        model = LR(test_batch_size, _rch_argv, _init_argv, None, None, 'test',
                   None)
    elif 'FM' in algo:
        model = FM(test_batch_size, _rch_argv, _init_argv, None, None, 'test',
                   None)
    elif 'FNN_IP_L3' in algo:
        model = FNN_IP_L3(cat_sizes, offsets, test_batch_size, _rch_argv,
                          _init_argv, None, None, 'test', None)
    elif 'FNN_IP_L5' in algo:
        model = FNN_IP_L5(cat_sizes, offsets, test_batch_size, _rch_argv,
                          _init_argv, None, None, 'test', None)
    elif 'FNN_IP_L7' in algo:
        model = FNN_IP_L7(cat_sizes, offsets, test_batch_size, _rch_argv,
                          _init_argv, None, None, 'test', None)
    elif 'FNN' in algo:
        model = FNN(cat_sizes, offsets, test_batch_size, _rch_argv, _init_argv,
                    None, None, 'test', None)

    print 'testing model: %s' % _init_argv[-1]
    # with tf.Session(graph=model.graph, config=sess_config) as sess:
    with tf.Session(graph=model.graph) as sess:
        tf.initialize_all_variables().run()
        print 'model initialized'
        test_preds = []
        test_labels = []
        step = 0
        start_time = time.time()
        while True:
            labels, _, cols, vals = get_batch_sparse_tensor(buffer_size)
            labels, cols, vals = np.array(labels), np.array(cols), np.array(
                vals)
            for _i in range(labels.shape[0] / test_batch_size):
                step += test_batch_size
                _labels = labels[_i * test_batch_size:(_i + 1) *
                                 test_batch_size]
                _cols = cols[_i * test_batch_size:(_i + 1) *
                             test_batch_size, :]
                _vals = vals[_i * test_batch_size:(_i + 1) *
                             test_batch_size, :]

                if 'LR' in algo or 'FM' in algo:
                    feed_dict = {
                        model.sp_id_hldr: _cols.flatten(),
                        model.sp_wt_hldr: _vals.flatten(),
                        model.lbl_hldr: _labels
                    }
                elif 'FNN' in algo or 'FPNN' in algo:
                    feed_dict = {
                        model.v_wt_hldr: _vals[:, :13],
                        model.c_id_hldr: _cols[:, 13:] - offsets,
                        model.c_wt_hldr: _vals[:, 13:],
                        model.lbl_hldr: _labels
                    }

                p = model.test_preds.eval(feed_dict=feed_dict)
                p /= p + (1 - p) / nds_rate

                test_preds.extend(p)
                test_labels.extend(_labels)
                if step % epoch == 0:
                    print('test-auc: %g\trmse: %g\tlog-loss: %g' % (
                        roc_auc_score(test_labels, test_preds),
                        np.sqrt(mean_squared_error(test_labels, test_preds)),
                        log_loss(test_labels, test_preds)))
                    print('step: %d\ttime: %g' % (step, time.time() - start_time))
                    start_time = time.time()

            if len(labels) < buffer_size:
                print('test-auc: %g\trmse: %g\tlog-loss: %g' % (
                    roc_auc_score(test_labels, test_preds),
                    np.sqrt(mean_squared_error(test_labels, test_preds)),
                    log_loss(test_labels, test_preds)))
                sys.exit(0)
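
The calibration step p /= p + (1 - p) / nds_rate above undoes negative down-sampling: if negatives were kept with probability nds_rate at training time, raw scores are biased upward and are mapped back with q = p / (p + (1 - p) / nds_rate). A minimal, self-contained numeric check of the same formula:

import numpy as np

def calibrate(p, nds_rate):
    # Standard correction for negative down-sampling: negatives were kept
    # with probability nds_rate during training, so raw scores overestimate
    # the true click probability.
    p = np.asarray(p, dtype=float)
    return p / (p + (1 - p) / nds_rate)

print(calibrate([0.5, 0.9], 0.1))  # -> [0.09090909 0.47368421]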
Esempio n. 22
0
# NOTE: this snippet is an excerpt; DBN, LR, mini_batches, mini_batches_update
# and eval are defined in the original project's modules.
import torch
import torch.nn as nn


def train_DBN(train_data,
              test_data,
              hidden_units,
              num_epochs_DBN=50,
              num_epochs_LR=100):
    train_features, train_labels = train_data
    test_features, test_labels = test_data

    # training DBN model
    #################################################################################################
    # dbn_model = DBN(visible_units=train_features.shape[1],
    #                 hidden_units=[20, hidden_units],
    #                 k=5,
    #                 learning_rate=0.01,
    #                 learning_rate_decay=True,
    #                 xavier_init=True,
    #                 increase_to_cd_k=False,
    #                 use_gpu=False)
    # dbn_model.train_static(train_features, train_labels, num_epochs=num_epochs_DBN, batch_size=32)
    # # Finishing the training DBN model
    # print('---------------------Finishing the training DBN model---------------------')
    # # using DBN model to construct features
    # train_features, _ = dbn_model.forward(train_features)
    # test_features, _ = dbn_model.forward(test_features)
    ##################################################################################################

    # training LR model
    ##################################################################################################
    if len(train_labels.shape) == 1:
        num_classes = 1
    else:
        num_classes = train_labels.shape[1]
    # lr_model = LR(input_size=hidden_units, num_classes=num_classes)
    lr_model = LR(input_size=train_features.shape[1], num_classes=num_classes)
    optimizer = torch.optim.Adam(lr_model.parameters(), lr=0.00001)
    steps = 0
    batches_test = mini_batches(X=test_features, Y=test_labels)
    for epoch in range(1, num_epochs_LR + 1):
        # building batches for training model
        batches_train = mini_batches_update(X=train_features, Y=train_labels)
        for batch in batches_train:
            x_batch, y_batch = batch
            if torch.cuda.is_available():
                x_batch = torch.tensor(x_batch).float().cuda()
                y_batch = torch.tensor(y_batch).float().cuda()
            else:
                x_batch = torch.tensor(x_batch).float()
                y_batch = torch.tensor(y_batch).float()

            optimizer.zero_grad()
            predict = lr_model.forward(x_batch)
            criterion = nn.BCELoss()
            loss = criterion(predict, y_batch)
            loss.backward()
            optimizer.step()

            steps += 1
            if steps % 10 == 0:
                print('\rEpoch: {} step: {} - loss: {:.6f}'.format(
                    epoch, steps, loss.item()))

        print('Epoch: %i ---Training data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_train, model=lr_model)  # project-level eval helper, not the builtin
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
        print('Epoch: %i ---Testing data' % (epoch))
        acc, prc, rc, f1, auc_ = eval(data=batches_test, model=lr_model)
        print(
            'Accuracy: %f -- Precision: %f -- Recall: %f -- F1: %f -- AUC: %f'
            % (acc, prc, rc, f1, auc_))
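
A minimal smoke test for train_DBN, assuming the function and the helpers it references are importable from the same module; the random data, feature width, and epoch count are illustrative only:

import numpy as np

rng = np.random.RandomState(0)
train_xy = (rng.rand(200, 16).astype('float32'),
            rng.randint(0, 2, 200).astype('float32'))
test_xy = (rng.rand(64, 16).astype('float32'),
           rng.randint(0, 2, 64).astype('float32'))
train_DBN(train_xy, test_xy, hidden_units=12, num_epochs_LR=5)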
Esempio n. 23
0
def init_models(self):
    self.SP_model = SP()
    self.LTA_model = LTA()
    self.LR_model = LR()
    self.LSTM_model = LSTM(self.epochs, self.batch_size, self.learning_rate)
Esempio n. 24
0
# NOTE: this snippet is an excerpt; HiddenLayer, LR, ReLU and np (numpy)
# come from the same project's modules.
class Dropout(object):
    def __init__(self,
                 input,
                 label,
                 n_in,
                 hidden_layer_sizes,
                 n_out,
                 rng=None,
                 activation=ReLU):
        self.x = input
        self.y = label
        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = np.random.RandomState(1234)

        assert self.n_layers > 0

        for i in range(self.n_layers):
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            self.hidden_layers.append(hidden_layer)

        self.log_layer = LR(input=self.hidden_layers[-1].output(),
                            label=self.y,
                            n_in=hidden_layer_sizes[-1],
                            n_out=n_out)

    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
        for epoch in range(epochs):
            dropout_masks = []

            for i in range(self.n_layers):
                if i == 0:
                    layer_input = self.x
                layer_input = self.hidden_layers[i].forward(input=layer_input)

                if dropout:
                    mask = self.hidden_layers[i].dropout(input=layer_input,
                                                         p=p_dropout,
                                                         rng=rng)
                    layer_input *= mask

                    dropout_masks.append(mask)

            self.log_layer.train(input=layer_input)

            for i in reversed(range(self.n_layers)):
                if i == self.n_layers - 1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i + 1]

                if dropout:
                    self.hidden_layers[i].backward(prev_layer=prev_layer,
                                                   dropout=True,
                                                   mask=dropout_masks[i])
                else:
                    self.hidden_layers[i].backward(prev_layer=prev_layer)

    def predict(self, x, dropout=True, p_dropout=0.5):
        layer_input = x

        for i in range(self.n_layers):
            if dropout:
                # Scale the trained weights to compensate for dropout. Note
                # that this mutates W in place, so predict() with dropout=True
                # should only be called once per trained model.
                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W

            layer_input = self.hidden_layers[i].output(input=layer_input)

        return self.log_layer.predict(layer_input)
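
A minimal usage sketch for the Dropout network above, assuming HiddenLayer and LR from the same project are available; the XOR-style toy data is illustrative only:

import numpy as np

x = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([[0.], [1.], [1.], [0.]])

clf = Dropout(input=x, label=y, n_in=2, hidden_layer_sizes=[8, 8], n_out=1)
clf.train(epochs=500, dropout=True, p_dropout=0.5)
print(clf.predict(x))  # predict() rescales W in place, so call it only once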
Esempio n. 25
0
# -*- coding: utf-8 -*-
# Chinese comment template (comments below translated to English)
from LEX import LEX
from LR import LR
from EXEC import EXEC
if __name__ == '__main__':
    lex = LEX()  # lexer instance
    ll = LEX()   # a second lexer, handed to the parser
    lr = LR(ll)
    ex = EXEC()

    while True:
        l = []  # token list for this round
        script = input(">")  # read a command
        if len(script) == 0:
            continue
        elif script[0] != '@':  # the input is source code
            l += lex.getToken(script)  # lexical analysis
        else:
            path = script[1:]  # the input is a file path; strip the leading '@'
            with open(path, 'r') as s:
                lines = s.readlines()
            line_no = 1
            for line in lines:
                l += lex.getToken(line, line_no)
                line_no += 1
        print("@SLR1:")
        E = lr.analysis_exec(l)
Esempio n. 26
0
from numpy import loadtxt

from LR import LR
# plotDecisionBoundary is assumed to come from the project's plotting helper.

# parameters
name = 'stdev2'
print('======Training======')
# load data from csv files
train = loadtxt('newData-2/data_' + name + '_train.csv')
#train = loadtxt('data/data_'+name+'_train.csv')
X = train[:, 0:2]
Y = train[:, 2:3]

#X = np.array([[1.0,2.0],[2.0,2.0],[0.0,0.0],[-2.0,3.0]])
#Y = np.array([[1.0],[1.0],[-1.0],[-1.0]])

# Carry out training.
L = .01
lr = LR(X,Y,L)
lr.train()
'''
[[ 0.89444823  0.19756899]]
[-0.24464889]
model = lr.train_gold()
print model.coef_
print model.intercept_
'''
# Define the predictLR(x) function, which uses the trained parameters.
def predictLR(x):
    return lr.test(x)

# plot training results
plotDecisionBoundary(X, Y, predictLR, [0.5], title = 'LR Train')
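
plotDecisionBoundary draws the [0.5] contour of predictLR, which for logistic regression is exactly the set w·x + b = 0, because sigmoid(0) = 0.5. A quick standalone check using the coefficients quoted in the comment block above:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

w, b = np.array([0.89444823, 0.19756899]), -0.24464889  # from the comment above
x = np.array([0.0, -b / w[1]])  # chosen so that w @ x + b == 0
print(sigmoid(w @ x + b))       # 0.5: x lies exactly on the decision boundary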
Esempio n. 27
0
    X_dim = camp_info["dim"]
    X_field = camp_info["field"]

    seeds = [
        0x0123, 0x4567, 0x3210, 0x7654, 0x89AB, 0xCDEF, 0xBA98, 0xFEDC, 0x0123,
        0x4567, 0x3210, 0x7654, 0x89AB, 0xCDEF, 0xBA98, 0xFEDC
    ]

    if "LR" in algo:
        batch_size = 10000
        buf_size = 1000000
        model = LR(
            [X_dim, X_field],
            batch_size,
            data_path + camp + "/urp-model/lr.pickle",  # or None
            [('uniform', -0.001, 0.001, seeds[4])],
            ['sgd', 1e-3, 'sum'],
            0)  # 1e-3

    print("batch size={0}, buf size={1}".format(batch_size, buf_size))
    print(model.log)

    if mode == "train":
        if save_model:
            utility.write_log(log_path, model.log)

        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        # sess_config = tf.ConfigProto(gpu_options=gpu_options)
        # with tf.Session(graph=model.graph, config=sess_config) as sess: