Example #1
import numpy as np  # l_utils below is a project-local helper (not shown here)


def run_data(data, c, inputs, sess, xy, filename=None):
    truth, features = l_utils.prepare_data(data, c)

    feed = {inputs: features}
    results = sess.run(xy, feed_dict=feed)[:, 0]

    if filename is not None:
        with open(filename, 'w') as fp:
            # down-sample the dump to every skip-th row
            skip = max(1, len(truth) // 2000)
            for a in range(len(truth)):
                if a % skip == 0:
                    fp.write('{},{}\n'.format(truth[a], results[a]))

    return np.mean(np.abs(results - truth))
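
For reference, a minimal self-contained sketch of the same evaluation logic (mean absolute error plus a down-sampled CSV dump), with NumPy arrays standing in for the l_utils/TensorFlow pieces; the function name and synthetic data are illustrative only:

import numpy as np

def evaluate(truth, results, filename=None, max_rows=2000):
    # Optionally dump every skip-th (truth, prediction) pair so the CSV
    # stays on the order of max_rows lines, mirroring run_data above.
    if filename is not None:
        skip = max(1, len(truth) // max_rows)
        with open(filename, 'w') as fp:
            for a in range(0, len(truth), skip):
                fp.write('{},{}\n'.format(truth[a], results[a]))
    # The returned score is the mean absolute error.
    return np.mean(np.abs(results - truth))

rng = np.random.default_rng(0)
truth = rng.normal(size=1000)
results = truth + rng.normal(scale=0.1, size=1000)
print(evaluate(truth, results))
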
Example #2
import numpy as np  # l_utils is a project-local helper (not shown here)


def run_data(data, c, inputs, sess, xy, filename, cfg):
    truth, features = l_utils.prepare_data(data, c)
    feed = {}
    b_sz = len(truth)
    att = cfg['n_att']
    length = features.shape[1] // att  # number of att-wide input slices
    feed[inputs['input_0']] = np.repeat(cfg['refs'], b_sz, axis=0)
    for a in range(length):
        feed[inputs['input_{}'.format(a + 1)]] = features[:, att * a:att *
                                                          (a + 1)]

    results = sess.run(xy, feed_dict=feed)[:, 0]

    if filename is not None:
        with open(filename, 'w') as fp:
            # down-sample the dump to every skip-th row
            skip = max(1, len(truth) // 2000)
            for a in range(len(truth)):
                if a % skip == 0:
                    fp.write('{},{}\n'.format(truth[a], results[a]))

    return np.mean(np.abs(results - truth))
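
The feed construction above splits a (batch, length * att) feature matrix into length contiguous slices of width att, one per named input. A stand-alone check of that slicing pattern (the shapes and input names here are assumptions for illustration):

import numpy as np

att, length, batch = 3, 4, 5
features = np.arange(batch * length * att).reshape(batch, length * att)
slices = {'input_{}'.format(a + 1): features[:, att * a:att * (a + 1)]
          for a in range(length)}
print(sorted(slices))           # ['input_1', 'input_2', 'input_3', 'input_4']
print(slices['input_1'].shape)  # (5, 3)
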
Example #3
# Requires glob, datetime, numpy as np and TensorFlow 1.x as tf, plus the
# project-local Utils, l_utils, sNet3 and HOME (not shown in this excerpt).
def nn_fit(config, cntn):
    cfg = Utils.load_json_file(config)

    locs = sorted(glob.glob(cfg['out_location'].format(HOME, '*')))
    print(locs[0])
    data, att = l_utils.get_dataset(locs)
    print('att', att)
    CV = cfg['CV']
    nodes = list(map(int, cfg['nodes'].split(',')))
    netFile = cfg['netFile']
    # CV = 5
    # nodes = [256, 16]
    lr0 = 1e-4
    iterations = 10
    loop = 1
    batch_size = 100

    for c in range(CV):
        print('CV', c)
        lr = lr0
        #te, tr = create_data(data, c)
        #len(te[0][0][0][1])
        output = tf.placeholder(tf.float32, [None, 1])
        input = tf.placeholder(tf.float32, [None, att])
        learning_rate = tf.placeholder(tf.float32, shape=[])

        net = sNet3({'data': input})
        net.real_setup(nodes, 1)

        xy = net.layers['output']
        loss = tf.reduce_sum(tf.abs(tf.subtract(xy, output)))
        #loss = tf.reduce_sum(tf.square(tf.subtract(xy, output)))

        # Adam with its default beta1/beta2/epsilon settings
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
        # opt = tf.train.GradientDescentOptimizer(learning_rate=cfg.lr).minimize(loss)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)

            if cntn:
                saver.restore(sess, netFile.format(HOME, c))

            t00 = datetime.datetime.now()
            st1 = ''
            for a in range(iterations):

                te_loss = run_data(data[0], c + 1, input, sess, xy,
                                   '{}/tmp/te.csv'.format(HOME))
                tr_loss = run_data(data[0], -c - 1, input, sess, xy,
                                   '{}/tmp/tr.csv'.format(HOME))

                t1 = (datetime.datetime.now() - t00).total_seconds() / 3600.0
                msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                    a * loop / 1000.0, t1, lr, tr_loss, te_loss)
                print(msg, st1)

                t_loss = 0
                t_count = 0
                for dd in data:
                    truth, features = l_utils.prepare_data(dd, -c - 1, rd=True)
                    length = len(truth)
                    b0 = truth.reshape((length, 1))
                    for lp in range(loop):
                        for d in range(0, length, batch_size):
                            feed = {
                                input: features[d:d + batch_size, :],
                                output: b0[d:d + batch_size, :],
                                learning_rate: lr
                            }
                            _, A = sess.run([opt, loss], feed_dict=feed)
                            t_loss += A
                            t_count += len(b0[d:d + batch_size])
                st1 = '{}'.format(t_loss / t_count)

                saver.save(sess, netFile.format(HOME, c))

        tf.reset_default_graph()
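
Two details worth noting: the loss is a summed L1 error, so the t_loss / t_count value reported in st1 is the per-sample training MAE, the same metric run_data returns; and the sign of the fold argument (c + 1 versus -c - 1) appears to select the held-out versus the training split inside l_utils.prepare_data. A compressed sketch of the same fit loop on synthetic data, with a plain NumPy linear model standing in for sNet3 and the TF session (everything here is illustrative):

import numpy as np

rng = np.random.default_rng(0)
features = rng.normal(size=(1000, 8))
truth = features @ (0.1 * np.arange(1, 9))
w = np.zeros(8)
lr, batch_size = 1e-2, 100
for it in range(10):
    t_loss, t_count = 0.0, 0
    for d in range(0, len(truth), batch_size):
        xb, yb = features[d:d + batch_size], truth[d:d + batch_size]
        resid = xb @ w - yb
        # subgradient step on the summed L1 loss (cf. the Adam step above)
        w -= lr * (xb.T @ np.sign(resid)) / len(yb)
        t_loss += np.abs(resid).sum()
        t_count += len(yb)
    print(it, t_loss / t_count)  # per-sample training MAE, as in st1
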
Example #4
            for a in range(iterations):

                te_loss = run_data(data[0], c + 1, input_dic, sess, xy,
                                   '{}/tmp/te.csv'.format(HOME), cfg)
                tr_loss = run_data(data[0], -c - 1, input_dic, sess, xy,
                                   '{}/tmp/tr.csv'.format(HOME), cfg)

                t1 = (datetime.datetime.now() - t00).total_seconds() / 3600.0
                msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                    a * loop / 1000.0, t1, lr, tr_loss, te_loss)
                print(msg, st1)

                t_loss = 0
                t_count = 0
                for dd in data:
                    truth, features = l_utils.prepare_data(dd, -c - 1, rd=True)
                    b0 = truth.reshape((len(truth), 1))
                    for lp in range(loop):
                        for d in range(0, len(truth), batch_size):
                            feed = {}
                            att = cfg['n_att']
                            length = features.shape[1] // att
                            # separate slice index so the outer iteration
                            # counter `a` is not clobbered
                            for s in range(length):
                                feed[input_dic['input_{}'.format(
                                    s + 1)]] = features[d:d + batch_size,
                                                        att * s:att * (s + 1)]
                            b_sz = len(features[d:d + batch_size, 0])
                            feed[input_dic['input_0']] = np.repeat(cfg['refs'],
                                                                   b_sz,
                                                                   axis=0)
                            feed[output] = b0[d:d + batch_size, :]
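
The main addition relative to Example #3 is the shared reference input: a single cfg['refs'] row tiled across the batch with np.repeat so every sample sees the same reference vector. A quick illustrative check of that pattern (the shape of refs is an assumption):

import numpy as np

refs = np.array([[0.1, 0.2, 0.3]])  # stand-in for cfg['refs'], shape (1, k)
b_sz = 4
tiled = np.repeat(refs, b_sz, axis=0)
print(tiled.shape)                  # (4, 3): one identical row per sample
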
Example #5
                total_loss = run_data(tr, input, sess, xy,
                                      '/home/weihao/tmp/tr.csv')

                te_loss = run_data(te, input, sess, xy,
                                   '/home/weihao/tmp/te.csv')

                t1 = (datetime.datetime.now() - t00).total_seconds() / 3600.0
                msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                    a * loop / 1000.0, t1, lr, total_loss, te_loss)
                print(msg, st1)

                t_loss = 0
                t_count = 0
                for lp in range(loop):
                    truth, features = l_utils.prepare_data(tr, rd=True)
                    length = len(truth)
                    b0 = truth.reshape((length, 1))
                    for d in range(0, length, batch_size):
                        feed = {
                            input: features[d:d + batch_size, :],
                            output: b0[d:d + batch_size, :],
                            learning_rate: lr
                        }
                        _, A = sess.run([opt, loss], feed_dict=feed)
                        t_loss += A
                        t_count += len(b0[d:d + batch_size])
                st1 = '{}'.format(t_loss / t_count)

                saver.save(sess, netFile.format(c))
                lr *= 0.99
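
The lr *= 0.99 at the end of each outer iteration is a simple exponential learning-rate schedule: after n iterations the rate is lr0 * 0.99**n, for example:

lr0 = 1e-4
print(lr0 * 0.99 ** 10)  # ~9.04e-05 after 10 iterations
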
Example #6
            truth = b[0]
        else:
            results = np.concatenate((results, result))
            truth = np.concatenate((truth, b[0]))
    return np.mean(np.abs(results - truth))


if __name__ == '__main__':

    locs = ['L_0', 'L_1', 'L_2', 'L_3', 'L_4', 'L_5', 'L_6', 'L_7', 'L_8', 'L_9']
    dd = l_utils.get_dataset('/home/weihao/Projects/p_files/L10000', locs)

    CV = 5

    for c in range(CV):
        truth, features = l_utils.prepare_data(dd[0], -c-1)
        print(len(truth))
        rf = RandomForestRegressor()
        #rf = RN()

        rf.fit(features, truth)
        truth, features = l_utils.prepare_data(dd[0], c+1)
        print(len(truth))
        rst1 = rf.predict(features)
        print('error1', np.mean(np.abs(np.array(rst1) - np.array(truth))))
        for a in range(0, len(truth), 10):
            print(rst1[a], truth[a])

        break
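
For comparison, a fully self-contained version of the same random-forest baseline on synthetic data (l_utils and its pickled dataset layout are not reproduced; all shapes below are placeholders):

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.rand(500, 8)
y = X @ rng.rand(8) + 0.05 * rng.randn(500)

rf = RandomForestRegressor(n_estimators=100, random_state=0)
rf.fit(X[:400], y[:400])
pred = rf.predict(X[400:])
print('error1', np.mean(np.abs(pred - y[400:])))
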