import numpy as np

import l_utils


def run_data(data, c, inputs, sess, xy, filename=None):
    # Evaluate the network on one fold and return the mean absolute error.
    truth, features = l_utils.prepare_data(data, c)
    feed = {inputs: features}
    results = sess.run(xy, feed_dict=feed)[:, 0]
    if filename is not None:
        # Dump at most ~2000 truth/prediction pairs for inspection.
        with open(filename, 'w') as fp:
            skip = int(len(truth) / 2000)
            if skip == 0:
                skip = 1
            for a in range(len(truth)):
                if a % skip == 0:
                    fp.write('{},{}\n'.format(truth[a], results[a]))
    return np.mean(np.abs(results - truth))
def run_data(data, c, inputs, sess, xy, filename, cfg):
    # Multi-input variant: the network takes a reference block ('input_0') plus
    # one block of cfg['n_att'] attributes per segment ('input_1', 'input_2', ...).
    truth, features = l_utils.prepare_data(data, c)
    feed = {}
    b_sz = len(truth)
    att = cfg['n_att']
    length = features.shape[1] // att
    feed[inputs['input_0']] = np.repeat(cfg['refs'], b_sz, axis=0)
    for a in range(length):
        feed[inputs['input_{}'.format(a + 1)]] = features[:, att * a:att * (a + 1)]
    results = sess.run(xy, feed_dict=feed)[:, 0]
    if filename is not None:
        # Dump at most ~2000 truth/prediction pairs for inspection.
        with open(filename, 'w') as fp:
            skip = int(len(truth) / 2000)
            if skip == 0:
                skip = 1
            for a in range(len(truth)):
                if a % skip == 0:
                    fp.write('{},{}\n'.format(truth[a], results[a]))
    return np.mean(np.abs(results - truth))
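# A minimal usage sketch for the multi-input run_data above, assuming (as the
# code implies rather than documents) that cfg['refs'] is a 2-D array of shape
# [1, n_att], so np.repeat(..., b_sz, axis=0) yields one reference row per
# sample, and that features has shape [n_samples, n_att * n_segments]. The
# names input_dic, sess, xy and HOME are the ones used in nn_fit; the cfg
# values shown are hypothetical.
#
#   cfg = {'n_att': 4, 'refs': np.zeros((1, 4))}
#   te_loss = run_data(data[0], c + 1, input_dic, sess, xy,
#                      '{}/tmp/te.csv'.format(HOME), cfg)
#   print 'test MAE', te_loss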
def nn_fit(config, cntn):
    cfg = Utils.load_json_file(config)
    locs = sorted(glob.glob(cfg['out_location'].format(HOME, '*')))
    print locs[0]
    data, att = l_utils.get_dataset(locs)
    print 'att', att

    CV = cfg['CV']
    nodes = map(int, cfg['nodes'].split(','))
    netFile = cfg['netFile']
    # CV = 5
    # nodes = [256, 16]

    lr0 = 1e-4
    iterations = 10
    loop = 1
    batch_size = 100

    for c in range(CV):
        print 'CV', c
        lr = lr0
        # te, tr = create_data(data, c)
        # len(te[0][0][0][1])

        output = tf.placeholder(tf.float32, [None, 1])
        input = tf.placeholder(tf.float32, [None, att])
        learning_rate = tf.placeholder(tf.float32, shape=[])

        net = sNet3({'data': input})
        net.real_setup(nodes, 1)
        xy = net.layers['output']

        # L1 loss; the squared-error alternative is kept below for reference.
        loss = tf.reduce_sum(tf.abs(tf.subtract(xy, output)))
        # loss = tf.reduce_sum(tf.square(tf.subtract(xy, output)))

        opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                     beta1=0.9, beta2=0.999, epsilon=1e-8,
                                     use_locking=False, name='Adam').minimize(loss)
        # opt = tf.train.GradientDescentOptimizer(learning_rate=cfg.lr).minimize(loss)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            if cntn:
                saver.restore(sess, netFile.format(HOME, c))

            t00 = datetime.datetime.now()
            st1 = ''
            for a in range(iterations):
                # c+1 selects the test split, -c-1 the training split
                # (see l_utils.prepare_data).
                te_loss = run_data(data[0], c + 1, input, sess, xy,
                                   '{}/tmp/te.csv'.format(HOME))
                tr_loss = run_data(data[0], -c - 1, input, sess, xy,
                                   '{}/tmp/tr.csv'.format(HOME))

                t1 = (datetime.datetime.now() - t00).seconds / 3600.0  # elapsed hours
                str = "it: {0} {1:.3f} {2} {3} {4}".format(
                    a * loop / 1000.0, t1, lr, tr_loss, te_loss)
                print str, st1

                t_loss = 0
                t_count = 0
                for dd in data:
                    truth, features = l_utils.prepare_data(dd, -c - 1, rd=True)
                    length = len(truth)
                    b0 = truth.reshape((length, 1))
                    for lp in range(loop):
                        for d in range(0, length, batch_size):
                            feed = {
                                input: features[d:d + batch_size, :],
                                output: b0[d:d + batch_size, :],
                                learning_rate: lr
                            }
                            _, A = sess.run([opt, loss], feed_dict=feed)
                            t_loss += A
                            t_count += len(b0[d:d + batch_size])
                st1 = '{}'.format(t_loss / t_count)

                saver.save(sess, netFile.format(HOME, c))

        # Clear the graph before building the next fold's network.
        tf.reset_default_graph()
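# Example invocation, as a sketch only: the config path below is hypothetical,
# but the JSON it points to must provide the keys read above ('out_location',
# 'CV', 'nodes', 'netFile'). Passing cntn=True resumes from the checkpoint
# saved at netFile.format(HOME, c).
#
#   if __name__ == '__main__':
#       nn_fit('{}/config/l_nn.json'.format(HOME), cntn=False)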
# Variant of the per-iteration loop for the multi-input network, feeding one
# attribute block per segment through input_dic.
for a in range(iterations):
    te_loss = run_data(data[0], c + 1, input_dic, sess, xy,
                       '{}/tmp/te.csv'.format(HOME), cfg)
    tr_loss = run_data(data[0], -c - 1, input_dic, sess, xy,
                       '{}/tmp/tr.csv'.format(HOME), cfg)

    t1 = (datetime.datetime.now() - t00).seconds / 3600.0
    str = "it: {0} {1:.3f} {2} {3} {4}".format(
        a * loop / 1000.0, t1, lr, tr_loss, te_loss)
    print str, st1

    t_loss = 0
    t_count = 0
    for dd in data:
        truth, features = l_utils.prepare_data(dd, -c - 1, rd=True)
        b0 = truth.reshape((len(truth), 1))
        for lp in range(loop):
            for d in range(0, len(truth), batch_size):
                feed = {}
                att = cfg['n_att']
                length = features.shape[1] // att
                # 'seg' rather than 'a' so the outer iteration counter is not shadowed.
                for seg in range(length):
                    feed[input_dic['input_{}'.format(seg + 1)]] = \
                        features[d:d + batch_size, att * seg:att * (seg + 1)]
                b_sz = len(features[d:d + batch_size, 0])
                feed[input_dic['input_0']] = np.repeat(cfg['refs'], b_sz, axis=0)
                feed[output] = b0[d:d + batch_size, :]
total_loss = run_data(tr, input, sess, xy, '/home/weihao/tmp/tr.csv')
te_loss = run_data(te, input, sess, xy, '/home/weihao/tmp/te.csv')

t1 = (datetime.datetime.now() - t00).seconds / 3600.0
str = "it: {0} {1:.3f} {2} {3} {4}".format(
    a * loop / 1000.0, t1, lr, total_loss, te_loss)
print str, st1

t_loss = 0
t_count = 0
for lp in range(loop):
    truth, features = l_utils.prepare_data(tr, rd=True)
    length = len(truth)
    b0 = truth.reshape((length, 1))
    for d in range(0, length, batch_size):
        feed = {
            input: features[d:d + batch_size, :],
            output: b0[d:d + batch_size, :],
            learning_rate: lr
        }
        _, A = sess.run([opt, loss], feed_dict=feed)
        t_loss += A
        t_count += len(b0[d:d + batch_size])

st1 = '{}'.format(t_loss / t_count)
saver.save(sess, netFile.format(c))
lr *= 0.99
            truth = b[0]
        else:
            results = np.concatenate((results, result))
            truth = np.concatenate((truth, b[0]))
    return np.mean(np.abs(results - truth))


if __name__ == '__main__':
    locs = ['L_0', 'L_1', 'L_2', 'L_3', 'L_4',
            'L_5', 'L_6', 'L_7', 'L_8', 'L_9']
    dd = l_utils.get_dataset('/home/weihao/Projects/p_files/L10000', locs)

    # Random-forest baseline: fit on the training split (-c-1), score on the
    # held-out split (c+1); only the first fold runs because of the break.
    CV = 5
    for c in range(CV):
        truth, features = l_utils.prepare_data(dd[0], -c - 1)
        print len(truth)
        rf = RandomForestRegressor()
        # rf = RN()
        rf.fit(features, truth)

        truth, features = l_utils.prepare_data(dd[0], c + 1)
        print len(truth)
        rst1 = rf.predict(features)
        print 'error1', np.mean(np.abs(np.array(rst1) - np.array(truth)))
        for a in range(0, len(truth), 10):
            print rst1[a], truth[a]
        break