def net_run(cfg, x):
    """Score a raw signal x with every CV model and return the refit mean/median."""
    # eval_file = cfg['eval_file'].format(HOME)
    location = cfg['location'].format(HOME)
    SEG = cfg['SEG']
    dct = cfg['dct'] > 0
    dim = cfg['dim']
    step = cfg['testing_step']
    # fp0 = open(eval_file, 'w')
    CV = cfg['CV']

    # Feature normalization statistics shared by all CV folds.
    out_location = cfg['out_location'].format(HOME, 0)
    par_location = os.path.dirname(out_location)
    avg_file = os.path.join(par_location, 'Avg.p')
    with open(avg_file, 'r') as fp:
        A = pickle.load(fp)
    avgf = A[0]
    stdf = A[1]
    # avg0 = A[2]
    att = len(avgf)

    # Per-fold refit parameters produced by the fitting step.
    rst_file = cfg['refit_file'].format(HOME)
    with open(rst_file, 'r') as fp:
        refit = pickle.load(fp)

    output1 = []
    output2 = []
    for c in range(CV):
        netFile = cfg['netFile'].format(HOME, c)
        # files = glob.glob(os.path.join(location, 'L_*.csv'))
        # idx = l_utils.rdm_ids(files)
        input = tf.placeholder(tf.float32, [None, att])
        nodes = map(int, cfg['nodes'].split(','))
        net = sNet3({'data': input})
        net.real_setup(nodes, 1, False)
        xy = net.layers['output']

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess, netFile)

            # Slide a SEG-long window over x and score each window.
            a = 0
            r = []
            while a <= len(x) - SEG:
                features = l_utils.feature_final(x[a:a + SEG], dct, dim)
                features = (features - avgf) / stdf
                features = features.reshape((1, len(features)))
                feed = {input: features}
                results = sess.run(xy, feed_dict=feed)[:, 0]
                a += step
                r.append(results[0])

            if len(r) > 0:
                v_mean = run_refit(np.mean(r), refit[c][0])
                v_median = run_refit(np.median(r), refit[c][0])
                print('{},{},{},{},{}'.format(c, len(r), v_mean, v_median, np.std(r)))
                plt.plot(r, '.')
                plt.show()
                output1.append(v_mean)
                output2.append(v_median)

        tf.reset_default_graph()

    output1 = np.mean(np.array(output1))
    output2 = np.mean(np.array(output2))
    return output1, output2
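# --- Illustrative sketch (not called by the code above) ----------------------
# net_run scores overlapping windows of length SEG taken every `step` samples
# and then refits the mean/median of the per-window predictions.  The helper
# below shows the same sliding-window pass in isolation; `score_fn` is a
# hypothetical stand-in for the restored network and is not part of this project.
def sliding_window_scores(x, seg, step, score_fn):
    """Apply score_fn to each window x[a:a+seg] for a = 0, step, 2*step, ..."""
    r = []
    a = 0
    while a <= len(x) - seg:
        r.append(score_fn(np.asarray(x[a:a + seg])))
        a += step
    return np.array(r)

# Example (window mean used as the stand-in model):
#   r = sliding_window_scores(np.random.randn(100000), 4096, 1000, np.mean)
#   print np.mean(r), np.median(r), np.std(r)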
def nn_fit(config, cntn):
    """Train one network per CV fold on the pre-extracted feature files."""
    cfg = Utils.load_json_file(config)
    locs = sorted(glob.glob(cfg['out_location'].format(HOME, '*')))
    print locs[0]
    data, att = l_utils.get_dataset(locs)
    print 'att', att

    CV = cfg['CV']
    nodes = map(int, cfg['nodes'].split(','))
    netFile = cfg['netFile']
    # CV = 5
    # nodes = [256, 16]
    lr0 = 1e-4
    iterations = 10
    loop = 1
    batch_size = 100

    for c in range(CV):
        print 'CV', c
        lr = lr0
        # te, tr = create_data(data, c)
        # len(te[0][0][0][1])
        output = tf.placeholder(tf.float32, [None, 1])
        input = tf.placeholder(tf.float32, [None, att])
        learning_rate = tf.placeholder(tf.float32, shape=[])
        net = sNet3({'data': input})
        net.real_setup(nodes, 1)
        xy = net.layers['output']

        # L1 (absolute-error) loss, summed over the batch.
        loss = tf.reduce_sum(tf.abs(tf.subtract(xy, output)))
        # loss = tf.reduce_sum(tf.square(tf.subtract(xy, output)))
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999,
                                     epsilon=0.00000001, use_locking=False,
                                     name='Adam').minimize(loss)
        # opt = tf.train.GradientDescentOptimizer(learning_rate=cfg.lr).minimize(loss)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            if cntn:
                saver.restore(sess, netFile.format(HOME, c))

            t00 = datetime.datetime.now()
            st1 = ''
            for a in range(iterations):
                te_loss = run_data(data[0], c + 1, input, sess, xy,
                                   '{}/tmp/te.csv'.format(HOME))
                tr_loss = run_data(data[0], -c - 1, input, sess, xy,
                                   '{}/tmp/tr.csv'.format(HOME))
                t1 = (datetime.datetime.now() - t00).seconds / 3600.0
                msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                    a * loop / 1000.0, t1, lr, tr_loss, te_loss)
                print msg, st1

                t_loss = 0
                t_count = 0
                for dd in data:
                    truth, features = l_utils.prepare_data(dd, -c - 1, rd=True)
                    length = len(truth)
                    b0 = truth.reshape((length, 1))
                    for lp in range(loop):
                        for d in range(0, length, batch_size):
                            feed = {
                                input: features[d:d + batch_size, :],
                                output: b0[d:d + batch_size, :],
                                learning_rate: lr
                            }
                            _, A = sess.run([opt, loss], feed_dict=feed)
                            t_loss += A
                            t_count += len(b0[d:d + batch_size])
                st1 = '{}'.format(t_loss / t_count)
                saver.save(sess, netFile.format(HOME, c))

        tf.reset_default_graph()
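# --- Illustrative sketch (not called by the code above) ----------------------
# nn_fit minimizes the summed absolute error per batch and prints the running
# per-sample average (t_loss / t_count).  The NumPy helper below computes the
# same quantity for a full prediction array, which can be used to sanity-check
# the printed numbers; it is an illustration, not part of the training graph.
def mean_abs_error(pred, truth):
    """Per-sample mean absolute error, matching sum(|xy - output|) / count."""
    pred = np.asarray(pred, dtype=float).reshape(-1)
    truth = np.asarray(truth, dtype=float).reshape(-1)
    return np.mean(np.abs(pred - truth))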
def train(c, cfg, te=None):
    """Train the fold-c network, or only evaluate it when te is given."""
    files = glob.glob(os.path.join(cfg['location'].format(HOME), 'L_*.csv'))
    ids = l_utils.rdm_ids(files)

    # Files assigned to fold c are held out (file1); the rest are for training (file2).
    file1 = []
    file2 = []
    for f in ids:
        if ids[f] == c:
            file1.append(f)
        else:
            file2.append(f)

    nodes = map(int, cfg['nodes'].split(','))
    netFile = cfg['netFile']
    lr = cfg['lr']
    iterations = 10000
    loop = 1
    batch_size = 100
    cntn = cfg['cntn'] > 0
    print 'CV', c

    if te is None:
        data2 = generate_data(file2, .1, cfg['tmp'] + '/L*.p')
        data1 = generate_data(file1, 1.1, cfg['tmp'] + '/L*.p')
        att = data2[1].shape[1]
    else:
        data1 = get_testing_data(file1, cfg)
        att = data1[0][1].shape[1]

    output = tf.placeholder(tf.float32, [None, 1])
    input = tf.placeholder(tf.float32, [None, att])
    learning_rate = tf.placeholder(tf.float32, shape=[])
    net = sNet3({'data': input})
    net.real_setup(nodes, 1)

    # Feature normalization: compute the statistics once, then apply them to
    # both the held-out and the training features.
    avg_file = avg_file_name(cfg['netFile']).format(HOME, c)
    if (not cntn) and (te is None):
        get_avg_file(data2[1], avg_file)
    if te is None:
        data1 = (data1[0], avg_correction(data1[1], avg_file))
        data2 = (data2[0], avg_correction(data2[1], avg_file))
    else:
        data2 = []
        for d in data1:
            data2.append((d[0], avg_correction(d[1], avg_file)))
        data1 = data2

    xy = net.layers['output']
    loss = tf.reduce_sum(tf.abs(tf.subtract(xy, output)))
    # loss = tf.reduce_sum(tf.square(tf.subtract(xy, output)))
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999,
                                 epsilon=0.00000001, use_locking=False,
                                 name='Adam').minimize(loss)
    # opt = tf.train.GradientDescentOptimizer(learning_rate=cfg.lr).minimize(loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        if cntn or te is not None:
            saver.restore(sess, netFile.format(HOME, c))
        if te is not None:
            run_testing(data1, input, sess, xy, '{}/tmp/test.csv'.format(HOME), cfg)
            exit(0)

        t00 = datetime.datetime.now()
        st1 = ''
        for a in range(iterations):
            te_loss = run_data(data1, input, sess, xy, '{}/tmp/te.csv'.format(HOME), cfg)
            tr_loss = run_data(data2, input, sess, xy, '{}/tmp/tr.csv'.format(HOME), cfg)
            t1 = (datetime.datetime.now() - t00).seconds / 3600.0
            msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                a * loop / 1000.0, t1, lr, tr_loss, te_loss)
            print msg, st1

            t_loss = 0
            t_count = 0
            # Regenerate the training data for this iteration.
            data = generate_data(file2, .3, cfg['tmp'] + '/L*.p')
            truth = data[0]
            features = avg_correction(data[1], avg_file)
            length = len(truth)
            b0 = truth.reshape((length, 1))
            for lp in range(loop):
                for d in range(0, length, batch_size):
                    feed = {
                        input: features[d:d + batch_size, :],
                        output: b0[d:d + batch_size, :],
                        learning_rate: lr
                    }
                    _, A = sess.run([opt, loss], feed_dict=feed)
                    t_loss += A
                    t_count += len(b0[d:d + batch_size])
            st1 = '{}'.format(t_loss / t_count)
            saver.save(sess, netFile.format(HOME, c))

    tf.reset_default_graph()
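# --- Illustrative sketch (not called by the code above) ----------------------
# train() normalizes features through get_avg_file / avg_correction, and the
# evaluation paths read the same statistics back as (avgf, stdf) from a pickle.
# The pair below sketches that z-score convention under the assumption that the
# pickle holds [column_means, column_stds]; the function names are hypothetical
# and this is not the project's own implementation of those helpers.
def save_feature_stats(features, stats_file):
    """Pickle per-column mean and std of a 2-D feature matrix."""
    avgf = np.mean(features, axis=0)
    stdf = np.std(features, axis=0)
    with open(stats_file, 'w') as fp:
        pickle.dump([avgf, stdf], fp)

def apply_feature_stats(features, stats_file):
    """Load the pickled statistics and z-score the features with them."""
    with open(stats_file, 'r') as fp:
        avgf, stdf = pickle.load(fp)[:2]
    return (features - avgf) / stdf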
def fft_test(config):
    """Evaluate every CV model on its held-out files and log the errors."""
    cfg = Utils.load_json_file(config)
    eval_file = cfg['eval_file'].format(HOME)
    location = cfg['location'].format(HOME)
    SEG = cfg['SEG']
    dct = cfg['dct'] > 0
    dim = cfg['dim']
    step = cfg['testing_step']
    fp0 = open(eval_file, 'w')
    CV = cfg['CV']

    for c in range(CV):
        netFile = cfg['netFile'].format(HOME, c)
        files = glob.glob(os.path.join(location, 'L_*.csv'))
        idx = l_utils.rdm_ids(files)

        # Normalization statistics for this fold's features.
        out_location = cfg['out_location'].format(HOME, c)
        par_location = os.path.dirname(out_location)
        avg_file = os.path.join(par_location, 'Avg.p')
        with open(avg_file, 'r') as fp:
            A = pickle.load(fp)
        avgf = A[0]
        stdf = A[1]
        # avg0 = A[2]
        att = len(avgf)

        input = tf.placeholder(tf.float32, [None, att])
        nodes = map(int, cfg['nodes'].split(','))
        net = sNet3({'data': input})
        net.real_setup(nodes, 1)
        xy = net.layers['output']

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        err1 = []
        err2 = []
        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess, netFile)
            for filename in idx:
                # Only evaluate the files held out for this fold.
                if not idx[filename] == c:
                    continue
                # filename = '/home/weihao/tmp/L/L_11.csv'
                with open(filename, 'r') as fp:
                    line0 = fp.readlines()
                print len(line0)

                seg_step = 100000
                t_scale = float(line0[0].split(',')[1])
                for start in range(0, len(line0), seg_step):
                    lines = line0[start:start + SEGMENT]
                    if len(lines) < SEGMENT:
                        break
                    avg, x = get_values(lines)
                    avg /= t_scale

                    # Slide a SEG-long window over this segment and score it.
                    a = 0
                    r = []
                    while a <= len(x) - SEG:
                        features = l_utils.feature_final(x[a:a + SEG], dct, dim)
                        features = (features - avgf) / stdf
                        features = features.reshape((1, len(features)))
                        feed = {input: features}
                        results = sess.run(xy, feed_dict=feed)[:, 0]
                        a += step
                        r.append(results[0])

                    if len(r) > 0:
                        # plt.plot(r)
                        # plt.show()
                        r = np.array(r)
                        dr = np.mean(np.abs(r - avg))
                        err1.append(np.mean(r) - avg)
                        err2.append(np.median(r) - avg)
                        fp0.write('{},{},{},{},{},{},{},{},{}\n'.format(
                            c, filename, avg, len(r), np.mean(r), np.median(r),
                            np.std(r), dr, r[0]))
                        # print c, avg, len(r), np.mean(r)+avg0, np.median(r)+avg0

        tf.reset_default_graph()
        print 'errors', np.mean(np.abs(err1)), np.mean(np.abs(err2))

    fp0.close()
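# --- Illustrative sketch (not called by the code above) ----------------------
# fft_test only evaluates the files whose fold id from l_utils.rdm_ids matches
# the current CV index c.  The helper below shows one simple way such a mapping
# can be built (round-robin over a shuffled file list); the real rdm_ids may
# differ, so treat this purely as an illustration of the idea.
def round_robin_fold_ids(files, n_folds):
    """Map each file name to a fold id in [0, n_folds)."""
    files = list(files)
    np.random.shuffle(files)
    return dict((f, i % n_folds) for i, f in enumerate(files))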
def nn_fit2(config, cntn):
    """Fit the second-stage network on the aggregated per-file results."""
    cfg = Utils.load_json_file(config)
    data_file = "{}/tmp/fit_full.csv"  # cfg['eval_file']
    data_file = data_file.format(HOME)
    dd = np.array(Utils.read_csv(data_file)).astype(float)
    print dd.shape

    # Column 1 is the target; column 0 and columns 3+ form the features
    # (column 2 is dropped).
    T = dd[:, 1]
    F = np.concatenate((dd[:, :1], dd[:, 3:]), axis=1)

    # Random ~80/20 split into training (T1, F1) and test (T2, F2) rows.
    T1 = []
    T2 = []
    F1 = []
    F2 = []
    for a in range(len(T)):
        if np.random.random() > 0.2:
            T1.append(T[a])
            F1.append(F[a, :])
        else:
            T2.append(T[a])
            F2.append(F[a, :])

    att = F.shape[1]
    node2 = map(int, cfg['node2'].split(','))
    netFile = cfg['netFile2']
    # nodes = [256, 16]
    lr = 1e-3
    iterations = 1000
    loop = 100
    batch_size = 100

    output = tf.placeholder(tf.float32, [None, 1])
    input = tf.placeholder(tf.float32, [None, att])
    learning_rate = tf.placeholder(tf.float32, shape=[])
    net = sNet3({'data': input})
    net.real_setup(node2, 1, keep_prob=0.8)
    xy = net.layers['output']

    loss = tf.reduce_sum(tf.abs(tf.subtract(xy, output)))
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999,
                                 epsilon=0.00000001, use_locking=False,
                                 name='Adam').minimize(loss)
    # opt = tf.train.GradientDescentOptimizer(learning_rate=cfg.lr).minimize(loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        if cntn:
            saver.restore(sess, netFile.format(HOME))

        t00 = datetime.datetime.now()
        st1 = ''
        for a in range(iterations):
            te_loss = run_data(T2, F2, input, sess, xy, '{}/../tmp/te.csv'.format(HOME))
            tr_loss = run_data(T1, F1, input, sess, xy, '{}/../tmp/tr.csv'.format(HOME))
            t1 = (datetime.datetime.now() - t00).seconds / 3600.0
            msg = "it: {0} {1:.3f} {2} {3} {4}".format(
                a * loop / 1000.0, t1, lr, tr_loss, te_loss)
            print msg, st1

            t_loss = 0
            t_count = 0
            truth = np.array(T1)
            features = np.array(F1)
            length = len(truth)
            b0 = truth.reshape((length, 1))
            for lp in range(loop):
                for d in range(0, length, batch_size):
                    feed = {
                        input: features[d:d + batch_size, :],
                        output: b0[d:d + batch_size, :],
                        learning_rate: lr
                    }
                    _, A = sess.run([opt, loss], feed_dict=feed)
                    t_loss += A
                    t_count += len(b0[d:d + batch_size])
            st1 = '{}'.format(t_loss / t_count)
            lr *= 0.99  # decay the learning rate by 1% each outer iteration
            saver.save(sess, netFile.format(HOME))

    tf.reset_default_graph()
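# --- Illustrative sketch (not called by the code above) ----------------------
# nn_fit2 holds out roughly 20% of the rows at random before training.  The
# helper below reproduces that random row split as a standalone function; the
# name and the `holdout` parameter are hypothetical and purely illustrative.
def random_row_split(truth, features, holdout=0.2):
    """Split rows into (train, test), each row landing in test with P(holdout)."""
    tr_t, tr_f, te_t, te_f = [], [], [], []
    for i in range(len(truth)):
        if np.random.random() > holdout:
            tr_t.append(truth[i])
            tr_f.append(features[i, :])
        else:
            te_t.append(truth[i])
            te_f.append(features[i, :])
    return (np.array(tr_t), np.array(tr_f)), (np.array(te_t), np.array(te_f))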