def test_linearize():
    """Run the last node of a linearized caterpillar graph and print memory use.

    Builds the graph with ``make_caterpillar_graph(5)``, calls
    ``linearize.linearize()``, then runs the op of the last returned node
    while stderr is captured.  The captured output is handed to
    ``memory_util.print_memory_timeline`` with
    ``ignore_less_than_bytes=1000``.
    """
    caterpillar = make_caterpillar_graph(5)
    linearize.linearize()
    session = create_session()

    # Imported here, after the graph and session exist, exactly as before.
    import memory_util
    memory_util.vlog(1)

    with memory_util.capture_stderr() as captured:
        session.run(caterpillar[-1].op)
    memory_util.print_memory_timeline(captured, ignore_less_than_bytes=1000)
def run_and_analyze(in_shape):
    """Evaluate ``y`` on random input and compare expected vs. peak memory.

    Feeds ``np.random.randn(*in_shape)`` into the module-level placeholder
    ``x``, evaluates ``y`` with the module-level session ``sess`` while
    stderr is captured, and prints:

    * the shape of the result,
    * a hand-computed estimate (inputs + weights + outputs, 4 bytes each),
    * the peak reported by ``memory_util.peak_memory`` and its ratio to the
      estimate.

    The memory timeline is then printed and plotted, and an ``ipdb``
    breakpoint is entered so the figures can be inspected interactively.

    Args:
        in_shape: sequence of ints giving the dimensions of the fed array.
    """
    # `reduce` is a builtin only on Python 2; functools has it on 2.6+ and 3.
    from functools import reduce

    def element_count(shape):
        # The initial value 1 makes an empty shape (a scalar) count as one
        # element instead of raising TypeError.
        return reduce(lambda i, j: i * j, shape, 1)

    with memory_util.capture_stderr() as stderr:
        res = sess.run(y, feed_dict={x: np.random.randn(*in_shape)})
    print(res.shape)

    expected_mem = element_count(in_shape)  # inputs
    # weights: a square 2-D convolution kernel holds k * k * in * out values.
    # The previous `k**2 + in * out` added the two factors instead of
    # multiplying them.
    # NOTE(review): assumes `y` is a single conv layer built from
    # kernel_size / in_channels / out_channels -- confirm against the graph.
    expected_mem += (kernel_size ** 2) * in_channels * out_channels
    expected_mem += element_count(res.shape)  # outputs
    expected_mem *= 4  # 4 bytes per float

    peak_mem = memory_util.peak_memory(stderr)

    # Single-argument print calls emit the same text as the old Python 2
    # `print label, value` statements and are also valid on Python 3.
    print('%s %s' % ('expected mem usage (MB): ', expected_mem / BYTES_PER_MB))
    print('%s %s' % ('peak mem usage (MB): ', peak_mem / BYTES_PER_MB))
    print('%s %s' % ('peak:expected mem ratio: ', peak_mem / float(expected_mem)))
    print('')

    memory_util.print_memory_timeline(stderr)
    memory_util.plot_memory_timeline(plt, stderr)

    # Deliberate interactive stop after plotting.
    import ipdb
    ipdb.set_trace()
def test_linearize():
    """Run the last node of a linearized caterpillar graph and print memory use.

    Builds the graph with ``make_caterpillar_graph(5)``, calls
    ``linearize.linearize()``, then runs the op of the last returned node
    while stderr is captured.  The captured output is handed to
    ``memory_util.print_memory_timeline`` with
    ``ignore_less_than_bytes=1000``.
    """
    caterpillar = make_caterpillar_graph(5)
    linearize.linearize()
    session = create_session()

    # Imported here, after the graph and session exist, exactly as before.
    import memory_util
    memory_util.vlog(1)

    with memory_util.capture_stderr() as captured:
        session.run(caterpillar[-1].op)
    memory_util.print_memory_timeline(captured, ignore_less_than_bytes=1000)


if __name__ == '__main__':
    setup_env()
    import memory_util
    memory_util.vlog(1)

    # Disabled experiments, kept for reference:
    #   sess = create_session()
    #   nodes = make_caterpillar_graph()
    #   test_print()
    #   linearize.print_tf_graph(linearize.get_graph())
    #   print(tf.get_default_graph().as_graph_def())
    #   test_toposort()
    test_linearize()
    sys.exit()

    # NOTE(review): nothing below executes, because sys.exit() above raises
    # SystemExit.  `stderr` would only be bound by the disabled `with` block:
    #   with memory_util.capture_stderr() as stderr:
    #     print(sess.run(nodes[-1][0,0]))
    print(len(stderr.getvalue()))
    memory_util.print_memory_timeline(stderr, ignore_less_than_bytes=1000)
# NOTE(review): the next two statements are the tail of a function whose
# `def` line is not part of this excerpt (the `return` cannot stand at module
# level).  They build a session config with device-placement logging off and
# the graph optimizer level set to L0, then return an InteractiveSession.
config = tf.ConfigProto(
    log_device_placement=False,
    graph_options=tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L0)))
return tf.InteractiveSession(config=config)

# Measure peak memory of the gradient of a chain of tanh ops.
node_mbs = 1
length = 5
dtype = np.float32
# 250000 float32 values are 1,000,000 bytes, so `n` elements take
# `node_mbs` (decimal) megabytes per tensor.
n = node_mbs * 250000

a0_ = tf.ones((n, ), dtype=dtype)
a0 = tf.Variable(a0_, name="a0")
a = a0
# range(1, length) yields length - 1 ops, named "a1" .. "a4" here.
for i in range(1, length):
    name = "a" + str(i)
    a = tf.tanh(a, name=name)

# Gradient of the end of the chain with respect to the variable at its start.
grad = tf.gradients([a], [a0])[0]

sess = create_session()
sess.run(tf.global_variables_initializer())

# Only the gradient op is run while stderr is captured; the initializer above
# runs outside the capture.
with memory_util.capture_stderr() as stderr:
    sess.run(grad.op)

peak_memory = memory_util.peak_memory(stderr)
memory_util.print_memory_timeline(stderr)
print("Peak memory: %d" % (peak_memory, ))
# Set training hyperparameters, build the digester / dataset / manager, and
# run manager.Train(1) while stderr is captured so the memory timeline can be
# printed afterwards.
# NOTE(review): `a` and `TreatedAtoms` are defined outside this excerpt.
PARAMS["learning_rate"] = 0.00001
PARAMS["momentum"] = 0.95
PARAMS["max_steps"] = 101
# Original note: 40 was the largest mini-batch size that trained without a
# memory error (the value set here is 400 -- TODO confirm which is current).
PARAMS["batch_size"] = 400
PARAMS["test_freq"] = 2
# The precision is stored as a string, not as a dtype object.
PARAMS["tf_prec"] = "tf.float64"
PARAMS["GradScaler"] = 1.0
PARAMS["DipoleScaler"]=1.0
PARAMS["NeuronType"] = "relu"
PARAMS["HiddenLayers"] = [1000, 1000, 1000]
PARAMS["EECutoff"] = 15.0
PARAMS["EECutoffOn"] = 7.0
PARAMS["Erf_Width"] = 0.4
#PARAMS["AN1_r_Rc"] = 8.0
#PARAMS["AN1_num_r_Rs"] = 64
PARAMS["EECutoffOff"] = 15.0
PARAMS["learning_rate_dipole"] = 0.0001
PARAMS["learning_rate_energy"] = 0.00001
PARAMS["SwitchEpoch"] = 10

# Initialize the digester that applies the descriptor.
d = MolDigester(TreatedAtoms, name_="ANI1_Sym_Direct", OType_="EnergyAndDipole")
# Initialize the TensorMolData object that holds the training data.
tset = TensorMolData_BP_Direct_EE_WithEle(a, d, order_=1, num_indis_=1, type_="mol", WithGrad_ = True)
# Alternative dataset classes, disabled:
#tset = TensorMolData_BP_Direct_EE(a, d, order_=1, num_indis_=1, type_="mol", WithGrad_ = True)
#tset = TensorMolData_BP_Multipole_2_Direct(a, d, order_=1, num_indis_=1, type_="mol", WithGrad_ = False)

# Initialize the manager that manages training of the neural network.
# Alternative network names, disabled:
#manager=TFMolManage("",tset,False,"fc_sqdiff_BP_Direct_EE_ChargeEncode")
#manager=TFMolManage("",tset,False,"Dipole_BP_2_Direct")
manager=TFMolManage("",tset,False,"fc_sqdiff_BP_Direct_EE_Update")

# Set after the manager is constructed and before Train is called.
PARAMS['Profiling']=1
with memory_util.capture_stderr() as stderr:
    manager.Train(1)
memory_util.print_memory_timeline(stderr, ignore_less_than_bytes=1000)