def __init__(self, min_action_set, hist_len, checkpoint_policy):
    self.minimal_action_set = min_action_set
    print("hist len", hist_len)
    self.network = Network(len(self.minimal_action_set), hist_len)
    self.network.load_state_dict(
        torch.load(checkpoint_policy)['state_dict'])
    if torch.cuda.is_available():
        print("Initializing Cuda Nets...")
        self.network.cuda()
def __init__(self, min_action_set, learning_rate, alpha,
             checkpoint_dir, hist_len, l2_penalty):
    self.minimal_action_set = min_action_set
    print("hist len", hist_len)
    self.network = Network(len(self.minimal_action_set), hist_len)
    if torch.cuda.is_available():
        print("Initializing Cuda Nets...")
        self.network.cuda()
    self.optimizer = optim.Adam(self.network.parameters(),
                                lr=learning_rate,
                                weight_decay=l2_penalty)
    self.checkpoint_directory = checkpoint_dir
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ log spam
    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    logging.getLogger().addHandler(logging.StreamHandler())
    net = Network("model")
    self_play(net)
def main():
    ValidSampler = Sampler(utils.valid_file)
    TestSampler = Sampler(utils.test_file)
    networks = []
    weights = []
    for i in range(5):
        if i == 0:
            TrainSampler = Sampler(utils.train_file)
            prev_ys = np.copy(TrainSampler.labels)
        else:
            TrainSampler = Sampler(utils.train_file, prev_ys)
        network = Network()
        network.train(TrainSampler)
        cur_ys = network.predict(TrainSampler)
        # least-squares stage weight: w = <cur_ys, prev_ys> / <cur_ys, cur_ys>
        b1 = np.sum(np.multiply(cur_ys, prev_ys))
        b2 = np.sum(np.multiply(cur_ys, cur_ys))
        w = float(b1) / b2
        # subtract the weighted predictions so the next stage fits the residual
        prev_ys = np.subtract(prev_ys, w * cur_ys)
        print(i, 'done with weight', w)
        network.save('network_' + str(i) + '.ckpt')
        weights.append(w)
        networks.append(network)
    validate_boost(ValidSampler, networks, weights)
    validate_boost(TestSampler, networks, weights)
    np.save('weights.npy', weights)
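# Why the weight formula above works: each boosting stage fits w by least
# squares, minimizing ||prev_ys - w * cur_ys||^2, whose closed form is
# w = <cur_ys, prev_ys> / <cur_ys, cur_ys>. A minimal sketch with made-up
# numbers (not from the source) checking that property:
import numpy as np

prev = np.array([1.0, 2.0, 3.0])   # hypothetical residual targets
cur = np.array([0.5, 1.0, 2.0])    # hypothetical stage predictions
w = np.dot(cur, prev) / np.dot(cur, cur)  # closed-form least-squares weight

# the residual norm at w is no larger than at nearby weights
r = np.linalg.norm(prev - w * cur)
assert r <= np.linalg.norm(prev - (w + 0.1) * cur)
assert r <= np.linalg.norm(prev - (w - 0.1) * cur)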
def test(a, b):
    white = Network(a)
    black = Network(b)
    print("training")
    white.train()
    print("playing")
    board = chess.Board()
    for i in range(10):
        print(i)
        board.reset()
        while not board.is_game_over():
            move = play(board, white, True)
            board.push(move)
            # re-check after white's move so black never moves on a finished game
            if board.is_game_over():
                break
            move = play(board, black, False)
            board.push(move)
        print(board.result())
class Clone:
    def __init__(self, min_action_set, hist_len, checkpoint_policy):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        self.network.load_state_dict(
            torch.load(checkpoint_policy)['state_dict'])
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[len(outputs) - 1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return np.argmax(vals)
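# A hypothetical usage sketch of Clone; the action set, history length,
# checkpoint path, and input shape below are all assumptions for
# illustration, not values from the source:
clone = Clone(min_action_set=[0, 1, 2, 3], hist_len=4,
              checkpoint_policy='checkpoints/policy.pth')
state = np.zeros((1, 4, 84, 84), dtype=np.float32)  # assumed Atari-style stack
action = clone.get_action(state)  # index of the highest-probability action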
Iterator/generator: get a batch of data.
This function is an iterator/generator that yields only chunk_size
samples at a time, for use in a for loop, just like the range() function.
"""
if len(samples) != len(labels):
    raise Exception('Length of samples and labels must equal')
stepStart = 0  # initial step
i = 0
while stepStart < len(samples):
    stepEnd = stepStart + chunk_size
    if stepEnd < len(samples):
        # note: a trailing chunk smaller than chunk_size is dropped
        yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd]
        i += 1
    stepStart = stepEnd

net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2)
net.define_inputs(
    train_samples_shape=(64, image_size, image_size, num_channels),
    train_labels_shape=(64, num_labels),
    test_samples_shape=(500, image_size, image_size, num_channels)
)
net.add_conv(patch_size=3, in_depth=num_channels, out_depth=16,
             activation='relu', pooling=False, name='conv1')
net.add_conv(patch_size=3, in_depth=16, out_depth=16,
             activation='relu', pooling=True, name='conv2')
net.add_conv(patch_size=3, in_depth=16, out_depth=16,
             activation='relu', pooling=False, name='conv3')
net.add_conv(patch_size=3, in_depth=16, out_depth=16,
             activation='relu', pooling=True, name='conv4')

# 4 = two pooling layers, each halving the spatial size
# 16 = conv4 out_depth
net.add_fc(in_num_nodes=(image_size // 4) * (image_size // 4) * 16,
           out_num_nodes=16, activation='relu', name='fc1')
net.add_fc(in_num_nodes=16, out_num_nodes=10, activation='relu', name='fc2')
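# A hypothetical way to drive the chunk generator above: wrap its body in a
# named function (data_chunks here is an assumed name, not from the source)
# and iterate it like range(). Runnable sketch with toy data:
import numpy as np

def data_chunks(samples, labels, chunk_size):
    if len(samples) != len(labels):
        raise Exception('Length of samples and labels must equal')
    stepStart, i = 0, 0
    while stepStart < len(samples):
        stepEnd = stepStart + chunk_size
        if stepEnd < len(samples):
            yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd]
            i += 1
        stepStart = stepEnd

toy_samples = np.arange(10).reshape(10, 1)
toy_labels = np.arange(10)
for i, batch_samples, batch_labels in data_chunks(toy_samples, toy_labels, 4):
    print(i, batch_samples.shape)  # two full chunks; the short tail is dropped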
for use in a for loop, just like the range() function.
"""
if len(samples) != len(labels):
    raise Exception('Length of samples and labels must equal')
stepStart = 0  # initial step
i = 0
while stepStart < len(samples):
    stepEnd = stepStart + chunkSize
    if stepEnd < len(samples):
        yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd]
        i += 1
    stepStart = stepEnd

net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2,
              dropout_rate=0.9, base_learning_rate=0.001, decay_rate=0.99)
net.define_inputs(
    train_samples_shape=(64, image_size, image_size, num_channels),
    train_labels_shape=(64, num_labels),
    test_samples_shape=(500, image_size, image_size, num_channels),
)
# net.add_conv(patch_size=3, in_depth=num_channels, out_depth=32, activation='relu', pooling=False, name='conv1')
net.add_conv(patch_size=3,
             # the remaining arguments are truncated in the source; the values
             # below are assumed to follow the commented conv1 call above
             in_depth=num_channels, out_depth=32, activation='relu',
             pooling=False, name='conv1')
import os

import numpy as np
import sklearn.metrics
import sklearn.model_selection
from sklearn.tree import DecisionTreeClassifier as Model

import preprocessing
from cnn import Network

DATA_DIR = 'data'

pipeline = [
    (preprocessing.extract_rgb, True),
    # (preprocessing.enrich_mirror, False),
]

data_file = os.path.join(DATA_DIR, 'data_train.dat')
targets_file = os.path.join(DATA_DIR, 'targets_train.dat')
data, targets = (preprocessing.load_data(data_file),
                 preprocessing.load_data(targets_file))

data_train, data_validation, targets_train, targets_validation = \
    sklearn.model_selection.train_test_split(
        data, targets, test_size=0.25, random_state=42, shuffle=True,
        stratify=targets)

# apply each preprocessing step to the training split, and optionally
# (second tuple element) to the validation split as well
for func, apply_test in pipeline:
    data_train, targets_train = func(data_train, targets_train)
    if apply_test:
        data_validation, targets_validation = func(data_validation,
                                                   targets_validation)

print(data_train.shape[1:])
model = Network(input_shape=data_train.shape[1:])
model.fit(data_train, targets_train, data_validation, targets_validation)
predict = model.predict(data_validation)
print(sklearn.metrics.accuracy_score(targets_validation, predict))
class Imitator:
    def __init__(self, min_action_set, learning_rate, alpha,
                 checkpoint_dir, hist_len, l2_penalty):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=learning_rate,
                                    weight_decay=l2_penalty)
        self.checkpoint_directory = checkpoint_dir

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[len(outputs) - 1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return np.argmax(vals)

    # potentially optimizable
    def compute_labels(self, sample, minibatch_size):
        labels = Variable(utils.long_tensor(minibatch_size))
        # The list of ALE actions taken for the minibatch
        actions_taken = [x.action for x in sample]
        for i in range(len(actions_taken)):
            labels[i] = int(actions_taken[i])
        return labels

    def get_loss(self, outputs, labels):
        return nn.CrossEntropyLoss()(outputs, labels)

    def validate(self, dataset, minibatch_size):
        '''Run the dataset through the loss to get the validation error.'''
        validation_data = dataset.get_dataset()
        v_loss = 0.0
        for i in range(0, len(validation_data) - minibatch_size,
                       minibatch_size):
            sample = validation_data[i:i + minibatch_size]
            with torch.no_grad():
                state = Variable(
                    utils.float_tensor(
                        np.stack([np.squeeze(x.state) for x in sample])))
            # compute the target values for the minibatch
            labels = self.compute_labels(sample, minibatch_size)
            self.optimizer.zero_grad()
            # Forward pass the minibatch through the prediction network.
            activations = self.network(state)
            # Extract the Q-value vectors of the minibatch from the final
            # layer's activations. See the return values of the forward()
            # functions in cnn.py.
            output = activations[len(activations) - 1]
            loss = self.get_loss(output, labels)
            v_loss += loss
        return v_loss

    def train(self, dataset, minibatch_size):
        # sample a minibatch of transitions
        sample = dataset.sample_minibatch(minibatch_size)
        state = Variable(
            utils.float_tensor(np.stack([np.squeeze(x.state)
                                         for x in sample])))
        # compute the target values for the minibatch
        labels = self.compute_labels(sample, minibatch_size)
        self.optimizer.zero_grad()
        # Forward pass the minibatch through the prediction network.
        activations = self.network(state)
        # Extract the Q-value vectors of the minibatch from the final
        # layer's activations. See the return values of the forward()
        # functions in cnn.py.
        output = activations[len(activations) - 1]
        loss = self.get_loss(output, labels)
        loss.backward()
        self.optimizer.step()
        return loss

    def checkpoint_network(self, env_name, extra_info):
        '''Checkpoint the network weights.'''
        print("Checkpointing Weights")
        utils.save_checkpoint({'state_dict': self.network.state_dict()},
                              self.checkpoint_directory, env_name, extra_info)
        print("Checkpointed.")
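# A hypothetical training/validation loop over Imitator; every constructor
# argument, the dataset objects, and the environment name here are
# assumptions for illustration, not values from the source:
imitator = Imitator(min_action_set=[0, 1, 2, 3], learning_rate=1e-4,
                    alpha=0.95, checkpoint_dir='checkpoints', hist_len=4,
                    l2_penalty=1e-5)
for step in range(10000):
    loss = imitator.train(train_dataset, minibatch_size=32)
    if step % 1000 == 0:
        v_loss = imitator.validate(valid_dataset, minibatch_size=32)
        print(step, float(loss), float(v_loss))
imitator.checkpoint_network('breakout', 'final')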
def arg_parse():
    parser = argparse.ArgumentParser(
        description='Farsi digit Detection Network')
    parser.add_argument("--cfg", dest='cfgfile', help="Config file",
                        default="cfg/architecture.cfg", type=str)
    return parser.parse_args()

args = arg_parse()

# Set up the neural network
print("Preparing network .....")
network = Network(args.cfgfile)
network.compile()

print("Loading input .....")
dataset = Dataset()
x_train, y_train, x_test, y_test = dataset.loadData(
    network.net_info.input_shape)

# Encode the labels as one-hot vectors
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print("Training network .....")
network.fit(x_train, y_train, x_test, y_test)
print("evaluation: ")
def main():
    current_time = datetime.now().strftime('%Y%m%d-%H%M')
    checkpoint_dir = 'checkpoints'
    if FLAGS.checkpoint is not None:
        # strip a leading 'checkpoints/' prefix if present
        # (str.lstrip strips characters, not a prefix, so it is avoided here)
        checkpoint_name = FLAGS.checkpoint
        if checkpoint_name.startswith('checkpoints/'):
            checkpoint_name = checkpoint_name[len('checkpoints/'):]
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)
    else:
        checkpoint_path = os.path.join(checkpoint_dir,
                                       '{}'.format(current_time))
    try:
        os.makedirs(checkpoint_path)
    except os.error:
        print('Unable to make checkpoints directory: %s' % checkpoint_path)
    model_save_path = os.path.join(checkpoint_path, 'model.ckpt')

    nn = Network()
    saver = tf.train.Saver()

    print('Build session.')
    tfconfig = tf.ConfigProto()
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)

    if FLAGS.checkpoint is not None:
        print('Restore from pre-trained model.')
        checkpoint = tf.train.get_checkpoint_state(checkpoint_path)
        meta_graph_path = checkpoint.model_checkpoint_path + '.meta'
        restore = tf.train.import_meta_graph(meta_graph_path)
        restore.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
        # recover the global step from the checkpoint filename
        step = int(meta_graph_path.split('-')[2].split('.')[0])
    else:
        print('Initialize.')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        step = 0

    loss_list = []
    train_accuracy_list = []
    val_accuracy_list = []
    test_accuracy_list = []

    train_writer = tf.summary.FileWriter('logs/train@' + current_time,
                                         sess.graph)
    val_writer = tf.summary.FileWriter('logs/valid@' + current_time,
                                       sess.graph)
    summary_op = tf.summary.merge_all()

    print('Start training:')
    train_len = len(y_train)
    for epoch in range(config.num_epochs):
        # shuffle the training set each epoch
        permutation = np.random.permutation(train_len)
        X_train_data = X_train[permutation]
        y_train_data = y_train[permutation]
        data_idx = 0
        while data_idx < train_len:
            # include the final sample in the last (possibly short) batch
            batch_end = min(data_idx + config.batch_size, train_len)
            X_train_batch = X_train_data[data_idx:batch_end]
            y_train_batch = y_train_data[data_idx:batch_end]
            data_idx += config.batch_size
            loss, _, train_accuracy, summary, lr = sess.run(
                [nn.loss, nn.optimizer, nn.accuracy, summary_op,
                 nn.learning_rate],
                {nn.X_inputs: X_train_batch,
                 nn.y_inputs: y_train_batch,
                 nn.keep_prob: config.keep_prob,
                 nn.training: True})
            loss_list.append(loss)
            train_accuracy_list.append(train_accuracy)
            print('>> At step %i: loss = %.2f, train accuracy = %.3f%%, '
                  'learning rate = %.7f'
                  % (step, loss, train_accuracy * 100, lr))
            train_writer.add_summary(summary, step)
            step += 1

        accuracy, summary = sess.run(
            [nn.accuracy, summary_op],
            {nn.X_inputs: X_val,
             nn.y_inputs: y_val,
             nn.keep_prob: 1.0,
             nn.training: False})
        val_accuracy_list.append(accuracy)
        print('For epoch %i: valid accuracy = %.2f%%\n'
              % (epoch, accuracy * 100))
        val_writer.add_summary(summary, epoch)

    test_len = len(y_test)
    data_idx = 0
    while data_idx < test_len:
        batch_end = min(data_idx + config.batch_size, test_len)
        X_test_batch = X_test[data_idx:batch_end]
        y_test_batch = y_test[data_idx:batch_end]
        data_idx += config.batch_size
        test_accuracy = sess.run(
            nn.accuracy,
            {nn.X_inputs: X_test_batch,
             nn.y_inputs: y_test_batch,
             nn.keep_prob: 1.0,
             nn.training: False})
        test_accuracy_list.append(test_accuracy)

    save_path = saver.save(sess, model_save_path, global_step=step)
    print('Model saved in file: %s' % save_path)
    sess.close()
    train_writer.close()
    val_writer.close()
    print('Test accuracy = %.2f%%\n' % (np.mean(test_accuracy_list) * 100))
class Imitator:
    def __init__(self, min_action_set, learning_rate, alpha,
                 min_squared_gradient, checkpoint_dir, hist_len,
                 l2_penalty):
        self.minimal_action_set = min_action_set
        self.network = Network(len(self.minimal_action_set))
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=learning_rate,
                                    weight_decay=l2_penalty)
        self.checkpoint_directory = checkpoint_dir
        self.losses = []

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[len(outputs) - 1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return self.minimal_action_set[np.argmax(vals)]

    # potentially optimizable
    def compute_labels(self, sample, minibatch_size):
        labels = Variable(utils.long_tensor(minibatch_size))
        # The list of ALE actions taken for the minibatch
        actions_taken = [x.action for x in sample]
        # The indices of the ALE actions taken in the action set
        action_indices = [self.minimal_action_set.index(x)
                          for x in actions_taken]
        for index in range(len(action_indices)):
            labels[index] = action_indices[index]
        return labels

    def get_loss(self, outputs, labels):
        return nn.CrossEntropyLoss()(outputs, labels)

    def train(self, dataset, minibatch_size):
        # sample a minibatch of transitions
        sample = dataset.sample_minibatch(minibatch_size)
        state = Variable(
            utils.float_tensor(np.stack([np.squeeze(x.state)
                                         for x in sample])))
        # compute the target values for the minibatch
        labels = self.compute_labels(sample, minibatch_size)
        self.optimizer.zero_grad()
        # Forward pass the minibatch through the prediction network.
        activations = self.network(state)
        # Extract the Q-value vectors of the minibatch from the final
        # layer's activations. See the return values of the forward()
        # functions in cnn.py.
        output = activations[len(activations) - 1]
        loss = self.get_loss(output, labels)
        self.losses.append(loss)
        loss.backward()
        self.optimizer.step()

    def checkpoint_network(self):
        '''Checkpoint the network weights.'''
        print("Checkpointing Weights")
        utils.save_checkpoint({'state_dict': self.network.state_dict()},
                              self.checkpoint_directory)
        print("Checkpointed.")