Example #1
    def __init__(self, min_action_set, hist_len, checkpoint_policy):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        self.network.load_state_dict(
            torch.load(checkpoint_policy)['state_dict'])
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
Example #2
    def __init__(self, min_action_set, learning_rate, alpha, checkpoint_dir,
                 hist_len, l2_penalty):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=learning_rate,
                                    weight_decay=l2_penalty)
        self.checkpoint_directory = checkpoint_dir
Example #3
File: play.py Project: yardal/rlc
def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    logging.basicConfig(filename='log.txt', level=logging.DEBUG)
    logging.getLogger().addHandler(logging.StreamHandler())

    net = Network("model")
    self_play(net)
Example #4
def main():
    ValidSampler = Sampler(utils.valid_file)
    TestSampler = Sampler(utils.test_file)
    networks = []
    weights = []
    for i in range(5):
        if i == 0:
            TrainSampler = Sampler(utils.train_file)
            prev_ys = np.copy(TrainSampler.labels)
        else:
            TrainSampler = Sampler(utils.train_file, prev_ys)

        network = Network()
        network.train(TrainSampler)

        cur_ys = network.predict(TrainSampler)
        b1 = np.sum(np.multiply(cur_ys, prev_ys))
        b2 = np.sum(np.multiply(cur_ys, cur_ys))
        w = float(b1) / b2
        prev_ys = np.subtract(prev_ys, w * cur_ys)

        print(i, 'done with weight', w)
        network.save('network_' + str(i) + '.ckpt')
        weights.append(w)
        networks.append(network)

        validate_boost(ValidSampler, networks, weights)
    validate_boost(TestSampler, networks, weights)

    np.save('weights.npy', weights)
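
The weight computed inside this loop has a closed form: w = b1 / b2 = Σ(cur_ys·prev_ys) / Σ(cur_ys·cur_ys) is the least-squares coefficient minimizing ||prev_ys - w*cur_ys||², so each round subtracts the best scalar multiple of the new network's predictions from the running residual. A minimal self-contained sketch of that step (the arrays here are invented for illustration):

import numpy as np

# Residual-fitting step from the loop above, in isolation.
residual = np.array([1.0, -2.0, 0.5])     # plays the role of prev_ys
prediction = np.array([0.8, -1.5, 0.7])   # plays the role of cur_ys

# Closed-form least-squares coefficient minimizing ||residual - w * prediction||^2
w = np.dot(prediction, residual) / np.dot(prediction, prediction)
new_residual = residual - w * prediction

# The new residual is orthogonal to this round's predictions, so the next
# network is trained only on what this one failed to explain.
print(w, np.dot(new_residual, prediction))  # second value is ~0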
Example #5
File: play.py Project: yardal/rlc
def test(a, b):
    white = Network(a)
    black = Network(b)
    print("training")
    white.train()

    print("playing")
    board = chess.Board()
    for i in range(10):
        print(i)
        board.reset()
        while not board.is_game_over():
            move = play(board, white, True)
            if board.is_game_over():
                break
            board.push(move)
            move = play(board, black, False)
            board.push(move)
        print(board.result())
Example #6
class Clone:
    def __init__(self, min_action_set, hist_len, checkpoint_policy):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        self.network.load_state_dict(
            torch.load(checkpoint_policy)['state_dict'])
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[-1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return np.argmax(vals)
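
A hypothetical usage sketch for the Clone class above; the action ids, history length, input shape, and checkpoint path are all placeholders, not values from the original project:

import numpy as np

min_action_set = [0, 1, 3, 4]  # assumed ALE action ids
clone = Clone(min_action_set, hist_len=4,
              checkpoint_policy='checkpoints/policy.pth')  # assumed path

# Assumed Atari-style input: batch of 1, hist_len frames of 84x84 pixels
state = np.zeros((1, 4, 84, 84), dtype=np.float32)
action_index = clone.get_action(state)  # argmax over the final layer's values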
Example #8
        Iterator/Generator: get a batch of data
        This function is an iterator/generator that yields only chunk_size
        items of data at a time.
        Used in a for loop, just like the range() function.
        """
        if len(samples) != len(labels):
            raise Exception('Length of samples and labels must equal')
        stepStart = 0  # initial step
        i = 0
        while stepStart < len(samples):
            stepEnd = stepStart + chunk_size
            if stepEnd < len(samples):
                yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd]
                i += 1
            # a final partial chunk (stepEnd >= len(samples)) is dropped
            stepStart = stepEnd

    net = Network(train_batch_size=64, test_batch_size=500, pooling_scale=2)
    net.define_inputs(
        train_samples_shape=(64, image_size, image_size, num_channels),
        train_labels_shape=(64, num_labels),
        test_samples_shape=(500, image_size, image_size, num_channels)
    )
    net.add_conv(patch_size=3, in_depth=num_channels, out_depth=16, activation='relu', pooling=False, name='conv1')
    net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv2')
    net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=False, name='conv3')
    net.add_conv(patch_size=3, in_depth=16, out_depth=16, activation='relu', pooling=True, name='conv4')

    # 4 = two pooling layers, each halving the spatial size
    # 16 = conv4 out_depth
    net.add_fc(in_num_nodes=(image_size // 4) * (image_size // 4) * 16, out_num_nodes=16, activation='relu', name='fc1')
    net.add_fc(in_num_nodes=16, out_num_nodes=10, activation='relu', name='fc2')
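
The generator at the top of this example (the same pattern reappears in the next one) is easy to test in isolation; a self-contained sketch, with names chosen here rather than taken from the original file:

import numpy as np

def chunks(samples, labels, chunk_size):
    # Standalone version of the generator above: yields (index, sample chunk,
    # label chunk); a final partial chunk is dropped, matching the original.
    if len(samples) != len(labels):
        raise ValueError('Length of samples and labels must be equal')
    step_start, i = 0, 0
    while step_start < len(samples):
        step_end = step_start + chunk_size
        if step_end < len(samples):
            yield i, samples[step_start:step_end], labels[step_start:step_end]
            i += 1
        step_start = step_end

samples, labels = np.arange(10), np.arange(10) * 2
for i, s, l in chunks(samples, labels, 4):
    print(i, s, l)  # two chunks of 4; the final 2 items are dropped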
Example #9
        Used in a for loop, just like the range() function.
        """
        if len(samples) != len(labels):
            raise Exception('Length of samples and labels must equal')
        stepStart = 0  # initial step
        i = 0
        while stepStart < len(samples):
            stepEnd = stepStart + chunkSize
            if stepEnd < len(samples):
                yield i, samples[stepStart:stepEnd], labels[stepStart:stepEnd]
                i += 1
            stepStart = stepEnd

    net = Network(train_batch_size=64,
                  test_batch_size=500,
                  pooling_scale=2,
                  dropout_rate=0.9,
                  base_learning_rate=0.001,
                  decay_rate=0.99)
    net.define_inputs(
        train_samples_shape=(64, image_size, image_size, num_channels),
        train_labels_shape=(64, num_labels),
        test_samples_shape=(500, image_size, image_size, num_channels),
    )
    #
    net.add_conv(patch_size=3,
                 in_depth=num_channels,
                 out_depth=32,
                 activation='relu',
                 pooling=False,
                 name='conv1')
    net.add_conv(patch_size=3,
Example #10
import os

import numpy as np
import sklearn.metrics
import sklearn.model_selection

import preprocessing  # project-local module providing extract_rgb / load_data
from cnn import Network

DATA_DIR = 'data'

pipeline = [
    (preprocessing.extract_rgb, True),
    # (preprocessing.enrich_mirror, False),
]

data_file = os.path.join(DATA_DIR, 'data_train.dat')
targets_file = os.path.join(DATA_DIR, 'targets_train.dat')
data, targets = preprocessing.load_data(data_file), preprocessing.load_data(targets_file)

data_train, data_validation, targets_train, targets_validation = sklearn.model_selection.train_test_split(
    data, targets, test_size=0.25, random_state=42, shuffle=True, stratify=targets
)

for func, apply_test in pipeline:
    data_train, targets_train = func(data_train, targets_train)
    if apply_test:
        data_validation, targets_validation = func(data_validation, targets_validation)


print(data_train.shape[1:])
model = Network(input_shape=data_train.shape[1:])
model.fit(data_train, targets_train, data_validation, targets_validation)
predict = model.predict(data_validation)
print(sklearn.metrics.accuracy_score(targets_validation, predict))
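
The (function, flag) pipeline in this example makes explicit which preprocessing steps also apply to held-out data. A minimal self-contained sketch of the same pattern; the step functions here are invented for illustration:

import numpy as np

def scale(data, targets):
    # Invented example step: deterministic, so it is safe on validation data.
    return data / 255.0, targets

def mirror(data, targets):
    # Invented example step: augmentation, applied to training data only.
    return np.concatenate([data, data[:, ::-1]]), np.concatenate([targets, targets])

pipeline = [(scale, True), (mirror, False)]  # (step, apply_to_validation)

train, train_y = np.ones((4, 8)), np.zeros(4)
val, val_y = np.ones((2, 8)), np.zeros(2)
for func, apply_val in pipeline:
    train, train_y = func(train, train_y)
    if apply_val:
        val, val_y = func(val, val_y)
print(train.shape, val.shape)  # (8, 8) (2, 8): only train was mirrored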
Example #11
class Imitator:
    def __init__(self, min_action_set, learning_rate, alpha, checkpoint_dir,
                 hist_len, l2_penalty):
        self.minimal_action_set = min_action_set
        print("hist len", hist_len)
        self.network = Network(len(self.minimal_action_set), hist_len)
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=learning_rate,
                                    weight_decay=l2_penalty)
        self.checkpoint_directory = checkpoint_dir

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[-1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return np.argmax(vals)

    # potentially optimizable
    def compute_labels(self, sample, minibatch_size):
        labels = Variable(utils.long_tensor(minibatch_size))
        # The list of ALE actions taken for the minibatch
        actions_taken = [x.action for x in sample]
        for i in range(len(actions_taken)):
            labels[i] = int(actions_taken[i])  # np.int is deprecated; use int
        return labels

    def get_loss(self, outputs, labels):
        return nn.CrossEntropyLoss()(outputs, labels)

    def validate(self, dataset, minibatch_size):
        '''Run the dataset through the loss to get the validation error.'''
        validation_data = dataset.get_dataset()
        v_loss = 0.0
        for i in range(0,
                       len(validation_data) - minibatch_size, minibatch_size):
            sample = validation_data[i:i + minibatch_size]
            with torch.no_grad():
                state = Variable(
                    utils.float_tensor(
                        np.stack([np.squeeze(x.state) for x in sample])))
                # compute the target values for the minibatch
                labels = self.compute_labels(sample, minibatch_size)
                # Forward pass the minibatch through the prediction network.
                activations = self.network(state)
                # Extract the Q-value vectors of the minibatch from the final
                # layer's activations. See return values of the forward()
                # functions in cnn.py
                output = activations[-1]
                loss = self.get_loss(output, labels)
                v_loss += loss.item()  # accumulate a Python float, not a tensor
        return v_loss

    def train(self, dataset, minibatch_size):
        # sample a minibatch of transitions
        sample = dataset.sample_minibatch(minibatch_size)
        state = Variable(
            utils.float_tensor(np.stack([np.squeeze(x.state)
                                         for x in sample])))

        # compute the target values for the minibatch
        labels = self.compute_labels(sample, minibatch_size)
        self.optimizer.zero_grad()
        # Forward pass the minibatch through the prediction network.
        activations = self.network(state)
        # Extract the Q-value vectors of the minibatch from the final
        # layer's activations. See return values of the forward()
        # functions in cnn.py
        output = activations[-1]
        loss = self.get_loss(output, labels)
        loss.backward()
        self.optimizer.step()
        return loss

    def checkpoint_network(self, env_name, extra_info):
        '''Checkpoint the network weights to the checkpoint directory.'''
        print("Checkpointing Weights")
        utils.save_checkpoint({'state_dict': self.network.state_dict()},
                              self.checkpoint_directory, env_name, extra_info)
        print("Checkpointed.")
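
A hedged sketch of how the Imitator above might be driven; the dataset objects, action set, environment name, and iteration counts are assumptions, not part of the original example:

# Hypothetical driver loop for Imitator; 'dataset' and 'val_dataset' are
# assumed to provide the sample_minibatch()/get_dataset() methods used above.
imitator = Imitator(min_action_set=[0, 1, 3, 4], learning_rate=1e-4,
                    alpha=0.95, checkpoint_dir='checkpoints',
                    hist_len=4, l2_penalty=1e-5)

for step in range(10000):                      # assumed training length
    loss = imitator.train(dataset, minibatch_size=32)
    if step % 1000 == 0:
        v_loss = imitator.validate(val_dataset, minibatch_size=32)
        print(step, float(loss), v_loss)
        imitator.checkpoint_network('breakout', 'step%d' % step)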
Example #12
        description='Farsi digit Detection Network')

    parser.add_argument("--cfg",
                        dest='cfgfile',
                        help="Config file",
                        default="cfg/architecture.cfg",
                        type=str)

    return parser.parse_args()


args = arg_parse()

# Set up the neural network
print("Preparing network .....")
network = Network(args.cfgfile)
network.compile()

print("Loading input .....")
dataset = Dataset()
x_train, y_train, x_test, y_test = dataset.loadData(
    network.net_info.input_shape)

# Encode the data
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print("Training network .....")
network.fit(x_train, y_train, x_test, y_test)

print("evaluation: ")
Example #13
def main():
    current_time = datetime.now().strftime('%Y%m%d-%H%M')
    checkpoint_dir = 'checkpoints'
    if FLAGS.checkpoint is not None:
        # str.lstrip removes a set of characters, not a prefix, so strip
        # the leading 'checkpoints/' explicitly
        checkpoint_name = FLAGS.checkpoint
        if checkpoint_name.startswith('checkpoints/'):
            checkpoint_name = checkpoint_name[len('checkpoints/'):]
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)
    else:
        checkpoint_path = os.path.join(checkpoint_dir, current_time)
        try:
            os.makedirs(checkpoint_path)
        except OSError:
            print('Unable to make checkpoints directory: %s' % checkpoint_path)
    model_save_path = os.path.join(checkpoint_path, 'model.ckpt')

    nn = Network()

    saver = tf.train.Saver()
    print('Build session.')
    tfconfig = tf.ConfigProto()
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)

    if FLAGS.checkpoint is not None:
        print('Restore from pre-trained model.')
        checkpoint = tf.train.get_checkpoint_state(checkpoint_path)
        meta_graph_path = checkpoint.model_checkpoint_path + '.meta'
        restore = tf.train.import_meta_graph(meta_graph_path)
        restore.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
        step = int(meta_graph_path.split('-')[2].split('.')[0])
    else:
        print('Initialize.')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        step = 0

    loss_list = []
    train_accuracy_list = []
    val_accuracy_list = []
    test_accuracy_list = []

    train_writer = tf.summary.FileWriter('logs/train@' + current_time,
                                         sess.graph)
    val_writer = tf.summary.FileWriter('logs/valid@' + current_time,
                                       sess.graph)
    summary_op = tf.summary.merge_all()

    print('Start training:')
    train_len = len(y_train)
    for epoch in range(config.num_epochs):
        permutation = np.random.permutation(train_len)
        X_train_data = X_train[permutation]
        y_train_data = y_train[permutation]
        data_idx = 0
        while data_idx < train_len - 1:
            # note: clipping to train_len - 1 means the final sample is never used
            batch_end = np.clip(data_idx + config.batch_size, 0, train_len - 1)
            X_train_batch = X_train_data[data_idx:batch_end]
            y_train_batch = y_train_data[data_idx:batch_end]
            data_idx += config.batch_size

            loss, _, train_accuracy, summary, lr = sess.run(
                [
                    nn.loss, nn.optimizer, nn.accuracy, summary_op,
                    nn.learning_rate
                ], {
                    nn.X_inputs: X_train_batch,
                    nn.y_inputs: y_train_batch,
                    nn.keep_prob: config.keep_prob,
                    nn.training: True
                })
            loss_list.append(loss)
            train_accuracy_list.append(train_accuracy)
            print(
                '>> At step %i: loss = %.2f, train accuracy = %.3f%%, learning rate = %.7f'
                % (step, loss, train_accuracy * 100, lr))
            train_writer.add_summary(summary, step)
            step += 1

        accuracy, summary = sess.run(
            [nn.accuracy, summary_op], {
                nn.X_inputs: X_val,
                nn.y_inputs: y_val,
                nn.keep_prob: 1.0,
                nn.training: False
            })
        val_accuracy_list.append(accuracy)
        print('For epoch %i: valid accuracy = %.2f%%\n' %
              (epoch, accuracy * 100))
        val_writer.add_summary(summary, epoch)

    test_len = len(y_test)
    data_idx = 0
    while data_idx < test_len - 1:
        batch_end = np.clip(data_idx + config.batch_size, 0, test_len - 1)
        X_test_batch = X_test[data_idx:batch_end]
        y_test_batch = y_test[data_idx:batch_end]
        data_idx += config.batch_size

        test_accuracy = sess.run(
            nn.accuracy, {
                nn.X_inputs: X_test_batch,
                nn.y_inputs: y_test_batch,
                nn.keep_prob: 1.0,
                nn.training: False
            })
        test_accuracy_list.append(test_accuracy)

    save_path = saver.save(sess, model_save_path, global_step=step)
    print('Model saved in file: %s' % save_path)
    sess.close()
    train_writer.close()
    val_writer.close()
    print('Test accuracy = %.2f%%\n' % (np.mean(test_accuracy_list) * 100))
Example #14
class Imitator:
    def __init__(self, min_action_set, learning_rate, alpha,
                 min_squared_gradient, checkpoint_dir, hist_len, l2_penalty):
        self.minimal_action_set = min_action_set
        self.network = Network(len(self.minimal_action_set))
        if torch.cuda.is_available():
            print("Initializing Cuda Nets...")
            self.network.cuda()
        self.optimizer = optim.Adam(self.network.parameters(),
                                    lr=learning_rate,
                                    weight_decay=l2_penalty)
        self.checkpoint_directory = checkpoint_dir
        self.losses = []

    def predict(self, state):
        # predict action probabilities
        outputs = self.network(Variable(utils.float_tensor(state)))
        vals = outputs[-1].data.cpu().numpy()
        return vals

    def get_action(self, state):
        vals = self.predict(state)
        return self.minimal_action_set[np.argmax(vals)]

    # potentially optimizable
    def compute_labels(self, sample, minibatch_size):
        labels = Variable(utils.long_tensor(minibatch_size))
        # The list of ALE actions taken for the minibatch
        actions_taken = [x.action for x in sample]
        # The indices of the ALE actions taken in the action set
        action_indices = [
            self.minimal_action_set.index(x) for x in actions_taken
        ]
        for index in range(len(action_indices)):
            labels[index] = action_indices[index]
        return labels

    def get_loss(self, outputs, labels):
        return nn.CrossEntropyLoss()(outputs, labels)

    def train(self, dataset, minibatch_size):
        # sample a minibatch of transitions
        sample = dataset.sample_minibatch(minibatch_size)
        state = Variable(
            utils.float_tensor(np.stack([np.squeeze(x.state)
                                         for x in sample])))

        # compute the target values for the minibatch
        labels = self.compute_labels(sample, minibatch_size)

        self.optimizer.zero_grad()
        # Forward pass the minibatch through the prediction network.
        activations = self.network(state)
        # Extract the Q-value vectors of the minibatch from the final
        # layer's activations. See return values of the forward()
        # functions in cnn.py
        output = activations[-1]
        loss = self.get_loss(output, labels)
        self.losses.append(loss)
        loss.backward()
        self.optimizer.step()

    def checkpoint_network(self):
        '''Checkpoint the network weights to the checkpoint directory.'''
        print("Checkpointing Weights")
        utils.save_checkpoint({'state_dict': self.network.state_dict()},
                              self.checkpoint_directory)
        print("Checkpointed.")