Ejemplo n.º 1
0
def train(epoch):
    """Run one optimisation pass over the pre-batched query/neighbor pairs.

    For each (query, candidates) pair the inputs are embedded with ``h_phi``,
    the dKNN loss is averaged and back-propagated, and ``-loss / k`` is
    recorded.  The mean of the recorded values is printed to stdout and
    written to ``logfile``.

    Args:
        epoch: Accepted for the caller's convenience; not read in the body.
    """
    h_phi.train()
    per_batch_scores = []

    for query, candidates in zip(batched_query_train, batched_neighbor_train):
        optimizer.zero_grad()

        cand_x, cand_y = candidates
        query_x, query_y = query

        # Move all four tensors onto the GPU in the original order.
        cand_x, cand_y, query_x, query_y = (
            tensor.to(device=gpu)
            for tensor in (cand_x, cand_y, query_x, query_y))

        # Embed neighbors and queries, flattening to fixed 2-D shapes.
        neighbor_e = h_phi(cand_x).reshape(NUM_TRAIN_NEIGHBORS, EMBEDDING_SIZE)
        query_e = h_phi(query_x).reshape(NUM_TRAIN_QUERIES, EMBEDDING_SIZE)

        neighbor_y_oh = one_hot(cand_y).reshape(NUM_TRAIN_NEIGHBORS, 10)
        query_y_oh = one_hot(query_y).reshape(NUM_TRAIN_QUERIES, 10)

        batch_loss = dknn_loss(
            query_e, neighbor_e, query_y_oh, neighbor_y_oh).mean()
        batch_loss.backward()
        optimizer.step()

        per_batch_scores.append((-batch_loss).item() / k)

    average_score = sum(per_batch_scores) / len(per_batch_scores)
    print('Avg. train correctness of top k:', average_score)
    print('Avg. train correctness of top k:', average_score, file=logfile)
    logfile.flush()
Ejemplo n.º 2
0
def continuous_kappa(y, t, y_pow=1, eps=1e-15):
    """Compute a quadratically weighted kappa from (soft) rating matrices.

    Args:
        y: Predictions; a 1-D array is passed through ``one_hot(y, m=5)``,
            otherwise it is used as an (items, ratings) matrix.
        t: Targets, handled the same way as ``y``.
        y_pow: When different from 1, ``y`` is raised to this power and each
            row is re-normalised by its sum.
        eps: Added to the row sums during re-normalisation.

    Returns:
        Tuple ``(kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom)``.
    """
    if y.ndim == 1:
        y = one_hot(y, m=5)
    if t.ndim == 1:
        t = one_hot(t, m=5)

    n_items, n_ratings = y.shape

    # Squared rating distance, scaled by the largest possible distance.
    levels = np.arange(0, n_ratings)
    squared_distance = (levels[:, None] - levels[None, :])**2
    weights = squared_distance / float(n_ratings - 1)**2

    if y_pow != 1:
        powered = y**y_pow
        y = powered / (eps + powered.sum(axis=1)[:, None])

    hist_rater_a = np.sum(y, axis=0)
    hist_rater_b = np.sum(t, axis=0)

    conf_mat = np.dot(y.T, t)
    expected = np.dot(hist_rater_a[:, None], hist_rater_b[None, :])

    nom = weights * conf_mat
    denom = weights * expected / n_items

    kappa = 1 - nom.sum() / denom.sum()
    return kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom
Ejemplo n.º 3
0
def continuous_kappa(y, t, y_pow=1, eps=1e-15):
    """Quadratic-weighted kappa between prediction and target matrices.

    1-D inputs are expanded with ``one_hot(..., m=5)``.  If ``y_pow`` is not
    1, ``y`` is sharpened by that exponent and every row is divided by
    ``eps`` plus its sum.

    Returns:
        ``(kappa, conf_mat, hist_rater_a, hist_rater_b, nom, denom)`` where
        ``nom``/``denom`` are the weighted observed/expected matrices whose
        sums define ``kappa = 1 - nom.sum() / denom.sum()``.
    """
    y = one_hot(y, m=5) if y.ndim == 1 else y
    t = one_hot(t, m=5) if t.ndim == 1 else t

    num_scored_items, num_ratings = y.shape

    # weights[i, j] = (i - j)^2 / (num_ratings - 1)^2
    rating_ids = np.arange(0, num_ratings)
    rating_gap = np.subtract.outer(rating_ids, rating_ids)
    weights = rating_gap ** 2 / float(num_ratings - 1) ** 2

    if y_pow != 1:
        sharpened = y ** y_pow
        row_totals = sharpened.sum(axis=1)[:, None]
        y = sharpened / (eps + row_totals)

    hist_rater_a = np.sum(y, axis=0)
    hist_rater_b = np.sum(t, axis=0)
    conf_mat = np.dot(y.T, t)

    nom = weights * conf_mat
    chance_agreement = np.dot(hist_rater_a[:, None], hist_rater_b[None, :])
    denom = weights * chance_agreement / num_scored_items

    return (1 - nom.sum() / denom.sum(), conf_mat,
            hist_rater_a, hist_rater_b, nom, denom)
Ejemplo n.º 4
0
    def __generateData(self, sample_size):
        """Populate ``self.data`` with digit-addition samples built from MNIST.

        For every ordered digit pair ``(a, b)`` the key ``"a+b"`` maps to
        ``sample_size`` records, each pairing randomly drawn MNIST training
        images of ``a`` and ``b`` with the digits' symbol arrays and a target
        formed by summing the one-hot codes of the tens and units of
        ``a + b``.  The symbol pairs and targets are also stored as arrays in
        ``self.symbolsIn`` / ``self.symbolsOut``.

        Args:
            sample_size: Number of records generated per digit pair.
        """
        self.data = {}

        symbolsIn = []
        symbolsOut = []

        mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
        trainSamples = [[] for _ in range(10)]
        testSamples = [[] for _ in range(10)]

        # One pre-saved symbol array per digit.
        symbols = [np.load("MNIST_data/%d.npy" % digit) for digit in range(10)]

        # Bucket the images by their (one-hot encoded) label.
        for idx, image in enumerate(mnist.train.images):
            trainSamples[argmax(mnist.train.labels[idx])].append(image)
        for idx, image in enumerate(mnist.test.images):
            testSamples[argmax(mnist.test.labels[idx])].append(image)

        for a in range(10):
            for b in range(10):
                records = []
                self.data[str(a) + "+" + str(b)] = records

                leftSymbol, rightSymbol = symbols[a], symbols[b]

                tens, units = divmod(a + b, 10)
                target = one_hot(tens) + one_hot(units)

                symbolsIn.append([leftSymbol, rightSymbol])
                symbolsOut.append(target)

                for _ in range(sample_size):
                    left = trainSamples[a][
                        randint(0, len(trainSamples[a]) - 1)]
                    right = trainSamples[b][
                        randint(0, len(trainSamples[b]) - 1)]

                    records.append({
                        "left-noisy": left,
                        "left-symbol": leftSymbol,
                        "right-noisy": right,
                        "right-symbol": rightSymbol,
                        "target": target
                    })

        self.symbolsIn = np.array(symbolsIn)
        self.symbolsOut = np.array(symbolsOut)
Ejemplo n.º 5
0
    def forward(self, center_word, context_words):
        """Forward pass.

        Args:
            center_word: The word whose representation is being inferred.
            context_words: Iterable of context words.

        Returns:
            Tuple ``(categorical_distribution, mu, sigma, p_mean, p_sigma)``.
        """
        center_onehot = one_hot(center_word)
        center_embedding = self.fc1(center_onehot)

        # Sum of per-context-word hidden representations.
        accumulated = torch.zeros(self.embedding_dimension * 2).cuda()
        for word in context_words:
            word_embedding = self.fc1(one_hot(word))
            pair = torch.cat([word_embedding, center_embedding], dim=0)
            accumulated += F.relu(self.fc2(pair))

        mu = self.fc3(accumulated)
        sigma = F.softplus(self.fc4(accumulated))

        # Reparameterization trick: sample = mu + noise * sigma.
        noise = self.epsilon.sample().cuda()
        latent_sample = mu + (noise * sigma)
        categorical_distribution = F.softmax(self.re1(latent_sample), dim=0)

        # Second (mean, sigma) pair computed from the center word alone.
        p_mean = self.p_mean(center_onehot)
        p_sigma = F.softplus(self.p_sigma(center_onehot))

        return categorical_distribution, mu, sigma, p_mean, p_sigma
 def sufficient_statistics(self, x):
     """One-hot encode ``x`` over ``self.K`` categories.

     A 2-D input returns the encoding as produced by ``one_hot``; a 3-D input
     is additionally reshaped to ``(-1, self.num_dims * self.K)``.

     Raises:
         AssertionError: If ``x`` is neither 2- nor 3-dimensional.
     """
     rank = len(x.shape)
     if rank not in (2, 3):
         raise AssertionError("Input must be 2 or 3 dimensional tensor.")

     encoded = one_hot(x.long(), self.K)
     if rank == 3:
         encoded = encoded.reshape(-1, self.num_dims * self.K)
     return encoded
Ejemplo n.º 7
0
def train():
    """Load the '_t' splits, normalise and one-hot them, then fit a VGG DeepCNN."""
    train_images, train_labels = load_train(size='_t')
    train_images = norm4d_per_sample(train_images)
    train_labels = one_hot(train_labels, 2)

    test_images, test_labels = load_test(size='_t')
    test_labels = one_hot(test_labels, 2)
    test_images = norm4d_per_sample(test_images)

    network = DeepCNN('vgg')
    network.train(train_images, train_labels, test_images, test_labels)
Ejemplo n.º 8
0
def train():
    """Load the '_t' splits, normalise and one-hot them, then fit the RL model."""
    train_images, train_labels = load_train(size='_t')
    train_images = norm4d_per_sample(train_images)
    train_labels = one_hot(train_labels, 2)

    test_images, test_labels = load_test(size='_t')
    test_labels = one_hot(test_labels, 2)
    test_images = norm4d_per_sample(test_images)

    learner = RL(istrained=False, name='rl_noun')
    learner.train(train_images, train_labels, test_images, test_labels)
Ejemplo n.º 9
0
def train(G, D, data_iter, n_epochs, lr):
    """Train a label-conditioned generator/discriminator pair.

    Each batch performs one discriminator step followed by one generator
    step.  Progress is printed and a grid of generated images is saved every
    ``print_every`` batches; weights are checkpointed every
    ``save_epoch_freq`` epochs; and one image per class label 0-9 is saved at
    the end of every epoch.

    Args:
        G: Generator; called on ``cat([z, one_hot_label], 1)``.
        D: Discriminator; called on ``cat([image, one_hot_label], 1)``.
        data_iter: Iterable of ``(X, L)`` image/label batches.
        n_epochs: Number of passes over ``data_iter``.
        lr: Learning rate for both Adam optimisers.
    """
    opt_g = optim.Adam(G.parameters(), lr, betas=[.5, .999])
    opt_d = optim.Adam(D.parameters(), lr, betas=[.5, .999])

    G.train()
    D.train()

    for epoch in range(n_epochs):
        for i, (X, L) in enumerate(data_iter):
            x_real = X.view(-1, dim_im).to(DEVICE)
            l = one_hot(L, 10).to(DEVICE)
            z = torch.randn(l.size(0), dim_z, device=DEVICE)

            fake_x = G(torch.cat([z, l], 1))

            # Discriminator step; detach so no gradient reaches G here.
            fake_score = D(torch.cat([fake_x.detach(), l], 1))
            real_score = D(torch.cat([x_real, l], 1))

            loss_d = -torch.mean(
                torch.log(real_score + 1e-10) +
                torch.log(1 - fake_score + 1e-10))
            D.zero_grad()
            loss_d.backward()
            opt_d.step()

            # Generator step against the just-updated discriminator.
            fake_score = D(torch.cat([fake_x, l], 1))
            real_score = D(torch.cat([x_real, l], 1))

            loss_g = torch.mean(torch.log(1 - fake_score + 1e-10))
            G.zero_grad()
            loss_g.backward()
            opt_g.step()

            if (i + 1) % print_every == 0:
                print('Epoch %d Batch %d ' % (epoch + 1, i + 1) +
                      'Loss D: %0.3f ' % loss_d.item() +
                      'Loss G: %0.3f ' % loss_g.item() +
                      'fake_score: %0.3f ' % torch.mean(fake_score).item() +
                      'real_score: %0.3f ' % torch.mean(real_score).item())

                # -1 keeps this valid when the final batch is short.
                _imags = fake_x.view(-1, 1, 28, 28).data
                tv.utils.save_image(
                    _imags, save_dir + '{}_{}.png'.format(epoch + 1, i + 1))
        if (epoch + 1) % save_epoch_freq == 0:
            torch.save(G.state_dict(), save_dir + 'net_g.pt')
            # Fixed: the discriminator file previously received G's weights.
            torch.save(D.state_dict(), save_dir + 'net_d.pt')
        # test
        with torch.no_grad():
            G.eval()
            z = torch.randn(1, dim_z, device=DEVICE).repeat(10, 1)
            # torch.arange(0, 10) yields the same labels 0..9 as the
            # deprecated torch.range(0, 9).
            c = one_hot(torch.arange(0, 10, dtype=torch.long), 10).to(DEVICE)
            fake_x = G(torch.cat([z, c], 1))
            _imags = fake_x.view(-1, 1, 28, 28)
            tv.utils.save_image(_imags, save_dir + '{}.png'.format(epoch + 1))
        # Fixed: restore training mode so the next epoch does not train G
        # while it is still in eval mode.
        G.train()
 def data_generator(self):
     """Load the gesture dataset and store train/test arrays on ``self``.

     Sets ``x_train``, ``x_test``, ``gestures``, one-hot ``y_train`` /
     ``y_test`` and ``n_classes``, then prints the shape of each array.
     """
     reader = data_reader(
         '../processed_dataset/frame_step_1_seperate_no_empty',
         self.time_window)
     (self.x_train, self.x_test,
      label_train, label_test, self.gestures) = reader.run()

     self.y_train = utils.one_hot(label_train, len(self.gestures))
     self.y_test = utils.one_hot(label_test, len(self.gestures))
     self.n_classes = len(self.gestures)

     report = (
         ('x_train:', self.x_train),
         ('x_test:', self.x_test),
         ('y_train:', self.y_train),
         ('y_test:', self.y_test),
     )
     for caption, array in report:
         print(caption, np.shape(array))
Ejemplo n.º 11
0
def sample():
    """Print 100 characters sampled from the recurrent model.

    Starts from a randomly chosen character, then repeatedly updates the
    hidden state, turns the output scores into probabilities and draws the
    next character index from them.
    """
    seed_index = char_to_i[np.random.choice(chars)]
    current_input = one_hot(seed_index, vocab_size)
    hidden = np.zeros((1, hidden_size))

    print('\n\n')
    for _ in range(100):
        hidden = np.tanh(current_input.dot(wxh) + hidden.dot(whh) + bh)
        scores = hidden.dot(why) + by
        probabilities = np.exp(scores) / np.exp(scores).sum()
        next_i = np.random.choice(vocab_size, p=probabilities[0])
        current_input = one_hot(next_i, vocab_size)
        print(i_to_char[next_i], end='')
    print('\n\n')
Ejemplo n.º 12
0
 def get_batch(self, batch_size, shuffle=True):
     """Yield ``(X_batch, one_hot_labels)`` pairs covering every sample once.

     Args:
         batch_size: Number of samples per batch; the last batch holds
             whatever remains after the fixed-size batches.
         shuffle: Shuffle the sample order with ``np.random.shuffle`` first.
     """
     order = list(range(self.num_samples))
     if shuffle:
         np.random.shuffle(order)

     def make_batch(selected):
         return self.X[selected, :], one_hot(self.y[selected], self.class_num)

     # Fixed-size batches; the stop bound always leaves a remainder for the
     # final yield below.
     starts = range(0, self.num_samples - batch_size, batch_size)
     for start in starts:
         yield make_batch(order[start:start + batch_size])

     consumed = len(starts) * batch_size
     if consumed < self.num_samples:
         yield make_batch(order[consumed:])
Ejemplo n.º 13
0
def state2tensor(state):
    """Flatten a game state tuple into a single row tensor.

    Args:
        state: ``(player, leader, trick, hand, played_cards)``.

    Returns:
        The concatenation of the one-hot player and leader (4 classes each),
        the encoded trick and hand, and a length-32 played-card indicator,
        with a leading dimension of size 1 added.
    """
    player, leader, trick, hand, played_cards = state

    features = [
        one_hot(player, 4),
        one_hot(leader, 4),
        hand2tensor(trick),
        hand2tensor(hand),
    ]

    # Mark every card that has already been played.
    played_mask = torch.zeros(32)
    for card in played_cards:
        played_mask[card2ix[card]] = 1.
    features.append(played_mask)

    return torch.cat(tuple(features)).unsqueeze(0)
Ejemplo n.º 14
0
def interpolate(G, z, shifts_r, shifts_count, dim, deformator=None, with_central_border=False):
    """Generate images while shifting ``z`` along one latent direction.

    Args:
        G: Generator called on the shifted latent code.
        z: Base latent code.
        shifts_r: Largest shift magnitude; shifts run from ``-shifts_r`` to
            ``shifts_r``.
        shifts_count: Number of steps on each side of zero.
        dim: Index passed to ``one_hot`` to select the direction.
        deformator: Optional callable applied to the one-hot shift.
        with_central_border: Add a border to the image whose shift is 0.0.

    Returns:
        List with the first generated image of every shift, moved to the CPU.
    """
    frames = []
    step = shifts_r / shifts_count
    for shift in np.arange(-shifts_r, shifts_r + 1e-9, step):
        offset = one_hot(z.shape[1:], shift, dim).cuda()
        if deformator is not None:
            offset = deformator(offset)

        frame = G(z + offset).cpu()[0]
        if with_central_border and shift == 0.0:
            frame = add_border(frame)
        frames.append(frame)
    return frames
Ejemplo n.º 15
0
def standardize_targets(Y, cost):
    """Bring targets into the 2-D layout expected by the given cost.

    Args:
        Y: Array-like targets; 1-D input becomes a single column.
        cost: Object whose ``__name__`` selects the encoding.

    Returns:
        A 2-D array.  Single-column targets are one-hot encoded for
        ``CategoricalCrossEntropy`` (negative class 0) and for multi-class
        hinge costs (negative class -1); for binary hinge costs the zeros are
        decremented to -1.  The caller's data is never modified.
    """
    Y = np.asarray(Y)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)
    if Y.shape[1] == 1 and cost.__name__ == 'CategoricalCrossEntropy':
        Y = one_hot(Y, negative_class=0.)
    if Y.shape[1] == 1 and 'Hinge' in cost.__name__:
        if len(np.unique(Y)) > 2:
            Y = one_hot(Y, negative_class=-1.)
        else:
            # np.asarray/reshape may return a view of the caller's array, so
            # copy before the in-place relabelling of the negative class.
            Y = Y.copy()
            Y[Y == 0] -= 1
    return Y
Ejemplo n.º 16
0
def standardize_targets(Y, cost):
    """Convert targets to the 2-D encoding required by ``cost``.

    Args:
        Y: Array-like targets; a 1-D input is reshaped to one column.
        cost: Object whose ``__name__`` decides the encoding.

    Returns:
        A 2-D array: one-hot with negative class 0 for
        ``CategoricalCrossEntropy``, one-hot with negative class -1 for hinge
        costs with more than two distinct labels, and a -1/+1 column for
        binary hinge costs.  The input array is left untouched.
    """
    Y = np.asarray(Y)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    single_column = Y.shape[1] == 1
    if single_column and cost.__name__ == 'CategoricalCrossEntropy':
        Y = one_hot(Y, negative_class=0.)

    # Re-check the shape: the branch above may have widened Y.
    if Y.shape[1] == 1 and 'Hinge' in cost.__name__:
        if len(np.unique(Y)) > 2:
            Y = one_hot(Y, negative_class=-1.)
        else:
            # Work on a copy; Y may still alias the caller's buffer here.
            Y = Y.copy()
            Y[Y == 0] -= 1
    return Y
Ejemplo n.º 17
0
def gen_images(paths, labels=None, shuffle=False, repeat=False, name="NOT NAMED!!!(check data.gen_images)"):
    """Yield images (optionally with one-hot labels) read from ``paths``.

    Args:
        paths: Sequence of image file paths.
        labels: Optional class labels aligned with ``paths``; they are
            one-hot encoded over ``num_classes`` and cast to float32.
        shuffle: Shuffle paths (and labels, identically) before every pass.
        repeat: Keep cycling over the data instead of stopping after one pass.
        name: Label printed when the generator starts.

    Yields:
        ``img`` when ``labels`` is None, otherwise ``(img, label)``.
    """
    paths_shuffled = np.array(paths)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Data loader INITIALIZED: %s" % name)
    print("  shuffle: %r" % shuffle)
    print("  repeat: %r" % repeat)
    print("  no. paths: %d" % len(paths))

    if labels is not None:
        labels = utils.one_hot(labels, m=num_classes).astype('float32')
        labels_shuffled = np.array(labels)

    while True:
        if shuffle:
            # Replaying the RNG state makes both shuffles use the same
            # permutation, keeping paths and labels aligned.
            state = np.random.get_state()
            np.random.shuffle(paths_shuffled)
            if labels is not None:
                np.random.set_state(state)
                np.random.shuffle(labels_shuffled)
        for k, path in enumerate(paths_shuffled):
            img = skimage.io.imread(path, as_grey=True)
            if labels is not None:
                yield img, labels_shuffled[k]
            else:
                yield img
        if not repeat:
            break
Ejemplo n.º 18
0
def predict(model, test_iter, cuda=CUDA_DEFAULT):
    """Evaluate ``model`` on ``test_iter`` and return the mean loss per sample.

    Args:
        model: Model exposing ``model_str`` and ``forward``.
        test_iter: Iterable of ``(img, label)`` batches.
        cuda: Whether to move the inputs to the GPU.

    Returns:
        Summed loss over all batches divided by the number of samples seen.
    """
    criterion = get_criterion(model.model_str)
    running_loss = 0
    samples_seen = 0

    for img, label in test_iter:
        # Prepare inputs: one-hot labels, flattened images.
        label = one_hot(label)
        img = img.view(img.size(0), -1)
        img = variable(img, cuda=cuda)
        label = variable(label, to_float=False, cuda=cuda)
        batch_size = img.size(0)

        if cuda:
            img = img.cuda()
            label = label.cuda()

        # Forward pass; only the first output is reshaped per sample.
        kwargs = _get_kwargs(model.model_str, img, label)
        first, second, third = model.forward(**kwargs)[:3]
        output = (first.view(batch_size, -1), second, third)
        loss = criterion(img, output)

        samples_seen += batch_size
        running_loss += t.sum(loss.data)  # .data keeps the graph out

    avg_loss = running_loss / samples_seen
    print("Validation loss is %.4f" % avg_loss)
    return avg_loss
Ejemplo n.º 19
0
    def gen_samples(self, z=None, classes=None, batch_size=None, device='cpu'):
        """Generate images, their latent-shifted versions and a mask.

        Exactly one of ``z`` and ``batch_size`` must be given.

        Args:
            z: Latent codes; sampled with ``self.make_noise`` when omitted.
            classes: Class inputs for the generator; drawn from
                ``self.G.mixed_classes`` when omitted.
            batch_size: Number of latent codes to sample when ``z`` is None.
            device: Device for sampled noise, classes and the shift.

        Returns:
            Tuple ``(img, img_shifted_pos, mask)``.

        Raises:
            ValueError: If ``self.p.synthezing`` is not a supported mode
                (previously this surfaced as an unbound ``mask`` at return).
        """
        assert (z is None) ^ (batch_size is
                              None), 'one of: z, batch_size should be provided'

        if z is None:
            z = self.make_noise(batch_size, device)
        if classes is None:
            classes = self.G.mixed_classes(z.shape[0]).to(device)

        shift = self.deformator(
            one_hot(self.G.dim_z, self.p.latent_shift_r,
                    self.background_dim).to(device))

        img = self.G(z, classes)
        img_shifted_pos = self.G(z + shift, classes)

        if self.p.synthezing == MaskSynthesizing.DIFF:
            # Mask from the difference between the two opposite shifts,
            # after mapping both images through 0.5 * x + 0.5.
            img_shifted_neg = self.G(z - shift, classes)
            diff = get_diff(0.5 * img_shifted_neg + 0.5,
                            0.5 * img_shifted_pos + 0.5)
            diff = normalize_per_sample(diff)
            mask = diff_to_mask(diff, self.p.mask_thr)

        elif self.p.synthezing == MaskSynthesizing.INTENSITY:
            # Mask from thresholding the channel-mean of the shifted image.
            intensity = 0.5 * torch.mean(img_shifted_pos, dim=1) + 0.5
            mask = (intensity < self.p.mask_thr).to(torch.long)

        else:
            raise ValueError(
                'unsupported mask synthesizing mode: {!r}'.format(
                    self.p.synthezing))

        return img, img_shifted_pos, mask
Ejemplo n.º 20
0
    def initialize(self, x_shared, y_shared, m_shared, start=True):
        """Set up shared buffers, locks and the on-disk cache for the loader.

        Args:
            x_shared, y_shared, m_shared: Stored on ``self`` unchanged.
            start: Call ``self.start()`` once everything is prepared.
        """
        self.x_shared, self.y_shared, self.m_shared = x_shared, y_shared, m_shared

        labels = np.load(self.labels_path)
        # A list (not a lazy ``map`` object) so the slice assignment also
        # works where ``map`` returns an iterator.
        labels[:] = [utils.one_hot(y, self.n_classes) for y in labels]
        self.labels_shm = shm.memmap(data=labels)

        # Three shared-memory buffers each for inputs and targets.
        self.x_shm, self.y_shm = [], []
        for _ in range(3):
            self.x_shm.append(shm.memmap(shape=self.x_shape, dtype=floatX))
            self.y_shm.append(shm.memmap(shape=self.y_shape, dtype=floatX))

        self.is_terminated = mp.Value(ctypes.c_bool, False)
        self.write_locks = [mp.Lock(), mp.Lock()]
        self.read_locks = [mp.Lock(), mp.Lock()]
        # All read/write locks start out held.
        for lock in self.write_locks: lock.acquire()
        for lock in self.read_locks: lock.acquire()
        self.validating = mp.Lock()

        self.loader_path = "/dev/shm/loader/"
        if not os.path.exists(self.loader_path): os.mkdir(self.loader_path)
        # The timestamp becomes a file-name prefix inside the directory.
        self.loader_path += "%f"%time()

        # Pre-fill every memory slot with a randomly chosen training video.
        self.mem_locks = []
        for i in range(self.n_mem_slots): 
            self.mem_locks.append(mp.Lock())
            video_number = split.train_idxs[npr.randint(len(split.train_idxs))]
            path = self.data_path+str(video_number).zfill(4)+".npy"
            shutil.copyfile(path, self.loader_path+"%i_%i"%(i, video_number))

        if start: self.start()

        signal.signal(signal.SIGINT, self.terminate)
        signal.signal(signal.SIGTERM, self.terminate)
Ejemplo n.º 21
0
    def sample(self, t=1.):
        """Draw a sample from the mixture of logistics.

        Args:
            t: Temperature; divides the mixture logits and the logistic
                scales.

        Returns:
            The three clamped channels concatenated along dim 1 and mapped
            through ``x / 2 + 0.5``.  Shape comments follow the original
            annotations (B, M, H, W layout).
        """
        # Gumbel-max selection of one mixture component per position.
        uniform = torch.Tensor(self.logit_probs.size()).uniform_(
            1e-5, 1. - 1e-5).cuda()
        gumbel = -torch.log(-torch.log(uniform))  # B, M, H, W
        winner = torch.argmax(self.logit_probs / t + gumbel, 1)
        sel = one_hot(winner, self.num_mix, dim=1)  # B, M, H, W
        sel = sel.unsqueeze(1)  # B, 1, M, H, W

        # Keep only the parameters of the selected component.
        means, log_scales, coeffs = (
            torch.sum(params * sel, dim=2)  # B, 3, H, W
            for params in (self.means, self.log_scales, self.coeffs))

        # Inverse-CDF sampling from the logistic; no rounding to 8-bit values.
        u = torch.Tensor(means.size()).uniform_(1e-5,
                                                1. - 1e-5).cuda()  # B, 3, H, W
        logistic_noise = torch.log(u) - torch.log(1. - u)
        x = means + torch.exp(log_scales) / t * logistic_noise  # B, 3, H, W

        # Later channels are conditioned linearly on the earlier ones.
        x0 = torch.clamp(x[:, 0, :, :], -1, 1.)  # B, H, W
        x1 = torch.clamp(x[:, 1, :, :] + coeffs[:, 0, :, :] * x0, -1,
                         1)  # B, H, W
        x2 = torch.clamp(x[:, 2, :, :] + coeffs[:, 1, :, :] * x0 +
                         coeffs[:, 2, :, :] * x1, -1, 1)  # B, H, W

        channels = [channel.unsqueeze(1) for channel in (x0, x1, x2)]
        return torch.cat(channels, 1) / 2. + 0.5
Ejemplo n.º 22
0
    def load_train(self):
        """Load labels, per-image info features and stored predictions.

        Populates ``info_train`` / ``info_valid`` (centroid distance, image
        shape, angle, minor and major axis per image), ``y_train`` /
        ``y_valid`` (arrays loaded from the prediction files) and
        ``labels_train`` / ``labels_valid`` (one-hot labels over 121
        classes), all split by the stored validation split indices.
        """
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        # The split file is read once (it used to be loaded twice in a row).
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        image_shapes = np.asarray([img.shape for img in data.load('train')
                                   ]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_train.pkl")

        # Columns [1, 0] swap the centroid coordinates before comparing them
        # with half the image shape.
        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] -
                                   image_shapes / 2)
        info = np.concatenate(
            (centroid_distance, image_shapes, moments["angles"][:, None],
             moments["minor_axes"][:, None], moments["major_axes"][:, None]),
            1).astype(np.float32)

        self.info_train = info[indices_train]
        self.info_valid = info[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid]
Ejemplo n.º 23
0
    def __getitem__(self, index: int) -> List[Any]:
        """Load, transform and validate the sample at ``index``.

        Returns:
            ``[filename] + transformed tensors + bounds`` where the bounds
            come from calling every generator in ``self.bounds_generators``
            on the tensors and the filename.

        Raises:
            ValueError: If the file suffix is neither ``.png`` nor ``.npy``.
        """
        filename: str = self.filenames[index]
        suffix: str = Path(filename).suffix

        images: List[D]
        if suffix == ".png":
            images = [Image.open(files[index]).convert('L') for files in self.files]
        elif suffix == ".npy":
            images = [np.load(files[index]) for files in self.files]
        else:
            raise ValueError(filename)

        # Apply the per-source transforms, then sanity-check the results.
        t_tensors: List[Tensor] = [transform(image)
                                   for transform, image in zip(self.transforms, images)]

        main_image, masks = t_tensors[0], t_tensors[1:]
        assert 0 <= main_image.min() and main_image.max() <= 1  # main image lies in [0, 1]
        b, w, h = main_image.shape
        assert b == 1
        for mask in masks:  # every ground truth must be one-hot encoded
            assert one_hot(mask, axis=0)
            assert mask.shape == (self.C, w, h)

        bounds = [generator(*t_tensors, filename) for generator in self.bounds_generators]

        return [filename] + t_tensors + bounds
Ejemplo n.º 24
0
def run_evaluation(agents, game, boards, name=''):
    """Play ``args.trials`` rounds and plot per-board win-rate histograms.

    Every trial resets the game on ``boards``, lets the agents act until the
    game finishes (or no board is valid), and accumulates a one-hot encoding
    of ``game.winners() + 1``.  The averaged result is summarised on stdout
    and histograms of columns 1 and 2 are saved to ``<name.lower()>.png``.
    """
    winners = 0.
    for trial in range(args.trials):
        states, any_valid = game.reset(boards)
        print(f'\r[{trial + 1:2d}/{args.trials:2d}] {name} boards eval...')

        step = 0
        while any_valid and not game.finished():
            print(f'\r[{step:4d}]', end='')
            step += 1
            chosen = [agent.act(state) for agent, state in zip(agents, states)]
            states, rewards = game.step(torch.stack(chosen, dim=0))
            for agent, reward in zip(agents, rewards):
                agent.observe(reward)
        print()

        for agent in agents:
            agent.reset()

        outcome = one_hot(game.winners() + 1, num_classes=game.num_players + 1)
        winners += outcome.float()
    winners /= args.trials

    print(winners.float().mean(0))
    print(winners.float().std(0))

    # One histogram panel per player; column index equals player number.
    for player in (1, 2):
        plt.subplot(1, 2, player)
        plt.hist(winners[:, player].float().cpu().numpy(), bins=args.trials + 1,
                 range=(0, 1), label=f'player {player}')
        plt.legend()
        plt.xlim(0., 1.)
    plt.savefig(f'{name.lower()}.png')
    plt.close()
Ejemplo n.º 25
0
    def create_decoder_input(self, final_caps, labels=None):
        """Build the masked, flattened decoder input from the final capsules.

        Every capsule except the one selected by the target class is zeroed,
        then the capsules are flattened per sample.  The targets are the
        given labels, or the model's own predictions when no labels are
        supplied.

        Args:
            final_caps (FloatTensor): Final capsules of shape: batch_size, num final caps, length final caps.
            labels (LongTensor, optional): Labels of shape: batch_size. When omitted, predictions computed
                from the logits of ``final_caps`` are used instead.

        Returns:
            FloatTensor: Flattened and masked version of the final capsules.

        """
        if labels is not None:
            targets = labels
        else:
            # No labels given: fall back to the predicted classes.
            targets = self.compute_predictions(self.compute_logits(final_caps))

        batch_size, num_caps = final_caps.shape[0], final_caps.shape[1]

        # One-hot mask over capsules, broadcast along the capsule length.
        capsule_mask = one_hot(targets, num_caps)[:, :, None]

        return (final_caps * capsule_mask).view(batch_size, -1)
Ejemplo n.º 26
0
def log_losses(y, t, eps=1e-15):
    """Return the per-row cross-entropy between predictions and targets.

    Args:
        y: Predicted probabilities, one row per sample.
        t: Targets; a 1-D array is expanded with ``one_hot`` first.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the log.
    """
    targets = one_hot(t) if t.ndim == 1 else t
    clipped = np.clip(y, eps, 1 - eps)
    return -np.sum(targets * np.log(clipped), axis=1)
Ejemplo n.º 27
0
        def edge_tensors(self, edge_types, device, type_onehot=True):
            """Return edge-type and edge-index tensors for both directions.

            Args:
                edge_types: List of type names; an edge's type is encoded as
                    its position in this list.
                device: Device on which the tensors are created.
                type_onehot: Return float32 one-hot types instead of indices.

            Returns:
                ``(edge_type, edge_indices)``; ``edge_indices`` has shape
                ``(1, 2 * num_edges, 2)`` and lists every edge followed by
                its reverse, with ``edge_type`` duplicated to match.
            """
            num_edges = len(self.edges)

            # Directed pairs: all edges a->b first, then all b->a.
            forward_pairs = [[e['part_a'], e['part_b']] for e in self.edges]
            backward_pairs = [[e['part_b'], e['part_a']] for e in self.edges]
            edge_indices = torch.tensor(
                forward_pairs + backward_pairs,
                device=device,
                dtype=torch.long).view(1, num_edges * 2, 2)

            type_ids = torch.tensor(
                [edge_types.index(e['type']) for e in self.edges],
                device=device,
                dtype=torch.long)

            if type_onehot:
                encoded = one_hot(inp=type_ids, label_count=len(edge_types))
                encoded = encoded.transpose(0, 1)
                encoded = encoded.view(1, num_edges, len(edge_types))
                edge_type = encoded.to(dtype=torch.float32)
            else:
                edge_type = type_ids.view(1, num_edges)

            # Repeat the types for the reversed edges (symmetric adjacency).
            edge_type = torch.cat([edge_type, edge_type], dim=1)

            return edge_type, edge_indices
Ejemplo n.º 28
0
  def _get_minibatch_feed_dict(self, target_q_values, 
                               non_terminal_minibatch, terminal_minibatch):
    """
    Build the feed_dict for train_op from the two minibatches.

    ``target_q_values`` holds one target-network value per non-terminal
    transition. Each expected q-value is the reward plus the discounted
    target value; terminal transitions receive a target value of 0.

    @return: feed_dict to be used for train_op
    """
    assert len(target_q_values) == len(non_terminal_minibatch)

    states, expected_q, actions = [], [], []

    # Non-terminal items come first so they line up with target_q_values;
    # zip_longest pads the terminal items with a target of 0.
    transitions = itertools.chain(non_terminal_minibatch, terminal_minibatch)
    paired = zip_longest(transitions, target_q_values, fillvalue=0)
    for (state, action, reward, _, _), future_q in paired:
      states.append(state)
      expected_q.append(reward + self.config.reward_discount * future_q)
      actions.append(utils.one_hot(action, self.env.action_space.n))

    feed_dict = {}
    feed_dict[self.network.x_placeholder] = states
    feed_dict[self.network.q_placeholder] = expected_q
    feed_dict[self.network.action_placeholder] = actions
    return feed_dict
    def generate_gradients(self, input_image, target_class):
        """Return the gradient of the target-class output w.r.t. the input.

        The model is put in evaluation mode for the forward pass and switched
        to training mode before returning.

        Args:
            input_image: Input tensor; a clone is differentiated, the
                original is left untouched.
            target_class: Class index (or indices) one-hot encoded to select
                which outputs are differentiated.
        """
        self.model.eval()

        x = input_image.clone()
        x.requires_grad = True

        with torch.enable_grad():
            model_output = self.model(x)
            self.model.zero_grad()

            # One-hot selector over the model's output columns.
            selector = one_hot(target_class, model_output.shape[1])
            selector = tensor2cuda(selector)

            gradients = torch.autograd.grad(model_output,
                                            x,
                                            grad_outputs=selector,
                                            only_inputs=True)
            grad = gradients[0]

            self.model.train()

        return grad
Ejemplo n.º 30
0
    def __init__(self, filename, neurons=None, activations=None, learning_rate=0.1, epoch=1000,
                 mini_batch_size=256):
        """Load a CSV dataset and configure the network hyper-parameters.

        The last CSV column is the label; all other columns are features.

        Args:
            filename: Path of the CSV file read with ``pandas.read_csv``.
            neurons: Layer sizes excluding the input layer; defaults to
                ``[2, 3, 1]``.  The list is copied, so neither the default
                nor a caller-supplied list is modified.
            activations: Stored as-is on the instance.
            learning_rate: Stored as-is on the instance.
            epoch: Stored as-is on the instance.
            mini_batch_size: Replaced by the number of examples when the
                dataset has fewer than 2000 rows.
        """
        # Copy: the list is edited below (output size, prepended input size)
        # and must not leak into other instances or the caller's list.
        neurons = [2, 3, 1] if neurons is None else list(neurons)

        data = pd.read_csv(filename).to_numpy()

        self.X = data[:, :len(data[0]) - 1]
        self.Y = data[:, len(data[0]) - 1]
        self.Y = self.Y.reshape((self.Y.shape[0], 1))
        self.number_of_examples, self.number_of_features = self.X.shape
        # NOTE: despite its name, number_of_classes holds the array of
        # distinct labels; Y becomes the index of each label in that array.
        self.number_of_classes, self.Y = np.unique(self.Y, return_inverse=True)
        self.multi_class = False

        if len(self.number_of_classes) > 2:
            self.multi_class = True
            neurons[-1] = len(self.number_of_classes)
            self.Y = utils.one_hot(self.Y)

        # Layer count is taken before the input layer is prepended.
        self.layers = len(neurons)
        self.neurons = neurons
        self.neurons.insert(0, self.number_of_features)
        self.activations = activations
        self.learning_rate = learning_rate
        self.epoch = epoch

        self.mini_batch_size = mini_batch_size
        if self.number_of_examples < 2000:
            self.mini_batch_size = self.number_of_examples

        self.predicted_weights = []
        self.predicted_b_values = []
Ejemplo n.º 31
0
def log_losses(y, t, eps=1e-15):
    """Compute the log loss of every sample.

    Args:
        y: Predicted probabilities, one row per sample.
        t: Targets, one-hot rows or a 1-D array that ``one_hot`` expands.
        eps: Clipping bound keeping predictions inside ``[eps, 1 - eps]``.

    Returns:
        1-D array with ``-sum(t * log(y))`` taken over each row.
    """
    if t.ndim == 1:
        t = one_hot(t)

    log_probabilities = np.log(np.clip(y, eps, 1 - eps))
    weighted = t * log_probabilities
    return -np.sum(weighted, axis=1)
Ejemplo n.º 32
0
    def predict(self, case_ids, catchphrase_repr_norm, case_sentences,
                 case_catchphrases):
        """
        Score every case against the catchphrase representations.

        case_ids: list of case_id
        catchphrase_repr_norm: representations handed to ``match_catch_repr``;
            its length defines the width of the one-hot ground truth.
        case_sentences / case_catchphrases: lookups indexed by case_id.

        Returns ``(y_true, y_pred)``: the one-hot ground truth and the
        vertically stacked predictions.
        """
        LOGGER.info("start to predict {}case".format(len(case_ids)))
        sentences = [case_sentences[cid] for cid in case_ids]

        LOGGER.info("retrieve relevant documents for queries in test set")
        per_case_scores = [
            self.match_catch_repr(sentence, catchphrase_repr_norm).detach().numpy()
            for sentence in tqdm(sentences)
        ]
        y_pred = np.vstack(per_case_scores)

        NUM_CATCHPHRASES = len(catchphrase_repr_norm)

        LOGGER.info("The prediction for The first 3 sentence:")
        LOGGER.info(y_pred[:3])

        # Ground truth in one-hot form.
        y_true = np.array([
            one_hot(case_catchphrases[cid], NUM_CATCHPHRASES)
            for cid in case_ids
        ])
        return y_true, y_pred
Ejemplo n.º 33
0
    def learn(self):
        """Run one policy/value update from the stored episode.

        Returns:
            Tuple ``(loss value, mean predicted state value)``.
        """
        num_steps = len(self.rewards)
        batch = self.states[0].shape[0]

        # Discounted return for every step, accumulated back to front.
        returns = torch.zeros((num_steps, batch), device=device)
        running = 0.
        for n in reversed(range(num_steps)):
            running = self.rewards[n] + self.discount * running
            returns[n, :] = running
        returns = returns.view(-1)

        actions = one_hot(torch.cat(self.actions), num_classes=self.num_actions)
        policy = torch.cat(self.policies).view(-1, self.num_actions)
        value = torch.cat(self.values).view(-1)

        advantage = returns - value

        # Value term: MSE between returns and predicted values.
        value_term = 0.5 * torch.mean(torch.pow(advantage, 2.))
        # Policy term: log-probability of the taken action, weighted by the
        # (detached) advantage.
        taken_prob = torch.sum(actions.float() * policy, dim=1)
        policy_term = torch.mean(advantage.detach() * torch.log(taken_prob + 1e-8))
        # Entropy penalty: mean of sum(p * log p).
        entropy_term = torch.mean(
            torch.sum(policy * torch.log(policy + 1e-8), dim=-1))

        loss = value_term - policy_term + self.beta * entropy_term
        loss.backward()

        self.optimizer.step()
        self.optimizer.zero_grad()
        self.reset()

        return loss.item(), torch.mean(value).item()
Ejemplo n.º 34
0
    def multinomial_train(self,
                          X,
                          y,
                          C,
                          w0=None,
                          b0=None,
                          eta=0.5,
                          max_iterations=1000):
        """Fit multinomial logistic regression with minibatch gradient steps.

        Inputs:
            - X: training features, a N-by-D numpy array
            - y: multiclass training labels, a N dimensional numpy array
            - C: number of classes in the data
            - w0: optional initial C-by-D weight matrix (zeros when omitted)
            - b0: optional initial bias vector of length C (zeros when omitted)
            - eta: learning rate
            - max_iterations: maximum number of iterations to perform

        The result is stored on the instance:
            - self.w: C-by-D weight matrix
            - self.b: bias vector of length C
        """
        N, D = X.shape

        w = np.zeros((C, D)) if w0 is None else w0
        assert w.shape == (
            C, D), f"check your w0, its dimension should be: {(C, D)}"

        b = np.zeros(C) if b0 is None else b0

        # Fold the bias into the weights: W has shape (D + 1, C) and X gets
        # a leading column of ones, giving shape (N, D + 1).
        W = np.hstack((b.reshape(-1, 1), w)).T
        X = np.insert(X, 0, 1, axis=1)
        Y = one_hot(y, nb_class=C)

        # Residuals (predicted probabilities minus targets) for all rows.
        P = softmax(X @ W) - Y

        tol = 1e-5
        for it in range(max_iterations):
            idx = minibatch(X)
            X_batch = X[idx, :]

            W_next = W - eta / N * X_batch.T @ P[idx, :]
            # Refresh the residuals of the rows just used.
            P[idx, :] = softmax(X_batch @ W_next) - Y[idx, :]

            largest_change = np.max(np.abs(W - W_next))
            W = W_next
            if largest_change < tol:
                print(f"Converged in {it} iters.")
                break

        # Split the augmented matrix back into weights and bias.
        W_rows = W.T
        w = W_rows[:, 1:]
        b = W_rows[:, 0]
        assert w.shape == (C, D)
        assert b.shape == (C, )
        self.w = w
        self.b = b
Ejemplo n.º 35
0
def compute_roc_data(dataloader, 
                     model,
                     device):
    """Collect one-hot targets and model outputs separately for each gender.

    The model is switched to eval mode and run over every batch of
    ``dataloader``.  Each batch is expected to unpack as ``(X, y, gender)``;
    column ``i`` of ``gender`` selects the samples of group ``i`` (the original
    code names index 0 "female" and index 1 "male").

    Args:
        dataloader: iterable yielding ``(X, y, gender)`` batches.
        model: callable model with an ``eval()`` method.
        device: device the inputs are moved to before the forward pass.

    Returns:
        ``(female_ys, male_ys, female_ps, male_ps)``: NumPy arrays obtained by
        concatenating the per-batch one-hot targets / predictions of each
        group along the first axis.

    Raises:
        ValueError: from ``np.concatenate`` if a group never had any sample.
    """
    # Local import so this block does not depend on how the file imports torch.
    import torch

    # Index 0 accumulates the "female" group, index 1 the "male" group.
    ys = ([], [])
    ps = ([], [])

    model.eval()
    # Pure inference: do not record an autograd graph for the forward passes.
    with torch.no_grad():
        for X, y, gender in dataloader:
            pred = model(X.to(device))
            for i in range(2):
                filt = (gender[:, i] == 1)
                # Skip groups that are absent from this batch.
                if y[filt].shape[0] > 0:
                    ys[i].append(utils.one_hot(y[filt]).cpu().numpy())
                    ps[i].append(pred[filt].detach().cpu().numpy())

    female_ys = np.concatenate(ys[0])
    male_ys = np.concatenate(ys[1])
    female_ps = np.concatenate(ps[0])
    male_ps = np.concatenate(ps[1])

    return female_ys, male_ys, female_ps, male_ps
 def iterate(self, loader, model, criterion, optimizer, training=True):
     """Run one pass over ``loader`` and return per-batch averaged statistics.

     Args:
         loader: iterable of ``(x, targets)`` batches that supports ``len()``.
         model: network to run; put in train or eval mode per ``training``.
         criterion: loss callable invoked as ``criterion(preds, targets)``.
         optimizer: optimizer stepped once per batch when ``training``.
         training: when true, back-propagate and update the parameters.

     Returns:
         dict mapping every key of ``status_properties`` to its accumulated
         value divided by the number of batches.  For an empty loader the
         zero-initialised dict is returned unchanged.
     """
     if training:
         model.train()
     else:
         model.eval()
     props = {k: 0 for k in status_properties}

     # Resolve the batch count up front: the original assigned it inside the
     # loop, so an empty loader hit an unbound name at the averaging step.
     num_batches = len(loader)
     if num_batches == 0:
         return props

     for x, targets in loader:
         x = x.to(device)
         targets = targets.view(-1).to(device)
         preds = model(x)
         # NOTE(review): this inspects self.criterion while the loss below uses
         # the `criterion` argument -- confirm they are always the same object.
         if isinstance(self.criterion, BCEDiceLoss):
             targets = one_hot(targets, 3)
         loss = criterion(preds, targets)
         props['loss'] += loss.item()
         a, a1, a2, a3 = accuracy_from_logits(preds.clone(),
                                              targets.clone())
         props['accuracy'] += a.item()
         props['accuracy_1'] += a1
         props['accuracy_2'] += a2
         props['accuracy_3'] += a3
         if training:
             optimizer.zero_grad()
             loss.backward()
             # Clip *before* the step; clipping afterwards cannot influence
             # the update that was just applied.
             clip_grad_norm_(model.parameters(), 0.5)
             optimizer.step()

     return {k: v / num_batches for k, v in props.items()}
Ejemplo n.º 37
0
 def build_set(corpus, vocab_len, size=2000, one_hot_enc=True):
     """Materialise the first ``size`` examples of ``corpus`` as ``(X, y)``.

     Examples come from ``corpus.generate(indexer=idxr, fitted=True)`` and are
     expected to be ``(input, target)`` pairs.

     Args:
         corpus: object exposing ``generate(indexer=..., fitted=...)``.
         vocab_len: vocabulary size passed to ``one_hot`` and
             ``to_categorical``.
         size: maximum number of examples to take.
         one_hot_enc: when true, the inputs are passed through ``one_hot``.

     Returns:
         Tuple ``(X, y)`` with the (optionally one-hot encoded) input array and
         the categorical targets.
     """
     examples = corpus.generate(indexer=idxr, fitted=True)
     # Transpose the stream of pairs into one tuple of inputs, one of targets.
     inputs, targets = zip(*itertools.islice(examples, size))
     features = np.asarray(inputs)
     if one_hot_enc:
         features = one_hot(features, vocab_len)
     return features, to_categorical(targets, nb_classes=vocab_len)
Ejemplo n.º 38
0
    def load_train(self):
        """Load the training set and split it into train / validation parts.

        Images come from ``data.load('train')``; labels are
        ``data.labels_train`` one-hot encoded with ``m=121`` and cast to
        float32.  The index arrays are read from the file at
        ``self.validation_split_path`` (keys ``indices_train`` and
        ``indices_valid``).  Results are stored on ``self`` as
        ``images_train``, ``labels_train``, ``images_valid`` and
        ``labels_valid``.
        """
        all_images = data.load('train')
        all_labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        partition = np.load(self.validation_split_path)
        train_idx = partition['indices_train']
        valid_idx = partition['indices_valid']

        # Training subset.
        self.images_train = all_images[train_idx]
        self.labels_train = all_labels[train_idx]
        # Validation subset.
        self.images_valid = all_images[valid_idx]
        self.labels_valid = all_labels[valid_idx]
Ejemplo n.º 39
0
    def load_train(self):
        """Load the training set and split it into train / validation parts.

        Images come from ``data.load('train')``; labels are
        ``data.labels_train`` one-hot encoded and cast to float32.  The split
        is unpickled from ``self.validation_split_path`` and must provide the
        keys ``indices_train`` and ``indices_valid``.  Results are stored on
        ``self`` as ``images_train``, ``labels_train``, ``images_valid`` and
        ``labels_valid``.
        """
        images = data.load('train')
        labels = utils.one_hot(data.labels_train).astype(np.float32)

        # Use a context manager so the file handle is closed even when
        # unpickling fails (the original never closed it).
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open(self.validation_split_path, 'rb') as split_file:
            split = pickle.load(split_file)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        self.images_train = images[indices_train]
        self.labels_train = labels[indices_train]
        self.images_valid = images[indices_valid]
        self.labels_valid = labels[indices_valid]
Ejemplo n.º 40
0
def update_agent(agent, replay_memory, gamma, optim, batch_size):
    """Perform one optimisation step of ``agent`` on a sampled minibatch.

    Args:
        agent: object providing ``num_actions``, ``compute_targets`` and
            ``loss``.
        replay_memory: buffer exposing ``sample(batch_size)``.
        gamma: discount factor forwarded to ``agent.compute_targets``.
        optim: optimizer; stepped once and then has its gradients zeroed.
        batch_size: number of transitions to sample.

    Returns:
        ``loss.data[0]`` of the computed loss.
    """
    batch = replay_memory.sample(batch_size)
    states, actions, rewards, next_states, non_ends = samples_to_tensors(batch)

    # One-hot mask of the taken actions, then the regression targets.
    action_mask = utils.one_hot(actions.unsqueeze(1), agent.num_actions)
    td_targets = agent.compute_targets(rewards, next_states, non_ends, gamma)

    loss = agent.loss(Variable(states),
                      Variable(action_mask),
                      Variable(td_targets))
    loss.backward()
    optim.step()
    # Gradients are cleared after the step, ready for the next call.
    optim.zero_grad()
    return loss.data[0]
Ejemplo n.º 41
0
    def compute_targets(self, rewards, next_states, non_ends, gamma):
        """Compute batch of targets for distributional dqn

        The greedy next-state distribution of the target network is shifted by
        the reward, scaled by ``gamma`` (zeroed where ``non_ends`` is 0),
        clamped to ``[self.vmin, self.vmax]`` and then redistributed onto the
        fixed support by splitting each atom's probability between its two
        neighbouring bins.

        params:
            rewards: Tensor [batch, 1]
            next_states: Tensor [batch, channel, w, h]
            non_ends: Tensor [batch, 1]
            gamma: float

        returns:
            CUDA tensor built from a float32 array of shape
            (batch, self.num_atoms) holding the projected probability mass.

        raises:
            AssertionError: if ``self.double_dqn`` is set (not supported).
        """
        assert not self.double_dqn, 'not supported yet'

        # get next distribution
        # (volatile=True is the legacy pre-0.4 PyTorch flag for inference-only
        # graphs, i.e. no gradients are tracked through the target network)
        next_states = Variable(next_states, volatile=True)
        # [batch, num_actions], [batch, num_actions, num_atoms]
        next_q_vals, next_probs = self._q_values(self.target_q_net, next_states)
        # index of the highest-valued action per row (second item of max())
        next_actions = next_q_vals.data.max(1, True)[1] # [batch, 1]
        # one-hot mask over actions, broadcast over the atom axis; summing over
        # the action axis keeps only the greedy action's distribution
        next_actions = utils.one_hot(next_actions, self.num_actions).unsqueeze(2)
        next_greedy_probs = (next_actions * next_probs.data).sum(1)

        # transform the distribution
        rewards = rewards.unsqueeze(1)
        non_ends = non_ends.unsqueeze(1)
        # Bellman shift of every support point; clamp_ works in place
        proj_zpoints = rewards + gamma * non_ends * self.zpoints.data
        proj_zpoints.clamp_(self.vmin, self.vmax)

        # project onto shared support
        # b is the fractional bin position of each shifted support point
        b = (proj_zpoints - self.vmin) / self.delta_z
        lower = b.floor()
        upper = b.ceil()
        # handle corner case where b is integer
        # (floor == ceil there; move `lower` one bin down so the two weights
        # (upper - b) and (b - lower) still sum to 1)
        eq = (upper == lower).float()
        lower -= eq
        # if that moved `lower` below bin 0, shift both neighbours up by one
        lt0 = (lower < 0).float()
        lower += lt0
        upper += lt0

        # note: it's faster to do the following on cpu
        # share of the probability assigned to the lower / upper neighbour
        ml = (next_greedy_probs * (upper - b)).cpu().numpy()
        mu = (next_greedy_probs * (b - lower)).cpu().numpy()

        # bin positions become integer column indices for the scatter below
        lower = lower.cpu().numpy().astype(np.int32)
        upper = upper.cpu().numpy().astype(np.int32)

        batch_size = rewards.size(0)
        mass = np.zeros((batch_size, self.num_atoms), dtype=np.float32)
        brange = range(batch_size)
        # Accumulate one source atom at a time: every row index in `brange`
        # occurs once per statement, so the fancy-indexed `+=` cannot drop
        # contributions through duplicate indices.
        # NOTE(review): the indexing treats lower/upper/ml/mu as
        # [batch, num_atoms]; confirm this matches the shapes produced by the
        # unsqueeze(1) calls above.
        for i in range(self.num_atoms):
            mass[brange, lower[brange, i]] += ml[brange, i]
            mass[brange, upper[brange, i]] += mu[brange, i]

        return torch.from_numpy(mass).cuda()
Ejemplo n.º 42
0
def sample_evolution(start, cls, ns=100):
    """Plot the evolution of a sampling chain, forever.

    A sampling function is compiled, then the chain state is repeatedly
    advanced ``ns`` times and plotted.  The loop has no exit condition, so the
    function never returns.

    Args:
        start: start data, used as the initial value fed for ``rbm.v``.
        cls: class label(s); one-hot encoded with ``dim=10`` and fed for
            ``rbm.s`` throughout.
        ns: number of sampling rounds between two plots.
    """
    sampler = t.compile_function(initial_vmap, mb_size=1, monitors=[m_model],
                                 name='evaluate', train=False, mode=mode)

    state = start
    plot_data(state)

    # Insert a singleton middle axis: (rows, cols) -> (rows, 1, cols).
    encoded = one_hot(np.atleast_2d(cls), dim=10)
    clamped_label = encoded.reshape((encoded.shape[0], 1, encoded.shape[1]))

    while True:
        for _ in range(ns):
            # draw a new sample; the last yielded output becomes the new state
            for outputs in sampler({rbm.v: state, rbm.s: clamped_label}):
                state = outputs[0]

        plot_data(state)
Ejemplo n.º 43
0
    def load_train(self):
        """Load labels, image shapes and stored predictions, split train/valid.

        Labels are ``data.labels_train`` one-hot encoded with ``m=121`` (cast
        to float32).  The split indices come from
        ``DEFAULT_VALIDATION_SPLIT_PATH`` (keys ``indices_train`` and
        ``indices_valid``).  Predictions are read from ``self.train_pred_file``
        and ``self.valid_pred_file``.

        Sets ``image_shapes_train``, ``image_shapes_valid``, ``y_train``,
        ``y_valid``, ``labels_train`` and ``labels_valid`` on ``self``.
        """
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        # Load the split once (the original read the same file twice).
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        # The shape of every training image is used as a feature.
        image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
        self.image_shapes_train = image_shapes[indices_train]
        self.image_shapes_valid = image_shapes[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid]
Ejemplo n.º 44
0
    def load_train(self):
        """Load training data plus pseudo-labelled data for one shard.

        The shuffled test indices in ``test_shuffle_seed0.npy`` are cut into
        thirds; ``self.shard`` (0, 1 or 2) selects which third is left out,
        and the remaining two thirds of the test set are used as
        pseudo-labelled images.  When ``valid_pred_file`` is set on the
        instance, the validation images and their predictions are prepended
        to the pseudo-labelled set.

        Sets ``images_train``, ``labels_train``, ``images_pseudo``,
        ``labels_pseudo``, ``images_valid`` and ``labels_valid`` on ``self``
        (and ``validation_split_path`` when it was missing).

        Raises:
            ValueError: if ``shard`` is not set on the instance or is not one
                of 0, 1, 2.
        """
        train_images = data.load('train')
        train_labels = utils.one_hot(data.labels_train).astype(np.float32)

        # Only instance attributes count here, exactly as in the original.
        has_valid_predictions = "valid_pred_file" in self.__dict__
        if has_valid_predictions:
            valid_pseudo_labels = np.load(self.valid_pred_file).astype(np.float32)
        else:
            print("No valid_pred_file set. Only using test-set for pseudolabeling!!")

        shuffle = np.load("test_shuffle_seed0.npy")
        if "shard" not in self.__dict__:
            raise ValueError("Missing argument: shard: (should be value in {0, 1, 2})")
        if self.shard not in [0, 1, 2]:
            raise ValueError("Wrong argument: shard: (should be value in {0, 1, 2})")

        # Floor division keeps the slice bounds integral on Python 3 as well
        # ("/" yields floats there, which are invalid slice indices).
        N = len(shuffle)
        one_third = N // 3
        two_thirds = 2 * N // 3
        if self.shard == 0:
            train_shard = shuffle[one_third:]
        elif self.shard == 1:
            train_shard = np.concatenate((shuffle[:one_third], shuffle[two_thirds:]))
        else:  # shard == 2, guaranteed by the validation above
            train_shard = shuffle[:two_thirds]

        test_images = data.load('test')[train_shard]
        test_pseudo_labels = np.load(self.test_pred_file)[train_shard].astype(np.float32)
        print(test_pseudo_labels.shape)

        if not hasattr(self, 'validation_split_path'):
            self.validation_split_path = DEFAULT_VALIDATION_SPLIT_PATH
        split = np.load(self.validation_split_path)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        self.images_train = train_images[indices_train]
        self.labels_train = train_labels[indices_train]
        if has_valid_predictions:
            self.images_pseudo = np.concatenate((train_images[indices_valid], test_images), 0)
            self.labels_pseudo = np.concatenate((valid_pseudo_labels, test_pseudo_labels), 0)
        else:
            self.images_pseudo = test_images
            self.labels_pseudo = test_pseudo_labels

        self.images_valid = train_images[indices_valid]
        self.labels_valid = train_labels[indices_valid]
Ejemplo n.º 45
0
    def compute_targets(self, rewards, next_states, non_ends, gamma):
        """Compute batch of targets for dqn

        The target is ``rewards + gamma * Q_next * non_ends`` where ``Q_next``
        is the target network's value of the chosen next action: the action
        preferred by the online network when ``self.double_dqn`` is set,
        otherwise the target network's own maximum.

        params:
            rewards: Tensor [batch]
            next_states: Tensor [batch, channel, w, h]
            non_ends: Tensor [batch]
            gamma: float
        """
        target_q = self.target_q_values(next_states)

        if not self.double_dqn:
            # max over the action axis returns a (values, indices) pair
            bootstrap_q = target_q.max(1)[0]
        else:
            # pick the action with the online net, evaluate it with the
            # target net through a one-hot mask
            greedy_actions = self.online_q_values(next_states).max(1, True)[1]
            action_mask = utils.one_hot(greedy_actions, self.num_actions)
            bootstrap_q = (target_q * action_mask).sum(1)

        return rewards + gamma * bootstrap_q * non_ends
Ejemplo n.º 46
0
    def load_train(self):
        """Load labels, per-image side information and stored predictions.

        Labels are ``data.labels_train`` one-hot encoded with ``m=121`` (cast
        to float32); the split indices come from
        ``DEFAULT_VALIDATION_SPLIT_PATH``.  The side information concatenates,
        column-wise: the absolute distance of the stored centroid from the
        image centre, the image shape, and the stored angle, minor-axis and
        major-axis values.

        Sets ``info_train``, ``info_valid``, ``y_train``, ``y_valid``,
        ``labels_train`` and ``labels_valid`` on ``self``.
        """
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        # Load the split once (the original read the same file twice).
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_train.pkl")

        # Centroid columns are swapped ([1, 0]) before being compared with
        # half the image shape.
        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
        info = np.concatenate(
            (centroid_distance,
             image_shapes,
             moments["angles"][:, None],
             moments["minor_axes"][:, None],
             moments["major_axes"][:, None]),
            1).astype(np.float32)

        self.info_train = info[indices_train]
        self.info_valid = info[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid]
Ejemplo n.º 47
0
    def load_train(self):
        """Load labels, the selected feature groups and stored predictions.

        Labels are ``data.labels_train`` one-hot encoded with ``m=121`` (cast
        to float32); the split indices come from
        ``DEFAULT_VALIDATION_SPLIT_PATH``.  Feature groups are read from
        ``data/features_train.pkl``; the group ``"aaronmoments"`` is computed
        here from the image moment statistics when it is requested in
        ``self.features``.  All requested groups are concatenated column-wise.

        Sets ``info_train``, ``info_valid``, ``y_train``, ``y_valid``,
        ``labels_train`` and ``labels_valid`` on ``self``.
        """
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        # Load the split once (the original read the same file twice).
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']
        features = np.load("data/features_train.pkl").item()

        if "aaronmoments" in self.features:
            print("aaronmoments")

            def normalize(x):
                # Standardisation is currently disabled: values pass through
                # unchanged.
                return x

            image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
            moments = np.load("data/image_moment_stats_v1_train.pkl")
            # Centroid columns are swapped ([1, 0]) before being compared with
            # half the image shape.
            centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
            angles = moments["angles"][:, None]
            minor_axes = moments["minor_axes"][:, None]
            major_axes = moments["major_axes"][:, None]
            centroid_distance = normalize(centroid_distance)
            angles = normalize(angles)
            minor_axes = normalize(minor_axes)
            major_axes = normalize(major_axes)
            features["aaronmoments"] = np.concatenate(
                [centroid_distance, angles, minor_axes, major_axes], 1).astype(np.float32)

        info = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32)

        print(info.shape)

        self.info_train = info[indices_train]
        self.info_valid = info[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid]
Ejemplo n.º 48
0
                        if prev != 0: s+= "%i,%i,%i\n"%(prev, start+1, j)
                        start = j
                prev = p

            file = open(csv_path+"Sample"+str(split.test_idxs[i]).zfill(4)+"_prediction.csv", "w")
            file.write(s[:-1])
            file.close()
            print "%i%%"%int(np.round((i+1)/float(len(preds))*100.))
    from sklearn import metrics
    labels = np.load("data/labels_raw_test.npy")
    # labelsn = labels.copy()
    for i, lbl in enumerate(labels):
        labels[i] = lbl[:-1]
        # labels[i] = lbl[1:]

    labels[:] = map(lambda y: utils.one_hot(y,21), labels)



    # print labels[0].shape, preds[0].shape
    roc, prec, rec, acc = [], [],[], []
    l, p = np.vstack(labels), np.vstack(preds)
    # ln = np.vstack(labelsn)

    # print np.mean(l==p)
    y_pred = np.argmax(p,1)
    y_true = np.argmax(l,1)
    # print l.shape
    # print y.shape
    acc = np.mean(y_true==y_pred)
    rec = metrics.recall_score(y_true, y_pred)
def run():
    """Train a small convolutional network on a subsample of MNIST.

    Loads the pickled dataset from ``../data/mldata/mnist_data.pkl``, uses the
    first 60000 samples as the training pool (3000 of them are drawn at
    random, with replacement) and the rest as test set, trains for 3
    iterations and prints the training duration and the test error rate.
    """
    # Fetch data
    # The context manager closes the file even if unpickling fails.
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open('../data/mldata/mnist_data.pkl', 'rb') as f1:
        mnist = pickle.load(f1)
    split = 60000
    X_train = np.reshape(mnist.data[:split], (-1,1,28,28))/255.0
    Y_train = mnist.target[:split]
    X_test = np.reshape(mnist.data[split:], (-1,1,28,28))/255.0
    Y_test = mnist.target[split:]
    n_classes = np.unique(Y_train).size

    # Downsample training data
    n_train_samples = 3000
    # randint's upper bound is exclusive, so `split` here covers the same
    # index range [0, split-1] as the deprecated random_integers call did.
    train_idxs = np.random.randint(0, split, n_train_samples)
    X_train = X_train[train_idxs, ...]
    Y_train = Y_train[train_idxs, ...]
    Y_train_one_hot = one_hot(Y_train)

    # Format first, then print: works as a statement (Python 2) and as a
    # function call (Python 3).
    print('number of train samples: %d' % n_train_samples)
    print('number of test samples: %d' % X_test.shape[0])

    # setup network
    nn = NeuralNetwork(
        layers = [
            Layers.Convolution(
                n_feats=12, 
                filter_shape=(5,5),
                strides=(1,1),
                weight_scale=0.1,
                weight_decay=0.001),
            Layers.Activation('relu'),
            Layers.Pool(
                pool_shape=(2,2),
                strides=(2,2),
                mode='max'),
            Layers.Convolution(
                n_feats=16,
                filter_shape=(5,5),
                strides=(1,1),
                weight_scale=0.1,
                weight_decay=0.001),
            Layers.Activation('relu'),
            Layers.Flatten(),
            Layers.Linear(
                n_out=n_classes,
                weight_scale=0.1,
                weight_decay=0.02),
            Layers.Softmax()
            ]
        )

    # To verify the gradients, run
    # nn.check_gradients(X_train[:10], Y_train_one_hot[:10]) here.

    # Train neural network
    t0 = time.time()
    nn.train(X_train, Y_train_one_hot, learning_rate=0.05, max_iter=3, batch_size=32)
    t1 = time.time()
    print('Duration: %.1fs' % (t1-t0))

    # Evaluate on test data (nn.error is given the raw, non one-hot labels)
    error = nn.error(X_test, Y_test)
    print('Test error rate: %.4f' % error)
Ejemplo n.º 50
0
    model.compile(OPTIMIZER, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()

    print("Starting training")
    db = E.use(path, exp_id="char-fill").model(args.model)
    with db.session(vars(args), ensure_unique=False) as session:
        try:
            from time import time
            start = time()
            for e in range(EPOCHS):
                losses = []
                batches = train.generate_batches(
                    indexer=idxr, batch_size=BATCH_SIZE),
                batches = itertools.islice(batches, NUM_BATCHES)
                for b, (X, y) in enumerate(batches):
                    X = np.asarray(X) if has_emb else one_hot(X, n_chars)
                    y = to_categorical(y, nb_classes=n_chars)
                    loss, _ = model.train_on_batch(X, y)
                    losses.append(loss)
                    if b % args.loss == 0:
                        dev_loss, dev_acc = model.test_on_batch(X_dev, y_dev)
                        avg_loss, last_loss = np.mean(losses), losses[-1]
                        log_batch(e, b, avg_loss, last_loss, dev_loss, dev_acc)
                print()
                session.add_epoch(
                    e, {'training_loss': str(np.mean(losses)),
                        'dev_loss': str(dev_loss),
                        'dev_acc': str(dev_acc)})
        except KeyboardInterrupt:
            print("Interrupted")
        finally:
Ejemplo n.º 51
0
mode = None


# load data
print(">> Loading dataset...")

# The context manager closes the archive even if unpickling fails.
# NOTE: pickle must only be used on trusted, locally produced files.
with gzip.open('datasets/mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f)

train_set_x, train_set_y = train_set
valid_set_x, valid_set_y = valid_set
test_set_x, test_set_y = test_set

# convert labels to one hot representation
# (labels are passed as a column: atleast_2d gives a row, .T transposes it)
train_set_y_oh = one_hot(np.atleast_2d(train_set_y).T)
valid_set_y_oh = one_hot(np.atleast_2d(valid_set_y).T)
test_set_y_oh = one_hot(np.atleast_2d(test_set_y).T)

# dim 0 = minibatches, dim 1 = units, dim 2 = states
# (a singleton "units" axis is inserted between the two existing axes)
train_set_y_oh = train_set_y_oh.reshape((train_set_y_oh.shape[0], 1, train_set_y_oh.shape[1]))
valid_set_y_oh = valid_set_y_oh.reshape((valid_set_y_oh.shape[0], 1, valid_set_y_oh.shape[1]))
test_set_y_oh = test_set_y_oh.reshape((test_set_y_oh.shape[0], 1, test_set_y_oh.shape[1]))


# make the sets a bit smaller for testing purposes
train_set_x = train_set_x[:10000]
train_set_y_oh = train_set_y_oh[:10000]
valid_set_x = valid_set_x[:1000]
valid_set_y_oh = valid_set_y_oh[:1000]
Ejemplo n.º 52
0
    def train(self, learning_schedule = {0: 0.015, 500: 0.0015,  800: 0.00015, 1000: 0.000015}, 
                momentum = 0.9, max_epochs=3000, save_every = 20, save_path = os.getcwd()):
        """Train the network with Nesterov momentum and periodic checkpoints.

        Args:
            learning_schedule: dict mapping a batch index to the learning rate
                that takes effect at that batch; key 0 gives the initial rate.
                The default dict is shared between calls but only read here.
            momentum: momentum passed to the Nesterov update rule.
            max_epochs: upper bound for the ``epoch`` counter.
            save_every: checkpoint every ``save_every`` batches.
            save_path: directory in which ``model_params.pkl`` is written
                (the default is the working directory at definition time).

        Raises:
            RuntimeError: if the training function returns NaN.

        Side effects: sets ``save_every``, ``metadata_tmp_path``,
        ``learning_rate_schedule``, ``learning_rate``, ``momentum``,
        ``updates``, ``_costs``, ``_train_errors`` and ``_dev_errors`` on
        ``self`` and writes checkpoint pickles to ``metadata_tmp_path``.
        """
        self.save_every = save_every
        self.metadata_tmp_path = save_path+"/model_params.pkl"
        self.learning_rate_schedule = learning_schedule
        self.learning_rate = theano.shared(np.float32(self.learning_rate_schedule[0]))
        self.momentum = momentum

        #for trainer
        self.updates = nn.updates.nesterov_momentum(self.loss, self.all_params, self.learning_rate, self.momentum)

        train_fn = self.nesterov_trainer() #nesterov with momentum.
        train_set_iterator = DataLoader(os.getcwd(),train_test_valid='train')
        best_dev_loss = numpy.inf
        dev_set_iterator = DataLoader(os.getcwd(), train_test_valid='valid')
        dev_set_iterator.build_unequal_samples_map()

        patience = 1000  
        patience_increase = 2.
        improvement_threshold = 0.995
        done_looping = False
        print('... training the model')
        epoch = 0
        timer = None

        #for plotting
        self._costs = []
        self._train_errors = []
        self._dev_errors = []

        while (epoch < max_epochs) and (not done_looping):
            losses = []
            avg_costs = []
            timer = time.time()
            for iteration, (x, y) in enumerate(train_set_iterator):

                if iteration in self.learning_rate_schedule:
                    lr = np.float32(self.learning_rate_schedule[iteration])
                    print("  setting learning rate to %.7f" % lr)
                    self.learning_rate.set_value(lr)

                print("  load training data onto GPU")
                avg_cost = train_fn(x, y)
                if np.isnan(avg_cost):
                    raise RuntimeError("NaN DETECTED.")

                if isinstance(avg_cost, list):
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)

                #for saving the batch
                if ((iteration + 1) % save_every) == 0:
                    print('')
                    print("Saving metadata, parameters")

                    # Binary mode: pickle output is bytes (text mode fails on
                    # Python 3 and can corrupt the file on Windows).
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)},
                                     f, pickle.HIGHEST_PROTOCOL)

                mean_train_loss = numpy.mean(avg_costs)

                #accuracy assessment
                output = utils.one_hot(self.predict_(x)(),m=20)
                train_loss = utils.log_loss(output, y)
                # 1 - accuracy, i.e. the error rate on this batch
                acc = 1 - utils.accuracy(output, y)
                losses.append(train_loss)
                del output
                del x
                del y

                print('  epoch %i took %f seconds' %
                    (epoch, time.time() - timer))
                print('  epoch %i, avg costs %f' %
                    (epoch, mean_train_loss))
                print('  epoch %i, training error %f' %
                    (epoch, acc))

                #for plotting
                self._costs.append(mean_train_loss)
                self._train_errors.append(acc)

                #valid accuracy
                xd,yd = dev_set_iterator.random_batch()

                valid_output = utils.one_hot(self.predict_(xd)(),m=20)
                # validation error rate (1 - accuracy)
                valid_acc = 1 - utils.accuracy(valid_output, yd)
                self._dev_errors.append(valid_acc)
                del valid_output
                del xd
                del yd

                if valid_acc < best_dev_loss:
                    # Compare against the *previous* best before overwriting
                    # it; the original updated best_dev_loss first, so this
                    # test could never succeed and patience never grew.
                    if (valid_acc < best_dev_loss *
                        improvement_threshold):
                        patience = max(patience, iteration * patience_increase)
                    best_dev_loss = valid_acc
                    # Snapshot of the parameters; not used further in this
                    # method.
                    best_params = copy.deepcopy(self.all_params )
                    print('!!!  epoch %i, validation error of best model %f' %
                        (epoch, valid_acc))
                    print('')
                    print("Saving best performance parameters")
                    with open(self.metadata_tmp_path, 'wb') as f:
                        pickle.dump({'losses_train': avg_costs,'param_values': nn.layers.get_all_param_values(self.output_layer)},
                                     f, pickle.HIGHEST_PROTOCOL)
                    # NOTE(review): early stopping is only evaluated on
                    # improving batches -- confirm this nesting is intended.
                    if patience <= iteration:
                        done_looping = True
                        break
                # NOTE(review): `epoch` is incremented once per batch, not
                # once per pass over the data -- confirm this is intended.
                epoch += 1
# Validation targets selected through the stored split indices.
s = np.load("validation_split_v1.pkl")
t_valid = data.labels_train[s['indices_valid']]

predictions_list = [np.load(path) for path in valid_predictions_paths]
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)  # num_sources x num_datapoints x 121

print("Individual prediction errors")
individual_prediction_errors = [utils.log_loss(p, t_valid) for p in predictions_list]
del predictions_list
for i in range(n_models):
    # "%s %s" reproduces the space-separated output of the former two-argument
    # print statement and behaves the same on Python 2 and 3.
    print("%s %s" % (individual_prediction_errors[i], os.path.basename(valid_predictions_paths[i])))
print("")

# optimizing weights
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(utils.one_hot(t_valid))  # targets
W = T.vector('W')

# NOTE: `s` is rebound here from the split data to the softmax-normalised
# weights, reshaped so they broadcast over the prediction stack.
s = T.nnet.softmax(W).reshape((W.shape[0], 1, 1))
weighted_avg_predictions = T.sum(X * s, axis=0)  # T.tensordot(X, s, [[0], [0]])
error = nn_plankton.log_loss(weighted_avg_predictions, t)
grad = T.grad(error, W)

f = theano.function([W], error)
g = theano.function([W], grad)

# Start from equal weights (softmax of an all-zero vector is uniform).
w_init = np.zeros(n_models, dtype=theano.config.floatX)
out, loss, _ = scipy.optimize.fmin_l_bfgs_b(f, w_init, fprime=g, pgtol=1e-09, epsilon=1e-08, maxfun=10000)

# Apply the softmax to the optimised parameters to get the final weights.
weights = np.exp(out)
weights /= weights.sum()