Example #1
def get_mnist(location="./", batch_size=64, labels_per_class=100):
    from functools import reduce
    from operator import __or__

    import numpy as np
    import torch
    from torch.utils.data.sampler import SubsetRandomSampler
    from torchvision.datasets import MNIST
    import torchvision.transforms as transforms
    from utils import onehot

    # n_labels and cuda were module-level globals in the original source;
    # defined here so the snippet is self-contained.
    n_labels = 10
    cuda = torch.cuda.is_available()

    flatten_bernoulli = lambda x: transforms.ToTensor()(x).view(-1).bernoulli()

    mnist_train = MNIST(location,
                        train=True,
                        download=True,
                        transform=flatten_bernoulli,
                        target_transform=onehot(n_labels))
    mnist_valid = MNIST(location,
                        train=False,
                        download=True,
                        transform=flatten_bernoulli,
                        target_transform=onehot(n_labels))

    def get_sampler(labels, n=None):
        # Only choose digits in n_labels
        (indices, ) = np.where(
            reduce(__or__, [labels == i for i in np.arange(n_labels)]))

        # Ensure uniform distribution of labels
        np.random.shuffle(indices)
        indices = np.hstack([
            list(filter(lambda idx: labels[idx] == i, indices))[:n]
            for i in range(n_labels)
        ])

        indices = torch.from_numpy(indices)
        sampler = SubsetRandomSampler(indices)
        return sampler

    # Dataloaders for MNIST. Note: train_labels/test_labels were renamed to
    # .targets in newer torchvision releases.
    labelled = torch.utils.data.DataLoader(
        mnist_train,
        batch_size=batch_size,
        num_workers=2,
        pin_memory=cuda,
        sampler=get_sampler(mnist_train.train_labels.numpy(),
                            labels_per_class))
    unlabelled = torch.utils.data.DataLoader(
        mnist_train,
        batch_size=batch_size,
        num_workers=2,
        pin_memory=cuda,
        sampler=get_sampler(mnist_train.train_labels.numpy()))
    validation = torch.utils.data.DataLoader(
        mnist_valid,
        batch_size=batch_size,
        num_workers=2,
        pin_memory=cuda,
        sampler=get_sampler(mnist_valid.test_labels.numpy()))

    return labelled, unlabelled, validation
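Across these examples, onehot is used in several distinct ways. In Example #1 it is a factory: onehot(n_labels) is passed as target_transform, so it must return a callable that encodes a single label. A minimal sketch consistent with that usage (the actual utils module is not shown here, so treat this as an assumption):

import numpy as np

def onehot(n):
    # Return an encoder mapping an integer label to a length-n one-hot vector.
    def encode(label):
        y = np.zeros(n, dtype=np.float32)
        y[label] = 1.0
        return y
    return encode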
Example #2
    def init_train_and_val_classes_and_labels(self):
        '''
        Initialize the val and train classes and labels. These properties are
        used by various other methods.
        :return: self.val_classes, self.val_labels, self.train_classes, self.train_labels
        '''
        self.val_classes = self.val_batches.classes
        self.val_labels = onehot(self.val_classes)

        self.train_classes = self.train_batches.classes
        self.train_labels = onehot(self.train_classes)
Example #3
def true_online_gtd(env,
                    episodes,
                    target,
                    behavior,
                    Lambda,
                    gamma=lambda x: 0.95,
                    alpha=0.05,
                    beta=0.0001,
                    diagnose=False,
                    evaluation=None):
    """
    episodes:   number of episodes
    target:     target policy matrix (|S|*|A|)
    behavior:   behavior policy matrix (|S|*|A|)
    Lambda:     LAMBDA object determining each lambda for each feature (or state or observation)
    gamma:      anonymous function determining each lambda for each feature (or state or observation)
    alpha:      learning rate for the weight vector of the values
    beta:       learning rate for the auxiliary vector for off-policy
    """
    learner = TRUE_ONLINE_GTD_LEARNER(env)
    if evaluation is not None:
        value_trace = np.zeros((episodes, 1))
        value_trace[:] = np.nan
    else:
        value_trace = []
    for epi in range(episodes):
        s_curr, done = env.reset(), False
        x_curr = onehot(s_curr, env.observation_space.n)
        learner.refresh()
        if evaluation is not None:
            value_trace[epi, 0] = evaluation(learner.w_curr, 'expectation')
        else:
            value_trace.append(np.copy(learner.w_curr))
        while not done:
            action = decide(s_curr, behavior)
            rho_curr = importance_sampling_ratio(target, behavior, s_curr,
                                                 action)
            s_next, r_next, done, _ = env.step(action)
            x_next = onehot(s_next, env.observation_space.n)
            if diagnose:
                print('rho_curr: %.2e, lambda_curr: %.2e, lambda_next: %.2e' %
                      (rho_curr, Lambda.value(x_curr), Lambda.value(x_next)))
            learner.learn(r_next, gamma(x_next), gamma(x_curr), x_next, x_curr,
                          Lambda.value(x_next), Lambda.value(x_curr), rho_curr,
                          alpha, beta)
            learner.next()
            x_curr = x_next
    return value_trace
Example #4
    def train(self,
              data_loader,
              valid_loader,
              epochs,
              learning_rate,
              dropout_prob=None):
        losses_train = []
        losses_valid = []
        for epoch in range(epochs):
            print("epoch", epoch)
            # Training phase
            epoch_loss_train = 0
            for step, (x, y) in enumerate(data_loader):
                # x:[b, 28, 28] -> [b, 784] , y:[b, 1] -> [b, 10]
                x = x.reshape(-1, 28 * 28)
                y = onehot(y, 10)
                nets, pred = self.forward(x, dropout_prob)
                loss = cross_entropy(y, pred)
                epoch_loss_train += loss
                grads = self.backward(nets, y, pred, dropout_prob)
                # Update parameters with SGD
                # self.params = optimizer.optimize(self.weight_num, self.params, grads, y.shape[0])
                self.params = self.optimizer.optimize(self.weight_num,
                                                      self.params, grads,
                                                      y.shape[0])

                if step % 100 == 0:
                    print("epoch {} training step {} loss {:.4f}".format(
                        epoch, step, loss))
            losses_train.append(epoch_loss_train)
            print(epoch_loss_train)
            data_loader.restart()
            # Validation phase: forward pass only
            epoch_loss_valid = 0
            for step, (x, y) in enumerate(valid_loader):
                x = x.reshape(-1, 28 * 28)
                y = onehot(y, 10)
                nets, pred = self.forward(x, dropout_prob)
                loss = cross_entropy(y, pred)
                epoch_loss_valid += loss

                if step % 100 == 0:
                    print("epoch {} validation step {} loss {:.4f}".format(
                        epoch, step, loss))
            losses_valid.append(epoch_loss_valid)
            valid_loader.restart()
        his = {'train_loss': losses_train, 'valid_loss': losses_valid}
        return his
Example #5
 def Q_estimates(self, state, goal=None):
     # Generate Q values for all actions.
     if goal is None:
         goal = self.w
     else:
         goal = utils.onehot(goal, self.n_state)
     return np.matmul(self.M[:, state, :], goal)
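The reinforcement-learning examples on this page (#5, #10, #17, #21) call utils.onehot(index, size) directly with a state index. A sketch of that flavor (again an assumption about the unshown helper):

import numpy as np

def onehot(index, size):
    # Indicator vector of length `size` with a 1 at position `index`.
    vec = np.zeros(size)
    vec[index] = 1.0
    return vec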
Example #6
def decode_for_classification(X_syn):
    bins = np.linspace(-1e-6, 1, 17, endpoint=True)
    for name, dtype in zip(X_syn.columns, X_syn.dtypes):
        if name in disc_features:
            feature_min = X_syn[name].min()
            feature_max = X_syn[name].max()
            X_syn[name] = (X_syn[name] - feature_min) / (feature_max -
                                                         feature_min)
            X_syn[name] = pd.cut(X_syn[name], bins=bins,
                                 labels=range(16)).astype('int')
            X_syn[name] = X_syn[name].map(
                {key: i
                 for i, key in enumerate(np.unique(X_syn[name]))})

    del X_syn['Education']
    ## Relabel education number
    X_syn['Education-Num'] = X_syn['Education-Num'] + 1
    ## One hot categorical features
    onehotteds = []
    for col in X_syn.columns:
        feature = X_syn[col]
        if (feature.dtype == 'int'
                or feature.dtype == 'O') and col not in onehotteds:
            if len(np.unique(feature)) > 2:
                X_syn.pop(col)
                onehotted = onehot(feature)
                X_syn = pd.concat([X_syn, onehotted], axis=1)
                onehotteds.append(col)
    X_syn['Sex'] = X_syn['Sex'].map({'Female': 0, 'Male': 1})
    return X_syn
Example #7
    def propagate_error(self, target):
        """Propagate the error backwards through the network (backpropagation)."""
        if not self.continuous:
            target = onehot(target, self.num_targets)

        for layer in reversed(self.layers):
            layer.propagate_error(target)
Example #8
 def checkOneHot(self):
     v = torch.LongTensor([1, 2, 1, 2, 0])
     v_length = torch.LongTensor([2, 3])
     v_onehot = utils.onehot(v, v_length, 4)
     target = torch.FloatTensor([[[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]],
                                 [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]])
     assert target.equal(v_onehot)
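The assertion pins the semantics down: v concatenates the per-sequence labels, v_length gives each sequence's length, and the result is a zero-padded [batch, max_len, num_classes] tensor. One implementation consistent with this test (a sketch, not necessarily the project's utils.onehot):

import torch

def onehot(v, v_length, nc):
    # v: 1-D LongTensor of concatenated labels; v_length: per-sequence lengths;
    # nc: number of classes. Returns [batch, max_len, nc], zero-padded.
    batch_size = v_length.size(0)
    max_length = int(v_length.max())
    v_onehot = torch.zeros(batch_size, max_length, nc)
    acc = 0
    for i in range(batch_size):
        length = int(v_length[i])
        labels = v[acc:acc + length].view(-1, 1)
        v_onehot[i, :length].scatter_(1, labels, 1.0)
        acc += length
    return v_onehot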
Example #9
 def sq_error(self, sample):
     """Calculate the square error for a given sample."""
     prediction = self.predict(sample.features)
     if self.continuous:
         return (sample.label - prediction)**2
     else:
         target = onehot(sample.label, self.num_targets)
         return sum([(target[i] - prediction[i])**2
                     for i in range(len(target))])
Example #10
 def update_sr(self, current_exp, next_exp):
     # SARSA TD learning rule
     # update the M(s, s', a)
     s = current_exp[0]  # current state
     s_a = current_exp[1]  # chosen action
     s_ = current_exp[2]  # next state
     s_a_1 = next_exp[1]  # action chosen in the next state
     r = current_exp[3]  # reward in current state
     d = current_exp[4]  # whether the current state is terminal
     I = utils.onehot(
         s, env.state_size)  # transform current state to one-hot vector
     if d:
         td_error = (I + self.gamma * utils.onehot(s_, env.state_size) -
                     self.M[s_a, s, :])
     else:
         td_error = (I + self.gamma * self.M[s_a_1, s_, :] -
                     self.M[s_a, s, :])
     self.M[s_a, s, :] += self.learning_rate * td_error
     return td_error
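In equation form, the update implemented above is the SARSA-style TD rule for the successor representation M, with I = onehot(s):

    M[a, s, :] += learning_rate * (I + gamma * M[a', s', :] - M[a, s, :])

where the bootstrap term becomes gamma * onehot(s') when the transition is terminal.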
Example #11
 def bprop(self, X, y):
     X = np.array([[float(x)] for x in X])
     self._gradoa = self._os - utils.onehot(self._m, y)
     self._gradb2 = self._gradoa
     self._gradw2 = np.dot(self._gradoa, np.transpose(self._hs)) + 2 * self.wd * self._w2
     self._gradhs = np.dot(np.transpose(self._w2), self._gradoa)
     self._gradha = self._gradhs * np.where(self._ha > 0, 1, 0)
     self._gradb1 = np.array(self._gradha)
     self._gradw1 = np.dot(self._gradha, np.transpose(X)) + 2 * self.wd * self._w1
     self._gradx = np.dot(np.transpose(self._w1), self._gradha)
Example #12
 def bprop(self, X, y):
     X = np.array([[float(x)] for x in X])
     self._gradoa = self._os - utils.onehot(self._m, y)
     self._gradb2 = self._gradoa
     self._gradw2 = np.dot(self._gradoa, np.transpose(
         self._hs)) + 2 * self.wd * self._w2
     self._gradhs = np.dot(np.transpose(self._w2), self._gradoa)
     self._gradha = self._gradhs * np.where(self._ha > 0, 1, 0)
     self._gradb1 = np.array(self._gradha)
     self._gradw1 = np.dot(self._gradha,
                           np.transpose(X)) + 2 * self.wd * self._w1
     self._gradx = np.dot(np.transpose(self._w1), self._gradha)
Example #13
 def _load(self, filenames):
     images, labels = None, []
     for i, filename in enumerate(filenames):
         datafile = utils.unpickle(filename)
         if i == 0:
             images = datafile['data']
         else:
             images = np.append(images, datafile['data'], axis=0)
         labels.extend(datafile['labels'])
         print(images.shape, len(labels))
     return images, utils.onehot(np.asarray(labels),
                                 label_size=self.labels_size)
Example #14
    def train_bn(self, data_loader, valid_loader, epochs, learning_rate):
        losses_train = []
        losses_valid = []
        for epoch in range(epochs):
            print("epoch", epoch)
            epoch_loss_train = 0
            # Reset the running mean and variance
            # Mini-batch training
            for step, (x, y) in enumerate(data_loader):
                # x:[b, 28, 28] -> [b, 784] , y:[b, 1] -> [b, 10]
                x = x.reshape(-1, 28 * 28)
                y = onehot(y, 10)
                nets, pred = self.forward_bn(x, bn_mode='train')
                grads = self.backward_bn(nets, y, pred)
                self.optimizer.optimize(self.weight_num, self.params, grads,
                                        y.shape[0])
                loss = cross_entropy(y, pred)
                epoch_loss_train += loss
                if step % 100 == 0:
                    print("epoch {} step {} loss {:.4f}".format(
                        epoch, step, loss))
            losses_train.append(epoch_loss_train)
            data_loader.restart()
            print(epoch_loss_train)
            # Evaluate on the validation set
            epoch_loss_valid = 0
            for step, (x, y) in enumerate(valid_loader):
                x = x.reshape(-1, 28 * 28)
                y = onehot(y, 10)
                nets, pred = self.forward_bn(x, bn_mode='test')
                loss = cross_entropy(y, pred)
                epoch_loss_valid += loss
                if step % 100 == 0:
                    print("epoch {} step {} loss {:.4f}".format(
                        epoch, step, loss))
            losses_valid.append(epoch_loss_valid)
            valid_loader.restart()
        his = {'train_loss': losses_train, 'valid_loss': losses_valid}

        return his
Example #15
 def gen_test(self):
     x_batch, y_batch = self._batch_init()
     i = 0
     for idx in self._idcs_test:
         x_batch[i] = self._test[idx]
         y_batch[i] = onehot(self._test_label[idx], self._num_classes)
         i += 1
         if i >= self._batch_size:
             yield i, x_batch, y_batch
             x_batch, y_batch = self._batch_init()
             i = 0
     if i != 0:
         yield i, x_batch, y_batch
Example #16
def phi8(x, a):
    f = [[x[0]]]
    for aa in range(nactions):
        if aa == a:
            y = np.array([x[1 + i] for i in range(nactions)])
            f += [y]
        else:
            f += [np.zeros((nactions, ))]

    y3 = int(x[1 + a] + 1 > env.max_queue_length)
    f.append([y3])
    f.append(onehot(nactions, a))
    return np.concatenate(f)
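Note the argument order: the phi* feature constructors (#16, #23, #27, #28, #29) call onehot(nactions, a) with the size first, the reverse of the (index, size) convention used elsewhere on this page. A matching sketch (an assumption, as before):

import numpy as np

def onehot(n, i):
    # Length-n indicator vector with a 1 at position i.
    v = np.zeros(n)
    v[i] = 1.0
    return v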
Example #17
 def goal(self):
     if self.obs_mode == "onehot":
         return utils.onehot(
             self.goal_pos[0] * self.grid_size + self.goal_pos[1],
             self.state_size)
     if self.obs_mode == "twohot":
         return self.twohot(self.goal_pos, self.grid_size)
     if self.obs_mode == "geometric":
         return (2 * np.array(self.goal_pos) / (self.grid_size - 1)) - 1
     if self.obs_mode == "visual":
         return env.grid
     if self.obs_mode == "index":
         return self.goal_pos[0] * self.grid_size + self.goal_pos[1]
Example #18
    def __getitem__(self, index):
        if isinstance(index, torch.Tensor):
            index = index.item()
        line = self.ids.iloc[index]

        image = cv2.imread(line['path'])
        image = self.transform(image=image)['image']

        label = np.array([self.mapping[line['label']]])
        label = ToTensor()(image=label)['image']
        label = onehot(label, self.num_classes)

        return {'image': image, 'mask': label}
Example #19
 def bprop(self, X, y):
     # each column of X is an input
     X = np.array([np.array([float(x) for x in j]) for j in X])
     X = X.transpose()
     self._gradoa = self._os - utils.onehot(self._m, y)
     self._gradb2 = self._gradoa
     # gradw2 is the sum of the per-sample gradients
     self._gradw2 = np.dot(self._gradoa, np.transpose(self._hs)) + 2 * self.wd * self._w2
     self._gradhs = np.dot(np.transpose(self._w2), self._gradoa)
     self._gradha = self._gradhs * np.where(self._ha > 0, 1, 0)
     self._gradb1 = np.array(self._gradha)
     # gradw1 is the sum of the per-sample gradients
     self._gradw1 = np.dot(self._gradha, np.transpose(X)) + 2 * self.wd * self._w1
     self._gradx = np.dot(np.transpose(self._w1), self._gradha)
Example #20
    def __getitem__(self, index):
        if isinstance(index, torch.Tensor):
            index = index.item()

        line_1 = self.ids.iloc[index]
        label_1 = np.array([self.mapping[line_1['label']]])
        image_1 = cv2.imread(line_1['path'])

        if self.mixup and np.random.uniform(0, 1) > self.mixup_p:
            while True:
                idx = next(iter(
                    self.sampler)).item()  # generate idx with self.sampler
                line_2 = self.ids.iloc[idx]
                label_2 = np.array([self.mapping[line_2['label']]])
                if label_1 != label_2:
                    break

            image_2 = cv2.imread(line_2['path'])
            image_1 = self.transform(image=image_1)['image']
            image_2 = self.transform(image=image_2)['image']

            label_1 = ToTensor()(image=label_1)['image']
            label_2 = ToTensor()(image=label_2)['image']
            label_1 = onehot(label_1, self.num_classes)
            label_2 = onehot(label_2, self.num_classes)

            _lambda = np.random.beta(self.alpha, self.alpha)
            images = _lambda * image_1 + (1 - _lambda) * image_2
            labels = _lambda * label_1 + (1 - _lambda) * label_2

        else:
            images = self.transform(image=image_1)['image']
            label_1 = ToTensor()(image=label_1)['image']
            labels = onehot(label_1, self.num_classes)

        return {'image': images, 'mask': labels}
Example #21
 def state_to_obs(self, state):
     if self.obs_mode == "onehot":
         point = self.state_to_point(state)
         return utils.onehot(point[0] * self.grid_size + point[1],
                             self.state_size)
     if self.obs_mode == "twohot":
         point = self.state_to_point(state)
         return self.twohot(point, self.grid_size)
     if self.obs_mode == "geometric":
         point = self.state_to_point(state)
         return (2 * np.array(point) / (self.grid_size - 1)) - 1
     if self.obs_mode == "visual":
         return self.state_to_grid(state)
     if self.obs_mode == "index":
         return state
Example #22
 def bprop(self, X, y):
     # each column of X is an input
     X = np.array([np.array([float(x) for x in j]) for j in X])
     X = X.transpose()
     self._gradoa = self._os - utils.onehot(self._m, y)
     self._gradb2 = self._gradoa
     # gradw2 is the sum of the per-sample gradients
     self._gradw2 = np.dot(self._gradoa, np.transpose(
         self._hs)) + 2 * self.wd * self._w2
     self._gradhs = np.dot(np.transpose(self._w2), self._gradoa)
     self._gradha = self._gradhs * np.where(self._ha > 0, 1, 0)
     self._gradb1 = np.array(self._gradha)
     # gradw1 is the sum of the per-sample gradients
     self._gradw1 = np.dot(self._gradha,
                           np.transpose(X)) + 2 * self.wd * self._w1
     self._gradx = np.dot(np.transpose(self._w1), self._gradha)
Example #23
def phi2(x, a):
    f = [[x[0]]]
    for aa in range(nactions):
        if aa == a:
            if x[1 + a] > 0:
                y = [float(x[1 + i]) / (x[1 + a] + x[1 + i]) for i in range(nactions) if not i == a]
            else:
                y = np.ones((nactions - 1,))
            y2 = [x[1 + nactions + i] - x[1 + nactions + a] for i in range(nactions) if not i == a]
            f += [y, y2]
        else:
            f.append(np.zeros((2 * nactions - 2,)))

    y3 = int(x[1 + a] + 1 > env.max_queue_length)
    f.append([y3])
    f.append(onehot(nactions, a))
    return np.concatenate(f)
Example #24
 def gen_train(self):
     x_batch, y_batch = self._batch_init()
     iteration = 0
     i = 0
     while iteration < self._num_iterations:
         # shuffling all batches
         self._shuffle_train()
         for idx in self._idcs_train:
             # extract data from dict
             x_batch[i], y_batch[i] = random_flip(
                 self._train[idx],
                 onehot(self._train_label[idx], self._num_classes))
             i += 1
             if i >= self._batch_size:
                 yield x_batch, y_batch
                 x_batch, y_batch = self._batch_init()
                 i = 0
         iteration += 1
Example #25
    def _get_bp_indexes_labranchor(self, soi):
        """
        Get indexes of branch point regions in given sequences.

        :param soi: batch of sequences of interest for introns (intron-3..intron+6)
        :return: array of predicted bp indexes
        """
        encoded = [
            onehot(str(seq)[self.acc_i - 70:self.acc_i])
            for seq in np.nditer(soi)
        ]
        labr_in = np.stack(encoded, axis=0)
        out = self.labranchor.predict_on_batch(labr_in)
        # for each row, pick the base with max branchpoint probability, and get its index
        max_indexes = np.apply_along_axis(
            lambda x: self.acc_i - 70 + np.argmax(x), axis=1, arr=out)
        # self.write_bp(max_indexes)
        return max_indexes
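Here onehot operates on a DNA string rather than an integer, producing the (length, 4) matrix LaBranchoR expects. A minimal sketch of such a sequence encoder (the base ordering A, C, G, T is an assumption):

import numpy as np

BASE_INDEX = {'A': 0, 'C': 1, 'G': 2, 'T': 3}

def onehot(seq):
    # Encode an A/C/G/T string as a (len(seq), 4) float matrix.
    out = np.zeros((len(seq), 4), dtype=np.float32)
    for pos, base in enumerate(seq.upper()):
        idx = BASE_INDEX.get(base)  # unknown bases (e.g. N) stay all-zero
        if idx is not None:
            out[pos, idx] = 1.0
    return out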
Example #26
    def predict(self, data_loader, bn=False):
        labels = []
        pred = []
        losses = 0
        for (x, y) in data_loader:
            x = x.reshape(-1, 28 * 28)
            y = onehot(y, 10)
            if bn:
                _, out = self.forward_bn(x, 'test')
            else:
                _, out = self.forward(x)
            loss = cross_entropy(y, out)
            losses += loss
            out = list(np.argmax(out, axis=-1).flatten())
            y = list(np.argmax(y, axis=1).flatten())
            labels += y
            pred += out

        return np.array(pred).astype('int'), np.array(labels).astype('int')
Example #27
def phi3(x, a):

    f = [[x[0]]]
    for aa in range(nactions):
        if aa == a:
            y = np.array([np.tanh(float(x[1 + a]) / (x[1 + i])) if x[1 + i] > 0 else 1 for i in range(nactions) if not i == a])
            if x[1 + a] > 0:
                y2 = np.array([np.tanh(float(x[1 + i]) / (x[1 + a])) for i in range(nactions) if not i == a])
            else:
                y2 = np.ones((nactions - 1,))
#            y2 = [x[1 + nactions + i] - x[1 + nactions + a] for i in range(nactions) if not i == a]
            f += [y, 1 - y, y2, 1 - y2]
        else:
            f.append(np.zeros((4 * (nactions - 1),)))

    y3 = int(x[1 + a] + 1 > env.max_queue_length)
    f.append([y3])
    f.append(onehot(nactions, a))
    return np.concatenate(f)
Example #28
def phi4(x, a):
    f = [[x[0]]]
    for aa in range(nactions):
        if aa != a:
            if x[1 + a] == 0:
                f += [np.zeros((2,))]
            else:
                if x[1 + aa] == 0:
                    f += [[1, 0]]
                else:
                    frac = float(x[1 + a] / x[1 + aa])
                    f += [[0, frac]]
        else:
            f += [np.zeros((2,))]

    y3 = int(x[1 + a] + 1 > env.max_queue_length)
    f.append(onehot(nactions, a))
    f.append([y3])
    return np.concatenate(f)
Example #29
def phi6(x, a):
    f = [[x[0]]]
    for aa in range(nactions):
        if aa != a:
            if x[1 + a] == 0:
                f += [np.zeros((12,))]
            else:
                if x[1 + aa] == 0:
                    f += [[1], np.zeros(11)]
                else:
                    frac = float(x[1 + a] / x[1 + aa])
                    y = np.array([int(frac > j) for j in (0.1, 0.2, 0.25, 1.0 / 3, 0.5, 1, 2, 3, 4, 5, 10)])
                    f += [[0], y]
        else:
            f += [np.zeros((12,))]

    y3 = int(x[1 + a] + 1 > env.max_queue_length)
    f.append(onehot(nactions, a))
    f.append([y3])
    return np.concatenate(f)
Example #30
def decode_for_classification(X_syn):
    ## Decode features
    for col in X_syn.columns:
        if col not in disc_features:
            if data[col].dtype == 'float' and col != 'Education-Num':
                min_value = maps[col][0]
                max_value = maps[col][1]
                X_syn[col] = X_syn[col] * (max_value - min_value) + min_value
            else:
                X_syn[col] = X_syn[col].map(maps[col][1])
        if col == 'Education-Num':
            X_syn[col] = X_syn[col].map(maps[col][1])

    ## Decode discretized features
    bins = np.linspace(-1e-6, 1, 17, endpoint=True)
    for col in disc_features:
        if col != 'Education-Num':
            discr_feature = pd.cut(data[col], bins=bins,
                                   labels=range(16)).astype('int')
            decode_map = {i: u for i, u in enumerate(np.unique(discr_feature))}
            X_syn[col] = X_syn[col].map(decode_map)

    ## One hot categorical features
    from utils import onehot
    onehotteds = []
    for col in X_syn.columns:
        feature = X_syn[col]
        if (feature.dtype == 'int'
                or feature.dtype == 'O') and col not in onehotteds:
            if len(np.unique(feature)) > 2:
                X_syn.pop(col)
                onehotted = onehot(feature)
                X_syn = pd.concat([X_syn, onehotted], axis=1)
                onehotteds.append(col)

    ## Reorder columns
    X_syn = X_syn[X_test.columns]
    return X_syn
Example #31
def processing_data(infile, labelfile, outfile, vocab_file, stopwords_file):
    print('Loading stopwords...')
    stopwords = get_stopwords(stopwords_file)

    print('Loading data...')
    data = pd.read_csv(infile)

    print('Saving labels')
    with open(labelfile, 'w') as f:
        for label in data.columns[2:]:
            f.write(label + '\n')

    # Split sentences into words
    print('Splitting content')
    contents = data['content'].tolist()
    seg_contents = segmentData(contents, stopwords)

    if not os.path.exists(vocab_file):
        print('Creating vocabulary...')
        create_vocab(seg_contents, vocab_file, 50000)

    print('Loading vocabulary...')
    w2i, _ = read_vocab(vocab_file)

    # word2id
    print('Tokenize...')
    token_contents = [tokenizer(c, w2i) for c in seg_contents]
    data['content'] = token_contents

    # Convert labels to one-hot form
    print('One-hot label')
    for col in data.columns[2:]:
        label = data[col].tolist()
        onehot_label = [onehot(l) for l in label]
        data[col] = onehot_label

    print('Saving...')
    data[data.columns[1:]].to_csv(outfile, index=False)
Example #32
def generate_seqs(images, data_desc, onehot_lab=True):
    idx = []
    runn_idx = 0
    img_seqs = []
    labels = []
    label = None
    tid = 0
    for _, row in data_desc.iterrows():
        if (tid != row['trackid']):
            if (len(idx) != 0):
                idx = list(map(lambda x: x + runn_idx, idx))
                img_seqs.append(np.array(images[idx]))
                labels.append(label)
                runn_idx = runn_idx + len(idx)

            tid = row['trackid']
            idx = [row['framenr'] - 2]  #TODO
        else:
            idx.append(row['framenr'] - 2)
        label = row['class']
    if (onehot_lab):
        labels = onehot(labels, label_dict={'boat': 1, 'nature': 0})
    return img_seqs, labels
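generate_seqs passes a label_dict, so the helper assumed here first maps class names to indices and then encodes. A sketch under that assumption:

import numpy as np

def onehot(labels, label_dict):
    # Map class names through label_dict, then one-hot encode each.
    idx = np.array([label_dict[l] for l in labels])
    return np.eye(len(label_dict), dtype=np.float32)[idx]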
Example #33
softmax = GumbelSoftmax()

# train adversarially
try:
	while epoch < num_epochs:
		train_iter = iter(train_data)
		temperature = max_temperature**((epoch+1)/num_epochs)
		g_lr_scheduler.step()
		d_lr_scheduler.step()
		for n_batch,batch in enumerate(train_iter):
			real_data = batch.text.to(device)
			N = real_data.size(0)
			num_steps = real_data.size(1)
			# 1. Train Discriminator
			real_data_onehot = onehot(real_data,num_classes)
			real_data_onehot[real_data_onehot==1] = 0.7
			real_data_onehot[real_data_onehot==0] = (1.0-0.7)/(num_classes-1.0)
			real_data_onehot = softmax(real_data_onehot,temperature)

			# Generate fake data and detach 
			# (so gradients are not calculated for generator)
			noise_tensor = sample_noise(N,noise_size,device)
			with torch.no_grad():
				fake_data = generator(z=noise_tensor,num_steps=num_steps,temperature=temperature).detach()
			# Train D
			d_error = train_discriminator(discriminator,real_data_onehot,fake_data,d_optimizer)

			# 2. Train Generator every 'gen_train_freq' steps
			if global_step % gen_train_freq == 0:
				for _ in range(gen_steps):
Example #34
import numpy as np
from confusionmatrix import ConfusionMatrix
from layers import *
from utils import onehot


# Load MNIST data and convert the label vector to a one-hot representation
data = np.load('mnist.npz')
num_classes = 10
x_train = data['X_train']
targets_train = data['y_train']
targets_train = onehot(targets_train, num_classes)
num_samples, num_inputs = x_train.shape
num_hidden_units = 100

batch_size = 200
num_epochs = 50
learning_rate = 0.001
num_batches = num_samples // batch_size

ffn = FeedforwardNetwork()
ffn.add(LinearLayer(num_inputs, num_hidden_units))
ffn.add(ReluActivationLayer())
ffn.add(LinearLayer(num_hidden_units, num_classes))
ffn.add(SoftmaxActivationLayer())
losslayer = CrossEntropyLoss()