Example #1
    def __init__(self,
                 train=True,
                 transform=None,
                 target_transform=None,
                 dataset='cifar10',
                 noise_type='symmetric',
                 noise_rate=0.5,
                 split_per=0.9,
                 random_seed=1,
                 num_class=10):

        self.transform = transform
        self.target_transform = target_transform
        self.train = train

        original_images = np.load('data/cifar10/train_images.npy')
        original_labels = np.load('data/cifar10/train_labels.npy')

        # clean images and noisy labels (training and validation)
        self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split(
            original_images, original_labels, dataset, noise_type, noise_rate,
            split_per, random_seed, num_class)

        if self.train:
            self.train_data = self.train_data.reshape((-1, 3, 32, 32))
            self.train_data = self.train_data.transpose((0, 2, 3, 1))

        else:
            self.val_data = self.val_data.reshape((-1, 3, 32, 32))
            self.val_data = self.val_data.transpose((0, 2, 3, 1))
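
The helper tools.dataset_split is not reproduced in these listings. Below is a minimal sketch of what a symmetric-noise version of it might look like, assuming noise is injected by flipping a noise_rate fraction of labels uniformly at random and split_per of the samples go to training; the logic is illustrative, not the project's actual code.

import numpy as np

def dataset_split(images, labels, dataset='cifar10', noise_type='symmetric',
                  noise_rate=0.5, split_per=0.9, random_seed=1, num_class=10):
    """Inject symmetric label noise, then split into train/val subsets."""
    if noise_type != 'symmetric':
        raise NotImplementedError('only symmetric noise is sketched here')

    rng = np.random.RandomState(random_seed)
    labels = np.asarray(labels).astype(int)

    # With probability noise_rate, replace a label with a different class.
    flip = rng.rand(len(labels)) < noise_rate
    random_labels = rng.randint(0, num_class, size=len(labels))
    clash = flip & (random_labels == labels)
    random_labels[clash] = (random_labels[clash] + 1) % num_class
    noisy_labels = np.where(flip, random_labels, labels)

    # Shuffle once, then take split_per for training and the rest for validation.
    order = rng.permutation(len(labels))
    cut = int(len(labels) * split_per)
    train_idx, val_idx = order[:cut], order[cut:]
    return (images[train_idx], images[val_idx],
            noisy_labels[train_idx], noisy_labels[val_idx])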
Example #2
    def __init__(self,
                 train=True,
                 transform=None,
                 target_transform=None,
                 noise_rate=0.5,
                 split_per=0.9,
                 random_seed=1,
                 num_class=10):

        self.transform = transform
        self.target_transform = target_transform
        self.train = train

        original_images = np.load('data/cifar10/train_images.npy')
        original_labels = np.load('data/cifar10/train_labels.npy')
        self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split(
            original_images, original_labels, noise_rate, split_per,
            random_seed, num_class)
        if self.train:
            self.train_data = self.train_data.reshape((45000, 3, 32, 32))
            self.train_data = self.train_data.transpose((0, 2, 3, 1))

        else:
            self.val_data = self.val_data.reshape((5000, 3, 32, 32))
            self.val_data = self.val_data.transpose((0, 2, 3, 1))
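
Each listing shows only __init__; in this style of PyTorch dataset the class usually also defines __getitem__ and __len__. The following pair is a hypothetical sketch, not part of the original source, and assumes PIL.Image is imported at module level.

    def __getitem__(self, index):
        # Pick from the training or validation split depending on self.train.
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
        else:
            img, target = self.val_data[index], self.val_labels[index]

        # Convert to a PIL image so torchvision transforms can be applied.
        img = Image.fromarray(img)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target

    def __len__(self):
        return len(self.train_data) if self.train else len(self.val_data)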
Example #3
def dev_loss(args):
    '''
    Objective function for hyperparameter tuning; uses hold-out validation.
    '''
    tune_args = Parameters(**args)
    if tune_args.get('batch_size'):
        tune_args.batch_size = int(tune_args.batch_size)
    net_params.update(tune_args)
    train_params.update(tune_args)
    logger.info(f'\n\n \t <<<<<< ** parameters: {tune_args.to_str()} >>>>>>\n')

    HOLD_OUT = 5  # number of hold-out rounds
    min_dev_loss = []
    max_dev_acc = []
    epochs = []
    for _ in range(HOLD_OUT):
        time_str = str(time.time()).replace('.', '')[:11]
        train_params.set('model_name',
                         f'{time_str}__{tune_args.to_str()}')  # where the model is saved
        # split the dataset
        train_x, train_y, val_x, val_y, _, _ = tools.dataset_split(
            texts, labels, train_percent=train_percent)
        train_params.extend({
            'train_x': train_x,  # training data
            'train_y': train_y,  # training labels
            'dev_x': val_x,  # validation data
            'dev_y': val_y,  # validation labels
        })

        model = dm.DNNModel(net_params)
        model.build()

        min_dev_loss_, max_dev_acc_, epoch = dm.train(model, train_params)
        min_dev_loss.append(min_dev_loss_)
        max_dev_acc.append(max_dev_acc_)
        epochs.append(epoch)

    global lowerst_dev_loss
    if lowerst_dev_loss > np.average(min_dev_loss):  # keep only the model trained with the best parameters; remove the rest
        lowerst_dev_loss = np.average(min_dev_loss)
        tools.rm_dirs('./checkpoints/best', tune_args.to_str(), False)
    else:
        tools.rm_dirs('./checkpoints/best', tune_args.to_str(), True)

    logger.info(
        f'\n\n\t >>>>>> Acc   {str(max_dev_acc)}\t{np.average(max_dev_acc)}\
             \n\t >>>>>> Loss  {str(min_dev_loss)}\t{np.average(min_dev_loss)}\n\t >>>>>> Epoch {str(epochs)}\n'
    )

    return {
        'status': STATUS_OK,
        'loss': np.average(min_dev_loss),
        'epochs': epochs,
        'metrics': {
            'accuracys': max_dev_acc,
            'accuracy': np.average(max_dev_acc),
            'losses': min_dev_loss
        }
    }
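
dev_loss returns a hyperopt-style result dict (STATUS_OK plus a scalar loss), so it is presumably handed to hyperopt's fmin. Below is a minimal driver sketch with a hypothetical search space; the real space is defined elsewhere in the project.

from hyperopt import Trials, fmin, hp, tpe

# Hypothetical search space; hyperopt passes each sampled dict to dev_loss(args).
space = {
    'batch_size': hp.quniform('batch_size', 32, 256, 32),
    'learning_rate': hp.loguniform('learning_rate', -9, -4),
    'dropout': hp.uniform('dropout', 0.1, 0.6),
}

trials = Trials()
best = fmin(fn=dev_loss, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
logger.info(f'best hyperparameters: {best}')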
Example #4
    def __init__(self,
                 train=True,
                 transform=None,
                 target_transform=None,
                 noise_rate=0.5,
                 split_per=0.9,
                 random_seed=1,
                 num_class=10):

        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        original_images = np.load('data/mnist/train_images.npy')
        original_labels = np.load('data/mnist/train_labels.npy')
        self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split(
            original_images, original_labels, noise_rate, split_per,
            random_seed, num_class)
        pass
Example #5
    def __init__(self,
                 train=True,
                 transform=None,
                 target_transform=None,
                 dataset='fmnist',
                 noise_type='symmetric',
                 noise_rate=0.5,
                 split_per=0.9,
                 random_seed=1,
                 num_class=10):

        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        original_images = np.load(
            'data/fashionmnist/train_images.npy').reshape((-1, 1, 28, 28))
        original_labels = np.load('data/fashionmnist/train_labels.npy')

        self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split(
            original_images, original_labels, dataset, noise_type, noise_rate,
            split_per, random_seed, num_class)
Example #6
    def __init__(self,
                 train=True,
                 transform=None,
                 target_transform=None,
                 dataset='mnist',
                 noise_type='symmetric',
                 noise_rate=0.5,
                 split_per=0.9,
                 random_seed=1,
                 num_class=10):

        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        original_images = np.load('data/mnist/train_images.npy')
        original_labels = np.load('data/mnist/train_labels.npy')

        # clean images and noisy labels (training and validation)
        self.train_data, self.val_data, self.train_labels, self.val_labels = tools.dataset_split(
            original_images, original_labels, dataset, noise_type, noise_rate,
            split_per, random_seed, num_class)
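
These __init__ snippets belong to PyTorch-style dataset classes. A usage sketch follows, assuming a hypothetical class name mnist_dataset that wraps the code above; using the same random_seed for both instances keeps the train/validation split consistent between them.

import torch
import torchvision.transforms as transforms

# mnist_dataset is a placeholder name for the class whose __init__ is shown above.
train_set = mnist_dataset(train=True, transform=transforms.ToTensor(),
                          noise_type='symmetric', noise_rate=0.2, random_seed=1)
val_set = mnist_dataset(train=False, transform=transforms.ToTensor(),
                        noise_type='symmetric', noise_rate=0.2, random_seed=1)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=128,
                                           shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=128,
                                         shuffle=False, num_workers=2)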
Example #7
# vocab_to_int, embedding_matrix = tools.load_embedding(data_path + "word_embedding_300_new.txt")  # English word vectors
# vocab_to_int, embedding_matrix = tools.load_embedding(data_path + "glove.6B.200d.txt")  # English word vectors
# vocab_to_int, embedding_matrix = tools.load_embedding(data_path + "sgns.weibo.word.txt")  # Chinese word vectors

net_params.set('embedding_matrix', embedding_matrix)  # register the word-embedding matrix
net_params.set('vocab_size', len(vocab_to_int))  # register the vocabulary size

logger.info(f"dictionary length: {len(vocab_to_int)}")

texts = tools.wordlists2idlists(texts, vocab_to_int)  # convert sentences to lists of vocabulary ids
texts, labels = tools.drop_empty_texts(texts, labels)  # drop samples whose text is empty after preprocessing
labels = tools.labels2onehot(labels, net_params.class_num)  # convert class labels to one-hot form
texts = tools.dataset_padding(texts, sent_len=net_params.max_sent_len)  # left-pad with zeros (sketched after this example)

# split the dataset
train_x, train_y, val_x, val_y, test_x, test_y = tools.dataset_split(
    texts, labels, train_percent=train_percent)
train_params.extend({
    'train_x': train_x,  # training data
    'train_y': train_y,  # training labels
    'dev_x': val_x,  # validation data
    'dev_y': val_y,  # validation labels
})
# ================== step 2: build the model =================
model = dm.DNNModel(net_params)
model.build()

# ================== step 3: training =================
min_dev_loss = dm.train(model, train_params)
logger.info(f' ** The minimum dev_loss is {min_dev_loss}')

# ================== step 4: testing =================
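
The dataset_padding call in Example #7 left-pads each id sequence to max_sent_len. Below is a minimal sketch of that kind of padding; it is an assumption about the helper's behavior, and sequences longer than sent_len are truncated here to their last sent_len ids.

import numpy as np

def dataset_padding(texts, sent_len):
    """Left-pad (or front-truncate) each list of word ids to length sent_len."""
    padded = np.zeros((len(texts), sent_len), dtype=np.int64)
    for i, ids in enumerate(texts):
        ids = list(ids)[-sent_len:]                # keep at most the last sent_len ids
        padded[i, sent_len - len(ids):] = ids      # zeros on the left, ids on the right
    return padded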