Example 1
    def train_leam(self,gpu_id=0,epoch_size=1000,max_epoch=15,batch_size=128):
        train_x, test_x, train_y, test_y = train_test_split(self.id_texts, self.labels_multi, random_state=0)
        train = datasets.TupleDataset(train_x, train_y)
        test = datasets.TupleDataset(test_x, test_y)

        train_iter = iterators.SerialIterator(train, batch_size)
        test_iter = iterators.SerialIterator(test, batch_size, False, False)

        
        models = model.LEAM(self.vocab_size, self.n_class)
        models.embed.W.copydata(Variable(self.embedding_vectors))
        models.embed_class.W.copydata(Variable(self.value_mean))

        if gpu_id >= 0:
            models.to_gpu(gpu_id)


        models = L.Classifier(models, lossfun=self.lossfun_multi_leam, accfun=self.auc_fun_leam)
        optimizer = optimizers.Adam(alpha=0.001)
        optimizer.setup(models)
        updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu_id)
        trainer = training.Trainer(updater, (epoch_size * max_epoch, 'iteration'), out='./appr-leam')

        trainer.extend(extensions.LogReport(trigger=(epoch_size, 'iteration')))
        trainer.extend(extensions.snapshot(filename='snapshot_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration'))
        trainer.extend(extensions.snapshot_object(models.predictor, filename='models_iteration-{.updater.iteration}'), trigger=(epoch_size, 'iteration'))
        trainer.extend(extensions.Evaluator(test_iter, models, device=gpu_id), trigger=(epoch_size, 'iteration'))
        trainer.extend(extensions.observe_lr(), trigger=(epoch_size, 'iteration'))
        trainer.extend(extensions.PrintReport(['iteration', 'main/loss', 'validation/main/loss','main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=(epoch_size, 'iteration'))
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.ProgressBar(update_interval=1000))
        trainer.run()
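
Note: if training is interrupted, the trainer state written by extensions.snapshot above can be restored before calling trainer.run() again. A minimal sketch (the iteration number in the file name is illustrative):

    # Resume from a previously saved trainer snapshot.
    chainer.serializers.load_npz('./appr-leam/snapshot_iteration-1000', trainer)
    trainer.run()  # continues from the restored iteration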
Example 2
def get_fashion_mnist():
    _retrieve_fashion_mnist()
    train = np.load(os.path.join(_save_dir, _train_name))
    test = np.load(os.path.join(_save_dir, _test_name))
    train_dataset = datasets.TupleDataset(train['x'].astype(np.float32), train['y'].astype(np.int32))
    test_dataset = datasets.TupleDataset(test['x'].astype(np.float32), test['y'].astype(np.int32))
    return train_dataset, test_dataset
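
The returned objects are chainer TupleDatasets: indexing yields (x, y) tuples, and batches can be drawn with a SerialIterator and reassembled into arrays with concat_examples. A minimal usage sketch (batch size is arbitrary):

    from chainer import iterators
    from chainer.dataset import concat_examples

    train_dataset, test_dataset = get_fashion_mnist()
    x0, y0 = train_dataset[0]                       # one example
    it = iterators.SerialIterator(train_dataset, batch_size=32, repeat=False)
    x_batch, y_batch = concat_examples(it.next())   # arrays of shape (32, ...) and (32,)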
Example 3
    def __init__(self, data_name):
        # get data from chainer
        # images are normalized to [0.0, 1.0]
        if data_name == 'mnist':
            train_tuple, test_tuple = datasets.get_mnist(ndim=3)
        elif data_name == 'fmnist':
            train_tuple, test_tuple = get_fmnist(withlabel=True,
                                                 ndim=3,
                                                 scale=1.0)
        elif data_name == 'cifar10':
            train_tuple, test_tuple = datasets.get_cifar10()
        else:
            raise ValueError('Invalid data')

        self.data_name = data_name

        # preprocess
        # convert to array
        train_image, train_label = concat_examples(train_tuple)
        test_image, test_label = concat_examples(test_tuple)

        # set images to [-0.5, 0.5]
        self.train_image = np.array(train_image, dtype=np.float32) - 0.5
        self.train_label = np.array(train_label, dtype=np.int32)
        self.test_image = np.array(test_image, dtype=np.float32) - 0.5
        self.test_label = np.array(test_label, dtype=np.int32)

        # re-convert to TupleDataset
        self.train_tuple = datasets.TupleDataset(self.train_image,
                                                 self.train_label)
        self.test_tuple = datasets.TupleDataset(self.test_image,
                                                self.test_label)
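
As an aside, the same [-0.5, 0.5] shift could also be applied lazily instead of materialising shifted copies of the arrays; a sketch using chainer's TransformDataset (not part of the original class):

    from chainer.datasets import TransformDataset

    def shift(example):
        image, label = example
        return image - 0.5, label

    train_lazy = TransformDataset(train_tuple, shift)  # shift applied on access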
Example 4
def make_model(record_X, record_y, mode):
    X = np.array(record_X, dtype=np.float32)  # list of board positions from all games
    y = np.array(record_y, dtype=np.int32)  # list of correct move positions from all games

    train = datasets.TupleDataset(X, y)
    train_iter = iterators.SerialIterator(train, batch_size=100)

    model = Classifier(MLP())  # train with an MLP
    # Switching the optimizer from SGD to something like Adam might improve accuracy; worth trying if there is time.
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (150, 'epoch'), out='result')
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png',
                              trigger=(1, 'epoch')))
    trainer.run()

    if mode == "black":
        serializers.save_npz('black.npz', model)
        files.download('black.npz')
    elif mode == "white":
        serializers.save_npz('white.npz', model)
        files.download('white.npz')
    else:
        serializers.save_npz('model.npz', model)
        files.download('model.npz')
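
The saved .npz files can later be reloaded for inference; a sketch, assuming chainer and the same MLP/Classifier definitions are in scope and x is one board encoded like the rows of record_X:

    model = Classifier(MLP())
    serializers.load_npz('black.npz', model)
    with chainer.using_config('train', False):
        scores = model.predictor(x[None].astype(np.float32))  # per-move scores for one board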
Example 5
def main():
    mushroomsfile = "mushrooms.csv"

    data_array = np.genfromtxt(mushroomsfile,
                               delimiter=',',
                               dtype=str,
                               skip_header=1)
    n_data, n_features = data_array.shape
    for col in range(n_features):
        data_array[:, col] = np.unique(data_array[:, col],
                                       return_inverse=True)[1]
    X = data_array[:, 1:].astype(np.float32)
    Y = data_array[:, 0].astype(np.int32)[:, None]
    train, test = datasets.split_dataset_random(datasets.TupleDataset(X, Y),
                                                int(n_data * 0.7))
    train_iter = ch.iterators.SerialIterator(train, 100)
    test_iter = ch.iterators.SerialIterator(test,
                                            100,
                                            repeat=False,
                                            shuffle=False)
    model = L.Classifier(MLP(44, 1),
                         lossfun=F.sigmoid_cross_entropy,
                         accfun=F.binary_accuracy)
    optimizer = ch.optimizers.SGD().setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (50, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.DumpGraph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename='trainer_epoch_{.updater.epoch}'),
        trigger=(10, 'epoch'))

    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.run()

    x, t = test[np.random.randint(len(test))]

    predict = model.predictor(x[None]).array
    predict = predict[0][0]

    if predict >= 0:
        print('Predicted Poisonous, Actual ' + ['Edible', 'Poisonous'][t[0]])
    else:
        print('Predicted Edible, Actual ' + ['Edible', 'Poisonous'][t[0]])
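
The threshold of 0 works because the model outputs a raw logit: with F.sigmoid_cross_entropy, a logit of 0 corresponds to a predicted probability of 0.5. To report the probability itself, one could add, for instance:

    prob = F.sigmoid(model.predictor(x[None])).array[0][0]
    print('P(poisonous) = {:.3f}'.format(float(prob)))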
Example 6
def load(fname, V, C, T=700, L=57):
    a = numpy.load(fname)
    N = len(a)
    a = a.reshape(N, T, L)
    acids_raw = a[..., :V + 1]
    structure_labels_raw = a[..., V + 1:V + C + 2]

    acids = numpy.full((N, T), numpy.nan, dtype=numpy.int32)
    structure_labels = numpy.full((N, T), numpy.nan, dtype=numpy.int32)

    for i in six.moves.range(V):
        acids[acids_raw[..., i] == 1.0] = i
    acids[acids_raw[..., V] == 1.0] = -1
    assert not (numpy.isnan(acids)).any()

    for i in six.moves.range(C):
        structure_labels[structure_labels_raw[..., i] == 1.0] = i
    structure_labels[structure_labels_raw[..., C] == 1.0] = -1
    assert not (numpy.isnan(structure_labels)).any()

    # To reduce the computational time, we reduce the time step
    # in this example.
    acids = acids[..., :100]
    structure_labels = structure_labels[..., :100]

    # As opposed to the original paper, we do not use protein profiles
    # or solvent accessibility, to keep the model simple.
    # profiles = a[..., -V - 1:-1].astype(numpy.float32)
    # absolute_solvent_labels = a[..., V + C + 4].astype(numpy.int32)
    # relative_solvent_labels = a[..., V + C + 5].astype(numpy.int32)
    # profiles = profiles[:, :100, :]
    # absolute_solvent_labels = absolute_solvent_labels[..., :100]
    # relative_solvent_labels = relative_solvent_labels[..., :100]

    return datasets.TupleDataset(acids, structure_labels)
Example 7
def conv(data):
    # Initialise the containers for the extracted values
    X = []

    Y = []

    # Convert to a NumPy array (DataFrame.as_matrix() was removed in recent pandas)
    data_array = data.to_numpy()

    # Split each row into explanatory variables and the target variable.
    for j in data_array:
        x_split = np.hsplit(j, [10, 11])
        X.append(x_split[0].astype(np.float32))
        Y.append(x_split[1].astype(np.int32))

    X = np.array(X)
    Y = np.ndarray.flatten(np.array(Y))

    # Scale the feature values
    scaler = MinMaxScaler()
    scaler.fit(X)
    X = scaler.transform(X)

    # Randomly split into training and validation data at a ratio of 8:2.
    train, test = datasets.split_dataset_random(datasets.TupleDataset(X, Y),
                                                32000)

    # Return the training and validation datasets
    return train, test
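
If the 8:2 split needs to be reproducible across runs, split_dataset_random also accepts an explicit seed; a sketch:

    train, test = datasets.split_dataset_random(
        datasets.TupleDataset(X, Y), 32000, seed=0)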
Example 8
def create_dataset_board_and_hand_and_result():
    '''
    input looks like ([board ... hand ...], win/lost)
    '''
    train_raw = []
    score_raw = []
    for i in range(10000):
        g = Game()
        g.play()
        result, winner = g.result()
        if result == TicTocToe.WIN:
            for b, h in g.playing.record(winner):
                b_num = [0 if v is None else 1 if v else -1 for v in b]
                r_num = [0] * len(b)
                r_num[h] = np.float32(1)
                train_raw.append(np.array(b_num + r_num, np.float32))
                score_raw.append(W_WIN)
            for b, h in g.playing.record(
                    TicTocToe.SIDE_O if winner ==
                    TicTocToe.SIDE_X else TicTocToe.SIDE_X):
                b_num = [0 if v is None else 1 if v else -1 for v in b]
                r_num = [0] * len(b)
                r_num[h] = np.float32(1)
                train_raw.append(np.array(b_num + r_num, np.float32))
                score_raw.append(W_LOST)
    return datasets.TupleDataset(train_raw, score_raw)
Example 9
def get_soft_label(model, image):
    """This function calculates soft labelings of input image data.
    
    Args:
        model (ClassiferNN)   : model which calculates the soft labelings
        image (numpy ndarray) : image data which is fed into the model
        
    Return:
        TupleDataset (image, soft label) located in GPU if possible
    """

    xp_image = xp.array(image)
    iterator = iterators.SerialIterator(xp_image,
                                        batch_size=400,
                                        shuffle=False,
                                        repeat=False)

    soft_label = xp.empty((0, model.n_classes), dtype=xp.float32)
    y = []

    # loop until the iterator finishes one epoch
    while iterator.epoch == 0:
        x = iterator.next()
        y.append(model.predict_proba(x).data)

    soft_label = xp.concatenate(y, axis=0)
    tuple_data = datasets.TupleDataset(xp_image, soft_label)

    return tuple_data
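
Since this function only runs inference, the prediction loop can be wrapped in test mode so no computational graph is retained; a sketch, assuming model.predict_proba respects chainer's configuration flags:

    with chainer.using_config('train', False), chainer.no_backprop_mode():
        tuple_data = get_soft_label(model, image)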
Example 10
def train():
    epoch_num = 100
    batchsize = 64
    gpu_id = -1

    train = load_images('./train_data')
    train = datasets.TupleDataset(train, train)
    train_iter = chainer.iterators.SerialIterator(train, batchsize)

    model = L.Classifier(Autoencoder(256 * 256, 128),
                         lossfun=F.mean_squared_error)
    model.compute_accuracy = False

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch_num, 'epoch'), out='result')

    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar())
    trainer.run()

    model.to_cpu()
    serializers.save_npz('sushi_hotdog.model', model)
Example 11
def sample_5():
    # make an iterator
    x = X.copy()
    y = Y.copy()
    dataset = D.TupleDataset(x, y)
    train_iter = Iter.SerialIterator(dataset, batch_size=DATA_SIZE, shuffle=False)

    # create a network
    model = TwoLayerNet(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    loss_calculator = LossCalculator(model)

    # create an optimizer
    optimizer = P.SGD(lr=LEARNING_RATE)

    # connect the optimizer with the network
    optimizer.setup(loss_calculator)

    # make an updater
    updater = training.StandardUpdater(train_iter, optimizer)

    # make a trainer
    trainer = training.Trainer(updater, (EPOCHS, 'epoch'), out='result')
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time']))

    trainer.run()
Example 12
def train():
    # _/_/_/ load dataset

    xs = np.load(XS_PATH)
    ys = np.load(YS_PATH)

    # _/_/_/ split dataset and make iterators

    # for training
    train_dataset = D.TupleDataset(xs, ys)
    train_iter = Iter.SerialIterator(train_dataset,
                                     batch_size=BATCH_SIZE,
                                     shuffle=False)

    # _/_/_/ create a network

    model = MyNet(LENGTH_SCALE, INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE,
                  DROPOUT_RATIO)
    loss_calculator = LossCalculator(model)

    # _/_/_/ create an optimizer

    # optimizer = P.SGD(lr=LEARNING_RATE)
    optimizer = P.Adam()
    # optimizer = P.RMSprop(lr=LEARNING_RATE)

    # _/_/_/ connect the optimizer with the network

    optimizer.setup(loss_calculator)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=WEIGHT_DECAY))

    # _/_/_/ make an updater

    updater = training.StandardUpdater(train_iter, optimizer)

    # _/_/_/ make a trainer

    epoch_interval = (1, 'epoch')
    model_interval = (EPOCHS, 'epoch')

    trainer = training.Trainer(updater, (EPOCHS, 'epoch'), out=OUTPUT_DIR_PATH)
    # trainer.extend(extensions.ExponentialShift('lr', 0.99), trigger=epoch_interval)

    # save a trainer
    trainer.extend(extensions.snapshot(), trigger=model_interval)

    # save a model
    trainer.extend(extensions.snapshot_object(model, MODEL_NAME),
                   trigger=model_interval)

    trainer.extend(extensions.LogReport(trigger=epoch_interval))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss']),
                   trigger=epoch_interval)
    trainer.extend(
        extensions.PlotReport(['main/loss'], 'epoch', file_name='loss.png'))
    # _/_/_/ run

    trainer.run()
Example 13
def temp_data_prep():

    X = np.random.rand(40, 4, 256, 256, 256).astype(np.float32)

    Y = np.random.rand(40, 4).astype(np.float32)

    dataset = datasets.TupleDataset(X, Y)

    return dataset
Example 14
def objective(trial):

    # Create model instance
    model = TripletClassifier(
        MyNeuralNetwork(n_mid_units=mid_size, n_out=out_size))
    optimizer = create_optimizer(trial, model)
    batchsize = trial.suggest_int('batchsize', 10, len(y))
    epoch = trial.suggest_int('epoch', 10, 50)

    # Assign GPU or CPU to the model
    if gpu_id >= 0:
        cuda.get_device(gpu_id).use()
        model.to_gpu(gpu_id)

    # Define Iterator
    train_set = datasets.TupleDataset(train_triplet, train_label)
    test_set = datasets.TupleDataset(test_triplet, test_label)
    train_iter = iterators.SerialIterator(train_set, batchsize)
    test_iter = iterators.SerialIterator(test_set,
                                         batchsize,
                                         repeat=False,
                                         shuffle=False)

    # Define Trainer
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (epoch, 'epoch'))
    trainer.extend(chainer.training.extensions.Evaluator(test_iter, model))
    log_report_extension = chainer.training.extensions.LogReport(log_name=None)
    trainer.extend(
        chainer.training.extensions.PrintReport([
            'epoch', 'main/squared_error', 'validation/main/squared_error',
            'main/abs_error', 'validation/main/abs_error', 'elapsed_time'
        ]))
    trainer.extend(log_report_extension)

    trainer.run()

    log_last = log_report_extension.log[-1]
    for key, value in log_last.items():
        trial.set_user_attr(key, value)

    val_err = log_report_extension.log[-1]['validation/main/squared_error']
    return val_err
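
The trial object suggests this objective is meant to be driven by an Optuna study; a minimal sketch of how it would typically be invoked (n_trials is illustrative):

    import optuna

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=50)
    print(study.best_params)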
Example 15
def load_omniglot(section, git_root_dir, resize=None, ndim=2, verbose=False):
    """
    ----
    Args:
        section (str):
            ['train-small1', 'train-small2', 'train', 'eval', 'all'] のいずれか
            'all' は 'train' + 'eval' です.
            'train-small1' や 'train-small2' は 'train' の部分集合です.
        git_root_dir (str or Path):
            omniglot の公式リポジトリを clone したディレクトリを指定.
            あらかじめ,"python" フォルダ内の zip ファイルを全て展開しておく必要があります.
        resize (None or tuple):
            画像のリサイズ指定.例えば Finn+ 2017 では (28, 28).None の場合は (105, 105)
        ndim (int):
            各画像データの array の ndim 指定.[1, 2, 3] のいずれか.
    Returns:
        (chainer.datasets.TupleDataset)
            クラスラベルは 0 始まりの連続した int32 です
    """
    assert section in ['train-small1', 'train-small2', 'train', 'eval', 'all']
    assert ndim in [1, 2, 3]
    images_dir = Path(git_root_dir) / "python" / {
        "train-small1": "images_background_small1",
        "train-small2": "images_background_small2",
        "train": "images_background",
        "eval": "images_evaluation",
        "all": "images_*[dn]",
    }[section]

    class_dirs = sorted(glob.glob(str(images_dir / "*/*/")))
    if verbose:
        print('{} classes found.'.format(len(class_dirs)))

    Xs, ys = [], []
    for i, cl in enumerate(class_dirs):
        pngs = sorted(glob.glob(str(Path(cl) / "*.png")))
        for p in pngs:
            img = Image.open(p, 'r')
            if resize is not None:
                img = img.resize(resize, resample=Image.LANCZOS)

            # The originals are boolean images with black strokes on a white background;
            # logical_not inverts them to white strokes on black.
            ary = np.logical_not(np.array(img))
            if ndim == 3:
                ary = ary[None, :, :]
            elif ndim == 1:
                ary = ary.reshape(-1)

            Xs.append(ary)
            ys.append(i)

    Xs = np.array(Xs, dtype=np.float32)
    ys = np.array(ys, dtype=np.int32)
    return datasets.TupleDataset(Xs, ys)
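
A usage sketch for the loader above (the repository path is illustrative):

    # Evaluation split, resized to 28x28, returned as (1, 28, 28) float32 arrays.
    data = load_omniglot('eval', '/path/to/omniglot', resize=(28, 28), ndim=3)
    x, y = data[0]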
Example 16
    def check_tuple_dataset(self, x0, x1):
        td = datasets.TupleDataset(x0, x1)
        self.assertEqual(len(td), len(x0))

        for i in range(len(x0)):
            example = td[i]
            self.assertEqual(len(example), 2)

            numpy.testing.assert_array_equal(
                cuda.to_cpu(example[0]), cuda.to_cpu(x0[i]))
            numpy.testing.assert_array_equal(
                cuda.to_cpu(example[1]), cuda.to_cpu(x1[i]))
Example 17
def mnist_data_for_chainer(csv_loaded_data):
    images = np.array(csv_loaded_data["images"])
    labels = np.array(csv_loaded_data["labels"])

    X = []
    Y = []
    n = len(images)
    for i in range(n):
        d = np.array(images[i].reshape(28, 28), dtype=np.float32)
        X.append([d])
        Y.append(labels[i].astype(np.int32))
    X = np.array(X)
    Y = np.array(Y)
    return datasets.TupleDataset(X, Y)
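
Assuming each row of images holds one flattened 28x28 image, the loop above can be replaced by an equivalent vectorised construction producing the same (n, 1, 28, 28) and (n,) shapes; a sketch:

    X = np.asarray(images, dtype=np.float32).reshape(-1, 1, 28, 28)
    Y = np.asarray(labels, dtype=np.int32)
    dataset = datasets.TupleDataset(X, Y)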
Example 18
def training_vae(data,
                 hidden,
                 max_epoch,
                 batchsize,
                 act_func='sigmoid',
                 gpu_device=0,
                 loss_function='mse',
                 out_dir='result'):

    # input size
    inputs = data.shape[1]
    layers = [inputs] + hidden

    # define the model
    vae = VariationalAutoEncoder(layers, act_func=act_func)
    model = VariationalAutoencoderTrainer(vae,
                                          beta=1.0,
                                          k=1,
                                          loss_function=loss_function)
    opt = optimizers.Adam()
    opt.setup(model)

    # convert the data into the dataset format
    train = datasets.TupleDataset(data, data)
    train_iter = iterators.SerialIterator(train, batchsize)

    # training loop
    updater = training.StandardUpdater(train_iter, opt, device=gpu_device)
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out=out_dir)
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/reconst_loss', 'main/kld',
            'elapsed_time'
        ]))
    trainer.run()

    # move the model back to the CPU if a GPU was used
    if -1 < gpu_device:
        vae.to_cpu()

    return vae
Example 19
def create_dataset(assays, smiles, featurizer):
    # Merge assay results
    assays = pubchem.concat_assays(assays)
    df = pd.merge(assays, smiles, on='PUBCHEM_SID', how='inner')

    # Convert smiles to fingerprint and drop substances
    # that cannot be converted to fingerprint.
    print('Creating feature vectors from SMILES...')
    df['FINGERPRINT'] = df['SMILES'].apply(featurizer)
    df = df[df['FINGERPRINT'] != -1]
    fps = np.array(list(df['FINGERPRINT'].values), dtype=np.float32)

    # Convert outcome to binary value
    assays = df.drop(['PUBCHEM_SID', 'SMILES', 'FINGERPRINT'], axis=1).values
    assays[assays == 'Active'] = 0
    assays[assays == 'Inactive'] = 1
    assays[(assays != 0) & (assays != 1)] = -1
    assays = assays.astype(np.int32)

    assert len(fps) == len(assays)
    return D.TupleDataset(fps, assays)
Example 20
def main():
    epoch = 100
    batch_size = 1
    n_in = 4

    data = generate_data(n_in)

    # Convert to set of tuples (target, label).
    train = datasets.TupleDataset(*data)

    model = L.Classifier(MyModel(4, 1), lossfun=F.mean_squared_error)

    # Set compute_accuracy=False when using MSE.
    model.compute_accuracy = False

    # Define optimizer (Adam, RMSProp, etc)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Define iterators.
    train_iter = chainer.iterators.SerialIterator(train, batch_size)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Helper functions (extensions) to monitor progress on stdout.
    report_params = [
        'epoch',
        'main/loss',
    ]
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_params))
    trainer.extend(extensions.ProgressBar())

    # Run trainer
    trainer.run()

    # Should print out value close to W.
    print(model.predictor(np.ones((1, n_in)).astype(np.float32)).data)
Example 21
def train():
    # train_txt = "/media/common-ns/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/CV01.txt"
    train_dir = "/media/common-ns/New Volume/reseach/Dataset/OU-ISIR_by_Setoguchi/Gallery/CV01(Gallery)/*"
    train = load_OULP(path_dir=train_dir)

    # print(train[0])

    # training data
    # train = train[0:1000]
    train = [i[0] for i in train]  # keep only the data path from each (path, label) pair
    train = datasets.TupleDataset(train, train)  # pair each path with itself so input and target are the same image

    batch_size = 195
    train_iter = chainer.iterators.SerialIterator(train, batch_size=batch_size)

    #model = L.Classifier(Autoencoder(), lossfun=F.mean_squared_error)
    model = L.Classifier(CAE(), lossfun=sce_loss)
    model.compute_accuracy = False
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    updater = StandardUpdater(train_iter, optimizer, device=0)
    trainer = Trainer(
        updater,
        (1000, 'epoch'),
        out="result",
    )
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    trainer.extend(extensions.snapshot(), trigger=(200, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        target=model, filename='model_snapshot_{.updater.iteration}'),
                   trigger=(250, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
    serializers.save_npz(
        "/home/common-ns/setoguchi/chainer_files/Convolutional_Auto_Encoder/CAE_FC_model",
        model)
Example 22
def train(model, number_of_epoch, once=False, show_hidden_layer=False):
    #optimizer = optimizers.SGD()
    #optimizer = optimizers.SGD(lr = 0.5)
    optimizer = optimizers.Adam(alpha=0.1)
    #optimizer = optimizers.MomentumSGD(lr = 0.1, momentum = 0.9)
    optimizer.setup(model)

    train = datasets.TupleDataset(data_axis, data_value)
    train_iter = iterators.SerialIterator(train,
                                          batch_size=number_of_data,
                                          shuffle=True)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (number_of_epoch, 'epoch'),
                               out='result')

    if (once):
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))

    p = model.predictor
    if (not once):
        with open("./output/initial-weight.txt", mode='a') as f:
            for (a, b) in zip(p.l1.W.data.reshape((number_of_hidden_nodes, )),
                              p.l1.b.data):
                f.write("{0} {1}\n".format(a, b))

    trainer.run()

    if (not once):
        with open("./output/final-weight.txt", mode='a') as f:
            for (a, b) in zip(p.l1.W.data.reshape((number_of_hidden_nodes, )),
                              p.l1.b.data):
                f.write("{0} {1}\n".format(a, b))

    if (once):
        plotGraph(model, show_hidden_layer)
    return
Example 23
def load_svhn(path, fmt='TupleDataset', image_size=[70, 30], num=None):
    assert fmt in ['TupleDataset', 'dict']
    print('Loading digitStruct.mat...')
    dsm = load_digitStruct(str(Path(path) / 'digitStruct.mat'), num=num)
    print('    Done.')
    print('Loading images...')
    xs = []
    for name in tqdm(dsm['name']):
        xs.append(np.array(Image.open(str(Path(path) / name)).resize(image_size)))
    print('    Done.')
    xs = np.asarray(xs, dtype=np.float32).transpose([0, 3, 1, 2]) / 256  # (batch, color, height, width)
    ys = [''.join(map(str, map(int, b['label']))) for b in dsm['bbox']]

    if fmt == 'dict':
        return {
            "xs": xs,
            "ys": ys,
            "digitStruct": dsm
        }
    elif fmt == 'TupleDataset':
        return datasets.TupleDataset(xs, ys)
    else:
        raise ValueError
Example 24
    print('depth must be (9, 17, 29)')
    sys.exit()

print('VDCNN setting: emb_dim={} n_out={}, depth={}'.format(
    len(char2id) + 1, kind,
    sum(depth) * 2 + 1))

gpu_id = args.gpu

model = VDCNN(len(char2id) + 1, kind, depth)
if gpu_id >= 0:
    model.to_gpu(gpu_id)

print(mode, train_x.shape, train_y.shape, test_x.shape, test_y.shape)

train = datasets.TupleDataset(train_x, train_y)
test = datasets.TupleDataset(test_x, test_y)

batch_size = 128

train_iter = iterators.SerialIterator(train, batch_size)
test_iter = iterators.SerialIterator(test, batch_size, False, False)

epoch_size = 5000
max_epoch = 15

model = L.Classifier(model)

optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)

optimizer.setup(model)
Example 25
import numpy as np

import matplotlib
matplotlib.use('Agg')

mushroomsfile = 'mushrooms.csv'
data_array = np.genfromtxt(mushroomsfile,
                           delimiter=',',
                           dtype=str,
                           skip_header=1)
for col in range(data_array.shape[1]):
    data_array[:, col] = np.unique(data_array[:, col], return_inverse=True)[1]

X = data_array[:, 1:].astype(np.float32)
Y = data_array[:, 0].astype(np.int32)[:, None]
train, test = datasets.split_dataset_random(datasets.TupleDataset(X, Y),
                                            int(data_array.shape[0] * .7))

train_iter = ch.iterators.SerialIterator(train, 100)
test_iter = ch.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)


# Network definition
def MLP(n_units, n_out):
    layer = ch.Sequential(L.Linear(n_units), F.relu)
    model = layer.repeat(2)
    model.append(L.Linear(n_out))

    return model
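
The excerpt stops after the network definition; in the fuller mushrooms example (compare Example 5 above) the network is wrapped for training roughly as:

    model = L.Classifier(MLP(44, 1),
                         lossfun=F.sigmoid_cross_entropy,
                         accfun=F.binary_accuracy)
    optimizer = ch.optimizers.SGD().setup(model)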

Example 26
    Y = iris.target
    Y = Y.flatten().astype(np.int32)  # NOTICE: calling flatten
    return (X, Y)


verbose = True
report_params = [
    'epoch',
    'main/loss',
    'validation/main/loss',
    'main/accuracy',
    'validation/main/accuracy',
]

iris_data = iris_data()
train, test = datasets.split_dataset_random(datasets.TupleDataset(*iris_data),
                                            100)
train_iter = iterators.SerialIterator(train, batch_size=10, shuffle=True)
test_iter = iterators.SerialIterator(test,
                                     batch_size=1,
                                     repeat=False,
                                     shuffle=False)

model = L.Classifier(MyModel())
optimizer = optimizers.Adam()
optimizer.setup(model)

updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')

if verbose:
Example 27
    def test_tuple_dataset_overrun(self):
        td = datasets.TupleDataset(self.x0, self.x1)
        with self.assertRaises(IndexError):
            td[3]
Example 28
    def test_tuple_dataset_len_mismatch(self):
        with self.assertRaises(ValueError):
            datasets.TupleDataset(self.x0, self.z0)
Example 29
from chainer import training
from chainer.training import extensions

import numpy as np

mushroomsfile = 'mushrooms.csv'

data_array = np.genfromtxt(
    mushroomsfile, delimiter=',', dtype=str, skip_header=1)
for col in range(data_array.shape[1]):
    data_array[:, col] = np.unique(data_array[:, col], return_inverse=True)[1]

X = data_array[:, 1:].astype(np.float32)
Y = data_array[:, 0].astype(np.int32)[:, None]
train, test = datasets.split_dataset_random(
    datasets.TupleDataset(X, Y), int(data_array.shape[0] * .7))

train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(
    test, 100, repeat=False, shuffle=False)


# Network definition
class MLP(chainer.Chain):
    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # the input size to each layer inferred from the layer before
            self.l1 = L.Linear(n_units)  # n_in -> n_units
            self.l2 = L.Linear(n_units)  # n_units -> n_units
            self.l3 = L.Linear(n_out)  # n_units -> n_out
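
    # Sketch of the forward pass this network would typically define
    # (not shown in this excerpt; F refers to chainer.functions):
    def forward(self, x):
        h = F.relu(self.l1(x))
        h = F.relu(self.l2(h))
        return self.l3(h)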
Example 30
def main():
    
    # option parsing
    parser = argparse.ArgumentParser(description='Train a speaker recognition model')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--datadir', '-d', default='train',
                        help='Directory containing the training data')
    args = parser.parse_args()

    sys.stderr.write('GPU: {}\n'.format(args.gpu))
    sys.stderr.write('# minibatch-size: {}\n'.format(args.batchsize))
    sys.stderr.write('# epoch: {}\n'.format(args.epoch))

    trainf = []
    label = 0
    print('loading dataset')
    mfcc = os.listdir(args.datadir)
    for i in [f for f in mfcc if ('mfcc' in f)]:
        trainf.append([os.path.join(args.datadir, i), label])            
        label += 1
    #print('{}'.format(trainf))
    input = []
    target = []
    if not os.path.exists(args.out):
        os.mkdir(args.out)
    log = open(args.out+"/class_log",'w')
    for file in trainf:
        print('{}'.format(file))
        log.write('{},{}\n'.format(file[0].strip().split("/")[-1].split(".")[0],file[1]))
        with open(file[0], 'r') as f:
            lines = f.readlines()         
        for i, l in enumerate(lines):
            #print('{}'.format(len(lines)))
            tmp = []
            flag = False
            for j in range(3): # evaluate using 3 consecutive frames
                if i + 2 < len(lines):
                    frame = lines[i+j].strip().split(" ")
                    #print("i:{},file:{}".format(i,file))
                    frame = np.array(frame, dtype=np.float32)  # convert the string values to floats
                    tmp.extend(frame)
                    flag = True
                    #print('{}'.format(tmp))
            if flag:
                input.append(tmp)
                target.append(file[1])
            #    print('{}'.format(input))
    log.close()
    #print('{}'.format(input))  
    #sys.exit()
    #print(np.array(input))
    input = np.array(input).astype(np.float32)
    target = np.array(target).astype(np.int32)
    #print('{},{}'.format(len(input), len(target)))
    train = D.TupleDataset(input, target)
    #sys.stderr.write(train)
    #print(len(input)*0.9)
    train, test = D.split_dataset_random(train, int(len(input)*0.9))
    print('{},{}'.format(len(train), len(test)))
    #sys.exit()
        
    model = L.Classifier(DNN(label))  # TODO: switch to a CNN
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    # trainer.extend(
    #     extensions.PlotReport('main/loss', 'epoch', file_name='loss.png'))
    # trainer.extend(
    #     extensions.PlotReport('main/accuracy', 'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    #trainer.extend(extensions.PrintReport(
    #    ['epoch', 'main/loss', 'main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    print('training start!')
    trainer.run()
    # move the model back to the CPU
    model.to_cpu()
    # save the model
    modelname = args.out + "/speaker.model"
    print('save the trained model: {}'.format(modelname))
    chainer.serializers.save_npz(modelname, model)