Example #1
 def test_permuted_split_dataset(self):
     original = [1, 2, 3, 4, 5]
     subset1, subset2 = datasets.split_dataset(original, 2, [2, 0, 3, 1, 4])
     self.assertEqual(len(subset1), 2)
     self.assertEqual(subset1[0], 3)
     self.assertEqual(subset1[1], 1)
     self.assertEqual(len(subset2), 3)
     self.assertEqual(subset2[0], 4)
     self.assertEqual(subset2[1], 2)
     self.assertEqual(subset2[2], 5)
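
For reference, a minimal standalone sketch of the permuted split that the test above exercises (assuming only that chainer is installed; the values in the comments follow from the order argument):

from chainer.datasets import split_dataset

original = [1, 2, 3, 4, 5]
# The third argument is a permutation of indices: the first split_at entries
# of that permutation select subset1, the remaining entries select subset2.
subset1, subset2 = split_dataset(original, 2, [2, 0, 3, 1, 4])
print([subset1[i] for i in range(len(subset1))])  # [3, 1]    (items at indices 2 and 0)
print([subset2[i] for i in range(len(subset2))])  # [4, 2, 5]  (items at indices 3, 1 and 4)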
Example #3
def make_dataset():
    train, test = get_tox21()
    train, test, atom2id = getAtom2id(train, test)
    train, val = D.split_dataset(train, int(0.9 * len(train)))

    print("size of train set:", len(train))
    print("size of val set:", len(val))
    print("size of test set:", len(test))

    return train, val, test, atom2id
Example #4
def __filter_class(dataset, extract_class):
    target_data = []
    target_label = []
    for data, label in dataset:
        if label in extract_class:
            target_data.append(data)
            target_label.append(extract_class.index(label))
    target_data = np.array(target_data)
    target_label = np.array(target_label, dtype=np.int32)

    dataset = tuple_dataset.TupleDataset(target_data, target_label)
    train, val = split_dataset(dataset, int(len(dataset) * 0.9))
    return train, val
Example #5
def get_clf_data(use_memory=True,
                 img_size=224,
                 img_type='warp',
                 split_val=0.9):
    def __get_train_list():
        train_list_path = 'data/clf/train_master.tsv'
        dataframe = pd.read_csv(train_list_path,
                                sep='\t',
                                usecols=['file_name', 'category_id'])
        train_data_list = pd.DataFrame(dataframe).to_records(index=False)
        return train_data_list

    def __get_test_list():
        test_list_path = 'data/clf/test.tsv'
        test_data_list = pd.read_csv(test_list_path,
                                     sep='\t',
                                     usecols=['file_name'])
        test_data_list = pd.DataFrame(test_data_list).to_records(index=False)
        test_data_list = [data[0] for data in test_data_list]
        return test_data_list

    img_average = None
    # if use_average_image:
    #     img_average = 'data/clf/ave_%s_%s.png' % (img_size, img_type)

    # train, val
    logging.info('Loading train, val dataset...')
    labeled = WarpedLabeledImageDataset(__get_train_list(),
                                        root='data/clf/train_images_labeled',
                                        use_memory=use_memory,
                                        img_size=img_size,
                                        img_type=img_type,
                                        img_average=img_average)
    logging.info('Done.')

    # test
    logging.info('Loading test dataset...')
    test = WarpedImageDataset(__get_test_list(),
                              root='data/clf/test_images',
                              use_memory=use_memory,
                              img_size=img_size,
                              img_type=img_type,
                              img_average=img_average)
    logging.info('Done.')

    if split_val is not False:
        train, val = split_dataset(labeled, int(len(labeled) * split_val))
        return train, val, test
    else:
        train = labeled
        return train, test
Example #6
    def get_subdatasets(self):
        order = []
        if self.split_inter:
            if self.subsampling:
                for i in range(0, len(self)):
                    frame, subject = divmod(i, self.subject_number)
                    if (frame % sum(self.split_ratio)) < self.split_ratio[0]:
                        order.append(i)
            else:
                order = list(
                    range(self.subject_number * self.split_ratio[0] //
                          sum(self.split_ratio) * self.frame_number))
        else:
            order = list(
                range(self.frame_number * self.split_ratio[0] //
                      sum(self.split_ratio) * self.subject_number))
        split_at = len(order)
        assert (split_at != 0) & (split_at != len(self))

        order.extend(set(range(len(self))) - set(order))

        return split_dataset(self, split_at, order)
Example #7
 def test_split_dataset_invalid_position(self):
     original = [1, 2, 3, 4, 5]
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, -1)
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, 5)
Example #8
    def train_model(self, datasets):
        parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
        parser.add_argument('--dataset',
                            '-d',
                            default='cifar10',
                            help='The dataset to use: cifar10 or cifar100')
        parser.add_argument('--batchsize',
                            '-b',
                            type=int,
                            default=10,
                            help='Number of images in each mini-batch')
        parser.add_argument('--learnrate',
                            '-l',
                            type=float,
                            default=0.05,
                            help='Learning rate for SGD')
        parser.add_argument('--epoch',
                            '-e',
                            type=int,
                            default=300,
                            help='Number of sweeps over the dataset to train')
        parser.add_argument('--gpu',
                            '-g',
                            type=int,
                            default=-1,
                            help='GPU ID (negative value indicates CPU)')
        parser.add_argument('--out',
                            '-o',
                            default='result',
                            help='Directory to output the result')
        parser.add_argument('--resume',
                            '-r',
                            default='',
                            help='Resume the training from snapshot')
        parser.add_argument('--early-stopping',
                            type=str,
                            help='Metric to watch for early stopping')
        args = parser.parse_args()

        print('GPU: {}'.format(args.gpu))
        print('# Minibatch-size: {}'.format(args.batchsize))
        print('# epoch: {}'.format(args.epoch))

        if args.gpu >= 0:
            chainer.backends.cuda.get_device_from_id(args.gpu).use()
            self.model.to_gpu()

        optimizer = chainer.optimizers.Adam(args.learnrate)
        optimizer.setup(self.model)
        optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

        train, test = split_dataset(datasets, 80)

        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        stop_trigger = (args.epoch, 'epoch')
        # Early stopping option
        if args.early_stopping:
            stop_trigger = triggers.EarlyStoppingTrigger(
                monitor=args.early_stopping,
                verbose=True,
                max_trigger=(args.epoch, 'epoch'))

        # Set up a trainer
        updater = training.updaters.StandardUpdater(
            train_iter,
            optimizer,
            device=args.gpu,
            loss_func=mean_squared_error)
        trainer = training.Trainer(updater, stop_trigger, out=args.out)

        # Evaluate the model with the test dataset for each epoch
        trainer.extend(
            extensions.Evaluator(test_iter, self.model, device=args.gpu))

        # Reduce the learning rate (Adam's alpha) by half every 25 epochs.
        # Note: Adam exposes no settable 'lr' attribute, so shift 'alpha' instead.
        trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                       trigger=(25, 'epoch'))

        # Dump a computational graph from 'loss' variable at the first iteration
        # The "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # Take a snapshot at each epoch
        trainer.extend(
            extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again, and
        # "validation" refers to the default name of the Evaluator extension.
        # Entries other than 'epoch' are reported by the Classifier link, called by
        # either the updater or the evaluator.
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        print(train[:1])

        # Run the training
        trainer.run()

        return self.model
Example #9
 def test_split_dataset_with_invalid_length_permutation(self):
     original = [1, 2, 3, 4, 5]
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, 2, [2, 0, 3, 1])
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, 2, [2, 0, 3, 1, 4, 5])
Example #10
def get_dataset(dataset_type, matrixForData, **kwargs):

    if dataset_type == 'synthetic':
        train = binary_tree.get_data(matrixForData)
        valid = train.copy()
        test = train.copy()
        if kwargs['dataset_randomness'] != -1:
            train = binary_tree.ProbabilisticBinaryTreeDataset(
                train, eps=kwargs['dataset_randomness'])
            valid = binary_tree.ProbabilisticBinaryTreeDataset(
                valid, eps=kwargs['dataset_randomness'])
            test = binary_tree.ProbabilisticBinaryTreeDataset(
                test, eps=kwargs['dataset_randomness'])

    elif dataset_type == 'mnist':
        # Load the MNIST dataset
        ndim = kwargs.get('ndim', 1)
        train, test = mnist_activity.get_mnist(withlabel=False,
                                               ndim=ndim,
                                               data=matrixForData,
                                               dtype=matrixForData.dtype)

        #        train, test = datasets.get_mnist(withlabel=False, ndim=ndim)

        # Binarize dataset
        #train[train >= 0.5] = 1.0
        #train[train < 0.5] = 0.0
        #test[test >= 0.5] = 1.0
        #test[test < 0.5] = 0.0

        size_data = len(train[:, 1])
        upper_part = math.floor(0.8 * size_data)

        train, valid = datasets.split_dataset(train, upper_part)

    elif dataset_type == 'cifar100':
        # Load the Cifar-100 dataset
        train, test = datasets.get_cifar100(withlabel=False)
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)

        train, valid = datasets.split_dataset(train, 49000)

    elif dataset_type == 'breakout':
        train, test = breakout.load_dataset(withlabel=False)
        # scaling data from [0, 1] to [-1, 1]
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)
        train, valid = datasets.split_dataset(train, 80000)

    elif dataset_type == 'wordnet':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize)
        valid = None
        test = None

    elif dataset_type == 'mammal':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize, mammal=True)
        valid = None
        test = None

    else:
        raise ValueError

    return train, valid, test
Example #12
def main(commands=None):
    parser = argparse.ArgumentParser(description='Segmentation Predict')
    parser.add_argument('--model', '-m', nargs='+', help='Path to model')
    parser.add_argument('--config', '-c', nargs='*', default=['examples/configs/seg_resnet.yaml'])
    parser.add_argument('--val-set', type=int)
    parser.add_argument('--x-flip', type=int, help='0: no, 1: yes, 2: both (average)', default=0)
    parser.add_argument('--multiscale', action='store_true')

    # Args for ensembling
    parser.add_argument('--ensemble-seg', action='store_true')
    parser.add_argument('--seg-weight', type=float, nargs='*', default=None)
    parser.add_argument('--edge-weight', type=float, nargs='*', default=None)

    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--n-process', '-p', type=int, default=30)
    parser.add_argument('--out', '-o', default='out.csv')

    parser.add_argument('--test', action='store_true')
    parser.add_argument('--limit', '-n', type=int, default=0)
    parser.add_argument('--thresh', '-t', type=float, default=0.1,
                        help='Threshold for edge confidence')

    parser.add_argument('--save-demo-to', metavar='/path/to/out_demo/dir')
    parser.add_argument('--overlay-seg', action='store_true')

    parser.add_argument('--cprofile', action='store_true',
                        help='To profile with cprofile')

    args = parser.parse_args(commands)
    configs = [load_config(yaml.safe_load(open(args.config[i]))) for i in range(len(args.config))]
    master_config = configs[0]

    comm = chainermn.create_communicator(communicator_name='pure_nccl')
    device = comm.intra_rank + args.gpu
    print('Device = {}'.format(device))

    if len(configs) == 1 and len(args.model) >= 2:
        # Duplicate same config
        configs = configs * len(args.model)
    else:
        assert len(configs) == len(args.model), "# of configs and models don't match."

    # Setup models
    models = []
    for i in range(len(args.model)):
        model = setup_model(configs[i], args.x_flip)
        chainer.serializers.load_npz(args.model[i], model)
        models.append(model)

    if len(models) == 1:
        model = models[0]
    else:
        ensembler_cls = MultiScaleModelEnsembler if args.multiscale else ModelEnsembler
        model = ensembler_cls(models, ensemble_seg=args.ensemble_seg,
                              seg_weight=args.seg_weight, edge_weight=args.edge_weight)

    with cuda.get_device_from_id(device):
        model.to_gpu()

    # Setup dataset
    if comm.rank == 0:
        if args.test:
            dataset = RSNASubmissionDataset()
        else:
            if args.val_set is not None:
                master_config['val_set'] = args.val_set
            dataset = RSNATrainDataset()

            if master_config['val_set'] == -1:
                val_mask = dataset.patient_df['withinTestRange'].values == 1
                val_indices = val_mask.nonzero()[0]
            else:
                _, val_indices = create_train_val_indices(np.ones(len(dataset), dtype=bool),
                                                          master_config['val_set'])

            dataset = dataset.slice[val_indices, ('dicom_data', 'img', 'bbox')]

        if args.limit and args.limit < len(dataset):
            dataset, _ = split_dataset(dataset, args.limit)
    else:
        dataset = None

    dataset = chainermn.scatter_dataset(dataset, comm)

    if args.cprofile:
        import cProfile
        import pstats
        import io
        pr = cProfile.Profile()
        pr.enable()

    if comm.rank == 0:
        print('Extracting network outputs...')
    outputs = []
    gt_bboxes = []
    for i in range(len(dataset)):
        if comm.rank == 0 and i % 100 == 0:
            print('Processing {}-th sample...'.format(i))
        if args.test:
            dicom_data, image = dataset[i]
            patient_id = dicom_data.PatientID
            gt_bbox = np.empty((0, 4), dtype=np.float32)
        else:
            dicom_data, image, gt_bbox = dataset[i]
            patient_id = dicom_data.PatientID

        if master_config['data_augmentation']['window_width'] > 1.0:
            image = (image - 128) * master_config['data_augmentation']['window_width'] + 128
            image = np.clip(image, 0, 255)

        with cuda.get_device_from_id(device):
            h_seg, h_hor, h_ver = [x[0] for x in model.extract([image])]

        outputs.append((patient_id, image, h_seg, h_hor, h_ver))
        gt_bboxes.append((patient_id, gt_bbox))

    if comm.rank == 0:
        for i in range(1, comm.size):
            other_outputs = comm.recv_obj(i)
            outputs.extend(other_outputs)
            other_gt_bboxes = comm.recv_obj(i)
            gt_bboxes.extend(other_gt_bboxes)
    else:
        comm.send_obj(outputs, 0)
        comm.send_obj(gt_bboxes, 0)
        print('Bye {}.'.format(comm.rank))
        exit(0)

    outputs = sorted(outputs, key=lambda x: x[0])
    gt_bboxes = sorted(gt_bboxes, key=lambda x: x[0])

    print('Done.')
    print('Postprocessing...')
    postprocessor = Postprocessor(master_config['downscale'], args.thresh,
                                  master_config['size_thresh'],
                                  master_config['edge_conf_operation'])
    with multiprocessing.Pool(args.n_process) as p:
        results = p.map(postprocessor.postprocess, outputs)

    results = sorted(results, key=lambda x: x[0])
    print('Done.')

    outputs_ids = [x[0] for x in outputs]
    results_ids = [x[0] for x in results]
    assert outputs_ids == results_ids

    print('Dumping final results...')
    pred_manager = PredictionsManager()
    n_positive = 0
    for result in results:
        patient_id, bbox, label, score = result
        pred_manager.add_prediction(patient_id, bbox, score)
        if len(bbox) > 0:
            n_positive += 1

    print('Complete!')
    print('{} / {} are predicted as positive.'.format(n_positive, len(dataset)))
    with open(args.out, 'w') as f:
        pred_manager.dump(f)

    if args.save_demo_to:
        print('Start saving demos...')
        os.makedirs(args.save_demo_to, exist_ok=True)
        demo_saver = DemoSaver(args.save_demo_to, master_config['downscale'], args.overlay_seg)
        with multiprocessing.Pool(args.n_process) as p:
            p.map(demo_saver.save, list(zip(results, outputs, gt_bboxes)))

    if args.cprofile:
        pr.disable()
        s = io.StringIO()
        sortby = 'time'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()

        print(s.getvalue())
        pr.dump_stats('prof.cprofile')
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help=
        "path to dataset root if dataset file is not already in root folder of dataset"
    )
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help="default learning rate")
    parser.add_argument('--port',
                        type=int,
                        default=1337,
                        help="port for bbox sending")
    parser.add_argument('--ip',
                        default='127.0.0.1',
                        help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if len(gpu_datasets[0]) != len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1],
                                                 len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root,
                        args.test_dataset,
                        image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(test,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False,
                                                      n_threads=2)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if len(args.gpu) > 0 else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }

    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        if stats_cpu['iteration'] == log_interval:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval,
                             postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(5000, 'iteration'))

    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image,
                                               resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #14
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(
        description='Multi-Perceptron classifier/regressor')
    parser.add_argument('dataset', help='Path to data file')
    parser.add_argument('--activation',
                        '-a',
                        choices=activ.keys(),
                        default='sigmoid',
                        help='Activation function')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=10,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--dropout_ratio',
                        '-dr',
                        type=float,
                        default=0,
                        help='dropout ratio')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot',
                        '-s',
                        type=int,
                        default=-1,
                        help='snapshot interval')
    parser.add_argument('--labelcol',
                        '-l',
                        type=int,
                        nargs="*",
                        default=[0, 1, 2, 3],
                        help='column indices of target variables')
    parser.add_argument('--initmodel',
                        '-i',
                        help='Initialize the model from given file')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--outdir',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument(
        '--optimizer',
        '-op',
        default='MomentumSGD',
        help='optimizer {MomentumSGD,AdaDelta,AdaGrad,Adam,RMSprop}')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--skip_rows',
                        '-sr',
                        type=int,
                        default=0,
                        help='num of rows skipped in the data')
    parser.add_argument(
        '--skip_column',
        '-sc',
        type=int,
        nargs="*",
        default=[],
        help='set of indices of columns to be skipped in the data')
    parser.add_argument('--unit',
                        '-nu',
                        type=int,
                        nargs="*",
                        default=[128, 64, 32, 4],
                        help='Number of units in the hidden layers')
    parser.add_argument(
        '--test_every',
        '-t',
        type=int,
        default=5,
        help='use one in every ? entries in the dataset for validation')
    parser.add_argument('--regression',
                        action='store_true',
                        help="set for regression, otherwise classification")
    parser.add_argument('--batchnorm',
                        '-bn',
                        action='store_true',
                        help="perform batchnormalization")
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--weight_decay',
                        '-w',
                        type=float,
                        default=1e-5,
                        help='weight decay for regularization')
    args = parser.parse_args()

    ##
    if args.gpu is None:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1

    print('GPU: {} Minibatch-size: {} # epoch: {}'.format(
        args.gpu, args.batchsize, args.epoch))

    # Set up a neural network to train
    model = MLP(args)
    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Set up an optimizer
    if args.optimizer == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    elif args.optimizer == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta(rho=0.95, eps=1e-06)
    elif args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(lr=0.01, eps=1e-08)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=0.01,
                                            beta1=0.9,
                                            beta2=0.999,
                                            eps=1e-08)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=0.01, alpha=0.99, eps=1e-08)
    else:
        print("Wrong optimiser")
        exit(-1)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    print(
        'units: {}, optimiser: {}, Weight decay: {}, dropout ratio: {}'.format(
            args.unit, args.optimizer, args.weight_decay, args.dropout_ratio))

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    label_type = np.float32 if args.regression else np.int32

    # read csv file
    csvdata = np.loadtxt(args.dataset, delimiter=",", skiprows=args.skip_rows)
    ind = np.ones(csvdata.shape[1], dtype=bool)  # indices for unused columns
    ind[args.labelcol] = False
    for i in args.skip_column:
        ind[i] = False
    x = np.array(csvdata[:, ind], dtype=np.float32)
    t = csvdata[:, args.labelcol]
    t = np.array(t, dtype=label_type)
    if not args.regression:
        t = t[:, 0]
    print('target column: {}, excluded columns: {}'.format(
        args.labelcol,
        np.where(ind == False)[0].tolist()))
    print("variable shape: {}, label shape: {}, label type: {}".format(
        x.shape, t.shape, label_type))

    ## train-validation data
    # random spliting
    #train, test = datasets.split_dataset_random(datasets.TupleDataset(x, t), int(0.8*t.size))
    # splitting by modulus of index
    train_idx = [i for i in range(len(t)) if (i + 1) % args.test_every != 0]
    var_idx = [i for i in range(len(t)) if (i + 1) % args.test_every == 0]
    n = len(train_idx)
    train_idx.extend(var_idx)
    train, test = datasets.split_dataset(datasets.TupleDataset(x, t), n,
                                         train_idx)

    # dataset iterator
    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test,
                                         args.batchsize,
                                         repeat=False,
                                         shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = frequency / 10, 'epoch'

    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(frequency / 5, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))

    if args.optimizer in ['MomentumSGD', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5),
                       trigger=(args.epoch / 5, 'epoch'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch',
                file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time', 'lr'
    ]),
                   trigger=log_interval)

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ChainerUI
    trainer.extend(CommandsExtension())
    save_args(args, args.outdir)
    trainer.extend(extensions.LogReport(trigger=log_interval))

    if not args.predict:
        trainer.run()
    else:
        test = datasets.TupleDataset(x, t)

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    x, t = chainer.dataset.concat_examples(test, args.gpu)

    with chainer.using_config('train', False):
        y = model(x, t)
    if args.gpu >= 0:
        pred = chainer.cuda.to_cpu(y.data)
        t = chainer.cuda.to_cpu(t)
    else:
        pred = y.data
    if args.regression:
        left = np.arange(t.shape[0])
        for i in range(len(args.labelcol)):
            rmse = F.mean_squared_error(pred[:, i], t[:, i])
            plt.plot(left, t[:, i], color="royalblue")
            plt.plot(left, pred[:, i], color="crimson", linestyle="dashed")
            plt.title("RMSE: {}".format(np.sqrt(rmse.data)))
            plt.savefig(args.outdir + '/result{}.png'.format(i))
            plt.close()
        result = np.hstack((t, pred))
        np.savetxt(args.outdir + "/result.csv",
                   result,
                   fmt='%1.5f',
                   delimiter=",",
                   header="truth,prediction")
    else:
        p = np.argmax(pred, axis=1)
        result = np.vstack((t, p)).astype(np.int32).transpose()
        print(result.tolist())
        np.savetxt(args.outdir + "/result.csv",
                   result,
                   delimiter=",",
                   header="truth,prediction")
Example #15
 def test_split_dataset_invalid_type(self):
     original = [1, 2, 3, 4, 5]
     with self.assertRaises(TypeError):
         datasets.split_dataset(original, 3.5)
Example #16
path = 'path.txt'  # 500fps
picture = ImageDataset(path)
picture = TransformDataset(picture, transform)

path = '20191101/2019110110201.csv'  # 1000fps
force = np.loadtxt(path, delimiter=',', skiprows=7)
force_z = force[:len(picture) * 2:2, 3].astype(np.float32)

x = picture
t = np.reshape(force_z, (6400, 1))
dataset = TupleDataset(x, t)

n_train = int(len(dataset) * 0.8)
n_valid = int(len(dataset) * 0.1)
train, valid_test = split_dataset(dataset, n_train)
valid, test = split_dataset(valid_test, n_valid)
# train, valid_test = split_dataset_random(dataset, n_train, seed=0)
# valid, test = split_dataset_random(valid_test, n_valid, seed=0)

print('Training dataset size:', len(train))
print('Validation dataset size:', len(valid))
print('Test dataset size:', len(test))

train_mode = False

if train_mode:
    batchsize = 16
    train_iter = iterators.SerialIterator(train, batchsize)
    valid_iter = iterators.SerialIterator(valid,
                                          batchsize,
Example #17
def main():
    args = create_args('train')
    result_dir = create_result_dir(args.model_name)

    # Prepare devices
    devices = get_gpu_dict(args.gpus)

    # Instantiate a model
    model = RegNet(epsilon=args.epsilon)

    # Instantiate an optimizer
    optimizer = get_optimizer(model, **vars(args))

    # Setting up datasets
    prep = TransformDataset(KITTI(args.kitti_path, 'train'),
                            CalibPrepare(args.init_pose))
    train, valid = split_dataset(
        prep, round(len(prep) * (1 - args.valid_proportion)))
    print("========== Model Parameters ==========")
    print("location loss weight (epsilon):", args.epsilon)
    print('train samples: {}, valid samples: {}'.format(
        len(train), len(valid)))

    # Iterator
    if DEBUG: Iterator = SerialIterator
    else: Iterator = MultiprocessIterator
    train_iter = Iterator(train, args.batchsize)
    valid_iter = Iterator(valid,
                          args.valid_batchsize,
                          repeat=False,
                          shuffle=False)

    # Updater
    if DEBUG:
        Updater = StandardUpdater(train_iter,
                                  optimizer,
                                  device=devices['main'])
    else:
        Updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = Trainer(Updater, (args.epoch, 'epoch'), out=result_dir)

    # Extensions
    trainer.extend(extensions.Evaluator(valid_iter,
                                        model,
                                        device=devices['main']),
                   trigger=(args.valid_freq, 'epoch'))
    trainer.extend(extensions.snapshot(),
                   trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.LogReport(),
                   trigger=(args.show_log_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=20))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'elapsed_time'
        ]))

    # Resume from snapshot
    if args.resume_from:
        chainer.serializers.load_npz(args.resume_from, trainer)

    # Train and save
    print("========== Training ==========")
    hook = CupyMemoryProfileHook()
    with hook:
        trainer.run()

    print("========== Saving ==========")
    chainer.serializers.save_hdf5(create_result_file(args.model_name), model)
    print("Done.")
    print("========== Memory Profiling ==========")
    hook.print_report()
Example #18
def split_dataset(dataset, train_frac=.8, test_frac=.5):
    train_end = int(len(dataset) * train_frac)
    train, rest = D.split_dataset(dataset, train_end)
    test_end = int(len(rest) * test_frac)
    test, val = D.split_dataset(rest, test_end)
    return train, test, val
def main():

    # Delete all files under the output folder
    remove_dir_and_file(u'result')

    # Path to the CSV listing image file names and labels
    image_files = os.path.join(u'dataset', '03_duplicate_pict_anser.csv')

    # datasets.LabeledImageDataset loads this nicely as a dataset.
    dataset = datasets.LabeledImageDataset(image_files)
    #print (u'dataset')
    #print dataset[0]
    #print (u'---')

    # The data values need to be in the range 0-1, so divide by 255 (see the transform sketch after this function)
    dataset = chainer.datasets.TransformDataset(dataset, transform)

    # Use 80% of the data for training and 20% for testing.
    split_at = int(len(dataset) * 0.8)
    train, test = datasets.split_dataset(dataset, split_at)

    # Specify things like batching and whether to shuffle the data
    train_iter = iterators.SerialIterator(train, batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test,
                                         batchsize,
                                         repeat=False,
                                         shuffle=True)

    # Define the model. Whether to use the GPU is also specified here.
    # model = MLP()
    # model.to_gpu(gpu_id)

    # Wrap the model in a Classifier so that loss computation etc. is handled by the model
    model = MLP()
    model = L.Classifier(model)
    model.to_gpu(gpu_id)

    # Choose the optimization method
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    # Pass the Iterator and Optimizer to the Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)

    # Pass the Updater to the Trainer
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='result')

    # Define how logs and other outputs are produced
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(
        extensions.snapshot_object(model.predictor,
                                   filename='model_epoch-{.updater.epoch}'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))

    # Start training!!
    trainer.run()

    # Save the trained model
    model.to_cpu()  # Move the model back to the CPU so it can be used without a GPU
    serializers.save_npz(os.path.join(u'result', u'sakamotsu.model'), model)
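
# `transform` is used above but never defined in this snippet. Below is a minimal
# sketch of what it presumably does, based on the "divide by 255" comment in
# main(); the body is an assumption and the original repository may differ.
import numpy as np


def transform(data):
    img, label = data
    img = img.astype(np.float32) / 255.0  # scale 0-255 pixel values into [0, 1]
    return img, label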
dataset = chainer.datasets.TransformDataset(dataset, transform)

# Check the contents of the transformed dataset with a sample.

# In[11]:

dataset[0]

# ### Splitting into training and test data
# Split the dataset into training and test data so that we can check that the model is not overfitting.

# In[12]:

# Use 80% of the data for training and 20% for testing.
split_at = int(len(dataset) * 0.8)
train, test = datasets.split_dataset(dataset, split_at)

# In[13]:

len(train)

# In[14]:

len(test)

# ### Passing the dataset to an iterator
# Create an Iterator that pulls a fixed number of samples out of the dataset, bundles them into a mini-batch and returns it.
#
# - `SerialIterator`, one of the Iterators provided by Chainer, is the simplest one: it takes the data out of the dataset in order.
# - It takes a dataset object and a batch size as its arguments.
# - If you need to read data from the given dataset repeatedly, pass after pass, set the `repeat` argument to `True`; if you do not want to draw any more data once a single pass is finished, set it to `False`. The default is `True`.
# A minimal usage sketch is shown below.
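
# The snippet above is cut off before the iterators are actually created. Below is
# a minimal sketch of the usage described in the notes above; the batch size of 128
# is an illustrative assumption, not a value from the original notebook.
from chainer import iterators

batchsize = 128
train_iter = iterators.SerialIterator(train, batchsize, shuffle=True)  # repeat=True by default
test_iter = iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)  # single pass, for evaluation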
Example #21
def main3():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu_id', '-g', type=int, default=1)
    parser.add_argument('--batch_size', '-b', type=int, default=100)
    parser.add_argument('--test_split', type=float, default=0.2)
    parser.add_argument(
        '--real_test',
        dest='real_test',
        action='store_true',
        help='Whether to split the data or use a complete new trial.')
    parser.add_argument('--mdn_hidden-units', '-u', type=int, default=24)
    parser.add_argument('--mdn_gaussian-mixtures', '-m', type=int, default=24)
    parser.add_argument('--max_epoch', '-e', type=int, default=250)
    parser.add_argument('--resume', '-r', type=int, default=None)
    parser.add_argument('--out_dir',
                        '-o',
                        type=str,
                        default='results/result_test')
    parser.add_argument('--data_base_dir',
                        type=str,
                        default='/media/daniel/data/hhc/')
    parser.add_argument('--data_file_pattern',
                        '-f',
                        type=str,
                        default='trial{}.avi')
    args = parser.parse_args()

    # frames, labels = load_frames_labels(filestype='/media/daniel/data/hhc/trial{}_r_forearm.avi')
    frames, labels = load_frames_labels(filestype=''.join(
        (args.data_base_dir, args.data_file_pattern)),
                                        verbose=0)

    frames, labels = unison_shuffled_copies(frames, labels)
    print('Frames shape: ', frames.shape, ' Labels shape: ', labels.shape)

    data = chainer.datasets.TupleDataset(frames, labels)  #.to_device(gpu_id)
    print('Dataset length: ', data._length)

    print('Frame size: ', data[0][0].shape, data[0][0].dtype)

    if args.real_test:
        print('Using test trial.')
        train_iter = iterators.SerialIterator(data,
                                              args.batch_size,
                                              shuffle=True)

        # Load the test data
        test_frames, test_labels = load_frames_labels(
            ids=[11],
            filestype=''.join((args.data_base_dir, args.data_file_pattern)))
        test_data = chainer.datasets.TupleDataset(test_frames, test_labels)
        test_iter = iterators.SerialIterator(test_data,
                                             args.batch_size,
                                             repeat=False,
                                             shuffle=False)
    else:
        data_test, data_train = split_dataset(data,
                                              int(args.test_split * len(data)))
        train_iter = iterators.SerialIterator(data_train,
                                              args.batch_size,
                                              shuffle=True)
        test_iter = iterators.SerialIterator(data_test,
                                             args.batch_size,
                                             repeat=False,
                                             shuffle=False)

    model = GoalScoreModel()

    if args.gpu_id >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu_id).use()
        model.to_gpu(args.gpu_id)
        # labels = chainer.dataset.to_device(args.gpu_id, labels)
        # frames = chainer.dataset.to_device(args.gpu_id, frames)

    # Create the optimizer for the model
    optimizer = optimizers.Adam().setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=1e-6))
    # optimizer.add_hook(chainer.optimizer_hooks.GradientHardClipping(-.1, .1))

    # xp = chainer.backend.get_array_module(data_train)
    # optimizer.update(model.calc_loss, xp.asarray([data_train[0][0]]), xp.asarray([data_train[0][1]]))
    # import chainer.computational_graph as c
    # g = c.build_computational_graph(model.calc_loss)
    # with open('results/graph.dot', 'w') as o:
    #     o.write(g.dump())

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       loss_func=model.calc_loss,
                                       device=args.gpu_id)

    # updater = training.ParallelUpdater(train_iter, optimizer,
    #                                 loss_func=model.calc_loss,
    #                                 devices={'main': args.gpu_id, 'second': 1})

    # Pre-training
    print('Pretraining started.')
    trainer = training.Trainer(updater, (3, 'epoch'), out=args.out_dir)
    # Disable update for the head model
    print('Disabling training of head model.')
    model.head_model.disable_update()
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.FailOnNonNumber())
    trainer.run()

    # Full training
    print('Full model training ...')
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'),
                               out=args.out_dir)
    trainer.extend(extensions.Evaluator(test_iter,
                                        model,
                                        eval_func=model.calc_loss,
                                        device=args.gpu_id),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/nll', 'main/mae', 'main/sigma',
            'validation/main/loss', 'validation/main/mae',
            'validation/main/sigma', 'elapsed_time'
        ]))  #, 'main/loss', 'validation/main/loss', 'elapsed_time'], ))
    trainer.extend(
        extensions.PlotReport(['main/mae', 'validation/main/mae'],
                              x_key='epoch',
                              file_name='loss.png',
                              marker=None))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.FailOnNonNumber())
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'),
        trigger=(20, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}.model'),
                   trigger=(20, 'epoch'))

    # Disable/Enable update for the head model
    model.head_model.enable_update()

    # Resume from a specified snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    print('Done.')
Example #22
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(description='Multi-Perceptron classifier/regressor')
    parser.add_argument('dataset', help='Path to data file')
    parser.add_argument('--activation', '-a', choices=activ.keys(), default='sigmoid',
                        help='Activation function')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--dropout_ratio', '-dr', type=float, default=0,
                        help='dropout ratio')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=-1,
                        help='snapshot interval')
    parser.add_argument('--label_index', '-l', type=int, default=5,
                        help='Column number of the target variable (5=Melting)')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--outdir', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--out_ch', '-oc', type=int, default=1,
                        help='num of output channels. set to 1 for regression')
    parser.add_argument('--optimizer', '-op', default='AdaDelta',
                        help='optimizer {MomentumSGD,AdaDelta,AdaGrad,Adam}')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--skip_columns', '-sc', type=int, default=29,
                        help='num of columns which are not used as explanatory variables')
    parser.add_argument('--layers', '-nl', type=int, default=3,
                        help='Number of layers')
    parser.add_argument('--unit', '-nu', type=int, default=100,
                        help='Number of units in the hidden layers')
    parser.add_argument('--test_every', '-t', type=int, default=5,
                        help='use one in every ? entries in the dataset for validation')
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--weight_decay', '-w', type=float, default=0,
                        help='weight decay for regularization')
    args = parser.parse_args()
    args.regress = (args.out_ch == 1)

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    label_type = np.int32 if not args.regress else np.float32

    # read csv file
    dat = pd.read_csv(args.dataset, header=0)

    ##
    print('Target: {}, GPU: {} Minibatch-size: {} # epoch: {}'.format(dat.keys()[args.label_index],args.gpu,args.batchsize,args.epoch))

#    csvdata = np.loadtxt(args.dataset, delimiter=",", skiprows=args.skip_rows)
    ind = np.ones(dat.shape[1], dtype=bool)  # indices for unused columns
    dat = dat.dropna(axis='columns')
    x = dat.iloc[:,args.skip_columns:].values
    args.in_ch = x.shape[1]
    t = (dat.iloc[:,args.label_index].values)[:,np.newaxis]
    print('target column:', args.label_index)
#    print('excluded columns: {}'.format(np.where(ind==False)[0].tolist()))
    print("data shape: ",x.shape, t.shape)
    x = np.array(x, dtype=np.float32)
    if args.regress:
        t = np.array(t, dtype=label_type)
    else:
        t = np.array(np.ndarray.flatten(t), dtype=label_type)

    # standardize
    t_mean = np.mean(t)
    t_std = np.std(t)
    x_mean = np.mean(x)
    x_std = np.std(x)
    x = (x-x_mean)/x_std
    t = (t-t_mean)/t_std

    # Set up a neural network to train
    model = MLP(args,std=t_std)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimiser
    if args.optimizer == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.003, momentum=0.9)
    elif args.optimizer == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta(rho=0.95, eps=1e-06)
    elif args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(lr=0.001, eps=1e-08)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
    else:
        print("Wrong optimiser")
        exit(-1)
    optimizer.setup(model)
    if args.weight_decay>0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    print('layers: {}, units: {}, optimiser: {}, Weight decay: {}, dropout ratio: {}'.format(args.layers,args.unit,args.optimizer,args.weight_decay,args.dropout_ratio))


## train-validation data
# random spliting
    #train, test = datasets.split_dataset_random(datasets.TupleDataset(x, t), int(0.8*t.size))
# splitting by modulus of index
    train_idx = [i for i in range(t.size) if (i+1) % args.test_every != 0]
    var_idx = [i for i in range(t.size) if (i+1) % args.test_every == 0]
    n = len(train_idx)
    train_idx.extend(var_idx)
    train, test = datasets.split_dataset(datasets.TupleDataset(x, t), n, train_idx)

# dataset iterator
    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = frequency/10, 'epoch'

    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'main/MAE', 'validation/main/MAE',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=log_interval)

    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.predict:
        trainer.run()
    else:
        test = datasets.TupleDataset(x, t)

    ## prediction
    print("predicting: {} entries...".format(len(test)))
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    converter = concat_examples
    idx = 0
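    # Each output line holds the first column of the original table (dat) for
    # the sample, followed by pairs of true and predicted values; for
    # regression both are mapped back to the original scale.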
    with open(os.path.join(args.outdir,'result.txt'),'w') as output:
        for batch in test_iter:
            x, t = converter(batch, device=args.gpu)
            with chainer.using_config('train', False):
                with chainer.function.no_backprop_mode():
                    if args.regress:
                        y = model(x).data
                        if args.gpu>-1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
                        y = y * t_std + t_mean
                        t = t * t_std + t_mean
                    else:
                        y = F.softmax(model(x)).data
                        if args.gpu>-1:
                            y = xp.asnumpy(y)
                            t = xp.asnumpy(t)
            for i in range(y.shape[0]):
                # val_idx maps each validation sample back to its original row;
                # in --predict mode "test" holds every row in order, so the row
                # index is simply idx (this assumes x keeps dat's row order).
                row = idx if args.predict else val_idx[idx]
                output.write(str(dat.iloc[row, 0]))
                if len(t.shape) > 1:
                    for j in range(t.shape[1]):
                        output.write(",{}".format(t[i, j]))
                        output.write(",{}".format(y[i, j]))
                else:
                    output.write(",{0:1.5f},{1:1.5f}".format(t[i], y[i]))
#                    output.write(",{0:1.5f}".format(np.argmax(y[i,:])))
#                    for yy in y[i]:
#                        output.write(",{0:1.5f}".format(yy))
                output.write("\n")
                idx += 1
Example #23
0
 def test_split_dataset_with_invalid_length_permutation(self):
     original = [1, 2, 3, 4, 5]
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, 2, [2, 0, 3, 1])
     with self.assertRaises(ValueError):
         datasets.split_dataset(original, 2, [2, 0, 3, 1, 4, 5])
Example #24
0
def main3():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu_id', '-g', type=int, default=0)
    parser.add_argument('--batch_size', '-b', type=int, default=60)
    parser.add_argument('--test_split', type=float, default=0.2)
    parser.add_argument(
        '--real_test',
        dest='real_test',
        action='store_true',
        help='Whether to split the data or use a complete new trial.')
    parser.add_argument('--max_epoch', '-e', type=int, default=110)
    parser.add_argument('--resume', '-r', type=str, default=None)
    # The options below are referenced later when --real_test is given but are
    # missing from this snippet's parser; the defaults are only placeholders.
    parser.add_argument('--data_base_dir', type=str, default='')
    parser.add_argument('--data_file_pattern', type=str, default='')
    parser.add_argument('--blackout', action='store_true')
    parser.add_argument(
        '--out_dir',
        '-o',
        type=str,
        default=
        '/mnt/7ac4c5b9-8c05-451f-9e6d-897daecb7442/gears/results_gsm/result_right_arm2'
    )
    args = parser.parse_args()

    model = GoalScoreModel()

    frames, labels = load_all_data(prep_f=model.prepare)

    frames, labels = igp.unison_shuffled_copies(frames, labels)
    print('Frames shape: ', frames.shape, ' Labels shape: ', labels.shape)

    data = chainer.datasets.TupleDataset(frames, labels)  #.to_device(gpu_id)
    print('Dataset length: ', len(data))

    print('Frame size: ', data[0][0].shape, data[0][0].dtype)

    if args.real_test:
        print('Using test trial.')
        train_iter = iterators.SerialIterator(data,
                                              args.batch_size,
                                              shuffle=True)

        # Load the test data
        test_frames, test_labels = load_frames_labels(
            ids=[11],
            filestype=''.join((args.data_base_dir, args.data_file_pattern)),
            blackout=args.blackout)
        data_test = chainer.datasets.TupleDataset(test_frames, test_labels)
        test_iter = iterators.SerialIterator(data_test,
                                             args.batch_size,
                                             repeat=False,
                                             shuffle=False)
    else:
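        # split_dataset returns (first split_at examples, remainder), so the
        # first test_split fraction of the shuffled data becomes the test set
        # and the rest becomes the training set.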
        data_test, data_train = split_dataset(data,
                                              int(args.test_split * len(data)))
        train_iter = iterators.SerialIterator(data_train,
                                              args.batch_size,
                                              shuffle=True)
        test_iter = iterators.SerialIterator(data_test,
                                             args.batch_size,
                                             repeat=False,
                                             shuffle=False)

    if args.gpu_id >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu_id).use()
        model.to_gpu(args.gpu_id)

    # Create the optimizer for the model
    optimizer = optimizers.Adam().setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=1e-6))

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       loss_func=model.calc_loss,
                                       device=args.gpu_id)

    # Full training
    print('Full model training ...')
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'),
                               out=args.out_dir)
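    # Registering the Evaluator under the name 'val' makes its results appear
    # as 'val/main/...' in the reports below.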
    trainer.extend(extensions.Evaluator(test_iter,
                                        model,
                                        eval_func=model.calc_loss,
                                        device=args.gpu_id),
                   name='val',
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/mae', 'main/gnll',
            'main/weighted', 'main/VAE', 'main/VAE_REC', 'main/VAE_KL',
            'val/main/loss', 'val/main/mae', 'val/main/weighted',
            'elapsed_time'
        ])
    )  #, 'val/main/VAE', 'main/loss', 'validation/main/loss', 'elapsed_time'], ))
    trainer.extend(
        extensions.PlotReport(
            ['main/mae', 'val/main/mae', 'main/VAE', 'val/main/VAE'],
            x_key='epoch',
            file_name='loss.png',
            marker=None))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.FailOnNonNumber())
    # Save every X epochs
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}.trainer'),
                   trigger=(200, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model,
        '%s_model_epoch_{.updater.epoch}.model' % (model.__class__.__name__)),
                   trigger=(10, 'epoch'))

    trainer.extend(utils.display_image(model.vae_image,
                                       data_test,
                                       args.out_dir,
                                       args.gpu_id,
                                       n=3),
                   trigger=(1, 'epoch'))

    trainer.extend(extensions.ExponentialShift('alpha',
                                               0.5,
                                               init=1e-3,
                                               target=1e-8),
                   trigger=(100, 'epoch'))
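    # The ExponentialShift above halves Adam's alpha every 100 epochs, starting
    # from 1e-3 and decaying no further than 1e-8.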

    # Resume from a specified snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    print('Done.')
Example #25
0
    def on_status(self, status):
        status.created_at += datetime.timedelta(hours=9)

        # When a reply addressed to the bot arrives
        if str(status.in_reply_to_screen_name) == bot_user_name:

            # Text message
            tweet_text = "@" + str(status.user.screen_name) + " "

            # Fetch the mentions timeline
            time_line = api.mentions_timeline()

            # Content of the newest message on the timeline
            print("リプライが届きました...\n[@" + status.user.screen_name + "]\n" +
                  time_line[0].text + "\n")

            # Prefix for file names (today's date)
            date_name = re.split(' ', str(datetime.datetime.today()))[0] + '_'

            # 1. Save the reply images -> 2. Crop the face and save it as cat.jpg -> 3. Classify it with Chainer

            # 1. Save the reply images
            try:
                j = 0
                reply_images = []
                for img in time_line[0].extended_entities['media']:
                    # print(img['media_url'])
                    reply_image = urllib.request.urlopen(img['media_url'])
                    # Decide the file name, then store it in the list
                    image_name = date_name + str(
                        time_line[0].id) + '-' + str(j) + '.jpg'
                    reply_images.append(image_name)
                    # Read the image data and save it
                    image_file = open(image_name, 'wb')
                    image_file.write(reply_image.read())
                    image_file.close()
                    reply_image.close()
                    print('画像 ' + image_name + ' を保存しました')
                    j = j + 1
            except:
                # Exception handling
                if j == 0:
                    tweet_text += "Error:画像がありませんฅ(´・ω・`)ฅにゃーん"
                else:
                    tweet_text += "Error:画像の保存に失敗しましたฅ(´・ω・`)ฅにゃーん"
                api.update_status(status=tweet_text,
                                  in_reply_to_status_id=status.id)
                print(tweet_text)
                return True

            # 2. Crop the face and save it as cat.jpg
            try:
                image = cv2.imread(reply_images[0])
                image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                cascade = cv2.CascadeClassifier("cat_cascade.xml")
                face_images = cascade.detectMultiScale(image_gray,
                                                       scaleFactor=1.1,
                                                       minNeighbors=1,
                                                       minSize=(1, 1))
                face_image_len = 0
                if len(face_images) > 0:
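                    # Keep the widest detected face: its raw crop is saved as
                    # cat_face.jpg and a 64x64 copy as cat_face_min.jpg.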
                    for (x, y, w, h) in face_images:
                        face_image = image[y:y + h, x:x + w]
                        if face_image_len < w:
                            face_image_len = w
                            cv2.imwrite("cat_face.jpg", face_image)
                            face_image = cv2.resize(face_image, (64, 64))
                            cv2.imwrite("cat_face_min.jpg", face_image)
                else:
                    tweet_text += "Error:猫の顔が検出できませんでした...ฅ(´・ω・`)ฅにゃーん"
                    api.update_status(status=tweet_text,
                                      in_reply_to_status_id=status.id)
                    print(tweet_text)
                    return True
            except:
                tweet_text += "Error:猫の顔の検出に失敗しました...ฅ(´・ω・`)ฅにゃーん"
                api.update_status(status=tweet_text,
                                  in_reply_to_status_id=status.id)
                print(tweet_text)
                return True

            # 3. Classify with Chainer
            try:
                data = [('cat_face_min.jpg', 3), ('cat_face_min.jpg', 3)]
                d = datasets.LabeledImageDataset(data)

                def transform(data):
                    img, label = data
                    img = img / 255.
                    return img, label

                d = datasets.TransformDataset(d, transform)

                train, test = datasets.split_dataset(d, 1)
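                # "data" above lists the same image twice, so split_dataset(d, 1)
                # leaves one copy in train and the other in test; only test[0]
                # is classified below.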
                x, t = test[0]
                x = x[None, ...]
                y = self.model(x)
                y = y.data

                cats = [
                    "スフィンクス", "アビシニアン", "ベンガル", "バーマン", "ボンベイ",
                    "ブリティッシュショートヘア", "エジプシャンマウ", "メインクーン", "ペルシャ", "ラグドール",
                    "ロシアンブルー", "シャム"
                ]
                cats_images = [
                    "Sphynx.jpg", "Abyssinian.jpg", "Bengal.jpg", "Birman.jpg",
                    "Bombay.jpg", "British_Shorthair.jpg", "Egyptian_Mau.jpg",
                    "Maine_Coon.jpg", "Persian.jpg", "Ragdoll.jpg",
                    "Russian_Blue.jpg", "Siamese.jpg"
                ]

                tweet_text += "この猫は... " + cats[y.argmax(
                    axis=1)[0]] + " ですฅ(´・ω・`)ฅにゃーん"

                media_images = [
                    "cat_face.jpg",
                    "./cat_images/" + cats_images[y.argmax(axis=1)[0]]
                ]
                media_ids = [
                    api.media_upload(i).media_id_string for i in media_images
                ]
                api.update_status(status=tweet_text,
                                  in_reply_to_status_id=status.id,
                                  media_ids=media_ids)
                print(tweet_text)
                return True

            except:
                tweet_text += "Error:猫の顔の判定に失敗しました...ฅ(´・ω・`)ฅにゃーん"
                api.update_status(status=tweet_text,
                                  in_reply_to_status_id=status.id)
                print(tweet_text)
                return True

        return True
Example #26
0
 def test_split_dataset_tail(self):
     original = [1, 2, 3, 4, 5]
     subset1, subset2 = datasets.split_dataset(original, 5)
     self.assertEqual(len(subset1), 5)
     self.assertEqual(len(subset2), 0)