def test_permuted_split_dataset(self):
    original = [1, 2, 3, 4, 5]
    subset1, subset2 = datasets.split_dataset(original, 2, [2, 0, 3, 1, 4])
    self.assertEqual(len(subset1), 2)
    self.assertEqual(subset1[0], 3)
    self.assertEqual(subset1[1], 1)
    self.assertEqual(len(subset2), 3)
    self.assertEqual(subset2[0], 4)
    self.assertEqual(subset2[1], 2)
    self.assertEqual(subset2[2], 5)
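# A minimal standalone sketch (not part of the test suite above) of the behaviour the test
# checks, assuming Chainer is installed and `datasets` is chainer.datasets: the first
# `split_at` entries of the `order` permutation select the examples of the first subset,
# the remaining entries form the second subset.
from chainer import datasets

original = [1, 2, 3, 4, 5]
first, second = datasets.split_dataset(original, 2, order=[2, 0, 3, 1, 4])
print([first[i] for i in range(len(first))])    # [3, 1]   (elements at indices 2 and 0)
print([second[i] for i in range(len(second))])  # [4, 2, 5] (elements at indices 3, 1, 4)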
def make_dataset():
    train, test = get_tox21()
    train, test, atom2id = getAtom2id(train, test)
    train, val = D.split_dataset(train, int(0.9 * len(train)))
    print("size of train set:", len(train))
    print("size of val set:", len(val))
    print('size of test set:', len(test))
    return train, val, test, atom2id
def __filter_class(dataset, extract_class):
    target_data = []
    target_label = []
    for data, label in dataset:
        if label in extract_class:
            target_data.append(data)
            target_label.append(extract_class.index(label))
    target_data = np.array(target_data)
    target_label = np.array(target_label, dtype=np.int32)
    dataset = tuple_dataset.TupleDataset(target_data, target_label)
    train, val = split_dataset(dataset, int(len(dataset) * 0.9))
    return train, val
def get_clf_data(use_memory=True, img_size=224, img_type='warp', split_val=0.9):

    def __get_train_list():
        train_list_path = 'data/clf/train_master.tsv'
        dataframe = pd.read_csv(train_list_path, sep='\t', usecols=['file_name', 'category_id'])
        train_data_list = pd.DataFrame(dataframe).to_records(index=False)
        return train_data_list

    def __get_test_list():
        test_list_path = 'data/clf/test.tsv'
        test_data_list = pd.read_csv(test_list_path, sep='\t', usecols=['file_name'])
        test_data_list = pd.DataFrame(test_data_list).to_records(index=False)
        test_data_list = [data[0] for data in test_data_list]
        return test_data_list

    img_average = None
    # if use_average_image:
    #     img_average = 'data/clf/ave_%s_%s.png' % (img_size, img_type)

    # train, val
    logging.info('Loading train, val dataset...')
    labeled = WarpedLabeledImageDataset(__get_train_list(), root='data/clf/train_images_labeled', use_memory=use_memory, img_size=img_size, img_type=img_type, img_average=img_average)
    logging.info('Done.')

    # test
    logging.info('Loading test dataset...')
    test = WarpedImageDataset(__get_test_list(), root='data/clf/test_images', use_memory=use_memory, img_size=img_size, img_type=img_type, img_average=img_average)
    logging.info('Done.')

    if split_val is not False:
        train, val = split_dataset(labeled, int(len(labeled) * split_val))
        return train, val, test
    else:
        train = labeled
        return train, test
def get_subdatasets(self):
    order = []
    if self.split_inter:
        if self.subsampling:
            for i in range(0, len(self)):
                frame, subject = divmod(i, self.subject_number)
                if (frame % sum(self.split_ratio)) < self.split_ratio[0]:
                    order.append(i)
        else:
            order = list(range(self.subject_number * self.split_ratio[0] // sum(self.split_ratio) * self.frame_number))
    else:
        order = list(range(self.frame_number * self.split_ratio[0] // sum(self.split_ratio) * self.subject_number))
    split_at = len(order)
    assert (split_at != 0) & (split_at != len(self))
    order.extend(set(range(len(self))) - set(order))
    return split_dataset(self, split_at, order)
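# A minimal sketch (not from the original code, names are illustrative only) of the pattern
# used above: collect the indices you want in the first subset, append the remaining indices
# so that `order` is a full permutation, and pass the boundary as `split_at`.
from chainer.datasets import split_dataset

data = list(range(10))
wanted_first = [i for i in data if i % 3 == 0]  # e.g. every third sample
order = wanted_first + sorted(set(range(len(data))) - set(wanted_first))
first, second = split_dataset(data, len(wanted_first), order)
print([first[i] for i in range(len(first))])    # [0, 3, 6, 9]
print([second[i] for i in range(len(second))])  # the remaining samples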
def test_split_dataset_invalid_position(self):
    original = [1, 2, 3, 4, 5]
    with self.assertRaises(ValueError):
        datasets.split_dataset(original, -1)
    with self.assertRaises(ValueError):
        datasets.split_dataset(original, 6)
def train_model(self, datasets):
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10', help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=10, help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05, help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300, help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str, help='Metric to watch for early stopping')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        self.model.to_gpu()

    optimizer = chainer.optimizers.Adam(args.learnrate)
    optimizer.setup(self.model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train, test = split_dataset(datasets, 80)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(monitor=args.early_stopping, verbose=True, max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer, device=args.gpu, loss_func=mean_squared_error)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, self.model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    print(train[:1])

    # Run the training
    trainer.run()

    return self.model
def test_split_dataset_with_invalid_length_permutation(self):
    original = [1, 2, 3, 4, 5]
    with self.assertRaises(ValueError):
        datasets.split_dataset(original, 2, [2, 0, 3, 1])
    with self.assertRaises(ValueError):
        datasets.split_dataset(original, 2, [2, 0, 3, 1, 4, 5])
def get_dataset(dataset_type, matrixForData, **kwargs):
    if dataset_type == 'synthetic':
        train = binary_tree.get_data(matrixForData)
        valid = train.copy()
        test_data = train.copy()
        if kwargs['dataset_randomness'] != -1:
            train = binary_tree.ProbabilisticBinaryTreeDataset(train, eps=kwargs['dataset_randomness'])
            valid = binary_tree.ProbabilisticBinaryTreeDataset(valid, eps=kwargs['dataset_randomness'])
            test = binary_tree.ProbabilisticBinaryTreeDataset(test_data, eps=kwargs['dataset_randomness'])
    elif dataset_type == 'mnist':
        # Load the MNIST dataset
        ndim = kwargs.get('ndim') if 'ndim' in kwargs else 1
        train, test = mnist_activity.get_mnist(withlabel=False, ndim=ndim, data=matrixForData, dtype=matrixForData.dtype)
        # train, test = datasets.get_mnist(withlabel=False, ndim=ndim)
        # Binarize dataset
        # train[train >= 0.5] = 1.0
        # train[train < 0.5] = 0.0
        # test[test >= 0.5] = 1.0
        # test[test < 0.5] = 0.0
        size_data = len(train[:, 1])
        upper_part = math.floor(0.8 * size_data)
        train, valid = datasets.split_dataset(train, upper_part)
    elif dataset_type == 'cifar100':
        # Load the Cifar-100 dataset
        train, test = datasets.get_cifar100(withlabel=False)
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)
        train, valid = datasets.split_dataset(train, 49000)
    elif dataset_type == 'breakout':
        train, test = breakout.load_dataset(withlabel=False)
        # scaling data from [0, 1] to [-1, 1]
        train = 2 * (train - 0.5)
        test = 2 * (test - 0.5)
        train, valid = datasets.split_dataset(train, 80000)
    elif dataset_type == 'wordnet':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize)
        valid = None
        test = None
    elif dataset_type == 'mammal':
        num_negatives = kwargs['num_negatives']
        symmetrize = kwargs['symmetrize']
        assert num_negatives == 1
        train = wordnet.load_dataset(num_negatives, symmetrize, mammal=True)
        valid = None
        test = None
    else:
        raise ValueError
    return train, valid, test
def main(commands=None):
    parser = argparse.ArgumentParser(description='Segmentation Predict')
    parser.add_argument('--model', '-m', nargs='+', help='Path to model')
    parser.add_argument('--config', '-c', nargs='*', default=['examples/configs/seg_resnet.yaml'])
    parser.add_argument('--val-set', type=int)
    parser.add_argument('--x-flip', type=int, help='0: no, 1: yes, 2: both (average)', default=0)
    parser.add_argument('--multiscale', action='store_true')
    # Args for ensembling
    parser.add_argument('--ensemble-seg', action='store_true')
    parser.add_argument('--seg-weight', type=float, nargs='*', default=None)
    parser.add_argument('--edge-weight', type=float, nargs='*', default=None)
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--n-process', '-p', type=int, default=30)
    parser.add_argument('--out', '-o', default='out.csv')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--limit', '-n', type=int, default=0)
    parser.add_argument('--thresh', '-t', type=float, default=0.1, help='Threshold for edge confidence')
    parser.add_argument('--save-demo-to', metavar='/path/to/out_demo/dir')
    parser.add_argument('--overlay-seg', action='store_true')
    parser.add_argument('--cprofile', action='store_true', help='To profile with cprofile')
    args = parser.parse_args(commands)

    configs = [load_config(yaml.load(open(args.config[i]))) for i in range(len(args.config))]
    master_config = configs[0]

    comm = chainermn.create_communicator(communicator_name='pure_nccl')
    device = comm.intra_rank + args.gpu
    print('Device = {}'.format(device))

    if len(configs) == 1 and len(args.model) >= 2:
        # Duplicate same config
        configs = configs * len(args.model)
    else:
        assert len(configs) == len(args.model), "# of configs and models don't match."

    # Setup models
    models = []
    for i in range(len(args.model)):
        model = setup_model(configs[i], args.x_flip)
        chainer.serializers.load_npz(args.model[i], model)
        models.append(model)

    if len(models) == 1:
        model = models[0]
    else:
        ensembler_cls = MultiScaleModelEnsembler if args.multiscale else ModelEnsembler
        model = ensembler_cls(models, ensemble_seg=args.ensemble_seg, seg_weight=args.seg_weight, edge_weight=args.edge_weight)

    with cuda.get_device_from_id(device):
        model.to_gpu()

    # Setup dataset
    if comm.rank == 0:
        if args.test:
            dataset = RSNASubmissionDataset()
        else:
            if args.val_set is not None:
                master_config['val_set'] = args.val_set
            dataset = RSNATrainDataset()
            if args.val_set is not None:
                master_config['val_set'] = args.val_set
            if master_config['val_set'] == -1:
                val_mask = dataset.patient_df['withinTestRange'].values == 1
                val_indices = val_mask.nonzero()[0]
            else:
                _, val_indices = create_train_val_indices(np.ones(len(dataset), dtype=bool), master_config['val_set'])
            dataset = dataset.slice[val_indices, ('dicom_data', 'img', 'bbox')]
        if args.limit and args.limit < len(dataset):
            dataset, _ = split_dataset(dataset, args.limit)
    else:
        dataset = None
    dataset = chainermn.scatter_dataset(dataset, comm)

    if args.cprofile:
        import cProfile
        import pstats
        import io
        pr = cProfile.Profile()
        pr.enable()

    if comm.rank == 0:
        print('Extracting network outputs...')
    outputs = []
    gt_bboxes = []
    for i in range(len(dataset)):
        if comm.rank == 0 and i % 100 == 0:
            print('Processing {}-th sample...'.format(i))
        if args.test:
            dicom_data, image = dataset[i]
            patient_id = dicom_data.PatientID
            gt_bbox = np.empty((0, 4), dtype=np.float32)
        else:
            dicom_data, image, gt_bbox = dataset[i]
            patient_id = dicom_data.PatientID
        if master_config['data_augmentation']['window_width'] > 1.0:
            image = (image - 128) * master_config['data_augmentation']['window_width'] + 128
            image = np.clip(image, 0, 255)
        with cuda.get_device_from_id(device):
            h_seg, h_hor, h_ver = [x[0] for x in model.extract([image])]
        outputs.append((patient_id, image, h_seg, h_hor, h_ver))
        gt_bboxes.append((patient_id, gt_bbox))

    if comm.rank == 0:
        for i in range(1, comm.size):
            other_outputs = comm.recv_obj(i)
            outputs.extend(other_outputs)
            other_gt_bboxes = comm.recv_obj(i)
            gt_bboxes.extend(other_gt_bboxes)
    else:
        comm.send_obj(outputs, 0)
        comm.send_obj(gt_bboxes, 0)
        print('Bye {}.'.format(comm.rank))
        exit(0)

    outputs = sorted(outputs, key=lambda x: x[0])
    gt_bboxes = sorted(gt_bboxes, key=lambda x: x[0])
    print('Done.')

    print('Postprocessing...')
    postprocessor = Postprocessor(master_config['downscale'], args.thresh, master_config['size_thresh'], master_config['edge_conf_operation'])
    with multiprocessing.Pool(args.n_process) as p:
        results = p.map(postprocessor.postprocess, outputs)
    results = sorted(results, key=lambda x: x[0])
    print('Done.')

    outputs_ids = [x[0] for x in outputs]
    results_ids = [x[0] for x in results]
    assert outputs_ids == results_ids

    print('Dumping final results...')
    pred_manager = PredictionsManager()
    n_positive = 0
    for result in results:
        patient_id, bbox, label, score = result
        pred_manager.add_prediction(patient_id, bbox, score)
        if len(bbox) > 0:
            n_positive += 1
    print('Complete!')
    print('{} / {} are predicted as positive.'.format(n_positive, len(dataset)))
    with open(args.out, 'w') as f:
        pred_manager.dump(f)

    if args.save_demo_to:
        print('Start saving demos...')
        os.makedirs(args.save_demo_to, exist_ok=True)
        demo_saver = DemoSaver(args.save_demo_to, master_config['downscale'], args.overlay_seg)
        with multiprocessing.Pool(args.n_process) as p:
            p.map(demo_saver.save, list(zip(results, outputs, gt_bboxes)))

    if args.cprofile:
        pr.disable()
        s = io.StringIO()
        sortby = 'time'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        pr.dump_stats('prof.cprofile'.format(args.out, 0))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument('--dataset-root', help="path to dataset root if dataset file is not already in root folder of dataset")
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr', type=float, default=0.001, help="default learning rate")
    parser.add_argument('--port', type=int, default=1337, help="port for bbox sending")
    parser.add_argument('--ip', default='127.0.0.1', help="destination ip for bbox sending")
    parser.add_argument('--test-image', help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(SheepDataset(args.dataset_root, args.dataset, image_size=image_size), Transform(model.coder, model.insize, model.mean))

    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if not len(gpu_datasets[0]) == len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1], len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [ThreadIterator(gpu_dataset, args.batchsize) for gpu_dataset in gpu_datasets]

    test = SheepDataset(args.dataset_root, args.test_dataset, image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(test, args.batchsize, repeat=False, shuffle=False, n_threads=2)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(train_iter[0], optimizer, device=args.gpu[0] if len(args.gpu) > 0 else -1)
    else:
        updater = training.updaters.MultiprocessParallelUpdater(train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)
    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True, label_names=voc_bbox_label_names), trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }
    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        if stats_cpu['iteration'] == log_interval:
            stats_cpu.update(data_to_log)

    trainer.extend(extensions.LogReport(trigger=log_interval, postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf', 'validation/main/map']), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'), trigger=(5000, 'iteration'))

    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image, resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(plot_image, os.path.join(args.out, 'bboxes'), send_bboxes=True, upstream_port=args.port, upstream_ip=args.ip)
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(description='Multi-Perceptron classifier/regressor')
    parser.add_argument('dataset', help='Path to data file')
    parser.add_argument('--activation', '-a', choices=activ.keys(), default='sigmoid', help='Activation function')
    parser.add_argument('--batchsize', '-b', type=int, default=10, help='Number of samples in each mini-batch')
    parser.add_argument('--dropout_ratio', '-dr', type=float, default=0, help='dropout ratio')
    parser.add_argument('--epoch', '-e', type=int, default=100, help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=-1, help='snapshot interval')
    parser.add_argument('--labelcol', '-l', type=int, nargs="*", default=[0, 1, 2, 3], help='column indices of target variables')
    parser.add_argument('--initmodel', '-i', help='Initialize the model from given file')
    parser.add_argument('--gpu', '-g', type=int, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--outdir', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--optimizer', '-op', default='MomentumSGD', help='optimizer {MomentumSGD,AdaDelta,AdaGrad,Adam,RMSprop}')
    parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot')
    parser.add_argument('--skip_rows', '-sr', type=int, default=0, help='num of rows skipped in the data')
    parser.add_argument('--skip_column', '-sc', type=int, nargs="*", default=[], help='set of indices of columns to be skipped in the data')
    parser.add_argument('--unit', '-nu', type=int, nargs="*", default=[128, 64, 32, 4], help='Number of units in the hidden layers')
    parser.add_argument('--test_every', '-t', type=int, default=5, help='use one in every ? entries in the dataset for validation')
    parser.add_argument('--regression', action='store_true', help="set for regression, otherwise classification")
    parser.add_argument('--batchnorm', '-bn', action='store_true', help="perform batchnormalization")
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--weight_decay', '-w', type=float, default=1e-5, help='weight decay for regularization')
    args = parser.parse_args()

    ##
    if not args.gpu:
        if chainer.cuda.available:
            args.gpu = 0
        else:
            args.gpu = -1

    print('GPU: {} Minibatch-size: {} # epoch: {}'.format(args.gpu, args.batchsize, args.epoch))

    # Set up a neural network to train
    model = MLP(args)
    if args.initmodel:
        print('Load model from: ', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Set up an optimizer
    if args.optimizer == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    elif args.optimizer == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta(rho=0.95, eps=1e-06)
    elif args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(lr=0.01, eps=1e-08)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=0.01, beta1=0.9, beta2=0.999, eps=1e-08)
    elif args.optimizer == 'RMSprop':
        optimizer = chainer.optimizers.RMSprop(lr=0.01, alpha=0.99, eps=1e-08)
    else:
        print("Wrong optimiser")
        exit(-1)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    print('units: {}, optimiser: {}, Weight decay: {}, dropout ratio: {}'.format(args.unit, args.optimizer, args.weight_decay, args.dropout_ratio))

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    label_type = np.float32 if args.regression else np.int32

    # read csv file
    csvdata = np.loadtxt(args.dataset, delimiter=",", skiprows=args.skip_rows)
    ind = np.ones(csvdata.shape[1], dtype=bool)  # indices for unused columns
    ind[args.labelcol] = False
    for i in args.skip_column:
        ind[i] = False
    x = np.array(csvdata[:, ind], dtype=np.float32)
    t = csvdata[:, args.labelcol]
    t = np.array(t, dtype=label_type)
    if not args.regression:
        t = t[:, 0]
    print('target column: {}, excluded columns: {}'.format(args.labelcol, np.where(ind == False)[0].tolist()))
    print("variable shape: {}, label shape: {}, label type: {}".format(x.shape, t.shape, label_type))

    ## train-validation data
    # random spliting
    #train, test = datasets.split_dataset_random(datasets.TupleDataset(x, t), int(0.8*t.size))
    # splitting by modulus of index
    train_idx = [i for i in range(len(t)) if (i + 1) % args.test_every != 0]
    var_idx = [i for i in range(len(t)) if (i + 1) % args.test_every == 0]
    n = len(train_idx)
    train_idx.extend(var_idx)
    train, test = datasets.split_dataset(datasets.TupleDataset(x, t), n, train_idx)

    # dataset iterator
    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = frequency / 10, 'epoch'
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=(frequency / 5, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.dump_graph('main/loss'))
    if args.optimizer in ['MomentumSGD', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(args.epoch / 5, 'epoch'))
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time', 'lr']), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ChainerUI
    trainer.extend(CommandsExtension())
    save_args(args, args.outdir)
    trainer.extend(extensions.LogReport(trigger=log_interval))

    if not args.predict:
        trainer.run()
    else:
        test = datasets.TupleDataset(x, t)

        ## prediction
        print("predicting: {} entries...".format(len(test)))
        x, t = chainer.dataset.concat_examples(test, args.gpu)
        with chainer.using_config('train', False):
            y = model(x, t)
        if args.gpu >= 0:
            pred = chainer.cuda.to_cpu(y.data)
            t = chainer.cuda.to_cpu(t)
        else:
            pred = y.data
        if args.regression:
            left = np.arange(t.shape[0])
            for i in range(len(args.labelcol)):
                rmse = F.mean_squared_error(pred[:, i], t[:, i])
                plt.plot(left, t[:, i], color="royalblue")
                plt.plot(left, pred[:, i], color="crimson", linestyle="dashed")
                plt.title("RMSE: {}".format(np.sqrt(rmse.data)))
                plt.savefig(args.outdir + '/result{}.png'.format(i))
                plt.close()
            result = np.hstack((t, pred))
            np.savetxt(args.outdir + "/result.csv", result, fmt='%1.5f', delimiter=",", header="truth,prediction")
        else:
            p = np.argmax(pred, axis=1)
            result = np.vstack((t, p)).astype(np.int32).transpose()
            print(result.tolist())
            np.savetxt(args.outdir + "/result.csv", result, delimiter=",", header="truth,prediction")
def test_split_dataset_invalid_type(self):
    original = [1, 2, 3, 4, 5]
    with self.assertRaises(TypeError):
        datasets.split_dataset(original, 3.5)
path = 'path.txt'  # 500fps
picture = ImageDataset(path)
picture = TransformDataset(picture, transform)

path = '20191101/2019110110201.csv'  # 1000fps
force = np.loadtxt(path, delimiter=',', skiprows=7)
force_z = force[:len(picture) * 2:2, 3].astype(np.float32)

x = picture
t = np.reshape(force_z, (6400, 1))
dataset = TupleDataset(x, t)

n_train = int(len(dataset) * 0.8)
n_valid = int(len(dataset) * 0.1)
train, valid_test = split_dataset(dataset, n_train)
valid, test = split_dataset(valid_test, n_valid)
# train, valid_test = split_dataset_random(dataset, n_train, seed=0)
# valid, test = split_dataset_random(valid_test, n_valid, seed=0)

print('Training dataset size:', len(train))
print('Validation dataset size:', len(valid))
print('Test dataset size:', len(test))

train_mode = False
if train_mode == True:
    batchsize = 16
    train_iter = iterators.SerialIterator(train, batchsize)
    valid_iter = iterators.SerialIterator(valid, batchsize,
def main():
    args = create_args('train')
    result_dir = create_result_dir(args.model_name)

    # Prepare devices
    devices = get_gpu_dict(args.gpus)

    # Instantiate a model
    model = RegNet(epsilon=args.epsilon)

    # Instantiate an optimizer
    optimizer = get_optimizer(model, **vars(args))

    # Setting up datasets
    prep = TransformDataset(KITTI(args.kitti_path, 'train'), CalibPrepare(args.init_pose))
    train, valid = split_dataset(prep, round(len(prep) * (1 - args.valid_proportion)))
    print("========== Model Parameters ==========")
    print("location loss weight (epsilon):", args.epsilon)
    print('train samples: {}, valid samples: {}'.format(len(train), len(valid)))

    # Iterator
    if DEBUG:
        Iterator = SerialIterator
    else:
        Iterator = MultiprocessIterator
    train_iter = Iterator(train, args.batchsize)
    valid_iter = Iterator(valid, args.valid_batchsize, repeat=False, shuffle=False)

    # Updater
    if DEBUG:
        Updater = StandardUpdater(train_iter, optimizer, device=devices['main'])
    else:
        Updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = Trainer(Updater, (args.epoch, 'epoch'), out=result_dir)

    # Extensions
    trainer.extend(extensions.Evaluator(valid_iter, model, device=devices['main']), trigger=(args.valid_freq, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(args.snapshot_iter, 'iteration'))
    trainer.extend(extensions.LogReport(), trigger=(args.show_log_iter, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=20))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'validation/main/loss', 'elapsed_time']))

    # Resume from snapshot
    if args.resume_from:
        chainer.serializers.load_npz(args.resume_from, trainer)

    # Train and save
    print("========== Training ==========")
    hook = CupyMemoryProfileHook()
    with hook:
        trainer.run()
    print("========== Saving ==========")
    chainer.serializers.save_hdf5(create_result_file(args.model_name), model)
    print("Done.")
    print("========== Memory Profiling ==========")
    hook.print_report()
def split_dataset(dataset, train_frac=.8, test_frac=.5):
    train_end = int(len(dataset) * train_frac)
    train, rest = D.split_dataset(dataset, train_end)
    test_end = int(len(rest) * test_frac)
    test, val = D.split_dataset(rest, test_end)
    return train, test, val
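# A hypothetical usage sketch of the wrapper above, assuming the wrapper and its `D` alias
# (chainer.datasets) are already in scope: an 80/10/10 train/test/val split of 100 samples.
data = list(range(100))
train, test, val = split_dataset(data, train_frac=0.8, test_frac=0.5)
print(len(train), len(test), len(val))  # 80 10 10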
def main():
    # Delete everything under the output folder
    remove_dir_and_file(u'result')

    # Path of the file listing image file names and ground-truth labels
    image_files = os.path.join(u'dataset', '03_duplicate_pict_anser.csv')

    # datasets.LabeledImageDataset loads this nicely as a dataset.
    dataset = datasets.LabeledImageDataset(image_files)
    #print (u'dataset')
    #print dataset[0]
    #print (u'---')

    # The data part has to be scaled to [0, 1], so divide by 255.
    dataset = chainer.datasets.TransformDataset(dataset, transform)

    # Use 80% for training and 20% for testing.
    split_at = int(len(dataset) * 0.8)
    train, test = datasets.split_dataset(dataset, split_at)

    # Specify batching, whether to shuffle the data, and so on
    train_iter = iterators.SerialIterator(train, batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test, batchsize, repeat=False, shuffle=True)

    # Model definition; the GPU to use is also specified here.
    # model = MLP()
    # model.to_gpu(gpu_id)

    # Wrap the model in a Classifier so that loss computation etc. becomes part of the model
    model = MLP()
    model = L.Classifier(model)
    model.to_gpu(gpu_id)

    # Choose the optimization method
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    # Pass the Iterator and Optimizer to the Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)

    # Pass the Updater to the Trainer
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='result')

    # Define how logs are written, etc.
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(extensions.snapshot_object(model.predictor, filename='model_epoch-{.updater.epoch}'))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))

    # Start training!!
    trainer.run()

    # Save the trained model
    model.to_cpu()  # Make sure it can run on the CPU
    serializers.save_npz(os.path.join(u'result', u'sakamotsu.model'), model)
dataset = chainer.datasets.TransformDataset(dataset, transform)

# Check a sample of the transformed dataset.

# In[11]:

dataset[0]

# ### Splitting into training and test data

# Split the dataset into training data and test data so that we can check for overfitting.

# In[12]:

# Use 80% for training and 20% for testing.
split_at = int(len(dataset) * 0.8)
train, test = datasets.split_dataset(dataset, split_at)

# In[13]:

len(train)

# In[14]:

len(test)

# ### Passing the dataset to an iterator

# Let's create an Iterator, which takes a fixed number of examples out of the dataset and bundles them into a mini-batch.
#
# - `SerialIterator`, one of the Iterators provided by Chainer, is the simplest one: it takes examples out of the dataset in order.
# - Its arguments are the dataset object and the batch size.
# - If the dataset should be read over and over again, pass `repeat=True`; if no more data should be taken after one pass, set it to `False`. The default is `True`.
def main3():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu_id', '-g', type=int, default=1)
    parser.add_argument('--batch_size', '-b', type=int, default=100)
    parser.add_argument('--test_split', type=float, default=0.2)
    parser.add_argument('--real_test', dest='real_test', action='store_true', help='Whether to split the data or use a complete new trial.')
    parser.add_argument('--mdn_hidden-units', '-u', type=int, default=24)
    parser.add_argument('--mdn_gaussian-mixtures', '-m', type=int, default=24)
    parser.add_argument('--max_epoch', '-e', type=int, default=250)
    parser.add_argument('--resume', '-r', type=int, default=None)
    parser.add_argument('--out_dir', '-o', type=str, default='results/result_test')
    parser.add_argument('--data_base_dir', type=str, default='/media/daniel/data/hhc/')
    parser.add_argument('--data_file_pattern', '-f', type=str, default='trial{}.avi')
    args = parser.parse_args()

    # frames, labels = load_frames_labels(filestype='/media/daniel/data/hhc/trial{}_r_forearm.avi')
    frames, labels = load_frames_labels(filestype=''.join((args.data_base_dir, args.data_file_pattern)), verbose=0)
    frames, labels = unison_shuffled_copies(frames, labels)
    print('Frames shape: ', frames.shape, ' Labels shape: ', labels.shape)

    data = chainer.datasets.TupleDataset(frames, labels)  # .to_device(gpu_id)
    print('Dataset length: ', data._length)
    print('Frame size: ', data[0][0].shape, data[0][0].dtype)

    if args.real_test:
        print('Using test trial.')
        train_iter = iterators.SerialIterator(data, args.batch_size, shuffle=True)
        # Load the test data
        test_frames, test_labels = load_frames_labels(ids=[11], filestype=''.join((args.data_base_dir, args.data_file_pattern)))
        test_data = chainer.datasets.TupleDataset(test_frames, test_labels)
        test_iter = iterators.SerialIterator(test_data, args.batch_size, repeat=False, shuffle=False)
    else:
        data_test, data_train = split_dataset(data, int(args.test_split * len(data)))
        train_iter = iterators.SerialIterator(data_train, args.batch_size, shuffle=True)
        test_iter = iterators.SerialIterator(data_test, args.batch_size, repeat=False, shuffle=False)

    model = GoalScoreModel()

    if args.gpu_id >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu_id).use()
        model.to_gpu(args.gpu_id)
        # labels = chainer.dataset.to_device(args.gpu_id, labels)
        # frames = chainer.dataset.to_device(args.gpu_id, frames)

    # Create the optimizer for the model
    optimizer = optimizers.Adam().setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=1e-6))
    # optimizer.add_hook(chainer.optimizer_hooks.GradientHardClipping(-.1, .1))

    # xp = chainer.backend.get_array_module(data_train)
    # optimizer.update(model.calc_loss, xp.asarray([data_train[0][0]]), xp.asarray([data_train[0][1]]))
    # import chainer.computational_graph as c
    # g = c.build_computational_graph(model.calc_loss)
    # with open('results/graph.dot', 'w') as o:
    #     o.write(g.dump())

    updater = training.StandardUpdater(train_iter, optimizer, loss_func=model.calc_loss, device=args.gpu_id)
    # updater = training.ParallelUpdater(train_iter, optimizer,
    #                                    loss_func=model.calc_loss,
    #                                    devices={'main': args.gpu_id, 'second': 1})

    # Pre-training
    print('Pretraining started.')
    trainer = training.Trainer(updater, (3, 'epoch'), out=args.out_dir)
    # Disable update for the head model
    print('Disabling training of head model.')
    model.head_model.disable_update()
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.FailOnNonNumber())
    trainer.run()

    # Full training
    print('Full model training ...')
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'), out=args.out_dir)
    trainer.extend(extensions.Evaluator(test_iter, model, eval_func=model.calc_loss, device=args.gpu_id), trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/nll', 'main/mae', 'main/sigma', 'validation/main/loss', 'validation/main/mae', 'validation/main/sigma', 'elapsed_time']))  # , 'main/loss', 'validation/main/loss', 'elapsed_time'], ))
    trainer.extend(extensions.PlotReport(['main/mae', 'validation/main/mae'], x_key='epoch', file_name='loss.png', marker=None))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.FailOnNonNumber())
    trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'), trigger=(20, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}.model'), trigger=(20, 'epoch'))

    # Disable/Enable update for the head model
    model.head_model.enable_update()

    # Resume from a specified snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    print('Done.')
def main():
    # command line argument parsing
    parser = argparse.ArgumentParser(description='Multi-Perceptron classifier/regressor')
    parser.add_argument('dataset', help='Path to data file')
    parser.add_argument('--activation', '-a', choices=activ.keys(), default='sigmoid', help='Activation function')
    parser.add_argument('--batchsize', '-b', type=int, default=50, help='Number of samples in each mini-batch')
    parser.add_argument('--dropout_ratio', '-dr', type=float, default=0, help='dropout ratio')
    parser.add_argument('--epoch', '-e', type=int, default=100, help='Number of sweeps over the dataset to train')
    parser.add_argument('--snapshot', '-s', type=int, default=-1, help='snapshot interval')
    parser.add_argument('--label_index', '-l', type=int, default=5, help='Column number of the target variable (5=Melting)')
    parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--outdir', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--out_ch', '-oc', type=int, default=1, help='num of output channels. set to 1 for regression')
    parser.add_argument('--optimizer', '-op', default='AdaDelta', help='optimizer {MomentumSGD,AdaDelta,AdaGrad,Adam}')
    parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot')
    parser.add_argument('--skip_columns', '-sc', type=int, default=29, help='num of columns which are not used as explanatory variables')
    parser.add_argument('--layers', '-nl', type=int, default=3, help='Number of layers')
    parser.add_argument('--unit', '-nu', type=int, default=100, help='Number of units in the hidden layers')
    parser.add_argument('--test_every', '-t', type=int, default=5, help='use one in every ? entries in the dataset for validation')
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--weight_decay', '-w', type=float, default=0, help='weight decay for regularization')
    args = parser.parse_args()

    args.regress = (args.out_ch == 1)

    # select numpy or cupy
    xp = chainer.cuda.cupy if args.gpu >= 0 else np
    label_type = np.int32 if not args.regress else np.float32

    # read csv file
    dat = pd.read_csv(args.dataset, header=0)
    ##
    print('Target: {}, GPU: {} Minibatch-size: {} # epoch: {}'.format(dat.keys()[args.label_index], args.gpu, args.batchsize, args.epoch))
    # csvdata = np.loadtxt(args.dataset, delimiter=",", skiprows=args.skip_rows)
    ind = np.ones(dat.shape[1], dtype=bool)  # indices for unused columns
    dat = dat.dropna(axis='columns')
    x = dat.iloc[:, args.skip_columns:].values
    args.in_ch = x.shape[1]
    t = (dat.iloc[:, args.label_index].values)[:, np.newaxis]
    print('target column:', args.label_index)
    # print('excluded columns: {}'.format(np.where(ind==False)[0].tolist()))
    print("data shape: ", x.shape, t.shape)
    x = np.array(x, dtype=np.float32)
    if args.regress:
        t = np.array(t, dtype=label_type)
    else:
        t = np.array(np.ndarray.flatten(t), dtype=label_type)

    # standardize
    t_mean = np.mean(t)
    t_std = np.std(t)
    x_mean = np.mean(x)
    x_std = np.std(x)
    x = (x - x_mean) / x_std
    t = (t - t_mean) / t_std

    # Set up a neural network to train
    model = MLP(args, std=t_std)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimiser
    if args.optimizer == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.003, momentum=0.9)
    elif args.optimizer == 'AdaDelta':
        optimizer = chainer.optimizers.AdaDelta(rho=0.95, eps=1e-06)
    elif args.optimizer == 'AdaGrad':
        optimizer = chainer.optimizers.AdaGrad(lr=0.001, eps=1e-08)
    elif args.optimizer == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08)
    else:
        print("Wrong optimiser")
        exit(-1)
    optimizer.setup(model)
    if args.weight_decay > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    print('layers: {}, units: {}, optimiser: {}, Weight decay: {}, dropout ratio: {}'.format(args.layers, args.unit, args.optimizer, args.weight_decay, args.dropout_ratio))

    ## train-validation data
    # random spliting
    #train, test = datasets.split_dataset_random(datasets.TupleDataset(x, t), int(0.8*t.size))
    # splitting by modulus of index
    train_idx = [i for i in range(t.size) if (i + 1) % args.test_every != 0]
    var_idx = [i for i in range(t.size) if (i + 1) % args.test_every == 0]
    n = len(train_idx)
    train_idx.extend(var_idx)
    train, test = datasets.split_dataset(datasets.TupleDataset(x, t), n, train_idx)

    # dataset iterator
    train_iter = iterators.SerialIterator(train, args.batchsize, shuffle=True)
    test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.outdir)

    frequency = args.epoch if args.snapshot == -1 else max(1, args.snapshot)
    log_interval = 1, 'epoch'
    val_interval = frequency / 10, 'epoch'
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'main/MAE', 'validation/main/MAE', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time']), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if not args.predict:
        trainer.run()
    else:
        test = datasets.TupleDataset(x, t)

        ## prediction
        print("predicting: {} entries...".format(len(test)))
        test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
        converter = concat_examples
        idx = 0
        with open(os.path.join(args.outdir, 'result.txt'), 'w') as output:
            for batch in test_iter:
                x, t = converter(batch, device=args.gpu)
                with chainer.using_config('train', False):
                    with chainer.function.no_backprop_mode():
                        if args.regress:
                            y = model(x).data
                            if args.gpu > -1:
                                y = xp.asnumpy(y)
                                t = xp.asnumpy(t)
                            y = y * t_std + t_mean
                            t = t * t_std + t_mean
                        else:
                            y = F.softmax(model(x)).data
                            if args.gpu > -1:
                                y = xp.asnumpy(y)
                                t = xp.asnumpy(t)
                for i in range(y.shape[0]):
                    output.write(str(dat.iloc[var_idx[i], 0]))
                    if len(t.shape) > 1:
                        for j in range(t.shape[1]):
                            output.write(",{}".format(t[i, j]))
                            output.write(",{}".format(y[i, j]))
                    else:
                        output.write(",{0:1.5f},{1:1.5f}".format(t[i], y[i]))
                        # output.write(",{0:1.5f}".format(np.argmax(y[i,:])))
                        # for yy in y[i]:
                        #     output.write(",{0:1.5f}".format(yy))
                    output.write("\n")
                    idx += 1
def main3():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu_id', '-g', type=int, default=0)
    parser.add_argument('--batch_size', '-b', type=int, default=60)
    parser.add_argument('--test_split', type=float, default=0.2)
    parser.add_argument('--real_test', dest='real_test', action='store_true', help='Whether to split the data or use a complete new trial.')
    parser.add_argument('--max_epoch', '-e', type=int, default=110)
    parser.add_argument('--resume', '-r', type=int, default=None)
    parser.add_argument('--out_dir', '-o', type=str, default='/mnt/7ac4c5b9-8c05-451f-9e6d-897daecb7442/gears/results_gsm/result_right_arm2')
    args = parser.parse_args()

    model = GoalScoreModel()

    frames, labels = load_all_data(prep_f=model.prepare)
    frames, labels = igp.unison_shuffled_copies(frames, labels)
    print('Frames shape: ', frames.shape, ' Labels shape: ', labels.shape)

    data = chainer.datasets.TupleDataset(frames, labels)  # .to_device(gpu_id)
    print('Dataset length: ', data._length)
    print('Frame size: ', data[0][0].shape, data[0][0].dtype)

    if args.real_test:
        print('Using test trial.')
        train_iter = iterators.SerialIterator(data, args.batch_size, shuffle=True)
        # Load the test data
        test_frames, test_labels = load_frames_labels(ids=[11], filestype=''.join((args.data_base_dir, args.data_file_pattern)), blackout=args.blackout)
        data_test = chainer.datasets.TupleDataset(test_frames, test_labels)
        test_iter = iterators.SerialIterator(data_test, args.batch_size, repeat=False, shuffle=False)
    else:
        data_test, data_train = split_dataset(data, int(args.test_split * len(data)))
        train_iter = iterators.SerialIterator(data_train, args.batch_size, shuffle=True)
        test_iter = iterators.SerialIterator(data_test, args.batch_size, repeat=False, shuffle=False)

    if args.gpu_id >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu_id).use()
        model.to_gpu(args.gpu_id)

    # Create the optimizer for the model
    optimizer = optimizers.Adam().setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=1e-6))

    updater = training.StandardUpdater(train_iter, optimizer, loss_func=model.calc_loss, device=args.gpu_id)

    # Full training
    print('Full model training ...')
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'), out=args.out_dir)
    trainer.extend(extensions.Evaluator(test_iter, model, eval_func=model.calc_loss, device=args.gpu_id), name='val', trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'main/mae', 'main/gnll', 'main/weighted', 'main/VAE', 'main/VAE_REC', 'main/VAE_KL', 'val/main/loss', 'val/main/mae', 'val/main/weighted', 'elapsed_time']))  # , 'val/main/VAE', 'main/loss', 'validation/main/loss', 'elapsed_time'], ))
    trainer.extend(extensions.PlotReport(['main/mae', 'val/main/mae', 'main/VAE', 'val/main/VAE'], x_key='epoch', file_name='loss.png', marker=None))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.FailOnNonNumber())

    # Save every X epochs
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.trainer'), trigger=(200, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, '%s_model_epoch_{.updater.epoch}.model' % (model.__class__.__name__)), trigger=(10, 'epoch'))
    trainer.extend(utils.display_image(model.vae_image, data_test, args.out_dir, args.gpu_id, n=3), trigger=(1, 'epoch'))
    trainer.extend(extensions.ExponentialShift('alpha', 0.5, init=1e-3, target=1e-8), trigger=(100, 'epoch'))

    # Resume from a specified snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    print('Done.')
def on_status(self, status):
    status.created_at += datetime.timedelta(hours=9)

    # When a reply arrives
    if str(status.in_reply_to_screen_name) == bot_user_name:
        # Text message
        tweet_text = "@" + str(status.user.screen_name) + " "
        # Fetch the mentions timeline
        time_line = api.mentions_timeline()
        # Content of the newest message on the timeline
        print("リプライが届きました...\n[@" + status.user.screen_name + "]\n" + time_line[0].text + "\n")
        # Prefix for file names
        date_name = re.split(' ', str(datetime.datetime.today()))[0] + '_'

        # 1. Save the reply image -> 2. Crop the face and save it as cat.jpg -> 3. Classify it with chainer
        # 1. Save the reply image
        try:
            j = 0
            reply_images = []
            for img in time_line[0].extended_entities['media']:
                # print(img['media_url'])
                reply_image = urllib.request.urlopen(img['media_url'])
                # Decide the file name and store it in the list
                image_name = date_name + str(time_line[0].id) + '-' + str(j) + '.jpg'
                reply_images.append(image_name)
                # Read and save the image
                image_file = open(image_name, 'wb')
                image_file.write(reply_image.read())
                image_file.close()
                reply_image.close()
                print('画像 ' + image_name + ' を保存しました')
                j = j + 1
        except:
            # Exception handling
            if j == 0:
                tweet_text += "Error:画像がありませんฅ(´・ω・`)ฅにゃーん"
            else:
                tweet_text += "Error:画像の保存に失敗しましたฅ(´・ω・`)ฅにゃーん"
            api.update_status(status=tweet_text, in_reply_to_status_id=status.id)
            print(tweet_text)
            return True

        # 2. Crop the face and save it as cat.jpg
        try:
            image = cv2.imread(reply_images[0])
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            cascade = cv2.CascadeClassifier("cat_cascade.xml")
            face_images = cascade.detectMultiScale(image_gray, scaleFactor=1.1, minNeighbors=1, minSize=(1, 1))
            face_image_len = 0
            if len(face_images) > 0:
                for (x, y, w, h) in face_images:
                    face_image = image[y:y + h, x:x + w]
                    if face_image_len < w:
                        face_image_len = w
                        cv2.imwrite("cat_face.jpg", face_image)
                        face_image = cv2.resize(face_image, (64, 64))
                        cv2.imwrite("cat_face_min.jpg", face_image)
            else:
                tweet_text += "Error:猫の顔が検出できませんでした...ฅ(´・ω・`)ฅにゃーん"
                api.update_status(status=tweet_text, in_reply_to_status_id=status.id)
                print(tweet_text)
                return True
        except:
            tweet_text += "Error:猫の顔の検出に失敗しました...ฅ(´・ω・`)ฅにゃーん"
            api.update_status(status=tweet_text, in_reply_to_status_id=status.id)
            print(tweet_text)
            return True

        # 3. Classify with chainer
        try:
            data = [('cat_face_min.jpg', 3), ('cat_face_min.jpg', 3)]
            d = datasets.LabeledImageDataset(data)

            def transform(data):
                img, label = data
                img = img / 255.
                return img, label

            d = datasets.TransformDataset(d, transform)
            train, test = datasets.split_dataset(d, 1)
            x, t = test[0]
            x = x[None, ...]
            y = self.model(x)
            y = y.data
            cats = ["スフィンクス", "アビシニアン", "ベンガル", "バーマン", "ボンベイ", "ブリティッシュショートヘア", "エジプシャンマウ", "メインクーン", "ペルシャ", "ラグドール", "ロシアンブルー", "シャム"]
            cats_images = ["Sphynx.jpg", "Abyssinian.jpg", "Bengal.jpg", "Birman.jpg", "Bombay.jpg", "British_Shorthair.jpg", "Egyptian_Mau.jpg", "Maine_Coon.jpg", "Persian.jpg", "Ragdoll.jpg", "Russian_Blue.jpg", "Siamese.jpg"]
            tweet_text += "この猫は... " + cats[y.argmax(axis=1)[0]] + " ですฅ(´・ω・`)ฅにゃーん"
            media_images = ["cat_face.jpg", "./cat_images/" + cats_images[y.argmax(axis=1)[0]]]
            media_ids = [api.media_upload(i).media_id_string for i in media_images]
            api.update_status(status=tweet_text, in_reply_to_status_id=status.id, media_ids=media_ids)
            print(tweet_text)
            return True
        except:
            tweet_text += "Error:猫の顔の判定に失敗しました...ฅ(´・ω・`)ฅにゃーん"
            api.update_status(status=tweet_text, in_reply_to_status_id=status.id)
            print(tweet_text)
            return True

    return True
def test_split_dataset_tail(self):
    original = [1, 2, 3, 4, 5]
    subset1, subset2 = datasets.split_dataset(original, 5)
    self.assertEqual(len(subset1), 5)
    self.assertEqual(len(subset2), 0)