def evaluate(self):
        """Score the main target with ROC-AUC over the main iterator.

        Calls ``eval_hook`` (when set), resets the iterator, then gathers the
        labels and the ``target.predict`` outputs of every batch.  The
        concatenated arrays are moved to the CPU and passed to
        ``roc_auc_score``.

        Returns:
            The value of ``DictSummary.compute_mean()`` after a single
            ``'<name>/main/auc'`` entry has been added.
        """
        data_iter = self._iterators['main']
        model = self._targets['main']

        if self.eval_hook:
            self.eval_hook(self)

        data_iter.reset()

        labels, scores = [], []
        for minibatch in data_iter:
            converted = convert._call_converter(
                self.converter, minibatch, self.device)
            # Only (inputs, labels) tuples are supported here.
            assert isinstance(converted, tuple)
            inputs, targets = converted
            labels.append(targets)
            scores.append(model.predict(inputs).data)

        # roc_auc_score is fed CPU arrays, hence the to_cpu transfer.
        all_labels = cuda.to_cpu(model.xp.concatenate(labels, axis=0))
        all_scores = cuda.to_cpu(model.xp.concatenate(scores, axis=0))
        auc = roc_auc_score(all_labels, all_scores)

        summary = reporter_module.DictSummary()
        summary.add({f'{self.name}/main/auc': auc})
        return summary.compute_mean()
Beispiel #2
0
    def evaluate(self):
        """Run the evaluation function on each batch and average the reports.

        The first two entries of every converted batch are turned into arrays
        of the device's array module and passed to the evaluation function
        (``eval_func`` or, if unset, the main target) with back-propagation
        disabled.  Values reported during each call are accumulated in a
        ``DictSummary``.

        Returns:
            The value of ``DictSummary.compute_mean()`` over all batches.
        """
        data_iter = self._iterators['main']
        evaluate_batch = self.eval_func or self._targets['main']

        if self.eval_hook:
            self.eval_hook(self)

        if not hasattr(data_iter, 'reset'):
            # Legacy path: iterate over a shallow copy instead of resetting.
            warnings.warn(
                'This iterator does not have the reset method. Evaluator '
                'copies the iterator instead of resetting. This behavior is '
                'deprecated. Please implement the reset method.',
                DeprecationWarning)
            batches = copy.copy(data_iter)
        else:
            data_iter.reset()
            batches = data_iter

        summary = reporter.DictSummary()

        for minibatch in batches:
            reported = {}
            with reporter.report_scope(reported):
                converted = convert._call_converter(
                    self.converter, minibatch, self.device)
                xp = self.device.xp

                inputs = xp.array(converted[0])
                labels = xp.array(converted[1])

                with function.no_backprop_mode():
                    evaluate_batch(inputs, labels)

            summary.add(reported)

        return summary.compute_mean()
    def update_core(self):
        """Perform one optimizer step, then refresh the teacher parameters.

        A batch from the ``'main'`` iterator (unpacked into two arrays) and a
        batch from the ``'test'`` iterator are converted and handed to the
        optimizer together with the loss function.  Afterwards every teacher
        parameter is replaced by an exponential moving average of itself and
        the matching student parameter, weighted by ``ema_decay``.
        """
        main_iter = self._iterators['main']
        main_x, main_y = convert._call_converter(
            self.converter, main_iter.next(), self.device)
        test_inputs = convert._call_converter(
            self.converter, self._iterators['test'].next(), self.device)

        optimizer = self._optimizers['main']
        objective = self.loss_func or optimizer.target
        optimizer.update(objective, main_x, main_y, test_inputs)

        # Teacher <- decay * teacher + (1 - decay) * student, per parameter.
        student_net = optimizer.target.student
        teacher_net = optimizer.target.teacher
        for teacher_param, student_param in zip(
                teacher_net.params(), student_net.params()):
            teacher_param.data = (
                self.ema_decay * teacher_param.data
                + (1. - self.ema_decay) * student_param.data)

        if self.auto_new_epoch and main_iter.is_new_epoch:
            optimizer.new_epoch(auto=True)
    def _evaluate_local_single(self, iterator):
        """Yield the ``calc_local`` result for every batch of ``iterator``.

        Each batch is converted for ``self.device`` and passed to
        ``calc_local`` with back-propagation disabled: a tuple is spread as
        positional arguments, a dict as keyword arguments, and anything else
        is passed as a single argument.  ``_progress_hook`` (when set) is
        called with the raw batch before the result is yielded.

        Args:
            iterator: Iterable producing the batches to evaluate.

        Yields:
            Whatever ``calc_local`` returns for the batch.
        """
        for minibatch in iterator:
            converted = convert._call_converter(
                self.converter, minibatch, self.device)

            # Normalise the converted batch into (args, kwargs).
            if isinstance(converted, tuple):
                args, kwargs = converted, {}
            elif isinstance(converted, dict):
                args, kwargs = (), converted
            else:
                args, kwargs = (converted,), {}

            with function.no_backprop_mode():
                local_result = self.calc_local(*args, **kwargs)

            if self._progress_hook:
                self._progress_hook(minibatch)
            yield local_result
Beispiel #5
0
    def update_core(self):
        """Run one optimizer step on the next batch of the main iterator.

        The batch is converted for ``self.device``; its first two entries are
        turned into arrays of the device's array module and passed to the
        optimizer together with the loss function (``loss_func`` or, if
        unset, the optimizer's target).  When ``auto_new_epoch`` is set and
        the iterator reports a new epoch, the optimizer is notified.
        """
        iterator = self._iterators['main']
        batch = iterator.next()
        in_arrays = convert._call_converter(
            self.converter, batch, self.device)
        xp = self.device.xp

        # Only the first two entries of the converted batch are used.
        X, Y = xp.array(in_arrays[0]), xp.array(in_arrays[1])

        optimizer = self._optimizers['main']
        loss_func = self.loss_func or optimizer.target

        optimizer.update(loss_func, X, Y)

        if self.auto_new_epoch and iterator.is_new_epoch:
            optimizer.new_epoch(auto=True)
Beispiel #6
0
    def update_core(self):
        """Run one optimizer step on the next batch of the main iterator.

        The converted batch is forwarded to ``optimizer.update`` after the
        loss function: a tuple is spread as positional arguments, a dict as
        keyword arguments, and any other object is passed as one argument.
        When ``auto_new_epoch`` is set and the iterator reports a new epoch,
        the optimizer is notified.
        """
        data_iter = self._iterators['main']
        converted = convert._call_converter(
            self.converter, data_iter.next(), self.device)

        optimizer = self._optimizers['main']
        objective = self.loss_func or optimizer.target

        # Normalise the converted batch into (args, kwargs).
        if isinstance(converted, tuple):
            args, kwargs = converted, {}
        elif isinstance(converted, dict):
            args, kwargs = (), converted
        else:
            args, kwargs = (converted,), {}
        optimizer.update(objective, *args, **kwargs)

        if self.auto_new_epoch and data_iter.is_new_epoch:
            optimizer.new_epoch(auto=True)
Beispiel #7
0
    def update_core(self):
        """Fetch a batch, convert it and let the optimizer take one step.

        The loss function is ``loss_func`` when set, otherwise the
        optimizer's target.  Dict batches are passed as keyword arguments,
        tuple batches as positional arguments, and everything else as a
        single argument.  A new epoch is signalled to the optimizer when
        ``auto_new_epoch`` is set and the iterator has just wrapped around.
        """
        source = self._iterators['main']
        minibatch = source.next()
        arrays = convert._call_converter(
            self.converter, minibatch, self.device)

        optimizer = self._optimizers['main']
        loss = self.loss_func or optimizer.target

        if isinstance(arrays, dict):
            optimizer.update(loss, **arrays)
        elif isinstance(arrays, tuple):
            optimizer.update(loss, *arrays)
        else:
            optimizer.update(loss, arrays)

        if not self.auto_new_epoch:
            return
        if source.is_new_epoch:
            optimizer.new_epoch(auto=True)
Beispiel #8
0
    def update_core(self):
        """Update the generator, then the discriminator, on one batch.

        Generator step: the generator output for the real input is
        concatenated with that input and scored by the discriminator; the
        loss is ``generative_lossfun`` of that score plus ``alpha`` times
        ``conditional_lossfun`` between the generated and real targets.

        Discriminator step: the raw array of the generated output is passed
        through ``_buffer`` and scored alongside the real pair, and
        ``discriminative_lossfun`` is minimised.

        Both optimizers are notified of a new epoch when ``auto_new_epoch``
        is set and the iterator reports one.
        """
        data_iter = self._iterators['main']
        converted = convert._call_converter(
            self.converter, data_iter.next(), self.device)

        dis_optimizer = self._optimizers['dis']
        gen_optimizer = self._optimizers['gen']

        real_x, real_y = converted

        # ---- generator step ----
        fake_y = self.generator(real_x)
        fake_score = self.discriminator(F.concat((real_x, fake_y)))

        adversarial_term = self.generative_lossfun(fake_score)
        conditional_term = self.alpha * self.conditional_lossfun(fake_y, real_y)
        gen_loss = adversarial_term + conditional_term

        self.generator.cleargrads()
        gen_loss.backward()
        gen_optimizer.update()

        # ---- discriminator step ----
        # Using the bare `array` (rather than `unchain_backward`) keeps the
        # discriminator graph disconnected from the generator's nodes.
        buffered_fake_y = self._buffer(fake_y.array)

        fake_score = self.discriminator(F.concat((real_x, buffered_fake_y)))
        real_score = self.discriminator(F.concat((real_x, real_y)))

        dis_loss = self.discriminative_lossfun(real_score, fake_score)

        self.discriminator.cleargrads()
        dis_loss.backward()
        dis_optimizer.update()

        if self.auto_new_epoch and data_iter.is_new_epoch:
            gen_optimizer.new_epoch(auto=True)
            dis_optimizer.new_epoch(auto=True)
    def update_core(self):
        """Compute the loss, back-propagate it and step the optimizer.

        The batch is converted for ``self.input_device`` and spread as
        positional arguments to the loss function (``loss_func`` or, if
        unset, the optimizer's target).
        """
        data_iter = self._iterators["main"]
        converted = convert._call_converter(
            self.converter, data_iter.next(), self.input_device)

        optimizer = self._optimizers["main"]
        objective = self.loss_func or optimizer.target

        # Forward and backward are run here rather than inside
        # ``optimizer.update(lossfun, ...)``: per the original author's notes
        # the graph has to be traversed in PyTorch, because the optimizer only
        # holds a chainerized model that cannot be executed and would make
        # calls the torch API does not support.
        objective(*converted).backward()

        optimizer.update()
    def _evaluate_local(self, iterator):
        """Evaluate local batches and yield the gathered results on the root.

        Each round, this process evaluates at most one batch with
        ``calc_local`` (back-propagation disabled) and hands the result, or
        ``None`` once ``iterator`` is exhausted, to ``comm.gather_obj``.  On
        the root rank the non-``None`` gathered entries are yielded one by
        one.  After every ``gather_interval`` rounds the root's "all done"
        flag is passed through ``comm.bcast_obj`` and controls whether the
        outer loop continues.

        NOTE(review): ``gather_obj`` / ``bcast_obj`` are presumably collective
        operations that every rank must call the same number of times, which
        would explain why they sit outside the ``try`` block - confirm against
        the communicator's documentation.

        Args:
            iterator: Object whose ``next()`` returns a batch and raises
                ``StopIteration`` when no batches remain.

        Yields:
            Non-``None`` entries of the gathered results.  Only the root rank
            yields; on other ranks the generator produces nothing.
        """
        # Check whether local eval is all done every 8 rounds
        gather_interval = 8

        all_done = None
        while not all_done:
            all_done = None
            results = None
            for _ in range(gather_interval):
                try:
                    batch = iterator.next()
                    in_arrays = convert._call_converter(
                        self.converter, batch, self.device)

                    with function.no_backprop_mode():
                        if isinstance(in_arrays, tuple):
                            results = self.calc_local(*in_arrays)
                        elif isinstance(in_arrays, dict):
                            results = self.calc_local(**in_arrays)
                        else:
                            results = self.calc_local(in_arrays)

                    # Progress is reported from the root rank only.
                    if self.comm.rank == self.root and self._progress_hook:
                        self._progress_hook(batch)

                except StopIteration:
                    # Nothing left locally: contribute None to the gather.
                    batch = None
                    results = None

                # Called every round, whether or not a batch was evaluated.
                results = self.comm.gather_obj(results, root=self.root)

                if self.comm.rank == self.root:
                    valid_results = [r for r in results if r is not None]
                    for result in valid_results:
                        yield result

                    # Overwritten each round, so only the last round of the
                    # interval decides the value that is broadcast below.
                    all_done = len(valid_results) == 0

            # Non-root ranks pass None here and take over the returned value.
            all_done = self.comm.bcast_obj(all_done, root=self.root)
        return
    def evaluate(self):
        """Evaluate the model over the main iterator and average the reports.

        Every batch is converted for ``self.device`` and passed to the
        evaluation function (``eval_func`` or, if unset, the main target).
        Values reported during each call are collected in a
        :class:`~chainer.DictSummary`, whose means form the return value.
        When ``_progress_bar`` is set, an ``_IteratorProgressBar`` is updated
        after each batch and closed at the end.

        The loop ends only when the iterator raises ``StopIteration`` or the
        evaluation code raises; an iterator that never stops makes this
        method loop forever.

        Users can override this method to customize the evaluation routine.

        .. note::

            Calls to the evaluation function run inside
            :func:`function.no_backprop_mode`, so no computational graph is
            built, which reduces memory consumption.

        Returns:
            dict: Result dictionary. This dictionary is further reported via
            :func:`~chainer.report` without specifying any observer.

        """
        data_iter = self._iterators['main']
        evaluate_batch = self.eval_func or self._targets['main']

        if self.eval_hook:
            self.eval_hook(self)

        if not hasattr(data_iter, 'reset'):
            # Legacy path: iterate over a shallow copy instead of resetting.
            warnings.warn(
                'This iterator does not have the reset method. Evaluator '
                'copies the iterator instead of resetting. This behavior is '
                'deprecated. Please implement the reset method.',
                DeprecationWarning)
            batches = copy.copy(data_iter)
        else:
            data_iter.reset()
            batches = data_iter

        summary = reporter_module.DictSummary()

        if self._progress_bar:
            pbar = _IteratorProgressBar(iterator=batches)

        for minibatch in batches:
            reported = {}
            with reporter_module.report_scope(reported):
                converted = convert._call_converter(
                    self.converter, minibatch, self.device)

                # Normalise the converted batch into (args, kwargs).
                if isinstance(converted, tuple):
                    args, kwargs = converted, {}
                elif isinstance(converted, dict):
                    args, kwargs = (), converted
                else:
                    args, kwargs = (converted,), {}

                with function.no_backprop_mode():
                    evaluate_batch(*args, **kwargs)

            summary.add(reported)

            if self._progress_bar:
                pbar.update()

        if self._progress_bar:
            pbar.close()

        return summary.compute_mean()
Beispiel #12
0
    def evaluate(self):
        """Evaluate the model over the main iterator and average the reports.

        Every batch is converted for ``self.device`` and handed to the
        evaluation function (``eval_func`` or, if unset, the main target):
        dict batches as keyword arguments, tuple batches as positional
        arguments, anything else as a single argument.  Values reported
        during each call are accumulated in a :class:`~chainer.DictSummary`
        and their means are returned.

        The loop ends only when the iterator raises ``StopIteration`` or the
        evaluation code raises; an iterator that never stops makes this
        method loop forever.

        Users can override this method to customize the evaluation routine.

        .. note::

            Calls to the evaluation function run inside
            :func:`function.no_backprop_mode`, so no computational graph is
            built, which reduces memory consumption.

        Returns:
            dict: Result dictionary. This dictionary is further reported via
            :func:`~chainer.report` without specifying any observer.

        """
        source = self._iterators['main']
        run_eval = self.eval_func or self._targets['main']

        if self.eval_hook:
            self.eval_hook(self)

        can_reset = hasattr(source, 'reset')
        if can_reset:
            source.reset()
            batch_stream = source
        else:
            warnings.warn(
                'This iterator does not have the reset method. Evaluator '
                'copies the iterator instead of resetting. This behavior is '
                'deprecated. Please implement the reset method.',
                DeprecationWarning)
            batch_stream = copy.copy(source)

        summary = reporter_module.DictSummary()

        for minibatch in batch_stream:
            reported = {}
            with reporter_module.report_scope(reported):
                arrays = convert._call_converter(
                    self.converter, minibatch, self.device)
                with function.no_backprop_mode():
                    if isinstance(arrays, dict):
                        run_eval(**arrays)
                    elif isinstance(arrays, tuple):
                        run_eval(*arrays)
                    else:
                        run_eval(arrays)

            summary.add(reported)

        return summary.compute_mean()
Beispiel #13
0
    def update_core(self):
        """Update the discriminator, then generator/posterior/transition.

        Discriminator phase: the loss on the background-subtracted real
        observation pair (labelled with ``_one_labels``) is added to the loss
        on a generated pair re-wrapped in fresh ``chainer.Variable`` objects
        (labelled with ``_zero_labels``), then back-propagated.

        Joint phase: the generator loss on the generated pair, a weighted
        mutual-information term and a weighted transition loss are summed,
        back-propagated, and the three remaining optimizers are stepped.

        Both total losses are unchained and reported as ``'d_loss'`` and
        ``'gpt_loss'``.
        """
        dis_opt = self.get_optimizer('d_optimizer')
        gen_opt = self.get_optimizer('g_optimizer')
        post_opt = self.get_optimizer('p_optimizer')
        trans_opt = self.get_optimizer('t_optimizer')

        discriminator = dis_opt.target
        generator = gen_opt.target
        posterior = post_opt.target
        transition = trans_opt.target

        data_iter = self.get_iterator('main')
        n_samples = data_iter.batch_size
        minibatch = data_iter.next()

        converted = convert._call_converter(
            self.converter, minibatch, self.device)

        real_current = self._subtract_background(converted[0])
        real_next = self._subtract_background(converted[1])
        assert len(real_current) == data_iter.batch_size
        assert len(real_next) == data_iter.batch_size

        # ---- discriminator phase ----
        discriminator.cleargrads()

        loss_on_real = self._discriminator_loss(
            discriminator, real_current, real_next, self._one_labels)

        s_current, s_next, z = self._sample_state(
            transition, s_shape=(n_samples, 7), z_shape=(n_samples, 4))
        fake_current, fake_next = self._generate_observation(
            generator, z, s_current, s_next)

        # Fresh Variables built from the raw data carry no graph back to the
        # generator, so this loss only reaches the discriminator.
        detached_current = chainer.Variable(fake_current.data)
        detached_next = chainer.Variable(fake_next.data)
        loss_on_fake = self._discriminator_loss(
            discriminator, detached_current, detached_next,
            self._zero_labels)

        d_loss = loss_on_real + loss_on_fake
        d_loss.backward()
        dis_opt.update()

        # ---- generator / posterior / transition phase ----
        for network in (generator, posterior, transition):
            network.cleargrads()

        fake_g_loss = self._discriminator_loss(
            discriminator, fake_current, fake_next, self._one_labels)

        q_current_nll = self._posterior_nll(
            posterior, fake_current, s_current)
        q_next_nll = self._posterior_nll(posterior, fake_next, s_next)
        t_pll = self._transition_pll(transition, s_current, s_next)
        transition_loss = self._transition_loss(transition, s_current)

        # NOTE (from the original author): q_current_nll and q_next_nll are
        # negative values.
        mutual_information_loss = q_current_nll + q_next_nll + t_pll
        assert fake_g_loss is not None
        assert mutual_information_loss is not None
        assert transition_loss is not None
        gpt_loss = (
            fake_g_loss
            + self._mutual_info_loss_weight * mutual_information_loss
            + self._transition_loss_weight * transition_loss)

        gpt_loss.backward()

        for optimizer in (gen_opt, post_opt, trans_opt):
            optimizer.update()

        # Drop backward references before the losses are reported.
        for total_loss in (d_loss, gpt_loss):
            total_loss.unchain_backward()

        chainer.reporter.report({'d_loss': d_loss})
        chainer.reporter.report({'gpt_loss': gpt_loss})
Beispiel #14
0
    def evaluate(self):
        """Evaluate the model and aggregate the reported metrics.

        Steps:

        1. Run the evaluation function (``eval_func`` or, if unset, the main
           target) on every batch with back-propagation disabled and keep the
           per-batch observations, converted to plain Python scalars where
           possible.
        2. When ``self.comm`` is set, gather the per-process observation
           tables; only rank 0 continues, other ranks return ``{}``.
        3. Fold per-instance keys of the form
           ``validation/main/{add_type}/{class_id}/{instance_id}`` into
           per-class keys and average everything with a ``DictSummary``.
        4. For every ``{add_type}/{class_id}`` compute the AUC
           (``metrics.ycb_video_add_auc`` with ``max_value=0.1``) and the
           fraction of values below ``0.02``.
        5. For each parent key that is not already present, add the mean of
           its direct child entries.

        The progress bar is shown only when ``_progress_bar`` is truthy and
        this process is either not distributed (``comm is None``) or rank 0.

        Returns:
            dict: Aggregated results; empty on ranks other than 0 when a
            communicator is set.
        """
        iterator = self._iterators["main"]
        eval_func = self.eval_func or self._targets["main"]

        if self.eval_hook:
            self.eval_hook(self)

        if hasattr(iterator, "reset"):
            iterator.reset()
            it = iterator
        else:
            warnings.warn(
                "This iterator does not have the reset method. Evaluator "
                "copies the iterator instead of resetting. This behavior is "
                "deprecated. Please implement the reset method.",
                DeprecationWarning,
            )
            it = copy.copy(iterator)

        # The parentheses matter: ``a and b or c`` parses as ``(a and b) or c``,
        # which would dereference ``self.comm.rank`` when ``comm`` is None and
        # would show the bar on rank 0 even when it is disabled.
        show_progress = bool(self._progress_bar) and (
            self.comm is None or self.comm.rank == 0
        )
        pbar = _IteratorProgressBar(iterator=it) if show_progress else None

        observations = []
        for batch in it:
            observation = {}
            with reporter_module.report_scope(observation):
                in_arrays = convert_module._call_converter(
                    self.converter, batch, self.device)
                with function.no_backprop_mode():
                    if isinstance(in_arrays, tuple):
                        eval_func(*in_arrays)
                    elif isinstance(in_arrays, dict):
                        eval_func(**in_arrays)
                    else:
                        eval_func(in_arrays)

            # Reduce reported values to CPU scalars where possible before
            # they are stored in the observation table.
            for k, v in list(observation.items()):
                if hasattr(v, "array"):
                    v = chainer.cuda.to_cpu(v.array)
                if hasattr(v, "item"):
                    v = v.item()
                observation[k] = v
            observations.append(observation)

            if pbar is not None:
                pbar.update()

        if pbar is not None:
            pbar.close()

        local_df = pandas.DataFrame(observations)
        if self.comm:
            dfs = self.comm.gather_obj(local_df)
            if self.comm.rank == 0:
                global_df = pandas.concat(dfs, sort=True)
            else:
                return {}
        else:
            global_df = local_df

        summary = reporter_module.DictSummary()
        adds = collections.defaultdict(list)
        add_types = ["add", "add_s", "add_or_add_s"]
        for _, row in global_df.iterrows():
            # Columns missing for this row show up as NaN and are dropped.
            observation = row.dropna().to_dict()

            observation_processed = {}
            for key, value in observation.items():
                for add_type in add_types:
                    # validation/main/{add_type}/{class_id}/{instance_id}
                    pattern = f"validation/main/{add_type}/([0-9]+)/.+"
                    match = re.match(pattern, key)
                    if not match:
                        continue
                    class_id = match.groups()[0]
                    # Strip the instance id so values aggregate per class.
                    key = f"validation/main/{add_type}/{class_id}"
                    adds[f"{add_type}/{class_id}"].append(value)
                    break
                observation_processed[key] = value
            summary.add(observation_processed)
        result = summary.compute_mean()

        # compute auc for adds
        for add_type_and_class_id, values in adds.items():
            auc = metrics.ycb_video_add_auc(values, max_value=0.1)
            result[f"validation/main/auc/{add_type_and_class_id}"] = auc
            lt_2cm = (np.array(values) < 0.02).sum() / len(values)
            result[f"validation/main/<2cm/{add_type_and_class_id}"] = lt_2cm

        # average child observations
        parent_keys = [
            "validation/main/loss",
            "validation/main/loss_quaternion",
            "validation/main/loss_translation",
            "validation/main/add",
            "validation/main/add_s",
            "validation/main/add_or_add_s",
            "validation/main/auc/add",
            "validation/main/auc/add_s",
            "validation/main/auc/add_or_add_s",
            "validation/main/<2cm/add",
            "validation/main/<2cm/add_s",
            "validation/main/<2cm/add_or_add_s",
        ]
        summary = reporter_module.DictSummary()
        for parent_key in parent_keys:
            if parent_key in result:
                continue
            # A child is any entry exactly one path level below the parent.
            for key, value in result.items():
                if osp.dirname(key) == parent_key:
                    summary.add({parent_key: value})
        result.update(summary.compute_mean())

        return result
Beispiel #15
0
def main():
    """Train (optionally), predict on the validation set and evaluate.

    Workflow:

    1. Parse the arguments and store them under ``<out>/<method>``.
    2. Load the training set (only with ``--train``) and the validation set,
       either from a cached ``.npz`` file or by preprocessing a CSV file whose
       result is then cached in the output directory.
    3. Build a classifier/regressor, or load a pickled one.
    4. With ``--train``: train the model and pickle it.
    5. Predict on the validation set, write ``result.csv`` and print the
       result of a standard ``Evaluator`` run.

    Raises:
        ValueError: If no target label was specified.
    """
    # Parse the arguments.
    args = parse_arguments()
    args.out = os.path.join(args.out, args.method)
    save_args(args, args.out)

    if not args.label:
        raise ValueError('No target label was specified.')
    labels = args.label
    class_num = len(labels) if isinstance(labels, list) else 1

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label_float(label_list):
        return numpy.asarray(label_list, dtype=numpy.float32)

    def postprocess_label_int(label_list):
        return numpy.asarray(label_list, dtype=numpy.int64)

    def load_dataset(path, description):
        """Load a cached ``.npz`` dataset or preprocess ``path`` and cache it."""
        stem, ext = os.path.splitext(path)
        if ext == ".npz":
            print('Loading {} dataset...'.format(description))
            return NumpyTupleDataset.load(path)

        print('Preprocessing {} dataset...'.format(description))
        preprocessor = preprocess_method_dict[args.method]()
        postprocess_label = (
            postprocess_label_int if args.classification
            else postprocess_label_float)
        parser = CSVFileParser(
            preprocessor, postprocess_label=postprocess_label,
            labels=labels, smiles_col='SMILES')
        dataset = parser.parse(path)['dataset']
        # Cache the preprocessed dataset next to the other outputs.
        NumpyTupleDataset.save(
            os.path.join(args.out, os.path.split(stem)[1]), dataset)
        return dataset

    # The scaler stays None unless training data is given and standardization
    # is requested; it must be bound even without --train because the
    # regressor set-up below reads it.
    scaler = None
    if args.train:
        train = load_dataset(args.train, 'training')
        # Scale the label values, if necessary.
        if args.scale == 'standardize':
            scaler = StandardScaler()
            scaler.fit(train.get_datasets()[-1])

    test = load_dataset(args.val, 'test')

    # Set up the model.
    device = chainer.get_device(args.device)
    converter = converter_method_dict[args.method]
    metrics_fun = {'mae': F.mean_absolute_error, 'rmse': rmse}
    if args.classification:
        if args.load_model:
            model = Classifier.load_pickle(args.load_model, device=device)
            print("model file loaded: ",args.load_model)
            # Keep `predictor` bound for the ROC-AUC evaluator below.
            predictor = model.predictor
        else:
            predictor = set_up_predictor(
                args.method, args.unit_num, args.conv_layers, class_num)
            model = Classifier(predictor,
                               lossfun=F.sigmoid_cross_entropy,
                               metrics_fun=F.binary_accuracy,
                               device=device)
    else:
        if args.load_model:
            model = Regressor.load_pickle(args.load_model, device=device)
            print("model file loaded: ",args.load_model)
            predictor = model.predictor
        else:
            predictor = set_up_predictor(
                args.method+args.method_suffix, args.unit_num,
                args.conv_layers, class_num, label_scaler=scaler)
            model = Regressor(predictor, lossfun=F.mean_squared_error,
                              metrics_fun=metrics_fun, device=device)

    if args.train:
        if args.balanced_iter:
            train = BalancedSerialIterator(
                train, args.batchsize, train.features[:, -1],
                ignore_labels=-1)
            train.show_label_stats()

        print('Training...')
        log_keys = ['main/mae','main/rmse','validation/main/mae','validation/main/rmse','validation/main/roc_auc']
        extensions_list = [extensions.PlotReport(
            log_keys, 'iteration', trigger=(100, 'iteration'),
            file_name='loss.png')]
        if args.eval_roc and args.classification:
            extensions_list.append(ROCAUCEvaluator(
                test, model, eval_func=predictor,
                device=device, converter=converter, name='validation',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))

        save_json(os.path.join(args.out, 'args.json'), vars(args))
        run_train(model, train, valid=test,
                  batch_size=args.batchsize, epoch=args.epoch,
                  out=args.out, extensions_list=extensions_list,
                  device=device, converter=converter)

        # Save the model's parameters.
        model_path = os.path.join(args.out, args.model_filename)
        print('Saving the trained model to {}...'.format(model_path))
        if hasattr(model.predictor.graph_conv, 'reset_state'):
            model.predictor.graph_conv.reset_state()
        model.save_pickle(model_path, protocol=args.protocol)

    # Prediction.
    it = SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    result = []
    for batch in it:
        in_arrays = convert._call_converter(converter, batch, device)
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            # The forward pass is run for its side effect: the prediction is
            # read back from ``model.y`` below.
            if isinstance(in_arrays, tuple):
                model(*in_arrays)
            elif isinstance(in_arrays, dict):
                model(**in_arrays)
            else:
                model(in_arrays)
        # to_cpu works for host and GPU arrays alike, whereas ``.get()`` only
        # exists on CuPy arrays.
        result.extend(chainer.backends.cuda.to_cpu(model.y.array))

    numpy.savetxt(os.path.join(args.out,"result.csv"), numpy.array(result))

    eval_result = Evaluator(it, model, converter=converter,device=device)()
    print('Evaluation result: ', eval_result)