Code Example #1
    def fit(self, x_train, y_train, validation_split=0., epochs=1, verbose=0):
        y_train = utils.one_hot_encode(y_train, self.num_classes)

        x_train, x_val, y_train, y_val = train_test_split(
            x_train,
            y_train,
            test_size=validation_split,
        )

        if validation_split > 0.:
            validation_data = self.data_generator.flow(x_val,
                                                       y_val,
                                                       batch_size=32)
            callbacks = [EarlyStopping(patience=3)]
        else:
            validation_data = None
            callbacks = None

        with self.graph.as_default():
            self.data_generator.fit(x_train)
            results = self.model.fit_generator(
                self.data_generator.flow(x_train, y_train, batch_size=32),
                epochs=epochs,
                validation_data=validation_data,
                callbacks=callbacks,
                verbose=verbose,
            )

            return (results.history['loss'][0], results.history['acc'][0])
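
Most of the Keras-style examples on this page (#1, #5, #6, #9, #10) pass integer class labels through utils.one_hot_encode before handing them to the model. The helper itself is not shown anywhere in these snippets; as a point of reference, a minimal NumPy sketch of such a function could look like the following. This is an assumption about its behaviour, not the projects' actual implementation:

import numpy as np

def one_hot_encode(labels, num_classes):
    # Map an array of integer class ids (any shape) to one-hot vectors
    # along a new trailing axis of size num_classes.
    labels = np.asarray(labels, dtype=int)
    return np.eye(num_classes, dtype=np.float32)[labels]

# one_hot_encode([0, 2, 1], 3) -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]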
Code Example #2
def test(args, model, loader, prefix='', verbose=True):
    """
    This function does a single pass through the testing set
    and evaluates the average dice_score, and average loss. 

    params: args are the run parameters, model is the model being tested,
            loader is the pytorch test loader, prefix is a name string,
            verbose flag is for printing metrics 

    return: dictionary of metric values 
    """

    print("train: Beginning test")

    metrics = defaultdict(list)
    t = tqdm(loader)
    with torch.no_grad():
        for (img, seg) in t:
            img = img.to(args.device)
            seg = seg_utils.map_segmentation(seg, args.num_classes).long()          
            seg_hot = seg_utils.one_hot_encode(seg, args.num_classes).to(args.device)    
            seg = seg.to(args.device)

            seg_hat = model(img)
            
            if args.loss_func=='dice':
                dice_loss = loss_fns.DiceLoss()
                loss = dice_loss(seg_hat, seg_hot)
            elif args.loss_func=='crossentropy':
                log_loss = nn.CrossEntropyLoss()
                loss = log_loss(seg_hat, seg)

            t.set_postfix_str(s='loss: %f'% loss.item())

            try:    
                metrics['loss'].append(loss.item())
            except ValueError:
                print(metrics)
            for k, v in get_metrics(seg_hot, seg_hat).items():
                metrics[k].append(v)

        for k in replace_metric_by_mean:
            metrics[k] = np.mean(metrics[k])

        # Print!
        if verbose:
            start_string = '#### {} evaluation ####'.format(prefix)
            print(start_string)
            for k, v in metrics.items():
                print('#### {} = {}'.format(k, v))
            print(''.join(['#' for _ in range(len(start_string))]))
    return metrics
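
The segmentation examples (#2, #7, #8) instead call seg_utils.one_hot_encode on a (batch, H, W) tensor of class indices so that the prediction can be compared per class, e.g. in the Dice loss above. A minimal PyTorch sketch of that kind of helper, assuming the channel-first layout the losses expect (the project's actual seg_utils may differ):

import torch
import torch.nn.functional as F

def one_hot_encode(seg, num_classes):
    # (B, H, W) tensor of class ids -> (B, num_classes, H, W) float one-hot tensor.
    one_hot = F.one_hot(seg.long(), num_classes)   # (B, H, W, C)
    return one_hot.permute(0, 3, 1, 2).float()     # (B, C, H, W)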
Code Example #3
File: labeller.py  Project: cockroachzl/light-bulb
    def _score_classification(self, x_test, y_test):
        loss, acc = self.model.evaluate(x_test, y_test)

        num_classes = len(self.label_helper.classes)
        if acc < (1. / num_classes * ACCURACY_RATIO):
            self.logger.debug(
                "Need at least {}% accuracy improvement over naive baseline to start labelling"
                .format(int((ACCURACY_RATIO - 1.) * 100)))
            return 0

        self.logger.debug("Scoring items with model labeller.")
        y_test = utils.one_hot_encode(y_test, num_classes)
        y_pred = self.model.score(x_test)
        threshold = Evaluator.threshold_for_precision(
            y_test,
            y_pred,
            target_precision=TARGET_PRECISION,
        )

        unlabelled, ids = self.dataset.model_labelling_set()
        if len(unlabelled) == 0:
            self.logger.info("Model labelling done!")
            return 0

        scores = self.model.score(unlabelled)
        # if scores is 2 dimensional: (batch x classes)

        # This assumes only classification :(

        # Renormalize scores just in case.
        dist = scores / np.expand_dims(scores.sum(axis=1), -1)
        idxs = np.argmax(dist, -1)

        num_scored = 0
        for _id, (idx, score) in list(
                zip(ids, zip(idxs, dist[np.arange(len(idxs)), idxs]))):
            if score > threshold:
                num_scored += 1
                self.dataset.add_label(
                    _id,
                    idx,
                    stage=Dataset.MODEL_LABELLED,
                    user=Dataset.USER_MODEL_LABELLER,
                    is_labelled=False,
                    save=True,
                )
        return num_scored
Code Example #4
def encode_object_data(pitch_data):
    print('Encoding pitch dataframe of shape {}...'.format(pitch_data.shape))

    # Split label column from rest of pitch dataframe then encode
    Y_all = pitch_data.loc[:, 'p1_pitch_type'].copy()
    Y_all = utils.encode_simple_pitch_types(Y_all)

    # Drop label column from pitch dataframe, then one-hot-encode object columns
    pitch_data = pitch_data.drop('p1_pitch_type', axis=1)
    pitch_data = utils.one_hot_encode(pitch_data, False)

    # Insert label data back into pitch dataframe
    pitch_data['p1_pitch_type'] = Y_all.copy()

    print('Pitch dataframe encoding complete. New shape: {}'.format(
        pitch_data.shape))
    return pitch_data
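
Example #4 applies utils.one_hot_encode to a whole DataFrame rather than to a label vector, i.e. it one-hot-encodes the object-typed feature columns. A rough pandas equivalent is sketched below; the meaning of the second argument (False) in the call above is not documented in the snippet, so it is treated here as a hypothetical drop_first flag:

import pandas as pd

def one_hot_encode(df, drop_first=False):
    # One-hot-encode every object-dtype column, leaving numeric columns untouched.
    object_cols = list(df.select_dtypes(include='object').columns)
    return pd.get_dummies(df, columns=object_cols, drop_first=drop_first)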
Code Example #5
    def representation_learning(
        self,
        x_texts,
        epochs=1,
        bptt=100,
        batch_size=32,
        verbose=False,
        num_gpus=1,
        on_epoch_done=None,
    ):
        # Unfreeze language model.
        utils.unfreeze_layers(self.language_model)
        all_chunks = self._create_bptt_data(x_texts, bptt)

        batches = [all_chunks[i:i + batch_size] for i in range(0, len(all_chunks), batch_size)]
        total_losses = []
        if num_gpus > 1:
            batch_size = batch_size * num_gpus
            model = multi_gpu_model(self.language_model, gpus=num_gpus)
            model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
            print("Set batch size to {}".format(batch_size))
        else:
            model = self.language_model

        with self.graph.as_default():
            for epoch in range(epochs):
                iterable = tqdm.tqdm(batches) if verbose else batches
                total_loss = 0.
                for tokens_batch in iterable:
                    # Compute x_batch and y_batch
                    x_text = pad_sequences(self.lang._sequence_ids(tokens_batch))
                    x_train = x_text[:, :-1]
                    y_train = x_text[:, 1:]
                    target = utils.one_hot_encode(y_train, self.vocab_size)

                    # Train language model.
                    result = model.fit(x_train, target, batch_size=batch_size, verbose=0)
                    total_loss += 1 / len(iterable) * result.history['loss'][-1]

                if verbose: print("Epoch: {} | Loss: {}".format(epoch, total_loss))

                total_losses.append(total_loss / len(tokens_batch))
                if on_epoch_done: on_epoch_done(self)

        return (total_losses, 0.)
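
Example #5 relies on self._create_bptt_data to turn the input texts into fixed-length chunks for truncated backpropagation through time. That method is not shown; a plausible sketch, assuming each text has already been split into a list of tokens, would simply slice every sequence into windows of at most bptt tokens:

def create_bptt_chunks(token_lists, bptt):
    # Split each token sequence into contiguous chunks of at most `bptt` tokens.
    chunks = []
    for tokens in token_lists:
        for i in range(0, len(tokens), bptt):
            chunks.append(tokens[i:i + bptt])
    return chunks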
Code Example #6
    def fit(self, x_texts, y_train, validation_split=0, epochs=1):
        # Freeze language model
        y_train = utils.one_hot_encode(y_train, self.num_classes)
        x_train = self.vectorize_text(x_texts)
        if validation_split > 0.:
            callbacks = [EarlyStopping(patience=3)]
        else:
            callbacks = None

        with self.graph.as_default():
            history = self.model.fit(
                x_train,
                y_train,
                validation_split=validation_split,
                callbacks=callbacks,
                epochs=epochs,
                verbose=0,
            )
            return history.history['loss'][0], history.history['acc'][0]
Code Example #7
def test(args, model, loader, prefix='', verbose=True):
    
    print("train: Beginning test")

    metrics = defaultdict(list)
    t = tqdm(loader)
    with torch.no_grad():
        for (img, seg) in t:
            img = img.to(args.device)
            seg = map_segmentation(seg, args.num_classes).long()          
            seg_hot = seg_utils.one_hot_encode(seg, args.num_classes).to(args.device)    
            seg = seg.to(args.device)

            seg_hat = model(img)
            
            if args.loss_func=='dice':
                dice_loss = loss_fns.DiceLoss()
                loss = dice_loss(seg_hat, seg_hot)
            elif args.loss_func=='crossentropy':
                log_loss = nn.CrossEntropyLoss()
                loss = log_loss(seg_hat, seg)

            t.set_postfix_str(s='loss: %f'% loss.item())

            try:    
                metrics['loss'].append(loss.item())
            except ValueError:
                print(metrics)
            for k, v in get_metrics(seg_hot, seg_hat).items():
                metrics[k].append(v)

        for k in replace_metric_by_mean:
            metrics[k] = np.mean(metrics[k])

        # Print!
        if verbose:
            start_string = '#### {} evaluation ####'.format(prefix)
            print(start_string)
            for k, v in metrics.items():
                print('#### {} = {}'.format(k, v))
            print(''.join(['#' for _ in range(len(start_string))]))
    return metrics
Code Example #8
def train_unet_v0(args, dataset, train_loader, test_loader):
    """
    This function defines the model, optimizer and output directories. 
    It performs training, and prints metrics every eval_every epochs. 
    Most importantly, this function also saves the model every eval_every
    epochs.

    params: args are run parameters, dataset is a pytorch dataset of the images,
            train_loader and test_loader are the pytorch dataloaders. 

    return: model and metrics
    """

    output_dir = args.base_output
    model = load_model(args, unet.FRUNetAuto)
    if args.cuda:
        model = model.cuda()
    if args.dataparallel:
        model = nn.DataParallel(model)

    params = list(model.parameters())
    optimizer = optim.Adam(
        params,
        lr=args.lr,
        weight_decay=args.weight_decay
    )
    metrics = defaultdict(list)

    print("Getting Class balances... ")

    # For weighted version of loss functions
    class_weights = None
    if args.class_weights:
        class_weights = [dataset.class_weights[0], dataset.class_weights[1:8].sum()]
        if args.num_classes > 2:
            class_weights.append(dataset.class_weights[-1])
        class_weights = 1 - torch.Tensor(class_weights)
        class_weights = class_weights.to(args.device)

    print("Class balance: %s"%str(class_weights))
    print("train: Beginning train")

    model.train()

    for epoch_idx in range(args.epochs):
        print('Starting epoch {}'.format(epoch_idx))
        epoch_metrics = defaultdict(list)
        tic = time.time()
        t = tqdm(train_loader)
        for (img, seg) in t:
            img = img.to(args.device)
            seg = seg_utils.map_segmentation(seg, args.num_classes).long()
            seg_hot = seg_utils.one_hot_encode(seg, args.num_classes).to(args.device)
            seg = seg.to(args.device)
            
            seg_hat = model(img)

            if args.loss_func=='dice':
                dice_loss = loss_fns.DiceLoss(class_weights)
                loss = dice_loss(seg_hat, seg_hot)
            elif args.loss_func=='crossentropy':
                log_loss = nn.CrossEntropyLoss(weight=class_weights)
                loss = log_loss(seg_hat, seg)

            epoch_metrics['loss'].append(loss.item())
            for k, v in get_metrics(seg_hot, seg_hat).items():
                epoch_metrics[k].append(v)

            t.set_postfix_str(s='loss: %f, dice: %f'%(loss.item(),epoch_metrics['dice'][-1]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        for k, v in epoch_metrics.items():
            metrics[k].append(np.mean(np.array(v), axis=0))
            epoch_metrics[k].append(np.mean(np.array(v), axis=0))

        print('#### [{:.2f}] Epoch {} ####'.format(time.time() - tic, epoch_idx))
        for k, v in epoch_metrics.items():
            print('#### {}: {}'.format(k, v[-1]))
        print('#####################')

        # Eval and save if necessary.
        if general_utils.periodic_integer_delta(epoch_idx, args.eval_every):
            model = model.eval()
            test_metrics = test(args, model, test_loader, prefix='Test Dataset, Epoch {}'.format(epoch_idx))
            model = model.train()
            for k, v in test_metrics.items():
                metrics['test_{}_epoch{}'.format(k, epoch_idx)] = v

        if general_utils.periodic_integer_delta(epoch_idx, args.save_every):
            checkpoint_path = os.path.join(output_dir, "last.checkpoint")
            print('Saving model to {}'.format(checkpoint_path))
            chk = general_utils.make_checkpoint(model, optimizer, epoch_idx)
            torch.save(chk, checkpoint_path)
            np.save(output_dir+'/losses/loss%d.npy'%epoch_idx, epoch_metrics['loss'])
    return model, metrics
Code Example #9
    def evaluate(self, x_texts, y_test):
        y_test = utils.one_hot_encode(y_test, self.num_classes)
        x_test = self.vectorize_text(x_texts)
        with self.graph.as_default():
            return self.model.evaluate(x_test, y_test)
Code Example #10
    def evaluate(self, x_test, y_test, verbose=0):
        y_test = utils.one_hot_encode(y_test, self.num_classes)
        with self.graph.as_default():
            results = self.model.evaluate(x_test, y_test, verbose=verbose)
            return results[0], results[1]
Code Example #11
    def start(self):
        while True:
            # Put this at the top of the while loop to prevent thread timing issues.
            # If we want to solve this properly, check here: https://github.com/keras-team/keras/issues/5223
            time.sleep(
                min(self.exponential_backoff_factor * self.interval,
                    MAX_INTERVAL_TIME))

            x_test, y_test = self.dataset.test_set
            num_classes = len(self.label_helper.classes)
            # TODO(classification_only)

            # TODO(revisit)
            if len(x_test) < MIN_TEST_EXAMPLES:
                self.logger.debug(
                    "Need at least {} labels to start labelling".format(
                        MIN_TEST_EXAMPLES))
                self.exponential_backoff_factor += 1
                continue

            loss, acc = self.model.evaluate(x_test, y_test)

            if acc < (1. / num_classes * ACCURACY_RATIO):
                self.logger.debug(
                    "Need at least {}% accuracy improvement over naive baseline to start labelling"
                    .format(int((ACCURACY_RATIO - 1.) * 100)))
                self.exponential_backoff_factor += 1
                continue

            self.logger.debug("Scoring items with model labeller.")
            y_test = utils.one_hot_encode(y_test, num_classes)
            evaluator = Evaluator(self.model, x_test, y_test)
            threshold = evaluator.threshold_for_precision(THRESHOLD)

            unlabelled, ids = self.dataset.model_labelling_set()
            if len(unlabelled) == 0:
                self.logger.info("Model labelling done!")

            scores = self.model.score(unlabelled)

            # This assumes only classification :(

            # Renormalize scores just in case.
            dist = scores / np.expand_dims(scores.sum(axis=1), -1)
            idxs = np.argmax(dist, -1)

            past_threshold = 0
            for _id, (idx, score) in list(
                    zip(ids, zip(idxs, dist[np.arange(len(idxs)), idxs]))):
                if score > threshold:
                    past_threshold += 1
                    self.dataset.add_label(
                        _id,
                        idx,
                        stage=Dataset.MODEL_LABELLED,
                        user=Dataset.USER_MODEL_LABELLER,
                        is_labelled=False,
                        save=True,
                    )

            if past_threshold > 0:
                self.exponential_backoff_factor = 0
            else:
                self.exponential_backoff_factor += 1
            self.logger.debug("{} / {} labelled.".format(
                past_threshold, len(scores)))