def __init__(self, cfg):
    super(Model, self).__init__()
    self.in_planes = 512
    if cfg.MODEL.PRETRAIN == "outside":
        self.backbone = get_model(cfg.MODEL.NAME)(task=cfg.MODEL.TASK)
    else:
        self.backbone = get_model(cfg.MODEL.NAME)(cfg.DB.NUM_CLASSES,
                                                  task=cfg.MODEL.TASK)

    self.num_classes = cfg.DB.NUM_CLASSES
    self.attr_fc = nn.Linear(self.in_planes, self.num_classes)
    self.attr_fc.apply(weights_init_classifier)
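
The constructor above delegates head initialization to weights_init_classifier, which is defined elsewhere in the repository. A minimal sketch of such an initializer, assuming the common re-ID convention (small-std normal weights, zero bias); this is an illustration, not the repository's exact code:

import torch.nn as nn

# Sketch (assumption): classifier-head initializer as commonly written in
# re-ID codebases; applied via module.apply(weights_init_classifier).
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        nn.init.normal_(m.weight, std=0.001)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
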
    def __init__(self, cfg):
        super(Model, self).__init__()
        self.in_planes = 512
        # both branches constructed the backbone identically, so the
        # pretrain check is collapsed into a single call
        self.backbone = get_model(cfg.MODEL.NAME)()

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.BNNeck = nn.BatchNorm1d(self.backbone.feature_dim)
        self.BNNeck.bias.requires_grad_(False)  # no shift
        self.BNNeck.apply(weights_init_kaiming)

        self.id_fc = nn.Linear(self.backbone.feature_dim,
                               cfg.DB.NUM_CLASSES,
                               bias=False)
        self.id_fc.apply(weights_init_classifier)
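
The original shows only the constructor for this BNNeck variant; the forward pass would typically pool, normalize, then classify. A sketch under the assumption that the backbone returns an N x C x H x W feature map (this method is not part of the original snippet):

# Sketch (assumption): how gap, BNNeck, and id_fc are usually combined.
def forward(self, x):
    feat_map = self.backbone(x)            # N x C x H x W
    global_feat = self.gap(feat_map)       # N x C x 1 x 1
    global_feat = global_feat.flatten(1)   # N x C
    bn_feat = self.BNNeck(global_feat)     # normalized embedding
    logits = self.id_fc(bn_feat)           # ID classification logits
    return logits, global_feat
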
def load_model(self, model_dir_, model_name):
    # 1 - load the architecture description
    params_filename = join(model_dir_, model_name + '_params.yml')
    with open(params_filename, 'r') as stream:   # file() is Python 2 only
        params = yaml.safe_load(stream)          # safe_load avoids arbitrary object construction
    fs_model = model_factory.get_model(params['model_params'])
    # 2 - compile the model and load (link) the weights
    weights_file = join(model_dir_, 'fs/{}.h5'.format(model_name))
    model = fs_model.load_model(weights_file)
    return model
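
load_model expects a <model_name>_params.yml file (with a top-level model_params key) next to an fs/<model_name>.h5 weights file. A hypothetical call, where pipeline stands for an instance of the surrounding class and the directory and model name are invented for illustration:

# Hypothetical usage; 'experiments' must contain mymodel_params.yml
# and fs/mymodel.h5 as laid out by the method above.
model = pipeline.load_model('experiments', 'mymodel')
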
Example #4
def train_predict_crossvalidation(self, model_params, X, y, info, cols,
                                  model_name):
    logging.info('model_params: {}'.format(model_params))
    splitter = LeaveOneOut()
    folds = list(splitter.split(X, y.ravel()))
    fold_ids = range(len(folds))
    model = get_model(model_params)
    # evaluate each leave-one-out fold in a worker process
    f = partial(eval_model, model, X, y, info, folds, self.directory,
                model_name)
    with mp.Pool(5) as p:   # close and join the pool when done
        prediction = p.map(f, fold_ids)
    prediction_df = pd.concat(prediction, axis=0)
    return prediction_df
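
eval_model is supplied elsewhere; the partial(...) call above dictates its signature (the bound arguments followed by the fold id mapped over by the pool). A minimal sketch of a compatible worker; the body is an assumption, only the signature is implied by the call:

import pandas as pd

# Sketch (assumption): worker compatible with partial(eval_model, ...) above.
def eval_model(model, X, y, info, folds, directory, model_name, fold_id):
    train_index, test_index = folds[fold_id]
    fitted = model.fit(X[train_index], y[train_index].ravel())
    scores = fitted.predict(X[test_index])
    # one row per held-out sample, indexed by its info entry
    return pd.DataFrame({'pred': scores}, index=info[test_index])
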
    def train_predict_crossvalidation(self, model_params, X, y, info, cols,
                                      model_name):
        logging.info('model_params: {}'.format(model_params))
        n_splits = self.pipeline_params['params']['n_splits']
        skf = StratifiedKFold(n_splits=n_splits,
                              random_state=123,
                              shuffle=True)
        i = 0
        scores = []
        model_list = []
        for train_index, test_index in skf.split(X, y.ravel()):
            model = get_model(model_params)
            logging.info('fold # ----------------%d---------' % i)
            x_train, x_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            info_train = pd.DataFrame(index=info[train_index])
            info_test = pd.DataFrame(index=info[test_index])
            x_train, x_test = self.preprocess(x_train, x_test)
            # feature extraction
            logging.info('feature extraction....')
            x_train, x_test = self.extract_features(x_train, x_test)

            model = model.fit(x_train, y_train)

            y_pred_test, y_pred_test_scores = self.predict(
                model, x_test, y_test)
            score_test = self.evaluate(y_test, y_pred_test, y_pred_test_scores)
            logging.info('model {} -- Test score {}'.format(
                model_name, score_test))
            self.save_prediction(info_test, y_pred_test, y_pred_test_scores,
                                 y_test, i, model_name)

            if hasattr(model, 'save_model'):
                logging.info('saving coef')
                save_model(model, model_name + '_' + str(i), self.directory)

            if self.save_train:
                logging.info('predicting training ...')
                y_pred_train, y_pred_train_scores = self.predict(
                    model, x_train, y_train)
                self.save_prediction(info_train,
                                     y_pred_train,
                                     y_pred_train_scores,
                                     y_train,
                                     i,
                                     model_name,
                                     training=True)

            scores.append(score_test)

            fs_params = deepcopy(model_params)
            # model_params is a dict, so membership (not hasattr) is the correct test
            if 'id' in fs_params:
                fs_params['id'] = fs_params['id'] + '_fold_' + str(i)
            else:
                fs_params['id'] = fs_params['type'] + '_fold_' + str(i)

            model_list.append((model, fs_params))
            i += 1
        self.save_coef(model_list, cols)
        logging.info(scores)
        return scores
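
Both cross-validation variants drive get_model with a plain dict and branch on its 'id' and 'type' keys. A hypothetical model_params value shaped the way this code expects (all field values invented for illustration):

# Hypothetical model_params dict; 'type' selects the model in get_model,
# 'id' is optional and gets '_fold_<i>' appended per fold.
model_params = {
    'type': 'nn',
    'id': 'baseline_nn',
    'params': {'n_hidden': 64, 'epochs': 100},
}
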
    def run(self):
        # logging
        logging.info('loading data....')
        data = Data(**self.data_params[0])
        # will use the whole dataset for training
        x_train, y_train, info_train, cols_train = data.get_data()

        data_types = cols_train.get_level_values(1).unique()
        if len(data_types) > 2:
            cnv_split = True
        else:
            cnv_split = False

        # divide the training set into two balanced training sets; later we
        # train two models and combine their outputs

        index_pos = np.where(y_train == 1)[0]
        index_neg = np.where(y_train == 0)[0]
        n_pos = index_pos.shape[0]
        # select the same number of samples as the positive class
        index_neg1 = index_neg[0:n_pos]

        x_train_pos = x_train[index_pos, :]
        x_train_neg = x_train[index_neg1, :]
        x_train1 = np.concatenate((x_train_pos, x_train_neg))

        y_train_pos = y_train[index_pos, :]
        y_train_neg = y_train[index_neg1, :]
        y_train1 = np.concatenate((y_train_pos, y_train_neg))

        info_train_pos = info_train[index_pos]
        info_train_neg1 = info_train[index_neg1]
        info_train1 = np.concatenate((info_train_pos, info_train_neg1))

        # second training set
        index_neg2 = index_neg[n_pos:]
        x_train_neg2 = x_train[index_neg2, :]
        x_train2 = np.concatenate((x_train_pos, x_train_neg2))

        y_train_neg2 = y_train[index_neg2, :]
        y_train2 = np.concatenate((y_train_pos, y_train_neg2))

        info_train_pos = info_train[index_pos]
        info_train_neg2 = info_train[index_neg2]
        info_train2 = np.concatenate((info_train_pos, info_train_neg2))

        print('training shape: ')
        print(x_train1.shape, y_train1.shape, info_train1.shape,
              cols_train.shape, sum(y_train1))
        print(x_train2.shape, y_train2.shape, info_train2.shape,
              cols_train.shape, sum(y_train2))

        # get testing data set (external validation)
        # 1- Primary data set (write the paper here)
        # 2- Metastatic dataset ()
        # 3- New dataset ()
        x_test_mets, y_test_mets, info_test_mets, cols_test_mets = get_validation_metastatic(
            cols_train, cnv_split)
        x_test_primary, y_test_primary, info_test_primary, cols_test_primary = get_validation_primary(
            cols_train, cnv_split)
        # x_test_new, y_test_new, info_test_new, cols_test_new = get_validation_primary(cols_train)

        print('testing shape: ')
        print(x_test_mets.shape, y_test_mets.shape, info_test_mets.shape,
              cols_test_mets.shape)
        print(x_test_primary.shape, y_test_primary.shape,
              info_test_primary.shape, cols_test_primary.shape)
        # print x_test.shape, y_test.shape, info_test.shape, cols_test.shape

        # pre-processing
        logging.info('preprocessing....')
        _, x_test_mets = self.preprocess(x_train, x_test_mets)
        _, x_test_primary = self.preprocess(x_train, x_test_primary)
        _, x_train1 = self.preprocess(x_train, x_train1)
        _, x_train2 = self.preprocess(x_train, x_train2)

        test_scores = []
        # model_names = []
        # model_list = []
        # cnf_matrix_list = []
        fig = plt.figure()
        fig.set_size_inches((10, 6))
        pred_scores = []
        if isinstance(self.model_params, list):
            for m in self.model_params:
                # get model
                set_random_seeds(random_seed=20080808)

                model1 = get_model(m)
                model2 = get_model(m)
                logging.info('fitting')

                model1 = model1.fit(x_train1, y_train1)
                model2 = model2.fit(x_train2, y_train2)

                logging.info('predicting')

                def predict(x_test, y_test, info_test, model_name,
                            test_set_name):
                    pred = {}
                    y_pred_test2, y_pred_test_scores2 = self.predict(
                        model2, x_test, y_test)
                    y_pred_test1, y_pred_test_scores1 = self.predict(
                        model1, x_test, y_test)

                    y_pred_test_scores = (y_pred_test_scores1 +
                                          y_pred_test_scores2) / 2.
                    y_pred_test = y_pred_test_scores > 0.5

                    logging.info('scoring ...')
                    # 'evalualte' is the helper's actual (misspelled) name in this codebase
                    test_score = evalualte(y_test, y_pred_test,
                                           y_pred_test_scores)
                    cnf_matrix = confusion_matrix(y_test, y_pred_test)

                    pred['model'] = model_name
                    pred['data_set'] = test_set_name
                    pred = dict(pred, **test_score)
                    pred_scores.append(pred)

                    logging.info('saving results')

                    model_name = model_name + '_' + test_set_name
                    self.save_score(test_score, model_name)
                    self.save_prediction(info_test, y_pred_test,
                                         y_pred_test_scores, y_test,
                                         model_name)
                    self.save_cnf_matrix([cnf_matrix], [model_name])

                if 'id' in m:
                    model_name = m['id']
                else:
                    model_name = m['type']

                predict(x_test_mets, y_test_mets, info_test_mets, model_name,
                        '_mets')
                predict(x_test_primary, y_test_primary, info_test_primary,
                        model_name, '_primary')

                pred_scores_df = pd.DataFrame(pred_scores)
                pred_scores_df.to_csv(
                    join(self.directory, 'testing_scores.csv'))

        return test_scores
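
run balances the classes by pairing every positive sample with two disjoint slices of the negatives, training one model per slice, and averaging the two score vectors at prediction time. A self-contained toy illustration of the index construction (data invented):

import numpy as np

# Toy illustration of the balanced two-model split used above.
y = np.array([1, 0, 0, 1, 0, 0, 0])
index_pos = np.where(y == 1)[0]   # all positive rows -> [0 3]
index_neg = np.where(y == 0)[0]   # all negative rows -> [1 2 4 5 6]
n_pos = index_pos.shape[0]

set1 = np.concatenate((index_pos, index_neg[:n_pos]))  # balanced set 1
set2 = np.concatenate((index_pos, index_neg[n_pos:]))  # positives + remaining negatives
print(set1, set2)   # -> [0 3 1 2] [0 3 4 5 6]
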
def train(config, dataset, optimizer, criterion, scheduler, metric, strategy):
    if config.DATA.KFOLD != -1:  # 'is not' compares identity; use != for ints
        print('kfold: ', config.DATA.KFOLD, ' fold_ind: ',
              config.DATA.FOLD_IND)
        dataset.set_train_cross_validate_ind(folds=config.DATA.KFOLD,
                                             fold_index=config.DATA.FOLD_IND)
    train_loader, NUM_TRAINING_IMAGES, step_per_epoch = dataset.get_training_dataset(
    )
    val_loader, NUM_VALIDATION_IMAGES = dataset.get_validation_dataset(True)

    if config.TRAIN.DISPLAY:
        train_batch = iter(train_loader.unbatch().batch(3))
        for fi in range(NUM_TRAINING_IMAGES):
            x = next(train_batch)
            display_batch_of_images(x, figsize=8)

    if config.SKIP_VALIDATION:  # only loss
        checkpoints_path = str(
            Path(config.TRAIN_DIR) / config.RECIPE /
            'checkpoints') + '\\epoch_{epoch:03d}_{loss:.4f}.h5'
    else:
        if config.MODEL.METRIC == 'accuracy':
            checkpoints_path = str(
                Path(config.TRAIN_DIR) / config.RECIPE /
                'checkpoints') + '\\epoch_{epoch:03d}_{val_accuracy:.4f}.h5'
        elif config.MODEL.METRIC == 'categorical_accuracy':
            checkpoints_path = str(
                Path(config.TRAIN_DIR) / config.RECIPE / 'checkpoints'
            ) + '\\epoch_{epoch:03d}_{val_categorical_accuracy:.4f}.h5'
        else:
            checkpoints_path = str(
                Path(config.TRAIN_DIR) / config.RECIPE / 'checkpoints'
            ) + '\\epoch_{epoch:03d}_{val_sparse_categorical_accuracy:.4f}.h5'

    checkpoint_all = ModelCheckpoint(
        checkpoints_path,
        # monitor='val_'+config.MODEL.METRIC,
        monitor='loss',
        mode='min',
        verbose=1,
        save_best_only=True,
        period=1)

    # checkpoint_all = ModelCheckpoint(
    #     checkpoints_path,
    #     monitor='val_'+config.MODEL.METRIC,
    #     mode='max',
    #     verbose=1,
    #     save_best_only=True,
    #     period=1
    # )

    log_dir = Path(config.TRAIN_DIR) / config.RECIPE / 'logs'
    print(log_dir)
    tensorboard = TensorBoard(log_dir=log_dir,
                              histogram_freq=0,
                              write_graph=False,
                              write_images=False,
                              update_freq='epoch',
                              profile_batch=0,
                              embeddings_freq=0,
                              embeddings_metadata=None)
    # with tf.device('/CPU:0'):
    with strategy.scope():
        model = get_model(config)
        checkpoint = utils.checkpoint.get_initial_checkpoint(config)
        if checkpoint is not None:
            utils.checkpoint.load_checkpoint(model, checkpoint)
        else:
            print('[*] no checkpoint found')

        model.compile(optimizer=optimizer, loss=criterion, metrics=metric)
        # print(model.metrics_name)
        # model.summary()

    history = model.fit(
        train_loader,
        steps_per_epoch=step_per_epoch,
        epochs=config.TRAIN.NUM_EPOCHS,
        callbacks=[scheduler, tensorboard, checkpoint_all],
        validation_data=None
        if config.SKIP_VALIDATION and not config.DEBUG else val_loader)

    print("End! Goodluck~")
def __init__(self, cfg):
    super(Model, self).__init__()
    self.backbone = get_model(cfg.MODEL.NAME)(cfg)
Example #9
    def run(self):
        test_scores = []
        model_names = []
        model_list = []
        y_pred_test_list = []
        y_pred_test_scores_list = []
        y_test_list = []
        fig = plt.figure()
        fig.set_size_inches((10, 6))
        print(self.data_params)
        for data_params in self.data_params:
            print('data_params', data_params)
            data_id = data_params['id']
            logging.info('loading data....')
            data = Data(**data_params)
            # get data
            (x_train, x_validate_, x_test_, y_train, y_validate_, y_test_,
             info_train, info_validate_, info_test_,
             cols) = data.get_train_validate_test()

            logging.info('predicting')
            if self.eval_dataset == 'validation':
                x_t = x_validate_
                y_t = y_validate_
                info_t = info_validate_
            else:
                x_t = np.concatenate((x_test_, x_validate_))
                y_t = np.concatenate((y_test_, y_validate_))
                info_t = pd.concat([info_test_, info_validate_])  # DataFrame.append was removed in pandas 2.x

            logging.info('x_train {} y_train {} '.format(x_train.shape, y_train.shape))
            logging.info('x_test {} y_test {} '.format(x_t.shape, y_t.shape))

            # pre-processing
            logging.info('preprocessing....')
            x_train, x_test = self.preprocess(x_train, x_t)
            for m in self.model_params:
                # get model
                model_params_ = deepcopy(m)
                set_random_seeds(random_seed=20080808)
                model = get_model(model_params_)
                logging.info('fitting')
                logging.info(model_params_)
                if model_params_['type'] == 'nn' and self.eval_dataset != 'validation':
                    model = model.fit(x_train, y_train, x_validate_, y_validate_)
                else:
                    model = model.fit(x_train, y_train)
                logging.info('predicting')

                model_name = get_model_name(model_params_)
                model_name = model_name + '_' + data_id
                model_params_['id'] = model_name
                logging.info('model id: {}'.format(model_name))
                model_list.append((model, model_params_))
                y_pred_test, y_pred_test_scores = self.predict(model, x_test, y_t)
                test_score = self.evaluate(y_t, y_pred_test, y_pred_test_scores)
                logging.info('model name {} -- Test score {}'.format(model_name, test_score))
                test_scores.append(test_score)
                model_names.append(model_name)
                logging.info('saving results')
                self.save_score(data_params, model_params_, test_score, model_name)
                self.save_prediction(info_t, y_pred_test, y_pred_test_scores, y_t, model_name)
                y_test_list.append(y_t)
                y_pred_test_list.append(y_pred_test)
                y_pred_test_scores_list.append(y_pred_test_scores)

                # saving coef
                self.save_coef([(model, model_params_)], cols)

                # saving confusion matrix
                cnf_matrix = confusion_matrix(y_t, y_pred_test)
                save_confusion_matrix(cnf_matrix, self.directory, model_name)

                # saving model coefficients/weights (only if the model supports it)
                if hasattr(model, 'save_model'):
                    logging.info('saving coef')
                    save_model(model, model_name, self.directory)

                if self.save_train:
                    y_pred_train, y_pred_train_scores = self.predict(model, x_train, y_train)
                    train_score = self.evaluate(y_train, y_pred_train, y_pred_train_scores)
                    logging.info('model {} -- Train score {}'.format(model_name, train_score))
                    self.save_prediction(info_train, y_pred_train, y_pred_train_scores, y_train, model_name,
                                         training=True)

        test_scores = pd.DataFrame(test_scores, index=model_names)
        generate_plots(test_scores, self.directory)
        self.save_all_scores(test_scores)

        if self.task == 'classification_binary':
            auc_fig = plt.figure()
            auc_fig.set_size_inches((10, 6))
            prc_fig = plt.figure()
            prc_fig.set_size_inches((10, 6))
            for y_test, y_pred_test, y_pred_test_scores, model_name in zip(y_test_list, y_pred_test_list,
                                                                           y_pred_test_scores_list, model_names):
                plot_roc(auc_fig, y_test, y_pred_test_scores, self.directory, label=model_name)
                plot_prc(prc_fig, y_test, y_pred_test_scores, self.directory, label=model_name)
            auc_fig.savefig(join(self.directory, 'auc_curves'))
            prc_fig.savefig(join(self.directory, 'auprc_curves'))
        return test_scores
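
plot_roc and plot_prc come from the project's plotting utilities and are not shown here. A minimal sketch of a plot_roc compatible with the calls above (the body is an assumption; the directory parameter is kept only for signature compatibility):

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Sketch (assumption): minimal plot_roc matching the call sites above.
def plot_roc(fig, y_test, y_pred_scores, directory, label=''):
    fpr, tpr, _ = roc_curve(y_test, y_pred_scores)
    plt.figure(fig.number)   # draw onto the shared figure
    plt.plot(fpr, tpr, label='{} (AUC={:.3f})'.format(label, auc(fpr, tpr)))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend(loc='lower right')
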