예제 #1
0
    def __init__(self, metadata):
        """Set up the speech-domain model wrapper.

        Reads the test-set sample count from the test split's
        ``metadata.textproto``, builds the domain metadata dict and creates a
        ``ModelExecutor`` from it.

        Args:
            metadata: an AutoDLMetadata object. Its definition can be found in
            AutoDL_ingestion_program/dataset.py

        Raises:
            IndexError: if none of the first three lines of the test metadata
                file contains ``sample_count``.
        """
        self.done_training = False
        self.metadata = metadata
        self.domain = "speech"
        # The test split's metadata path is derived from the train dataset
        # name by swapping 'train' for 'test'.
        test_metadata_filename = self.metadata.get_dataset_name().replace('train', 'test') + '/metadata.textproto'
        # Only the first three lines are inspected; the context manager makes
        # sure the file handle is closed instead of being left to the GC.
        with open(test_metadata_filename, 'r') as meta_file:
            head_lines = [meta_file.readline() for _ in range(3)]
        self.test_num = [int(line.split(':')[1]) for line in head_lines if 'sample_count' in line][0]

        self.domain_metadata = get_domain_metadata(metadata, self.domain)
        self.domain_metadata["test_num"] = self.test_num
        self.class_num = self.domain_metadata["class_num"]
        self.train_num = self.domain_metadata["train_num"]

        logger.info("Note:domain={}, domain_metadata is {}".format(self.domain, self.domain_metadata))
        self.domain_model = ModelExecutor(self.domain_metadata)
        # Per-round bookkeeping filled in by train()/test().
        self.ensemble_val_record_list = list()
        self.ensemble_val_nauc_list = list()
        self.cur_cls_name = None
        self.cur_train_his_report = dict()
        self.g_predevel_space = list()
        self.g_train_loss_list = list()

        as_timer("model_speech_init")
예제 #2
0
    def offline_fit(self,
                    train_examples_x: np.ndarray,
                    train_examples_y: np.ndarray,
                    fit_params: dict = None):
        """Fit the single-label model or the multi-label model(s).

        Args:
            train_examples_x: training features passed to ``fit``.
            train_examples_y: training labels; converted with ``ohe2cat`` on
                the single-label path and sliced as ``[:, cls]`` in the
                per-class multi-label modes.
            fit_params: options dict. Only ``"if_multilabel"`` is read; the
                single-label path runs only when it is exactly ``False``.
                ``None`` (the default) is treated as an empty dict.
        """
        # The signature advertises None as the default, so it must not crash
        # on the .get() below.
        if fit_params is None:
            fit_params = {}

        # for single-label
        if fit_params.get("if_multilabel") is False:
            train_examples_y = ohe2cat(train_examples_y)
            self.model.fit(train_examples_x, train_examples_y)
            self.label_map = self.model.classes_
        # for multi-labels.
        else:
            if self.ml_mode == 1:
                # One dedicated model per class column.
                for cls in range(self.class_num):
                    cls_y = train_examples_y[:, cls]
                    self.ml_models[cls].fit(train_examples_x, cls_y)

            elif self.ml_mode == 2:
                # A single model fitted on the full label matrix.
                self.ml_model.fit(train_examples_x, train_examples_y)

            elif self.ml_mode == 3:
                # NOTE(review): every class refits the same
                # self.logReg_pipeline, so presumably only the last class's
                # fit is kept -- confirm this is intended.
                for cls in range(self.class_num):
                    cls_y = train_examples_y[:, cls]
                    self.logReg_pipeline.fit(train_examples_x, cls_y)

            else:
                error("Error: wrong ml_mode={}".format(self.ml_mode))

        as_timer("lr_liblinear_fit_{}".format(len(train_examples_x)))
예제 #3
0
    def test(self, dataset, remaining_time_budget=None):
        """Predict on ``dataset`` and return the current or ensembled predictions.

        The current predictions are stored in the training report under
        ``"pred_probas"``. When the current classifier is ``CLS_TR34`` and its
        train-pipeline run count is at least 8, the report is handed to the
        ensemble learner if the training-loss go-down rate reaches
        ``EVAL_TLOSS_GODOWN_RATE_THRES``. From run count 15 onwards the
        soft-voting ensemble is returned instead of the current predictions.

        Args:
            dataset: passed through to ``domain_model.test_pipeline``.
            remaining_time_budget: not used by this method.

        Returns:
            The soft-voting ensemble predictions, or the current predictions.
        """
        cur_y_pred = self.domain_model.test_pipeline(dataset)
        self.cur_train_his_report["pred_probas"] = cur_y_pred

        tr34_past_warmup = (self.cur_cls_name == CLS_TR34
                            and self.domain_model.tr34_cls_train_pip_run >= 8)
        if tr34_past_warmup:
            learner = self.domain_model.decision_maker.ensemble_learner
            godown_rate = learner.get_loss_godown_rate(self.g_train_loss_list, EVAL_TLOSS_TAIL_SIZE)
            if godown_rate >= EVAL_TLOSS_GODOWN_RATE_THRES:
                learner.add_eval_pred_item(self.cur_train_his_report)

        if self.domain_model.tr34_cls_train_pip_run >= 15:
            # Rule passed to the soft-voting ensemble; only "t_loss" is set.
            voting_rule = {"t_loss": 5}
            result = self.domain_model.decision_maker.ensemble_learner.softvoting_ensemble_preds(voting_rule)
        else:
            result = cur_y_pred

        self.done_training = False
        as_timer("test_start")
        return result
    def online_fit(self, train_examples_x: np.ndarray, train_examples_y: np.ndarray, fit_params:dict):
        """Run one training round with ``fit_generator`` and report its final metrics.

        Args:
            train_examples_x: training features fed to ``Tr34DataGenerator``.
            train_examples_y: training labels fed to ``Tr34DataGenerator``.
            fit_params: options dict; reads ``"first_epoch"`` (default 14),
                ``"left_epoch"`` (default 1) and ``"valid_data"``.

        Returns:
            dict with ``"t_loss"`` and ``"t_acc"``: the last recorded ``loss``
            and ``acc`` of this round, rounded to 6 decimals.
        """
        self.trn_gen = Tr34DataGenerator(train_examples_x, train_examples_y, **self.tr34_cls_params)
        cur_train_len = len(train_examples_x)
        self.first_r_data_generator = self.trn_gen
        cur_epoch = self.decide_epoch_curround(fit_params.get("first_epoch", 14), fit_params.get("left_epoch", 1))
        early_stopping = TerminateOnBaseline(monitor="acc", baseline=0.999)
        cur_fit_history = self.model.fit_generator(
            self.first_r_data_generator,
            steps_per_epoch=self.decide_stepperepoch_curround(cur_train_len),
            validation_data=fit_params.get("valid_data"),  # TODO: put in.
            epochs=cur_epoch,
            max_queue_size=10,
            callbacks=self.callbacks + [early_stopping],
            use_multiprocessing=False,
            workers=1,
            verbose=ThinRes34Config.VERBOSE,
        )
        self.round_idx += 1
        as_timer("TR34Cls_r{}_fit".format(self.round_idx))

        # Summarise the fit history. The former unused read of
        # history["lr"][-1] was dropped: it contributed nothing to the result
        # and raised TypeError whenever no callback logged "lr".
        history = cur_fit_history.history
        cur_train_loss = round(history.get("loss")[-1], 6)
        cur_train_acc = round(history.get("acc")[-1], 6)
        return {
            "t_loss": cur_train_loss,
            "t_acc": cur_train_acc,
        }
예제 #5
0
    def online_fit(self, train_examples_x: np.ndarray,
                   train_examples_y: np.ndarray, fit_params: dict):
        """Train the model for one round on mixup-generated batches.

        Records the number of training examples, builds a ``MixupGenerator``
        stream, lets ``_fit_params_decision`` fill ``self.fit_params_res`` and
        then calls ``fit_generator`` with the decided schedule. ``self.n_iter``
        is advanced by 5 afterwards.

        Args:
            train_examples_x: training features.
            train_examples_y: training labels.
            fit_params: passed unchanged to ``_fit_params_decision``.
        """
        self.cur_train_num = len(train_examples_x)
        mixup_factory = MixupGenerator(
            train_examples_x,
            train_examples_y,
            batch_size=self.train_batch_size,
            datagen=self.img_freqmasking_datagen)
        batch_stream = mixup_factory()

        # Refresh self.fit_params_res before it is read below.
        self._fit_params_decision(fit_params)
        schedule = self.fit_params_res

        self.model.fit_generator(
            batch_stream,
            steps_per_epoch=schedule["steps_per_epoch"],
            epochs=schedule["epochs"],
            initial_epoch=schedule["initial_epoch"],
            shuffle=True,
            verbose=1,
        )
        self.n_iter += 5

        timer_fields = (
            self.cur_train_num,
            self.fit_params_res["initial_epoch"],
            self.fit_params_res["epochs"],
            self.fit_params_res["steps_per_epoch"],
        )
        as_timer("CNNCls_fit_{}_{}_{}_{}".format(*timer_fields))
예제 #6
0
    def init(self, class_num: int, init_params: dict):
        """
        Initialise the pretrained CNN backbone and its batch/augmentation settings.

        :param class_num: number of output classes for the loaded model.
        :param init_params: options dict; the keys read here are
            - n_mels (used for both spatial dims of the input shape)
            - train_batch_size
            - predict_batch_size
        :return: None
        """
        self.class_num = class_num
        self.clf_name = "cnn_pret"

        mel_bins = init_params.get("n_mels")  # 64, fixed, as pretrained.
        self.n_mels = mel_bins
        model_input_shape = (mel_bins, mel_bins, 1)
        self.model = cnn_load_pretrained_model(input_shape=model_input_shape,
                                               n_classes=self.class_num)

        init_msg = "Backbone classifier={} is init, class_num={}, init_params={}".format(
            self.clf_name, self.class_num, init_params)
        info(init_msg)
        as_timer("clf_{}_init".format(self.clf_name))

        self.train_batch_size = init_params.get("train_batch_size")
        self.predict_batch_size = init_params.get("predict_batch_size")
        self.n_iter = 0

        # Augmentation generator applying frequency masking as preprocessing.
        self.img_freqmasking_datagen = ImageDataGenerator(
            preprocessing_function=DNpAugPreprocessor.frequency_masking)
예제 #7
0
 def predict_proba(self, test_examples: np.ndarray,
                   predict_prob_params: dict) -> np.ndarray:
     """Predict on the test examples through a ``TTAGenerator`` stream.

     Args:
         test_examples: examples to predict on.
         predict_prob_params: not used by this method.

     Returns:
         Whatever ``self.model.predict_generator`` returns for the stream.
     """
     batch_size = self.predict_batch_size
     tta_batches = TTAGenerator(test_examples, batch_size=batch_size)()
     # Enough steps to cover every example, including a partial last batch.
     n_steps = int(np.ceil(len(test_examples) / batch_size))
     probabilities = self.model.predict_generator(tta_batches, steps=n_steps)
     as_timer("CNNCls_testpred")
     return probabilities
예제 #8
0
 def eval_val(self, val_examples_x, val_examples_y):
     """Score the model on validation data with ``ATEvaluator.skl_auc_macro``.

     Args:
         val_examples_x: validation examples fed to ``TTAGenerator``.
         val_examples_y: validation labels compared against the predictions.

     Returns:
         The value returned by ``ATEvaluator.skl_auc_macro``.
     """
     batch_size = self.predict_batch_size
     tta_batches = TTAGenerator(val_examples_x, batch_size=batch_size)()
     n_val = len(val_examples_x)
     # Enough steps to cover every example, including a partial last batch.
     n_steps = int(np.ceil(n_val / batch_size))
     probabilities = self.model.predict_generator(tta_batches, steps=n_steps)
     score = ATEvaluator.skl_auc_macro(val_examples_y, probabilities)
     as_timer("CNNCls_evalval_{}_{}".format(n_val, score))
     return score
    def predict_proba(self, test_examples: np.ndarray, predict_prob_params: dict=None) -> np.ndarray:
        """Predict on the test examples, or reuse the previous prediction.

        A fresh prediction is made only when ``self.good_to_predict()`` is
        truthy; it is then cached in ``self.last_y_pred`` together with the
        round index. Otherwise the cached prediction is returned.

        Args:
            test_examples: examples to predict on.
            predict_prob_params: not used by this method.

        Returns:
            The fresh prediction, or ``self.last_y_pred``.
        """
        K.set_learning_phase(0)

        if not self.good_to_predict():
            return self.last_y_pred

        fresh_pred = self.model.predict(test_examples, batch_size=self.tr34_mconfig.PRED_SIZE)
        as_timer("TR34Cls_Test")
        self.test_idx += 1

        # Cache the result so later calls can reuse it.
        self.last_y_pred = fresh_pred
        self.last_y_pred_round = self.round_idx
        return fresh_pred
예제 #10
0
    def offline_fit(self,
                    train_examples_x: np.ndarray,
                    train_examples_y: np.ndarray,
                    fit_params: dict = None):
        """Fit the single-label model or the multi-label model.

        Args:
            train_examples_x: training features passed to ``fit``.
            train_examples_y: training labels; converted with ``ohe2cat`` on
                the single-label path, passed as-is on the multi-label path.
            fit_params: options dict. Only ``"if_multilabel"`` is read; the
                single-label path runs only when it is exactly ``False``.
                ``None`` (the default) is treated as an empty dict.
        """
        # The signature advertises None as the default, so it must not crash
        # on the .get() below.
        if fit_params is None:
            fit_params = {}

        if fit_params.get("if_multilabel") is False:
            train_examples_y = ohe2cat(train_examples_y)
            self.model.fit(train_examples_x, train_examples_y)
            # Remember which classes the model saw during fitting.
            self.label_map = self.model.classes_

        else:
            self.ml_model.fit(train_examples_x, train_examples_y)

        as_timer("lr_sag_fit_{}".format(len(train_examples_x)))
예제 #11
0
    def predict_proba(self,
                      test_examples: np.ndarray,
                      predict_prob_params: dict = None) -> np.ndarray:
        """Return class probabilities for the test examples.

        Args:
            test_examples: examples to predict on.
            predict_prob_params: options dict. Only ``"if_multilabel"`` is
                read; the multi-label path runs only when it is exactly
                ``True``. ``None`` (the default) is treated as an empty dict.

        Returns:
            The result of ``predict_proba_multilabel`` on the multi-label
            path. Otherwise the model's probabilities, passed through
            ``rebuild_prob_res`` when ``label_map`` has fewer entries than
            ``class_num``.
        """
        # The signature advertises None as the default, so it must not crash
        # on the .get() below.
        if predict_prob_params is None:
            predict_prob_params = {}

        if predict_prob_params.get("if_multilabel") is True:
            return self.predict_proba_multilabel(test_examples)

        raw_pred_probas = self.model.predict_proba(test_examples)
        if len(self.label_map) < self.class_num:
            # Fewer known labels than classes: rebuild the output from the
            # label map.
            rebuilt_pred_proba = self.rebuild_prob_res(
                self.label_map, raw_pred_probas)
            as_timer("lr_liblinaer_pred_proba_{}".format(
                len(test_examples)))
            return rebuilt_pred_proba
        return raw_pred_probas
예제 #12
0
 def init_kapre_melspectrogram_extractor(self):
     """Create the mel-spectrogram extractor and optionally warm it up.

     The extractor is built by ``make_melspectrogram_extractor`` for an
     input shape of ``(1, CROP_SEC * SAMPLING_RATE)`` taken from
     ``self.kape_params``. When ``KAPRE_FMAKER_WARMUP`` is truthy, ten random
     float32 vectors of length 48000 are pushed through ``make_features``
     and the result is discarded.
     """
     self.kapre_melspectrogram_extractor = self.make_melspectrogram_extractor(
         (1, self.kape_params.get("CROP_SEC") *
          self.kape_params.get("SAMPLING_RATE")))
     if KAPRE_FMAKER_WARMUP:
         warmup_size = 10
         warmup_len = 48000
         # One vectorised draw per sample instead of 48000 scalar
         # np.random.uniform() calls in a Python loop.
         warmup_x = [
             np.random.uniform(size=warmup_len).astype(np.float32)
             for _ in range(warmup_size)
         ]
         # Called for its side effect only; the features are not kept.
         self.make_features(warmup_x,
                            feats_maker_params={
                                "len_sample": 5,
                                "sr": 16000
                            })
         info("Kpare_featmaker warmup.")
         as_timer("Kpare_featmaker_warmup")
예제 #13
0
    def train(self, dataset, remaining_time_budget=None):
        """Run the domain model's train pipeline and record its validation score.

        With ``IF_TRAIN_BREAK_CONDITION`` set, the pipeline is repeated until
        the reported ``val_nauc`` is ``-1`` or exceeds ``get_accept_nauc()``.
        Otherwise a single round is run and its ``t_loss`` (or ``100000``
        when absent) is appended to ``g_train_loss_list``. In both modes the
        classifier name and ``val_nauc`` are appended to the ensemble lists.

        Args:
            dataset: passed through to ``domain_model.train_pipeline``.
            remaining_time_budget: not used by this method.
        """
        logger.info("Note: speech_train_process  model.py starts train")
        as_timer("train_start")

        if not IF_TRAIN_BREAK_CONDITION:
            report = self.domain_model.train_pipeline(dataset)
            self.cur_train_his_report = report
            self.cur_cls_name = report.get("cls_name")

            # A missing training loss is recorded as a large sentinel value.
            round_loss = report.get("t_loss")
            self.g_train_loss_list.append(100000 if round_loss is None else round_loss)

            info("train_his_report={}".format(self.cur_train_his_report))
            val_nauc = report["val_nauc"]

            self.ensemble_val_record_list.append([self.cur_cls_name, val_nauc])
            self.ensemble_val_nauc_list.append(val_nauc)
        else:
            while True:
                report = self.domain_model.train_pipeline(dataset)
                self.cur_train_his_report = report
                self.cur_cls_name = report.get("cls_name")

                val_nauc = report["val_nauc"]
                self.ensemble_val_record_list.append([self.cur_cls_name, val_nauc])
                self.ensemble_val_nauc_list.append(val_nauc)

                # -1 is accepted outright; otherwise the score must beat the
                # current acceptance threshold.
                accepted = val_nauc == -1 or val_nauc > self.get_accept_nauc()
                best_so_far = max(self.ensemble_val_nauc_list)
                if accepted:
                    info("Decision=Yes, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, val_nauc, best_so_far))
                    break
                info("Decision=No, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, val_nauc, best_so_far))

        as_timer("speech_model_basic_train")
 def predict_proba_2(self, test_examples: np.ndarray, predict_prob_params: dict=None) -> np.ndarray:
     """Predict on the test examples unconditionally and count the call.

     Args:
         test_examples: examples to predict on.
         predict_prob_params: not used by this method.

     Returns:
         The result of ``self.model.predict``.
     """
     K.set_learning_phase(0)
     pred_batch = self.tr34_mconfig.PRED_SIZE
     predictions = self.model.predict(test_examples, batch_size=pred_batch)
     as_timer("TR34Cls_Test")
     self.test_idx += 1
     return predictions
 def init(self, class_num: int, init_params: dict=None):
     """Build the TR34 model and its callbacks for ``class_num`` classes.

     Args:
         class_num: number of classes, stored and passed to
             ``tr34_model_init``.
         init_params: not used by this method.
     """
     self.class_num = class_num
     built_model, fit_callbacks = self.tr34_model_init(class_num)
     self.model = built_model
     self.callbacks = fit_callbacks
     self.choose_round_spec_len()
     as_timer("TR34Cls_init")