Exemple #1
0
    def predict_with_existing(self,
                              X_train, y_train_regression, y_train_classification,
                              X_val, y_val_regression, y_val_classification,
                              X_test, y_test_regression, y_test_classification,
                              PMTNN_weight_file):
        """
        Load saved weights into the ensemble model and print its metrics.

        For the train, validation and test splits the values returned by
        ``precision_auc_single``, ``roc_auc_single`` and ``bedroc_auc_single``
        are printed, followed by one enrichment-factor line per ratio in
        ``self.EF_ratio_list`` computed on the test split.

        :param X_train: training inputs passed to ``model.predict``
        :param y_train_regression: not used by this method
        :param y_train_classification: labels the train predictions are scored against
        :param X_val: validation inputs passed to ``model.predict``
        :param y_val_regression: not used by this method
        :param y_val_classification: labels the validation predictions are scored against
        :param X_test: test inputs passed to ``model.predict``
        :param y_test_regression: not used by this method
        :param y_test_classification: labels the test predictions are scored against
        :param PMTNN_weight_file: weight file handed to ``model.load_weights``
        :return: None
        """
        ensemble = self.setup_model_ensemble()
        ensemble.load_weights(PMTNN_weight_file)

        train_scores = ensemble.predict(X_train)
        val_scores = ensemble.predict(X_val)
        test_scores = ensemble.predict(X_test)

        # One entry per split: the three message templates (in metric order),
        # the true labels and the predictions to score.
        metric_fns = (precision_auc_single, roc_auc_single, bedroc_auc_single)
        reports = (
            (('train precision: {}', 'train roc: {}', 'train bedroc: {}'),
             y_train_classification, train_scores),
            (('validation precision: {}', 'validation roc: {}', 'validation bedroc: {}'),
             y_val_classification, val_scores),
            (('test precision: {}', 'test roc: {}', 'test bedroc: {}'),
             y_test_classification, test_scores),
        )

        print
        for templates, y_true, y_score in reports:
            for template, metric in zip(templates, metric_fns):
                print(template.format(metric(y_true, y_score)))
            print

        for ratio in self.EF_ratio_list:
            # The third returned value (maximum EF) is not reported here.
            n_actives, ef, _ = enrichment_factor_single(y_test_classification, test_scores, ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(ratio, ef, n_actives))
def enrichement_factor_fetcher(y_test, y_pred_on_test, EF_ratio_list):
    """
    Evaluate ``enrichment_factor_single`` once per ratio and collect the results.

    :param y_test: true labels, forwarded unchanged to ``enrichment_factor_single``
    :param y_pred_on_test: predictions, forwarded unchanged to ``enrichment_factor_single``
    :param EF_ratio_list: iterable of ratios to evaluate, in order
    :return: tuple ``(ef_values, ef_max_values)`` of two lists, one entry per ratio
    """
    # Each result is a 3-tuple (n_actives, ef, ef_max); n_actives is discarded.
    results = [enrichment_factor_single(y_test, y_pred_on_test, ratio)
               for ratio in EF_ratio_list]
    ef_values = [ef for _, ef, _ in results]
    ef_max_values = [ef_max for _, _, ef_max in results]
    return ef_values, ef_max_values
Exemple #3
0
def get_ef_table(file_path, target_name, efr_list, ef_header, title):
    """
    Render a markdown table of enrichment factors, one row per docking method.

    The docking results are outer-merged on ``molid`` with the ground-truth
    file selected by ``target_name``; rows with a missing value in the id,
    label or docking-method column are dropped before each EF computation.
    The docking methods come from the module-level ``docking_methods``.

    :param file_path: Docking results (CSV readable by ``pd.read_csv``)
    :param target_name: label column; one of 'Keck_Pria_AS_Retest',
                        'Keck_Pria_FP_data' or 'Keck_RMI_cdd'
    :param efr_list: EF ratio list
    :param ef_header: Table header
    :param title: Markdown Table caption
    :return: the markdown content
    :raises ValueError: if ``target_name`` is not one of the known targets

    example run: get_ef_table(file_path='../../output/docking_result/lc123-pria-dockdata-qnorm.csv.gz',
                              target_name='Keck_Pria_AS_Retest',
                              efr_list=[0.02, 0.01, 0.0015, 0.001],
                              ef_header=['EF_2', 'EF_1', 'EF_015', 'EF_01'],
                              title='Enrichment Factor for Docking Methods')
    """
    docking_pd = pd.read_csv(file_path)
    caption = '## {}'.format(title)

    header_row = '| docking method |'
    for column_name in ef_header:
        header_row = '{} {} |'.format(header_row, column_name)

    # One '---' cell per EF ratio, after the leading method-name cell.
    divider_row = '| --- |'
    for _ in efr_list:
        divider_row = '{} {} |'.format(divider_row, '---')

    # Known targets and the ground-truth file holding their labels.
    ground_truth_files = (
        ('Keck_Pria_AS_Retest', '../../output/docking/stage_1/lc123-pria-dockdata-qnorm.csv.gz'),
        ('Keck_Pria_FP_data', '../../output/docking/stage_1/lc123-pria-dockdata-qnorm.csv.gz'),
        ('Keck_RMI_cdd', '../../output/docking/stage_1/lc123-rmi-dockdata-qnorm.csv.gz'),
    )
    for known_target, ground in ground_truth_files:
        if target_name == known_target:
            break
    else:
        # The loop finished without a match.
        raise ValueError('Target name {} not found.'.format(target_name))

    labels_pd = pd.read_csv(ground)
    labels_pd = labels_pd[['Unnamed: 0', target_name]]
    labels_pd.columns = ['molid', target_name]
    merged_pd = pd.merge(docking_pd, labels_pd, on='molid', how='outer')

    body = ''
    for docking_method in docking_methods:
        complete_rows = merged_pd[['molid', target_name, docking_method]].dropna()
        # Labels and scores are handed over as plain numpy arrays.
        # TODO: may find the difference with panda.series for EF calculation
        true_labels = np.array(complete_rows[target_name].tolist())
        docking_scores = np.array(complete_rows[docking_method].tolist())
        row = '| {} |'.format(docking_method)
        for ratio in efr_list:
            _, ef, _ = enrichment_factor_single(true_labels, docking_scores, ratio)
            row = '{} {} |'.format(row, ef)
        body = '{}{}\n'.format(body, row)
    return '{}\n{}\n{}\n{}'.format(caption, header_row, divider_row, body)
    def get_EF_score_with_existing_model(self, X_test, y_test, file_path,
                                         EF_ratio):
        """
        Load saved weights and print test metrics plus one enrichment factor.

        :param X_test: test inputs passed to ``model.predict``
        :param y_test: true labels the predictions are scored against
        :param file_path: weight file handed to ``model.load_weights``
        :param EF_ratio: ratio forwarded to ``enrichment_factor_single``
        :return: None
        """
        net = self.setup_model()
        net.load_weights(file_path)
        test_scores = net.predict(X_test)

        # The enrichment factor is evaluated first and reported last.
        n_actives, ef, _ = enrichment_factor_single(y_test, test_scores, EF_ratio)

        precision = get_model_precision_auc(y_test, test_scores)
        print('test precision: {}'.format(precision))
        roc = get_model_roc_auc(y_test, test_scores)
        print('test auc: {}'.format(roc))
        print('EF: {},\tactive: {}'.format(ef, n_actives))
Exemple #5
0
    def get_EF_score_with_existing_model(self,
                                         X_test, y_test, y_test_classification,
                                         file_path, EF_ratio):
        """
        Load saved ensemble weights and print test metrics plus one enrichment factor.

        :param X_test: test inputs passed to ``model.predict``
        :param y_test: not used by this method
        :param y_test_classification: labels the predictions are scored against
        :param file_path: weight file handed to ``model.load_weights``
        :param EF_ratio: ratio forwarded to ``enrichment_factor_single``
        :return: None
        """
        ensemble = self.setup_model_ensemble()
        ensemble.load_weights(file_path)
        test_scores = ensemble.predict(X_test)

        precision = precision_auc_single(y_test_classification, test_scores)
        print('test precision: {}'.format(precision))
        roc = roc_auc_single(y_test_classification, test_scores)
        print('test roc: {}'.format(roc))
        bedroc = bedroc_auc_single(y_test_classification, test_scores)
        print('test bedroc: {}'.format(bedroc))
        print

        # The third returned value (maximum EF) is not reported here.
        n_actives, ef, _ = enrichment_factor_single(y_test_classification, test_scores, EF_ratio)
        print('EF: {},\tactive: {}'.format(ef, n_actives))
    def predict_with_existing(self, X_train, y_train, X_val, y_val, X_test,
                              y_test, PMTNN_weight_file):
        model = self.setup_model()
        model.load_weights(PMTNN_weight_file)

        y_pred_on_train = model.predict(X_train)[:, -1]
        y_train = y_train[:, -1]
        y_pred_on_val = model.predict(X_val)[:, -1]
        y_val = y_val[:, -1]
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)[:, -1]
            y_test = y_test[:, -1]
        print y_train.shape, '\t', y_pred_on_test.shape

        print
        print('train precision: {}'.format(
            precision_auc_single(y_train, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(
            bedroc_auc_single(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(
            precision_auc_single(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val,
                                                         y_pred_on_val)))
        print('validation bedroc: {}'.format(
            bedroc_auc_single(y_val, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(
                precision_auc_single(y_test, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test,
                                                       y_pred_on_test)))
            print('test bedroc: {}'.format(
                bedroc_auc_single(y_test, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(
                    y_test, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(
                    EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return
    def train_and_predict(self,
                          X_train, y_train_regression, y_train_classification,
                          X_val, y_val_regression, y_val_classification,
                          X_test, y_test_regression, y_test_classification,
                          PMTNN_weight_file):
        model = self.setup_model()
        sw = get_sample_weight(self, y_train_regression)
        print 'Sample Weight\t', sw

        model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer)
        model.fit(x=X_train, y=y_train_regression,
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  sample_weight=sw,
                  validation_data=[X_val, y_val_regression],
                  shuffle=True)
        model.save_weights(PMTNN_weight_file)

        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)

        print
        print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
            print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return
def get_EF_values_single_task(task, X_test, y_test, model_weight,
                              EF_ratio_list):
    """
    Load saved weights into ``task``'s model, print test metrics, return EF values.

    :param task: object providing ``setup_model()``
    :param X_test: test inputs passed to ``model.predict``
    :param y_test: true labels the predictions are scored against
    :param model_weight: weight file handed to ``model.load_weights``
    :param EF_ratio_list: iterable of ratios to evaluate, in order
    :return: tuple ``(ef_values, ef_max_values)`` of two lists, one entry per ratio
    """
    net = task.setup_model()
    net.load_weights(model_weight)
    test_scores = net.predict(X_test)

    precision = precision_auc_single(y_test, test_scores)
    print('test precision: {}'.format(precision))
    roc = roc_auc_single(y_test, test_scores)
    print('test roc: {}'.format(roc))
    bedroc = bedroc_auc_single(y_test, test_scores)
    print('test bedroc: {}'.format(bedroc))
    print

    # Each result is a 3-tuple (n_actives, ef, ef_max); n_actives is discarded.
    results = [enrichment_factor_single(y_test, test_scores, ratio)
               for ratio in EF_ratio_list]
    ef_values = [ef for _, ef, _ in results]
    ef_max_values = [ef_max for _, _, ef_max in results]
    return ef_values, ef_max_values
    def get_rf(self, X_train, y_train, X_val, y_val, X_test, y_test):
        """
        Fit a random forest with fixed hyper-parameters and print its metrics.

        The numpy random seed and the forest's ``random_state`` are both set
        to 1337. Metrics are computed on the output of ``rf.predict`` after it
        has been passed through ``reshape_data_into_2_dim``.

        :param X_train: training inputs
        :param y_train: training labels
        :param X_val: validation inputs
        :param y_val: validation labels
        :param X_test: test inputs
        :param y_test: test labels
        :return: the fitted ``RandomForestClassifier``
        """
        rnd_state = 1337
        np.random.seed(seed=rnd_state)

        forest = RandomForestClassifier(n_estimators=4000,
                                        max_features='log2',
                                        min_samples_leaf=1,
                                        n_jobs=3,
                                        class_weight='balanced',
                                        random_state=rnd_state,
                                        oob_score=False,
                                        verbose=1)
        forest.fit(X_train, y_train)

        train_pred = reshape_data_into_2_dim(forest.predict(X_train))
        val_pred = reshape_data_into_2_dim(forest.predict(X_val))
        test_pred = reshape_data_into_2_dim(forest.predict(X_test))

        # One entry per split: message templates (in metric order), true
        # labels and the predictions to score.
        metric_fns = (precision_auc_single, roc_auc_single, bedroc_auc_single)
        reports = (
            (('train precision: {}', 'train roc: {}', 'train bedroc: {}'),
             y_train, train_pred),
            (('validation precision: {}', 'validation roc: {}', 'validation bedroc: {}'),
             y_val, val_pred),
            (('test precision: {}', 'test roc: {}', 'test bedroc: {}'),
             y_test, test_pred),
        )
        for templates, y_true, y_score in reports:
            for template, metric in zip(templates, metric_fns):
                print(template.format(metric(y_true, y_score)))
            print

        for ratio in self.EF_ratio_list:
            # The third returned value (maximum EF) is not reported here.
            n_actives, ef, _ = enrichment_factor_single(y_test, test_pred, ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(ratio, ef, n_actives))

        return forest
Exemple #10
0
    def train_and_predict(self,
                          X_train, y_train, y_train_classification,
                          X_val, y_val, y_val_classification,
                          X_test, y_test, y_test_classification,
                          mode):
        model = Sequential()
        conf = self.conf
        batch_normalizer_epsilon = conf['batch']['epsilon']
        batch_normalizer_mode = conf['batch']['mode']
        batch_normalizer_axis = conf['batch']['axis']
        batch_normalizer_momentum = conf['batch']['momentum']
        batch_normalizer_weights = conf['batch']['weights']
        batch_normalizer_beta_init = conf['batch']['beta_init']
        batch_normalizer_gamma_init = conf['batch']['gamma_init']
        batch_normalizer = BatchNormalization(epsilon=batch_normalizer_epsilon,
                                              mode=batch_normalizer_mode,
                                              axis=batch_normalizer_axis,
                                              momentum=batch_normalizer_momentum,
                                              weights=batch_normalizer_weights,
                                              beta_init=batch_normalizer_beta_init,
                                              gamma_init=batch_normalizer_gamma_init)

        if mode == 'classification':
            model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='relu'))
            model.add(Dropout(0.5))
            model.add(Dense(1024, init='glorot_normal', activation='relu'))
            model.add(Dropout(0.5))
            if self.batch_is_use:
                model.add(batch_normalizer)
            model.add(Dense(1, init='glorot_normal', activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam')
        else:
            model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='sigmoid'))
            model.add(Dropout(0.5))
            model.add(Dense(1024, init='glorot_normal', activation='sigmoid'))
            model.add(Dropout(0.5))
            if self.batch_is_use:
                model.add(batch_normalizer)
            model.add(Dense(1, init='glorot_normal', activation='linear'))
            model.compile(loss='mse', optimizer='adam')

        model.fit(X_train, y_train,
                  batch_size=self.fit_batch_size,
                  nb_epoch=self.fit_nb_epoch,
                  verbose=self.fit_verbose,
                  validation_data=(X_val, y_val))

        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        y_pred_on_test = model.predict(X_test)

        print
        print 'this is mode ', mode
        print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
        print
        print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
        print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
        print

        for EF_ratio in self.EF_ratio_list:
            n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return y_pred_on_test
Exemple #11
0
    def train_and_predict_ensemble(self,
                                   X_train, y_train_regression, y_train_classification,
                                   X_val, y_val_regression, y_val_classification,
                                   X_test, y_test_regression, y_test_classification,
                                   PMTNN_weight_file):
        model = self.setup_model_ensemble()
        # TODO: remove
        print model.summary()

        if self.weight_schema == 'weighted':
            loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 100.}
        elif self.weight_schema == 'no_weight':
            loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 1.}
        else:
            raise ValueError('Wrong weight schema. Should be no_weight, or weighted.')

        model.compile(optimizer=self.compile_optimizer,
                      loss={'classification_output_layer': 'binary_crossentropy', 'regression_output_layer': 'mse'},
                      loss_weights=loss_weight)

        model.fit({'input_layer': X_train},
                  {'classification_output_layer': y_train_classification,
                   'regression_output_layer': y_train_regression},
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  validation_data=({'input_layer': X_val},
                                   {'classification_output_layer': y_val_classification,
                                    'regression_output_layer': y_val_regression}),
                  shuffle=True)
        model.save_weights(PMTNN_weight_file)

        y_pred_on_train_ensemble = np.array(model.predict(X_train))
        y_pred_on_val_ensmble = np.array(model.predict(X_val))
        y_pred_on_test_ensemble = np.array(model.predict(X_test))

        print
        print 'TreeNet Ensemble'

        mode_list = ['TreeNet classification', 'TreeNet regression']
        for mode in range(2):
            print
            print mode_list[mode]
            y_pred_on_train = y_pred_on_train_ensemble[mode]
            y_pred_on_val = y_pred_on_val_ensmble[mode]
            y_pred_on_test = y_pred_on_test_ensemble[mode]

            print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
            print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
            print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
            print
            print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
            print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
            print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
            print
            print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
            print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
            print

            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
                print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return y_pred_on_test_ensemble
    def train_and_predict(self,
                          X_train,
                          y_train,
                          X_val,
                          y_val,
                          X_test,
                          y_test,
                          PMTNN_weight_file,
                          score_file,
                          sample_weight,
                          eval_indices=[-1],
                          eval_mean_or_median=np.mean):
        def get_model_roc_auc(true_label,
                              predicted_label,
                              eval_indices=eval_indices,
                              eval_mean_or_median=eval_mean_or_median):
            return roc_auc_multi(true_label, predicted_label, eval_indices,
                                 eval_mean_or_median)

        def get_model_bedroc_auc(true_label,
                                 predicted_label,
                                 eval_indices=eval_indices,
                                 eval_mean_or_median=eval_mean_or_median):
            return bedroc_auc_multi(true_label, predicted_label, eval_indices,
                                    eval_mean_or_median)

        def get_model_precision_auc(true_label,
                                    predicted_label,
                                    eval_indices=eval_indices,
                                    eval_mean_or_median=eval_mean_or_median):
            return precision_auc_multi(true_label, predicted_label,
                                       eval_indices, eval_mean_or_median)

        model = self.setup_model()
        if self.early_stopping_option == 'auc':
            early_stopping = MultiCallBackOnROC(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_indices,
                eval_mean_or_median,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        elif self.early_stopping_option == 'precision':
            early_stopping = MultiCallBackOnPR(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_indices,
                eval_mean_or_median,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        else:
            callbacks = []

        if self.weight_schema == 'no_weight':
            cw = get_class_weight(self, y_train)
            print 'cw ', cw
            model.compile(loss=self.compile_loss,
                          optimizer=self.compile_optimizer)
            model.fit(X_train,
                      y_train,
                      nb_epoch=self.fit_nb_epoch,
                      batch_size=self.fit_batch_size,
                      verbose=self.fit_verbose,
                      class_weight=cw,
                      shuffle=True,
                      callbacks=callbacks)
        else:
            reference = []
            total_num = X_train.shape[0]
            for i in range(self.output_layer_dimension):
                active = sum(y_train[:, i])
                active_and_inactive = sum(sample_weight[:, i])
                inactive = active_and_inactive - active
                missing = total_num - active_and_inactive
                reference.append({'-1': missing, '0': inactive, '1': active})

            cw = get_class_weight(self, y_train, reference=reference)
            print 'cw ', cw

            # TODO: customize loss function
            def customized_loss(y_true, y_pred):
                import theano.tensor as T
                y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon)
                sum_ = 0
                print y_true.shape
                # T.nnet.binary_crossentropy(x_recons, x).mean()
                bce = T.nnet.binary_crossentropy(y_pred, y_true).mean(axis=-1)
                return bce

            # model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer, sample_weight_mode="temporal")
            model.compile(loss=self.compile_loss,
                          optimizer=self.compile_optimizer)
            model.fit(X_train,
                      y_train,
                      nb_epoch=self.fit_nb_epoch,
                      batch_size=self.fit_batch_size,
                      verbose=self.fit_verbose,
                      class_weight=cw,
                      shuffle=True,
                      callbacks=callbacks)

        if self.early_stopping_option == 'auc' or self.early_stopping_option == 'precision':
            model = early_stopping.get_best_model()
        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)

        print('train precision: {}'.format(
            get_model_precision_auc(y_train, y_pred_on_train)))
        print('train roc: {}'.format(
            get_model_roc_auc(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(
            get_model_bedroc_auc(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(
            get_model_precision_auc(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(
            get_model_roc_auc(y_val, y_pred_on_val)))
        print('validation bedroc: {}'.format(
            get_model_bedroc_auc(y_val, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(
                get_model_precision_auc(y_test, y_pred_on_test)))
            print('test roc: {}'.format(
                get_model_roc_auc(y_test, y_pred_on_test)))
            print('test bedroc: {}'.format(
                get_model_bedroc_auc(y_test, y_pred_on_test)))
            print

        if X_test is not None:
            # Just print last target EF into output file.
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(
                    y_test[:, -1], y_pred_on_test[:, -1], EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(
                    EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

            # Store all the target EF into score file.
            out = open(score_file, 'w')
            print >> out, "EF"
            for EF_ratio in self.EF_ratio_list:
                for i in range(y_test.shape[1]):
                    n_actives, ef, ef_max = enrichment_factor_single(
                        y_test[:, i], y_pred_on_test[:, i], EF_ratio)
                    nef = ef / ef_max
                    print >> out, 'ratio:', EF_ratio, 'EF:', ef, 'active:', n_actives
                    print >> out, 'ratio:', EF_ratio, 'EF:', nef

        return
    def train_and_predict(self, X_train, y_train, X_val, y_val, X_test, y_test,
                          PMTNN_weight_file):
        model = self.setup_model()
        if self.early_stopping_option == 'auc':
            early_stopping = KeckCallBackOnROC(
                X_train,
                y_train,
                X_val,
                y_val,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        elif self.early_stopping_option == 'precision':
            early_stopping = KeckCallBackOnPrecision(
                X_train,
                y_train,
                X_val,
                y_val,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        else:
            callbacks = []

        cw = get_class_weight(self, y_train)
        print 'cw ', cw

        model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer)
        model.fit(X_train,
                  y_train,
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  class_weight=cw,
                  shuffle=True,
                  callbacks=callbacks)

        if self.early_stopping_option == 'auc' or self.early_stopping_option == 'precision':
            model = early_stopping.get_best_model()
        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)

        print
        print('train precision: {}'.format(
            precision_auc_single(y_train, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(
            bedroc_auc_single(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(
            precision_auc_single(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val,
                                                         y_pred_on_val)))
        print('validation bedroc: {}'.format(
            bedroc_auc_single(y_val, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(
                precision_auc_single(y_test, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test,
                                                       y_pred_on_test)))
            print('test bedroc: {}'.format(
                bedroc_auc_single(y_test, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(
                    y_test, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(
                    EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return