Esempio n. 1
0
    def predict(self, smiles, average=True):
        """Predict with every model of the ensemble for the given inputs.

        Features are built according to ``self.feature_type``:

        * ``'fingerprints'``: ``get_fp(smiles)`` is called and every entry
          whose fingerprint sums to NaN is set aside in ``nan_smiles``.
        * ``'descriptors'``: featurisation and the clean/NaN split are both
          delegated to ``get_desc(smiles, self.calc)``.

        Args:
            smiles: Indexable sequence handed to ``get_fp`` / ``get_desc``
                (presumably SMILES strings -- confirm against callers).
            average: If True, per-model predictions are averaged over the
                ensemble axis (axis 0). If False, the stacked per-model
                predictions are returned with the ensemble on axis 0.

        Returns:
            Tuple ``(clean_smiles, prediction, nan_smiles)``. ``prediction``
            is an empty list when no input produced usable features.

        Raises:
            ValueError: If ``self.feature_type`` is neither
                ``'fingerprints'`` nor ``'descriptors'``.
        """
        if self.feature_type == 'fingerprints':
            fps = get_fp(smiles)
            assert len(smiles) == len(fps)
            clean_smiles = []
            clean_fps = []
            nan_smiles = []
            for i, fp in enumerate(fps):
                # A NaN anywhere in the fingerprint makes its sum NaN.
                if np.isnan(sum(fp)):
                    nan_smiles.append(smiles[i])
                else:
                    clean_smiles.append(smiles[i])
                    clean_fps.append(fp)
            clean_fps = np.array(clean_fps)
        elif self.feature_type == 'descriptors':
            clean_fps, clean_smiles, nan_smiles = get_desc(smiles, self.calc)
        else:
            # Previously this fell through to a NameError on clean_fps.
            raise ValueError(
                "Unknown feature_type: {!r}".format(self.feature_type))

        prediction = []
        n_predicted = 0
        if len(clean_fps) > 0:
            for i in range(self.n_ensemble):
                features = clean_fps
                if self.feature_type == 'descriptors':
                    # Always start from the raw descriptors: rebinding
                    # clean_fps here would feed model i+1 with data already
                    # processed using model i's statistics.
                    # NOTE(review): assumes normalize_desc does not modify
                    # its input in place -- confirm.
                    features, _ = normalize_desc(clean_fps, self.desc_mean[i])
                prediction.append(self.model[i].predict(features))
            prediction = np.array(prediction)
            if average:
                prediction = prediction.mean(axis=0)
            # Samples sit on axis 0 once averaged, on axis 1 while the
            # ensemble axis is still present.
            n_predicted = prediction.shape[0 if average else 1]
        assert len(clean_smiles) == n_predicted

        return clean_smiles, prediction, nan_smiles
Esempio n. 2
0
 def fit_model(self, data, cross_val_data, cross_val_labels):
     """Fit each ensemble member on all folds but one and score the rest.

     For every ``i`` in ``range(self.n_ensemble)``, fold ``i`` is held out,
     ``self.model[i]`` is fitted on fingerprints (``get_fp``) of the
     remaining folds, and its predictions on the held-out fold are scored.

     Args:
         data: Not used by this method; kept so the signature is unchanged.
         cross_val_data: List of folds; sliced and joined with ``+``, so it
             must support list concatenation.
         cross_val_labels: List of label arrays parallel to
             ``cross_val_data``.

     Returns:
         Tuple ``(eval_metrics, metrics_type)``: one score per ensemble
         member and the metric name (``'AUC'`` for ``'classifier'``,
         ``'R^2 score'`` for ``'regressor'``).

     Raises:
         ValueError: If ``self.model_type`` is neither ``'classifier'`` nor
             ``'regressor'``.
     """
     # Resolve the metric up front so that it is always bound at return
     # time (the original raised UnboundLocalError for n_ensemble == 0 or an
     # unknown model_type, the latter only after fitting every model).
     if self.model_type == 'classifier':
         metrics_type = 'AUC'
     elif self.model_type == 'regressor':
         metrics_type = 'R^2 score'
     else:
         raise ValueError(
             "Unknown model_type: {!r}".format(self.model_type))

     eval_metrics = []
     for i in range(self.n_ensemble):
         # Fold i is the test set; every other fold goes into training.
         train_sm = np.concatenate(
             cross_val_data[:i] + cross_val_data[(i + 1):])
         test_sm = cross_val_data[i]
         train_labels = np.concatenate(
             cross_val_labels[:i] + cross_val_labels[(i + 1):])
         test_labels = cross_val_labels[i]

         fp_train = get_fp(train_sm)
         fp_test = get_fp(test_sm)
         self.model[i].fit(fp_train, train_labels.ravel())
         predicted = self.model[i].predict(fp_test)

         if metrics_type == 'AUC':
             # NOTE(review): the ROC curve is built from predict() output;
             # if that yields hard class labels rather than scores the AUC
             # has a single operating point -- confirm this is intended.
             fpr, tpr, _ = metrics.roc_curve(test_labels, predicted)
             eval_metrics.append(metrics.auc(fpr, tpr))
         else:
             eval_metrics.append(metrics.r2_score(test_labels, predicted))
     return eval_metrics, metrics_type
Esempio n. 3
0
 def predict(self, smiles, average=True):
     """Predict with every model in ``self.model`` for the given inputs.

     Fingerprints are computed with ``get_fp``; entries whose fingerprint
     sums to NaN are set aside and reported separately.

     Args:
         smiles: Indexable sequence handed to ``get_fp`` (presumably SMILES
             strings -- confirm against callers).
         average: If True, per-model predictions are averaged over the
             ensemble axis (axis 0). If False, the stacked per-model
             predictions are returned with the ensemble on axis 0.

     Returns:
         Tuple ``(clean_smiles, prediction, nan_smiles)``. ``prediction`` is
         an empty list when no input produced a usable fingerprint.
     """
     fps = get_fp(smiles)
     assert len(smiles) == len(fps)
     clean_smiles = []
     clean_fps = []
     nan_smiles = []
     for i, fp in enumerate(fps):
         # A NaN anywhere in the fingerprint makes its sum NaN.
         if np.isnan(sum(fp)):
             nan_smiles.append(smiles[i])
         else:
             clean_smiles.append(smiles[i])
             clean_fps.append(fp)
     clean_fps = np.array(clean_fps)

     prediction = []
     n_predicted = 0
     if len(clean_fps) > 0:
         prediction = np.array([m.predict(clean_fps) for m in self.model])
         if average:
             prediction = prediction.mean(axis=0)
         # Samples sit on axis 0 once averaged, on axis 1 while the ensemble
         # axis is still present; len(prediction) would count models in the
         # latter case and trip the check for average=False.
         n_predicted = prediction.shape[0 if average else 1]
     assert len(clean_smiles) == n_predicted
     return clean_smiles, prediction, nan_smiles
Esempio n. 4
0
    def fit_model(self, data):
        """Featurise ``data``, split it into folds and fit the ensemble.

        Features come from ``get_fp`` (``'fingerprints'``) or ``get_desc``
        (``'descriptors'``). Labels are ``data.binary_labels`` for a
        ``'classifier'`` and ``data.property`` for a ``'regressor'``. The
        folds are produced by ``cross_validation_split``; for every ``i`` in
        ``range(self.n_ensemble)`` fold ``i`` is held out, ``self.model[i]``
        is fitted on the rest and scored on the held-out fold.

        In descriptor mode the value returned by ``normalize_desc`` for the
        training folds is stored in ``self.desc_mean[i]`` and reused for the
        held-out fold.

        Args:
            data: Object exposing ``smiles`` and, depending on
                ``self.model_type``, ``binary_labels`` or ``property``.

        Returns:
            Tuple ``(eval_metrics, metrics_type)``: one score per ensemble
            member and the metric name (``'AUC'`` or ``'R^2 score'``).

        Raises:
            ValueError: If ``self.feature_type`` or ``self.model_type`` has
                an unsupported value.
        """
        # Validate both settings first; the original left fps,
        # cross_val_data or metrics_type unbound for unknown values and
        # failed later with UnboundLocalError.
        if self.feature_type not in ('fingerprints', 'descriptors'):
            raise ValueError(
                "Unknown feature_type: {!r}".format(self.feature_type))
        if self.model_type == 'classifier':
            metrics_type = 'AUC'
            labels = data.binary_labels
        elif self.model_type == 'regressor':
            metrics_type = 'R^2 score'
            labels = data.property
        else:
            raise ValueError(
                "Unknown model_type: {!r}".format(self.model_type))

        if self.feature_type == 'fingerprints':
            fps = get_fp(data.smiles)
        else:
            # NOTE(review): the two extra return values of get_desc are
            # discarded; if get_desc drops inputs it cannot featurise, fps
            # would no longer line up with the labels -- confirm.
            fps, _, _ = get_desc(data.smiles, self.calc)

        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, labels)

        eval_metrics = []
        for i in range(self.n_ensemble):
            # Fold i is the test set; every other fold goes into training.
            train_sm = np.concatenate(cross_val_data[:i] +
                                      cross_val_data[(i + 1):])
            test_sm = cross_val_data[i]
            train_labels = np.concatenate(cross_val_labels[:i] +
                                          cross_val_labels[(i + 1):])
            test_labels = cross_val_labels[i]
            if self.feature_type == 'descriptors':
                # Statistics come from the training folds only and are
                # reused for the held-out fold and stored on the instance.
                train_sm, desc_mean = normalize_desc(train_sm)
                self.desc_mean[i] = desc_mean
                test_sm, _ = normalize_desc(test_sm, desc_mean)
            self.model[i].fit(train_sm, train_labels.ravel())
            predicted = self.model[i].predict(test_sm)
            if metrics_type == 'AUC':
                fpr, tpr, _ = metrics.roc_curve(test_labels, predicted)
                eval_metrics.append(metrics.auc(fpr, tpr))
            else:
                eval_metrics.append(
                    metrics.r2_score(test_labels, predicted))

        return eval_metrics, metrics_type