コード例 #1
0
    def test_cross_validation(self):
        """Cross-validate tissues 1 through 23 and gather predicted classes per gene.

        One ``CrossValidation`` is built and run for every tissue number in
        ``range(1, 24)``.  Afterwards a mapping ``gnid -> list`` is assembled
        in which each non-falsy result set contributes either the gene's
        predicted class or ``'?'`` when the gene is missing from that set.
        The mapping is only built here; it is neither returned nor printed.
        """
        def _label(entry):
            # '?' marks a gene that has no entry in a given result set.
            return '?' if entry is None else entry.get_predicted_class()

        results_per_tissue = []
        for tissue_id in range(1, 24):
            validator = CrossValidation(
                genes=self.genes,
                all_gnids=self.gnids,
                class_size=self.class_size,
                fold_size=self.fold_size,
                kmer_size=self.kmer_size,
                exp_setting=self.exp_setting)
            validator.build_datasets(
                assigned_genes=self.assigned_gnids,
                neg_class_mode=self.exp_setting.get_neg_class_mode(),
                corresp_tissue=tissue_id)
            results_per_tissue.append(validator.validation())

        feature_vector = {}
        for gnid in self.gnids:
            # Falsy result sets are skipped entirely, so they add no column.
            feature_vector[gnid] = [
                _label(results.get(gnid))
                for results in results_per_tissue
                if results
            ]
コード例 #2
0
ファイル: Data.py プロジェクト: towardtruth/nbk
    def cross_validation(self):
        """Cross-validate every feature and store the outcome on the feature.

        For each entry of ``self.features`` a ``CrossValidation`` is created,
        its datasets are built from the feature's assigned genes and tissue,
        and ``validation()`` is run.  The prediction results and the value
        returned by ``self.set_confusion_matrix`` are then stored on the
        feature through its setters.  Progress is reported with ``print``.
        """
        print('Cross-Validation')
        for feature in self.features:
            banner = 'Feature Name: {}, k-mer size: {}'.format(
                feature.name, self.kmer_size)
            print(banner)

            # Tissue this feature corresponds to.
            scid = feature.corresp_tissue

            # NOTE: all_gnids is deliberately None here; the per-tissue lookup
            # (GetData.wd_all_gnid_per_tissue) is currently disabled.
            validator = CrossValidation(
                genes=self.genes,
                all_gnids=None,
                class_size=self.class_size,
                fold_size=self.fold_size,
                kmer_size=self.kmer_size,
                exp_setting=self.exp_setting)

            validator.build_datasets(
                assigned_genes=feature.assigned_genes,
                neg_class_mode=self.exp_setting.get_neg_class_mode(),
                corresp_tissue=scid)

            # Run the validation and keep its predictions on the feature.
            # The keyword spelling below is kept as-is: it has to match the
            # parameter name declared by set_prediction_results.
            outcome = validator.validation()
            feature.set_prediction_results(prediction_restuls=outcome)

            # Store the confusion-matrix set for this validation run as well.
            cm_set = self.set_confusion_matrix(
                validation=validator, fold_size=self.fold_size)
            feature.set_confusion_matrix_set(cm_set=cm_set)
コード例 #3
0
 def start(self):
     """Run the repeated k-fold evaluation and return the combined measure.

     ``self.repeat`` times, ``self.raw_data`` is split with
     ``cross.split_list(self.raw_data, self.k, True)`` and wrapped in
     ``cross.KFold``.  For every fold, statistics are computed from
     ``fold[0]`` with ``NormalDist.calc_normal_stats`` and each item of
     ``fold[1]`` is recorded in a fresh ``ConfusionMatrixStatistic``.
     Per-fold stats are joined with ``ConfusionMatrix.Measure.connect``
     once per repetition, and once more across all repetitions.
     """
     per_repeat = []
     for _ in range(self.repeat):
         per_fold = []
         parts = cross.split_list(self.raw_data, self.k, True)
         for fold in cross.KFold(parts):
             stats, class_prob = NormalDist.calc_normal_stats(fold[0])
             matrix = ConfusionMatrix.ConfusionMatrixStatistic(stats)
             for sample in fold[1]:
                 # The sample's last element is passed as the first argument
                 # (presumably the known class label - confirm with caller).
                 tail_value = sample[len(sample) - 1]
                 chosen_class = NormalDist.get_class(
                     stats, class_prob, sample)
                 matrix.add_result(tail_value, chosen_class)
             per_fold.append(matrix.calc_stats())
         per_repeat.append(ConfusionMatrix.Measure.connect(per_fold))
     return ConfusionMatrix.Measure.connect(per_repeat)
コード例 #4
0
 def start(self):
     """Run the repeated k-fold evaluation using the bucket-based helper.

     ``self.repeat`` times, ``self.raw_data`` is split with
     ``cross.split_list(self.raw_data, self.k, True)`` and wrapped in
     ``cross.KFold``.  For every fold,
     ``edp.create_dictionary_with_buckets`` is called with the full raw
     data, ``fold[0]`` and ``self.bins``; each item of ``fold[1]`` is then
     recorded in a fresh ``ConfusionMatrixStatistic``.  Per-fold stats are
     joined with ``ConfusionMatrix.Measure.connect`` once per repetition,
     and once more across all repetitions.
     """
     per_repeat = []
     for _ in range(self.repeat):
         per_fold = []
         parts = cross.split_list(self.raw_data, self.k, True)
         for fold in cross.KFold(parts):
             built = edp.create_dictionary_with_buckets(
                 self.raw_data, fold[0], self.bins)
             stats, class_prob, buckets = built
             matrix = ConfusionMatrix.ConfusionMatrixStatistic(stats)
             for sample in fold[1]:
                 # The sample's last element is passed as the first argument
                 # (presumably the known class label - confirm with caller).
                 tail_value = sample[len(sample) - 1]
                 chosen_class = edp.get_class(
                     stats, class_prob, buckets, sample)
                 matrix.add_result(tail_value, chosen_class)
             per_fold.append(matrix.calc_stats())
         per_repeat.append(ConfusionMatrix.Measure.connect(per_fold))
     return ConfusionMatrix.Measure.connect(per_repeat)
コード例 #5
0
    def test_prediction_results(self):
        """Cross-validate tissues 1 through 23 and print every gene's predictions.

        One ``CrossValidation`` is built and run for each tissue number.  A
        mapping ``gnid -> list`` is then assembled in which each non-falsy
        result set contributes the gene's predicted class, or ``'?'`` when
        the gene is absent from that set.  Finally, for each tissue, one line
        per gene is printed: the gene id, the comma-joined vector and the
        class assigned to the gene in that tissue's results (``'?'`` when
        unavailable).
        """
        # Single definition of the tissue numbers, shared by both loops.
        tissues = range(1, 24)

        all_prediction_results = []
        for corresp_tissue in tissues:
            cv = CrossValidation(genes=self.genes,
                                 all_gnids=self.gnids,
                                 class_size=self.class_size,
                                 fold_size=self.fold_size,
                                 kmer_size=self.kmer_size,
                                 exp_setting=self.exp_setting)
            cv.build_datasets(
                assigned_genes=self.assigned_gnids,
                neg_class_mode=self.exp_setting.get_neg_class_mode(),
                corresp_tissue=corresp_tissue)
            all_prediction_results.append(cv.validation())

        feature_vector = {}
        for gnid in self.gnids:
            gnid_vector = []
            for prediction_results in all_prediction_results:
                # Falsy result sets are skipped, so they add no column.
                if not prediction_results:
                    continue
                gnid_pr = prediction_results.get(gnid)
                if gnid_pr is None:
                    gnid_vector.append('?')
                else:
                    gnid_vector.append(gnid_pr.get_predicted_class())
            feature_vector[gnid] = gnid_vector

        # The joined vector does not depend on the tissue being displayed,
        # so build each line once instead of once per tissue.
        joined_vectors = {
            gnid: ",".join(str(value) for value in vector)
            for gnid, vector in feature_vector.items()
        }

        # show feature_vector
        for tissue, tissue_results in zip(tissues, all_prediction_results):
            print('Tissue#:', tissue)
            for gnid, line in joined_vectors.items():
                # Apply the same falsy guard used when building the vectors:
                # a missing (e.g. None) result set must not be dereferenced.
                p_results = tissue_results.get(gnid) if tissue_results else None
                if p_results is None:
                    data_label = '?'
                else:
                    data_label = 'data_label:{}'.format(
                        p_results.get_assigned_class())
                print("%s,%s,%s\n" % (gnid, line, data_label))
コード例 #6
0
ファイル: Data.py プロジェクト: towardtruth/nbk
    def test_features_dataset(self):
        """Build the cross-validation datasets of every feature and test them.

        For each entry of ``self.features`` the gene ids of the feature's
        tissue are fetched with ``GetData.wd_all_gnid_per_tissue``, a
        ``CrossValidation`` is created from them, its datasets are built and
        ``test_datasets()`` is invoked on it.
        """
        print('TEST features dataset')
        for feature in self.features:
            # Gene ids restricted to the tissue this feature corresponds to
            # (used instead of the global self.wd_all_gnids).
            scid = feature.corresp_tissue
            gnids_for_tissue = GetData.wd_all_gnid_per_tissue(
                self.exp_setting, scid)

            validator = CrossValidation(
                genes=self.genes,
                all_gnids=gnids_for_tissue,
                class_size=self.class_size,
                fold_size=self.fold_size,
                kmer_size=self.kmer_size)

            neg_mode = self.exp_setting.get_neg_class_mode()
            validator.build_datasets(
                assigned_genes=feature.assigned_genes,
                neg_class_mode=neg_mode,
                corresp_tissue=feature.corresp_tissue)

            validator.test_datasets()