예제 #1
0
 def learn(data):
     """Run ``SPN.LearnStructure`` on ``data`` and return its result.

     Rows are split with ``Splitting.Gower()`` and columns with
     ``Splitting.RDCTest(threshold=0.3)``. ``featureTypes`` and ``domains``
     are taken from the enclosing scope.
     """
     structure_options = {
         "featureTypes": featureTypes,
         "row_split_method": Splitting.Gower(),
         "col_split_method": Splitting.RDCTest(threshold=0.3),
         "domains": domains,
         "alpha": 1,
         "min_instances_slice": 50,
     }
     return SPN.LearnStructure(data, **structure_options)
예제 #2
0
 def learn(data):
     """Run ``SPN.LearnStructure`` on ``data`` with histogram families.

     One ``'histogram'`` family is requested per column of ``data``.
     ``featureTypes`` and ``domains`` are taken from the enclosing scope.
     The result of ``SPN.LearnStructure`` is returned unchanged.
     """
     row_splitter = Splitting.Gower()
     col_splitter = Splitting.RDCTest(threshold=0.3)
     histogram_families = ['histogram'] * data.shape[1]

     # Fixed slice size; int(data.shape[0] * 0.01) is a data-relative
     # alternative.
     learned = SPN.LearnStructure(
         data,
         featureTypes=featureTypes,
         row_split_method=row_splitter,
         col_split_method=col_splitter,
         domains=domains,
         alpha=0.1,
         families=histogram_families,
         min_instances_slice=200,
     )
     return learned
예제 #3
0
    def learn():
        """Run ``SPN.LearnStructure`` on the enclosing scope's ``data``.

        Every column is declared ``"discrete"``. Rows are split with
        ``Splitting.Gower()`` and columns with a linear
        ``Splitting.RDCTest`` at threshold 0.3. The result of
        ``SPN.LearnStructure`` is returned unchanged.
        """
        n_columns = data.shape[1]
        return SPN.LearnStructure(
            data,
            featureTypes=["discrete"] * n_columns,
            row_split_method=Splitting.Gower(),
            col_split_method=Splitting.RDCTest(threshold=0.3, linear=True),
            min_instances_slice=200,
        )
예제 #4
0
 def learn():
     """Run ``SPN.LearnStructure`` on ``traintopics`` and return its result.

     ``traintopics``, ``featureTypes``, ``featureNames`` and ``domains`` are
     taken from the enclosing scope. Rows are split with
     ``Splitting.Gower()`` and columns with a linear ``Splitting.RDCTest``
     at threshold 0.1.
     """
     split_rows = Splitting.Gower()
     split_cols = Splitting.RDCTest(threshold=0.1, linear=True)
     return SPN.LearnStructure(
         traintopics,
         featureTypes=featureTypes,
         row_split_method=split_rows,
         col_split_method=split_cols,
         featureNames=featureNames,
         domains=domains,
         min_instances_slice=100,
     )
예제 #5
0
    def estimate_density(self, training_data, validation_data=None):
        """Fit a MSPN on the training data and store it in ``self.spn``.

        Args:
            training_data: object exposing ``features``, an iterable of
                ``(feature, type_string)`` pairs where each feature has a
                ``symbol_name()`` method, and ``data``, which is passed
                through ``asarray`` before learning.
            validation_data: never used.

        Raises:
            NotImplementedError: if ``self.learner_args['row_split']`` is
                present but is neither ``'gower'`` nor ``'rdc-kmeans'``.
            KeyError: if ``self.learner_args`` has no ``'seed'`` entry.
        """
        feature_types = []
        feature_names = []
        # Single pass: ``features`` may be a one-shot iterable.
        for feat, str_type in training_data.features:
            feature_types.append(str_type)
            feature_names.append(feat.symbol_name())

        # A missing 'row_split' behaves like 'rdc-kmeans'.
        row_split = self.learner_args.get('row_split', 'rdc-kmeans')
        if row_split == 'gower':
            row_split_method = Splitting.Gower(n_clusters=2)
        elif row_split == 'rdc-kmeans':
            row_split_method = Splitting.KmeansRDCRows(n_clusters=2,
                                                       k=20,
                                                       OHE=1)
        else:
            raise NotImplementedError(
                "unsupported row_split: {!r}".format(row_split))

        col_split_method = Splitting.RDCTest(threshold=0.1, OHE=1, linear=1)

        rand_seed = self.learner_args['seed']

        # These keys are consumed here (or deliberately ignored, in the case
        # of 'leaf'); everything else is forwarded to SPN.LearnStructure.
        handled_keys = ('seed', 'leaf', 'row_split')
        mspnargs = {
            k: v
            for k, v in self.learner_args.items()
            if k not in handled_keys
        }

        # let MSPNs sort this out: no per-feature families are supplied, so
        # there is no point in building a families list beforehand.
        self.spn = SPN.LearnStructure(asarray(training_data.data),
                                      feature_types,
                                      families=None,
                                      featureNames=feature_names,
                                      rand_seed=rand_seed,
                                      row_split_method=row_split_method,
                                      col_split_method=col_split_method,
                                      **mspnargs)
예제 #6
0
    # NOTE(review): this fragment appears to be the body of a loop whose
    # header is not visible here; train/valid/test, feature_names,
    # feature_types, domains, dsname and result come from that outer code.

    # Disabled debug plot of the first column of the test split.
    # plt.hist(test[:,0], bins=100, histtype='stepfilled', normed=True, color='r', alpha=0.5, label='Uniform')
    #
    # plt.show()

    # Print the feature metadata and the training-set shape before learning.
    # print(domains)
    print(feature_names)
    print(feature_types)
    print(train.shape)

    # Alternative row/column split configurations, kept disabled for reference:
    # spn = SPN.LearnStructure(train, featureNames=feature_names, domains=domains,  featureTypes=feature_types, row_split_method=Splitting.RandomPartitionConditioningRows(), col_split_method=Splitting.RDCTestOHEpy(threshold=0.75),
    # spn = SPN.LearnStructure(train, featureNames=feature_names, domains=domains,  featureTypes=feature_types, row_split_method=Splitting.DBScanOHE(eps=1.0, min_samples=2), col_split_method=Splitting.RDCTestOHEpy(threshold=0.75),
    # spn = SPN.LearnStructure(train, featureNames=feature_names,
    # domains=domains,  featureTypes=feature_types,
    # row_split_method=Splitting.KmeansOHERows(),
    # col_split_method=Splitting.RDCTest(threshold=0.75),
    # Active configuration: Gower row splits and an RDC column test with
    # threshold 0.05, learned on the training split only.
    spn = SPN.LearnStructure(train, featureNames=feature_names, domains=domains,  featureTypes=feature_types, row_split_method=Splitting.Gower(), col_split_method=Splitting.RDCTest(threshold=0.05),
                             min_instances_slice=20,  cluster_first=True)

    print(spn)

    # Record [dsname, mean eval on train, mean eval on valid, mean eval on test].
    # NOTE(review): spn.root.eval(...) is called three times per split (here,
    # for the mean printout and for the min printout); if eval is
    # deterministic the results could be computed once and reused -- confirm.
    result.append([dsname, numpy.mean(spn.root.eval(train)), numpy.mean(
        spn.root.eval(valid)), numpy.mean(spn.root.eval(test))])
    # Mean of the per-row eval values for each split.
    print("train", numpy.mean(spn.root.eval(train)))
    print("valid", numpy.mean(spn.root.eval(valid)))
    print("test", numpy.mean(spn.root.eval(test)))

    # Minimum of the per-row eval values for each split (same labels as above).
    print("train", numpy.min(spn.root.eval(train)))
    print("valid", numpy.min(spn.root.eval(valid)))
    print("test", numpy.min(spn.root.eval(test)))

# Print everything accumulated in result.
print(result)