예제 #1
0
def get_catlearn_fp(A, Name=False):
    """Return the CatLearn fingerprint of ``A``, or the matching feature names.

    Parameters
    ----------
    A : object or list
        Input structure(s). When ``Name`` is falsy, ``A`` is wrapped in a
        list and passed through ``autogen_info`` before any features are
        generated. When ``Name`` is truthy, ``A`` is only used to register
        atom types on the generator.
    Name : bool
        If truthy, return the feature names instead of the feature values.

    Returns
    -------
    The result of ``FeatureGenerator.return_names`` when ``Name`` is truthy,
    otherwise the result of ``FeatureGenerator.return_vec``.
    """
    names_only = bool(Name)
    if not names_only:
        A = autogen_info([A])

    gen = FeatureGenerator()
    # The generator is always handed a list, so wrap a lone object.
    atoms_list = A if isinstance(A, list) else [A]
    gen._get_atom_types(atoms_list)

    # Generators that make up the fingerprint, in output order.
    # Deliberately left out for now: generalized_cn, bag_cn,
    # en_difference_ads, en_difference_chemi, en_difference_active,
    # bag_atoms_ads and bag_connections_ads.
    generator_names = (
        'mean_chemisorbed_atoms',
        'bulk',
        'term',
        'strain',
        'mean_surf_ligands',
        'mean_site',
        'median_site',
        'max_site',
        'min_site',
        'sum_site',
        'count_chemisorbed_fragment',
    )
    train_fpv = [getattr(gen, name) for name in generator_names]

    if names_only:
        return gen.return_names(train_fpv)

    gen.normalize_features(A)
    return gen.return_vec(A, train_fpv)
예제 #2
0
    def test_generators(self):
        """Generate features from atoms objects and check their shapes.

        Loads the relaxed candidates from the GA database, splits them into
        a test set and a training set, and runs each fingerprint generator,
        asserting the shape of the returned feature matrix (and, for some
        generators, that the number of feature names matches its width).

        On success the candidates and the concatenated training features
        are stored as class attributes ``all_cand`` and ``data``.
        """
        # Test generic features for Pt then both Pt and Au.
        get_mendeleev_params(atomic_number=78)
        get_mendeleev_params(atomic_number=[78, 79],
                             params=default_params + ['en_ghosh'])

        # Connect database generated by a GA search.
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

        # Get all relaxed candidates from the db file.
        print('Getting candidates from the database')
        all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

        # Setup the test and training datasets.
        testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
        self.assertEqual(len(testset['atoms']), test_size)
        self.assertEqual(len(testset['taken']), test_size)

        trainset = get_train(atoms=all_cand,
                             size=train_size,
                             taken=testset['taken'],
                             key='raw_score')
        self.assertEqual(len(trainset['atoms']), train_size)
        self.assertEqual(len(trainset['target']), train_size)

        # Initiate the fingerprint generators with relevant input variables.
        print('Getting the fingerprints')
        f = FeatureGenerator(element_parameters='atomic_radius', nprocs=1)
        f.normalize_features(trainset['atoms'], testset['atoms'])

        # `data` accumulates the training features column-wise; the
        # concatenation order below determines the final column order.
        data = f.return_vec(trainset['atoms'], [f.nearestneighbour_vec])
        n, d = np.shape(data)
        self.assertEqual((n, d), (train_size, 4))
        self.assertEqual(len(f.return_names([f.nearestneighbour_vec])), d)
        print('passed nearestneighbour_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.bond_count_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 52))
        print('passed bond_count_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.distribution_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 10))
        print('passed distribution_vec')

        # EXPENSIVE to calculate. Not included in training data.
        train_fp = f.return_vec(testset['atoms'], [f.connections_vec])
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (test_size, 26))
        print('passed connections_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.rdf_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 20))
        print('passed rdf_vec')

        # Start testing the standard fingerprint vector generators.
        train_fp = f.return_vec(trainset['atoms'], [f.element_mass_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 1))
        self.assertEqual(len(f.return_names([f.element_mass_vec])), d)
        print('passed element_mass_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.element_parameter_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 4))
        self.assertEqual(len(f.return_names([f.element_parameter_vec])), d)
        print('passed element_parameter_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.composition_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 2))
        self.assertEqual(len(f.return_names([f.composition_vec])), d)
        print('passed composition_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.eigenspectrum_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 147))
        self.assertEqual(len(f.return_names([f.eigenspectrum_vec])), d)
        print('passed eigenspectrum_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.distance_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertEqual((n, d), (train_size, 2))
        self.assertEqual(len(f.return_names([f.distance_vec])), d)
        print('passed distance_vec')

        # Several generators in one call: widths add up (147 + 1 + 2).
        combined = [f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec]
        train_fp = f.return_vec(trainset['atoms'], combined)
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (train_size, 150))
        self.assertEqual(
            len(
                f.return_names([
                    f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec
                ])), d)
        print('passed combined generation')

        # NOTE: the feature-name count is not asserted for the neighbor
        # generators; only the matrix shape is checked.
        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (train_size, len(trainset['atoms'][0])))
        print('passed neighbor_sum_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (train_size, len(trainset['atoms'][0])))
        print('passed neighbor_mean_vec')

        # Repeat the neighbor checks with max_neighbors='full'.
        f = FeatureGenerator(element_parameters='atomic_radius',
                             max_neighbors='full',
                             nprocs=1)
        f.normalize_features(trainset['atoms'], testset['atoms'])

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (train_size, len(trainset['atoms'][0])))
        print('passed neighbor_sum_vec all neighbors')

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
        n, d = np.shape(train_fp)
        self.assertEqual((n, d), (train_size, len(trainset['atoms'][0])))
        print('passed neighbor_mean_vec all neighbors')

        # Do basic check for atomic properties.
        no_prop = []
        an_prop = []
        # EXPENSIVE to calculate. Not included in training data.
        for atoms in testset['atoms']:
            no_prop.append(neighbor_features(atoms=atoms))
            an_prop.append(
                neighbor_features(atoms=atoms, property=['atomic_number']))
        self.assertEqual(np.shape(no_prop), (test_size, 15))
        self.assertEqual(np.shape(an_prop), (test_size, 30))
        print('passed graph_vec')

        # Store the results as class attributes.
        self.__class__.all_cand = all_cand
        self.__class__.data = data
예제 #3
0
# Split the targets: the first `train_size` entries are used for training
# and the remaining entries for testing.
test_targets = np.asarray(targets[train_size:])
train_targets = np.asarray(targets[:train_size])

print('{} shape training atoms data'.format(np.shape(train_atoms)))
print('{} shape testing atoms data'.format(np.shape(test_atoms)))

# ## Feature Generation <a name="feature-generation"></a>
# [(Back to top)](#head)
#
# The model may need to know the full range of elements it has to account
# for. The feature generator tries to work this out from the maximum
# composition, but specifying it explicitly is more robust.

# In[4]:

generator = FeatureGenerator()

# Normalize over both sets together.
generator.normalize_features(
    train_candidates=train_atoms,
    test_candidates=test_atoms,
)
print('Max number of atom present in data: {}'.format(generator.atom_len))
print('Atom numbers present in data: {}'.format(generator.atom_types))

# Next, build the feature array for every atoms object. `return_vec()` takes
# the list of atoms objects and the list of feature functions to evaluate.

# In[5]:

# The same feature functions are used for the training and testing sets.
feature_functions = [generator.eigenspectrum_vec, generator.composition_vec]

train_features = generator.return_vec(train_atoms, feature_functions)
test_features = generator.return_vec(test_atoms, feature_functions)

print('{} shape training feature matrix'.format(np.shape(train_features)))
print('{} shape testing feature matrix'.format(np.shape(test_features)))