def get_data():
    """Pull training and test arrays out of the stored feature database.

    Opens the 'FingerVector' table of ``vec_store.sqlite`` under ``wkdir``.
    Every column except the first and the last is queried as feature data,
    and the last column is queried as target data. ``wkdir``, ``train_size``,
    ``test_size`` and ``n_features`` are not parameters; they are resolved
    from the enclosing scope.

    Returns
    -------
    tuple
        ``(train_features, train_targets, test_features, test_targets)``.
        Training data are rows ``[:train_size]`` and test data are rows
        ``[test_size:]``; both feature blocks keep only the first
        ``n_features`` columns.
    """
    db_path = '{}/vec_store.sqlite'.format(wkdir)
    database = DescriptorDatabase(db_name=db_path, table='FingerVector')

    # Work out which columns hold features and which one holds the target.
    columns = database.get_column_names()
    feature_columns = columns[1:-1]
    target_columns = columns[-1:]

    feature_data = database.query_db(names=feature_columns)
    raw_targets = database.query_db(names=target_columns)

    # Flatten the targets to one value per feature row.
    n_rows = np.shape(feature_data)[0]
    target_data = np.reshape(raw_targets, (n_rows, ))

    # NOTE(review): the test block starts at row index ``test_size`` and runs
    # to the end; it is not "the last test_size rows" -- confirm intended.
    train_rows = slice(None, train_size)
    test_rows = slice(test_size, None)
    kept_columns = slice(None, n_features)

    return (
        feature_data[train_rows, kept_columns],
        target_data[train_rows],
        feature_data[test_rows, kept_columns],
        target_data[test_rows],
    )
def todb(self, features, targets):
    """Write feature and target arrays to a new descriptor database.

    Each row is given a freshly generated ``uuid4`` string as its first
    entry, followed by the feature values and finally the target value.
    Feature columns are named ``f0``, ``f1``, ... and the target column is
    named ``target``; the id column is not included in the names handed to
    the database. The database file and table come from ``self.db_name``
    and ``self.table``.

    Parameters
    ----------
    features : array
        Two-dimensional feature data, one row per data point.
    targets : array or list
        One target value per row of ``features``.
    """
    n_rows = len(targets)

    # Append the targets as the last column of the feature matrix.
    target_column = np.reshape(targets, (n_rows, 1))
    values = np.concatenate((features, target_column), axis=1)

    # Prepend one unique id per row.
    # NOTE(review): joining string ids with numeric values in one array
    # presumably yields a string-typed array -- confirm the database layer
    # expects that.
    row_ids = []
    for _ in range(n_rows):
        row_ids.append(str(uuid.uuid4()))
    id_column = np.reshape(row_ids, (len(row_ids), 1))
    table_data = np.concatenate((id_column, values), axis=1)

    # Column names: one per feature, then the target.
    n_columns = np.shape(features)[1]
    names = ['f' + str(index) for index in range(n_columns)]
    names.append('target')

    # Create the database and fill it with the assembled rows.
    database = DescriptorDatabase(db_name=self.db_name, table=self.table)
    database.create_db(names=names)
    database.fill_db(descriptor_names=names, data=table_data)
def test_expand(self):
    """Exercise the feature-expansion helpers on data from the database.

    Loads feature and target data from the 'FingerVector' table of
    ``vec_store.sqlite`` under ``wkdir``, splits off training and test
    blocks using ``train_size`` and ``test_size`` from the enclosing scope,
    and checks the output shape (and, where labels exist, the label count)
    of each expansion routine in ``fe``. ``fe.generate_features`` is then
    run twice on a small slice without checking its result. Finally the
    training features, training targets and test features are stored on
    the test class.
    """
    db_path = '{}/vec_store.sqlite'.format(wkdir)
    database = DescriptorDatabase(db_name=db_path, table='FingerVector')

    # First and last columns are excluded from the features; the last
    # column is the target.
    columns = database.get_column_names()
    feature_columns, target_columns = columns[1:-1], columns[-1:]
    feature_data = database.query_db(names=feature_columns)
    raw_targets = database.query_db(names=target_columns)
    target_data = np.reshape(raw_targets, (np.shape(feature_data)[0], ))

    # NOTE(review): the test block starts at row index ``test_size`` and
    # runs to the end -- confirm this split is intended.
    train_features = feature_data[:train_size, :]
    train_targets = target_data[:train_size]
    test_features = feature_data[test_size:, :]

    n_rows, n_cols = np.shape(train_features)
    # The values are unused, but the unpacking raises unless the test block
    # has exactly two dimensions.
    _test_rows, _test_cols = np.shape(test_features)

    # Toy feature names used to generate labels.
    labels = ['f{}'.format(i) for i in range(n_cols)]

    # Width expected from the pairwise expansions below.
    n_pairs = n_cols * (n_cols + 1) / 2

    def check_expansion(expanded, expanded_labels, width):
        """Assert the expanded shape and that there is one label per column."""
        self.assertTrue(np.shape(expanded) == (n_rows, width))
        self.assertTrue(len(expanded_labels) == np.shape(expanded)[1])

    single = fe.single_transform(train_features)
    self.assertTrue(np.shape(single) == (n_rows, n_cols * 3))

    check_expansion(
        fe.get_order_2(train_features),
        fe.get_labels_order_2(labels, div=False),
        n_pairs,
    )
    check_expansion(
        fe.get_div_order_2(train_features),
        fe.get_labels_order_2(labels, div=True),
        n_cols**2,
    )
    check_expansion(
        fe.get_order_2ab(train_features, a=2, b=4),
        fe.get_labels_order_2ab(labels, a=2, b=4),
        n_pairs,
    )
    check_expansion(
        fe.get_ablog(train_features, a=2, b=4),
        fe.get_labels_ablog(labels, a=2, b=4),
        n_pairs,
    )

    # Run the general generator on a small slice; only checks it executes.
    sample = train_features[:3, :10]
    fe.generate_features(sample, max_num=2, max_den=0, log=False,
                         sqrt=False, exclude=False, s=True)
    fe.generate_features(sample, max_num=2, max_den=1, log=True,
                         sqrt=True, exclude=True, s=True)

    # Store the split on the class itself rather than on this instance.
    cls = self.__class__
    cls.train_features = train_features
    cls.train_targets = train_targets
    cls.test_features = test_features
def test_storage(self):
    """Run the descriptor database through create, fill, query and update.

    Builds the 'FingerVector' table in ``vec_store.sqlite`` under ``wkdir``
    with one column per entry in a row of ``self.data`` plus an 'Energy'
    column, fills it from ``self.data`` and ``self.all_cand``, and then
    prints the results of several queries. A 'random' column is added and
    set for every stored id. Nothing is asserted; the test passes as long
    as no call raises.
    """
    # Column names: one per descriptor, then the target.
    n_descriptors = np.shape(self.data)[1]
    descriptors = ['f' + str(index) for index in range(n_descriptors)]
    targets = ['Energy']
    names = descriptors + targets

    # Create the database that stores the system descriptors.
    db_name = '/vec_store.sqlite'
    database = DescriptorDatabase(db_name=wkdir + db_name,
                                  table='FingerVector')
    database.create_db(names=names)

    # Each row is: unique id, descriptor values, raw score.
    print('Generate the database')
    rows = []
    for fingerprint, candidate in zip(self.data, self.all_cand):
        row = [candidate.info['unique_id']]
        row.extend(fingerprint)
        row.append(candidate.info['key_value_pairs']['raw_score'])
        rows.append(row)

    database.fill_db(descriptor_names=names, data=rows)

    # Query by column names.
    train_fingerprint = database.query_db(names=descriptors)
    train_target = database.query_db(names=targets)
    print('\nfeature data for candidates:\n', train_fingerprint,
          '\ntarget data for candidates:\n', train_target)

    # Query by a single hard-coded id.
    cand_data = database.query_db(
        unique_id='7a216711c2eae02decc04da588c9e592')
    print('\ndata for random candidate:\n', cand_data)

    # Add a new column and set a random value for every stored id.
    all_ids = database.query_db(names=['uuid'])
    database.create_column(new_column=['random'])
    for id_row in all_ids:
        database.update_descriptor(descriptor='random', new_data=random(),
                                   unique_id=id_row[0])

    print('\nretrieve random vars:\n', database.query_db(names=['random']))
    print('\nretrieved column names:\n', database.get_column_names())