def construct_datasets(self, tag_xyz_map):
    """Build prediction datasets for every tag matched by the configured patterns.

    For each tag selected by ``pc.tags`` patterns, reads the tagged xyz
    file, computes its descriptor dataset (saving it next to the load
    directory), pairs it with an *empty* property dataset (prediction has
    no labels to make), and constructs an unshuffled :class:`HDNNPDataset`
    using the preprocessors restored from training output.

    Side effects: consumes matched entries from ``tag_xyz_map`` (via
    ``pop``), accumulates ``dc.n_sample`` and updates ``mc.n_input`` /
    ``mc.n_output`` from the last constructed dataset, and writes a
    ``<descriptor>-pred.npz`` file under ``pc.load_dir``.

    :param tag_xyz_map: mapping of tag name -> xyz file path
        (values are path-like; presumably ``pathlib.Path`` — confirm).
    :return: list of constructed ``HDNNPDataset`` objects.
    """
    dc = self.dataset_config
    mc = self.model_config
    pc = self.prediction_config

    # Restore the preprocessors fitted during training from the load dir.
    fitted_preprocesses = []
    for (name, args, kwargs) in dc.preprocesses:
        preprocess = PREPROCESS[name](*args, **kwargs)
        preprocess.load(pc.load_dir / 'preprocess' / f'{name}.npz',
                        verbose=self.verbose)
        fitted_preprocesses.append(preprocess)

    built = []
    for pattern in pc.tags:
        for tag in fnmatch.filter(tag_xyz_map, pattern):
            if self.verbose:
                pprint(f'Construct sub dataset tagged as "{tag}"')
            xyz_path = tag_xyz_map.pop(tag)
            structures = AtomicStructure.read_xyz(xyz_path)

            # Descriptor dataset: compute symmetry-function values and
            # persist them for later reuse.
            descriptor_set = DESCRIPTOR_DATASET[dc.descriptor](
                pc.order, structures, **dc.parameters)
            descriptor_set.make(verbose=self.verbose)
            descriptor_set.save(pc.load_dir / f'{dc.descriptor}-pred.npz',
                                verbose=self.verbose)

            # Property dataset stays empty: no labels exist at prediction time.
            property_set = PROPERTY_DATASET[dc.property_](pc.order, structures)

            # Combine descriptor & property into an unshuffled dataset.
            combined = HDNNPDataset(descriptor_set, property_set)
            combined.construct(all_elements=pc.elements,
                               preprocesses=fitted_preprocesses,
                               shuffle=False,
                               verbose=self.verbose)
            built.append(combined)

            # Propagate bookkeeping into the shared config objects.
            dc.n_sample += combined.total_size
            mc.n_input = combined.n_input
            mc.n_output = combined.n_label

    return built
def construct_test_datasets(self, tag_xyz_map, load_descriptor):
    """Build test datasets for every tag matched by the configured patterns.

    For each tag selected by ``tc.tags`` patterns, reads the tagged xyz
    file, obtains its descriptor dataset (loaded from a cached
    ``<descriptor>-test.npz`` when ``load_descriptor`` is true, otherwise
    computed and cached), obtains its property dataset the same way, and
    constructs an unshuffled, scattered :class:`HDNNPDataset` using the
    preprocessors restored from ``tc.out_dir / 'preprocess'``.

    Side effects: consumes matched entries from ``tag_xyz_map`` (via
    ``pop``), accumulates ``dc.n_sample`` and updates ``mc.n_input`` /
    ``mc.n_output`` from the last constructed dataset, and may write
    ``*-test.npz`` cache files next to each xyz file.

    :param tag_xyz_map: mapping of tag name -> xyz file path
        (values are path-like; ``with_name`` implies ``pathlib.Path``).
    :param load_descriptor: reuse cached ``.npz`` files instead of
        recomputing.
    :return: list of constructed ``HDNNPDataset`` objects.
    """
    dc = self.dataset_config
    mc = self.model_config
    tc = self.training_config
    preprocess_dir = tc.out_dir / 'preprocess'

    # Restore the preprocessors fitted during training.
    fitted_preprocesses = []
    for (name, args, kwargs) in dc.preprocesses:
        preprocess = PREPROCESS[name](*args, **kwargs)
        preprocess.load(preprocess_dir / f'{preprocess.name}.npz',
                        verbose=self.verbose)
        fitted_preprocesses.append(preprocess)

    built = []
    for pattern in tc.tags:
        for tag in fnmatch.filter(tag_xyz_map, pattern):
            if self.verbose:
                pprint(f'Construct sub test dataset tagged as "{tag}"')
            xyz_path = tag_xyz_map.pop(tag)
            structures = AtomicStructure.read_xyz(xyz_path)

            # Descriptor dataset: load the cache when asked, otherwise
            # compute and write it for next time.
            descriptor_set = DESCRIPTOR_DATASET[dc.descriptor](
                self.loss_function.order['descriptor'],
                structures, **dc.parameters)
            descriptor_npz = xyz_path.with_name(f'{dc.descriptor}-test.npz')
            if load_descriptor:
                descriptor_set.load(descriptor_npz,
                                    verbose=self.verbose, remake=dc.remake)
            else:
                descriptor_set.make(verbose=self.verbose)
                descriptor_set.save(descriptor_npz, verbose=self.verbose)

            # Property dataset: same cache-or-compute pattern.
            # NOTE(review): reuse is gated on `load_descriptor` too —
            # looks like a separate `load_property` flag may have been
            # intended; confirm against callers.
            property_set = PROPERTY_DATASET[dc.property_](
                self.loss_function.order['property'], structures)
            property_npz = xyz_path.with_name(f'{dc.property_}-test.npz')
            if property_npz.exists() and load_descriptor:
                property_set.load(property_npz,
                                  verbose=self.verbose, remake=dc.remake)
            else:
                property_set.make(verbose=self.verbose)
                property_set.save(property_npz, verbose=self.verbose)

            # Combine descriptor & property into an unshuffled dataset,
            # then scatter it (presumably across MPI ranks — confirm).
            combined = HDNNPDataset(descriptor_set, property_set)
            combined.construct(all_elements=tc.elements,
                               preprocesses=fitted_preprocesses,
                               shuffle=False,
                               verbose=self.verbose)
            combined.scatter()
            built.append(combined)

            # Propagate bookkeeping into the shared config objects.
            dc.n_sample += combined.total_size
            mc.n_input = combined.n_input
            mc.n_output = combined.n_label

    return built