def __init__(self, SFBlock, data_loader=None, cuda=False):
    """Standardize symmetry-function outputs to zero mean / unit variance.

    Args:
        SFBlock: module computing symmetry functions; must expose
            ``n_symfuncs`` (number of symmetry-function features).
        data_loader: optional iterable of sample dicts used to estimate
            the per-feature mean and standard deviation over the data.
            When ``None``, an identity standardization (mean 0, std 1)
            is used instead.
        cuda: if True, run the statistics pass on the GPU.
    """
    super(StandardizeSF, self).__init__()
    device = torch.device("cuda" if cuda else "cpu")
    self.n_symfuncs = SFBlock.n_symfuncs

    if data_loader is not None:
        # Single pass over the data to accumulate per-feature statistics.
        symfunc_statistics = StatisticsAccumulator(batch=True, atomistic=True)
        SFBlock = SFBlock.to(device)
        # No gradients are needed for statistics; no_grad avoids building
        # the autograd graph for every forward pass (saves memory/time).
        with torch.no_grad():
            for sample in data_loader:
                if cuda:
                    sample = {k: v.to(device) for k, v in sample.items()}
                # Call the module itself rather than .forward() so that
                # registered hooks are honored (recommended nn.Module usage).
                symfunc_values = SFBlock(sample)
                symfunc_statistics.add_sample(symfunc_values.detach())
        SF_mean, SF_stddev = symfunc_statistics.get_statistics()
    else:
        # No data available: fall back to an identity transform.
        SF_mean = torch.zeros(self.n_symfuncs)
        SF_stddev = torch.ones(self.n_symfuncs)

    self.SFBlock = SFBlock
    self.standardize = snn.Standardize(SF_mean, SF_stddev)
# Make the loaders
train_load = AtomsLoader(train_data, shuffle=True,
                         num_workers=args.num_workers,
                         batch_size=batch_size)
valid_load = AtomsLoader(valid_data, num_workers=args.num_workers,
                         batch_size=batch_size)
logger.info('Made training set loader. Workers={}, Train Size={}, '
            'Validation Size={}, Batch Size={}'.format(
                args.num_workers, len(train_data), len(valid_data), batch_size))

# Get the baseline statistics.
# Load per-element reference values if they were saved alongside the model.
atomref_path = os.path.join(schnet_dir, net_dir, 'atomref.npy')  # hoisted: used twice below
atomref = None
if os.path.isfile(atomref_path):
    atomref = np.load(atomref_path)

if options.get('delta', None) is not None:
    # Delta-learning: the training target is the difference between the
    # output property and a baseline property, so accumulate mean/std
    # statistics of that difference over the whole training set.
    delta_prop = options['delta']
    statistic = StatisticsAccumulator(batch=True)
    for d in train_load:
        d['delta_temp'] = d[options['output_props'][0]] - d[delta_prop]
        # NOTE(review): relies on AtomsLoader's private helper
        # `_update_statistic`; a public API for custom-property
        # statistics would be more robust against upstream changes.
        train_load._update_statistic(True, atomref, 'delta_temp', d, statistic)
    mean, std = statistic.get_statistics()
    mean = (mean,)  # Make them a tuple
    std = (std,)
    logger.info('Computed statistics for delta-learning model')
else:
    if atomref is not None:
        # Per-property statistics with single-atom reference corrections;
        # atomref.T pairs each property with its column of references.
        mean, std = zip(*[train_load.get_statistics(x, per_atom=True,
                                                    atomrefs=ar[:, None])
                          for x, ar in zip(options['output_props'],
                                           atomref.T)])
    else:
        mean, std = zip(*[train_load.get_statistics(x, per_atom=True)
                          for x in options['output_props']])