def _process(self): f = osp.join(self.processed_dir, 'pre_transform.pt') if osp.exists(f) and torch.load(f) != _repr(self.pre_transform): warnings.warn( f"The `pre_transform` argument differs from the one used in " f"the pre-processed version of this dataset. If you want to " f"make use of another pre-processing technique, make sure to " f"delete '{self.processed_dir}' first") f = osp.join(self.processed_dir, 'pre_filter.pt') if osp.exists(f) and torch.load(f) != _repr(self.pre_filter): warnings.warn( "The `pre_filter` argument differs from the one used in " "the pre-processed version of this dataset. If you want to " "make use of another pre-fitering technique, make sure to " "delete '{self.processed_dir}' first") if files_exist(self.processed_paths): # pragma: no cover return print('Processing...', file=sys.stderr) makedirs(self.processed_dir) self.process() path = osp.join(self.processed_dir, 'pre_transform.pt') torch.save(_repr(self.pre_transform), path) path = osp.join(self.processed_dir, 'pre_filter.pt') torch.save(_repr(self.pre_filter), path) print('Done!', file=sys.stderr)
def _process(self): if self.data_list is None: pass else: if osp.exists(osp.join(self.processed_dir, self.files)): return makedirs.makedirs(self.processed_dir) self.process()
def _download(self): if not osp.isdir(self.raw_dir): makedirs(self.raw_dir) self.download() elif len(os.listdir(self.raw_dir)) == 0: self.download() else: return
def class_from_module_repr(cls_name, module_repr): path = osp.join(gettempdir(), f'{getuser()}_pyg_jit') makedirs(path) with TempFile(mode='w+', suffix='.py', delete=False, dir=path) as f: f.write(module_repr) spec = spec_from_file_location(cls_name, f.name) mod = module_from_spec(spec) sys.modules[cls_name] = mod spec.loader.exec_module(mod) return getattr(mod, cls_name)
def dump_cfg(cfg): r""" Dumps the config to the output directory specified in :obj:`cfg.out_dir` Args: cfg (CfgNode): Configuration node """ makedirs(cfg.out_dir) cfg_file = os.path.join(cfg.out_dir, cfg.cfg_dest) with open(cfg_file, 'w') as f: cfg.dump(stream=f)
def _process(self): makedirs(self.processed_dir) if self.split == 'train' and len( glob.glob(osp.join(self.processed_dir, '*train.pt'))) > 0: return if self.split == 'test' and len( glob.glob(osp.join(self.processed_dir, '*test.pt'))) > 0: return if self.split == 'valid' and len( glob.glob(osp.join(self.processed_dir, '*valid.pt'))) > 0: return self.process()
def __init__(self, name='train', task_type=None): self.name = name self.task_type = task_type self._epoch_total = cfg.optim.max_epoch self._time_total = 0 # won't be reset self.out_dir = '{}/{}'.format(cfg.run_dir, name) makedirs(self.out_dir) if cfg.tensorboard_each_run: from tensorboardX import SummaryWriter self.tb_writer = SummaryWriter(self.out_dir) self.reset()
def set_printing(): """ Set up printing options """ logging.root.handlers = [] logging_cfg = {'level': logging.INFO, 'format': '%(message)s'} makedirs(cfg.run_dir) h_file = logging.FileHandler('{}/logging.log'.format(cfg.run_dir)) h_stdout = logging.StreamHandler(sys.stdout) if cfg.print == 'file': logging_cfg['handlers'] = [h_file] elif cfg.print == 'stdout': logging_cfg['handlers'] = [h_stdout] elif cfg.print == 'both': logging_cfg['handlers'] = [h_file, h_stdout] else: raise ValueError('Print option not supported') logging.basicConfig(**logging_cfg)
def _download(self): if osp.isdir(self.raw_dir) and len(os.listdir(self.raw_dir)) > 0: return makedirs(self.raw_dir) self.download()
def from_qm9_pretrained(cls, root: str, dataset: Dataset, target: int): os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import tensorflow as tf assert target >= 0 and target <= 12 and not target == 4 root = osp.expanduser(osp.normpath(root)) path = osp.join(root, 'pretrained_dimenet_pp', qm9_target_dict[target]) makedirs(path) url = f'{cls.url}/{qm9_target_dict[target]}' if not osp.exists(osp.join(path, 'checkpoint')): download_url(f'{url}/checkpoint', path) download_url(f'{url}/ckpt.data-00000-of-00002', path) download_url(f'{url}/ckpt.data-00001-of-00002', path) download_url(f'{url}/ckpt.index', path) path = osp.join(path, 'ckpt') reader = tf.train.load_checkpoint(path) # Configuration from DimeNet++: # https://github.com/gasteigerjo/dimenet/blob/master/config_pp.yaml model = cls( hidden_channels=128, out_channels=1, num_blocks=4, int_emb_size=64, basis_emb_size=8, out_emb_channels=256, num_spherical=7, num_radial=6, cutoff=5.0, max_num_neighbors=32, envelope_exponent=5, num_before_skip=1, num_after_skip=2, num_output_layers=3, ) def copy_(src, name, transpose=False): init = reader.get_tensor(f'{name}/.ATTRIBUTES/VARIABLE_VALUE') init = torch.from_numpy(init) if name[-6:] == 'kernel': init = init.t() src.data.copy_(init) copy_(model.rbf.freq, 'rbf_layer/frequencies') copy_(model.emb.emb.weight, 'emb_block/embeddings') copy_(model.emb.lin_rbf.weight, 'emb_block/dense_rbf/kernel') copy_(model.emb.lin_rbf.bias, 'emb_block/dense_rbf/bias') copy_(model.emb.lin.weight, 'emb_block/dense/kernel') copy_(model.emb.lin.bias, 'emb_block/dense/bias') for i, block in enumerate(model.output_blocks): copy_(block.lin_rbf.weight, f'output_blocks/{i}/dense_rbf/kernel') copy_(block.lin_up.weight, f'output_blocks/{i}/up_projection/kernel') for j, lin in enumerate(block.lins): copy_(lin.weight, f'output_blocks/{i}/dense_layers/{j}/kernel') copy_(lin.bias, f'output_blocks/{i}/dense_layers/{j}/bias') copy_(block.lin.weight, f'output_blocks/{i}/dense_final/kernel') for i, block in enumerate(model.interaction_blocks): copy_(block.lin_rbf1.weight, f'int_blocks/{i}/dense_rbf1/kernel') copy_(block.lin_rbf2.weight, f'int_blocks/{i}/dense_rbf2/kernel') copy_(block.lin_sbf1.weight, f'int_blocks/{i}/dense_sbf1/kernel') copy_(block.lin_sbf2.weight, f'int_blocks/{i}/dense_sbf2/kernel') copy_(block.lin_ji.weight, f'int_blocks/{i}/dense_ji/kernel') copy_(block.lin_ji.bias, f'int_blocks/{i}/dense_ji/bias') copy_(block.lin_kj.weight, f'int_blocks/{i}/dense_kj/kernel') copy_(block.lin_kj.bias, f'int_blocks/{i}/dense_kj/bias') copy_(block.lin_down.weight, f'int_blocks/{i}/down_projection/kernel') copy_(block.lin_up.weight, f'int_blocks/{i}/up_projection/kernel') for j, layer in enumerate(block.layers_before_skip): copy_(layer.lin1.weight, f'int_blocks/{i}/layers_before_skip/{j}/dense_1/kernel') copy_(layer.lin1.bias, f'int_blocks/{i}/layers_before_skip/{j}/dense_1/bias') copy_(layer.lin2.weight, f'int_blocks/{i}/layers_before_skip/{j}/dense_2/kernel') copy_(layer.lin2.bias, f'int_blocks/{i}/layers_before_skip/{j}/dense_2/bias') copy_(block.lin.weight, f'int_blocks/{i}/final_before_skip/kernel') copy_(block.lin.bias, f'int_blocks/{i}/final_before_skip/bias') for j, layer in enumerate(block.layers_after_skip): copy_(layer.lin1.weight, f'int_blocks/{i}/layers_after_skip/{j}/dense_1/kernel') copy_(layer.lin1.bias, f'int_blocks/{i}/layers_after_skip/{j}/dense_1/bias') copy_(layer.lin2.weight, f'int_blocks/{i}/layers_after_skip/{j}/dense_2/kernel') copy_(layer.lin2.bias, f'int_blocks/{i}/layers_after_skip/{j}/dense_2/bias') random_state = np.random.RandomState(seed=42) perm = torch.from_numpy(random_state.permutation(np.arange(130831))) train_idx = perm[:110000] val_idx = perm[110000:120000] test_idx = perm[120000:] return model, (dataset[train_idx], dataset[val_idx], dataset[test_idx])
def from_qm9_pretrained(root, dataset, target): if tf is None: raise ImportError( '`DimeNet.from_qm9_pretrained` requires `tensorflow`.') assert target >= 0 and target <= 12 and not target == 4 root = osp.expanduser(osp.normpath(root)) path = osp.join(root, 'pretrained_dimenet', qm9_target_dict[target]) makedirs(path) url = f'{DimeNet.url}/{qm9_target_dict[target]}' if not osp.exists(osp.join(path, 'checkpoint')): download_url(f'{url}/checkpoint', path) download_url(f'{url}/ckpt.data-00000-of-00002', path) download_url(f'{url}/ckpt.data-00001-of-00002', path) download_url(f'{url}/ckpt.index', path) path = osp.join(path, 'ckpt') reader = tf.train.load_checkpoint(path) model = DimeNet(hidden_channels=128, out_channels=1, num_blocks=6, num_bilinear=8, num_spherical=7, num_radial=6, cutoff=5.0, envelope_exponent=5, num_before_skip=1, num_after_skip=2, num_output_layers=3) def copy_(src, name, transpose=False): init = reader.get_tensor(f'{name}/.ATTRIBUTES/VARIABLE_VALUE') init = torch.from_numpy(init) if name[-6:] == 'kernel': init = init.t() src.data.copy_(init) copy_(model.rbf.freq, 'rbf_layer/frequencies') copy_(model.emb.emb.weight, 'emb_block/embeddings') copy_(model.emb.lin_rbf.weight, 'emb_block/dense_rbf/kernel') copy_(model.emb.lin_rbf.bias, 'emb_block/dense_rbf/bias') copy_(model.emb.lin.weight, 'emb_block/dense/kernel') copy_(model.emb.lin.bias, 'emb_block/dense/bias') for i, block in enumerate(model.output_blocks): copy_(block.lin_rbf.weight, f'output_blocks/{i}/dense_rbf/kernel') for j, lin in enumerate(block.lins): copy_(lin.weight, f'output_blocks/{i}/dense_layers/{j}/kernel') copy_(lin.bias, f'output_blocks/{i}/dense_layers/{j}/bias') copy_(block.lin.weight, f'output_blocks/{i}/dense_final/kernel') for i, block in enumerate(model.interaction_blocks): copy_(block.lin_rbf.weight, f'int_blocks/{i}/dense_rbf/kernel') copy_(block.lin_sbf.weight, f'int_blocks/{i}/dense_sbf/kernel') copy_(block.lin_kj.weight, f'int_blocks/{i}/dense_kj/kernel') copy_(block.lin_kj.bias, f'int_blocks/{i}/dense_kj/bias') copy_(block.lin_ji.weight, f'int_blocks/{i}/dense_ji/kernel') copy_(block.lin_ji.bias, f'int_blocks/{i}/dense_ji/bias') copy_(block.W, f'int_blocks/{i}/bilinear') for j, layer in enumerate(block.layers_before_skip): copy_(layer.lin1.weight, f'int_blocks/{i}/layers_before_skip/{j}/dense_1/kernel') copy_(layer.lin1.bias, f'int_blocks/{i}/layers_before_skip/{j}/dense_1/bias') copy_(layer.lin2.weight, f'int_blocks/{i}/layers_before_skip/{j}/dense_2/kernel') copy_(layer.lin2.bias, f'int_blocks/{i}/layers_before_skip/{j}/dense_2/bias') copy_(block.lin.weight, f'int_blocks/{i}/final_before_skip/kernel') copy_(block.lin.bias, f'int_blocks/{i}/final_before_skip/bias') for j, layer in enumerate(block.layers_after_skip): copy_(layer.lin1.weight, f'int_blocks/{i}/layers_after_skip/{j}/dense_1/kernel') copy_(layer.lin1.bias, f'int_blocks/{i}/layers_after_skip/{j}/dense_1/bias') copy_(layer.lin2.weight, f'int_blocks/{i}/layers_after_skip/{j}/dense_2/kernel') copy_(layer.lin2.bias, f'int_blocks/{i}/layers_after_skip/{j}/dense_2/bias') # Use the same random seed as the official DimeNet` implementation. random_state = np.random.RandomState(seed=42) perm = torch.from_numpy(random_state.permutation(np.arange(130831))) train_idx = perm[:110000] val_idx = perm[110000:120000] test_idx = perm[120000:] return model, (dataset[train_idx], dataset[val_idx], dataset[test_idx])
def from_qm9_pretrained(root: str, dataset: Dataset, target: int): import ase import schnetpack as spk # noqa assert target >= 0 and target <= 12 units = [1] * 12 units[0] = ase.units.Debye units[1] = ase.units.Bohr**3 units[5] = ase.units.Bohr**2 root = osp.expanduser(osp.normpath(root)) makedirs(root) folder = 'trained_schnet_models' if not osp.exists(osp.join(root, folder)): path = download_url(SchNet.url, root) extract_zip(path, root) os.unlink(path) name = f'qm9_{qm9_target_dict[target]}' path = osp.join(root, 'trained_schnet_models', name, 'split.npz') split = np.load(path) train_idx = split['train_idx'] val_idx = split['val_idx'] test_idx = split['test_idx'] # Filter the splits to only contain characterized molecules. idx = dataset.data.idx assoc = idx.new_empty(idx.max().item() + 1) assoc[idx] = torch.arange(idx.size(0)) train_idx = assoc[train_idx[np.isin(train_idx, idx)]] val_idx = assoc[val_idx[np.isin(val_idx, idx)]] test_idx = assoc[test_idx[np.isin(test_idx, idx)]] path = osp.join(root, 'trained_schnet_models', name, 'best_model') with warnings.catch_warnings(): warnings.simplefilter('ignore') state = torch.load(path, map_location='cpu') net = SchNet(hidden_channels=128, num_filters=128, num_interactions=6, num_gaussians=50, cutoff=10.0, atomref=dataset.atomref(target)) net.embedding.weight = state.representation.embedding.weight for int1, int2 in zip(state.representation.interactions, net.interactions): int2.mlp[0].weight = int1.filter_network[0].weight int2.mlp[0].bias = int1.filter_network[0].bias int2.mlp[2].weight = int1.filter_network[1].weight int2.mlp[2].bias = int1.filter_network[1].bias int2.lin.weight = int1.dense.weight int2.lin.bias = int1.dense.bias int2.conv.lin1.weight = int1.cfconv.in2f.weight int2.conv.lin2.weight = int1.cfconv.f2out.weight int2.conv.lin2.bias = int1.cfconv.f2out.bias net.lin1.weight = state.output_modules[0].out_net[1].out_net[0].weight net.lin1.bias = state.output_modules[0].out_net[1].out_net[0].bias net.lin2.weight = state.output_modules[0].out_net[1].out_net[1].weight net.lin2.bias = state.output_modules[0].out_net[1].out_net[1].bias mean = state.output_modules[0].atom_pool.average net.readout = 'mean' if mean is True else 'add' dipole = state.output_modules[0].__class__.__name__ == 'DipoleMoment' net.dipole = dipole net.mean = state.output_modules[0].standardize.mean.item() net.std = state.output_modules[0].standardize.stddev.item() if state.output_modules[0].atomref is not None: net.atomref.weight = state.output_modules[0].atomref.weight else: net.atomref = None net.scale = 1. / units[target] return net, (dataset[train_idx], dataset[val_idx], dataset[test_idx])
def _download(self): if files_exist(self.raw_paths): # pragma: no cover return makedirs(self.raw_dir) self.download()
def test_to_superpixels(): root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize))) raw_folder = osp.join(root, 'MNIST', 'raw') processed_folder = osp.join(root, 'MNIST', 'processed') makedirs(raw_folder) makedirs(processed_folder) for resource in resources: path = download_url(resource, raw_folder) extract_gz(path, osp.join(root, raw_folder)) test_set = ( read_image_file(osp.join(raw_folder, 't10k-images-idx3-ubyte')), read_label_file(osp.join(raw_folder, 't10k-labels-idx1-ubyte')), ) torch.save(test_set, osp.join(processed_folder, 'training.pt')) torch.save(test_set, osp.join(processed_folder, 'test.pt')) dataset = MNIST(root, download=False) dataset.transform = T.Compose([T.ToTensor(), ToSLIC()]) data, y = dataset[0] assert len(data) == 2 assert data.pos.dim() == 2 and data.pos.size(1) == 2 assert data.x.dim() == 2 and data.x.size(1) == 1 assert data.pos.size(0) == data.x.size(0) assert y == 7 loader = DataLoader(dataset, batch_size=2, shuffle=False) for data, y in loader: assert len(data) == 4 assert data.pos.dim() == 2 and data.pos.size(1) == 2 assert data.x.dim() == 2 and data.x.size(1) == 1 assert data.batch.dim() == 1 assert data.ptr.dim() == 1 assert data.pos.size(0) == data.x.size(0) == data.batch.size(0) assert y.tolist() == [7, 2] break dataset.transform = T.Compose( [T.ToTensor(), ToSLIC(add_seg=True, add_img=True)]) data, y = dataset[0] assert len(data) == 4 assert data.pos.dim() == 2 and data.pos.size(1) == 2 assert data.x.dim() == 2 and data.x.size(1) == 1 assert data.pos.size(0) == data.x.size(0) assert data.seg.size() == (1, 28, 28) assert data.img.size() == (1, 1, 28, 28) assert data.seg.max().item() + 1 == data.x.size(0) assert y == 7 loader = DataLoader(dataset, batch_size=2, shuffle=False) for data, y in loader: assert len(data) == 6 assert data.pos.dim() == 2 and data.pos.size(1) == 2 assert data.x.dim() == 2 and data.x.size(1) == 1 assert data.batch.dim() == 1 assert data.ptr.dim() == 1 assert data.pos.size(0) == data.x.size(0) == data.batch.size(0) assert data.seg.size() == (2, 28, 28) assert data.img.size() == (2, 1, 28, 28) assert y.tolist() == [7, 2] break shutil.rmtree(root)