def process(self):
    """Convert the raw archive into collated train/test files.

    Extracts ``self.raw_paths[0]`` into ``self.raw_dir``, reads
    ``structures.xyz`` and ``bandgaps.csv`` from there, and pairs them
    up one-to-one into ``Data`` objects (``z``: atomic numbers as int64,
    ``pos``: positions as float, ``y``: single-element band-gap tensor).
    The first 10000 samples form the train split and the rest the test
    split. ``self.pre_filter`` and ``self.pre_transform`` are applied to
    both splits when set, and the collated results are saved to
    ``self.processed_paths[0]`` (train) and ``self.processed_paths[1]``
    (test).
    """
    from ase.io import read

    extract_tar(self.raw_paths[0], self.raw_dir, log=False)
    structures = read(osp.join(self.raw_dir, 'structures.xyz'), index=':')
    targets = np.loadtxt(osp.join(self.raw_dir, 'bandgaps.csv'))

    def to_data(material, bandgap):
        # Build one sample from a structure and its band-gap value.
        pos = torch.from_numpy(material.get_positions()).to(torch.float)
        z = torch.from_numpy(material.get_atomic_numbers()).to(torch.int64)
        y = torch.tensor([float(bandgap)])
        return Data(z=z, pos=pos, y=y)

    samples = [to_data(m, gap) for m, gap in zip(structures, targets)]

    # Index 0 is the train split, index 1 the test split.
    splits = [samples[:10000], samples[10000:]]

    # Filter both splits first, then transform both, in that order.
    if self.pre_filter is not None:
        splits = [[d for d in part if self.pre_filter(d)] for part in splits]
    if self.pre_transform is not None:
        splits = [[self.pre_transform(d) for d in part] for part in splits]

    for idx, part in enumerate(splits):
        torch.save(self.collate(part), self.processed_paths[idx])
def download(self):
    """Download the dataset archive and unpack it into ``self.raw_dir``.

    Uses ``self.processed_url`` when ``rdkit`` is ``None`` and
    ``self.raw_url`` otherwise. The raw directory is printed between two
    separator lines, the archive is extracted in place, and the archive
    file is then deleted.
    """
    if rdkit is None:
        source_url = self.processed_url
    else:
        source_url = self.raw_url

    archive = download_url(source_url, self.raw_dir)

    print('------------')
    print(self.raw_dir)
    print('--------------')

    extract_tar(archive, self.raw_dir)
    os.unlink(archive)
def download(self):
    """Download and unpack every file listed for the selected dataset.

    For each name in ``self.available_datasets[self.name]`` the file at
    ``{self.url}/{name}`` is downloaded into ``self.raw_dir``. Names
    ending in ``.tar.gz`` are extracted with ``extract_tar``, other
    ``.gz`` names with ``extract_gz``, and anything else is left
    unextracted. The downloaded file is deleted in every case.
    """
    for name in self.available_datasets[self.name]:
        remote = f'{self.url}/{name}'
        local = download_url(remote, self.raw_dir)

        # Check '.tar.gz' before '.gz': the former also ends with the latter.
        unpack = None
        if name.endswith('.tar.gz'):
            unpack = extract_tar
        elif name.endswith('.gz'):
            unpack = extract_gz

        if unpack is not None:
            unpack(local, self.raw_dir)

        os.unlink(local)
def download(self):
    """Download and unpack every file listed for the selected dataset.

    Each name in ``self.available_datasets[self.name]`` is fetched from
    ``self.url`` into ``self.raw_dir`` and the local path is printed.
    ``.tar.gz`` files are extracted with ``extract_tar``, other ``.gz``
    files with ``extract_gz``; the downloaded file is removed afterwards
    regardless of its extension.
    """
    for name in self.available_datasets[self.name]:
        remote = '{}/{}'.format(self.url, name)
        local = download_url(remote, self.raw_dir)
        print(local)

        is_tarball = name.endswith('.tar.gz')
        if is_tarball:
            extract_tar(local, self.raw_dir)
        elif name.endswith('.gz'):
            extract_gz(local, self.raw_dir)

        os.unlink(local)
def download(self):
    """Download the selected dataset into a staging folder, then make it
    the raw directory.

    Every file listed in ``self.available_sets_df[self.name]`` is fetched
    from ``self.url`` into ``<self.root>/<self.name>``, extracted there
    when it is a ``.tar.gz`` or ``.gz`` file, and then deleted. Afterwards
    the existing ``self.raw_dir`` is removed, the staging folder is
    renamed to ``self.raw_dir``, and the raw directory is printed.
    """
    staging = osp.join(self.root, self.name)

    for fname in self.available_sets_df[self.name]:
        remote = '{}/{}'.format(self.url, fname)
        local = download_url(remote, staging)

        # Check '.tar.gz' before '.gz': the former also ends with the latter.
        if fname.endswith('.tar.gz'):
            extract_tar(local, staging)
        elif fname.endswith('.gz'):
            extract_gz(local, staging)

        os.unlink(local)

    # Replace the raw directory with the freshly populated staging folder.
    shutil.rmtree(self.raw_dir)
    os.rename(staging, self.raw_dir)

    message = 'Raw\n{}'.format(self.raw_dir)
    print(message)
def download(self):
    """Fetch images, annotations and split file into ``self.raw_dir``.

    Steps, in order:

    1. Download ``self.image_url``, extract it with ``mode='r'``, delete
       the archive, move ``TrainVal/VOCdevkit/VOC2011`` to ``images`` and
       remove the remaining ``TrainVal`` tree.
    2. Download ``self.annotation_url``, extract it and delete the
       archive.
    3. Download ``self.split_url`` and rename the file to ``splits.npz``.
    """
    raw_dir = self.raw_dir

    # 1. Images.
    archive = download_url(self.image_url, raw_dir)
    extract_tar(archive, raw_dir, mode='r')
    os.unlink(archive)

    trainval_dir = osp.join(raw_dir, 'TrainVal')
    voc_dir = osp.join(trainval_dir, 'VOCdevkit', 'VOC2011')
    os.rename(voc_dir, osp.join(raw_dir, 'images'))
    shutil.rmtree(trainval_dir)

    # 2. Annotations.
    archive = download_url(self.annotation_url, raw_dir)
    extract_tar(archive, raw_dir)
    os.unlink(archive)

    # 3. Splits.
    splits_file = download_url(self.split_url, raw_dir)
    os.rename(splits_file, osp.join(raw_dir, 'splits.npz'))
def download(self):
    """Download the archive at ``self.url`` and extract it into
    ``self.raw_dir``.

    The archive is stored in ``self.raw_dir`` under the last path
    component of ``self.url``. If a file with that name already exists it
    is reused; otherwise it is fetched with the system ``wget``. The
    archive is then extracted into ``self.raw_dir`` and deleted.

    Raises:
        OSError: If ``wget`` cannot be started (e.g. it is not
            installed).
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero
            status.
    """
    import subprocess

    # Download and extract the archive.
    filename = self.url.rpartition('/')[2]
    path = osp.join(self.raw_dir, filename)
    if osp.exists(path):  # pragma: no cover
        print(f"Using existing file {filename}")
    else:
        # - We need to use the system 'wget' because urllib does not work
        #   with Dropbox.
        print(f'Downloading {self.url}')
        try:
            # Pass an argument list rather than a shell string, so that
            # spaces or shell metacharacters in the URL or path cannot
            # break or alter the command. Fail loudly on a non-zero exit.
            subprocess.run(['wget', self.url, '-O', path, '-q'], check=True)
        except (OSError, subprocess.CalledProcessError):
            # 'wget -O' may leave an empty or partial file behind; remove
            # it so a later call does not mistake it for a valid archive.
            if osp.exists(path):
                os.unlink(path)
            raise
    # - Extract the downloaded archive.
    extract_tar(path, self.raw_dir)
    # Delete the archive.
    os.unlink(path)
def download(self):
    """Download the archive into ``self.root`` and install it as the raw
    directory.

    The archive from ``self.url`` is extracted inside ``self.root`` and
    then deleted. The existing ``self.raw_dir`` is removed and the
    extracted ``nell_data`` folder is renamed to take its place.
    """
    archive = download_url(self.url, self.root)
    extract_tar(archive, self.root)
    os.unlink(archive)

    # Swap the extracted folder in for the current raw directory.
    shutil.rmtree(self.raw_dir)
    extracted_dir = osp.join(self.root, 'nell_data')
    os.rename(extracted_dir, self.raw_dir)
def download(self):
    """Download the archive at ``self.url`` into ``self.raw_dir``,
    extract it there, and delete the archive file.
    """
    archive = download_url(self.url, self.raw_dir)
    extract_tar(archive, self.raw_dir)
    # The archive is no longer needed once its contents are extracted.
    os.unlink(archive)
def download(self):
    """Download the archive at ``self.raw_url`` into ``self.raw_dir``,
    extract it there, and remove the archive file.
    """
    tar_path = download_url(self.raw_url, self.raw_dir)
    extract_tar(tar_path, self.raw_dir)
    # Only the extracted contents are kept.
    os.remove(tar_path)
def download(self):
    """Download the archive for ``self.name`` and extract it into
    ``self.raw_dir``.

    The URL is ``self.url`` formatted with ``self.name``. The archive is
    downloaded into ``self.root`` (not the raw directory), extracted
    into ``self.raw_dir``, and then deleted.
    """
    archive_url = self.url.format(self.name)
    archive = download_url(archive_url, self.root)
    extract_tar(archive, self.raw_dir)
    os.unlink(archive)
def download(self):
    """Download the archive at ``self.url`` into ``self.raw_dir``,
    extract it there with ``mode='r'``, and delete the archive file.
    """
    archive = download_url(self.url, self.raw_dir)
    # NOTE(review): mode='r' is passed through to extract_tar; presumably
    # it selects how the tar file is opened -- confirm against the helper.
    extract_tar(archive, self.raw_dir, mode='r')
    os.unlink(archive)