def process(self):
    """Extract the raw archive, build per-split datasets and CV fold indices.

    For each split, the CSV rows are converted to ``Data`` objects via
    ``self._process_row``, optionally pre-transformed/filtered, collated and
    saved. The 10-fold train/dev/test index tensors (indices into a global
    SMILES ordering) are saved to the last processed path.
    """
    extract_zip(self.raw_paths[0], self.raw_dir, log=False)
    dir_path = os.path.join(self.raw_dir, 'pseudomonas')

    smiles_idx = {}  # maps each SMILES string to a stable global index
    data_lists = {}
    for i, split in enumerate(self.splits):
        # `axis` must be passed by keyword: the positional form of
        # DataFrame.dropna(1) was deprecated and removed in pandas 2.0.
        df = pd.read_csv(os.path.join(dir_path, split + '.csv'))
        df = df.drop('id', axis=1, errors='ignore').dropna(axis=1)

        data_lists[split] = []
        for row in df.itertuples(False, None):
            # The first column is the SMILES string; register it once.
            smiles_idx.setdefault(row[0], len(smiles_idx))
            data = self._process_row(*row)
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            if self.pre_filter is None or self.pre_filter(data):
                data_lists[split].append(data)

        data, slices = self.collate(data_lists[split])
        torch.save((data, slices), self.processed_paths[i])

    # Build the 10 cross-validation folds as LongTensor indices into the
    # global SMILES ordering established above.
    splits = []
    for fold in range(10):
        idx = []
        for split in ['train', 'dev', 'test']:
            df = pd.read_csv(
                os.path.join(dir_path, 'train_cv', f'fold_{fold}',
                             split + '.csv'))
            idx.append(
                torch.LongTensor(df['smiles'].map(lambda s: smiles_idx[s])))
        splits.append(tuple(idx))
    torch.save(tuple(splits), self.processed_paths[-1])
def download(self):
    """Download the archive and install the extracted folder as ``raw_dir``."""
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    # Replace the raw directory with the freshly extracted 'shapenet' folder.
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(self.root, 'shapenet'), self.raw_dir)
def download(self):
    """Download ModelNet40 (resampled normals) and set it up as ``raw_dir``."""
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    # Swap the (empty) raw directory for the extracted dataset folder.
    extracted = osp.join(self.root, "modelnet40_normal_resampled")
    shutil.rmtree(self.raw_dir)
    os.rename(extracted, self.raw_dir)
def download(self):
    """Download the archive and promote its extracted folder to ``raw_dir``.

    The archive unpacks into a folder named after the file stem of the URL.
    """
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    shutil.rmtree(self.raw_dir)
    stem = self.url.split('/')[-1].split('.')[0]
    os.rename(osp.join(self.root, stem), self.raw_dir)
def download(self):
    """Fetch the DBP15K archive from Google Drive and set up ``raw_dir``."""
    archive = osp.join(self.root, 'raw.zip')
    gdd.download_file_from_google_drive(self.file_id, archive)
    extract_zip(archive, self.root)
    os.unlink(archive)
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(self.root, 'DBP15K'), self.raw_dir)
def process(self):
    """Build train/test datasets from the FaceTalk ply meshes.

    Extraction happens lazily (only if no ``FaceTalk_*`` folders exist).
    Within each category, every run of 100 consecutive meshes is split
    90/10 into train/test.
    """
    subject_dirs = sorted(glob(osp.join(self.raw_dir, 'FaceTalk_*')))
    if not subject_dirs:
        extract_zip(self.raw_paths[0], self.raw_dir, log=False)
        subject_dirs = sorted(glob(osp.join(self.raw_dir, 'FaceTalk_*')))

    train_list, test_list = [], []
    for subject in subject_dirs:
        for label, category in enumerate(self.categories):
            ply_files = sorted(glob(osp.join(subject, category, '*.ply')))
            for k, ply_path in enumerate(ply_files):
                data = read_ply(ply_path)
                data.y = torch.tensor([label], dtype=torch.long)
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                # First 90 of every 100 meshes go to train, the rest to test.
                target = train_list if (k % 100) < 90 else test_list
                target.append(data)

    torch.save(self.collate(train_list), self.processed_paths[0])
    torch.save(self.collate(test_list), self.processed_paths[1])
def download(self):
    """Download the ModelNet archive for ``self.name`` into ``raw_dir``."""
    archive = download_url(self.urls[self.name], self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    # The zip expands into 'ModelNet<name>'; make that the raw directory.
    extracted = osp.join(self.root, 'ModelNet{}'.format(self.name))
    shutil.rmtree(self.raw_dir)
    os.rename(extracted, self.raw_dir)
def download(self):
    """Interactively download S3DIS, unpack it, and apply the bundled patch.

    If ``raw_dir`` already holds files but fewer than the 6 expected area
    folders are present, the partial state is wiped and the download is
    restarted from scratch.
    """
    existing = os.listdir(self.raw_dir)
    if not existing:
        if not os.path.exists(osp.join(self.root, self.zip_name)):
            log.info("WARNING: You are downloading S3DIS dataset")
            log.info(
                "Please, register yourself by filling up the form at {}".
                format(self.form_url))
            log.info("***")
            log.info(
                "Press any key to continue, or CTRL-C to exit. By continuing, you confirm filling up the form."
            )
            input("")
            gdown.download(self.download_url,
                           osp.join(self.root, self.zip_name), quiet=False)
        extract_zip(os.path.join(self.root, self.zip_name), self.root)
        shutil.rmtree(self.raw_dir)
        os.rename(osp.join(self.root, self.file_name), self.raw_dir)
        shutil.copy(self.path_file, self.raw_dir)
        # Apply the shipped patch to fix known issues in the raw data.
        patch_cmd = "patch -ruN -p0 -d {} < {}".format(
            self.raw_dir, osp.join(self.raw_dir, "s3dis.patch"))
        os.system(patch_cmd)
    else:
        # Partial extraction detected: wipe and retry from a clean state.
        overlap = len(set(self.folders).intersection(set(existing)))
        if overlap != 6:
            shutil.rmtree(self.raw_dir)
            os.makedirs(self.raw_dir)
            self.download()
def process(self):
    """Convert the extracted ``data_5_all`` npz files into one HDF5 file.

    Each npz file contributes one (data, labels) pair; arrays are transposed
    and stored contiguously with per-sample chunking and gzip compression.
    The temporary extraction folder is removed afterwards.
    """
    extract_zip(self.raw_paths[0], self.raw_dir, log=True)
    path = os.path.join(self.raw_dir, 'data_5_all')
    files = [os.path.join(path, f) for f in os.listdir(path)]
    N = len(files)

    # Probe the first file to learn the (transposed) per-sample shapes.
    # Assumes every file shares these shapes — TODO confirm upstream.
    x = np.load(files[0])
    data_shape = x['data'].T.shape
    label_shape = x['labels'].T.shape

    # Fix: the original wrapped the path in a redundant single-argument
    # os.path.join(...); the path is used directly here.
    with h5py.File(self.processed_paths[0], 'w') as hf:
        dset = hf.create_dataset("data", shape=(N,) + data_shape,
                                 chunks=(1,) + data_shape, dtype='f4',
                                 compression='gzip')
        lset = hf.create_dataset("labels", shape=(N,) + label_shape,
                                 chunks=(1,) + label_shape, dtype='f4',
                                 compression='gzip')
        # enumerate() replaces the original hand-maintained counter.
        for i, f in enumerate(tqdm(files)):
            x = np.load(f)
            # Transpose and force contiguity so h5py writes a clean block.
            dset[i] = np.ascontiguousarray(x['data'].T)
            lset[i] = np.ascontiguousarray(x['labels'].T)
    shutil.rmtree(path)
def process(self):
    """Process the raw ply meshes into collated train/test splits.

    ``interpolation`` holds out 10 of every 100 consecutive meshes;
    ``extrapolation`` holds out the experiment folder named
    ``self.test_exp``. Any other split value raises ``RuntimeError``.
    """
    print('Processing...')
    mesh_paths = glob(osp.join(self.raw_dir, '*/*/*.ply'))
    if len(mesh_paths) == 0:
        # Lazily extract the archive on first run.
        extract_zip(self.raw_paths[0], self.raw_dir, log=False)
        mesh_paths = glob(osp.join(self.raw_dir, '*/*/*.ply'))
    print("Number of meshes: {}".format(len(mesh_paths)))

    train_data_list, test_data_list = [], []
    for idx, fp in enumerate(tqdm(mesh_paths)):
        data = read_mesh(fp)
        if self.pre_transform is not None:
            data = self.pre_transform(data)
        if self.split == 'interpolation':
            # Every 100 consecutive meshes: first 10 to test, rest to train.
            if (idx % 100) < 10:
                test_data_list.append(data)
            else:
                train_data_list.append(data)
        elif self.split == 'extrapolation':
            # Fix: the parent folder (the experiment name) is parsed with
            # os.path instead of splitting on '/', which breaks on Windows.
            if osp.basename(osp.dirname(fp)) == self.test_exp:
                test_data_list.append(data)
            else:
                train_data_list.append(data)
        else:
            raise RuntimeError((
                'Expected the split of interpolation or extrapolation, but'
                ' found {}').format(self.split))

    torch.save(self.collate(train_data_list), self.processed_paths[0])
    torch.save(self.collate(test_data_list), self.processed_paths[1])
def download(self):
    """Download and extract every RGBD archive for the current mode."""
    folder = osp.join(self.raw_dir, self.mode)
    log.info("Download elements in the file {}...".format(folder))
    # Each URL yields one zip: fetch, unpack in place, then discard it.
    for url in self.dict_urls[self.mode]:
        archive = download_url(url, folder, self.verbose)
        extract_zip(archive, folder, self.verbose)
        os.unlink(archive)
def download(self):
    """Fetch the WILLOW-ObjectClass archive and install it as ``raw_dir``."""
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    # Drop auxiliary files that ship with the archive but are unused here.
    for leftover in ('README', 'demo_showAnno.m'):
        os.unlink(osp.join(self.root, leftover))
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(self.root, 'WILLOW-ObjectClass'), self.raw_dir)
def download(self):
    """Download the MovieLens ``ml-100k`` archive and flatten it into raw_dir."""
    if self.name == 'ml-100k':
        url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
        path = download_url(url, self.root)
        # Fix: `log` is a verbosity flag; the original passed the dataset
        # name, which only worked because a non-empty string is truthy.
        extract_zip(path=path, folder=self.raw_dir, log=True)
        os.unlink(path)
        # Move every extracted file up one level so raw_dir is flat.
        for file in glob.glob(os.path.join(self.raw_dir, self.name, '*')):
            shutil.move(file, self.raw_dir)
def download(self):
    """Re-create ``raw_dir`` from a fresh copy of the dataset archive."""
    # Start from a clean raw directory.
    shutil.rmtree(self.raw_dir)
    # Download and extract the dataset, then drop the archive.
    archive = download_url(self.dataset_url, self.raw_dir)
    extract_zip(archive, self.raw_dir)
    os.unlink(archive)
def download(self):
    """Download and unpack the dataset, unless running in test mode."""
    if self.is_test:
        return
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    shutil.rmtree(self.raw_dir)
    # The extracted folder is named after the file stem of the URL.
    stem = self.url.split("/")[-1].split(".")[0]
    os.rename(osp.join(self.root, stem), self.raw_dir)
def download(self):
    """Fetch the (optionally cleaned) dataset archive into ``raw_dir``."""
    base_url = self.cleaned_url if self.cleaned else self.url
    folder = osp.join(self.root, self.name)
    archive = download_url('{}/{}.zip'.format(base_url, self.name), folder)
    extract_zip(archive, folder)
    os.unlink(archive)
    # Replace raw_dir with the extracted '<name>/<name>' folder.
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(folder, self.name), self.raw_dir)
def download(self):
    """Download the human_seg archive and install it as ``raw_dir``."""
    # NOTE(review): the URL ends in .tar.gz but the file is handed to
    # extract_zip — presumably the hosted file is actually a zip; verify.
    archive = download_url(
        'https://www.dropbox.com/s/s3n05sw0zg27fz3/human_seg.tar.gz',
        self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    os.rename(osp.join(self.root, 'human_seg'), self.raw_dir)
def download(self):
    """Download the main archive plus the uncharacterized-molecules list."""
    archive = download_url(self.raw_url, self.raw_dir)
    extract_zip(archive, self.raw_dir)
    os.unlink(archive)
    # The second URL saves under a bare numeric id; give it a proper name.
    download_url(self.raw_url2, self.raw_dir)
    os.rename(osp.join(self.raw_dir, '3195404'),
              osp.join(self.raw_dir, 'uncharacterized.txt'))
def download(self):
    """Download the AMiner network plus its label archive into ``raw_dir``."""
    shutil.rmtree(self.raw_dir)
    net_archive = download_url(self.url, self.root)
    extract_zip(net_archive, self.root)
    os.rename(osp.join(self.root, 'net_aminer'), self.raw_dir)
    os.unlink(net_archive)
    # The labels come from a separate archive extracted inside raw_dir.
    label_archive = download_url(self.y_url, self.raw_dir)
    extract_zip(label_archive, self.raw_dir)
    os.unlink(label_archive)
def download(self):
    """Download the molecule archive and the per-split index files."""
    shutil.rmtree(self.raw_dir)
    archive = download_url(self.url, self.root)
    extract_zip(archive, self.root)
    os.rename(osp.join(self.root, 'molecules'), self.raw_dir)
    os.unlink(archive)
    # Fetch the predefined split index files alongside the raw data.
    for split in ('train', 'val', 'test'):
        download_url(self.split_url.format(split), self.raw_dir)
def download(raw_path):
    """Downloads the raw data from URL

    :param raw_path: download path
    :type raw_path: str
    """
    archive = download_url(URL, raw_path)
    extract_zip(archive, raw_path)
    os.unlink(archive)
def download(raw_path):
    """Downloads the raw data from URL

    :param raw_path: download path
    :type raw_path: str
    """
    path = download_url(URL, raw_path)
    extract_zip(path, raw_path)
    os.unlink(path)
    # Fix: build the rename paths with os.path.join instead of hand-built
    # '/'-separated f-strings so the rename is portable across platforms.
    os.rename(os.path.join(raw_path, 'ml-10M100K'),
              os.path.join(raw_path, 'ml-10m'))
def download(self):
    """Download every archive of the selected dataset, extracting by type."""
    # Suffix order matters: '.tar.gz' must be tested before the bare '.gz'.
    extractors = (
        ('.tar.gz', extract_tar),
        ('.gz', extract_gz),
        ('.zip', extract_zip),
    )
    for name in self.available_datasets[self.name]:
        path = download_url('{}/{}'.format(self.url, name), self.raw_dir)
        for suffix, extract in extractors:
            if name.endswith(suffix):
                extract(path, self.raw_dir)
                break
        os.unlink(path)
def download(self):
    """Download OGB-MAG, flatten its nested layout into raw_dir, clean up."""
    archive = download_url(self.url, self.raw_dir)
    extract_zip(archive, self.raw_dir)
    mag_dir = osp.join(self.raw_dir, 'mag')
    # Lift the needed pieces out of the nested 'mag' folder.
    for f in ['node-feat', 'node-label', 'relations']:
        shutil.move(osp.join(mag_dir, 'raw', f), self.raw_dir)
    shutil.move(osp.join(mag_dir, 'split'), self.raw_dir)
    shutil.move(osp.join(mag_dir, 'raw', 'num-node-dict.csv.gz'),
                self.raw_dir)
    shutil.rmtree(mag_dir)
    os.remove(osp.join(self.raw_dir, 'mag.zip'))
def download(self):
    """Download the named dataset archive and flatten it into ``raw_dir``."""
    url = self.url.format(self.datasets[self.name])
    archive = download_url(url, self.raw_dir)
    extract_zip(archive, self.raw_dir)
    os.unlink(archive)
    # Most datasets extract into '<name>.attr'; 'mag' uses plain '<name>'.
    inner = osp.join(self.raw_dir, f'{self.name}.attr')
    if self.name == 'mag':
        inner = osp.join(self.raw_dir, self.name)
    for name in self.raw_file_names:
        os.rename(osp.join(inner, name), osp.join(self.raw_dir, name))
    shutil.rmtree(inner)
def download(self):
    """Download the RGBD archives for the current mode, skipping if present."""
    folder = osp.join(self.raw_dir, self.mode)
    if files_exist([folder]):  # pragma: no cover
        log.warning("already downloaded {}".format(self.mode))
        return
    log.info("Download elements in the file {}...".format(folder))
    # Each URL yields one zip: fetch, unpack in place, then discard it.
    for url in self.dict_urls[self.mode]:
        archive = download_url(url, folder, self.verbose)
        extract_zip(archive, folder, self.verbose)
        os.unlink(archive)
def download(self):
    """Unpack a manually supplied PartNet archive into ``raw_dir``.

    PartNet cannot be fetched automatically; the archive must already be
    placed at ``raw_dir/<dataset>``. Raises if the archive is missing.
    """
    path = osp.join(self.raw_dir, self.dataset)
    if not osp.exists(path):
        # NOTE(review): FileExistsError is kept for backward compatibility
        # with existing callers, though FileNotFoundError would describe
        # this condition (the file is absent) more accurately.
        raise FileExistsError(
            'PartNet can only downloaded via application. '
            'See details in https://cs.stanford.edu/~kaichun/partnet/')
    extract_zip(path, self.root)
    os.unlink(path)
    shutil.rmtree(self.raw_dir)
    # Fix: URLs always use '/' — splitting on os.sep broke on Windows,
    # where os.sep is '\\'.
    name = self.url.split('/')[-1].split('.')[0]
    os.rename(osp.join(self.root, name), self.raw_dir)
def download(self):
    """Download an OGB archive (with user confirmation) and move it into root."""
    from ogb.utils.url import decide_download, download_url, extract_zip
    url = self.meta_info['url']
    # decide_download prompts the user for large files; bail out on refusal.
    if not decide_download(url):
        return
    path = download_url(url, self.original_root)
    extract_zip(path, self.original_root)
    print(f'Removing {path}')
    os.unlink(path)
    print(f'Removing {self.root}')
    shutil.rmtree(self.root)
    print(f'Moving {osp.join(self.original_root, self.download_name)} to {self.root}')
    shutil.move(osp.join(self.original_root, self.download_name), self.root)
def download(self):
    """Fetch the dataset archive from Google Drive and flatten it."""
    from google_drive_downloader import GoogleDriveDownloader as gdd
    archive = osp.join(self.raw_dir, f'{self.name}.zip')
    gdd.download_file_from_google_drive(self.datasets[self.name], archive)
    extract_zip(archive, self.raw_dir)
    os.unlink(archive)
    # Most datasets extract into '<name>.attr'; 'mag' uses plain '<name>'.
    inner = osp.join(self.raw_dir, f'{self.name}.attr')
    if self.name == 'mag':
        inner = osp.join(self.raw_dir, self.name)
    for name in self.raw_file_names:
        os.rename(osp.join(inner, name), osp.join(self.raw_dir, name))
    shutil.rmtree(inner)
def download(self):
    """Download ModelNet, install it as ``raw_dir`` and drop macOS metadata."""
    archive = download_url(self.urls[self.name], self.root)
    extract_zip(archive, self.root)
    os.unlink(archive)
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(self.root, f'ModelNet{self.name}'), self.raw_dir)
    # Delete osx metadata generated during compression of ModelNet10
    macosx_dir = osp.join(self.root, '__MACOSX')
    if osp.exists(macosx_dir):
        shutil.rmtree(macosx_dir)