def load_pretrained_model(self):
    ckpt_path = join('model', 'resnet50se_drop.pth')
    file_id = '1g_ZxNlH_WKmeGuplhFtKokKblxsJU0e4'
    if not os.path.isfile(ckpt_path):
        gdd.download_file_from_google_drive(file_id=file_id, dest_path=ckpt_path)
    self.model.load_state_dict(torch.load(ckpt_path))


def download_re3_model():
    print("Downloading the re3 model.\n"
          "The model is about 700 MB, so the download may take a while.")
    dest_folder = './algorithms/re3/logs/'
    file = './algorithms/re3/logs/checkpoints.tar.gz'
    google_drive_id = '1mvxA9r9K1sydBEfVWk090f4Pdhg15YQD'
    gdd.download_file_from_google_drive(file_id=google_drive_id,
                                        dest_path=file,
                                        unzip=False)
    print("Done downloading, unzipping model")
    with tarfile.open(file, "r:gz") as tar:
        tar.extractall(path=dest_folder)
    print("Done unzipping, deleting checkpoints.tar.gz")
    if os.path.exists(file):
        os.remove(file)
        print("Successfully deleted checkpoints.tar.gz")
    else:
        print("Could not find archive to delete")


def download_AIHub_GoogleDrive():
    # Download AIHub OCR data from Google Drive
    handwritten_file_id_1 = '13GCWsztfD00mHxKGNVO_c6uxS_9J_JOY'
    handwritten_file_id_2 = '1N2dTwZ8TgYRFBeNDKgjxjDHqULk_JX6X'
    handwritten_label_id = '1rX979OhUHCKSYRbBPaMIHtFQa0eVdSXt'
    printed_file_id_1 = '1MNYnv4aO0kWaDigb9iEcIdpxO_pF2s-m'
    printed_label_id = '1ibZrGauMoM1E9Bx2fMGtiJEqQ6nh8Qy8'

    idlist_file = [[handwritten_file_id_1, 'handwritten-1'],
                   [handwritten_file_id_2, 'handwritten-2'],
                   [printed_file_id_1, 'printed-1']]
    idlist_label = [[handwritten_label_id, 'handwritten_label'],
                    [printed_label_id, 'printed_label']]

    if not os.path.isdir(args.AIHub_path):
        os.mkdir(args.AIHub_path)

    print("Downloading AIHub OCR from Google Drive...")
    for file_id, name in idlist_file:
        zip_dest_path = os.path.join(args.AIHub_path, f'{name}.zip')
        gdd.download_file_from_google_drive(file_id=file_id,
                                            dest_path=zip_dest_path,
                                            unzip=True)
        os.remove(zip_dest_path)
    for file_id, name in idlist_label:
        json_dest_path = os.path.join(args.AIHub_path, f'{name}.json')
        gdd.download_file_from_google_drive(file_id=file_id,
                                            dest_path=json_dest_path)
    print("Download complete")


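# Usage sketch for download_AIHub_GoogleDrive above. The function relies on a
# module-level `args` object with an `AIHub_path` attribute; the argparse setup
# below is an assumption for illustration, not part of the original code.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--AIHub_path', type=str, default='./data/AIHub',
                    help='directory where the AIHub OCR data is stored')
args = parser.parse_args()

# download_AIHub_GoogleDrive()  # fetches images (auto-unzipped) and label JSONs

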
def download_pretrained_model():
    # `model_zip` is the path of the downloaded archive; the downloader
    # also extracts it in place because unzip=True.
    model_zip = './pretrained_model.zip'
    if not os.path.exists(model_zip):
        gdd.download_file_from_google_drive(
            file_id='18cui9MlfrH297ezj9jJoxvRtOJTUmWWY',
            dest_path=model_zip,
            unzip=True)


def download_and_open_fold(fold=1):
    """
    Args:
        fold (int): Fold to download

    Returns:
        fpaths_dict (dict): dictionary of `train` and `val` filepaths
    """
    fold = str(int(fold))
    all_folds = list(FOLDS.keys())
    if fold not in all_folds:
        raise ValueError("`fold` must be one of {0}".format(all_folds))

    # downloading the fold .json
    fold_fname = "fold{0}_901000.json".format(fold)
    f_id = FOLDS[fold]
    fpath = os.path.join(os.getcwd(), fold_fname)
    # no need to download again if the .json already exists
    if not os.path.exists(fpath):
        gdd.download_file_from_google_drive(file_id=f_id,
                                            dest_path=fpath,
                                            overwrite=False,
                                            unzip=False)
    print("Loading from json...")
    with open(fold_fname, "r") as fp:
        fpaths_dict = json.load(fp)
    return fpaths_dict


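# Hypothetical usage of download_and_open_fold. The module-level FOLDS mapping
# from fold number (as a string) to Google Drive file ID is assumed here; the
# IDs below are placeholders, not real files.
FOLDS = {
    "1": "<file-id-for-fold-1>",
    "2": "<file-id-for-fold-2>",
}

# fpaths = download_and_open_fold(fold=1)
# train_paths, val_paths = fpaths["train"], fpaths["val"]

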
def download_and_uncompress_dataset(dataset_dir: str):
    """Downloads the GoogleNews word2vec model and uncompresses it locally.

    Parameters
    ----------
    dataset_dir : str
        The directory where the dataset is stored.
    """
    pathlib.Path(dataset_dir).mkdir(parents=True, exist_ok=True)

    filename_word2vec = FILE_NAME_WORD2VEC + '.gz'
    filepath_word2vec = os.path.join(dataset_dir, filename_word2vec)

    print()
    print("Downloading pretrained word2vec from Google Drive file id", FILE_ID)
    print("Downloading to", filepath_word2vec)

    gdd.download_file_from_google_drive(file_id=FILE_ID,
                                        dest_path=filepath_word2vec,
                                        unzip=False)

    statinfo = os.stat(filepath_word2vec)
    print()
    print('Successfully downloaded', filename_word2vec, statinfo.st_size, 'bytes.')

    print('Uncompressing...')
    # strip the trailing '.gz' to get the output filename
    with gzip.open(filepath_word2vec, 'rb') as f_in:
        with open(".".join(filepath_word2vec.split('.')[:-1]), 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    print('Successfully downloaded and extracted word2vec')


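# Usage sketch for download_and_uncompress_dataset. The module-level constants
# are assumptions: FILE_ID would be the Google Drive ID of the gzipped
# GoogleNews vectors (placeholder below) and FILE_NAME_WORD2VEC the
# uncompressed filename.
FILE_NAME_WORD2VEC = 'GoogleNews-vectors-negative300.bin'
FILE_ID = '<google-drive-file-id>'  # placeholder

# download_and_uncompress_dataset('./data/word2vec')
# -> ./data/word2vec/GoogleNews-vectors-negative300.bin

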
def check_models_exist():
    # Download the main csv file first
    gd.download_file_from_google_drive(
        file_id='1FfifIKiBKy5nfTewrovsq8lzsSVrr6F_',
        dest_path='./meta/chefkoch.csv')

    files_id = [
        '1LlIKnuQka6uNyalozbuTlSSCEuRmyRam',
        '19nvL3R2WpLlqIrh2KFoooWl4aQ48vWL2',
        '1SIp3bkGiRCQks9HTEqkqZXYTEO05IniR',
        '1qGqMfRuI1rsuUPH6vxPigy0yT9YL1ime'
    ]
    files_name = [
        'inceptionv3_4_new_ohne_dpot_2.97270.hdf5',
        'sklearn_ipca_object.p',
        'image_features_pca_nmslib_index.bin',
        'image_paths.hdf5'
    ]

    def download_all_models():
        for file_id, file_name in zip(files_id, files_name):
            gd.download_file_from_google_drive(file_id=file_id,
                                               dest_path='./models/' + file_name)

    d = pathlib.Path('./models')
    if not d.exists():
        d.mkdir(parents=True, exist_ok=True)
        download_all_models()
    else:
        # only fetch the models that are still missing
        for file_id, file_name in zip(files_id, files_name):
            m = pathlib.Path('./models/' + file_name)
            if not m.exists():
                gd.download_file_from_google_drive(file_id=file_id,
                                                   dest_path='./models/' + file_name)


def main():
    FLAGS = PARSER.parse_args()

    if not os.path.exists(FLAGS.data_dir):
        os.makedirs(FLAGS.data_dir)

    filename = ''
    if FLAGS.dataset == 'hippocampus':
        filename = 'Task04_Hippocampus.tar'
        gdd.download_file_from_google_drive(
            file_id='1RzPB1_bqzQhlWvU-YGvZzhx2omcDh38C',
            dest_path=os.path.join(FLAGS.data_dir, filename),
            unzip=False)

    print('Unpacking...')
    # use a context manager so the archive is closed after extraction
    with tarfile.open(os.path.join(FLAGS.data_dir, filename)) as archive:
        archive.extractall(path=FLAGS.data_dir)

    print('Cleaning up...')
    os.remove(os.path.join(FLAGS.data_dir, filename))

    print("Finished downloading files for V-Net medical to {}".format(FLAGS.data_dir))


def loadBathysphere():
    print("loadBathysphere")

    # etopo1_bedrock_-80_-35_10_45.nc
    units = 'meters'
    googleIdBathysphere = '10VqbV2oNUVcvS6lLP3FekVlFM4LUJj5o'  # extracted from the share url

    # gdd needs a file name but fails if the file itself exists, so close
    # (and thereby delete) the temporary file before downloading
    tmpBathysphere = tempfile.NamedTemporaryFile(suffix='.nc',
                                                 prefix='tempBathysphere',
                                                 delete=True)
    tmpBathysphere.close()

    # Download the file from the url and save it locally
    gdd.download_file_from_google_drive(file_id=googleIdBathysphere,
                                        dest_path=tmpBathysphere.name)

    with netcdf.netcdf_file(tmpBathysphere.name, 'r', mmap=False) as f:
        loncdf = f.variables['lon']
        latcdf = f.variables['lat']
        elecdf = f.variables['Band1']
        crscdf = f.variables['crs']  # contents unknown, other than 1-character strings

    # Transpose lat/lon to lon/lat
    ele = np.transpose(elecdf.data)

    # Create an interpolator. This is a regular grid, so a regular grid
    # interpolator can exploit the regularity for the most efficient search
    return units, RegularGridInterpolator((loncdf.data, latcdf.data), ele)


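# A minimal sketch of querying the interpolator returned by loadBathysphere.
# The grid was transposed to lon/lat order above, so query points are
# (lon, lat); the coordinates below are made up for illustration.
# units, bathymetry = loadBathysphere()
# elevation = bathymetry((-60.0, 40.0))  # elevation in `units` at lon -60, lat 40
# print(elevation, units)

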
def load_pretrained_weights(model, modeldir, exp_str):
    # best checkpoint model name
    model_exp_dir = os.path.join(modeldir, exp_str)
    best_ckpt_model = os.path.join(model_exp_dir, 'checkpoint_best.pt')

    # check if the model exists
    if os.path.exists(best_ckpt_model):
        model.load_state_dict(torch.load(best_ckpt_model,
                                         map_location=lambda storage, loc: storage))
        print('Loaded pre-trained weights: {}'.format(best_ckpt_model))
    else:
        found = False
        print('Pre-trained weights not found. Attempting to download.')
        for pretrained_weight in pretrained_weights:
            if pretrained_weight.get('exp') == exp_str:
                os.makedirs(model_exp_dir, exist_ok=True)
                GoogleDriveDownloader.download_file_from_google_drive(
                    file_id=pretrained_weight['file_id'],
                    dest_path=os.path.join(model_exp_dir, 'checkpoint_best.pt'),
                    unzip=False,
                    showsize=True)
                model.load_state_dict(torch.load(best_ckpt_model,
                                                 map_location=lambda storage, loc: storage))
                print('Loaded pre-trained weights: {}'.format(best_ckpt_model))
                found = True
        if not found:
            print('Unable to find pretrained weights with this experiment configuration')
            raise Exception('Pre-trained weights not found.')
    return model


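# The module-level `pretrained_weights` registry consulted above is assumed to
# be a list of dicts with at least 'exp' and 'file_id' keys; the entries below
# are placeholders for illustration only.
pretrained_weights = [
    {'exp': 'resnet18_baseline', 'file_id': '<google-drive-file-id>'},
    {'exp': 'resnet50_augmented', 'file_id': '<google-drive-file-id>'},
]

# model = load_pretrained_weights(model, './checkpoints', 'resnet18_baseline')

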
def load_items():
    save_dest = Path('model')
    save_dest.mkdir(exist_ok=True)

    model_checkpoint = Path('model/XGB ssEPE model V4.pkl')
    feature_checkpoint = Path('model/Features.pkl')
    explainer_checkpoint = Path('model/explainer.pkl')
    shap_checkpoint = Path('model/model shap.pkl')

    # download from Google Drive if the model or features are not present
    if not model_checkpoint.exists():
        with st.spinner("Downloading model... this may take a while! \n Don't stop it!"):
            gdd.download_file_from_google_drive(file_id=Model_location,
                                                dest_path=str(model_checkpoint))
    if not feature_checkpoint.exists():
        with st.spinner("Downloading features... this may take a while! \n Don't stop it!"):
            gdd.download_file_from_google_drive(file_id=Feature_location,
                                                dest_path=str(feature_checkpoint))

    model = joblib.load(model_checkpoint)
    features = joblib.load(feature_checkpoint)

    if not explainer_checkpoint.exists():
        explainer = shap.TreeExplainer(model, np.array(features),
                                       model_output='probability')
        joblib.dump(explainer, explainer_checkpoint)
    explainer2 = joblib.load(explainer_checkpoint)

    if not shap_checkpoint.exists():
        model_shap = explainer2(features)
        joblib.dump(model_shap, shap_checkpoint)
    model_shap2 = joblib.load(shap_checkpoint)

    return model, features, explainer2, model_shap2


def get_tasks(self):
    if not os.path.exists('./data/FIGR-8'):
        os.makedirs('./data/FIGR-8', exist_ok=True)
        from google_drive_downloader import GoogleDriveDownloader as gdd
        gdd.download_file_from_google_drive(
            file_id='10dF30Qqi9RdIUmET9fBhyeRN0hJmq7pO',
            dest_path='./data/FIGR-8/Data.zip')
        import zipfile
        with zipfile.ZipFile('./data/FIGR-8/Data.zip', 'r') as zip_f:
            zip_f.extractall('./data/FIGR-8/')
        os.remove('./data/FIGR-8/Data.zip')

    tasks = dict()
    path = './data/FIGR-8/Data'
    for task in os.listdir(path):
        tasks[task] = []
        task_path = os.path.join(path, task)
        for imgs in os.listdir(task_path):
            img = Image.open(os.path.join(task_path, imgs))
            tasks[task].append(np.array(self.to_tensor(self.resize(img))))
        tasks[task] = np.array(tasks[task])
    return tasks


def load_snap_patents_mat(nclass=5):
    if not path.exists(f'{DATAPATH}snap_patents.mat'):
        p = dataset_drive_url['snap-patents']
        print(f"Snap patents url: {p}")
        gdd.download_file_from_google_drive(
            file_id=dataset_drive_url['snap-patents'],
            dest_path=f'{DATAPATH}snap_patents.mat',
            showsize=True)

    fulldata = scipy.io.loadmat(f'{DATAPATH}snap_patents.mat')

    dataset = NCDataset('snap_patents')
    edge_index = torch.tensor(fulldata['edge_index'], dtype=torch.long)
    node_feat = torch.tensor(fulldata['node_feat'].todense(), dtype=torch.float)
    num_nodes = int(fulldata['num_nodes'])
    dataset.graph = {
        'edge_index': edge_index,
        'edge_feat': None,
        'node_feat': node_feat,
        'num_nodes': num_nodes
    }

    years = fulldata['years'].flatten()
    label = even_quantile_labels(years, nclass, verbose=False)
    dataset.label = torch.tensor(label, dtype=torch.long)

    return dataset


def test_download_ephys_data():
    MORPH_DATA.mkdir(parents=True, exist_ok=True)
    dest_path = MORPH_DATA / 'B1096_cat_P04_S02_1.kwik'
    gdd.download_file_from_google_drive(
        file_id='12bp8fHCC51PWOiX8QxziY7oM7sOxQetA',
        dest_path=dest_path.as_posix())
    assert dest_path.exists()


def load_pickle_file(gdriveurl, filename):
    # `gdriveurl` is actually the Google Drive file ID, not a full url
    pathname = os.path.join(data_dir, filename)
    if not os.path.isfile(pathname):
        gdd.download_file_from_google_drive(file_id=gdriveurl,
                                            dest_path=pathname)
    with open(pathname, 'rb') as f:
        return pickle.load(f)


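# Hypothetical usage of load_pickle_file. `data_dir` is a module-level
# directory assumed by the function; the file ID and filename below are
# placeholders.
data_dir = './data'

# embeddings = load_pickle_file('<google-drive-file-id>', 'embeddings.pkl')

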
def download_wavs():
    morphs.paths.WAV_ZIP.parent.mkdir(parents=True, exist_ok=True)
    gdd.download_file_from_google_drive(
        file_id="1cHZsDqxiiM1uXJM6Yt7VJmaBMCb_PsgJ",
        dest_path=morphs.paths.WAV_ZIP.as_posix(),
        unzip=True,
    )


def create(input=None):
    return_model = input is None
    if input is None:
        input = tf.keras.layers.Input((None, None, 3))
    x = input

    x = resnet.resnet_v1_101(x, stem="b", dilate=[False, False, False, True], config=config)
    x_skip = tfcv.model.graph.get_unique(x, pred=lambda layer: layer.name.endswith("block1"))
    x_skip = conv_norm_act(x_skip, filters=48, kernel_size=1, stride=1, name="shortcut", config=config)

    x = aspp.aspp(x, filters=256, atrous_rates=[6, 12, 18], config=config)
    x = conv_norm_act(x, filters=256, kernel_size=1, stride=1, name=join("aspp", "final"), config=config)
    x = tf.keras.layers.Dropout(0.1)(x)

    x = resize(x, tf.shape(x_skip)[1:-1], method="bilinear", config=config)
    x = tf.concat([x_skip, x], axis=-1)  # TODO: shortcut with resize
    x = conv_norm_act(x, filters=256, kernel_size=3, stride=1, name="final", config=config)

    x = decode.decode(x, 19, shape=tf.shape(input)[1:-1], config=config)
    x = tf.keras.layers.Softmax()(x)

    model = tf.keras.Model(inputs=[input], outputs=[x])

    # TODO: weight initialization from:
    # https://github.com/VainF/DeepLabV3Plus-Pytorch
    download_file = os.path.join(os.path.expanduser("~"), ".keras",
                                 "best_deeplabv3plus_resnet101_cityscapes_os16.pth")
    gdd.download_file_from_google_drive(file_id="1t7TC8mxQaFECt4jutdq_NMnWxdm6B-Nb",
                                        dest_path=download_file)
    tfcv.model.pretrained.weights.load_pth(download_file, model, convert_name)

    return model if return_model else x


def setup_multidim_target():
    """
    Same as SOURCE data but reconstructed using standard ReconstructOrder pipeline

    :return: str path to target .tif files
    """
    temp_folder = os.getcwd() + '/temp'
    if not os.path.isdir(temp_folder):
        os.mkdir(temp_folder)
        print("\nsetting up temp folder")
    if not os.path.isdir(temp_folder + '/target'):
        os.mkdir(temp_folder + '/target')

    # DO NOT ADJUST THESE VALUES
    bulk_file = '1aVIwx-qADT0adMv7XWlLHvXxgfjGW_V-'
    output = temp_folder + '/target' + '/target_zip.zip'

    gdd.download_file_from_google_drive(file_id=bulk_file,
                                        dest_path=output,
                                        unzip=True,
                                        showsize=True,
                                        overwrite=True)
    yield temp_folder + '/target'


def get_tasks(self):
    if not os.path.exists('./data/FIGR-8'):
        os.makedirs('./data/FIGR-8', exist_ok=True)
        from google_drive_downloader import GoogleDriveDownloader as gdd
        gdd.download_file_from_google_drive(file_id='10dF30Qqi9RdIUmET9fBhyeRN0hJmq7pO',
                                            dest_path='./data/FIGR-8/Data.zip')
        import zipfile
        with zipfile.ZipFile('./data/FIGR-8/Data.zip', 'r') as zip_f:
            zip_f.extractall('./data/FIGR-8/')
        os.remove('./data/FIGR-8/Data.zip')

    tasks = dict()
    # path = './data/FIGR-8/Data'
    path = '/media/user/05e85ab6-e43e-4f2a-bc7b-fad887cfe312/meta_gan/Matching-network-GAN/datasets/FIGR-8'
    for task in os.listdir(path):
        tasks[task] = []
        task_path = os.path.join(path, task)
        for imgs in os.listdir(task_path):
            img = Image.open(os.path.join(task_path, imgs))
            tasks[task].append(np.array(self.to_tensor(self.resize(img))))
        if len(tasks[task]) < 4:
            print(task)
        tasks[task] = np.array(tasks[task])
    return tasks


def download_model(model_url=model_url, model_dir="../model"):
    # `model_url` is the Google Drive file ID of the finetuned model
    model_name = "finetuned_token_cls_model"
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    path_name = "/".join([model_dir, model_name])
    gdd.download_file_from_google_drive(file_id=model_url, dest_path=path_name)


def data_download(dest_path: str, category: str):
    """
    Data download using google_drive_downloader
    https://github.com/ndrplz/google-drive-downloader

    Arguments
    ---------
    dest_path: str
        directory where to save the file, e.g. ../data
    category: str
        category of the data to download
    """
    try:
        print('Start Download')
        # make the destination directory to save the data
        if not os.path.isdir(dest_path):
            os.makedirs(dest_path)
        gdd.download_file_from_google_drive(
            file_id='1EKYU6nL0vRs-7sV7g0E_4OJVRlY7LLYC',
            dest_path=os.path.join(dest_path, f'{category}.json'),
            unzip=False,
            overwrite=True)
        print('End Download')
    except Exception as e:
        print(e)


def download(self):
    path = osp.join(self.root, 'raw.zip')
    gdd.download_file_from_google_drive(self.file_id, path)
    extract_zip(path, self.root)
    os.unlink(path)
    shutil.rmtree(self.raw_dir)
    os.rename(osp.join(self.root, 'DBP15K'), self.raw_dir)


def fetch_covid_phl_data():
    try:
        os.remove(COVID_PHL_CSV_PATH)
    except FileNotFoundError:
        print("File not found, unable to delete.")
    gdd.download_file_from_google_drive(file_id=DATA_URL,
                                        dest_path=COVID_PHL_CSV_PATH)


def download(self):
    from google_drive_downloader import GoogleDriveDownloader as gdd
    gdd.download_file_from_google_drive(
        self.ids[self.name],
        osp.join(self.raw_dir, f'{self.name}.zip'),
        unzip=True)
    os.remove(osp.join(self.raw_dir, f'{self.name}.zip'))


def download_zip_folder_from_google_drive(file_id: str,
                                          destination: os.PathLike,
                                          show_size: bool = False,
                                          skip_if_exists: bool = True):
    """Download and extract a ZIP file from Google Drive.

    Args:
        file_id (str): the Google Drive file ID
        destination (os.PathLike): the destination folder
        show_size (bool, optional): whether to display a progress bar.
            Defaults to False.
        skip_if_exists (bool, optional): if true, will do nothing when the
            destination path already exists. Defaults to True.
    """
    destination = URI(destination)
    if skip_if_exists and destination.exists():
        logger.info(
            f"Not downloading {file_id} to {destination} again because it already exists"
        )
        return
    with tempfile.TemporaryDirectory() as tmp_dir:
        zip_file = Path(tmp_dir) / f"{destination.name}.zip"
        logger.info(f"Downloading {file_id} to {zip_file}")
        # pass the path as str; the downloader expects a string path
        gdd.download_file_from_google_drive(file_id=file_id,
                                            dest_path=str(zip_file),
                                            overwrite=True,
                                            showsize=show_size)
        logger.info(f"Unzipping {zip_file} to {destination}")
        shutil.rmtree(destination, ignore_errors=True)
        with zipfile.ZipFile(zip_file, "r") as f:
            f.extractall(destination, _get_members(f))
        logger.info(f"Finished downloading {file_id} to {destination}")


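# Hypothetical usage of download_zip_folder_from_google_drive. URI, logger,
# and _get_members come from the surrounding module; the file ID and
# destination below are placeholders.
# download_zip_folder_from_google_drive(
#     file_id="<google-drive-file-id>",
#     destination="data/images",
#     show_size=True,
# )

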
def _prepare_feature_extraction():
    word_embedding_file = '../src/features/glove.6B.50d.txt'
    paragraph_vector_file = '../src/features/par_vec_trained_400.pkl.docvecs.vectors_docs.npy'
    print('Preparing feature extraction by downloading 2 files: \n {} and \n {}.'
          .format(word_embedding_file, paragraph_vector_file))

    if not os.path.exists(word_embedding_file):
        print('Downloading GloVe word embedding vectors.')
        file_name = word_embedding_file
        gd.download_file_from_google_drive(
            file_id='1kayd5oNRQm8-NCvA8pIrtezbQ-B1_Vmk',
            dest_path=file_name,
            unzip=False,
            showsize=True)
        print('GloVe word embedding vectors were downloaded.')

    if not os.path.exists(paragraph_vector_file):
        print('Downloading pretrained paragraph vectors.')
        file_name = paragraph_vector_file
        gd.download_file_from_google_drive(
            file_id='1vdyGJ4aB71FCaNqJKYX387eVufcH4SAu',
            dest_path=file_name,
            unzip=False,
            showsize=True)
        print('Trained paragraph vector model was downloaded.')


def generate(file):
    model_path = './VAE_CPU_1499_20200922095734.pt'
    with st.spinner('Reconstructing the input image...'):
        if file is not None:
            if not os.path.exists(model_path):
                gdd.download_file_from_google_drive(
                    file_id='1AX0BRIYN5ty-nDwBWi7NE3XbB2QT0ucl',
                    dest_path=model_path,
                    unzip=False)
            pil_img = Image.open(file).convert('RGB')
            img = TRANSFORMS(pil_img)
            img.unsqueeze_(0)
            VAE_Model = torch.jit.load(model_path)
            with torch.no_grad():
                reconstructed_img, mu, logvar = VAE_Model(img)
            display_img = reconstructed_img.squeeze(0)
            display_img = cv2.cvtColor(np.float32(display_img.permute(1, 2, 0)),
                                       cv2.COLOR_RGB2BGR)
            display_img = display_img[:, :, ::-1]  # BGR back to RGB for display
            st.image([pil_img, display_img],
                     caption=['Input Img', 'Reconstructed Img'],
                     width=150)


def download_captions(GDRIVE_ID: str, text_download_location: str,
                      backup_location: str, res_subdir: str):
    """
    The download and processing for the captions / text part of the dataset
    """
    extracted_text_dir = text_download_location[:-4]  # strip the '.zip' suffix
    if os.path.exists(backup_location):
        print("Retrieving dataset from: {}".format(backup_location))
        shutil.copy(backup_location, text_download_location)
        with zipfile.ZipFile(backup_location, "r") as zipfd:
            zipfd.extractall("data/")
    else:
        print("Downloading text from Google Drive ID: {}".format(GDRIVE_ID))
        gdd.download_file_from_google_drive(file_id=GDRIVE_ID,
                                            dest_path=text_download_location,
                                            unzip=True)
        mkdir("data/backup")
        shutil.copy(text_download_location, backup_location)

    # Move and clean up data
    if os.path.isdir(extracted_text_dir):
        os.rename(extracted_text_dir, f"data/{res_subdir}/text")
    else:
        raise Exception(
            "Expected to find directory {}, but it does not exist".format(
                extracted_text_dir))
    os.remove(text_download_location)


def download():
    url = w1.get()
    # extract the file ID from a share url of the form
    # https://drive.google.com/file/d/<file_id>/view
    l = url.split("d/")
    l1 = l[1].split("/view")
    file = w2.get()
    gdd.download_file_from_google_drive(file_id=l1[0],
                                        dest_path='./%s' % file,
                                        unzip=False)
    messagebox.showinfo("information",
                        "%s file was downloaded successfully" % file)


def maybe_download_googledrive(
    google_file_id, file_name, work_directory=".", expected_bytes=None
):
    """Download a file from google drive if it is not already downloaded.

    Args:
        google_file_id (str): The ID of the google file which can be found in the
            file link, e.g. https://drive.google.com/file/d/{google_file_id}/view
        file_name (str): Name of the downloaded file.
        work_directory (str, optional): Directory to download the file to.
            Defaults to ".".
        expected_bytes (int, optional): Expected file size in bytes.

    Returns:
        str: File path of the file downloaded.
    """
    os.makedirs(work_directory, exist_ok=True)
    filepath = os.path.join(work_directory, file_name)
    if not os.path.exists(filepath):
        gdd.download_file_from_google_drive(file_id=google_file_id,
                                            dest_path=filepath)
    else:
        logger.info("File {} already downloaded".format(filepath))
    if expected_bytes is not None:
        statinfo = os.stat(filepath)
        if statinfo.st_size != expected_bytes:
            os.remove(filepath)
            raise IOError("Failed to verify {}".format(filepath))
    return filepath


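# Hypothetical usage of maybe_download_googledrive. The file ID and expected
# size below are placeholders; passing expected_bytes enables the
# post-download size check, which deletes the file and raises on mismatch.
# fpath = maybe_download_googledrive(
#     google_file_id="<google-drive-file-id>",
#     file_name="weights.bin",
#     work_directory="./models",
#     expected_bytes=123456789,
# )

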
def _load_celeba():
    with tempfile.NamedTemporaryFile() as f:
        gdd.download_file_from_google_drive(
            file_id=URLS['celeba'], dest_path=f.name, overwrite=True)
        zip_f = zipfile.ZipFile(f)
        images = []
        for image_file in tqdm(zip_f.namelist(), 'Decompressing', leave=False):
            if os.path.splitext(image_file)[1] == '.jpg':
                with zip_f.open(image_file) as image_f:
                    images.append(image_f.read())
    train_set = {'images': images, 'labels': np.zeros(len(images), int)}
    return dict(train=train_set)