import os

import dload


def ready_audio(book_name):
    # Look up the audiobook's metadata (name and download link)
    x = get_book_link(book_name)
    print(x)
    # Create the per-book music directory if it does not exist yet
    if not os.path.exists("./static/music/" + x['book_name']):
        os.mkdir("./static/music/" + x['book_name'])
    # Download the audio archive and extract it into the book's directory
    m = dload.save_unzip(x['audio_link'], "./static/music/" + x['book_name'])
    print(m)
import os
import shutil

import dload


def download_zip():
    path = 'ireland/covidData'
    try:
        # Remove the stale copy of the dataset before re-downloading
        shutil.rmtree(path)
        dload.save_unzip(
            "https://opendata-geohive.hub.arcgis.com/datasets/27d401c9ae084097bb1f3a69b69462a1_0.zip"
        )
        # Rename the extracted folder and clean up the downloaded archive
        os.rename("ireland/27d401c9ae084097bb1f3a69b69462a1_0", path)
        os.remove('ireland/27d401c9ae084097bb1f3a69b69462a1_0.zip')
        run()
    except OSError as e:
        print("Error: %s : %s" % (path, e.strerror))
import os

import dload


def download_dataset(directory_path,
                     train_set='application_train.csv',
                     test_set='application_test.csv'):
    """Download the Kaggle dataset if it is not already present.

    If the dataset is missing, it is downloaded from the configured URL,
    the archive is unzipped into the given directory (e.g. data/raw), and
    the downloaded zip file is deleted afterwards.
    """
    # Check whether both the train and test CSV files already exist
    if not (os.path.exists(os.path.join(directory_path, test_set))
            and os.path.exists(os.path.join(directory_path, train_set))):
        print("Downloading the datasets from Kaggle ...")
        # `c` is the project's config module; c.DATASET_URL points at the archive
        dload.save_unzip(c.DATASET_URL, directory_path, delete_after=True)
    else:
        print("Data is already in directory")
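# An illustrative call to the function above, assuming the config module `c`
# defines DATASET_URL; the directory follows the data/raw layout mentioned in
# the docstring.
download_dataset('data/raw')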
import os

import dload


def download_data(dataset_name):
    """Downloads data if not yet existent."""
    DATA_URLs = AttrDict(
        nav_9rooms='https://www.seas.upenn.edu/~oleh/datasets/gcp/nav_9rooms.zip',
        nav_25rooms='https://www.seas.upenn.edu/~oleh/datasets/gcp/nav_25rooms.zip',
        sawyer='https://www.seas.upenn.edu/~oleh/datasets/gcp/sawyer.zip',
        h36m='https://www.seas.upenn.edu/~oleh/datasets/gcp/h36m.zip',
    )
    if dataset_name not in DATA_URLs:
        raise ValueError("Dataset identifier {} is not known!".format(dataset_name))
    if not os.path.exists(get_dataset_path(dataset_name)):
        print("Downloading dataset from {} to {}.".format(
            DATA_URLs[dataset_name], os.environ["GCP_DATA_DIR"]))
        print("This may take a few minutes...")
        dload.save_unzip(DATA_URLs[dataset_name],
                         os.environ["GCP_DATA_DIR"],
                         delete_after=True)
        print("...Done!")
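# A possible invocation, assuming the surrounding module defines
# get_dataset_path() and that GCP_DATA_DIR points at the download directory;
# the fallback path below is illustrative, not from the original snippet.
os.environ.setdefault("GCP_DATA_DIR", "./data/gcp")
download_data("sawyer")  # any key of DATA_URLs: nav_9rooms, nav_25rooms, sawyer, h36m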
if pathlib.Path(args.npz_dir).exists():
    assert pathlib.Path(args.npz_dir).is_dir()
else:
    pathlib.Path(args.npz_dir).mkdir(parents=True, exist_ok=True)

if args.n_files is not None:
    assert isinstance(args.n_files, int)
    assert args.n_files > 0

# ============================================================
if args.download:
    pathlib.Path(args.midi_dir).mkdir(parents=True, exist_ok=True)
    print('Downloading dataset...')
    dload.save_unzip(config.dataset_url, args.midi_dir)

ext_list = ['*.midi', '*.mid']
midi_filenames = []
for ext in ext_list:
    # Collect all files matching the extension, recursively, as path strings
    ext_filenames = [str(p) for p in pathlib.Path(args.midi_dir).rglob(ext)]
    midi_filenames += ext_filenames
print(f'Found {len(midi_filenames)} midi files')
assert len(midi_filenames) > 0

if args.n_files is not None:
    # Sample at most n_files filenames without replacement
    n_files = max(0, min(args.n_files, len(midi_filenames)))
    midi_filenames = np.random.choice(midi_filenames, n_files, replace=False)
import dload


def download_unzip_pretrained_word_embeddings(url, save_path):
    # delete_after=True removes the downloaded zip once it is extracted
    dload.save_unzip(url, save_path, delete_after=True)
    print("Finished downloading and unzipping GloVe!")
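# Hypothetical invocation of the helper above; the GloVe URL is the commonly
# used Stanford NLP download link and is an assumption here, not part of the
# original snippet.
download_unzip_pretrained_word_embeddings(
    "https://nlp.stanford.edu/data/glove.6B.zip",  # ~800 MB archive
    "./embeddings/glove")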
# https://fellow.ams3.digitaloceanspaces.com/11072020.zip
apiDirPath = dir_path + "/api-files"
print(dir_path)
templateFileName = dir_path + "/Mapping_Template.xlsx"
if not os.path.isdir(apiDirPath) or not os.path.isfile(dir_path + "/11072020.zip"):
    try:
        import dload

        print("M3 files do not exist... please wait while downloading...")
        dload.save_unzip(
            "https://fellow.ams3.digitaloceanspaces.com/11072020.zip", dir_path
        )
    except Exception as e:
        print("Error code:", e)
else:
    print("M3 config exists from: 11/07/2020")


class bcolors:
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
import os

import dload
import nltk
import pandas as pd
from gensim import corpora


def lda_analysis(load_model, lda_model_type, data_folder, results_folder,
                 csv_file_name, mallet_download_folder):
    print("\nLDA analysis")
    check_type(lda_model_type, ['mallet', 'lda'], 'lda model')

    # Downloads
    print('\nDownloads')
    nltk.download('stopwords')
    if not os.path.exists(os.path.join(mallet_download_folder, 'mallet-2.0.8')):
        dload.save_unzip("http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip",
                         mallet_download_folder)
    mallet_path = os.path.join(mallet_download_folder, 'mallet-2.0.8', 'bin', 'mallet')
    os.environ.update(
        {'MALLET_HOME': os.path.join(mallet_download_folder, 'mallet-2.0.8')})

    # Load data
    data = pd.read_csv(os.path.join(data_folder, csv_file_name))
    texts_original = data['text'].values.tolist()
    tonality = data['tonality'].values.tolist()
    # tonality = [change_class_label(value) for value in tonality]
    toxicity = data['toxicity'].values.tolist()

    # Preprocess texts
    texts_processed = preprocessing(texts_original)

    # Create dictionary
    id2word = corpora.Dictionary(texts_processed)

    # Get term document frequency
    corpus = [id2word.doc2bow(text) for text in texts_processed]

    # Get optimal model
    if not load_model:
        model = get_optimal_model(results_folder=results_folder,
                                  corpus=corpus,
                                  id2word=id2word,
                                  lda_model_type=lda_model_type,
                                  texts=texts_processed,
                                  mallet_path=mallet_path)
        save_lda_model(lda_model=model,
                       save_path=os.path.join(results_folder,
                                              lda_model_type + '_model.bin'))
    else:
        model = load_lda_model(
            model_path=os.path.join(results_folder, lda_model_type + '_model.bin'))

    # Find dominant topic in each text
    topic_nums, topic_keywords = get_dominant_topic_df(
        lda_model=model,
        model_type=lda_model_type,
        corpus=corpus,
        texts=texts_original)

    # Save to an Excel file
    df_result = pd.DataFrame({
        'texts': texts_original,
        'tonality': tonality,
        'toxicity': toxicity,
        'dominant_topic': topic_nums,
        'topic_keywords': topic_keywords
    })
    df_result.to_excel(os.path.join(results_folder,
                                    'results_' + lda_model_type + '.xlsx'),
                       index=False)

    # Distribution of tonality and toxicity by topics
    plot_label_by_topic(df=df_result,
                        label_name='tonality',
                        model_type=lda_model_type,
                        results_folder=results_folder)
    plot_label_by_topic(df=df_result,
                        label_name='toxicity',
                        model_type=lda_model_type,
                        results_folder=results_folder)
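# A possible call to the pipeline above, assuming the helper functions it
# uses (check_type, preprocessing, get_optimal_model, save_lda_model,
# load_lda_model, get_dominant_topic_df, plot_label_by_topic) are defined in
# the same module; every path and the CSV name here are illustrative.
lda_analysis(load_model=False,
             lda_model_type='mallet',
             data_folder='./data',
             results_folder='./results',
             csv_file_name='texts.csv',
             mallet_download_folder='./mallet')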
def _download_dataset_files():
    import dload

    dataset_files_url = 'https://cunicz-my.sharepoint.com/:u:/g/personal/53500436_cuni_cz/EYh2GS4MFKVGoNTn5_Wm840BaYe6ZQ5ihouRjm0kAVed_A?download=1'
    dataset_files_dir = base_path
    print("Downloading dataset files (~1GB), could take a while..")
    dload.save_unzip(dataset_files_url, dataset_files_dir)
#!/usr/bin/env python3
import dload

# Fetch the train and eval splits into the same dataset directory
dload.save_unzip(
    "https://model-zoo-data.latentai.io/open_images_10_classes_200_train/2020-03-17-00-45-41/c38f244b60271296dc68c5a9d3f83537.zip",
    "./datasets/open_images_10_classes_200/")
dload.save_unzip(
    "https://model-zoo-data.latentai.io/open_images_10_classes_200_eval/2020-03-17-00-57-38/38511464608f326cc33a5076dd06f658.zip",
    "./datasets/open_images_10_classes_200/")
print('Downloaded!')
import dload


def downloadCountryBorders():
    source = "https://opendata.arcgis.com/datasets/252471276c9941729543be8789e06e12_0.zip"
    dload.save_unzip(source,
                     extract_path='../dat/temp/countryBorders',
                     delete_after=True)
import dload


def dl_sas(update, context):
    dload.save_unzip("https://javbabes.me/accounts.zip", "./")
    sendMessage(
        "Ready to use. Please add [email protected] as a Content Manager "
        "on your shared drives.",
        context.bot, update, 'Markdown')
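# The snippets above all follow the same shape: check whether the target
# already exists, create it if needed, call dload.save_unzip(), and report
# progress. A minimal, self-contained sketch of that shared pattern follows;
# the URL and directory in the usage comment are hypothetical.
import os

import dload


def fetch_and_unzip(url, target_dir):
    """Download a zip archive and extract it into target_dir, skipping the
    download when the directory already has content."""
    if os.path.isdir(target_dir) and os.listdir(target_dir):
        print(f"{target_dir} already populated; skipping download.")
        return
    os.makedirs(target_dir, exist_ok=True)
    print(f"Downloading {url} ...")
    # delete_after=True removes the downloaded .zip once it is extracted
    dload.save_unzip(url, target_dir, delete_after=True)
    print("...Done!")


# Hypothetical usage:
# fetch_and_unzip("https://example.com/archive.zip", "./data/raw")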