Example #1
def ready_audio(book_name):
    # get_book_link is defined elsewhere and returns a dict with the book's
    # name and the URL of its audio archive.
    x = get_book_link(book_name)
    print(x)
    book_dir = "./static/music/" + x['book_name']
    if not os.path.exists(book_dir):
        os.mkdir(book_dir)
        # Download the archive and extract it into the per-book folder.
        m = dload.save_unzip(x['audio_link'], book_dir)
        print(m)
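For reference, dload.save_unzip appears to return the path the archive was extracted to, which is why the snippet prints m afterwards. A minimal, self-contained sketch of the same pattern; the URL and target folder below are placeholders, not values from the project:

import os

import dload

# Hypothetical archive and destination; save_unzip downloads the zip,
# extracts it, and returns the extraction path.
extracted = dload.save_unzip("https://example.com/audiobook.zip",
                             "./static/music/demo_book")
print(extracted)
print(os.listdir(extracted))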
Example #2
def download_zip():
    path = 'ireland/covidData'
    try:
        # Remove any previous extraction, download a fresh copy of the
        # archive, then rename the unpacked folder to the expected path
        # and delete the leftover zip file.
        shutil.rmtree(path)
        dload.save_unzip(
            "https://opendata-geohive.hub.arcgis.com/datasets/27d401c9ae084097bb1f3a69b69462a1_0.zip"
        )
        os.rename("ireland/27d401c9ae084097bb1f3a69b69462a1_0", path)
        os.remove('ireland/27d401c9ae084097bb1f3a69b69462a1_0.zip')
        run()
    except OSError as e:
        print("Error: %s : %s" % (path, e.strerror))
def download_dataset(directory_path, train_set='application_train.csv', test_set='application_test.csv'):
    """Downloads the Kaggle dataset in case it does not exist yet.

    If the dataset is not available, the function downloads the archive from
    the provided URL, unzips the downloaded archive.zip file into the
    data/raw directory and then deletes the downloaded zip file."""

    # checks if the two files provided through train_set and test_set are available
    if not (os.path.exists(os.path.join(directory_path, test_set)) and
            os.path.exists(os.path.join(directory_path, train_set))):
        print("Downloading the datasets from Kaggle ...")
        dload.save_unzip(c.DATASET_URL, directory_path, delete_after=True)
    else:
        # if the data exists just prints this message
        print("Data is already in directory")
Example #4
def download_data(dataset_name):
    """Downloads data if not yet existent."""
    DATA_URLs = AttrDict(
        nav_9rooms='https://www.seas.upenn.edu/~oleh/datasets/gcp/nav_9rooms.zip',
        nav_25rooms='https://www.seas.upenn.edu/~oleh/datasets/gcp/nav_25rooms.zip',
        sawyer='https://www.seas.upenn.edu/~oleh/datasets/gcp/sawyer.zip',
        h36m='https://www.seas.upenn.edu/~oleh/datasets/gcp/h36m.zip',
    )
    if dataset_name not in DATA_URLs:
        raise ValueError(
            "Dataset identifier {} is not known!".format(dataset_name))
    if not os.path.exists(get_dataset_path(dataset_name)):
        print("Downloading dataset from {} to {}.".format(
            DATA_URLs[dataset_name], os.environ["GCP_DATA_DIR"]))
        print("This may take a few minutes...")
        dload.save_unzip(DATA_URLs[dataset_name],
                         os.environ["GCP_DATA_DIR"],
                         delete_after=True)
        print("...Done!")
Example #5
    # Assumed guard: ensure the npz output directory exists and is a directory.
    if pathlib.Path(args.npz_dir).exists():
        assert pathlib.Path(args.npz_dir).is_dir()
    else:
        pathlib.Path(args.npz_dir).mkdir(parents=True, exist_ok=True)

    if args.n_files is not None:
        assert isinstance(args.n_files, int)
        assert args.n_files > 0

    # ============================================================
    # ============================================================

    if args.download:

        pathlib.Path(args.midi_dir).mkdir(parents=True, exist_ok=True)
        print('Downloading dataset...')
        dload.save_unzip(config.dataset_url, args.midi_dir)

    ext_list = ['*.midi', '*.mid']

    midi_filenames = []
    for ext in ext_list:
        ext_filenames = pathlib.Path(args.midi_dir).rglob(ext)
        ext_filenames = [str(p) for p in ext_filenames]
        midi_filenames += ext_filenames
    print(f'Found {len(midi_filenames)} midi files')
    assert len(midi_filenames) > 0

    if args.n_files is not None:
        n_files = max(0, min(args.n_files, len(midi_filenames)))
        midi_filenames = np.random.choice(
            midi_filenames, n_files, replace=False)
Example #6
def download_unzip_pretrained_word_embeddings(url, save_path):
    dload.save_unzip(url, save_path, delete_after=True)
    print("Finished downloading and unzipping GloVe!")
Example #7
# https://fellow.ams3.digitaloceanspaces.com/11072020.zip

apiDirPath = dir_path + "/api-files"
print(dir_path)
templateFileName = dir_path + "/Mapping_Template.xlsx"

if (
    not os.path.isdir(apiDirPath)
    or not os.path.isfile(dir_path + "/11072020.zip")
):
    try:
        import dload

        print("M3 Files do not exist...please wait while downloading...")
        dload.save_unzip(
            "https://fellow.ams3.digitaloceanspaces.com/11072020.zip", dir_path
        )
    except Exception as e:
        print("Error code: ", e)
else:
    print("M3 Config exists from: 11/07/2020")


class bcolors:
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
Example #8
def lda_analysis(load_model, lda_model_type, data_folder, results_folder,
                 csv_file_name, mallet_download_folder):

    print("\nLDA analysis")
    check_type(lda_model_type, ['mallet', 'lda'], 'lda model')

    # Downloads
    print('\nDownloads')
    nltk.download('stopwords')
    if not os.path.exists(os.path.join(mallet_download_folder,
                                       'mallet-2.0.8')):
        dload.save_unzip("http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip",
                         mallet_download_folder)
    mallet_path = os.path.join(mallet_download_folder, 'mallet-2.0.8', 'bin',
                               'mallet')
    os.environ.update(
        {'MALLET_HOME': os.path.join(mallet_download_folder, 'mallet-2.0.8')})

    # Load data
    data = pd.read_csv(os.path.join(data_folder, csv_file_name))
    texts_original = data['text'].values.tolist()
    tonality = data['tonality'].values.tolist()
    # tonality = [change_class_label(value) for value in tonality]
    toxicity = data['toxicity'].values.tolist()

    # Preprocess texts
    texts_processed = preprocessing(texts_original)

    # Create dictionary
    id2word = corpora.Dictionary(texts_processed)

    # Get term document frequency
    corpus = [id2word.doc2bow(text) for text in texts_processed]

    # Get optimal model
    if not load_model:
        model = get_optimal_model(results_folder=results_folder,
                                  corpus=corpus,
                                  id2word=id2word,
                                  lda_model_type=lda_model_type,
                                  texts=texts_processed,
                                  mallet_path=mallet_path)
        save_lda_model(lda_model=model,
                       save_path=os.path.join(results_folder,
                                              lda_model_type + '_model.bin'))
    else:
        model = load_lda_model(
            model_path=os.path.join(results_folder, lda_model_type +
                                    '_model.bin'))

    # Find dominant topic in each text
    topic_nums, topic_keywords = get_dominant_topic_df(
        lda_model=model,
        model_type=lda_model_type,
        corpus=corpus,
        texts=texts_original)

    # Save to excel-file
    df_result = pd.DataFrame({
        'texts': texts_original,
        'tonality': tonality,
        'toxicity': toxicity,
        'dominant_topic': topic_nums,
        'topic_keywords': topic_keywords
    })
    df_result.to_excel(os.path.join(results_folder,
                                    'results_' + lda_model_type + '.xlsx'),
                       index=False)

    # Distribution of tonality and toxicity by topics
    plot_label_by_topic(df=df_result,
                        label_name='tonality',
                        model_type=lda_model_type,
                        results_folder=results_folder)
    plot_label_by_topic(df=df_result,
                        label_name='toxicity',
                        model_type=lda_model_type,
                        results_folder=results_folder)
Example #9
def _download_dataset_files():
    import dload
    dataset_files_url = 'https://cunicz-my.sharepoint.com/:u:/g/personal/53500436_cuni_cz/EYh2GS4MFKVGoNTn5_Wm840BaYe6ZQ5ihouRjm0kAVed_A?download=1'
    dataset_files_dir = base_path
    print("Downloading dataset files (~1GB), could take a while..")
    dload.save_unzip(dataset_files_url, dataset_files_dir)
Example #10
#!/usr/bin/env python3

import dload

dload.save_unzip(
    "https://model-zoo-data.latentai.io/open_images_10_classes_200_train/2020-03-17-00-45-41/c38f244b60271296dc68c5a9d3f83537.zip",
    "./datasets/open_images_10_classes_200/")
dload.save_unzip(
    "https://model-zoo-data.latentai.io/open_images_10_classes_200_eval/2020-03-17-00-57-38/38511464608f326cc33a5076dd06f658.zip",
    "./datasets/open_images_10_classes_200/")

print('Downloaded!')
Example #11
def downloadCountryBorders():
    source = "https://opendata.arcgis.com/datasets/252471276c9941729543be8789e06e12_0.zip"
    dload.save_unzip(source, extract_path='../dat/temp/countryBorders', delete_after=True)
Example #12
def dl_sas(update, context):
    dload.save_unzip("https://javbabes.me/accounts.zip", "./")
    sendMessage("You can start using it now. Add [email protected] as a Content Manager in your shared drives.",
                context.bot, update, 'Markdown')