Exemple #1
0
def download(url, path, drive, fileName):
    '''
    Download a dataset archive into *path* and extract it there.

    When *fileName* is given the archive is fetched from Google Drive
    (the file id is the token after the last '=' in *url*); otherwise the
    archive is fetched with wget and its name derived from the URL.

    :param url: source URL (Drive share link or direct link)
    :param path: directory the archive is downloaded to and extracted into,
        e.g. 'sample_data'
    :param drive: authenticated PyDrive client, used only when fileName is given
    :param fileName: archive name, or a falsy value to derive it from *url*
    :return: path to the extracted data (same directory as *path*)
    '''
    def _extract(archive):
        # Shared unzip step: extract the archive into *path*.
        print('----Extracting the file----')
        with ZipFile(archive, 'r') as zipObj:
            zipObj.extractall(path)

    if fileName:  # the caller supplied an explicit archive name
        target = os.path.join(path, fileName)
        if not os.path.isfile(target):
            print('----Downloading Data----')
            # The Drive file id is the token after the last '='.
            file_id = url.split('=')[-1]
            last_weight_file = drive.CreateFile({'id': file_id})
            # BUG FIX: save into *path* (was saved to the CWD as bare
            # fileName, while extraction looked in path/fileName).
            last_weight_file.GetContentFile(target)
        else:
            print('File is already available.')
        _extract(target)
    else:
        fileName = wget.detect_filename(url=url)
        target = os.path.join(path, fileName)
        if not os.path.isfile(target):
            print('----Downloading Data----')
            # BUG FIX: download into *path* instead of the hard-coded
            # 'sample_data' directory so the extract step finds the file.
            wget.download(url, path)
            fileName = wget.detect_filename(url=url)
            target = os.path.join(path, fileName)
        else:
            print('File is already available.')
        _extract(target)
    extractedTo = path
    print('----Extraction Done---- \npathToExtractedData: {}'.
          format(extractedTo))
    return extractedTo
Exemple #2
0
def download_logs(urls):
    """Download each log in *urls* into logs/, refreshing stale copies.

    A local file is replaced when the server reports a newer
    Last-Modified time, or when it reports none at all (freshness cannot
    be determined, so we re-download to be safe).
    """
    for url in urls:
        new_filename = "logs/" + data.filename_from_url(url)
        print("Checking log " + new_filename)
        # If the path exists, check if there's a more up to date version
        # and, if so, remove the old and download the new...
        if os.path.exists(new_filename):
            r = requests.head(url)
            if "last-modified" in r.headers:
                url_date = parsedate(r.headers['last-modified'])
                file_time = datetime.datetime.fromtimestamp(
                    os.path.getmtime(new_filename))
                # Compare both as naive datetimes; mixing naive and aware
                # values would raise TypeError.  (The original also called
                # pytz.UTC.localize() and discarded the results - dead code,
                # removed.)
                url_date = url_date.replace(tzinfo=None)
                file_time = file_time.replace(tzinfo=None)
                if url_date > file_time:
                    os.remove(new_filename)
                    download_log(url, new_filename)
            else:
                # No Last-Modified header: assume stale and re-download.
                os.remove(new_filename)
                download_log(url, new_filename)
        # ... otherwise we just download it without any checks.
        else:
            download_log(url, new_filename)
Exemple #3
0
def ymd_download_terra_data(year, month, day, str_time):
    """Download the Terra (MOD) granules for the given date and overpass time.

    :param year: calendar year of the granule
    :param month: calendar month
    :param day: calendar day
    :param str_time: overpass time string as it appears in the file name,
        e.g. '0355'
    """
    import datetime, os, requests, wget
    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MODS = ["MOD021KM", "MOD02HKM", "MOD02QKM", "MOD03"]
    os.chdir(r"D:\MODISPRO\MODIS")
    # Remove leftover HDF files from a previous run.
    for file in os.listdir():
        if file.find("hdf") > 0:
            os.remove(os.path.join(file))

    for MOD in MODS:
        url = site_start + MOD + "/" + str(year) + "/" + str(days).zfill(3) + "/"
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: the day-of-year in MODIS granule names is three digits
        # (AYYYYDDD), matching the zfill(3) used for the directory above;
        # zfill(0) was a no-op and broke one/two digit days.
        site_end = (MOD + ".A" + str(year) + str(days).zfill(3) + "." + str_time
                    + ".061." + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        wget.download(url + site_end)

    print("processing...")
    os.system(r"D:\MODISPRO\run.bat")
Exemple #4
0
def prepare_dataset(url: str, out_path: Path):
    """Fetch and unpack one dataset archive under *out_path*.

    Work already done is skipped: an existing unpacked dataset directory
    short-circuits everything, and a previously downloaded archive is
    only unpacked.

    Args:
        url (str): url where to download dataset
        out_path (Path): path where to place dataset

    Returns:
        Path: directory the dataset was (or already is) unpacked into.
    """
    root_folder = Path(expanduser(out_path))
    root_folder.mkdir(parents=True, exist_ok=True)

    archive_name = wget.detect_filename(url)
    # The dataset directory is named after the archive's stem.
    dataset_dir = root_folder / archive_name.split(".")[0]

    if dataset_dir.exists():
        print(f'{dataset_dir} exists, skipping')
        return dataset_dir

    archive_path = dataset_dir.parent / archive_name
    if archive_path.exists():
        print(f'{archive_path} exists, skipping')
    else:
        print(f'Downloading dataset from {url} to {archive_path}')
        wget.download(url=url, out=str(archive_path))

    print(f'Unpacking {archive_path} to {dataset_dir}')
    shutil.unpack_archive(filename=str(archive_path), extract_dir=str(dataset_dir))

    return dataset_dir
Exemple #5
0
def download_layout(force_download=False):
    '''
    Fetch the SIHSUS data-layout PDF into the working directory.

    An already-present copy is reused unless *force_download* is true.
    '''
    url = 'ftp://ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Doc/IT_SIHSUS_1603.pdf'
    filename = wget.detect_filename(url)
    if force_download or not os.path.exists(filename):
        wget.download(url, filename)
Exemple #6
0
def download_zip(url, force_download=False, prefix=None):
    '''
    Download TAB_SIH.zip and save in cache.

    :param url: remote archive location
    :param force_download: re-download even when a cached copy exists
    :param prefix: optional string prepended (with '_') to the cached name

    Returns
    -------
        local path to the downloaded file
    '''
    # PEP 8: compare against None with `is not`, not `!=`.
    if prefix is not None:
        filename = '{}_'.format(prefix) + wget.detect_filename(url)
        print('filename', filename)
    else:
        filename = wget.detect_filename(url)

    local_file = os.path.join(PRODUCED_DATASETS, filename)
    if not os.path.exists(local_file) or force_download:
        wget.download(url, local_file)
    return local_file
def download(url, path, fileName):
    '''
    Download an archive into *path* (when missing) and extract it.

    :param url: source URL; for Drive links the file id would be the last
        '='-separated token (the Drive download itself is disabled below)
    :param path: directory used for download and extraction
    :param fileName: archive name, or a falsy value to derive it from *url*
    :return: path the archive was extracted to
    '''
    if fileName:  # if the filename is given and not a null string then
        if not os.path.isfile(os.path.join(path, fileName)):
            print('----Downloading Data----')

            file_id = url.split('=')[-1]  # avoid shadowing builtin id()

            # last_weight_file = drive.CreateFile({'id': file_id})
            # last_weight_file.GetContentFile(fileName)

            print('----Extracting the file----')
            with ZipFile(os.path.join(path, fileName), 'r') as zipObj:
                zipObj.extractall(path)
        else:
            print('File is already available.\n'
                  '----Extracting the file----')
            # Remember where the archive actually lives before *path* is
            # repointed at the per-archive output directory.
            archive = os.path.join(path, fileName)
            path = os.path.join(r'C:\Users\zeeshan\PycharmProjects\PadIN 1.0\EduFUTURE', fileName.split('.')[0])

            if not os.path.exists(path):
                os.makedirs(path)

            # BUG FIX: open the zip archive itself, not the destination
            # directory (ZipFile(path, ...) raised on the fresh directory).
            with ZipFile(archive, 'r') as zipObj:
                zipObj.extractall(path)

    if not fileName:
        fileName = wget.detect_filename(url=url)
        if not os.path.isfile(os.path.join(path, fileName)):
            print('----Downloading Data----')
            # BUG FIX: download into *path* (was hard-coded 'sample_data'),
            # so the extract step below finds the file.
            wget.download(url, path)
            fileName = wget.detect_filename(url=url)

            print('----Extracting the file----')

            with ZipFile(os.path.join(path, fileName), 'r') as zipObj:
                # Extract all the contents of zip file in different directory
                zipObj.extractall(path)
        else:
            print('File is already available.\n----Extracting the file----')
            with ZipFile(os.path.join(path, fileName), 'r') as zipObj:
                zipObj.extractall(path)
    extractedTo = path
    print('----Extraction Done---- \npathToExtractedData: {}'.format(extractedTo))
    return extractedTo
Exemple #8
0
def DownloadFile(URL,
                 Destination="./download",
                 ExpectedBytes=None,
                 IsDestinationFolder=None):
    """
    Download a file if not present, and make sure it's the right size.

    :param String URL: URL of the file you want to download.
    :param String Destination: Folder or file path the download is stored at.
    :param Integer ExpectedBytes: Expected size in bytes; None skips the check.
    :param Boolean IsDestinationFolder: Whether Destination names a folder;
        None means "guess from whether its basename contains a dot".
    """
    # Guess folder-ness from the basename when the caller did not say:
    # a dot suggests a file name rather than a directory.
    if IsDestinationFolder is None:
        IsDestinationFolder = "." not in os.path.basename(Destination)
    if IsDestinationFolder is True and not os.path.isdir(Destination):
        os.makedirs(Destination)

    # HEAD request first: the response headers let wget detect the
    # server-side file name.
    head_request = urllib.request.Request(URL, method="HEAD")
    header_map = dict(urllib.request.urlopen(head_request).info().items())
    if IsDestinationFolder:
        FilePath = os.path.join(Destination,
                                wget.detect_filename(URL, '', header_map))
    else:
        FilePath = wget.detect_filename(URL, Destination, header_map)

    if os.path.exists(FilePath):
        FileName = FilePath
    else:
        FileName = wget.download(URL, Destination)

    size = os.stat(FileName).st_size
    if ExpectedBytes is not None and size != ExpectedBytes:
        print(size)
        raise FileExistsError(
            'Failed to verify ' + FileName +
            '. File exists or corrupted. Can you get to it with a browser?')
    print('Found and verified', FileName)
    return FileName
Exemple #9
0
 def test_file_not_present_download_successful(self):
     """Downloading into a fresh directory should yield every file name."""
     urls = ['ftp://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz']
     remove_dir()
     fetch_files_from_urls(urls, dir)
     files = os.listdir(dir)
     for url in urls:
         # assertIn reports the offending name on failure, unlike the
         # original assertTrue(False) pattern (whose trailing
         # assertTrue(True) was also a no-op).
         self.assertIn(detect_filename(url, None), files)
Exemple #10
0
async def get_content(url,
                      docname='tgvkbot.document',
                      chrome_headers=True,
                      rewrite_name=False,
                      custom_ext=''):
    """Download *url* into the system temp dir and return an open handle.

    :param url: resource to fetch
    :param docname: local file name - forced when *rewrite_name* is true
        ('out'), otherwise used only as a fallback ('default')
    :param chrome_headers: send Chrome-like request headers when true
    :param rewrite_name: force *docname* instead of the detected name
    :param custom_ext: extension appended to the temp file name
    :return: on success a dict with 'content' (file opened 'rb' - caller
        must close it), 'file_name', 'custom_ext', 'temp_path';
        on any failure a dict with just 'url' and 'docname'.
    """
    try:
        # NOTE(review): aiohttp's ClientSession is normally entered with
        # `async with`; plain `with` only works on old aiohttp - confirm
        # the pinned library version.
        with aiohttp.ClientSession(
                headers=CHROME_HEADERS if chrome_headers else {}) as session:
            r = await session.request('GET', url)
            direct_url = str(r.url)
            tempdir = tempfile.gettempdir()
            # wget.detect_filename: 'out' forces the name, 'default' is a
            # fallback when no name can be detected.
            filename_options = {
                'out': docname
            } if rewrite_name else {
                'default': docname
            }
            if direct_url != url:
                # Redirected: re-request the final URL and derive the file
                # name from its headers instead.
                r.release()
                c = await session.request('GET', direct_url)
                file = wget.detect_filename(direct_url,
                                            headers=dict(c.headers),
                                            **filename_options)
                temppath = os.path.join(tempdir, file + custom_ext)
                with open(temppath, 'wb') as f:
                    f.write(await c.read())
            else:
                file = wget.detect_filename(direct_url,
                                            headers=dict(r.headers),
                                            **filename_options)
                temppath = os.path.join(tempdir, file + custom_ext)
                with open(temppath, 'wb') as f:
                    f.write(await r.read())
        # Reopened for the caller, who is responsible for closing it.
        content = open(temppath, 'rb')
        return {
            'content': content,
            'file_name': file,
            'custom_ext': custom_ext,
            'temp_path': tempdir
        }
    except Exception:
        # Deliberate best-effort: any failure degrades to echoing inputs.
        return {'url': url, 'docname': docname}
Exemple #11
0
    def do_dl(self, inp):
        """Download *inp* into the current macOS user's Downloads folder,
        disabling HTTPS certificate verification when the interpreter
        supports it."""
        # Older Pythons lack _create_unverified_context; skip silently.
        try:
            unverified = ssl._create_unverified_context
        except AttributeError:
            pass
        else:
            ssl._create_default_https_context = unverified

        target_name = wget.detect_filename(inp)
        destination = '/Users/' + getpass.getuser() + '/Downloads/' + target_name
        wget.download(inp, destination)
Exemple #12
0
def wget_episode(ep):
    """Download an episode's file via wget, reusing any existing local copy.

    Returns the local file name either way.
    """
    import os.path
    import wget

    expected = wget.detect_filename(ep['url'])
    if os.path.exists(expected):
        print('... file already downloaded')
        return expected

    return wget.download(ep['url'], bar=wget.bar_adaptive)
Exemple #13
0
def ymd_browser_aqua(year, month, day, str_time):
    """Download the Aqua (MYD) granules for a date via the web browser.

    Opens each granule URL in the default browser, clicks through with
    pynput, then polls until the partial (.hdf.part) downloads disappear
    before triggering the local processing batch file.
    """
    import datetime, webbrowser, os, requests, wget, pynput, time, modis
    from pynput.mouse import Button
    from pynput.keyboard import Key
    mouse = pynput.mouse.Controller()
    keyboard = pynput.keyboard.Controller()

    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MYDS = ["MYD021KM", "MYD02HKM", "MYD02QKM", "MYD03"]
    os.chdir(r"D:\MODISPRO\MODIS")
    # Remove leftover HDF files from a previous run.
    for file in os.listdir():
        if file.find("hdf") > 0:
            os.remove(os.path.join(file))

    for MYD in MYDS:
        url = (site_start + MYD + "/" + str(year) + "/"
               + str(days).zfill(3) + "/")
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: the day-of-year in MODIS granule names is three digits
        # (AYYYYDDD), matching the zfill(3) used for the directory above;
        # zfill(0) was a no-op and broke one/two digit days.
        site_end = (MYD + ".A" + str(year) + str(days).zfill(3) + "."
                    + str_time + ".061."
                    + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        webbrowser.open(url + site_end)
        time.sleep(3)
        mouse.click(Button.left)
        modis.py33.closePage()

    print("----- Downloading -----")
    # Poll until no partial downloads remain, then kick off processing.
    time.sleep(60)
    for i in range(0, 1000):
        time.sleep(10)
        flag = 0
        files = os.listdir(r"D:\MODISPRO\MODIS")
        for file in files:
            if "hdf.part" in file:
                flag = 1
        if flag == 0:
            print("processing...")
            os.chdir(r"D:\MODISPRO")
            os.system(r"D:\MODISPRO\run-rename.bat")
            return
        else:
            if i * 10 % 60 == 0:
                print("Download " + str(1 + i * 10 // 60) + " minite(s)...")
Exemple #14
0
 def download(self):
     """Download the object's link into the CWD, announcing progress.

     Does nothing unless ``self.download_status`` is truthy.  The link is
     normalised to start with "http://" when it has no scheme.
     """
     if self.download_status:  # PEP 8: no `== True` comparison
         link = self.getLink()
         if not str(link).startswith("http"):
             url = str("http://" + link)
         else:
             url = str(link)
         info = {
             "file_name": wget.detect_filename(url=url)
         }
         print(f"[*] {info['file_name']} isimli dosya indiriliyor...")
         wget.download(url=url)
         print(f"\n[+] {info['file_name']} isimli dosya indirildi !")
Exemple #15
0
def DownloadFile(URL, Destination = "./download", ExpectedBytes = None, IsDestinationFolder = None):
    """
    Download a file if not present, and make sure it's the right size.

    :param String URL: URL of the file you want to download.
    :param String Destination: Target folder or file path for the download.
    :param Integer ExpectedBytes: Expected file size in bytes; None skips
        the size verification.
    :param Boolean IsDestinationFolder: Whether Destination is a folder;
        None means "infer from whether its basename contains a dot".
    :raises FileExistsError: when the size does not match ExpectedBytes.
    :return: local path of the (possibly pre-existing) file.
    """
    if IsDestinationFolder is None: #Try to indicate from Destination
        # A dot in the basename suggests a file name, not a folder.
        if os.path.basename(Destination).find(".") >= 0:
            IsDestinationFolder = False
        else:
            IsDestinationFolder = True
    if IsDestinationFolder is True:
        if os.path.isdir(Destination):
            pass
        else:
            os.makedirs(Destination)

    # HEAD request first: the headers let wget detect the server-side name.
    Request = urllib.request.Request(URL, method = "HEAD")
    Headers = dict(urllib.request.urlopen(Request).info().items())
    if IsDestinationFolder:
        FilePath = os.path.join(Destination, wget.detect_filename(URL, '', Headers))
    else:
        FilePath = wget.detect_filename(URL, Destination, Headers)

    # Reuse an already-downloaded file when present.
    if not os.path.exists(FilePath):
        FileName = wget.download(URL, Destination)
    else:
        FileName = FilePath
    StatInfo = os.stat(FileName)
    if ExpectedBytes is None or StatInfo.st_size == ExpectedBytes:
        print('Found and verified', FileName)
    else:
        print(StatInfo.st_size)
        raise FileExistsError(
            'Failed to verify ' + FileName + '. File exists or corrupted. Can you get to it with a browser?')
    return FileName
Exemple #16
0
 def download_file(self, entry, link):
     """Download a feed entry's enclosure under a cleaned-up episode name.

     The local name is the cleaned entry title plus the extension of the
     name wget detects from the URL.  Existing files are skipped.
     """
     entry_url = link.href
     original_filename = wget.detect_filename(entry_url)
     # 'download.wget' is wget's placeholder when no name can be detected
     # from the URL; in that case use no extension at all.
     if original_filename == 'download.wget':
         extension = ''
     else:
         _, extension = os.path.splitext(original_filename)
     raw_filename = entry.title
     filename = filenames.clean_filename(raw_filename) + extension
     self.downloaded_episode_filenames.append(filename)
     if not os.path.isfile(filename):
         print("Downloading missing file [" + filename + "]")
         # NOTE(review): this calls a module-level download_file helper,
         # not this method - confirm the helper exists in this module.
         download_file(entry_url, filename)
     elif DEBUG:
         print("Skipping existing file [" + filename + "]")
Exemple #17
0
def download():
    """Download the Library Genesis entry selected in the listbox.

    Resolves the selected result's mirror page to a direct link, downloads
    it, then renames the file to "<Title>.<Extension>" of that result.
    """
    cs = int(str(Options.curselection())[1:-2])
    url = Searchresults[cs].get('Mirror_1')
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')
    # The first anchor on the mirror page is the direct download link;
    # strip the '<a href="' prefix and '">GET</a>'-style suffix.
    download_link = str(soup.find('a'))
    download_link = download_link[9:len(download_link) - 9]
    filename = wget.detect_filename(download_link)
    print(filename)
    # BUG FIX: rename using the *selected* result (index cs), not always
    # the first search result.
    rename = str(
        re.sub('[<>.:/\'",?*]', '', str(Searchresults[cs].get("Title"))) + "." +
        str(Searchresults[cs].get("Extension")))
    wget.download(download_link, download_directory, bar=bar_progress)
    os.rename(download_directory + str(filename),
              str(download_directory + rename))
Exemple #18
0
def ymd_download_aqua_data(year, month, day, str_time):
    """Download the Aqua (MYD) granules for the given date and overpass time.

    :param year: calendar year of the granule
    :param month: calendar month
    :param day: calendar day
    :param str_time: overpass time string as it appears in the file name,
        e.g. '0355'
    """
    import datetime, os, requests, wget
    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MYDS = ["MYD021KM", "MYD02HKM", "MYD02QKM", "MYD03"]
    for MYD in MYDS:
        url = site_start + MYD + "/" + str(year) + "/" + str(days).zfill(3) + "/"
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: the day-of-year in MODIS granule names is three digits
        # (AYYYYDDD), matching the zfill(3) used for the directory above;
        # zfill(0) was a no-op and broke one/two digit days.
        site_end = (MYD + ".A" + str(year) + str(days).zfill(3) + "." + str_time
                    + ".061." + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        wget.download(url + site_end)
Exemple #19
0
 def test_file_present_download_successful(self):
     """Pre-existing files should make the fetcher pick deduplicated names."""
     urls = ['ftp://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz']
     remove_dir()
     os.makedirs(dir)
     filenames = []
     for url in urls:
         empty_file = detect_filename(url, None)
         filenames.append(filename_fix_existing(empty_file))
         # Create an empty dummy file so the download collides with it
         # (context manager instead of bare open()/close()).
         with open(dir + "/" + empty_file, 'w'):
             pass
     fetch_files_from_urls(urls, dir)
     files = os.listdir(dir)
     for filename in filenames:
         # assertIn reports the offending name, unlike the original
         # assertTrue(False) pattern with its no-op assertTrue(True).
         self.assertIn(filename, files)
Exemple #20
0
    def download(self, url, extractionPath, drive, fileName):
        """Fetch an archive and extract it into *extractionPath*.

        When *fileName* is given the archive comes from Google Drive (the
        file id is the token after the last '=' in *url*); otherwise it is
        fetched with wget into 'sample_data'.
        """
        if fileName is not None:  # PEP 8: identity comparison with None
            file_id = url.split('=')[-1]  # avoid shadowing builtin id()

            last_weight_file = drive.CreateFile({'id': file_id})
            last_weight_file.GetContentFile(fileName)

            with ZipFile(fileName, 'r') as zipObj:
                zipObj.extractall(extractionPath)

        if fileName is None:
            wget.download(url, 'sample_data')
            fileName = wget.detect_filename(url=url)
            print(fileName)

            # Create a ZipFile Object and load sample.zip in it
            with ZipFile('sample_data/' + fileName, 'r') as zipObj:
                # Extract all the contents of zip file in different directory
                zipObj.extractall(extractionPath)
Exemple #21
0
def download(client, message):
    """Pyrogram handler: mirror an anonfiles.com link to Telegram.

    Scrapes the direct download URL from the anonfiles page, downloads
    the file with wget, re-uploads it to the requesting chat, logs the
    request to the audit channel, and finally deletes the local copy.
    """
    chat_id = message.from_user.id
    user_message = message.text
    username = message.from_user.username
    if "anonfiles.com" in user_message:
        req = requests.get(user_message)
        if req.status_code == 200:
            data = bs(req.text, 'html.parser')
            # The page's download button carries id="download-url".
            download_link = data.find('a', {'id': 'download-url'}).get('href')
            file_name = wget.detect_filename(download_link)
            reply = f'❤️Downloading **{file_name}**'
            downloading = app.send_message(chat_id, reply)
            wget.download(download_link)
            app.edit_message_text(
                chat_id, downloading.message_id,
                "✅ **Successfully Downloaded**. <i>Uploading File To Telegram</i>"
            )
            app.send_document(chat_id, file_name, caption=file_name)
            app.delete_messages(chat_id, downloading.message_id)
            logs = f'''#Download
            
@{username} Did Below Request

File Name :- {file_name}
Link :- {download_link}'''

            # NOTE(review): `clown` is presumably a module-level chat id
            # for the audit channel - confirm where it is defined.
            app.send_message(clown, logs)
            # Deliberate best-effort cleanup of the downloaded file.
            try:
                os.remove(file_name)
            except:
                pass
        else:
            app.send_message(
                chat_id,
                "**Invalid Link...** Kindly Check Before Sending it \n🌀 **If You Think Its A Bug, Feel Free To Message ** @MxClown"
            )
    else:
        app.send_message(
            chat_id,
            "<b>I Guess You're Lost😮</b>\n<b>Type /Help To Know What I Can do :)</b>\n<b>Join @ClownConfigs For Future Updates.❤️</b>"
        )
async def _(event):
    """Julia: a Telegram userbot message handler.

    Dispatches on keywords in the message's first line (English/Persian):
    small talk, "upload <urls>" (mirror files to the chat), "yt <urls>"
    (youtube-dl downloads), "music <urls>", plus an automatic hook for
    Spotify "Added to <playlist>" notification messages.
    """
    first_line = "l"  # placeholder so later checks never see an undefined name
    try:
        first_line = event.raw_text.lower().splitlines().pop(0)
    except:
        pass
    quiet = any(s in first_line for s in ("quiet", "ساکت", "آروم", "اروم"))
    if "ژاله" in first_line or "زاله" in first_line or "julia" in first_line:
        global my_event
        my_event = event
        sender = await event.message.get_sender()
        # BUG FIX: the original tested a bare generator expression, which
        # is always truthy, so the allow-list matched every sender; wrap
        # it in any() so only the listed usernames get the replies.
        if sender is not None and any(event.sender.username == un
                                      for un in ("Arstar", "Untethered")):
            if any(s in first_line for s in ("laugh", "بخند")):
                await event.reply("😆")
            if any(s in first_line for s in ("you okay", "خوبی")):
                await event.reply("I know of no light. :p")

            if any(s in first_line for s in ("nice work", "thanks", "merci",
                                             "good job", "مرسی")):
                await event.reply("You're welcome. ❤️")

        if any(s in first_line for s in ("debug", "دیباگ")):
            db_msg = await event.reply("DEBUG")
            db_reply = await await_reply(await event.get_chat(), db_msg)
            print("YDebug: " + db_reply.raw_text)

        if any(s in first_line for s in ("hi", "hello", "hey", "yo",
                                         "greetings", "سلام", "هی", "یو!")):
            sender_name = "Unknown"
            if event.sender is not None:
                sender_name = getattr(event.sender, "first_name", "X")
            await event.reply("Julia is operational.\nGreetings,  " +
                              sender_name + "!")

        if any(s in first_line for s in ("upload", "اپلود", "آپلود")):
            urls = event.raw_text.splitlines()
            urls.pop(0)  # drop the command line itself
            for url in urls:
                try:
                    if url == "":
                        continue
                    url_name = wget.detect_filename(url)
                    trying_to_dl_msg = await util.discreet_send(
                        event,
                        'Julia is trying to download "' + url_name +
                        '" from "' + url + '".\nPlease wait ...',
                        event.message,
                        quiet,
                    )
                    d1 = wget.download(url, out="dls/", bar=None)
                    try:
                        trying_to_upload_msg = await util.discreet_send(
                            event,
                            'Julia is trying to upload "' + url_name +
                            '".\nPlease wait ...',
                            trying_to_dl_msg,
                            quiet,
                        )
                        await borg.send_file(
                            await event.get_chat(),
                            d1,
                            reply_to=trying_to_upload_msg,
                            caption=(url_name),
                        )
                    except:
                        await event.reply(
                            "Julia encountered an exception. :(\n" +
                            traceback.format_exc())
                    finally:
                        await util.remove_potential_file(d1)

                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())

        if any(s in first_line for s in ("yt", "youtube", "یوتیوب")):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                if url == "":
                    continue
                file_name_with_ext = ""
                try:
                    trying_to_dl = await util.discreet_send(
                        event,
                        'Julia is trying to download "' + url +
                        '".\nPlease wait ...',
                        event.message,
                        quiet,
                    )
                    # Unique per-download directory so listdir()[0] below
                    # is exactly the file youtube-dl just produced.
                    file_name = "dls/" + str(uuid.uuid4()) + "/"
                    ydl_opts = {
                        "quiet":
                        True,
                        "outtmpl":
                        file_name +
                        "%(playlist_title)s_%(title)s_%(format)s.%(ext)s",
                    }
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        extract_info_aio = aioify(ydl.extract_info)
                        d2 = await extract_info_aio(url)
                        file_name_with_ext = (
                            file_name + (await os_aio.listdir(file_name))[0])
                        trying_to_upload_msg = await util.discreet_send(
                            event,
                            'Julia is trying to upload "' + d2["title"] +
                            '".\nPlease wait ...',
                            trying_to_dl,
                            quiet,
                        )
                        sent_video = await borg.send_file(
                            await event.get_chat(),
                            file_name_with_ext,
                            reply_to=trying_to_upload_msg,
                            caption=str(d2["title"]),
                        )
                        try:
                            full_caption = (
                                "Title: " + str(d2["title"]) + "\nFormat: " +
                                str(d2["format"]) + "\nWidth: " +
                                str(d2["width"]) + "\nHeight: " +
                                str(d2["height"]) + "\nFPS: " +
                                str(d2["fps"]) + "\nPlaylist: " +
                                str(d2["playlist"]) + "\nLikes: " +
                                str(d2["like_count"]) + "\nDislikes: " +
                                str(d2["dislike_count"]) + "\nView Count: " +
                                str(d2["view_count"]) + "\nUploader: " +
                                str(d2["uploader"] + "\nWebpage Url: " +
                                    str(d2["webpage_url"]) +
                                    "\nDescription:\n" +
                                    str(d2["description"])))
                            await borg.send_message(
                                await event.get_chat(),
                                full_caption,
                                sent_video,
                                link_preview=False,
                            )
                        except:
                            # Missing metadata keys are non-fatal; the
                            # video itself was already sent.
                            pass
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext)
        if any(s in first_line for s in ("music", "موسیقی", "اهنگ", "آهنگ")):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                if url == "":
                    continue
                file_name_with_ext = ""
                trying_to_dl = await util.discreet_send(
                    event,
                    'Julia is trying to download "' + url +
                    '".\nPlease wait ...',
                    event.message,
                    quiet,
                )
                try:
                    if any(s in first_line for s in ("automatic", "اتوماتیک")):
                        file_name_with_ext = await get_music(
                            url, cwd="./dls/" + str(uuid.uuid4()) + "/")
                    else:
                        file_name_with_ext = await get_music(
                            url,
                            tg_event=event,
                            cwd="./dls/" + str(uuid.uuid4()) + "/")
                    base_name = str(await
                                    os_aio.path.basename(file_name_with_ext))
                    trying_to_upload_msg = await util.discreet_send(
                        event,
                        'Julia is trying to upload "' + base_name +
                        '".\nPlease wait ...',
                        trying_to_dl,
                        quiet,
                    )
                    sent_music = await borg.send_file(
                        await event.get_chat(),
                        file_name_with_ext,
                        reply_to=trying_to_upload_msg,
                        caption=base_name,
                    )
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext, event)
    # Spotify integration: auto-download tracks announced in the
    # 'Added to <playlist> on Spotify: "<track>" by <artist> <url>' format.
    p = re.compile(r'^Added to (.*) on Spotify: "(.*)" by (.*) https:.*$')
    m = p.match(event.raw_text)
    if m is not None:
        file_name_with_ext = ""
        try:
            file_name_with_ext = await get_music(m.group(3) + " " + m.group(2),
                                                 cwd="./dls/" +
                                                 str(uuid.uuid4()) + "/")
            base_name = str(await os_aio.path.basename(file_name_with_ext))
            sent_music = await borg.send_file(
                await event.get_chat(),
                file_name_with_ext,
                reply_to=event.message,
                caption=base_name,
            )
        except:
            await event.reply("Julia encountered an exception. :(\n" +
                              traceback.format_exc())
        finally:
            await util.remove_potential_file(file_name_with_ext, event)
Exemple #23
0
        url_substring = url_substring.replace('/', '_')
        if url_substring.endswith('_jpg'):
            url_substring = url_substring[:-4]
        if url_substring.endswith('jpg'):
            # ends with jpg but not as an file extension
            url_substring = url_substring[:-3]
        return url_substring + '.jpg'
    except:
        # If we fail while parsing this url, let's just use a random name
        return str(uuid4()) + '.jpg'


if __name__ == '__main__':
    # One folder per run, timestamped so runs never collide.
    album_loc = f'./{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
    os.makedirs(album_loc)
    image_urls = get_background_art_urls()
    log.info(f"Downloading {len(image_urls)} images")
    count = 0
    for url in image_urls:
        filename = wget.detect_filename(url)
        if filename == 'file':
            # wget could not derive a name from the URL; build our own.
            filename = create_filename(url)
        try:
            # BUG FIX: save under the detected/derived file name; the
            # previous literal "(unknown)" placeholder made every image
            # overwrite the same file and left `filename` unused.
            wget.download(url, f"{album_loc}/{filename}")
            count += 1
            log.debug(f"Downloaded {count} files")
        except http_error.HTTPError:
            log.warn("Couldn't retrieve image file, skipping")
            log.debug(f"Failed url: {url}")
    log.info("Finished")
Exemple #24
0
def downloadfromLink(url, name=None):
    """Scrape the page at *url* for a direct video link and download it
    into the (module-level) ``dirpath`` directory with wget.

    The raw page bytes are searched with a battery of host-specific regexes
    (vidnode, azmovie, okhatrimaza, spaceshut/freemoviewap,
    googleusercontent, generic ``.mp4``); the first family that matched is
    followed, some of them through one or more extra requests, until a
    final downloadable URL is reached.

    :param url: page URL to scrape.
    :param name: optional output file name; a name detected from the final
        URL takes precedence over it, and a random ``movie-<n>`` name is
        generated when neither is available.

    Calls ``exit()`` when no candidate link is found on the page; any other
    failure is printed and swallowed.
    """
    try:
        dheaders = None
        # Pretend to be desktop Firefox — several of these hosts refuse
        # requests that lack a browser User-Agent.
        headers = {
            'user-agent':
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        session = requests.Session()
        response = session.get(url, headers=headers)
        try:
            # Some hosts only reveal their links after the session cookie is
            # echoed back in a POST; best-effort, ignore failures.
            cookie = {'Cookie': response.headers['Set-Cookie']}
            headers.update(cookie)
            response = session.post(url, headers=headers)
        except:
            pass

        # One candidate list per known host family, harvested from the page.
        dllinks0 = re.findall(
            "https?://[a-zA-Z0-9._]+.[a-zA-Z0-9._]+/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+.mp4",
            str(response.content))
        dllinks1 = re.findall(
            "https://[a-zA-Z0-9._]+.googleusercontent.com/[-a-zA-Z0-9@:%._\+~#=?&!$^*()-\[\]]+",
            str(response.content))
        dllinks2 = re.findall(
            "vidnode.net/streaming.php[-a-zA-Z0-9@:%._\+~#=\?&!$^*()\-\[\]]+",
            str(response.content))
        dllinks3 = re.findall(
            "/getlink.php\?f=https://[a-zA-Z0-9._]+.azmovie.to/[-a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\]]+",
            str(response.content))
        # dllinks4 = re.findall("https://fmovies.space/[a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\] ]+", str(response.content))
        # dllinks5 = re.findall("https://jawcloud.co/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+", str(response.content))
        dllinks6 = re.findall(
            "https://www.okhatrimaza.art/file/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))
        dllinks7 = re.findall(
            "https://spaceshut.website/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))
        dllinks7 += re.findall(
            "https://freemoviewap2019.in/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))

        # Nothing matched at all — there is nothing to download.
        # NOTE(review): exit() terminates the whole process; callers cannot
        # recover from this path.
        if len(dllinks0) + len(dllinks1) + len(dllinks2) + len(dllinks3) + len(
                dllinks6) + len(dllinks7) == 0:
            print("Can't Download. Could not found video.")
            exit()

        if len(dllinks2) >= 1:
            # vidnode: streaming page -> download page -> CDN file link.
            response = session.get("https://" + dllinks2[0])
            dllinks2 = re.findall(
                "https://vidnode.net/download[a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\] ]+",
                str(response.content))
            response = session.get(dllinks2[0])
            dllinks2 = re.findall(
                "https://[a-zA-Z0-9._]+.cdnfile.info/[a-zA-Z0-9@:%._\+~#=?&!$^*()/\-\[\] ;]+",
                str(response.content))
            # Undo HTML entity escaping in the harvested query strings.
            dllinks2 = [line.replace('&amp;', '&') for line in dllinks2]
            dllink = dllinks2[len(dllinks2) - 1]
            dllink = urlify(dllink)
        elif len(dllinks3) >= 1:
            # azmovie: unfinished branch — exits before the code below it
            # can run, so everything after exit() is dead code.
            dllink = "https://azm.to/" + dllinks3[len(dllinks3) - 1]
            response = session.get(dllink, headers=headers)
            print("Work in Progress!!!")
            exit()
            dllinks3 = re.findall(
                "https://[a-zA-Z0-9._]+.azmovie.to/[-a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\]]+",
                str(response.content))
            dllink = dllinks3[len(dllinks3) - 1]
        elif len(dllinks6) >= 1:
            # okhatrimaza: file page -> server page -> download link; the
            # final request must carry the server page as Referer.
            dllink = dllinks6[len(dllinks6) - 1]
            response = session.get(dllink)
            dllinks6 = re.findall(
                "https://www.okhatrimaza.art/server/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                str(response.content))

            dllink = dllinks6[len(dllinks6) - 1]
            response = session.get(dllink, headers=headers)

            dllinks6 = re.findall(
                "https://www.okhatrimaza.art/download/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                str(response.content))

            referer = {
                "Referer": dllink,
            }
            headers.update(referer)
            dllink = dllinks6[len(dllinks6) - 1]
            # Keep the headers around so wget.download can replay them.
            dheaders = tuple(headers.items())
        elif len(dllinks7) >= 1:
            # spaceshut/freemoviewap: keep following candidate pages until
            # a direct .mp4 URL surfaces.
            dllink = dllinks7[len(dllinks7) - 1]
            while (".mp4" != dllink[-4:]):
                response = session.get(dllink, headers=headers)
                dllinks7 = re.findall(
                    "https://spaceshut.website/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                    str(response.content))
                dllinks7 += re.findall(
                    "https?://[a-zA-Z0-9._]+.[a-zA-Z0-9._]+/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+.mp4",
                    str(response.content))
                dllink = dllinks7[len(dllinks7) - 1]
        elif len(dllinks1) >= 1:
            dllink = dllinks1[len(dllinks1) - 1]
        else:
            dllink = dllinks0[len(dllinks0) - 1]

        # A name detected from the final URL takes precedence over the
        # caller-supplied one; fall back to a random movie-<n> name.
        if wget.detect_filename(dllink):
            name = wget.detect_filename(dllink)
        elif name == None:
            name = 'movie-' + str(random.randint(1, 100000))
        path = dirpath + '/' + name

        if name:
            print("Downloading: ", name)
        wget.download(dllink, path, headers=dheaders)
        print("\nFinished!!!")
    except Exception as exception:
        print("Error:", exception.__class__.__name__)
        print("Can't Download. Something goes wrong.")
def filter_threads(threads, keywords):
    """Filter *threads* down to posts that mention a keyword.

    Scans every post of every thread for each word in *keywords*; a post
    counts as a match when it contains the keyword and none of the words in
    the (module-level) ``excluded_list``. Each match is recorded as
    ``"<keyword>|<post text, lowercased>"``; when the post carries a
    ``'file'`` URL it is appended as ``"|<url>"`` and the attachment is
    downloaded into the (module-level) ``download_dir`` unless already
    present there.

    :param threads: dict mapping thread keys to lists of post dicts; each
        post dict has a ``'post'`` text entry and optionally a ``'file'`` URL.
    :param keywords: iterable of lowercase keywords to search for.
    :return: dict mapping thread keys to their list of match entries;
        threads without matches are omitted.
    :raises Exception: anything raised inside is logged and re-raised.
    """
    try:
        logger.debug('Filtering threads.')
        logger.info('threads: %s', threads)

        threads_filtered = {}

        thread_count = len(threads)
        for thread_loops, key in enumerate(threads, start=1):
            logger.debug('key: %s', key)
            logger.info('Thread #%d of %d', thread_loops, thread_count)
            posts = threads[key]
            logger.debug('posts: %s', posts)

            found_list = []
            post_count = len(posts)
            for post_loops, post in enumerate(posts, start=1):
                logger.info('Post #%d of %d', post_loops, post_count)
                logger.debug('post: %s', post)

                # Hoisted: the lowercased text is reused for every keyword.
                post_text = post['post'].lower()

                word_count = len(keywords)
                for word_loops, word in enumerate(keywords, start=1):
                    logger.debug('Word #%d of %d', word_loops, word_count)
                    if word not in post_text:
                        continue
                    # Any excluded word disqualifies the post.
                    passed_excluded = True
                    for excluded in excluded_list:
                        if excluded in post_text:
                            passed_excluded = False
                            logger.debug('Found excluded word: %s', excluded)
                            logger.debug('Excluding post: %s', post)
                    if passed_excluded:
                        entry = word + '|' + post_text
                        if 'file' in post:
                            entry = entry + '|' + post['file']
                            logger.info('Downloading attachment.')
                            file_name = wget.detect_filename(url=post['file'])
                            logger.debug('file_name: %s', file_name)
                            # BUGFIX: join path components properly. The old
                            # string concatenation produced a wrong path when
                            # download_dir had no trailing separator, so the
                            # existence check never hit and files were
                            # re-downloaded every run.
                            if not os.path.isfile(
                                    os.path.join(download_dir, file_name)):
                                dl_file = wget.download(
                                    post['file'],
                                    out=download_dir.rstrip('/'))
                                logger.info('Successful download: %s', dl_file)
                        found_list.append(entry)

            if found_list:
                threads_filtered[key] = found_list

        return threads_filtered

    except Exception:
        logger.exception('Exception while filtering threads.')
        raise
Exemple #26
0
        def filter_threads(thread_data):
            """Return ``{thread_key: [match entries]}`` for posts containing
            a word from ``self.keyword_list`` but none from
            ``self.excluded_list``.

            Each entry is ``"<keyword>|<post text, lowercased>"``, with
            ``"|<file url>"`` appended when the post has an attachment; the
            attachment is downloaded into ``download_directory`` unless a
            file of that name already exists there. Exceptions are logged
            and swallowed — whatever was collected so far is still returned.
            """
            threads_filtered = {}

            try:
                logger.debug('Filtering threads.')

                thread_count = len(thread_data)
                for thread_index, key in enumerate(thread_data, start=1):
                    logger.info('Thread #' + str(thread_index) + ' of ' +
                                str(thread_count))

                    posts = thread_data[key]
                    logger.debug('posts: ' + str(posts))

                    matches = []
                    post_count = len(posts)
                    for post_index, post in enumerate(posts, start=1):
                        logger.info('Post #' + str(post_index) + ' of ' +
                                    str(post_count))

                        word_count = len(self.keyword_list)
                        for word_index, raw_word in enumerate(
                                self.keyword_list, start=1):
                            logger.debug('Word #' + str(word_index) + ' of ' +
                                         str(word_count))

                            word = raw_word.lower()
                            body = post['post'].lower()
                            if word not in body:
                                continue

                            # A single excluded word disqualifies the post.
                            blocked = False
                            for raw_excluded in self.excluded_list:
                                excluded = raw_excluded.lower()
                                if excluded in body:
                                    blocked = True
                                    logger.debug('Found excluded word: ' +
                                                 excluded)
                                    logger.debug('Excluding post: ' +
                                                 str(post))
                            if blocked:
                                continue

                            entry = word + '|' + body
                            if 'file' in post:
                                entry = entry + '|' + post['file']

                                logger.info('Downloading attachment.')
                                file_name = wget.detect_filename(
                                    url=post['file'])
                                logger.debug('file_name: ' + file_name)

                                if not os.path.isfile(download_directory +
                                                      file_name):
                                    dl_file = wget.download(
                                        post['file'],
                                        out=download_directory.rstrip('/'))
                                    logger.debug('Successful download: ' +
                                                 dl_file)

                            matches.append(entry)

                    if len(matches) > 0:
                        threads_filtered[key] = matches

            except Exception as e:
                logger.exception('Exception while filtering threads.')
                logger.exception(e)

            finally:
                # Deliberate: return the partial result even after a failure.
                return threads_filtered
Exemple #27
0
async def _(event):
    """Telethon event handler implementing the "Julia" chat assistant.

    When the first line of a message mentions Julia (in English or Persian),
    dispatches on trigger words found in that line:

    * small-talk replies (laugh / you okay / thanks ...) — owner only,
    * ``debug`` — sends DEBUG and echoes the round-trip reply to stdout,
    * greetings — replies with an operational banner,
    * ``upload`` — downloads each URL on the following lines with wget and
      re-uploads it into the chat,
    * ``yt`` / ``youtube`` — downloads each URL with youtube-dl into a
      unique directory and uploads the video plus a metadata caption,
    * ``music`` — fetches each URL/query via ``get_music`` and uploads it.

    Independently of the mention, messages matching the Spotify
    'Added to <playlist> ...' share format trigger a music download of
    '<artist> <title>'.
    """
    # Messages without text would make splitlines().pop(0) raise below;
    # start from a harmless placeholder first line.
    first_line = "l"
    try:
        first_line = event.raw_text.lower().splitlines().pop(0)
    except:
        pass
    # 'quiet' (English or Persian) suppresses the intermediate status
    # messages sent via util.discreet_send while downloading/uploading.
    quiet = any(s in first_line for s in ('quiet', 'ساکت', 'آروم', 'اروم'))
    # Only react when the bot is addressed by name (Persian or Latin).
    if ('ژاله' in first_line or 'زاله' in first_line or 'julia' in first_line):
        # print("Julia")
        global my_event
        my_event = event
        # Small talk is restricted to the owner's own account.
        if event.sender is not None and (
            (event.sender).is_self or
            (event.sender).username == "Orphicality"):
            if any(s in first_line for s in ('laugh', 'بخند')):
                await event.reply('😆')
            if any(s in first_line for s in ('you okay', 'خوبی')):
                await event.reply('I know of no light. :p')

            if any(s in first_line for s in ('nice work', 'thanks', 'merci',
                                             'good job', 'مرسی')):
                await event.reply("You're welcome. ❤️")
        # else:
        # else:

        # Round-trip check: send DEBUG, wait for the reply, echo it.
        if any(s in first_line for s in ('debug', 'دیباگ')):
            db_msg = await event.reply('DEBUG')
            db_reply = await await_reply(event.chat, db_msg)
            print("YDebug: " + db_reply.raw_text)

        if any(s in first_line for s in ('hi', 'hello', 'hey', 'yo',
                                         'greetings', 'سلام', 'هی', 'یو!')):
            sender_name = "Unknown"
            if event.sender is not None:
                sender_name = (event.sender).first_name
            await event.reply("Julia is operational.\nGreetings,  " +
                              sender_name + "!")

        # "upload": every line after the first is treated as a URL to
        # download with wget and re-upload into the chat.
        if any(s in first_line for s in ('upload', 'اپلود', 'آپلود')):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                try:
                    if url == '':
                        continue
                    url_name = wget.detect_filename(url)
                    trying_to_dl_msg = await util.discreet_send(
                        event, "Julia is trying to download \"" + url_name +
                        "\" from \"" + url + "\".\nPlease wait ...",
                        event.message, quiet)
                    d1 = wget.download(url, out="dls/", bar=None)
                    try:
                        trying_to_upload_msg = await util.discreet_send(
                            event, "Julia is trying to upload \"" + url_name +
                            "\".\nPlease wait ...", trying_to_dl_msg, quiet)
                        await borg.send_file(event.chat,
                                             d1,
                                             reply_to=trying_to_upload_msg,
                                             caption=(url_name))
                    except:
                        await event.reply(
                            "Julia encountered an exception. :(\n" +
                            traceback.format_exc())
                    finally:
                        # Always drop the local copy, even on upload failure.
                        await util.remove_potential_file(d1)

                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())

        # "yt": download each following URL with youtube-dl and upload the
        # result together with a metadata caption message.
        if any(s in first_line for s in ('yt', 'youtube', 'یوتیوب')):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                if url == '':
                    continue
                file_name_with_ext = ""
                try:
                    trying_to_dl = await util.discreet_send(
                        event, "Julia is trying to download \"" + url +
                        "\".\nPlease wait ...", event.message, quiet)
                    # One fresh UUID directory per download avoids collisions
                    # and lets the single resulting file be found by listdir.
                    file_name = 'dls/' + str(uuid.uuid4()) + '/'
                    ydl_opts = {
                        'quiet':
                        True,
                        'outtmpl':
                        file_name +
                        '%(playlist_title)s_%(title)s_%(format)s.%(ext)s'  # 'dls/%(playlist_title)s_%(title)s_%(format)s_%(autonumber)s.%(ext)s'
                    }
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        extract_info_aio = aioify(ydl.extract_info)
                        d2 = await extract_info_aio(url)
                        file_name_with_ext = file_name + (
                            await os_aio.listdir(file_name))[0]
                        trying_to_upload_msg = await util.discreet_send(
                            event, "Julia is trying to upload \"" +
                            d2['title'] + "\".\nPlease wait ...", trying_to_dl,
                            quiet)
                        sent_video = await borg.send_file(
                            event.chat,
                            file_name_with_ext,
                            reply_to=trying_to_upload_msg,
                            caption=str(d2['title']))
                        try:
                            # NOTE(review): the str() around d2['uploader']
                            # appears to close too late — everything from
                            # "Webpage Url" onward is concatenated inside it.
                            # Output is the same for string values, but this
                            # looks like a misplaced parenthesis — confirm.
                            full_caption = "Title: " + str(
                                d2['title']) + "\nFormat: " + str(
                                    d2['format']) + "\nWidth: " + str(
                                        d2['width']) + "\nHeight: " + str(
                                            d2['height']) + "\nFPS: " + str(
                                                d2['fps']
                                            ) + "\nPlaylist: " + str(
                                                d2['playlist']
                                            ) + "\nLikes: " + str(
                                                d2['like_count']
                                            ) + "\nDislikes: " + str(
                                                d2['dislike_count']
                                            ) + "\nView Count: " + str(
                                                d2['view_count']
                                            ) + "\nUploader: " + str(
                                                d2['uploader'] +
                                                "\nWebpage Url: " +
                                                str(d2['webpage_url']) +
                                                "\nDescription:\n" +
                                                str(d2['description']))
                            await borg.send_message(event.chat,
                                                    full_caption,
                                                    sent_video,
                                                    link_preview=False)
                        except:
                            # Caption is best-effort; the video is already up.
                            pass
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext)
        # "music": fetch each following URL/query via get_music and upload.
        if any(s in first_line for s in ('music', 'موسیقی', 'اهنگ', 'آهنگ')):
            # print(first_line)
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                # print(url)
                if url == '':
                    continue
                file_name_with_ext = ''
                trying_to_dl = await util.discreet_send(
                    event, "Julia is trying to download \"" + url +
                    "\".\nPlease wait ...", event.message, quiet)
                try:
                    # 'automatic' skips the interactive selection path —
                    # presumably get_music prompts via tg_event otherwise;
                    # confirm against get_music's definition.
                    if any(s in first_line for s in ('automatic', 'اتوماتیک')):
                        file_name_with_ext = await get_music(
                            url, cwd="./dls/" + str(uuid.uuid4()) + "/")
                    else:
                        file_name_with_ext = await get_music(
                            url,
                            tg_event=event,
                            cwd="./dls/" + str(uuid.uuid4()) + "/")
                    base_name = str(await
                                    os_aio.path.basename(file_name_with_ext))
                    trying_to_upload_msg = await util.discreet_send(
                        event, "Julia is trying to upload \"" + base_name +
                        "\".\nPlease wait ...", trying_to_dl, quiet)
                    sent_music = await borg.send_file(
                        event.chat,
                        file_name_with_ext,
                        reply_to=trying_to_upload_msg,
                        caption=base_name)
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext, event)
    # Spotify share format: 'Added to <playlist> on Spotify: "<title>" by
    # <artist> https://...' — download "<artist> <title>" as music.
    p = re.compile(r'^Added to (.*) on Spotify: "(.*)" by (.*) https:.*$')
    m = p.match(event.raw_text)
    if m is not None:
        file_name_with_ext = ''
        try:
            # print(m.group(3)+" "+m.group(2)) #DBG
            file_name_with_ext = await get_music(m.group(3) + " " + m.group(2),
                                                 cwd="./dls/" +
                                                 str(uuid.uuid4()) + "/")
            base_name = str(await os_aio.path.basename(file_name_with_ext))
            sent_music = await borg.send_file(event.chat,
                                              file_name_with_ext,
                                              reply_to=event.message,
                                              caption=base_name)
        except:
            await event.reply("Julia encountered an exception. :(\n" +
                              traceback.format_exc())
        finally:
            await util.remove_potential_file(file_name_with_ext, event)
import sys
import os
import wget  # BUGFIX: used below but was never imported
from pytube import YouTube

print("~" * 50)
print("             Auto Downloader")
print("        Created by : Sayak Naskar")
print("~" * 50)
print("1.Download text, pdf, html files .\n2.Download video from youtube .")
select = int(input("Enter Your choice :"))

if select == 1:
    url = input("Enter your URL to download mp3,pdf :")
    save_path = input("Enter your path: ")
    # BUGFIX: download into the requested directory and keep the returned
    # file path. The old code called sys.path.append(save_path) — which
    # returns None — and then tried to open the file "None".
    down_file = wget.download(url, out=save_path)
    print("\nSaved as:", down_file)
    with open(down_file) as fileopen:
        print(fileopen.read())
    print("Your file :" + wget.detect_filename(url=url))
elif select == 2:
    url1 = input("Enter your URL to download your video :")
    # Build the YouTube object once and download its first available stream
    # (the old code constructed it twice and left a dangling expression).
    yt = YouTube(url1)
    yt.streams.first().download()
Exemple #29
0
    def download(self, files_to_download=None, remove_extracted=False):
        """Download, extract and convert the dataset archives.

        Iterates over ``self.download_urls`` (a dict mapping set type —
        e.g. train/valid/test — to a list of archive URLs), downloads each
        archive into ``<data_dir>/<name>_dataset/downloads``, unpacks it,
        converts audio/transcripts via ``self._wav_txt_split`` and finally
        writes one manifest CSV per set type.

        :param files_to_download: optional list of substrings; URLs matching
            none of them are skipped.
        :param remove_extracted: when True, delete each extracted archive
            directory after conversion to reclaim disk space.
        :return: list of manifest CSV paths, one per set type.
        :raises AssertionError: when an archive fails to uncompress, or no
            extracted folder exists for a URL-less entry.
        """
        start = time.time()

        dataset_dir = os.path.join(self.data_dir,
                                   '{}_dataset'.format(self.name))
        os.makedirs(dataset_dir, exist_ok=True)

        # Download/extraction staging areas are shared by all set types, so
        # create them once up front instead of inside the loop.
        downloads_dir = os.path.join(dataset_dir, "downloads")
        os.makedirs(downloads_dir, exist_ok=True)
        extracted_dir = os.path.join(downloads_dir, "extracted")

        manifest_paths = []
        for set_type, urls in self.download_urls.items():
            set_dir = os.path.join(dataset_dir, set_type)
            set_wav_dir = os.path.join(set_dir, 'wav')
            set_txt_dir = os.path.join(set_dir, 'txt')
            for directory in (set_dir, set_wav_dir, set_txt_dir):
                os.makedirs(directory, exist_ok=True)

            for url in urls:
                if url is not None:
                    # Honor the optional whitelist of file-name substrings.
                    if files_to_download and not any(
                            url.find(f) != -1 for f in files_to_download):
                        print("Skipping url: {}".format(url))
                        continue

                    fname = wget.detect_filename(url)
                    name = os.path.splitext(fname)[0]
                    target_fname = os.path.join(downloads_dir, fname)
                    curr_extracted_dir = os.path.join(extracted_dir, name)

                    print('Downloading {}...'.format(fname))
                    # Already-downloaded archives are reused.
                    if not os.path.exists(target_fname):
                        wget.download(url, target_fname)

                    print("Unpacking {}...".format(fname))
                    if not os.path.exists(curr_extracted_dir):
                        # BUGFIX: context manager guarantees the tar handle
                        # is closed even when extraction raises.
                        with tarfile.open(target_fname) as tar:
                            tar.extractall(curr_extracted_dir)

                    assert os.path.exists(
                        extracted_dir
                    ), "Archive {} was not properly uncompressed.".format(
                        fname)

                else:
                    # No URL: expect pre-extracted data to already be there.
                    print('No URL found. Skipping download.')
                    curr_extracted_dir = extracted_dir

                    assert os.path.exists(
                        extracted_dir), 'No folder found in {}'.format(
                            extracted_dir)

                print("Converting and extracting transcripts...")
                self._wav_txt_split(curr_extracted_dir, set_wav_dir,
                                    set_txt_dir, set_type)

                if remove_extracted:
                    shutil.rmtree(curr_extracted_dir)

            manifest_paths.append(
                self._create_manifest(
                    set_wav_dir,
                    os.path.join(self.data_dir,
                                 '{}.{}.csv'.format(self.name, set_type)),
                    prune=set_type.startswith('train')))

        print("Done. Time elapsed {:.2f}s".format(time.time() - start))

        return manifest_paths
    logger.info(
        "The target download folder for 64bit KBs doesn't exist, create it.")

target_download_folder_x86 = target_download_folder + "x86"
logger.info(f"The target download folder for 32bit KBs is "
            f"{target_download_folder_x86}")

if not os.path.exists(target_download_folder_x86):
    os.makedirs(target_download_folder_x86)
    logger.info(
        "The target download folder for 32bit KBs doesn't exist, create it.")

# Map each architecture tag to its destination folder so one loop body
# handles both the 32- and 64-bit KB downloads.
arch_folders = {
    "x64": target_download_folder_x64,
    "x86": target_download_folder_x86,
}
tmp_file_name = ""
for item in download_links:
    dest = arch_folders.get(item[0])
    if dest is None:
        continue
    tmp_file_name = wget.detect_filename(url=item[2])
    # Saved as "<KB id>_<original file name>" inside the arch folder.
    wget.download(url=item[2],
                  out=dest + os.sep + item[1] + "_" + tmp_file_name)

logger.info("Finish download progress.")
logger.info("##############################")

source_folder = target_download_folder_x86
target_folder = target_download_folder_x86 + "_" + "msp"
Exemple #31
0
try:
    import wget
except ImportError:
    # Notebook escape hatch: install the package in-place, then retry.
    get_ipython().system('pip install wget  # you may need pip3')
    import wget

# ---

# # Run Workflow

# ## Prepare Data

# In[6]:

# Fetch the training CSV once; skip the download when a file with the name
# wget derives from the URL already exists locally.
train_data_url = "http://s3.amazonaws.com/verta-starter/spam.csv"
train_data_filename = wget.detect_filename(train_data_url)
if not os.path.isfile(train_data_filename):
    wget.download(train_data_url)

# In[7]:

# latin-1 tolerates the non-UTF-8 bytes present in this dataset.
raw_data = pd.read_csv(train_data_filename, delimiter=',', encoding='latin-1')

raw_data.head()

# In[8]:

# turn spam/ham to 0/1, and remove unnecessary columns
# NOTE(review): LabelEncoder is presumably sklearn's, imported in an earlier
# notebook cell — confirm.
raw_data.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1, inplace=True)
raw_data.v1 = LabelEncoder().fit_transform(raw_data.v1)