def download(url, path, drive, fileName):
    """Download a dataset archive and extract it into *path*.

    When *fileName* is given, the archive is fetched from Google Drive
    (the file id is the last ``=``-separated segment of *url*); otherwise
    the name is derived from the URL and the file is fetched with wget.

    :param url: source URL (Drive share link or direct link).
    :param path: destination directory, e.g. ``'sample_data'``.
    :param drive: authenticated PyDrive client (used only when *fileName*
        is given).
    :param fileName: archive file name, or a falsy value to auto-detect.
    :return: path to the extracted data (same as *path*).
    """

    def _extract(archive):
        # Extract every member of the archive into the destination directory.
        with ZipFile(archive, 'r') as zipObj:
            zipObj.extractall(path)

    if fileName:
        if not os.path.isfile(os.path.join(path, fileName)):
            print('----Downloading Data----')
            # FIX: renamed `id` -> `file_id`; the original shadowed the builtin.
            file_id = url.split('=')[-1]
            last_weight_file = drive.CreateFile({'id': file_id})
            last_weight_file.GetContentFile(fileName)
            print('----Extracting the file----')
        else:
            print('File is already available.\n----Extracting the file----')
        _extract(os.path.join(path, fileName))
    else:
        fileName = wget.detect_filename(url=url)
        if not os.path.isfile(os.path.join(path, fileName)):
            print('----Downloading Data----')
            wget.download(url, 'sample_data')
            # Re-detect in case the server supplied a different name.
            fileName = wget.detect_filename(url=url)
            print('----Extracting the file----')
        else:
            print('File is already available.\n----Extracting the file----')
        _extract(os.path.join(path, fileName))

    extractedTo = path
    print('----Extraction Done---- \npathToExtractedData: {}'.format(extractedTo))
    return extractedTo
def download_logs(urls):
    """Fetch each log URL into ``logs/``, re-downloading a log only when
    the remote copy is newer than the local one.

    Freshness is decided by the HTTP ``Last-Modified`` header versus the
    local file's mtime; when the server sends no such header the log is
    always refreshed.
    """
    for url in urls:
        new_filename = "logs/" + data.filename_from_url(url)
        # FIX: converted Python-2 print statements to the print() function.
        print("Checking log " + new_filename)
        if os.path.exists(new_filename):
            r = requests.head(url)
            if "last-modified" in r.headers:
                url_time = r.headers['last-modified']
                # Strip tzinfo from both stamps so they compare directly.
                # (The original also called pytz.UTC.localize() and threw
                # the results away — a no-op, removed.)
                url_date = parsedate(url_time).replace(tzinfo=None)
                file_time = datetime.datetime.fromtimestamp(
                    os.path.getmtime(new_filename)).replace(tzinfo=None)
                if url_date > file_time:
                    os.remove(new_filename)
                    download_log(url, new_filename)
            else:
                # No Last-Modified header: refresh unconditionally.
                os.remove(new_filename)
                download_log(url, new_filename)
        else:
            # Not on disk yet: download without any checks.
            download_log(url, new_filename)
def ymd_download_terra_data(year, month, day, str_time):
    """Download the Terra (MOD) granules for the given date/time with wget,
    then run the local processing batch file."""
    # Open terra data download sites based on the current day.
    import datetime, os, requests, wget
    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MODS = ["MOD021KM", "MOD02HKM", "MOD02QKM", "MOD03"]
    os.chdir(r"D:\MODISPRO\MODIS")
    # Remove leftover HDF files from a previous run.
    for file in os.listdir():
        if file.find("hdf") > 0:
            os.remove(os.path.join(file))
    for MOD in MODS:
        url = site_start + MOD + "/" + str(year) + "/" + str(days).zfill(3) + "/"
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: MODIS granule names pad day-of-year to 3 digits
        # (AYYYYDDD); the original zfill(0) was a no-op and produced
        # wrong names for days < 100.
        site_end = (MOD + ".A" + str(year) + str(days).zfill(3) + "." + str_time
                    + ".061." + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        wget.download(url + site_end)
    print("processing...")
    os.system(r"D:\MODISPRO\run.bat")
def prepare_dataset(url: str, out_path: Path):
    """
    Download and unpack a single dataset archive.

    A no-op when the dataset folder already exists; when only the archive
    is present the download is skipped and the archive is just unpacked.

    Args:
        url (str): url where to download dataset
        out_path (Path): path where to place dataset
    """
    root = Path(expanduser(out_path))
    root.mkdir(parents=True, exist_ok=True)

    archive_name = wget.detect_filename(url)
    dataset_dir = root / archive_name.split(".")[0]
    if dataset_dir.exists():
        print(f'{dataset_dir} exists, skipping')
        return dataset_dir

    archive_path = dataset_dir.parent / archive_name
    if archive_path.exists():
        print(f'{archive_path} exists, skipping')
    else:
        print(f'Downloading dataset from {url} to {archive_path}')
        wget.download(url=url, out=str(archive_path))

    print(f'Unpacking {archive_path} to {dataset_dir}')
    shutil.unpack_archive(filename=str(archive_path), extract_dir=str(dataset_dir))
    return dataset_dir
def download_layout(force_download=False):
    '''
    Fetch the SIHSUS layout PDF into the current folder.

    The download is skipped when the file is already present, unless
    *force_download* is true.
    '''
    url = 'ftp://ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Doc/IT_SIHSUS_1603.pdf'
    filename = wget.detect_filename(url)
    if force_download or not os.path.exists(filename):
        wget.download(url, filename)
def download_zip(url, force_download=False, prefix=None):
    '''
    Download TAB_SIH.zip and save in cache.

    Parameters
    ----------
    url : str
        remote archive location
    force_download : bool
        re-download even when a cached copy exists
    prefix : str or None
        optional tag prepended to the cached file name

    Returns
    -------
    local path to the downloaded file
    '''
    # FIX: compare to None with `is not`, not `!=` (PEP 8).
    if prefix is not None:
        filename = '{}_'.format(prefix) + wget.detect_filename(url)
        print('filename', filename)
    else:
        filename = wget.detect_filename(url)
    local_file = os.path.join(PRODUCED_DATASETS, filename)
    if not os.path.exists(local_file) or force_download:
        wget.download(url, local_file)
    return local_file
def download(url, path, fileName):
    """Download/extract a dataset archive into *path*.

    With *fileName* given the archive is expected at ``path/fileName``
    (Drive fetching is disabled in this build); otherwise the name is
    detected from *url* and the file is fetched with wget.

    :return: path to the extracted data.
    """
    if fileName:
        archive = os.path.join(path, fileName)
        if not os.path.isfile(archive):
            print('----Downloading Data----')
            # FIX: renamed `id` -> `file_id` (shadowed the builtin).
            # Drive download is disabled here; the id is kept for reference.
            file_id = url.split('=')[-1]
            # last_weight_file = drive.CreateFile({'id': file_id})
            # last_weight_file.GetContentFile(fileName)
            print('----Extracting the file----')
            with ZipFile(archive, 'r') as zipObj:
                zipObj.extractall(path)
        else:
            print('File is already available.\n'
                  '----Extracting the file----')
            path = os.path.join(
                r'C:\Users\zeeshan\PycharmProjects\PadIN 1.0\EduFUTURE',
                fileName.split('.')[0])
            if not os.path.exists(path):
                os.makedirs(path)
            # BUG FIX: the original opened the destination *directory* with
            # ZipFile(path), which always fails; open the archive file and
            # extract it into the new directory instead.
            with ZipFile(archive, 'r') as zipObj:
                zipObj.extractall(path)
    if not fileName:
        fileName = wget.detect_filename(url=url)
        if not os.path.isfile(os.path.join(path, fileName)):
            print('----Downloading Data----')
            wget.download(url, 'sample_data')
            fileName = wget.detect_filename(url=url)
            print('----Extracting the file----')
            with ZipFile(os.path.join(path, fileName), 'r') as zipObj:
                zipObj.extractall(path)
        else:
            print('File is already available.\n----Extracting the file----')
            with ZipFile(os.path.join(path, fileName), 'r') as zipObj:
                zipObj.extractall(path)
    extractedTo = path
    print('----Extraction Done---- \npathToExtractedData: {}'.format(extractedTo))
    return extractedTo
def DownloadFile(URL, Destination="./download", ExpectedBytes=None, IsDestinationFolder=None):
    """
    Download a file if not present, and make sure it's the right size.

    :param String URL: URL of the file you want to download.
    :param String Destination: Path of the file you want to store, it can be a.
    :param String Format: The format of the compressed file.
    """
    # Guess whether Destination names a folder: a final path component
    # without a dot is treated as a directory.
    if IsDestinationFolder is None:
        IsDestinationFolder = os.path.basename(Destination).find(".") < 0
    if IsDestinationFolder is True and not os.path.isdir(Destination):
        os.makedirs(Destination)
    # HEAD request only — lets wget pick the server-supplied file name
    # without transferring the body.
    Request = urllib.request.Request(URL, method="HEAD")
    Headers = dict(urllib.request.urlopen(Request).info().items())
    if IsDestinationFolder:
        FilePath = os.path.join(Destination, wget.detect_filename(URL, '', Headers))
    else:
        FilePath = wget.detect_filename(URL, Destination, Headers)
    FileName = FilePath if os.path.exists(FilePath) else wget.download(URL, Destination)
    StatInfo = os.stat(FileName)
    if ExpectedBytes is not None and StatInfo.st_size != ExpectedBytes:
        print(StatInfo.st_size)
        raise FileExistsError(
            'Failed to verify ' + FileName + '. File exists or corrupted. Can you get to it with a browser?')
    print('Found and verified', FileName)
    return FileName
def test_file_not_present_download_successful(self):
    """With no target directory present, every URL should end up on disk."""
    urls = ['ftp://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz']
    remove_dir()
    fetch_files_from_urls(urls, dir)
    downloaded = os.listdir(dir)
    for url in urls:
        if detect_filename(url, None) not in downloaded:
            # file not found,
            self.assertTrue(False)
    self.assertTrue(True)
async def get_content(url, docname='tgvkbot.document', chrome_headers=True,
                      rewrite_name=False, custom_ext=''):
    """Fetch *url* into a temp file and return it ready for re-upload.

    Follows one level of redirect (re-requesting the final URL so the
    detected file name matches it). On any failure returns a fallback
    dict with the raw URL so the caller can send the link instead.

    :return: ``{'content', 'file_name', 'custom_ext', 'temp_path'}`` on
        success, ``{'url', 'docname'}`` on failure.
    """
    try:
        # FIX: ClientSession is an *async* context manager; the original
        # plain `with` is rejected by modern aiohttp releases.
        async with aiohttp.ClientSession(
                headers=CHROME_HEADERS if chrome_headers else {}) as session:
            r = await session.request('GET', url)
            direct_url = str(r.url)
            tempdir = tempfile.gettempdir()
            # 'out' forces the name; 'default' only supplies a fallback.
            filename_options = {
                'out': docname
            } if rewrite_name else {
                'default': docname
            }
            if direct_url != url:
                # Redirected: release the first response and re-fetch the
                # final location so its headers drive the name detection.
                r.release()
                r = await session.request('GET', direct_url)
            file = wget.detect_filename(direct_url, headers=dict(r.headers),
                                        **filename_options)
            temppath = os.path.join(tempdir, file + custom_ext)
            with open(temppath, 'wb') as f:
                f.write(await r.read())
        content = open(temppath, 'rb')
        return {
            'content': content,
            'file_name': file,
            'custom_ext': custom_ext,
            'temp_path': tempdir
        }
    except Exception:
        return {'url': url, 'docname': docname}
def do_dl(self, inp):
    """Download *inp* into the current user's Downloads folder, first
    disabling HTTPS certificate verification where the interpreter
    supports it."""
    try:
        unverified = ssl._create_unverified_context
    except AttributeError:
        # Old interpreters have no unverified-context hook; nothing to do.
        pass
    else:
        ssl._create_default_https_context = unverified
    name = wget.detect_filename(inp)
    wget.download(inp, '/Users/' + getpass.getuser() + '/Downloads/' + name)
def wget_episode(ep):
    """Download an episode's media file, skipping files already on disk."""
    import wget
    import os.path
    file_name = wget.detect_filename(ep['url'])
    if not os.path.exists(file_name):
        return wget.download(ep['url'], bar=wget.bar_adaptive)
    print('... file already downloaded')
    return file_name
def ymd_browser_aqua(year, month, day, str_time):
    """Download the Aqua (MYD) granules for the given date/time through the
    browser, wait for the transfers to finish, then run the processing
    batch file."""
    # Open aqua data download sites based on the current day.
    import datetime, webbrowser, os, requests, wget, pynput, time, modis
    from pynput.mouse import Button
    from pynput.keyboard import Key
    mouse = pynput.mouse.Controller()
    keyboard = pynput.keyboard.Controller()
    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MYDS = ["MYD021KM", "MYD02HKM", "MYD02QKM", "MYD03"]
    os.chdir(r"D:\MODISPRO\MODIS")
    # Remove leftover HDF files from a previous run.
    for file in os.listdir():
        if file.find("hdf") > 0:
            os.remove(os.path.join(file))
    for MYD in MYDS:
        url = (site_start + MYD + "/" + str(year) + "/" +
               str(days).zfill(3) + "/")
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: MODIS granule names pad day-of-year to 3 digits
        # (AYYYYDDD); the original zfill(0) was a no-op.
        site_end = (MYD + ".A" + str(year) + str(days).zfill(3) + "." +
                    str_time + ".061." + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        webbrowser.open(url + site_end)
        time.sleep(3)
        mouse.click(Button.left)
        modis.py33.closePage()
    print("----- Downloading -----")
    # Poll the download folder until no partial (.hdf.part) files remain,
    # then hand off to the rename/processing batch.
    time.sleep(60)
    for i in range(0, 1000):
        time.sleep(10)
        flag = 0
        files = os.listdir(r"D:\MODISPRO\MODIS")
        for file in files:
            if "hdf.part" in file:
                flag = 1
        if flag == 0:
            print("processing...")
            os.chdir(r"D:\MODISPRO")
            os.system(r"D:\MODISPRO\run-rename.bat")
            return
        else:
            if i * 10 % 60 == 0:
                # FIX: corrected "minite(s)" typo in the progress message.
                print("Download " + str(1 + i * 10 // 60) + " minute(s)...")
def download(self):
    """Resolve the target link and fetch it with wget, printing progress
    messages (Turkish) before and after the transfer."""
    if self.download_status != True:
        return
    link = self.getLink()
    # Default to plain http when the link carries no scheme.
    if str(link).startswith("http"):
        url = str(link)
    else:
        url = str("http://" + link)
    info = {
        "file_name": wget.detect_filename(url=url)
    }
    print(f"[*] {info['file_name']} isimli dosya indiriliyor...")
    wget.download(url=url)
    print(f"\n[+] {info['file_name']} isimli dosya indirildi !")
def DownloadFile(URL, Destination = "./download", ExpectedBytes = None, IsDestinationFolder = None):
    """
    Download a file if not present, and make sure it's the right size.

    :param String URL: URL of the file you want to download.
    :param String Destination: Path of the file you want to store, it can be a.
    :param String Format: The format of the compressed file.
    """
    if IsDestinationFolder is None:
        # Try to indicate from Destination: no dot in the final
        # component means "treat it as a folder".
        has_dot = os.path.basename(Destination).find(".") >= 0
        IsDestinationFolder = not has_dot
    if IsDestinationFolder is True:
        if not os.path.isdir(Destination):
            os.makedirs(Destination)
    # Headers from a HEAD request let wget detect the real file name.
    Request = urllib.request.Request(URL, method = "HEAD")
    Headers = dict(urllib.request.urlopen(Request).info().items())
    FilePath = (os.path.join(Destination, wget.detect_filename(URL, '', Headers))
                if IsDestinationFolder
                else wget.detect_filename(URL, Destination, Headers))
    if os.path.exists(FilePath):
        FileName = FilePath
    else:
        FileName = wget.download(URL, Destination)
    StatInfo = os.stat(FileName)
    if ExpectedBytes is None or StatInfo.st_size == ExpectedBytes:
        print('Found and verified', FileName)
        return FileName
    print(StatInfo.st_size)
    raise FileExistsError(
        'Failed to verify ' + FileName + '. File exists or corrupted. Can you get to it with a browser?')
def download_file(self, entry, link):
    """Save a feed entry's enclosure under its cleaned title, keeping the
    extension detected from the link ('download.wget' is wget's
    placeholder for "no name detected" and yields no extension)."""
    entry_url = link.href
    detected = wget.detect_filename(entry_url)
    if detected == 'download.wget':
        extension = ''
    else:
        extension = os.path.splitext(detected)[1]
    filename = filenames.clean_filename(entry.title) + extension
    self.downloaded_episode_filenames.append(filename)
    if os.path.isfile(filename):
        if DEBUG:
            print("Skipping existing file [" + filename + "]")
    else:
        print("Downloading missing file [" + filename + "]")
        download_file(entry_url, filename)
def download():
    """Download the currently selected search result through its mirror
    page and rename the file to "<Title>.<Extension>"."""
    # Listbox selection comes back as a tuple string, e.g. "(3,)".
    cs = int(str(Options.curselection())[1:-2])
    url = Searchresults[cs].get('Mirror_1')
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')
    download_link = str(soup.find('a'))
    # Strip the surrounding anchor markup to leave the bare href.
    download_link = download_link[9:len(download_link) - 9]
    filename = wget.detect_filename(download_link)
    print(filename)
    # BUG FIX: the rename used Searchresults[0] (always the first hit)
    # instead of the entry the user actually selected.
    rename = str(
        re.sub('[<>.:/\'",?*]', '', str(Searchresults[cs].get("Title"))) + "." +
        str(Searchresults[cs].get("Extension")))
    wget.download(download_link, download_directory, bar=bar_progress)
    os.rename(download_directory + str(filename), str(download_directory + rename))
def ymd_download_aqua_data(year, month, day, str_time):
    """Download the Aqua (MYD) granules for the given date/time with wget."""
    # Open aqua data download sites based on the current day.
    import datetime, os, requests, wget
    s_day = datetime.datetime(year, month, day)
    days = s_day.timetuple().tm_yday
    site_start = "https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/"
    MYDS = ["MYD021KM", "MYD02HKM", "MYD02QKM", "MYD03"]
    for MYD in MYDS:
        url = site_start + MYD + "/" + str(year) + "/" + str(days).zfill(3) + "/"
        html = requests.get(url).text
        index = html.find(str_time)
        # BUG FIX: MODIS granule names pad day-of-year to 3 digits
        # (AYYYYDDD); the original zfill(0) was a no-op and produced
        # wrong names for days < 100.
        site_end = (MYD + ".A" + str(year) + str(days).zfill(3) + "." + str_time
                    + ".061." + html[(index + 9):(index + 22)] + ".hdf")
        filename = wget.detect_filename(url + site_end)
        print("Downloading " + filename)
        wget.download(url + site_end)
def test_file_present_download_successful(self):
    """With placeholder files already on disk, the fetch must store each
    download under a fixed-up (non-clobbering) name."""
    urls = ['ftp://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz']
    remove_dir()
    os.makedirs(dir)
    expected = []
    for url in urls:
        plain_name = detect_filename(url, None)
        expected.append(filename_fix_existing(plain_name))
        # create empty dummy files
        open(dir + "/" + plain_name, 'w').close()
    fetch_files_from_urls(urls, dir)
    present = os.listdir(dir)
    for name in expected:
        if name not in present:
            # file not found,
            self.assertTrue(False)
    self.assertTrue(True)
def download(self, url, extractionPath, drive, fileName):
    """Fetch an archive (from Google Drive when *fileName* is given,
    otherwise via wget) and unpack it into *extractionPath*."""
    if fileName != None:
        # Drive share links keep the file id after the last '='.
        drive_id = url.split('=')[-1]
        last_weight_file = drive.CreateFile({'id': drive_id})
        last_weight_file.GetContentFile(fileName)
        with ZipFile(fileName, 'r') as zipObj:
            zipObj.extractall(extractionPath)
    else:
        wget.download(url, 'sample_data')
        fileName = wget.detect_filename(url=url)
        print(fileName)
        # Load the freshly downloaded archive and extract everything.
        with ZipFile('sample_data/' + fileName, 'r') as zipObj:
            zipObj.extractall(extractionPath)
def download(client, message):
    """Pyrogram handler: mirror an anonfiles.com link back to Telegram.

    Scrapes the direct download URL from the anonfiles page, downloads
    the file with wget, uploads it as a document, logs the request to an
    admin chat, and finally removes the local copy.
    """
    chat_id = message.from_user.id
    user_message = message.text
    username = message.from_user.username
    if "anonfiles.com" in user_message:
        req = requests.get(user_message)
        if req.status_code == 200:
            data = bs(req.text, 'html.parser')
            # anonfiles exposes the direct link on the #download-url anchor.
            download_link = data.find('a', {'id': 'download-url'}).get('href')
            file_name = wget.detect_filename(download_link)
            reply = f'❤️Downloading **{file_name}**'
            downloading = app.send_message(chat_id, reply)
            wget.download(download_link)
            app.edit_message_text(
                chat_id, downloading.message_id,
                "✅ **Successfully Downloaded**. <i>Uploading File To Telegram</i>"
            )
            app.send_document(chat_id, file_name, caption=file_name)
            app.delete_messages(chat_id, downloading.message_id)
            logs = f'''#Download @{username} Did Below Request File Name :- {file_name} Link :- {download_link}'''
            # NOTE(review): `clown` is presumably the admin log chat id
            # defined elsewhere in the module — confirm.
            app.send_message(clown, logs)
            # Best-effort cleanup of the local file.
            try:
                os.remove(file_name)
            except:
                pass
        else:
            app.send_message(
                chat_id,
                "**Invalid Link...** Kindly Check Before Sending it \n🌀 **If You Think Its A Bug, Feel Free To Message ** @MxClown"
            )
    else:
        app.send_message(
            chat_id,
            "<b>I Guess You're Lost😮</b>\n<b>Type /Help To Know What I Can do :)</b>\n<b>Join @ClownConfigs For Future Updates.❤️</b>"
        )
async def _(event):
    """Telethon handler ("Julia" bot): reacts to messages that mention the
    bot's name with small-talk replies and three download commands —
    'upload' (direct URL), 'yt' (youtube-dl), and 'music' — plus an
    automatic Spotify "Added to playlist" fetcher.
    """
    # First line of the message carries the command keywords.
    first_line = "l"
    try:
        first_line = event.raw_text.lower().splitlines().pop(0)
    except:
        pass
    quiet = any(s in first_line for s in ("quiet", "ساکت", "آروم", "اروم"))
    if "ژاله" in first_line or "زاله" in first_line or "julia" in first_line:
        # print("Julia")
        global my_event
        my_event = event
        sender = await event.message.get_sender()
        # NOTE(review): the second operand is a generator expression, which
        # is always truthy — this probably meant any(...); confirm intent.
        if sender is not None and (event.sender.username == un for un in ("Arstar", "Untethered")):
            if any(s in first_line for s in ("laugh", "بخند")):
                await event.reply("😆")
            if any(s in first_line for s in ("you okay", "خوبی")):
                await event.reply("I know of no light. :p")
            if any(s in first_line for s in ("nice work", "thanks", "merci", "good job", "مرسی")):
                await event.reply("You're welcome. ❤️")
            # else:
        # else:
        if any(s in first_line for s in ("debug", "دیباگ")):
            db_msg = await event.reply("DEBUG")
            db_reply = await await_reply(await event.get_chat(), db_msg)
            print("YDebug: " + db_reply.raw_text)
        if any(s in first_line for s in ("hi", "hello", "hey", "yo", "greetings", "سلام", "هی", "یو!")):
            sender_name = "Unknown"
            if event.sender is not None:
                sender_name = getattr(event.sender, "first_name", "X")
            await event.reply("Julia is operational.\nGreetings, " + sender_name + "!")
        # --- 'upload': every following line is a URL to mirror. ---
        if any(s in first_line for s in ("upload", "اپلود", "آپلود")):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                try:
                    if url == "":
                        continue
                    url_name = wget.detect_filename(url)
                    trying_to_dl_msg = await util.discreet_send(
                        event,
                        'Julia is trying to download "' + url_name + '" from "' + url + '".\nPlease wait ...',
                        event.message,
                        quiet,
                    )
                    d1 = wget.download(url, out="dls/", bar=None)
                    try:
                        trying_to_upload_msg = await util.discreet_send(
                            event,
                            'Julia is trying to upload "' + url_name + '".\nPlease wait ...',
                            trying_to_dl_msg,
                            quiet,
                        )
                        await borg.send_file(
                            await event.get_chat(),
                            d1,
                            reply_to=trying_to_upload_msg,
                            caption=(url_name),
                        )
                    except:
                        await event.reply(
                            "Julia encountered an exception. :(\n" + traceback.format_exc())
                    finally:
                        # Always delete the downloaded temp file.
                        await util.remove_potential_file(d1)
                except:
                    await event.reply("Julia encountered an exception. :(\n" + traceback.format_exc())
        # --- 'yt': download each URL with youtube-dl into a unique dir. ---
        if any(s in first_line for s in ("yt", "youtube", "یوتیوب")):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                if url == "":
                    continue
                file_name_with_ext = ""
                try:
                    trying_to_dl = await util.discreet_send(
                        event,
                        'Julia is trying to download "' + url + '".\nPlease wait ...',
                        event.message,
                        quiet,
                    )
                    # Per-download UUID directory isolates the single output file.
                    file_name = "dls/" + str(uuid.uuid4()) + "/"
                    ydl_opts = {
                        "quiet": True,
                        "outtmpl": file_name + "%(playlist_title)s_%(title)s_%(format)s.%(ext)s",
                        # 'dls/%(playlist_title)s_%(title)s_%(format)s_%(autonumber)s.%(ext)s'
                    }
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        extract_info_aio = aioify(ydl.extract_info)
                        d2 = await extract_info_aio(url)
                        file_name_with_ext = (
                            file_name + (await os_aio.listdir(file_name))[0])
                        trying_to_upload_msg = await util.discreet_send(
                            event,
                            'Julia is trying to upload "' + d2["title"] + '".\nPlease wait ...',
                            trying_to_dl,
                            quiet,
                        )
                        sent_video = await borg.send_file(
                            await event.get_chat(),
                            file_name_with_ext,
                            reply_to=trying_to_upload_msg,
                            caption=str(d2["title"]),
                        )
                        try:
                            # NOTE(review): the closing paren after
                            # d2["uploader"] is misplaced in the original —
                            # str() wraps the rest of the caption too; kept as-is.
                            full_caption = (
                                "Title: " + str(d2["title"])
                                + "\nFormat: " + str(d2["format"])
                                + "\nWidth: " + str(d2["width"])
                                + "\nHeight: " + str(d2["height"])
                                + "\nFPS: " + str(d2["fps"])
                                + "\nPlaylist: " + str(d2["playlist"])
                                + "\nLikes: " + str(d2["like_count"])
                                + "\nDislikes: " + str(d2["dislike_count"])
                                + "\nView Count: " + str(d2["view_count"])
                                + "\nUploader: " + str(d2["uploader"]
                                + "\nWebpage Url: " + str(d2["webpage_url"])
                                + "\nDescription:\n" + str(d2["description"])))
                            await borg.send_message(
                                await event.get_chat(),
                                full_caption,
                                sent_video,
                                link_preview=False,
                            )
                        except:
                            pass
                except:
                    await event.reply("Julia encountered an exception. :(\n" + traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext)
        # --- 'music': fetch each URL via the get_music helper. ---
        if any(s in first_line for s in ("music", "موسیقی", "اهنگ", "آهنگ")):
            # print(first_line)
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                # print(url)
                if url == "":
                    continue
                file_name_with_ext = ""
                trying_to_dl = await util.discreet_send(
                    event,
                    'Julia is trying to download "' + url + '".\nPlease wait ...',
                    event.message,
                    quiet,
                )
                try:
                    if any(s in first_line for s in ("automatic", "اتوماتیک")):
                        file_name_with_ext = await get_music(
                            url, cwd="./dls/" + str(uuid.uuid4()) + "/")
                    else:
                        # Interactive mode: get_music can query the user.
                        file_name_with_ext = await get_music(
                            url, tg_event=event, cwd="./dls/" + str(uuid.uuid4()) + "/")
                    base_name = str(await os_aio.path.basename(file_name_with_ext))
                    trying_to_upload_msg = await util.discreet_send(
                        event,
                        'Julia is trying to upload "' + base_name + '".\nPlease wait ...',
                        trying_to_dl,
                        quiet,
                    )
                    sent_music = await borg.send_file(
                        await event.get_chat(),
                        file_name_with_ext,
                        reply_to=trying_to_upload_msg,
                        caption=base_name,
                    )
                except:
                    await event.reply("Julia encountered an exception. :(\n" + traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext, event)
    # Spotify "Added to playlist" notifications trigger an automatic fetch.
    # NOTE(review): placed at function level here — the collapsed source
    # does not show its exact nesting; confirm against the original repo.
    p = re.compile(r'^Added to (.*) on Spotify: "(.*)" by (.*) https:.*$')
    m = p.match(event.raw_text)
    if m is not None:
        file_name_with_ext = ""
        try:
            # print(m.group(3)+" "+m.group(2)) #DBG
            file_name_with_ext = await get_music(m.group(3) + " " + m.group(2),
                                                 cwd="./dls/" + str(uuid.uuid4()) + "/")
            base_name = str(await os_aio.path.basename(file_name_with_ext))
            sent_music = await borg.send_file(
                await event.get_chat(),
                file_name_with_ext,
                reply_to=event.message,
                caption=base_name,
            )
        except:
            await event.reply("Julia encountered an exception. :(\n" + traceback.format_exc())
        finally:
            await util.remove_potential_file(file_name_with_ext, event)
        # (Tail of a filename-building helper whose definition starts
        # above this chunk: normalizes a URL fragment into a .jpg name.)
        url_substring = url_substring.replace('/', '_')
        if url_substring.endswith('_jpg'):
            url_substring = url_substring[:-4]
        if url_substring.endswith('jpg'):
            # ends with jpg but not as an file extension
            url_substring = url_substring[:-3]
        return url_substring + '.jpg'
    except:
        # If we fail while parsing this url, let's just use a random name
        return str(uuid4()) + '.jpg'


if __name__ == '__main__':
    # Each run stores its downloads in a fresh timestamped album folder.
    album_loc = f'./{datetime.now().strftime("%Y_%m_%d_%H_%M_%S")}'
    os.makedirs(album_loc)
    image_urls = get_background_art_urls()
    log.info(f"Downloading {len(image_urls)} images")
    count = 0
    for url in image_urls:
        filename = wget.detect_filename(url)
        if filename == 'file':
            # wget could not detect a usable name; derive one from the URL.
            filename = create_filename(url)
        try:
            # NOTE(review): "(unknown)" looks like an extraction artifact —
            # the destination was presumably f"{album_loc}/{filename}"; confirm.
            wget.download(url, f"{album_loc}/(unknown)")
            count += 1
            log.debug(f"Downloaded {count} files")
        except http_error.HTTPError:
            log.warn("Couldn't retrieve image file, skipping")
            log.debug(f"Failed url: {url}")
    log.info("Finished")
def downloadfromLink(url, name=None):
    """Scrape a movie-host page for a direct video link and download it.

    Tries a battery of host-specific regexes against the page body
    (vidnode, azmovie, okhatrimaza, spaceshut/freemoviewap, Google
    usercontent, generic .mp4), follows the matching host's redirect
    chain, then downloads the final link with wget.

    :param url: page URL to scrape.
    :param name: optional output file name; otherwise detected from the
        link or randomly generated.
    """
    try:
        dheaders = None
        headers = {
            'user-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0',
            'Accept-Language': 'en-US,en;q=0.9',
        }
        session = requests.Session()
        response = session.get(url, headers=headers)
        # Some hosts require replaying the session cookie via POST.
        try:
            cookie = {'Cookie': response.headers['Set-Cookie']}
            headers.update(cookie)
            response = session.post(url, headers=headers)
        except:
            pass
        dllinks0 = re.findall(
            "https?://[a-zA-Z0-9._]+.[a-zA-Z0-9._]+/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+.mp4",
            str(response.content))
        dllinks1 = re.findall(
            "https://[a-zA-Z0-9._]+.googleusercontent.com/[-a-zA-Z0-9@:%._\+~#=?&!$^*()-\[\]]+",
            str(response.content))
        dllinks2 = re.findall(
            "vidnode.net/streaming.php[-a-zA-Z0-9@:%._\+~#=\?&!$^*()\-\[\]]+",
            str(response.content))
        dllinks3 = re.findall(
            "/getlink.php\?f=https://[a-zA-Z0-9._]+.azmovie.to/[-a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\]]+",
            str(response.content))
        # dllinks4 = re.findall("https://fmovies.space/[a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\] ]+", str(response.content))
        # dllinks5 = re.findall("https://jawcloud.co/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+", str(response.content))
        dllinks6 = re.findall(
            "https://www.okhatrimaza.art/file/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))
        dllinks7 = re.findall(
            "https://spaceshut.website/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))
        dllinks7 += re.findall(
            "https://freemoviewap2019.in/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
            str(response.content))
        # No candidate on any host: give up.
        if len(dllinks0) + len(dllinks1) + len(dllinks2) + len(dllinks3) + len(
                dllinks6) + len(dllinks7) == 0:
            print("Can't Download. Could not found video.")
            exit()
        if len(dllinks2) >= 1:
            # vidnode: streaming page -> download page -> CDN link.
            response = session.get("https://" + dllinks2[0])
            dllinks2 = re.findall(
                "https://vidnode.net/download[a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\] ]+",
                str(response.content))
            response = session.get(dllinks2[0])
            dllinks2 = re.findall(
                "https://[a-zA-Z0-9._]+.cdnfile.info/[a-zA-Z0-9@:%._\+~#=?&!$^*()/\-\[\] ;]+",
                str(response.content))
            # NOTE(review): this replace is a no-op as shown — likely was
            # replace('&amp;', '&') before HTML unescaping mangled it; confirm.
            dllinks2 = [line.replace('&', '&') for line in dllinks2]
            dllink = dllinks2[len(dllinks2) - 1]
            dllink = urlify(dllink)
        elif len(dllinks3) >= 1:
            # azmovie: currently aborted before completion.
            dllink = "https://azm.to/" + dllinks3[len(dllinks3) - 1]
            response = session.get(dllink, headers=headers)
            print("Work in Progress!!!")
            exit()
            # NOTE(review): everything below this exit() is unreachable.
            dllinks3 = re.findall(
                "https://[a-zA-Z0-9._]+.azmovie.to/[-a-zA-Z0-9@:%._\+~#=?&!$^*()\-\[\]]+",
                str(response.content))
            dllink = dllinks3[len(dllinks3) - 1]
        elif len(dllinks6) >= 1:
            # okhatrimaza: file page -> server page -> download link,
            # which requires a Referer header.
            dllink = dllinks6[len(dllinks6) - 1]
            response = session.get(dllink)
            dllinks6 = re.findall(
                "https://www.okhatrimaza.art/server/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                str(response.content))
            dllink = dllinks6[len(dllinks6) - 1]
            response = session.get(dllink, headers=headers)
            dllinks6 = re.findall(
                "https://www.okhatrimaza.art/download/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                str(response.content))
            referer = {
                "Referer": dllink,
            }
            headers.update(referer)
            dllink = dllinks6[len(dllinks6) - 1]
            dheaders = tuple(headers.items())
        elif len(dllinks7) >= 1:
            # spaceshut/freemoviewap: follow intermediate pages until the
            # link ends in .mp4.
            dllink = dllinks7[len(dllinks7) - 1]
            while (".mp4" != dllink[-4:]):
                response = session.get(dllink, headers=headers)
                dllinks7 = re.findall(
                    "https://spaceshut.website/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+",
                    str(response.content))
                dllinks7 += re.findall(
                    "https?://[a-zA-Z0-9._]+.[a-zA-Z0-9._]+/[a-zA-Z0-9@;:%._\+~#=?&!$^*()\-\[\]/ ]+.mp4",
                    str(response.content))
                dllink = dllinks7[len(dllinks7) - 1]
        elif len(dllinks1) >= 1:
            dllink = dllinks1[len(dllinks1) - 1]
        else:
            dllink = dllinks0[len(dllinks0) - 1]
        # Prefer the server-detected name; fall back to a random one.
        if wget.detect_filename(dllink):
            name = wget.detect_filename(dllink)
        elif name == None:
            name = 'movie-' + str(random.randint(1, 100000))
        path = dirpath + '/' + name
        if name:
            print("Downloading: ", name)
            wget.download(dllink, path, headers=dheaders)
            print("\nFinished!!!")
    except Exception as exception:
        print("Error:", exception.__class__.__name__)
        print("Can't Download. Something goes wrong.")
def filter_threads(threads, keywords):
    """Scan scraped thread posts for keyword matches and download any
    attachments of matching posts.

    :param threads: mapping of thread key -> list of post dicts; each post
        has at least a 'post' text field and optionally a 'file' URL.
    :param keywords: words to search for (matched as substrings of the
        lower-cased post text).
    :return: {thread_key: ["word|post_text[|file_url]", ...]} containing
        only threads with at least one non-excluded match.
    """
    try:
        logger.debug('Filtering threads.')
        logger.info('threads: ' + str(threads))
        threads_filtered = {}
        thread_count = len(threads)
        thread_loops = 0
        for key in threads:
            thread_loops += 1
            logger.debug('key: ' + key)
            logger.info('Thread #' + str(thread_loops) + ' of ' + str(thread_count))
            posts = threads[key]
            logger.debug('posts: ' + str(posts))
            found_list = []
            post_count = len(posts)
            post_loops = 0
            for post in posts:
                post_loops += 1
                logger.info('Post #' + str(post_loops) + ' of ' + str(post_count))
                logger.debug('post: ' + str(post))
                word_count = len(keywords)
                word_loops = 0
                for word in keywords:
                    word_loops += 1
                    logger.debug('Word #' + str(word_loops) + ' of ' + str(word_count))
                    #logger.debug('word: ' + word)
                    #logger.debug('post.lower(): ' + post.lower())
                    logger.debug('post: ' + str(post))
                    if word in post['post'].lower():
                        #logger.debug('FOUND: ' + word)
                        # Drop the hit when any excluded word also appears.
                        # (excluded_list / download_dir come from enclosing scope.)
                        passed_excluded = True
                        for excluded in excluded_list:
                            if excluded in post['post'].lower():
                                passed_excluded = False
                                logger.debug('Found excluded word: ' + excluded)
                                logger.debug('Excluding post: ' + str(post))
                        if passed_excluded == True:
                            entry = word + '|' + post['post'].lower()
                            if 'file' in post:
                                entry = entry + '|' + post['file']
                                logger.info('Downloading attachment.')
                                file_name = wget.detect_filename(
                                    url=post['file'])
                                logger.debug('file_name: ' + file_name)
                                # Skip attachments already on disk.
                                if not os.path.isfile(download_dir + file_name):
                                    dl_file = wget.download(
                                        post['file'],
                                        out=download_dir.rstrip('/'))
                                    logger.info('Successful download: ' + dl_file)
                            found_list.append(entry)
            if len(found_list) > 0:
                threads_filtered[key] = found_list
        return threads_filtered
    except Exception:
        logger.exception('Exception while filtering threads.')
        raise
def filter_threads(thread_data):
    """Variant of filter_threads that reads keyword/exclusion lists from
    `self` and always returns the (possibly empty) filtered mapping
    instead of raising.

    NOTE(review): the body uses `self.keyword_list` / `self.excluded_list`
    but the signature has no `self` parameter — this only works if `self`
    is in an enclosing scope; confirm against the full file.

    :param thread_data: mapping of thread key -> list of post dicts.
    :return: {thread_key: ["word|post_text[|file_url]", ...]}.
    """
    threads_filtered = {}
    try:
        logger.debug('Filtering threads.')
        #threads = thread_archive
        #thread_count = len(threads)
        thread_count = len(thread_data)
        thread_loops = 0
        #for key in threads:
        for key in thread_data:
            thread_loops += 1
            logger.info('Thread #' + str(thread_loops) + ' of ' + str(thread_count))
            #posts = threads[key]
            posts = thread_data[key]
            logger.debug('posts: ' + str(posts))
            found_list = []
            post_count = len(posts)
            post_loops = 0
            for post in posts:
                post_loops += 1
                logger.info('Post #' + str(post_loops) + ' of ' + str(post_count))
                word_count = len(self.keyword_list)
                word_loops = 0
                for word in self.keyword_list:
                    word_loops += 1
                    logger.debug('Word #' + str(word_loops) + ' of ' + str(word_count))
                    #word_full = ' ' + word + ' '
                    word = word.lower()
                    if word in post['post'].lower():
                        #if word_full in post['post'].lower():
                        #logger.debug('FOUND: ' + word)
                        # Drop the hit when any excluded word also appears.
                        passed_excluded = True
                        for excluded in self.excluded_list:
                            #excluded_full = ' ' + excluded + ' '
                            excluded = excluded.lower()
                            if excluded in post['post'].lower():
                                #if excluded_full in post['post'].lower():
                                passed_excluded = False
                                logger.debug('Found excluded word: ' + excluded)
                                logger.debug('Excluding post: ' + str(post))
                        if passed_excluded == True:
                            entry = word + '|' + post['post'].lower()
                            if 'file' in post:
                                entry = entry + '|' + post['file']
                                logger.info('Downloading attachment.')
                                file_name = wget.detect_filename(
                                    url=post['file'])
                                logger.debug('file_name: ' + file_name)
                                # Skip attachments already on disk.
                                if not os.path.isfile(
                                        download_directory + file_name):
                                    dl_file = wget.download(
                                        post['file'],
                                        out=download_directory.rstrip(
                                            '/'))
                                    logger.debug(
                                        'Successful download: ' + dl_file)
                            found_list.append(entry)
            if len(found_list) > 0:
                threads_filtered[key] = found_list
        #return threads_filtered
    except Exception as e:
        logger.exception('Exception while filtering threads.')
        logger.exception(e)
        #raise
    finally:
        # Swallows errors by design: callers always get a dict back.
        return threads_filtered
async def _(event):
    """Telegram message handler ("Julia" userbot).

    Reacts to the first line of the incoming message: greeting/debug replies,
    'upload' (direct URL download + re-upload), 'yt'/'youtube' (youtube-dl
    download + upload), 'music' (get_music helper), and a Spotify
    "Added to ... on Spotify" pattern.

    :param event: Telethon-style message event (raw_text, chat, sender,
        message, reply). Relies on module-level ``borg``, ``util``, ``wget``,
        ``youtube_dl``, ``aioify``, ``os_aio``, ``uuid``, ``re``,
        ``traceback``, ``await_reply`` and ``get_music``.
    """
    # Command word is taken from the first line of the message, lowercased.
    first_line = "l"
    try:
        first_line = event.raw_text.lower().splitlines().pop(0)
    except:
        pass
    # 'quiet' mode suppresses progress messages (see util.discreet_send).
    quiet = any(s in first_line for s in ('quiet', 'ساکت', 'آروم', 'اروم'))
    # Only act when the bot is addressed by name (Persian or English).
    if ('ژاله' in first_line or 'زاله' in first_line
            or 'julia' in first_line):
        # print("Julia")
        global my_event
        my_event = event
        # Canned replies, only for the bot owner or itself.
        if event.sender is not None and (
                (event.sender).is_self
                or (event.sender).username == "Orphicality"):
            if any(s in first_line for s in ('laugh', 'بخند')):
                await event.reply('😆')
            if any(s in first_line for s in ('you okay', 'خوبی')):
                await event.reply('I know of no light. :p')
            if any(s in first_line
                   for s in ('nice work', 'thanks', 'merci', 'good job',
                             'مرسی')):
                await event.reply("You're welcome. ❤️")
            # else:
        # else:
        if any(s in first_line for s in ('debug', 'دیباگ')):
            db_msg = await event.reply('DEBUG')
            db_reply = await await_reply(event.chat, db_msg)
            print("YDebug: " + db_reply.raw_text)
        if any(s in first_line
               for s in ('hi', 'hello', 'hey', 'yo', 'greetings', 'سلام',
                         'هی', 'یو!')):
            sender_name = "Unknown"
            if event.sender is not None:
                sender_name = (event.sender).first_name
            await event.reply("Julia is operational.\nGreetings, " +
                              sender_name + "!")
        # 'upload': each subsequent line of the message is a URL to fetch
        # with wget and re-upload into the chat.
        if any(s in first_line for s in ('upload', 'اپلود', 'آپلود')):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                try:
                    if url == '':
                        continue
                    url_name = wget.detect_filename(url)
                    trying_to_dl_msg = await util.discreet_send(
                        event, "Julia is trying to download \"" + url_name +
                        "\" from \"" + url + "\".\nPlease wait ...",
                        event.message, quiet)
                    d1 = wget.download(url, out="dls/", bar=None)
                    try:
                        trying_to_upload_msg = await util.discreet_send(
                            event, "Julia is trying to upload \"" + url_name +
                            "\".\nPlease wait ...", trying_to_dl_msg, quiet)
                        await borg.send_file(event.chat, d1,
                                             reply_to=trying_to_upload_msg,
                                             caption=(url_name))
                    except:
                        await event.reply(
                            "Julia encountered an exception. :(\n" +
                            traceback.format_exc())
                    finally:
                        # always remove the temp download, even on failure
                        await util.remove_potential_file(d1)
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
        # 'yt': download each URL via youtube-dl into a per-download UUID
        # directory, then upload the resulting file with metadata caption.
        if any(s in first_line for s in ('yt', 'youtube', 'یوتیوب')):
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                if url == '':
                    continue
                file_name_with_ext = ""
                try:
                    trying_to_dl = await util.discreet_send(
                        event, "Julia is trying to download \"" + url +
                        "\".\nPlease wait ...", event.message, quiet)
                    file_name = 'dls/' + str(uuid.uuid4()) + '/'
                    ydl_opts = {
                        'quiet': True,
                        'outtmpl': file_name +
                        '%(playlist_title)s_%(title)s_%(format)s.%(ext)s'
                        # 'dls/%(playlist_title)s_%(title)s_%(format)s_%(autonumber)s.%(ext)s'
                    }
                    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                        # wrap the blocking extract_info in a coroutine
                        extract_info_aio = aioify(ydl.extract_info)
                        d2 = await extract_info_aio(url)
                    # the UUID dir contains exactly one file: the download
                    file_name_with_ext = file_name + (
                        await os_aio.listdir(file_name))[0]
                    trying_to_upload_msg = await util.discreet_send(
                        event, "Julia is trying to upload \"" + d2['title'] +
                        "\".\nPlease wait ...", trying_to_dl, quiet)
                    sent_video = await borg.send_file(
                        event.chat,
                        file_name_with_ext,
                        reply_to=trying_to_upload_msg,
                        caption=str(d2['title']))
                    # best-effort metadata caption; any missing key aborts it
                    try:
                        full_caption = "Title: " + str(
                            d2['title']) + "\nFormat: " + str(
                                d2['format']) + "\nWidth: " + str(
                                    d2['width']) + "\nHeight: " + str(
                                        d2['height']) + "\nFPS: " + str(
                                            d2['fps']
                                        ) + "\nPlaylist: " + str(
                                            d2['playlist']
                                        ) + "\nLikes: " + str(
                                            d2['like_count']
                                        ) + "\nDislikes: " + str(
                                            d2['dislike_count']
                                        ) + "\nView Count: " + str(
                                            d2['view_count']
                                        ) + "\nUploader: " + str(
                                            d2['uploader'] +
                                            "\nWebpage Url: " +
                                            str(d2['webpage_url']) +
                                            "\nDescription:\n" +
                                            str(d2['description']))
                        await borg.send_message(event.chat,
                                                full_caption,
                                                sent_video,
                                                link_preview=False)
                    except:
                        pass
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext)
        # 'music': hand each URL to the get_music helper (interactive unless
        # 'automatic' is also in the first line), then upload the result.
        if any(s in first_line
               for s in ('music', 'موسیقی', 'اهنگ', 'آهنگ')):
            # print(first_line)
            urls = event.raw_text.splitlines()
            urls.pop(0)
            for url in urls:
                # print(url)
                if url == '':
                    continue
                file_name_with_ext = ''
                trying_to_dl = await util.discreet_send(
                    event, "Julia is trying to download \"" + url +
                    "\".\nPlease wait ...", event.message, quiet)
                try:
                    if any(s in first_line
                           for s in ('automatic', 'اتوماتیک')):
                        file_name_with_ext = await get_music(
                            url, cwd="./dls/" + str(uuid.uuid4()) + "/")
                    else:
                        file_name_with_ext = await get_music(
                            url,
                            tg_event=event,
                            cwd="./dls/" + str(uuid.uuid4()) + "/")
                    base_name = str(await
                                    os_aio.path.basename(file_name_with_ext))
                    trying_to_upload_msg = await util.discreet_send(
                        event, "Julia is trying to upload \"" + base_name +
                        "\".\nPlease wait ...", trying_to_dl, quiet)
                    sent_music = await borg.send_file(
                        event.chat,
                        file_name_with_ext,
                        reply_to=trying_to_upload_msg,
                        caption=base_name)
                except:
                    await event.reply("Julia encountered an exception. :(\n" +
                                      traceback.format_exc())
                finally:
                    await util.remove_potential_file(file_name_with_ext,
                                                     event)
    # Spotify "Added to <playlist> on Spotify: "<track>" by <artist>" pattern:
    # fetch the track via get_music and post it as a reply.
    # NOTE(review): placement reconstructed from flattened source — presumed
    # to run regardless of the name trigger above; confirm against history.
    p = re.compile(r'^Added to (.*) on Spotify: "(.*)" by (.*) https:.*$')
    m = p.match(event.raw_text)
    if m is not None:
        file_name_with_ext = ''
        try:
            # print(m.group(3)+" "+m.group(2)) #DBG
            file_name_with_ext = await get_music(m.group(3) + " " +
                                                 m.group(2),
                                                 cwd="./dls/" +
                                                 str(uuid.uuid4()) + "/")
            base_name = str(await os_aio.path.basename(file_name_with_ext))
            sent_music = await borg.send_file(event.chat,
                                              file_name_with_ext,
                                              reply_to=event.message,
                                              caption=base_name)
        except:
            await event.reply("Julia encountered an exception. :(\n" +
                              traceback.format_exc())
        finally:
            await util.remove_potential_file(file_name_with_ext, event)
import sys
import os
from pytube import YouTube
#from pathlib import Path
#from unipath import Path

# Interactive "Auto Downloader" script: option 1 downloads an arbitrary
# file with wget and echoes its text content; option 2 downloads a YouTube
# video with pytube.
# NOTE(review): relies on the file-level `import wget` (imported elsewhere
# in this file) for the download helpers below.

print("~" * 50)
print(" Auto Downloader")
print(" Created by : Sayak Naskar")
print("~" * 50)
print("1.Download text, pdf, html files .\n2.Download video from youtube .")
select = int(input("Enter Your choice :"))

if select == 1:
    url = input("Enter your URL to download mp3,pdf :")
    save_path = input("Enter your path: ")
    # BUGFIX: the original did `save = sys.path.append(save_path)`, which
    # returns None, and then tried to open a file literally named "None".
    # wget.download returns the path of the downloaded file, so download
    # directly into the requested directory and open that path.
    downloaded_file = wget.download(url, out=save_path)
    print(downloaded_file)
    with open(downloaded_file) as fileopen:
        read = fileopen.read()
        print(read)
    print("Your file :" + wget.detect_filename(url=url))
elif select == 2:
    url1 = input("Enter your URL to download your video :")
    # Construct the YouTube object once (the original built it twice and
    # ended with a dangling no-op `yt.streams` expression).
    yt = YouTube(url1)
    yt.streams.first().download()
def download(self, files_to_download=None, remove_extracted=False):
    """Download, unpack and convert the dataset archives listed in
    ``self.download_urls``.

    For each set type (e.g. train/valid/test) this creates
    ``<data_dir>/<name>_dataset/<set_type>/{wav,txt}`` directories,
    downloads each archive into ``downloads/``, extracts it under
    ``downloads/extracted/<archive-name>/``, splits it into wav/txt via
    ``self._wav_txt_split`` and builds a manifest CSV.

    :param files_to_download: optional list of substrings; when given,
        only URLs containing one of them are downloaded (others skipped).
    :param remove_extracted: delete the extracted directory after the
        wav/txt split to save disk space.
    :return: list of manifest CSV paths, one per set type.
    """
    start = time.time()
    dataset_dir = os.path.join(self.data_dir,
                               '{}_dataset'.format(self.name))
    if not os.path.exists(dataset_dir):
        os.makedirs(dataset_dir)
    # dataset urls is a dict with keys (train, valid, test)
    # with values a list of files to be downloaded
    manifest_paths = []
    for set_type, urls in self.download_urls.items():
        set_dir = os.path.join(dataset_dir, set_type)
        if not os.path.exists(set_dir):
            os.makedirs(set_dir)
        set_wav_dir = os.path.join(set_dir, 'wav')
        if not os.path.exists(set_wav_dir):
            os.makedirs(set_wav_dir)
        set_txt_dir = os.path.join(set_dir, 'txt')
        if not os.path.exists(set_txt_dir):
            os.makedirs(set_txt_dir)
        downloads_dir = os.path.join(dataset_dir, "downloads")
        if not os.path.exists(downloads_dir):
            os.makedirs(downloads_dir)
        extracted_dir = os.path.join(downloads_dir, "extracted")
        for url in urls:
            if url is not None:
                # check if we want to download this file
                if files_to_download:
                    # for/else: the else runs only when no substring
                    # matched, i.e. this URL is not wanted.
                    for f in files_to_download:
                        if url.find(f) != -1:
                            break
                    else:
                        print("Skipping url: {}".format(url))
                        continue
                fname = wget.detect_filename(url)
                name = os.path.splitext(fname)[0]
                target_fname = os.path.join(downloads_dir, fname)
                curr_extracted_dir = os.path.join(extracted_dir, name)
                print('Downloading {}...'.format(fname))
                # resume-friendly: skip both download and unpack when
                # the artifact already exists
                if not os.path.exists(target_fname):
                    wget.download(url, target_fname)
                print("Unpacking {}...".format(fname))
                if not os.path.exists(curr_extracted_dir):
                    tar = tarfile.open(target_fname)
                    tar.extractall(curr_extracted_dir)
                    tar.close()
                # NOTE(review): this asserts the parent ``extracted_dir``
                # rather than ``curr_extracted_dir`` — possibly intended
                # to be the per-archive dir; confirm before changing.
                assert os.path.exists(
                    extracted_dir
                ), "Archive {} was not properly uncompressed.".format(
                    fname)
            else:
                # No URL: expect pre-extracted data already in place.
                print('No URL found. Skipping download.')
                curr_extracted_dir = extracted_dir
                assert os.path.exists(
                    extracted_dir), 'No folder found in {}'.format(
                        extracted_dir)
            print("Converting and extracting transcripts...")
            self._wav_txt_split(curr_extracted_dir, set_wav_dir,
                                set_txt_dir, set_type)
            if remove_extracted:
                shutil.rmtree(curr_extracted_dir)
        # one manifest per set type; pruning only for training sets
        manifest_paths.append(
            self._create_manifest(
                set_wav_dir,
                os.path.join(self.data_dir,
                             '{}.{}.csv'.format(self.name, set_type)),
                prune=set_type.startswith('train')))
    print("Done. Time elapsed {:.2f}s".format(time.time() - start))
    return manifest_paths
# Download Windows KB packages listed in `download_links` into per-arch
# folders. Each item is (arch, kb_id, url); files are saved as
# "<kb_id>_<original filename>".
# NOTE(review): the first logger.info below is the tail of an
# `if not os.path.exists(target_download_folder_x64):` block that begins
# before this chunk — its true indentation cannot be confirmed from here.
logger.info(
    "The target download folder for 64bit KBs doesn't exist, create it.")
target_download_folder_x86 = target_download_folder + "x86"
logger.info("The target download folder for 32bit KBs is " +
            target_download_folder_x86)
if not os.path.exists(target_download_folder_x86):
    os.makedirs(target_download_folder_x86)
    logger.info(
        "The target download folder for 32bit KBs doesn't exist, create it.")
tmp_file_name = ""
for item in download_links:
    # item: (architecture, KB identifier, download URL)
    if item[0] == "x64":
        tmp_file_name = wget.detect_filename(url=item[2])
        wget.download(url=item[2],
                      out=target_download_folder_x64 + os.sep + item[1] +
                      "_" + tmp_file_name)
    if item[0] == "x86":
        tmp_file_name = wget.detect_filename(url=item[2])
        wget.download(url=item[2],
                      out=target_download_folder_x86 + os.sep + item[1] +
                      "_" + tmp_file_name)
logger.info("Finish download progress.")
logger.info("##############################")
# Stage the x86 downloads for the msp-extraction step that follows.
source_folder = target_download_folder_x86
target_folder = target_download_folder_x86 + "_" + "msp"
# Ensure the `wget` package is available; in a notebook, install it on the
# fly via IPython's shell escape (get_ipython) and retry the import.
try:
    import wget
except ImportError:
    get_ipython().system('pip install wget # you may need pip3')
    import wget

# ---

# # Run Workflow

# ## Prepare Data

# In[6]:

# Download the training CSV once; skip if it is already present locally.
train_data_url = "http://s3.amazonaws.com/verta-starter/spam.csv"
train_data_filename = wget.detect_filename(train_data_url)
if not os.path.isfile(train_data_filename):
    wget.download(train_data_url)

# In[7]:

# Load the spam dataset (latin-1 encoded CSV) into a DataFrame.
raw_data = pd.read_csv(train_data_filename,
                       delimiter=',',
                       encoding='latin-1')
raw_data.head()

# In[8]:

# turn spam/ham to 0/1, and remove unnecessary columns
raw_data.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'],
              axis=1,
              inplace=True)
raw_data.v1 = LabelEncoder().fit_transform(raw_data.v1)