def get_s3(access_key, secret_key, server, bucket, key, dest_path):
    """Downloads a file from a bucket"""
    # Build an S3 client against the given endpoint with explicit credentials.
    aws_session = boto3.session.Session()
    client = aws_session.client(
        service_name='s3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        endpoint_url=server,
    )
    try:
        # Presign a GET for the object, then fetch it via the shared helper.
        link = client.generate_presigned_url(
            'get_object', {'Bucket': bucket, 'Key': key})
        LOGGER.info('generated download link %s', link)
        download_file(link, dest_path).dwnl()
    except botocore.exceptions.ClientError as err:
        # Anything other than a missing object is re-raised unchanged.
        if err.response['Error']['Code'] != "404":
            raise
        print("The object does not exist.")
def transferFromUrl(self, uri, destPath, user=None, password=None):
    """Download *uri* to *destPath*, re-queuing the task on failure.

    Args:
        uri: source URL.
        destPath: local path the file is written to.
        user, password: optional credentials forwarded to the downloader.

    Returns:
        destPath on success; on any error the task retries itself with a
        growing delay.
    """
    try:
        download_file(url=uri, file=destPath, user=user,
                      passwd=password).dwnl()
        return destPath
    except Exception as e:
        # Bug fix: the keyword was misspelled 'coutdown', so Celery ignored
        # it and used its default retry delay.
        # NOTE(review): 30**retries grows extremely fast; 30 * 2**retries
        # may have been the intent -- confirm.
        self.retry(exc=e, countdown=30**self.request.retries)
def download_ffmpeg(out=None):
    """Download a static ffmpeg build for the current platform into *out*.

    Args:
        out: destination directory. Defaults to the current working
            directory at call time (bug fix: the old default evaluated
            os.getcwd() once at import time).

    Returns:
        Path of the extracted ffmpeg binary, or False when the platform is
        unsupported.
    """
    if out is None:
        out = os.getcwd()
    platform, arch = detect_platform()
    if platform.lower() == "windows":
        if arch.lower() == '32bit':
            ffmpeg_url = "https://ffmpeg.zeranoe.com/builds/win32/static/ffmpeg-latest-win32-static.zip"
        elif arch.lower() == '64bit':
            ffmpeg_url = "https://ffmpeg.zeranoe.com/builds/win64/static/ffmpeg-latest-win64-static.zip"
        ffmpeg = download_file(url=ffmpeg_url, out=out)
        logger.info("%s downloaded" % ffmpeg)
        with zipfile.ZipFile(ffmpeg, 'r') as zip_ref:
            # Extract member-by-member so existing files are never overwritten.
            for file in zip_ref.filelist:
                if not os.path.exists(file.filename):
                    zip_ref.extract(file, out)
                if file.filename.endswith("ffmpeg.exe") and (not file.is_dir()) and int(file.file_size) > 0:
                    ffmpeg_binary = file.filename
                    break
    elif platform.lower() == "linux":
        # NOTE(review): both branches point at the amd64 build; the 32bit
        # URL looks wrong -- confirm whether an i686 build is needed.
        if arch.lower() == '32bit':
            ffmpeg_url = "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz"
        elif arch.lower() == '64bit':
            ffmpeg_url = "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz"
        ffmpeg = download_file(url=ffmpeg_url, out=out)
        logger.info("%s downloaded" % ffmpeg)
        with contextlib.closing(lzma.LZMAFile(ffmpeg)) as xz:
            with tarfile.open(fileobj=xz) as f:
                for member in f.members:
                    if not os.path.exists(member.name):
                        # Bug fix: extractfile() only returns an in-memory
                        # stream; extract() actually writes the member to disk.
                        f.extract(member, out)
                    # 493 == 0o755: select the executable named 'ffmpeg'.
                    if member.name.endswith('ffmpeg') and int(member.size) > 0 and int(member.mode) == 493:
                        ffmpeg_binary = member.name
                        break
    elif platform.lower() == "darwin":
        ffmpeg_url = "https://ffmpeg.zeranoe.com/builds/macos64/static/ffmpeg-latest-macos64-static.zip"
        ffmpeg = download_file(url=ffmpeg_url, out=out)
        logger.info("%s downloaded" % ffmpeg)
        with zipfile.ZipFile(ffmpeg, 'r') as zip_ref:
            for file in zip_ref.filelist:
                if not os.path.exists(file.filename):
                    zip_ref.extract(file, out)
                if file.filename.endswith("ffmpeg") and (not file.is_dir()) and int(file.file_size) > 0:
                    ffmpeg_binary = file.filename
                    break
    else:
        logger.error("Unsupported system")
        return False
    filesize = os.path.getsize(ffmpeg_binary)
    logger.info("ffmpeg location on [{path}], size = [{size}]".format(
        path=ffmpeg_binary, size=filesize))
    return ffmpeg_binary
def get_wise(ra, dec, band):
    """Fetch the ALLWISE coadd intensity image covering (ra, dec) for *band*.

    Returns the local filename of the downloaded FITS file.
    """
    mission = 'wise'
    dataset = 'allwise'
    table = 'p3am_cdd'
    successful = False
    # Retry the IRSA query indefinitely until the connection succeeds.
    while not successful:
        try:
            results = Ibe.query_region(
                coord.SkyCoord(ra, dec, unit=(u.deg, u.deg), frame='icrs'),
                mission=mission, dataset=dataset, table=table)
            successful = True
        except requests.exceptions.ConnectionError:
            # Parenthesized so the statement is valid on Python 2 and 3.
            print('Connection failed, retrying')
            sleep(10)
    url = 'http://irsa.ipac.caltech.edu/ibe/data/' + mission + '/' + dataset + '/' + table + '/'
    # coadd_id encodes the tile; its prefixes form the directory hierarchy.
    params = {
        'coadd_id': results[results['band'] == band]['coadd_id'][0],
        'band': band
    }
    params['coaddgrp'] = params['coadd_id'][:2]
    params['coadd_ra'] = params['coadd_id'][:4]
    path = str.format(
        '{coaddgrp:s}/{coadd_ra:s}/{coadd_id:s}/{coadd_id:s}-w{band:1d}-int-3.fits',
        **params)
    outname = path.split('/')[-1]
    download_file(url + path, outname)
    return outname
def start_yield(category: str):
    """Fetch a domain-list category file and download every URL it includes."""
    base = 'https://raw.githubusercontent.com/v2fly/domain-list-community/master/data/'
    download_file(base + category)
    # Keep the original around while the filtered copy is produced.
    move(category, category + '.ori')
    with open(category, 'w', encoding='utf-8') as out:
        urls: List = get_included_urls(category + '.ori', out)
    os.remove(category + '.ori')
    os.makedirs(category + '.d', exist_ok=False)
    os.chdir(category + '.d')
    # Drain the iterator so all eight workers run to completion.
    for _ in ThreadPool(8).imap_unordered(download_file, urls):
        pass
def main(self, ids):
    """Crawl song pages for the given ids and download each track.

    Pipeline: 1. download the page; 2. parse it; 3. expand links
    (song-page URLs and download links); 4. hand links to the downloader.
    """
    urls = []
    for id in ids:
        # urls.append(self.baseURL+id)
        urls.append(id)
    self.driver = webdriver.Chrome()
    self.driver.set_window_size(1200, 800)
    # Phase 1: visit each page with Selenium to collect song-page URLs;
    # failures on individual pages are silently skipped.
    for url in urls:
        try:
            self.get_songPageUrl(url)
        except:
            pass
    self.driver.close()
    # NOTE(review): __exit__ is normally called by a context manager with
    # (exc_type, exc_val, exc_tb); invoking it bare looks suspect -- confirm.
    self.driver.__exit__()
    print(f"获取到{self.songPage.get_urls_len()}个歌曲")
    # url="http://node.kg.qq.com/play?s=zSl-ohzGH3_1Uz1W&g_f=personal"
    # Phase 2: parse every collected song page into download metadata,
    # queued as JSON strings; sleep to throttle requests.
    while self.songPage.has_new_url():
        try:
            url = self.songPage.get_new_url()
            songPage_html = self.downloadHTML.download(url)
            musicInfo = self.songPageParse.parse(songPage_html)
            self.downloadMusic.add_new_url(json.dumps(musicInfo))
            sleep(2)
        except:
            pass
    print(f"获得{self.downloadMusic.get_urls_len()}个下载连接")
    # Phase 3: download each audio file as <singer_name>-<music_name>.m4a.
    while self.downloadMusic.has_new_url():
        try:
            itme = json.loads(self.downloadMusic.get_new_url())
            url = itme["audio_url"]
            fileName = self.path + str(itme["singer_name"]) + "-" + str(
                itme["music_name"]) + ".m4a"
            download_file(url, fileName)
        except:
            pass
    print("End")
def fetch_hubble_image_by_id(image_id):
    """Download the last-listed image file for *image_id* into images/."""
    api_url = "http://hubblesite.org/api/v3/image/{}".format(image_id)
    response = requests.get(api_url)
    response.raise_for_status()
    payload = response.json()
    # Take the last entry of image_files (treated as the best version).
    best_url = "http:{}".format(payload["image_files"][-1]["file_url"])
    extension = os.path.splitext(best_url)[1]
    # Strip characters that are unsafe in filenames.
    clean_name = re.sub(r"[^\w\s\-\(\)]", "", payload["name"]).strip()
    target = Path("images") / "{}{}".format(clean_name, extension)
    download_file(best_url, target)
def get_panstarrs(ra, dec, psband):
    """Download the PS1 cutout files at (ra, dec) for the given band(s).

    psband: band letter(s) to fetch, or '*' for all bands.
    Returns the list of downloaded filenames.
    """
    page = requests.get(
        'http://ps1images.stsci.edu/cgi-bin/ps1filenames.py?ra=%f&dec=%f'
        % (ra, dec),
        verify=False)
    # Parenthesized prints keep this valid on both Python 2 and 3.
    print(page.status_code)
    print(page.headers['content-type'])
    lines = page.text.split('\n')
    downloads = []
    # First line is a header; each data row is whitespace-separated fields.
    for l in lines[1:]:
        bits = l.split()
        # bits[4]: filter, bits[7]: remote path, bits[8]: local filename
        # (inferred from usage -- confirm against the service).
        if bits[4] in psband or psband == '*':
            download_file('http://ps1images.stsci.edu/' + bits[7], bits[8])
            downloads.append(bits[8])
    return downloads
def get_pic(self):
    """Download every album image into a per-album directory under base_path."""
    time_start3 = time.time()
    src_list = {}  # picture name -> original image URL
    for url in self.ablum_url():
        src = str(self.my_soup(url).findAll('img', attrs={'id': 'imageview'}))
        name = re.search(r'alt="(.*?)" id', src).group(1)
        # Replace characters illegal in paths (full/half-width colon, slash).
        rule = re.compile(r':|:|/')
        pic_name = re.sub(rule, ' ', name)
        # Bug fix: raw string -- '\(' in a plain literal is an invalid
        # escape sequence (DeprecationWarning); the pattern is unchanged.
        dir_name = re.sub(r'\((.*)\)', '', pic_name)
        file_path = self.base_path + '/' + dir_name
        if not os.path.exists(file_path):  # create the album directory
            os.makedirs(file_path)
        file_name = file_path + '/' + pic_name + '.jpg'
        pic_src = re.search(r'src="(.*?)"', src).group(1)
        src_list[pic_name] = pic_src
        print('开始下载图片: %s ' % pic_name)
        download_file(pic_src, file_name, headers)
        print('\n 下载图片耗时:%.2f s\n' % (time.time() - time_start3))
        print('----------')
def run(self):
    """Worker loop: pop surls from L_toDownload, fetch and untar each one.

    Runs until self.exit is set.  A surl moves through the shared lists
    L_toDownload -> L_inDownload -> L_Downloaded as it progresses.
    """
    import os
    while not self.exit.is_set():
        if len(self.L_toDownload) > 0:
            surl = self.L_toDownload.pop()
            self.L_inDownload.append(surl)
            tar_file = surl.split('/')[-1] # e.g. .../L769079_SB020_uv.MS_daf24388.tar
            ms_file = surl.split('/')[-1].split('.MS')[0]+'.MS' # e.g. .../L769079_SB020_uv.MS
            # Pick the LTA mirror matching the archive that hosts this surl.
            if 'psnc.pl' in surl:
                url = 'https://lta-download.lofar.psnc.pl/lofigrid/SRMFifoGet.py?surl=%s' % surl
                LTA_site = 'PL'
            elif 'sara.nl' in surl:
                url = 'https://lofar-download.grid.surfsara.nl/lofigrid/SRMFifoGet.py?surl=%s' % surl
                LTA_site = 'NL'
            elif 'juelich.de' in surl:
                url = 'https://lofar-download.fz-juelich.de/webserver-lofar/SRMFifoGet.py?surl=%s' % surl
                LTA_site = 'DE'
            else:
                print('ERROR: unknown archive for %s...' % surl)
                sys.exit()
            print("Downloader -- Download: %s (from: %s) " % (tar_file, LTA_site))
            # loop until the sanity check on the downloaded MS is ok
            while True:
                download_file(url, tar_file, login, password)
                os.system('tar xf %s' % tar_file)
                print(tar_file)
                try:
                    # A successful table open is the sanity check; a corrupt
                    # download raises and triggers a re-download.
                    t = tables.table(ms_file, ack=False)
                    break
                except:
                    print('ERROR opening %s, probably corrupted - redownload it' % ms_file)
                    #os.system('rm -r %s %s' % (tar_file, ms_file))
                    os.system('rm -r %s' % tar_file)
            self.L_inDownload.remove(surl)
            self.L_Downloaded.append(surl)
        # Throttle the polling loop.
        # NOTE(review): indentation reconstructed from collapsed source --
        # confirm the sleep belongs at loop level, not inside the if.
        time.sleep(2)
def download_category(url, dest):
    """Download every valid file linked from the category page *url* into *dest*."""
    try:
        os.makedirs(dest, exist_ok=True)
        soup = BeautifulSoup(requests.get(url).text, 'html.parser')
        table = soup.find(id='subject-table')
        if table is None:
            return
        # Walk every anchor in the results table, skipping non-file links.
        for anchor in tqdm(table.find_all('a'), desc=url):
            relative = anchor['href']
            if not _check_is_valid_file_url(relative):
                continue
            absolute = url + '/' + relative
            download_file(absolute, os.path.join(dest, anchor.string))
    except:
        # Report which category failed, then propagate the error.
        print('Failed at category', url)
        raise
def get_pic(self):
    """Download every album image into a per-album directory under base_path."""
    time_start3 = time.time()
    src_list = {}  # picture name -> original image URL
    for url in self.ablum_url():
        src = str(
            self.my_soup(url).findAll('img', attrs={'id': 'imageview'}))
        name = re.search(r'alt="(.*?)" id', src).group(1)
        # Replace characters illegal in paths (full/half-width colon, slash).
        rule = re.compile(r':|:|/')
        pic_name = re.sub(rule, ' ', name)
        # Bug fix: raw string -- '\(' in a plain literal is an invalid
        # escape sequence (DeprecationWarning); the pattern is unchanged.
        dir_name = re.sub(r'\((.*)\)', '', pic_name)
        file_path = self.base_path + '/' + dir_name
        if not os.path.exists(file_path):  # create the album directory
            os.makedirs(file_path)
        file_name = file_path + '/' + pic_name + '.jpg'
        pic_src = re.search(r'src="(.*?)"', src).group(1)
        src_list[pic_name] = pic_src
        print('开始下载图片: %s ' % pic_name)
        download_file(pic_src, file_name, headers)
        print('\n 下载图片耗时:%.2f s\n' % (time.time() - time_start3))
        print('----------')
def download_dataset(server,root,workdir='.'):
    """Download every file listed on the index page server+root into workdir.

    Returns True on success; False when the index page cannot be fetched
    or it lists fewer than 24 files.
    """
    print server+root
    # Fetch the index page, retrying forever on connection problems.
    while True:
        try:
            print 'Downloading index page',server+root
            page=requests.get(server+root,verify=False,timeout=60)
        except (requests.exceptions.ConnectionError,requests.exceptions.Timeout,requests.exceptions.ReadTimeout):
            print 'Connection error! sleeping 30 seconds before retry...'
            sleep(30)
        else:
            break
    print page.status_code
    if page.status_code!=200:
        print page.headers
        return False
    print page.headers['content-type']
    tree=html.fromstring(page.text)
    row = tree.xpath('//a')
    files=[]
    urls=[]
    # Collect download anchors, skipping the 'step1' auxiliary file.
    for r in row:
        if 'title' in r.attrib and 'Download' in r.attrib['title'] and 'step1' not in r.attrib['download']:
            files.append(r.attrib['download'])
            urls.append(r.attrib['href'].replace('../..',''))
    if len(files)<24:
        print 'There should be >=24 files but there are only %s! Check SARA manually.'%len(files)
        return False
    else:
        print 'Downloading',len(files),'distinct files'
        # Skip files already present in workdir; fetch the rest.
        for f,u in zip(files,urls):
            if os.path.isfile(workdir+'/'+f):
                print 'File',f,'already exists, skipping'
            else:
                print 'Downloading',f
                url=server+u
                print url
                filename=workdir+'/'+f
                download_file(url,filename)
    return True
def download(self):
    """Worker loop: take (url, path) jobs from q_job and download them.

    Each finished job is reported on q_done as (job, result); result is
    None when the download raised.
    """
    while True:
        job = self.q_job.get()
        if self.lock:
            self.running += 1
        # Bug fix: res was only assigned inside the try, so a failed
        # download hit q_done.put with res unbound (NameError on the first
        # failure, a stale value on later ones).
        res = None
        try:
            # job[0] - url
            # job[1] - file to save
            if VERBOSE:
                res = download_file(job[0], job[1], needs_report=True)
            else:
                res = download_file(job[0], job[1], needs_report=False)
        except Exception:
            pass
        self.q_done.put((job, res))
        if self.lock:
            self.running -= 1
        self.q_job.task_done()
        print(job[1])
def download_segment(segment_path):
    """ Function to download the segment"""
    #segment_url = config_cdash.CONTENT_SERVER + segment_path
    # Route each known title to its content server.
    if "Big" in segment_path:
        server = config_cdash.SERVER[0]
    elif "Elephants" in segment_path:
        server = config_cdash.SERVER[1]
    elif "OfForest" in segment_path:
        server = config_cdash.SERVER[2]
    elif "Tears" in segment_path:
        server = config_cdash.SERVER[3]
    else:
        # Bug fix: an unmatched path previously fell through and raised a
        # bare NameError because segment_url was never bound.
        raise ValueError('Unknown content for segment path %r' % segment_path)
    segment_url = config_cdash.CONTENT_SERVER + server + segment_path
    local_filepath = get_segment_local_path(segment_path)
    return download_file(segment_url, local_filepath)
def get_first(ra, dec):
    """Download the nearest VLA FIRST image to (ra, dec) from STScI.

    Returns the local filename of the downloaded file.
    """
    url = "http://archive.stsci.edu/"
    page = requests.get(
        url + "vlafirst/search.php?RA=%.7f&DEC=%.6f&Radius=30.0&action=Search"
        % (ra, dec),
        verify=False)
    # Parenthesized so the statement is valid on both Python 2 and 3.
    print(page.status_code)
    tree = html.fromstring(page.text)
    table = tree.xpath('//tbody')
    links = []
    dists = []
    # Column 0 holds the download link, column 8 the distance.
    for row in table[0].getchildren():
        td = row.getchildren()
        links.append(td[0].getchildren()[0].attrib['href'])
        dists.append(float(td[8].text))
    # Pick the closest match.
    index = np.argmin(dists)
    path = links[index]
    outname = path.split('/')[-1]
    download_file(url + path, outname)
    return outname
# Import relevant libraries
import numpy as np
import pandas as pd  # to deal with the dataset
import plotly.express as px  # to plot with beauty
from download_file import download_file
import json

## Get around pandas freezing when opening the file
url_name = 'https://base-covid19.pt/export3.json'
output_file = 'export3.json'
# Bug fix: url_name was defined but unused -- the URL literal was
# duplicated inline in the download call.
download_file(url_name, output_file=output_file)

## Reads the json brute force
data = pd.read_json(output_file)

## Time columns inserted as dictionaries
col_names_to_time = ["signingDateParsed", "created", "updated"]
for col_name in col_names_to_time:
    data[col_name] = data[col_name].apply(pd.Series)
    data[col_name] = pd.to_datetime(data[col_name])

## Time columns with easy to interpret datetime
data["publicationDate"] = pd.to_datetime(data["publicationDate"])
data["signingDate"] = pd.to_datetime(data["signingDate"])
#print(data[["signingDateParsed", "created", "updated"]].head())
def download_pic(prefix_cos_url, file_name, dir_path):
    """Fetch *file_name* from the COS prefix URL and return its local path."""
    source_url = prefix_cos_url + file_name
    return download_file(source_url, file_name, dir_path)
if not os.path.exists(r"settings.json"):
    # Bug fix: json.load() takes a file object, not a path string --
    # json.load(r"example_settings.json") raised AttributeError before
    # settings.json could ever be created.
    with open(r"example_settings.json") as example_config:
        defaults = json.load(example_config)
    with open(r"settings.json", 'w') as json_config:
        json.dump(defaults, json_config)
    print('Please, configure settings.json according to the instructions')
    exit(0)

# Close the settings handle deterministically instead of leaking it.
with open(r"settings.json") as config_file:
    const = json.load(config_file)

version = getting_versions(const['request_url'], const['selector'])
if version != 'None':
    for distro in ['win64', 'osx', 'linux64']:
        file_path, file_name = download_file(const['download_url'],
                                             const['path'],
                                             const['username'],
                                             const['token'],
                                             version, 'alpha', distro)
# 1) Authenticate with Yandex.Disk and upload, AS A FUNCTION. CURRENTLY IN TESTING!!!
# TODO 2) Authenticate with MEGA and upload to the drive
# TODO 3) Check for new game versions
# 4) File-based configs
# 5) Handle failures and already-downloaded versions
# 6) Check whether the files are already on the drive
# 7) Verify file integrity (maybe)
# TODO 8) Compile the program to an exe
def download_segment(segment_path):
    """ Function to download the segment"""
    remote_url = config_cdash.CONTENT_SERVER + segment_path
    # Flatten the path so the segment is stored in a single folder.
    flat_name = segment_path.replace('/', '-')
    target = os.path.join(config_cdash.VIDEO_FOLDER, flat_name)
    return download_file(remote_url, target)
files.append(r.attrib['download']) urls.append(r.attrib['href'].replace('../..','')) if len(files)<25: print 'There should be 25 files but there are only %s! Check SARA manually.'%len(files) return False else: print 'Downloading',len(files),'distinct files' for f,u in zip(files,urls): if os.path.isfile(workdir+'/'+f): print 'File',f,'already exists, skipping' else: print 'Downloading',f url=server+u print url filename=workdir+'/'+f download_file(url,filename) return True if __name__=='__main__': import sys name=sys.argv[1] try: os.mkdir(name) except OSError: pass status=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/',workdir='./'+name)
def get_cat(method, retries=100):
    """Download external catalogues for every tile position of the image.

    method: 'panstarrs' (STScI query), 'wise' (IRSA query) or 'pslocal'
        (healpix pixel files, fetched unless a local copy exists in PSBASE).
    retries: maximum number of attempts per Pan-STARRS query.
    """
    cwd = os.getcwd()
    # Catalogue files are collected in a subdirectory named after the method.
    try:
        os.mkdir(method)
    except OSError:
        pass
    if method == 'pslocal':
        hplist = []
    if method == 'wise':
        from astroquery.irsa import Irsa
        Irsa.ROW_LIMIT = 1000000
    ra_factor, pos = tile(find_fullres_image())
    print 'Downloading catalogues for', len(pos), 'sky positions'
    for i, p in enumerate(pos):
        outfile = method + '/' + method + '-' + str(i) + '.vo'
        # Skip positions already downloaded on a previous run.
        if os.path.isfile(outfile):
            print 'Catalogue at position', p, 'already present'
            continue
        print 'Downloading at position', p
        if method == 'panstarrs':
            # Retry until a clean response arrives or the budget runs out.
            count = 0
            while True:
                try:
                    r = requests.post(
                        'http://archive.stsci.edu/panstarrs/search.php',
                        data={
                            'ra': p[0],
                            'dec': p[1],
                            'SR': CSIZE,
                            'max_records': 100000,
                            'nDetections': ">+5",
                            'action': 'Search',
                            'selectedColumnsCsv': 'objid,ramean,decmean'
                        },
                        timeout=300)
                except requests.exceptions.Timeout:
                    print 'Timeout, retrying!'
                else:
                    if 'Warning' not in r.text and 'Please' not in r.text:
                        break
                    else:
                        # will go round the loop again
                        print 'Bad response, retry download (%i)' % count
                        sleep(5 + count * 15)
                count += 1
                if count >= retries:
                    raise RuntimeError(
                        'Number of retries exceeded for download')
            f = open(outfile, 'w')
            f.writelines(r.text)
            f.close()
        elif method == 'wise':
            t = Irsa.query_region(coord.SkyCoord(p[0], p[1], unit=(u.deg, u.deg)),
                                  catalog='allwise_p3as_psd',
                                  radius='0d30m0s')
            t.write(outfile, format='votable')
        elif method == 'pslocal':
            from astropy_healpix import HEALPix
            hp = HEALPix(nside=64)
            cs = hp.cone_search_lonlat(p[0] * u.deg, p[1] * u.deg,
                                       radius=CSIZE * u.deg)
            hplist += list(cs)
            if not os.path.isdir(PSBASE):
                # we don't have a local PS database, so download
                for pix in cs:
                    outfile = method + '/' + str(pix)
                    if not os.path.isfile(outfile):
                        print 'Downloading healpix pixel', pix
                        download_file(
                            'http://uhhpc.herts.ac.uk/panstarrs-healpix/' +
                            str(pix), outfile)
        else:
            raise NotImplementedError('Method ' + method)
    if method == 'pslocal':
        # Merge all unique healpix pixel files into one text catalogue.
        hplist = list(set(hplist))
        print 'Found', len(hplist), 'unique healpix pixels'
        outname = method + '/' + method + '.txt'
        with open(outname, 'w') as outfile:
            outfile.write('# RA DEC ObjID\n')
            for pixel in hplist:
                print 'Appending pixel', pixel
                if os.path.isdir(PSBASE):
                    pixelfile = PSBASE + '/' + str(pixel)
                else:
                    pixelfile = method + '/' + str(pixel)
                if not os.path.isfile(pixelfile):
                    raise RuntimeError('Pixel file ' + pixelfile + 'does not exist')
                # NOTE(review): appending with an external 'cat >>' while the
                # Python handle is still open assumes the header was flushed
                # first -- confirm the ordering is as intended.
                os.system('cat ' + pixelfile + ' >> ' + outname)
def get_cat(method,retries=100):
    """Download external catalogues for every tile position of the image.

    method: 'panstarrs' (STScI query), 'wise' (IRSA query) or 'pslocal'
        (healpix pixel files, fetched unless a local copy exists in PSBASE).
    retries: maximum number of attempts per Pan-STARRS query.
    """
    cwd=os.getcwd()
    # Catalogue files are collected in a subdirectory named after the method.
    try:
        os.mkdir(method)
    except OSError:
        pass
    if method=='pslocal':
        hplist=[]
    if method=='wise':
        from astroquery.irsa import Irsa
        Irsa.ROW_LIMIT=1000000
    ra_factor,pos=tile(cwd+'/image_ampphase1.app.restored.fits')
    print 'Downloading catalogues for',len(pos),'sky positions'
    for i,p in enumerate(pos):
        outfile=method+'/'+method+'-'+str(i)+'.vo'
        # Skip positions already downloaded on a previous run.
        if os.path.isfile(outfile):
            print 'Catalogue at position',p,'already present'
            continue
        print 'Downloading at position',p
        if method=='panstarrs':
            # Retry until a clean response arrives or the budget runs out.
            count=0
            while True:
                try:
                    r = requests.post('http://archive.stsci.edu/panstarrs/search.php', data = {'ra':p[0],'dec':p[1],'SR':CSIZE,'max_records':100000,'nDetections':">+5",'action':'Search','selectedColumnsCsv':'objid,ramean,decmean'},timeout=300)
                except requests.exceptions.Timeout:
                    print 'Timeout, retrying!'
                else:
                    if 'Warning' not in r.text and 'Please' not in r.text:
                        break
                    else:
                        # will go round the loop again
                        print 'Bad response, retry download (%i)' % count
                        sleep(5+count*15)
                count+=1
                if count>=retries:
                    raise RuntimeError('Number of retries exceeded for download')
            f=open(outfile,'w')
            f.writelines(r.text)
            f.close()
        elif method=='wise':
            t=Irsa.query_region(coord.SkyCoord(p[0],p[1],unit=(u.deg,u.deg)), catalog='allwise_p3as_psd', radius='0d30m0s')
            t.write(outfile,format='votable')
        elif method=='pslocal':
            from astropy_healpix import HEALPix
            hp = HEALPix(nside=64)
            cs = hp.cone_search_lonlat(p[0]*u.deg, p[1]*u.deg, radius=CSIZE*u.deg)
            hplist += list(cs)
            if not os.path.isdir(PSBASE):
                # we don't have a local PS database, so download
                for pix in cs:
                    outfile=method+'/'+str(pix)
                    if not os.path.isfile(outfile):
                        print 'Downloading healpix pixel',pix
                        download_file('http://uhhpc.herts.ac.uk/panstarrs-healpix/'+str(pix),outfile)
        else:
            raise NotImplementedError('Method '+method)
    if method=='pslocal':
        # Merge all unique healpix pixel files into one text catalogue.
        hplist=list(set(hplist))
        print 'Found',len(hplist),'unique healpix pixels'
        outname=method+'/'+method+'.txt'
        with open(outname,'w') as outfile:
            outfile.write('# RA DEC ObjID\n')
            for pixel in hplist:
                print 'Appending pixel',pixel
                if os.path.isdir(PSBASE):
                    pixelfile=PSBASE+'/'+str(pixel)
                else:
                    pixelfile=method+'/'+str(pixel)
                if not os.path.isfile(pixelfile):
                    raise RuntimeError('Pixel file '+pixelfile+'does not exist')
                # NOTE(review): appending with an external 'cat >>' while the
                # Python handle is still open assumes the header was flushed
                # first -- confirm the ordering is as intended.
                os.system('cat '+pixelfile+' >> '+outname)
def generate_json(source_file, download_destination, json_file):
    """Download the source spreadsheet and convert it into *json_file*."""
    print('[-] Downloading source spreadsheet...')
    download_file(source_file, download_destination)
    print('[-] Generating json file...')
    converted = excel_to_json(download_destination)
    write_json_file(converted, json_file)
def test_download_file(cls): """Test Download_file""" # valid file id real_file_id = '1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9' file = download_file.download_file(real_file_id=real_file_id) cls.assertNotEqual(cls, 0, len(file))
import os
import re
from typing import List, Pattern

import download_file

download_file.download_file(
    'https://raw.githubusercontent.com/v2ray/domain-list-community/master/data/geolocation-cn'
)

with open(r'geolocation-cn', 'r', encoding='UTF-8') as f:
    tmplist: List = f.readlines()

# Hoisted out of the loop: the pattern never changes between lines.
pattern: Pattern = re.compile(r'^include:')
for line in tmplist:
    # splitting on a leading 'include:' yields an empty first part.
    parts: List[str] = pattern.split(line)
    if parts[0] == '':
        # Download included lists next to this script.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        download_file.download_file(
            'https://raw.githubusercontent.com/v2ray/domain-list-community/master/data/'
            + parts[1].rstrip())
def update_sources(source, destination):
    """Download the sources file from *source* and write it to *destination*."""
    print('[-] Downloading source file...')
    download_file(source, destination)
    print('[+] Successfully downloaded and wrote local sources')
def generate_json(source_file, download_destination, json_file):
    """Fetch *source_file*, convert the spreadsheet, and write *json_file*."""
    download_file(source_file, download_destination)
    converted = excel_to_json(download_destination)
    write_json_file(converted, json_file)
def get_legacy(ra, dec, size=1000, pixscale=0.454, bands='r', ftype='fits'):
    """Download a Legacy Survey DR8 cutout centred on (ra, dec).

    Args:
        ra, dec: position in degrees.
        size: cutout side length in pixels.
        pixscale: pixel scale in arcsec/pixel.
        bands: band letters to request.
        ftype: cutout type requested from the viewer (e.g. 'fits', 'jpeg').

    Returns:
        The local output filename.
    """
    url = "http://legacysurvey.org/viewer/{}-cutout?ra={}&dec={}&size={}&layer=dr8&pixscale={}&bands={}".format(
        ftype, ra, dec, size, pixscale, bands)
    # Bug fix: the extension was hard-coded to .fits even when another
    # cutout type was requested; name the file after ftype instead.
    # (Unchanged for the default ftype='fits'.)
    outname = 'legacy-%s-%f-%f.%s' % (bands, ra, dec, ftype)
    download_file(url, outname)
    return outname
files.append(r.attrib['download']) urls.append(r.attrib['href'].replace('../..','')) if len(files)<24: print 'There should be >=24 files but there are only %s! Check SARA manually.'%len(files) return False else: print 'Downloading',len(files),'distinct files' for f,u in zip(files,urls): if os.path.isfile(workdir+'/'+f): print 'File',f,'already exists, skipping' else: print 'Downloading',f url=server+u print url filename=workdir+'/'+f download_file(url,filename) return True if __name__=='__main__': import sys name=sys.argv[1] try: os.mkdir(name) except OSError: pass status=download_dataset('https://lofar-webdav.grid.sara.nl','/SKSP/'+name+'/',workdir='./'+name)
def download_segment(segment_path):
    """ Function to download the segment"""
    # Resolve the remote URL and the matching local cache path.
    remote_url = config_cdash.CONTENT_SERVER + segment_path
    target_path = get_segment_local_path(segment_path)
    return download_file(remote_url, target_path)