def test_path(self):
    url = FILE_SMALL
    # path=''
    download(url=url, path='')
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # path='.'
    download(url=url, path='.')
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # path=TEST_DATA_SUBDIR
    download(url=url, path=TEST_DATA_SUBDIR)
    f = os.path.join(TEST_DATA_SUBDIR, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # path='foobar'
    download(url=url, path='foobar')
    f = os.path.join(TEST_DATA_DIR, 'foobar')
    assert os.path.exists(f)
    os.remove(f)
    # path='foo/bar'
    with self.assertRaises(IOError):
        download(url=url, path='foo/bar')
    f = os.path.join(TEST_DATA_DIR, 'foo', 'bar')
    assert not os.path.exists(f)
def main(cmdargs):
    index_dir = os.path.abspath(cmdargs.index_dir)
    if not os.path.exists(index_dir):
        os.mkdir(index_dir)
    dl_index_fname = "index.csv"
    index_db_fname = "index.db"
    index_db_tname = "landsat"
    temp_index_fname = "tmpindex"
    temp_index_db_fname = "tmpindex.db"
    tmpdir = tempfile.mkdtemp()
    zip_file = os.path.join(tmpdir, "{0:s}.gz".format(temp_index_fname))

    if cmdargs.channel == "google":
        index_url = GOOGLE_LANDSAT_INDEX
        index_dtype = GOOGLE_LANDSAT_DTYPE
        index_parse_dates = ["DATE_ACQUIRED", "SENSING_TIME"]
        unzip_cmd = ["gzip", "-d", zip_file]
    elif cmdargs.channel == "aws":
        index_url = S3_LANDSAT_INDEX
        index_dtype = S3_LANDSAT_DTYPE
        index_parse_dates = ["acquisitionDate"]
        unzip_cmd = ["gzip", "-d", zip_file]
    else:
        raise RuntimeError("Channel is not implemented yet!")

    logger.info("Downloading zipped index file started.")
    download(index_url, path=zip_file)
    logger.info("Downloading zipped index file finished.")

    logger.info("Unzipping index file started.")
    if subprocess.call(unzip_cmd) != 0:
        raise RuntimeError("Unzipping the Landsat index file failed!")
    logger.info("Unzipping index file finished.")

    csv_file = os.path.join(index_dir, dl_index_fname)
    shutil.move(os.path.join(tmpdir, temp_index_fname),
                os.path.join(index_dir, temp_index_fname))
    os.rename(os.path.join(index_dir, temp_index_fname), csv_file)

    tmp_db_file = os.path.join(tmpdir, temp_index_db_fname)
    csv_db = sa.create_engine("sqlite:///{0:s}".format(tmp_db_file))
    chunksize = int(1e5)
    j = 1
    logger.info("Updating SQLite database started.")
    for df in pd.read_csv(csv_file, chunksize=chunksize, iterator=True,
                          dtype=index_dtype, parse_dates=index_parse_dates):
        df = df.rename(columns={c: c.replace(' ', '_') for c in df.columns})
        df.index += j
        df.to_sql(index_db_tname, csv_db, if_exists='append')
        j = df.index[-1] + 1
    csv_db_file = os.path.join(index_dir, index_db_fname)
    shutil.move(tmp_db_file, csv_db_file)
    logger.info("Updating SQLite database finished.")
def test_pass_through_opts(self):
    url = FILE_5MB
    opts_url = FILE_1MB
    download(url=url, pass_through_opts={pycurl.URL: opts_url})
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    opts_f = os.path.join(TEST_DATA_DIR, get_resource_name(opts_url))
    assert os.path.exists(opts_f)
    assert not os.path.exists(f)
    os.remove(opts_f)
def download_and_detect(id, url, filename):
    opt = {pycurl.PROXY: config.tor_pool_url,
           pycurl.PROXYPORT: config.tor_pool_port,
           pycurl.PROXYTYPE: pycurl.PROXYTYPE_SOCKS5_HOSTNAME}
    download(url, path=filename, pass_through_opts=opt)
    f = open(filename, 'rb')
    tags = exifread.process_file(f)
    set_exif_data(id, {'exif': tags.keys()})
    f.close()
def download_all(entries, zipDir, unzipDir):
    for i in range(len(entries)):
        entry = entries[i]
        url = entry['link'][0]['href']
        print('downloading product ' + str(i))
        filepath = zipDir + str(i) + '.zip'
        homura.download(url=url, auth=session.auth, path=filepath)
        zip_ref = zipfile.ZipFile(filepath, 'r')
        zip_ref.extractall(unzipDir)
        zip_ref.close()
    print('finished downloads')
def test_auth(self):
    url = "http://httpbin.org/basic-auth/aaa/bbb"
    auth = ("aaa", "bbb")
    download(url=url, auth=auth)
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    with open(f) as handle:
        txt = handle.read()
    assert '"authenticated": true' in txt
    assert '"user": "aaa"' in txt
    assert os.path.exists(f)
    os.remove(f)
def test_unicode(self):
    url = FILE_UNICODE
    path_ascii = TEST_DATA_ASCII
    path_unicode = TEST_DATA_UNICODE
    path_utf8 = TEST_DATA_UTF8
    # No path
    download(url=url)
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # ASCII path
    download(url=url, path=path_ascii)
    f = os.path.join(TEST_DATA_DIR, path_ascii, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # Unicode path
    download(url=url, path=path_unicode)
    f = os.path.join(TEST_DATA_DIR, path_unicode, get_resource_name(url))
    assert os.path.exists(f)
    os.remove(f)
    # UTF-8 path
    download(url=url, path=path_utf8)
    f = os.path.join(utf8_encode(TEST_DATA_DIR), path_utf8,
                     utf8_encode(get_resource_name(url)))
    assert os.path.exists(f)
    os.remove(f)
def download(self, scenes, bands):
    """
    Download Landsat 5 scenes from Google Storage

    :scenes: list of scenes to download
    :bands: bands to download
    """
    scene_urls = [build_band_urls(scene, bands) for scene in scenes]
    image_path = os.path.join(self.download_dir, scenes[0])
    os.makedirs(image_path)
    # Avoid shadowing the `bands` parameter inside the loop.
    for band_urls in scene_urls:
        for band_url in band_urls:
            download(band_url, image_path)
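# Usage sketch for the method above (not from the original source): the scene
# ID and band numbers are illustrative placeholders, and `downloader` is
# assumed to be an instance of the class this method belongs to.
downloader.download(scenes=['LT50010102010020XXX00'], bands=[1, 2, 3])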
def read_file(self, changeset_file):
    """Download the replication changeset file or read it directly
    from the filesystem (for test purposes).
    """
    if isfile(changeset_file):
        self.filename = changeset_file
    else:
        self.path = mkdtemp()
        self.filename = join(self.path, basename(changeset_file))
        download(changeset_file, self.path)

    self.xml = ET.fromstring(gzip.open(self.filename).read())

    # delete the folder created to download the file
    if not isfile(changeset_file):
        rmtree(self.path)
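# Usage sketch for both code paths above (assumptions: `handler` is an
# instance of the class, and the URL is a placeholder for a real replication
# changeset file):
handler.read_file('local-changesets/42.osc.gz')  # read directly from disk
handler.read_file('https://example.org/replication/changesets/42.osc.gz')  # downloaded first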
def downloadImages_l(self):
    self.downloadBar_l.setMinimum(0)
    self.downloadBar_l.setValue(0)
    self.downloadLabel_l.setText(u"等待下载")  # "Waiting to download"
    idfile = self.savecsv_l.text()
    outputdir = self.savedownload_l.text()
    csvf = open(idfile, 'r')
    csv_count = len(csvf.readlines()) - 1
    csvf.close()
    print(csv_count)
    self.downloadBar_l.setMaximum(csv_count)
    with open(idfile, 'r') as f:
        idcsv = csv.reader(f)
        header = next(idcsv)
        for ck, row in enumerate(idcsv):
            self.downloadBar_l.setValue(ck)
            sid = row[2]
            ourl = row[5]
            urlpaths = self.geturl_l(ourl)
            alldir = os.path.join(outputdir, sid)
            if os.path.exists(outputdir + '/{}'.format(sid)):
                shutil.rmtree(outputdir + '/{}'.format(sid))
            if not os.path.exists(outputdir + '/{}.zip'.format(sid)):
                ct = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                self.downloadLabel_l.setText(str(ck) + "/" + str(csv_count))
                print(ct + ' Downloading {}...'.format(sid))
                for urlpath in urlpaths:
                    if not os.path.exists(alldir):
                        os.makedirs(alldir)
                    ttt = download('{}'.format(urlpath), alldir)
                    # self.downloadLabel.setText(ttt["speed"])
                dt = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                print(dt + ' compressing......')
                shutil.make_archive(outputdir + '/{}'.format(sid), 'zip', alldir)
                shutil.rmtree(alldir)
def download(self, obj, path=None, show_progress=True, resume=True,
             auto_retry=True):
    """
    Download a file

    :param obj: :class:`.File` object
    :param str path: local path
    :param bool show_progress: whether to show download progress
    :param bool resume: whether to resume on unfinished downloads
        identified by filename
    :param bool auto_retry: whether to retry automatically upon closed
        transfer until the file's download is finished
    """
    download(obj.url, path=path, session=self.http.session,
             show_progress=show_progress, resume=resume,
             auto_retry=auto_retry)
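# A minimal usage sketch, assuming `client` is an instance of the class above
# and `file_obj` is a `File` object with a `.url` attribute (both hypothetical
# names, not from the original source):
client.download(file_obj, path='/tmp/example.bin', show_progress=True,
                resume=True, auto_retry=True)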
def download_images(scene=SCENE, bands=BANDS, url=URL, path=PATH):
    """ Download bands on defined PATH for test_process """
    downloaded_images = []
    _bands = ["_B{}.TIF".format(i) for i in bands]
    _bands.extend(["_BQA.TIF", "_MTL.txt"])
    scene_bands = [{
        "url": "{url}/{scene}/{scene}{band}".format(url=url, scene=scene, band=band),
        "band": band.split(".")[0]
    } for band in _bands]
    assert len(scene_bands) == len(bands) + 2
    path = check_create_folder(path)
    for band in scene_bands:
        f = os.path.join(path, band["url"].split("/")[-1])
        d = download(url=band["url"], path=path)
        downloaded_images.append(f)
    return downloaded_images
def install_user_plugin(self, plugin_zip_url):
    """ Download and install the plugin from an unofficial repository

    Download and extract a zip archive into DeepNLPF's plugins root
    directory, then run the installation script `requirements.sh`.

    Arguments:
        plugin_zip_url: str
            The URL to a plugin's zip archive

    Return:
        None
    """
    import re
    import shutil
    import zipfile
    import homura

    if not os.path.exists(self.PLUGIN_PATH):
        os.makedirs(self.PLUGIN_PATH)

    zipfile_path = os.path.join(self.PLUGIN_PATH, 'user_zip.zip')
    try:
        homura.download(url=plugin_zip_url, path=zipfile_path)
    except Exception as err:
        log.logger.error(err)
        sys.exit(1)

    plugin_root_dir = ''
    with zipfile.ZipFile(zipfile_path) as zfd:
        plugin_root_dir = zfd.namelist()[0]
        zfd.extractall(self.PLUGIN_PATH)

    extracted_path = os.path.join(self.PLUGIN_PATH, plugin_root_dir)
    installation_dir = re.sub(r'^plugin_', '', plugin_root_dir)
    installation_dir = re.sub(r'-master/?$', '', installation_dir)
    installation_path = os.path.join(self.PLUGIN_PATH, installation_dir)
    if os.path.exists(installation_path):
        shutil.rmtree(installation_path)
    os.rename(extracted_path, installation_path)
    os.remove(zipfile_path)

    install_script_path = os.path.join(installation_path, 'requirements.sh')
    os.system(f'sh {install_script_path}')
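# Usage sketch, assuming `manager` is an instance of the plugin-manager class
# above; the archive URL is a placeholder:
manager.install_user_plugin(
    'https://github.com/example/plugin_example/archive/master.zip')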
def test_redirect(self):
    url = FILE_301_SMALL
    eurl = FILE_SMALL
    # No path
    download(url=url)
    f = os.path.join(TEST_DATA_DIR, get_resource_name(url))
    ef = os.path.join(TEST_DATA_DIR, get_resource_name(eurl))
    assert not os.path.exists(f)
    assert os.path.exists(ef)
    os.remove(ef)
    # path='foobar'
    download(url=url, path='foobar')
    f = os.path.join(TEST_DATA_DIR, 'foobar')
    assert os.path.exists(f)
    os.remove(f)
def download_compranet(years):
    """
    Download Compranet data for a list of years, unzip the files and
    convert the XLS files to CSV.

    :param years: The years for which to download data
    :type years: List

    :returns:
    :example:
    """
    tmp_folder = os.path.join(settings.folder_full_cache, 'tmp')
    check_create_folder(tmp_folder)

    for year in years:
        file_name = os.path.join(settings.fn_prefix + year + settings.fn_extension)
        src_url = settings.compranet_base_url + file_name
        print("Downloading %s" % file_name)
        download(url=src_url, path=tmp_folder)

        file_path = os.path.join(tmp_folder, file_name)
        with zipfile.ZipFile(file_path, 'r') as myzip:
            myzip.extractall(tmp_folder)

    pattern = os.path.join(tmp_folder, '*.xls*')
    for src_file in list_files(pattern):
        csv_path = os.path.join(settings.folder_full_cache,
                                get_filename(src_file) + '.csv')
        wb = xlrd.open_workbook(src_file)
        sheet = wb.sheet_by_index(0)
        with open(csv_path, 'w') as csvfile:
            writer = unicodecsv.writer(csvfile, encoding='utf-8')
            for rownum in range(sheet.nrows):
                writer.writerow(sheet.row_values(rownum))

    remove_folder(tmp_folder)
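# Example invocation (a sketch; Compranet publishes one archive per year, so
# the list items are year strings):
download_compranet(['2015', '2016'])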
def download(self, id, path='.', **kwargs):
    """Download a product using homura's download function.

    If you don't pass the title of the product, it will use the id as
    filename. Further keyword arguments are passed to the
    homura.download() function.
    """
    product = self.get_product_info(id)
    path = join(path, product['title'] + '.zip')
    kwargs = self._fillin_cainfo(kwargs)

    print('Downloading %s to %s' % (id, path))

    # Check if the file exists and if it is complete
    if exists(path):
        if getsize(path) == product['size']:
            print('%s was already downloaded.' % path)
            return path

    download(product['url'], path=path, session=self.session, **kwargs)
    return path
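# Usage sketch, assuming `api` is an instance of the client class above; the
# product UUID is a placeholder:
path = api.download('a8dd0cfd-613e-45ce-868c-d79177b916ed', path='./products')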
def download_mbtiles(dest_folder, country, **kwargs):
    """Download QA Tiles for the selected country.

    Download a gzipped mbtiles file of all OSM data within a country from S3.
    More details at https://osmlab.github.io/osm-qa-tiles/

    Parameters
    ------------
    dest_folder: str
        Folder to save download into
    country: str
        Country for which to download the OSM QA tiles
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility
        functions
    """
    download_file = path.join(dest_folder, '{}.mbtiles'.format(country))
    print('Saving QA tiles to {}'.format(download_file))

    url = 'https://s3.amazonaws.com/mapbox/osm-qa-tiles-production/latest.country/{}.mbtiles.gz'.format(country)
    gz = tempfile.TemporaryDirectory()
    tmp_path = path.join(gz.name, '{}.mbtiles.gz'.format(country))

    download(url=url, path=tmp_path)
    with gzip.open(tmp_path, 'rb') as r:
        with open(download_file, 'wb') as w:
            w.write(r.read())
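# Example call (a sketch; the country slug must match one of the names used by
# the OSM QA tiles S3 bucket):
download_mbtiles('./tiles', 'united_states_of_america')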
def downloadProduct(self, id, locale=None, path=None):
    info = None
    while info is None:
        try:
            info = self.getOData(id)
        except APIError as e:
            self.logger.info('API error: %s \n Waiting %d seconds.'
                             % (str(e), self.sleep))
            time.sleep(self.sleep)

    if path is None:
        outputPath = util.checkFolder('SentinelAPI', Output=True)
        year = datetime.strptime(info['date'], '%Y-%m-%dT%H:%M:%SZ').year
        outputName = info['name'] + '.zip'
        # oldPath = os.path.join(outputPath, outputName)
        if locale:
            outputPath = util.checkFolder(locale, path=outputPath)
        outputPath = util.checkFolder(year, path=outputPath)
        outputPath = os.path.join(outputPath, outputName)
    else:
        outputPath = path

    if os.path.exists(outputPath) and os.path.getsize(outputPath) == info['size']:
        # check if md5 matches with server
        if self.compareMD5(outputPath, info['md5']):
            self.logger.info('%s was already found.' % outputPath)
            return outputPath, info
        else:
            self.logger.info('%s was not downloaded correctly' % outputPath)
            os.remove(outputPath)

    homura.download(info['url'], path=outputPath, auth=self.sesh.auth)
    return outputPath, info
def download(self, id, path='.', checksum=False, **kwargs):
    """Download a product using homura's download function.

    If you don't pass the title of the product, it will use the id as
    filename. Further keyword arguments are passed to the
    homura.download() function.
    """
    # Check if API is reachable.
    product = None
    while product is None:
        try:
            product = self.get_product_info(id)
        except ValueError:
            print("Invalid API response. Trying again in 3 minutes.")
            sleep(180)

    path = join(path, product['title'] + '.zip')
    kwargs = self._fillin_cainfo(kwargs)

    print('Downloading %s to %s' % (id, path))

    # Check if the file exists and passes the md5 test
    if exists(path):
        if md5_compare(path, product['md5'].lower()):
            print('%s was already downloaded.' % path)
            return path
        else:
            remove(path)

    download(product['url'], path=path, session=self.session, **kwargs)

    # Check integrity with MD5 checksum
    if checksum is True:
        if not md5_compare(path, product['md5'].lower()):
            raise ValueError('File corrupt: Checksums do not match')
    return path
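# Usage sketch with MD5 verification enabled (`api` is a hypothetical client
# instance; the UUID is a placeholder):
path = api.download('a8dd0cfd-613e-45ce-868c-d79177b916ed', path='.',
                    checksum=True)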
def download_file(url, file_path, md5_hash=None, timeout=10,
                  block_size=1024 * 1024, show_progress=False, proxy=None):
    """Resumable download. Expects the server to support byte ranges.

    Parameters
    ----------
    url: string
        URL
    file_path: string
        Local file path to store the downloaded file
    md5_hash: string
        Expected MD5 string of downloaded file
    timeout: int
        Seconds to wait before terminating request
    block_size: int
        Chunk of bytes to read (default: 1024 * 1024 = 1 MB)
    show_progress: bool
        Show progress bar
    proxy: string
        Optional proxy passed through to pycurl when homura is available
    """
    if os.path.exists(file_path) and os.path.getsize(file_path):
        return

    if HOMURA:
        if proxy:
            pass_through_opts = {pycurl.PROXY: proxy}
            download(url=url, path=file_path, pass_through_opts=pass_through_opts)
        else:
            download(url=url, path=file_path)
    else:
        tmp_file_path = file_path + ".part"
        first_byte = os.path.getsize(tmp_file_path) if os.path.exists(tmp_file_path) else 0
        file_mode = "ab" if first_byte else "wb"
        file_size = -1
        try:
            file_size = int(requests.head(url).headers["Content-length"])
            headers = {"Range": "bytes=%s-" % first_byte}
            r = requests.get(url, headers=headers, stream=True)
            if show_progress:
                desc = "Downloading {}".format(url.split("/")[-1])
                pbar = tqdm(total=file_size, initial=first_byte,
                            unit="B", unit_scale=True, desc=desc)
            with open(tmp_file_path, file_mode) as f:
                for chunk in r.iter_content(chunk_size=block_size):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                    if show_progress:
                        pbar.update(block_size)
            if show_progress:
                pbar.close()
        except IOError as e:
            sys.stderr.write("IO Error - {}\n".format(e))
        finally:
            # Move the temp file to the desired location
            if file_size == os.path.getsize(tmp_file_path):
                # if there's a hash value, validate the file
                if md5_hash and not md5_validate_file(tmp_file_path, md5_hash):
                    raise Exception("Error validating the file against its MD5 hash")
                shutil.move(tmp_file_path, file_path)
            elif file_size == -1:
                raise Exception("Error getting Content-Length from server: %s" % url)
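# Usage sketch for download_file (URL and local path are placeholders; the
# HOMURA flag above decides which code path actually runs):
download_file('https://example.com/data/archive.zip', '/tmp/archive.zip',
              md5_hash=None, show_progress=True)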
def get_metadata(download_dir='.'):
    """Download Landsat 8 metadata file."""
    download(
        'http://landsat.usgs.gov/metadata_service/bulk_metadata_files/LANDSAT_8.csv',
        download_dir
    )
def test_simple(self):
    download(FILE_1MB)
    f = os.path.join(TEST_DATA_DIR, get_resource_name(FILE_1MB))
    assert os.path.exists(f)
    os.remove(f)
# -*- coding: utf-8 -*-
import gzip
import csv
import psycopg2
import uuid
from homura import download

# download the index file to the current directory
landsat_index_url = 'http://storage.googleapis.com/gcp-public-data-landsat/index.csv.gz'
download(landsat_index_url, './')

g = gzip.GzipFile(mode='rb', fileobj=open('./index.csv.gz', 'rb'))
f_csv = open('./index.csv', 'wb')
f_csv.write(g.read())
f_csv.close()

# config
csv_file = './index.csv'
db = 'landsat'
usr = '******'
pw = 'postgres'
host = '127.0.0.1'
port = '5432'


def dbimport(tb):
    # connect to database
    pgiscon = psycopg2.connect(database=db, user=usr, password=pw,
                               host=host, port=port)
def get_screenshot(url, filename):
    # Use try/except to handle errors from the download call.
    try:
        download(url=get_splash_uri(url), path=get_save_path(filename))
    except Exception:
        pass
from homura import download
from ftp import ftp_client

download(
    'https://globalmanifestv2.blob.core.windows.net/manifests/2018-07-07-01-11-07_80-8867.zip?sv=2015-12-11&sr=b&sig=Exl4E3Q3R6N4w8jDcbygZ8aRGLoR3LzfH2VR%2FgMtNSk%3D&st=2018-07-07T06%3A39%3A09Z&se=2018-07-10T06%3A44%3A09Z&sp=rw'
)
sub = script.findAll("script")
su.append(sub[1].string)

print("done.")

for s in su:
    sa = s[39:-2]
    subsceneLinks.append(baseUrl + sa)

print(videolinks)
print(subsceneLinks)

if not os.path.exists(directory):
    os.makedirs(directory)

if not os.path.exists(directory + "/download.txt"):
    saveFile = open(directory + "/download.txt", 'w')
    saveFile.write("")
    saveFile.close()

for i in range(len(videolinks)):
    print(i)
    if subsceneLinks[i] not in open(directory + "/download.txt").read():
        print("downloading : " + subsceneLinks[i])
        download(url=subsceneLinks[i], path=directory)
        appendFile = open(directory + "/download.txt", 'a')
        appendFile.write(subsceneLinks[i])
        appendFile.close()
    if videolinks[i] not in open(directory + "/download.txt").read():
        print("downloading : " + videolinks[i])
        download(url=videolinks[i], path=directory)
        appendFile = open(directory + "/download.txt", 'a')
        appendFile.write(videolinks[i])
        appendFile.close()
def get_screenshot(url, filename):
    download(url=get_splash_uri(url), path=get_save_path(filename))
def download_(lines):
    try:
        download(lines)
    except Exception as e:
        print(e)
# -*- coding: utf-8 -*-
import csv
import psycopg2
import uuid
import gzip
from homura import download

# download the index file to the current directory
sentinel_index_url = 'http://storage.googleapis.com/gcp-public-data-landsat/index.csv.gz'
download(sentinel_index_url, './')

g = gzip.GzipFile(mode='rb', fileobj=open('./index.csv.gz', 'rb'))
f_csv = open('./index.csv', 'wb')
f_csv.write(g.read())
f_csv.close()
print('unzipped successfully!')

# config
csv_file = './index.csv'
db = 'landsat'
usr = '******'
pw = 'postgres'
host = '127.0.0.1'
port = '5432'

# connect to database
pgiscon = psycopg2.connect(database=db, user=usr, password=pw,
                           host=host, port=port)
pgiscursor = pgiscon.cursor()

# open the csv file, read it row by row, and insert the rows into the database
def main(input_genes, output, disease, cutoff, gene_set):
    u""" main function """
    is_input_file = None
    if os.path.exists(input_genes):
        is_input_file = input_genes
        with open(input_genes) as r:
            input_genes = [x.strip() for x in r.read().split("\n")]
            input_genes = [x for x in input_genes if x]
    else:
        input_genes = input_genes.split(",")

    print("Input genes: {}".format(len(input_genes)))

    check = valid(input_genes, disease)
    while check is not None:
        input_genes.remove(check)
        print("Input genes: {}; {} is invalid".format(len(input_genes), check))
        time.sleep(0.5)
        check = valid(input_genes, disease)

    if is_input_file is not None:
        with open(is_input_file, "w+") as w:
            for i in sorted(input_genes):
                w.write("{}\n".format(i))

    if not input_genes:
        print("No valid genes")
        exit(0)

    if gene_set:
        data = post(genes=input_genes, disease=disease.split(","),
                    groupcutoff1=cutoff, groupcutoff2=100 - cutoff)

        if data["outdir"] == "fail":
            print("Failed {}".format(data["outdir"]))
            exit(data)

        outdir = os.path.dirname(output)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        if os.path.exists(output):
            os.remove(output)
        download(url=os.path.join(OUTPUT, data["outdir"]), path=output)
    else:
        if not os.path.exists(output):
            os.makedirs(output)

        for i in input_genes:
            print(i)
            data = post(genes=[i], disease=disease.split(","),
                        groupcutoff1=cutoff, groupcutoff2=100 - cutoff)

            if data["outdir"] == "fail":
                print("Failed {}".format(data["outdir"]))
                continue

            outfile = os.path.join(output, data["outdir"])
            if os.path.exists(outfile):
                os.remove(outfile)
            download(url=os.path.join(OUTPUT, data["outdir"]), path=outfile)
def download(self, id, directory_path='.', checksum=False,
             check_existing=False, **kwargs):
    """Download a product using homura.

    Uses the filename on the server for the downloaded file, e.g.
    "S1A_EW_GRDH_1SDH_20141003T003840_20141003T003920_002658_002F54_4DD1.zip".

    Incomplete downloads are continued and complete files are skipped.

    Further keyword arguments are passed to the homura.download() function.

    Parameters
    ----------
    id : string
        UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'
    directory_path : string, optional
        Where the file will be downloaded
    checksum : bool, optional
        If True, verify the downloaded file's integrity by checking its MD5
        checksum. Throws InvalidChecksumError if the checksum does not match.
        Defaults to False.
    check_existing : bool, optional
        If True and a fully downloaded file with the same name exists on the
        disk, verify its integrity using its MD5 checksum. Re-download in
        case of non-matching checksums. Defaults to False.

    Returns
    -------
    path : string
        Disk path of the downloaded file
    product_info : dict
        Dictionary containing the product's info from get_product_info().

    Raises
    ------
    InvalidChecksumError
        If the MD5 checksum does not match the checksum on the server.
    """
    # Check if API is reachable.
    product_info = None
    while product_info is None:
        try:
            product_info = self.get_product_info(id)
        except requests.HTTPError:
            print("Invalid API response. Trying again in 1 minute.")
            sleep(60)

    path = join(directory_path, product_info['title'] + '.zip')
    kwargs = self._fillin_cainfo(kwargs)

    print('Downloading %s to %s' % (id, path))

    # Check if the file exists and passes the md5 test.
    # Homura will by default continue the download if the file exists but is incomplete.
    if exists(path) and getsize(path) == product_info['size']:
        if not check_existing or md5_compare(path, product_info['md5']):
            print('%s was already downloaded.' % path)
            return path, product_info
        else:
            print('%s was already downloaded but is corrupt: checksums do not match. Re-downloading.'
                  % path)
            remove(path)

    if (exists(path) and getsize(path) >= 2 ** 31 and
            pycurl.version.split()[0].lower() <= 'pycurl/7.43.0'):
        # Workaround for PycURL's bug when continuing > 2 GB files
        # https://github.com/pycurl/pycurl/issues/405
        remove(path)

    homura.download(product_info['url'], path=path, session=self.session, **kwargs)

    # Check integrity with MD5 checksum
    if checksum is True:
        if not md5_compare(path, product_info['md5']):
            raise InvalidChecksumError('File corrupt: checksums do not match')
    return path, product_info
def download(self, id, directory_path='.', checksum=False,
             check_existing=False, **kwargs):
    """Download a product using homura.

    Uses the filename on the server for the downloaded file, e.g.
    "S1A_EW_GRDH_1SDH_20141003T003840_20141003T003920_002658_002F54_4DD1.zip".

    Incomplete downloads are continued and complete files are skipped.

    Further keyword arguments are passed to the homura.download() function.

    Parameters
    ----------
    id : string
        UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'
    directory_path : string, optional
        Where the file will be downloaded
    checksum : bool, optional
        If True, verify the downloaded file's integrity by checking its MD5
        checksum. Throws InvalidChecksumError if the checksum does not match.
        Defaults to False.
    check_existing : bool, optional
        If True and a fully downloaded file with the same name exists on the
        disk, verify its integrity using its MD5 checksum. Re-download in
        case of non-matching checksums. Defaults to False.

    Returns
    -------
    path : string
        Disk path of the downloaded file
    product_info : dict
        Dictionary containing the product's info from get_product_info().

    Raises
    ------
    InvalidChecksumError
        If the MD5 checksum does not match the checksum on the server.
    """
    # Check if API is reachable.
    product_info = None
    while product_info is None:
        try:
            product_info = self.get_product_info(id)
        except SentinelAPIError as e:
            print("Invalid API response:\n{}\nTrying again in 1 minute.".format(str(e)))
            sleep(60)

    path = join(directory_path, product_info['title'] + '.zip')
    kwargs = self._fillin_cainfo(kwargs)

    print('Downloading %s to %s' % (id, path))

    # Check if the file exists and passes the md5 test.
    # Homura will by default continue the download if the file exists but is incomplete.
    if exists(path) and getsize(path) == product_info['size']:
        if not check_existing or md5_compare(path, product_info['md5']):
            print('%s was already downloaded.' % path)
            return path, product_info
        else:
            print('%s was already downloaded but is corrupt: checksums do not match. Re-downloading.'
                  % path)
            remove(path)

    if (exists(path) and getsize(path) >= 2 ** 31 and
            pycurl.version.split()[0].lower() <= 'pycurl/7.43.0'):
        # Workaround for PycURL's bug when continuing > 2 GB files
        # https://github.com/pycurl/pycurl/issues/405
        remove(path)

    homura.download(product_info['url'], path=path, session=self.session, **kwargs)

    # Check integrity with MD5 checksum
    if checksum is True:
        if not md5_compare(path, product_info['md5']):
            raise InvalidChecksumError('File corrupt: checksums do not match')
    return path, product_info
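# Usage sketch for the homura-backed download above: `api` is assumed to be an
# instance of the client class, and the UUID is the placeholder from the
# docstring.
path, product_info = api.download('a8dd0cfd-613e-45ce-868c-d79177b916ed',
                                  directory_path='./downloads',
                                  checksum=True, check_existing=True)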
def install(self, plugin_name):
    import zipfile

    from homura import download  # fast download manager

    # URL for downloading the plugin, e.g.
    # https://github.com/deepnlpf/plugin_stanza/archive/master.zip
    URL = (self.PLUGIN_SERVER + "plugin_" + plugin_name + "/archive/master"
           + self.EXTENSION)

    # Path to save the plugin.
    PATH_DOWNLOAD_PLUGIN = (self.PLUGIN_PATH + "plugin_" + plugin_name
                            + "-master" + self.EXTENSION)

    # Check that the plugins folder exists.
    if not os.path.exists(self.PLUGIN_PATH):
        os.makedirs(self.PLUGIN_PATH)

    # Download the plugin.
    try:
        print("Downloading plugin", plugin_name, "..")
        # check url exists.
        download(url=URL, path=PATH_DOWNLOAD_PLUGIN)
    except Exception as err:
        print("❗️Plugin not found!")
        log.logger.error(err)
        sys.exit(0)

    # Extract the plugin files.
    try:
        fantasy_zip = zipfile.ZipFile(PATH_DOWNLOAD_PLUGIN)
        fantasy_zip.extractall(self.PLUGIN_PATH)
        fantasy_zip.close()
    except Exception as err:
        print("❗️Error extracting files!")
        log.logger.error(err)
        sys.exit(0)

    # Rename the plugin directory.
    try:
        os.rename(
            self.PLUGIN_PATH + "plugin_" + plugin_name + "-master",
            self.PLUGIN_PATH + plugin_name,
        )
    except Exception as err:
        print("❗️Error configuring the plugin directory!")
        log.logger.error(err)
        sys.exit(0)

    # Install requirements.
    try:
        # Check whether the plugin ships a requeriments.sh file (sic).
        if os.path.isfile(self.PLUGIN_PATH + plugin_name + "/requeriments.sh"):
            print("Install requirements..")
            os.system("cd " + str(self.PLUGIN_PATH + plugin_name
                                  + " && chmod 777 requeriments.sh && ./requeriments.sh"))
    except Exception as err:
        print("❗Error when executing the requeriments.sh plugin file!")
        log.logger.error(err)
        sys.exit(0)

    os.remove(PATH_DOWNLOAD_PLUGIN)  # clean up the zip file.

    print("🎉 Plugin", plugin_name, "installed!")
    log.logger.info("Plugin installed: {}".format(plugin_name))
    print("Path of installed plugins:", self.PLUGIN_PATH)
    sys.exit(0)