def downloadZipFromLink(dowLink, albumName):
    zipName = albumName.replace(" ", "") + ".zip"
    tempDir = tempfile.gettempdir()
    zipPath = os.path.join(tempDir, zipName)
    if os.path.exists(zipPath):
        print("Deleting zip which already exists at location '{}'".format(zipPath))
        os.remove(zipPath)
    print("Downloading '{}' from '{}' to file '{}'".format(albumName, dowLink, zipPath))
    urlopener = URLopener()

    def reporthook(blocknum, blocksize, totalsize):
        readsofar = blocknum * blocksize
        if totalsize > 0:
            percent = readsofar * 1e2 / totalsize
            s = "\r%5.1f%% %*d / %d" % (
                percent, len(str(totalsize)), readsofar, totalsize)
            sys.stderr.write(s)
            if readsofar >= totalsize:  # near the end
                sys.stderr.write("\n")
        else:  # total size is unknown
            sys.stderr.write("read %d\n" % (readsofar,))

    try:
        urlopener.retrieve(dowLink, zipPath, reporthook)
    except:
        # Clean up the partial download, then re-raise.
        if os.path.exists(zipPath):
            print("\nDeleting archive file '{}'".format(zipPath))
            os.remove(zipPath)
        raise
    print("Download of '{}' has completed.".format(albumName))
    return zipPath
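# Hedged aside: URLopener is deprecated since Python 3.3. A minimal sketch of
# the same progress-reporting download via the supported urllib.request API;
# the reporthook contract (block number, block size, total size) is the same,
# and the URL and file name below are placeholders, not from these snippets.
import sys
from urllib.request import urlretrieve

def progress(blocknum, blocksize, totalsize):
    # Mirror the reporthook above: print a percentage when the size is known.
    if totalsize > 0:
        done = min(blocknum * blocksize, totalsize)
        sys.stderr.write("\r%5.1f%%" % (done * 100.0 / totalsize))
    else:
        sys.stderr.write("read %d\n" % (blocknum * blocksize))

# urlretrieve("https://example.com/album.zip", "album.zip", progress)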
def _verify_and_download(self):
    """Check if the file is where it should be, and download it if not."""
    if path.isfile(self._path):
        return
    # File does not exist, so we have to download it.
    epic_id = int(self.epic_id)
    d1 = epic_id - epic_id % 100000
    d2 = epic_id % 100000 - epic_id % 1000
    url_template = ('https://archive.stsci.edu/missions/k2/'
                    'target_pixel_files/c{0:d}/{1:d}/{2:05d}/{3}')
    url_to_load = url_template.format(self.campaign, d1, d2, self.file_name)

    fmt = "Downloading {:} ..... "
    print(fmt.format(self.file_name), end='', file=sys.stderr, flush=True)
    url_retriever = URLopener()
    try:
        url_retriever.retrieve(url_to_load, self._path)
    except Exception:  # was `except exceptions:`, an undefined name
        print("", file=sys.stderr, flush=True)
        raise IOError(
            "\n\nFailed to download file {:}\n\n".format(url_to_load))
    if not path.isfile(self._path):
        print("", file=sys.stderr, flush=True)
        raise IOError('Download of\n' + url_to_load + '\nto\n'
                      + self._path + '\nsomehow failed')
    print(" done", file=sys.stderr, flush=True)
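# Worked example of the directory arithmetic above (the EPIC ID is
# illustrative, not from the original): for epic_id = 201234567,
#   epic_id % 100000 = 34567, so d1 = 201234567 - 34567 = 201200000
#   epic_id % 1000   = 567,   so d2 = 34567 - 567       = 34000
# giving .../c{campaign}/201200000/34000/{file_name}: the archive buckets
# targets by blocks of 100000 and 1000 IDs.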
def download_file(self):
    """
    Visit the URL for a specific station and date and save the file
    to a specified directory, e.g.
    http://penteli.meteo.gr/meteosearch/data/aghiosnikolaos/2009-11.txt
    """
    for station in self.stations['stations'][:]:
        try:
            os.mkdir(os.path.join(os.getcwd(), data_folder, station))
        except OSError:
            # TODO: add logging; the original used a bare except here.
            print('directory: {0} already exists!!!'.format(station))
        testfile = URLopener()
        os.chdir(os.path.join(data_folder, station))
        for i, date in enumerate(self.dates_to_download):
            name_to_save_file = os.path.join(
                os.getcwd(), station + '-' + date + '.txt')
            print(os.getcwd())
            try:
                # The complete URL to visit and download.
                url = url_seed + station + '/' + date + '.txt'
                testfile.retrieve(url, name_to_save_file)
            except IOError:
                # The original silently swallowed all download errors here.
                pass
        os.chdir(os.pardir)
        os.chdir(os.pardir)
def download_data(admin_level, plz_regex_string, filename):
    bbox = '48.07303233901773,11.348190307617188,48.25028349849019,11.73614501953125'
    query = ('rel({0})[boundary=administrative][admin_level={1}]; out geom;'
             'rel({0})[boundary=postal_code][postal_code~"{2}"]; out geom;'
             .format(bbox, admin_level, plz_regex_string))
    file = URLopener()
    file.retrieve('http://overpass-api.de/api/interpreter?data=' + quote_plus(query),
                  filename)
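# Hypothetical usage of download_data above (arguments are illustrative, not
# from the original): in OSM, admin_level=6 is the district level, and the
# bbox above covers the Munich area, so a regex for Munich's 80xxx/81xxx
# postal codes might look like:
#
#     download_data(6, "^8[01]", "munich_boundaries.xml")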
def readTLEfile(source):
    '''Read a TLE file (unzip if necessary)'''
    sourceName = source['name']
    sourceUrl = source['url']
    sourceFile = source['file']
    if os.path.isfile(sourceFile):
        print('Using saved TLE data {} ({})'.format(
            sourceFile, time.ctime(os.path.getmtime(sourceFile))))
    else:
        print('Retrieving TLE data from {}'.format(sourceUrl))
        file = URLopener()
        try:
            file.retrieve(sourceUrl, sourceFile)
        except Exception:  # was a bare except
            print("Error: Failed to get TLE data")
            return None
        else:
            print('{} updated'.format(sourceFile))
    if sourceFile.lower().endswith('.zip'):
        print('Unzipping {}...'.format(sourceFile))
        zip = zipfile.ZipFile(sourceFile)
        zip.extractall('.')
        sourceFile = zip.namelist()[0]
        print('Extracted {}'.format(zip.namelist()))
    tempContent = []
    with open(sourceFile) as f:
        for aline in f:
            tempContent.append(aline.replace('\n', ''))
    print(len(tempContent) // 3, 'TLEs loaded from {}'.format(sourceFile))
    return tempContent
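# Hedged aside on the `// 3` above: the function assumes three lines per
# element set (a name line plus the two TLE data lines). A minimal sketch of
# regrouping the flat list into (name, line1, line2) records:

def group_tles(lines):
    # Take the flat list returned by readTLEfile, three lines at a time.
    return [tuple(lines[i:i + 3]) for i in range(0, len(lines) - 2, 3)]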
def downloadAsset(uri, dirname):
    tUrl = uri
    o = urlparse(tUrl)
    contentType = ""
    targetDir = CURRENT_DIRECTORY + '/' + dirname + '/' + '/'.join(
        o.path.split('/')[1:-1])

    # No download needed for javascript: URIs or bare fragments.
    if o.scheme == "javascript" or (o.netloc == '' and o.path == ''):
        return

    if o.scheme == "":
        if uri.startswith("//"):
            tUrl = f"https:{uri}"
        else:
            tUrl = f"https://{uri}"

    try:
        contentType = getContentType(tUrl)
    except Exception:
        # Retry over plain http before giving up.
        try:
            if uri.startswith('//'):
                tUrl = f"http:{uri}"
            else:
                tUrl = f"http://{uri}"
            contentType = getContentType(tUrl)
        except Exception:
            pass
    else:
        # Ignore text/html.
        if contentType in mimeTypes[1:]:
            if not os.path.exists(targetDir):
                path = Path(targetDir)
                path.mkdir(parents=True)
            targetFile = targetDir + '/' + o.path.split('/')[-1]
            if not os.path.exists(targetFile):
                try:
                    urlretrieve(tUrl, targetFile)
                    print(f"[Retrieved] {targetFile}")
                except Exception:
                    # Retry with a browser User-Agent header.
                    try:
                        opener = URLopener()
                        opener.addheader('User-Agent', 'Mozilla/5.0')
                        filename, headers = opener.retrieve(tUrl, targetFile)
                    except Exception:
                        # Last resort: drop "www." and force https.
                        try:
                            tUrl = tUrl.replace('www.', '')
                            tUrl = tUrl.replace('http:', 'https:')
                            filename, headers = opener.retrieve(
                                tUrl, targetFile)
                        except Exception as e:
                            print(str(e))
                            raise Exception
def download(self, entity_id: int, destination: str = None,
             sort: List[Sort] = None) -> str:
    """Download sequences from a single entity."""
    sort = [Sort('id', 'asc')] if sort is None else sort
    sort = list(sort_item.to_json() for sort_item in sort) if sort else []
    body = {'filter': [], 'selection': [], 'sort': sort}
    file_path = Sequences.get_filepath_for_entity_id(entity_id)
    url = '{}/entities/{}/_extract'.format(self.url, entity_id)
    print('Downloading shards from "{}" to "{}".'.format(url, file_path))
    paths = []
    with self.session.post(url, stream=True, timeout=10 * 60, json=body) as response:
        try:
            links = response.json()
            print('links', links)
            if 'statusCode' in links and links['statusCode'] != 200:
                raise Exception(links['message'])
            elif len(links) == 0:
                raise Exception(
                    'Sequences:download - Error; no download links for {}. '
                    'Does the table exist?'.format(entity_id))
            index = 0
            for link in links:
                testfile = URLopener()
                path = '{}-{}.gz'.format(file_path, index)
                paths.append(path)
                testfile.retrieve(link, path)
                index = index + 1
        except Exception as e:
            print('Sequences:download - error:', e)
            raise e

    sorted_paths = self.get_sorted_file_shard_list(entity_id, paths, [])
    print(f'Unzipping: entity_id={entity_id} to destination={destination}')
    skip_first = False
    with open(destination, 'wb+') as target_file:
        for file_shard in sorted_paths:
            with gzip.open(file_shard, 'rb') as g_zip_file:
                first_line = True
                for line in g_zip_file:
                    # We skip the first line of every file, except for the
                    # very first.
                    if not (first_line and skip_first):
                        line = Sequences.sanitize(line.decode("utf-8"))
                        target_file.write(line.encode("utf-8"))
                    first_line = False
            skip_first = True
    return destination
def save_downloaded_file(context):
    """
    Saves POEditor terms to a file in the output dir.

    :param context: behave context
    :return: N/A
    """
    file_path = get_poeditor_file_path(context)
    saved_file = URLopener()
    saved_file.retrieve(context.poeditor_download_url, file_path)
    context.logger.info('POEditor terms have been saved in "%s" file' % file_path)
def download_text_file(url, file_name):
    opener = URLopener()
    file_name = file_name.split("/")[-1]
    file_name = file_name.replace("%20", " ")
    if _is_absolute_link(file_name):
        url = file_name
        if not url.startswith("http://"):
            url = "http://" + url
        out_name = file_name.split("/")[-1]
    else:
        url = "{}{}".format(url, file_name)
        out_name = file_name
    opener.retrieve(url, file_name)
    return out_name
def _download_file(url, destination):
    logger.info('Downloading %s to %s...', url, destination)
    response = _open_url(url)
    if response.code != 200:
        raise WagonError("Failed to download file. Request to {0} "
                         "failed with HTTP Error: {1}".format(url, response.code))
    final_url = response.geturl()
    if final_url != url and is_verbose():
        logger.debug('Redirected to %s', final_url)
    f = URLopener()
    f.retrieve(final_url, destination)
def download_data():
    """Download the data, extract them, and remove the archive."""
    if not os.path.exists(DATA_HOME):
        print("Data are missing. Downloading them now...", end="", flush=True)
        datafile = URLopener()
        datafile.retrieve(DOWNLOAD_URL, ARCHIVE_FNAME)
        print("Ok.")
        print("Extracting now...", end="", flush=True)
        tf = tarfile.open(ARCHIVE_FNAME)
        tf.extractall()
        print("Ok.")
        print("Removing the archive...", end="", flush=True)
        os.remove(ARCHIVE_FNAME)
        print("Ok.")
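# Hedged aside, assuming Python 3.12+: tarfile.extractall on an untrusted
# archive can write outside the target directory (path traversal). PEP 706
# added extraction filters to the stdlib; a minimal sketch of the safer call
# (the archive name is a placeholder):
import tarfile

def extract_archive_safely(archive_fname):
    with tarfile.open(archive_fname) as tf:
        # The "data" filter rejects absolute paths and ".." components and
        # strips dangerous metadata before extracting.
        tf.extractall(filter="data")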
def downloadFile(linkStore):
    for imgUrl in linkStore:
        try:
            # Remove the double slash from the start of the URL.
            imgUrl = urlEdit(imgUrl[2:])
            fileName = imgUrl.split("/")[-1]
            imgUrl = 'https://' + imgUrl
            print('Downloading file: ' + fileName + '\tURL: ' + imgUrl + '\n')
            image = URLopener()
            image.retrieve(imgUrl, fileName)
            # The line above may fail with a 403 Forbidden response.
        except Exception:
            print("Error occurred while downloading file: " + imgUrl + '\n')
            continue
def main():
    username = input("username: ")
    password = input("password: ")
    r = Request("http://www.loxa.edu.tw/index.php")
    with urlopen(r) as response:
        phpsessid = response.getheader("set-cookie").split("; ")[0].split("=")[1]
    cookie = "PHPSESSID={0}; Cookie_Allow=1".format(phpsessid)
    data = {"loginname": username, "loginpswd": password}
    r = Request(
        "http://www.loxa.edu.tw/check.php",
        data=urlencode(data).encode("utf8"),
        headers={"cookie": cookie},
        method="POST",
    )
    try:
        response = urlopen(r)
    except HTTPError:
        sys.exit("Invalid username or password.")
    r = Request("http://www.loxa.edu.tw/index.php?login=1&show_msg=Y",
                headers={"cookie": cookie})
    response = urlopen(r)
    r = Request("http://www.loxa.edu.tw/jewelbox/foldertree.php",
                headers={"cookie": cookie})
    with urlopen(r) as response:
        html = response.read().decode("big5")
    folder_tree_pattern = re.compile(
        r'insFld\(.+?, gFld\(".+?", "file_list.php\?dir_id=(\d+?)", "\w"\)\);')
    file_url_pattern = re.compile(r'<td colspan=3 nowrap>\s+?<a href="(http.+?)"')
    for i in folder_tree_pattern.finditer(html):
        dir_id = i.group(1)
        r = Request(
            "http://www.loxa.edu.tw/jewelbox/file_list.php?dir_id={0}".format(dir_id),
            headers={"cookie": cookie},
        )
        with urlopen(r) as response:
            html = response.read().decode("big5")
        for i in file_url_pattern.finditer(html):
            url = i.group(1)
            url_data = urlparse(url)
            file_path = url_data.path.lstrip("/")
            dir_name, base_name = os.path.split(file_path)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            url_opener = URLopener()
            url_opener.addheader("cookie", cookie)
            print("Download: {0} -> {1}".format(url, file_path))
            url_opener.retrieve(url, file_path)
def scrape_pokemon_image(url):
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(req).read()
    soup = BeautifulSoup(page, 'html.parser')
    images = soup.find_all('img')
    image_link = images[0].get('src')
    print("[INFO] downloading {}".format(image_link))
    name = str(image_link.split('/')[-1])
    opener = URLopener()
    opener.addheader('User-Agent', 'Mozilla/5.0')
    opener.retrieve(image_link, os.path.join('data/images/', name))
    print(image_link)
def install_mpt(install_path, url=DEFAULT_MPT_URL):
    """
    Install MyPyTutor to the given directory.

    Args:
      install_path (str): The directory to install MyPyTutor in.
      url (str, optional): The URL of the MyPyTutor file to use.

    """
    # Create our install path if it doesn't already exist.
    if not os.path.exists(install_path):
        os.makedirs(install_path)

    print('Installing MyPyTutor...', end='', flush=True)

    # Grab the latest zip file.
    # We use an explicit filename here because we don't yet have access
    # to the tutorlib module for abstracting away temporary file creation.
    try:
        urlobj = URLopener()
        filename, _ = urlobj.retrieve(url, 'MyPyTutor.zip')
    except Exception:
        print('failed')
        sys.exit(1)

    # Extract the file.
    with ZipFile(filename) as zf:
        zf.extractall(install_path)

    print('done')
def on_update_button_click(self):
    try:
        opener = URLopener()
        opener.retrieve(self.REMOTE_UPDATE_URL, "resources/parameters.json")
        # Read the new settings.
        self.data = read_settings()
        messagebox.showinfo(
            "Settings Update",
            "Settings successfully updated from the server.")
    except Exception as e:
        logging.critical(
            "Couldn't open the remote settings file: {0}".format(str(e)))
        messagebox.showerror("Couldn't Update Settings",
                             "Couldn't open the remote settings file.")
def Download_File(name):
    """Download UCAC4 file."""
    url_name = prefix + name
    ucac_file = URLopener()
    ucac_file.retrieve(url_name, name)
    inp = open(name, 'rb')
    bz2_file = bz2.BZ2File(name + '.bz2', 'wb', compresslevel=1)
    copyfileobj(inp, bz2_file)
    inp.close()
    bz2_file.close()
    os.remove(name)
    return 0
def export(self, entity_id: int, format: ExportFormat,
           destination_folder: str = None):
    entity = self.entities.get(entity_id)
    entity_name = entity['name']
    user = self.authentication.user
    path_parts = entity['path'].split('.')
    # The last path part is always the current document; any before that are
    # ancestor folders, the first being the parent.
    parent_folder_id = int(path_parts[-2]) if len(path_parts) > 1 else None
    job_id = self.jobs.create(owner_id=user['orgs'][0]['id'],
                              shareable_id=entity['ownerId'],
                              job_type=JobType.ExportJob,
                              name='Export from python client',
                              input_entity_ids=[entity_id],
                              params={
                                  "filter": [],
                                  "format": format,
                                  "fileName": entity_name,
                                  "selection": [],
                                  "targetFolderId": parent_folder_id,
                              })
    # Wait for the file to be converted to Genbank.
    job = self.jobs.poll_job(job_id)
    links = job['outputLinks']
    outputs = []
    for link in links:
        testfile = URLopener()
        destination = os.path.join(destination_folder, entity_name)
        testfile.retrieve(link['url'], destination)
        outputs.append(destination)
    return outputs
def online_install():
    # The Ninite file name, the temp folder path, and the current directory.
    # Changing the working directory sets the download location, so the
    # program can be run off a locked flash drive.
    dl = 'ninite.exe'
    dl_path = "c:\\Install_Wizard_Temp"
    currentDir = os.getcwd()

    # Create the program's directory if it doesn't exist yet.
    if not os.path.exists(dl_path):
        os.makedirs(dl_path)
    # Change the working directory to one on the customer's computer.
    os.chdir(dl_path)
    # Check for a previous Ninite installer.
    if os.path.isfile(dl):
        os.remove(dl)
        print('file removed')
    # Create the URL and an object to open it.
    url = urlCreate()
    ninite = URLopener()
    # Download the file from the URL and save it as ninite.exe.
    try:
        ninite.retrieve(url, dl)
    except Exception:
        # Error retrieving the website.
        text1.set('Ninite website could\nnot be accessed')
    # Run the file.
    try:
        check_call(dl, shell=True)
    except Exception:
        # Error running the file.
        text1.set('Error running ninite file')
    # Delete the downloaded file if it exists.
    if os.path.isfile(dl):
        os.remove(dl)
    # Change back to the original working directory.
    os.chdir(currentDir)
    # Remove the directory created earlier if it still exists.
    if os.path.exists(dl_path):
        rmtree(dl_path)
def choose_crawl_and_download_paths():
    crawls = get_list_of_crawls()
    print("Select a crawl [0-{}]:".format(len(crawls)))
    print_crawls(crawls)
    try:
        crawl_no = int(input("Crawl number [0-{}]:".format(len(crawls))))
    except ValueError:  # was a bare except
        print('Error: Enter a valid crawl number')
        sys.exit(1)
    file_type = input("File Type [wat/wet/warc]:").lower()
    if file_type not in ['warc', 'wat', 'wet']:
        print("Error: Enter a valid file type")
        sys.exit(1)
    url_to_fetch = ("https://commoncrawl.s3.amazonaws.com/crawl-data/"
                    "{}/{}.paths.gz".format(crawls[crawl_no][1], file_type))
    path_file_opener = URLopener()
    path_file_opener.retrieve(url_to_fetch, "paths.gz")
    subprocess.check_output(['gunzip', '--force', 'paths.gz'])
    return crawls[crawl_no][0]
def download_pic(pic_key, name):
    # URL format: http://i.imgur.com/KTqYYKVh.jpg
    url = "http://i.imgur.com/" + pic_key
    # Skip if a picture with this name already exists.
    if os.path.isfile(name):
        return True
    picture = URLopener()
    # Try to download the picture and save it as `name`.
    try:
        picture.retrieve(url, name)
    except IOError:
        # Error downloading the picture.
        return False
    # The download completed.
    return True
def open_url(url, **kwds):
    """Opens a url or file and returns an appropriate key-value reader."""
    reader_cls = fileformat(url)
    parsed_url = urlparse(url, 'file')
    if parsed_url.scheme == 'file':
        f = open(parsed_url.path, 'rb')
    else:
        if parsed_url.scheme == 'hdfs':
            server, username, path = hdfs.urlsplit(url)
            url = hdfs.datanode_url(server, username, path)
        if reader_cls is ZipReader and sys.version_info < (3, 2):
            # In Python <3.2, the gzip module is broken because it depends on
            # the underlying file being seekable (not true for url objects).
            opener = URLopener()
            filename, _ = opener.retrieve(url)
            f = open(filename, 'rb')
            os.unlink(filename)
        else:
            f = urlopen(url)
    return reader_cls(f, **kwds)
elif sys.version_info[0] == 2:
    from urllib import URLopener

dataset_url = 'https://vision.in.tum.de/rgbd/dataset/freiburg2/rgbd_dataset_freiburg2_pioneer_slam.tgz'
filename_zip = 'rgbd_dataset_freiburg2_pioneer_slam.tgz'
filename = 'rgbd_dataset_freiburg2_pioneer_slam'

# Go to the benchmark directory.
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)

if not os.path.exists(filename_zip):
    print('Downloading dataset file ', filename_zip)
    testfile = URLopener()
    testfile.retrieve(dataset_url, filename_zip)

if not os.path.exists(filename):
    print('Extracting dataset ', filename)
    tar = tarfile.open(filename_zip, "r:gz")
    tar.extractall()
    tar.close()

if not os.path.exists(filename + '/depth_gt.txt'):
    first_list = associate.read_file_list(filename + '/depth.txt')
    second_list = associate.read_file_list(filename + '/groundtruth.txt')
    matches = associate.associate(first_list, second_list, 0.0, 0.02)
    f = open(filename + '/depth_gt.txt', 'w')
    for a, b in matches:
def test2_ok():
    od = URLopener()
    # ok: insecure-urlopener-retrieve-ftp
    url = "ftps://example.com"
    od.retrieve(url)
thumbnail = hdri.select('.thumbnail')[0]['data-src']
href = urlparse(hdri['href'])
filename = href.query[2:] + '_' + resolution
new_filename = filename.replace(category + '&h=', '')
tonemapped = thumbnail.replace('/files/hdri_images/thumbnails/', '')
dl_url = 'https://hdrihaven.com/files/hdris/' + new_filename
thumbnail_url = 'https://hdrihaven.com/' + thumbnail
tonemapped_url = 'https://hdrihaven.com/files/hdri_images/tonemapped/8192/' + tonemapped

print(f"\n{new_filename} - {dl_url}")
try:
    print(f"{new_filename}.hdr downloading...")
    ext = '.hdr'
    opener.retrieve(dl_url + ext, new_filename + ext)
    filesnum += 1
except Exception:
    print(f"{new_filename}.hdr download failed, trying .exr...")
    try:
        ext = '.exr'
        opener.retrieve(dl_url + ext, new_filename + ext)
        filesnum += 1
    except Exception:
        print(f"{new_filename} download failed. Continuing...\n")
        continue

if tonemappedjpg in ('Y', 'y', 'Yes', 'yes'):
    print(f"8K Tonemapped {tonemapped} downloading...")
    opener.retrieve(tonemapped_url, os.path.basename(tonemapped_url))
def downloadAsset(uri, dirname, contentType):
    down = time.time()
    tUrl = uri
    o = urlparse(tUrl)
    targetDir = CURRENT_DIRECTORY + '/' + dirname + '/' + '/'.join(
        o.path.split('/')[1:-1])

    # No download needed for javascript: URIs or bare fragments.
    if o.scheme == "javascript" or (o.netloc == '' and o.path == ''):
        return

    global ret_time
    global ret
    ret += 1

    if o.scheme == "":
        if uri.startswith("//"):
            tUrl = f"https:{uri}"
        else:
            tUrl = f"https://{uri}"
    if not uri.startswith('http'):
        if uri.startswith('//'):
            tUrl = f"http:{uri}"
        else:
            tUrl = f"http://{uri}"

    # Ignore text/html.
    if contentType in mimeTypes[1:]:
        if not os.path.exists(targetDir):
            path = Path(targetDir)
            path.mkdir(parents=True)
        targetFile = targetDir + '/' + o.path.split('/')[-1]
        if not os.path.exists(targetFile):
            try:
                urlretrieve(tUrl, targetFile)
                print(f"[Retrieved] {tUrl}", time.time() - down)
                ret_time += time.time() - down
            except Exception as e:
                # Retry with a browser User-Agent header.
                try:
                    print(type(e).__name__, tUrl)
                    opener = URLopener()
                    opener.addheader('User-Agent', 'Mozilla/5.0')
                    filename, headers = opener.retrieve(tUrl, targetFile)
                    print(f"[Retrieved2] {targetFile}", time.time() - down)
                    ret_time += time.time() - down
                except Exception as e:
                    # Retry again without "www." and forcing https.
                    try:
                        print(type(e).__name__, 'even with the header', tUrl)
                        tUrl = tUrl.replace('www.', '')
                        tUrl = tUrl.replace('http:', 'https:')
                        opener.retrieve(tUrl, targetFile)
                        print(f"[Retrieved3] {targetFile}", time.time() - down)
                        ret_time += time.time() - down
                    except Exception as e:
                        print(type(e).__name__, 'https:// without www', tUrl)
                        if 'bobae' in tUrl:
                            # Temporary workaround just for bobaedream images.
                            try:
                                tUrl = tUrl.replace('//', '//image.')
                                opener.retrieve(tUrl, targetFile)
                                print(f"[Retrieved4] bobaedream image {targetFile}",
                                      time.time() - down)
                            except Exception:
                                print(type(e).__name__,
                                      'image handling also failed', tUrl)
                        return
            finally:
                if contentType == 'text/css':
                    global args
                    parseCSSURLs(targetFile, args.url, dirname)
    elif file + ext not in files and not owerwrite:
        urlopener.retrieve(url + i + ext, file + ext)
        print("Download complete: " + file + ext)
    elif owerwrite:
        urlopener.retrieve(url + i + ext, file + ext)
        print("Already exist (overwrite): " + file + ext)

for i in items:
    item = i["href"].replace("/hdri/?h=", "")
    # Preview image files.
    hdr_file = "https://hdrihaven.com/files/hdris/" + item + "_"
    thumb_file = "https://hdrihaven.com/files/hdri_images/thumbnails/" + item + ".jpg"
    preview_file = ("https://hdrihaven.com/files/hdri_images/tonemapped/1500/"
                    + item + ".jpg")
    spheres_file = "https://hdrihaven.com/files/hdri_images/spheres/" + item + ".jpg"

    if down_thumbnail and item + "_thumbnail.jpg" not in files:
        urlopener.retrieve(thumb_file, item + "_thumbnail.jpg")
    if down_preview and item + "_preview.jpg" not in files:
        urlopener.retrieve(preview_file, item + "_preview.jpg")
    if down_spheres and item + "_spheres.jpg" not in files:
        urlopener.retrieve(spheres_file, item + "_spheres.jpg")

    # hdr file
    try:
        downloader(hdr_file, ".hdr", item)
    except Exception:  # was a bare except
        downloader(hdr_file, ".exr", item)
def handle(self, *args, **options):
    print("Updating catalogs..")
    update_conf_list = AutomaticProductUpdate.objects.filter(order_number=1)
    for conf in update_conf_list:
        shop_shop = conf.shop
        print("Updating catalog for shop '%s'.." % shop_shop)
        print("--------------------------------------------------------")
        try:
            print("Downloading catalog file for shop '%s', from url:%s"
                  % (shop_shop, conf.catalog_url))
            file = URLopener()
            if not os.path.exists(CATALOGS_ROOT):
                os.makedirs(CATALOGS_ROOT)
            catalog_filename = CATALOGS_ROOT + '/%s_catalog' % shop_shop
            if conf.is_compressed:
                extension = '.%s' % conf.compress_format
            else:
                extension = '.csv'
            catalog_filename += extension
            file.retrieve(conf.catalog_url, catalog_filename)
            print("Catalog file retrieved for shop '%s', local path:%s"
                  % (shop_shop, catalog_filename))

            if conf.is_compressed:
                print("Decompressing file ...")
                # Get a new clean tmp dir.
                tmp_dir = CATALOGS_ROOT + '/%s_tmp' % shop_shop
                if os.path.exists(tmp_dir):
                    shutil.rmtree(tmp_dir)
                os.makedirs(tmp_dir)
                # Extract the catalog (should be a single file inside the
                # compressed file).
                if not decompress_file(input_file=catalog_filename,
                                       output_dir=tmp_dir,
                                       compression_format=conf.compress_format):
                    print("Decompressing file ... ERROR")
                    return -1
                # Copy and rename the extracted catalog file.
                extracted_catalog = os.listdir(tmp_dir)[0]
                catalog_filename = catalog_filename[:-4] + ".csv"
                extracted_catalog_path = os.path.abspath(
                    os.path.join(tmp_dir, extracted_catalog))
                shutil.copyfile(extracted_catalog_path, catalog_filename)
                print("Decompressing file ... DONE")

            print("Cleaning and preparing CSV FILE ...")
            output_file = CATALOGS_ROOT + '/%s' % shop_shop + ".csv"
            csv_file = open(catalog_filename, 'r', errors='ignore')
            with open(output_file, 'w') as fh:
                reader = csv.reader(csv_file, delimiter=';')
                next(reader, None)
                writer = csv.writer(fh, delimiter=';')
                writer.writerow(("aw_deep_link", "product_name", "search_price",
                                 "merchant_name", "delivery_cost", "brand_name",
                                 "product_model", "delivery_time", "product_GTIN"))
                # Keep only rows whose fields contain no stray semicolons.
                for r in reader:
                    count = 0
                    for i in r:
                        count += i.count(';')
                    if count == 0:
                        writer.writerow((r[0], r[1], r[2], r[3].replace(" ", ""),
                                         r[4], r[5], r[6], r[7], r[8]))
            csv_file.close()
            conf.last_update = datetime.now()
            conf.local_file = catalog_filename
            conf.save()
        except Exception as e:
            print("ERROR processing catalog %s [SKIPPED]\n%s" % (shop_shop, e))
            continue
    print("------------------------------------------------------")
    print("All catalogs processed.")
def save_file_on_disc_from(url, directory):
    try:
        testfile = URLopener()
        testfile.retrieve(url, directory + '-' + str(url).split("/")[-1])
    except Exception as exception:
        log.error('Unexpected exception: ' + str(exception))
def getWeatherData(year, month, day):
    apiKey = getApiKey()
    url = ('http://api.wunderground.com/api/{0}/history_20{1}{2}{3}/'
           'q/WA/seattle.json'.format(apiKey, year, month, day))
    testfile = URLopener()
    testfile.retrieve(url, dataLocation + "{0}{1}{2}.{3}".format(
        year, month, day, dataExtension))
def test6(url="ftp://example.com"):
    od = URLopener()
    # ruleid: insecure-urlopener-retrieve-ftp
    od.retrieve(url)
def test1_ok():
    od = URLopener()
    # ok: insecure-urlopener-retrieve-ftp
    od.retrieve("ftps://example.com")
for hdri in hdris:
    thumbnail = hdri.select('.thumbnail')[0]['data-src']
    href = urlparse(hdri['href'])
    filename = href.query[2:] + '_' + resolution
    # DL link example:
    # https://hdrihaven.com/files/hdris/small_harbor_02_2k.hdr
    dl_url = 'https://hdrihaven.com/files/hdris/' + filename
    thumbnail_url = 'https://hdrihaven.com' + thumbnail
    print(dl_url)
    print(thumbnail_url)
    try:
        print('downloading hdr...')
        ext = '.hdr'
        opener.retrieve(dl_url + ext, filename + ext)
    except Exception:
        print('hdr download failed, trying exr...')
        try:
            ext = '.exr'
            opener.retrieve(dl_url + ext, filename + ext)
        except Exception:
            print('download failed. Continuing...\n')
            continue
    print('')
    opener.retrieve(thumbnail_url, os.path.basename(thumbnail_url))

print('Done')
r = requests.get(url_category, allow_redirects=True,
                 headers={'User-Agent': ua.chrome})
soup = BeautifulSoup(r.text, 'html.parser')

save_to = category + ' Texture ' + resolution
try:
    os.mkdir(save_to)
except Exception:
    pass
os.chdir(save_to)

texs = soup.select('#item-grid a')
for tex in texs:
    href = urlparse(tex['href'])
    filename = href.query[2:]
    new_filename = filename.replace(category + '&t=', '')
    dl_url = (f"https://texturehaven.com/files/textures/zip/"
              f"{resolution}/{new_filename}/{new_filename}_{resolution}_{fileformat}.zip")
    print(f"\n{dl_url}")
    try:
        print(f"{new_filename} downloading...")
        opener.retrieve(dl_url, os.path.basename(dl_url))
        filesnum += 1
    except Exception:
        print(f"{new_filename} download failed, Continuing...")
        continue

print(f"\nDownload completed. {filesnum} files downloaded.")
def download_page(pic_url, output):
    image = URLopener()
    image.retrieve(pic_url, output)
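# Hypothetical usage of download_page above (URL and filename are
# placeholders, not from the original):
#
#     download_page("https://example.com/picture.jpg", "picture.jpg")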