def download_url_to_zip(zipfile, url):
    url = url.replace("https://ka-", "http://ka-")
    filename = os.path.basename(url)
    try:
        filecontent = fetch_file_from_url_or_cache(url)
    except Exception as e:
        # we don't want a failed image request to abort the download, but we
        # do want to inform the user of the error
        logging.error("Error when downloading from URL: %s (%s)" % (url, e))
        return
    zipfile.writestr(filename, filecontent)
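# Hypothetical usage sketch (not part of the original source): collect a few
# remote images into one archive. Assumes download_url_to_zip above and its
# fetch_file_from_url_or_cache helper are available in this module; the URLs
# are placeholders for illustration only.
from zipfile import ZipFile

example_image_urls = [
    "https://ka-example.org/images/logo.png",
    "https://ka-example.org/images/banner.png",
]

with ZipFile("images.zip", "w") as zf:
    for image_url in example_image_urls:
        download_url_to_zip(zf, image_url)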
def _make_zip(zipfile, options):
    excludes = set(options.plugin.excludes)
    skips = options.plugin.skip_exclude
    src_dir = options.plugin.source_dir

    # noinspection PyShadowingNames
    exclude = lambda p: any(fnmatch.fnmatch(p, e) for e in excludes)

    def filter_excludes(some_files):
        if not some_files:
            return []
        # to prevent descending into dirs, modify the list in place
        for i in range(len(some_files) - 1, -1, -1):
            some_f = some_files[i]
            if exclude(some_f) and some_f not in skips:
                some_files.remove(some_f)
        return some_files

    for root, dirs, files in os.walk(src_dir):
        for f in filter_excludes(files):
            relpath = os.path.relpath(root, '.')
            zipfile.write(path(root) / f, path(relpath) / f)
        filter_excludes(dirs)

    for root, dirs, files in os.walk(options.sphinx.builddir):
        for f in files:
            relpath = os.path.join(
                options.plugin.name, "docs",
                os.path.relpath(root, options.sphinx.builddir))
            zipfile.write(path(root) / f, path(relpath) / f)

    utils_filename = os.path.join(os.path.dirname(__file__),
                                  "planet_explorer", "pe_utils.py")
    with open(utils_filename) as f:
        txt = f.read()
    if hasattr(options.package, 'segments'):
        txt = txt.replace(
            "# [set_segments_write_key]",
            f"os.environ['SEGMENTS_WRITE_KEY'] = '{options.package.segments}'"
        )
    else:
        print("WARNING: No Segments write key provided.")
    if hasattr(options.package, 'sentry'):
        txt = txt.replace(
            "# [set_sentry_dsn]",
            f"os.environ['SENTRY_DSN'] = '{options.package.sentry}'")
    else:
        print("WARNING: No Sentry DSN provided.")
    zipfile.writestr("planet_explorer/pe_utils.py", txt)
def rezip_one(target_zip, source_zip, compression):
    try:
        with ZipFile(source_zip).open('data') as f:
            data = f.read()
    except Exception as e:
        print('skipping zip file', source_zip, 'for error', e)
        return
    # write to an intermediate file in case source_dir == target_dir
    zf = ZipFile(target_zip + '.new', 'w', compression=compression)
    zf.writestr('data', data)
    zf.close()
    copystat(source_zip, target_zip + '.new')
    move(target_zip + '.new', target_zip)
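# Hypothetical usage sketch (not part of the original source): recompress every
# archive under a directory in place. Assumes each archive holds a single member
# named "data", as rezip_one expects; because rezip_one writes through a ".new"
# intermediate file, passing the same path as source and target is safe.
import glob
from zipfile import ZIP_DEFLATED

for archive in glob.glob("archives/*.zip"):
    rezip_one(archive, archive, compression=ZIP_DEFLATED)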
def create_artifact_bundle(org, release, project=None):
    import zipfile

    bundle = io.BytesIO()
    bundle_dir = get_fixture_path("artifact_bundle")
    # note: the "as zipfile" binding shadows the zipfile module inside this block
    with zipfile.ZipFile(bundle, "w", zipfile.ZIP_DEFLATED) as zipfile:
        for path, _, files in os.walk(bundle_dir):
            for filename in files:
                fullpath = os.path.join(path, filename)
                relpath = os.path.relpath(fullpath, bundle_dir)
                if filename == "manifest.json":
                    manifest = _patch_artifact_manifest(fullpath, org, release, project)
                    zipfile.writestr(relpath, manifest)
                else:
                    zipfile.write(fullpath, relpath)
    return bundle.getvalue()
def image_to_file(self, href, zipfile):
    try:
        if re.match("^//", href):
            href = "http:" + href
        if href not in self._images_cache:
            r = requests.get(href, headers=self.headers)
            r.raise_for_status()
            filename = os.path.basename(href)
            zipfile.writestr("%s/img/%s" % (self.username, filename), r.content)
            self._images_cache[href] = "img/%s" % filename
        return self._images_cache[href]
    except requests.exceptions.ConnectionError:
        sys.stderr.write("\n Ignoring ConnectionError for %s\n" % href)
        return "image-unavailable.png"
def render_zip_contents(self, zipfile, story, filename, **kw):
    ext = self.chapter_extension
    chapters = story.chapter_set.order_by('order')
    num_width = len(str(chapters.count()))
    for i, chapter in enumerate(chapters.iterator()):
        data = render_to_string(
            self.chapter_template,
            {
                'chapter': chapter,
                'story': story,
            },
        ).encode(self.chapter_encoding)
        name = slugify(chapter.title)
        num = str(i + 1).rjust(num_width, '0')
        arcname = str('%s/%s_%s.%s' % (filename, num, name, ext))
        zipfile.writestr(arcname, data)
def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.
    """
    names = zipfile.namelist()
    consume(
        zipfile.writestr(name + "/", b"")
        for name in map(posixpath.dirname, names)
        if name and name + "/" not in names
    )
    return zipfile
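# Hypothetical usage sketch (not part of the original source): back-fill the
# directory entries implied by an archive's members. Assumes add_dirs above and
# its consume() helper (which simply drains an iterator) are available.
from zipfile import ZipFile

with ZipFile("bundle.zip", "a") as zf:
    add_dirs(zf)  # e.g. a "pkg/mod.py" member implies a "pkg/" directory entry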
def to_zipfile(obj, zipfile, path=''):
    summary = OrderedDict()
    for key in obj.keys():
        val = obj[key]
        new_path = join_path(path, key)
        if isinstance(val, h5py.Group):
            to_zipfile(val, zipfile, new_path)
        else:
            fname = join_path(path, key + '.dat')
            if 'target' in val.attrs and val.attrs['target'] != join_path('/', path, key):
                print(val.attrs['target'], join_path('/', path, key))
                summary[key] = OrderedDict([['target', val.attrs['target']]])  # , ['shape', (obj[val.attrs['target']]).shape]]
            elif numpy.product(val.shape) <= 1:
                summary[key] = val.value.tolist()
            else:
                value = obj[key].value
                formats = {'S': '%s', 'f': '%.8g', 'i': '%d', 'u': '%d'}
                if value.dtype.kind in formats:
                    fd, fn = tempfile.mkstemp()
                    os.close(fd)  # to be opened by name
                    if DEBUG:
                        print(fname, value.dtype.kind)
                    if len(value.shape) > 2:
                        with open(fn, 'w') as f:
                            simplejson.dump(value.tolist(), f)
                    else:
                        numpy.savetxt(fn, value, delimiter='\t',
                                      fmt=formats[value.dtype.kind])
                    zipfile.write(fn, fname)
                    os.remove(fn)
                    summary[key] = OrderedDict([['target', join_path('/', fname)],
                                                ['shape', obj[key].shape]])
                else:
                    print("unknown type of array: ", fname, value.dtype)
    zipfile.writestr(os.path.join(path, 'fields.json'),
                     simplejson.dumps(summary, indent=' '))
def _make_zip(zipfile, options):
    excludes = set(options.plugin.excludes)
    skips = options.plugin.skip_exclude
    src_dir = options.plugin.source_dir

    exclude = lambda p: any(fnmatch.fnmatch(p, e) for e in excludes)

    def filter_excludes(some_files):
        if not some_files:
            return []
        # to prevent descending into dirs, modify the list in place
        for i in range(len(some_files) - 1, -1, -1):
            some_f = some_files[i]
            if exclude(some_f) and some_f not in skips:
                some_files.remove(some_f)
        return some_files

    for root, dirs, files in os.walk(src_dir):
        for f in filter_excludes(files):
            relpath = os.path.relpath(root, ".")
            zipfile.write(path(root) / f, path(relpath) / f)
        filter_excludes(dirs)

    analytics_filename = os.path.join(os.path.dirname(__file__),
                                      "planet_explorer", "pe_analytics.py")
    with open(analytics_filename) as f:
        txt = f.read()
    if hasattr(options.package, "segments"):
        txt = txt.replace(
            "# [set_segments_write_key]",
            f"os.environ['SEGMENTS_WRITE_KEY'] = '{options.package.segments}'",
        )
    else:
        print("WARNING: No Segments write key provided.")
    if hasattr(options.package, "sentry"):
        txt = txt.replace(
            "# [set_sentry_dsn]",
            f"os.environ['SENTRY_DSN'] = '{options.package.sentry}'",
        )
    else:
        print("WARNING: No Sentry DSN provided.")
    zipfile.writestr("planet_explorer/pe_analytics.py", txt)

    metadata_filename = os.path.join(os.path.dirname(__file__),
                                     "planet_explorer", "metadata.txt")
    cfg = SafeConfigParser()
    cfg.optionxform = str
    cfg.read(metadata_filename)
    if hasattr(options.package, "version"):
        version = "".join(c for c in options.package.version if c.isdigit() or c == ".")
        cfg.set("general", "version", version)
    buf = StringIO()
    cfg.write(buf)
    zipfile.writestr("planet_explorer/metadata.txt", buf.getvalue())

    utils_filename = os.path.join(os.path.dirname(__file__),
                                  "planet_explorer", "pe_utils.py")
    with open(utils_filename) as f:
        txt = f.read()
    commitid = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip()
    txt = txt.replace(
        'COMMIT_ID = ""',
        f'COMMIT_ID = "{commitid}"',
    )
    zipfile.writestr("planet_explorer/pe_utils.py", txt)
def load_recipe_index(self, zipfile):
    self._progress("Downloading recipe index:\n")
    start_date = "2013-12-12T00:00:00"
    recipe_cards = self.recipe_cards = []
    while start_date:
        self._progress("\r - %d cards" % len(recipe_cards))
        r = requests.get("http://punchfork.com/api/rc", params={
            "query": "likes/%s/new" % self.username,
            "start": start_date,
            "size": "100",
            "_": "%0.10f" % random.random()
        }, headers=self.headers)
        r.raise_for_status()
        json_data = r.json
        if hasattr(json_data, "__call__"):
            json_data = json_data()
        recipe_cards.extend(json_data["cards"])
        if len(json_data["cards"]) < 100:
            start_date = None
        else:
            start_date = json_data["next"]
    self._progress("\r - Found %d cards.\n" % len(recipe_cards))

    # generate html
    soup = BeautifulSoup(self._index_template, "lxml")
    soup.title.string = self.data["page_title"]
    marquee_title = soup.find(id="marquee-title")
    marquee_title.h1.string = self.data["marquee_title_name"]
    marquee_title.img["src"] = self.image_to_file(self.data["marquee_title_avatar"], zipfile)

    if len(self.recipe_cards) == 1:
        recipe_notice = "%d recipe" % len(self.recipe_cards)
    else:
        recipe_notice = "%d recipes" % len(self.recipe_cards)
    end_marker = soup.find(id="end-marker")
    end_marker.find("div", "notice").string = recipe_notice
    soup.find("div", "meta").string = "This recipe list was archived on %s." % (
        strftime("%d %B %Y, %H:%M:%S UTC", gmtime()))

    self._progress("Building index page:\n")
    i = 0
    recipe_cards_div = end_marker.parent
    for recipe_card in self.recipe_cards:
        self._progress("\r - Added %d cards to index." % i)
        i += 1
        card_el = BeautifulSoup(recipe_card, "lxml")
        likes_a = card_el.find("a", "svc")
        likes_span = soup.new_tag("span")
        likes_span["class"] = "metric svc"
        likes_span.string = likes_a.string
        likes_a.replace_with(likes_span)
        for img in card_el.find_all("img"):
            img["src"] = self.image_to_file(img["src"], zipfile)
        a = card_el.find("a")
        a["href"] = "recipe/" + a["href"].split("/")[2] + ".html"
        del a["target"]
        end_marker.insert_before(card_el)
    self._progress("\r - Added %d cards to index.\n" % i)

    zipfile.writestr("%s/index.html" % self.username,
                     soup.prettify(formatter="minimal").encode("utf-8"))
def load_recipes(self, zipfile):
    self._progress("Downloading recipes:\n")
    i = 0
    for recipe_card in self.recipe_cards:
        self._progress("\r - Saved %d recipes." % i)
        i += 1
        recipe_href = BeautifulSoup(recipe_card, "lxml").div.a["href"]
        recipe_name = recipe_href.split("/")[2]
        r = requests.get("http://punchfork.com/recipe/%s" % recipe_name, headers=self.headers)
        r.raise_for_status()

        # replace some parts
        t = r.text
        soup = BeautifulSoup(re.sub("<script(.|\n)+?</script>", "", t, flags=re.DOTALL), "lxml")
        for tag in soup.find_all("script"):
            tag.extract()
        for tag in soup.find_all("link", rel="stylesheet"):
            tag.extract()
        for tag_id in ("announcement-banner", "action-buttons", "sharing-block", "footer", "fb-root"):
            tag = soup.find(id=tag_id)
            if tag:
                tag.extract()
        who_likes = soup.find("div", id="who-likes")
        if who_likes:
            for tag in who_likes.find_all("div", "tiny-user-card"):
                tag.extract()
        publisher_card = soup.find("div", id="publisher-card")
        if publisher_card:
            for tag in publisher_card.find_all("a", href=re.compile("^/from/")):
                del tag["href"]
        inner_header = soup.find("div", id="inner-header")
        del inner_header.find("a", "logo")["href"]
        for tag in inner_header.find_all("ul", "dropdown-menu"):
            tag.extract()
        for source_a in soup.find_all("a", href=re.compile(r"^/r\?url=")):
            del source_a["onclick"]
            source_a["href"] = urlparse.parse_qs(urlparse.urlparse(source_a["href"]).query)["url"][0]

        ul = soup.new_tag("ul")
        ul["class"] = "left dropdown-menu dark"
        li = soup.new_tag("li")
        li["class"] = "menu dropdown-item"
        a = soup.new_tag("a", href="../index.html")
        a.string = "Back to index"
        li.append(a)
        ul.append(li)
        inner_header.append(ul)

        for img in soup.find_all("img"):
            img["src"] = "../" + self.image_to_file(img["src"], zipfile)
        for img in soup.find_all("link", rel="apple-touch-icon"):
            img["href"] = "../" + self.image_to_file(img["href"], zipfile)
        for img in soup.find_all("link", rel="shortcut icon"):
            img["href"] = "../" + self.image_to_file(img["href"], zipfile)
        for img in soup.find_all("meta", property="og:image"):
            img["content"] = "../" + self.image_to_file(img["content"], zipfile)

        footer = soup.new_tag("div")
        footer["class"] = "footer"
        soup.body.append(footer)
        soup.head.append(soup.new_tag("link", rel="stylesheet", type="text/css",
                                      href="../css/punchfork-81HpuHrf7cX.css"))
        new_script_tag = soup.new_tag("script", type="text/javascript",
                                      src="../js/punchfork-export.js")
        new_script_tag.string = '// '
        soup.body.append(new_script_tag)

        try:
            zipfile.writestr("%s/recipe/%s.html" % (self.username, recipe_name),
                             soup.prettify(formatter="minimal").encode("utf-8"))
        except UnicodeDecodeError:
            sys.stderr.write("\n Skipping broken recipe with multiple encodings for user %s" % self.username)
    self._progress("\r - Saved %d recipes.\n" % i)