def transfer_metadata(self, finfo, local_root, dest):
    """Copy linked-file keys and image dimensions from ``finfo`` into ``dest``.

    For each of ``parent_full`` / ``thumb_full`` present in ``finfo``, the full
    path is split into directory and file name, the directory is made relative
    to ``local_root`` and the resulting key is stored in ``dest`` under
    ``parent_key`` / ``thumb_key``.  ``width`` and ``height`` are copied only
    when both are present.

    Args:
        finfo: source metadata mapping.
        local_root: root directory the linked paths are made relative to.
        dest: mapping that is updated in place.

    Returns:
        None; ``dest`` is mutated.
    """
    # TODO this can't be hard-coded for thumbs! generalize
    linked_files = (
        ('parent_full', 'parent_key'),
        ('thumb_full', 'thumb_key'),
    )
    for full_field, key_field in linked_files:
        if full_field not in finfo:
            continue
        linked_dir, linked_name = os.path.split(finfo[full_field])
        # strict=True is passed through unchanged; presumably it makes the
        # helper fail for paths outside local_root -- confirm in u.make_rel_path
        linked_rel_dir = u.make_rel_path(local_root, linked_dir, strict=True)
        dest[key_field] = u.make_key(linked_rel_dir, linked_name)

    # copy other useful metadata (dimensions travel as a pair or not at all)
    dimensions = ('width', 'height')
    if all(field in finfo for field in dimensions):
        for field in dimensions:
            dest[field] = finfo[field]
def copy_with_metadata(self, finfo, dest):
    """Copy the file described by ``finfo`` to ``dest`` and return its new metadata.

    Args:
        finfo: metadata mapping of the source file; ``finfo['full']`` is the
            source path.
        dest: full destination path (directory plus file name).

    Returns:
        A deep copy of ``finfo`` re-pointed at the new file (``name``, ``path``,
        ``full``, ``size``, ``modified`` updated, ``rules_run`` reset to False),
        or ``None`` when nothing was copied: destination outside the output
        root, destination identical to the source, or source file missing.
    """
    dest_dir, dest_file = os.path.split(dest)

    # prohibit destinations outside our output root.
    # Previously this branch only logged "disallowing" and then copied anyway.
    # NOTE(review): assumes non-strict make_rel_path returns None for a path
    # outside the root (the convention will_upload relies on) -- confirm.
    rel_path = u.make_rel_path(self.config.output, dest_dir, strict=False)
    if rel_path is None:
        logging.warning(
            "disallowing copy dest '%s', outside of output_root '%s'"
            % (dest_dir, self.config.output))
        return None

    # no self-copy
    if dest == finfo['full']:
        logging.warning("not copying file '%s' onto itself!" % dest)
        return None

    u.ensure_path(dest_dir)
    Config.log("%s to %s" % (finfo['full'], dest), tag='COPY_WITH_METADATA')
    if not os.path.exists(finfo['full']):
        msg = "file '%s' does not exist" % finfo['full']
        Config.log(msg, tag='COPY_WITH_METADATA_ERROR')
        return None
    shutil.copyfile(finfo['full'], dest)

    # set metadata for new file
    local = u.local_metadata(dest_dir, dest_file)
    newfi = copy.deepcopy(finfo)
    # TODO replace with unified metadata-copy system
    newfi['name'] = dest_file
    newfi['path'] = dest_dir
    newfi['full'] = dest
    newfi['size'] = local['size']
    newfi['modified'] = local['modified']

    # clear transient metadata not applicable to new file
    u.remove_no_copy_metadata(newfi)
    newfi['rules_run'] = False
    return newfi
def _unpack_if_archive(self, finfo): if 'rules_run' in finfo and finfo['rules_run']: return if finfo['full'].endswith('.tar.gz'): src_path = finfo['path'] name = finfo['name'] if self.unpack_root in src_path: rel_path = u.make_rel_path(self.unpack_root, src_path) else: rel_path = u.make_rel_path(self.config.input, src_path) dest_path = self.unpack_root + rel_path msg = "unpacking '%s' to '%s" % (name, dest_path) regex = None if 'unpack_files_wanted' in self._re: regex = self._re['unpack_files_wanted'] msg = "selectively " + msg + " (unpack_files_wanted = '%s')" % regex.pattern logging.info(msg) self._files_processed += 1 # this counts unpack_dir = u.unpack_file(src_path, dest_path, name, regex_wanted=regex) self.file_info[finfo['full']]['rules_run'] = True if not unpack_dir: Config.log(finfo['full'], tag='TP_IGNORE_BAD_ARCHIVE') return self._walk_files(unpack_dir)
def render_gather_facts(webmaker, work_item):
    """Pick the file info for a work item and derive its site-relative path.

    The ``output`` role is used when present, otherwise the first entry of the
    ``default`` role.  The relative path is ``'/'`` plus the first available of
    ``dest_key``, ``key``, or ``full`` made relative to ``webmaker.output_root``.

    Args:
        webmaker: object providing ``output_root`` (only read on the ``full``
            fallback).
        work_item: mapping with a ``roles`` mapping.

    Returns:
        Tuple ``(finfo, rel_path, path, file)`` where ``path`` and ``file`` are
        ``os.path.split(rel_path)``.

    Raises:
        Exception: if ``finfo`` has none of ``dest_key``, ``key`` or ``full``.
    """
    roles = work_item['roles']
    finfo = roles['output'] if 'output' in roles else roles['default'][0]

    for key_field in ('dest_key', 'key'):
        if key_field in finfo:
            rel_path = '/' + finfo[key_field]
            break
    else:
        if 'full' not in finfo:
            raise Exception('logic error, no way to determine rel_path')
        rel_path = '/' + u.make_rel_path(
            webmaker.output_root, finfo['full'], no_leading_slash=True)

    head, tail = os.path.split(rel_path)
    return finfo, rel_path, head, tail
def get_tree_info(self, bucket=None, remote_root=''):
    """Rebuild ``self.tree_info`` from the files under ``self._file_dest_root``.

    The existing mapping is cleared, then every file found by ``os.walk`` gets
    an entry keyed by its public key, holding its local metadata plus ``key``
    and ``md5``.

    Args:
        bucket: not used by this implementation.
        remote_root: not used by this implementation.

    Returns:
        None; ``self.tree_info`` is updated in place.
    """
    self.tree_info.clear()
    for dir_name, _subdirs, files in os.walk(self._file_dest_root):
        for file_name in files:
            full_src = '/'.join((dir_name, file_name))
            rel_path = u.make_rel_path(self._file_dest_root, dir_name,
                                       strict=False, no_leading_slash=True)
            entry = u.local_metadata(dir_name, file_name)
            entry['key'] = u.make_key(rel_path, file_name)
            entry['md5'] = u.md5(full_src)
            # important: must never expose 'full' outside this class - it's a
            # private implementation detail. Same for 'path'. Only 'key' is public
            for private_field in ('full', 'path'):
                del entry[private_field]
            self.tree_info[entry['key']] = entry
def default_template_file_action(self, dir_name, file_name, dest_rel_path=None, dest_name=None):
    """Render or copy one site file into the output tree and track the result.

    Template-type files are read, passed through ``u.debracket`` with
    ``self.interpret`` and written to the destination; all other files are
    copied verbatim.  The written file's size, modification time and md5 are
    recorded and the entry is handed to ``self.track_file``.  A template whose
    key is a "special file" is rendered but neither written nor tracked.

    Args:
        dir_name: directory containing the source file.
        file_name: source file name.
        dest_rel_path: destination directory relative to ``self.output_root``;
            only honoured when ``dest_name`` is given.
        dest_name: destination file name.  When falsy, the destination mirrors
            the source's location relative to ``self.site_root``.

    Returns:
        None.
    """
    template_full = dir_name + '/' + file_name
    Config.log("default_template_file_action '%s'" % template_full,
               tag='DEFAULT_TEMPLATE_FILE_ACTION')

    if dest_name:
        rel_path = dest_rel_path
        dest_path = u.pathify(self.output_root, dest_rel_path)
    else:
        rel_path = u.make_rel_path(self.site_root, dir_name)
        dest_path = u.pathify(self.output_root, rel_path)
        dest_name = file_name
    u.ensure_path(dest_path)
    dest_full = u.pathify(dest_path, dest_name)

    info = {
        'name': dest_name,
        'path': dest_path,
        'rel_path': rel_path,
        'full': dest_full,
        'key': u.make_key(rel_path, dest_name)
    }

    if self.config.is_template_type(file_name):
        # context managers close the handles deterministically; the original
        # left them to the garbage collector, so the output could be measured
        # and hashed below before it was flushed
        with open(template_full) as template_file:
            template = template_file.read()
        # rendered even for special files, as before
        output = u.debracket(template, self.interpret)
        if self.config.is_special_file(info['key']):
            return
        with open(dest_full, 'w') as dest_file:
            dest_file.write(output)
    else:
        shutil.copyfile(template_full, dest_full)

    # both paths record the same facts about the file that now exists on disk
    local = u.local_metadata(dest_path, dest_name)
    info['size'] = local['size']
    info['modified'] = local['modified']
    info['md5'] = u.md5(dest_full)
    self.track_file(info)
def track_file(self, finfo):
    """Record ``finfo`` in ``self.file_info`` and notify the tracking callback.

    An md5 is computed when ``finfo`` lacks one.  If a file with the same full
    path is already tracked with an identical md5, nothing else happens.
    Otherwise the entry is stored and, when a callback is set and
    ``self.will_upload`` accepts the path, missing ``rel_path``, ``key``,
    ``size`` and ``modified`` fields are filled in before the callback is
    invoked with ``finfo``.

    Args:
        finfo: metadata mapping with at least ``full``; ``path`` and ``name``
            are needed when derived fields must be filled in.

    Returns:
        None; ``finfo`` may be mutated.
    """
    full = finfo['full']
    Config.log(full, tag='TP_TRACK_FILE')
    if 'md5' not in finfo:
        finfo['md5'] = u.md5(full)

    # don't replace finfo unless file has actually changed
    if full in self.file_info and finfo['md5'] == self.file_info[full]['md5']:
        Config.log(full, tag='TP_TRACK_FILE_UNCHANGED')
        return
    self.file_info[full] = finfo

    if not self._track_file_callback or not self.will_upload(full):
        return

    if 'rel_path' not in finfo:
        finfo['rel_path'] = u.make_rel_path(self.config.output, finfo['path'],
                                            no_leading_slash=True)
    if 'key' not in finfo:
        finfo['key'] = u.make_key(finfo['rel_path'], finfo['name'])
    if not ('size' in finfo and 'modified' in finfo):
        logging.error('track_file (%s): finfo missing size and/or modified' % finfo['full'])
        on_disk = u.local_metadata(finfo['path'], finfo['name'])
        finfo['size'] = on_disk['size']
        finfo['modified'] = on_disk['modified']
    self._track_file_callback(finfo)
def _full_path_to_file_key_path(self, path):
    """Return ``path`` relative to ``self.config.input``.

    Delegates to ``u.make_rel_path`` in non-strict mode with the leading slash
    suppressed.
    """
    input_root = self.config.input
    return u.make_rel_path(input_root, path, no_leading_slash=True, strict=False)
def render_mode_thumbs(webmaker, dest_key, work_item, list_args):
    """Render one work item as a thumbnail that links to its full-size image.

    Args:
        webmaker: provides ``output_root`` and ``dest_mgr.tree_info``.
        dest_key: key being rendered; the special value ``'if_empty'`` returns
            ``list_args['if_empty']`` unchanged.
        work_item: work item passed to ``render_gather_facts``.
        list_args: rendering options; ``grid_columns`` (truthy) selects
            grid-cell output via ``render_grid_cell`` instead of plain ``<p>``
            markup.

    Returns:
        The rendered markup.  An empty string is returned when the thumb or
        parent key cannot be found in ``webmaker.dest_mgr.tree_info``; a
        "not available" placeholder is returned when the item has neither
        thumb nor parent information.

    Raises:
        Exception: anything raised while rendering is logged under the
            ``GRID_THUMBS`` tag and re-raised.
    """
    try:
        if dest_key == 'if_empty':
            return list_args[dest_key]

        ret = ''
        finfo, href, _path, _file = render_gather_facts(webmaker, work_item)
        display_name, got_display_name = u.get_display_name(finfo, work_item)
        render = {}
        grid_columns = None
        if 'grid_columns' in list_args:
            grid_columns = list_args['grid_columns']
        # default size 200x200 but should get from thumb metadata below
        render['width'] = '200'
        render['height'] = '200'

        # we (that is, the finfo) might be the image that has a thumb, or we
        # might be the thumb itself.  First: are we an image with a thumb?
        if 'thumb_full' in finfo or 'thumb_key' in finfo:
            if 'thumb_full' in finfo:
                thumb_path, thumb_file = os.path.split(finfo['thumb_full'])
                # only valid if relative to output_root
                thumb_key = u.make_rel_path(webmaker.output_root, thumb_path,
                                            strict=False, no_leading_slash=True)
                # Guard the concatenation: the original did `None + '/'` and
                # raised TypeError when no relative path came back.
                # NOTE(review): assumes non-strict make_rel_path signals
                # "outside the root" with None -- confirm.
                if thumb_key is not None:
                    thumb_key = thumb_key + '/' + thumb_file
            else:
                thumb_key = finfo['thumb_key']

            if thumb_key and thumb_key in webmaker.dest_mgr.tree_info:
                thumb_finfo = webmaker.dest_mgr.tree_info[thumb_key]
                if 'width' in thumb_finfo and 'height' in thumb_finfo:
                    render['width'] = thumb_finfo['width']
                    render['height'] = thumb_finfo['height']
                if not got_display_name and 'display_name' in thumb_finfo:
                    display_name = thumb_finfo['display_name']
                href = key_to_static_url(href)
                thumb_href = key_to_static_url(thumb_key)
                ret = _render_thumb_link(webmaker, render, grid_columns,
                                         href, thumb_href, display_name)
            else:
                msg = "%s" % thumb_key
                Config.log(msg, tag='RENDER_THUMBS_thumb_not_in_dest')

        # otherwise: are we a thumb that knows its parent image?
        elif 'parent_full' in finfo or 'parent_key' in finfo:
            if 'parent_full' in finfo:
                # only valid if relative to output_root
                parent_key = u.make_rel_path(webmaker.output_root, finfo['parent_full'],
                                             strict=False, no_leading_slash=True)
            else:
                parent_key = finfo['parent_key']

            if parent_key and parent_key in webmaker.dest_mgr.tree_info:
                # here finfo is the thumb, so its own dimensions apply
                if 'width' in finfo and 'height' in finfo:
                    render['width'] = finfo['width']
                    render['height'] = finfo['height']
                parent_finfo = webmaker.dest_mgr.tree_info[parent_key]
                if not got_display_name and 'display_name' in parent_finfo:
                    display_name = parent_finfo['display_name']
                parent_href = '/' + parent_key
                ret = _render_thumb_link(webmaker, render, grid_columns,
                                         parent_href, href, display_name)
            else:
                msg = "%s" % parent_key
                Config.log(msg, tag='RENDER_THUMBS_parent_not_in_dest')

        else:
            msg = "unable to resolve '%s' as thumb or parent of thumb" % href
            Config.log(msg, tag='RENDER_THUMBS_not_resolved')
            if grid_columns:
                render['title'] = 'not available'
                render['full_img_url'] = '#'
                render['thumb_url'] = '#'
                render['description'] = 'description not available'
                render['caption'] = dest_key + ' not available'
                ret = render_grid_cell(webmaker, render)
            else:
                ret = "<p><a href='%s'>not available</a></p>\n" % (dest_key)
                ret += "<p><a href='%s'>%s</a></p>\n" % (dest_key, dest_key + ' not available')
        return ret
        # NOTE: used to be code here to support fallback to naming convention (prepending 'thumb_'
        # to the name of the parent file to make thumbnail name). No longer supported, but could
        # be turned back on if found useful.
    except Exception as exc:
        Config.log(str(exc), tag='GRID_THUMBS')
        raise


def _render_thumb_link(webmaker, render, grid_columns, full_url, thumb_url, display_name):
    """Return markup for one thumbnail image linking to ``full_url``."""
    if grid_columns:
        render['title'] = display_name
        render['full_img_url'] = full_url
        render['thumb_url'] = thumb_url
        render['description'] = ' '  # todo
        render['caption'] = display_name
        return render_grid_cell(webmaker, render)
    markup = "<p><a href='%s'><img src='%s'></img></a></p>\n" % (
        full_url, thumb_url)
    markup += "<p><a href='%s'>%s</a></p>\n" % (full_url, display_name)
    return markup
def sync_tree_info(self, options=None):
    """Reconcile the persisted ``self.tree_info`` with the files on disk.

    Runs at most once per instance (guarded by ``self._synced_tree_info``).
    The persisted metadata is always read first; depending on ``options`` the
    tree under ``self._file_dest_root`` is then walked and compared against it
    by md5, new or changed files are (re)recorded, entries without a matching
    file are dropped, and the result is written back.

    Args:
        options: mapping of sync options; ``self._default_sync_options`` is
            used when None.  Keys read here are ``refresh_dest_meta`` and
            ``skip_refresh_if_tree_unchanged``.

    Returns:
        None.
    """
    if self._synced_tree_info:
        Config.log(self.config_section, tag='FILE_DEST_META_ALREADY_SYNCED')
        return
    start = u.timestamp_now()
    logging.info("starting FileDest metadata sync")
    if options is None:
        options = self._default_sync_options

    # need to read persisted file, as that's the only place non-file-system metadata can live
    self.read_tree_info()

    # determine whether to force full refresh, only do it if tree has changed, or simply trust metadata:
    do_full_refresh = False
    if 'refresh_dest_meta' in options:
        do_full_refresh = options['refresh_dest_meta']

    # computing all those md5s takes a long time, so optionally skip it if the most
    # recent modification time for _file_dest_root is unchanged since we last did it
    # NOTE(review): only the presence of the key is tested (not its value), and
    # the result overrides 'refresh_dest_meta' set just above -- confirm intended.
    if 'skip_refresh_if_tree_unchanged' in options:
        last_mod = u.dir_last_modified(self._file_dest_root)
        expected_last_mod = self._tree_last_modified
        do_full_refresh = last_mod != expected_last_mod
        msg = "last_mod do_full_refresh = '%s', last_mod = '%f', expected_last_mod = '%f'" % (
            do_full_refresh, last_mod, expected_last_mod)
        Config.log(msg, tag='FILE_DEST_META_TREE_UNCHANGED_TEST')

    if do_full_refresh:
        # physically walk the tree as it might not match persisted data
        for dir_name, subdirs, files in os.walk(self._file_dest_root):
            for file_name in files:
                full_src = dir_name + '/' + file_name
                # setit: whether the persisted entry must be replaced by fresh local metadata
                setit = False
                rel_path = u.make_rel_path(self._file_dest_root, dir_name, strict=False, no_leading_slash=True)
                key = u.make_key(rel_path, file_name)
                local_meta = u.local_metadata(dir_name, file_name)
                local_meta['md5'] = u.md5(full_src)
                if key in self.tree_info:
                    saved_meta = self.tree_info[key]
                    # a placeholder md5 guarantees the comparison below fails,
                    # so an entry without md5 is always replaced
                    if 'md5' not in saved_meta:
                        saved_meta['md5'] = 'ERROR! md5 MISSING FROM tree_info!'
                    if local_meta['md5'] == saved_meta['md5']:
                        # sanity check
                        if local_meta['size'] != saved_meta['size']:
                            msg = "key '%s', saved: size %i, read: size %i" % (
                                key, saved_meta['size'], local_meta['size'])
                            Config.log(msg, tag='FILE_DEST_META_ERROR_NONFATAL')
                        # otherwise file is perfect, continue
                    else:
                        msg = "key '%s', md5 mismatch. saved: '%s', read: '%s'" % (
                            key, saved_meta['md5'], local_meta['md5'])
                        Config.log(msg, tag='FILE_DEST_META_ERROR_FATAL')
                        setit = True
                else:
                    msg = "key '%s' not found in saved, adding" % key
                    Config.log(msg, tag='FILE_DEST_META_NEW_FILE')
                    setit = True

                if setit:
                    local_meta['key'] = key
                    self.tree_info[key] = local_meta
                    # important: must never expose 'full' outside this class - it's a private
                    # implementation detail. Same for 'path'. Only 'key' is public
                    del local_meta['full']
                    del local_meta['path']
                    # re-stores the same object as above (redundant but harmless)
                    self.tree_info[key] = local_meta
                # temporary marker: this key has a file on disk (removed again below)
                self.tree_info[key]['_found_file_'] = True

        # entries that were not marked during the walk have no file any more;
        # collect them first so tree_info is not resized while being iterated
        missing = []
        for key in self.tree_info:
            if '_found_file_' in self.tree_info[key]:
                del self.tree_info[key]['_found_file_']
            else:
                missing.append(key)
                msg = "no file matching key '%s', deleting" % key
                Config.log(msg, tag='FILE_DEST_META_MISSING')
        for key in missing:
            del self.tree_info[key]

        self.write_tree_info()
        act = "completed"
    else:
        # trust the persisted file (faster)
        act = "bypassed"

    elapsed = u.timestamp_now() - start
    msg = "%s confirmation of tree info in %f seconds" % (act, elapsed)
    Config.log(msg, tag='FILE_DEST_SYNC')
    self._synced_tree_info = True
def upload_tree(self, local_root, remote_root='', options=None, local_tree_meta=None):
    """Upload new or changed files under ``local_root`` and record their metadata.

    Directories whose path relative to ``local_root`` starts with ``/tmp`` are
    skipped.  With ``use_md5`` in ``options``, a file already known to
    ``self.tree_info`` (and not pending) is uploaded only when its md5
    differs.  Files larger than ``self._max_upload_size`` (when that limit is
    >= 0) are never uploaded.

    Args:
        local_root: directory tree to upload.
        remote_root: not used by this implementation.
        options: upload options; defaults to ``{'use_md5': True}`` when falsy.
        local_tree_meta: optional mapping of full local path -> metadata,
            used instead of reading metadata from disk and as the source of
            extra metadata (e.g. thumbnail info) for uploaded files.

    Returns:
        Number of files uploaded by this call.
    """
    logging.info("starting FileDest upload")
    if not options:
        options = {'use_md5': True}
    start = u.timestamp_now()
    self._upload_count = 0

    # refresh and save data for files already on dest
    self.sync_tree_info(options=self._default_sync_options)

    for dir_name, _subdirs, files in os.walk(local_root):
        rel_path = u.make_rel_path(local_root, dir_name)
        if rel_path.startswith('/tmp'):
            continue
        for file_name in files:
            local_file = dir_name + '/' + file_name
            key = u.make_key(rel_path, file_name)
            local_md5 = u.md5(local_file)
            if local_tree_meta and local_file in local_tree_meta:
                local_meta = local_tree_meta[local_file]
            else:
                local_meta = u.local_metadata(dir_name, file_name)
            size = local_meta['size']

            cached_info = None
            if key in self.tree_info:
                cached_info = self.tree_info[key]

            do_upload = True
            if 'use_md5' in options:
                if cached_info and not self.is_pending(key):
                    if 'md5' in self.tree_info[key]:
                        remote_md5 = self.tree_info[key]['md5']
                        do_upload = do_upload and remote_md5 != local_md5
                    else:
                        err = "no md5 value for existing key '%s' (old version?)" % key
                        logging.error(err)
                else:
                    Config.log("file '%s' is not in FileDest" % key,
                               tag='DEST_NO_EXISTING_FILE')

            if self._max_upload_size >= 0 and size > self._max_upload_size:
                logging.debug("file '%s' size (%i) > limit (%i), won't upload"
                              % (key, size, self._max_upload_size))
                do_upload = False

            if not do_upload:
                Config.log("key = '%s'" % key, tag='FILE_DEST_UPLOAD_NO_CHANGE')
                continue

            extra_args = {'Metadata': {'md5': local_md5}}
            logging.debug("FileDest object upload starting, key = '%s', %i bytes" % (key, size))
            # separate timer: the original reused `start` here, which clobbered
            # the overall start time and made the final elapsed figure wrong
            upload_start = u.timestamp_now()
            self._upload(dir_name, file_name, key, extra_args=extra_args)
            upload_secs = u.timestamp_now() - upload_start
            # a coarse clock can report a zero interval for a small file
            rate = size / upload_secs if upload_secs > 0 else float('inf')
            Config.log("key = '%s', %f bytes/sec" % (key, rate), tag='FILE_DEST_UPLOAD_OK')

            # add metadata to our repos
            info = {
                'new': True,
                'name': file_name,
                'rel_path': rel_path,
                'key': key,
                'size': local_meta['size'],
                'modified': local_meta['modified'],
                # 'mod_dt': last_mod,
                # 'e_tag': obj.e_tag,
                'md5': local_md5
            }
            # transfer meta (e.g. thumbnail info) if exists
            # NOTE(review): uses self.local_root rather than the local_root
            # argument -- unchanged from the original, confirm intended.
            if local_tree_meta and local_file in local_tree_meta:
                self.transfer_metadata(local_tree_meta[local_file],
                                       local_root=self.local_root, dest=info)
            self.tree_info[key] = info
            self._upload_count += 1

    self.write_tree_info()
    elapsed = u.timestamp_now() - start
    logging.info("FileDest.upload_tree finished in %f seconds, uploaded %i files"
                 % (elapsed, self._upload_count))
    return self._upload_count
def will_upload(self, full):
    """Tell whether the file at ``full`` is eligible for upload.

    The file's directory is made relative to ``self.config.output`` in
    non-strict mode.  The file is eligible when a relative path is obtained
    (not None) and that path does not start with ``/tmp``.

    Args:
        full: full path of the file.

    Returns:
        True if the file should be uploaded, False otherwise.
    """
    parent_dir = os.path.dirname(full)
    rel_path = u.make_rel_path(self.config.output, parent_dir, strict=False)
    if rel_path is None:
        return False
    return not rel_path.startswith('/tmp')