def send_to_dw(doc):
    """Create or replace a dataset on data.world for the package in *doc*.

    Builds the dataset key from the account username and the slugified
    Root.Title, then replaces the dataset if it exists or creates it
    otherwise.

    :param doc: A metatab document with Root.Title / Root.Description terms.
    """
    client = dw.api_client()
    username = '******'

    title = doc.find_first_value("Root.Title")

    key = join(username, slugify(title))

    d = dict(
        title=title,
        description=doc.find_first_value("Root.Description"),
        summary=doc.markdown,
        visibility='OPEN',
        files=get_resource_urls(doc),
    )

    try:
        # Raise an error if the dataset does not exist
        ds = client.get_dataset(key)
        ds = client.replace_dataset(key, **d)
        ds = client.get_dataset(key)
    except RestApiError:
        # BUG FIX: was hard-coded to 'ericbusboom', so the dataset was
        # created under a different account than the key refers to.
        ds = client.create_dataset(username, **d)
        ds = client.get_dataset(key)
def add_single_resource(doc, ref, cache, seen_names):
    """Add a single Root.Datafile resource term for *ref* to *doc*.

    Replaces any existing Datafile term for the same ref, de-duplicates the
    resource name against *seen_names*, and tries to intuit the file's
    encoding, start line and header lines.

    :param doc: The metatab document to modify.
    :param ref: URL or path of the resource.
    :param cache: Download cache passed to run_row_intuit.
    :param seen_names: Mutable set of names already in use; updated in place.
    :return: The newly created Datafile term.
    """
    from metatab.util import slugify

    t = doc.find_first('Root.Datafile', value=ref)

    if t:
        prt("Datafile exists for '{}', deleting".format(ref))
        doc.remove_term(t)

    term_name = classify_url(ref)

    path, name = extract_path_name(ref)

    # If the name already exists, try to create a new one.
    # 20 attempts ought to be enough.
    if name in seen_names:
        base_name = re.sub(r'-?\d+$', '', name)
        for i in range(1, 20):
            name = "{}-{}".format(base_name, i)
            if name not in seen_names:
                break

    seen_names.add(name)

    encoding = start_line = None
    header_lines = []

    try:
        encoding, ri = run_row_intuit(path, cache)
        prt("Added resource for '{}', name = '{}' ".format(ref, name))
        start_line = ri.start_line
        header_lines = ri.header_lines
    except RowIntuitError as e:
        warn("Failed to intuit '{}'; {}".format(path, e))
    except SourceError as e:
        warn("Source Error: '{}'; {}".format(path, e))
    except Exception as e:
        warn("Error: '{}'; {}".format(path, e))

    if not name:
        from hashlib import sha1
        name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

        # xlrd gets grouchy if the name doesn't start with a char
        try:
            int(name[0])
            name = 'a' + name[1:]
        except ValueError:
            # BUG FIX: was a bare except. int() on a non-digit first char
            # raises ValueError; anything else should propagate.
            pass

    return doc['Resources'].new_term(
        term_name, ref,
        name=name,
        startline=start_line,
        headerlines=','.join(str(e) for e in header_lines),
        encoding=encoding)
def cleanse(self):
    """Clean up some terms, like ensuring that the name is a slug."""
    from .util import slugify

    self.ensure_identifier()

    try:
        self.update_name()
    except MetatabError:
        # update_name() failed; fall back to slugifying whatever is available.
        root = self['Root']
        ident_term = root.find_first('Root.Identifier')
        name_term = root.find_first('Root.Name')

        if not name_term:
            # No name term at all: create one from the identifier.
            root.get_or_new_term('Root.Name').value = slugify(ident_term.value)
        elif name_term.value:
            name_term.value = slugify(name_term.value)
        else:
            # Name term exists but is empty; derive it from the identifier.
            name_term.value = slugify(ident_term.value)
def package_info(doc):
    """Fetch and pretty-print the data.world dataset record for *doc*'s package."""
    client = dw.api_client()

    username = '******'
    key = join(username, slugify(doc.find_first_value("Root.Title")))

    try:
        dataset = client.get_dataset(key)
        prt(json.dumps(dataset, indent=4))
    except RestApiError as e:
        err(e)
def _load_documentation_files(self):
    """Load the package's documentation, converting any local Jupyter
    notebook entries in the Documentation section into HTML documents
    written under the package's docs/ directory.
    """
    from metapack_jupyter.exporters import DocumentationExporter

    notebook_docs = []

    # First find and remove notebooks from the docs. These wil get processed to create
    # normal documents.
    try:
        # Iterate over a copy (list(...)) because remove_term mutates the doc.
        for term in list(self.doc['Documentation'].find('Root.Documentation')):
            u = parse_app_url(term.value)
            # Only local .ipynb files are handled here; everything else is
            # left for the superclass call below to process normally.
            if u is not None and u.target_format == 'ipynb' and u.proto == 'file':
                notebook_docs.append(term)
                self.doc.remove_term(term)
    except KeyError:
        # Raised when there is no 'Documentation' section in the doc.
        self.warn("No documentation defined in metadata")

    # Process all of the normal files
    super()._load_documentation_files()

    fw = FilesWriter()
    fw.build_directory = join(self.package_path.path, 'docs')

    # Now, generate the notebook documents directly into the filesystem package
    for term in notebook_docs:
        de = DocumentationExporter(base_name=term.name or slugify(term.title))
        u = parse_app_url(term.value)

        nb_path = join(self.source_dir, u.path)  # Only works if the path is relative.

        try:
            output, resources = de.from_filename(nb_path)
            # Write notebook html with inputs
            fw.write(output, resources, notebook_name=de.base_name + '_full')
            de.update_metatab(self.doc, resources)
        except Exception as e:
            # Best-effort: a failed notebook conversion should not abort
            # the whole package build.
            from metapack.cli.core import warn
            warn("Failed to convert document for {}: {}".format(term.name, e))
def add_single_resource(self, ref, **properties):
    """Add a single resource, without trying to enumerate its contents.

    :param ref: URL or path of the resource to add.
    :param properties: Extra properties for the new term. A 'name' key
        overrides the name derived from the path.
    :return: The new Datafile term, or None if row intuiting failed.
    """
    t = self.doc.find_first('Root.Datafile', value=ref)

    if t:
        self.prt("Datafile exists for '{}', deleting".format(ref))
        self.doc.remove_term(t)

    term_name = self.classify_url(ref)

    ref, path, name = self.extract_path_name(ref)

    self.prt("Adding resource for '{}'".format(ref))

    try:
        encoding, ri = self.run_row_intuit(path, self._cache)
    except Exception as e:
        self.warn("Failed to intuit '{}'; {}".format(path, e))
        return None

    if not name:
        from hashlib import sha1
        name = sha1(slugify(path).encode('ascii')).hexdigest()[:12]

        # xlrd gets grouchy if the name doesn't start with a char
        try:
            int(name[0])
            name = 'a' + name[1:]
        except ValueError:
            # BUG FIX: was a bare except; int() on a non-digit first char
            # raises ValueError, and nothing else should be swallowed here.
            pass

    # An explicit 'name' property wins over the derived name.
    name = properties.pop('name', name)

    return self.sections.resources.new_term(
        term_name, ref,
        name=name,
        startline=ri.start_line,
        headerlines=','.join(str(e) for e in ri.header_lines),
        encoding=encoding,
        **properties)
def _load_documentation_files(self):
    """Download each Root.Documentation entry and copy its contents into
    the package, naming each file from its title when available.
    """
    from rowgenerators.generators import get_dflo, download_and_cache
    from rowgenerators import SourceSpec
    from rowgenerators.exceptions import DownloadError
    from os.path import basename, splitext

    for doc in self.doc.find('Root.Documentation'):

        ss = SourceSpec(doc.value)

        try:
            d = download_and_cache(ss, self._cache)
        except DownloadError as e:
            self.warn("Failed to load documentation for '{}'".format(doc.value))
            continue

        dflo = get_dflo(ss, d['sys_path'])

        f = dflo.open('rb')
        try:
            try:
                # For a Zip file, the target_file may actually be a regex, so we have
                # to resolve the regex before using it as a filename
                real_name = basename(
                    dflo.memo[1].name
                )  # Internal detail of how Zip files are accessed
            except (AttributeError, TypeError):
                real_name = basename(ss.target_file)

            # Prefer the slugified title to the base name, because in cases of collections
            # of many data releases, like annual datasets, documentation files may all have the same name,
            # but the titles should be different.
            real_name_base, ext = splitext(real_name)
            name = doc.get_value('title') if doc.get_value('title') else real_name_base

            real_name = slugify(name) + ext

            self._load_documentation(doc, f.read(), real_name)
        finally:
            # BUG FIX: the handle previously leaked when _load_documentation
            # (or name resolution) raised; always close it.
            f.close()
def _generate_identity_name(self):
    """Build the dataset identity name from its slugified components.

    Each component comes from the Name term's child value, falling back to
    the corresponding Root term. Empty components are dropped, and the rest
    are joined with '-' in the order: origin, dataset, time, space, grain,
    version.

    :return: The identity name string.
    """
    name_t = self.find_first('Root.Name', section='Root')

    # NOTE: the original also read name_t.value into an unused local; removed.
    datasetname = name_t.get_value('Name.Dataset', self.get_value('Root.Dataset'))
    version = name_t.get_value('Name.Version', self.get_value('Root.Version'))
    origin = name_t.get_value('Name.Origin', self.get_value('Root.Origin'))
    time = name_t.get_value('Name.Time', self.get_value('Root.Time'))
    space = name_t.get_value('Name.Space', self.get_value('Root.Space'))
    grain = name_t.get_value('Name.Grain', self.get_value('Root.Grain'))

    parts = [
        slugify(e.replace('-', '_'))
        for e in (origin, datasetname, time, space, grain, version)
        if e and str(e).strip()
    ]

    return '-'.join(parts)
def save(self, path=None):
    """Write the filesystem package into a ZIP archive at self.package_path.

    :param path: Unused; kept for interface compatibility with other
        package types. TODO(review): confirm no caller expects it to be used.
    :return: The package path of the created ZIP archive.
    """
    self.check_is_ready()

    root_dir = slugify(self.doc.find_first_value('Root.Name'))

    self.prt(
        "Creating ZIP Package at '{}' from filesystem package at '{}'".
        format(self.package_path, self.source_dir))

    # BUG FIX: use a context manager so the archive handle is closed (and the
    # file not left half-written and open) even if a write raises.
    with zipfile.ZipFile(self.package_path.path, 'w',
                         zipfile.ZIP_DEFLATED) as zf:
        self.zf = zf  # kept as an attribute, matching the original behavior

        for root, dirs, files in walk(self.source_dir):
            for f in files:
                source = join(root, f)
                rel = source.replace(self.source_dir, '').strip('/')
                dest = join(root_dir, rel)

                zf.write(source, dest)

    return self.package_path
def _generate_identity_name(self, mod_version=False):
    """Generate the identity name, optionally adjusting the version.

    mod_version == False -> don't change the version
    mod_version == None  -> remove the version (the non-versioned name)
    mod_version == '+N' / '-N' -> increment/decrement the version by N
        (N defaults to 1 when omitted)
    mod_version == anything else -> set the version to that value
    """
    datasetname = self.find_first_value('Root.Dataset', section='Root')
    origin = self.find_first_value('Root.Origin', section='Root')
    time = self.find_first_value('Root.Time', section='Root')
    space = self.find_first_value('Root.Space', section='Root')
    grain = self.find_first_value('Root.Grain', section='Root')
    variant = self.find_first_value('Root.Variant', section='Root')

    self.update_version()

    if self._has_semver():
        # Modifiable part is the patch
        mv = self['Root'].get_value('Version.Patch')

        def set_ver(mv):
            self['Root'].find_first('Version.Patch').value = mv
    else:
        # Modifiable part is the whole version
        mv = self['Root'].get_value('Version')

        def set_ver(mv):
            self['Root'].find_first('Version').value = mv

    if mod_version is False:
        # Don't change the version
        pass

    elif mod_version is None:
        # Set the version to nothing -- the nonver name
        mv = None

    elif str(mod_version)[0] == '+' or str(mod_version)[0] == '-':
        # Increment or decrement the version

        try:
            int(mv)
        except (ValueError, TypeError):
            # BUG FIX: also catch TypeError, which is what int(None) raises
            # when the version term is missing; previously that escaped as a
            # raw TypeError instead of this MetatabError.
            raise MetatabError(
                "When specifying version math, version value in Root.Version or Version.Patch term must be an integer")

        # Excel likes to make integers into floats
        try:
            if int(mv) == float(mv):
                mv = int(mv)
        except (ValueError, TypeError):
            pass

        step = int(mod_version[1:]) if mod_version[1:] else 1
        if mod_version[0] == '+':
            mv = str(int(mv) + step)
        else:
            mv = str(int(mv) - step)

    else:
        # Set the version to a specific string
        mv = mod_version

    if mv is None:
        version = None
    else:
        set_ver(mv)
        version = self.update_version()

    parts = [
        slugify(str(e).replace('-', '_'))
        for e in (origin, datasetname, time, space, grain, variant, version)
        if e and str(e).strip()
    ]

    return '-'.join(parts)