def data_set_path(data, path, val):
    field, _, path2 = path.partition(".")
    if not path2:
        data[field] = val
        return
    parent = data.setdefault(field, {})
    data_set_path(parent, path2, val)
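# Illustrative usage (a minimal sketch added for clarity, not part of the
# original snippet): data_set_path walks a dotted path, creating intermediate
# dicts as needed.
_example = {}
data_set_path(_example, "server.port", 8080)
assert _example == {"server": {"port": 8080}}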
def parse(self, response):
    if not response.url.startswith('https://lede-project.org/'):
        return
    if '?' in response.url:
        if 'do=edit' in response.url:
            for title in response.css('#wiki__text'):
                path = response.url[len('https://lede-project.org/'):]
                path, _, _ = path.partition('?')
                print('Storing', response.url, 'in', path)
                dirname = os.path.dirname(path)
                if dirname:
                    os.makedirs(dirname, exist_ok=True)
                with open(path + '.txt', mode='wt') as file:
                    file.write(title.root.text)
                yield {response.request.url: title.root.text}
        else:
            return
    else:
        yield scrapy.Request(response.url + '?do=edit',
                             headers={'Referrer': response.url})
    for next_page in response.css('a'):
        base_url = next_page.root.base_url
        if not next_page.root.base_url.startswith('https://lede-project.org/'):
            continue
        if '?' in base_url and 'do=edit' not in base_url:
            continue
        yield response.follow(next_page, self.parse)
def _resolve_relpath(
    path,
    rootdir=None,
    # *,
    _path_isabs=IS_ABS_PATH,
    _normcase=NORMCASE,
    _pathsep=PATH_SEP,
):
    # "path" is expected to use "/" for its path separator, regardless
    # of the provided "_pathsep".
    if path.startswith("./"):
        return path[2:]
    if not _path_isabs(path):
        return path

    # Deal with root-dir-as-fileid.
    _, sep, relpath = path.partition("/")
    if sep and not relpath.replace("/", ""):
        return ""

    if rootdir is None:
        return None
    rootdir = _normcase(rootdir)
    if not rootdir.endswith(_pathsep):
        rootdir += _pathsep
    if not _normcase(path).startswith(rootdir):
        return None
    return path[len(rootdir):]
def get_info(self, request):
    path = self.translate_path(request.form['path'])
    parts = path.partition('/representations')
    ip = parts[0]
    hrefs = self._get_href_variations(parts[1] + parts[2])
    namespace = '{http://ead3.archivists.org/schema/}'
    tree = ET.parse('%s/metadata/descriptive/EAD.xml' % ip)

    # regular file - daoset
    for href in hrefs:
        did_list = tree.findall(".//%sdid/*/%sdao[@href='%s']/../.." %
                                (namespace, namespace, href))
        if did_list:
            o = xmltodict.parse(ET.tostring(did_list[0]))
            return json.dumps(o)

    # regular file - no daoset
    for href in hrefs:
        did_list = tree.findall(".//%sdid/%sdao[@href='%s']/.." %
                                (namespace, namespace, href))
        if did_list:
            o = xmltodict.parse(ET.tostring(did_list[0]))
            return json.dumps(o)

    # directory
    for href in hrefs:
        did_list = tree.findall(".//%sc[@base='%s']/%sdid" %
                                (namespace, href, namespace))
        if did_list:
            o = xmltodict.parse(ET.tostring(did_list[0]))
            return json.dumps(o)

    # fallback
    return flask.jsonify(error=404, error_text='Not Found',
                         info='No metadata associated to this element')
def make_asset(product_id, path, md5_check, class_label='hirise product'):
    asset = Asset()
    asset.class_label = class_label
    asset.instrument_name = 'HiRISE'
    asset.product_id = product_id
    asset.md5_check = md5_check
    asset.relative_file_path = path.partition(DATA_ROOT)[-1]
    asset.save()
def GET(self):
    file_dir = pkg_resources.resource_filename("canopy", "__web__/static")
    path = os.path.join(file_dir, self.static_file)
    if not os.path.exists(path):
        raise web.NotFound(view.error.file_not_found(self.static_file))
    tx.response.headers.content_type = \
        mimetypes.guess_type(self.static_file)[0]
    web.header("X-Accel-Redirect", "/x/" + path.partition("/tree/")[2])
def data_get_path(data, path):
    field, _, path2 = path.partition(".")
    val = data.get(field)
    if not path2:
        return val
    if not val:
        return None
    return data_get_path(val, path2)
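# Illustrative usage (a minimal sketch added for clarity, not part of the
# original snippet): missing intermediate fields resolve to None instead of
# raising.
_data = {"server": {"port": 8080}}
assert data_get_path(_data, "server.port") == 8080
assert data_get_path(_data, "client.port") is None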
def fixup(path, rje):
    """ Fix `path` for remote/local handling. """
    if ':' in path:
        # Remote.
        host, colon, path = path.partition(':')
    elif rje:
        path = os.path.join('..', 'RJE', path)
    else:
        path = os.path.join('..', path)
    return path
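# Illustrative usage (a sketch added for clarity, not part of the original
# snippet): a 'host:dir' spec keeps only the remote directory part, while a
# local path is rebased one level up (optionally under RJE).
assert fixup('nas01:/data/run1', rje=False) == '/data/run1'
assert fixup('run1', rje=True) == os.path.join('..', 'RJE', 'run1')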
def __init__(self, path, aws_profile=None):
    # trim the initial "s3://" off it
    if path.lower().startswith("s3://"):
        path = path[5:]
    self.bucket, _, self.path = path.partition("/")
    # add a trailing "/" if it doesn't exist.
    if not self.path.endswith("/"):
        self.path += "/"
    # Optionally accept a profile argument
    self.profile = aws_profile
def _embed_relation(resource, path, document, ancestors):
    """Embeds entities of a given (possibly multilevel) relation into the document.

    :param resource: resource of the document
    :param path: dot-separated chain of relation names
    :param document: document to embed into
    :param ancestors: list of entities on the current 'path' of embedding

    The `ancestors` parameter, a list of (resource name, entity id) tuples, is
    used to prevent embedding of an entity into itself and to limit the depth
    of nested embedding.
    """
    # Extract the topmost relation from the chain of relations and check
    # if there is a reference to a related entity in the actual document
    rel_name, _, tail = path.partition('.')
    relation = config.DOMAIN[resource]['relations'].get(rel_name)
    if not relation or relation['field'] not in document:
        return

    # Embed unless the entity is already embedded
    if rel_name not in document:
        # Retrieve the related entities
        related_resource = current_app.data.driver.db[relation['resource']]
        value = document[relation['field']]
        results = related_resource.find({relation['fkey']: value})
        entities = []
        for result in results:
            # Prevent embedding of an entity into itself
            if (relation['resource'], result['id']) in ancestors:
                continue
            result.pop('_id')
            # Omit the xxx_id property in the embedded entity - it is
            # redundant with the id it references
            if relation['fkey'] != 'id':
                result.pop(relation['fkey'])
            entities.append(result)
        if entities:
            # Embed either a single entity or a list of entities, depending
            # on whether the relation name is singular or plural
            if rel_name.endswith('s'):
                document[rel_name] = entities
            else:
                document[rel_name] = entities[0]
            # Omit the xxx_id property in the embedding entity - it is
            # redundant with the id it references
            if relation['field'] != 'id':
                document.pop(relation['field'])

    # Recursively resolve deeper levels of embedding (limited to 3 levels)
    if tail and rel_name in document and len(ancestors) < 3:
        entities = document[rel_name]
        if not isinstance(entities, list):
            entities = [entities]
        for subdoc in entities:
            ancestors.append((relation['resource'], subdoc['id']))
            _embed_relation(relation['resource'], tail, subdoc, ancestors)
            ancestors.pop()
def load_yaml(self, path, baseDir=None, warnWhenNotFound=False):
    path, sep, fragment = path.partition("#")
    path = os.path.abspath(
        os.path.join(baseDir or self.get_base_dir(), path))
    if warnWhenNotFound and not os.path.isfile(path):
        return path, None
    logger.trace("attempting to load YAML file: %s", path)
    with open(path, "r") as f:
        config = self.yaml.load(f)
    if fragment and config:
        return path, _refResolver.resolve_fragment(config, fragment)
    return path, config
def normalize_path(path):
    """
    Returns a path without a dangling ipc:// in front of it.

    >>> path1 = 'ipc:///home/ucuser/.unnaturalCode/socket'
    >>> path2 = '/home/ucuser/.unnaturalCode/socket'
    >>> normalize_path(path1) == normalize_path(path2)
    True
    """
    if path.startswith('ipc://'):
        _head, _sep, tail = path.partition('ipc://')
        return tail
    return path
def get_subitem(self, obj, current_path, path, reverse, is_sublist=False):
    if not path or obj is None:
        if isinstance(obj, list):
            if len(obj) == 0:
                return 'EMPTY_LIST'
            obj = self.extract_from_array(obj, current_path, None, None,
                                          reverse, is_sublist, lambda x: x)
        return obj
    cur, _, rest = path.partition('.')
    sub_path = current_path + '.' + cur if current_path else cur
    if isinstance(obj, (dict, OrderedDict)):
        return self.get_subitem(obj.get(cur, None), sub_path, rest, reverse)
    elif isinstance(obj, list):
        if cur.isdigit():
            if int(cur) < len(obj):
                return self.get_subitem(obj[int(cur)], sub_path, rest, reverse)
        elif len(obj) == 1 and isinstance(obj[0], (dict, OrderedDict)) and cur in obj[0]:
            # Possibly need to respect previous sort filter
            return self.get_subitem(obj[0][cur], sub_path, rest, reverse)
        else:
            if is_sublist:
                def resolve_fxn(x):
                    return self.get_subitem(x, sub_path, rest, reverse)
            else:
                def resolve_fxn(x):
                    return self.get_subitem(x, current_path, path, reverse, True)
            remaining_path = cur
            if rest:
                remaining_path += '.' + rest
            return self.extract_from_array(obj, current_path, remaining_path,
                                           cur, reverse, is_sublist, resolve_fxn)
    return None
def recursiveDict( source, path ):
    (head, sep, tail) = path.partition( '.' )
    lNames = [ s.lower() for s in source.keys() ]
    names = [ s for s in source.keys() ]
    if head.lower() in lNames:
        properName = names[ lNames.index( head.lower() ) ]
        entry = source[ properName ]
        if len( tail ) and isinstance( entry, dict ):
            return recursiveDict( entry, tail )
        elif len( tail ) and isinstance( entry, list ):
            for subentry in entry:
                result = recursiveDict( subentry, tail )
                if result is not None:
                    return result
        elif len( tail ) == 0:
            return entry
    return None
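# Illustrative usage (a sketch added for clarity, not part of the original
# snippet): keys are matched case-insensitively at every level of the lookup.
_doc = { 'Server': { 'Port': 8080 } }
assert recursiveDict( _doc, 'server.port' ) == 8080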
def get_all_package_diffs(type, rev1='HEAD^1', rev2='HEAD'):
    """Show packages changed, added, or removed (or any combination of those)
    since a commit.

    Arguments:

        type (str): String containing one or more of 'A', 'R', 'C'
        rev1 (str): Revision to compare against, default is 'HEAD^'
        rev2 (str): Revision to compare to rev1, default is 'HEAD'

    Returns:

        A set containing names of affected packages.
    """
    lower_type = type.lower()
    if not re.match('^[arc]*$', lower_type):
        tty.die("Invalid change type: '%s'." % type,
                "Can contain only A (added), R (removed), or C (changed)")

    removed, added = diff_packages(rev1, rev2)

    git = get_git()
    out = git('diff', '--relative', '--name-only', rev1, rev2,
              output=str).strip()

    lines = [] if not out else re.split(r'\s+', out)
    changed = set()
    for path in lines:
        pkg_name, _, _ = path.partition(os.sep)
        if pkg_name not in added and pkg_name not in removed:
            changed.add(pkg_name)

    packages = set()
    if 'a' in lower_type:
        packages |= added
    if 'r' in lower_type:
        packages |= removed
    if 'c' in lower_type:
        packages |= changed

    return packages
def UpdateSubModuleDeps(deps, options, deps_vars):
    """Convert a 'deps' section in a .DEPS.git file with info from submodules."""
    new_deps = {}
    bad_git_urls = set([])

    for dep in deps:
        if not deps[dep]:
            # dep is 'None' and emitted to exclude the dep
            new_deps[dep] = None
            continue

        # Get the URL and the revision/hash for this dependency.
        dep_url, dep_rev = SplitScmUrl(deps[dep])
        path = dep
        git_url = dep_url

        # Get the Git hash based off the SVN rev.
        git_hash = ''
        if dep_rev != 'HEAD':
            subpath = path.partition('/')[2]
            # Pass-through the hash for Git repositories. Resolve the hash
            # for submodule repositories.
            if os.path.exists(options.workspace + "/" + subpath + "/.git"):
                git_hash = '@%s' % GetSubModuleRev(options.workspace, subpath)
            else:
                git_hash = "@%s" % (dep_rev,)

        # If this is webkit, we need to add the var for the hash.
        if dep == 'src/third_party/WebKit' and dep_rev:
            deps_vars['webkit_rev'] = git_hash
            git_hash = 'VAR_WEBKIT_REV'
        # If this is ffmpeg, we need to add the var for the hash.
        elif dep == 'src/third_party/ffmpeg' and dep_rev:
            deps_vars['ffmpeg_hash'] = git_hash
            git_hash = 'VAR_FFMPEG_HASH'

        # Add this Git dep to the new deps.
        new_deps[path] = '%s%s' % (git_url, git_hash)

    return new_deps, bad_git_urls
def _get_subtree(self, tree, path):
    "Given a tree SHA and a path, return the SHA of the subtree."
    try:
        if os.sep in path:
            # The tree entry will only have a single level of the
            # directory name, so if we have a / in our filename we
            # know we're going to have to keep traversing the
            # tree.
            prefix, _, trailing = path.partition(os.sep)
            mode, subtree_sha = tree[prefix.encode('utf-8')]
            subtree = self[subtree_sha]
            return self._get_subtree(subtree, trailing)
        else:
            # The tree entry will point to the SHA of the contents
            # of the subtree.
            mode, sha = tree[path.encode('utf-8')]
            result = self[sha]
            return result
    except KeyError:
        # Some part of the path wasn't found, so the subtree is
        # not present. Return the sentinel value.
        return None
def __init__(self, entries):
    parsed = {}
    for path in filter(None, entries):
        parent, _, remainder = path.partition('/')
        parsed.setdefault(parent, []).append(remainder)
    self._entries = parsed or None  # None is a leaf node
def _generate_category_name(self, path):
    if path == self.library.path:
        return os.path.split(path)[1]
    else:
        return path.partition(self.library.path)[2][1:]
def find_dir(name, path=THIS_DIR):
    """Return the shortest path containing name if possible, or None otherwise."""
    p = path.partition(name)
    return p[0] + p[1] if p[2] else None
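# Illustrative usage (a sketch added for clarity, not part of the original
# snippet), passing an explicit path rather than the module-level THIS_DIR
# default: the result is truncated just after the first occurrence of name.
assert find_dir('tests', '/repo/tests/unit') == '/repo/tests'
assert find_dir('missing', '/repo/tests/unit') is None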
def __init__(self, root, downsample=True, transform=None, target_transform=None,
             dev_mode=False, preprocessed=False, person_filter=None,
             filter_mode='exclude', max_len=201, split='train'):
    self.person_filter = person_filter
    self.filter_mode = filter_mode
    self.root = os.path.expanduser(root)
    self.downsample = downsample
    self.transform = transform
    self.target_transform = target_transform
    self.dev_mode = dev_mode
    self.num_samples = 0
    self.max_len = max_len
    self.split = split

    if preprocessed:
        self.root_dir = os.path.expanduser('librispeech_preprocessed/')
        if self.split == 'train':
            self.data_paths = os.listdir(os.path.join(self.root_dir, 'train'))
            self.root_dir = os.path.join(self.root_dir, 'train/')
        elif self.split == 'test':
            self.data_paths = os.listdir(os.path.join(self.root_dir, 'test'))
            self.root_dir = os.path.join(self.root_dir, 'test/')

        if person_filter:
            if self.filter_mode == 'include':
                self.data_paths = [sample for sample in self.data_paths
                                   if any(sample.startswith(pers + '-')
                                          for pers in self.person_filter)]
            elif self.filter_mode == 'exclude':
                self.data_paths = [sample for sample in self.data_paths
                                   if not any(sample.startswith(pers + '-')
                                              for pers in self.person_filter)]

        self.num_samples = len(self.data_paths)
    else:
        paths = make_manifest(self.root)
        os.mkdir('librispeech_preprocessed')
        os.mkdir('librispeech_preprocessed/train')
        os.mkdir('librispeech_preprocessed/test')

        test_splits = open("librispeech_splits/test_split.txt")
        train_splits = open("librispeech_splits/train_split.txt")
        split_reader = csv.reader(test_splits)
        test_data = [r[0] for r in split_reader]
        split_reader = csv.reader(train_splits)
        train_data = [r[0] for r in split_reader]

        with open(os.path.join(self.root, "SPEAKERS.TXT")) as csvfile:
            csvreader = csv.reader(csvfile, delimiter='|')
            for i in range(12):
                next(csvreader)
            rows = [r for r in csvreader]
            dict = {x[0].strip(): [x[1].strip()] for x in rows}

        for z, path in enumerate(paths):
            keyword = 'train-clean-100/'
            before_keyword, keyword, after_keyword = path.partition(keyword)
            before_keyword, keyword, after_keyword = after_keyword.partition('/')
            pers = before_keyword
            before_keyword, keyword, after_keyword = after_keyword.partition('/')
            before_keyword, keyword, after_keyword = after_keyword.partition('.flac')

            sig = read_audio(path)
            if self.transform is not None:
                sig = self.transform(sig[0])
            else:
                sig = sig[0]

            try:
                data = (sig.tolist(), dict[pers] + [pers])
                if before_keyword in train_data:
                    ujson.dump(data, open("librispeech_preprocessed/train/{}.json".format(before_keyword), 'w'))
                elif before_keyword in test_data:
                    ujson.dump(data, open("librispeech_preprocessed/test/{}.json".format(before_keyword), 'w'))
                if z % 100 == 0:
                    print("{} iterations".format(z))
                self.train_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/train/'))
                self.test_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/test/'))
            except:
                continue

        self.train_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/train/'))
        self.test_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/test/'))
        self.num_samples = len(self.train_data_paths)
        print("{} samples processed".format(self.num_samples))
async def post(self, path):
    '''Starts a task which runs some NLP algorithms on the files pointed to
    by path if the func query parameter is set. Otherwise the body is
    interpreted as a binary file and a new file, whose name is given by the
    filename query parameter, is created.

    In case a task is started, the user can poll for the status of the task
    by using the returned URL.
    '''
    action = self.get_query_argument('func', default=None)
    if action is not None:
        # Send a request to a worker, which will process the
        # resources at the given path with NLP algorithms.
        # The user can poll the status of the task through the
        # returned URL
        conn_pool = ConnectionPool.instance()
        conn = await conn_pool.get()

        # Send message for task to a worker
        try:
            # Create body of the message and encode it to json
            msg_body = {'path': path}
            json_body = json.dumps(msg_body)

            # Create message and send it to the worker
            msg = Message(type=Message.Type['Request'],
                          body=json_body.encode('utf-8'))
            conn.send_msg(msg)

            # Retrieve response of worker
            resp = await conn.receive_msg()
            self.write(resp.body.decode('utf-8'))
        finally:
            conn_pool.put(conn)

        self.set_status(202)
    else:
        file_ = None
        fs = filesystem.instance()
        collection_id = path.partition('/')[0]
        try:
            file_name = self.get_query_argument('filename')
            file_path = os.path.join(path, file_name)
            fs.create(file_path)
            file_ = fs.open(file_path, mode='wb')
        except tornado.web.MissingArgumentError:
            logger.info('Tried to upload file to collection {} without '
                        'specifying a filename.'.format(collection_id))
            raise
        except FileExistsError:
            logger.info('File {!r} already exists'.format(file_path))
            raise
        except FileNotFoundError:
            logger.info(
                'Collection {!r} does not exist'.format(collection_id)
            )
            raise

        await file_.write(self.request.body)
        file_.close()
        self.set_status(201)

    self.finish()
def _generate_category_name(path):
    pic_dir = xdg.get_dir('pictures')
    if path == pic_dir:
        return os.path.split(path)[1]
    else:
        return path.partition(pic_dir)[2][1:]
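# Illustrative sketch (added for clarity, not part of the original snippet),
# assuming xdg.get_dir('pictures') returns something like
# '/home/user/Pictures': partition() drops that prefix and [1:] strips the
# leading separator, leaving a library-relative category name.
assert '/home/user/Pictures/holidays'.partition('/home/user/Pictures')[2][1:] == 'holidays'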
def __init__(self, root, downsample=True, transform=None, target_transform=None,
             dev_mode=False, preprocessed=False, person_filter=None,
             filter_mode='exclude', max_len=201):
    self.person_filter = person_filter
    self.filter_mode = filter_mode
    self.root = os.path.expanduser(root)
    self.downsample = downsample
    self.transform = transform
    self.target_transform = target_transform
    self.dev_mode = dev_mode
    self.num_samples = 0
    self.max_len = max_len

    if preprocessed:
        self.root_dir = os.path.expanduser('vctk_preprocessed/')
        self.data_paths = os.listdir(self.root_dir)

        if person_filter:
            if self.filter_mode == 'include':
                self.data_paths = [
                    sample for sample in self.data_paths
                    if any(pers in sample for pers in self.person_filter)
                ]
            elif self.filter_mode == 'exclude':
                self.data_paths = [
                    sample for sample in self.data_paths
                    if not any(pers in sample for pers in self.person_filter)
                ]

        self.num_samples = len(self.data_paths)
    else:
        paths = make_manifest(self.root)
        os.mkdir('vctk_preprocessed/')

        with open(os.path.join(self.root, "speaker-info.txt")) as csvfile:
            csvreader = csv.reader(csvfile, delimiter=' ')
            next(csvreader)
            rows = [r for r in csvreader]
            dict = {x[0]: [x[4], x[2], x[8]] for x in rows}

        for z, path in enumerate(paths):
            keyword = 'wav48/'
            before_keyword, keyword, after_keyword = path.partition(keyword)
            pers = after_keyword[1:4]

            sig = read_audio(path)
            if self.transform is not None:
                sig = self.transform(sig[0])
            else:
                sig = sig[0]

            try:
                self.data = (sig.tolist(), dict[pers] + [pers])
                ujson.dump(
                    self.data,
                    open("vctk_preprocessed/{}.json".format(after_keyword[5:13]), 'w'))
                if z % 100 == 0:
                    print("{} iterations".format(z))
                self.data_paths = os.listdir(
                    os.path.expanduser('vctk_preprocessed/'))
            except:
                continue

        self.data_paths = os.listdir(os.path.expanduser('vctk_preprocessed/'))
        self.num_samples = len(self.data_paths)
        print("{} samples processed".format(self.num_samples))
def main(self):
    keys = self.env.get('plist_keys', {"CFBundleShortVersionString": "version"})

    # Many types of paths are accepted. Figure out which kind we have.
    path = os.path.normpath(self.env['info_path'])

    try:
        # Wrap all other actions in a try/finally so if we mount an image,
        # it will always be unmounted.

        # Check if we're trying to read something inside a dmg.
        if '.dmg' in path:
            (dmg_path, dmg, dmg_source_path) = path.partition(".dmg")
            dmg_path += ".dmg"

            mount_point = self.mount(dmg_path)
            path = os.path.join(mount_point, dmg_source_path.lstrip('/'))
        else:
            dmg = False

        # Finally check whether this is at least a valid path
        if not os.path.exists(path):
            raise ProcessorError("Path '%s' doesn't exist!" % path)

        # Is the path a bundle?
        info_plist_path = self.get_bundle_info_path(path)
        if info_plist_path:
            path = info_plist_path
        # Does it have a 'plist' extension
        # (naively assuming 'plist' only names, for now)
        elif path.endswith('.plist'):
            # Full path to a plist was supplied, move on.
            pass
        # Might the path contain a bundle at its root?
        else:
            path = self.find_bundle(path)

        # Try to read the plist
        self.output("Reading: %s" % path)
        try:
            info = FoundationPlist.readPlist(path)
        except (FoundationPlist.NSPropertyListSerializationException,
                UnicodeEncodeError) as err:
            raise ProcessorError(err)

        # Copy each plist_keys' values and assign to new env variables
        self.env["plist_reader_output_variables"] = {}
        for key, val in keys.items():
            try:
                self.env[val] = info[key]
                self.output("Assigning value of '%s' to output variable '%s'"
                            % (self.env[val], val))
                # This one is for documentation/recordkeeping
                self.env["plist_reader_output_variables"][val] = self.env[val]
            except KeyError:
                raise ProcessorError(
                    "Key '%s' could not be found in the plist %s!" % (key, path))

    finally:
        if dmg:
            self.unmount(dmg_path)