Example No. 1
def data_set_path(data, path, val):
    field, _, path2 = path.partition(".")
    if not path2:
        data[field] = val
        return
    parent = data.setdefault(field, {})
    data_set_path(parent, path2, val)
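A minimal usage sketch (the call and expected result below are illustrative additions, not part of the original snippet):

# Hypothetical demo: build a nested dict from a dotted path.
config = {}
data_set_path(config, "server.http.port", 8080)
assert config == {"server": {"http": {"port": 8080}}}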
Example No. 2
    def parse(self, response):
        if not response.url.startswith('https://lede-project.org/'):
            return
        if '?' in response.url:
            if 'do=edit' in response.url:
                for title in response.css('#wiki__text'):
                    path = response.url[len('https://lede-project.org/'):]
                    path, _, _ = path.partition('?')
                    print('Storing', response.url, 'in', path)
                    dirname = os.path.dirname(path)

                    if dirname:
                        os.makedirs(dirname, exist_ok=True)
                    with open(path + '.txt', mode='wt') as file:
                        file.write(title.root.text)

                    yield {response.request.url: title.root.text}
            else:
                return
        else:
            yield scrapy.Request(response.url + '?do=edit', headers={'Referer': response.url})
            for next_page in response.css('a'):
                base_url = next_page.root.base_url
                if not base_url.startswith('https://lede-project.org/'):
                    continue
                if '?' in base_url and 'do=edit' not in base_url:
                    continue
                yield response.follow(next_page, self.parse)
Example No. 3
def _resolve_relpath(
    path,
    rootdir=None,
    # *,
    _path_isabs=IS_ABS_PATH,
    _normcase=NORMCASE,
    _pathsep=PATH_SEP,
):
    # "path" is expected to use "/" for its path separator, regardless
    # of the provided "_pathsep".

    if path.startswith("./"):
        return path[2:]
    if not _path_isabs(path):
        return path

    # Deal with root-dir-as-fileid.
    _, sep, relpath = path.partition("/")
    if sep and not relpath.replace("/", ""):
        return ""

    if rootdir is None:
        return None
    rootdir = _normcase(rootdir)
    if not rootdir.endswith(_pathsep):
        rootdir += _pathsep

    if not _normcase(path).startswith(rootdir):
        return None
    return path[len(rootdir):]
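The excerpt relies on module-level constants (IS_ABS_PATH, NORMCASE, PATH_SEP) that are not shown; since they are evaluated as default arguments, they must exist before the def statement runs. A hedged sketch with plausible stand-ins:

import ntpath

# Assumed stand-ins (Windows-style semantics); not from the original module.
IS_ABS_PATH = ntpath.isabs
NORMCASE = ntpath.normcase
PATH_SEP = "\\"

# With _resolve_relpath defined against these stand-ins:
#   _resolve_relpath("./tests/test_a.py")           -> "tests/test_a.py"
#   _resolve_relpath("c:/spam/eggs.py", "c:/spam")  -> "eggs.py"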
Example No. 4
 def get_info(self, request):
     path = self.translate_path(request.form['path'])
     parts = path.partition('/representations')
     ip = parts[0]
     hrefs = self._get_href_variations(parts[1] + parts[2])
     namespace = '{http://ead3.archivists.org/schema/}'
     tree = ET.parse('%s/metadata/descriptive/EAD.xml' % ip)
     # regular file - daoset
     for href in hrefs:
         did_list = tree.findall(".//%sdid/*/%sdao[@href='%s']/../.." %
                                 (namespace, namespace, href))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # regular file - no daoset
     for href in hrefs:
         did_list = tree.findall(".//%sdid/%sdao[@href='%s']/.." %
                                 (namespace, namespace, href))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # directory
     for href in hrefs:
         did_list = tree.findall(".//%sc[@base='%s']/%sdid" %
                                 (namespace, href, namespace))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # fallback
     return flask.jsonify(error=404,
                          error_text='Not Found',
                           info='No metadata associated with this element')
Example No. 5
 def get_info(self, request):
     path = self.translate_path(request.form['path'])
     parts = path.partition('/representations')
     ip = parts[0]
     hrefs = self._get_href_variations(parts[1] + parts[2])
     namespace = '{http://ead3.archivists.org/schema/}'
     tree = ET.parse('%s/metadata/descriptive/EAD.xml' % ip)
     # regular file - daoset
     for href in hrefs:
         did_list = tree.findall(".//%sdid/*/%sdao[@href='%s']/../.."
                                 % (namespace, namespace, href))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # regular file - no daoset
     for href in hrefs:
         did_list = tree.findall(".//%sdid/%sdao[@href='%s']/.."
                                 % (namespace, namespace, href))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # directory
     for href in hrefs:
         did_list = tree.findall(".//%sc[@base='%s']/%sdid"
                                 % (namespace, href, namespace))
         if did_list:
             o = xmltodict.parse(ET.tostring(did_list[0]))
             return json.dumps(o)
     # fallback
     return flask.jsonify(
         error=404,
         error_text='Not Found',
          info='No metadata associated with this element'
     )
Example No. 6
def data_set_path(data, path, val):
    field, _, path2 = path.partition(".")
    if not path2:
        data[field] = val
        return
    parent = data.setdefault(field, {})
    data_set_path(parent, path2, val)
Example No. 7
def make_asset(product_id, path, md5_check, class_label='hirise product'):
    asset = Asset()
    asset.class_label = class_label
    asset.instrument_name = 'HiRISE'
    asset.product_id = product_id
    asset.md5_check = md5_check
    asset.relative_file_path = path.partition(DATA_ROOT)[-1]
    asset.save()
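Asset and DATA_ROOT come from the surrounding project and are not shown; the partition trick itself can be illustrated standalone (values below are hypothetical):

DATA_ROOT = '/data/hirise/'  # assumed value, for illustration only
path = '/data/hirise/PDS/RDR/example.IMG'
# partition() splits around the first occurrence of DATA_ROOT; index -1
# keeps everything after it, i.e. the path relative to the data root.
print(path.partition(DATA_ROOT)[-1])  # -> 'PDS/RDR/example.IMG'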
Example No. 8
def make_asset(product_id, path, md5_check, class_label='hirise product'):
    asset = Asset()
    asset.class_label = class_label
    asset.instrument_name = 'HiRISE'
    asset.product_id = product_id
    asset.md5_check = md5_check
    asset.relative_file_path = path.partition(DATA_ROOT)[-1]
    asset.save()
Example No. 9
 def GET(self):
     file_dir = pkg_resources.resource_filename("canopy", "__web__/static")
     path = os.path.join(file_dir, self.static_file)
     if not os.path.exists(path):
         raise web.NotFound(view.error.file_not_found(self.static_file))
     tx.response.headers.content_type = \
         mimetypes.guess_type(self.static_file)[0]
     web.header("X-Accel-Redirect", "/x/" + path.partition("/tree/")[2])
Example No. 10
def data_get_path(data, path):
    field, _, path2 = path.partition(".")
    val = data.get(field)
    if not path2:
        return val
    if not val:
        return None
    return data_get_path(val, path2)
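A small usage sketch mirroring the setter in Example No. 1 (illustrative, not from the source):

config = {"server": {"http": {"port": 8080}}}
assert data_get_path(config, "server.http.port") == 8080
assert data_get_path(config, "server.tcp.port") is None  # missing subtree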
Example No. 11
def data_get_path(data, path):
    field, _, path2 = path.partition(".")
    val = data.get(field)
    if not path2:
        return val
    if not val:
        return None
    return data_get_path(val, path2)
Example No. 12
def fixup(path, rje):
    """ Fix `path` for remote/local handling. """
    if ':' in path:  # Remote.
        host, colon, path = path.partition(':')
    elif rje:
        path = os.path.join('..', 'RJE', path)
    else:
        path = os.path.join('..', path)
    return path
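Illustrative calls (the 'host:path' remote convention is the one the code itself checks for):

import os

print(fixup('server1:/data/run.dat', rje=False))  # -> '/data/run.dat'
print(fixup('run.dat', rje=True))   # -> os.path.join('..', 'RJE', 'run.dat')
print(fixup('run.dat', rje=False))  # -> os.path.join('..', 'run.dat')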
Example No. 13
 def __init__(self, path, aws_profile=None):
     # trim the initial "s3://" off it
     if path.lower().startswith("s3://"):
         path = path[5:]
     self.bucket, _, self.path = path.partition("/")
     # add a trailing "/" if it doesn't exist.
     if not self.path.endswith("/"):
         path += "/"
     # Optionally accept a profile argument
     self.profile = aws_profile
Example No. 14
def _embed_relation(resource, path, document, ancestors):
	"""Embeds entities of a given (eventually multilevel) relation into
	the document.

	:param resource: resource of the document
	:param path: dot separated chain of relation names
	:param document: document to embed into
	:param ancestors: list of entities on the current 'path' of embedding

	The `ancestors` parameter is a list of (resource name, entity id)
	tuples; it is used to prevent embedding an entity into itself and
	to limit the depth of nested embedding.
	"""
	# Extract the topmost relation from the chain of relations and check
	# if there is a reference to a related entity in the actual document
	rel_name, _, tail = path.partition('.')
	relation = config.DOMAIN[resource]['relations'].get(rel_name)
	if not relation or relation['field'] not in document: return

	# Embed unless the entity is already embedded
	if rel_name not in document:
		# Retrieve the related entities
		related_resource = current_app.data.driver.db[relation['resource']]
		value = document[relation['field']]
		results = related_resource.find({relation['fkey']: value})
		entities = []
		for result in results:
			# Prevent embedding of an entity into itself
			if (relation['resource'], result['id']) in ancestors:
				continue
			result.pop('_id')
			# Omit the xxx_id property in the embedded entity - it is redundant with the id it references
			if relation['fkey'] != 'id':
				result.pop(relation['fkey'])
			entities.append(result)
		if entities:
			# Either entity or list of entities will be embedded depending on singular or plural of relation name
			if rel_name.endswith('s'):
				document[rel_name] = entities
			else:
				document[rel_name] = entities[0]
			# Omit the xxx_id property in the embedding entity - it is redundant with the id it references
			if relation['field'] != 'id':
				document.pop(relation['field'])

	# Recursively resolve deeper levels of embedding (limited to 3 levels)
	if tail and rel_name in document and len(ancestors) < 3:
		entities = document[rel_name]
		if not isinstance(entities, list):
			entities = [entities]
		for subdoc in entities:
			ancestors.append((relation['resource'], subdoc['id']))
			_embed_relation(relation['resource'], tail, subdoc, ancestors)
			ancestors.pop()
Example No. 15
 def load_yaml(self, path, baseDir=None, warnWhenNotFound=False):
     path, sep, fragment = path.partition("#")
     path = os.path.abspath(
         os.path.join(baseDir or self.get_base_dir(), path))
     if warnWhenNotFound and not os.path.isfile(path):
         return path, None
     logger.trace("attempting to load YAML file: %s", path)
     with open(path, "r") as f:
         config = self.yaml.load(f)
     if fragment and config:
         return path, _refResolver.resolve_fragment(config, fragment)
     return path, config
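The '#' convention separates a file path from a fragment to resolve inside the loaded YAML; the partition step in isolation (illustrative values):

path, sep, fragment = 'configs/app.yaml#services/web'.partition('#')
assert (path, fragment) == ('configs/app.yaml', 'services/web')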
Example No. 16
def normalize_path(path):
    """
    Returns a path without a dangling ipc:// in front of it.

    >>> path1 = 'ipc:///home/ucuser/.unnaturalCode/socket'
    >>> path2 =       '/home/ucuser/.unnaturalCode/socket'
    >>> normalize_path(path1) == normalize_path(path2)
    True
    """

    if path.startswith('ipc://'):
        _head, _sep, tail = path.partition('ipc://')
        return tail
    return path
Example No. 17
    def get_subitem(self, obj, current_path, path, reverse, is_sublist=False):
        if not path or obj is None:
            if isinstance(obj, list):
                if len(obj) == 0:
                    return 'EMPTY_LIST'
                obj = self.extract_from_array(obj, current_path, None, None,
                                              reverse, is_sublist, lambda x: x)

            return obj

        cur, _, rest = path.partition('.')

        sub_path = current_path + '.' + cur if current_path else cur

        if isinstance(obj, (dict, OrderedDict)):
            return self.get_subitem(obj.get(cur, None), sub_path, rest,
                                    reverse)

        elif isinstance(obj, list):
            if cur.isdigit():
                if int(cur) < len(obj):
                    return self.get_subitem(obj[int(cur)], sub_path, rest,
                                            reverse)
                elif (len(obj) == 1
                      and isinstance(obj[0], (dict, OrderedDict))
                      and cur in obj[0]):
                    # Possibly need to respect a previous sort filter.
                    return self.get_subitem(obj[0][cur], sub_path, rest,
                                            reverse)
            else:
                if is_sublist:

                    def resolve_fxn(x):
                        return self.get_subitem(x, sub_path, rest, reverse)
                else:

                    def resolve_fxn(x):
                        return self.get_subitem(x, current_path, path, reverse, True)

                remaining_path = cur
                if rest:
                    remaining_path += '.' + rest

                return self.extract_from_array(obj, current_path,
                                               remaining_path, cur, reverse,
                                               is_sublist, resolve_fxn)

        return None
Example No. 18
			def recursiveDict( source, path ):
				(head, sep, tail) = path.partition( '.' )
				lNames = [ s.lower() for s in source.keys() ]
				names = [ s for s in source.keys() ]
				if head.lower() in lNames:
					properName = names[ lNames.index( head.lower() ) ]
					entry = source[ properName ]
					if len( tail ) and isinstance( entry, dict ):
						return recursiveDict( entry, tail )
					elif len( tail ) and isinstance( entry, list ):
						for subentry in entry:
							result = recursiveDict( subentry, tail )
							if result is not None: return result
					elif len( tail ) == 0:
						return entry
				return None
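The helper performs case-insensitive dotted-path lookups, descending into lists until the first match; a hypothetical demo (assuming recursiveDict were available at module level rather than nested as in the source):

doc = {'Config': {'Servers': [{'Name': 'alpha'}, {'Name': 'beta'}]}}
print(recursiveDict(doc, 'config.servers.name'))  # -> 'alpha' (first match wins)
print(recursiveDict(doc, 'config.missing'))       # -> None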
Example No. 19
def get_all_package_diffs(type, rev1='HEAD^1', rev2='HEAD'):
    """Show packages changed, added, or removed (or any combination of those)
       since a commit.

    Arguments:

        type (str): String containing one or more of 'A', 'R', 'C'
        rev1 (str): Revision to compare against, default is 'HEAD^1'
        rev2 (str): Revision to compare to rev1, default is 'HEAD'

    Returns:

        A set containing the names of affected packages.
    """
    lower_type = type.lower()
    if not re.match('^[arc]*$', lower_type):
        tty.die("Invald change type: '%s'." % type,
                "Can contain only A (added), R (removed), or C (changed)")

    removed, added = diff_packages(rev1, rev2)

    git = get_git()
    out = git('diff', '--relative', '--name-only', rev1, rev2,
              output=str).strip()

    lines = [] if not out else re.split(r'\s+', out)
    changed = set()
    for path in lines:
        pkg_name, _, _ = path.partition(os.sep)
        if pkg_name not in added and pkg_name not in removed:
            changed.add(pkg_name)

    packages = set()
    if 'a' in lower_type:
        packages |= added
    if 'r' in lower_type:
        packages |= removed
    if 'c' in lower_type:
        packages |= changed

    return packages
Example No. 20
def UpdateSubModuleDeps(deps, options, deps_vars):
    """Convert a 'deps' section in a .DEPS.git file with info from submodules."""
    new_deps = {}
    bad_git_urls = set([])

    for dep in deps:
        if not deps[dep]:  # dep is 'None' and emitted to exclude the dep
            new_deps[dep] = None
            continue

        # Get the URL and the revision/hash for this dependency.
        dep_url, dep_rev = SplitScmUrl(deps[dep])

        path = dep
        git_url = dep_url

        # Get the Git hash based off the SVN rev.
        git_hash = ''
        if dep_rev != 'HEAD':
            subpath = path.partition('/')[2]
            # Pass-through the hash for Git repositories. Resolve the hash for
            # submodule repositories.
            if os.path.exists(options.workspace + "/" + subpath + "/.git"):
                git_hash = '@%s' % GetSubModuleRev(options.workspace, subpath)
            else:
                git_hash = "@%s" % (dep_rev, )

        # If this is webkit, we need to add the var for the hash.
        if dep == 'src/third_party/WebKit' and dep_rev:
            deps_vars['webkit_rev'] = git_hash
            git_hash = 'VAR_WEBKIT_REV'
        # If this is ffmpeg, we need to add the var for the hash.
        elif dep == 'src/third_party/ffmpeg' and dep_rev:
            deps_vars['ffmpeg_hash'] = git_hash
            git_hash = 'VAR_FFMPEG_HASH'

        # Add this Git dep to the new deps.
        new_deps[path] = '%s%s' % (git_url, git_hash)

    return new_deps, bad_git_urls
Example No. 21
 def _get_subtree(self, tree, path):
     "Given a tree SHA and a path, return the SHA of the subtree."
     try:
         if os.sep in path:
             # The tree entry will only have a single level of the
             # directory name, so if we have a / in our filename we
             # know we're going to have to keep traversing the
             # tree.
             prefix, _, trailing = path.partition(os.sep)
             mode, subtree_sha = tree[prefix.encode('utf-8')]
             subtree = self[subtree_sha]
             return self._get_subtree(subtree, trailing)
         else:
             # The tree entry will point to the SHA of the contents
             # of the subtree.
             mode, sha = tree[path.encode('utf-8')]
             result = self[sha]
             return result
     except KeyError:
         # Some part of the path wasn't found, so the subtree is
         # not present. Return the sentinel value.
         return None
Example No. 22
 def __init__(self, entries):
     parsed = {}
     for path in filter(None, entries):
         parent, _, remainder = path.partition('/')
         parsed.setdefault(parent, []).append(remainder)
     self._entries = parsed or None  # None is a leaf node
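The enclosing class is not shown, but the parsing step can be demonstrated standalone (a sketch of the same loop):

entries = ['pkg/a.py', 'pkg/b.py', 'README', '']
parsed = {}
for path in filter(None, entries):  # filter(None, ...) drops empty strings
    parent, _, remainder = path.partition('/')
    parsed.setdefault(parent, []).append(remainder)
assert parsed == {'pkg': ['a.py', 'b.py'], 'README': ['']}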
Example No. 23
 def _generate_category_name(self, path):
     if path == self.library.path:
         return os.path.split(path)[1]
     else:
         return path.partition(self.library.path)[2][1:]
Example No. 24
def find_dir(name, path=THIS_DIR):
    """Return the shortest path containing name if possible, or None otherwise."""
    p = path.partition(name)
    return p[0] + p[1] if p[2] else None
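THIS_DIR is not defined in the excerpt; since it is evaluated as a default argument, it must exist before the def runs. Passing path explicitly sidesteps that (hypothetical paths):

print(find_dir('src', '/home/user/project/src/lib'))   # -> '/home/user/project/src'
print(find_dir('gone', '/home/user/project/src/lib'))  # -> None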
Example No. 25
 def _generate_category_name(self, path):
     if path == self.library.path:
         return os.path.split(path)[1]
     else:
         return path.partition(self.library.path)[2][1:]
Example No. 26
 def __init__(self, root, downsample=True, transform=None,
              target_transform=None, dev_mode=False, preprocessed=False,
              person_filter=None, filter_mode='exclude', max_len=201,
              split='train'):
     self.person_filter = person_filter
     self.filter_mode = filter_mode
     self.root = os.path.expanduser(root)
     self.downsample = downsample
     self.transform = transform
     self.target_transform = target_transform
     self.dev_mode = dev_mode
     self.num_samples = 0
     self.max_len = max_len
     self.split = split
     
     if preprocessed:
         self.root_dir = os.path.expanduser('librispeech_preprocessed/')
         if self.split == 'train':
             self.data_paths = os.listdir(os.path.join(self.root_dir,'train'))
             self.root_dir = os.path.join(self.root_dir,'train/')
         elif self.split == 'test':
             self.data_paths = os.listdir(os.path.join(self.root_dir,'test'))
             self.root_dir = os.path.join(self.root_dir,'test/')
         
         if person_filter:
             if self.filter_mode == 'include':
                 self.data_paths = [
                     sample for sample in self.data_paths
                     if any(sample.startswith(pers + '-')
                            for pers in self.person_filter)
                 ]
             elif self.filter_mode == 'exclude':
                 self.data_paths = [
                     sample for sample in self.data_paths
                     if not any(sample.startswith(pers + '-')
                                for pers in self.person_filter)
                 ]

         self.num_samples = len(self.data_paths)

     else:
         paths = make_manifest(self.root)
         os.mkdir('librispeech_preprocessed')
         os.mkdir('librispeech_preprocessed/train')
         os.mkdir('librispeech_preprocessed/test')
         
         test_splits = open("librispeech_splits/test_split.txt")
         train_splits = open("librispeech_splits/train_split.txt")
         split_reader = csv.reader(test_splits)
         test_data = [r[0] for r in split_reader]
         split_reader = csv.reader(train_splits)
         train_data = [r[0] for r in split_reader]
         
         with open(os.path.join(self.root,"SPEAKERS.TXT")) as csvfile:                
             csvreader = csv.reader(csvfile, delimiter='|')
             for i in range(12):
                 next(csvreader)
             rows = [r for r in csvreader]
             speakers = {x[0].strip(): [x[1].strip()] for x in rows}
             for z, path in enumerate(paths):
                 
                 keyword = 'train-clean-100/'
                 before_keyword, keyword, after_keyword = path.partition(keyword)
                 before_keyword, keyword, after_keyword = after_keyword.partition('/')
                 pers = before_keyword
                 before_keyword, keyword, after_keyword = after_keyword.partition('/')
                 before_keyword, keyword, after_keyword = after_keyword.partition('.flac')
                 
                 sig = read_audio(path)
                 if self.transform is not None:
                     sig = self.transform(sig[0])
                     
                 else:
                     sig = sig[0]
                 
                 try:
                     data = (sig.tolist(), speakers[pers] + [pers])
                     if before_keyword in train_data:
                         ujson.dump(data, open(
                             "librispeech_preprocessed/train/{}.json".format(
                                 before_keyword), 'w'))
                     elif before_keyword in test_data:
                         ujson.dump(data, open(
                             "librispeech_preprocessed/test/{}.json".format(
                                 before_keyword), 'w'))
                     if z % 100 == 0:
                         print("{} iterations".format(z))
                 except Exception:
                     continue
         
         self.train_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/train/'))
         self.test_data_paths = os.listdir(os.path.expanduser('librispeech_preprocessed/test/'))
         self.num_samples = len(self.train_data_paths)
         print "{} samples processed".format(self.num_samples)
Example No. 27
    async def post(self, path):
        '''Starts a task which runs some NLP algorithms on the files pointed
        to by path if the 'func' query parameter is set. Otherwise the body
        is interpreted as a binary file, and a new file whose name is given
        by the 'filename' query parameter is created.

        In case a task is started, the user can poll for the status of the
        task via the returned URL.
        '''

        action = self.get_query_argument('func', default=None)

        if action is not None:
            # Send a request to a worker, which will process the
            # resources at the given path with NLP algorithms.
            # The user can poll the status of the task through the
            # returned URL.
            conn_pool = ConnectionPool.instance()

            conn = await conn_pool.get()

            # Send message for task to a worker
            try:
                # Create body of the message and encode it to JSON
                msg_body = {
                    'path': path
                }
                json_body = json.dumps(msg_body)

                # Create message and send it to the worker
                msg = Message(type=Message.Type['Request'],
                              body=json_body.encode('utf-8'))
                conn.send_msg(msg)

                # Retrieve response of worker
                resp = await conn.receive_msg()
                self.write(resp.body.decode('utf-8'))
            finally:
                conn_pool.put(conn)

            self.set_status(202)
        else:
            file_ = None
            fs = filesystem.instance()
            collection_id = path.partition('/')[0]

            try:
                file_name = self.get_query_argument('filename')
                file_path = os.path.join(path, file_name)
                fs.create(file_path)
                file_ = fs.open(file_path, mode='wb')
            except tornado.web.MissingArgumentError:
                logger.info('Tried to upload file to collection {} without specifying a filename.'.format(collection_id))
                raise
            except FileExistsError:
                logger.info('File {!r} already exists'.format(file_path))
                raise
            except FileNotFoundError:
                logger.info(
                    'Collection {!r} does not exist'.format(collection_id)
                )
                raise
                
            await file_.write(self.request.body)
            file_.close()

            self.set_status(201)

        self.finish()
Example No. 28
 def __init__(self, entries):
     parsed = {}
     for path in filter(None, entries):
         parent, _, remainder = path.partition('/')
         parsed.setdefault(parent, []).append(remainder)
     self._entries = parsed or None  # None is a leaf node
Example No. 29
 def _generate_category_name(path):
     pic_dir = xdg.get_dir('pictures')
     if path == pic_dir:
         return os.path.split(path)[1]
     else:
         return path.partition(pic_dir)[2][1:]
Example No. 30
 def _generate_category_name(path):
     pic_dir = xdg.get_dir('pictures')
     if path == pic_dir:
         return os.path.split(path)[1]
     else:
         return path.partition(pic_dir)[2][1:]
Example No. 31
    def __init__(self,
                 root,
                 downsample=True,
                 transform=None,
                 target_transform=None,
                 dev_mode=False,
                 preprocessed=False,
                 person_filter=None,
                 filter_mode='exclude',
                 max_len=201):
        self.person_filter = person_filter
        self.filter_mode = filter_mode
        self.root = os.path.expanduser(root)
        self.downsample = downsample
        self.transform = transform
        self.target_transform = target_transform
        self.dev_mode = dev_mode
        self.num_samples = 0
        self.max_len = max_len

        if preprocessed:
            self.root_dir = os.path.expanduser('vctk_preprocessed/')
            self.data_paths = os.listdir(self.root_dir)

            if person_filter:
                if self.filter_mode == 'include':
                    self.data_paths = [
                        sample for sample in self.data_paths
                        if any(pers in sample for pers in self.person_filter)
                    ]
                elif self.filter_mode == 'exclude':
                    self.data_paths = [
                        sample for sample in self.data_paths
                        if not any(pers in sample
                                   for pers in self.person_filter)
                    ]

            self.num_samples = len(self.data_paths)

        else:
            paths = make_manifest(self.root)
            os.mkdir('vctk_preprocessed/')
            with open(os.path.join(self.root, "speaker-info.txt")) as csvfile:
                csvreader = csv.reader(csvfile, delimiter=' ')
                next(csvreader)
                rows = [r for r in csvreader]
                speakers = {x[0]: [x[4], x[2], x[8]] for x in rows}
                for z, path in enumerate(paths):

                    keyword = 'wav48/'
                    before_keyword, keyword, after_keyword = path.partition(
                        keyword)
                    pers = after_keyword[1:4]

                    sig = read_audio(path)
                    if self.transform is not None:
                        sig = self.transform(sig[0])
                    else:
                        sig = sig[0]
                    try:
                        self.data = (sig.tolist(), speakers[pers] + [pers])
                        ujson.dump(
                            self.data,
                            open(
                                "vctk_preprocessed/{}.json".format(
                                    after_keyword[5:13]), 'w'))
                        if z % 100 == 0:
                            print("{} iterations".format(z))
                    except Exception:
                        continue

            self.data_paths = os.listdir(
                os.path.expanduser('vctk_preprocessed/'))
            self.num_samples = len(self.data_paths)
            print "{} samples processed".format(self.num_samples)
Example No. 32
    def main(self):
        keys = self.env.get('plist_keys', {"CFBundleShortVersionString": "version"})

        # Many types of paths are accepted. Figure out which kind we have.
        path = os.path.normpath(self.env['info_path'])

        try:
            # Wrap all other actions in a try/finally so if we mount an image,
            # it will always be unmounted.

            # Check if we're trying to read something inside a dmg.
            if '.dmg' in path:
                (dmg_path, dmg, dmg_source_path) = path.partition(".dmg")
                dmg_path += ".dmg"

                mount_point = self.mount(dmg_path)
                path = os.path.join(mount_point, dmg_source_path.lstrip('/'))
            else:
                dmg = False

            # Finally check whether this is at least a valid path
            if not os.path.exists(path):
                raise ProcessorError("Path '%s' doesn't exist!" % path)

            # Is the path a bundle?
            info_plist_path = self.get_bundle_info_path(path)
            if info_plist_path:
                path = info_plist_path

            # Does it have a '.plist' extension? (naively assuming '.plist'-suffixed names, for now)
            elif path.endswith('.plist'):
                # Full path to a plist was supplied, move on.
                pass

            # Might the path contain a bundle at its root?
            else:
                path = self.find_bundle(path)

            # Try to read the plist
            self.output("Reading: %s" % path)
            try:
                info = FoundationPlist.readPlist(path)
            except (FoundationPlist.NSPropertyListSerializationException,
                    UnicodeEncodeError) as err:
                raise ProcessorError(err)

            # Copy each plist_keys' values and assign to new env variables
            self.env["plist_reader_output_variables"] = {}
            for key, val in keys.items():
                try:
                    self.env[val] = info[key]
                    self.output("Assigning value of '%s' to output variable '%s'" % (self.env[val], val))
                    # This one is for documentation/recordkeeping
                    self.env["plist_reader_output_variables"][val] = self.env[val]
                except KeyError:
                    raise ProcessorError(
                        "Key '%s' could not be found in the plist %s!" % (key, path))

        finally:
            if dmg:
                self.unmount(dmg_path)
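The partition(".dmg") split is the core trick here: everything before and including '.dmg' is the image to mount, and the remainder is the path inside it. In isolation (hypothetical path):

dmg_path, dmg, dmg_source_path = '/tmp/App.dmg/Payload/App.app'.partition('.dmg')
# dmg_path -> '/tmp/App'; dmg -> '.dmg' (truthy, so a dmg was found);
# dmg_source_path -> '/Payload/App.app'. The method then re-appends '.dmg'
# to dmg_path before mounting.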