Exemple #1
0
        else:
            head, *tail = path
            if head in data:
                child = data[head]
                if isinstance(child, list):
                    for element in child:
                        self.remove_path(element, tail)
                else:
                    self.remove_path(child, tail)

    def rewrite(self, d, *args, file=None, **kwargs):
        """Return `d` with the paths configured for its `type` removed.

        `d` is handed back unchanged (same object) when it has no `'type'`
        key or when its type has no entry in `self.paths`. Otherwise a copy
        of `d` is made via a JSON round-trip, every path registered for the
        type is passed to `self.remove_path` on that copy, and the copy is
        returned; the caller's `d` is not modified.

        `*args`, `file` and `**kwargs` are accepted but not used here.
        """
        try:
            record_type = d['type']
        except KeyError:
            # Untyped values are passed through as-is.
            return d

        if record_type not in self.paths:
            return d

        # Serialising and re-parsing yields an independent deep copy, so the
        # removals below never touch the object the caller handed in.
        pruned = json.loads(json.dumps(d))
        for path in self.paths[record_type]:
            self.remove_path(pruned, path)
        return pruned


# Script body: build the id-removal rewriter and hand it to
# rewrite_output_files with parallel processing enabled.
# NOTE(review): these statements run at import time (no `__main__` guard);
# confirm that is intended, since parallel=True presumably spawns workers.
print('Removing meaningless `id` properties ...')
r = JSONIDRemovalRewriter()
rewrite_output_files(r, parallel=True)
print('Done')
			return d
		else:
			print(f'failed to rewrite JSON value: {d!r}')
			raise Exception(f'failed to rewrite JSON value ({kwargs}): {d!r}')

if __name__ == '__main__':
	# Both URI_PREFIX and MAP_FILE_NAME are read below (argv[1] and argv[2]),
	# so anything short of two arguments gets the usage text rather than an
	# IndexError traceback.
	if len(sys.argv) < 3:
		cmd = sys.argv[0]
		print(f'''
	Usage: {cmd} URI_PREFIX MAP_FILE_NAME

	Process all json files in the output path (configured with the GETTY_PIPELINE_OUTPUT
	environment variable), rewriting URIs that have the specified URI_PREFIX to urn:uuid:
	URIs that are specified in the MAP_FILE_NAME JSON file.

		'''.lstrip())
		sys.exit(1)

	prefix = sys.argv[1]
	map_file = sys.argv[2]

	print('Rewriting URIs to UUIDs ...')
	start_time = time.time()
	r = UUIDRewriter(prefix, map_file)
	rewrite_output_files(r, update_filename=True, verify_uuid=True, parallel=True, ignore_errors=True)
	# An empty MAP_FILE_NAME argument skips persisting the map.
	if map_file:
		r.persist_map()
	elapsed = time.time() - start_time
	print(f'Done ({elapsed:.1f}s)')
Exemple #3
0
if __name__ == '__main__':
    # The rewrite-map file is required; any further arguments are optional.
    if len(sys.argv) < 2:
        cmd = sys.argv[0]
        print(f'''
	Usage: {cmd} URI_REWRITE_MAP.json

		'''.lstrip())
        sys.exit(1)

    rewrite_map_filename = sys.argv[1]

    # Extra command-line arguments are forwarded to rewrite_output_files as
    # its `files` keyword.
    kwargs = {}
    if len(sys.argv) > 2:
        kwargs['files'] = sys.argv[2:]

    print('Rewriting post-sales URIs ...')
    start_time = time.time()

    # Only the load needs the file handle; close it before the (parallel)
    # rewrite starts instead of holding it open for the whole run.
    with open(rewrite_map_filename, 'r') as f:
        post_sale_rewrite_map = json.load(f)
    # Debugging aid:
    # 	print('Post sales rewrite map:')
    # 	pprint.pprint(post_sale_rewrite_map)

    r = JSONValueRewriter(post_sale_rewrite_map, prefix=True)

    # When the map keys share a common prefix longer than 20 characters, pass
    # it (regex-escaped) as `content_filter_re`.
    # NOTE(review): presumably lets rewrite_output_files skip files that do
    # not contain the prefix; confirm against its implementation.
    prefix = os.path.commonprefix(list(post_sale_rewrite_map.keys()))
    if len(prefix) > 20:
        kwargs['content_filter_re'] = re.compile(re.escape(prefix))

    rewrite_output_files(r, parallel=True, concurrency=8, **kwargs)
    elapsed = time.time() - start_time
    print(f'Done ({elapsed:.1f}s)')