def retrieve(args):
    """Copy products matching the search expression into a local directory.

    Uses the current working directory when no directory was supplied.
    Returns 0.
    """
    with muninn.open(args.archive) as archive:
        destination = os.getcwd() if args.directory is None else args.directory
        archive.retrieve(where=args.expression, target_path=destination, use_symlinks=args.link)
    return 0
def remove(args):
    """Remove products matching the expression from the archive.

    With --catalogue-only, only the catalogue properties are deleted and
    the stored data is left in place. Returns 0.
    """
    with muninn.open(args.archive) as archive:
        if args.catalogue_only:
            archive.delete_properties(args.expression)
        else:
            archive.remove(args.expression, force=args.force)
    return 0
def update(args):
    """Run the requested update action over all archived products.

    Builds a search expression limited to products with an archive_path
    (and, for 'pull', a remote_url), then applies a Processor to every
    match, optionally in parallel via a multiprocessing pool. Returns 0.
    """
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression

    # Namespace arguments may each contain several space-separated names.
    namespaces = []
    if args.namespaces:
        namespaces = [ns for entry in args.namespaces for ns in entry.split(' ')]

    if args.action == 'pull':
        # only get products with a remote_url
        if expression:
            expression = "is_defined(remote_url) and (%s)" % expression
        else:
            expression = "is_defined(remote_url)"

    with muninn.open(args.archive) as archive:
        products = archive.search(expression, namespaces=namespaces)
        if args.parallel:
            # Each worker opens its own archive connection inside Processor.
            pool = multiprocessing.Pool()
            list(bar(pool.imap(Processor(args), products), total=len(products)))
            pool.close()
            pool.join()
        else:
            worker = Processor(args, archive)
            for product in bar(products):
                worker(product)
    return 0
def list_tags(args):
    """Print each matching product's name, uuid, and its tags. Returns 0."""
    with muninn.open(args.archive) as archive:
        matches = archive.search(where=args.expression, property_names=['uuid', 'product_name'])
        for product in matches:
            tags = archive.tags(product.core.uuid)
            print("%s (%s): %s" % (product.core.product_name, product.core.uuid, ", ".join(tags)))
    return 0
def update(args):
    """Apply an UpdateProcessor to every archived product matching the expression.

    For 'ingest' and 'pull' actions only uuid/product_name are fetched;
    other actions fetch all namespaces. Supports parallel execution via a
    multiprocessing pool (size taken from --processes when given). Returns 0.
    """
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression
    if args.action == 'pull':
        # only get products with a remote_url
        if expression:
            expression = "is_defined(remote_url) and (%s)" % expression
        else:
            expression = "is_defined(remote_url)"

    processor = UpdateProcessor(args)
    with muninn.open(args.archive) as archive:
        if args.action in ['ingest', 'pull']:
            # we only need the uuid and the product_name
            products = archive.search(expression, property_names=['uuid', 'product_name'])
        else:
            products = archive.search(expression, namespaces=archive.namespaces())

        if args.parallel:
            if args.processes is not None:
                pool = multiprocessing.Pool(args.processes)
            else:
                pool = multiprocessing.Pool()
            list(bar(pool.imap(processor, products), total=len(products)))
            pool.close()
            pool.join()
        else:
            for product in bar(products):
                processor.perform_operation(archive, product)
    return 0
def ingest(args):
    """Ingest the given paths (or stdin when '-' is passed) into the archive.

    Runs sequentially or through a multiprocessing pool; the progress bar
    is skipped for a single item. Returns 0 when every ingest succeeded,
    1 otherwise.
    """
    processor = IngestProcessor(args)
    with muninn.open(args.archive) as archive:
        if "-" in args.path:
            paths = [line for line in sys.stdin]
        else:
            paths = args.path

        total = len(paths)
        num_success = 0
        if args.parallel:
            if args.processes is not None:
                pool = multiprocessing.Pool(args.processes)
            else:
                pool = multiprocessing.Pool()
            # sum() consumes the imap iterator; bar() shows progress.
            num_success = sum(bar(pool.imap(processor, paths), total=total))
            pool.close()
            pool.join()
        elif total > 1:
            for path in bar(paths):
                num_success += processor.perform_operation(archive, path)
        elif total == 1:
            # don't show progress bar if we ingest just one item
            num_success = processor.perform_operation(archive, paths[0])
    return 0 if num_success == total else 1
def untag(args):
    """Remove the given tags (all tags with --all) from matching products. Returns 0."""
    with muninn.open(args.archive) as archive:
        # None tells archive.untag to remove every tag.
        tags = None if args.all else args.tag
        for product in archive.search(where=args.expression, property_names=['uuid']):
            archive.untag(product.core.uuid, tags)
    return 0
def run(args):
    """Print an overview of the archive: namespaces (with their schema
    fields), product types, and remote backends. Returns 0."""
    with muninn.open(args.archive) as archive:
        print("NAMESPACES")
        for namespace in sorted(archive.namespaces()):
            print(" %s" % namespace)
            schema = archive.namespace_schema(namespace)
            for name in sorted(schema):
                field = schema[name]
                field_name = field.name()
                # Qualify non-core field types with their defining module.
                if field.__module__ != 'muninn.schema':
                    field_name = '%s.%s' % (field.__module__, field.name())
                optional = schema.is_optional(name)
                print(" %s: %s %s" % (name, field_name, "(optional)" if optional else ""))

        print("\nPRODUCT TYPES")
        for product_type in sorted(archive.product_types()):
            print(" %s" % product_type)

        if archive.remote_backends():
            print("\nREMOTE BACKENDS")
            for remote_backend in sorted(archive.remote_backends()):
                print(" %s" % remote_backend)
    return 0
def retrieve(args):
    """Retrieve matching products via a RetrieveProcessor.

    Destination defaults to the current working directory. Returns the
    processor's result code.
    """
    destination = os.getcwd() if args.directory is None else args.directory
    processor = RetrieveProcessor(args, destination)
    with muninn.open(args.archive) as archive:
        products = archive.search(where=args.expression, property_names=['uuid'])
        return processor.process(archive, args, products)
def verify(args):
    """Verify matching products via a VerifyProcessor.

    Exits the process with status 1 on any verification error; otherwise
    falls through (returning None). NOTE(review): siblings return an int
    exit code instead of calling sys.exit — confirm callers before unifying.
    """
    processor = VerifyProcessor(args)
    with muninn.open(args.archive) as archive:
        products = archive.search(where=args.expression, property_names=['uuid'])
        error = processor.process(archive, args, products)
    if error != 0:
        sys.exit(1)
def __call__(self, item):
    """Process one item, lazily opening the archive on first use.

    Intended to run inside worker sub-processes; KeyboardInterrupt is
    swallowed so only the main process handles it.
    """
    try:
        if self._archive is None:
            # Open once per worker and reuse for subsequent items.
            self._archive = muninn.open(self._archive_name)
        return self.perform_operation(self._archive, item)
    except KeyboardInterrupt:
        # don't capture keyboard interrupts inside sub-processes
        # (only the main process should handle it)
        pass
def attach(args):
    """Attach the given paths (or stdin when '-' is passed) via an
    AttachProcessor, returning its result code."""
    processor = AttachProcessor(args)
    with muninn.open(args.archive) as archive:
        if "-" in args.path:
            paths = [line for line in sys.stdin]
        else:
            paths = args.path
        return processor.process(archive, args, paths)
def strip(args):
    """Strip matching products via a StripProcessor, then clean up derived
    products. Returns the processor's result code."""
    processor = StripProcessor(args)
    with muninn.open(args.archive) as archive:
        products = archive.search(where=args.expression, property_names=['uuid'])
        result = processor.process(archive, args, products)
        archive.cleanup_derived_products()
    return result
def remove(args):
    """Remove matching products via a RemoveProcessor.

    Derived products are cleaned up afterwards unless only the catalogue
    entries were removed. Returns the processor's result code.
    """
    processor = RemoveProcessor(args)
    with muninn.open(args.archive) as archive:
        products = archive.search(where=args.expression, property_names=['uuid'])
        result = processor.process(archive, args, products)
        if not args.catalogue_only:
            archive.cleanup_derived_products()
    return result
def uuid(args):
    """Print the UUID of every product matching the expression. Returns 0."""
    with muninn.open(args.archive) as archive:
        # Collect possibly multiple sort order specifier lists into a single list.
        order_by = [] if args.order_by is None else sum(args.order_by, [])
        matches = archive.search(args.expression, order_by, args.limit, property_names=['uuid'])
        for product in matches:
            print(product.core.uuid)
    return 0
def archive(database, storage, use_enclosing_directory, archive_path):
    """Test fixture: build and yield a clean 'my_arch' archive.

    Generates 'my_arch.cfg' from its template plus 'test.cfg' (applying any
    database options encoded after ':' in *database*), generates
    'product_type.py' from its template, reloads the extension modules,
    then destroys/prepares the archive and yields it with the test
    parameters and a storage checker attached.

    Fix: all file handles are now opened with context managers so they are
    closed deterministically (the originals leaked open files); the unused
    stripped `value` local was removed.
    """
    database, _, database_options = database.partition(':')

    # create my_arch.cfg by combining my_arch.cfg.template and test.cfg
    with open('my_arch.cfg.template', 'r') as template_file:
        template = template_file.read()
    data = template.replace('{database}', database)
    data = data.replace('{storage}', storage)
    with open('my_arch.cfg', 'w') as f:
        f.write(data)
        section = None
        with open('test.cfg') as test_config:
            for line in test_config:
                if line.startswith('['):
                    section = line.strip()
                elif '=' in line and section in ('[sqlite]', '[postgresql]') and database_options:
                    key, _, value = line.partition('=')
                    key = key.strip()
                    # override values from test.cfg with options passed in the database spec
                    for option in database_options.split(','):
                        opt_key, opt_value = option.split('=')
                        if opt_key == key:
                            line = '%s = %s\n' % (opt_key, opt_value)
                if section != '[DEFAULT]':
                    f.write(line)

    # create product type extension from template
    with open('product_type.py.template', 'r') as template_file:
        template = template_file.read()
    data = template.replace('{archive_path}', archive_path)
    data = data.replace('{use_enclosing_directory}', str(use_enclosing_directory))  # TODO jinja?
    with open('product_type.py', 'w') as extension_file:
        extension_file.write(data)

    # refresh product type, hook extension
    if 'product_type' in sys.modules:
        del sys.modules['product_type']
    if 'hook_extension' in sys.modules:
        del sys.modules['hook_extension']
    os.system('rm *.pyc -f')

    # create clean archive
    with muninn.open('my_arch') as archive:
        archive.register_namespace('mynamespace', MyNamespace)
        archive.register_namespace('mynamespace2', MyNamespace2)
        archive.destroy()
        archive.prepare()

        # store params
        # TODO this could be nicer
        archive._params = {
            'database': database,
            'storage': storage,
            'use_enclosing_directory': use_enclosing_directory,
            'archive_path': archive_path,
        }
        archive._checker = STORAGE_CHECKERS[storage](storage)

        yield archive
def handle(self, *args, **options):
    """Render the model template for the given archive's namespaces.

    Reads the archive config to find the table prefix, collects every
    non-core namespace schema field with a known type mapping into a
    template context, and prints the rendered result to stdout.
    """
    # get table_prefix
    archive_name = options['archive']
    config = muninn._read_archive_config_file(muninn._locate_archive_config_file(archive_name))
    try:
        backend = config['archive']['database']
    except KeyError:
        # older config layout uses the 'backend' key
        backend = config['archive']['backend']
    table_prefix = config[backend].get('table_prefix', '')

    if options['meta_options']:
        meta_options = '\n'.join(
            [' %s = %s' % (k, repr(v)) for k, v in options['meta_options'].items()]
        ) + '\n'
    else:
        meta_options = ''

    context = {
        'table_prefix': table_prefix,
        'namespaces': [],
        'meta_options': meta_options,
    }

    with muninn.open(archive_name) as archive:
        for namespace in archive.namespaces():
            if namespace != 'core':
                namespace_schema = archive.namespace_schema(namespace)
                # name_camel_case = namespace_schema.__name__
                ns_context = {
                    'name': namespace,
                    'name_camel_case': namespace.capitalize(),
                    'fields': [],
                }
                for name in sorted(namespace_schema):
                    field = namespace_schema[name]
                    field_name = field.name()
                    if field.__module__ != 'muninn.schema':
                        field_name = '%s.%s' % (field.__module__, field.name())
                    field_type = TYPES_MAPPING.get(field_name, None)
                    optional = namespace_schema.is_optional(name)
                    # Fields without a type mapping are silently skipped.
                    if field_type:
                        ns_context['fields'].append({
                            'code': field_name,
                            'name': name,
                            'type': field_type,
                            'optional': optional
                        })
                context['namespaces'].append(ns_context)

    template = Template(TEMPLATE)
    result = template.render(Context(context))
    # TODO: create a file instead of printing to stdout
    print(result)
def ingest(args):
    """Ingest each path (or stdin lines when '-' is passed) into the archive.

    Paths are expanded into product file sets, filtered against the
    exclude patterns, ingested, and optionally tagged. Errors are logged
    and processing continues. Returns 0 when no errors were encountered,
    1 otherwise.
    """
    with muninn.open(args.archive) as archive:
        path_expansion_function = get_path_expansion_function(
            args.path_is_stem, args.path_is_enclosing_directory)

        # --link, --copy and --keep are mutually exclusive.
        assert not args.link or not args.copy or not args.keep
        use_symlinks = True if args.link else False if args.copy else None
        verify_hash = True if args.verify_hash else False

        errors_encountered = False
        paths = sys.stdin if "-" in args.path else args.path
        for path in paths:
            path = os.path.abspath(path.strip())

            # Expand path into multiple files and/or directories that
            # belong to the same product.
            try:
                product_paths = path_expansion_function(path)
            except Error as error:
                logging.error("%s: unable to determine which files or directories belong to product [%s]"
                              % (path, error))
                errors_encountered = True
                continue

            # Discard paths matching any of the user supplied exclude patterns.
            if args.exclude:
                product_paths = filter_paths(product_paths, args.exclude)
            if not product_paths:
                logging.error("%s: path does not match any files or directories" % path)
                errors_encountered = True
                continue

            try:
                properties = archive.ingest(product_paths, args.product_type,
                                            use_symlinks=use_symlinks, verify_hash=verify_hash,
                                            use_current_path=args.keep, force=args.force)
            except muninn.Error as error:
                logging.error("%s: unable to ingest product [%s]" % (path, error))
                errors_encountered = True
                continue

            if args.tag:
                try:
                    archive.tag(properties.core.uuid, args.tag)
                except muninn.Error as error:
                    logging.error("%s: unable to tag product [%s]" % (path, error))
                    errors_encountered = True

    return 0 if not errors_encountered else 1
def prepare(args):
    """Prepare the archive (catalogue and storage).

    With --dry-run the SQL that would be executed is printed instead;
    with --catalogue-only just the catalogue is prepared. Returns 0.
    """
    with muninn.open(args.archive) as archive:
        if args.dry_run:
            print("The following SQL statements would be executed:")
            for statement in archive.prepare_catalogue(dry_run=True):
                print(" " + statement)
        elif args.catalogue_only:
            archive.prepare_catalogue()
        else:
            archive.prepare(force=args.force)
    return 0
def pull(args):
    """Pull remote products into the archive via a PullProcessor.

    Only active products that have a remote_url and no archive_path are
    considered; the user expression further restricts the set. Returns the
    processor's result code.
    """
    processor = PullProcessor(args)
    with muninn.open(args.archive) as archive:
        # find all remote products that satisfy filter
        expression = "active and is_defined(remote_url) and not is_defined(archive_path)"
        if args.expression:
            expression = "%s and (%s)" % (expression, args.expression)
        logging.debug('Going to pull products that match: %s', expression)
        products = archive.search(where=expression, property_names=['uuid'])
        result = processor.process(archive, args, products)
        if result == 0:
            logging.debug('Pulled %d product(s)', len(products))
    return result
def search(args):
    """Print selected properties of products matching the expression.

    Falls back to a default core-property list when none were requested,
    validates every property against its namespace schema, and writes the
    results in the chosen output format. Returns 0 on success, 1 when a
    requested property does not exist.
    """
    with muninn.open(args.archive) as archive:
        # Collect possibly multiple sort order specifier lists into a single list.
        order_by = [] if args.order_by is None else sum(args.order_by, [])

        # Use default properties if no properties were explicitly requested.
        if args.properties is None:
            properties = [("core", "uuid"), ("core", "active"), ("core", "hash"), ("core", "size"),
                          ("core", "metadata_date"), ("core", "archive_date"), ("core", "archive_path"),
                          ("core", "product_type"), ("core", "product_name"), ("core", "physical_name"),
                          ("core", "validity_start"), ("core", "validity_stop"), ("core", "creation_date"),
                          ("core", "footprint"), ("core", "remote_url")]
        else:
            # Expand wildcards.
            properties = []
            for (namespace, name) in sum(args.properties, []):
                _extend_properties(properties, namespace, name, archive)

        # Check property names against namespace schemas.
        for (namespace, name) in properties:
            schema = archive.namespace_schema(namespace)
            if name not in schema:
                logging.error("no property: %r defined within namespace: %r" % (name, namespace))
                return 1

        # Find products using the search expression.
        products = archive.search(args.expression, order_by, args.limit,
                                  property_names=[".".join(item) for item in properties])

        # Output the requested properties of all matching products in the
        # requested output format.
        if args.output_format == "psv":
            # PSV = Pipe Separated Values
            writer = PlainWriter(properties)
        elif args.output_format == "csv":
            writer = CSVWriter(properties)
        elif tabulate is not None:
            writer = TabulateWriter(properties, args.output_format)
        else:
            writer = PlainWriter(properties)

        writer.header()
        for product in products:
            writer.properties(product)
        writer.footer()
    return 0
def pull(args):
    """Pull all remote products matching the expression into the archive.

    Only products with a remote_url and no archive_path are considered.
    Returns 0.
    """
    with muninn.open(args.archive) as archive:
        verify_hash = True if args.verify_hash else False
        # find all remote products that satisfy filter
        expression = "is_defined(remote_url) and not is_defined(archive_path)"
        if args.expression:
            expression = "%s and (%s)" % (expression, args.expression)
        logging.debug('Going to pull products that match: %s', expression)
        num_products = archive.pull(expression, verify_hash=verify_hash)
        logging.debug('Pulled %d product(s)', num_products)
    return 0
def paths(args):
    """Print the archive path of every product matching the expression.

    Results are always ordered by archive_path/physical_name (appended to
    any user-specified ordering). Returns 0.
    """
    with muninn.open(args.archive) as archive:
        # Collect possibly multiple sort order specifier lists into a single list.
        default_order = ['+core.archive_path', '+core.physical_name']
        if not args.order_by:
            order_by = default_order
        else:
            order_by = sum(args.order_by, []) + default_order

        matches = archive.search(args.expression, order_by, args.limit,
                                 property_names=['archive_path', 'physical_name'])
        for product in matches:
            product_path = archive.product_path(product)
            if product_path is not None:
                print(product_path)
    return 0
def destroy(args):
    """Destroy the archive (or only its catalogue with --catalogue-only).

    Asks for confirmation unless --yes was given. Returns 1 when the user
    declines, 0 otherwise.
    """
    with muninn.open(args.archive) as archive:
        if not args.yes:
            if args.catalogue_only:
                print(("You are about to remove the catalogue database for the archive \"%s\". "
                       "This operation cannot be undone!") % args.archive)
            else:
                print(("You are about to completely remove the archive \"%s\". "
                       "This operation cannot be undone!") % args.archive)
            if not ask_yes_no("Do you want to continue?", False):
                return 1
        if args.catalogue_only:
            archive.destroy_catalogue()
        else:
            archive.destroy()
    return 0
def __call__(self, product):
    """Run the configured update action on one product.

    Lazily opens the archive on first use (once per worker process) and
    dispatches on self.action: 'ingest'/'pull' rebuild the product's
    properties, while 'post_ingest'/'post_pull' re-run the corresponding
    plugin hook when the product type plugin defines it.
    """
    try:
        if self.archive is None:
            self.archive = muninn.open(self.archive_name)

        if self.action == 'ingest':
            logger.debug('running update:ingest on %s ' % product.core.product_name)
            self.archive.rebuild_properties(product.core.uuid,
                                            disable_hooks=self.disable_hooks,
                                            use_current_path=self.use_current_path)
        elif self.action == 'post_ingest':
            plugin = self.archive.product_type_plugin(product.core.product_type)
            if hasattr(plugin, "post_ingest_hook"):
                logger.debug('running update:post_ingest on %s ' % product.core.product_name)
                plugin.post_ingest_hook(self.archive, product)
        elif self.action == 'pull':
            logger.debug('running update:pull on %s ' % product.core.product_name)
            self.archive.rebuild_pull_properties(product.core.uuid,
                                                 verify_hash=self.verify_hash,
                                                 disable_hooks=self.disable_hooks,
                                                 use_current_path=self.use_current_path)
        elif self.action == 'post_pull':
            plugin = self.archive.product_type_plugin(product.core.product_type)
            if hasattr(plugin, "post_pull_hook"):
                logger.debug('running update:post_pull on %s ' % product.core.product_name)
                plugin.post_pull_hook(self.archive, product)
    except KeyboardInterrupt:
        # don't capture keyboard interrupts inside sub-processes
        # (only the main process should handle it)
        if not self.ignore_keyboard_interrupt:
            raise
def export(args):
    """Export products matching the expression to a target directory.

    With --list-formats only the available alternative export formats are
    printed. Returns 1 when no expression was given, 0 otherwise.
    """
    with muninn.open(args.archive) as archive:
        if args.list_formats:
            formats = archive.export_formats()
            if not formats:
                print("no alternative export formats available")
            else:
                print("alternative export formats: " + " ".join(formats))
            print("")
            return 0

        if args.expression is None:
            logging.error("no search expression specified")
            return 1

        destination = os.getcwd() if args.directory is None else args.directory
        archive.export(where=args.expression, target_path=destination, format=args.format)
    return 0
def export(args):
    """Export products matching the expression via an ExportProcessor.

    With --list-formats only the available alternative export formats are
    printed. Returns 1 when no expression was given, otherwise the
    processor's result code.

    Fix: removed the unreachable trailing `return 0` that followed the
    `return processor.process(...)` statement (dead code).
    """
    target_path = os.getcwd() if args.directory is None else args.directory
    processor = ExportProcessor(args, target_path)
    with muninn.open(args.archive) as archive:
        if args.list_formats:
            if not archive.export_formats():
                print("no alternative export formats available")
            else:
                print("alternative export formats: " + " ".join(archive.export_formats()))
            print("")
            return 0
        if args.expression is None:
            logging.error("no search expression specified")
            return 1
        products = archive.search(where=args.expression, property_names=['uuid'])
        return processor.process(archive, args, products)
def run(args):
    """Print a summary (aggregates grouped by the requested properties)
    of products matching the expression. Returns 0."""
    with muninn.open(args.archive) as archive:
        group_by = coalesce_identifier_arguments(args.group_by, archive)
        if args.stats:
            stats = coalesce_identifier_arguments(args.stats, archive)
        else:
            stats = coalesce_identifier_arguments(DEFAULT_STATS, archive)
        order_by = coalesce_order_by_args(args.order_by, archive)

        result, header = archive.summary(args.expression, aggregates=stats, group_by=group_by,
                                         group_by_tag=args.group_by_tag, order_by=order_by,
                                         having=args.stats_filter)

        # Output summary in the requested output format.
        writer = get_writer(header, args)
        writer.header()
        for row in result:
            writer.row(row)
        writer.footer()
    return 0
def untag(args):
    """Untag matching products via an UntagProcessor, returning its result code."""
    processor = UntagProcessor(args)
    with muninn.open(args.archive) as archive:
        products = archive.search(where=args.expression, property_names=['uuid'])
        return processor.process(archive, args, products)
def count(args):
    """Print the number of products matching the expression. Returns 0."""
    with muninn.open(args.archive) as archive:
        total = archive.count(args.expression)
        print(total)
    return 0