def save_file(self, path, _hash):
    """Record *path* as a new File row and rename it on disk to its id.

    A fresh File object is populated from the filesystem entry at *path*
    (name, size, directory flag, current timestamp) together with the
    caller-supplied *_hash*, the entry is renamed in place to the new
    UUID-based id, and the record is added to the current DB session.
    Note: the session is not committed here; that is the caller's job.
    """
    record = File()
    record.id = str(uuid())
    record.name = os.path.basename(path)
    record.hash = _hash
    record.filesize = os.path.getsize(path)
    record.isdir = os.path.isdir(path)
    record.date = datetime.datetime.now()
    # Rename the on-disk entry to its new id within the same parent
    # directory (os.renames also prunes any emptied directories).
    parent = os.path.dirname(path)
    os.renames(path, os.path.join(parent, record.id))
    self.session.add(record)
def main(args=None): """ The main routine. Optionally accepts 'args' but this is more of a convenience for unit testing this module. It passes 'args' directly to the ArgumentParser's parse_args(...) method. """ parser = argparse.ArgumentParser(prog=__PROG, description=__DESC) # General options parser.add_argument('--version', action='version', version=__VER) # Outbox name helpstr=('name of the outbox configuration (default: %s)' % __DEFAULT_OUTBOX_NAME) parser.add_argument('-n', '--name', type=str, help=helpstr) # Use home directory as default location for outbox.conf default_config_filename = os.path.join( os.path.expanduser('~'), '.tagfiler', 'outbox.conf') parser.add_argument('-f', '--filename', type=str, help=('configuration filename (default: %s)' % default_config_filename)) # Use home directory as default location for state.db default_state_db = os.path.join(os.path.expanduser('~'), '.tagfiler', 'state.db') parser.add_argument('-s', '--state_db', type=str, help=('local state database (default: %s)' % default_state_db)) # Until we know better, use gsiftp://... 
as default endpoint prefix default_endpoint = "gsiftp://%s" % socket.gethostname() parser.add_argument('-e', '--endpoint', type=str, help=('endpoint (default: %s)' % default_endpoint)) # Verbose | Quite option group group = parser.add_mutually_exclusive_group() group.add_argument('-v', '--verbose', action='count', default=__LOGLEVEL_DEFAULT, help='verbose output (repeat to increase verbosity)') group.add_argument('-q', '--quiet', action='store_true', help='suppress output') # Directory and Inclusion/Exclusion option group group = parser.add_argument_group(title='Directory traversal options') group.add_argument('--root', metavar='DIRECTORY', type=str, nargs='+', help='root directories to be traversed recursively') group.add_argument('--exclude', type=str, nargs='+', help='exclude based on regular expression') group.add_argument('--include', type=str, nargs='+', help='include based on regular expression') # Tagfiler option group group = parser.add_argument_group(title='Tagfiler options') group.add_argument('--url', dest='url', metavar='URL', type=str, help='URL of the Tagfiler service') group.add_argument('--username', dest='username', metavar='USERNAME', type=str, help='username for your Tagfiler user account') group.add_argument('--password', dest='password', metavar='PASSWORD', type=str, help='password for your Tagfiler user account') group.add_argument('--goauthtoken', dest='goauthtoken', metavar='GOAUTHTOKEN', type=str, help='GOAuth token from GO authentication') group.add_argument('--bulk_ops_max', type=int, help='maximum bulk operations per call to Tagfiler' + \ ' (default: %d)' % __BULK_OPS_MAX) # Now parse them args = parser.parse_args(args) # Turn verbosity into a loglevel setting for the global logger if args.quiet: logging.getLogger().addHandler(logging.NullHandler()) # Should probably suppress stderr and stdout else: verbosity = args.verbose if args.verbose < __LOGLEVEL_MAX else __LOGLEVEL_MAX logging.basicConfig(level=__LOGLEVEL[verbosity]) 
logger.debug("args: %s" % args) # Load configuration file, or create configuration based on arguments filename = args.filename or default_config_filename cfg = {} if os.path.exists(filename): f = open(filename, 'r') try: cfg = json.load(f) logger.debug("config: %s" % cfg) except ValueError as e: print >> sys.stderr, ('ERROR: Malformed configuration file: %s' % e) return __EXIT_FAILURE else: f.close() # Create outbox model, and populate from settings outbox_model = Outbox() outbox_model.name = args.name or cfg.get('name', __DEFAULT_OUTBOX_NAME) outbox_model.state_db = args.state_db or \ cfg.get('state_db', default_state_db) # Tagfiler settings outbox_model.url = args.url or cfg.get('url') if not outbox_model.url: parser.error('Tagfiler URL must be given.') outbox_model.username = args.username or cfg.get('username') outbox_model.password = args.password or cfg.get('password') outbox_model.goauthtoken = args.goauthtoken or cfg.get('goauthtoken') if not outbox_model.goauthtoken and \ (not outbox_model.username or not outbox_model.password): parser.error('Tagfiler username and password must be given.') outbox_model.bulk_ops_max = args.bulk_ops_max or \ cfg.get('bulk_ops_max', __BULK_OPS_MAX) outbox_model.bulk_ops_max = int(outbox_model.bulk_ops_max) # Endpoint setting outbox_model.endpoint = args.endpoint or \ cfg.get('endpoint', default_endpoint) # Roots roots = args.root or cfg.get('roots') if not roots or not len(roots): parser.error('At least one root directory must be given.') for root in roots: outbox_model.roots.append(root) # Add include/exclusion patterns excludes = args.exclude or cfg.get('excludes') if excludes and len(excludes): for exclude in excludes: outbox_model.excludes.append(re.compile(exclude)) includes = args.include or cfg.get('includes') if includes and len(includes): for include in includes: outbox_model.includes.append(re.compile(include)) # Add the default 'name' tag path rule name_rule = create_default_name_path_rule(outbox_model.endpoint) 
outbox_model.path_rules.append(name_rule) # Add optional path rules rules = cfg.get('rules', []) for rule in rules: outbox_model.path_rules.append(RERule(**rule)) # Add optional line (content) rules linerules = cfg.get('linerules', []) for linerule in linerules: outbox_model.line_rules.append(LineRule(**linerule)) # Add optional dicom (content) rules dcmrules = cfg.get('dicomrules', []) for dcmrule in dcmrules: outbox_model.dicom_rules.append(DicomRule(**dcmrule)) # Add optional nifti (content) rules niftirules = cfg.get('niftirules', []) for niftirule in niftirules: outbox_model.nifti_rules.append(NiftiRule(**niftirule)) # Establish Tagfiler client connection try: client = TagfilerClient(outbox_model.url, outbox_model.username, outbox_model.password, outbox_model.goauthtoken) client.connect() client.login() except MalformedURL as err: print >> sys.stderr, ('ERROR: %s' % err) return __EXIT_FAILURE except UnresolvedAddress as err: print >> sys.stderr, ('ERROR: %s' % err) return __EXIT_FAILURE except NetworkError as err: print >> sys.stderr, ('ERROR: %s' % err) return __EXIT_FAILURE except ProtocolError as err: print >> sys.stderr, ('ERROR: %s' % err) return __EXIT_FAILURE state = OutboxStateDAO(outbox_model.state_db) worklist = [] found = 0 skipped = 0 tagged = 0 registered = 0 # walk the root trees, cksum as needed, create worklist to be registered for root in outbox_model.roots: for (rfpath, size, mtime, user, group) in \ tree_scan_stats(root, outbox_model.excludes, outbox_model.includes): filename = create_uri_friendly_file_path(root, rfpath) fargs = {'filename': filename, 'mtime': mtime, 'size': size, \ 'username': user, 'groupname': group} f = File(**fargs) found += 1 # Check if file exists in local state db exists = state.find_file(filename) if not exists: # Case: New file, not seen before logger.debug("New: %s" % filename) f.checksum = sha256sum(filename) state.add_file(f) worklist.append(f) elif f.mtime > exists.mtime: # Case: File has changed since last 
seen logger.debug("Modified: %s" % filename) f.checksum = sha256sum(filename) if f.checksum != exists.checksum: f.id = exists.id state.update_file(f) worklist.append(f) else: exists.mtime = f.mtime # update mod time state.update_file(f) skipped += 1 elif f.size and not exists.checksum: # Case: Missing checksum, on regular file logger.debug("Missing checksum: %s" % filename) f.checksum = sha256sum(filename) f.id = exists.id state.update_file(f) worklist.append(f) elif not exists.rtime: # Case: File has not been registered logger.debug("Not registered: %s" % filename) worklist.append(exists) else: # Case: File does not meet any criteria for processing logger.debug("Skipping: %s" % filename) skipped += 1 # Tag files in worklist tag_director = TagDirector() for f in worklist: logger.debug("Tagging: %s" % f) tag_director.tag_registered_file(outbox_model.path_rules, f) tag_director.tag_registered_file(outbox_model.dicom_rules, f) tag_director.tag_registered_file(outbox_model.nifti_rules, f) tag_director.tag_file_contents(outbox_model.line_rules, f) tagged += 1 # Register files in worklist if len(worklist): client.add_subjects(worklist) for f in worklist: logger.debug("Registered: %s" % f) f.rtime = time.time() state.update_file(f) registered += 1 # Print final message unless '--quiet' if not args.quiet: # Print concluding message to stdout print "Done. Found=%s Skipped=%s Tagged=%s Registered=%s" % \ (found, skipped, tagged, registered) try: client.close() except NetworkError as err: print >> sys.stderr, ('WARN: %s' % err) return __EXIT_SUCCESS