Ejemplo n.º 1
0
 def save_file(self, path, _hash):
     """Create a File record for ``path`` and rename the file to the record id.

     The record's name, size and directory flag are read from ``path``
     before the rename, so they describe the file as it was handed in.
     The file stays in its original directory under the generated id, and
     the record is added to ``self.session``.
     """
     record = File()
     record.id = str(uuid())

     parent_dir, original_name = os.path.split(path)
     record.name = original_name
     record.hash = _hash

     # Stat the path while it still exists under its original name.
     record.filesize = os.path.getsize(path)
     record.isdir = os.path.isdir(path)
     record.date = datetime.datetime.now()

     target = os.path.join(parent_dir, record.id)
     os.renames(path, target)

     self.session.add(record)
Ejemplo n.º 2
0
def _build_arg_parser(default_config_filename, default_state_db,
                      default_endpoint):
    """Build the command-line parser for the outbox program.

    The three ``default_*`` values are only interpolated into help text;
    none of them is installed as an argparse default, so the caller can
    fall back to the configuration file when an option was not given.
    """
    parser = argparse.ArgumentParser(prog=__PROG, description=__DESC)

    # General options
    parser.add_argument('--version', action='version', version=__VER)

    # Outbox name
    parser.add_argument('-n', '--name', type=str,
                        help=('name of the outbox configuration (default: %s)'
                              % __DEFAULT_OUTBOX_NAME))

    # Configuration file, state database and endpoint
    parser.add_argument('-f', '--filename', type=str,
                        help=('configuration filename (default: %s)' %
                              default_config_filename))
    parser.add_argument('-s', '--state_db', type=str,
                        help=('local state database (default: %s)' %
                              default_state_db))
    parser.add_argument('-e', '--endpoint', type=str,
                        help=('endpoint (default: %s)' %
                              default_endpoint))

    # Verbose | Quiet option group
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-v', '--verbose', action='count',
                       default=__LOGLEVEL_DEFAULT,
                       help='verbose output (repeat to increase verbosity)')
    group.add_argument('-q', '--quiet', action='store_true',
                       help='suppress output')

    # Directory and Inclusion/Exclusion option group
    group = parser.add_argument_group(title='Directory traversal options')
    group.add_argument('--root', metavar='DIRECTORY',
                       type=str, nargs='+',
                       help='root directories to be traversed recursively')
    group.add_argument('--exclude', type=str, nargs='+',
                       help='exclude based on regular expression')
    group.add_argument('--include', type=str, nargs='+',
                       help='include based on regular expression')

    # Tagfiler option group
    group = parser.add_argument_group(title='Tagfiler options')
    group.add_argument('--url', dest='url', metavar='URL',
                       type=str, help='URL of the Tagfiler service')
    group.add_argument('--username', dest='username', metavar='USERNAME',
                       type=str,
                       help='username for your Tagfiler user account')
    group.add_argument('--password', dest='password', metavar='PASSWORD',
                       type=str,
                       help='password for your Tagfiler user account')
    group.add_argument('--goauthtoken', dest='goauthtoken',
                       metavar='GOAUTHTOKEN',
                       type=str, help='GOAuth token from GO authentication')
    group.add_argument('--bulk_ops_max', type=int,
                       help=('maximum bulk operations per call to Tagfiler'
                             ' (default: %d)' % __BULK_OPS_MAX))
    return parser


def _load_config(filename):
    """Return the JSON configuration stored in ``filename``.

    Returns an empty dict when the file does not exist. Raises ValueError
    when the file exists but cannot be parsed as JSON. The file is closed
    on both the success and the failure path.
    """
    if not os.path.exists(filename):
        return {}
    with open(filename, 'r') as f:
        cfg = json.load(f)
    logger.debug("config: %s", cfg)
    return cfg


def _scan_roots(outbox_model, state):
    """Walk the model's root trees and collect the files to be registered.

    Each scanned file is compared with its entry in the local state
    database; checksums are computed only for new, modified, or
    checksum-less files. Returns a tuple ``(worklist, found, skipped)``.
    """
    worklist = []
    found = 0
    skipped = 0

    for root in outbox_model.roots:
        for (rfpath, size, mtime, user, group) in tree_scan_stats(
                root, outbox_model.excludes, outbox_model.includes):
            filename = create_uri_friendly_file_path(root, rfpath)
            f = File(filename=filename, mtime=mtime, size=size,
                     username=user, groupname=group)
            found += 1

            # Check if file exists in local state db
            exists = state.find_file(filename)
            if not exists:
                # Case: New file, not seen before
                logger.debug("New: %s", filename)
                f.checksum = sha256sum(filename)
                state.add_file(f)
                worklist.append(f)
            elif f.mtime > exists.mtime:
                # Case: File has changed since last seen
                logger.debug("Modified: %s", filename)
                f.checksum = sha256sum(filename)
                if f.checksum != exists.checksum:
                    f.id = exists.id
                    state.update_file(f)
                    worklist.append(f)
                else:
                    # Content is unchanged: only refresh the stored mod
                    # time. Persist the stored record (the one that was
                    # just modified), not the freshly scanned 'f', which
                    # was never given the stored record's id.
                    exists.mtime = f.mtime
                    state.update_file(exists)
                    skipped += 1
            elif f.size and not exists.checksum:
                # Case: Missing checksum, on regular file
                logger.debug("Missing checksum: %s", filename)
                f.checksum = sha256sum(filename)
                f.id = exists.id
                state.update_file(f)
                worklist.append(f)
            elif not exists.rtime:
                # Case: File has not been registered
                logger.debug("Not registered: %s", filename)
                worklist.append(exists)
            else:
                # Case: File does not meet any criteria for processing
                logger.debug("Skipping: %s", filename)
                skipped += 1

    return worklist, found, skipped


def main(args=None):
    """
    The main routine.

    Optionally accepts 'args' but this is more of a convenience for unit
    testing this module. It passes 'args' directly to the ArgumentParser's
    parse_args(...) method.

    Settings given on the command line take precedence over the values in
    the configuration file, which in turn take precedence over the built-in
    defaults.

    Returns __EXIT_FAILURE when the configuration file is malformed or the
    Tagfiler connection cannot be established, and __EXIT_SUCCESS otherwise.
    Missing required settings (URL, credentials, roots) are reported through
    parser.error(...).
    """
    # Use home directory as default location for outbox.conf and state.db
    tagfiler_dir = os.path.join(os.path.expanduser('~'), '.tagfiler')
    default_config_filename = os.path.join(tagfiler_dir, 'outbox.conf')
    default_state_db = os.path.join(tagfiler_dir, 'state.db')

    # Until we know better, use gsiftp://... as default endpoint prefix
    default_endpoint = "gsiftp://%s" % socket.gethostname()

    parser = _build_arg_parser(default_config_filename, default_state_db,
                               default_endpoint)
    args = parser.parse_args(args)

    # Turn verbosity into a loglevel setting for the global logger
    if args.quiet:
        logging.getLogger().addHandler(logging.NullHandler())
        # Should probably suppress stderr and stdout
    else:
        verbosity = min(args.verbose, __LOGLEVEL_MAX)
        logging.basicConfig(level=__LOGLEVEL[verbosity])
        logger.debug("args: %s", args)

    # Load configuration file, or create configuration based on arguments
    try:
        cfg = _load_config(args.filename or default_config_filename)
    except ValueError as e:
        sys.stderr.write('ERROR: Malformed configuration file: %s\n' % e)
        return __EXIT_FAILURE

    # Create outbox model, and populate from settings
    outbox_model = Outbox()
    outbox_model.name = args.name or cfg.get('name', __DEFAULT_OUTBOX_NAME)
    outbox_model.state_db = (args.state_db or
                             cfg.get('state_db', default_state_db))

    # Tagfiler settings
    outbox_model.url = args.url or cfg.get('url')
    if not outbox_model.url:
        parser.error('Tagfiler URL must be given.')

    outbox_model.username = args.username or cfg.get('username')
    outbox_model.password = args.password or cfg.get('password')
    outbox_model.goauthtoken = args.goauthtoken or cfg.get('goauthtoken')
    # A GOAuth token replaces the username/password pair
    if not outbox_model.goauthtoken and \
            not (outbox_model.username and outbox_model.password):
        parser.error('Tagfiler username and password must be given.')

    outbox_model.bulk_ops_max = int(
        args.bulk_ops_max or cfg.get('bulk_ops_max', __BULK_OPS_MAX))

    # Endpoint setting
    outbox_model.endpoint = (args.endpoint or
                             cfg.get('endpoint', default_endpoint))

    # Roots
    roots = args.root or cfg.get('roots')
    if not roots:
        parser.error('At least one root directory must be given.')
    for root in roots:
        outbox_model.roots.append(root)

    # Add include/exclusion patterns
    for exclude in (args.exclude or cfg.get('excludes') or []):
        outbox_model.excludes.append(re.compile(exclude))
    for include in (args.include or cfg.get('includes') or []):
        outbox_model.includes.append(re.compile(include))

    # Add the default 'name' tag path rule
    name_rule = create_default_name_path_rule(outbox_model.endpoint)
    outbox_model.path_rules.append(name_rule)

    # Add optional path rules
    for rule in cfg.get('rules', []):
        outbox_model.path_rules.append(RERule(**rule))

    # Add optional line (content) rules
    for linerule in cfg.get('linerules', []):
        outbox_model.line_rules.append(LineRule(**linerule))

    # Add optional dicom (content) rules
    for dcmrule in cfg.get('dicomrules', []):
        outbox_model.dicom_rules.append(DicomRule(**dcmrule))

    # Add optional nifti (content) rules
    for niftirule in cfg.get('niftirules', []):
        outbox_model.nifti_rules.append(NiftiRule(**niftirule))

    # Establish Tagfiler client connection
    try:
        client = TagfilerClient(outbox_model.url, outbox_model.username,
                                outbox_model.password,
                                outbox_model.goauthtoken)
        client.connect()
        client.login()
    except (MalformedURL, UnresolvedAddress,
            NetworkError, ProtocolError) as err:
        sys.stderr.write('ERROR: %s\n' % err)
        return __EXIT_FAILURE

    # From here on the client is connected; the 'finally' clause makes sure
    # it is closed even when scanning, tagging or registering raises.
    try:
        state = OutboxStateDAO(outbox_model.state_db)

        # walk the root trees, cksum as needed, create worklist to be
        # registered
        worklist, found, skipped = _scan_roots(outbox_model, state)

        # Tag files in worklist
        tag_director = TagDirector()
        tagged = 0
        for f in worklist:
            logger.debug("Tagging: %s", f)
            tag_director.tag_registered_file(outbox_model.path_rules, f)
            tag_director.tag_registered_file(outbox_model.dicom_rules, f)
            tag_director.tag_registered_file(outbox_model.nifti_rules, f)
            tag_director.tag_file_contents(outbox_model.line_rules, f)
            tagged += 1

        # Register files in worklist
        if worklist:
            client.add_subjects(worklist)
        registered = 0
        for f in worklist:
            logger.debug("Registered: %s", f)
            f.rtime = time.time()
            state.update_file(f)
            registered += 1

        # Print final message unless '--quiet'
        if not args.quiet:
            # Print concluding message to stdout
            print("Done. Found=%s Skipped=%s Tagged=%s Registered=%s" %
                  (found, skipped, tagged, registered))
    finally:
        try:
            client.close()
        except NetworkError as err:
            sys.stderr.write('WARN: %s\n' % err)

    return __EXIT_SUCCESS