コード例 #1
0
def command_comp(arguments, fcapture=None):
    """Build and run COMMAND once per row produced by comparing file tables.

    The placeholders found in ``arguments['COMMAND']`` (``{LHS}``, ``{RHS}``,
    ``{LHSGROUP}``, ``{DUPE}``, ``{UNIQUE}``, ..., optionally followed by a
    ``:section`` such as ``:basename``) decide which query is built.  Every
    row returned by that query is substituted into COMMAND and the resulting
    command is echoed, handed to ``fcapture`` or executed.

    ``{LHSPATH}`` and ``{RHSPATH}`` are replaced by the values of
    ``--lhs-path`` and ``--rhs-path`` respectively.

    Args:
        arguments: Mapping of option names (``'--full'``, ``'--lhs-path'``,
            ``'COMMAND'``, ...) to their values.  It is modified in place:
            default comparison flags are filled in, the lhs/rhs paths may be
            replaced by their real paths, and ``--dry-run`` turns on
            ``--echo``.
        fcapture: Optional callable.  When given, it receives each
            substituted command (a list of strings) and nothing is executed.

    Raises:
        DocoptExit: If ``{LHSPATH}``/``{RHSPATH}`` is used without the
            matching path option, or if the placeholders in COMMAND are not a
            supported combination for the sides that were supplied.
        subprocess.CalledProcessError: If an executed command fails and
            ``--ignore-errors`` is not set.
    """

    # No comparison criterion at all: default to full hash + extension.
    if not any(arguments[name]
               for name in ('--full', '--quick', '--none', '--size', '--time',
                            '--extension', '--basename')):
        arguments['--full'] = True
        arguments['--extension'] = True

    # Criteria were given, but no hashing mode: do not hash.
    if not any(arguments[name] for name in ('--full', '--quick', '--none')):
        arguments['--none'] = True

    # Any hashing mode also implies a size comparison.
    if not arguments['--none']:
        arguments['--size'] = True

    if (arguments['--lhs-update']
            or not arguments['--lhs-db']) and arguments['--lhs-path']:
        arguments['--lhs-path'] = os.path.realpath(arguments['--lhs-path'])

    if (arguments['--rhs-update']
            or not arguments['--rhs-db']) and arguments['--rhs-path']:
        arguments['--rhs-path'] = os.path.realpath(arguments['--rhs-path'])

    if arguments['--dry-run']:
        arguments['--echo'] = True

    haslhs = arguments['--lhs-path'] or arguments['--lhs-db']
    hasrhs = arguments['--rhs-path'] or arguments['--rhs-db']

    # Both sides present: each side is attached to a fresh engine below.
    attach = bool(haslhs and hasrhs)

    def match(sel, lhs, rhs, complete=True):
        """Add the lhs/rhs matching conditions to ``sel`` and return it.

        With ``complete`` false the hash comparisons are left out; this is
        used to find the candidate rows before their hashes are computed.
        """
        if arguments['--size']:
            sel = sel.where(lhs.c.size == rhs.c.size)

        if arguments['--time']:
            sel = sel.where(lhs.c.time == rhs.c.time)

        if arguments['--extension']:
            sel = sel.where(lhs.c.extension == rhs.c.extension)

        if arguments['--basename']:
            sel = sel.where(lhs.c.basename == rhs.c.basename)

        if arguments['--skip-empty']:
            sel = sel.where(lhs.c.size != 0)

        if complete:
            # The comparisons against None below build SQL expressions, so
            # they must stay `!=` rather than `is not`.
            if arguments['--quick']:
                sel = sel.where(
                    and_(lhs.c.hash_quick == rhs.c.hash_quick,
                         lhs.c.hash_quick != None))

            if arguments['--full']:
                sel = sel.where(
                    and_(lhs.c.hash_total == rhs.c.hash_total,
                         lhs.c.hash_total != None))

        # Determine if we are on the same file system and ignore the same literal files
        if not (arguments['--rhs-path'] or arguments['--rhs-db']) or \
           (arguments['--rhs-update'] or not arguments['--rhs-db']) and arguments['--rhs-path']:
            sel = sel.where(lhs.c.path != rhs.c.path)

        return sel

    def group_by_criteria(sel, lhs):
        """Group and order ``sel`` by every active comparison column of lhs."""
        for option, column in (('--size', 'size'),
                               ('--time', 'time'),
                               ('--extension', 'extension'),
                               ('--basename', 'basename'),
                               ('--quick', 'hash_quick'),
                               ('--full', 'hash_total')):
            if arguments[option]:
                col = getattr(lhs.c, column)
                sel = sel.group_by(col).order_by(col)
        return sel

    # Each entry is a (name, group, section) tuple, e.g.
    # ('LHSGROUP', 'GROUP', 'basename') for '{LHSGROUP:basename}'.
    argsSubstitutions = set(
        chain(*(re.findall(
            r'\{((?:(?:LHS|RHS|LHSONLY|RHSONLY|DUPE|UNIQUE)(GROUP)?)|LHSPATH|RHSPATH)(?:\:(dirpath|basename|ext|name|drive|dirpathnodrive|fullpath))?\}',
            arg) for arg in arguments['COMMAND'])))
    args = set(name for name, group, section in argsSubstitutions)

    # {LHSPATH}/{RHSPATH} do not come from the query; their values are taken
    # from the command line and merged into every result row further down.
    path_values = {}

    if 'LHSPATH' in args:
        if not arguments['--lhs-path']:
            raise DocoptExit('{LHSPATH} without --lhs-path')
        args.remove('LHSPATH')
        path_values['LHSPATH'] = arguments['--lhs-path']

    if 'RHSPATH' in args:
        if not arguments['--rhs-path']:
            raise DocoptExit('{RHSPATH} without --rhs-path')
        args.remove('RHSPATH')
        path_values['RHSPATH'] = arguments['--rhs-path']

    # Pick the query builder matching the remaining placeholders.
    if haslhs and hasrhs and args == {'LHS'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHS', 'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHS'), rhs.c.path.label('RHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path).order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHS', 'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                lhs.c.path.label('LHS'),
                func.group_filepath(rhs.c.path).label('RHSGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(lhs.c.path)
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([func.group_filepath(lhs.c.path).label('LHSGROUP')])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(rhs.c.path)
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHSGROUP', 'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path).label('LHSGROUP'),
                rhs.c.path.label('RHS')
            ])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(rhs.c.path)
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHSGROUP', 'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path).label('LHSGROUP'),
                func.group_filepath(rhs.c.path.distinct()).label('RHSGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = group_by_criteria(sel, lhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHSONLY'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHSONLY')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'LHSONLYGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(lhs.c.path).label('LHSONLYGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([rhs.c.path.label('RHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([func.group_filepath(rhs.c.path).label('RHSGROUP')])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'RHSONLY'}:

        def get_sel(lhs, rhs):
            sel = select([rhs.c.path.label('RHSONLY')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and hasrhs and args == {'RHSONLYGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(rhs.c.path).label('RHSONLYGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and not hasrhs and args == {'DUPE'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('DUPE')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and not hasrhs and args == {'DUPEGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path.distinct()).label('DUPEGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = group_by_criteria(sel, lhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and not hasrhs and args == {'UNIQUE'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('UNIQUE')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    elif haslhs and not hasrhs and args == {'UNIQUEGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(lhs.c.path).label('UNIQUEGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            sel = sel.distinct()
            return sel
    else:
        raise DocoptExit(
            'COMMAND does not contain a valid combination of special arguments'
        )

    lhsrwFiles, lhsroFiles = None, None
    rhsrwFiles, rhsroFiles = None, None

    if attach:
        engine = create(None, separator=arguments['--separator'])
    else:
        # Single-sided run: create_side also supplies the table used as the
        # "rhs" of the comparison; rhsrwFiles stays None.
        engine, lhsrwFiles, lhsroFiles, rhsroFiles = create_side(
            arguments['--lhs-db'],
            arguments['--lhs-update'],
            arguments['--lhs-path'],
            separator=arguments['--separator'])

    with engine_dispose(engine):
        if attach:
            if haslhs:
                lhsrwFiles, lhsroFiles = attach_side(engine, 'lhs',
                                                     arguments['--lhs-db'],
                                                     arguments['--lhs-update'],
                                                     arguments['--lhs-path'])
            if hasrhs:
                rhsrwFiles, rhsroFiles = attach_side(engine, 'rhs',
                                                     arguments['--rhs-db'],
                                                     arguments['--rhs-update'],
                                                     arguments['--rhs-path'])

        if not arguments['--none'] and (lhsrwFiles is not None
                                        or rhsrwFiles is not None):
            # Do a preliminary comparison (without hashes) to find the rows
            # whose hashes are still missing.
            lhssel = match(select([lhsroFiles.c.path]), lhsroFiles, rhsroFiles,
                           False)
            rhssel = match(select([rhsroFiles.c.path]), lhsroFiles, rhsroFiles,
                           False)

            if arguments['--quick']:
                lhssel = lhssel.where(lhsroFiles.c.hash_quick == None)
                rhssel = rhssel.where(rhsroFiles.c.hash_quick == None)

            if arguments['--full']:
                lhssel = lhssel.where(lhsroFiles.c.hash_total == None)
                rhssel = rhssel.where(rhsroFiles.c.hash_total == None)

            # Update rw table
            conn = engine.connect()

            def updaterw(rw, sel):
                """Hash every path returned by ``sel`` and store it in ``rw``.

                Rows whose file cannot be stat'ed, or for which no hash is
                produced, are deleted from ``rw``.
                """
                if rw is None:
                    return

                for result in conn.execute(sel):
                    try:
                        st = os.stat(result.path, follow_symlinks=False)
                    except Exception:
                        # Best effort: an unreadable entry is dropped below.
                        hash_quick, hash_total = None, None
                    else:
                        hash_quick, hash_total = hashfile(
                            result.path, st, arguments['--quick'])

                    if hash_quick is not None or hash_total is not None:
                        conn.execute(
                            rw.update().where(rw.c.path == result.path).values(
                                size=st.st_size,
                                time=st.st_mtime_ns,
                                hash_quick=hash_quick,
                                hash_total=hash_total))
                    else:
                        conn.execute(
                            rw.delete().where(rw.c.path == result.path))

            try:
                updaterw(lhsrwFiles, lhssel)
                updaterw(rhsrwFiles, rhssel)
            finally:
                conn.close()

        # Do the full comparison

        sel = get_sel(lhsroFiles if lhsrwFiles is None else lhsrwFiles,
                      rhsroFiles if rhsrwFiles is None else rhsrwFiles)

        # The placeholder pattern only depends on COMMAND, so build it once
        # instead of once per row and argument.
        placeholder_re = re.compile(r'\{(%s)\}' % '|'.join(
            name + (':' + section if section != '' else '')
            for name, group, section in argsSubstitutions))

        conn = engine.connect()
        try:
            for row in conn.execute(sel):
                values = dict(row.items())
                # Make {LHSPATH}/{RHSPATH} resolvable like any other column.
                values.update(path_values)

                substitutions = {}
                for name, group, section in argsSubstitutions:
                    if section == '':
                        # simple
                        substitutions[name] = values[name]
                    elif group == '':
                        # single, sectioned
                        path = FilePath(values[name])
                        substitutions[name + ':' + section] = getattr(
                            path, section)
                    else:
                        # grouped, sectioned
                        paths = FilePath.splitpaths(values[name],
                                                    arguments['--separator'])
                        substitutions[name + ':' + section] = \
                            FilePath.joinpaths(
                                [getattr(p, section) for p in paths],
                                arguments['--separator'])

                cmd = [
                    placeholder_re.sub(
                        lambda found: substitutions[found.group(1)], arg)
                    for arg in arguments['COMMAND']
                ]

                if arguments['--echo']:
                    print(' '.join(escape_for_shell(arg) for arg in cmd))

                if fcapture is not None:
                    fcapture(cmd)
                elif not arguments['--dry-run']:
                    try:
                        # NOTE(review): cmd is a list but shell=True is used;
                        # on POSIX only the first item is then the command
                        # string and the rest become arguments of the shell
                        # itself. The substituted file paths also reach the
                        # shell unescaped. Confirm the intended platform
                        # before changing this.
                        subprocess.run(cmd, shell=True, check=True)
                    except subprocess.CalledProcessError as ex:
                        if arguments['--ignore-errors']:
                            print(ex, file=sys.stderr)
                        else:
                            raise
        finally:
            conn.close()