def command_comp(arguments, fcapture=None):
    """Compare file records and run COMMAND once per result row.

    The comparison criteria are read from ``arguments``; when none are given
    they default to ``--full`` plus ``--extension``, and any hash-based
    comparison also implies ``--size``.  ``arguments`` is modified in place
    (defaults filled in, ``--lhs-path``/``--rhs-path`` resolved with
    ``os.path.realpath`` when that side is scanned or updated, ``--echo``
    forced on by ``--dry-run``).

    ``COMMAND`` may contain placeholders of the form ``{NAME}`` or
    ``{NAME:section}``.  ``NAME`` is one of ``LHS``, ``RHS``, ``LHSONLY``,
    ``RHSONLY``, ``DUPE``, ``UNIQUE`` (each optionally suffixed with
    ``GROUP``), or ``LHSPATH``/``RHSPATH``; ``section`` is an attribute name
    looked up on a ``FilePath``.  The set of names used selects which query
    is built.  ``{LHSPATH}``/``{RHSPATH}`` are replaced by the values of
    ``--lhs-path``/``--rhs-path``.

    Args:
        arguments: Mapping of option names to values (docopt style).  Keys
            read here: ``--full``, ``--quick``, ``--none``, ``--size``,
            ``--time``, ``--extension``, ``--basename``, ``--skip-empty``,
            ``--lhs-path``, ``--lhs-db``, ``--lhs-update``, ``--rhs-path``,
            ``--rhs-db``, ``--rhs-update``, ``--separator``, ``--dry-run``,
            ``--echo``, ``--ignore-errors`` and ``COMMAND``.
        fcapture: Optional callable.  When given, it receives each
            substituted command (a list of strings) and nothing is executed.

    Raises:
        DocoptExit: If ``{LHSPATH}``/``{RHSPATH}`` is used without the
            matching path option, or if the placeholders in ``COMMAND`` do
            not form a supported combination.
        subprocess.CalledProcessError: If an executed command fails and
            ``--ignore-errors`` is not set.
    """
    # ---- Fill in default comparison criteria --------------------------------
    all_criteria = ('--full', '--quick', '--none', '--size', '--time',
                    '--extension', '--basename')
    if not any(arguments[name] for name in all_criteria):
        arguments['--full'] = True
        arguments['--extension'] = True
    if not any(arguments[name] for name in ('--full', '--quick', '--none')):
        arguments['--none'] = True
    if not arguments['--none']:
        # Hash comparisons always compare sizes as well.
        arguments['--size'] = True

    # Resolve a side's path only when that side is scanned/updated.
    if ((arguments['--lhs-update'] or not arguments['--lhs-db'])
            and arguments['--lhs-path']):
        arguments['--lhs-path'] = os.path.realpath(arguments['--lhs-path'])
    if ((arguments['--rhs-update'] or not arguments['--rhs-db'])
            and arguments['--rhs-path']):
        arguments['--rhs-path'] = os.path.realpath(arguments['--rhs-path'])

    if arguments['--dry-run']:
        arguments['--echo'] = True

    haslhs = arguments['--lhs-path'] or arguments['--lhs-db']
    hasrhs = arguments['--rhs-path'] or arguments['--rhs-db']
    # Two sides are attached to one shared engine; a single side is not.
    attach = bool(haslhs and hasrhs)

    def match(sel, lhs, rhs, complete=True):
        """Add the active comparison criteria between lhs and rhs to sel.

        With ``complete=False`` the hash equality conditions are left out.
        """
        if arguments['--size']:
            sel = sel.where(lhs.c.size == rhs.c.size)
        if arguments['--time']:
            sel = sel.where(lhs.c.time == rhs.c.time)
        if arguments['--extension']:
            sel = sel.where(lhs.c.extension == rhs.c.extension)
        if arguments['--basename']:
            sel = sel.where(lhs.c.basename == rhs.c.basename)
        if arguments['--skip-empty']:
            sel = sel.where(lhs.c.size != 0)
        if complete:
            # "!= None" on a column is intentional: it builds the SQL
            # NULL test rather than a Python comparison.
            if arguments['--quick']:
                sel = sel.where(
                    and_(lhs.c.hash_quick == rhs.c.hash_quick,
                         lhs.c.hash_quick != None))
            if arguments['--full']:
                sel = sel.where(
                    and_(lhs.c.hash_total == rhs.c.hash_total,
                         lhs.c.hash_total != None))
        # Determine if we are on the same file system and ignore the same
        # literal files.
        no_rhs = not (arguments['--rhs-path'] or arguments['--rhs-db'])
        rhs_scanned = ((arguments['--rhs-update'] or not arguments['--rhs-db'])
                       and arguments['--rhs-path'])
        if no_rhs or rhs_scanned:
            sel = sel.where(lhs.c.path != rhs.c.path)
        return sel

    def group_by_criteria(sel, lhs):
        """Group and order sel by every active comparison criterion of lhs."""
        if arguments['--size']:
            sel = sel.group_by(lhs.c.size).order_by(lhs.c.size)
        if arguments['--time']:
            sel = sel.group_by(lhs.c.time).order_by(lhs.c.time)
        if arguments['--extension']:
            sel = sel.group_by(lhs.c.extension).order_by(lhs.c.extension)
        if arguments['--basename']:
            sel = sel.group_by(lhs.c.basename).order_by(lhs.c.basename)
        if arguments['--quick']:
            sel = sel.group_by(lhs.c.hash_quick).order_by(lhs.c.hash_quick)
        if arguments['--full']:
            sel = sel.group_by(lhs.c.hash_total).order_by(lhs.c.hash_total)
        return sel

    # ---- Parse the placeholders used in COMMAND -----------------------------
    # Each entry is a (name, group, section) tuple; group and section are ''
    # when absent.
    argsSubstitutions = set(
        chain(*(re.findall(
            r'\{((?:(?:LHS|RHS|LHSONLY|RHSONLY|DUPE|UNIQUE)(GROUP)?)|LHSPATH|RHSPATH)'
            r'(?:\:(dirpath|basename|ext|name|drive|dirpathnodrive|fullpath))?\}',
            arg) for arg in arguments['COMMAND'])))
    args = {name for name, _group, _section in argsSubstitutions}

    # {LHSPATH}/{RHSPATH} are not query columns: their values come straight
    # from the command line and are merged into every result row below.
    path_constants = {}
    for placeholder, option in (('LHSPATH', '--lhs-path'),
                                ('RHSPATH', '--rhs-path')):
        if placeholder in args:
            if not arguments[option]:
                raise DocoptExit('{%s} without %s' % (placeholder, option))
            path_constants[placeholder] = arguments[option]
            args.remove(placeholder)

    # ---- Pick the query builder matching the placeholder combination --------
    # func.group_filepath is presumably a custom SQL aggregate registered by
    # the engine setup elsewhere in the project -- not visible here.
    if haslhs and hasrhs and args == {'LHS'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHS', 'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHS'), rhs.c.path.label('RHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path).order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHS', 'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                lhs.c.path.label('LHS'),
                func.group_filepath(rhs.c.path).label('RHSGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(lhs.c.path)
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([func.group_filepath(lhs.c.path).label('LHSGROUP')])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(rhs.c.path)
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHSGROUP', 'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path).label('LHSGROUP'),
                rhs.c.path.label('RHS')
            ])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(rhs.c.path)
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHSGROUP', 'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path).label('LHSGROUP'),
                func.group_filepath(rhs.c.path.distinct()).label('RHSGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = group_by_criteria(sel, lhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHSONLY'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('LHSONLY')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'LHSONLYGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(lhs.c.path).label('LHSONLYGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'RHS'}:

        def get_sel(lhs, rhs):
            sel = select([rhs.c.path.label('RHS')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'RHSGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([func.group_filepath(rhs.c.path).label('RHSGROUP')])
            sel = match(sel, lhs, rhs)
            sel = sel.group_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'RHSONLY'}:

        def get_sel(lhs, rhs):
            sel = select([rhs.c.path.label('RHSONLY')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and hasrhs and args == {'RHSONLYGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(rhs.c.path).label('RHSONLYGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and not hasrhs and args == {'DUPE'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('DUPE')])
            sel = match(sel, lhs, rhs)
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and not hasrhs and args == {'DUPEGROUP'}:

        def get_sel(lhs, rhs):
            sel = select([
                func.group_filepath(lhs.c.path.distinct()).label('DUPEGROUP')
            ])
            sel = match(sel, lhs, rhs)
            sel = group_by_criteria(sel, lhs)
            sel = sel.order_by(lhs.c.path)
            sel = sel.order_by(rhs.c.path)
            return sel.distinct()
    elif haslhs and not hasrhs and args == {'UNIQUE'}:

        def get_sel(lhs, rhs):
            sel = select([lhs.c.path.label('UNIQUE')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    elif haslhs and not hasrhs and args == {'UNIQUEGROUP'}:

        def get_sel(lhs, rhs):
            sel = select(
                [func.group_filepath(lhs.c.path).label('UNIQUEGROUP')])
            sel = sel.where(~exists(match(select(['*']), lhs, rhs)))
            sel = sel.order_by(lhs.c.path)
            return sel.distinct()
    else:
        raise DocoptExit(
            'COMMAND does not contain a valid combination of special arguments'
        )

    # ---- Open the database(s) -----------------------------------------------
    lhsrwFiles, lhsroFiles = None, None
    rhsrwFiles, rhsroFiles = None, None
    if attach:
        engine = create(None, separator=arguments['--separator'])
    else:
        engine, lhsrwFiles, lhsroFiles, rhsroFiles = create_side(
            arguments['--lhs-db'],
            arguments['--lhs-update'],
            arguments['--lhs-path'],
            separator=arguments['--separator'])

    with engine_dispose(engine):
        if attach:
            if haslhs:
                lhsrwFiles, lhsroFiles = attach_side(
                    engine, 'lhs', arguments['--lhs-db'],
                    arguments['--lhs-update'], arguments['--lhs-path'])
            if hasrhs:
                rhsrwFiles, rhsroFiles = attach_side(
                    engine, 'rhs', arguments['--rhs-db'],
                    arguments['--rhs-update'], arguments['--rhs-path'])

        if not arguments['--none'] and (lhsrwFiles is not None
                                        or rhsrwFiles is not None):
            # Do a preliminary comparison (no hash conditions) to find the
            # rows that could match but still lack the requested hash.
            lhssel = match(select([lhsroFiles.c.path]),
                           lhsroFiles, rhsroFiles, False)
            rhssel = match(select([rhsroFiles.c.path]),
                           lhsroFiles, rhsroFiles, False)
            if arguments['--quick']:
                lhssel = lhssel.where(lhsroFiles.c.hash_quick == None)
                rhssel = rhssel.where(rhsroFiles.c.hash_quick == None)
            if arguments['--full']:
                lhssel = lhssel.where(lhsroFiles.c.hash_total == None)
                rhssel = rhssel.where(rhsroFiles.c.hash_total == None)

            # Update rw table
            conn = engine.connect()

            def updaterw(ro, rw, sel):
                """Hash every file selected by sel and store it in rw.

                Rows whose file cannot be stat'ed or hashed are deleted
                from rw.  Does nothing when rw is None.
                """
                if rw is None:
                    return
                for result in conn.execute(sel):
                    try:
                        stat = os.stat(result.path, follow_symlinks=False)
                    except Exception:
                        # Best effort: an unreadable file is dropped below.
                        hash_quick, hash_total = None, None
                    else:
                        hash_quick, hash_total = hashfile(
                            result.path, stat, arguments['--quick'])
                    if hash_quick is not None or hash_total is not None:
                        conn.execute(
                            rw.update().where(
                                rw.c.path == result.path).values(
                                    size=stat.st_size,
                                    time=stat.st_mtime_ns,
                                    hash_quick=hash_quick,
                                    hash_total=hash_total))
                    else:
                        conn.execute(
                            rw.delete().where(rw.c.path == result.path))

            try:
                updaterw(lhsroFiles, lhsrwFiles, lhssel)
                updaterw(rhsroFiles, rhsrwFiles, rhssel)
            finally:
                conn.close()

        # Do the full comparison, preferring the writable tables when present.
        sel = get_sel(lhsroFiles if lhsrwFiles is None else lhsrwFiles,
                      rhsroFiles if rhsrwFiles is None else rhsrwFiles)

        # The placeholder pattern does not depend on the row, so build it once.
        placeholder_re = re.compile(r'\{(%s)\}' % '|'.join(
            name + (':' + section if section != '' else '')
            for name, group, section in argsSubstitutions))

        conn = engine.connect()
        try:
            for row in conn.execute(sel):
                values = dict(row.items())
                # Make {LHSPATH}/{RHSPATH} available like any other column.
                values.update(path_constants)

                substitutions = {}
                for name, group, section in argsSubstitutions:
                    if section == '':
                        # simple
                        substitutions[name] = values[name]
                    elif group == '':
                        # single, sectioned
                        path = FilePath(values[name])
                        substitutions[name + ':' + section] = getattr(
                            path, section)
                    else:
                        # grouped, sectioned
                        paths = FilePath.splitpaths(values[name],
                                                    arguments['--separator'])
                        substitutions[name + ':' + section] = \
                            FilePath.joinpaths(
                                [getattr(p, section) for p in paths],
                                arguments['--separator'])

                cmd = [
                    placeholder_re.sub(
                        lambda found: substitutions[found.group(1)], arg)
                    for arg in arguments['COMMAND']
                ]

                if arguments['--echo']:
                    print(' '.join(escape_for_shell(arg) for arg in cmd))
                if fcapture is not None:
                    fcapture(cmd)
                elif not arguments['--dry-run']:
                    try:
                        # NOTE(review): with shell=True and a list, POSIX
                        # passes only cmd[0] to the shell as the command;
                        # the remaining items become shell arguments.
                        # Confirm this is intended on non-Windows hosts.
                        subprocess.run(cmd, shell=True, check=True)
                    except subprocess.CalledProcessError as ex:
                        if arguments['--ignore-errors']:
                            print(ex, file=sys.stderr)
                        else:
                            raise
        finally:
            conn.close()