def find_all_dups():
    """Scan the folder table for duplicate folders and flag them.

    For each distinct folder name not already marked as a duplicate
    (original=0), all rows sharing that name are compared against the
    first row via Compare(); matches are collected as
    [original idx, duplicate idx] pairs and written back with
    sql.update so the duplicates' 'original' column points at the
    original row.

    Returns:
        int: the number of duplicate rows found.
    """
    Trace()
    dups = []
    table = FolderComp.FolderTable
    query = Expand(r'select distinct foldername from [table] Where original=0')
    folderNames = sql.execute(query, Flatten=True)
    # Hoisted out of the loop: this query is loop-invariant — the folder
    # name is bound through the '?' placeholder at execute time, not at
    # Expand time.
    row_query = Expand(r"select * from [table] Where foldername=?")
    for folderName in folderNames:
        dbg_print(len(dups), folderName)
        rows = sql.execute(row_query, Data=[folderName])
        if len(rows) > 1:
            # First row is treated as the original; the rest are candidates.
            original_row = rows[0]
            oid = original_row[0]
            for dup_row in rows[1:]:
                result = Compare(original_row[2], dup_row[2])
                if result:
                    dups.append([oid, dup_row[0]])
    print('')
    if dups:
        count = sql.update(table, dups, ['original=?'], "WHERE idx=?", Verbose=True)
        Log('Updated [count] rows')
    return len(dups)
def SQL_UnitTest(GenerateTestData=False):
    """Unit test for the sql module.

    Exercises sort_data, table write/read round-trips, row updates, and
    primary-key / identity-index table creation.

    Args:
        GenerateTestData: when True, saves the in-memory test data as
            the expected-results JSON instead of only verifying.
    """
    Trace()
    data = [
        [55, 54, 53, 52, 51],
        [55, 44, 43, 42, 41],
        [35, 34, 33, 32, 31],
        [35, 34, 33, 22, 21],
        [15, 14, 13, 12, 11],
        [5, 4, 3, 2, 1],
    ]
    columns = [
        ['col0', 'int'],
        ['col1', 'int'],
        ['col2', 'int'],
        ['col3', 'int'],
        ['col4', 'int'],
    ]
    primaryKeys = ['col0', 'col1']

    # sort_data appears to sort `data` in place (descending); verify each
    # column against an independently sorted copy.
    # (Renamed from `sorted` — don't shadow the builtin.)
    sortedData = sql.sort_data(data, [0, 1, 2, 3], columns)
    PrettyPrintList(sortedData)
    for idx in range(len(data[0])):
        UnitTest.Step('Check column [idx]')
        col = [row[idx] for row in data]
        expected = [row[idx] for row in data]
        expected.sort()
        expected.reverse()
        UnitTest.Verify(col == expected, 'Column [idx] is sorted correctly')
        Log('col: %s' % col)
        Log('sorted: %s' % expected)

    table = 'SQL_UnitTest'
    if GenerateTestData:
        jsonData = dictn()
        jsonData.expectedResultsJson = data
        UnitTest.SaveExpectedResults(jsonData)

    UnitTest.Step('Write & Read from table')
    sql.write_to_table(table, data, columns, Verbose=True)
    read = sql.select(table)
    UnitTest.Verify(data == read)

    UnitTest.Step('Update Tests')
    row = [[55, 54, 1, 1, 1]]
    count = sql.update(table, row, WhereClause=r'Where col0=55 and col1=54', Verbose=True)
    Log('count=[count]')
    UnitTest.Verify(count == 1)
    updated = sql.select(table, WhereClause=r'Where col0=55 and col1=54', Verbose=True)
    UnitTest.VerifyMatch(updated, row)

    UnitTest.Step('Write & Read unique data')
    # Five rows of five strictly increasing values: 100.., 200.., ... 500..
    unique_data = [list(range(start, start + 100, 20)) for start in range(100, 600, 100)]
    PrettyPrint(unique_data)
    sql.write_to_table(table, unique_data, columns, PrimaryKey=primaryKeys,
                       UseExistingTable=False, IdentityIndex=False, Verbose=True)
    read = sql.select(table)
    PrettyPrint(read)
    UnitTest.Verify(unique_data == read)

    UnitTest.Step('Write & Read indexed unique data')
    sql.write_to_table(table, unique_data, columns, PrimaryKey=None,
                       UseExistingTable=False, IdentityIndex=True, Verbose=True)
    read = sql.select(table)
    PrettyPrint(read)
    # The identity index prepends a 1-based row id; mirror that in the
    # expected data before comparing. (Plain loop — insert() is a side
    # effect, so a comprehension was the wrong construct here.)
    for idx, row in enumerate(unique_data, 1):
        row.insert(0, idx)
    UnitTest.Verify(unique_data == read)
def FindDups(self, Types='[AllMediaTypes]'):
    """Find duplicate files among this folder's table rows.

    Rows are selected sorted by size, grouped into sets of equal size,
    and files within each same-size group are compared byte-for-byte via
    filecmp.cmp (shallow=False). Duplicates are flagged in the table by
    setting their 'original' column to the original row's idx.

    Args:
        self: uses self.Folder, self.Table and self.select_rows().
        Types: media-type filter passed through to select_rows.

    Returns:
        list: paths of the duplicate files (empty if the table does not
        exist).
    """
    Trace(self.Folder, Types)
    if not sql.tables(self.Table):
        return []
    self.Verbose = True

    def FindDupsInSet(rowSet):
        # Compare every pair within one same-size group. A row already
        # identified as a duplicate is skipped as a 'right' candidate so
        # it is only ever paired with its first-found original.
        dups = []
        foundIdx = []
        foundPathNames = []
        for idx, left in enumerate(rowSet):
            idxLeft = left[0]
            pathLeft = left[2]
            for right in rowSet[idx + 1:]:
                idxRight = right[0]
                if idxRight in foundIdx:
                    continue
                pathRight = right[2]
                if filecmp.cmp(pathLeft, pathRight, False):
                    dups.append([idxLeft, idxRight])
                    foundIdx.append(idxRight)
                    foundPathNames.append(pathRight)
        return dups, foundPathNames

    rows = self.select_rows('', Types, SortColumns=['size', 'modified_date ASC'])
    results = []
    dups = []
    found = 0
    Log('Total rows: %d' % (len(rows)))
    print(' Idx Dups Size')

    # Group consecutive rows of equal size (rows are size-sorted).
    rowSet = []
    allSets = [rowSet]
    prev_size = 0
    for idx, row in enumerate(rows):
        print('\r%5d %5d' % (idx, found), end=' ')
        filepath = row[2]
        if not os.path.exists(filepath):
            Log('Error missing file: [filepath]')
            continue
        size = row[4]
        if idx == 0:
            prev_size = size
        if size == prev_size:
            rowSet.append(row)
        else:
            # Size changed: start a new group.
            # BUG FIX: previously the row that triggered the size change
            # was never appended anywhere, so the first file of every
            # size group was dropped and its duplicates could be missed.
            if rowSet:
                rowSet = []
                allSets.append(rowSet)
            rowSet.append(row)
            prev_size = size
    print('')

    for rowSet in allSets:
        if not len(rowSet):
            continue
        dupsRowSet, pathsRowSet = FindDupsInSet(rowSet)
        dups.extend(dupsRowSet)
        results.extend(pathsRowSet)
        found += len(dupsRowSet)

    Log(r'Found %d duplicates' % (len(dups)))
    updated = sql.update(self.Table, dups, ['original=?'], "WHERE idx=?", Verbose=self.Verbose)
    Log(r'Updated %d duplicate rows' % (updated))
    return results