def diff_tsv(self, other, filename, exclude_fields):
    """Render the differences between two copies of a tab-separated file.

    The file named ``filename`` is read once from ``self.path`` and once from
    ``other.path`` with pandas. Columns listed in ``exclude_fields`` are
    removed from both tables, and the remaining tables are compared with daff.

    Args:
        other: Object exposing a ``path`` attribute, like ``self``.
        filename: Name of the TSV file, joined onto each ``path``.
        exclude_fields: Iterable of column names to leave out of the
            comparison. Names that are not columns of a file are ignored.

    Returns:
        The output of ``daff.TerminalDiffRender().render`` when the two
        tables differ, otherwise ``None``.
    """
    # Materialise once: the names are needed for both files, and a one-shot
    # iterator would otherwise be exhausted after the first file.
    excluded = list(exclude_fields)

    def load_table(directory):
        # Read one copy of the file and wrap it for daff as a list of rows,
        # header row first.
        with open(os.path.join(directory, filename)) as tsv_file:
            frame = pandas.read_csv(tsv_file, delimiter='\t')
        # Only drop columns that exist; DataFrame.drop raises on unknown
        # labels by default.
        present = [field for field in excluded if field in frame.columns]
        frame = frame.drop(present, axis=1)
        rows = [frame.columns.tolist()] + frame.values.tolist()
        return daff.PythonTableView(rows)

    self_data = load_table(self.path)
    other_data = load_table(other.path)

    alignment = daff.Coopy.compareTables(self_data, other_data).align()
    result = daff.PythonTableView([])
    diff = daff.TableDiff(alignment, daff.CompareFlags())
    diff.hilite(result)

    if not diff.hasDifference():
        return None
    return daff.TerminalDiffRender().render(result)
def get_diff(self, filename1, filename2):
    """Compute the parsed daff diff between two tabular files.

    Each file is loaded through ``self.read_file``; the first table of the
    returned table set is converted to a list of rows (guessed header row
    first) and the two row lists are aligned with daff. The highlighted diff
    table is then handed to ``self.parse_diff``.

    Args:
        filename1: Path of the first file. Its extension decides whether a
            diff is attempted at all.
        filename2: Path of the second file.

    Returns:
        ``None`` when ``filename1`` has no extension or one other than
        ``csv``, ``tsv`` or ``xls`` (case-insensitive); otherwise whatever
        ``self.parse_diff`` returns for the diff table.

    Raises:
        Exception: If ``self.read_file`` returns ``None`` for either file.
    """
    # rpartition reports whether a dot was present at all, so a bare name
    # such as "csv" is not mistaken for a file with a csv extension.
    _, dot, ext = filename1.rpartition(".")
    if not dot or ext.lower() not in ('csv', 'tsv', 'xls'):
        return None

    csvs = {}
    for f in (filename1, filename2):
        table_set = self.read_file(f)
        if table_set is None:
            raise Exception("Invalid table set")

        # Only the first table of the set takes part in the diff.
        row_set = table_set.tables[0]

        # NOTE(review): headers_guess / headers_processor / offset_processor
        # are presumably the messytables helpers - confirm against the
        # file's imports.
        offset, headers = headers_guess(row_set.sample)
        row_set.register_processor(headers_processor(headers))
        # Start reading just past the detected header row.
        row_set.register_processor(offset_processor(offset + 1))

        # daff wants plain lists: header row first, then the cell values.
        rows = [headers]
        rows.extend([cell.value for cell in row] for row in row_set)
        csvs[f] = rows

    table1 = daff.PythonTableView(csvs[filename1])
    table2 = daff.PythonTableView(csvs[filename2])
    alignment = daff.Coopy.compareTables(table1, table2).align()

    # hilite() writes the diff rows into the view passed to it.
    table_diff = daff.PythonTableView([])
    highlighter = daff.TableDiff(alignment, daff.CompareFlags())
    highlighter.hilite(table_diff)

    return self.parse_diff(table_diff)
def process_ids(prev_file, curr_file, prev_id_file, id_file):
    """Carry row identifiers from a previous version of a file to the current one.

    ``prev_file`` and ``curr_file`` are loaded and aligned with daff. For
    every part listed in the alignment's ``child_order``, each row index of
    the current version gets an id. The id recorded in ``prev_id_file`` for
    the matching previous row is reused when there is one; otherwise a new
    ``uuid4`` string is minted. The mapping is written to ``id_file`` as JSON
    and returned.

    Args:
        prev_file: Path of the previous version. When it does not exist,
            ``curr_file`` is compared against itself.
        curr_file: Path of the current version.
        prev_id_file: Path of the JSON id mapping produced for the previous
            version; treated as empty when the file does not exist.
        id_file: Path the new JSON id mapping is written to.

    Returns:
        dict mapping each part name to a dict of
        ``{row index in the current version: id string}``.
    """
    io = daff.TableIO()
    dapp = daff.Coopy(io)
    if not os.path.exists(prev_file):
        # No earlier version available: align the current file with itself.
        prev_file = curr_file
    v1 = dapp.loadTable(prev_file, 'local')
    v2 = dapp.loadTable(curr_file, 'remote')
    flags = daff.CompareFlags()
    flags.allow_nested_cells = True
    alignment = daff.compareTables3(None, v1, v2, flags).align()
    # NOTE(review): the highlighted output is thrown away; this call is
    # presumably kept for its effect on `alignment` - confirm before removing.
    daff.TableDiff(alignment, flags).hiliteSingle(daff.SimpleTable(0, 0))

    if os.path.exists(prev_id_file):
        # Context manager so the handle is closed once the JSON is parsed.
        with open(prev_id_file) as prev_ids:
            in_refs = json.load(prev_ids)
    else:
        in_refs = {}

    out_refs = {}
    for part in alignment.comp.child_order:
        comp = alignment.comp.children.get(part)
        order = comp.alignment.toOrder().getList()
        ref = in_refs.get(part, {})
        # setdefault always binds this part's own dict, so a repeated part
        # name can no longer write into the dict of the previous part.
        out_ref = out_refs.setdefault(part, {})
        for o in order:
            # Row 0 never receives an id (presumably the header row), and a
            # negative `r` means the row has no counterpart in the current
            # version.
            if o.r <= 0:
                continue
            # Keys loaded from JSON are strings, hence str(o.l). A negative
            # `l` means there is no previous row to inherit an id from.
            src = ref.get(str(o.l)) if o.l >= 0 else None
            out_ref[o.r] = str(uuid.uuid4()) if src is None else src

    # Context manager guarantees the mapping is flushed and the file closed.
    with open(id_file, 'w') as out_file:
        json.dump(out_refs, out_file, indent=2)
    return out_refs
['Ireland','Dublin'], ['France','Paris'], ['Spain','Barcelona'] ] data2 = [ ['Country','Code','Capital'], ['Ireland','ie','Dublin'], ['France','fr','Paris'], ['Spain','es','Madrid'], ['Germany','de','Berlin'] ] table1 = daff.PythonTableView(data1) table2 = daff.PythonTableView(data2) alignment = daff.Coopy.compareTables(table1,table2).align() data_diff = [] table_diff = PythonTableView(data_diff) flags = daff.CompareFlags() highlighter = daff.TableDiff(alignment,flags) highlighter.hilite(table_diff) diff2html = daff.DiffRender() diff2html.usePrettyArrows(False) diff2html.render(table_diff) table_diff_html = diff2html.html() print(table_diff_html)
# Script: diff two SQLite tables with daff's SqlCompare and check the
# highlighted result against a hand-written expectation.
import daff
import sqlite3

# Both versions of the table live side by side in one in-memory database.
db = sqlite3.connect(':memory:')
c = db.cursor()

schema_statements = (
    "CREATE TABLE ver1 (id INTEGER PRIMARY KEY, name TEXT)",
    "CREATE TABLE ver2 (id INTEGER PRIMARY KEY, name TEXT)",
)
for statement in schema_statements:
    c.execute(statement)

# Between the versions, id 1 disappears, id 3 appears, and the name stored
# under id 2 changes.
inserts = (
    ('INSERT INTO ver1 VALUES (?,?)',
     [(1, "Paul"), (2, "Naomi"), (4, "Hobbes")]),
    ('INSERT INTO ver2 VALUES (?,?)',
     [(2, "Noemi"), (3, "Calvin"), (4, "Hobbes")]),
)
for statement, data in inserts:
    c.executemany(statement, data)

# Wrap the connection and the two tables for daff.
sd = daff.SqliteDatabase(db, None)
st1, st2 = (
    daff.SqlTable(sd, daff.SqlTableName(table_name))
    for table_name in ("ver1", "ver2")
)

# Align the tables and write the highlighted diff into `out`.
sc = daff.SqlCompare(sd, st1, st2, None)
align = sc.apply()
flags = daff.CompareFlags()
td = daff.TableDiff(align, flags)
out = daff.PythonTableView([])
td.hilite(out)

# Expected diff: header row, the added row, the modified row, the removed row.
expected_rows = [
    ['@@', 'id', 'name'],
    ['+++', 3, 'Calvin'],
    ['->', 2, 'Naomi->Noemi'],
    ['---', 1, 'Paul'],
]
target = daff.PythonTableView(expected_rows)
assert target.isSimilar(out)