def format_diff(a: Table, b: Table, index: List[str] = []) -> str:
    """Return the terminal rendering of the daff diff between ``a`` and ``b``.

    Every name in ``index`` is registered as a primary-key column on the
    comparison flags before the two tables are diffed.
    """
    compare_flags = daff.CompareFlags()
    for key_column in index:
        compare_flags.addPrimaryKey(key_column)

    table_diff = daff.Coopy.diff(a, b, flags=compare_flags)
    return daff.TerminalDiffRender().render(table_diff)
def diff_tsv(self, other, filename, exclude_fields):
    """Diff the tab-separated file ``filename`` found under two directories.

    The file is read once from ``self.path`` and once from ``other.path``.
    Columns named in ``exclude_fields`` are dropped from each side when they
    are present; names that do not occur in a file are ignored for that file.

    Returns the terminal rendering of the highlighted diff, or ``None`` when
    daff reports no difference between the two filtered tables.
    """

    def load_view(root):
        # Read the TSV, drop the excluded columns, and wrap header + rows
        # in the list-of-lists form that PythonTableView expects.
        with open(os.path.join(root, filename)) as handle:
            frame = pandas.read_csv(handle, delimiter='\t')
        present = [field for field in exclude_fields if field in frame.columns]
        kept = frame.drop(present, axis=1)
        rows = [kept.columns.tolist()] + kept.values.tolist()
        return daff.PythonTableView(rows)

    self_data = load_view(self.path)
    other_data = load_view(other.path)

    alignment = daff.Coopy.compareTables(self_data, other_data).align()
    highlighted = daff.PythonTableView([])
    table_diff = daff.TableDiff(alignment, daff.CompareFlags())
    table_diff.hilite(highlighted)

    if not table_diff.hasDifference():
        return None
    return daff.TerminalDiffRender().render(highlighted)
def get_diff(self, filename1, filename2):
    """Compute a parsed daff diff between two tabular files.

    Only the extension of ``filename1`` is inspected; unless it is ``csv``,
    ``tsv`` or ``xls`` (case-insensitive) the method returns ``None`` without
    reading either file.

    Each file is loaded through ``self.read_file`` and only its first table
    is used. Headers and the data offset are guessed from the table's sample,
    and the table is flattened to a list of rows with the header row first.
    The two tables are then aligned and highlighted with daff, and the
    highlighted table is passed to ``self.parse_diff``.

    Raises:
        Exception: if ``self.read_file`` returns ``None`` for either file.
    """
    extension = filename1.split(".")[-1].lower()
    if extension not in ['csv', 'tsv', 'xls']:
        return None

    tables = {}
    for path in [filename1, filename2]:
        table_set = self.read_file(path)
        if table_set is None:
            raise Exception("Invalid table set")

        row_set = table_set.tables[0]
        offset, headers = headers_guess(row_set.sample)
        row_set.register_processor(headers_processor(headers))
        row_set.register_processor(offset_processor(offset + 1))

        # Rows come back as sequences of cell objects; keep only the values.
        tables[path] = [headers] + [
            [cell.value for cell in row] for row in row_set
        ]

    aligned = daff.Coopy.compareTables(
        daff.PythonTableView(tables[filename1]),
        daff.PythonTableView(tables[filename2]),
    ).align()

    highlighted = daff.PythonTableView([])
    daff.TableDiff(aligned, daff.CompareFlags()).hilite(highlighted)

    return self.parse_diff(highlighted)
def process_ids(prev_file, curr_file, prev_id_file, id_file):
    """Carry per-row identifiers from a previous table version to the current one.

    The previous and current files are loaded with daff and aligned. For each
    part listed in the alignment's ``child_order``, every aligned row of the
    current version receives an identifier:

    * if the row is matched to a row of the previous version and that row has
      an entry in the previous id mapping, the identifier is copied;
    * otherwise a fresh ``uuid4`` string is minted.

    Rows whose current-side index is 0 or negative get no identifier.

    Args:
        prev_file: path of the previous version. If it does not exist, the
            current file is compared against itself.
        curr_file: path of the current version.
        prev_id_file: path of a JSON file holding the previous mapping
            ``{part: {row_index_as_string: id}}``. Treated as empty if the
            file does not exist.
        id_file: path the new mapping is written to as indented JSON.

    Returns:
        The new mapping ``{part: {current_row_index: id}}``. Row indices are
        whatever ``o.r`` yields in memory (presumably ints); JSON
        serialisation turns them into strings in ``id_file``.
    """
    table_io = daff.TableIO()
    dapp = daff.Coopy(table_io)
    if not os.path.exists(prev_file):
        prev_file = curr_file
    v1 = dapp.loadTable(prev_file, 'local')
    v2 = dapp.loadTable(curr_file, 'remote')

    flags = daff.CompareFlags()
    flags.allow_nested_cells = True
    alignment = daff.compareTables3(None, v1, v2, flags).align()
    # NOTE(review): the highlighted output is thrown away; this call is
    # presumably needed for its side effects on `alignment` -- confirm.
    daff.TableDiff(alignment, flags).hiliteSingle(daff.SimpleTable(0, 0))

    if os.path.exists(prev_id_file):
        # Context manager so the handle is closed promptly.
        with open(prev_id_file) as prev_id_handle:
            in_refs = json.load(prev_id_handle)
    else:
        in_refs = {}

    out_refs = {}
    for part in alignment.comp.child_order:
        comp = alignment.comp.children.get(part)
        order = comp.alignment.toOrder().getList()
        ref = in_refs.get(part, {})
        # setdefault always binds out_ref to this part's own dict, even if
        # the same part name were to appear twice in child_order.
        out_ref = out_refs.setdefault(part, {})
        for o in order:
            # Row 0 is skipped (presumably the header row -- confirm); a
            # negative o.r means the row has no counterpart in the current
            # version, so there is nothing to label.
            if o.r <= 0:
                continue
            # Previous ids are keyed by the string form of the old row index.
            src = ref.get(str(o.l)) if o.l >= 0 else None
            out_ref[o.r] = src if src is not None else str(uuid.uuid4())

    # Context manager guarantees the JSON is flushed and the file closed
    # before returning.
    with open(id_file, 'w') as id_handle:
        json.dump(out_refs, id_handle, indent=2)
    return out_refs
['Ireland','Dublin'], ['France','Paris'], ['Spain','Barcelona'] ] data2 = [ ['Country','Code','Capital'], ['Ireland','ie','Dublin'], ['France','fr','Paris'], ['Spain','es','Madrid'], ['Germany','de','Berlin'] ] table1 = daff.PythonTableView(data1) table2 = daff.PythonTableView(data2) alignment = daff.Coopy.compareTables(table1,table2).align() data_diff = [] table_diff = PythonTableView(data_diff) flags = daff.CompareFlags() highlighter = daff.TableDiff(alignment,flags) highlighter.hilite(table_diff) diff2html = daff.DiffRender() diff2html.usePrettyArrows(False) diff2html.render(table_diff) table_diff_html = diff2html.html() print(table_diff_html)