Beispiel #1
0
    def diff_tsv(self, other, filename, exclude_fields):
        """Diff the TSV file ``filename`` found under ``self.path`` and ``other.path``.

        Columns named in ``exclude_fields`` are dropped from each side before
        comparing; names that are not columns of a given file are skipped for
        that file. Each side is handed to daff as a header row followed by the
        data rows.

        Returns:
            The output of ``daff.TerminalDiffRender().render`` on the
            highlighted diff when daff reports a difference, else ``None``.
        """

        def _load_table(root):
            # Read one side of the comparison and wrap it for daff.
            with open(os.path.join(root, filename)) as handle:
                frame = pandas.read_csv(handle, delimiter='\t')
                # Only drop the excluded fields this file actually has.
                to_drop = [field for field in exclude_fields
                           if field in frame.columns]
                kept = frame.drop(to_drop, axis=1)
                header = [kept.columns.tolist()]
                return daff.PythonTableView(header + kept.values.tolist())

        left = _load_table(self.path)
        right = _load_table(other.path)

        alignment = daff.Coopy.compareTables(left, right).align()
        highlighted = daff.PythonTableView([])

        table_diff = daff.TableDiff(alignment, daff.CompareFlags())
        table_diff.hilite(highlighted)

        if not table_diff.hasDifference():
            return None
        return daff.TerminalDiffRender().render(highlighted)
Beispiel #2
0
    def get_diff(self, filename1, filename2):
        """Compute the parsed daff diff between two tabular files.

        Only the extension of ``filename1`` is inspected: unless it is
        ``csv``, ``tsv`` or ``xls`` (case-insensitive) the method returns
        ``None`` without reading anything. Otherwise both files are loaded via
        ``self.read_file``, the first table of each is compared with daff, and
        the highlighted diff table is passed to ``self.parse_diff``, whose
        result is returned.

        Raises:
            Exception: if ``self.read_file`` returns ``None`` for either file.
        """
        extension = filename1.rsplit(".", 1)[-1].lower()
        if extension not in ('csv', 'tsv', 'xls'):
            return None

        def _rows_with_header(path):
            # Load one file and return its first table as a list of rows,
            # with the guessed headers as the first row.
            table_set = self.read_file(path)
            if table_set is None:
                raise Exception("Invalid table set")
            row_set = table_set.tables[0]
            offset, headers = headers_guess(row_set.sample)
            row_set.register_processor(headers_processor(headers))
            # offset + 1: presumably starts iteration just past the header row.
            row_set.register_processor(offset_processor(offset + 1))
            return [headers] + [[cell.value for cell in row]
                                for row in row_set]

        # Keyed by filename; files are loaded in the order filename1, filename2.
        loaded = {name: _rows_with_header(name)
                  for name in (filename1, filename2)}

        left = daff.PythonTableView(loaded[filename1])
        right = daff.PythonTableView(loaded[filename2])
        alignment = daff.Coopy.compareTables(left, right).align()

        # daff writes the highlighted diff rows into this (initially empty) table.
        highlighted = daff.PythonTableView([])
        daff.TableDiff(alignment, daff.CompareFlags()).hilite(highlighted)

        return self.parse_diff(highlighted)
Beispiel #3
0
def process_ids(prev_file, curr_file, prev_id_file, id_file):
    """Carry row identifiers from a previous table version over to the current one.

    The two versions are aligned with daff. For every part listed in the
    alignment, each aligned row that exists in the current version (right-hand
    index > 0) receives an id: the id recorded for the matching previous row
    in ``prev_id_file`` when there is one, otherwise a freshly generated UUID4
    string. Rows present only in the previous version get no entry.

    Args:
        prev_file: path of the previous version; when it does not exist,
            ``curr_file`` is compared against itself.
        curr_file: path of the current version.
        prev_id_file: path of the JSON id map written for the previous
            version (``{part: {row_index_as_str: id}}``); may be missing.
        id_file: path the new id map is written to as JSON.

    Returns:
        The new id map, ``{part: {right_row_index: id}}``. Row indices are
        ints in the returned dict (JSON serialisation turns them into strings
        in ``id_file``).
    """
    io = daff.TableIO()
    dapp = daff.Coopy(io)
    # With no previous version on disk, diff the current file against itself.
    if not os.path.exists(prev_file):
        prev_file = curr_file
    v1 = dapp.loadTable(prev_file, 'local')
    v2 = dapp.loadTable(curr_file, 'remote')
    flags = daff.CompareFlags()
    flags.allow_nested_cells = True
    alignment = daff.compareTables3(None, v1, v2, flags).align()
    # NOTE(review): the highlighted output is thrown away; the call is kept
    # because it presumably populates alignment.comp with the per-part child
    # comparisons read below -- confirm against daff before removing.
    daff.TableDiff(alignment, flags).hiliteSingle(daff.SimpleTable(0, 0))

    if os.path.exists(prev_id_file):
        # Close the handle deterministically instead of leaking it.
        with open(prev_id_file) as handle:
            in_refs = json.load(handle)
    else:
        in_refs = {}

    out_refs = {}
    for part in alignment.comp.child_order:
        comp = alignment.comp.children.get(part)
        order = comp.alignment.toOrder().getList()
        ref = in_refs.get(part, {})
        # setdefault keeps out_ref bound to this part's dict even if the same
        # part name shows up more than once in child_order.
        out_ref = out_refs.setdefault(part, {})
        for o in order:
            # Right index 0 is skipped (presumably the header row); a negative
            # right index means the row is absent from the current version.
            if o.r <= 0:
                continue
            # Reuse the previous id when the row has a left-hand counterpart
            # with a recorded id; otherwise mint a new one.
            src = ref.get(str(o.l)) if o.l >= 0 else None
            out_ref[o.r] = src if src is not None else str(uuid.uuid4())

    # The context manager guarantees the id file is flushed and closed.
    with open(id_file, 'w') as handle:
        json.dump(out_refs, handle, indent=2)
    return out_refs
Beispiel #4
0
  ['Ireland','Dublin'],
  ['France','Paris'],
  ['Spain','Barcelona']
  ]

# Second version of the table: rows carry an extra 'Code' value, Spain's
# capital is 'Madrid', and there is a row for Germany.
data2 = [
  ['Country','Code','Capital'],
  ['Ireland','ie','Dublin'],
  ['France','fr','Paris'],
  ['Spain','es','Madrid'],
  ['Germany','de','Berlin']
  ]

# Wrap the plain lists of rows so daff can treat them as tables.
table1 = daff.PythonTableView(data1)
table2 = daff.PythonTableView(data2)

alignment = daff.Coopy.compareTables(table1,table2).align()

# daff writes the highlighted diff rows into data_diff through this view.
# Qualified with the daff module like every other daff name in this script;
# the bare name is not defined by anything visible here.
data_diff = []
table_diff = daff.PythonTableView(data_diff)

flags = daff.CompareFlags()
highlighter = daff.TableDiff(alignment,flags)
highlighter.hilite(table_diff)

# Render the highlighted diff table as HTML and print it.
diff2html = daff.DiffRender()
diff2html.usePrettyArrows(False)
diff2html.render(table_diff)
table_diff_html = diff2html.html()
print(table_diff_html)
Beispiel #5
0
import daff
import sqlite3

# Example/self-check: diff two SQLite tables with daff and assert the result.

# Throwaway in-memory database holding both versions of the table.
db = sqlite3.connect(':memory:')
c = db.cursor()

# Two tables with the same schema: ver1 is the old version, ver2 the new one.
c.execute("CREATE TABLE ver1 (id INTEGER PRIMARY KEY, name TEXT)")
c.execute("CREATE TABLE ver2 (id INTEGER PRIMARY KEY, name TEXT)")
data = [(1, "Paul"), (2, "Naomi"), (4, "Hobbes")]
c.executemany('INSERT INTO ver1 VALUES (?,?)', data)
# ver2 has no id 1, a different name for id 2, a new id 3, and the same id 4.
data = [(2, "Noemi"), (3, "Calvin"), (4, "Hobbes")]
c.executemany('INSERT INTO ver2 VALUES (?,?)', data)

# Wrap the open connection so daff can query it.
sd = daff.SqliteDatabase(db, None)

st1 = daff.SqlTable(sd, daff.SqlTableName("ver1"))
st2 = daff.SqlTable(sd, daff.SqlTableName("ver2"))

# Compare the two tables through the database wrapper.
sc = daff.SqlCompare(sd, st1, st2, None)

align = sc.apply()

# Turn the alignment into a highlighted diff table written into `out`.
flags = daff.CompareFlags()
td = daff.TableDiff(align, flags)
out = daff.PythonTableView([])
td.hilite(out)

# Expected diff: id 3 inserted, id 2 renamed Naomi -> Noemi, id 1 deleted.
target = daff.PythonTableView([['@@', 'id', 'name'], ['+++', 3, 'Calvin'],
                               ['->', 2, 'Naomi->Noemi'], ['---', 1, 'Paul']])
assert (target.isSimilar(out))