def compare(file1, file2, rnames=False):
    """Compare two tab-separated files whose rows hold unordered member sets.

    Each file starts with a header line. Every following line is split on
    tabs: the first field is the row name (normalized through
    ``util.make_rname``) and the remaining fields form a set, so member order
    and duplicates within a row are ignored. A summary is printed to stdout;
    nothing is returned.

    Args:
        file1: path of the first file.
        file2: path of the second file.
        rnames: passed through unchanged to ``util.make_rname``.

    Raises:
        Exception: if the two headers have a different number of fields, if
            the files contain a different number of distinct row names, or
            if the sets of row names differ.
    """
    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')

        # Rows with the same normalized name overwrite each other; the last
        # one in the file wins.
        data1 = {util.make_rname(fields[0], rnames): set(fields[1:])
                 for fields in (line.strip().split('\t') for line in infile1)}
        data2 = {util.make_rname(fields[0], rnames): set(fields[1:])
                 for fields in (line.strip().split('\t') for line in infile2)}

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')

    if set(data1) != set(data2):
        for key1 in data1:
            if key1 not in data2:
                print("Key '%s' not found" % key1)
        raise Exception('Keys do not match')

    num_errors = 0
    for key, members1 in data1.items():
        members2 = data2[key]
        if members1 != members2:
            print("data for key '%s' does not match" % key)
            # Count members missing on EITHER side. Counting only the members
            # of file1 absent from file2 would report zero errors whenever
            # file1's set is a strict subset of file2's.
            num_errors += len(members1 ^ members2)

    if num_errors == 0:
        print("done, everything matches")
    else:
        print("%d errors found" % num_errors)
def compare(file1, file2, rnames=False):
    """Compare two tab-separated files whose rows hold unordered member sets.

    Each file starts with a header line. Every following line is split on
    tabs: the first field is the row name (normalized through
    ``util.make_rname``) and the remaining fields form a set, so member order
    and duplicates within a row are ignored. A summary is printed to stdout;
    nothing is returned.

    Args:
        file1: path of the first file.
        file2: path of the second file.
        rnames: passed through unchanged to ``util.make_rname``.

    Raises:
        Exception: if the two headers have a different number of fields, if
            the files contain a different number of distinct row names, or
            if the sets of row names differ.
    """
    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')

        # Rows with the same normalized name overwrite each other; the last
        # one in the file wins.
        data1 = {util.make_rname(fields[0], rnames): set(fields[1:])
                 for fields in (line.strip().split('\t') for line in infile1)}
        data2 = {util.make_rname(fields[0], rnames): set(fields[1:])
                 for fields in (line.strip().split('\t') for line in infile2)}

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')

    if set(data1) != set(data2):
        for key1 in data1:
            if key1 not in data2:
                print("Key '%s' not found" % key1)
        raise Exception('Keys do not match')

    num_errors = 0
    for key, members1 in data1.items():
        members2 = data2[key]
        if members1 != members2:
            print("data for key '%s' does not match" % key)
            # Count members missing on EITHER side. Counting only the members
            # of file1 absent from file2 would report zero errors whenever
            # file1's set is a strict subset of file2's.
            num_errors += len(members1 ^ members2)

    if num_errors == 0:
        print("done, everything matches")
    else:
        print("%d errors found" % num_errors)
def compare(file1, file2, verbose, rnames, mapheaders, eps=EPS):
    """Compare two tab-separated numeric tables cell by cell.

    Each file starts with a header line. Every following line is split on
    tabs: the first field is the row name (normalized through
    ``util.make_rname``) and the remaining fields are parsed as floats, with
    the literal ``'NA'`` read as NaN.

    Args:
        file1: path of the first file.
        file2: path of the second file.
        verbose: when true, print one line per mismatching cell.
        rnames: passed through unchanged to ``util.make_rname``.
        mapheaders: when true, a column of file1 is compared with the column
            of file2 carrying the same header name (first occurrence);
            otherwise columns are compared by position.
        eps: largest absolute difference still counted as a match.

    Returns:
        A ``(num_correct, num_errors)`` tuple. Cells that are NaN in both
        files are skipped and counted in neither number.

    Raises:
        Exception: if the headers have a different number of fields, the
            files hold a different number of distinct row names, the row
            names differ, or a row has a different number of values in the
            two files.
        ValueError: if a value is neither ``'NA'`` nor parseable as a float,
            or if ``mapheaders`` is true and a header name of file1 does not
            occur in the header of file2.
    """
    def tofloat(x):
        return float('nan') if x == 'NA' else float(x)

    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')

        # Build real lists (not map objects) so len() and indexing work no
        # matter whether map() returns a list or a lazy iterator.
        data1 = {util.make_rname(fields[0], rnames):
                 [tofloat(value) for value in fields[1:]]
                 for fields in (line.strip().split('\t') for line in infile1)}
        data2 = {util.make_rname(fields[0], rnames):
                 [tofloat(value) for value in fields[1:]]
                 for fields in (line.strip().split('\t') for line in infile2)}

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')

    if set(data1) != set(data2):
        print(data1.keys())
        print(data2.keys())
        raise Exception('Keys do not match')

    num_errors = 0
    num_correct = 0
    # (index in file1, index in file2) pairs. Resolved on first use so the
    # header lookup runs once instead of once per cell, and only when there
    # is at least one row to compare.
    column_pairs = None

    for key, values1 in data1.items():
        values2 = data2[key]
        if len(values1) != len(values2):
            raise Exception("data for key '%s' does not have the same length"
                            % key)

        if column_pairs is None:
            if mapheaders:
                column_pairs = [(i1, header2.index(name))
                                for i1, name in enumerate(header1)]
            else:
                column_pairs = [(i, i) for i in range(len(header1))]

        # NOTE(review): header positions are used directly as value indices,
        # which assumes the header line carries no label for the row-name
        # column - confirm against the files this is run on.
        for i1, i2 in column_pairs:
            value1 = values1[i1]
            value2 = values2[i2]
            nan1 = math.isnan(value1)
            nan2 = math.isnan(value2)

            if nan1 and nan2:
                continue
            if nan1:
                if verbose:
                    print("[%s, %d]: NaN != %.13f" % (key, i1, value2))
                num_errors += 1
            elif nan2:
                if verbose:
                    print("[%s, %d]: %.13f != NaN" % (key, i1, value1))
                num_errors += 1
            elif abs(value1 - value2) > eps:
                if verbose:
                    print("[%s, %d/%d]: %.13f != %.13f"
                          % (key, i1, i2, value1, value2))
                num_errors += 1
            else:
                num_correct += 1

    return num_correct, num_errors
def compare(file1, file2, verbose, rnames, mapheaders, eps=EPS):
    """Compare two tab-separated numeric tables cell by cell.

    Each file starts with a header line. Every following line is split on
    tabs: the first field is the row name (normalized through
    ``util.make_rname``) and the remaining fields are parsed as floats, with
    the literal ``'NA'`` read as NaN.

    Args:
        file1: path of the first file.
        file2: path of the second file.
        verbose: when true, print one line per mismatching cell.
        rnames: passed through unchanged to ``util.make_rname``.
        mapheaders: when true, a column of file1 is compared with the column
            of file2 carrying the same header name (first occurrence);
            otherwise columns are compared by position.
        eps: largest absolute difference still counted as a match.

    Returns:
        A ``(num_correct, num_errors)`` tuple. Cells that are NaN in both
        files are skipped and counted in neither number.

    Raises:
        Exception: if the headers have a different number of fields, the
            files hold a different number of distinct row names, the row
            names differ, or a row has a different number of values in the
            two files.
        ValueError: if a value is neither ``'NA'`` nor parseable as a float,
            or if ``mapheaders`` is true and a header name of file1 does not
            occur in the header of file2.
    """
    def tofloat(x):
        return float('nan') if x == 'NA' else float(x)

    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')

        # Build real lists (not map objects) so len() and indexing work no
        # matter whether map() returns a list or a lazy iterator.
        data1 = {util.make_rname(fields[0], rnames):
                 [tofloat(value) for value in fields[1:]]
                 for fields in (line.strip().split('\t') for line in infile1)}
        data2 = {util.make_rname(fields[0], rnames):
                 [tofloat(value) for value in fields[1:]]
                 for fields in (line.strip().split('\t') for line in infile2)}

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')

    if set(data1) != set(data2):
        print(data1.keys())
        print(data2.keys())
        raise Exception('Keys do not match')

    num_errors = 0
    num_correct = 0
    # (index in file1, index in file2) pairs. Resolved on first use so the
    # header lookup runs once instead of once per cell, and only when there
    # is at least one row to compare.
    column_pairs = None

    for key, values1 in data1.items():
        values2 = data2[key]
        if len(values1) != len(values2):
            raise Exception("data for key '%s' does not have the same length"
                            % key)

        if column_pairs is None:
            if mapheaders:
                column_pairs = [(i1, header2.index(name))
                                for i1, name in enumerate(header1)]
            else:
                column_pairs = [(i, i) for i in range(len(header1))]

        # NOTE(review): header positions are used directly as value indices,
        # which assumes the header line carries no label for the row-name
        # column - confirm against the files this is run on.
        for i1, i2 in column_pairs:
            value1 = values1[i1]
            value2 = values2[i2]
            nan1 = math.isnan(value1)
            nan2 = math.isnan(value2)

            if nan1 and nan2:
                continue
            if nan1:
                if verbose:
                    print("[%s, %d]: NaN != %.13f" % (key, i1, value2))
                num_errors += 1
            elif nan2:
                if verbose:
                    print("[%s, %d]: %.13f != NaN" % (key, i1, value1))
                num_errors += 1
            elif abs(value1 - value2) > eps:
                if verbose:
                    print("[%s, %d/%d]: %.13f != %.13f"
                          % (key, i1, i2, value1, value2))
                num_errors += 1
            else:
                num_correct += 1

    return num_correct, num_errors