Ejemplo n.º 1
0
def compare(file1, file2, rnames=False):
    """Compare two tab-separated membership files and print a summary.

    The first line of each file is treated as a header; only the number of
    tab-separated header fields is compared. Every following line is split
    on tabs: the first field is passed to ``util.make_rname`` (together with
    ``rnames``) to build the row key, and the remaining fields are compared
    as an unordered set.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
        rnames: Forwarded unchanged to ``util.make_rname``.

    Raises:
        Exception: If the header field counts, the number of distinct row
            keys, or the row keys themselves differ between the files.
    """
    def read_rows(infile):
        # Rows sharing a key collapse into one entry; the last row wins.
        rows = (line.strip().split('\t') for line in infile)
        return {util.make_rname(row[0], rnames): set(row[1:]) for row in rows}

    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')
        data1 = read_rows(infile1)
        data2 = read_rows(infile2)

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')
    if set(data1) != set(data2):
        for key1 in data1:
            if key1 not in data2:
                print("Key '%s' not found" % key1)
        raise Exception('Keys do not match')

    num_errors = 0
    for key in data1:
        members1 = data1[key]
        members2 = data2[key]
        if members1 != members2:
            print("data for key '%s' does not match" % key)
            # Count members missing on either side. Counting only the
            # members of file1 absent from file2 reported zero errors
            # whenever file2's set was a strict superset of file1's.
            num_errors += len(members1 ^ members2)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if num_errors == 0:
        print("done, everything matches")
    else:
        print("%d errors found" % num_errors)
def compare(file1, file2, rnames=False):
    """Report whether two tab-separated membership files agree.

    Line one of each file is a header; the two headers only need to have the
    same number of tab-separated fields. For every other line, the first
    field is turned into a row key with ``util.make_rname(field, rnames)``
    and the rest of the fields form that row's set of members. The outcome
    is printed; nothing is returned.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
        rnames: Passed through to ``util.make_rname``.

    Raises:
        Exception: If the header widths differ, the files contain a
            different number of distinct row keys, or the row keys differ.
    """
    with open(file1) as infile1, open(file2) as infile2:
        width1 = len(infile1.readline().strip().split('\t'))
        width2 = len(infile2.readline().strip().split('\t'))
        if width1 != width2:
            raise Exception('Num clusters do not match')
        inrows1 = [line.strip().split('\t') for line in infile1]
        inrows2 = [line.strip().split('\t') for line in infile2]

    data1 = {util.make_rname(row[0], rnames): set(row[1:]) for row in inrows1}
    data2 = {util.make_rname(row[0], rnames): set(row[1:]) for row in inrows2}

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')

    if set(data1) != set(data2):
        for absent in (key for key in data1 if key not in data2):
            print("Key '%s' not found" % absent)
        raise Exception('Keys do not match')

    num_errors = 0
    for key, expected in data1.items():
        actual = data2[key]
        if expected == actual:
            continue
        print("data for key '%s' does not match" % key)
        # Members only in file1 plus members only in file2. The one-sided
        # count used before let a row whose file2 set was a strict superset
        # slip through with zero errors and a "everything matches" summary.
        num_errors += len(expected - actual) + len(actual - expected)

    # print(...) with one argument is valid on both Python 2 and Python 3.
    if num_errors:
        print("%d errors found" % num_errors)
    else:
        print("done, everything matches")
def compare(file1, file2, verbose, rnames, mapheaders, eps=EPS):
    """Compare the numeric cells of two tab-separated tables.

    The first line of each file is the header. Each following line is split
    on tabs: the first field is passed to ``util.make_rname`` (together with
    ``rnames``) to build the row key, and the remaining fields are parsed as
    floats, with the literal ``NA`` parsed as NaN.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
        verbose: When true, print one line per mismatching cell.
        rnames: Forwarded unchanged to ``util.make_rname``.
        mapheaders: When true, the column at position ``i`` of ``file1`` is
            compared with the column of ``file2`` whose header equals
            ``file1``'s header at position ``i``; otherwise columns are
            compared position by position.
        eps: Largest absolute difference that still counts as equal.

    Returns:
        A ``(num_correct, num_errors)`` tuple. Cells that are NaN in both
        files are counted in neither number.

    Raises:
        Exception: If the header field counts, the number of distinct row
            keys, the row keys, or the value count of a row differ.
        ValueError: If a cell is not ``NA`` and cannot be parsed as a float,
            or if ``mapheaders`` is true and a header of ``file1`` does not
            occur in ``file2``.
    """
    def tofloat(x):
        return float('nan') if x == 'NA' else float(x)

    def read_rows(infile):
        rows = (line.strip().split('\t') for line in infile)
        # Build real lists: the values are measured with len() and indexed
        # below, which an iterator-returning map() does not support.
        return {
            util.make_rname(row[0], rnames): [tofloat(cell) for cell in row[1:]]
            for row in rows
        }

    num_errors = 0
    num_correct = 0

    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')
        data1 = read_rows(infile1)
        data2 = read_rows(infile2)

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')
    if set(data1) != set(data2):
        print(data1.keys())
        print(data2.keys())
        raise Exception('Keys do not match')

    # The column pairing depends only on the headers, so resolve it once
    # instead of searching header2 again for every cell of every row.
    if mapheaders:
        columns2 = [header2.index(name) for name in header1]
    else:
        columns2 = list(range(len(header1)))

    for key in data1:
        values1 = data1[key]
        values2 = data2[key]
        if len(values1) != len(values2):
            raise Exception("data for key '%s' does not have the same length" % key)
        for i1, i2 in enumerate(columns2):
            value1 = values1[i1]
            value2 = values2[i2]
            nan1 = math.isnan(value1)
            nan2 = math.isnan(value2)
            if nan1 and nan2:
                # Missing on both sides: neither a match nor an error.
                continue
            if nan1:
                if verbose:
                    print("[%s, %d]: NaN != %.13f" % (key, i1, value2))
                num_errors += 1
            elif nan2:
                if verbose:
                    print("[%s, %d]: %.13f != NaN" % (key, i1, value1))
                num_errors += 1
            elif abs(value1 - value2) > eps:
                if verbose:
                    print("[%s, %d/%d]: %.13f != %.13f" % (key, i1, i2,
                                                           value1, value2))
                num_errors += 1
            else:
                num_correct += 1

    return num_correct, num_errors
Ejemplo n.º 4
0
def compare(file1, file2, verbose, rnames, mapheaders, eps=EPS):
    """Count matching and mismatching numeric cells of two tab-separated files.

    Both files start with a header line. In every other line the first field
    becomes the row key via ``util.make_rname(field, rnames)`` and all other
    fields are converted to floats (the literal ``NA`` becomes NaN).

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
        verbose: If true, each mismatching cell is printed.
        rnames: Passed through to ``util.make_rname``.
        mapheaders: If true, columns are paired by header name (looked up in
            the header of ``file2``); if false, they are paired by position.
        eps: Absolute tolerance; differences greater than this are errors.

    Returns:
        ``(num_correct, num_errors)``. A cell that is NaN in both files is
        skipped and contributes to neither count.

    Raises:
        Exception: If the headers have different field counts, the files
            have a different number of distinct row keys, the row keys
            differ, or a row has a different number of values in each file.
        ValueError: If a cell other than ``NA`` is not a valid float, or if
            ``mapheaders`` is true and ``file2`` lacks a header of ``file1``.
    """
    def tofloat(x):
        return float('nan') if x == 'NA' else float(x)

    with open(file1) as infile1, open(file2) as infile2:
        header1 = infile1.readline().strip().split('\t')
        header2 = infile2.readline().strip().split('\t')
        if len(header1) != len(header2):
            raise Exception('Num clusters do not match')
        inrows1 = [line.strip().split('\t') for line in infile1]
        inrows2 = [line.strip().split('\t') for line in infile2]

    # List comprehensions rather than map(): the values are indexed and
    # passed to len() further down, which requires a real sequence.
    data1 = {
        util.make_rname(row[0], rnames): [tofloat(cell) for cell in row[1:]]
        for row in inrows1
    }
    data2 = {
        util.make_rname(row[0], rnames): [tofloat(cell) for cell in row[1:]]
        for row in inrows2
    }

    if len(data1) != len(data2):
        raise Exception('Numbers of entries does not match')
    if set(data1) != set(data2):
        print(data1.keys())
        print(data2.keys())
        raise Exception('Keys do not match')

    # Pair every file1 column with its file2 column once, up front; the
    # header lookup does not depend on the row being compared.
    if mapheaders:
        column_pairs = [(i1, header2.index(name))
                        for i1, name in enumerate(header1)]
    else:
        column_pairs = [(i1, i1) for i1 in range(len(header1))]

    num_errors = 0
    num_correct = 0
    for key, values1 in data1.items():
        values2 = data2[key]
        if len(values1) != len(values2):
            raise Exception(
                "data for key '%s' does not have the same length" % key)
        for i1, i2 in column_pairs:
            left = values1[i1]
            right = values2[i2]
            left_missing = math.isnan(left)
            right_missing = math.isnan(right)
            if left_missing and right_missing:
                # NaN on both sides is neither counted as correct nor as
                # an error.
                continue
            if left_missing or right_missing:
                if verbose:
                    if left_missing:
                        print("[%s, %d]: NaN != %.13f" % (key, i1, right))
                    else:
                        print("[%s, %d]: %.13f != NaN" % (key, i1, left))
                num_errors += 1
            elif abs(left - right) > eps:
                if verbose:
                    print("[%s, %d/%d]: %.13f != %.13f" % (
                        key, i1, i2, left, right))
                num_errors += 1
            else:
                num_correct += 1

    return num_correct, num_errors