def getDegreeComparison(prefix1, prefix2, k, l):
    """Build the comparison form for two clusterings and return its output.

    The function combines the dictionary returned by
    ``runcluster.filterWithI(prefix1, k, l, 'out')`` with two files read
    from ``data/<prefix2>/``:

    * ``data.out.clustering.<k>`` -- whitespace-separated cluster ids,
    * ``data.out.clustering.<k>.output`` -- a text report from which a
      header row and the per-cluster statistic rows are extracted by regex.

    It then assembles ``form.Table`` components ("Ref stats",
    "G0 Similarities", "G<l> Similarities", "Cov" and one ``cov<i>`` table
    per row of the co-occurrence matrix) plus a small JavaScript snippet.

    Args:
        prefix1: passed to ``filterWithI`` and embedded in the graph link.
        prefix2: name of the directory under ``data/`` holding the second
            clustering; also embedded in the graph link.
        k: used in the file names and, via ``int(k)``, as the size of the
            square co-occurrence matrix (presumably the number of
            clusters -- confirm against callers).
        l: passed to ``filterWithI``, used in the "G<l> Similarities"
            title and embedded in the graph link.

    Returns:
        Whatever calling the constructed ``form.Form`` instance returns
        (presumably the rendered page -- confirm in ``form``).

    Raises:
        IOError: if either data file cannot be opened.
        TypeError: if no header row was found in the report (``category``
            is still ``None`` when it is copied).
        ZeroDivisionError: if a row of the co-occurrence matrix, or
            ``results['total']``, sums to zero.
    """
    format_float = '%.5f'

    from runcluster import filterWithI
    results = filterWithI(prefix1, k, l, 'out')
    # Keep only the first two fields of every similarity record, stored
    # column-wise (two lists); they are transposed back into rows below.
    similarity_records = results['g0-similarities']
    results['g0-similarities'] = [[record[col] for record in similarity_records]
                                  for col in range(2)]

    # Raw strings: same runtime value as before, without relying on Python
    # passing unknown escapes such as '\s' through unchanged.
    WORDS = r'[a-zA-Z]+'
    SPACES = r'[\s]+'
    NUMBERS = r'[\d]+'
    SIM = r'[+-\.\d]+'

    def row_regex(lead, cells):
        # One capture group per cell, separated by whitespace.
        # NOTE(review): the '|' below is unescaped, so it acts as regex
        # alternation rather than matching a literal pipe character. It is
        # kept exactly as in the original because the report format is not
        # visible from this function -- confirm before tightening.
        return (lead + SPACES.join('(%s)' % cell for cell in cells) +
                SPACES + '|' + SPACES + '\n')

    category_regex = row_regex(r'[\s]*', [WORDS] * 6)
    stats_regex = row_regex(SPACES, [NUMBERS] * 2 + [SIM] * 4)

    # Parse the report: every stats row is collected, the last header row
    # seen wins.
    matches = []
    category = None
    report = open('data/%s/data.out.clustering.%s.output' %
                  (prefix2, k), 'r')
    try:
        for line in report:
            found = match(stats_regex, line)
            if found:
                matches.append(list(found.groups()))
                continue
            found = match(category_regex, line)
            if found:
                category = list(found.groups())
    finally:
        report.close()

    assignment = open('data/%s/data.out.clustering.%s' % (prefix2, k), 'r')
    try:
        clusters2 = assignment.read().split()
    finally:
        assignment.close()
    clusters1 = results['new_clusters']

    # cov[a][b] counts the items placed in cluster a by the first
    # clustering and in cluster b by the second one.
    size = int(k)
    cov = [[0] * size for _ in range(size)]
    for first, second in zip(clusters1, clusters2):
        cov[int(first)][int(second)] += 1

    total = results['total']
    total_nodes = sum(total)
    fractions = [count / float(total_nodes) for count in total]

    # Turn each row into "share of the row" minus "overall share of that
    # column according to results['total']".
    for row in cov:
        row_sum = float(sum(row))
        for col in range(len(row)):
            row[col] /= row_sum
            row[col] -= fractions[col]

    # One row per entry of ``total``: [total, <similarity fields...>].
    g0_rows = [[count] for count in total]
    for row, sims in zip(g0_rows, transpose(results['g0-similarities'])):
        row.extend(sims)
    # Sort by the leading total, largest first, remembering original indices.
    ranked = sorted(enumerate(g0_rows), key=lambda pair: -pair[1][0])
    g0_similarities = [row for _, row in ranked]
    g0_v_category = [index for index, _ in ranked]

    for row in g0_similarities:
        for pos in range(2, len(row)):
            value = row[pos]
            if value > 0:
                sign = '+'
            else:
                sign = '-'
            # Format the magnitude only: the original prepended '-' to an
            # already negative number and produced '--0.12345'.
            row[pos] = sign + format_float % abs(value)

    # Header for the G0 table: drop the 4th and 6th header fields.
    # list(None) raises TypeError here when no header row was matched.
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)

    # NOTE(review): the first column is left unformatted, exactly as in the
    # original -- confirm whether that is intentional.
    print_cov = [[row[0]] + [format_float % value for value in row[1:]]
                 for row in cov]

    components = [
        form.Table('Ref stats', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(size),
                   v_category=['in-stats', 'out-stats', 'total-nodes'],
                   pairs=[results['in-stats'], results['out-stats'],
                          results['total']]),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=g0_category, v_category=g0_v_category,
                   pairs=g0_similarities),
        form.tr(''),
        form.tr(''),
        form.Table('G%s Similarities' % (l), use_id=True,
                   border="1", cellspacing="0", cellpadding="3",
                   v_category=range(len(matches)),
                   h_category=category, pairs=matches),
        form.tr(''),
        form.Table('Cov', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(size), pairs=print_cov),
        form.tr(''),
    ]

    # One single-row table per cov row, columns ordered by decreasing value
    # and labelled "(column index, overall fraction)".
    for row_index, row in enumerate(cov):
        ordered = sorted(zip(row, range(size)), key=lambda pair: -pair[0])
        pairs = [format_float % (value) for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]

        components.append(form.Table('cov%d' % (row_index), border="1",
                                     cellspacing="0", cellpadding="3",
                                     h_category=cate, v_category=[row_index],
                                     pairs=[pairs],
                                     use_id=True))
        components.append(form.tr(''))

    # NOTE(review): the arguments are interpolated into JavaScript without
    # escaping -- confirm they never carry untrusted request data.
    variables = 'var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=%s";\n' % (prefix1, prefix2, k, l)
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@scluster");}\n')

    # Same call as apply(form.Form, components, {'js': js}), without the
    # deprecated builtin.
    return form.Form(*components, **{'js': js})()
def getMergeComparison(prefix, k=None):
    """Build the form comparing the two clusterings stored under one prefix.

    Reads, from ``data/<prefix>/``:

    * ``data.out.clustering.<k>`` and
      ``data.abstract.matrix.clustering.<k>`` -- whitespace-separated
      cluster ids,
    * the matching ``*.output`` reports, which are handed line by line to
      ``getMatches``.

    Together with ``runcluster.filterWithI(prefix, k, 0,
    'abstract.matrix')`` this yields the tables "SCluster Similarities",
    "VCluster Similarities", "G0 Similarities", "Total", "Total1",
    "Total2", "fractions", "Cov" and one ``cov<i>`` table per row of the
    co-occurrence matrix.

    Args:
        prefix: name of the directory under ``data/``; also passed to
            ``filterWithI`` and embedded in the graph link.
        k: used in the file names and, via ``int(k)``, as the size of the
            per-cluster counters and of the co-occurrence matrix
            (presumably the number of clusters -- confirm against
            callers). The original function read ``k`` without receiving
            it as an argument; when omitted, the module-level ``k`` is
            used, as before.

    Returns:
        Whatever calling the constructed ``form.Form`` instance returns
        (presumably the rendered page -- confirm in ``form``).

    Raises:
        NameError: if ``k`` is omitted and no module-level ``k`` exists.
        IOError: if any of the four data files cannot be opened.
        ZeroDivisionError: if a row of the co-occurrence matrix, or
            ``results['total']``, sums to zero.
    """
    if k is None:
        # Backward compatibility with callers that rely on a global ``k``.
        try:
            k = globals()['k']
        except KeyError:
            raise NameError("global name 'k' is not defined")

    format_float = '%.5f'

    def read_text(path):
        # Read a whole file and always release the handle.
        handle = open(path, 'r')
        try:
            return handle.read()
        finally:
            handle.close()

    from runcluster import filterWithI
    results = filterWithI(prefix, k, 0, 'abstract.matrix')

    s_stats = read_text('data/%s/data.out.clustering.%s.output' % (prefix, k)).split('\n')
    v_stats = read_text('data/%s/data.abstract.matrix.clustering.%s.output' % (prefix, k)).split('\n')

    (s_matches, category) = getMatches(s_stats)
    (v_matches, _) = getMatches(v_stats)

    clusters1 = read_text('data/%s/data.out.clustering.%s' % (prefix, k)).split()
    clusters2 = read_text('data/%s/data.abstract.matrix.clustering.%s' % (prefix, k)).split()

    size = int(k)

    # Number of items assigned to each cluster id in either clustering.
    total1 = [0] * size
    for cluster_id in clusters1:
        total1[int(cluster_id)] += 1

    total2 = [0] * size
    for cluster_id in clusters2:
        total2[int(cluster_id)] += 1

    total = results['total']

    # cov[a][b] counts the items placed in cluster a by the first
    # clustering and in cluster b by the second one.
    cov = [[0] * size for _ in range(size)]
    for first, second in zip(clusters1, clusters2):
        cov[int(first)][int(second)] += 1

    total_nodes = sum(total)
    fractions = [count / float(total_nodes) for count in total]

    # Turn each row into "share of the row" minus "overall share of that
    # column according to results['total']".
    for row in cov:
        row_sum = float(sum(row))
        for col in range(len(row)):
            row[col] /= row_sum
            row[col] -= fractions[col]

    # Debug output kept from the original; written so that it emits the same
    # text under Python 2 and Python 3.
    print('total %s' % (total,))

    # One row per entry of ``total``: [total, <similarity fields...>].
    g0_rows = [[count] for count in total]
    for row, sims in zip(g0_rows, results['g0-similarities']):
        row.extend(sims)
    # Sort by the leading total, largest first, remembering original indices.
    ranked = sorted(enumerate(g0_rows), key=lambda pair: -pair[1][0])
    g0_similarities = [row for _, row in ranked]
    g0_v_category = [index for index, _ in ranked]

    for row in g0_similarities:
        for pos in range(2, len(row)):
            value = row[pos]
            if value > 0:
                sign = '+'
            else:
                sign = '-'
            # Format the magnitude only: the original prepended '-' to an
            # already negative number and produced '--0.12345'.
            row[pos] = sign + format_float % abs(value)

    # Header for the G0 table: drop the 4th, 6th and 1st header fields.
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)
    g0_category.pop(0)

    print_cov = [[format_float % value for value in row] for row in cov]

    components = [
        form.Table('SCluster Similarities', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=category, v_category=range(len(s_matches)),
                   pairs=s_matches),
        form.tr(''),
        form.tr(''),
        form.Table('VCluster Similarities', use_id=True,
                   border="1", cellspacing="0", cellpadding="3",
                   v_category=range(len(v_matches)),
                   h_category=category, pairs=v_matches),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=g0_category, v_category=g0_v_category,
                   cross='cid', pairs=g0_similarities),
        form.tr(''),
        form.tr(''),
        form.Table('Total', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(0, len(total)), pairs=[total]),
        form.tr(''),
        form.Table('Total1', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(0, len(total1)), pairs=[total1]),
        form.tr(''),
        form.Table('Total2', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(0, len(total2)), pairs=[total2]),
        form.tr(''),
        form.Table('fractions', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(0, len(fractions)), pairs=[fractions]),
        form.tr(''),
        form.Table('Cov', use_id=True, border="1",
                   cellspacing="0", cellpadding="3",
                   h_category=range(0, size), pairs=print_cov),
        form.tr(''),
    ]

    # One single-row table per cov row, columns ordered by decreasing value
    # and labelled "(column index, overall fraction)".
    for row_index, row in enumerate(cov):
        ordered = sorted(zip(row, range(0, size)), key=lambda pair: -pair[0])
        pairs = [format_float % (value) for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]

        components.append(form.Table('cov%d' % (row_index), border="1",
                                     cellspacing="0", cellpadding="3",
                                     h_category=cate, v_category=[row_index],
                                     pairs=[pairs],
                                     use_id=True))
        components.append(form.tr(''))

    # NOTE(review): the arguments are interpolated into JavaScript without
    # escaping -- confirm they never carry untrusted request data.
    variables = 'var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=0";\n' % (prefix, prefix, k)
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@vcluster_tfidf");}\n')

    # Same call as apply(form.Form, components, {'js': js}), without the
    # deprecated builtin.
    return form.Form(*components, **{'js': js})()