def getLdaSclusterResultForm(prefix, lda_k, scluster_k):
    """Build the form relating LDA topic scores to an scluster clustering.

    Inputs read from disk:
      * ``server/static/data/lda/<prefix>/<lda_k>/final.other`` -- alpha is
        the second token of the second-to-last line.
      * ``server/static/data/lda/<prefix>/<lda_k>/final.gamma`` -- one row of
        numbers per line; alpha is subtracted from every entry and the row
        is passed through ``normalize``.
      * ``data/<prefix>/data.out.clustering.<scluster_k>`` -- whitespace
        separated integer cluster ids, paired positionally with the gamma
        rows.

    Tables produced, in order: per-cluster average topic scores (each row
    rescaled to sum to 1), average internal squared-dot similarity, its
    ratio to the global average, average external similarity, its ratio,
    and the per-cluster distribution of the ``top`` selected documents of
    every topic.

    Any average whose denominator would be zero (empty cluster, cluster
    with a single document, all-zero row) is left at zero instead of
    raising ``ZeroDivisionError``.

    Args:
        prefix: dataset name interpolated into the file paths.
        lda_k: number of LDA topics (anything ``int()`` accepts); also a
            path component.
        scluster_k: number of clusters (anything ``int()`` accepts); also
            the clustering file suffix.

    Returns:
        Whatever calling the object built by ``form.Form`` returns.
    """
    n_topics = int(lda_k)
    n_clusters = int(scluster_k)
    lda_dir = 'server/static/data/lda/%s/%s' % (prefix, lda_k)
    table_attrs = {'border': "1", 'cellspacing': "0", 'cellpadding': "3"}

    with open('%s/final.other' % lda_dir, 'r') as f:
        alpha = float(f.read().split('\n')[-2].split()[1])

    with open('%s/final.gamma' % lda_dir, 'r') as f:
        # [:-1] drops the empty string left after the final newline.
        topic_scores = [normalize([float(y) - alpha for y in line.split()])
                        for line in f.read().split('\n')[:-1]]

    with open('data/%s/data.out.clustering.%s' % (prefix, scluster_k),
              'r') as f:
        clusters = [int(c) for c in f.read().split()]

    # Materialised because the pairs are traversed several times below.
    data = list(zip(clusters, topic_scores))

    # --- average topic score per cluster, rows rescaled to sum to 1 -------
    avg_results = [[0.0] * n_topics for _ in range(n_clusters)]
    total = [0] * n_clusters
    for cluster, scores in data:
        total[cluster] += 1
        avg_results[cluster] = [a + b for a, b
                                in zip(scores, avg_results[cluster])]

    for cluster, row in enumerate(avg_results):
        count = total[cluster]
        if count:
            for idx in range(len(row)):
                row[idx] /= float(count)
        row_sum = float(sum(row))
        if row_sum:
            for idx in range(len(row)):
                row[idx] /= row_sum

    # --- internal / external similarities ---------------------------------
    sim_results = dict((c, []) for c in range(n_clusters))
    for cluster, scores in data:
        sim_results[cluster].append(scores)

    in_sims = [0.0] * n_clusters
    ex_sims = [0.0] * n_clusters
    total_pairs = 0
    total_sim = 0.0

    # Internal: every ordered pair of distinct documents in the same cluster.
    for c in range(n_clusters):
        docs = sim_results[c]
        pairs = 0
        for j, a in enumerate(docs):
            for m, b in enumerate(docs):
                if j != m:
                    pairs += 1
                    in_sims[c] += dot(a, b) ** 2
        total_pairs += pairs
        total_sim += in_sims[c]
        if pairs:
            in_sims[c] /= float(pairs)

    # External: every document of the cluster against every document of
    # every other cluster.
    for c in range(n_clusters):
        pairs = 0
        for a in sim_results[c]:
            for other in range(n_clusters):
                if other != c:
                    for b in sim_results[other]:
                        pairs += 1
                        ex_sims[c] += dot(a, b) ** 2
        total_sim += ex_sims[c]
        total_pairs += pairs
        if pairs:
            ex_sims[c] /= float(pairs)

    global_sim = total_sim / float(total_pairs) if total_pairs else 0.0

    def ratio_to_global(values):
        # Ratio of each average to the global average; zeros when the
        # global average itself is zero.
        if not global_sim:
            return [0.0 for _ in values]
        return [v / global_sim for v in values]

    sim_category = list(range(n_clusters))

    # --- cluster membership of the selected documents of each topic -------
    n_score_cols = len(topic_scores[0]) if topic_scores else 0
    mat = [[0] * n_clusters for _ in range(n_score_cols)]

    top = 20
    for topic in range(n_score_cols):
        # NOTE(review): sorted() is ascending, so this picks the `top`
        # LOWEST-scoring documents for the topic -- confirm whether the
        # highest-scoring ones were intended.
        selected = sorted(data, key=lambda x: x[1][topic])[0:top]
        for cluster, _ in selected:
            mat[topic][cluster] += 1

    # After the transpose each row belongs to a cluster and is divided by
    # its own sum.
    # NOTE(review): the table title says "normalized by the number of nodes
    # in each scluster", which is not what this division does -- confirm.
    mat = transpose(mat)
    for row in mat:
        row_sum = float(sum(row))
        if row_sum:
            for idx in range(len(row)):
                row[idx] /= row_sum

    components = [
        form.Table('normalize average topic scores',
                   h_category=list(range(n_topics)),
                   v_category=list(range(len(avg_results))),
                   cross='lda/scluster',
                   use_id=True, pairs=avg_results, **table_attrs),
        form.tr(''), form.tr(''),
        form.Table('average internal similarities',
                   h_category=sim_category,
                   pairs=[in_sims], use_id=True, **table_attrs),
        form.tr(''), form.tr(''),
        form.Table('internal ratio', h_category=sim_category,
                   use_id=True,
                   pairs=[ratio_to_global(in_sims)], **table_attrs),
        form.tr(''), form.tr(''),
        form.Table('average external similarities',
                   h_category=sim_category,
                   pairs=[ex_sims], use_id=True, **table_attrs),
        form.tr(''), form.tr(''),
        form.Table('external ratio', h_category=sim_category,
                   use_id=True,
                   pairs=[ratio_to_global(ex_sims)], **table_attrs),
        form.tr(''), form.tr(''),
        form.Table('#docs among top 20 in topic i that are in cluster j normalized by the number of nodes in each scluster:',
                   h_category=list(range(n_topics)), pairs=mat,
                   v_category=list(range(len(mat))),
                   use_id=True, cross='lda/scluster', **table_attrs),
        form.tr(''),
    ]
    js = form.js('script', 'function create_graph(id) { alert(id);}')

    return form.Form(*components, js=js)()
def getSclusterVsVcluster(prefix, k):
    """Build the form comparing the scluster and vcluster results.

    Reads the two ``*.output`` statistics files (parsed by ``getMatches``)
    and the two clustering assignment files for ``prefix``/``k``, then
    builds a coverage matrix: row ``i`` is scluster cluster ``i``; column
    ``j`` is vcluster label ``j - 1`` (the ``+ 1`` shift maps labels
    ``-1 .. k-1`` onto columns ``0 .. k``).  Each cell holds the share of
    the row's documents carrying that vcluster label minus the share of
    all documents carrying it.

    A row with no documents is treated as having an observed share of 0
    everywhere instead of raising ``ZeroDivisionError``.

    Args:
        prefix: dataset directory name under ``data/``.
        k: number of clusters (anything ``int()`` accepts); also the file
            name suffix.

    Returns:
        Whatever calling the object built by ``form.Form`` returns.
    """
    format_float = '%.5f'
    n_clusters = int(k)
    table_attrs = {'border': "1", 'cellspacing': "0", 'cellpadding': "3"}

    with open('data/%s/data.out.clustering.%s.output' % (prefix, k),
              'r') as f:
        s_stats = f.read().split('\n')
    with open('data/%s/data.mat.clustering.%s.output' % (prefix, k),
              'r') as g:
        v_stats = g.read().split('\n')

    (s_matches, category) = getMatches(s_stats)
    (v_matches, _) = getMatches(v_stats)

    with open('data/%s/data.out.clustering.%s' % (prefix, k), 'r') as f:
        clusters1 = f.read().split()
    with open('data/%s/data.mat.clustering.%s' % (prefix, k), 'r') as g:
        clusters2 = g.read().split()

    # vcluster labels in column order: column j <-> labels[j] == j - 1.
    labels = list(range(-1, n_clusters))

    cov = [[0] * (n_clusters + 1) for _ in range(n_clusters)]
    for s_id, v_id in zip(clusters1, clusters2):
        cov[int(s_id)][int(v_id) + 1] += 1

    total = [0] * (n_clusters + 1)
    for v_id in clusters2:
        total[int(v_id) + 1] += 1

    total_nodes = sum(total)
    # Indexed by COLUMN (label + 1), exactly like `total`.
    fractions = [count / float(total_nodes) if total_nodes else 0.0
                 for count in total]

    for row in cov:
        row_sum = float(sum(row))
        for col in range(len(row)):
            observed = row[col] / row_sum if row_sum else 0.0
            row[col] = observed - fractions[col]

    print_cov = [[format_float % value for value in row] for row in cov]

    components = [
        form.Table('SCluster Similarities', use_id=True,
                   h_category=category,
                   v_category=list(range(len(s_matches))),
                   pairs=s_matches, **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('VCluster Similarities', use_id=True,
                   v_category=list(range(len(v_matches))),
                   h_category=category, pairs=v_matches, **table_attrs),
        form.tr(''),
        form.Table('Cov', use_id=True,
                   h_category=labels, pairs=print_cov, **table_attrs),
        form.tr(''),
    ]

    # One extra table per scluster row, columns ordered by decreasing value.
    for row_id, row in enumerate(cov):
        ranked = sorted(zip(row, labels), key=lambda x: -x[0])

        pairs = [format_float % value for value, _ in ranked]
        # `fractions` is column-indexed, so a label must be shifted by one.
        # Indexing with the raw label would show label -1 the last entry
        # and every other label its neighbour's fraction.
        cate = [str((label, format_float % fractions[label + 1]))
                for _, label in ranked]

        components.append(form.Table('cov%d' % (row_id),
                                     h_category=cate, v_category=[row_id],
                                     pairs=[pairs],
                                     use_id=True, **table_attrs))
        components.append(form.tr(''))

    variables = 'var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=%s";\n' % (prefix, prefix, k, 0)
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@vcluster");}\n')

    return form.Form(*components, js=js)()
def getDegreeComparison(prefix1, prefix2, k, l):
    """Build the form comparing a filtered clustering with a reference one.

    ``runcluster.filterWithI(prefix1, k, l, 'out')`` supplies the filtered
    result (keys used here: ``'new_clusters'``, ``'total'``,
    ``'g0-similarities'``, ``'in-stats'``, ``'out-stats'``).  The reference
    clustering and its statistics are read from
    ``data/<prefix2>/data.out.clustering.<k>`` and the matching ``.output``
    file.

    The coverage matrix has one row per filtered cluster and one column per
    reference cluster; each cell is the share of the row's documents in
    that column minus ``total[column] / sum(total)``.  A row with no
    documents is treated as an observed share of 0 everywhere instead of
    raising ``ZeroDivisionError``.

    Args:
        prefix1: dataset name handed to ``filterWithI``.
        prefix2: dataset directory under ``data/`` for the reference files.
        k: number of clusters (anything ``int()`` accepts); also the file
            name suffix.
        l: passed through to ``filterWithI`` and into the table title and
            the generated link.

    Returns:
        Whatever calling the object built by ``form.Form`` returns.
    """
    format_float = '%.5f'
    n_clusters = int(k)
    table_attrs = {'border': "1", 'cellspacing': "0", 'cellpadding': "3"}

    from runcluster import filterWithI
    results = filterWithI(prefix1, k, l, 'out')
    # First two entries of every similarity record, stored column-wise.
    # Kept in a local so the dict returned by filterWithI is not modified.
    g0_columns = [[record[idx] for record in results['g0-similarities']]
                  for idx in range(2)]

    WORDS = r'[a-zA-Z]+'
    SPACES = r'[\s]+'
    NUMBERS = r'[\d]+'
    # NOTE(review): inside the class, `+-\.` is a character RANGE from '+'
    # to '.', so ',' is accepted as well -- confirm that is harmless.
    SIM = r'[+-\.\d]+'

    # NOTE(review): the bare '|' below is regex alternation, not a literal
    # pipe.  Each pattern is therefore "<six fields> <spaces>" OR
    # "<spaces> newline"; when the second branch matches, every group is
    # None.  Left untouched because the accepted line format is not
    # visible from here.
    category_regex = (r'[\s]*' + SPACES.join(['(%s)' % (WORDS)] * 6) +
                      SPACES + '|' + SPACES + '\n')
    stats_regex = (SPACES +
                   SPACES.join(['(%s)' % (NUMBERS)] * 2 +
                               ['(%s)' % (SIM)] * 4) +
                   SPACES + '|' + SPACES + '\n')

    with open('data/%s/data.out.clustering.%s.output' %
              (prefix2, k), 'r') as f:
        stats_lines = f.readlines()
    with open('data/%s/data.out.clustering.%s' % (prefix2, k), 'r') as g:
        clusters2 = g.read().split()

    clusters1 = results['new_clusters']

    cov = [[0] * n_clusters for _ in range(n_clusters)]
    for new_id, ref_id in zip(clusters1, clusters2):
        cov[int(new_id)][int(ref_id)] += 1

    total = results['total']
    total_nodes = sum(total)
    fractions = [count / float(total_nodes) if total_nodes else 0.0
                 for count in total]

    for row in cov:
        row_sum = float(sum(row))
        for col in range(len(row)):
            observed = row[col] / row_sum if row_sum else 0.0
            row[col] = observed - fractions[col]

    # Statistic rows take precedence; any other line is tried as the header.
    matches = []
    category = None
    for line in stats_lines:
        found = match(stats_regex, line)
        if found:
            matches.append(list(found.groups()))
            continue
        found = match(category_regex, line)
        if found:
            category = list(found.groups())

    # Rows of [cluster size, similarity 0, similarity 1], largest first;
    # the original row index becomes the vertical category.
    g0_rows = [[count] for count in total]
    for row, sims in zip(g0_rows, transpose(g0_columns)):
        row.extend(sims)
    ranked = sorted(enumerate(g0_rows), key=lambda x: -x[1][0])
    g0_similarities = [row for _, row in ranked]
    g0_v_category = [idx for idx, _ in ranked]

    for row in g0_similarities:
        for col in range(2, len(row)):
            # Explicit sign in front of the formatted magnitude.  Formatting
            # the signed value after a '-' prefix would give '--0.12345'
            # for negatives.
            if row[col] > 0:
                row[col] = '+' + format_float % row[col]
            else:
                row[col] = '-' + format_float % abs(row[col])

    # Drop entries 3 and 5 of the parsed header.
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)

    # Every column is formatted, the same way the per-row tables below are.
    print_cov = [[format_float % value for value in row] for row in cov]

    components = [
        form.Table('Ref stats', use_id=True,
                   h_category=list(range(n_clusters)),
                   v_category=['in-stats', 'out-stats', 'total-nodes'],
                   pairs=[results['in-stats'], results['out-stats'],
                          results['total']], **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities', use_id=True,
                   h_category=g0_category, v_category=g0_v_category,
                   pairs=g0_similarities, **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('G%s Similarities' % (l), use_id=True,
                   v_category=list(range(len(matches))),
                   h_category=category, pairs=matches, **table_attrs),
        form.tr(''),
        form.Table('Cov', use_id=True,
                   h_category=list(range(n_clusters)), pairs=print_cov,
                   **table_attrs),
        form.tr(''),
    ]

    # One extra table per row, columns ordered by decreasing value.
    for row_id, row in enumerate(cov):
        ordered = sorted(zip(row, range(n_clusters)), key=lambda x: -x[0])
        pairs = [format_float % value for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]

        components.append(form.Table('cov%d' % (row_id),
                                     h_category=cate, v_category=[row_id],
                                     pairs=[pairs],
                                     use_id=True, **table_attrs))
        components.append(form.tr(''))

    variables = 'var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=%s";\n' % (prefix1, prefix2, k, l)
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@scluster");}\n')

    return form.Form(*components, js=js)()
def getMergeComparison(prefix, k=None):
    """Build the form comparing scluster with the abstract-matrix clustering.

    ``runcluster.filterWithI(prefix, k, 0, 'abstract.matrix')`` supplies
    ``'total'`` and ``'g0-similarities'``.  The two ``*.output`` statistics
    files are parsed by ``getMatches`` and the two assignment files are
    read from ``data/<prefix>/``.

    The coverage matrix has one row per scluster cluster and one column per
    abstract-matrix cluster; each cell is the share of the row's documents
    in that column minus ``total[column] / sum(total)``.  A row with no
    documents is treated as an observed share of 0 everywhere instead of
    raising ``ZeroDivisionError``.

    Args:
        prefix: dataset directory name under ``data/``.
        k: number of clusters (anything ``int()`` accepts); also the file
            name suffix.  When omitted, the module-level ``k`` is used,
            which is what this function did before it accepted the
            argument.

    Returns:
        Whatever calling the object built by ``form.Form`` returns.

    Raises:
        NameError: ``k`` was not given and no module-level ``k`` exists.
    """
    if k is None:
        try:
            k = globals()['k']
        except KeyError:
            raise NameError("global name 'k' is not defined")

    format_float = '%.5f'
    n_clusters = int(k)
    table_attrs = {'border': "1", 'cellspacing': "0", 'cellpadding': "3"}

    from runcluster import filterWithI
    results = filterWithI(prefix, k, 0, 'abstract.matrix')

    with open('data/%s/data.out.clustering.%s.output' % (prefix, k),
              'r') as f:
        s_stats = f.read().split('\n')
    with open('data/%s/data.abstract.matrix.clustering.%s.output'
              % (prefix, k), 'r') as g:
        v_stats = g.read().split('\n')

    (s_matches, category) = getMatches(s_stats)
    (v_matches, _) = getMatches(v_stats)

    with open('data/%s/data.out.clustering.%s' % (prefix, k), 'r') as f:
        clusters1 = f.read().split()
    with open('data/%s/data.abstract.matrix.clustering.%s' % (prefix, k),
              'r') as g:
        clusters2 = g.read().split()

    # Cluster sizes of both assignments (shown as Total1 / Total2).
    total1 = [0] * n_clusters
    total2 = [0] * n_clusters
    for label in clusters1:
        total1[int(label)] += 1
    for label in clusters2:
        total2[int(label)] += 1

    total = results['total']

    cov = [[0] * n_clusters for _ in range(n_clusters)]
    for s_id, m_id in zip(clusters1, clusters2):
        cov[int(s_id)][int(m_id)] += 1

    total_nodes = sum(total)
    fractions = [count / float(total_nodes) if total_nodes else 0.0
                 for count in total]

    for row in cov:
        row_sum = float(sum(row))
        for col in range(len(row)):
            observed = row[col] / row_sum if row_sum else 0.0
            row[col] = observed - fractions[col]

    # Same text as the Python 2 statement `print 'total', total`, written
    # so that it also parses as a function call.
    print('total %s' % (total,))

    # Rows of [cluster size] + similarity record, largest cluster first;
    # the original row index becomes the vertical category.
    g0_rows = [[count] for count in total]
    for row, sims in zip(g0_rows, results['g0-similarities']):
        row.extend(sims)
    ranked = sorted(enumerate(g0_rows), key=lambda x: -x[1][0])
    g0_similarities = [row for _, row in ranked]
    g0_v_category = [idx for idx, _ in ranked]

    for row in g0_similarities:
        for col in range(2, len(row)):
            # Explicit sign in front of the formatted magnitude.  Formatting
            # the signed value after a '-' prefix would give '--0.12345'
            # for negatives.
            if row[col] > 0:
                row[col] = '+' + format_float % row[col]
            else:
                row[col] = '-' + format_float % abs(row[col])

    # Drop entries 3, 5 and 0 of the parsed header, in that order.
    g0_category = list(category)
    g0_category.pop(3)
    g0_category.pop(4)
    g0_category.pop(0)

    print_cov = [[format_float % value for value in row] for row in cov]

    components = [
        form.Table('SCluster Similarities', use_id=True,
                   h_category=category,
                   v_category=list(range(len(s_matches))),
                   pairs=s_matches, **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('VCluster Similarities', use_id=True,
                   v_category=list(range(len(v_matches))),
                   h_category=category, pairs=v_matches, **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('G0 Similarities', use_id=True,
                   h_category=g0_category, v_category=g0_v_category,
                   cross='cid', pairs=g0_similarities, **table_attrs),
        form.tr(''),
        form.tr(''),
        form.Table('Total', use_id=True,
                   h_category=list(range(0, len(total))), pairs=[total],
                   **table_attrs),
        form.tr(''),
        form.Table('Total1', use_id=True,
                   h_category=list(range(0, len(total1))), pairs=[total1],
                   **table_attrs),
        form.tr(''),
        form.Table('Total2', use_id=True,
                   h_category=list(range(0, len(total2))), pairs=[total2],
                   **table_attrs),
        form.tr(''),
        form.Table('fractions', use_id=True,
                   h_category=list(range(0, len(fractions))),
                   pairs=[fractions], **table_attrs),
        form.tr(''),
        form.Table('Cov', use_id=True,
                   h_category=list(range(0, n_clusters)), pairs=print_cov,
                   **table_attrs),
        form.tr(''),
    ]

    # One extra table per row, columns ordered by decreasing value.
    for row_id, row in enumerate(cov):
        ordered = sorted(zip(row, range(0, n_clusters)),
                         key=lambda x: -x[0])

        pairs = [format_float % value for value, _ in ordered]
        cate = [str((col, format_float % fractions[col]))
                for _, col in ordered]

        components.append(form.Table('cov%d' % (row_id),
                                     h_category=cate, v_category=[row_id],
                                     pairs=[pairs],
                                     use_id=True, **table_attrs))
        components.append(form.tr(''))

    variables = 'var link = "/van_graph?prefix1=%s&prefix2=%s&k=%s&l=0";\n' % (prefix, prefix, k)
    js = form.js('script', variables + 'function create_graph(id) { window.open(link + "&cell=" + id + "@scluster@vcluster_tfidf");}\n')

    return form.Form(*components, js=js)()