def histogram():
    """
    Plot how many users visited each number of conferences.

    The x axis is the number of conferences attached to a user and the
    y axis is the number of users with exactly that many conferences.
    The figure is only drawn on the current pyplot axes; showing or saving
    it is left to the caller.
    """
    members, _ = get_members_and_conferences()
    # Frequency table: {number of conferences: number of users}.
    cnt = Counter(len(visited) for visited in members.values())
    # Only the counts are needed from here on, so drop the references and
    # force a collection before plotting.
    del _
    del members
    gc.collect()
    # A Counter does not keep its keys sorted; order the points by x so the
    # line runs left to right instead of jumping between arbitrary points.
    points = sorted(cnt.items())
    conference_counts = [n_conferences for n_conferences, n_users in points]
    user_counts = [n_users for n_conferences, n_users in points]
    plt.plot(conference_counts, user_counts)
    plt.xlabel('Number of conferences')
    plt.ylabel(u'Number of users')
    # Fixed viewport: x in [0, 60], y in [0, 50000].
    plt.axis([0, 60, 0, 50000])
def prepare_similarity_matrix():
    """
    Compute similarity matrix for collaborative filtering model.

    Rows of the sparse matrix are conferences and columns are users.
    The weight of a pair of conferences is the cosine between their vectors,
    computed here as the number of users they have in common divided by the
    product of the square roots of their user counts.
    Weight is computed only if two conferences include common users,
    otherwise weight = 0.

    Each computed weight is appended to 'output/similarity_triples.txt' as a
    line "i,j,weight" (one line per unordered pair, including i == j).
    The dense matrix is used only while the function runs and is not
    returned. Progress and total run time are printed to stdout.
    """
    # The matrix is only indexed in the main loop, so convert it once.
    m = get_sparse_matrix().tocsc()
    members, conferences = get_members_and_conferences()
    (num_to_member,
     member_to_num,
     num_to_conference,
     conference_to_num,
     _,
     _) = get_mapping()
    n_conferences, n_members = m.shape
    print(n_conferences)
    res = np.zeros((n_conferences, n_conferences))

    print('Preparing...')
    # users[i]   -> column indices of the users of conference i
    # squares[i] -> sqrt of the number of users of conference i
    users = []
    squares = []
    for i in range(n_conferences):
        percent = i * 100 / n_conferences
        if percent % 10 == 0:
            print('{}%'.format(percent))
        conference_users = [member_to_num[x]
                            for x in conferences[num_to_conference[i]]]
        users.append(conference_users)
        squares.append(sqrt(len(conference_users)))

    # users_confs[u] -> row indices of the conferences of user u
    users_confs = []
    for u in range(n_members):
        if u % 50000 == 0:
            print('{}%'.format(u * 100 / n_members))
        users_confs.append([conference_to_num[x]
                            for x in members[num_to_member[u]]])

    print('------\nMain part\n------')
    start = mktime(localtime())
    # The context manager closes the file even if the loop below raises.
    with open('output/similarity_triples.txt', 'a') as matrix_file:
        for i in range(n_conferences):
            if i % 20 == 0:
                print('---\n{}%'.format(round(i * 100 / n_conferences, 3)))
            # Only conferences reachable through a shared user are visited,
            # so pairs without common users keep their initial weight of 0.
            for u in users[i]:
                for uc in users_confs[u]:
                    # A non-zero entry means this pair was already handled
                    # (possibly from the other side, since res is filled
                    # symmetrically).
                    if res[i, uc] != 0:
                        continue
                    # Count common users by probing the row of one conference
                    # at the columns of the other, using the shorter user
                    # list as the probe.
                    if len(users[uc]) < len(users[i]):
                        common = len(m[i, users[uc]].nonzero()[1])
                    else:
                        common = len(m[uc, users[i]].nonzero()[1])
                    weight = common / (squares[uc] * squares[i])
                    res[i, uc] = weight
                    res[uc, i] = weight
                    matrix_file.write('{},{},{}\n'.format(i, uc, weight))
    end = mktime(localtime())
    elapsed = end - start
    print('---------\n TIME = {} \n----------'.format(elapsed))