def plot_sift_upto():
    """Plot histograms for the SIFT and SIFT-C datasets.

    For each of the two dataset variants this loads the 'train' split and
    the first 300 entries of the 'query' split, then hands them to
    ``utils.plot_dist_hist_upto`` together with a limit value and a label
    ('sift' or 'sift_c').

    The data is plotted exactly as loaded: no mean-centering or
    normalisation is applied to either the data or the queries.
    """
    # NOTE(review): presumably the "up to" limit of the histogram helper --
    # confirm against utils.plot_dist_hist_upto.
    limits = [4000]
    n_queries = 300

    # Load once; the splits do not depend on the limit being plotted.
    sift_data = utils.load_sift_data('train')
    sift_q = utils.load_sift_data('query')[:n_queries]
    for limit in limits:
        utils.plot_dist_hist_upto(sift_data, sift_q, limit, 'sift')

    sift_c_data = utils.load_sift_c_data('train')
    sift_c_q = utils.load_sift_c_data('query')[:n_queries]
    # The leftover debugger breakpoint that used to sit here was removed so
    # the second plot is produced without requiring an interactive session.
    for limit in limits:
        utils.plot_dist_hist_upto(sift_c_data, sift_c_q, limit, 'sift_c')
from kahip.kmkahip import run_kmkahip

if __name__ == '__main__':
    opt = utils.parse_args()

    # Sweep settings: number of parts and height of the hierarchy.
    n_cluster_l = [2]
    height_l = [1]

    # Choose the dataset loader. opt.glove is checked before opt.sift; when
    # neither is set, the default loader utils.load_data is used.
    if opt.glove:
        load_split = utils.load_glove_data
    elif opt.sift:
        load_split = utils.load_sift_data
    else:
        load_split = utils.load_data

    # Load every split with the chosen loader and move it to utils.device.
    dataset = load_split('train').to(utils.device)
    queryset = load_split('query').to(utils.device)
    neighbors = load_split('answers').to(utils.device)

    # Action to take at each level of the hierarchy: one of 'km', 'kahip',
    # 'train' or 'svm'. Lower keys are closer to the leaves.
    # If 'kahip' is used, evaluation must be done on the training set rather
    # than the test set, because the partitioning was computed on the
    # training set only.
    # Example: opt.level2action = {0:'km', 1:'train', 3:'train'}
    opt.level2action = {0: 'train'}

    for n_cluster in n_cluster_l:
        print('n_cluster {}'.format(n_cluster))
        opt.n_clusters = n_cluster
def compute_degrees_distr():
    """Print the head of the degree distribution of the SIFT 'train' split.

    Loads the SIFT 'train' split, moves it to ``utils.device``, passes it to
    ``utils.compute_degree_distr`` with a second argument of 10, prints the
    first 30 entries of the result and then drops into the debugger.
    Returns nothing.
    """
    # To run on GloVe instead, load the split with
    # utils.load_glove_data('train').
    sift_train = utils.load_sift_data('train')
    sift_train = sift_train.to(utils.device)

    # NOTE(review): 10 is presumably the neighbour count used to build the
    # graph -- confirm against utils.compute_degree_distr.
    degree_distr = utils.compute_degree_distr(sift_train, 10)
    print(degree_distr[:30])

    # Stops in pdb after printing; execution pauses here until continued.
    pdb.set_trace()