Beispiel #1
0
def convert_matlab_pca_data(args, direction_matlab_name,
                            direction_python_name):
    """Convert PCA directions exported from MATLAB into the Python h5 layout.

    The model checkpoint for ``args.max_epoch`` is loaded to obtain a shape
    template. The flattened directions stored in the MATLAB file are reshaped
    with ``npvec_to_tensorlist`` against that template and written, together
    with the explained-variance datasets, to ``direction_python_name``.

    Args:
        args: Object providing ``model_folder``, ``prefix``, ``max_epoch``,
            ``suffix``, ``dataset``, ``model``, ``dir_type`` and ``ignore``.
        direction_matlab_name: Path of the HDF5 file to read. It must contain
            the datasets ``explained_variance_ratio_``,
            ``explained_variance_``, ``directionx`` and ``directiony``.
        direction_python_name: Path of the HDF5 file to create (opened in
            ``'w'`` mode, so an existing file is replaced).

    Raises:
        ValueError: If ``args.dir_type`` is neither ``'weights'`` nor
            ``'states'``.
    """
    if args.dir_type not in ('weights', 'states'):
        raise ValueError("unsupported dir_type: %r" % (args.dir_type,))

    last_model_file = (args.model_folder + '/' + args.prefix +
                       str(args.max_epoch) + args.suffix)
    net = model_loader.load(args.dataset, args.model, last_model_file)

    # The template only supplies the structure the flat vectors are reshaped
    # into: the weight list for 'weights', the state dict for 'states'.
    if args.dir_type == 'weights':
        template = net_plotter.get_weights(net)
    else:
        template = net.state_dict()

    # Read everything needed from the MATLAB results up front so the input
    # file is closed even if a later step fails.
    with h5py.File(direction_matlab_name, 'r') as f:
        explained_variance_ratio = np.array(f['explained_variance_ratio_'])
        explained_variance = np.array(f['explained_variance_'])
        pc1 = np.array(f['directionx'])
        pc2 = np.array(f['directiony'])

    # Convert vectorized directions to the same shape as the model so they
    # can be saved in the h5 file.
    xdirection = npvec_to_tensorlist(pc1, template)
    ydirection = npvec_to_tensorlist(pc2, template)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    # The output file is only created once all inputs were processed.
    with h5py.File(direction_python_name, 'w') as fpy:
        fpy['explained_variance_ratio_'] = explained_variance_ratio
        fpy['explained_variance_'] = explained_variance
        h5_util.write_list(fpy, 'xdirection', xdirection)
        h5_util.write_list(fpy, 'ydirection', ydirection)

    print('PCA directions saved in: %s' % direction_python_name)
Beispiel #2
0
def setup_PCA_directions(args, model_files):
    """Find PCA directions for the optimization path of a trained model.

    For every checkpoint in ``model_files`` the difference to the reference
    weights/states is flattened into one row of a matrix; a 2-component PCA
    is fitted on that matrix and the two components are saved, reshaped like
    the model, into ``<folder>/directions.h5``. If that file already exists
    and contains ``explained_variance_``, nothing is recomputed.

    NOTE(review): the reference ``w`` (weights) and ``s`` (states) are not
    parameters of this function; they are looked up in the enclosing module
    scope. Confirm they are defined there before calling.

    Args:
        args: Object providing ``model_folder``, ``dir_type``, ``ignore``,
            ``save_epoch``, ``dataset``, ``model`` and ``data_parallel``.
        model_files: Iterable of checkpoint paths forming the optimization
            path.

    Returns:
        dir_name: the h5 file that stores the directions.

    Raises:
        ValueError: If directions must be computed and ``args.dir_type`` is
            neither ``'weights'`` nor ``'states'``.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # No shell involved: works for paths with spaces, creates missing parents
    # and is a no-op when the folder already exists.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if the direction file exists and is complete. The handle is always
    # closed here because the file is re-opened in 'w' mode further down.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f:
                return dir_name

    if args.dir_type not in ('weights', 'states'):
        raise ValueError("unsupported dir_type: %r" % (args.dir_type,))
    use_weights = args.dir_type == 'weights'

    # Load models and prepare the optimization path matrix.
    matrix = []
    for model_file in model_files:
        print(model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file,
                                 args.data_parallel)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = weights_to_vec(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix.
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: " + str(cal_angle(pc1, pc2)))

    print(pca.explained_variance_ratio_)

    # Convert vectorized directions to the same shape as models to save in
    # the h5 file.
    if use_weights:
        xdirection = vec_to_weights(pc1, w)
        ydirection = vec_to_weights(pc2, w)
    else:
        xdirection = vec_to_states(pc1, s)
        ydirection = vec_to_states(pc2, s)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)

        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in ' + dir_name)

    return dir_name
Beispiel #3
0
def setup_PCA_directions(args, model_files, w, s):
    """Find PCA directions for the optimization path of a trained model.

    For every checkpoint in ``model_files`` the difference to the reference
    ``w`` / ``s`` is flattened into one row of a matrix; a 2-component PCA is
    fitted on that matrix and the two components are saved, reshaped like the
    reference, into ``<folder>/directions.h5``. If that file already exists
    and contains ``explained_variance_``, nothing is recomputed.

    Args:
        args: Object providing ``model_folder``, ``dir_type``, ``ignore``,
            ``save_epoch``, ``dataset`` and ``model``.
        model_files: Iterable of checkpoint paths forming the optimization
            path.
        w: Reference weights; passed to ``net_plotter.get_diff_weights`` and
            used as the shape template when ``args.dir_type == 'weights'``.
        s: Reference state dict; passed to ``net_plotter.get_diff_states``
            and used as the shape template when
            ``args.dir_type == 'states'``.

    Returns:
        dir_name: the h5 file that stores the directions.

    Raises:
        ValueError: If directions must be computed and ``args.dir_type`` is
            neither ``'weights'`` nor ``'states'``.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # No shell involved: works for paths with spaces, creates missing parents
    # and is a no-op when the folder already exists.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if the direction file exists and is complete. The handle is always
    # closed here because the file is re-opened in 'w' mode further down.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f:
                return dir_name

    if args.dir_type not in ('weights', 'states'):
        raise ValueError("unsupported dir_type: %r" % (args.dir_type,))
    use_weights = args.dir_type == 'weights'

    # Load models and prepare the optimization path matrix.
    matrix = []
    for model_file in model_files:
        print(model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = tensorlist_to_tensor(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix.
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: %f" % cal_angle(pc1, pc2))

    print("pca.explained_variance_ratio_: %s" % str(pca.explained_variance_ratio_))

    # Convert vectorized directions to the same shape as models to save in
    # the h5 file.
    template = w if use_weights else s
    xdirection = npvec_to_tensorlist(pc1, template)
    ydirection = npvec_to_tensorlist(pc2, template)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)

        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in: %s' % dir_name)

    return dir_name
Beispiel #4
0
def setup_othermodels_PCA_directions(args, ignore_embedding, model_files, w):
    """Find PCA directions for the optimization path of saved checkpoints.

    Each file in ``model_files`` is loaded with ``torch.load`` and its
    ``'model'`` entry is turned into a list of tensors. The difference to the
    reference ``w`` is flattened into one row of a matrix; a 2-component PCA
    is fitted on that matrix and the two components are saved, reshaped like
    ``w``, into ``<folder>/directions.h5``. If that file already exists and
    contains ``explained_variance_``, nothing is recomputed.

    Args:
        args: Object providing ``save_dir``, ``lr`` (indexable; the first
            entry is used in the folder name), ``optimizer`` and ``ignore``.
        ignore_embedding: When truthy, tensors whose key contains ``'embed'``
            are zeroed in addition to the always-zeroed ``'version'`` and
            ``'_float_tensor'`` entries.
        model_files: Iterable of checkpoint paths forming the optimization
            path.
        w: Reference weights; passed to ``net_plotter.get_diff_weights`` and
            used as the shape template for the saved directions.

    Returns:
        dir_name: the h5 file that stores the directions.
    """
    # Name the .h5 file that stores the PCA directions.
    folder_name = 'fairseq_master' + '/' + args.save_dir + '/PCA_'
    folder_name += 'lr=' + str(args.lr[0])
    # NOTE: the spelling '_optimier=' is part of the on-disk folder name; it
    # is kept as-is so previously written direction files are still found.
    folder_name += '_optimier=' + str(args.optimizer)
    folder_name += '_ignore_embedding=' + str(ignore_embedding)
    if args.ignore:
        folder_name += '_ignoreBN'
    # No shell involved: works for paths with spaces, creates missing parents
    # and is a no-op when the folder already exists.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # Skip if the direction file exists and is complete. The handle is always
    # closed here because the file is re-opened in 'w' mode further down.
    if os.path.exists(dir_name):
        with h5py.File(dir_name, 'r') as f:
            if 'explained_variance_' in f:
                return dir_name

    # Keys containing any of these substrings are zeroed in place before the
    # difference to the reference weights is taken.
    zeroed_markers = ('version', '_float_tensor')
    if ignore_embedding:
        zeroed_markers += ('embed',)

    # Load models and prepare the optimization path matrix.
    matrix = []
    for model_file in model_files:
        print(model_file)
        # Restore every storage on the CPU regardless of where it was saved.
        state = torch.load(
            model_file,
            map_location=lambda storage, loc: torch.serialization.
            default_restore_location(storage, 'cpu'),
        )
        s2 = state['model']

        w2 = []
        for key in s2:
            if any(marker in key for marker in zeroed_markers):
                s2[key].fill_(0)
            w2.append(s2[key])

        d = net_plotter.get_diff_weights(w, w2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = tensorlist_to_tensor(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix.
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: %f" % cal_angle(pc1, pc2))

    print("pca.explained_variance_ratio_: %s" %
          str(pca.explained_variance_ratio_))

    # Convert vectorized directions to the same shape as the reference
    # weights to save in the h5 file.
    xdirection = npvec_to_tensorlist(pc1, w)
    ydirection = npvec_to_tensorlist(pc2, w)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)

        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in: %s' % dir_name)

    return dir_name