def feature_meanstd(mat, reg=None):
    """Compute the distributed per-dimension mean and standard deviation.

    This is a thin wrapper kept only for backward compatibility; the actual
    computation lives in iceberk.mathutil (mathutil.mpi_meanstd).

    Input:
        mat: the local data matrix, where each row is a feature vector and
            each column is a feature dimension.
        reg: optional regularizer. When it is not None, the returned std is
            np.sqrt(std**2 + reg) instead of the raw std.
    Output:
        m: the mean for each dimension.
        std: the (optionally regularized) standard deviation for each
            dimension.
    """
    mean, deviation = mathutil.mpi_meanstd(mat)
    # Without a regularizer the raw statistics are handed back untouched.
    if reg is None:
        return mean, deviation
    # Add the regularizer to the variance before taking the square root.
    return mean, np.sqrt(deviation**2 + reg)
{'k': 1600, 'max_iter':100})), # does encoding pipeline.SpatialPooler({'grid': (2,2), 'method': 'ave'}) ]) logging.info('Training the pipeline...') conv.train(cifar, 400000, exhaustive=True) mpi.root_pickle(conv, 'cifar_conv.pickle') # do pruning try: selected_idx = pickle.load(open('cifar_selected_idx.pickle')) logging.info('Skipping first layer pruning') except Exception, e: features = conv.sample(cifar, 200000, True) mpi.dump_matrix_multi(features, '/u/vis/ttmp/jiayq/cifar/cifar_feature_pooled_sample') m, std = mathutil.mpi_meanstd(features) features -= m features /= std covmat = mathutil.mpi_cov(features, reg = 0.01) if mpi.is_root(): selected_idx = pcfs.max_variance_feature_selection(covmat, 800) else: selected_idx = None selected_idx = mpi.COMM.bcast(selected_idx) mpi.root_pickle((m, std, covmat), 'cifar_squared_correlation.pickle') mpi.root_pickle(selected_idx, 'cifar_selected_idx.pickle') dictionary_all = conv[-2].dictionary for i in [25,50,100,200,400,800,1600]: logging.info('Training with dictionary size %d' % i)
}) ]) logging.info('Training the pipeline...') conv.train(cifar, 400000, exhaustive=True) mpi.root_pickle(conv, 'cifar_conv.pickle') # do pruning try: selected_idx = pickle.load(open('cifar_selected_idx.pickle')) logging.info('Skipping first layer pruning') except Exception, e: features = conv.sample(cifar, 200000, True) mpi.dump_matrix_multi( features, '/u/vis/ttmp/jiayq/cifar/cifar_feature_pooled_sample') m, std = mathutil.mpi_meanstd(features) features -= m features /= std covmat = mathutil.mpi_cov(features, reg=0.01) if mpi.is_root(): selected_idx = pcfs.max_variance_feature_selection(covmat, 800) else: selected_idx = None selected_idx = mpi.COMM.bcast(selected_idx) mpi.root_pickle((m, std, covmat), 'cifar_squared_correlation.pickle') mpi.root_pickle(selected_idx, 'cifar_selected_idx.pickle') dictionary_all = conv[-2].dictionary for i in [25, 50, 100, 200, 400, 800, 1600]: logging.info('Training with dictionary size %d' % i)