def script_writeCommandsForPreprocessing(all_dirs_file, command_file_pre, num_proc, check_file=None):
    # Read the directory list and strip the trailing character from each line.
    all_dirs = util.readLinesFromFile(all_dirs_file)
    all_dirs = [dir_curr[:-1] for dir_curr in all_dirs]
    if check_file is not None:
        all_dirs = getRemainingDirs(all_dirs, check_file)

    # Template for a shell command that runs the MATLAB preprocessing script
    # on one directory and redirects its output to a per-directory log file.
    command_pre = 'echo '
    command_middle_1 = ';cd ~/Downloads/opticalflow; matlab -nojvm -nodisplay -nosplash -r "out_folder=\''
    command_middle = '\';saveTrainingData" > '
    command_end = ' 2>&1'

    commands = []
    for dir_curr in all_dirs:
        dir_curr = util.escapeString(dir_curr)
        log_file = os.path.join(dir_curr, 'log.txt')
        command = command_pre + dir_curr + command_middle_1 + dir_curr + command_middle + log_file + command_end
        commands.append(command)

    # Split the commands into num_proc chunks, one command file per process.
    idx_range = util.getIdxRange(len(commands), len(commands) / num_proc)
    command_files = []
    for i, start_idx in enumerate(idx_range[:-1]):
        command_file_curr = command_file_pre + str(i) + '.txt'
        end_idx = idx_range[i + 1]
        commands_rel = commands[start_idx:end_idx]
        util.writeFile(command_file_curr, commands_rel)
        command_files.append(command_file_curr)
    return command_files
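# The command files above are plain lists of shell commands, one per line.
# The helper below is a minimal sketch (not part of the original code) of how
# they could be launched in parallel, one shell job per file.
def run_command_files_parallel(command_files):
    # Hypothetical helper: run each command file as its own shell process and
    # wait for all of them to finish.
    import subprocess
    procs = [subprocess.Popen('sh ' + file_curr, shell=True) for file_curr in command_files]
    for proc in procs:
        proc.wait()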
def script_breakUpInFilesListForFeatureExtraction(file_index, in_file_meta_pre, out_file_meta_pre):
    [in_files, _, _] = pickle.load(open(file_index, 'rb'))
    batch_size = len(in_files) / 10
    idx_range = util.getIdxRange(len(in_files), batch_size)
    for idx, idx_begin in enumerate(idx_range[:-1]):
        idx_end = idx_range[idx + 1]
        in_files_rel = in_files[idx_begin:idx_end]
        in_file_meta_curr = in_file_meta_pre + '_' + str(idx) + '.p'
        out_file_meta_curr = out_file_meta_pre + '_' + str(idx) + '.p'
        pickle.dump(in_files_rel, open(in_file_meta_curr, 'wb'))
        print in_file_meta_curr, out_file_meta_curr
def getTopNError(net, transformer, im_files, imagenet_idx_mapped, batch_size,
                 top_n, printDebug=True, pred_classes=False):
    # For each image, run the net forward and mark whether the ground-truth
    # class appears among the top_n predictions. Note that despite its name,
    # error_bin is set to 1 when the prediction is *correct* within the top n.
    num_files = len(im_files)
    idx_range = util.getIdxRange(num_files, batch_size)
    error_bin = np.zeros((num_files,), dtype='int')
    if pred_classes:
        pred_classes_mat = -1 * np.ones((num_files, top_n), dtype='int')
    for idx_idx, idx_begin in enumerate(idx_range[:-1]):
        idx_end = idx_range[idx_idx + 1]
        im_files_curr = im_files[idx_begin:idx_end]
        gt_class_curr = imagenet_idx_mapped[idx_begin:idx_end]
        batch_size_curr = len(im_files_curr)
        net.blobs['data'].reshape(batch_size_curr, 3, 227, 227)
        for idx_im in range(batch_size_curr):
            net.blobs['data'].data[idx_im, :, :, :] = transformer.preprocess(
                'data', caffe.io.load_image(im_files_curr[idx_im]))
        net.forward()
        for idx_im in range(net.blobs['prob'].data.shape[0]):
            # Indices of the top_n highest-probability classes, best first.
            top_k = net.blobs['prob'].data[idx_im].flatten().argsort()[-1:-(top_n + 1):-1]
            gt_class = gt_class_curr[idx_im]
            if pred_classes:
                pred_classes_mat[idx_im + idx_begin, :] = top_k
            if sum(top_k == gt_class) > 0:
                error_bin[idx_im + idx_begin] = 1
        if printDebug:
            print idx_begin, idx_end, batch_size_curr
    if printDebug:
        print sum(error_bin), len(error_bin)
    if pred_classes:
        return error_bin, pred_classes_mat
    else:
        return error_bin
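# A minimal setup sketch (not from the original source) for the net and
# transformer that getTopNError expects, using the standard pycaffe idiom.
# The deploy prototxt, caffemodel, and mean .npy paths are hypothetical
# placeholders.
def setupNetForTopNError_sketch(deploy_proto, caffemodel, mean_npy, gpu=0):
    caffe.set_device(gpu)
    caffe.set_mode_gpu()
    net = caffe.Net(deploy_proto, caffemodel, caffe.TEST)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))  # HxWxC -> CxHxW
    transformer.set_mean('data', np.load(mean_npy).mean(1).mean(1))  # per-channel mean
    transformer.set_raw_scale('data', 255)  # caffe.io loads images in [0,1]
    transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR
    return net, transformer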
def getIdxRange(total, thresh, num_parts):
    # Wrapper around util.getIdxRange that merges a trailing chunk smaller
    # than thresh into the previous one, so no process gets a tiny last batch.
    step = int(math.floor(total / float(num_parts)))
    thresh = min(step / 2, thresh)
    idx_range_new = util.getIdxRange(total, step)
    rem = total % step
    if 0 < rem < thresh and len(idx_range_new) > 2:
        # Drop the second-to-last boundary so the remainder is absorbed into
        # the final chunk.
        idx_range_new = idx_range_new[:-2] + [idx_range_new[-1]]
    num_parts = len(idx_range_new) - 1
    return idx_range_new, num_parts
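# util.getIdxRange is used throughout this file but not defined here. Based
# on how it is called (getIdxRange(total, batch_size) returns boundaries that
# are consumed pairwise as [start, end) slices, with a possibly shorter last
# chunk), a plausible implementation is sketched below; this is an assumption,
# not the original util code.
def _getIdxRange_sketch(total, batch_size):
    # Boundary indices [0, batch_size, 2*batch_size, ..., total].
    idx_range = list(range(0, total, batch_size))
    if not idx_range or idx_range[-1] != total:
        idx_range.append(total)
    return idx_range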
def script_saveBigFeatureMats(params):
    out_file_featureMats_pre = params.out_file_featureMats_pre
    out_file_meta_pre = params.out_file_meta_pre
    path_to_db = params.path_to_db
    out_file_paths = params.out_file_paths
    num_batches = params.num_batches

    # Pull the distinct feature paths out of the db once, shuffle them, and
    # cache the list on disk.
    if not os.path.exists(out_file_paths):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        paths_to_features = mani.select((Tube.deep_features_path,), distinct=True)
        paths_to_features = [path_curr[0] for path_curr in paths_to_features]
        mani.closeSession()
        random.shuffle(paths_to_features)
        pickle.dump(paths_to_features, open(out_file_paths, 'wb'))

    paths_to_features = pickle.load(open(out_file_paths, 'rb'))
    paths_to_features.sort()

    batch_size = len(paths_to_features) / num_batches
    idxRange = util.getIdxRange(len(paths_to_features), batch_size)
    print len(idxRange), idxRange[-1]
    for start_idx in range(len(idxRange) - 1):
        out_file_curr = out_file_featureMats_pre + '_' + str(start_idx) + '.npz'
        out_file_meta_curr = out_file_meta_pre + '_' + str(start_idx) + '.p'
        print start_idx, idxRange[start_idx], idxRange[start_idx + 1], out_file_curr, out_file_meta_curr,
        paths_to_features_curr = paths_to_features[idxRange[start_idx]:idxRange[start_idx + 1]]
        t = time.time()
        train, shape_record = getGiantFeaturesMatGPU(paths_to_features_curr)
        train = np.array(train)
        np.savez(out_file_curr, train)
        pickle.dump([paths_to_features_curr, shape_record], open(out_file_meta_curr, 'wb'))
        print time.time() - t
        # Debug: only the first batch is written; remove this break to
        # process all batches.
        break
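# A minimal sketch (an assumption, not original code) of reading back one
# feature batch written by script_saveBigFeatureMats: the .npz holds the
# stacked feature matrix under np.savez's default key, and the .p holds the
# feature paths plus per-file shape records.
def loadBigFeatureMat_sketch(out_file_curr, out_file_meta_curr):
    train = np.load(out_file_curr)['arr_0']
    paths_to_features_curr, shape_record = pickle.load(open(out_file_meta_curr, 'rb'))
    return train, paths_to_features_curr, shape_record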
def writeCommandsForTrainDebug(params, path_to_train_file, out_file_commands_pre, model_num):
    # Build one torch training command per (folder, model number) pair, then
    # write them out as shell scripts of three commands each.
    model_num = [str(model_num_curr) for model_num_curr in model_num]
    print model_num
    commands = []
    for path_to_folder, out_dir, segConstant in params:
        for model_num_curr in model_num:
            command = ('th ' + path_to_train_file +
                       ' -model ' + os.path.join(path_to_folder, 'model_all_' + model_num_curr + '.dat') +
                       ' -outDir ' + os.path.join(out_dir, str(model_num_curr)) +
                       ' -segConstant ' + segConstant)
            commands.append(command)
            print command
    print len(commands)
    idx_split = util.getIdxRange(len(commands), 3)
    for idx_idx, begin_idx in enumerate(idx_split[:-1]):
        end_idx = idx_split[idx_idx + 1]
        commands_curr = commands[begin_idx:end_idx]
        out_file_curr = out_file_commands_pre + str(idx_idx) + '.sh'
        util.writeFile(out_file_curr, commands_curr)
        print 'sh ' + out_file_curr
def main():
    train_val_txt = '/Users/maheenrashid/Dropbox (Personal)/Davis_docs/Research/VOCdevkit 2/VOC2012/ImageSets/Main/horse_trainval.txt'
    path_to_im = '/Users/maheenrashid/Dropbox (Personal)/Davis_docs/Research/VOCdevkit 2/VOC2012/JPEGImages'
    path_to_anno = '/Users/maheenrashid/Dropbox (Personal)/Davis_docs/Research/VOCdevkit 2/VOC2012/Annotations'
    out_dir = '../pascal'
    util.mkdir(out_dir)
    out_file = os.path.join(out_dir, 'horse.txt')
    out_dir_im = '../pascal/just_horse_im'
    util.mkdir(out_dir_im)
    # saveBBoxImage(out_file,path_to_anno,out_dir_im)

    im_files = util.getFilesInFolder(out_dir_im, ext='.jpg')
    file_names = util.getFileNames(im_files, ext=True)
    batch_size = 20
    batch_idx = util.getIdxRange(len(file_names), batch_size)
    print len(batch_idx)

    # Copy each batch of images into its own numbered subdirectory in
    # parallel, skipping files that already exist.
    args = []
    counter = 0
    for idx_batch_start, batch_start in enumerate(batch_idx[:-1]):
        batch_end = batch_idx[idx_batch_start + 1]
        im_files_rel = im_files[batch_start:batch_end]
        file_names_rel = file_names[batch_start:batch_end]
        out_dir_curr = os.path.join(out_dir_im, str(idx_batch_start))
        util.mkdir(out_dir_curr)
        for file_name, im_file_curr in zip(file_names_rel, im_files_rel):
            out_file = os.path.join(out_dir_curr, file_name)
            if not os.path.exists(out_file):
                args.append((im_file_curr, out_file, counter))
                counter += 1
    p = multiprocessing.Pool(multiprocessing.cpu_count())
    print len(args)
    p.map(copyfile_wrapper, args)
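# copyfile_wrapper is not defined in this file. Since Pool.map passes a single
# argument, a plausible wrapper (an assumption, not the original helper)
# unpacks the tuple built in main() above and copies the file:
def copyfile_wrapper_sketch(arg):
    import shutil
    in_file, out_file, counter = arg  # tuple layout assumed from main()
    print counter, in_file
    shutil.copyfile(in_file, out_file)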
def main():
    # Inspect one predicted-flow h5 file.
    h5_file = '/disk2/mayExperiments/flow_resolution_scratch/im_viz_padding_ft_nC_sZ_youtube/large_0.707106781187/COCO_val2014_000000000143_pred_flo/results/109.h5'
    data = readH5(h5_file)[0]
    print data.shape
    print data[:, 10, 10]
    return

    # Earlier experiment (kept commented out): stitch per-image flo
    # visualizations from two model directories into a comparison HTML page.
    # out_dir='/disk2/aprilExperiments/flo_subdivision_actual'
    # out_file=os.path.join(out_dir,'list_of_im.txt')
    # img_paths=util.readLinesFromFile(out_file)
    # grid_sizes=[1]
    # out_dir_pre='grid_flo_viz_'
    # im_post='_1_1'
    # # grid_sizes=[1,2,4,5]
    # # out_dir_pre='prob_fuse_flo_viz_'
    # # im_post=''
    # grid_sizes_str=[str(grid_size) for grid_size in grid_sizes]
    # grid_sizes_str='_'.join(grid_sizes_str)
    # viz_dir='/disk2/aprilExperiments/flo_subdivision_actual'
    # out_dir_flo_viz=os.path.join(out_dir,out_dir_pre+grid_sizes_str)
    # out_dir_ac='/disk1/maheen_data/mayExperiments/new_model_flo_training_50000'
    # prob_folder_only='prob_fuse_viz_'+grid_sizes_str
    # prob_folder=os.path.join(out_dir_ac,prob_folder_only)
    # print prob_folder
    # sym_path='/disk2/temp/'+prob_folder_only
    # cmd=''
    # if os.path.exists(sym_path):
    #     cmd='rm '+sym_path+';'
    # cmd=cmd+'ln -s '+prob_folder+' '+sym_path
    # print cmd
    # subprocess.call(cmd,shell=True)
    # viz_dirs=[out_dir_flo_viz,sym_path]
    # out_file_html=os.path.join(out_dir,'visualizing_fuse_diff_models_'+grid_sizes_str+'.html')
    # print out_file_html
    # print viz_dirs
    # # script_writeHTMLStitchedFlos_wDirs(img_paths,out_file_html,viz_dirs)
    # img_paths_html=[]
    # captions=[]
    # for img_path in img_paths:
    #     img_name=img_path[img_path.rindex('/')+1:img_path.rindex('.')]
    #     img_paths_html_curr=[util.getRelPath(img_path)]
    #     captions_curr=['im']
    #     for idx_viz_dir,viz_dir in enumerate(viz_dirs):
    #         if idx_viz_dir==0:
    #             img_path_curr=os.path.join(viz_dir,img_name+im_post+'.png')
    #         else:
    #             img_path_curr=os.path.join(viz_dir,'train2014_'+img_name+'.png')
    #         img_paths_html_curr.append(util.getRelPath(img_path_curr))
    #         captions_curr.append(viz_dir[viz_dir.rindex('/')+1:])
    #     img_paths_html.append(img_paths_html_curr)
    #     captions.append(captions_curr)
    # visualize.writeHTML(out_file_html,img_paths_html,captions)
    # return

    # Earlier experiment (kept commented out): run the flo pyramid +
    # averaging pipeline on the full image list and time it.
    # grid_sizes=[1,2,4,5]
    # out_dir_meta='/disk1/maheen_data/mayExperiments/new_model_flo_training_50000'
    # util.mkdir(out_dir_meta)
    # model_file='/disk2/mayExperiments/finetuning_youtube_hmdb_llr/OptFlow_youtube_hmdb_iter_50000.caffemodel'
    # clusters_file='/home/maheenrashid/Downloads/debugging_jacob/optical_flow_prediction_test/examples/opticalflow/clusters.mat'
    # gpu=0
    # out_file='/disk2/aprilExperiments/flo_subdivision_actual/list_of_im.txt'
    # img_paths=util.readLinesFromFile(out_file)
    # print len(img_paths)
    # t=time.time()
    # script_saveFloPyramidsAndAverageEfficient(out_dir_meta,img_paths,grid_sizes,model_file,gpu,clusters_file,append_folder=True,overwrite=False)
    # # script_saveFloPyramidsAndAverage(out_dir_meta,img_paths,grid_sizes,model_file,gpu,clusters_file,append_folder=True,overwrite=True)
    # print time.time()-t

    # Earlier experiment (kept commented out): write and run a script that
    # renders the fused .flo files to .png visualizations.
    # str_grid='_'.join([str(val) for val in grid_sizes])
    # flo_dir=os.path.join(out_dir_meta,'prob_fuse_'+str_grid)
    # flo_viz_dir=os.path.join(out_dir_meta,'prob_fuse_viz_'+str_grid)
    # util.mkdir(flo_viz_dir)
    # flo_paths=util.getFilesInFolder(flo_dir,'.flo')
    # flo_viz_paths=[file_curr.replace('.flo','.png').replace(flo_dir,flo_viz_dir) for file_curr in flo_paths]
    # out_file_sh=flo_viz_dir+'.sh'
    # writeScriptToGetFloViz(flo_paths,flo_viz_paths,out_file_sh)
    # subprocess.call('sh '+out_file_sh,shell=True)
    # return

    # Find which batch directories have a complete prob_fuse folder
    # (batch_size .flo files) and which still need processing.
    out_dir_meta = '/disk1/maheen_data/mayExperiments/model_50000_flo'
    batches = [os.path.join(out_dir_meta, dir_curr) for dir_curr in os.listdir(out_dir_meta)
               if os.path.isdir(os.path.join(out_dir_meta, dir_curr))]
    print len(batches)
    batch_size = 300
    grid_sizes = [1, 2, 4, 5]
    model_file = '/disk2/mayExperiments/finetuning_youtube_hmdb_llr/OptFlow_youtube_hmdb_iter_50000.caffemodel'
    clusters_file = '/home/maheenrashid/Downloads/debugging_jacob/optical_flow_prediction_test/examples/opticalflow/clusters.mat'
    gpu = 1
    str_grid = '_'.join([str(val) for val in grid_sizes])
    flo_folder = 'prob_fuse_1_2_4_5'
    done = []
    not_done = []
    # flo_folder='h5_1_2_4_5'
    # for batch_curr in batches:
    #     flo_folder_curr=os.path.join(batch_curr,flo_folder)
    #     if os.path.isdir(flo_folder_curr):
    #         done.append(batch_curr)
    #     else:
    #         not_done.append(batch_curr)
    # print len(done)
    # print len(not_done)
    # return
    for batch_curr in batches:
        flo_folder_curr = os.path.join(batch_curr, flo_folder)
        if os.path.isdir(flo_folder_curr):
            files = util.getFilesInFolder(flo_folder_curr, '.flo')
            if len(files) == batch_size:
                done.append(batch_curr)
                continue
        not_done.append(batch_curr)
    print len(done)
    print len(not_done)

    # Earlier experiment (kept commented out): write shell scripts that move
    # finished h5 folders to an external drive, split into twelve scripts that
    # a meta-script launches in parallel.
    # h5_dir='h5_1_2_4_5'
    # path_to_mv=util.escapeString('/media/maheenrashid/Seagate Backup Plus Drive/maheen_data/mayExperiments/model_50000_flo')
    # path_to_sh_pre=os.path.join(out_dir_meta,'mv_h5')
    # path_to_sh_meta=os.path.join(out_dir_meta,'mv_h5_meta.sh')
    # commands=[]
    # for done_curr in done:
    #     h5_path=os.path.join(done_curr,h5_dir)
    #     if os.path.exists(h5_path):
    #         batch_dir=done_curr[done_curr.rindex('/')+1:]
    #         new_path=os.path.join(path_to_mv,batch_dir)
    #         str_command=''
    #         if not os.path.exists(new_path):
    #             str_command='mkdir '+new_path+';'
    #         str_command=str_command+'mv '+h5_path+' '+new_path+'/;'
    #         commands.append(str_command)
    # idx_range=util.getIdxRange(len(commands),len(commands)/12)
    # sh_files=[]
    # for idx_idx,start_idx in enumerate(idx_range[:-1]):
    #     commands_curr=commands[start_idx:idx_range[idx_idx+1]]
    #     sh_curr=path_to_sh_pre+'_'+str(idx_idx)+'.sh'
    #     commands_curr=['#!/bin/sh']+commands_curr
    #     util.writeFile(sh_curr,commands_curr)
    #     print sh_curr
    #     sh_files.append(sh_curr)
    # with open(path_to_sh_meta,'wb') as f:
    #     f.write('#!/bin/sh\n')
    #     for file_curr in sh_files:
    #         f.write(file_curr+' &\n')
    # util.writeFile(path_to_sh,commands)
    # return

    # Sort the unfinished batches by their numeric suffix and process each
    # one, skipping any that fail.
    vals = [int(not_done_curr[not_done_curr.rindex('_') + 1:]) for not_done_curr in not_done]
    sort_idx = np.argsort(vals)
    not_done = [not_done[idx] for idx in sort_idx]
    print not_done[:10]
    not_done = not_done[:-1]
    for not_done_curr in not_done:  # [25:]:
        print not_done_curr
        list_file = not_done_curr + '.txt'
        img_paths = util.readLinesFromFile(list_file)
        try:
            script_saveFloPyramidsAndAverageEfficient(not_done_curr, img_paths, grid_sizes,
                                                      model_file, gpu, clusters_file,
                                                      append_folder=True, overwrite=False)
        except:
            print not_done_curr, ' is problematic'
            continue
        print not_done_curr, ' is complete'
    return

    # Unreachable after the return above: split the rescaled training images
    # into per-batch list files and run the pipeline on each batch directory.
    dir_training = '/disk2/mayExperiments/train_data/rescaled_images/4'
    out_dir_meta = '/disk1/maheen_data/mayExperiments/model_50000_flo'
    util.mkdir(out_dir_meta)
    img_paths = util.getFilesInFolder(dir_training, '.jpg')
    batch_size = 300
    grid_sizes = [1, 2, 4, 5]
    model_file = '/disk2/mayExperiments/finetuning_youtube_hmdb_llr/OptFlow_youtube_hmdb_iter_50000.caffemodel'
    clusters_file = '/home/maheenrashid/Downloads/debugging_jacob/optical_flow_prediction_test/examples/opticalflow/clusters.mat'
    gpu = 1
    idx_range = util.getIdxRange(len(img_paths), batch_size)
    print len(idx_range)
    out_file_lists = []
    for idx_idx, idx_start in enumerate(idx_range[:-1]):
        idx_end = idx_range[idx_idx + 1]
        img_paths_rel = img_paths[idx_start:idx_end]
        out_file_list = os.path.join(out_dir_meta, 'batch_' + str(idx_idx) + '.txt')
        # util.writeFile(out_file_list,img_paths_rel)
        out_file_lists.append(out_file_list)
    # Resume from batch list 100 onward.
    out_file_lists = out_file_lists[100:]
    for list_no, out_file_list in enumerate(out_file_lists):
        dir_curr = out_file_list[:out_file_list.rindex('.')]
        util.mkdir(dir_curr)
        img_paths = util.readLinesFromFile(out_file_list)
        print 'LIST NO', list_no, out_file_list
        script_saveFloPyramidsAndAverageEfficient(dir_curr, img_paths, grid_sizes, model_file,
                                                  gpu, clusters_file, append_folder=True, overwrite=False)
def main():
    # Time saving and reloading a large float32 matrix with np.savez/np.load.
    shape = (100000, 100000)
    file_npz = '/disk2/decemberExperiments/gettingNN/npz_' + str(shape[0]) + '.npz'
    # file_byte='temp/blob_'+str(shape[0])+'.b'
    arr = np.random.randn(*shape).astype('float32')
    print arr.dtype
    print arr.shape
    np.savez(file_npz, arr)
    # blob_str=bytearray(np.array(arr,dtype='float32').tostring())
    # with open(file_byte,'wb') as f:
    #     f.write(blob_str)
    t = time.time()
    arr = np.load(file_npz)['arr_0']
    print time.time() - t
    # t=time.time()
    # with open(file_byte,'rb') as f:
    #     blob_str=f.read()
    # arr_byte=np.fromstring(blob_str,dtype='float32')
    # arr_byte=arr_byte.reshape(shape)
    # print time.time()-t
    # print arr_byte.shape,np.allclose(arr,arr_byte)
    return

    # Unreachable after the return above: build a large GPU feature matrix
    # batch by batch by concatenating onto the running array.
    feat_size = 4096
    test_size = (200, 4096)
    total = 6371288
    batch_size = 1000
    # test=ca.random.uniform(size=test_size)
    idx = util.getIdxRange(total, batch_size)
    print len(idx)
    sizes = []
    for idx_idx, start_idx in enumerate(idx[:-1]):
        end_idx = idx[idx_idx + 1]
        print idx_idx, start_idx, end_idx
        curr_size = end_idx - start_idx
        if idx_idx == 0:
            train = ca.random.uniform(size=(curr_size, feat_size))
        else:
            # curr_array=ca.random.uniform(size=(curr_size,feat_size))
            curr_array = np.random.randn(curr_size, feat_size)
            curr_array = ca.array(curr_array)
            print curr_array.shape, type(curr_array[0, 0])
            train = ca.extra.concatenate(train, curr_array, axis=0)
            print train.shape
    print train.shape
    return

    # Profile nearest-neighbor search on the matrices built above.
    pr = cProfile.Profile()
    pr.enable()
    indices, distances = nn(test, train)
    pr.disable()
    s = StringIO.StringIO()
    sortby = 'cumulative'
    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
    ps.print_stats()
    print s.getvalue()
    print indices.shape, distances.shape
    return

    # Profile nearest-neighbor search on a GPU array concatenated with itself.
    shape = (10000, 4096)
    a = np.random.randn(*shape)
    ga = ca.array(a)
    print a.shape, ga.shape
    ga_b = ca.extra.concatenate(ga, ga, axis=0)
    ga_test = ca.random.uniform(size=(100, 4096))
    pr = cProfile.Profile()
    pr.enable()
    indices, distances = nn(ga_b, ga_b)
    pr.disable()
    s = StringIO.StringIO()
    sortby = 'cumulative'
    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
    ps.print_stats()
    print s.getvalue()
    print indices.shape, distances.shape
    return

    # Compare CPU vs GPU normalization timings and check they agree.
    t = time.time()
    a_n = util.normalize(a)
    print time.time() - t
    print a_n.shape
    t = time.time()
    ga_n = util.normalize(ga, True)
    print time.time() - t
    print ga_n.shape
    ga_n_conv = np.array(ga_n)
    print np.allclose(a_n, ga_n_conv)