def prepare_for_rec(out_dir):
    meta = {}
    meanImg = readAndResize('/database/test.jpg', SIZE, CHANNELS)
    meanImgstd = readAndResize('/database/test.jpg', SIZE_STD, CHANNELS)
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgstd
    util.pickle(os.path.join(out_dir, "batches.meta"), meta)
    trainingMeta = util.unpickle(os.path.join(out_dir, "trainingMeta.meta"))
    testMeta = util.unpickle(os.path.join(out_dir, "testMeta.meta"))
    # for training
    print "prepare for training"
    random.shuffle(trainingMeta)
    make_list_batches(trainingMeta, out_dir, NUM_PER_PATCH)
    # for test
    print "prepare for test"
    random.shuffle(testMeta)
    make_list_batches(testMeta, out_dir, NUM_PER_PATCH, 8000)

def collectImgByClass(inFolder, outFolder):
    subFolderList = os.listdir(inFolder)
    subFolderList.sort()
    for index, foldername in enumerate(subFolderList):
        print foldername
        dataCol = collectOneClass(os.path.join(inFolder, foldername))
        util.pickle(os.path.join(outFolder, foldername), dataCol)

def processTest(test_list, out_dir, startIdx):
    global NUM_PER_PATCH
    meta = util.unpickle(os.path.join(out_dir, "batches.meta"))
    allLabels = meta['label_names']
    fileList = open(test_list, 'rb').readlines()
    random.shuffle(fileList)
    print "####### Got %d classes ######" % len(allLabels)
    print "####### Got %d images ######" % len(fileList)
    numImg = len(fileList)
    numBatches = numImg / NUM_PER_PATCH
    # the last batch keeps the remainder
    if numImg % NUM_PER_PATCH != 0:
        numBatches += 1
    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        print "### Making the %dth batch ###" % idx_batch
        b_start = NUM_PER_PATCH * idx_batch
        b_end = NUM_PER_PATCH * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            # re-align the last batch to the tail so it is always full
            b_start = numImg - NUM_PER_PATCH
            b_end = numImg
        batchMeta = fileList[b_start:b_end]
        data, labels, imgnames = getBatch(batchMeta, allLabels)
        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch + startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data': data, 'labels': labels, 'images': imgnames})

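# A minimal sketch (toy sizes) of the batch-boundary logic above: the last
# batch is re-aligned to the tail of the list so every saved batch holds
# exactly NUM_PER_PATCH items, at the cost of overlapping the previous batch.
n_img, bsz = 10, 4
n_batches = n_img / bsz + (1 if n_img % bsz != 0 else 0)
for k in range(n_batches):
    lo, hi = bsz * k, bsz * (k + 1)
    if k == n_batches - 1:
        lo, hi = n_img - bsz, n_img
    print k, (lo, hi)  # (0, 4), (4, 8), (6, 10): the last batch overlaps
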
def makeBatches(allImgMeta, out_dir, batchSize, startIdx=0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize
    # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1
    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        data, labels = getBatch(batchMeta)
        # coarse labels: every two consecutive fine labels share a coarse label
        labels1 = labels // 2
        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch + startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data': data, 'labels': labels,
                                'data1': data, 'labels1': labels1})

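# A minimal sketch (assuming integer numpy labels) of the coarse-label trick
# above: floor division by 2 merges each pair of consecutive fine classes into
# one coarser class, e.g. fine classes {0, 1} -> 0 and {2, 3} -> 1.
import numpy as np
fine = np.array([0, 1, 2, 3, 4, 5])
print fine // 2  # [0 0 1 1 2 2]
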
def prepareTest(Test_dir, stdImgfolder, out_dir, meanImg_dir, startIdx):
    global NUM_PER_BATCH
    allImgMeta, allLabels = collectAndShuffle(Test_dir, stdImgfolder)
    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH, startIdx)
    out_file = os.path.join(out_dir, "imglist")
    util.pickle(out_file, allImgMeta)

def collectImgByName(InputFolder, outFolder):
    flist = os.listdir(InputFolder)
    flist = [os.path.join(InputFolder, fname) for fname in flist]
    for foldername in flist:
        print foldername
        dataCol = collectOneClass(foldername, SIZE, CHANNELS)
        folder = os.path.basename(foldername)
        util.pickle(os.path.join(outFolder, folder), dataCol)

def create_model(self, model_class=LinearSVC, save=True):
    self.model = model_class()
    start = time()
    self.model.fit(self.x_train, self.y_train)
    log('Training time : {:.0f}s'.format(time() - start))
    log('Test accuracy : {:.2f}%'.format(
        100 * self.model.score(self.x_test, self.y_test)))
    if save:
        log('Pickling model')
        pickle(self.model, self.parameters["pickle_model"])

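# A minimal usage sketch, assuming create_features/create_model are methods of
# a classifier wrapper class (hypothetically named VehicleClassifier here)
# constructed from a parameters dict like the one shown after create_features:
#
#   clf = VehicleClassifier(parameters)
#   clf.create_features(save=True)           # builds x_train/x_test/y_train/y_test
#   clf.create_model(model_class=LinearSVC)  # trains, scores, and pickles the model
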
def doGetMeanImgStd(stdImgfolder, meanImgFolder):
    global SIZE_STD, CHANNELS, SIZE
    dataCol = collectOneClass(stdImgfolder, SIZE_STD, CHANNELS)
    dataSum = np.sum(dataCol, axis=0, dtype=np.float64)
    globalSum = dataSum
    globalCount = dataCol.shape[0]
    meanImg = globalSum / globalCount
    util.pickle(os.path.join(meanImgFolder, "meanImgStd"), meanImg)

def prepareTrain(folderCls, imgStdCls, meanImg_dir, out_dir):
    global NUM_PER_BATCH
    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    meanImgStd = util.unpickle(meanImg_dir + '/meanImgStd')
    allImgMeta = collectAndShuffle(folderCls, imgStdCls)
    meta = {}
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgStd
    util.pickle(os.path.join(out_dir, "batches.meta"), meta)
    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH)
    out_file = os.path.join(out_dir, "imglist")
    util.pickle(out_file, allImgMeta)

def prepareTrain(train_dir, out_dir, meanImg_dir):
    global NUM_PER_PATCH
    # e.g. train_dir = "/data1/LSVRC2010/train"
    #      out_dir = "/data2/ILSVRC2010/train_batches"
    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    allImgMeta, allLabels = collectAndShuffle(train_dir)
    meta = {}
    meta['data_mean'] = meanImg
    meta['label_names'] = allLabels
    util.pickle(os.path.join(out_dir, "batches.meta"), meta)
    makeBatches(allImgMeta, out_dir, NUM_PER_PATCH)
    out_file = os.path.join(out_dir, "imglist")
    util.pickle(out_file, [allImgMeta, allLabels])

def prepareTrain(train_dir, stdImgfolder, out_dir, meanImg_dir, startIdx):
    global NUM_PER_BATCH
    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    meanImgStd = util.unpickle(meanImg_dir + '/meanImgStd')
    allImgMeta, allLabels = collectAndShuffle(train_dir, stdImgfolder)
    meta = {}
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgStd
    util.pickle(os.path.join(out_dir, "batches.meta"), meta)
    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH, startIdx)
    out_file = os.path.join(out_dir, "imglist")
    util.pickle(out_file, allImgMeta)

def getMeanImgStd(imgStdCls, meanImgFolder):
    global SIZE_STD, STD_CHANNELS
    globalSum = np.zeros(SIZE_STD * SIZE_STD * STD_CHANNELS, dtype=np.float64)
    globalCount = 0
    data = np.zeros((1, SIZE_STD * SIZE_STD * STD_CHANNELS), dtype=np.uint8)
    for ind in range(len(imgStdCls)):
        fullname = imgStdCls[ind]
        print "Reading", fullname
        data[0, :] = readAndResize(fullname, SIZE_STD, STD_CHANNELS)
        dataSum = np.sum(data, axis=0, dtype=np.float64)
        globalSum += dataSum
        globalCount += data.shape[0]
    meanImg = globalSum / globalCount
    util.pickle(os.path.join(meanImgFolder, "meanImgStd"), meanImg)

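# A minimal self-contained check (toy data) of the streaming mean above:
# accumulating a float64 sum and a count gives exactly np.mean over the whole
# stack, without ever holding more than one image row in memory.
import numpy as np
rows = np.random.randint(0, 256, size=(5, 12)).astype(np.uint8)
acc, cnt = np.zeros(12, np.float64), 0
for row in rows:
    acc += row
    cnt += 1
assert np.allclose(acc / cnt, rows.mean(axis=0))
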
def create_features(self, save=True):
    param = self.parameters
    vehicles = param['vehicles']
    non_vehicles = param['non_vehicles']
    log('Vehicles : {}'.format(len(vehicles)))
    log('Non-vehicles : {}'.format(len(non_vehicles)))
    orient, pix_per_cell, cell_per_block = param['orient'], param['pix_per_cell'], param['cell_per_block']
    extract_features_partial = partial(
        extract_features,
        color_space=param['color_space'],
        spatial_size=param['spatial_size'],
        hist_bins=param['hist_bins'],
        orient=orient,
        pix_per_cell=pix_per_cell,
        cell_per_block=cell_per_block,
        hog_channel=param['hog_channel'],
        spatial_feat=param['spatial_feat'],
        hist_feat=param['hist_feat'],
        hog_feat=param['hog_feat'])
    vehicles_features = extract_features_partial(vehicles)
    non_vehicles_features = extract_features_partial(non_vehicles)
    x = np.vstack((vehicles_features, non_vehicles_features)).astype(np.float64)
    self.x_scaler = StandardScaler().fit(x)
    x_scaled = self.x_scaler.transform(x)
    # Define the labels vector: 1 for vehicles, 0 for non-vehicles
    y = np.hstack((np.ones(len(vehicles)), np.zeros(len(non_vehicles))))
    # Split up data into randomized training and test sets
    self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
        x_scaled, y, test_size=param['test_size'])
    log('Using {} orientations {} pixels per cell {} cells per block'.format(
        orient, pix_per_cell, cell_per_block))
    log('Feature vector length: {}'.format(self.x_train.shape[1]))
    if save:
        log('Pickling features')
        pickle({'x_train': self.x_train, 'x_test': self.x_test,
                'y_train': self.y_train, 'y_test': self.y_test,
                'x_scaler': self.x_scaler, 'parameters': param},
               param["pickle_features"])

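# A hedged example of the parameters dict that create_features/create_model
# read; only the key names come from the code above, the values are
# illustrative assumptions:
#
#   parameters = {
#       'vehicles': vehicle_paths, 'non_vehicles': non_vehicle_paths,
#       'color_space': 'YCrCb', 'spatial_size': (32, 32), 'hist_bins': 32,
#       'orient': 9, 'pix_per_cell': 8, 'cell_per_block': 2,
#       'hog_channel': 'ALL', 'spatial_feat': True, 'hist_feat': True,
#       'hog_feat': True, 'test_size': 0.2,
#       'pickle_features': 'features.p', 'pickle_model': 'model.p',
#   }
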
def make_list_batches(allImgMeta, out_dir, batchSize, startIdx=0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize
    # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1
    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        data, dataStd, imgnames = get_list_batch(batchMeta)
        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch + startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data': data, 'data_std': dataStd, 'name': imgnames})

def main():
    num_args = len(sys.argv)
    num_nets = num_args - 1
    assert num_nets > 0
    errors = []
    # 0th net; each result holds per-batch 'labels' and 'preds'
    result = unpickle(sys.argv[1])
    errors.append(evaluate_result(result, sys.argv[1]))
    num_batches = len(result['labels'])
    # collect all results
    for ii in range(num_nets - 1):
        result_ii = unpickle(sys.argv[ii + 2])
        # evaluate result_ii
        errors.append(evaluate_result(result_ii, sys.argv[ii + 2]))
        # check that the number of batches is consistent
        num_batches_ii = len(result_ii['labels'])
        assert num_batches_ii == num_batches
        for jj in range(num_batches):
            # check that the labels are consistent
            assert np.array_equal(result_ii['labels'][jj], result['labels'][jj])
            result['preds'][jj] += result_ii['preds'][jj]
    pickle('combine_result', result)
    # per-classifier mean/std error
    errors = np.array(errors)
    print "mean: ", str(100 * np.mean(errors)), " std: ", str(100 * np.std(errors))
    # evaluate the combined result
    evaluate_result(result, "After combine")

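# A minimal sketch (toy numbers) of why summing the per-net 'preds' above is
# enough to combine the ensemble: the argmax of summed per-class scores equals
# the argmax of their average, so no normalization is needed before evaluation.
import numpy as np
net_a = np.array([0.2, 0.5, 0.3])
net_b = np.array([0.4, 0.3, 0.3])
assert (net_a + net_b).argmax() == ((net_a + net_b) / 2.0).argmax()
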
def preprocessing():
    folder = r'D:\work\sunxiuyu\SVHN\large-lcn'
    datasize = 32 * 32 * 3
    num = 0
    begin = 25
    for i in range(begin, begin + 16):
        batch_file = os.path.join(folder, 'data_batch_' + str(i))
        print batch_file
        buffer = util.unpickle(batch_file)
        data = buffer['data']
        dim2 = len(data)
        data = np.transpose(data)  # one image per row
        dim1 = len(data)
        print dim1
        newbuffer = np.zeros((dim1, dim2), np.single)
        for j in range(0, len(data)):
            img1 = data[j].reshape(3, 32, 32)        # channel-major (CHW)
            img = np.zeros((32, 32, 3), np.single)   # HWC view for inspection
            result = np.zeros((3, 32, 32), np.single)
            img[:, :, 0] = img1[0, :, :]
            img[:, :, 1] = img1[1, :, :]
            img[:, :, 2] = img1[2, :, :]
            result[0, :, :] = img[:, :, 0]
            result[1, :, :] = img[:, :, 1]
            result[2, :, :] = img[:, :, 2]
            newbuffer[j] = result.reshape(3 * 32 * 32)
        newbuffer = np.transpose(newbuffer)
        buffer['data'] = newbuffer
        util.pickle(batch_file, buffer)
    return

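# A minimal sketch (synthetic data) of the layout handling in preprocessing()
# above: a flat cuda-convnet row reshaped as (3, 32, 32) is channel-major
# (CHW); transposing the axes gives the (32, 32, 3) HWC image that cv2
# expects, and transposing back and flattening recovers the row exactly.
import numpy as np
flat = np.arange(3 * 32 * 32, dtype=np.single)
chw = flat.reshape(3, 32, 32)
hwc = chw.transpose(1, 2, 0)  # HWC view for OpenCV-style code
assert np.array_equal(hwc.transpose(2, 0, 1).reshape(-1), flat)
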
def makeBatches(allImgMeta, out_dir, batchSize, startIdx=0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize
    # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1
    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        # modified by shao, 20140410
        data, dataStd, imgNames = getBatch(batchMeta)
        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch + startIdx))
        print "saving to %s" % out_fname
        # modified by shao, 20140410
        util.pickle(out_fname, {'data': data, 'data_std': dataStd, 'img_name': imgNames})

def prepareTrain(train_list, outFolder, startIdx):
    global NUM_PER_PATCH
    fileList = open(train_list, 'rb').readlines()
    random.shuffle(fileList)
    if len(fileList) < Mean_Img_Num:
        num_mean_img = len(fileList)
    else:
        num_mean_img = Mean_Img_Num
    # compute the mean image over the first num_mean_img entries
    data = np.zeros((SIZE * SIZE * CHANNELS, num_mean_img), dtype=np.uint8)
    for i in range(0, num_mean_img):
        str1 = fileList[i].strip()
        tmp = str1.split(':')
        data[:, i] = load_img_data(tmp[0])
    tmp = data.transpose()  # one image per row
    dataSum = np.sum(tmp, axis=0, dtype=np.float64)
    globalCount = tmp.shape[0]
    meanImg = dataSum / globalCount
    util.pickle(os.path.join(outFolder, "meanImg"), meanImg)
    # collect the label set
    allLabels = []
    for line in fileList:
        str1 = line.strip()
        tmp = str1.split(':')
        label = tmp[1]
        if label not in allLabels:
            allLabels.append(label)
    print "####### Got %d classes ######" % len(allLabels)
    meta = {}
    meta['data_mean'] = meanImg
    meta['label_names'] = allLabels
    util.pickle(os.path.join(outFolder, "batches.meta"), meta)
    numImg = len(fileList)
    numBatches = numImg / NUM_PER_PATCH
    # the last batch keeps the remainder
    if numImg % NUM_PER_PATCH != 0:
        numBatches += 1
    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        print "### Making the %dth batch ###" % idx_batch
        b_start = NUM_PER_PATCH * idx_batch
        b_end = NUM_PER_PATCH * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - NUM_PER_PATCH
            b_end = numImg
        batchMeta = fileList[b_start:b_end]
        data, labels, imgnames = getBatch(batchMeta, allLabels)
        out_fname = os.path.join(outFolder, "data_batch_%04d" % (startIdx + idx_batch))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data': data, 'labels': labels, 'images': imgnames})

# per-image running-mean update (body of the loop over index / cls_index)
image_path = out['image_path'][i]
im = Image.open(image_path)
assert im.size == (IMAGE_SIZE, IMAGE_SIZE)
im_value = PIL2array(im).astype(n.float32)
# incremental mean: m <- m * num/(num+1) + x/(num+1)
m = m * (1.0 * num / (num + 1))
m = m + im_value / (num + 1)
num += 1
if VERIFY_RESULT:
    sum_m += im_value
stdout.write("%4d/%4d " % (cls_index + 1, num_class))
stdout.write("%8d/%8d\r" % (index + 1, len(out['index_map_train'][cls_index])))
stdout.flush()

print "\n"
if VERIFY_RESULT:
    # the plain sum/count mean should match the incremental mean
    sum_m /= num
    diff = sum_m - m
    plot_array_image(diff, (IMAGE_SIZE, IMAGE_SIZE, 3))
    print n.mean(abs(diff))
plot_array_image(m, (IMAGE_SIZE, IMAGE_SIZE, 3))
mean_info = {}
mean_info['data'] = m
mean_info['file'] = INPUT_FILE
pickle(OUTPUT_FILE, mean_info)

def cifar100toimages():
    file = r'D:\work\sunxiuyu\cifar-100-python\test'
    outfolder = r'D:\work\sunxiuyu\cifar-10-py-colmajor\tmp'
    dict = util.unpickle(file)
    numclass = np.array(dict['fine_labels'], np.int).max()
    fine_classes = {}
    mean = np.zeros(32 * 32 * 3, dtype=np.double)
    for i in range(0, len(dict['data'])):
        mean += dict['data'][i]
    mean = mean / len(dict['data'])
    metafile = r'D:\work\sunxiuyu\cifar-10-py-colmajor\cifar10\batches.meta'
    outmetafile = r'D:\work\sunxiuyu\cifar-10-py-colmajor\cifar-100\batches.meta'
    meta = util.unpickle(metafile)
    for key in meta:
        print key
    meta['label_names'] = [str(i) for i in range(0, 100)]
    meta['data_mean'] = meta['data_mean'].reshape(3072)
    meta['data_mean'][0:1024] = mean[2048:3072]     # b
    meta['data_mean'][1024:2048] = mean[1024:2048]  # g
    meta['data_mean'][2048:3072] = mean[0:1024]     # r
    util.pickle(outmetafile, meta)
    return
    # NOTE: everything below is unreachable while the early return above stays
    for i in range(0, len(dict['data'])):
        fine_classes_idx = dict['fine_labels'][i]
        m_data = dict['data'][i]
        if fine_classes_idx in fine_classes:
            fine_classes[fine_classes_idx].append({'data': m_data,
                                                   'label': dict['fine_labels'][i],
                                                   'filename': dict['filenames'][i]})
        else:
            fine_classes[fine_classes_idx] = [{'data': m_data,
                                               'label': dict['fine_labels'][i],
                                               'filename': dict['filenames'][i]}]
    # random shuffle within each class
    for i in range(0, len(fine_classes)):
        indexs = range(0, len(fine_classes[i]))
        np.random.shuffle(indexs)
        fine_classes[i] = [fine_classes[i][x] for x in indexs]
    # save image patches
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    for i in range(0, len(fine_classes)):
        class_folder = os.path.join(outfolder, str(i))
        if not os.path.exists(class_folder):
            os.makedirs(class_folder)
        for image in fine_classes[i]:
            m_data = image['data']
            r = m_data[0:1024].reshape(32, 32)
            g = m_data[1024:2048].reshape(32, 32)
            b = m_data[2048:3072].reshape(32, 32)
            color_img = np.zeros((32, 32, 3), dtype=np.uint8)
            color_img[:, :, 0] = b
            color_img[:, :, 1] = g
            color_img[:, :, 2] = r
            imagefile = os.path.join(class_folder, image['filename'])
            cv2.imwrite(imagefile, color_img)

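# A minimal sketch (marker values) of the plane swap above: the source mean
# stores the three 1024-pixel color planes in R, G, B order, while the target
# data_mean expects B, G, R, so the first and last planes trade places.
import numpy as np
rgb = np.concatenate([np.full(1024, 0.0), np.full(1024, 1.0), np.full(1024, 2.0)])
bgr = np.concatenate([rgb[2048:3072], rgb[1024:2048], rgb[0:1024]])
assert bgr[0] == 2.0 and bgr[-1] == 0.0
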
for i, v in enumerate(val):
    s = s + str(i) + ': ' + str(len(v)) + ', '
print s
print 'Loaded ' + str(nval) + ' validation images in ' + str(nclasses) + ' classes'
print 'Validation blacklist: ' + str(n_blacklist)
print 'Validation total: ' + str(nval + n_blacklist)

# save data ####################################################################
out = {}
out['train'] = train
out['val'] = val
out['classes_map'] = classes_map
out['classes'] = classes
out['num_data'] = ntrain + nval
out['num_data_train'] = ntrain
out['num_data_val'] = nval
pickle(data_file, out)
call(["chmod", "777", data_file])  # change mode so that it can be read by others
print 'Saved data to ' + data_file

# save smaller versions ########################################################
for n in [10, 100]:
    print 'Creating smaller dataset with ' + str(n) + ' classes'
    train_small = []
    val_small = []
    classes_small = []
    classes_map_small = []
    ntrain = 0
    nval = 0
    for i in range(0, 1000, 1000 / n):
        train_small.append(train[i])
        val_small.append(val[i])

idx = 1
# index 0 is reserved for out-of-vocabulary words (an all-zero vector)
word_vectors.append(np.zeros([1, word_vec_dim], np.float32))
for word in vocab:
    try:
        word_vectors.append(model[word].reshape([1, -1]))
        word_to_idx.append(idx)
        idx += 1
    except Exception, e:
        # word not in the word2vec model: map it to the zero vector at index 0
        word_to_idx.append(0)
word_vectors = np.concatenate(word_vectors, axis=0)
num = word_vectors.shape[0]
assert num == idx
assert len(word_to_idx) == len(vocab)
print('%d words in corpus' % idx)
pickle('data/vqa_word2vec_model.pkl',
       {'vocab': vocab, 'word2idx': word_to_idx, 'word_vectors': word_vectors})


if __name__ == '__main__':
    # vocab = get_combined_vocabulary()
    # slice_word2vec_model(vocab)
    encoder = Word2VecEncoder('data/vqa_word2vec_model.pkl')

    def debug_encoder_once(s):
        print s
        print encoder.encode(s)

    debug_encoder_once(['<S>', '12', '</S>'])

def test():
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)
    # Create the model creator
    model_creator = create_model_fn(FLAGS.model_type)
    # Create the multiple-choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding=model_creator.ans_coding)
    # Create the reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)
    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()
        # g.finalize()
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    result, rescore_data, state_rescore_data = [], [], []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)
        itr = 0
        while not reader.eof():
            if itr > 50000:  # cache at most 50k questions
                break
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            mc_ans, mc_coding = mc_manager.get_candidate_answer_and_word_coding(quest_id)
            inputs = reader_post_proc_fn(outputs, mc_coding)
            perplexity, state = sess.run(
                [model.likelihood, model.final_decoder_state],
                feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            loss = perplexity[:, :-1].mean(axis=1)
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            # the candidate with the lowest mean perplexity is the predicted answer
            top1_mc_ans = mc_ans[loss.argmin()]
            result.append({u'answer': top1_mc_ans, u'question_id': quest_id})
            # add hidden-state saver
            label = mc_manager.get_binary_label(quest_id)
            state_sv = {'quest_id': quest_id, 'states': state, 'label': label}
            state_rescore_data.append(state_sv)
            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = loss.argsort()[:3].tolist()
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = mc_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)' % (i, t_mc_ans, loss[idx]))
            itr += 1
            # save information for training the classifier
            mc_label = np.array([a == answer for a in mc_ans], dtype=np.float32)
            quest_target = inputs[-2]
            datum = {'quest_seq': quest_target, 'perplex': perplexity,
                     'label': mc_label, 'quest_id': quest_id}
            rescore_data.append(datum)
    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Saving rescore data...')
    from util import pickle
    # pickle('data/rescore_dev.pkl', rescore_data)
    pickle('data/rescore_state_dev.pkl', state_rescore_data)
    tf.logging.info('Done!')
    return res_file, quest_ids

def send(players, recipient='all'):
    pub_socket.send(recipient + ':' + util.pickle(players))

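# A hedged sketch of the pub_socket that send() assumes, using pyzmq; the bind
# address is an illustrative assumption, and util.pickle is assumed to return
# the serialized string here (unlike the file-writing variant used above).
import zmq
context = zmq.Context()
pub_socket = context.socket(zmq.PUB)
pub_socket.bind('tcp://*:5556')
# a subscriber would filter on the "recipient:" prefix, e.g.
#   sub_socket.setsockopt(zmq.SUBSCRIBE, 'all:')
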
def process_for_reconstruction(img_list, std_file, out_dir):
    meta = {}
    meanImg = readAndResize('/database/test.jpg', SIZE, CHANNELS)
    meanImgstd = readAndResize('/database/test.jpg', SIZE_STD, CHANNELS)
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgstd
    util.pickle(os.path.join(out_dir, "batches.meta"), meta)
    std_img = [fname for fname in os.listdir(std_file) if fname.endswith('.png')]
    fileList = open(img_list, 'rb').readlines()
    random.shuffle(std_img)
    trainingMeta = []
    testMeta = []
    train_test_ratio = 0.8 * len(std_img)
    for i, std_name in enumerate(std_img):
        tmp = std_name.split('.')
        label = tmp[0]
        # pair every matching list entry with this std image;
        # std images with no matching lines simply contribute no pairs
        meta = []
        for line in fileList:
            if label in line:
                meta.append(line.strip())
        if i < train_test_ratio:
            trainingMeta += zip(meta, [os.path.join(std_file, std_name)] * len(meta))
        else:
            testMeta += zip(meta, [os.path.join(std_file, std_name)] * len(meta))
        print "i = %d, name = %s, trainmeta = %d, testmeta = %d" % (
            i, std_name, len(trainingMeta), len(testMeta))
    util.pickle(os.path.join(out_dir, "trainingMeta.meta"), trainingMeta)
    util.pickle(os.path.join(out_dir, "testMeta.meta"), testMeta)
    # for training
    print "prepare for training"
    random.shuffle(trainingMeta)
    make_list_batches(trainingMeta, out_dir, NUM_PER_PATCH)
    # for test
    print "prepare for test"
    random.shuffle(testMeta)
    make_list_batches(testMeta, out_dir, NUM_PER_PATCH, 8000)

def collectImgByClass(folderCls, outFolder):
    for index, foldername in enumerate(folderCls):
        dataCol = collectOneClass(foldername)
        folder = os.path.basename(foldername)
        util.pickle(os.path.join(outFolder, folder), dataCol)

def make_batch(imagefolder, numclass, outfolder, ext='.bmp', sep=1, prev=None):
    import math
    batchname = 'data_batch_'
    # filenames -- [][(filename, basename, class)]
    filenames = getfiles(imagefolder, numclass, outfolder, ext)
    width, height = 32, 32
    # for each batch
    for i in range(0, sep):
        filenames_by_batches = []
        # distribute each class evenly across the sep batches
        for j in range(0, int(numclass)):
            step = int(math.ceil(len(filenames[j]) * 1.0 / sep))
            fbegin = step * i
            fend = step * (i + 1)
            if fend > len(filenames[j]):
                fend = len(filenames[j])
            fs = filenames[j][fbegin:fend]
            filenames_by_batches += fs
        random.shuffle(filenames_by_batches)
        batch_data = []
        batch_filenames = []
        batch_labels = []
        bname = batchname + str(i + 1)
        # read images and pack them as B, G, R planes
        for f in filenames_by_batches:
            img = cv2.imread(f[0], 1)
            width, height = img.shape[0], img.shape[1]
            if prev is not None:
                img = prev(img)
            img = np.array(img, np.single)
            b = img[:, :, 0].reshape(width * height, order='C')
            g = img[:, :, 1].reshape(width * height, order='C')
            r = img[:, :, 2].reshape(width * height, order='C')
            im = np.array([b, g, r]).reshape(width * height * 3, order='C')
            batch_data.append(im)
            batch_filenames.append(f[1])
            batch_labels.append(f[2])
        savefile = os.path.join(outfolder, bname)
        batch_data = np.transpose(np.array(batch_data, order='C'))
        print(len(batch_labels))
        print(len(batch_data))
        dict = {'batch_label': bname, 'labels': batch_labels,
                'data': batch_data, 'filenames': batch_filenames}
        util.pickle(savefile, dict)

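# A minimal usage sketch for make_batch: `prev` is an optional per-image
# preprocessing callback applied before packing. The paths and the blur
# choice here are illustrative assumptions.
def blur3x3(img):
    return cv2.GaussianBlur(img, (3, 3), 0)

make_batch('images/', 10, 'batches/', ext='.bmp', sep=5, prev=blur3x3)
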
def doGetMeanImg(reszFolder, meanImgFolder):
    meanImg, info = getMeanImg(reszFolder)
    util.pickle(os.path.join(meanImgFolder, "meanImg"), meanImg)
