# Fragment: flatten one grayscale image into a labelled feature row, then split
# every collected row into a random training half and a validation remainder
# and pass both lists to tdtf.append_content_to_csv.
# NOTE(review): img, img_name, w, h, features_list, img_label, tdtf and the two
# *_feature_filename names are defined outside this fragment.  The row-building
# statements presumably run once per image inside a loop whose header is not
# visible here -- confirm against the surrounding code.

#print "after resize and gray:",type(img),img.shape,img.dtype
#show the gray img
#cv2.imshow("w2",img)
#cv2.waitKey(0)

#reshape (h,w) to (h*w,)
img = img.reshape(w * h)
# Row layout: the label first, followed by every value of the flattened image.
feature = [img_label(img_name)]
feature.extend(img)
features_list.append(feature)

# Sanity check: number of rows, then the widths of the first and last row.
print("%d %d %d" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# Sample half of the row indices (rounded down) for training.  Floor division
# keeps the sample size an integer on Python 3 as well as on Python 2.
row_count = len(features_list)
train_index_list = random.sample(range(row_count), row_count // 2)
train_features_list = [features_list[i] for i in train_index_list]

# Every row that was not sampled goes to validation, in its original order.
# Membership is tested against a set so this pass stays linear in the number
# of rows instead of rescanning the index list for every row.
train_index_set = set(train_index_list)
valid_features_list = [
    row for i, row in enumerate(features_list) if i not in train_index_set
]

print(len(train_features_list))
print(len(valid_features_list))

# Hand each split to the CSV helper together with its target filename.
tdtf.append_content_to_csv(train_features_list, train_feature_filename)
tdtf.append_content_to_csv(valid_features_list, valid_feature_filename)
# Fragment: flatten one grayscale image into a labelled feature row and pass
# all collected rows to tdtf.append_content_to_csv as a single list.
# NOTE(review): img, img_name, w, h, features_list, img_label, tdtf and
# feature_filename are defined outside this fragment.  The row-building
# statements presumably run once per image inside a loop whose header is not
# visible here -- confirm against the surrounding code.

#show the gray img
#cv2.imshow("w2",img)
#cv2.waitKey(0)

#reshape (h,w) to (h*w,)
img = img.reshape(w * h)
# Row layout: the label first, followed by every value of the flattened image.
feature = [img_label(img_name)]
feature.extend(img)
features_list.append(feature)

# Sanity check: number of rows, then the widths of the first and last row.
print("%d %d %d" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# The random train/validation split is disabled here: it survives only as the
# bare string literal below, which is evaluated and discarded.
''' train_index_list = random.sample(range(len(features_list)), len(features_list)/2 ) train_features_list = [] for i in train_index_list: train_features_list.append(features_list[i]) valid_features_list = [] for i in range(len(features_list)): if i in train_index_list: continue valid_features_list.append(features_list[i]) print len(train_features_list) print len(valid_features_list) # write / cover content to file '''

# With the split disabled, every row goes to one file.
tdtf.append_content_to_csv(features_list, feature_filename)
#tdtf.wr_content_to_csv(valid_features_list,valid_feature_filename)
["dog", "3"], ["horse", "4"], ["airplane", "5"], ["cat", "6"], ["truck", "7"], ["deer", "8"], ["ship", "9"], ] # mod_labels = tdtf.read_s_feature_from_csv(filname=mod_label_filename, limit=None, header_n=1) period = 1000 t_len = 300001 # len(t_feature_list) print t_len for i in range(0, 301): start_index = i * period end_index = min((i + 1) * period, t_len) if start_index >= end_index: break f_feature_list = tdtf.read_s_feature_from_csv(filname=from_feature_filename, limit=period, header_n=start_index + 0) t_feature_list = [] for feature in f_feature_list: for pair in mod_labels: if feature[1] == str(pair[1]): feature[1] = pair[0] break t_feature_list.append(feature) print "len", len(t_feature_list), len(f_feature_list), start_index tdtf.append_content_to_csv(t_feature_list, to_feature_filename)
# Fragment: turn one grayscale image into a flat, labelled feature row, then
# divide all rows gathered so far into a randomly sampled training half and a
# validation remainder, and pass each list to tdtf.append_content_to_csv.
# NOTE(review): img, img_name, w, h, features_list, img_label, tdtf and the two
# *_feature_filename names come from outside this fragment.  The row-building
# part presumably sits inside a per-image loop whose header is not visible
# here -- confirm before relying on the layout below.

#show the gray img
#cv2.imshow("w2",img)
#cv2.waitKey(0)

#reshape (h,w) to (h*w,)
img = img.reshape(w * h)
# The label leads the row; the flattened image values follow in order.
feature = [img_label(img_name)]
feature.extend(img)
features_list.append(feature)

# Report the row count and the lengths of the first and last rows.
print("%d %d %d" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# Draw half of the indices (rounded down) without replacement.  "//" is used
# so the sample size stays an integer regardless of the Python major version.
total_rows = len(features_list)
train_index_list = random.sample(range(total_rows), total_rows // 2)
train_features_list = [features_list[index] for index in train_index_list]

# Rows whose index was not drawn form the validation list, kept in their
# original order.  A set makes each membership test constant-time, so the
# whole pass is linear rather than quadratic in the number of rows.
sampled_indices = set(train_index_list)
valid_features_list = [
    row
    for index, row in enumerate(features_list)
    if index not in sampled_indices
]

print(len(train_features_list))
print(len(valid_features_list))

# Pass each split to the CSV helper along with its target filename.
tdtf.append_content_to_csv(train_features_list, train_feature_filename)
tdtf.append_content_to_csv(valid_features_list, valid_feature_filename)
# Script: read a feature CSV in fixed-size chunks, replace the numeric class
# index in column 1 of every row with its class name, and pass each converted
# chunk to tdtf.append_content_to_csv.
# NOTE(review): DataHome, tdtf and to_feature_filename are defined outside
# this fragment.
from_feature_filename = DataHome + "CIFAR_lenet_0.15_w41_ep100.csv"

# [class_name, class_index] pairs.  The comment below records the shell
# command that produced the table from the CIFAR-10 training labels.
#awk -F ',' '{a[$2]++}END{for(i in a)if(i!="label")print ",[\""i"\",\""b++"\"]"}' ~/Documents/data/Kaggle/CIFAR-10/trainLabels.csv
mod_labels = [
    ["automobile", "0"],
    ["frog", "1"],
    ["bird", "2"],
    ["dog", "3"],
    ["horse", "4"],
    ["airplane", "5"],
    ["cat", "6"],
    ["truck", "7"],
    ["deer", "8"],
    ["ship", "9"],
]
#mod_labels = tdtf.read_s_feature_from_csv(filname=mod_label_filename, limit=None, header_n=1)

period = 1000  # rows requested per read
t_len = 300001  # hard-coded upper bound; originally annotated "len(t_feature_list)"
print(t_len)

# Resolve indices through a table built once, rather than scanning mod_labels
# and calling str() again for every row.  setdefault lets the first pair win
# if an index is listed twice, which is what a first-match scan would do.
name_by_index = {}
for pair in mod_labels:
    name_by_index.setdefault(str(pair[1]), pair[0])

# Chunk starts are derived from t_len and period (0, 1000, ..., 300000 with the
# values above) so the number of chunks cannot drift from those two settings.
for start_index in range(0, t_len, period):
    # header_n receives the chunk's start offset -- presumably the number of
    # leading rows the reader skips; confirm against tdtf.
    f_feature_list = tdtf.read_s_feature_from_csv(
        filname=from_feature_filename, limit=period, header_n=start_index)

    t_feature_list = []
    for feature in f_feature_list:
        # Rows are edited in place; a cell with no matching index is kept as
        # is.  Assumes the cell is hashable (e.g. a CSV string) -- TODO confirm.
        feature[1] = name_by_index.get(feature[1], feature[1])
        t_feature_list.append(feature)

    print("len %d %d %d" % (len(t_feature_list), len(f_feature_list), start_index))
    tdtf.append_content_to_csv(t_feature_list, to_feature_filename)