コード例 #1
0
    #print "after resize and gray:",type(img),img.shape,img.dtype

    #show the gray img
    #cv2.imshow("w2",img)
    #cv2.waitKey(0)

    #reshape (h,w) to (h*w,) 
    img=img.reshape(w*h) 
    feature= []
    feature.append(img_label(img_name))
    for f_v in img:
        feature.append(f_v)
    features_list.append(feature)

# Report the number of samples plus the width of the first and last rows, so
# rows of differing length are easy to spot. A single pre-formatted string
# prints the same text under both Python 2 and Python 3.
print("%s %s %s" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# Randomly choose half of the sample indices (rounded down) for training;
# every remaining sample goes to validation. `//` keeps the sample size an
# integer on Python 3 as well as Python 2.
train_index_list = random.sample(range(len(features_list)),
                                 len(features_list) // 2)
train_features_list = [features_list[idx] for idx in train_index_list]

# Membership is tested once per sample, so use a set instead of scanning the
# index list each time (which made the split quadratic in the sample count).
train_index_set = set(train_index_list)
valid_features_list = [
    sample
    for idx, sample in enumerate(features_list)
    if idx not in train_index_set
]

print(len(train_features_list))
print(len(valid_features_list))

# Persist both halves.
# NOTE(review): the helper name suggests rows are appended to an existing
# file rather than overwriting it -- confirm against tdtf.
tdtf.append_content_to_csv(train_features_list, train_feature_filename)
tdtf.append_content_to_csv(valid_features_list, valid_feature_filename)
コード例 #2
0
    #show the gray img
    #cv2.imshow("w2",img)
    #cv2.waitKey(0)

    #reshape (h,w) to (h*w,) 
    img=img.reshape(w*h) 
    feature= []
    feature.append(img_label(img_name))
    for f_v in img:
        feature.append(f_v)
    features_list.append(feature)

# Report the number of samples plus the width of the first and last rows, so
# rows of differing length are easy to spot. A single pre-formatted string
# prints the same text under both Python 2 and Python 3.
print("%s %s %s" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# The random train/validation split below is disabled in this variant: all
# samples are written to a single CSV. It is kept as real comments rather
# than inside a bare string literal, which would be evaluated as an
# expression statement.
#
# train_index_list = random.sample(range(len(features_list)), len(features_list)/2 )
# train_features_list = []
# for i in train_index_list:
#     train_features_list.append(features_list[i])
# valid_features_list = []
# for i in range(len(features_list)):
#     if i in train_index_list:
#         continue
#     valid_features_list.append(features_list[i])
#
# print len(train_features_list)
# print len(valid_features_list)
# # write / cover content to file

# NOTE(review): the helper name suggests rows are appended to an existing
# file rather than overwriting it -- confirm against tdtf.
tdtf.append_content_to_csv(features_list, feature_filename)
#tdtf.wr_content_to_csv(valid_features_list,valid_feature_filename)
コード例 #3
0
ファイル: modify_label_ofcsv.py プロジェクト: hphp/Kaggle
    ["dog", "3"],
    ["horse", "4"],
    ["airplane", "5"],
    ["cat", "6"],
    ["truck", "7"],
    ["deer", "8"],
    ["ship", "9"],
]
# mod_labels = tdtf.read_s_feature_from_csv(filname=mod_label_filename, limit=None, header_n=1)

# Rewrite the label column of a large CSV chunk by chunk: each chunk is read,
# its label codes are replaced by label names, and the rows are appended to
# the destination file.
period = 1000  # rows requested per read
t_len = 300001  # hard-coded total row count (originally len(t_feature_list))
print(t_len)

# Build the code -> name lookup once instead of rescanning mod_labels for
# every row. setdefault keeps the first pair for a repeated code, which
# matches a first-match linear scan.
label_by_code = {}
for pair in mod_labels:
    label_by_code.setdefault(str(pair[1]), pair[0])

# Chunk starts are derived from t_len and period, so the loop bound no longer
# has to be kept in sync by hand (0, 1000, ..., 300000 for the values above).
for start_index in range(0, t_len, period):
    # NOTE(review): header_n is presumably the number of leading rows to skip
    # before reading -- confirm against tdtf. `filname` is spelled the way
    # the helper's keyword is used elsewhere in this script.
    f_feature_list = tdtf.read_s_feature_from_csv(
        filname=from_feature_filename, limit=period, header_n=start_index)

    t_feature_list = []
    for feature in f_feature_list:
        # Column 1 holds the label code; rows whose code is not in the
        # lookup are passed through unchanged.
        if feature[1] in label_by_code:
            feature[1] = label_by_code[feature[1]]
        t_feature_list.append(feature)

    # Same text as the Python 2 statement `print "len", a, b, c`.
    print("len %s %s %s" % (len(t_feature_list), len(f_feature_list),
                            start_index))
    tdtf.append_content_to_csv(t_feature_list, to_feature_filename)
コード例 #4
0
    #show the gray img
    #cv2.imshow("w2",img)
    #cv2.waitKey(0)

    #reshape (h,w) to (h*w,)
    img = img.reshape(w * h)
    feature = []
    feature.append(img_label(img_name))
    for f_v in img:
        feature.append(f_v)
    features_list.append(feature)

# Report the number of samples plus the width of the first and last rows, so
# rows of differing length are easy to spot. A single pre-formatted string
# prints the same text under both Python 2 and Python 3.
print("%s %s %s" % (len(features_list),
                    len(features_list[0]),
                    len(features_list[-1])))

# Randomly choose half of the sample indices (rounded down) for training;
# every remaining sample goes to validation. `//` keeps the sample size an
# integer on Python 3 as well as Python 2.
train_index_list = random.sample(range(len(features_list)),
                                 len(features_list) // 2)
train_features_list = [features_list[idx] for idx in train_index_list]

# Membership is tested once per sample, so use a set instead of scanning the
# index list each time (which made the split quadratic in the sample count).
train_index_set = set(train_index_list)
valid_features_list = [
    sample
    for idx, sample in enumerate(features_list)
    if idx not in train_index_set
]

print(len(train_features_list))
print(len(valid_features_list))

# Persist both halves.
# NOTE(review): the helper name suggests rows are appended to an existing
# file rather than overwriting it -- confirm against tdtf.
tdtf.append_content_to_csv(train_features_list, train_feature_filename)
tdtf.append_content_to_csv(valid_features_list, valid_feature_filename)
コード例 #5
0
ファイル: modify_label_ofcsv.py プロジェクト: hphp/Kaggle
# Rewrite the label column of a large CSV chunk by chunk: each chunk is read,
# its label codes are replaced by label names, and the rows are appended to
# the destination file.
from_feature_filename = DataHome + "CIFAR_lenet_0.15_w41_ep100.csv"

# (label name, label code) pairs. The list was generated from the training
# labels with:
#awk -F ',' '{a[$2]++}END{for(i in a)if(i!="label")print ",[\""i"\",\""b++"\"]"}' ~/Documents/data/Kaggle/CIFAR-10/trainLabels.csv
mod_labels = [["automobile", "0"], ["frog", "1"], ["bird", "2"], ["dog", "3"],
              ["horse", "4"], ["airplane", "5"], ["cat", "6"], ["truck", "7"],
              ["deer", "8"], ["ship", "9"]]
#mod_labels = tdtf.read_s_feature_from_csv(filname=mod_label_filename, limit=None, header_n=1)

period = 1000  # rows requested per read
t_len = 300001  # hard-coded total row count (originally len(t_feature_list))
print(t_len)

# Build the code -> name lookup once instead of rescanning mod_labels for
# every row. setdefault keeps the first pair for a repeated code, which
# matches a first-match linear scan.
label_by_code = {}
for pair in mod_labels:
    label_by_code.setdefault(str(pair[1]), pair[0])

# Chunk starts are derived from t_len and period, so the loop bound no longer
# has to be kept in sync by hand (0, 1000, ..., 300000 for the values above).
for start_index in range(0, t_len, period):
    # NOTE(review): header_n is presumably the number of leading rows to skip
    # before reading -- confirm against tdtf. `filname` is spelled the way
    # the helper's keyword is used elsewhere in this script.
    f_feature_list = tdtf.read_s_feature_from_csv(
        filname=from_feature_filename, limit=period, header_n=start_index)

    t_feature_list = []
    for feature in f_feature_list:
        # Column 1 holds the label code; rows whose code is not in the
        # lookup are passed through unchanged.
        if feature[1] in label_by_code:
            feature[1] = label_by_code[feature[1]]
        t_feature_list.append(feature)

    # Same text as the Python 2 statement `print "len", a, b, c`.
    print("len %s %s %s" % (len(t_feature_list), len(f_feature_list),
                            start_index))
    tdtf.append_content_to_csv(t_feature_list, to_feature_filename)