def load_data(dict):
    """Load all click arrays found under each class directory.

    Args:
        dict: Mapping from a class key to a directory path. The parameter
            name shadows the builtin; it is kept unchanged so existing
            callers (including keyword callers) keep working.

    Returns:
        A new dictionary with the same keys as ``dict``. Each value is the
        row-wise stack of every non-empty ``.npy`` file reported by
        ``find_click.list_npy_files`` for that directory (an array with 320
        columns; zero rows when nothing was loaded).
    """
    data_dict = {}
    for key in dict:
        path = dict[key]
        print(path)

        # Gather the per-file arrays and stack them once at the end. Stacking
        # inside the loop re-copies everything loaded so far on every file.
        # The empty (0, 320) seed keeps the result shape/dtype of the
        # incremental version, including the "nothing loaded" case.
        chunks = [np.empty((0, 320))]
        count = 0
        for npy in find_click.list_npy_files(path):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
            count += npy_data.shape[0]
        print('loaded clicks:', count)

        data_dict[key] = np.vstack(chunks)
    return data_dict
def load_data(dict):
    """Load click arrays per class and split them into train/test sets by file.

    For every class the ``.npy`` files reported by
    ``find_click.list_npy_files`` are shuffled with ``np.random.permutation``;
    the first quarter (``len(files) // 4``) of the shuffled files becomes the
    test set and the remaining files the training set.

    Args:
        dict: Mapping from a class key to a directory path. The parameter
            name shadows the builtin; it is kept unchanged for callers.

    Returns:
        A ``(train_dict, test_dict)`` tuple. Both dictionaries have the same
        keys as ``dict``; each value is the row-wise stack (320 columns) of
        the non-empty arrays loaded for that split.
    """
    train_dict = {}
    test_dict = {}
    for key in dict:
        path = dict[key]

        # Split by file: shuffle the file list, hold out the first quarter.
        npy_files = find_click.list_npy_files(path)
        npy_num = len(npy_files)
        random_index = np.random.permutation(npy_num)
        split_point = npy_num // 4
        test_set = [npy_files[i] for i in random_index[:split_point]]
        train_set = [npy_files[i] for i in random_index[split_point:]]

        # Training set. Arrays are collected and stacked once; stacking inside
        # the loop would re-copy all previously loaded rows for every file.
        train_chunks = [np.empty((0, 320))]
        count = 0
        print('training set loading.......')
        for npy in train_set:
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            train_chunks.append(npy_data)
            count += npy_data.shape[0]
        print('loaded clicks:', count)

        # Testing set (each held-out file name is echoed while loading).
        test_chunks = [np.empty((0, 320))]
        count = 0
        print('testing set loading.......')
        for npy in test_set:
            print(npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            test_chunks.append(npy_data)
            count += npy_data.shape[0]
        print('loaded clicks:', count)

        train_dict[key] = np.vstack(train_chunks)
        test_dict[key] = np.vstack(test_chunks)
    return train_dict, test_dict
def test_cnn_batch_learn(data_path, label=3, n_class=8, batch_num=20):
    """Evaluate the fused ("batch learn") CNN on the click files of one class.

    The graph is rebuilt here and its weights are restored from
    ``params/cnn_net_lwy.ckpt``. Every ``.npy`` file under ``data_path`` is
    visited in random order; its clicks are randomly cropped, turned into a
    96-bin magnitude spectrum, optionally peak-filtered, energy-normalised and
    grouped into batches of ``batch_num`` clicks. Each batch is classified in
    one pass by the fusion layer and compared with ``label``.

    Args:
        data_path: Directory handed to ``find_click.list_npy_files``.
        label: Index of the true class of every click under ``data_path``.
            The spectral-peak filter is only applied when ``label >= 0``.
        n_class: Number of output classes of the network.
        batch_num: Number of clicks fused into one decision.

    Returns:
        None. Per-file and overall accuracy are printed.
    """
    c = label
    npy_files = find_click.list_npy_files(data_path)
    random_index = np.random.permutation(len(npy_files))

    # Reference class: position of the 1 in the one-hot label vector.
    one_hot = np.zeros(n_class)
    one_hot[c] = 1
    ref_class = int(np.argmax(one_hot))

    x = tf.placeholder("float", [None, 96])

    # Input
    x_image = tf.reshape(x, [-1, 1, 96, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 24 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 24 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer (per-click scores)
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Regroup the per-click scores so that multi_y has one row per click
    # position inside a batch and one column per (batch, class) pair.
    temp_y = tf.reshape(y, [-1, batch_num, n_class])
    temp_y_t = tf.transpose(temp_y, perm=[0, 2, 1])
    multi_y_t = tf.reshape(temp_y_t, [-1, batch_num])
    multi_y = tf.transpose(multi_y_t, perm=[1, 0])

    # Fusion layer, multi-mode: fuse_mode weightings over the click positions
    # are combined by mode_weight into one score per class.
    fuse_mode = 9
    W_fuse = weight_variable([fuse_mode, batch_num])
    multi_fuse_out = tf.nn.relu(tf.matmul(W_fuse, multi_y))
    mode_weight = weight_variable([1, fuse_mode])
    mode_b = bias_variable([n_class])
    fuse_out = tf.nn.softmax(
        tf.reshape(tf.matmul(mode_weight, multi_fuse_out), [-1, n_class])
        + mode_b)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    print('==============================================')
    total_correct = 0
    total = 0
    with tf.Session() as sess:
        sess.run(init)
        # Load the trained network parameters.
        saver.restore(sess, "params/cnn_net_lwy.ckpt")

        for file_idx in random_index:
            npy = npy_files[file_idx]
            print('loading %s' % npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            # Stacking onto an empty (0, 320) array yields a float copy that
            # can be shrunk below without touching the loaded data.
            xs = np.vstack((np.empty((0, 320)), npy_data))
            print('loaded clicks:', npy_data.shape[0])

            click_batch = []
            total_batch = int(xs.shape[0] / batch_num)
            for i in range(total_batch):
                tmp_xs = np.empty((0, 96))
                j = batch_num * i
                if j > xs.shape[0]:
                    break
                while j < batch_num * (i + 1):
                    if xs.shape[0] == 0:
                        break
                    # Wrap around so rows are revisited once earlier rows
                    # have been deleted by the peak filter.
                    index = j % xs.shape[0]
                    temp_x = xs[index]
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
                    crop_x = np.reshape(crop_x, [1, 192])
                    crop_x = np.fft.fft(crop_x)
                    crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)
                    crop_x = crop_x[0, :96]
                    crop_x = np.reshape(crop_x, [1, 96])
                    if c >= 0:
                        # Discard clicks whose spectral peak lies below bin 20
                        # or above bin 70 (described as 20k / 70k originally).
                        peak_index = np.argmax(crop_x)
                        if peak_index < 20 or peak_index > 70:
                            xs = np.delete(xs, index, 0)
                            continue
                    crop_x = energy_normalize(crop_x)
                    tmp_xs = np.vstack((tmp_xs, crop_x))
                    j += 1

                if tmp_xs.shape[0] != batch_num:
                    # Every remaining click was filtered out before the batch
                    # was full. The fusion layer reshapes its input to
                    # [-1, batch_num, n_class], so a short batch cannot be
                    # evaluated; stop batching this file.
                    break
                click_batch.append(tmp_xs)

            print('the number of batch:', len(click_batch))
            if len(click_batch) == 0:
                continue
            total += len(click_batch)

            count = 0
            for batch_xs in click_batch:
                out_y = sess.run(fuse_out,
                                 feed_dict={x: batch_xs, keep_prob: 1.0})
                pre_y = np.argmax(out_y, 1)
                if pre_y[0] == ref_class:
                    count += 1

            total_correct += count
            print('correct:', count, 'total:', len(click_batch))
            print('cnn test accuracy (batch learn): ',
                  round(count / len(click_batch), 3))

    # Avoid a ZeroDivisionError when no batch could be evaluated at all.
    accuracy = total_correct / total if total else float('nan')
    print('total correct: %d, total: %d, batch learn acc: %f'
          % (total_correct, total, accuracy))
def test_cnn_data(data_path, label=3, n_class=8, batch_num=20):
    """Evaluate the per-click CNN with majority voting over click batches.

    The graph is rebuilt here and its weights are restored from
    ``params/cnn_net_lwy.ckpt``. Every ``.npy`` file under ``data_path`` is
    visited in random order; its clicks are randomly cropped, turned into a
    96-bin magnitude spectrum, optionally peak-filtered, energy-normalised and
    grouped into batches of up to ``batch_num`` clicks. Each click votes for
    its arg-max class and the most voted class is the batch prediction.

    Args:
        data_path: Directory handed to ``find_click.list_npy_files``.
        label: Index of the true class of every click under ``data_path``.
            The spectral-peak filter is only applied when ``label >= 0``.
        n_class: Number of output classes of the network.
        batch_num: Number of clicks that vote for one decision.

    Returns:
        None. Per-file and overall accuracy are printed.
    """
    c = label
    npy_files = find_click.list_npy_files(data_path)
    random_index = np.random.permutation(len(npy_files))

    # Reference class: position of the 1 in the one-hot label vector.
    one_hot = np.zeros(n_class)
    one_hot[c] = 1
    ref_class = int(np.argmax(one_hot))

    x = tf.placeholder("float", [None, 96])

    # Input
    x_image = tf.reshape(x, [-1, 1, 96, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 24 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 24 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    y = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    print('==============================================')
    total_correct = 0
    total = 0
    with tf.Session() as sess:
        sess.run(init)
        # Load the trained network parameters.
        saver.restore(sess, "params/cnn_net_lwy.ckpt")

        for file_idx in random_index:
            npy = npy_files[file_idx]
            print('loading %s' % npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            # Stacking onto an empty (0, 320) array yields a float copy that
            # can be shrunk below without touching the loaded data.
            xs = np.vstack((np.empty((0, 320)), npy_data))
            print('loaded clicks:', npy_data.shape[0])

            click_batch = []
            total_batch = int(xs.shape[0] / batch_num)
            for i in range(total_batch):
                tmp_xs = np.empty((0, 96))
                j = batch_num * i
                if j > xs.shape[0]:
                    break
                while j < batch_num * (i + 1):
                    if xs.shape[0] == 0:
                        break
                    # Wrap around so rows are revisited once earlier rows
                    # have been deleted by the peak filter.
                    index = j % xs.shape[0]
                    temp_x = xs[index]
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
                    crop_x = np.reshape(crop_x, [1, 192])
                    crop_x = np.fft.fft(crop_x)
                    crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)
                    crop_x = crop_x[0, :96]
                    crop_x = np.reshape(crop_x, [1, 96])
                    if c >= 0:
                        # Discard clicks whose spectral peak lies below bin 20
                        # or above bin 70 (described as 20k / 70k originally).
                        peak_index = np.argmax(crop_x)
                        if peak_index < 20 or peak_index > 70:
                            xs = np.delete(xs, index, 0)
                            continue
                    crop_x = energy_normalize(crop_x)
                    tmp_xs = np.vstack((tmp_xs, crop_x))
                    j += 1

                if tmp_xs.shape[0] == 0:
                    # All clicks were filtered out: a batch without votes
                    # would be scored as a prediction of class 0, so it is
                    # dropped and batching of this file stops.
                    break
                click_batch.append(tmp_xs)

            print('the number of batch:', len(click_batch))
            if len(click_batch) == 0:
                continue
            total += len(click_batch)

            count = 0
            majority_mat = [0] * n_class
            for batch_xs in click_batch:
                # Classify all clicks of the batch in one run, then let each
                # click vote for its arg-max class.
                out_y = sess.run(y, feed_dict={x: batch_xs, keep_prob: 1.0})
                votes = np.bincount(np.argmax(out_y, 1), minlength=n_class)
                predict = int(np.argmax(votes))
                majority_mat[predict] += 1
                if predict == ref_class:
                    count += 1

            total_correct += count
            print('correct:', count, 'total:', len(click_batch))
            print('cnn test accuracy (majority voting): ',
                  round(count / len(click_batch), 3))
            print('result:', majority_mat)

    print('total correct:', total_correct, 'total batch:', total)
    # Avoid a ZeroDivisionError when no batch could be evaluated at all.
    accuracy = total_correct / total if total else float('nan')
    print('%s mean acc: %f' % (data_path, accuracy))
def load_npy_data(batch_num=20, n_total=500):
    """Build train/test sets for the three hard-coded classes, split by folder.

    For each class one randomly chosen entry of ``find_click.list_files`` is
    held out as the test folder; all other entries form the training set. The
    loaded clicks are passed to ``random_crop`` and labelled one-hot.

    Args:
        batch_num: Forwarded to ``random_crop``.
        n_total: Forwarded to ``random_crop`` for the training set; the test
            set is always cropped with ``n_total=0``.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)``. The ``*_xs`` values are
        ``np.array`` conversions of the concatenated ``random_crop`` results;
        the ``*_ys`` values are arrays with one one-hot row per sample.
    """
    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Melon",
        "1": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Spinner",
        "2": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Tt",
    }
    n_class = len(class_paths)

    train_ys = np.empty((0, n_class))
    test_ys = np.empty((0, n_class))
    train_xs = []
    test_xs = []

    for key, path in class_paths.items():
        print(path)
        c = int(key)

        # Hold out one randomly chosen folder for testing.
        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # Training set. Arrays are collected and stacked once; stacking inside
        # the loop would re-copy all previously loaded rows for every file.
        train_chunks = [np.empty((0, 320))]
        count = 0
        print('training set loading.......')
        for folder in train_set:
            for npy in find_click.list_npy_files(folder):
                npy_data = np.load(npy)
                if npy_data.shape[0] == 0:
                    continue
                train_chunks.append(npy_data)
                count += npy_data.shape[0]
        xs = np.vstack(train_chunks)
        print('loaded clicks:', count)

        # Test set.
        test_chunks = [np.empty((0, 320))]
        count = 0
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        for npy in find_click.list_npy_files(test_set):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            test_chunks.append(npy_data)
            count += npy_data.shape[0]
        txs = np.vstack(test_chunks)
        print('loaded clicks:', count)

        print('training set crop...')
        temp_train_xs = random_crop(xs, batch_num, n_total, key)
        print('testing set crop...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0, key=key)

        # One copy of the one-hot label per cropped sample.
        temp_train_ys = np.tile(label, (len(temp_train_xs), 1))
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))

        train_xs += temp_train_xs
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    train_xs = np.array(train_xs)
    test_xs = np.array(test_xs)
    return train_xs, train_ys, test_xs, test_ys
# NOTE(review): this span has no enclosing `def` in view and stops right after
# the spectrum is computed; `root_save_path` and `dict` come from outside it.
# Make sure the root output directory exists.
if not os.path.exists(root_save_path):
    os.makedirs(root_save_path)

for key in dict:
    count = 0
    print(dict[key])
    path = dict[key]
    # Last path component; used as the per-class sub-directory name.
    specie_name = path.split('/')[-1]
    file_list = find_click.list_files(path)
    save_specie_path = os.path.join(root_save_path, specie_name)
    for date_path in file_list:
        # Mirror each entry of file_list as a sub-directory of the class
        # directory (the variable names suggest these are per-date folders).
        date = date_path.split('/')[-1]
        save_path = os.path.join(save_specie_path, date)
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        npy_list = find_click.list_npy_files(date_path)
        for npy in npy_list:
            npy_data = np.load(npy)
            num = npy_data.shape[0]
            xs = np.empty((0, 320))
            for index in range(num):
                temp_x = npy_data[index]
                # Random 192-sample crop starting at an offset in [64, 96).
                beg_idx = np.random.randint(64, (64 + 32))
                crop_x = temp_x[beg_idx:(beg_idx + 192)]
                crop_x = np.reshape(crop_x, [1, 192])
                # Magnitude spectrum of the crop; only the first 96 bins are kept.
                crop_x = np.fft.fft(crop_x)
                crop_x = np.sqrt(crop_x.real**2 + crop_x.imag**2)
                crop_x = crop_x[0, :96]
                crop_x = np.reshape(crop_x, [1, 96])
def load_npy_data(batch_num=20, n_total=500):
    """Build train/test sets for the seven hard-coded click classes.

    For each class the ``.npy`` files reported by
    ``find_click.list_npy_files`` are loaded in listing order until at least
    100000 clicks have been read. The clicks are divided by ``split_data``,
    cropped by ``random_crop`` (4/5 of ``n_total`` for training, 1/5 for
    testing) and labelled one-hot.

    Args:
        batch_num: Forwarded to ``random_crop``.
        n_total: Total crop budget per class, split 4:1 between the training
            and the testing call of ``random_crop``.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)`` as row-stacked arrays; the
        ``*_xs`` arrays have 96 columns and the ``*_ys`` arrays one one-hot
        row per sample.
    """
    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/ClearData/Mesoplodon/Blainvilles_beaked_whale_(Mesoplodon_densirostris)",
        "1": "/home/fish/ROBB/CNN_click/click/ClearData/Gg/Rissos_(Grampus_grisieus)",
        "2": "/home/fish/ROBB/CNN_click/click/ClearData/Gm/Pilot_whale_(Globicephala_macrorhynchus)",
        "3": "/home/fish/ROBB/CNN_click/click/Data/Melon/palmyra2006",
        "4": "/home/fish/ROBB/CNN_click/click/Data/Dd/Dd",
        "5": "/home/fish/ROBB/CNN_click/click/Data/Spinner/palmyra2006",
        "6": "/home/fish/ROBB/CNN_click/click/Data/Tt/palmyra2006",
    }
    n_class = len(class_paths)

    train_xs = np.empty((0, 96))
    train_ys = np.empty((0, n_class))
    test_xs = np.empty((0, 96))
    test_ys = np.empty((0, n_class))

    for key, path in class_paths.items():
        c = int(key)
        npy_files = find_click.list_npy_files(path)
        # NOTE(review): this permutation is never used to pick files; the loop
        # below reads them in listing order, so the 100000-click cap always
        # selects the first files. Confirm whether random order was intended.
        # The call is kept so the global RNG stream stays unchanged.
        random_index = np.random.permutation(len(npy_files))

        label = np.zeros(n_class)
        label[c] = 1

        # Arrays are collected and stacked once; stacking inside the loop
        # would re-copy all previously loaded rows for every file.
        chunks = [np.empty((0, 320))]
        count = 0
        for npy in npy_files:
            print('loading %s' % npy)
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
            count += npy_data.shape[0]
            if count >= 100000:
                break
        xs = np.vstack(chunks)
        print('loaded clicks:', count)

        xs0, xs1 = split_data(xs)

        print('crop training clicks...')
        temp_train_xs = random_crop(xs0, batch_num, int(n_total * 4 / 5), key)
        print('crop testing clicks...')
        temp_test_xs = random_crop(xs1, batch_num, int(n_total / 5), key)

        # One copy of the one-hot label per cropped sample.
        temp_train_ys = np.tile(label, (temp_train_xs.shape[0], 1))
        temp_test_ys = np.tile(label, (temp_test_xs.shape[0], 1))

        train_xs = np.vstack((train_xs, temp_train_xs))
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs = np.vstack((test_xs, temp_test_xs))
        test_ys = np.vstack((test_ys, temp_test_ys))

    return train_xs, train_ys, test_xs, test_ys
def load_data(batch_num=20):
    """Load per-class training crops and a labelled test set, split by folder.

    For each of the three hard-coded classes one randomly chosen entry of
    ``find_click.list_files`` is held out as the test folder; all other
    entries form the training set. Both sets are passed through
    ``random_crop`` with ``n_total=0``.

    Args:
        batch_num: Forwarded to ``random_crop``.

    Returns:
        ``(train_dict, test_xs, test_ys)`` where ``train_dict`` maps each
        class key to its cropped training data as an array, ``test_xs`` is
        the array of cropped test samples of all classes and ``test_ys``
        holds one one-hot label row per test sample.
    """
    class_paths = {
        "0": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Melon",
        "1": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Spinner",
        "2": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Tt",
    }
    n_class = len(class_paths)

    test_ys = np.empty((0, n_class))
    test_xs = []
    train_dict = {key: None for key in class_paths}

    for key, path in class_paths.items():
        print(path)
        c = int(key)

        # Hold out one randomly chosen folder for testing.
        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # Training set. Arrays are collected and stacked once; stacking inside
        # the loop would re-copy all previously loaded rows for every file.
        train_chunks = [np.empty((0, 320))]
        count = 0
        print('training set loading.......')
        for folder in train_set:
            for npy in find_click.list_npy_files(folder):
                npy_data = np.load(npy)
                if npy_data.shape[0] == 0:
                    continue
                train_chunks.append(npy_data)
                count += npy_data.shape[0]
        xs = np.vstack(train_chunks)
        print('loaded clicks:', count)

        # Test set.
        test_chunks = [np.empty((0, 320))]
        count = 0
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        for npy in find_click.list_npy_files(test_set):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            test_chunks.append(npy_data)
            count += npy_data.shape[0]
        txs = np.vstack(test_chunks)
        print('loaded clicks:', count)

        print('crop training clicks...')
        train_xs = np.array(random_crop(xs, batch_num, n_total=0))
        print('crop testing clicks...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0)

        train_dict[key] = train_xs

        # One copy of the one-hot label per cropped test sample.
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    test_xs = np.array(test_xs)
    return train_dict, test_xs, test_ys