def detect_click(class_path, class_name, snr_threshold_low=5,
                 snr_threshold_high=20, tar_fs=192000):
    """Run ``run_cnn_detection`` on every wav file found for one class.

    ``class_path`` is listed with ``find_click.list_files``; if nothing is
    returned, ``class_path`` itself is used as the only folder.  For each
    folder, ``./CNN_Det12_WK5/<class_name>/<folder name>`` is created when it
    does not exist and passed to ``run_cnn_detection`` as the destination.

    Args:
        class_path: Directory of the class (holding sub-folders, or the wav
            files directly).
        class_name: Name used for the class level of the destination path.
        snr_threshold_low: Forwarded unchanged to ``run_cnn_detection``.
        snr_threshold_high: Forwarded unchanged to ``run_cnn_detection``.
        tar_fs: Forwarded unchanged to ``run_cnn_detection`` (presumably the
            target sampling rate in Hz -- confirm against that function).
    """
    folder_list = find_click.list_files(class_path)
    if not folder_list:
        folder_list = [class_path]

    save_npy = True
    for folder in folder_list:
        print(folder)
        wav_files = find_click.list_wav_files(folder)
        # Drop trailing separators first: "a/b/".split('/')[-1] would be ""
        # and every folder given that way would share one output directory.
        path_name = folder.rstrip('/').split('/')[-1]
        dst_path = "./CNN_Det12_WK5/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)
        for file_name in wav_files:
            run_cnn_detection(file_name, snr_threshold_low,
                              snr_threshold_high, save_npy, dst_path, tar_fs)
def load_data(data_path, n_class, click_len=320):
    """Load per-class click arrays and split them by file into two halves.

    For every class ``c`` in ``range(n_class)`` the ``.npy`` files under
    ``<data_path>/<c>`` are listed, shuffled with ``np.random.permutation``
    and loaded.  Files in the first half of the shuffled order go to the
    training set, the rest to the test set.

    Args:
        data_path: Root directory with one sub-directory per class index.
        n_class: Number of classes (sub-directories ``0 .. n_class - 1``).
        click_len: Number of columns every stored click array must have.

    Returns:
        ``(train_data, test_data)``: two lists with one entry per class; each
        entry is ``[clicks, label]`` where ``clicks`` is a float array of
        shape ``(n_clicks, click_len)`` and ``label`` is the class index as a
        NumPy integer.
    """
    separator = "---------------------------------------------------------"
    train_data = []
    test_data = []
    for c in range(n_class):
        print(separator)
        path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
        npy_files = find_click.list_files(path, '.npy')
        n_files = len(npy_files)
        print("load data : %s, the number of files : %d" % (path, n_files))
        print(separator)

        random_index = np.random.permutation(n_files)
        count = 0
        # Start each list with an empty (0, click_len) float array so the
        # stacked result has the same dtype/shape checks as incremental
        # stacking, but is built with one vstack instead of one per file.
        train_chunks = [np.empty((0, click_len))]
        test_chunks = [np.empty((0, click_len))]
        for idx, index in enumerate(random_index):
            clicks = np.load(npy_files[index])
            if clicks.shape[0] == 0:
                # An empty file is stored with shape (0,), which cannot be
                # stacked against (n, click_len); it contributes nothing.
                continue
            count += clicks.shape[0]
            if idx < n_files / 2:
                train_chunks.append(clicks)
            else:
                test_chunks.append(clicks)

        # Keep the label as a NumPy integer scalar, as callers received before.
        label = np.array([c])[0]
        print("the number of clicks : %(n)d" % {'n': count})
        train_data.append([np.vstack(train_chunks), label])
        test_data.append([np.vstack(test_chunks), label])
    return train_data, test_data
def load_data(self, data_path, click_len=320):
    """Reset the score/label buffers and load a per-class train/test split.

    Clears ``cnn_mv_scores``, ``cnn_mp_scores``, ``cnn_labels``,
    ``gmm_scores``, ``gmm_labels``, ``train_data`` and ``test_data``.  Then,
    for every class ``c`` in ``range(self.n_classes)``, the ``.npy`` files
    under ``<data_path>/<c>`` are shuffled and loaded; files whose shuffled
    position is below ``n_files * (fold_num - 1) / fold_num`` go to the
    training set and the remainder to the test set.

    Each entry appended to ``self.train_data`` / ``self.test_data`` is
    ``[clicks, label]`` with ``clicks`` of shape ``(n_clicks, click_len)``
    and ``label`` the class index as a NumPy integer.

    Args:
        data_path: Root directory with one sub-directory per class index.
        click_len: Number of columns every stored click array must have.
    """
    self.cnn_mv_scores = []
    self.cnn_mp_scores = []
    self.cnn_labels = []
    self.gmm_scores = []
    self.gmm_labels = []
    self.train_data = []
    self.test_data = []

    separator = "---------------------------------------------------------"
    for c in range(self.n_classes):
        print(separator)
        path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
        npy_files = find_click.list_files(path, '.npy')
        n_files = len(npy_files)
        print("load data : %s, the number of files : %d" % (path, n_files))
        print(separator)

        random_index = np.random.permutation(n_files)
        count = 0
        # Collect the chunks and stack once; the leading empty float array
        # keeps the result dtype and the column-count check unchanged.
        train_chunks = [np.empty((0, click_len))]
        test_chunks = [np.empty((0, click_len))]
        for idx, index in enumerate(random_index):
            clicks = np.load(npy_files[index])
            if clicks.shape[0] == 0:
                # Empty files are stored with shape (0,) and cannot be
                # stacked against (n, click_len); skip them.
                continue
            count += clicks.shape[0]
            if idx < n_files * (self.fold_num - 1) / self.fold_num:
                train_chunks.append(clicks)
            else:
                test_chunks.append(clicks)

        # Keep the label as a NumPy integer scalar, as before.
        label = np.array([c])[0]
        print("the number of clicks : %(n)d" % {'n': count})
        self.train_data.append([np.vstack(train_chunks), label])
        self.test_data.append([np.vstack(test_chunks), label])
def load_data(self, data_path, n_total=20000):
    """Build one-hot labelled train/test matrices from tab-separated files.

    For each class index ``c`` in ``range(self.n_class)`` the ``.txt`` files
    under ``<data_path>/<c>`` are read with pandas, their second column is
    passed through ``self.down_sample(..., 2)``, and the resulting samples
    are divided by ``self.split_data``.  ``self.generate_data`` is asked for
    ``int(n_total * 4 / 5)`` items from the first part and
    ``int(n_total / 5)`` from the second; the results are appended to
    ``self.train_xs`` / ``self.test_xs`` with matching one-hot rows in
    ``self.train_ys`` / ``self.test_ys``.

    Args:
        data_path: Root directory with one sub-directory per class index.
        n_total: Total number of items requested per class.
    """
    self.train_xs = np.empty((0, self.ftu_num))
    self.train_ys = np.empty((0, self.n_class))
    self.test_xs = np.empty((0, self.ftu_num))
    self.test_ys = np.empty((0, self.n_class))

    for class_id in range(self.n_class):
        class_dir = "%(path)s/%(class)d" % {
            'path': data_path,
            'class': class_id
        }
        txt_files = find_click.list_files(class_dir, '.txt')
        print("load data : %s, the number of files : %d"
              % (class_dir, len(txt_files)))

        one_hot = np.zeros(self.n_class)
        one_hot[class_id] = 1

        # Column 1 of every table is the signal; halve it via down_sample.
        samples = [
            self.down_sample(pd.read_csv(txt_file, sep='\t').values[:, 1], 2)
            for txt_file in txt_files
        ]

        train_part, test_part = self.split_data(samples)
        train_part = self.generate_data(train_part, int(n_total * 4 / 5))
        test_part = self.generate_data(test_part, int(n_total / 5))
        train_part = np.array(train_part)
        test_part = np.array(test_part)

        train_labels = np.tile(one_hot, (train_part.shape[0], 1))
        test_labels = np.tile(one_hot, (test_part.shape[0], 1))

        self.train_xs = np.vstack((self.train_xs, train_part))
        self.train_ys = np.vstack((self.train_ys, train_labels))
        self.test_xs = np.vstack((self.test_xs, test_part))
        self.test_ys = np.vstack((self.test_ys, test_labels))
def detect_save_click(class_path, class_name, snr_threshold_low=5,
                      snr_threshold_high=100, tar_fs=96000, signal_len=320):
    """Detect clicks in the wav files of one class and save them as ``.npy``.

    ``class_path`` is listed with ``find_click.list_files``; if nothing is
    returned, ``class_path`` itself is used as the only folder.  For each wav
    file, ``find_click.find_click_fdr_tkeo`` supplies click index pairs and a
    signal ``xn``.  ``xn`` is rescaled so its maximum equals ``2**12 - 1``,
    and each click is kept only if its SNR lies within
    ``[snr_threshold_low, snr_threshold_high]``.  The noise estimate uses up
    to 256 samples before and after the click.  Kept clicks are passed
    through ``resample`` and ``cut_data``, cast to ``np.short`` and saved as
    one array per wav file under
    ``./TKEO_wk3_complete/<class_name>/<folder name>/<wav>_N<count>.npy``.

    Args:
        class_path: Directory of the class (holding sub-folders, or the wav
            files directly).
        class_name: Name used for the class level of the destination path.
        snr_threshold_low: Clicks with a lower SNR (``10 * log10`` of the
            energy ratio) are discarded.
        snr_threshold_high: Clicks with a higher SNR are discarded.
        tar_fs: Rate passed to ``resample`` as the target.
        signal_len: Length passed to the detector and to ``cut_data``.
    """
    noise_len = 256  # samples taken on each side of a click as noise
    # Settings handed to find_click.find_click_fdr_tkeo.
    fl = 5000
    fwhm = 0.0004
    fdr_threshold = 0.65

    folder_list = find_click.list_files(class_path)
    if not folder_list:
        folder_list = [class_path]

    for folder in folder_list:
        print(folder)
        count = 0
        wav_files = find_click.list_wav_files(folder)
        # Drop trailing separators so "a/b/" yields "b" rather than "".
        path_name = folder.rstrip('/').split('/')[-1]
        dst_path = "./TKEO_wk3_complete/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)

        for pathname in wav_files:
            print(pathname)
            wave_data, frame_rate = find_click.read_wav_file(pathname)
            # File name up to its first dot is the output prefix.
            wavname = os.path.basename(pathname).split('.')[0]

            click_index, xn = find_click.find_click_fdr_tkeo(
                wave_data, frame_rate, fl, fwhm, fdr_threshold, signal_len, 8)

            # Rescale in place in one vectorised step; slice assignment
            # casts like the element-wise assignment it replaces.
            scale = (2**12 - 1) / np.max(xn)
            xn[:] = xn * scale

            click_arr = []
            for index in click_index:
                start, stop = index[0], index[1]
                click_data = xn[start:stop]

                # SNR filtering.
                detected_clicks_energy = calcu_click_energy(
                    click_data.reshape(1, -1))
                # Clamp at 0: a negative start would be read as an offset
                # from the END of xn and give an empty or wrong window for
                # clicks within the first noise_len samples.
                noise_before = xn[max(start - noise_len, 0):start]
                noise_after = xn[stop + 1:stop + 1 + noise_len]
                noise_estimate = np.hstack((noise_before, noise_after))
                if noise_estimate.size == 0:
                    # No surrounding samples: SNR is undefined.
                    continue
                noise_energy = calcu_energy(noise_estimate)
                if noise_energy <= 0 or detected_clicks_energy <= 0:
                    continue
                snr = 10 * math.log10(detected_clicks_energy / noise_energy)
                if snr < snr_threshold_low or snr > snr_threshold_high:
                    continue

                click_data = resample(click_data, frame_rate, tar_fs)
                click_data = cut_data(click_data, signal_len)
                click_arr.append(click_data.astype(np.short))
                count += 1

            dst = "%(path)s/%(pre)s_N%(num)d.npy" \
                  % {'path': dst_path, 'pre': wavname, 'num': len(click_arr)}
            print(dst)
            np.save(dst, np.array(click_arr, dtype=np.short))

        print("count = %(count)d" % {'count': count})
def load_npy_data(batch_num=20, n_total=500, class_dirs=None, click_len=320):
    """Load click arrays per class, hold out one folder, and crop batches.

    For every class the sub-folders returned by ``find_click.list_files``
    are shuffled; the first one becomes the test set and the others the
    training set.  All non-empty ``.npy`` arrays are stacked and handed to
    ``random_crop`` -- with ``n_total`` for training and ``n_total=0`` for
    testing.  One-hot label rows are generated for every returned item.

    Args:
        batch_num: Forwarded to ``random_crop``.
        n_total: Forwarded to ``random_crop`` for the training set.
        class_dirs: Optional mapping from class key (a string holding the
            class index, ``"0"``, ``"1"``, ...) to the class directory.
            Defaults to the three ``TKEO_wk5_complete_filtered`` folders.
        click_len: Number of columns every stored click array must have.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)`` where the ``xs`` are
        ``np.array`` conversions of the concatenated ``random_crop`` results
        and the ``ys`` are ``(n_items, n_class)`` one-hot float arrays.

    Raises:
        IndexError: If a class directory contains no sub-folders.
    """
    if class_dirs is None:
        class_dirs = {
            "0": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Melon",
            "1": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Spinner",
            "2": "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Tt",
        }
    n_class = len(class_dirs)

    def _stack_clicks(npy_paths):
        # Stack every non-empty array with a single vstack; the leading
        # empty float array fixes the dtype and the expected column count.
        chunks = [np.empty((0, click_len))]
        for npy in npy_paths:
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
        return np.vstack(chunks)

    train_xs = []
    test_xs = []
    train_ys = np.empty((0, n_class))
    test_ys = np.empty((0, n_class))

    for key in class_dirs:
        path = class_dirs[key]
        print(path)
        c = int(key)
        file_list = find_click.list_files(path)
        if len(file_list) == 0:
            raise IndexError("no sub-folders found under %s" % path)

        random_index = np.random.permutation(len(file_list))
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # training set
        print('training set loading.......')
        train_paths = []
        for folder in train_set:
            train_paths.extend(find_click.list_npy_files(folder))
        xs = _stack_clicks(train_paths)
        print('loaded clicks:', xs.shape[0])

        # test set
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        txs = _stack_clicks(find_click.list_npy_files(test_set))
        print('loaded clicks:', txs.shape[0])

        print('training set crop...')
        temp_train_xs = random_crop(xs, batch_num, n_total, key)
        print('testing set crop...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0, key=key)

        temp_train_ys = np.tile(label, (len(temp_train_xs), 1))
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))

        train_xs += temp_train_xs
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    train_xs = np.array(train_xs)
    test_xs = np.array(test_xs)
    return train_xs, train_ys, test_xs, test_ys
# dict["2"] = "/home/fish/ROBB/CNN_click/click/CNNDet18/Tt" dict["0"] = "/home/fish/ROBB/CNN_click/click/Xiamen/bottlenose" dict["1"] = "/home/fish/ROBB/CNN_click/click/Xiamen/chinesewhite" dict["2"] = "/home/fish/ROBB/CNN_click/click/Xiamen/Neomeris" root_save_path = "/home/fish/ROBB/CNN_click/click/Xiamen_filtered" if not os.path.exists(root_save_path): os.makedirs(root_save_path) for key in dict: count = 0 print(dict[key]) path = dict[key] specie_name = path.split('/')[-1] file_list = find_click.list_files(path) save_specie_path = os.path.join(root_save_path, specie_name) for date_path in file_list: date = date_path.split('/')[-1] save_path = os.path.join(save_specie_path, date) if not os.path.exists(save_path): os.makedirs(save_path) npy_list = find_click.list_npy_files(date_path) for npy in npy_list: npy_data = np.load(npy) num = npy_data.shape[0] xs = np.empty((0, 320)) for index in range(num): temp_x = npy_data[index] beg_idx = np.random.randint(64, (64 + 32)) crop_x = temp_x[beg_idx:(beg_idx + 192)]
def test_cnn_batch_data(data_path, n_class, input_dm, batch_num=20,
                        classes=(3, 6, 7), ckpt_path="params/cnn_net.ckpt"):
    """Evaluate the saved CNN on batches of clicks, file by file.

    The default graph is reset and the network is rebuilt (two 1x5
    convolution + 1x2 max-pool stages, a 256-unit dense layer with dropout,
    and an ``n_class``-way output), then its variables are restored from
    ``ckpt_path``.  For each class index in ``classes`` every ``.npy`` file
    under ``<data_path>/<class>`` is loaded and its clicks are grouped into
    batches of ``batch_num`` (indices wrap around, so the last batch is
    filled from the start of the file).  Each click is scaled to unit
    energy and cropped to ``input_dm`` samples at a random offset in
    ``[0, margin)``, where ``margin = int((len - input_dm) / 2)``.

    Two batch-level decisions are printed per file, each followed by a
    histogram of predicted classes:

    * majority voting -- the most frequent per-click arg-max of the softmax;
    * weight voting -- the arg-max of the summed pre-softmax scores.

    Args:
        data_path: Root directory with one sub-directory per class index.
        n_class: Number of network outputs.
        input_dm: Number of samples fed to the network per click.
        batch_num: Number of clicks per batch.
        classes: Class indices to evaluate.
        ckpt_path: Checkpoint passed to ``saver.restore``.
    """
    tf.reset_default_graph()

    x = tf.placeholder("float", [None, input_dm])

    # Input
    x_image = tf.reshape(x, [-1, 1, input_dm, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * int(input_dm / 4) * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * int(input_dm / 4) * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer.  The pre-softmax scores are built once here rather than
    # once per file, which kept adding ops to the graph.
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y = tf.nn.softmax(logits)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, ckpt_path)  # load the trained network parameters

        for c in classes:
            path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
            npy_files = find_click.list_files(path, '.npy')
            print("load data : %s, the number of files : %d"
                  % (path, len(npy_files)))

            for path_name in npy_files:
                print(path_name)
                clicks = np.load(path_name)
                num = clicks.shape[0]
                print("the number of clicks : %d" % num)

                # Ceiling division: a partial last batch is still evaluated.
                run_num = (num + batch_num - 1) // batch_num
                click_batch = []
                for i in range(run_num):
                    rows = []
                    for j in range(batch_num * i, batch_num * (i + 1)):
                        # Work on a float copy: integer clicks overflow when
                        # squared and cannot be divided in place.
                        temp_x = clicks[j % num].astype(np.float64)
                        energy = np.sqrt(np.sum(temp_x ** 2))
                        if energy > 0:
                            temp_x = temp_x / energy
                        margin = int((len(temp_x) - input_dm) / 2)
                        # randint(0, margin) raises when margin <= 0.
                        if margin > 0:
                            beg_idx = np.random.randint(0, margin)
                        else:
                            beg_idx = 0
                        rows.append(temp_x[beg_idx:(beg_idx + input_dm)])
                    click_batch.append(
                        np.reshape(rows, [batch_num, input_dm]))

                if not click_batch:
                    continue

                mv_count = 0
                mv_labels = [0] * n_class
                wv_count = 0
                wv_labels = [0] * n_class
                for batch in click_batch:
                    # One run per batch returns both outputs for all clicks.
                    out_y, out_w = sess.run(
                        [y, logits], feed_dict={x: batch, keep_prob: 1.0})

                    votes = np.bincount(np.argmax(out_y, 1),
                                        minlength=n_class)
                    mv_pred = int(np.argmax(votes))
                    if mv_pred == c:
                        mv_count += 1
                    mv_labels[mv_pred] += 1

                    summed = np.sum(out_w, axis=0, dtype=np.float64)
                    wv_pred = int(np.argmax(summed))
                    if wv_pred == c:
                        wv_count += 1
                    wv_labels[wv_pred] += 1

                print('cnn test accuracy (majority voting): ',
                      round(mv_count / len(click_batch), 3))
                print(mv_labels)
                print('cnn test accuracy (weight voting): ',
                      round(wv_count / len(click_batch), 3))
                print(wv_labels)
def test_cnn_bottlenose_data(data_path, n_class=8, batch_num=20,
                             class_index=3,
                             ckpt_path="params/cnn_net_lwy.ckpt"):
    """Evaluate the saved CNN on single-click wav files of one class.

    Every wav file found below ``data_path`` is read as one 320-sample
    click and scaled to unit energy.  Per folder, the clicks are grouped
    into full batches of ``batch_num``; each click is cropped to 192 samples
    at a random offset in ``[64, 96)``.  The network is rebuilt, restored
    from ``ckpt_path`` and three batch-level accuracies are printed:
    majority voting, summed pre-softmax scores ("weight voting") and summed
    softmax outputs.

    NOTE(review): an original comment said test samples should come only
    from the last 1/5 of the clicks to keep them apart from training data;
    the code uses every click -- confirm which is intended.

    Args:
        data_path: Directory holding sub-folders of wav files (or the wav
            files directly).
        n_class: Number of network outputs.
        batch_num: Number of clicks per batch.
        class_index: Reference label for every batch (3 is bottlenose
            according to the original comment).
        ckpt_path: Checkpoint passed to ``saver.restore``.
    """
    click_len = 320
    crop_len = 192

    list_files = find_click.list_files(data_path)
    if not list_files:
        list_files = [data_path]

    click_batch = []
    for path in list_files:
        wav_files = find_click.list_wav_files(path)
        print("load data : %s, the number of files : %d"
              % (path, len(wav_files)))

        # Collect the rows and stack once instead of once per file.
        rows = [np.empty((0, click_len))]
        for pathname in wav_files:
            wave_data, _ = find_click.read_wav_file(pathname)
            # Float copy: integer samples overflow when squared and cannot
            # be divided in place.
            wave_data = np.asarray(wave_data, dtype=np.float64)
            energy = np.sqrt(np.sum(wave_data ** 2))
            if energy > 0:
                wave_data = wave_data / energy
            rows.append(np.reshape(wave_data, [1, -1]))
        xs = np.vstack(rows)

        sample_num = xs.shape[0]
        total_batch = sample_num // batch_num
        print('the number of data(%(datasrc)s): %(d)d'
              % {'datasrc': path, 'd': total_batch})

        for i in range(total_batch):
            batch = np.empty((batch_num, crop_len))
            for k, temp_x in enumerate(
                    xs[batch_num * i:batch_num * (i + 1)]):
                beg_idx = np.random.randint(64, (64 + 32))
                batch[k] = temp_x[beg_idx:(beg_idx + crop_len)]
            click_batch.append(batch)

    if not click_batch:
        # Nothing to evaluate; the accuracy ratios would divide by zero.
        print('the number of batch:', 0)
        return

    # Start from a clean graph so repeated calls create identically named
    # variables for the saver (same as test_cnn_batch_data).
    tf.reset_default_graph()

    x = tf.placeholder("float", [None, crop_len])

    # Input
    x_image = tf.reshape(x, [-1, 1, crop_len, 1])

    # First convolutional layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolutional layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 48 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 48 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y = tf.nn.softmax(logits)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, ckpt_path)  # load the trained network parameters

        print('the number of batch:', len(click_batch))

        majority_hits = 0
        weight_hits = 0
        softmax_hits = 0
        for batch in click_batch:
            # One run per batch yields both outputs for every click.
            out_y, out_w = sess.run(
                [y, logits], feed_dict={x: batch, keep_prob: 1.0})

            votes = np.bincount(np.argmax(out_y, 1), minlength=n_class)
            if np.argmax(votes) == class_index:
                majority_hits += 1
            if np.argmax(np.sum(out_w, axis=0,
                                dtype=np.float64)) == class_index:
                weight_hits += 1
            if np.argmax(np.sum(out_y, axis=0,
                                dtype=np.float64)) == class_index:
                softmax_hits += 1

        n_batches = len(click_batch)
        print('cnn test accuracy (majority voting): ',
              round(majority_hits / n_batches, 3))
        print('cnn test accuracy (weight voting): ',
              round(weight_hits / n_batches, 3))
        print('cnn test accuracy (sum of softmax voting): ',
              round(softmax_hits / n_batches, 3))
def load_data(batch_num=20, class_dirs=None, click_len=320):
    """Load click arrays per class and crop them into train/test batches.

    For every class the sub-folders returned by ``find_click.list_files``
    are shuffled; the first one becomes the test set and the others the
    training set.  All non-empty ``.npy`` arrays are stacked and handed to
    ``random_crop`` with ``n_total=0``.  Training results are kept per
    class; test results are concatenated with one-hot label rows.

    Args:
        batch_num: Forwarded to ``random_crop``.
        class_dirs: Optional mapping from class key (a string holding the
            class index, ``"0"``, ``"1"``, ...) to the class directory.
            Defaults to the three ``CNNDet12_filtered`` folders.
        click_len: Number of columns every stored click array must have.

    Returns:
        ``(train_dict, test_xs, test_ys)``: ``train_dict`` maps each class
        key to the ``np.array`` of its cropped training data, ``test_xs`` is
        the ``np.array`` of all cropped test data and ``test_ys`` the
        matching ``(n_items, n_class)`` one-hot float array.

    Raises:
        IndexError: If a class directory contains no sub-folders.
    """
    if class_dirs is None:
        class_dirs = {
            "0": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Melon",
            "1": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Spinner",
            "2": "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Tt",
        }
    n_class = len(class_dirs)

    def _stack_clicks(npy_paths):
        # Stack every non-empty array with a single vstack; the leading
        # empty float array fixes the dtype and the expected column count.
        chunks = [np.empty((0, click_len))]
        for npy in npy_paths:
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            chunks.append(npy_data)
        return np.vstack(chunks)

    test_xs = []
    test_ys = np.empty((0, n_class))
    train_dict = {key: None for key in class_dirs}

    for key in class_dirs:
        path = class_dirs[key]
        print(path)
        c = int(key)
        file_list = find_click.list_files(path)
        if len(file_list) == 0:
            raise IndexError("no sub-folders found under %s" % path)

        random_index = np.random.permutation(len(file_list))
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # training set
        print('training set loading.......')
        train_paths = []
        for folder in train_set:
            train_paths.extend(find_click.list_npy_files(folder))
        xs = _stack_clicks(train_paths)
        print('loaded clicks:', xs.shape[0])

        # test set
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        txs = _stack_clicks(find_click.list_npy_files(test_set))
        print('loaded clicks:', txs.shape[0])

        print('crop training clicks...')
        train_xs = np.array(random_crop(xs, batch_num, n_total=0))
        print('crop testing clicks...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0)

        train_dict[key] = train_xs

        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    test_xs = np.array(test_xs)
    return train_dict, test_xs, test_ys