Exemple #1
0
def detect_click(class_path,
                 class_name,
                 snr_threshold_low=5,
                 snr_threshold_high=20,
                 tar_fs=192000):
    """Run ``run_cnn_detection`` on every wav file found under ``class_path``.

    The entries returned by ``find_click.list_files(class_path)`` are treated
    as folders and processed one by one; when that call returns nothing,
    ``class_path`` itself is used as the only folder. For each folder the
    output directory is ``./CNN_Det12_WK5/<class_name>/<last path component>``
    and is created through ``mkdir`` when it does not exist yet.

    Args:
        class_path: Directory holding the recordings, or sub-folders of them.
        class_name: Name of the per-class output directory.
        snr_threshold_low: Passed through to ``run_cnn_detection``.
        snr_threshold_high: Passed through to ``run_cnn_detection``.
        tar_fs: Passed through to ``run_cnn_detection``
            (presumably the target sampling rate in Hz -- confirm there).
    """
    folder_list = find_click.list_files(class_path)
    if not folder_list:
        # No sub-folders were listed: fall back to the directory itself.
        folder_list = [class_path]

    # Detection results are always written out as .npy files.
    save_npy = True

    for folder in folder_list:
        print(folder)
        wav_files = find_click.list_wav_files(folder)

        # The last path component names the output sub-directory.
        path_name = folder.split('/')[-1]
        dst_path = "./CNN_Det12_WK5/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)

        for file_name in wav_files:
            run_cnn_detection(file_name, snr_threshold_low, snr_threshold_high,
                              save_npy, dst_path, tar_fs)
Exemple #2
0
def load_data(data_path, n_class):
    """Load per-class click arrays and split them file-wise into train/test.

    For every class ``c`` in ``range(n_class)`` the ``.npy`` files listed by
    ``find_click.list_files("<data_path>/<c>", '.npy')`` are visited in a
    random order. Files in the first half of that order feed the training
    set, the remaining files feed the test set. Files that contain no click
    (first dimension of zero) are skipped.

    Args:
        data_path: Root directory with one sub-directory per class index.
        n_class: Number of classes to load.

    Returns:
        A tuple ``(train_data, test_data)`` of two lists with one entry per
        class. Each entry is a two-element list ``[clicks, label]`` where
        ``clicks`` is a float array with 320 columns and ``label`` is the
        class index as a NumPy integer scalar.
    """
    train_data = []
    test_data = []

    for c in range(n_class):
        print("---------------------------------------------------------")
        path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}

        npy_files = find_click.list_files(path, '.npy')
        n_files = len(npy_files)
        print("load data : %s, the number of files : %d" % (path, n_files))
        print("---------------------------------------------------------")

        random_index = np.random.permutation(n_files)

        count = 0
        # Seed both lists with an empty float block so that the stacked
        # result is a (0, 320) float array even when no file contributes.
        train_parts = [np.empty((0, 320))]
        test_parts = [np.empty((0, 320))]

        for idx, index in enumerate(random_index):
            clicks = np.load(npy_files[index])
            if clicks.shape[0] == 0:
                # An empty file cannot be stacked against 320 columns.
                continue
            count += clicks.shape[0]

            if idx < n_files / 2:
                train_parts.append(clicks)
            else:
                test_parts.append(clicks)

        # Stack once instead of re-copying the growing array per file.
        clicks_train = np.vstack(train_parts)
        clicks_test = np.vstack(test_parts)

        # Keep the label as a NumPy integer scalar, as callers receive today.
        label = np.array([c])[0]

        print("the number of clicks : %(n)d" % {'n': count})

        train_data.append([clicks_train, label])
        test_data.append([clicks_test, label])

    return train_data, test_data
Exemple #3
0
    def load_data(self, data_path):
        """Reset the result containers and load a file-wise train/test split.

        The score, label and model lists on ``self`` are reset to empty
        lists. Then, for every class ``c`` in ``range(self.n_classes)``, the
        ``.npy`` files listed by
        ``find_click.list_files("<data_path>/<c>", '.npy')`` are visited in a
        random order. Files whose position in that order is below
        ``len(files) * (fold_num - 1) / fold_num`` feed the training set, the
        rest feed the test set. Files that contain no click (first dimension
        of zero) are skipped.

        After the call ``self.train_data`` and ``self.test_data`` hold one
        two-element list ``[clicks, label]`` per class, where ``clicks`` is a
        float array with 320 columns and ``label`` is the class index as a
        NumPy integer scalar.

        Args:
            data_path: Root directory with one sub-directory per class index.
        """
        self.cnn_mv_scores = []
        self.cnn_mp_scores = []
        self.cnn_labels = []

        self.gmm_models = []
        self.gmm_scores = []
        self.gmm_labels = []

        self.train_data = []
        self.test_data = []

        for c in range(self.n_classes):
            print("---------------------------------------------------------")
            path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}

            npy_files = find_click.list_files(path, '.npy')
            n_files = len(npy_files)
            print("load data : %s, the number of files : %d" %
                  (path, n_files))
            print("---------------------------------------------------------")

            random_index = np.random.permutation(n_files)
            # Files positioned below this bound go to the training set.
            train_bound = n_files * (self.fold_num - 1) / self.fold_num

            count = 0
            # Seed both lists with an empty float block so that the stacked
            # result is a (0, 320) float array even when no file contributes.
            train_parts = [np.empty((0, 320))]
            test_parts = [np.empty((0, 320))]

            for idx, index in enumerate(random_index):
                clicks = np.load(npy_files[index])
                if clicks.shape[0] == 0:
                    # An empty file cannot be stacked against 320 columns.
                    continue
                count += clicks.shape[0]

                if idx < train_bound:
                    train_parts.append(clicks)
                else:
                    test_parts.append(clicks)

            # Stack once instead of re-copying the growing array per file.
            clicks_train = np.vstack(train_parts)
            clicks_test = np.vstack(test_parts)

            # Keep the label as a NumPy integer scalar, as before.
            label = np.array([c])[0]

            print("the number of clicks : %(n)d" % {'n': count})

            self.train_data.append([clicks_train, label])
            self.test_data.append([clicks_test, label])
Exemple #4
0
    def load_data(self, data_path, n_total=20000):
        """Build one-hot labelled train/test matrices from tab-separated files.

        For every class ``c`` in ``range(self.n_class)`` the ``.txt`` files
        under ``<data_path>/<c>`` are read with pandas (tab separated); the
        second column of each file is passed through
        ``self.down_sample(..., 2)``. The per-class samples are divided by
        ``self.split_data`` and expanded by ``self.generate_data`` with
        targets of ``int(n_total * 4 / 5)`` and ``int(n_total / 5)``. Results
        are appended to ``self.train_xs`` / ``self.train_ys`` and
        ``self.test_xs`` / ``self.test_ys``.

        Args:
            data_path: Root directory with one sub-directory per class index.
            n_total: Total per-class count handed (split 4:1) to
                ``self.generate_data``.
        """
        self.train_xs = np.empty((0, self.ftu_num))
        self.train_ys = np.empty((0, self.n_class))
        self.test_xs = np.empty((0, self.ftu_num))
        self.test_ys = np.empty((0, self.n_class))

        n_train = int(n_total * 4 / 5)
        n_test = int(n_total / 5)

        for c in range(self.n_class):
            class_dir = "%(path)s/%(class)d" % {'path': data_path, 'class': c}
            txt_files = find_click.list_files(class_dir, '.txt')

            print("load data : %s, the number of files : %d" %
                  (class_dir, len(txt_files)))

            # One-hot row for this class.
            one_hot = np.zeros(self.n_class)
            one_hot[c] = 1

            # Second column of every file, down-sampled.
            samples = [
                self.down_sample(pd.read_csv(txt, sep='\t').values[:, 1], 2)
                for txt in txt_files
            ]

            train_part, test_part = self.split_data(samples)
            train_part = np.array(self.generate_data(train_part, n_train))
            test_part = np.array(self.generate_data(test_part, n_test))

            train_labels = np.tile(one_hot, (train_part.shape[0], 1))
            test_labels = np.tile(one_hot, (test_part.shape[0], 1))

            self.train_xs = np.vstack((self.train_xs, train_part))
            self.train_ys = np.vstack((self.train_ys, train_labels))
            self.test_xs = np.vstack((self.test_xs, test_part))
            self.test_ys = np.vstack((self.test_ys, test_labels))
Exemple #5
0
def detect_save_click(class_path,
                      class_name,
                      snr_threshold_low=5,
                      snr_threshold_high=100):
    """Detect clicks in wav files, filter them by SNR and save them as .npy.

    The entries returned by ``find_click.list_files(class_path)`` are treated
    as folders; when nothing is returned, ``class_path`` itself is the only
    folder. Every wav file of a folder is run through
    ``find_click.find_click_fdr_tkeo``. The signal it returns is rescaled in
    place so that its maximum equals ``2**12 - 1``. Each detected click is
    kept only when its SNR (click energy against the energy of up to 256
    samples on each side) lies within
    ``[snr_threshold_low, snr_threshold_high]``. Kept clicks are passed
    through ``resample`` and ``cut_data``, converted to ``np.short`` and
    written, one file per recording, to
    ``./TKEO_wk3_complete/<class_name>/<folder>/<wav name>_N<count>.npy``.

    Args:
        class_path: Directory holding the recordings, or sub-folders of them.
        class_name: Name of the per-class output directory.
        snr_threshold_low: Clicks with a lower SNR (dB) are dropped.
        snr_threshold_high: Clicks with a higher SNR (dB) are dropped.
    """
    tar_fs = 96000
    signal_len = 320
    noise_len = 256  # samples taken on each side of a click as noise

    # Detector settings handed to find_click.find_click_fdr_tkeo.
    fl = 5000
    fwhm = 0.0004
    fdr_threshold = 0.65

    folder_list = find_click.list_files(class_path)
    if not folder_list:
        # No sub-folders were listed: fall back to the directory itself.
        folder_list = [class_path]

    for folder in folder_list:
        print(folder)
        count = 0  # clicks kept in this folder, over all of its wav files
        wav_files = find_click.list_wav_files(folder)

        path_name = folder.split('/')[-1]
        dst_path = "./TKEO_wk3_complete/%(class)s/%(type)s" % {
            'class': class_name,
            'type': path_name
        }
        if not os.path.exists(dst_path):
            mkdir(dst_path)

        for pathname in wav_files:
            print(pathname)

            wave_data, frameRate = find_click.read_wav_file(pathname)

            # File name without directory and without extension(s).
            wavname = os.path.split(pathname)[1].split('.')[0]

            click_index, xn = find_click.find_click_fdr_tkeo(
                wave_data, frameRate, fl, fwhm, fdr_threshold, signal_len, 8)

            # Rescale in place so that the maximum becomes 2**12 - 1. Slice
            # assignment keeps xn's dtype, like per-element assignment did.
            scale = (2**12 - 1) / np.max(xn)
            xn[:] = xn * scale

            click_arr = []
            for index in click_index:
                start, stop = index[0], index[1]
                click_data = xn[start:stop]

                # SNR filtering.
                detected_clicks_energy = calcu_click_energy(
                    click_data.reshape(1, -1))
                # Clamp the left window at 0: a negative start would wrap
                # around to the end of the signal.
                noise_before = xn[max(start - noise_len, 0):start]
                noise_after = xn[stop + 1:stop + 1 + noise_len]
                noise_estimate = np.hstack((noise_before, noise_after))
                noise_energy = calcu_energy(noise_estimate)
                if noise_energy <= 0 or detected_clicks_energy <= 0:
                    continue
                snr = 10 * math.log10(detected_clicks_energy / noise_energy)
                if snr < snr_threshold_low or snr > snr_threshold_high:
                    continue

                # Only accepted clicks are resampled; detection itself ran on
                # the signal as read from the file.
                click_data = resample(click_data, frameRate, tar_fs)
                click_data = cut_data(click_data, signal_len)
                click_arr.append(click_data.astype(np.short))
                count += 1

            dst = "%(path)s/%(pre)s_N%(num)d.npy" \
                  % {'path': dst_path, 'pre': wavname, 'num': len(click_arr)}
            print(dst)
            np.save(dst, np.array(click_arr, dtype=np.short))

        print("count = %(count)d" % {'count': count})
Exemple #6
0
def load_npy_data(batch_num=20, n_total=500):
    """Load the three hard-coded click classes with a folder-wise split.

    For every class directory, one randomly chosen sub-folder (as listed by
    ``find_click.list_files``) becomes the test set and all other sub-folders
    form the training set. The non-empty ``.npy`` files of each side are
    stacked into a 320-column array and handed to ``random_crop``, whose
    results are collected together with matching one-hot labels.

    Args:
        batch_num: Passed through to ``random_crop``.
        n_total: Passed through to ``random_crop`` for the training set; the
            test set always uses ``n_total=0``.

    Returns:
        ``(train_xs, train_ys, test_xs, test_ys)``. The ``xs`` values are
        ``np.array`` conversions of the concatenated ``random_crop`` results
        and the ``ys`` values are float arrays of one-hot rows with one
        column per class.

    Raises:
        IndexError: If a class directory lists no sub-folder.
    """
    # Class index (as a string key) -> directory of that class.
    class_paths = {
        '0': "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Melon",
        '1': "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Spinner",
        '2': "/home/fish/ROBB/CNN_click/click/TKEO_wk5_complete_filtered/Tt",
    }

    n_class = len(class_paths)
    train_ys = np.empty((0, n_class))
    test_ys = np.empty((0, n_class))
    train_xs = []
    test_xs = []

    for key, path in class_paths.items():
        print(path)

        c = int(key)

        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))

        # First folder of the shuffled order is held out for testing.
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # Training set. The empty float seed keeps the stacked result a
        # (0, 320) float array when nothing is loaded.
        train_parts = [np.empty((0, 320))]
        count = 0
        print('training set loading.......')
        for folder in train_set:
            for npy in find_click.list_npy_files(folder):
                npy_data = np.load(npy)
                if npy_data.shape[0] == 0:
                    continue
                train_parts.append(npy_data)
                count += npy_data.shape[0]
        # Stack once instead of re-copying the growing array per file.
        xs = np.vstack(train_parts)
        print('loaded clicks:', count)

        # Test set.
        test_parts = [np.empty((0, 320))]
        count = 0
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        for npy in find_click.list_npy_files(test_set):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            test_parts.append(npy_data)
            count += npy_data.shape[0]
        txs = np.vstack(test_parts)
        print('loaded clicks:', count)

        print('training set crop...')
        temp_train_xs = random_crop(xs, batch_num, n_total, key)
        print('testing set crop...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0, key=key)

        temp_train_ys = np.tile(label, (len(temp_train_xs), 1))
        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        train_xs += temp_train_xs
        train_ys = np.vstack((train_ys, temp_train_ys))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    train_xs = np.array(train_xs)
    test_xs = np.array(test_xs)

    return train_xs, train_ys, test_xs, test_ys
Exemple #7
0
    # dict["2"] = "/home/fish/ROBB/CNN_click/click/CNNDet18/Tt"

    dict["0"] = "/home/fish/ROBB/CNN_click/click/Xiamen/bottlenose"
    dict["1"] = "/home/fish/ROBB/CNN_click/click/Xiamen/chinesewhite"
    dict["2"] = "/home/fish/ROBB/CNN_click/click/Xiamen/Neomeris"

    root_save_path = "/home/fish/ROBB/CNN_click/click/Xiamen_filtered"
    if not os.path.exists(root_save_path):
        os.makedirs(root_save_path)

    for key in dict:
        count = 0
        print(dict[key])
        path = dict[key]
        specie_name = path.split('/')[-1]
        file_list = find_click.list_files(path)
        save_specie_path = os.path.join(root_save_path, specie_name)
        for date_path in file_list:
            date = date_path.split('/')[-1]
            save_path = os.path.join(save_specie_path, date)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            npy_list = find_click.list_npy_files(date_path)
            for npy in npy_list:
                npy_data = np.load(npy)
                num = npy_data.shape[0]
                xs = np.empty((0, 320))
                for index in range(num):
                    temp_x = npy_data[index]
                    beg_idx = np.random.randint(64, (64 + 32))
                    crop_x = temp_x[beg_idx:(beg_idx + 192)]
def test_cnn_batch_data(data_path, n_class, input_dm, batch_num=20):
    """Evaluate the saved CNN on batches of clicks, one ``.npy`` file at a time.

    The default graph is reset and the network is rebuilt: two 1x5
    convolution layers with 32 channels, each followed by ``max_pool_1x2``, a
    256-unit dense layer with dropout, and an ``n_class``-way softmax output.
    Weights are restored from ``params/cnn_net.ckpt``.

    Only the classes 3, 6 and 7 are evaluated. For each ``.npy`` file under
    ``<data_path>/<class>`` the clicks are grouped into batches of
    ``batch_num``; the last batch wraps around to the first clicks of the
    file. Every click is normalised to unit energy and cropped to
    ``input_dm`` samples at a random offset. Two batch-level accuracies are
    printed per file, each followed by the per-class count of batch
    decisions: majority voting over per-click predictions, and "weight
    voting" over the summed pre-softmax outputs.

    Args:
        data_path: Root directory with one sub-directory per class index.
        n_class: Number of network outputs; must exceed 7.
        input_dm: Number of samples fed to the network per click.
        batch_num: Number of clicks per voting batch.

    Raises:
        IndexError: If an evaluated class index is not below ``n_class``.
    """
    tf.reset_default_graph()

    x = tf.placeholder("float", [None, input_dm])
    # Input
    x_image = tf.reshape(x, [-1, 1, input_dm, 1])

    # First convolution layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolution layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * int(input_dm / 4) * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * int(input_dm / 4) * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer. The pre-softmax op is built once here rather than once
    # per file, so the graph does not grow while the files are processed.
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y = tf.nn.softmax(logits)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "params/cnn_net.ckpt")  # load the trained weights

        for c in [3, 6, 7]:  # range(0, n_class):
            if c >= n_class:
                raise IndexError(
                    "class index %d is out of range for n_class=%d" %
                    (c, n_class))

            path = "%(path)s/%(class)d" % {'path': data_path, 'class': c}

            npy_files = find_click.list_files(path, '.npy')
            print("load data : %s, the number of files : %d" %
                  (path, len(npy_files)))

            for path_name in npy_files:
                print(path_name)
                clicks = np.load(path_name)

                print("the number of clicks : %d" % (clicks.shape[0]))

                num = clicks.shape[0]
                # Number of batches, rounded up.
                run_num = int(num / batch_num)
                if num % batch_num != 0:
                    run_num += 1

                # One (batch_num, input_dm) array per batch.
                click_batch = []
                for i in range(run_num):
                    rows = []
                    for j in range(batch_num * i, batch_num * (i + 1)):
                        # The modulo wraps the last batch around to the start
                        # of the file. Work on a float copy so the loaded
                        # array is never modified and integer data does not
                        # overflow when squared.
                        temp_x = np.asarray(clicks[j % num], dtype=np.float64)

                        energy = np.sqrt(np.sum(temp_x**2))
                        temp_x = temp_x / energy

                        margin = int((len(temp_x) - input_dm) / 2)
                        # randint(0, 0) raises, so start at 0 without slack.
                        if margin > 0:
                            beg_idx = np.random.randint(0, margin)
                        else:
                            beg_idx = 0
                        crop_x = temp_x[beg_idx:(beg_idx + input_dm)]
                        rows.append(np.reshape(crop_x, [1, input_dm]))
                    click_batch.append(np.vstack(rows))

                if not click_batch:
                    continue
                n_batches = len(click_batch)

                mv_count = 0
                mv_labels = [0] * n_class
                wv_count = 0
                wv_labels = [0] * n_class
                for batch_xs in click_batch:
                    votes = np.zeros(n_class)
                    logit_sum = np.zeros(n_class)
                    for row in batch_xs:
                        txs = np.reshape(row, [1, input_dm])
                        # One run yields both the softmax output and the
                        # pre-softmax scores for this click.
                        out_y, out = sess.run([y, logits],
                                              feed_dict={
                                                  x: txs,
                                                  keep_prob: 1.0
                                              })
                        votes[np.argmax(out_y, 1)] += 1
                        logit_sum = logit_sum + np.reshape(
                            out, logit_sum.shape)

                    mv_winner = np.argmax(votes)
                    if mv_winner == c:
                        mv_count += 1
                    mv_labels[mv_winner] += 1

                    wv_winner = np.argmax(logit_sum)
                    if wv_winner == c:
                        wv_count += 1
                    wv_labels[wv_winner] += 1

                print('cnn test accuracy (majority voting): ',
                      round(mv_count / n_batches, 3))
                print(mv_labels)

                print('cnn test accuracy (weight voting): ',
                      round(wv_count / n_batches, 3))
                print(wv_labels)
Exemple #9
0
def test_cnn_bottlenose_data(data_path, n_class=8, batch_num=20):
    """Evaluate the saved CNN on bottlenose click wav files (class 3).

    The entries returned by ``find_click.list_files(data_path)`` are treated
    as folders; when nothing is returned, ``data_path`` itself is the only
    folder. Every wav file is read as one 320-sample click and normalised to
    unit energy. The clicks of a folder are grouped into full batches of
    ``batch_num`` (a trailing partial batch is dropped), and each click is
    cropped to 192 samples at a random offset in ``[64, 96)``.

    The network (two 1x5 convolution layers with 32 channels, a 256-unit
    dense layer with dropout, ``n_class``-way softmax) is restored from
    ``params/cnn_net_lwy.ckpt``. Three batch-level accuracies are printed:
    majority voting, sum of pre-softmax outputs ("weight voting") and sum of
    softmax outputs.

    Args:
        data_path: Directory holding the click wav files, or sub-folders.
        n_class: Number of network outputs; must exceed 3.
        batch_num: Number of clicks per voting batch.

    Raises:
        IndexError: If ``n_class`` does not exceed the bottlenose label 3.
    """
    c = 3  # the label of bottlenose is 3
    if c >= n_class:
        raise IndexError(
            "class index %d is out of range for n_class=%d" % (c, n_class))

    list_files = find_click.list_files(data_path)
    if not list_files:
        # No sub-folders were listed: fall back to the directory itself.
        list_files = [data_path]

    # One (batch_num, 192) array per batch.
    click_batch = []
    for path in list_files:
        wav_files = find_click.list_wav_files(path)
        print("load data : %s, the number of files : %d" % (path, len(wav_files)))

        # NOTE(review): the original comment said that test samples are built
        # from the last 1/5 of all clicks to avoid testing on training data,
        # but every wav file is used here -- confirm which is intended.
        rows = [np.empty((0, 320))]
        for pathname in wav_files:
            wave_data, frame_rate = find_click.read_wav_file(pathname)
            # Float copy: integer samples would overflow when squared and
            # cannot be divided in place.
            wave_data = np.asarray(wave_data, dtype=np.float64)
            energy = np.sqrt(np.sum(wave_data ** 2))
            rows.append(np.reshape(wave_data / energy, [-1]))
        # Stack once instead of re-copying the growing array per file.
        xs = np.vstack(rows)

        sample_num = xs.shape[0]
        total_batch = int(sample_num / batch_num)
        print('the number of data(%(datasrc)s): %(d)d' % {'datasrc': path, 'd': total_batch})
        for i in range(total_batch):
            crops = []
            for j in range(batch_num * i, batch_num * (i + 1)):
                beg_idx = np.random.randint(64, (64 + 32))
                crop_x = xs[j][beg_idx:(beg_idx + 192)]
                crops.append(np.reshape(crop_x, [1, 192]))
            click_batch.append(np.vstack(crops))

    if not click_batch:
        # Nothing to evaluate; the accuracies would divide by zero.
        print('the number of batch:', 0)
        return

    x = tf.placeholder("float", [None, 192])
    # Input
    x_image = tf.reshape(x, [-1, 1, 192, 1])

    # First convolution layer
    W_conv1 = weight_variable([1, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_1x2(h_conv1)

    # Second convolution layer
    W_conv2 = weight_variable([1, 5, 32, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_1x2(h_conv2)

    # Densely connected layer
    W_fc1 = weight_variable([1 * 48 * 32, 256])
    b_fc1 = bias_variable([256])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 1 * 48 * 32])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

    # Output layer
    W_fc2 = weight_variable([256, n_class])
    b_fc2 = bias_variable([n_class])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y = tf.nn.softmax(logits)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "params/cnn_net_lwy.ckpt")  # load the trained weights

        n_batches = len(click_batch)
        print('the number of batch:', n_batches)

        majority_hits = 0
        weight_hits = 0
        softmax_hits = 0
        for batch_xs in click_batch:
            votes = np.zeros(n_class)
            logit_sum = np.zeros(n_class)
            prob_sum = np.zeros(n_class)
            for row in batch_xs:
                txs = np.reshape(row, [1, 192])
                # One run yields both the softmax output and the pre-softmax
                # scores, which feed all three voting schemes.
                out_y, out = sess.run([y, logits],
                                      feed_dict={x: txs, keep_prob: 1.0})
                votes[np.argmax(out_y, 1)] += 1
                logit_sum = logit_sum + np.reshape(out, logit_sum.shape)
                prob_sum = prob_sum + np.reshape(out_y, prob_sum.shape)

            if np.argmax(votes) == c:
                majority_hits += 1
            if np.argmax(logit_sum) == c:
                weight_hits += 1
            if np.argmax(prob_sum) == c:
                softmax_hits += 1

        print('cnn test accuracy (majority voting): ', round(majority_hits / n_batches, 3))
        print('cnn test accuracy (weight voting): ', round(weight_hits / n_batches, 3))
        print('cnn test accuracy (sum of softmax voting): ', round(softmax_hits / n_batches, 3))
Exemple #10
0
def load_data(batch_num=20):
    """Load the three hard-coded click classes with a folder-wise split.

    For every class directory, one randomly chosen sub-folder (as listed by
    ``find_click.list_files``) becomes the test set and all other sub-folders
    form the training set. The non-empty ``.npy`` files of each side are
    stacked into a 320-column array and handed to ``random_crop`` with
    ``n_total=0``.

    Args:
        batch_num: Passed through to ``random_crop``.

    Returns:
        ``(train_dict, test_xs, test_ys)``. ``train_dict`` maps the class key
        (``'0'``, ``'1'``, ``'2'``) to the ``np.array`` of that class's
        cropped training data. ``test_xs`` is the ``np.array`` of the cropped
        test data of all classes and ``test_ys`` holds the matching one-hot
        rows as a float array.

    Raises:
        IndexError: If a class directory lists no sub-folder.
    """
    # Class index (as a string key) -> directory of that class.
    class_paths = {
        '0': "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Melon",
        '1': "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Spinner",
        '2': "/home/fish/ROBB/CNN_click/click/CNNDet12_filtered/Tt",
    }

    n_class = len(class_paths)
    test_ys = np.empty((0, n_class))
    test_xs = []

    train_dict = {key: None for key in class_paths}

    for key, path in class_paths.items():
        print(path)

        c = int(key)

        file_list = find_click.list_files(path)
        random_index = np.random.permutation(len(file_list))

        # First folder of the shuffled order is held out for testing.
        test_set = file_list[random_index[0]]
        train_set = [file_list[i] for i in random_index[1:]]

        label = np.zeros(n_class)
        label[c] = 1

        # Training set. The empty float seed keeps the stacked result a
        # (0, 320) float array when nothing is loaded.
        train_parts = [np.empty((0, 320))]
        count = 0
        print('training set loading.......')
        for folder in train_set:
            for npy in find_click.list_npy_files(folder):
                npy_data = np.load(npy)
                if npy_data.shape[0] == 0:
                    continue
                train_parts.append(npy_data)
                count += npy_data.shape[0]
        # Stack once instead of re-copying the growing array per file.
        xs = np.vstack(train_parts)
        print('loaded clicks:', count)

        # Test set.
        test_parts = [np.empty((0, 320))]
        count = 0
        print('test set loading.......')
        print('loading %s' % test_set[-6:])
        for npy in find_click.list_npy_files(test_set):
            npy_data = np.load(npy)
            if npy_data.shape[0] == 0:
                continue
            test_parts.append(npy_data)
            count += npy_data.shape[0]
        txs = np.vstack(test_parts)
        print('loaded clicks:', count)

        print('crop training clicks...')
        train_xs = np.array(random_crop(xs, batch_num, n_total=0))
        print('crop testing clicks...')
        temp_test_xs = random_crop(txs, batch_num, n_total=0)

        train_dict[key] = train_xs

        temp_test_ys = np.tile(label, (len(temp_test_xs), 1))
        test_xs += temp_test_xs
        test_ys = np.vstack((test_ys, temp_test_ys))

    test_xs = np.array(test_xs)
    return train_dict, test_xs, test_ys