Ejemplo n.º 1
0
def rmsf(raw_path,
         save_path,
         label_names,
         length,
         space,
         split_rates,
         label_path,
         fre_time,
         decode=False,
         generalization=False):
    """Cut the ``.tdms`` recordings under ``raw_path`` into pickled windows.

    For every entry of ``label_names`` (its position in the list is the class
    label) the sub-directories of ``raw_path`` whose name contains that entry
    are scanned for ``.tdms`` files.  Every selected file is read and cut into
    windows of ``length`` points taken every ``space`` points; each window is
    pickled to ``<save_path>/<label_name>_<n>.pkl`` and one
    ``"<path>  <label>"`` line is appended to the meta file
    ``<label_path>/label_<fre_time>_<raw dir>_<save dir>.meta``.  The meta
    file is finally handed to ``handle_label``.

    The function also reads the module-level ``args`` object
    (``train_test``, ``add_awgn``, ``awgn_db_list``, ``experiment_time`` and
    ``fre_data_length``).

    Args:
        raw_path: Directory holding one sub-directory per recording group.
        save_path: Directory the ``.pkl`` windows are written to.
        label_names: Class names; a sub-directory belongs to a class when the
            class name is a substring of the directory name.
        length: Number of points per window.
        space: Step between the starts of consecutive windows (an ``int``).
        split_rates: Sequence of ``(begin, end)`` fractions; each pair selects
            the slice ``files[round(begin * n):round(end * n)]`` of the ``n``
            files of a sub-directory.  Ignored when ``generalization`` is
            true.
        label_path: Directory the meta file is written to.
        fre_time: Value embedded in the meta file name.
        decode: When true, each window is flattened, passed through
            ``decode_data`` and elements ``1 .. args.fre_data_length`` of the
            result are stored instead of the raw window (per the original
            note: the frequency-domain data after envelope demodulation).
        generalization: When true, only sub-directories whose name also
            contains ``args.train_test`` are used, and all of their files are
            processed exactly once.
    """
    meta_name = 'label_{}_{}_{}.meta'.format(fre_time,
                                             os.path.split(raw_path)[-1],
                                             os.path.split(save_path)[-1])
    with open(os.path.join(label_path, meta_name), 'w') as f:
        for label, label_name in enumerate(label_names):
            if generalization:
                label_dirs = [
                    x for x in os.listdir(raw_path)
                    if label_name in x and args.train_test in x
                ]
            else:
                label_dirs = [
                    x for x in os.listdir(raw_path) if label_name in x
                ]

            # Window counter: runs across all directories/files of one class.
            num = 0
            for label_dir in label_dirs:
                dir_path = os.path.join(raw_path, label_dir)
                # NOTE(review): os.listdir returns entries in arbitrary
                # order, so which files land in which split may differ
                # between machines - confirm this is acceptable.
                ori_file_paths = [
                    x for x in os.listdir(dir_path) if x.endswith('.tdms')
                ]
                files_num = len(ori_file_paths)

                for split_rate in split_rates:
                    if generalization:
                        file_paths = ori_file_paths
                    else:
                        first = round(split_rate[0] * files_num)
                        last = round(split_rate[1] * files_num)
                        file_paths = ori_file_paths[first:last]

                    for file_name in file_paths:
                        # Read one raw recording.
                        tdms_file = TdmsFile(
                            os.path.join(dir_path, file_name))
                        try:
                            datas = tdms_file['group']['acceleration'].data
                        except KeyError:
                            # Fall back to the alternative (Chinese)
                            # group/channel names.  Only a missing name is
                            # tolerated; any other error propagates.
                            datas = tdms_file['组名称']['加速度'].data

                        # Slide the window over this single file.
                        for i in range(0, len(datas) - length + 1, space):
                            one_sample = datas[i:(i + length)]
                            if args.add_awgn and args.train_test == 'test':
                                one_sample = awgn(
                                    one_sample,
                                    args.awgn_db_list[args.experiment_time -
                                                      1])
                            if decode:
                                one_sample = np.reshape(one_sample, (-1))
                                one_sample = decode_data(one_sample)[1:(
                                    args.fre_data_length + 1)]

                            # Store the window and register it in the meta
                            # file.
                            path = r'{}/{}_{}.pkl'.format(
                                save_path, label_name, num)
                            with open(path, 'wb') as data_f:
                                pickle.dump(one_sample, data_f)
                                f.write('{}  {}\n'.format(path, label))
                            num += 1

                    if generalization:
                        # All files were already processed in one pass.
                        break

    # Per the original note: equalise the number of samples per class and
    # save them separately.
    handle_label(label_names, label_path, meta_name)
Ejemplo n.º 2
0
def gb(raw_path,
       save_path,
       label_names,
       length,
       space,
       split_rates,
       label_path,
       fre_time,
       channel='C1',
       decode=False,
       generalization=False):
    """Cut the text recordings under ``raw_path`` into pickled windows.

    Every file of ``raw_path`` whose name contains one of ``label_names`` is
    read with ``pandas.read_csv`` (first 16 rows skipped, nine columns named
    ``C1`` .. ``C9``).  The column ``channel`` is split according to
    ``split_rates``, cut into windows of ``length`` points every ``space``
    points, and each window is pickled to
    ``<save_path>/<file name>_<n>.pkl``.  One ``"<path>  <label>"`` line per
    window is written to
    ``<label_path>/label_<fre_time>_<raw dir>_<save dir>.meta``, which is
    finally handed to ``handle_label``.

    The function also reads the module-level ``args`` object
    (``train_test``, ``add_awgn``, ``awgn_db_list``, ``experiment_time`` and
    ``fre_data_length``).

    Args:
        raw_path: Directory holding the raw recordings.
        save_path: Directory the ``.pkl`` windows are written to.
        label_names: Class names; the label of a file is the index of the
            first name that is a substring of the file name.
        length: Number of points per window.
        space: Step between window starts (converted with ``int``).
        split_rates: Sequence of ``(begin, end)`` fractions of the signal.
            Each split is widened by ``length`` points (half before, half
            after the nominal range) and kept inside the signal.  Ignored
            when ``generalization`` is true.
        label_path: Directory the meta file is written to.
        fre_time: Value embedded in the meta file name.
        channel: Name of the column to use.
        decode: When true, each window is flattened, passed through
            ``decode_data`` and elements ``1 .. args.fre_data_length`` of the
            result are stored instead of the raw window (per the original
            note: the frequency-domain data after envelope demodulation).
        generalization: When true, only files whose name also contains
            ``args.train_test`` are used and the whole signal is processed
            exactly once.
    """
    # ``any`` keeps each file once even when several label names match it;
    # listing it once per matching label would duplicate its meta entries.
    if generalization:
        file_paths = [
            x for x in os.listdir(raw_path)
            if any(y in x for y in label_names) and args.train_test in x
        ]
    else:
        file_paths = [
            x for x in os.listdir(raw_path)
            if any(y in x for y in label_names)
        ]

    columns = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
    meta_name = 'label_{}_{}_{}.meta'.format(fre_time,
                                             os.path.split(raw_path)[-1],
                                             os.path.split(save_path)[-1])
    with open(os.path.join(label_path, meta_name), 'w') as f:
        for file_path in file_paths:
            # Read the raw recording: tab-separated first, then pandas'
            # default separator if parsing/conversion fails.
            full_path = os.path.join(raw_path, file_path)
            try:
                datas = pd.read_csv(full_path,
                                    dtype=np.float32,
                                    skiprows=range(0, 16),
                                    sep='\t',
                                    names=columns)
            except ValueError:
                # pandas parser errors derive from ValueError as well.
                datas = pd.read_csv(full_path,
                                    dtype=np.float32,
                                    skiprows=range(0, 16),
                                    names=columns)

            all_data = np.array(datas[channel])
            total = len(all_data)

            # File name without its 4-character extension, and its label.
            file_name = os.path.split(file_path)[-1][:-4]
            label = [
                idx for idx, name in enumerate(label_names)
                if name in file_name
            ][0]

            # Window counter: runs across all splits of one file.
            num = 0
            for split_rate in split_rates:
                if generalization:
                    data = all_data
                else:
                    useful_length = round(
                        total * (split_rate[1] - split_rate[0]))
                    start = max(0,
                                round(total * split_rate[0]) - length // 2)
                    stop = start + useful_length + length
                    if stop > total - 1:
                        # Anchor the range at the end of the signal.  Clamp
                        # at 0: a negative start would be read as an offset
                        # from the end and silently drop most of the data.
                        start = max(0, total - useful_length - length)
                        stop = total
                    data = all_data[start:stop]

                # Cut into equally spaced windows.
                for i in range(0, len(data) - length, int(space)):
                    one_sample = data[i:i + length]
                    if args.add_awgn and args.train_test == 'test':
                        one_sample = awgn(
                            one_sample,
                            args.awgn_db_list[args.experiment_time - 1])
                    if decode:
                        one_sample = np.reshape(one_sample, (-1))
                        one_sample = decode_data(one_sample)[1:(
                            args.fre_data_length + 1)]

                    # Store the window and register it in the meta file.
                    path = r'{}/{}_{}.pkl'.format(save_path, file_name, num)
                    with open(path, 'wb') as data_f:
                        pickle.dump(one_sample, data_f)
                        f.write('{}  {}\n'.format(path, label))
                    num += 1

                if generalization:
                    # The whole signal was already processed in one pass.
                    break

    # Per the original note: equalise the number of samples per class and
    # save them separately.
    handle_label(label_names, label_path, meta_name)
Ejemplo n.º 3
0
def canda_wo(raw_path,
             save_path,
             label_names,
             length,
             space,
             split_rates,
             label_path,
             fre_time,
             decode=False,
             generalization=False):
    """Cut the MATLAB recordings under ``raw_path`` into pickled windows.

    Every file of ``raw_path`` is loaded with ``loadmat`` and its
    ``'Channel_1'`` entry is split according to ``split_rates``, reduced to
    every 8th point (starting at index 1), cut into windows of ``length``
    points every ``space`` points, and each window is pickled to
    ``<save_path>/<file name>_<n>.pkl``.  One ``"<path>  <label>"`` line per
    window is written to
    ``<label_path>/label_<fre_time>_<raw dir>_<save dir>.meta``, which is
    finally handed to ``handle_label``.

    Files whose name contains none of ``label_names`` are skipped.

    The function also reads the module-level ``args`` object
    (``train_test``, ``add_awgn``, ``awgn_db_list``, ``experiment_time`` and
    ``fre_data_length``).

    Args:
        raw_path: Directory holding the raw ``loadmat``-readable files.
        save_path: Directory the ``.pkl`` windows are written to.
        label_names: Class names; the label of a file is the index of the
            first name that is a substring of the file name.
        length: Number of points per window.
        space: Step between window starts (converted with ``int``).
        split_rates: Sequence of ``(begin, end)`` fractions of the signal.
            Each split is widened by ``length`` points (half before, half
            after the nominal range) and kept inside the signal.  Ignored
            when ``generalization`` is true.
        label_path: Directory the meta file is written to.
        fre_time: Value embedded in the meta file name.
        decode: When true, each window is flattened, passed through
            ``decode_data`` and elements ``1 .. args.fre_data_length`` of the
            result are stored instead of the raw window (per the original
            note: the frequency-domain data after envelope demodulation).
        generalization: When true, only files whose name contains
            ``args.train_test`` are used and the whole signal is processed
            exactly once.
    """
    if generalization:
        file_paths = [x for x in os.listdir(raw_path) if args.train_test in x]
    else:
        file_paths = list(os.listdir(raw_path))

    meta_name = 'label_{}_{}_{}.meta'.format(fre_time,
                                             os.path.split(raw_path)[-1],
                                             os.path.split(save_path)[-1])
    with open(os.path.join(label_path, meta_name), 'w') as f:
        for file_path in file_paths:
            # File name without its 4-character extension, and its label.
            file_name = os.path.split(file_path)[-1][:-4]
            matching = [
                idx for idx, name in enumerate(label_names)
                if name in file_name
            ]
            if not matching:
                # No class can be assigned to this file; skip it instead of
                # failing with an IndexError half-way through the export.
                continue
            label = matching[0]

            # Read the raw recording.
            data_dict = loadmat(os.path.join(raw_path, file_path))
            # assumes 'Channel_1' is indexed by sample along its first axis
            # - TODO confirm against the recordings.
            all_data = data_dict['Channel_1']
            total = len(all_data)

            # Window counter: runs across all splits of one file.
            num = 0
            for split_rate in split_rates:
                if generalization:
                    data = all_data
                else:
                    useful_length = round(
                        total * (split_rate[1] - split_rate[0]))
                    start = max(0,
                                round(total * split_rate[0]) - length // 2)
                    stop = start + useful_length + length
                    if stop > total - 1:
                        # Anchor the range at the end of the signal.  Clamp
                        # at 0: a negative start would be read as an offset
                        # from the end and silently drop most of the data.
                        start = max(0, total - useful_length - length)
                        stop = total
                    data = all_data[start:stop]

                # Keep every 8th point, starting at index 1.
                data = data[1::8]

                # Cut into equally spaced windows.
                for i in range(0, len(data) - length, int(space)):
                    one_sample = data[i:i + length]
                    if args.add_awgn and args.train_test == 'test':
                        one_sample = awgn(
                            one_sample,
                            args.awgn_db_list[args.experiment_time - 1])
                    if decode:
                        one_sample = np.reshape(one_sample, (-1))
                        one_sample = decode_data(one_sample)[1:(
                            args.fre_data_length + 1)]

                    # Store the window and register it in the meta file.
                    path = r'{}/{}_{}.pkl'.format(save_path, file_name, num)
                    with open(path, 'wb') as data_f:
                        pickle.dump(one_sample, data_f)
                        f.write('{}  {}\n'.format(path, label))
                    num += 1

                if generalization:
                    # The whole signal was already processed in one pass.
                    break

    # Per the original note: equalise the number of samples per class and
    # save them separately.
    handle_label(label_names, label_path, meta_name)
Ejemplo n.º 4
0
def abvt(raw_path,
         save_path,
         label_names,
         length,
         space,
         split_rates,
         label_path,
         fre_time,
         decode=False,
         channel='C1',
         generalization=False):
    """Cut the ``.csv`` recordings under ``raw_path`` into pickled windows.

    For every entry of ``label_names`` (its position in the list is the class
    label) the sub-directories of ``raw_path`` whose name contains that entry
    are scanned for ``.csv`` files.  Each file is read with
    ``pandas.read_csv`` (eight columns named ``C1`` .. ``C8``), the column
    ``channel`` is reduced to every 2nd point, split according to
    ``split_rates``, reduced to every 2nd point again, and cut into windows
    of ``length`` points every ``space`` points.  Each window is pickled to
    ``<save_path>/<label_name>_<n>.pkl`` and one ``"<path>  <label>"`` line
    is written to
    ``<label_path>/label_<fre_time>_<raw dir>_<save dir>.meta``, which is
    finally handed to ``handle_label``.

    The function also reads the module-level ``args`` object
    (``train_test``, ``add_awgn``, ``awgn_db_list``, ``experiment_time`` and
    ``fre_data_length``).

    Args:
        raw_path: Directory holding one sub-directory per recording group.
        save_path: Directory the ``.pkl`` windows are written to.
        label_names: Class names; a sub-directory belongs to a class when the
            class name is a substring of the directory name.
        length: Number of points per window.
        space: Step between the starts of consecutive windows (an ``int``).
        split_rates: Sequence of ``(begin, end)`` fractions of the signal.
            Each split is widened by ``length`` points (half before, half
            after the nominal range) and kept inside the signal.  Ignored
            when ``generalization`` is true.
        label_path: Directory the meta file is written to.
        fre_time: Value embedded in the meta file name.
        decode: When true, each window is flattened, passed through
            ``decode_data`` and elements ``1 .. args.fre_data_length`` of the
            result are stored instead of the raw window (per the original
            note: the frequency-domain data after envelope demodulation).
        channel: Name of the column to use.
        generalization: When true, only sub-directories whose name also
            contains ``args.train_test`` are used and the whole signal of
            every file is processed exactly once.
    """
    columns = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8']
    meta_name = 'label_{}_{}_{}.meta'.format(fre_time,
                                             os.path.split(raw_path)[-1],
                                             os.path.split(save_path)[-1])
    with open(os.path.join(label_path, meta_name), 'w') as f:
        for label, label_name in enumerate(label_names):
            if generalization:
                label_dirs = [
                    x for x in os.listdir(raw_path)
                    if label_name in x and args.train_test in x
                ]
            else:
                label_dirs = [
                    x for x in os.listdir(raw_path) if label_name in x
                ]

            # Window counter: runs across all directories/files of one class.
            num = 0
            for label_dir in label_dirs:
                dir_path = os.path.join(raw_path, label_dir)
                csv_names = [
                    x for x in os.listdir(dir_path) if x.endswith('.csv')
                ]
                for csv_name in csv_names:
                    # Read one raw recording.
                    raw_data = pd.read_csv(os.path.join(dir_path, csv_name),
                                           index_col=False,
                                           dtype=np.float32,
                                           names=columns)
                    # Keep every 2nd point (original note: lowers the
                    # sampling frequency to 25 kHz).
                    all_data = np.array(raw_data[channel])[1::2]
                    total = len(all_data)

                    for split_rate in split_rates:
                        if generalization:
                            datas = all_data
                        else:
                            useful_length = round(
                                total * (split_rate[1] - split_rate[0]))
                            start = max(
                                0,
                                round(total * split_rate[0]) - length // 2)
                            stop = start + useful_length + length
                            if stop > total - 1:
                                # Anchor the range at the end of the signal.
                                # Clamp at 0: a negative start would be read
                                # as an offset from the end and silently
                                # drop most of the data.
                                start = max(0,
                                            total - useful_length - length)
                                stop = total
                            datas = all_data[start:stop]

                        # NOTE(review): the signal is halved a second time
                        # here, on top of the decimation applied right after
                        # loading - confirm that this is intended.
                        datas = datas[1::2]

                        # Slide the window over this single file.
                        for i in range(0, len(datas) - length + 1, space):
                            one_sample = datas[i:(i + length)]
                            if args.add_awgn and args.train_test == 'test':
                                one_sample = awgn(
                                    one_sample,
                                    args.awgn_db_list[args.experiment_time -
                                                      1])
                            if decode:
                                one_sample = np.reshape(one_sample, (-1))
                                one_sample = decode_data(one_sample)[1:(
                                    args.fre_data_length + 1)]

                            # Store the window and register it in the meta
                            # file.
                            path = r'{}/{}_{}.pkl'.format(
                                save_path, label_name, num)
                            with open(path, 'wb') as data_f:
                                pickle.dump(one_sample, data_f)
                                f.write('{}  {}\n'.format(path, label))
                            num += 1

                        if generalization:
                            # The whole signal was already processed in one
                            # pass.
                            break

    # Per the original note: equalise the number of samples per class and
    # save them separately.
    handle_label(label_names, label_path, meta_name)