Python FeatureExtractor.clear 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: feature_extractor

클래스/타입: FeatureExtractor

메소드/함수: clear

hotexamples.com에서의 예제들: 9

Python FeatureExtractor.clear - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 feature_extractor.FeatureExtractor.clear에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

FeatureExtractor(30)

extract(20)

extract_features(14)

clear(9)

__init__(6)

Init(4)

extract_spatial_features(3)

extractFeatures(3)

extract_hog_features(2)

dependency(2)

extract_lemma_ngrams(2)

extract_log_mel_filterbank_energies(2)

compute_n_chars(2)

compute_feature_vector(2)

compute_document_length(2)

compute_descriptors(2)

extract_histogram_features(2)

close(2)

draw_keypoints(2)

chunk(2)

extract_word_ngrams(2)

featurizeFiles(2)

extractFeaturesDirectFromText(2)

extract_batch(2)

ne(2)

ngrams(2)

srl(2)

extract_full_feature_matrix(1)

extract_from_paths(1)

Client(1)

extract_single_image_features(1)

extract_hist_features(1)

extract_info(1)

extract_ngram_list(1)

get_dbp_sparql(1)

get_dtw_features(1)

get_fft_features(1)

get_freq_features(1)

get_minmax_features(1)

extract_from_img(1)

execute(1)

extract_features_fast(1)

categories(1)

Processor(1)

activate_tensor_board(1)

bcv(1)

bin_spatial(1)

build(1)

build_bag(1)

build_tfidf(1)

예제 #1

파일 보기

파일: FeatureToMatrixFilterDiscardFileWithDB.py 프로젝트: zhengchengyy/BBDataProcessing

def feature_to_matrix_file(action,
                           db,
                           volt_collection,
                           tag_collection,
                           port=27017,
                           host='localhost',
                           ndevices=3,
                           offset=0,
                           action_num=0,
                           interval=1,
                           rate=1):
    """Extract features for every recording of ``action`` and append them to .npy files.

    For each tag document whose ``tag`` equals ``action`` the voltage samples
    in the window ``(termtime - 30, termtime)`` are read per device, filtered
    with ``cwt_filter``, normalised with ``getNormalization`` and streamed
    through a ``FeatureExtractor``.  The resulting rows are appended to
    ``feature_matrixs/feature_matrix<i>.npy`` and one label per row
    (``action_num``) to ``feature_matrixs/label_matrix<i>.npy`` for every
    device ``i`` in ``1..ndevices``.  The ``feature_matrixs`` directory must
    already exist.

    Args:
        action: Tag value used to select recordings (``{'tag': action}``).
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port passed to ``MongoClient``.
        host: MongoDB host passed to ``MongoClient``.
        ndevices: Number of devices; devices are numbered from 1.
        offset: Not used by this function.
        action_num: Label value written for every extracted feature row.
        interval: First argument of every feature module.  Per the original
            author's note this is the time span of one analysis window, in
            seconds.
        rate: Second argument of every feature module.  Per the original
            author's note this is how often an analysis is performed, in
            seconds.

    Note:
        On the first write the label matrix has shape ``(n, 1)``; on later
        appends ``np.append`` (called without ``axis``) flattens it to 1-D.
    """
    # Imported once here instead of on every loop iteration.
    from collections import defaultdict

    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Best-effort sanity check: only reports the problem, does not abort.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # ntags is the number of tag documents for this action (per the original
    # comment: the number of people); tag_acc is the running counter.
    ntags = tag_collection.count_documents({'tag': action})
    tag_acc = 0

    # A figure is created and titled here although nothing is drawn on it in
    # this function; kept so the matplotlib side effect stays unchanged.
    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " +
                 "stepsize:" + str(rate) + "s)")

    # Build the extractor and register one module per configured feature.
    extractor = FeatureExtractor()
    for feature in feature_names:
        # NOTE: eval() instantiates the class named by ``feature``.  This is
        # only safe while feature_names is developer-controlled; never feed it
        # external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        extractor.register(module)

    # Per-action recording numbers the author marked for discarding.
    # Currently unused: the active skip rule in the loop below is hard-coded.
    discard = {
        "get_up": [],
        "go_to_bed": [],
        "turn_over": [2, 4, 5, 6, 9],
        "legs_stretch": [2, 5, 7, 8, 9, 11],
        "hands_stretch": [7, 8, 9],
        "legs_tremble": [3, 6, 9, 10, 11, 12],
        "hands_tremble": [2, 3, 7, 8, 9, 11],
        "body_tremble": [1, 2, 3, 6, 7, 8, 9],
        "head_move": [3, 9, 12],
        "legs_move": [1, 3, 4, 6, 9],
        "hands_move": [3, 6, 9],
        "hands_rising": [4, 5, 7, 8, 9],
        "kick": [2, 4, 5, 6, 7, 8, 9, 11, 12]
    }

    # Devices whose features are extracted.
    start = 1
    end = ndevices

    # Process the recordings of this action one by one.
    for tag in tag_collection.find({'tag': action}):
        tag_acc += 1
        if tag_acc > ntags:
            break
        # Recording 9 is always skipped; recording 11 is skipped for every
        # action except "hands_move".
        if tag_acc == 9 or (tag_acc == 11 and action != "hands_move"):
            continue
        print("people_" + str(tag_acc))

        # Only the last 30 time units before termtime are analysed.
        inittime, termtime = tag['termtime'] - 30, tag['termtime']

        # Raw samples grouped by device number.
        times = {i: [] for i in range(1, ndevices + 1)}
        volts = {i: [] for i in range(1, ndevices + 1)}
        for volt in volt_collection.find(
                {'time': {
                    '$gt': inittime,
                    '$lt': termtime
                }}):
            device_no = int(volt['device_no'])
            times[device_no].append(volt['time'])
            volts[device_no].append(volt['voltage'])

        # Wavelet filtering followed by normalisation.  The original author
        # also tried low-pass, FFT and moving-average filters and noted that
        # FFT filtering lowered recognition accuracy.
        filter_volts = {}
        for i in range(1, ndevices + 1):
            filter_volts[i] = getNormalization(cwt_filter(volts[i], 0.08))

        # Per-device containers for feature timestamps and feature series.
        # Keys are the feature names with their last six characters removed
        # (presumably the "Module" suffix -- confirm against feature_names).
        feature_times, feature_values = {}, {}
        for i in range(1, ndevices + 1):
            feature_times[i] = []
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # Stream every device's samples through the extractor.
        for i in range(start, end + 1):
            for j, v in enumerate(filter_volts[i]):
                output = extractor.process({"time": times[i][j], "volt": v})
                if output:
                    feature_times[i].append(times[i][j])
                    for name, series in feature_values[i].items():
                        series.append(output[name])

            # Reset all modules so no stale data leaks into the next device.
            extractor.clear()

        nfeatures = len(feature_values[1])
        # Materialised as a list because dict views cannot be indexed.
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):
            n_rows = len(feature_times[i])

            # Rows extracted for this device during this recording.
            new_rows = np.zeros((n_rows, nfeatures), dtype=float)
            for k, name in enumerate(feature_type):
                series = feature_values[i][name]
                for j in range(n_rows):
                    new_rows[j][k] = series[j]

            if os.path.exists("feature_matrixs/feature_matrix" + str(i) +
                              ".npy"):
                # Files exist: load them and append the new rows and labels.
                feature_matrix = np.load("feature_matrixs/feature_matrix" +
                                         str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" +
                                       str(i) + ".npy")
                if n_rows:
                    # One append for all labels instead of one per row.
                    label_matrix = np.append(label_matrix,
                                             [action_num] * n_rows)
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=new_rows,
                                           axis=0)
            else:
                # No files yet: the new rows are the whole matrix.
                feature_matrix = new_rows
                label_matrix = np.zeros((n_rows, 1), dtype=int)
                label_matrix[:] = action_num

            # np.save overwrites an existing file, so no explicit delete.
            np.save('feature_matrixs/feature_matrix' + str(i), feature_matrix)
            np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

            print("feature_matrix" + str(i) + ":" +
                  str(feature_matrix.shape))
            print("label_matrix" + str(i) + ":" + str(label_matrix.shape))

예제 #2

파일 보기

def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=5,
                          offset=0):
    """Plot the extracted features of the first recordings of ``action``.

    Voltage samples between ``inittime`` and ``termtime`` of each tag document
    are streamed, per device, through a ``FeatureExtractor``.  The results are
    drawn on a grid with one row per feature and one column per recording
    (8 columns); each subplot overlays one line per device.

    Args:
        action: Tag value used to select recordings (``{'tag': action}``).
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port passed to ``MongoClient``.
        host: MongoDB host passed to ``MongoClient``.
        ndevices: Number of devices; devices are numbered from 1.
        offset: Added to ``inittime``/``termtime`` before they are formatted
            for the x-axis label of the bottom row.
    """
    # Imported once here instead of on every loop iteration.
    from collections import defaultdict

    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Best-effort sanity check: only reports the problem, does not abort.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # Number of recordings plotted, i.e. the number of grid columns.  The
    # value is fixed at 8 regardless of how many tag documents exist.
    ntags = 8
    tag_acc = 0

    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))

    # Per the original author's note: interval is the time span of one
    # analysis window and rate is how often an analysis is run, in seconds.
    interval = 2
    rate = 1
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " +
                 "stepsize:" + str(rate) + "s)")

    # Build the extractor and register one module per configured feature.
    extractor = FeatureExtractor()
    for feature in feature_names:
        # NOTE: eval() instantiates the class named by ``feature``.  This is
        # only safe while feature_names is developer-controlled; never feed it
        # external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        extractor.register(module)

    # Devices whose features are extracted and plotted.
    start = 1
    end = ndevices

    style.use('default')
    colors = ['r', 'b', 'g', 'c', 'm']
    person = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']

    # Process the recordings of this action one by one.
    for tag in tag_collection.find({'tag': action}):
        tag_acc += 1
        if tag_acc > ntags:
            break
        # Has no effect while ntags is 8 (the loop breaks first).
        if tag_acc == 9 or (tag_acc == 11 and action != "hands_move"):
            continue
        inittime, termtime = tag['inittime'], tag['termtime']

        # Raw samples grouped by device number.
        times = {i: [] for i in range(1, ndevices + 1)}
        volts = {i: [] for i in range(1, ndevices + 1)}
        for volt in volt_collection.find(
                {'time': {
                    '$gt': inittime,
                    '$lt': termtime
                }}):
            device_no = int(volt['device_no'])
            times[device_no].append(volt['time'])
            volts[device_no].append(volt['voltage'])

        # Per-device containers for feature timestamps and feature series.
        # Keys are the feature names with their last six characters removed
        # (presumably the "Module" suffix -- confirm against feature_names).
        feature_times, feature_values = {}, {}
        for i in range(1, ndevices + 1):
            feature_times[i] = []
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # Stream every device's samples through the extractor.
        for i in range(start, end + 1):
            for j, v in enumerate(volts[i]):
                output = extractor.process({"time": times[i][j], "volt": v})
                if output:
                    feature_times[i].append(times[i][j])
                    for name, series in feature_values[i].items():
                        series.append(output[name])

            # Reset all modules so no stale data leaks into the next device.
            extractor.clear()

        nfeatures = len(feature_values[1])

        # fea_acc is the 1-based grid row of the feature being drawn.
        for fea_acc, feature_type in enumerate(feature_values[1].keys(),
                                               start=1):
            ax = fig.add_subplot(nfeatures, ntags,
                                 (fea_acc - 1) * ntags + tag_acc)
            # hspace is the vertical spacing between subplot rows.
            plt.subplots_adjust(hspace=0.5)
            ax.set_title(feature_type)

            for i in range(start, end + 1):
                # Guard against devices that produced no feature at all.
                if feature_times[i]:
                    ax.set_xlim(feature_times[i][0], feature_times[i][-1])
                ax.plot(feature_times[i],
                        feature_values[i][feature_type],
                        label='device_' + str(i),
                        # Cycle through the palette so more than five devices
                        # do not raise IndexError.
                        color=colors[(i - 1) % len(colors)],
                        alpha=0.9)

            # Legend only once, on the top-left subplot.
            if fea_acc == 1 and tag_acc == 1:
                ax.legend(loc='upper right')
                ax.set_xlabel('Time(s)')
            # The bottom row names the person and the recording time range.
            if fea_acc == nfeatures:
                ax.set_xlabel("Person" + person[tag_acc - 1] + ": " +
                              timeToFormat(inittime + offset) + " ~ " +
                              timeToFormat(termtime + offset))

            # X ticks are taken from the last device's feature timestamps.
            # NOTE(review): the original comment said "first device", but the
            # code has always used the last one -- confirm which is intended.
            tick_source = feature_times[end]
            length = len(tick_source)
            # Clamp to 1: the raw value is 0 for 8..15 points (range() would
            # raise ValueError) and negative below 8 (no ticks at all).
            step = max(1, length // 8 - 1)
            xticks = []
            xticklabels = []
            for k in range(0, length, step):
                xticks.append(tick_source[k])
                # Labels show whole time units elapsed since inittime.
                xticklabels.append(int(tick_source[k] - inittime))
            # Tick positions must use the same type as the plotted data.
            ax.set_xticks(xticks)
            # Labels correspond one-to-one to the tick positions.
            ax.set_xticklabels(xticklabels, rotation=15)

            ax.grid(linestyle=':')

    # Maximise the figure window, then show it.
    plt.get_current_fig_manager().window.state('zoomed')
    plt.show()

예제 #3

파일 보기

파일: FeatureToMatrixFileDownsampling.py 프로젝트: zhengchengyy/BBDataProcessing

def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=3,
                          offset=0,
                          action_num=0):
    """Extract features from down-sampled voltages and append them to .npy files.

    For each tag document whose ``tag`` equals ``action`` the voltage samples
    between ``inittime`` and ``termtime`` are down-sampled per device (one
    record kept out of every ``sampling_factor``), streamed through a
    ``FeatureExtractor`` and the resulting rows appended to
    ``feature_matrixs/feature_matrix<i>.npy``; one label (``action_num``) per
    row goes to ``feature_matrixs/label_matrix<i>.npy``.  The
    ``feature_matrixs`` directory must already exist.

    Despite its name, this function draws nothing beyond creating a titled
    figure.

    Args:
        action: Tag value used to select recordings (``{'tag': action}``).
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port passed to ``MongoClient``.
        host: MongoDB host passed to ``MongoClient``.
        ndevices: Number of devices; devices are numbered from 1.
        offset: Not used by this function.
        action_num: Label value written for every extracted feature row.

    Note:
        On the first write the label matrix has shape ``(n, 1)``; on later
        appends ``np.append`` (called without ``axis``) flattens it to 1-D.
    """
    # Imported once here instead of on every loop iteration.
    from collections import defaultdict

    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Best-effort sanity check: only reports the problem, does not abort.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # A figure is created and titled here although nothing is drawn on it in
    # this function; kept so the matplotlib side effect stays unchanged.
    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))

    # Per the original author's note: interval is the time span of one
    # analysis window and rate is how often an analysis is run, in seconds.
    interval = 1
    rate = 1
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " +
                 "stepsize:" + str(rate) + "s)")

    # Build the extractor and register one module per configured feature.
    extractor = FeatureExtractor()
    for feature in feature_names:
        # NOTE: eval() instantiates the class named by ``feature``.  This is
        # only safe while feature_names is developer-controlled; never feed it
        # external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        extractor.register(module)

    # Devices whose features are extracted.
    start = 1
    end = ndevices

    # Keep one record out of every ``sampling_factor`` records of a device.
    sampling_factor = 3

    # Process the recordings of this action one by one.
    for tag in tag_collection.find({'tag': action}):
        inittime, termtime = tag['inittime'], tag['termtime']

        # Down-sampled raw samples grouped by device number.
        times = {i: [] for i in range(1, ndevices + 1)}
        volts = {i: [] for i in range(1, ndevices + 1)}

        # Records seen so far, counted separately per device.  The previous
        # single counter was reset to 1 and then incremented, which kept every
        # 2nd record instead of every ``sampling_factor``-th, and was shared
        # by all devices so the kept records depended on how the devices'
        # records happened to interleave.
        seen = defaultdict(int)
        for volt in volt_collection.find(
                {'time': {
                    '$gt': inittime,
                    '$lt': termtime
                }}):
            device_no = int(volt['device_no'])
            if seen[device_no] % sampling_factor == 0:
                times[device_no].append(volt['time'])
                volts[device_no].append(volt['voltage'])
            seen[device_no] += 1

        # Per-device containers for feature timestamps and feature series.
        # Keys are the feature names with their last six characters removed
        # (presumably the "Module" suffix -- confirm against feature_names).
        feature_times, feature_values = {}, {}
        for i in range(1, ndevices + 1):
            feature_times[i] = []
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # Stream every device's samples through the extractor.
        for i in range(start, end + 1):
            for j, v in enumerate(volts[i]):
                output = extractor.process({"time": times[i][j], "volt": v})
                if output:
                    feature_times[i].append(times[i][j])
                    for name, series in feature_values[i].items():
                        series.append(output[name])

            # Reset all modules so no stale data leaks into the next device.
            extractor.clear()

        nfeatures = len(feature_values[1])
        # Materialised as a list because dict views cannot be indexed.
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):
            n_rows = len(feature_times[i])

            # Rows extracted for this device during this recording.
            new_rows = np.zeros((n_rows, nfeatures), dtype=float)
            for k, name in enumerate(feature_type):
                series = feature_values[i][name]
                for j in range(n_rows):
                    new_rows[j][k] = series[j]

            if os.path.exists("feature_matrixs/feature_matrix" + str(i) +
                              ".npy"):
                # Files exist: load them and append the new rows and labels.
                feature_matrix = np.load("feature_matrixs/feature_matrix" +
                                         str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" +
                                       str(i) + ".npy")
                if n_rows:
                    # One append for all labels instead of one per row.
                    label_matrix = np.append(label_matrix,
                                             [action_num] * n_rows)
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=new_rows,
                                           axis=0)
            else:
                # No files yet: the new rows are the whole matrix.
                feature_matrix = new_rows
                label_matrix = np.zeros((n_rows, 1), dtype=int)
                label_matrix[:] = action_num

            # np.save overwrites an existing file.  The old files are no
            # longer deleted first, so a failure before the save cannot leave
            # the directory without them.
            np.save('feature_matrixs/feature_matrix' + str(i), feature_matrix)
            np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

            print("feature_matrix" + str(i) + ":" +
                  str(feature_matrix.shape))

예제 #4

파일 보기

                    "device_no": i,
                    "feature_time": times[i][j],
                    "feature_value": output,
                    "interval": interval,
                    "rate": rate
                }
                feature_times[i].append(features['feature_time'])
                for feature_type in feature_values[i].keys():
                    feature_values[i][feature_type].append(
                        features['feature_value'][feature_type])

                # 把特征数据存入数据库
                # collection.insert_one(features)

        # 清理所有模块，防止过期数据
        extractor.clear()

    title = config['volt_collection'][6:] + "" + config['action']
    fig = plt.figure(title, figsize=(6, 8))
    fig.suptitle("features")

    # 定义画布位置的计数
    n = 1

    for feature_type in feature_values[1].keys():
        style.use('default')
        colors = ['r', 'b', 'g', 'c', 'm']  # m c
        base = nfeatures * 100 + 10
        # plot, add_subplot(211)将画布分割成2行1列，图像画在从左到右从上到下的第1块
        ax = fig.add_subplot(base + n)
        plt.subplots_adjust(hspace=0.5)  # 函数中的wspace是子图之间的垂直间距，hspace是子图的上下间距

예제 #5

파일 보기

def feature_to_matrix_file(action,
                           db,
                           volt_collection,
                           tag_collection,
                           port=27017,
                           host='localhost',
                           ndevices=3,
                           offset=0,
                           action_num=0,
                           interval=2,
                           rate=1):
    """Extract features for recordings of ``action`` and save them as .npy and .txt.

    For each of the first 8 tag documents whose ``tag`` equals ``action`` the
    voltage samples between ``inittime`` and ``termtime`` are read per device,
    filtered with ``cwt_filter``, normalised with ``getNormalization`` and
    streamed through a ``FeatureExtractor``.  For every device ``i`` the rows
    are appended to ``feature_matrixs/feature_matrix<i>.npy``, one label
    (``action_num``) per row goes to ``feature_matrixs/label_matrix<i>.npy``,
    and the full feature matrix is also written as comma-separated text to
    ``feature_matrixs/feature_matrix<i>.txt``.  The ``feature_matrixs``
    directory must already exist.

    Args:
        action: Tag value used to select recordings (``{'tag': action}``).
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port passed to ``MongoClient``.
        host: MongoDB host passed to ``MongoClient``.
        ndevices: Number of devices; devices are numbered from 1.
        offset: Not used by this function.
        action_num: Label value written for every extracted feature row.
        interval: First argument of every feature module.  Per the original
            author's note this is the time span of one analysis window, in
            seconds.
        rate: Second argument of every feature module.  Per the original
            author's note this is how often an analysis is performed, in
            seconds.

    Note:
        On the first write the label matrix has shape ``(n, 1)``; on later
        appends ``np.append`` (called without ``axis``) flattens it to 1-D.
    """
    # Imported once here instead of on every loop iteration.
    from collections import defaultdict

    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Best-effort sanity check: only reports the problem, does not abort.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # Devices whose features are extracted.
    start = 1
    end = ndevices

    # Build the extractor and register one module per configured feature.
    extractor = FeatureExtractor()
    for feature in feature_names:
        # NOTE: eval() instantiates the class named by ``feature``.  This is
        # only safe while feature_names is developer-controlled; never feed it
        # external input.
        module = eval(feature + "(" + str(interval) + "," + str(rate) + ")")
        extractor.register(module)

    # Running count of processed recordings.
    tag_acc = 0

    # Process the recordings of this action one by one.
    for tag in tag_collection.find({'tag': action}):
        tag_acc += 1
        # Only the first 8 recordings are used.
        if tag_acc > 8:
            break
        print("people_" + str(tag_acc))
        inittime, termtime = tag['inittime'], tag['termtime']

        # Raw samples grouped by device number.
        times = {i: [] for i in range(start, ndevices + 1)}
        volts = {i: [] for i in range(start, ndevices + 1)}
        for volt in volt_collection.find(
                {'time': {
                    '$gt': inittime,
                    '$lt': termtime
                }}):
            device_no = int(volt['device_no'])
            times[device_no].append(volt['time'])
            volts[device_no].append(volt['voltage'])

        # Wavelet filtering followed by normalisation.  The original author
        # also listed FFT, low-pass and moving-average filters as disabled
        # alternatives.
        normalize_volts = {}
        for i in range(start, end + 1):
            normalize_volts[i] = getNormalization(cwt_filter(volts[i], 0.08))

        # Per-device containers for feature timestamps and feature series.
        # Keys are the feature names with their last six characters removed
        # (presumably the "Module" suffix -- confirm against feature_names).
        feature_times, feature_values = {}, {}
        for i in range(start, end + 1):
            feature_times[i] = []
            feature_values[i] = defaultdict(list)
            for feature in feature_names:
                feature_values[i][feature[:-6]] = []

        # Stream every device's samples through the extractor.
        for i in range(start, end + 1):
            for j, v in enumerate(normalize_volts[i]):
                output = extractor.process({"time": times[i][j], "volt": v})
                if output:
                    feature_times[i].append(times[i][j])
                    for name, series in feature_values[i].items():
                        series.append(output[name])

            # Reset all modules so no stale data leaks into the next device.
            extractor.clear()

        nfeatures = len(feature_values[1])
        # Materialised as a list because dict views cannot be indexed.
        feature_type = list(feature_values[1].keys())

        for i in range(start, end + 1):
            n_rows = len(feature_times[i])

            # Rows extracted for this device during this recording.
            new_rows = np.zeros((n_rows, nfeatures), dtype=float)
            for k, name in enumerate(feature_type):
                series = feature_values[i][name]
                for j in range(n_rows):
                    new_rows[j][k] = series[j]

            if os.path.exists("feature_matrixs/feature_matrix" + str(i) +
                              ".npy"):
                # Files exist: load them and append the new rows and labels.
                feature_matrix = np.load("feature_matrixs/feature_matrix" +
                                         str(i) + ".npy")
                label_matrix = np.load("feature_matrixs/label_matrix" +
                                       str(i) + ".npy")
                if n_rows:
                    # One append for all labels instead of one per row.
                    label_matrix = np.append(label_matrix,
                                             [action_num] * n_rows)
                feature_matrix = np.insert(feature_matrix,
                                           feature_matrix.shape[0],
                                           values=new_rows,
                                           axis=0)
            else:
                # No files yet: the new rows are the whole matrix.
                feature_matrix = new_rows
                label_matrix = np.zeros((n_rows, 1), dtype=int)
                label_matrix[:] = action_num

            # np.save overwrites an existing file.  The old files are no
            # longer deleted first, so a failure before the save cannot leave
            # the directory without them.
            np.save('feature_matrixs/feature_matrix' + str(i), feature_matrix)
            np.save('feature_matrixs/label_matrix' + str(i), label_matrix)

            np.set_printoptions(suppress=True)
            # The text dump is named after the device being saved (``i``).
            # It used to be named after ``device_no``, the device of the last
            # voltage record read, so the wrong file was written.  A single
            # per-column format plus a delimiter works for any number of
            # features and yields the same text as before for two columns.
            np.savetxt('feature_matrixs/feature_matrix' + str(i) + '.txt',
                       feature_matrix,
                       fmt="%.18f",
                       delimiter=",")

            print("feature_matrix" + str(i) + ":" +
                  str(feature_matrix.shape))

예제 #6

파일 보기

def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=5,
                          offset=0):
    """Plot Range, StandardDeviation and Energy features for each tagged run.

    For every document in the tag collection whose ``tag`` equals ``action``,
    the voltage samples strictly between its ``inittime`` and ``termtime`` are
    grouped by device, streamed through a ``FeatureExtractor`` and drawn in
    one column of a shared figure (one row per feature). The figure is
    finally saved as ``feature_images/<title>.png``.

    Args:
        action: Tag value selecting which recordings to plot.
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port.
        host: MongoDB host.
        ndevices: Number of devices; device numbers are expected to run from
            1 to ``ndevices`` (a sample with any other number raises
            ``KeyError``).
        offset: Value added to the tag times before they are formatted for
            the per-column x-axis label.
    """
    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # The error is raised and caught right here, so an (almost) empty
    # database only produces a printed message and execution continues.
    # NOTE(review): relies on CollectionError exposing `.message` - confirm.
    try:
        if volt_collection.count_documents(
            {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # One subplot column per tagged recording.
    ntags = tag_collection.count_documents({'tag': action})

    # `config` is read from the enclosing module.
    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))

    # interval: time span handed to each feature module;
    # rate: how often a feature module is asked to analyse (both in seconds).
    interval = 1
    rate = 1
    fig.suptitle(action + " (" + "interval:" + str(interval) + "s, " +
                 "stepsize:" + str(rate) + "s)")

    # Feature extractor with its three registered modules.
    extractor = FeatureExtractor()
    extractor.register(RangeModule(interval, rate))
    extractor.register(StandardDeviationModule(interval, rate))
    extractor.register(EnergyModule(interval, rate))

    feature_types = ('Range', 'StandardDeviation', 'Energy')
    nfeatures = len(feature_types)
    colors = ['r', 'b', 'g', 'c', 'm']
    person = ['A', 'B', 'C', 'D', 'E', 'F', 'G']

    # First device to process, and last device to draw. The original code
    # drew devices 1..3 only; that cap is kept but clamped to `ndevices` so a
    # smaller device count no longer raises KeyError.
    start = 1
    plot_last = min(3, ndevices)

    # Column counter (1-based): one column per tag document.
    tag_acc = 1

    # Read the recordings of the requested action one by one.
    for tag in tag_collection.find({'tag': action}):
        inittime, termtime = tag['inittime'], tag['termtime']

        # Raw samples per device.
        times = {i: [] for i in range(1, ndevices + 1)}
        volts = {i: [] for i in range(1, ndevices + 1)}

        for volt in volt_collection.find(
            {'time': {
                '$gt': inittime,
                '$lt': termtime
            }}):
            device_no = int(volt['device_no'])
            times[device_no].append(volt['time'])
            volts[device_no].append(volt['voltage'])

        # Extracted feature series per device.
        feature_times = {i: [] for i in range(1, ndevices + 1)}
        feature_values = {
            i: {feature_type: [] for feature_type in feature_types}
            for i in range(1, ndevices + 1)
        }

        # Run the extractor over every device (previously hard-coded to 1..5).
        for i in range(start, ndevices + 1):
            for sample_time, sample_volt in zip(times[i], volts[i]):
                output = extractor.process({
                    "time": sample_time,
                    "volt": sample_volt
                })
                if output:
                    feature_times[i].append(sample_time)
                    for feature_type in feature_types:
                        feature_values[i][feature_type].append(
                            output[feature_type])

            # Reset all modules so stale samples do not leak into the next
            # device.
            extractor.clear()

        style.use('default')

        # Row counter (1-based): one row per feature.
        for fea_acc, feature_type in enumerate(feature_types, start=1):
            # Explicit (rows, cols, index) form: the three-digit shorthand
            # addresses the wrong cell once the index needs two digits.
            ax = fig.add_subplot(nfeatures, ntags,
                                 tag_acc + (fea_acc - 1) * ntags)
            # hspace is the vertical padding between subplot rows.
            plt.subplots_adjust(hspace=0.5)
            ax.set_title(feature_type)

            for i in range(start, plot_last + 1):
                # A device without any extracted feature has nothing to draw.
                if not feature_times[i]:
                    continue
                ax.set_xlim(feature_times[i][0], feature_times[i][-1])
                ax.plot(feature_times[i],
                        feature_values[i][feature_type],
                        label='device_' + str(i),
                        color=colors[i - 1],
                        alpha=0.9)

            # Legend only once, on the top-left subplot.
            if fea_acc == 1 and tag_acc == 1:
                ax.legend(loc='upper right')
                ax.set_xlabel('Time(s)')
            if fea_acc == nfeatures:
                # Label the bottom subplot of each column with the person and
                # the recording window; fall back to the column number when
                # there are more columns than letters.
                if tag_acc <= len(person):
                    who = person[tag_acc - 1]
                else:
                    who = str(tag_acc)
                ax.set_xlabel("Person" + who + ": " +
                              timeToFormat(inittime + offset) + " ~ " +
                              timeToFormat(termtime + offset))

            # Tick positions are taken from the last plotted device.
            # NOTE(review): the original comment said "first device" but the
            # code used the loop variable left over from the plot loop (the
            # last plotted device); that behaviour is kept - confirm intent.
            tick_times = feature_times[plot_last]
            # A dedicated name keeps `interval` intact, and max(1, ...) avoids
            # a zero or negative range step for short series.
            tick_step = max(1, len(tick_times) // 8 - 1)
            xticks = tick_times[::tick_step]
            # Tick labels show whole seconds elapsed since `inittime`.
            xticklabels = [int(tick - inittime) for tick in xticks]
            # Tick positions must be in the same units as the plotted data.
            ax.set_xticks(xticks)
            # Labels map one-to-one onto the tick positions.
            ax.set_xticklabels(xticklabels, rotation=15)

        tag_acc += 1

    figure = plt.gcf()  # get current figure
    figure.set_size_inches(20, 10)
    plt.savefig("feature_images/" + title + ".png", dpi=200)

예제 #7

파일 보기

파일: RealTimeSimulativeMonitor.py 프로젝트: zhengchengyy/BBDataProcessing

def feature_to_matrix_file(action,
                           db,
                           volt_collection,
                           tag_collection,
                           port=27017,
                           host='localhost',
                           ndevices=3,
                           offset=0,
                           action_num=0,
                           interval=2,
                           rate=1):
    """Replay stored recordings of ``action`` and print per-sample predictions.

    For at most the first eight tag documents whose ``tag`` equals
    ``action``, the voltage samples strictly between ``inittime`` and
    ``termtime`` are grouped by device, filtered with ``cwt_filter``,
    normalised with ``getNormalization`` and streamed through a
    ``FeatureExtractor``. Each time the extractor yields features, the
    pickled model of that device predicts an action and the result is
    printed together with its highest class probability.

    Args:
        action: Tag value selecting which recordings to replay.
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port.
        host: MongoDB host.
        ndevices: Number of devices; device numbers are expected to run from
            1 to ``ndevices``.
        offset: Not used by the visible body.
        action_num: Not used by the visible body.
        interval: First argument passed to every feature module constructor.
        rate: Second argument passed to every feature module constructor.
    """
    import pickle

    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Raised and caught on the spot: the message is printed and execution
    # simply continues.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # The count is queried but not used further down in this function.
    ntags = tag_collection.count_documents({'tag': action})

    # Build the extractor from the module-level `feature_names` list.
    # NOTE(review): each name is turned into a constructor call through
    # eval(); this is only safe while `feature_names` is trusted.
    extractor = FeatureExtractor()
    for feature in feature_names:
        constructor_call = feature + "(" + str(interval) + "," + str(
            rate) + ")"
        extractor.register(eval(constructor_call))

    # Walk the recordings of the requested action, stopping after eight.
    for tag_acc, tag in enumerate(tag_collection.find({'tag': action}), 1):
        if tag_acc > 8:
            break
        print("people_" + str(tag_acc))
        inittime, termtime = tag['inittime'], tag['termtime']

        device_ids = range(1, ndevices + 1)

        # Raw samples per device.
        times = {device: [] for device in device_ids}
        volts = {device: [] for device in device_ids}

        sample_query = {'time': {'$gt': inittime, '$lt': termtime}}
        for sample in volt_collection.find(sample_query):
            device_no = int(sample['device_no'])
            voltage = sample['voltage']
            stamp = sample['time']
            times[device_no].append(stamp)
            volts[device_no].append(voltage)

        # Wavelet filtering (threshold 0.08) followed by normalisation.
        normalize_volts = {}
        for device in device_ids:
            filtered = cwt_filter(volts[device], 0.08)
            normalize_volts[device] = getNormalization(filtered)

        # Feature keys are the module names with their last six characters
        # removed, in first-seen order and without duplicates.
        feature_keys = list(
            dict.fromkeys(name[:-6] for name in feature_names))

        # Run extraction and prediction device by device.
        for device in device_ids:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; the model files must come from a trusted source.
            model_path = 'models/' + 'device_' + str(
                device) + '_post_prune.pickle'
            with open(model_path, 'rb') as f:
                model = pickle.load(f)

            for j, norm_volt in enumerate(normalize_volts[device]):
                stamp = times[device][j]
                output = extractor.process({"time": stamp, "volt": norm_volt})
                if not output:
                    continue

                # One feature row, computed as the samples stream in.
                feature_temp = [output[key] for key in feature_keys]

                predict_result = model.predict([feature_temp])
                predict_proba = max(model.predict_proba([feature_temp])[0])
                print(timeToSecond(stamp),
                      action_names_copy[predict_result[0]], predict_proba)

            # Reset all modules so stale samples do not leak into the next
            # device.
            extractor.clear()

예제 #8

파일 보기

def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=3,
                          offset=0,
                          action_num=0,
                          feature_name="Mean"):
    """Collect one feature of the third tagged run into ``fea_diff_action``.

    Only the third tag document whose ``tag`` equals ``action`` is processed.
    The voltage samples of the 31 time units before its ``termtime`` are
    grouped by device, filtered with ``cwt_filter``, normalised with
    ``getNormalization`` and streamed through a ``FeatureExtractor``. Every
    extracted value of ``feature_name`` is appended to
    ``fea_diff_action[action_num]``, a container defined outside this
    function. Despite the name, the visible body does no plotting.

    Args:
        action: Tag value selecting which recordings to consider.
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port.
        host: MongoDB host.
        ndevices: Number of devices whose sample lists are prepared
            (device numbers 1 to ``ndevices``).
        offset: Not used by the visible body.
        action_num: Key into ``fea_diff_action`` that receives the values.
        feature_name: Key of the extracted feature to collect.
    """
    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # Raised and caught on the spot: the message is printed and execution
    # simply continues.
    try:
        total_docs = (volt_collection.count_documents({}) +
                      tag_collection.count_documents({}))
        if total_docs < 2:
            raise CollectionError('Collection not found！')
    except CollectionError as e:
        print(e.message)

    # The real number of matching tags is queried, then overridden with a
    # fixed 3: only the third matching tag document is processed below.
    ntags = tag_collection.count_documents({'tag': action})
    ntags = 3

    # interval: time span handed to each feature module;
    # rate: how often a feature module is asked to analyse.
    interval = 2
    rate = 1

    # Build the extractor from the module-level `feature_names` list.
    # NOTE(review): each name is turned into a constructor call through
    # eval(); this is only safe while `feature_names` is trusted.
    extractor = FeatureExtractor()
    for feature in feature_names:
        constructor_call = feature + "(" + str(interval) + "," + str(
            rate) + ")"
        extractor.register(eval(constructor_call))

    # Skip ahead to tag number `ntags`, handle it, and stop at the next one.
    for tag_acc, tag in enumerate(tag_collection.find({'tag': action}), 1):
        if tag_acc < ntags:
            continue
        if tag_acc > ntags:
            break

        # Window: the 31 time units that end at the tag's termtime.
        termtime = tag['termtime']
        inittime = termtime - 31

        # Raw and filtered samples per device.
        times = {device: [] for device in range(1, ndevices + 1)}
        volts = {device: [] for device in range(1, ndevices + 1)}
        filter_volts = {device: [] for device in range(1, ndevices + 1)}

        sample_query = {'time': {'$gt': inittime, '$lt': termtime}}
        for sample in volt_collection.find(sample_query):
            device_no = int(sample['device_no'])
            voltage = sample['voltage']
            stamp = sample['time']
            times[device_no].append(stamp)
            volts[device_no].append(voltage)

        # NOTE(review): `start` and `end` are not bound inside this function;
        # they are presumably module-level device bounds - confirm they exist.
        # Wavelet filtering (threshold 0.08) followed by normalisation.
        for i in range(start, end + 1):
            denoised = cwt_filter(volts[i], 0.08)
            filter_volts[i] = getNormalization(denoised)

        # Extract features device by device.
        for i in range(start, end + 1):
            for j, filtered_volt in enumerate(filter_volts[i]):
                output = extractor.process({
                    "time": times[i][j],
                    "volt": filtered_volt
                })
                if not output:
                    continue
                fea_diff_action[action_num].append(output[feature_name])

            # Reset all modules so stale samples do not leak into the next
            # device.
            extractor.clear()

예제 #9

파일 보기

def draw_features_from_db(action,
                          db,
                          volt_collection,
                          tag_collection,
                          port=27017,
                          host='localhost',
                          ndevices=3,
                          offset=0):
    """Plot Range, VibrationFreq and ThresholdCounter features per tagged run.

    For every document in the tag collection whose ``tag`` equals ``action``,
    the voltage samples strictly between its ``inittime`` and ``termtime`` are
    grouped by device, streamed through a ``FeatureExtractor`` and drawn in
    one column of a shared figure (one row per feature). The figure is shown
    with ``plt.show()`` at the end.

    Args:
        action: Tag value selecting which recordings to plot.
        db: Name of the MongoDB database.
        volt_collection: Name of the collection holding voltage samples.
        tag_collection: Name of the collection holding tag documents.
        port: MongoDB port.
        host: MongoDB host.
        ndevices: Number of devices; device numbers are expected to run from
            1 to ``ndevices`` (a sample with any other number raises
            ``KeyError``).
        offset: Value added to each tick time before it is passed to
            ``timeToSecond`` for the tick label.
    """
    client = MongoClient(port=port, host=host)
    database = client[db]
    tag_collection = database[tag_collection]
    volt_collection = database[volt_collection]

    # The error is raised and caught right here, so an (almost) empty
    # database only produces a printed message and execution continues.
    # NOTE(review): relies on CollectionError exposing `.message` - confirm.
    try:
        if volt_collection.count_documents(
            {}) + tag_collection.count_documents({}) < 2:
            raise CollectionError(
                'Collection not found, please check names of the collection and database'
            )
    except CollectionError as e:
        print(e.message)

    # One subplot column per tagged recording.
    ntags = tag_collection.count_documents({'tag': action})

    # `config` is read from the enclosing module.
    title = config['volt_collection'][6:] + "" + action + "_features"
    fig = plt.figure(title, figsize=(6, 8))
    fig.suptitle(action)

    # interval: time span handed to each feature module;
    # rate: how often a feature module is asked to analyse (both in seconds).
    interval = 1
    rate = 1

    # Feature extractor with its three registered modules.
    extractor = FeatureExtractor()
    extractor.register(RangeModule(interval, rate))
    extractor.register(VibrationFreqModule(interval, rate))
    extractor.register(ThresholdCounterModule(interval, rate))

    feature_types = ('Range', 'VibrationFreq', 'ThresholdCounter')
    nfeatures = len(feature_types)
    colors = ['r', 'b', 'g', 'c', 'm']

    # Column counter (1-based): one column per tag document.
    tag_acc = 1

    # Read the recordings of the requested action one by one.
    for tag in tag_collection.find({'tag': action}):
        inittime, termtime = tag['inittime'], tag['termtime']

        # Raw samples per device.
        times = {i: [] for i in range(1, ndevices + 1)}
        volts = {i: [] for i in range(1, ndevices + 1)}

        for volt in volt_collection.find(
            {'time': {
                '$gt': inittime,
                '$lt': termtime
            }}):
            device_no = int(volt['device_no'])
            times[device_no].append(volt['time'])
            volts[device_no].append(volt['voltage'])

        # Extracted feature series per device.
        feature_times = {i: [] for i in range(1, ndevices + 1)}
        feature_values = {
            i: {feature_type: [] for feature_type in feature_types}
            for i in range(1, ndevices + 1)
        }

        # Run the extractor over every device.
        for i in range(1, ndevices + 1):
            for sample_time, sample_volt in zip(times[i], volts[i]):
                output = extractor.process({
                    "time": sample_time,
                    "volt": sample_volt
                })
                if output:
                    feature_times[i].append(sample_time)
                    for feature_type in feature_types:
                        feature_values[i][feature_type].append(
                            output[feature_type])

            # Reset all modules so stale samples do not leak into the next
            # device.
            extractor.clear()

        style.use('default')

        # Row counter (1-based): one row per feature.
        for fea_acc, feature_type in enumerate(feature_types, start=1):
            # Explicit (rows, cols, index) form: the three-digit shorthand
            # addresses the wrong cell once the index needs two digits.
            ax = fig.add_subplot(nfeatures, ntags,
                                 tag_acc + (fea_acc - 1) * ntags)
            # hspace is the vertical padding between subplot rows.
            plt.subplots_adjust(hspace=0.5)
            ax.set_title(feature_type)

            for i in range(1, ndevices + 1):
                # A device without any extracted feature has nothing to draw.
                if not feature_times[i]:
                    continue
                ax.set_xlim(feature_times[i][0], feature_times[i][-1])
                ax.plot(feature_times[i],
                        feature_values[i][feature_type],
                        label='device_' + str(i),
                        # Cycle the palette so more than five devices do not
                        # run past the end of the colour list.
                        color=colors[(i - 1) % len(colors)],
                        alpha=0.9)

            # NOTE(review): the legend is attached to the second column only
            # (tag_acc == 2), so a single-tag figure has no legend - kept as
            # in the original, confirm this is intended.
            if fea_acc == 1 and tag_acc == 2:
                ax.legend(loc='best')
            if fea_acc == nfeatures:
                ax.set_xlabel('Time')

            # Tick positions are taken from the first device: roughly one
            # tick per tenth of the series.
            tick_times = feature_times[1]
            # A dedicated name keeps `interval` intact, and max(1, ...) avoids
            # a zero or negative range step for short series.
            tick_step = max(1, len(tick_times) // 10 - 1)
            xticks = tick_times[::tick_step]
            xticklabels = [timeToSecond(tick + offset) for tick in xticks]
            # Tick positions must be in the same units as the plotted data.
            ax.set_xticks(xticks)
            # Labels map one-to-one onto the tick positions.
            ax.set_xticklabels(xticklabels, rotation=15)

        tag_acc += 1

    plt.show()