import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from torch.utils.data import DataLoader

from tool import file_io
# path_loss, data_split, split_raw_data_line, and PathLossWithDetailDataset
# are project-local helpers assumed importable from the surrounding package.


def unpack_raw_data(root_path,
                    points=None,
                    name=None,
                    collection_type='line_base'):
    # "mac", "ADV", "RSSI", "NAME", "CHANNEL", "TYPE", "TX_POWER", "A_GAIN", "A_TYPE", "BOARD"
    space = []
    # All line folders: 00, 05, 10, ...
    dir_list = file_io.get_directory_list(root_path)
    dir_list = file_io.extract_only_directory(dir_list)
    print(dir_list)
    # Visit each folder in turn, e.g. 00
    for dir_idx, directory in enumerate(dir_list):
        line_base_pack = []
        # Path to the folder for a single line
        dir_path = "{}/{}/".format(root_path, directory)
        # Extract the list of txt files
        file_list = file_io.get_all_file_path(dir_path, file_extension='txt')
        print(file_list)
        # Open each file and process it
        for file_idx, file in enumerate(file_list):
            channel_info = file_io.get_pure_filename(file).split('_')[1]
            lines = file_io.read_txt_file(file)
            for line in lines:
                # 0: mac
                # 1: ADV
                # 2: RSSI
                # 3: Name
                split_data = split_raw_data_line(line)
                if split_data != '':
                    for device_pack_idx, device_pack in enumerate(name):
                        for device_idx, device_name in enumerate(device_pack):
                            if device_name in split_data[0]:
                                if len(points[device_pack_idx][dir_idx]) == 0:
                                    # No coordinate recorded for this pack in
                                    # this folder; skip the row.
                                    continue
                                split_data.append(channel_info)
                                # print("device pack idx : ", points[device_pack_idx])
                                # print("point : ", points[device_pack_idx][dir_idx])
                                # print("data : ", split_data)
                                device_point_x = points[device_pack_idx][
                                    dir_idx][0]
                                device_point_y = points[device_pack_idx][
                                    dir_idx][1]
                                split_data.append(device_point_x)
                                split_data.append(device_point_y)
                                space.append(split_data)
                                line_base_pack.append(split_data)

                    print(split_data)

        pack_pd = pd.DataFrame(
            line_base_pack,
            columns=["mac", "ADV", "RSSI", "NAME", "CHANNEL", "X", "Y"])
        save_path = "{}/{}.csv".format(root_path, directory)
        pack_pd.to_csv(save_path, mode='w', index=False)
    pack_all_pd = pd.DataFrame(
        space, columns=["mac", "ADV", "RSSI", "NAME", "CHANNEL", "X", "Y"])
    save_path = "{}.csv".format(root_path)
    pack_all_pd.to_csv(save_path, mode='w', index=False)
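

# Usage sketch (hypothetical paths and labels): `name` holds per-pack device
# name filters and `points` gives each pack an (x, y) coordinate per line
# folder, indexed the same way the loops above index them.
# unpack_raw_data('../dataset/v9/raw',
#                 points=[[(0, 0), (0, 5)], [(3, 0), (3, 5)]],
#                 name=[['DEVICE_A'], ['DEVICE_B']])
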
def load_path_loss_with_detail_dataset(input_dir,
                                       model_type='CRNN',
                                       num_workers=8,
                                       batch_size=128,
                                       shuffle=True,
                                       input_size=15,
                                       various_input=False):
    # Take the directory where the files are stored and collect the file list
    file_list = file_io.get_all_file_path(input_dir, file_extension='csv')
    # Pull every row out of the CSVs and convert each file to a NumPy array
    addition_dataset = []
    setup_dataset = None
    for idx, file in enumerate(file_list):
        addition_dataset.append(pd.read_csv(file).to_numpy())

    div_meter_pack = []
    rnn_dataset = []
    for n_idx, pack in enumerate(addition_dataset):
        label = pack[:, 0].tolist()
        label = list(set(label))
        temp_pack = pd.DataFrame(pack)
        for key in label:
            div_meter_pack.append(temp_pack[temp_pack[0] == key].to_numpy())

    for n_idx, pack in enumerate(div_meter_pack):
        # Packs shorter than 30 rows are tiled (oversampled by repetition)
        # so that at least one full sliding window can be cut from them.
        if len(pack) < 30:
            temp = pack.tolist()
            temp = temp * (int(30 / len(pack)) + 2)
            pack = np.array(temp)
        # Slide a window of `input_size` consecutive rows to build sequences.
        for i in range(len(pack) - input_size):
            rnn_dataset.append(pack[i:i + input_size])
        # if various_input is True:
        #     for i in range(len(pack)-input_size):
        #         rnn_dataset.append(pack[i:i+np.random.randint(input_size-7)+7])
    rnn_dataset = np.array(rnn_dataset)
    setup_dataset = rnn_dataset

    train_data, valid_data, test_data = data_split(setup_dataset,
                                                   shuffle=shuffle)
    pathloss_train_dataset = PathLossWithDetailDataset(input_data=train_data,
                                                       model_type=model_type)
    pathloss_test_dataset = PathLossWithDetailDataset(input_data=test_data,
                                                      model_type=model_type)
    pathloss_valid_dataset = PathLossWithDetailDataset(input_data=valid_data,
                                                       model_type=model_type)
    pathloss_train_dataloader = DataLoader(pathloss_train_dataset,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           num_workers=num_workers)
    pathloss_test_dataloader = DataLoader(pathloss_test_dataset,
                                          batch_size=batch_size,
                                          shuffle=shuffle,
                                          num_workers=num_workers)
    pathloss_valid_dataloader = DataLoader(pathloss_valid_dataset,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           num_workers=num_workers)
    return pathloss_train_dataloader, pathloss_valid_dataloader, pathloss_test_dataloader
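

# Usage sketch (path reused from Example #5 below; the other arguments are
# hypothetical):
# train_loader, valid_loader, test_loader = load_path_loss_with_detail_dataset(
#     '../dataset/v9/loader_test', model_type='CRNN', batch_size=128)
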
def get_scaler_checkpoint(input_dir, scaler_type='robust'):
    file_list = file_io.get_all_file_path(input_dir, file_extension='csv')

    dataset = pd.DataFrame()

    for idx, file in enumerate(file_list):
        temp = pd.read_csv(file, header=None)
        dataset = pd.concat([dataset, temp], ignore_index=True)

    # Column 0 is the distance label; fit the scaler on the features only.
    x_data = dataset.drop([0], axis='columns')

    if scaler_type == 'robust':
        scaler = RobustScaler().fit(x_data)
        filename = '{}_scaler.pkl'.format(scaler_type)
        joblib.dump(scaler, filename)
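

# Usage sketch: fit the scaler once, then restore it at inference time.
# joblib round-trips the fitted RobustScaler (the filename matches the
# format string above):
# get_scaler_checkpoint('../dataset/v9/loader_test', scaler_type='robust')
# scaler = joblib.load('robust_scaler.pkl')
# x_scaled = scaler.transform(x_data)
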
def get_addition_dataset(config):
    file_list = file_io.get_all_file_path(input_dir=config['input_dir'],
                                          file_extension='csv')
    file_list.sort()
    print(file_list)
    target_dataset = []
    addition_dataset = []

    # Original files contain        -> ['meter', 'mac', 'type', 'rssi']
    # Versioned-up files also carry -> ['meter', 'mac', 'type', 'rssi', 'channel']
    for file in file_list:
        temp = pd.read_csv(file)
        if config['device_id'] != '':
            temp = temp[temp['mac'] == config['device_id']]
        temp = temp.drop(['mac', 'type'], axis=1)  # drop columns we no longer need
        target_dataset.append(temp)
    # dropped -> ['meter', 'rssi', 'channel']

    for item in target_dataset:
        temp = []
        for idx, line in item.iterrows():
            data = line.tolist()
            data.append(config.get('tx_power'))
            data.append(config.get('rx_height'))
            data.append(config.get('tx_antenna_gain'))
            data.append(config.get('rx_antenna_gain'))
            data.append(config.get('covered'))
            if config['use_fspl']:
                data.append(path_loss.get_distance_with_rssi_fspl(data[1]))
            if config['inference']:
                # The distance label ('meter') is unknown at inference time,
                # so drop it from the feature row.
                del data[0]
            temp.append(data)
        addition_dataset.append(temp)
    # Final columns:
    # ['meter', 'rssi', 'channel', 'tx_power', 'rx_height', 'tx_antenna_gain',
    #  'rx_antenna_gain', 'covered', 'fspl']

    for idx, item in enumerate(addition_dataset):
        temp = pd.DataFrame(item)
        file_io.create_directory(config['save_dir'])
        temp.to_csv('{}/dataset_{}_mac_{}.csv'.format(config['save_dir'], idx,
                                                      config['device_id']),
                    header=False,
                    index=False)
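

# Example config (sketch): the key names are taken from the accesses above;
# every value here is hypothetical.
# config = {
#     'input_dir': '../dataset/v9/raw',
#     'save_dir': '../dataset/v9/addition',
#     'device_id': '',        # '' keeps rows for every MAC address
#     'tx_power': -59,
#     'rx_height': 1.0,
#     'tx_antenna_gain': 0,
#     'rx_antenna_gain': 0,
#     'covered': 0,
#     'use_fspl': True,
#     'inference': False,
# }
# get_addition_dataset(config)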
Example #5
import numpy as np
from tool import file_io
import pandas as pd


input_dir = '../dataset/v9/loader_test'
input_size = 20

# Take the directory where the files are stored and collect the file list
file_list = file_io.get_all_file_path(input_dir, file_extension='csv')
# Pull every row out of the CSVs and convert each file to a NumPy array
addition_dataset = []
setup_dataset = None
for idx, file in enumerate(file_list):
    addition_dataset.append(pd.read_csv(file).to_numpy())

div_meter_pack = []
rnn_dataset = []
for n_idx, pack in enumerate(addition_dataset):
    label = pack[:, 0].tolist()
    label = list(set(label))
    temp_pack = pd.DataFrame(pack)
    for key in label:
        div_meter_pack.append(temp_pack[temp_pack[0] == key].to_numpy())

for n_idx, pack in enumerate(div_meter_pack):
    print(len(pack))
    # Tile short packs, as in the loader above (here with a larger repeat
    # factor).
    if len(pack) < 30:
        temp = pack.tolist()
        temp = temp * (int(30 / len(pack)) + 6)
        pack = np.array(temp)
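    # The source example is truncated here; the sliding-window step from
    # load_path_loss_with_detail_dataset above would plausibly complete it:
    for i in range(len(pack) - input_size):
        rnn_dataset.append(pack[i:i + input_size])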