コード例 #1
0
ファイル: process.py プロジェクト: benhe119/OpenUBA
    def process_data(self, data_folder: str, log_data_obj: dict):
        """Read a CSV log source through a ``DatasetSession`` and emit diagnostics.

        Args:
            data_folder: Forwarded as the first argument of
                ``DatasetSession.read_csv``; also written to the opening
                log message.
            log_data_obj: Log description. The keys ``log_name``, ``type``,
                ``location_type`` and ``folder`` are looked up
                unconditionally.

        Raises:
            KeyError: If one of the keys listed above is absent.
        """
        logging.warning(f"Processing Data for : {data_folder!s}")

        # Every key is looked up eagerly and in this order, so a missing key
        # raises KeyError before any session is created. ``log_name`` is not
        # used further down in this method.
        log_name, log_type, location_type, folder = (
            log_data_obj[key]
            for key in ("log_name", "type", "location_type", "folder")
        )

        session = DatasetSession(log_type)

        # STEP1: check for new datasets from folder directory.
        # Only the "csv" log type is handled here.
        if log_type == "csv":
            # Have the session load the CSV.
            session.read_csv(data_folder, folder, location_type)

            holds_dataset = isinstance(session.dataset, Dataset)
            print(
                f"isinstance(dataset_session.dataset, Dataset): {holds_dataset!s}"
            )

            session_size: Tuple = session.get_size()
            logging.info(f"Dataset Session size: {session_size!s}")

            # Walk down to the underlying dataframe and show its shape.
            print("======GET DATAFRAME ======")
            core_frame = session.get_dataset().get_dataframe()
            print(core_frame.data.shape)
コード例 #2
0
ファイル: user.py プロジェクト: wjsshide/OpenUBA
    def extract_users(dataset_session: DatasetSession,
                      log_metadata_obj: dict) -> List:
        """Return the distinct values found in the id column of the session's data.

        Args:
            dataset_session: Object whose
                ``get_dataset().get_dataframe().data`` is the pandas
                dataframe to inspect.
            log_metadata_obj: Metadata mapping; ``log_metadata_obj["id_feature"]``
                names the column that holds the user ids.

        Returns:
            The result of ``np.unique`` over the id column after missing
            values were replaced by the string ``"NA"``. Note that this is
            a NumPy array even though the signature is annotated ``List``.

        Raises:
            KeyError: If ``"id_feature"`` is missing from ``log_metadata_obj``
                or the named column is not present in the dataframe.
        """
        # Unwrap session -> dataset -> core dataframe -> pandas dataframe.
        dataset: Dataset = dataset_session.get_dataset()
        core_frame: CoreDataFrame = dataset.get_dataframe()
        frame: pd.DataFrame = core_frame.data

        # Sanity output: shape of the loaded frame.
        shape: Tuple = frame.shape
        logging.warning(f"execute(): dataframe shape: {shape!s}")

        logging.info(
            "ExtractAllUsersCSV: extract_users log_file_data.columns: - "
            f"{frame.columns!s}")
        logging.info(
            "ExtractAllUsersCSV: extract_users log_metadata_obj: - "
            f"{log_metadata_obj!s}")

        id_column: pd.Series = frame[log_metadata_obj["id_feature"]]
        logging.info(
            "ExtractAllUsersCSV, extract_users, id_column, len of column: "
            f"{len(id_column)!s}")

        # Missing ids are collapsed into a single "NA" entry before
        # de-duplicating.
        user_set: List = np.unique(id_column.fillna("NA"))
        logging.info(
            "ExtractAllUsersCSV, extract_users, user_set len of column: "
            f"{len(user_set)!s}")
        logging.error(user_set)
        return user_set
コード例 #3
0
ファイル: process.py プロジェクト: yslhzj/OpenUBA
    def process_data(self, data_folder: str, log_data_obj: dict) -> CoreDataFrame:
        """Load a log source into a ``DatasetSession`` and return its dataframe.

        Args:
            data_folder: Forwarded as the first argument of
                ``DatasetSession.read_csv``; also written to the opening
                log message.
            log_data_obj: Log description. The keys ``log_name``, ``type``,
                ``location_type``, ``folder`` and ``id_feature`` are looked
                up unconditionally.

        Returns:
            The value of ``get_dataset().get_dataframe()`` on the session.

        Raises:
            KeyError: If one of the keys listed above is absent.
        """
        logging.warning(f"Processing Data for : {data_folder!s}")

        # Required keys, looked up in this order; a missing one raises
        # KeyError here. ``log_name`` and ``id_feature`` are not used further
        # down in this method.
        required_keys = ("log_name", "type", "location_type", "folder", "id_feature")
        log_name, log_type, location_type, folder, id_feature = (
            log_data_obj[key] for key in required_keys
        )

        session = DatasetSession(log_type)

        # Only the CSV source type triggers a read.
        is_csv = log_type == DataSourceFileType.CSV.value
        if is_csv:
            # Have the session load the CSV.
            session.read_csv(data_folder, folder, location_type)

            holds_dataset = isinstance(session.dataset, Dataset)
            print(
                f"isinstance(dataset_session.dataset, Dataset): {holds_dataset!s}"
            )

            session_size: Tuple = session.get_size()
            logging.info(f"Dataset Session size: {session_size!s}")

        # Hand back the dataframe wrapper held by the session's dataset.
        print("======GET DATAFRAME ======")
        dataset = session.get_dataset()
        return dataset.get_dataframe()