def load_dataset(dataset='nsl-kdd'):
    """Loads a dataset based on its identifier.

    Args:
        dataset (str): Dataset's identifier. Should be `nsl-kdd` or `unespy`.

    Returns:
        X and Y (samples and labels).

    Raises:
        ValueError: If the dataset identifier is not recognized.

    """

    # If the dataset is `nsl-kdd`
    if dataset == 'nsl-kdd':
        # Loading a .txt file to a numpy array
        txt = l.load_txt('data/nsl-kdd.txt')

    # If the dataset is `unespy`
    elif dataset == 'unespy':
        # Loading a .txt file to a numpy array
        txt = l.load_txt('data/unespy.txt')

    # If the dataset is not recognized
    else:
        # Previously, the default value `NSL-KDD` never matched the
        # lowercase check, leaving `txt` unbound (UnboundLocalError);
        # the default is now lowercase and unknown ids fail explicitly
        raise ValueError('Dataset identifier not recognized. It should be `nsl-kdd` or `unespy`')

    # Parsing a pre-loaded numpy array
    X, Y = p.parse_loader(txt)

    return X, Y
def test_load_txt():
    """Checks that loading a missing path yields None and a valid path yields data."""

    # A non-existent path should produce no data at all
    missing = loader.load_txt('boat.txt')
    assert missing is None

    # A valid path should produce an array with the expected dimensions
    loaded = loader.load_txt('data/boat.txt')
    assert loaded.shape == (100, 4)
def _load(self, file_path):
    """Loads and parses a dataframe from a file.

    Args:
        file_path (str): File to be loaded.

    Returns:
        Arrays holding the features and labels.

    """

    # Maps each supported extension onto its loading function
    loaders = {
        'csv': loader.load_csv,
        'txt': loader.load_txt,
        'json': loader.load_json
    }

    # Extracting the extension from the file name
    extension = file_path.split('.')[-1]

    # Guard clause: unsupported extensions fail immediately
    if extension not in loaders:
        raise e.ArgumentError(
            'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
        )

    # Loading the raw data and converting it into samples and labels
    data = loaders[extension](file_path)
    X, Y = p.parse_loader(data)

    return X, Y
def _read_distances(self, file_name):
    """Reads the distance between nodes from a pre-defined file.

    Args:
        file_name (str): File to be loaded.

    Raises:
        ArgumentError: If the file extension is not `.csv` or `.txt`.
        ValueError: If the pre-computed distances could not be loaded.

    """

    logger.debug('Running private method: read_distances().')

    # Getting file extension
    extension = file_name.split('.')[-1]

    if extension == 'csv':
        distances = loader.load_csv(file_name)
    elif extension == 'txt':
        distances = loader.load_txt(file_name)
    else:
        # Raises an ArgumentError exception
        # (fixed the missing opening backtick around `.txt` in the message)
        raise e.ArgumentError('File extension not recognized. It should be either `.csv` or `.txt`')

    # Check if distances have been properly loaded
    if distances is None:
        raise e.ValueError('Pre-computed distances could not been properly loaded')

    # Apply the distances matrix to the property
    self.pre_distances = distances
def test_opf_pre_compute_distances():
    """Checks that distances can be pre-computed from a training split."""

    # Loading and parsing the sample dataset
    data = loader.load_txt('data/boat.txt')
    X, Y = parser.parse_loader(data)

    # Keeping only the training portion of the split
    X_train, _, _, _ = splitter.split(X, Y, 0.5, 1)

    # Dumping the pairwise distances to disk
    general.pre_compute_distance(X_train, 'boat_split_distances.txt', 'log_squared_euclidean')
def load_dataset(file_path):
    """Loads data from a .txt file and parses it.

    Args:
        file_path (str): Input file to be loaded.

    Returns:
        Samples and labels arrays.

    """

    # Reads the raw .txt data and converts it straight into samples and labels
    return p.parse_loader(l.load_txt(file_path))
def _load(self, file_path):
    """Loads and parses a dataframe from a file.

    Args:
        file_path (str): File to be loaded.

    Returns:
        Arrays holding the features and labels.

    """

    # Extracting the extension from the file name
    extension = file_path.split('.')[-1]

    # Looking up the loading function for this extension
    load_fn = {
        'csv': loader.load_csv,
        'txt': loader.load_txt,
        'json': loader.load_json,
    }.get(extension)

    # Unknown extensions are rejected up front
    if load_fn is None:
        raise e.ArgumentError(
            'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
        )

    # Loading and parsing the data into samples and labels
    X, Y = p.parse_loader(load_fn(file_path))

    return X, Y
def _read_distances(self, file_path):
    """Reads the distance between nodes from a pre-defined file.

    Args:
        file_path (str): File to be loaded.

    Returns:
        A matrix with pre-computed distances.

    Raises:
        ArgumentError: If the file extension is not `.csv` or `.txt`.
        ValueError: If the pre-computed distances could not be loaded.

    """

    logger.debug('Running private method: read_distances().')

    # Getting file extension
    extension = file_path.split('.')[-1]

    # Check if extension is .csv
    if extension == 'csv':
        # If yes, call the method that actually loads csv
        distances = loader.load_csv(file_path)

    # Check if extension is .txt
    elif extension == 'txt':
        # If yes, call the method that actually loads txt
        distances = loader.load_txt(file_path)

    # If extension is not recognized
    else:
        # Raises an ArgumentError exception
        # (fixed the missing opening backtick around `.txt` in the message)
        raise e.ArgumentError(
            'File extension not recognized. It should be either `.csv` or `.txt`'
        )

    # Check if distances have been properly loaded
    if distances is None:
        # If not, raises a ValueError
        raise e.ValueError(
            'Pre-computed distances could not been properly loaded')

    return distances
def load_split_dataset(file_path, train_split=0.5, random_state=1):
    """Loads data from a .txt file, parses it and splits into training and validation sets.

    Args:
        file_path (str): Input file to be loaded.
        train_split (float): Percentage of training set.
        random_state (int): Seed used to provide a deterministic trait.

    Returns:
        Training and validation sets along their indexes.

    """

    # Reading the raw .txt file and parsing it into samples and labels
    samples, labels = p.parse_loader(l.load_txt(file_path))

    # Splitting into training/validation portions plus their original indexes
    split = s.split_with_index(
        samples, labels, percentage=train_split, random_state=random_state)
    X_train, X_val, Y_train, Y_val, I_train, I_val = split

    return X_train, Y_train, I_train, X_val, Y_val, I_val
import opfython.math.general as g
import opfython.stream.loader as l
import opfython.stream.parser as p
import opfython.stream.splitter as s

# Reading the raw samples from disk
data = l.load_txt('data/boat.txt')

# Converting the raw array into features and labels
X, Y = p.parse_loader(data)

# Dumping the pairwise distances to a file for later reuse
g.pre_compute_distance(X, 'boat_split_distances.txt', distance='log_squared_euclidean')
import opfython.stream.loader as l
import opfython.stream.parser as p
from opfython.subgraphs import KNNSubgraph

# Path to the dataset that feeds the subgraph
input_file = 'data/boat.txt'

# Reading the raw .txt data
txt = l.load_txt(input_file)

# Turning the raw data into features and labels
X, Y = p.parse_loader(txt)

# Building a knn-subgraph from the parsed arrays
g = KNNSubgraph(X, Y)

# Alternatively, a KNNSubgraph can be built straight from the file
g = KNNSubgraph(from_file=input_file)
import opfython.stream.loader as l

# Reading a .csv file into memory
csv = l.load_csv('data/sample.csv')

# Reading a .txt file into memory
txt = l.load_txt('data/sample.txt')

# Reading a .json file into memory
json = l.load_json('data/sample.json')