def _load(self, file_path):
    """Reads a data file and parses it into features and labels.

    Args:
        file_path (str): File to be loaded.

    Returns:
        Arrays holding the features and labels.

    Raises:
        ArgumentError: If the extension is not `csv`, `txt` or `json`.

    """

    # Whatever follows the last dot is treated as the extension
    extension = file_path.rsplit('.', 1)[-1]

    # Refusing anything without a matching loader function
    if extension not in ('csv', 'txt', 'json'):
        raise e.ArgumentError(
            'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
        )

    # Supported extensions map onto `loader.load_<extension>`
    read = getattr(loader, 'load_' + extension)

    # Parsing the loaded data into features and labels
    features, labels = p.parse_loader(read(file_path))

    return features, labels
def load_dataset(dataset='NSL-KDD'):
    """Loads a dataset.

    Args:
        dataset (str): Dataset's identifier (`nsl-kdd` or `unespy`),
            matched case-insensitively.

    Returns:
        X and Y (samples and labels).

    Raises:
        ValueError: If `dataset` is not a known identifier.

    """

    # Known identifiers and the .txt file each one is loaded from
    paths = {
        'nsl-kdd': 'data/nsl-kdd.txt',
        'unespy': 'data/unespy.txt',
    }

    # Comparing in lower case so the default `NSL-KDD` resolves to `nsl-kdd`
    key = str(dataset).lower()

    # Failing early with a clear message instead of hitting an unbound variable
    if key not in paths:
        raise ValueError(
            f'Dataset `{dataset}` not recognized. '
            f'It should be one of: {", ".join(sorted(paths))}'
        )

    # Loading a .txt file to a numpy array
    txt = l.load_txt(paths[key])

    # Parsing a pre-loaded numpy array
    X, Y = p.parse_loader(txt)

    return X, Y
def test_opf_pre_compute_distances():
    """Runs the distance pre-computation on a split of the boat dataset."""

    # Reading and parsing the boat dataset in one go
    samples, labels = parser.parse_loader(loader.load_txt('data/boat.txt'))

    # Only the first of the four values returned by the split is used
    train_samples, _, _, _ = splitter.split(samples, labels, 0.5, 1)

    # Writing the pre-computed distances to disk
    general.pre_compute_distance(
        train_samples,
        'boat_split_distances.txt',
        'log_squared_euclidean',
    )
def test_parse_loader():
    """Checks `parser.parse_loader` with empty, synthetic and file-based input."""

    # An empty input is expected to produce neither samples nor labels
    X, Y = parser.parse_loader([])

    assert X is None
    assert Y is None

    # Each fallback below only runs when the previous parse raises.
    # `except Exception` is used instead of a bare `except` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the test.
    try:
        # Array filled with ones
        data = np.ones((4, 4))
        X, Y = parser.parse_loader(data)
    except Exception:
        try:
            # Same array with a single entry changed to 3
            data = np.ones((4, 4))
            data[3, 1] = 3
            X, Y = parser.parse_loader(data)
        except Exception:
            # Falling back to the boat dataset loaded from disk
            csv = loader.load_csv('data/boat.csv')
            X, Y = parser.parse_loader(csv)

    # Shapes expected from the boat dataset
    assert X.shape == (100, 2)
    assert Y.shape == (100, )
def load_dataset(file_path):
    """Reads a .txt file and parses its content.

    Args:
        file_path (str): Input file to be loaded.

    Returns:
        Samples and labels arrays.

    """

    # Loading the .txt file and handing the result straight to the parser
    samples, labels = p.parse_loader(l.load_txt(file_path))

    return samples, labels
def _load(self, file_path):
    """Reads a data file and parses it into features and labels.

    Args:
        file_path (str): File to be loaded.

    Returns:
        Arrays holding the features and labels.

    Raises:
        ArgumentError: If the extension is not `csv`, `txt` or `json`.

    """

    # Name of the loader function that handles each supported extension
    loader_names = {
        'csv': 'load_csv',
        'txt': 'load_txt',
        'json': 'load_json',
    }

    # Getting file extension
    extension = file_path.split('.')[-1]

    # Looking up which loader function should be used, if any
    loader_name = loader_names.get(extension)

    # If extension is not recognized
    if loader_name is None:
        # Raises an ArgumentError exception
        raise e.ArgumentError(
            'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
        )

    # Calling the method that actually loads the file
    data = getattr(loader, loader_name)(file_path)

    # Parsing array
    features, labels = p.parse_loader(data)

    return features, labels
def load_split_dataset(file_path, train_split=0.5, random_state=1):
    """Reads a .txt file, parses it and splits it into training and validation sets.

    Args:
        file_path (str): Input file to be loaded.
        train_split (float): Percentage of training set.
        random_state (int): Seed used to provide a deterministic trait.

    Returns:
        Training and validation sets along their indexes.

    """

    # Loading the .txt file and parsing it into samples and labels
    samples, labels = p.parse_loader(l.load_txt(file_path))

    # Splitting data into training and validation sets with their indexes
    X_train, X_val, Y_train, Y_val, I_train, I_val = s.split_with_index(
        samples,
        labels,
        percentage=train_split,
        random_state=random_state,
    )

    # Regrouping the outputs so each set keeps its samples, labels and indexes together
    training = (X_train, Y_train, I_train)
    validation = (X_val, Y_val, I_val)

    return (*training, *validation)
import opfython.math.general as g
import opfython.stream.loader as l
import opfython.stream.parser as p
import opfython.stream.splitter as s

# Input dataset, output file and distance metric used by this script
INPUT_PATH = 'data/boat.txt'
OUTPUT_PATH = 'boat_split_distances.txt'
DISTANCE = 'log_squared_euclidean'

# Reading the .txt dataset
txt = l.load_txt(INPUT_PATH)

# Turning the loaded data into samples and labels
X, Y = p.parse_loader(txt)

# Saving the pre-computed distances between samples to the output file
g.pre_compute_distance(X, OUTPUT_PATH, distance=DISTANCE)
import numpy as np
import pytest

from opfython.math import distance
from opfython.stream import loader, parser
from opfython.subgraphs import knn

# Module-level fixture: the boat dataset is loaded and parsed once at import
# time and the resulting samples / labels are shared by the tests below
csv = loader.load_csv('data/boat.csv')
X, Y = parser.parse_loader(csv)


def test_knn_subgraph_n_clusters():
    """Checks that a newly created `KNNSubgraph` reports `n_clusters == 0`."""

    subgraph = knn.KNNSubgraph(X, Y)

    assert subgraph.n_clusters == 0


def test_knn_subgraph_n_clusters_setter():
    """Exercises the `n_clusters` setter, falling back to 1 when it raises."""

    subgraph = knn.KNNSubgraph(X, Y)

    # Presumably a non-integer value is rejected by the setter -- TODO confirm.
    # NOTE(review): a bare `except:` also catches KeyboardInterrupt / SystemExit.
    try:
        subgraph.n_clusters = 0.5
    except:
        subgraph.n_clusters = 1

    assert subgraph.n_clusters == 1

    # Presumably a negative value is rejected by the setter -- TODO confirm
    try:
        subgraph.n_clusters = -1
    except:
        subgraph.n_clusters = 1