Example #1
def load_dataset(dataset='nsl-kdd'):
    """Loads a dataset.

    Args:
        dataset (str): Dataset's identifier.

    Returns:
        X and Y (samples and labels).

    """

    # If the dataset is `nsl-kdd`
    if dataset == 'nsl-kdd':
        # Loading a .txt file to a numpy array
        txt = l.load_txt('data/nsl-kdd.txt')

    # If the dataset is `unespy`
    elif dataset == 'unespy':
        # Loading a .txt file to a numpy array
        txt = l.load_txt('data/unespy.txt')

    # If the dataset identifier is not recognized
    else:
        raise ValueError('Dataset not recognized. It should be `nsl-kdd` or `unespy`.')

    # Parsing a pre-loaded numpy array
    X, Y = p.parse_loader(txt)

    return X, Y
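A minimal usage sketch of the helper above (the call assumes the referenced data files exist on disk):

# Hypothetical call; the identifier must match one of the branches above
X, Y = load_dataset(dataset='unespy')

print(X.shape, Y.shape)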
Example #2
def test_load_txt():
    # Loading from a non-existent path should yield None instead of raising
    txt = loader.load_txt('boat.txt')

    assert txt is None

    txt = loader.load_txt('data/boat.txt')

    assert txt.shape == (100, 4)
Example #3
    def _load(self, file_path):
        """Loads and parses a dataframe from a file.

        Args:
            file_path (str): File to be loaded.

        Returns:
            Arrays holding the features and labels.

        """

        # Getting file extension
        extension = file_path.split('.')[-1]

        if extension == 'csv':
            data = loader.load_csv(file_path)

        elif extension == 'txt':
            data = loader.load_txt(file_path)

        elif extension == 'json':
            data = loader.load_json(file_path)

        else:
            raise e.ArgumentError(
                'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
            )

        X, Y = p.parse_loader(data)

        return X, Y
Example #4
    def _read_distances(self, file_name):
        """Reads the distance between nodes from a pre-defined file.

        Args:
            file_name (str): File to be loaded.

        """

        logger.debug('Running private method: read_distances().')

        # Getting file extension
        extension = file_name.split('.')[-1]

        if extension == 'csv':
            distances = loader.load_csv(file_name)

        elif extension == 'txt':
            distances = loader.load_txt(file_name)

        else:
            # Raises an ArgumentError exception
            raise e.ArgumentError('File extension not recognized. It should be either `.csv` or `.txt`')

        # Check if distances have been properly loaded
        if distances is None:
            raise e.ValueError('Pre-computed distances could not be properly loaded')

        # Store the distance matrix in the property
        self.pre_distances = distances
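The pre-defined file read here can be produced ahead of time; a short sketch that mirrors the pre-computation shown in the later examples (the output file name is illustrative):

import opfython.math.general as g
import opfython.stream.loader as loader
import opfython.stream.parser as p

# Load and parse the samples, then dump their pairwise distances to a .txt file
txt = loader.load_txt('data/boat.txt')
X, _ = p.parse_loader(txt)

g.pre_compute_distance(X, 'boat_distances.txt', distance='log_squared_euclidean')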
Example #5
def test_opf_pre_compute_distances():
    txt = loader.load_txt('data/boat.txt')

    X, Y = parser.parse_loader(txt)

    X_train, _, _, _ = splitter.split(X, Y, 0.5, 1)

    general.pre_compute_distance(X_train, 'boat_split_distances.txt', 'log_squared_euclidean')
Example #6
def load_dataset(file_path):
    """Loads data from a .txt file and parses it.

    Args:
        file_path (str): Input file to be loaded.

    Returns:
        Samples and labels arrays.

    """

    # Loading a .txt file to a numpy array
    txt = l.load_txt(file_path)

    # Parsing a pre-loaded numpy array
    X, Y = p.parse_loader(txt)

    return X, Y
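A short usage sketch (the input path is illustrative and follows the data/ layout of the other examples):

# Hypothetical call on a .txt file in the format expected by the parser
X, Y = load_dataset('data/boat.txt')

print(X.shape, Y.shape)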
Example #7
    def _load(self, file_path):
        """Loads and parses a dataframe from a file.

        Args:
            file_path (str): File to be loaded.

        Returns:
            Arrays holding the features and labels.

        """

        # Getting file extension
        extension = file_path.split('.')[-1]

        # Check if extension is .csv
        if extension == 'csv':
            # If yes, call the method that actually loads csv
            data = loader.load_csv(file_path)

        # Check if extension is .txt
        elif extension == 'txt':
            # If yes, call the method that actually loads txt
            data = loader.load_txt(file_path)

        # Check if extension is .json
        elif extension == 'json':
            # If yes, call the method that actually loads json
            data = loader.load_json(file_path)

        # If extension is not recognized
        else:
            # Raises an ArgumentError exception
            raise e.ArgumentError(
                'File extension not recognized. It should be `.csv`, `.json` or `.txt`'
            )

        # Parsing array
        X, Y = p.parse_loader(data)

        return X, Y
Example #8
    def _read_distances(self, file_path):
        """Reads the distance between nodes from a pre-defined file.

        Args:
            file_path (str): File to be loaded.

        Returns:
            A matrix with pre-computed distances.

        """

        logger.debug('Running private method: read_distances().')

        # Getting file extension
        extension = file_path.split('.')[-1]

        # Check if extension is .csv
        if extension == 'csv':
            # If yes, call the method that actually loads csv
            distances = loader.load_csv(file_path)

        # Check if extension is .txt
        elif extension == 'txt':
            # If yes, call the method that actually loads txt
            distances = loader.load_txt(file_path)

        # If extension is not recognized
        else:
            # Raises an ArgumentError exception
            raise e.ArgumentError(
                'File extension not recognized. It should be either `.csv` or `.txt`'
            )

        # Check if distances have been properly loaded
        if distances is None:
            # If not, raises a ValueError
            raise e.ValueError(
                'Pre-computed distances could not be properly loaded')

        return distances
Example #9
def load_split_dataset(file_path, train_split=0.5, random_state=1):
    """Loads data from a .txt file, parses it and splits into training and validation sets.

    Args:
        file_path (str): Input file to be loaded.
        train_split (float): Percentage of training set.
        random_state (int): Seed used to provide a deterministic trait.

    Returns:
        Training and validation sets along with their indexes.

    """

    # Loading a .txt file to a numpy array
    txt = l.load_txt(file_path)

    # Parsing a pre-loaded numpy array
    X, Y = p.parse_loader(txt)

    # Splitting data into training and validation sets with their indexes
    X_train, X_val, Y_train, Y_val, I_train, I_val = s.split_with_index(
        X, Y, percentage=train_split, random_state=random_state)

    return X_train, Y_train, I_train, X_val, Y_val, I_val
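A sketch of consuming this helper end to end, assuming the SupervisedOPF model from opfython.models.supervised (the returned indexes are mainly useful when pre-computed distances are involved):

from opfython.models.supervised import SupervisedOPF

# Hypothetical usage on an illustrative file path
X_train, Y_train, I_train, X_val, Y_val, I_val = load_split_dataset(
    'data/boat.txt', train_split=0.5, random_state=1)

# Training and evaluating a supervised OPF on the resulting split
opf = SupervisedOPF(distance='log_squared_euclidean')
opf.fit(X_train, Y_train)
preds = opf.predict(X_val)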
Example #10
import opfython.math.general as g
import opfython.stream.loader as l
import opfython.stream.parser as p
import opfython.stream.splitter as s

# Loading a .txt file to a numpy array
txt = l.load_txt('data/boat.txt')

# Parsing a pre-loaded numpy array
X, Y = p.parse_loader(txt)

# Creating a file of pre-computed distances
g.pre_compute_distance(X,
                       'boat_split_distances.txt',
                       distance='log_squared_euclidean')
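The resulting file can be reused by a classifier instead of recomputing distances on the fly; a minimal sketch, assuming SupervisedOPF accepts a pre_computed_distance path in its constructor:

from opfython.models.supervised import SupervisedOPF

# Sketch: point the classifier at the file produced above and fit on the same samples
opf = SupervisedOPF(distance='log_squared_euclidean',
                    pre_computed_distance='boat_split_distances.txt')
opf.fit(X, Y)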
Example #11
import opfython.stream.loader as l
import opfython.stream.parser as p
from opfython.subgraphs import KNNSubgraph

# Defining an input file
input_file = 'data/boat.txt'

# Loading a .txt file to a numpy array
txt = l.load_txt(input_file)

# Parsing a pre-loaded numpy array
X, Y = p.parse_loader(txt)

# Creating a knn-subgraph structure
g = KNNSubgraph(X, Y)

# KNNSubgraph can also be directly created from a file
g = KNNSubgraph(from_file=input_file)
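A quick follow-up for inspecting the freshly built structure (assuming the subgraph exposes n_nodes and n_features properties):

# Basic sanity checks on the subgraph created above
print(g.n_nodes, g.n_features)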
Example #12
import opfython.stream.loader as l

# Loading a .csv file
csv = l.load_csv('data/sample.csv')

# Loading a .txt file
txt = l.load_txt('data/sample.txt')

# Loading a .json file
json = l.load_json('data/sample.json')
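Whichever loader is used, the resulting array is parsed the same way, mirroring the parser calls in the earlier examples:

import opfython.stream.parser as p

# Any of the arrays loaded above can be split into samples (X) and labels (Y)
X, Y = p.parse_loader(csv)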