def __init__(self, config_filepath):
        """
        Parameters:
        -----------
        config_filename : None/str
            Defaults to project config: config.json.
        """

        self.config = data.load_json(config_filepath)
        self.m2l_dirpath = ROOT / self.config["operator_dirname"]

        # Load operators and key2index lookup tables
        operator_files = self.m2l_dirpath.glob('m2l_level*')
        index_to_key_files = self.m2l_dirpath.glob('index*')

        self.operators = {
            level: None for level in range(2, self.config['octree_max_level']+1)
        }

        self.index_to_key = {
            level: None for level in range(2, self.config['octree_max_level']+1)
        }

        for filename in operator_files:
            level = self.get_level(str(filename))
            self.operators[level] = data.load_pickle(
                f'm2l_level_{level}', self.m2l_dirpath
            )

        for filename in index_to_key_files:
            level = self.get_level(str(filename))
            self.index_to_key[level] = data.load_pickle(
                f'index_to_key_level_{level}', self.m2l_dirpath
            )
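
The loops above call a get_level helper that recovers the octree level from an operator filename; that method is not shown in this example. A minimal standalone sketch, assuming filenames of the form m2l_level_3.pkl and index_to_key_level_3.pkl:

import re


def get_level(filename):
    # Hypothetical helper: pull the trailing level out of names like
    # 'm2l_level_3' or 'index_to_key_level_3'.
    match = re.search(r'level_(\d+)', filename)
    if match is None:
        raise ValueError(f'No level found in filename: {filename}')
    return int(match.group(1))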
Example #2
def main():
    args = get_parser().parse_args()

    config = Config.from_file(args.config)

    logger = get_logger(config.output_path)
    logger.info(args)
    logger.info("=> Starting evaluation ...")

    logger.info("Load data")
    corpus = io.load_json(config.input_path, append_title=config.use_title)

    logger.info("Perform preprocessing")
    preprocessed_corpus = Preprocessing(
        corpus["keywords"],
        config=config.preprocessing,
        datatype="keywords",
        logger=logger,
    ).apply_preprocessing()

    preprocessed_corpus["token"] = preprocessed_corpus["token"].apply(flatten)
    preprocessed_corpus.drop("abstract", axis=1, inplace=True)

    logger.info("Start clustering")
    clustering = Clustering(
        preprocessed_corpus,
        clustering_config=config.clustering,
        dim_reduction_config=config.dim_reduction,
        logger=logger,
    )
    model = clustering.perform_clustering()

    logger.info(f"Save results to {config.output_path}")
    corpus["label"] = model.labels_
    io.write_json(config.input_path.split(".")[0] + "_labeled.json", corpus)
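
The flatten function applied to the token column is not shown here. Assuming each row holds one token list per keyword, a one-level flatten like the following sketch would produce a single token list per document:

def flatten(nested):
    # Merge one level of nesting: [['a', 'b'], ['c']] -> ['a', 'b', 'c'].
    return [token for sublist in nested for token in sublist]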
Example #3
    def __init__(self, config_filename=None):

        if config_filename is not None:
            config_filepath = PARENT / config_filename
        else:
            config_filepath = PARENT / "config.json"

        self.config = data.load_json(config_filepath)

        data_dirpath = PARENT / self.config["data_dirname"]
        operator_dirpath = PARENT / self.config["operator_dirname"]
        source_filename = self.config['source_filename']
        target_filename = self.config['target_filename']
        source_densities_filename = self.config['source_densities_filename']

        # Load sources, targets and source densities
        self.surface = data.load_hdf5_to_array('surface', 'surface',
                                               operator_dirpath)
        self.sources = data.load_hdf5_to_array(source_filename,
                                               source_filename, data_dirpath)
        self.targets = data.load_hdf5_to_array(target_filename,
                                               target_filename, data_dirpath)
        self.source_densities = data.load_hdf5_to_array(
            source_densities_filename, source_densities_filename, data_dirpath)

        # Load precomputed operators
        self.uc2e_u = data.load_hdf5_to_array('uc2e_u', 'uc2e_u',
                                              operator_dirpath)
        self.uc2e_v = data.load_hdf5_to_array('uc2e_v', 'uc2e_v',
                                              operator_dirpath)
        self.m2m = data.load_hdf5_to_array('m2m', 'm2m', operator_dirpath)
        self.l2l = data.load_hdf5_to_array('l2l', 'l2l', operator_dirpath)
        self.m2l = data.load_pickle('m2l_compressed', operator_dirpath)

        # Load configuration properties
        self.maximum_level = self.config['octree_max_level']
        self.kernel_function = kernel.KERNELS[self.config['kernel']]()
        self.order = self.config['order']
        self.octree = octree.Octree(self.sources, self.targets,
                                    self.maximum_level, self.source_densities)

        # Coefficients discretising surface of a node
        self.ncoefficients = 6 * (self.order - 1)**2 + 2

        # Containers for results
        self.result_data = [
            density.Potential(target, np.zeros(1, dtype='float64'))
            for target in self.octree.targets
        ]

        self.source_data = {
            key: node.Node(key, self.ncoefficients)
            for key in self.octree.non_empty_source_nodes
        }

        self.target_data = {
            key: node.Node(key, self.ncoefficients)
            for key in self.octree.non_empty_target_nodes
        }
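
The utils.data loaders used throughout follow a (dataset_name, filename, dirpath) convention. A plausible sketch using h5py and pickle, offered only as an assumption since the real module is not shown:

import pathlib
import pickle

import h5py


def load_hdf5_to_array(dataset_name, filename, dirpath):
    # Read one dataset from '<dirpath>/<filename>.hdf5' into a NumPy array.
    filepath = pathlib.Path(dirpath) / f'{filename}.hdf5'
    with h5py.File(filepath, 'r') as f:
        return f[dataset_name][...]


def load_pickle(filename, dirpath):
    # Unpickle '<dirpath>/<filename>.pkl'.
    filepath = pathlib.Path(dirpath) / f'{filename}.pkl'
    with open(filepath, 'rb') as f:
        return pickle.load(f)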
Example #4
    data_func = DATA[dtype]

    sources, targets, source_densities = data_func(npoints)

    data.save_array_to_hdf5(data_dirpath, 'sources', sources)
    data.save_array_to_hdf5(data_dirpath, 'targets', targets)
    data.save_array_to_hdf5(data_dirpath, 'source_densities', source_densities)


if __name__ == "__main__":

    if len(sys.argv) < 4:
        raise ValueError(
            'Must specify config filepath, number of points and data type! '
            'e.g. `python generate_test_data.py /path/to/config.json 100 random`'
        )

    elif sys.argv[3] not in DATA:
        raise ValueError(
            f'Data type `{sys.argv[3]}` not valid. Must be either `separated` '
            'or `random`.'
        )

    else:
        config_filepath = sys.argv[1]
        npoints = sys.argv[2]
        dtype = sys.argv[3]
        config = data.load_json(config_filepath)
        config['npoints'] = int(npoints)
        config['dtype'] = dtype
        main(**config)
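
DATA maps a data type name ('random' or 'separated') to a generator returning (sources, targets, source_densities). A hedged sketch of what the 'random' entry might look like, assuming points in the unit cube and random densities:

import numpy as np


def random_data(npoints):
    # Hypothetical generator: uniform random 3D points and densities.
    rng = np.random.default_rng()
    sources = rng.random((npoints, 3))
    targets = rng.random((npoints, 3))
    source_densities = rng.random(npoints)
    return sources, targets, source_densities


DATA = {'random': random_data}  # 'separated' omitted from this sketch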
Example #5
import os
import pathlib

import numpy as np
import pytest

import fmm.hilbert as hilbert
from fmm.kernel import KERNELS
from fmm.octree import Octree
import fmm.operator as operator
import utils.data as data


HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
ROOT = HERE.parent.parent
CONFIG_FILEPATH = ROOT / "test_config.json"
CONFIG = data.load_json(CONFIG_FILEPATH)

ORDER = CONFIG['order']
SURFACE = operator.compute_surface(ORDER)
KERNEL_FUNCTION = KERNELS['laplace']()

OPERATOR_DIRPATH = ROOT / CONFIG['operator_dirname']
DATA_DIRPATH = ROOT / CONFIG['data_dirname']

RTOL = 1e-1

@pytest.fixture
def octree():
    sources = data.load_hdf5_to_array('sources', 'sources', DATA_DIRPATH)
    targets = data.load_hdf5_to_array('targets', 'targets', DATA_DIRPATH)
    source_densities = data.load_hdf5_to_array(
        'source_densities', 'source_densities', DATA_DIRPATH)

    return Octree(sources, targets, CONFIG['octree_max_level'], source_densities)
Example #6
import os
import pathlib
import subprocess

import click

from utils.data import load_json

HELP_TEXT = """
    Command Line Interface for PyExaFMM
"""

HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
CONFIG = load_json(HERE.parent / 'config.json')


@click.group(help=HELP_TEXT)
def cli():
    pass


@click.command(help='Build dev version in current python env')
def build():
    click.echo('Building and installing')
    subprocess.run(['conda', 'develop', '.'])


@click.command(help='Run test suite')
def test():
    click.echo('Running test suite')
    subprocess.run(['pytest', 'fmm'])
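
As excerpted, the group never registers its commands. A typical click wiring, assuming the real module does this further down, would be:

cli.add_command(build)
cli.add_command(test)


if __name__ == '__main__':
    cli()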