Beispiel #1
0
def _eval_regressor(regressor, X, y):
    """Runs :regressor: on the test data :X:, :y: and summarises its errors.

    Returns a tools.Object with the prediction, the inputs :X: and :y:, the
    pointwise difference between prediction and :y:, and several statistics
    of that difference (largest absolute deviation, mean squared error, sum
    of squared errors, and that sum divided by len(X)).
    """

    # Regressors carrying a 'no_tf' attribute are handed the raw data through
    # a plain callable; all others get it wrapped by dg.BatchData.to_dataset.
    if not hasattr(regressor, 'no_tf'):
        input_fn = dg.BatchData.to_dataset((X, y))
    else:
        def input_fn():
            return X, y

    # Only the first item yielded by predict is used.
    predictions = regressor.predict(input_fn=input_fn,
                                    yield_single_examples=False)
    prediction = next(predictions)

    diff = prediction - y
    squared_error = np.square(diff)
    total_loss = np.sum(squared_error)
    return tools.Object(prediction=prediction,
                        X=X,
                        y=y,
                        diff=diff,
                        max_deviation=np.max(np.abs(diff)),
                        average_loss=np.mean(squared_error),
                        loss=total_loss / len(X),
                        total_loss=total_loss)
Beispiel #2
0
def _eval_regressor(regressor_factory, X, y):
    """Builds a regressor from :regressor_factory: and evaluates it on the
    test data :X:, :y:.

    The raw prediction is passed through the inverse transform of the
    factory's processor before being compared against :y:. Returns a
    tools.Object with the prediction, the inputs, the pointwise difference
    and several statistics of that difference.
    """

    # The factory is called first; its attributes are read afterwards.
    regressor = regressor_factory()
    processor = regressor_factory.compile_kwargs.processor

    if regressor_factory.use_tf:
        input_fn = dg.BatchData.to_dataset((X, y))
    else:
        def input_fn():
            return X, y

    # Prediction and postprocessing both happen with the processor's
    # training mode switched off.
    with processor.training(False):
        predictions = regressor.predict(input_fn=input_fn,
                                        yield_single_examples=False)
        # Only the first item yielded by predict is used.
        raw_prediction = next(predictions)
        prediction = processor.inverse_transform(X, raw_prediction)

    diff = prediction - y
    squared_error = np.square(diff)
    total_loss = np.sum(squared_error)
    return tools.Object(prediction=prediction,
                        X=X,
                        y=y,
                        diff=diff,
                        max_deviation=np.max(np.abs(diff)),
                        average_loss=np.mean(squared_error),
                        loss=total_loss / len(X),
                        total_loss=total_loss)
Beispiel #3
0
def _dnn_hyperparameters_from_dir(dir_name):
    """Creates DNN hyperparameters from the name of the directory of the DNN.

    The name :dir_name: is parsed as
        <units>__<processor>_<activation>[_<uuid>]
    where <units> is an underscore-separated list of layer sizes, each
    either a plain integer or '<size>x<repeat>' (meaning <size> repeated
    <repeat> times). The last size is taken as the logits, the others as the
    hidden units. <activation> is either the name of an attribute of tf.nn
    (case-insensitive) or 'leakyrelu' followed by the digits of alpha with
    the decimal point removed after the first digit (e.g. 'leakyrelu02' for
    alpha = 0.2).

    Returns a tuple (details, uuid), where details is a tools.Object with
    attributes hidden_units, logits, processor (a callable creating a new
    processor instance) and activation, and uuid is the trailing string if
    present, else None.

    Raises ValueError if :dir_name: does not have the above form.
    """

    details = tools.Object()

    try:
        units, rest = dir_name.split('__')
    except ValueError:
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. Expected exactly one '__' "
            f"separating the units from the remaining arguments.") from None
    rest = rest.split('_')

    all_units = []
    for unit in units.split('_'):
        if 'x' in unit:
            unit_size, unit_repeat = unit.split('x')
            unit_size, unit_repeat = int(unit_size), int(unit_repeat)
            all_units.extend([unit_size] * unit_repeat)
        else:
            all_units.append(int(unit))
    details.hidden_units = all_units[:-1]
    details.logits = all_units[-1]

    # Both a processor name and an activation name are required; fail with a
    # clear message rather than an IndexError further down.
    if len(rest) < 2:
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. Expected a processor name "
            f"and an activation name after '__'.")

    processor_name = rest[0]
    processor_class = pc.ProcessorBase.find_subclass(processor_name)
    details.processor = lambda: processor_class()

    activation_name = rest[1].lower()

    # Not a great way to do this inversion, admittedly
    leaky_prefix = 'leakyrelu'
    if activation_name.startswith(leaky_prefix):
        alpha_digits = activation_name[len(leaky_prefix):]
        if not alpha_digits:
            raise ValueError(
                f"Activation '{activation_name}' is missing the digits of "
                f"its alpha value.")
        # The decimal point goes back in after the first digit.
        alpha = float(alpha_digits[0] + '.' + alpha_digits[1:])
        details.activation = ft.partial(tf.nn.leaky_relu, alpha=alpha)
    else:
        try:
            activation_fn = getattr(tf.nn, activation_name)
        except AttributeError:
            raise ValueError(f"Activation '{activation_name}' not understood.")
        else:
            details.activation = activation_fn

    remaining = rest[2:]
    if len(remaining) == 0:
        uuid = None
    elif len(remaining) == 1:
        uuid = remaining[0]
    else:
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. Too many remaining arguments: {remaining}"
        )

    return details, uuid
Beispiel #4
0
def file_count(file_path):
    """Counts the lines of Python code, comments and whitespace in a file located at :file_path:.

    The file must be either a '.py' file or a '.ipynb' notebook; for a
    notebook only the source of its code cells is counted. The file is read
    as UTF-8.

    The classification is a line-based heuristic:
    - empty lines (after stripping) are whitespace;
    - lines starting with '#' are comments;
    - lines starting with a triple quote (either kind), and all lines up to
      and including the next line ending in the same triple quote, are
      comments;
    - everything else is code.

    Returns a tools.Object with integer attributes code, comment and
    whitespace.

    Raises RuntimeError if the file type is not recognised.
    """

    line_count = tools.Object(code=0, comment=0, whitespace=0)

    with open(file_path, 'r', encoding='utf-8') as f:
        if file_path.endswith('.py'):
            lines = f.readlines()
        elif file_path.endswith('.ipynb'):
            lines = []
            for cell in json.load(f)['cells']:
                if cell['cell_type'] == 'code':
                    source = cell['source']
                    # The notebook format allows the source to be either a
                    # list of lines or one multi-line string; extending with
                    # a string would add it character by character.
                    if isinstance(source, str):
                        source = source.splitlines()
                    lines.extend(source)
        else:
            raise RuntimeError(
                "Unrecognised file type at '{}'".format(file_path))

    # The delimiter that opened the docstring we are currently inside, or
    # None when not inside a docstring.
    docstring_delim = None
    for line in lines:
        line = line.strip()
        if docstring_delim is not None:
            line_count.comment += 1
            if line.endswith(docstring_delim):
                docstring_delim = None
        elif line == '':
            line_count.whitespace += 1
        elif line.startswith('#'):
            line_count.comment += 1
        elif line.startswith(('"""', "'''")):
            line_count.comment += 1
            delim = line[:3]
            # A bare delimiter, or a line that doesn't also close the
            # docstring, means the docstring continues on later lines.
            if line == delim or not line.endswith(delim):
                docstring_delim = delim
        else:
            line_count.code += 1

    return line_count
"""Everything we do is on a grid."""

import numpy as np

# https://github.com/patrick-kidger/tools
import tools

### Grid hyperparameters
# The separation between points of the fine grid
fine_grid_sep = tools.Object(t=0.01, x=0.01)
# The separation between points of the coarse grid
coarse_grid_sep = tools.Object(t=0.1, x=0.1)
# The number of intervals in the coarse grid. Thus the coarse grid will contain
# (num_intervals.t + 1) * (num_intervals.x + 1) elements.
# So with num_intervals.t = 3, num_intervals.x = 3, it looks like:
#
# @ @ @ @
#
# @ @ @ @
#
# @ @ @ @
#
# @ @ @ @
num_intervals = tools.Object(t=7, x=7)

# The number of fine grid steps in one coarse grid step.
# Floor division of floats is not used here because representation error can
# make it come out one too small (e.g. 1 // 0.1 == 9.0, 0.3 // 0.1 == 2.0).
# Instead the quotient is rounded to 9 decimal places, which removes that
# error, and then truncated.
fine_grid_fineness = tools.Object(
    t=int(round(coarse_grid_sep.t / fine_grid_sep.t, 9)),
    x=int(round(coarse_grid_sep.x / fine_grid_sep.x, 9)))
# The total extent of the coarse grid in each dimension.
coarse_grid_size = tools.Object(t=num_intervals.t * coarse_grid_sep.t,
                                x=num_intervals.x * coarse_grid_sep.x)

### Grids to evaluate our solution on
import sys
import pandas as pd
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'cytomod', 'otherTools'))
import matplotlib.pyplot as plt
import cytomod
import cytomod.run_gap_statistic as gap_stat
import cytomod.assoc_to_outcome as outcome
from cytomod import plotting as cyplot
from hclusterplot import plotHColCluster
import tools
import numpy as np

########### ------------------- Define manual arguments ----------------- ###########

# All manually chosen settings for this run are collected as attributes of a
# single object.
args = tools.Object()

# Labels identifying the data set and the compartment.
# NOTE(review): how these labels are consumed is not visible here - confirm
# against the code that reads args.
args.name_data = 'FLU09'
args.name_compartment = 'Plasma'

# Analysis settings.
# NOTE(review): the k values presumably refer to a number of clusters/modules
# (the script imports a gap statistic module) - confirm.
args.log_transform = True
args.max_testing_k = 8
args.max_final_k = 6  # Must be <= max_testing_k
args.recalculate_modules = False
args.outcomes = ['FluPositive']  # names of binary outcome columns
args.covariates = ['Age']  # names of regression covariates to control for
args.log_column_names = ['Age']  # or empty list: []
args.cytokines = None  # if none, will take all

# NOTE(review): presumably a random seed for reproducibility - confirm where
# it is applied.
args.seed = 1234
Beispiel #7
0
 def __init__(self, regressor, **kwargs):
     """Stores :regressor:, the regressor that this factory produces.

     The factory starts off with an identity processor as its only compile
     kwarg and with use_tf switched off. Any :**kwargs: are passed on to the
     next __init__ found via super().
     """
     identity_processor = pc.IdentityProcessor()
     self.use_tf = False
     self.compile_kwargs = tools.Object(processor=identity_processor)
     self.regressor = regressor
     super(RegressorFactory, self).__init__(**kwargs)
Beispiel #8
0
class FenicsSolution(dgb.SolutionBase):
    """Generates a random solution using FEniCS.

    An instance holds the solution values :uvals: on a grid described by
    :tvals: and :xvals:, along with the parameters used to create it. It may
    be called with a (t, x) point to look up the solution value there.
    """

    # Default values for the arguments of __init__, gen, gen_solution and
    # _gen_point.
    defaults = tools.Object(t=0, T=10, a=0, b=20,
                            fineness_t=grid.fine_grid_sep.t,
                            fineness_x=grid.fine_grid_sep.x,
                            min_num_peaks=2, max_num_peaks=3,
                            min_wobbly=2, max_wobbly=4,
                            wobbly_const_coef_lim=np.pi,
                            wobbly_lin_coef_lim=1.7,
                            peak_range_offset=0.15,
                            peak_offset=3,
                            min_height=3, max_height=10)

    def __init__(self, initial_condition,
                 t=defaults.t, T=defaults.T,
                 a=defaults.a, b=defaults.b,
                 fineness_t=defaults.fineness_t,
                 fineness_x=defaults.fineness_x,
                 line_up=True,
                 smoothing_thresh=0.01,
                 _solve=True,
                 _tvals=None,
                 _xvals=None,
                 _uvals=None,
                 **kwargs):
        """Numerically determines the solution to the Camassa--Holm equation
        from the given :initial_condition:.

        The :initial_condition: argument should be a string, in C++ syntax
        describing the initial condition. Some common gotchas: all explicit
        numbers should be floats, the spatial variable should be referred to
        as 'x[0]', and any absolute values should be applied as 'fabs'.
        e.g.
        >>> initial_condition = '0.2 * exp(-fabs(x[0] - 10))'
        Check the FEniCS documentation for what are valid inputs to a
        fenics.Expression for a list of available mathematics functions.

        The arguments :t:, :T: describe the start and end times, and :a:, :b:
        describe the start and end spatial points. The points :a: and :b: will
        be identified in order to make the domain spatially periodic. The
        numerical analysis will be done on a grid of fineness :fineness_t: and
        :fineness_x: in the t and x dimensions respectively.

        If :line_up: is True, then an additional linear function will be
        added on to the initial condition in order to make sure that the
        values at the spatial endpoints line up; otherwise a small jump is
        created across the periodic boundary, which creates a small soliton of
        its own!
        If the initial condition is sensible then this linear function will be
        small enough to be unnoticeable in the rest of the initial condition.
        If this flag is used then the :initial_condition: must also be
        interpretable as Python, allowing for functions from the math library.
        The :line_up: argument defaults to True.

        Due to numerical errors it is possible for the function to develop
        negative values; despite this being mathematically wrong. This can
        potentially generate quite serious discrepancies. To prevent this,
        some smoothing around zero may optionally be performed by setting a
        value for :smoothing_thresh:, which defaults to 0.01. This may be
        disabled by setting :smoothing_thresh: to None.

        The underscore-prefixed arguments are used by load: if :_solve: is
        False then no solving is done and :_tvals:, :_xvals:, :_uvals: are
        stored as the solution instead.

        Any additional :**kwargs: are passed on to the superclass __init__.

        Raises ex.FEniCSConvergenceException if the solver reports that it
        did not converge.
        """

        # Incoming awfulness
        if line_up:
            # brace yourself
            math_list = ['acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh',
                         'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp',
                         'hypot', 'ldexp', 'log', 'log10', 'modf', 'pi', 'pow',
                         'radians', 'sin', 'sinh', 'sqrt', 'tan', 'tanh']
            math_dict = {name: getattr(math, name) for name in math_list}
            math_dict['abs'] = abs
            # The initial condition is evaluated as Python at x = a and at
            # x = b, with 'x[0]' swapped for a plain variable name.
            math_dict['xxx'] = a
            initial_condition_rep = initial_condition.replace('x[0]', 'xxx')
            # NOTE(security): eval with builtins removed and a whitelist of
            # names is not a real sandbox; :initial_condition: must come from
            # a trusted source.
            a_val = eval(initial_condition_rep, {'__builtins__': None}, math_dict)
            math_dict['xxx'] = b
            b_val = eval(initial_condition_rep, {'__builtins__': None}, math_dict)
            # Blame http://lybniz2.sourceforge.net/safeeval.html for showing me
            # how to do this.

            # The line _m * x + _c takes the value -_diff / 2 at x = a and
            # +_diff / 2 at x = b, so adding it moves both endpoint values to
            # their average.
            _diff = a_val - b_val
            _m = _diff / (b - a)
            _c = -0.5 * _diff - _m * a

            linear_str = '{} * x[0] + {}'.format(_m, _c)
            line_up_message = ('FEniCS: Making solution periodic by adding {} to the '
                               'initial condition.'.format(linear_str))
            if abs(_c) > 0.25:
                tflog.info(line_up_message)
            else:
                tflog.debug(line_up_message)
            initial_condition += ' + ' + linear_str

        if _solve:
            tvals, xvals, uvals, converged = fenics_solve(initial_condition, t, T, a, b,
                                                          fineness_t, fineness_x,
                                                          smoothing_thresh=smoothing_thresh)

            if not converged:
                raise ex.FEniCSConvergenceException
        else:
            tvals, xvals, uvals = _tvals, _xvals, _uvals

        self.initial_condition = initial_condition
        self.t = t
        self.T = T
        self.a = a
        self.b = b
        self.fineness_t = fineness_t
        self.fineness_x = fineness_x

        self.tvals = tvals
        self.xvals = xvals
        self.uvals = uvals

        super(FenicsSolution, self).__init__(**kwargs)

    def __call__(self, point):
        """Returns the value of the solution at :point:, which should be a
        (t, x) pair.

        The point is converted to a pair of indices into :uvals: by dividing
        by the grid fineness in each dimension.
        """
        t, x = point
        # Floating point error can leave the quotient of a grid-aligned point
        # just below an integer (e.g. 0.29 / 0.01 == 28.999999999999996), so
        # plain truncation would pick the index one too low. Rounding to 9
        # decimal places first removes that error; other points are still
        # rounded down as before.
        t_index = int(round(t / self.fineness_t, 9))
        x_index = int(round(x / self.fineness_x, 9))
        return self.uvals[t_index, x_index]

    @staticmethod
    def _folder_prefix(folder):
        """Returns :folder: with a trailing path separator appended if it
        doesn't already end in one, using the separator of the current
        platform.
        """
        return os.path.join(folder, '')

    def save(self, folder):
        """Saves the FEniCS solution to the specified :folder:, which
        should be a string.

        The folder is created by this method; os.mkdir raises an error if it
        already exists.
        """
        folder = self._folder_prefix(folder)
        os.mkdir(folder)
        np.save(folder + 'uvals', self.uvals)
        # Don't really need to save all of this, it's true.
        # Some of it can be reconstructed simply from the
        # other information saved.
        # But this is particularly straightforward.
        np.save(folder + 'tvals', self.tvals)
        np.save(folder + 'xvals', self.xvals)
        with open(folder + 'other_data', 'w') as f:
            f.write(json.dumps({'fineness_t': self.fineness_t,
                                'fineness_x': self.fineness_x,
                                't': self.t,
                                'T': self.T,
                                'a': self.a,
                                'b': self.b,
                                'initial_condition': self.initial_condition}))

    @classmethod
    def load(cls, folder, **kwargs):
        """Loads a FEniCS solution from the specified :folder:, which
        should be a string. Any additional :**kwargs: are passed on to
        the initialisation of the class.
        """
        folder = cls._folder_prefix(folder)
        tvals = np.load(folder + 'tvals.npy')
        xvals = np.load(folder + 'xvals.npy')
        uvals = np.load(folder + 'uvals.npy')
        with open(folder + 'other_data') as f:
            other_data = json.loads(f.read())
        # The saved initial condition already includes any linear correction
        # that was added when the solution was first created, so line_up is
        # switched off; and the saved values are used instead of solving.
        return cls(initial_condition=other_data['initial_condition'],
                   t=other_data['t'], T=other_data['T'],
                   a=other_data['a'], b=other_data['b'],
                   fineness_t=other_data['fineness_t'],
                   fineness_x=other_data['fineness_x'],
                   line_up=False,
                   _solve=False,
                   _tvals=tvals,
                   _xvals=xvals,
                   _uvals=uvals,
                   **kwargs)

    @classmethod
    def gen(cls,
            min_num_peaks=defaults.min_num_peaks,
            max_num_peaks=defaults.max_num_peaks,
            min_wobbly=defaults.min_wobbly,
            max_wobbly=defaults.max_wobbly,
            wobbly_const_coef_lim=defaults.wobbly_const_coef_lim,
            wobbly_lin_coef_lim=defaults.wobbly_lin_coef_lim,
            peak_range_offset=defaults.peak_range_offset,
            peak_offset=defaults.peak_offset,
            min_height=defaults.min_height,
            max_height=defaults.max_height,
            **kwargs):
        """Generates a random solution of this form, and a random location
        around which to evaluate it.

        Its arguments are the same as gen_solution; see its docstring for an
        explanation of their behaviour.

        Returns a tuple (point, solution).
        """
        import inspect

        solution = cls.gen_solution(min_num_peaks, max_num_peaks,
                                    min_wobbly, max_wobbly,
                                    wobbly_const_coef_lim, wobbly_lin_coef_lim,
                                    peak_range_offset, peak_offset,
                                    min_height, max_height,
                                    **kwargs)

        # The kwargs may include arguments meant only for __init__ (e.g.
        # line_up), which _gen_point doesn't accept, so only those that it
        # does accept are forwarded to it.
        point_params = inspect.signature(cls._gen_point).parameters
        if any(param.kind is inspect.Parameter.VAR_KEYWORD
               for param in point_params.values()):
            point_kwargs = kwargs
        else:
            point_kwargs = {key: value for key, value in kwargs.items()
                            if key in point_params}
        point = cls._gen_point(**point_kwargs)

        return point, solution

    @classmethod
    def gen_solution(cls,
                     min_num_peaks=defaults.min_num_peaks,
                     max_num_peaks=defaults.max_num_peaks,
                     min_wobbly=defaults.min_wobbly,
                     max_wobbly=defaults.max_wobbly,
                     wobbly_const_coef_lim=defaults.wobbly_const_coef_lim,
                     wobbly_lin_coef_lim=defaults.wobbly_lin_coef_lim,
                     peak_range_offset=defaults.peak_range_offset,
                     peak_offset=defaults.peak_offset,
                     min_height=defaults.min_height,
                     max_height=defaults.max_height,
                     **kwargs):
        """Generates a random solution.

        The arguments :min_num_peaks:, :max_num_peaks:, :min_wobbly:, :max_wobbly:,
        :min_height:, :max_height:, :peak_offset: determine the nature of the
        automatically generated initial condition.
        It will randomly have between :min_num_peaks: and :max_num_peaks:
        (inclusive) peaks (each a sech curve), each of a height chosen randomly
        from a uniform distribution between :min_height: and :max_height:.

        These peaks are then made 'wobbly' (technical term) by a factor
        corresponding to an integer chosen randomly between :min_wobbly: and
        :max_wobbly: (inclusive). (Specifically, it is multiplied by a sum of
        sines of linear functions). Beyond making the initial condition more
        interesting, these may also split a peak into pieces, giving the impression
        that there are more peaks than :max_num_peaks:. This 'wobbly' behaviour may
        be turned off by setting :max_wobbly: to 0.

        The constant and linear coefficients of the linear functions fed into the
        'wobbly' sin functions will be chosen randomly from a uniform distribution
        from -:wobbly_const_coef_lim: to :wobbly_const_coef_lim:, and chosen
        randomly from a uniform distribution from -:wobbly_lin_coef_lim: to
        :wobbly_lin_coef_lim:, respectively. The default for :wobbly_const_coef_lim:
        is pi; the default for :wobbly_lin_coef_lim: is 1.7.

        The peaks are located at least :peak_offset: distance from the endpoints
        of the domain, in order to allow sufficient decay for there not to be too
        large a jump across the periodic boundary. The default is 3.
        More than that, the peaks are each started off in their own section of the
        domain: the domain (less the :peak_offset: distance from each endpoint) is
        split into a number of equal size pieces equal to the number of peaks, and
        each peak started off in its own piece. The peak will be placed at least
        :peak_range_offset: proportion within its own piece. The default is 0.15,
        so the peak is started off somewhere in the middle 70% of its range.

        The default arguments have all been chosen to try and generate interesting
        looking solutions, which are nonetheless not so wild that the numerical
        analysis is poor, but which are also nontrivial throughout most of the
        domain, so that picking an arbitrary location in the domain is likely to
        generate good training data.

        If the numerical solver fails to converge then a warning is logged and a
        fresh random initial condition is tried, until one converges.

        Any additional kwargs (e.g. :a:, :b:) are passed on to __init__.
        """

        a = kwargs.get('a', cls.defaults.a)
        b = kwargs.get('b', cls.defaults.b)

        while True:
            # A new initial condition is drawn on every attempt: retrying the
            # same one would just repeat the same failure.
            initial_condition = cls._random_initial_condition(
                a=a, b=b,
                min_num_peaks=min_num_peaks, max_num_peaks=max_num_peaks,
                min_wobbly=min_wobbly, max_wobbly=max_wobbly,
                wobbly_const_coef_lim=wobbly_const_coef_lim,
                wobbly_lin_coef_lim=wobbly_lin_coef_lim,
                peak_range_offset=peak_range_offset, peak_offset=peak_offset,
                min_height=min_height, max_height=max_height)
            tflog.debug("FEniCS: Generated initial condition {}".format(initial_condition))
            try:
                solution = cls(initial_condition, **kwargs)
            except ex.FEniCSConvergenceException as e:
                tflog.warn(e)
            else:
                return solution

    @staticmethod
    def _random_initial_condition(a, b,
                                  min_num_peaks, max_num_peaks,
                                  min_wobbly, max_wobbly,
                                  wobbly_const_coef_lim, wobbly_lin_coef_lim,
                                  peak_range_offset, peak_offset,
                                  min_height, max_height):
        """Builds a single random initial condition string, as described in
        the docstring of gen_solution.
        """
        num_peaks = np.random.randint(min_num_peaks, max_num_peaks + 1)
        # Each peak is placed randomly in a region of this length.
        peak_region_length = (b - a - 2 * peak_offset) / num_peaks
        peak_strs = []
        for peak_index in range(num_peaks):
            peak_height = np.random.uniform(min_height, max_height)
            peak_region_start = a + peak_offset + (peak_index + peak_range_offset) * peak_region_length
            peak_loc = np.random.uniform(peak_region_start,
                                         peak_region_start + (1 - 2 * peak_range_offset) * peak_region_length)

            if max_wobbly > 0:
                wobbly = np.random.randint(min_wobbly, max_wobbly + 1)
            else:
                wobbly = 0

            if wobbly > 0:
                wobble_strs = []
                for _ in range(wobbly):
                    lin = np.random.uniform(-wobbly_lin_coef_lim, wobbly_lin_coef_lim)
                    const = np.random.uniform(-wobbly_const_coef_lim, wobbly_const_coef_lim)
                    wobble_strs.append('sin({} * x[0] + {})'.format(lin, const))
                norm_wobble_str = '{} + {}'.format(wobbly, ' + '.join(wobble_strs))
            else:
                # No wobble terms (either switched off, or zero of them were
                # drawn): use a constant factor, which also avoids dividing
                # by zero below.
                wobbly = 1
                norm_wobble_str = '1.0'

            # *0.5 because norm_wobble_str divided by wobbly takes values in
            # [0, 2]
            peak_strs.append('{} * ({}) / cosh(x[0] - {})'.format(0.5 * peak_height / wobbly,
                                                                  norm_wobble_str,
                                                                  peak_loc))

        return ' + '.join(peak_strs)

    @classmethod
    def _gen_point(cls,
                   a=defaults.a, b=defaults.b,
                   t=defaults.t, T=defaults.T,
                   fineness_t=defaults.fineness_t,
                   fineness_x=defaults.fineness_x):
        """Handles the generation of a particular point.

        Returns a random (t, x) pair, each rounded down to a multiple of the
        corresponding fineness.
        """
        # The coarse grid mustn't have any part of it lie outside [t, T]x[a, b],
        # as we don't have any data there.
        # (Taking off all of grid.num_intervals, rather than just half of it,
        # is a little overkill.)
        t_point = np.random.uniform(t + grid.num_intervals.t * grid.coarse_grid_sep.t,
                                    T - grid.num_intervals.t * grid.coarse_grid_sep.t)
        x_point = np.random.uniform(a + grid.num_intervals.x * grid.coarse_grid_sep.x,
                                    b - grid.num_intervals.x * grid.coarse_grid_sep.x)
        t_point = tools.round_mult(t_point, fineness_t, 'down')
        x_point = tools.round_mult(x_point, fineness_x, 'down')
        return t_point, x_point