def _eval_regressor(regressor, X, y):
    """Evaluates a regressor on some test data :X:, :y:.

    A regressor carrying a 'no_tf' attribute is handed an input function
    returning the raw (X, y) tuple; any other regressor is handed the result
    of dg.BatchData.to_dataset((X, y)).

    Only the first item yielded by regressor.predict is used. The return
    value is a tools.Object holding the prediction, the inputs, the
    pointwise difference and several summary statistics of the squared
    error.
    """
    if hasattr(regressor, 'no_tf'):
        def input_fn():
            return (X, y)
    else:
        input_fn = dg.BatchData.to_dataset((X, y))

    predictions = regressor.predict(input_fn=input_fn,
                                    yield_single_examples=False)
    prediction = next(predictions)

    diff = prediction - y
    squared_error = np.square(diff)
    total_loss = np.sum(squared_error)

    return tools.Object(prediction=prediction,
                        X=X,
                        y=y,
                        diff=diff,
                        max_deviation=np.max(np.abs(diff)),
                        average_loss=np.mean(squared_error),
                        loss=total_loss / len(X),
                        total_loss=total_loss)
def _eval_regressor(regressor_factory, X, y):
    """Evaluates a regressor on some test data :X:, :y:.

    The regressor is created by calling :regressor_factory:. The factory's
    compile_kwargs.processor is switched out of training mode for the
    prediction and is used to inverse-transform the raw prediction before
    the error statistics are computed.

    Only the first item yielded by regressor.predict is used. The return
    value is a tools.Object holding the (post-processed) prediction, the
    inputs, the pointwise difference and several summary statistics of the
    squared error.
    """
    regressor = regressor_factory()
    processor = regressor_factory.compile_kwargs.processor

    if regressor_factory.use_tf:
        input_fn = dg.BatchData.to_dataset((X, y))
    else:
        def input_fn():
            return (X, y)

    # NOTE(review): the prediction is consumed and inverse-transformed while
    # the processor is in non-training mode -- confirm this is the intended
    # extent of the context manager.
    with processor.training(False):
        predictions = regressor.predict(input_fn=input_fn,
                                        yield_single_examples=False)
        raw_prediction = next(predictions)
        prediction = processor.inverse_transform(X, raw_prediction)

    diff = prediction - y
    squared_error = np.square(diff)
    total_loss = np.sum(squared_error)

    return tools.Object(prediction=prediction,
                        X=X,
                        y=y,
                        diff=diff,
                        max_deviation=np.max(np.abs(diff)),
                        average_loss=np.mean(squared_error),
                        loss=total_loss / len(X),
                        total_loss=total_loss)
def _dnn_hyperparameters_from_dir(dir_name):
    """Creates DNN hyperparameters from the name of the directory of the DNN.

    The name is parsed as

        <units>__<processor>_<activation>[_<uuid>]

    where <units> is a '_'-separated list whose entries are either a plain
    integer or '<size>x<repeat>' (meaning <size> repeated <repeat> times).
    The final entry of the expanded list becomes the logits; all earlier
    entries become the hidden units.

    <processor> is looked up via pc.ProcessorBase.find_subclass. <activation>
    is lower-cased and is either 'leakyrelu' followed by the digits of alpha
    (the first digit is the integer part, the rest the fractional part), or
    the name of an attribute of tf.nn.

    Returns a tuple (details, uuid), where :details: is a tools.Object with
    attributes hidden_units, logits, processor and activation, and :uuid: is
    the trailing field of the name, or None if there is none.

    Raises ValueError if :dir_name: does not follow this format.
    """
    details = tools.Object()

    pieces = dir_name.split('__')
    if len(pieces) != 2:
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. Expected exactly one '__' "
            f"separating the units from the rest."
        )
    units, rest = pieces
    units = units.split('_')
    rest = rest.split('_')

    all_units = []
    for unit in units:
        if 'x' in unit:
            unit_size, unit_repeat = unit.split('x')
            all_units.extend([int(unit_size)] * int(unit_repeat))
        else:
            all_units.append(int(unit))
    details.hidden_units = all_units[:-1]
    details.logits = all_units[-1]

    processor_name = rest[0]
    processor_class = pc.ProcessorBase.find_subclass(processor_name)
    details.processor = lambda: processor_class()

    if len(rest) < 2:
        # Previously this surfaced as a bare IndexError.
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. No activation name found "
            f"after the processor name."
        )
    activation_name = rest[1].lower()

    # Not a great way to do this inversion, admittedly
    if activation_name[:9] == 'leakyrelu':
        try:
            # e.g. 'leakyrelu02' -> alpha = 0.2
            alpha = float(activation_name[9] + '.' + activation_name[10:])
        except (IndexError, ValueError) as err:
            # IndexError: no digits at all after 'leakyrelu'.
            # ValueError: the trailing characters do not form a number.
            raise ValueError(
                f"Activation '{activation_name}' not understood: could not "
                f"read alpha from the characters after 'leakyrelu'."
            ) from err
        details.activation = ft.partial(tf.nn.leaky_relu, alpha=alpha)
    else:
        try:
            activation_fn = getattr(tf.nn, activation_name)
        except AttributeError:
            raise ValueError(f"Activation '{activation_name}' not understood.")
        else:
            details.activation = activation_fn

    remaining = rest[2:]
    if not remaining:
        uuid = None
    elif len(remaining) == 1:
        uuid = remaining[0]
    else:
        raise ValueError(
            f"Bad dir_name string '{dir_name}'. Too many remaining arguments: {remaining}"
        )

    return details, uuid
def file_count(file_path):
    """Counts the lines of Python code, comments and whitespace in a file
    located at :file_path:.

    Files ending in '.py' are read line by line. Files ending in 'ipynb' are
    parsed as JSON and only the source of cells whose cell_type is 'code' is
    counted. Any other file raises a RuntimeError.

    Each line is stripped and then classified:
      - blank lines count as whitespace;
      - lines starting with '#' count as comments;
      - lines starting with three double quotes, and every line up to and
        including the next line that ends with three double quotes, count as
        comments (so blank lines inside such a docstring are comments too);
      - everything else counts as code.

    Returns a tools.Object with integer attributes code, comment and
    whitespace.
    """
    line_count = tools.Object(code=0, comment=0, whitespace=0)

    with open(file_path, 'r') as f:
        if file_path.endswith('.py'):
            lines = f.readlines()
        elif file_path.endswith('ipynb'):
            lines = []
            for cell in json.load(f)['cells']:
                if cell['cell_type'] != 'code':
                    continue
                source = cell['source']
                # The notebook format allows 'source' to be either a list of
                # lines or one multi-line string. Extending a list with a
                # string would add individual characters, so split it first.
                if isinstance(source, str):
                    source = source.splitlines()
                lines.extend(source)
        else:
            raise RuntimeError(
                "Unrecognised file type at '{}'".format(file_path))

    currently_in_docstring = False
    for line in lines:
        line = line.strip()
        if currently_in_docstring:
            line_count.comment += 1
            if line.endswith('"""'):
                currently_in_docstring = False
        elif line == '':
            line_count.whitespace += 1
        elif line.startswith('#'):
            line_count.comment += 1
        elif line.startswith('"""'):
            line_count.comment += 1
            # A lone '"""' opens a docstring; so does a line that starts with
            # the quotes but does not also end with them. Anything else is a
            # one-line docstring, which is already closed.
            if line == '"""' or not line.endswith('"""'):
                currently_in_docstring = True
        else:
            line_count.code += 1

    return line_count
"""Everything we do is on a grid.""" import numpy as np # https://github.com/patrick-kidger/tools import tools ### Grid hyperparameters # The separation between points of the fine grid fine_grid_sep = tools.Object(t=0.01, x=0.01) # The separation between points of the coarse grid coarse_grid_sep = tools.Object(t=0.1, x=0.1) # The amount of intervals in the coarse grid. Thus the coarse grid will contain # (num_intervals.t + 1) * (num_intervals.x + 1) elements. # So with num_intervals.t = 3, num_intervals.x = 3, it looks like: # # @ @ @ @ # # @ @ @ @ # # @ @ @ @ # # @ @ @ @ num_intervals = tools.Object(t=7, x=7) fine_grid_fineness = tools.Object(t=int(coarse_grid_sep.t // fine_grid_sep.t), x=int(coarse_grid_sep.x // fine_grid_sep.x)) coarse_grid_size = tools.Object(t=num_intervals.t * coarse_grid_sep.t, x=num_intervals.x * coarse_grid_sep.x) ### Grids to evaluate our solution on
# 'os' is used below (os.getcwd, os.path.join), so it must be imported here.
import os
import sys

import pandas as pd

# Make the working directory and cytomod's bundled helper tools importable
# before the imports below, which rely on these path entries.
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'cytomod', 'otherTools'))

import matplotlib.pyplot as plt
import cytomod
import cytomod.run_gap_statistic as gap_stat
import cytomod.assoc_to_outcome as outcome
from cytomod import plotting as cyplot
from hclusterplot import plotHColCluster
import tools
import numpy as np


########### ------------------- Define manual arguments ----------------- ###########
args = tools.Object()

args.name_data = 'FLU09'
args.name_compartment = 'Plasma'
args.log_transform = True
args.max_testing_k = 8
args.max_final_k = 6  # Must be <= max_testing_k
args.recalculate_modules = False
args.outcomes = ['FluPositive']  # names of binary outcome columns
args.covariates = ['Age']  # names of regression covariates to control for
args.log_column_names = ['Age']  # or empty list: []
args.cytokines = None  # if none, will take all
args.seed = 1234
def __init__(self, regressor, **kwargs):
    """Stores the :regressor: that this factory produces.

    Also sets the defaults for the other attributes: use_tf is False and
    compile_kwargs is a tools.Object whose processor is a fresh
    pc.IdentityProcessor. Any further :**kwargs: are forwarded to the parent
    class initialiser.
    """
    self.use_tf = False
    self.compile_kwargs = tools.Object(processor=pc.IdentityProcessor())
    self.regressor = regressor
    super(RegressorFactory, self).__init__(**kwargs)
class FenicsSolution(dgb.SolutionBase):
    """Generates a random solution using FEniCS."""

    # Default values shared by __init__, gen, gen_solution and _gen_point.
    defaults = tools.Object(t=0, T=10, a=0, b=20,
                            fineness_t=grid.fine_grid_sep.t,
                            fineness_x=grid.fine_grid_sep.x,
                            min_num_peaks=2, max_num_peaks=3,
                            min_wobbly=2, max_wobbly=4,
                            wobbly_const_coef_lim=np.pi,
                            wobbly_lin_coef_lim=1.7,
                            peak_range_offset=0.15, peak_offset=3,
                            min_height=3, max_height=10)

    def __init__(self, initial_condition, t=defaults.t, T=defaults.T,
                 a=defaults.a, b=defaults.b,
                 fineness_t=defaults.fineness_t,
                 fineness_x=defaults.fineness_x, line_up=True,
                 smoothing_thresh=0.01, _solve=True, _tvals=None,
                 _xvals=None, _uvals=None, **kwargs):
        """Numerically determines the solution to the Camassa--Holm equation
        from the given :initial_condition:.

        The :initial_condition: argument should be a string, in C++ syntax
        describing the initial condition. Some common gotchas: all explicit
        numbers should be floats, the spatial variable should be referred to
        as 'x[0]', and any absolute values should be applied as 'fabs'. e.g.

        >>> initial_condition = '0.2 * exp(-fabs(x[0] - 10))'

        Check the FEniCS documentation for what are valid inputs to a
        fenics.Expression for a list of available mathematics functions.

        The arguments :t:, :T: describe the start and end times, and :a:, :b:
        describe the start and end spatial points. The points :a: and :b: will
        be identified in order to make the domain spatially periodic.

        The numerical analysis will be done on a grid of fineness
        :fineness_t: and :fineness_x: in the t and x dimensions respectively.

        If :line_up: is True, then an additional linear function will be added
        on to the initial condition in order to make sure that the values at
        the spatial endpoints line up; otherwise a small jump is created
        across the periodic boundary, which creates a small soliton of its
        own! If the initial condition is sensible then this linear function
        will be small enough to be unnoticeable in the rest of the initial
        condition. If this flag is used then the :initial_condition: must also
        be interpretable as Python, allowing for functions from the math
        library. The :line_up: argument defaults to True.

        Due to numerical errors it is possible for the function to develop
        negative values; despite this being mathematically wrong. This can
        potentially generate quite serious discrepancies. To prevent this,
        some smoothing around zero may optionally be performed by setting a
        value for :smoothing_thresh:, which defaults to 0.01. This may be
        disabled by setting :smoothing_thresh: to None.

        If :_solve: is False then nothing is solved; instead :_tvals:,
        :_xvals: and :_uvals: are stored as the solution data (this is what
        the load classmethod does). Any additional :**kwargs: are passed on to
        the parent class initialiser.

        Raises ex.FEniCSConvergenceException if the solve reports that it did
        not converge.
        """

        # Incoming awfulness
        if line_up:
            # brace yourself
            math_list = ['acos', 'asin', 'atan', 'atan2', 'ceil', 'cos',
                         'cosh', 'degrees', 'e', 'exp', 'fabs', 'floor',
                         'fmod', 'frexp', 'hypot', 'ldexp', 'log', 'log10',
                         'modf', 'pi', 'pow', 'radians', 'sin', 'sinh',
                         'sqrt', 'tan', 'tanh']
            math_dict = {name: getattr(math, name) for name in math_list}
            math_dict['abs'] = abs

            # 'x[0]' is swapped for a plain name so that the expression can be
            # evaluated as Python at each endpoint.
            initial_condition_rep = initial_condition.replace('x[0]', 'xxx')

            # NOTE(security): eval with builtins removed is a mitigation, not
            # a sandbox. Do not feed untrusted strings in as
            # :initial_condition:.
            math_dict['xxx'] = a
            a_val = eval(initial_condition_rep, {'__builtins__': None},
                         math_dict)
            math_dict['xxx'] = b
            b_val = eval(initial_condition_rep, {'__builtins__': None},
                         math_dict)
            # Blame http://lybniz2.sourceforge.net/safeeval.html for showing
            # me how to do this.

            # The linear correction _m * x + _c takes the value -_diff / 2 at
            # a and +_diff / 2 at b, so that after adding it the values at the
            # two endpoints agree.
            _diff = a_val - b_val
            _m = _diff / (b - a)
            _c = -0.5 * _diff - _m * a
            linear_str = '{} * x[0] + {}'.format(_m, _c)

            line_up_message = ('FEniCS: Making solution periodic by adding {} to the '
                               'initial condition.'.format(linear_str))
            # Larger corrections are logged more prominently.
            if abs(_c) > 0.25:
                tflog.info(line_up_message)
            else:
                tflog.debug(line_up_message)

            initial_condition += ' + ' + linear_str

        if _solve:
            tvals, xvals, uvals, converged = fenics_solve(
                initial_condition, t, T, a, b, fineness_t, fineness_x,
                smoothing_thresh=smoothing_thresh)
            if not converged:
                raise ex.FEniCSConvergenceException
        else:
            tvals, xvals, uvals = _tvals, _xvals, _uvals

        self.initial_condition = initial_condition
        self.t = t
        self.T = T
        self.a = a
        self.b = b
        self.fineness_t = fineness_t
        self.fineness_x = fineness_x
        self.tvals = tvals
        self.xvals = xvals
        self.uvals = uvals

        super(FenicsSolution, self).__init__(**kwargs)

    def __call__(self, point):
        """Returns the stored solution value at :point:, a (t, x) pair.

        The coordinates are converted to indices into self.uvals by dividing
        by the grid fineness and rounding to the nearest integer.
        """
        t, x = point
        # Round rather than truncate: a point lying exactly on the grid can
        # give a quotient a hair below the intended integer (e.g.
        # 28.999999999999996), which int() alone would turn into the previous
        # index.
        # NOTE(review): the start of the domain (self.t, self.a) is not
        # subtracted here -- presumably uvals is indexed from 0; confirm for
        # domains that do not start at 0.
        t_index = int(round(t / self.fineness_t))
        x_index = int(round(x / self.fineness_x))
        return self.uvals[t_index, x_index]

    def save(self, folder):
        """Saves the FEniCS solution to the specified :folder:, which should
        be a string.

        The folder is created with os.mkdir, so it must not already exist.
        """
        os.mkdir(folder)

        # Paths are built with os.path.join so that the right separator is
        # used on every platform; appending a '\\' by hand made the backslash
        # part of the file name on POSIX systems.
        np.save(os.path.join(folder, 'uvals'), self.uvals)
        # Don't really need to save all of this, it's true.
        # Some of it can be reconstructed simply from the
        # other information saved.
        # But this is particularly straightforward.
        np.save(os.path.join(folder, 'tvals'), self.tvals)
        np.save(os.path.join(folder, 'xvals'), self.xvals)
        with open(os.path.join(folder, 'other_data'), 'w') as f:
            f.write(json.dumps({'fineness_t': self.fineness_t,
                                'fineness_x': self.fineness_x,
                                't': self.t,
                                'T': self.T,
                                'a': self.a,
                                'b': self.b,
                                'initial_condition': self.initial_condition}))

    @classmethod
    def load(cls, folder, **kwargs):
        """Loads a FEniCS solution from the specified :folder:, which should
        be a string.

        Any additional :**kwargs: are passed on to the initialisation of the
        class.
        """
        tvals = np.load(os.path.join(folder, 'tvals.npy'))
        xvals = np.load(os.path.join(folder, 'xvals.npy'))
        uvals = np.load(os.path.join(folder, 'uvals.npy'))
        with open(os.path.join(folder, 'other_data')) as f:
            other_data = json.loads(f.read())

        # The saved initial condition already includes any line-up correction
        # and the saved arrays are the solution, so neither is redone.
        return cls(initial_condition=other_data['initial_condition'],
                   t=other_data['t'], T=other_data['T'],
                   a=other_data['a'], b=other_data['b'],
                   fineness_t=other_data['fineness_t'],
                   fineness_x=other_data['fineness_x'],
                   line_up=False, _solve=False,
                   _tvals=tvals, _xvals=xvals, _uvals=uvals, **kwargs)

    @classmethod
    def gen(cls, min_num_peaks=defaults.min_num_peaks,
            max_num_peaks=defaults.max_num_peaks,
            min_wobbly=defaults.min_wobbly,
            max_wobbly=defaults.max_wobbly,
            wobbly_const_coef_lim=defaults.wobbly_const_coef_lim,
            wobbly_lin_coef_lim=defaults.wobbly_lin_coef_lim,
            peak_range_offset=defaults.peak_range_offset,
            peak_offset=defaults.peak_offset,
            min_height=defaults.min_height,
            max_height=defaults.max_height, **kwargs):
        """Generates a random solution of this form, and a random location
        around which to evaluate it.

        Its arguments are the same as gen_solution; see its docstring for an
        explanation of their behaviour.

        Returns a tuple (point, solution).
        """
        solution = cls.gen_solution(min_num_peaks, max_num_peaks, min_wobbly,
                                    max_wobbly, wobbly_const_coef_lim,
                                    wobbly_lin_coef_lim, peak_range_offset,
                                    peak_offset, min_height, max_height,
                                    **kwargs)
        # NOTE(review): the same kwargs go to both __init__ (via gen_solution)
        # and _gen_point, so a keyword that only __init__ understands (e.g.
        # line_up) makes this call raise TypeError.
        point = cls._gen_point(**kwargs)
        return point, solution

    @classmethod
    def gen_solution(cls, min_num_peaks=defaults.min_num_peaks,
                     max_num_peaks=defaults.max_num_peaks,
                     min_wobbly=defaults.min_wobbly,
                     max_wobbly=defaults.max_wobbly,
                     wobbly_const_coef_lim=defaults.wobbly_const_coef_lim,
                     wobbly_lin_coef_lim=defaults.wobbly_lin_coef_lim,
                     peak_range_offset=defaults.peak_range_offset,
                     peak_offset=defaults.peak_offset,
                     min_height=defaults.min_height,
                     max_height=defaults.max_height, **kwargs):
        """Generates a random solution.

        The arguments :min_num_peaks:, :max_num_peaks:, :min_wobbly:,
        :max_wobbly:, :min_height:, :max_height:, :peak_offset: determine the
        nature of the automatically generated initial condition. It will
        randomly have between :min_num_peaks: and :max_num_peaks: (inclusive)
        peaks (each a sech curve), each of a height chosen randomly from a
        uniform distribution between :min_height: and :max_height:.

        These peaks are then made 'wobbly' (technical term) by a factor
        corresponding to an integer chosen randomly between :min_wobbly: and
        :max_wobbly: (inclusive). (Specifically, it is multiplied by a sum of
        sines of linear functions). Beyond making the initial condition more
        interesting, these may also split a peak into pieces, giving the
        impression that there are more peaks than :max_num_peaks:. This
        'wobbly' behaviour may be turned off by setting :max_wobbly: to 0; a
        peak whose randomly chosen factor is 0 is likewise left un-wobbled.

        The constant and linear coefficients of the linear functions fed into
        the 'wobbly' sin functions will be chosen randomly from a uniform
        distribution from -:wobbly_const_coef_lim: to
        :wobbly_const_coef_lim:, and chosen randomly from a uniform
        distribution from -:wobbly_lin_coef_lim: to :wobbly_lin_coef_lim:,
        respectively. The default for :wobbly_const_coef_lim: is pi; the
        default for :wobbly_lin_coef_lim: is 1.7.

        The peaks are located at least :peak_offset: distance from the
        endpoints of the domain, in order to allow sufficient decay for there
        not to be too large a jump across the periodic boundary. The default
        is 3. More than that, the peaks are each started off in their own
        section of the domain: the domain (less the :peak_offset: distance
        from each endpoint) is split into a number of equal size pieces equal
        to the number of peaks, and each peak started off in its own piece.
        The peak will be placed at least :peak_range_offset: proportion within
        its own piece. The default is 0.15, so the peak is started off
        somewhere in the middle 70% of its range.

        The default arguments have all been chosen to try and generate
        interesting looking solutions, which are nonetheless not so wild that
        the numerical analysis is poor, but which are also nontrivial
        throughout most of the domain, so that picking an arbitrary location
        in the domain is likely to generate good training data.

        Any additional kwargs (e.g. :a:, :b:) are passed on to __init__.

        If the solve fails to converge, a warning is logged and a new random
        initial condition is tried, until one converges.
        """
        a = kwargs.get('a', cls.defaults.a)
        b = kwargs.get('b', cls.defaults.b)

        while True:
            # A fresh initial condition is drawn on every attempt: retrying
            # the one that just failed to converge would simply fail again.
            initial_condition = cls._gen_initial_condition(
                a, b, min_num_peaks, max_num_peaks, min_wobbly, max_wobbly,
                wobbly_const_coef_lim, wobbly_lin_coef_lim,
                peak_range_offset, peak_offset, min_height, max_height)
            tflog.debug("FEniCS: Generated initial condition {}".format(initial_condition))
            try:
                solution = cls(initial_condition, **kwargs)
            except ex.FEniCSConvergenceException as e:
                tflog.warn(e)
            else:
                return solution

    @staticmethod
    def _gen_initial_condition(a, b, min_num_peaks, max_num_peaks, min_wobbly,
                               max_wobbly, wobbly_const_coef_lim,
                               wobbly_lin_coef_lim, peak_range_offset,
                               peak_offset, min_height, max_height):
        """Builds one random initial condition string on the domain [a, b].

        See gen_solution for the meaning of the arguments.
        """
        num_peaks = np.random.randint(min_num_peaks, max_num_peaks + 1)
        # Each peak is placed randomly in a region of this length.
        peak_region_length = (b - a - 2 * peak_offset) / num_peaks

        peak_strs = []
        for peak_index in range(num_peaks):
            peak_height = np.random.uniform(min_height, max_height)
            peak_region_start = (a + peak_offset
                                 + (peak_index + peak_range_offset) * peak_region_length)
            peak_region_end = (peak_region_start
                               + (1 - 2 * peak_range_offset) * peak_region_length)
            peak_loc = np.random.uniform(peak_region_start, peak_region_end)

            if max_wobbly > 0:
                wobbly = np.random.randint(min_wobbly, max_wobbly + 1)
            else:
                wobbly = 0

            if wobbly > 0:
                wobble_strs = []
                for _ in range(wobbly):
                    # The linear coefficient is drawn before the constant one
                    # for each term.
                    lin = np.random.uniform(-wobbly_lin_coef_lim,
                                            wobbly_lin_coef_lim)
                    const = np.random.uniform(-wobbly_const_coef_lim,
                                              wobbly_const_coef_lim)
                    wobble_strs.append('sin({} * x[0] + {})'.format(lin, const))
                norm_wobble_str = '{} + {}'.format(wobbly, ' + '.join(wobble_strs))
            else:
                # No wobble (either switched off, or a factor of 0 was drawn,
                # which would otherwise divide by zero below).
                wobbly = 1
                norm_wobble_str = '1.0'

            # *0.5 because norm_wobble_str / wobbly takes values in [0, 2]
            peak_strs.append('{} * ({}) / cosh(x[0] - {})'.format(
                0.5 * peak_height / wobbly, norm_wobble_str, peak_loc))

        return ' + '.join(peak_strs)

    @classmethod
    def _gen_point(cls, a=defaults.a, b=defaults.b, t=defaults.t,
                   T=defaults.T, fineness_t=defaults.fineness_t,
                   fineness_x=defaults.fineness_x):
        """Handles the generation of a particular point.

        Returns a (t_point, x_point) pair, each rounded down to a multiple of
        the corresponding fineness.
        """
        # The coarse grid mustn't have any part of it lie outside
        # [t, T]x[a, b], as we don't have any data there.
        # (Taking off all of grid.num_intervals, rather than just half of it,
        # is a little overkill.)
        t_margin = grid.num_intervals.t * grid.coarse_grid_sep.t
        x_margin = grid.num_intervals.x * grid.coarse_grid_sep.x
        t_point = np.random.uniform(t + t_margin, T - t_margin)
        x_point = np.random.uniform(a + x_margin, b - x_margin)
        t_point = tools.round_mult(t_point, fineness_t, 'down')
        x_point = tools.round_mult(x_point, fineness_x, 'down')
        return t_point, x_point