Example #1
class LocalDensity(flowws.Stage):
    """Compute the local density of particles in the system"""
    ARGS = [
        Arg('histogram_bins', '-b', int, default=64,
            help='Number of bins to use in the histogram plot'),
        Arg('r_max', '-r', float, required=True,
            help='Maximum radial distance'),
        Arg('diameter', '-d', float, default=0.,
            help='Smoothing diameter to use in the density calculation'),
    ]

    def run(self, scope, storage):
        """Compute and provide the local density"""
        compute = freud.density.LocalDensity(
            self.arguments['r_max'],
            self.arguments['diameter'])

        box = freud.box.Box.from_box(scope['box'], scope.get('dimensions', 3))
        compute.compute((box, scope['position']), scope['position'])
        self.density = scope['local_density'] = compute.density

        scope.setdefault('color_scalars', []).append('local_density')
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot(111)
        ax.hist(self.density, bins=self.arguments['histogram_bins'],
                density=True)
        ax.set_xlabel('Density')
        ax.set_ylabel('Probability')
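
A minimal standalone sketch of the freud computation this stage wraps; the box size, particle count, and argument values here are arbitrary stand-ins, assuming freud 2.x.
import freud
import numpy as np

box = freud.box.Box.cube(10)
points = np.random.uniform(-5, 5, (1000, 3)).astype(np.float32)

compute = freud.density.LocalDensity(r_max=2.5, diameter=1.0)
compute.compute((box, points), points)
print(compute.density.shape)  # one local density value per query point
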
Example #2
class InitializeTF(flowws.Stage):
    """Initialize tensorflow, enabling memory growth for GPUs."""

    ARGS = [
        Arg('jit', '-j', bool, True,
            help='If True, enable JIT compilation'),
        Arg('gpu', '-g', bool, True,
            help='If False, disable GPUs'),
        Arg('memory_growth', '-m', bool, True,
            help='If True, enable gradual memory growth'),
    ]

    def run(self, scope, storage):
        tf.config.optimizer.set_jit(self.arguments['jit'])

        if not self.arguments['gpu']:
            tf.config.set_visible_devices([], 'GPU')

        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                if self.arguments['memory_growth']:
                    # Currently, memory growth needs to be the same across GPUs
                    for gpu in gpus:
                        tf.config.experimental.set_memory_growth(gpu, True)
                logical_gpus = tf.config.experimental.list_logical_devices('GPU')
                print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
            except RuntimeError as e:
                # Memory growth must be set before GPUs have been initialized
                print(e)
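
The same GPU setup can be performed standalone; this sketch reuses the tf.config calls from the stage above.
import tensorflow as tf

tf.config.optimizer.set_jit(True)
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    # memory growth must be configured before the GPUs are initialized
    tf.config.experimental.set_memory_growth(gpu, True)
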
Example #3
class WikiText2(flowws.Stage):
    """Train a model on the wikitext-2 dataset"""

    ARGS = [
        Arg('sequence_length',
            '-l',
            int,
            64,
            help='Maximum sequence length of the network'),
        Arg('batch_size', '-b', int, 32, help='Batch size for training')
    ]

    def run(self, scope, storage):
        encoder = wikitext.build_wikitext_bpe_encoder()
        scope['vocabulary_size'] = encoder.vocabulary_size()
        sequence_length = self.arguments['sequence_length']

        def x_y_for_dataset(dataset_name):
            fat_sample = training_data_to_dense_samples(
                dataset_name, encoder, sequence_length)
            _x = fat_sample[:, :sequence_length]
            _y = np.expand_dims(fat_sample[:, 1:], axis=-1)
            return _x, _y

        scope['training_data'] = x_y_for_dataset(wikitext.TRAINING_SET_NAME)
        (scope['x_train'], scope['y_train']) = scope['training_data']
        scope['validation_data'] = x_y_for_dataset(
            wikitext.VALIDATION_SET_NAME)
        scope['test_data'] = x_y_for_dataset(wikitext.TEST_SET_NAME)
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope['sequence_length'] = sequence_length
        scope['batch_size'] = self.arguments['batch_size']
        scope['encoder'] = lambda x: [n for (n, token) in encoder(x)]
        scope['decoder'] = lambda x: encoder.decode(x)
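
A toy illustration of the x/y construction in x_y_for_dataset: the targets are the inputs shifted by one token, which is the standard next-token language-modeling setup.
import numpy as np

fat_sample = np.arange(10).reshape(2, 5)   # two token sequences of length 5
sequence_length = 4
x = fat_sample[:, :sequence_length]        # tokens 0..3 of each sequence
y = np.expand_dims(fat_sample[:, 1:], -1)  # tokens 1..4, with a trailing axis
                                           # for sparse_categorical_crossentropy
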
Example #4
class NeuralPotentialDropout(flowws.Stage):
    """Use learned, neural potential-based dropout classes in future model stages."""
    ARGS = [
        Arg('reset',
            '-r',
            bool,
            False,
            help='If given, first reset (clear) dropout configuration'),
        Arg('mu', '-m', float, help='Neural potential to use for layers'),
        Arg('spatial_mask',
            '-s',
            bool,
            False,
            help='If True, learn a spatial mask for spatial dropout'),
    ]

    def run(self, scope, storage):
        mu = self.arguments['mu']

        if self.arguments['reset']:
            scope.pop('dropout_class', None)
            scope.pop('dropout_spatial2d_class', None)

        layer_dropout = functools.partial(LearnedDropout, mu)
        sequence_dropout = functools.partial(
            LearnedSequenceDropout,
            mu,
            spatial_mask=self.arguments['spatial_mask'])
        spatial_dropout = functools.partial(
            LearnedSpatialDropout2D,
            mu,
            spatial_mask=self.arguments['spatial_mask'])

        scope['dropout_class'] = layer_dropout
        scope['dropout_sequence_class'] = sequence_dropout
        scope['dropout_spatial2d_class'] = spatial_dropout
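
A hedged sketch of the consumer side: downstream model stages (e.g. the MLP example below) look these classes up with a standard-keras fallback, so the partials above must accept a rate as their remaining positional argument.
from tensorflow import keras

scope = {}  # stand-in for the flowws scope
Dropout = scope.get('dropout_class', keras.layers.Dropout)
layer = Dropout(0.5)  # with NeuralPotentialDropout active, mu is already
                      # bound via functools.partial, so only the rate is passed
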
Example #5
class Tensorboard(flowws.Stage):
    """Specify a tensorboard dump callback."""

    ARGS = [
        Arg('histogram_period',
            None,
            int,
            0,
            help='Frequency to dump histogram data'),
        Arg('write_graph',
            '-g',
            bool,
            True,
            help='Write the computational graph'),
        Arg('profile_batch',
            '-b',
            int,
            2,
            help='Batch index to profile (0 to disable)'),
    ]

    def run(self, scope, storage):
        callback = callbacks.TensorBoard(
            histogram_freq=self.arguments['histogram_period'],
            write_graph=self.arguments['write_graph'],
            profile_batch=self.arguments['profile_batch'])

        scope.setdefault('callbacks', []).append(callback)
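
The equivalent callback constructed directly with keras; log_dir is an arbitrary choice here, not something this stage sets.
from tensorflow import keras

callback = keras.callbacks.TensorBoard(
    log_dir='logs', histogram_freq=1, write_graph=True, profile_batch=2)
# a later training stage would pass scope['callbacks'] to model.fit()
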
Example #6
class DEMInteraction(flowws.Stage):
    """Specify that DEM interactions should be included in future MD stages"""
    ARGS = [
        Arg('reset', '-r', bool, False,
            help='Clear previously-defined DEM interactions beforehand'),
        Arg('type', '-t', str, required=True,
            help='Interaction class name'),
    ]

    def run(self, scope, storage):
        """Registers this object to provide a DEM force compute in future MD stages"""
        callbacks = scope.setdefault('callbacks', collections.defaultdict(list))

        if self.arguments['reset']:
            pre_run_callbacks = [c for c in callbacks['pre_run']
                                 if not isinstance(c, DEMInteraction)]
            callbacks['pre_run'] = pre_run_callbacks

        callbacks['pre_run'].append(self)

    def __call__(self, scope, storage, context):
        """Callback to be performed before each run command.

        Initializes a DEM pair potential interaction based on per-type
        shape information.
        """
        interaction_type = self.arguments['type']

        nlist = hoomd.md.nlist.tree()
        system = scope['system']
        dimensions = scope.get('dimensions', 3)

        try:
            type_shapes = scope['type_shapes']
        except KeyError:
            msg = ('Shape information has not been set for DEM interactions. '
                   'Use a ShapeDefinition or similar step beforehand.')
            raise WorkflowError(msg)

        if interaction_type == 'wca':
            radii = [shape.get('rounding_radius', 0) for shape in type_shapes]
            assert np.isclose(min(radii), max(radii)), 'WCA requires identical rounding radii for all shapes'
            radius = radii[0]

            if radius <= 0:
                logger.warning('Non-rounded shapes given, using a rounding radius of 0.5')
                radius = .5

            potential = hoomd.dem.pair.WCA(nlist, radius)
            for (name, shape) in zip(system.particles.types, type_shapes):
                vertices = shape['vertices']
                if dimensions == 2:
                    potential.setParams(name, vertices)
                else:
                    (vertices, faces) = hoomd.dem.utils.convexHull(shape['vertices'])
                    potential.setParams(name, vertices, faces)
        else:
            raise NotImplementedError(
                'Unknown Interaction type {}'.format(interaction_type))
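
A hedged sketch of how an MD stage might dispatch the callbacks registered above; the actual run stage is not among these examples.
import collections

def dispatch_pre_run(scope, storage, context):
    callbacks = scope.setdefault('callbacks', collections.defaultdict(list))
    for callback in callbacks['pre_run']:
        callback(scope, storage, context)  # DEMInteraction.__call__ fires here
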
Example #7
class CenterSpaceGroup(flowws.Stage):
    """Attempt to automatically detect the space group of the system and center it accordingly."""
    ARGS = [
        Arg('minimum_distance',
            '-d',
            float,
            .9,
            help=
            'Precision with which to merge points transformed by space group transformations'
            ),
        Arg('use_types',
            '-t',
            bool,
            True,
            help='Use type information when centering the unit cell'),
    ]

    def run(self, scope, storage):
        """Detect the space group and center the system."""
        box = scope['box']
        boxmat = plato.math.box_to_matrix(box)
        fractions = plato.math.make_fractions(box, scope['position'])
        types = scope['type']

        if not self.arguments['use_types']:
            types = np.zeros_like(types)

        spglib_cell = (boxmat, fractions, types)
        try:
            self.spg_info = identify_spacegroup(
                spglib_cell,
                max_iterations=64,
                minimum_distance=self.arguments['minimum_distance'])
        except ValueError:
            # didn't work; don't display things
            return

        (box, fractions, types) = standardize_cell(spglib_cell,
                                                   self.spg_info['common'][1])

        scope['box'] = box
        scope['position'] = plato.math.fractions_to_coordinates(box, fractions)
        scope['type'] = types
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot()

        histogram = self.spg_info['histogram']
        keys = list(histogram)
        counts = [histogram[k] for k in keys]
        xs = np.arange(len(keys))
        ax.bar(xs, counts)

        ax.set_title('Space group prevalence')
        ax.set_xticks(xs)
        ax.set_xticklabels(keys)
        ax.set_ylabel('Count')
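
The fractional-coordinate round trip this stage relies on, sketched with plain numpy; plato.math provides equivalent box-aware helpers for triclinic boxes.
import numpy as np

boxmat = np.diag([10., 10., 10.])                # a cubic box matrix
fractions = np.random.uniform(0, 1, (100, 3))
positions = fractions @ boxmat.T                 # fractions -> Cartesian
recovered = positions @ np.linalg.inv(boxmat).T  # Cartesian -> fractions
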
Example #8
class Colormap(flowws.Stage):
    """Access and use matplotlib colormaps on scalar quantities.

    This module emits a `color` value, calculated using a given scalar
    argument and matplotlib colormap name.

    Valid scalars quantities can be provided to this module by saving
    them in the scope and adding their name to the `color_scalars`
    list.
    """
    ARGS = [
        Arg('colormap_name',
            '-c',
            str,
            'viridis',
            help='Name of the matplotlib colormap to use'),
        Arg('argument', '-a', str, help='Name of the value to map to colors'),
        Arg('range',
            '-r', (float, float),
            help='Minimum and maximum values of the scalar to be mapped'),
    ]

    def run(self, scope, storage):
        """Generate an array of colors using the given color scalars."""
        color_scalars = scope.setdefault('color_scalars', [])
        if 'type' not in color_scalars and 'type' in scope:
            color_scalars.append('type')

        argument = self.arguments.get('argument', None)
        if (argument is not None and argument not in color_scalars
                and argument in scope):
            color_scalars.append(argument)

        self.arg_specifications['argument'].valid_values = color_scalars

        if argument is None:
            self.arguments['argument'] = color_scalars[0]
        self.arg_specifications['colormap_name'].valid_values = \
            sorted(matplotlib.cm.cmap_d.keys())

        N = len(scope['position'])

        try:
            values = scope[self.arguments['argument']].copy()
        except KeyError:
            values = np.full(N, 0.5)

        normalize = None
        if self.arguments.get('range', None):
            (vmin, vmax) = self.arguments['range']
            normalize = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)

        cmap = matplotlib.cm.get_cmap(self.arguments['colormap_name'])
        smap = matplotlib.cm.ScalarMappable(normalize, cmap)

        scope['color'] = smap.to_rgba(values)
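
The core colormap lookup is standard matplotlib rather than anything flowws-specific:
import matplotlib.cm
import matplotlib.colors
import numpy as np

values = np.linspace(0, 2, 8)
norm = matplotlib.colors.Normalize(vmin=0, vmax=2)
smap = matplotlib.cm.ScalarMappable(norm, matplotlib.cm.get_cmap('viridis'))
colors = smap.to_rgba(values)  # (N, 4) RGBA array, like scope['color']
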
Example #9
class Diffraction(flowws.Stage):
    """Compute a 3D diffraction pattern of the system and display its slice or projection.

    This stage computes a 3D histogram of the system based on the
    given periodic system box and particle coordinates and performs
    the FFT in 3D. Either a slice or full projection through the
    Fourier space is displayed with the current system orientation.

    **Note:** This module should be considered experimental in terms
    of stability for the time being; the inputs and outputs may change
    drastically in the future, or the module may be removed entirely.
    """
    ARGS = [
        Arg('bin_count',
            '-b',
            int,
            default=128,
            help='Number of bins to use in the x, y, and z directions'),
        Arg('projection',
            '-p',
            bool,
            False,
            help=
            'If True, project the diffraction pattern all the way through Fourier space'
            ),
        Arg('min_value',
            None,
            float,
            0,
            help='Minimum value of intensity to clip to'),
        Arg('max_value',
            None,
            float,
            1,
            help='Maximum value of intensity to clip to'),
        Arg('sigma', None, float, 0, help='Lengthscale of blurring the FFT'),
    ]

    def run(self, scope, storage):
        """Prepare to display the diffraction pattern"""
        self.positions = scope['position']
        self.box = scope['box']
        scope.setdefault('visuals', []).append(self)
        scope.setdefault('visual_link_rotation', []).append(self)

    def draw_plato(self):
        prim = DiffractionPrimitive(positions=self.positions, box=self.box)
        prim.size = self.arguments['bin_count']
        prim.sigma = self.arguments['sigma']
        prim.vmin = self.arguments['min_value']
        prim.vmax = self.arguments['max_value']
        prim.projection = self.arguments['projection']
        prim.cmap = DEFAULT_COLORMAP

        return plato.draw.Scene(prim, size=(1, 1), pixel_scale=800)
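
A rough numpy sketch of the histogram-and-FFT approach the docstring describes; DiffractionPrimitive's actual implementation (binning against the periodic box, blurring, slicing) is not shown here.
import numpy as np

def diffraction_intensity(positions, bins=128, extent=5.):
    density, _ = np.histogramdd(positions, bins=bins,
                                range=[(-extent, extent)] * 3)
    return np.abs(np.fft.fftshift(np.fft.fftn(density)))**2
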
Example #10
class LSTMLM(flowws.Stage):
    """Specify an LSTM-based language model."""
    ARGS = [
        Arg('embedding_dimensions',
            '-e',
            int,
            32,
            help='Embedding dimensionality for inputs'),
        Arg('layer_widths',
            '-w', [int],
            help='Number of units for each LSTM layer'),
        Arg('inner_dropout',
            None,
            float,
            help='Dropout rate to use inside LSTM module'),
        Arg('inter_dropout',
            None,
            float,
            help='Dropout rate to use after each LSTM module'),
        Arg('sequence_dropout',
            None,
            bool,
            False,
            help='If True, use sequence-element dropout by default'),
    ]

    def run(self, scope, storage):
        vocabulary_size = scope['vocabulary_size']
        sequence_length = scope['sequence_length']
        default_dropout = (keras.layers.SpatialDropout1D
                           if self.arguments['sequence_dropout'] else
                           keras.layers.Dropout)
        dropout_cls = scope.get('dropout_sequence_class', default_dropout)

        inputs = last = keras.layers.Input((sequence_length, ), dtype=tf.int32)
        last = keras.layers.Embedding(
            vocabulary_size, self.arguments['embedding_dimensions'])(inputs)

        for i, w in enumerate(self.arguments['layer_widths']):
            layer = keras.layers.LSTM(w,
                                      return_sequences=True,
                                      dropout=self.arguments.get(
                                          'inner_dropout', 0))
            last = layer(last)

            last = keras.layers.BatchNormalization()(last)

            if 'inter_dropout' in self.arguments:
                last = dropout_cls(self.arguments['inter_dropout'])(last)

        last = keras.layers.Dense(vocabulary_size, activation='softmax')(last)

        scope['input_symbol'] = inputs
        scope['output'] = last
        scope['loss'] = 'sparse_categorical_crossentropy'
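
Downstream, a training stage assembles these scope entries into a model (compare the Train stage, Example #30):
def build_model(scope):
    from tensorflow import keras
    model = keras.models.Model(scope['input_symbol'], scope['output'])
    model.compile('adam', loss=scope['loss'])
    return model
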
Example #11
class Dshemuchadse2021OPPInteraction(Dshemuchadse2021InteractionBase):
    """Specify a new interaction potential from the paper "Moving beyond the constraints of chemistry via crystal structure discovery with isotropic multiwell pair potentials" to include in future MD stages

    These interactions are taken from the methods description in the
    paper (Proceedings of the National Academy of Sciences May 2021,
    118 (21); DOI 10.1073/pnas.2024034118). This module implements the
    oscillatory pair potential, consisting of a short-range repulsion
    and a cosine term that scales with r^-3.

    The potential is rescaled such that the global minimum is -1
    epsilon_0.

    """
    ARGS = [
        Arg('reset', '-r', bool, False,
            help='Disable previously-defined interactions'),
        Arg('k', '-k', float,
            help='Interaction parameter k'),
        Arg('phi', '-p', float,
            help='Interaction parameter phi'),
        Arg('width', '-w', int, 1000,
            help='Number of points at which to evaluate the tabulated potential'),
        Arg('r_min', None, float, .5,
            help='Minimum distance at which to evaluate the tabulated potential'),
    ]

    def run(self, scope, storage):
        """Registers this object to provide a force compute in future MD stages"""
        callbacks = scope.setdefault('callbacks', collections.defaultdict(list))

        if self.arguments['reset']:
            pre_run_callbacks = [c for c in callbacks['pre_run']
                                 if not isinstance(c, Dshemuchadse2021OPPInteraction)]
            callbacks['pre_run'] = pre_run_callbacks
            return

        self.potential_kwargs = dict(k=self.arguments['k'], phi=self.arguments['phi'])
        self.rmax, self.potential_kwargs['scale'] = self.find_potential_parameters()

        callbacks['pre_run'].append(self)
        scope.setdefault('visuals', []).append(self)

    @staticmethod
    def force(r, k, phi, scale=1.):
        arg = k*(r - 1.) + phi
        result = 15*r**(-16) + (k*r*np.sin(arg) + 3*np.cos(arg))*r**(-4)
        return result*scale

    @staticmethod
    def potential(r, k, phi, scale=1.):
        result = r**(-15) + np.cos(k*(r - 1.) + phi)*r**(-3)
        return result*scale
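
A quick numerical check that force() is -dV/dr of potential(), using a central difference; the parameter values here are arbitrary.
import numpy as np

r, h = np.linspace(0.8, 3, 256), 1e-6
k, phi = 7.0, 0.25
pot = Dshemuchadse2021OPPInteraction.potential
frc = Dshemuchadse2021OPPInteraction.force
numeric = -(pot(r + h, k, phi) - pot(r - h, k, phi))/(2*h)
assert np.allclose(frc(r, k, phi), numeric, atol=1e-4)
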
Example #12
class CIFAR100(flowws.Stage):
    """Use the CIFAR100 dataset from keras."""

    ARGS = [
        Arg('coarse_labels',
            '-c',
            bool,
            False,
            help=
            'If True, use 20 coarse-grained labels instead of 100 fine-grained labels'
            ),
    ]

    def run(self, scope, storage):
        label_mode = 'coarse' if self.arguments['coarse_labels'] else 'fine'
        (x_train, y_train), (x_test,
                             y_test) = keras.datasets.cifar100.load_data(
                                 label_mode=label_mode)

        num_classes = len(np.unique(y_train))

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255
        input_shape = x_train[0].shape

        scope['x_train'] = x_train
        scope['x_test'] = x_test
        scope['y_train'] = y_train
        scope['y_test'] = y_test
        scope['num_classes'] = num_classes
Example #13
class MobileNetV2(flowws.Stage):
    """Use the MobileNetV2 architecture as provided by keras."""

    ARGS = [
        Arg('dropout',
            '-d',
            float,
            0,
            help='Dropout probability to use (if any)'),
    ]

    def run(self, scope, storage):
        try:
            input_shape = scope['x_train'][0].shape
        except KeyError:
            input_shape = next(scope['train_generator'])[0][0].shape
        num_classes = scope['num_classes']
        Dropout = scope.get('dropout_spatial2d_class', keras.layers.Dropout)

        model = MobileNetModel(classes=num_classes,
                               weights=None,
                               input_shape=input_shape)

        if self.arguments['dropout']:
            clonefun_ = functools.partial(clonefun,
                                          Dropout=Dropout,
                                          rate=self.arguments['dropout'])
            model = keras.models.clone_model(model, clone_function=clonefun_)

        scope['model'] = model
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope.setdefault('metrics', []).append('accuracy')
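
A guess at what a clone function like clonefun does: swap plain Dropout layers for the configured class while keras clones the model. The real helper is defined elsewhere in this package.
from tensorflow import keras

def example_clonefun(layer, Dropout=keras.layers.SpatialDropout2D, rate=0.5):
    if type(layer) is keras.layers.Dropout:
        return Dropout(rate)
    return layer.__class__.from_config(layer.get_config())

# model = keras.models.clone_model(model, clone_function=example_clonefun)
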
Example #14
class Pyriodic(flowws.Stage):
    """Browse structures available in :std:doc:`pyriodic<pyriodic:index>`.

    This module provides all the structures available in the pyriodic
    default database (which uses all available pyriodic libraries
    installed on the system). Systems are resized to a minimum of the
    given size and noise may be added before the rest of the workflow
    is run.
    """
    ARGS = [
        Arg('structure',
            '-s',
            str,
            required=True,
            valid_values=VALID_STRUCTURES,
            help='Structure to display'),
        Arg('size', '-n', int, default=1, help='Minimum size of the system'),
        Arg('noise',
            None,
            float,
            default=0,
            help='Gaussian noise to apply to each position'),
    ]

    def run(self, scope, storage):
        """Load the given structure into the scope"""
        structure = self._get_structure(self.arguments['structure'],
                                        self.arguments['size'],
                                        self.arguments['noise'])

        scope['position'] = structure.positions
        scope['type'] = structure.types
        scope['box'] = structure.box

    @functools.lru_cache(maxsize=1)
    def _get_structure(self, name, size, noise):
        query = 'select structure from unit_cells where name = ? limit 1'
        for (structure, ) in pyriodic.db.query(query, (name, )):
            pass

        structure = structure.rescale_shortest_distance(1)
        structure = structure.replicate_upto(size)
        if noise:
            structure = structure.add_gaussian_noise(noise)

        return structure
Example #15
class ImagenetDirectory(flowws.Stage):
    """Load ImageNet images from a specified directory."""

    ARGS = [
        Arg('base', '-b', str,
            help='Base directory storing images'),
        Arg('validation_fraction', '-v', float, .3,
            help='Fraction of files to be used in validation set'),
        Arg('augmentations', '-a', [str],
            help='Names of augmentations to perform on each image (use "null" for none)'),
        Arg('batch_size', None, int, 32,
            help='Batch size for training and validation'),
        Arg('train_epoch_scaling', None, float, 1.,
            help='Factor to scale the number of batches considered to be part of an epoch by (train set)'),
        Arg('val_epoch_scaling', None, float, 1.,
            help='Factor to scale the number of batches considered to be part of an epoch by (validation set)'),
        Arg('test_epoch_scaling', None, float, 1.,
            help='Factor to scale the number of batches considered to be part of an epoch by (test set)'),
    ]

    def run(self, scope, storage):
        train_dir = os.path.join(self.arguments['base'], 'train')
        test_dir = os.path.join(self.arguments['base'], 'val')

        label_names = list(sorted(os.listdir(train_dir)))
        label_map = {label: i for (i, label) in enumerate(label_names)}

        train_files, val_files = split_filenames(
            train_dir, label_names, self.arguments['validation_fraction'])

        test_files, _ = split_filenames(test_dir, label_names, -1)

        augmentation_names = (self.arguments['augmentations'] or
                              ['scale', 'crop', 'maybe_flip', 'keras_preprocess'])
        augmentations = [AUGMENTATIONS[name] for name in augmentation_names]

        train_generator = batch_generator(
            train_dir, train_files, label_map, self.arguments['batch_size'], augmentations)
        val_generator = batch_generator(
            train_dir, val_files, label_map, self.arguments['batch_size'], augmentations)
        test_generator = batch_generator(
            test_dir, test_files, label_map, self.arguments['batch_size'], augmentations)

        steps_per_epoch = int(len(train_files)//self.arguments['batch_size']*
                              self.arguments['train_epoch_scaling'])
        validation_steps = int(len(val_files)//self.arguments['batch_size']*
                               self.arguments['val_epoch_scaling'])
        test_steps = int(len(test_files)//self.arguments['batch_size']*
                         self.arguments['test_epoch_scaling'])

        scope['label_names'] = label_names
        scope['label_map'] = label_map
        scope['train_generator'] = train_generator
        scope['generator_train_steps'] = steps_per_epoch
        scope['validation_generator'] = val_generator
        scope['generator_val_steps'] = validation_steps
        scope['test_generator'] = test_generator
        scope['generator_test_steps'] = test_steps
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope['num_classes'] = len(label_names)
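
How the generator entries would typically be consumed downstream, using the standard keras generator-training interface; model is assumed to have been built by a later stage.
def fit_from_generators(model, scope, epochs=10):
    model.fit(scope['train_generator'],
              steps_per_epoch=scope['generator_train_steps'],
              validation_data=scope['validation_generator'],
              validation_steps=scope['generator_val_steps'],
              epochs=epochs)
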
Example #16
class TextMetrics(flowws.Stage):
    """Specify metrics to compute and record during training."""
    ARGS = [
        Arg('metrics', '-m', [str], [], help='Metrics to compute and save'),
    ]

    def run(self, scope, storage):
        metrics = scope.setdefault('metrics', [])

        for m in self.arguments['metrics']:
            metrics.append(METRIC_MAP.get(m, m))
Example #17
class Steinhardt(flowws.Stage):
    """Compute the Steinhardt order parameter of particles in the system"""
    ARGS = [
        Arg('histogram_bins', '-b', int, default=64,
            help='Number of bins to use in the histogram plot'),
        Arg('l', '-l', int, default=6,
            help='Spherical harmonic degree of the order parameter'),
        Arg('r_max', '-r', float,
            help='Maximum radial distance to consider for neighbors (if given)'),
        Arg('num_neighbors', '-n', int,
            help='Number of neighbors to use; overrules r_max if given'),
        Arg('r_guess', None, float, 2,
            help='Characteristic distance for finding num_neighbors neighboring particles'),
    ]

    def run(self, scope, storage):
        """Compute and provide the Steinhardt order parameter"""
        compute = freud.order.Steinhardt(self.arguments['l'])

        box = freud.box.Box.from_box(scope['box'], scope.get('dimensions', 3))
        query_options = dict(exclude_ii=True)
        if 'num_neighbors' in self.arguments:
            query_options['num_neighbors'] = self.arguments['num_neighbors']
            query_options['r_guess'] = self.arguments['r_guess']
        else:
            query_options['r_max'] = self.arguments.get('r_max', None)
        compute.compute((box, scope['position']), query_options)

        self.steinhardt = compute.ql
        name = 'steinhardt_q{}'.format(self.arguments['l'])
        scope[name] = self.steinhardt
        scope.setdefault('color_scalars', []).append(name)
        scope.setdefault('visuals', []).append(self)

    def draw_matplotlib(self, figure):
        ax = figure.add_subplot(111)
        ax.hist(self.steinhardt, bins=self.arguments['histogram_bins'],
                density=True)
        ax.set_xlabel('$Q_{{{}}}$'.format(self.arguments['l']))
        ax.set_ylabel('Probability')
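
The equivalent standalone freud call, assuming freud 2.x; the 12-neighbor query here follows common practice for q6 rather than anything this stage fixes.
import freud
import numpy as np

box = freud.box.Box.cube(10)
points = np.random.uniform(-5, 5, (500, 3)).astype(np.float32)

compute = freud.order.Steinhardt(6)
compute.compute((box, points), dict(num_neighbors=12, exclude_ii=True))
q6 = compute.ql  # one q_6 value per particle, as used above
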
Example #18
class ViewQt(flowws.Stage):
    """Provide an interactive view of the entire workflow using Qt.

    An interactive display window will be opened that displays visual
    results while allowing the arguments of all stages in the workflow
    to be modified.
    """
    ARGS = [
        Arg('controls', '-c', bool, True, help='Display controls'),
    ]

    def __init__(self, *args, **kwargs):
        self.workflow = None
        self._running_threads = None
        self._rerun_event = threading.Event()
        self._stage_event = threading.Event()
        self._exit_event = threading.Event()
        self._visual_queue = queue.Queue()
        self._scope_queue = queue.Queue()
        super().__init__(*args, **kwargs)

    def run(self, scope, storage):
        """Displays parameters and outputs for the workflow in a Qt window."""
        self.workflow = scope['workflow']
        scope['rerun_callback'] = self.rerun
        self._scope_queue.put((scope, storage))

        if self._running_threads is None:
            our_sigint_handler = functools.partial(sigint_handler,
                                                   self._exit_event)
            signal.signal(signal.SIGINT, our_sigint_handler)

            args = (scope, self.workflow, self._rerun_event, self._stage_event,
                    self._exit_event, self._visual_queue,
                    self.arguments['controls'])
            self._visual_queue.put(scope.get('visuals', []))
            self._running_threads = rerun_thread = RerunThread(args=args)
            rerun_thread.start()

            app = ViewQtApp(self.workflow, self._rerun_event,
                            self._stage_event, self._exit_event,
                            self._visual_queue, self._scope_queue,
                            self.arguments['controls'], [])
            app.exec_()

            rerun_thread.join()

    def rerun(self):
        self._rerun_event.set()
Example #19
class MLP(flowws.Stage):
    """Specify a multilayer perceptron model."""

    ARGS = [
        Arg('hidden_widths',
            '-w', [int], [32],
            help='Number of nodes for each hidden layer'),
        Arg('activation', '-a', str, 'relu',
            help='Activation function for the hidden layers'),
        Arg('batch_norm',
            '-b',
            bool,
            False,
            help='Apply batch normalization before all hidden layers'),
        Arg('output_batch_norm',
            None,
            bool,
            False,
            help='Apply batch normalization after each hidden layer'),
        Arg('flatten',
            '-f',
            bool,
            False,
            help='If True, flatten the input before the hidden layers'),
        Arg('dropout',
            '-d',
            float,
            0,
            help='Apply a dropout layer with the given '
            'dropout rate after each hidden layer'),
    ]

    def run(self, scope, storage):
        input_shape = scope['x_train'][0].shape
        input_symbol = keras.layers.Input(shape=input_shape)

        Dropout = scope.get('dropout_class', keras.layers.Dropout)

        layers = []

        if self.arguments['batch_norm']:
            layers.append(keras.layers.BatchNormalization())

        if self.arguments['flatten']:
            layers.append(keras.layers.Flatten())

        for w in self.arguments['hidden_widths']:
            layers.append(
                keras.layers.Dense(w, activation=self.arguments['activation']))
            if self.arguments.get('output_batch_norm', False):
                layers.append(keras.layers.BatchNormalization())
            if self.arguments['dropout']:
                layers.append(Dropout(self.arguments['dropout']))

        scope['input_symbol'] = input_symbol
        scope['output'] = sequence(input_symbol, layers)
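
The sequence() helper is not among these examples; it presumably threads a tensor through a list of layers, roughly like this sketch:
import functools

def sequence(input_symbol, layers):
    return functools.reduce(lambda tensor, layer: layer(tensor),
                            layers, input_symbol)
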
Example #20
class Classifier(flowws.Stage):
    """Specify a simple classifier output."""

    ARGS = [Arg('activation', default='softmax')]

    def run(self, scope, storage):
        layers = []

        layers.append(
            keras.layers.Dense(scope['num_classes'],
                               activation=self.arguments['activation']))

        scope['output'] = sequence(scope['output'], layers)
        scope['loss'] = 'sparse_categorical_crossentropy'
        scope.setdefault('metrics', []).append('accuracy')
Example #21
class SaveGarnett(flowws.Stage):
    """Save trajectory quantities using Garnett.

    This stage currently only saves an individual frame, but saving an
    entire trajectory is intended to work in the future.
    """
    ARGS = [
        Arg('filename',
            '-f',
            str,
            'trajectory.sqlite',
            help='Name of file to save trajectory to'),
    ]

    def run(self, scope, storage):
        """Save trajectory quantities."""
        filename = self.arguments['filename']

        box = scope['box']
        box = garnett.trajectory.Box(Lx=box[0],
                                     Ly=box[1],
                                     Lz=box[2],
                                     xy=box[3],
                                     xz=box[4],
                                     yz=box[5])
        positions = scope['position']
        if 'orientation' in scope:
            orientations = scope['orientation']
        else:
            orientations = np.tile([(1, 0, 0, 0)], (len(positions), 1))
        types = scope['type']

        if 'type_names' not in scope:
            type_names = [chr(ord('A') + t) for t in sorted(set(types))]
        else:
            type_names = scope['type_names']

        the_frame = FakeFrame(box, positions, orientations, type_names, types,
                              None, None, len(positions))

        mode = ('w' if any(filename.endswith(suf)
                           for suf in ['.pos', '.cif']) else 'wb')

        with storage.open(filename,
                          mode=mode,
                          on_filesystem=True) as f:
            garnett.write([the_frame], f.name)
Example #22
class Center(flowws.Stage):
    """Center the system through periodic boundary conditions.

    This module modifies the positions of the system to have either
    the center of mass of the system or a single indicated particle at
    (0, 0, 0).
    """
    ARGS = [
        Arg('particle',
            '-p',
            int,
            -1,
            help=
            'Particle index to center with (default: use center of mass of the system)'
            ),
    ]

    def run(self, scope, storage):
        """Center the system."""
        box = scope.get('box', None)
        positions = scope['position']
        self.arg_specifications['particle'].valid_values = flowws.Range(
            -1, len(positions), (True, False))

        index = self.arguments['particle']
        index = index if index >= 0 else None

        if index is not None:
            center_point = positions[index]
            positions -= center_point[np.newaxis]

            if box is not None:
                positions = wrap(box, positions)

        elif box is None:
            center_point = np.mean(positions, axis=0)
            positions -= center_point[np.newaxis]

        else:
            positions = center(box, positions)

        scope['position'] = positions
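
The wrap() and center() helpers are not shown here; for an orthorhombic box, wrapping amounts to the minimum-image convention, roughly:
import numpy as np

def wrap_orthorhombic(box_lengths, positions):
    # map each coordinate into [-L/2, L/2) for box edge lengths L
    return positions - box_lengths * np.round(positions / box_lengths)
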
Example #23
class NeuralPotentialController(flowws.Stage):
    """Register a controller callback that drives learned dropout layers toward a target mean passthrough probability."""
    ARGS = [
        Arg('k_p', '-k', float, help='Proportional constant (in log-space)'),
        Arg('tau',
            '-t',
            float,
            help='Time constant for I-control (in log-space)'),
        Arg('setpoint',
            '-s',
            float,
            .5,
            help='Mean passthrough probability setpoint'),
        Arg('log_history',
            None,
            bool,
            False,
            help='If True, retain a history in-memory'),
        Arg('period',
            '-p',
            int,
            1,
            help='Period (in batches) for the controller to run'),
        Arg('disable',
            '-d',
            bool,
            False,
            help='If given, disable this callback for future Train invocations'
            ),
    ]

    def run(self, scope, storage):
        if self.arguments['disable']:
            callbacks = scope.setdefault('callbacks', [])
            new_callbacks = [
                c for c in callbacks
                if not isinstance(c, NeuralPotentialControlCallback)
            ]
            scope['callbacks'] = new_callbacks
            return

        callback = NeuralPotentialControlCallback(
            self.arguments['k_p'], self.arguments['tau'],
            self.arguments['setpoint'], self.arguments['period'],
            self.arguments['log_history'])
        scope.setdefault('callbacks', []).append(callback)
Example #24
class GPTModel(flowws.Stage):
    """Specify a GPT-style transformer language model."""

    ARGS = [
        Arg('width', '-w', int, 64,
            help='Working width of the deep network'),
        Arg('depth', '-d', int, 6,
            help='Number of transformer blocks to use'),
        Arg('use_convolutions', '-c', bool, False,
            help='Use causal convolutions instead of position embeddings'),
        Arg('use_agglomeration', '-a', bool, False,
            help='Use agglomerative instead of full attention'),
        Arg('use_adaptive_computation', None, bool, False,
            help='Use adaptive computation time'),
        Arg('convolution_width', None, int, 8,
            help='Width of causal convolutions to use'),
        Arg('num_heads', '-n', int, 8,
            help='Number of attention/agglomerative heads to use'),
        Arg('print_summary', '-p', bool, False,
            help='Print a summary of the model before continuing'),
        Arg('dropout', None, float, .5,
            help='Dropout to use in transformer layers'),
    ]

    def run(self, scope, storage):
        vocabulary_size = scope['vocabulary_size']
        sequence_length = scope['sequence_length']

        kwargs = {}

        if self.arguments['use_adaptive_computation']:
            model = universal_transformer_gpt_model(
                sequence_length,
                vocabulary_size,
                self.arguments['width'],
                self.arguments['depth'],
                self.arguments['num_heads'],
                agglomerative_attention=self.arguments['use_agglomeration'],
                use_convolutions=self.arguments['use_convolutions'],
                use_coordinate_embeddings=(not self.arguments['use_convolutions']),
                convolution_width=self.arguments['convolution_width'],
                transformer_dropout=self.arguments['dropout'],
                **kwargs
            )
        else:
            if 'dropout_sequence_class' in scope:
                kwargs['dropout_cls'] = scope['dropout_sequence_class']

            model = vanilla_transformer_gpt_model(
                sequence_length,
                vocabulary_size,
                self.arguments['width'],
                self.arguments['depth'],
                self.arguments['num_heads'],
                agglomerative_attention=self.arguments['use_agglomeration'],
                use_convolutions=self.arguments['use_convolutions'],
                use_coordinate_embeddings=(not self.arguments['use_convolutions']),
                convolution_width=self.arguments['convolution_width'],
                transformer_dropout=self.arguments['dropout'],
                **kwargs
            )

        if self.arguments['print_summary']:
            model.summary()

        scope['model'] = model
Example #25
class StageForTesting(flowws.Stage):
    """Minimal stage used to exercise argument handling."""
    ARGS = [
        Arg('required_value', required=True),
        Arg('defaulted_value', default='default'),
    ]
Example #26
class GalileanModel(flowws.Stage):
    """Use a custom model class implementing Galilean (reflection-based) training dynamics."""
    ARGS = [
        Arg('steps',
            '-s',
            int,
            10,
            help='Number of galilean steps to perform for each batch'),
        Arg('move_distance',
            '-m',
            float,
            1e-3,
            help='Distance to move for each step'),
        Arg('log_move_distance',
            '-d',
            bool,
            False,
            help='If True, log the move distance'),
        Arg('tune_distance',
            '-t',
            bool,
            False,
            help=
            'Auto-tune the move distance based on loss surface reflection rates'
            ),
        Arg('gradient_descent_rate',
            '-g',
            float,
            0,
            help='Fraction of steps to use normal gradient descent on'),
        Arg('reduce_distance_period',
            None,
            int,
            0,
            help=
            'Patience (in epochs) for a distance reduction method like ReduceLROnPlateau'
            ),
        Arg('clear',
            '-c',
            bool,
            False,
            help='If given, remove the usage of a previous GalileanModel')
    ]

    def run(self, scope, storage):
        if self.arguments['clear']:
            scope.pop('custom_model_class', None)

            keep = lambda x: (not isinstance(x, (DistanceLogger,
                                                 ReduceStepSizeOnPlateau)))
            scope['callbacks'] = list(filter(keep, scope.get('callbacks', [])))

            return

        timescale = 32 if self.arguments['tune_distance'] else 0
        ModelFun = functools.partial(
            Model,
            galilean_steps=self.arguments['steps'],
            galilean_distance=self.arguments['move_distance'],
            galilean_batch_timescale=timescale,
            gradient_descent_rate=self.arguments['gradient_descent_rate'],
        )
        scope['custom_model_class'] = ModelFun

        if self.arguments['log_move_distance']:
            scope.setdefault('callbacks', []).append(DistanceLogger())

        if self.arguments['reduce_distance_period']:
            callback = ReduceStepSizeOnPlateau(
                patience=self.arguments['reduce_distance_period'],
                monitor='val_loss',
                factor=.75,
                verbose=True)
            scope.setdefault('callbacks', []).append(callback)
Example #27
class PruneNeuralPotentialLayers(flowws.Stage):
    """Prune learned dropout layers by sampling concrete unit masks and rebuilding the model."""
    ARGS = [
        Arg('summarize',
            None,
            bool,
            False,
            help='If True, print the model summary after pruning'),
    ]

    def run(self, scope, storage):
        model = scope['model']
        parent_map = get_input_parent_map(model)

        to_skip, new_descriptions = [], {}
        for layer in find_learnable_dropout_layers(model):
            weights = layer.get_weights()[0]
            probas = tf.math.sigmoid(weights).numpy()
            sampled_mask = np.random.uniform(0, 1, size=probas.shape) < probas
            mask = np.where(sampled_mask)[0]

            to_skip.append(layer)
            Pruner.mask_children_downward(layer, mask, new_descriptions)
            Pruner.mask_parents_upward(layer, mask, new_descriptions,
                                       parent_map)

        new_layer_weights = {}
        nullified_indices = {layer: i for (i, layer) in enumerate(to_skip)}

        def clonefun(layer):
            result = None

            if layer in to_skip:
                name = 'nullified_learnable_dropout_{}'.format(
                    nullified_indices[layer])
                result = keras.layers.Lambda(identity, name=name)
            if layer in new_descriptions:
                desc = new_descriptions[layer]
                result = layer.__class__.from_config(desc.json)
                new_layer_weights[result] = desc.weights
            elif isinstance(layer, keras.models.Model):
                old_input_layer = keras.models.Model.layers.fget(layer)[0]
                new_input_tensor = None
                if old_input_layer in new_descriptions:
                    new_input_desc = new_descriptions[old_input_layer]
                    new_input_layer = old_input_layer.__class__.from_config(
                        new_input_desc.json)
                    new_input_tensor = new_input_layer.output
                result = keras.models.clone_model(layer,
                                                  new_input_tensor,
                                                  clone_function=clonefun)

            if result is not None:
                return result

            return layer.__class__.from_config(layer.get_config())

        new_model = keras.models.clone_model(model, clone_function=clonefun)

        for (layer, weights) in new_layer_weights.items():
            layer.set_weights(weights)

        if self.arguments['summarize']:
            new_model.summary()

        scope['model'] = new_model
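
The mask-sampling step in isolation: per-unit dropout logits become keep probabilities, and a concrete mask of retained unit indices is drawn from them.
import numpy as np

weights = np.array([-2.0, 0.0, 2.0])         # per-unit logits
probas = 1 / (1 + np.exp(-weights))          # sigmoid keep-probabilities
mask = np.where(np.random.uniform(size=3) < probas)[0]  # indices kept
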
Example #28
class BasisSelection(flowws.Stage):
    """Select directions and distances to form the basis vectors for a unit cell.

    This stage produces two visuals: a bond orientational order
    diagram that can be used to select symmetric directions, and a
    "cylindrical RDF" that measures bonds along each given
    direction. Together, these can be used to select the direction and
    length of the three basis vectors for the unit cell.
    """
    ARGS = [
        Arg('orientations',
            '-d', [(float, float, float, float)], [],
            help=
            'Quaternions specifying orientations for basis vectors (in the (0, 0, 1) direction)'
            ),
        Arg('r_max',
            '-r',
            float,
            3,
            help=
            'Maximum distance to consider for bonds to select for the unit cell basis vectors'
            ),
        Arg('angle_tolerance',
            '-a',
            float,
            5,
            help='Angle tolerance for selecting bonds (in degrees)'),
        Arg('rdf_bins',
            None,
            int,
            128,
            help='Number of bins to use for cylindrical RDF'),
        Arg('x_direction',
            '-x',
            int,
            0,
            help=
            'Candidate direction to take as the x direction in the final unit cell'
            ),
        Arg('x_min',
            None,
            float,
            0,
            help=
            'Minimum distance to take bonds from the cylindrical RDF for the x basis vector'
            ),
        Arg('x_max',
            None,
            float,
            1,
            help=
            'Maximum distance to take bonds from the cylindrical RDF for the x basis vector'
            ),
        Arg('y_direction',
            '-y',
            int,
            1,
            help=
            'Candidate direction to take as the y direction in the final unit cell'
            ),
        Arg('y_min',
            None,
            float,
            0,
            help=
            'Minimum distance to take bonds from the cylindrical RDF for the y basis vector'
            ),
        Arg('y_max',
            None,
            float,
            1,
            help=
            'Maximum distance to take bonds from the cylindrical RDF for the y basis vector'
            ),
        Arg('z_direction',
            '-z',
            int,
            2,
            help=
            'Candidate direction to take as the z direction in the final unit cell'
            ),
        Arg('z_min',
            None,
            float,
            0,
            help=
            'Minimum distance to take bonds from the cylindrical RDF for the z basis vector'
            ),
        Arg('z_max',
            None,
            float,
            1,
            help=
            'Maximum distance to take bonds from the cylindrical RDF for the z basis vector'
            ),
    ]

    def __init__(self, *args, **kwargs):
        self.direction_visual = DirectionVisual()
        super().__init__(*args, **kwargs)

    def run(self, scope, storage):
        """Display the interactive direction- and distance-selection visuals."""
        positions = scope['position']
        types = scope['type']

        box = freud.box.Box.from_box(scope['box'])
        aq = freud.AABBQuery(box, positions)
        args = dict(exclude_ii=True,
                    mode='ball',
                    r_max=self.arguments['r_max'])
        nlist = aq.query(positions, args).toNeighborList()
        bonds = positions[nlist.point_indices] - positions[
            nlist.query_point_indices]
        bonds = box.wrap(bonds)

        angle_tol = self.arguments['angle_tolerance'] * np.pi / 180
        R = np.sin(angle_tol)

        orientations = np.array(self.arguments['orientations'])
        orientations /= np.linalg.norm(orientations, axis=-1, keepdims=True)

        basis = np.eye(3)

        bond_filter = np.ones((bonds.shape[0], ), dtype=bool)
        target_quat = np.array((1., 0, 0, 0))

        optimized_orientations = []
        found_vectors = []
        found_bonds = []
        circle_positions = []
        for q in orientations:
            qconj = plato.math.quatconj(q)

            rotated_bonds = plato.math.quatrot(q[np.newaxis, :], bonds)
            filt_thetas = np.arctan2(
                np.sqrt(rotated_bonds[:, 0]**2 + rotated_bonds[:, 1]**2),
                rotated_bonds[:, 2])
            filt_thetas[filt_thetas > np.pi / 2] -= np.pi
            filt = np.abs(filt_thetas) < angle_tol

            filtered_bonds = rotated_bonds[filt]

            found_bonds.append(filtered_bonds)

            if filtered_bonds.shape[0]:
                (target_quat, mean) = find_best_orientation(filtered_bonds, q)
            else:
                target_quat = q

            qconj = plato.math.quatconj(target_quat)

            found_vectors.append(plato.math.quatrot(qconj, [0, 0, 1]))
            optimized_orientations.append(target_quat)

            circle = circle_patterns([(0, 0, 0)], [R], z=-1)
            circle = plato.math.quatrot(qconj, circle)
            circle_positions.append(circle)

            bond_filter *= np.logical_not(filt)

        self.direction_radii = [
            np.linalg.norm(b, axis=-1) for b in found_bonds
        ]

        for i, key in enumerate('xyz'):
            direction_name = key + '_direction'
            min_name, max_name = key + '_min', key + '_max'

            direction_index = self.arguments[direction_name]
            r_min, r_max = self.arguments[min_name], self.arguments[max_name]
            r_min, r_max = sorted([r_min, r_max])

            try:
                direction = found_vectors[direction_index]
            except IndexError:
                break

            candidate_distances = self.direction_radii[direction_index]
            filt = np.logical_and(candidate_distances >= r_min,
                                  candidate_distances < r_max)
            if np.any(filt):
                candidate_distances = candidate_distances[filt]
            distance = np.mean(candidate_distances)

            basis[:, i] = direction * distance

        if np.linalg.det(basis) < 0:
            basis[:, 0] *= -1

        self.direction_visual.update(self.arguments['angle_tolerance'],
                                     circle_positions, bonds[bond_filter],
                                     target_quat.copy())

        self.basis = scope['basis_vectors'] = basis
        scope.setdefault('visuals', []).append(self.direction_visual)
        scope['visuals'].append(self)
        scope.setdefault('visual_link_rotation',
                         []).append(self.direction_visual)

        self.gui_actions = [
            ('Select bonds', self._select_current_position),
            ('Undo last selection', self._undo_selection),
        ]

    def draw_matplotlib(self, figure):
        import matplotlib

        ax = figure.add_subplot()
        colors = matplotlib.rcParams['axes.prop_cycle'].by_key()['color']

        for i, (rs, color) in enumerate(zip(self.direction_radii, colors)):
            (hist, _) = np.histogram(rs, bins=self.arguments['rdf_bins'])

            for key in 'xyz':
                direction_name = key + '_direction'
                if self.arguments[direction_name] != i:
                    continue
                min_name, max_name = key + '_min', key + '_max'
                r_min, r_max = self.arguments[min_name], self.arguments[
                    max_name]
                r_min, r_max = sorted([r_min, r_max])

                r_taken = np.linalg.norm(self.basis[:, 'xyz'.index(key)])
                ax.vlines([r_min, r_max],
                          0,
                          np.max(hist) * 1.05,
                          linestyles='dashed',
                          color=color)
                ax.vlines([r_taken],
                          0,
                          np.max(hist) * 1.05,
                          linestyles='solid',
                          color=color)

            ax.hist(rs,
                    bins=self.arguments['rdf_bins'],
                    alpha=.5,
                    label='Direction {}'.format(i),
                    color=color)

        ax.legend()
        ax.set_xlabel('$R$')
        ax.set_ylabel('$Count(R)$')

    def _select_current_position(self, scope, storage):
        plato_scene = scope['visual_objects'][self.direction_visual]

        self.arguments['orientations'].append(plato_scene.rotation.copy())

        if scope.get('rerun_callback', None) is not None:
            scope['rerun_callback']()

    def _undo_selection(self, scope, storage):
        if self.arguments['orientations']:
            self.arguments['orientations'].pop()

        if scope.get('rerun_callback', None) is not None:
            scope['rerun_callback']()
Example #29
class GTAR(flowws.Stage):
    """Emit the contents of a :std:doc:`libgetar<libgetar:index>`-format file into the scope.

    The GTAR module outputs the records found in a getar-format file
    directly into the scope. It provides a notion of frames in a
    trajectory using the discretely-varying record with the most
    indices as the basis.
    """
    ARGS = [
        Arg('filename',
            '-i',
            str,
            required=True,
            help='Getar-format filename to open'),
        Arg('frame', '-f', int, 0, help='Frame to load'),
        Arg('loop_frames',
            type=bool,
            default=False,
            help=
            'If True, loop the workflow over frames found in the trajectory file, beginning at the given frame'
            ),
        Arg('group', '-g', str, '', help='GTAR group to restrict results to'),
    ]

    def __init__(self, *args, **kwargs):
        self._cached_record_frames = {}
        self._gtar_traj = self._gtar_file = None
        self._looping = False
        super().__init__(*args, **kwargs)

    @functools.lru_cache(maxsize=1)
    def _get_traj(self, filename, storage):
        for handle in (self._gtar_traj, self._gtar_file):
            if handle is not None:
                handle.close()

        try:
            gtar_file = gtar_traj = None

            gtar_file = storage.open(self.arguments['filename'],
                                     'rb',
                                     on_filesystem=True)
            gtar_traj = gtar.GTAR(gtar_file.name, 'r')

            self._gtar_file, self._gtar_traj = gtar_file, gtar_traj
            gtar_file = gtar_traj = None
        finally:
            for handle in (gtar_traj, gtar_file):
                if handle is not None:
                    handle.close()

        return self._gtar_traj

    def run(self, scope, storage):
        """Load records found in a getar file into the scope."""
        scope['filename'] = self.arguments['filename']
        scope['frame'] = self.arguments['frame']
        scope['cache_key'] = scope['filename'], scope['frame']

        gtar_traj = self._get_traj(self.arguments['filename'], storage)

        self._cache_record_frames(gtar_traj, scope, storage,
                                  self.arguments['group'])
        if self.arguments['loop_frames'] and not self._looping:
            self._looping, self.arguments['loop_frames'] = True, False
            try:
                # loop over all frames except the last one
                frame_max = len(self._cached_frame_indices)
                frames = range(self.arguments['frame'], frame_max - 1)
                for frame in frames:
                    self.arguments['frame'] = frame
                    scope['workflow'].run()
            finally:
                self._looping, self.arguments['loop_frames'] = False, True
                # now run the last frame
                self.arguments['frame'] = max(
                    0,
                    len(self._cached_frame_indices) - 1)

        recs = self._set_record_frames()

        for rec in recs:
            callback = functools.partial(gtar_traj.getRecord, rec,
                                         rec.getIndex())
            scope.set_call(rec.getName(), callback)

    def _cache_record_frames(self, traj, scope, storage, group):
        self._cached_record_frames = {}
        for rec in traj.getRecordTypes():
            # ignore per-bond records and so on, for now
            if rec.getGroup() != group:
                continue

            self._cached_record_frames[rec] = list(
                map(index_sort_key, traj.queryFrames(rec)))

        (_, self._cached_frame_indices) = max(
            (len(indices), indices)
            for (rec, indices) in self._cached_record_frames.items()
            if rec.getBehavior() == gtar.Behavior.Discrete)

    def _set_record_frames(self):
        frame = self.arguments['frame']

        index_to_find = index_sort_key(
            self._cached_frame_indices[frame][1])
        self.arg_specifications['frame'].valid_values = flowws.Range(
            0, len(self._cached_frame_indices), (True, False))

        for (rec, indices) in self._cached_record_frames.items():
            try:
                index = find_le(indices, index_to_find)[1]
                rec.setIndex(index)
            except ValueError:
                pass

        return self._cached_record_frames.keys()
Example #30
class Train(flowws.Stage):
    """Compile and train the model specified in the scope."""
    ARGS = [
        Arg('optimizer', '-o', str, 'adam', help='optimizer to use'),
        Arg('epochs', '-e', int, 2000, help='Max number of epochs'),
        Arg('batch_size', '-b', int, 256, help='Batch size'),
        Arg('validation_split', '-v', float, .3),
        Arg('early_stopping', type=int),
        Arg('reduce_lr', type=int),
        Arg('ring_count', type=int),
        Arg('ring_k', type=float, default=1),
        Arg('ring_eps', type=float),
        Arg('dump_filename', '-f', default='dump.tar'),
        Arg('dump_period', '-d', int),
        Arg('seed', '-s', int),
        Arg('verbose',
            None,
            bool,
            True,
            help='If True, print the training progress'),
    ]

    @staticmethod
    def ring_name_updater(layer, i):
        cfg = layer.get_config()
        cfg['name'] = cfg['name'] + '_ring{}'.format(i)
        return layer.__class__.from_config(cfg)

    def run(self, scope, storage):
        if 'seed' in self.arguments:
            s = self.arguments['seed']
            random.seed(s)
            random.seed(random.randrange(2**32))
            np.random.seed(random.randrange(2**32))
            tf.random.set_seed(random.randrange(2**32))

        model = keras.models.Model(scope['input_symbol'], scope['output'])

        if self.arguments.get('ring_count', None):
            models = []
            for i in range(self.arguments['ring_count']):
                clone = functools.partial(self.ring_name_updater, i=i)
                models.append(
                    keras.models.clone_model(model, scope['input_symbol'],
                                             clone))
            final_output = K.sum([m.output for m in models], axis=0)
            final_output = K.softmax(final_output)
            model = keras.models.Model(scope['input_symbol'], final_output)

            for (left, right) in zip(models, np.roll(models, -1)):
                harmonic = lambda left=left, right=right: (.5 * self.arguments[
                    'ring_k'] * sum(
                        K.sum(K.square(l - r)) for (l, r) in zip(
                            left.trainable_weights, right.trainable_weights)))
                model.add_loss(harmonic)

        scope['model'] = model

        for term in scope.get('extra_losses', []):
            model.add_loss(term)

        metrics = scope.get('metrics', [])

        model.compile(self.arguments['optimizer'],
                      loss=scope['loss'],
                      metrics=metrics)

        if self.arguments.get('ring_count', None) and self.arguments.get(
                'ring_eps', None):
            print('randomizing ring weights')
            eps = self.arguments['ring_eps']

            names = [l.name for l in model.layers if 'ring' in l.name]
            base_values = {}
            for name in names:
                base = re.sub(r'_ring\d+$', '', name)
                layer = model.get_layer(name)
                if base in base_values:
                    for (value, tensor) in zip(base_values[base],
                                               layer.trainable_weights):
                        new_value = value * np.random.normal(
                            loc=1, scale=eps, size=value.shape)
                        tensor.assign(new_value)
                else:
                    base_values[base] = [
                        w.numpy() for w in layer.trainable_weights
                    ]
        else:
            print('not randomizing ring weights')

        callbacks = scope.get('callbacks', [])

        verbose = self.arguments['verbose']
        if tfa is not None and verbose:
            callbacks.append(
                tfa.callbacks.TQDMProgressBar(show_epoch_progress=False,
                                              update_per_second=1))
            verbose = False

        if 'early_stopping' in self.arguments:
            callbacks.append(
                keras.callbacks.EarlyStopping(
                    patience=self.arguments['early_stopping'],
                    monitor='val_loss'))

        if 'reduce_lr' in self.arguments:
            callbacks.append(
                keras.callbacks.ReduceLROnPlateau(
                    patience=self.arguments['reduce_lr'],
                    monitor='val_loss',
                    factor=.5,
                    verbose=True))

        with contextlib.ExitStack() as context_stack:
            if self.arguments.get('dump_period', None):
                modifiers = [
                    hashlib.sha1(
                        json.dumps(
                            scope['workflow'].to_JSON()).encode()).hexdigest()
                    [:32]
                ]
                handle = context_stack.enter_context(
                    storage.open(scope.get('dump_filename', 'dump.tar'),
                                 'a',
                                 modifiers,
                                 on_filesystem=True))
                cbk = keras_gtar.GTARLogger(handle.name,
                                            self.arguments['dump_period'],
                                            append=True,
                                            when='pre_epoch')
                callbacks.append(cbk)

            model.fit(scope['x_train'],
                      scope['y_train'],
                      verbose=verbose,
                      epochs=self.arguments['epochs'],
                      batch_size=self.arguments['batch_size'],
                      validation_split=self.arguments['validation_split'],
                      callbacks=callbacks)