Esempio n. 1
0
def run():
    """
    Record one execution of a tiny theano update function, then replay it.

    A shared vector of two zeros is reduced with max/min, three shared
    scalars named ``s_0``..``s_2`` are updated from those reductions, and the
    compiled function is run ``trials`` times under a RecordMode writing to
    ``nondeterminism_6.txt``. The record is then reopened with ``replay=1``
    and the same function is run again the same number of times.

    ``disturb_mem.disturb_mem()`` is called between the steps; it is defined
    outside this excerpt (presumably it perturbs memory layout -- confirm).
    """
    disturb_mem.disturb_mem()

    bias = sharedX(np.zeros((2,)))
    channels = OrderedDict()

    disturb_mem.disturb_mem()

    col_max = bias.max(axis=0)
    col_min = bias.min(axis=0)
    col_range = col_max - col_min

    # Build the three update expressions first, then pair each one with a
    # freshly created shared scalar.
    expressions = [col_max.max(), col_max.min(), col_range.max()]
    updates = []
    for idx, expression in enumerate(expressions):
        disturb_mem.disturb_mem()
        s = sharedX(0., name='s_'+str(idx))
        updates.append((s, expression))

    # Clear the name of every named ancestor unless the name is exactly two
    # characters long and begins with 's'.
    update_exprs = (new_value for _, new_value in updates)
    for node in theano.gof.graph.ancestors(update_exprs):
        label = node.name
        if label is None:
            continue
        if not (label[0] == 's' and len(label) == 2):
            node.name = None

    # `channels` is empty here, so this adds nothing; it is kept so the
    # sequence of operations is unchanged.
    updates.extend((s, channels[key]) for key in channels)

    file_path = 'nondeterminism_6.txt'
    mode = RecordMode(file_path=file_path, replay=0)
    f = theano.function([],
                        mode=mode,
                        updates=updates,
                        on_unused_input='ignore',
                        name='f')

    # Debugging aid: dumping dir(f) and dir(f.fn) at this point lists the
    # attributes of the compiled function and of its underlying callable.

    trials = 1

    def run_trials():
        # One memory disturbance before every call of the compiled function.
        for _ in xrange(trials):
            disturb_mem.disturb_mem()
            f()

    run_trials()

    # Flush and close the record file before it is reopened for replay.
    record_file = mode.record.f
    record_file.flush()
    record_file.close()

    mode.set_record(Record(file_path=file_path, replay=1))

    run_trials()
Esempio n. 2
0
def run():
    """
    Record 100 executions of a small theano update function, then replay them.

    The function updates three shared scalars (``s_0``..``s_2``) from max/min
    reductions of a shared two-element zero vector. It is compiled with a
    RecordMode writing to ``nondeterminism_5.txt`` (``replay=0``), called 100
    times, and then called 100 more times after the mode's record has been
    swapped for one opened on the same file with ``replay=1``.

    ``disturb_mem`` comes from outside this excerpt (presumably it shuffles
    memory allocation between steps -- confirm).
    """
    disturb_mem.disturb_mem()

    shared_vec = sharedX(np.zeros((2, )))
    channels = OrderedDict()

    disturb_mem.disturb_mem()

    v_max = shared_vec.max(axis=0)
    v_range = v_max - shared_vec.min(axis=0)

    targets = (v_max.max(), v_max.min(), v_range.max())
    updates = []
    for index, target in enumerate(targets):
        disturb_mem.disturb_mem()
        s = sharedX(0., name='s_%d' % index)
        updates.append((s, target))

    # Any named ancestor loses its name unless that name starts with 's' and
    # has exactly two characters.
    for ancestor in theano.gof.graph.ancestors(pair[1] for pair in updates):
        name = ancestor.name
        if name is not None and (name[0] != 's' or len(name) != 2):
            ancestor.name = None

    # No channels are registered above, so this loop body never runs.
    for key, channel in channels.items():
        updates.append((s, channel))

    file_path = 'nondeterminism_5.txt'
    mode = RecordMode(file_path=file_path, replay=0)
    f = theano.function(
        [], mode=mode, updates=updates, on_unused_input='ignore', name='f')

    # Recording pass.
    for _ in xrange(100):
        disturb_mem.disturb_mem()
        f()

    # Make sure everything is written out before the file is read back.
    mode.record.f.flush()
    mode.record.f.close()

    replay_record = Record(file_path=file_path, replay=1)
    mode.set_record(replay_record)

    # Replay pass.
    for _ in xrange(100):
        disturb_mem.disturb_mem()
        f()
Esempio n. 3
0
def test_record_mode_bad():

    """
    Check that a diverging event is detected when replaying via RecordMode.

    Like test_record_bad, but part of the event stream is produced by the
    theano RecordMode, including the event that triggers the mismatch.

    Raises
    ------
    AssertionError
        If the final, deliberately wrong call does not raise MismatchError.
    """

    # Phase 1: record ten explicit lines interleaved with ten function calls.
    buf = cStringIO.StringIO()
    recorder = Record(file_object=buf, replay=False)
    record_mode = RecordMode(recorder)

    x = iscalar()
    identity = function([x], x, mode=record_mode, name='f')

    num_lines = 10

    for step in xrange(num_lines):
        recorder.handle_line('%d\n' % step)
        identity(step)

    # Phase 2: replay only the first half of the recorded events, which
    # must go through without any error.
    replay_buf = cStringIO.StringIO(buf.getvalue())
    playback_checker = Record(file_object=replay_buf, replay=True)
    playback_mode = RecordMode(playback_checker)

    x = iscalar()
    identity = function([x], x, mode=playback_mode, name='f')

    for step in xrange(num_lines // 2):
        playback_checker.handle_line('%d\n' % step)
        identity(step)

    # Phase 3: an event that does not match the record must be rejected.
    try:
        identity(0)
    except MismatchError:
        pass
    else:
        raise AssertionError("Failed to detect a mismatch.")
Esempio n. 4
0
def test_record_mode_good():

    """
    Check that replaying an identical event stream raises no error.

    Like test_record_good, but some events are produced by the theano
    RecordMode, so the exact string content of the record is not checked.
    """

    num_lines = 10

    def drive(record):
        # Compile an identity function under a RecordMode wrapping `record`,
        # then alternate explicit lines with calls of that function.
        mode = RecordMode(record)
        x = iscalar()
        fn = function([x], x, mode=mode, name='f')
        for step in xrange(num_lines):
            record.handle_line(str(step)+'\n')
            fn(step)

    # Record a sequence of events.
    written = cStringIO.StringIO()
    drive(Record(file_object=written, replay=False))

    # Feed the recorded text back in replay mode and repeat the exact same
    # events; nothing should be raised.
    recorded_text = written.getvalue()
    to_replay = cStringIO.StringIO(recorded_text)
    drive(Record(file_object=to_replay, replay=True))
Esempio n. 5
0
def run(replay):
    """
    Compile and run a small theano update function under a RecordMode.

    A shared one-element zero vector named ``b`` is reduced with max/min and
    three shared scalars (``s_0``..``s_2``) are updated from the results.
    Before the function is called, a descriptor of each output of the
    compiled graph is passed to the record, then the function is executed
    once and the record file is flushed and closed.

    Parameters
    ----------
    replay : object
        Forwarded unchanged as the ``replay`` argument of ``RecordMode``
        (the record is stored in ``nondeterminism_4.txt``).
    """
    disturb_mem.disturb_mem()

    mode = RecordMode(file_path="nondeterminism_4.txt", replay=replay)

    b = sharedX(np.zeros((1, )), name='b')
    channels = OrderedDict()

    disturb_mem.disturb_mem()

    v_max = b.max(axis=0)
    v_min = b.min(axis=0)
    v_range = v_max - v_min

    updates = []
    for i, val in enumerate([
            v_max.max(),
            v_max.min(),
            v_range.max(),
    ]):
        disturb_mem.disturb_mem()
        s = sharedX(0., name='s_' + str(i))
        updates.append((s, val))

    # Strip the name of every named ancestor except 'b' and any two-character
    # name starting with 's'.
    for var in theano.gof.graph.ancestors(update for var, update in updates):
        # Compare the name by value. The previous `is not 'b'` tested object
        # identity and only behaved correctly thanks to CPython's interning
        # of short string literals.
        if var.name is not None and var.name != 'b':
            if var.name[0] != 's' or len(var.name) != 2:
                var.name = None

    # `channels` is empty at this point, so no extra updates are added.
    for key in channels:
        updates.append((s, channels[key]))
    f = theano.function([],
                        mode=mode,
                        updates=updates,
                        on_unused_input='ignore',
                        name='f')
    # Log a description of each graph output so that recording and replay
    # runs can be compared on the structure of the compiled function too.
    for output in f.maker.fgraph.outputs:
        mode.record.handle_line(var_descriptor(output) + '\n')
    disturb_mem.disturb_mem()
    f()

    mode.record.f.flush()
    mode.record.f.close()
Esempio n. 6
0
def test_determinism_2():
    """
    A more aggressive determinism test.

    Intended to check that apply nodes are all passed inputs with the same
    md5sums, that apply nodes are run in the same order, etc. Uses
    disturb_mem to try to cause dictionaries to iterate in different orders,
    etc.

    The same SGD training job is run twice: once with a recording
    RecordMode and once with a RecordMode replaying what the first run
    wrote.

    NOTE(review): as written, `run_sgd` never references its `mode`
    argument, so neither RecordMode is attached to any compiled function
    inside this block. Presumably `mode` was meant to be forwarded to SGD
    (e.g. through a theano function mode argument) -- confirm against the
    SGD constructor.
    """
    def run_sgd(mode):
        """
        Build a dataset, model, cost and SGD trainer, then run training.

        Parameters
        ----------
        mode : RecordMode
            The recording or replaying mode for this run. Not used in the
            body as written (see the review note on the enclosing test).
        """
        # Must be seeded the same both times run_sgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            """
            Return a DenseDesignMatrix of `num_batches * batch_size` examples.

            Features are drawn from `rng.randn`; the single target column is
            1 where the example lies on the positive side of `w`, else 0.
            """
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            # Sanity check: a requested batch has the expected shape.
            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        # The order of these two calls fixes the order of draws from `rng`.
        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2

        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """
            def __init__(self):
                """
                Create `num_chunks` weight matrices (W1) and `num_chunks`
                weight vectors (W2), all initialized from `rng.randn`.
                """
                self.W1 = [
                    sharedX(rng.randn(num_features, chunk_width))
                    for i in xrange(num_chunks)
                ]
                disturb_mem.disturb_mem()
                self.W2 = [
                    sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)
                ]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()

        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            # Class-level flag marking this cost as supervised.
            supervised = True

            def expr(self, model, data, **kwargs):
                """
                Return the summed absolute error between the prediction and
                the first target column.

                The prediction is the sum, over four nonlinearities, of a
                chunked two-layer computation using `model.W1` and
                `model.W2`. `data` is validated against the data specs and
                then unpacked as an (X, Y) pair.
                """
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    """
                    Return sum_k dot(non_linearity(dot(X, W1[k])), W2[k]).
                    """
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()

            def get_data_specs(self, model):
                """
                Return (space, source) built from the model's input/output
                spaces and its input/target sources.
                """
                data = CompositeSpace(
                    (model.get_input_space(), model.get_output_space()))
                source = (model.get_input_source(), model.get_target_source())
                return (data, source)

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        # NOTE(review): `mode` is not passed here; see the note in the
        # docstring of the enclosing test.
        algorithm = SGD(
            cost=cost,
            batch_size=batch_size,
            init_momentum=.5,
            learning_rate=1e-3,
            monitoring_dataset={
                'train': train,
                'valid': valid
            },
            update_callbacks=[ExponentialDecay(decay_factor=2., min_lr=.0001)],
            termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(dataset=train,
                             model=model,
                             algorithm=algorithm,
                             extensions=[
                                 PolyakAveraging(start=0),
                                 MomentumAdjustor(final_momentum=.9,
                                                  start=1,
                                                  saturate=5),
                             ],
                             save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()

    # First run: record into an in-memory buffer.
    output = cStringIO.StringIO()
    record = Record(file_object=output, replay=False)
    record_mode = RecordMode(record)

    run_sgd(record_mode)

    # Second run: replay against a fresh buffer holding the recorded text.
    output = cStringIO.StringIO(output.getvalue())
    playback = Record(file_object=output, replay=True)
    playback_mode = RecordMode(playback)

    run_sgd(playback_mode)
Esempio n. 7
0
# NOTE(review): DummyModel is neither defined nor imported in this excerpt;
# presumably it is declared earlier in the script -- confirm.
model = DummyModel()

from pylearn2.training_algorithms.bgd import BGD
from pylearn2.devtools.record import RecordMode
# allocate_random is also defined outside this excerpt. It is deliberately
# called between the imports; presumably this varies memory allocation
# before the remaining objects are created -- confirm before reordering.
allocate_random()
from pylearn2.costs.cost import Cost

class DummyCost(Cost):
    """
    Minimal cost: the sum of all entries of every model parameter and of
    both data arguments.
    """

    # Class-level flag marking this cost as supervised.
    supervised = True

    def __call__(self, model, X, Y, **kwargs):
        """
        Return the total of ``x.sum()`` over the model parameters, X and Y.

        Extra keyword arguments are accepted and ignored.
        """
        terms = model.get_params()+[X, Y]
        # Accumulate from 0 with binary `+`, exactly as the builtin sum does.
        total = 0
        for term in terms:
            total = total + term.sum()
        return total


# Configure batch gradient descent so that every compiled theano function
# runs under a RecordMode bound to 'nondeterminism_2_record.txt'.
# `replay` is expected to be defined earlier in the script.
algorithm = BGD(
    theano_function_mode=RecordMode(
        path='nondeterminism_2_record.txt',
        replay=replay,
    ),
    line_search_mode='exhaustive',
    batch_size=100,
    set_batch_size=1,
    updates_per_batch=1,
    reset_alpha=0,
    conjugate=1,
    reset_conjugate=0,
    cost=DummyCost(),
)

# No dataset is supplied; only the setup step and the optimizer's value
# caching are exercised.
algorithm.setup(model=model, dataset=None)
algorithm.optimizer._cache_values()

Esempio n. 8
0
# NOTE(review): DummyModel is neither defined nor imported in this excerpt;
# presumably it is declared earlier in the script -- confirm.
model = DummyModel()

from pylearn2.training_algorithms.bgd import BGD
from pylearn2.devtools.record import RecordMode
# allocate_random is also defined outside this excerpt. It is deliberately
# called between the imports; presumably this varies memory allocation
# before the remaining objects are created -- confirm before reordering.
allocate_random()
from pylearn2.costs.cost import Cost


class DummyCost(Cost):
    """
    Trivial cost that ignores the model and the data and always evaluates to
    a shared scalar holding zero.
    """

    # Class-level flag marking this cost as supervised.
    supervised = True

    def __call__(self, model, X, Y, **kwargs):
        """
        Return a new shared variable initialized to 0.

        All arguments are accepted for interface compatibility and ignored.
        """
        # A fuller expression summing model.get_params() + [X, Y] used to
        # follow this return as unreachable code; it has been removed since
        # it could never execute.
        return sharedX(0.)


# Reduced BGD configuration: only the recording mode, the conjugate flag,
# the batch size and the cost are specified. `replay` is expected to be
# defined earlier in the script.
algorithm = BGD(
    theano_function_mode=RecordMode(path='nondeterminism_2_record.txt',
                                    replay=replay),
    conjugate=1,
    batch_size=100,
    cost=DummyCost(),
)

# No dataset is supplied; only the setup step and the optimizer's value
# caching are exercised.
algorithm.setup(model=model, dataset=None)
algorithm.optimizer._cache_values()
Esempio n. 9
0
from pylearn2.utils import sharedX
from pylearn2.utils import safe_zip
from theano import config
from theano import function
import theano.tensor as T
import sys
from pylearn2.devtools.record import RecordMode
from collections import OrderedDict

# The script takes exactly one command-line argument, '0' or '1', which is
# converted to an int and forwarded as RecordMode's `replay` flag. A wrong
# number of arguments makes the unpacking below raise ValueError.
_, replay = sys.argv
if replay not in ('0', '1'):
    # Raise explicitly instead of `assert False`: assertions are stripped
    # under `python -O`, which would let an invalid string slip through to
    # RecordMode unconverted.
    raise ValueError(
        "replay argument must be '0' or '1', got %r" % (replay,))
replay = int(replay)

record_mode = RecordMode('nondeterminism_record.txt', replay=replay)

def allocate_random():
    """
    Allocate a time-dependent amount of objects.

    The goal is to increase the chances of all subsequently created objects'
    ids changing from run to run. The current microsecond count is split
    into ``m = microsecond // 1000`` and ``n = microsecond % 1000``, and a
    list of ``n`` lists of ``m`` zeros is stored in the module-level global
    ``l`` so that the allocation stays alive.
    """
    global l
    from datetime import datetime
    # Floor division (via divmod) keeps `m` an int regardless of the
    # division semantics in effect; `[0] * m` requires an integer.
    m, n = divmod(datetime.now().microsecond, 1000)
    l = [[0]*m for i in xrange(n)]
# Perform one time-dependent allocation right away, at module level.
allocate_random()