Example #1
    def fit(self, X):
        from numpy.random.mtrand import RandomState
        min_width = np.repeat(sys.float_info.max, len(X) - 1)
        randomizer = RandomState(111)
        if self.initial_ordering is not None:
            ordering = self.initial_ordering
            assert len(ordering) == len(X), \
                'initial_ordering has wrong size'
        else:
            ordering = np.arange(len(X), dtype=int)
        for i in range(self.n_trials):
            final_ordering, bd, labels, prs = self._fit_once(X, ordering)
            width = np.sort(bd)[::-1]
            if lt_lex(width, min_width):
                best_order = final_ordering
                best_bd = bd
                best_labels = labels
                best_pinch_ratios = prs
                min_width = width
            randomizer.shuffle(ordering)

        self._ordering = best_order
        self._boundary = best_bd
        self.labels_ = best_labels
        self._pinch_ratios = best_pinch_ratios
        self._width = min_width
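A minimal sketch (not taken from the example's source) of the seeded shuffling that drives the trial loop above: RandomState.shuffle permutes the array in place, and fixing the seed makes the whole sequence of trial orderings reproducible.

import numpy as np
from numpy.random.mtrand import RandomState

ordering = np.arange(5, dtype=int)
randomizer = RandomState(111)
randomizer.shuffle(ordering)   # in-place permutation of the ordering
print(ordering)                # identical output on every run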
Example #2
def rarefaction_keep(M, RID, reads=0, iters=0, myLambda=0.1):
    global curSamples, totSamples
    noccur = np.sum(M, axis=1)  # number of occurrences for each sample
    nvar = M.shape[1]  # number of variables
    nsamp = M.shape[0]  # number of samples

    Mrarefied = np.empty_like(M)
    for i in range(nsamp):
        p = (M[i] + myLambda) / (float(noccur[i]) + nvar * myLambda)
        myArr = np.zeros(nvar)
        for n in xrange(iters):
            prng = RandomState()
            choice = prng.choice(nvar, size=reads, replace=True, p=p)
            binArr = np.bincount(choice, minlength=nvar)
            if n == 0:
                myArr = binArr
            else:
                myArr = np.vstack((myArr, binArr))

        if iters > 1:
            Mrarefied[i] = np.mean(myArr, axis=0)
        else:
            Mrarefied[i] = myArr
        curSamples[RID] += 1
        functions.setBase(RID, 'Step 2 of 6: Sub-sampling data...\nSub-sampling is complete for ' + str(curSamples[RID]) + ' out of ' + str(totSamples[RID]) + ' samples')
    return Mrarefied
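A hedged, self-contained sketch of the core resampling step above (seed 42 is my own choice; the original re-creates an unseeded RandomState each iteration): counts are smoothed by myLambda into a probability vector and RandomState.choice draws `reads` indices from it.

import numpy as np
from numpy.random.mtrand import RandomState

row = np.array([10, 0, 5, 85])                     # counts for one sample
myLambda, reads = 0.1, 50
p = (row + myLambda) / (float(row.sum()) + len(row) * myLambda)
prng = RandomState(42)
choice = prng.choice(len(row), size=reads, replace=True, p=p)
print(np.bincount(choice, minlength=len(row)))     # rarefied counts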
Example #3
   def __init__(self,h,cutoff=0.2,seed=dfseed):
      dummy = RandomState(seed)
      s1, s2, s3 = 0,0,0
      while(s1==s2 or s1==s3 or s2==s3):
         s1, s2, s3 = dummy.randint(low=4294967296, size=3)
      self.__dxy = randxy(s1,cutoff)
      self.__site = randsite(s2,h.dim)
      self.__acc = randgen(s3)
Example #4
def create_nonce():
    """
    Create a random string
    :return:  a random string
    """
    rand = RandomState()
    lo=1000000000000000
    hi=999999999999999999
    return b2a_hex(rand.randint(lo, hi, 3).tostring())[:32]
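A minimal sketch of what the nonce construction amounts to, using tobytes() (the non-deprecated spelling of tostring()) and an explicit int64 dtype to stay within bounds on all platforms: three large integers are drawn, their raw bytes are hex-encoded, and the result is truncated to 32 hex characters.

from binascii import b2a_hex
from numpy.random.mtrand import RandomState

rand = RandomState()
nonce = b2a_hex(rand.randint(10**15, 10**18, 3, dtype='int64').tobytes())[:32]
print(nonce)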
Example #5
def job_pick_and_move(path_src, path_dst, filenames, labels):
    rs = RandomState(0)
    bools = [labels == l for l in xrange(5)]
    indices_lists = [x.nonzero()[0] for x in bools]
    permuted_indices = [rs.permutation(x) for x in indices_lists]
    permuted_indices_top700 = [x[:700] for x in permuted_indices]

    for i in xrange(5):
        for s in filenames[permuted_indices_top700[i]]:
            shutil.copy(os.path.join(path_src, s+".png"), os.path.join(path_dst, s + "_" + str(i) + ".png"))
Example #6
    def get_indices(self):
        """
        Get data indices

        :rtype: numpy.array
        """
        if self._indices is None:
            rs = RandomState(seed=self._random_state)
            self._indices = rs.permutation(len(self))
        return self._indices
Example #7
def generate_binned_dataset(n_samples, n_bins):
    """useful function, generates dataset with bins, groups, random weights.
    This is used to test correlation functions. """
    random = RandomState()
    y = random.uniform(size=n_samples) > 0.5
    pred = random.uniform(size=(n_samples, 2))
    weights = random.exponential(size=(n_samples,))
    bins = random.randint(0, n_bins, n_samples)
    groups = bin_to_group_indices(bin_indices=bins, mask=(y == 1))
    return y, pred, weights, bins, groups
Example #8
def test_state_setter_getter(fname):
    # make sure the presence of custom __setstate__, __getstate__ methods
    # is honored -- numpy's RNGs have it
    from numpy.random.mtrand import RandomState
    r = RandomState()
    h5save(fname, r)
    rl = h5load(fname)
    rl_state = rl.get_state()
    for i, v in enumerate(r.get_state()):
        assert_array_equal(v, rl_state[i])
Example #9
def test_fast_cvm(n_samples=1000):
    random = RandomState()
    data1 = random.uniform(size=n_samples)
    weights1 = random.uniform(size=n_samples)
    mask = random.uniform(size=n_samples) > 0.5
    data2 = data1[mask]
    weights2 = weights1[mask]
    a = cvm_2samp(data1, data2, weights1, weights2)
    prepared_data1, prepared_weights1, F1 = prepare_distibution(data1, weights1)
    b = _cvm_2samp_fast(prepared_data1, data2, prepared_weights1, weights2, F1=F1)
    assert numpy.allclose(a, b)
Example #10
def create_icon(filename):

    rand = RandomState()
      
    lo = 1000000000000000
    hi = 999999999999999999
    random_string = binascii.b2a_hex(rand.randint(lo, hi, 4).tostring())[:64]
    time_millis = int(round(time.time() * 1000))
    
    v = visicon.Visicon(random_string, str(time_millis), 128)
    i = v.draw_image()
    i.save(filename)
Example #11
    def __call__(self, particle, rand):
        """Get the next velocity from this particle given a particle that it
        should be moving toward"""
        # I'm not sure what "given a particle that it should be moving toward"
        # means.  We only take one argument, and that's "this particle"

        # In a Bypass mrs implementation (and possibly also with Serial),
        # Kalman motion should work just fine.  However, in parallel, the state
        # required for each particle's motion is not persistent.  We would need
        # to add a "motion state" field to the particle, or something similar,
        # so that each Slave task can access the state that is supposed to be
        # building up in the Kalman filter

        # Note that care needs to be taken in speculative methods to not
        # clobber the state that is passed around, or superfluously add to the
        # state when other particles are evaluating the motion, or when
        # speculative children are being moved.  This needs to be rethought to
        # be compatible with speculative evaluation.

        raise NotImplementedError("Kalman motion requires state that is not "
                "persistent in mrs")

        kalman = self.getfilter(particle, rand)

        grel = particle.nbestpos - particle.pos
        if self.norandscale:
            newvel = 1.0 * grel
        else:
            newvel = rand.uniform(0,2) * grel

        if self.restrictvel:
            self.cube.constrain_vec(newvel, True)

        newpos = particle.pos + newvel

        if not self.usepbest:
            kalman.add(newpos)
        else:
            kalman.add(array(list(newpos) + list(particle.pbestpos)))

        if self.predict:
            mean, var = kalman.predict()
        else:
            mean, var = kalman.filt()

        # Bad!  We should find a better way to initialize the random state
        # instead of just drawing a random number from the particle rand
        # This does give reproducible results, it just makes the random numbers
        # from state less good
        state = RandomState(rand.randint(0, sys.maxint))
        newstate = state.multivariate_normal(mean, var)
        return array(newstate[:self.dims]),array(newstate[self.dims:])
Example #12
def test_compute_cut():
    random = RandomState()
    predictions = random.permutation(100)
    labels = numpy.ones(100)
    for eff in [0.1, 0.5, 0.75, 0.99]:
        cut = compute_cut_for_efficiency(eff, labels, predictions)
        assert numpy.sum(predictions > cut) / len(predictions) == eff, 'the cut was set wrongly'

    weights = numpy.array(random.exponential(size=100))
    for eff in random.uniform(size=100):
        cut = compute_cut_for_efficiency(eff, labels, predictions, sample_weight=weights)
        lower = numpy.sum(weights[predictions > cut + 1]) / numpy.sum(weights)
        upper = numpy.sum(weights[predictions > cut - 1]) / numpy.sum(weights)
        assert lower < eff < upper
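A hedged illustration (not the library implementation) of the relation the first loop tests for unweighted data: a cut that passes a fraction eff of the predictions is simply their (1 - eff) quantile.

import numpy
from numpy.random.mtrand import RandomState

predictions = RandomState(5).permutation(100)
eff = 0.75
cut = numpy.percentile(predictions, 100 * (1 - eff))
print(numpy.mean(predictions > cut))   # equals eff here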
Example #13
def test_msee_computations(size=1000, n_bins=10):
    random = RandomState()
    testY = random.uniform(size=size) > 0.5
    pred = random.uniform(size=(size, 2))
    weights = random.exponential(size=size)

    bins = random.randint(0, n_bins, size)
    target_efficiencies = [0.5, 0.6]
    groups = [numpy.where(testY & (bins == bin))[0] for bin in range(n_bins)]
    x1 = compute_msee_on_bins(pred[:, 1], testY, bin_indices=bins,
                              target_efficiencies=target_efficiencies, sample_weight=weights)
    x2 = compute_msee_on_groups(pred[:, 1], testY, groups=groups,
                                target_efficiencies=target_efficiencies, sample_weight=weights)
    assert abs(x1 - x2) < 1e-6, "MSE are different"
    print("MSE variation is ok")
Example #14
 def __init__(self, athlete, queue, ID, rate = 20,
              noise = 0.3, verbose = False, seed = None):
     """
     Sensor class which gets position measurements from athlete, adds noise
     and collects them in a queue.
     
     :param athlete: object yielding position data when called
     :param queue: queue to which the measurements are added
     :param id: sensor ID
     :param rate (optional): sampling rate of sensor in Hz, default: 20
     :param noise (optional): standard deviation of noise on measurement in
     meter, default: 0.3
     :param verbose (optional): verbosity of sensor, default: False
     :param seed (optional): seed of noise generation, default: None 
     """
     super(Sensor, self).__init__()
     self.queue = queue
     self.ID = ID
     self.athlete = athlete
     self.rate = rate
     self.deltat = 1./self.rate
     self.noise = noise
     self.verbose = verbose
     self.rs = RandomState(seed)
     self.running = Event()
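A small self-contained sketch of the noise model this constructor sets up (the full Sensor class in a later example adds rs.randn(2) * noise to each position measurement):

import numpy as np
from numpy.random.mtrand import RandomState

rs = RandomState(1)
true_pos = np.array([10.0, 5.0])
noisy_pos = true_pos + rs.randn(2) * 0.3   # zero-mean Gaussian noise, sigma = 0.3 m
print(noisy_pos)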
Example #15
def test_binner():
    """This function tests binner class"""
    random = RandomState()
    binner = Binner(random.permutation(30), 3)
    assert numpy.all(binner.limits > [9, 19]), 'failed on the limits'
    assert numpy.all(binner.limits < [10, 20]), 'failed on the limits'
    bins = binner.get_bins([-1000, 1000, 0, 10, 20, 9.0, 10.1, 19.0, 20.1])
    assert numpy.all(bins == [0, 2, 0, 1, 2, 0, 1, 1, 2]), 'wrong binning'

    binner = Binner(random.permutation(20), 5)
    p = random.permutation(40)
    # checking whether binner preserves correspondence
    list1 = list(binner.split_into_bins(numpy.array(range(-10, 30))[p], numpy.array(range(0, 40))[p]))
    for a, b in list1:
        for x, y in zip(a, b):
            assert x + 10 == y, 'transpositions are wrong after binning'

    binner = Binner(random.permutation(30), 3)
    result2 = list(binner.split_into_bins(range(10, 20)))
    answer2 = [[], range(10, 20), []]

    for a, b in zip(result2, answer2):
        for x, y in zip(a[0], b):
            assert x == y, 'binning is wrong'

    result3 = list(binner.split_into_bins(random.permutation(45)))
    answer3 = list(binner.split_into_bins(range(45)))
    for x, y in zip(result3, answer3):
        assert set(x[0]) == set(y[0]), "binner doesn't work well with permutations"

    print('binner is ok')
Example #16
    def __init__(self, left_num_neurons, right_num_neurons, transfer_function):
        """
        Initializes a vectorneuron with a weight matrix of size (left_num_neurons, right_num_neurons), 
        a bias vector of size (right_num_neurons, 1), and a transfer function transfer_function.
        """

        print '>>> Creating VectorNeuron: (%s, %s) %s' % \
        (left_num_neurons, right_num_neurons, transfer_function)

        self.__weight_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__weight_matrix_backup = self.__weight_matrix.copy()
        self.__bias_vector = Matrix(rand(right_num_neurons, 1))
        self.__delta_w_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__mersenne_twister = MersenneTwister()
        self.__mersenne_twister.seed(int(1000*time.time()))
        self.__transfer_function = transfer_function
Example #17
def check_weighted_percentile(size=100, q_size=20):
    random = RandomState()
    array = random.permutation(size)
    quantiles = random.uniform(size=q_size)
    q_permutation = random.permutation(q_size)
    result1 = weighted_percentile(array, quantiles)[q_permutation]
    result2 = weighted_percentile(array, quantiles[q_permutation])
    result3 = weighted_percentile(array[random.permutation(size)], quantiles[q_permutation])
    assert numpy.all(result1 == result2) and numpy.all(result1 == result3), 'breaks on permutations'

    # checks that order is kept
    quantiles = numpy.linspace(0, 1, size * 3)
    x = weighted_percentile(array, quantiles, sample_weight=random.exponential(size=size))
    assert numpy.all(x == numpy.sort(x)), "doesn't preserve order"

    array = numpy.array([0, 1, 2, 5])
    # comparing with simple percentiles
    for x in random.uniform(size=10):
        assert numpy.abs(numpy.percentile(array, x * 100) - weighted_percentile(array, x, old_style=True)) < 1e-7, \
            "doesn't coincide with numpy.percentile"
Example #18
 def __init__(self, subject_column, *args, **kwargs):
     super(_PereiraBenchmark.PereiraExtrapolationCeiling, self).__init__(
         subject_column, *args, **kwargs)
     self._num_subsamples = 10
     self.holdout_ceiling = _PereiraBenchmark.PereiraHoldoutSubjectCeiling(subject_column=subject_column)
     self._rng = RandomState(0)
Example #19
 def __init__(self,
              lam: Union[float, ndarray, Iterable[float]] = 1.0,
              seed=None):
     self.lam = lam
     self.rs = RandomState(seed=seed)
Example #20
 def __init__(self, alpha: list, seed=None):
     self.alpha = alpha
     self.rs = RandomState(seed=seed)
Example #21
 def __init__(self, config):
     super(OrganicUserEventCounterModel, self).__init__(config)
     if config.select_randomly:
         self.rng = RandomState(self.config.random_seed)
Example #22
 def __init__(self, config, model):
     super(TorchModel, self).__init__(config)
     self.model = model
     if self.config.select_randomly:
         self.rng = RandomState(self.config.random_seed)
Example #23
def add_poisson_noise(imgs, seed=123):
    poisson_noise = RandomState(seed).poisson(lam=1, size=imgs.shape)
    return imgs + poisson_noise
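Usage sketch for the helper above; the import line is added here so the combined snippet runs, and with a fixed seed the identical noise field is produced on every call.

import numpy as np
from numpy.random.mtrand import RandomState

imgs = np.zeros((2, 4, 4))
noisy = add_poisson_noise(imgs, seed=123)
print(noisy.mean())   # close to lam=1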
Example #24
# Define the inputs
x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")

a = tf.matmul(x, w1)

y = tf.matmul(a, w2)

# Define the loss function

cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# Define the optimization algorithm
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

rdm = RandomState(1)

dataset_size = 10000

X = rdm.rand(dataset_size, 2)

Y = [[int(x1+x2 < 1)] for (x1, x2) in X]

batch_size = 128

with tf.Session() as sess:

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    print(sess.run(w1))
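The snippet above uses w1 and w2 without defining them; a typical TF1-style definition (an assumption, not taken from the original) would be:

import tensorflow as tf

w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))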
Example #25
 def __init__(self, *args, **kwargs):
     super(_Fedorenko2016.ElectrodeExtrapolation, self).__init__(*args, **kwargs)
     self._rng = RandomState(0)
     self._num_samples = 15  # number of samples per electrode selection
Example #26
def main():
    config = Configuration()

    y_train = np.load(config.training_data_folder +
                      'train_labels.npy')  # labels of the training data
    x_train = np.load(config.training_data_folder +
                      'train_features.npy')  # features of the training data
    feature_names = np.load(config.training_data_folder + 'feature_names.npy')
    failure_times_train = np.load(config.training_data_folder +
                                  'train_failure_times.npy')
    window_times_train = np.load(config.training_data_folder +
                                 'train_window_times.npy')

    # get unique classes
    classes = np.unique(y_train)

    print('Number of examples in training data set:', x_train.shape[0])
    print('Reducing to', config.examples_per_class, 'examples per class with',
          len(classes), 'classes')

    # for each class get the indices of all examples with this class
    indices_of_classes = []
    for c in classes:
        indices_of_classes.append(np.where(y_train == c)[0])

    # reduce classes to equal many examples
    new_indices = []
    ran = RandomState(config.random_seed_index_selection)
    for i in range(len(indices_of_classes)):
        length = len(indices_of_classes[i])

        # if there are less examples than there should be for each class only those can be used
        epc = config.examples_per_class if config.examples_per_class < length else length

        temp = ran.choice(indices_of_classes[i], epc, replace=False)
        # print(len(indices_of_classes[i]), len(temp))

        new_indices.append(temp)

    casebase_features_list = []
    casebase_labels_list = []
    casebase_failures_list = []
    casebase_window_times_list = []

    # extract the values at the selected indices and add to list
    for i in range(len(classes)):
        casebase_labels_list.extend(y_train[new_indices[i]])
        casebase_features_list.extend(x_train[new_indices[i]])
        casebase_failures_list.extend(failure_times_train[new_indices[i]])
        casebase_window_times_list.extend(window_times_train[new_indices[i]])

    # transform list of values back into an array and save to file
    casebase_labels = np.stack(casebase_labels_list, axis=0)
    casebase_features = np.stack(casebase_features_list, axis=0)
    casebase_failures = np.stack(casebase_failures_list, axis=0)
    casebase_window_times = np.stack(casebase_window_times_list, axis=0)

    print('Number of examples in the case base:',
          casebase_features.shape[0])

    np.save(config.case_base_folder + 'train_features.npy',
            casebase_features.astype('float32'))
    np.save(config.case_base_folder + 'train_labels.npy', casebase_labels)
    np.save(config.case_base_folder + 'train_failure_times.npy',
            casebase_failures)
    np.save(config.case_base_folder + 'train_window_times.npy',
            casebase_window_times)

    files_to_copy = [
        'feature_names.npy', 'test_labels.npy', 'test_features.npy',
        'test_window_times.npy', 'test_failure_times.npy',
        'FailureMode_Sim_Matrix.csv', 'Lokalization_Sim_Matrix.csv',
        'Condition_Sim_Matrix.csv'
    ]

    for file in files_to_copy:
        copyfile(config.training_data_folder + file,
                 config.case_base_folder + file)
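A compact, self-contained sketch of the per-class subsampling step used above: a seeded RandomState draws a fixed number of indices per class without replacement, so the reduced case base is reproducible across runs.

import numpy as np
from numpy.random.mtrand import RandomState

y_train = np.array(['a', 'a', 'b', 'a', 'b', 'b', 'a'])
ran = RandomState(0)
for c in np.unique(y_train):
    idx = np.where(y_train == c)[0]
    picked = ran.choice(idx, min(2, len(idx)), replace=False)
    print(c, picked)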
Example #27
 def __init__(self, seed=None):
     self.rs = RandomState(seed=seed)
Example #28
 def __init__(self, config=Configuration(random_args)):
     super(RandomAgent, self).__init__(config)
     self.rng = RandomState(config.random_seed)
Example #29
 def __init__(self, probabilities: numpy.array, seed=None):
     self.probabilities = probabilities
     self.a = numpy.arange(len(probabilities))
     self.lb = 0
     self.ub = len(probabilities) - 1
     self.rs = RandomState(seed=seed)
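A sketch of how a discrete distribution object like this one would typically draw samples (the sampling method itself is not shown in the snippet): RandomState.choice over the support with the stored probability vector.

import numpy
from numpy.random.mtrand import RandomState

probabilities = numpy.array([0.2, 0.5, 0.3])
rs = RandomState(0)
print(rs.choice(len(probabilities), size=5, p=probabilities))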
Example #30
 def __init__(self, a: Union[float, ndarray, Iterable[float]], seed=None):
     self.a = a
     self.rs = RandomState(seed=seed)
Example #31
 def __init__(self, lb: int, ub: int, seed=None):
     self.lb = lb
     self.ub = ub
     self.rs = RandomState(seed=seed)
Example #32
def train(config, model_dir, train_src, train_tgt, valid_src,
          valid_tgt, batch_max_words, batch_max_sentences, epochs, test_sentences,
          test_interval, valid_freq, keep_models, patience, max_words, learning_rate,
          max_seconds, exit_status_max_train, anneal_restarts, anneal_decay,
          override_learning_rate, valid_ref, lc_bleu, stop_on_cost):
    start = time.time()
    state = TrainingState()
    state.learning_rate = learning_rate
    log.info('hostname: %s', socket.gethostname())
    x_vocab = Vocab(vocab_path=os.path.join(model_dir, 'x_vocab.txt'))
    y_vocab = Vocab(vocab_path=os.path.join(model_dir, 'y_vocab.txt'))

    cnn_mt = ConvolutionalMT(config, x_vocab, y_vocab)
    model_file = find_latest_model(model_dir)
    if model_file:
        compat.load_params(cnn_mt, model_file)
        state_path = state.path_for_model(model_file)
        if os.path.exists(state_path):
            state.load(state_path)
            if state.learning_rate != learning_rate:
                if override_learning_rate:
                    log.info('overriding saved learning rate {} to {}'.format(
                        state.learning_rate, learning_rate))
                    state.learning_rate = learning_rate
                else:
                    log.warning('using saved learning rate {}'.format(
                        state.learning_rate))
        else:
            log.warning('no training state file found for model!')
            state.training_iteration = model_iter_from_path(model_file)
    log.info('TrainingState: {}'.format(state.format_for_log()))
    log.info('using {} for stopping criteria'.format('cost' if stop_on_cost else 'bleu'))

    next_test_cycle = test_interval
    early_stop = False
    train_seconds = state.total_train_seconds

    # Get a different random state to avoid seeing the same shuffled batches
    # on restart.  We want to see different data, especially for large datasets.
    random_state = RandomState()

    log.info('preparing training batches...')
    train_dataset = XYDataset(train_src, train_tgt, x_vocab, y_vocab,
                              max_words_per_sentence=max_words,
                              max_words_per_batch=batch_max_words,
                              max_sentences_per_batch=batch_max_sentences,
                              random_state=random_state)
    log.info('preparing validation batches...')
    valid_xy_dataset = XYDataset(valid_src, valid_tgt, x_vocab, y_vocab,
                                 max_words_per_sentence=max_words,
                                 max_words_per_batch=batch_max_words,
                                 max_sentences_per_batch=batch_max_sentences,
                                 random_state=None)
    valid_x_dataset = XDataset(valid_src, x_vocab, config.num_positions,
                               max_words_per_batch=batch_max_words,
                               max_sentences_per_batch=batch_max_sentences)

    log.info('starting train loop...')
    log.info('process memory at start of train loop: {:.2f} GB'.format(
        used_memory_in_gigabytes()))

    while state.completed_epochs < epochs:
        epoch_cost = 0
        for batch in train_dataset():
            x, x_mask, y, y_mask = batch
            elapsed = time.time() - start
            if max_seconds and elapsed > max_seconds:
                log.info('%d seconds elapsed in train()', elapsed)
                log.info('exiting with status %d', exit_status_max_train)
                exit(exit_status_max_train)
            state.training_iteration += 1

            cnn_mt.set_learning_rate(state.learning_rate)
            batch_cost = cnn_mt.train(x, x_mask, y, y_mask)

            epoch_cost += batch_cost
            next_test_cycle -= 1
            if next_test_cycle == 0:
                test(cnn_mt, x_vocab, y_vocab, test_sentences, max_words)
                next_test_cycle = test_interval
            if state.training_iteration % valid_freq == 0:
                log.info('BEGIN Validating')
                valid_cost = dataset_cost(cnn_mt, valid_xy_dataset)
                state.validation_costs.append(float(valid_cost))
                new_best = False
                bleu, bleu_s, max_bleu_s = -1.0, '?????', '?????'
                if valid_ref:
                    bleu, bleu_line = compute_greedy_bleu(cnn_mt, valid_x_dataset, valid_ref,
                                                          lc_bleu, max_words)
                    log.info(bleu_line)
                    state.validation_bleus.append(bleu)
                    bleu_s = '{:05.2f}'.format(bleu)
                    max_bleu_s = '{:05.2f}'.format(max(state.validation_bleus))
                if stop_on_cost:
                    if valid_cost <= min(state.validation_costs):
                        state.bad_counter = 0
                        new_best = True
                else:
                    if bleu >= max(state.validation_bleus):
                        state.bad_counter = 0
                        new_best = True
                log.info('END   Validating')
                ts = train_seconds + int(time.time() - start)
                log.info('bleu{} {:5s} max {:5s} cost {:f} min {:f} bad_counter {:d} lr {:f} '
                         'iter {:d} completed_epochs: {:d} train_secs {:d}'.format(
                          '-lc' if lc_bleu else '', bleu_s, max_bleu_s, valid_cost,
                          min(state.validation_costs), state.bad_counter, state.learning_rate,
                          state.training_iteration, state.completed_epochs, ts))
                model_src = save_model(cnn_mt, model_dir, keep_models, state,
                                       train_seconds + int(time.time() - start))
                if new_best:
                    log.info('New best model; saving model')
                    model_dst = os.path.join(model_dir, 'model')
                    copy_checkpoint(model_src, model_dst)
                else:
                    state.bad_counter += 1
                    if state.bad_counter > patience:
                        if state.anneal_restarts_done < anneal_restarts:
                            log.info('No progress on the validation set, annealing learning '
                                     'rate and resuming from best params.')
                            state.learning_rate *= anneal_decay
                            log.info('new learning rate: {:f}'.format(state.learning_rate))
                            state.anneal_restarts_done += 1
                            state.bad_counter = 0
                            best_model_path = os.path.join(model_dir, 'model')
                            compat.load_params(cnn_mt, best_model_path)
                        else:
                            log.info('Early Stop!')
                            early_stop = True
                            break
        if early_stop:
            # Non-zero exit status to prevent dependent queue
            # jobs from executing.
            exit(1)
        state.completed_epochs += 1
        log.info('epoch %d, epoch cost %f', state.completed_epochs, epoch_cost)
        log.info('process memory at end of epoch: {:.2f} GB'.format(
            used_memory_in_gigabytes()))
    log.info('process memory at end of training: {:.2f} GB'.format(
        used_memory_in_gigabytes()))
    log.info('training ends')
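A two-line illustration of the distinction the batch-shuffling comment relies on: RandomState() with no seed is seeded from OS entropy, so the shuffled batch order differs across restarts, whereas a fixed seed would replay the same order every time.

from numpy.random.mtrand import RandomState

print(RandomState().permutation(5))    # varies between runs
print(RandomState(0).permutation(5))   # identical on every run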
Example #33
 def __init__(self, seed, epsilon, num_action):
     self.name = "epsilon-Greedy Agent"
     self.np_random = RandomState(seed)
     self.epsilon = epsilon
     self.RVS = [0 for i in range(num_action)]
Example #34
 def __init__(self, seed):
     RandomState.__init__(self, seed)
     self.seed = seed
Example #35
class LikelihoodAgent(Agent):
    def __init__(self, feature_provider, use_argmax=False, seed=43):
        self.feature_provider = feature_provider
        self.use_argmax = use_argmax
        self.random_state = RandomState(seed)
        self.model = None

    @property
    def num_products(self):
        return self.feature_provider.config.num_products

    def _create_features(self, user_state, action):
        """Create the features that are used to estimate the expected reward from the user state.
        """
        features = np.zeros(len(user_state) * self.num_products)
        features[action * len(user_state):(action + 1) *
                 len(user_state)] = user_state

        return features

    def train(self, logs):
        user_states, actions, rewards, proba_actions = build_rectangular_data(
            logs, self.feature_provider)
        print(user_states)

        features = np.vstack([
            self._create_features(user_state, action) for user_state, action in
            zip(user_states, actions)  # should be the enumerate of action
        ])
        self.model = LogisticRegression()
        self.model.fit(features.astype(float), rewards.astype(int))

    def _score_products(self, user_state):
        all_action_features = np.array([
            self._create_features(user_state, action)
            for action in range(self.num_products)
        ])
        temp = self.model.predict_proba(all_action_features)[:, 0]

        return temp

    def act(self, observation, reward, done):
        """Act method returns an action based on current observation and past history"""
        self.feature_provider.observe(observation)
        user_state = self.feature_provider.features(observation)
        prob = self._score_products(user_state)

        try:
            action = self.random_state.choice(self.num_products,
                                              p=prob / sum(prob))

            ps = prob[action]
            all_ps = prob.copy()
        except:
            action = np.argmax(prob)
            ps = 1.0
            all_ps = np.zeros(self.num_products)
            all_ps[action] = 1.0
        # ##epsilon greedy is working better, change it after tests

        return {
            **super().act(observation, reward, done),
            **{
                'action': action,
                'ps': ps,
                'ps-a': all_ps,
            }
        }

    def reset(self):
        self.feature_provider.reset()
Example #36
 def __init__(self, seed):
     RandomState.__init__(self, seed)
     self.seed = seed
Example #37
 def __init__(self, feature_provider, use_argmax=False, seed=43):
     self.feature_provider = feature_provider
     self.use_argmax = use_argmax
     self.random_state = RandomState(seed)
     self.model = None
Example #38
 def __init__(self, seed, epsilon):
     self.name = "epsilon-Greedy Agent"
     self.np_random = RandomState(seed)
     self.epsilon = epsilon
Example #39
def randxy(seed,cutoff=1.0):
   r = RandomState(seed)
   while(True):
      yield array(r.uniform(-cutoff,cutoff,2))
Example #40
from numpy.random.mtrand import RandomState
from hep_ml.commonutils import generate_sample
from hep_ml.metrics_utils import prepare_distribution, _ks_2samp_fast, ks_2samp_weighted, _cvm_2samp_fast, \
    group_indices_to_groups_matrix
from hep_ml.metrics import KnnBasedSDE, KnnBasedTheil, KnnBasedCvM, \
    BinBasedSDE, BinBasedTheil, BinBasedCvM
from hep_ml.metrics_utils import bin_to_group_indices, compute_bin_indices
from tests._metrics_oldimplementation import compute_sde_on_bins, compute_sde_on_groups, compute_theil_on_bins, \
    compute_theil_on_groups, bin_based_ks, groups_based_ks, cvm_2samp, bin_based_cvm, group_based_cvm, sde, \
    cvm_flatness, theil_flatness


__author__ = 'Alex Rogozhnikov'

random = RandomState()


def generate_binned_dataset(n_samples, n_bins):
    """useful function, generates dataset with bins, groups, random weights.
    This is used to test correlation functions. """
    random = RandomState()
    y = random.uniform(size=n_samples) > 0.5
    pred = random.uniform(size=(n_samples, 2))
    weights = random.exponential(size=(n_samples,))
    bins = random.randint(0, n_bins, n_samples)
    groups = bin_to_group_indices(bin_indices=bins, mask=(y == 1))
    return y, pred, weights, bins, groups


def test_bin_to_group_indices(size=100, bins=10):
Example #41
def randgen(seed):
   r = RandomState(seed)
   while(True):
      yield r.rand()
Example #42
 def __init__(self, config, logreg):
     super(LogregMulticlassModel, self).__init__(config)
     self.logreg = logreg
     if config.select_randomly:
         self.rng = RandomState(self.config.random_seed)
Example #43
 def __init__(self,
              beta: Union[int, ndarray, Iterable[int]] = 1.0,
              seed=None):
     self.beta = beta
     self.rs = RandomState(seed=seed)
Example #44
def randsite(seed,dim):
   r = RandomState(seed)
   while(True):
      yield r.randint(dim)
Example #45
 def __init__(self, *args, **kwargs):
     super(_PereiraBenchmark.PereiraHoldoutSubjectCeiling, self).__init__(*args, **kwargs)
     self._rng = RandomState(0)
     self._num_bootstraps = 5
Example #46
class AbstractEnv(gym.Env, ABC):
    def __init__(self):
        gym.Env.__init__(self)
        ABC.__init__(self)

        self.first_step = True
        self.config = None
        self.state = None
        self.current_user_id = None
        self.current_time = None
        self.empty_sessions = OrganicSessions()

    def reset_random_seed(self, epoch=0):
        # Initialize Random State.
        assert (self.config.random_seed is not None)
        self.rng = RandomState(self.config.random_seed + epoch)
        if self.config.random_seed_for_user is not None:
            assert isinstance(self.config.random_seed_for_user, int)
            self.user_rng = RandomState(self.config.random_seed_for_user +
                                        epoch)

    def init_gym(self, args):

        self.config = Configuration(args)

        # Defining Action Space.
        self.action_space = Discrete(self.config.num_products)

        if 'time_generator' not in args:
            self.time_generator = DefaultTimeGenerator(self.config)
        else:
            self.time_generator = self.config.time_generator

        # Setting random seed for the first time.
        self.reset_random_seed()

        if 'agent' not in args:
            self.agent = None
        else:
            self.agent = self.config.agent

        # Setting any static parameters such as transition probabilities.
        self.set_static_params()

        # Set random seed for second time, ensures multiple epochs possible.
        self.reset_random_seed()

    def reset(self, user_id=0):
        # Current state.
        self.first_step = True
        self.state = organic  # Manually set first state as Organic.

        self.time_generator.reset()
        if self.agent:
            self.agent.reset()

        self.current_time = self.time_generator.new_time()
        self.current_user_id = user_id

        # Record number of times each product seen for static policy calculation.
        self.organic_views = np.zeros(self.config.num_products)

    def generate_organic_sessions(self):

        # Initialize session.
        session = OrganicSessions()

        while self.state == organic:
            # Add next product view.
            self.update_product_view()
            session.next(
                DefaultContext(self.current_time, self.current_user_id),
                self.product_view)

            # Update markov state.
            self.update_state()

        return session

    def step(self, action_id):
        """

        Parameters
        ----------
        action_id : int between 1 and num_products indicating which
                 product recommended (aka which ad shown)

        Returns
        -------
        observation, reward, done, info : tuple
            observation (tuple) :
                a tuple of values (is_organic, product_view)
                is_organic - True  if Markov state is `organic`,
                             False if Markov state `bandit` or `stop`.
                product_view - if Markov state is `organic` then it is an int
                               between 1 and P where P is the number of
                               products otherwise it is None.
            reward (tuple) :
                a tuple of values (click, ctr), ctr is click-through-rate which
                means the probability of user clicking.
                if the previous state was
                    `bandit` - then reward is (1, ctr) if the user clicked on the ad
                               you recommended otherwise (0, ctr)
                    `organic` - then reward is (None, None)
            done (bool) :
                whether it's time to reset the environment again.
                An episode is over at the end of a user's timeline (all of
                their organic and bandit sessions)
            info (dict) :
                 this is unused, it's always an empty dict
        """

        # No information to return.
        info = {}

        if self.first_step:
            assert (action_id is None)
            self.first_step = False
            sessions = self.generate_organic_sessions()
            return (Observation(
                DefaultContext(self.current_time, self.current_user_id),
                sessions), (None, None), self.state == stop, info)

        assert (action_id is not None)
        # Calculate reward from action.
        reward = self.draw_click(action_id)  # (click ,ctr)

        self.update_state()

        # Markov state dependent logic.
        if self.state == organic:
            sessions = self.generate_organic_sessions()
        else:
            sessions = self.empty_sessions

        return (Observation(
            DefaultContext(self.current_time, self.current_user_id),
            sessions), reward, self.state == stop, info)

    def step_offline(self, observation, reward, done):
        """Call step function wih the policy implemented by a particular Agent."""

        if self.first_step:
            action = None
        else:
            assert (hasattr(self, 'agent'))
            assert (observation is not None)
            if self.agent:
                action = self.agent.act(observation, reward, done)
            else:
                # Select a Product randomly.
                action = {
                    't':
                    observation.context().time(),
                    'u':
                    observation.context().user(),
                    'a':
                    np.int16(self.rng.choice(self.config.num_products)),
                    'ps':
                    1.0 / self.config.num_products,
                    'ps-a':
                    (np.ones(self.config.num_products) /
                     self.config.num_products if self.config.with_ps_all else
                     ()),
                }

        if done:
            reward = self.draw_click(action['a'])  # (click ,ctr)
            return (action,
                    Observation(
                        DefaultContext(self.current_time,
                                       self.current_user_id),
                        self.empty_sessions), reward, done, None)
        else:
            observation, reward, done, info = self.step(
                action['a'] if action is not None else None)

            return action, observation, reward, done, info

    def generate_logs(self,
                      num_offline_users: int,
                      agent: Agent = None,
                      num_organic_offline_users: int = 0):
        """
        Produce logs of applying an Agent in the Environment for the specified number of Users.
        If the Agent is not provided, then the default Agent is used that randomly selects an Action.
        """

        if agent:
            old_agent = self.agent
            self.agent = agent

        data = {
            't': [],
            'u': [],
            'z': [],
            'v': [],
            'a': [],
            'c': [],
            'ctr': [],
            'ps': [],
            'ps-a': [],
        }

        def _store_organic(observation):
            assert (observation is not None)
            assert (observation.sessions() is not None)
            for session in observation.sessions():
                data['t'].append(session['t'])
                data['u'].append(session['u'])
                data['z'].append('organic')
                data['v'].append(session['v'])
                data['a'].append(None)
                data['c'].append(None)
                data['ctr'].append(None)
                data['ps'].append(None)
                data['ps-a'].append(None)

        def _store_bandit(action, reward):
            if action:
                assert (reward is not None)
                data['t'].append(action['t'])
                data['u'].append(action['u'])
                data['z'].append('bandit')
                data['v'].append(None)
                data['a'].append(action['a'])
                data['c'].append(reward[0])
                data['ctr'].append(reward[1])
                data['ps'].append(action['ps'])
                data['ps-a'].append(action['ps-a'] if 'ps-a' in action else ())

        unique_user_id = 0
        for _ in trange(num_organic_offline_users, desc='Organic Users'):
            self.reset(unique_user_id)
            unique_user_id += 1
            observation, _, _, _ = self.step(None)
            _store_organic(observation)

        for _ in trange(num_offline_users, desc='Users'):
            self.reset(unique_user_id)
            unique_user_id += 1
            observation, reward, done, _ = self.step(None)

            while not done:
                _store_organic(observation)
                action, observation, reward, done, _ = self.step_offline(
                    observation, reward, done)
                _store_bandit(action, reward)

            _store_organic(observation)

        data['t'] = np.array(data['t'], dtype=np.float32)
        data['u'] = pd.array(data['u'], dtype=pd.UInt32Dtype())
        data['v'] = pd.array(data['v'], dtype=pd.UInt32Dtype())
        data['a'] = pd.array(data['a'], dtype=pd.UInt32Dtype())
        data['c'] = np.array(data['c'], dtype=np.float32)
        data['ctr'] = np.array(data['ctr'], dtype=np.float32)

        if agent:
            self.agent = old_agent

        return pd.DataFrame().from_dict(data)

    def generate_gt(
        self,
        num_offline_users: int,
    ):
        data = {
            't': [],
            'u': [],
            'z': [],
            'v': [],
            'a': [],
            'c': [],
            'ctr': [],
            'ps': [],
            'ps-a': [],
        }

        def _store_organic(observation):
            assert (observation is not None)
            assert (observation.sessions() is not None)
            for session in observation.sessions():
                data['t'].append(session['t'])
                data['u'].append(session['u'])
                data['z'].append('organic')
                data['v'].append(session['v'])
                data['a'].append(None)
                data['c'].append(None)
                data['ctr'].append(None)
                data['ps'].append(None)
                data['ps-a'].append(None)

        def _store_bandit(action, reward):
            if action:
                assert (reward is not None)
                data['t'].append(action['t'])
                data['u'].append(action['u'])
                data['z'].append('bandit')
                data['v'].append(None)
                data['a'].append(action['a'])
                data['c'].append(reward[0])
                data['ctr'].append(reward[1])
                data['ps'].append(action['ps'])
                data['ps-a'].append(action['ps-a'] if 'ps-a' in action else ())

        unique_user_id = 0
        all_actions = np.arange(self.config.num_products)
        for _ in trange(num_offline_users, desc='Users'):
            self.reset(unique_user_id)
            unique_user_id += 1
            observation, reward, done, _ = self.step(None)

            while not done:
                _store_organic(observation)
                for action in all_actions:
                    if action == 0:
                        observation, reward, done, info = self.step(0)
                    else:
                        reward = self.draw_click(action)
                    action = {
                        't':
                        observation.context().time(),
                        'u':
                        observation.context().user(),
                        'a':
                        action,
                        'ps':
                        1.0,
                        'ps-a': (np.ones(self.config.num_products) /
                                 self.config.num_products
                                 if self.config.with_ps_all else ()),
                    }
                    _store_bandit(action, reward)
            _store_organic(observation)

        data['t'] = np.array(data['t'], dtype=np.float32)
        data['u'] = pd.array(data['u'], dtype=pd.UInt32Dtype())
        data['v'] = pd.array(data['v'], dtype=pd.UInt32Dtype())
        data['a'] = pd.array(data['a'], dtype=pd.UInt32Dtype())
        data['c'] = np.array(data['c'], dtype=np.float32)
        data['ctr'] = np.array(data['ctr'], dtype=np.float32)

        return pd.DataFrame().from_dict(data)
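A small sketch of the seeding scheme used in reset_random_seed above: offsetting a fixed base seed by the epoch number yields a stream that differs between epochs yet is reproducible across runs.

from numpy.random.mtrand import RandomState

base_seed = 42
for epoch in range(3):
    rng = RandomState(base_seed + epoch)
    print(epoch, rng.randint(0, 100, size=3))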
Example #47
 def __init__(self, k: Union[int, ndarray, Iterable[int]], seed=None):
     self.k = k
     self.rs = RandomState(seed=seed)
Example #48
class VectorNeuron(object):
    """
    The VectorNeuron class represents a single weight matrix and a corresponding
    transfer function.  

    An input to a VectorNeuron is first multiplied by the weight matrix.  The 
    result is then fed through a transfer function to produce the VectorNeuron's 
    output. 

    The output is either the containing neural network's final output or the input
    to another VectorNeuron.
    """

    __weight_matrix = None
    __weight_matrix_backup = None
    __bias_vector = None
    __delta_w_matrix = None
    __result = None
    __transfer_function = ""
    __mersenne_twister = None

    def __init__(self, left_num_neurons, right_num_neurons, transfer_function):
        """
        Initializes a vectorneuron with a weight matrix of size (left_num_neurons, right_num_neurons), 
        a bias vector of size (right_num_neurons, 1), and a transfer function transfer_function.
        """

        print '>>> Creating VectorNeuron: (%s, %s) %s' % \
        (left_num_neurons, right_num_neurons, transfer_function)

        self.__weight_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__weight_matrix_backup = self.__weight_matrix.copy()
        self.__bias_vector = Matrix(rand(right_num_neurons, 1))
        self.__delta_w_matrix = Matrix(rand(right_num_neurons, left_num_neurons))
        self.__mersenne_twister = MersenneTwister()
        self.__mersenne_twister.seed(int(1000*time.time()))
        self.__transfer_function = transfer_function

    def neuron_compute(self, input_matrix):
        """Computes the vectorneuron output for input_matrix"""
        self.__result = self.__weight_matrix * input_matrix
        current_value = None
        transfer_function = self.__transfer_function
        row_dim = self.__weight_matrix.shape[0]
        input_col_dim = input_matrix.shape[1]

        for i in range(0,row_dim):
            for j in range(0, input_col_dim):
                current_value = self.__result[i, j]
                self.__result[i, j]= self.__bias_vector[i, 0] + current_value
                cmd = "self.%s(current_value)" % transfer_function
                self.__result[i, j] = eval(cmd)

    def compute_delta_w(self, m, lr):
        """Computes new delta_w matrix"""
        k = 0
        delta_w = None
        delta_w_row_dim = self.__delta_w_matrix.shape[0]
        delta_w_col_dim = self.__delta_w_matrix.shape[1]

        for i in range(0, delta_w_row_dim):
            for j in range(0,delta_w_col_dim):
                k = abs(self.__mersenne_twister.randint(0,math.pow(2,32)) % m)
                if k == 0:
                    delta_w = lr
                elif k == 1:
                    delta_w = -1.0 * lr
                else:
                    delta_w = 0.0
                self.__delta_w_matrix[i, j] = delta_w

    def compute_delta_w_annealing(self, n, m, lr):
        """Computes new delta_w matrix (annealing style)"""
        k = 0
        delta_w = None
        delta_w_row_dim = self.__delta_w_matrix.shape[0]

        for i in range(0,delta_w_row_dim):
            delta_w_matrix_col = self.__delta_w_matrix.shape[1]
            for j in range(0, delta_w_matrix_col):
                k = abs(self.__mersenne_twister.randint(0,math.pow(2,32)) % m)
                if k < n:
                    if k % 2 == 0:
                        if (k == 0):
                            delta_w = lr
                        else:
                            delta_w = lr / k
                    elif k % 2 == 1:
                        delta_w = -1.0 * lr / k
                    else:
                        delta_w = 0.0
                else:
                    delta_w = 0.0
                self.__delta_w_matrix[i, j]  = delta_w

    def logsig(self, x):
        """Returns logsig of a single variable x"""
        return 1.0/(1.0 + exp(-1.0 * x))

    def purelin(self, x):
        """Returns purelin of a single variable x"""
        return x

    def tansig(self, x):
        """Returns tansig of a single variable x"""
        return 2.0 / (1.0 + exp(-2.0 * x)) - 1.0

    def linsig(self, x):
        """Returns linsig of a single variable x"""
        if x <= 1.0 and x >= -1.0:
            return x
        if x > 1:
            return 1.0
        else:
            return -1.0

    def change_weights(self):
        """Changes weight_matrix by adding delta_w_matrix"""
        #print 'weight_matrix orig'
        #print self.__weight_matrix
        self.__weight_matrix = self.__weight_matrix + self.__delta_w_matrix
        #print 'weight matrix new'
        #print self.__weight_matrix

    def rollback_weights(self):
        """Reset weight_matrix to weight_matrix_backup"""
        #print 'resetting weights'
        self.__weight_matrix = self.__weight_matrix_backup.copy()

    def weight_matrix_backup(self):
        """Copies the current weight_matrix to weight_matrix_backup"""
        self.__weight_matrix_backup = self.__weight_matrix.copy()

    def get_bias(self):
        """Returns the vectorneuron's bias vector"""
        return self.__bias_vector

    def get_delta_w(self):
        """Return the computed delta_w matrix used to alter the weights"""
        return self.__delta_w_matrix

    def get_result(self):
        """Returns the output of vectorneuron's neuron_compute function"""
        return self.__result

    def get_weight_matrix(self):
        """Returns the vectorneuron's current weight_matrix"""
        return self.__weight_matrix

    def get_weight_matrix_backup(self):
        """Returns a backup of the vectorneuron's previous weight_matrix"""
        return self.__weight_matrix_backup

    def get_transfer_function(self):
        """Returns the vectorneuron's transfer function"""
        return self.__transfer_function

    def write_weight_to_file(self, filename):
        """Write the vectorneuron's weight_matrix to filename """
        savetxt(filename, self.__weight_matrix)
        return True

    def write_bias_to_file(self, filename):
        """Write the vectorneuron's biias vector to filename"""
        savetxt(filename, self.__bias_vector)
        return True
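A quick numeric check (an illustration, not from the original source) of the transfer functions: tansig(x) equals numpy.tanh(x), and logsig is the standard sigmoid.

import numpy as np

x = 0.5
print(2.0 / (1.0 + np.exp(-2.0 * x)) - 1.0, np.tanh(x))   # both ~0.46212
print(1.0 / (1.0 + np.exp(-x)))                           # logsig(0.5) ~0.62246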
Example #49
        class OrganicUserEventCounterModel(Model):
            """
            Organic Event Count Model (per User).
            """

            def __init__(self, config):
                super(OrganicUserEventCounterModel, self).__init__(config)
                self.rng = RandomState(self.config.random_seed)


            def act(self, observation, features):
                features = features.flatten()
                if self.config.exploit_explore:
                    is_explore_case = self.rng.choice(
                        [True, False],
                        p=[self.config.epsilon, 1 - self.config.epsilon]
                    )
                    if is_explore_case:
                        mask = features == 0
                        features[mask] = 1
                        features[~mask] = 0
                    action_proba = features / np.sum(features)
                else:
                    features = self.config.epsilon + features  # add epsilon where we don't explore so every product remains selectable at some point
                    action_proba = features / np.sum(features)

                    if self.config.reverse_pop:
                        action_proba = 1 - action_proba
                        action_proba = action_proba / np.sum(action_proba)

                if self.config.select_randomly:
                    action = self.rng.choice(self.config.num_products, p=action_proba)
                    #if self.config.exploit_explore:
                    #    ps = (
                    #            (
                    #                self.config.epsilon
                    #                if is_explore_case else
                    #                1 - self.config.epsilon
                    #            ) * action_proba[action]
                    #    )
                    #else:
                    ps = action_proba[action]
                    if self.config.with_ps_all:
                        ps_all = action_proba
                    else:
                        ps_all = ()
                else:
                    action = np.argmax(action_proba)
                    ps = 1.0
                    if self.config.with_ps_all:
                        ps_all = np.zeros(self.config.num_products)
                        ps_all[action] = 1.0
                    else:
                        ps_all = ()
                return {
                    **super().act(observation, features),
                    **{
                        'a': action,
                        'ps': ps,
                        'ps-a': ps_all,
                    },
                }
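A compact sketch of the non-exploring branch above: organic view counts are smoothed by epsilon and normalised into a probability vector from which the action is drawn.

import numpy as np
from numpy.random.mtrand import RandomState

features = np.array([4.0, 0.0, 1.0])   # organic view counts per product
epsilon = 0.1
action_proba = (features + epsilon) / np.sum(features + epsilon)
action = RandomState(0).choice(len(features), p=action_proba)
print(action_proba, action)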
Example #50
class Sensor(Thread):
    
    def __init__(self, athlete, queue, ID, rate = 20,
                 noise = 0.3, verbose = False, seed = None):
        """
        Sensor class which gets position measurements from athlete, adds noise
        and collects them in a queue.
        
        :param athlete: object yielding position data when called
        :param queue: queue to which the measurements are added
        :param id: sensor ID
        :param rate (optional): sampling rate of sensor in Hz, default: 20
        :param noise (optional): standard deviation of noise on measurement in
        meter, default: 0.3
        :param verbose (optional): verbosity of sensor, default: False
        :param seed (optional): seed of noise generation, default: None 
        """
        super(Sensor, self).__init__()
        self.queue = queue
        self.ID = ID
        self.athlete = athlete
        self.rate = rate
        self.deltat = 1./self.rate
        self.noise = noise
        self.verbose = verbose
        self.rs = RandomState(seed)
        self.running = Event()
        
    def run(self):
        """
        Run sensor.
        """
        if self.verbose:
            print('Sensor %s started'%self.ID)
        # start time
        time = datetime.now()
        # number of measurements
        i = 0
        while not self.running.isSet():
            # get time of measurement
            t = datetime.now()
            # get data from athlete
            data = self.athlete(t)
            # add noise to position
            pos = data.pos + self.rs.randn(2) * self.noise
            # create MeasurementSpec instance containing ID, position,
            # and time of measurement
            measurement = MeasurementSpec(ID = self.ID, coords = pos,
                                          time = t)
            # add measurement to queue
            self.queue.put(measurement)
            # increment number of measurements
            i += 1
            # calculate time to wait to satisfy sampling rate
            timeout = i * self.deltat - (datetime.now()-time).total_seconds()
            self.running.wait(timeout)
        if self.verbose:
            print('Sensor %s stopped'%self.ID)

        
    def stop(self):
        """
        Stop sensor.
        """
        self.running.set()
Example #51
def compute_amplitude_prediction_correlations_voltage(pred_fn,
                                                      examples,
                                                      n_iterations,
                                                      perturb_fn=None,
                                                      batch_size=30,
                                                      seed=((2017, 7, 10))):
    """
    Changed function to calculate time-resolved voltage perturbations rather than frequency perturbations as in the original compute_amplitude_prediction_correlations.

    Perturb input amplitudes and compute correlation between amplitude
    perturbations and prediction changes when pushing perturbed input through
    the prediction function.
    For more details, see [EEGDeepLearning]_.

    Parameters
    ----------
    pred_fn: function
        Function accepting a numpy input and returning predictions.
    examples: ndarray
        Numpy examples, first axis should be the example axis.
    n_iterations: int
        Number of iterations to compute.
    perturb_fn: function, optional
        Function accepting an amplitude array and a random generator and
        returning a perturbation. Default is Gaussian perturbation.
    batch_size: int, optional
        Batch size for computing predictions.
    seed: int, optional
        Random generator seed.

    Returns
    -------
    amplitude_pred_corrs: ndarray
        Correlations between amplitude perturbations and prediction changes
        for all sensors and frequency bins.

    References
    ----------
    .. [EEGDeepLearning] Schirrmeister, R. T., Springenberg, J. T., Fiederer, L. D. J.,
       Glasstetter, M., Eggensperger, K., Tangermann, M., ... & Ball, T. (2017).
       Deep learning with convolutional neural networks for EEG decoding and
       visualizations.
       arXiv preprint arXiv:1703.05051.
    """
    inds_per_batch = get_balanced_batches(n_trials=len(examples),
                                          rng=None,
                                          shuffle=False,
                                          batch_size=batch_size)
    log.info("Compute original predictions...")
    orig_preds = [
        pred_fn(examples[example_inds]) for example_inds in inds_per_batch
    ]
    orig_preds_arr = np.concatenate(orig_preds)
    rng = RandomState(seed)
    # Leftover from the original amplitude-domain version: the FFT amplitudes
    # and phases are computed here but never used by this voltage-perturbation variant.
    fft_input = np.fft.rfft(examples, axis=2)
    amps = np.abs(fft_input)
    phases = np.angle(fft_input)

    amp_pred_corrs = []
    for i_iteration in range(n_iterations):
        log.info("Iteration {:d}...".format(i_iteration))
        log.info("Sample perturbation...")
        #modified part start
        perturbation = rng.randn(*examples.shape)
        new_in = examples + perturbation
        #modified part end
        log.info("Compute new predictions...")
        new_in = new_in.astype('float32')
        new_preds = [
            pred_fn(new_in[example_inds]) for example_inds in inds_per_batch
        ]

        new_preds_arr = np.concatenate(new_preds)

        diff_preds = new_preds_arr - orig_preds_arr

        log.info("Compute correlation...")
        amp_pred_corr = wrap_reshape_apply_fn(corr,
                                              perturbation[:, :, :, 0],
                                              diff_preds,
                                              axis_a=(0, ),
                                              axis_b=(0))
        amp_pred_corrs.append(amp_pred_corr)
    return amp_pred_corrs
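
A self-contained sketch of the core computation in this example, correlating a Gaussian voltage perturbation with the resulting change in predictions. It uses a toy prediction function and plain numpy in place of the braindecode helpers (get_balanced_batches, wrap_reshape_apply_fn, corr); every name, seed and shape below is illustrative, not the original API:

import numpy as np
from numpy.random import RandomState

def toy_pred_fn(x):
    # toy "model": mean voltage of each example -> one prediction per example
    return x.mean(axis=(1, 2, 3))[:, np.newaxis]

# (examples, channels, time, 1), matching the indexing perturbation[:, :, :, 0] above
examples = RandomState(0).randn(20, 4, 100, 1).astype('float32')
orig_preds = toy_pred_fn(examples)

rng = RandomState(20170710)
corrs = []
for _ in range(5):
    perturbation = rng.randn(*examples.shape)
    new_preds = toy_pred_fn((examples + perturbation).astype('float32'))
    diff_preds = new_preds - orig_preds                      # (examples, n_outputs)
    pert_flat = perturbation[:, :, :, 0].reshape(len(examples), -1)
    # correlate each input location's perturbation with each output change
    # across the example axis, which is what the wrapped corr call computes above
    n = pert_flat.shape[1]
    corr_mat = np.corrcoef(pert_flat.T, diff_preds.T)[:n, n:]
    corrs.append(corr_mat.reshape(examples.shape[1], examples.shape[2], -1))

print(np.mean(corrs, axis=0).shape)                          # (channels, time, n_outputs)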
Example #52
0
 def _initialize_random_state(self):
     return RandomState(seed=0)  # fix the seed here so that the same halves are produced for score and ceiling
Example #53
0
    def __init__(self,
                 model_identifier,
                 model,
                 file_path,
                 vocab_size=None,
                 block_size=512,
                 max_features=4000):
        assert os.path.isfile(file_path), f"{file_path} is not a file"
        with open(file_path, encoding="utf-8") as f:
            text = f.read()

        # Tokens
        directory, filename = os.path.split(file_path)
        cached_tokens_file = os.path.join(
            directory, f'cached_lm_{model_identifier}_{block_size}_{filename}')
        if os.path.exists(cached_tokens_file) and os.getenv('NOSAVE',
                                                            '0') != '1':
            logger.info("Loading tokens from cached file %s",
                        cached_tokens_file)
            with open(cached_tokens_file, 'rb') as handle:
                self.examples = pickle.load(handle)
        else:
            logger.info("Creating tokens from dataset file %s", file_path)
            self.examples = []
            tokenized_text = model.tokenize(text, vocab_size=vocab_size)
            assert tokenized_text.max() < vocab_size
            # Truncate in blocks of block_size
            # Especially with the small block sizes we end up using, together with
            # feeding in context one word at a time, this is not ideal because the model
            # does not see much context. But maximizing the context per block would cost
            # even more compute.
            for i in tqdm(range(0,
                                len(tokenized_text) - block_size + 1,
                                block_size),
                          desc='truncate text into blocks'):
                self.examples.append(
                    model.tokens_to_inputs(tokenized_text[i:i + block_size]))
            # Note that we are losing the last truncated example here for the sake of simplicity (no padding)
            # If your dataset is small, first you should look for a bigger one :-) and second you
            # can change this behavior by adding (model specific) padding.
            if os.getenv('NOSAVE', '0') != '1':
                logger.info("Saving tokens into cached file %s",
                            cached_tokens_file)
                with open(cached_tokens_file, 'wb') as handle:
                    pickle.dump(self.examples,
                                handle,
                                protocol=pickle.HIGHEST_PROTOCOL)

        # Features
        cached_features_file = os.path.join(
            directory,
            f'cached_lm_features_{model_identifier}_{block_size}_{filename}')
        if os.path.exists(cached_features_file) and os.getenv('NOSAVE',
                                                              '0') != '1':
            logger.info("Loading features from cached file %s",
                        cached_features_file)
            with open(cached_features_file, 'rb') as handle:
                self.features = pickle.load(handle)
        else:
            self.features = []
            for block in tqdm(
                    self.examples,
                    desc="token blocks to features"):  # pass tokens to model
                block_features = model(block)
                self.features.append(block_features)
            self.features = np.array(self.features)
            if os.getenv('NOSAVE', '0') != '1':
                logger.info("Saving features into cached file %s",
                            cached_features_file)
                with open(cached_features_file, 'wb') as handle:
                    pickle.dump(self.features,
                                handle,
                                protocol=pickle.HIGHEST_PROTOCOL)
        assert len(self.examples) == len(self.features)

        # optional subsampling
        if self.features[0].shape[-1] > max_features:
            indices = np.arange(self.features[0].shape[-1])
            rnd = RandomState(0)
            indices = rnd.choice(indices, size=max_features, replace=False)
            self.subsample = lambda features: features[:, :, indices]
        else:
            self.subsample = lambda features: features
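
The closing block pins a reproducible random subset of feature dimensions once (RandomState(0)) and reuses it for every block, so repeated runs keep the same columns. A standalone sketch of that idiom, with hypothetical array shapes:

import numpy as np
from numpy.random import RandomState

max_features = 4000
features = np.random.randn(10, 512, 6000)       # hypothetical (blocks, tokens, feature_dim) array

if features.shape[-1] > max_features:
    # fixed seed -> the same feature columns are kept on every run
    indices = RandomState(0).choice(features.shape[-1], size=max_features, replace=False)
    subsample = lambda f: f[:, :, indices]
else:
    subsample = lambda f: f

print(subsample(features).shape)                # (10, 512, 4000)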