def _generate_data():
    '''
    Generate random input data (un_coded_bits) and subsequent output data 
    (coded_bits) of a rate (2,3) convolutional coder
    '''

    seq_length = 100000    
    prng = RandomState(314159265)
   
    un_coded_bits = prng.randint(2, size = seq_length)   
    initial_state = prng.randint(2, size = 6)
    
    coded_bits = _convolutional_coder(un_coded_bits, initial_state)
    
    return coded_bits, un_coded_bits
Example #2
    def test_MACD_window_length_generation(self, seed):
        rng = RandomState(seed)

        signal_period = rng.randint(1, 90)
        fast_period = rng.randint(signal_period + 1, signal_period + 100)
        slow_period = rng.randint(fast_period + 1, fast_period + 100)
        ewma = MovingAverageConvergenceDivergenceSignal(
            fast_period=fast_period,
            slow_period=slow_period,
            signal_period=signal_period,
        )
        assert_equal(
            ewma.window_length,
            slow_period + signal_period - 1,
        )
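As a quick numeric check of the relation asserted above (the parameter values here are arbitrary): with signal_period=9, fast_period=12 and slow_period=26, the expected window_length is 26 + 9 - 1 = 34.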
Example #3
def align_converge(y_LR,size=64):
    """iterate until offsets converge"""
    (h,w) = y_LR.shape
    # split image
    y_L = y_LR[:,:w/2]
    y_R = y_LR[:,w/2:]
    (h,w) = y_L.shape
    s = size / 2
    # now find n offsets
    rand = RandomState(0)
    prev_dx, prev_dy = 0, 0
    series = []
    while True:
        # at a random location in y_L
        y = rand.randint(h/4,h*3/4)
        x = rand.randint(w/4,w*3/4)
        it = y_L[y:y+s,x:x+s] # take an s x s chunk there
        tm = match_template(y_R,it) # match it against y_R
        ry, rx = maximum_position(tm) # max value is location
        series += [((y-ry), (x-rx))] # accumulate
        print series
        n = len(series)
        if n % 2 == 0:
            # take the median
            dy, dx = np.median(np.asarray(series),axis=0).astype(int)
            if n > 100 or (abs(dy-prev_dy) == 0 and abs(dx-prev_dx) == 0):
                return dy, dx
            prev_dy, prev_dx = dy, dx
Example #4
 def createGraph(self):
     """
         W -> Adjacency Matrix
         For test cases, this method is modified to generate random 
         weight matrix with zero weights for no edges.
         
            Graph is undirected with only positive weights 
            
         @prng: pseudo random number generator
     """
     '''
     _ = 0  # no edge
         # a  b  c  d
     W = [[_, 1, 3, 4],  # a
          [1, _, 2, 1],  # b
          [_, 1, _, 1],  # c
          [5, 1, 2, _]]  # d
     #print W
     '''
     prng = RandomState()  # alternative to random.seed
     w = prng.randint(0, 6, size=16)
     # inflating 1D array to 2D square matrix
     W = w.reshape(4, 4)
     #pprint(W)
     #print W[a][d]
     W = np.array(W) 
     W_symm = (W + W.T)/2  # making the matrix symmetric
     np.fill_diagonal(W_symm, 0)
     return W_symm
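The same construction can be sketched outside the class to show the shape of the result; this only restates the lines above, with a fixed seed added purely for reproducibility.

import numpy as np
from numpy.random import RandomState

prng = RandomState(0)                    # fixed seed, illustration only
w = prng.randint(0, 6, size=16)          # 16 edge weights in [0, 6)
W = w.reshape(4, 4)                      # inflate to a 4 x 4 weight matrix
W_symm = (W + W.T) / 2                   # symmetrise for an undirected graph
np.fill_diagonal(W_symm, 0)              # no self-loops
print(W_symm)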
Example #5
def test_qtl_fast_binomial_scan():
    random = RandomState(9)

    N = 200
    G = random.randn(N, N + 100)
    G = stdnorm(G, 0)
    G /= sqrt(G.shape[1])

    p = 2
    X = random.randn(N, p)
    X = stdnorm(X, 0)
    X /= sqrt(X.shape[1])

    ntrials = random.randint(1, 50, N)
    nsuccesses = binomial(
        ntrials,
        -0.1,
        G,
        causal_variants=X,
        causal_variance=0.1,
        random_state=random)

    qtl = scan(BinomialPhenotype(nsuccesses, ntrials), X, G=G, progress=False,
               fast=True)

    assert_allclose(
        qtl.pvalues(), [
            0.698565827403, 0.443299805368
        ],
        rtol=1e-4)
Example #6
class SubSampled(object):

    def __init__(self, dataset, nb, random_state=2, mode='random', shuffle=True):
        self.dataset = dataset
        self.nb = nb
        self.rng = RandomState(random_state)
        self.mode = mode
        self.shuffle = shuffle
        self.next_batch_iter = None

    def load(self):
        self.dataset.load()
        if self.mode == 'random':
            indices = self.rng.randint(0, len(self.dataset.X), size=self.nb)
        elif self.mode == 'batch':
            try:
                indices = next(self.next_batch_iter)
            except Exception:
                self.next_batch_iter = iterate_minibatches(self.dataset.X.shape[0], self.nb, shuffle=self.shuffle)
                indices = next(self.next_batch_iter)
        self.X = self.dataset.X[indices]
        if hasattr(self.dataset, "y"):
            self.y = [self.dataset.y[ind] for ind in indices]
        if hasattr(self.dataset, "img_dim"):
            self.img_dim = self.dataset.img_dim
        if hasattr(self.dataset, "output_dim"):
            self.output_dim = self.dataset.output_dim
        if hasattr(self.dataset, "y_raw"):
            self.y_raw = [self.dataset.y_raw[ind] for ind in indices]
Example #7
class RandomGenerator(object):
    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def random(self):
        return self._random.rand()

    def randint(self, a, b=None):
        if b is None:
            b = a
            a = 0
        r = self._random.randint(a, high=b, size=1)
        return r[0]

    def sample(self, population, k):
        if k == 0:
            return []
        return self._random.choice(population, size=k, replace=False)

    def __getattr__(self, attr):
        return getattr(self._random, attr)

    def __getstate__(self):
        return {'_random': self._random}

    def __setstate__(self, d):
        self._random = d['_random']
Example #8
def transpose_characters(token, index_to_char, n=1, char_pool=None, seed=17):
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    chars = set(token)
    if len(chars) == 1:
        return token

    new_token = token
    for i in six.moves.range(n):
        idx = max(1, rng.randint(len(new_token)))
        neighbor = 0
        if idx == 0:
            neighbor = 1
        elif idx == len(new_token) - 1:
            neighbor = len(new_token) - 2
        else:
            if rng.uniform() > 0.5:
                neighbor = idx + 1
            else:
                neighbor = idx - 1
        left = min(idx, neighbor) 
        right = max(idx, neighbor)
        new_token = unicode(new_token[0:left] + new_token[right] + new_token[left] + new_token[right+1:])
    return new_token
Example #9
def test_get_random_state():
    prng1 = RandomState(42)
    prng2 = get_prng(42)
    prng3 = get_prng(prng1)
    prng4 = get_prng(prng2)
    prng5 = get_prng()
    prng6 = get_prng(None)
    prng7 = get_prng(np.random)
    assert(isinstance(prng1, RandomState))
    assert(isinstance(prng3, RandomState))
    assert(isinstance(prng5, RandomState))
    assert(isinstance(prng6, RandomState))
    assert(isinstance(prng7, RandomState))
    x1 = prng1.randint(5, size=10)
    x2 = prng2.randint(5, size=10)
    x3 = prng3.randint(5, size=10)
    x4 = prng4.randint(5, size=10)
    x5 = prng5.randint(5, size=10)
    x6 = prng6.randint(5, size=10)
    x7 = prng7.randint(5, size=10)
    assert_equal(x1, x2)
    assert_equal(x3, x4)
    assert_equal(len(x5), 10)
    assert_equal(len(x6), 10)
    assert_equal(len(x7), 10)
Example #10
def make_ratings(n_users, n_items, min_rating_per_user, max_rating_per_user,
                 rating_choices, seed=None, shuffle=True):
    """Randomly generate a (user_id, item_id, rating) array

    Return
    ------
        ndarray with shape (n_samples, 3)

    """
    if not (isinstance(rating_choices, list) or
            isinstance(rating_choices, tuple)):
        raise ValueError("'rating_choices' must be a list or tuple")
    if min_rating_per_user < 0 or min_rating_per_user >= n_items:
        raise ValueError("invalid 'min_rating_per_user' invalid")
    if (min_rating_per_user > max_rating_per_user) or \
       (max_rating_per_user >= n_items):
        raise ValueError("invalid 'max_rating_per_user' invalid")

    rs = RandomState(seed=seed)
    user_arrs = []
    for user_id in xrange(n_users):
        item_count = rs.randint(min_rating_per_user, max_rating_per_user)
        item_ids = rs.choice(n_items, item_count, replace=False)
        ratings = rs.choice(rating_choices, item_count)
        arr = np.stack(
            [np.repeat(user_id, item_count), item_ids, ratings], axis=1)
        user_arrs.append(arr)

    ratings = np.array(np.vstack(user_arrs))
    ratings[:, 2] = ratings[:, 2].astype('float')
    if shuffle:
        rs.shuffle(ratings)
    return ratings
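A minimal usage sketch for the function above (note the snippet itself is Python 2 code, using xrange); the argument values are arbitrary.

ratings = make_ratings(n_users=3, n_items=10,
                       min_rating_per_user=2, max_rating_per_user=5,
                       rating_choices=[1, 2, 3, 4, 5], seed=0)
print(ratings.shape)   # (n_samples, 3): columns are user_id, item_id, rating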
Example #11
def test_qtl_binomial_scan_covariate_redundance():
    random = RandomState(9)

    N = 200
    G = random.randn(N, N + 100)
    G = stdnorm(G, 0)
    G /= sqrt(G.shape[1])

    p = 2
    X = random.randn(N, p)
    X = stdnorm(X, 0)
    X /= sqrt(X.shape[1])

    ntrials = random.randint(1, 50, N)
    nsuccesses = binomial(
        ntrials,
        -0.1,
        G,
        causal_variants=X,
        causal_variance=0.1,
        random_state=random)

    X[:] = 1
    qtl = scan(BinomialPhenotype(nsuccesses, ntrials), X, G=G, progress=False,
               fast=False)
    assert_allclose(qtl.pvalues(), [1] * p, rtol=1e-4)
Example #12
def randShots(seed):

    prng = RandomState(seed)
    treat = prng.randint(0, 3)
    if treat == 1:
        return "1 Shot"
    else:
        return str(treat) + " Shots"
Example #13
def randShots(seed):

	prng = RandomState(seed)
	treat = prng.randint(0, 2)
	if treat == 1:
		return('1 Shot')
	else:
		return(str(treat) + ' Shots')
Example #14
def test_corrcoef():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = corrcoef(x, y, group)
    res2 = corrcoef(x, y, group)
    np.testing.assert_equal(res1, res2)
Example #15
def test_sim_corr():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = sim_corr(x, y, group, seed=prng)
    res2 = sim_corr(x, y, group)
    np.testing.assert_equal(res1[0], res2[0])
Example #16
File: sim.py Project: matthagy/pbd
def initialize_random(cexinf, random_seed):
    if random_seed is None:
        random_seed = generate_seed()
    random_seed = int(random_seed)
    msg('initializing random state with seed=0x%X', random_seed)
    rnd = RandomState(random_seed)
    cexinf.map_all_async(make_writing_message('initialize_random', 'u', seed)
                         for seed in rnd.randint(0xfffffff, size=cexinf.get_size())
                         ).read_frmt('x')
Example #17
def test_corr():
    prng = RandomState(42)
    x = prng.randint(5, size=10)
    y = x
    res1 = corr(x, y, prng=prng)
    res2 = corr(x, y)
    np.testing.assert_equal(len(res1), 5)
    np.testing.assert_equal(len(res2), 5)
    np.testing.assert_equal(res1[0], res2[0])
    np.testing.assert_equal(res1[1], res2[1])
    #np.testing.assert_equal(res1[2], res2[2])
    #np.testing.assert_equal(res1[3], res2[3])

    y = prng.randint(5, size=10)
    res1 = corr(x, y, prng=prng)
    res2 = corr(x, y)
    np.testing.assert_equal(len(res1), 5)
    np.testing.assert_equal(len(res2), 5)
    np.testing.assert_equal(res1[0], res2[0])
Example #18
def test_permute():
    prng = RandomState(42)

    x = prng.randint(10, size=20)
    permute(x, prng)
    expected = np.array([3, 2, 7, 9, 6, 5, 1, 6, 4, 2,
                         7, 7, 7, 4, 4, 3, 1, 5, 7, 6])
    np.testing.assert_array_equal(x, expected)

    permute(x)
    np.testing.assert_equal(x.max(), 9)
    np.testing.assert_equal(x.min(), 1)
Example #19
def sample_half_counts(evt0, seed=0):
    evt = sdict(b = evt0.b, d = evt0.d, ims = evt0.ims, ths = evt0.ths,
                bkgr = evt0.bkgr/2)
    N = len(evt0.xc)
    rnd = RandomState(seed)
    m = rnd.randint(low=0, high=N, size=rnd.binomial(N,0.5))

    evt.xc = evt0.xc[m]
    evt.yc = evt0.yc[m]
    evt.w = evt0.w[m]

    return evt
Example #20
def chromosome_init(N, h_dom=16, w_dom=16, h=100, w=100):
    x_dom = RandomState() #[0-(w-16-1)]
    y_dom = RandomState() #[0-(h-16-1)]
    flip =  RandomState()  #[0-7]

    chrom_pool = (ctypes.c_char_p * N)()
       
    for i in xrange(N):
        chromosome = ctypes.create_string_buffer(2*9+3)          # 9 bits x_dom, 9 bits y_dom and 3 bits flip
        chromosome.value = '{0:09b}'.format(x_dom.randint(0,w-w_dom))+'{0:09b}'.format(y_dom.randint(0,h-h_dom))+'{0:03b}'.format(flip.randint(0,8))
        chrom_pool[i] = ctypes.string_at(chromosome,21)
    return chrom_pool
Example #21
def test_permute_rows():
    prng = RandomState(42)

    x = prng.randint(10, size=20).reshape(2, 10)
    permute_rows(x, prng)
    expected = np.array([[2, 7, 7, 6, 4, 9, 3, 4, 6, 6],
                         [7, 4, 5, 5, 3, 7, 1, 2, 7, 1]])
    np.testing.assert_array_equal(x, expected)

    permute_rows(x)
    np.testing.assert_equal(x.max(), 9)
    np.testing.assert_equal(x.min(), 1)
Example #22
def replace_characters(token, index_to_char, n=1, char_pool=string.ascii_lowercase, seed=17):
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    new_token = token
    for i in six.moves.range(n):
        idx = max(1, rng.randint(len(new_token)))
        #ch = index_to_char[rng.randint(len(index_to_char))]
        ch = rng.choice(list(char_pool))
        new_token = unicode(new_token[0:idx-1] + ch + new_token[idx:])
    return new_token
Example #23
def pseudorandom(sequence, seed=None):
    '''
    Returns a randomly selected element from the sequence.
    '''
    # We need to create a stand-alone generator that cannot be affected by other
    # parts of the code that may require random data (e.g. noise).
    from numpy.random import RandomState
    state = RandomState()
    state.seed(seed)
    n = len(sequence)
    while True:
        i = state.randint(0, n)
        yield sequence[i]
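A short usage sketch: with a fixed seed the generator yields a reproducible stream of elements drawn uniformly from the sequence.

gen = pseudorandom(['a', 'b', 'c'], seed=0)
print([next(gen) for _ in range(5)])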
Example #24
def test_naive_ova_asgd_wrong_labels():

    rstate = RandomState(42)

    n_classes = 10

    Xtrn, ytrn = get_fake_multiclass_data(N_POINTS, N_FEATURES, n_classes,
                                          rstate)

    clf = OVAASGD(*((n_classes,) + DEFAULT_ARGS),
                  rstate=RandomState(999), **DEFAULT_KWARGS)
    ytrn_bad = rstate.randint(n_classes + 42, size=len(ytrn))
    clf.partial_fit(Xtrn, ytrn_bad)
Example #25
def test_sim_corr():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = sim_corr(x, y, group, seed=prng, reps=100)
    res2 = sim_corr(x, y, group, seed=prng, alternative='less', reps=100)
    res3 = sim_corr(x, y, group, seed=prng, alternative='two-sided', reps=100)
    
    assert_almost_equal(res1[0], 1-res2[0])
    assert_equal(res1[1], res2[1])
    assert_equal(res1[1], res3[1])
    assert_equal(res1[0], res3[0])
Example #26
def delete_characters(token, index_to_char, n=1, char_pool=None, seed=17):
    if isinstance(seed, RandomState):
        rng = seed
    else:
        rng = RandomState(seed)

    new_token = token
    if n > len(new_token):
        n = len(new_token) - 1
    for i in six.moves.range(n):
        try:
            idx = max(1, rng.randint(len(new_token)))
            new_token = unicode(new_token[0:idx-1] + new_token[idx:])
        except ValueError as e:
            print('new_token', new_token, len(new_token))
            raise e
    return new_token
Example #27
def setupSeed(hoursBetweenTimestepInROMSFiles,startTime,endTime,startSpawningTime,endSpawningTime,releaseParticles):
    ##################################################
    # Create seed variation as function of day
    ##################################################

    # Make datetime array from start to end at 3 hour interval
    #interval = timedelta(hours=hoursBetweenTimestepInROMSFiles)
    difference=endTime-startTime
    hoursOfSimulation=divmod(difference.total_seconds(), 3600)
     
    difference=endSpawningTime-startSpawningTime
    hoursOfSpawning=divmod(difference.total_seconds(), 3600)
     
    #startSimulationJD=startTime.timetuple().tm_yday
    #endSimulationJD=endTime.timetuple().tm_yday
    timeStepsSimulation=int(int(hoursOfSimulation[0])/hoursBetweenTimestepInROMSFiles)
	
    #startSpawningJD=startSpawningTime.timetuple().tm_yday
    #endSpawningJD=endSpawningTime.timetuple().tm_yday
    #timeStepsSpawning=int(int(hoursOfSpawning[0])/hoursBetweenTimestepInROMSFiles)
	
    print "\nKINO TIME EVOLUTION:"
    print "=>SIMULATION: Drift simulation will run for %s simulation hours" %(timeStepsSimulation)
    print "=>SPAWNING: Simulated spawning will run for %s simulation hours\n initiated on %s and ending on %s"%(timeStepsSimulation,startSpawningTime,endSpawningTime)

    interval = timedelta(hours=24)
    hoursPerSpawning=divmod(interval.total_seconds(), 3600) #hours per spawning event
    timeStepsSpawning=int(int(hoursOfSpawning[0])/int(hoursPerSpawning[0])) #number of spawning timesteps
    spawningTimes = [startSpawningTime + interval*n for n in range(timeStepsSpawning)] #times of spawning

    # Normal distribution around 0.5
    mu, sigma = 0.5, 0.1 # mean and standard deviation

    prng = RandomState()
    scale = prng.randint(1, 5, size=1)

    prng = RandomState()
    s = prng.normal(mu, sigma, len(spawningTimes))
    num=(s*releaseParticles).astype(int)
    num=np.sort(num) #sort particles in increasing order
    num=np.concatenate((num[len(num)%2::2],num[::-2]),axis=0) #release the highest number of particles at the midpoint of the spawning period

    print "SPAWNING: Simulated spawning will release %s eggs"%(np.sum(num))

    return num, spawningTimes
Example #28
def test_permute_incidence_fixed_sums():
    prng = RandomState(42)
    x0 = prng.randint(2, size=80).reshape((8, 10))
    x1 = permute_incidence_fixed_sums(x0)

    K = 5

    m = []
    for i in range(1000):
        x2 = permute_incidence_fixed_sums(x0, k=K)
        m.append(np.sum(x0 != x2))

    np.testing.assert_(max(m) <= K * 4,
                       "Too many swaps occurred")

    for axis in (0, 1):
        for test_arr in (x1, x2):
            np.testing.assert_array_equal(x0.sum(axis=axis),
                                          test_arr.sum(axis=axis))
Example #29
def test_precision():

    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # are printed with the precision requested in the export_graphviz
            # call, so their precision is checked with strict equality. The
            # reported value has only 2 decimal places, so only a
            # less-than-or-equal comparison is done for it.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
Example #30
def align(y_LR,size=64,n=12):
    (h,w) = y_LR.shape
    # split image
    y_L = y_LR[:,:w/2]
    y_R = y_LR[:,w/2:]
    (h,w) = y_L.shape
    s = size / 2
    # now find n offsets
    R = np.zeros((n,2))
    rand = RandomState(0)
    for i in range(n): # to find each offset
        # at a random location in y_L
        y = rand.randint(h/4,h*3/4)
        x = rand.randint(w/4,w*3/4)
        it = y_L[y:y+s,x:x+s] # take an s x s chunk there
        tm = match_template(y_R,it) # match it against y_R
        ry, rx = maximum_position(tm) # max value is location
        R[i,:] = ((y-ry), (x-rx)) # accumulate
    # take the median
    dy, dx = np.median(R,axis=0).astype(int)
    return dy, dx
Example #31
def test_he_otp_lr_ft1():
    federal_info = fed_conf_host

    sec_param = {"he_algo": 'paillier', "he_key_length": 1024}

    prng = RandomState(0)
    guest_theta = prng.uniform(-1, 1, (6, ))
    guest_features = prng.uniform(-1, 1, (32, 6))
    guest_labels = prng.randint(0, 2, (32, ))

    host_theta = prng.uniform(-1, 1, (6, ))
    host_features = prng.uniform(-1, 1, (32, 6))

    def calu_grad(host_theta, host_features, guest_theta, guest_features,
                  guest_labels):
        u2 = host_theta.dot(host_features.T)
        u1 = guest_theta.dot(guest_features.T)
        u = u1 + u2
        h_x = 1 / (1 + np.exp(-u))
        diff_y = guest_labels - h_x

        batch_size = host_features.shape[0]
        grads = (-1 / batch_size) * (diff_y.dot(host_features))

        return grads

    trainer = make_protocol(HE_OTP_LR_FT1,
                            federal_info,
                            sec_param,
                            algo_param=None)

    # result of the federated computation
    fed_grads = trainer.exchange(host_theta, host_features)

    # result of the local computation
    local_grads = calu_grad(host_theta, host_features, guest_theta,
                            guest_features, guest_labels)

    assert almost_equal(fed_grads, local_grads)
Example #32
def test_ggp_expfam_tobi():
    random = RandomState(2)

    n = 30

    ntrials = random.randint(30, size=n)
    K = random.randn(n, n)
    K = matmul(K, K.T)

    lik = BinomialProdLik(ntrials=ntrials, link=LogitLink())

    mean = OffsetMean(n)

    cov2 = EyeCov(n)

    y = GGPSampler(lik, mean, cov2).sample(random)

    ggp = ExpFamGP(y, ("binomial", ntrials), mean, cov2)
    assert_allclose(ggp.lml(), -67.84095700542488)

    ggp.fit(verbose=False)
    assert_allclose(ggp.lml(), -64.26701904994792)
Example #33
def add_cspy_edge_attributes(G, seed=None):
    """
    Set edge attributes required for cspy
    """
    if seed is None:
        random_state = RandomState()
    elif isinstance(seed, int):
        random_state = RandomState(seed)
    elif isinstance(seed, RandomState):
        random_state = seed
    else:
        raise Exception(
            '{} cannot be used to seed numpy.random.RandomState'.format(seed))
    # Initialise edge attributes
    set_edge_attributes(G, 0, 'weight')
    set_edge_attributes(G, 0, 'res_cost')
    # Iterate through edges to specify 'weight' and 'res_cost' attributes
    for edge in G.edges(data=True):
        # Distance is converted from an already existing edge attribute (m to km)
        dist = edge[2]['length'] * 0.001
        # Fixed resource costs for a given edge.
        # 'sights' is a random integer in [1, 5)
        res_cost_sights = random_state.randint(1, 5)
        # 'travel-time' is distance over speed (not necessary)
        res_cost_travel_time = dist / float(WALKING_SPEED)
        # 'delivery time' is a random number between the travel-time for
        # the edge and 10 times the travel time.
        # in reality this would depend on the buildings present
        res_cost_delivery_time = random_state.uniform(
            res_cost_travel_time, 10 * res_cost_travel_time)
        # 'shift' is not required.
        res_cost_shift = 0

        edge[2]['res_cost'] = array([
            0, res_cost_sights, res_cost_shift, res_cost_travel_time,
            res_cost_delivery_time
        ])
        edge[2]['weight'] = 0  #-dist
    return G
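A hypothetical driver for the helper above, assuming networkx is available and that the module-level names it relies on (set_edge_attributes, array, WALKING_SPEED) are already imported or defined; edge lengths are in metres.

import networkx as nx

G = nx.DiGraph()
G.add_edge('a', 'b', length=500)    # 0.5 km after the conversion inside the helper
G.add_edge('b', 'c', length=1200)   # 1.2 km
G = add_cspy_edge_attributes(G, seed=0)
print(G['a']['b']['res_cost'], G['a']['b']['weight'])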
Example #34
class AxelrodTraitFactory(object):
    """
    In the original Axelrod model, agents have F loci and T possible traits per locus.
    Individuals are initialized with a list of F random integers, each chosen from 0 to T-1.
    The result is given as a Python list, and stored as the individual's initial trait set.


    This factory is dynamically loaded from its fully qualified name in a configuration file,
    and passed the simulation configuration object in its constructor.  The instantiating
    code then calls initialize_population(graph), passing it a previously constructed
    NetworkX graph of nodes.
    """

    def __init__(self, simconfig):
        self.simconfig = simconfig
        self.prng = RandomState()  # allow the library to choose a seed via OS specific mechanism

    def initialize_population(self,graph):
        nf = self.simconfig.num_features
        nt = self.simconfig.num_traits
        for nodename in graph.nodes():
            graph.node[nodename]['traits'] = self.prng.randint(0, nt, size=nf)
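A sketch of the documented call pattern; the simconfig object and graph below are stand-ins, and the class writes to graph.node[...], which assumes an older networkx API.

import networkx as nx

class FakeSimConfig(object):
    num_features = 5    # F loci
    num_traits = 10     # T possible traits per locus

graph = nx.erdos_renyi_graph(20, 0.2)
factory = AxelrodTraitFactory(FakeSimConfig())
factory.initialize_population(graph)
print(graph.node[0]['traits'])      # F random integers in [0, T)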
Example #35
class Dataset(udata.Dataset):
    def __init__(self, name, patchsize):
        super().__init__()
        self.dataset = name
        self.patch_size = patchsize
        self.mat_files = open(self.dataset, 'r').readlines()
        self.file_num = len(self.mat_files)
        self.rand_state = RandomState(66)

    def __len__(self):
        return self.file_num * 100

    def __getitem__(self, idx):
        file_name = self.mat_files[idx % self.file_num]
        gt_file = file_name.split(' ')[1][:-1]
        img_file = file_name.split(' ')[0]
        O = cv2.imread(self.args.dir_data + img_file)
        b, g, r = cv2.split(O)
        input_img = cv2.merge([r, g, b])
        B = cv2.imread(self.args.dir_data + gt_file)
        b, g, r = cv2.split(B)
        gt = cv2.merge([r, g, b])
        im_pair = np.hstack((gt, input_img))
        O, B = self.crop(im_pair, self.patch_size)
        O, B = O.astype(np.float32), B.astype(np.float32)
        O = np.transpose(O, (2, 0, 1))
        B = np.transpose(B, (2, 0, 1))
        sample = {'O': O, 'B': B}
        return sample

    def crop(self, img_pair, patchsize):
        h, ww, c = img_pair.shape
        w = int(ww / 2)
        p_h, p_w = patchsize, patchsize
        r = self.rand_state.randint(0, h - p_h)
        c = self.rand_state.randint(0, w - p_w)
        O = img_pair[r:r + p_h, c + w:c + p_w + w]
        B = img_pair[r:r + p_h, c:c + p_w]
        return O, B
Example #36
def test_glmmexpfam_poisson():
    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = stack((offset, age), axis=1)

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
Example #37
def make_ratings(n_users,
                 n_items,
                 min_rating_per_user,
                 max_rating_per_user,
                 rating_choices,
                 seed=None,
                 shuffle=True):
    """Randomly generate a (user_id, item_id, rating) array

    Return
    ------
        ndarray with shape (n_samples, 3)

    """
    if not (isinstance(rating_choices, list)
            or isinstance(rating_choices, tuple)):
        raise ValueError("'rating_choices' must be a list or tuple")
    if min_rating_per_user < 0 or min_rating_per_user >= n_items:
        raise ValueError("invalid 'min_rating_per_user' invalid")
    if (min_rating_per_user > max_rating_per_user) or \
       (max_rating_per_user >= n_items):
        raise ValueError("invalid 'max_rating_per_user' invalid")

    rs = RandomState(seed=seed)
    user_arrs = []
    for user_id in xrange(n_users):
        item_count = rs.randint(min_rating_per_user, max_rating_per_user)
        item_ids = rs.choice(n_items, item_count, replace=False)
        ratings = rs.choice(rating_choices, item_count)
        arr = np.stack([np.repeat(user_id, item_count), item_ids, ratings],
                       axis=1)
        user_arrs.append(arr)

    ratings = np.array(np.vstack(user_arrs))
    ratings[:, 2] = ratings[:, 2].astype('float')
    if shuffle:
        rs.shuffle(ratings)
    return ratings
Example #38
def test_stochastic_environment_model():

    random_state = RandomState(12345)

    model = StochasticEnvironmentModel()

    actions = [
        Action(i)
        for i in range(5)
    ]

    states = [
        State(i, actions)
        for i in range(5)
    ]

    for t in range(1000):
        state = sample_list_item(states, None, random_state)
        action = sample_list_item(state.AA, None, random_state)
        next_state = sample_list_item(states, None, random_state)
        reward = Reward(None, random_state.randint(10))
        model.update(state, action, next_state, reward)

    environment_sequence = []
    for i in range(1000):
        state = model.sample_state(random_state)
        action = model.sample_action(state, random_state)
        next_state, reward = model.sample_next_state_and_reward(state, action, random_state)
        environment_sequence.append((next_state, reward))

    # uncomment the following line and run test to update fixture
    # with open(f'{os.path.dirname(__file__)}/fixtures/test_stochastic_environment_model.pickle', 'wb') as file:
    #     pickle.dump(environment_sequence, file)

    with open(f'{os.path.dirname(__file__)}/fixtures/test_stochastic_environment_model.pickle', 'rb') as file:
        environment_sequence_fixture = pickle.load(file)

    assert environment_sequence == environment_sequence_fixture
Example #39
    def spsa_step_one(self, lambdas, spsa_params, count):
        """Evaluate +/- perturbations of kernel parameters (lambdas).

        Args:
            lambdas (numpy.ndarray): kernel parameters at step 'count' in SPSA optimization loop
            spsa_params (numpy.ndarray): SPSA parameters
            count (int): the current step in the SPSA optimization loop

        Returns:
            numpy.ndarray: kernel parameters in + direction
            numpy.ndarray: kernel parameters in - direction
            numpy.ndarray: random vector with elements {-1,1}
        """

        prng = RandomState(count)

        c_spsa = float(spsa_params[1]) / np.power(count + 1, spsa_params[3])
        delta = 2 * prng.randint(0, 2, size=np.shape(lambdas)[0]) - 1

        lambda_plus = lambdas + c_spsa * delta
        lambda_minus = lambdas - c_spsa * delta

        return lambda_plus, lambda_minus, delta
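The perturbation above can be reproduced standalone from the same formulas; the spsa_params values below are illustrative only (only indices 1 and 3 are used here).

import numpy as np
from numpy.random import RandomState

count = 3
lambdas = np.array([0.5, -0.2, 1.0])
spsa_params = np.array([0.6283, 0.1, 0.0, 0.602])

prng = RandomState(count)
c_spsa = float(spsa_params[1]) / np.power(count + 1, spsa_params[3])
delta = 2 * prng.randint(0, 2, size=np.shape(lambdas)[0]) - 1   # entries in {-1, +1}
lambda_plus = lambdas + c_spsa * delta
lambda_minus = lambdas - c_spsa * delta
print(lambda_plus, lambda_minus, delta)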
Example #40
 def test_classification_kmeans_relevance(self):
     state = RandomState(seed=0)
     Xs = []
     Ys = []
     n = 20
     for i in range(0, 5):
         for j in range(0, 4):
             x1 = state.rand(n) + i * 1.1
             x2 = state.rand(n) + j * 1.1
             Xs.append(numpy.vstack([x1, x2]).T)
             cl = state.randint(0, 4)
             Ys.extend([cl for i in range(n)])
     X = numpy.vstack(Xs)
     Y = numpy.array(Ys)
     clk = ClassifierAfterKMeans(c_n_clusters=6, c_random_state=state)
     try:
         clk.fit(X, Y)
     except AttributeError as e:
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise e
     score = clk.score(X, Y)
     self.assertGreater(score, 0.95)
Example #41
class TestStickExpectation(unittest.TestCase):
    """Test stick_expectation"""
    def setUp(self):
        self.rand = RandomState(0)
        self.num_stick = 100
        self.uniform_stick = np.array([
            np.ones(self.num_stick - 1),
            np.arange(self.num_stick - 1, 0, -1)
        ])

    def test_stick_expectation_shape(self):
        num_stick = self.rand.randint(100, 200)
        var_sticks = np.ones((2, num_stick - 1))
        expectation_stick = stick_expectation(var_sticks)
        assert_equal(expectation_stick.shape, (num_stick, ))
        assert_almost_equal(np.sum(expectation_stick), 1.0)

    def test_uniform_stick_expectation(self):
        expectation_stick = stick_expectation(self.uniform_stick)
        shape = expectation_stick.shape
        all_equal_stick = np.ones(shape) * expectation_stick[0]
        assert_almost_equal(expectation_stick, all_equal_stick)
        assert_almost_equal(np.sum(expectation_stick), 1.0)
Example #42
    def seeds(self, value: Union[int, Mapping[str, int]]) -> None:
        keys = ['map', 'objects', 'quest', 'surface']

        def _key_missing(seeds):
            return not set(seeds.keys()).issuperset(keys)

        seeds = value
        if type(value) is int:
            rng = RandomState(value)
            seeds = {}
        elif _key_missing(value):
            rng = g_rng.next()

        # Check if we need to generate missing seeds.
        self._seeds = {}
        for key in keys:
            if key in seeds:
                self._seeds[key] = seeds[key]
            else:
                self._seeds[key] = rng.randint(65635)

        self.quest_gen_options.quest_rng = self.rngs['quest']
        self.surface_gen_options.seed = self._seeds['surface']
Example #43
def test_binomial_optimize_refit():
    random = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = random.randn(nsamples, nfeatures) / sqrt(nfeatures)

    u = random.randn(nfeatures)

    z = 0.1 + 2 * dot(G, u) + random.randn(nsamples)

    ntrials = random.randint(10, 500, size=nsamples)

    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(
            z[i] + random.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)
    (Q, S0) = economic_qs_linear(G)

    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y
    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)

    assert_allclose(ep.lml(), -144.2381842202486, rtol=1e-3)

    nep = ep.copy()

    assert_allclose(ep.lml(), -144.2381842202486, rtol=1e-3)
    assert_allclose(nep.lml(), -144.2381842202486, rtol=1e-3)

    nep.M = c_[M, random.randn(nsamples)]

    assert_allclose(nep.lml(), -145.7076758124364, rtol=1e-3)
    nep.learn(progress=False)
    assert_allclose(nep.lml(), -143.98475638974728, rtol=1e-3)
Example #44
    def generator(self):
        """

        :return:
        """
        triples_train_idx = self._data.triples_train_idx if self._data.triples_train_subset_idx is None else self._data.triples_train_subset_idx
        for (idx, (head, relation, gold_tail)) in enumerate(triples_train_idx):
            # Hint: We use 'np.RandomState' as 'np.random.randint' is not thread-safe
            cou_inter = idx % 4096
            if cou_inter == 0:
                np_random = RandomState(idx)
                tail_idx_negatives = np_random.randint(self._gp.num_vertices, size=(4096, self._model_params.num_negative))

            def generate_negative_samples(head, gold_tail, tail_idx_negatives):
                replacement = int((head + gold_tail) / 2)
                neg_tail_replacement = replacement + 1 if replacement < (
                            self._gp.num_vertices - 1) else replacement

                tail_idx_negatives = np.where(tail_idx_negatives == gold_tail, neg_tail_replacement,
                                              tail_idx_negatives)
                tail_idx_negatives = np.insert(tail_idx_negatives, 0, gold_tail)
                return tail_idx_negatives

            pos_neg_tails = generate_negative_samples(head, gold_tail, tail_idx_negatives[cou_inter])
            mask = np.array([1.] + [0.] * (len(pos_neg_tails)-1), dtype=self.np_precision)

            device = '/gpu:0'
            if self._deterministic is True:
                device = '/cpu:0'
            with tf.device(device):
                a = tf.constant(np.array([head]))
                b = tf.constant(np.array([relation]))
                c = tf.constant(np.array(pos_neg_tails))
                d = tf.constant(np.array([gold_tail]))
                e = tf.constant(np.array([mask]))

            yield (a, b), (c, d, e)
Example #45
def test_binomial_get_normal_likelihood_trick():
    random = RandomState(139)
    nsamples = 30
    nfeatures = 31

    G = random.randn(nsamples, nfeatures) / sqrt(nfeatures)

    u = random.randn(nfeatures)

    z = 0.1 + 2 * dot(G, u) + random.randn(nsamples)

    ntrials = random.randint(10, 500, size=nsamples)

    y = zeros(nsamples)
    for i in range(len(ntrials)):
        y[i] = sum(
            z[i] + random.logistic(scale=pi / sqrt(3), size=ntrials[i]) > 0)
    (Q, S0) = economic_qs_linear(G)

    M = ones((nsamples, 1))
    lik = BinomialProdLik(ntrials, LogitLink())
    lik.nsuccesses = y
    ep = ExpFamEP(lik, M, Q[0], Q[1], S0)
    ep.learn(progress=False)

    nlt = ep.get_normal_likelihood_trick()
    assert_allclose(nlt.fast_scan(G)[0], [
        -143.48903288, -144.32031587, -144.03889888, -144.31806561,
        -143.90248659, -144.303103, -144.47854112, -144.44469341, -144.285027,
        -144.31240175, -143.11590263, -142.81623878, -141.67554141,
        -144.4780024, -144.47780285, -144.10317082, -142.10043322,
        -143.0813298, -143.99841663, -143.345783, -144.45458683, -144.37877612,
        -142.56846859, -144.32923028, -144.44116855, -144.45082936,
        -144.40932741, -143.0212886, -144.47902176, -143.94188634,
        -143.72765373
    ],
                    rtol=1e-5)
Example #46
def generateDegradation(args, seed):
    from numpy.random import RandomState
    from numpy.linalg import norm

    rs = RandomState(seed)

    if args.D == 2:
        rotation = (rs.uniform(*args.rotate),)
    if args.D == 3:
        angle = rs.uniform(*args.rotate)
        axis = rs.uniform(size=3)
        axis = axis/norm(axis)
        rotation = angle, axis
    translation = rs.uniform(*args.translate, size=args.D)
    scale = rs.uniform(*args.scale)
    if args.drop[0] == args.drop[1]:
        ndrops = args.drop[0]
    else:
        ndrops = rs.randint(*sorted(args.drop))
    drops = rs.choice(range(args.N), size=ndrops, replace=False)
    duplications = rs.choice(range(args.duplicate[0], args.duplicate[1] + 1), size=args.N - ndrops)
    noise = rs.uniform(*args.noise) * rs.randn(sum(duplications), args.D)

    return rotation, translation, scale, drops, duplications, noise
Example #47
def get_data_index2(data):
    # get the list of all drug codes
    drug_code_list = get_drugs_code()

    # get the drugs and their SMILES strings
    drug_smiles_dict = get_drug_json()

    data_dict = batch_dict(data)

    result_dict = dict()

    for name, smiles in data_dict.items():
        result_dict[name] = None
        for code, smiles2 in drug_smiles_dict.items():
            if smiles == smiles2:
                code_index = drug_code_list.index(code)
                result_dict[name] = code_index
                break

        if result_dict[name] is None:
            rdm = RandomState(len(smiles))
            result_dict[name] = rdm.randint(0, 3882)

    return result_dict
Example #48
class RandomPolicy(LayerPolicy):

    def __init__(self, num_layers, policy_seed=None, **params):
        self._count = 0
        self._seed = policy_seed
        self._state = RandomState(policy_seed)
        super(RandomPolicy, self).__init__(num_layers)

    def _random_layer(self):
        return self._state.randint(1, self._num_layers+1)

    def initial_layer_id(self):
        return self._random_layer()

    def next_layer_id(self):
        self._count = (self._count + 1) % self._num_layers
        self._layer_id = self._random_layer()
        return self._layer_id

    def cycle_ended(self):
        return self._count == 0

    def name(self):
        return 'random'
Example #49
def test_glmmexpfam_copy():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm0 = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm0.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    glmm0.fit(verbose=False)

    v = -19.575736562427252
    assert_allclose(glmm0.lml(), v)

    glmm1 = glmm0.copy()
    assert_allclose(glmm1.lml(), v)

    glmm1.scale = 0.92
    assert_allclose(glmm0.lml(), v, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm1.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    glmm0.fit(verbose=False)
    glmm1.fit(verbose=False)

    v = -19.575736562378573
    assert_allclose(glmm0.lml(), v)
    assert_allclose(glmm1.lml(), v)
Example #50
class RandomWrapper:
    """ This is a wrapper for the numpy random object"""
    rand = None

    def __init__(self, seed=None):
        if (seed is None):
            seed = int(datetime.utcnow().timestamp())
            print("************* Seed is: {0}\n".format(seed))
            self.rand = RandomState(seed)
        else:
            self.rand = RandomState(seed)

    def randint(self, low: int, high: int) -> int:
        """ low and high are both inclusive and exclusive respectively"""
        return self.rand.randint(low, high)

    def uniform(self, low: float, high: float) -> float:
        return self.rand.uniform(low, high)

    def random(self) -> float:
        return self.rand.uniform(0.0, 1.0)

    def sample(self, elements: List, size: int) -> List:
        return self.rand.choice(elements, size=size, replace=False)
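A brief usage sketch of the wrapper above, assuming its own imports (typing.List, datetime) are in place; an explicit seed keeps runs reproducible.

rng = RandomWrapper(seed=123)
print(rng.randint(0, 10))                 # int in [0, 10)
print(rng.uniform(0.0, 1.0))              # float in [0.0, 1.0)
print(rng.sample([1, 2, 3, 4], size=2))   # two distinct elements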
Example #51
    def validate(self) -> None:
        """
        Check arguments correctness and consistency.

        * input files must exist
        * output files must be in a writeable directory
        * if no seed specified, set random seed.
        * length of per-chain lists equals specified # of chains
        """
        if self.model_name is None:
            raise ValueError('no stan model specified')
        if self.model_exe is None:
            raise ValueError('model not compiled')

        if self.chain_ids is not None:
            for i in range(len(self.chain_ids)):
                if self.chain_ids[i] < 1:
                    raise ValueError('invalid chain_id {}'.format(
                        self.chain_ids[i]))
        if self.output_dir is not None:
            self.output_dir = os.path.realpath(
                os.path.expanduser(self.output_dir))
            if not os.path.exists(self.output_dir):
                try:
                    os.makedirs(self.output_dir)
                    self._logger.info('created output directory: %s',
                                      self.output_dir)
                except (RuntimeError, PermissionError) as exc:
                    raise ValueError(
                        'invalid path for output files, no such dir: {}'.
                        format(self.output_dir)) from exc
            if not os.path.isdir(self.output_dir):
                raise ValueError(
                    'specified output_dir not a directory: {}'.format(
                        self.output_dir))
            try:
                testpath = os.path.join(self.output_dir, str(time()))
                with open(testpath, 'w+'):
                    pass
                os.remove(testpath)  # cleanup
            except Exception as exc:
                raise ValueError('invalid path for output files,'
                                 ' cannot write to dir: {}'.format(
                                     self.output_dir)) from exc

        if self.seed is None:
            rng = RandomState()
            self.seed = rng.randint(1, 99999 + 1)
        else:
            if not isinstance(self.seed, (int, list)):
                raise ValueError(
                    'seed must be an integer between 0 and 2**32-1,'
                    ' found {}'.format(self.seed))
            if isinstance(self.seed, int):
                if self.seed < 0 or self.seed > 2**32 - 1:
                    raise ValueError(
                        'seed must be an integer between 0 and 2**32-1,'
                        ' found {}'.format(self.seed))
            else:
                if self.chain_ids is None:
                    raise ValueError(
                        'seed must not be a list when no chains used')

                if len(self.seed) != len(self.chain_ids):
                    raise ValueError(
                        'number of seeds must match number of chains,'
                        ' found {} seed for {} chains '.format(
                            len(self.seed), len(self.chain_ids)))
                for i in range(len(self.seed)):
                    if self.seed[i] < 0 or self.seed[i] > 2**32 - 1:
                        raise ValueError('seed must be an integer value'
                                         ' between 0 and 2**32-1,'
                                         ' found {}'.format(self.seed[i]))

        if isinstance(self.data, str):
            if not os.path.exists(self.data):
                raise ValueError('no such file {}'.format(self.data))
        elif self.data is None:
            if isinstance(self.method_args, OptimizeArgs):
                raise ValueError('data must be set when optimizing')
        elif not isinstance(self.data, (str, dict)):
            raise ValueError('data must be string or dict')

        if self.inits is not None:
            if isinstance(self.inits, (Integral, Real)):
                if self.inits < 0:
                    raise ValueError('inits must be > 0, found {}'.format(
                        self.inits))
            elif isinstance(self.inits, str):
                if not os.path.exists(self.inits):
                    raise ValueError('no such file {}'.format(self.inits))
            elif isinstance(self.inits, list):
                if self.chain_ids is None:
                    raise ValueError(
                        'inits must not be a list when no chains are used')

                if len(self.inits) != len(self.chain_ids):
                    raise ValueError(
                        'number of inits files must match number of chains,'
                        ' found {} inits files for {} chains '.format(
                            len(self.inits), len(self.chain_ids)))
                names_set = set(self.inits)
                if len(names_set) != len(self.inits):
                    raise ValueError('each chain must have its own init file,'
                                     ' found duplicates in inits files list.')
                for i in range(len(self.inits)):
                    if not os.path.exists(self.inits[i]):
                        raise ValueError('no such file {}'.format(
                            self.inits[i]))
Example #52
    def build_program(
            self, random_state: RandomState) -> Union[List[Optional], None]:
        """
        在没有指定program的情况下,随机创建一个naive的program

        Parameters
        ----------
        random_state: numpy.random.RandomState
                      随机种子状态

        Returns
        -------
        program: List[Optional]
                 树的内容,元素为函数(_Function)或常数或变量
        """
        # The init method is one of 'full', 'grow' or 'half and half'
        if self.init_method == 'half and half':
            method = ('full' if random_state.randint(2) else 'grow')
        else:
            method = self.init_method

        # maximum depth
        max_depth = random_state.randint(*self.init_depth)

        # Initialise: randomly pick a function from function_set,
        # append the chosen function to program,
        # and push its number of arguments onto terminal_stack
        function = random_state.randint(len(self.function_set))  # randomly pick a function index
        function = self.function_set[function]  # look up the chosen function
        program = [function]  # append the function to program
        terminal_stack = [function.arity]  # push the function's arity onto terminal_stack

        # start growing the tree
        while terminal_stack:
            # depth of the tree (= number of pending function nodes, since this is a depth-first construction)
            depth: int = len(terminal_stack)
            # choice = number of features + number of functions, i.e. the number of options
            choice: int = self.n_features + len(self.function_set)
            # draw a random integer among those options
            choice: int = random_state.randint(choice)

            # If the depth is below max_depth and either the 'full' method is used
            # or choice falls among the functions, prefer adding a function ('full' stacks functions first)
            if (depth < max_depth) and (method == 'full'
                                        or choice <= len(self.function_set)):
                # randomly pick a function
                function = random_state.randint(len(self.function_set))
                function = self.function_set[function]
                # append the function to program
                program.append(function)
                # push the function's arity onto terminal_stack
                terminal_stack.append(function.arity)
            else:
                # if constants are included, const_range gives their value range
                if self.const_range is not None:
                    terminal = random_state.randint(self.n_features + 1)
                # if constants are not included
                else:
                    terminal = random_state.randint(self.n_features)
                # only possible when randint(self.n_features + 1) was used; in that case add a constant
                if terminal == self.n_features:
                    # draw a constant within const_range
                    terminal = random_state.uniform(*self.const_range)
                    if self.const_range is None:
                        raise ValueError('A constant was produced with '
                                         'const_range=None.')

                # for a variable, terminal is an integer index; for a constant, terminal is the value itself
                program.append(terminal)
                # one child of the current node has been filled
                terminal_stack[-1] -= 1
                # when all children of a node are filled, pop it and decrement its parent's count;
                # once everything is filled, return program and stop
                while terminal_stack[-1] == 0:
                    terminal_stack.pop()
                    if not terminal_stack:
                        return program
                    terminal_stack[-1] -= 1
        # We should never get here
        return None
Example #53
File: hlda.py Project: wangjs/hlda
class HierarchicalLDA(object):
    
    def __init__(self, corpus, vocab, 
                 alpha=10.0, gamma=1.0, eta=0.1, 
                 seed=0, verbose=True, num_levels=3):
        
        NCRPNode.total_nodes = 0
        NCRPNode.last_node_id = 0        
        
        self.corpus = corpus
        self.vocab = vocab
        self.alpha = alpha  # smoothing on doc-topic distributions
        self.gamma = gamma  # "imaginary" customers at the next, as yet unused table
        self.eta = eta      # smoothing on topic-word distributions

        self.seed = seed
        self.random_state = RandomState(seed)        
        self.verbose = verbose

        self.num_levels = num_levels
        self.num_documents = len(corpus)
        self.num_types = len(vocab)
        self.eta_sum = eta * self.num_types

        # if self.verbose:        
        #     for d in range(len(self.corpus)):
        #         doc = self.corpus[d]
        #         words = ' '.join([self.vocab[n] for n in doc])
        #         print 'doc_%d = %s' % (d, words)  
        
        # initialise a single path
        path = np.zeros(self.num_levels, dtype=np.object)
        
        # initialize and fill the topic pointer arrays for 
        # every document. Set everything to the single path that 
        # we added earlier.
        self.root_node = NCRPNode(self.num_levels, self.vocab)
        self.document_leaves = {}                                   # currently selected path (ie leaf node) through the NCRP tree
        self.levels = np.zeros(self.num_documents, dtype=np.object) # indexed < doc, token >
        for d in range(len(self.corpus)):
            
            # populate nodes into the path of this document
            doc = self.corpus[d]
            doc_len = len(doc)
            path[0] = self.root_node
            self.root_node.customers += 1 # always add to the root node first
            for level in range(1, self.num_levels):
                # at each level, a node is selected by its parent node based on the CRP prior
                parent_node = path[level-1]
                level_node = parent_node.select(self.gamma)
                level_node.customers += 1
                path[level] = level_node
                
            # set the leaf node for this document                 
            leaf_node = path[self.num_levels-1]
            self.document_leaves[d] = leaf_node
                        
            # randomly assign each word in the document to a level (node) along the path
            self.levels[d] = np.zeros(doc_len, dtype=np.int)
            for n in range(doc_len):
                w = doc[n]                
                random_level = self.random_state.randint(self.num_levels)
                random_node = path[random_level]
                random_node.word_counts[w] += 1
                random_node.total_words += 1
                self.levels[d][n] = random_level                

    def estimate(self, num_samples, display_topics=50, n_words=5, with_weights=True):
        
        print 'HierarchicalLDA sampling'
        for s in range(num_samples):
            
            sys.stdout.write('.')
            
            for d in range(len(self.corpus)):
                self.sample_path(d)
            
            for d in range(len(self.corpus)):
                self.sample_topics(d)
                
            if (s > 0) and ((s+1) % display_topics == 0):
                print
                self.print_nodes(n_words, with_weights)

    def sample_path(self, d):
        
        # define a path starting from the leaf node of this doc
        path = np.zeros(self.num_levels, dtype=np.object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent
            
        # remove this document from the path, deleting empty nodes if necessary
        self.document_leaves[d].drop_path()
        
        ############################################################
        # calculates the prior p(c_d | c_{-d}) in eq. (4)
        ############################################################

        node_weights = {}
        self.calculate_ncrp_prior(node_weights, self.root_node, 0.0)
        
        ############################################################
        # calculates the likelihood p(w_d | c, w_{-d}, z) in eq. (4)
        ############################################################

        level_word_counts = {}
        for level in range(self.num_levels):
            level_word_counts[level] = {}        
        doc_levels = self.levels[d]
        doc = self.corpus[d]
        
        # remove doc from path
        for n in range(len(doc)): # for each word in the doc
            
            # count the word at each level
            level = doc_levels[n]
            w = doc[n]
            if w not in level_word_counts[level]:
                level_word_counts[level][w] = 1
            else:
                level_word_counts[level][w] += 1

            # remove word count from the node at that level
            level_node = path[level]
            level_node.word_counts[w] -= 1
            level_node.total_words -= 1
            assert level_node.word_counts[w] >= 0
            assert level_node.total_words >= 0

        self.calculate_doc_likelihood(node_weights, level_word_counts)

        ############################################################
        # pick a new path
        ############################################################

        nodes = np.array(list(node_weights.keys()))
        weights = np.array([node_weights[node] for node in nodes])
        weights = np.exp(weights - np.max(weights)) # normalise so the largest weight is 1
        weights = weights / np.sum(weights)

        choice = self.random_state.multinomial(1, weights).argmax()
        node = nodes[choice]
        
        # if we picked an internal node, we need to add a new path to the leaf
        if not node.is_leaf():
            node = node.get_new_leaf()

        # add the doc back to the path
        node.add_path()                     # add a customer to the path
        self.document_leaves[d] = node      # store the leaf node for this doc

        # add the words
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            word_counts = level_word_counts[level]
            for w in word_counts:
                node.word_counts[w] += word_counts[w]
                node.total_words += word_counts[w]
            node = node.parent        
        
    def calculate_ncrp_prior(self, node_weights, node, weight):
        ''' Calculates the prior on the path according to the nested CRP '''

        for child in node.children:
            child_weight = log( float(child.customers) / (node.customers + self.gamma) )
            self.calculate_ncrp_prior(node_weights, child, weight + child_weight)
        
        node_weights[node] = weight + log( self.gamma / (node.customers + self.gamma))

    def calculate_doc_likelihood(self, node_weights, level_word_counts):

        # calculate the weight for a new path at a given level
        new_topic_weights = np.zeros(self.num_levels)
        for level in range(1, self.num_levels):  # skip the root

            word_counts = level_word_counts[level]
            total_tokens = 0

            for w in word_counts:
                count = word_counts[w]
                for i in range(count):  # one log-factor per token: incrementally expands the Gamma-function ratios of the Dirichlet-multinomial likelihood
                    new_topic_weights[level] += log((self.eta + i) / (self.eta_sum + total_tokens))
                    total_tokens += 1

        self.calculate_word_likelihood(node_weights, self.root_node, 0.0, level_word_counts, new_topic_weights, 0)

    def calculate_word_likelihood(self, node_weights, node, weight, level_word_counts, new_topic_weights, level):
                
        # first calculate the likelihood of the words at this level, given this topic
        node_weight = 0.0
        word_counts = level_word_counts[level]
        total_words = 0
        
        for w in word_counts:
            count = word_counts[w]
            for i in range(count): # same incremental Gamma-ratio expansion, now offset by the node's existing word counts
                node_weight += log( (self.eta + node.word_counts[w] + i) / 
                                    (self.eta_sum + node.total_words + total_words) )
                total_words += 1
                
        # propagate that weight to the child nodes
        for child in node.children:
            self.calculate_word_likelihood(node_weights, child, weight + node_weight, 
                                           level_word_counts, new_topic_weights, level+1)
            
        # finally if this is an internal node, add the weight of a new path
        level += 1
        while level < self.num_levels:
            node_weight += new_topic_weights[level]
            level += 1
            
        node_weights[node] += node_weight
        
    def sample_topics(self, d):

        doc = self.corpus[d]
        
        # initialise level counts
        doc_levels = self.levels[d]
        level_counts = np.zeros(self.num_levels, dtype=int)
        for c in doc_levels:
            level_counts[c] += 1

        # get the leaf node and populate the path
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # sample a new level for each word
        level_weights = np.zeros(self.num_levels)            
        for n in range(len(doc)):

            w = doc[n]            
            word_level = doc_levels[n]

            # remove from model
            level_counts[word_level] -= 1
            node = path[word_level]
            node.word_counts[w] -= 1
            node.total_words -= 1

            # pick new level
            for level in range(self.num_levels):
                level_weights[level] = (self.alpha + level_counts[level]) *                     \
                    (self.eta + path[level].word_counts[w]) /                                   \
                    (self.eta_sum + path[level].total_words)
            level_weights = level_weights / np.sum(level_weights)
            level = self.random_state.multinomial(1, level_weights).argmax()
            
            # put the word back into the model
            doc_levels[n] = level
            level_counts[level] += 1
            node = path[level]
            node.word_counts[w] += 1
            node.total_words += 1
        
    def print_nodes(self, n_words, with_weights):
        self.print_node(self.root_node, 0, n_words, with_weights)
        
    def print_node(self, node, indent, n_words, with_weights):
        out = '    ' * indent
        out += 'topic %d (level=%d, total_words=%d, documents=%d): ' % (node.node_id, node.level, node.total_words, node.customers)
        out += node.get_top_words(n_words, with_weights)
        print(out)
        for child in node.children:
            self.print_node(child, indent+1, n_words, with_weights)        
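
# The path resampling above works entirely in log space: calculate_ncrp_prior and
# calculate_doc_likelihood accumulate log-probabilities per candidate node, and
# sample_path exponentiates them only after subtracting the maximum. A minimal,
# self-contained sketch of that same trick (the helper name is illustrative, not
# part of the library):
import numpy as np
from numpy.random import RandomState

def sample_index_from_log_weights(log_weights, random_state):
    # Subtract the max so the largest exponentiated weight is 1,
    # avoiding underflow for very negative log-probabilities.
    w = np.exp(log_weights - np.max(log_weights))
    w /= w.sum()
    # Draw one sample from the resulting categorical distribution.
    return random_state.multinomial(1, w).argmax()

rng = RandomState(0)
print(sample_index_from_log_weights(np.array([-1000.0, -1001.5, -999.2]), rng))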
Example #54
0
def analyse_response_repeats_all_trials(repeats_data, anchor_model, neg_model,
                                        sess):
    # generate positive examples of responses: responses at the same time, from different repeats

    prng = RandomState(50)

    n_trials = repeats_data['repeats'].shape[0]
    n_random_times = 10
    random_times = prng.randint(0, repeats_data['repeats'].shape[1],
                                n_random_times)
    responses = repeats_data['repeats'][:, random_times, :].astype(np.float32)
    responses = np.transpose(responses, [1, 0, 2])
    responses = np.reshape(
        responses,
        [n_trials * n_random_times, responses.shape[2]]).astype(np.float32)
    stim_idx = np.repeat(np.arange(n_random_times), n_trials, 0)

    # embed a sample response to get dimensions
    feed_dict = {
        anchor_model.map_cell_grid_tf: repeats_data['map_cell_grid'],
        anchor_model.cell_types_tf: repeats_data['ctype_1hot'],
        anchor_model.mean_fr_tf: repeats_data['mean_firing_rate'],
        anchor_model.responses_tf: np.expand_dims(responses[0:100, :], 2)
    }
    resp_test = sess.run(anchor_model.responses_embed, feed_dict=feed_dict)
    resp_embed = np.zeros(
        (responses.shape[0], resp_test.shape[1], resp_test.shape[2], 1))

    # embed the responses
    # since we use batch norm at test time, jumble the responses so each batch gives a correct estimate of the batch-norm statistics
    tms = np.arange(responses.shape[0])
    tms_jumble = np.random.permutation(tms)

    batch_sz = 100
    for itm in np.arange(0, tms_jumble.shape[0], batch_sz):
        print(itm)
        feed_dict = {
            anchor_model.map_cell_grid_tf:
            repeats_data['map_cell_grid'],
            anchor_model.cell_types_tf:
            repeats_data['ctype_1hot'],
            anchor_model.mean_fr_tf:
            repeats_data['mean_firing_rate'],
            anchor_model.responses_tf:
            np.expand_dims(responses[tms_jumble[itm:itm + batch_sz], :], 2)
        }
        resp_embed[tms_jumble[itm:itm + batch_sz], :, :, :] = sess.run(
            anchor_model.responses_embed, feed_dict=feed_dict)

    # compute distance between pairs of responses
    distances = np.zeros((responses.shape[0], responses.shape[0]))
    distances_euclidean = np.zeros((responses.shape[0], responses.shape[0]))
    batch_dist = 100
    for iresp in np.arange(0, distances.shape[0], batch_dist):
        print(iresp)
        for jresp in np.arange(0, distances.shape[1], batch_dist):
            r1 = np.expand_dims(resp_embed[iresp:iresp + batch_dist], 1)
            r2 = np.expand_dims(resp_embed[jresp:jresp + batch_dist], 0)
            distances[iresp:iresp + batch_dist,
                      jresp:jresp + batch_dist] = np.sum((r1 - r2)**2,
                                                         (2, 3, 4))

            rr1 = np.expand_dims(responses[iresp:iresp + batch_dist], 1)
            rr2 = np.expand_dims(responses[jresp:jresp + batch_dist], 0)
            distances_euclidean[iresp:iresp + batch_dist,
                                jresp:jresp + batch_dist] = np.sum(
                                    (rr1 - rr2)**2, 2)

    test_clustering = {
        'distances': distances,
        'responses': responses,
        'stim_idx': stim_idx,
        'resp_embed': resp_embed,
        'random_times': random_times,
        'distances_euclidean': distances_euclidean
    }

    return test_clustering
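
# The double loop over iresp/jresp above computes all pairwise squared distances in
# memory-bounded blocks via broadcasting. A compact, standalone sketch of the same
# pattern (function and variable names are illustrative only):
import numpy as np

def blockwise_sq_distances(a, b, batch=100):
    # a: (n, d), b: (m, d) -> (n, m) matrix of squared Euclidean distances,
    # filled block by block so the broadcasted temporaries stay small.
    out = np.zeros((a.shape[0], b.shape[0]))
    for i in range(0, a.shape[0], batch):
        for j in range(0, b.shape[0], batch):
            diff = a[i:i + batch, None, :] - b[None, j:j + batch, :]
            out[i:i + batch, j:j + batch] = np.sum(diff ** 2, axis=2)
    return out

x = np.random.RandomState(0).randn(250, 8)
assert np.allclose(np.diag(blockwise_sq_distances(x, x)), 0.0)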
Example #55
0
class MicrobeFactory:
    def __init__(self,
                 user_id,
                 game_type,
                 store,
                 microbe_types,
                 n_in_epoch=6,
                 n_in_epoch_promo=10,
                 epoch_period=5000,
                 second_epoch_period=3000,

                 n_in_epoch_mobile=4,
                 n_in_epoch_promo_mobile=8,
                 epoch_period_mobile=1000,
                 second_epoch_period_mobile=1000,
                 ):
        self._user_id = user_id
        self._game_type = game_type
        self._store = store

        assert self._game_type is not None

        self._seed = int(time.time())
        self._rnd = RandomState(self._seed)
        self._microbes = []

        self._n_in_epoch = n_in_epoch
        self._n_in_epoch_promo = n_in_epoch_promo
        self._epoch_period = epoch_period
        self._second_epoch_period = second_epoch_period

        self._n_in_epoch_mobile = n_in_epoch_mobile
        self._n_in_epoch_promo_mobile = n_in_epoch_promo_mobile
        self._epoch_period_mobile = epoch_period_mobile
        self._second_epoch_period_mobile = second_epoch_period_mobile

        self.microbe_types = microbe_types

        self._epoch = 0
        self._last_epoch_time = None

        assert self.microbe_types is not None
        assert len(self.microbe_types) > 0
        for m in self.microbe_types:
            assert m['type'] > 0
            assert m['width'] > 0
            assert m['height'] > 0

        top_bar_size = self.game_cfg['top_bar_size']
        self.cell_width = self.game_cfg['cell_width']
        self.cell_height = self.game_cfg['cell_height']

        self.x_max = 1
        self.x_min = -1

        self.y_max = 1 - top_bar_size
        self.y_min = -1 + self.cell_height / 2

        self.cells_x = math.floor((self.x_max - self.x_min) / self.cell_width)
        self.cells_y = math.floor((self.y_max - self.y_min) / self.cell_height)

    @property
    def n_in_epoch(self):
        return self._n_in_epoch_mobile if \
            self.is_mobile() else self._n_in_epoch

    @property
    def n_in_epoch_promo(self):
        return self._n_in_epoch_promo_mobile if \
            self.is_mobile() else self._n_in_epoch_promo

    @property
    def epoch_period(self):
        return self._epoch_period_mobile if \
            self.is_mobile() else self._epoch_period

    @property
    def second_epoch_period(self):
        return self._second_epoch_period_mobile if \
            self.is_mobile() else self._second_epoch_period

    @property
    def store(self):
        return self._store

    @property
    def game_cfg(self):
        return settings.config['game']

    @property
    def user_id(self):
        return self._user_id

    @property
    def microbes(self):
        return self._microbes

    @property
    def epoch(self):
        return self._epoch

    def dump_microbes(self, microbes=None):
        if microbes is None:
            microbes = self.microbes
        return [m.to_dict() for m in microbes]

    def gen_microbes(self, has_promo):
        self._epoch += 1
        self._last_epoch_time = datetime.datetime.now()

        n = self.n_in_epoch
        if has_promo:
            n = self.n_in_epoch_promo

        new_microbes = []
        for _ in range(0, n):
            microbe_info_i = self._rnd.randint(0, len(self.microbe_types))
            m = Microbe(self,
                        self._epoch,
                        self.microbe_types[microbe_info_i],
                        self._rnd)
            m.set_position(self.microbes)
            self.microbes.append(m)
            new_microbes.append(m)
        return new_microbes

    def get_alive(self):
        return list(filter(lambda item: item.is_alive, self.microbes))

    def shoot(self, x, y, has_promo, radius=None):
        killed = []

        closest = list(sorted(
            self.get_alive(),
            key=lambda item:
            math.hypot(x - item.x, y - item.y)
        ))
        if has_promo:
            microbes = closest[:4]
        else:
            microbes = closest[:1]

        if len(microbes) > 0 and microbes[0].is_hit(x, y, radius):
            for m in microbes:
                if m.damage() <= 0:
                    killed.append(m.id)

        self._microbes = self.get_alive()
        score = len(killed)  # simple for just now
        return score, killed

    def check_world(self, game_started_at, current_time, has_promo):
        if self._last_epoch_time is None:
            self._last_epoch_time = game_started_at

        delta = (current_time - self._last_epoch_time).total_seconds() * MS

        epoch_to_delete = self._epoch - 1
        if self.is_mobile():
            epoch_to_delete = self._epoch

        if self._epoch >= 2 or self.is_mobile():
            # remove & generate
            if delta >= self.epoch_period:
                # removing epoch
                removed_microbes = []
                for m in self.microbes:

                    if m.epoch == epoch_to_delete:  # remove previous epoch
                        m.kill()
                        removed_microbes.append(m.id)

                # creating new epoch
                self._microbes = self.get_alive()
                new_microbes = self.gen_microbes(has_promo)
                return self.dump_microbes(new_microbes), removed_microbes
        else:
            # just add new epoch
            if delta >= self.second_epoch_period:
                # creating new epoch
                self.gen_microbes(has_promo)
                return self.dump_microbes(), []
        return None

    def is_mobile(self):
        return self._game_type == Game.Type.mobile
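
# gen_microbes above picks each new microbe's type by drawing a random index with the
# factory's seeded RandomState; seeding from the clock keeps games varied, while storing
# the seed (self._seed) would let a round be replayed deterministically. A small sketch
# of that pattern (the type list below is made up for illustration):
import time
from numpy.random import RandomState

microbe_types = [{'type': 1, 'width': 1, 'height': 1},
                 {'type': 2, 'width': 2, 'height': 2}]

seed = int(time.time())      # random per game ...
rnd = RandomState(seed)      # ... but reproducible if the seed is kept
chosen = [microbe_types[rnd.randint(0, len(microbe_types))] for _ in range(6)]
print(seed, [m['type'] for m in chosen])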
n_variables = 15
seeds = list(range(101, 200))
rng = RandomState(19023)
variables = list(range(n_variables))
n_samples = 200

# Data generation parameters
gen_mean = np.zeros(n_variables)
gen_var = np.zeros(n_variables) + 0.2
gen_weight = 2

# Generate some data from a GN
graph = random_dag(variables, rng=rng)
beta = graph.A.T * gen_weight

sample_seed = rng.randint(0, 2**32 - 1)
data_gn = sample_from_gn(graph, gen_mean, gen_var, beta, n_samples,
                         sample_seed)

# Fit the score and create the parent set distributions
fan_in = 5

bge = BGe(data_gn)
ps_dist = get_parent_set_distributions(variables, fan_in, bge, rng=rng)

# Some random state to start
state = DAGState(random_dag(variables, fan_in, rng=rng))

# Check consistency of first state
check_consistency(state)
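
# The script above derives sample_seed from the master generator
# (rng.randint(0, 2**32 - 1)) so data generation stays reproducible from the single
# seed 19023. A minimal sketch of that seed-chaining pattern:
from numpy.random import RandomState

master = RandomState(19023)
# Each downstream component gets its own seed drawn from the master stream,
# so rerunning with the same master seed reproduces every stage.
child_seeds = [master.randint(0, 2**32 - 1) for _ in range(3)]
child_rngs = [RandomState(s) for s in child_seeds]
print(child_seeds, [round(r.rand(), 4) for r in child_rngs])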
Example #57
0
def cross_validate(seed_dir,
                   language,
                   n_samples,
                   n_splits,
                   evl_size,
                   max_epochs,
                   batch_size,
                   max_generalization_loss,
                   min_coverage,
                   hyperparams,
                   seed=None,
                   verbose=False,
                   dashboard_port=6006):
    """Perform cross-validation on the

    The process is designed to be able to continue with minimal additional
    effort after a crash.  It can therefore be stopped and taken up again later.

    Tensorboard is served during each training run.

    :param str seed_dir: Where to store model data for this seed.  If
        cross-validation is performed for multiple seeds, multiple seed
        directories are needed.

    :param yokome.language.Language language: The language to train on.

    :param int n_samples: The number of sample sentences to load.

    :param int n_splits: The number ``k`` of folds.

    :param float evl_size: The portion of evaluation samples w.r.t. the
        non-validation part of all samples.

    :param int max_epochs: The maximum number of epochs to train for.  The
        actual number of epochs may be less if the training process stops early.

    :param int batch_size: The number of sentences to estimate the probability
        for in parallel.

    :param float max_generalization_loss: The maximum generalization loss at
        which the training process is still continued.

    :param min_coverage: The portion of the corpus that has to be covered by the
        minimal vocabulary of the most frequent words that is used to encode
        incoming data.

    :param hyperparams: The model parameters used in this pass of
        cross-validation.

    :param int seed: The seed used for the pseudo-random number generator that
        generates the seeds for the models to be trained.

    :param bool verbose: Whether to print progress indication.

    :param int dashboard_port: On which port to serve Tensorboard.
    
    :return: The average loss over all folds.

    """
    total_loss = 0
    r = RandomState(seed)
    for i, (trn, evl, vld) in enumerate(kfold(language, n_samples, n_splits,
                                              evl_size),
                                        start=1):
        fold_dir = seed_dir + ('/fold_%d' % (i, ))
        if verbose:
            print('            Fold %d...' % (i, ))
        try:
            with open(fold_dir + '/report.json', 'r') as f:
                total_loss += json.load(f)['loss']
        except OSError:
            pass
        else:
            r.randint(0x100000000)
            r.randint(0x100000000)
            continue
        vocabulary = generate_vocabulary_from(language, trn + evl,
                                              min_coverage)
        model_seed = r.randint(0x100000000)
        os.makedirs(fold_dir, exist_ok=True)
        model = LanguageModel(fold_dir,
                              params=hyperparams,
                              seed=model_seed,
                              production_mode=False,
                              language=language,
                              vocabulary=vocabulary)
        tensorboard = Popen([
            'tensorboard', '--logdir',
            model.training_dir(), '--port',
            str(dashboard_port)
        ],
                            stdout=DEVNULL,
                            stderr=DEVNULL)
        model.train(trn,
                    evl,
                    max_epochs,
                    batch_size,
                    max_generalization_loss=max_generalization_loss,
                    shuffle=True,
                    random_state=r.randint(0x100000000),
                    verbose=False)
        # Load the best model
        model = LanguageModel(fold_dir,
                              params=hyperparams,
                              seed=model_seed,
                              production_mode=True,
                              language=language,
                              vocabulary=None)
        loss = float(model.validate(vld, batch_size)['loss'])
        with open(fold_dir + '/.tmp.report.json', 'w') as f:
            json.dump({'loss': loss}, f)
        os.replace(fold_dir + '/.tmp.report.json', fold_dir + '/report.json')
        total_loss += loss
        tensorboard.terminate()
        tensorboard.wait()
    return total_loss / n_splits
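
# One subtle point in cross_validate: when a fold already has a report.json, the two
# r.randint draws are still consumed before `continue`, so later folds see exactly the
# seeds they would have seen in an uninterrupted run. A stripped-down sketch of that
# bookkeeping (the helper name is illustrative, not from the module):
from numpy.random import RandomState

def fold_seeds(master_seed, n_splits, completed_folds):
    # Reproduce the per-fold (model_seed, shuffle_seed) pairs, skipping folds
    # that already finished while still consuming their two draws.
    r = RandomState(master_seed)
    remaining = []
    for i in range(n_splits):
        model_seed = r.randint(0x100000000)
        shuffle_seed = r.randint(0x100000000)
        if i < completed_folds:
            continue  # fold already done: draws consumed, nothing to train
        remaining.append((int(model_seed), int(shuffle_seed)))
    return remaining

assert fold_seeds(7, 5, 2) == fold_seeds(7, 5, 0)[2:]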
class Simulation:
    num_random_pairings: int = 0 
        # N==0 -> (agents are evolved in pairs: a genotype contains a pair of agents) 
        # N>0  -> each agent will go through a simulation with N other agents (randomly chosen)
    entropy_type: str = 'shannon-dd' # 'shannon-1d', 'shannon-dd', 'transfer', 'sample'
    entropy_target_value: str = 'neural' # 'neural', 'distance', 'angle'
    concatenate: bool = True # whether to concatenate values in entropy_target_value
    isolation: bool = False # whether to run simulation on a single agent (as if second agent does not exist)
    genotype_structure: Dict = field(default_factory=lambda:gen_structure.DEFAULT_GEN_STRUCTURE(2))
    num_brain_neurons: int = None  # initialized in __post_init__
    collision_type: str = 'overlapping' # 'none', 'overlapping', 'edge'
    agent_body_radius: int = 4
    agents_pair_initial_distance: int = 20
    agent_sensors_divergence_angle: float = np.radians(45)  # angle between sensors and axes of symmetry
    brain_step_size: float = 0.1
    num_trials: int = 4 # hard coded
    trial_duration: int = 200
    num_cores: int = 1
    data_noise_level: float = 1e-8
    timeit: bool = False

    def __post_init__(self):          

        self.num_brain_neurons = gen_structure.get_num_brain_neurons(self.genotype_structure)
        self.num_data_points = int(self.trial_duration / self.brain_step_size)

        self.init_agents_pair()
        self.set_initial_positions_angles()

        if self.isolation:
            # if we run agents in isolation we want to ignore collisions
            self.collision_type = 'none'

        self.timing = Timing(self.timeit)        

        self.__check_params__()

    def __check_params__(self):
        assert self.num_random_pairings >= 0, \
            "Number of pairing must be >= 0  (0 if a genotype already contains a pair of agents)"

        assert_string_in_values(self.collision_type, 'collision_type', ['none', 'overlapping', 'edge'])
        assert_string_in_values(self.entropy_type, 'entropy_type', ['shannon-1d', 'shannon-dd', 'transfer', 'sample'])
        assert_string_in_values(self.entropy_target_value, 'entropy_target_value', ['neural', 'distance', 'angle'])

        if self.entropy_type in ['shannon-1d', 'shannon-dd']:
            accepted_entropy_target_values = ['neural', 'distance', 'angle']
            assert self.entropy_target_value in accepted_entropy_target_values, \
                "Shannon entropy currently works only when entropy_target_value in {}".format(accepted_entropy_target_values)

        if self.entropy_type == 'transfer':
            assert self.entropy_target_value == 'neural' and self.num_brain_neurons == 2, \
                'Transfer entropy currently works only on two dimensional data (i.e., 2 neural outputs per agent)'

        if self.entropy_target_value == 'angle':
            assert self.entropy_type in ['shannon-1d','sample'], \
                "entropy on angle works only for entropy_type in ['shannon-1d','sample']"

    def init_agents_pair(self):
        self.agents_pair_net = []
        self.agents_pair_body = []
        for _ in range(2):
            self.agents_pair_net.append(
                AgentNetwork(
                    self.num_brain_neurons,
                    self.brain_step_size,
                    self.genotype_structure,
                )
            )
            self.agents_pair_body.append(
                AgentBody(
                    self.agent_body_radius,
                    self.agent_sensors_divergence_angle,
                    collision_type=self.collision_type,
                    timeit = self.timeit
                )
            )

    def set_initial_positions_angles(self, random_state=None):
        
        if random_state:
            self.agents_pair_start_angle_trials = pi * random_state.uniform(0, 2, (self.num_trials,2))
        else:            
            # first agent always points right
            # second agent points right, up, left, down in each trial respectively
            self.agents_pair_start_angle_trials = [
                [0., 0.],
                [0., pi/2],
                [0., pi],
                [0., 3*pi/2],
            ]

        # first agent positioned at (0,0)
        # second agent 20 units away from first, along its facing direction 
        # (right, up, left, down) if not random
        self.agents_pair_start_pos_trials = [
            [
                np.array([0.,0.]), 
                self.agents_pair_initial_distance * \
                    np.array(
                        [
                            np.cos(self.agents_pair_start_angle_trials[i][1]),
                            np.sin(self.agents_pair_start_angle_trials[i][1])
                        ]
                    )
            ]
            for i in range(self.num_trials)
        ]
        if random_state:
            # reinitialize the angle because it was used for positioning
            # we don't want the second agent to necessarily face outwards
            self.agents_pair_start_angle_trials = pi * random_state.uniform(0, 2, (self.num_trials,2))

    def save_to_file(self, file_path):
        with open(file_path, 'w') as f_out:
            obj_dict = asdict(self)
            json.dump(obj_dict, f_out, indent=3, cls=NumpyListJsonEncoder)

    @staticmethod
    def load_from_file(file_path, **kwargs):
        with open(file_path) as f_in:
            obj_dict = json.load(f_in)

        if kwargs:
            obj_dict.update(kwargs)

        sim = Simulation(**obj_dict)
        gen_structure.check_genotype_structure(sim.genotype_structure)
        return sim        

    def set_agents_genotype_phenotype(self):
        '''
        Split the genotype(s) and set the phenotypes of the two agents.
        With num_random_pairings == 0, the genotype at genotype_index contains
        both agents (one after the other); otherwise the partner genotype comes
        from rand_agent_indexes.
        '''
        
        phenotypes = [None,None]
        if self.num_random_pairings == 0:
            genotypes_pair = self.genotype_population[self.genotype_index]
            genotypes_split = np.array_split(genotypes_pair, 2)                
        else:
            genotypes_split = [
                self.genotype_population[self.genotype_index], 
                self.genotype_population[self.rand_agent_indexes[self.sim_index]], 
            ]
        if self.data_record is not None:
            self.data_record['genotypes'] = genotypes_split
            phenotypes = [{},{}]
            self.data_record['phenotypes'] = phenotypes
        for a in range(2):
            self.agents_pair_net[a].genotype_to_phenotype(
                genotypes_split[a], phenotype_dict=phenotypes[a])

    def init_values_for_computing_entropy(self):

        if self.entropy_target_value == 'neural':
            # initialize agents' brain outputs of all trials for computing entropy
            # list of lists (num_trials x 2 agents), each containing an array (num_data_points, num_brain_neurons)
            self.values_for_computing_entropy = [
                [
                    np.zeros((self.num_data_points, self.num_brain_neurons)) 
                    for _ in range(2)
                ] for _ in range(self.num_trials)
            ]
        elif self.entropy_target_value == 'distance':
            # distance (1-d data) per trial            
            # entropy is computed based on distances
            # 4 list (one per trial) with the agent distances
            self.values_for_computing_entropy = [
                np.zeros((self.num_data_points,1))
                for _ in range(self.num_trials)
            ]
        else:
            # angle: (1-d data) per trial per agent
            assert self.entropy_target_value == 'angle'
            self.values_for_computing_entropy = [
                [
                    np.zeros((self.num_data_points,1))
                    for _ in range(2)
                ] for _ in range(self.num_trials)
            ]

    def init_data_record(self):
        if self.data_record is None:
            return            
        self.data_record['position'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['distance'] = [None for _ in range(self.num_trials)]
        self.data_record['angle'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['collision'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['delta_xy'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['signal_strength'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_input'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_state'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['derivatives'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['brain_output'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['wheels'] = [[None,None] for _ in range(self.num_trials)]
        self.data_record['emitter'] = [[None,None] for _ in range(self.num_trials)]                
        self.timing.add_time('SIM_init_data', self.tim)

    def init_data_record_trial(self, t):
        if self.data_record is None:            
            return
        self.data_record['distance'][t] = np.zeros(self.num_data_points)
        for a in range(2):
            if self.ghost_index == a:
                # copy all ghost agent's values from original_data_record
                if t == 0:
                    for k in self.data_record:
                        self.data_record[k] = deepcopy(self.original_data_record[k])
            else:
                self.data_record['position'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['angle'][t][a] = np.zeros(self.num_data_points)
                self.data_record['collision'][t][a] = np.zeros(self.num_data_points)
                self.data_record['delta_xy'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['signal_strength'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['brain_input'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['brain_state'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['derivatives'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['brain_output'][t][a] = np.zeros((self.num_data_points, self.num_brain_neurons))
                self.data_record['wheels'][t][a] = np.zeros((self.num_data_points, 2))
                self.data_record['emitter'][t][a] = np.zeros(self.num_data_points)
        self.timing.add_time('SIM_init_trial_data', self.tim)            

    def save_data_record(self, t, i):
        if self.data_record is None: 
            return
        self.data_record['distance'][t][i] = self.get_agents_distance()
        for a in range(2):    
            if self.ghost_index == a:                    
                continue # do not save data for ghost: already saved in init_data_record_trial
            agent_net = self.agents_pair_net[a]
            agent_body = self.agents_pair_body[a]
            self.data_record['position'][t][a][i] = agent_body.position
            self.data_record['angle'][t][a][i] = agent_body.angle
            self.data_record['collision'][t][a][i] = 1 if agent_body.flag_collision else 0
            self.data_record['delta_xy'][t][a][i] = self.prev_delta_xy_agents[a]                                        
            self.data_record['wheels'][t][a][i] = agent_body.wheels
            self.data_record['emitter'][t][a][i] = self.emitter_agents[a]
            self.data_record['signal_strength'][t][a][i] = self.signal_strength_agents[a]
            self.data_record['brain_input'][t][a][i] = agent_net.brain.input                    
            self.data_record['brain_state'][t][a][i] = agent_net.brain.states
            self.data_record['derivatives'][t][a][i] = agent_net.brain.dy_dt
            self.data_record['brain_output'][t][a][i] = agent_net.brain.output
        self.timing.add_time('SIM_save_data', self.tim)                            

    def compute_signal_strength_agents(self):
        for a in [x for x in range(2) if x != self.ghost_index]:    
            if self.isolation and a==1:
                self.signal_strength_agents[a] = 0
            else:
                b = 1 - a
                # signal_strength = np.array([0.,0.])  # if we want to mimic zero signal strength
                self.signal_strength_agents[a] = self.agents_pair_body[a].get_signal_strength(
                    self.agents_pair_body[b].position,
                    self.emitter_agents[b]
                )
        self.timing.add_time('SIM_get_signal_strength', self.tim)

    def update_wheels_emitter_agents(self, t,i):
        for a in range(2):
            if a == self.ghost_index:
                # ghost agent: replay the recorded emitter value
                self.emitter_agents[a] = self.original_data_record['emitter'][t][a][i]
            elif self.isolation and a==1:
                self.emitter_agents[a] = 0
            else:
                motor_outputs = self.agents_pair_net[a].compute_motor_outputs()
                self.agents_pair_body[a].wheels = np.take(motor_outputs, [0,2]) # index 0,2: MOTORS  
                self.emitter_agents[a] = motor_outputs[1] # index 1: EMITTER
        self.timing.add_time('SIM_compute_motors_emitter', self.tim)

    def get_agents_distance(self):
        return self.agents_pair_body[0].get_dist_centers(self.agents_pair_body[1].position)

    def store_values_for_entropy(self, t,i):
        if self.entropy_target_value == 'neural': #neural outputs 
            for a in [x for x in range(2) if x != self.ghost_index]:
                self.values_for_computing_entropy[t][a][i] = self.agents_pair_net[a].brain.output  
        elif self.entropy_target_value == 'angle': # angle
            for a in [x for x in range(2) if x != self.ghost_index]:
                self.values_for_computing_entropy[t][a][i] = self.agents_pair_body[a].angle
        else: # distance
            self.values_for_computing_entropy[t][i] = self.get_agents_distance()
                                
    def prepare_agents_for_trial(self, t):
        for a in range(2):
            agent_net = self.agents_pair_net[a]
            agent_body = self.agents_pair_body[a]
            # reset params that are due to change during the experiment
            agent_body.init_params(
                wheels = np.zeros(2),
                flag_collision = False
            )
            # set initial states to zeros
            agent_net.init_params(
                brain_states = np.zeros(self.num_brain_neurons),
            )
            agent_pos = np.copy(self.agents_pair_start_pos_trials[t][a])
            agent_angle = self.agents_pair_start_angle_trials[t][a]
            agent_body.set_position_and_angle(agent_pos, agent_angle)
            # compute output
            agent_net.brain.compute_output()        
        # compute motor outputs
        self.update_wheels_emitter_agents(t, 0)
        # compute signal strength

        self.store_values_for_entropy(t,0) #

        self.timing.add_time('SIM_prepare_agents_for_trials', self.tim)     

    def compute_brain_input_agents(self):                
        for a in [x for x in range(2) if x != self.ghost_index]:    
            if self.isolation and a==1:
                continue
            self.agents_pair_net[a].compute_brain_input(self.signal_strength_agents[a])
        self.timing.add_time('SIM_compute_brain_input', self.tim)

    def compute_brain_euler_step_agents(self):          
        for a in [x for x in range(2) if x != self.ghost_index]:              
            if self.isolation and a==1:
                continue
            self.agents_pair_net[a].brain.euler_step()  # this sets agent.brain.output (2-dim vector)
            self.timing.add_time('SIM_euler_step', self.tim)

    def move_one_step_agents(self, t, i):
        delta_xy_agents = [None, None]
        angle_agents = [None, None]
        for a in range(2):                
            if self.ghost_index == a:
                # for ghost agent we need to retrieve position, delta_xy, and angle from data
                self.agents_pair_body[a].position = self.original_data_record['position'][t][a][i] 
                delta_xy_agents[a] = self.original_data_record['delta_xy'][t][a][i]                        
                angle_agents[a] = self.original_data_record['angle'][t][a][i]
            else:                                                
                # TODO: check if the agents didn't go too far from one another
                b = 1 - a
                delta_xy_agents[a], angle_agents[a] =  self.agents_pair_body[a].move_one_step(
                    self.prev_delta_xy_agents[b],
                    self.prev_angle_agents[b]
                )                                           
        self.prev_delta_xy_agents = delta_xy_agents
        self.prev_angle_agents = angle_agents
        self.timing.add_time('SIM_move_one_step', self.tim)  

    def compute_performance(self, t):
        performance_agent_AB = []
        if self.entropy_type=='transfer':
            # it only applies to neural_outputs (with 2 neurons)
            # add random noise to data before calculating transfer entropy
            for a in range(2):
                if self.ghost_index == a:
                    continue
                if self.isolation and a==1:
                    continue                    
                
                if self.concatenate:
                    all_values_for_computing_entropy = np.concatenate([
                        self.values_for_computing_entropy[t][a]
                        for t in range(self.num_trials)
                    ])
                else:
                    all_values_for_computing_entropy = self.values_for_computing_entropy[t][a]
                
                all_values_for_computing_entropy = utils.add_noise(
                    all_values_for_computing_entropy, 
                    self.random_state, 
                    noise_level=self.data_noise_level
                )

                # calculate performance        
                # TODO: understand what happens if reciprocal=False
                performance_agent_AB.append(
                    get_transfer_entropy(all_values_for_computing_entropy, binning=True) 
                )

        elif self.entropy_type in ['shannon-1d', 'shannon-dd']:
            # shannon-1d, shannon-dd
            if self.entropy_target_value == 'distance':
                if self.concatenate:
                    all_values_for_computing_entropy = np.concatenate([
                        self.values_for_computing_entropy
                    ])
                else:
                    all_values_for_computing_entropy = self.values_for_computing_entropy[t]
                min_v, max_v= 0., 100.
                performance_agent_AB = [
                    get_shannon_entropy_dd_simplified(
                        all_values_for_computing_entropy, min_v, max_v)
                ]
            elif self.entropy_target_value == 'angle':
                # angle (apply modulo angle of 2*pi)
                # min_v, max_v= 0., 2*np.pi
                min_v, max_v= -np.pi/4, np.pi/4
                for a in range(2):
                    if self.ghost_index == a:
                        continue
                    if self.isolation and a==1:
                        continue
                    if self.concatenate:
                        all_values_for_computing_entropy = np.concatenate([
                            self.values_for_computing_entropy[t][a]
                            for t in range(self.num_trials)
                        ])
                    else:
                        all_values_for_computing_entropy = self.values_for_computing_entropy[t][a]
                    # all_values_for_computing_entropy = all_values_for_computing_entropy % 2*np.pi
                    all_values_for_computing_entropy = all_values_for_computing_entropy.flatten()
                    all_values_for_computing_entropy = np.diff(all_values_for_computing_entropy)
                    performance_agent_AB.append(
                        get_shannon_entropy_1d(all_values_for_computing_entropy, min_v, max_v)
                    )
            else: # neural
                min_v, max_v= 0., 1.
                for a in range(2):
                    if self.ghost_index == a:
                        continue
                    if self.isolation and a==1:
                        continue
                    if self.concatenate:
                        all_values_for_computing_entropy = np.concatenate([
                            self.values_for_computing_entropy[t][a]
                            for t in range(self.num_trials)
                        ])
                    else:
                        all_values_for_computing_entropy = self.values_for_computing_entropy[t][a]

                    if self.entropy_type == 'shannon-dd':
                        performance_agent_AB.append(
                            get_shannon_entropy_dd_simplified(all_values_for_computing_entropy, min_v, max_v)
                        )
                    else:
                        # shannon-1d
                        for c in range(self.num_brain_neurons):
                            column_values = all_values_for_computing_entropy[:,c]
                            performance_agent_AB.append(
                                get_shannon_entropy_1d(column_values, min_v, max_v)
                            )            
        else:
            # sample entropy
            # only applies to 1d data
            if self.entropy_target_value == 'neural':
                for a in range(2):
                    if self.ghost_index == a:
                        continue
                    if self.isolation and a==1:
                        continue
                    if self.concatenate:
                        all_values_for_computing_entropy = np.concatenate([
                            self.values_for_computing_entropy[t][a]
                            for t in range(self.num_trials)
                        ])
                    else:
                        all_values_for_computing_entropy = self.values_for_computing_entropy[t][a]

                    for c in range(self.num_brain_neurons):
                        column_values = all_values_for_computing_entropy[:,c]
                        mean = column_values.mean()
                        std = column_values.std()
                        normalize_values = (column_values - mean) / std
                        performance_agent_AB.append(
                            _numba_sampen(normalize_values, order=2, r=(0.2 * DEFAULT_SAMPLE_ENTROPY_NEURAL_STD)) 
                        )        
            elif self.entropy_target_value == 'distance':
                if self.concatenate:
                    all_values_for_computing_entropy = np.concatenate([
                        self.values_for_computing_entropy
                    ])
                else:
                    all_values_for_computing_entropy = self.values_for_computing_entropy[t]                    
                mean = all_values_for_computing_entropy.mean()
                std = all_values_for_computing_entropy.std()
                normalize_values = (all_values_for_computing_entropy - mean) / std
                performance_agent_AB = [
                    _numba_sampen(normalize_values.flatten(), order=2, 
                        r=(0.2 * DEFAULT_SAMPLE_ENTROPY_DISTANCE_STD)) 
                ]
            else: 
                assert self.entropy_target_value == 'angle'
                for a in range(2):
                    if self.ghost_index == a:
                        continue
                    if self.isolation and a==1:
                        continue
                    if self.concatenate:
                        all_values_for_computing_entropy = np.concatenate([
                            self.values_for_computing_entropy[t][a]
                            for t in range(self.num_trials)
                        ])
                    else:
                        all_values_for_computing_entropy = self.values_for_computing_entropy[t][a]
                    all_values_for_computing_entropy = np.diff(all_values_for_computing_entropy)
                    mean = all_values_for_computing_entropy.mean()
                    std = all_values_for_computing_entropy.std()
                    normalize_values = (all_values_for_computing_entropy - mean) / std
                    performance_agent_AB.append(
                        _numba_sampen(normalize_values.flatten(), order=2, r=(0.2 * DEFAULT_SAMPLE_ENTROPY_ANGLE_STD)) 
                    )      
        return performance_agent_AB                                           

    #################
    # MAIN FUNCTION
    #################
    def run_simulation(self, genotype_population=None, genotype_index=None,
        rnd_seed=0, data_record_list=None, ghost_index=None, original_data_record_list=None):
        '''
        Main function to compute shannon/transfer/sample entropy performance
        '''

        self.tim = self.timing.init_tictoc()

        self.genotype_population = genotype_population
        self.genotype_index = genotype_index
        self.random_state = RandomState(rnd_seed)
        self.rand_agent_indexes = []
        self.ghost_index = ghost_index

        # fill rand_agent_indexes with num_random_pairings indexes of other (randomly chosen) agents
        while len(self.rand_agent_indexes) != self.num_random_pairings:
            next_rand_index = self.random_state.randint(len(self.genotype_population))
            if next_rand_index != self.genotype_index:
                self.rand_agent_indexes.append(next_rand_index)

        num_simulations = max(1, self.num_random_pairings)        
        sim_performances = []

        for self.sim_index in range(num_simulations):

            self.data_record = None 
            if data_record_list is not None: 
                self.data_record = {}
                data_record_list.append(self.data_record)
            self.original_data_record = None if original_data_record_list is None else original_data_record_list[self.sim_index]
            self.values_for_computing_entropy = [] # initialized in init_values_for_computing_entropy

            if self.genotype_population is not None:            
                self.set_agents_genotype_phenotype()    
                self.timing.add_time('SIM_init_agent_phenotypes', self.tim)    

            trial_performances = []
            self.signal_strength_agents = [None, None]
            self.emitter_agents = [None, None]
            self.prev_delta_xy_agents, self.prev_angle_agents = None, None # pylint: disable=W0612
            self.init_values_for_computing_entropy()

            # INITIALIZE DATA RECORD
            self.init_data_record()        

            # EXPERIMENT START
            for t in range(self.num_trials):

                # SETUP AGENTS FOR TRIAL
                self.prepare_agents_for_trial(t)            
                
                # initialize prev_delta_xy with zeros (zero displacement)
                self.prev_delta_xy_agents = [np.array([0.,0.]), np.array([0.,0.])]            
                # initialize prev_angle as initial angle of each agent
                self.prev_angle_agents = [self.agents_pair_body[a].angle for a in range(2)]                        
                
                # INIT DATA for TRIAL
                self.init_data_record_trial(t)           

                self.save_data_record(t, 0)

                # TRIAL START
                for i in range(1, self.num_data_points):                

                    # 1) Agent senses strength of emitter from the two sensors
                    self.compute_signal_strength_agents()

                    # 2) compute brain input
                    self.compute_brain_input_agents()

                    # 3) Update agent's neural system
                    self.compute_brain_euler_step_agents()

                    # 4) Agent updates wheels and  emitter
                    self.update_wheels_emitter_agents(t,i)                            

                    # 5) Move one step  agents
                    self.move_one_step_agents(t, i)

                    # 6) Store the values for computing entropy
                    self.store_values_for_entropy(t,i)

                    self.save_data_record(t, i)             

                # TRIAL END

                if self.concatenate and t!=self.num_trials-1:
                    # do not compute performance until the last trial
                    continue

                performance_agent_AB = self.compute_performance(t)

                if self.num_random_pairings==0:
                    # when agents are evolved in pairs the
                    # performance is the mean between the two agents
                    agents_perf = np.mean(performance_agent_AB)
                else:
                    # otherwise it's the performance of the first agent
                    agents_perf = np.mean(performance_agent_AB[0])

                # appending mean performance between two agents in trial_performances
                trial_performances.append(agents_perf)

                self.timing.add_time('SIM_compute_performace', self.tim)

            # SIMULATION END

            # returning mean performances between all trials
            sim_perf = np.mean(trial_performances)
            sim_performances.append(sim_perf)       
            if self.data_record:
                self.data_record['summary'] = {
                    'rand_agent_indexes': self.rand_agent_indexes,
                    'performance_trials': trial_performances,
                    'performance_sim': sim_perf
                }
        
        return np.mean(sim_performances)

    '''
    POPULATION EVALUATION FUNCTION
    '''
    def evaluate(self, population, random_seeds):                
        population_size = len(population)
        assert population_size == len(random_seeds)

        if self.num_cores > 1:
            # run parallel job
            sim_array = [Simulation(**asdict(self)) for _ in range(self.num_cores)]
            performances = Parallel(n_jobs=self.num_cores)( # prefer="threads" does not work
                delayed(sim_array[i%self.num_cores].run_simulation)(population, i, rnd_seed) \
                for i, (_, rnd_seed) in enumerate(zip(population, random_seeds))
            )
        else:
            # single core
            performances = [
                self.run_simulation(population, i, rnd_seed)
                for i, (_, rnd_seed) in enumerate(zip(population, random_seeds))
             ]

        return performances
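
# run_simulation above pairs the focal agent with num_random_pairings randomly drawn
# partners, rejecting the agent's own index. A standalone sketch of that pairing loop
# (the helper name is illustrative, not part of the class):
from numpy.random import RandomState

def pick_partner_indexes(population_size, self_index, n_partners, rnd_seed):
    # Draw partner indexes until we have n_partners, skipping the agent itself
    # (duplicates are allowed, exactly as in run_simulation above).
    rng = RandomState(rnd_seed)
    partners = []
    while len(partners) != n_partners:
        candidate = rng.randint(population_size)
        if candidate != self_index:
            partners.append(candidate)
    return partners

print(pick_partner_indexes(population_size=96, self_index=3, n_partners=4, rnd_seed=0))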
Example #59
0
class TestAnalyzer:

    def setUp(self):

        self.prng = RandomState(133)

        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))

        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        self.df_features_with_groups = self.df_features.copy()
        self.df_features_with_groups['group'] = ['group1']*5 + ['group2']*5

        self.df_features_with_groups_and_length = self.df_features_with_groups.copy()
        self.df_features_with_groups_and_length['length'] = self.prng.normal(50, 250, 10)

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):

        # test that there are no nans for data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_correlation_helper_for_data_with_one_row(self):
        # this should return two data frames with nans
        retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_three_rows(self):
        # this should compute marginal correlations but return Nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations
        # it should also raise a UserWarning
        with warnings.catch_warnings(record=True) as warning_list:
            retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(np.abs(retval[1].values).sum(), 0.9244288637889855)
        assert issubclass(warning_list[-1].category, UserWarning)



    def test_correlation_helper_for_data_with_groups(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups, 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)


    def test_correlation_helper_for_one_group_with_one_row(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups[:6], 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)
        assert_equal(retval[0].isnull().values.sum(), 3)

    def test_correlation_helper_for_groups_and_length(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length,
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)


    def test_correlation_helper_for_group_with_one_row_and_length(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length[:6],
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)



    def test_that_correlation_helper_works_for_data_with_the_same_human_score(self):
        # this test should raise UserWarning because the determinant is very close to
        # zero. It also raises Runtime warning because
        # variance of human scores is 0.
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score, 'sc1', 'group')
            assert_equal(retval[0].isnull().values.sum(), 3)
            assert_equal(retval[1].isnull().values.sum(), 3)
            assert issubclass(warning_list[-1].category, UserWarning)


    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds
        # note that we will get a value for QWK since we are
        # dividing by N and not N-1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
            assert_equal(evals.isnull().values.sum(), 5)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation and SMD.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
            assert_equal(evals.isnull().values.sum(), 2)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))
        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.03679030063229779,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.8273273273273274})

        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4,
                                                    smd_method='williamson')
        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259

        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())
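
    # A hedged sketch of the 'williamson' SMD used above: the expected value is
    # consistent with dividing the raw mean difference by the quadratic mean of
    # the two population SDs. The hypothetical helper below is illustrative only
    # and is not taken from the Analyzer implementation.
    @staticmethod
    def _example_williamson_smd(h_mean, sys_mean, h_sd, sys_sd):
        """Illustrative only: (sys_mean - h_mean) / sqrt((h_sd**2 + sys_sd**2) / 2)."""
        return (sys_mean - h_mean) / np.sqrt((h_sd ** 2 + sys_sd ** 2) / 2)

    # e.g. _example_williamson_smd(3.42, 3.45, 0.5, 0.4) is approximately
    # 0.066259, matching expected_metrics2['SMD'] in the test above.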


    def test_metrics_helper_zero_system_sd(self):
        human_scores = [1, 3, 4, 2, 3, 1, 3, 4, 2, 1]
        system_score = [2.54] * 10
        computed_metrics1 = Analyzer.metrics_helper(human_scores,
                                                    system_score)
        expected_metrics1 = pd.Series({'N': 10,
                                       'R2': -0.015806451612903283,
                                       'RMSE': 1.122319027727856,
                                       'SMD': 0.11927198519188371,
                                       'adj_agr': 50.0,
                                       'corr': None,
                                       'exact_agr': 0,
                                       'h_max': 4,
                                       'h_mean': 2.4,
                                       'h_min': 1.0,
                                       'h_sd': 1.1737877907772674,
                                       'kappa': 0,
                                       'sys_max': 2.54,
                                       'sys_mean': 2.54,
                                       'sys_min': 2.54,
                                       'sys_sd': 0,
                                       'wtkappa': 0})
        # now compute DSM
        computed_metrics2 = Analyzer.metrics_helper(human_scores,
                                                    system_score,
                                                    use_diff_std_means=True)

        # the only difference is that SMD is replaced by DSM
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2.drop("SMD", inplace=True)
        expected_metrics2['DSM'] = None
        assert_series_equal(computed_metrics1.sort_index(),
                            expected_metrics1.sort_index(),
                            check_dtype=False)
        assert_series_equal(computed_metrics2.sort_index(),
                            expected_metrics2.sort_index(),
                            check_dtype=False)
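
    # Hedged sketch: DSM (difference of standardized means) standardizes each
    # set of scores by its own SD before taking the mean difference, so a
    # constant system score (sys_sd == 0) leaves it undefined -- hence the
    # expected None above. The hypothetical helper below is an assumption for
    # illustration, not a verified restatement of Analyzer.metrics_helper.
    @staticmethod
    def _example_dsm(human_scores, system_scores):
        """Illustrative only: mean of (z_system - z_human)."""
        h = np.asarray(human_scores, dtype=float)
        s = np.asarray(system_scores, dtype=float)
        z_h = (h - h.mean()) / h.std(ddof=1)
        z_s = (s - s.mean()) / s.std(ddof=1)  # nan when the system scores are constant
        return np.mean(z_s - z_h)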


    def test_compute_pca_less_samples_than_features(self):
        # test PCA when we have fewer samples than
        # features. In this case the number of components
        # equals the number of samples.
        df = pd.DataFrame({'a': range(50)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 50)
        assert_equal(len(variance.columns), 50)
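
    # Hedged sketch: this behaviour matches scikit-learn's PCA, where leaving
    # n_components unset caps the number of components at
    # min(n_samples, n_features); whether compute_pca wraps sklearn in exactly
    # this way is an assumption here, not something this test verifies.
    @staticmethod
    def _example_pca_component_cap(df):
        """Illustrative only: fit a plain sklearn PCA and return its component count."""
        from sklearn.decomposition import PCA  # local import keeps the sketch self-contained
        return PCA().fit(df.values).n_components_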

    def test_compute_disattenuated_correlations_single_human(self):
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])
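
    # The results above are consistent with the usual disattenuation formula
    # corr_disattenuated = hm_corr / sqrt(hh_corr): 0.9 / 0.9 = 1.0,
    # 0.4 / 0.8 = 0.5 and 0.6 / 0.6 = 1.0. It also accounts for the NaNs in the
    # tests further below when the human-human correlation is missing or
    # negative. The hypothetical helper below is illustrative only and is not
    # asserted to be Analyzer's exact implementation.
    @staticmethod
    def _example_disattenuate(hm_corr, hh_corr):
        """Illustrative only: hm_corr / sqrt(hh_corr), aligned by index."""
        return hm_corr / np.sqrt(hh_corr)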

    def test_compute_disattenuated_correlations_single_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81],
                            index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        hm_corr = pd.Series([0.9, 0.6],
                            index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        hm_corr = pd.Series([0.9, 0.8],
                            index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
Example #60
0
"""
experiment setup, running & evaluation
CK 2014
"""
import copy
import numpy as np
import networkx as nx
import hcWrapSim as wrp
import hcNetworks as net
import hcPlotting as plo
from hcUtil import printprogress
from matplotlib import pyplot as plt
from numpy.random import RandomState

# provide a fixed set of random seeds
rng = RandomState(1)
standard_seeds = rng.randint(0, 100000, 200)


class measure:
    def __init__(self,
                 roi=None,
                 wait=0,
                 name=None,
                 window=None,
                 windowlength=1000,
                 increment=1):
        """
        A measure runs some analysis on raw simulation data & remembers the result.
        It accumulates results when applied repeatedly.

        Args: