Example #1
def _smepdpsolve_single_trajectory(L, dt, tlist, N_store, N_substeps, rho_t,
                                   c_ops, e_ops, data):
    """ 
    Internal function.
    """
    states_list = []

    rho_t = np.copy(rho_t)

    prng = RandomState() # todo: seed it
    r_jump, r_op = prng.rand(2)

    jump_times = []
    jump_op_idx = []

    for t_idx, t in enumerate(tlist):

        if e_ops:
            for e_idx, e in enumerate(e_ops):
                data.expect[e_idx, t_idx] += expect_rho_vec(e, rho_t)
        else:
            states_list.append(Qobj(vec2mat(rho_t)))

        for j in range(N_substeps):

            if expect_rho_vec(d_op, sigma_t) < r_jump:
                # jump occurs
                p = np.array([expect_rho_vec(c.dag() * c, rho_t) for c in c_ops])
                p = np.cumsum(p / np.sum(p))
                n = np.where(p >= r_op)[0][0]

                # apply jump
                rho_t = c_ops[n] * rho_t * c_ops[n].dag()
                rho_t /= expect_rho_vec(c_ops[n].dag() * c_ops[n], rho_t)
                rho_t = np.copy(rho_t)

                # store info about jump
                jump_times.append(tlist[t_idx] + dt * j)
                jump_op_idx.append(n)

                # get new random numbers for next jump
                r_jump, r_op = prng.rand(2)

            # deterministic evolution without correction for norm decay
            dsigma_t = spmv(L.data.data,
                            L.data.indices,
                            L.data.indptr, sigma_t) * dt

            # deterministic evolution with correction for norm decay
            drho_t = spmv(L.data.data,
                          L.data.indices,
                          L.data.indptr, rho_t) * dt

            # increment density matrices
            sigma_t += dsigma_t
            rho_t += drho_t

    return states_list, jump_times, jump_op_idx
Example #2
def create(seed,
           head_prob = 0.8,
           two_col_prob = 0.3,
           section_range = [5,9]):
    '''
    Creates the same html for a given seed
    '''
    rand = RandomState(seed)
    soup = BeautifulSoup(_template, 'html.parser')
    if rand.rand() < head_prob:
        soup.body.insert(0, create_header(rand, soup, level=1))
    content = soup.body.div
    if rand.rand() < two_col_prob:
        content['class'] = 'col2'
    def append_section(new_elem, header_level = 0):
        div = soup.new_tag('div')
        if header_level > 0:
            div.append(create_header(rand, soup, level = header_level))
        div.append(new_elem)
        content.append(div)
    actions = [lambda:append_section(create_paragraph(rand, soup)),
               lambda:append_section(create_table(rand, soup), header_level = 3),
               lambda:append_section(create_list(rand, soup), header_level = 3)]
    section_count = sample_discrete_normal(rand, *section_range)
    for _sec_i in xrange(section_count):
        action = rand.choice(actions)
        action()
        
    return soup
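
Everything in create() is drawn from the single RandomState(seed), so two calls with the same seed produce identical markup. Below is a minimal sketch of that reproducibility property using NumPy alone, independent of the BeautifulSoup helpers assumed above:

from numpy.random import RandomState

# Two generators seeded identically emit the same sequence of draws,
# which is what makes create(seed) deterministic.
a = RandomState(123)
b = RandomState(123)
assert a.rand() == b.rand()
assert (a.rand(4) == b.rand(4)).all()
assert a.choice(['paragraph', 'table', 'list']) == b.choice(['paragraph', 'table', 'list'])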
def main():
    batch_size = 8
    x = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='x-input')
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='y-input')

    w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
    y_hat = tf.matmul(x, w1)

    # loss
    loss_less = 10
    loss_more = 1
    loss = tf.reduce_sum(tf.where(tf.greater(y, y_hat), (y - y_hat) * loss_more, (y_hat - y) * loss_less))

    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    rdm = RandomState(1)

    dataset_size = 128
    X = rdm.rand(dataset_size, 2)
    Y = [[x1 + x2 + rdm.rand() / 10 - 0.05] for (x1, x2) in X]

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        Steps = 5001
        for i in range(Steps):
            start = (i * batch_size) % dataset_size
            end = min(start + batch_size, dataset_size)
            sess.run(train_step, feed_dict={x: X[start:end], y: Y[start:end]})
            if i % 100 == 0:
                print(sess.run(w1))
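
The tf.where expression above builds an asymmetric loss: when the true value exceeds the prediction the error is weighted by loss_more (1), otherwise by loss_less (10). A minimal NumPy sketch of the same computation, useful for checking the graph output by hand (the sample values are made up):

import numpy as np

loss_less, loss_more = 10, 1
y_true = np.array([1.0, 2.0, 3.0])   # ground truth
y_pred = np.array([1.5, 2.0, 2.0])   # predictions

per_sample = np.where(y_true > y_pred,
                      (y_true - y_pred) * loss_more,   # under-prediction
                      (y_pred - y_true) * loss_less)   # over-prediction (and ties)
print(per_sample.sum())  # 0.5*10 + 0.0*10 + 1.0*1 = 6.0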
def test_experiment_sample_windows():
    data_rng = RandomState(398765905)
    rand_topo = data_rng.rand(200,10,10,3).astype(np.float32)
    rand_y = np.int32(data_rng.rand(200) > 0.5)
    rand_topo[rand_y == 1] += 0.1
    rand_set = DenseDesignMatrixWrapper(topo_view=rand_topo, y=rand_y)
    
    lasagne.random.set_rng(RandomState(9859295))
    in_layer = InputLayer(shape= [None, 10,5,3])
    network = DenseLayer(incoming=in_layer, name='softmax',
        num_units=2, nonlinearity=lasagne.nonlinearities.softmax)
    updates_modifier = MaxNormConstraint({'softmax': 0.5})
    
    dataset = rand_set
    
    dataset_iterator = WindowsIterator(n_samples_per_window=5, 
                                             batch_size=60)
    
    preprocessor = OnlineAxiswiseStandardize(axis=['c', 1])
    dataset_splitter=FixedTrialSplitter(n_train_trials=150, valid_set_fraction=0.1)
    updates_var_func=lasagne.updates.adam
    loss_var_func= lasagne.objectives.categorical_crossentropy
    monitors=[braindecode.veganlasagne.monitors.LossMonitor (),
                    braindecode.veganlasagne.monitors.WindowMisclassMonitor(),
                    braindecode.veganlasagne.monitors.RuntimeMonitor()]
    stop_criterion= braindecode.veganlasagne.stopping.MaxEpochs(num_epochs=5)
    
    
    exp = Experiment(network, dataset, dataset_splitter, preprocessor,
              dataset_iterator, loss_var_func, updates_var_func, 
              updates_modifier, monitors, stop_criterion,
              remember_best_chan='valid_misclass',
              run_after_early_stop=True)
    exp.setup()
    exp.run()
    
    assert np.allclose(
        [0.629630,0.140741,0.029630,0.022222,0.000000,0.000000,0.000000],
        exp.monitor_chans['train_misclass'],
        rtol=1e-4, atol=1e-4)
    assert np.allclose(
        [0.400000,0.133333,0.066667,0.000000,0.000000,0.000000,0.000000],
        exp.monitor_chans['valid_misclass'],
        rtol=1e-4, atol=1e-4)
    assert np.allclose(
        [0.560000,0.060000,0.000000,0.000000,0.000000,0.000000,0.000000],
        exp.monitor_chans['test_misclass'],
        rtol=1e-4, atol=1e-4)
    assert np.allclose(
        [1.180485, 0.574264, 0.420023, 0.330909, 0.278569, 0.245692, 0.242845],
        exp.monitor_chans['train_loss'],
        rtol=1e-4, atol=1e-4)
    assert np.allclose(
        [1.016782, 0.514049, 0.370485, 0.288948, 0.240913, 0.211189, 0.215967],
        exp.monitor_chans['valid_loss'],
        rtol=1e-4, atol=1e-4)
    assert np.allclose(
        [1.031832, 0.504570, 0.352317, 0.269810, 0.223904, 0.196681, 0.197899],
        exp.monitor_chans['test_loss'],
        rtol=1e-4, atol=1e-4)
    def init_params(self, embed_map, count_dict, L):
        """
        Initializes embeddings and context matrices
        """
        prng = RandomState(self.seed)

        # Pre-trained word embedding matrix
        if embed_map != None:
            R = np.zeros((self.K, self.V))
            for i in range(self.V):
                word = count_dict[i]
                if word in embed_map:
                    R[:,i] = embed_map[word]
                else:
                    R[:,i] = embed_map['*UNKNOWN*']
            R = gpu.garray(R)
        else:
            r = np.sqrt(6) / np.sqrt(self.K + self.V + 1)
            R = prng.rand(self.K, self.V) * 2 * r - r
            R = gpu.garray(R)
        bw = gpu.zeros((1, self.V))

        # Context 
        C = 0.01 * prng.randn(self.context, self.K, self.K)
        C = gpu.garray(C)

        # Image context
        M = 0.01 * prng.randn(self.h, self.K)
        M = gpu.garray(M)

        # Hidden layer
        r = np.sqrt(6) / np.sqrt(self.D + self.h + 1)
        J = prng.rand(self.D, self.h) * 2 * r - r
        J = gpu.garray(J)
        bj = gpu.zeros((1, self.h))

        # Initial deltas used for SGD
        deltaR = gpu.zeros(np.shape(R))
        deltaC = gpu.zeros(np.shape(C))
        deltaB = gpu.zeros(np.shape(bw))
        deltaM = gpu.zeros(np.shape(M))
        deltaJ = gpu.zeros(np.shape(J))
        deltaBj = gpu.zeros(np.shape(bj))

        self.R = R
        self.C = C
        self.bw = bw
        self.M = M
        self.J = J
        self.bj = bj
        self.deltaR = deltaR
        self.deltaC = deltaC
        self.deltaB = deltaB
        self.deltaM = deltaM
        self.deltaJ = deltaJ
        self.deltaBj = deltaBj
Example #6
def _smepdpsolve_single_trajectory(Heff, dt, tlist, N_store, N_substeps, rho_t,
                                   c_ops, e_ops, data):
    """ 
    Internal function.
    """
    states_list = []

    raise NotImplementedError("SME PDP solver not yet completed")

    phi_t = np.copy(psi_t)

    prng = RandomState() # todo: seed it
    r_jump, r_op = prng.rand(2)

    jump_times = []
    jump_op_idx = []

    for t_idx, t in enumerate(tlist):

        if e_ops:
            for e_idx, e in enumerate(e_ops):
                data.expect[e_idx, t_idx] += _rho_expect(e, rho_t)
        else:
            states_list.append(Qobj(rho_t))

        for j in range(N_substeps):

            if _rho_expect(d_op, sigma_t) < r_jump:
                # jump occurs
                p = np.array([rho_expect(c.dag() * c, rho_t) for c in c_ops])
                p = np.cumsum(p / np.sum(p))
                n = np.where(p >= r_op)[0][0]

                # apply jump
                rho_t = c_ops[n] * psi_t * c_ops[n].dag()
                rho_t /= rho_expect(c.dag() * c, rho_t)
                rho_t = np.copy(rho_t)

                # store info about jump
                jump_times.append(tlist[t_idx] + dt * j)
                jump_op_idx.append(n)

                # get new random numbers for next jump
                r_jump, r_op = prng.rand(2)

            # deterministic evolution without correction for norm decay

            # deterministic evolution with correction for norm decay

            # increment wavefunctions
            sigma_t += dsigma_t
            rho_t += drho_t

    return states_list, jump_times, jump_op_idx
Example #7
def load_dataset(max_sent_len=42):
    train_data = []
    valid_data = []

    rnd = RandomState(42)
    # for file_path in islice(sorted(glob.glob('data/valid/*'), key=lambda k: rnd.rand()), 0, 40):
    for file_path in islice(sorted(glob.glob('data/valid/*'), key=lambda k: rnd.rand()), 0, 1):
        print file_path
        with open(file_path) as f:
            for line in f:
                line = line.decode('utf-8').split()
                if len(line) < max_sent_len:
                    valid_data.append(line)

    print '===train===='
    # for file_path in islice(sorted(glob.glob('data/train/*'), key=lambda k: rnd.rand()), 0, 40):
    for file_path in islice(sorted(glob.glob('data/train/*'), key=lambda k: rnd.rand()), 0, 1):
        with open(file_path) as f:
            print file_path
            for line in f:
                line = line.decode('utf-8').split()
                if len(line) < max_sent_len:
                    train_data.append(line)

    # vocab = Counter()
    # for line in chain(train_data, valid_data):
    #     vocab.update(line)
    # print vocab.most_common(n=int(len(vocab) * 1.0))[-10:]
    # print vocab.most_common(n=int(len(vocab) * 0.085))[-10:]
    # vocab = set([e[0] for e in vocab.most_common(n=int(len(vocab) * 0.085))])
    # vocab.update(['<UNK>'])
    # word_to_idx = {}
    # idx_to_word = []
    # for i, word in enumerate(vocab):
    #     word_to_idx[word] = i
    #     idx_to_word.append(word)
    # word_to_idx['<<S>>'] = i + 1
    # idx_to_word.append('<<S>>')
    # unk_idx = word_to_idx['<UNK>']

    with open('vocab.pckl') as f:
        vocab = cPickle.load(f)
    word_to_idx = vocab['word_to_idx']
    idx_to_word = vocab['idx_to_word']
    unk_idx = word_to_idx['<UNK>']

    for i in xrange(len(train_data)):
        train_data[i] = [word_to_idx.get(e, unk_idx) for e in train_data[i]]
    for i in xrange(len(valid_data)):
        valid_data[i] = [word_to_idx.get(e, unk_idx) for e in valid_data[i]]

    return train_data, valid_data, word_to_idx, idx_to_word
def test_pad_and_unpad_equal_1d():
    'gridder.pad_array and subsequent .unpad_array gives original array: 1D'
    prng = RandomState(12345)
    x = prng.rand(21)
    xpad, nps = gridder.pad_array(x)
    xunpad = gridder.unpad_array(xpad, nps)
    assert_almost(xunpad, x)
Example #9
class RandomGenerator(object):
    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def random(self):
        return self._random.rand()

    def randint(self, a, b=None):
        if b is None:
            b = a
            a = 0
        r = self._random.randint(a, high=b, size=1)
        return r[0]

    def sample(self, population, k):
        if k == 0:
            return []
        return self._random.choice(population, size=k, replace=False)

    def __getattr__(self, attr):
        return getattr(self._random, attr)

    def __getstate__(self):
        return {'_random': self._random}

    def __setstate__(self, d):
        self._random = d['_random']
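
A short usage sketch for the wrapper above (seed chosen arbitrarily): random() and randint() follow the conventions of the standard random module, sample() draws without replacement, and any other attribute (e.g. randn) falls through to the underlying RandomState via __getattr__. The __getstate__/__setstate__ pair also makes instances picklable.

import pickle

rg = RandomGenerator(seed=7)
u = rg.random()                       # uniform float in [0, 1)
k = rg.randint(10)                    # integer in [0, 10)
pair = rg.sample([1, 2, 3, 4, 5], 2)  # two distinct elements
g = rg.randn(3)                       # delegated to RandomState.randn

rg2 = pickle.loads(pickle.dumps(rg))  # round-trips via __getstate__/__setstate__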
Example #10
def stitch(targets,images):
    mask = rois_mask(targets) # True where image data is
    gaps_mask = mask==False # True where infill needs to go
    # compute bounds relative to the camera field
    (x,y,w,h) = stitched_box(targets)
    uroi = img_as_float(stitch_raw(targets,images,(x,y,w,h))) # stitch with black infill

    # step 1: sparsely sample background mostly ignoring blob
    # compute gradient on both axes
    k = [[-3,-1,0,1,3],
         [-3,-1,0,1,3],
         [-3,-1,0,1,3],
         [-3,-1,0,1,3]]
    gy = convolve(uroi,k)
    gx = convolve(uroi,np.rot90(k))
    # ignore all but low-gradient areas
    bg = (abs(gy+gx) < 0.2) & mask

    # step 2: remove less contiguous areas
    filter_size = max(2,int(max(h,w)/200))
    mf = minimum_filter(bg*1,filter_size)

    # step 3: interpolate between samples
    z = inpaint(uroi*mf,mf==False)

    # step 4: subsample and re-interpolate to degrade artifacts in fill region
    random = RandomState(0)
    (h,w)=z.shape
    ng = random.rand(h,w) < 0.01
    z2 = inpaint(z*ng,ng==False)

    # step 5: final composite
    roi = (z2 * gaps_mask) + uroi
    return (roi * 255).astype(np.uint8), mask
def test_sum_prediction():
    """ Test with a model that predicts sum over four samples """
    rng = RandomState(3904890384)
    n_samples_in_buffer = 1000
    dataset = rng.rand(n_samples_in_buffer*2,5).astype(np.float32)
    markers = np.ones((n_samples_in_buffer*2,1)).astype(np.float32)
    set_and_markers = np.concatenate((dataset, markers), axis=1)
    
    factor_new=0.001
    n_stride = 10
    pred_freq = 11
    standardized = exponential_running_standardize(dataset,
        factor_new=factor_new, init_block_size=n_stride)
    model = InputLayer([1,1,4,1])
    model = GlobalPoolLayer(model,pool_function=T.sum)
    
    expected = [np.sum(standardized[stop-4:stop], axis=0) for stop in xrange(11, dataset.shape[0], 11)]
    expected = np.array(expected)
    
    processor = StandardizeProcessor(factor_new=factor_new,
        n_samples_in_buffer=n_samples_in_buffer)
    
    online_model = OnlineModel(model)
    online_pred = OnlineCoordinator(processor, online_model, pred_freq=pred_freq,
        trainer=NoTrainer())
    
    online_pred.initialize(n_chans=dataset.shape[1])
    all_preds = []
    for i_start_sample in xrange(0,dataset.shape[0]-n_stride+1,n_stride):
        online_pred.receive_samples(set_and_markers[i_start_sample:i_start_sample+n_stride])
        if online_pred.has_new_prediction():
            pred, _ = online_pred.pop_last_prediction_and_sample_ind()
            all_preds.append(pred)
        
    assert np.allclose(np.array(all_preds).squeeze(), expected, rtol=1e-3)
def test_online_predictor():
    """ Test whether predictions are done at correct timepoints.
    Model actually just returns input """
    
    rng = RandomState(3904890384)
    n_samples_in_buffer = 1000
    dataset = rng.rand(n_samples_in_buffer*2,5).astype(np.float32)
    markers = np.ones((n_samples_in_buffer*2,1)).astype(np.float32)
    set_and_markers = np.concatenate((dataset, markers), axis=1)
    
    factor_new=0.001
    n_stride = 10
    pred_freq = 11
    standardized = exponential_running_standardize(dataset,
        factor_new=factor_new, init_block_size=n_stride)
    model = InputLayer([1,1,1,1])
    
    processor = StandardizeProcessor(factor_new=factor_new,
        n_samples_in_buffer=n_samples_in_buffer)
    
    online_model = OnlineModel(model)
    online_pred = OnlineCoordinator(processor, online_model, pred_freq=pred_freq,
        trainer=NoTrainer())
    
    online_pred.initialize(n_chans=dataset.shape[1])
    all_preds = []
    for i_start_sample in xrange(0,dataset.shape[0]-n_stride+1,n_stride):
        online_pred.receive_samples(set_and_markers[i_start_sample:i_start_sample+n_stride])
        if online_pred.has_new_prediction():
            pred, _ = online_pred.pop_last_prediction_and_sample_ind()
            all_preds.append(pred)
        
    assert np.array_equal(np.array(all_preds).squeeze(), standardized[10::pred_freq])
    def execute(method, seed, output):
        # size of the output image
        size_x, size_y = (768, 512)

        # output format selection
        if output['mime_type'] == "image/png":
            extension = ".png"
            driver = gdal.GetDriverByName("PNG")
            options = []
        elif output['mime_type'] == "image/jpeg":
            extension = ".jpg"
            driver = gdal.GetDriverByName("JPEG")
            options = []
        elif output['mime_type'] == "image/tiff":
            extension = ".tif"
            driver = gdal.GetDriverByName("GTiff")
            options = ["TILED=YES", "COMPRESS=DEFLATE", "PHOTOMETRIC=RGB"]
        else:
            raise ExecuteError("Unexpected output format received! %r" % output)

        # generate a random in-memory GDAL dataset
        mem_driver = gdal.GetDriverByName("MEM")
        mem_ds = mem_driver.Create("", size_x, size_y, 3, gdal.GDT_Byte)
        random_state = RandomState(seed)
        for i in xrange(3):
            mem_ds.GetRasterBand(i+1).WriteArray(
                (256.0 * random_state.rand(size_y, size_x)).astype('uint8')
            )

        # convert in-memory dataset to the desired output
        tmp_filename = os.path.join("/tmp", str(uuid.uuid4()) + extension)
        output_filename = "test03_binary_complex" + extension

        try:
            driver.CreateCopy(tmp_filename, mem_ds, 0, options)
            del mem_ds

            if method == 'file':
                # Return object as a temporary Complex Data File.
                # Note that the object holds the format attributes!
                # The 'filename' parameter sets the raw output
                # 'Content-Disposition: filename=' HTTP header.
                return CDFile(tmp_filename, filename=output_filename, **output)

            elif method == 'in-memory-buffer':
                # Return object as an in-memory Complex Data Buffer.
                # Note that the object holds the format attributes!
                # The 'filename' parameter sets the raw output
                # 'Content-Disposition: filename=' HTTP header.
                with file(tmp_filename) as fid:
                    _output = CDByteBuffer(
                        fid.read(), filename=output_filename, **output
                    )
                os.remove(tmp_filename)
                return _output
        except:
            # make sure no temporary file is left in case of an exception
            if os.path.isfile(tmp_filename):
                os.remove(tmp_filename)
            raise
def noisy_remove(G, p, seed=None):
	from numpy.random import RandomState
	prng = RandomState(seed)
	G_copy = G.copy()
	for (a,b) in G_copy.edges():
		if prng.rand() < p and len(G_copy[a])>1 and len(G_copy[b])>1:
			G_copy.remove_edges_from([(a,b)])
	return G_copy
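
A usage sketch for noisy_remove, assuming networkx is available: each edge is dropped with probability p, but only while both endpoints still have another neighbour, so the pruned copy never contains isolated nodes and is reproducible for a fixed seed.

import networkx as nx

G = nx.karate_club_graph()          # small built-in example graph
H = noisy_remove(G, p=0.2, seed=1)  # same seed -> same pruned copy

assert H.number_of_edges() <= G.number_of_edges()
assert all(H.degree(n) >= 1 for n in H.nodes())  # no node left isolated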
def test_sim_corr():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = sim_corr(x, y, group, seed=prng)
    res2 = sim_corr(x, y, group)
    np.testing.assert_equal(res1[0], res2[0])
def test_corrcoef():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = corrcoef(x, y, group)
    res2 = corrcoef(x, y, group)
    np.testing.assert_equal(res1, res2)
Example #17
def test_coordinatevec_padding_1d():
    'gridder.padcoords accurately pads coordinate vector in 1D'
    prng = RandomState(12345)
    f = prng.rand(72) * 10
    x = np.arange(100, 172)
    fpad, nps = gridder.pad_array(f)
    N = gridder.pad_coords(x, f.shape, nps)
    npt.assert_allclose(N[0][nps[0][0]:-nps[0][1]], x)
Example #18
    def init_params(self, embed_map, count_dict, L):
        """
        Initializes embeddings and context matrices
        """
        prng = RandomState(self.seed)

        # Pre-trained word embedding matrix
        if embed_map != None:
            R = np.zeros((self.K, self.V))
            for i in range(self.V):
                word = count_dict[i]
                if word in embed_map:
                    R[:,i] = embed_map[word]
#                else:
#                    R[:,i] = embed_map['*UNKNOWN*']
        else:
            r = np.sqrt(6) / np.sqrt(self.K + self.V + 1)
            R = prng.rand(self.K, self.V) * 2 * r - r

        bw = np.zeros((1, self.V))

        # Context 
        C = 0.01 * prng.randn(self.context, self.K, self.K)

        # Image context
        M = 0.01 * prng.randn(self.h, self.K)

        # Hidden layer
        r = np.sqrt(6) / np.sqrt(self.D + self.h + 1)
        J = prng.rand(self.D, self.h) * 2 * r - r
        bj = np.zeros((1, self.h))

        R = theano.shared(R.astype(theano.config.floatX), borrow=True)
        C = theano.shared(C.astype(theano.config.floatX), borrow=True)
        bw = theano.shared(bw.astype(theano.config.floatX), borrow=True)
        M = theano.shared(M.astype(theano.config.floatX), borrow=True)
        J = theano.shared(J.astype(theano.config.floatX), borrow=True)
        bj = theano.shared(bj.astype(theano.config.floatX), borrow=True)

        self.R = R
        self.C = C
        self.bw = bw
        self.M = M
        self.J = J
        self.bj = bj
    def load(self):
        rng = RandomState(np.uint64(hash("RandomDataset")))
        input_shape = [500,3,600,1]

        y = rng.random_integers(0,1,size=input_shape[0])
        y = OneHotFormatter(2).format(y)
        topo_view = rng.rand(*input_shape)
        super(RandomDataset, self).__init__(topo_view=topo_view, y=y,
                                            axes=('b', 'c', 0, 1))
    def test_moving_average_convergence_divergence(self,
                                                   seed,
                                                   fast_period,
                                                   slow_period,
                                                   signal_period):
        rng = RandomState(seed)

        nassets = 3

        macd = MovingAverageConvergenceDivergenceSignal(
            fast_period=fast_period,
            slow_period=slow_period,
            signal_period=signal_period,
        )

        today = pd.Timestamp('2016', tz='utc')
        assets = pd.Index(np.arange(nassets))
        out = np.empty(shape=(nassets,), dtype=np.float64)
        close = rng.rand(macd.window_length, nassets)

        macd.compute(
            today,
            assets,
            out,
            close,
            fast_period,
            slow_period,
            signal_period,
        )

        close_df = pd.DataFrame(close)
        fast_ewma = self.expected_ewma(
            close_df,
            fast_period,
        )
        slow_ewma = self.expected_ewma(
            close_df,
            slow_period,
        )
        signal_ewma = self.expected_ewma(
            fast_ewma - slow_ewma,
            signal_period
        )

        # Everything but the last row should be NaN.
        self.assertTrue(signal_ewma.iloc[:-1].isnull().all().all())

        # We're testing a single compute call, which we expect to be equivalent
        # to the last row of the frame we calculated with pandas.
        expected_signal = signal_ewma.values[-1]

        np.testing.assert_almost_equal(
            out,
            expected_signal,
            decimal=8
        )
Example #21
def test_random_choice_error():
    """random_choice should raise an error when probabilities do not sum
    to one."""

    rstate = RandomState(0)
    p = rstate.rand(10)
    p /= p.sum()
    p *= 1.001
    with pytest.raises(ValueError):
        nestle.random_choice(10, p=p, rstate=rstate)
Example #22
def test_fails_if_npd_lessthan_arraydim():
    'gridder.pad_array raises error if given npad is less than array length'
    shape = (101, 172)
    x, y, z = gridder.regular((-5000., 5000., -5000., 5000.), shape, z=-150)
    g = z.reshape(shape)
    npdt = (128, 128)
    raises(ValueError, gridder.pad_array, g, npd=npdt)
    prng = RandomState(12345)
    g = prng.rand(20)
    npdt = 16
    raises(ValueError, gridder.pad_array, g, npd=npdt)
Example #23
def test_fails_if_npd_incorrect_dimension():
    'gridder.pad_array raises error if given improper dimension on npadding'
    s = (101, 172)
    x, y, z = gridder.regular((-5000., 5000., -5000., 5000.), s, z=-150)
    g = z.reshape(s)
    npdt = 128
    raises(ValueError, gridder.pad_array, g, npd=npdt)
    npdt = (128, 256, 142)
    raises(ValueError, gridder.pad_array, g, npd=npdt)
    prng = RandomState(12345)
    g = prng.rand(50)
    raises(ValueError, gridder.pad_array, g, npd=npdt)
Example #24
def create_fake_skll_learner(df_coefficients):

    """
    Create fake SKLL linear regression learner object
    using the coefficients in the given data frame.

    Parameters
    ----------
    df_coefficients : pandas DataFrame
        Data frame containing the linear coefficients
        we want to create the fake SKLL model with.

    Returns
    -------
    learner: skll Learner object
        SKLL LinearRegression Learner object
        with the specified coefficients.
    """

    # get the logger
    logger = logging.getLogger(__name__)

    # initialize a random number generator
    randgen = RandomState(1234567890)

    # iterate over the coefficients
    coefdict = {}
    for feature, coefficient in df_coefficients.itertuples(index=False):
        if feature == 'Intercept':
            intercept = coefficient
        else:
            # exclude NA coefficients
            if np.isnan(coefficient):
                logger.warning("No coefficient was estimated for "
                               "{}. This is likely due to exact "
                               "collinearity in the model. This "
                               "feature will not be used for model "
                               "building".format(feature))
            else:
                coefdict[feature] = coefficient

    learner = Learner('LinearRegression')
    num_features = len(coefdict)  # excluding the intercept
    fake_feature_values = randgen.rand(num_features)
    fake_features = [dict(zip(coefdict, fake_feature_values))]
    fake_fs = FeatureSet('fake', ids=['1'], labels=[1.0], features=fake_features)
    learner.train(fake_fs, grid_search=False)

    # now create its parameters from the coefficients from the built-in model
    learner.model.coef_ = learner.feat_vectorizer.transform(coefdict).toarray()[0]
    learner.model.intercept_ = intercept
    return learner
Example #25
def test_random_choice():
    """nestle.random_choice() is designed to mimic np.random.choice(),
    for numpy < v1.7.0. In cases where we have both, test that they agree.
    """
    rstate = RandomState(0)
    p = rstate.rand(10)
    p /= p.sum()
    for seed in range(10):
        rstate.seed(seed)
        i = rstate.choice(10, p=p)
        rstate.seed(seed)
        j = nestle.random_choice(10, p=p, rstate=rstate)
        assert i == j
def test_sim_corr():
    prng = RandomState(42)
    x = prng.rand(10)
    y = x
    group = prng.randint(3, size=10)
    res1 = sim_corr(x, y, group, seed=prng, reps=100)
    res2 = sim_corr(x, y, group, seed=prng, alternative='less', reps=100)
    res3 = sim_corr(x, y, group, seed=prng, alternative='two-sided', reps=100)
    
    assert_almost_equal(res1[0], 1-res2[0])
    assert_equal(res1[1], res2[1])
    assert_equal(res1[1], res3[1])
    assert_equal(res1[0], res3[0])
Example #27
class ListaSet:
    def __init__(self, paramfile):
        self.param = {'paramfile': paramfile}
        plines=readLines(paramfile)
        for l in plines:
            l=l.rstrip().split()
            self.param[l[0]]=l[1]
        self.param['outsize']=int(self.param['outsize'])
        print self.param
        print "ListaPrvd_full with paramfile:", paramfile

        self.indim=self.param['outsize']**2*16
        self.outdim=self.param['outsize']**2
        self.datanum=2
        self.prng = RandomState(1234567890)
        print '%d samples found' % self.datanum

    def get_num_images(self):
        return self.datanum
    
    def get_input_dim(self):
        return self.indim

    def get_output_dim(self):
        return self.outdim

    def get_input(self, idx):
        #print 'get_input!'
        res=np.zeros((self.param['outsize'], self.param['outsize'], 16), dtype=float)
        res[:,:,0:2]=(self.prng.rand(self.param['outsize'], self.param['outsize'], 2)-0.5)/10+1
        res[:,:,2:]=(self.prng.rand(self.param['outsize'], self.param['outsize'], 14)-0.5)+1
        return res

    def get_output(self, idx):
        res=np.zeros((self.param['outsize'], self.param['outsize']), dtype=float)+1
        return res
    
    def getmeta(self, idx):
        return self.param
Example #28
    def test_group_var_large_inputs(self):

        prng = RandomState(1234)

        out = np.array([[np.nan]], dtype=self.dtype)
        counts = np.array([0], dtype='int64')
        values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype)
        values.shape = (10 ** 6, 1)
        labels = np.zeros(10 ** 6, dtype='int64')

        self.algo(out, counts, values, labels)

        self.assertEqual(counts[0], 10 ** 6)
        tm.assert_almost_equal(out[0, 0], 1.0 / 12, check_less_precise=True)
Example #29
    def test_group_var_generic_2d_all_finite(self):
        prng = RandomState(1234)

        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
        counts = np.zeros(5, dtype='int64')
        values = 10 * prng.rand(10, 2).astype(self.dtype)
        labels = np.tile(np.arange(5), (2, )).astype('int64')

        expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2
        expected_counts = counts + 2

        self.algo(out, counts, values, labels)
        np.testing.assert_allclose(out, expected_out, self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)
Example #30
    def test_group_var_generic_1d(self):
        prng = RandomState(1234)

        out = (np.nan * np.ones((5, 1))).astype(self.dtype)
        counts = np.zeros(5, dtype="int64")
        values = 10 * prng.rand(15, 1).astype(self.dtype)
        labels = np.tile(np.arange(5), (3,)).astype("int64")

        expected_out = (np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2)[:, np.newaxis]
        expected_counts = counts + 3

        self.algo(out, counts, values, labels)
        np.testing.assert_allclose(out, expected_out, self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)
from numpy.testing import assert_array_almost_equal, assert_almost_equal
from numpy.random import RandomState

from sklearn.externals import joblib

from statsmodels import api as sm
import pandas as pd

import numpy as np
import pickle
import pytest
import time
import os

# initialize the random state
rs = RandomState(42)
y = rs.rand(25)

# more interesting heart rate data (asserts we can use a series)
hr = load_heartrate(as_series=True)

# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([
    4.439524, 4.769823, 6.558708, 5.070508, 5.129288, 6.715065, 5.460916,
    3.734939, 4.313147, 4.554338, 6.224082, 5.359814, 5.400771, 5.110683,
    4.444159, 6.786913, 5.497850, 3.033383, 5.701356, 4.527209, 3.932176,
    4.782025, 3.973996, 4.271109, 4.374961, 3.313307, 5.837787, 5.153373,
    3.861863, 6.253815, 5.426464, 4.704929, 5.895126, 5.878133, 5.821581,
    5.688640, 5.553918, 4.938088, 4.694037, 4.619529, 4.305293, 4.792083,
    3.734604, 7.168956, 6.207962, 3.876891, 4.597115, 4.533345, 5.779965,
    4.916631
])
class TestTriggerPatterns(unittest.TestCase):
    def setUp(self):
        self.random_state = RandomState(1234)
        self.rgb_entity = GenericImageEntity(self.random_state.rand(1000, 1000, 3).astype(np.uint8))
        self.rgba_entity = GenericImageEntity(self.random_state.rand(500, 500, 4).astype(np.uint8))
        self.noop = NoOpFilterXForm()
        self.noop_down = NoOpFilterXForm("BGR", True, False)
        self.gotham = GothamFilterXForm()
        self.nashville = NashvilleFilterXForm()
        self.kelvin = KelvinFilterXForm()
        self.lomo = LomoFilterXForm()
        self.toaster = ToasterXForm()

    def test_data_integrity(self):
        start_array = self.rgb_entity.get_data()
        end_array = self.noop.do(self.rgb_entity, self.random_state).get_data()
        self.assertTrue(np.array_equal(start_array, end_array))
        start_array = self.rgba_entity.get_data()
        end_array = self.noop.do(self.rgba_entity, self.random_state).get_data()
        self.assertTrue(np.array_equal(start_array, end_array))
        start_array = self.rgba_entity.get_data()
        end_array = self.noop_down.do(self.rgba_entity, self.random_state).get_data()
        self.assertTrue(np.array_equal(start_array[:, :, :3], end_array))

    def test_gotham(self):
        out_rgb = self.gotham.do(self.rgb_entity, self.random_state)
        self.assertEqual(3, out_rgb.get_data().shape[2])
        out_rgba = self.gotham.do(self.rgba_entity, self.random_state)
        self.assertEqual(4, out_rgba.get_data().shape[2])

    def test_nashville(self):
        out_rgb = self.nashville.do(self.rgb_entity, self.random_state)
        self.assertEqual(3, out_rgb.get_data().shape[2])
        out_rgba = self.nashville.do(self.rgba_entity, self.random_state)
        self.assertEqual(4, out_rgba.get_data().shape[2])

    def test_kelvin(self):
        out_rgb = self.kelvin.do(self.rgb_entity, self.random_state)
        self.assertEqual(3, out_rgb.get_data().shape[2])
        out_rgba = self.kelvin.do(self.rgba_entity, self.random_state)
        self.assertEqual(4, out_rgba.get_data().shape[2])

    def test_lomo(self):
        out_rgb = self.lomo.do(self.rgb_entity, self.random_state)
        self.assertEqual(3, out_rgb.get_data().shape[2])
        out_rgba = self.lomo.do(self.rgba_entity, self.random_state)
        self.assertEqual(4, out_rgba.get_data().shape[2])

    def test_toaster(self):
        out_rgb = self.toaster.do(self.rgb_entity, self.random_state)
        self.assertEqual(3, out_rgb.get_data().shape[2])
        out_rgba = self.toaster.do(self.rgba_entity, self.random_state)
        self.assertEqual(4, out_rgba.get_data().shape[2])

    def test_channel_order(self):
        bgr_lomo = LomoFilterXForm('BGR')
        rgb_lomo = LomoFilterXForm('RGB')
        bgr_img = np.concatenate((np.ones((5, 5, 1)), np.zeros((5, 5, 2))), axis=2).astype(np.uint8)
        rgb_img = np.concatenate((np.zeros((5, 5, 2)), np.ones((5, 5, 1))), axis=2).astype(np.uint8)
        bgr_result = bgr_lomo.do(GenericImageEntity(bgr_img), random_state_obj=self.random_state)
        rgb_result = rgb_lomo.do(GenericImageEntity(rgb_img), random_state_obj=self.random_state)
        self.assertTrue(np.array_equal(bgr_result.get_data()[:, :, 0], rgb_result.get_data()[:, :, 2]))
        bgr_switched_result = rgb_lomo.do(GenericImageEntity(bgr_img), random_state_obj=self.random_state)
        rgb_switched_result = bgr_lomo.do(GenericImageEntity(rgb_img), random_state_obj=self.random_state)
        # transform should be modifying R and G channels, but is instead modifying B and G channels, setting to zero
        self.assertTrue(np.array_equal(bgr_switched_result.get_data(), np.zeros((5, 5, 3))))
        self.assertTrue(np.array_equal(rgb_switched_result.get_data(), np.zeros((5, 5, 3))))
Example #33
class PMF(ModelBase):
    '''
    Probabilistic Matrix Factorization
    '''
    def __init__(self,
                 n_user,
                 n_item,
                 n_feature,
                 batch_size=1e5,
                 epsilon=50.0,
                 momentum=0.8,
                 seed=None,
                 reg=1e-2,
                 converge=1e-5,
                 max_rating=None,
                 min_rating=None):
        super(PMF, self).__init__()
        self.n_user = n_user
        self.n_item = n_item
        self.n_feature = n_feature

        self.random_state = RandomState(seed)
        # batch size
        self.batch_size = batch_size
        # learning rate
        self.epsilon = float(epsilon)
        # momentum
        self.momentum = float(momentum)
        # regularization parameter
        self.reg = reg
        self.converge = converge
        self.max_rating = float(
            max_rating) if max_rating is not None else max_rating
        self.min_rating = float(
            min_rating) if min_rating is not None else min_rating

        # data state
        self.mean_rating_ = None
        # randomly initialized user matrix of size n_user x n_feature
        self.user_features_ = 0.1 * self.random_state.rand(n_user, n_feature)
        # randomly initialized item matrix of size n_item x n_feature, used for prediction and weight updates
        self.item_features_ = 0.1 * self.random_state.rand(n_item, n_feature)

    def fit(self, ratings, n_iters=50):
        # global mean of all movie ratings
        self.mean_rating_ = np.mean(ratings[:, 2])
        last_rmse = None
        # split the dataset for mini-batch training, e.g. batch_size=10000 with 100000 ratings gives 10 batches
        batch_num = int(np.ceil(float(ratings.shape[0] / self.batch_size)))
        logger.debug('batch count = {}'.format(batch_num + 1))

        # initialize the momentum matrices
        u_feature_mom = np.zeros((self.n_user, self.n_feature))
        i_feature_mom = np.zeros((self.n_item, self.n_feature))
        # initialize the gradient matrices
        u_feature_grads = np.zeros((self.n_user, self.n_feature))
        i_feature_grads = np.zeros((self.n_item, self.n_feature))

        for iteration in range(n_iters):
            logger.debug('iteration {:d}'.format(iteration))
            # shuffle the dataset
            self.random_state.shuffle(ratings)
            # train batch by batch
            for batch in range(batch_num):
                start_idx = int(batch * self.batch_size)
                end_idx = int((batch + 1) * self.batch_size)
                data = ratings[start_idx:end_idx]

                # compute gradients
                # data.take(0, axis=1) takes column 0 of the batch, i.e. the user IDs
                # user_features_ is ordered by user ID, so those IDs select the matching rows
                u_features = self.user_features_.take(data.take(0, axis=1),
                                                      axis=0)
                # column 1 holds the item IDs, which index into item_features_
                i_features = self.item_features_.take(data.take(1, axis=1),
                                                      axis=0)

                # predictions: inner product of user and item feature rows
                preds = np.sum(u_features * i_features, 1)
                # error: prediction - (actual rating - global mean of ratings)
                errs = preds - (data.take(2, axis=1) - self.mean_rating_)
                # error matrix of shape (len(errs), n_feature)
                # http://blog.csdn.net/ksearch/article/details/21388985
                # e.g. with errs.shape=(10000,), np.tile repeats it n_feature times along rows,
                # giving (10, 10000); transposing to (10000, 10) copies each error 10 times per row
                err_mat = np.tile(errs, (self.n_feature, 1)).T
                # gradients of (u_features * i_features - trues)^2 + reg * (u_features + i_features)
                # the second term is the (L1) regularizer; differentiate w.r.t. u_features and i_features
                u_grads = 2 * i_features * err_mat + self.reg * u_features
                i_grads = 2 * u_features * err_mat + self.reg * i_features

                # reset the gradient matrices to zero
                u_feature_grads.fill(0.0)
                i_feature_grads.fill(0.0)

                # accumulate the gradient matrices
                for i in range(data.shape[0]):
                    row = data.take(i, axis=0)
                    # row[0] is the user ID; u_feature_grads.shape=(943, 10)
                    u_feature_grads[row[0], :] += u_grads.take(i, axis=0)
                    # row[1] is the item ID; i_feature_grads.shape=(1682, 10)
                    i_feature_grads[row[1], :] += i_grads.take(i, axis=0)

                # update momentum: previous direction + current gradient = direction to move now
                # momentum controls how much the previous gradients matter; when the current gradient
                # is near zero (plateau or local minimum), the accumulated inertia keeps the update moving
                u_feature_mom = (self.momentum * u_feature_mom) + \
                                ((self.epsilon / data.shape[0]) * u_feature_grads)
                i_feature_mom = (self.momentum * i_feature_mom) + \
                                ((self.epsilon / data.shape[0]) * i_feature_grads)

                # update the latent variables
                self.user_features_ -= u_feature_mom
                self.item_features_ -= i_feature_mom

            # compute RMSE
            train_preds = self.predict(ratings[:, :2])
            train_rmse = RMSE(train_preds, ratings[:, 2])
            logger.info('iter: {:d}, train RMSE: {:.6f}'.format(
                iteration, train_rmse))

            # stop when the RMSE change between two iterations drops below the convergence threshold
            if last_rmse and abs(train_rmse - last_rmse) < self.converge:
                logger.info(
                    'converges at iteration {:d}, stop.'.format(iteration))
                break
            else:
                last_rmse = train_rmse

        return self.user_features_, self.item_features_

    def predict(self, data):
        # raise an error if the model has not been fitted yet
        if not self.mean_rating_:
            raise NotFittedError()

        u_features = self.user_features_.take(data.take(0, axis=1), axis=0)
        i_features = self.item_features_.take(data.take(1, axis=1), axis=0)
        preds = np.sum(u_features * i_features, 1) + self.mean_rating_

        # clip predictions at the upper bound
        if self.max_rating:
            preds[preds > self.max_rating] = self.max_rating

        # clip predictions at the lower bound
        if self.min_rating:
            preds[preds < self.min_rating] = self.min_rating

        return preds
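
A hedged usage sketch for this PMF class, assuming the module-level helpers it references (ModelBase, logger, RMSE, NotFittedError) are importable alongside it. Rows of the ratings array are (user_id, item_id, rating); integer IDs are used directly as row indices into the feature matrices, as the take() calls in fit() imply.

import numpy as np
from numpy.random import RandomState

# Tiny synthetic ratings table: columns are (user_id, item_id, rating).
rng = RandomState(0)
n_user, n_item = 20, 15
ratings = np.column_stack([rng.randint(0, n_user, 200),
                           rng.randint(0, n_item, 200),
                           rng.randint(1, 6, 200)])

pmf = PMF(n_user=n_user, n_item=n_item, n_feature=5,
          batch_size=50, epsilon=10.0, seed=0,
          max_rating=5, min_rating=1)
pmf.fit(ratings, n_iters=10)
preds = pmf.predict(ratings[:, :2])   # clipped to [min_rating, max_rating]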
Example #34
def nn():
    # batch size for the training data
    batch_size = 8

    # parameters
    w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
    w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

    # leaving the first dimension of the placeholder shape as None allows small batches during
    # training and larger batches at test time, giving more freedom when feeding data
    x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
    y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

    # forward pass to compute the prediction
    a = tf.matmul(x, w1)
    y = tf.matmul(a, w2)
    '''
    Backward pass
    '''
    # squash y into the range (0, 1)
    y = tf.sigmoid(y)

    # define the cross-entropy loss
    cross_entropy = -tf.reduce_mean(
        y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
        (1 - y_) * tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
    train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

    # generate a simulated dataset from random numbers
    rdm = RandomState(1)
    dataset_size = 128
    X = rdm.rand(dataset_size, 2)

    # Define the rule that labels the samples: x1 + x2 < 1 marks a positive sample,
    # anything else a negative one. Label 0 means negative and 1 means positive.
    Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

    # create a session to run the TensorFlow program

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # initialize the variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # print the initial weights
        print("initial weights w1:")
        print(sess.run(w1))
        print("initial weights w2:")
        print(sess.run(w2))

        # training loop
        steps = 5000
        for i in range(steps):
            # select a batch of samples for this training step
            start = (i * batch_size) % dataset_size
            end = min(start + batch_size, dataset_size)

            # train the network on the selected samples and update the weights
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})

            # every 1000 steps, compute and print the cross-entropy over the full dataset
            if i % 1000 == 0:
                total_cross_entropy = sess.run(cross_entropy,
                                               feed_dict={
                                                   x: X,
                                                   y_: Y
                                               })
                print("After %d training step(s), cross entropy on all" % i +
                      "data is %g" % total_cross_entropy)

        # print the weights after training
        print("final weights w1:")
        print(sess.run(w1))
        print("final weights w2:")
        print(sess.run(w2))
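
For reference, the clipped binary cross-entropy minimized in nn() can be checked by hand with NumPy; this sketch mirrors the TF expression above (label y_, prediction y), with made-up values:

import numpy as np

def binary_cross_entropy(y_, y, eps=1e-10):
    """Clipped binary cross-entropy, matching the TF expression in nn()."""
    return -np.mean(y_ * np.log(np.clip(y, eps, 1.0)) +
                    (1 - y_) * np.log(np.clip(1 - y, eps, 1.0)))

labels = np.array([1.0, 0.0, 1.0])
preds = np.array([0.9, 0.2, 0.6])
print(binary_cross_entropy(labels, preds))  # ~0.28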
Example #35
if __name__ == '__main__':
    rg = StackedRegressionGraph(bind_index=False)
    rg.new_plot()
    rg.new_plot()
    # rg.new_plot()
    from numpy.random import RandomState
    from numpy import linspace
    n = 50
    x = linspace(0, 10, n)

    rs = RandomState(123456)

    # print rs.randn(10)
    # print rs.randn(10)
    y = 5 + rs.rand(n)
    y[[1, 2, 3, 4]] = [1, 2, 3, 4]
    y2 = 10 + rs.rand(n)
    y2[[-1, -2, -3, -4]] = [6, 5, 6, 7]

    # y = 2 * x + random.rand(n)

    # d = np.zeros(n)
    # d[::10] = random.rand() + 5
    # d[::15] = random.rand() + 2

    # y += d

    fod = {'filter_outliers': False, 'iterations': 1, 'std_devs': 2}
    rg.new_series(
        x,
Example #36
class DataBlock(object):
    def __init__(self, train_data, valid_data, batch_size, word_dropout_prob, device_id):
        self.train_data = HomogeneousDataIterator(train_data, batch_size, randomize=True, infinite=True)
        self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
        self.train_data_iterator = iter(self.train_data)
        self.valid_data_iterator = iter(self.valid_data)
        self.word_keep_prob = 1.0 - word_dropout_prob
        self.rnd = RandomState(47571)
        self.unk_idx = word_to_idx['<UNK>']

        self.context = Context(device_id)
        c = Counter([len(line) for line in chain(train_data, valid_data)])
        print c.most_common()
        max_len = max([len(line) for line in chain(train_data, valid_data)])

        self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int', device_id))
        self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
        self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols, 'float', device_id)
        self.enc_mask = List([Connector(self._enc_mask[:, i]) for i in xrange(max_len)], self.enc_x.ncols)

        self.dec_x = Connector(Matrix.empty(batch_size, max_len + 1, 'int', device_id))
        self._dec_y = Matrix.empty(batch_size, max_len + 1, 'int', device_id)
        self.dec_y = List([Connector(self._dec_y[:, i]) for i in xrange(max_len + 1)], self._dec_y.ncols)
        self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
        self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols, 'float', device_id)
        self.dec_mask = List([Connector(self._dec_mask[:, i]) for i in xrange(max_len + 1)], self.dec_x.ncols)

        self.blocking_contexts = None
        self.training_mode = True

    def set_training_mode(self):
        self.training_mode = True

    def set_testing_mode(self):
        self.training_mode = False

    def fprop(self):
        if self.training_mode:
            data = next(self.train_data_iterator)
        else:
            try:
                data = next(self.valid_data_iterator)
            except StopIteration as e:
                self.valid_data_iterator = iter(self.valid_data)
                raise e
        lengths_npa = np.array([[len(e)] for e in data], np.int32, order='F')
        max_len = int(np.max(lengths_npa))

        self.enc_lengths.assign_npa(self.context, lengths_npa)
        self._enc_mask.mask_column_numbers_row_wise(self.context, self.enc_lengths)
        for e in self.enc_mask:
            e.last_modification_context = self.context

        lengths_npa += 1
        self.dec_lengths.assign_npa(self.context, lengths_npa)
        self._dec_mask.mask_column_numbers_row_wise(self.context, self.dec_lengths)
        for e in self.dec_mask:
            e.last_modification_context = self.context

        enc_x_npa = np.zeros((len(data), max_len), np.int32, 'F')
        dec_x_npa = np.zeros((len(data), max_len + 1), np.int32, 'F')
        dec_y_npa = np.zeros((len(data), max_len + 1), np.int32, 'F')
        for k, e in enumerate(data):
            enc_x_npa[k, :len(e)] = e
            if self.training_mode:
                new_e = [_ if self.rnd.rand() < self.word_keep_prob else self.unk_idx for _ in e]
            else:
                new_e = e
            dec_x_npa[k, :len(e) + 1] = [word_to_idx['<<S>>']] + new_e
            dec_y_npa[k, :len(e) + 1] = e + [word_to_idx['<<S>>']]
        self.enc_x.assign_npa(self.context, enc_x_npa)
        self.dec_x.assign_npa(self.context, dec_x_npa)
        self._dec_y.assign_npa(self.context, dec_y_npa)
        for e in self.dec_y:
            e.last_modification_context = self.context

        self.enc_mask.fprop()
        self.dec_mask.fprop()
        self.enc_x.fprop()
        self.dec_x.fprop()
        self.dec_y.fprop()
Example #37
class PMF(ModelBase):
    """Probabilistic Matrix Factorization
    """
    def __init__(self,
                 n_user,
                 n_item,
                 n_feature,
                 batch_size=1e5,
                 epsilon=50.0,
                 momentum=0.8,
                 seed=None,
                 reg=1e-2,
                 converge=1e-5,
                 max_rating=None,
                 min_rating=None):

        super(PMF, self).__init__()
        self.n_user = n_user
        self.n_item = n_item
        self.n_feature = n_feature

        self.random_state = RandomState(seed)

        # batch size
        self.batch_size = batch_size

        # learning rate
        self.epsilon = float(epsilon)
        self.momentum = float(momentum)
        # regularization parameter
        self.reg = reg
        self.converge = converge
        self.max_rating = float(max_rating) \
            if max_rating is not None else max_rating
        self.min_rating = float(min_rating) \
            if min_rating is not None else min_rating

        # data state
        self.mean_rating_ = None
        # user/item features
        self.user_features_ = 0.1 * self.random_state.rand(n_user, n_feature)
        self.item_features_ = 0.1 * self.random_state.rand(n_item, n_feature)

    def fit(self, ratings, n_iters=50):

        check_ratings(ratings, self.n_user, self.n_item, self.max_rating,
                      self.min_rating)

        self.mean_rating_ = np.mean(ratings[:, 2])
        last_rmse = None
        batch_num = int(np.ceil(float(ratings.shape[0] / self.batch_size)))
        logger.debug("batch count = %d", batch_num + 1)

        # momentum
        u_feature_mom = np.zeros((self.n_user, self.n_feature))
        i_feature_mom = np.zeros((self.n_item, self.n_feature))
        # gradient
        u_feature_grads = np.zeros((self.n_user, self.n_feature))
        i_feature_grads = np.zeros((self.n_item, self.n_feature))
        for iteration in xrange(n_iters):
            logger.debug("iteration %d...", iteration)

            self.random_state.shuffle(ratings)

            for batch in xrange(batch_num):
                start_idx = int(batch * self.batch_size)
                end_idx = int((batch + 1) * self.batch_size)
                data = ratings[start_idx:end_idx]

                # compute gradient
                u_features = self.user_features_.take(data.take(
                    0, axis=1).astype('int32'),
                                                      axis=0)
                i_features = self.item_features_.take(data.take(
                    1, axis=1).astype('int32'),
                                                      axis=0)
                preds = np.sum(u_features * i_features, 1)
                errs = preds - (data.take(2, axis=1) - self.mean_rating_)
                err_mat = np.tile(2 * errs, (self.n_feature, 1)).T
                u_grads = i_features * err_mat + self.reg * u_features
                i_grads = u_features * err_mat + self.reg * i_features

                u_feature_grads.fill(0.0)
                i_feature_grads.fill(0.0)
                for i in xrange(data.shape[0]):
                    row = data.take(i, axis=0)
                    u_feature_grads[int(row[0]), :] += u_grads.take(i, axis=0)
                    i_feature_grads[int(row[1]), :] += i_grads.take(i, axis=0)

                # update momentum
                u_feature_mom = (self.momentum * u_feature_mom) + \
                    ((self.epsilon / data.shape[0]) * u_feature_grads)
                i_feature_mom = (self.momentum * i_feature_mom) + \
                    ((self.epsilon / data.shape[0]) * i_feature_grads)

                # update latent variables
                self.user_features_ -= u_feature_mom
                self.item_features_ -= i_feature_mom

            # compute RMSE
            train_preds = self.predict(ratings[:, :2])
            train_rmse = RMSE(train_preds, ratings[:, 2])
            logger.info("iter: %d, train RMSE: %.6f", iteration, train_rmse)

            # stop when converge
            if last_rmse and abs(train_rmse - last_rmse) < self.converge:
                logger.info('converges at iteration %d. stop.', iteration)
                break
            else:
                last_rmse = train_rmse
        return self

    def predict(self, data):

        if not self.mean_rating_:
            raise NotFittedError()

        u_features = self.user_features_.take(data.take(
            0, axis=1).astype('int32'),
                                              axis=0)
        i_features = self.item_features_.take(data.take(
            1, axis=1).astype('int32'),
                                              axis=0)
        preds = np.sum(u_features * i_features, 1) + self.mean_rating_

        if self.max_rating:
            preds[preds > self.max_rating] = self.max_rating

        if self.min_rating:
            preds[preds < self.min_rating] = self.min_rating
        return preds
Example #38
y_hat = tf.matmul(a, w2)

# squash the prediction into the range (0, 1)
y_hat = tf.sigmoid(y_hat)
# cross-entropy loss
cross_entropy = -tf.reduce_mean(
    y * tf.log(tf.clip_by_value(y_hat, 1e-10, 1.0)) +
    (1 - y) * tf.log(tf.clip_by_value(1 - y_hat, 1e-10, 1.0)))
# create the training op, learning at a rate of 0.001
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

rdm = RandomState(1)
dataSetSize = 128

# generate (simulated) training data
X = rdm.rand(dataSetSize, 2)
# generate (simulated) training labels
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    print(session.run(w1))
    '''
        [[-0.8113182   1.4845988   0.06532937]
        [-2.4427042   0.0992484   0.5912243 ]]
    '''
    print(session.run(w2))
    '''
        [[-0.8113182 ]
         [ 1.4845988 ]
         [ 0.06532937]]
Example #39
import tensorflow as tf
from numpy.random import RandomState

rdm = RandomState(1)
X = rdm.rand(128,2)
Y = [[int (x0+x1 < 1)] for (x0,x1) in X]


Example #40
from __future__ import absolute_import
from pyramid.arima import ARIMA, auto_arima
from pyramid.arima.auto import _fmt_warning_str
from pyramid.arima.utils import nsdiffs
from nose.tools import assert_raises
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_almost_equal
from numpy.random import RandomState
import warnings
import pickle
import os

# initialize the random state
rs = RandomState(42)
y = rs.rand(25)

# more interesting, heart rate data:
hr = np.array([
    84.2697, 84.2697, 84.0619, 85.6542, 87.2093, 87.1246, 86.8726, 86.7052,
    87.5899, 89.1475, 89.8204, 89.8204, 90.4375, 91.7605, 93.1081, 94.3291,
    95.8003, 97.5119, 98.7457, 98.904, 98.3437, 98.3075, 98.8313, 99.0789,
    98.8157, 98.2998, 97.7311, 97.6471, 97.7922, 97.2974, 96.2042, 95.2318,
    94.9367, 95.0867, 95.389, 95.5414, 95.2439, 94.9415, 95.3557, 96.3423,
    97.1563, 97.4026, 96.7028, 96.5516, 97.9837, 98.9879, 97.6312, 95.4064,
    93.8603, 93.0552, 94.6012, 95.8476, 95.7692, 95.9236, 95.7692, 95.9211,
    95.8501, 94.6703, 93.0993, 91.972, 91.7821, 91.7911, 90.807, 89.3196,
    88.1511, 88.7762, 90.2265, 90.8066, 91.2284, 92.4238, 93.243, 92.8472,
    92.5926, 91.7778, 91.2974, 91.6364, 91.2952, 91.771, 93.2285, 93.3199,
    91.8799, 91.2239, 92.4055, 93.8716, 94.5825, 94.5594, 94.9453, 96.2412,
    96.6879, 95.8295, 94.7819, 93.4731, 92.7997, 92.963, 92.6996, 91.9648,
    91.2417, 91.9312, 93.9548, 95.3044, 95.2511, 94.5358, 93.8093, 93.2287,
Example #41
    def _init_trans_mat(self):
        # Check input
        if any([x is None for x in [self.X, self.labels, self.d]]):
            raise ValueError('X, labels and subdim not set!')

        num_pts = self.X.shape[0]
        D = self.X.shape[1]
        subdim = self.d

        # Setup random state
        prng = RandomState()
        if self._SEED is not None:
            prng = RandomState(self._SEED)
            if self._verbose:
                print("Setting random seed to", self._SEED)

        if self._init_method == "PCA":
            if num_pts < self.d:
                raise ValueError('num_pts < subdim')
            if self.d > D:
                raise ValueError('subdim > inputdim')

            pca = PCA(n_components=subdim, whiten=False)
            pca.fit(self.X)
            L = pca.components_.T + 1E-6

        elif self._init_method == "LDA":
            if self.d > D:
                raise ValueError('subdim > inputdim')

            lda_obj = LDA.LDA(self.X, self.labels)
            lda_obj.compute(dim=self.d)
            L = lda_obj.getTransform()
            L = L * (1. / LA.norm(L, ord=1, axis=1)).reshape(-1, 1)
        elif self._init_method == "randbeng":
            # L = 1. * bound * prng.rand(D, self.d) - bound
            L = np.random.normal(0,
                                 np.sqrt(2) / np.sqrt(D + self.d),
                                 (D, self.d))
        elif self._init_method == "randbest":
            # Do some random generation of matrices pick the one with lowest # of constraints
            if self._verbose:
                print('Doing random pre-gen L')
            t0 = timeit.default_timer()
            best_L = prng.rand(D, self.d)
            L = best_L
            self.loss_fun(best_L)
            # nconsts = self._count_active_constraints()
            bound = np.sqrt(6. / (D + self.d))
            best_N_consts = 1E10
            for i in range(0, 10):
                L = 1. * bound * prng.rand(D, self.d) - bound
                # L = 1E-5*prng.rand(D,self.d)
                # L = L * (1./LA.norm(L,ord=1,axiss=1)).reshape(-1,1)
                self.loss_fun(L)
                consts = self._count_active_constraints()
                if consts < best_N_consts:
                    best_N_consts = consts
                    best_L = copy.copy(L)
            L = copy.copy(best_L)
            if self._verbose:
                print("Pre-gen of L done. Took:",
                      "%3.3f" % (timeit.default_timer() - t0),
                      end=", ")
                print("# active const", best_N_consts, end=", ")

        elif self._init_method == "rand":
            # method_str = print('Doing random pre-gen Lapa')
            bound = np.sqrt(6. / (D + self.d))
            L = 1. * bound * prng.rand(D, self.d) - bound

        return L
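
As a side note, the bound used in the "rand" and "randbest" branches above is the Glorot-style value sqrt(6 / (D + d)). A small sketch comparing the snippet's draw (which only covers [-bound, 0)) with a symmetric draw over [-bound, bound) follows; the dimensions are illustration values, not taken from the original class.

import numpy as np
from numpy.random import RandomState

D, d = 10, 3                          # illustration input dimension and target subdim
bound = np.sqrt(6.0 / (D + d))
prng = RandomState(0)

L_snippet = 1.0 * bound * prng.rand(D, d) - bound      # as in the snippet: values in [-bound, 0)
L_symmetric = 2.0 * bound * prng.rand(D, d) - bound    # symmetric Glorot uniform: values in [-bound, bound)
print(L_snippet.min(), L_snippet.max())
print(L_symmetric.min(), L_symmetric.max())
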
Beispiel #42
0
x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Define the forward-propagation process of the neural network
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# Define the loss function and back-propagation algorithm: cross-entropy; a binary classification problem does not need softmax
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = \
    tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Generate a simulated dataset with random numbers
rdm = RandomState(1)  # create a seeded random state
dataset_size = 128
X = rdm.rand(dataset_size, 2)  # generate data with 128 rows and 2 columns
'''
Define a rule to assign labels to the samples: every sample with x1 + x2 < 1 is
treated as a positive sample (e.g. a qualified part), and all others as negative
samples (e.g. an unqualified part). Unlike the notation used in the TensorFlow
playground, 0 denotes a negative sample and 1 a positive sample; most neural
networks for classification problems use this 0/1 convention.
'''
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

# Create a session to run the TF program
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print("w1: ", sess.run(w1))
    print("w2: ", sess.run(w2))

    # Set the number of training steps
Beispiel #43
0
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
b_ = tf.placeholder(tf.float32, shape=(None, 1), name="b-input")

a = tf.matmul(x, w1)
b = tf.matmul(a, w2)

cross_entropy = -tf.reduce_mean(b_ * tf.log(tf.clip_by_value(b, 1e-10, 1.0)))
learning_rate = 0.001
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

rdm = RandomState(1)
dataset_size = 3000
X = rdm.rand(dataset_size, 2)
B = [[int(x1 + x2 < 1)] for (x1, x2) in X]

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w1))
    print(sess.run(w2))
    print(len(B))
    for i in range(step):
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)

        #print("i : %d   start : %d   end : %d" %(i, start, end))
Beispiel #44
0
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
global_step = tf.Variable(0)

cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1)))
learn_rate = tf.train.exponential_decay(0.1,
                                        global_step,
                                        128 / 8,
                                        0.96,
                                        staircase=True)
train_step = tf.train.AdamOptimizer(learn_rate).minimize(
    cross_entropy, global_step=global_step)

rdm = RandomState(1)
datasize = 128
X = rdm.rand(datasize, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as sess:
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    print(sess.run(w1))
    print(sess.run(w2))

    STEP = 5000
    for i in range(STEP):
        start = (i * batch_size) % datasize
        end = min(start + batch_size, datasize)
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})

        if i % 1000 == 0:
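
For reference, with staircase=True the schedule configured above amounts to lr = 0.1 * 0.96 ** floor(global_step / 16), since decay_steps = 128 / 8 = 16. A tiny sketch of the resulting values (the step values are chosen only for illustration):

# decayed learning rate = initial_rate * decay_rate ** (global_step // decay_steps)
for step_value in (0, 16, 160, 1600, 4992):
    lr = 0.1 * 0.96 ** (step_value // 16)
    print(step_value, lr)
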
Beispiel #45
0
batch_size = 8
x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)
loss_less = 1
loss_more = 10
loss = tf.reduce_sum(
    tf.where(tf.greater(y, y_), (y - y_) * loss_more, (y_ - y) * loss_less))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

# Simulated dataset
rdm = RandomState(1)
dataset_size = 4
X = rdm.rand(dataset_size, 2)
# Each element is itself a list
Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]
# Just one flat list
# Y2 = [x1 + x2 + rdm.rand() / 10.0 - 0.05 for (x1, x2) in X]
where_v = tf.where(1 == 2, 2, 3)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    STEPS = 5000
    print(sess.run(where_v))

    # for i in range(STEPS):
    #     start = (i * batch_size) % dataset_size
    #     # start = i * batch_size
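
A minimal NumPy sketch of the asymmetric loss built with tf.where above: with loss_less = 1 and loss_more = 10, over-predictions are penalized ten times as heavily as under-predictions. The sample values are illustration only.

import numpy as np

loss_less, loss_more = 1, 10
y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 1.0, 3.0])

# over-prediction costs loss_more per unit, under-prediction costs loss_less per unit
loss = np.sum(np.where(y_pred > y_true,
                       (y_pred - y_true) * loss_more,
                       (y_true - y_pred) * loss_less))
print(loss)   # 0.5 * 10 + 1.0 * 1 + 0 = 6.0
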
class TrainValDataset(Dataset):
    def __init__(self, name):
        super().__init__()
        self.rand_state = RandomState(66)
        self.root_dir = os.path.join(settings.data_dir, name)
        self.mat_files = os.listdir(self.root_dir)
        self.patch_size = settings.patch_size
        self.file_num = len(self.mat_files)

    def __len__(self):
        return self.file_num

    def __getitem__(self, idx):
        file_name = self.mat_files[idx % self.file_num]
        img_file = os.path.join(self.root_dir, file_name)
        img_pair = cv2.imread(img_file).astype(np.float32) / 255

        if settings.aug_data:
            O, B = self.crop(img_pair, aug=True)
            O, B = self.flip(O, B)
            O, B = self.rotate(O, B)
        else:
            O, B = self.crop(img_pair, aug=False)

        O = np.transpose(O, (2, 0, 1))
        B = np.transpose(B, (2, 0, 1))
        sample = {'O': O, 'B': B}

        return sample

    def crop(self, img_pair, aug):
        patch_size = self.patch_size
        h, ww, c = img_pair.shape
        w = int(ww / 2)

        if aug:
            mini = -1 / 4 * self.patch_size
            maxi = 1 / 4 * self.patch_size + 1
            p_h = patch_size + self.rand_state.randint(mini, maxi)
            p_w = patch_size + self.rand_state.randint(mini, maxi)
        else:
            p_h, p_w = patch_size, patch_size

        r = self.rand_state.randint(0, h - p_h)
        c = self.rand_state.randint(0, w - p_w)
        O = img_pair[r:r + p_h, c + w:c + p_w + w]
        B = img_pair[r:r + p_h, c:c + p_w]

        if aug:
            O = cv2.resize(O, (patch_size, patch_size))
            B = cv2.resize(B, (patch_size, patch_size))

        return O, B

    def flip(self, O, B):
        if self.rand_state.rand() > 0.5:
            O = np.flip(O, axis=1)
            B = np.flip(B, axis=1)
        return O, B

    def rotate(self, O, B):
        angle = self.rand_state.randint(-30, 30)
        patch_size = self.patch_size
        center = (int(patch_size / 2), int(patch_size / 2))
        M = cv2.getRotationMatrix2D(center, angle, 1)
        O = cv2.warpAffine(O, M, (patch_size, patch_size))
        B = cv2.warpAffine(B, M, (patch_size, patch_size))
        return O, B
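
A hedged usage sketch for the dataset class above: wrap it in a PyTorch DataLoader and read one batch of paired patches. The dataset name 'train' and the settings module the class relies on are assumptions carried over from the snippet, not verified here.

from torch.utils.data import DataLoader

dataset = TrainValDataset('train')                  # assumes settings.data_dir/'train' exists
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

for batch in loader:
    O, B = batch['O'], batch['B']                   # paired patches in CHW layout, values in [0, 1]
    print(O.shape, B.shape)                         # e.g. torch.Size([16, 3, patch_size, patch_size])
    break
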
Beispiel #47
0
def mock_dataset():
    rdm = RandomState(1)
    X = rdm.rand(dataset_size, 2)
    Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
    return X, Y
Beispiel #48
0
from numpy.testing import assert_array_almost_equal, assert_almost_equal
from numpy.random import RandomState

import joblib
from statsmodels import api as sm
import pandas as pd

import pickle
import pytest
import time
import os
from os.path import abspath, dirname

# initialize the random state
rs = RandomState(42)
y = rs.rand(25)

# more interesting heart rate data (asserts we can use a series)
hr = load_heartrate(as_series=True)

# > set.seed(123)
# > abc <- rnorm(50, 5, 1)
abc = np.array([
    4.439524, 4.769823, 6.558708, 5.070508, 5.129288, 6.715065, 5.460916,
    3.734939, 4.313147, 4.554338, 6.224082, 5.359814, 5.400771, 5.110683,
    4.444159, 6.786913, 5.497850, 3.033383, 5.701356, 4.527209, 3.932176,
    4.782025, 3.973996, 4.271109, 4.374961, 3.313307, 5.837787, 5.153373,
    3.861863, 6.253815, 5.426464, 4.704929, 5.895126, 5.878133, 5.821581,
    5.688640, 5.553918, 4.938088, 4.694037, 4.619529, 4.305293, 4.792083,
    3.734604, 7.168956, 6.207962, 3.876891, 4.597115, 4.533345, 5.779965,
    4.916631
Beispiel #49
0
INPUT_NODE_NUM = 7
w1 = tf.Variable(tf.random_normal([INPUT_NODE_NUM, 20], stddev=1))
w2 = tf.Variable(tf.random_normal([20, 20], stddev=1))
w3 = tf.Variable(tf.random_normal([20, 5], stddev=1))
#w4 = tf.Variable(tf.random_normal([3,1],stddev = 1))

biases = tf.Variable(tf.zeros([2]))
biases2 = tf.Variable(tf.zeros([3]))
# Model inputs and outputs
x = tf.placeholder(tf.float32, shape=(None, INPUT_NODE_NUM), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 5), name='y-input')

# Fill the array with random numbers
rdm = RandomState(1)
dataset_size = 140
X = rdm.rand(dataset_size, INPUT_NODE_NUM)

# Read the data and write it into the array
# X is the training data
for i in range(0, 140):
    #X[i][0] = i
    '''
    if (i%2 == 0):
        X[i][0] = 1
    else:
        X[i][0] = 0
    '''

    X[i][0] = stringToNum(workbook.sheets()[0].cell(i + 2, 1).value)
    X[i][1] = stringToNum(workbook.sheets()[0].cell(i + 2, 2).value)
    X[i][2] = stringToNum(workbook.sheets()[0].cell(i + 2, 3).value)
class DpMixtureGibbs:
    def __init__(self, data, hyperpars, seed=-1, verbose=False):
        ''' 
        Clusters bins by DP mixture model using Gibbs sampling
        '''
        self.verbose = verbose
        if self.verbose:
            print 'DpMixtureGibbs initialised'
            sys.stdout.flush()

        self.use_rt_likelihood = hyperpars.second_stage_clustering_use_rt_likelihood
        self.use_mass_likelihood = hyperpars.second_stage_clustering_use_mass_likelihood
        self.use_adduct_likelihood = hyperpars.second_stage_clustering_use_adduct_likelihood

        # prepare arrays for concrete bins, posterior rts
        # and word counts (from 1st stage clustering)
        self.masses = np.array(data[0])
        self.rts = np.array(data[1])
        self.word_counts_list = [np.array(x) for x in data[2]]

        self.W = len(self.word_counts_list[0])
        self.origins = data[3]
        self.N = len(self.rts)
        assert self.N == len(self.word_counts_list)
        assert self.N == len(self.origins)

        delta = hyperpars.across_file_rt_tol
        var = (delta / 3.0)**2  # assume 1 delta is 3 standard deviations
        self.rt_prec = 1.0 / var

        log_one_ppm = np.log(1000001) - np.log(1000000)
        log_delta = log_one_ppm * hyperpars.across_file_mass_tol
        delta = np.exp(log_delta)
        var = (delta / 3.0)**2  # assume 1 delta is 3 standard deviations
        self.mass_prec = 1.0 / var

        self.prior_rt_mean = np.mean(self.rts)
        self.prior_rt_prec = 5E-6

        self.prior_mass_mean = np.mean(self.masses)
        self.prior_mass_prec = 5E-6

        self.alpha = float(hyperpars.dp_alpha)
        self.beta = float(hyperpars.beta)

        self.nsamps = hyperpars.rt_clustering_nsamps
        self.burn_in = hyperpars.rt_clustering_burnin
        self.seed = int(seed)
        if self.seed > 0:
            self.random_state = RandomState(self.seed)
        else:
            self.random_state = RandomState()

        # self.Z = None
        # self.ZZ_all = sp.lil_matrix((self.N, self.N),dtype=np.float)
        self.cluster_rt_mean = None
        self.cluster_rt_prec = None
        self.matching_results = []
        self.samples_obtained = 0

    def run(self):

        if self.verbose:
            print "Sampling begins"
            sys.stdout.flush()

        # initialise all rows under one cluster
        K = 1
        cluster_counts = np.array([float(self.N)])
        cluster_mass_sums = np.array([self.masses.sum()])
        cluster_rt_sums = np.array([self.rts.sum()])
        all_word_counts = np.zeros(self.W)
        for wc in self.word_counts_list:
            all_word_counts += wc
        cluster_word_sums = [all_word_counts]
        current_ks = np.zeros(self.N, dtype=np.int)
        cluster_member_origins = [list(self.origins)]

        # start sampling
        self.samples_obtained = 0
        for s in range(self.nsamps):

            start_time = time.time()

            if self.N > 1:  # if only 1 item, then nothing to sample

                # loop through the objects in random order
                random_order = range(self.N)
                self.random_state.shuffle(random_order)
                processed = 0
                for n in random_order:

                    if self.verbose:
                        processed += 1
                        print "Processing %s remaining %d/%d" % (
                            (s, n), processed, self.N)
                        sys.stdout.flush()

                    current_mass_data = self.masses[n]
                    current_rt_data = self.rts[n]
                    current_word_counts = self.word_counts_list[n]
                    current_origin = self.origins[n]
                    k = current_ks[n]  # the current cluster of this item

                    # remove from model, detecting empty table if necessary
                    cluster_counts[k] = cluster_counts[k] - 1
                    cluster_mass_sums[
                        k] = cluster_mass_sums[k] - current_mass_data
                    cluster_rt_sums[k] = cluster_rt_sums[k] - current_rt_data
                    cluster_word_sums[
                        k] = cluster_word_sums[k] - current_word_counts
                    cluster_member_origins[k].remove(current_origin)

                    # if empty table, delete this cluster
                    if cluster_counts[k] == 0:
                        K = K - 1
                        cluster_counts = np.delete(cluster_counts,
                                                   k)  # delete k-th entry
                        cluster_mass_sums = np.delete(cluster_mass_sums,
                                                      k)  # delete k-th entry
                        cluster_rt_sums = np.delete(cluster_rt_sums,
                                                    k)  # delete k-th entry
                        del cluster_member_origins[k]  # delete k-th entry
                        del cluster_word_sums[k]
                        current_ks = self._reindex(
                            k,
                            current_ks)  # remember to reindex all the clusters

                    # compute prior probability for K existing table and new table
                    prior = np.array(cluster_counts)
                    prior = np.append(prior, self.alpha)
                    prior = prior / prior.sum()

                    log_likelihood = np.zeros_like(prior)

                    ## mass likelihood
                    if self.use_mass_likelihood:

                        # for current k
                        param_beta = self.prior_mass_prec + (self.mass_prec *
                                                             cluster_counts)
                        temp = (self.prior_mass_prec * self.prior_mass_mean
                                ) + (self.mass_prec * cluster_mass_sums)
                        param_alpha = (1 / param_beta) * temp

                        # for new k
                        param_beta = np.append(param_beta,
                                               self.prior_mass_prec)
                        param_alpha = np.append(param_alpha,
                                                self.prior_mass_mean)

                        # pick new k
                        prec = 1 / ((1 / param_beta) + (1 / self.mass_prec))
                        log_likelihood_mass = -0.5 * np.log(2 * np.pi)
                        log_likelihood_mass = log_likelihood_mass + 0.5 * np.log(
                            prec)
                        log_likelihood_mass = log_likelihood_mass - 0.5 * np.multiply(
                            prec, np.square(current_mass_data - param_alpha))

                        log_likelihood += log_likelihood_mass
                        self.like_mass = log_likelihood_mass

                    ## RT likelihood
                    if self.use_rt_likelihood:

                        # for current k
                        param_beta = self.prior_rt_prec + (self.rt_prec *
                                                           cluster_counts)
                        temp = (self.prior_rt_prec * self.prior_rt_mean) + (
                            self.rt_prec * cluster_rt_sums)
                        param_alpha = (1 / param_beta) * temp

                        # for new k
                        param_beta = np.append(param_beta, self.prior_rt_prec)
                        param_alpha = np.append(param_alpha,
                                                self.prior_rt_mean)

                        # pick new k
                        prec = 1 / ((1 / param_beta) + (1 / self.rt_prec))
                        log_likelihood_rt = -0.5 * np.log(2 * np.pi)
                        log_likelihood_rt = log_likelihood_rt + 0.5 * np.log(
                            prec)
                        log_likelihood_rt = log_likelihood_rt - 0.5 * np.multiply(
                            prec, np.square(current_rt_data - param_alpha))

                        log_likelihood += log_likelihood_rt
                        self.like_rt = log_likelihood_rt

                    ## adducts likelihood
                    if self.use_adduct_likelihood:

                        log_likelihood_wc = np.zeros_like(log_likelihood)
                        for k_idx in range(K):  # the finite portion
                            wcb = cluster_word_sums[k_idx] + self.beta
                            log_likelihood_wc[k_idx] = self._C(
                                wcb + current_word_counts) - self._C(wcb)
                        # the infinite bit
                        wcb = np.zeros(self.W) + self.beta
                        log_likelihood_wc[-1] = self._C(
                            wcb + current_word_counts) - self._C(wcb)

                        log_likelihood += log_likelihood_wc
                        self.like_wc = log_likelihood_wc

                    ## additional rule: prevent two bins from the same origin file from being clustered together
                    valid_clusters_check = np.zeros(K + 1)
                    for k_idx in range(K):
                        # this_bin cannot go into a cluster where the origin file is the same
                        existing_origins = cluster_member_origins[k_idx]
                        if current_origin in existing_origins:
                            valid_clusters_check[k_idx] = float('-inf')
                    log_likelihood = log_likelihood + valid_clusters_check

                    # sample from posterior
                    post = log_likelihood + np.log(prior)
                    post = np.exp(post - post.max())
                    post = post / post.sum()
                    random_number = self.random_state.rand()
                    cumsum = np.cumsum(post)
                    new_k = 0
                    for new_k in range(len(cumsum)):
                        c = cumsum[new_k]
                        if random_number <= c:
                            break

                    # (new_k+1) because indexing starts from 0 here
                    if (new_k + 1) > K:
                        # make new cluster and add to it
                        K = K + 1
                        cluster_counts = np.append(cluster_counts, 1)
                        cluster_mass_sums = np.append(cluster_mass_sums,
                                                      current_mass_data)
                        cluster_rt_sums = np.append(cluster_rt_sums,
                                                    current_rt_data)
                        cluster_member_origins.append([current_origin])
                        cluster_word_sums.append(current_word_counts)
                    else:
                        # put into existing cluster
                        cluster_counts[new_k] = cluster_counts[new_k] + 1
                        cluster_mass_sums[new_k] = cluster_mass_sums[
                            new_k] + current_mass_data
                        cluster_rt_sums[
                            new_k] = cluster_rt_sums[new_k] + current_rt_data
                        cluster_member_origins[new_k].append(current_origin)
                        cluster_word_sums[new_k] = cluster_word_sums[
                            new_k] + current_word_counts

                    # assign the object to cluster new_k, whether it is an existing or a new cluster
                    current_ks[n] = new_k

                    assert len(cluster_counts
                               ) == K, "len(cluster_counts)=%d != K=%d)" % (
                                   len(cluster_counts), K)
                    assert len(cluster_mass_sums
                               ) == K, "len(cluster_mass_sums)=%d != K=%d)" % (
                                   len(cluster_mass_sums), K)
                    assert len(cluster_rt_sums
                               ) == K, "len(cluster_rt_sums)=%d != K=%d)" % (
                                   len(cluster_rt_sums), K)
                    assert len(
                        cluster_member_origins
                    ) == K, "len(cluster_member_origins)=%d != K=%d)" % (
                        len(cluster_member_origins), K)
                    assert len(cluster_word_sums
                               ) == K, "len(cluster_word_sums)=%d != K=%d)" % (
                                   len(cluster_word_sums), K)
                    assert current_ks[n] < K, "current_ks[%d] = %d >= %d" % (
                        n, current_ks[n], K)

                # end objects loop

            time_taken = time.time() - start_time
            if s >= self.burn_in:

                if self.verbose:
                    print(
                        '\tSAMPLE\tIteration %d\ttime %4.2f\tnumClusters %d' %
                        ((s + 1), time_taken, K))
                # self.Z = self._get_Z(self.N, K, current_ks)
                self.samples_obtained += 1

                # construct the actual alignment here
                for k in range(K):
                    pos = np.flatnonzero(current_ks == k)
                    memberstup = tuple(pos.tolist())
                    # if self.verbose:
                    #    print "\t\tsample=" + str(s) + " k=" + str(k) + " memberstup=" + str(memberstup)
                    self.matching_results.append(memberstup)
            else:
                if self.verbose:
                    print(
                        '\tBURN-IN\tIteration %d\ttime %4.2f\tnumClusters %d' %
                        ((s + 1), time_taken, K))
            sys.stdout.flush()

        # end sample loop
        self.last_K = K
        self.last_assignment = current_ks
        if self.verbose:
            print "DONE!"

    def _C(self, arr):
        sum_arr = np.sum(arr)
        sum_log_gamma = np.sum(gammaln(arr))
        res = sum_log_gamma - gammaln(sum_arr)
        return res

    def _reindex(self, deleted_k, current_ks):
        pos = np.where(current_ks > deleted_k)
        current_ks[pos] = current_ks[pos] - 1
        return current_ks


#     def _get_Z(self, N, K, current_ks):
#         Z = sp.lil_matrix((N, K))
#         for n in range(len(current_ks)):
#             k = current_ks[n]
#             Z[n, k] = 1
#         return Z
#
#     def _get_ZZ(self, Z):
#         return Z.tocsr() * Z.tocsr().transpose()

    def __repr__(self):
        return "Gibbs sampling for DP mixture model\n" + self.hyperpars.__repr__() + \
        "\nn_samples = " + str(self.n_samples)
Beispiel #51
0
# Define the forward-propagation process of the neural network
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# Define the loss function and back-propagation algorithm
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_)
# Learning rate
learning_rate = 0.001
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Generate a simulated dataset with random numbers
rdm = RandomState(1)
dataSet_size = 128
X = rdm.rand(dataSet_size, 2)

# Define a rule to assign labels to the samples. Here every sample with x1 + x2 < 1 is treated as a
# positive sample (e.g. a qualified part) and all others as negative samples (e.g. an unqualified part).
# Unlike the notation in the TensorFlow playground, 0 denotes a negative sample and 1 a positive sample;
# most neural networks for classification problems use this 0/1 convention.
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    print(sess.run(w1))
    print(sess.run(w2))

    # Set the number of training rounds
    STEPS = 5000
    for i in range(STEPS):
Beispiel #52
0
# forward propagation
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# define the loss function and the training step
y = tf.sigmoid(y)
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
                                (1 - y_) *
                                tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
learning_rate = 0.001
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# create a random trainset
rng = RandomState(1)
dataset_size = 128
X = rng.rand(dataset_size, 2)

# define: x1 + x2 <1 positive
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
# Y = [[int(x1 + x2)] for (x1, x2) in X]
# create a session for run
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w1))
    print(sess.run(w2))
    '''
    how many times you need to train;
        each time, take one batch from the training set
    '''
    STEPS = 5000
Beispiel #53
0
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

x = tf.placeholder(tf.float32, shape=(None, 2), name="x-input")
y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")

a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
cross_ent = loss_func(y, y_)

train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_ent)

rand = RandomState(1)
data_size = 128
X = rand.rand(data_size, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as sess:

    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    print(sess.run(w1), sess.run(w2))

    #    print(sess.run(y, feed_dict={x:[[0.7, 0.9],[0.1,0.4],[0.5,0.8]]}))

    step = 5000
    for i in range(step):
        start = (i * batch_size) % data_size
        end = min(start + batch_size, data_size)
            wav_path = os.path.join(in_dir, subdir, '{}.wav'.format(basename))

            # read audio file
            try:
                x, fs = sf.read(wav_path)
            except Exception as e:
                # print("Error on {}".format(basename))
                # preprocessed dir can have no wav file due to the length constraint
                if 'System error' not in str(e):
                    print(e)
                continue
            # assert fs == 16000
            if x.shape[0] % 256 == 0:
                x = np.concatenate((x, np.array([1e-06])), axis=0)
            y = signal.filtfilt(b, a, x)
            wav = y * 0.96 + (prng.rand(y.shape[0]) - 0.5) * 1e-06

            # compute spectrogram
            D = pySTFT(wav).T
            D_mel = np.dot(D, mel_basis)
            D_db = 20 * np.log10(np.maximum(min_level, D_mel)) - 16
            S = (D_db + 100) / 100

            # extract f0
            f0_rapt = sptk.rapt(wav.astype(np.float32) * 32768,
                                fs,
                                256,
                                min=lo,
                                max=hi,
                                otype=2)
            index_nonzero = (f0_rapt != -1e10)
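
A hedged sketch of the spectrogram normalization a few lines above: clamp the mel magnitudes at a floor, convert to dB against a -16 dB reference, then map roughly [-100, 0] dB onto [0, 1]. The min_level value here is an assumption (a -100 dB floor); it is defined outside the truncated snippet.

import numpy as np

min_level = np.exp(-100 / 20 * np.log(10))        # assumed -100 dB floor, i.e. 1e-5
D_mel = np.array([1e-7, 1e-3, 0.1, 1.0])          # illustration mel magnitudes

D_db = 20 * np.log10(np.maximum(min_level, D_mel)) - 16
S = (D_db + 100) / 100
print(S)                                          # [-0.16  0.24  0.64  0.84]
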
class BCP:
    '''
        The process of training a neural network:
            1. Define the network structure and the output of forward propagation
            2. Define the loss function and choose a back-propagation algorithm
            3. Create a session and repeatedly run the back-propagation optimization on the training data
        Note: no matter how the network structure changes, these 3 steps stay the same
    '''
    def __init__(self, batch_size=8):
        # Define the batch size of the training data
        self.batch_size = batch_size
        # Define the parameters of the neural network
        self.w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
        self.w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))
        # Using None for one dimension of the shape makes it easy to use different batch sizes.
        # During actual training the data has to be split into smaller batches; feeding all of
        # the data in one batch may cause an out-of-memory error.
        self.x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input')
        self.y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')
        # Define the forward-propagation process
        self.a = tf.matmul(self.x, self.w1)
        self.y = tf.matmul(self.a, self.w2)
        # Define the loss function and back-propagation algorithm
        self.cross_entropy = -tf.reduce_mean(
            self.y_ * tf.log(tf.clip_by_value(self.y, 1e-10, 1.0)))
        self.train_step = tf.train.AdamOptimizer(0.001).minimize(
            self.cross_entropy)
        self.sess = tf.Session()
        # Initialize all variables
        initVar = tf.initialize_all_variables()
        self.sess.run(initVar)

    def create_model_data(self):
        '''
        Generate a simulated dataset with random numbers.
        Define a rule for the sample labels: samples with x1 + x2 < 1 are treated as positive samples.
            0 denotes a negative sample, 1 a positive sample
        :return:
        '''
        self.rdm = RandomState(1)
        self.dataset_size = 128
        X = self.rdm.rand(self.dataset_size, 2)
        Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
        return X, Y

    def run(self):
        print("训练之前结果:")
        print(self.sess.run(self.w1))
        print(self.sess.run(self.w2))

    def runTest(self, STEPS=5000):
        X, Y = self.create_model_data()
        for i in range(STEPS):
            # Select batch_size samples for training in each iteration
            start = (i * self.batch_size) % self.dataset_size
            end = min(start + self.batch_size, self.dataset_size)
            self.sess.run(self.train_step,
                          feed_dict={
                              self.x: X[start:end],
                              self.y_: Y[start:end]
                          })
            if i % 1000 == 0:
                total_cross_entropy = self.sess.run(self.cross_entropy,
                                                    feed_dict={
                                                        self.x: X,
                                                        self.y_: Y
                                                    })
                # The smaller the cross entropy, the smaller the gap between the predictions and the true values
                print(
                    "After %d training step(s), cross entropy on all data is %g"
                    % (i, total_cross_entropy))
        print("训练之后结果:")
        print(self.sess.run(self.w1))
        print(self.sess.run(self.w2))
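
A minimal usage sketch for the BCP class above, assuming TensorFlow 1.x and numpy.random.RandomState are imported as in the surrounding snippets:

bcp = BCP(batch_size=8)
bcp.run()           # print the weights before training
bcp.runTest(5000)   # train on the simulated data, reporting cross entropy every 1000 steps
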
Beispiel #56
0
# Define the forward-propagation process of the neural network
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# Define the loss function and the back-propagation algorithm
y = tf.sigmoid(y)
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
                                (1 - y_) *
                                tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))

train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# Generate a simulated dataset with random numbers
rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)  # randomly generate a 128x2 array; with the seed fixed to 1 the same array is produced on every run

Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]
# Create a session to run the TensorFlow program
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    # Initialize variables
    sess.run(init_op)

    print(sess.run(w1))
    print(sess.run(w2))

    STEPS = 5000
    for i in range(STEPS):
        # Select batch_size samples for training in each iteration
        start = (i * batch_size) % dataset_size
Beispiel #57
0
"""
import tensorflow as tf
from numpy.random import RandomState


# configuration
BATCH_SIZE = 8
DATASET_SIZE = 128
STEPS = 20000
SEED = 7
REG_TYPE = 'L1'


# Generate synthetic dataset
rdm = RandomState(1)
X = rdm.rand(DATASET_SIZE, 2)
Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X]


def get_weight(shape, lamda, type='L1'):
    """
    Create a weight Variable and add its L1/L2
    regularization loss to a collection.
    """

    weight = tf.Variable(tf.random_normal(shape),\
        dtype=tf.float32, trainable=True)

    if type == 'L1':
        reg_loss = tf.contrib.layers.l1_regularizer(lamda)(weight)
    elif type == 'L2':
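
The function is cut off at the L2 branch; a hedged sketch of how this collection-based pattern is usually completed in TF 1.x follows. The collection name 'losses' and the way the total loss is assembled are assumptions, not taken from the snippet.

def get_weight_sketch(shape, lamda, reg_type='L1'):
    # create a trainable weight and register its regularization loss in a collection
    weight = tf.Variable(tf.random_normal(shape), dtype=tf.float32, trainable=True)
    if reg_type == 'L1':
        reg_loss = tf.contrib.layers.l1_regularizer(lamda)(weight)
    else:
        reg_loss = tf.contrib.layers.l2_regularizer(lamda)(weight)
    tf.add_to_collection('losses', reg_loss)      # assumed collection name
    return weight

# the total loss is then typically the data loss plus all collected terms, e.g.
# total_loss = data_loss + tf.add_n(tf.get_collection('losses'))
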
Beispiel #58
0
import matplotlib.pyplot as plt
import numpy as np
import random as random
from numpy.random import RandomState
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# part 1: y = ax + b, where a is the slope and b the intercept
rng = RandomState(2)  # the seed determines the generated values
x = 10 * rng.rand(50)
y = 3 * x - 5 + rng.randn(50)
plt.scatter(x, y)  # draw a 2D scatter plot
plt.show()  # display the plot

#part2
from sklearn.linear_model import LinearRegression

model = LinearRegression(fit_intercept=True)  # whether to fit the model's intercept

model.fit(x[:, np.newaxis], y)  # fit the linear model; np.newaxis adds a new axis

xfit = np.linspace(0, 10, 1000)  # generate 1000 evenly spaced values from 0 to 10
yfit = model.predict(xfit[:, np.newaxis])  #predict using the linear model

plt.scatter(x, y)
plt.plot(xfit, yfit)
plt.show()

print("Model slope:    ", model.coef_[0])
print("Model intercept:", model.intercept_)
Beispiel #59
0
# Loss function for a binary classification problem
cross_entropy = -tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0))+
                                (1-y_)*tf.log(tf.clip_by_value(1-y,1e-10,1.0)))
# Loss function for a regression problem
#loss_less = 10
#loss_more = 1
#loss = tf.reduce_sum(tf.where(tf.greater(y,y_),(y-y_)*loss_less,(y_-y)*loss_more))

train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

from numpy.random import RandomState
rdm = RandomState(1)
data_size = 128
X = rdm.rand(data_size,2)
# Regression problem: labels are constructed as x1 + x2 + random noise
#Y = [[x1+x2+rdm.rand()/10.0-0.05] for (x1,x2) in X]
# Binary classification problem: construct synthetic label values
Y = [[int(x1+x2<1)] for (x1,x2) in X]

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    
    print(sess.run(w1))
    print(sess.run(w2))
    
    STEPS = 5000
    for i in range(STEPS):
        start = (i * batch_size) % data_size
Beispiel #60
0
y = tf.matmul(a, w2)

# Define the loss function and back-propagation algorithm
y = tf.sigmoid(y)
# tf.clip_by_value(y, 1e-10, 1.0) clamps y to the range 1e-10~1.0 to avoid numerical errors;
# the * operator here is element-wise multiplication, not matrix multiplication (tf.matmul);
# reduce_mean averages over all elements of the matrix
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
                                (1 - y_) *
                                tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Generate simulated data
sample_num = 256
rdm = RandomState(1)
X = rdm.rand(sample_num, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]  # samples with x1 + x2 < 1 are treated as positive

# Create a session to run the program
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()  # initialize all variables
    sess.run(init_op)

    # 输出目前(未经训练)的参数取值。
    print("训练前参数:")
    print("w1:\n", sess.run(w1))
    print("w2:\n", sess.run(w2))
    print("\n")

    # Train the model
    STEPS = 10000