Example #1
from cluster_generator.ics import ClusterICs, \
    compute_centers_for_binary
from cluster_generator.tests.utils import particle_answer_testing
from numpy.random import RandomState
from pathlib import Path

prng = RandomState(25)


def test_single_ics(answer_dir):
    p = Path(answer_dir) / "profile.h5"
    num_particles = {k: 100000 for k in ["dm", "star", "gas"]}
    ics = ClusterICs("single",
                     1,
                     p, [0.0, 0.0, 0.0], [0.0, 0.0, 0.0],
                     num_particles=num_particles)
    parts = ics.setup_particle_ics(prng=prng)
    particle_answer_testing(parts, "particles.h5", False, answer_dir)


def test_double_ics(answer_store, answer_dir):
    p = Path(answer_dir) / "profile.h5"
    num_particles = {k: 200000 for k in ["dm", "star", "gas"]}
    center1, center2 = compute_centers_for_binary([0.0, 0.0, 0.0], 3000.0,
                                                  500.0)
    velocity1 = [500.0, 0.0, 0.0]
    velocity2 = [-500.0, 0.0, 0.0]
    ics = ClusterICs("double",
                     2, [p, p], [center1, center2], [velocity1, velocity2],
                     num_particles=num_particles)
    parts = ics.setup_particle_ics(prng=prng)
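
# A minimal illustrative sketch (helper name is hypothetical, not part of cluster_generator):
# passing one module-level seeded RandomState into each test keeps the whole run reproducible,
# but every draw advances the shared state, so consecutive tests see different streams.
from numpy.random import RandomState

def _draw_positions(n, prng):
    # stands in for setup_particle_ics(); any RandomState draw shows the same behaviour
    return prng.uniform(-1.0, 1.0, size=(n, 3))

_prng = RandomState(25)
_first = _draw_positions(5, _prng)
assert not (_draw_positions(5, _prng) == _first).all()        # shared state has advanced
assert (_draw_positions(5, RandomState(25)) == _first).all()  # reseeding reproduces the sequence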
Example #2
def __init__(self, seed=None):
    self._random = RandomState(seed=seed)
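
# Hedged sketch of the wrapper pattern above (class name is hypothetical): RandomState(seed=None)
# seeds itself from OS entropy, so the wrapper is only reproducible when an explicit seed is given.
from numpy.random import RandomState

class _Seeded:
    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def roll(self):
        return self._random.randint(0, 6)

assert _Seeded(7).roll() == _Seeded(7).roll()   # same seed -> same draw
_ = _Seeded().roll()                            # unseeded: valid, but not reproducible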
Example #3
    volume_shape = data.shape

    crop_seq_train = VolumeCropSequence(
        data_volume=data,
        labels_volume=labels,
        batch_size=batch_size,
        meta_crop_generator=MetaCrop3DGenerator(
            volume_shape=volume_shape,
            crop_shape=crop_shape,
            is_2halfd=(model_type == T2SModelType.d2half),
            x0y0z0_generator=(
                grid_pos_gen :=
                t2s_volseq.UniformGridPosition.build_from_volume_crop_shapes(
                    volume_shape=volume_shape,
                    crop_shape=crop_shape,
                    random_state=RandomState(args.random_state_seed),
                )),
            gt_field=t2s_volseq.GTUniformEverywhere(
                gt_type=gt_type,
                grid_position_generator=grid_pos_gen,
                random_state=RandomState(args.random_state_seed),
            ),
            et_field=t2s_volseq.ET3DConstantEverywhere.build_no_displacement(
                grid_position_generator=grid_pos_gen),
            vs_field=t2s_volseq.VSConstantEverywhere.build_no_shift(
                grid_position_generator=grid_pos_gen)),

        # this volume cropper only returns random crops,
        # so the number of crops per epoch/batch can be whatever we want
        epoch_size=10,
        **vol_crop_seq_common_kwargs,
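
# Sketch of the contract a seeded crop-origin generator has to meet (helper below is
# hypothetical, not the t2s_volseq API): randint's exclusive upper bound keeps
# origin + crop inside the volume, and a fixed seed makes the crop sequence reproducible.
from numpy.random import RandomState

def _random_crop_origin(volume_shape, crop_shape, rng):
    return tuple(rng.randint(0, v - c + 1) for v, c in zip(volume_shape, crop_shape))

_rng = RandomState(0)
_vol, _crop = (64, 64, 32), (16, 16, 8)
_origin = _random_crop_origin(_vol, _crop, _rng)
assert all(o + c <= v for o, c, v in zip(_origin, _crop, _vol))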
Example #4
def __init__(self, trainmode, trainsize):
    super().__init__()
    self.rt = RandomState(self.rs_seed)
    self.trainmode = trainmode
    self.trainsize = trainsize
Example #5
    status = MPI.Status()
    host = MPI.Get_processor_name()
    info = MPI.INFO_NULL

    nlocalcores = mp.cpu_count()  #One core is manager

    if rank == 0:
        """
        The rank 0 process is the master manager.  This process:

        1. Reads the data from the shapefile or DB
        2. Generates the W Object
        3. Sends the W object and attribute vector to all children
        """
        w = ps.lat2W(8, 8)
        random_int = RandomState(123456789)
        attribute = random_int.random_sample((w.n, 1))
        numifs = 8

        data = {'w': w, 'numifs': numifs}

        print "I have {} cores in a shared memory space".format(nlocalcores)
    else:
        data = None

    #Broadcast 2 sets of data, a list of Python objects and an array of attribute information
    data = comm.bcast(
        data, root=0
    )  #Inefficient Python object, better to get full, pass and reform?
    if rank != 0:
        w = data['w']
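
# The attribute generation above, minus MPI (a sketch): random_sample(shape) returns
# uniform [0, 1) floats, here one value per lattice cell as an (n, 1) column vector.
from numpy.random import RandomState

_rand = RandomState(123456789)
_n = 64                                   # stands in for w.n of the 8x8 lattice
_attribute = _rand.random_sample((_n, 1))
assert _attribute.shape == (_n, 1)
assert (_attribute >= 0).all() and (_attribute < 1).all()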
Example #6
# -*- coding: utf-8 -*-
'''use_NMF_in_olivetti_faces.py'''
from numpy.random import RandomState  # import RandomState to create a seeded random number generator
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces  # import the Olivetti faces dataset loader
from sklearn import decomposition

n_row, n_col = 2, 3  # layout (rows, columns) of the displayed images
n_components = n_row * n_col
image_shape = (64, 64)  # size of each face image

###############################################################################
## load the data and shuffle it
dataset = fetch_olivetti_faces(shuffle=True, random_state=RandomState(0))
faces = dataset.data


###############################################################################
def plot_gallery(title, images, n_col=n_col, n_row=n_row):
    plt.figure(figsize=(2. * n_col, 2.26 * n_row))  # create the figure at the given size (in inches)
    plt.suptitle(title, size=16)  # set the title and its font size

    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())

        # normalize the values and display as a grayscale image
        plt.imshow(comp.reshape(image_shape),
                   cmap=plt.cm.gray,
                   interpolation='nearest',
                   vmin=-vmax,
Example #7
    def __init__(self,
                 prng=RandomState(1234567890),
                 htCut=950.,
                 minMSD=105.,
                 maxMSD=210.,
                 tau32Cut=0.65,
                 ak8PtMin=400.,
                 bdisc=0.8484,
                 writePredDist=True,
                 isData=True,
                 year=2019,
                 UseLookUpTables=False,
                 lu=None,
                 ModMass=False,
                 RandomDebugMode=False):

        self.prng = prng
        self.htCut = htCut
        self.minMSD = minMSD
        self.maxMSD = maxMSD
        self.tau32Cut = tau32Cut
        self.ak8PtMin = ak8PtMin
        self.bdisc = bdisc
        self.writePredDist = writePredDist
        self.writeHistFile = True
        self.isData = isData
        self.year = year
        self.UseLookUpTables = UseLookUpTables
        self.ModMass = ModMass
        self.RandomDebugMode = RandomDebugMode
        self.lu = lu  # Look Up Tables

        self.ttagcats = [
            "Pt", "at", "pret", "0t", "1t", "1t+2t", "2t", "0t+1t+2t"
        ]  #anti-tag+probe, anti-tag, pre-tag, 0, 1, >=1, 2 ttags, any t-tag
        self.btagcats = ["0b", "1b", "2b"]  # 0, 1, >=2 btags
        self.ycats = ['cen', 'fwd']  # Central and forward
        # Combine categories like "0bcen", "0bfwd", etc:
        self.anacats = [
            t + b + y for t, b, y in itertools.product(
                self.ttagcats, self.btagcats, self.ycats)
        ]
        print(self.anacats)

        dataset_axis = hist.Cat("dataset", "Primary dataset")
        cats_axis = hist.Cat("anacat", "Analysis Category")

        jetmass_axis = hist.Bin("jetmass", r"Jet $m$ [GeV]", 50, 0, 500)
        jetpt_axis = hist.Bin("jetpt", r"Jet $p_{T}$ [GeV]", 50, 0, 5000)
        ttbarmass_axis = hist.Bin("ttbarmass", r"$m_{t\bar{t}}$ [GeV]", 50, 0,
                                  5000)
        jeteta_axis = hist.Bin("jeteta", r"Jet $\eta$", 50, -5, 5)
        jetphi_axis = hist.Bin("jetphi", r"Jet $\phi$", 50, -np.pi, np.pi)
        jety_axis = hist.Bin("jety", r"Jet $y$", 50, -3, 3)
        jetdy_axis = hist.Bin("jetdy", r"Jet $\Delta y$", 50, 0, 5)
        manual_axis = hist.Bin("jetp", r"Jet Momentum [GeV]", manual_bins)
        tagger_axis = hist.Bin("tagger", r"deepTag", 50, 0, 1)
        tau32_axis = hist.Bin("tau32", r"$\tau_3/\tau_2$", 50, 0, 2)

        subjetmass_axis = hist.Bin("subjetmass", r"SubJet $m$ [GeV]", 50, 0,
                                   500)
        subjetpt_axis = hist.Bin("subjetpt", r"SubJet $p_{T}$ [GeV]", 50, 0,
                                 2000)
        subjeteta_axis = hist.Bin("subjeteta", r"SubJet $\eta$", 50, -4, 4)
        subjetphi_axis = hist.Bin("subjetphi", r"SubJet $\phi$", 50, -np.pi,
                                  np.pi)

        self._accumulator = processor.dict_accumulator({
            'ttbarmass':
            hist.Hist("Counts", dataset_axis, cats_axis, ttbarmass_axis),
            'jetmass':
            hist.Hist("Counts", dataset_axis, cats_axis, jetmass_axis),
            'SDmass':
            hist.Hist("Counts", dataset_axis, cats_axis, jetmass_axis),
            'SDmass_precat':
            hist.Hist("Counts", dataset_axis, jetpt_axis, jetmass_axis),
            'jetpt':
            hist.Hist("Counts", dataset_axis, cats_axis, jetpt_axis),
            'jeteta':
            hist.Hist("Counts", dataset_axis, cats_axis, jeteta_axis),
            'jetphi':
            hist.Hist("Counts", dataset_axis, cats_axis, jetphi_axis),
            'probept':
            hist.Hist("Counts", dataset_axis, cats_axis, jetpt_axis),
            'probep':
            hist.Hist("Counts", dataset_axis, cats_axis, manual_axis),
            'jety':
            hist.Hist("Counts", dataset_axis, cats_axis, jety_axis),
            'jetdy':
            hist.Hist("Counts", dataset_axis, cats_axis, jetdy_axis),
            'deepTag_TvsQCD':
            hist.Hist("Counts", dataset_axis, cats_axis, jetpt_axis,
                      tagger_axis),
            'deepTagMD_TvsQCD':
            hist.Hist("Counts", dataset_axis, cats_axis, jetpt_axis,
                      tagger_axis),
            'tau32':
            hist.Hist("Counts", dataset_axis, cats_axis, tau32_axis),
            'tau32_2D':
            hist.Hist("Counts", dataset_axis, cats_axis, jetpt_axis,
                      tau32_axis),
            'tau32_precat':
            hist.Hist("Counts", dataset_axis, jetpt_axis, tau32_axis),
            'subjetmass':
            hist.Hist("Counts", dataset_axis, cats_axis, subjetmass_axis),
            'subjetpt':
            hist.Hist("Counts", dataset_axis, cats_axis, subjetpt_axis),
            'subjeteta':
            hist.Hist("Counts", dataset_axis, cats_axis, subjeteta_axis),
            'subjetphi':
            hist.Hist("Counts", dataset_axis, cats_axis, subjetphi_axis),
            'numerator':
            hist.Hist("Counts", dataset_axis, cats_axis, manual_axis),
            'denominator':
            hist.Hist("Counts", dataset_axis, cats_axis, manual_axis),
            'cutflow':
            processor.defaultdict_accumulator(int),
        })
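
# A cautionary sketch about the signature above: `prng=RandomState(1234567890)` as a default
# argument is evaluated once, at definition time, so every call (and every processor instance)
# that relies on the default shares one generator object.
from numpy.random import RandomState

def _make(prng=RandomState(1234567890)):
    return prng

_a, _b = _make(), _make()
assert _a is _b                # one shared RandomState
_a.rand()                      # advancing it also advances the stream _b sees
# the usual alternative is `prng=None` in the signature and `prng = prng or RandomState(seed)` inside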
Example #8
def interaction_test(data,
                     category_column='category',
                     compare_only=None,
                     num_bootstraps=1000,
                     bootstrap_size=.9):
    """
    :param data: pandas DataFrame with x and y values and the category column
    :param category_column:
    :param compare_only: instead of all pair-wise tests, compare all other categories only with this category
    :param num_bootstraps: number of bootstraps to determine regression parameters
    :param bootstrap_size: size of each bootstrap to determine regression parameters
    :return: a pandas DataFrame with compared categories and the corresponding test values
    """
    groups = dict(list(data.groupby(category_column)))
    result = []
    if compare_only:
        pairs = [
            dict((item, (compare_only, groups[compare_only])))
            for item in groups.items() if item[0] != compare_only
        ]
    else:
        pairs = list(map(dict, itertools.combinations(groups.items(), 2)))
    for pair in tqdm(pairs, desc=f'pair interactions'):
        descriptor1, descriptor2 = pair.keys()
        data1, data2 = pair.values()
        x1, y1 = data1['x'].values, data1['y'].values
        x2, y2 = data2['x'].values, data2['y'].values
        x1, x2 = scipy.stats.zscore(x1), scipy.stats.zscore(
            x2)  # normalize to control for different variances
        # run bootstraps to collect multiple samples of slope/intercept
        rng = RandomState(0)
        bootstraps = []
        for bootstrap in range(num_bootstraps):
            indices1 = rng.choice(np.arange(len(x1)),
                                  size=int(bootstrap_size * len(x1)))
            indices2 = rng.choice(np.arange(len(x2)),
                                  size=int(bootstrap_size * len(x2)))
            bootstrap_x1, bootstrap_y1 = x1[indices1], y1[indices1]
            bootstrap_x2, bootstrap_y2 = x2[indices2], y2[indices2]
            regression1 = LinearRegression().fit(
                np.expand_dims(bootstrap_x1, 1), bootstrap_y1)
            regression2 = LinearRegression().fit(
                np.expand_dims(bootstrap_x2, 1), bootstrap_y2)
            slope1, slope2 = regression1.coef_, regression2.coef_
            intercept1, intercept2 = regression1.intercept_, regression2.intercept_
            bootstraps.append({
                'bootstrap': bootstrap,
                'slope1': slope1,
                'slope2': slope2,
                'intercept1': intercept1,
                'intercept2': intercept2
            })
        bootstraps = pd.DataFrame(bootstraps)
        ttest_slope = ttest_ind(bootstraps['slope1'].values,
                                bootstraps['slope2'].values)
        ttest_int = ttest_ind(bootstraps['intercept1'].values,
                              bootstraps['intercept2'].values)
        result.append({
            'data1': descriptor1,
            'data2': descriptor2,
            'test': 'ttest_ind',
            'slope1': np.mean(bootstraps['slope1']),
            'slope2': np.mean(bootstraps['slope2']),
            'intercept1': np.mean(bootstraps['intercept1']),
            'intercept2': np.mean(bootstraps['intercept2']),
            'p_slope': ttest_slope.pvalue.squeeze(),
            'statistic_slope': ttest_slope.statistic.squeeze(),
            'p_intercept': ttest_int.pvalue.squeeze(),
            'statistic_intercept': ttest_int.statistic.squeeze()
        })
    return pd.DataFrame(result)
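
# The bootstrap loop above in miniature (a sketch): RandomState.choice samples indices with
# replacement by default, which is exactly what a bootstrap resample needs.
import numpy as np
from numpy.random import RandomState

_rng = RandomState(0)
_x = _rng.randn(200)
_boot_means = [_x[_rng.choice(np.arange(len(_x)), size=int(0.9 * len(_x)))].mean()
               for _ in range(1000)]
# the spread of the bootstrap means approximates the standard error of the sample mean
print(np.mean(_boot_means), np.std(_boot_means))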
Example #9
import numpy as np
import scipy.linalg as la
from numpy.random import RandomState
from scipy import signal

# Parameters
M = 10
T = 200
N = 3 * T
p = 0.2
rn = RandomState(364)

beta_true = 2 * rn.rand(M) - 1
beta_true = beta_true / la.norm(beta_true, 1)
x_true = (rn.rand(N) < p) * rn.randn(N)

y_shifted = np.zeros(N + M)
# Shift y by M, then generate y using AR model
for t in range(N):
    y_shifted[t + M] = x_true[t] + np.sum(
        np.flipud(beta_true) * y_shifted[t + M - M:t + M])
# Only observe a length T subsequence.
y = y_shifted[1 + T + M:1 + T + T + M]
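
# Sketch of the sparse-input trick used above: (rn.rand(N) < p) is a Bernoulli(p) mask, so
# multiplying it with rn.randn(N) leaves roughly a fraction p of the entries nonzero.
import numpy as np
from numpy.random import RandomState

_rn = RandomState(364)
_N, _p = 600, 0.2
_x = (_rn.rand(_N) < _p) * _rn.randn(_N)
print(np.count_nonzero(_x) / _N)          # close to p = 0.2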
Example #10
    """Return eigenvalues, sorted."""
    vals, vecs = np.linalg.eigh(cov)
    order = vals.argsort()[::-1]
    return vals[order], vecs[:, order]


def cov_ellipse(cov, nstd):
    """Get the covariance ellipse."""
    vals, vecs = eigsorted(cov)
    r1, r2 = nstd * np.sqrt(vals)
    theta = np.arctan2(*vecs[:, 0][::-1])

    return r1, r2, theta


CMAP = matplotlib.colors.ListedColormap(RandomState(0).rand(256 * 256, 3))


def plot_covell(c, w, x, P):
    """Plot cov ellipse with border and aplha color."""
    ca = plot_cov_ellipse(P[0:2, 0:2], x[0:2])
    ce = plot_cov_ellipse(P[0:2, 0:2], x[0:2])
    ca.set_alpha(w * 0.2)
    ce.set_alpha(w * 0.8)
    ca.set_facecolor(CMAP(c))
    ce.set_facecolor('none')
    ce.set_edgecolor(CMAP(c))
    ce.set_linewidth(1)


def plot_trace(t,
Example #11
hw = load_hardware()

# Read the tiles to be used and define the number of tiles. Path obtained from command line
# argument.
if (tilesfile is None):
    nominal_tiles = load_tiles()
else:
    nominal_tiles = load_tiles(tiles_file=tilesfile)

ntiles = len(nominal_tiles.id)

# Container for the Mersenne Twister pseudo-random number generator. Since every MPI rank is
# seeded with a different number, each rank generates its own set of subpriorities.
#seed = 62*rank_group + rank
seed = rank
random_generator = RandomState(seed=seed)

# Load Target data.
tgs = Targets()

# First load the MTL file and compute the target IDs.
load_target_file(tgs, mtlfile, random_generator=random_generator)

# --------------------------------------------------------------------------------------------------
# TARGET IDs IN PARALLEL
#
# Target IDs for each tracer in parallel.
# The total number of targets is split almost evenly between all processes. Each MPI task takes a
# portion of the total targets and extracts the corresponding target IDs for each type of tracer.
# --------------------------------------------------------------------------------------------------
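
# The per-rank seeding described above, without MPI (a sketch): giving each rank its own seed
# yields streams that differ between ranks but are individually reproducible.
from numpy.random import RandomState

_streams = {rank: RandomState(seed=rank).uniform(size=4) for rank in range(3)}
assert not (_streams[0] == _streams[1]).any()                        # ranks differ
assert (_streams[2] == RandomState(seed=2).uniform(size=4)).all()    # each rank is reproducible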
Example #12
    def validate(self) -> None:
        """
        Check arguments correctness and consistency.

        * input files must exist
        * output files must be in a writeable directory
        * if no seed specified, set random seed.
        * length of per-chain lists equals specified # of chains
        """
        if self.model_name is None:
            raise ValueError('no stan model specified')
        if self.model_exe is None:
            raise ValueError('model not compiled')

        if self.chain_ids is not None:
            for i in range(len(self.chain_ids)):
                if self.chain_ids[i] < 1:
                    raise ValueError('invalid chain_id {}'.format(
                        self.chain_ids[i]))
        if self.output_dir is not None:
            self.output_dir = os.path.realpath(
                os.path.expanduser(self.output_dir))
            if not os.path.exists(self.output_dir):
                try:
                    os.makedirs(self.output_dir)
                    self._logger.info('created output directory: %s',
                                      self.output_dir)
                except (RuntimeError, PermissionError):
                    raise ValueError(
                        'invalid path for output files, no such dir: {}'.
                        format(self.output_dir))
            if not os.path.isdir(self.output_dir):
                raise ValueError(
                    'specified output_dir not a directory: {}'.format(
                        self.output_dir))
            try:
                testpath = os.path.join(self.output_dir, str(time()))
                with open(testpath, 'w+'):
                    pass
                os.remove(testpath)  # cleanup
            except Exception:
                raise ValueError('invalid path for output files,'
                                 ' cannot write to dir: {}'.format(
                                     self.output_dir))

        if self.seed is None:
            rng = RandomState()
            self.seed = rng.randint(1, 99999 + 1)
        else:
            if not isinstance(self.seed, (int, list)):
                raise ValueError(
                    'seed must be an integer between 0 and 2**32-1,'
                    ' found {}'.format(self.seed))
            if isinstance(self.seed, int):
                if self.seed < 0 or self.seed > 2**32 - 1:
                    raise ValueError(
                        'seed must be an integer between 0 and 2**32-1,'
                        ' found {}'.format(self.seed))
            else:
                if self.chain_ids is None:
                    raise ValueError(
                        'seed must not be a list when no chains used')

                if len(self.seed) != len(self.chain_ids):
                    raise ValueError(
                        'number of seeds must match number of chains,'
                        ' found {} seed for {} chains '.format(
                            len(self.seed), len(self.chain_ids)))
                for i in range(len(self.seed)):
                    if self.seed[i] < 0 or self.seed[i] > 2**32 - 1:
                        raise ValueError('seed must be an integer value'
                                         ' between 0 and 2**32-1,'
                                         ' found {}'.format(self.seed[i]))

        if isinstance(self.data, str):
            if not os.path.exists(self.data):
                raise ValueError('no such file {}'.format(self.data))
        elif self.data is None:
            if isinstance(self.method_args, OptimizeArgs):
                raise ValueError('data must be set when optimizing')
        elif not isinstance(self.data, (str, dict)):
            raise ValueError('data must be string or dict')

        if self.inits is not None:
            if isinstance(self.inits, (Integral, Real)):
                if self.inits < 0:
                    raise ValueError('inits must be > 0, found {}'.format(
                        self.inits))
            elif isinstance(self.inits, str):
                if not os.path.exists(self.inits):
                    raise ValueError('no such file {}'.format(self.inits))
            elif isinstance(self.inits, list):
                if self.chain_ids is None:
                    raise ValueError(
                        'inits must not be a list when no chains are used')

                if len(self.inits) != len(self.chain_ids):
                    raise ValueError(
                        'number of inits files must match number of chains,'
                        ' found {} inits files for {} chains '.format(
                            len(self.inits), len(self.chain_ids)))
                names_set = set(self.inits)
                if len(names_set) != len(self.inits):
                    raise ValueError('each chain must have its own init file,'
                                     ' found duplicates in inits files list.')
                for i in range(len(self.inits)):
                    if not os.path.exists(self.inits[i]):
                        raise ValueError('no such file {}'.format(
                            self.inits[i]))
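
# The seed fallback above in isolation (a sketch): an unseeded RandomState seeds itself from the
# OS, and randint's upper bound is exclusive, so the drawn default seed lies in [1, 99999].
from numpy.random import RandomState

_rng = RandomState()
_seed = _rng.randint(1, 99999 + 1)
assert 1 <= _seed <= 99999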
Example #13
def main(x,
         y,
         training_fraction=0.80,
         learning_rate=0.001,
         epochs=1000,
         batch_size=1000,
         update_summary_at=100):
    """
    :param x: shape = m * 784
    :param y: shape = m * 10
    :param training_fraction:
    :param epochs:
    :param batch_size:
    :param update_summary_at:
    :return:
    """
    training_size = int(len(x) * training_fraction)

    # If the last batch would be smaller than half of the desired batch size, raise an exception.
    # In the future, instead of raising, we may simply skip that last batch.

    assert training_size % batch_size == 0 or training_size % batch_size > batch_size / 2
    last_batch_size = training_size % batch_size

    _data = train_test_split(x,
                             y,
                             train_size=training_fraction,
                             stratify=y.argmax(1),
                             random_state=0)

    # training_data_x, training_data_y = x[:training_size], y[:training_size]
    # testing_data_x, testing_data_y = x[training_size:], y[training_size:]

    training_data_x, training_data_y = _data[0], _data[2]
    testing_data_x, testing_data_y = _data[1], _data[3]

    feature_size = training_data_x.shape[1]
    hidden_nu = 20
    output_size = training_data_y.shape[1]

    x = placeholder(float32, [None, feature_size], name='x')
    y = placeholder(float32, [None, output_size], name='y')

    # also check xavier_initializer
    W1 = Variable(random_normal([feature_size, hidden_nu],
                                seed=1,
                                dtype=float32),
                  name='W1')
    b1 = Variable(random_normal([hidden_nu], dtype=float32, seed=2),
                  name='b1')  # use zeros also

    W2 = Variable(random_normal([hidden_nu, output_size],
                                seed=3,
                                dtype=float32),
                  name='W2')
    b2 = Variable(random_normal([output_size], dtype=float32, seed=4),
                  name='b2')

    L0_L1 = x @ W1 + b1
    L1_L1 = nn.relu(L0_L1)

    L1_L2 = L1_L1 @ W2 + b2
    L2_L2 = nn.softmax(L1_L2)

    # note: softmax_cross_entropy_with_logits_v2 expects unscaled logits, so pass the
    # pre-softmax activations (L1_L2) rather than the softmax output
    cost = reduce_mean(nn.softmax_cross_entropy_with_logits_v2(logits=L1_L2,
                                                               labels=y),
                       name='cost')

    optimization = train.AdamOptimizer(learning_rate=learning_rate).minimize(
        cost, name='optimization')

    init = global_variables_initializer()

    current_predictions = equal(argmax(L2_L2, axis=1), argmax(y, axis=1))

    accuracy = tf.round(
        10000 * reduce_mean(cast(current_predictions, float32))) / 100

    with Session() as sess:
        writer = summary.FileWriter('mnist/visualize', graph=sess.graph)

        cost_summary = summary.scalar('cost', cost)
        training_accuracy_summary = summary.scalar('training accuracy',
                                                   accuracy)
        testing_accuracy_summary = summary.scalar('testing accuracy', accuracy)

        sess.run(init)

        # ---------------------------------------------------------------------------------

        for e in range(epochs):

            _idx = RandomState(e).permutation(
                training_size)  # check how much it matters to add
            # uniformity of the data in each batch.

            total_cost = 0

            def mini_batch(start_idx, end_idx):
                curr_idx = _idx[start_idx:end_idx]

                _x = training_data_x[curr_idx]
                _y = training_data_y[curr_idx]

                _, c = sess.run([optimization, cost], feed_dict={x: _x, y: _y})

                return (end_idx - start_idx) * c

            for i in range(0, training_size, batch_size):
                total_cost += mini_batch(i, min(i + batch_size, training_size))

            if last_batch_size != 0:
                total_cost += mini_batch(training_size - last_batch_size,
                                         training_size)

            print('epoch:', e, 'total cost:', round(
                total_cost,
                3))  # check how this 'total_cost' can be fed into summary.

            if e % update_summary_at == 0:
                _total_cost, training_accuracy = sess.run(
                    [cost_summary, training_accuracy_summary],
                    feed_dict={
                        x: training_data_x,
                        y: training_data_y
                    })
                writer.add_summary(_total_cost, e)
                writer.add_summary(training_accuracy, e)

                testing_accuracy = sess.run(testing_accuracy_summary,
                                            feed_dict={
                                                x: testing_data_x,
                                                y: testing_data_y
                                            })
                writer.add_summary(testing_accuracy, e)

        writer.close()
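
# The batching scheme above, stripped of TensorFlow (a sketch): RandomState(e).permutation(n)
# gives a deterministic shuffle per epoch, so each epoch visits every example exactly once in a
# new, reproducible order.
import numpy as np
from numpy.random import RandomState

_n, _batch_size = 10, 4
for _e in range(2):
    _idx = RandomState(_e).permutation(_n)
    _batches = [_idx[i:i + _batch_size] for i in range(0, _n, _batch_size)]
    assert np.array_equal(np.sort(np.concatenate(_batches)), np.arange(_n))  # full coverage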
Example #14
    def __init__(self,
                 n,
                 k,
                 training_set,
                 validation_set,
                 weights_mu=0,
                 weights_sigma=1,
                 weights_prng=RandomState(),
                 lr_iteration_limit=1000,
                 mini_batch_size=0,
                 convergence_decimals=2,
                 shuffle=False,
                 logger=None):
        """
        Initialize a Correlation Attack Learner for the specified LTF Array which uses transform_lightweight_secure.

        :param n: Input length
        :param k: Number of parallel LTFs in the LTF Array
        :param training_set: The training set, i.e. a data structure containing challenge response pairs
        :param validation_set: The validation set, i.e. a data structure containing challenge response pairs. Used for
        approximating accuracies of permuted models (can be smaller e.g. 0.1*training_set_size)
        :param weights_mu: mean of the Gaussian that is used to choose the initial model
        :param weights_sigma: standard deviation of the Gaussian that is used to choose the initial model
        :param weights_prng: PRNG to draw the initial model from. Defaults to fresh `numpy.random.RandomState` instance.
        :param lr_iteration_limit: Iteration limit for a single LR learner run
        :param logger: logging.Logger
                       Logger which is used to log detailed information of learn iterations.
        """
        self.n = n
        self.k = k

        self.validation_set_efba = ChallengeResponseSet(
            challenges=LTFArray.efba_bit(
                LTFArray.transform_lightweight_secure(
                    validation_set.challenges, k)),
            responses=validation_set.responses)

        self.logger = logger

        self.lr_learner = LogisticRegression(
            t_set=training_set,
            n=n,
            k=k,
            transformation=LTFArray.transform_lightweight_secure,
            combiner=LTFArray.combiner_xor,
            weights_mu=weights_mu,
            weights_sigma=weights_sigma,
            weights_prng=weights_prng,
            logger=logger,
            iteration_limit=lr_iteration_limit,
            minibatch_size=mini_batch_size,
            convergence_decimals=convergence_decimals,
            shuffle=shuffle)

        self.initial_accuracy = .5
        self.initial_lr_iterations = 0
        self.initial_model = None
        self.total_lr_iterations = 0
        self.best_permutation_iteration = 0
        self.total_permutation_iterations = 0
        self.best_permutation = None
        self.best_accuracy = None

        assert n in (
            64, 128
        ), 'Correlation attack for %i bit is currently not supported.' % n
        assert validation_set.N >= 1000, 'Validation set should contain at least 1000 challenges.'

        self.correlation_permutations = loadmat(
            'data/correlation_permutations_lightweight_secure_%i_10.mat' %
            n)['shiftOverviewData'][:, :, 0].astype('int64')
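
# Sketch of what weights_mu, weights_sigma and weights_prng govern above (per the docstring, the
# initial model is drawn from a Gaussian); the array shape here is purely illustrative.
from numpy.random import RandomState

_n, _k = 64, 4
_weights = RandomState(0xbeef).normal(loc=0.0, scale=1.0, size=(_k, _n))
assert _weights.shape == (_k, _n)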
Example #15
    def __init__(self,
                 path,
                 input_size=28 * 28,
                 data_switch=False,
                 is_image=False,
                 is_double=False,
                 directed=False,
                 reorder=False,
                 raw_latent=False,
                 seed=633):
        """__init__

        :param path:
        :param input_size:
        :param data_switch: random swap pairs
        :param is_image:
        :param is_double:
        :param directed:
        :param reorder: make labels ordered
        :param raw_latent:
        :param seed:
        """
        self._rng = RandomState(seed)
        self.input_size = input_size
        self.feature_path = os.path.join(path, 'features.b')
        self.is_image = is_image
        self.is_double = is_double
        self.directed = directed
        self.data_switch = data_switch
        self.raw_latent = raw_latent

        # unlabeled
        if is_image:
            if is_double:
                self.asins_to_index = load_double_offsets(self.feature_path)
            else:
                self.asins_to_index = load_images_offsets(self.feature_path)
            self.index_to_asins = {
                index: asin
                for asin, index in self.asins_to_index.items()
            }
            self.num_examples = len(self.index_to_asins)
            self.item_indices = np.array(sorted(list(self.index_to_asins)))
            perm = self._rng.permutation(self.num_examples)
            self.item_indices = self.item_indices[perm]
        else:
            self.asins_to_index = load_features_indices(
                self.feature_path, input_size=self.input_size)
            self.index_to_asins = {
                index: asin
                for asin, index in self.asins_to_index.items()
            }
            self.num_examples = max(self.index_to_asins)
            print('self.num_examples:{}'.format(self.num_examples))
            self.item_indices = np.arange(self.num_examples)
            print('self.item_indices:{}'.format(self.item_indices))
        self.head_unlabeled = 0

        if self.directed:
            source_ids = []
            with open(os.path.join(path, 'source.txt')) as infile:
                for line in infile:
                    asin = line.strip()
                    source_ids.append(asin)
            self.source_indices = np.array(
                sorted([self.asins_to_index[asin] for asin in source_ids]))
            self.num_source = self.source_indices.shape[0]
            perm = self._rng.permutation(self.num_source)
            self.source_indices = self.source_indices[perm]
            self.head_source = 0

            target_ids = []
            with open(os.path.join(path, 'target.txt')) as infile:
                for line in infile:
                    asin = line.strip()
                    target_ids.append(asin)
            self.target_indices = np.array(
                sorted([self.asins_to_index[asin] for asin in target_ids]))
            self.num_target = self.target_indices.shape[0]
            perm = self._rng.permutation(self.num_target)
            self.target_indices = self.target_indices[perm]
            self.head_target = 0

        # pairs
        self.pairs_pos = []
        self.pairs_neg = []

        with open(os.path.join(path, 'pairs_pos.txt')) as infile:
            if reorder:
                buckets = defaultdict(list)
                for line in infile:
                    a, _, b = line.strip().split()
                    buckets[a[0]].append((a, b))

                lines = []
                while sum(len(v) for v in buckets.values()) > 0:
                    for _, v in sorted(buckets.items(), key=lambda x: x[0]):
                        if len(v) > 0:
                            lines.append(v.pop())
                for a, b in lines:
                    self.pairs_pos.append(
                        [self.asins_to_index[a], self.asins_to_index[b]])
            else:
                for line in infile:
                    a, _, b = line.strip().split()
                    self.pairs_pos.append(
                        [self.asins_to_index[a], self.asins_to_index[b]])

        with open(os.path.join(path, 'pairs_neg.txt')) as infile:
            for line in infile:
                a, _, b = line.strip().split()
                self.pairs_neg.append(
                    [self.asins_to_index[a], self.asins_to_index[b]])

        self.pairs_pos = np.array(self.pairs_pos)
        self.pairs_neg = np.array(self.pairs_neg)
        self.head_labeled_pos = 0
        self.head_labeled_neg = 0
        self.num_examples_labeled_pos = self.pairs_pos.shape[0]
        self.num_examples_labeled_neg = self.pairs_neg.shape[0]
Example #16
        self.put(1)
        return r


en = enumerate

# load_normal_q = Queue(2)
n_proc = 3
res_shape = [100, 2, 2, 5, 64, 64]

h = res_shape[-1]
zoom_order = 1
ratio = 0.25
ri = random.randint(9999)
print(ri)
rng = RandomState(ri)
t_ofs = (32 - res_shape[3]) / 2


def load_normal(path, cut=0):
    v, t, o, p, l = load_gzip(path)
    v = v[:, :, :res_shape[2]]
    if res_shape[2] == 3:
        v[:, :, 2] *= 255
    v_new = empty(res_shape, dtype="uint8")

    # t = concatenate([t,o],axis=1)

    for i in range(v.shape[0]):  #batch

        if p[i] < 10: p[i] = 100
Example #17
    pl.figure()
    for i, c, label in zip(target_ids, colors, target_names):
        pl.plot(data[target == i, 0],
                data[target == i, 1],
                'o',
                c=c,
                label=label)
    pl.legend(target_names)


#----------------------------------------------------------------------
# Load iris data
iris = load_iris()
X, y = iris.data, iris.target

#----------------------------------------------------------------------
# First figure: PCA
pca = PCA(n_components=2, whiten=True).fit(X)
X_pca = pca.transform(X)
plot_2D(X_pca, iris.target, iris.target_names)

#----------------------------------------------------------------------
# Second figure: Kmeans labels
from sklearn.cluster import KMeans
from numpy.random import RandomState
rng = RandomState(42)
kmeans = KMeans(3, random_state=rng).fit(X_pca)
plot_2D(X_pca, kmeans.labels_, ["c0", "c1", "c2"])

pl.show()
Example #18
def set_random_id(self, random_id):
    self.random_id = self.random_state + random_id
    self.rnd = RandomState(self.random_id)
Example #19
import argparse
import argh
import sys
import os
import tensorflow as tf
import numpy as np

import logging
import daiquiri

daiquiri.setup(level=logging.DEBUG)
logger = daiquiri.getLogger(__name__)

from numpy.random import RandomState

prng = RandomState(1234567890)

from matplotlib import pyplot as plt


def plot_imgs(inputs):
    """Plot smallNORB images helper"""
    fig = plt.figure()
    plt.title('Show images')
    r = np.floor(np.sqrt(len(inputs))).astype(int)
    for i in range(r**2):
        size = inputs[i].shape[1]
        sample = inputs[i].flatten().reshape(size, size)
        a = fig.add_subplot(r, r, i + 1)
        a.imshow(sample, cmap='gray')
    plt.show()
Example #20
def setup_y_pred_tensor(request):
    prng = RandomState(1337)
    y_pred_value = prng.randn(1,9,9,5,9) / 4
    y_pred = tf.placeholder(tf.float32, [None, 9, 9, 5, 9], name='y_pred')
    return y_pred, y_pred_value
Example #21
# Perceptron model (primal form)
a = tf.matmul(x,w)+b
y = tf.sigmoid(a)

# Loss function
# Mean squared error
cross_entropy = tf.reduce_mean(tf.square(y - y_))

# Learning rate
learning_rate = 0.01

# Perceptron learning algorithm (gradient descent) to optimize the parameters
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

# Randomly generate a 0/1 training dataset
rmd = RandomState(1)
dataset_size = 128
X = rmd.randint(2,size=(dataset_size,2))

# Label rule: here the result of x1 & x2 is used as the Y value; 0 marks a negative sample, 1 a positive sample
# AND operation
#Y = [[int(x1) and int(x2)] for (x1,x2) in X]
# OR operation
Y = [[int(x1) or int(x2)] for (x1,x2) in X]

# Create the session
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    print(sess.run(w))
    print(sess.run(b))
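
# The data generation above in isolation (a sketch): randint(2, size=(n, 2)) draws 0/1 feature
# pairs, and the OR rule labels a pair 1 unless both entries are zero, so about 3/4 of the
# labels come out positive for independent fair bits.
from numpy.random import RandomState

_rmd = RandomState(1)
_X = _rmd.randint(2, size=(128, 2))
_Y = [[int(x1) or int(x2)] for (x1, x2) in _X]
print(sum(y[0] for y in _Y) / len(_Y))    # roughly 0.75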
    
Example #22
def test_complexity_random_uniform(num_nodes, num_data_points, seed):
    rs = RandomState(seed)
    data = rs.uniform(-1, 1, size=(num_nodes, num_data_points))
    nc = compute_neural_complexity(data, rs)
    print('Neural Complexity (Random Uniform)', nc)
Example #23
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y_input')

a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

# tf.clip_by_value() restricts the computed values to a given range (1e-10 to 1.0)
# y_ is the ground truth and y is the prediction; this defines the cross-entropy loss
# For regression the most common loss is mean squared error (MSE): mse = tf.reduce_mean(tf.square(y_ - y))
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# softmax + cross_entropy is suitable for multi-class problems
# cross_entropy2 = tf.nn.softmax_cross_entropy_with_logits(y, y_)

train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

rdm = RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print(sess.run(w1))
    print(sess.run(w2))

    steps = 5000
    for i in range(steps):
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
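
# The synthetic data above without TensorFlow (a sketch): rdm.rand(n, 2) draws uniform features
# and the label is 1 when x1 + x2 < 1, which covers the triangle making up half the unit square.
from numpy.random import RandomState

_rdm = RandomState(1)
_X = _rdm.rand(128, 2)
_Y = [[int(x1 + x2 < 1)] for (x1, x2) in _X]
print(sum(y[0] for y in _Y) / len(_Y))    # close to 0.5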
Example #24
def test_complexity_random_gaussian(num_nodes, num_data_points, seed):
    rs = RandomState(seed)
    data = rs.normal(size=(num_nodes, num_data_points))
    nc = compute_neural_complexity(data, rs)
    print('Neural Complexity (Random Normal Gaussian)', nc)
Example #25
import logging
from time import time

from numpy.random import RandomState
import pylab as pl

from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import MiniBatchKMeans
from sklearn import decomposition

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')
n_row, n_col = 2, 3
n_components = n_row * n_col
image_shape = (64, 64)
rng = RandomState(0)

###############################################################################
# Load faces data
dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
faces = dataset.data

n_samples, n_features = faces.shape

# global centering
faces_centered = faces - faces.mean(axis=0)

# local centering
faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

print "Dataset consists of %d faces" % n_samples
Example #26
def test_complexity_constant(num_nodes, num_data_points, seed):
    rs = RandomState(seed)
    data = np.ones((num_nodes, num_data_points))
    nc = compute_neural_complexity(data, rs)
    print('Neural Complexity (Constant-ones)', nc)
Example #27
def seed(self, seed):
    self._random = RandomState(seed=seed)
Example #28
    def rngs(self) -> Dict[str, RandomState]:
        rngs = {}
        for key, seed in self._seeds.items():
            rngs[key] = RandomState(seed)

        return rngs
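
# The pattern above in miniature (keys are hypothetical): one seed per named stream gives
# independent, individually reproducible generators, e.g. one for sampling and one for shuffling.
from numpy.random import RandomState

_seeds = {'sample': 11, 'shuffle': 12}
_rngs = {key: RandomState(seed) for key, seed in _seeds.items()}
assert (_rngs['sample'].rand(3) == RandomState(11).rand(3)).all()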
Example #29
def grid_search(data_path,
                dataset,
                features_path,
                feature_type,
                cache_path,
                tune_metric,
                n_runs=300):
    #read patients data
    df_patients = pd.read_csv(features_path + "patients.csv",
                              sep="\t",
                              header=0).drop(columns=["TEXT"])

    df_train, df_test, df_val = read_dataset(data_path, dataset, df_patients)
    print("[train/test set size: {}/{}]".format(len(df_train), len(df_test)))
    print("[grid searching with {} classifier]".format(CLASSIFIER))
    subject_ids, feature_matrix = extract_features(feature_type, features_path)
    train, val, test, label_vocab = vectorize(df_train, df_val, df_test,
                                              subject_ids)
    train_idx, train_Y = train["all"]
    val_idx, val_Y = val["all"]
    #slice the feature matrix to get the corresponding instances
    train_X = feature_matrix[train_idx, :]
    val_X = feature_matrix[val_idx, :]

    init_randomizer = RandomState(1)
    shuffle_randomizer = RandomState(2)
    results = []
    dirname = os.path.dirname(cache_path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    res_fname = cache_path + "/grid_{}_{}_{}.pkl".format(
        dataset, feature_type, tune_metric).lower()
    try:
        df_results = pd.read_csv(res_fname)
    except FileNotFoundError:
        df_results = pd.DataFrame(columns=["seed"] + list(val.keys()))
        df_results.to_csv(res_fname, index=False, header=True)
    groups = list(val.keys())
    skip_seeds = set(df_results["seed"])
    ##train/test classifier for each random seed pair
    for j in range(n_runs):
        init_seed = init_randomizer.randint(10000)
        shuffle_seed = shuffle_randomizer.randint(10000)
        seed = "{}x{}".format(init_seed, shuffle_seed)
        if seed in skip_seeds:
            print("skipped seed: {}".format(seed))
            continue
        curr_results = {"seed": seed}
        print(" > seed: {}".format(seed))
        model = train_classifier(train_X,
                                 train_Y,
                                 val_X,
                                 val_Y,
                                 input_dimension=train_X.shape[-1],
                                 init_seed=init_seed,
                                 shuffle_seed=shuffle_seed)
        ####### VALIDATION ########

        #test each subgroup (note that *all* is also a subgroup)
        for subgroup in groups:
            val_idx_sub, val_Y_sub = val[subgroup]
            val_X_sub = feature_matrix[val_idx_sub, :]
            res_sub = evaluate_classifier(model, val_X_sub, val_Y_sub,
                                          label_vocab, feature_type, seed,
                                          subgroup)
            curr_results[subgroup] = res_sub[tune_metric]

        df_results = df_results.append(curr_results, ignore_index=True)
        df_results.to_csv(res_fname, index=False, header=True)

    #return the best seeds
    return get_best_seeds(df_results, groups)
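
# The seed bookkeeping above in isolation (a sketch): two dedicated RandomStates emit an
# init/shuffle seed pair per run, and because reruns regenerate the same pairs, previously
# evaluated seeds can be skipped via skip_seeds.
from numpy.random import RandomState

def _seed_list(n_runs):
    init_r, shuf_r = RandomState(1), RandomState(2)
    return ["{}x{}".format(init_r.randint(10000), shuf_r.randint(10000)) for _ in range(n_runs)]

assert _seed_list(5) == _seed_list(5)     # deterministic across reruns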
Example #30
import numpy as np
from HMC import HMC
from LocalRFSamplerForBinaryWeights import LocalRFSamplerForBinaryWeights
from PhyloLocalRFMove import PhyloLocalRFMove
from Utils import generateBivariateFeatGradientIndexWithoutPiWithBivariateFeat
from ReversibleRateMtxPiAndBinaryWeightsWithGraphicalStructure import ReversibleRateMtxPiAndBinaryWeightsWithGraphicalStructure
from FullTrajectorGeneration import getObsArrayAtSameGivenTimes
from FullTrajectorGeneration import endPointSamplerSummarizeStatisticsOneBt
from HardCodedDictionaryUtils import getHardCodedDictChainGraph
from datetime import datetime
from numpy.random import RandomState

nStates = 3
## generate the exchangeable coefficients
## set the seed so that we can reproduce the generated data
seed = 1234567890
prng = RandomState(seed)

nBivariateFeat = int(nStates * (nStates - 1) / 2)
bivariateWeights = prng.uniform(0, 1, nBivariateFeat)

np.random.seed(2)
stationaryWeights = prng.uniform(0, 1, nStates)

print("The true stationary weights are")
# change the extreme stationary weights to have a balanced stationary distribution
#stationaryWeights[2] = -0.5
#stationaryWeights[3] = 0.5
#stationaryWeights[11] = 0.6

## The true stationary weights are
## [-0.41675785 -0.05626683 -1.35        0.05       -1.         -0.84174737]