Example #1
import numpy as n
import scipy.stats as st

def dnnPower(shape1, shape2, lb=0, rb=1, NE=1000000):
    """Kolmogorov-Smirnov distance between two power-law CDFs on [lb, rb]."""
    a = st.powerlaw(shape1)
    b = st.powerlaw(shape2)
    domain = n.linspace(lb, rb, NE)
    avals = a.cdf(domain)
    bvals = b.cdf(domain)
    diffP = n.abs(avals - bvals).max()
    return diffP
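A minimal usage sketch: with the default domain [0, 1] and the shape pair used later on this page, the result should match the diffP value printed in Example #9 (about 0.046).

print(dnnPower(1.5, 1.7))  # ~0.0460, KS distance between powerlaw(1.5) and powerlaw(1.7)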
Example #2
    def cmp_scipy(self, g, s):
        x = np.linspace(-1, s + 1, 10 * (s + 2) + 1)
        q = np.linspace(-1, 2, 51)

        p0 = stats.powerlaw(g + 1, scale=s)
        v0 = p0.pdf(x)
        c0 = p0.cdf(x)
        x0 = p0.ppf(q)

        p1 = PowerLaw(g, 0, s)
        v1 = p1.pdf(x)
        c1 = p1.cdf(x)
        x1 = p1.ppf(q)

        self.assertIn(0, x)
        self.assertIn(s, x)
        np.testing.assert_allclose(v0, v1)
        np.testing.assert_allclose(c0, c1)
        np.testing.assert_allclose(x0, x1)

        rep = str(p1)
        self.assertEqual(rep[:9], "PowerLaw(")
        self.assertEqual(rep[-1:], ")")
        v = rep[9:-1].split(",")
        self.assertEqual(float(v[0]), g)
        self.assertEqual(float(v[1]), 0)
        self.assertEqual(float(v[2]), s)
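The parameter mapping this test relies on: scipy's powerlaw with shape g + 1 and scale s has density (g + 1) * x**g / s**(g + 1) on [0, s], which is exactly a power law of exponent g. A quick numerical check of that identity (assumes only numpy and scipy):

import numpy as np
from scipy import stats

g, s = 1.5, 3.0
x = np.linspace(0.1, s, 50)
analytic = (g + 1) * x**g / s**(g + 1)
np.testing.assert_allclose(stats.powerlaw(g + 1, scale=s).pdf(x), analytic)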
def test_power_law_converter_inversion(power):
    """
    Check that the power law inversion is invertible
    """
    c = PowerLawConverter(power, scale=1)
    x = stats.powerlaw(power + 1).rvs(size=1000)
    y, _ = c.to_uniform_parameter(x)
    x_out, _ = c.from_uniform_parameter(y)
    np.testing.assert_array_almost_equal(x, x_out)
def test_power_law_converter_distribution(power):
    """
    Check that the distribution of resulting samples is uniform when
    converting from a power law.
    """
    c = PowerLawConverter(power, scale=1)
    x = stats.powerlaw(power + 1).rvs(size=10000)
    y, _ = c.to_uniform_parameter(x)
    d, p = stats.kstest(y, 'uniform')
    assert p >= 0.05
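Both tests rest on the probability integral transform: if X has CDF F, then F(X) is uniform on [0, 1]. A standalone sketch of the same check without the PowerLawConverter wrapper (assuming to_uniform_parameter applies the CDF):

from scipy import stats

power = 2
x = stats.powerlaw(power + 1).rvs(size=10000, random_state=0)
y = stats.powerlaw(power + 1).cdf(x)  # probability integral transform
d, p = stats.kstest(y, 'uniform')
print(d, p)  # p should typically exceed 0.05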
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import powerlaw, triang, uniform
from astroML.plotting import hist  # assumed source of the Bayesian-blocks hist used below

def generateToy():

  np.random.seed(12345)

  fig,ax = plt.subplots(4,sharex=True)
  #fig,ax = plt.subplots(2)

  powerlaw_arg = 2
  triang_arg=0.7
  n_samples = 500
  #generate simple line with slope 1, from 0 to 1
  frozen_powerlaw = powerlaw(powerlaw_arg) #powerlaw.pdf(x, a) = a * x**(a-1)
  #generate triangle with peak at 0.7
  frozen_triangle = triang(triang_arg) #up-sloping line from loc to (loc + c*scale) and then downsloping for (loc + c*scale) to (loc+scale).
  frozen_uniform = uniform(0.2,0.5)
  frozen_uniform2 = uniform(0.3,0.2)

  x = np.linspace(0,1)

  signal = np.random.normal(0.5, 0.1, n_samples // 2)

  data_frame = pd.DataFrame({'powerlaw':powerlaw.rvs(powerlaw_arg,size=n_samples),
    'triangle':triang.rvs(triang_arg,size=n_samples),
    'uniform':np.concatenate((uniform.rvs(0.2,0.5,size=n_samples//2),uniform.rvs(0.3,0.2,size=n_samples//2))),
    'powerlaw_signal':np.concatenate((powerlaw.rvs(powerlaw_arg,size=n_samples//2),signal))})

  ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
  hist(data_frame['powerlaw'],bins=100,density=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[0])
  #hist(data_frame['powerlaw'],bins='blocks',fitness='poly_events',density=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[0])
  ax[0].legend(loc = 'best')

  ax[1].plot(x, frozen_triangle.pdf(x), 'k-', lw=2, label='triangle pdf')
  hist(data_frame['triangle'],bins=100,density=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[1])
  hist(data_frame['triangle'],bins='blocks',fitness='poly_events',density=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[1])
  ax[1].legend(loc = 'best')

  #ax[0].plot(x, frozen_powerlaw.pdf(x), 'k-', lw=2, label='powerlaw pdf')
  hist(data_frame['powerlaw_signal'],bins=100,density=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[2])
  #hist(data_frame['powerlaw_signal'],bins='blocks',density=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[2])
  ax[2].legend(loc = 'best')

  ax[3].plot(x, frozen_uniform.pdf(x)+frozen_uniform2.pdf(x), 'k-', lw=2, label='uniform pdf')
  hist(data_frame['uniform'],bins=100,density=True,histtype='stepfilled',alpha=0.2,label='100 bins',ax=ax[3])
  #hist(data_frame['uniform'],bins='blocks',fitness='poly_events',p0=0.05,density=True,histtype='stepfilled',alpha=0.2,label='b blocks',ax=ax[3])
  ax[3].legend(loc = 'best')

  plt.show()
  fig.savefig('plots/toy_plots.png')
Example #6
def main(a):

    #arg = raw_input("Zipf & Pareto belongs to power law. Please select: 1: Pareto\'s Law, 2: Zipf\'s law") )

    fig, ax = plt.subplots(1, 1)
    mean, var, skew, kurt = powerlaw.stats(a, moments='mvsk')

    x = power_law_dist(a)

    ax.plot(x, powerlaw.pdf(x, a), 'r-', lw=5, alpha=0.6, label='powerlaw pdf')
    rv = powerlaw(a)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    r = powerlaw.rvs(a, size=1000)
    ax.legend(loc='best', frameon=False)

    #plt.plot(x, y)
    plt.show()
Example #7
def set_distribution(name='norm', loc=0, scale=1, shape=1):
    distribution = {
        'norm': (norm(loc=loc, scale=scale), r'Normal Distribution',
                 r'$p(x)=\frac{\exp(-x^2/2)}{\sqrt{2\pi}}$' +
                 r', loc = {:.2f}, scale = {:.2f}'.format(loc, scale)),
        'expon':
        (expon(loc=loc,
               scale=scale), r'Exponential Distribution', r'$p(x)=\exp(-x)$' +
         r', loc = {:.2f}, scale = {:.2f}'.format(loc, scale)),
        'powerlaw': (powerlaw(a=shape + 1), r'Power Law Distribution',
                     r'$p(x,\alpha)=\alpha x^{\alpha-1}$' +
                     r', $\alpha-1 = {:.2f}$'.format(shape)),
        'lognorm':
        (lognorm(s=shape), r'Lognormal Distribution',
         r'$p(x,s)=\frac{1}{sx\sqrt{2\pi}} exp(-\frac{\log^2(x)}{2s^2})$' +
         r', $shape = {:.2f}$'.format(shape))
    }
    return distribution[name]
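A usage sketch (hypothetical shape value; assumes the frozen scipy.stats distributions norm, expon, powerlaw, lognorm are imported):

dist, title, label = set_distribution('powerlaw', shape=0.66)
print(title, '-', label)
print('pdf at 0.5: {:.4f}'.format(dist.pdf(0.5)))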
def dataset_write(fp, distriName, byts, tot, dis, pa1, pa2):
    # powerlaw/weibull here are the frequency-proportion helpers
    # (nested in Example #24's generate_dataset), not scipy.stats
    if distriName == 'zipf' or distriName == 'powerlaw':
        props = powerlaw(dis, pa1)
    elif distriName == 'weibull':
        props = weibull(dis, pa1, pa2)

    # if not os.path.exists(fp):
    #     os.mkdir(fp)

    freq = [round(prop * tot) + 1 for prop in props]
    dataset = gen(freq, byts)
    #print(len(dataset))

    if distriName == 'zipf' or distriName == 'powerlaw':
        # fpath = fp + distriName + str(pa1) + '.dat'
        fpath = fp
    elif distriName == 'weibull':
        # fpath = fp + distriName + str(pa1) + '_' + str(pa2) + '.dat'
        fpath = fp

    with open(fpath, 'wb') as f:
        for d in dataset:
            f.write(d)
    return fpath
Example #9
lb, rb, NE, shape1, shape2 = 0, 10, 10000, 1.5, 1.7
x = n.linspace(lb, rb, NE)
step = x[1] - x[0]

# Weibull sampling via inverse transform: X = (-n.log(U))**(1/a)
#count, bins, ignored = p.hist(n.random.weibull(5.,1000))
#x = n.arange(1,100.)/50.
#scale = count.max()/weib(x, 1., 5.).max()
W = weib(x, 1., 1.5)
W_ = W / (W * step).sum()
W__ = n.cumsum(W_)
W2 = weib(x, 1., 1.7)
W2_ = W2 / (W2 * step).sum()
W2__ = n.cumsum(W2_)
diffW = n.abs(W_ - W2_).max()
#p.plot(x, W_)
#p.plot(x, W2_)
##p.plot(x, weib(x, 1., 5.)*scale)
#p.show()

a = st.powerlaw(1.5)
b = st.powerlaw(1.7)
domain = n.linspace(0, 5.05, 10000)
avals = a.cdf(domain)
bvals = b.cdf(domain)
diffP = n.abs(avals - bvals).max()

# diffN, diffN2, diffU, diffU2 are computed earlier in the original script
print("KS distances for the mathematical models:", diffN, diffN2, diffU,
      diffU2, diffW, diffP)
# KS distances for the mathematical models:
# 0.0398776116762 0.0439947104098 0.0952338090952 0.047619047619 0.128565475845 0.0460149130584
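Because the powerlaw(a) CDF is x**a on [0, 1] (and 1 beyond), diffP has a closed form: the maximum of x**1.5 - x**1.7, attained where 1.5*x**0.5 = 1.7*x**0.7, i.e. at x = (1.5/1.7)**5. A quick cross-check:

xstar = (1.5 / 1.7) ** 5
print(xstar**1.5 - xstar**1.7)  # ~0.04601, matching diffP above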
Example #11
import collections as ct
import scipy.stats as st

income_model_dict = ct.OrderedDict()
income_model_dict['johnsonsu'] = st.johnsonsu(-5.3839367311065747,
                                              0.84376726932941271,
                                              -224.21280806585787,
                                              79.661998696081355)
income_model_dict['powerlaw'] = st.powerlaw(0.16342470577523971,
                                            -3.1423954341714262e-15,
                                            55664716.096562646)
income_model_dict['exponpow'] = st.exponpow(0.25441022752240294,
                                            -1.8475789041433829e-22,
                                            36120900.670255348)
income_model_dict['nakagami'] = st.nakagami(0.10038339454419823,
                                            -3.0390927147076284e-22,
                                            33062195.426077582)
income_model_dict['exponweib'] = st.exponweib(-3.5157658448986489,
                                              0.44492833350419714,
                                              -15427.454196748848,
                                              2440.0278856175246)

drivingdistance_model_dict = ct.OrderedDict()
drivingdistance_model_dict['nakagami'] = st.nakagami(0.11928581143831021,
                                                     14.999999999999996,
                                                     41.404620910360876)
drivingdistance_model_dict['ncx2'] = st.ncx2(0.30254190304723211,
                                             1.1286538320791935,
                                             14.999999999999998,
                                             8.7361471573932192)
drivingdistance_model_dict['chi'] = st.chi(0.47882729877571095,
def generateToy():

  np.random.seed(12345)

  fig,ax = plt.subplots()
  triang_arg=0.5
  #frozen_triangle = triang(c=triang_arg, loc=2) #up-sloping line from loc to (loc + c*scale) and then downsloping for (loc + c*scale) to (loc+scale).
  frozen_triangle = triang(c=0.5,loc=2) #up-sloping line from loc to (loc + c*scale) and then downsloping for (loc + c*scale) to (loc+scale).
  frozen_powerlaw = powerlaw(2) #powerlaw.pdf(x, a) = a * x**(a-1)

  x = np.linspace(0,1,20)
  x2 = np.linspace(0,1,20)
  nx = x
  nx2 = x2
  #nd = frozen_powerlaw.ppf(nx)
  #nd = np.array([0,0.3162,0.4472,0.5477,0.6324,0.7071,0.7746,0.8367,0.8944,0.9487])
  nd = np.array([0,0.140175,0.264911,0.378405,0.48324,0.581139,0.67332,0.760682,0.843909,0.923538])
  #nd = np.array([0.0723805,0.204159,0.322876,0.431782,0.532971,0.627882,0.717556,0.802776,0.884144,0.962142])
  #pdf = frozen_powerlaw.pdf(x)
  #nd = frozen_triangle.ppf(nx)
  #print x
  #print nd
  #raw_input()
  #pdf = frozen_triangle.pdf(x)
  #print nd
  #print pdf
  #raw_input()
  #for i in range(len(nd)-1):
  #  print (nd[i+1]-nd[i])*(nd[i+1]+nd[i])
  #raw_input()

  #nd2 = frozen_triangle2.ppf(nx2)
  #pdf2 = frozen_triangle2.pdf(x2)

  #print nd,nd2
  #ndc = np.concatenate((nd,nd2),axis=0)
  #print 'ndc', ndc
  #nxc = np.concatenate((nx,nx2))
  #print pdf, pdf2
  #pdfc = np.concatenate((pdf,pdf2))
  #xc = np.concatenate((x,x2))

  #plt.plot(nd,len(nx)*[1],"x")
  #plt.plot(x,pdf)
  #hist(nd,'blocks',fitness='poly_events',p0=0.05,histtype='bar',alpha=0.2,label='b blocks',ax=ax,normed=True)

  #plt.plot(nd[0:11],len(nx[0:11])*[1],"x")
  #plt.plot(x[0:11],pdf[0:11])
  #hist(nd[0:11],'blocks',fitness='poly_events',p0=0.05,histtype='bar',alpha=0.2,label='b blocks',ax=ax,normed=True)
  #hist(ndc,bins=50,histtype='bar',alpha=0.2,label='b blocks',ax=ax,normed=True)

  #plt.plot(nd[11:],len(nx[11:])*[1],"x")
  #plt.plot(x[11:],pdf[11:])
  #hist(nd[11:],'blocks',fitness='poly_events',p0=0.05,histtype='bar',alpha=0.2,label='b blocks',ax=ax,normed=True)

  print(nd)
  plt.plot(nd,len(nd)*[1],"x")
  #plt.plot(x,pdf)
  hist(nd,'blocks',fitness='poly_events',p0=0.05,histtype='bar',alpha=0.2,label='b blocks',ax=ax)

  plt.show()
  fig.savefig('plots/toy_plots2.png')
Example #13
    def __init__(self, a, loc, scale, capMaxImpact=False):
        super(PowerLawValue, self).__init__("Basic PowerLaw")
        # Set the powerlaw object now.  It doesn't change between runs

        self.powerdistro = stats.powerlaw(a=a, loc=loc, scale=scale)
        self.capMaxImpact = capMaxImpact
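A minimal usage sketch (hypothetical values; PowerLawValue's base class and the surrounding framework are not shown, so this runs only inside the original codebase):

v = PowerLawValue(a=2, loc=0, scale=10)
print(v.powerdistro.rvs(size=5))  # five draws from the frozen power law
print(v.capMaxImpact)             # False unless overridden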
Example #14
            dimension corresponding to the deviating role, i.e. the number of
            players in role i of dimension i should num_players[i] - 1.
        """
        payoffs = np.empty(self.num_role_strats)
        for i, (gp, r) in enumerate(zip(self._gps, self.role_index)):
            payoffs[i] = gp.predict(profiles[r]).mean()
        return payoffs

    def min_payoffs(self):
        return self._min_payoffs.view()

    def max_payoffs(self):
        return self._max_payoffs.view()


_CV_PARAMS = {'alpha': stats.powerlaw(.2, loc=1e-3, scale=50)}


# XXX This changed in a scipy update and should be verified that its doing what
# we want
def _train_gp(x, y, **search_kwds):
    if 'n_jobs' in search_kwds and search_kwds['n_jobs'] < 1:
        # one job per cpu core
        search_kwds['n_jobs'] = multiprocessing.cpu_count()
    cv = model_selection.RandomizedSearchCV(
        gaussian_process.GaussianProcessRegressor(),
        _CV_PARAMS, error_score=-np.inf, **search_kwds)
    cv.fit(x, y)
    return cv.best_estimator_
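A usage sketch for _train_gp on toy data (assumes the snippet's module-level imports: numpy as np, sklearn's model_selection and gaussian_process; n_iter and n_jobs are standard RandomizedSearchCV keywords forwarded through search_kwds):

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(size=(50, 2))
y = np.sin(x).sum(axis=1)
gp = _train_gp(x, y, n_iter=4, n_jobs=1)
print(gp.kernel_)  # kernel of the best estimator found by the random search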

Example #15
	def prepare_dataset(self, name='adult'):
		if name == 'adult':
			from utils.load_adult import get_train_test
			from utils.Custom_Dataset import Custom_Dataset
			import torch

			train_data, train_target, test_data, test_target = get_train_test()

			X_train = torch.tensor(train_data.values, requires_grad=False).float()
			y_train = torch.tensor(train_target.values, requires_grad=False).long()
			X_test = torch.tensor(test_data.values, requires_grad=False).float()
			y_test = torch.tensor(test_target.values, requires_grad=False).long()

			print("X train shape: ", X_train.shape)
			print("y train shape: ", y_train.shape)
			pos, neg = (y_train == 1).sum().item(), (y_train == 0).sum().item()
			print("Train set Positive counts: {}".format(pos), "Negative counts: {}.".format(neg), 'Split: {:.2%} - {:.2%}'.format(1. * pos / len(X_train), 1. * neg / len(X_train)))
			print("X test shape: ", X_test.shape)
			print("y test shape: ", y_test.shape)
			pos, neg = (y_test == 1).sum().item(), (y_test == 0).sum().item()
			print("Test set Positive counts: {}".format(pos), "Negative counts: {}.".format(neg), 'Split: {:.2%} - {:.2%}'.format(1. * pos / len(X_test), 1. * neg / len(X_test)))

			train_indices, valid_indices = get_train_valid_indices(len(X_train), self.train_val_split_ratio, self.sample_size_cap)

			train_set = Custom_Dataset(X_train[train_indices], y_train[train_indices], device=self.device)
			validation_set = Custom_Dataset(X_train[valid_indices], y_train[valid_indices], device=self.device)
			test_set = Custom_Dataset(X_test, y_test, device=self.device)

			return train_set, validation_set, test_set
		elif name == 'mnist':

			train = FastMNIST('datasets/MNIST', train=True, download=True)
			test = FastMNIST('datasets/MNIST', train=False, download=True)

			train_indices, valid_indices = get_train_valid_indices(len(train), self.train_val_split_ratio, self.sample_size_cap)
			
			from utils.Custom_Dataset import Custom_Dataset

			train_set = Custom_Dataset(train.data[train_indices], train.targets[train_indices], device=self.device)
			validation_set = Custom_Dataset(train.data[valid_indices],train.targets[valid_indices] , device=self.device)
			test_set = Custom_Dataset(test.data, test.targets, device=self.device)

			del train, test

			return train_set, validation_set, test_set

		elif name == 'cifar10':

			'''
			from torchvision import transforms			
			transform_train = transforms.Compose([
				transforms.RandomCrop(32, padding=4),
				transforms.RandomHorizontalFlip(),
				transforms.ToTensor(),
				transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
			])

			transform_test = transforms.Compose([
				transforms.ToTensor(),
				transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
			])
			'''

			train = FastCIFAR10('datasets/cifar', train=True, download=True)#, transform=transform_train)
			test = FastCIFAR10('datasets/cifar', train=False, download=True)#, transform=transform_test)

			train_indices, valid_indices = get_train_valid_indices(len(train), self.train_val_split_ratio, self.sample_size_cap)
			
			from utils.Custom_Dataset import Custom_Dataset

			train_set = Custom_Dataset(train.data[train_indices], train.targets[train_indices], device=self.device)
			validation_set = Custom_Dataset(train.data[valid_indices],train.targets[valid_indices] , device=self.device)
			test_set = Custom_Dataset(test.data, test.targets, device=self.device)
			del train, test

			return train_set, validation_set, test_set
		elif name == "sst":
			import torchtext.data as data
			text_field = data.Field(lower=True)
			from torch import long as torch_long
			label_field = LabelField(dtype = torch_long, sequential=False)


			import torchtext.datasets as datasets
			train_data, validation_data, test_data = datasets.SST.splits(text_field, label_field, fine_grained=True)

			# this `powerlaw` is a dataset-splitting helper that shards indices
			# across participants; it is not scipy.stats.powerlaw
			indices_list = powerlaw(list(range(len(train_data))), self.n_participants)
			ratios = [len(indices) / len(train_data) for indices in indices_list]

			train_datasets = split_torchtext_dataset_ratios(train_data, ratios)

			text_field.build_vocab(*(train_datasets + [validation_data, test_data]))
			label_field.build_vocab(*(train_datasets + [validation_data, test_data]))

			self.args.text_field = text_field
			self.args.label_field = label_field

			return train_datasets, validation_data, test_data

		elif name == 'mr':

			import torchtext.data as data
			from utils import mydatasets

			text_field = data.Field(lower=True)
			from torch import long as torch_long
			label_field = LabelField(dtype = torch_long, sequential=False)
			# label_field = data.Field(sequential=False)

			train_data, dev_data = mydatasets.MR.splits(text_field, label_field, root='.data/mr', shuffle=False)

			validation_data, test_data = dev_data.split(split_ratio=0.5, random_state = random.seed(1234))
			
			indices_list = powerlaw(list(range(len(train_data))), self.n_participants)
			ratios = [len(indices) / len(train_data) for indices in  indices_list]

			train_datasets = split_torchtext_dataset_ratios(train_data, ratios)

			# print(train_data, dir(train_data))
			# print((train_datasets[0].examples[0].text))
			# print((train_datasets[0].examples[1].text))
			# print((train_datasets[0].examples[2].text))
			# exit()


			text_field.build_vocab( *(train_datasets + [validation_data, test_data] ))
			label_field.build_vocab( *(train_datasets + [validation_data, test_data] ))

			self.args.text_field = text_field
			self.args.label_field = label_field

			return train_datasets, validation_data, test_data

		elif name == 'imdb':

			from torch import long as torch_long
			# text_field = Field(tokenize = 'spacy', preprocessing = generate_bigrams) # generate_bigrams takes about 2 minutes
			text_field = Field(tokenize = 'spacy')
			label_field = LabelField(dtype = torch_long)

			dirname = '.data/imdb/aclImdb'

			from torch.nn.init import normal_
			from torchtext import datasets


			train_data, test_data = datasets.IMDB.splits(text_field, label_field) # 25000, 25000 samples each

			# use 5000 out of 25000 of test_data as the test_data
			test_data, remaining = test_data.split(split_ratio=0.2 ,random_state = random.seed(1234))
			
			# use 5000 out of the remaining 2000 of test_data as valid data
			valid_data, remaining = remaining.split(split_ratio=0.25 ,random_state = random.seed(1234))

			# train_data, valid_data = train_data.split(split_ratio=self.train_val_split_ratio ,random_state = random.seed(1234))

			indices_list = powerlaw(list(range(len(train_data))), self.n_participants)
			ratios = [len(indices) / len(train_data) for indices in  indices_list]

			train_datasets = split_torchtext_dataset_ratios(train_data, ratios)

			MAX_VOCAB_SIZE = 25_000

			text_field.build_vocab(*(train_datasets + [valid_data, test_data] ), max_size = MAX_VOCAB_SIZE, vectors = "glove.6B.100d",  unk_init = normal_)
			label_field.build_vocab( *(train_datasets + [valid_data, test_data] ))

			# INPUT_DIM = len(text_field.vocab)
			# OUTPUT_DIM = 1
			# EMBEDDING_DIM = 100

			PAD_IDX = text_field.vocab.stoi[text_field.pad_token]

			self.args.text_field = text_field
			self.args.label_field = label_field
			self.args.pad_idx = PAD_IDX

			return train_datasets, valid_data, test_data

		elif name == 'names':

			from utils.load_names import get_train_test
			from utils.Custom_Dataset import Custom_Dataset
			import torch
			from collections import Counter

			X_train, y_train, X_test, y_test, reference_dict = get_train_test()

			print("X train shape: ", X_train.shape)
			print("y train shape: ", y_train.shape)
			
			print("X test shape: ", X_test.shape)
			print("y test shape: ", y_test.shape)

			from utils.Custom_Dataset import Custom_Dataset
			train_set = Custom_Dataset(X_train, y_train)
			test_set = Custom_Dataset(X_test, y_test)

			return train_set, test_set
import numpy as np
from scipy.stats import powerlaw
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
a = 1.66
mean, var, skew, kurt = powerlaw.stats(a, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(powerlaw.ppf(0.01, a), powerlaw.ppf(0.99, a), 100)
ax.plot(x, powerlaw.pdf(x, a), 'r-', lw=5, alpha=0.6, label='powerlaw pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = powerlaw(a)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = powerlaw.ppf([0.001, 0.5, 0.999], a)
np.allclose([0.001, 0.5, 0.999], powerlaw.cdf(vals, a))
# True

# Generate random numbers:

r = powerlaw.rvs(a, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
Example #17
 def verify(self, pandas_series):
     return stats.kstest(pandas_series, powerlaw(self.alpha).cdf)
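For reference, the same check without the wrapper class (self.alpha is the frozen shape; 1.66 below is a hypothetical value):

import pandas as pd
from scipy import stats

alpha = 1.66
series = pd.Series(stats.powerlaw(alpha).rvs(size=1000, random_state=0))
d, p = stats.kstest(series, stats.powerlaw(alpha).cdf)
print(d, p)  # large p-value: samples consistent with the model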
sim.SetParam('sink_radius',0.1)
sim.SetParam('nbody','lfkdk')
sim.SetParam('run_id','DISC1')
sim.SetParam('tsnapfirst',0.0)
sim.SetParam('dt_snap',6.28/20.)
sim.SetParam('tend',6.28*10)


sim.PreSetupForPython()

#Now we start doing the actual work

phi = np.random.uniform(0,2*np.pi,Ngas)

#generation of r coordinate
mr = powerlaw(0.5, loc=0.1, scale=10)
r = mr.rvs(size=Ngas)

#z coordinate, using first theta
theta=np.zeros(Ngas)
for i in range(Ngas):
    theta[i]=np.random.normal(0,scale=0.05*r[i]**0.5,size=1)
z=theta*r

#convert cylindrical/spherical coordinates to cartesian
x=np.sin(phi)*r
y=np.cos(phi)*r

vphi = 1. / np.sqrt(r)  # Keplerian rotation, v ~ r**-0.5  #/np.sqrt(2.)
vx = -np.cos(phi) * vphi
vy = np.sin(phi) * vphi
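The radial draw above uses powerlaw(0.5, loc=0.1, scale=10), i.e. density 0.05 * ((r - 0.1) / 10)**(-0.5) on [0.1, 10.1]. A quick sanity check of the sampled profile (a standalone sketch, not part of the simulation setup):

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import powerlaw

mr = powerlaw(0.5, loc=0.1, scale=10)
r = mr.rvs(size=100000, random_state=0)
grid = np.linspace(0.2, 10.0, 200)
plt.hist(r, bins=100, density=True, alpha=0.3)
plt.plot(grid, mr.pdf(grid))  # analytic pdf should trace the histogram
plt.show()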
Example #19
	def get_train_loaders(self, n_participants, split='powerlaw', batch_size=None):
		if not batch_size:
			batch_size = self.train_batch_size

		if split == 'classimbalance':
			if self.name not in ['mnist','cifar10']:
				raise NotImplementedError("Calling on dataset {}. Only mnist and cifar10 are implemented for this split".format(self.name))

			n_classes = 10			
			data_indices = [(self.train_dataset.targets == class_id).nonzero().view(-1).tolist() for class_id in range(n_classes)]
			class_sizes = np.linspace(1, n_classes, n_participants, dtype='int')
			print("class_sizes for each party", class_sizes)
			party_mean = self.sample_size_cap // self.n_participants

			from collections import defaultdict
			party_indices = defaultdict(list)
			for party_id, class_sz in enumerate(class_sizes):	
				classes = range(class_sz) # can customize classes for each party rather than just listing
				each_class_id_size = party_mean // class_sz
				# print("party each class size:", party_id, each_class_id_size)
				for i, class_id in enumerate(classes):
					# randomly pick from each class a certain number of samples, with replacement 
					selected_indices = random.choices(data_indices[class_id], k=each_class_id_size)

					# randomly pick from each class a certain number of samples, without replacement 
					'''
					NEED TO MAKE SURE THAT EACH CLASS HAS MORE THAN each_class_id_size for no replacement sampling
					selected_indices = random.sample(data_indices[class_id],k=each_class_id_size)
					'''
					party_indices[party_id].extend(selected_indices)

					# top up to make sure all parties have the same number of samples
					if i == len(classes) - 1 and len(party_indices[party_id]) < party_mean:
						extra_needed = party_mean - len(party_indices[party_id])
						party_indices[party_id].extend(data_indices[class_id][:extra_needed])
						data_indices[class_id] = data_indices[class_id][extra_needed:]

			indices_list = [party_index_list for party_id, party_index_list in party_indices.items()] 

		elif split == 'powerlaw':
			if self.name in ['sst', 'mr', 'imdb']:
				# sst, mr, imdb split is different from other datasets, so return here				

				self.train_loaders = [BucketIterator(train_dataset, batch_size=self.train_batch_size, device=self.device, sort_key=lambda x: len(x.text),train=True) for train_dataset in self.train_datasets]
				self.shard_sizes = [(len(train_dataset)) for train_dataset in self.train_datasets]
				return self.train_loaders

			else:
				indices_list = powerlaw(list(range(len(self.train_dataset))), n_participants)

		elif split in ['balanced','equal']:
			from utils.utils import random_split
			indices_list = random_split(sample_indices=list(range(len(self.train_dataset))), m_bins=n_participants, equal=True)
		
		elif split == 'random':
			from utils.utils import random_split
			indices_list = random_split(sample_indices=list(range(len(self.train_dataset))), m_bins=n_participants, equal=False)

		# from collections import Counter
		# for indices in indices_list:
		# 	print(Counter(self.train_dataset.targets[indices].tolist()))

		self.shard_sizes = [len(indices) for indices in indices_list]
		participant_train_loaders = [DataLoader(self.train_dataset, batch_size=batch_size, sampler=SubsetRandomSampler(indices)) for indices in indices_list]

		return participant_train_loaders
Example #20
from scipy.stats import powerlaw
import matplotlib.pyplot as plt
import numpy as np

pl = powerlaw(.8, loc=0, scale=2)
samples = pl.rvs(10000)  # create random variables
alpha, loc, scale = powerlaw.fit(samples)  # fit the variables

# plotting
plt.figure(0)
plt.clf()
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, powerlaw(alpha, loc, scale).pdf(x), linewidth=2, label="fit")

plt.figure(1)
plt.clf()
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, powerlaw(alpha, loc, scale).pdf(x), linewidth=2, label="fit")
plt.xscale("log", base=10, nonpositive='clip')
plt.yscale("log", base=10, nonpositive='clip')

plt.show()
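The fitted parameters should land close to the true values used to generate the samples; a quick check:

print("fit:", alpha, loc, scale)  # expect roughly (0.8, 0.0, 2.0)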
Example #21
def all_dists():
    # dist params were taken from the scipy.stats official
    # documentation examples
    # Total - 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
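A usage sketch: pull one frozen distribution out of the dict and smoke-test it against its own CDF:

from scipy import stats

pl = all_dists()["powerlaw"]
samples = pl.rvs(size=1000, random_state=0)
print(stats.kstest(samples, pl.cdf))  # small statistic, large p-value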
Example #22
def main(args=None):
    from ligo.lw import lsctables
    from ligo.lw import utils as ligolw_utils
    from ligo.lw import ligolw
    import lal.series
    from scipy import stats

    p = parser()
    args = p.parse_args(args)

    xmldoc = ligolw.Document()
    xmlroot = xmldoc.appendChild(ligolw.LIGO_LW())
    process = register_to_xmldoc(xmldoc, p, args)

    gwcosmo = GWCosmo(
        cosmology.default_cosmology.get_cosmology_from_string(args.cosmology))

    ns_mass_min = 1.0
    ns_mass_max = 2.0
    bh_mass_min = 5.0
    bh_mass_max = 50.0

    ns_astro_spin_min = -0.05
    ns_astro_spin_max = +0.05
    ns_astro_mass_dist = stats.norm(1.33, 0.09)
    ns_astro_spin_dist = stats.uniform(ns_astro_spin_min,
                                       ns_astro_spin_max - ns_astro_spin_min)

    ns_broad_spin_min = -0.4
    ns_broad_spin_max = +0.4
    ns_broad_mass_dist = stats.uniform(ns_mass_min, ns_mass_max - ns_mass_min)
    ns_broad_spin_dist = stats.uniform(ns_broad_spin_min,
                                       ns_broad_spin_max - ns_broad_spin_min)

    bh_astro_spin_min = -0.99
    bh_astro_spin_max = +0.99
    bh_astro_mass_dist = stats.pareto(b=1.3)
    bh_astro_spin_dist = stats.uniform(bh_astro_spin_min,
                                       bh_astro_spin_max - bh_astro_spin_min)

    bh_broad_spin_min = -0.99
    bh_broad_spin_max = +0.99
    bh_broad_mass_dist = stats.reciprocal(bh_mass_min, bh_mass_max)
    bh_broad_spin_dist = stats.uniform(bh_broad_spin_min,
                                       bh_broad_spin_max - bh_broad_spin_min)

    if args.distribution.startswith('bns_'):
        m1_min = m2_min = ns_mass_min
        m1_max = m2_max = ns_mass_max
        if args.distribution.endswith('_astro'):
            x1_min = x2_min = ns_astro_spin_min
            x1_max = x2_max = ns_astro_spin_max
            m1_dist = m2_dist = ns_astro_mass_dist
            x1_dist = x2_dist = ns_astro_spin_dist
        elif args.distribution.endswith('_broad'):
            x1_min = x2_min = ns_broad_spin_min
            x1_max = x2_max = ns_broad_spin_max
            m1_dist = m2_dist = ns_broad_mass_dist
            x1_dist = x2_dist = ns_broad_spin_dist
        else:  # pragma: no cover
            assert_not_reached()
    elif args.distribution.startswith('nsbh_'):
        m1_min = bh_mass_min
        m1_max = bh_mass_max
        m2_min = ns_mass_min
        m2_max = ns_mass_max
        if args.distribution.endswith('_astro'):
            x1_min = bh_astro_spin_min
            x1_max = bh_astro_spin_max
            x2_min = ns_astro_spin_min
            x2_max = ns_astro_spin_max
            m1_dist = bh_astro_mass_dist
            m2_dist = ns_astro_mass_dist
            x1_dist = bh_astro_spin_dist
            x2_dist = ns_astro_spin_dist
        elif args.distribution.endswith('_broad'):
            x1_min = bh_broad_spin_min
            x1_max = bh_broad_spin_max
            x2_min = ns_broad_spin_min
            x2_max = ns_broad_spin_max
            m1_dist = bh_broad_mass_dist
            m2_dist = ns_broad_mass_dist
            x1_dist = bh_broad_spin_dist
            x2_dist = ns_broad_spin_dist
        else:  # pragma: no cover
            assert_not_reached()
    elif args.distribution.startswith('bbh_'):
        m1_min = m2_min = bh_mass_min
        m1_max = m2_max = bh_mass_max
        if args.distribution.endswith('_astro'):
            x1_min = x2_min = bh_astro_spin_min
            x1_max = x2_max = bh_astro_spin_max
            m1_dist = m2_dist = bh_astro_mass_dist
            x1_dist = x2_dist = bh_astro_spin_dist
        elif args.distribution.endswith('_broad'):
            x1_min = x2_min = bh_broad_spin_min
            x1_max = x2_max = bh_broad_spin_max
            m1_dist = m2_dist = bh_broad_mass_dist
            x1_dist = x2_dist = bh_broad_spin_dist
        else:  # pragma: no cover
            assert_not_reached()
    else:  # pragma: no cover
        assert_not_reached()

    dists = (m1_dist, m2_dist, x1_dist, x2_dist)

    # Read PSDs
    psds = list(
        lal.series.read_psd_xmldoc(
            ligolw_utils.load_fileobj(
                args.reference_psd,
                contenthandler=lal.series.PSDContentHandler)).values())

    # Construct mass1, mass2, spin1z, spin2z grid.
    m1 = np.geomspace(m1_min, m1_max, 10)
    m2 = np.geomspace(m2_min, m2_max, 10)
    x1 = np.linspace(x1_min, x1_max, 10)
    x2 = np.linspace(x2_min, x2_max, 10)
    params = m1, m2, x1, x2

    # Calculate the maximum distance on the grid.
    max_z = gwcosmo.get_max_z(psds,
                              args.waveform,
                              args.f_low,
                              args.min_snr,
                              m1,
                              m2,
                              x1,
                              x2,
                              jobs=args.jobs)
    if args.max_distance is not None:
        new_max_z = cosmology.z_at_value(gwcosmo.cosmo.luminosity_distance,
                                         args.max_distance * units.Mpc)
        max_z[max_z > new_max_z] = new_max_z
    max_distance = gwcosmo.sensitive_distance(max_z).to_value(units.Mpc)

    # Find piecewise constant approximate upper bound on distance.
    max_distance = cell_max(max_distance)

    # Calculate V * T in each grid cell
    cdfs = [dist.cdf(param) for param, dist in zip(params, dists)]
    cdf_los = [cdf[:-1] for cdf in cdfs]
    cdfs = [np.diff(cdf) for cdf in cdfs]
    probs = np.prod(np.meshgrid(*cdfs, indexing='ij'), axis=0)
    probs /= probs.sum()
    probs *= 4 / 3 * np.pi * max_distance**3
    volume = probs.sum()
    probs /= volume
    probs = probs.ravel()

    volumetric_rate = args.nsamples / volume * units.year**-1 * units.Mpc**-3

    # Draw random grid cells
    dist = stats.rv_discrete(values=(np.arange(len(probs)), probs))
    indices = np.unravel_index(dist.rvs(size=args.nsamples),
                               max_distance.shape)

    # Draw random intrinsic params from each cell
    cols = {}
    cols['mass1'], cols['mass2'], cols['spin1z'], cols['spin2z'] = [
        dist.ppf(stats.uniform(cdf_lo[i], cdf[i]).rvs(size=args.nsamples))
        for i, dist, cdf_lo, cdf in zip(indices, dists, cdf_los, cdfs)
    ]

    # Swap binary components as needed to ensure that mass1 >= mass2.
    # Note that the .copy() is important.
    # See https://github.com/numpy/numpy/issues/14428
    swap = cols['mass1'] < cols['mass2']
    cols['mass1'][swap], cols['mass2'][swap] = \
        cols['mass2'][swap].copy(), cols['mass1'][swap].copy()
    cols['spin1z'][swap], cols['spin2z'][swap] = \
        cols['spin2z'][swap].copy(), cols['spin1z'][swap].copy()

    # Draw random extrinsic parameters
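    # powerlaw(a=3) has pdf proportional to d**2, so sources are placed
    # uniformly in volume out to the per-cell maximum distance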
    cols['distance'] = stats.powerlaw(
        a=3, scale=max_distance[indices]).rvs(size=args.nsamples)
    cols['longitude'] = stats.uniform(0, 2 * np.pi).rvs(size=args.nsamples)
    cols['latitude'] = np.arcsin(stats.uniform(-1, 2).rvs(size=args.nsamples))
    cols['inclination'] = np.arccos(
        stats.uniform(-1, 2).rvs(size=args.nsamples))
    cols['polarization'] = stats.uniform(0, 2 * np.pi).rvs(size=args.nsamples)
    cols['coa_phase'] = stats.uniform(-np.pi,
                                      2 * np.pi).rvs(size=args.nsamples)
    cols['time_geocent'] = stats.uniform(1e9, units.year.to(
        units.second)).rvs(size=args.nsamples)

    # Convert from sensitive distance to redshift and comoving distance.
    # FIXME: Replace this brute-force lookup table with a solver.
    z = np.linspace(0, max_z.max(), 10000)
    ds = gwcosmo.sensitive_distance(z).to_value(units.Mpc)
    dc = gwcosmo.cosmo.comoving_distance(z).to_value(units.Mpc)
    z_for_ds = interp1d(ds, z, kind='cubic', assume_sorted=True)
    dc_for_ds = interp1d(ds, dc, kind='cubic', assume_sorted=True)
    zp1 = 1 + z_for_ds(cols['distance'])
    cols['distance'] = dc_for_ds(cols['distance'])

    # Apply redshift factor to convert from comoving distance and source frame
    # masses to luminosity distance and observer frame masses.
    for key in ['distance', 'mass1', 'mass2']:
        cols[key] *= zp1

    # Populate sim_inspiral table
    sims = xmlroot.appendChild(lsctables.New(lsctables.SimInspiralTable))
    for row in zip(*cols.values()):
        sims.appendRow(**dict(dict.fromkeys(sims.validcolumns, None),
                              process_id=process.process_id,
                              simulation_id=sims.get_next_id(),
                              waveform=args.waveform,
                              f_lower=args.f_low,
                              **dict(zip(cols.keys(), row))))

    # Record process end time.
    process.comment = str(volumetric_rate)
    process.set_end_time_now()

    # Write output file.
    write_fileobj(xmldoc, args.output)
Example #24
def generate_dataset(distriName, para, tot, dis):
    import os
    import sys
    import random
    from scipy.stats import powerlaw  # note: shadowed by the local powerlaw() below
    import numpy as np
    import math

    bytePerStr = 4
    filepath = "./dataset_temp/" + distriName + '_' + str(para) + '_' + str(
        tot) + '_' + str(dis) + ".dat"
    filenum = 1000
    filesize = tot // filenum
    if not os.path.exists("./dataset_temp"):
        os.mkdir("./dataset_temp")
        os.mkdir("./dataset_temp/temp")

    def powerlaw(N, s):
        res = []
        base = 0.0
        for n in range(1, N + 1):
            t = 1 / (n**s)
            base += t
            res.append(t)
        return [r / base for r in res]

    def weibull(N, p, k):
        res = []
        for n in range(0, N, 1):
            power1 = n**k
            p1 = (1 - p)**power1
            power2 = (n + 1)**k
            p2 = (1 - p)**power2
            res.append(p1 - p2)
        return res

    def gen_random_strings(len, byts):
        strs = set()
        res = []
        for i in range(len):
            s = os.urandom(byts)
            while s in strs:
                s = os.urandom(byts)
            res.append(s)
            strs.add(s)
        return res

    def gen(freqs, byts):
        strs = gen_random_strings(len(freqs), byts)
        chs = [i for i in range(len(freqs))]
        for fileno in range(0, filenum - 1):
            temp_filepath = "./dataset_temp/temp/" + str(fileno) + ".dat"
            with open(temp_filepath, "ab") as f:
                for j in range(0, filesize):
                    p = random.randint(0, len(chs) - 1)
                    pos = chs[p]
                    f.write(strs[pos])
                    if (freqs[pos] > 0):
                        freqs[pos] -= 1
                    if freqs[pos] == 0:
                        del chs[p]
                f.close()

        last_filesize = 0
        fileno = filenum - 1
        temp_filepath = "./dataset_temp/temp/" + str(fileno) + ".dat"
        with open(temp_filepath, "ab") as f:
            while len(chs) != 0:
                p = random.randint(0, len(chs) - 1)
                pos = chs[p]
                f.write(strs[pos])
                last_filesize += 1
                if (freqs[pos] > 0):
                    freqs[pos] -= 1
                if freqs[pos] == 0:
                    del chs[p]

    def read_str(fp, sl, bytesNum):
        st = fp.read(bytesNum)
        while st:
            sl.append(st)
            st = fp.read(bytesNum)

    def read_shuffle_write(sl, fp1, fp2, bytesNum):
        with open(fp1, "rb") as f1:
            read_str(f1, sl, bytesNum)
            f1.close()
        with open(fp2, "rb") as f2:
            read_str(f2, sl, bytesNum)
            f2.close()
        random.shuffle(sl)
        with open(fp1, "wb") as f1:
            for j in range(filesize):
                f1.write(sl[j])
            f1.close()
        with open(fp2, "wb") as f2:
            for j in range(filesize, len(sl)):
                f2.write(sl[j])
            f2.close()

    def front_tail_shuffle(bytesNum):
        frontfile = [i for i in range(100, 200)]
        tailfile = [i for i in range(900, 1000)]
        random.shuffle(frontfile)
        random.shuffle(tailfile)
        for i in range(0, 100):
            str_list = []
            frontfilepath = "./dataset_temp/temp/" + str(frontfile[i]) + ".dat"
            tailfilepath = "./dataset_temp/temp/" + str(tailfile[i]) + ".dat"
            read_shuffle_write(str_list, frontfilepath, tailfilepath, bytesNum)

    def whole_shuffle(bytesNum):
        file1 = random.randint(0, filenum - 1)
        file2 = random.randint(0, filenum - 1)
        while file2 == file1:
            file2 = random.randint(0, filenum - 1)
        str_list = []
        filepath1 = "./dataset_temp/temp/" + str(file1) + ".dat"
        filepath2 = "./dataset_temp/temp/" + str(file2) + ".dat"
        read_shuffle_write(str_list, filepath1, filepath2, bytesNum)

    if distriName == 'zipf' or distriName == 'powerlaw':
        props = powerlaw(dis, para)
    elif distriName == 'weibull':
        pa = 0.1
        props = weibull(dis, pa, para)

    freq = [math.ceil(prop * tot) for prop in props]
    minFreq = min(freq)
    maxFreq = max(freq)
    gen(freq, bytePerStr)
    front_tail_shuffle(bytePerStr)
    for i in range(100):
        whole_shuffle(bytePerStr)
    file_list = [i for i in range(filenum)]
    random.shuffle(file_list)
    with open(filepath, "wb") as f:
        for i in range(filenum):
            readfilepath = "./dataset_temp/temp/" + str(file_list[i]) + ".dat"
            with open(readfilepath, "rb") as rf:
                st = rf.read(bytePerStr)
                while st:
                    f.write(st)
                    st = rf.read(bytePerStr)
                rf.close()
        f.close()
    for fileno in range(0, filenum):
        temp_filepath = "./dataset_temp/temp/" + str(fileno) + ".dat"
        os.remove(temp_filepath)
    os.rmdir("./dataset_temp/temp")

    return (minFreq, maxFreq)
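A usage sketch (hypothetical sizes: a Zipf stream with exponent 1.1 over 1000 distinct 4-byte strings, about 10**6 entries total, written under ./dataset_temp/):

min_f, max_f = generate_dataset('zipf', 1.1, 10**6, 1000)
print(min_f, max_f)  # frequencies of the rarest and the most common string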
Example #25
trial_odds_yes = .05
trial_odds_no = .95  # Odds of going to trial.

regulation_odds_yes = .05  # Odds of having an audit requirement imposed.
regulation_odds_no = .95  # This can be modeled further, to include several other costs we are leaving out.

# Statistical Values

settlements = loadtxt(
    'settlements.dat')  # Loading in external data for settlements
fit = powerlaw.fit(
    settlements
)  # Fitting data to a simulated Power Law, which we think is reasonable.

incidents = powerlaw(a=fit[0], loc=fit[1], scale=fit[2])

c.progress("Disclosure Legal")

# Disclosure complexity (Legal)
# (Lawyers * Lawyer Rate * Hours) + (Engineers * Eng Pay * Hours)
disclosure_lawyers = np.random.uniform(
    lawyers_min, lawyers_max,
    simulations)  # What's the minimum? What's the maximum?
disclosure_lawyer_rate = np.random.normal(
    lawyer_rate_average, lawyer_rate_variance, simulations
)  # Using https://thervo.com/costs/attorney-fees as stand-in data
disclosure_lawyer_hours = np.random.uniform(
    disclosure_lawyer_hours_min, disclosure_lawyer_hours_max,
    simulations)  # At least a day per lawyer, as much as several weeks