import numpy.random as rand
import matplotlib.pyplot as plt

def create_noise(shape, show_figure=True):
    # Draw samples from six different distributions.
    rbin = rand.binomial(1, 0.5, shape)
    rexp = rand.exponential(1, shape)
    rnor = rand.normal(1, 1, shape)
    rpow = rand.power(1, shape)
    rran = rand.rand(shape)  # rand() takes dimensions as separate args, so this assumes a scalar shape
    runi = rand.uniform(0, 1, shape)
    if show_figure:
        fign = plt.figure()
        samples = [(rbin, 'Binomial'), (rexp, 'Exponential'), (rnor, 'Normal'),
                   (rpow, 'Power'), (rran, 'Rand'), (runi, 'Uniform')]
        for k, (sample, name) in enumerate(samples, start=1):
            axn = fign.add_subplot(2, 3, k)
            axn.hist(sample)
            axn.title.set_text(name)
        fign.show()
    return (rbin, rexp, rnor, rpow, rran, runi)
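# Usage sketch (assumes the imports above): draw 1000 samples from each
# distribution and show the six histograms; the sample count is arbitrary.
if __name__ == '__main__':
    rbin, rexp, rnor, rpow, rran, runi = create_noise(1000)
    plt.show()  # block so the figure window stays open when run as a script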
import numpy as np
from numpy.random import lognormal, power, normal, uniform

def make_dataset1():
    '''Make a dataset of single samples, labelled with the distribution each came from'''
    # now let's make some samples
    lns = min_max_scale(lognormal(size=bsize))      # log normal
    powers = min_max_scale(power(0.1, size=bsize))  # power law
    norms = min_max_scale(normal(size=bsize))       # normal
    uniforms = min_max_scale(uniform(size=bsize))   # uniform
    # add our data together
    data = np.concatenate((lns, powers, norms, uniforms))
    # concatenate our labels
    labels = np.concatenate((np.repeat(LOGNORMAL, bsize),
                             np.repeat(POWER, bsize),
                             np.repeat(NORM, bsize),
                             np.repeat(UNIFORM, bsize)))
    tsize = len(labels)
    # make sure dimensionality and types are right
    data = data.reshape((len(data), 1))
    data = data.astype(np.float32)
    labels = labels.astype(np.int32)
    labels = labels.reshape((len(data),))
    return data, labels, tsize
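# Usage sketch with stand-ins for the module globals this snippet assumes
# (bsize, the label constants, and a min_max_scale helper are defined
# elsewhere in the original module):
bsize = 1000
LOGNORMAL, POWER, NORM, UNIFORM = 0, 1, 2, 3

def min_max_scale(x):
    # squash samples into [0, 1]
    return (x - x.min()) / (x.max() - x.min())

data, labels, tsize = make_dataset1()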
import numpy as np
import numpy.random as r

def inject(func, max_amp, inject_frac, data):
    # N (samples per light curve) and M (number of light curves) are module globals
    d = np.copy(data)
    K = round(inject_frac * M)
    # I never decided what would be the best way to randomize amplitudes
    amps = r.power(3.0, [1, K]) * max_amp
    trends = np.dot(func.reshape([N, 1]), amps)  # gives an (N, K) array of trends
    trends += 1.0
    i = r.randint(0, M - 1, K)
    d[:, i] *= trends
    return d
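# Usage sketch with hypothetical module globals: inject() reads N and M from
# module scope, so a minimal driver could look like this.
N, M = 100, 50
func = np.linspace(0.0, 1.0, N)   # a made-up trend shape
data = np.ones((N, M))            # M flat light curves of N samples each
injected = inject(func, max_amp=0.1, inject_frac=0.2, data=data)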
from numpy.random import power
from treeswift import read_tree_newick  # assumed dependency; GC is the global context object

def time_to_mutation_rate(tree):
    # Seed numpy's RNG exactly once, using (and advancing) the global seed.
    if not hasattr(GC, "NUMPY_SEEDED"):
        from numpy.random import seed as numpy_seed
        numpy_seed(seed=GC.random_number_seed)
        GC.random_number_seed += 1
        GC.NUMPY_SEEDED = True
    t = read_tree_newick(tree)
    # Scale every branch length by a power-distributed rate multiplier.
    for node in t.traverse_preorder():
        if node.edge_length is not None:
            node.edge_length *= power(a=GC.tree_rate_shape)
    return str(t)
import random
from numpy.random import power, uniform

def power_mutation(generation, random_seed=None):
    random.seed(random_seed)
    parent = random.choice(generation)
    child = list()
    s = power(0.5)                  # power-law mutation step in [0, 1]
    alpha = uniform(low=0, high=1)
    for gen in parent:
        lower_bound = random.randint(0, 5)
        upper_bound = random.randint(0, 5)
        # NOTE: raises ZeroDivisionError when the two random bounds coincide
        t = (gen - lower_bound) / (upper_bound - lower_bound)
        if t < alpha:
            child.append(gen - s * (gen - lower_bound))
        else:
            child.append(gen + s * (upper_bound - gen))
    return tuple(child)
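# Usage sketch (hypothetical 3-gene population). Note the mutation is only
# partly reproducible: random_seed seeds the stdlib RNG, while the numpy
# draws for s and alpha remain unseeded.
generation = [(1.0, 2.5, 3.0), (0.5, 4.0, 2.0)]
child = power_mutation(generation, random_seed=42)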
def make_widedataset(width=width):
    # (relies on the same imports and module globals as make_dataset1 above)
    # we're going to make rows of 40 unsorted features
    wlns = min_max_scale(lognormal(size=(bsize, width)))      # log normal
    wpowers = min_max_scale(power(0.1, size=(bsize, width)))  # power law
    wnorms = min_max_scale(normal(size=(bsize, width)))       # normal
    wuniforms = min_max_scale(uniform(size=(bsize, width)))   # uniform
    wdata = np.concatenate((wlns, wpowers, wnorms, wuniforms))
    # concatenate our labels
    wlabels = np.concatenate((np.repeat(LOGNORMAL, bsize),
                              np.repeat(POWER, bsize),
                              np.repeat(NORM, bsize),
                              np.repeat(UNIFORM, bsize)))
    joint_shuffle(wdata, wlabels)
    wdata = wdata.astype(np.float32)
    wlabels = wlabels.astype(np.int32)
    wlabels = wlabels.reshape((len(wdata),))
    return wdata, wlabels
def __init__(self):
    # MODEL PARAMETERS
    self.NUMBER_OF_YEARS = 30

    # MODEL OPERATORS
    # probability
    self.PREFERRED_AGE_DIFFERENCE = -0.1
    self.AGE_PROBABILITY_MULTIPLIER = -0.2
    self.PREFERRED_AGE_DIFFERENCE_GROWTH = 0.1
    self.SB_PROBABILITY_MULTIPLIER = 0

    # relationship operator
    self.SEXES = 2
    self.MIN_AGE = 15
    self.MAX_AGE = 65
    self.BIN_SIZE = 5
    self.DURATIONS = lambda a1, a2: 30 * random.exponential(1)
    self.RECRUIT_WARM_UP = 20
    self.RECRUIT_INITIAL = 0.02
    self.RECRUIT_RATE = 0.005

    # infection operator
    self.INFECTIVITY = 0.01
    self.INITIAL_PREVALENCE = 0.01
    self.SEED_TIME = 20  # in weeks

    # time operator
    self.time = -1
    self.grid_queue_index = 0

    # MODEL POPULATION
    self.INITIAL_POPULATION = 100
    self.AGENT_ATTRIBUTES = {}
    self.BORN = lambda: -52 * random.uniform(self.MIN_AGE, self.MAX_AGE)
    self.SEX = lambda: random.randint(self.SEXES)
    self.DNP = lambda: random.power(0.1) * 1.2
    self.SEXUAL_BEHAVIOR = lambda: random.randint(1, 5)
if rank == 0:
    # 1.1 Sample and set parameters from the prior distribution
    # print("---Sample", i, "---")
    s = CommunityDistributed.CommunityDistributed(comm, 0, [])

    # set constants
    s.INITIAL_POPULATION = 10000  # scale this up later?
    s.NUMBER_OF_YEARS = 30

    # set parameters
    s.probability_multiplier = prior[1]()
    s.preferred_age_difference = prior[2]()
    s.preferred_age_difference_growth = prior[3]()
    s.DNPscale = prior[4]()
    s.DNPshape = prior[5]()
    s.DNP = lambda: random.power(s.DNPshape) * s.DNPscale
    s.durations_scale = prior[6]()
    s.durations_shape = prior[7]()
    s.DURATIONS = lambda a1, a2: s.durations_scale * random.exponential(s.durations_shape)

    # 1.2 Run the simulation
    s.run()

    # 1.3 Save to csv: the sampled parameters, rounded to two decimals
    print(str(i) + ",", end="")
    print(",".join(str(round(x, 2)) for x in
                   [s.probability_multiplier,
                    s.preferred_age_difference,
                    s.preferred_age_difference_growth,
                    s.DNPscale,
                    s.DNPshape,
                    s.durations_scale,
                    s.durations_shape]) + ",", end="")
def bin(row):
    # normalized histogram of one row over [0, 1]
    return np.histogram(row, bins=len(row), range=(0.0, 1.0))[0] / float(len(row))

print("Apply the histogram to all the data rows")
bdata = np.apply_along_axis(bin, 1, wdata).astype(np.float32)
blabels = wlabels

# ensure we have our test data
test_bdata = np.apply_along_axis(bin, 1, test_wdata).astype(np.float32)
test_blabels = test_wlabels

# helper data
enum_funcs = [
    (LOGNORMAL, "log normal", lambda size: lognormal(size=size)),
    (POWER, "power", lambda size: power(0.1, size=size)),
    (NORM, "normal", lambda size: normal(size=size)),
    (UNIFORM, "uniform", lambda size: uniform(size=size)),
]

# uses enum_funcs to evaluate PER CLASS how well our classifier operates
def classify_test(bnet, ntests=1000):
    for enum, name, func in enum_funcs:
        samples = min_max_scale(func(size=(ntests, width)))
        bsamples = np.apply_along_axis(bin, 1, samples).astype(np.float32)
        bsample_labels = np.repeat(enum, ntests).astype(np.int32)
        classification = bnet.classify(bsamples)
        print("%s %s / %s ::: %s" % (name,
                                     sum(classification == bsample_labels),
                                     ntests,
                                     collections.Counter(classification)))
timecost.append([mid_time - start_time, time.time() - mid_time])

# zipf
start_time = time.time()
a = dsg.zipf(1.25, times)
mid_time = time.time()
b = nr.zipf(1.25, times)
timecost.append([mid_time - start_time, time.time() - mid_time])

# power
start_time = time.time()
a = dsg.power(1.5, times)
mid_time = time.time()
b = nr.power(1.5, times)
timecost.append([mid_time - start_time, time.time() - mid_time])

# geometric
start_time = time.time()
a = dsg.geometric(0.4, times)
mid_time = time.time()
b = nr.geometric(0.4, times)
timecost.append([mid_time - start_time, time.time() - mid_time])

# pareto
start_time = time.time()
a = dsg.pareto(1.25, times)
mid_time = time.time()
import os
import h5py
import numpy as np
from numpy.random import randint, uniform, power
# The remaining dependencies are assumed to come from pycbc:
from pycbc.psd import aLIGOZeroDetHighPower
from pycbc.noise import colored_noise
from pycbc.filter import resample_to_delta_t
from pycbc.waveform import get_td_waveform
from pycbc.detector import Detector

def generate(file_path, duration, seed=0, signal_separation=200,
             signal_separation_interval=20, min_mass=1.2, max_mass=1.6,
             f_lower=20, srate=4096, padding=256, tstart=0):
    """Function that generates test data with injections.

    Arguments
    ---------
    file_path : str
        The path at which the data should be stored.
    duration : int or float
        Duration of the output file in seconds.
    seed : {int, 0}, optional
        A seed to use for generating injection parameters and noise.
    signal_separation : {int or float, 200}, optional
        The average duration between two injections.
    signal_separation_interval : {int or float, 20}, optional
        The duration between two signals will be signal_separation + t,
        where t is drawn uniformly from the interval
        [-signal_separation_interval, signal_separation_interval].
    min_mass : {float, 1.2}, optional
        The minimal mass at which injections will be made (in solar masses).
    max_mass : {float, 1.6}, optional
        The maximum mass at which injections will be made (in solar masses).
    f_lower : {int or float, 20}, optional
        Noise will be generated down to the specified frequency; below that
        it will be set to zero. (The waveforms are generated with a lower
        frequency cutoff of 25 Hertz.)
    srate : {int, 4096}, optional
        The sample rate at which the data is generated.
    padding : {int or float, 256}, optional
        Duration at the beginning and end of the data that does not contain
        any injections.
    tstart : {int or float, 0}, optional
        The initial time of the data.
    """
    np.random.seed(seed)
    size = int(duration // signal_separation)

    # Generate injection times
    random_time_samples = int(round(float(signal_separation_interval) * float(srate)))
    signal_separation_samples = int(round(float(signal_separation) * float(srate)))
    time_samples = randint(signal_separation_samples - random_time_samples,
                           signal_separation_samples + random_time_samples,
                           size=size)
    time_samples = time_samples.cumsum()
    times = time_samples / float(srate)
    times = times[np.where(np.logical_and(times > padding,
                                          times < duration - padding))[0]]
    size = len(times)

    # Generate parameters
    cphase = uniform(0, np.pi * 2.0, size=size)
    ra = uniform(0, 2 * np.pi, size=size)
    dec = np.arccos(uniform(-1., 1., size=size)) - np.pi / 2
    inc = np.arccos(uniform(-1., 1., size=size))
    pol = uniform(0, 2 * np.pi, size=size)
    dist = power(3, size) * 400
    m1 = uniform(min_mass, max_mass, size=size)
    m2 = uniform(min_mass, max_mass, size=size)

    # Save parameters to file.
    stat_file_path, ext = os.path.splitext(file_path)
    stat_file_path = stat_file_path + '_stats' + ext
    with h5py.File(stat_file_path, 'w') as f:
        f['times'] = times
        f['cphase'] = cphase
        f['ra'] = ra
        f['dec'] = dec
        f['inc'] = inc
        f['pol'] = pol
        f['dist'] = dist
        f['mass1'] = m1
        f['mass2'] = m2
        f['seed'] = seed

    p = aLIGOZeroDetHighPower(2 * int(duration * srate), 1.0 / 64, f_lower)

    # Generate noise
    data = {}
    for i, ifo in enumerate(['H1', 'L1']):
        data[ifo] = colored_noise(p, int(tstart), int(tstart + duration),
                                  seed=seed + i, low_frequency_cutoff=f_lower)
        data[ifo] = resample_to_delta_t(data[ifo], 1.0 / srate)

    # Make waveforms and add them into the noise
    for i in range(len(times)):
        hp, hc = get_td_waveform(approximant="TaylorF2",
                                 mass1=m1[i], mass2=m2[i],
                                 f_lower=25, delta_t=1.0 / srate,
                                 inclination=inc[i],
                                 coa_phase=cphase[i],
                                 distance=dist[i])
        hp.start_time += times[i] + int(tstart)
        hc.start_time += times[i] + int(tstart)
        for ifo in ['H1', 'L1']:
            ht = Detector(ifo).project_wave(hp, hc, ra[i], dec[i], pol[i])
            time_diff = float(ht.start_time - data[ifo].start_time)
            sample_diff = int(round(time_diff / data[ifo].delta_t))
            ht.prepend_zeros(sample_diff)
            ht.start_time = data[ifo].start_time
            data[ifo] = data[ifo].add_into(ht)

    # Save the data
    for ifo in ['H1', 'L1']:
        data[ifo].save(file_path, group=ifo)
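# Usage sketch: write roughly half an hour of two-detector data with injections
# to a hypothetical file name; an accompanying 'test_data_stats.hdf' file will
# hold the injection parameters.
generate('test_data.hdf', duration=2048, seed=1)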
def mutate_description(dictionary, description):
    # create mutants of a given description
    # first, we generate a 2d array of the closest words to each of the words
    # in the description
    closest_words = []
    for i in range(DESCRIPTION_LEN):
        # each array consists of the (NUM_CLOSE_WORDS)-closest encodings to the given word
        closest = []
        for j in range(NUM_CLOSE_WORDS):
            # each encoding contains the new word, its embedding, and its distance from the given word
            encoding = {"word": "", "embedding": [], "distance": float("inf")}
            closest.append(encoding)
        closest_words.append(closest)

    # second, we run through all the encodings in the dictionary, finding our closest words
    for encoding in dictionary:
        # find the distance from this encoding to each of our description words
        distances = [
            dist_sqr(encoding["embedding"], description[i]["embedding"])
            for i in range(DESCRIPTION_LEN)
        ]
        for i in range(DESCRIPTION_LEN):
            # if the distance from this encoding to any one of our description
            # words makes it one of the (NUM_CLOSE_WORDS)-closest words to that
            # description word, we'll update that word's list of closest words
            if distances[i] < closest_words[i][NUM_CLOSE_WORDS - 1]["distance"]:
                # find the position where this new word belongs
                pos = binary_search(distances[i], [
                    close_word["distance"] for close_word in closest_words[i]
                ])
                # slide over all following "close words"
                for j in range(NUM_CLOSE_WORDS - 1, pos, -1):
                    closest_words[i][j] = closest_words[i][j - 1]
                # insert this "close word"
                closest_words[i][pos] = {
                    "word": encoding["word"],
                    "embedding": encoding["embedding"],
                    "distance": distances[i]
                }

    # third, we create mutated descriptions based on the "close words" to each
    # word in the description
    mutated_descriptions = []
    for i in range(NUM_MUTANTS):
        # create the specified number of mutated descriptions
        mutated_desc = []
        for j in range(DESCRIPTION_LEN):
            # randomly select a word from the closest words, using a power law
            # distribution: this gives greater probability of selection to
            # closer words, especially to the same word itself
            selected_word_float = NUM_CLOSE_WORDS * (1 - rnd.power(POWER_LAW_CONST))
            selected_word = closest_words[j][int(selected_word_float)]
            # put this randomly selected word in the mutated description
            mutated_desc.append({
                "word": selected_word["word"],
                "embedding": selected_word["embedding"]
            })
        mutated_descriptions.append(mutated_desc)
    return mutated_descriptions
from numpy import random  # stdlib random has no power(), so this must be numpy.random

def power(size, params):
    # thin wrapper: draw `size` samples from numpy's power distribution,
    # with the shape parameter supplied via a params dict
    try:
        return random.power(params['a'], size)
    except ValueError as e:
        exit(e)
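# Usage sketch: five draws from a power distribution with shape a=2.0; the
# params dict layout matches what the wrapper above expects.
samples = power(5, {'a': 2.0})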