Example #1
File: test_noise.py  Project: Jee-Bee/Meas
def create_noise(shape, show_figure=True):
    rbin = rand.binomial(1, 0.5, shape)
    rexp = rand.exponential(1, shape)
    rnor = rand.normal(1, 1, shape)
    rpow = rand.power(1, shape)
    rran = rand.rand(shape)
    runi = rand.uniform(0, 1, shape)
    if show_figure is True:
        fign = plt.figure()
        axn1 = fign.add_subplot(2, 3, 1)
        axn1.hist(rbin)
        axn1.title.set_text('Binomial')
        axn2 = fign.add_subplot(2, 3, 2)
        axn2.hist(rexp)
        axn2.title.set_text('Exponential')
        axn3 = fign.add_subplot(2, 3, 3)
        axn3.hist(rnor)
        axn3.title.set_text('Normal')
        axn4 = fign.add_subplot(2, 3, 4)
        axn4.hist(rpow)
        axn4.title.set_text('Power')
        axn5 = fign.add_subplot(2, 3, 5)
        axn5.hist(rran)
        axn5.title.set_text('Rand')
        axn6 = fign.add_subplot(2, 3, 6)
        axn6.hist(runi)
        axn6.title.set_text('Uniform')
        fign.show()
    return (rbin, rexp, rnor, rpow, rran, runi)
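A minimal usage sketch for Example #1 (assumed context; the excerpt omits its imports): `rand` is taken to be `numpy.random` and `plt` to be `matplotlib.pyplot`. Note that `rand.rand` takes separate dimension arguments, so a scalar `shape` is assumed here.

import numpy.random as rand
import matplotlib.pyplot as plt

# draw 1000 samples from each of the six distributions and plot their histograms
rbin, rexp, rnor, rpow, rran, runi = create_noise(1000, show_figure=True)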
Example #2
def make_dataset1():
    '''Make a dataset of single samples, labeled by the distribution they come from'''
    # now let's make some samples
    lns      = min_max_scale(lognormal(size=bsize)) #log normal
    powers   = min_max_scale(power(0.1,size=bsize)) #power law
    norms    = min_max_scale(normal(size=bsize))    #normal
    uniforms = min_max_scale(uniform(size=bsize))    #uniform
    # add our data together
    data = np.concatenate((lns,powers,norms,uniforms))
    
    # concatenate our labels
    labels = np.concatenate((
        (np.repeat(LOGNORMAL,bsize)),
        (np.repeat(POWER,bsize)),
        (np.repeat(NORM,bsize)),
        (np.repeat(UNIFORM,bsize))))
    tsize = len(labels)
    
    # make sure dimensionality and types are right
    data = data.reshape((len(data),1))
    data = data.astype(np.float32)
    labels = labels.astype(np.int32)
    labels = labels.reshape((len(data),))
    
    return data, labels, tsize
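The excerpt relies on several names it does not define (`bsize`, the label constants, `min_max_scale`, and the `numpy.random` samplers). A plausible, purely illustrative reconstruction of that context:

import numpy as np
from numpy.random import lognormal, power, normal, uniform

bsize = 10000                                 # samples drawn per distribution (assumed)
LOGNORMAL, POWER, NORM, UNIFORM = 0, 1, 2, 3  # integer class labels (assumed)

def min_max_scale(x):
    # rescale a sample vector into [0, 1]
    return (x - x.min()) / (x.max() - x.min())

data, labels, tsize = make_dataset1()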
Example #4
def inject(func, max_amp, inject_frac, data):
    d = np.copy(data)
    K = round(inject_frac * M)
    # I never decided what would be the best way to randomize amplitudes
    amps = r.power(3.0, [1, K]) * max_amp
    trends = np.dot(func.reshape([N, 1]), amps) # gives an N x K array (one trend column per injected signal)
    trends += 1.0
    i = r.randint(0, M-1, K)
    d[:, i] *= trends
    return d
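`inject` reads the globals `N` (rows/time samples) and `M` (columns) and the alias `r` for `numpy.random`, none of which appear in the excerpt. A small driver under those assumptions:

import numpy as np
import numpy.random as r

N, M = 500, 200                      # assumed data dimensions
data = r.normal(size=(N, M))         # toy data matrix
ramp = np.linspace(0.0, 1.0, N)      # a simple linear trend to inject
corrupted = inject(ramp, max_amp=0.05, inject_frac=0.1, data=data)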
Example #5
def time_to_mutation_rate(tree):
    if not hasattr(GC, "NUMPY_SEEDED"):
        from numpy.random import seed as numpy_seed
        numpy_seed(seed=GC.random_number_seed)
        GC.random_number_seed += 1
        GC.NUMPY_SEEDED = True
    t = read_tree_newick(tree)
    for node in t.traverse_preorder():
        if node.edge_length is not None:
            node.edge_length *= power(a=GC.tree_rate_shape)
    return str(t)
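For context (assumed, not shown in the excerpt): `read_tree_newick` appears to come from the treeswift package, `GC` is the host simulator's global-context object, and `power(a=...)` is `numpy.random.power`, so every branch length gets scaled by a draw from [0, 1]. Illustrative stand-ins:

from treeswift import read_tree_newick
from numpy.random import power

class GC:  # hypothetical stand-in for the simulator's global-context object
    random_number_seed = 42
    tree_rate_shape = 2.0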
Example #6
def power_mutation(generation, random_seed=None):
    random.seed(random_seed)
    parent = random.choice(generation)
    child = list()
    s = power(0.5)
    alpha = uniform(low=0, high=1)
    for gen in parent:
        lower_bound = random.randint(0, 5)
        upper_bound = random.randint(0, 5)
        t = (gen - lower_bound) / (upper_bound - lower_bound)
        if t < alpha:
            child.append(gen - s * (gen - lower_bound))
        else:
            child.append(gen + s * (upper_bound - gen))

    return tuple(child)
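The mutation operator above mixes two RNG sources: `random.seed`, `random.choice`, and `random.randint` read like the standard-library `random` module, while `power` and `uniform(low=..., high=...)` match `numpy.random`. A minimal driver under those assumptions; note that the division by `upper_bound - lower_bound` raises `ZeroDivisionError` whenever the two random bounds coincide.

import random                              # stdlib: seed, choice, randint
from numpy.random import power, uniform    # numpy: power(a), uniform(low, high)

generation = [(1.0, 2.0, 3.0), (2.5, 1.5, 4.0), (0.5, 3.5, 2.0)]
# may fail with ZeroDivisionError if the two randint() bounds happen to be equal
child = power_mutation(generation, random_seed=7)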
Example #7
def make_widedataset(width=width):
    # we're going to make rows of 40 features unsorted
    wlns = min_max_scale(lognormal(size=(bsize, width)))  #log normal
    wpowers = min_max_scale(power(0.1, size=(bsize, width)))  #power law
    wnorms = min_max_scale(normal(size=(bsize, width)))  #normal
    wuniforms = min_max_scale(uniform(size=(bsize, width)))  #uniform

    wdata = np.concatenate((wlns, wpowers, wnorms, wuniforms))

    # concatenate our labels
    wlabels = np.concatenate(
        ((np.repeat(LOGNORMAL, bsize)), (np.repeat(POWER, bsize)),
         (np.repeat(NORM, bsize)), (np.repeat(UNIFORM, bsize))))

    joint_shuffle(wdata, wlabels)
    wdata = wdata.astype(np.float32)
    wlabels = wlabels.astype(np.int32)
    wlabels = wlabels.reshape((len(wlabels), ))
    return wdata, wlabels
Example #8
def make_widedataset(width=width):
    # we're going to make rows of 40 features unsorted
    wlns      = min_max_scale(lognormal(size=(bsize,width))) #log normal
    wpowers   = min_max_scale(power(0.1,size=(bsize,width))) #power law
    wnorms    = min_max_scale(normal(size=(bsize,width)))    #normal
    wuniforms = min_max_scale(uniform(size=(bsize,width)))    #uniform
    
    wdata = np.concatenate((wlns,wpowers,wnorms,wuniforms))
    
    # concatenate our labels
    wlabels = np.concatenate((
        (np.repeat(LOGNORMAL,bsize)),
        (np.repeat(POWER,bsize)),
        (np.repeat(NORM,bsize)),
        (np.repeat(UNIFORM,bsize))))
    
    joint_shuffle(wdata,wlabels)
    wdata = wdata.astype(np.float32)
    wlabels = wlabels.astype(np.int32)
    wlabels = wlabels.reshape((len(wlabels),))
    return wdata, wlabels
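Both versions of `make_widedataset` call a `joint_shuffle` helper that is not shown. One plausible sketch, assuming it shuffles the rows of both arrays in place with the same permutation so data and labels stay aligned:

import numpy as np

def joint_shuffle(a, b):
    # apply one random permutation to both arrays in place
    assert len(a) == len(b)
    perm = np.random.permutation(len(a))
    a[:] = a[perm]
    b[:] = b[perm]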
Example #9
    def __init__(self):
        #MODEL PARAMETERS
        self.NUMBER_OF_YEARS = 30
        
        #MODEL OPERATORS
        #probability
        self.PREFERRED_AGE_DIFFERENCE = -0.1
        self.AGE_PROBABILITY_MULTIPLIER = -0.2
        self.PREFERRED_AGE_DIFFERENCE_GROWTH = 0.1
        self.SB_PROBABILITY_MULTIPLIER = 0
        
        #relationship operator
        self.SEXES = 2
        self.MIN_AGE = 15
        self.MAX_AGE = 65
        self.BIN_SIZE = 5
        self.DURATIONS = lambda a1, a2: 30*random.exponential(1)
        self.RECRUIT_WARM_UP = 20
        self.RECRUIT_INITIAL = 0.02
        self.RECRUIT_RATE = 0.005
        
        #infection operator
        self.INFECTIVITY = 0.01
        self.INITIAL_PREVALENCE = 0.01
        self.SEED_TIME = 20  # in weeks

        #time operator
        self.time = -1
        self.grid_queue_index = 0
                
        #MODEL POPULATION
        self.INITIAL_POPULATION = 100
        self.AGENT_ATTRIBUTES = {}
        self.BORN = lambda: -52*random.uniform(self.MIN_AGE, self.MAX_AGE)
        self.SEX = lambda: random.randint(self.SEXES)
        self.DNP = lambda: random.power(0.1)*1.2
        self.SEXUAL_BEHAVIOR = lambda: random.randint(1,5)
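The attribute lambdas above appear to use `numpy.random` (note `random.randint(self.SEXES)` and `random.power(0.1)`). Rough stand-alone equivalents under that assumption:

import numpy.random as random   # assumed alias

MIN_AGE, MAX_AGE, SEXES = 15, 65, 2
born = -52 * random.uniform(MIN_AGE, MAX_AGE)   # age at model start, in (negative) weeks
sex = random.randint(SEXES)                     # 0 or 1
dnp = random.power(0.1) * 1.2                   # desired new partners, power-law scaled to [0, 1.2]
duration = 30 * random.exponential(1)           # relationship duration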
Example #10
if rank == 0:
    #1.1 Sample and set parameters from prior distribution
    #print("---Sample", i, "---")
    s = CommunityDistributed.CommunityDistributed(comm, 0, [])
    # set constants
    s.INITIAL_POPULATION = 10000  # scale this up later?
    s.NUMBER_OF_YEARS = 30

    # set parameters
    s.probability_multiplier = prior[1]()
    s.preferred_age_difference = prior[2]()
    s.preferred_age_difference_growth = prior[3]()

    s.DNPscale = prior[4]()
    s.DNPshape = prior[5]()
    s.DNP = lambda: random.power(s.DNPshape) * s.DNPscale

    s.durations_scale = prior[6]()
    s.durations_shape = prior[7]()
    s.DURATIONS = lambda a1, a2: s.durations_scale * random.exponential(s.durations_shape)

    #1.2 Run simulation
    s.run()

    #1.3 Save to csv
    print(str(i) + ",", end="")
    print(",".join(map(lambda x: str(round(x, 2)),
                       [s.probability_multiplier,
                        s.preferred_age_difference, s.preferred_age_difference_growth,
                        s.DNPscale, s.DNPshape,
                        s.durations_scale, s.durations_shape])) + ",", end="")
     
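The block above also assumes an MPI setting (`comm` and `rank`, likely from mpi4py) plus a table `prior` of samplers indexed 1 through 7 and a loop counter `i`; none of these are defined in the excerpt. Purely illustrative stand-ins with made-up prior ranges:

import numpy.random as random

i = 0
prior = {
    1: lambda: random.uniform(-1.0, 0.0),   # probability_multiplier (illustrative range)
    2: lambda: random.uniform(-3.0, 0.0),   # preferred_age_difference
    3: lambda: random.uniform(0.0, 1.0),    # preferred_age_difference_growth
    4: lambda: random.uniform(0.5, 2.0),    # DNP scale
    5: lambda: random.uniform(0.05, 1.0),   # DNP shape
    6: lambda: random.uniform(10.0, 50.0),  # durations scale
    7: lambda: random.uniform(0.5, 2.0),    # durations shape
}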
Example #11
def bin(row):
    return np.histogram(row,bins=len(row),range=(0.0,1.0))[0]/float(len(row))

print "Apply the histogram to all the data rows"
bdata = np.apply_along_axis(bin,1,wdata).astype(np.float32)
blabels = wlabels

# ensure we have our test data
test_bdata = np.apply_along_axis(bin,1,test_wdata).astype(np.float32)
test_blabels = test_wlabels

# helper data 
enum_funcs = [
    (LOGNORMAL,"log normal",lambda size: lognormal(size=size)),
    (POWER,"power",lambda size: power(0.1,size=size)),
    (NORM,"normal",lambda size: normal(size=size)),
    (UNIFORM,"uniforms",lambda size: uniform(size=size)),
]

# uses enum_funcs to evaluate PER CLASS how well our classify operates
def classify_test(bnet,ntests=1000):
    for tup in enum_funcs:
        enum, name, func = tup
        lns = min_max_scale(func(size=(ntests,width))) #samples from the current distribution
        blns = np.apply_along_axis(bin,1,lns).astype(np.float32)
        blns_labels = np.repeat(enum,ntests)
        blns_labels = blns_labels.astype(np.int32)
        classification = bnet.classify(blns)
        print "%s %s / %s ::: %s " % (name,sum(classification == blns_labels),ntests, collections.Counter(classification))
Example #12
def bin(row):
    return np.histogram(row, bins=len(row), range=(0.0, 1.0))[0] / float(
        len(row))


print "Apply the histogram to all the data rows"
bdata = np.apply_along_axis(bin, 1, wdata).astype(np.float32)
blabels = wlabels

# ensure we have our test data
test_bdata = np.apply_along_axis(bin, 1, test_wdata).astype(np.float32)
test_blabels = test_wlabels

# helper data
enum_funcs = [
    (LOGNORMAL, "log normal", lambda size: lognormal(size=size)),
    (POWER, "power", lambda size: power(0.1, size=size)),
    (NORM, "normal", lambda size: normal(size=size)),
    (UNIFORM, "uniforms", lambda size: uniform(size=size)),
]


# uses enum_funcs to evaluate PER CLASS how well our classify operates
def classify_test(bnet, ntests=1000):
    for tup in enum_funcs:
        enum, name, func = tup
        lns = min_max_scale(func(size=(ntests, width)))  # samples from the current distribution
        blns = np.apply_along_axis(bin, 1, lns).astype(np.float32)
        blns_labels = np.repeat(enum, ntests)
        blns_labels = blns_labels.astype(np.int32)
        classification = bnet.classify(blns)
        print "%s %s / %s ::: %s " % (name, sum(classification == blns_labels),
Example #13
    
    timecost.append([mid_time-start_time,time.time()-mid_time])

    #zipf
    start_time=time.time()
    a=dsg.zipf(1.25,times)
    mid_time=time.time()
    b=nr.poisson(1.25,times)

    timecost.append([mid_time-start_time,time.time()-mid_time])

    #power
    start_time=time.time()
    a=dsg.power(1.5,times)
    mid_time=time.time()
    b=nr.power(1.5,times)
    
    timecost.append([mid_time-start_time,time.time()-mid_time])

    #geometric
    start_time=time.time()
    a=dsg.geometric(0.4,times)
    mid_time=time.time()
    b=nr.geometric(0.4,times)
    
    timecost.append([mid_time-start_time,time.time()-mid_time])

    #pareto
    start_time=time.time()
    a=dsg.pareto(1.25,times)
    mid_time=time.time()
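The timing loop above starts and ends mid-excerpt; it benchmarks a custom sampler module `dsg` against `numpy.random` (`nr`) for several distributions. An assumed preamble, for orientation only:

import time
import numpy.random as nr
# `dsg` is the custom distribution-sampling module being benchmarked;
# its import path is not shown in the excerpt.

times = 1000000   # number of samples per draw (assumed)
timecost = []     # [custom_time, numpy_time] pairs, one per distribution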
Example #14
def generate(file_path, duration, seed=0, signal_separation=200,
             signal_separation_interval=20, min_mass=1.2, max_mass=1.6,
             f_lower=20, srate=4096, padding=256, tstart=0):
    """Function that generates test data with injections.
    
    Arguments
    ---------
    file_path : str
        The path at which the data should be stored. 
    duration : int or float
        Duration of the output file in seconds.
    seed : {int, 0}, optional
        A seed to use for generating injection parameters and noise.
    signal_separation : {int or float, 200}, optional
        The average duration between two injections.
    signal_separation_interval : {int or float, 20}, optional
        The duration between two signals will be signal_separation + t,
        where t is drawn uniformly from the interval
        [-signal_separation_interval, signal_separation_interval].
    min_mass : {float, 1.2}, optional
        The minimal mass at which injections will be made (in solar
        masses).
    max_mass : {float, 1.6}, optional
        The maximum mass at which injections will be made (in solar
        masses).
    f_lower : {int or float, 20}, optional
        Noise will be generated down to the specified frequency.
        Below that frequency it is set to zero. (The waveforms are generated
        with a lower frequency cutoff of 25 Hertz.)
    srate : {int, 4096}, optional
        The sample rate at which the data is generated.
    padding : {int or float, 256}, optional
        Duration in the beginning and end of the data that does not
        contain any injections.
    tstart : {int or float, 0}, optional
        The initial time of the data.
    """    
    np.random.seed(seed)
    
    size = (duration // signal_separation)
    
    #Generate injection times
    random_time_samples = int(round(float(signal_separation_interval) * float(srate)))
    signal_separation_samples = int(round(float(signal_separation) * float(srate)))
    time_samples = randint(signal_separation_samples - random_time_samples, signal_separation_samples + random_time_samples, size=size)
    time_samples = time_samples.cumsum()
    times = time_samples / float(srate)
    
    times = times[np.where(np.logical_and(times > padding, times < duration - padding))[0]]
    size = len(times)
    
    #Generate parameters
    cphase = uniform(0, np.pi*2.0, size=size)
    
    ra = uniform(0, 2 * np.pi, size=size)
    dec = np.arccos(uniform(-1., 1., size=size)) - np.pi/2
    inc = np.arccos(uniform(-1., 1., size=size))
    pol = uniform(0, 2 * np.pi, size=size)
    dist = power(3, size) * 400

    m1 = uniform(min_mass, max_mass, size=size)
    m2 = uniform(min_mass, max_mass, size=size)
    
    #Save parameters to file.
    stat_file_path, ext = os.path.splitext(file_path)
    stat_file_path = stat_file_path + '_stats' + ext
    with h5py.File(stat_file_path, 'w') as f:
        f['times'] = times
        f['cphase'] = cphase
        f['ra'] = ra
        f['dec'] = dec
        f['inc'] = inc
        f['pol'] = pol
        f['dist'] = dist
        f['mass1'] = m1
        f['mass2'] = m2
        f['seed'] = seed
    
    p = aLIGOZeroDetHighPower(2 * int(duration * srate), 1.0/64, f_lower)
    
    #Generate noise
    data = {}
    for i, ifo in enumerate(['H1', 'L1']):
        data[ifo] = colored_noise(p, int(tstart),
                                    int(tstart + duration), 
                                    seed=seed + i,
                                    low_frequency_cutoff=f_lower)
        data[ifo] = resample_to_delta_t(data[ifo], 1.0/srate)
    
    # make waveforms and add them into the noise
    for i in range(len(times)):
        hp, hc = get_td_waveform(approximant="TaylorF2",
                                mass1=m1[i], 
                                mass2=m2[i],
                                f_lower=25,
                                delta_t=1.0/srate,
                                inclination=inc[i],
                                coa_phase=cphase[i],
                                distance=dist[i])
        hp.start_time += times[i] + int(tstart)
        hc.start_time += times[i] + int(tstart)
        
        for ifo in ['H1', 'L1']:
            ht = Detector(ifo).project_wave(hp, hc, ra[i], dec[i], pol[i])
            time_diff = float(ht.start_time - data[ifo].start_time)
            sample_diff = int(round(time_diff / data[ifo].delta_t))
            ht.prepend_zeros(sample_diff)
            ht.start_time = data[ifo].start_time
            data[ifo] = data[ifo].add_into(ht)
    
    #Save the data
    for ifo in ['H1', 'L1']:
        data[ifo].save(file_path, group='%s' % (ifo))
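A hedged usage sketch for `generate` (the file name and duration are arbitrary). Per the code above, the call writes the two-detector strain data to the given file and the injection parameters to a companion `_stats` file; it assumes the pycbc-based imports used by the function are available in the module.

generate('test_data.hdf', duration=2048, seed=1234)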
Example #15
def mutate_description(dictionary, description):
    # create mutants of a given description
    # first, we generate a 2d array of the closest words to each of the words in the description
    closest_words = []
    for i in range(DESCRIPTION_LEN):
        # each array consists of the (NUM_CLOSE_WORDS)-closest encodings to the given word
        closest = []

        for j in range(NUM_CLOSE_WORDS):
            # each encoding contains the new word, its embedding, and its distance from the given word
            encoding = {"word": "", "embedding": [], "distance": float("inf")}

            closest.append(encoding)

        closest_words.append(closest)

    # second, we run through all the encodings in the dictionary, finding our closest words
    for encoding in dictionary:
        # find the distance from this encoding to each of our description words
        distances = [
            dist_sqr(encoding["embedding"], description[i]["embedding"])
            for i in range(DESCRIPTION_LEN)
        ]

        for i in range(DESCRIPTION_LEN):
            # if the distance from this encoding to any one of our description words makes it one of the
            # (NUM_CLOSEST_WORDS)-closest words to that description word, we'll update the list of closest words to
            # that description word
            if distances[i] < closest_words[i][NUM_CLOSE_WORDS -
                                               1]["distance"]:
                # find the position where this new word belongs
                pos = binary_search(distances[i], [
                    close_word["distance"] for close_word in closest_words[i]
                ])

                # slide over all following "close words"
                for j in range(NUM_CLOSE_WORDS - 1, pos, -1):
                    closest_words[i][j] = closest_words[i][j - 1]

                # insert this "close word"
                closest_words[i][pos] = {
                    "word": encoding["word"],
                    "embedding": encoding["embedding"],
                    "distance": distances[i]
                }

    # third, we create mutated descriptions based on the "close words" to each word in the description
    mutated_descriptions = []

    for i in range(NUM_MUTANTS):
        # create the specified number of mutated descriptions
        mutated_desc = []

        for j in range(DESCRIPTION_LEN):
            # randomly select a word from the closest words, using a power law distribution
            # this gives greater probability of selection to closer words, especially to the same word itself
            selected_word_float = NUM_CLOSE_WORDS * (
                1 - rnd.power(POWER_LAW_CONST))
            selected_word = closest_words[j][int(selected_word_float)]

            # put this randomly selected word in the mutated description
            mutated_desc.append({
                "word": selected_word["word"],
                "embedding": selected_word["embedding"]
            })

        mutated_descriptions.append(mutated_desc)

    return mutated_descriptions
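`mutate_description` depends on several module-level names the excerpt leaves out (`rnd`, the length/count constants, `dist_sqr`, `binary_search`). Plausible reconstructions, for illustration only:

import bisect
import numpy.random as rnd

DESCRIPTION_LEN = 10       # words per description (assumed)
NUM_CLOSE_WORDS = 20       # candidate replacements kept per word (assumed)
NUM_MUTANTS = 5            # mutated descriptions produced per call (assumed)
POWER_LAW_CONST = 3.0      # shape of the power-law selection bias (assumed)

def dist_sqr(a, b):
    # squared Euclidean distance between two embedding vectors
    return sum((x - y) ** 2 for x, y in zip(a, b))

def binary_search(value, sorted_values):
    # index at which `value` keeps `sorted_values` in ascending order
    return bisect.bisect_left(sorted_values, value)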
Example #16
File: utils.py  Project: kunlegiwa/MANGO
def power(size, params):
    try:
        return random.power(params['a'], size)
    except ValueError as e:
        exit(e)
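A minimal call sketch, assuming `from numpy import random` at the top of utils.py (the wrapper forwards to `numpy.random.power`): draw five samples with shape parameter a = 0.3.

samples = power(5, {'a': 0.3})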