Example #1
def main(args):
    '''
    :param args: parsed command-line arguments
    :return: None; the output is a directory containing 3 .txt files
    '''
    [train_, val_, test_] = args.proportion
    out_num = args.num
    if abs(train_ + val_ + test_ - 1.) > 0.01:  # allow a small floating-point delta
        print('error: proportions must sum to 1')
        return

    if args.reference:
        ref_df = pd.read_csv(args.reference, index_col='scences')
        print('load refs ok')
    else:
        ref_df = None

    out_dir = Path(args.out_dir)
    out_dir.mkdir_p()
    train_txt_p = out_dir / 'train.txt'
    val_txt_p = out_dir / 'val.txt'
    test_txt_p = out_dir / 'test.txt'

    dataset_path = Path(args.dataset_path)
    trajs = dataset_path

    item_list = []  # collected sample files across all scenes

    # filtering and combination
    scenes = trajs.dirs()
    scenes.sort()  #blocks
    scenes = scene_fileter(scenes)
    for scene in scenes:

        files = scene.files()
        files.sort()
        files = file_fileter(args.dataset_path, files, ref_df)
        item_list += files

    #list constructed
    random.seed(args.rand_seed)
    random.shuffle(item_list)
    if out_num and out_num < len(item_list):
        item_list = item_list[:out_num]

    for i in range(len(item_list)):
        item_list[i] = item_list[i].relpath(dataset_path)

    length = len(item_list)
    train_bound = int(length * args.proportion[0])
    val_bound = int(length * args.proportion[1]) + train_bound
    test_bound = int(length * args.proportion[2]) + val_bound

    print(" train items:{}\n val items:{}\n test items:{}".format(
        len(item_list[:train_bound]), len(item_list[train_bound:val_bound]),
        len(item_list[val_bound:test_bound])))
    writelines(item_list[:train_bound], train_txt_p)
    writelines(item_list[train_bound:val_bound], val_txt_p)
    writelines(item_list[val_bound:test_bound], test_txt_p)
Example #2
def generate_failure_network2():
    g = nx.read_weighted_edgelist('edgelist/ninux/0', nodetype=int)
    index = 0
    for i in range(10):
        random.seed(1234)
        bc = nx.betweenness_centrality(g)
        nodes = [(k, v) for k, v in bc.items()]
        nodes.sort(key=lambda x: x[1], reverse=True)
        to_rem = 0
        rank = 0  # position of the first node whose removal keeps the graph connected
        for node in nodes:
            g1 = g.copy()
            to_rem = node[0]
            g1.remove_node(node[0])
            if nx.is_connected(g1):
                print(rank)
                break
            rank += 1
        g.remove_node(to_rem)
        #print(nx.number_connected_components(g))
        for j in range(4):
            nx.write_weighted_edgelist(g, 'edgelist/testdata2/' + str(index))
            nx.write_weighted_edgelist(
                g, 'edgelist/testdata2/' + str(40 * 2 - 1 - index))
            index += 1
Example #3
def main():
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cuda and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
    device = torch.device("cuda" if args.cuda and torch.cuda.is_available() else "cpu")

    ent2id_dict, pre_alignments, triples, pred_subjs, pred_objs, id_lists = read_data(args.file_dir, args.lang_num)
    np.random.shuffle(pre_alignments)
    split_point = int(len(pre_alignments) * args.train_split)
    train_pre_alignments = np.array(pre_alignments[:split_point], dtype=np.int32)
    test_pre_alignments = np.array(pre_alignments[split_point:], dtype=np.int32)

    ent_num = len(ent2id_dict)
    rel_num = len(pred_subjs)
    config = GBertConfig(ent_num, rel_num, args.entity_embedding_dim, args.entity_embedding_dim)

    adjacency_matrix = get_adjacency_matrix(ent_num, triples, norm=True).to(device)

    g_bert = GBert(config).to(device)

    optimizer = optim.Adagrad(g_bert.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    entity_indices = torch.LongTensor(np.arange(ent_num)).to(device)

    for epoch in range(args.epochs):
        g_bert.train()
        optimizer.zero_grad()
        output = g_bert(entity_indices)

        # Loss / cost computation (and loss.backward()) goes here

        optimizer.step()
Example #4
    def get_filelist_dict(self, face_list):
        """ Parse list of face images into a filelist dictionary
         
        Inputs:
            face_list -- a list of paths to face images (list) 
        
        Outputs:
            filelists_dict -- a dictionary of image paths organized by category (dict)
        """
        # -- Organize images into the appropriate categories
        cats = {}
        for f in face_list:
            cat = "/".join(f.split('/')[:-1])
            name = f.split('/')[-1]
            if cat not in cats:
                cats[cat] = [name]
            else:
                cats[cat] += [name]

                
        # -- Shuffle the images into a new random order
        filelists_dict = {}
        seed = 1
        for cat in cats:
            filelist = cats[cat]
            if self._rand_gallery:
                random.seed(seed)
                random.shuffle(filelist)
                seed += 1
            filelist = [ cat + '/' + f for f in filelist ]
            filelists_dict[cat] = filelist
            
        return filelists_dict
Example #5
    def fit(self, train_data):
        # Learn the parameters (w, b) from the training data

        self.w = [0.0] * (self.vocab_size)
        self.b = 0

        for _ in range(self.mi):  # training iterations
            tr_size = len(train_data[0])  #699
            indices = list(range(tr_size))

            random.seed(5)  # seeds the stdlib RNG; note that np.random.shuffle below uses NumPy's own generator
            np.random.shuffle(indices)
            train_data = ([train_data[0][i] for i in indices],
                          [train_data[1][i] for i in indices])
            x, y = train_data
            words_bin_form = np.asarray(get_feature_vectors(x, self.binFeats))
            y = np.asarray(y)
            for i in range(len(words_bin_form)):

                single = words_bin_form[i]
                label = y[i]
                gw = [0.0] * (len(words_bin_form[0]))
                gb = 0
                if label * (np.dot(self.w, single) + self.b) <= 1:
                    dg = np.multiply(-1 * label, single)
                    db = -1 * label
                    t = LA.norm(dg)
                    if t < 0.00001:
                        break
                    self.w = np.subtract(self.w, np.multiply(self.lr, dg))
                    self.b -= db * self.lr
Example #6
def seedit(seed=0):
    """ Fixed seed makes for repeatability, but there may be two different
    random number generators involved. """
    import random
    import numpy
    random.seed(seed)
    numpy.random.seed(seed)
def parallel(index):
    nb_anchors = all_parameters[index][0]
    tics = all_parameters[index][1]
    ga = GA(
        fitness_func=Work,
        n_individu=nb_ind,
        CrossThreshold=0.2,
        MutationThreshold=0.3,
        gen=nb_gen,
        dim=nb_anchors,
        MAX_X=max_x,
        MAX_Y=max_y,
        TICS=tics
    )
    random.seed(index)
    start = time.time()
    optimal_anchors = ga.run()
    end = time.time()

    # getting the optimal area and the minavg from optimal anchors

    l = getAllSubRegions(optimal_anchors)
    optimal_areas = getDisjointSubRegions(l)
    minAvgRA = getExpectation(optimal_areas)


#    drawNetwork(optimal_anchors, optimal_areas, algo_="p_genetic",max_x_=max_x, max_y_=max_y)

    print("**Optimal Anchor Pos.:" + str(optimal_anchors), minAvgRA)
    print('Running time: ' + str(round((end - start) / 60.0, 2)) + ' (min.)')

    with open('./TXT/p_genetic.txt', 'a') as f_res:
        f_res.write(str(optimal_anchors)+';'+str(minAvgRA)+';'+str(end - start)+';'+str(nb_anchors)+';'+str(tics)+'\n')
Example #8
def randomString(stringLength=10, n = 10):
	random.seed(1)
	keyVal = []
	for x in range(n):
		letters = string.ascii_lowercase
		keyVal.append(''.join(random.choice(letters) for i in range(stringLength)))
	return keyVal
Example #9
def seedit(seed=0):
    """ Fixed seed makes for repeatability, but there may be two different
    random number generators involved. """
    import random
    import numpy
    random.seed(seed)
    numpy.random.seed(seed)
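
The docstring above notes that two different generators may be involved: Python's random module and NumPy's numpy.random keep separate state, so seeding only one leaves the other unseeded. A minimal standalone sketch (not taken from any of the projects above) that checks both streams become repeatable after a seedit-style call:

import random
import numpy

def seed_both(seed=0):
    # The two generators do not share state; seed each one explicitly.
    random.seed(seed)
    numpy.random.seed(seed)

seed_both(42)
first = (random.random(), numpy.random.rand())
seed_both(42)
second = (random.random(), numpy.random.rand())
assert first == second  # identical draws from both generators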
Example #10
    def __init__(self, environment, phase_lengths, min_phase_length, seed=0):
        self.environment = environment
        self.phase_lengths = phase_lengths
        self.min_phase_length = min_phase_length
        self.seed = seed
        self.edge_lower_bounds = None  # Used in context generation

        np.random.seed(self.seed)
        random.seed(self.seed)
    def generate(self):
        
        best_policy = None
        best_reward = -float('Inf')
        candidates = []
        eps = 1  # step size, equal to the action space resolution
        pop_size = 4
        cross_prob = 1
        exchange_prob = 1
        mutation_prob = 1
        generation = 4
        tmp_reward = []
        tmp_policy = []
        random.seed(54)
        turb = 5
        
        try:
            # Agents should make use of 20 episodes in each training run, if making sequential decisions

            # Define population
            indv_template = DecimalIndividual(ranges=[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1),(0, 1), (0, 1)], eps=eps)
            population = Population(indv_template=indv_template, size = pop_size)
            population.init()  # Initialize population with individuals.

            # Create genetic operators
            # Use built-in operators here.
            selection = RouletteWheelSelection()
            crossover = UniformCrossover(pc=cross_prob, pe=exchange_prob) # PE = Gene exchange probability
            mutation = FlipBitMutation(pm=mutation_prob)  # TODO: tune the mutation probability (e.g. 0.1)

            # Create genetic algorithm engine to run optimization
            engine = GAEngine(population=population, selection=selection,
                            crossover=crossover, mutation=mutation,)

            # Define and register fitness function
            @engine.fitness_register
            def fitness(indv):
                p = [0 for _ in range(10)]
                p = indv.solution
                policy = {'1': [p[0], p[1]], '2': [p[2], p[3]], '3': [p[4], p[5]], '4': [p[6], p[7]], '5': [p[8], p[9]]}
                reward = self.environment.evaluatePolicy(policy) # Action in Year 1 only
                print('Sequential Result : ', reward)
                tmp_reward.append(reward)
                tmp_policy.append(policy)
                tmp_single = []
                return reward + uniform(-turb, turb)
            
            # run
            engine.run(ng = generation)
            best_reward = max(tmp_reward)
            best_policy = tmp_policy[-pop_size]
        
        except (KeyboardInterrupt, SystemExit):
            print(exc_info())
        
        return best_policy, best_reward
Example #12
def main(x):
    # 1st method
    random.seed(1)
    l = np.asarray(x)
    print(l)

    _test_LDA(l)

    # 2nd method
    #another_method()
    jaccard(int(l[0]))
Example #13
 def querry(self, bt, tt):
     if not self.noise_amp:
         for i in range(bt, tt):
             yield self.reconstituted_wf[i % self.win_size]
     else:
         # a random seed is chosen for each i, which keeps results consistent between queries;
         # the drawback is that it slows the iterator down quite a bit: ~30x slower for 1 million samples
         for i in range(bt, tt):
             random.seed(a=i)
             noise = random.random() * self.noise_amp
             yield self.reconstituted_wf[i % self.win_size] + noise
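
The comment above describes re-seeding the generator for every index so that the same sample position always receives the same noise, at the cost of iteration speed. A minimal sketch of that idea, using a dedicated random.Random instance (an assumption, not the original class) so the global generator is left untouched:

import random

def noise_at(i, noise_amp=0.1):
    # Deterministic noise for index i: the same i yields the same value across queries.
    rng = random.Random(i)
    return rng.random() * noise_amp

assert noise_at(123, 0.5) == noise_at(123, 0.5)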
Example #14
    def _set_seed(seed: int) -> None:
        """
        Set experiment seed.

        Args:
            seed: seed

        """
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
Example #15
    def get_initial_centers_from_data_set(data, k):
        if CHOOSE_INITIAL_CENTERS_RANDOMLY:
            random.seed(8)
            return np.array(random.choices(data, k=k), dtype=np.float64)

        min_point = data.min(0)
        max_point = data.max(0)
        centers = []

        for i in range(k):
            centers.append(min_point + i * (max_point - min_point) / k)  # evenly spaced fallback centers

        return centers
Example #16
    def __init__(self,
                 G,
                 expRate,
                 infRate,
                 recRate,
                 rngSeed=None,
                 tallyFuncs=None,
                 logSim=False):
        self.G = G
        self.expRate = expRate
        self.infRate = infRate
        self.recRate = recRate
        self.tallyFuncs = tallyFuncs
        self.logSim = logSim
        self.rngSeed = rngSeed

        self.nodeStates = np.zeros(len(G.nodes()))

        self.exposedList = []

        self.infectiousList = []
        self.siList = []

        if self.rngSeed is not None:
            random.seed(self.rngSeed)

        #Mark a single node as infectious
        validSourceNodes = np.where(self.nodeStates == 0)
        sourceNode = choice(validSourceNodes[0])
        self.sourceNode = sourceNode

        self.nodeStates[sourceNode] = 2
        self.infectiousList.append(sourceNode)
        for edge in G.edges(sourceNode):
            if self.nodeStates[edge[1]] == 0:
                self.siList.append(edge)

        #A list of simulation state arrays
        if self.logSim:
            self.simStates = []
            self.simState = [self.nodeStates.copy(), self.siList.copy()]
            self.simStates.append(self.simState)

        self.t = 0

        self.useTally = tallyFuncs is not None

        if self.useTally:
            self.tallyStats = []
            self.recordTallyStats()
def genId(nodeIdList):
    collisionCheck = []
    #generation of ids is the privilege of the initiator only
    if Initiator:
        #generate a rand id for every node in the nodelist
        for node in nodeIdList:
            random.seed(node.ip_addr)  # deterministic seed derived from the node's address
            node.id = randint(9,10000000)
            #if the random number has already been generated
            while node.id in collisionCheck:
                node.id = randint(1000,10000000) 
            #append the newly generated unique id to the list     
            collisionCheck.append(node.id)
            
    return nodeIdList
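
genId derives each node's id from its address, so the initiator can regenerate the same ids deterministically while re-drawing on collisions. A small self-contained sketch of that pattern (the Node class and addresses here are hypothetical, not from the original project):

import random

class Node:
    def __init__(self, ip_addr):
        self.ip_addr = ip_addr
        self.id = None

def gen_ids(nodes):
    seen = set()
    for node in nodes:
        random.seed(node.ip_addr)           # deterministic seed per address
        node.id = random.randint(9, 10000000)
        while node.id in seen:              # re-draw until the id is unique
            node.id = random.randint(1000, 10000000)
        seen.add(node.id)
    return nodes

nodes = gen_ids([Node("10.0.0.%d" % i) for i in range(3)])
print([n.id for n in nodes])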
Example #18
def act_as_baboon(my_id, init_side):
    side = init_side
    random.seed(my_id)
    global westers
    global easters
    for i in xrange(NUM_CROSSINGS):

        print (side)
        print (westers)
        print (easters)
        if side == 0:
            westqueue.acquire()
        else:
            eastqueue.acquire()

        with mutex:
            if (easters > westers) or (westers <= 0):
                #print ("east")
                eastqueue.release()
            elif (easters <= westers) or (easters <= 0):
                westqueue.release()
                #print ("west")

        #print("Got here")

        with turnstile:
            switches[side].lock(rope)
        with multiplex:
            sleep(random.random())  # crossing; Seeded random number
        switches[side].unlock(rope)
        with mutex2:
            if side == 0:
                easters += 1
                westers -= 1
            else:
                westers += 1
                easters -= 1
            side = 1 - side
            if (easters > westers) or (westers <= 0):
                #print ("east")
                eastqueue.release()
            elif (easters <= westers) or (easters <= 0):
                westqueue.release()
                #print ("west")

        #print("Got to run")
    print ("Baboon %d finished" % my_id)
Example #19
def generate_failure_network():
    g = nx.read_weighted_edgelist('edgelist/ninux/0', nodetype=int)
    index = 0
    for i in range(10):
        random.seed(1234)
        bc = nx.betweenness_centrality(g)
        max_node = (max(bc, key=bc.get))
        nodes = [(k, v) for k, v in bc.items()]
        nodes.sort(key=lambda x: x[1], reverse=True)
        node_key = nodes[15:25][random.randint(0, 9)][0]  # pick one of the nodes ranked 16-25 by betweenness
        g.remove_node(node_key)
        #print(nx.number_connected_components(g))
        for j in range(4):
            nx.write_weighted_edgelist(g, 'edgelist/testdata/' + str(index))
            nx.write_weighted_edgelist(
                g, 'edgelist/testdata/' + str(40 * 2 - 1 - index))
            index += 1
Example #20
    def _gen_filenames(self):
        """
        返回要处理的文件的文件名

        :return: filenames list
        """
        filenames = tf.gfile.Glob(self.glob)

        # Shuffle the ordering of all image files in order to guarantee
        # random ordering of the images with respect to label in the
        # saved TFRecord files. Make the randomization repeatable.
        if self.shuffle:
            shuffled_index = list(range(len(filenames)))
            random.seed(12345)
            random.shuffle(shuffled_index)

            filenames = [filenames[i] for i in shuffled_index]

        print('Found %d  files ' % len(filenames))
        return filenames
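
The shuffle above is made repeatable by seeding before permuting a list of indices, so a re-run assigns the same files to the same TFRecord shards. A small standalone check of that property (independent of tf.gfile):

import random

def repeatable_shuffle(items, seed=12345):
    order = list(range(len(items)))
    random.seed(seed)
    random.shuffle(order)
    return [items[i] for i in order]

files = ["a.jpg", "b.jpg", "c.jpg", "d.jpg"]
assert repeatable_shuffle(files) == repeatable_shuffle(files)  # same seed, same ordering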
def load_data(img_rows, img_cols):
    # img_rows, img_cols = 336, 456
    datPaths = []
    for root, dirs, files in os.walk(r"data/train", topdown=False):
        for name in files:
            if (name != ".DS_Store"):
                l = os.path.join(root, name)
                datPaths.append(l)
        for name in dirs:
            print(os.path.join(root, name))
    datPaths = sorted(datPaths)
    random.seed(2020)
    random.shuffle(datPaths)
    print(datPaths[0])
    #
    data = []
    for imagePath in datPaths:
        image = cv2.imread(imagePath, cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (img_cols, img_rows))
        data.append(image)

    data = np.array(data, dtype="float")
    print(data.shape)
    print("[INFO] data matrix: {:.2f}MB".format(data.nbytes / (1024 * 1000.0)))

    train_data = data
    print(train_data.shape[0], ' samples')
    # test data
    X_train = train_data.astype('float32')
    print('X_train shape:', X_train.shape)
    print(X_train[0], 'train samples')
    X_train = (X_train - 127.5) / 127.5

    X_train = X_train.reshape(-1, img_rows, img_cols, 1)
    print(X_train.shape, 'X_train.shape')
    return X_train
Example #22
from datetime import datetime
from csv import DictReader
from math import exp, log, sqrt
from random import random,shuffle
import pickle
import sys
from ngram import getUnigram
import string
import random
from config import path
import networkx as nx
seed =1024
random.seed(seed)


def prepare_graph(paths):
    G = nx.Graph()

    pr_dict = dict()
    for path in paths:
        print(path)
        c = 0
        start = datetime.now()

        for t, row in enumerate(DictReader(open(path), delimiter=',')): 
            if c%100000==0:
                print('finished',c)
            q1 = str(row['question1_hash'])
            q2 = str(row['question2_hash'])
            G.add_edge(q1,q2)
            c+=1
Example #23
from matplotlib.animation import FuncAnimation
from PIL import Image as im
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time
import os
import sys
import tkinter as tk
from datetime import datetime
import random
from graphics import *
from particle import *
import threading

random.seed(datetime.now())

img_dir = "imgs/"


def bounce(particles):
    color = ["white", "green", "yellow", "red", "blue", "orange", "pink"]

    for i in range(100):
        xi = engine.width / 2
        yi = engine.width / 2
        dxi = random.randint(-10, 10) * random.random()
        dyi = random.randint(-10, 10) * random.random()
        r = random.randint(0, 5)

        ball = Particle(engine.canvas,
Example #24
def _generate_vals(count, token):
    random.seed(token)
    vals = [1 for __ in range(count // 2)] + [-1 for __ in range(count // 2)]
    random.shuffle(vals)  # random.shuffle works in place and returns None
    return vals
    for i in range(N):
        r = random()
        n = randint(0, V // 2)  # randomly pick a point in the first half of the individual to mutate
        m = randint(V // 2, V - 1)  # randomly pick a point in the second half of the individual to mutate
        if r <= belta:
            if population[i][n] + 1 <= max_num:
                population[i][n] = population[i][n] + 1  # integer encoding: randomly +1 or -1

            if population[i][m] - 1 >= 0:
                population[i][m] = population[i][m] - 1
    for i in range(N):
        m = len(set(population[i]))
        # print(m)
        if m < V:
            # print(population[i],old_population[i])
            population[i] = old_population[i]


# Test case below
if __name__ == "__main__":
    from numpy import random
    random.seed(0)
    xN = 5
    yN = 3
    belta = 1

    p = population(4, 7)
    print(p)
    mutation(p, belta, 29)
    print(p)
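
The mutation loops above appear without their enclosing function definition: with probability belta they add 1 to a random gene in the first half of an individual and subtract 1 from a random gene in the second half, then revert any individual whose genes are no longer all distinct. A hedged, self-contained sketch of that integer-coded mutation, with the population as a plain list of lists (the names and signature are assumptions):

import random

def mutate(population, belta, max_num):
    V = len(population[0])
    old_population = [ind[:] for ind in population]
    for ind in population:
        if random.random() <= belta:
            n = random.randint(0, V // 2)       # mutation point in the first half
            m = random.randint(V // 2, V - 1)   # mutation point in the second half
            if ind[n] + 1 <= max_num:
                ind[n] += 1                     # integer encoding: +1 / -1 steps
            if ind[m] - 1 >= 0:
                ind[m] -= 1
    # Revert individuals that lost gene uniqueness.
    for i, ind in enumerate(population):
        if len(set(ind)) < V:
            population[i] = old_population[i]

random.seed(0)
pop = [[0, 1, 2, 3, 4, 5], [5, 4, 3, 2, 1, 0]]
mutate(pop, belta=1.0, max_num=29)
print(pop)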
Example #26
        device = torch.device("cpu")
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(hyp_path):
        os.makedirs(hyp_path)
    if not os.path.exists(ref_path):
        os.makedirs(ref_path)

    print('load training data.')

    attnmap_type = args.alternative_attnmap
    print('Alternative attention type: %s' % (attnmap_type))

    if args.num_train_data is not None:
        all_files = sorted(os.listdir(inputs_dir))
        random.seed(args.random_seed)
        idx = [
            random.randint(0,
                           len(all_files) - 1)
            for i in range(0, args.num_train_data)
        ]
        idx = [0]  # note: this overrides the random sample drawn above
        print(idx)
        inputs_dir = [inputs_dir + '/' + all_files[i] for i in idx]
        warmup_steps = 1000 * args.num_train_data // (batch_size) * 2
        print('Warmup steps: %d' % (warmup_steps))

    train = SummarizationDataset(inputs_dir,
                                 is_test=False,
                                 dataset_type='train',
                                 dataset=args.dataset,
Example #27
def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)
Example #28
def main(start_time):
    random.seed(1000)
    count1 = -1
    best_sw_archive = []

    nodes = create_nodes()  # create mesh
    links = create_mesh()  # create mesh
    traffic = load_traffic()  # load benchmark
    mesh_mean, mesh_dev = calc_params(nodes, links, traffic)
    elapsed = timeit.default_timer() - start_time
    best_sw = [1, 1, deepcopy(nodes), deepcopy(links), elapsed]  # normalizing 
    amosa_flag = 1

    for i in range(0, 5):
        v = random.randint(0, 10)
        for j in range(0, v):
            nodes, links = perturb(nodes, links)
        
        tm, td, = calc_params(nodes, links, traffic)
        tm = tm / mesh_mean
        td = td / mesh_dev
        elapsed = timeit.default_timer() - start_time
        sw = [tm, td, deepcopy(nodes), deepcopy(links), elapsed]
        best_sw_archive.append(sw)  # initializing the archive

    ################################################ Enter AMOSA ############################################

    temp=500
    z = 0
    el = timeit.default_timer() - start_time
    x = int((el / 1200) + 1) * 1200

    while 1:
        temp=temp*0.95
        ### cluster/prune if size becomes too big
        score = []
        for i in range(0, len(best_sw_archive)):
            s = best_sw_archive[i][0] * 0.7 + best_sw_archive[i][1] * 0.3  # custom scoring
            score.append(s)
        if len(best_sw_archive) > 30:
            b_num = numpy.array(score)
            b_ind = b_num.argsort()[:20]  # index of lowest 20 phv
            reduced_archive = []
            for i in range(0, len(b_ind)):
                reduced_archive.append(deepcopy(best_sw_archive[b_ind[i]]))
            best_sw_archive = deepcopy(reduced_archive)

        ### terminate
        z = z + 1
        if z == 2500: ## put stop condition here, currently just letting it run for ~infinity
            quit(0)

        r = random.randint(0, len(best_sw_archive) - 1)
        best_sw = deepcopy(best_sw_archive[r])  # best_sw is the randomly picked current pt

        for i in range(0, 500):  # make num_iter perturbations
            ### print out after some time e.g. every ~20 mins
            el = timeit.default_timer() - start_time
            if el > x:
                x=x+1200
                max_print=-1
                if len(best_sw_archive)>10:
                    max_print=10
                else:
                    max_print=len(best_sw_archive)
                for k in range(0, max_print): #printing to file
                    count1=count1+1
                    text_file = open("Output" + str(count1) + ".txt", "w")
                    text_file.write("mean= %f \n" % best_sw_archive[k][0])
                    text_file.write("dev= %f \n" % best_sw_archive[k][1])
                    text_file.write("timestamp= %f \n" % best_sw_archive[k][4])
                    for asd in range(0, 64):
                        text_file.write("%s, " % best_sw_archive[k][2][asd])
                        if asd % 16 == 15:
                            text_file.write("\n")
                    text_file.write("\n")
                    for asd in range(0, 64):
                        for j in range(0, 64):
                            text_file.write("%d, " % best_sw_archive[k][3][asd][j])
                        text_file.write("\n")
                    text_file.close()

            nodes = deepcopy(best_sw[2])
            links = deepcopy(best_sw[3])
            ### do perturbation
            nodes, links = perturb(nodes, links)
            tm, td = calc_params(nodes, links, traffic)
            tm = tm / mesh_mean
            td = td / mesh_dev
            elapsed = timeit.default_timer() - start_time
            new_sw = [tm, td, deepcopy(nodes), deepcopy(links), elapsed]

            # check dominance
            if best_sw[0] < new_sw[0] and best_sw[1] < new_sw[1]:  # current point dominates new point
                d = 0
                c1 = 0
                for k in range(0, len(best_sw_archive)):
                    d_temp = delta_dom(best_sw_archive[k], new_sw)
                    if d_temp != 0:
                        c1 = c1 + 1
                    d = d + d_temp
                d = d + delta_dom(best_sw, new_sw)
                d_avg = d / (c1 + 1)
                
                m_factor=0
                if d_avg*temp>5:
                    m_factor=5
                else:
                    m_factor=d_avg*temp
                prob = float(1 / (1+2.718**m_factor))
                rp = random.random()
                if rp < prob:  # set new point as current point
                    best_sw = deepcopy(new_sw)

            elif new_sw[0] < best_sw[0] and new_sw[1] < best_sw[1]:  # new point dominates current point
                d=0
                for j in range(0,len(best_sw_archive)):
                    archive_point=[best_sw_archive[j][0],best_sw_archive[j][1]]
                    new_point=[new_sw[0],new_sw[1]]
                    if(dominates(archive_point,new_point,0)):
                        d=d+1

                if d > 1:  # new point dominated by k points in archive
                    min1=999
                    ind_min1=-1
                    for j in range(0, len(best_sw_archive)):
                        archive_point = [best_sw_archive[j][0], best_sw_archive[j][1]]
                        new_point = [new_sw[0], new_sw[1]]
                        if (dominates(archive_point, new_point, 0)):
                            dom=delta_dom(archive_point,new_point)
                            if dom<min1:
                                min1=dom
                                ind_min1=j

                    prob = float(1 / (2.718**(-min1)))
                    rp=random.random()
                    if rp < prob:  # set new point as current point
                        best_sw = deepcopy(best_sw_archive[ind_min1])
                    else:
                        best_sw = deepcopy(new_sw)

                else:  # new point either dominates or non-dominates other points
                    # anyone that is dominated by new point is bad
                    temp_archive = []
                    for j in range(0, len(best_sw_archive)):
                        if not (new_sw[0] < best_sw_archive[j][0] and new_sw[1] < best_sw_archive[j][1]):  # new point does not dominate these archive points
                            temp_archive.append(deepcopy(best_sw_archive[j]))
                    best_sw_archive = deepcopy(temp_archive)
                    best_sw_archive.append(deepcopy(new_sw))
                    best_sw = deepcopy(new_sw)

            else:  # non-dominance stand-off
                d = 0
                for j in range(0, len(best_sw_archive)):
                    archive_point = [best_sw_archive[j][0], best_sw_archive[j][1]]
                    new_point = [new_sw[0], new_sw[1]]
                    if (dominates(archive_point, new_point, 0)):
                        d = d + 1

                if d > 1:  # new point dominated by k points in archive
                    sum1 = 0
                    for k in range(0, len(best_sw_archive)):
                        archive_point = [best_sw_archive[k][0], best_sw_archive[k][1]]
                        new_point = [new_sw[0], new_sw[1]]
                        if dominates(archive_point, new_point, 0):
                            sum1 = sum1 + delta_dom(archive_point, new_point)
                    sum1 = sum1 / d  # average delta_dom over the dominating archive points
                    
                    m_factor=0
                    if sum1*temp>5:
                        m_factor=5
                    else:
                        m_factor=sum1*temp
                    prob = float(1 / (1+2.718**(m_factor)))
                    rp=random.random()
                    if rp < prob:  # set new point as current point
                        best_sw = deepcopy(new_sw)
                else:  # new point either dominates or non-dominates other points
                    # anyone that is dominated by new point is bad
                    temp_archive = []
                    for j in range(0, len(best_sw_archive)):
                        if not (new_sw[0] < best_sw_archive[j][0] and new_sw[1] < best_sw_archive[j][1]):  # new point does not dominate these archive points
                            temp_archive.append(deepcopy(best_sw_archive[j]))
                    best_sw_archive = deepcopy(temp_archive)
                    best_sw_archive.append(deepcopy(new_sw))
                    best_sw = deepcopy(new_sw)
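
The acceptance test above follows a simulated-annealing pattern: when the new point is dominated, it is still accepted with a probability that shrinks as the average amount of domination (times the clamped temperature) grows. A compact sketch of just that probability, using math.exp in place of the literal 2.718 (the delta values below are made-up inputs):

import math
import random

def accept_dominated(delta_doms, temp, cap=5.0):
    d_avg = sum(delta_doms) / len(delta_doms)   # average amount of domination
    m_factor = min(d_avg * temp, cap)           # clamp the exponent, as in the code above
    prob = 1.0 / (1.0 + math.exp(m_factor))
    return random.random() < prob

print(accept_dominated([0.2, 0.5, 0.1], temp=1.0))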
Example #29
    def perform(self,
                num_days,
                learner_type,
                context_structure=None,
                lower_bound_type=LowerBoundType.Hoeffding,
                context_generation_every_day=-1,
                debug_info=False,
                monitoring_on=True):
        if debug_info:
            print("\n--------- Starting experiment with " + learner_type.name +
                  " ---------")

        self.environment.restore(
        )  # Restore the environment (so that randomization doesn't affect different algorithms)
        np.random.seed(self.seed)
        random.seed(self.seed)

        ###############################################
        # Setup the experiment
        ###############################################

        # Setup the context
        day_length = sum(self.phase_lengths)
        if context_structure is None:
            context_structure = [day_length]

        # Instantiate the DDA class with the selected matching algorithm
        Dda = DDA(Hungarian_algorithm())

        # Instantiate the main Graph
        graph = Graph()

        # Setup the monitoring for the experiment
        monitor = ExperimentMonitor(num_days, day_length, monitoring_on)

        ###############################################
        # Utility functions (NOTE: some are implemented as closures)
        ###############################################

        # Build the Class_Algos from the ids of the Class_Envs (given a context structure)
        def build_contextualized_algo_classes(context_structure):
            contextualized_algo_classes = {
            }  # Dictionary to map rounds to corresponding algo_class (when using context)

            for (context_id, context) in enumerate(context_structure):
                left_classes_ids = [
                    c.id for c in self.environment.classes[0] if c.is_left
                ]
                right_classes_ids = [
                    c.id for c in self.environment.classes[0] if not c.is_left
                ]

                algo_classes = [
                    Class_Algo(id, True) for id in left_classes_ids
                ] + [Class_Algo(id, False) for id in right_classes_ids]

                for i in left_classes_ids:
                    for j in right_classes_ids:
                        distribution = Beta(
                        ) if learner_type == LearnerType.ThompsonSampling else UCB1(
                        )
                        class_edge = Class_Algo_Edge(distribution)
                        l_class = [c for c in algo_classes if c.id == i][0]
                        r_class = [c for c in algo_classes if c.id == j][0]
                        l_class.set_edge_data(r_class, class_edge)
                        r_class.set_edge_data(l_class, class_edge)

                for i in range(context):
                    round_id = i + sum(context_structure[:context_id])
                    contextualized_algo_classes[round_id] = algo_classes

            return contextualized_algo_classes

        def get_algo_class(contextualized_algo_classes, class_id, round_id):
            return [
                c for c in contextualized_algo_classes[round_id]
                if c.id == class_id
            ][0]

        def get_env_class(class_id, phase_id):
            return [
                c for c in self.environment.classes[phase_id]
                if c.id == class_id
            ][0]

        def update_UCB1_current_time(contextualized_algo_classes,
                                     iteration_number):
            for algo_classes in contextualized_algo_classes.values():
                for c in algo_classes:
                    for ed in c.edge_data.values():
                        ed.distribution.current_time = iteration_number

        def is_beginning_of_context(round_id, context_structure):
            if len(context_structure) <= 1:
                return False

            for context in context_structure:
                if round_id == 0:
                    return True
                round_id -= context

            return False

        ###############################################
        # Main experiment loop
        ###############################################

        contextualized_algo_classes = build_contextualized_algo_classes(
            context_structure)

        rewards_by_context = {
        }  # Save all the reward data with context labels (i.e. round_id and class_id pair)
        all_rewards = []

        iteration_number = 0

        for day in range(num_days):  # For every day the experiment is run
            if debug_info:
                print("------ Day " + str(day + 1) + " ------")

            round_id = 0

            for (phase_id, phase_length) in enumerate(
                    self.phase_lengths):  # For every phase of the day
                # print("---- Phase " + str(phase_id + 1) + " ----")

                for _ in range(phase_length):  # For every round in the phase
                    # print("-- Round " + str(round_id) + " --")

                    iteration_number += 1
                    round_reward = 0

                    # Sample new nodes from the environment
                    new_nodes = self.environment.get_new_nodes(phase_id)

                    # Experiment monitoring
                    monitor.new_nodes_added(day, round_id, new_nodes)

                    # Add those new nodes to the graph (mapping the id returned by the environment into the correct Class_Algo)
                    for (class_id, time_to_stay) in new_nodes:
                        node_class = get_algo_class(
                            contextualized_algo_classes, class_id, round_id)
                        graph.add_node(node_class, time_to_stay)

                    # Experiment monitoring
                    monitor.graph_size_pre_matching(day, round_id,
                                                    len(graph.nodes),
                                                    len(graph.edges))

                    # Update the distribution used by each edge to match the current context structure
                    if len(context_structure) > 1 and learner_type in [
                            LearnerType.ThompsonSampling, LearnerType.UCB1
                    ]:
                        for node in graph.nodes:
                            node_class = get_algo_class(
                                contextualized_algo_classes,
                                node.node_class.id, round_id)
                            node.node_class = node_class

                    # Update the estimates of the weights of the graph
                    if learner_type == LearnerType.ThompsonSampling:
                        # beta sample
                        graph.update_weights(
                            is_beginning_of_context(round_id,
                                                    context_structure))
                    elif learner_type == LearnerType.UCB1:
                        # UCB1 bound
                        update_UCB1_current_time(contextualized_algo_classes,
                                                 iteration_number)
                        graph.update_weights(
                            is_beginning_of_context(round_id,
                                                    context_structure))
                    elif learner_type == LearnerType.Clairvoyant:
                        # Update the clairvoyant graph with the real weights
                        for edge in graph.edges:
                            node1_env_class = get_env_class(
                                edge.node1.node_class.id, phase_id)
                            edge_data = node1_env_class.edge_data[
                                edge.node2.node_class.id]
                            edge.weight = edge_data.weight_distribution.p * edge_data.constant_weight
                    elif learner_type == LearnerType.ContextEvaluation:
                        # Update the graph with the Hoeffding lower bound estimated from old data
                        for edge in graph.edges:
                            edge_context = (round_id,
                                            min(edge.node1.node_class.id,
                                                edge.node2.node_class.id),
                                            max(edge.node1.node_class.id,
                                                edge.node2.node_class.id))
                            lower_bound = self.edge_lower_bounds[edge_context]
                            edge.weight = lower_bound

                    # Whenever a node is going to exit the experiment run the DDA (Deferred Dynamic Acceptance) algorithm
                    if len(graph.edges) > 0 and Dda.is_there_critical_node(
                            graph.nodes):
                        matching_edges, full_matching_edges = Dda.perform_matching(
                            graph)

                        # Experiment monitoring
                        monitor.matching_performed(day, round_id,
                                                   matching_edges,
                                                   full_matching_edges)

                        # Given the results of DDA (if and what nodes to match), actually perform the matching
                        for edge in matching_edges:

                            if learner_type in [
                                    LearnerType.ThompsonSampling,
                                    LearnerType.UCB1
                            ]:

                                # Draw rewards and update distributions for each matching performed
                                matching_result, matching_weight = self.environment.get_reward(
                                    edge.node1.node_class.id,
                                    edge.node2.node_class.id, phase_id)
                                reward = matching_result * matching_weight

                                # Experiment monitoring
                                monitor.reward_collected(
                                    day, round_id, phase_id,
                                    edge.node1.node_class.id,
                                    edge.node2.node_class.id, matching_result,
                                    matching_weight)

                                # Save contextualized reward
                                reward_context = (
                                    round_id,
                                    min(edge.node1.node_class.id,
                                        edge.node2.node_class.id),
                                    max(edge.node1.node_class.id,
                                        edge.node2.node_class.id))
                                if reward_context not in rewards_by_context:
                                    rewards_by_context[reward_context] = []
                                rewards_by_context[reward_context].append(
                                    (matching_result, matching_weight))

                            elif learner_type in [
                                    LearnerType.Clairvoyant,
                                    LearnerType.ContextEvaluation
                            ]:
                                # For clairvoyant algorithms there is no need to sample rewards from the environment
                                reward = edge.weight

                                # Experiment monitoring
                                monitor.reward_collected(
                                    day, round_id, phase_id,
                                    edge.node1.node_class.id,
                                    edge.node2.node_class.id, 1, reward)

                            round_reward += reward

                            node1_class = get_algo_class(
                                contextualized_algo_classes,
                                edge.node1.node_class.id, round_id)
                            edge_data = node1_class.edge_data[
                                edge.node2.node_class.id]

                            if learner_type == LearnerType.ThompsonSampling:
                                # TS update
                                edge_data.distribution.update_parameters(
                                    [matching_result, 1 - matching_result])
                                # Update estimate of constant weight
                                edge_data.update_estimated_weight(
                                    matching_weight)
                            elif learner_type == LearnerType.UCB1:
                                # UCB1 update
                                edge_data.distribution.update_parameters(
                                    matching_result)
                                # Update estimate of constant weight
                                edge_data.update_estimated_weight(
                                    matching_weight)

                            # Remove matched nodes from the graph
                            graph.remove_node(edge.node1)
                            graph.remove_node(edge.node2)

                    # Run the end_round routine of the graph, to update the time_to_stay for each node
                    graph.end_round_routine()

                    # Experiment monitoring
                    monitor.graph_size_post_matching(day, round_id,
                                                     len(graph.nodes),
                                                     len(graph.edges))

                    all_rewards.append(round_reward)

                    round_id += 1

                # End of phase

            # Context generation
            if context_generation_every_day > 0 and (
                    day + 1) % context_generation_every_day == 0:
                if debug_info:
                    print(
                        "----- Generating new optimal context structure -----")

                all_context_structures = generate_context_structures(
                    day_length, self.min_phase_length)

                env_copy = self.environment.copy()
                context_generation_exp = Experiment(env_copy,
                                                    self.phase_lengths,
                                                    self.min_phase_length)

                def evaluate_context_structure(context_structure):
                    # Build lower bounds on expected reward per edge
                    left_classes_ids = [
                        c.id for c in env_copy.classes[0] if c.is_left
                    ]
                    right_classes_ids = [
                        c.id for c in env_copy.classes[0] if not c.is_left
                    ]
                    context_generation_exp.edge_lower_bounds = {}
                    for (context_id, context) in enumerate(context_structure):
                        for left_id in left_classes_ids:
                            for right_id in right_classes_ids:
                                rewards = []

                                for i in range(context):
                                    round_id = i + sum(
                                        context_structure[:context_id])
                                    context_key = (round_id,
                                                   min(left_id, right_id),
                                                   max(left_id, right_id))

                                    if context_key in rewards_by_context:
                                        rewards += rewards_by_context[
                                            context_key]

                                if len(rewards) > 0:
                                    # Hoeffding lower bound
                                    bernoulli_rewards = list(
                                        map(lambda el: el[0], rewards))
                                    weight_rewards = list(
                                        map(lambda el: el[1], rewards))
                                    bernoulli_mean = np.mean(bernoulli_rewards)
                                    weight_mean = np.mean(weight_rewards)
                                    hoeffding_bound = np.sqrt(
                                        -np.log(0.05) /
                                        (2 * len(bernoulli_rewards)))
                                    # Gaussian lower bound
                                    full_rewards = list(
                                        map(lambda el: el[0] * el[1], rewards))
                                    mean_reward = np.mean(full_rewards)
                                    reward_std = np.std(full_rewards)
                                    n = len(full_rewards)
                                    z = 1.96  # for a 95% confidence interval
                                    gaussian_bound = mean_reward - (
                                        z * (reward_std / np.sqrt(n)))
                                    # Gaussian lower bound on weight
                                    gaussian_weight_bound = weight_mean - (
                                        z *
                                        (np.std(weight_rewards) / np.sqrt(n)))

                                    if lower_bound_type == LowerBoundType.Hoeffding:
                                        total_lower_bound = weight_mean * (
                                            bernoulli_mean - hoeffding_bound)
                                    elif lower_bound_type == LowerBoundType.Gaussian:
                                        total_lower_bound = gaussian_bound
                                    elif lower_bound_type == LowerBoundType.Hybrid:
                                        total_lower_bound = gaussian_weight_bound * (
                                            bernoulli_mean - hoeffding_bound)
                                    total_lower_bound = max(
                                        0, total_lower_bound)
                                else:
                                    total_lower_bound = 0  # no data yet: use 0 as a stand-in for minus infinity

                                for i in range(context):
                                    round_id = i + sum(
                                        context_structure[:context_id])
                                    context_key = (round_id,
                                                   min(left_id, right_id),
                                                   max(left_id, right_id))
                                    context_generation_exp.edge_lower_bounds[
                                        context_key] = total_lower_bound

                    rewards, _ = context_generation_exp.perform(
                        day + 1,
                        LearnerType.ContextEvaluation,
                        context_structure,
                        monitoring_on=False)

                    if debug_info:
                        print("-- Context structure " +
                              str(context_structure) +
                              " has an expected reward of " +
                              str(sum(rewards)))

                    return sum(rewards)

                best_context_structure = max(all_context_structures,
                                             key=evaluate_context_structure)
                context_structure = best_context_structure

                if debug_info:
                    print("Best context structure is " +
                          str(best_context_structure))

                # Experiment monitoring
                monitor.context_generation_performed(day,
                                                     best_context_structure)

                contextualized_algo_classes = build_contextualized_algo_classes(
                    best_context_structure)

                # Re-feed old data to the newly built algo_classes
                for ((round_id, left_class_id, right_class_id),
                     results) in rewards_by_context.items():
                    for (matching_result, matching_weight) in results:
                        left_class = get_algo_class(
                            contextualized_algo_classes, left_class_id,
                            round_id)
                        edge_data = left_class.edge_data[right_class_id]

                        if learner_type == LearnerType.ThompsonSampling:
                            # TS update
                            edge_data.distribution.update_parameters(
                                [matching_result, 1 - matching_result])
                        elif learner_type == LearnerType.UCB1:
                            # UCB1 update
                            edge_data.distribution.update_parameters(
                                matching_result)

                        edge_data.update_estimated_weight(matching_weight)

            # End of day

        return all_rewards, monitor
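
The context-evaluation branch above scores each edge pessimistically, either with a Hoeffding term sqrt(-ln(0.05) / (2n)) subtracted from the Bernoulli mean or with a Gaussian 95% lower confidence bound mean - 1.96 * std / sqrt(n). A small sketch of those two bounds in isolation (the sample data is made up):

import numpy as np

def hoeffding_lower_bound(bernoulli_samples, delta=0.05):
    n = len(bernoulli_samples)
    return np.mean(bernoulli_samples) - np.sqrt(-np.log(delta) / (2 * n))

def gaussian_lower_bound(samples, z=1.96):
    samples = np.asarray(samples, dtype=float)
    return samples.mean() - z * samples.std() / np.sqrt(len(samples))

rng = np.random.default_rng(0)
wins = rng.integers(0, 2, size=200)   # made-up Bernoulli outcomes
print(hoeffding_lower_bound(wins))
print(gaussian_lower_bound(wins))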
 def __init__(self):
     random.seed()
     self.hypha = []
 def _attack(self, manager, turn):
     random.seed(str(manager.json) + str(turn))
     if random() < self._percent_chance_attack:
         self.bucket.attacked = True
Example #32
    # http://www.coppeliarobotics.com/helpFiles/en/remoteApiConstants.htm
    opmode = vrep.simx_opmode_oneshot_wait

    # Try to retrieve motors and robot handlers
    # http://www.coppeliarobotics.com/helpFiles/en/remoteApiFunctionsPython.htm#simxGetObjectHandle
    ret1, wristHandle = vrep.simxGetObjectHandle(clientID, "WristMotor",
                                                 opmode)
    ret2, elbowHandle = vrep.simxGetObjectHandle(clientID, "ElbowMotor",
                                                 opmode)
    ret3, shoulderHandle = vrep.simxGetObjectHandle(clientID, "ShoulderMotor",
                                                    opmode)
    ret4, robotHandle = vrep.simxGetObjectHandle(clientID, "2W1A", opmode)

    # If handlers are OK, execute three random simulations
    if ret1 == 0 and ret2 == 0 and ret3 == 0:
        random.seed()
        for i in range(0, 3):
            # Start the simulation
            # http://www.coppeliarobotics.com/helpFiles/en/remoteApiFunctionsPython.htm#simxStartSimulation
            vrep.simxStartSimulation(clientID, opmode)
            print("----- Simulation started -----")

            # Start getting the robot position
            # Unlike other commands, we will use a streaming operating mode
            # http://www.coppeliarobotics.com/helpFiles/en/remoteApiFunctionsPython.htm#simxGetObjectPosition
            pret, robotPos = vrep.simxGetObjectPosition(
                clientID, robotHandle, -1, vrep.simx_opmode_streaming)
            print ("2w1a position: (x = " + str(robotPos[0]) +\
                  ", y = " + str(robotPos[1]) + ")")

            # Start getting the robot orientation
Example #33
    def train(self, epochs=1, log_dir='log', dataset_split=0.1):
        # tokenizer
        self.tokenizing(mode='train', dataset_split=dataset_split)

        writer = SummaryWriter(log_dir)

        model = self.model
        optimizer = AdamW(model.parameters(), lr=3e-5, eps=1e-8)

        # Total training steps: number of batches * number of epochs
        total_steps = len(self.train_dataloader) * epochs

        # Create a scheduler that gradually decays the learning rate
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=total_steps)

        # Fix the random seeds for reproducibility
        seed_val = 42
        random.seed(seed_val)
        np.random.seed(seed_val)
        torch.manual_seed(seed_val)
        torch.cuda.manual_seed_all(seed_val)

        # Reset gradients
        model.zero_grad()

        # Loop over epochs
        for epoch_i in range(0, epochs):
            print("")
            print('======== Epoch {:} / {:} ========'.format(
                epoch_i + 1, epochs))
            print('Training...')
            t0 = time.time()
            total_loss = 0
            train_accuracy, nb_train_steps = 0, 0

            model.train()

            # Iterate over batches from the dataloader
            for step, batch in enumerate(self.train_dataloader):
                # Report progress
                if step % 100 == 0 and not step == 0:
                    elapsed = format_time(time.time() - t0)
                    print(
                        '  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                            step, len(self.train_dataloader), elapsed))

                batch = tuple(t.to(self.device) for t in batch)  # move the batch to the GPU
                b_input_ids, b_input_mask, b_labels = batch  # unpack the batch
                outputs = model(b_input_ids,
                                token_type_ids=None,
                                attention_mask=b_input_mask,
                                labels=b_labels)  # forward pass
                loss = outputs[0]  # get the loss
                total_loss += loss.item()  # accumulate the total loss

                loss.backward()  # backward pass to compute gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               1.0)  # gradient clipping
                optimizer.step()  # update the weight parameters using the gradients

                scheduler.step()  # decay the learning rate via the scheduler
                model.zero_grad()  # reset gradients

                ##accuracy
                logits = outputs[1]
                # Move data to the CPU
                logits = logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()

                # Compute accuracy by comparing output logits with labels
                train_accuracy += flat_accuracy(logits, label_ids)
                nb_train_steps += 1

            # Compute the average loss
            avg_train_loss = total_loss / len(self.train_dataloader)

            print("")
            print("  Train loss: {0:.2f}, Train Accuracy: {1:.2f}".format(
                avg_train_loss, train_accuracy / nb_train_steps))
            print("  Training epcoh took: {:}".format(
                format_time(time.time() - t0)))

            # ========================================
            #               Validation
            # ========================================

            print("")
            print("Running Validation...")

            t0 = time.time()
            model.eval()

            # Initialize variables
            eval_loss, eval_accuracy = 0, 0
            nb_eval_steps = 0
            labels_accuracy, preds_accuracy = [], []

            # Iterate over batches from the dataloader
            for batch in self.validation_dataloader:
                batch = tuple(t.to(self.device) for t in batch)
                b_input_ids, b_input_mask, b_labels = batch

                with torch.no_grad():
                    outputs = model(b_input_ids,
                                    token_type_ids=None,
                                    attention_mask=b_input_mask)

                logits = outputs[0]
                # Move data to the CPU
                logits = logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()

                # Compute accuracy by comparing output logits with labels
                tmp_eval_accuracy = flat_accuracy(logits, label_ids)
                eval_accuracy += tmp_eval_accuracy
                nb_eval_steps += 1

                labels_accuracy.append(label_ids.flatten())
                preds_accuracy.append(np.argmax(logits, axis=1).flatten())

            print("  Validation Accuracy: {0:.2f}".format(eval_accuracy /
                                                          nb_eval_steps))
            print("  Validation took: {:}".format(format_time(time.time() -
                                                              t0)))

            # precision and recall
            labels_accuracy = [y for x in labels_accuracy
                               for y in x]  # list flatten
            preds_accuracy = [y for x in preds_accuracy for y in x]
            print(classification_report(labels_accuracy, preds_accuracy))

            writer.add_scalar('Avg_loss(training)', avg_train_loss,
                              epoch_i + 1)
            writer.add_scalars(
                'Accuracy', {
                    'Train': train_accuracy / nb_train_steps,
                    'Val': eval_accuracy / nb_eval_steps
                }, epoch_i + 1)

            if (epoch_i + 1) % 3 == 0 and (
                    epoch_i + 1) != epochs:  ## the final iteration is handled below
                save_path = os.path.join(self.save_path, str(epoch_i + 1))
                if not os.path.exists(save_path): os.makedirs(save_path)

                model.save_pretrained(save_path)

        print("")
        print("Training complete!")
        writer.close()

        save_path = os.path.join(self.save_path, str(epochs))
        if not os.path.exists(save_path): os.makedirs(save_path)

        model.save_pretrained(save_path)
def make_ssa_synthetic(fname='data/Baby-Names-SSA.csv'):
    # Repeatable random
    import random
    random.seed(1)

    # Date for age calculation
    now = 2020

    # Population adjustment by year in 20Ms
    population = np.linspace(5, 16, 100)
    years = np.linspace(1919, 2018, 100, dtype=int)
    year_to_pop = dict(zip(years, population))

    # Rank to count
    rank_to_freq = {'1': 1.0, '2': 0.9, '3': 0.8, 
                    '4': 0.7, '5': 0.6}

    # Read the rank popularity of names by year
    df = pd.read_csv(fname)
    df = df.set_index('Year').sort_index()
    unstack = df.unstack()

    # Random features
    colors = ('Red', 'Green', 'Blue', 
              'Yellow', 'Purple', 'Black')
    flowers = ('Daisy', 'Orchid', 'Rose', 
               'Violet', 'Lily')

    # Python list-of-lists to construct new data
    rows = []
    for (rank_gender, year), name in unstack.iteritems():
        rank, gender = rank_gender
        age = now - year
        count = int(year_to_pop[year] *
                    rank_to_freq[rank])
        for _ in range(count):
            color = random.choice(colors)
            flower = random.choice(flowers)
            rows.append(
                (age, gender, name, color, flower))

    df = pd.DataFrame(rows)
    df.columns = ('Age', 'Gender', 'Name',
                  'Favorite_Color', 'Favorite_Flower')
    df = df.sample(frac=0.8, random_state=1)
    df.to_parquet('data/usa_names_all.parq', index=False)
    
    # Add age-correlated flower preference
    old = df[df.Age > 70].sample(frac=0.2, random_state=1)
    df.loc[old.index, 'Favorite_Flower'] = 'Orchid'
    young = df[df.Age < 30].sample(frac=0.1, random_state=1)
    df.loc[young.index, 'Favorite_Flower'] = 'Rose'
    
    # Make some data missing selectively by age
    # Missing color for all but forty 30-40 yos
    drop = df[(df.Age > 30) & (df.Age <= 40)].index[:-40]
    df.loc[drop, 'Favorite_Color'] = None

    # Missing flower for all but forty 20-30 yos
    drop = df[(df.Age > 20) & (df.Age <= 30)].index[:-40]
    df.loc[drop, 'Favorite_Flower'] = None
    
    # Jumble the order but keep all rows then write
    df = df.sample(frac=1.0, random_state=1)
    df.to_parquet('data/usa_names.parq', index=False)
Example #35
def fibonacci_sphere(samples, rseed):
    # http://stackoverflow.com/a/26127012/1243487
    rnd = 5.
    random.seed(rseed)
    rnd = random.random() * samples
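
The snippet above is cut off right after the seeded random offset. For reference, the Fibonacci-sphere sampler from the cited Stack Overflow answer is commonly completed roughly as follows (a hedged reconstruction, not necessarily this project's exact code):

import math
import random

def fibonacci_sphere(samples, rseed):
    random.seed(rseed)
    rnd = random.random() * samples                 # random offset, as in the snippet above
    points = []
    offset = 2.0 / samples
    increment = math.pi * (3.0 - math.sqrt(5.0))    # golden angle
    for i in range(samples):
        y = ((i * offset) - 1) + (offset / 2)
        r = math.sqrt(1 - y * y)
        phi = ((i + rnd) % samples) * increment
        points.append((math.cos(phi) * r, y, math.sin(phi) * r))
    return points

print(fibonacci_sphere(4, rseed=1))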
Example #36
# maximum number of terms in q0
MAX_SEQUENCE_LENGTH = 15

WORD_EMBEDDING_PATH = "wsj-collection-vectors"

METRIC = " -mMAP@40"

annealing_steps = 10000.
start_eps = 1.0
end_eps = 0.1
eps = start_eps
stepDrop = (start_eps - end_eps) / annealing_steps

PADDING = np.zeros(WORD_VECTOR_DIMENSIONS)

random.seed(500)

# HYPERPARAMETERS:
atire.init("atire -a " + ASSESSMENT_FILE + METRIC)


def write_to_file(filename, information):
    """

    :param filename:
    :param v: a 2d list of training information
    :return:
    """
    with open(filename, "a") as f:
        message = ""
        for row in information:
Example #37
def _generate_indices(bound, count, token):
    random.seed(token)
    return random.sample(range(bound), count)
Example #38
 def __getitem__(self, idx):
     random.seed(a=idx)
     return self.reconstituted_wf(idx % self.win_size) + random.random() * self.noise_amp
Example #39
                for stat, value in data.items():
                    sampled_data[stat] = "{0}|@{1}".format(value, sample_rate)
                return sampled_data
        return {}

    @staticmethod
    def send(_dict, addr):
        """
        Sends key/value pairs via UDP.

        >>> StatsdClient.send({"example.send":"11|c"}, ("127.0.0.1", 8125))
        """
        # TODO(rbtz@): IPv6 support
        udp_sock = socket(AF_INET, SOCK_DGRAM)
        # TODO(rbtz@): Add batch support
        for item in _dict.items():
            print(":".join(item).encode('utf-8'))
            udp_sock.sendto(":".join(item).encode('utf-8'), addr)

if __name__ == "__main__":
    import random
    import time
    random.seed(int(time.time()))
    client = StatsdClient()

    for i in xrange(random.randrange(100000)):
        client.timing("stats.sample.timing", random.randrange(500))

    for i in xrange(random.randrange(100000)):
        client.count("stats.sample.count", random.randrange(500))