コード例 #1
0
def stoL(astring, mix=False):
    """Split *astring* on single spaces into a list of words.

    Args:
        astring: Input string.
        mix: When True, shuffle the resulting list in place before returning.

    Returns:
        List of substrings.  Note: str.split(' ') semantics are kept, so
        "" yields [''] and consecutive spaces yield empty strings.
    """
    # The original copied the split result element-by-element into a new
    # list; split() already returns a fresh list.
    words = astring.split(' ')
    if mix:
        random.shuffle(words)
    return words
コード例 #2
0
def cutData(originPath, trainPath, validPath, scale=0.9):
    """Randomly split a CSV file into train/validation CSVs.

    The first (header) row of the origin file is dropped; the remaining
    rows are shuffled and the first ``scale`` fraction is written to the
    train file, the rest to the validation file.

    Args:
        originPath: Path of the source CSV.
        trainPath: Output path for the training split.
        validPath: Output path for the validation split.
        scale: Fraction of rows (after header removal) used for training.
    """
    with open(originPath, 'r', encoding='utf-8', errors='ignore') as origin, \
         open(trainPath, 'w', encoding='utf-8', errors='ignore') as train, \
         open(validPath, 'w', encoding='utf-8', errors='ignore') as valid:

        trainWriter = csv.writer(train)
        validWriter = csv.writer(valid)

        # Drop the header row.
        total = list(csv.reader(origin))[1:]

        order = list(range(len(total)))
        random.shuffle(order)
        split = int(scale * len(total))

        # BUG FIX: the original wrote ``total[order[index]] for index in
        # order[...]``, applying the permutation twice, so the two splits
        # could overlap and some rows could be silently dropped.  Index the
        # shuffled positions directly instead.
        for index in order[:split]:
            trainWriter.writerow(total[index])
        for index in order[split:]:
            validWriter.writerow(total[index])
コード例 #3
0
ファイル: algorithms.py プロジェクト: optNGUI/Projekt-KI
 def select():
     """Pick two parent pairs by fitness-proportionate (roulette) selection.

     Uses ``population`` and ``fitness`` from the enclosing scope.
     Raises RuntimeError if a spin fails to land on an individual
     (e.g. when total fitness is zero).
     """
     pair_count = 2
     total_fitness = sum(fitness(ind) for ind in population)
     shuffled = population[:]
     random.shuffle(shuffled)

     def spin(target):
         # Walk the shuffled population until the cumulative fitness
         # reaches the target; return the individual hit, or None.
         running = 0
         for candidate in shuffled:
             running += fitness(candidate)
             if running >= target:
                 return candidate
         return None

     pairs = []
     for _ in range(pair_count):
         first = spin(random.uniform(0, total_fitness))
         second = spin(random.uniform(0, total_fitness))
         if first is None or second is None:
             print("FAILED")
             raise RuntimeError("Ooops")
         pairs.append((first, second))
     print(str(pairs))
     return pairs
コード例 #4
0
def trueBalance():
    import random
    list1 = ["tr","ue","bal","ance"]
    list2 = ["everything","you","want","nothing","you","cant"]
    random.shuffle(list1)
    random.shuffle(list2)
    print("{}{} {}{}: {} {} {} {} {} {}".format(list1[0],list1[1],list1[2],list1[3],list2[0],list2[1],list2[2],list2[3],list2[4],list2[5]))
コード例 #5
0
ファイル: dataset.py プロジェクト: argosopentech/onmt-models
def trim_to_length_random(source, target, length):
    """Trim parallel data to at most ``length`` items.

    Data is shuffled (as aligned pairs) before trimming, so the kept
    subset is a uniform random sample.

    Args:
        source (collections.deque): Source data.
        target (collections.deque): Target data.
        length (int | None): Trim to this length; None means no trimming.

    Returns:
        tuple: (source, target).  When trimming occurs the returned
        sequences are lists; otherwise the inputs are returned unchanged.
    """
    # Use ``is None`` (identity), not ``== None``.
    if length is None or length >= len(source):
        # BUG FIX: the original fell through and sliced the deques with
        # ``[:length]``, which raises TypeError (deques don't support
        # slicing).  Nothing needs trimming here, so return as-is.
        return (source, target)
    # Randomly select which aligned pairs to keep.
    zipped_data = list(zip(source, target))
    random.shuffle(zipped_data)
    source = [x[0] for x in zipped_data[:length]]
    target = [x[1] for x in zipped_data[:length]]
    return (source, target)
コード例 #6
0
def photometric_distort(image):
    """
    Distort brightness, contrast, saturation, and hue, each with a 50% chance, in random order.
    :param image: image, a PIL Image
    :return: distorted image
    """
    new_image = image

    distortions = [
        FT.adjust_brightness, FT.adjust_contrast, FT.adjust_saturation,
        FT.adjust_hue
    ]

    random.shuffle(distortions)

    for d in distortions:
        if random.random() < 0.5:
            # BUG FIX: compare strings with '==', not 'is' -- identity
            # comparison of string literals is implementation-dependent.
            if d.__name__ == 'adjust_hue':
                # Caffe repo uses a 'hue_delta' of 18 - we divide by 255 because PyTorch needs a normalized value
                adjust_factor = random.uniform(-18 / 255., 18 / 255.)
            else:
                # Caffe repo uses 'lower' and 'upper' values of 0.5 and 1.5 for brightness, contrast, and saturation
                adjust_factor = random.uniform(0.5, 1.5)

            # Apply this distortion
            new_image = d(new_image, adjust_factor)

    return new_image
def data_gen(img_folder, batch_size, shuffle=True):
  """Yield (img, mask) training batches indefinitely from an image folder.

  NOTE(review): the ``shuffle`` parameter is accepted but never checked --
  the file list is always shuffled; confirm whether that is intended.

  :param img_folder: directory containing the training images
  :param batch_size: number of samples per yielded batch
  :param shuffle: unused (see note above)
  """
  c = 0
  n = os.listdir(img_folder) #List of training images
  random.shuffle(n)
  X_labels, Y_labels = read_csv_labels()

  while (True):
    # One batch of images and one of joint-heatmap masks.
    img = np.zeros((batch_size, DEFAULT_HEIGHT, DEFAULT_WIDTH, 3)).astype('float')
    mask = np.zeros((batch_size, DEFAULT_HEIGHT//STRIDE, DEFAULT_WIDTH//STRIDE, NUM_JOINTS)).astype('float')

    for i in range(c, c+batch_size): #initially from 0 to 16, c = 0.

      # Normalize pixel values to [0, 1].
      train_img = cv2.imread(img_folder+'/'+n[i])/255.
      # NOTE(review): cv2.resize expects (width, height); passing
      # (DEFAULT_HEIGHT, DEFAULT_WIDTH) is only correct if the two are
      # equal -- confirm for non-square defaults.
      train_img =  cv2.resize(train_img, (DEFAULT_HEIGHT, DEFAULT_WIDTH))# Read an image from folder and resize

      img[i-c] = train_img #add to array - img[0], img[1], and so on.

      # extract the number of the image from the string name
      id_img = get_ID(n[i])


      x = X_labels[id_img,:].astype('int')
      y = Y_labels[id_img,:].astype('int')
      train_mask = create_mask(x,y,EPSILON)
      train_mask = cv2.resize(train_mask, (DEFAULT_HEIGHT//STRIDE, DEFAULT_WIDTH//STRIDE))
      #train_mask = train_mask.reshape(512, 512, 1) # Add extra dimension for parity with train_img size [512 * 512 * 3]

      mask[i-c] = train_mask

    c+=batch_size
    # Wrap around and reshuffle once the next batch would run past the data.
    if(c+batch_size>=len(os.listdir(img_folder))):
      c=0
      random.shuffle(n)
                  # print "randomizing again"
    yield img, mask
コード例 #8
0
def lexicase(population):
    """Find the best individual by lexicase selection from a sub-population.

    Random candidate indices and random test cases are drawn; candidates
    are repeatedly filtered down to those matching the expected output on
    the first remaining test case.

    Args:
        population: Sequence of individuals exposing getTrainingPredictions().

    Returns:
        The selected individual.
    """
    test_set = getTrainingSet()
    candidates = [randint(0, int(POPULATION_SIZE) - 1) for _ in range(LEXICASE_SIZE)]
    test_cases = [randint(0, len(test_set) - 1) for _ in range(CASES)]

    # BUG FIX: the original shuffled/zipped undefined names (a, b, c),
    # which raises NameError at runtime; shuffle the drawn test cases.
    random.shuffle(test_cases)

    while True:
        case = test_set[test_cases[0]]
        best_on_first_case = [
            c for c in candidates if population[c].getTrainingPredictions()[
                test_cases[0]] == case[-1]
        ]
        if len(best_on_first_case) > 0:
            candidates = best_on_first_case
        if len(candidates) == 1:
            # BUG FIX: return the surviving candidate, not population[0].
            return population[candidates[0]]
        del test_cases[0]
        if len(test_cases) == 0:
            # Cases exhausted: pick uniformly among remaining candidates.
            index = choice(candidates)
            return population[index]
コード例 #9
0
def main(args):
    '''
    Split a dataset's file list into train/val/test .txt index files.

    :param args: parsed CLI args with fields: proportion ([train, val, test]
        fractions summing to ~1), num (optional cap on total items),
        reference (optional CSV path), out_dir, dataset_path, rand_seed
    :return: none, output is a dir that includes 3 .txt files
    '''
    [train_, val_, test_] = args.proportion
    out_num = args.num
    # Proportions must sum to roughly 1 (small float tolerance).
    if train_ + val_ + test_ - 1. > 0.01:  #delta
        print('erro')
        return

    if args.reference:
        # NOTE(review): assumes the reference CSV has a 'scences' (sic)
        # index column -- confirm against the actual file format.
        ref_df = pd.read_csv(args.reference, index_col='scences')
        print('load refs ok')
    else:
        ref_df = None

    out_dir = Path(args.out_dir)
    out_dir.mkdir_p()
    train_txt_p = out_dir / 'train.txt'
    val_txt_p = out_dir / 'val.txt'
    test_txt_p = out_dir / 'test.txt'

    dataset_path = Path(args.dataset_path)
    trajs = dataset_path

    item_list = []  #

    # filtering and combination
    scenes = trajs.dirs()
    scenes.sort()  #blocks
    scenes = scene_fileter(scenes)
    for scene in scenes:

        files = scene.files()
        files.sort()
        files = file_fileter(args.dataset_path, files, ref_df)
        item_list += files

    #list constructed
    # Deterministic shuffle, then optionally cap the number of items.
    random.seed(args.rand_seed)
    random.shuffle(item_list)
    if out_num and out_num < len(item_list):
        item_list = item_list[:out_num]

    # Store paths relative to the dataset root.
    for i in range(len(item_list)):
        item_list[i] = item_list[i].relpath(dataset_path)

    # Contiguous slices: [0, train) [train, val) [val, test).
    length = len(item_list)
    train_bound = int(length * args.proportion[0])
    val_bound = int(length * args.proportion[1]) + train_bound
    test_bound = int(length * args.proportion[2]) + val_bound

    print(" train items:{}\n val items:{}\n test items:{}".format(
        len(item_list[:train_bound]), len(item_list[train_bound:val_bound]),
        len(item_list[val_bound:test_bound])))
    writelines(item_list[:train_bound], train_txt_p)
    writelines(item_list[train_bound:val_bound], val_txt_p)
    writelines(item_list[val_bound:test_bound], test_txt_p)
コード例 #10
0
ファイル: map.py プロジェクト: pranayspeed/CleanerBot
def random_matrix(no_rows, no_cols, no_obs):
    """Build a random no_rows x no_cols grid with no_obs obstacle cells.

    Args:
        no_rows: Number of rows.
        no_cols: Number of columns.
        no_obs: Number of obstacle cells (value 1); the rest are free (0).

    Returns:
        tuple: (matrix, start_position) where matrix is a list of rows and
        start_position is {'x': col, 'y': row} of a randomly chosen free cell.
    """
    # 1 marks an obstacle, 0 a free cell; shuffle to scatter the obstacles.
    arr = [1] * no_obs + [0] * (no_rows * no_cols - no_obs)
    random.shuffle(arr)

    start_position = {'x': 0, 'y': 0}
    # Index (among the free cells, in scan order) of the start position.
    rand_pos = random.randint(0, no_rows * no_cols - no_obs - 1)

    matrix = []
    count = 0
    for i in range(no_rows):
        row = []
        for j in range(no_cols):
            cell = arr[i * no_cols + j]
            row.append(cell)
            # BUG FIX: the original tested arr[j] (first row only) instead
            # of the current cell, so the start could land on an obstacle
            # and the free-cell count was wrong.
            if cell == 0:
                if count == rand_pos:
                    start_position = {'x': j, 'y': i}
                count += 1
        matrix.append(row)
    return matrix, start_position
コード例 #11
0
ファイル: v1NN.py プロジェクト: eshafeeqe/Projects
    def get_filelist_dict(self, face_list):
        """Parse a list of face-image paths into a per-category file dict.

        Inputs:
            face_list -- a list of paths to face images (list)

        Outputs:
            filelists_dict -- dict mapping category (the directory part of
            each path) to the list of full paths in it, optionally shuffled
            per category (dict)
        """
        # Group bare file names by their directory ("category").
        cats = {}
        for path in face_list:
            category, _, fname = path.rpartition('/')
            cats.setdefault(category, []).append(fname)

        # Optionally shuffle each category with its own deterministic seed,
        # then rebuild the full paths.
        filelists_dict = {}
        seed = 1
        for category, names in cats.items():
            if self._rand_gallery:
                random.seed(seed)
                random.shuffle(names)
                seed += 1
            filelists_dict[category] = [category + '/' + nm for nm in names]

        return filelists_dict
コード例 #12
0
def make_steps(step, ampl):
    """
    Perform training epochs
    @param step Number of epochs to perform
    @param ampl the K, the randomized component of the score matrix.
    """
    # NOTE(review): relies on module-level train/train_set/w2hs/model/
    # checkpoint/compute_score and mutates the globals listed below --
    # not safe to call concurrently.  'histories' is declared global but
    # never assigned here.
    global w2ts, t2i, steps, features, score, histories

    # shuffle the training pictures
    random.shuffle(train)
    # Map whale id to the list of associated training picture hash value
    w2ts = {}
    for w, hs in w2hs.items():
        for h in hs:
            if h in train_set:
                if w not in w2ts: w2ts[w] = []
                if h not in w2ts[w]: w2ts[w].append(h)
    for w, ts in w2ts.items(): w2ts[w] = np.array(ts)

    # Map training picture hash value to index in 'train' array
    t2i = {}
    for i, t in enumerate(train): t2i[t] = i

    # Compute the match score for each picture pair
    features, score = compute_score()

    # Train the model for 'step' epochs; 'ampl' adds random noise to the
    # score matrix so the pair sampling varies between rounds.
    history = model.fit_generator(
        TrainingData(score + ampl * np.random.random_sample(size=score.shape), steps=step, batch_size=64),
        initial_epoch=steps, epochs=steps + step, max_queue_size=12, workers=6, verbose=2,
        callbacks=[checkpoint]
    ).history
    steps += step
コード例 #13
0
    def test03_ThreadedTransactions(self):
        """Run reader, writer, and deadlock-detector threads concurrently
        against the same DB handle under transactions."""
        if verbose:
            print('\n', '-=' * 30)
            print("Running %s.test03_ThreadedTransactions..." % \
                  self.__class__.__name__)

        # Randomize the key order handed out to writers.
        keys = list(range(self.records))
        import random
        random.shuffle(keys)
        records_per_writer = self.records // self.writers
        readers_per_writer = self.readers // self.writers
        # The workload must divide evenly among writers and readers.
        self.assertEqual(self.records, self.writers * records_per_writer)
        self.assertEqual(self.readers, self.writers * readers_per_writer)
        self.assertTrue((records_per_writer % readers_per_writer) == 0)

        readers = []
        for x in range(self.readers):
            rt = Thread(
                target=self.readerThread,
                args=(self.d, x),
                name='reader %d' % x,
            )  #verbose = verbose)
            # Daemonize so stuck readers can't block interpreter exit.
            if sys.version_info[0] < 3:
                rt.setDaemon(True)
            else:
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in range(self.writers):
            # Each writer gets a disjoint slice of the keys and its own
            # group of reader threads (started by the writer).
            a = keys[records_per_writer * x:records_per_writer * (x + 1)]
            b = readers[readers_per_writer * x:readers_per_writer * (x + 1)]
            wt = Thread(
                target=self.writerThread,
                args=(self.d, a, b),
                name='writer %d' % x,
            )  #verbose = verbose)
            writers.append(wt)

        # Background deadlock detector; runs until doLockDetect is cleared.
        dt = Thread(target=self.deadlockThread)
        if sys.version_info[0] < 3:
            dt.setDaemon(True)
        else:
            dt.daemon = True
        dt.start()

        for t in writers:
            if sys.version_info[0] < 3:
                t.setDaemon(True)
            else:
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join()

        # Signal the deadlock detector to stop, then wait for it.
        self.doLockDetect = False
        dt.join()
コード例 #14
0
ファイル: ai_memes.py プロジェクト: schesa/ai-memes
def random_data_generator(photos, wordtoidx, max_length, num_descriptions_per_batch):
  """Yield batches of caption-training data indefinitely.

  # x1 - Training data for photos
  # x2 - The caption that goes with each photo
  # y - The predicted rest of the caption

  NOTE(review): iterates the module-level ``train_data`` (not the keys of
  ``photos``) and uses module-level ``vocab_size`` -- confirm those
  globals exist where this generator is used.
  """
  x1, x2, y = [], [], []
  while True:
    random.shuffle(train_data)
    d=0
    for key, desc in train_data:
      photo = photos[key+'.jpg']
      d+=1
      # Emit a batch after every num_descriptions_per_batch descriptions.
      if d==num_descriptions_per_batch:
        yield ([np.array(x1), np.array(x2)], np.array(y))
        x1, x2, y = [], [], []
        d=0
      # Convert each word into a list of sequences.
      seq = [wordtoidx[word] for word in desc.split(' ') if word in wordtoidx]
      # Generate a training case for every possible sequence and outcome
      for i in range(1, len(seq)):
        in_seq, out_seq = seq[:i], seq[i]
        in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        x1.append(photo)
        x2.append(in_seq)
        y.append(out_seq)
コード例 #15
0
ファイル: algorithms.py プロジェクト: optNGUI/Projekt-KI
 def select():
     """Select two (first, second) parent pairs via roulette-wheel selection.

     Uses ``population`` and ``fitness`` from the enclosing scope.  Raises
     RuntimeError if a cumulative-fitness scan fails to pick an individual
     (e.g. when total fitness is zero).
     """
     cnt = 2
     sum_fit = sum([fitness(ind) for ind in population])
     pop = population[:]
     random.shuffle(pop)
     pairs = []
     for i in range(cnt):
         # Two independent spins of the wheel.
         n = random.uniform(0, sum_fit)
         m = random.uniform(0, sum_fit)
         tmp_sum = 0
         first, second = None, None
         # Walk until the cumulative fitness reaches the first spin value.
         for ind in pop:
             tmp_sum += fitness(ind)
             if tmp_sum >= n:
                 first = ind
                 break
         tmp_sum = 0
         # Same walk for the second spin.
         for ind in pop:
             tmp_sum += fitness(ind)
             if tmp_sum >= m:
                 second = ind
                 break
         if first is None or second is None:
             print("FAILED")
             raise RuntimeError("Ooops")
         pairs.append((first, second))
     print(str(pairs))
     return pairs
コード例 #16
0
ファイル: DataReader.py プロジェクト: gabrielcnr/languages
 def _put_all_data_pointers(self):
     """Fill the data-pointer queue with every training file name,
     repeated N_SHUFFLE times with a fresh shuffle each round."""
     names = os.listdir(self._train_data_location)
     logging.warn("Put all data pointers {}..{}".format(names[:5], names[-5:]))
     for _round in range(self.N_SHUFFLE):
         random.shuffle(names)
         for name in names:
             self._dp_queue.put(name)
コード例 #17
0
 def shuffle(self):
     """
     Returns a random shuffling of the array.
     :rtype: List[int]
     """
     nums = self.nums
     # random.shuffle works in place and returns None, so shuffle first
     # and return the (now reordered) list afterwards.
     random.shuffle(nums)
     return nums
コード例 #18
0
    def balanced_sample(self):
        """Draw one random image per pool class, batchnum times over,
        appending [imgpath, class] pairs to self.cadidates, then shuffle
        the accumulated candidates in place."""
        for _ in range(self.batchnum):
            for label in self.pool:
                chosen = random.sample(self.pool[label], 1)[0]
                self.cadidates.append([chosen, label])
        random.shuffle(self.cadidates)
コード例 #19
0
ファイル: control.py プロジェクト: Sach-y/WargameControl
 def map_random_rotate(self):
     """Rotate maps from the pool, reshuffling once the pool is exhausted."""
     current = self.map_pool[self.currentMapId]
     Server.change_map_settings(current)
     print("Rotating map to " + current["mapName"])
     self.currentMapId += 1
     # Wrapped around: start a new pass over a freshly shuffled pool.
     if self.currentMapId == len(self.map_pool):
         self.currentMapId = 0
         random.shuffle(self.map_pool)
コード例 #20
0
ファイル: test_thread.py プロジェクト: ksikora/system
    def test03_ThreadedTransactions(self):
        """Transactional stress test: writer threads (each with a group of
        readers) plus a deadlock-detector thread share one DB handle."""
        if verbose:
            print('\n', '-=' * 30)
            print("Running %s.test03_ThreadedTransactions..." % \
                  self.__class__.__name__)

        # Randomize the key order handed to writers.
        keys=list(range(self.records))
        import random
        random.shuffle(keys)
        records_per_writer=self.records//self.writers
        readers_per_writer=self.readers//self.writers
        # Work must divide evenly among threads.
        self.assertEqual(self.records,self.writers*records_per_writer)
        self.assertEqual(self.readers,self.writers*readers_per_writer)
        self.assertTrue((records_per_writer%readers_per_writer)==0)

        readers=[]
        for x in range(self.readers):
            rt = Thread(target = self.readerThread,
                        args = (self.d, x),
                        name = 'reader %d' % x,
                        )#verbose = verbose)
            # Daemonize so stray readers don't block interpreter exit.
            if sys.version_info[0] < 3 :
                rt.setDaemon(True)
            else :
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in range(self.writers):
            # Disjoint key slice and reader group per writer.
            a=keys[records_per_writer*x:records_per_writer*(x+1)]
            b=readers[readers_per_writer*x:readers_per_writer*(x+1)]
            wt = Thread(target = self.writerThread,
                        args = (self.d, a, b),
                        name = 'writer %d' % x,
                        )#verbose = verbose)
            writers.append(wt)

        # Deadlock detector runs until doLockDetect is cleared below.
        dt = Thread(target = self.deadlockThread)
        if sys.version_info[0] < 3 :
            dt.setDaemon(True)
        else :
            dt.daemon = True
        dt.start()

        for t in writers:
            if sys.version_info[0] < 3 :
                t.setDaemon(True)
            else :
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join()

        # Signal the detector to stop, then wait for it.
        self.doLockDetect = False
        dt.join()
コード例 #21
0
ファイル: Assignment 1.py プロジェクト: simonmdsn/AL-DS_2020
def makeListFromNumberOfParameters(n, shuffle, reverse):
    """Build the list [0, 1, ..., n-2], optionally reversed and/or shuffled.

    Args:
        n: Upper bound; the list has n - 1 elements (the original's
           ``range(n - 1)`` behavior is kept for backward compatibility,
           even though it looks like an off-by-one).
        shuffle: When truthy, shuffle the list in place.
        reverse: When truthy, reverse the list (before any shuffle).

    Returns:
        The constructed list.
    """
    # list(range(...)) replaces the original element-by-element append loop.
    listen = list(range(n - 1))
    if reverse:
        listen.reverse()
    if shuffle:
        random.shuffle(listen)
    return listen
コード例 #22
0
ファイル: mock.py プロジェクト: chbrandt/booq
def generate_ids(number_objects):
    '''
    Generate 'number_objects' IDs
    '''
    from numpy import arange, random
    count = number_objects
    # Draw from a pool of 2*count - 1 candidate IDs (1 .. 2*count - 1),
    # shuffle it, and keep the first 'count'.
    pool = arange(1, 2 * count)
    random.shuffle(pool)
    return pool[:count]
コード例 #23
0
ファイル: dataset.py プロジェクト: argosopentech/onmt-models
 def data(self, length=None):
     """Return the dataset shuffled (lazily computed and cached).

     On first call, pulls (source, target) from self.dataset, shuffles
     them as aligned pairs, and caches a Dataset of the result; later
     calls reuse the cache.  ``length`` is accepted for interface
     compatibility but is not used here.
     """
     # Use ``is None``: ``== None`` invokes __eq__ and is un-idiomatic.
     if self.shuffled_dataset is None:
         source, target = self.dataset.data()
         zipped_data = list(zip(source, target))
         random.shuffle(zipped_data)
         shuffled_source = [x[0] for x in zipped_data]
         shuffled_target = [x[1] for x in zipped_data]
         self.shuffled_dataset = Dataset(shuffled_source, shuffled_target)
     return self.shuffled_dataset.data()
コード例 #24
0
ファイル: perceptron.py プロジェクト: Krolov18/Gloses
    def train(self, train_data, test_data, iter_max):
        """Run ``iter_max`` perceptron training epochs.

        Each epoch updates the model on every (vector, class) pair, logs
        train/test accuracy to stderr, then shuffles the training data
        in place for the next epoch.
        """
        import random, sys

        for i in range(iter_max):
            for (vecteur, classe) in train_data:
                self.update(classe, self.predict(features=vecteur), vecteur)
            print("ACCURACY\tI\tTRAIN\tTEST", file=sys.stderr)
            # BUG FIX: str.join requires strings; the original passed the raw
            # epoch index and accuracy floats, raising TypeError.
            print("\t" + "\t".join(
                map(str, (i, self.evaluate(train_data), self.evaluate(test_data)))),
                file=sys.stderr)
            random.shuffle(train_data)
コード例 #25
0
ファイル: models.py プロジェクト: CheyneWilson/chess
 def create_game(self, player_1_username, player_2_username):
     u"""
     Create a new game with the black and white player determined at random.
     """
     # The default Mersenne Twister random should be sufficient for this
     contenders = [player_1_username, player_2_username]
     random.shuffle(contenders)
     white, black = contenders
     return self.create(white_player=white, black_player=black)
コード例 #26
0
def agita():
    """Read five strings from the user, shuffle them, and print them out."""
    import random
    cadenas = [input('Introduce una cadena:') for _ in range(5)]
    random.shuffle(cadenas)
    for cadena in cadenas:
        print(cadena)
コード例 #27
0
def tinder(age, gender):
    """Print a joke 'match count' message depending on age and gender.

    Args:
        age: User age in years.
        gender: "female" or "male".
    """
    import random
    # Renamed from ``list`` -- don't shadow the builtin.
    matches = [-500, -50, -34, -27, 0, -4, -78]
    random.shuffle(matches)
    if age < 18:
        print("Jailbait!")
    if age >= 18 and gender == "female":
        print("hey, you wanna see my dick?")
    if age >= 18 and gender == "male":
        print("you have {} matches".format(matches[0]))
コード例 #28
0
ファイル: game.py プロジェクト: damien-c-d/party-room-bot-v4
 def choose_word(self):
     """Pick a random trivia question or word, depending on game mode.

     Sorts the source list in place (deterministic base order), shuffles
     it four times, then returns one random element from it.
     """
     pool = self.trivia_questions if self.trivia else self.words
     pool.sort()
     for _ in range(4):
         random.shuffle(pool)
     return random.choice(pool)
コード例 #29
0
ファイル: game.py プロジェクト: slmaaa/CatchProject
 def assignTeam(self):
     """Shuffle players and deal them into teams, then mark game PREPARE.

     Team size is round(players / (teams + 0.5)), so some players may be
     left without a team assignment; every player still gets a key.
     """
     random.shuffle(self.players)
     per_team = round(len(self.players) / (len(self.teams) + 0.5))
     roster = []
     for team in self.teams:
         roster.extend([team] * per_team)
     # zip stops at the shorter sequence, so surplus players keep no team.
     for player, team in zip(self.players, roster):
         player.team = team
     for index, player in enumerate(self.players):
         player.key = index
     self.status = "PREPARE"
コード例 #30
0
    def test01_1WriterMultiReaders(self):
        """Exercise concurrent access: several writer threads plus, per
        writer, a group of reader threads sharing the same DB handle.
        (Python 2 code: print statements, xrange, list-returning range.)"""
        if verbose:
            print '\n', '-=' * 30
            print "Running %s.test01_1WriterMultiReaders..." % \
                  self.__class__.__name__

        # Randomize key order so writers hit the DB non-sequentially.
        keys = range(self.records)
        import random
        random.shuffle(keys)
        records_per_writer = self.records // self.writers
        readers_per_writer = self.readers // self.writers
        # The workload must divide evenly among writers and readers.
        self.assertEqual(self.records, self.writers * records_per_writer)
        self.assertEqual(self.readers, self.writers * readers_per_writer)
        self.assertTrue((records_per_writer % readers_per_writer) == 0)
        readers = []

        for x in xrange(self.readers):
            rt = Thread(
                target=self.readerThread,
                args=(self.d, x),
                name='reader %d' % x,
            )  #verbose = verbose)
            import sys
            # Daemonize so stray readers don't block interpreter exit.
            if sys.version_info[0] < 3:
                rt.setDaemon(True)
            else:
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in xrange(self.writers):
            # Each writer gets a disjoint, sorted key slice and its own
            # group of reader threads to start.
            a = keys[records_per_writer * x:records_per_writer * (x + 1)]
            a.sort()  # Generate conflicts
            b = readers[readers_per_writer * x:readers_per_writer * (x + 1)]
            wt = Thread(
                target=self.writerThread,
                args=(self.d, a, b),
                name='writer %d' % x,
            )  #verbose = verbose)
            writers.append(wt)

        for t in writers:
            import sys
            if sys.version_info[0] < 3:
                t.setDaemon(True)
            else:
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join()
コード例 #31
0
    def getResult(self,
                  query,
                  domains=[],
                  beginDate='',
                  endDate='',
                  title=False,
                  snippet=True,
                  fullContent=False):
        """Search all domains in parallel and return formatted results.

        NOTE(review): ``domains=[]`` is a mutable default argument; also,
        when a list IS passed, it is shuffled in place -- a visible side
        effect on the caller's list.  Confirm both are intended.

        :param query: search query string
        :param domains: list of domains to search; empty means unrestricted
        :param beginDate: date-range filter passed through per domain
        :param endDate: date-range filter passed through per domain
        :param title: include titles in the formatted output
        :param snippet: include snippets in the formatted output
        :param fullContent: include full article content in the output
        :return: [statistics_dict, list_of_doc_info]
        """
        import time
        start_time = time.time()
        if not (domains):
            domains = ['']
        else:
            random.shuffle(domains)

        # Fan the per-domain search out over all CPU cores.
        with Pool(processes=multiprocessing.cpu_count()) as pool:
            results_by_domain = pool.starmap(
                self.getResultsByDomain,
                zip(domains, repeat(query), repeat(beginDate),
                    repeat(endDate)))

        results_flat_list = list(chain.from_iterable(results_by_domain))

        # Drop one empty-dict placeholder result, if present.
        try:
            results_by_domain.remove({})
        except:
            pass

        # Format every raw result in parallel as well.
        with Pool(processes=multiprocessing.cpu_count()) as pool:
            result = pool.starmap(
                format_output,
                zip(results_flat_list, repeat(self.newspaper3k), repeat(title),
                    repeat(snippet), repeat(fullContent)))
        domains_list = [item[1] for item in result]
        # De-duplicate while preserving order.
        filter_domains_list = list(dict.fromkeys(domains_list))

        docs_info_list = [item[0] for item in result]

        all_results = []

        # NOTE(review): all_results is built but never returned or read
        # afterwards -- possibly dead code; confirm before removing.
        for dominio_list in [
                dominio_list for dominio_list in results_by_domain
                if dominio_list is not None
        ]:
            all_results.extend(dominio_list)

        total_time = time.time() - start_time
        statistical_dict = search_statistics(total_time, len(docs_info_list),
                                             len(filter_domains_list),
                                             filter_domains_list)
        final_output = [statistical_dict, docs_info_list]

        return final_output
コード例 #32
0
ファイル: AS02.py プロジェクト: hmgans/DataMiningHW
def minHashFunction(t, s1, s2):
    """Estimate the Jaccard similarity of the key sets of s1 and s2.

    Runs ``t`` min-hash trials: each trial shuffles the combined key
    universe and checks whether both key sets have the same first key in
    the shuffled order; the fraction of agreeing trials estimates the
    Jaccard similarity.

    Args:
        t: Number of min-hash trials (must be > 0).
        s1: First dict; only its keys are used.
        s2: Second dict; only its keys are used.

    Returns:
        float: Estimated similarity in [0, 1].  Two empty key sets are
        reported as 1.0 (both "first keys" are absent), matching the
        original implementation.

    Raises:
        ZeroDivisionError: If t == 0 (as the original did).
    """
    # The original built four throwaway dict copies per call just to test
    # membership; plain sets do the same job directly.
    keys1 = set(s1.keys())
    keys2 = set(s2.keys())
    universe = list(keys1 | keys2)

    matches = 0
    for _ in range(t):
        random.shuffle(universe)
        # First key (in shuffled order) present in each set, or None.
        first1 = next((g for g in universe if g in keys1), None)
        first2 = next((g for g in universe if g in keys2), None)
        if first1 == first2:
            matches += 1
    return matches / t
コード例 #33
0
ファイル: tagging.py プロジェクト: wsrtka/Hyperreal
def build_corpus(sents, train_r=0.7, dev_r=0.15):
    """Shuffle sentences and split them into flair train/dev/test datasets.

    Args:
        sents: Sequence of flair sentences (must support .copy()).
        train_r: Fraction of sentences for the training set.
        dev_r: Fraction for the dev set; the remainder becomes the test set.

    Returns:
        flair.Corpus built from the three splits.
    """
    n_train = int(train_r * len(sents))
    n_dev = int(dev_r * len(sents))

    shuffled = sents.copy()
    random.shuffle(shuffled)

    return flair.Corpus(
        flair.SentenceDataset(shuffled[:n_train]),
        flair.SentenceDataset(shuffled[n_train:n_train + n_dev]),
        flair.SentenceDataset(shuffled[n_train + n_dev:]),
    )
コード例 #34
0
ファイル: test_thread.py プロジェクト: DecipherOne/Troglodyte
    def test01_1WriterMultiReaders(self):
        """Concurrency test: writer threads, each with its own group of
        reader threads, hammering the same DB handle.
        (Python 2 code: print statements, xrange, list-returning range.)"""
        if verbose:
            print '\n', '-=' * 30
            print "Running %s.test01_1WriterMultiReaders..." % \
                  self.__class__.__name__

        # Shuffle key order so writes are non-sequential.
        keys=range(self.records)
        import random
        random.shuffle(keys)
        records_per_writer=self.records//self.writers
        readers_per_writer=self.readers//self.writers
        # Work must divide evenly across writers and readers.
        self.assertEqual(self.records,self.writers*records_per_writer)
        self.assertEqual(self.readers,self.writers*readers_per_writer)
        self.assertTrue((records_per_writer%readers_per_writer)==0)
        readers = []

        for x in xrange(self.readers):
            rt = Thread(target = self.readerThread,
                        args = (self.d, x),
                        name = 'reader %d' % x,
                        )#verbose = verbose)
            import sys
            # Daemon threads so leftover readers can't hang interpreter exit.
            if sys.version_info[0] < 3 :
                rt.setDaemon(True)
            else :
                rt.daemon = True
            readers.append(rt)

        writers=[]
        for x in xrange(self.writers):
            # Disjoint sorted key slice per writer, plus its reader group.
            a=keys[records_per_writer*x:records_per_writer*(x+1)]
            a.sort()  # Generate conflicts
            b=readers[readers_per_writer*x:readers_per_writer*(x+1)]
            wt = Thread(target = self.writerThread,
                        args = (self.d, a, b),
                        name = 'writer %d' % x,
                        )#verbose = verbose)
            writers.append(wt)

        for t in writers:
            import sys
            if sys.version_info[0] < 3 :
                t.setDaemon(True)
            else :
                t.daemon = True
            t.start()

        for t in writers:
            t.join()
        for t in readers:
            t.join()
コード例 #35
0
def split_data(file2idx, name2idx, val_ratio=0.1):
    '''
    Split the dataset into train and validation file lists; the validation
    set draws a val_ratio fraction from every class bucket.
    :param file2idx: mapping from file name to its list of label ids
    :param name2idx: mapping from label name to label id
    :param val_ratio: fraction of the data used for validation
    :return: (train file list, validation file list)
    '''
    data = set(os.listdir(config.train_dir))
    #  Drop files whose contents duplicate an already-seen file.
    new_data = set()
    dv = set()
    for i in data:
        file_path = os.path.join(config.train_dir, i)
        with open(file_path, 'r', encoding='utf-8') as fr:
            ss = fr.read()
        if ss in dv:
            continue  # duplicate content -- skip
        elif name2idx['窦性心律'] in file2idx[i] and name2idx[
                '窦性心律不齐'] in file2idx[i]:
            continue  # drop files labeled with both contradictory rhythm labels
        else:
            new_data.add(i)
            dv.add(ss)
    data = new_data

    for i in data:
        #  Some files carry the incomplete/complete right-bundle-branch-block
        #  label without the generic right-bundle-branch-block label; add it.
        if (name2idx['不完全性右束支传导阻滞'] in file2idx[i] or name2idx['完全性右束支传导阻滞'] in file2idx[i]) \
                and name2idx['右束支传导阻滞'] not in file2idx[i]:
            file2idx[i].append(name2idx['右束支传导阻滞'])
        #  Likewise for complete left-bundle-branch block.
        #  NOTE(review): this branch tests the *right*-branch label and then
        #  appends the left-branch label twice -- looks like a copy/paste
        #  bug; confirm the intended labels before changing.
        if name2idx['完全性左束支传导阻滞'] in file2idx[i] \
                and name2idx['右束支传导阻滞'] not in file2idx[i]:
            file2idx[i].append(name2idx['左束支传导阻滞'])
            file2idx[i].append(name2idx['左束支传导阻滞'])

    val = set()
    # Bucket files by class id so every class contributes to validation.
    idx2file = [[] for _ in range(config.num_classes)]
    for file in data:
        for idx in file2idx[file]:
            idx2file[idx].append(file)

    for item in idx2file:
        # print(len(item), item)
        num = int(len(item) * val_ratio)
        # Shuffle, then take the first val_ratio fraction for validation.
        random.shuffle(item)
        val = val.union(item[:num])
    train = data.difference(val)

    return list(train), list(val)
コード例 #36
0
ファイル: speedyGA.py プロジェクト: burjorjee/speedyGApy
def staircaseFunctionEvolve(length, numSteps, order, delta, sigma, probMutation, probCrossover, popSize, maxGens):
    """Run the GA on a randomly generated staircase fitness function.

    Selects ``order * numSteps`` distinct loci at random, groups them into
    ``numSteps`` rows of ``order`` loci each (matrix ``L``), pairs them with
    all-ones target values (``V``), and hands the resulting staircase
    function to ``evolve`` along with the visualization callbacks.

    NOTE(review): ``arange``/``ones``/``partial`` and possibly ``random``
    come from imports outside this view (numpy / functools star imports in
    speedyGApy) -- confirm which ``random`` module is shuffling here.
    """
    L = arange(length)
    random.shuffle(L)
    # Keep only the loci that participate in some step; reshape to one row
    # per step with `order` loci each.
    L=L[:order*numSteps]
    L.shape=(-1,order)
    # The target value at every selected locus is 1.
    V=ones(L.shape, dtype='int8')
    evolve(partial(staircaseFunction, L=L, V=V, delta=delta, sigma=sigma),
        length,
        popSize,
        maxGens,
        probMutation,
        probCrossover=probCrossover,
        visualizeGen=partial(staircaseFunctionVisualize,L=L, figNum=3),
        visualizeRun=partial(visualizeRun, figNum=4))
コード例 #37
0
ファイル: test_thread.py プロジェクト: webiumsk/WOT-0.9.12
    def test02_SimpleLocks(self):
        """Exercise simple locking: partition shuffled keys among writer
        threads, attach a share of reader threads to each writer, run all
        writers to completion, then join the readers.

        Python 2 code (print statements, ``xrange``); relies on
        ``self.records`` / ``self.writers`` / ``self.readers`` and the
        ``readerThread`` / ``writerThread`` worker methods defined elsewhere
        in the class.
        """
        if verbose:
            print "\n", "-=" * 30
            print "Running %s.test02_SimpleLocks..." % self.__class__.__name__
        keys = range(self.records)
        import random

        random.shuffle(keys)
        records_per_writer = self.records // self.writers
        readers_per_writer = self.readers // self.writers
        # The configuration must divide evenly so every writer gets an equal
        # slice of records and readers.
        self.assertEqual(self.records, self.writers * records_per_writer)
        self.assertEqual(self.readers, self.writers * readers_per_writer)
        self.assertTrue(records_per_writer % readers_per_writer == 0)
        readers = []
        for x in xrange(self.readers):
            rt = Thread(target=self.readerThread, args=(self.d, x), name="reader %d" % x)
            # Daemonize so stray readers don't block interpreter exit.
            if sys.version_info[0] < 3:
                rt.setDaemon(True)
            else:
                rt.daemon = True
            readers.append(rt)

        writers = []
        for x in xrange(self.writers):
            # Each writer gets a contiguous, sorted slice of the shuffled keys
            # and the corresponding slice of reader threads to start.
            a = keys[records_per_writer * x : records_per_writer * (x + 1)]
            a.sort()
            b = readers[readers_per_writer * x : readers_per_writer * (x + 1)]
            wt = Thread(target=self.writerThread, args=(self.d, a, b), name="writer %d" % x)
            writers.append(wt)

        for t in writers:
            if sys.version_info[0] < 3:
                t.setDaemon(True)
            else:
                t.daemon = True
            t.start()

        # Wait for writers first; readers are started by the writers and are
        # joined afterwards.
        for t in writers:
            t.join()

        for t in readers:
            t.join()
コード例 #38
0
ファイル: core.py プロジェクト: nanestev/asapvideo
 def _randomize(self, iteration):
     """Shuffle the stored expressions in place and return the new first one.

     The *iteration* argument is accepted for interface compatibility but
     is not used.
     """
     pool = self._expressions
     random.shuffle(pool)
     return next(iter(pool))
コード例 #39
0
ファイル: utils.py プロジェクト: jglara/algothink
def random_order(graph):
    """Return the graph's node keys as a list in random order.

    BUGFIX: ``graph.keys()`` returns a non-shufflable view under Python 3,
    so ``random.shuffle`` raised ``TypeError``; copy the keys into a list
    first (also correct under Python 2).

    :param graph: mapping from node to anything (e.g. an adjacency dict)
    :return: list of the graph's keys, randomly permuted
    """
    nodes = list(graph.keys())
    random.shuffle(nodes)
    return nodes
コード例 #40
0
ファイル: speedyGA.py プロジェクト: burjorjee/speedyGApy
def evolve(fitnessFunction,
            length,
            popSize,
            maxGens,
            probMutation,
            probCrossover=1,
            sigmaScaling=True,
            sigmaScalingCoeff=1,
            SUS=True,
            visualizeGen=visualizeGen,
            visualizeRun=visualizeRun):
    """Run a generational genetic algorithm over binary genomes.

    Population is a (popSize, length) int8 array of 0/1 genes. Each
    generation: evaluate fitness, optionally sigma-scale it, select parents
    (stochastic universal sampling or roulette), apply uniform crossover and
    bitwise mutation using pre-generated random mask repositories, then
    replace the population with the offspring.

    Python 2 code: uses print statements and ``xrange``; ``rand``/``zeros``/
    ``ones`` etc. come from numpy imports outside this view.
    NOTE(review): relies on py2 integer division (``popSize/2``) and on
    float-valued ``temp`` as a slice index into the mask repositories --
    porting to py3 needs ``//`` and ``int(...)``.
    """

    # Pre-generate pools of crossover/mutation masks; each generation slices
    # a random window of `length` columns out of them instead of drawing
    # fresh random bits.
    maskReposFactor = 5
    uniformCrossoverMaskRepos = rand(popSize/2, (length+1)*maskReposFactor) < 0.5
    mutMaskRepos = rand(popSize, (length+1)*maskReposFactor) < probMutation

    avgFitnessHist = zeros(maxGens+1)
    maxFitnessHist = zeros(maxGens+1)

    # Random initial population of 0/1 genes.
    pop = zeros((popSize, length), dtype='int8')
    pop[rand(popSize, length)<0.5] = 1

    for gen in xrange(maxGens):

        fitnessVals = fitnessFunction(pop)
        fitnessVals = transpose(fitnessVals)
        maxFitnessHist[gen] = fitnessVals.max()
        avgFitnessHist[gen] = fitnessVals.mean()

        print "gen = %.3d   avgFitness = %3.3f  maxfitness = %3.3f" % (gen, avgFitnessHist[gen], maxFitnessHist[gen])
        if visualizeGen:
            visualizeGen(pop, gen=gen, avgFitness=avgFitnessHist[gen], maxFitness=maxFitnessHist[gen])
        # Sigma scaling: recenter fitness around 1 in units of std deviations
        # to control selection pressure; clamp negatives to 0.
        if sigmaScaling:
            sigma = std(fitnessVals)
            if sigma:
                fitnessVals = 1 + (fitnessVals - fitnessVals.mean()) / (sigmaScalingCoeff * sigma)
                fitnessVals[fitnessVals<0] = 0
            else:
                fitnessVals = ones(1,popSize)

        # Selection: place popSize markers on the cumulative normalized
        # fitness; SUS uses evenly spaced markers with one random offset,
        # otherwise plain roulette-wheel sampling.
        cumNormFitnessVals = cumsum(fitnessVals/fitnessVals.sum())
        if SUS:
            markers = random.random() + arange(popSize,dtype='float')/popSize
            markers[markers>1] = markers[markers >1] - 1
        else:
            markers = rand(1, popSize)
        markers = sort(markers)
        parentIndices = zeros(popSize, dtype='int16')
        ctr = 0
        for idx in xrange(popSize):
            while markers[idx]>cumNormFitnessVals[ctr]:
                ctr += 1
            parentIndices[idx] = ctr
        random.shuffle(parentIndices)

        # determine the first parents of each mating pair
        firstParents = pop[parentIndices[0:popSize/2],:]
        # determine the second parents of each mating pair
        secondParents = pop[parentIndices[popSize/2:],:]

        # Uniform crossover: swap genes between parent pairs where the mask
        # is True; pairs selected by reprodIndices skip crossover entirely.
        temp = floor(random.random() * length * maskReposFactor-1)
        masks = uniformCrossoverMaskRepos[:, temp:temp+length]
        reprodIndices = rand(popSize/2)<1-probCrossover
        masks[reprodIndices, :] = False
        firstKids = firstParents
        firstKids[masks] = secondParents[masks]
        secondKids = secondParents
        secondKids[masks] = firstParents[masks]
        pop = vstack((firstKids, secondKids))

        # Mutation: flip the masked bits (add 1 mod 2).
        temp = floor(random.random()*length*(maskReposFactor-1))
        masks = mutMaskRepos[:, temp:temp+length]
        pop[masks] = pop[masks] + 1
        pop = remainder(pop, 2)

    visualizeRun(avgFitnessHist, maxFitnessHist)
コード例 #41
0
ファイル: labels.py プロジェクト: DruidGreeneyes/pyrivet_core
def _generate_vals(count, token):
    random.seed(token)
    return random.shuffle([1 for __ in range(count // 2)] + [-1 for __ in range(count // 2)])
コード例 #42
0
ファイル: ex.py プロジェクト: caiknife/test-python-project
def process_all_in_random_order(data, process):
    """Apply *process* to every element of *data* in a random order.

    BUGFIX: the original passed the whole list (``process(data)``) on every
    iteration instead of the current element.

    :param data: any iterable of items (copied, so generators are fine)
    :param process: one-argument callable invoked once per item
    """
    items = list(data)
    random.shuffle(items)
    for item in items:
        process(item)