Exemple #1
0
    def sample (self, percent):
        """Randomly keep a fraction of the entries stored under every key.

        For each key, slots 0 and 1 of ``self._locations`` are reduced to
        ``int(len * percent)`` randomly chosen entries, kept in their original
        relative order. ``self._indexes`` is reduced with the same positions.
        ``self.total`` is recomputed as the number of kept entries whose index
        value equals 0.

        Warning: the current object is changed!
        """
        self.total = 0
        for key in self._locations.keys():
            kept_zero_index = 0
            for side in (0, 1):
                old_locs = self._locations[key][side]
                old_inds = self._indexes[key][side]
                n_keep = int(len(old_locs) * percent)
                # Sorted positions keep the surviving entries in original order.
                positions = sorted(random_sample(xrange(len(old_locs)), n_keep))
                self._locations[key][side] = array(BYTE4, (old_locs[p] for p in positions))
                new_inds = array(BYTE4, [])
                self._indexes[key][side] = new_inds
                for p in positions:
                    new_inds.append(old_inds[p])
                    if old_inds[p] == 0:
                        kept_zero_index += 1

            self.total += kept_zero_index
Exemple #2
0
    def sample (self, percent):
        """Randomly keep a fraction of the entries stored under every key.

        Slots 0 and 1 of each ``self.__locations`` entry are replaced by a
        sorted random subset of ``int(len * percent)`` of their values, and
        ``self.total`` is recomputed as the number of values kept.

        Warning: the current object is changed!
        """
        self.total = 0
        for key in self.__locations.keys():
            pair = self.__locations[key]
            for side in (0, 1):
                n_keep = int(len(pair[side]) * percent)
                pair[side] = array(BYTE4, sorted(random_sample(pair[side], n_keep)))
            self.total += len(pair[0]) + len(pair[1])
Exemple #3
0
    def get_tuples(self, num_positive_samples, num_negative_samples):
        """Generate (Anchor, Sample, [0|1]) tuples for every labelled image.

        For each anchor image, up to ``num_positive_samples`` images with the
        same label are paired with it and flagged 1, and up to
        ``num_negative_samples`` images outside that label are paired with it
        and flagged 0. Both counts are capped by the number of candidates
        actually available.

        Arguments:
            num_positive_samples {int} -- The number of positive samples.
            num_negative_samples {int} -- The number of negative samples.

        Returns:
            DataFrame -- One row per tuple, with columns ``self._output_df_cols``.
        """

        self._logger.info(
                        'Recieved parameters:: num_positive_samples: %d num_negative_samples: %d',
                        num_positive_samples,
                        num_negative_samples)

        #Images
        images = self._get_images()

        #Label images
        labelled_images = self._get_labelled_images()

        samples = []
        for _, label_images in tqdm(labelled_images.items(), desc = 'Generating input tuples', total = len(labelled_images)):
            #random.sample() needs a sequence; sets are rejected from Python 3.11 on,
            #so the candidate pools are materialised once per label.
            positive_candidates = list(label_images)
            negative_candidates = list(images - set(label_images))

            #Available samples (never ask for more than exist)
            n_avail_positive_samples = min(num_positive_samples, len(positive_candidates))
            n_avail_negative_samples = min(num_negative_samples, len(negative_candidates))

            for anchor in positive_candidates:
                positive_samples = random_sample(positive_candidates, n_avail_positive_samples)
                negative_samples = random_sample(negative_candidates, n_avail_negative_samples)

                samples.extend((anchor, p_sample, 1) for p_sample in positive_samples)
                samples.extend((anchor, n_sample, 0) for n_sample in negative_samples)

        #Pandas DataFrame
        tuple_df = DataFrame(samples, columns = self._output_df_cols)

        return tuple_df
Exemple #4
0
    def get_triplets(self, num_samples):
        """Generate (Anchor, Positive, Negative) triplets for every labelled image.

        For each anchor image, up to ``num_samples`` triplets are built by
        pairing randomly chosen same-label images with randomly chosen images
        from outside the label. The count is capped by the number of positive
        and negative candidates available.

        Arguments:
            num_samples {int} -- The number of samples per image

        Returns:
            DataFrame -- One row per triplet, with columns ``self._output_df_cols``.
        """

        self._logger.info('Parameters:: num_samples: %d', num_samples)

        #Images
        images = self._get_images()

        #Label images
        labelled_images = self._get_labelled_images()

        #Triplet placeholder
        samples = []

        for _, label_images in tqdm(labelled_images.items(), desc = 'Generating input triplets', total = len(labelled_images)):
            #random.sample() needs a sequence; sets are rejected from Python 3.11 on,
            #so the candidate pools are materialised once per label.
            positive_candidates = list(label_images)
            negative_candidates = list(images - label_images)

            #Available samples (never ask for more than exist on either side)
            n_avail_samples = min(num_samples, len(positive_candidates), len(negative_candidates))

            for anchor in positive_candidates:
                #sample() already returns its picks in random order, so no extra shuffle is needed
                positive_samples = random_sample(positive_candidates, n_avail_samples)
                negative_samples = random_sample(negative_candidates, n_avail_samples)

                samples.extend(
                    (anchor, positive, negative)
                    for positive, negative in zip(positive_samples, negative_samples))

        #Pandas DataFrame
        triplets = DataFrame(samples, columns = self._output_df_cols)

        return triplets
Exemple #5
0
    def sample(self, percent):
        """Randomly keep a fraction of the entries stored under every key.

        Slots 0 and 1 of each ``self.__locations`` entry are replaced by a
        sorted random subset of ``int(len * percent)`` of their values, and
        ``self.total`` is recomputed as the number of values kept.

        Warning: the current object is changed!
        """
        self.total = 0
        for key in self.__locations.keys():
            pair = self.__locations[key]
            for side in (0, 1):
                n_keep = int(len(pair[side]) * percent)
                survivors = sorted(random_sample(pair[side], n_keep))
                pair[side] = array(BYTE4, survivors)
            self.total += len(pair[0]) + len(pair[1])
def calculate_field_type(field_name, field_values, field_position, num_fields, num_samples=100, random=True):
    '''
    Return the highest-scoring field type for a field, with normalized scores.

    Up to num_samples non-empty values are scored (a random subset when
    `random` is true, otherwise the first num_samples), and those scores are
    added to the scores derived from the field name.

    field_position and num_fields are accepted but not used here.

    Returns a tuple (field_type, normalized_type_scores). When every score is
    zero the type falls back to DT.STRING.value and the score dict is empty.
    '''
    # Convert to unicode and drop NAs for type detection
    field_values = field_values.dropna().apply(unicode)

    num_samples = min(len(field_values), num_samples)
    if random:
        # random.sample() indexes its population by position, while a Series is
        # looked up by label (and dropna() leaves gaps in the labels), so the
        # values are materialised into a plain list first.
        field_sample = random_sample(list(field_values), num_samples)
    else:
        field_sample = field_values[:num_samples]

    type_scores_from_name = get_type_scores_from_field_name(field_name, num_samples=num_samples)
    type_scores_from_values = get_type_scores_from_field_values(field_sample)

    # Combine type score dictionaries
    final_type_scores = defaultdict(int)
    for type_scores in (type_scores_from_name, type_scores_from_values):
        for t, score in type_scores.iteritems():
            final_type_scores[t] += score

    # Normalize field scores
    total_score = sum(final_type_scores.values())
    if not total_score:
        return (DT.STRING.value, {})

    normalized_type_scores = {}
    for type_name, score in final_type_scores.iteritems():
        normalized_type_scores[type_name] = float(score) / total_score

    final_field_type = max(final_type_scores.iteritems(), key=lambda t: t[1])[0]
    return (final_field_type, normalized_type_scores)
Exemple #7
0
    def clean(self):
        """Normalise the 'cores' and 'ram' fields and check SSH users.

        'cores' becomes a comma-separated string of CPU indices: a random
        selection of the requested size when a value was given, otherwise every
        index below ``cpu_count()``. 'ram' becomes an int, falling back to
        ``DHost.memory('total')``. A ``forms.ValidationError`` is raised when a
        user listed in ``self.ssh_users`` has no SSH public key.
        """
        ram = self.cleaned_data.get('ram', None)
        cores = self.cleaned_data.get('cores', None)

        if cores:
            chosen_cores = random_sample(range(0, cpu_count()), cores)
        else:
            chosen_cores = range(0, cpu_count())
        self.cleaned_data['cores'] = ','.join(str(core) for core in chosen_cores)

        self.cleaned_data['ram'] = int(ram) if ram else int(DHost.memory('total'))

        for ssh_user in (self.ssh_users or ()):
            if not User.objects.get(email=ssh_user).ssh_pub_key:
                raise forms.ValidationError("SSH key not found")
        return self.cleaned_data
Exemple #8
0
def vehicle_brand(n: int = 1, data_only: bool = True) -> list:
    """Return the names of ``n`` randomly chosen, distinct vehicle brands.

    Parameters
    ----------
    n
        Number of vehicle brands to generate. The minimum is 1 and the
        maximum is 87.
    data_only
        If True, return only the list of brand names. If False, return a
        dict with the keys ``'msg'`` and ``'data'``.

    Raises
    ------
    ValueError
        If ``n`` is outside the range 1 to 87.
    """

    if not (1 <= n <= 87):
        # The message now names vehicle brands and the real upper bound
        # (it previously referred to "UF" and a 1-27 range).
        msg_error = f'The n value "{n}" is invalid. Enter a valid number of vehicle brands.'
        msg_error += ' The range is 1 to 87 vehicle brands.'

        raise ValueError(msg_error)

    brand_names = [v_brand['brand_name'] for v_brand in ALL_VEHICLE_BRANDS.values()]
    data = random_sample(brand_names, n)

    if data_only:
        return data
    return {'msg': 'success', 'data': data}
Exemple #9
0
    def sample (self, percent):
        """Randomly keep a fraction of the entries stored under every key.

        Slots 0 and 1 of each ``self._locations`` entry are replaced by a
        sorted random subset of ``int(len * percent)`` of their values, and
        ``self.total`` is recomputed as the number of values kept.

        Warning: the current object is changed!

        Side effect: self.total_unique is set to None, and the ``self._counts``
        entry of every key is reset to two empty lists.
        """
        self.total = 0
        self.total_unique = None
        for key in self._locations.keys():
            pair = self._locations[key]
            for side in (0, 1):
                n_keep = int(len(pair[side]) * percent)
                pair[side] = array(BYTE4, sorted(random_sample(pair[side], n_keep)))
            self.total += len(pair[0]) + len(pair[1])
            self._counts[key] = [[],[]]
Exemple #10
0
    def handle(self) -> dict:
        """Ask one randomly chosen player who has not yet decided for a decision.

        The remembered player list is filtered by the remembered indexes of
        players that already took part in the current decision round. One of
        the remaining players is picked at random, asked for a decision, and
        that player's index in the original list is recorded. When nobody is
        left (or no player list is remembered), a closing message is said.

        Returns:
            dict -- The platform response built by ``to_platform_dict()``.
        """
        from random import choice as random_choice

        players_list = self.persistent_remember("lastGamePlayersList", list)
        indexes_players_that_participated = self.persistent_remember(
            "IndexesPlayersThatParticipatedInCurrentDecisionRound", list)
        if indexes_players_that_participated is None:
            # Nothing remembered yet: start a fresh round instead of calling
            # .append() on None further down.
            indexes_players_that_participated = []

        if players_list is not None:
            # Keep the ORIGINAL index with each remaining player. The index
            # used to come from enumerating the shuffled, filtered list, which
            # always recorded 0 and so marked the wrong player as done.
            remaining_players = [
                (i_player, player_dict)
                for i_player, player_dict in enumerate(players_list)
                if i_player not in indexes_players_that_participated
            ]
            if remaining_players:
                i_player, player_dict = random_choice(remaining_players)
                player_name = player_dict[
                    "name"] if "name" in player_dict.keys() else i_player
                self.say(f"{player_name} quelle est votre décision ?")
                self.memorize_session_then_state(DecisionStateHandler)
                indexes_players_that_participated.append(i_player)
                self.persistent_memorize(
                    "IndexesPlayersThatParticipatedInCurrentDecisionRound",
                    indexes_players_that_participated)
                # todo: fix that and memorize not when asking to take the decision, but when taking it
                return self.to_platform_dict()

        self.say("Super, toutes les décisions ont été prises.")
        return self.to_platform_dict()
Exemple #11
0
    def get_ky_own_create_article_id_list(self):
        """
        Build a random list of 'ky' article records.

        ``self.ky_intercept_num`` distinct ids are drawn from
        ``range(self.ky_min_article_id, self.ky_max_article_id)``.

        :return: list of dicts with the keys uid, article_type, title,
                 article_id (as str) and article_url
        """
        # Sample straight from the range: only the chosen ids are converted to
        # strings, instead of building a string for every id in the range.
        sampled_ids = random_sample(
            range(self.ky_min_article_id, self.ky_max_article_id),
            self.ky_intercept_num)
        article_id_list = [str(article_id) for article_id in sampled_ids]

        res = [{
            'uid':
            get_uuid3(target_str='{}::{}'.format('ky', article_id)),
            'article_type':
            'ky',
            'title':
            '未知',
            'article_id':
            article_id,
            'article_url':
            'https://www.kaiyanapp.com/detail.html?vid={}'.format(article_id),
        } for article_id in article_id_list]

        return res
Exemple #12
0
def vehicle_brand(n: int = 1, data_only: bool = True) -> list:
    """Random generation of vehicle brand names.

    Keyword arguments:

    `n: int` - Number of distinct vehicle brands to generate. The range is 1 to 87.

    `data_only: bool` - If True, return data only. If False, return msg and data.

    Raises `ValueError` when `n` is outside the range 1 to 87.
    """

    # Reject an out-of-range request before touching the brand table.
    if not (1 <= n <= 87):
        # The message now names vehicle brands and the real upper bound
        # (it previously referred to "UF" and a 1-27 range).
        msg_error = f'The n value "{n}" is invalid. Enter a valid number of vehicle brands.'
        msg_error += ' The range is 1 to 87 vehicle brands.'

        raise ValueError(msg_error)

    brand_names = [v_brand['brand_name'] for v_brand in ALL_VEHICLE_BRANDS.values()]
    data = random_sample(brand_names, n)

    if data_only:
        return data
    return {'msg': 'success', 'data': data}
Exemple #13
0
 def get_training_set(self, n_samples):
     """
     Draw <n_samples> distinct indices, in random order, from
     range(len(self.memory) - 1) (the last index is never drawn).
     :return: list(<int>)
     """
     upper_bound = len(self.memory) - 1
     candidate_indices = range(upper_bound)
     return random_sample(candidate_indices, n_samples)
Exemple #14
0
    def random(self, count, fixture_name):
        """Yield ``int(count)`` distinct, randomly chosen items of a fixture.

        The fixture is looked up with ``self.fixture(fixture_name)``.
        """
        picked = random_sample(self.fixture(fixture_name), int(count))
        for item in picked:
            yield item
def add_top_neig_update(g1, thres_neig, folNm, inputs, model, scaler,
                        neig_list):
    """Add the neighbor chosen by find_imp_neig to g1 and refresh the neighbor list.

    When inputs["use_all_neigs"] is 0 and there are more than thres_neig
    neighbors, only a random subset of thres_neig neighbors is evaluated. The
    neighbor list is still updated from the full list afterwards.

    Returns a 7-tuple
    (g1, added_flag, node_to_add, score, compBool, rand_flag, neig_list).
    added_flag is 0 (with None placeholders) when neig_list is empty, and 1
    after a node has been added.
    """
    neig_list_orig = None
    rand_flag = 0
    if not neig_list:  # Checking if empty
        logging_debug("No more neighbors to add")
        return g1, 0, None, None, None, rand_flag, neig_list
    if inputs["use_all_neigs"] == 0:
        # Don't check all neighbors - just a subset if number of neighbors is large
        if len(neig_list) > thres_neig:  # Make 500
            neig_list_orig = neig_list
            # random.sample() needs a sequence; a dict items view is not one,
            # so it is copied into a list first.
            neig_list = dict(random_sample(list(neig_list.items()), thres_neig))
            rand_flag = 1

    node_to_add, score, compBool, rand_flag = find_imp_neig(
        neig_list, g1, inputs['perc'], model, scaler, inputs,
        inputs['explore_prob'], rand_flag)

    g1 = add_newnode(g1, node_to_add, neig_list[node_to_add]['graph_neigs'])

    # Always update from the complete neighbor list, not the random subset.
    full_neig_list = neig_list if neig_list_orig is None else neig_list_orig
    neig_list = update_neig_list(full_neig_list, node_to_add, folNm,
                                 g1.nodes())
    return g1, 1, node_to_add, score, compBool, rand_flag, neig_list
Exemple #16
0
    def on_start(self):
        """ on_start is called when a Locust start before any task is scheduled

        Loads the options, looks for the data CSV, builds the URL list, checks
        that a host and a data CSV are set, and logs the configuration together
        with a small random sample of the URLs under test.
        """
        increase_system_open_file_limits()
        self._load_options()
        csvs_found = self._find_data_csv()
        self._build_urL_list()

        if not self.host:
            exit_with_failure_msg(
                "host must be passed after --host flag.\n" +
                "Example: > locust --host https://example.com")
        if not self.datacsv:
            if not csvs_found:
                exit_with_failure_msg(
                    "No CSV/TSV file found in same folder as locustfile.\n" +
                    "Example: > locust --host https://example.com")
            else:
                exit_with_failure_msg(
                    "Two or more CSV data files found in same folder as locustfile\n"
                    + "{}".format(" | ".join(csvs_found)))

        # Never ask for more sample URLs than the list holds.
        num_sample_urls = min(3, len(URL_LIST))
        logger.info('USING BASE HOSTNAME: {}'.format(self.host))
        logger.info('USING CSV FILE: {}'.format(self.datacsv))
        logger.info('TESTING {} VARIATIONS OF THE SAME URL'.format(
            len(URL_LIST)))
        # Format each line separately so braces in the host cannot be mistaken
        # for format placeholders.
        sample_lines = ''.join(
            '{}{}\n'.format(self.host, url)
            for url in random_sample(URL_LIST, num_sample_urls))
        logger.info('Sample of URLs:\n\n{}\n'.format(sample_lines))
Exemple #17
0
    def apply_crossover(self, pop):
        """ Replace randomly chosen rows of pop by intermediate-point offspring.

        ``self.num_crossover`` distinct rows are picked and split into two
        parent sets. Each offspring is ``parent_1 + delta * (parent_2 - parent_1)``
        with ``delta`` drawn uniformly from [0, 1); the second half of the
        offspring pairs the first parents with the second parents in reverse
        order. The chosen rows of ``pop`` are overwritten in place.

        Returns the updated population and the list of replaced row indices.
        """
        n_cross = self.num_crossover
        n_vars = self.nvars
        split = int(n_cross / 2)

        # distinct row indices taking part in the crossover
        chosen = random_sample(range(self.pop_size), k=n_cross)
        first_parents = pop[chosen[:split], :]
        second_parents = pop[chosen[split:], :]

        # interpolation weights; only the first `split` rows are used
        weights = np.random.rand(n_cross, n_vars)
        front_weights = weights[:split, :]

        children = np.empty((n_cross, n_vars))
        forward_step = np.multiply(front_weights, (second_parents - first_parents))
        children[:split, :] = first_parents + forward_step
        reversed_step = np.multiply(front_weights,
                                    (second_parents[::-1] - first_parents))
        children[split:, :] = first_parents + reversed_step

        # write the offspring over their parents
        pop[chosen, :] = children

        return pop, chosen
	def impute_missing_data(self):
		"""Fill missing feature values using rows drawn from the sample dataset.

		For every category in self.missing_dataset that also has sample data,
		each row is removed from the missing list, every feature (from index 1
		on) equal to self.missing_data_chr is replaced by the value of a row
		drawn from the same category of self.sample_dataset, and the completed
		row is appended to that sample data. Categories without sample data
		are reported and left untouched.
		"""
		# process each category/class
		for category in self.missing_dataset:
			# NOTE(review): the last element of the category key is dropped for
			# display only - presumably a trailing newline; confirm.
			print("imputing category: %s" % category[:-1])
			# check if we have sample data for imputation
			# if not, skip and go to next category/class
			# since, we will skip these rows, they won't
			# be included in the final dataset that will be
			# saved in the source file after imputation
			if category not in self.sample_dataset:
				# %s rather than %d: the key is sliced like a string above, and
				# %d raised TypeError for it instead of skipping the category.
				print("\t~ no sample data for imputing missing data for category %s" % category[:-1])
				continue
			pending_rows = self.missing_dataset[category]
			# retrieve and impute each row until the list is empty
			while pending_rows:
				# retrieve a row (first row)
				row = pending_rows.pop(0)
				# get a random row from sample dataset under same category/class
				sample = random_sample(self.sample_dataset[category])
				# check for missing value for each feature and
				# if missing, replace the value with the value
				# from randomly selected sample row
				for feature in range(1, len(row)):
					if row[feature] == self.missing_data_chr:
						row[feature] = sample[feature]
				# insert the row in sample dataset
				self.sample_dataset[category].append(row)
Exemple #19
0
def spread_distribution(combinedInterval, componentsForInterval):
    """Distribute combinedInterval.outCount units over the components at random.

    Every component gets ``inCount`` slots in a pool. ``outCount`` slots are
    drawn without replacement, and each component's ``outCount`` is set to the
    number of its slots that were drawn.
    """
    pool = []
    for part in componentsForInterval:
        pool.extend([part] * part.inCount)
        part.outCount = 0

    drawn_slots = random_sample(pool, combinedInterval.outCount)
    for owner in drawn_slots:
        owner.outCount += 1
Exemple #20
0
def final_summarize_children_or_subtree(dates, out_files, tag_dict,
                                        tei_logger):
    """Write the aggregated per-tag statistics as two tab-separated tables.

    ``out_files`` is unpacked as ``(notext, text)``. Both files first receive
    the same header row. Each entry of ``tag_dict`` then becomes one row with:
        - the frequency of the tag
        - the name and attributes of the root tag
        - the average word count of the texts it contains
        - the average number of descendant tags
        - the average length of immediate texts
        - up to five randomly chosen URLs, space separated
        - a placeholder for the normal name ('null' or 'default')
        - a placeholder for the preserved attribute ('default')
    Rows whose average word count is zero go to the notext file, all others
    to the text file.
    """
    _ = dates, tei_logger  # Silence IDE
    out_notext_fh, out_text_fh = out_files
    header = ('frequency', 'tag', 'average_word_count',
              'average_descendant_num', 'immediate_texts_average_length',
              'URL_example', 'normal_name', 'preserved_attribute')
    for table in (out_text_fh, out_notext_fh):
        print(*header, sep='\t', file=table)

    for root_name_attr, stats in tag_dict.items():
        (freq, no_of_words, no_of_descendants, all_links,
         len_of_immediate_text) = stats
        how_many = min(5, len(all_links))
        example_links = ' '.join(random_sample(all_links, k=how_many))
        avg_no_of_words = no_of_words / freq
        avg_no_of_descendants = no_of_descendants / freq
        avg_len_of_immediate_text = len_of_immediate_text / freq
        has_text = avg_no_of_words != 0
        row = (freq, root_name_attr, avg_no_of_words, avg_no_of_descendants,
               avg_len_of_immediate_text, example_links,
               'default' if has_text else 'null', 'default')
        print(*row, sep='\t',
              file=out_text_fh if has_text else out_notext_fh)
Exemple #21
0
 def run(self, model, obj):
     """Select a batch of domain points with the eps-greedy rule.

     For every point of the batch, a random candidate is drawn with
     probability of roughly ``self.eps``; otherwise the point with the best
     model prediction is taken. Points already in ``obj.results`` or already
     selected are passed on as known points.

     Parameters
     ----------
     model : edbo.models
         Trained model to be sampled.
     obj : edbo.objective
         Objective object containing information about the domain.

     Returns
     ----------
     pandas.DataFrame
         Selected domain points.
     """

     # Model predictions over the whole domain
     torch_domain = to_torch(obj.domain, gpu=obj.gpu)
     pred = obj.domain.copy()
     pred['pred'] = model.predict(torch_domain)

     # 0 = explore (random point), 1 = exploit (best prediction)
     n_explore = round(self.eps * 1000)
     n_exploit = round((1 - self.eps) * 1000)
     choice_list = [0] * n_explore + [1] * n_exploit

     selected = pd.DataFrame(columns=obj.domain.columns.values)
     for _ in range(self.batch_size):

         # Points observed so far, including this batch's picks
         observed = obj.results.drop(obj.target, axis=1)
         known_X = pd.concat([observed, selected], sort=False)

         (choice,) = random_sample(choice_list, 1)

         if choice == 0:
             # Explore: draw one random candidate
             if self.duplicates == True:
                 candidates = obj.domain
             elif self.duplicates == False:
                 candidates = complement(obj.domain, known_X)
             selected_i = candidates.sample(1)
         else:
             # Exploit: take the best predicted point
             best = argmax(pred,
                           known_X,
                           target='pred',
                           duplicates=self.duplicates,
                           top_n=1)
             selected_i = best.drop('pred', axis=1)

         selected = pd.concat([selected, selected_i], sort=False)

     return selected
Exemple #22
0
    def get_zq_own_create_article_id_list(self, min_article_id: int,
                                          max_article_id: int):
        """
        Build a random list of self-created 'zq' article records.

        ``self.zq_intercept_num`` distinct ids are drawn from the newest part
        of the id range (see the lower-bound computation below).

        :return: list of dicts with the keys uid, article_type, title,
                 article_id (as str) and article_url
        """
        # Move the lower bound towards max_article_id three times (halving the
        # remaining gap each time) so that recent articles are favoured and
        # old news is not posted over and over.
        middle_article_id = int((min_article_id + max_article_id) / 2)
        middle_article_id = int((middle_article_id + max_article_id) / 2)
        middle_article_id = int((middle_article_id + max_article_id) / 2)
        self.lg.info('middle_article_id: {}'.format(middle_article_id))

        # Sample straight from the range: only the chosen ids are converted to
        # strings, instead of building a string for every id in the range.
        sampled_ids = random_sample(
            range(middle_article_id, max_article_id), self.zq_intercept_num)
        article_id_list = [str(article_id) for article_id in sampled_ids]

        # NOTE: the records are returned without validating each URL; the
        # title stays at its placeholder value.
        return [{
            'uid':
            get_uuid3(target_str='{}::{}'.format('zq', article_id)),
            'article_type':
            'zq',
            'title':
            '未知',
            'article_id':
            article_id,
            'article_url':
            'https://focus.youth.cn/mobile/detail/id/{}#'.format(article_id),
        } for article_id in article_id_list]
Exemple #23
0
 def swap_rows(self):
     """Randomly swap rows of self.board inside each band, in place.

     The rows are treated as consecutive bands of self.base rows. Inside
     every band, two distinct rows are exchanged self.base - 1 times.
     Nothing is returned; self.board is modified.
     """
     grid = self.board
     row_numbers = range(0, self.size)
     for band_start in range(0, self.size, self.base):
         band = row_numbers[band_start:band_start + self.base]
         for _ in range(self.base - 1):
             first, second = random_sample(band, 2)
             grid[first], grid[second] = grid[second], grid[first]
Exemple #24
0
 def handle_index(self, req):
     """Render the index page with a per-client random choice of router ids.

     The choice made for a remote address is stored in self.HostCache and
     reused until the cache entry is gone.
     """
     self.expire_host_cache()
     client = req.remote_addr
     if client in self.HostCache:
         router_ids = self.HostCache[client]
     else:
         n_routers = len(self.routers)
         how_many = min(n_routers, self.ROUTER_COUNT)
         router_ids = random_sample(xrange(n_routers), how_many)
         self.HostCache[client] = router_ids
     return Response(self.render(router_ids), mimetype='text/html')
Exemple #25
0
def sample(population, k=None):
    """Return k distinct random elements of population, like random.sample.

    The population is first copied into a list, so any iterable is accepted.
    When k is omitted it is drawn with randint(1, len(population)), so the
    result is never empty.
    """
    items = list(population)
    size = randint(1, len(items)) if k is None else k
    return random_sample(items, size)
Exemple #26
0
 def handle_index (self, req):
     """Render the index page with a per-client random choice of router ids.

     The choice made for a remote address is stored in self.HostCache and
     reused until the cache entry is gone.
     """
     self.expire_host_cache()
     client = req.remote_addr
     if client in self.HostCache:
         router_ids = self.HostCache[client]
     else:
         n_routers = len(self.routers)
         router_ids = random_sample (xrange(n_routers), min(n_routers, self.ROUTER_COUNT))
         self.HostCache[client] = router_ids
     return Response (self.render(router_ids), mimetype='text/html')
 def generate(self):
     """Build, store and return the mutated sequence.

     The number of error positions is round(pSubstitution * ntSequenceLength).
     With zero errors the sequence is reused unchanged; otherwise that many
     distinct positions of self.seq are drawn and passed to apply_errors().
     """
     # $$$ this needs to consider that some positions don't have valid ACGT,
     #     and exclude them from the nErrors computation and from eligibility
     #     as an error position
     nErrors = round(pSubstitution * ntSequenceLength)
     if (nErrors != 0):
         all_positions = range(len(self.seq))
         errorPositions = random_sample(all_positions, nErrors)
         self.mutatedSeq = self.apply_errors(errorPositions)
     else:
         self.mutatedSeq = self.seq
     return self.mutatedSeq
Exemple #28
0
 def swap_columns(self):
     """Randomly swap columns of self.board inside each band, in place.

     The columns are treated as consecutive bands of self.base columns.
     Inside every band, two distinct columns are exchanged self.base - 1
     times, across the first self.size rows. Nothing is returned; self.board
     is modified.
     """
     grid = self.board
     column_numbers = range(0, self.size)
     for band_start in range(0, self.size, self.base):
         band = column_numbers[band_start:band_start + self.base]
         for _ in range(self.base - 1):
             left, right = random_sample(band, 2)
             for row_number in range(0, self.size):
                 cells = grid[row_number]
                 cells[left], cells[right] = cells[right], cells[left]
Exemple #29
0
def final_bigram(dates, out_files, tag_dict, tei_logger):
    """Write the aggregated tag-bigram information as a tab-separated table.

    Each entry of ``tag_dict`` becomes one row in ``out_files[0]`` with:
        - the frequency of the tag bigram
        - the name and attributes of the root tag
        - up to five randomly chosen URLs, space separated
    """
    _ = dates, tei_logger  # Silence IDE
    table = out_files[0]
    for root_name_attr, entry in tag_dict.items():
        freq, all_links = entry
        how_many = min(5, len(all_links))
        picked_links = random_sample(all_links, k=how_many)
        print(freq, root_name_attr, ' '.join(picked_links), sep='\t',
              file=table)
Exemple #30
0
    def _tournament(self, params):

        competitors = random_sample(self.population, params['n_competitors'])

        fitness_to_beat = competitors[0].fitness
        winner = competitors[0]

        for competitor in competitors:
            if competitor.fitness > fitness_to_beat:
                winner = competitor

        return winner
Exemple #31
0
    def setIconSize(self, size):
        """
        Sets the icon size.

        The size is applied to the page item model and the list view. When the
        model has a display config whose first column defines labels, the
        size is widened to fit the widest label among a small random sample of
        rows and made taller by 16 units per label.
        Finally the page bar and selection are refreshed and
        sigPageSizeChanged is emitted.

        Args:
            size: (width, height), QSize or int in % units

        Raises:
            Exception: if size is none of tuple, int, float or QSize.
        """
        #  FIXME[hv] review when the size is int
        if isinstance(size, tuple):
            s = qtc.QSize(size[0], size[1])
            self._percentIconSize = None
        elif isinstance(size, int) or isinstance(size, float):  # in % units
            # Percentage of the model dimensions returned by getDim()
            self._percentIconSize = size
            x, y = self._model.getDim()
            x = int(x * (size / 100.0))
            y = int(y * (size / 100.0))
            s = qtc.QSize(x, y)
            size = x, y
        elif isinstance(size, qtc.QSize):
            s = size
            size = size.width(), size.height()
            self._percentIconSize = None
        else:
            raise Exception("Invalid icon size.")

        # From here on `size` is a (width, height) tuple and `s` a QSize
        self._pageItemModel.setIconSize(qtc.QSize(s))
        dispConfig = self._pageItemModel.getDisplayConfig()
        if dispConfig is not None:
            m = self._pageItemModel
            cc = dispConfig.getColumnConfig(0)
            # Number of labels configured for the first column (0 if none)
            lSize = len(cc.getLabels()) if cc is not None else 0
            s = qtc.QSize(size[0], size[1])
            margin = 10
            if lSize > 0:
                maxWidth = 0
                r = m.rowCount()
                fontMetrics = self._listView.fontMetrics()

                # Measure the labels of at most 10 randomly chosen rows
                # instead of every row
                for i in random_sample(range(r), min(10, r)):
                    index = m.createIndex(i, 0)
                    labels = m.data(index, widgets.LABEL_ROLE)
                    for text in labels:
                        w = fontMetrics.boundingRect(text).width() + margin
                        maxWidth = max(maxWidth, w)
                # Never narrower than the icon; 16 extra height units per label
                s.setWidth(max(s.width(), maxWidth))
                s.setHeight(s.height() + lSize * 16)
        self._listView.setIconSize(s)
        self._pageItemModel.setIconSize(s)
        self.__updatePageBar()
        self.__updateSelectionInView(self._pageBar.getCurrentPage() - 1)
        self.sigPageSizeChanged.emit()
def calculate_field_type(field_name,
                         field_values,
                         field_position,
                         num_fields,
                         num_samples=100,
                         random=True):
    '''
    Return the highest-scoring field type for a field, with normalized scores.

    Up to num_samples non-empty values are scored (a random subset when
    `random` is true, otherwise the first num_samples), and those scores are
    added to the scores derived from the field name. The values and the
    chosen sample are printed.

    field_position and num_fields are accepted but not used here.

    Returns a tuple (field_type, normalized_type_scores). When every score is
    zero the type falls back to DT.STRING.value and the score dict is empty.
    '''
    # Convert to str and drop NAs for type detection
    field_values = field_values.dropna().apply(str)

    print('field_values', field_values)

    num_samples = min(len(field_values), num_samples)
    if random:
        field_sample = random_sample(list(field_values), num_samples)
    else:
        field_sample = field_values[:num_samples]

    print('field_sample', field_sample)

    name_scores = get_type_scores_from_field_name(
        field_name, num_samples=num_samples)
    value_scores = get_type_scores_from_field_values(field_sample)

    # Combine type score dictionaries
    final_type_scores = defaultdict(int)
    for partial_scores in (name_scores, value_scores):
        for t, score in partial_scores.items():
            final_type_scores[t] += score

    # Normalize field scores
    total_score = sum(final_type_scores.values())
    if not total_score:
        return (DT.STRING.value, {})

    normalized_type_scores = {
        type_name: float(score) / total_score
        for type_name, score in final_type_scores.items()
    }
    best_type, _ = max(final_type_scores.items(), key=lambda item: item[1])
    return (best_type, normalized_type_scores)
Exemple #33
0
def TestPrint():
    """Run the real-coded genetic algorithm for 25 steps and print each step.

    Random start data of shape (25, 1), scaled by 10, is generated. On every
    step the algorithm object is called with the same start data and 0.01,
    and its result is reduced by the MinimumValue object. The start data,
    every step's result and its minimum are printed.

    Returns the result and the minimum of the last step.
    """
    # 25x1 array of uniform values in [0, 1), scaled by 10
    DATA = random.random_sample([25, 1]) * 10.
    _RGA_ = RealCodedGeneticAlgorithm()
    MIN_VALUE = MinimumValue()
    # NOTE(review): _FitnessFunction is created but never used in this function
    _FitnessFunction = FitnessFunction()

    Step = 25
    print "Started data::", '\n', DATA, '\n'
    print "Real-Coded Genetic Algorithm::"
    for i in xrange(Step):
        # NOTE(review): the original comment equates 0.01 with a 10% rate - confirm
        rga = _RGA_(DATA, 0.01)  #0.01 -> rate == 10%
        minrezult = MIN_VALUE(rga)
        print rga, '\n', "The minimum value at each sten RGA::", minrezult, '\n'
    return rga, minrezult
Exemple #34
0
	def __sample_from_pmf(self, pmf, nominals):
		"""Draw an element of nominals at random, weighted by pmf.

		pmf is replaced IN PLACE by its absolute values, accumulated into a
		cumulative sum, and normalized by its last entry. A uniform random
		number is then located among those bins to choose the element.

		Raises ValueError (after printing pmf) when the weights sum to zero.
		"""
		# normalize PMF to make it "proper"
		# (takes absolute values in place, so the caller's pmf is modified)
		abs(pmf, out = pmf)
		
		# -- CDF
		bins = cumsum(pmf)
		
		if (bins[-1] == 0.0):
			print pmf
			raise ValueError
		
		bins /= bins[-1] # normalization
		
		# random_sample(1) yields a one-element array, so the index passed to
		# nominals is an array as well
		return nominals[digitize(random.random_sample(1), bins)]
Exemple #35
0
# change all prints to python logging
# add comments and details about the code

# Search space: one (lower bound, upper bound, kind) triple per hyper-parameter
hyper_para = list()

hyper_para.append((1e-4, 1e-2, 'real'))
hyper_para.append((10, 1000, 'int'))
hyper_para.append((1, 9, 'int'))
hyper_para.append((50, 500, 'int'))
hyper_para.append((1e-1, 9e-1, 'real'))

# Hyper-parameter settings to evaluate; the first one is fixed by hand
hyper_values = list()
hyper_values.append([0.001, 100, 3, 100, 0.25])

# range(0) is empty, so no random settings are added at the moment
# NOTE(review): random_sample is called with a single argument here -
# presumably a project helper rather than random.sample; confirm.
for i in range(0):
    hyper_values.append(random_sample(hyper_para))

res = list()  # to store the results

# Evaluate every setting
# NOTE(review): only constants are assigned in the visible loop body - the rest
# of the loop may lie outside this excerpt.
for i in range(len(hyper_values)):
    values = hyper_values[i]
    print values
    max_features = 5000
    maxlen = 10
    batch_size = 8
    embedding_dims = 100
    nb_filter = 100
    filter_length = 3
    hidden_dims = 100
    nb_epoch = 3
 def filter_records(self, records):
     """Return at most self.max_number randomly chosen records.

     When there are fewer records than self.max_number, all of them are
     returned (in random order) instead of raising ValueError.
     """
     how_many = min(self.max_number, len(records))
     return random_sample(records, how_many)
Exemple #37
0
 def new_mid(self):
     """Build a new identifier of the form <LETTERS/NMA@address>.

     LETTERS are 20 distinct random ASCII letters; address is self.address.
     """
     letters = random_sample(ascii_letters, 20)
     local_part = "".join(letters) + "/NMA"
     return "<" + local_part + "@" + self.address + ">"
Exemple #38
0
def sample_unique_(S):
    """Return one randomly chosen element of S."""
    (picked,) = random_sample(S, 1)
    return picked
Exemple #39
0
def heat(X, magnitude, random = np.random.RandomState()):
    """Return uniform random noise in [-magnitude, magnitude).

    When X is an array (according to isarray) the noise has X's shape;
    otherwise X itself is used as the shape. The noise is returned on its
    own; it is not added to X.
    """
    if isarray(X):
        shape = X.shape
    else:
        shape = X
    centered = random.random_sample(shape)-0.5
    return centered * (magnitude*2)