Ejemplos de DefaultDict en Python, ejemplos de typing.DefaultDict en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_bytecode_opt.py Proyecto: porglezomp/scheme-jit

    def test_mark_functions(self) -> None:
        """Check the edge maps that ``compute_preds`` stores on the optimizer.

        The function's start block is split, giving a chain of three blocks.
        Both ``opt.succs`` and ``opt.preds`` are expected to be keyed by the
        ``id()`` of a block and to hold ``(source, 1, target)`` edge tuples.
        """
        func = make_func()
        func.start.split_after(0)
        head = func.start
        middle = next(head.successors())
        tail = next(middle.successors())

        # The two edges of the chain head -> middle -> tail.
        head_to_middle = (head, 1, middle)
        middle_to_tail = (middle, 1, tail)

        opt = FunctionOptimizer(func)
        opt.compute_preds()

        expected_succs = DefaultDict(list, {
            id(head): [head_to_middle],
            id(middle): [middle_to_tail],
            id(tail): [],
        })
        expected_preds = DefaultDict(list, {
            id(head): [],
            id(middle): [head_to_middle],
            id(tail): [middle_to_tail],
        })
        self.assertEqual(opt.succs, expected_succs)
        self.assertEqual(opt.preds, expected_preds)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: day_14.py Proyecto: sergiokv13/advent-2020

def get_letter_count(steps, template, rules):
    """Return the highest and lowest letter counts after pair insertion.

    The template is tracked as a multiset of adjacent letter pairs, so the
    work per step depends on the number of distinct pairs rather than on the
    length of the (exponentially growing) string.

    Args:
        steps: Number of insertion rounds to apply.
        template: Starting sequence of letters (indexable, e.g. a ``str``).
        rules: Mapping from a pair to an iterable of the pairs that replace
            it in one round; ``rules[pair]`` is looked up for every pair
            that is present.

    Returns:
        ``(max_val, min_val)``: the counts of the most and least common
        letters.  For an empty template no letter exists and the result is
        ``(float("-inf"), float("inf"))``.
    """
    pairs_count = DefaultDict(int)
    for left, right in zip(template, template[1:]):
        pairs_count[left + right] += 1

    for _ in range(steps):
        # Build the next generation separately so that counts added for new
        # pairs never mix with the counts still being consumed.
        next_count = DefaultDict(int)
        for pair, count in pairs_count.items():
            for new_pair in rules[pair]:
                next_count[new_pair] += count
        pairs_count = next_count

    # Every letter is counted once per pair it belongs to: interior letters
    # twice, the two end letters only once.
    occ_count = DefaultDict(int)
    for pair, count in pairs_count.items():
        occ_count[pair[0]] += count
        occ_count[pair[1]] += count

    # Credit the two end letters once more so that *every* letter is counted
    # exactly twice.  Rounding up instead is wrong when the first and last
    # letter are the same.  This assumes the rules keep the end letters in
    # place, as pair-insertion rules do.
    if template:
        occ_count[template[0]] += 1
        occ_count[template[-1]] += 1

    max_val = float("-inf")
    min_val = float("inf")
    for doubled in occ_count.values():
        val = doubled // 2
        if val > max_val:
            max_val = val
        if val < min_val:
            min_val = val

    return max_val, min_val

Ejemplo n.º 3

0

Mostrar archivo

Archivo: day-14.py Proyecto: ThomW/advent-of-code

def part_two(data, steps):
	"""Return (most common - least common) letter count after `steps` rounds.

	`process_input(data)` supplies the start string and the rules; `rules`
	is indexed with a two-letter pair and yields the letter inserted between
	them.  Pairs are tracked by frequency rather than building the string.
	The start string is also printed.
	"""
	start, rules = process_input(data)

	print()
	print(start)

	# Initialize letter counter
	letters = DefaultDict(int)
	for char in start:
		letters[char] += 1

	# Convert start string into a map where key is letter combo, value is
	# frequency.  Accumulate with += so that a pair occurring several times
	# in the start string (e.g. "NNN") is counted every time.
	pairs = DefaultDict(int)
	for n in range(len(start) - 1):
		pairs[start[n:n+2]] += 1

	# Loop through steps incrementing counters for matching pairs
	for _ in range(steps):
		new_pairs = DefaultDict(int)
		for pair, frequency in pairs.items():
			inserted = rules[pair]

			# Follow rules to inject new values in the middle of pairs
			new_pairs[pair[0] + inserted] += frequency
			new_pairs[inserted + pair[1]] += frequency

			# Keep track of the new letters introduced into the overall string
			letters[inserted] += frequency

		pairs = new_pairs

	return max(letters.values()) - min(letters.values())

Ejemplo n.º 4

0

Mostrar archivo

Archivo: backend.py Proyecto: coduxer/mirage

    def __init__(self) -> None:
        """Create the backend's settings objects, caches and locks.

        Nothing is awaited here; only attributes are initialised.
        """
        self.appdirs = AppDirs(appname=__app_name__, roaming=True)

        # Each of these helpers receives this backend instance.
        self.saved_accounts = Accounts(self)
        self.ui_settings    = UISettings(self)
        self.ui_state       = UIState(self)
        self.history        = History(self)

        self.models = ModelStore()

        # Starts empty; NOTE(review): presumably keyed by user ID - confirm.
        self.clients: Dict[str, MatrixClient] = {}

        # No SSO server (or task for it) exists until one is set elsewhere.
        self._sso_server:      Optional[SSOServer]      = None
        self._sso_server_task: Optional[asyncio.Future] = None

        self.profile_cache: Dict[str, nio.ProfileGetResponse] = {}
        # A new asyncio.Lock is created on first access of each key.
        self.get_profile_locks: DefaultDict[str, asyncio.Lock] = \
            DefaultDict(asyncio.Lock)  # {user_id: lock}

        self.send_locks: DefaultDict[str, asyncio.Lock] = \
            DefaultDict(asyncio.Lock)  # {room_id: lock}

        # The MIRAGE_CACHE_DIR environment variable, when set and non-empty,
        # overrides the cache directory reported by appdirs.
        cache_dir = Path(
            os.environ.get("MIRAGE_CACHE_DIR") or self.appdirs.user_cache_dir,
        )

        self.media_cache: MediaCache = MediaCache(self, cache_dir)

        self.presences: Dict[str, Presence] = {}

        # Bounded semaphore with 8 slots; judging by the name it caps
        # concurrent presence requests - confirm at the call sites.
        self.concurrent_get_presence_limit = asyncio.BoundedSemaphore(8)

        # Missing keys start as an empty list of events.
        self.mxc_events: DefaultDict[str, List[Event]] = DefaultDict(list)

Ejemplo n.º 5

0

Mostrar archivo

    def print_file_extension_stats(self, sort_by: str = 'size') -> None:
        """Print a table of file count and total size per file extension.

        Files come from ``scan_files(self._path)``.  Rows are sorted in
        descending order by file count when ``sort_by == 'files'`` and by
        total size for any other value.  A file that cannot be stat'ed is
        still counted, a warning is logged and its size is not added.
        Ctrl-C prints a short message instead of propagating.
        """
        try:
            per_extension = DefaultDict(lambda: DefaultDict(int))
            for path in scan_files(self._path):
                _, extension = os.path.splitext(path)
                bucket = per_extension[extension]
                bucket['files'] += 1
                try:
                    bucket['size'] += os.stat(path).st_size
                except OSError as err:  # FileNotFoundError is an OSError
                    logger.warning(err)

            rows = [(extension, bucket['files'], bucket['size'])
                    for extension, bucket in per_extension.items()]
            # Column 1 holds the file count, column 2 the size in bytes.
            sort_column = 1 if sort_by == 'files' else 2
            rows.sort(key=lambda row: row[sort_column], reverse=True)
            table = [(extension, files, convert_size(size))
                     for extension, files, size in rows]

            print("\nFile Extensions statistics:\n")
            print(
                tabulate(table,
                         headers=['Extension', "Files", "Size"],
                         tablefmt="github"))
        except KeyboardInterrupt:
            print("Interrupted by user")

Ejemplo n.º 6

0

Mostrar archivo

def check_sudoku_x(grid, size):
    """Check a Sudoku-X grid: both diagonals plus the `check_sudoku` checks.

    Returns a ``(result, errors)`` tuple.  ``result`` is 'Solved',
    'Unsolved' (at least one cell is 0) or 'Incorrect' (a non-zero value is
    repeated on a diagonal, or `check_sudoku` reports 'Incorrect').
    ``errors`` is the text describing every duplicate found, followed by
    the error text returned by `check_sudoku`.
    """

    def _diagonal_report(cells, number):
        # Group the diagonal's coordinates by cell value and describe every
        # non-zero value that occurs more than once, in sorted value order.
        places_by_value = DefaultDict(list)
        for row, col in cells:
            places_by_value[grid[row][col]].append((row, col))

        report = ''
        for value, places in sorted(places_by_value.items()):
            if len(places) > 1 and value != 0:
                report += f'    {value} is repeated in diagonal {number}\n'
                report += '      '
                # coordinates are reported 1-based
                report += ''.join(f'({row+1},{col+1}) '
                                  for row, col in places)
                report += '\n'
        return report

    # A single empty (0) cell is enough to call the grid unsolved.
    has_empty_cell = any(grid[i][j] == 0
                         for i in range(size)
                         for j in range(size))
    result = 'Unsolved' if has_empty_cell else 'Solved'

    # Diagonal 1 runs top-left to bottom-right, diagonal 2 top-right to
    # bottom-left.
    errors = _diagonal_report([(i, i) for i in range(size)], 1)
    errors += _diagonal_report([(i, size - i - 1) for i in range(size)], 2)
    if errors:
        result = 'Incorrect'

    # Errors found by the plain Sudoku check are lifted to Sudoku-X.
    result_sub, errors_sub = check_sudoku(grid, size)
    if result_sub == 'Incorrect':
        result = 'Incorrect'
    errors += errors_sub
    return result, errors

Ejemplo n.º 7

0

Mostrar archivo

 async def initialize(self):
     """Fill the guild and channel caches from config, then signal readiness.

     Runs ``cleanup_task`` first.  Each cache is a default dict seeded with
     the stored config data; keys that are missing are produced by the
     matching ``_*_defaults`` factory.  ``cog_ready`` is set last.
     """
     await self.cleanup_task()

     stored_guilds = await self.config.all_guilds()
     self.guild_cache = DefaultDict(self._guild_defaults, stored_guilds)

     stored_channels = await self.config.all_channels()
     self.channel_cache = DefaultDict(self._channel_defaults, stored_channels)

     self.cog_ready.set()

Ejemplo n.º 8

0

Mostrar archivo

Archivo: round.py Proyecto: zsaladin/LFT2

    def __init__(self):
        """Start with empty datum, vote and voter collections."""
        self._datums: Datums = Datums()

        self._votes: Votes = Votes()
        # A missing key gets a fresh OrderedDict on first access.
        # NOTE(review): presumably keyed by data ID, per the name - confirm.
        self._votes_by_data_id: VotesByDataID = DefaultDict(OrderedDict)

        self._voters = set()
        # A missing key gets a fresh empty set on first access.
        self._voters_by_data_id = DefaultDict(set)

Ejemplo n.º 9

0

Mostrar archivo

def check_latin_square(grid, size):
    """Check every column and every row of a Latin square for duplicates.

    Returns a ``(result, errors)`` tuple.  ``result`` is 'Solved',
    'Unsolved' (at least one cell is 0) or 'Incorrect' (some non-zero value
    is repeated within a column or a row).  ``errors`` lists every repeated
    value with the 1-based coordinates of its occurrences, columns first,
    then rows.
    """

    def _line_report(cells, label):
        # Group the line's coordinates by cell value and describe each
        # non-zero value that occurs more than once (first-seen order).
        places_by_value = DefaultDict(list)
        for row, col in cells:
            places_by_value[grid[row][col]].append((row, col))

        report = ''
        for value, places in places_by_value.items():
            if len(places) > 1 and value != 0:
                report += f'    {value} is repeated in {label}\n'
                report += '      '
                report += ''.join(f'({row+1},{col+1}) '
                                  for row, col in places)
                report += '\n'
        return report

    # A single empty (0) cell is enough to call the grid unsolved.
    has_empty_cell = any(grid[i][j] == 0
                         for i in range(size)
                         for j in range(size))
    result = 'Unsolved' if has_empty_cell else 'Solved'

    errors = ''
    for c in range(size):
        errors += _line_report([(r, c) for r in range(size)],
                               f'column {c+1}')
    for r in range(size):
        errors += _line_report([(r, c) for c in range(size)],
                               f'row {r+1}')

    # Any reported duplicate overrides Solved/Unsolved.
    if errors:
        result = 'Incorrect'

    return result, errors

Ejemplo n.º 10

0

Mostrar archivo

Archivo: markovchain.py Proyecto: venisasinha/Fake-Tweeter

def split_tweets(tweets):
    """
    Turn list of Tweets into flat list of words. Additionally, keep track of
    word counts and start/end words for future reference.

    Words containing "pic.twitter" or "http" (images and links) are dropped.
    A word is a start word when it is the first space-separated token of a
    tweet, and a possible end word when it is longer than one character and
    ends in ".", "!" or "?".

    :param tweets: list of strings representing tweets
    :return: tuple of (list of kept words in order, list of start words in
        which each word is repeated once per occurrence as a start word,
        plain dict mapping each end word to
        ``count as end word / total count of that word``)
    """
    words = []
    start_words = DefaultDict(int)
    end_words = DefaultDict(int)
    word_counts = DefaultDict(int)

    for tweet in tweets:
        # Split space-separated words into list and walk it with positions
        for i, word in enumerate(tweet.split(" ")):
            # skip images and direct links
            if "pic.twitter" in word or "http" in word:
                continue

            words.append(word)
            word_counts[word] += 1

            # first token of the tweet; compare by value, not identity
            if i == 0:
                start_words[word] += 1

            # potentially the last word of a tweet/sentence
            if len(word) > 1 and word[-1] in (".", "!", "?"):
                end_words[word] += 1

    # Repeating a word by its frequency lets a uniform random choice favour
    # the more common start words.
    weighted_start_words = []
    for word, count in start_words.items():
        weighted_start_words.extend([word] * count)

    # Convert counts of end words to P(end word | word).  A plain dict is
    # returned so that later lookups cannot silently create new keys.
    end_probabilities = {
        word: count / word_counts[word]
        for word, count in end_words.items()
    }

    return words, weighted_start_words, end_probabilities

Ejemplo n.º 11

0

Mostrar archivo

Archivo: logger.py Proyecto: c3se/alvis-intro

 def __init__(self, log_dir=None, **kwargs):
     """Derive a tag from SLURM_GPUS_PER_NODE and pick a default log dir.

     ``main_tag`` is the colon-separated parts of the environment variable
     ``SLURM_GPUS_PER_NODE`` in reverse order, joined with "x" (a KeyError
     is raised when the variable is unset).  When ``log_dir`` is None it
     becomes ``logs/<script>_<main_tag>``, where ``<script>`` is
     ``sys.argv[0]`` without a trailing ".py".  All other keyword
     arguments are forwarded to the parent constructor.
     """
     gpu_spec = os.environ["SLURM_GPUS_PER_NODE"].split(":")
     # [number of GPUs]x[GPU type]
     self.main_tag = "x".join(reversed(gpu_spec))

     if log_dir is None:
         script = sys.argv[0]
         if script.endswith(".py"):
             script = script[:-3]
         log_dir = f"logs/{script}_{self.main_tag}"

     super().__init__(log_dir=log_dir, **kwargs)
     # Two-level mapping whose innermost values are lists.
     self.scalars = DefaultDict(lambda: DefaultDict(list))

Ejemplo n.º 12

0

Mostrar archivo

    def getCount(self, stockcode):
        """Build a per-date mention count for `stockcode`.

        Walks the rows of ``self.dffilter``; field 0 of a row is used as the
        date and field 1 as the text.  Within one row a word is counted at
        most once, and only if it appears in ``self.stock_table.Kode.values``
        and not in ``self.excluded_word.values``.  The running counter is
        reset whenever the date differs from the previous processed row's
        date, and a snapshot of it is stored per date.

        Prints the tail of the resulting frame and returns the rows whose
        'date' lies between ``self.awal`` and ``self.akhir`` (inclusive),
        with columns 'date' and 'mentions'.
        """
        stockcode = stockcode.lower()
        # Positions of the date and text fields within a row.
        __date = 0
        __text = 1
        counter = DefaultDict(int)
        # NOTE(review): the default factory is `str`, but the values stored
        # below are copies of `counter`.
        bydate = DefaultDict(str)
        buffer_date = ''
        for index, row in self.dffilter.iterrows():
            # try:
            # Only rows whose text field is a non-empty string are processed.
            if row[__text] != '' and isinstance(row[__text], str):
                # Keep the part before "T" (presumably the date part of an
                # ISO timestamp - confirm against the data).
                tanggal = row[__date].split("T")[0]
                # Newlines, commas and full stops become spaces; lowercased.
                teks = row[__text].replace("\n",
                                           " ").replace(",", " ").replace(
                                               ".", " ").lower()
                teks_perkata = teks.split(" ")
                # NOTE(review): kode_saham is filled but never read in this
                # method.
                kode_saham = []
                # A new date starts a fresh counter.
                if tanggal != buffer_date:
                    counter = DefaultDict(int)
                else:
                    pass
                # Words already counted for this row (at most once per row).
                cek_double = []
                for x in teks_perkata:
                    if x not in cek_double:
                        if x in self.stock_table.Kode.values:
                            if x not in self.excluded_word.values:
                                kode_saham.append(x)
                                cek_double.append(x)
                                counter[x] += 1
                                # Shallow copy: a snapshot of the counts so
                                # far for this date.
                                bydate[tanggal] = copy.copy(counter)
                            else:
                                continue
                        else:
                            continue
                    else:
                        continue
                # teks_rep = ' '.join([x for x in teks if x not in self.excluded_word.values])
                # if teks_rep.find(stockcode) != -1: # replaced by the loop above
                #     counter[stockcode]+=1
                #     bydate[tanggal]=copy.copy(counter)
                buffer_date = tanggal
            else:
                continue
            # except:
            #     print('error ', row)
            #     continue

        # One entry per recorded date.  NOTE(review): this relies on the
        # copied counters still behaving as default dicts, so that a date
        # without `stockcode` yields 0 - confirm.
        mention_counter = [bydate[x][stockcode] for x in bydate]
        mention_date = [x for x in bydate]

        df = pd.DataFrame([mention_date, mention_counter]).transpose()
        df.columns = ('date', 'mentions')
        print(df.tail())
        return df[(df['date'] >= self.awal) & (df['date'] <= self.akhir)]

Ejemplo n.º 13

0

Mostrar archivo

Archivo: invoice.py Proyecto: TheBestEagleEye/FluffyCogs

 def __init__(self, bot: Red):
     """Register config defaults, create the caches and schedule `initialize`.

     Must run while an event loop is running, because
     ``asyncio.create_task`` is called at the end.
     """
     self.bot = bot
     self.config = Config.get_conf(self, identifier=2113674295, force_registration=True)
     # The same default factories feed both the config registration and the
     # in-memory caches below.
     self.config.register_guild(**self._guild_defaults())
     self.config.register_channel(**self._channel_defaults())
     self.guild_cache: GuildCache = DefaultDict(self._guild_defaults)
     self.channel_cache: ChannelCache = DefaultDict(self._channel_defaults)
     # A missing key gets a new AntiSpam built from self.intervals.
     self.guild_as: DefaultDict[int, AntiSpam] = DefaultDict(partial(AntiSpam, self.intervals))
     self.member_as: DefaultDict[Tuple[int, int], AntiSpam] = DefaultDict(
         partial(AntiSpam, self.intervals)
     )
     self.dynamic_ready: Dict[int, asyncio.Event] = {}
     self.cog_ready = asyncio.Event()
     # Runs in the background; the returned task is not stored.
     asyncio.create_task(self.initialize())

Ejemplo n.º 14

0

Mostrar archivo

def main():
    # Interactive driver: asks for a start and an end city, runs the A*
    # search and the greedy best-first search, and prints their results.
    # NOTE(review): the function continues past the part documented here.

    # Greedy best first search
    # Three list-valued maps; they are passed to citanjeIzFilea and
    # greedyBFS below (their contents are defined by those helpers).
    d = DefaultDict(list)
    d2 = DefaultDict(list)
    d3 = DefaultDict(list)
    # Input of the start city
    unosGrada = input("Unesite pocetni grad")
    unosKrajnjegGrada = input("Unesite krajni grad")
    # Lists in which the cities are stored
    niz = []
    # NOTE(review): `nzi2` is never read in the visible code; `niz2` is
    # assigned further down - possibly a typo.
    nzi2 = []
    # Reading the graph in Pajek format
    G = nx.read_pajek('airports-astar.net')
    # NOTE(review): G1 is not used in the visible part of the function.
    G1 = nx.Graph(G)
    # G2 = nx.DiGraph(G)

    # A* algorithm, task 1
    print(" \n A* algoritam ------------------------------")
    Astar = aStarAlgorithm(citanjeKordinata("airports-astar.net"), unosGrada,
                           unosKrajnjegGrada)
    # a_star() returns (path, distance travelled, running time).
    putPreden, udaljenostPredena, vrijemeOdradivanjaAlgoritma = Astar.a_star()
    print(
        "Put preden je: {0}, udaljenost predena je: {1}, vrijeme odradivanja algoritma je: {2}"
        .format(putPreden, udaljenostPredena, vrijemeOdradivanjaAlgoritma))

    print(" \n -----------------------------------------")

    # Best-first search algorithm
    print("Best first search algoritam ---------------------")
    # Reading from the file
    niz, niz2 = citanjeIzFilea(d, d2, d3)
    # print(niz)

    #print(udaljenostDoCilja(d3["LHR"][0], d3["BER"][0]))
    print("\n")
    listaPredenihGradova = greedyBFS(d, d2, d3, unosGrada, unosKrajnjegGrada,
                                     niz, niz2)
    # Sum of the distances along the final path
    suma = ispisKonacnogPuta(listaPredenihGradova, d2)
    print("\n Suma udaljenosti puta je:{0}".format(suma))

    # Task 2

    # The A* search is run a second time here.
    put, distanca, vrijemeAlgoritma = Astar.a_star()
    print(put)
    print(distanca)

    # Graf graficki prikazan
    """ generate_pajek(G)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: day08.py Proyecto: AngelOnFira/advent-of-code-2020

def solve(processed: Proccessed_line) -> DefaultDict[int, str]:
    """Work out which input pattern stands for which digit.

    Patterns of length 2, 3, 4 and 7 map directly to "1", "7", "4" and "8".
    The length-6 and length-5 patterns are told apart with
    `string_differences`.  The result maps digit strings ("0".."9") to
    patterns; a digit that could not be determined reads as "" because the
    mapping is a default dict of `str`.
    """
    # Pattern lengths that identify a digit on their own.
    digit_by_unique_length = {2: "1", 3: "7", 4: "4", 7: "8"}

    five_characters = set()
    six_characters = set()
    segments = DefaultDict(str)
    known_values = DefaultDict(str)

    for pattern in processed.inputs:
        size = len(pattern)
        if size in digit_by_unique_length:
            known_values[digit_by_unique_length[size]] = pattern
        elif size == 5:
            five_characters.add(pattern)
        elif size == 6:
            six_characters.add(pattern)

    segments[1] = string_differences(known_values["1"], known_values["7"])

    # "9" is the six-character pattern that differs from ("4" + segment 1)
    # in exactly one position.
    four_plus_segment = known_values["4"] + segments[1]
    for candidate in six_characters:
        if len(string_differences(candidate, four_plus_segment)) == 1:
            known_values["9"] = candidate
            six_characters.remove(candidate)
            break

    # "2" is a five-character pattern whose difference to "9" is not
    # exactly one; the first such pattern is taken.
    for candidate in five_characters:
        if len(string_differences(candidate, known_values["9"])) != 1:
            known_values["2"] = candidate
            five_characters.remove(candidate)
            break

    # The remaining six-character patterns are "6" and "0".
    nine_minus_seven = string_differences(known_values["9"],
                                          known_values["7"])
    for candidate in six_characters:
        if len(string_differences(candidate, nine_minus_seven)) == 3:
            known_values["6"] = candidate
        else:
            known_values["0"] = candidate

    segments[3] = string_differences(known_values["8"], known_values["6"])

    # The remaining five-character patterns are "3" and "5".
    for candidate in five_characters:
        if segments[3] in candidate:
            known_values["3"] = candidate
        else:
            known_values["5"] = candidate
    return known_values

Ejemplo n.º 16

0

Mostrar archivo

Archivo: votes.py Proyecto: jonathanvdc/res-publica

    def __init__(self, index_path: str, devices: DeviceIndex,
                 votes: Dict[VoteId,
                             VoteAndBallots], vote_secrets: Dict[VoteId, str],
                 suspicious_ballots: Dict[VoteId, List[SuspiciousBallot]]):
        """Store the given index data and start with empty caches.

        All arguments are kept by reference on the instance; nothing is
        copied or validated here.
        """

        self.index_path = index_path
        self.devices = devices
        self.votes = votes
        self.vote_secrets = vote_secrets
        self.suspicious_ballots = suspicious_ballots
        # Monotonic clock reading taken at construction time.
        self.last_heartbeat = time.monotonic()

        # The ballot ID cache remembers ballot IDs.
        # Both caches are two-level: a missing outer key gets an empty dict.
        self.ballot_id_cache = DefaultDict(dict)
        self.ballot_to_voter_cache = DefaultDict(dict)

Ejemplo n.º 17

0

Mostrar archivo

Archivo: quadraticgreedy.py Proyecto: donsheehy/greedypermutation

def _greedy(M, seed=None, gettransportplan=False, mass=None):
    """
    Return an iterator that yields `(point, index)` pairs, where `point`
    is the next point in a greedy permutation and `index` is the index of they
    nearest predecessor.

    The optional `seed` parameter indicates the point that should appear first.
    """
    P = list(M)

    if mass is None:
        mass = [1] * len(M)
    elif len(mass) != len(M):
        raise ValueError("`mass` must of same length as `M`")
    # If no seed is provided, use the first point.
    if seed is None:
        seed = P[0]
    else:
        # Put the seed in the first position.
        seed_index = P.index(seed)
        P[0], P[seed_index] = P[seed_index], P[0]
    n = len(P)
    transportplan = DefaultDict(int)
    if gettransportplan:
        transportplan[P[0]] = sum(mass)
    yield P[0], None, transportplan
    pred = {p: 0 for p in P}
    preddist = {p: M.dist(p, P[pred[p]]) for p in P}
    for i in range(1, n):
        transportplan = DefaultDict(int)
        farthest = i
        for j in range(i + 1, n):
            if preddist[P[j]] > preddist[P[farthest]]:
                farthest = j
        P[i], P[farthest] = P[farthest], P[i]
        if gettransportplan:
            transportplan[P[pred[P[i]]]] -= mass[i]
            transportplan[P[i]] += mass[i]
        # Update the predecessor distance if necessary.
        for j in range(i + 1, n):
            newdistance = M.dist(P[i], P[j])
            if newdistance < preddist[P[j]]:
                if gettransportplan:
                    transportplan[P[pred[P[j]]]] -= mass[j]
                    transportplan[P[i]] += mass[j]
                pred[P[j]] = i
                preddist[P[j]] = newdistance
        yield P[i], pred[P[i]], transportplan

Ejemplo n.º 18

0

Mostrar archivo

    def search(self, query: SearchQuery) -> SearchResultsByTopic:
        """Yield the sorted search results for `query`, grouped by topic.

        The lower-cased query is matched as a substring against each index
        item's match texts.  Topics hidden by ``self._may_see_topic`` are
        skipped and items are filtered by the per-topic permission check
        (topics without one allow every item).

        Raises:
            IndexNotFoundException: the loaded index has no entry for the
                current language; a background rebuild is started first.
        """
        needle = query.lower()
        found = DefaultDict(list)

        index = self._index_store.load_index()
        if self._current_language not in index.localization_dependent:
            build_and_store_index_background()
            raise IndexNotFoundException

        # Language-independent items first, then the current language's.
        all_topics = chain(
            index.localization_independent.items(),
            index.localization_dependent[self._current_language].items(),
        )
        for topic, match_items in all_topics:
            if not self._may_see_topic.get(topic, True):
                continue
            may_see_item = self._may_see_item_func.get(topic, lambda _: True)

            for item in match_items:
                if not any(needle in text for text in item.match_texts):
                    continue
                # The permission check only runs for textual matches.
                if not may_see_item(item.url):
                    continue
                found[item.topic].append(SearchResult(item.title, item.url))

        yield from self._sort_search_results(found)

Ejemplo n.º 19

0

Mostrar archivo

Archivo: majority_element.py Proyecto: cplyon/leetcode

 def majorityElement(self, nums: list[int]) -> int:
     """Return the value occurring more than ``len(nums) // 2`` times.

     The scan stops as soon as a value crosses that threshold.  Returns 0
     when no value does (including for an empty list).
     """
     threshold = len(nums) // 2
     tally = DefaultDict(int)
     for value in nums:
         tally[value] += 1
         if tally[value] > threshold:
             return value
     return 0

Ejemplo n.º 20

0

Mostrar archivo

Archivo: 39-2.py Proyecto: Cenibee/PYALG

    def canFinish(self, numCourses: int,
                  prerequisites: List[List[int]]) -> bool:
        """Return True when the prerequisite graph contains no cycle.

        Each ``[x, y]`` pair is stored as an edge from ``x`` to ``y``.  A
        depth-first search reports a cycle when it reaches a node that is
        still on the current path.  ``numCourses`` is not used.
        """
        graph = DefaultDict(list)
        for course, required in prerequisites:
            graph[course].append(required)

        on_path = set()   # nodes on the current DFS path
        cleared = set()   # nodes already known to be cycle-free

        def cycle_free_from(node):
            if node in on_path:
                return False
            if node in cleared:
                return True

            on_path.add(node)
            for following in graph[node]:
                if not cycle_free_from(following):
                    return False
            on_path.remove(node)

            cleared.add(node)
            return True

        # Looking up a node without outgoing edges adds a key to `graph`,
        # so iterate over a snapshot of the keys.
        return all(cycle_free_from(course) for course in list(graph))

Ejemplo n.º 21

0

Mostrar archivo

def to_pairs(s: str) -> dict[tuple[str, str], int]:
    """Count every pair of adjacent characters in `s`.

    Returns a default dict mapping ``(left, right)`` to the number of times
    that pair occurs; pairs that never occur read as 0.  Strings shorter
    than two characters, including the empty string, give an empty mapping.
    """
    pairs: dict[tuple[str, str], int] = DefaultDict(int)
    # Zipping the string with itself shifted by one visits each adjacent
    # pair once and is simply empty for strings of length 0 or 1.
    for pair in zip(s, s[1:]):
        pairs[pair] += 1
    return pairs

Ejemplo n.º 22

0

Mostrar archivo

Archivo: intermediate_sugar.py Proyecto: majdabd/NeuroLang

 def _obtain_sugared_columns(self, expression):
     """Give every `Column` argument in `expression` a fresh symbol.

     Returns a two-level mapping: ``set_symbol -> {column: fresh symbol}``.
     A column that occurs several times ends up with the symbol created
     for its last occurrence.
     """
     symbols_by_set = DefaultDict(dict)
     for atom in extract_logic_atoms(expression):
         columns = (arg for arg in atom.args if isinstance(arg, Column))
         for column in columns:
             symbols_by_set[column.set_symbol][column] = ir.Symbol.fresh()
     return symbols_by_set

Ejemplo n.º 23

0

Mostrar archivo

Archivo: day1702.py Proyecto: jobartucz/AdventOfCode-2020

def shuffle(universe, iteration):
    """Compute the next generation of a 4-D cellular automaton.

    Every cell within one step of the bounds reported by
    `findminmax(universe)` is examined.  A true cell stays true with 2 or
    3 true neighbours; a false cell becomes true with exactly 3.  Only true
    cells are stored in the returned mapping.  `iteration` is not used.

    NOTE: every cell is looked up with ``universe[...]``; with a default
    dict those lookups add keys to `universe`.
    """
    # The 80 neighbour offsets in 4-D (everything except the cell itself).
    offsets = [(di, dj, dk, dm)
               for di in range(-1, 2)
               for dj in range(-1, 2)
               for dk in range(-1, 2)
               for dm in range(-1, 2)
               if (di, dj, dk, dm) != (0, 0, 0, 0)]

    newuniverse = DefaultDict(bool)

    minx, miny, minz, minw, maxx, maxy, maxz, maxw = findminmax(universe)

    for a in range(minx - 1, maxx + 2):
        for b in range(miny - 1, maxy + 2):
            for c in range(minz - 1, maxz + 2):
                for d in range(minw - 1, maxw + 2):
                    numtrue = sum(
                        1 for di, dj, dk, dm in offsets
                        if universe[(a + di, b + dj, c + dk, d + dm)])
                    if universe[(a, b, c, d)]:
                        alive = numtrue in (2, 3)
                    else:
                        alive = numtrue == 3
                    if alive:
                        newuniverse[(a, b, c, d)] = True

    return newuniverse

Ejemplo n.º 24

0

Mostrar archivo

def subsampling(
    frequency: Counter[str],
    heuristic: Optional[str],
    threshold: float,
) -> Dict[str, float]:
    """
    Compute, per word, the probability of keeping it when downsampling.

    ``heuristic`` selects the formula, with ``f`` the word's relative
    frequency and ``t`` the threshold:

    * ``None``    - keep everything: a default dict that answers 1 for any
      word.
    * ``'code'``  - ``(sqrt(f / t) + 1) * (t / f)``, following the annotated
      C code of Mikolov et al. 2013 as described at
      http://mccormickml.com/2017/01/11/word2vec-tutorial-part-2-negative-sampling
    * ``'paper'`` - ``sqrt(t / f)``, the paper version.

    Any other value raises ``ValueError``.

    NOTE the default threshold is 1e-3, not 1e-5 as in the paper version
    """
    cumulative_freq = sum(frequency.values())

    if heuristic is None:
        return DefaultDict(lambda: 1)

    if heuristic == 'code':
        def keep_probability(rel_freq: float) -> float:
            return ((math.sqrt(rel_freq / threshold) + 1) *
                    (threshold / rel_freq))
    elif heuristic == 'paper':
        def keep_probability(rel_freq: float) -> float:
            return math.sqrt(threshold / rel_freq)
    else:
        raise ValueError('Unknown heuristic of subsampling.')

    return {
        word_id: keep_probability(abs_freq / cumulative_freq)
        for word_id, abs_freq in frequency.items()
    }

Ejemplo n.º 25

0

Mostrar archivo

    def run(self, data, **kwargs):
        """Cluster `data` with scikit-learn's DBSCAN and store the result.

        Keyword arguments are forwarded to ``sklearn.cluster.DBSCAN``, with
        these adjustments: ``seed`` is dropped (a non-None value is logged
        as ignored), ``n_jobs`` is resolved through ``utils.get_num_thread``,
        ``metric`` is forced to 'precomputed', and ``eps`` is the mean edge
        weight for weighted data and 0.5 otherwise.  Directed input is not
        rejected.

        The result dict is handed to ``save_result`` and kept in
        ``self.result``.  Returns ``self``.
        """
        params = dict(kwargs)
        if 'seed' in params:
            if params.pop('seed') is not None:
                self.logger.info("seed ignored")
        params['n_jobs'] = utils.get_num_thread(params.get('n_jobs'))
        params['metric'] = 'precomputed'

        A = convert.to_coo_adjacency_matrix(data, simalarity=False, distance_fun='exp_minus')
        if data.is_weighted():
            params['eps'] = float(np.mean(data.get_edges()['weight']))
        else:
            params['eps'] = 0.5

        timecost, res = utils.timeit(
            lambda: sklearn.cluster.DBSCAN(**params).fit_predict(A))

        # Group item positions by cluster label; labels become string keys.
        clusters = DefaultDict(list)
        for position, label in enumerate(res):
            clusters[str(int(label))].append(position)
        self.logger.info("Made %d clusters in %f seconds" % (len(clusters), timecost))

        result = {
            'runname': self.name,
            'params': params,
            'dataname': data.name,
            'meta': self.get_meta(),
            'timecost': timecost,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self

Ejemplo n.º 26

0

Mostrar archivo

Archivo: draft_release_notes.py Proyecto: agile-lee/ESMValTool

def draft_notes_since(project, previous_release_date=None, labels=None):
    """Draft release notes containing the merged pull requests.

    The notes are printed; nothing is returned.  A section is written only
    for labels that have at least one pull request.

    Arguments
    ---------
    project: str
        Project to draft release notes from. Valid options are esmvaltool and
        esmvalcore
    previous_release_date: datetime.datetime
        date of the previous release; defaults to the project's entry in
        PREVIOUS_RELEASE.  A given value is passed through
        ``dateutil.parse``.
    labels: list
        list of GitHub labels that deserve separate sections; defaults to
        the project's entry in LABELS
    """
    project = project.lower()
    if previous_release_date is None:
        previous_release_date = PREVIOUS_RELEASE[project]
    else:
        previous_release_date = dateutil.parse(previous_release_date)
    if labels is None:
        labels = LABELS[project]

    pulls = _get_pull_requests(project)

    lines = DefaultDict(list)
    labelless_pulls = []
    for pull in pulls:
        print(pull.updated_at, pull.merged_at, pull.number, pull.title)
        # Stop at the first pull request not updated since the previous
        # release (assumes the list is ordered by update time, newest
        # first - TODO confirm in _get_pull_requests).
        if pull.updated_at < previous_release_date:
            break
        if not pull.merged or pull.merged_at < previous_release_date:
            continue
        pr_labels = {label.name for label in pull.labels}
        # File the pull request under the first section label it carries.
        for label in labels:
            if label in pr_labels:
                break
        else:
            # No section label: report it and file it under 'enhancement'.
            labelless_pulls.append(pull)
            label = 'enhancement'
        lines[label].append((pull.closed_at, _compose_note(pull)))

    # Warn about label-less PR:
    _list_labelless_pulls(labelless_pulls)

    # Create sections
    sections = [
        VERSION[project],
        '-' * len(VERSION[project]),
        '',
        "This release includes",
    ]
    for label in labels:
        # `lines` is a default dict, so indexing never raises KeyError and
        # would create an empty entry; test membership to skip labels
        # without pull requests.
        if label not in lines:
            continue
        entries = sorted(lines[label])  # sort by closing time
        title = TITLES.get(label, label.title())
        sections.append('\n'.join(['', title, '~' * len(title), '']))
        sections.append('\n'.join(entry for _, entry in entries))
    notes = '\n'.join(sections)
    print(notes)

Ejemplo n.º 27

0

Mostrar archivo

Archivo: search.py Proyecto: petrows/checkmk

    def _search(self,
                query: SearchQuery) -> Mapping[str, Iterable[SearchResult]]:
        """Search the Redis-backed index and return the results by key.

        The query is lower-cased, its spaces are replaced by ``*`` and it is
        wrapped in ``*`` on both sides.  Two passes of ``_search_redis``
        fill the same result mapping: first the localization-independent
        part of the index, then the part for the current language.

        Raises:
            IndexNotFoundException: the index has not been built yet; a
                background build is started first.
        """
        if not IndexBuilder.index_is_built(self._redis_client):
            build_index_background()
            raise IndexNotFoundException

        pattern = "*" + query.lower().replace(' ', '*') + "*"
        results: DefaultDict[str, List[SearchResult]] = DefaultDict(list)

        # Pass 1: entries that do not depend on the language.
        independent_prefix = IndexBuilder.PREFIX_LOCALIZATION_INDEPENDENT
        independent_categories = IndexBuilder.key_categories(independent_prefix)
        self._search_redis(
            pattern,
            independent_categories,
            independent_prefix,
            results,
        )

        # Pass 2: entries stored for the current language.
        dependent_prefix = IndexBuilder.PREFIX_LOCALIZATION_DEPENDENT
        dependent_categories = IndexBuilder.key_categories(dependent_prefix)
        language_prefix = IndexBuilder.add_to_prefix(
            dependent_prefix,
            self._current_language,
        )
        self._search_redis(
            pattern,
            dependent_categories,
            language_prefix,
            results,
        )

        return results

Ejemplo n.º 28

0

Mostrar archivo

async def main() -> None:
    """Collect the actions of every service and write them out.

    Actions extracted from the fetched service pages are grouped by service
    prefix and merged with the actions that already exist.  One
    ``write_service`` call per prefix then runs concurrently.

    Raises:
        ValueError: two different service names share one prefix, and the
            pairing is not listed in IGNORED_SERVICE_ALIASES.
    """
    services_with_actions: DefaultDict[str, Set[str]] = DefaultDict(set)
    service_names: Dict[str, str] = {}

    for response in await collect_service_info():
        service_name, service_prefix, actions = await extract_actions(
            html=response.text)
        services_with_actions[service_prefix].update(actions)

        # Ignored aliases contribute actions but never name the service.
        if IGNORED_SERVICE_ALIASES.get(service_name) == service_prefix:
            continue

        known_name = service_names.get(service_prefix, service_name)
        if known_name != service_name:
            raise ValueError(
                "Found two different service names for service prefix"
                f" {service_prefix!r}: {known_name!r}"
                f" and {service_name!r}.")
        service_names[service_prefix] = service_name

    # Keep the actions that were already known for each prefix.
    existing_actions = await collect_existing_actions()
    for service_prefix, actions in services_with_actions.items():
        actions.update(existing_actions.get(service_prefix) or set())

    writers = (
        write_service(service_prefix, service_names[service_prefix], actions)
        for service_prefix, actions in services_with_actions.items())
    await asyncio.gather(*writers)

Ejemplo n.º 29

0

Mostrar archivo

Archivo: 41-m.py Proyecto: Cenibee/PYALG

    def findCheapestPrice(self, n: int, flights: List[List[int]], src: int,
                          dst: int, K: int) -> int:
        '''
        Return the cheapest price from src to dst using at most K stops.

        Each flight is [source, destination, price].  Routes are taken from
        a priority queue in order of total price; the first time dst is
        taken from the queue its price is returned.  A route that has
        already used K stops is not extended.  Returns -1 when dst cannot
        be reached within the limit.  `n` is not used.
        '''
        graph = DefaultDict(list)
        for origin, target, fare in flights:
            graph[origin].append((fare, target))

        # Entries are (total price, stops so far, city).  The start has -1
        # stops so that a direct flight counts as 0 stops.
        frontier = [(0, -1, src)]

        while frontier:
            cost, stops, city = heapq.heappop(frontier)

            if city == dst and stops <= K:
                return cost
            if stops >= K:
                continue

            for fare, neighbour in graph[city]:
                heapq.heappush(frontier, (cost + fare, stops + 1, neighbour))

        return -1

Ejemplo n.º 30

0

Mostrar archivo

    def run(self, data, damping=None, max_iter=None, convergence=None, verbose=None, seed=None):
        """Cluster `data` with scikit-learn's AffinityPropagation.

        Only the tuning arguments that are not None are forwarded to
        ``sklearn.cluster.AffinityPropagation``, together with
        ``affinity='precomputed'`` and ``copy=False``.  ``seed`` is never
        forwarded; a non-None value is logged as ignored.  Directed input
        is not rejected.

        The result dict is handed to ``save_result`` and kept in
        ``self.result``.  Returns ``self``.
        """
        if seed is not None:
            self.logger.info("seed ignored")

        tuning = {
            'damping': damping,
            'max_iter': max_iter,
            'convergence': convergence,
            'verbose': verbose,
        }
        params = {name: value for name, value in tuning.items() if value is not None}
        params['affinity'] = 'precomputed'
        params['copy'] = False

        A = convert.to_coo_adjacency_matrix(data, simalarity=True)

        timecost, res = utils.timeit(
            lambda: sklearn.cluster.AffinityPropagation(**params).fit_predict(A.toarray()))

        # Group item positions by cluster label; labels become string keys.
        clusters = DefaultDict(list)
        for position, label in enumerate(res):
            clusters[str(label)].append(position)
        self.logger.info("Made %d clusters in %f seconds" % (len(clusters), timecost))

        result = {
            'runname': self.name,
            'params': params,
            'dataname': data.name,
            'meta': self.get_meta(),
            'timecost': timecost,
            'clusters': clusters,
        }

        save_result(result)
        self.result = result
        return self