import collections


def life(rules: collections.defaultdict, state: collections.defaultdict):
    # print("Life", dict_to_state(state))
    mn = min(state.keys()) - 3
    mx = max(state.keys()) + 3
    # print(mn, '->', mx)
    d = state_as_dict()
    for x in range(mn, mx + 2):
        substate = dict_to_state(state, x - 2, x + 3)
        result = rules[substate]
        # print(x, substate, '->', result)
        if result == '#':
            d[x] = result
    return d
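# life() above calls a state_as_dict() helper that is not part of this
# collection (dict_to_state() appears further down). A minimal sketch, under
# the assumption that it simply returns a fresh, empty container for the next
# generation, with unwritten positions implicitly dead:
def state_as_dict():
    # Assumed helper: life() only ever writes live cells ('#') into the
    # result, so missing positions default to '.'.
    return collections.defaultdict(lambda: '.')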
import os
from collections import defaultdict


def save_dict_to_file(self, my_dict: defaultdict) -> None:
    try:
        os.makedirs(TARGET_PATH, exist_ok=True)
        for code in my_dict.keys():
            counter = 1
            with open(f'{self.target_path}{code}.txt', mode='w',
                      encoding="UTF-8") as file:
                for name in my_dict[code]:
                    file.write(f'{counter}. {name}\n')
                    counter += 1
    except Exception:
        print('File write failed!')
        raise  # re-raise the original exception rather than a bare Exception
from collections import defaultdict

import numpy
import pandas


def month_agg(df: pandas.DataFrame, features: defaultdict):
    agg = {
        'mean': numpy.mean,
        'std': numpy.std,
        'max': numpy.max,
        'median': numpy.median
    }
    for raw_feature in features.keys():
        if 'month' in raw_feature:
            for obj_key, obj_value in features[raw_feature].items():  # was .item()
                agg_feature = raw_feature.replace('month', 'month_agg')
                for agg_name, agg_method in agg.items():
                    # list() is needed: numpy.array() on a dict view yields a
                    # useless 0-d object array in Python 3
                    features[agg_feature][obj_key][agg_name] = agg_method(
                        numpy.array(list(obj_value.values())))
from collections import defaultdict


def solution(n: int, stars: defaultdict, Tree):
    tree = Tree([0 for _ in range(400001)])
    ys = stars.keys()
    ys = sorted(ys, reverse=True)
    faces = 0
    for y in ys:
        for x in stars[y]:
            left = tree.sum(0, x - 1)
            right = tree.sum(x + 1, 400000)
            faces = (faces + ((left % MOD) * (right % MOD))) % MOD
        for x in stars[y]:
            tree.update(x, 1)
    return faces
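# solution() expects a Tree class with inclusive range-sum queries and point
# updates, plus a module-level MOD constant. A minimal Fenwick-tree sketch
# matching that interface; the MOD value is the usual competitive-programming
# assumption, not confirmed by the source:
MOD = 1_000_000_007  # assumed value


class FenwickTree:
    def __init__(self, values):
        self.n = len(values)
        self.bit = [0] * (self.n + 1)
        for i, v in enumerate(values):
            if v:
                self.update(i, v)

    def update(self, i, delta):
        # add delta at index i
        i += 1
        while i <= self.n:
            self.bit[i] += delta
            i += i & -i

    def _prefix(self, i):
        # sum of values[0..i], tolerating i < 0 (returns 0)
        i += 1
        total = 0
        while i > 0:
            total += self.bit[i]
            i -= i & -i
        return total

    def sum(self, lo, hi):
        # inclusive range sum, as solution() calls it
        return self._prefix(hi) - self._prefix(lo - 1)


# Usage sketch: solution(n, stars, FenwickTree)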
from collections import defaultdict
from random import choice
from typing import Any, Generator


def select_node(start_node: Any,
                neighbors: defaultdict) -> Generator[Any, None, None]:
    node = start_node
    yield node
    while len(neighbors) > 1:
        options = neighbors[node]
        _remove_from_neighbors(neighbors, node)
        if len(options) > 0:
            # Prefer the neighbour with the fewest remaining options,
            # breaking ties at random
            min_len = min([len(neighbors[option]) for option in options])
            node = choice([
                option for option in options
                if len(neighbors[option]) == min_len
            ])
        else:
            node = choice(list(neighbors.keys()))
        yield node
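# select_node() depends on a _remove_from_neighbors() helper that is not
# shown here. A plausible sketch, assuming it deletes the node's own entry
# and removes the node from every remaining adjacency collection:
def _remove_from_neighbors(neighbors, node):
    neighbors.pop(node, None)
    for options in neighbors.values():
        if node in options:
            options.remove(node)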
from collections import defaultdict
from math import log2


def perplexity_lidstone(set_stat: defaultdict, train_stat: defaultdict, lamb,
                        num_all_events):
    """
    Calculate 2 ** -(sum(count(w) * log2(p(w))) / |H|) over the words w in
    the evaluation set H, using Lidstone smoothing for the probabilities.
    :param set_stat: word counts of the set being evaluated
    :param lamb: the lambda value to use in Lidstone smoothing
    :param train_stat: word counts of the trained model
    :param num_all_events: number of events in the training data
    :return: the perplexity of the model
    """
    log_sum = 0
    for w in set_stat.keys():
        log_sum += log2(lidstone(w, train_stat, lamb, num_all_events)) * set_stat[w]
    prep_score = 2 ** (-(log_sum / total_event_num(set_stat)))
    return round(prep_score, 2)
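# perplexity_lidstone() leans on lidstone() and total_event_num() helpers
# that are not included here. A minimal sketch of what they presumably do,
# treating num_all_events as the vocabulary size |V| in the standard
# Lidstone-smoothing formula P(w) = (c(w) + lambda) / (N + lambda * |V|):
def lidstone(w, train_stat, lamb, num_all_events):
    train_size = sum(train_stat.values())  # N: tokens seen in training
    return (train_stat[w] + lamb) / (train_size + lamb * num_all_events)


def total_event_num(set_stat):
    # |H|: total number of tokens in the evaluated set
    return sum(set_stat.values())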
from collections import defaultdict


def find_ticket_fields(tickets: defaultdict, ticket_rules: defaultdict):
    """Assigns ticket fields to indexes based on the ticket rules and seen
    ticket values."""
    index_field_map = defaultdict(str)
    unassigned_fields = set(ticket_rules.keys())
    while len(unassigned_fields) != 0:
        unassigned_tickets = [i for i in tickets if i not in index_field_map]
        for i in unassigned_tickets:
            possible_fields = [
                field for field in unassigned_fields
                if tickets[i].issubset(ticket_rules[field])
            ]
            if len(possible_fields) == 0:
                raise Exception(f"mistake in rule parsing: no possible values for {i}")
            if len(possible_fields) == 1:
                found = possible_fields[0]
                index_field_map[i] = found
                unassigned_fields.remove(found)
    return index_field_map
from collections import defaultdict


def flatten(tree_: defaultdict, depth: int = 0):
    """Takes a tree dict and builds a list of terms with depth values

    Variation on ptr() from the gist. Recursively gets term objects from
    terms_dict, adds the depth, and appends them to the module-level list
    of terms.

    @param tree_: defaultdict Tree
    @param depth: int Depth of indents
    """
    for key in sorted(tree_.keys()):
        term = terms_dict[key]
        term['depth'] = depth
        terms.append(term)
        depth += 1
        flatten(tree_[key], depth)
        depth -= 1
from collections import defaultdict
from typing import List


def check_possible(t_num_to_indexes: defaultdict, s: List[int]):
    # Returns the rotate count for the first feasible target, or None if no
    # target is possible
    t_nums = list(t_num_to_indexes.keys())
    t_nums.sort(key=lambda num: len(t_num_to_indexes[num]), reverse=True)
    for target in t_nums:
        possible = True
        rotate_count = 0
        for num in t_nums:
            if num == target:
                continue
            indexes = t_num_to_indexes[num]
            if all(s[i] == target for i in indexes):
                rotate_count += len(indexes)
            else:
                possible = False
                break
        if possible:
            return rotate_count
import binascii
import struct
from collections import defaultdict


def generate_message(pressed_buttons: set, axes: defaultdict, hats: set):
    assert all((p in buttonmapping for p in pressed_buttons))
    assert all((k in axismapping for k in axes.keys()))
    assert all((h in hatmapping for h in hats))
    buttons_encoded = sum([
        1 << buttonmapping.index(button_name)
        for button_name in pressed_buttons
    ])
    hat_encoded = hatcodes[sum(
        [1 << hatmapping.index(d_pad_str) for d_pad_str in hats])]
    rawaxis_encoded = [axes[axis] for axis in axismapping]
    axis_encoded = [((0 if abs(x) < axis_deadzone else x) >> 8) + 128
                    for x in rawaxis_encoded]
    rawbytes = struct.pack('>BHBBBB', hat_encoded, buttons_encoded,
                           *axis_encoded)
    return binascii.hexlify(rawbytes) + b'\n'
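# generate_message() reads several module globals that are not part of this
# snippet. Illustrative stand-ins only -- the real mappings belong to the
# original controller project and may well differ:
buttonmapping = ['y', 'b', 'a', 'x', 'l', 'r', 'zl', 'zr',
                 'minus', 'plus', 'lstick', 'rstick', 'home', 'capture']
axismapping = ['lx', 'ly', 'rx', 'ry']
hatmapping = ['up', 'right', 'down', 'left']
# Index is the bitmask of pressed d-pad directions; 8 encodes "centered"
hatcodes = [8, 0, 2, 1, 4, 8, 3, 8, 6, 7, 8, 8, 5, 8, 8, 8]
axis_deadzone = 1000  # raw 16-bit axis values below this are zeroed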
import operator
from collections import defaultdict

import numpy
import pandas
from scipy import stats


def trend(df: pandas.DataFrame, features: defaultdict):
    # use z-score to normalize
    for feature_name, feature_value in features.items():  # was .keys()
        if 'monthly' in feature_name:
            for obj_key, obj_value in feature_value.items():
                # Design matrix [month, 1] for a least-squares line fit;
                # list() is needed because numpy cannot stack dict views
                coefficient = numpy.vstack(
                    [list(obj_value.keys()),
                     numpy.ones(len(obj_value.keys()))]).T
                trend_slope = feature_name.replace('monthly', 'trend_slope')
                features[trend_slope][obj_key]['slope'] = numpy.linalg.lstsq(
                    coefficient, list(obj_value.values()), rcond=None)[0]
                trend_bias = feature_name.replace('monthly', 'trend_bias')
                index, last_month = max(enumerate(obj_value.keys()),
                                        key=operator.itemgetter(1))
                all_value = numpy.array(list(obj_value.values()))
                features[trend_bias][obj_key]['bias'] = stats.zscore(
                    all_value)[index]
from collections import defaultdict


def cycle(grid: defaultdict) -> None:
    mutations = []
    for point in list(grid.keys()):
        # Count black neighbours of this tile
        bc = 0
        for d in dirs.values():
            np = Point(point.x + d.x, point.y + d.y, point.z + d.z)
            if grid[np] == 'black':
                bc += 1
        color = grid[point]
        if color == 'black' and (bc == 0 or bc > 2):
            mutations.append(point)
        elif color == 'white' and bc == 2:
            mutations.append(point)
    # Flip all affected tiles at once, after the full scan
    for p in mutations:
        grid[p] = 'black' if grid[p] == 'white' else 'white'
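# cycle() assumes a Point type and a dirs mapping defined elsewhere. A
# minimal sketch using cube coordinates for the six hex-grid neighbours (the
# member names x, y, z match the attribute access in cycle()):
from collections import namedtuple

Point = namedtuple('Point', ['x', 'y', 'z'])
dirs = {
    'e': Point(1, -1, 0), 'w': Point(-1, 1, 0),
    'ne': Point(1, 0, -1), 'sw': Point(-1, 0, 1),
    'nw': Point(0, 1, -1), 'se': Point(0, -1, 1),
}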
from collections import defaultdict
from typing import Tuple

import numpy as np


def unpack_cooccurrence(
    cooccurence_dict: defaultdict
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    :param cooccurence_dict: A nested dictionary containing the co-occurrence
        count between each pair of word ids.
    :return: the co-occurrence dictionary unpacked into three parallel arrays.
    """
    first, second, x_ijs = [], [], []
    for first_id in cooccurence_dict.keys():
        for second_id in cooccurence_dict[first_id].keys():
            x_ij = cooccurence_dict[first_id][second_id]
            # add (main, context) pair
            first.append(first_id)
            second.append(second_id)
            x_ijs.append(x_ij)
            # add (context, main) pair
            first.append(second_id)
            second.append(first_id)
            x_ijs.append(x_ij)
    return np.array(first), np.array(second), np.array(x_ijs)
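# A small usage sketch for unpack_cooccurrence(), with hypothetical word ids
# and counts; every stored (i, j) pair comes back mirrored as (j, i) too, so
# the arrays are twice as long as the number of stored entries:
cooc = defaultdict(dict)
cooc[0][1] = 3.0
cooc[0][2] = 1.5

rows, cols, vals = unpack_cooccurrence(cooc)
# rows -> [0, 1, 0, 2], cols -> [1, 0, 2, 0], vals -> [3.0, 3.0, 1.5, 1.5]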
from collections import defaultdict
from typing import Tuple


def get_black_neighbors(tiles: defaultdict, coord: Tuple[int, int]) -> int:
    black_neighbors = [(coord[0] + direction[0], coord[1] + direction[1])
                       in tiles.keys()
                       for direction in directions.values()]
    return sum(black_neighbors)
from collections import defaultdict

import matplotlib.pyplot as plt


def histogram(word_dict: defaultdict, interactive=False):
    fig = plt.figure()
    ax = plt.subplot()
    words = list(word_dict.keys())
    counts = list(word_dict.values())
    wc = zip(words, counts)
    wc = sorted(wc, key=lambda elem: elem[1], reverse=True)
    words, counts = zip(*wc)
    bars = plt.bar(words, counts, color='g', tick_label=None)
    curr_word = ax.annotate("",
                            xy=(0, 0),
                            xytext=(40, 40),
                            textcoords="offset points",
                            arrowprops=dict(arrowstyle="->"))
    curr_word.set_visible(False)

    def update_label(label, bar):
        x = bar.get_x() + bar.get_width() / 2.
        y = bar.get_y() + bar.get_height()
        curr_word.xy = (x, y)
        curr_word.set_text(label)

    def draw_labels():
        for i, bar in enumerate(bars):
            offset_x = 10 * (i % 20)
            offset_y = 15 * (i % 20)
            x = bar.get_x() + bar.get_width() / 2.
            y = bar.get_y() + bar.get_height()
            ax.annotate(words[i],
                        xy=(x, y),
                        xytext=(40 + offset_x, 40 + offset_y),
                        textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="0.8"),
                        arrowprops=dict(arrowstyle="-", alpha=0.2))
        fig.canvas.draw_idle()

    def show_label_on_plot_hover(event):
        vis = curr_word.get_visible()
        hover_on_bar = False
        for i, bar in enumerate(bars):
            if bar.contains(event)[0]:
                hover_on_bar = True
                update_label(words[i], bar)
                curr_word.set_visible(True)
                break
        if vis and not hover_on_bar:
            curr_word.set_visible(False)
        fig.canvas.draw_idle()

    plt.xlabel('Words')
    plt.ylabel('Occurrences')
    plt.title('Histogram of words in Ed Stafford: First Man Out')
    plt.tick_params(axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False)
    if interactive:
        fig.canvas.mpl_connect('motion_notify_event',
                               show_label_on_plot_hover)
    else:
        draw_labels()
    plt.show()
from collections import defaultdict


def getOrderedKeys(data: defaultdict) -> list:
    ord_keys = []
    for i in sorted(data.keys()):
        ord_keys.append(i)
    return ord_keys
class UtilMultiFile(UtilObject):
    """
    Keeps specified number of files opened, for read or write
    Attributes:
        mode - read or write
        maxCount - maximum number of opened files at any given moment
        hitCount - number of open file hits
        xactCount - number of transactions (reads or writes)
        fileList - list of open file names, sorted by the time
        fileDict - map of file name to a file handle
    """

    def __init__(self, maxCount, mode):
        self.maxCount = maxCount
        self.mode = mode
        self.fileList = []
        self.fileDict = {}
        self.fileCache = DefDict(list)
        self.hitCount = 0
        self.xactCount = 0

    def write(self, fileName, line):
        # Try to cache it first
        lines = self.fileCache[fileName]
        lines.append(line)
        if len(lines) > 100:
            self.cacheFlush(fileName)

    def cacheFlush(self, fileName):
        assert self.mode[0] in ('w', 'a')
        f = self.fileHandle(fileName)
        for l in self.fileCache[fileName]:
            try:
                f.write(l)
            except IOError as e:
                print("Could not write to %s: error %d %s" %
                      (fileName, e.errno, e.strerror))
                return
        self.fileCache[fileName] = []
        self.xactCount += 1

    def fileHandle(self, fileName):
        if fileName not in self.fileDict:
            if len(self.fileList) == self.maxCount:
                oldFileName = self.fileList[0]
                self.fileDict[oldFileName].close()
                del self.fileDict[oldFileName]
                self.fileList = self.fileList[1:]
            try:
                f = open(fileName, self.mode)
            except IOError as e:
                print("Could not open %s: error %d %s" %
                      (fileName, e.errno, e.strerror))
                return None
            self.fileDict[fileName] = f
            self.fileList.append(fileName)
        else:
            self.hitCount += 1
        return self.fileDict[fileName]

    def closeAll(self):
        for fileName in self.fileCache.keys():
            self.cacheFlush(fileName)
        for f in self.fileDict.values():
            f.close()
        self.fileDict = {}
        self.fileList = []

    def getStats(self):
        return "%u hits out of %u transactions: %u%%" % (
            self.hitCount, self.xactCount,
            (100 * self.hitCount / self.xactCount) if self.xactCount else 0)
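# A minimal usage sketch for UtilMultiFile. DefDict is assumed to be an
# alias for collections.defaultdict; UtilObject comes from the same utility
# module as the class itself:
mf = UtilMultiFile(maxCount=64, mode='w')
mf.write('a.txt', 'first line\n')  # buffered; flushed once 100+ lines queue up
mf.write('b.txt', 'other file\n')
mf.closeAll()                      # flushes all buffers and closes every handle
print(mf.getStats())               # e.g. "0 hits out of 2 transactions: 0%"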
from collections import defaultdict


def print_registration_identifier(hull: defaultdict) -> None:
    panels = list(hull.keys())
    identifier = create_painted_identifier(panels)
    print_bw_image_to_console(identifier, PANELS.WHITE, PANELS.BLACK)
# Now that we have built the dictionary of average distances, let's translate
# it into the best reclassified TaxaTypes
# First, build dictionary dir -> TaxaTypeTree
print("\nBuilding TaxaType trees...")
taxaTypeTree = TaxaTypeTree(set([x.type for x in taxaDict.values()]))
UtilStore(taxaTypeTree, TAXA_TYPE_TREE())
taxaDistCntDict = UtilLoad(GENOME_TAX_DIST_CNT_DICT())

# Now find optimal reclassified TaxaTypes, and dump them into a file
print("Build reclassification...")
reclassObjList = []
dumpDirNodeCostDict = {}
for dir in dirTaxaTypeDictDict.keys():
    nodeCostDict = taxaTypeTree.bldCostDict(dirTaxaTypeDictDict[dir])
    dumpDirNodeCostDict[dir] = taxaTypeTree.utilJsonDump(
        nodeAttribDict=nodeCostDict)
    taxaType, cost = taxaTypeTree.optimal(nodeCostDict)
    dist = taxaDict[dir].type.distance(taxaType)
    reclassObjList.append(UtilObject(dir=dir,
                                     cogCorr=dirCorrDict[dir],
                                     oldClassif=taxaDict[dir].type,
                                     newClassif=taxaType,
                                     taxaDist=dist,
                                     cogDist=cost,
                                     taxaDistCnts=taxaDistCntDict[dir]))
UtilStore(dumpDirNodeCostDict, DIR_NODE_COST_DICT())
reclassObjList = sorted(reclassObjList, key=lambda x: x.cogCorr)
UtilStore([x for x in reclassObjList if x.taxaDist > 0],
          HIER_RECLASSIFIED_LIST())
import collections
import functools as ft  # assumed alias, based on the ft.reduce call below


def get_possible_options(steps_dict: collections.defaultdict, path: str):
    return (sorted(
        set(steps_dict.keys()).union(
            set(ft.reduce(lambda a, b: a + b,
                          steps_dict.values()))).difference(set(path))))
from collections import defaultdict


def unique_words(hist: defaultdict):
    return len(hist.keys())
from collections import defaultdict
from typing import Tuple

from bs4 import BeautifulSoup


def convo_totals(imessage_soup: BeautifulSoup, totals_data: defaultdict,
                 word_data: defaultdict,
                 dates_data: defaultdict) -> Tuple[dict, dict, dict]:
    # These helper functions are nested inside the larger function because
    # the alternative would be to pass the relevant data dict on every call
    # that updates a statistic. For example, each call to get_dates() would
    # need dates_data passed as an argument, since a module-level get_dates()
    # would live in an entirely separate namespace. Defining the helpers
    # inside convo_totals() puts them in its namespace, so they can read and
    # update the data dictionaries directly and every call site stays short.
    def get_number_messages(sender: str) -> None:
        """Updates the number of messages a person sent."""
        totals_data[name_conversion[sender]] += 1

    def get_number_words(message: str) -> None:
        """Updates the number of keywords sent."""
        if message:
            buffer = ""
            for char in message:
                if char.isalnum():
                    buffer += char.lower()
                else:
                    if buffer in words.words:
                        word_data[buffer] += 1
                    buffer = ""

    def get_dates(date: str) -> None:
        """Updates the number of messages sent in a certain time."""
        # Each date is passed through in the form MM/DD/YYYY HR:MN:SC
        month, year = date[:2], date[6:10]
        # For some reason the iMessage HTML file starts with "-------"
        # and it is tagged as a date, so this if statement deals with
        # that case.
        if not month.startswith('-'):
            formatted = year + "_" + month
            dates_data[formatted] += 1

    # The exported iMessage HTML distinguishes the two senders only by CSS
    # class: one gets triangle-isosceles and the other triangle-isosceles2.
    # Our totals_data dict instead uses actual names as keys, and one of
    # those keys is ALWAYS my full name, because the data is run through the
    # Facebook Messenger code first, which stores my name as a dict key.
    # Using this, we can store the other person's name in a variable and use
    # it whenever we need to index/update that person's message count.
    for person in totals_data.keys():
        if person != "Matthew Grosman":
            other_person = person

    # Store this information in a dictionary to make it easy to just index in
    # the get_number_messages() function like we did in the Facebook
    # statistics code.
    name_conversion = {
        'triangle-isosceles': other_person,
        'triangle-isosceles2': "Matthew Grosman"
    }

    # All text messages are stored in a <p> tag, so we search for those and
    # keep only the ones whose class is date, triangle-isosceles, or
    # triangle-isosceles2. The date class is simply the date and time that a
    # message was sent; all other classes are irrelevant to this data
    # collection. The 'message' variable assigned on each iteration is a Tag
    # from the BeautifulSoup module.
    for message in imessage_soup.find_all(
            'p',
            attrs={
                'class': ['date', 'triangle-isosceles', 'triangle-isosceles2']
            }):
        content = message.string
        # message['class'][0] is the class name. Indexing is needed because
        # message['class'] is technically a list, although it only contains
        # one item, at least in the context of this project and my testing.
        if message['class'][0] == 'date':
            get_dates(content)
        else:
            get_number_messages(message['class'][0])
            get_number_words(content)

    # Sort dates_data so the Excel spreadsheet is nicely formatted
    dates_data = {k: v for k, v in sorted(dates_data.items())}

    return totals_data, dates_data, words.sort_dict(word_data)
import collections
import functools as ft  # assumed alias, based on the ft.reduce call below


def get_first_step(steps_dict: collections.defaultdict) -> str:
    return sorted(
        set(steps_dict.keys()).difference(
            set(ft.reduce(lambda a, b: a + b, steps_dict.values()))))[0]
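# get_first_step() and get_possible_options() above share the same
# steps_dict shape: each key is a step, and its value lists the steps it
# unlocks. A hypothetical example:
steps_dict = collections.defaultdict(list)
steps_dict['C'] += ['A', 'F']
steps_dict['A'] += ['B', 'D']

print(get_first_step(steps_dict))             # 'C' -- a key that is never a value
print(get_possible_options(steps_dict, 'C'))  # ['A', 'B', 'D', 'F']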
import collections


def dict_to_state(data: collections.defaultdict, mn=None, mx=None):
    if mn is None:
        mn = min(data.keys()) - 1
    if mx is None:
        mx = max(data.keys()) + 2
    # Membership test instead of direct indexing, so a defaultdict is not
    # mutated just by rendering it
    return ''.join((data[x] if x in data else '.' for x in range(mn, mx)))
from collections import defaultdict


def compareDicts(words1: defaultdict, words2: defaultdict):
    # O(len(words1)) + O(len(words2)) to build the sets, plus
    # O(min(len(words1), len(words2))) for the intersection
    print(len(set(words1.keys()).intersection(set(words2.keys()))))
from collections import defaultdict


def usingGraphs(self, d: defaultdict):
    print(" graph is defaultdict -> ", d)
    print(" keys -> ", type(d.keys()), " d.get(a) -> ", d.get('a'))
    # get the edges
    for key in d.keys():
        print(" graph key -> ", key, "graphs edges -> ", d[key])