Example #1
def life(rules: collections.defaultdict, state: collections.defaultdict):
    # print("Life", dict_to_state(state))
    mn = min(state.keys()) - 3
    mx = max(state.keys()) + 3
    # print(mn, '->', mx)
    d = {}  # fresh dict for the next generation; only live cells ('#') are stored
    for x in range(mn, mx + 2):
        substate = dict_to_state(state, x - 2, x + 3)
        result = rules[substate]
        # print(x, substate, '->', result)
        if result == '#':
            d[x] = result
    return d
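A minimal driver sketch for life(), assuming the setup the code implies: rules maps 5-character neighborhood strings to '#' or '.', and dict_to_state is the helper shown in Example #25. The rule set and initial state below are made up for illustration.

import collections

rules = collections.defaultdict(lambda: '.', {'.###.': '#'})
state = {i: c for i, c in enumerate('###')}
state = life(rules, state)   # advance one generation
print(dict_to_state(state))  # '.#.' -- only the middle cell survives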
Example #2
    def save_dict_to_file(self, my_dict: defaultdict) -> None:
        try:
            os.makedirs(TARGET_PATH, exist_ok=True)
            for code in my_dict.keys():
                counter = 1
                with open(f'{self.target_path}{code}.txt',
                          mode='w',
                          encoding="UTF-8") as file:
                    for name in my_dict[code]:
                        file.write(f'{counter}. {name}\n')
                        counter += 1

        except Exception:
            print('File write failed!')
            raise  # re-raise the original exception, preserving the traceback
Example #3
def month_agg(df: pandas.DataFrame, features: defaultdict):
    agg = {
        'mean': numpy.mean,
        'std': numpy.std,
        'max': numpy.max,
        'median': numpy.median
    }

    for raw_feature in features.keys():
        if 'month' in raw_feature:
            for obj_key, obj_value in features[raw_feature].items():
                agg_feature = raw_feature.replace('month', 'month_agg')
                for agg_name, agg_method in agg.items():
                    features[agg_feature][obj_key][agg_name] = agg_method(
                        numpy.array(list(obj_value.values())))
Example #4
def solution(n: int, stars: defaultdict, Tree):
    tree = Tree([0 for _ in range(400001)])
    ys = stars.keys()
    ys = sorted(ys, reverse=True)
    faces = 0

    for y in ys:
        for x in stars[y]:
            left = tree.sum(0, x - 1)
            right = tree.sum(x + 1, 400000)
            faces = (faces + ((left % MOD) * (right % MOD))) % MOD
        for x in stars[y]:
            tree.update(x, 1)

    return faces
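The Tree class and MOD constant are external to this excerpt. A hypothetical Fenwick-tree (binary indexed tree) sketch matching the interface used above (0-indexed point update, inclusive range sum), with a guessed modulus:

MOD = 1_000_000_007  # assumption; the real constant is defined elsewhere

class Tree:
    def __init__(self, data):
        # the initial data is assumed to be all zeros, as in solution() above
        self.n = len(data)
        self.bit = [0] * (self.n + 1)  # 1-based Fenwick array

    def update(self, i, delta):
        # 0-indexed point update
        i += 1
        while i <= self.n:
            self.bit[i] += delta
            i += i & -i

    def _prefix(self, i):
        # sum of elements [0..i], 0-indexed; returns 0 when i < 0
        i += 1
        total = 0
        while i > 0:
            total += self.bit[i]
            i -= i & -i
        return total

    def sum(self, lo, hi):
        # inclusive range sum over [lo, hi]
        return self._prefix(hi) - self._prefix(lo - 1)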
Example #5
def select_node(start_node: Any,
                neighbors: defaultdict) -> Generator[Any, None, None]:
    node = start_node
    yield node
    while len(neighbors) > 1:
        options = neighbors[node]
        _remove_from_neighbors(neighbors, node)
        if len(options) > 0:
            min_len = min([len(neighbors[option]) for option in options])
            node = choice([
                option for option in options
                if len(neighbors[option]) == min_len
            ])
        else:
            node = choice(list(neighbors.keys()))
        yield node
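The generator depends on a _remove_from_neighbors helper that is not part of the excerpt. A plausible sketch, assuming neighbors maps each node to a mutable collection of adjacent nodes:

def _remove_from_neighbors(neighbors: defaultdict, node) -> None:
    # drop the node's own adjacency entry, so len(neighbors) shrinks...
    neighbors.pop(node, None)
    # ...and remove it from every remaining adjacency list, so that
    # len(neighbors[option]) counts only unvisited candidates
    for options in neighbors.values():
        if node in options:
            options.remove(node)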
Example #6
def perplexity_lidstone(set_stat: defaultdict, train_stat: defaultdict, lamb,
                        num_all_events):
    """
    Calculate 2^ -(sum(p(w \in W)/ |H|) when using lidstone to calculate probabilities
    :param set_stat: The set evaluating on
    :param lamb: the lambda value to use in lidstone
    :param train_stat: data of the trained model
    :param num_all_events: number of events in train
    :return: the perplexity of the model
    """
    log_sum = 0
    for w in set_stat.keys():
        log_sum += log2(lidstone(w, train_stat, lamb,
                                 num_all_events)) * set_stat[w]
    perp_score = 2**(-(log_sum / total_event_num(set_stat)))
    return round(perp_score, 2)
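A quick sanity check under stated assumptions: lidstone() and total_event_num() are external, so the stubs below merely stand in for them. With a uniform probability of 1/4, the perplexity must come out to exactly 4.0.

from math import log2

def lidstone(w, train_stat, lamb, num_all_events):
    return 0.25  # stub: uniform probability, for illustration only

def total_event_num(set_stat):
    return sum(set_stat.values())  # stub: total token count

print(perplexity_lidstone({'a': 2, 'b': 3}, {}, 0.1, 0))  # 4.0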
Example #7
def find_ticket_fields(tickets: defaultdict, ticket_rules: defaultdict):
    """Assigns ticket fields to indexes based on the ticket rules and seen ticket values."""
    index_field_map = defaultdict(str)
    unassigned_fields = set(ticket_rules.keys())
    while len(unassigned_fields) != 0:
        unassigned_tickets = [i for i in tickets if i not in index_field_map]
        for i in unassigned_tickets:
            possible_fields = [
                field for field in unassigned_fields if tickets[i].issubset(ticket_rules[field])
            ]
            if len(possible_fields) == 0:
                raise Exception(f"mistake in rule parsing: no possible values for {i}")
            if len(possible_fields) == 1:
                found = possible_fields[0]
                index_field_map[i] = found
                unassigned_fields.remove(found)
    return index_field_map
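A hypothetical mini-input showing the elimination at work: index 1 is forced to 'seat' on the first pass, which leaves 'row' as the only candidate for index 0 on the second.

from collections import defaultdict

tickets = defaultdict(set, {0: {3, 5}, 1: {7, 9}})
ticket_rules = defaultdict(set, {'row': {3, 5}, 'seat': {3, 5, 7, 9}})
print(dict(find_ticket_fields(tickets, ticket_rules)))  # {1: 'seat', 0: 'row'}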
Example #8
def flatten(tree_: defaultdict, depth: int = 0):
    """Takes tree dict and returns list of terms with depth values

    Variation on ptr() from the gist
    Recursively gets term objects from terms_dict, adds depth,
    and appends to list of terms.

    @param tree_: defaultdict Tree
    @param depth: int Depth of indents
    """
    for key in sorted(tree_.keys()):
        term = terms_dict[key]
        term['depth'] = depth
        terms.append(term)
        depth += 1
        flatten(tree_[key], depth)
        depth -= 1
Example #9
def check_possible(t_num_to_indexes: defaultdict, s: List[int]):
    t_nums = list(t_num_to_indexes.keys())
    t_nums.sort(key=lambda num: len(t_num_to_indexes[num]),
                reverse=True)
    for target in t_nums:
        possible = True
        rotate_count = 0
        for num in t_nums:
            if num == target:
                continue
            indexes = t_num_to_indexes[num]
            if all(s[i] == target for i in indexes):
                rotate_count += len(indexes)
            else:
                possible = False
                break
        if possible:
            return rotate_count
Example #10
def generate_message(pressed_buttons: set, axes: defaultdict, hats: set):
    assert all((p in buttonmapping for p in pressed_buttons))
    assert all((k in axismapping for k in axes.keys()))
    assert all((h in hatmapping for h in hats))

    buttons_encoded = sum([
        1 << buttonmapping.index(button_name)
        for button_name in pressed_buttons
    ])
    hat_encoded = hatcodes[sum(
        [1 << hatmapping.index(d_pad_str) for d_pad_str in hats])]

    rawaxis_encoded = [axes[axis] for axis in axismapping]
    axis_encoded = [((0 if abs(x) < axis_deadzone else x) >> 8) + 128
                    for x in rawaxis_encoded]
    rawbytes = struct.pack('>BHBBBB', hat_encoded, buttons_encoded,
                           *axis_encoded)
    return binascii.hexlify(rawbytes) + b'\n'
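The encoder relies on several module-level mappings that the excerpt does not show. Hypothetical stand-ins, chosen only to be consistent with the '>BHBBBB' pack format (one hat byte, a 16-bit button field, four axis bytes):

import binascii
import struct

buttonmapping = ['A', 'B', 'X', 'Y']          # at most 16 names fit the H field
axismapping = ['lx', 'ly', 'rx', 'ry']        # exactly four axes -> BBBB
hatmapping = ['up', 'right', 'down', 'left']
hatcodes = list(range(16))                    # d-pad bitmask -> hat code
axis_deadzone = 256                           # in raw signed 16-bit units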
Example #11
def trend(df: pandas.DataFrame, features: defaultdict):
    # use z-score to normalize
    for feature_name, feature_value in features.items():
        if 'monthly' in feature_name:
            for obj_key, obj_value in feature_value.items():
                coefficient = numpy.vstack(
                    [list(obj_value.keys()),
                     numpy.ones(len(obj_value))]).T
                trend_slope = feature_name.replace('monthly', 'trend_slope')
                features[trend_slope][obj_key]['slope'] = numpy.linalg.lstsq(
                    coefficient, list(obj_value.values()), rcond=None)[0]

                trend_bias = feature_name.replace('monthly', 'trend_bias')
                index, last_month = max(enumerate(obj_value.keys()),
                                        key=operator.itemgetter(1))
                all_value = numpy.array(list(obj_value.values()))
                features[trend_bias][obj_key]['bias'] = stats.zscore(
                    all_value)[index]
Example #12
def cycle(grid: defaultdict) -> None:
    mutations = []
    for point in list(grid.keys()):
        bc = 0
        for d in dirs.values():
            np = Point(point.x + d.x, point.y + d.y, point.z + d.z)
            # reading grid[np] on a defaultdict also inserts unseen
            # neighbors, so they get examined on the next cycle
            if grid[np] == 'black':
                bc += 1

        color = grid[point]

        if color == 'black' and (bc == 0 or bc > 2):
            mutations.append(point)
        elif color == 'white' and bc == 2:
            mutations.append(point)

    for p in mutations:
        grid[p] = 'black' if grid[p] == 'white' else 'white'
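Supporting definitions this function assumes (a Point type, six hex directions in cube coordinates, and a grid whose default color is white), sketched hypothetically:

from collections import defaultdict, namedtuple

Point = namedtuple('Point', ['x', 'y', 'z'])
dirs = {'e': Point(1, -1, 0), 'w': Point(-1, 1, 0),
        'ne': Point(1, 0, -1), 'sw': Point(-1, 0, 1),
        'nw': Point(0, 1, -1), 'se': Point(0, -1, 1)}

grid = defaultdict(lambda: 'white')
grid[Point(0, 0, 0)] = 'black'
cycle(grid)  # a lone black tile has no black neighbors, so it flips to white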
Example #13
def unpack_cooccurrence(
    cooccurence_dict: defaultdict
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    :param cooccurence_dict: a dictionary containing the distance between each pair of words.
    :return: the co-occurrence dictionary unpacked into parallel arrays.
    """
    first, second, x_ijs = [], [], []
    for first_id in cooccurence_dict.keys():
        for second_id in cooccurence_dict[first_id].keys():
            x_ij = cooccurence_dict[first_id][second_id]
            # add (main, context) pair
            first.append(first_id)
            second.append(second_id)
            x_ijs.append(x_ij)
            # add (context, main) pair
            first.append(second_id)
            second.append(first_id)
            x_ijs.append(x_ij)

    return np.array(first), np.array(second), np.array(x_ijs)
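For example, with made-up counts, each pair comes back in both (main, context) and (context, main) order:

from collections import defaultdict
import numpy as np

cooc = defaultdict(dict, {1: {2: 3.0}})
first, second, x_ijs = unpack_cooccurrence(cooc)
print(first, second, x_ijs)  # [1 2] [2 1] [3. 3.]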
Example #14
def get_black_neighbors(tiles: defaultdict, coord: Tuple[int, int]) -> int:
    black_neighbors = [(coord[0] + direction[0], coord[1] + direction[1])
                       in tiles.keys() for direction in directions.values()]

    return sum(black_neighbors)
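Assuming a directions mapping of the usual six axial hex offsets (not shown in the excerpt), a tiny check:

directions = {'e': (1, 0), 'w': (-1, 0), 'ne': (1, -1),
              'sw': (-1, 1), 'nw': (0, -1), 'se': (0, 1)}
tiles = {(1, 0): True, (0, 1): True}  # two black tiles
print(get_black_neighbors(tiles, (0, 0)))  # 2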
Example #15
def histogram(word_dict: defaultdict, interactive=False):
    fig = plt.figure()
    ax = plt.subplot()
    words = list(word_dict.keys())
    counts = list(word_dict.values())
    wc = zip(words, counts)
    wc = sorted(wc, key=lambda elem: elem[1], reverse=True)
    words, counts = zip(*wc)
    bars = plt.bar(words, counts, color='g', tick_label=None)

    curr_word = ax.annotate("",
                            xy=(0, 0),
                            xytext=(40, 40),
                            textcoords="offset points",
                            arrowprops=dict(arrowstyle="->"))
    curr_word.set_visible(False)

    def update_label(label, bar):
        x = bar.get_x() + bar.get_width() / 2.
        y = bar.get_y() + bar.get_height()
        curr_word.xy = (x, y)
        curr_word.set_text(label)

    def draw_labels():
        for i, bar in enumerate(bars):
            offset_x = 10 * (i % 20)
            offset_y = 15 * (i % 20)
            x = bar.get_x() + bar.get_width() / 2.
            y = bar.get_y() + bar.get_height()
            word = ax.annotate(words[i],
                               xy=(x, y),
                               xytext=(40 + offset_x, 40 + offset_y),
                               textcoords="offset points",
                               bbox=dict(boxstyle="round", fc="0.8"),
                               arrowprops=dict(arrowstyle="-", alpha=0.2))
        fig.canvas.draw_idle()

    def show_label_on_plot_hover(event):
        vis = curr_word.get_visible()
        hover_on_bar = False
        for i, bar in enumerate(bars):
            if bar.contains(event)[0]:
                hover_on_bar = True
                update_label(words[i], bar)
                curr_word.set_visible(True)
                break
        if vis and not hover_on_bar:
            curr_word.set_visible(False)
        fig.canvas.draw_idle()

    plt.xlabel('Words')
    plt.ylabel('Occurrences')
    plt.title('Histogram of words in Ed Stafford: First Man Out')
    plt.tick_params(axis='x',
                    which='both',
                    bottom=False,
                    top=False,
                    labelbottom=False)
    if interactive:
        fig.canvas.mpl_connect('motion_notify_event', show_label_on_plot_hover)
    else:
        draw_labels()
    plt.show()
Example #16
def getOrderedKeys(data: defaultdict) -> list:
    ord_keys = []
    for i in sorted(data.keys()):
        ord_keys.append(i)
    return ord_keys
Example #17
class UtilMultiFile(UtilObject):
    """
    Keeps specified number of files opened, for read or write
    Attributes:
        mode - read or write
        maxCount - maximum number of opened files at any given moment
        hitCount - number of open file hits
        xactCount - number of transactions (reads or writes)
        fileList - list of open file names, sorted by the time
        fileDict - map of file name to a file handle
    """
    def __init__(self, maxCount, mode):
        self.maxCount = maxCount
        self.mode = mode
        self.fileList = []
        self.fileDict = {}
        self.fileCache = DefDict(list)
        self.hitCount = 0
        self.xactCount = 0

    def write(self, fileName, line):
        # Try to cache it first
        lines = self.fileCache[fileName]
        lines.append(line)
        if len(lines) > 100:
            self.cacheFlush(fileName)

    def cacheFlush(self, fileName):
        assert (self.mode[0] in ('w', 'a'))
        f = self.fileHandle(fileName)
        for l in self.fileCache[fileName]:
            try:
                f.write(l)
            except IOError as e:
                print("Could not write to %s: error %d %s" %
                      (fileName, e.errno, e.strerror))
                return
        self.fileCache[fileName] = []
        self.xactCount += 1

    def fileHandle(self, fileName):
        if fileName not in self.fileDict:
            if len(self.fileList) == self.maxCount:
                oldFileName = self.fileList[0]
                self.fileDict[oldFileName].close()
                del self.fileDict[oldFileName]
                self.fileList = self.fileList[1:]
            try:
                f = open(fileName, self.mode)
            except IOError as e:
                print("Could not open %s: error %d %s" %
                      (fileName, e.errno, e.strerror))
                return None
            self.fileDict[fileName] = f
            self.fileList.append(fileName)
        else:
            self.hitCount += 1
        return self.fileDict[fileName]

    def closeAll(self):
        for fileName in self.fileCache.keys():
            self.cacheFlush(fileName)
        for f in self.fileDict.values():
            f.close()
        self.fileDict = {}
        self.fileList = []

    def getStats(self):
        return "%u hits out of %u transactions: %u%%" % (
            self.hitCount, self.xactCount,
            (100 * self.hitCount / self.xactCount) if self.xactCount else 0)
Example #18
def print_registration_identifier(hull: defaultdict) -> None:
    panels = list(hull.keys())
    identifier = create_painted_identifier(panels)
    print_bw_image_to_console(identifier, PANELS.WHITE, PANELS.BLACK)
Example #19
# Now that we have built the dictionary of average distances, let's translate
# it into the best reclassified TaxaTypes

# First, build dictionary dir -> TaxaTypeTree
print("\nBuilding TaxaType trees...")
taxaTypeTree = TaxaTypeTree(set([x.type for x in taxaDict.values()]))
UtilStore(taxaTypeTree, TAXA_TYPE_TREE())

taxaDistCntDict = UtilLoad(GENOME_TAX_DIST_CNT_DICT())

# Now find optimal reclassified TaxaTypes, and dump them into a file
print("Build reclassification...")
reclassObjList = []
dumpDirNodeCostDict = {}
for dir in dirTaxaTypeDictDict.keys():
    nodeCostDict = taxaTypeTree.bldCostDict(dirTaxaTypeDictDict[dir])
    dumpDirNodeCostDict[dir] = taxaTypeTree.utilJsonDump(
        nodeAttribDict = nodeCostDict)
    taxaType, cost = taxaTypeTree.optimal(nodeCostDict)
    dist = taxaDict[dir].type.distance(taxaType)
    reclassObjList.append(UtilObject(dir = dir, cogCorr = dirCorrDict[dir],
        oldClassif = taxaDict[dir].type, newClassif = taxaType,
        taxaDist = dist, cogDist=cost, taxaDistCnts = taxaDistCntDict[dir]))

UtilStore(dumpDirNodeCostDict, DIR_NODE_COST_DICT())

reclassObjList = sorted(reclassObjList, key = lambda x: x.cogCorr)

UtilStore([x for x in reclassObjList if x.taxaDist > 0],
          HIER_RECLASSIFIED_LIST())
Example #20
def get_possible_options(steps_dict: collections.defaultdict, path: str):
    return (sorted(
        set(steps_dict.keys()).union(
            set(ft.reduce(lambda a, b: a + b,
                          steps_dict.values()))).difference(set(path))))
Example #21
def unique_words(hist: defaultdict):
    return len(hist.keys())
Example #22
def convo_totals(imessage_soup: BeautifulSoup, totals_data: defaultdict,
                 word_data: defaultdict,
                 dates_data: defaultdict) -> (dict, dict, dict):

    # These functions are nested inside convo_totals() so that they share its
    # namespace and can read and update the data dicts directly. The
    # alternative, defining them outside and passing the dicts as arguments on
    # every call, would mean passing fairly large dictionaries several hundred
    # (or several thousand) times, which slows the program down. Nesting them
    # keeps the program a bit more efficient.

    def get_number_messages(sender: str) -> None:
        """ Updates the number of messages a person sent"""
        totals_data[name_conversion[sender]] += 1

    def get_number_words(message: str) -> None:
        """ Updates the number of keywords sent """
        if message:
            buffer = ""

            for char in message:
                if char.isalnum():
                    buffer += char.lower()
                else:
                    if buffer in words.words:
                        word_data[buffer] += 1
                    buffer = ""

    def get_dates(date: str) -> None:
        """ Updates the number of messages sent in a certain time """
        # Each date is passed through in the form MM/DD/YYYY HR:MN:SC
        month, year = date[:2], date[6:10]

        # For some reason the iMessage HTML file starts with "-------"
        # and it is tagged as a date, so this if statement deals with
        # that case.
        if not month.startswith('-'):
            formatted = year + "_" + month
            dates_data[formatted] += 1

    # The iMessage export is an HTML file that distinguishes senders by CSS
    # class: one sender gets the class triangle-isosceles and the other gets
    # triangle-isosceles2. Our totals_data dict instead uses actual names as
    # keys, and because the data is run through the Facebook Messenger code
    # first, one of those keys will ALWAYS be my full name. So the remaining
    # key must be the other person's name; we store it in a variable and use
    # it whenever we need to index/update that person's message count.
    for person in totals_data.keys():
        if person != "Matthew Grosman":
            other_person = person

    # Store this information in a dictionary to make it easy to just index in the
    # get_number_messages() function like we did in the facebook statistics code.
    name_conversion = {
        'triangle-isosceles': other_person,
        'triangle-isosceles2': "Matthew Grosman"
    }

    # All text messages are stored in <p> tags, so we search for those and
    # filter them down to the ones whose class is date, triangle-isosceles, or
    # triangle-isosceles2. The date class is simply the date and time that a
    # message was sent; all other classes are irrelevant to this data
    # collection. The 'message' variable we assign into on each iteration is a
    # Tag type from the BeautifulSoup module.
    for message in imessage_soup.find_all(
            'p',
            attrs={
                'class': ['date', 'triangle-isosceles', 'triangle-isosceles2']
            }):
        content = message.string

        # message['class'][0] represents the class. We need to use indexing since
        # message['class'] is technically a list, although it only contains one
        # item, at least in the context of this project and my testing.
        if message['class'][0] == 'date':
            get_dates(content)

        else:
            get_number_messages(message['class'][0])
            get_number_words(content)

    # Sort dates_data so the excel spreadsheet is nicely formatted
    dates_data = {k: v for k, v in sorted(dates_data.items())}
    return totals_data, dates_data, words.sort_dict(word_data)
Example #23
def get_first_step(steps_dict: collections.defaultdict) -> str:
    return sorted(
        set(steps_dict.keys()).difference(
            set(ft.reduce(lambda a, b: a + b, steps_dict.values()))))[0]
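This treats steps_dict as a dependency graph in which each key precedes every step in its value list, so the first step is the one that appears in no value list. A made-up example:

import collections
import functools as ft

steps_dict = collections.defaultdict(list, {'C': ['A', 'F'], 'A': ['B']})
print(get_first_step(steps_dict))  # 'C': the only step no other step unlocks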
Example #25
def dict_to_state(data: collections.defaultdict, mn=None, mx=None):
    if mn is None:
        mn = min(data.keys()) - 1
    if mx is None:
        mx = max(data.keys()) + 2
    return ''.join((data[x] if x in data else '.' for x in range(mn, mx)))
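A one-line illustration of the default window, which pads one cell of '.' on each side of the occupied range:

print(dict_to_state({0: '#', 2: '#'}))  # '.#.#.'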
Example #26
def compareDicts(words1: defaultdict, words2: defaultdict):
    # O(len(words1)) + O(len(words2)) to build the sets, plus
    # O(min(len(words1), len(words2))) for the intersection
    print(len(set(words1.keys()).intersection(set(words2.keys()))))
Example #27
def usingGraphs(self, d: defaultdict):
    print(" graph is defaultdict -> ", d)
    print(" keys -> ", type(d.keys()), " d.get(a) -> ", d.get('a'))
    # get the edges
    for key in d.keys():
        print(" graph key -> ", key, " graph's edges -> ", d[key])