def __main__():
    with open('MobyDick.txt') as inputfile:
        # Read the text, uppercase it, and strip punctuation, spaces and newlines (Python 2 str.translate)
        f = inputfile.read().upper().translate(None, string.punctuation + " \n")
    q = 0
    while q < 1:
        q = int(raw_input("Please enter block length q >= 1 "))
    #q = 4
    dicts = functions.block_distribution(q, f)
    functions.entropy(dicts)
    functions.block_index_of_coincidence(q, f)
    #functions.letters_frequencies(f, letters)
    return 0
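Note: the `functions` module used above is not part of the listing. As a rough sketch of what its `block_distribution` and `entropy` helpers could look like (the names, signatures and the bits-based Shannon entropy are assumptions, not the original code):

import math
from collections import Counter

def block_distribution(q, text):
    # Hypothetical helper: relative frequencies of every length-q block (sliding window over the text)
    counts = Counter(text[i:i + q] for i in range(len(text) - q + 1))
    total = float(sum(counts.values()))
    return {block: count / total for block, count in counts.items()}

def entropy(distribution):
    # Shannon entropy in bits of a distribution given as a dict of probabilities
    return -sum(p * math.log(p, 2) for p in distribution.values() if p > 0)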
Example #2
    def __init__(self, input_data, random_subset=False):
        """
        Initializes a new Decision Tree Node. If random_subset is True, features are chosen only from a random
        subset that is newly drawn at each node.

        :param input_data: pandas data frame
        :param random_subset: boolean
        """
        self.input_data = input_data
        self.n = max(input_data.count())
        self.left, self.right = None, None
        # If the input data is already pure (zero entropy) or has fewer than 10 rows, make this node a leaf
        if fc.entropy(input_data) == 0 or max(input_data.count()) < 10:
            self.leaf = True
            self.decision = None, None, None
            self.information_gain = 0
        else:
            feature_type, best_feature, split, information_gain = fc.best_split(input_data, random_subset)
            self.information_gain = information_gain
            # If no information gain was possible, the node should be a leaf
            if self.information_gain == 0:
                self.leaf = True
                self.decision = None, None, None
            else:
                # Otherwise, node is not a leaf and has a decision type
                self.leaf = False
                self.decision = feature_type, best_feature, split
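The `fc` module (the project's own feature/criterion helpers) is not included in this example. A minimal sketch of the entropy and information-gain computation a `best_split` of this kind typically relies on (illustrative only; the function names and label handling are assumptions):

import math

def entropy(labels):
    # Shannon entropy (in bits) of a sequence of class labels
    if len(labels) == 0:
        return 0.0
    n = float(len(labels))
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    return -sum((c / n) * math.log(c / n, 2) for c in counts.values())

def information_gain(parent_labels, left_labels, right_labels):
    # Entropy reduction achieved by splitting the parent node into the two child nodes
    n = float(len(parent_labels))
    children = (len(left_labels) / n) * entropy(left_labels) + (len(right_labels) / n) * entropy(right_labels)
    return entropy(parent_labels) - children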
Example #3
    def build_tree(self, X, y, feature_indices, depth):
        # Stop at the maximum depth, when too few samples remain, or when the labels are already pure
        if depth == self.max_depth or len(y) < self.min_split or entropy(y) == 0:
            return mode(y)[0][0]
        
        feature_index, threshold = find_split(X, y, feature_indices)

        X_true, y_true, X_false, y_false = split(X, y, feature_index, threshold)
        # Degenerate split: one side is empty, so return a leaf with the majority label
        if y_true.shape[0] == 0 or y_false.shape[0] == 0:
            return mode(y)[0][0]
        
        branch_true = self.build_tree(X_true, y_true, feature_indices, depth + 1)
        branch_false = self.build_tree(X_false, y_false, feature_indices, depth + 1)

        return Node(feature_index, threshold, branch_true, branch_false)
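`Node`, `find_split`, `split` and `mode` come from the surrounding project (`mode` looks like `scipy.stats.mode`) and are not shown here. A minimal, compatible sketch of the node type and of the prediction walk that would consume such a tree (the `<=` threshold direction is an assumption):

class Node(object):
    # Internal tree node: the feature/threshold to test plus the two subtrees
    def __init__(self, feature_index, threshold, branch_true, branch_false):
        self.feature_index = feature_index
        self.threshold = threshold
        self.branch_true = branch_true
        self.branch_false = branch_false

def predict_one(node, x):
    # Follow the splits until a leaf is reached; leaves are plain class labels returned by build_tree
    while isinstance(node, Node):
        node = node.branch_true if x[node.feature_index] <= node.threshold else node.branch_false
    return node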
Example #4
 def __init__(self, byteSeq):
     # Record the SHA-1 digest, Shannon entropy and length of the raw byte sequence
     self.hash = hashlib.sha1(byteSeq)
     self.entropy = functions.entropy(byteSeq)
     self.length = len(byteSeq)
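Here `functions.entropy` is applied to a raw byte sequence (for example to characterise compressed or encrypted data alongside its SHA-1 hash). One common way to compute that value, assuming Shannon entropy in bits per byte is what is intended:

import math
from collections import Counter

def entropy(byte_seq):
    # Shannon entropy in bits per byte of a bytes/bytearray object (0.0 for empty input)
    if not byte_seq:
        return 0.0
    total = float(len(byte_seq))
    counts = Counter(byte_seq)
    return -sum((c / total) * math.log(c / total, 2) for c in counts.values())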
Example #5
 window = Sg.Window('Lab №2, Huffman\'s code', layout)
 dict_coded = {}
 while True:  # The Event Loop
     event, values = window.read()
     try:
         if event in (None, 'Exit', 'Cancel'):
             break
         elif event == "Code text":
             # Build the Huffman code from the probability/alphabet file, then encode the chosen text file
             if values[0] and values[1]:
                 p, a = input.input_probability_and_alphabet(values[0])
                 text = input.input_text(values[1])
                 dict_coded = f.hafman(p, a)
                 outInfo.update(
                     char_out.format(
                         dict_coded,
                         f.entropy(p),
                         f.redundancy(p),
                         f.average_codeword(dict_coded.values()),
                         f.crafting_inequality(dict_coded.values()),
                     ))
                 text = f.code_text(text, dict_coded)
                 outCodedText.update(text)
                 output.output("ResultCode.txt", text)
         elif event == "Decode text":
             # Decode the previously saved code file using the stored Huffman codebook
             with open("ResultCode.txt", 'r') as result_code:
                 text = result_code.read()
             text = f.decode_text(text, dict_coded)
             outDecodedText.update(text)
             output.output("ResultDecode.txt", text)
         else:
             print("Choose files")