Example #1
0
def _compress_data(data, dictionary, *, initial_phrase, compression_end):
    """Encode ``data`` into a bit string, growing ``dictionary`` on the way.

    Symbols are appended to the current phrase for as long as the phrase is
    a key of ``dictionary``. On the first miss, the code stored for the
    phrase without its last symbol is emitted (run through
    ``util.extend_to_length`` with the current code length), the missed
    phrase is registered under the binary form of the dictionary's size, and
    the phrase restarts from its last symbol.

    :param data: iterable of symbols to encode
    :param dictionary: mapping phrase -> binary code; mutated in place
    :param initial_phrase: phrase to start from (e.g. carried over from a
        previous call)
    :param compression_end: when true, the code of the phrase still pending
        after the last symbol is emitted as well
    :return: tuple of (emitted codes joined with ``_empty_str``, pending
        phrase)
    """
    emitted = []
    code_length = len(util.to_binary(len(dictionary)))
    phrase = initial_phrase

    for symbol in data:
        phrase += symbol
        if phrase in dictionary:
            # Still a known phrase: keep extending it.
            continue

        known_prefix = phrase[:-1]
        emitted.append(
            util.extend_to_length(dictionary[known_prefix],
                                  code_length,
                                  ending_bit=_zero_bit))

        # The size check happens before the new entry is inserted.
        if _is_power_of_two(len(dictionary)):
            code_length += 1

        dictionary[phrase] = util.to_binary(len(dictionary))

        # Restart from the symbol that caused the miss.
        phrase = phrase[-1]

    if compression_end:
        emitted.append(
            util.extend_to_length(dictionary[phrase],
                                  code_length,
                                  ending_bit=_zero_bit))

    return _empty_str.join(emitted), phrase
Example #2
0
 def translate_a(self, parser):
     '''
     Build the binary string for the parser's current A instruction.

     A symbol made only of digits is encoded as-is. Any other symbol is
     resolved through ``self.symbol_table``; a symbol the table does not
     contain yet is first added at ``self.symbol_address``, which is then
     incremented.

     Returns "0" followed by ``to_binary`` of the value, zero-filled to 15
     characters.
     '''
     assert parser.commandType() == parser.A_COMMAND
     symbol = parser.symbol()
     if symbol.isdigit():
         value = symbol
     else:
         if not self.symbol_table.contains(symbol):
             # First sighting of this symbol: give it the next free address.
             self.symbol_table.addEntry(symbol, self.symbol_address)
             self.symbol_address += 1
         value = self.symbol_table.getAddress(symbol)
     encoded = to_binary(value).zfill(15)
     return "0" + encoded
Example #3
0
def _omega_code(number):
    """Return the omega-style code of ``number`` as a bit string.

    Starting from ``number``, the binary form of the current value is
    recorded and the value is replaced by that form's length minus one,
    until the value reaches 1. The recorded groups are emitted last-first,
    followed by a single ``_zero_bit``.

    ``number`` is validated by ``_ensure_correct_number`` before encoding.
    """
    _ensure_correct_number(number)

    # Groups are collected in reverse output order; the terminator goes
    # first so that it ends up last after the reversal below.
    groups = [_zero_bit]
    remaining = number
    while remaining != 1:
        group = util.to_binary(remaining)
        groups.append(group)
        remaining = len(group) - 1

    return _empty_str.join(reversed(groups))
Example #4
0
def main(df,
         random_state,
         test_size,
         first_star,
         second_star,
         state="Given Data"):
    """
    Train and evaluate a ReviewClassifier on reviews of two star ratings.

    Steps: optionally keep only the rows whose "state" column equals
    ``state``, keep the rows whose ``stars_rev`` is one of the two given
    ratings, reduce the frame to the "text" and "stars_rev" columns (cast to
    str), pass it through ``util.to_binary``, clean the texts with
    ``util.clean_stem``, split into train/test sets, fit the model and print
    its evaluation metrics.

    :param df: Pandas dataframe with "state", "text" and "stars_rev" columns
    :param random_state: forwarded to train_test_split
    :param test_size: forwarded to train_test_split
    :param first_star: first star rating to keep
    :param second_star: second star rating to keep
    :param state: value matched against the "state" column; a falsy value
        skips the state filter
    :return: the fitted ReviewClassifier
    """
    reviews = df
    if state:
        reviews = reviews[reviews["state"] == state]

    wanted_stars = [first_star, second_star]
    reviews = reviews[reviews.stars_rev.isin(wanted_stars)]
    reviews = reviews[["text", "stars_rev"]].astype(str)
    reviews = util.to_binary(reviews, str(first_star), str(second_star))

    corpus = reviews["text"].values  # review texts
    y = reviews["stars_rev"].values  # target labels

    cleaned = util.clean_stem(corpus)

    split = train_test_split(cleaned,
                             y,
                             test_size=test_size,
                             random_state=random_state)
    X_train, X_test, y_train, y_test = split

    model = ReviewClassifier()
    model.fit(X_train, y_train)

    # Report the evaluation on the held-out split.
    matrix, recall, precision, accuracy = model.metrics_eval(X_test, y_test)
    print("Number of reviews: {}".format(len(corpus)), "\n")
    print("Confusion matrix")
    print(matrix, "\n")
    print("Recall: {}%".format(round(recall * 100, 2)))
    print("Precision: {}%".format(round(precision * 100, 2)))
    print("Accuracy: {}%".format(round(accuracy * 100, 2)))

    return model
Example #5
0
def _generate_dictionary():
    """Build the initial phrase -> code dictionary.

    Every character ``chr(i)`` for ``i`` in ``1 .. _dictionary_length - 1``
    maps to ``util.to_binary(i)``. One extra entry with the integer key
    ``-1`` maps to ``'0'``.
    """
    codes = {}
    for code_point in range(1, _dictionary_length):
        codes[chr(code_point)] = util.to_binary(code_point)
    codes[-1] = '0'
    return codes
Example #6
0
def _decompress_data(bits, dictionary, reversed_dictionary, *, initial_phrase):
    """Decode a bit string chunk by chunk, growing both dictionaries.

    :param bits: sequence of bits to decode (sliced and scanned for
        ``_one_bit``)
    :param dictionary: mapping phrase -> binary code; mutated in place
    :param reversed_dictionary: mapping binary code -> phrase; mutated in
        place alongside ``dictionary``
    :param initial_phrase: phrase to continue from; when it equals
        ``_empty_str`` the first code is read and decoded up front
    :return: tuple of (decoded text joined with ``_empty_str``, bits that
        were not consumed, current phrase)
    """
    result = list()
    # Codes are as wide as the binary form of the current dictionary size.
    code_length = len(util.to_binary(len(dictionary.keys())))
    i = 0

    if initial_phrase == _empty_str:
        # Fresh start: the first code is looked up directly and emitted.
        chunk = bits[:code_length]
        decompressed_chunk = reversed_dictionary[_remove_leading_zeros(chunk)]
        result.append(decompressed_chunk)

        phrase = decompressed_chunk

        i = code_length
        if _is_power_of_two(len(dictionary.keys())):
            code_length += 1
    else:
        phrase = initial_phrase

    # Keep going while a complete code of the current width is available.
    while i + code_length <= len(bits):
        chunk = bits[i:i + code_length]
        # A chunk without any _one_bit ends decoding (see the break below).
        if _one_bit in chunk:

            if _remove_leading_zeros(chunk) in reversed_dictionary.keys():
                decompressed_chunk = reversed_dictionary[_remove_leading_zeros(
                    chunk)]
            else:
                # Code not in the table yet: rebuild it as the current phrase
                # plus its own first symbol.
                decompressed_chunk = phrase + phrase[0]  # special case

            # Candidate entry: current phrase plus the first symbol of the
            # chunk that was just decoded.
            dict_element = phrase + decompressed_chunk[0]

            if not (dict_element in dictionary.keys()):
                result.append(decompressed_chunk)

                # Register the new phrase in both directions; each key/value
                # is the binary form of the respective dictionary's size
                # before the insertion.
                dictionary[dict_element] = util.to_binary(
                    len(dictionary.keys()))
                reversed_dictionary[util.to_binary(
                    len(reversed_dictionary.keys()))] = dict_element

                phrase = decompressed_chunk
            else:
                # Candidate already known: extend the phrase. Nothing is
                # appended to the output in this branch.
                phrase += decompressed_chunk

            i += code_length
            # Widen the code when the dictionary size is a power of two.
            if _is_power_of_two(len(dictionary.keys())):
                code_length += 1
        else:
            break

    joined_result = _empty_str.join(result)
    # Everything from the first unconsumed position is handed back.
    rest_bits = bits[i:]

    return joined_result, rest_bits, phrase
    'Supportive Environment Rating', 'Effective School Leadership Rating',
    'Strong Family-Community Ties Rating', 'Trust Rating',
    'Student Achievement Rating'
]

# Convert Yes/No to 1/0
binary_columns = ['Community School?']

# Overwrite each listed column with the result of the matching util
# converter, which is called with the frame and the column name.
for col in percent_columns:
    se_2016_renamed[col] = util.pct_to_number(se_2016_renamed, col)
for col in money_columns:
    se_2016_renamed[col] = util.money_to_number(se_2016_renamed, col)
for col in rating_columns:
    se_2016_renamed[col] = util.rating_to_number(se_2016_renamed, col)
for col in binary_columns:
    se_2016_renamed[col] = util.to_binary(se_2016_renamed, col)

# Rename every column through util.sanitize_column_names. This runs after the
# conversions above because those look columns up by their original names.
se_2016_renamed.columns = [
    util.sanitize_column_names(c) for c in se_2016_renamed.columns
]
# Preview of the result (presumably a notebook cell output; the value is not
# used otherwise).
se_2016_renamed.head()

# In[21]:

# Histogram (20 bins) of the non-null 'average_ela_proficiency' values.
plt.hist(
    se_2016_renamed.loc[se_2016_renamed['average_ela_proficiency'].notnull(),
                        'average_ela_proficiency'],
    bins=20)
plt.show()
plt.hist(
    se_2016_renamed.loc[se_2016_renamed['average_math_proficiency'].notnull(),
Example #8
0
def _delta_code(number):
    """Return the delta-style code of ``number`` as a bit string.

    The result is ``_gamma_code`` of the length of the number's binary form,
    followed by that binary form without its first bit.

    ``number`` is validated by ``_ensure_correct_number`` before encoding.
    """
    _ensure_correct_number(number)
    binary = util.to_binary(number)
    length_prefix = _gamma_code(len(binary))
    payload = binary[1:]  # the first bit is dropped
    return length_prefix + payload
Example #9
0
def _gamma_code(number):
    """Return the gamma-style code of ``number`` as a bit string.

    The result is the number's binary form preceded by one ``_zero_bit`` for
    every bit after its first.

    ``number`` is validated by ``_ensure_correct_number`` before encoding.
    """
    _ensure_correct_number(number)
    binary = util.to_binary(number)
    padding = _zero_bit * (len(binary) - 1)
    return padding + binary
Example #10
0
 def jump(self, mnemonic):
     '''
     Return the binary code of a jump mnemonic.

     The code is the mnemonic's position in ``self._jump_list``, converted
     with ``to_binary`` and zero-filled to 3 characters.
     '''
     assert mnemonic in self._jump_list
     position = self._jump_list.index(mnemonic)
     return to_binary(position).zfill(3)
Example #11
0
 def dest(self, mnemonic):
     '''
     Return the binary code of a dest mnemonic.

     The code is the mnemonic's position in ``self._dest_list``, converted
     with ``to_binary`` and zero-filled to 3 characters.
     '''
     assert mnemonic in self._dest_list
     position = self._dest_list.index(mnemonic)
     return to_binary(position).zfill(3)
Example #12
0
 def jump(self, mnemonic):
     '''
     Encode a jump mnemonic as a binary string.

     The mnemonic's index in ``self._jump_list`` is passed to ``to_binary``
     and the result is zero-filled to 3 characters.
     '''
     assert mnemonic in self._jump_list
     index = self._jump_list.index(mnemonic)
     bits = to_binary(index)
     return bits.zfill(3)
Example #13
0
 def dest(self, mnemonic):
     '''
     Encode a dest mnemonic as a binary string.

     The mnemonic's index in ``self._dest_list`` is passed to ``to_binary``
     and the result is zero-filled to 3 characters.
     '''
     assert mnemonic in self._dest_list
     index = self._dest_list.index(mnemonic)
     bits = to_binary(index)
     return bits.zfill(3)