def _compress_data(data, dictionary, *, initial_phrase, compression_end):
    """Encode ``data`` as a string of dictionary codes, growing ``dictionary``.

    The current phrase starts as ``initial_phrase`` and is extended by one
    item of ``data`` at a time.  As soon as the extended phrase is not a key
    of ``dictionary``:

    * the code stored for the phrase without its last item is emitted through
      ``util.extend_to_length`` using the current code length;
    * the code length is incremented when the dictionary size is a power of
      two (as reported by ``_is_power_of_two``);
    * the extended phrase is stored under ``util.to_binary`` of the
      dictionary size;
    * the phrase is reset to its last item.

    :param data: iterable whose items are appended to the phrase with ``+=``
    :param dictionary: mapping phrase -> code; mutated in place
    :param initial_phrase: phrase to start matching from (keyword-only)
    :param compression_end: when truthy, the code of the phrase still pending
        after the last item is emitted too (keyword-only)
    :return: tuple ``(bits, phrase)``: the emitted codes joined with
        ``_empty_str`` and the phrase left pending at the end
    """
    emitted = []
    code_length = len(util.to_binary(len(dictionary)))
    phrase = initial_phrase

    for symbol in data:
        phrase += symbol
        if phrase in dictionary:
            # Still a known phrase: keep extending it.
            continue

        emitted.append(
            util.extend_to_length(
                dictionary[phrase[:-1]], code_length, ending_bit=_zero_bit))

        # The length is adjusted after the code above was written and before
        # the new entry changes the dictionary size.
        if _is_power_of_two(len(dictionary)):
            code_length += 1

        dictionary[phrase] = util.to_binary(len(dictionary))
        phrase = phrase[-1]

    if compression_end:
        # Flush the phrase that is still pending.
        emitted.append(
            util.extend_to_length(
                dictionary[phrase], code_length, ending_bit=_zero_bit))

    return _empty_str.join(emitted), phrase
def translate_a(self, parser):
    """Return the binary code for the A instruction the parser is on.

    A symbol made only of digits is converted directly.  Any other symbol is
    resolved through ``self.symbol_table``; a symbol the table does not
    contain is first added at ``self.symbol_address``, which is then
    incremented.

    The result is ``"0"`` followed by ``to_binary(...)`` left-padded with
    zeros to 15 characters.
    """
    assert parser.commandType() == parser.A_COMMAND

    symbol = parser.symbol()
    if symbol.isdigit():
        value = symbol
    else:
        if not self.symbol_table.contains(symbol):
            # First occurrence of this symbol: give it the next free address.
            self.symbol_table.addEntry(symbol, self.symbol_address)
            self.symbol_address += 1
        value = self.symbol_table.getAddress(symbol)

    return "0" + to_binary(value).zfill(15)
def _omega_code(number):
    """Build the code for ``number`` from nested binary groups.

    Starting from ``number``, the binary form of the current value is
    recorded and the value is replaced by that form's length minus one,
    until the value equals 1.  The groups are joined in reverse order of
    creation, followed by a terminating ``_zero_bit``.

    NOTE(review): this looks like the Elias omega code. That assumes
    ``util.to_binary`` returns binary digits without leading zeros; confirm.

    ``number`` is validated by ``_ensure_correct_number`` first.
    """
    _ensure_correct_number(number)

    groups = [_zero_bit]
    remaining = number
    while remaining != 1:
        group = util.to_binary(remaining)
        groups.append(group)
        remaining = len(group) - 1

    # Groups were collected last-to-first; emit them first-to-last.
    return _empty_str.join(reversed(groups))
def main(df, random_state, test_size, first_star, second_star, state="Given Data"):
    """
    Run the whole cycle: filter, binarize, clean, split, fit and evaluate.

    The dataframe is restricted to rows whose ``state`` column equals
    ``state`` (when ``state`` is truthy) and whose ``stars_rev`` is one of
    the two given ratings.  The ``text`` and ``stars_rev`` columns are cast
    to ``str`` and passed through ``util.to_binary``; the texts are cleaned
    with ``util.clean_stem``.  A ``ReviewClassifier`` is fitted on the train
    split, and its metrics on the test split are printed.

    NOTE(review): the default ``state="Given Data"`` is truthy, so it is used
    as a filter value as well. Confirm rows with that state exist, or pass a
    falsy value to skip the filter.

    :param df: Pandas dataframe with ``state``, ``stars_rev`` and ``text`` columns
    :param random_state: forwarded to ``train_test_split``
    :param test_size: forwarded to ``train_test_split``
    :param first_star: first star rating to keep
    :param second_star: second star rating to keep
    :param state: value the ``state`` column must equal; falsy disables the filter
    :return: the fitted ``ReviewClassifier`` instance
    """
    if state:
        df = df[df["state"] == state]

    wanted_stars = [first_star, second_star]
    reviews = df[df.stars_rev.isin(wanted_stars)]
    reviews = reviews[["text", "stars_rev"]].astype(str)
    reviews = util.to_binary(reviews, str(first_star), str(second_star))

    corpus = reviews["text"].values  # review texts
    target = reviews["stars_rev"].values  # labels
    cleaned = util.clean_stem(corpus)

    split = train_test_split(
        cleaned, target, test_size=test_size, random_state=random_state)
    X_train, X_test, y_train, y_test = split

    model = ReviewClassifier()
    model.fit(X_train, y_train)

    # Evaluate on the held-out part and report.
    matrix, recall, precision, accuracy = model.metrics_eval(X_test, y_test)

    print("Number of reviews: {}".format(len(corpus)), "\n")
    print("Confusion matrix")
    print(matrix, "\n")
    print("Recall: {}%".format(round(recall * 100, 2)))
    print("Precision: {}%".format(round(precision * 100, 2)))
    print("Accuracy: {}%".format(round(accuracy * 100, 2)))

    return model
def _generate_dictionary():
    """Create the initial phrase -> code dictionary.

    Every character ``chr(i)`` for ``i`` from 1 up to (not including)
    ``_dictionary_length`` maps to ``util.to_binary(i)``.  One extra entry
    with the integer key ``-1`` holds the code ``'0'``.

    NOTE(review): the ``-1`` key presumably reserves code ``'0'`` without
    tying it to a real character; confirm against the decoder.
    """
    codes = {}
    for code_point in range(1, _dictionary_length):
        codes[chr(code_point)] = util.to_binary(code_point)
    codes[-1] = '0'
    return codes
def _decompress_data(bits, dictionary, reversed_dictionary, *, initial_phrase):
    """Decode a sequence of dictionary codes read from ``bits``.

    ``bits`` is consumed in chunks of ``code_length`` items.  The length
    starts as the length of ``util.to_binary`` of the dictionary size and is
    incremented whenever ``_is_power_of_two`` reports the dictionary size as
    a power of two after a chunk has been handled.

    Both ``dictionary`` (phrase -> code) and ``reversed_dictionary``
    (code -> phrase) are extended in place while decoding.

    :param bits: sliceable sequence of bits to decode
    :param dictionary: mapping phrase -> code; mutated in place
    :param reversed_dictionary: mapping code -> phrase; mutated in place
    :param initial_phrase: phrase to continue from (keyword-only); when it
        equals ``_empty_str`` the first chunk is decoded directly and becomes
        the starting phrase
    :return: tuple ``(joined_result, rest_bits, phrase)``: the decoded chunks
        joined with ``_empty_str``, the part of ``bits`` that was not
        consumed, and the phrase left pending at the end
    """
    result = list()
    code_length = len(util.to_binary(len(dictionary.keys())))
    i = 0  # read position inside ``bits``
    if initial_phrase == _empty_str:
        # No phrase carried over: the first chunk is looked up as-is and
        # seeds the phrase.
        chunk = bits[:code_length]
        decompressed_chunk = reversed_dictionary[_remove_leading_zeros(chunk)]
        result.append(decompressed_chunk)
        phrase = decompressed_chunk
        i = code_length
        if _is_power_of_two(len(dictionary.keys())):
            code_length += 1
    else:
        phrase = initial_phrase
    # Only whole chunks are read; any leftover shorter than ``code_length``
    # ends up in ``rest_bits``.
    while i + code_length <= len(bits):
        chunk = bits[i:i + code_length]
        if _one_bit in chunk:
            if _remove_leading_zeros(chunk) in reversed_dictionary.keys():
                decompressed_chunk = reversed_dictionary[_remove_leading_zeros(
                    chunk)]
            else:
                # special case: the code is not in the reversed dictionary
                # yet, so the chunk is rebuilt from the current phrase
                decompressed_chunk = phrase + phrase[0]
            dict_element = phrase + decompressed_chunk[0]
            if not (dict_element in dictionary.keys()):
                # New phrase: output the chunk and register the phrase under
                # the next code in both mappings.
                result.append(decompressed_chunk)
                dictionary[dict_element] = util.to_binary(
                    len(dictionary.keys()))
                reversed_dictionary[util.to_binary(
                    len(reversed_dictionary.keys()))] = dict_element
                phrase = decompressed_chunk
            else:
                # Phrase already known: nothing is appended to ``result``
                # here, the chunk only extends the pending phrase.
                # NOTE(review): confirm this branch is not expected to emit
                # output.
                phrase += decompressed_chunk
            i += code_length
            if _is_power_of_two(len(dictionary.keys())):
                code_length += 1
        else:
            # A chunk without any ``_one_bit`` stops decoding; it is not
            # consumed and stays in ``rest_bits``.
            break
    joined_result = _empty_str.join(result)
    rest_bits = bits[i:]
    return joined_result, rest_bits, phrase
'Supportive Environment Rating', 'Effective School Leadership Rating', 'Strong Family-Community Ties Rating', 'Trust Rating', 'Student Achievement Rating' ] # Convert Yes/No to to 1/0 binary_columns = ['Community School?'] for col in percent_columns: se_2016_renamed[col] = util.pct_to_number(se_2016_renamed, col) for col in money_columns: se_2016_renamed[col] = util.money_to_number(se_2016_renamed, col) for col in rating_columns: se_2016_renamed[col] = util.rating_to_number(se_2016_renamed, col) for col in binary_columns: se_2016_renamed[col] = util.to_binary(se_2016_renamed, col) se_2016_renamed.columns = [ util.sanitize_column_names(c) for c in se_2016_renamed.columns ] se_2016_renamed.head() # In[21]: plt.hist( se_2016_renamed.loc[se_2016_renamed['average_ela_proficiency'].notnull(), 'average_ela_proficiency'], bins=20) plt.show() plt.hist( se_2016_renamed.loc[se_2016_renamed['average_math_proficiency'].notnull(),
def _delta_code(number):
    """Return ``_gamma_code`` of the binary length, then the binary tail.

    ``number`` is validated with ``_ensure_correct_number`` and converted
    with ``util.to_binary``.  The result is ``_gamma_code`` applied to the
    length of that binary form, followed by the binary form without its
    first character.

    NOTE(review): this looks like the Elias delta code. That assumes
    ``util.to_binary`` returns binary digits without leading zeros; confirm.
    """
    _ensure_correct_number(number)
    binary = util.to_binary(number)
    length_prefix = _gamma_code(len(binary))
    tail = binary[1:]
    return length_prefix + tail
def _gamma_code(number):
    """Return the binary form of ``number`` preceded by ``_zero_bit`` padding.

    ``number`` is validated with ``_ensure_correct_number`` and converted
    with ``util.to_binary``.  The binary form is preceded by ``_zero_bit``
    repeated once for every character of that form except one.

    NOTE(review): this looks like the Elias gamma code. That assumes
    ``util.to_binary`` returns binary digits without leading zeros; confirm.
    """
    _ensure_correct_number(number)
    binary = util.to_binary(number)
    padding = _zero_bit * (len(binary) - 1)
    return padding + binary
def jump(self, mnemonic):
    """Return the code for a jump mnemonic.

    The code is the mnemonic's position in ``self._jump_list``, converted
    with ``to_binary`` and left-padded with zeros to three characters.
    """
    assert mnemonic in self._jump_list
    position = self._jump_list.index(mnemonic)
    return to_binary(position).zfill(3)
def dest(self, mnemonic):
    """Return the code for a dest mnemonic.

    The code is the mnemonic's position in ``self._dest_list``, converted
    with ``to_binary`` and left-padded with zeros to three characters.
    """
    assert mnemonic in self._dest_list
    position = self._dest_list.index(mnemonic)
    return to_binary(position).zfill(3)
def jump(self, mnemonic):
    """Return the code for a jump mnemonic.

    The code is the mnemonic's position in ``self._jump_list``, converted
    with ``to_binary`` and left-padded with zeros to three characters.
    """
    assert mnemonic in self._jump_list
    position = self._jump_list.index(mnemonic)
    return to_binary(position).zfill(3)
def dest(self, mnemonic):
    """Return the code for a dest mnemonic.

    The code is the mnemonic's position in ``self._dest_list``, converted
    with ``to_binary`` and left-padded with zeros to three characters.
    """
    assert mnemonic in self._dest_list
    position = self._dest_list.index(mnemonic)
    return to_binary(position).zfill(3)