def sampling_estupido():
    """Build one combined sample from every raw file and pickle it.

    For each name returned by ``get_file_names()`` the raw pickle is read,
    passed through ``divide_estupido`` and the result is appended to a
    single list, which is finally written as ``lexicon_dataset_smaller``.
    """
    final = []
    for file_name in get_file_names():
        print(f"File: {file_name}")
        out = read_pickle("raw", file_name)
        print("sampling")
        # Extend in place: rebuilding the list with ``final + chunk`` on
        # every file copies everything accumulated so far each time.
        # NOTE(review): assumes divide_estupido returns a list — confirm.
        final.extend(divide_estupido(out))
    print("to pickle")
    write_new_pickle(final, "lexicon_dataset_smaller")
def sampling():
    """Split every raw file into training/testing samples and pickle both.

    For each name returned by ``get_file_names()`` the raw pickle is read
    and handed to ``divide``, which yields a ``(training, testing)`` pair.
    The pairs are accumulated across files and written out as the
    ``training`` and ``testing`` pickles.
    """
    training = []
    testing = []
    for file_name in get_file_names():
        print(f"File: {file_name}")
        out = read_pickle("raw", file_name)
        print("sampling")
        training_sample, testing_sample = divide(out)
        # Extend in place rather than rebuilding both lists with ``+`` on
        # every iteration (which re-copies everything gathered so far).
        # NOTE(review): assumes divide returns two lists — confirm.
        training.extend(training_sample)
        testing.extend(testing_sample)
    print("to pickle")
    write_new_pickle(training, "training")
    write_new_pickle(testing, "testing")
from src.data.import_dataset import read_pickle import pandas as pd from src.utils.utils import get_file_names, get_file_path from src.data.export_dataset import save_lexicon_results import matplotlib.pyplot as plt import seaborn as sns from src.utils.words import GET_POLARTIY lexicon = read_pickle('', 'lexicon_results') hu = GET_POLARTIY() only_values_hu = list() for key, value in hu.items(): only_values_hu.append(value) lexicon_dict = dict() for pair in lexicon: lexicon_dict[pair[0]] = pair[1] for key, value in hu.items(): if key in lexicon_dict and lexicon_dict[key] > 0 and value < 0: print(key) print(value) print(lexicon_dict[key]) ''' filtered_dict = dict() positive = 0 negative = 0 for word in lexicon:
from src.data.import_dataset import read_pickle
import pandas as pd
from src.data.export_dataset import export_dataset

# Categories to draw samples from.
_FILE_NAMES = [
    'reviews_Automotive',
    'reviews_Cell_Phones_and_Accessories',
    'reviews_Video_Games',
    'reviews_Movies_and_TV',
]

p = read_pickle('interim', 'lexicon_dataset_small')
print(p)
dataframe = pd.DataFrame(p)

# Draw 75 random rows for every (category, overall rating 1..5) pair.
ov_total = []
for file in _FILE_NAMES:
    for i in range(1, 6):
        # Each comparison must be parenthesised: ``&`` binds tighter than
        # ``==``, so the unparenthesised form evaluates
        # ``i & dataframe.category`` first instead of combining two masks.
        overall_1 = dataframe[
            (dataframe.overall == i) & (dataframe.category == file)
        ].sample(75).to_dict('records')
        # NOTE(review): sample(75) raises if a group has fewer than 75
        # rows — confirm every group is large enough.
        ov_total.extend(overall_1)

export_dataset(ov_total, 'lexicon_dataset_smaller')
def extract_word_dictionary():
    """Return the object stored in the ``filtered_lexicon`` pickle.

    The value is returned exactly as ``read_pickle`` yields it.
    """
    filtered_lexicon = read_pickle('', 'filtered_lexicon')
    return filtered_lexicon
def extract_word_dictionary2():
    """Load the ``sent_lex1000`` pickle and return it as a dictionary.

    Each item of the pickle is indexed as a pair: element 0 becomes the
    key and element 1 the value. Later duplicates overwrite earlier ones.
    """
    entries = read_pickle('', 'sent_lex1000')
    return {entry[0]: entry[1] for entry in entries}
def main():
    """Load the pickled training/testing matrices and run the model on them.

    The value returned by ``run_model`` is not used.
    """
    training_matrix = read_pickle('', 'training_matrix_hui')
    testing_matrix = read_pickle('', 'testing_matrix_hui')
    run_model(training_matrix, testing_matrix)