def sampling_estupido():
    """Build one combined sample from every raw file and pickle it.

    For each name returned by ``get_file_names()`` the matching pickle is
    loaded with ``read_pickle("raw", name)``, passed through
    ``divide_estupido`` and the result is accumulated into a single list.
    Once every file has been processed the list is written with
    ``write_new_pickle(..., "lexicon_dataset_smaller")``.
    """
    files = get_file_names()
    final = []
    for file in files:
        print("File: " + file)
        out = read_pickle("raw", file)
        print("sampling")
        # Grow the accumulator in place: `final = final + stupid` copied the
        # whole list on every iteration, which is quadratic in total size.
        # Assumes divide_estupido returns a list (the original concatenated
        # its result with one) -- TODO confirm.
        final.extend(divide_estupido(out))
        # NOTE(review): this message is printed once per file, but the pickle
        # is only written after the loop finishes.
        print("to pickle")

    write_new_pickle(final, "lexicon_dataset_smaller")
def sampling():
    """Split every raw file into training/testing samples and pickle both.

    For each name returned by ``get_file_names()`` the matching pickle is
    loaded with ``read_pickle("raw", name)`` and handed to ``divide``, which
    is unpacked into a training part and a testing part. The parts from all
    files are accumulated and finally written with ``write_new_pickle`` under
    the names ``"training"`` and ``"testing"``.
    """
    files = get_file_names()
    training = []
    testing = []
    for file in files:
        print("File: " + file)
        out = read_pickle("raw", file)
        print("sampling")
        training_sample, testing_sample = divide(out)
        # In-place extend avoids recopying the accumulated lists for every
        # file, as `training = training + training_sample` did.
        # Assumes divide returns two lists -- TODO confirm.
        training.extend(training_sample)
        testing.extend(testing_sample)
        # NOTE(review): printed once per file, although the pickles are only
        # written after the loop finishes.
        print("to pickle")

    write_new_pickle(training, "training")
    write_new_pickle(testing, "testing")
Example #3
0
from src.data.import_dataset import read_pickle
import pandas as pd
from src.utils.utils import get_file_names, get_file_path
from src.data.export_dataset import save_lexicon_results
import matplotlib.pyplot as plt
import seaborn as sns
from src.utils.words import GET_POLARTIY

# Load the generated lexicon results and the reference polarity mapping.
lexicon = read_pickle('', 'lexicon_results')
hu = GET_POLARTIY()

# Polarity values of the reference mapping, in its iteration order.
only_values_hu = list(hu.values())

# Index the lexicon entries: element 0 of each entry is the key, element 1
# the value (a later entry with the same key overwrites an earlier one).
lexicon_dict = {entry[0]: entry[1] for entry in lexicon}

# Print every key whose lexicon value is positive while its reference
# polarity is negative: the key, the reference value, then the lexicon value.
for word, hu_value in hu.items():
    if word in lexicon_dict:
        lexicon_value = lexicon_dict[word]
        if lexicon_value > 0 and hu_value < 0:
            print(word)
            print(hu_value)
            print(lexicon_value)
'''
filtered_dict = dict()
positive = 0
negative = 0
for word in lexicon:
Example #4
0
from src.data.import_dataset import read_pickle
import pandas as pd
from src.data.export_dataset import export_dataset

# Category names to draw samples for.
_FILE_NAMES = [
    'reviews_Automotive', 'reviews_Cell_Phones_and_Accessories',
    'reviews_Video_Games', 'reviews_Movies_and_TV'
]

p = read_pickle('interim', 'lexicon_dataset_small')
print(p)
dataframe = pd.DataFrame(p)

# Draw 75 random rows for every (category, overall rating 1..5) pair and
# collect them as a flat list of record dicts.
ov_total = []
for file in _FILE_NAMES:
    for i in range(1, 6):
        # Each comparison must be parenthesised: `&` binds tighter than `==`,
        # so the unparenthesised form was parsed as the chained comparison
        # `overall == (i & category) == file` instead of combining two masks.
        mask = (dataframe.overall == i) & (dataframe.category == file)
        # sample(75) raises ValueError when fewer than 75 rows match.
        ov_total.extend(dataframe[mask].sample(75).to_dict('records'))

export_dataset(ov_total, 'lexicon_dataset_smaller')
def extract_word_dictionary():
    """Return whatever is stored in the ``filtered_lexicon`` pickle."""
    filtered_lexicon = read_pickle('', 'filtered_lexicon')
    return filtered_lexicon
def extract_word_dictionary2():
    """Load the ``sent_lex1000`` pickle and return it as a dictionary.

    Every entry of the loaded object is indexed as a pair: element 0 becomes
    the key and element 1 the value. When a key occurs more than once, the
    last entry wins.
    """
    entries = read_pickle('', 'sent_lex1000')
    return {entry[0]: entry[1] for entry in entries}
def main():
    """Load the training and testing matrices and run the model on them.

    Reads the ``training_matrix_hui`` and ``testing_matrix_hui`` pickles via
    ``read_pickle`` and passes both to ``run_model``, binding the result to
    ``model``.

    NOTE(review): the visible part of this function ends here; what happens
    to ``model`` afterwards (if anything) is not shown -- confirm.
    """
    matrix_training = read_pickle('', 'training_matrix_hui')
    matrix_testing = read_pickle('', 'testing_matrix_hui')

    model = run_model(matrix_training, matrix_testing)