Example #1
0
def train_model_for_debug():
    """Run one debugging pass of the classification pipeline.

    Prepares the data set with ``train_model.prepare_data`` using a
    hard-coded feature-extraction configuration, then hands a linear-kernel
    ``SVC`` and the prepared data to ``train_model.test_model`` with
    ``verbose=True``.

    ``src_path``, ``data_field`` and ``target_field`` are not parameters;
    they are read from the enclosing scope.
    """
    # Labels 0, 1 and 3 map to themselves; label 2 has no entry (it is
    # disabled in this debug configuration).
    label_map = {label: label for label in (0, 1, 3)}

    # Settings forwarded verbatim as ``feature_config``. The string values
    # look like paths to vocabulary / word-list files, the booleans like
    # per-feature switches -- confirm against train_model.prepare_data.
    extraction_settings = {
        'text_to_vector_uni_vocabulary':
            'vocabularies/text_to_vector_uni_vocabulary_10.txt',
        'text_to_vector_bi_vocabulary':
            'vocabularies/text_to_vector_bi_vocabulary.txt',
        'tf_idf_vector': False,
        'counter_vector': True,
        'binary_vector': False,
        'best_representing_words_list':
            'vocabularies/service_best_words_custom.txt',
        'surrounding_words': True,
        'polarity_vocabulary': 'vocabularies/polarity_words.txt',
        'positive_words_count': True,
        'negative_words_count': True,
        'polarity_count': True,
        'parts_of_speech': True,
        'uni_gram': True,
        'bi_gram': False,
        'not_count': False,
        'remove_stop_words': False,
    }

    features, labels, raw_texts = train_model.prepare_data(
        src_path=src_path,
        data_field=data_field,
        target_field=target_field,
        class_map=label_map,
        balance_classes=False,
        randomize=False,
        feature_config=extraction_settings,
    )

    # The CSV name is passed positionally; presumably it is where
    # test_model reports its mistakes -- verify against train_model.
    train_model.test_model(
        SVC(kernel='linear'),
        features,
        labels,
        raw_texts,
        'service_quality_mistakes.csv',
        verbose=True,
    )
def main(model_path, test_data_path):
    """Load a saved Keras model and run ``test_model`` on the test data.

    Args:
        model_path: Location handed to ``keras.models.load_model``.
        test_data_path: Location handed to ``get_file_data``, which yields
            the test inputs, the test targets and a third value that is
            not used here.
    """
    loaded_model = keras.models.load_model(model_path)
    inputs, targets, _unused_classes = get_file_data(test_data_path)
    test_model(loaded_model, inputs, targets)
Example #3
0
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from train_model import test_model
from keras.models import load_model
import pickle

# Parameters
# to_load = "nietzsche"
to_load = "obama2"  # base name of the saved model and its pickled char maps
length = 200  # forwarded to test_model as test_length

model = load_model("saved_models/{}.h5".format(to_load))
# Use context managers so the pickle files are closed deterministically
# instead of leaking the handles returned by open().
with open("saved_models/{}c2i.p".format(to_load), "rb") as c2i_file:
    char_to_indices = pickle.load(c2i_file)
with open("saved_models/{}i2c.p".format(to_load), "rb") as i2c_file:
    indices_to_char = pickle.load(i2c_file)

# Generate one sample per temperature and record each in the output file.
# The file is opened only once the model and mappings have loaded, so a
# failed load does not truncate the results of an earlier run, and the
# `with` block closes it even if generation raises.
with open("generated/{}_long.txt".format(to_load), "w") as outfile:
    for temperature in [0.2, 0.4, 0.6]:
        # Pass the loop's temperature. It used to be hard-coded to 0.3, so
        # each sample was labelled with a temperature it was not generated
        # with.
        generated_string = test_model(
            model=model,
            char_to_indices=char_to_indices,
            indices_to_char=indices_to_char,
            seed_string=" ",
            temperature=temperature,
            test_length=length,
        )
        output = "Temperature: {} Generated string: {}".format(
            temperature, generated_string)
        print(output)
        outfile.write(output + "\n")
        # Flush after every sample so finished results reach the file
        # while later temperatures are still being generated.
        outfile.flush()
Example #4
0
from keras.models import load_model
import pickle

# Parameters
origin = "full_pandas_with_import"  # prefix of the pickled char-map files
model_name = "full_pandas_with_import_usize512_maxlen240_numlayers3_dropout0"
weights = "weights-13-0.94.h5"  # weights file inside the model's directory
length = 10000  # forwarded to test_model as test_length
keep_chars = 50  # forwarded to test_model as keep_chars

model = load_model("saved_models/{}/{}".format(model_name, weights))
# Use context managers so the pickle files are closed deterministically
# instead of leaking the handles returned by open().
with open("saved_models/{}_c2i.p".format(origin), "rb") as c2i_file:
    char_to_indices = pickle.load(c2i_file)
with open("saved_models/{}_i2c.p".format(origin), "rb") as i2c_file:
    indices_to_char = pickle.load(i2c_file)

# Generate one sample per temperature, seeded with "def ".
for temperature in [0.1, 0.35, 0.5, 1, 2]:
    generated_string = test_model(model=model,
                                  char_to_indices=char_to_indices,
                                  indices_to_char=indices_to_char,
                                  seed_string="def ",
                                  temperature=temperature,
                                  test_length=length,
                                  keep_chars=keep_chars)

    # Append mode: the samples for all temperatures (and any previous runs)
    # accumulate in the same file.
    with open("generated/{}.txt".format(model_name), "a",
              encoding="utf-8") as outfile:
        output = "Temperature: {} Generated string: \n{}\n".format(
            temperature, generated_string)
        print(output)
        outfile.write(output + "\n")
# Load the vocabularies / word lists that the feature extraction consumes.
text_to_vector_vocabulary = vocabularies.read_vocabulary('text_to_vector_vocabulary.txt')
best_representing_words = vocabularies.read_best_words_list('clean_best_words.txt')
polarity_vocabulary = vocabularies.read_polarity_vocabulary('polarity_words.txt')


# Settings forwarded verbatim to train_model.prepare_data as
# ``feature_config``. The booleans look like per-feature switches --
# confirm against train_model.prepare_data.
feature_extraction_config = {
    'text_to_vector_vocabulary': text_to_vector_vocabulary,
    'tf_idf_vector': False,
    'counter_vector': True,
    'binary_vector': False,
    'best_representing_words_list': best_representing_words,
    'surrounding_words': True,
    'polarity_vocabulary': polarity_vocabulary,
    'positive_words_count': True,
    'negative_words_count': True,
    'polarity_count': True,
    'parts_of_speech': False,
}

# data_path, data_field, target_field and class_map are expected to be
# defined earlier in the module.
data, target = train_model.prepare_data(src_path=data_path,
                                        data_field=data_field,
                                        target_field=target_field,
                                        class_map=class_map,
                                        balance_classes=True,
                                        randomize=False,
                                        feature_config=feature_extraction_config)

# `degree` is dropped: assuming this is scikit-learn's SVC, it only affects
# the 'poly' kernel and 3 is the default anyway, so the classifier is
# unchanged.
clf = SVC(kernel='linear')
# Alternative classifier kept for experimentation:
# clf = tree.DecisionTreeClassifier(max_depth=5)
train_model.test_model(clf, data, target)