コード例 #1
0
def main(cfg):
    """Run the pipeline once for every configuration obtained from ``cfg``.

    For each configuration the global parameters are set, the run details are
    printed and ``normalize``, ``extract_features`` and ``classify`` are called
    in that order. The classification result is stored on
    ``GlobalParameters().RESULTS`` and, when ``EXPORT_AS_BASELINE`` is set,
    exported as a baseline. After the last configuration the optional word
    clouds are generated, the results are written and ``send_work_done`` is
    called.

    Any ``Exception`` is printed and reported through ``send_work_done``; it
    is not re-raised.

    :param cfg: passed unchanged to ``get_cfg_files``
    """
    try:
        # NOTE(review): the nltk.download calls for "vader_lexicon" and
        # "wordnet" were commented out here - presumably the data is expected
        # to be installed already.
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        total_files = len(configs)
        # NOTE(review): this dict is never filled before it is handed to
        # add_results_glbs below - confirm that is intended.
        results = {}
        for run_number, config in enumerate(configs, start=1):
            progress = "Running config {}/{}".format(run_number, total_files)
            print_message(progress)
            set_global_parameters(config)
            print_run_details()
            dataset_dir = normalize()
            features, labels = extract_features(dataset_dir)
            config_result = classify(features, labels, glbs.K_FOLDS,
                                     glbs.ITERATIONS)
            glbs.RESULTS[glbs.FILE_NAME] = config_result
            glbs.RESULTS = add_results(glbs.RESULTS, glbs)
            if glbs.EXPORT_AS_BASELINE:
                export_as_baseline(config_result, config[1])
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        add_results_glbs(results, glbs)
        write_results(divide_results(glbs.RESULTS))
        send_work_done(glbs.DATASET_DIR)
        print_message("Done!")
    except Exception as error:
        traceback.print_exc()
        send_work_done(
            glbs.DATASET_DIR,
            "",
            error=str(error),
            traceback=str(traceback.format_exc()),
        )
コード例 #2
0
def write_info_gain(features, name):
    """Write feature-selection scores to ``<name> for hebrew dataset.xlsx``.

    The workbook is created under ``GlobalParameters().RESULTS_PATH``. Data
    rows start at spreadsheet row 3 and a table with the header row on row 2
    is laid over them.

    :param features: iterable of indexable entries; ``entry[0]`` is a string
        split on ``"_"`` (first part -> column A, last part -> column B),
        ``entry[1]`` is formatted with two decimals into column C and
        ``entry[2]``, when usable, into column D
    :param name: used in the file name and as the header of column C
    """
    glbs = GlobalParameters()
    file_path = (GlobalParameters().RESULTS_PATH + "\\" + name +
                 " for hebrew dataset.xlsx")
    # Create a new Excel file and add a worksheet.
    workbook = xlsxwriter.Workbook(file_path)
    worksheet = workbook.add_worksheet()

    last_index = 0
    for index, entry in enumerate(features):
        excel_row = str(index + 3)
        worksheet.write("A" + excel_row, entry[0].split("_")[0])
        worksheet.write("B" + excel_row, entry[0].split("_")[-1])
        worksheet.write("C" + excel_row, "{:.2f}".format(entry[1]))
        try:
            worksheet.write("D" + excel_row, "{:.2f}".format(entry[2]))
            if len(glbs.IDF) > 0:
                worksheet.write("E" + excel_row, glbs.IDF[index])
        except:
            # Fallback when the third value cannot be written: the IDF value
            # takes column D instead. The catch-all is kept on purpose so the
            # behaviour stays exactly as before.
            if len(glbs.IDF) > 0:
                worksheet.write("D" + excel_row, glbs.IDF[index])
        last_index = index

    # NOTE(review): five headers are supplied although the range ends at
    # column D - confirm whether the table should extend to column E.
    headers = ("selection type", "feture name", name, "p-value", "tfidf")
    worksheet.add_table(
        "A2:D" + str(last_index + 3),
        {
            "columns": [{"header": header} for header in headers],
            "style": "Table Style Light 8",
        },
    )
    workbook.close()
コード例 #3
0
ファイル: main.py プロジェクト: botimerj/1Rx
def adc_energy_graphs():
    """Run the ADC energy calculation with plotting, then print its figures.

    ``energy_calc`` is called once with ``plot=True`` and once with default
    arguments on the ADC of an ``RRAM`` built from default global parameters;
    afterwards the ADC's ``energy`` and ``N`` attributes are printed.
    """
    rram = RRAM(GlobalParameters())

    rram.adc.energy_calc(plot=True)
    rram.adc.energy_calc()

    for caption, value in (
        ("Conversion Energy: ", rram.adc.energy),
        ("ADC resolution: ", rram.adc.N),
    ):
        print(caption, value)
コード例 #4
0
def corpus_name():
    """Return a human-readable name for the current corpus.

    Example: ``Corpus of 1000 female & 600 male in English``.

    The labels are read from ``GlobalParameters().LABELS``. Everything from
    the first ``.`` onwards is dropped from each label (labels usually carry
    the file extension, e.g. ``female.txt``) and the occurrences of each
    remaining name are counted. The language is whatever ``text_language``
    returns for the first entry of ``GlobalParameters().TRAIN_DATA``, with its
    first character upper-cased.

    :return: string, the name of the corpus
    """
    glbs = GlobalParameters()

    counts = {}
    for label in glbs.LABELS:
        # usually the labels contain the file format at the end (file.txt)
        base = label.split('.')[0]
        counts[base] = counts.get(base, 0) + 1

    parts = [str(number) + " " + label for label, number in counts.items()]

    # Join with ", " and put " & " before the last entry. Building the list
    # directly (instead of patching the last comma of the finished string)
    # keeps commas that belong to a label name untouched.
    if len(parts) > 1:
        listing = ", ".join(parts[:-1]) + " & " + parts[-1]
    else:
        listing = "".join(parts)

    language = text_language(glbs.TRAIN_DATA[0])
    # Slicing instead of indexing so an empty language string cannot raise.
    language = language[:1].upper() + language[1:]

    # Without any label the prefix is shortened rather than left dangling.
    prefix = "Corpus of " + listing if parts else "Corpus"
    return prefix + " in " + language
コード例 #5
0
ファイル: main.py プロジェクト: botimerj/1Rx
def energy_vs_adc_res():
    """Sweep ``rram.n_bit`` against ``mvm.active_rows`` and log the energy.

    One deep copy of the global parameters is made for every combination of
    the two settings and the list is handed to ``sweep_gp_params``. The
    returned energy values are printed raw, then scaled by ``1e12`` and
    divided by ``128 * 128 + 127 * 128``, and written one value per line to
    ``outputs/energy_vs_adc_res``.
    """
    # Default global params
    gp = GlobalParameters()

    # Define Constant Global params
    gp.adc.comp_var = 0.000
    gp.rram.size_x = 64
    gp.rram.size_y = 64
    # NOTE(review): other sweeps in this file assign ``gp.rram.rvar``; confirm
    # which attribute name the model actually reads.
    gp.rram.r_var = np.log10(1) / 6

    # Define Parametric Global params
    n_bits = [1, 2, 4, 8]
    active_rows = [1, 3, 7, 15]

    gp_list = []
    for n_bit in n_bits:
        for rows in active_rows:
            gp.rram.n_bit = n_bit
            gp.mvm.active_rows = rows
            # Snapshot the parameters; gp itself is mutated on the next pass.
            gp_list.append(copy.deepcopy(gp))

    _, energy, _ = sweep_gp_params(gp_list)
    print(np.array(energy))
    energy = np.array(energy) * 1e12 / (128 * 128 + 127 * 128)

    # Write each entry on its own line. The previous loop wrote the whole
    # array once per entry and left the file open if a write failed.
    with open("outputs/energy_vs_adc_res", 'w') as f:
        for e in energy:
            f.write(str(e) + '\n')
コード例 #6
0
def write_info_gain(features, name):
    """Write feature-selection scores to ``<name> for hebrew dataset.xlsx``.

    The workbook is created under ``GlobalParameters().RESULTS_PATH``. Every
    entry is printed, then written to spreadsheet row ``index + 3``; a table
    with its header row on row 2 is laid over columns A-D.

    :param features: iterable of indexable entries; ``entry[0]`` is a string
        split on ``'_'`` (first part -> column A, last part -> column B),
        ``entry[1]`` goes to column C and ``entry[2]``, when it can be
        written, to column D - both converted with ``str``
    :param name: used in the file name and as the header of column C
    """
    file_path = os.path.join(GlobalParameters().RESULTS_PATH,
                             name + " for hebrew dataset.xlsx")
    # Create a new Excel file and add a worksheet.
    workbook = xlsxwriter.Workbook(file_path)
    worksheet = workbook.add_worksheet()

    last_index = 0
    for index, entry in enumerate(features):
        print(entry)

        excel_row = str(index + 3)
        name_parts = entry[0].split('_')
        worksheet.write('A' + excel_row, name_parts[0])
        worksheet.write('B' + excel_row, name_parts[-1])
        worksheet.write('C' + excel_row, str(entry[1]))
        try:
            worksheet.write('D' + excel_row, str(entry[2]))
        except:
            # The third value is optional; any failure leaves column D empty.
            pass
        last_index = index

    headers = ('selection type', "feture name", name, "p-value")
    worksheet.add_table(
        "A2:D" + str(last_index + 3),
        {
            'columns': [{'header': header} for header in headers],
            'style': 'Table Style Light 8',
        },
    )
    workbook.close()
コード例 #7
0
def print_run_details():
    """Print a banner listing the global parameters of the current run."""
    glbs = GlobalParameters()
    # Order matters: the values fill the placeholders of the banner in turn.
    values = (
        glbs.TRAIN_DIR,
        glbs.TEST_DIR,
        glbs.FEATURES,
        glbs.STYLISTIC_FEATURES,
        glbs.NORMALIZATION,
        glbs.METHODS,
        glbs.MEASURE,
        glbs.OUTPUT_DIR,
        glbs.RESULTS_PATH,
    )
    banner = """
	---------------------------------------
	Training path: {}
	Testing Path: {}
	Features: {}
	Stylistic Features: {}
	Normalization: {}
	Methods: {}
	Measure: {}
	Output Path: {}
	Results Path: {}
	---------------------------------------
	"""
    print(banner.format(*values))
コード例 #8
0
def add_results(old_results):
    """Wrap the entry for the current ``FILE_NAME`` with the run's settings.

    The value stored under ``GlobalParameters().FILE_NAME`` is replaced by a
    dict that holds the previous value under ``"results"`` next to the
    current ``FEATURES``, ``NORMALIZATION`` and ``STYLISTIC_FEATURES``.

    :param old_results: dict of results; modified in place
    :return: the same ``old_results`` object
    """
    glbs = GlobalParameters()
    key = glbs.FILE_NAME
    old_results[key] = {
        "results": old_results[key],
        "features": glbs.FEATURES,
        "normalization": glbs.NORMALIZATION,
        "stylistic_features": glbs.STYLISTIC_FEATURES,
    }
    return old_results
コード例 #9
0
def _read_txt_posts(directory):
    """Map every ``.txt`` file in ``directory`` (name without ``.txt``) to its lines."""
    posts = {}
    for file_name in os.listdir(directory):
        if file_name.endswith('.txt'):
            # The context manager closes the handle; it used to be leaked.
            with open(os.path.join(directory, file_name),
                      "r",
                      encoding="utf8",
                      errors='replace') as handle:
                posts[file_name.replace('.txt', '')] = handle.readlines()
    return posts


def generate_word_clouds(max_words=200):
    """Generate unigram and bigram word clouds for the train and test files.

    The ``Words Clouds`` folder under ``GlobalParameters().RESULTS_PATH`` is
    recreated from scratch. For every ``.txt`` file in ``TRAIN_DIR`` and
    ``TEST_DIR`` the word frequencies from ``get_top_n_words`` are passed to
    ``generate_and_save``, first for unigrams and then for bigrams. When
    ``text_language`` reports ``'hebrew'`` for the first line of the first
    testing file, every line is reversed and the Hebrew stop words are used.

    :param max_words: forwarded to ``generate_and_save``
    """
    glbs = GlobalParameters()
    # os.path.join instead of a hard-coded backslash, matching how the input
    # files are located.
    result_path = os.path.join(glbs.RESULTS_PATH, "Words Clouds")

    if path.exists(result_path):
        shutil.rmtree(result_path, ignore_errors=True)
    os.makedirs(result_path)

    training = _read_txt_posts(glbs.TRAIN_DIR)
    testing = _read_txt_posts(glbs.TEST_DIR)

    if text_language(list(testing.values())[0][0]) == 'hebrew':
        # Every line is reversed character by character for Hebrew text.
        for corpus in (training, testing):
            for key, posts in corpus.items():
                corpus[key] = [post[::-1] for post in posts]
        stop_words = hebrew_stopwords
    else:
        stop_words = stopwords

    for prefix, corpus in (("training", training), ("testing", testing)):
        for name, text in corpus.items():
            for ngram_label, n in (("unigrams", 1), ("bigrams", 2)):
                title = prefix + " " + name + " " + ngram_label
                freq = dict(get_top_n_words(text, n, n))
                generate_and_save(freq, max_words, result_path, stop_words,
                                  title)
コード例 #10
0
ファイル: main.py プロジェクト: botimerj/1Rx
def rvar_arows_shmoo():
    """Build a pass/fail grid over ``rram.rvar`` and ``mvm.active_rows``.

    For every combination a fresh ``MVM`` multiplies a random 1x128 vector
    with a random 128x32 matrix (values in [-1, 1), resolution argument 8).
    The combination passes when ``dot`` and ``dot_truth`` agree element-wise.
    Progress is printed per combination; the final grid (rows: r_var values,
    columns: active rows, 1 = pass, 0 = fail) is printed and written to
    ``outputs/schmoo``.
    """
    # Default global params
    gp = GlobalParameters()
    gp.adc.comp_var = 0.000
    gp.rram.size_x = 256
    gp.rram.size_y = 256

    r_var = np.log10([1, 1.01, 1.05, 1.1, 1.2, 1.5, 2, 3]) / 6
    active_rows = [1, 3, 7, 15, 31, 63, 127, 255]

    # Problem size and resolution are the same for every combination.
    M = 128
    N = 32
    res = 8

    # NOTE(review): this header does not match the two columns printed below.
    print('R-Dim | Nb | AR')
    print('---------------')

    shmoo_grid = []
    for r in r_var:
        for rows in active_rows:
            gp.rram.rvar = r
            gp.mvm.active_rows = rows
            print('{} | {} :'.format(10**(6 * r), rows), end='')

            mvm = MVM(gp)

            vec = np.random.random([1, M]) * 2 - 1
            mat = np.random.random([M, N]) * 2 - 1

            result = mvm.dot(vec, mat, res)
            print('{:.2f}:'.format(mvm.e_read * 1e12), end=' ')

            result_t = mvm.dot_truth(vec, mat, res)

            passed = False not in (result == result_t)
            shmoo_grid.append(1 if passed else 0)
            print("Pass" if passed else "Fail")

    shmoo_grid = np.array(shmoo_grid).reshape(len(r_var), len(active_rows))
    print(shmoo_grid)

    # Context manager so the file is closed even if the write fails.
    with open("outputs/schmoo", 'w') as f:
        f.write(str(shmoo_grid))
コード例 #11
0
def write_sfm(score):
    """Write the items of ``score`` to ``sfm for hebrew dataset.xlsx``.

    The workbook is created under ``GlobalParameters().RESULTS_PATH``; keys
    go to column A and values to column B, starting at row 1.

    :param score: mapping whose items are written one per row
    """
    file_path = (GlobalParameters().RESULTS_PATH +
                 "\\sfm for hebrew dataset.xlsx")
    # Create a new Excel file and add a worksheet.
    workbook = xlsxwriter.Workbook(file_path)
    worksheet = workbook.add_worksheet()

    for row, (key, value) in enumerate(score.items(), start=1):
        cell_row = str(row)
        worksheet.write("A" + cell_row, key)
        worksheet.write("B" + cell_row, value)

    workbook.close()
コード例 #12
0
ファイル: main.py プロジェクト: botimerj/1Rx
def distance_vs_active_rows():
    """Sweep ``rram.rvar`` against ``mvm.active_rows`` and plot the distance.

    One deep copy of the global parameters is made per combination and the
    list is handed to ``sweep_gp_params``. The returned distances are reshaped
    to (number of rvar values, number of active-row values), written row by
    row to ``outputs/distance_vs_arows`` and plotted as one curve per rvar
    value over ``log2(active_rows + 1)``.
    """
    # Default global params
    gp = GlobalParameters()

    # Define Constant Global params
    gp.adc.comp_var = 0.005
    gp.rram.size_x = 128
    gp.rram.size_y = 128

    # Define Parametric Global params
    set1 = np.log10([1, 1.05, 1.1, 1.5, 2]) / 6
    set2 = [1, 3, 7, 15, 31, 63, 127]

    gp_list = []
    for rvar in set1:
        for rows in set2:
            gp.rram.rvar = rvar
            gp.mvm.active_rows = rows
            # Snapshot the parameters; gp itself is mutated on the next pass.
            gp_list.append(copy.deepcopy(gp))

    distance, _, _ = sweep_gp_params(gp_list)
    distance = np.array(distance).reshape(len(set1), len(set2))

    # Context manager so the file is closed even if a write fails.
    with open("outputs/distance_vs_arows", 'w') as f:
        for d in distance:
            f.write(str(d) + '\n')

    fig, ax = plt.subplots()
    # NOTE(review): the x values are log2(active_rows + 1); confirm that
    # labelling them as ADC resolution is intended.
    ax.set_xlabel('ADC Resolution (N)')
    ax.set_ylabel('Accuracy (Distance from truth)')
    ax.set_title('MAC accuracy')

    x_values = np.log2(np.array(set2) + 1)
    for row in distance:
        plt.plot(x_values, row)

    # Convert the log-scaled settings back to a percentage for the legend.
    r_var = (10**(np.array(set1) * 6) - 1) * 100
    r_var_str = ['{:.0f}%'.format(r) for r in r_var]
    ax.legend(r_var_str, title='Rcell Variation')
    plt.show()
コード例 #13
0
def main(cfg):
    """Run the pipeline once for every configuration obtained from ``cfg``.

    For each configuration the global parameters are set, the run details are
    printed, the training data (and the test data when ``TEST_DIR`` is not
    empty) goes through ``normalize``, features are extracted, every entry of
    ``SELECTION`` is applied through ``get_selected_features`` and the result
    of ``classify`` is stored under the configuration's ``FILE_NAME``. After
    the last configuration the optional word clouds are generated, the
    results are written and ``send_work_done`` is called.

    Any ``Exception`` is printed and reported through ``send_work_done``; it
    is not re-raised.

    :param cfg: passed unchanged to ``get_cfg_files``
    """
    try:
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        results = {}
        total_files = len(configs)
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            n_train_dir = normalize()
            # Decided per configuration: a config without a test directory
            # must not reuse the directory left over from the previous one.
            n_test_dir = normalize(test=True) if glbs.TEST_DIR != "" else ""
            train, tr_labels, test, ts_labels, all_features = extract_features(
                n_train_dir, n_test_dir)
            for selection in glbs.SELECTION:
                try:
                    train, test = get_selected_features(
                        selection, train, tr_labels, test, ts_labels,
                        all_features)
                except Exception:
                    # Still best effort - the run continues with the features
                    # it has - but the failure is reported instead of hidden.
                    traceback.print_exc()
            results[glbs.FILE_NAME] = classify(train,
                                               tr_labels,
                                               test,
                                               ts_labels,
                                               all_features,
                                               model_number=i)
            results = add_results(results)
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        write_results(divide_results(results))
        send_work_done(glbs.TRAIN_DIR)
        print_message("Done!")
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.TRAIN_DIR,
                       "",
                       error=str(e),
                       traceback=str(traceback.format_exc()))
コード例 #14
0
def write_results(results):
    """Pickle every entry of ``results`` and hand it on for xlsx export.

    The ``Pickle files`` and ``Xlsx files`` folders under
    ``GlobalParameters().RESULTS_PATH`` are recreated from scratch. Each
    value is dumped to ``<key>.pickle`` and ``new_write_file_content`` is then
    called with that file, the key and the xlsx folder.

    :param results: mapping of name -> picklable result
    """
    glbs = GlobalParameters()
    print_message("Writing results...")

    pickle_path = glbs.RESULTS_PATH + "\\Pickle files"
    if path.exists(pickle_path):
        shutil.rmtree(pickle_path, ignore_errors=True)
    os.makedirs(pickle_path)

    xlsx_path = glbs.RESULTS_PATH + "\\Xlsx files"
    if path.exists(xlsx_path):
        shutil.rmtree(xlsx_path, ignore_errors=True)
    # Short pause between removing and recreating the folder.
    time.sleep(0.5)
    os.makedirs(xlsx_path)

    for key, value in results.items():
        pickle_file = pickle_path + "\\" + key + ".pickle"
        with open(pickle_file, "wb+") as handle:
            pickle.dump(value, handle)
        new_write_file_content(pickle_file, key, xlsx_path)
コード例 #15
0
def write_results(results):
    """Pickle every entry of ``results`` and hand it on for xlsx export.

    The ``Pickle files`` and ``Xlsx files`` folders under
    ``GlobalParameters().RESULTS_PATH`` are recreated from scratch. Each
    value is dumped to ``<key>.pickle`` and ``new_write_file_content`` is then
    called with that file, the key and the xlsx folder.

    :param results: mapping of name -> picklable result
    """
    glbs = GlobalParameters()
    print_message("Writing results...")

    pickle_path = os.path.join(glbs.RESULTS_PATH, "Pickle files")
    if path.exists(pickle_path):
        shutil.rmtree(pickle_path, ignore_errors=True)
    os.makedirs(pickle_path)

    xlsx_path = os.path.join(glbs.RESULTS_PATH, "Xlsx files")
    if path.exists(xlsx_path):
        shutil.rmtree(xlsx_path, ignore_errors=True)
    # Short pause between removing and recreating the folder.
    time.sleep(0.5)
    os.makedirs(xlsx_path)

    for key, value in results.items():
        pickle_file = os.path.join(pickle_path, key) + ".pickle"
        with open(pickle_file, "wb+") as handle:
            pickle.dump(value, handle)
        new_write_file_content(pickle_file, key, xlsx_path)
コード例 #16
0
ファイル: main.py プロジェクト: botimerj/1Rx
def test_mvm():
    """Compare ``MVM.dot`` with ``MVM.dot_truth`` over several settings.

    For every combination of array dimension, bits per cell and active rows
    a fresh ``MVM`` multiplies a random 1x128 vector with a random 128x128
    matrix (values in [-1, 1), resolution argument 8). One line per
    combination is printed: the settings, ``e_read * 1e12`` and ``Pass`` or
    ``Fail`` depending on whether both results agree element-wise.
    """
    # Default global params
    gp = GlobalParameters()

    dims = [64]
    bit_counts = [1]
    row_counts = [1, 2, 8, 16, 32, 64]
    combinations = [(d, n, a)
                    for d in dims
                    for n in bit_counts
                    for a in row_counts]

    print('R-Dim | Nb | AR')
    print('---------------')
    for size, n_bit, rows in combinations:
        gp.rram.size_x = size
        gp.rram.size_y = size
        gp.rram.n_bit = n_bit
        gp.mvm.active_rows = rows
        print('{}x{} | {} | {} :'.format(size, size, n_bit, rows), end='')

        M = 128
        N = 128
        res = 8

        mvm = MVM(gp)

        vec = np.random.random([1, M]) * 2 - 1
        mat = np.random.random([M, N]) * 2 - 1

        # The timestamp is captured but not used further down.
        start = time.time()
        result = mvm.dot(vec, mat, res)
        print('{:.2f}'.format(mvm.e_read * 1e12), end=' ')

        result_t = mvm.dot_truth(vec, mat, res)

        mismatch = False in (result == result_t)
        print("Fail" if mismatch else "Pass")
コード例 #17
0
def print_run_details():
    """Print a banner listing the global parameters of the current run."""
    glbs = GlobalParameters()
    # Order matters: the values fill the placeholders of the banner in turn.
    values = (
        glbs.DATASET_DIR,
        glbs.FEATURES,
        glbs.STYLISTIC_FEATURES,
        glbs.NORMALIZATION,
        glbs.METHODS,
        glbs.MEASURE,
        glbs.EXPORT_AS_BASELINE,
        glbs.RESULTS_PATH,
    )
    banner = """
	---------------------------------------
	Dataset path: {}
	Features: {}
	Stylistic Features: {}
	Normalization: {}
	Methods: {}
	Measure: {}
	Export as baseline: {}
	Results Path: {}
	---------------------------------------
	"""
    print(banner.format(*values))
コード例 #18
0
ファイル: main.py プロジェクト: botimerj/1Rx
def energy_vs_active_rows():
    """Sweep ``mvm.active_rows`` and log and plot the resulting energy.

    One deep copy of the global parameters is made per setting and the list
    is handed to ``sweep_gp_params``. The returned energy values are printed
    raw, then scaled by ``1e12`` and divided by ``128 * 128 + 127 * 128``,
    written one value per line to ``outputs/energy_vs_arows`` and plotted
    over ``log2(active_rows + 1)``.
    """
    # Default global params
    gp = GlobalParameters()

    # Define Constant Global params
    gp.adc.comp_var = 0.000
    gp.rram.size_x = 256
    gp.rram.size_y = 256
    # NOTE(review): both ``r_var`` (here) and ``rvar`` (in the loop below) are
    # assigned; confirm which attribute name the model actually reads.
    gp.rram.r_var = np.log10(1) / 6

    # Define Parametric Global params
    set1 = [0]
    set2 = [1, 3, 7, 15, 31, 63, 127, 255]

    gp_list = []
    for rvar in set1:
        for rows in set2:
            gp.rram.rvar = rvar
            gp.mvm.active_rows = rows
            # Snapshot the parameters; gp itself is mutated on the next pass.
            gp_list.append(copy.deepcopy(gp))

    _, energy, _ = sweep_gp_params(gp_list)
    print(np.array(energy))
    energy = np.array(energy) * 1e12 / (128 * 128 + 127 * 128)

    # Write each entry on its own line. The previous loop wrote the whole
    # array once per entry and left the file open if a write failed.
    with open("outputs/energy_vs_arows", 'w') as f:
        for e in energy:
            f.write(str(e) + '\n')

    fig, ax = plt.subplots()
    ax.set_xlabel('ADC Resolution (N)')
    ax.set_ylabel('Energy/OP (pJ)')
    ax.set_title('Energy Efficiency of MAC')
    plt.plot(np.log2(np.array(set2) + 1), energy)
    plt.show()
コード例 #19
0
ファイル: main.py プロジェクト: botimerj/1Rx
def boundary_test():
    """Run one small 4x4 product through ``MVM`` and print every step.

    A random 1x4 vector and a random 4x4 matrix (values in [-1, 1)) are
    printed, multiplied with ``dot`` (resolution argument 8) and the result
    is compared element-wise with ``dot_truth``; ``Pass`` or ``Fail`` is
    printed accordingly.
    """
    M = 4
    N = 4
    res = 8

    mvm = MVM(GlobalParameters())

    vec = np.random.random([1, M]) * 2 - 1
    print("==Vec==")
    print(vec)

    mat = np.random.random([M, N]) * 2 - 1
    print("==Mat==")
    print(mat)

    result = mvm.dot(vec, mat, res)
    print("==Res==")
    print(result)

    result_t = mvm.dot_truth(vec, mat, res)
    mismatch = False in (result == result_t)
    print("Fail" if mismatch else "Pass")
コード例 #20
0
def selectionHalfMethod(X, y, all_features):
    """Search for a feature count by bisecting on classification accuracy.

    Starting from the first entry of ``GlobalParameters().SELECTION`` (a pair
    of selection method and feature count), each round builds a selector with
    ``select_k_best``, stores it in ``FEATURE_MODEL[1]``, runs ``classify``
    and takes the best mean accuracy over all returned methods. Depending on
    how that compares with the previous round, the candidate count is moved
    between the ``bottom`` and ``top`` bounds. Every round's results are
    collected and finally passed to ``add_results_glbs``.

    :param X: forwarded to ``classify``
    :param y: forwarded to ``classify``
    :param all_features: not used in this function
    :return: None
    """
    glbs = GlobalParameters()
    # Remembered so FILE_NAME can be restored once the search is over.
    filename = glbs.FILE_NAME
    results = {}
    # First candidate: (selection method, feature count) from the config.
    nxt = (glbs.SELECTION[0][0], int(glbs.SELECTION[0][1]))
    max_last_result = 0
    bottom = (0, 0)
    top = nxt
    while top != bottom:
        max_nxt_result = 0
        print_message(nxt[0])
        print_message(nxt[1])
        # NOTE(review): the count is appended to the *current* FILE_NAME, so
        # the suffixes of earlier rounds accumulate - confirm this is wanted.
        glbs.FILE_NAME = glbs.FILE_NAME + str(nxt[1])
        select = select_k_best(nxt[0], int(nxt[1]))
        glbs.FEATURE_MODEL[1] = select
        results[glbs.FILE_NAME] = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
        # Best mean accuracy among all classification methods of this round.
        for method in results[glbs.FILE_NAME].items():
            if mean(method[1]["accuracy"]) > max_nxt_result:
                max_nxt_result = mean(method[1]["accuracy"])
        results = add_results(results, glbs, nxt)
        if max_nxt_result >= max_last_result:
            # Not worse than the previous round: this count becomes the upper
            # bound and a smaller count is tried next.
            top = nxt
            if bottom[1] == 0:
                nxt = (nxt[0], int(int(nxt[1]) / 2))
            if bottom[1] != 0:
                nxt = (nxt[0], int((int(nxt[1]) + bottom[1]) / 2))
            max_last_result = max_nxt_result
        elif max_nxt_result < max_last_result:
            # Worse than the previous round: this count becomes the lower
            # bound and the midpoint of the two bounds is tried next.
            bottom = nxt
            nxt = (nxt[0], int((top[1] + bottom[1]) / 2))
        glbs.SELECTION[0] = nxt
        # Stop once the bounds are adjacent and the next candidate would be
        # the lower bound again.
        if bottom[1] - top[1] == -1 and bottom == nxt:
            break
    glbs.FILE_NAME = filename
    add_results_glbs(results, glbs)
コード例 #21
0
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC

from confusion_matrix import accuracy_confusion_matrix
from global_parameters import print_message, GlobalParameters
from model_persistence import save_model
from precision_recall_curve import precision_recall
from roc_curve import roc_curve_data

# Module-level handle to the project's global parameters, created on import.
glbs = GlobalParameters()

# Classifier instances keyed by their short method name; every estimator is
# constructed with its default arguments when the module is imported.
methods = {
    "svc": LinearSVC(),
    "rf": RandomForestClassifier(),
    "mlp": MLPClassifier(),
    "lr": LogisticRegression(),
    "mnb": MultinomialNB(),
}


def get_results(ts_labels, prediction, decision):
    # ( "multilabel_confusion_matrix",   multilabel_confusion_matrix(ts_labels, prediction)),
    measures = {
        "accuracy_score":
        accuracy_score(ts_labels, prediction),
コード例 #22
0
def set_global_parameters(configs):
    """Copy one configuration into the global parameters.

    :param configs: pair of ``(file name, config dict)``. The dict must hold
        the keys ``features``, ``nargs``, ``output_csv``, ``methods``,
        ``train``, ``test``, ``results``, ``measure`` and
        ``stylistic_features``; ``selection`` and ``language`` are optional.

    ``NORMALIZATION`` is the upper-cased ``nargs`` string with its characters
    sorted. When no ``language`` is configured it is detected with
    ``text_language`` from the first file listed in the training directory;
    if anything in that step fails, ``"english"`` is used.
    """
    glbls = GlobalParameters()
    config = configs[1]
    glbls.FILE_NAME = configs[0]
    glbls.FEATURES = config["features"]
    glbls.NORMALIZATION = "".join(sorted(config["nargs"].upper()))
    glbls.OUTPUT_DIR = config["output_csv"]
    glbls.METHODS = config["methods"]
    glbls.TRAIN_DIR = config["train"]
    glbls.TEST_DIR = config["test"]
    glbls.RESULTS_PATH = config["results"]
    glbls.MEASURE = config["measure"]
    glbls.STYLISTIC_FEATURES = config["stylistic_features"]
    glbls.SELECTION = config["selection"].items(
    ) if "selection" in config else []
    try:
        if 'language' in config:
            glbls.LANGUAGE = config['language']
        else:
            sample_path = os.path.join(config["train"],
                                       os.listdir(config["train"])[0])
            # The context manager closes the sample file; it used to be
            # left open.
            with open(sample_path, "r", encoding="utf8",
                      errors='replace') as sample:
                glbls.LANGUAGE = text_language(sample.read())
    except Exception:
        # Best effort: fall back to English when detection is not possible.
        # Narrowed from a bare except so Ctrl-C is no longer swallowed.
        glbls.LANGUAGE = "english"
コード例 #23
0
def clean_backup_files():
    """Remove the ``temp_backups`` folder located beside the results folder.

    The folder path is ``GlobalParameters().RESULTS_PATH`` without its last
    ``os.sep``-separated component, followed by ``temp_backups``. Errors
    during removal are ignored.
    """
    glbs = GlobalParameters()
    print_message("removing temp files...")
    parent_parts = glbs.RESULTS_PATH.split(os.sep)[:-1]
    parent_dir = os.sep.join(parent_parts)
    folder_path = parent_dir + os.sep + "temp_backups"
    shutil.rmtree(folder_path, ignore_errors=True)
コード例 #24
0
def plot_confusion_matrix(cm,
                          result_path,
                          normalize=True,
                          title=None,
                          accuracy=None,
                          cmap=plt.cm.Blues,
                          color='black'):
    """
    Plot a confusion matrix and save it as ``<title>.jpg`` in ``result_path``.

    :param cm: 2-D confusion matrix (indexed as ``cm[row, column]``)
    :param result_path: folder the image is written to
    :param normalize: when true, every row is divided by its sum and the
        cells are printed with two decimals
    :param title: used for the file name only; a default that depends on
        ``normalize`` is chosen when it is empty
    :param accuracy: when truthy, shown as a percentage in the plot title
    :param cmap: colour map passed to ``imshow``
    :param color: value stored in matplotlib's ``text.color`` setting when
        ``accuracy`` is given
    """
    if not title:
        title = ('Normalized confusion matrix'
                 if normalize else 'Confusion matrix, without normalization')

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals

    glbs = GlobalParameters()
    # NOTE(review): the tick labels follow the iteration order of a set -
    # confirm that it matches the row/column order of ``cm``.
    labels = [label.split('.')[0] for label in set(glbs.LABELS)]

    fig, ax = plt.subplots(figsize=(3, 2))
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(image, ax=ax)
    # Show every tick and label it with the matching list entry.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=labels,
           yticklabels=labels,
           ylabel='True',
           xlabel='Predicted')
    # Extend the y limits by half a cell on both ends.
    bottom, top = ax.get_ylim()
    ax.set_ylim(bottom + 0.5, top - 0.5)

    # Set the alignment of the x tick labels.
    plt.setp(ax.get_xticklabels(), ha="right", rotation_mode="anchor")

    # Annotate every cell with its value; dark cells get white text.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            value = cm[row, col]
            text_color = "white" if value > thresh else "black"
            ax.text(col,
                    row,
                    format(value, fmt),
                    ha="center",
                    va="center",
                    color=text_color)
    fig.tight_layout()

    if accuracy:
        accuracy = float('{0:.4g}'.format(accuracy * 100))
        plt.title('Accuracy Score: ' + str(accuracy) + '\nConfusion Matrix:')
        plt.rcParams.update({"text.color": color})

    image_path = os.path.join(result_path, title) + '.jpg'
    plt.savefig(image_path, bbox_inches='tight')

    plt.close('all')
コード例 #25
0
def set_global_parameters(configs):
    """Copy one configuration into the global parameters.

    :param configs: pair of ``(file name, config dict)``. The dict must hold
        the keys ``features``, ``nargs``, ``methods``, ``dataset``,
        ``results``, ``measure``, ``stylistic_features``, ``selection``,
        ``k_folds_cv``, ``iterations``, ``baseline_path`` and
        ``export_as_baseline``; ``language`` is optional.

    ``NORMALIZATION`` holds the lower-cased entries of ``nargs`` and
    ``FEATURE_MODEL`` is reset to an empty list. When no ``language`` is
    configured it is detected with ``text_language`` from the first file
    listed in the dataset directory; if anything in that step fails,
    ``"english"`` is used. ``STOP_WORDS`` is ``"english"`` when ``"s"`` is in
    ``nargs`` and ``None`` otherwise.
    """
    glbls = GlobalParameters()
    config = configs[1]
    glbls.FILE_NAME = configs[0]
    glbls.FEATURES = config["features"]
    glbls.NORMALIZATION = [n.lower() for n in config["nargs"]]
    glbls.METHODS = config["methods"]
    glbls.DATASET_DIR = config["dataset"]
    glbls.RESULTS_PATH = config["results"]
    glbls.MEASURE = config["measure"]
    glbls.STYLISTIC_FEATURES = config["stylistic_features"]
    glbls.SELECTION = list(config["selection"].items())
    glbls.K_FOLDS = config["k_folds_cv"]
    glbls.ITERATIONS = config["iterations"]
    glbls.BASELINE_PATH = config["baseline_path"]
    glbls.EXPORT_AS_BASELINE = config["export_as_baseline"]
    glbls.FEATURE_MODEL = []
    try:
        if "language" in config:
            glbls.LANGUAGE = config["language"]
        else:
            # os.path.join instead of a hard-coded backslash so the sample
            # file is also found where the separator is not "\".
            sample_path = os.path.join(config["dataset"],
                                       os.listdir(config["dataset"])[0])
            # The context manager closes the sample file; it used to be
            # left open.
            with open(sample_path, "r", encoding="utf8",
                      errors="replace") as sample:
                glbls.LANGUAGE = text_language(sample.read())
    except Exception:
        # Best effort: fall back to English when detection is not possible.
        # Narrowed from a bare except so Ctrl-C is no longer swallowed.
        glbls.LANGUAGE = "english"
    glbls.STOP_WORDS = "english" if "s" in config["nargs"] else None