def treenimine(m):
    """Cross-validate each model on the n-gram features and plot the accuracies.

    Besides ``m`` the function reads ``n``, ``vektorid``, ``X_train`` and
    ``Y_train`` from the enclosing scope. Every model is scored with 10-fold
    stratified cross-validation (accuracy); one summary line per model is
    printed and a box plot of all fold scores is saved through ``nimi``.

    Args:
        m: Iterable of ``(name, model)`` pairs.

    Returns:
        None.
    """
    started = datetime.now()

    print(f"\n\nN-grammidega ennustamine {n['tuup']} {n['tahis']}")
    print(f"{time.ctime(time.time())}")
    print(f"ngram size: {n['ngram_size']}, df size: {n['df_size']}")
    print("andmepunktid: {0[0]}, tunnused: {0[1]}".format(vektorid.shape))

    results = []
    names = []
    for name, model in m:
        model_started = datetime.now()

        # Fixed seed so every model is evaluated on the same folds.
        kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
        # with_mean=False scales without centring the features.
        features = preprocessing.scale(X_train, with_mean=False)
        targets = np.ravel(Y_train)
        cv_results = cross_val_score(
            model, features, targets, cv=kfold, scoring="accuracy", n_jobs=-1
        )

        results.append(cv_results)
        names.append(name)

        model_elapsed = datetime.now() - model_started
        mean_accuracy = round(cv_results.mean(), 6)
        std_text = "({:1.06f})".format(round(cv_results.std(), 6))
        print(
            "{:40s} {:3.06f} {:10s} {:20s}".format(
                f"{name}:", mean_accuracy, std_text, f"(Aeg: {str(model_elapsed)})"
            )
        )

    total_elapsed = datetime.now() - started
    print(f"Aeg: {total_elapsed}\n\n")

    pyplot.boxplot(results, labels=names)
    pyplot.title("Algorithm Comparison / ngramm {} {}".format(n["tuup"], n["tahis"]))
    pyplot.xticks(rotation=90)
    pyplot.draw()

    # NOTE(review): ``name`` is whatever the loop left behind (the last model),
    # so the file is labelled with that model although the plot covers all of
    # them - confirm this is intended.
    target = "graafikud/{} eksimismaatriks ngramm {} {}".format(
        name, n["tuup"], n["tahis"]
    )
    pyplot.savefig(nimi(target, "png"), bbox_inches="tight", dpi=100)
def funktsioon(ds, m):
    """Fit each model on a hold-out split and save its normalised confusion matrix.

    Column 0 of ``ds.values`` is taken as the label and the remaining columns
    as features. The features are standardised, split 80/20 into training and
    validation parts, and every model is fitted on the training part. For each
    model a row-normalised confusion matrix of the validation part is plotted,
    saved through ``nimi`` and printed together with the elapsed time.

    Args:
        ds: Object exposing a 2-D ``.values`` array (presumably a pandas
            DataFrame - confirm against the caller).
        m: Iterable of ``(name, model)`` pairs; each model is fitted in place.

    Returns:
        None.
    """
    algus = datetime.now()

    array = ds.values
    x = np.array(array[:, 1:])
    y = np.array(array[:, :1])

    # The scaler is fitted on all rows before the split.
    scaler = preprocessing.StandardScaler().fit(x)
    X_scaled = scaler.transform(x)
    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X_scaled, y, test_size=0.20, shuffle=True
    )

    print("\n\nsõnatüübid")
    print(f"{time.ctime(time.time())}")
    # NOTE(review): the feature count in this message is hard-coded to 17.
    print("andmepunktid: {}, tunnused: 17\n".format(len(X_train)))

    # Loop-invariant axis labels for the confusion matrix.
    names = ["eesti", "soome", "vene"]

    for name, model in m:
        algus2 = datetime.now()
        model.fit(X_train, Y_train)

        # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
        # ConfusionMatrixDisplay.from_estimator is its replacement.
        disp = plot_confusion_matrix(
            model,
            X_validation,
            Y_validation,
            display_labels=names,
            cmap=pyplot.cm.Blues,
            normalize="true",
        )
        disp.ax_.set_title(f"Normaliseeritud eksimismaatriks {name}")
        pyplot.draw()
        pyplot.savefig(
            # Fixed: the path was a plain string, so the literal text "{name}"
            # ended up in the file name instead of the model's name.
            nimi(f"graafikud/Normaliseeritud eksimismaatriks {name} ", "png"),
            bbox_inches="tight",
            dpi=100,
        )

        lõpp2 = datetime.now()
        aeg2 = lõpp2 - algus2
        print(
            "{:40s} {:150s} {:20s}".format(
                f"{name}:",
                f"{list(disp.confusion_matrix)}",
                f"(Aeg: {str(aeg2)})",
            )
        )

    lõpp = datetime.now()
    aeg = lõpp - algus
    print(f"Aeg: {aeg}\n\n")
y = np.ceil((df_suurused[1] - df_suurused[0]) / df_aste) pyplot.xticks( np.arange(x), list(range(ngrammi_suurused[0], ngrammi_suurused[1], ngrammi_aste)), ) pyplot.yticks( np.arange(y), list(range(df_suurused[0], df_suurused[1], df_aste)) ) pyplot.title( str(f"N-grammi tüüp: {ngrammi_tahis}, N-grammi tüüp {ngrammi_tuup}") ) pyplot.draw() pyplot.savefig( nimi( f"graafikud/täpsused ngramm {ngrammi_tahis} {ngrammi_tuup}", "png" ), bbox_inches="tight", dpi=100, ) except TypeError: print( ngrammi_tahis, ngrammi_tuup, "#######################################", toodeldud_andmed, ) lõpp = datetime.now() aeg = lõpp - algus
# Regroup the confusion matrices: ``tulemused`` is indexed [variant][model],
# ``uus_tulemused`` is indexed [model][variant].
uus_tulemused = [[] for _ in models]
for variant_results in tulemused:
    for j, matrix in enumerate(variant_results):
        uus_tulemused[j].append(matrix)

# Grid layout of the per-variant heatmaps: ``korgus`` rows by ``laius`` columns.
laius = 3
korgus = 2

# One figure per model, one heatmap per variant.
for j, model_matrices in enumerate(uus_tulemused):
    fig, axs = pyplot.subplots(korgus, laius, figsize=(15, 10))
    fig.suptitle(f"Normaliseeritud eksimismaatriks {nimed[j]}")

    for i in range(len(uus_tulemused[0])):
        # Fill the grid row by row.
        grid_row, grid_col = divmod(i, laius)
        ax = axs[grid_row, grid_col]
        ax.set_title(variandid[i])
        df_cm = pd.DataFrame(model_matrices[i], index=keeled, columns=keeled)
        sn.heatmap(df_cm, annot=True, cmap=pyplot.cm.Blues, ax=ax)

    pyplot.draw()
    pyplot.savefig(
        nimi(
            f"graafikud/Normaliseeritud eksimismaatriks {nimed[j]} ngrammid",
            "png",
        ),
        bbox_inches="tight",
        dpi=100,
    )
    pyplot.show()
    pyplot.close()
def treenimine(ds, m):
    """Cross-validate each model on the word-type features and plot the accuracies.

    Column 0 of ``ds.values`` is taken as the label and the remaining columns
    as features. The features are standardised and split 80/20; only the
    training part is used here. Every model is scored with 10-fold stratified
    cross-validation (accuracy), one summary line per model is printed, and a
    box plot of the fold scores is saved.

    A model whose cross-validation raises is reported and skipped, so it adds
    neither scores nor a label to the plot. If no model could be scored, the
    plot is skipped.

    Args:
        ds: Object exposing a 2-D ``.values`` array (presumably a pandas
            DataFrame - confirm against the caller).
        m: Iterable of ``(name, model)`` pairs.

    Returns:
        None.
    """
    algus = datetime.now()

    array = ds.values
    x = np.array(array[:, 1:])
    y = np.array(array[:, :1])

    scaler = preprocessing.StandardScaler().fit(x)
    X_scaled = scaler.transform(x)
    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X_scaled, y, test_size=0.20, shuffle=True
    )

    results = []
    names = []
    print("\n\nSõnatüüpidel ennustamine")
    print(f"{time.ctime(time.time())}")
    # NOTE(review): the feature count in this message is hard-coded to 17.
    print("andmepunktid: {}, tunnused: 17\n".format(len(X_train)))

    for name, model in m:
        algus2 = datetime.now()
        kfold = StratifiedKFold(n_splits=10, shuffle=True)
        try:
            cv_results = cross_val_score(
                model,
                preprocessing.scale(X_train),
                np.ravel(Y_train),
                cv=kfold,
                scoring="accuracy",
                n_jobs=-1,
            )
        except Exception as viga:
            # Fixed: a bare ``except`` used to fall through to the append below,
            # raising NameError on a first-model failure or recording the
            # previous model's scores under this model's name.
            print(
                "================================| Error |================================"
            )
            print(f"{name}: {viga}")
            continue

        results.append(cv_results)
        names.append(name)

        lõpp2 = datetime.now()
        aeg2 = lõpp2 - algus2
        print("{:40s} {:3.06f} {:10s} {:20s}".format(
            f"{name}:",
            round(cv_results.mean(), 6),
            "({:1.06f})".format(round(cv_results.std(), 6)),
            f"(Aeg: {str(aeg2)})",
        ))

    # Nothing to plot when every model failed or ``m`` was empty.
    if results:
        pyplot.figure(figsize=(8, 6))
        pyplot.boxplot(results, labels=names)
        pyplot.title("Algoritmide täpsused")
        pyplot.xticks(rotation=90)
        pyplot.savefig(
            # NOTE(review): absolute, machine-specific output path.
            nimi(
                r"C:\Users\rasmu\OneDrive\Töölaud\Programmid\Python 3\Uurimistöö\Graafikud\Lõplikud\Sõnatüüpide efektiivsus ",
                "png",
            ),
            bbox_inches="tight",
            dpi=100,
        )
        pyplot.draw()

    lõpp = datetime.now()
    aeg = lõpp - algus
    print(f"Aeg: {aeg}\n\n")