""" import numpy as np import pickle from import_data import load_pickle from features import simple_features, feature_array folders = [ 'MM09', 'MM10', 'MM11', 'MM12', 'MM14', 'MM15', 'MM16', 'MM18', 'MM19', 'MM20', 'MM21' ] path = "C:/Users\SB00745777\OneDrive - Ulster University\KaraOne\Data/" for f in folders: new_path = path + f data = load_pickle(new_path, "window_data.p") labels = load_data(new_path, "labels", "labels") feature_vector = [] for tr in data: wdw_mn, wdw_absmn, wdw_sm, wdw_sd, wdw_md, wdw_vr, wdw_mx, wdw_absmx, wdw_mnm, wdw_absmin, wdw_mxmn, wdw_mnmx = ( [] for i in range(12)) for wdw in tr: ch_mn, ch_absmn, ch_sm, ch_sd, ch_md, ch_vr, ch_mx, ch_absmx, ch_mnm, ch_absmin, ch_mxmn, ch_mnmx = ( [] for i in range(12)) for ch in wdw: mn, absmn, sd, sm, md, vr, mx, absmx, mnm, absmin, mxmn, mnmx = simple_features( ch) ch_mn.append(mn) ch_absmn.append(absmn) ch_sm.append(sm)
# NOTE(review): This unit is whitespace-mangled (a whole script collapsed onto one physical
# line) and TRUNCATED — it ends mid-call at `tr1_char_3 = extract_features( train1_df,`.
# Left byte-identical; restore the original multi-line file from version control.
# NOTE(review): Because everything sits on one line, the first inline `#` ("# from
# neural_network_classifier import ...") comments out the REST of the line as written —
# nothing after it executes. In the original multi-line layout these were separate
# commented-out experiment variants (char/word/pos-tag features, svm/xgboost runners).
# NOTE(review): `from neural_network_classifier import magic` — `magic` is never used in the
# visible text; presumably used further down. TODO confirm before removing.
import pandas as pd import time as time # from neural_network_classifier import run_neural_network # from xgboost_classifier import run_xgboost from import_data import load_pickle from feature_extraction import extract_features # from svm_classifier import run_svm from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer from neural_network_classifier import magic train1_df = load_pickle('data/train1.pkl') test1_df = load_pickle('data/test1.pkl') train2_df = load_pickle('data/train2.pkl') test2_df = load_pickle('data/test2.pkl') # charf_tr1 = extract_features(train1_df, chars=True) # wordf_tr1 = extract_features(train1_df, words=True) # posf_tr1 = extract_features(train1_df, pos_tags = True) # posf_ts1 = extract_features(test1_df, pos_tags=True) # print("svm w/ countvectorizer on n-gram chars") tr1_char_1 = extract_features( train1_df, TfidfVectorizer(analyzer="char", ngram_range=(1, 1), binary=False)) tr1_char_2 = extract_features( train1_df, TfidfVectorizer(analyzer="char", ngram_range=(2, 2), binary=False)) tr1_char_3 = extract_features( train1_df,
"""Cross-validate a linear SVM on author-attribution features.

Loads the pickled training set, extracts TF-IDF word-trigram features via the
project's extract_features helper, and reports 5-fold cross-validation scores.
"""

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, cross_val_score
from import_data import load_pickle
from feature_extraction import extract_features
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

train1_df = load_pickle('data/train1.pkl')

# NOTE(review): despite the "count_" prefix this uses a TfidfVectorizer over word
# trigrams, not a CountVectorizer — name kept for compatibility with other scripts.
count_tr1_char_2 = extract_features(
    train1_df,
    TfidfVectorizer(analyzer="word", ngram_range=(3, 3), binary=False))


def run_svm(df):
    """Run 5-fold cross-validation of a linear SVC on *df*.

    Expects a DataFrame with a "text" column (feature vectors) and an
    "author" column (string class labels).

    Returns a list: [per-fold score array, mean score, score std].
    """
    # Feature matrix as a plain list, as cross_val_score accepts list input.
    samples = df["text"].tolist()

    # Encode string author labels into integer class ids.
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(df["author"].tolist()).tolist()

    model = SVC(gamma="auto", kernel="linear")
    fold_scores = cross_val_score(model, samples, encoded_labels, cv=5)
    return [fold_scores, fold_scores.mean(), fold_scores.std()]


print(run_svm(count_tr1_char_2))
# NOTE(review): This unit is whitespace-mangled (a whole script collapsed onto one physical
# line) and TRUNCATED — the per-folder loop body is cut off right after the PCA constructor.
# Left byte-identical; restore the original multi-line file from version control.
# NOTE(review): Only OneVsRestClassifier is imported in the visible text, yet the code uses
# pd, np, load_pickle, order_class_labels, train_test_split, SVC, and PCA. Those imports must
# exist elsewhere in the file (not shown) or are missing — TODO confirm.
# NOTE(review): The Windows path uses bare backslashes in a non-raw string
# ("...\cfcoo\OneDrive...\KaraOne\Data/"); prefer a raw string r"..." or forward slashes.
# NOTE(review): On one physical line the inline `#folder names` comment swallows the rest of
# the line; in the original layout these were end-of-line comments on separate statements.
from sklearn.multiclass import OneVsRestClassifier folders = [ 'MM09', 'MM10', 'MM11', 'MM12', 'MM14', 'MM15', 'MM16', 'MM18', 'MM19', 'MM20', 'MM21' ] #folder names path = "C:/Users\cfcoo\OneDrive - Ulster University\KaraOne\Data/" classes = [ '/uw/', '/tiy/', '/iy/', '/m/', '/n/', '/piy/', '/diy/', 'gnaw', 'pat', 'knew', 'pot' ] #class labels classifier_scores = pd.DataFrame() #DataFrame for saving CV scores for f in folders: new_path = path + f data = load_pickle(new_path, "td_df.p") features = data.Features.tolist() features = np.array(features) targets = np.array(data.Targets) class_names = order_class_labels( classes, targets) #orders class_names in the order they first appear in dataset X_Train, X_Test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42) clf = SVC(kernel='linear', class_weight='balanced') pca = PCA(svd_solver='randomized', whiten=True)