import sys

import cPickle as pickle

import numpy as np
import matplotlib.pyplot as plt
import sklearn.cross_validation as cv
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.grid_search import GridSearchCV
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.decomposition import TruncatedSVD, PCA, KernelPCA
from sklearn.feature_selection import SelectKBest, chi2

# FeatureExtract is a project-local class; the parent directory has to be on
# sys.path before the package can be imported.  Appending the same entry a
# second time is harmless if an earlier part of the file already did so.
sys.path.append('../')
from jos_learn.features import FeatureExtract

# Setup the features
extractor = FeatureExtract()
labels = extractor.labels
works = extractor.works
# First element of extractor.feature_cp -- presumably the feature matrix;
# TODO confirm against FeatureExtract.
feature1 = extractor.feature_cp[0]

# Build one vectorized "shell" feature set per entry in `length`.  For each
# value the per-work pickles are read from ../shell/length<N>_no_mirror/.
feature_shell = []
length = [2, 3]
for l in length:
    folder = '../shell/length%d_no_mirror' % l
    dict_list = []
    for work in works:
        # Use a context manager so every file handle is closed right after
        # it is read, instead of leaking one handle per work.
        # NOTE(review): pickle executes arbitrary code while loading; these
        # files are assumed to be trusted, locally generated artifacts.
        with open('%s/%s.pkl' % (folder, work), 'rb') as handle:
            data = pickle.load(handle)
        dict_list.append(data)
    # _vectorize returns a pair; only the first element is accumulated here,
    # `names` is left bound to the value from the last iteration.
    feature, names = extractor._vectorize(dict_list)
    feature_shell.append(feature)
# `sys` must be imported before sys.path can be extended; a repeated import
# is a no-op if an earlier part of the file already imported it.
import sys

# Make the project package one directory up importable.
sys.path.append('../')
from jos_learn.features import FeatureExtract

import numpy as np
import matplotlib.pyplot as plt
import sklearn.cross_validation as cv
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.grid_search import GridSearchCV
from sklearn.feature_extraction.text import TfidfTransformer

# Setup the features
extractor = FeatureExtract()
labels = extractor.labels
feature1, feature_name1 = extractor.feature_cp
feature2, feature_name2 = extractor.feature_ps_ql_pair

# TF-IDF normalised, dense copies of both feature sets.  The same transformer
# object is refit by each fit_transform call, so after these two lines
# `normalizer` holds the weights fitted on feature2.
normalizer = TfidfTransformer()
feature1_norm = normalizer.fit_transform(feature1).toarray()
feature2_norm = normalizer.fit_transform(feature2).toarray()

# Compute the label masks once instead of re-evaluating the comparison for
# every slice.  Rows whose label equals 2 form the "unsecure" subsets.
unsecure_mask = labels == 2
labelled_mask = labels != 2

# The unsecure subsets must be taken BEFORE feature1/feature2 are overwritten
# below, otherwise the masks would no longer line up with the rows.
feature1_unsecure = feature1[unsecure_mask]
feature2_unsecure = feature2[unsecure_mask]
feature1_unsecure_norm = feature1_norm[unsecure_mask]
feature2_unsecure_norm = feature2_norm[unsecure_mask]

# Keep only the rows whose label is not 2 in the raw feature sets.
feature1 = feature1[labelled_mask]
feature2 = feature2[labelled_mask]