index=None, encoding='utf8') print('sgd特征已保存\n') ########################### pac(PassiveAggressiveClassifier) ################################ print('PAC stacking') stack_train = np.zeros((len(train), number)) stack_test = np.zeros((len(test), number)) score_va = 0 for i, (tr, va) in enumerate( StratifiedKFold(score, n_folds=n_folds, random_state=1017)): print('stack:%d/%d' % ((i + 1), n_folds)) pac = PassiveAggressiveClassifier(random_state=1017) pac.fit(train_feature[tr], score[tr]) score_va = pac._predict_proba_lr(train_feature[va]) score_te = pac._predict_proba_lr(test_feature) print(score_va) print('得分' + str(mean_squared_error(score[va], pac.predict(train_feature[va])))) stack_train[va] += score_va stack_test += score_te stack_test /= n_folds stack = np.vstack([stack_train, stack_test]) df_stack = pd.DataFrame() for i in range(stack.shape[1]): df_stack['tfidf_pac_classfiy_{}'.format(i)] = np.around(stack[:, i], 6) df_stack.to_csv('feature/tfidf_pac_2_error_single_classfiy.csv', index=None, encoding='utf8') print('pac特征已保存\n')
# In[100]:

# Learn the TF-IDF vocabulary from the training text only, then map both the
# training and the test text into that same feature space.
tf = TfidfVectorizer(stop_words='english')
tf_train = tf.fit_transform(X_train)
tf_test = tf.transform(X_test)

# Disabled debugging snippet carried over from the original cell:
# for keys, value in tf.vocabulary_.items():
#     print(keys, value)
#     if keys == 'jessica':
#         tat = value


# In[101]:

linear_clf = PassiveAggressiveClassifier(max_iter=50)
linear_clf.fit(tf_train, y_train)

# `_predict_proba_lr` is a private (underscore-prefixed) scikit-learn method.
# Column 1 of its output is taken and scaled by 100.
# NOTE(review): presumably column 1 is the probability of the second class in
# `linear_clf.classes_`, expressed as a percentage -- confirm.
proba_matrix = linear_clf._predict_proba_lr(tf_test)
test_proba = proba_matrix[:, 1] * 100


# In[102]:

k = pd.DataFrame(df_test['title'])
r = df_test['title']
predicted_class = linear_clf.predict(tf_test)

# The columns are appended in a fixed order (confidence, class, title) after
# whatever columns `X_test` contributes.
# NOTE(review): the truncated loop at the end of this chunk reads `x[3]`
# positionally, so this order must not change; with a single-column `X_test`
# index 3 would be 'title' -- confirm.
res = pd.DataFrame(X_test)
res['confidence'] = test_proba
res['class'] = predicted_class
res['title'] = k


# In[103]:

nres = res.to_numpy()
# NOTE(review): the visible source stops in the middle of the loop that
# follows (the body of the `if` is not shown).  The fragment is therefore left
# exactly as it appeared in the original line instead of being guessed at:
# for x in nres: if x[3] == title: