print('完成降维处理')

# Build the training matrix: the reduced features as a DataFrame, joined
# column-wise with f1. (The original carried a disabled note about using
# only f1[['type_similar', 'content_similar']].)
train_feature = pd.DataFrame(feature)
train_feature = pd.concat([train_feature, f1], axis=1)
print('所有特征')
print(train_feature.info())

# Three-fold cross-validation over contiguous, unshuffled folds.
# random_state is deliberately not passed: it has no effect when
# shuffle=False, and recent scikit-learn releases raise ValueError for
# that combination.
kf = KFold(n_splits=3, shuffle=False)
predictions = []
for train, test in kf.split(train_feature):
    # Predictors: only the rows that belong to the training folds.
    train_predictors = train_feature.iloc[train, :]
    # Target values for those same training rows.
    train_target = train_merge['Level'].iloc[train]
    # NOTE(review): the task tag is 'gender' although the target column is
    # 'Level' -- confirm against classify.term.predict.
    test_predictions = classify.term().predict(
        train_predictors, train_target, train_feature.iloc[test, :], 'gender')
    predictions.append(test_predictions)

# Write the out-of-fold predictions next to the true labels as CSV.
predictions = np.concatenate(predictions, axis=0)
StackingSubmission = pd.DataFrame({'predictions': predictions})
# The folds are unshuffled, so the concatenated predictions are in row
# order. Assign the labels by position as well; a plain Series assignment
# would align on index labels and could misplace rows or insert NaN when
# train_merge does not have a default 0..n-1 index.
StackingSubmission['Level'] = train_merge['Level'].values

# pandas renamed `line_terminator` to `lineterminator` (1.5) and later
# removed the old spelling (2.0); try the current name first and fall back
# for older installations.
csv_options = dict(sep=',', header=True, index=False)
try:
    StackingSubmission.to_csv('Level.csv', lineterminator="\n", **csv_options)
except TypeError:
    StackingSubmission.to_csv('Level.csv', line_terminator="\n", **csv_options)

# Thresholding the predictions at 0.5 into {0, 1} was left disabled in the
# original and remains disabled here.
#remove label missed samples gender_traindatas, genderlabel = preprocessob.removezero(traindata, genderdata) age_traindatas, agelabel = preprocessob.removezero(traindata, agedata) edu_traindatas, edulabel = preprocessob.removezero(traindata, educationdata) # 填写你的wv向量路径 w2vtrain = np.load('wv300_win100.train.npy') w2vtest = np.load('wv300_win100.test.npy') wv_gender_traindatas, genderlabel = preprocessob.removezero(w2vtrain, genderdata) wv_age_traindatas, agelabel = preprocessob.removezero(w2vtrain, agedata) wv_edu_traindatas, edulabel = preprocessob.removezero(w2vtrain, educationdata) if order=='test': termob1 = classify.term() termob2 = classify.term() termob3 = classify.term() p1 = multiprocessing.Process(target=termob1.validation, args=(gender_traindatas, genderlabel, wv_gender_traindatas, 'gender',)) p2=multiprocessing.Process(target=termob2.validation,args=(age_traindatas, agelabel, wv_age_traindatas, 'age',)) p3=multiprocessing.Process(target=termob3.validation,args=(edu_traindatas, edulabel, wv_edu_traindatas, 'edu',)) p1.start() p2.start() p3.start() p1.join() p2.join() p3.join() elif order=='predict':
# Filter out the samples with a missing label (per the original note).
gender_traindatas, genderlabel = preprocessob.removezero(
    traindata, labels_list_transform)
filtered_shape = gender_traindatas.shape
print(filtered_shape, filtered_shape[0])

# Set these to the paths of your word-vector files.
w2vtrain = np.load('wv300_win100.train.npy')
w2vtest = np.load('wv300_win100.test.npy')
wv_gender_traindatas, wv_genderlabel = preprocessob.removezero(
    w2vtrain, labels_list_transform)

print('预处理结束')
pre_time_end = time.time()
print('total time is', pre_time_end - pre_time_start)

if order == 'test':
    # A single validation worker for the 'category' task. The second and
    # third workers (termob2/termob3, p2/p3) are disabled in the original.
    termob1 = classify.term()
    validation_args = (
        gender_traindatas, genderlabel, wv_gender_traindatas, 'category')
    p1 = multiprocessing.Process(
        target=termob1.validation, args=validation_args)
    p1.start()
    p1.join()
elif order == 'predict':
    termob = classify.term()