Example #1
0
        print('完成降维处理')
        # Build the training matrix: the reduced features side by side with the
        # extra feature frame f1.
        # NOTE(review): pd.concat(axis=1) aligns on index labels; this assumes
        # f1 carries the same default positional index -- confirm.
        train_feature = pd.DataFrame(feature)
        #f1[['type_similar','content_similar']]
        train_feature = pd.concat([train_feature, f1], axis=1)
        print('所有特征')
        # DataFrame.info() prints its summary itself and returns None, so it
        # must not be wrapped in print() (that emitted a stray "None" line).
        train_feature.info()
        # Run 3-fold cross-validation.  random_state is deliberately omitted:
        # it has no effect when shuffle=False and recent scikit-learn releases
        # raise ValueError when it is supplied without shuffling.
        kf = KFold(n_splits=3, shuffle=False)
        predictions = []
        for train, test in kf.split(train_feature):
            # Features restricted to the rows of the training folds.
            train_predictors = train_feature.iloc[train, :]
            # Matching targets, selected by position like the features.
            train_target = train_merge['Level'].iloc[train]
            # A fresh classifier is created for every fold; it is fitted on
            # the training folds and predicts the held-out fold.
            test_predictions = classify.term().predict(
                train_predictors, train_target, train_feature.iloc[test, :],
                'gender')
            predictions.append(test_predictions)

        # Write the results to CSV.  Without shuffling, the test folds are
        # consecutive row ranges, so concatenating them restores row order.
        predictions = np.concatenate(predictions, axis=0)
        StackingSubmission = pd.DataFrame({'predictions': predictions})
        # Assign the true labels by position (.values) rather than by index
        # label: the folds above are positional (iloc), and label alignment
        # would silently misplace rows or yield NaN if train_merge does not
        # have a default 0..n-1 index.
        StackingSubmission['Level'] = train_merge['Level'].values
        # NOTE(review): newer pandas spells this keyword `lineterminator`;
        # it is left unchanged here to stay compatible with the pandas
        # version this file was written for -- confirm before upgrading.
        StackingSubmission.to_csv('Level.csv',
                                  sep=',',
                                  header=True,
                                  index=False,
                                  line_terminator="\n")

        #predictions[predictions > .5] = 1
        #predictions[predictions <= .5] = 0
Example #2
0
    # Drop the samples whose label is missing, separately for each of the
    # three label sets (gender, age, education).  Each call returns a
    # (filtered data, filtered labels) pair.
    # NOTE(review): presumably a label of 0 marks "missing" -- confirm in
    # preprocessob.removezero.
    gender_traindatas, genderlabel = preprocessob.removezero(traindata, genderdata)
    age_traindatas, agelabel = preprocessob.removezero(traindata, agedata)
    edu_traindatas, edulabel = preprocessob.removezero(traindata, educationdata)

    # Fill in the paths to your word-vector (wv) arrays here.
    # w2vtest is loaded but not used in the visible part of this function.
    w2vtrain = np.load('wv300_win100.train.npy')
    w2vtest = np.load('wv300_win100.test.npy')

    # Apply the same filtering to the word-vector features.
    # NOTE: genderlabel / agelabel / edulabel are rebound here, replacing the
    # values returned by the calls above.  Presumably both calls yield the
    # same labels because the same label arrays are passed -- TODO confirm.
    wv_gender_traindatas, genderlabel = preprocessob.removezero(w2vtrain, genderdata)
    wv_age_traindatas, agelabel = preprocessob.removezero(w2vtrain, agedata)
    wv_edu_traindatas, edulabel = preprocessob.removezero(w2vtrain, educationdata)

    # 'test' mode: run validation for the three tasks, each in its own
    # process with its own classify.term() instance.  Every worker receives
    # (features, labels, word-vector features, task name).
    if order=='test':
        termob1 = classify.term()
        termob2 = classify.term()
        termob3 = classify.term()
        p1 = multiprocessing.Process(target=termob1.validation,
                                     args=(gender_traindatas, genderlabel, wv_gender_traindatas, 'gender',))
        p2=multiprocessing.Process(target=termob2.validation,args=(age_traindatas, agelabel, wv_age_traindatas, 'age',))
        p3=multiprocessing.Process(target=termob3.validation,args=(edu_traindatas, edulabel, wv_edu_traindatas, 'edu',))

        # Start all workers before joining any of them so they run
        # concurrently.
        p1.start()
        p2.start()
        p3.start()

        # Block until every worker has finished.
        p1.join()
        p2.join()
        p3.join()
    elif order=='predict':
Example #3
0
    # Drop the samples whose label is missing; the call returns a
    # (filtered data, filtered labels) pair.
    # NOTE(review): presumably a label of 0 marks "missing" -- confirm in
    # preprocessob.removezero.
    gender_traindatas, genderlabel = preprocessob.removezero(
        traindata, labels_list_transform)
    # Show the shape of the filtered data and its first dimension.
    print(gender_traindatas.shape, gender_traindatas.shape[0])

    # Fill in the paths to your word-vector (wv) arrays here.
    # w2vtest is loaded but not used in the visible part of this function.
    w2vtrain = np.load('wv300_win100.train.npy')
    w2vtest = np.load('wv300_win100.test.npy')

    # Apply the same filtering to the word-vector features.  The labels
    # returned here (wv_genderlabel) are not used in the visible code.
    wv_gender_traindatas, wv_genderlabel = preprocessob.removezero(
        w2vtrain, labels_list_transform)
    print('预处理结束')
    # Report the elapsed preprocessing time; pre_time_start is set earlier,
    # outside the visible part of this function.
    pre_time_end = time.time()
    print('total time is', pre_time_end - pre_time_start)
    # 'test' mode: run validation for the single 'category' task in a child
    # process.  The worker receives (features, labels, word-vector features,
    # task name).
    if order == 'test':
        termob1 = classify.term()
        # termob2 = classify.term()
        # termob3 = classify.term()
        p1 = multiprocessing.Process(target=termob1.validation,
                                     args=(gender_traindatas, genderlabel,
                                           wv_gender_traindatas, 'category'))

        p1.start()
        # p2.start()
        # p3.start()

        # Only one worker exists, so the parent simply waits for it here.
        p1.join()
        # p2.join()
        # p3.join()
    elif order == 'predict':
        termob = classify.term()