예제 #1
0
import tpot
from tpot import TPOTClassifier
from sklearn.metrics import precision_score, f1_score, recall_score
import warnings
import numpy as np
import pandas as pd
import pymysql
from collections import Counter

if __name__ == '__main__':
    warnings.filterwarnings('ignore')
    from sklearn.metrics import precision_score, f1_score, recall_score
    warnings.filterwarnings('ignore')
    # Load the data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="drumping.csv")
    df_btest = load_data_new(sql, filename="btest.csv")

    label_by_contract = "target_is_DD_ACTIVE"
    labels = label_by_contract

    select_columns = [
        "CAFE20_gender",
        "CAFE20_region",
        "CAFE20_levels",
        "is_festival_user",
        "level_use",
        "is_LAST_2YEAR_DD_ACTIVE",
        "cafe_tag_is_mop_available",
        "is_merch_user",
        "p4week_active",
from model_evalu import evalution_model, plot_importance
import numpy as np
from models import rf_mdoel, gbdt_mdoel, xgb_model, cat_boost_model
from sklearn.metrics import make_scorer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, f1_score, recall_score
from sklearn.base import clone
if __name__ == '__main__':
    warnings.filterwarnings('ignore')
    from sklearn.metrics import precision_score, f1_score, recall_score

    warnings.filterwarnings('ignore')
    # Load the data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="drumping.csv")

    label_by_contract = "target_is_DD_ACTIVE"
    labels = label_by_contract

    select_columns = [
        "CAFE20_gender",
        "CAFE20_region",
        "CAFE20_levels",
        "is_festival_user",
        "level_use",
        "is_LAST_2YEAR_DD_ACTIVE",
        "cafe_tag_is_mop_available",
        "is_merch_user",
        "p4week_active",
        "is_LAST_1YEAR_DD_ACTIVE",
예제 #3
0
from data_treatment import data_clean2
from data_treatment import load_data_new
import joblib
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == '__main__':
    sql = " "
    df = load_data_new(sql, filename="btest.csv")
    df = data_clean2(df)
    labels = "target_is_DD_ACTIVE"
    select_columns = [
        'is_festival_user',
        'is_LAST_2YEAR_DD_ACTIVE',
        'cafe_tag_is_mop_available',
        'IS_SR_KIT_USER',
        'level_use',
        'skr_rate',
        'merch_rate',
        'active_index',
        'cafe_tag_p6m_food_qty',
        'DD_rev',
        'svc_revenue',
        'SR_KIT_NUM',
        'cafe_tag_p3m_merch_party_size',
        'CAFE20_VISIT_MERCH',
        'CAFE20_AMT',
        'cafe_tag_p3m_food_qty',
        'p3m_weekday_trans',
        'max_DD_rev',
        'DD_end_gap',
import tpot
from tpot import TPOTClassifier
from sklearn.metrics import precision_score, f1_score, recall_score
import warnings
import numpy as np
import pandas as pd
import pymysql
from collections import Counter

if __name__ == '__main__':
    warnings.filterwarnings('ignore')
    from sklearn.metrics import precision_score, f1_score, recall_score
    warnings.filterwarnings('ignore')
    # Load the data
    sql = "SELECT * from bidata.trail_pigeon_wdf1"
    df = load_data_new(sql, filename="df_20190226.csv")

    label_by_contract = "is_pigeon"
    labels = label_by_contract

    select_columns = [
        # "sale_id",
        # "teacher_id",
        "know_origin",
        "grade_subject",
        "student_city_class",
        "student_province",
        "grade_rank",
        "class_rank_fillna",
        "student_province_byphone",
        "subject_ids",
예제 #5
0
from model_evalu import evalution_model


if __name__ == '__main__':
    # Script entry point: load the pickled audio records, attach order
    # information from the student view, keep only student intentions that
    # map to exactly one order row, and start timing the word-segmentation
    # step that follows.
    labels = "attend"

    file = "load_data/audio_data_with_success_attend_cutwords.pkl"
    # NOTE(review): pickle.load executes arbitrary code embedded in the file;
    # only load this path if the file comes from a trusted source.
    with open(file, 'rb') as f:
        data = pickle.load(f)

    # Map each student_intention_id to its order (if any).
    sql_student = """SELECT v.student_id,v.student_intention_id,v.student_no,lpo.apply_time AS order_apply_time,lpo.order_id ,v.submit_time from hfjydb.view_student v
    LEFT JOIN hfjydb.lesson_plan_order lpo on lpo.student_intention_id = v.student_intention_id"""

    df_student = load_data_new(sql_student, filename="df_students_s.csv")
    # Drop students that never produced an order.
    df_student = df_student[df_student["order_id"].notnull()]

    df_concat = pd.merge(data, df_student, on="student_intention_id", how="left")
    # Rows with no matching order come out of the left join with a null order_id.
    df_concat = df_concat[df_concat["order_id"].notnull()]

    # Keep only intentions that appear exactly once after the merge.
    conts = df_concat["student_intention_id"].value_counts()
    conts = conts[conts == 1]

    df_concat = df_concat[df_concat["student_intention_id"].isin(conts.index)]
    df_voice = df_concat[["order_id", "order_apply_time", "attend", "content"]]

    print(1)
    # Word segmentation
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
예제 #6
0
import warnings
warnings.filterwarnings('ignore')
from data_treatment import load_data_new, data_clean2
import pandas as pd

from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

if __name__ == '__main__':
    # Script entry point: load the trail_pigeon table, restrict it to students
    # that occur exactly once in that table, and run the cleaning step with
    # the "is_pigeon" label.
    sql = "select * from bidata.trail_pigeon"
    df = load_data_new(sql, filename="df_20190215.csv")

    # Per-student row counts in trail_pigeon.
    student_sql = """SELECT student_id,count(student_id) from trail_pigeon 
                    GROUP BY student_id"""
    student_mul = load_data_new(student_sql, filename="student_mul.csv")
    # Students that appear exactly once.
    student_ids = student_mul[student_mul["count(student_id)"] == 1]

    # Filter on the single-occurrence students. The previous code filtered on
    # student_mul (every student), which left df unchanged and student_ids unused.
    df = df[df["student_id"].isin(student_ids["student_id"])]

    label_by_contract = "is_pigeon"
    labels = label_by_contract

    # Data preprocessing
    df = data_clean2(df,
                     min_date="2018-05-01",
                     mid_date="2018-09-15",
                     max_date="2018-09-30",
                     label=labels)
예제 #7
0
from sklearn.metrics import make_scorer
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from model_evalu import evalution_model,plot_importance
import numpy as npr
from xgboost import XGBClassifier
from sklearn.metrics import precision_score,f1_score,recall_score
import warnings
import pandas as pd
import numpy as np
import pymysql
from  catboost  import  CatBoostClassifier,CatBoostRegressor,Pool
if __name__ == '__main__':
    warnings.filterwarnings('ignore')
    # Load the data
    sql = "select * from bidata.trail_boost"
    df = load_data_new(sql, filename="df_201810166.csv")

    # Load the new data
    # sql_new = "select DISTINCT teacher_id,history_trail_cnt,history_trail_suc_cnt_bycontract,history_trail_suc_cnt_bystudent,first_tkod_tifl_count from trail_boost"
    # conn = pymysql.connect(host="rm-2ze974348wa9e1ev3uo.mysql.rds.aliyuncs.com", port=3306, user="******",
    #                        passwd="xMbquuHi98JyfiF1", db="bidata", charset="utf8")
    # df_new = pd.read_sql(sql_new, conn)
    # conn.close()
    #
    # df = pd.merge(df,df_new,how="left",on=["teacher_id"])

    label_by_contract = "is_sucess_by_contract"
    label_by_pay = "is_sucess_by_pay"
    label_by_official_course = "is_sucess_by_official_course"
    labels = label_by_contract
    select_columns = [