lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1])
    # Keep only one row for each distinct value of the 'text' column.
    df = df.drop_duplicates(subset=['text'])
    # Chained assignment: the same 0..n-1 range becomes both the index and
    # the 'id' column, so row labels are contiguous again after the drop.
    df.index = df['id'] = range(df.shape[0])

    # Matches each maximal run of characters outside ASCII A-Z / a-z.
    # Despite the name, digits are matched (and therefore removed) too.
    non_alphanums = re.compile('[^A-Za-z]+')

    def normalize_text(text):
        """Return *text* as lowercase ASCII words separated by single spaces.

        Every run of non-letter characters collapses to one space, the
        result is lowercased, and leading/trailing whitespace is trimmed.
        """
        # Splitting on the separator runs and re-joining with one space is
        # equivalent to substituting each run with a single space.
        pieces = non_alphanums.split(text)
        collapsed = ' '.join(pieces)
        return collapsed.lower().strip()

    # Letters-only, lowercased copy of every text; normalize_text is passed
    # directly because it already takes a single string argument.
    df['text_normalized'] = df['text'].map(normalize_text)
    # One TextBlob polarity value per row, computed on the normalized text.
    df['textblob_score'] = df['text_normalized'].map(
        lambda cleaned: textblob.TextBlob(cleaned).polarity)

    # Function-local import of the module that provides WordbagRegressor.
    import wordbag_regressor
    print("Train wordbag regressor")
    # Construct the regressor from the model file path and tripadvisor_dir
    # (tripadvisor_dir is defined outside the visible code).
    # NOTE(review): presumably passing the directory makes the constructor
    # train and save to the given path -- confirm in wordbag_regressor.
    wb_regressor = wordbag_regressor.WordbagRegressor(
        "../models/wordbag_model.pkl.gz", tripadvisor_dir)
    # Disabled alternative: the same constructor with the model path only.
    #wb_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
    # Predictions are made on the 'text' column, not on 'text_normalized'.
    df['wordbag_score'] = wb_regressor.predict(df['text'].values)

    # Function-local import of the module that provides WordhashRegressor.
    import wordhash_regressor
    print("Train wordhash regressor")
    # Construct the regressor from the model file path and tripadvisor_dir
    # (tripadvisor_dir is defined outside the visible code).
    # NOTE(review): presumably passing the directory makes the constructor
    # train and save to the given path -- confirm in wordhash_regressor.
    wh_regressor = wordhash_regressor.WordhashRegressor(
        "../models/wordhash_model.pkl.gz", tripadvisor_dir)
    # Disabled alternative: the same constructor with the model path only.
    #wh_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
    # Predictions are made on the 'text' column, not on 'text_normalized'.
    df['wordhash_score'] = wh_regressor.predict(df['text'].values)

    # Function-local import of the module that provides WordseqRegressor.
    import wordseq_regressor
    print("Train wordseq regressor")
    # Construct the regressor from the model file path and tripadvisor_dir
    # (tripadvisor_dir is defined outside the visible code).
    # NOTE(review): presumably passing the directory makes the constructor
    # train and save to the given path -- confirm in wordseq_regressor.
    ws_regressor = wordseq_regressor.WordseqRegressor(
        "../models/wordseq_model.pkl.gz", tripadvisor_dir)
    # Disabled alternative: the same constructor with the model path only.
    #ws_regressor = wordseq_regressor.WordseqRegressor("../models/wordseq_model.pkl.gz")
Ejemplo n.º 2
0
    # Keep only one row for each distinct value of the 'text' column.
    df = df.drop_duplicates(subset=['text'])
    # Chained assignment: the same 0..n-1 range becomes both the index and
    # the 'id' column, so row labels are contiguous again after the drop.
    df.index = df['id'] = range(df.shape[0])

    # Matches each maximal run of characters outside ASCII A-Z / a-z.
    # Despite the name, digits are matched (and therefore removed) too.
    non_alphanums = re.compile('[^A-Za-z]+')

    def normalize_text(text):
        """Return *text* as lowercase ASCII words separated by single spaces.

        Every run of non-letter characters collapses to one space, the
        result is lowercased, and leading/trailing whitespace is trimmed.
        """
        # Splitting on the separator runs and re-joining with one space is
        # equivalent to substituting each run with a single space.
        pieces = non_alphanums.split(text)
        collapsed = ' '.join(pieces)
        return collapsed.lower().strip()

    # Letters-only, lowercased copy of every text; normalize_text is passed
    # directly because it already takes a single string argument.
    df['text_normalized'] = df['text'].map(normalize_text)
    # One TextBlob polarity value per row, computed on the normalized text.
    df['textblob_score'] = df['text_normalized'].map(
        lambda cleaned: textblob.TextBlob(cleaned).polarity)

    # Function-local import of the module that provides WordbagRegressor.
    import wordbag_regressor
    print("Train wordbag regressor")
    # Disabled alternative: the constructor with tripadvisor_dir as well.
    #wb_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz", tripadvisor_dir)
    # The active call passes only the model file path.
    # NOTE(review): presumably this loads an already saved model instead of
    # training, in which case the "Train ..." message above is misleading --
    # confirm in wordbag_regressor.
    wb_regressor = wordbag_regressor.WordbagRegressor(
        "../models/wordbag_model.pkl.gz")
    # Predictions are made on the 'text' column, not on 'text_normalized'.
    df['wordbag_score'] = wb_regressor.predict(df['text'].values)

    # Function-local import of the module that provides WordhashRegressor.
    import wordhash_regressor
    print("Train wordhash regressor")
    # Construct the regressor from the model file path and tripadvisor_dir
    # (tripadvisor_dir is defined outside the visible code).
    # NOTE(review): presumably passing the directory makes the constructor
    # train and save to the given path -- confirm in wordhash_regressor.
    wh_regressor = wordhash_regressor.WordhashRegressor(
        "../models/wordhash_model.pkl.gz", tripadvisor_dir)
    # Disabled alternative: the same constructor with the model path only.
    #wh_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
    # Predictions are made on the 'text' column, not on 'text_normalized'.
    df['wordhash_score'] = wh_regressor.predict(df['text'].values)

    # Function-local import of the module that provides WordseqRegressor.
    import wordseq_regressor
    print("Train wordseq regressor")
    # Construct the regressor from the model file path and tripadvisor_dir
    # (tripadvisor_dir is defined outside the visible code).
    # NOTE(review): presumably passing the directory makes the constructor
    # train and save to the given path -- confirm in wordseq_regressor.
    ws_regressor = wordseq_regressor.WordseqRegressor(
        "../models/wordseq_model.pkl.gz", tripadvisor_dir)
    # Disabled alternative: the same constructor with the model path only.
    #ws_regressor = wordseq_regressor.WordseqRegressor("../models/wordseq_model.pkl.gz")
    # Scores come from predict_batch here, whereas the wordbag and wordhash
    # regressors above are queried through predict.
    df['wordseq_score'] = ws_regressor.predict_batch(df['text'].values)