Python SklearnRegressionComponentの例

プログラミング言語: Python

名前空間/パッケージ名: text2props.modules.regression.components

hotexamples.comのコード掲載数: 5

Python SklearnRegressionComponent - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtext2props.modules.regression.components.SklearnRegressionComponentの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

SklearnRegressionComponent(5)

よく使われるメソッド

SklearnRegressionComponent (5)

コード例 #1

ファイルを表示

 def __init__(self, random_state: int = None, known_latent_traits: Dict[str, Dict[str, float]] = None):
     """Assemble the latent-traits calibrator and the text-based estimator.

     Two independent pipelines are built, one per latent trait. Each one feeds
     TF-IDF based ``IRFeaturesComponent`` features into a random forest wrapped
     in a ``SklearnRegressionComponent``:

     * difficulty: at most 1000 TF-IDF features, 250 trees, max depth 50;
     * discrimination: at most 800 TF-IDF features, 100 trees, max depth 25.

     :param random_state: forwarded as ``random_state`` to both random forests.
     :param known_latent_traits: optional dictionary of already known latent
         traits. When given, its keys must be exactly ``DIFFICULTY`` and
         ``DISCRIMINATION`` and a ``KnownParametersCalibrator`` is used;
         otherwise an ``IRTCalibrator`` is created from the two trait ranges.
     :raises ValueError: if ``known_latent_traits`` is given with any other
         set of keys.
     """
     if known_latent_traits is not None:
         # Validate first: the calibrator must never be built from a malformed
         # dictionary, and the caller should get this explicit error message.
         if set(known_latent_traits.keys()) != {DIFFICULTY, DISCRIMINATION}:
             raise ValueError("wrong keys in known_latent_traits dictionary")
         latent_traits_calibrator = KnownParametersCalibrator(latent_traits=known_latent_traits)
     else:
         latent_traits_calibrator = IRTCalibrator(DIFFICULTY_RANGE, DISCRIMINATION_RANGE)

     # Difficulty pipeline.
     vec_diff = TfidfVectorizer(stop_words='english', preprocessor=vectorizer_text_preprocessor, max_features=1000)
     feat_eng_regression_pipeline_difficulty = FeatureEngAndRegressionPipeline(
         FeatureEngineeringModule([IRFeaturesComponent(vec_diff, concatenate_correct=True, concatenate_wrong=True)]),
         RegressionModule([
             SklearnRegressionComponent(
                 RandomForestRegressor(n_estimators=250, max_depth=50, random_state=random_state),
                 latent_trait_range=DIFFICULTY_RANGE
             )
         ])
     )

     # Discrimination pipeline (separate vectorizer, smaller forest).
     vec_disc = TfidfVectorizer(stop_words='english', preprocessor=vectorizer_text_preprocessor, max_features=800)
     feat_eng_regression_pipeline_discrimination = FeatureEngAndRegressionPipeline(
         FeatureEngineeringModule([IRFeaturesComponent(vec_disc, concatenate_correct=True, concatenate_wrong=True)]),
         RegressionModule([
             SklearnRegressionComponent(
                 RandomForestRegressor(n_estimators=100, max_depth=25, random_state=random_state),
                 latent_trait_range=DISCRIMINATION_RANGE
             )
         ])
     )

     # One pipeline per latent trait, keyed by the trait name.
     estimator_from_text = FeatureEngAndRegressionEstimatorFromText(
         {
             DIFFICULTY: feat_eng_regression_pipeline_difficulty,
             DISCRIMINATION: feat_eng_regression_pipeline_discrimination
         }
     )
     super().__init__(latent_traits_calibrator, estimator_from_text)

コード例 #2

ファイルを表示

df_test = df_test.drop(
    df_test.head(100).index
)  # Not to use the validation data used in 5.1 for model selection

# Load the known latent traits; the context manager guarantees the file
# handle is closed even if unpickling fails.
# NOTE: pickle can execute arbitrary code while loading -- only read files
# produced by a trusted source.
with open(os.path.join(DATA_PATH, 'known_latent_traits.p'), "rb") as latent_traits_file:
    dict_latent_traits = pickle.load(latent_traits_file)

# define latent traits calibrator (known latent traits)
latent_traits_calibrator = KnownParametersCalibrator(dict_latent_traits)

# NOTE(review): this handle is not closed in the visible part of the script;
# presumably it is written to and closed further down -- confirm.
file = open("outputs/5_3_read.txt", 'w')

# pipeline difficulty
pipe_b = FeatureEngAndRegressionPipeline(
    FeatureEngineeringModule([ReadabilityFeaturesComponent()]),
    RegressionModule([
        SklearnRegressionComponent(RandomForestRegressor(random_state=SEED),
                                   latent_trait_range=B_RANGE)
    ]))
# pipeline discrimination
pipe_a = FeatureEngAndRegressionPipeline(
    FeatureEngineeringModule([ReadabilityFeaturesComponent()]),
    RegressionModule([
        SklearnRegressionComponent(RandomForestRegressor(random_state=SEED),
                                   latent_trait_range=A_RANGE)
    ]))
# create estimator from text from the previous pipelines
estimator_from_text = FeatureEngAndRegressionEstimatorFromText({
    DIFFICULTY:
    pipe_b,
    DISCRIMINATION:
    pipe_a
})

コード例 #3

ファイルを表示

#   model, as obtained in the scripts 5_1_*
# Difficulty pipeline, assembled step by step: TF-IDF based IR features plus
# linguistic and readability features, regressed with a random forest.
difficulty_tfidf = TfidfVectorizer(
    stop_words='english',
    preprocessor=vectorizer_text_preprocessor,
    min_df=0.02,
    max_df=0.92,
)
difficulty_feature_engineering = FeatureEngineeringModule([
    IRFeaturesComponent(
        difficulty_tfidf,
        concatenate_correct=True,
        concatenate_wrong=True,
    ),
    LinguisticFeaturesComponent(),
    ReadabilityFeaturesComponent(),
])
difficulty_regression = RegressionModule([
    SklearnRegressionComponent(
        RFRegressor(n_estimators=100, max_depth=20, random_state=SEED),
        latent_trait_range=DIFFICULTY_RANGE,
    ),
])
pipeline_difficulty = FeatureEngAndRegressionPipeline(
    difficulty_feature_engineering,
    difficulty_regression,
)
pipeline_discrimination = FeatureEngAndRegressionPipeline(
    FeatureEngineeringModule([
        IRFeaturesComponent(TfidfVectorizer(
            stop_words='english',
            preprocessor=vectorizer_text_preprocessor,
            min_df=0.02,
            max_df=0.96),
                            concatenate_correct=True,
                            concatenate_wrong=True),
        LinguisticFeaturesComponent(),
        ReadabilityFeaturesComponent(),
    ]),
    RegressionModule([

コード例 #4

ファイルを表示

        # Difficulty pipeline: IR (TF-IDF), linguistic and readability
        # features feeding a support vector regressor.
        vec_b = TfidfVectorizer(
            stop_words='english', preprocessor=preproc, min_df=min_df, max_df=max_df
        )
        feature_engineering_b = FeatureEngineeringModule([
            IRFeaturesComponent(vec_b, concatenate_correct=True, concatenate_wrong=True),
            LinguisticFeaturesComponent(),
            ReadabilityFeaturesComponent(),
        ])
        regression_b = RegressionModule(
            [SklearnRegressionComponent(SVR(), latent_trait_range=B_RANGE)]
        )
        pipe_b = FeatureEngAndRegressionPipeline(feature_engineering_b, regression_b)
        # Discrimination pipeline: its own vectorizer, configured with the
        # same document-frequency bounds as the difficulty one.
        vec_a = TfidfVectorizer(
            stop_words='english', preprocessor=preproc, min_df=min_df, max_df=max_df
        )
        pipe_a = FeatureEngAndRegressionPipeline(
            FeatureEngineeringModule([
                IRFeaturesComponent(vec_a,
                                    concatenate_correct=True,
                                    concatenate_wrong=True),
                LinguisticFeaturesComponent(),
                ReadabilityFeaturesComponent(),
            ]),
            RegressionModule([

コード例 #5

ファイルを表示

# Sweep the document-frequency bounds (min_df, max_df) that are passed to the
# TF-IDF vectorizers built in the loop body.
for min_df in np.arange(0.00, 0.11, 0.02):
    for max_df in np.arange(0.90, 1.01, 0.02):

        # One output file per (min_df, max_df) pair; it starts with the pair
        # being evaluated.
        # NOTE(review): the handle is not closed in the visible lines --
        # confirm it is closed later in the loop body.
        file = open("outputs/5_1_model_selection_DT_mindf_%.2f_maxdf_%.2f.txt" % (min_df, max_df), 'w')
        file.write("MIN_DF = %.2f - MAX DF = %.2f" % (min_df, max_df))

        # Difficulty pipeline: IR (TF-IDF), linguistic and readability
        # features feeding a decision tree regressor.
        vec_b = TfidfVectorizer(
            stop_words='english',
            preprocessor=preproc,
            min_df=min_df,
            max_df=max_df,
        )
        feature_engineering_b = FeatureEngineeringModule([
            IRFeaturesComponent(
                vec_b,
                concatenate_correct=True,
                concatenate_wrong=True,
            ),
            LinguisticFeaturesComponent(),
            ReadabilityFeaturesComponent(),
        ])
        regression_b = RegressionModule([
            SklearnRegressionComponent(
                DecisionTreeRegressor(random_state=SEED),
                latent_trait_range=B_RANGE,
            ),
        ])
        pipe_b = FeatureEngAndRegressionPipeline(feature_engineering_b, regression_b)

        # Discrimination pipeline: same structure, separate vectorizer and
        # the discrimination range.
        vec_a = TfidfVectorizer(
            stop_words='english',
            preprocessor=preproc,
            min_df=min_df,
            max_df=max_df,
        )
        feature_engineering_a = FeatureEngineeringModule([
            IRFeaturesComponent(
                vec_a,
                concatenate_correct=True,
                concatenate_wrong=True,
            ),
            LinguisticFeaturesComponent(),
            ReadabilityFeaturesComponent(),
        ])
        regression_a = RegressionModule([
            SklearnRegressionComponent(
                DecisionTreeRegressor(random_state=SEED),
                latent_trait_range=A_RANGE,
            ),
        ])
        pipe_a = FeatureEngAndRegressionPipeline(feature_engineering_a, regression_a)
        # create estimator from text from the previous pipelines