def fit(self, debate_data, y, with_meta_grid_search=True):
        # Grid-search the hyper-parameters of the base predictors that
        # request it before the leave-one-debate-out loop below.
        for predictor, grid_search in self.stack:
            if grid_search:
                predictor.fit(debate_data, y, True)

        # Leave-one-debate-out: fit the base stack on all other debates and
        # collect each estimator's class probabilities for the held-out one.
        debate_estimator_sample_probs = []
        for index, (test_debate_data, test_labels) in enumerate(zip(debate_data, y)):
            train_debate_data = debate_data[:index] + debate_data[index + 1:]
            train_labels = y[:index] + y[index + 1:]

            estimator_sample_probs = self.stack_fit_predict(
                train_debate_data, train_labels, [test_debate_data], [test_labels])
            debate_estimator_sample_probs.append(estimator_sample_probs)

        # Transpose so predictions are grouped per estimator, then flatten
        # each estimator's per-debate output into one sample-level list.
        estimator_predictions = list(zip(*debate_estimator_sample_probs))
        estimator_predictions = [flatten(x) for x in estimator_predictions]

        base_predictions = self.convert_to_meta_input(estimator_predictions)

        y_meta = flatten(y)

        # Meta-learner: a logistic regression stacked on top of the
        # out-of-fold base predictions.
        self.meta_learner = Pipeline([('log_reg',
                                       LogisticRegression(**self.params['meta_params']))])

        if with_meta_grid_search:
            self.meta_learner = grid_search_log_reg_c(
                self.meta_learner, base_predictions, y_meta, None, 'macro_recall')
        else:
            self.meta_learner.fit(base_predictions, y_meta)

        # Finally refit the base stack on the full data set.
        self.stack_fit_predict(debate_data, y)
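
# The `flatten` helper is used by every predictor in this listing but is not
# shown here. A minimal sketch of the assumed behaviour, concatenating
# per-debate lists into one sample-level list:
def flatten(list_of_lists):
    """Concatenate a list of per-debate lists into a single flat list."""
    return [item for sublist in list_of_lists for item in sublist]
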
    def fit(self, debate_data, y, with_grid_search=False):
        y = flatten(y)

        # Acoustic i-vector features as the only input.
        train_x = IVectorTransformer(debate_data).features

        pipe = Pipeline([('log_reg',
                          LogisticRegression(**self.params['log_reg']))])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data,
                                               'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
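
# `grid_search_log_reg_c` is called with the same signature throughout this
# listing but is not defined here. A hedged sketch, assuming it tunes the
# regularisation strength C of the pipeline's 'log_reg' step with a
# macro-averaged recall scorer. The C grid and cv settings are assumptions,
# and the real helper may use `debate_data` to build per-debate CV splits:
from sklearn.metrics import make_scorer, recall_score
from sklearn.model_selection import GridSearchCV

def grid_search_log_reg_c(pipe, train_x, y, debate_data, scoring):
    scorer = make_scorer(recall_score, average='macro')
    search = GridSearchCV(pipe,
                          param_grid={'log_reg__C': [0.01, 0.1, 1, 10, 100]},
                          scoring=scorer, cv=5)
    search.fit(train_x, y)
    return search.best_estimator_  # refit on all data by default
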
    def fit(self, debate_data, y, with_grid_search=False):
        train_x = flatten([x.texts for x in debate_data])
        # Flatten the per-debate labels to match the flattened texts (the
        # other predictors in this listing do the same).
        y = flatten(y)

        pipe = Pipeline([
            ('tfidf', TfidfVectorizer(**self.params['tfidf_params'])),
            ('log_reg', LogisticRegression(**self.params['log_reg']))
        ])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data, 'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
    def fit(self, debate_data, y, with_grid_search=True):
        debates = [x.debate for x in debate_data]
        x_texts = flatten([x.texts for x in debate_data])
        x_authors = flatten([x.authors for x in debate_data])
        y = flatten(y)

        # One scaler per feature block so each modality is normalised
        # independently before the blocks are merged.
        self.ivectors_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.opensmile_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.bert_cls_token_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.tfidf_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.liwc_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.authors_scaler = MinMaxScaler(**self.params['min_max_scaler_params'])
        self.vectorizer = TfidfVectorizer(**self.params['tfidf_params'])

        ivectors = IVectorTransformer(debate_data).features
        ivectors = self.ivectors_scaler.fit_transform(ivectors)

        opensmile = OpensmileTransformer(debates).features
        opensmile = self.opensmile_scaler.fit_transform(opensmile)

        bert_cls_token = self.bert_client.encode(x_texts)
        bert_cls_token = self.bert_cls_token_scaler.fit_transform(
            bert_cls_token)

        tfidf_ngrams = self.vectorizer.fit_transform(x_texts).toarray()
        tfidf_ngrams = self.tfidf_scaler.fit_transform(tfidf_ngrams)

        liwc = LIWCTransformer(x_texts).features
        liwc = self.liwc_scaler.fit_transform(liwc)

        authors = AuthorsTransformer(x_authors).features
        authors = self.authors_scaler.fit_transform(authors)

        # Concatenate all scaled blocks into one training matrix.
        train_x = merge_features(ivectors, opensmile, bert_cls_token,
                                 tfidf_ngrams, liwc, authors)

        pipe = Pipeline([('log_reg',
                          LogisticRegression(**self.params['log_reg']))])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data,
                                               'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
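
# `merge_features` is assumed to concatenate the feature blocks column-wise;
# a minimal sketch (the real implementation may differ, e.g. handle sparse
# matrices):
import numpy as np

def merge_features(*feature_blocks):
    # Each block must have one row per sample; stack them side by side.
    return np.hstack(feature_blocks)
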
    def fit(self, debate_data, y, with_grid_search=False):
        x_texts = flatten([x.texts for x in debate_data])
        y = flatten(y)

        # BERT sentence embeddings as the only features.
        train_x = self.bert_client.encode(x_texts)

        pipe = Pipeline([
            ('log_reg', LogisticRegression(**self.params['log_reg']))
        ])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data, 'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
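
# `self.bert_client` is not constructed in this listing. One plausible setup,
# assuming the bert-serving package and a server already running on the
# default host/port (both assumptions):
from bert_serving.client import BertClient

bert_client = BertClient()  # connects to a running bert-serving-server
# encode() returns an (n_texts, hidden_size) array of sentence vectors
embeddings = bert_client.encode(['sample utterance'])
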
    def fit(self, debate_data, y, with_grid_search=False):
        x_texts = flatten([x.texts for x in debate_data])
        y = flatten(y)

        # Readability statistics (e.g. Flesch scores) as the features.
        train_x = TextstatReadabilityTransformer(x_texts).features

        pipe = Pipeline([('log_reg',
                          LogisticRegression(**self.params['log_reg']))])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data,
                                               'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
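
# `TextstatReadabilityTransformer` is not shown here. A hypothetical sketch
# built on the `textstat` package, exposing a `.features` matrix with one
# row of readability scores per text (the exact score set is an assumption):
import numpy as np
import textstat

class TextstatReadabilityTransformer:
    def __init__(self, texts):
        self.features = np.array([
            [textstat.flesch_reading_ease(t),
             textstat.flesch_kincaid_grade(t),
             textstat.automated_readability_index(t)]
            for t in texts
        ])
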
    def fit(self, debate_data, y, with_grid_search=False):
        x_texts = flatten([x.texts for x in debate_data])
        x_authors = flatten([x.authors for x in debate_data])
        y = flatten(y)

        # LIWC lexical categories combined with author-identity features.
        train_x = merge_features(
            LIWCTransformer(x_texts).features,
            AuthorsTransformer(x_authors).features)

        pipe = Pipeline([('log_reg',
                          LogisticRegression(**self.params['log_reg']))])

        if with_grid_search:
            self.model = grid_search_log_reg_c(pipe, train_x, y, debate_data,
                                               'macro_recall')
        else:
            self.model = pipe
            self.model.fit(train_x, y)
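
    # Hedged sketch of the matching predict(): rebuild the same LIWC + author
    # feature blocks and delegate to the fitted pipeline. The method body is
    # an assumption mirroring fit() above, not part of the original listing.
    def predict(self, debate_data):
        x_texts = flatten([x.texts for x in debate_data])
        x_authors = flatten([x.authors for x in debate_data])
        test_x = merge_features(
            LIWCTransformer(x_texts).features,
            AuthorsTransformer(x_authors).features)
        return self.model.predict(test_x)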