Python FeatureSelectedClassifier.fitの例

プログラミング言語: Python

名前空間/パッケージ名: strephit.classification.classifiers

メソッド/関数: fit

hotexamples.comのコード掲載数: 4

Python FeatureSelectedClassifier.fit - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのstrephit.classification.classifiers.FeatureSelectedClassifier.fitの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

FeatureSelectedClassifier(3)

fit(2)

predict(1)

コード例 #1

ファイルを表示

ファイル: train.py プロジェクト: Wikidata/StrepHit

def main(
    training_set,
    language,
    outfile,
    model_class,
    model_param,
    extractor_class,
    extractor_param,
    gazetteer,
    folds,
    scoring,
    skip_majority,
    evaluate_gold,
):
    """Train the classifier and dump it, together with its extractor.

    Args:
        training_set: Open, seekable file-like object with a ``name``
            attribute. Every line is parsed as a JSON document that must
            provide the keys ``sentence``, ``lu`` and ``fes``.
        language: Value handed to the extractor as its ``language`` parameter.
        outfile: Destination passed to ``joblib.dump``.
        model_class: Forwarded to ``initialize`` together with ``model_param``.
        model_param: Forwarded to ``initialize`` together with ``model_class``.
        extractor_class: Forwarded to ``initialize`` to build the extractor.
        extractor_param: Iterable of extra extractor parameters, appended
            after the ``("language", language)`` pair.
        gazetteer: File-like object containing JSON, or a falsy value to
            use an empty gazetteer.
        folds: When greater than 1, ``kfolds_evaluation`` is run before the
            final fit.
        scoring: Forwarded to ``kfolds_evaluation``.
        skip_majority: Forwarded to ``kfolds_evaluation``.
        evaluate_gold: When truthy, ``gold_evaluation`` is run on the training
            set before the actual training.
    """

    gazetteer = reverse_gazetteer(json.load(gazetteer)) if gazetteer else {}

    model_cls, model_args = initialize(model_class, model_param, False)

    if evaluate_gold:
        gold_extractor = initialize(extractor_class, [("language", language)] + list(extractor_param), True)

        # gold_evaluation walks the sentences more than once, so it must get
        # a real list: on Python 3 ``map`` yields a one-shot iterator and the
        # second pass would silently see no sentences at all.
        gold_sentences = [json.loads(row) for row in training_set]
        gold_evaluation(gold_sentences, gold_extractor, gazetteer, model_cls, model_args)

        # Rewind so the training loop below reads the file from the start.
        training_set.seek(0)

    extractor = initialize(extractor_class, [("language", language)] + list(extractor_param), True)

    logger.info("Building training set from '%s' ...", training_set.name)
    for row in training_set:
        data = json.loads(row)
        extractor.process_sentence(data["sentence"], data["lu"], data["fes"], add_unknown=True, gazetteer=gazetteer)
    x, y = extractor.get_features(refit=True)
    logger.info("Got %d samples with %d features each", *x.shape)

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)

    if folds > 1:
        kfolds_evaluation(folds, model, scoring, skip_majority, x, y)

    logger.info("Fitting model ...")
    model.fit(x, y)

    # The extractor is stored next to the model; presumably so the same
    # feature mapping can be reused at prediction time (confirm with loader).
    joblib.dump((model, {"extractor": extractor}), outfile)

    logger.info("Done, dumped model to '%s'", outfile)

コード例 #2

ファイルを表示

ファイル: train.py プロジェクト: rpatil524/StrepHit

def main(training_set, language, outfile, model_class, model_param, extractor_class,
         extractor_param, gazetteer, folds, scoring, skip_majority, evaluate_gold):
    """Train the classifier and dump it, together with its extractor.

    Args:
        training_set: Open, seekable file-like object with a ``name``
            attribute. Every line is parsed as a JSON document that must
            provide the keys ``sentence``, ``lu`` and ``fes``.
        language: Value handed to the extractor as its ``language`` parameter.
        outfile: Destination passed to ``joblib.dump``.
        model_class: Forwarded to ``initialize`` together with ``model_param``.
        model_param: Forwarded to ``initialize`` together with ``model_class``.
        extractor_class: Forwarded to ``initialize`` to build the extractor.
        extractor_param: Iterable of extra extractor parameters, appended
            after the ``('language', language)`` pair.
        gazetteer: File-like object containing JSON, or a falsy value to
            use an empty gazetteer.
        folds: When greater than 1, ``kfolds_evaluation`` is run before the
            final fit.
        scoring: Forwarded to ``kfolds_evaluation``.
        skip_majority: Forwarded to ``kfolds_evaluation``.
        evaluate_gold: When truthy, ``gold_evaluation`` is run on the training
            set before the actual training.
    """

    gazetteer = reverse_gazetteer(json.load(gazetteer)) if gazetteer else {}

    model_cls, model_args = initialize(model_class, model_param, False)

    if evaluate_gold:
        gold_extractor = initialize(
            extractor_class, [('language', language)] + list(extractor_param), True
        )

        # gold_evaluation walks the sentences more than once, so it must get
        # a real list: on Python 3 ``map`` yields a one-shot iterator and the
        # second pass would silently see no sentences at all.
        gold_sentences = [json.loads(row) for row in training_set]
        gold_evaluation(
            gold_sentences, gold_extractor,
            gazetteer, model_cls, model_args
        )

        # Rewind so the training loop below reads the file from the start.
        training_set.seek(0)

    extractor = initialize(extractor_class, [('language', language)] + list(extractor_param), True)

    logger.info("Building training set from '%s' ...", training_set.name)
    for row in training_set:
        data = json.loads(row)
        extractor.process_sentence(data['sentence'], data['lu'], data['fes'],
                                   add_unknown=True, gazetteer=gazetteer)
    x, y = extractor.get_features(refit=True)
    logger.info('Got %d samples with %d features each', *x.shape)

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)

    if folds > 1:
        kfolds_evaluation(folds, model, scoring, skip_majority, x, y)

    logger.info('Fitting model ...')
    model.fit(x, y)

    # The extractor is stored next to the model; presumably so the same
    # feature mapping can be reused at prediction time (confirm with loader).
    joblib.dump((model, {
        'extractor': extractor
    }), outfile)

    logger.info("Done, dumped model to '%s'", outfile)

コード例 #3

ファイルを表示

ファイル: train.py プロジェクト: Wikidata/StrepHit

def gold_evaluation(sentences, extractor, gazetteer, model_cls, model_args):
    """Fit on the non-gold sentences and log the accuracy on the gold ones.

    Sentences without a truthy ``gold_fes`` entry form the training data;
    sentences with one are tagged and compared against their gold labels.
    A prediction counts as correct when it is one of the gold labels listed
    for the corresponding chunk.

    Args:
        sentences: Iterable of dicts with the keys ``sentence``, ``lu`` and
            ``fes`` and optionally ``gold_fes`` (a mapping from chunk to an
            iterable of role names). Any iterable is accepted, including
            one-shot iterators such as ``map`` objects.
        extractor: Feature extractor providing ``process_sentence``,
            ``get_features``, ``start``, ``label_index`` and ``lu_column``.
        gazetteer: Forwarded to ``extractor.process_sentence``.
        model_cls: Forwarded to ``FeatureSelectedClassifier``.
        model_args: Forwarded to ``FeatureSelectedClassifier``.

    Returns:
        None. The result is only logged; if no gold sentence is present a
        warning is logged and the function returns early.
    """
    logger.info("Evaluating on the gold sentences")

    # Two passes over the data follow; materialise it so that a generator or
    # a Python 3 ``map`` object is not exhausted after the first one.
    sentences = list(sentences)

    for each in sentences:
        if not each.get("gold_fes"):
            extractor.process_sentence(each["sentence"], each["lu"], each["fes"], add_unknown=True, gazetteer=gazetteer)
    x_tr, y_tr = extractor.get_features(refit=True)

    # NOTE(review): presumably resets the samples accumulated so far, so the
    # next get_features() call only covers the gold sentences -- confirm.
    extractor.start()
    tagged_gold = []
    for each in sentences:
        if each.get("gold_fes"):
            tagged_gold.append(
                (
                    each["gold_fes"],
                    extractor.process_sentence(
                        each["sentence"], each["lu"], each["fes"], add_unknown=False, gazetteer=gazetteer
                    ),
                )
            )

    if not tagged_gold:
        # Logger.warn is a deprecated alias that newer Python versions dropped.
        logger.warning("asked to evaluate gold, but no gold sentences found")
        return

    x_gold, _ = extractor.get_features(refit=False)
    y_gold = []
    for gold_fes, tagged in tagged_gold:
        for chunk, is_sample in tagged:
            if is_sample:
                # A falsy role name is looked up under the "O" label.
                y_gold.append([extractor.label_index[fe or "O"] for fe in gold_fes.get(chunk, [])])

    # Internal sanity check: one list of gold labels per extracted sample.
    assert len(y_gold) == x_gold.shape[0]

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)
    model.fit(x_tr, y_tr)
    y_pred = model.predict(x_gold)

    correct = sum(1 for actual, predicted in zip(y_gold, y_pred) if predicted in actual)
    logger.info("Gold accuracy: %f (%d / %d roles)", float(correct) / len(y_gold), correct, len(y_gold))

コード例 #4

ファイルを表示

ファイル: train.py プロジェクト: rpatil524/StrepHit

def gold_evaluation(sentences, extractor, gazetteer, model_cls, model_args):
    """Fit on the non-gold sentences and log the accuracy on the gold ones.

    Sentences without a truthy ``gold_fes`` entry form the training data;
    sentences with one are tagged and compared against their gold labels.
    A prediction counts as correct when it is one of the gold labels listed
    for the corresponding chunk.

    Args:
        sentences: Iterable of dicts with the keys ``sentence``, ``lu`` and
            ``fes`` and optionally ``gold_fes`` (a mapping from chunk to an
            iterable of role names). Any iterable is accepted, including
            one-shot iterators such as ``map`` objects.
        extractor: Feature extractor providing ``process_sentence``,
            ``get_features``, ``start``, ``label_index`` and ``lu_column``.
        gazetteer: Forwarded to ``extractor.process_sentence``.
        model_cls: Forwarded to ``FeatureSelectedClassifier``.
        model_args: Forwarded to ``FeatureSelectedClassifier``.

    Returns:
        None. The result is only logged; if no gold sentence is present a
        warning is logged and the function returns early.
    """
    logger.info('Evaluating on the gold sentences')

    # Two passes over the data follow; materialise it so that a generator or
    # a Python 3 ``map`` object is not exhausted after the first one.
    sentences = list(sentences)

    for each in sentences:
        if not each.get('gold_fes'):
            extractor.process_sentence(each['sentence'], each['lu'], each['fes'],
                                       add_unknown=True, gazetteer=gazetteer)
    x_tr, y_tr = extractor.get_features(refit=True)

    # NOTE(review): presumably resets the samples accumulated so far, so the
    # next get_features() call only covers the gold sentences -- confirm.
    extractor.start()
    tagged_gold = []
    for each in sentences:
        if each.get('gold_fes'):
            tagged_gold.append((each['gold_fes'], extractor.process_sentence(
                each['sentence'], each['lu'], each['fes'],
                add_unknown=False, gazetteer=gazetteer
            )))

    if not tagged_gold:
        # Logger.warn is a deprecated alias that newer Python versions dropped.
        logger.warning('asked to evaluate gold, but no gold sentences found')
        return

    x_gold, _ = extractor.get_features(refit=False)
    y_gold = []
    for gold_fes, tagged in tagged_gold:
        for chunk, is_sample in tagged:
            if is_sample:
                # A falsy role name is looked up under the 'O' label.
                y_gold.append([extractor.label_index[fe or 'O'] for fe in gold_fes.get(chunk, [])])

    # Internal sanity check: one list of gold labels per extracted sample.
    assert len(y_gold) == x_gold.shape[0]

    model = FeatureSelectedClassifier(model_cls, extractor.lu_column(), model_args)
    model.fit(x_tr, y_tr)
    y_pred = model.predict(x_gold)

    correct = sum(1 for actual, predicted in zip(y_gold, y_pred) if predicted in actual)
    logger.info('Gold accuracy: %f (%d / %d roles)', float(correct) / len(y_gold),
                correct, len(y_gold))