コード例 #1
0
    def load_models(self, draft_model_file, wp10_model_file):
        '''
        Load in the ORES models.

        :param draft_model_file: path to the serialized draft-quality model
        :param wp10_model_file: path to the serialized WP 1.0 quality model
        '''
        # Use context managers so the file handles are closed promptly;
        # the original left them open for the GC to reclaim.
        with open(draft_model_file, 'rb') as f:
            self.draft_model = Model.load(f)
        with open(wp10_model_file, 'rb') as f:
            self.wp10_model = Model.load(f)
コード例 #2
0
ファイル: scoring_context.py プロジェクト: reviforks/ores
    def from_config(cls, config, name, section_key="scoring_contexts"):
        """
        Construct a scoring context named *name* from *config*.

        Expects:

            scoring_contexts:
                enwiki:
                    scorer_models:
                        damaging: enwiki_damaging_2014
                        good-faith: enwiki_good-faith_2014
                    extractor: enwiki
                ptwiki:
                    scorer_models:
                        damaging: ptwiki_damaging_2014
                        good-faith: ptwiki_good-faith_2014
                    extractor: ptwiki

            extractors:
                enwiki_api: ...
                ptwiki_api: ...

            scorer_models:
                enwiki_damaging_2014: ...
                enwiki_good-faith_2014: ...
        """
        logger.info("Loading {0} '{1}' from config.".format(cls.__name__, name))
        section = config[section_key][name]

        # Resolve each model name to a loaded scorer model.
        model_map = {
            model_name: Model.from_config(config, key)
            for model_name, key in section['scorer_models'].items()
        }

        extractor = Extractor.from_config(config, section['extractor'])

        return cls(name, model_map=model_map, extractor=extractor)
コード例 #3
0
ファイル: scoring_context.py プロジェクト: groceryheist/ores
    def map_from_config(cls, config, context_names,
                        section_key="scoring_contexts"):
        """
        Loads a whole set of ScoringContext's from a configuration file
        while maintaining a cache of model names.  This aids in better memory
        management and allows model aliases to be implemented at the
        configuration level.

        :Returns:
            A map of context_names and ScoringContext's where models are loaded
            once and reused cross contexts.
        """
        # Cache of already-loaded models, keyed by their config key, so the
        # same model is shared by every context that references it.
        loaded_by_key = {}
        context_map = {}

        for context_name in context_names:
            section = config[section_key][context_name]
            model_map = {}
            for model_name, key in section['scorer_models'].items():
                if key not in loaded_by_key:
                    loaded_by_key[key] = Model.from_config(config, key)
                model_map[model_name] = loaded_by_key[key]

            extractor = Extractor.from_config(config, section['extractor'])
            context_map[context_name] = cls(
                context_name, model_map=model_map, extractor=extractor)

        return context_map
コード例 #4
0
def main(argv=None):
    """
    CLI entry point: parse docopt arguments, load the feature module, label
    name, and (optionally) a trained model, wire up input/output streams,
    and hand everything off to ``run``.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    sys.path.insert(0, ".")  # Search local directory first
    features = yamlconf.import_module(args['<features>'])
    label_name = args['<label>']
    if args['<model>'] is not None:
        # NOTE(review): the model file is opened in text mode here, while
        # other call sites in this codebase use 'rb' — confirm Model.load
        # accepts a text-mode handle for this model format.
        model = Model.load(open(args['<model>']))
    else:
        model = None

    additional_fields = args['<additional-field>']

    # "<stdin>" is a docopt sentinel meaning "read observations from stdin".
    if args['--input'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--input']))

    # "<stdout>" is a docopt sentinel meaning "write results to stdout".
    # When a path is given, the handle stays open and is owned by ``run``.
    if args['--output'] == "<stdout>":
        output = sys.stdout
    else:
        output = open(args['--output'], 'w')

    verbose = args['--verbose']

    run(observations, output, features, label_name, model, additional_fields,
        verbose)
コード例 #5
0
ファイル: model.py プロジェクト: accraze/inference-services
 def load(self):
     """Load the goodfaith model, build an API-backed extractor, and mark
     this service ready."""
     with open("enwiki.goodfaith.gradient_boosting.model") as model_file:
         self.model = Model.load(model_file)
     session = mwapi.Session("https://en.wikipedia.org",
                             user_agent="KFServing revscoring demo")
     self.extractor = api.Extractor(session)
     self.ready = True
コード例 #6
0
ファイル: score.py プロジェクト: shalinikum/articlequality
def main(argv=None):
    """
    CLI entry point: load a trained article-quality model, read the text to
    score (from a file or stdin), and print the score.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the model file after loading instead of leaking the handle.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = Model.load(model_file)

    # "<stdin>" is a docopt sentinel meaning "read the text from stdin".
    if args['<text>'] == "<stdin>":
        text = sys.stdin.read()
    else:
        with open(args['<text>']) as text_file:
            text = text_file.read()

    print(score(scorer_model, text))
コード例 #7
0
ファイル: score.py プロジェクト: wiki-ai/wikiclass
def main(argv=None):
    """
    CLI entry point: load a trained article-quality model, read the text to
    score (from a file or stdin), and print the score.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Close the model file after loading instead of leaking the handle.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = Model.load(model_file)

    # "<stdin>" is a docopt sentinel meaning "read the text from stdin".
    if args['<text>'] == "<stdin>":
        text = sys.stdin.read()
    else:
        with open(args['<text>']) as text_file:
            text = text_file.read()

    print(score(scorer_model, text))
コード例 #8
0
def main(argv=None):
    """
    CLI entry point: parse arguments, load a model and past scores, set up
    the TSV output writer, and hand off to ``run`` for bulk scoring of
    dump files.
    """
    args = docopt.docopt(__doc__, argv=argv)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    # Merge user-supplied class weights into the module-level defaults.
    if args['--class-weight'] is not None:
        class_weights = dict(
            map(_parse_class_weight_option, args['--class-weight'])
        )
        global CLASS_WEIGHTS
        CLASS_WEIGHTS.update(class_weights)

    paths = args['<dump-file>']
    with open(args['--model']) as f:
        model = Model.load(f)

    sunset = mwtypes.Timestamp(args['--sunset'])

    # Validate --score-at against the known set before proceeding.
    if args['--score-at'] not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(args['--score-at'], SCORE_ATS))
    else:
        score_at = args['--score-at']

    # "<stdout>" is a docopt sentinel; otherwise the writer owns the handle.
    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    # --extend resumes a previous run: remember, per page, the timestamp of
    # the last score already written so it can be skipped this time.
    if args['--extend'] is None:
        skip_scores_before = {}
    else:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        skip_scores_before = {}
        rows = mysqltsv.read(
            open(args['--extend']),
            types=[int, str, int, mwtypes.Timestamp, str, float])
        for row in rows:
            skip_scores_before[row.page_id] = row.timestamp
        logger.info("Completed reading scores from old output.")

    # "<cpu count>" is a docopt sentinel meaning "use all available cores".
    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']
    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
コード例 #9
0
 def load_model_and_queue(self, q, config, key):
     """Load the model for *key* from *config* and enqueue its info plus
     the root nodes of its feature tree on *q*."""
     loaded = Model.from_config(config, key)
     # Just return the model info and the root of the features
     feature_roots = list(dig(loaded.features))
     q.put((loaded.info, feature_roots))
コード例 #10
0
 def load(self, config, key):
     """Build and return the Model configured under *key* in *config*."""
     model = Model.from_config(config, key)
     return model
コード例 #11
0
 def load_model_and_queue(self, q, config, key):
     """Load the model for *key*, strip its info blob, and enqueue it on *q*."""
     loaded = Model.from_config(config, key)
     # We don't need info on the server-side
     loaded.info = None
     q.put(loaded)
コード例 #12
0
ファイル: classify_text.py プロジェクト: wiki-ai/wikiclass
from pprint import pprint

import articlequality
from revscoring import Model

# Load the trained quality model; the context manager closes the file
# handle (the original leaked it) and the wrapped call fixes the overlong line.
with open('../revscoring_models/enwiki.nettrom_wp10.gradient_boosting.model',
          'rb') as model_file:
    scorer_model = Model.load(model_file)

# Classifies a revision of an article based on wikitext alone
text = "An '''anachronism''' {{cite }}(from the [[Ancient Greek|Greek]] <ref ..."
prediction_results = articlequality.score(scorer_model, text)

# Print predicted assessment class and probabilities for all classes.
pprint(("assessment", prediction_results['prediction']))
pprint(("probs", prediction_results['probability']))
コード例 #13
0
from pprint import pprint

import articlequality
from revscoring import Model

# Load the trained quality model inside a context manager so the file
# handle is closed (the original leaked it).
with open('../revscoring_models/enwiki.nettrom_wp10.gradient_boosting.model',
          'rb') as model_file:
    scorer_model = Model.load(model_file)

# Classifies a revision of an article based on wikitext alone
text = "An '''anachronism''' {{cite }}(from the [[Ancient Greek|Greek]] <ref ..."
prediction_results = articlequality.score(scorer_model, text)

# Print predicted assessment class and probabilities for all classes.
pprint(("assessment", prediction_results['prediction']))
pprint(("probs", prediction_results['probability']))
コード例 #14
0
import bz2

import mwapi

from revscoring import Model
from revscoring.extractors import api

# Load the compressed model inside a context manager so the bz2 handle is
# closed after loading (the original leaked it).
with bz2.open("models/ptwiki.draft_quality.gradient_boosting.model.bz2",
              "rb") as model_file:
    model = Model.load(model_file)

extractor = api.Extractor(
    mwapi.Session(host="https://pt.wikipedia.org",
                  user_agent="draftquality test"))
values = extractor.extract(58071111, model.features)
print(model.score(values))
コード例 #15
0
import mwapi

from revscoring import Model
from revscoring.extractors import api

# Load the damaging model from disk.
# NOTE(review): opened in text mode, while other call sites use 'rb' —
# confirm Model.load accepts a text-mode handle for this model format.
with open("models/enwiki.damaging.linear_svc.model") as f:
    model = Model.load(f)

# Extract the model's features for a revision via the enwiki API, then score.
extractor = api.Extractor(mwapi.Session(host="https://en.wikipedia.org",
                                         user_agent="revscoring demo"))
values = extractor.extract(123456789, model.features)
print(model.score(values))
コード例 #16
0
import mwapi
from revscoring import Model
from revscoring.extractors.api.extractor import Extractor
from revscoring.errors import RevisionNotFound
from revscoring.errors import TextDeleted
import pandas as pd
import numpy as np

# Available model files:
#enwiki.goodfaith.gradient_boosting.model
#enwiki.damaging.gradient_boosting.model
# NOTE(review): opened in text mode, while other call sites use 'rb' —
# confirm Model.load accepts a text-mode handle for this model format.
with open("models/enwiki.damaging.gradient_boosting.model") as f:
       scorer_model = Model.load(f)

# API-backed feature extractor against English Wikipedia.
extractor = Extractor(mwapi.Session(host="https://en.wikipedia.org",
                                          user_agent="revscoring demo"))

def get_score(rev_id):
    """Score revision *rev_id* with the module-level extractor and model."""
    extracted = extractor.extract(rev_id, scorer_model.features)
    return scorer_model.score(list(extracted))

df = pd.read_csv("data.csv")
df["label_damage"] = ""
df["confidence_damage"] = ""

for i in range(len(df["rev_id"])):
    print(str(i) + "/" + str(len(df["rev_id"])))

    try:
        results = get_score(df["rev_id"][i])
        df["label_damage"][i] = results["prediction"]