Beispiel #1
0
def run(session, observations, output, mid_level_wp, verbose):
    """
    Write every observation yielded by `fetch_page_wikiprojects` to `output`.

    `session`, `observations` and `mid_level_wp` are handed to
    `fetch_page_wikiprojects` unchanged; `verbose` is forwarded as a keyword
    argument.
    """
    annotated = fetch_page_wikiprojects(
        session, observations, mid_level_wp, verbose=verbose)

    for record in annotated:
        dump_observation(record, output)
Beispiel #2
0
def run(dump_paths, threads, output, verbose=False, extractor=None):
    """
    Extract labelings from XML dumps and write each one to `output`.

    When `dump_paths` is empty a single dump is read from standard input;
    otherwise the paths are processed through `mwxml.map` using `threads`.
    """
    if len(dump_paths) > 0:
        def process_dump(dump, path):
            # `path` is supplied by mwxml.map but is not needed here.
            return extract_labelings(dump, extractor, verbose)

        label_events = mwxml.map(process_dump, dump_paths, threads=threads)
    else:
        stdin_dump = mwxml.Dump.from_file(sys.stdin)
        label_events = extract_labelings(stdin_dump, extractor,
                                         verbose=verbose)

    for event in label_events:
        dump_observation(event, output)
Beispiel #3
0
def run(dump_paths, page_labelings, output, threads, verbose=False):
    """
    Extract text for the given page labelings and write results to `output`.

    Logging is configured at DEBUG level when `verbose` is set and at WARNING
    level otherwise.  With no `dump_paths`, one dump is read from standard
    input; otherwise the paths are processed through `mwxml.map`.
    """
    log_level = logging.WARNING
    if verbose:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    if len(dump_paths) > 0:
        def process_dump(dump, path):
            # `path` is supplied by mwxml.map but is not needed here.
            return extract_text(dump, page_labelings, verbose)

        labelings = mwxml.map(process_dump, dump_paths, threads=threads)
    else:
        stdin_dump = mwxml.Dump.from_file(sys.stdin)
        labelings = extract_text(stdin_dump, page_labelings, verbose=verbose)

    for item in labelings:
        dump_observation(item, output)
Beispiel #4
0
def run(dump_paths, page_labelings, output, threads, verbose=False):
    """
    Configure logging, extract text for `page_labelings`, and dump the results.

    An empty `dump_paths` means the dump is read from standard input; any
    other value is fanned out over `mwxml.map` with `threads`.
    """
    logging.basicConfig(
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s',
        level=(logging.DEBUG if verbose else logging.WARNING),
    )

    read_from_stdin = len(dump_paths) == 0
    if read_from_stdin:
        source_dump = mwxml.Dump.from_file(sys.stdin)
        labelings = extract_text(source_dump, page_labelings, verbose=verbose)
    else:
        def extract_from(dump, path):
            # Only the dump is used; mwxml.map also passes its path.
            return extract_text(dump, page_labelings, verbose)

        labelings = mwxml.map(extract_from, dump_paths, threads=threads)

    for extracted in labelings:
        dump_observation(extracted, output)
Beispiel #5
0
def run(observations, model, verbose):
    """
    Score each observation with `model` and write it to standard output.

    For every observation, the model's features are solved against the
    observation's ``'cache'`` entry, the resulting score is stored under
    ``'score'``, the ``'cache'`` entry is removed, and the observation is
    dumped to ``sys.stdout``.

    Args:
        observations: Iterable of dict-like observations with a ``'cache'``
            key.
        model: Object exposing ``features`` and ``score(features)``.
        verbose: When true, write a "." to stderr per scored observation and
            a final newline once the loop ends.

    Errors raised while handling a single observation are reported on stderr
    and that observation is skipped; processing continues with the next one.
    """
    for ob in observations:
        try:
            features = list(solve(model.features, cache=ob['cache']))
            ob['score'] = model.score(features)
            del ob['cache']
            dump_observation(ob, sys.stdout)
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()
        except Exception:
            # Deliberately best-effort: one bad observation must not abort the
            # whole run.  Catching Exception (rather than everything) still
            # lets KeyboardInterrupt and SystemExit stop the program.
            sys.stderr.write(traceback.format_exc())

    if verbose:
        sys.stderr.write("\n")
def run(labelings, dependents, output, extractors, verbose=False):
    """
    Extract `dependents` for each labeling in a worker pool and dump results.

    Args:
        labelings: Iterable of labelings fed to the extractor one at a time.
        dependents: Passed to `LabelingDependentExtractor`.
        output: Destination handed to `dump_observation`.
        extractors: Number of worker processes in the pool.
        verbose: When true, write "." to stderr for each dumped observation,
            "-" for each labeling whose extraction returned None, and a
            final newline.
    """
    # The context manager shuts the worker pool down when the loop finishes
    # or raises, so worker processes are not leaked.
    # NOTE(review): assumes Pool is multiprocessing.Pool (which supports the
    # context-manager protocol) -- confirm against the file's imports.
    with Pool(processes=extractors) as extractor_pool:
        extractor = LabelingDependentExtractor(dependents)

        for observation in extractor_pool.imap(extractor.extract_and_cache,
                                               labelings):
            if observation is not None:
                if verbose:
                    sys.stderr.write(".")
                    sys.stderr.flush()

                dump_observation(observation, output)
            else:
                if verbose:
                    sys.stderr.write("-")
                    sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
def run(labelings, dependents, output, extractors, verbose=False):
    """
    Run dependent extraction over `labelings` in parallel and dump results.

    Args:
        labelings: Iterable of labelings fed to the extractor one at a time.
        dependents: Passed to `LabelingDependentExtractor`.
        output: Destination handed to `dump_observation`.
        extractors: Number of worker processes in the pool.
        verbose: When true, progress marks go to stderr: "." for a dumped
            observation, "-" when extraction returned None, then a newline
            at the end.
    """
    # Using the pool as a context manager guarantees the workers are torn
    # down once all results are consumed or an error propagates.
    # NOTE(review): assumes Pool is multiprocessing.Pool (which supports the
    # context-manager protocol) -- confirm against the file's imports.
    with Pool(processes=extractors) as extractor_pool:
        extractor = LabelingDependentExtractor(dependents)

        for observation in extractor_pool.imap(extractor.extract_and_cache,
                                               labelings):
            if observation is not None:
                if verbose:
                    sys.stderr.write(".")
                    sys.stderr.flush()

                dump_observation(observation, output)
            else:
                if verbose:
                    sys.stderr.write("-")
                    sys.stderr.flush()

    if verbose:
        sys.stderr.write("\n")
Beispiel #8
0
def run(host, obs, try_deleted_first, output, threads, verbose):
    """
    Fetch text for observations in batches of 10 and dump each result.

    A logged-in `mwapi.Session` for `host` is created, `obs` is split into
    chunks of 10, and the chunks are mapped over `TextFetcher.fetch_text`
    using a thread pool of `threads` workers.  When `verbose` is set, the
    size of each returned batch is written to stderr, followed by a newline
    after the last batch.
    """
    session = mwapi.Session(
        host,
        user_agent="Fetch text (wikigrammar) <*****@*****.**>",
        formatversion=2)
    mwapi.cli.do_login(session, host)

    batches = read_chunks(obs, 10)
    fetcher = TextFetcher(session, try_deleted_first=try_deleted_first)

    with ThreadPoolExecutor(max_workers=threads) as pool:
        fetched_batches = pool.map(fetcher.fetch_text, batches)

        for batch in fetched_batches:
            if verbose:
                sys.stderr.write(str(len(batch)) + " ")
                sys.stderr.flush()

            for fetched in batch:
                dump_observation(fetched, output)

        if verbose:
            sys.stderr.write("\n")
Beispiel #9
0
def run(labelings, output, session, verbose):
    """
    Dump every labeling returned by `fetch_text` whose 'text' is not None.
    """
    for fetched in fetch_text(session, labelings, verbose=verbose):
        if fetched['text'] is None:
            # Nothing to write for this labeling.
            continue
        dump_observation(fetched, output)
Beispiel #10
0
def run(labelings, output, session, verbose):
    """
    Fetch text for `labelings` and write those with a non-None 'text' field.
    """
    with_text = fetch_text(session, labelings, verbose=verbose)

    for candidate in with_text:
        has_text = candidate['text'] is not None
        if has_text:
            dump_observation(candidate, output)
Beispiel #11
0
def run(session, observations, claims, output, verbose):
    """
    Write each observation yielded by `fetch_item_info` to `output`.
    """
    enriched = fetch_item_info(session, observations, claims,
                               verbose=verbose)

    for record in enriched:
        dump_observation(record, output)
Beispiel #12
0
def run(session, observations, claims, output, verbose):
    """
    Pass `observations` and `claims` through `fetch_item_info` and dump
    every resulting observation to `output`.
    """
    item_infos = fetch_item_info(
        session, observations, claims, verbose=verbose)

    for item_info in item_infos:
        dump_observation(item_info, output)
Beispiel #13
0
            "rev_id": rev_id,
            "cache": feature_values,
            "approved": approved
        }
    except RuntimeError as e:
        sys.stderr.write(str(e))
    else:
        print(observation)
        training_features.append(observation)

# Persist the collected observations to disk and then load them back.
print("Dump observations to file")
from revscoring.utilities.util import dump_observation, read_observations

# Write each collected observation to the file with dump_observation().
# NOTE(review): the file name ends in ".bz2", but assuming `open` is the
# built-in, this code applies no compression itself -- confirm whether the
# extension is intentional.
with open("observations.json.bz2", "wt") as dumpfile:
    for observation in training_features:
        dump_observation(observation, dumpfile)

# Re-read the same file and replace `training_features` with the list of
# observations produced by read_observations().
with open("observations.json.bz2", "r") as dumpfile:
    training_features = list(read_observations(dumpfile))

from revscoring.scoring.models import GradientBoosting

is_approved = GradientBoosting(features,
                               labels=[True, False],
                               version="Demo",
                               learning_rate=0.01,
                               max_features="log2",
                               n_estimators=700,
                               max_depth=5,
                               population_rates={
                                   False: 0.5,
Beispiel #14
0
def run(labelings, output, session, verbose):
    """
    Write every observation yielded by `fetch_text` to `output`.
    """
    fetched = fetch_text(session, labelings, verbose)

    for observation in fetched:
        dump_observation(observation, output)
Beispiel #15
0
def run(observations, session, threads, output):
    """
    Write every observation yielded by `fetch_draft_texts` to `output`.
    """
    drafts = fetch_draft_texts(observations, session, threads)

    for draft in drafts:
        dump_observation(draft, output)