Ejemplo n.º 1
0
def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Write a complete Ninja build file for the wordfreq data to `out`.

    The output is HEADER, then the contents of the rules file named by
    `rules_filename` (read as UTF-8), then every dependency line collected
    below, joined with newlines.
    """
    print(HEADER, file=out)

    # The rules section is copied through as-is.
    with open(rules_filename, encoding='utf-8') as rulesfile:
        print(rulesfile.read(), file=out)

    # The first dependency is to make sure the build file is up to date.
    build_lines = []
    add_dep(build_lines, 'build_deps', 'rules.ninja', 'build.ninja',
            extra='wordfreq_builder/ninja.py')

    # Each source contributes its own group of lines, appended in this order.
    build_lines.extend(twitter_deps(
        data_filename('raw-input/twitter/all-2014.txt'),
        slice_prefix=data_filename('slices/twitter/tweets-2014'),
        combined_prefix=data_filename('generated/twitter/tweets-2014'),
        slices=40,
        languages=CONFIG['sources']['twitter']
    ))
    build_lines.extend(wikipedia_deps(
        data_filename('raw-input/wikipedia'),
        CONFIG['sources']['wikipedia']
    ))
    build_lines.extend(google_books_deps(
        data_filename('raw-input/google-books')
    ))
    build_lines.extend(leeds_deps(
        data_filename('source-lists/leeds'),
        CONFIG['sources']['leeds']
    ))
    build_lines.extend(opensubtitles_deps(
        data_filename('source-lists/opensubtitles'),
        CONFIG['sources']['opensubtitles']
    ))
    build_lines.extend(subtlex_en_deps(
        data_filename('source-lists/subtlex'),
        CONFIG['sources']['subtlex-en']
    ))
    build_lines.extend(subtlex_other_deps(
        data_filename('source-lists/subtlex'),
        CONFIG['sources']['subtlex-other']
    ))
    build_lines.extend(jieba_deps(
        data_filename('source-lists/jieba'),
        CONFIG['sources']['jieba']
    ))
    build_lines.extend(combine_lists(all_languages()))

    print('\n'.join(build_lines), file=out)
Ejemplo n.º 2
0
def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Emit the complete Ninja file for building the wordfreq data.

    Writes HEADER to `out`, followed by the contents of the rules file at
    `rules_filename` (read as UTF-8), followed by all collected dependency
    lines joined with newlines.
    """
    print(HEADER, file=out)

    # The rules section is passed through unchanged.
    with open(rules_filename, encoding="utf-8") as rules:
        print(rules.read(), file=out)

    # Start with the dependency that keeps the build file itself up to date.
    output_lines = []
    add_dep(
        output_lines,
        "build_deps",
        "rules.ninja",
        "build.ninja",
        extra="wordfreq_builder/ninja.py",
    )

    # Then append the lines for each source, in a fixed order.
    twitter_lines = twitter_deps(
        data_filename("raw-input/twitter/all-2014.txt"),
        slice_prefix=data_filename("slices/twitter/tweets-2014"),
        combined_prefix=data_filename("generated/twitter/tweets-2014"),
        slices=40,
        languages=CONFIG["sources"]["twitter"],
    )
    output_lines += twitter_lines

    wikipedia_lines = wikipedia_deps(
        data_filename("raw-input/wikipedia"),
        CONFIG["sources"]["wikipedia"],
    )
    output_lines += wikipedia_lines

    google_books_lines = google_books_deps(data_filename("raw-input/google-books"))
    output_lines += google_books_lines

    leeds_lines = leeds_deps(
        data_filename("source-lists/leeds"),
        CONFIG["sources"]["leeds"],
    )
    output_lines += leeds_lines

    opensubtitles_lines = opensubtitles_deps(
        data_filename("source-lists/opensubtitles"),
        CONFIG["sources"]["opensubtitles"],
    )
    output_lines += opensubtitles_lines

    output_lines += combine_lists(all_languages())

    print("\n".join(output_lines), file=out)
Ejemplo n.º 3
0
def make_ninja_deps(rules_filename, out=sys.stdout):
    """
    Produce the full Ninja file that describes the wordfreq data build.

    HEADER is written to `out` first, then the text of the rules file at
    `rules_filename` (read as UTF-8), then every dependency line gathered
    below, separated by newlines.
    """
    print(HEADER, file=out)

    # Copy in the rules section
    with open(rules_filename, encoding='utf-8') as rules_source:
        print(rules_source.read(), file=out)

    # The first dependency is to make sure the build file is up to date.
    collected = []
    add_dep(collected, 'build_deps', 'rules.ninja', 'build.ninja',
            extra='wordfreq_builder/ninja.py')

    # Gather one group of lines per source; the tuple order is the order in
    # which the groups appear in the output.
    dep_groups = (
        twitter_deps(
            data_filename('raw-input/twitter/all-2014.txt'),
            slice_prefix=data_filename('slices/twitter/tweets-2014'),
            combined_prefix=data_filename('generated/twitter/tweets-2014'),
            slices=40,
            languages=CONFIG['sources']['twitter'],
        ),
        wikipedia_deps(
            data_filename('raw-input/wikipedia'),
            CONFIG['sources']['wikipedia'],
        ),
        google_books_deps(data_filename('raw-input/google-books')),
        leeds_deps(
            data_filename('source-lists/leeds'),
            CONFIG['sources']['leeds'],
        ),
        opensubtitles_deps(
            data_filename('source-lists/opensubtitles'),
            CONFIG['sources']['opensubtitles'],
        ),
        combine_lists(all_languages()),
    )
    for group in dep_groups:
        collected.extend(group)

    print('\n'.join(collected), file=out)
Ejemplo n.º 4
0
from wordfreq_builder.config import (CONFIG, data_filename, wordlist_filename,
                                     all_languages, source_names)
import sys
import pathlib

# Comment banner stating that the file is generated and naming the command
# that regenerates it (presumably written at the top of the generated Ninja
# file -- confirm against the code that prints it).
HEADER = """# This file is automatically generated. Do not edit it.
# You can regenerate it using the 'wordfreq-build-deps' command.
"""
# Whatever data_filename returns for 'tmp' (presumably a scratch location
# for intermediate files -- confirm against its users).
TMPDIR = data_filename('tmp')


def add_dep(lines, rule, input, output, extra=None, params=None):
    """
    Append one Ninja build statement to `lines`.

    The statement has the form ``build OUTPUT: RULE INPUT``, with
    `` | EXTRA`` appended when `extra` is given. It is followed by one
    ``  key = val`` line per entry of `params` (if any) and then by an
    empty string, which separates it from the next statement.

    `input`, `output` and `extra` may each be a single string or a list or
    tuple of strings; sequences are joined with single spaces.

    `lines` is modified in place and nothing is returned.
    """
    def join_names(value):
        # Lists were always joined; tuples are accepted as well so that they
        # are not formatted as their Python repr in the build statement.
        if isinstance(value, (list, tuple)):
            return ' '.join(value)
        return value

    # A falsy `extra` (None, '' or an empty sequence) adds nothing.
    extrastr = ' | ' + join_names(extra) if extra else ''

    lines.append("build {output}: {rule} {input}{extra}".format(
        output=join_names(output),
        rule=rule,
        input=join_names(input),
        extra=extrastr,
    ))
    if params:
        for key, val in params.items():
            lines.append("  {key} = {val}".format(key=key, val=val))
    lines.append("")
Ejemplo n.º 5
0
from wordfreq_builder.config import (
    CONFIG, data_filename, wordlist_filename, all_languages, source_names
)
import sys
import pathlib
import itertools

# Comment banner stating that the file is generated, where to change its
# behavior, and how to regenerate it (presumably written at the top of the
# generated Ninja file -- confirm against the code that prints it).
HEADER = """# This file is automatically generated. Do not edit it.
# You can change its behavior by editing wordfreq_builder/ninja.py,
# and regenerate it by running 'make'.
"""
# Whatever data_filename returns for 'tmp' (presumably a scratch location
# for intermediate files -- confirm against its users).
TMPDIR = data_filename('tmp')


def add_dep(lines, rule, input, output, extra=None, params=None):
    if isinstance(output, list):
        output = ' '.join(output)
    if isinstance(input, list):
        input = ' '.join(input)
    if extra:
        if isinstance(extra, list):
            extra = ' '.join(extra)
        extrastr = ' | ' + extra
    else:
        extrastr = ''
    build_rule = "build {output}: {rule} {input}{extra}".format(
        output=output, rule=rule, input=input, extra=extrastr
    )
    lines.append(build_rule)
    if params:
        for key, val in params.items():
Ejemplo n.º 6
0
from wordfreq_builder.config import CONFIG, data_filename, wordlist_filename, all_languages, source_names
import sys
import pathlib

# Comment banner stating that the file is generated and naming the command
# that regenerates it (presumably written at the top of the generated Ninja
# file -- confirm against the code that prints it).
HEADER = """# This file is automatically generated. Do not edit it.
# You can regenerate it using the 'wordfreq-build-deps' command.
"""
# Whatever data_filename returns for "tmp" (presumably a scratch location
# for intermediate files -- confirm against its users).
TMPDIR = data_filename("tmp")


def add_dep(lines, rule, input, output, extra=None, params=None):
    """
    Append a single Ninja build statement to `lines`.

    The first appended line is ``build OUTPUT: RULE INPUT``, with
    `` | EXTRA`` added at the end when `extra` is truthy. Each entry of
    `params`, if given, is appended as an indented ``  key = val`` line, and
    an empty string is appended last to separate statements.

    `input`, `output` and `extra` may each be a string or a list or tuple of
    strings; a sequence is joined with single spaces.

    `lines` is mutated in place; the function returns None.
    """
    # Tuples are joined like lists; previously a tuple fell through and was
    # formatted as its Python repr.
    sequence_types = (list, tuple)

    if isinstance(output, sequence_types):
        output = " ".join(output)
    if isinstance(input, sequence_types):
        input = " ".join(input)

    if extra:
        if isinstance(extra, sequence_types):
            extra = " ".join(extra)
        extrastr = " | " + extra
    else:
        extrastr = ""

    lines.append(
        "build {output}: {rule} {input}{extra}".format(
            output=output, rule=rule, input=input, extra=extrastr
        )
    )
    if params:
        lines.extend(
            "  {key} = {val}".format(key=key, val=val)
            for key, val in params.items()
        )
    lines.append("")


def make_ninja_deps(rules_filename, out=sys.stdout):
    """