Beispiel #1
0
def json_output(conf):
    """Stage the JSON build output under the public site's ``json`` directory.

    Mirrors the builder's output directory into
    ``<projectroot>/<public_site_output>/json`` with ``rsync`` (excluding
    ``*pickle``, ``.buildinfo`` and ``*fjson`` entries), then hands the json
    file list to ``copy_if_needed`` so it lands next to the staged files as
    ``.file_list``.

    :param conf: project configuration; ``conf.paths`` and ``conf.project``
        are read.
    """
    project_root = conf.paths.projectroot
    file_list_src = os.path.join(conf.paths.branch_output, 'json-file-list')
    staging_dir = os.path.join(project_root, conf.paths.public_site_output,
                               'json')
    file_list_dst = os.path.join(staging_dir, '.file_list')

    if not os.path.exists(staging_dir):
        logger.debug('created directories for {0}'.format(staging_dir))
        os.makedirs(staging_dir)

    # edition builds are read from a "json-<edition>" directory
    if 'edition' in conf.project and conf.project.edition != conf.project.name:
        builder = 'json-' + conf.project.edition
    else:
        builder = 'json'

    # the trailing slash makes rsync copy the directory's contents
    build_dir = os.path.join(project_root, conf.paths.branch_output,
                             builder) + '/'

    rsync = ('rsync --recursive --times --delete --exclude="*pickle" '
             '--exclude=".buildinfo" --exclude="*fjson" {src} {dst}')
    command(rsync.format(src=build_dir, dst=staging_dir))

    copy_if_needed(file_list_src, file_list_dst)
    logger.info('deployed json files to local staging.')
Beispiel #2
0
def test_filtered_model(working_path, tconf, d):
    '''Filter the tuned model to the test input, translate it, and score it.

    Three shell commands run in order inside a ``Timer`` keyed ``'test'``:
    ``filter-model-given-input.pl`` writes a filtered model to
    ``<working_path>/filtered``, ``moses`` translates the test file with it,
    and ``multi-bleu.perl`` scores the translation. Filtering can be faster
    than using the binarised model but has a history of failing on certain
    corpora.

    The captured output of the scoring command is stored in ``d["BLEU"]``.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''
    with Timer(d, 'test', lg=logger):
        fields = {
            'moses': tconf.paths.moses,
            'corpus': tconf.paths.aux_corpus_files,
            'name': tconf.test.name,
            'lang': tconf.settings.foreign,
            'work': working_path,
        }

        filter_cmd = (
            "{moses}/scripts/training/filter-model-given-input.pl "
            "{work}/filtered {work}/mert-work/moses.ini "
            "{corpus}/{name}.true.en "
            "-Binarizer {moses}/bin/processPhraseTable"
        ).format(**fields)
        command(filter_cmd, logger=logger, capture=True)

        decode_cmd = (
            "{moses}/bin/moses -f {work}/filtered/moses.ini  "
            "< {corpus}/{name}.true.en "
            "> {work}/{name}.translated.{lang} 2> {work}/{name}.out"
        ).format(**fields)
        command(decode_cmd, logger=logger, capture=True)

        bleu_cmd = (
            "{moses}/scripts/generic/multi-bleu.perl -lc "
            "{corpus}/{name}.true.{lang} < {work}/{name}.translated.{lang}"
        ).format(**fields)
        score = command(bleu_cmd, logger=logger, capture=True)

        d["BLEU"] = score.out
Beispiel #3
0
def train_model(working_path, lm_path, l_len, l_order, l_lang, l_direct,
                l_score, l_align, l_orient, l_model, tconf, d):
    '''Create the working directory and run ``train-model.perl`` for one
    configuration, inside a ``Timer`` keyed ``'train'``.

    :param string working_path: path to working directory (created here)
    :param string lm_path: path to the language model directory
    :param int l_len: max phrase length (not used by the command built here)
    :param int l_order: n-gram order
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_score: score options setting, any combination of --GoodTuring, --NoLex, --OnlyDirect
    :param string l_align: alignment algorithm
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    with Timer(d, 'train', lg=logger):
        os.makedirs(working_path)

        # NOTE(review): with "2>&1 > file" the shell sends only stdout to the
        # file; stderr stays on the original stdout. Confirm this is intended.
        template = (
            "{moses}/scripts/training/train-model.perl -root-dir {work}/train "
            "-corpus {corpus}/{name}.clean -f en -e {lang} "
            "--score-options '{score}' -alignment {align} "
            "-reordering {model}-{orient}-{direct}-{rlang} "
            "-lm 0:{order}:{lm}/{name}.blm.{lang}:1 "
            "-mgiza -mgiza-cpus {threads} -external-bin-dir {moses}/tools "
            "-cores {threads} --parallel --parts 3 2>&1 > {work}/training.out"
        )
        train_cmd = template.format(
            moses=tconf.paths.moses,
            corpus=tconf.paths.aux_corpus_files,
            name=tconf.train.name,
            lang=tconf.settings.foreign,
            score=l_score,
            align=l_align,
            model=l_model,
            orient=l_orient,
            direct=l_direct,
            rlang=l_lang,
            order=l_order,
            lm=lm_path,
            threads=tconf.settings.threads,
            work=working_path)

        command(train_cmd, logger=logger, capture=True)
Beispiel #4
0
def binarise_model(working_path, l_lang, l_direct, l_orient, l_model, tconf, d):
    '''Binarise the phrase and reordering tables and point moses.ini at them.

    Binarising speeds up loading the decoder, though it doesn't actually speed
    up decoding sentences. The tuned ``mert-work/moses.ini`` is copied into
    ``<working_path>/binarised-model`` and rewritten in place so that it
    references the binarised phrase table.

    :param string working_path: the path to the working directory
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    with Timer(d, 'binarise', lg=logger):
        binarised_model_path = os.path.join(working_path, 'binarised-model')
        os.makedirs(binarised_model_path)

        phrase_cmd = (
            "{0}/bin/processPhraseTable  -ttable 0 0 {1}/train/model/{2}.gz "
            "-nscores 5 -out {1}/binarised-model/phrase-table"
        ).format(tconf.paths.moses, working_path,
                 tconf.settings.phrase_table_name)
        command(phrase_cmd, logger=logger, capture=True)

        reordering_cmd = (
            "{0}/bin/processLexicalTable "
            "-in {1}/train/model/{6}.{2}-{3}-{4}-{5}.gz "
            "-out {1}/binarised-model/reordering-table"
        ).format(tconf.paths.moses, working_path, l_model, l_orient,
                 l_direct, l_lang, tconf.settings.reordering_name)
        command(reordering_cmd, logger=logger, capture=True)

        mosesini = os.path.join(binarised_model_path, 'moses.ini')
        copy_always(os.path.join(working_path, 'mert-work', 'moses.ini'),
                    mosesini)
        logger.info(mosesini)

        sub_dict = (re.compile(r'PhraseDictionaryMemory'),
                    'PhraseDictionaryBinary')
        munge_page(mosesini, sub_dict)

        phrase_table_path = os.path.join(
            'train', 'model', tconf.settings.phrase_table_name) + '.gz'
        # The path is a literal, not a pattern: escape it so "." (and any
        # other metacharacter in the table name) only matches itself.
        sub_table = (re.compile(re.escape(phrase_table_path)),
                     'binarised-model/phrase-table')
        munge_page(mosesini, sub_table)
Beispiel #5
0
def transfer_source(conf, sconf):
    """Sync the project source tree into the branch source directory.

    Ensures ``<projectroot>/<branch_source>`` exists as a directory, rsyncs
    the source tree into it, then runs ``source_exclusion`` and
    ``dump_file_hashes``.

    :param conf: project configuration; ``conf.paths`` is read.
    :param sconf: sphinx configuration, passed through to
        ``source_exclusion``.
    :raises InvalidFile: if the target path exists but is not a directory.
    """
    target = os.path.join(conf.paths.projectroot, conf.paths.branch_source)

    if os.path.exists(target):
        if not os.path.isdir(target):
            msg = '"{0}" exists and is not a directory'.format(target)
            logger.error(msg)
            raise InvalidFile(msg)
    else:
        os.makedirs(target)
        logger.debug('created directory for sphinx build: {0}'.format(target))

    source_dir = os.path.join(conf.paths.projectroot, conf.paths.source)

    # Directories holding generated content are excluded so that rsync's
    # --delete leaves them alone in the target, keeping builds incremental.
    generated_dirs = [os.path.join('includes', 'steps'),
                      os.path.join('includes', 'toc')]
    exclusions = ' '.join('--exclude=' + path for path in generated_dirs)

    rsync = 'rsync --times --checksum --recursive {exclude} --delete {src}/ {dst}'
    command(rsync.format(exclude=exclusions, src=source_dir, dst=target))

    source_exclusion(conf, sconf)
    dump_file_hashes(conf)

    logger.info('prepared source for sphinx build in {0}'.format(target))
Beispiel #6
0
def _generate_images(cmd, dpi, width, target, source):
    """Fill in an image-generation command template and run it.

    :param cmd: command template; it is formatted with the keyword fields
        ``cmd`` (from ``_get_inkscape_cmd()``), ``dpi``, ``width``,
        ``target`` and ``source``.
    :param dpi: value for the ``dpi`` field.
    :param width: value for the ``width`` field.
    :param target: value for the ``target`` field; also named in the log line.
    :param source: value for the ``source`` field.
    """
    fields = {
        'cmd': _get_inkscape_cmd(),
        'dpi': dpi,
        'width': width,
        'target': target,
        'source': source,
    }
    command(cmd.format(**fields))
    logger.info('generated image file {0}'.format(target))
Beispiel #7
0
def download_file(file, url):
    """Download ``url`` to ``file`` with ``curl``, creating parent directories.

    :param file: destination path for the download.
    :param url: address passed to ``curl``.
    """
    parent = os.path.dirname(file)
    # A bare filename has an empty dirname, and os.makedirs('') raises; only
    # create the parent directory when there is one.
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)

    # NOTE(review): url and file are joined into the command unquoted, so
    # values containing shell metacharacters (e.g. "&") are presumably
    # interpreted by the shell -- confirm how ``command`` runs its input.
    cmd = ['curl', '-s', '--remote-time', url, '-o', file]
    command(' '.join(cmd))
    logger.info('downloaded {0}'.format(file))
Beispiel #8
0
def binarise_model(working_path, l_lang, l_direct, l_orient, l_model, tconf, d):
    '''Binarise the phrase and reordering tables and point moses.ini at them.

    Binarising speeds up loading the decoder, though it doesn't actually speed
    up decoding sentences. The tuned ``mert-work/moses.ini`` is copied into
    ``<working_path>/binarised-model`` and rewritten in place so that it
    references the binarised phrase table.

    :param string working_path: the path to the working directory
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    with Timer(d, 'binarise', lg=logger):
        binarised_model_path = os.path.join(working_path, 'binarised-model')
        os.makedirs(binarised_model_path)

        phrase_cmd = (
            "{0}/bin/processPhraseTable  -ttable 0 0 {1}/train/model/{2}.gz "
            "-nscores 5 -out {1}/binarised-model/phrase-table"
        ).format(tconf.paths.moses, working_path,
                 tconf.settings.phrase_table_name)
        command(phrase_cmd, logger=logger, capture=True)

        reordering_cmd = (
            "{0}/bin/processLexicalTable "
            "-in {1}/train/model/{6}.{2}-{3}-{4}-{5}.gz "
            "-out {1}/binarised-model/reordering-table"
        ).format(tconf.paths.moses, working_path, l_model, l_orient,
                 l_direct, l_lang, tconf.settings.reordering_name)
        command(reordering_cmd, logger=logger, capture=True)

        mosesini = os.path.join(binarised_model_path, 'moses.ini')
        copy_always(os.path.join(working_path, 'mert-work', 'moses.ini'),
                    mosesini)
        logger.info(mosesini)

        sub_dict = (re.compile(r'PhraseDictionaryMemory'),
                    'PhraseDictionaryBinary')
        munge_page(mosesini, sub_dict)

        phrase_table_path = os.path.join(
            'train', 'model', tconf.settings.phrase_table_name) + '.gz'
        # The path is a literal, not a pattern: escape it so "." (and any
        # other metacharacter in the table name) only matches itself.
        sub_table = (re.compile(re.escape(phrase_table_path)),
                     'binarised-model/phrase-table')
        munge_page(mosesini, sub_table)
Beispiel #9
0
def build_language_model(lm_path, l_order, l_smoothing, tconf, d):
    '''Build the language model for the given configuration.

    Creates ``lm_path`` and then runs, in order, the IRSTLM
    ``add-start-end.sh``, ``build-lm.sh`` and ``compile-lm`` scripts followed
    by the Moses ``build_binary`` and ``query`` binaries, all inside a
    ``Timer`` keyed ``'lm'``.

    :param string lm_path: path to language model directory
    :param int l_order: n-gram order
    :param string l_smoothing: smoothing algorithm
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    templates = (
        "{irstlm}/bin/add-start-end.sh < {corpus}/{name}.true.{lang} > {lm}/{name}.sb.{lang}",
        "{irstlm}/bin/build-lm.sh -i {lm}/{name}.sb.{lang} -t {lm}/tmp -p -n {order} -s {smoothing} -o {lm}/{name}.ilm.{lang}.gz",
        "{irstlm}/bin/compile-lm --text  {lm}/{name}.ilm.{lang}.gz {lm}/{name}.arpa.{lang}",
        "{moses}/bin/build_binary -i {lm}/{name}.arpa.{lang} {lm}/{name}.blm.{lang}",
        # final step pipes a sample sentence through the binary model
        "echo 'Is this a Spanish sentance?' | {moses}/bin/query {lm}/{name}.blm.{lang}",
    )

    with Timer(d, 'lm', lg=logger):
        os.makedirs(lm_path)

        fields = {
            'irstlm': tconf.paths.irstlm,
            'moses': tconf.paths.moses,
            'corpus': tconf.paths.aux_corpus_files,
            'name': tconf.train.name,
            'lang': tconf.settings.foreign,
            'lm': lm_path,
            'order': l_order,
            'smoothing': l_smoothing,
        }

        # every command is rendered before the first one runs
        cmds = [template.format(**fields) for template in templates]

        for cmd in cmds:
            command(cmd, logger=logger, capture=True)
Beispiel #10
0
def run_sphinx(builder, sconf, conf):
    """Run ``sphinx-build`` for one builder and report the outcome.

    Creates the builder's output directory when it is missing, compiles PO
    files first when ``sconf`` names a language, then runs the build with
    output captured and failures ignored.

    :param builder: name of the sphinx builder.
    :param sconf: sphinx configuration; ``language`` and ``build_output``
        are read.
    :param conf: project configuration; ``conf.paths`` is read.
    :returns: tuple of ``(return_code, sconf, conf, output)`` where ``output``
        is the captured stderr and stdout joined by a newline.
    """
    dirpath = os.path.join(conf.paths.branch_output, builder)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
        logger.info('created directories "{1}" for sphinx builder {0}'.format(builder, dirpath))

    translated = 'language' in sconf and sconf.language is not None
    if translated:
        command('sphinx-intl build --language=' + sconf.language)
        logger.info('compiled all PO files for translated build.')

    logger.info('starting sphinx build {0} at {1}'.format(builder, timestamp()))

    # the -d argument gives each build output its own doctree directory
    sphinx_cmd = 'sphinx-build {args} -d {cache}/doctrees-{name} {source} {dest}'.format(
        args=get_sphinx_args(sconf, conf),
        cache=os.path.join(conf.paths.projectroot, conf.paths.branch_output),
        name=os.path.basename(sconf.build_output),
        source=os.path.join(conf.paths.projectroot, conf.paths.branch_source),
        dest=sconf.build_output)

    out = command(sphinx_cmd, capture=True, ignore=True)
    logger.info('completed sphinx build {0} at {1}'.format(builder, timestamp()))

    output = '\n'.join([out.err, out.out])

    if out.return_code != 0:
        logger.warning('the sphinx build {0} was not successful. not running finalize operation'.format(builder))
    else:
        logger.info('successfully completed {0} sphinx build at {1}'.format(builder, timestamp()))

    return out.return_code, sconf, conf, output
Beispiel #11
0
def download_file(file, url):
    """Download ``url`` to ``file`` with ``curl``, creating parent directories.

    :param file: destination path for the download.
    :param url: address passed to ``curl``.
    """
    parent = os.path.dirname(file)
    # A bare filename has an empty dirname, and os.makedirs('') raises; only
    # create the parent directory when there is one.
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)

    # NOTE(review): url and file are joined into the command unquoted, so
    # values containing shell metacharacters (e.g. "&") are presumably
    # interpreted by the shell -- confirm how ``command`` runs its input.
    cmd = ['curl', '-s', '--remote-time', url, '-o', file]
    command(' '.join(cmd))
    logger.info('downloaded {0}'.format(file))
Beispiel #12
0
def _generate_images(cmd, dpi, width, target, source):
    """Fill in an image-generation command template and run it.

    :param cmd: command template; it is formatted with the keyword fields
        ``cmd`` (from ``_get_inkscape_cmd()``), ``dpi``, ``width``,
        ``target`` and ``source``.
    :param dpi: value for the ``dpi`` field.
    :param width: value for the ``width`` field.
    :param target: value for the ``target`` field; also named in the log line.
    :param source: value for the ``source`` field.
    """
    inkscape = _get_inkscape_cmd()
    rendered = cmd.format(cmd=inkscape, dpi=dpi, width=width,
                          target=target, source=source)
    command(rendered)
    logger.info('generated image file {0}'.format(target))
Beispiel #13
0
def tune_model(working_path, tconf, d):
    '''Tune the trained model by running ``mert-moses.pl``.

    The command runs inside a ``Timer`` keyed ``'tune'`` and uses
    ``<working_path>/mert-work`` as its working directory.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    # NOTE(review): with "2>&1 > file" the shell sends only stdout to the
    # file; stderr stays on the original stdout. Confirm this is intended.
    template = (
        "{moses}/scripts/training/mert-moses.pl "
        "{corpus}/{name}.true.en {corpus}/{name}.true.{lang} "
        "{moses}/bin/moses  {work}/train/model/moses.ini "
        "--working-dir {work}/mert-work --mertdir {moses}/bin/ "
        "2>&1 > {work}/mert.out"
    )

    with Timer(d, 'tune', lg=logger):
        tune_cmd = template.format(moses=tconf.paths.moses,
                                   corpus=tconf.paths.aux_corpus_files,
                                   name=tconf.tune.name,
                                   lang=tconf.settings.foreign,
                                   work=working_path)
        command(tune_cmd, logger=logger, capture=True)
Beispiel #14
0
def tune_model(working_path, tconf, d):
    '''Tune the trained model by running ``mert-moses.pl``.

    The command runs inside a ``Timer`` keyed ``'tune'`` and uses
    ``<working_path>/mert-work`` as its working directory.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    # NOTE(review): with "2>&1 > file" the shell sends only stdout to the
    # file; stderr stays on the original stdout. Confirm this is intended.
    template = (
        "{moses}/scripts/training/mert-moses.pl "
        "{corpus}/{name}.true.en {corpus}/{name}.true.{lang} "
        "{moses}/bin/moses  {work}/train/model/moses.ini "
        "--working-dir {work}/mert-work --mertdir {moses}/bin/ "
        "2>&1 > {work}/mert.out"
    )

    with Timer(d, 'tune', lg=logger):
        fields = {
            'moses': tconf.paths.moses,
            'corpus': tconf.paths.aux_corpus_files,
            'name': tconf.tune.name,
            'lang': tconf.settings.foreign,
            'work': working_path,
        }
        command(template.format(**fields), logger=logger, capture=True)
Beispiel #15
0
def test_binarised_model(working_path, tconf, d):
    '''Translate the test file with the binarised model and score the result.

    Runs ``moses`` against ``<working_path>/binarised-model/moses.ini`` and
    then ``multi-bleu.perl`` on the translation, inside a ``Timer`` keyed
    ``'test'``. The captured output of the scoring command is stored in
    ``d["BLEU"]``.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''
    with Timer(d, 'test', lg=logger):
        fields = {
            'moses': tconf.paths.moses,
            'corpus': tconf.paths.aux_corpus_files,
            'name': tconf.test.name,
            'lang': tconf.settings.foreign,
            'work': working_path,
        }

        decode_cmd = (
            "{moses}/bin/moses -f {work}/binarised-model/moses.ini  "
            "< {corpus}/{name}.true.en "
            "> {work}/{name}.translated.{lang} 2> {work}/{name}.out"
        ).format(**fields)
        command(decode_cmd, logger=logger, capture=True)

        bleu_cmd = (
            "{moses}/scripts/generic/multi-bleu.perl -lc "
            "{corpus}/{name}.true.{lang} < {work}/{name}.translated.{lang}"
        ).format(**fields)
        score = command(bleu_cmd, logger=logger, capture=True)

        d["BLEU"] = score.out
Beispiel #16
0
def clean_corpus(corpus_name, tconf):
    '''Clean a corpus with ``clean-corpus-n.perl`` (length limit of 80 words).

    Reads ``<corpus_name>.true`` from the auxiliary corpus directory and
    writes ``<corpus_name>.clean`` next to it.

    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''

    template = ("{moses}/scripts/training/clean-corpus-n.perl "
                "{corpus}/{name}.true {lang} en {corpus}/{name}.clean 1 80")

    clean_cmd = template.format(moses=tconf.paths.moses,
                                corpus=tconf.paths.aux_corpus_files,
                                name=corpus_name,
                                lang=tconf.settings.foreign)

    command(clean_cmd, logger=logger, capture=True)
Beispiel #17
0
def create_branch(args):
    """
    Check out the configured branch and seed its build directory.

    The branch name comes from ``conf.runstate.git_branch``. The branch's
    ``<output>/<branch>`` directory is populated with ``rsync`` from the build
    directory of the branch that was current when the function started, or
    from ``master`` when the requested branch is already the current one.

    Afterwards the configuration is fetched again and
    ``fix_build_env_tasks`` is queued for the existing builders on a
    process-pool ``BuildApp``, which is then run.
    """

    conf = fetch_config(args)
    repo = GitRepo(conf.paths.projectroot)

    branch = conf.runstate.git_branch
    base_branch = repo.current_branch()

    if base_branch == branch:
        # already on the requested branch: fall back to master as the seed
        base_branch = 'master'
        logger.warning(
            'seeding build data for branch "{0}" from "master"'.format(branch))

    output_root = os.path.join(conf.paths.projectroot, conf.paths.output)
    branch_builddir = os.path.join(output_root, branch)
    base_builddir = os.path.join(output_root, base_branch)

    if repo.branch_exists(branch):
        checkout_msg = 'checking out branch "{0}"'
    else:
        checkout_msg = 'creating and checking out a branch named "{0}"'
    logger.info(checkout_msg.format(branch))

    repo.checkout_branch(branch)

    seed_cmd = "rsync -r --times --checksum {0}/ {1}".format(
        base_builddir, branch_builddir)
    logger.info('seeding build directory for "{0}" from "{1}"'.format(
        branch, base_branch))
    command(seed_cmd)
    logger.info('branch creation complete.')

    # the configuration is re-read now that the new branch is checked out
    conf = fetch_config(args)
    builders = get_existing_builders(conf)

    app = BuildApp(conf)
    app.pool = 'process'
    fix_build_env_tasks(builders, conf, app)
    app.run()
Beispiel #18
0
def test_binarised_model(working_path, tconf, d):
    '''Translate the test file with the binarised model and score the result.

    Runs ``moses`` against ``<working_path>/binarised-model/moses.ini`` and
    then ``multi-bleu.perl`` on the translation, inside a ``Timer`` keyed
    ``'test'``. The captured output of the scoring command is stored in
    ``d["BLEU"]``.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''
    decode_template = (
        "{moses}/bin/moses -f {work}/binarised-model/moses.ini  "
        "< {corpus}/{name}.true.en "
        "> {work}/{name}.translated.{lang} 2> {work}/{name}.out"
    )
    bleu_template = (
        "{moses}/scripts/generic/multi-bleu.perl -lc "
        "{corpus}/{name}.true.{lang} < {work}/{name}.translated.{lang}"
    )

    with Timer(d, 'test', lg=logger):
        fields = dict(moses=tconf.paths.moses,
                      corpus=tconf.paths.aux_corpus_files,
                      name=tconf.test.name,
                      lang=tconf.settings.foreign,
                      work=working_path)

        command(decode_template.format(**fields), logger=logger, capture=True)
        score = command(bleu_template.format(**fields),
                        logger=logger, capture=True)
        d["BLEU"] = score.out
Beispiel #19
0
def clean_corpus(corpus_name, tconf):
    '''Clean a corpus with ``clean-corpus-n.perl`` (length limit of 80 words).

    Reads ``<corpus_name>.true`` from the auxiliary corpus directory and
    writes ``<corpus_name>.clean`` next to it.

    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''

    fields = {
        'moses': tconf.paths.moses,
        'corpus': tconf.paths.aux_corpus_files,
        'name': corpus_name,
        'lang': tconf.settings.foreign,
    }
    clean_cmd = (
        "{moses}/scripts/training/clean-corpus-n.perl "
        "{corpus}/{name}.true {lang} en {corpus}/{name}.clean 1 80"
    ).format(**fields)

    command(clean_cmd, logger=logger, capture=True)
Beispiel #20
0
def _weak_bootstrapping(args):
    """Run a minimal html publication build, then the makecloth meta script.

    ``args`` is mutated: the language and edition lists are emptied (both
    attributes share one list object) and the builder is set to ``'html'``.
    If the first ``sphinx_publication`` call raises, it is attempted once
    more and the ``docs-tools`` directory is removed afterwards.

    :param args: parsed arguments, passed to ``fetch_config`` and
        ``sphinx_publication``.
    """
    no_extras = []
    args.languages_to_build = no_extras
    args.editions_to_build = no_extras
    args.builder = 'html'

    conf = fetch_config(args)
    app = BuildApp(conf)

    try:
        sphinx_publication(conf, args, app)
    # NOTE(review): a bare except also traps KeyboardInterrupt and SystemExit;
    # kept as-is because narrowing it would change the retry behaviour.
    except:
        sphinx_publication(conf, args, app)
        shutil.rmtree('docs-tools')

    command('python build/docs-tools/makecloth/meta.py build/makefile.meta')
Beispiel #21
0
def truecase_corpus(corpus_name, tconf):
    '''Truecase both sides of a corpus with ``truecase.perl``.

    The English side is processed first, then the configured foreign
    language. Each run reads ``<corpus_name>.tok.<lang>`` and writes
    ``<corpus_name>.true.<lang>`` in the auxiliary corpus directory.

    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''
    template = ("{moses}/scripts/recaser/truecase.perl "
                "--model {corpus}/truecase-model.{lang} "
                "< {corpus}/{name}.tok.{lang} > {corpus}/{name}.true.{lang}")

    def _truecase(lang):
        # render and run the command for a single language
        cmd = template.format(moses=tconf.paths.moses,
                              corpus=tconf.paths.aux_corpus_files,
                              name=corpus_name,
                              lang=lang)
        command(cmd, logger=logger, capture=True)

    _truecase("en")
    _truecase(tconf.settings.foreign)
Beispiel #22
0
def truecase_corpus(corpus_name, tconf):
    '''Truecase both sides of a corpus with ``truecase.perl``.

    The English side is processed first, then the configured foreign
    language. Each run reads ``<corpus_name>.tok.<lang>`` and writes
    ``<corpus_name>.true.<lang>`` in the auxiliary corpus directory.

    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''
    template = ("{moses}/scripts/recaser/truecase.perl "
                "--model {corpus}/truecase-model.{lang} "
                "< {corpus}/{name}.tok.{lang} > {corpus}/{name}.true.{lang}")

    def _truecase(lang):
        # render and run the command for a single language
        fields = {
            'moses': tconf.paths.moses,
            'corpus': tconf.paths.aux_corpus_files,
            'name': corpus_name,
            'lang': lang,
        }
        command(template.format(**fields), logger=logger, capture=True)

    _truecase("en")
    _truecase(tconf.settings.foreign)
Beispiel #23
0
def test_filtered_model(working_path, tconf, d):
    '''Filter the tuned model to the test input, translate it, and score it.

    Three shell commands run in order inside a ``Timer`` keyed ``'test'``:
    ``filter-model-given-input.pl`` writes a filtered model to
    ``<working_path>/filtered``, ``moses`` translates the test file with it,
    and ``multi-bleu.perl`` scores the translation. Filtering can be faster
    than using the binarised model but has a history of failing on certain
    corpora.

    The captured output of the scoring command is stored in ``d["BLEU"]``.

    :param string working_path: path to working directory
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''
    filter_template = (
        "{moses}/scripts/training/filter-model-given-input.pl "
        "{work}/filtered {work}/mert-work/moses.ini "
        "{corpus}/{name}.true.en "
        "-Binarizer {moses}/bin/processPhraseTable"
    )
    decode_template = (
        "{moses}/bin/moses -f {work}/filtered/moses.ini  "
        "< {corpus}/{name}.true.en "
        "> {work}/{name}.translated.{lang} 2> {work}/{name}.out"
    )
    bleu_template = (
        "{moses}/scripts/generic/multi-bleu.perl -lc "
        "{corpus}/{name}.true.{lang} < {work}/{name}.translated.{lang}"
    )

    with Timer(d, 'test', lg=logger):
        fields = dict(moses=tconf.paths.moses,
                      corpus=tconf.paths.aux_corpus_files,
                      name=tconf.test.name,
                      lang=tconf.settings.foreign,
                      work=working_path)

        command(filter_template.format(**fields), logger=logger, capture=True)
        command(decode_template.format(**fields), logger=logger, capture=True)
        score = command(bleu_template.format(**fields),
                        logger=logger, capture=True)

        d["BLEU"] = score.out
Beispiel #24
0
def include_files(conf, files=None):
    """Map each included file to the source files that include it.

    When ``files`` is given it is returned untouched. Otherwise the source
    tree is searched with ``grep`` for ``.. include:: /`` directives and the
    hits are grouped by included path.

    :param conf: project configuration; ``conf.paths`` is read.
    :param files: optional precomputed mapping, returned as-is when not
        ``None``.
    :returns: dict mapping an included path to a sorted list of the files
        (relative to the source directory) that include it, updated with the
        result of ``generated_includes(conf)``. Including files whose name
        ends in ``~`` or ``overview.rst`` are left out of the lists.
    """
    if files is not None:
        return files

    source_dir = os.path.join(conf.paths.projectroot, conf.paths.source)
    grep = command(
        'grep -R ".. include:: /" {0} || exit 0'.format(source_dir),
        capture=True).out

    # source_dir is a literal path prefix, not a pattern: escape it so that
    # characters such as "." or "+" in the project path match only themselves.
    rx = re.compile(re.escape(source_dir) + r'(.*):.*\.\. include:: (.*)')

    matches = (rx.match(line) for line in grep.split('\n'))
    # groupby only merges adjacent items, so sort by the included path first
    pairs = sorted((m.groups() for m in matches if m is not None),
                   key=itemgetter(1))

    files = dict()
    for included, group in groupby(pairs, itemgetter(1)):
        sources = set(src for src, _ in group
                      if not src.endswith(('~', 'overview.rst')))
        files[included] = sorted(sources)

    files.update(generated_includes(conf))

    return files
Beispiel #25
0
def make_project(args):
    """Bootstrap a new project and optionally put it under git.

    Runs ``_weak_bootstrapping`` and, when ``args.quickstart_git`` is
    ``True``, runs ``git init``. Unless git reports that an existing
    repository was reinitialized, everything is added and an initial commit
    is attempted.

    :param args: parsed arguments; ``quickstart_git`` is read here.
    """
    _weak_bootstrapping(args)

    if args.quickstart_git is not True:
        return

    logger.info('creating a new git repository')
    r = command('git init', capture=True)

    if r.out.startswith('Reinitialized'):
        # repository already existed; leave its history alone
        return

    command('git add .')
    try:
        command('git commit -m "initial commit"')
    except CommandError:
        # best effort: a failed initial commit must not abort project
        # creation, but it should not vanish without a trace either
        logger.debug('initial git commit failed; continuing without it')
Beispiel #26
0
def _render_tex_into_pdf(fn, deployed_path, path):
    """Render a tex file to pdf and deploy the result.

    Runs ``pdflatex``, ``makeindex`` and then ``pdflatex`` twice more. A
    failure in either of the first two stages is logged and tolerated; a
    failure in a later stage aborts the render.

    :param fn: path of the tex file to render.
    :param deployed_path: destination handed to ``copy_if_needed``.
    :param path: directory used for ``TEXINPUTS``, the pdflatex output
        directory, and the makeindex style and index files.
    :returns: ``False`` when a late stage fails; otherwise ``None`` after the
        pdf has been handed to ``copy_if_needed``.
    """
    pdflatex = 'TEXINPUTS=".:{0}:" pdflatex --interaction batchmode --output-directory {0} {1}'.format(
        path, fn)

    base_fn = os.path.basename(fn)
    # strip the real extension rather than assuming it is four characters
    index_name = os.path.splitext(base_fn)[0]
    cmds = [
        pdflatex,
        "makeindex -s {0}/python.ist {0}/{1}.idx ".format(path, index_name),
        pdflatex, pdflatex
    ]

    # stages are numbered from 1 so the log reads "stage 1 of 4" .. "4 of 4"
    for stage, cmd in enumerate(cmds, 1):
        r = command(command=cmd, ignore=True)

        if r.succeeded is True:
            logger.info(
                'pdf completed rendering stage {0} of {1} successfully.'.
                format(stage, len(cmds)))
            continue

        if stage <= 2:
            # first pdflatex pass and makeindex: tolerate the failure
            logger.warning(
                'pdf build encountered error early on {0}, continuing cautiously.'
                .format(base_fn))
            continue

        logger.error(
            'pdf build encountered error running pdflatex, investigate on {0}. terminating'
            .format(base_fn))
        return False

    pdf_fn = os.path.splitext(fn)[0] + '.pdf'
    copy_if_needed(pdf_fn, deployed_path, 'pdf')
Beispiel #27
0
def make_project(args):
    """Bootstrap a new project and optionally put it under git.

    Runs ``_weak_bootstrapping`` and, when ``args.quickstart_git`` is
    ``True``, runs ``git init``. Unless git reports that an existing
    repository was reinitialized, everything is added and an initial commit
    is attempted.

    :param args: parsed arguments; ``quickstart_git`` is read here.
    """
    _weak_bootstrapping(args)

    if args.quickstart_git is not True:
        return

    logger.info('creating a new git repository')
    r = command('git init', capture=True)

    if r.out.startswith('Reinitialized'):
        # repository already existed; leave its history alone
        return

    command('git add .')
    try:
        command('git commit -m "initial commit"')
    except CommandError:
        # best effort: a failed initial commit must not abort project
        # creation, but it should not vanish without a trace either
        logger.debug('initial git commit failed; continuing without it')
Beispiel #28
0
    def cmd(self, *args):
        """Run ``git`` with the given arguments from inside ``self.path``.

        :param args: strings joined with spaces to form the git arguments.
        :returns: whatever ``command`` returns for the captured invocation.
        :raises GitError: wrapping the ``CommandError`` raised by ``command``.
        """
        git_args = ' '.join(args)
        shell_cmd = 'cd {0} ; git {1}'.format(self.path, git_args)

        try:
            result = command(command=shell_cmd, capture=True)
        except CommandError as e:
            raise GitError(e)

        return result
Beispiel #29
0
    def cmd(self, *args):
        """Run ``git`` with the given arguments from inside ``self.path``.

        :param args: strings joined with spaces to form the git arguments.
        :returns: whatever ``command`` returns for the captured invocation.
        :raises GitError: wrapping the ``CommandError`` raised by ``command``.
        """
        shell_cmd = 'cd {0} ; git {1}'.format(self.path, ' '.join(args))

        try:
            output = command(command=shell_cmd, capture=True)
        except CommandError as e:
            raise GitError(e)
        else:
            return output
Beispiel #30
0
def include_files(conf, files=None):
    """Map each included file to the source files that include it.

    When ``files`` is given it is returned untouched. Otherwise the source
    tree is searched with ``grep`` for ``.. include:: /`` directives and the
    hits are grouped by included path.

    :param conf: project configuration; ``conf.paths`` is read.
    :param files: optional precomputed mapping, returned as-is when not
        ``None``.
    :returns: dict mapping an included path to a sorted list of the files
        (relative to the source directory) that include it, updated with the
        result of ``generated_includes(conf)``. Including files whose name
        ends in ``~`` or ``overview.rst`` are left out of the lists.
    """
    if files is not None:
        return files

    source_dir = os.path.join(conf.paths.projectroot, conf.paths.source)
    grep = command(
        'grep -R ".. include:: /" {0} || exit 0'.format(source_dir),
        capture=True).out

    # source_dir is a literal path prefix, not a pattern: escape it so that
    # characters such as "." or "+" in the project path match only themselves.
    rx = re.compile(re.escape(source_dir) + r'(.*):.*\.\. include:: (.*)')

    matches = (rx.match(line) for line in grep.split('\n'))
    # groupby only merges adjacent items, so sort by the included path first
    pairs = sorted((m.groups() for m in matches if m is not None),
                   key=itemgetter(1))

    files = dict()
    for included, group in groupby(pairs, itemgetter(1)):
        sources = set(src for src, _ in group
                      if not src.endswith(('~', 'overview.rst')))
        files[included] = sorted(sources)

    files.update(generated_includes(conf))

    return files
Beispiel #31
0
def tokenize_corpus(corpus_dir, corpus_name, tconf):
    '''Tokenize both sides of a corpus with ``tokenizer.perl``.

    The English side is processed first, then the configured foreign
    language. Each run reads ``<corpus_dir>/<corpus_name>.<lang>`` and writes
    ``<corpus_name>.tok.<lang>`` to the auxiliary corpus directory.

    :param string corpus_dir: path to directory to the corpus
    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''

    # NOTE(review): "-l en" is passed for the foreign side as well -- confirm
    # that tokenizing the foreign corpus with English rules is intended.
    template = ("{moses}/scripts/tokenizer/tokenizer.perl -l en "
                "< {src}/{name}.{lang} > {corpus}/{name}.tok.{lang} "
                "-threads {threads}")

    def _tokenize(lang):
        # render and run the command for a single language
        cmd = template.format(moses=tconf.paths.moses,
                              src=corpus_dir,
                              corpus=tconf.paths.aux_corpus_files,
                              name=corpus_name,
                              lang=lang,
                              threads=tconf.settings.threads)
        command(cmd, logger=logger, capture=True)

    _tokenize("en")
    _tokenize(tconf.settings.foreign)
Beispiel #32
0
def run_sphinx(builder, sconf, conf):
    """Run ``sphinx-build`` for one builder, then run the finalize tasks.

    Ensures the build output directory exists, compiles PO files first when
    ``sconf`` names a language, and runs the build with output captured and
    failures ignored. The finalize tasks run whatever the return code is
    (the return-code gate was already disabled in the previous
    implementation); the log line now reflects the actual outcome instead of
    always claiming success.

    :param builder: name of the sphinx builder.
    :param sconf: sphinx configuration; ``fq_build_output``,
        ``build_output``, ``language`` and ``edition`` are read.
    :param conf: project configuration; ``conf.paths`` is read.
    :returns: tuple of ``(return_code, output)`` where ``output`` is the
        captured stderr and stdout joined by a newline.
    """
    if safe_create_directory(sconf.fq_build_output):
        logger.info('created directory "{1}" for sphinx builder {0}'.format(
            builder, sconf.fq_build_output))

    if 'language' in sconf and sconf.language is not None:
        command('sphinx-intl build --language=' + sconf.language)
        logger.info('compiled all PO files for translated build.')

    logger.info('starting sphinx build {0}'.format(builder))

    cmd = 'sphinx-build {0} -d {1}/doctrees-{2} {3} {4}'  # per-builder-doctree

    sphinx_cmd = cmd.format(
        get_sphinx_args(sconf, conf),
        os.path.join(conf.paths.projectroot,
                     conf.paths.branch_output), sconf.build_output,
        os.path.join(conf.paths.projectroot, conf.paths.branch_source),
        sconf.fq_build_output)

    logger.debug(sphinx_cmd)
    with Timer("running sphinx build for: {0}, {1}, {2}".format(
            builder, sconf.language, sconf.edition)):
        out = command(sphinx_cmd, capture=True, ignore=True)

    logger.info('completed sphinx build {0}'.format(builder))

    if out.return_code == 0:
        logger.info('successfully completed {0} sphinx build ({1})'.format(
            builder, out.return_code))
    else:
        logger.warning(
            'the sphinx build {0} was not successful ({1}). running finalize operation anyway'
            .format(builder, out.return_code))

    # finalization is intentionally unconditional
    finalizer_app = BuildApp(conf)
    finalizer_app.pool = "thread"
    finalizer_app.root_app = False
    finalize_sphinx_build(sconf, conf, finalizer_app)

    with Timer("finalize sphinx {0} build".format(builder)):
        finalizer_app.run()

    output = '\n'.join([out.err, out.out])

    return out.return_code, output
Beispiel #33
0
def run_sphinx(builder, sconf, conf):
    """Run ``sphinx-build`` for one builder, then run the finalize tasks.

    Creates the builder's output directory when it is missing, compiles PO
    files first when ``sconf`` names a language, and runs the build with
    output captured and failures ignored. The finalize tasks run whatever the
    return code is (the return-code gate was already disabled in the previous
    implementation); the log line now reflects the actual outcome instead of
    always claiming success.

    :param builder: name of the sphinx builder.
    :param sconf: sphinx configuration; ``language`` and ``build_output``
        are read.
    :param conf: project configuration; ``conf.paths`` is read.
    :returns: tuple of ``(return_code, output)`` where ``output`` is the
        captured stderr and stdout joined by a newline.
    """
    dirpath = os.path.join(conf.paths.projectroot, conf.paths.branch_output,
                           builder)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
        logger.info('created directories "{1}" for sphinx builder {0}'.format(
            builder, dirpath))

    if 'language' in sconf and sconf.language is not None:
        command('sphinx-intl build --language=' + sconf.language)
        logger.info('compiled all PO files for translated build.')

    logger.info('starting sphinx build {0} at {1}'.format(
        builder, timestamp()))

    cmd = 'sphinx-build {0} -d {1}/doctrees-{2} {3} {4}'  # per-builder doctree

    sphinx_cmd = cmd.format(
        get_sphinx_args(sconf, conf),
        os.path.join(conf.paths.projectroot, conf.paths.branch_output),
        os.path.basename(sconf.build_output),
        os.path.join(conf.paths.projectroot, conf.paths.branch_source),
        sconf.build_output)

    logger.debug(sphinx_cmd)
    out = command(sphinx_cmd, capture=True, ignore=True)
    logger.info('completed sphinx build {0} at {1}'.format(
        builder, timestamp()))

    if out.return_code == 0:
        logger.info('successfully completed {0} sphinx build at {1}'.format(
            builder, timestamp()))
    else:
        logger.warning(
            'the sphinx build {0} was not successful ({1}). running finalize operation anyway'
            .format(builder, out.return_code))

    # finalization is intentionally unconditional
    finalizer_app = BuildApp(conf)
    finalizer_app.root_app = False
    finalize_sphinx_build(sconf, conf, finalizer_app)
    finalizer_app.run()

    output = '\n'.join([out.err, out.out])

    return out.return_code, output
Beispiel #34
0
def slides_output(conf):
    """Stage the slides build output under the public site's ``slides`` directory.

    Mirrors the builder's output directory into
    ``<public_site_output>/slides`` with ``rsync``, creating the destination
    first when it is missing.

    :param conf: project configuration; ``conf.paths`` and ``conf.project``
        are read.
    """
    staging_dir = os.path.join(conf.paths.public_site_output, "slides")

    if not os.path.exists(staging_dir):
        logger.debug("created directories for {0}".format(staging_dir))
        os.makedirs(staging_dir)

    # edition builds are read from a "slides-<edition>" directory
    if "edition" in conf.project and conf.project.edition != conf.project.name:
        builder = "slides-" + conf.project.edition
    else:
        builder = "slides"

    # the trailing slash makes rsync copy the directory's contents
    build_dir = os.path.join(conf.paths.branch_output, builder) + "/"

    rsync = "rsync --recursive --times --delete {src} {dst}"
    command(rsync.format(src=build_dir, dst=staging_dir))

    logger.info("deployed slides local staging.")
Beispiel #35
0
def tokenize_corpus(corpus_dir, corpus_name, tconf):
    '''Tokenize both language sides of a corpus with the moses tokenizer.

    The tokenizer is run twice: once on ``<corpus_name>.en`` and once on
    ``<corpus_name>.<foreign>``. Output is written to
    ``<aux_corpus_files>/<corpus_name>.tok.<language>``.

    :param string corpus_dir: path to the directory holding the corpus
    :param string corpus_name: name of the corpus in the directory
    :param config tconf: translate configuration
    '''

    template = "{0}/scripts/tokenizer/tokenizer.perl -l en < {1}/{3}.{4} > {2}/{3}.tok.{4} -threads {5}"

    def _tokenize(language):
        # NOTE(review): the ``-l en`` flag is fixed in the template, so the
        # foreign side is also tokenized with the English setting -- confirm
        # that this is intended.
        command(template.format(tconf.paths.moses,
                                corpus_dir,
                                tconf.paths.aux_corpus_files,
                                corpus_name,
                                language,
                                tconf.settings.threads),
                logger=logger,
                capture=True)

    _tokenize("en")
    _tokenize(tconf.settings.foreign)
Beispiel #36
0
def transfer_source(conf, sconf):
    """Rsync the project source tree into the branch source directory.

    Locations that hold generated content (tables, generated includes,
    reference pages, images and the configured output directories) are passed
    to rsync as ``--exclude`` patterns so that ``--delete`` leaves them alone.
    After the sync, ``source_exclusion`` is applied and the target directory's
    timestamp is refreshed.

    :param conf: build configuration (``paths`` and ``system.content`` are
       read).
    :param sconf: sphinx configuration for this build; handed to
       ``source_exclusion``.
    :raises InvalidFile: when the target path exists but is not a directory.
    """
    target = os.path.join(conf.paths.projectroot, conf.paths.branch_source)

    created = safe_create_directory(target)

    # the checks below only report on the directory creation above and fail
    # when the target turned out to be something other than a directory.
    if created is True:
        logger.info('created directory for sphinx build: {0}'.format(target))
    elif not os.path.isdir(target):
        msg = '"{0}" exists and is not a directory'.format(target)
        logger.error(msg)
        raise InvalidFile(msg)

    source_dir = os.path.join(conf.paths.projectroot, conf.paths.source)
    # image path with the leading "<source>/" portion sliced off.
    image_dir = conf.paths.images[len(conf.paths.source)+1:]
    ref_dir = 'reference'
    rst_glob = os.path.sep + "*.rst"

    exclusions = [os.path.join('includes', name)
                  for name in ('table', 'generated')]

    reference_sections = (('method',),
                          ('command',),
                          ('operator', 'query'),
                          ('operator', 'aggregation'))
    for section in reference_sections:
        exclusions.append(os.path.join(ref_dir, *section) + rst_glob)

    exclusions.append(ref_dir + rst_glob)

    for pattern in ("*.png", "*.rst", "*.eps"):
        exclusions.append(image_dir + os.path.sep + pattern)

    # ``target`` is the same projectroot/branch_source join that the prefix
    # length is measured against.
    prefix_len = len(target) + 1
    exclusions.extend(conf.system.content.output_directories(prefix_len))

    # rsync must not delete directories that hold generated content in the
    # target, which keeps builds incremental.
    exclude_args = ' '.join("--exclude=" + item for item in exclusions)

    command('rsync --checksum --recursive {2} --delete {0}/ {1}'.format(
        source_dir, target, exclude_args))

    # drop the files that the sphinx config for this build excludes.
    source_exclusion(conf, sconf)
    os.utime(target, None)

    logger.info('prepared and migrated source for sphinx build in {0}'.format(target))
Beispiel #37
0
def slides_output(conf):
    """Copy the slides build into local staging with rsync.

    Source: ``<branch_output>/slides`` (``slides-<edition>`` when the project
    has an edition different from its name). Destination:
    ``<public_site_output>/slides``, created first if it does not exist.

    :param conf: build configuration providing ``paths`` and ``project``.
    """
    staging_dir = os.path.join(conf.paths.public_site_output, 'slides')

    if not os.path.exists(staging_dir):
        logger.debug('created directories for {0}'.format(staging_dir))
        os.makedirs(staging_dir)

    name_parts = ['slides']
    if 'edition' in conf.project and conf.project.edition != conf.project.name:
        name_parts.append(conf.project.edition)

    build_dir = os.path.join(conf.paths.branch_output, '-'.join(name_parts))

    # trailing slash: sync the contents of the build directory.
    rsync = 'rsync --recursive --times --delete {src} {dst}'
    command(rsync.format(src=build_dir + '/', dst=staging_dir))

    logger.info('deployed slides local staging.')
Beispiel #38
0
def deploy_target(cmd):
    """Run a deploy command, tolerating exit code 23.

    :param cmd: the shell command to run.
    :returns: the command result when it succeeded or exited with code 23.
    :raises CommandError: for any other unsuccessful result.
    """
    result = command(cmd, capture=True, ignore=True, logger=logger)

    if result.succeeded is True:
        return result

    if result.return_code != 23:
        raise CommandError('"{0}" returned code {1}'.format(result.out, result.return_code))

    # exit code 23 is reported but not treated as fatal.
    logger.warning('permissions error on remote end, possibly timestamp related.')
    return result
Beispiel #39
0
def slides_output(conf):
    """Stage the built slides under ``<public_site_output>/slides``.

    The builder directory below ``conf.paths.branch_output`` is ``slides``,
    suffixed with ``-<edition>`` when ``conf.project`` carries an edition that
    is not the project name. Its contents are rsynced (with ``--delete``) into
    the staging directory, which is created on demand.

    :param conf: build configuration object.
    """
    paths = conf.paths
    dst = os.path.join(paths.public_site_output, 'slides')

    dst_missing = not os.path.exists(dst)
    if dst_missing:
        logger.debug('created directories for {0}'.format(dst))
        os.makedirs(dst)

    suffix = ''
    if 'edition' in conf.project and conf.project.edition != conf.project.name:
        suffix = '-' + conf.project.edition

    src = os.path.join(paths.branch_output, 'slides' + suffix) + '/'

    cmd = 'rsync --recursive --times --delete {src} {dst}'.format(src=src, dst=dst)
    command(cmd)

    logger.info('deployed slides local staging.')
Beispiel #40
0
def transfer_images(conf, sconf):
    """Copy image files into the latex builder's output directory.

    Nothing is copied unless the branch image directory exists and the
    builder is ``latex``. For latex builds the ``.png``, ``.jpg`` and ``.eps``
    files are rsynced into ``<branch_output>/<builder>[-<edition>]``; the same
    command is then re-run with every ``images`` substring replaced by
    ``figures``, ignoring failures.

    :param conf: build configuration (``paths`` is read).
    :param sconf: sphinx configuration (``builder`` and optionally
       ``edition`` are read).
    :returns: ``False`` when the image directory does not exist, otherwise
       ``None``.
    """
    image_dir = os.path.join(conf.paths.projectroot, conf.paths.branch_images)

    if not os.path.isdir(image_dir):
        return False

    if sconf.builder != 'latex':
        return None

    has_edition = 'edition' in sconf and sconf.edition is not None
    dir_name = hyph_concat(sconf.builder, sconf.edition) if has_edition else sconf.builder

    builder_dir = os.path.join(conf.paths.projectroot, conf.paths.branch_output, dir_name)
    safe_create_directory(builder_dir)

    rsync_images = 'rsync -am --include="*.png" --include="*.jpg" --include="*.eps" --exclude="*" {0}/ {1} '.format(image_dir, builder_dir)
    # the textual replace touches every "images" occurrence in the command.
    rsync_figures = rsync_images.replace('images', 'figures')

    command(rsync_images)
    command(rsync_figures, ignore=True)

    logger.info('migrated images for latex build')
Beispiel #41
0
def deploy_target(cmd):
    """Execute ``cmd`` and hand back its result unless it failed hard.

    A result that did not succeed is still returned when its return code is
    23 (a warning is logged); every other failure raises.

    :param cmd: the shell command to run.
    :returns: the result object produced by ``command``.
    :raises CommandError: when the command fails with a code other than 23.
    """
    outcome = command(cmd, capture=True, ignore=True, logger=logger)

    failed = outcome.succeeded is not True

    if failed and outcome.return_code != 23:
        raise CommandError('"{0}" returned code {1}'.format(
            outcome.out, outcome.return_code))

    if failed:
        logger.warning(
            'permissions error on remote end, possibly timestamp related.')

    return outcome
Beispiel #42
0
def train_model(working_path, lm_path, l_len, l_order, l_lang, l_direct, l_score,
                l_align, l_orient, l_model, tconf, d):

    '''Run moses ``train-model.perl`` for one training configuration.

    The working directory is created first (``os.makedirs``), then the
    training script is run with its output redirected to
    ``<working_path>/training.out``. The whole step is timed under the
    ``train`` key of ``d``.

    :param string working_path: path to working directory
    :param string lm_path: path to the language model directory
    :param int l_len: max phrase length (not referenced by the command below)
    :param int l_order: n-gram order
    :param string l_lang: reordering language setting, either f or fe
    :param string l_direct: reordering directionality setting, either forward, backward, or bidirectional
    :param string l_score: score options setting, any combination of --GoodTuring, --NoLex, --OnlyDirect
    :param string l_align: alignment algorithm
    :param string l_orient: reordering orientation setting, either mslr, msd, monotonicity, leftright
    :param string l_model: reordering modeltype setting, either wbe, phrase, or hier
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    # NOTE(review): with ``2>&1 > file`` only stdout lands in training.out;
    # stderr keeps pointing at the original stdout -- confirm this is wanted.
    template = ("{0}/scripts/training/train-model.perl -root-dir {13}/train "
                "-corpus {1}/{2}.clean -f en -e {3} "
                "--score-options '{4}' "
                "-alignment {5} -reordering {6}-{7}-{8}-{9} "
                "-lm 0:{10}:{11}/{2}.blm.{3}:1 "
                "-mgiza -mgiza-cpus {12} -external-bin-dir {0}/tools "
                "-cores {12} --parallel --parts 3 "
                "2>&1 > {13}/training.out")

    with Timer(d, 'train', lg=logger):
        os.makedirs(working_path)

        train_cmd = template.format(
            tconf.paths.moses,             # {0}
            tconf.paths.aux_corpus_files,  # {1}
            tconf.train.name,              # {2}
            tconf.settings.foreign,        # {3}
            l_score,                       # {4}
            l_align,                       # {5}
            l_model,                       # {6}
            l_orient,                      # {7}
            l_direct,                      # {8}
            l_lang,                        # {9}
            l_order,                       # {10}
            lm_path,                       # {11}
            tconf.settings.threads,        # {12}
            working_path)                  # {13}

        command(train_cmd, logger=logger, capture=True)
Beispiel #43
0
def make_project(args):
    """
    Generate a project skeleton. Prefer this operation over
    ``sphinx-quickstart``. Also builds skeleton HTML artifacts.

    When ``args.quickstart_git`` is ``True`` a git repository is initialized
    as well; if ``git init`` did not report a re-initialization, all files are
    added and an initial commit is attempted.

    :param args: runtime arguments; handed to ``_weak_bootstrapping`` and
       read for ``quickstart_git``.
    """

    _weak_bootstrapping(args)

    if args.quickstart_git is not True:
        return

    logger.info('creating a new git repository')
    r = command('git init', capture=True)

    # 'Reinitialized ...' means a repository already existed; leave its
    # index and history alone in that case.
    if r.out.startswith('Reinitialized'):
        return

    command('git add .')
    try:
        command('git commit -m "initial commit"')
    except CommandError:
        # best effort: a failing initial commit does not abort the setup.
        pass
Beispiel #44
0
    def am(self, patches, repo=None, sign=False):
        """Apply patches to the working tree via ``curl ... | git am --3way``.

        Each entry of ``patches`` is interpreted by its form:

        - starts with ``http``: used as the patch URL (``.patch`` is appended
          when missing);
        - contains an ASCII letter: fetched from ``<repo>/commit/<entry>.patch``;
        - anything else: fetched from ``<repo>/pull/<entry>.patch``.

        Entries that need ``repo`` are skipped with a warning when ``repo`` is
        ``None``. Previously only the ``pull`` form was guarded and the
        ``commit`` form raised ``TypeError`` while building the URL.

        :param patches: iterable of patch identifiers (strings).
        :param repo: base URL of the repository the commits/pulls belong to.
        :param bool sign: when ``True``, ``--signoff`` is added to ``git am``.
        """
        cmd_base = 'curl {path} | git am --3way'

        if sign is True:
            cmd_base += ' --signoff'

        for obj in patches:
            if obj.startswith('http'):
                if not obj.endswith('.patch'):
                    obj += '.patch'

                command(cmd_base.format(path=obj))
                logger.info("applied {0}".format(obj))
                continue

            # both remaining forms build their URL from ``repo``.
            if repo is None:
                logger.warning('not applying "{0}", because of missing repo'.format(obj))
                continue

            if re.search('[a-zA-Z]+', obj):
                path = '/'.join([ repo, 'commit', obj ]) + '.patch'

                command(cmd_base.format(path=path))
                logger.info('merged commit {0} for {1} into {2}'.format(obj, repo, self.current_branch()))
            else:
                path = '/'.join([ repo, 'pull', obj ]) + '.patch'

                command(cmd_base.format(path=path))
                logger.info("applied {0}".format(obj))
Beispiel #45
0
    def am(self, patches, repo=None, sign=False):
        """Fetch patches with ``curl`` and pipe them into ``git am --3way``.

        How an entry of ``patches`` is resolved:

        - an entry starting with ``http`` is the URL itself, with ``.patch``
          appended when it is not already the suffix;
        - an entry containing an ASCII letter resolves to
          ``<repo>/commit/<entry>.patch``;
        - any other entry resolves to ``<repo>/pull/<entry>.patch``.

        The last two forms cannot be resolved without ``repo``; such entries
        are logged and skipped. The ``commit`` form used to raise
        ``TypeError`` for ``repo=None`` instead of being skipped.

        :param patches: iterable of patch identifiers (strings).
        :param repo: base URL used for the ``commit`` and ``pull`` forms.
        :param bool sign: add ``--signoff`` to the ``git am`` call when
           ``True``.
        """
        cmd_base = 'curl {path} | git am --3way'

        if sign is True:
            cmd_base += ' --signoff'

        for obj in patches:
            is_url = obj.startswith('http')

            if not is_url and repo is None:
                logger.warning('not applying "{0}", because of missing repo'.format(obj))
                continue

            if is_url:
                if not obj.endswith('.patch'):
                    obj += '.patch'

                command(cmd_base.format(path=obj))
                logger.info("applied {0}".format(obj))
            elif re.search('[a-zA-Z]+', obj):
                path = '/'.join([ repo, 'commit', obj ]) + '.patch'

                command(cmd_base.format(path=path))
                logger.info('merged commit {0} for {1} into {2}'.format(obj, repo, self.current_branch()))
            else:
                path = '/'.join([ repo, 'pull', obj ]) + '.patch'

                command(cmd_base.format(path=path))
                logger.info("applied {0}".format(obj))
Beispiel #46
0
def _weak_bootstrapping(args):
    """Lay down the quickstart skeleton in the current directory and build it.

    Steps: reset the language/edition selections on ``args`` and force the
    ``html`` builder, rsync the packaged ``quickstart`` files into the current
    directory (existing files are kept), run ``sphinx_publication`` and finally
    regenerate ``build/makefile.meta``.

    :param args: runtime arguments; mutated in place.
    """
    # two distinct lists: a chained assignment would bind the same list
    # object to both attributes.
    args.languages_to_build = []
    args.editions_to_build = []
    args.builder = 'html'
    conf = fetch_config(args)
    app = BuildApp(conf)

    mod_path = os.path.dirname(inspect.getfile(giza))
    qstart_path = os.path.join(mod_path, 'quickstart')

    command('rsync --ignore-existing --recursive {0}/. {1}'.format(qstart_path, os.getcwd()))
    logger.info('migrated new site files')

    try:
        sphinx_publication(conf, args, app)
    except:
        # NOTE(review): the catch-all is kept on purpose -- the first build
        # is retried once whatever it raised; confirm whether narrowing to
        # ``Exception`` is safe for ``sphinx_publication``.
        sphinx_publication(conf, args, app)
        shutil.rmtree('docs-tools')

    command('python build/docs-tools/makecloth/meta.py build/makefile.meta')
    logger.info('bootstrapped makefile system')

    logger.info('updated project skeleton in current directory.')
Beispiel #47
0
def json_output(conf):
    """Stage the json build output and its file list for the public site.

    The json builder directory (``json`` or ``json-<edition>``) below
    ``conf.paths.branch_output`` is rsynced into
    ``<public_site_output>/json``, leaving out pickles, ``.buildinfo`` and
    ``fjson`` files. Afterwards the ``json-file-list`` file is copied to
    ``<public_site_output>/json/.file_list`` via ``copy_if_needed``.

    :param conf: build configuration providing ``paths`` and ``project``.
    """
    paths = conf.paths

    json_dst = os.path.join(paths.public_site_output, 'json')
    list_file = os.path.join(paths.branch_output, 'json-file-list')
    public_list_file = os.path.join(json_dst, '.file_list')

    if not os.path.exists(json_dst):
        logger.debug('created directories for {0}'.format(json_dst))
        os.makedirs(json_dst)

    if 'edition' in conf.project and conf.project.edition != conf.project.name:
        builder = 'json' + '-' + conf.project.edition
    else:
        builder = 'json'

    # trailing slash: sync the directory's contents, not the directory.
    json_src = os.path.join(paths.branch_output, builder) + '/'

    rsync_cmd = 'rsync --recursive --times --delete --exclude="*pickle" --exclude=".buildinfo" --exclude="*fjson" {src} {dst}'.format(
        src=json_src, dst=json_dst)
    command(rsync_cmd)

    copy_if_needed(list_file, public_list_file)
    logger.info('deployed json files to local staging.')
Beispiel #48
0
def _render_tex_into_pdf(fn, deployed_path, path, output_format="pdf"):
    """
    Runs ``pdflatex`` operations, can generate ``dvi`` and ``pdf``. Runs
    pdflatex multiple times to correctly index and cross reference the PDF.

    The stages are: pdflatex, makeindex, pdflatex, pdflatex and -- for
    ``dvi`` only -- ``dvipdf``. A failure in one of the first two stages is
    logged and tolerated; a failure in any later stage aborts the rendering.
    After all stages ran, the resulting ``.pdf`` is handed to
    ``copy_if_needed`` together with ``deployed_path``.

    :param fn: path of the tex file to render.
    :param deployed_path: target passed to ``copy_if_needed`` for the pdf.
    :param path: directory used for ``TEXINPUTS``, as pdflatex output
       directory and as location of ``python.ist``.
    :param output_format: ``'pdf'`` (default) or ``'dvi'``.
    :returns: ``False`` when a late stage failed, otherwise ``None``
       (including the unsupported-format case).
    """

    if output_format == 'dvi':
        pdflatex = 'TEXINPUTS=".:{0}:" pdflatex --output-format dvi --interaction batchmode --output-directory {0} {1}'.format(
            path, fn)
    elif output_format == 'pdf':
        pdflatex = 'TEXINPUTS=".:{0}:" pdflatex --interaction batchmode --output-directory {0} {1}'.format(
            path, fn)
    else:
        logger.error(
            'not rendering pdf because {0} is not an output format'.format(
                output_format))
        return

    base_fn = os.path.basename(fn)
    # strip the real extension instead of assuming it is four characters
    # long; this matches how the pdf file name is derived below.
    stem = os.path.splitext(base_fn)[0]

    cmds = [
        pdflatex,
        "makeindex -s {0}/python.ist {0}/{1}.idx ".format(path, stem),
        pdflatex, pdflatex
    ]

    if output_format == 'dvi':
        cmds.append("cd {0}; dvipdf {1}.dvi".format(path, stem))

    for idx, cmd in enumerate(cmds):
        r = command(command=cmd, ignore=True)

        if r.succeeded is True:
            # report stages 1-based so the last one reads "N of N".
            logger.info(
                'pdf completed rendering stage {0} of {1} successfully ({2}).'.
                format(idx + 1, len(cmds), base_fn))
        else:
            if idx <= 1:
                # errors in the first pdflatex/makeindex pass are tolerated.
                logger.warning(
                    'pdf build encountered error early on {0}, continuing cautiously.'
                    .format(base_fn))
                continue
            else:
                logger.error(
                    'pdf build encountered error running pdflatex, investigate on {0}. terminating'
                    .format(base_fn))
                logger.error(cmd)
                return False

    pdf_fn = os.path.splitext(fn)[0] + '.pdf'
    copy_if_needed(pdf_fn, deployed_path, 'pdf')
Beispiel #49
0
def _weak_bootstrapping(args):
    """Bootstrap the build system files in the current working directory.

    The language and edition selections on ``args`` are cleared and the
    builder is set to ``html`` before the configuration is fetched. The
    packaged ``quickstart`` tree is then rsynced into the current directory
    without overwriting existing files, ``sphinx_publication`` is run, and
    ``build/makefile.meta`` is regenerated.

    :param args: runtime arguments; mutated in place.
    """
    # assign separate list objects; ``a = b = []`` would make both
    # attributes share a single list.
    args.languages_to_build = []
    args.editions_to_build = []
    args.builder = 'html'
    conf = fetch_config(args)
    app = BuildApp(conf)

    mod_path = os.path.dirname(inspect.getfile(giza))
    qstart_path = os.path.join(mod_path, 'quickstart')

    command('rsync --ignore-existing --recursive {0}/. {1}'.format(
        qstart_path, os.getcwd()))
    logger.info('migrated new site files')

    try:
        sphinx_publication(conf, args, app)
    except:
        # NOTE(review): deliberately left as a catch-all so that the build
        # is retried once regardless of what the first attempt raised;
        # verify against ``sphinx_publication`` before narrowing.
        sphinx_publication(conf, args, app)
        shutil.rmtree('docs-tools')

    command('python build/docs-tools/makecloth/meta.py build/makefile.meta')
    logger.info('bootstrapped makefile system')

    logger.info('updated project skeleton in current directory.')
Beispiel #50
0
def build_language_model(lm_path, l_order, l_smoothing, tconf, d):
    '''This function builds the language model for the given config

    The model directory is created, then five shell commands run in order:
    ``add-start-end.sh``, ``build-lm.sh``, ``compile-lm``, ``build_binary``
    and a final ``query`` of the binary model with a sample sentence. The
    whole step is timed under the ``lm`` key of ``d``.

    :param string lm_path: path to language model directory
    :param int l_order: n-gram order
    :param string l_smoothing: smoothing algorithm
    :param config tconf: translate configuration
    :param dict d: output dictionary
    '''

    with Timer(d, 'lm', lg=logger):
        os.makedirs(lm_path)

        irstlm = tconf.paths.irstlm
        moses = tconf.paths.moses
        corpus_files = tconf.paths.aux_corpus_files
        name = tconf.train.name
        foreign = tconf.settings.foreign

        # mark sentence boundaries
        add_markers = "{0}/bin/add-start-end.sh < {1}/{2}.true.{3} > {4}/{2}.sb.{3}".format(
            irstlm, corpus_files, name, foreign, lm_path)
        # build the intermediate language model
        build_ilm = "{0}/bin/build-lm.sh -i {5}/{1}.sb.{4} -t {5}/tmp -p -n {2} -s {3} -o {5}/{1}.ilm.{4}.gz".format(
            irstlm, name, l_order, l_smoothing, foreign, lm_path)
        # convert to arpa text format
        to_arpa = "{0}/bin/compile-lm --text  {3}/{1}.ilm.{2}.gz {3}/{1}.arpa.{2}".format(
            irstlm, name, foreign, lm_path)
        # binarise the arpa file
        to_binary = "{0}/bin/build_binary -i {3}/{1}.arpa.{2} {3}/{1}.blm.{2}".format(
            moses, name, foreign, lm_path)
        # query the finished model with a sample sentence
        sample_query = "echo 'Is this a Spanish sentance?' | {0}/bin/query {1}/{2}.blm.{3}".format(
            moses, lm_path, name, foreign)

        for step in (add_markers, build_ilm, to_arpa, to_binary, sample_query):
            command(step, logger=logger, capture=True)
Beispiel #51
0
def _render_tex_into_pdf(fn, path):
    """Render a tex file by running pdflatex, makeindex, pdflatex, pdflatex.

    A failure in one of the first two stages is logged as a warning and the
    remaining stages still run; a failure in a later stage stops the
    rendering.

    :param fn: path of the tex file to render.
    :param path: directory used for ``TEXINPUTS``, as pdflatex output
       directory and as location of ``python.ist``.
    :returns: ``False`` when a late stage failed, otherwise ``None``.
    """
    pdflatex = 'TEXINPUTS=".:{0}:" pdflatex --interaction batchmode --output-directory {0} {1}'.format(path, fn)

    base_fn = os.path.basename(fn)
    # strip the actual extension rather than assuming a four-character one.
    stem = os.path.splitext(base_fn)[0]

    cmds = [ pdflatex,
             "makeindex -s {0}/python.ist {0}/{1}.idx ".format(path, stem),
             pdflatex,
             pdflatex ]

    for idx, cmd in enumerate(cmds):
        r = command(command=cmd, ignore=True)

        if r.succeeded is True:
            # stages are reported 1-based so the last one reads "N of N".
            logger.info('pdf completed rendering stage {0} of {1} successfully.'.format(idx + 1, len(cmds)))
        else:
            if idx <= 1:
                # errors in the first pdflatex/makeindex pass are tolerated.
                logger.warning('pdf build encountered error early on {0}, continuing cautiously.'.format(base_fn))
                continue
            else:
                logger.error('pdf build encountered error running pdflatex, investigate on {0}. terminating'.format(base_fn))
                return False
Beispiel #52
0
def finalize_dirhtml_build(sconf, conf):
    """Publish a finished dirhtml build into the public site directory.

    Steps, in order:

    1. copy the builder's ``index.html`` to ``search.html`` in the
       single-html directory, when that index page exists;
    2. rsync ``sconf.fq_build_output`` into the public site output;
    3. remove the configured excluded files from the public site output;
    4. on published branches, generate the sitemap and copy an existing
       ``sitemap.xml.gz`` from the branch output to the public site output.

    :param sconf: sphinx configuration of the build being finalized.
    :param conf: build configuration.
    """
    builder = sconf.builder

    single_html_dir = get_single_html_dir(conf)
    search_page = os.path.join(conf.paths.branch_output, builder, 'index.html')

    if os.path.exists(search_page):
        copy_if_needed(source_file=search_page,
                       target_file=os.path.join(single_html_dir,
                                                'search.html'))

    dest = os.path.join(conf.paths.projectroot, conf.paths.public_site_output)
    m_cmd = command('rsync -a {source}/ {destination}'.format(
        source=sconf.fq_build_output, destination=dest))

    logger.info('"{0}" migrated build from {1} to {2}, with result {3}'.format(
        sconf.name, sconf.fq_build_output, dest, m_cmd.return_code))

    if 'excluded_files' in sconf:
        # NOTE(review): the membership test looks at the top level of sconf
        # while the values are read from sconf['dirhtml'] -- confirm the
        # layout of sconf.
        fns = [
            os.path.join(dest, fn) for fn in sconf['dirhtml']['excluded_files']
        ]

        cleaner(fns)
        # use the module's logger like every other message in this function
        # (this call previously went to the root logger).
        logger.info('removed excluded files from dirhtml output directory')

    if conf.git.branches.current in conf.git.branches.published:
        sitemap_exists = sitemap(config_path=None, conf=conf)

        legacy_sitemap_fn = os.path.join(conf.paths.projectroot,
                                         conf.paths.branch_output,
                                         'sitemap.xml.gz')

        if os.path.exists(legacy_sitemap_fn) and sitemap_exists is True:
            copy_if_needed(source_file=legacy_sitemap_fn,
                           target_file=os.path.join(dest, 'sitemap.xml.gz'))
Beispiel #53
0
def finalize_dirhtml_build(sconf, conf):
    """Move a completed dirhtml build into the public site output.

    The builder's ``index.html`` (when present) becomes ``search.html`` of
    the single-html directory, ``sconf.build_output`` is rsynced into the
    public site output, the configured excluded files are removed there, and
    -- on published branches, when ``sitemap`` reports success --
    ``sitemap.xml.gz`` is copied from the branch output to the public site
    output.

    :param sconf: sphinx configuration of the build being finalized.
    :param conf: build configuration.
    """
    pjoin = os.path.join
    builder = sconf.builder

    single_html_dir = get_single_html_dir(conf)
    search_page = pjoin(conf.paths.branch_output, builder, 'index.html')

    if os.path.exists(search_page):
        copy_if_needed(source_file=search_page,
                       target_file=pjoin(single_html_dir, 'search.html'))

    dest = pjoin(conf.paths.projectroot, conf.paths.public_site_output)
    m_cmd = command('rsync -a {source}/ {destination}'.format(source=sconf.build_output,
                                                              destination=dest))

    logger.info('"{0}" migrated build from {1} to {2}, with result {3}'.format(sconf.name, sconf.build_output, dest, m_cmd.return_code))

    if 'excluded_files' in sconf:
        # NOTE(review): membership is tested on sconf itself, the values are
        # read from sconf['dirhtml'] -- confirm the layout of sconf.
        fns = [ pjoin(dest, fn)
                for fn in sconf['dirhtml']['excluded_files'] ]

        cleaner(fns)
        # report through the module's logger, as the rest of this function
        # does (this call previously went to the root logger).
        logger.info('removed excluded files from dirhtml output directory')

    if conf.git.branches.current in conf.git.branches.published:
        sitemap_exists = sitemap(config_path=None, conf=conf)

        if sitemap_exists is True:
            copy_if_needed(source_file=pjoin(conf.paths.projectroot,
                                             conf.paths.branch_output,
                                             'sitemap.xml.gz'),
                           target_file=pjoin(dest, 'sitemap.xml.gz'))
Beispiel #54
0
def make_project(args):
    """Create a project skeleton in the current directory and bootstrap it.

    When the directory already contains a ``.git`` entry, only the build
    system bootstrap is attempted. Otherwise a git repository is initialized;
    unless ``git init`` reports a re-initialization, the packaged
    ``quickstart`` files are copied in, added, and an initial commit is
    attempted. The bootstrap runs afterwards in that case too.

    :param args: runtime arguments, handed to ``_weak_bootstrapping``.
    """
    curdir = os.getcwd()

    if '.git' in os.listdir(curdir):
        logger.critical('cannot create new project in directory that already has files: ' + curdir)
        _weak_bootstrapping(args)
        logger.info('attempted to bootstrap buildsystem')
        return

    package_dir = os.path.dirname(inspect.getfile(giza))
    qstart_path = os.path.join(package_dir, 'quickstart')

    init_result = command('git init', capture=True)
    fresh_repo = not init_result.output.startswith('Re')

    if fresh_repo:
        command('rsync -r {0}/. {1}'.format(qstart_path, curdir))
        command('git add .')
        try:
            command('git commit -m "initial commit"')
        except CommandError:
            # a failing initial commit is not fatal.
            pass

    logger.info('created project skeleton in current directory.')

    _weak_bootstrapping(args)
Beispiel #55
0
def logged_command(verb, cmd):
    """Run ``cmd`` with captured output and log what it acted on.

    The log line is ``<verb>ed <last space-separated token of cmd>``.

    :param verb: verb stem used in the log message.
    :param cmd: the shell command to run.
    :returns: the ``out`` attribute of the command result.
    """
    result = command(cmd, capture=True)

    last_token = cmd.split(' ')[-1]
    logger.info('{0}ed {1}'.format(verb, last_token))

    return result.out
Beispiel #56
0
def translate_file(in_file, out_file,  tconf, protected_file, super_temp=None):
    '''Translate a file by piping it through the moses tool chain.

    Inside a temporary directory the input is tokenized, truecased, decoded
    with moses, detruecased and detokenized; the detokenized text is written
    to ``out_file``. An empty input file produces an empty output file
    without running any tool.

    :param string in_file: path to file to be translated
    :param string out_file: path to file where translated output should be
       written; when ``None``, ``in_file`` + ``.translated`` is used
    :param config tconf: translateconfig object
    :param string protected_file: path to regex file to protect expressions from tokenization
    :param string super_temp: If you have a TempDir context inside of a TempDir
       context, this allows you to not create two. Just pass in the directory of
       the previous temporary directory
    '''

    def _run(template, *fields):
        # every stage is executed the same way; only the command differs.
        command(template.format(*fields), logger=logger, capture=True)

    if out_file is None:
        out_file = in_file + ".translated"

    with TempDir(super_temp=super_temp) as temp:
        logger.info("tempdir: " + temp)
        logger.info("decoding: " + in_file)

        if os.stat(in_file).st_size == 0:
            logger.warning("File is empty")
            # create (or truncate) the output file and stop.
            with open(out_file, "w"):
                pass
            return

        if super_temp is None:
            shutil.copy(in_file, temp)
        # from here on the file is addressed by name inside the temp dir.
        in_file = os.path.basename(in_file)

        # tokenize, optionally protecting the given patterns
        if protected_file is None:
            _run("{0}/scripts/tokenizer/tokenizer.perl -l en < {3}/{1} > {3}/{1}.tok.en -threads {2}",
                 tconf.paths.moses, in_file, tconf.settings.threads, temp)
        else:
            _run("{0}/scripts/tokenizer/tokenizer.perl -l en < {4}/{1} > {4}/{1}.tok.en -threads {2} -protected {3}",
                 tconf.paths.moses, in_file, tconf.settings.threads, protected_file, temp)

        # truecase
        _run("{0}/scripts/recaser/truecase.perl --model {1}/truecase-model.en < {3}/{2}.tok.en > {3}/{2}.true.en",
             tconf.paths.moses, tconf.paths.aux_corpus_files, in_file, temp)
        # decode with the binarised model of the best run
        _run("{0}/bin/moses -f {1}/{3}/working/binarised-model/moses.ini < {4}/{2}.true.en > {4}/{2}.true.trans",
             tconf.paths.moses, tconf.paths.project, in_file, tconf.settings.best_run, temp)
        # detruecase
        _run("{0}/scripts/recaser/detruecase.perl < {2}/{1}.true.trans > {2}/{1}.tok.trans",
             tconf.paths.moses, in_file, temp)
        # detokenize into the requested output file
        _run("{0}/scripts/tokenizer/detokenizer.perl -l en < {3}/{1}.tok.trans > {2}",
             tconf.paths.moses, in_file, out_file, temp)
Beispiel #57
0
def logged_command(verb, cmd):
    """Execute ``cmd`` (capturing output) and log ``<verb>ed <target>``.

    ``<target>`` is whatever follows the last space in ``cmd`` (the whole
    command when it contains no space).

    :param verb: verb stem used in the log message.
    :param cmd: the shell command to run.
    :returns: the ``out`` attribute of the command result.
    """
    outcome = command(cmd, capture=True)

    target = cmd.rsplit(' ', 1)[-1]
    message = '{0}ed {1}'.format(verb, target)
    logger.info(message)

    return outcome.out