# Resolve MEGAMOUTH_HOME once and fail with a readable message when it is
# missing: getenv() returns None for an unset variable, and concatenating
# None with a path suffix would otherwise die with an opaque TypeError.
_megamouth_home = getenv('MEGAMOUTH_HOME')
if _megamouth_home is None:
    raise SystemExit('MEGAMOUTH_HOME environment variable is not set; '
                     'it is needed to locate the default config files')

# NOTE(review): tag_trees appears in both `steps` and `optional` -- confirm
# that the duplicate registration is intended.
script(steps=[get_abs,
              core_nlp,
              lemma_trees,
              ext_vars,
              offsets,
              prep_vars,
              prune_vars,
              tag_trees,
              ext_rels,
              arts2csv,
              vars2csv,
              rels2csv],
       optional=[get_inp,
                 remove_server,
                 start_server,
                 stop_server,
                 add_cit,
                 add_meta,
                 clean,
                 clean_cache,
                 report,
                 tag_trees,
                 setup_server,
                 toneo,
                 ppgraph],
       # Default config files, all located below $MEGAMOUTH_HOME/scripts.
       default_cfg_fnames=[
           _megamouth_home + '/scripts/megamouth.ini',
           _megamouth_home + '/scripts/abs/megamouth-abs.ini',
           _megamouth_home +
           '/scripts/abs/elsevier/megamouth-abs-elsevier-6.ini',
           _megamouth_home + '/scripts/local.ini'])
Esempio n. 2
0
# Thin wrapper around `lookup_bibtex`: the two directory arguments are passed
# positionally and `resume` (default False) is forwarded as a keyword.
# NOTE(review): unlike rank/html/terms, no @docstring decorator is visible
# on this function in this excerpt -- confirm whether one is expected.
def bibtex(records_dir, bib_dir, resume=False):
    lookup_bibtex(records_dir, bib_dir, resume=resume)


# Thin wrapper around `rank_results`; the single argument is forwarded as-is.
# @docstring(rank_results) presumably copies that function's docstring onto
# this wrapper -- TODO confirm against the decorator's definition.
@docstring(rank_results)
def rank(results_file):
    rank_results(results_file)


# Thin wrapper around `results_to_html`; all four arguments are forwarded
# positionally. @arg declares `--max-n-records` with type=int; when it is
# omitted, the default None is passed through unchanged.
# @docstring(results_to_html) presumably copies that function's docstring
# onto this wrapper -- TODO confirm against the decorator's definition.
@arg("--max-n-records", type=int)
@docstring(results_to_html)
def html(search_results_file, records_dir, results_file,
         max_n_records=None):
    results_to_html(search_results_file, records_dir, results_file,
                    max_n_records)


# -----------------------------------------------------------------------------
# Optional steps
# -----------------------------------------------------------------------------

# Thin wrapper around `get_terms`; all three arguments are forwarded
# positionally. @arg declares `--max-n` with type=int; when it is omitted,
# the default None is passed through unchanged.
# @docstring(get_terms) presumably copies that function's docstring onto
# this wrapper -- TODO confirm against the decorator's definition.
@arg("--max-n", type=int)
@docstring(get_terms)
def terms(csv_file, results_file, max_n=None):
    get_terms(csv_file, results_file, max_n)


# Hand the step functions and the default configuration file name to
# `script`: four entries in `steps`, two in `optional`.
script(
    steps=[
        search,
        bibtex,
        rank,
        html
    ],
    optional=[
        terms,
        clean
    ],
    default_cfg_fnames=['nature-corpus.ini'])
Esempio n. 3
0
#!/usr/bin/env python
"""
process full-text corpus, segment Plos, part 1
"""

from os import getenv

from baleen.pipeline import script
from baleen.steps import *

from megamouth.steps import get_full, get_inp

# Resolve MEGAMOUTH_HOME once and fail with a readable message when it is
# missing: getenv() returns None for an unset variable, and concatenating
# None with a path suffix would otherwise die with an opaque TypeError.
_megamouth_home = getenv('MEGAMOUTH_HOME')
if _megamouth_home is None:
    raise SystemExit('MEGAMOUTH_HOME environment variable is not set; '
                     'it is needed to locate the default config files')

# NOTE(review): tag_trees appears in both `steps` and `optional` -- confirm
# that the duplicate registration is intended.
script(steps=[
    get_full, core_nlp, lemma_trees, ext_vars, offsets, prep_vars, prune_vars,
    tag_trees, ext_rels, arts2csv, vars2csv, rels2csv
],
       optional=[
           get_inp, remove_server, start_server, stop_server, add_cit,
           add_meta, clean, clean_cache, report, tag_trees, setup_server,
           toneo, ppgraph
       ],
       # Default config files, all located below $MEGAMOUTH_HOME/scripts.
       default_cfg_fnames=[
           _megamouth_home + '/scripts/megamouth.ini',
           _megamouth_home + '/scripts/full/megamouth-full.ini',
           _megamouth_home +
           '/scripts/full/plos/megamouth-full-plos-1.ini',
           _megamouth_home + '/scripts/local.ini'
       ])
Esempio n. 4
0
#!/usr/bin/env python
"""
process all full texts
"""

from os import getenv

from baleen.pipeline import script
from baleen.steps import *
from megamouth.steps import clean_all

# Resolve MEGAMOUTH_HOME once and fail with a readable message when it is
# missing: getenv() returns None for an unset variable, and concatenating
# None with a path suffix would otherwise die with an opaque TypeError.
_megamouth_home = getenv('MEGAMOUTH_HOME')
if _megamouth_home is None:
    raise SystemExit('MEGAMOUTH_HOME environment variable is not set; '
                     'it is needed to locate the default config files')

script(steps=[uniq, setup_server, multi_toneo, ppgraph],
       optional=[start_server, stop_server, report, clean, clean_cache],
       # Default config files, all located below $MEGAMOUTH_HOME/scripts.
       default_cfg_fnames=[
           _megamouth_home + '/scripts/megamouth.ini',
           _megamouth_home + '/scripts/full/megamouth-full.ini',
           _megamouth_home + '/scripts/local.ini'
       ])
Esempio n. 5
0
"""
pipeline for extracting variables from text
"""

from baleen.pipeline import script
from baleen.steps import *

# Pass the step lists to `script`; no default config file names are given
# for this pipeline.
script(
    steps=[
        core_nlp, lemma_trees, ext_vars, offsets, prep_vars, prune_vars,
        tocsv, setup_server, toneo, ppgraph, tag_trees, ext_rels, add_rels,
        add_cit, add_meta
    ],
    optional=[
        remove_server, start_server, stop_server, clean, clean_cache, report
    ])
Esempio n. 6
0
#!/usr/bin/env python

"""
process all full texts
"""

from os import getenv

from baleen.pipeline import script
from baleen.steps import *
from megamouth.steps import clean_all

# Resolve MEGAMOUTH_HOME once and fail with a readable message when it is
# missing: getenv() returns None for an unset variable, and concatenating
# None with a path suffix would otherwise die with an opaque TypeError.
_megamouth_home = getenv('MEGAMOUTH_HOME')
if _megamouth_home is None:
    raise SystemExit('MEGAMOUTH_HOME environment variable is not set; '
                     'it is needed to locate the default config files')

script(steps=[
        uniq,
        setup_server,
        multi_toneo,
        ppgraph],
    optional=[
        start_server,
        stop_server,
        report,
        clean,
        clean_cache],
    # Default config files, all located below $MEGAMOUTH_HOME/scripts.
    default_cfg_fnames=[
        _megamouth_home + '/scripts/megamouth.ini',
        _megamouth_home + '/scripts/full/megamouth-full.ini',
        _megamouth_home + '/scripts/local.ini'])
Esempio n. 7
0
"""
run processes to create Nature full corpus
"""

from baleen.pipeline import script
from baleen.steps import core_nlp, lemma_trees, ext_vars, \
    offsets, prep_vars, prune_vars

from nature.steps import download, ext_full, soa, tocsv, toneo, ppgraph, \
    setupserver, startserver, stopserver, vertical

# Pass the step lists to `script`, with two default config file names and
# 'FULL' as the default section.
script(
    steps=[
        download, ext_full, soa, core_nlp, lemma_trees, ext_vars, offsets,
        prep_vars, prune_vars, tocsv, toneo, ppgraph
    ],
    optional=[setupserver, startserver, stopserver, vertical],
    default_cfg_fnames=['nature-corpus.ini', 'local.ini'],
    default_section='FULL')
Esempio n. 8
0
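# NOTE(review): the opening and text of this module docstring are missing
# from the excerpt; only its closing delimiter was present here.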

from baleen.pipeline import script
from baleen.steps import split_sent, parse_sent, lemma_trees, ext_vars, \
    offsets, prep_vars, prune_vars

from nature.steps import ext_abs, ext_sent, brat, soa, tocsv, toneo, ppgraph, \
    setupserver, startserver, stopserver, vertical

# Pass the step lists to `script`, with two default config file names and
# 'ABSTRACTS' as the default section.
script(
    steps=[
        ext_abs, soa, split_sent, ext_sent, parse_sent, lemma_trees,
        ext_vars, offsets, prep_vars, prune_vars, tocsv, toneo, ppgraph
    ],
    optional=[setupserver, startserver, stopserver, vertical, brat],
    default_cfg_fnames=['nature-corpus.ini', 'local.ini'],
    default_section='ABSTRACTS')