def _cfg_path(relative):
    """Return `relative` appended to the value of $MEGAMOUTH_HOME.

    Exits with a readable message when MEGAMOUTH_HOME is not set; plain
    string concatenation would otherwise fail with an opaque TypeError
    (None + str).
    """
    home = getenv('MEGAMOUTH_HOME')
    if home is None:
        raise SystemExit('MEGAMOUTH_HOME environment variable is not set')
    return home + relative


# Hand the step lists and the default configuration file names to script().
# NOTE(review): tag_trees is listed both in `steps` and in `optional` --
# confirm that this duplication is intended.
script(
    steps=[
        get_abs,
        core_nlp,
        lemma_trees,
        ext_vars,
        offsets,
        prep_vars,
        prune_vars,
        tag_trees,
        ext_rels,
        arts2csv,
        vars2csv,
        rels2csv,
    ],
    optional=[
        get_inp,
        remove_server,
        start_server,
        stop_server,
        add_cit,
        add_meta,
        clean,
        clean_cache,
        report,
        tag_trees,
        setup_server,
        toneo,
        ppgraph,
    ],
    default_cfg_fnames=[
        _cfg_path('/scripts/megamouth.ini'),
        _cfg_path('/scripts/abs/megamouth-abs.ini'),
        _cfg_path('/scripts/abs/elsevier/megamouth-abs-elsevier-6.ini'),
        _cfg_path('/scripts/local.ini'),
    ])
def bibtex(records_dir, bib_dir, resume=False):
    # Thin command wrapper: forwards its arguments to lookup_bibtex.
    lookup_bibtex(records_dir, bib_dir, resume=resume)


# NOTE(review): the wrappers below carry no docstring of their own;
# presumably @docstring(...) supplies one from the wrapped function --
# confirm against the decorator's definition.
@docstring(rank_results)
def rank(results_file):
    rank_results(results_file)


@arg("--max-n-records", type=int)
@docstring(results_to_html)
def html(search_results_file, records_dir, results_file,
         max_n_records=None):
    results_to_html(
        search_results_file,
        records_dir,
        results_file,
        max_n_records)


# -----------------------------------------------------------------------------
# Optional steps
# -----------------------------------------------------------------------------

@arg("--max-n", type=int)
@docstring(get_terms)
def terms(csv_file, results_file, max_n=None):
    get_terms(csv_file, results_file, max_n)


# Register the wrappers with script(): four entries in `steps`, two in
# `optional`, and a single default configuration file name.
script(
    steps=[search, bibtex, rank, html],
    optional=[terms, clean],
    default_cfg_fnames=['nature-corpus.ini'])
#!/usr/bin/env python

""" process full-text corpus, segment Plos, part 1 """

from os import getenv

from baleen.pipeline import script
from baleen.steps import *
from megamouth.steps import get_full, get_inp


def _cfg_path(relative):
    """Return `relative` appended to the value of $MEGAMOUTH_HOME.

    Exits with a readable message when MEGAMOUTH_HOME is not set; plain
    string concatenation would otherwise fail with an opaque TypeError
    (None + str).
    """
    home = getenv('MEGAMOUTH_HOME')
    if home is None:
        raise SystemExit('MEGAMOUTH_HOME environment variable is not set')
    return home + relative


# Hand the step lists and the default configuration file names to script().
# NOTE(review): tag_trees is listed both in `steps` and in `optional` --
# confirm that this duplication is intended.
script(
    steps=[
        get_full,
        core_nlp,
        lemma_trees,
        ext_vars,
        offsets,
        prep_vars,
        prune_vars,
        tag_trees,
        ext_rels,
        arts2csv,
        vars2csv,
        rels2csv,
    ],
    optional=[
        get_inp,
        remove_server,
        start_server,
        stop_server,
        add_cit,
        add_meta,
        clean,
        clean_cache,
        report,
        tag_trees,
        setup_server,
        toneo,
        ppgraph,
    ],
    default_cfg_fnames=[
        _cfg_path('/scripts/megamouth.ini'),
        _cfg_path('/scripts/full/megamouth-full.ini'),
        _cfg_path('/scripts/full/plos/megamouth-full-plos-1.ini'),
        _cfg_path('/scripts/local.ini'),
    ])
#!/usr/bin/env python

""" process all full texts """

from os import getenv

from baleen.pipeline import script
from baleen.steps import *
# NOTE(review): clean_all is imported but not referenced below; the import is
# kept unchanged in case it is relied upon elsewhere.
from megamouth.steps import clean_all


def _cfg_path(relative):
    """Return `relative` appended to the value of $MEGAMOUTH_HOME.

    Exits with a readable message when MEGAMOUTH_HOME is not set; plain
    string concatenation would otherwise fail with an opaque TypeError
    (None + str).
    """
    home = getenv('MEGAMOUTH_HOME')
    if home is None:
        raise SystemExit('MEGAMOUTH_HOME environment variable is not set')
    return home + relative


# Hand the step lists and the default configuration file names to script().
script(
    steps=[
        uniq,
        setup_server,
        multi_toneo,
        ppgraph,
    ],
    optional=[
        start_server,
        stop_server,
        report,
        clean,
        clean_cache,
    ],
    default_cfg_fnames=[
        _cfg_path('/scripts/megamouth.ini'),
        _cfg_path('/scripts/full/megamouth-full.ini'),
        _cfg_path('/scripts/local.ini'),
    ])
""" pipeline for extracting variables from text """ from baleen.pipeline import script from baleen.steps import * script( steps=[core_nlp, lemma_trees, ext_vars, offsets, prep_vars, prune_vars, tocsv, setup_server, toneo, ppgraph, tag_trees, ext_rels, add_rels, add_cit, add_meta], optional=[remove_server, start_server, stop_server, clean, clean_cache, report])
#!/usr/bin/env python

""" process all full texts """

from os import getenv

from baleen.pipeline import script
from baleen.steps import *
# NOTE(review): clean_all is imported but not referenced below; the import is
# kept unchanged in case it is relied upon elsewhere.
from megamouth.steps import clean_all


def _cfg_path(relative):
    """Return `relative` appended to the value of $MEGAMOUTH_HOME.

    Exits with a readable message when MEGAMOUTH_HOME is not set; plain
    string concatenation would otherwise fail with an opaque TypeError
    (None + str).
    """
    home = getenv('MEGAMOUTH_HOME')
    if home is None:
        raise SystemExit('MEGAMOUTH_HOME environment variable is not set')
    return home + relative


# Hand the step lists and the default configuration file names to script().
script(
    steps=[
        uniq,
        setup_server,
        multi_toneo,
        ppgraph,
    ],
    optional=[
        start_server,
        stop_server,
        report,
        clean,
        clean_cache,
    ],
    default_cfg_fnames=[
        _cfg_path('/scripts/megamouth.ini'),
        _cfg_path('/scripts/full/megamouth-full.ini'),
        _cfg_path('/scripts/local.ini'),
    ])
""" run processes to create Nature full corpus """ from baleen.pipeline import script from baleen.steps import core_nlp, lemma_trees, ext_vars, \ offsets, prep_vars, prune_vars from nature.steps import download, ext_full, soa, tocsv, toneo, ppgraph, \ setupserver, startserver, stopserver, vertical script(steps=[download, ext_full, soa, core_nlp, lemma_trees, ext_vars, offsets, prep_vars, prune_vars, tocsv, toneo, ppgraph], optional=[ setupserver, startserver, stopserver, vertical], default_cfg_fnames=['nature-corpus.ini', 'local.ini'], default_section='FULL')
""" from baleen.pipeline import script from baleen.steps import split_sent, parse_sent, lemma_trees, ext_vars, \ offsets, prep_vars, prune_vars from nature.steps import ext_abs, ext_sent, brat, soa, tocsv, toneo, ppgraph, \ setupserver, startserver, stopserver, vertical script(steps=[ext_abs, soa, split_sent, ext_sent, parse_sent, lemma_trees, ext_vars, offsets, prep_vars, prune_vars, tocsv, toneo, ppgraph], optional=[ setupserver, startserver, stopserver, vertical, brat], default_cfg_fnames=['nature-corpus.ini', 'local.ini'], default_section='ABSTRACTS')