def main():
    """Command-line entry point: parse arguments, then run the regexify step.

    Accepts one positional argument (``extract``) and two optional flags
    (``--outpath``, ``--logdir``). Arguments may also be supplied from a file
    referenced on the command line with an ``@`` prefix.

    Logging is initialized with the parsed ``logdir``; afterwards every parsed
    value (``logdir`` included) is forwarded as a keyword argument to
    ``regexify_keywords_to_file``.
    """
    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')

    # (name, add_argument keyword options); registered in this exact order.
    argument_specs = (
        ('extract', {
            'help': 'Extracted tsv file of bdb-extract process',
        }),
        ('--outpath', {
            'default': None,
            'help': 'Output file containing frequencies',
        }),
        ('--logdir', {
            'default': '.',
            'help': 'Directory to place log files.',
        }),
    )
    for name, options in argument_specs:
        cli.add_argument(name, **options)

    settings = vars(cli.parse_args())
    initialize_logging(logdir=settings['logdir'])
    regexify_keywords_to_file(**settings)
def main():
    """Command-line entry point: parse arguments, then build the brat dump.

    Positional arguments (all three are required):
        anndir: directory containing brat annotation files.
        txtdir: second input directory, passed to ``build_brat_dump`` after
            ``anndir``.
        outdir: output directory where the brat data will be stored.

    Optional arguments:
        --logdir: directory for log files (default: current directory).

    Arguments may also be read from a file referenced on the command line
    with an ``@`` or ``!`` prefix. After parsing, logging is initialized with
    ``logdir`` and the three directories are handed positionally to
    ``build_brat_dump``.
    """
    import argparse

    parser = argparse.ArgumentParser(fromfile_prefix_chars='@!')
    # argparse never applies a `default` to a positional that lacks
    # nargs='?', so no defaults are declared here: each one must be supplied.
    parser.add_argument(
        'anndir',
        help='Path to directory containing brat annotation files',
    )
    # NOTE(review): this help text used to repeat the `anndir` description
    # word for word; the wording assumes `txtdir` holds the text documents
    # that the annotations refer to -- confirm against build_brat_dump.
    parser.add_argument(
        'txtdir',
        help='Path to directory containing brat text files',
    )
    parser.add_argument(
        'outdir',
        help='Path to output directory where brat data will be stored.',
    )
    parser.add_argument(
        '--logdir',
        default='.',
        help='Directory to place log files.',
    )
    args = parser.parse_args()

    initialize_logging(logdir=args.logdir)
    build_brat_dump(args.anndir, args.txtdir, args.outdir)
def main():
    """Command-line entry point: parse arguments, then apply regexes to a corpus.

    One positional argument (``regex``) is required; everything else is an
    optional flag. Arguments may also be supplied from a file referenced on
    the command line with an ``@`` prefix.

    Logging is initialized with ``--logdir`` when it is given (and truthy),
    otherwise with ``--outpath``. Every parsed value is then forwarded as a
    keyword argument to ``apply_regex_to_corpus``.
    """
    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')
    add = cli.add_argument  # short alias; registration order is preserved

    add(
        'regex',
        help='Regex tsv file of bdb-extract-regex process; concept {tab} term {tab} regex',
    )

    # Where the documents come from: a directory of files or a database.
    add('--directory', help='Directory containing files to process', default=None)
    add('--extension', help='Only process files with this extension', default='.txt')
    add(
        '--connection-string',
        dest='connection_string',
        help='sqlalchemy-flavored connection string',
        default=None,
    )

    # Output and run control.
    add('--outpath', help='Output directory to place result.', default=None)
    add(
        '--exclude-captured',
        dest='exclude_captured',
        action='store_true',
        default=False,
        help='Only retain metadata; Exclude captured text as this may contain PII',
    )
    add(
        '--run-hours',
        dest='run_hours',
        type=int,
        default=None,
        help='End program after specified hours of running.',
    )
    add(
        '--logdir',
        default=None,
        help='Directory to place log files. If not specified, defaults to output directory.',
    )
    add(
        '--query',
        nargs='+',
        default=None,
        help=(
            'query to retrieve name, document_text pairs from database table; '
            'additional items can be included, but text must be last'
        ),
    )

    options = cli.parse_args()

    # Fall back to the output path when no log directory was given.
    if options.logdir:
        log_target = options.logdir
    else:
        log_target = options.outpath
    initialize_logging(logdir=log_target)

    apply_regex_to_corpus(**vars(options))
def main():
    """Command-line entry point: parse arguments, then extract keywords.

    One positional argument (``bratdb``) is required; the remaining flags
    control the output path, tag filtering, stopword handling, labelling and
    logging. Arguments may also be supplied from a file referenced on the
    command line with an ``@`` prefix.

    ``--allow-multiple-labels-per-term`` is stored inverted: it writes
    ``False`` into ``one_label_per_term``, which otherwise defaults to
    ``True``.

    Logging is initialized with the parsed ``logdir``; afterwards every parsed
    value is forwarded as a keyword argument to ``extract_keywords_to_file``.
    """
    import argparse

    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')
    add = cli.add_argument  # short alias; registration order is preserved

    add('bratdb', help='Path to brat data dump (result of build script)')
    add('--outpath', help='Output file containing frequencies', default=None)

    # Tag filters: each takes one or more values.
    add(
        '--ignore-tags',
        nargs='+',
        default=None,
        dest='ignore_tags',
        help='Ignore specified tags/labels; ignored if `keep-tags` is specified',
    )
    add(
        '--keep-tags',
        nargs='+',
        default=None,
        dest='keep_tags',
        help='Keep only tags listed here; overrides `ignore-tags`',
    )

    # Boolean switches.
    add(
        '--ignore-stopwords',
        action='store_true',
        default=False,
        dest='ignore_stopwords',
        help='Ignore stopwords',
    )
    add(
        '--allow-multiple-labels-per-term',
        action='store_false',
        default=True,
        dest='one_label_per_term',
        help='Allow all terms to be associated with more than one label/concept/term.',
    )

    add('--logdir', help='Directory to place log files.', default='.')

    settings = vars(cli.parse_args())
    initialize_logging(logdir=settings['logdir'])
    extract_keywords_to_file(**settings)