Exemple #1
0
def main():
    """Command-line entry point for turning extracted keywords into regexes.

    Declares one positional argument (``extract``) and two options
    (``--outpath``, ``--logdir``), initialises logging in the requested
    directory, and forwards every parsed value by keyword to
    ``regexify_keywords_to_file``.

    Arguments may also be read from a file by prefixing its name with ``@``.
    """
    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')

    # (name, add_argument keyword options), registered in display order.
    argument_table = (
        ('extract', {
            'help': 'Extracted tsv file of bdb-extract process',
        }),
        ('--outpath', {
            'default': None,
            'help': 'Output file containing frequencies',
        }),
        ('--logdir', {
            'default': '.',
            'help': 'Directory to place log files.',
        }),
    )
    for flag, options in argument_table:
        cli.add_argument(flag, **options)

    namespace = cli.parse_args()
    initialize_logging(logdir=namespace.logdir)
    # Every parsed value (including logdir) is passed through as a keyword.
    regexify_keywords_to_file(**vars(namespace))
Exemple #2
0
def main():
    """Command-line entry point for building a brat data dump.

    Positional arguments:
        anndir: directory containing brat annotation files (required).
        txtdir: directory containing the text files (required).
        outdir: output directory; optional, defaults to ``bratdb``.

    Options:
        --logdir: directory for log files (defaults to the current directory).

    Arguments may also be read from a file by prefixing its name with ``@``
    or ``!``. After parsing, logging is initialised and the three directories
    are passed positionally to ``build_brat_dump``.
    """
    import argparse

    parser = argparse.ArgumentParser(fromfile_prefix_chars='@!')
    parser.add_argument('anndir',
                        help='Path to directory containing brat annotation files')
    # A `default` on a required positional is never applied by argparse, so the
    # inert `default=None` is dropped here and txtdir stays required.
    # NOTE(review): if build_brat_dump accepts txtdir=None, this positional
    # could be made optional with nargs='?' as well -- confirm before doing so.
    # The help text previously duplicated anndir's; presumably this directory
    # holds the text files the annotations refer to.
    parser.add_argument('txtdir',
                        help='Path to directory containing brat text files')
    # nargs='?' makes the positional optional so the 'bratdb' default is
    # actually used when outdir is omitted.
    parser.add_argument('outdir', nargs='?', default='bratdb',
                        help='Path to output directory where brat data will be stored.')
    parser.add_argument('--logdir', default='.',
                        help='Directory to place log files.')
    args = parser.parse_args()

    initialize_logging(logdir=args.logdir)
    build_brat_dump(args.anndir, args.txtdir, args.outdir)
Exemple #3
0
def main():
    """Command-line entry point for applying a regex file to a corpus.

    Declares the positional ``regex`` file plus options describing where the
    documents come from (``--directory``/``--extension`` or
    ``--connection-string``/``--query``), where results go (``--outpath``),
    and run-time controls (``--exclude-captured``, ``--run-hours``,
    ``--logdir``).

    Logging is initialised in ``--logdir`` when it is set to a truthy value,
    otherwise in ``--outpath``. Every parsed value is then forwarded by
    keyword to ``apply_regex_to_corpus``.

    Arguments may also be read from a file by prefixing its name with ``@``.
    """
    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')

    # Input regex file.
    cli.add_argument(
        'regex',
        help='Regex tsv file of bdb-extract-regex process; concept {tab} term {tab} regex',
    )

    # Corpus read from the filesystem.
    cli.add_argument(
        '--directory',
        help='Directory containing files to process',
        default=None,
    )
    cli.add_argument(
        '--extension',
        help='Only process files with this extension',
        default='.txt',
    )

    # Corpus read from a database.
    cli.add_argument(
        '--connection-string',
        dest='connection_string',
        help='sqlalchemy-flavored connection string',
        default=None,
    )

    # Output and run-time behaviour.
    cli.add_argument(
        '--outpath',
        help='Output directory to place result.',
        default=None,
    )
    cli.add_argument(
        '--exclude-captured',
        dest='exclude_captured',
        action='store_true',
        default=False,
        help='Only retain metadata; Exclude captured text as this may contain PII',
    )
    cli.add_argument(
        '--run-hours',
        dest='run_hours',
        type=int,
        default=None,
        help='End program after specified hours of running.',
    )
    cli.add_argument(
        '--logdir',
        default=None,
        help='Directory to place log files. If not specified, defaults to output directory.',
    )
    cli.add_argument(
        '--query',
        nargs='+',
        default=None,
        help='query to retrieve name, document_text pairs from database table;'
             ' additional items can be included, but text must be last',
    )

    options = cli.parse_args()

    # Fall back to the output path when no (truthy) log directory was given.
    log_location = options.logdir if options.logdir else options.outpath
    initialize_logging(logdir=log_location)

    # Every parsed value (including logdir) is passed through as a keyword.
    apply_regex_to_corpus(**vars(options))
Exemple #4
0
def main():
    """Command-line entry point for extracting keywords from a brat data dump.

    Declares the positional ``bratdb`` path and options for the output file,
    tag filtering (``--ignore-tags``, ``--keep-tags``), stopword handling,
    and log directory. ``--allow-multiple-labels-per-term`` stores ``False``
    into ``one_label_per_term``, which is ``True`` by default.

    After parsing, logging is initialised in ``--logdir`` and every parsed
    value is forwarded by keyword to ``extract_keywords_to_file``.

    Arguments may also be read from a file by prefixing its name with ``@``.
    """
    import argparse

    cli = argparse.ArgumentParser(fromfile_prefix_chars='@')
    register = cli.add_argument

    register(
        'bratdb',
        help='Path to brat data dump (result of build script)',
    )
    register(
        '--outpath',
        help='Output file containing frequencies',
        default=None,
    )
    register(
        '--ignore-tags',
        nargs='+',
        default=None,
        dest='ignore_tags',
        help='Ignore specified tags/labels; ignored if `keep-tags` is specified',
    )
    register(
        '--keep-tags',
        nargs='+',
        default=None,
        dest='keep_tags',
        help='Keep only tags listed here; overrides `ignore-tags`',
    )
    register(
        '--ignore-stopwords',
        action='store_true',
        default=False,
        dest='ignore_stopwords',
        help='Ignore stopwords',
    )
    # Inverted flag: passing it turns one_label_per_term off.
    register(
        '--allow-multiple-labels-per-term',
        action='store_false',
        default=True,
        dest='one_label_per_term',
        help='Allow all terms to be associated with more than one label/concept/term.',
    )
    register(
        '--logdir',
        help='Directory to place log files.',
        default='.',
    )

    namespace = cli.parse_args()
    initialize_logging(logdir=namespace.logdir)
    # Every parsed value (including logdir) is passed through as a keyword.
    extract_keywords_to_file(**vars(namespace))