def parse_args(args=None):
    """Parse command-line options for the ``tokenize`` / ``train`` commands.

    Top-level options (``--input``, ``--field``, ``--limit``, ``--n_jobs``,
    ``--output``) are declared on the main parser; each sub-command adds its
    own flag and stores its handler under the ``func`` attribute.

    :param args: argument list handed to the parser's ``parse_args``
        (``None`` is forwarded unchanged).
    :returns: the namespace produced by the parser.
    """
    cli = PathArgumentParser()
    add_option = cli.add_argument

    # Options declared on the top-level parser.
    add_option(
        '--input',
        nargs='*',
        type=GzipFileType('r'),
        default=[sys.stdin],
        help='Input file (in TSV format, optionally compressed)',
    )
    add_option(
        '--field',
        type=str,
        default='review',
        help='Field name (Default: review)',
    )
    add_option(
        '--limit',
        type=int,
        default=None,
        help='Only process this many lines (for testing)',
    )
    add_option(
        '--n_jobs',
        type=int,
        default=-1,
        help="Number of jobs to run",
    )
    add_option(
        '--output',
        type=GzipFileType('w'),
        default=sys.stdout,
        help='Output file',
    )

    commands = cli.add_subparsers()

    # "tokenize" sub-command: dispatches to run_tokenize.
    tokenize_cmd = commands.add_parser('tokenize')
    tokenize_cmd.add_argument(
        '--sentences', action='store_true', help='split on sentences')
    tokenize_cmd.set_defaults(func=run_tokenize)

    # "train" sub-command: dispatches to train_sentence_tokenizer.
    train_cmd = commands.add_parser('train')
    train_cmd.add_argument(
        '--verbose', action='store_true', help='be verbose')
    train_cmd.set_defaults(func=train_sentence_tokenizer)

    return cli.parse_args(args)
def parse_args(args=None):
    """Parse the command-line options for a delimited-file conversion.

    Recognised options: ``--fields`` (zero or more names), the input and
    output delimiters (tab and comma by default), ``--output_header`` and
    the ``--input`` / ``--output`` streams (stdin / stdout by default).

    :param args: argument list; ``None`` makes argparse read ``sys.argv``.
    :returns: the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    add_option = parser.add_argument

    add_option('--fields', nargs='*', default=None)

    # Delimiters: tab-separated in, comma-separated out unless overridden.
    add_option('--input_delimiter', default='\t', help='input delimiter')
    add_option('--output_delimiter', default=',', help='output delimiter')
    add_option('--output_header', action='store_true')

    # Streams fall back to the standard ones when no file is named.
    add_option('--input', type=GzipFileType('r'), default=sys.stdin)
    add_option('--output', type=GzipFileType('w'), default=sys.stdout)

    return parser.parse_args(args)
def parse_args(args=None):
    """Parse ``--input`` (zero or more files) and the mandatory ``--output``.

    :param args: argument list; ``None`` makes argparse read ``sys.argv``.
    :returns: the populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser()

    # With no --input given, the default is a one-element list holding stdin.
    cli.add_argument(
        '--input',
        type=GzipFileType('r'),
        nargs='*',
        default=[sys.stdin],
        help='Input file(s)',
    )

    # The output type is constructed with mode 'wb' and has no default.
    cli.add_argument(
        '--output',
        type=GzipFileType('wb'),
        required=True,
        help='Output file',
    )

    return cli.parse_args(args)
def parse_args(args=None):
    """Parse options naming the embedding model, training set and inputs.

    Every option is optional and defaults to ``None``.

    :param args: argument list handed to the parser's ``parse_args``
        (``None`` is forwarded unchanged).
    :returns: the namespace produced by the parser.
    """
    cli = PathArgumentParser()
    add_option = cli.add_argument

    add_option(
        '--embedding',
        type=str,
        metavar='FILE',
        default=None,
        help='Input word2vec (or doc2vec) model',
    )
    add_option(
        '--train',
        type=str,
        metavar='FILE',
        default=None,
        help='(Labeled) training set',
    )
    add_option(
        '--plot_features',
        type=str,
        default=None,
        help='file to save feature comparison to',
    )
    # Unlike the other options, --sentences is converted by GzipFileType
    # rather than kept as a plain string.
    add_option(
        '--sentences',
        type=GzipFileType('r'),
        default=None,
        help='File containing sentences in JSON format (implies doc2vec)',
    )
    add_option(
        '--vectors',
        metavar='FILE',
        type=str,
        default=None,
        help='File containing sentence vectors in Pickle format',
    )

    return cli.parse_args(args)
def parse_args(args=None):
    """Parse the options of the input-splitting command line.

    :param args: argument list; ``None`` makes argparse read ``sys.argv``.
    :returns: the populated ``argparse.Namespace`` with ``input``,
        ``num_splits``, ``output``, ``show_progress`` and ``overwrite``.
    """
    cli = argparse.ArgumentParser()

    # (flag, keyword arguments) pairs, registered in declaration order so
    # the generated --help text lists them in this order.
    option_specs = (
        ("--input", dict(type=GzipFileType('r'), default=sys.stdin,
                         help='input file')),
        ("--num_splits", dict(type=int, default=50,
                              help='Number of splits')),
        ("--output", dict(type=str, required=True,
                          help="output directory")),
        ("--show_progress", dict(action='store_true',
                                 help='show progress bar')),
        ("--overwrite", dict(action='store_true',
                             help='overwrite any existing files')),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args(args)
def test_noext(self):
    """Check a file named ``test`` (no extension) with both openers.

    Writes ``self.sample_strings`` via ``self.write_file`` and then runs
    ``self.eval_file`` on the result, once with ``open_gz`` and once with a
    default-constructed ``GzipFileType``.
    """
    path = self.write_file('test', self.sample_strings)

    def check_with(opener):
        # Same file and expected strings, different opener.
        self.eval_file(path, self.sample_strings, fun=opener)

    check_with(open_gz)
    check_with(GzipFileType())
def parse_args(args=None):
    """Parse command-line options for the ``mapper`` / ``reducer`` commands.

    The top-level parser only takes ``--logging``; every other option
    belongs to one of the two sub-commands, each of which stores its handler
    (``do_mapper`` or ``do_reducer``) under the ``func`` attribute.

    :param args: argument list handed to the parser's ``parse_args``
        (``None`` is forwarded unchanged).
    :returns: the namespace produced by the parser.
    """
    # ``logging._levelNames`` is a private attribute that only exists on
    # Python 2 / early Python 3. Resolve the name table through the public
    # API when it is available, then through the private Python 3 table, and
    # only then through the legacy attribute.
    get_mapping = getattr(logging, 'getLevelNamesMapping', None)
    if get_mapping is not None:
        level_table = get_mapping()
    else:
        level_table = getattr(logging, '_nameToLevel', None)
        if level_table is None:
            level_table = logging._levelNames
    # The legacy table also maps numeric levels back to names, so keep only
    # the string keys. Sorting makes the --help output deterministic.
    level_names = sorted(key for key in level_table if isinstance(key, str))

    parser = PathArgumentParser()
    parser.add_argument('--logging', type=str, default='WARN',
                        help="Logging level", choices=level_names)

    subparsers = parser.add_subparsers()

    # "mapper" sub-command.
    p_mapper = subparsers.add_parser('mapper')
    p_mapper.add_argument('--h0_err', type=float, default=1.0,
                          help='H0 error rate')
    p_mapper.add_argument('--h1_err', type=float, default=0.5,
                          help='H1 error rate')
    p_mapper.add_argument('--population_size', type=int, default=2000,
                          help='population size')
    p_mapper.add_argument('--sim_size', type=int, default=1000,
                          help='Simulation size')
    p_mapper.add_argument('--nclusters', type=int, default=20,
                          help='number of clusters to generate')
    p_mapper.add_argument('--join_negatives', type=int, default=0,
                          help='whether to join negatives (if split_join<0)')
    p_mapper.add_argument(
        '--split_join', type=int, default=0,
        help='number of splits (if positive) or joins (if negative) to perform'
    )
    p_mapper.add_argument('--sampling_warnings', type=int, default=0,
                          help='if true, show sampling warnings')
    p_mapper.add_argument('--output', type=GzipFileType('w'),
                          default=sys.stdout, help='Output file')
    p_mapper.add_argument('--metrics', type=str, required=True, nargs='*',
                          help='Which metrics to compute')
    p_mapper.set_defaults(func=do_mapper)

    # "reducer" sub-command.
    p_reducer = subparsers.add_parser('reducer')
    p_reducer.add_argument('--group_by', type=str, default=None,
                           help='Field to group by')
    p_reducer.add_argument('--x_axis', type=str, default=None,
                           help='Which column to plot as X axis')
    p_reducer.add_argument('--metrics', type=str, required=True, nargs='*',
                           help='Which metrics to compute')
    p_reducer.add_argument('--input', type=GzipFileType('r'),
                           default=sys.stdin, help='File input')
    p_reducer.add_argument('--output', type=str, metavar='DIR',
                           help='Output directory')
    p_reducer.add_argument('--fig_title', type=str, default=None,
                           help='Title (for figures generated)')
    p_reducer.add_argument('--fig_format', type=str, default='svg',
                           help='Figure format')
    p_reducer.add_argument('--legend_loc', type=str, default='lower left',
                           help='legend location')
    p_reducer.set_defaults(func=do_reducer)

    return parser.parse_args(args)