def parse_args(args=None):
    """Build the simulate/cluster/analyze/mapper/reducer CLI and parse it.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace with ``func`` set to the handler of
        the selected sub-command
    """
    parser = PathArgumentParser(
        description="Simulate data and/or run analysis")
    parser.add_argument(
        '--logging', type=str, default='WARN', help="Logging level",
        # BUG FIX: the original enumerated logging._levelNames, a private
        # attribute that exists only in Python 2 (removed in Python 3).
        # These are exactly the string keys that mapping exposed.
        choices=['CRITICAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG',
                 'NOTSET'])
    subparsers = parser.add_subparsers()

    # 'simulate': generate synthetic data
    p_simul = subparsers.add_parser('simulate', help='generate simulation')
    add_simul_args(p_simul)
    p_simul.add_argument(
        '--output', type=GzipFileType('w'), default=sys.stdout,
        help='File output')
    p_simul.set_defaults(func=do_simulation)

    # 'cluster': run clustering on previously generated data
    p_clust = subparsers.add_parser('cluster', help='run clustering')
    p_clust.add_argument(
        '--input', type=GzipFileType('r'), default=sys.stdin,
        help='File input')
    add_clust_args(p_clust)
    p_clust.add_argument(
        '--output', type=GzipFileType('w'), default=sys.stdout,
        help='File output')
    p_clust.set_defaults(func=do_cluster)

    # 'analyze': run analysis on clustering output
    p_analy = subparsers.add_parser('analyze', help='run analysis')
    p_analy.add_argument(
        '--input', type=GzipFileType('r'), default=sys.stdin,
        help='File input')
    add_analy_args(p_analy)
    p_analy.add_argument(
        '--output', type=GzipFileType('w'), default=sys.stdout,
        help='File output')
    p_analy.set_defaults(func=do_analyze)

    # 'mapper': all three steps in a single pass
    p_mapper = subparsers.add_parser(
        'mapper', help='Perform multiple steps')
    add_simul_args(p_mapper)
    add_clust_args(p_mapper)
    add_analy_args(p_mapper)
    p_mapper.add_argument(
        '--output', type=GzipFileType('w'), default=sys.stdout,
        help='File output')
    p_mapper.set_defaults(func=do_mapper)

    # 'reducer': summarize analysis results and emit figures
    p_reducer = subparsers.add_parser(
        'reducer', help='summarize analysis results')
    add_analy_args(p_reducer)
    p_reducer.add_argument(
        '--input', type=GzipFileType('r'), default=sys.stdin,
        help='File input')
    p_reducer.add_argument(
        '--fig_title', type=str, default=None,
        help='Title (for figures generated)')
    p_reducer.add_argument(
        '--fig_format', type=str, default='svg', help='Figure format')
    p_reducer.add_argument(
        '--output', type=str, metavar='DIR', help='Output directory')
    p_reducer.set_defaults(func=do_reducer)

    # BUG FIX: the original called parser.parse_args() with no arguments,
    # silently ignoring the ``args`` parameter; pass it through so callers
    # (and tests) can supply an explicit argv, matching the sibling
    # parse_args implementations in this file.
    namespace = parser.parse_args(args)
    return namespace
def parse_args(args=None):
    """Build the mapper/reducer simulation CLI and parse *args*.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: argparse.Namespace with ``func`` bound to the selected command
    """
    parser = PathArgumentParser()
    parser.add_argument(
        "--logging", type=str, default="WARN", help="Logging level",
        # BUG FIX: the original enumerated logging._levelNames, a private
        # attribute that exists only in Python 2 (removed in Python 3).
        # These are exactly the string keys that mapping exposed.
        choices=["CRITICAL", "ERROR", "WARN", "WARNING", "INFO", "DEBUG",
                 "NOTSET"],
    )
    subparsers = parser.add_subparsers()

    # 'mapper': run one simulation/clustering/scoring pass
    p_mapper = subparsers.add_parser("mapper")
    p_mapper.add_argument("--h0_err", type=float, default=1.0, help="H0 error rate")
    p_mapper.add_argument("--h1_err", type=float, default=0.5, help="H1 error rate")
    p_mapper.add_argument("--population_size", type=int, default=2000, help="population size")
    p_mapper.add_argument("--sim_size", type=int, default=1000, help="Simulation size")
    p_mapper.add_argument("--nclusters", type=int, default=20, help="number of clusters to generate")
    p_mapper.add_argument("--join_negatives", type=int, default=0, help="whether to join negatives (if split_join<0)")
    p_mapper.add_argument(
        "--split_join", type=int, default=0,
        help="number of splits (if positive) or joins (if negative) to perform"
    )
    p_mapper.add_argument("--sampling_warnings", type=int, default=0, help="if true, show sampling warnings")
    p_mapper.add_argument("--output", type=GzipFileType("w"), default=sys.stdout, help="Output file")
    p_mapper.add_argument("--metrics", type=str, required=True, nargs="*", help="Which metrics to compute")
    p_mapper.set_defaults(func=do_mapper)

    # 'reducer': aggregate mapper output and produce figures
    p_reducer = subparsers.add_parser("reducer")
    p_reducer.add_argument("--group_by", type=str, default=None, help="Field to group by")
    p_reducer.add_argument("--x_axis", type=str, default=None, help="Which column to plot as X axis")
    p_reducer.add_argument("--metrics", type=str, required=True, nargs="*", help="Which metrics to compute")
    p_reducer.add_argument("--input", type=GzipFileType("r"), default=sys.stdin, help="File input")
    p_reducer.add_argument("--output", type=str, metavar="DIR", help="Output directory")
    p_reducer.add_argument("--fig_title", type=str, default=None, help="Title (for figures generated)")
    p_reducer.add_argument("--fig_format", type=str, default="svg", help="Figure format")
    p_reducer.add_argument("--legend_loc", type=str, default="lower left", help="legend location")
    p_reducer.set_defaults(func=do_reducer)

    namespace = parser.parse_args(args)
    return namespace
def parse_args(args=None):
    """Assemble and evaluate the tokenize/train command line.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace with ``func`` set by the sub-command
    """
    cli = PathArgumentParser()
    cli.add_argument('--input', type=GzipFileType('r'), default=[sys.stdin],
                     nargs='*',
                     help='Input file (in TSV format, optionally compressed)')
    cli.add_argument('--field', type=str, default='review',
                     help='Field name (Default: review)')
    cli.add_argument('--limit', type=int, default=None,
                     help='Only process this many lines (for testing)')
    cli.add_argument('--n_jobs', type=int, default=-1,
                     help="Number of jobs to run")
    cli.add_argument('--output', type=GzipFileType('w'), default=sys.stdout,
                     help='Output file')
    commands = cli.add_subparsers()

    # 'tokenize' sub-command
    cmd_tokenize = commands.add_parser('tokenize')
    cmd_tokenize.add_argument('--sentences', action='store_true',
                              help='split on sentences')
    cmd_tokenize.set_defaults(func=run_tokenize)

    # 'train' sub-command
    cmd_train = commands.add_parser('train')
    cmd_train.add_argument('--verbose', action='store_true',
                           help='be verbose')
    cmd_train.set_defaults(func=train_sentence_tokenizer)

    return cli.parse_args(args)
def parse_args(args=None):
    """Define and parse options for training an embedding model.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace
    """
    argp = PathArgumentParser()
    argp.add_argument('--input', type=str, metavar='FILE', nargs='+',
                      help='Input files')
    argp.add_argument('--field', type=str, default='review',
                      help='Field name (Default: review)')
    argp.add_argument('--verbose', action='store_true', help='be verbose')
    argp.add_argument('--output', type=str, required=True,
                      help='where to save the model to')
    argp.add_argument('--limit', type=int, default=None,
                      help='(for debugging) limit input to n lines')
    argp.add_argument('--corpus_model', type=str, default=None,
                      help='where corpus model lives (GloVe)')
    argp.add_argument('--workers', type=int,
                      default=multiprocessing.cpu_count(),
                      help='Number of workers to use (default: same as number of CPUs)')
    argp.add_argument('--doc2vec', action='store_true',
                      help='use Doc2Vec instead of Word2Vec model')
    return argp.parse_args(args)
def parse_args(args=None):
    """Parse command-line options for embedding/vector inspection.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace
    """
    p = PathArgumentParser()
    p.add_argument('--embedding', type=str, metavar='FILE', default=None,
                   help='Input word2vec (or doc2vec) model')
    p.add_argument('--train', type=str, metavar='FILE', default=None,
                   help='(Labeled) training set')
    p.add_argument('--plot_features', type=str, default=None,
                   help='file to save feature comparison to')
    p.add_argument('--sentences', type=GzipFileType('r'), default=None,
                   help='File containing sentences in JSON format (implies doc2vec)')
    p.add_argument('--vectors', metavar='FILE', type=str, default=None,
                   help='File containing sentence vectors in Pickle format')
    return p.parse_args(args)
def parse_args(args=None):
    """Build the embedding/vector-inspection option parser and evaluate it.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace
    """
    parser = PathArgumentParser()
    # (flag, keyword arguments) pairs, registered in this exact order so the
    # generated --help output matches the hand-written version.
    option_table = [
        ('--embedding', dict(type=str, metavar='FILE', default=None,
                             help='Input word2vec (or doc2vec) model')),
        ('--train', dict(type=str, metavar='FILE', default=None,
                         help='(Labeled) training set')),
        ('--plot_features', dict(type=str, default=None,
                                 help='file to save feature comparison to')),
        ('--sentences', dict(type=GzipFileType('r'), default=None,
                             help='File containing sentences in JSON format (implies doc2vec)')),
        ('--vectors', dict(metavar='FILE', type=str, default=None,
                           help='File containing sentence vectors in Pickle format')),
    ]
    for flag, kwargs in option_table:
        parser.add_argument(flag, **kwargs)
    namespace = parser.parse_args(args)
    return namespace
def parse_args(args=None):
    """Collect options for embedding-model training and parse them.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace
    """
    opt_parser = PathArgumentParser()
    opt_parser.add_argument('--input', type=str, metavar='FILE', nargs='+',
                            help='Input files')
    opt_parser.add_argument('--verbose', action='store_true',
                            help='be verbose')
    opt_parser.add_argument('--output', type=str, required=True,
                            help='where to save the model to')
    opt_parser.add_argument('--limit', type=int, default=None,
                            help='(for debugging) limit input to n lines')
    opt_parser.add_argument('--corpus_model', type=str, default=None,
                            help='where corpus model lives (GloVe)')
    # default worker count follows the machine's CPU count
    opt_parser.add_argument('--workers', type=int,
                            default=multiprocessing.cpu_count(),
                            help='Number of workers to use (default: same as number of CPUs)')
    opt_parser.add_argument('--doc2vec', action='store_true',
                            help='use Doc2Vec instead of Word2Vec model')
    return opt_parser.parse_args(args)
def parse_args(args=None):
    """Set up the labeled/unlabeled tokenize-or-train CLI and parse it.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: parsed argparse.Namespace with ``func`` set by the sub-command
    """
    top = PathArgumentParser()
    top.add_argument('--input_labeled', type=GzipFileType('r'), default=(),
                     required=False, nargs='*',
                     help='Labeled input files (TSV format, optionally compressed)')
    top.add_argument('--input_unlabeled', type=GzipFileType('r'), default=(),
                     required=False, nargs='*',
                     help='Unlabeled input files (TSV format, optionally compressed)')
    top.add_argument('--n_jobs', type=int, default=-1,
                     help="Number of jobs to run")
    top.add_argument('--output', type=GzipFileType('w'), default=sys.stdout,
                     help='Output file')
    sub = top.add_subparsers()

    # 'tokenize' sub-command
    sp_tokenize = sub.add_parser('tokenize')
    sp_tokenize.add_argument('--sentences', action='store_true',
                             help='split on sentences')
    sp_tokenize.set_defaults(func=run_tokenize)

    # 'train' sub-command
    sp_train = sub.add_parser('train')
    sp_train.add_argument('--verbose', action='store_true',
                          help='be verbose')
    sp_train.set_defaults(func=train_sentence_tokenizer)

    return top.parse_args(args)
def parse_args(args=None):
    """Build the mapper/reducer simulation CLI and parse *args*.

    :param args: optional list of argument strings (defaults to sys.argv[1:])
    :return: argparse.Namespace with ``func`` bound to the selected command
    """
    parser = PathArgumentParser()
    parser.add_argument('--logging', type=str, default='WARN',
                        help="Logging level",
                        # BUG FIX: the original enumerated
                        # logging._levelNames, a private attribute that exists
                        # only in Python 2 (removed in Python 3). These are
                        # exactly the string keys that mapping exposed.
                        choices=['CRITICAL', 'ERROR', 'WARN', 'WARNING',
                                 'INFO', 'DEBUG', 'NOTSET'])
    subparsers = parser.add_subparsers()

    # 'mapper': run one simulation/clustering/scoring pass
    p_mapper = subparsers.add_parser('mapper')
    p_mapper.add_argument('--h0_err', type=float, default=1.0,
                          help='H0 error rate')
    p_mapper.add_argument('--h1_err', type=float, default=0.5,
                          help='H1 error rate')
    p_mapper.add_argument('--population_size', type=int, default=2000,
                          help='population size')
    p_mapper.add_argument('--sim_size', type=int, default=1000,
                          help='Simulation size')
    p_mapper.add_argument('--nclusters', type=int, default=20,
                          help='number of clusters to generate')
    p_mapper.add_argument('--join_negatives', type=int, default=0,
                          help='whether to join negatives (if split_join<0)')
    p_mapper.add_argument(
        '--split_join', type=int, default=0,
        help='number of splits (if positive) or joins (if negative) to perform'
    )
    p_mapper.add_argument('--sampling_warnings', type=int, default=0,
                          help='if true, show sampling warnings')
    p_mapper.add_argument('--output', type=GzipFileType('w'),
                          default=sys.stdout, help='Output file')
    p_mapper.add_argument('--metrics', type=str, required=True, nargs='*',
                          help='Which metrics to compute')
    p_mapper.set_defaults(func=do_mapper)

    # 'reducer': aggregate mapper output and produce figures
    p_reducer = subparsers.add_parser('reducer')
    p_reducer.add_argument('--group_by', type=str, default=None,
                           help='Field to group by')
    p_reducer.add_argument('--x_axis', type=str, default=None,
                           help='Which column to plot as X axis')
    p_reducer.add_argument('--metrics', type=str, required=True, nargs='*',
                           help='Which metrics to compute')
    p_reducer.add_argument('--input', type=GzipFileType('r'),
                           default=sys.stdin, help='File input')
    p_reducer.add_argument('--output', type=str, metavar='DIR',
                           help='Output directory')
    p_reducer.add_argument('--fig_title', type=str, default=None,
                           help='Title (for figures generated)')
    p_reducer.add_argument('--fig_format', type=str, default='svg',
                           help='Figure format')
    p_reducer.add_argument('--legend_loc', type=str, default='lower left',
                           help='legend location')
    p_reducer.set_defaults(func=do_reducer)

    namespace = parser.parse_args(args)
    return namespace