Exemple #1
0
def parse_args(args=None):
    """Build the command-line interface and parse ``args``.

    The top-level parser carries the options ``--input``, ``--field``,
    ``--limit``, ``--n_jobs`` and ``--output``.  Two sub-commands are
    registered, ``tokenize`` and ``train``; each stores its handler on the
    resulting namespace as ``func`` (``run_tokenize`` and
    ``train_sentence_tokenizer`` respectively).

    :param args: argument strings handed to the parser's ``parse_args``;
        ``None`` is forwarded unchanged (presumably meaning "use
        ``sys.argv``", assuming ``PathArgumentParser`` follows argparse).
    :returns: the namespace produced by the parser.
    """
    root = PathArgumentParser()

    # --- options defined on the top-level parser ---------------------------
    root.add_argument(
        '--input',
        type=GzipFileType('r'),
        default=[sys.stdin],
        nargs='*',
        help='Input file (in TSV format, optionally compressed)',
    )
    root.add_argument(
        '--field',
        type=str,
        default='review',
        help='Field name (Default: review)',
    )
    root.add_argument(
        '--limit',
        type=int,
        default=None,
        help='Only process this many lines (for testing)',
    )
    root.add_argument(
        '--n_jobs',
        type=int,
        default=-1,
        help="Number of jobs to run",
    )
    root.add_argument(
        '--output',
        type=GzipFileType('w'),
        default=sys.stdout,
        help='Output file',
    )

    # --- sub-commands; each one records its handler under ``func`` ----------
    commands = root.add_subparsers()

    tokenize_cmd = commands.add_parser('tokenize')
    tokenize_cmd.add_argument(
        '--sentences',
        action='store_true',
        help='split on sentences',
    )
    tokenize_cmd.set_defaults(func=run_tokenize)

    train_cmd = commands.add_parser('train')
    train_cmd.add_argument(
        '--verbose',
        action='store_true',
        help='be verbose',
    )
    train_cmd.set_defaults(func=train_sentence_tokenizer)

    return root.parse_args(args)
def parse_args(args=None):
    """Parse the field, delimiter, header and stream options.

    Options: ``--fields`` (zero or more values, default ``None``),
    ``--input_delimiter`` (default tab), ``--output_delimiter`` (default
    comma), the ``--output_header`` flag, and ``--input`` / ``--output``
    which default to ``sys.stdin`` / ``sys.stdout``.

    :param args: argument strings to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :returns: the populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--fields',
        nargs='*',
        default=None,
    )
    cli.add_argument(
        '--input_delimiter',
        default='\t',
        help='input delimiter',
    )
    cli.add_argument(
        '--output_delimiter',
        default=',',
        help='output delimiter',
    )
    cli.add_argument(
        '--output_header',
        action='store_true',
    )
    # Streams go through the project's GzipFileType converter when a path is
    # supplied on the command line; the defaults are the standard streams.
    cli.add_argument(
        '--input',
        type=GzipFileType('r'),
        default=sys.stdin,
    )
    cli.add_argument(
        '--output',
        type=GzipFileType('w'),
        default=sys.stdout,
    )
    return cli.parse_args(args)
Exemple #3
0
def parse_args(args=None):
    """Parse the ``--input`` and ``--output`` options.

    ``--input`` accepts zero or more values and defaults to a one-element
    list holding ``sys.stdin``; ``--output`` is mandatory and is converted
    with ``GzipFileType('wb')``.

    :param args: argument strings to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :returns: the populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser()
    add_option = cli.add_argument

    add_option('--input', type=GzipFileType('r'), nargs='*',
               default=[sys.stdin], help='Input file(s)')
    add_option('--output', type=GzipFileType('wb'), required=True,
               help='Output file')

    return cli.parse_args(args)
Exemple #4
0
def parse_args(args=None):
    """Parse the model, training-set and vector file options.

    Every option defaults to ``None``.  ``--embedding``, ``--train``,
    ``--plot_features`` and ``--vectors`` are kept as plain strings, while
    ``--sentences`` is converted with ``GzipFileType('r')``.

    :param args: argument strings handed to the parser's ``parse_args``;
        ``None`` is forwarded unchanged (presumably meaning "use
        ``sys.argv``", assuming ``PathArgumentParser`` follows argparse).
    :returns: the namespace produced by the parser.
    """
    cli = PathArgumentParser()
    add_option = cli.add_argument

    add_option('--embedding', type=str, metavar='FILE', default=None,
               help='Input word2vec (or doc2vec) model')
    add_option('--train', type=str, metavar='FILE', default=None,
               help='(Labeled) training set')
    add_option('--plot_features', type=str, default=None,
               help='file to save feature comparison to')
    add_option('--sentences', type=GzipFileType('r'), default=None,
               help='File containing sentences in JSON format (implies doc2vec)')
    add_option('--vectors', metavar='FILE', type=str, default=None,
               help='File containing sentence vectors in Pickle format')

    return cli.parse_args(args)
Exemple #5
0
def parse_args(args=None):
    """Parse the input, split-count, output-directory and flag options.

    ``--input`` defaults to ``sys.stdin``, ``--num_splits`` to 50, and
    ``--output`` (a string) is mandatory.  ``--show_progress`` and
    ``--overwrite`` are boolean flags that are off unless given.

    :param args: argument strings to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :returns: the populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser()
    add_option = cli.add_argument

    add_option("--input", type=GzipFileType('r'), default=sys.stdin,
               help='input file')
    add_option("--num_splits", type=int, default=50,
               help='Number of splits')
    add_option("--output", type=str, required=True,
               help="output directory")
    add_option("--show_progress", action='store_true',
               help='show progress bar')
    add_option("--overwrite", action='store_true',
               help='overwrite any existing files')

    return cli.parse_args(args)
Exemple #6
0
 def test_noext(self):
     # Write the sample strings to a file named 'test' (no extension), then
     # check it through both openers: the open_gz helper and a
     # default-constructed GzipFileType.
     path = self.write_file('test', self.sample_strings)
     self.eval_file(path, self.sample_strings, fun=open_gz)
     gzip_file_type = GzipFileType()
     self.eval_file(path, self.sample_strings, fun=gzip_file_type)
def parse_args(args=None):
    """Build the ``mapper`` / ``reducer`` command-line interface and parse it.

    The top-level parser only defines ``--logging``.  Each sub-command adds
    its own options and records its handler on the namespace as ``func``
    (``do_mapper`` or ``do_reducer``).

    :param args: argument strings handed to the parser's ``parse_args``;
        ``None`` is forwarded unchanged (presumably meaning "use
        ``sys.argv``", assuming ``PathArgumentParser`` follows argparse).
    :returns: the namespace produced by the parser.
    """
    parser = PathArgumentParser()

    # The level names used to be read from ``logging._levelNames``, a private
    # mapping that no longer exists on Python 3.4+ (AttributeError at parser
    # construction time).  Spelling the names out keeps the accepted values
    # identical to what that mapping provided and works on every version.
    level_names = [
        'CRITICAL',
        'ERROR',
        'WARN',
        'WARNING',
        'INFO',
        'DEBUG',
        'NOTSET',
    ]
    parser.add_argument('--logging',
                        type=str,
                        default='WARN',
                        help="Logging level",
                        choices=level_names)

    subparsers = parser.add_subparsers()

    # ---- "mapper" sub-command ---------------------------------------------
    p_mapper = subparsers.add_parser('mapper')
    p_mapper.add_argument('--h0_err',
                          type=float,
                          default=1.0,
                          help='H0 error rate')
    p_mapper.add_argument('--h1_err',
                          type=float,
                          default=0.5,
                          help='H1 error rate')
    p_mapper.add_argument('--population_size',
                          type=int,
                          default=2000,
                          help='population size')
    p_mapper.add_argument('--sim_size',
                          type=int,
                          default=1000,
                          help='Simulation size')
    p_mapper.add_argument('--nclusters',
                          type=int,
                          default=20,
                          help='number of clusters to generate')
    # The next three options are integers used as switches/counters; their
    # interpretation is up to the handler.
    p_mapper.add_argument('--join_negatives',
                          type=int,
                          default=0,
                          help='whether to join negatives (if split_join<0)')
    p_mapper.add_argument(
        '--split_join',
        type=int,
        default=0,
        help='number of splits (if positive) or joins (if negative) to perform'
    )
    p_mapper.add_argument('--sampling_warnings',
                          type=int,
                          default=0,
                          help='if true, show sampling warnings')
    p_mapper.add_argument('--output',
                          type=GzipFileType('w'),
                          default=sys.stdout,
                          help='Output file')
    # Required option, but nargs='*' means it may be given with no values.
    p_mapper.add_argument('--metrics',
                          type=str,
                          required=True,
                          nargs='*',
                          help='Which metrics to compute')
    p_mapper.set_defaults(func=do_mapper)

    # ---- "reducer" sub-command --------------------------------------------
    p_reducer = subparsers.add_parser('reducer')
    p_reducer.add_argument('--group_by',
                           type=str,
                           default=None,
                           help='Field to group by')
    p_reducer.add_argument('--x_axis',
                           type=str,
                           default=None,
                           help='Which column to plot as X axis')
    p_reducer.add_argument('--metrics',
                           type=str,
                           required=True,
                           nargs='*',
                           help='Which metrics to compute')
    p_reducer.add_argument('--input',
                           type=GzipFileType('r'),
                           default=sys.stdin,
                           help='File input')
    # Unlike the mapper, the reducer's --output is a plain string (a
    # directory per its help text) with no default.
    p_reducer.add_argument('--output',
                           type=str,
                           metavar='DIR',
                           help='Output directory')
    p_reducer.add_argument('--fig_title',
                           type=str,
                           default=None,
                           help='Title (for figures generated)')
    p_reducer.add_argument('--fig_format',
                           type=str,
                           default='svg',
                           help='Figure format')
    p_reducer.add_argument('--legend_loc',
                           type=str,
                           default='lower left',
                           help='legend location')
    p_reducer.set_defaults(func=do_reducer)

    namespace = parser.parse_args(args)
    return namespace