Example #1
    def __init__(self):
        self.default_logdir = cutils.get_config_for_module(
            "model_xvalidator")['default']['logdir']
        print("default logdir set to {}".format(self.default_logdir))
        parser = argparse.ArgumentParser(
            description="Perform Kfold cross-validation on a given neural "
            "network with the given dataset.")

        # cross-validation settings
        group = parser.add_argument_group(title="Cross-validation parameters")
        group.add_argument('--num_crossvals',
                           type=atypes.int_range(1),
                           required=True,
                           help='number of cross validations to perform')

        # network to train
        group = parser.add_argument_group(title="Network to use")
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m')

        # training_parameters
        group = parser.add_argument_group(title="Training parameters to use")
        net_args.add_training_settings_args(group,
                                            num_epochs={
                                                'required': False,
                                                'default': 11,
                                                'short_alias': 'e'
                                            })
        group.add_argument('--tb_dir',
                           default=self.default_logdir,
                           help=('directory to store training logs for '
                                 'tensorboard.'))

        # dataset input
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        item_args = dargs.ItemTypeArgs()
        atype = dargs.arg_type.INPUT
        group = parser.add_argument_group(title="Input dataset")
        dset_args.add_dataset_arg_double(group, atype)
        item_args.add_item_type_args(group, atype)
        group.add_argument('--test_items_count',
                           type=atypes.int_range(1),
                           help='number of dataset items to include in the '
                           'test set. Overrides test_items_fraction.')
        group.add_argument('--test_items_fraction',
                           type=float,
                           default=0.1,
                           help='fraction of dataset items to include in the '
                           'test set.')

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
Example #2
    def __init__(self):
        self.default_logdir = cutils.get_config_for_module(
            "model_trainer")['default']['logdir']
        parser = argparse.ArgumentParser(
            description="Train network using provided dataset")

        # dataset input
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        item_args = dargs.ItemTypeArgs()
        atype = dargs.arg_type.INPUT
        group = parser.add_argument_group(title="Input dataset")
        dset_args.add_dataset_arg_double(group, atype)
        item_args.add_item_type_args(group, atype)
        group.add_argument('--test_items_count',
                           type=atypes.int_range(1),
                           help='Number of dataset items to include in the '
                           'test set. Overrides test_items_fraction.')
        group.add_argument('--test_items_fraction',
                           type=float,
                           default=0.1,
                           help='Fraction of dataset items to include in the '
                           'test set.')
        modes = net_cons.DATASET_SPLIT_MODES
        group.add_argument('--split_mode',
                           choices=modes,
                           required=True,
                           help='Method of splitting the test items subset '
                           'from the input dataset.')

        # network to train
        group = parser.add_argument_group(title="Network configuration")
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m')
        group.add_argument('--tb_dir',
                           default=self.default_logdir,
                           help=('directory to store training logs for '
                                 'tensorboard.'))
        group.add_argument('--save',
                           action='store_true',
                           help=('save the model after training. Model files '
                                 'are saved under tb_dir as net.network_name/'
                                 'net.network_name.tflearn.*'))

        # training settings
        group = parser.add_argument_group(title="Training parameters")
        net_args.add_training_settings_args(group,
                                            num_epochs={
                                                'required': False,
                                                'default': 11,
                                                'short_alias': 'e'
                                            })

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
Example #3
    def __init__(self):
        parser = argparse.ArgumentParser(
            description=('Create classification report in HTML format from '
                         'provided evaluation results in TSV format'))

        # input tsv
        parser.add_argument('infile',
                            nargs='?',
                            type=argparse.FileType('r'),
                            default=sys.stdin,
                            help=('name of input TSV to read from. If not '
                                  'provided, read from stdin.'))

        # output settings
        group = parser.add_argument_group(title="Output settings")
        group.add_argument('--tablesize',
                           type=atypes.int_range(1),
                           help=('Maximum number of table rows per html '
                                 'report file.'))
        group.add_argument('--logdir',
                           help=('Directory to store output logs. If a '
                                 'non-default directory is used, it must '
                                 'exist prior to calling this script.'))
        item_args = dargs.ItemTypeArgs(out_item_prefix='add')
        help = {
            k: 'add image placeholder for {}'.format(desc)
            for k, desc in item_args.item_descriptions.items()
        }
        item_args.add_item_type_args(group, dargs.arg_type.OUTPUT, help=help)

        # meta-information to include in report headers
        g_title = "Meta-information to include in report headers"
        group = parser.add_argument_group(title=g_title)
        nargs.add_network_arg(group, required=False)
        nargs.add_model_file_arg(group, required=False)
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.INPUT,
                                         required=False)

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
Example #4
    def add_packet_arg(self, parser, short_alias=None, required=True):
        """
            Add argument for packet dimensions to the given parser

            Parameters
            ----------
            :param parser:  the argparse parser to add this argument to
            :type parser:   argparse.ArgumentParser
        """
        aliases = []
        if short_alias is not None:
            aliases.append('-{}'.format(short_alias))
        aliases.append('--{}'.format(self.long_alias))
        if self._no_EC:
            meta = self.metavar[:3]
            nargs = 3
        else:
            meta = self.metavar
            nargs = 5
        parser.add_argument(*aliases, metavar=meta, nargs=nargs,
                            type=atypes.int_range(1), required=required,
                            help=self.helpstr)
        return parser
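The atypes.int_range(...) and atypes.float_range(...) calls used throughout these examples are argparse type factories: each returns a callable that argparse applies to the raw string value. A minimal sketch of what int_range likely looks like; the actual cmdint.common.argparse_types implementation may differ:

import argparse

def int_range(minimum, maximum=None):
    """Return an argparse type callable checking ints in [minimum, maximum]."""
    def _checker(raw_value):
        value = int(raw_value)
        if value < minimum:
            raise argparse.ArgumentTypeError(
                'value must be at least {}'.format(minimum))
        if maximum is not None and value > maximum:
            raise argparse.ArgumentTypeError(
                'value must be at most {}'.format(maximum))
        return value
    return _checker

float_range(0) and float_range(0, 1), as used for --bg_lambda and --legend_loc below, would be the float analogue of the same pattern.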
Example #5
    # (snippet truncated in the source; earlier arguments such as
    # --start_item are not shown)
    group.add_argument('--stop_item',
                       default=None,
                       type=int,
                       help=('Index of the dataset item after the last '
                             'item to use for evaluation.'))

    group = parser.add_argument_group(title='Attribute settings')
    group.add_argument('--attribute',
                       required=True,
                       help='Metadata attribute whose value distribution '
                       'to check.')
    group.add_argument('--attribute_type',
                       choices=('str', 'float', 'int'),
                       default='str',
                       help='Type of data in the attribute, default: string')
    group.add_argument('--fp_precision',
                       type=atypes.int_range(0),
                       help='Number of decimal places to round floating-point '
                       'attributes to, default: 2')
    group.add_argument('--nullable',
                       default=False,
                       action='store_true',
                       help='If set, script will assume this attribute '
                       'can be unset or set to None')

    args = parser.parse_args(sys.argv[1:])
    outfile = args.outfile
    attr = args.attribute
    name, srcdir = dset_args.get_dataset_double(args, dargs.arg_type.INPUT)
    io_handler = io_utils.DatasetFsPersistencyHandler(load_dir=srcdir)
    dset = io_handler.load_dataset(name)
    items_slice = slice(args.start_item, args.stop_item)
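The snippet ends after loading the dataset. Given the --attribute options above, the likely continuation is a tally of attribute values over the selected item slice. A hypothetical sketch only: dset.metadata, its per-item dict layout, and outfile being an open file object are assumptions, not confirmed API:

    from collections import Counter

    # hypothetical: tally attribute values over the selected metadata slice
    counts = Counter(item_meta[attr] for item_meta in dset.metadata[items_slice])
    for value, num in counts.most_common():
        outfile.write('{}\t{}\n'.format(value, num))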
Example #6
import cmdint.common.argparse_types as atypes

DEFAULT_ARGPARSE_VALUES = {
    'short_alias': None,
    'type': str,
    'default': None,
    'action': 'store',
    'required': False,
}

TRAIN_SETTINGS_ARGS = {
    'num_epochs': {
        'type': atypes.int_range(1),
        'required': True,
        'help': 'Number of training epochs',
    },
    'optimizer': {
        'help': 'Gradient descent optimizer to use',
    },
    'learning_rate': {
        'type': float,
        'help': 'Learning rate to use',
    },
    'loss_fn': {
        'help': 'Loss function to use',
    },
    'batch_size': {
        'type': atypes.int_range(1),
        'help': 'Batch size for training data',
    },
    'validation_batch_size': {
        # entry truncated in the source; completed by analogy with
        # 'batch_size' above
        'type': atypes.int_range(1),
        'help': 'Batch size for validation data',
    },
}
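Given these tables, net_args.add_training_settings_args (called in Examples #1 and #2 with a num_epochs override) presumably builds one argument per entry by layering the module defaults, the per-argument settings, and any caller overrides. A sketch under that assumption; the real helper may differ:

def add_training_settings_args(group, **overrides):
    # hypothetical reconstruction of the net_args helper
    for name, settings in TRAIN_SETTINGS_ARGS.items():
        merged = dict(DEFAULT_ARGPARSE_VALUES)
        merged.update(settings)
        merged.update(overrides.get(name, {}))
        aliases = ['--{}'.format(name)]
        short_alias = merged.pop('short_alias')
        if short_alias is not None:
            aliases.insert(0, '-{}'.format(short_alias))
        group.add_argument(*aliases, **merged)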
Example #7
    def __init__(self):
        parser = argparse.ArgumentParser(
            description="Create dataset from multiple files with packets",
            formatter_class=argparse.RawTextHelpFormatter)

        # global settings
        parser.add_argument('--log_level',
                            default='INFO',
                            choices=logging._nameToLevel.keys(),
                            help='global logging output level (default: '
                            '%(default)s)')
        parser.add_argument('--max_cache_size',
                            default=40,
                            type=atypes.int_range(1),
                            help=('maximum size of parsed files cache'))
        parser.add_argument('--num_evicted',
                            default=10,
                            type=atypes.int_range(1),
                            help=('number of cache entries to evict when the '
                                  'cache gets full'))

        # input settings
        group = parser.add_argument_group(title='Input settings')
        packet_args = cargs.PacketArgs()
        packet_args.add_packet_arg(group)
        group.add_argument('-f',
                           '--filelist',
                           required=True,
                           help=('input files list in TSV format'))

        # output (dataset) settings
        group = parser.add_argument_group(title='Output settings')
        out_aliases = {'dataset name': 'name', 'dataset directory': 'outdir'}
        dset_args = dargs.DatasetArgs(output_aliases=out_aliases)
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.OUTPUT,
                                         dir_short_alias='d',
                                         dir_default='.',
                                         name_short_alias='n')

        # output (dataset) data item settings
        group = parser.add_argument_group(title='Data item settings')
        item_args = dargs.ItemTypeArgs()
        item_args.add_item_type_args(group, dargs.arg_type.OUTPUT)
        group.add_argument('--dtype',
                           default='float32',
                           help='cast extracted items to the given numpy data '
                           'type (default: %(default)s)')

        # output (dataset) target settings
        group = parser.add_argument_group(title='Item target settings')
        group.add_argument('--target',
                           required=True,
                           nargs=2,
                           metavar=('METHOD', 'ARGS'),
                           help=_TARGET_ARG_HELP)

        # output (dataset) metadata settings
        group = parser.add_argument_group(title='Metadata settings')
        group.add_argument('--extra_metafields',
                           nargs='+',
                           default=[],
                           metavar='FIELD',
                           help=('additional fields in the event list to '
                                 'include in dataset metadata'))

        subparsers = parser.add_subparsers(
            dest="converter", help='Packet to item conversion methods')

        def_m = subparsers.add_parser("default",
                                      help=("Convert events to dataset items "
                                            "using default transformer"))
        def_m.add_argument('--gtu_range',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('START_GTU', 'STOP_GTU'),
                           required=True,
                           help=('range of GTUs to use'))
        def_m.add_argument('--packet_idx',
                           type=atypes.int_range(0),
                           required=True,
                           help=('index of packet to use'))

        apack = subparsers.add_parser("allpack",
                                      help=("Convert events to dataset items "
                                            "using all_packets transformer"))
        apack.add_argument('--gtu_range',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('START_GTU', 'STOP_GTU'),
                           required=True,
                           help=('range of GTUs containing shower.'))

        gpack = subparsers.add_parser("gtupack",
                                      help=("Convert events to dataset items "
                                            "using gtu_in_packet transformer"))
        gpack.add_argument('--num_gtu_around',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('NUM_BEFORE', 'NUM_AFTER'),
                           help=('number of GTU/frames before and after '
                                 'gtu_in_packet to include in dataset items'))
        gpack.add_argument('--no_bounds_adjust',
                           action='store_true',
                           help=('do not shift the frames window if part of '
                                 'it is out of packet bounds. An exception '
                                  'will be raised instead.'))

        self.parser = parser
        self.packet_args = packet_args
        self.dset_args = dset_args
        self.item_args = item_args
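Because add_subparsers is created with dest="converter", a driver script can dispatch on the chosen subcommand after parsing. A minimal sketch; the CLI class name is hypothetical:

import sys

cli = DatasetConverterCli()  # hypothetical name for the class above
args = cli.parser.parse_args(sys.argv[1:])
if args.converter == 'default':
    settings = {'gtu_range': args.gtu_range, 'packet_idx': args.packet_idx}
elif args.converter == 'allpack':
    settings = {'gtu_range': args.gtu_range}
elif args.converter == 'gtupack':
    settings = {'num_gtu_around': args.num_gtu_around,
                'adjust_bounds': not args.no_bounds_adjust}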
Example #8
    def __init__(self):
        parser = argparse.ArgumentParser(
            description='Evaluate model classification sensitivity for given '
            'target as function of a specific data attribute.')

        # input tsv
        parser.add_argument('infiles',
                            nargs='+',
                            metavar='INFILE',
                            help='Evaluation results in TSV format. Multiple '
                            'files can be specified; results from each are '
                            'drawn as a separate plot line.')

        # input settings
        group = parser.add_argument_group(title="Evaluation settings")
        group.add_argument('--class_target',
                           required=True,
                           choices=cons.CLASSIFICATION_TARGETS,
                           help='Classification target for which to evaluate '
                           'sensitivity metrics.')
        group.add_argument('--column',
                           required=True,
                           help='Name of attribute from TSV file to use for '
                           'evaluating classification sensitivity.')
        group.add_argument('--column_type',
                           default='str',
                           choices=cutils.SUPOORTED_CAST_TYPES,
                           help='Type of data in the column, default: string.')
        group.add_argument('--add_yerr',
                           action='store_true',
                           help='Evaluate sensitivity error as well.')

        # output settings
        group = parser.add_argument_group(title="Output settings")
        group.add_argument('--outfile',
                           required=True,
                           help='Output filename (minus extension).')

        # plot settings
        group = parser.add_argument_group(title="Plot settings")
        group.add_argument('--xscale',
                           choices=('linear', 'log'),
                           default='linear',
                           help='Scale to use for the plot x-axis (linear or '
                           'logarithmic, default: linear).')
        group.add_argument('--xlabel',
                           help='Custom label for the plot X axis, defaults '
                           'to the "column" arg')
        group.add_argument('--ylabel',
                           help='Custom label for the plot Y axis, defaults '
                           'to "Sensitivity"')
        group.add_argument('--legend_loc',
                           type=atypes.float_range(0, 1),
                           nargs=2,
                           metavar=('X', 'Y'),
                           help='Location of plot legend relative to the '
                           'bottom-left of the plot.')
        group.add_argument('--plot_colors',
                           nargs='*',
                           metavar='COLOR',
                           help='Color of each line in the plot. Must be same '
                           'length as list of infiles if provided.')
        group.add_argument('--plot_labels',
                           nargs='*',
                           metavar='LABEL',
                           help='Add plot legend using passed in labels for '
                           'each plotline. Must be same length as list '
                           'of infiles. If not provided, do not add plot '
                           'legend.')

        # font size settings
        group = parser.add_argument_group(title="Font size settings")
        group.add_argument('--fontsize',
                           type=atypes.int_range(1),
                           help='Global default fontsize of plot elements.')
        group.add_argument('--legend_fontsize',
                           type=atypes.int_range(1),
                           help='Font size of plot legend.')
        group.add_argument('--label_fontsize',
                           type=atypes.int_range(1),
                           help='Font size of plot axes labels.')
        group.add_argument('--ticks_fontsize',
                           type=atypes.int_range(1),
                           nargs=2,
                           metavar=('MAJOR', 'MINOR'),
                           help='Font size of plot axes tick markers.')
        self.parser = parser
Example #9
    def __init__(self):
        self.parser = argparse.ArgumentParser(
            description="Create simulated air shower data as numpy arrays")
        out_aliases = {'dataset name': 'name', 'dataset directory': 'outdir'}
        self.packet_args = cargs.PacketArgs()
        self.dset_args = dargs.DatasetArgs(output_aliases=out_aliases)
        self.item_args = dargs.ItemTypeArgs()

        ds_group = self.parser.add_argument_group('dataset configuration')
        # packet dimensions
        self.packet_args.add_packet_arg(ds_group, required=True)
        # output dataset
        atype = dargs.arg_type.OUTPUT
        self.dset_args.add_dataset_arg_double(ds_group, atype)
        self.item_args.add_item_type_args(ds_group, atype)
        ds_group.add_argument('--num_data',
                              required=True,
                              type=atypes.int_range(1),
                              help=('Number of data items (both noise and '
                                    'shower)'))
        ds_group.add_argument('--dtype',
                              default='uint8',
                              help=('Data type of dataset items (default: '
                                    'uint8)'))

        shower_group = self.parser.add_argument_group('shower properties')
        # arguments qualifying shower property ranges
        args = [
            'shower_max', 'duration', 'track_length', 'start_gtu', 'start_y',
            'start_x'
        ]
        reqs = [True, True, True, False, False, False]
        descs = [
            'Peak relative diff. between shower track and bg pixel values',
            'Number of GTU or frames containing shower track pixels',
            'Length of shower tracks as viewed in the yx projection',
            'First GTU or packet frame containing shower pixels',
            'Start_gtu frame Y coordinate from which the shower tracks begin',
            'Start_gtu frame X coordinate from which the shower tracks begin'
        ]
        types = ([atypes.int_range(1)] * 3) + ([atypes.int_range(0)] * 3)
        for idx in range(len(args)):
            arg = args[idx]
            cargs.add_number_range_arg(shower_group,
                                       arg,
                                       arg_desc=descs[idx],
                                       required=reqs[idx],
                                       arg_type=types[idx])

        bg_group = self.parser.add_argument_group('background properties')
        # additional arguments applying to packet background
        cargs.add_number_range_arg(bg_group,
                                   'bg_lambda',
                                   required=True,
                                   arg_type=atypes.float_range(0),
                                   arg_desc=('Bg pixel values average '
                                             '(Poisson distribution lambda)'))
        cargs.add_number_range_arg(bg_group,
                                   'bad_ECs',
                                   default=(0, 0),
                                   arg_type=atypes.int_range(-1),
                                   arg_desc='Bad ECs count per data item')
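All of these wrappers share the same shape: the parser is assembled in __init__ and exposed, together with the dset_args/item_args helpers, as attributes. A typical driver mirrors the parse-and-load pattern from Example #5; the class name below is hypothetical, and the OUTPUT variant of get_dataset_double is assumed symmetric to the INPUT one:

import sys

cli = ShowerDatasetCli()  # hypothetical name for the Example #9 class
args = cli.parser.parse_args(sys.argv[1:])
name, outdir = cli.dset_args.get_dataset_double(args, dargs.arg_type.OUTPUT)
print('creating dataset "{}" in {}'.format(name, outdir))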