Example 1
def parse_yaml_file(self, file):
    # build the index of the project's YAML GUID files once, on first use
    if 'yaml' not in self.get_symbolic_data()['parse']:
        self.get_symbolic_data()['parse']['yaml'] = yaml_parser.get_all_guid_files(
            parser_utils.get_project_path('', file),
            parser_utils.parse_project)
    # parse the requested file and store the updated parse data
    self.get_symbolic_data()['parse'] = yaml_parser.parse_yaml(
        file,
        self.get_symbolic_data()['parse'])
Example 2
def parse():
    # store as much as you can in SmallfileWorkload object
    # so per-thread invocations inherit

    test_params = smf_test_params.smf_test_params()
    inv = test_params.master_invoke  # for convenience

    parser = argparse.ArgumentParser(
            description='parse smallfile CLI parameters')
    add = parser.add_argument
    add('--yaml-input-file',
            help='input YAML file containing all parameters below')
    add('--output-json',
            default=test_params.output_json,
            help='if true then output JSON-format version of results')
    add('--response-times',
            type=boolean, default=inv.measure_rsptimes,
            help='if true then record response time of each file op')
    add('--network-sync-dir',
            help='if --top not shared filesystem, provide shared filesystem directory')
    add('--operation',
            default='cleanup', choices=SmallfileWorkload.all_op_names,
            help='type of operation to perform on each file')
    add('--top',
            type=directory_list, default=inv.top_dirs,
            help='top directory or directories used by smallfile')
    add('--host-set',
            type=host_set, default=test_params.host_set,
            help='list of workload generator hosts (or file containing it) ')
    add('--launch-by-daemon',
            type=boolean, default=test_params.launch_by_daemon,
            help='use non-ssh launcher to get test running')
    add('--files',
            type=positive_integer, default=inv.iterations, 
            help='files processed per thread')
    add('--threads',
            type=positive_integer, default=test_params.thread_count, 
            help='threads per client')
    add('--files-per-dir',
            type=positive_integer, default=inv.files_per_dir, 
            help='files per (sub)directory')
    add('--dirs-per-dir',
            type=positive_integer, default=inv.dirs_per_dir, 
            help='subdirectories per directory')
    add('--record-size',
            type=positive_integer, default=inv.record_sz_kb, 
            help='record size (KB)')
    add('--file-size',
            type=non_negative_integer, default=inv.total_sz_kb,
            help='file size (KB)')
    add('--file-size-distribution',
            type=file_size_distrib, default=inv.filesize_distr,
            help='file size can be constant ("fixed") or random ("exponential")')
    add('--fsync',
            type=boolean, default=inv.fsync,
            help='call fsync() after each file is written/modified')
    add('--xattr-size',
            type=non_negative_integer, default=inv.xattr_size, 
            help='extended attribute size (bytes)')
    add('--xattr-count',
            type=non_negative_integer, default=inv.xattr_count, 
            help='number of extended attributes per file')
    add('--pause',
            type=non_negative_integer, default=inv.pause_between_files,
            help='pause between each file (microsec)')
    add('--stonewall',
            type=boolean, default=inv.stonewall,
            help='stop measuring as soon as first thread is done')
    add('--finish',
            type=boolean, default=inv.finish_all_rq,
            help='stop processing files as soon as first thread is done')
    add('--prefix',
            default=inv.prefix,
            help='filename prefix')
    add('--suffix',
            default=inv.suffix,
            help='filename suffix')
    add('--hash-into-dirs',
            type=boolean, default=inv.hash_to_dir,
            help='if true then pseudo-randomly place files into directories')
    add('--same-dir',
            type=boolean, default=inv.is_shared_dir,
            help='if true then all threads share the same directories')
    add('--verbose',
            type=boolean, default=inv.verbose,
            help='if true then log extra messages about test')
    add('--permute-host-dirs',
            type=boolean, default=test_params.permute_host_dirs,
            help='if true then shift clients to different host directories')
    add('--record-ctime-size',
            type=boolean, default=inv.record_ctime_size,
            help='if true then update file xattr with ctime+size')
    add('--verify-read',
            type=boolean, default=inv.verify_read,
            help='if true then check that data read = data written')
    add('--incompressible',
            type=boolean, default=inv.incompressible,
            help='if true then non-compressible data written')

    # these parameters shouldn't be used by mere mortals

    add('--min-dirs-per-sec',
            type=positive_integer, default=test_params.min_directories_per_sec,
            help=argparse.SUPPRESS)
    add('--log-to-stderr', type=boolean, default=inv.log_to_stderr, 
            help=argparse.SUPPRESS)
    add('--remote-pgm-dir', default=test_params.remote_pgm_dir, 
            help=argparse.SUPPRESS)
    add('--slave',
            help=argparse.SUPPRESS)
    add('--as-host',
            help=argparse.SUPPRESS)

    args = parser.parse_args()

    inv.opname = args.operation
    test_params.top_dirs = [ os.path.abspath(p) for p in args.top ]
    test_params.launch_by_daemon = args.launch_by_daemon
    inv.iterations = args.files
    test_params.thread_count = args.threads
    inv.files_per_dir = args.files_per_dir
    inv.dirs_per_dir = args.dirs_per_dir
    inv.record_sz_kb = args.record_size
    inv.total_sz_kb = args.file_size
    test_params.size_distribution = inv.filesize_distr = args.file_size_distribution
    inv.xattr_size = args.xattr_size
    inv.xattr_count = args.xattr_count
    inv.prefix = args.prefix
    inv.suffix = args.suffix
    inv.hash_to_dir = args.hash_into_dirs
    inv.pause_between_files = args.pause
    inv.stonewall = args.stonewall
    inv.finish_all_rq = args.finish
    inv.measure_rsptimes = args.response_times
    inv.fsync = args.fsync
    inv.record_ctime_size = args.record_ctime_size
    test_params.permute_host_dirs = args.permute_host_dirs
    test_params.output_json = args.output_json
    inv.incompressible = args.incompressible
    inv.verify_read = args.verify_read
    test_params.min_directories_per_sec = args.min_dirs_per_sec
    inv.is_shared_dir = args.same_dir
    inv.verbose = args.verbose
    inv.log_to_stderr = args.log_to_stderr
    test_params.remote_pgm_dir = args.remote_pgm_dir
    test_params.network_sync_dir = args.network_sync_dir
    test_params.is_slave = args.slave
    inv.onhost = smallfile.get_hostname(args.as_host)
    test_params.host_set = args.host_set

    # if a YAML input file was given, update test_params from it;
    # YAML parameters override the CLI parameters above

    if args.yaml_input_file:
        if not yaml_parser_installed:
            raise SmfParseException('python yaml module not available - is this PyPy?')
        yaml_parser.parse_yaml(test_params, args.yaml_input_file)
    if not test_params.network_sync_dir:
        test_params.network_sync_dir = os.path.join(test_params.top_dirs[0], 'network_shared')

    # validate parameters further now that we know what they all are

    sdmsg = 'directory %s containing network sync dir. must exist on all hosts (including this one)'
    parentdir = os.path.dirname(test_params.network_sync_dir)
    if not os.path.isdir(parentdir) and args.host_set is not None:
        raise SmfParseException(sdmsg % parentdir)

    if inv.record_sz_kb > inv.total_sz_kb and inv.total_sz_kb != 0:
        raise SmfParseException('record size cannot exceed file size')

    if inv.record_sz_kb == 0 and inv.verbose:
        print(('record size not specified, ' +
               'large files will default to record size %d KB') %
               (SmallfileWorkload.biggest_buf_size / inv.BYTES_PER_KB))

    if test_params.top_dirs:
        for d in test_params.top_dirs:
            if len(d) < 6:
                raise SmfParseException(
                        'directory less than 6 characters, ' +
                        'cannot use top of filesystem, too dangerous')
            if not os.path.isdir(d) and test_params.network_sync_dir is not None:
                raise SmfParseException(
                        'you must ensure that shared directory ' + d + 
                        ' is accessible ' +
                        'from this host and every remote host in test')
    if test_params.top_dirs:
        inv.set_top(test_params.top_dirs)
    else:
        test_params.top_dirs = inv.top_dirs

    if test_params.network_sync_dir:
        inv.network_dir = test_params.network_sync_dir
    else:
        test_params.network_sync_dir = inv.network_dir
    inv.starting_gate = os.path.join(inv.network_dir, 'starting_gate.tmp')

    if inv.iterations < 10:
        inv.stonewall = False

    if not test_params.is_slave:
        prm_list = test_params.human_readable()
        for (prm_name, prm_value) in prm_list:
            print('%40s : %s' % (prm_name, prm_value))

    test_params.recalculate_timeouts()
    return test_params
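
# The custom argparse "type=" callables used above (boolean, positive_integer,
# non_negative_integer, directory_list, host_set, file_size_distrib) are not
# shown in this excerpt. A minimal sketch of a few of them, assuming they
# follow the usual argparse convention of raising ArgumentTypeError on bad
# input (illustrative only, not the project's actual definitions; argparse is
# already imported by the code above):

def boolean(value):
    # map common true/false spellings onto a bool
    if value.lower() in ('y', 'yes', 't', 'true', '1'):
        return True
    if value.lower() in ('n', 'no', 'f', 'false', '0'):
        return False
    raise argparse.ArgumentTypeError('%s is not a boolean value' % value)

def positive_integer(value):
    n = int(value)
    if n <= 0:
        raise argparse.ArgumentTypeError('%s is not a positive integer' % value)
    return n

def non_negative_integer(value):
    n = int(value)
    if n < 0:
        raise argparse.ArgumentTypeError('%s must not be negative' % value)
    return n

def directory_list(value):
    # comma-separated list of top-level directories
    return [d for d in value.split(',') if d]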
Example 3
        return np.max(np.argwhere(labels.detach().numpy() == 3)[:, 1])
    elif unroll_steps_type == 'batch_number':
        return int(2 + np.ceil(epoch / 2))
    else:
        raise ConfigurationError('Unknown unroll_steps_type.')
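
# A quick check of the 'batch_number' schedule above:
# int(2 + np.ceil(epoch / 2)) gives 2, 3, 3, 4, 4, 5, 5, ... for
# epoch = 0, 1, 2, 3, 4, 5, 6, i.e. one extra unroll step every two epochs.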


if __name__ == '__main__':
    argv = sys.argv[1:]

    if len(argv) > 0:
        model_name = argv[0]
    else:
        model_name = 'paper-reference-soft_att'

    params = parse_yaml(model_name, 'param')
    print(f'run {model_name} on {torch.cuda.get_device_name()}')

    batch_size = params['batch_size']
    unroll_steps_type = params.get('unroll_steps_type', 'full_length')  # alternatives: 'batch_length', 'batch_number'

    grad_clip = params.get('grad_clip', None)

    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor(), normalize])

    if params.get('image_augmentation', False):
        transform_aug = T.Compose([
            T.Resize(256),
            T.RandomAffine(degrees=45, translate=(0.3, 0.3), scale=(0.9, 1.2), shear=10),
            T.RandomPerspective(), T.RandomHorizontalFlip(),
            T.CenterCrop(224), T.ToTensor(), normalize])
        data_train = Flickr8k(
            'data/Flicker8k_Dataset', 'data/Flickr_8k.trainImages.txt',
            'data/Flickr8k.token.txt', transform=transform_aug,
            max_vocab_size=params['max_vocab_size'], all_lower=params['all_lower'])
    else:
        data_train = Flickr8k(
            'data/Flicker8k_Dataset', 'data/Flickr_8k.trainImages.txt',
            'data/Flickr8k.token.txt', transform=transform,
            max_vocab_size=params['max_vocab_size'], all_lower=params['all_lower'])
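
    # The `transform` pipeline above maps an arbitrary PIL image to a
    # normalized 3x224x224 float tensor (standard ImageNet preprocessing),
    # e.g. (a minimal check, not part of the original script):
    #
    #     from PIL import Image
    #     img = Image.new('RGB', (500, 375))
    #     assert transform(img).shape == (3, 224, 224)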