        model.save()
        if args.verbosity >= 1:
            sys.stdout.write(' (model saved)')
            sys.stdout.flush()
        time_since_save = timedelta()
    elif args.verbosity >= 1:
        # sys.stdout.write(' ')
        sys.stdout.flush()
    before = datetime.now()
    train = {name: 0.0 for name in train}
    n = 0
else:
    for iteration in range(iteration_start, iteration_end + 1):
        generated = dataset.generate(n=args.batch_size, mode='train', noise_range=args.pixel_noise)
        model(data=generated, optimize=True, dropout=dropout)
        if (iteration % args.evaluation_frequency == 0 or iteration == 1
                or iteration == args.evaluation_frequency // 2 or iteration == iteration_end):
            generated = dataset.generate(n=args.evaluation_size, mode='train', noise_range=args.pixel_noise)
            train = model(query=query, data=generated)
            generated = dataset.generate(n=args.evaluation_size, mode='validation', noise_range=args.pixel_noise)
            validation = model(query=query, data=generated)
            after = datetime.now()
            if args.report_file:
                with open(args.report_file, 'a') as filehandle:
                    filehandle.write(str(iteration))
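# Hedged illustration (hypothetical values, not part of the original script):
# the condition above triggers an evaluation at iteration 1, at half the
# evaluation frequency, at every multiple of it, and at the final iteration.
evaluation_frequency = 100
iteration_start, iteration_end = 1, 300
triggers = [
    it for it in range(iteration_start, iteration_end + 1)
    if it % evaluation_frequency == 0 or it == 1
    or it == evaluation_frequency // 2 or it == iteration_end
]
print(triggers)  # [1, 50, 100, 200, 300]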
    time=datetime.now().strftime('%H:%M:%S'),
    dtype=dataset.type,
    name=dataset.name,
    mode=(' ' + mode if mode else '')))
sys.stdout.write(' 0% 0/{parts} (time per part: n/a)'.format(parts=num_parts))
sys.stdout.flush()
for part in range(1, num_parts + 1):
    before = datetime.now()
    if args.unmanaged and len(parts) == 1 and parts[0] == 1:
        path = directory
    else:
        path = os.path.join(directory, 'part{}'.format(start + part))
    generated = dataset.generate(
        n=args.instances, mode=mode, noise_range=args.pixel_noise,
        include_model=args.include_model, alternatives=True)
    if generated is None:
        assert False
    elif tf_records_flag:
        tf_util.write_records(dataset=dataset, records=generated, path=path)
    else:
        dataset.serialize(
            path=path, generated=generated, archive=args.archive,
            concat_worlds=args.concatenate_images, html=args.html)
        # if args.html and dataset.type == 'agreement':
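# Hedged sketch (illustration only, not the original script): one way to keep
# the running 'time per part' average that the progress line above reports as
# 'n/a' before the first part completes. All values here are hypothetical.
import time
from datetime import datetime, timedelta

num_parts = 4
durations = []
for part in range(1, num_parts + 1):
    before = datetime.now()
    time.sleep(0.01)  # stand-in for generating and serializing one part
    durations.append(datetime.now() - before)
    time_per_part = sum(durations, timedelta()) / len(durations)
    print('{:3.0f}% {}/{} (time per part: {})'.format(
        part * 100.0 / num_parts, part, num_parts, time_per_part))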
from shapeworld import dataset

# Load the pre-generated dataset from disk.
dataset = dataset(dtype='agreement', name='oneshape_simple_textselect', config='load(../data/oneshape_simple_textselect)')
generated = dataset.generate(n=250, mode='train')

k = ['caption_str', 'texts_str', 'pred_items']
# Print each value name and its type; for 'target', also print the array shape.
for l in generated:
    print(l, type(generated[l]))
    if l == 'target':
        print(generated[l].shape)
# Show the first ten instances.
for i in range(10):
    print(f'Prediction items: {generated[k[2]][i]}, caption: {generated[k[0]][i]}, texts: {generated[k[1]][i]}')
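# Hedged usage note: besides the string values printed above, the generated
# batch should also contain a numpy array of world images. With ShapeWorld's
# default world size of 64x64 RGB its shape would be (250, 64, 64, 3); the
# key 'world' is assumed from the standard agreement dataset values.
import numpy as np

worlds = generated['world']
assert isinstance(worlds, np.ndarray)
print(worlds.shape)  # expected: (250, 64, 64, 3)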
from shapeworld import dataset
import pprint

dataset = dataset(
    dtype='agreement',
    name='oneshape_simple_textselect',
)
generated = dataset.generate(n=30, mode='train', noise_range=0.1, include_model=True)
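# Hedged sketch: pprint is presumably imported to inspect the symbolic world
# models requested via include_model=True. Assuming the per-instance models
# are stored under the key 'world_model', the first one could be printed as:
pprint.pprint(generated['world_model'][0])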
report_file_dir = os.path.dirname(args.report_file)
if not os.path.isdir(report_file_dir):
    os.makedirs(report_file_dir)
with open(args.report_file, 'w') as filehandle:
    filehandle.write('iteration,train loss,train accuracy,validation loss,validation accuracy\n')
iteration_end = iteration_start + args.iterations - 1

if args.evaluate:
    # evaluation
    sys.stdout.write('{} evaluate model...\n'.format(datetime.now().strftime('%H:%M:%S')))
    sys.stdout.write(' ')
    sys.stdout.flush()
    generated = dataset.generate(n=args.evaluation_size, mode='train')
    feed_dict = {placeholder: generated[value] for value, placeholder in feed_dict_assignment.items()}
    feed_dict[dropout] = 0.0
    training_accuracy = session.run(fetches=accuracy, feed_dict=feed_dict)
    sys.stdout.write('training={:.3f}'.format(training_accuracy))
    generated = dataset.generate(n=args.evaluation_size, mode='validation')
    feed_dict = {placeholder: generated[value] for value, placeholder in feed_dict_assignment.items()}
    feed_dict[dropout] = 0.0
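# Hedged sketch (illustration, not the original code): appending one data row
# per evaluation that matches the CSV header written above. 'train' and
# 'validation' are assumed to be dicts with 'loss' and 'accuracy' entries,
# as suggested by the training loop's query results.
with open(args.report_file, 'a') as filehandle:
    filehandle.write('{},{},{},{},{}\n'.format(
        iteration, train['loss'], train['accuracy'],
        validation['loss'], validation['accuracy']))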
elif args.directory_unmanaged:
    assert not args.append
    assert not args.parts or len(args.parts) == 1
    modes = (args.mode,)
    directories = (args.directory_unmanaged,)
    parts = args.parts or (1,)
    start_part = (0,)
else:
    assert False

if len(parts) == 1 and parts[0] == 1:
    sys.stdout.write('{} generate {} {}{} data...\n'.format(
        datetime.now().strftime('%H:%M:%S'), dataset.type, dataset.name,
        ' ' + modes[0] if modes[0] else ''))
    sys.stdout.flush()
    generated = dataset.generate(n=args.instances, mode=modes[0], noise=(not args.no_pixel_noise), include_model=args.include_model)
    dataset.serialize(directory=directories[0], generated=generated, archive=args.archive)
    sys.stdout.write('{} data generation completed!\n'.format(datetime.now().strftime('%H:%M:%S')))
    sys.stdout.flush()
else:
    for mode, directory, num_parts, start in zip(modes, directories, parts, start_part):
        if args.captioner_statistics:
            filehandle = open(os.path.join(directory, 'captioner_statistics.csv'), 'a' if args.append else 'w')
            dataset.collect_captioner_statistics(filehandle=filehandle, append=args.append)
        sys.stdout.write('{} generate {}{} data...\n'.format(
            datetime.now().strftime('%H:%M:%S'), dataset, ' ' + mode if mode else ''))
        sys.stdout.write(' 0% 0/{} (time per part: n/a)'.format(num_parts))
        sys.stdout.flush()
        for part in range(num_parts):
            before = datetime.now()
            generated = dataset.generate(n=args.instances, mode=mode, noise=(not args.no_pixel_noise), include_model=args.include_model)
            dataset.serialize(directory=directory, generated=generated, name='part{}'.format(start + part), archive=args.archive)
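# Hypothetical argparse setup (flag names are assumptions mirroring the
# attributes used above, not ShapeWorld's actual CLI), sufficient to drive
# the unmanaged-directory branch of the script:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', default=None)
parser.add_argument('--directory-unmanaged', dest='directory_unmanaged', default=None)
parser.add_argument('--parts', nargs='*', type=int, default=None)
parser.add_argument('--instances', type=int, default=100)
parser.add_argument('--append', action='store_true')
parser.add_argument('--no-pixel-noise', dest='no_pixel_noise', action='store_true')
parser.add_argument('--include-model', dest='include_model', action='store_true')
parser.add_argument('--archive', default=None)
parser.add_argument('--captioner-statistics', dest='captioner_statistics', action='store_true')
args = parser.parse_args()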