        algorithms=dict())
    for pipeline in pipelines:
        print('[{:.2f}%] algorithm={} doc={} ::'.format(
            100 * processed / total, pipeline.algorithm.id(), doc), end='')
        processed += 1
        d = 2 if pipeline.algorithm.id() == 'marques' else 0
        pipeline.run(strips, d)
        results[doc]['algorithms'][pipeline.algorithm.id()] = dict()
        results[doc]['algorithms'][pipeline.algorithm.id()]['time'] = \
            pipeline.t_algorithm
        results[doc]['algorithms'][pipeline.algorithm.id()]['compatibilities'] = \
            pipeline.algorithm.compatibilities.tolist()
        results[doc]['algorithms'][pipeline.algorithm.id()]['solvers'] = dict()
        acc_str = ''
        for solver, t_solver in zip(pipeline.solvers, pipeline.t_solvers):
            results[doc]['algorithms'][pipeline.algorithm.id()]['solvers'][
                solver.id()] = dict()
            results[doc]['algorithms'][pipeline.algorithm.id()]['solvers'][
                solver.id()]['solution'] = solver.solution
            results[doc]['algorithms'][pipeline.algorithm.id()]['solvers'][
                solver.id()]['time'] = t_solver
            acc_str += ' {}={:.2f}%'.format(
                solver.id(), 100 * accuracy(solver.solution, init_permutation))
        print(acc_str)

os.makedirs('results', exist_ok=True)
json.dump(results, open('results/default.json', 'w'))
print('Elapsed time={:.2f} sec.'.format(time.time() - t0_glob))
solver = SolverLS(maximize=True)
pipeline = Pipeline(algorithm, [solver])

# load and shuffle the strips
print('1) Load strips')
strips = Strips(path=args.doc, filter_blanks=True)
strips.shuffle()
init_permutation = strips.permutation()
print('Shuffled order: ' + str(init_permutation))

print('2) Results')
pipeline.run(strips)

# matrix -> list (displacements according to the neighboring strips in the solution)
compatibilities = pipeline.algorithm.compatibilities
displacements = pipeline.algorithm.displacements
solution = pipeline.solvers[0].solution
displacements = [
    displacements[prev][curr]
    for prev, curr in zip(solution[:-1], solution[1:])
]
corrected = [init_permutation[idx] for idx in solution]
print('Solution: ' + str(solution))
print('Correct order: ' + str(corrected))
print('Accuracy={:.2f}%'.format(100 * accuracy(solution, init_permutation)))
print('Qc={:.2f}%'.format(
    100 * Qc(compatibilities, init_permutation, pre_process=True, normalized=True)))

reconstruction = strips.image(order=solution, displacements=displacements)
plt.imshow(reconstruction, cmap='gray')
plt.axis('off')
plt.show()
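# Optional follow-up (an assumption, not part of the original demo): on a headless
# machine, the rendered reconstruction can be written to disk instead of (or in
# addition to) being displayed. 'reconstruction.png' is an arbitrary filename.
plt.imsave('reconstruction.png', reconstruction, cmap='gray')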
processed = 1
total = len(docs) * len(algorithms)
records = []
for algorithm in algorithms:
    d = 2 if algorithm.id() == 'marques' else 0
    for doc, strips in strips_all.items():
        print('[{:.2f}%] algorithm={} doc={}'.format(
            100 * processed / total, algorithm.id(), doc))
        processed += 1
        init_permutation = strips.permutation()
        compatibilities = algorithm(strips=strips, d=d).compatibilities
        qc = Qc(compatibilities, init_permutation, pre_process=False, normalized=True)
        for solver in solvers:
            solution = solver(instance=compatibilities).solution
            # some solutions for the tested algorithm are None due to the poor compat. eval.
            if solution is not None:
                acc = accuracy(solution, init_permutation)
                print(' => {} - acc={:.2f}% qc={:.2f}%'.format(
                    solver.id(), 100 * acc, 100 * qc))
                records.append([
                    algorithm.id(), solver.id(), doc, acc, qc, init_permutation,
                    solution, compatibilities.tolist()
                ])

os.makedirs('results', exist_ok=True)
json.dump(records, open('results/fair.json', 'w'))
print('Elapsed time={:.2f} sec.'.format(time.time() - t0_glob))
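# A minimal sketch (an assumption, not part of the original experiment) of how the
# dumped 'results/fair.json' records could be aggregated afterwards. It relies only on
# the record layout used above: [algorithm, solver, doc, acc, qc, ...]; the function
# name summarize() is hypothetical.
import json
from collections import defaultdict

def summarize(path='results/fair.json'):
    records = json.load(open(path))
    acc_by_method = defaultdict(list)
    for algorithm_id, solver_id, doc, acc, qc, *_ in records:
        acc_by_method[(algorithm_id, solver_id)].append(acc)
    for (algorithm_id, solver_id), accs in sorted(acc_by_method.items()):
        print('{} + {}: mean acc={:.2f}% over {} docs'.format(
            algorithm_id, solver_id, 100 * sum(accs) / len(accs), len(accs)))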
def reconstruct():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(description='Single reconstruction :: Proposed.')
    parser.add_argument(
        '-d', '--dataset', action='store', dest='dataset', required=False,
        type=str, default='cdip', help='Dataset [D1, D2, or cdip].'
    )
    parser.add_argument(
        '-t', '--thresh', action='store', dest='thresh', required=False,
        type=str, default='sauvola', help='Thresholding method [otsu or sauvola].'
    )
    parser.add_argument(
        '-m', '--model-id', action='store', dest='model_id', required=False,
        type=str, default=None, help='Model identifier (tag).'
    )
    parser.add_argument(
        '-i', '--input-size', action='store', dest='input_size', required=False,
        nargs=2, type=int, default=[3000, 32], help='Network input size (H x W).'
    )
    parser.add_argument(
        '-v', '--vshift', action='store', dest='vshift', required=False,
        type=int, default=10, help='Vertical shift range.'
    )
    parser.add_argument(
        '-r', '--results-id', action='store', dest='results_id', required=False,
        type=str, default=None, help='Identifier of the results file.'
    )
    parser.add_argument(
        '-fd', '--feat-dim', action='store', dest='feat_dim', required=False,
        type=int, default=64, help='Features dimensionality.'
    )
    parser.add_argument(
        '-fl', '--feat-layer', action='store', dest='feat_layer', required=False,
        type=str, default='drop9', help='Features layer.'
    )
    parser.add_argument(
        '-a', '--activation', action='store', dest='activation', required=False,
        type=str, default='sigmoid', help='Activation function (final net layer).'
    )
    args = parser.parse_args()

    input_size = tuple(args.input_size)
    assert args.dataset in ['D1', 'D2', 'cdip']
    assert args.thresh in ['otsu', 'sauvola']
    assert args.results_id is not None
    assert args.vshift >= 0
    assert input_size in [(3000, 32), (3000, 48), (3000, 64)]

    # algorithm definition (the model info file is read only once)
    info = json.load(open('traindata/{}/info.json'.format(args.model_id), 'r'))
    weights_path_left = info['best_model_left']
    weights_path_right = info['best_model_right']
    sample_height = info['sample_height']
    algorithm = Proposed(
        weights_path_left, weights_path_right, args.vshift, input_size,
        feat_dim=args.feat_dim, feat_layer=args.feat_layer,
        activation=args.activation, sample_height=sample_height,
        thresh_method=args.thresh, sess=sess
    )
    solver = SolverConcorde(maximize=False, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    if args.dataset == 'D1':
        docs = ['datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62) if i != 3]
    elif args.dataset == 'D2':
        docs = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    else:
        docs = ['datasets/D3/mechanical/D{:03}'.format(i) for i in range(1, 101)]  # cdip

    # results / initial configuration
    dir_name = 'results/proposed'
    os.makedirs(dir_name, exist_ok=True)
    results = {
        'model_id': args.model_id,
        'data': []  # experimental results data
    }
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)

    total = len(docs)
    for it, doc in enumerate(docs, 1):
        # load strips
        t0 = time.time()
        strips = Strips(path=doc, filter_blanks=True)
        strips.shuffle()
        init_permutation = strips.permutation()
        load_time = time.time() - t0

        # run the pipeline
        solution, _, displacements = pipeline.run(strips)

        # results
        acc = accuracy(solution, init_permutation)
        print('[{:.2f}%] doc={}, accuracy={:.2f}% inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'.format(
            100 * it / total, doc.split('/')[-1], 100 * acc,
            algorithm.inference_time, pipeline.comp_time, pipeline.opt_time
        ))
        sys.stdout.flush()
        results['data'].append({
            'doc': doc,
            'solution': solution,
            'accuracy': acc,
            'init_perm': init_permutation,
            'load_time': load_time,
            'comp_time': pipeline.comp_time,
            'opt_time': pipeline.opt_time,
            'displacements': displacements.tolist(),
            'inf_time': algorithm.inference_time,
            'prep_time': algorithm.preparation_time,
            'pw_time': algorithm.pairwise_time
        })

    sess.close()

    # dump results
    json.dump(results, open(results_fname, 'w'))
def reconstruct():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(description='Single reconstruction :: Sib18.')
    parser.add_argument('-d', '--dataset', action='store', dest='dataset',
                        required=False, type=str, default='cdip',
                        help='Dataset [D1, D2, D1+D2, or cdip].')
    parser.add_argument('-r', '--results-id', action='store', dest='results_id',
                        required=False, type=str, default=None,
                        help='Identifier of the results file.')
    parser.add_argument('-v', '--vshift', action='store', dest='vshift',
                        required=False, type=int, default=10,
                        help='Vertical shift range.')
    args = parser.parse_args()

    assert args.dataset in ['D1', 'D2', 'D1+D2', 'cdip']
    assert args.results_id is not None

    # algorithm definition
    weights_path = json.load(open('traindata/sib18/info.json', 'r'))['best_model']
    algorithm = Sib18('sn', weights_path, args.vshift, (3000, 32), num_classes=2,
                      thresh_method='sauvola', seed=SEED, sess=sess)
    solver = SolverConcorde(maximize=True, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    docs1 = ['datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62) if i != 3]
    docs2 = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    docs3 = ['datasets/D3/mechanical/D{:03}'.format(i) for i in range(1, 101)]  # cdip
    if args.dataset == 'D1+D2':
        docs = docs1 + docs2
    elif args.dataset == 'D1':
        docs = docs1
    elif args.dataset == 'D2':
        docs = docs2
    else:
        docs = docs3

    # results / initial configuration
    dir_name = 'results/sib18'
    os.makedirs(dir_name, exist_ok=True)
    results = {
        'model_id': 'sib18',
        'data': []  # experimental results data
    }
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)

    total = len(docs)
    for it, doc in enumerate(docs, 1):
        # load strips
        t0 = time.time()
        strips = Strips(path=doc, filter_blanks=True)
        strips.shuffle()
        init_permutation = strips.permutation()
        load_time = time.time() - t0

        # run the pipeline
        solution, _, displacements = pipeline.run(strips)

        # results
        acc = accuracy(solution, init_permutation)
        print('[{:.2f}%] doc={}, accuracy={:.2f}% inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'
              .format(100 * it / total, doc.split('/')[-1], 100 * acc,
                      algorithm.inference_time, pipeline.comp_time, pipeline.opt_time))
        sys.stdout.flush()
        results['data'].append({
            'doc': doc,
            'solution': solution,
            'accuracy': acc,
            'init_perm': init_permutation,
            'load_time': load_time,
            'comp_time': pipeline.comp_time,
            'opt_time': pipeline.opt_time,
            'displacements': displacements.tolist(),
            'inf_time': algorithm.inference_time,
            'prep_time': algorithm.preparation_time,
            'pw_time': algorithm.pairwise_time
        })

    sess.close()

    # dump results
    json.dump(results, open(results_fname, 'w'))
def reconstruct():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(
        description='Testing reconstruction of mixed documents.')
    parser.add_argument('-d', '--dataset', action='store', dest='dataset',
                        required=False, type=str, default='cdip',
                        help='Dataset [D1, D2, or cdip].')
    parser.add_argument('-r', '--results-id', action='store', dest='results_id',
                        required=False, type=str, default=None,
                        help='Identifier of the results file.')
    parser.add_argument('-v', '--vshift', action='store', dest='vshift',
                        required=False, type=int, default=10,
                        help='Vertical shift range.')
    args = parser.parse_args()

    assert args.dataset in ['D1', 'D2', 'D1+D2', 'cdip']
    assert args.results_id is not None

    # algorithm definition
    weights_path = json.load(open('traindata/sib18/info.json', 'r'))['best_model']
    algorithm = Sib18('sn', weights_path, args.vshift, (3000, 32), num_classes=2,
                      thresh_method='sauvola', seed=SEED, sess=sess)
    solver = SolverConcorde(maximize=True, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    if args.dataset == 'D1':
        docs = ['datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62) if i != 3]
    elif args.dataset == 'D2':
        docs = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    else:
        docs = ['datasets/D3/mechanical/D{:03}'.format(i) for i in range(1, 101)]  # cdip

    # results
    dir_name = 'results/sib18_multi'
    os.makedirs(dir_name, exist_ok=True)
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)
    compatibility_matrix_fname = '{}/{}.npy'.format(dir_name, args.results_id)
    results = {
        'model_id': 'sib18',
        'data': []  # experimental results data
    }

    # load strips
    print('loading strips')
    t0 = time.time()
    strips = Strips()  # empty
    for doc in docs:  # join documents' strips
        strips += Strips(path=doc, filter_blanks=True)
    strips.shuffle()
    load_time = time.time() - t0

    # run the pipeline
    print('running the pipeline')
    solution, compatibilities, displacements = pipeline.run(strips, verbose=True)

    # results
    acc = accuracy(solution, strips.permutation(), strips.sizes())
    print('accuracy={:.2f}% load_time={:.2f}s inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'
          .format(100 * acc, load_time, algorithm.inference_time,
                  pipeline.comp_time, pipeline.opt_time))
    sys.stdout.flush()
    results['data'].append({
        'solution': solution,
        'accuracy': acc,
        'init_perm': strips.permutation(),
        'sizes': strips.sizes(),
        'load_time': load_time,
        'comp_time': pipeline.comp_time,
        'opt_time': pipeline.opt_time,
        'displacements': displacements.tolist(),
        'inf_time': algorithm.inference_time,
        'prep_time': algorithm.preparation_time,
        'pw_time': algorithm.pairwise_time
    })
    sess.close()

    # dump results and comp. matrix
    json.dump(results, open(results_fname, 'w'))
    np.save(compatibility_matrix_fname, compatibilities)