def write_file(group):
    """Write the 5-column score file for one data group.

    For every real-access and attack file of ``group`` the scores are loaded
    from ``args.inputdir``, NaN entries are dropped and the mean of the first
    ``args.average`` remaining entries is written as one line of
    ``<args.outputdir>/<group>-5col.txt``.

    The function reads ``args`` and ``db`` from the enclosing scope.

    Args:
        group: one of ``'train'``, ``'devel'`` or ``'test'``.

    Raises:
        RuntimeError: if ``group`` is not one of the supported names.
    """
    if args.verbose:
        print("Processing '%s' group..." % group)

    # Resolve the group before touching the file system, so that an invalid
    # name does not leave an empty output file (and an open handle) behind.
    if group == 'train':
        reals, attacks = db.get_train_data()
    elif group == 'devel':
        reals, attacks = db.get_devel_data()
    elif group == 'test':
        reals, attacks = db.get_test_data()
    else:
        raise RuntimeError("group parameter has to be train, devel or test")

    total = len(reals) + len(attacks)
    counter = 0

    outpath = os.path.join(args.outputdir, '%s-5col.txt' % group)
    # The context manager guarantees the file is closed even if loading one
    # of the inputs fails half-way through.
    with open(outpath, 'wt') as out:
        for objects, is_attack in ((reals, False), (attacks, True)):
            for obj in objects:
                counter += 1
                if args.verbose:
                    print("Processing file %s [%d/%d]..."
                          % (obj.make_path(), counter, total))
                arr = obj.load(args.inputdir, '.hdf5')
                arr = arr[~numpy.isnan(arr)]  # remove NaN entries => invalid
                avg = numpy.mean(arr[:args.average])
                # This is a tremendous disencapsulation, but can't do it
                # better for now
                client_id = obj._File__f.client.id
                if is_attack:
                    out.write('%d %d attack %s %.5e\n'
                              % (client_id, client_id, obj.make_path(), avg))
                else:
                    out.write('%d %d %d %s %.5e\n'
                              % (client_id, client_id, client_id,
                                 obj.make_path(), avg))
def write_file(group):
    """Write the 5-column score file for one data group.

    For every real-access and attack file of ``group`` the scores are loaded
    from ``args.inputdir``, NaN entries are dropped and the mean of the
    remaining entries in ``[beginScoresInterval:endScoresInterval]`` is
    written as one line of ``<args.outputdir>/<group>-5col.txt``.

    The function reads ``args``, ``db``, ``beginScoresInterval`` and
    ``endScoresInterval`` from the enclosing scope.

    Args:
        group: one of ``'train'``, ``'devel'`` or ``'test'``.

    Raises:
        RuntimeError: if ``group`` is not one of the supported names.
    """
    if args.verbose:
        print("Processing '%s' group..." % group)

    # Resolve the group before touching the file system, so that an invalid
    # name does not leave an empty output file (and an open handle) behind.
    if group == 'train':
        reals, attacks = db.get_train_data()
    elif group == 'devel':
        reals, attacks = db.get_devel_data()
    elif group == 'test':
        reals, attacks = db.get_test_data()
    else:
        raise RuntimeError("group parameter has to be train, devel or test")

    total = len(reals) + len(attacks)
    counter = 0

    outpath = os.path.join(args.outputdir, '%s-5col.txt' % group)
    # The context manager guarantees the file is closed even if loading one
    # of the inputs fails half-way through.
    with open(outpath, 'wt') as out:
        for objects, is_attack in ((reals, False), (attacks, True)):
            for obj in objects:
                counter += 1
                if args.verbose:
                    print("Processing file %s [%d/%d]..."
                          % (obj.make_path(), counter, total))
                arr = obj.load(args.inputdir, '.hdf5')
                arr = arr[~numpy.isnan(arr)]  # remove NaN entries => invalid
                avg = numpy.mean(arr[beginScoresInterval:endScoresInterval])
                client_id = obj.get_client_id()
                if is_attack:
                    out.write('%d %d attack %s %.5e\n'
                              % (client_id, client_id, obj.make_path(), avg))
                else:
                    out.write('%d %d %d %s %.5e\n'
                              % (client_id, client_id, client_id,
                                 obj.make_path(), avg))
def main():
    """Run the time analysis over a directory of scores.

    Parses the command line, builds the full paths of the development and
    test score files of the selected database, discovers the window-size and
    overlap with ``get_parameters``, evaluates a threshold on the development
    set with ``ml.time.eval_threshold`` and uses ``ml.time.Analyzer`` on the
    test set to write an RST table, a list of misclassified files and a PDF
    plot into the output directory.

    Raises:
        RuntimeError: if the first two development files disagree on the
            detected window-size/overlap.
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'scores')
    OUTPUTDIR = os.path.join(basedir, 'time-analysis')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, nargs='?', default=INPUTDIR,
        help='directory containing the scores to be analyzed '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results '
             '(defaults to "%(default)s").')
    parser.add_argument(
        '-a', '--average', default=False, action='store_true',
        dest='average',
        help='if set, average thresholds instead of applying a score '
             'thresholding at every window interval')
    parser.add_argument(
        '-m', '--min-hter', default=False, action='store_true',
        dest='minhter',
        help='if set, uses the min. HTER threshold instead of the EER '
             'threshold on the development set')
    parser.add_argument(
        '-v', '--verbose', default=False, action='store_true',
        dest='verbose', help='increases the script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    if not os.path.exists(args.inputdir):
        parser.error("input directory does not exist")

    if not os.path.exists(args.outputdir):
        if args.verbose:
            print("Creating output directory `%s'..." % args.outputdir)
        bob.db.utils.makedirs_safe(args.outputdir)

    db = args.cls(args)
    devel = dict(zip(('real', 'attack'), db.get_devel_data()))
    test = dict(zip(('real', 'attack'), db.get_test_data()))

    # make full paths
    for subset in (devel, test):
        for kind in ('real', 'attack'):
            subset[kind] = [k.make_path(args.inputdir, '.hdf5')
                            for k in subset[kind]]

    # finds out window-size and overlap
    args.windowsize, args.overlap = get_parameters(devel['real'][0])
    if args.verbose:
        print("Discovered parameters:")
        print(" * window-size: %d" % args.windowsize)
        print(" * overlap : %d" % args.overlap)

    # try a match with the next file, just to make sure (only possible when
    # the development set has more than one real-access file)
    if len(devel['real']) > 1:
        windowsize2, overlap2 = get_parameters(devel['real'][1])
        if args.windowsize != windowsize2 or args.overlap != overlap2:
            raise RuntimeError(
                "A possible misdetection of windowsize and overlap occurred "
                "between files '%s' and '%s'. The first detection showed a "
                "window-size/overlap of %d/%d while the second, %d/%d. You "
                "will have to edit this script and set these values by hand"
                % (devel['real'][0], devel['real'][1], args.windowsize,
                   args.overlap, windowsize2, overlap2))

    # quickly load the development set and establish the threshold:
    thres = ml.time.eval_threshold(devel['real'], devel['attack'],
                                   args.minhter, args.verbose)

    analyzer = ml.time.Analyzer(test['real'], test['attack'], thres,
                                args.windowsize, args.overlap, args.average,
                                args.verbose)

    outfile = os.path.join(args.outputdir, 'time-analysis-table.rst')
    title = 'Time Analysis, Window *%d*, Overlap *%d*' % (args.windowsize,
                                                          args.overlap)
    # Explicitly close the report files so their content is flushed to disk
    # before the function returns.
    with open(outfile, 'wt') as table_file:
        write_table(title, analyzer, table_file, args)

    if args.verbose:
        write_table(title, analyzer, sys.stdout, args)

    outfile = os.path.join(args.outputdir,
                           'time-analysis-misclassified-at-220.txt')
    with open(outfile, 'wt') as misclassified_file:
        analyzer.write_misclassified(misclassified_file, 219)  # Canonical limit

    outpdf = os.path.join(args.outputdir, 'time-analysis.pdf')
    analyzer.plot(outpdf, title)
def main():
    """Train an MLP on the data found in the input directory.

    Parses the command line, loads the train/devel/test files of the selected
    database (dropping rows that contain NaN), trains the network with
    ``ml.rprop.make_mlp`` and saves, in the output directory, the MLP
    (``mlp.hdf5``), the training evolution (``training-evolution.hdf5``), the
    analysis plots (``plots.pdf``) and a ``session.txt`` file describing the
    run (software versions, command line, data sizes, MLP settings, timing).
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'quantities')
    OUTPUTDIR = os.path.join(basedir, 'mlp')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, default=INPUTDIR, nargs='?',
        help='Base directory containing the 5-quantities to be loaded. '
             'Final MLP input will be generated from the input directory and '
             'concatenated column-wise to form the final training matrix '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results. '
             'The given value will be interpolated with time.strftime and '
             'then, os.environ (in this order), so you can include %%()s '
             'strings (e.g. %%(SGE_TASK_ID)s) to make up the final output '
             'directory path (defaults to "%(default)s").')
    parser.add_argument(
        '-b', '--batch-size', metavar='INT', type=int, dest='batch',
        default=200,
        help='The number of samples per training iteration. Good values are '
             'greater than 100. Defaults to %(default)s')
    parser.add_argument(
        '-e', '--epoch', metavar='INT', type=int, dest='epoch', default=1,
        help='This is the number of training steps that need to be executed '
             'before we attempt to measure the error on the development set. '
             'Defaults to %(default)s')
    parser.add_argument(
        '-n', '--hidden-neurons', metavar='INT', type=int, dest='nhidden',
        default=5,
        help='The number hidden neurons in the (single) hidden layer of the '
             'MLP. Defaults to %(default)s')
    parser.add_argument(
        '-m', '--maximum-iterations', metavar='INT', type=int,
        dest='maxiter', default=0,
        help='The maximum number of iterations to train for. A value of zero '
             'trains until a valley is detected on the development set. '
             'Defaults to %(default)s')
    parser.add_argument(
        '-i', '--no-improvements', metavar='INT', type=int, dest='noimprov',
        default=0,
        help='The maximum number of iterations to wait for in case no '
             'improvements happen in the development set average RMSE. If '
             'that number of iterations is reached, the training is stopped. '
             'Values in the order of 10-20%% of the maximum number of '
             'iterations should be a reasonable default. If set to zero, do '
             'not consider this stop criteria. Defaults to %(default)s')
    parser.add_argument(
        '-f', '--overwrite', action='store_true', dest='overwrite',
        default=False,
        help='If set and the destination directory exists, overwrite the '
             'results contained there')
    parser.add_argument(
        '-V', '--verbose', action='store_true', dest='verbose',
        default=False, help='Increases this script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    start_time = time.time()
    paramfile = ConfigParser.SafeConfigParser()
    paramfile.add_section('time')
    paramfile.set('time', 'start', time.asctime())

    if args.verbose:
        print("Start time is %s" % time.asctime())

    if not os.path.exists(args.inputdir):
        parser.error("input directory `%s' does not exist" % args.inputdir)

    use_outputdir = time.strftime(args.outputdir)  # interpolate time
    use_outputdir = use_outputdir % os.environ  # interpolate environment
    if os.path.exists(use_outputdir):
        if not args.overwrite:
            parser.error("output directory '%s' exists and the overwrite "
                         "flag was not set" % use_outputdir)
    else:
        bob.db.utils.makedirs_safe(use_outputdir)

    if args.verbose:
        print("Output directory set to \"%s\"" % use_outputdir)

    use_inputdir = []
    abspath = os.path.abspath(args.inputdir)
    use_inputdir.append(abspath)

    if args.verbose:
        print("Loading non-NaN entries from input files at '%s' database..."
              % args.name)
    db = args.cls(args)

    data = {
        'train': dict(zip(('real', 'attack'), db.get_train_data())),
        'devel': dict(zip(('real', 'attack'), db.get_devel_data())),
        'test': dict(zip(('real', 'attack'), db.get_test_data())),
    }

    def merge_data(flist):
        """Load all files in ``flist`` and drop the rows containing NaN."""
        d = bob.io.load([k.make_path(use_inputdir[0], '.hdf5')
                         for k in flist])
        return d[~numpy.isnan(d.sum(axis=1)), :]

    # Only the values of existing keys are replaced, so iterating over the
    # dictionaries while updating them is safe.
    for key in data:
        for kind in data[key]:
            if args.verbose:
                # Announce the subset before the (possibly slow) load; the
                # number of loaded rows completes the same output line.
                sys.stdout.write("Loading %-5s/%-6s: " % (key, kind))
                sys.stdout.flush()
            data[key][kind] = merge_data(data[key][kind])
            if args.verbose:
                print(len(data[key][kind]))

    if args.verbose:
        print("Training MLP...")
    mlp, evolution = ml.rprop.make_mlp(
        (data['train']['real'], data['train']['attack']),
        (data['devel']['real'], data['devel']['attack']),
        args.batch, args.nhidden, args.epoch, args.maxiter, args.noimprov,
        args.verbose)

    if args.verbose:
        print("Saving session information...")

    paramfile.add_section('software')
    import pkg_resources
    for package in pkg_resources.require('antispoofing.motion'):
        paramfile.set('software', package.key, package.version)

    paramfile.add_section('environment')
    cmdline = [os.path.realpath(sys.argv[0])] + sys.argv[1:]
    paramfile.set('environment', 'command-line', ' '.join(cmdline))

    paramfile.add_section('data')
    datapath = [os.path.realpath(k) for k in use_inputdir]
    paramfile.set('data', 'database', args.name)
    paramfile.set('data', 'input', '\n'.join(datapath))
    for group in ('train', 'devel', 'test'):
        for kind in ('real', 'attack'):
            paramfile.set('data', '%s-%s' % (group, kind),
                          str(len(data[group][kind])))

    paramfile.add_section('mlp')
    paramfile.set('mlp', 'shape', '-'.join([str(k) for k in mlp.shape]))
    paramfile.set('mlp', 'batch-size', str(args.batch))
    paramfile.set('mlp', 'epoch-size', str(args.epoch))
    paramfile.set('mlp', 'maximum-iterations', str(args.maxiter))

    if args.verbose:
        print("Saving MLP...")
    mlpfile = bob.io.HDF5File(os.path.join(use_outputdir, 'mlp.hdf5'), 'w')
    mlp.save(mlpfile)
    del mlpfile

    if args.verbose:
        print("Saving result evolution...")
    evofile = bob.io.HDF5File(
        os.path.join(use_outputdir, 'training-evolution.hdf5'), 'w')
    evolution.save(evofile)
    del evofile

    if args.verbose:
        print("Running analysis...")
    evolution.report(mlp, (data['test']['real'], data['test']['attack']),
                     os.path.join(use_outputdir, 'plots.pdf'), paramfile)

    paramfile.set('time', 'end', time.asctime())
    total_time = int(time.time() - start_time)
    diff = datetime.timedelta(seconds=total_time)
    paramfile.set('time', 'duration', str(diff))
    paramfile.set('time', 'host', socket.getfqdn())
    if args.verbose:
        print("End time is %s" % time.asctime())
        print("Total training time: %s" % str(diff))

    # Close the session file explicitly so it is flushed to disk.
    with open(os.path.join(use_outputdir, 'session.txt'), 'wb') as session:
        paramfile.write(session)

    if args.verbose:
        print("All done, bye!")
def main():
    """Train an LDA machine on the data found in the input directory.

    Parses the command line, loads the training files of the selected
    database (dropping rows that contain NaN), normalizes them with the mean
    and standard deviation returned by ``calc_mean_std`` and trains a machine
    with ``make_lda``. In verbose mode the machine is evaluated on the
    development and test sets and FAR/FRR/HTER figures are printed. The
    machine (``lda.hdf5``) and a ``session.txt`` file describing the run are
    saved in the output directory.
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'quantities')
    OUTPUTDIR = os.path.join(basedir, 'lda')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, default=INPUTDIR, nargs='?',
        help='Base directory containing the 5-quantities to be loaded. '
             'Final MLP input will be generated from the input directory and '
             'concatenated column-wise to form the final training matrix '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results. '
             'The given value will be interpolated with time.strftime and '
             'then, os.environ (in this order), so you can include %%()s '
             'strings (e.g. %%(SGE_TASK_ID)s) to make up the final output '
             'directory path (defaults to "%(default)s").')
    parser.add_argument(
        '-f', '--overwrite', action='store_true', dest='overwrite',
        default=False,
        help='If set and the destination directory exists, overwrite the '
             'results contained there')
    parser.add_argument(
        '-V', '--verbose', action='store_true', dest='verbose',
        default=False, help='Increases this script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    start_time = time.time()
    paramfile = ConfigParser.SafeConfigParser()
    paramfile.add_section('time')
    paramfile.set('time', 'start', time.asctime())

    if args.verbose:
        print("Start time is %s" % time.asctime())

    if not os.path.exists(args.inputdir):
        parser.error("input directory `%s' does not exist" % args.inputdir)

    use_outputdir = time.strftime(args.outputdir)  # interpolate time
    use_outputdir = use_outputdir % os.environ  # interpolate environment
    if os.path.exists(use_outputdir):
        if not args.overwrite:
            parser.error("output directory '%s' exists and the overwrite "
                         "flag was not set" % use_outputdir)
    else:
        bob.db.utils.makedirs_safe(use_outputdir)

    if args.verbose:
        print("Output directory set to \"%s\"" % use_outputdir)

    use_inputdir = []
    abspath = os.path.abspath(args.inputdir)
    use_inputdir.append(abspath)

    if args.verbose:
        print("Loading non-NaN entries from input files at '%s' database..."
              % args.name)
    db = args.cls(args)
    real, attack = db.get_train_data()

    def merge_data(flist):
        """Load all files in ``flist`` and drop the rows containing NaN."""
        d = bob.io.load([k.make_path(use_inputdir[0], '.hdf5')
                         for k in flist])
        return d[~numpy.isnan(d.sum(axis=1)), :]

    real = merge_data(real)
    attack = merge_data(attack)

    if args.verbose:
        print("Evaluating mean and standard deviation...")
    from antispoofing.utils.ml.norm import calc_mean_std
    mean, std = calc_mean_std(real, attack, nonStdZero=True)

    if args.verbose:
        print("Training LDA...")
    from antispoofing.utils.ml.norm import zeromean_unitvar_norm
    real = zeromean_unitvar_norm(real, mean, std)
    attack = zeromean_unitvar_norm(attack, mean, std)

    from antispoofing.utils.ml.lda import make_lda
    machine = make_lda([real, attack])

    # adjust some details of the final machine to be saved
    machine.resize(machine.shape[0], 1)

    # so we get real and attacks on the "right" side of the axis
    machine.weights = -1 * machine.weights

    machine.input_subtract = mean
    machine.input_divide = std

    if args.verbose:
        # The evaluation only prints figures, so it is run in verbose mode
        # only. Dedicated variable names keep ``real``/``attack`` bound to
        # the training data, whose sizes are recorded in the session file.
        print("Performance evaluation:")
        devel_real, devel_attack = db.get_devel_data()
        devel_real = merge_data(devel_real)
        devel_attack = merge_data(devel_attack)
        pos = machine(devel_real)[:, 0]
        neg = machine(devel_attack)[:, 0]
        thres = bob.measure.eer_threshold(neg, pos)
        far, frr = bob.measure.farfrr(neg, pos, thres)
        good_neg = bob.measure.correctly_classified_negatives(
            neg, thres).sum()
        good_pos = bob.measure.correctly_classified_positives(
            pos, thres).sum()
        print(" -> EER @ devel set threshold: %.5e" % thres)
        print(" -> Devel set results:")
        print(" * FAR : %.3f%% (%d/%d)"
              % (100 * far, len(neg) - good_neg, len(neg)))
        print(" * FRR : %.3f%% (%d/%d)"
              % (100 * frr, len(pos) - good_pos, len(pos)))
        print(" * HTER: %.3f%%" % (50 * (far + frr)))

        test_real, test_attack = db.get_test_data()
        test_real = merge_data(test_real)
        test_attack = merge_data(test_attack)
        pos = machine(test_real)[:, 0]
        neg = machine(test_attack)[:, 0]
        # the threshold chosen on the development set is reused here
        far, frr = bob.measure.farfrr(neg, pos, thres)
        good_neg = bob.measure.correctly_classified_negatives(
            neg, thres).sum()
        good_pos = bob.measure.correctly_classified_positives(
            pos, thres).sum()
        print(" -> Test set results:")
        print(" * FAR: %.3f%% (%d/%d)"
              % (100 * far, len(neg) - good_neg, len(neg)))
        print(" * FRR: %.3f%% (%d/%d)"
              % (100 * frr, len(pos) - good_pos, len(pos)))
        print(" * HTER: %.3f%%" % (50 * (far + frr)))

    if args.verbose:
        print("Saving session information...")

    paramfile.add_section('software')
    import pkg_resources
    for package in pkg_resources.require('antispoofing.motion'):
        paramfile.set('software', package.key, package.version)

    paramfile.add_section('environment')
    cmdline = [os.path.realpath(sys.argv[0])] + sys.argv[1:]
    paramfile.set('environment', 'command-line', ' '.join(cmdline))

    paramfile.add_section('data')
    datapath = [os.path.realpath(k) for k in use_inputdir]
    paramfile.set('data', 'database', args.name)
    paramfile.set('data', 'input', '\n'.join(datapath))
    paramfile.set('data', 'train-real', str(len(real)))
    paramfile.set('data', 'train-attack', str(len(attack)))

    paramfile.add_section('lda')
    paramfile.set('lda', 'shape', '-'.join([str(k) for k in machine.shape]))

    if args.verbose:
        print("Saving LDA machine...")
    machfile = bob.io.HDF5File(os.path.join(use_outputdir, 'lda.hdf5'), 'w')
    machine.save(machfile)
    del machfile

    paramfile.set('time', 'end', time.asctime())
    total_time = int(time.time() - start_time)
    diff = datetime.timedelta(seconds=total_time)
    paramfile.set('time', 'duration', str(diff))
    paramfile.set('time', 'host', socket.getfqdn())
    if args.verbose:
        print("End time is %s" % time.asctime())
        print("Total training time: %s" % str(diff))

    # Close the session file explicitly so it is flushed to disk.
    with open(os.path.join(use_outputdir, 'session.txt'), 'wb') as session:
        paramfile.write(session)

    if args.verbose:
        print("All done, bye!")
def main():
    """Train an MLP on the data found in the input directory.

    Parses the command line, loads the train/devel/test files of the selected
    database (dropping rows that contain NaN), trains the network with
    ``ml.rprop.make_mlp`` and saves, in the output directory, the MLP
    (``mlp.hdf5``), the training evolution (``training-evolution.hdf5``), the
    analysis plots (``plots.pdf``) and a ``session.txt`` file describing the
    run (software versions, command line, data sizes, MLP settings, timing).
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'quantities')
    OUTPUTDIR = os.path.join(basedir, 'mlp')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, default=INPUTDIR, nargs='?',
        help='Base directory containing the 5-quantities to be loaded. '
             'Final MLP input will be generated from the input directory and '
             'concatenated column-wise to form the final training matrix '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results. '
             'The given value will be interpolated with time.strftime and '
             'then, os.environ (in this order), so you can include %%()s '
             'strings (e.g. %%(SGE_TASK_ID)s) to make up the final output '
             'directory path (defaults to "%(default)s").')
    parser.add_argument(
        '-b', '--batch-size', metavar='INT', type=int, dest='batch',
        default=200,
        help='The number of samples per training iteration. Good values are '
             'greater than 100. Defaults to %(default)s')
    parser.add_argument(
        '-e', '--epoch', metavar='INT', type=int, dest='epoch', default=1,
        help='This is the number of training steps that need to be executed '
             'before we attempt to measure the error on the development set. '
             'Defaults to %(default)s')
    parser.add_argument(
        '-n', '--hidden-neurons', metavar='INT', type=int, dest='nhidden',
        default=5,
        help='The number hidden neurons in the (single) hidden layer of the '
             'MLP. Defaults to %(default)s')
    parser.add_argument(
        '-m', '--maximum-iterations', metavar='INT', type=int,
        dest='maxiter', default=0,
        help='The maximum number of iterations to train for. A value of zero '
             'trains until a valley is detected on the development set. '
             'Defaults to %(default)s')
    parser.add_argument(
        '-i', '--no-improvements', metavar='INT', type=int, dest='noimprov',
        default=0,
        help='The maximum number of iterations to wait for in case no '
             'improvements happen in the development set average RMSE. If '
             'that number of iterations is reached, the training is stopped. '
             'Values in the order of 10-20%% of the maximum number of '
             'iterations should be a reasonable default. If set to zero, do '
             'not consider this stop criteria. Defaults to %(default)s')
    parser.add_argument(
        '-f', '--overwrite', action='store_true', dest='overwrite',
        default=False,
        help='If set and the destination directory exists, overwrite the '
             'results contained there')
    parser.add_argument(
        '-V', '--verbose', action='store_true', dest='verbose',
        default=False, help='Increases this script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    start_time = time.time()
    paramfile = ConfigParser.SafeConfigParser()
    paramfile.add_section('time')
    paramfile.set('time', 'start', time.asctime())

    if args.verbose:
        print("Start time is %s" % time.asctime())

    if not os.path.exists(args.inputdir):
        parser.error("input directory `%s' does not exist" % args.inputdir)

    use_outputdir = time.strftime(args.outputdir)  # interpolate time
    use_outputdir = use_outputdir % os.environ  # interpolate environment
    if os.path.exists(use_outputdir):
        if not args.overwrite:
            parser.error("output directory '%s' exists and the overwrite "
                         "flag was not set" % use_outputdir)
    else:
        bob.db.utils.makedirs_safe(use_outputdir)

    if args.verbose:
        print("Output directory set to \"%s\"" % use_outputdir)

    use_inputdir = []
    abspath = os.path.abspath(args.inputdir)
    use_inputdir.append(abspath)

    if args.verbose:
        print("Loading non-NaN entries from input files at '%s' database..."
              % args.name)
    db = args.cls(args)

    data = {
        'train': dict(zip(('real', 'attack'), db.get_train_data())),
        'devel': dict(zip(('real', 'attack'), db.get_devel_data())),
        'test': dict(zip(('real', 'attack'), db.get_test_data())),
    }

    def merge_data(flist):
        """Load all files in ``flist`` and drop the rows containing NaN."""
        d = bob.io.load([k.make_path(use_inputdir[0], '.hdf5')
                         for k in flist])
        return d[~numpy.isnan(d.sum(axis=1)), :]

    # Only the values of existing keys are replaced, so iterating over the
    # dictionaries while updating them is safe.
    for key in data:
        for kind in data[key]:
            if args.verbose:
                # Announce the subset before the (possibly slow) load; the
                # number of loaded rows completes the same output line.
                sys.stdout.write("Loading %-5s/%-6s: " % (key, kind))
                sys.stdout.flush()
            data[key][kind] = merge_data(data[key][kind])
            if args.verbose:
                print(len(data[key][kind]))

    if args.verbose:
        print("Training MLP...")
    mlp, evolution = ml.rprop.make_mlp(
        (data['train']['real'], data['train']['attack']),
        (data['devel']['real'], data['devel']['attack']),
        args.batch, args.nhidden, args.epoch, args.maxiter, args.noimprov,
        args.verbose)

    if args.verbose:
        print("Saving session information...")

    paramfile.add_section('software')
    import pkg_resources
    for package in pkg_resources.require('antispoofing.motion'):
        paramfile.set('software', package.key, package.version)

    paramfile.add_section('environment')
    cmdline = [os.path.realpath(sys.argv[0])] + sys.argv[1:]
    paramfile.set('environment', 'command-line', ' '.join(cmdline))

    paramfile.add_section('data')
    datapath = [os.path.realpath(k) for k in use_inputdir]
    paramfile.set('data', 'database', args.name)
    paramfile.set('data', 'input', '\n'.join(datapath))
    for group in ('train', 'devel', 'test'):
        for kind in ('real', 'attack'):
            paramfile.set('data', '%s-%s' % (group, kind),
                          str(len(data[group][kind])))

    paramfile.add_section('mlp')
    paramfile.set('mlp', 'shape', '-'.join([str(k) for k in mlp.shape]))
    paramfile.set('mlp', 'batch-size', str(args.batch))
    paramfile.set('mlp', 'epoch-size', str(args.epoch))
    paramfile.set('mlp', 'maximum-iterations', str(args.maxiter))

    if args.verbose:
        print("Saving MLP...")
    mlpfile = bob.io.HDF5File(os.path.join(use_outputdir, 'mlp.hdf5'), 'w')
    mlp.save(mlpfile)
    del mlpfile

    if args.verbose:
        print("Saving result evolution...")
    evofile = bob.io.HDF5File(
        os.path.join(use_outputdir, 'training-evolution.hdf5'), 'w')
    evolution.save(evofile)
    del evofile

    if args.verbose:
        print("Running analysis...")
    evolution.report(mlp, (data['test']['real'], data['test']['attack']),
                     os.path.join(use_outputdir, 'plots.pdf'), paramfile)

    paramfile.set('time', 'end', time.asctime())
    total_time = int(time.time() - start_time)
    diff = datetime.timedelta(seconds=total_time)
    paramfile.set('time', 'duration', str(diff))
    paramfile.set('time', 'host', socket.getfqdn())
    if args.verbose:
        print("End time is %s" % time.asctime())
        print("Total training time: %s" % str(diff))

    # Close the session file explicitly so it is flushed to disk.
    with open(os.path.join(use_outputdir, 'session.txt'), 'wb') as session:
        paramfile.write(session)

    if args.verbose:
        print("All done, bye!")
def main():
    """Run the time analysis over a directory of scores.

    Parses the command line, builds the full paths of the development and
    test score files of the selected database, discovers the window-size and
    overlap with ``get_parameters``, evaluates a threshold on the development
    set with ``ml.time.eval_threshold`` and uses ``ml.time.Analyzer`` on the
    test set to write an RST table, a list of misclassified files and a PDF
    plot into the output directory.

    Raises:
        RuntimeError: if the first two development files disagree on the
            detected window-size/overlap.
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'scores')
    OUTPUTDIR = os.path.join(basedir, 'time-analysis')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, nargs='?', default=INPUTDIR,
        help='directory containing the scores to be analyzed '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results '
             '(defaults to "%(default)s").')
    parser.add_argument(
        '-a', '--average', default=False, action='store_true',
        dest='average',
        help='if set, average thresholds instead of applying a score '
             'thresholding at every window interval')
    parser.add_argument(
        '-m', '--min-hter', default=False, action='store_true',
        dest='minhter',
        help='if set, uses the min. HTER threshold instead of the EER '
             'threshold on the development set')
    parser.add_argument(
        '-v', '--verbose', default=False, action='store_true',
        dest='verbose', help='increases the script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    if not os.path.exists(args.inputdir):
        parser.error("input directory does not exist")

    if not os.path.exists(args.outputdir):
        if args.verbose:
            print("Creating output directory `%s'..." % args.outputdir)
        bob.db.utils.makedirs_safe(args.outputdir)

    db = args.cls(args)
    devel = dict(zip(('real', 'attack'), db.get_devel_data()))
    test = dict(zip(('real', 'attack'), db.get_test_data()))

    # make full paths
    for subset in (devel, test):
        for kind in ('real', 'attack'):
            subset[kind] = [k.make_path(args.inputdir, '.hdf5')
                            for k in subset[kind]]

    # finds out window-size and overlap
    args.windowsize, args.overlap = get_parameters(devel['real'][0])
    if args.verbose:
        print("Discovered parameters:")
        print(" * window-size: %d" % args.windowsize)
        print(" * overlap : %d" % args.overlap)

    # try a match with the next file, just to make sure (only possible when
    # the development set has more than one real-access file)
    if len(devel['real']) > 1:
        windowsize2, overlap2 = get_parameters(devel['real'][1])
        if args.windowsize != windowsize2 or args.overlap != overlap2:
            raise RuntimeError(
                "A possible misdetection of windowsize and overlap occurred "
                "between files '%s' and '%s'. The first detection showed a "
                "window-size/overlap of %d/%d while the second, %d/%d. You "
                "will have to edit this script and set these values by hand"
                % (devel['real'][0], devel['real'][1], args.windowsize,
                   args.overlap, windowsize2, overlap2))

    # quickly load the development set and establish the threshold:
    thres = ml.time.eval_threshold(devel['real'], devel['attack'],
                                   args.minhter, args.verbose)

    analyzer = ml.time.Analyzer(test['real'], test['attack'], thres,
                                args.windowsize, args.overlap, args.average,
                                args.verbose)

    outfile = os.path.join(args.outputdir, 'time-analysis-table.rst')
    title = 'Time Analysis, Window *%d*, Overlap *%d*' % (args.windowsize,
                                                          args.overlap)
    # Explicitly close the report files so their content is flushed to disk
    # before the function returns.
    with open(outfile, 'wt') as table_file:
        write_table(title, analyzer, table_file, args)

    if args.verbose:
        write_table(title, analyzer, sys.stdout, args)

    outfile = os.path.join(args.outputdir,
                           'time-analysis-misclassified-at-220.txt')
    with open(outfile, 'wt') as misclassified_file:
        analyzer.write_misclassified(misclassified_file, 219)  # Canonical limit

    outpdf = os.path.join(args.outputdir, 'time-analysis.pdf')
    analyzer.plot(outpdf, title)
def main():
    """Train an LDA machine on the data found in the input directory.

    Parses the command line, loads the training files of the selected
    database (dropping rows that contain NaN), normalizes them with the mean
    and standard deviation returned by ``calc_mean_std`` and trains a machine
    with ``make_lda``. In verbose mode the machine is evaluated on the
    development and test sets and FAR/FRR/HTER figures are printed. The
    machine (``lda.hdf5``) and a ``session.txt`` file describing the run are
    saved in the output directory.
    """
    basedir = os.path.dirname(os.path.dirname(os.path.realpath(sys.argv[0])))
    INPUTDIR = os.path.join(basedir, 'quantities')
    OUTPUTDIR = os.path.join(basedir, 'lda')

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'inputdir', metavar='DIR', type=str, default=INPUTDIR, nargs='?',
        help='Base directory containing the 5-quantities to be loaded. '
             'Final MLP input will be generated from the input directory and '
             'concatenated column-wise to form the final training matrix '
             '(defaults to "%(default)s").')
    parser.add_argument(
        'outputdir', metavar='DIR', type=str, default=OUTPUTDIR, nargs='?',
        help='Base directory that will be used to save the results. '
             'The given value will be interpolated with time.strftime and '
             'then, os.environ (in this order), so you can include %%()s '
             'strings (e.g. %%(SGE_TASK_ID)s) to make up the final output '
             'directory path (defaults to "%(default)s").')
    parser.add_argument(
        '-f', '--overwrite', action='store_true', dest='overwrite',
        default=False,
        help='If set and the destination directory exists, overwrite the '
             'results contained there')
    parser.add_argument(
        '-V', '--verbose', action='store_true', dest='verbose',
        default=False, help='Increases this script verbosity')

    # Adds database support using the common infrastructure
    # N.B.: Only databases with 'video' support
    import antispoofing.utils.db
    antispoofing.utils.db.Database.create_parser(parser, 'video')

    args = parser.parse_args()

    start_time = time.time()
    paramfile = ConfigParser.SafeConfigParser()
    paramfile.add_section('time')
    paramfile.set('time', 'start', time.asctime())

    if args.verbose:
        print("Start time is %s" % time.asctime())

    if not os.path.exists(args.inputdir):
        parser.error("input directory `%s' does not exist" % args.inputdir)

    use_outputdir = time.strftime(args.outputdir)  # interpolate time
    use_outputdir = use_outputdir % os.environ  # interpolate environment
    if os.path.exists(use_outputdir):
        if not args.overwrite:
            parser.error("output directory '%s' exists and the overwrite "
                         "flag was not set" % use_outputdir)
    else:
        bob.db.utils.makedirs_safe(use_outputdir)

    if args.verbose:
        print("Output directory set to \"%s\"" % use_outputdir)

    use_inputdir = []
    abspath = os.path.abspath(args.inputdir)
    use_inputdir.append(abspath)

    if args.verbose:
        print("Loading non-NaN entries from input files at '%s' database..."
              % args.name)
    db = args.cls(args)
    real, attack = db.get_train_data()

    def merge_data(flist):
        """Load all files in ``flist`` and drop the rows containing NaN."""
        d = bob.io.load([k.make_path(use_inputdir[0], '.hdf5')
                         for k in flist])
        return d[~numpy.isnan(d.sum(axis=1)), :]

    real = merge_data(real)
    attack = merge_data(attack)

    if args.verbose:
        print("Evaluating mean and standard deviation...")
    from antispoofing.utils.ml.norm import calc_mean_std
    mean, std = calc_mean_std(real, attack, nonStdZero=True)

    if args.verbose:
        print("Training LDA...")
    from antispoofing.utils.ml.norm import zeromean_unitvar_norm
    real = zeromean_unitvar_norm(real, mean, std)
    attack = zeromean_unitvar_norm(attack, mean, std)

    from antispoofing.utils.ml.lda import make_lda
    machine = make_lda([real, attack])

    # adjust some details of the final machine to be saved
    machine.resize(machine.shape[0], 1)

    # so we get real and attacks on the "right" side of the axis
    machine.weights = -1 * machine.weights

    machine.input_subtract = mean
    machine.input_divide = std

    if args.verbose:
        # The evaluation only prints figures, so it is run in verbose mode
        # only. Dedicated variable names keep ``real``/``attack`` bound to
        # the training data, whose sizes are recorded in the session file.
        print("Performance evaluation:")
        devel_real, devel_attack = db.get_devel_data()
        devel_real = merge_data(devel_real)
        devel_attack = merge_data(devel_attack)
        pos = machine(devel_real)[:, 0]
        neg = machine(devel_attack)[:, 0]
        thres = bob.measure.eer_threshold(neg, pos)
        far, frr = bob.measure.farfrr(neg, pos, thres)
        good_neg = bob.measure.correctly_classified_negatives(
            neg, thres).sum()
        good_pos = bob.measure.correctly_classified_positives(
            pos, thres).sum()
        print(" -> EER @ devel set threshold: %.5e" % thres)
        print(" -> Devel set results:")
        print(" * FAR : %.3f%% (%d/%d)"
              % (100 * far, len(neg) - good_neg, len(neg)))
        print(" * FRR : %.3f%% (%d/%d)"
              % (100 * frr, len(pos) - good_pos, len(pos)))
        print(" * HTER: %.3f%%" % (50 * (far + frr)))

        test_real, test_attack = db.get_test_data()
        test_real = merge_data(test_real)
        test_attack = merge_data(test_attack)
        pos = machine(test_real)[:, 0]
        neg = machine(test_attack)[:, 0]
        # the threshold chosen on the development set is reused here
        far, frr = bob.measure.farfrr(neg, pos, thres)
        good_neg = bob.measure.correctly_classified_negatives(
            neg, thres).sum()
        good_pos = bob.measure.correctly_classified_positives(
            pos, thres).sum()
        print(" -> Test set results:")
        print(" * FAR: %.3f%% (%d/%d)"
              % (100 * far, len(neg) - good_neg, len(neg)))
        print(" * FRR: %.3f%% (%d/%d)"
              % (100 * frr, len(pos) - good_pos, len(pos)))
        print(" * HTER: %.3f%%" % (50 * (far + frr)))

    if args.verbose:
        print("Saving session information...")

    paramfile.add_section('software')
    import pkg_resources
    for package in pkg_resources.require('antispoofing.motion'):
        paramfile.set('software', package.key, package.version)

    paramfile.add_section('environment')
    cmdline = [os.path.realpath(sys.argv[0])] + sys.argv[1:]
    paramfile.set('environment', 'command-line', ' '.join(cmdline))

    paramfile.add_section('data')
    datapath = [os.path.realpath(k) for k in use_inputdir]
    paramfile.set('data', 'database', args.name)
    paramfile.set('data', 'input', '\n'.join(datapath))
    paramfile.set('data', 'train-real', str(len(real)))
    paramfile.set('data', 'train-attack', str(len(attack)))

    paramfile.add_section('lda')
    paramfile.set('lda', 'shape', '-'.join([str(k) for k in machine.shape]))

    if args.verbose:
        print("Saving LDA machine...")
    machfile = bob.io.HDF5File(os.path.join(use_outputdir, 'lda.hdf5'), 'w')
    machine.save(machfile)
    del machfile

    paramfile.set('time', 'end', time.asctime())
    total_time = int(time.time() - start_time)
    diff = datetime.timedelta(seconds=total_time)
    paramfile.set('time', 'duration', str(diff))
    paramfile.set('time', 'host', socket.getfqdn())
    if args.verbose:
        print("End time is %s" % time.asctime())
        print("Total training time: %s" % str(diff))

    # Close the session file explicitly so it is flushed to disk.
    with open(os.path.join(use_outputdir, 'session.txt'), 'wb') as session:
        paramfile.write(session)

    if args.verbose:
        print("All done, bye!")