# NOTE(review): whitespace-mangled fragment, truncated at BOTH ends — it opens
# mid-argument of a parser.add_argument() call (the enclosing parse_arguments()
# starts before this chunk) and is cut off inside net.fit(x=dataset['x'],
# so the code below is left byte-identical rather than reconstructed.
# What the visible code does: finishes the CLI spec (required accuracy,
# pruning percentile levels), then — under `if __name__ == '__main__':` —
# prints the experiment parameters, builds a params_str tag from
# hidden_structure / req_acc / n_obs, and when --generate is set loops over
# n_obs observations, each time creating a Sigmoid FeedForwardNet and opening
# the MNIST shelve dataset '../examples/mnist/dataset_mnist_1K.ds'.
# TODO(review): help string typo 'classificationa accuracy' — fix at the
# source file (runtime string, not altered here).
type=float, default=0.96, help='Required classificationa accuracy') parser.add_argument('-lev', '--levels', type=int, default=(75, 50, 35, 20, 15, 10, 7, 5, 3, 2, 1, 0), nargs='+', help='Pruning percentile levels') return parser.parse_args() if __name__ == '__main__': args = parse_arguments() print_message(message='EXAMPLE: MNIST dataset') print_param(description='Number of experiment observations', param_str=str(args.n_obs)) print_param(description='Initial number of hidden neurons', param_str=str(args.hidden_structure)) print_param(description='Required accuracy', param_str=str(args.req_acc)) params_str = '_hs' + str(args.hidden_structure) + '_ra' + str( args.req_acc).replace('.', '') + '_no' + str(args.n_obs) if args.generate: stats_data = list() for i_obs in range(1, args.n_obs + 1): print_message(message='MNIST experiment, observation ' + str(i_obs) + '/' + str(args.n_obs)) net = FeedForwardNet(hidden=args.hidden_structure, tf_name='Sigmoid') dataset = open_shelve('../examples/mnist/dataset_mnist_1K.ds', 'c') net.fit(x=dataset['x'],
def get_speech_data():
    """Load alignments and features, build samples, and split the data.

    Drives the module-level pipeline helpers in order — read_alignments(),
    read_features(), add_samples(), split_data() — and logs summary
    statistics after each stage.  Operates entirely on module-level globals
    (``mlf``, ``features``, ``samples``, ``data``); returns None.

    Fix vs. original: ``mlf.keys()[0]`` / ``features.keys()[0]`` subscript a
    dict view, which raises TypeError on Python 3; ``next(iter(d))`` picks an
    arbitrary key on both Python 2 and 3 (the original order was arbitrary
    too).  ``len(d.keys())`` is simplified to ``len(d)``.
    """
    print_message(message='Reading alignments...')
    read_alignments()
    print_param(description='Number of loaded records (alignments)', param_str=str(len(mlf)))
    # Peek at one arbitrary alignment record to report its frame count.
    print_param(description='Number of alignment frames', param_str=str(len(mlf[next(iter(mlf))])))
    print_param(description='Number of found phonemes', param_str=str(len(samples)))
    print_param(description='Found phonemes', param_str=str(sorted(samples.keys())))
    print_message(message='Reading features...')
    read_features()
    print_param(description='Number of loaded records (features)', param_str=str(len(features)))
    # Same trick for the feature records.
    print_param(description='Number of feature frames', param_str=str(len(features[next(iter(features))])))
    print_message(message='Adding samples...')
    add_samples()
    print_message(message='Splitting data...')
    split_data()
    print_param(description='Number of training samples', param_str=str(len(data['x'])))
    print_param(description='Number of validation samples', param_str=str(len(data['x_val'])))
    print_param(description='Number of testing samples', param_str=str(len(data['x_test'])))
    # assumes data['x'] holds array-like samples exposing .shape — TODO confirm
    print_param(description='Problem dimension', param_str=str(data['x'][0].shape[0]))
    print_param(description='Number of classes', param_str=str(len(samples)))
    print_message(message='Number of samples per class:')
    for phonem in sorted(samples.keys()):
        print_param(description=phonem, param_str=str(len(samples[phonem])))
# NOTE(review): whitespace-mangled fragment, truncated at BOTH ends — it opens
# inside a parse_arguments()-style function (only its final validation is
# visible) and is cut off inside an opened ''' block (apparently commented-out
# alternative sampling code), so the code below is left byte-identical.
# What the visible code does: validates that the three --data_split fractions
# sum to 1.0 (within 1e-5), then — under `if __name__ == '__main__':` —
# computes train/val split bounds, derives b = 2/(a+1) - 1 from parameter a,
# and generates n_samples uniform points in [-1, 1]^2 labelled 0.0 when
# x1 < a and x2 <= b, else 1.0 (the KARNIN toy dataset).
''' Check args ''' if abs(sum(args_tmp.data_split) - 1) > 1e-5: stderr.write('Error: data_split args must give 1.0 together (e.g. 0.8 0.1 0.1).\n') exit() else: return args_tmp if __name__ == '__main__': args = parse_arguments() split_bounds = (args.n_samples*args.data_split[0], args.n_samples*(args.data_split[0]+args.data_split[1])) destination = 'dataset_karnin'+args.name_appendix+'.ds' a = args.a b = 2.0/(a+1)-1 print_message(message='Generating and splitting KARNIN data...') print_param(description='parameter a', param_str=str(a)) print_param(description='parameter b', param_str=str(b)) data = {'x': list(), 'y': list(), 'x_val': list(), 'y_val': list(), 'x_test': list(), 'y_test': list()} for ni in range(args.n_samples): x1 = uniform(-1, 1) x2 = uniform(-1, 1) if x1 < a and x2 <= b: y = 0.0 else: y = 1.0 ''' x0 = uniform(-1, a) y0 = uniform(-1, b) x1 = uniform(a, 1) y1 = uniform(-1, 1+(1-b))
# NOTE(review): whitespace-mangled fragment, truncated at BOTH ends — it opens
# with the tail of a function defined before this chunk (the same per-class
# sample report seen in get_speech_data) and is cut off inside the `data`
# dict literal, so the code below is left byte-identical.
# What the visible code does: under `if __name__ == '__main__':` it assembles
# the dataset destination filename by concatenating border_size, context_size,
# n_filters, the three data_split fractions (scaled x10), n_samples,
# n_records, an optional sanitized phoneme list and a name appendix; prints
# all run parameters; then initialises the module-level containers
# (mlf, features, samples, data) that the pipeline functions fill in.
# NOTE(review): the phoneme sanitization chains str().replace(...)[1:-1] to
# strip list brackets/quotes — presumably args.phonemes is a list of strings;
# verify against the argument parser (outside this view).
print_message(message='Number of samples per class:') for phonem in sorted(samples.keys()): print_param(description=phonem, param_str=str(len(samples[phonem]))) if __name__ == '__main__': args = parse_arguments() destination = 'dataset_speech_bs'+str(args.border_size) destination += '_cs'+str(args.context_size)+'_nf'+str(args.n_filters) destination += '_ds'+str(int(args.data_split[0]*10))+str(int(args.data_split[1]*10))+str(int(args.data_split[2]*10)) destination += '_ns'+str(args.n_samples)+'_nr'+str(args.n_records) if args.phonemes: destination += '_'+str(args.phonemes).replace(',','+').replace(' ', '').replace('\'', '')[1:-1] destination += args.name_appendix+'.ds' print_message(message='Processing SPEECH data...') print_param(description='Path to features', param_str=args.feature_filename) print_param(description='Path to alignments', param_str=args.alignment_filename) print_param(description='Border size (strictness)', param_str=str(args.border_size)) print_param(description='Context size', param_str=str(args.context_size)) print_param(description='Number of MEL filters', param_str=str(args.n_filters)) print_param(description='Number of records', param_str=str(args.n_records)) print_param(description='Number of samples', param_str=str(args.n_samples)) print_param(description='Maximum number of other phonemes', param_str=str(args.max_rest)) print_param(description='Phonemes as classes', param_str=str(args.phonemes) if args.phonemes else 'all') print_param(description='Data split (train/val/test)', param_str=str(args.data_split)) print_param(description='Dataset destination file name', param_str=destination) mlf = dict() features = dict() samples = dict() data = {'x': list(), 'y': list(), 'x_val': list(), 'y_val': list(), 'x_test': list(), 'y_test': list(),
# NOTE(review): whitespace-mangled fragment, truncated at BOTH ends — it opens
# mid-way through a parse_arguments()-style function (its `def` and parser
# construction are before this chunk) and the `__main__` driver clearly
# continues past the last visible statement, so the code is left byte-identical.
# What the visible code does: finishes the CLI spec, then — under
# `if __name__ == '__main__':` — prints the experiment parameters, builds a
# params_str tag, and when --generate is set loops over n_obs observations:
# each builds a Sigmoid FeedForwardNet, opens the XOR shelve dataset, trains
# with learning_rate=0.4 / n_epoch=50 / req_acc=1.0, evaluates on the test
# split and prints res[1] as the accuracy.
# TODO(review): `type=bool` on --generate is an argparse pitfall — bool('False')
# is True, so any non-empty value enables it; prefer action='store_true'.
# TODO(review): help string typo 'classificationa accuracy' (runtime string,
# not altered here).
parser.add_argument('-g', '--generate', type=bool, default=False, help='Generate new stats or load the dumped?') parser.add_argument('-no', '--n_obs', type=int, default=10, help='Number of experiment observations') parser.add_argument('-hs', '--hidden_structure', type=int, default=[50], nargs='+', help='Neural network structure') parser.add_argument('-ra', '--req_acc', type=float, default=1.0, help='Required classificationa accuracy') parser.add_argument('-lev', '--levels', type=int, default=(75, 50, 35, 20, 10, 5, 1, 0), nargs='+', help='Pruning percentile levels') return parser.parse_args() if __name__ == '__main__': args = parse_arguments() print_message(message='EXAMPLE: XOR dataset') print_param(description='Number of experiment observations', param_str=str(args.n_obs)) print_param(description='Initial number of hidden neurons', param_str=str(args.hidden_structure)) print_param(description='Required accuracy', param_str=str(args.req_acc)) params_str = '_hs'+str(args.hidden_structure)+'_ra'+str(args.req_acc).replace('.', '')+'_no'+str(args.n_obs) if args.generate: stats_data = list() for i_obs in range(1, args.n_obs+1): print_message(message='XOR experiment, observation '+str(i_obs)+'/'+str(args.n_obs)) net = FeedForwardNet(hidden=args.hidden_structure, tf_name='Sigmoid') dataset = open_shelve('../examples/xor/dataset_xor.ds', 'c') net.fit(x=dataset['x'], y=dataset['y'], x_val=dataset['x_val'], y_val=dataset['y_val'], learning_rate=0.4, n_epoch=50, req_acc=1.0) res = net.evaluate(x=dataset['x_test'], y=dataset['y_test']) print_message(message='Evaluation on test data after training:') print_param(description='Accuracy', param_str=str(res[1]))