Example #1
def get_speech_data():
    print_message(message='Reading alignments...')
    read_alignments()
    print_param(description='Number of loaded records (alignments)', param_str=str(len(mlf.keys())))
    print_param(description='Number of alignment frames', param_str=str(len(mlf[mlf.keys()[0]])))
    print_param(description='Number of found phonemes', param_str=str(len(samples)))
    print_param(description='Found phonemes', param_str=str(sorted(samples.keys())))
    
    print_message(message='Reading features...')
    read_features()
    print_param(description='Number of loaded records (features)', param_str=str(len(features.keys())))
    print_param(description='Number of feature frames', param_str=str(len(features[features.keys()[0]])))

    print_message(message='Adding samples...')
    add_samples()

    print_message(message='Splitting data...')
    split_data()
    print_param(description='Number of training samples', param_str=str(len(data['x'])))
    print_param(description='Number of validation samples', param_str=str(len(data['x_val'])))
    print_param(description='Number of testing samples', param_str=str(len(data['x_test'])))
    print_param(description='Problem dimension', param_str=str(data['x'][0].shape[0]))
    print_param(description='Number of classes', param_str=str(len(samples)))

    print_message(message='Number of samples per class:')
    for phonem in sorted(samples.keys()):
        print_param(description=phonem, param_str=str(len(samples[phonem])))

def load_stats(self, file_name):
    print_message(message='Loading pruning process statistics from ' +
                  file_name)
    with open(file_name, 'r') as f:
        stats_pack = load_cpickle(f)
    self.stats_data = stats_pack['data']
    self.means = stats_pack['means']
    self.stds = stats_pack['stds']
    self.vars = stats_pack['vars']
    self.n_obs = stats_pack['n_obs']
    self.pruning_steps = stats_pack['pruning_steps']

def dump_stats(self, file_name):
    stats_pack = {
        'data': self.stats_data,
        'means': self.means,
        'stds': self.stds,
        'vars': self.vars,
        'n_obs': self.n_obs,
        'pruning_steps': self.pruning_steps
    }
    with open(file_name, 'w') as f:
        dump_cpickle(stats_pack, f)
    print_message(message='Experiment statistics dumped as ' + file_name)
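The two methods above are inverses: dump_stats packs the collected pruning statistics into a single dict and pickles it, while load_stats restores the same fields from disk. A minimal standalone sketch of the same round-trip pattern, using the standard pickle module and made-up values rather than the project's helpers:

import pickle

# Hypothetical statistics standing in for the attributes used above.
stats_pack = {
    'data': [[0.91, 0.88], [0.93, 0.90]],
    'means': [0.92, 0.89],
    'stds': [0.01, 0.01],
    'vars': [0.0001, 0.0001],
    'n_obs': 2,
    'pruning_steps': [75, 50, 25, 10, 0]
}

# Dump in binary mode (the snippet above pickles in text mode via cPickle).
with open('stats_demo.pkl', 'wb') as f:
    pickle.dump(stats_pack, f)

# Load it back and check the round trip.
with open('stats_demo.pkl', 'rb') as f:
    restored = pickle.load(f)
assert restored['n_obs'] == stats_pack['n_obs']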
Example #4
def dump(self, net_file_name):
    net_pack = {
        'w': self.w,
        'b': self.b,
        'w_is': self.w_is,
        'b_is': self.b_is,
        'w_init': self.w_init,
        'b_init': self.b_init,
        'structure': self.structure,
        'tf': self.tf_name,
        'labels': self.labels,
        'features': self.used_features,
        'label_sign': self.label_sign
    }
    if 'pruning' in self.opt.keys():
        net_pack['pruning_stats'] = self.opt['pruning'].stats
    with open(net_file_name, 'w') as f:
        dump_cpickle(net_pack, f)
    print_message(message='Net dumped as ' + net_file_name)
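Only the dump side is shown here; whatever reads the file back simply unpickles the same dict and treats 'pruning_stats' as optional, since it is written only when a pruning optimizer is configured. A rough reader sketch (plain pickle, hypothetical file name, not taken from the project):

import pickle

# 'net_demo.pkl' is a hypothetical file produced by a dump() call like the one above.
with open('net_demo.pkl', 'rb') as f:
    net_pack = pickle.load(f)

weights, biases = net_pack['w'], net_pack['b']
structure = net_pack['structure']
# Pruning statistics are present only if the net was trained with pruning enabled.
pruning_stats = net_pack.get('pruning_stats')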
Example #5
                        type=int,
                        default=-1,
                        help='Number of training samples per class.')
    parser.add_argument('-na',
                        '--name_appendix',
                        type=str,
                        default='',
                        help='Dataset filename appendix')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()
    destination = 'dataset_mnist' + args.name_appendix + '.ds'

    print_message(message='Loading Yann LeCun\'s MNIST data...')
    with open_gzip('../../../data/data_mnist/mnist.pkl.gz', 'rb') as f:
        data_train, data_val, data_test = load_cpickle(f)

    dataset = open_shelve(destination, 'c')
    class_counter = dict()
    if args.n_samples == -1:
        print_message(message='Got MNIST dataset: ' + str(len(data_train[0])) +
                      ' : ' + str(len(data_val[0])) + ' : ' +
                      str(len(data_test[0])) + ', saving...')
        dataset['x'] = [reshape(x, (784, 1)) for x in data_train[0]]
        dataset['y'] = data_train[1]
    else:
        print_message(message='Got MNIST dataset: ' +
                      str(args.n_samples * 10) + ' : ' +
                      str(len(data_val[0])) + ' : ' + str(len(data_test[0])) +
Example #6
                        '--req_acc',
                        type=float,
                        default=0.96,
                        help='Required classification accuracy')
    parser.add_argument('-lev',
                        '--levels',
                        type=int,
                        default=(75, 50, 35, 20, 15, 10, 7, 5, 3, 2, 1, 0),
                        nargs='+',
                        help='Pruning percentile levels')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()
    print_message(message='EXAMPLE: MNIST dataset')
    print_param(description='Number of experiment observations',
                param_str=str(args.n_obs))
    print_param(description='Initial number of hidden neurons',
                param_str=str(args.hidden_structure))
    print_param(description='Required accuracy', param_str=str(args.req_acc))

    params_str = '_hs' + str(args.hidden_structure) + '_ra' + str(
        args.req_acc).replace('.', '') + '_no' + str(args.n_obs)
    if args.generate:
        stats_data = list()
        for i_obs in range(1, args.n_obs + 1):
            print_message(message='MNIST experiment, observation ' +
                          str(i_obs) + '/' + str(args.n_obs))
            net = FeedForwardNet(hidden=args.hidden_structure,
                                 tf_name='Sigmoid')
Example #7
    print_message(message='Number of samples per class:')
    for phonem in sorted(samples.keys()):
        print_param(description=phonem, param_str=str(len(samples[phonem])))

if __name__ == '__main__':
    args = parse_arguments()
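    # Encode the main preprocessing parameters into the dataset file name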
    destination = 'dataset_speech_bs'+str(args.border_size)
    destination += '_cs'+str(args.context_size)+'_nf'+str(args.n_filters)
    destination += '_ds'+str(int(args.data_split[0]*10))+str(int(args.data_split[1]*10))+str(int(args.data_split[2]*10))
    destination += '_ns'+str(args.n_samples)+'_nr'+str(args.n_records)
    if args.phonemes:
        destination += '_'+str(args.phonemes).replace(',','+').replace(' ', '').replace('\'', '')[1:-1]
    destination += args.name_appendix+'.ds'

    print_message(message='Processing SPEECH data...')
    print_param(description='Path to features', param_str=args.feature_filename)
    print_param(description='Path to alignments', param_str=args.alignment_filename)
    print_param(description='Border size (strictness)', param_str=str(args.border_size))
    print_param(description='Context size', param_str=str(args.context_size))
    print_param(description='Number of MEL filters', param_str=str(args.n_filters))
    print_param(description='Number of records', param_str=str(args.n_records))
    print_param(description='Number of samples', param_str=str(args.n_samples))
    print_param(description='Maximum number of other phonemes', param_str=str(args.max_rest))
    print_param(description='Phonemes as classes', param_str=str(args.phonemes) if args.phonemes else 'all')
    print_param(description='Data split (train/val/test)', param_str=str(args.data_split))
    print_param(description='Dataset destination file name', param_str=destination)
    
    mlf = dict()
    features = dict()
    samples = dict()
Example #8
    if abs(sum(args_tmp.data_split) - 1) > 1e-5:
        stderr.write(
            'Error: data_split args must sum to 1.0 (e.g. 0.8 0.1 0.1).\n'
        )
        exit()
    else:
        return args_tmp


if __name__ == '__main__':
    args = parse_arguments()
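    # Cumulative sample counts at which the train/validation/test portions split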
    split_bounds = (args.n_samples * args.data_split[0],
                    args.n_samples * (args.data_split[0] + args.data_split[1]))
    destination = 'dataset_train' + args.name_appendix + '.ds'

    print_message(message='Generating and splitting TRAIN data...')
    data = {
        'x': list(),
        'y': list(),
        'x_val': list(),
        'y_val': list(),
        'x_test': list(),
        'y_test': list()
    }
    for ni in range(args.n_samples):
        if ni % 3 == 0:
            x_east = [0, 1, 1, 0, 0, 0, 1]
            x_west = [0, 1, 1, 1, 1, 0, 0]
        elif ni % 3 == 1:
            x_east = [0, 0, 1, 0, 1, 0, 0]
            x_west = [1, 1, 1, 0, 1, 0, 0]
Example #9
    parser = ArgumentParser(description='Run experiments and plot results for XOR dataset.')
    parser.add_argument('-g', '--generate', type=bool, default=False,
                        help='Generate new stats or load the dumped ones?')
    parser.add_argument('-no', '--n_obs', type=int, default=10,
                        help='Number of experiment observations')
    parser.add_argument('-hs', '--hidden_structure', type=int, default=[50], nargs='+',
                        help='Neural network structure')
    parser.add_argument('-ra', '--req_acc', type=float, default=1.0,
                        help='Required classification accuracy')
    parser.add_argument('-lev', '--levels', type=int, default=(75, 50, 35, 20, 10, 5, 1, 0), nargs='+',
                        help='Pruning percentile levels')
    return parser.parse_args()
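
One caveat in the parser above: argparse applies type=bool to the raw command-line string, so any non-empty value, including the literal 'False', parses as True; the flag only behaves as expected when omitted. A common alternative, shown purely as a sketch and not as the project's code, is a store_true switch:

from argparse import ArgumentParser

parser = ArgumentParser(description='Run experiments and plot results for XOR dataset.')
# store_true turns --generate into a plain on/off switch with no value to parse.
parser.add_argument('-g', '--generate', action='store_true',
                    help='Generate new stats instead of loading the dumped ones')

assert parser.parse_args(['--generate']).generate is True
assert parser.parse_args([]).generate is False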

if __name__ == '__main__':
    args = parse_arguments()
    print_message(message='EXAMPLE: XOR dataset')
    print_param(description='Number of experiment observations', param_str=str(args.n_obs))
    print_param(description='Initial number of hidden neurons', param_str=str(args.hidden_structure))
    print_param(description='Required accuracy', param_str=str(args.req_acc))

    params_str = '_hs'+str(args.hidden_structure)+'_ra'+str(args.req_acc).replace('.', '')+'_no'+str(args.n_obs)
    if args.generate:
        stats_data = list()
        for i_obs in range(1, args.n_obs+1):
            print_message(message='XOR experiment, observation '+str(i_obs)+'/'+str(args.n_obs))
            net = FeedForwardNet(hidden=args.hidden_structure, tf_name='Sigmoid')
            dataset = open_shelve('../examples/xor/dataset_xor.ds', 'c')
            net.fit(x=dataset['x'], y=dataset['y'], x_val=dataset['x_val'], y_val=dataset['y_val'], learning_rate=0.4,
                    n_epoch=50, req_acc=1.0)
            res = net.evaluate(x=dataset['x_test'], y=dataset['y_test'])
            print_message(message='Evaluation on test data after training:')