def get_training_template(train_data_iter):
    """Collect positive examples from the iterator and estimate a mean template."""
    patterns = []
    train_data_iter.reset_exp()
    for datum_id in xrange(train_data_iter.num_data):
        if datum_id % 10 == 0:
            print datum_id
        if train_data_iter.next(wait_for_positive_example=True,
                                compute_patterns=True,
                                max_template_length=40):
            # the context length is 11
            for p in train_data_iter.patterns:
                pattern = p.copy()
                esp.threshold_edgemap(pattern,.30,edge_feature_row_breaks,
                                      report_level=False,
                                      abst_threshold=abst_threshold)
                esp.spread_edgemap(pattern,edge_feature_row_breaks,
                                   edge_orientations,spread_length=5)
                patterns.append(pattern)
        else:
            break
    _, _, \
        registered_examples, template \
        = et.simple_estimate_template(patterns)
    return registered_examples, template
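# A minimal usage sketch of the helper above, assuming the script has already
# constructed a train_data_iter over the target phone sequence; the save names
# below are illustrative placeholders, not the date-stamped files used
# elsewhere in these experiments.
registered_examples, mean_template = get_training_template(train_data_iter)
np.save('registered_examples_sketch', registered_examples)
np.save('mean_template_sketch', mean_template)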
output = open('all_patterns_piy050912.pkl','wb')
cPickle.dump(all_patterns,output)
output.close()

output = open('all_patterns_context_piy050912.pkl','wb')
cPickle.dump(all_patterns_context,output)
output.close()

# clip the averaged background edge probabilities into [.05, .4]
mean_background = E_avg.E.copy()
mean_background = np.maximum(np.minimum(mean_background,.4),.05)

template_height, template_length, \
    registered_templates, mean_template \
    = et.simple_estimate_template(all_patterns)
template_shape = np.array([template_height,template_length])
np.save('mean_template_piy050912',mean_template)
np.save('template_shape_piy050912',template_shape)

#
# Get the data for tuning the j0 threshold
#
# haven't run this stuff yet, but ran the stuff above
#
cPickle.dump(all_patterns,output)
output.close()

output = open('all_patterns_context050912.pkl','wb')
cPickle.dump(all_patterns_context,output)
output.close()

mean_background = E_avg.E.copy()
mean_background = np.maximum(np.minimum(mean_background,.4),.05)

template_height, template_length, \
    registered_templates, mean_template \
    = et.simple_estimate_template(all_patterns,template_length=33)
template_shape = np.array([template_height,template_length])
np.save('mean_template050912',mean_template)
np.save('template_shape050912',template_shape)

#
# Get the data for tuning the j0 threshold
#
# haven't run this stuff yet, but ran the stuff above
#
        all_patterns.extend(train_data_iter.patterns)
        all_pattern_parts.extend(train_data_iter.pattern_parts)
        E_avg.add_frames(train_data_iter.E,
                         train_data_iter.edge_feature_row_breaks,
                         train_data_iter.edge_orientations,
                         train_data_iter.abst_threshold)
    else:
        break

output = open('train_patterns_liy051012.pkl', 'wb')
cPickle.dump(all_patterns, output)
cPickle.dump(all_pattern_parts, output)
output.close()

_, _, \
    registered_ex_l, l_template \
    = et.simple_estimate_template([ex[0] for ex in all_pattern_parts])
np.save('registered_ex_l051012', registered_ex_l)
np.save('l_template051012', l_template)

_, _, \
    registered_ex_iy, iy_template \
    = et.simple_estimate_template([ex[1] for ex in all_pattern_parts])
np.save('registered_ex_iy051012', registered_ex_iy)
np.save('iy_template051012', iy_template)

_, _, \
    registered_ex_liy, liy_template \
    = et.simple_estimate_template(all_patterns)
    datum_id += 1
    esp._edge_map_threshold_segments(train_data_iter.E,
                                     40,
                                     1,
                                     threshold=.3,
                                     edge_orientations=train_data_iter.edge_orientations,
                                     edge_feature_row_breaks=train_data_iter.edge_feature_row_breaks)
    pattern_times = esp.get_pattern_times([np.array((phn,))],
                                          train_data_iter.phns,
                                          train_data_iter.feature_label_transitions)
    for p in pattern_times:
        patterns.append(train_data_iter.E[:,max(0,p[0]-offset):min(train_data_iter.E.shape[1],p[1]+offset)].copy())
        lens.append(p[1] - p[0] + 1)

# get mean length
mean_length = int(np.mean(np.array(lens)))
template_height, template_length, registered_examples, template \
    = et.simple_estimate_template(patterns,template_length=mean_length)
np.save(phn+'_registered_examples070212',registered_examples)
np.save(phn+'_template070212',template)

# fit Bernoulli mixture models with a range of component counts
# to the registered examples for each phone
import template_speech_rec.bernoulli_em as bem
import template_speech_rec.classification as cl

num_mix_list = [2,3,4,6,9]
for phn in phn_list:
    print phn
    registered_examples = np.load(phn+'_registered_examples070212.npy')
    for num_mix in num_mix_list:
        print num_mix
        bm = bem.Bernoulli_Mixture(num_mix,registered_examples)
        bm.run_EM(.00001)
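# The fitting loop above discards each mixture after EM converges; a sketch of
# how the fitted models could be persisted next to the other artifacts, assuming
# Bernoulli_Mixture instances pickle cleanly (the '_bm' file-name suffix is a
# placeholder of mine, not a name used elsewhere in these scripts).
for phn in phn_list:
    registered_examples = np.load(phn+'_registered_examples070212.npy')
    for num_mix in num_mix_list:
        bm = bem.Bernoulli_Mixture(num_mix,registered_examples)
        bm.run_EM(.00001)
        output = open(phn+'_bm'+str(num_mix)+'_070212.pkl','wb')
        cPickle.dump(bm,output)
        output.close()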
for phn_class in class_array[1:]:
    print phn_class
    train_masks = np.load(exp_path+phn_class+'_train_masks.npy')
    prev_lengths = np.zeros(np.sum(train_masks[0]))
    for fold_id, train_mask in enumerate(train_masks):
        print fold_id
        train_phn_examples = np.load(data_path+phn_class+"class_examples5.npy")
        train_phn_lengths = np.load(data_path+phn_class+"class_examples_lengths.npy")
        lengths = train_phn_lengths[train_mask].copy()
        # each fold should select a different subset of lengths
        assert not np.all(lengths==prev_lengths)
        prev_lengths = lengths
        # single template whose length is the rounded mean example length
        template_length = int(np.mean(lengths)+.5)
        template_height, template_length, registered_templates, template \
            = et.simple_estimate_template(
                [t[:,:l] for t,l in zip(train_phn_examples[train_mask],
                                        lengths)],
                template_length=template_length)
        del registered_templates
        np.save(exp_path+phn_class+str(fold_id)+'template1_1_0',template)
        # two-component length clustering: one template per length cluster
        k_vals, assignment_idx = kmeans_linear(2,lengths)
        k_vals = list(frozenset((k_vals + .5).astype(np.uint8)))
        print k_vals
        for i in xrange(len(k_vals)):
            template_height, template_length, registered_templates, template \
                = et.simple_estimate_template(
                    [t[:,:l] for t,l,idx in zip(train_phn_examples[train_mask],
                                                lengths,
                                                assignment_idx) if idx == i],
                    template_length=k_vals[i])
            np.save(exp_path+phn_class+str(fold_id)+'template2_1_'+str(i),template)
        # three-component length clustering
        k_vals, assignment_idx = kmeans_linear(3,lengths)
        k_vals = list(frozenset((k_vals + .5).astype(np.uint8)))
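# kmeans_linear is called above but not defined in this file; the sketch below
# is a plausible stand-in under the assumption that it runs a 1-D k-means over
# the scalar example lengths and returns (cluster centers, per-example
# assignment indices); the initialization and iteration cap are assumptions.
def kmeans_linear(k, lengths, num_iter=100):
    lengths = np.asarray(lengths, dtype=np.float64)
    # spread the initial centers over the observed range of lengths
    centers = np.linspace(lengths.min(), lengths.max(), k)
    assignment_idx = np.zeros(len(lengths), dtype=int)
    for _ in xrange(num_iter):
        # assign each length to its nearest center
        assignment_idx = np.argmin(np.abs(lengths[:,np.newaxis] - centers), axis=1)
        new_centers = centers.copy()
        for j in xrange(k):
            if np.any(assignment_idx == j):
                new_centers[j] = lengths[assignment_idx == j].mean()
        if np.allclose(new_centers, centers):
            break
        centers = new_centers
    return centers, assignment_idx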
        print datum_id
    if train_data_iter.next(wait_for_positive_example=True,
                            compute_patterns=True,
                            max_template_length=40):
        # the context length is 11
        for p in train_data_iter.patterns:
            pattern = p.copy()
            esp.threshold_edgemap(pattern,.30,edge_feature_row_breaks,
                                  report_level=False,
                                  abst_threshold=abst_threshold)
            esp.spread_edgemap(pattern,edge_feature_row_breaks,
                               edge_orientations,spread_length=3)
            liy_patterns.append(pattern)
    else:
        break

_, _, \
    registered_ex_liy, liy_template \
    = et.simple_estimate_template(liy_patterns)
np.save('registered_ex_liy051512',registered_ex_liy)
np.save('liy_template051512',liy_template)

mean_background = np.load('mean_background_liy051012.npy')
template_shape = liy_template.shape
tpm_liy = TwoPartModel(liy_template,mean_background,
                       2*template_shape[1]/3,)
#
# construct the basic experiment to see how mixtures, and, in particular,
# how mixtures of different lengths affect the classification rate
#

# load in the examples
phn = target_phn_list[0]
phn_examples = np.load(root_path+'Data/'+phn+'_examples.npy')
phn_lengths = np.load(root_path+'Data/'+phn+'_lengths.npy')
unregistered_examples = [p[:,:l] for p,l in zip(phn_examples,phn_lengths)]

# estimate one template per candidate length
template_lengths = [6,7,8,9,10]
template_versions = []
for l in template_lengths:
    t_height, t_length, _, template \
        = et.simple_estimate_template(unregistered_examples,template_length=l)
    template_versions.append(template)

# classification output
# going to create a list of the experiments;
# each entry will be error rates plus some explanation of the
# data provenance from the simulation
#
# #1 /p/ itself
# need to redo the feature extraction with the features being less spread;
# it would make sense to do this with the whole training data, this time simply
# letting it run overnight, and to make sure that everything is in order
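# Sketch of one way the length comparison above could be scored: treat each
# length-l template as a Bernoulli model of the edge map and sum the
# log-likelihood after truncating example and template to a common length.
# The truncation-based registration and the probability clipping constants are
# assumptions here, not the routines in template_speech_rec.classification.
def bernoulli_template_loglike(example, template, floor=.05, ceil=.95):
    # example: binary edge-feature map; template: per-cell edge probabilities
    T = np.clip(template, floor, ceil)
    L = min(example.shape[1], T.shape[1])
    E, T = example[:,:L], T[:,:L]
    return np.sum(E*np.log(T) + (1.-E)*np.log(1.-T))

# score every example under every candidate template length
scores = np.array([[bernoulli_template_loglike(ex, t) for t in template_versions]
                   for ex in unregistered_examples])
best_length_idx = np.argmax(scores, axis=1)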