def save_false_positives_different_fnrs(first_pass_fnrs,
                                        detection_clusters,
                                        detection_array,
                                        detection_template_ids,
                                        template_lengths,
                                        window_start,
                                        window_end,
                                        example_end_start_times,
                                        utterances_path,
                                        train_file_indices,
                                        num_mix,fnr):
    thresh_id = int(len(detection_clusters)* fnr/100. + 5)
    (pos_times,
     false_pos_times,
     false_neg_times) = rf.get_pos_false_pos_false_neg_detect_points(
         detection_clusters[thresh_id],
         detection_array,
         detection_template_ids,
         template_lengths,
         window_start,
         window_end,example_start_end_times,
         utterances_path,
         train_file_indices,
         verbose=True)
    out = open('data/false_pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
    pickle.dump(false_pos_times,out)
    out.close()
    out = open('data/pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
    pickle.dump(pos_times,out)
    out.close()
    out = open('data/false_neg_times_%d_%d.pkl' % (num_mix,fnr),'wb')
    pickle.dump(false_pos_times,out)
    out.close()
def get_train_set_division(candidate_thresholds,num_mix,
                           detection_array,detection_lengths,
                           templates,fpr,tpr,first_pass_fnrs):
        detection_clusters = rf.get_detect_clusters_threshold_array(candidate_thresholds,
                                                                    detection_array,
                                                                   detection_lengths,
                                                                    C0,C1)
        out = open('data/detection_clusters_%d.npy' % num_mix,
                   'wb')
        cPickle.dump(detection_clusters,out)
        out.close()
        template_lengths = tuple(t.shape[0] for t in templates)
        for fnr in first_pass_fnrs:
            print "num_mix=%d,fnr=%d" % (num_mix,fnr)
            thresh_id = int(len(detection_clusters)* fnr/100. + 5)
            (pos_times,
             false_pos_times,
             false_neg_times) = rf.get_pos_false_pos_false_neg_detect_points(
                 detection_clusters[thresh_id],
                 detection_array,
                 detection_template_ids,
                 template_lengths,
                 window_start,
                 window_end,example_start_end_times,
                 utterances_path,
                 train_file_indices,
                 verbose=True)
            out = open('data/false_pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(false_pos_times,out)
            out.close()
            out = open('data/pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(pos_times,out)
            out.close()
            out = open('data/false_neg_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(false_pos_times,out)
            out.close()
def perform_phn_train_detection_SVM(phn, num_mix_params,
                                    train_example_lengths,bgd,
                                    train_path,file_indices,
                                    first_pass_fnrs):
    FOMS = collections.defaultdict(list)
    for num_mix in num_mix_params:
        if num_mix > 1:
            outfile = np.load('data/%d_templates.npz' % num_mix)
            templates = tuple( outfile['arr_%d'%i] for i in xrange(len(outfile.files)))
        else:
            templates = (np.load('data/1_templates.npy')[0],)
        detection_array = np.zeros((train_example_lengths.shape[0],
                            train_example_lengths.max() + 2),dtype=np.float32)
        linear_filters_cs = et.construct_linear_filters(templates,
                                                        bgd)
        np.savez('data/linear_filter_%d.npy'% num_mix,*(tuple(lfc[0] for lfc in linear_filters_cs)))
        np.savez('data/c_%d.npy'%num_mix,*(tuple(lfc[1] for lfc in linear_filters_cs)))
        syllable = np.array((phn,))
        (detection_array,
         example_start_end_times,
         detection_lengths,
         detection_template_ids)=get_detection_scores_mixture_named_params(
             utterances_path,
             file_indices,
             detection_array,
             syllable,
             linear_filters_cs,S_config=sp,
             E_config=ep,
             verbose = True,
             num_examples =-1,
             return_detection_template_ids=True)
        np.save('data/detection_array_%d.npy' % num_mix,detection_array)
        np.save('data/detection_template_ids_%d.npy' % num_mix,detection_template_ids)
        if num_mix == num_mix[0]:
            out = open('data/example_start_end_times.pkl','wb')
            cPickle.dump(example_start_end_times,out)
            out.close()
            out = open('data/detection_lengths.pkl','wb')
            cPickle.dump(detection_lengths,out)
            out.close()
        window_start = -int(np.mean(tuple( t.shape[0] for t in templates))/3.+.5)
        window_end = -window_start
        max_detect_vals = rf.get_max_detection_in_syllable_windows(detection_array,
                                                                   example_start_end_times,
                                                                   detection_lengths,
                                                                   window_start,
                                                                   window_end)
        np.save('data/max_detect_vals_%d.npy' % num_mix,max_detect_vals)
        C0 = int(np.mean(tuple( t.shape[0] for t in templates))/3.+.5)
        C1 = int( 33 * 1.5 + .5)
        frame_rate = 1/.005
        fpr, tpr = rf.get_roc_curve(max_detect_vals,
                                    detection_array,
                                    np.array(detection_lengths),
                                    example_start_end_times,
                                    C0,C1,frame_rate)
        np.save('data/fpr_%d.npy' % num_mix,
                fpr)
        np.save('data/tpr_%d.npy' % num_mix,
                tpr)
        detection_clusters = rf.get_detect_clusters_threshold_array(max_detect_vals,
                                                                    detection_array,
                                                                    np.array(detection_lengths),
                                                                    C0,C1)
        out = open('data/detection_clusters_%d.npy' % num_mix,
                   'wb')
        cPickle.dump(detection_clusters,out)
        out.close()
        for i in xrange(1,11):
            thresh_idx = np.arange(fpr.shape[0])[fpr*60 <= i].min()
            FOMS[num_mix].append(tpr[thresh_idx])
        template_lengths = tuple(t.shape[0] for t in templates)
        for fnr in first_pass_fnrs:
            print "num_mix=%d,fnr=%d" % (num_mix,fnr)
            thresh_id = int(len(detection_clusters)* fnr/100. + 5)
            (pos_times,
             false_pos_times,
             false_neg_times) = rf.get_pos_false_pos_false_neg_detect_points(
                 detection_clusters[thresh_id],
                 detection_array,
                 detection_template_ids,
                 template_lengths,
                 window_start,
                 window_end,example_start_end_times,
                 utterances_path,
                 train_file_indices,
                 verbose=True)
            out = open('data/false_pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(false_pos_times,out)
            out.close()
            out = open('data/pos_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(pos_times,out)
            out.close()
            out = open('data/false_neg_times_%d_%d.pkl' % (num_mix,fnr),'wb')
            pickle.dump(false_pos_times,out)
            out.close()
    return FOMS
tpr = np.load('data/tpr_%d.npy' % num_mix,
                )
# only are going to use those thresholds that matter

detection_clusters = rf.get_detect_clusters_threshold_array(max_detect_vals,
                                                            detection_array,detection_lengths,

                                                            C0,C1)

(pos_times,
 false_pos_times,
 false_neg_times) = rf.get_pos_false_pos_false_neg_detect_points(
                 detection_clusters[thresh_id],
                 detection_array,
                 detection_template_ids,
                 template_lengths,
                 window_start,
                 window_end,example_start_end_times,
                 utterances_path,
                 train_file_indices,
                 verbose=True)


bgd = np.load('data/bgd.npy')
FOMS = phn_detection.get_true_pos_division(phn, num_mix_params,
                                    train_example_lengths,bgd,
                                    train_path,file_indices,
                                    np.array([7]),sp,ep)


outfile = np.load('data/3_spec_templates_aa.npz')
spec_templates = tuple(outfile['arr_%d'%i] for i in xrange(len(outfile.files)))