def plot_embedding(X, y, outtag, title=None, alpha=0.6):
    """Draw a 2-D embedding as coloured vowel glyphs and save the figure.

    Every column of ``X`` is min-max scaled, then each row ``i`` is drawn
    as a text glyph at ``(X[i, 0], X[i, 1])``.  The glyph text and colour
    are looked up with ``int(y[i])`` in the ``vowels`` and ``colors``
    sequences, which are not parameters of this function and must be
    available in the enclosing scope.

    Parameters
    ----------
    X : array-like
        Embedding coordinates, indexed as ``X[i, 0]`` and ``X[i, 1]``.
    y : sequence
        One label per plotted row; each is converted with ``int()``.
    outtag : str
        File name of the figure, joined onto ``cfg_figdir``.
    title : str, optional
        Figure title; omitted when ``None``.
    alpha : float
        Opacity of the glyphs.
    """
    # Work in floating point so integer input is not truncated by
    # Python 2 integer division during the scaling below.
    X = np.asarray(X, dtype=float)
    xmin, xmax = np.min(X, 0), np.max(X, 0)
    span = xmax - xmin
    # A constant column has zero range; divide it by 1 instead so it maps
    # to 0 rather than NaN (NaN would also invalidate the axis limits).
    span = np.where(span == 0, 1, span)
    X = (X - xmin) / span

    plt.figure()
    for i, label in enumerate(y):
        idx = int(label)
        plt.text(X[i, 0], X[i, 1],
                 u'$\\mathrm{%s}$' % sampa_to_unicode(vowels[idx]),
                 color=colors[idx],
                 alpha=alpha)
    plt.xticks([])
    plt.yticks([])
    if title is not None:
        plt.title(title)

    # Leave a small margin around the scaled data.
    lower, upper = np.min(X, 0), np.max(X, 0)
    plt.xlim(lower[0] - 0.01, upper[0] + 0.01)
    plt.ylim(lower[1] - 0.01, upper[1] + 0.01)
    plt.savefig(os.path.join(cfg_figdir, outtag))
def _trim_formant_outliers(f1, f2, percentile):
    """Drop (F1, F2) pairs that fall outside the central percentile band.

    A pair is kept only when both of its values lie strictly between the
    ``100 - percentile`` and ``percentile`` scores of their own series.
    Returns the filtered ``(f1, f2)`` arrays; they may be empty.
    """
    f1_low = stats.scoreatpercentile(f1, 100 - percentile)
    f1_high = stats.scoreatpercentile(f1, percentile)
    f2_low = stats.scoreatpercentile(f2, 100 - percentile)
    f2_high = stats.scoreatpercentile(f2, percentile)

    keep = np.logical_and(np.logical_and(f1 > f1_low, f1 < f1_high),
                          np.logical_and(f2 > f2_low, f2 < f2_high))
    return f1[keep], f2[keep]


def plot_vowels(outfiletag,
                vowels=None,
                scale='log',
                percentile=99,
                speakers=None,
                verbose=True,
                minsamples=1):
    """Scatter-plot F1 against F2 per vowel and save the figure as a PNG.

    Formants are sampled from the merged IFA and CGN corpora.  Columns 3
    and 4 of each vowel's sample array are used as F1 and F2.  For every
    vowel the outliers are trimmed, the remaining points are drawn as a
    translucent cloud, and the vowel's mean is marked with its glyph.
    Both axes are inverted (larger values at the origin side).

    Parameters
    ----------
    outfiletag : str
        File name without extension; ``'.png'`` is appended and the
        result is joined onto ``cfg_figdir``.
    vowels : list, optional
        Vowels to plot; defaults to ``list(vowels_sampa)``.
    scale : str
        Must be one of ``'log'``, ``'linear'``, ``'bark'``, ``'mel'``.
        Only ``'log'`` changes the output (logarithmic axes); the
        formants themselves are always sampled with ``scale='hertz'``.
    percentile : number
        Upper percentile of the band kept by the outlier filter; the
        lower bound is ``100 - percentile``.
    speakers : optional
        Accepted for interface compatibility; not used by this function.
    verbose : bool
        Print a per-vowel summary line.
    minsamples : int
        Vowels with fewer sampled rows than this are dropped.

    Raises
    ------
    ValueError
        If ``scale`` is not one of the allowed values.
    """
    if vowels is None:
        vowels = list(vowels_sampa)

    allowed_scales = ['log', 'linear', 'bark', 'mel']
    if scale not in allowed_scales:
        raise ValueError('scale must be one of [%s]' % ', '.join(allowed_scales))

    cgn_corpus = cgn.CGN()
    ifa_corpus = ifa.IFA()
    corpus = MergedCorpus([ifa_corpus, cgn_corpus])
    fm = formants.FormantsMeasure(corpus)
    fm.info()
    forms = fm.sample(vowels, equal_samples=True, scale='hertz')

    # A list (not a lazy filter object) so it can be iterated repeatedly.
    vowels = [v for v in vowels if forms[v].shape[0] >= minsamples]

    plt.figure()
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    # One (vowel, f1, f2, f1_mean, f2_mean) entry per vowel that is drawn.
    plotted = []
    for vowel in vowels:
        f1, f2 = _trim_formant_outliers(forms[vowel][:, 3],
                                        forms[vowel][:, 4],
                                        percentile)
        # The strict comparisons in the filter can remove every sample
        # (always the case for a single-sample vowel); min/max/mean of an
        # empty array are undefined, so such vowels are left out.
        if f1.shape[0] == 0:
            if verbose:
                print('vowel: %s\tskipped: no samples left after percentile filtering' % vowel)
            continue

        f1_mean = np.mean(f1, axis=0)
        f2_mean = np.mean(f2, axis=0)
        plotted.append((vowel, f1, f2, f1_mean, f2_mean))
        if verbose:
            print('vowel: %s\tobserved: %d\t sample mean (f1,f2): (%.3f,%.3f)' % (vowel, f2.shape[0], f1_mean, f2_mean))

    # Point clouds first, so the mean glyphs below are drawn on top.
    for n, (vowel, f1, f2, _, _) in enumerate(plotted):
        plt.scatter(f2, f1,
                    color=colors[n % len(colors)],
                    label=u'$\\mathrm{%s}$' % sampa_to_unicode(vowel),
                    alpha=0.2)
    for vowel, _, _, f1_mean, f2_mean in plotted:
        plt.scatter(f2_mean, f1_mean,
                    s=80,
                    color='k',
                    marker=u'$\\mathrm{%s}$' % sampa_to_unicode(vowel))

    if plotted:
        min_x = min(np.min(entry[2]) for entry in plotted)
        max_x = max(np.max(entry[2]) for entry in plotted)
        min_y = min(np.min(entry[1]) for entry in plotted)
        max_y = max(np.max(entry[1]) for entry in plotted)
        # Limits are passed high-to-low, which inverts both axes.
        # NOTE(review): with scale='log' the lower bound (min - 100) may be
        # non-positive for small formant values -- confirm this is intended.
        plt.xlim(max_x + 100, min_x - 100)
        plt.ylim(max_y + 100, min_y - 100)

    plt.xlabel(r'F2')
    plt.ylabel(r'F1')
    if scale == 'log':
        plt.xscale('log')
        plt.yscale('log')
    plt.legend(loc='lower left')
    plt.savefig(os.path.join(cfg_figdir, outfiletag + '.png'))