Exemple #1
0
def find_all():
    
    print '\nTesting that each random partitioning algorithm can discover the entire feasible set.'
    q = 20
    n = 5
    answer = 84 # there 84 partitions of 20 having 5 parts
    sample_size = 1000
    names = ['divide_and_conquer','multiplicity','top_down','bottom_up']
    for name in names:
        passtest = 0
        feasibleset = []
        D = {}
        while passtest < 1:
            partitions = parts.rand_partitions(q, n, sample_size, name, D, zeros=False)
            partitions = [list(x) for x in set(tuple(x) for x in partitions)]
            feasibleset.extend(partitions)
            feasibleset = [list(x) for x in set(tuple(x) for x in feasibleset)]
            if len(feasibleset) == answer:
                print 'entire feasible set found using',name,'PASS'
                passtest +=1
            
            elif len(feasibleset) > answer:
                print 'feasible set is larger than it should be. FAIL.'
                break
            else:
                print 'working to randomly generate entire feasible set for q='+str(q)+' n='+str(n)
            
    return
def time_trials_sage():
    """ Compare the speed of the random partitioning function of Sage to the
    functions developed in Locey and McGlinn (2013), for cases when zero
    values are and are not allowed."""

    fig = plt.figure()
    algorithms = ['multiplicity', 'top_down', 'divide_and_conquer', 'bottom_up']
    LABELS = ['Multiplicity:', 'Top down:', 'Divide and conquer:', 'Bottom up:']
    COLORS = ['#00CED1', '#FF1493', 'k', 'gray']
    qs = [50, 100, 150, 200]
    sample_size = 300
    
    for i, q in enumerate(qs):
    
        ax = fig.add_subplot(2,2,i+1)
        print '\n',q
        
        step = int(q/10)
        ns = range(step, int(0.5*q)+step,step)
        
        Sage_times = []
        for n in ns:    
            print 'Sage', n
            
            with Timer() as t:
                x = rand_part_Sage(q, n, sample_size)
            Sage_times.append(round(t.interval,2))
        
        print 'Sage', q, Sage_times, '\n'
        
        for ii, name in enumerate(algorithms):
            times = []
            for n in ns:
                zeros=False
                D = {}    
                with Timer() as t:
                    x = parts.rand_partitions(q, n, sample_size, name, D, zeros)
                times.append(round(t.interval,2))
            
            Y = []
            for iii, t in enumerate(times):
                if Sage_times[ii] > 0.0:
                    Y.append(float(Sage_times[iii])/t)
                else:
                    Y.append(1.0)
            
            plt.plot(ns, Y, lw=3, color=COLORS[ii], label=LABELS[ii])
            plt.yscale('log')
            
        if i == 0:
            plt.text(1.5,11,'Sage/algorithm',fontsize=10,rotation='90')
            legend(bbox_to_anchor=(-0.03, 1.1, 2.25, .2), loc=10, ncol=4, mode="expand",prop={'size':9})#, borderaxespad=0.)
        
        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.xlabel('number of parts',fontsize=10)
        
    plt.savefig('time_trials_sage.png', dpi=500) #, pad_inches=0)
    return
def time_trials_bigQ(  ):
    """Time the random partitioning algorithms for large q (zeros not allowed).

    Draws a 2x2 figure, one panel per q in (5000, 20000, 200000, 500000).
    Each successive panel drops the last algorithm of the previous one
    (all four, then three, two, and finally 'multiplicity' alone) and doubles
    the values of n.  For every (algorithm, n) pair the time to draw a single
    partition is recorded and plotted against n.  The figure is saved as
    'time_trials_bigQ.png'.
    """
    fig = plt.figure()
    sample_size = 1
    qs = [5000, 20000, 200000, 500000] # at these values, this script will take >1 day to run

    # (algorithm name, legend label, line color), in plotting order.
    series = [
        ('multiplicity', 'Multiplicity:', '#00CED1'),
        ('top_down', 'Top down:', '#FF1493'),
        ('divide_and_conquer', 'Divide and conquer:', 'k'),
        ('bottom_up', 'Bottom up:', 'gray'),
    ]
    # Numbers of parts used in each panel.
    panel_ns = [
        [10,20,40,80],
        [20,40,80,160],
        [40,80,160,320],
        [80,160,320,640],
    ]

    for panel, q in enumerate(qs):
        fig.add_subplot(2,2,panel+1)
        ns = panel_ns[panel]

        # Panel k uses only the first (4 - k) algorithms.
        for name, label, color in series[:len(series)-panel]:
            times = []
            for n in ns:
                with Timer() as t:
                    parts.rand_partitions(q, n, sample_size, name, {}, zeros=False)
                times.append(round(t.interval,2))

            plt.plot(ns,times,color=color,lw=3,label=label)

        plt.xlabel('number of parts, sample size='+str(sample_size)+')',fontsize=8)
        plt.tick_params(axis='both', which='major', labelsize=8)

        # Only the left-hand panels carry a y label; the first also holds the legend.
        if panel == 0:
            plt.ylabel("Seconds",fontsize=10)
            legend(bbox_to_anchor=(0.0, 1.1, 2.2, .2), loc=10, ncol=4, mode="expand",prop={'size':9})#, borderaxespad=0.)
        elif panel == 2:
            plt.ylabel("Seconds",fontsize=10)

    plt.savefig('time_trials_bigQ.png', dpi=500)#, pad_inches=0)
    return
Exemple #4
0
def get_var_for_Q_N(q, n, sample_size, t_limit, analysis):
    """Given q and n, returns a list of variance of length sample size with variance of 
    
    each sample partitions or compositions.
    
    """
    QN_var = []
    try:
        with time_limit(t_limit):
            for Niter in range(sample_size):
                if analysis == 'partition':
                    QN_parts = parts.rand_partitions(q, n, 1, 'bottom_up', {}, True)
                else: QN_parts = rand_compositions(q, n, 1, True)
                QN_var.append(np.var(QN_parts[0], ddof = 1))
            return QN_var
    except TimeoutException, msg:
        print 'Timed out!'
        return QN_var
def get_all_SSADs(qnlist): # Figure X Locey and McGlinn (2013)      
    
    s_size = 300 
    i = 1
    fig = plt.figure()
    q = qnlist[0]
    nlist = qnlist[1]
    
    colors = ['#00CED1', '#FF1493', 'gray']
    
    for ind, num in enumerate(nlist):
        ax = fig.add_subplot(3,3,i)
        
        n = num  
        print i
        ct = 0
        clr = colors[ind]
        
        sample_size = s_size
        zeros = True    
        D = {}
        name = 'divide_and_conquer'
        SSADs = parts.rand_partitions(q, n, sample_size, name, D, zeros)
        
        for SSAD in SSADs:
            Y = vectorTohist(SSAD, zeros)
            
            while len(Y) > 40:
                Y.pop()   
            while len(Y) < 40:
                Y.append(0)
                
            x = range(0,40)
            plt.bar(x,Y, color=clr, linewidth=0, align='center', alpha = 0.015)
                
        plt.bar([0],[0], color=clr, linewidth=0, align='center', label= 'q='+str(q)+', n='+str(n))      
        ct+=1           
        plt.xlim(-1,8)
        plt.xlabel("Abundance class",fontsize=8)
        plt.ylabel("Frequency",fontsize=8)
        plt.tick_params(axis='both', which='major', labelsize=5)
        plt.setp(ax, xticks=[0,1,2,3,4,5,6,7,8])
        leg = plt.legend(loc=1,prop={'size':7})
        leg.draw_frame(False)
          
        i+=1
        
        ct = 0
        ax = fig.add_subplot(3,3,i)
        print i
        clr = colors[ind]
        sample_size = s_size
        zeros = True    
        D = {}
        name = 'divide_and_conquer'
        partitions = parts.rand_partitions(q, n, sample_size, name, D, zeros)
            
        skews = []
        for partition in partitions:
            skews.append(stats.skew(partition))
        
        D = get_kdens(skews)
        plt.plot(D[0],D[1],color = clr,lw=3, alpha = 0.99,label= 'n='+str(n))
        plt.xlabel("Skewnness",fontsize=8)
        
        plt.tick_params(axis='both', which='major', labelsize=5)
        plt.ylabel("Density",fontsize=8)
        i+=1
                
        ct = 0
        ax = fig.add_subplot(3,3,i)
        print i
        clr = colors[ind]
        sample_size = s_size
        zeros = False    
        D = {}
        name = 'divide_and_conquer'
        
        RADs = parts.rand_partitions(q, n, sample_size, name, D, zeros)
            
        ranks = range(1,n+1)
        max_ab = 0
        varlist = []
        for rad in RADs:
            log_rad = list(np.log(rad))
            
            variance = np.var(rad, ddof=1)
            varlist.append(variance)
            
            if max(rad) > max_ab: max_ab = max(rad)
            plt.plot(ranks,rad, color=colors[ind], lw=1.0,alpha=0.04)    
        print ' log(mean) vs. log(variance):', np.log(q/n), np.log(np.mean(varlist))
        plt.tick_params(axis='both', which='major', labelsize=5)
        plt.yscale('log')
        plt.xlabel("Rank",fontsize=8)
        plt.ylabel("Abundance",fontsize=8)
        i+=1
        
        
    plt.subplots_adjust(wspace=0.4, hspace=0.12)    
    plt.savefig('SSADfig-'+str(q)+'-'+str(n)+'.png', dpi=600, bbox_inches = 'tight', pad_inches=0.01)
    print 'done'
def kdens_unbias(): 
    """ Compare the random partitioning functions of Sage and of Locey and McGlinn (2013)
    to full feasible sets, as a visual check for bias in the algorithms.

    A 2x2 figure is drawn. In every panel the kernel density of one metric
    (metrics[2], i.e. 'median') is plotted for 10000 random partitions from each of
    the four algorithms, together with a red reference curve:

      panel 1 (q=50, n=10, no zeros):  the full feasible set, enumerated with
                                       parts.first_lexical() / parts.next_restricted_part()
      panel 2 (q=50, n=10, zeros):     every Sage partition of q having at most n parts,
                                       padded with zeros to length n
      panel 3 (q=100, n=20, no zeros): random Sage partitions (or their conjugates)
                                       having exactly n parts
      panel 4 (q=100, n=20, zeros):    random Sage partitions having at most n parts,
                                       padded with zeros to length n

    The figure is saved as 'kdens_<metric>_<sample_size>.png'. """

    algs = ['multiplicity','top_down','divide_and_conquer','bottom_up']
    colors = ['#00CED1','#FF1493','k','gray']

    fig = plt.figure()
    nplot = 1 # a variable used to designate subplots
    sample_size = 10000 # min number of macrostates needed to safely capture distributional
                      # features across the feasible set
    
    metrics = ['gini', 'variance', 'median', 'skewness', 'evar']
    metric = metrics[2] # the metric whose density is plotted in every panel
        
    while nplot <= 4:
        ax =fig.add_subplot(2,2,nplot)

        if nplot < 3:
            q = 50 # values of q and n small enough to generate 
            n = 10 # the entire feasible set
        else:
            q = 100 # values of q and n requiring random samples
            n = 20  # of feasible sets
        
        # One density curve per algorithm; odd panels exclude zeros, even panels allow them.
        partitions = []
        for i, alg in enumerate(algs):
            if nplot == 1 or nplot == 3:
                zeros = False
                D = {}
                partitions = parts.rand_partitions(q, n, sample_size, alg, D, zeros)
            else:
                D = {}
                zeros = True
                partitions = parts.rand_partitions(q, n, sample_size, alg, D, zeros)
                
            # kdens[0] and kdens[1] are used as the x and y values of the curve
            kdens = mt.get_kdens_obs(partitions, metric)
            plt.xlim(min(kdens[0]), max(kdens[0]))
            plt.plot(kdens[0], kdens[1], color=colors[i], lw=0.7)
            
        if nplot == 1: # using the full feasible set, no zero values (i.e. proper integer partitions)
            
            partitions = []
            numparts = parts.NrParts(q, n)    
            partition = parts.first_lexical(q, n, None)
            partitions.append(partition)
            ct2 = 0 # unused
            # Step from one partition to the next until numparts of them are collected.
            while len(partitions) < numparts:
                    
                partition = parts.next_restricted_part(partition)
                if len(partition) == n: partitions.append(partition) 
                else:
                    # a partition with the wrong number of parts aborts the whole script
                    print 'bug in next_restricted_part()'
                    sys.exit()    
                
            kdens = mt.get_kdens_obs(partitions, metric)
            plt.xlim(min(kdens[0]), max(kdens[0]))
            plt.plot(kdens[0], kdens[1], color='r', lw=3.0, alpha=0.5)
                
        elif nplot == 2: # using the full feasible set, zero values included
            partitions = []    
                
            # Keep every partition of q with at most n parts; longer ones are dropped.
            for p in Partitions(q):
                partition = list(p)
                
                if len(partition) == n:
                    partitions.append(partition) 
                    
                elif len(partition) < n:
                    # pad with zeros up to n parts
                    zeros = [0]*(n-len(partition))
                    partition.extend(zeros)
                    partitions.append(partition)
            
            kdens = mt.get_kdens_obs(partitions, metric)
            plt.xlim(min(kdens[0]), max(kdens[0]))
            plt.plot(kdens[0], kdens[1], color='r', lw=3.0, alpha=0.5)
    
        elif nplot == 3: 
            partitions = []
            while len(partitions) < sample_size: # Use the random partition function in Sage to generate partitions for q and n
                partition = Partitions(q).random_element()
                if len(partition) == n:
                    partitions.append(partition)
                     
                else:
                    # otherwise try the conjugate; keep it only if it has exactly n parts
                    partition = parts.conjugate(partition)
                    if len(partition) == n:
                        partitions.append(partition)
                             
            kdens = mt.get_kdens_obs(partitions, metric)
            plt.xlim(min(kdens[0]), max(kdens[0]))
            plt.plot(kdens[0], kdens[1], color='r', lw=3.0, alpha=0.5)
        
        elif nplot == 4:
            partitions = []
            while len(partitions) < sample_size: # Use the random partition function in Sage to generate partitions for q and n
                part = list(Partitions(q).random_element())
                if len(part) == n:
                    partitions.append(part)
                
                elif len(part) < n:
                    # pad with zeros up to n parts; partitions with more than n parts are discarded
                    zeros = [0]*(n - len(part))
                    part.extend(zeros)
                    partitions.append(part)
                
            kdens = mt.get_kdens_obs(partitions, metric)
            plt.xlim(min(kdens[0]), max(kdens[0]))
            plt.plot(kdens[0], kdens[1], color='r', lw=3.0, alpha=0.5)
                     
        if nplot == 1:
            # Single-point plots at the origin that exist only to create the legend entries.
            plt.plot([0],[0], color='#00CED1', lw=2, label = 'Multiplicity')
            plt.plot([0],[0], color='#FF1493',lw=2, label='Top-down')    
            plt.plot([0],[0], color='k',lw=2, label='Divide & Conquer')
            plt.plot([0],[0], color='gray',lw=2, label='Bottom-up')
            plt.plot([0],[0], color='r',lw=2, label='FS q='+str(q)+', n='+str(n),alpha=0.5)
            plt.legend(bbox_to_anchor=(-0.02, 1.00, 2.24, .2), loc=10, ncol=5, mode="expand",prop={'size':8})#, borderaxespad=0.)
            
        # y label on the left column only, x label on the bottom row only
        if nplot == 1 or nplot == 3:
            plt.ylabel("density", fontsize=12)    
        
        if nplot == 3 or nplot == 4:
            plt.xlabel(metric, fontsize=12)
        
        print nplot
        nplot+=1
        
        plt.tick_params(axis='both', which='major', labelsize=8)
        
    plt.savefig('kdens_'+metric+'_'+str(sample_size)+'.png', dpi=500, pad_inches=0)
Exemple #7
0
def bias_check():

    print '\nTesting algorithms for bias across combinations of q and n, allowing or excluding zero-valued parts.'
    qn_combos = [[50,10],[100,20],[200,40]]
    
    for combo in qn_combos:
        q = combo[0]
        n = combo[1]
        
        sagepartitions = []     
        DATA = open('./testfiles/sage_zeros_q=' + str(q) + '_n='+str(n)+'.txt','r')
        for line in DATA:
            partition = eval(line)
            sagepartitions.append(partition)
        sagevars = []
        for partition in sagepartitions:
            var = np.var(partition, ddof=1)
            sagevars.append(var)
              
        names = ['divide_and_conquer','multiplicity','top_down','bottom_up']
        for name in names:
            D = {}
            sample_size = len(sagepartitions)
            
            passes = 0
            tries = 0
            while tries < 10:
                partitions = parts.rand_partitions(q, n, sample_size, name, D, zeros=True)
                #partitions = [list(x) for x in set(tuple(x) for x in partitions)]
            
                myvars = []
                for partition in partitions:
                    var = np.var(partition, ddof=1)
                    myvars.append(var)
                
                tries+=1
                ks, p = stats.ks_2samp(sagevars, myvars)
                #t, p = stats.ttest_ind(sagevars, myvars)
                if p > 0.05:
                    passes+=1
            print name,'(q =', q,' n =',n,') with zeros: PASSED', passes,'out of',tries 
            
        DATA.close()  
        
        sagepartitions = []     
        DATA = open('./testfiles/sage_q=' + str(q) + '_n='+str(n)+'.txt','r')
        for line in DATA:
            partition = eval(line)
            sagepartitions.append(partition)
        sagevars = []
        for partition in sagepartitions:
            var = np.var(partition, ddof=1)
            sagevars.append(var)
              
        names = ['divide_and_conquer','multiplicity','top_down','bottom_up']
        for name in names:
            D = {}
            sample_size = len(sagepartitions)
            
            passes = 0
            tries = 0
            while tries < 10:
                partitions = parts.rand_partitions(q, n, sample_size, name, D, zeros=False)
                #partitions = [list(x) for x in set(tuple(x) for x in partitions)]
            
                myvars = []
                for partition in partitions:
                    var = np.var(partition, ddof=1)
                    myvars.append(var)
                
                tries+=1
                ks, p = stats.ks_2samp(sagevars, myvars)
                #t, p = stats.ttest_ind(sagevars, myvars)
                if p > 0.05:
                    passes+=1
            print name,'(q =', q,' n =',n,') no zeros: PASSED', passes,'out of',tries
            
        DATA.close()           
    return
Exemple #8
0
        for q in qs:
            
            if q <= 100: step = 1
            elif q <= 300: step = 5
            elif q <= 600: step = 10
            else: step = 20
            
            n = int(step)
                
            while n <= q:  
                print alg, zero, alg, q
                
                times = []
                D = {}    
                t0 = time.time()
                x = parts.rand_partitions(q, n, sample_size, alg, D, zeros)
                t = time.time() - t0
                times.append([n,t])
                
                if zero == True:
                    OUT = open('time_files/Python_' + alg + '_zeros_q=' + str(q) + '.txt','a+')
                elif zero == False:
                    OUT = open('time_files/Python_' + alg + '_q=' + str(q) + '.txt','a+')
                
                for i in times:
                    print>>OUT, i[0], i[1]
                
                OUT.close()
                n+=step