Ejemplo n.º 1
0
    def __init__(self, tlb_type, entry_num):
        """Set up the statistics collector and the storage layout of the TLB.

        Args:
            tlb_type: One of the ``Structure`` constants tested below
                (FULLY_ASSOCIATIVE, DIRECT_MAP, SET2_ASSOCIATIVE,
                SET4_ASSOCIATIVE).
            entry_num: Total number of entries the TLB holds.

        Raises:
            NotImplementedError: If ``tlb_type`` is ``Structure.DIRECT_MAP``
                (not implemented yet) or any value not listed above.
        """
        self.stats = stats.Statistics()
        self.tlb_type = tlb_type

        if tlb_type == Structure.FULLY_ASSOCIATIVE:
            # A fully associative cache has multiple blocks in one set.
            self.set_num = entry_num
            # We do not need an index for a fully-associative tlb/cache.
            self.set_index = 1
            # Build one Entry per slot. ``[Entry()] * n`` would store n
            # references to a single shared object, so updating one slot
            # would silently update all of them.
            self.content = [tlb_entry.Entry() for _ in range(entry_num)]

        elif tlb_type == Structure.DIRECT_MAP:
            # Intended layout once implemented: set_num = 1,
            # set_index = entry_num (the index is the total number of
            # entries), followed by self.init_content_set().
            raise NotImplementedError(
                "The direct map tlb is not implemented yet\n")

        elif tlb_type == Structure.SET2_ASSOCIATIVE:
            self.set_num = 2
            # The index is the total number of entries / N (number of sets).
            self.set_index = int(entry_num / 2)
            self.init_content_set()

        elif tlb_type == Structure.SET4_ASSOCIATIVE:
            self.set_num = 4
            # The index is the total number of entries / N (number of sets).
            self.set_index = int(entry_num / 4)
            self.init_content_set()

        else:
            raise NotImplementedError("unimplemented tlb type")
def main():
    """Print the project banner, then run the interactive statistics loop.

    Each pass creates a ``BikeShare``, asks it for the city/month/day
    filters, loads the matching data and prints every statistics report.
    The loop ends when the user answers ``N`` (case-insensitive) to the
    restart prompt; any other answer starts another pass.
    """
    rule = '-------------------------------------'
    banner = (
        rule,
        'Machine Learning Foundation - Udacity',
        'Project #1: Explore US Bikeshare Data',
        'Student Name: Sugam Khetrapal',
        rule,
    )
    for text in banner:
        print(text)

    keep_going = True
    while keep_going:
        session = BikeShare()
        city_name, month_name, day = session.user_input()
        frame = session.load_city_file(city_name, month_name, day)

        report = stats.Statistics(frame)
        report.time_stats()
        report.station_stats()
        report.trip_duration_stats()
        report.user_stats()
        report.table_stats(city_name)

        answer = input("Do you want to start again? (Y/N)")
        keep_going = answer.upper() != 'N'
Ejemplo n.º 3
0
 def setUp(self):
     """Create a ``stats.Statistics()`` with no arguments and keep it on ``self.stats``."""
     # NOTE(review): the name suggests a unittest-style per-test fixture
     # hook -- confirm against the enclosing class, which is not shown here.
     self.stats = stats.Statistics()
Ejemplo n.º 4
0
def _add_nemenyi_table(document, heading, frame):
    """Append a level-5 heading and a grid table holding *frame* to *document*.

    The first table row carries the column labels of *frame*, the first
    table column carries its index labels, and the remaining cells hold
    the stringified values.
    """
    document.add_heading(heading, level=5)
    n_rows, n_cols = frame.shape
    table = document.add_table(n_rows + 1, n_cols + 1,
                               style='Light Grid Accent 1')
    for j, label in enumerate(frame.columns):
        table.cell(0, j + 1).text = label
    for i, label in enumerate(frame.index):
        table.cell(i + 1, 0).text = label
    # Add the rest of the data frame.
    values = frame.values
    for i in range(n_rows):
        for j in range(n_cols):
            table.cell(i + 1, j + 1).text = str(values[i, j])


def tests(var,data,document,met,colors,run=''):
    """Run the statistical tests for one metric and write the results to *document*.

    Relies on the module-level names ``out``, ``exp``, ``samples``,
    ``samples_int`` and ``table_docx`` in addition to the arguments.

    Args:
        var: Name of the variable under study. ``"Ecoli strains"`` selects
            the swarm-plot path; any other value except ``'Phred'`` makes
            the rows get sorted by the numeric suffix after the last ``_``
            of their index label.
        data: DataFrame of results; it is copied, never modified in place.
        document: Report object receiving pictures, paragraphs, headings
            and tables (presumably a python-docx ``Document`` -- confirm).
        met: Metric name, used in plot labels and output file names.
        colors: Passed through to ``ob.scatter_plot``.
        run: Suffix appended to output file names; the value
            ``'without_zscore'`` disables the scatter, correlation and
            line plots.

    Returns:
        The ``stats.Statistics`` object built for this metric.
    """
    df = data.copy()
    pk_reads = 1
    pk_assemblers = 1
    nem = None  # only assigned when a Kruskal p-value is below 0.05

    if var != "Ecoli strains":
        if var != 'Phred':
            order = (df.index.to_series().str.rsplit('_').str[-1]
                     .astype(float).sort_values())
            df = df.reindex(index=order.index)
    else:
        # Collect the long-format rows first and build the frame once:
        # DataFrame.append copied the whole frame on every call and was
        # removed in pandas 2.0.
        transposed = df.T
        rows = []
        for col in transposed.columns:
            gen = col.split('-')[0][4:]
            for i, value in enumerate(transposed[col]):
                rows.append([gen, transposed.index[i], value])
        df_print = pd.DataFrame(rows, columns=["Genome", "Assembler", met])

        sns.swarmplot(x="Genome", y=met, hue="Assembler", data=df_print,
                      palette="Set1")
        plt.ylabel(met)
        plt.xticks(rotation=90)
        #5132068,4641652,5437407,4894879
        #iai39,k12,o104,o83
        swarm_path = out + met + '_swarm' + run + '.png'
        fig = plt.gcf()
        fig.set_size_inches(12, 8)
        fig.savefig(swarm_path, dpi=300)
        fig.clear()
        document.add_picture(swarm_path, width=Inches(6.25))
    table_docx(document, df)

    with_extra_plots = run != 'without_zscore' and var != "Ecoli strains"

    ob = stats.Statistics(df, exp, var, met, "outputs_csv/", samples_int, run)
    if with_extra_plots:
        # The scatter picture is reused when it already exists on disk.
        if not os.path.isfile(out + met + run + ".png"):
            ob.scatter_plot(colors)
        document.add_picture(out + met + run + '.png', width=Inches(6.25))

    norms = ob.normality()

    # Check that the results are not all the same: at least one value has
    # to differ for the tests below to be run.
    equal = []
    for item in df.values:
        for each2 in item:
            if each2 not in equal:
                equal.append(each2)

    if len(equal) > 1:
        normal = norms[0] and norms[1]
        if normal:
            # The ANOVA return value is not used here; the call itself is
            # kept from the original flow.
            ob.oneway()
        k = ob.kruskal()
        pk_assemblers = k[0].pvalue
        pk_reads = k[1].pvalue
        if pk_assemblers < 0.05 or pk_reads < 0.05:
            nem = ob.nemenyi_test()
            # Correlation and regression are only run on the non-normal path.
            if not normal and with_extra_plots:
                ob.correlation_pearson()
                document.add_picture(out + 'corr_' + met + run + '.png',
                                     width=Inches(6.00))
                try:
                    ob.linear_regression()
                except Exception:
                    # Best effort: a failed regression must not abort the
                    # report. Narrowed from a bare ``except`` so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    pass
    ob.results.close()

    # ``with`` closes the stats file; it was previously left open.
    with open("outputs_csv/final_stats_" + met + "" + run + ".txt") as temp:
        for line in temp:
            document.add_paragraph(line.strip())

    if pk_assemblers < 0.05:
        _add_nemenyi_table(document, "Nemenyi Assemblers", nem[0])
    if pk_reads < 0.05:
        _add_nemenyi_table(document, "Nemenyi samples", nem[1])

    if with_extra_plots:
        df.index = samples_int
        plot_path = out + met + '_plot' + run + '.png'
        if not os.path.isfile(plot_path):
            df[var] = samples
            df.plot(x=var, title=met)
            plt.ylabel(met)
            plt.xticks(rotation=90)

            fig = plt.gcf()
            fig.set_size_inches(12, 8)
            fig.savefig(plot_path, dpi=300)
            fig.clear()
        document.add_picture(plot_path, width=Inches(6.25))

    box_path = out + met + '_box' + run + '.png'
    if not os.path.isfile(box_path):
        df.plot.box(title=met, whis=3)
        plt.ylabel(met)
        plt.xticks(rotation=90)

        fig = plt.gcf()
        fig.set_size_inches(12, 8)
        fig.savefig(box_path, dpi=300)
        fig.clear()

    document.add_picture(box_path, width=Inches(6.25))
    return ob