def test(region_list, compare_file, compare_num):
    """
    Aggregate per-location HDF files into one location by elementwise summation.

    Uses a DataArray rather than a Dataset, and converts via
    xr_conversion.df_to_xr to minimize the overhead of pandas' to_xarray.

    Parameters
    ----------
    region_list : iterable of str
        Paths to HDF files. Each file must hold exactly one location_id so
        that dimension can be squeezed out before summing.
    compare_file : str
        Unused in this variant; kept for interface parity with the others.
    compare_num : int
        location_id label assigned to the aggregated result.

    Returns
    -------
    xarray.DataArray
        Aggregate with a single 'location_id' equal to compare_num, or
        None when region_list is empty.
    """
    da = None
    for file in region_list:
        print(file)
        temp = pd.read_hdf(file)
        temp = xr_conversion.df_to_xr(
            temp,
            dims=['age_group_id', 'sex_id', 'year_id', 'cause_id', 'location_id'],
            wide_dim_name='draw')
        # Each file carries a single location; drop the dim so arrays align
        # for the elementwise add below.
        temp = temp.squeeze('location_id')

        da = temp if da is None else da + temp

    if da is None:
        # Empty input: nothing to aggregate (original raised NameError here).
        return None
    da.coords['location_id'] = compare_num
    da = da.expand_dims('location_id')
    return da
def test(region_list, compare_file, compare_num):
    """
    Aggregate location files by concatenating along 'location_id' and summing.

    Each new file is stacked onto the running total along the location
    dimension and the stack is immediately reduced with sum, so the result
    accumulates every location seen so far (e.g. to represent a super
    region). After each reduction the now-scalar location coordinate is
    relabelled to compare_num so the next concat can align against it.
    To run and compare timing, comment out the call to compare_temp in the
    caller.

    Parameters
    ----------
    region_list : iterable of str
        Paths to HDF files, one per location.
    compare_file : str
        Unused in this variant; kept for interface parity with the others.
    compare_num : int
        location_id label assigned to the running aggregate.

    Returns
    -------
    xarray.DataArray or None
        The aggregated array, or None when region_list is empty.
    """
    da = None
    for file in region_list:
        print(file)
        temp = pd.read_hdf(file)
        temp = xr_conversion.df_to_xr(
            temp,
            dims=['location_id', 'age_group_id', 'sex_id', 'year_id', 'cause_id'],
            wide_dim_name='draw')
        if da is None:
            da = temp
        else:
            # Stack the running total with the new location, then collapse
            # the location axis to fold the new values in.
            da = xr.concat([da, temp], dim='location_id')
            da = da.sum(dim='location_id')
            # Relabel the collapsed (scalar) location coordinate.
            da.coords['location_id'] = compare_num
    return da
# Example #3
# 0
def test(region_list, compare_file, compare_num):
    """
    Aggregate per-location HDF files by summation, printing per-file timings.

    Same algorithm as the untimed DataArray variant: read each file, convert
    with xr_conversion.df_to_xr (cheaper than pandas' to_xarray), squeeze the
    single-location dimension, and add into a running total. Prints wall-clock
    timings for the pandas read and the xarray conversion of every file.

    Parameters
    ----------
    region_list : iterable of str
        Paths to HDF files; each must hold exactly one location_id.
    compare_file : str
        Unused in this variant; kept for interface parity with the others.
    compare_num : int
        location_id label assigned to the aggregated result.

    Returns
    -------
    xarray.DataArray or None
        Aggregate with a single 'location_id' equal to compare_num, or
        None when region_list is empty.
    """
    da = None
    for file in region_list:
        t2 = time.time()
        temp = pd.read_hdf(file)
        t3 = time.time()
        print("Time to read in an hdf5 file to pandas {time}".format(time=t3-t2))
        t4 = time.time()
        temp = xr_conversion.df_to_xr(
            temp,
            dims=['age_group_id', 'sex_id', 'year_id', 'cause_id', 'location_id'],
            wide_dim_name='draw')
        t5 = time.time()
        print("Time to convert from pandas to Xarray dataset {time}".format(time = t5-t4))
        print("Total upload time is {time}".format(time=t5-t2))
        # Each file carries a single location; drop it so arrays align.
        temp = temp.squeeze('location_id')
        print(file)

        da = temp if da is None else da + temp

    if da is None:
        # Empty input: nothing to aggregate (original raised NameError here).
        return None
    da.coords['location_id'] = compare_num
    da = da.expand_dims('location_id')
    return da
def read_hdf_to_netcdf(hdf_list):
    """
    Convert each HDF file in hdf_list to a NetCDF file next to the original.

    Each file is read with pandas, converted to a DataArray via
    xr_conversion.df_to_xr, and written as '<same path>.nc'.

    Parameters
    ----------
    hdf_list : iterable of str
        Paths to the HDF files to convert.
    """
    import os

    for file in hdf_list:
        print(file)
        df = pd.read_hdf(file)
        da = xr_conversion.df_to_xr(
            df,
            dims=['location_id', 'age_group_id', 'cause_id', 'sex_id', 'year_id'],
            wide_dim_name='draw')
        # os.path.splitext strips only the final extension; the original
        # file.split(".")[0] truncated at the FIRST dot anywhere in the
        # path, producing wrong names for dotted directories or filenames.
        base, _ = os.path.splitext(file)
        da.to_netcdf(base + ".nc")
# Example #5
# 0
def read_to_data_array(filelist):
    """
    Convert each HDF file in filelist to NetCDF, timing each conversion.

    Each file is read with pandas, converted to a DataArray via
    xr_conversion.df_to_xr, and saved as '<same path>.nc'; per-file
    read+save times are printed.

    Parameters
    ----------
    filelist : iterable of str
        Paths to the HDF files to convert.

    Returns
    -------
    list of str
        Paths of the NetCDF files written (the original built this list but
        discarded it; it is now returned).
    """
    import os

    netcdf_list = []
    for file in filelist:
        t2 = time.time()
        df = pd.read_hdf(file)
        # NOTE(review): wide_dim_name is 'draws' here but 'draw' in the
        # sibling functions — looks inconsistent; confirm which is intended.
        da = xr_conversion.df_to_xr(
            df,
            dims=['age_group_id', 'sex_id', 'year_id', 'cause_id', 'location_id'],
            wide_dim_name='draws')
        print(file)
        # splitext strips only the final extension; the original
        # file.split(".")[0] truncated at the first dot anywhere in the path.
        base, _ = os.path.splitext(file)
        netcdf_name = base + ".nc"
        da.to_netcdf(netcdf_name)
        t3 = time.time()
        netcdf_list.append(netcdf_name)
        print("Time to read and save to netcdf: {time}".format(time=t3-t2))
    return netcdf_list
def compare_temp(aggregated_da, comp_file):
    """
    Validate an aggregated DataArray against a reference country file.

    Reads comp_file into a DataArray shaped like the aggregation inputs and
    runs three checks, printing the outcome of each:
      1. strict structural equality via DataArray.equals;
      2. label overlap along 'location_id' (prints the matched slice);
      3. per-draw np.isclose comparison with NaN treated as equal.

    Parameters
    ----------
    aggregated_da : xarray.DataArray
        The summed super-region values to validate.
    comp_file : str
        Path to the HDF file holding the expected (reference) values.
    """
    print("In temp_compare function")

    # Reference data, converted the same way as the aggregation inputs.
    comp_df = pd.read_hdf(comp_file)
    comp_da = xr_conversion.df_to_xr(
        comp_df,
        dims=['location_id', 'age_group_id', 'sex_id', 'year_id', 'cause_id'],
        wide_dim_name='draw')

    # Check 1: exact equality of dims, coords, and values.
    if comp_da.equals(aggregated_da):
        print("EQUAL")
    else:
        print("NOT EQUAL")

    # Check 2: which aggregated location labels also exist in the reference.
    print("Next equality test:")
    good_coords = [
        val for val in aggregated_da.coords['location_id'].values
        if val in comp_da.coords['location_id'].values]
    matched = comp_da.loc[{'location_id': good_coords}]
    print("GOOD COORD VALS: {good}".format(good=good_coords))
    print("MATCHED VALS: {matched}".format(matched=matched))

    # Check 3: approximate per-draw comparison.
    print("Final test")
    check = True
    print("AGG {agg}".format(agg=aggregated_da))
    print("COMP {comp}".format(comp=comp_da))
    draw_vals = comp_da.coords['draw'].values
    print("DRAW_VALS {draw}".format(draw=draw_vals))
    for el in draw_vals:
        print("HERE")
        print("VALUE {el}".format(el=el))

        comp_val = comp_da[el]
        agg_val = aggregated_da[el]

        # Elementwise closeness; equal_nan=True counts NaN vs NaN as a match.
        compare = np.isclose(comp_val, agg_val, rtol=1e-05, atol=1e-08,
                             equal_nan=True)

        # BUG FIX: the original wrote `compare.all() is False`, an identity
        # comparison against Python's False singleton that a numpy bool
        # never satisfies — so mismatches were silently ignored.
        if not compare.all():
            print("not the same")
            check = False
            print("COMP {comp}".format(comp=comp_val))
            print("AGG {agg}".format(agg=agg_val))

    if check:
        print("ALL EQUAL!!!")
    else:
        print("NOT all equal")
# Example #7
# 0
def test(region_list, compare_file, compare_num):
    """
    Aggregate location files by concatenating along 'location_id' and summing.

    Each new file is stacked onto the running total along the location
    dimension and the stack is immediately reduced with sum, so the result
    accumulates every location seen so far and represents the super region.
    After each reduction the now-scalar location coordinate is relabelled to
    compare_num so new location files (like sub-Saharan Africa) can still be
    concatenated and summed in. To run and compare timing, comment out the
    call to compare_temp in the caller.

    Parameters
    ----------
    region_list : iterable of str
        Paths to HDF files, one per location.
    compare_file : str
        Unused in this variant; kept for interface parity with the others.
    compare_num : int
        location_id label assigned to the running aggregate.

    Returns
    -------
    xarray.DataArray or None
        The aggregated array, or None when region_list is empty.
    """
    da = None
    for file in region_list:
        print(file)
        temp = pd.read_hdf(file)
        temp = xr_conversion.df_to_xr(
            temp,
            dims=['location_id', 'age_group_id', 'sex_id', 'year_id', 'cause_id'],
            wide_dim_name='draw')
        if da is None:
            da = temp
        else:
            # Stack the running total with the new location, then collapse
            # the location axis to fold the new values in.
            da = xr.concat([da, temp], dim='location_id')
            da = da.sum(dim='location_id')
            # Relabel the collapsed (scalar) location coordinate so the
            # next concat can align against it.
            da.coords['location_id'] = compare_num
    return da