Ejemplo n.º 1
0
def merge(dataset1, dataset2):
    ''' Try to merge the given dataset into one

    '''
    ds1_map = function.get_mapping(dataset1)
    ds2_map = function.get_mapping(dataset2)
    keys = function.compare(ds1_map, ds2_map)
    if not len(keys) == 1:
        logging.warning('Only one dimension can be different. Did not merge')
        return
    logging.info('Merging datasets: %s and %s' % (dataset1.name,
        dataset2.name))
    key = keys[0]
    outmap = ds1_map
    for value in ds2_map[key]:
        outmap[key].append(value)
    combined_data = concatenate((dataset1[...], dataset2[...]), key)
    
    # copy doesn't work (because of changed shape)
    group = dataset1.parent
    name = dataset1.name
    dtype = dataset1.dtype
    attrs = dataset1.attrs.items()
    del group[dataset1.name]
    ds = group.create_dataset(name, shape=combined_data.shape,
            dtype=dtype)
    ds[...] = combined_data
    for key, value in attrs:
        ds.attrs[key] = value
    ds.attrs['mapping'] = pickle.dumps(outmap)
    del group[dataset2.name]
Ejemplo n.º 2
0
    def __call__(self, input_cubes, output_cube_names, params):
        ''' Collapse one dimension of the input cube with a method given by
        params. Store the output cube in the correct hdf project.

        '''
        if not len(params) == 2:
            logging.error('Please give a dimension to collapse and a method')
            raise ValueError('Please give a dimension to collapse and a method')
        if not len(input_cubes) == 1:
            logging.error('Please give exactly one cube!')
            raise ValueError('Please give exactly one cube!')

        collapse_dim, method = params
        cube = input_cubes[0]
        mapping = function.get_mapping(cube)
        if not collapse_dim in mapping.values():
            logging.error('The collapse dimension is not in the input cube')
            raise ValueError('The collapse dimension is not in the input cube')

        out_mapping = dict()
        for key, value in mapping.items():
            if value < collapse_dim:     
                out_mapping[key] = value
            elif value > collapse_dim:
                out_mapping[key] = value - 1 # no gaps please 


        logging.info('Creating new group: %s', output_cube_names[0])
        out_cube = cube.parent.create_group(output_cube_names[0])
        logging.debug('Copying attributes')
        for key in cube.attrs.keys():
            out_cube.attrs[key] = cube.attrs[key]

        out_cube.attrs['mapping'] = pickle.dumps(out_mapping)

        logging.debug('Create new datasets')
        for i in xrange(len(cube)):
            name = str(i)
            dset = cube[name]
            data = calc_data(dset, method, collapse_dim)
            ds = out_cube.create_dataset(name, shape=data.shape,
                    dtype=data.dtype)
            ds[...] = data
            ds_mapping = dict()
            for key, value in function.get_mapping(dset).items():
                if key < collapse_dim:     
                    ds_mapping[key] = value
                elif key > collapse_dim:     
                    ds_mapping[key-1] = value # no gaps please
            ds.attrs['mapping'] = pickle.dumps(ds_mapping)
Ejemplo n.º 3
0
def join_cubes(out_cube, cubes):
    ''' Join the given cubes in the group
    
    '''
    while len(cubes) > 1:
        #common elements
        logging.info('Joining cubes: %s and %s' % (cubes[-1].name,
            cubes[-2].name))
        for i, (ds1, ds2) in enumerate(zip(cubes[-2].values(),
            cubes[-1].values())):
            logging.debug('Concatenating datasets: %s and %s' % (ds1.name,
                ds2.name))
            ds1_map = function.get_mapping(ds1)
            ds2_map = function.get_mapping(ds2)
            keys = function.compare(ds1_map, ds2_map)
            if not len(keys) == 1:
                logging.error('Only one dimension can be different. Different'
                        ' dimensions are %s' % keys)
                raise ValueError
            key = keys[0]
            outmap = ds1_map
            for value in ds2_map[key]:
                outmap[key].append(value)
            combined_data = concatenate((ds1[...], ds2[...]), key)
            try:
                del out_cube[str(i)]
            except KeyError:
                pass

            ds = out_cube.create_dataset(str(i), shape=combined_data.shape,
                    dtype=ds1.dtype)
            ds[...] = combined_data
            for key, value in ds1.attrs.items():
                ds.attrs[key] = value
            ds.attrs['mapping'] = pickle.dumps(outmap)

        #append non common elements
        (len_a, cube_a), (len_b, cube_b) = sorted((len(c), c) for c in
                cubes[-2:])
        for i in xrange(len_a, len_b):
            name = str(i)
            out_cube.copy(cube_b[name].name, name)

        cubes[-2] = out_cube
        cubes.pop()
    for combination in set(combinations(cubes[0].values(), 2)):
        cube1, cube2 = combination
        merge(cube1, cube2)
Ejemplo n.º 4
0
    def __call__(self, input_cubes, output_cubes, params):
        ''' 
            
        '''
        if not input_cubes or len(input_cubes) < 2:
            logging.error('input_cubes must be a list of at least two input'
                    ' cubes.')
            raise ValueError

        first_grp_mapping = function.get_mapping(input_cubes[0])
        for cube in input_cubes[1:]:
            if not first_grp_mapping == function.get_mapping(cube):
                logging.error('All cubes must have the same dimension_labels.')
                raise ValueError
       
        out_cube = input_cubes[0].parent.create_group(output_cubes[0])
        for key in input_cubes[0].attrs.keys():
            out_cube.attrs[key] = input_cubes[0].attrs[key]
        join_cubes(out_cube, input_cubes)