    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
                #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
                #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
                #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
                #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
                #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
                #tx_yml = get_safe(config, 'taxonomy')
                #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
                pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

                cnt = calculate_iteration_count(len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(param_dictionary=pdict)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                        rdt[name] = d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                    yield g
            except SlocumParseException as spe:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, spe))
    def _get_data(cls, config):
        """
        Acquires data from the external dataset referenced by config['dataset_object'], constrained by config['constraints']['temporal_slice'], and yields well-formed Granule objects
        @param config Dict of configuration parameters - must contain 'external_dataset_res' and 'dataset_object'
        """
        ext_dset_res = get_safe(config, 'external_dataset_res', None)

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds = get_safe(config, 'dataset_object')

        if ext_dset_res and ds:
            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            var_lst = ext_dset_res.dataset_description.parameters['variables']

            t_slice = get_safe(config, 'constraints.temporal_slice', slice(0, 1))
            #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
            if isinstance(t_slice, str):
                t_slice = eval(t_slice)

            lon = ds.variables[x_vname][:]
            lat = ds.variables[y_vname][:]
            z = ds.variables[z_vname][:]

            t_arr = ds.variables[t_vname][t_slice]
            data_arrays = {}
            for varn in var_lst:
                data_arrays[varn] = ds.variables[varn][t_slice]

            max_rec = get_safe(config, 'max_records', 1)
            #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(t_arr.size, max_rec)
            for x in xrange(cnt):
                ta = t_arr[x * max_rec:(x + 1) * max_rec]

                # Make a 'master' RecDict
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                # Assign coordinate values to the RecDict
                rdt[x_vname] = lon
                rdt[y_vname] = lat
                rdt[z_vname] = z

                # Assign data values to the RecDict
                rdt[t_vname] = ta
                for key, arr in data_arrays.iteritems():
                    d = arr[x * max_rec:(x + 1) * max_rec]
                    rdt[key] = d

                g = rdt.to_granule()
                yield g

            ds.close()
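The TODO above flags eval() as unsafe for the temporal_slice constraint. Below is a minimal eval-free sketch, assuming the constraint arrives as a string such as 'slice(0, 10)' or '0:10'; parse_slice is a hypothetical helper, not part of the existing handler code.

import re

def parse_slice(text):
    """Parse 'slice(start, stop[, step])' or 'start:stop[:step]' without eval."""
    if not isinstance(text, str):
        return text  # already a slice object (or None)
    m = re.match(r'^\s*slice\((.*)\)\s*$', text)
    parts = m.group(1).split(',') if m else text.split(':')
    vals = [int(p) if p.strip() not in ('', 'None') else None for p in parts]
    return slice(*vals)

# parse_slice('slice(0, 10)')  -> slice(0, 10, None)
# parse_slice('5:50:5')        -> slice(5, 50, 5)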
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count',
                           SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
                #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
                #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
                #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
                #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
                #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id',
                                    'unknown data producer')

                stream_def = get_safe(config, 'stream_def')

                cnt = calculate_iteration_count(
                    len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x * max_rec:(x + 1) *
                                                  max_rec]
                        rdt[name] = d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = rdt.to_granule()
                    yield g
            except SlocumParseException:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file: \'{0}\''.format(f))
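For reference, a self-contained illustration of the chunking performed in the loop above, using a made-up data_map in place of SlocumParser output (the sensor names and values here are invented):

import numpy as np

data_map = {'sci_water_temp': np.arange(10), 'sci_water_cond': np.arange(10) * 2.0}
max_rec = 4
total = len(data_map[list(data_map.keys())[0]])
cnt = -(-total // max_rec)  # ceiling division, matching calculate_iteration_count
for x in range(cnt):
    # each chunk holds the values that would be loaded into one RecordDictionaryTool
    chunk = dict((name, data_map[name][x * max_rec:(x + 1) * max_rec]) for name in data_map)
    print(chunk['sci_water_temp'])
# -> [0 1 2 3], then [4 5 6 7], then [8 9]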
    def _get_data(cls, config):
        """
        Generates a random sample of length config['constraints']['array_len'] and yields it in granules of at most config['max_records'] records
        @param config Dict of configuration parameters - must contain ['constraints']['array_len']
        """
        array_len = get_safe(config, 'constraints.array_len', 1)

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml)
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        arr = npr.random_sample(array_len)
        log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
        cnt = calculate_iteration_count(arr.size, max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            d = arr[x*max_rec:(x+1)*max_rec]
            rdt['dummy'] = d
            g = rdt.to_granule()
            yield g
    def _get_data(cls, config):
        """
        Generates a random sample of length config['constraints']['array_len'] and yields it in granules of at most config['max_records'] records
        @param config Dict of configuration parameters - must contain ['constraints']['array_len']
        """
        array_len = get_safe(config, 'constraints.array_len', 1)

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id')

        stream_def = get_safe(config, 'stream_def')

        arr = npr.random_sample(array_len)

        #log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
        cnt = calculate_iteration_count(arr.size, max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            d = arr[x * max_rec:(x + 1) * max_rec]
            rdt['dummy'] = d
            g = rdt.to_granule()
            yield g
    def _get_data(cls, config):
        """
        A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
        @param config Dict of configuration parameters - must contain ['constraints']['count']
        """
        cnt = get_safe(config, 'constraints.count', 1)

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml)
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        def fibGenerator():
            """
            A Fibonacci sequence generator
            """
            count = 0
            ret = []
            a, b = 1, 1
            while 1:
                count += 1
                ret.append(a)
                if count == max_rec:
                    yield np.array(ret)
                    ret = []
                    count = 0

                a, b = b, a + b

        gen = fibGenerator()
        cnt = calculate_iteration_count(cnt, max_rec)
        for i in xrange(cnt):
            rdt = RecordDictionaryTool(param_dictionary=pdict)
            d = gen.next()
            rdt['data'] = d
            g = rdt.to_granule()
            yield g
    def _get_data(cls, config):
        """
        A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
        @param config Dict of configuration parameters - must contain ['constraints']['count']
        """
        cnt = get_safe(config, 'constraints.count', 1)

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id')

        stream_def = get_safe(config, 'stream_def')

        def fibGenerator():
            """
            A Fibonacci sequence generator
            """
            count = 0
            ret = []
            a, b = 1, 1
            while 1:
                count += 1
                ret.append(a)
                if count == max_rec:
                    yield np.array(ret)
                    ret = []
                    count = 0

                a, b = b, a + b

        gen = fibGenerator()
        cnt = calculate_iteration_count(cnt, max_rec)
        for i in xrange(cnt):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            d = gen.next()
            rdt['data'] = d
            g = rdt.to_granule()
            yield g
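The inner fibGenerator can be exercised on its own. A standalone copy (renamed fib_batches here purely for illustration) shows that each iteration yields a numpy array of max_rec values:

import numpy as np

def fib_batches(max_rec):
    count, ret = 0, []
    a, b = 1, 1
    while 1:
        count += 1
        ret.append(a)
        if count == max_rec:
            yield np.array(ret)
            ret = []
            count = 0
        a, b = b, a + b

gen = fib_batches(3)
print(next(gen))  # [1 1 2]
print(next(gen))  # [3 5 8]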
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        parser_mod = get_safe(config, 'parser_mod', '')
        parser_cls = get_safe(config, 'parser_cls', '')
        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                try:
                    #find the new data check index in config
                    index = -1
                    for ndc in config['set_new_data_check']:
                        if ndc[0] == f[0]:
                            index = config['set_new_data_check'].index(ndc)
                            break
                except Exception:
                    log.error('File name not found in attachment')

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

                stream_def = get_safe(config, 'stream_def')

                cnt = calculate_iteration_count(parser.record_count, max_rec)
                file_pos = -1
                for x in xrange(cnt):
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                    all_data = {}
                    for name in parser.sensor_names:
                        all_data[name] = []

                    for y in xrange(max_rec):
                        data_map, file_pos = parser.read_next_data()
                        if len(data_map.items()):
                            for name in parser.sensor_names:
                                all_data[name].append(data_map[name]) #[x * max_rec:(x + 1) * max_rec]

                    for name in parser.sensor_names:
                        rdt[name] = all_data[name]

                    g = rdt.to_granule()

                    #update new data check with the latest file position
                    if 'set_new_data_check' in config and index > -1:
                        config['set_new_data_check'][index] = (f[0], f[1], f[2], file_pos)

                    yield g

                parser.close()

            except HYPMException as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
    def _get_data(cls, config):
        """
        Acquires data from the external dataset referenced by config['dataset_object'], constrained by config['constraints']['temporal_slice'], and yields well-formed Granule objects
        @param config Dict of configuration parameters - must contain 'external_dataset_res' and 'dataset_object'
        """
        ext_dset_res = get_safe(config, 'external_dataset_res', None)

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds = get_safe(config, 'dataset_object')
        if ext_dset_res and ds:
            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            var_lst = ext_dset_res.dataset_description.parameters['variables']

            t_slice = get_safe(config, 'constraints.temporal_slice', slice(0, 1))
            #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
            if isinstance(t_slice, str):
                t_slice = eval(t_slice)

            lon = ds.variables[x_vname][:]
            lat = ds.variables[y_vname][:]
            z = ds.variables[z_vname][:]

            t_arr = ds.variables[t_vname][t_slice]
            data_arrays = {}
            for varn in var_lst:
                data_arrays[varn] = ds.variables[varn][t_slice]

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            cnt = calculate_iteration_count(t_arr.size, max_rec)
            for x in xrange(cnt):
                ta = t_arr[x*max_rec:(x+1)*max_rec]

                # Make a 'master' RecDict
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)
                # Make a 'coordinate' RecDict
                #rdt_c = RecordDictionaryTool(taxonomy=ttool)
                #rdt_c = RecordDictionaryTool(param_dictionary=pdict)
                # Make a 'data' RecDict
                #rdt_d = RecordDictionaryTool(taxonomy=ttool)
                #rdt_d = RecordDictionaryTool(param_dictionary=pdict)

                # Assign values to the coordinate RecDict
                rdt[x_vname] = lon
                rdt[y_vname] = lat
                rdt[z_vname] = z

                # Assign values to the data RecDict
                rdt[t_vname] = ta
                for key, arr in data_arrays.iteritems():
                    d = arr[x*max_rec:(x+1)*max_rec]
                    rdt[key] = d

                # Add the coordinate and data RecDicts to the master RecDict
                #rdt['coords'] = rdt_c
                #rdt['data'] = rdt_d

                # Build and return a granule
                # CBM: ttool must be passed
                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g

            ds.close()
    def test_calculate_iteration_count_not_even(self):
        total_recs = 101
        max_rec = 10
        self.assertEqual(calculate_iteration_count(total_recs=total_recs, max_rec=max_rec), 11)

    def test_calculate_iteration_count(self):
        total_recs = 100
        max_rec = 10
        self.assertEqual(calculate_iteration_count(total_recs=total_recs, max_rec=max_rec), 10)
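The two tests above pin calculate_iteration_count down to ceiling division of total_recs by max_rec. Here is a minimal sketch consistent with those tests; the real helper is defined elsewhere in the handler utilities and may differ in detail:

def calculate_iteration_count(total_recs, max_rec):
    # ceiling division: number of max_rec-sized chunks needed to cover total_recs
    cnt = total_recs // max_rec
    if total_recs % max_rec:
        cnt += 1
    return cnt

# calculate_iteration_count(100, 10) -> 10
# calculate_iteration_count(101, 10) -> 11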
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        parser_mod = get_safe(config, 'parser_mod', '')
        parser_cls = get_safe(config, 'parser_cls', '')
        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                try:
                    #find the new data check index in config
                    index = -1
                    for ndc in config['set_new_data_check']:
                        if ndc[0] == f[0]:
                            index = config['set_new_data_check'].index(ndc)
                            break
                except Exception:
                    log.error('File name not found in attachment')

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id',
                                    'unknown data producer')

                stream_def = get_safe(config, 'stream_def')

                cnt = calculate_iteration_count(parser.record_count, max_rec)
                file_pos = -1
                for x in xrange(cnt):
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                    all_data = {}
                    for name in parser.sensor_names:
                        all_data[name] = []

                    for y in xrange(max_rec):
                        data_map, file_pos = parser.read_next_data()
                        if len(data_map.items()):
                            for name in parser.sensor_names:
                                all_data[name].append(
                                    data_map[name]
                                )  #[x * max_rec:(x + 1) * max_rec]

                    for name in parser.sensor_names:
                        try:
                            rdt[name] = all_data[name]
                        except Exception:
                            log.error('failed to set rdt[%s], all_data=%r',
                                      name, all_data)
                            raise

                    g = rdt.to_granule()

                    #update new data check with the latest file position
                    if 'set_new_data_check' in config and index > -1:
                        config['set_new_data_check'][index] = (f[0], f[1],
                                                               f[2], file_pos)

                    yield g

                parser.close()

            except HYPMException as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
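A small standalone illustration of the set_new_data_check bookkeeping used above; the tuple layout assumed here (file name, modification time, size, last read position) is an assumption for this sketch, not a documented format:

# Sketch only: fields other than f[0] (file name) and the trailing read position are assumed.
config = {'set_new_data_check': [('file_a.dat', 1001, 2048, 0),
                                 ('file_b.dat', 1002, 4096, 0)]}
f = ('file_a.dat', 1001, 2048, 0)

# locate this file's entry, as in the inner try block above
index = -1
for ndc in config['set_new_data_check']:
    if ndc[0] == f[0]:
        index = config['set_new_data_check'].index(ndc)
        break

file_pos = 1536  # position reported by the parser after reading a chunk
if 'set_new_data_check' in config and index > -1:
    config['set_new_data_check'][index] = (f[0], f[1], f[2], file_pos)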