Example #1
0
    def updateActivityParameterStats(self, parameterCounts):
        '''
        Compute summary statistics and a 100-bin histogram of the MeasuredParameter
        datavalues for each parameter name in parameterCounts, and store them in the
        ActivityParameter and ActivityParameterHistogram tables of self.dbAlias.

        parameterCounts: iterable of Parameter names (only the names are used here).

        Parameters with no data values, or whose ActivityParameter row cannot be
        saved because of an IntegrityError, are logged and skipped.
        '''
        a = self.activity
        for pname in parameterCounts:
            logger.debug(pname)
            p = m.Parameter.objects.using(self.dbAlias).get(name=pname)
            # NOTE(review): this selects on parameter only, not on the activity -- confirm
            # that the database alias holds data for a single activity.
            data = m.MeasuredParameter.objects.using(self.dbAlias).filter(parameter=p)
            numpvar = numpy.array([float(v.datavalue) for v in data])
            if numpvar.size == 0:
                # min()/max() raise ValueError on an empty array
                logger.warning('No data values for parameter.name = %s. Skipping.', p.name)
                continue
            numpvar.sort()
            listvar = list(numpvar)

            # Compute each statistic once; reused for logging and saving
            stats = {
                'number': len(listvar),
                'min': numpvar.min(),
                'max': numpvar.max(),
                'mean': numpvar.mean(),
                'median': median(listvar),
                'mode': mode(numpvar),
                'p025': percentile(listvar, 0.025),
                'p975': percentile(listvar, 0.975),
            }
            logger.debug('parameter: %s, min = %f, max = %f, mean = %f, median = %f, mode = %f, p025 = %f, p975 = %f',
                            p, stats['min'], stats['max'], stats['mean'], stats['median'], stats['mode'],
                            stats['p025'], stats['p975'])

            # Save statistics.  Look the row up by (activity, parameter) only; the statistics
            # go in defaults so that changed values update the row instead of creating another.
            try:
                ap, created = m.ActivityParameter.objects.using(self.dbAlias).get_or_create(
                                        activity = a,
                                        parameter = p,
                                        defaults = stats
                                        )

                if created:
                    logger.info('Created ActivityParameter for parameter.name = %s', p.name)
                else:
                    # Restrict the update to this row; an unfiltered update() rewrites the whole table
                    m.ActivityParameter.objects.using(self.dbAlias).filter(pk=ap.pk).update(**stats)
                    logger.info('Update ActivityParameter for parameter.name = %s', p.name)

            except IntegrityError:
                logger.warning('IntegrityError: Cannot create ActivityParameter for parameter.name = %s. Skipping.', p.name)
                # Without an ActivityParameter there is nothing to attach a histogram to
                continue

            # Compute and save histogram
            (counts, bins) = numpy.histogram(numpvar, 100)
            logger.info(counts)
            logger.info(bins)
            # bins has one more element than counts: pair each count with its lower and upper edge
            for count, binlo, binhi in zip(counts, bins[:-1], bins[1:]):
                try:
                    logger.info('count = %d, binlo = %f, binhi = %f', count, binlo, binhi)
                    m.ActivityParameterHistogram.objects.using(self.dbAlias).get_or_create(
                                                    activityparameter=ap, bincount=count, binlo=binlo, binhi=binhi)
                except IntegrityError:
                    logger.warning('IntegrityError: Cannot create ActivityParameterHistogram for parameter.name = %s. Skipping.', p.name)

        logger.info('Updated statistics for activity.name = %s', a.name)
Example #2
0
    def getTimeSeriesData(self, start_datetime, end_datetime):
        '''
        Return time series of a list of Parameters from a Platform.

        For every platform name in self.platformName and every comma-separated
        group of parameter names in self.plotGroup, load the MeasuredParameter
        rows whose timevalue lies between start_datetime and end_datetime
        (inclusive) from self.database.

        Returns a tuple (data_dict, data_start_dt, data_end_dt):
          data_dict      defaultdict keyed by platform name + parameter name; each
                         value holds the lists 'datetime', 'lon', 'lat', 'depth' and
                         'datavalue' (newest first) plus 'units', 'p010', 'p990' and
                         'sdt_count'
          data_start_dt  earliest time loaded, or start_datetime if nothing loaded
          data_end_dt    latest time loaded, or end_datetime if nothing loaded

        Side effects: appends the comma-joined names of the parameters that had
        data to self.plotGroupValid, once per platform and group, and switches
        self.plotDotParmName to a valid entry (preferring one containing "chl")
        when it is not already in self.plotGroupValid.  If nothing valid was
        found self.plotDotParmName is left unchanged.

        Raises Exception if self.plotGroup is empty.  A parameter that fails to
        load is logged and skipped; the other parameters of its group are still
        processed.
        '''
        data_dict = defaultdict(
            lambda: {
                'datetime': [],
                'lon': [],
                'lat': [],
                'depth': [],
                'datavalue': [],
                'units': '',
                'p010': '',
                'p990': ''
            })

        start_dt = []
        end_dt = []

        if not self.plotGroup:
            raise Exception('Must specify list plotGroup')

        for pln in self.platformName:
            for g in self.plotGroup:
                parameters = [x.strip() for x in g.split(',')]
                parameters_valid = []
                for pname in parameters:
                    # Guard each parameter on its own so one missing parameter does
                    # not discard the rest of its group
                    try:
                        apQS = ActivityParameter.objects.using(self.database)
                        apQS = apQS.filter(activity__platform__name=pln)
                        apQS = apQS.filter(parameter__name=pname)
                        pQS = apQS.aggregate(Min('p010'), Max('p990'))
                        entry = data_dict[pln + pname]
                        entry['p010'] = pQS['p010__min']
                        entry['p990'] = pQS['p990__max']
                        units = apQS.values('parameter__units')
                        # IndexError here means no ActivityParameter row matched
                        entry['units'] = units[0]['parameter__units']

                        qs = MeasuredParameter.objects.using(self.database)
                        qs = qs.filter(
                            measurement__instantpoint__timevalue__gte=
                            start_datetime)
                        qs = qs.filter(
                            measurement__instantpoint__timevalue__lte=
                            end_datetime)
                        qs = qs.filter(parameter__name=pname)
                        qs = qs.filter(
                            measurement__instantpoint__activity__platform__name
                            =pln)
                        sdt_count = qs.values_list(
                            'measurement__instantpoint__simpledepthtime__depth'
                        ).count()
                        qs = qs.values(
                            'measurement__instantpoint__timevalue',
                            'measurement__depth', 'measurement__geom',
                            'datavalue').order_by(
                                'measurement__instantpoint__timevalue')
                        entry['sdt_count'] = sdt_count

                        # only plot parameters that have at least one point
                        rows = list(qs)
                        if not rows:
                            continue

                        # Rows arrive oldest first; build the newest-first lists in one
                        # pass and splice them in front of anything already stored,
                        # rather than paying for an insert(0, ...) per row.
                        newest_first = rows[::-1]
                        geoms = [rs['measurement__geom'] for rs in newest_first]
                        lats = [geom.y for geom in geoms]
                        lons = [geom.x for geom in geoms]
                        times = [rs['measurement__instantpoint__timevalue']
                                 for rs in newest_first]
                        depths = [rs['measurement__depth']
                                  for rs in newest_first]
                        values = [rs['datavalue'] for rs in newest_first]
                        entry['lat'][:0] = lats
                        entry['lon'][:0] = lons
                        entry['datetime'][:0] = times
                        entry['depth'][:0] = depths
                        entry['datavalue'][:0] = values

                        # for salinity, throw out anything not above 20 and do the percentiles manually
                        if 'salinity' in pname:
                            numpvar = np.array(entry['datavalue'])
                            numpvar_filtered = numpvar[numpvar > 20.0]
                            numpvar_filtered.sort()
                            listvar = list(numpvar_filtered)
                            entry['p010'] = percentile(listvar, 0.010)
                            entry['p990'] = percentile(listvar, 0.990)

                        # dates are in reverse order - newest first
                        start_dt.append(entry['datetime'][-1])
                        end_dt.append(entry['datetime'][0])
                        logger.debug('Loaded data for parameter %s', pname)
                        parameters_valid.append(pname)

                    except Exception:
                        logger.error(
                            '%s not available in database for the dates %s %s',
                            pname, start_datetime, end_datetime)
                        continue

                if parameters_valid:
                    self.plotGroupValid.append(','.join(parameters_valid))

        # get the ranges of the data
        if start_dt and end_dt:
            data_start_dt = min(start_dt)
            data_end_dt = max(end_dt)
        else:
            #otherwise default to requested dates
            data_start_dt = start_datetime
            data_end_dt = end_datetime

        # Only switch when there is something valid to switch to; indexing an
        # empty plotGroupValid would raise IndexError.
        if self.plotGroupValid and self.plotDotParmName not in self.plotGroupValid:
            # if the dot plot parameter name is not in the valid list of parameters found, switch it to
            # something else choosing chlorophyll over another
            matching = [s for s in self.plotGroupValid if "chl" in s]
            if matching:
                self.plotDotParmName = matching[0]
            else:
                self.plotDotParmName = self.plotGroupValid[0]

        return data_dict, data_start_dt, data_end_dt
Example #3
0
    def getTimeSeriesData(self, startDatetime, endDatetime):
        '''
        Return time series of a list of Parameters from a Platform
        '''
        data_dict = defaultdict(lambda: {'datetime': [], 'lon': [], 'lat': [], 'depth': [], 'datavalue':[], 'units':'', 'p010':'', 'p990':''})

        start_dt= []
        end_dt = []

        if not self.plotGroup :
            raise Exception('Must specify list plotGroup')

        for pln in self.platformName:
            for g in self.plotGroup:
                parameters = [x.strip() for x in g.split(',')]
                parameters_valid = []
                try:
                    for pname in parameters:

                        apQS = ActivityParameter.objects.using(self.database)
                        apQS = apQS.filter(activity__platform__name=pln)
                        apQS = apQS.filter(parameter__name=pname)
                        pQS = apQS.aggregate(Min('p010'), Max('p990'))
                        min, max = (pQS['p010__min'], pQS['p990__max'])
                        data_dict[pln+pname]['p010'] = pQS['p010__min']
                        data_dict[pln+pname]['p990'] = pQS['p990__max']
                        units=apQS.values('parameter__units')
                        data_dict[pln+pname]['units'] = units[0]['parameter__units']

                        qs = MeasuredParameter.objects.using(self.database)
                        qs = qs.filter(measurement__instantpoint__timevalue__gte=startDatetime)
                        qs = qs.filter(measurement__instantpoint__timevalue__lte=endDatetime)
                        qs = qs.filter(parameter__name=pname)
                        qs = qs.filter(measurement__instantpoint__activity__platform__name=pln)
                        sdt_count = qs.values_list('measurement__instantpoint__simpledepthtime__depth').count()
                        qs = qs.values('measurement__instantpoint__timevalue', 'measurement__depth', 'measurement__geom', 'datavalue').order_by('measurement__instantpoint__timevalue')
                        data_dict[pln+pname]['sdt_count'] = sdt_count

                        # only plot data with more than one point
                        if len(qs) > 0:
                            for rs in qs:
                                geom = rs['measurement__geom']
                                lat = geom.y
                                lon = geom.x
                                data_dict[pln+pname]['lat'].insert(0, lat)
                                data_dict[pln+pname]['lon'].insert(0, lon)
                                data_dict[pln+pname]['datetime'].insert(0, rs['measurement__instantpoint__timevalue'])
                                data_dict[pln+pname]['depth'].insert(0, rs['measurement__depth'])
                                data_dict[pln+pname]['datavalue'].insert(0, rs['datavalue'])

                            # for salinity, throw out anything less than 20 and do the percentiles manually
                            if pname.find('salinity') != -1 :
                                numpvar = np.array(data_dict[pln+pname]['datavalue'])
                                numpvar_filtered = numpvar[numpvar>20.0]
                                numpvar_filtered.sort()
                                listvar = list(numpvar_filtered)
                                p010 = percentile(listvar, 0.010)
                                p990 = percentile(listvar, 0.990)
                                data_dict[pln+pname]['p010'] = p010
                                data_dict[pln+pname]['p990'] = p990

                            # dates are in reverse order - newest first
                            start_dt.append(data_dict[pln+pname]['datetime'][-1])
                            end_dt.append(data_dict[pln+pname]['datetime'][0])
                            logger.debug('Loaded data for parameter %s' % pname)
                            parameters_valid.append(pname)

                except Exception, e:
                    logger.error('%s not available in database for the dates %s %s' %(pname, startDatetime, endDatetime))
                    continue

                except Exception, e:
                    logger.error('%s not available in database for the dates %s %s' %(pname, startDatetime, endDatetime))
                    continue

                if len(parameters_valid) > 0:
                    self.plotGroupValid.append(','.join(parameters_valid))