def updateActivityParameterStats(self, parameterCounts):
    '''
    Compute statistics on the MeasuredParameters of this Activity and store
    them, together with a histogram, in the associated tables.

    For each parameter name in `parameterCounts` the data values are loaded
    and number, min, max, mean, median, mode, p025 and p975 are written to
    the ActivityParameter row identified by (activity, parameter).  A 100 bin
    histogram of the same values is written as ActivityParameterHistogram rows.

    parameterCounts: iterable of Parameter names; only the names are used.

    Parameters without any data, and parameters whose ActivityParameter row
    cannot be saved because of an IntegrityError, are logged and skipped.
    '''
    a = self.activity
    for pname in parameterCounts:
        logger.debug(pname)
        p = m.Parameter.objects.using(self.dbAlias).get(name=pname)

        # Restrict to this Activity's measurements so the statistics describe
        # the Activity and not every load of the Parameter in the database.
        data = m.MeasuredParameter.objects.using(self.dbAlias).filter(
            parameter=p, measurement__instantpoint__activity=a)
        numpvar = numpy.array([float(v.datavalue) for v in data])
        if numpvar.size == 0:
            # min()/max() of an empty array raise ValueError
            logger.warning('No MeasuredParameter data for parameter.name = %s. Skipping.', p.name)
            continue
        numpvar.sort()
        listvar = list(numpvar)

        # Compute each statistic once; reused for logging and for saving
        stats = {
            'number': len(listvar),
            'min': numpvar.min(),
            'max': numpvar.max(),
            'mean': numpvar.mean(),
            'median': median(listvar),
            'mode': mode(numpvar),
            'p025': percentile(listvar, 0.025),
            'p975': percentile(listvar, 0.975),
        }
        logger.debug('parameter: %s, min = %f, max = %f, mean = %f, median = %f, mode = %f, p025 = %f, p975 = %f',
                     p, stats['min'], stats['max'], stats['mean'], stats['median'],
                     stats['mode'], stats['p025'], stats['p975'])

        # Save statistics.  Only (activity, parameter) identify the row; the
        # statistics go in `defaults` so that changed values update the
        # existing row instead of failing to match it.
        try:
            ap, created = m.ActivityParameter.objects.using(self.dbAlias).get_or_create(
                activity=a, parameter=p, defaults=stats)
            if created:
                logger.info('Created ActivityParameter for parameter.name = %s', p.name)
            else:
                # Update just this row (a manager-level update() would touch
                # every ActivityParameter in the table).
                for field, value in stats.items():
                    setattr(ap, field, value)
                ap.save(using=self.dbAlias)
                logger.info('Update ActivityParameter for parameter.name = %s', p.name)
        except IntegrityError:
            logger.warning('IntegrityError: Cannot create ActivityParameter for parameter.name = %s. Skipping.', p.name)
            # Without a saved ActivityParameter there is nothing to attach
            # the histogram to.
            continue

        # Compute and save histogram
        # NOTE(review): bins saved by an earlier run with different data are
        # not removed here - confirm whether they should be.
        counts, bins = numpy.histogram(numpvar, 100)
        logger.info(counts)
        logger.info(bins)
        # bins has one more edge than counts; pair each count with its edges
        for count, binlo, binhi in zip(counts, bins[:-1], bins[1:]):
            logger.info('count = %d, binlo = %f, binhi = %f', count, binlo, binhi)
            try:
                m.ActivityParameterHistogram.objects.using(self.dbAlias).get_or_create(
                    activityparameter=ap, bincount=count, binlo=binlo, binhi=binhi)
            except IntegrityError:
                logger.warning('IntegrityError: Cannot create ActivityParameterHistogram for parameter.name = %s. Skipping.', p.name)

    logger.info('Updated statistics for activity.name = %s', a.name)
def getTimeSeriesData(self, start_datetime, end_datetime):
    '''
    Return time series of a list of Parameters from a Platform

    For every platform in self.platformName and every comma separated group
    of parameter names in self.plotGroup, the MeasuredParameter values between
    start_datetime and end_datetime (inclusive) are collected.

    Returns a tuple (data_dict, data_start_dt, data_end_dt):
      data_dict      defaultdict keyed by platform name + parameter name; each
                     value holds the lists 'datetime', 'lon', 'lat', 'depth'
                     and 'datavalue' (newest first) plus 'units', 'p010',
                     'p990' and 'sdt_count'
      data_start_dt  earliest time of the loaded data, or start_datetime if
                     nothing was loaded
      data_end_dt    latest time of the loaded data, or end_datetime if
                     nothing was loaded

    Side effects: groups for which data was found are appended to
    self.plotGroupValid, and self.plotDotParmName is switched to one of them
    if it is not among them.

    Raises Exception if self.plotGroup is empty.
    '''
    data_dict = defaultdict(
        lambda: {
            'datetime': [], 'lon': [], 'lat': [], 'depth': [],
            'datavalue': [], 'units': '', 'p010': '', 'p990': ''
        })

    start_dt = []
    end_dt = []

    if not self.plotGroup:
        raise Exception('Must specify list plotGroup')

    for pln in self.platformName:
        for g in self.plotGroup:
            parameters = [x.strip() for x in g.split(',')]
            parameters_valid = []
            try:
                for pname in parameters:
                    apQS = ActivityParameter.objects.using(self.database)
                    apQS = apQS.filter(activity__platform__name=pln)
                    apQS = apQS.filter(parameter__name=pname)
                    pQS = apQS.aggregate(Min('p010'), Max('p990'))

                    series = data_dict[pln + pname]
                    series['p010'] = pQS['p010__min']
                    series['p990'] = pQS['p990__max']
                    # Indexing raises if there is no ActivityParameter for
                    # this platform and parameter; handled by the except below
                    units = apQS.values('parameter__units')
                    series['units'] = units[0]['parameter__units']

                    qs = MeasuredParameter.objects.using(self.database)
                    qs = qs.filter(
                        measurement__instantpoint__timevalue__gte=start_datetime)
                    qs = qs.filter(
                        measurement__instantpoint__timevalue__lte=end_datetime)
                    qs = qs.filter(parameter__name=pname)
                    qs = qs.filter(
                        measurement__instantpoint__activity__platform__name=pln)
                    sdt_count = qs.values_list(
                        'measurement__instantpoint__simpledepthtime__depth'
                    ).count()
                    qs = qs.values(
                        'measurement__instantpoint__timevalue',
                        'measurement__depth', 'measurement__geom',
                        'datavalue').order_by(
                            'measurement__instantpoint__timevalue')
                    series['sdt_count'] = sdt_count

                    # only plot parameters that have data
                    rows = list(qs)
                    if rows:
                        # The query is oldest first; the series are kept
                        # newest first.  Prepending the reversed rows in one
                        # step gives the same order as inserting every row at
                        # index 0, without the quadratic cost.
                        newest_first = rows[::-1]
                        series['lat'][:0] = [
                            rs['measurement__geom'].y for rs in newest_first]
                        series['lon'][:0] = [
                            rs['measurement__geom'].x for rs in newest_first]
                        series['datetime'][:0] = [
                            rs['measurement__instantpoint__timevalue']
                            for rs in newest_first]
                        series['depth'][:0] = [
                            rs['measurement__depth'] for rs in newest_first]
                        series['datavalue'][:0] = [
                            rs['datavalue'] for rs in newest_first]

                        # for salinity, throw out anything less than 20 and
                        # do the percentiles manually
                        if pname.find('salinity') != -1:
                            numpvar = np.array(series['datavalue'])
                            numpvar_filtered = numpvar[numpvar > 20.0]
                            numpvar_filtered.sort()
                            listvar = list(numpvar_filtered)
                            series['p010'] = percentile(listvar, 0.010)
                            series['p990'] = percentile(listvar, 0.990)

                        # dates are in reverse order - newest first
                        start_dt.append(series['datetime'][-1])
                        end_dt.append(series['datetime'][0])
                        logger.debug('Loaded data for parameter %s', pname)
                        parameters_valid.append(pname)

            except Exception:
                # Best effort: a failure abandons the rest of this group
                logger.error(
                    '%s not available in database for the dates %s %s',
                    pname, start_datetime, end_datetime)
                continue

            if len(parameters_valid) > 0:
                self.plotGroupValid.append(','.join(parameters_valid))

    # get the ranges of the data
    if start_dt and end_dt:
        data_start_dt = min(start_dt)
        data_end_dt = max(end_dt)
    else:
        # otherwise default to requested dates
        data_start_dt = start_datetime
        data_end_dt = end_datetime

    # If the dot plot parameter name is not in the valid list of parameters
    # found, switch it to something else, choosing chlorophyll over another.
    # With nothing valid there is nothing to switch to, so it is left as is
    # instead of failing on an empty list.
    if self.plotGroupValid and self.plotDotParmName not in self.plotGroupValid:
        matching = [s for s in self.plotGroupValid if "chl" in s]
        if matching:
            self.plotDotParmName = matching[0]
        else:
            self.plotDotParmName = self.plotGroupValid[0]

    return data_dict, data_start_dt, data_end_dt
def getTimeSeriesData(self, startDatetime, endDatetime): ''' Return time series of a list of Parameters from a Platform ''' data_dict = defaultdict(lambda: {'datetime': [], 'lon': [], 'lat': [], 'depth': [], 'datavalue':[], 'units':'', 'p010':'', 'p990':''}) start_dt= [] end_dt = [] if not self.plotGroup : raise Exception('Must specify list plotGroup') for pln in self.platformName: for g in self.plotGroup: parameters = [x.strip() for x in g.split(',')] parameters_valid = [] try: for pname in parameters: apQS = ActivityParameter.objects.using(self.database) apQS = apQS.filter(activity__platform__name=pln) apQS = apQS.filter(parameter__name=pname) pQS = apQS.aggregate(Min('p010'), Max('p990')) min, max = (pQS['p010__min'], pQS['p990__max']) data_dict[pln+pname]['p010'] = pQS['p010__min'] data_dict[pln+pname]['p990'] = pQS['p990__max'] units=apQS.values('parameter__units') data_dict[pln+pname]['units'] = units[0]['parameter__units'] qs = MeasuredParameter.objects.using(self.database) qs = qs.filter(measurement__instantpoint__timevalue__gte=startDatetime) qs = qs.filter(measurement__instantpoint__timevalue__lte=endDatetime) qs = qs.filter(parameter__name=pname) qs = qs.filter(measurement__instantpoint__activity__platform__name=pln) sdt_count = qs.values_list('measurement__instantpoint__simpledepthtime__depth').count() qs = qs.values('measurement__instantpoint__timevalue', 'measurement__depth', 'measurement__geom', 'datavalue').order_by('measurement__instantpoint__timevalue') data_dict[pln+pname]['sdt_count'] = sdt_count # only plot data with more than one point if len(qs) > 0: for rs in qs: geom = rs['measurement__geom'] lat = geom.y lon = geom.x data_dict[pln+pname]['lat'].insert(0, lat) data_dict[pln+pname]['lon'].insert(0, lon) data_dict[pln+pname]['datetime'].insert(0, rs['measurement__instantpoint__timevalue']) data_dict[pln+pname]['depth'].insert(0, rs['measurement__depth']) data_dict[pln+pname]['datavalue'].insert(0, rs['datavalue']) # for salinity, throw out 
anything less than 20 and do the percentiles manually if pname.find('salinity') != -1 : numpvar = np.array(data_dict[pln+pname]['datavalue']) numpvar_filtered = numpvar[numpvar>20.0] numpvar_filtered.sort() listvar = list(numpvar_filtered) p010 = percentile(listvar, 0.010) p990 = percentile(listvar, 0.990) data_dict[pln+pname]['p010'] = p010 data_dict[pln+pname]['p990'] = p990 # dates are in reverse order - newest first start_dt.append(data_dict[pln+pname]['datetime'][-1]) end_dt.append(data_dict[pln+pname]['datetime'][0]) logger.debug('Loaded data for parameter %s' % pname) parameters_valid.append(pname) except Exception, e: logger.error('%s not available in database for the dates %s %s' %(pname, startDatetime, endDatetime)) continue except Exception, e: logger.error('%s not available in database for the dates %s %s' %(pname, startDatetime, endDatetime)) continue if len(parameters_valid) > 0: self.plotGroupValid.append(','.join(parameters_valid))