def srazsae_import(path, day_from, day_to):
    """Import srazsae.dat weather observations found under ``path``.

    Walks the storage sub-folders (named ``YYYYMMDD``); for every folder
    whose date falls in ``[day_from, day_to)`` it parses ``srazsae.dat``
    and stores one WeatherObservation per row.  ``get_or_create`` is used
    throughout, so re-running the import is safe.

    :param path: storage prefix whose sub-folders are named YYYYMMDD
    :param day_from: inclusive lower bound (datetime), must not be None
    :param day_to: exclusive upper bound (datetime), must not be None
    :raises Exception: if either bound is None
    """
    # WeatherObservation.objects.all().delete()
    if day_from is None or day_to is None:
        raise Exception("Wrong date range")
    get_or_create_processes()
    get_or_create_props()
    measure = Process.objects.get(name_id="measure")
    air_temperature = Property.objects.get(name_id="air_temperature")
    precipitation = Property.objects.get(name_id="precipitation")
    content = tuple(default_storage.listdir(path))
    for con in content:
        folder_name = os.path.basename(os.path.dirname(con.object_name))
        try:
            date_from_name = datetime.strptime(folder_name, "%Y%m%d")
            if day_from <= date_from_name < day_to:
                files = tuple(default_storage.listdir(con.object_name))
                for f in files:
                    # dead "else: continue" after this check removed —
                    # it was the last statement of the loop body (no-op)
                    if file_name_matches := (os.path.basename(f.object_name) == "srazsae.dat"):
                        print("Importing srazsae.dat from")
                        print(folder_name)
                        print("working...")
                        csv_file = default_storage.open(name=f.object_name,
                                                        mode='r')
                        foo = csv_file.data.decode('utf-8')
                        reader = csv.reader(io.StringIO(foo), delimiter=" ")
                        rows = list(reader)
                        for rid_x, row in enumerate(rows, 1):
                            weather_station = WeatherStation.objects.get_or_create(
                                id_by_provider=row[0],
                                name=row[0]
                            )[0]
                            # columns 2 and 3 hold date and time of the reading
                            parsed = parse(row[2] + " " + row[3])
                            time = parsed.astimezone(UTC_P0100)
                            if row[1] == '32':
                                # code 32: instantaneous reading -> point range
                                dt_range = DateTimeTZRange(time, time,
                                                           bounds="[]")
                                observed_property = air_temperature
                            else:
                                # other codes: hourly total -> one-hour range
                                observed_property = precipitation
                                dt_range = DateTimeTZRange(
                                    time, time + timedelta(hours=1),
                                    bounds="[)")
                            WeatherObservation.objects.get_or_create(
                                phenomenon_time_range=dt_range,
                                observed_property=observed_property,
                                feature_of_interest=weather_station,
                                procedure=measure,
                                result=row[5]
                            )
        except Exception as e:
            # NOTE(review): this catch also swallows DB and parse errors
            # from the import itself, not only folders whose name is not a
            # date — consider narrowing to the strptime call.
            print(e)
    print("Done!")
def handle(self, *args, **options):
    """Import daily observations from CSV files under ``options['path']``.

    Each CSV file name (minus its extension) must match a Property
    ``name_id``; every data row becomes one Observation with a one-day
    phenomenon time range starting at the timestamp in column 2.

    :raises Exception: if no path was given on the command line
    """
    get_or_create_huaihe_stations()
    get_or_create_processes()
    get_or_create_props()
    arg_path = options['path']
    # Observation.objects.all().delete()
    if arg_path is None:
        raise Exception("No path to folder defined!")
    process = Process.objects.get(name_id='measure')
    path = os.path.join(settings.IMPORT_ROOT, arg_path, '')
    file_count = 0
    listed = default_storage.listdir(path)
    # files = len(listed)
    for filename in listed:
        char_idx = filename.object_name.rfind('/') + 1
        file_csv_name = filename.object_name[char_idx:-4]  # strip dir + ".csv"
        file_count += 1
        # BUG FIX: objects.get() raises DoesNotExist rather than returning
        # None, so the original "if file_property is None" skip could never
        # fire — unmatched files crashed the whole import instead of being
        # skipped as intended.
        try:
            file_property = Property.objects.get(name_id=file_csv_name.lower())
        except Property.DoesNotExist:
            print('Error: no property exists to match the file: {}'.format(
                file_csv_name))
            continue
        print('Processing | Name: {} | File: {}'.format(
            file_property, file_count))
        path = filename.object_name
        csv_file = default_storage.open(name=path, mode='r')
        foo = csv_file.data.decode('UTF-8')
        reader = csv.reader(io.StringIO(foo), delimiter=',')
        rows = list(reader)
        rows.pop(0)  # drop the header row
        for row in rows:
            station = SamplingFeature.objects.get(id_by_provider=row[0])
            time_start = parse_datetime(row[2])
            time_end = time_start + timedelta(1)  # one-day range
            time_range = DateTimeTZRange(time_start, time_end)
            result = float(row[3])
            observation = Observation(phenomenon_time_range=time_range,
                                      observed_property=file_property,
                                      feature_of_interest=station,
                                      procedure=process,
                                      result=result)
            observation.save()
    return
def handle(self, *args, **options):
    """Create the process and property metadata required by EventObservations."""
    get_or_create_processes()
    get_or_create_props()
    print('Metadata for EventObservations created')
def load(station, day):
    """Load and save ALA observations for given station and date.

    Downloads the provider's CSV for the 24-hour window of ``day`` (in
    UTC+01:00) and upserts one Observation per (row, property) pair.
    Precipitation is an interval quantity paired with the previous
    timestamp; all other properties are instantaneous (point ranges).

    :param station: station model instance with ``id_by_provider``
    :param day: the calendar date to load
    """
    get_or_create_processes()
    process = Process.objects.get(name_id='measure')
    # Build the [midnight, next midnight) window in UTC+01:00 and convert
    # to the epoch seconds the provider's API expects.
    from_naive = datetime.combine(day, datetime.min.time())
    to_naive = datetime.combine(day + timedelta(1), datetime.min.time())
    from_aware = from_naive.replace(tzinfo=UTC_P0100)
    to_aware = to_naive.replace(tzinfo=UTC_P0100)
    from_s = int(from_aware.timestamp())
    to_s = int(to_aware.timestamp())
    url = 'http://a.la-a.la/chart/data_csvcz.php?probe={}&t1={}&t2={}'.format(
        station.id_by_provider, from_s, to_s)
    logger.info('Downloading {}'.format(url))
    props = get_or_create_props()
    with closing(requests.get(url, stream=True)) as r:
        reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'utf-8'),
                            delimiter=';')
        rows = list(reader)
    # rows is fully materialised, so the response can be closed before the
    # (slow) database work below.
    num_rows = len(rows)
    expected_rows = 24 * 60 * 60 // \
        station_interval[station.id_by_provider] + 1
    if num_rows != expected_rows:
        logger.warning("Expected {} rows, found {}. Station {}.".format(
            expected_rows, num_rows, station.id_by_provider))
    # hoisted: column index per property for this provider (loop-invariant)
    prop_idx_map = props_to_provider_idx[station.id_by_provider]
    prev_time = None
    for ridx, row in enumerate(rows, 1):
        time = parse(row[0], dayfirst=True).replace(tzinfo=UTC_P0100)
        for prop in props:
            # precipitation needs the previous timestamp, so the first
            # row has nothing to pair with
            if prev_time is None and prop.name_id == 'precipitation':
                continue
            # instantaneous quantities carry no value on the trailing row
            if ridx == num_rows and prop.name_id != 'precipitation':
                continue
            # this provider may not report every property
            if prop.name_id not in prop_idx_map:
                continue
            time_from = \
                prev_time if prop.name_id == 'precipitation' else time
            time_to = time
            time_range_boundary = '[]' if time_from == time_to else '[)'
            pt_range = DateTimeTZRange(time_from, time_to,
                                       time_range_boundary)
            res_str = row[prop_idx_map[prop.name_id]].replace(',', '.')
            if res_str == '':
                result = None
                result_null_reason = 'empty string in CSV'
            else:
                try:
                    result = Decimal(res_str)
                    result_null_reason = ''
                except Exception as e:
                    result = None
                    result_null_reason = 'invalid string in CSV'
                    logger.error(e)
            if result is None:
                logger.warning(
                    "Result_null_reason of measuring, station {}, "
                    "property {}, phenomenon time {}: {}".format(
                        station.id_by_provider, prop.name_id, time_from,
                        result_null_reason))
            try:
                # lookup kwargs are applied automatically on create, so
                # defaults only needs the non-key fields
                defaults = {
                    'result': result,
                    'result_null_reason': result_null_reason
                }
                obs, created = Observation.objects.update_or_create(
                    phenomenon_time_range=pt_range,
                    observed_property=prop,
                    feature_of_interest=station,
                    procedure=process,
                    defaults=defaults)
            except IntegrityError as e:
                # BUG FIX: this was silently swallowed; keep the
                # best-effort behaviour but leave a trace in the log.
                logger.warning(e, exc_info=True)
        prev_time = time
def load_hod(day):
    """Import watercourse observations from ``<basedir>/<YYYYMMDD>/HOD.dat``.

    Each space-delimited row holds: station id, measure code, date, time,
    measure id and the value.  Rows with an unknown station, unknown code
    or unknown property are logged and skipped; unparsable values are
    stored as NULL with a ``result_null_reason``.

    :param day: date of the folder to import (anything with ``strftime``)
    """
    day = day.strftime("%Y%m%d")
    get_or_create_processes()
    get_or_create_props()
    process = Process.objects.get(name_id='measure')
    path = basedir_def + day + '/HOD.dat'
    if not default_storage.exists(path):
        logger.error("Error file path: %s not found", path)
        return
    csv_file = default_storage.open(name=path, mode='r')
    foo = csv_file.data.decode('utf-8')
    reader = csv.reader(io.StringIO(foo), delimiter=' ')
    rows = list(reader)
    for rid_x, row in enumerate(rows, 1):
        station_id = row[0]
        code = row[1]
        measure_date = row[2]
        measure_time = row[3]
        measure_id = row[4]
        result = None
        result_null_reason = ''
        try:
            result = float(row[5])
        except Exception as e:
            logger.warning(e, exc_info=True)
            result_null_reason = 'invalid value in CSV'
        try:
            station = WatercourseStation.objects.get(
                id_by_provider=station_id)
        except WatercourseStation.DoesNotExist:
            logger.warning(
                "WatercourseStation does not exist. Measure with values station_id {},"
                "code {}, measure_date {}, measure_time {}, measure_id {} not imported".format(
                    station_id, code, measure_date, measure_time, measure_id
                )
            )
            station = None
        if station:
            data_type = props_data_types.get(code)
            if data_type:
                try:
                    observed_property = Property.objects.get(name_id=data_type)
                except Property.DoesNotExist:
                    logger.error('Property with name %s does not exist.',
                                 data_type)
                    observed_property = None
                if observed_property:
                    time_str = measure_date + ' ' + measure_time
                    # NOTE(review): this datetime stays naive — the other
                    # loaders in this file attach UTC_P0100; confirm intent.
                    time_from = datetime.strptime(time_str, "%d.%m.%Y %H:%M")
                    # instantaneous reading -> point range
                    pt_range = DateTimeTZRange(time_from, time_from, '[]')
                    try:
                        defaults = {
                            'phenomenon_time_range': pt_range,
                            'observed_property': observed_property,
                            'feature_of_interest': station,
                            'procedure': process,
                            'result': result,
                            'result_null_reason': result_null_reason
                        }
                        WatercourseObservation.objects.update_or_create(
                            phenomenon_time_range=pt_range,
                            observed_property=observed_property,
                            feature_of_interest=station,
                            procedure=process,
                            defaults=defaults
                        )
                    except IntegrityError:
                        print(row)  # keep the raw row visible on stdout
                        logger.warning(
                            "Error in creating observation from station_id {},"
                            "code {}, measure_date {}, measure_time {}, measure_id {}".format(
                                station_id, code, measure_date, measure_time,
                                measure_id
                            ), exc_info=True)
                        # logger.warning('Error in creating observation from measure %s', measure_id)
            else:
                logger.error('Unknown measure code %s', code)
def load_srazsae(day, basedir=basedir_def):
    """Import weather observations from ``<basedir>/<YYYYMMDD>/srazsae.dat``.

    Each space-delimited row holds: station id, measure code, date, time,
    measure id and the value.  Code '32' is treated as an instantaneous
    air-temperature reading (point range); every other code as an hourly
    precipitation total.  Observations are upserted, so re-runs are safe.

    :param day: date of the folder to import (anything with ``strftime``)
    :param basedir: storage prefix holding the daily folders
    """
    day = day.strftime("%Y%m%d")
    get_or_create_processes()
    get_or_create_props()
    measure = Process.objects.get(name_id="measure")
    air_temperature = Property.objects.get(name_id="air_temperature")
    precipitation = Property.objects.get(name_id="precipitation")
    path = basedir + day + '/srazsae.dat'
    if not default_storage.exists(path):
        logger.error("Error file path: %s not found", path)
        return
    csv_file = default_storage.open(name=path, mode='r')
    foo = csv_file.data.decode('utf-8')
    reader = csv.reader(io.StringIO(foo), delimiter=" ")
    rows = list(reader)
    for rid_x, row in enumerate(rows, 1):
        try:
            result = row[5]
            station_id = row[0]
            weather_station = WeatherStation.objects.get(
                id_by_provider=station_id)
            time_str = row[2] + " " + row[3]
            parsed = datetime.strptime(time_str, "%d.%m.%Y %H:%M")
            # NOTE(review): astimezone() on a naive datetime assumes local
            # time; other loaders use replace(tzinfo=UTC_P0100) — confirm.
            time = parsed.astimezone(UTC_P0100)
            if row[1] == '32':
                # code 32: instantaneous reading -> point range
                dt_range = DateTimeTZRange(time, time, bounds="[]")
                observed_property = air_temperature
            else:
                # other codes: hourly total -> one-hour range
                observed_property = precipitation
                dt_range = DateTimeTZRange(time, time + timedelta(hours=1),
                                           bounds="[)")
            try:
                defaults = {
                    'phenomenon_time_range': dt_range,
                    'observed_property': observed_property,
                    'feature_of_interest': weather_station,
                    'procedure': measure,
                    'result': result
                }
                WeatherObservation.objects.update_or_create(
                    phenomenon_time_range=dt_range,
                    observed_property=observed_property,
                    feature_of_interest=weather_station,
                    procedure=measure,
                    defaults=defaults
                )
            except IntegrityError:
                logger.warning(
                    "Error in creating srazsae observation from station_id {},"
                    "measure_date {}, measure_id {}".format(
                        station_id, time, row[4]
                    ), exc_info=True)
        except WeatherStation.DoesNotExist:
            # consistency fix: log through logger like the other loaders
            # instead of a bare print
            logger.warning('Error STATION WITH ID NOT FOUND: %s', row[0])
def handle(self, *args, **options):
    """Import hourly OZP observations from CSV files under ``options['path']``.

    The file name (minus extension) selects the observed Property; the
    first CSV row maps the remaining columns to stations by provider id;
    every later row holds a date, a 1-based hour index and one value per
    station column.

    :raises Exception: if no path was given on the command line
    """
    start = time.time()
    stations = get_or_create_ozp_stations()
    properties = get_or_create_props()
    processes = get_or_create_processes()
    arg = options['path']
    # Observation.objects.all().delete()
    if arg is None:
        raise Exception("No path to folder defined!")
    ozp_process = None
    for process in processes:
        if process.name_id == 'measure':
            ozp_process = process
            break
    path = os.path.join(settings.IMPORT_ROOT, arg, '')
    file_count = 0
    listed = default_storage.listdir(path)
    # files = len(listed)
    for filename in listed:
        char_ix = filename.object_name.rfind('/') + 1
        file_csv_name = filename.object_name[char_ix:-4]  # strip dir + ".csv"
        file_count += 1
        file_stations = []
        file_property = None
        for prop in properties:
            if prop.name_id == file_csv_name.lower():
                file_property = prop
                break
        if file_property is None:
            print('Error: no property exists to match the file: {}'.
                  format(file_csv_name))
            continue
        print('Processing | Name: {} | File: {}'.format(
            file_property, file_count))
        path = filename.object_name
        csv_file = default_storage.open(name=path, mode='r')
        foo = csv_file.data.decode('Windows-1250')
        reader = csv.reader(io.StringIO(foo), delimiter=';')
        rows = list(reader)
        for i, row in enumerate(rows):
            if i == 0:
                # header row: map the data columns to stations by provider id
                for data in row:
                    for station in stations:
                        if station.id_by_provider == data:
                            file_stations.append(station)
                continue
            next_day = False
            date = row[0]
            start_hour = str(int(row[1]) - 1) + ':00'
            end_hour = str(int(row[1])) + ':00'
            if end_hour == '24:00':
                # 24:00 is not parseable; parse 23:59 and add the minute back
                end_hour = '23:59'
                next_day = True
            time_start = parse_time(date + ' ' + start_hour)
            time_end = parse_time(date + ' ' + end_hour)
            if next_day:
                time_end = time_end + timedelta(0, 60)
            time_range = DateTimeTZRange(time_start, time_end)
            for indx, data in enumerate(row):
                if indx <= 1:
                    continue  # columns 0 and 1 are date and hour
                station = file_stations[indx - 2]
                if data == '':
                    observation = Observation(
                        phenomenon_time_range=time_range,
                        observed_property=file_property,
                        feature_of_interest=station,
                        procedure=ozp_process,
                        result=None,
                        result_null_reason='empty string in CSV')
                else:
                    # BUG FIX: values containing a decimal comma were parsed
                    # but never saved (the comma branch built no Observation);
                    # normalise the separator and save them like the rest.
                    result = float(data.replace(',', '.'))
                    observation = Observation(
                        phenomenon_time_range=time_range,
                        observed_property=file_property,
                        feature_of_interest=station,
                        procedure=ozp_process,
                        result=result)
                observation.save()
    end = round((time.time() - start) / 60)
    print('Minutes: {}'.format(end))
    return