Exemple #1
0
def srazsae_import(path, day_from, day_to):
    """Import ``srazsae.dat`` files found under ``path`` for a date range.

    For every entry listed under ``path`` the name of its parent directory is
    parsed as ``YYYYMMDD``. When that date lies in ``[day_from, day_to)`` the
    entry is listed in turn and every file named ``srazsae.dat`` is read as a
    space-delimited CSV. Each row is stored with ``get_or_create`` as a
    ``WeatherObservation``: rows whose second column equals ``'32'`` become an
    instantaneous air-temperature observation, every other row becomes a
    one-hour precipitation observation.

    Any exception raised while processing one entry (including a folder name
    that is not a date) is printed and the loop moves on to the next entry.

    Raises:
        Exception: if ``day_from`` or ``day_to`` is ``None``.
    """
    if day_from is None or day_to is None:
        raise Exception("Wrong date range")

    get_or_create_processes()
    get_or_create_props()
    measure = Process.objects.get(name_id="measure")
    air_temperature = Property.objects.get(name_id="air_temperature")
    precipitation = Property.objects.get(name_id="precipitation")

    for entry in tuple(default_storage.listdir(path)):
        folder_name = os.path.basename(os.path.dirname(entry.object_name))
        try:
            folder_date = datetime.strptime(folder_name, "%Y%m%d")
            if not (day_from <= folder_date < day_to):
                continue

            for item in tuple(default_storage.listdir(entry.object_name)):
                if os.path.basename(item.object_name) != "srazsae.dat":
                    continue

                print("Importing srazsae.dat from")
                print(folder_name)
                print("working...")
                stored_file = default_storage.open(
                    name=item.object_name, mode='r')
                text = stored_file.data.decode('utf-8')
                # Parse the whole file up front, before any row is stored.
                rows = list(csv.reader(io.StringIO(text), delimiter=" "))

                for row in rows:
                    weather_station, _ = WeatherStation.objects.get_or_create(
                        id_by_provider=row[0],
                        name=row[0],
                    )

                    # NOTE(review): if parse() returns a naive datetime,
                    # astimezone() interprets it in the system local
                    # timezone - confirm this is intended.
                    moment = parse(row[2] + " " + row[3]).astimezone(
                        UTC_P0100)

                    if row[1] == '32':
                        observed_property = air_temperature
                        dt_range = DateTimeTZRange(
                            moment, moment, bounds="[]")
                    else:
                        observed_property = precipitation
                        dt_range = DateTimeTZRange(
                            moment, moment + timedelta(hours=1), bounds="[)")

                    WeatherObservation.objects.get_or_create(
                        phenomenon_time_range=dt_range,
                        observed_property=observed_property,
                        feature_of_interest=weather_station,
                        procedure=measure,
                        result=row[5],
                    )
        except Exception as e:
            print(e)
    print("Done!")
Exemple #2
0
    def handle(self, *args, **options):
        """Import daily observations from every CSV file in a folder.

        ``options['path']`` is joined onto ``settings.IMPORT_ROOT`` and the
        resulting folder is listed through ``default_storage``. The name of
        each file (minus its directory and its last four characters) is
        lower-cased and looked up as a ``Property.name_id``. Files without a
        matching property are reported and skipped. For every data row (the
        first row is treated as a header) one ``Observation`` spanning one
        day from the parsed start time is saved.

        Raises:
            Exception: if ``options['path']`` is ``None``.
        """
        get_or_create_huaihe_stations()
        get_or_create_processes()
        get_or_create_props()
        arg_path = options['path']

        if arg_path is None:
            raise Exception("No path to folder defined!")

        process = Process.objects.get(name_id='measure')

        folder = os.path.join(settings.IMPORT_ROOT, arg_path, '')
        listed = default_storage.listdir(folder)
        for file_count, filename in enumerate(listed, 1):
            object_name = filename.object_name
            char_idx = object_name.rfind('/') + 1
            # Drop the directory part and the 4-character extension.
            file_csv_name = object_name[char_idx:-4]

            # QuerySet.get() raises DoesNotExist rather than returning None,
            # so the "no matching property" case has to be caught explicitly.
            try:
                file_property = Property.objects.get(
                    name_id=file_csv_name.lower())
            except Property.DoesNotExist:
                print('Error: no property exists to match the file: {}'.format(
                    file_csv_name))
                continue

            print('Processing | Name: {} | File: {}'.format(
                file_property, file_count))
            csv_file = default_storage.open(name=object_name, mode='r')
            foo = csv_file.data.decode('UTF-8')
            reader = csv.reader(io.StringIO(foo), delimiter=',')
            rows = list(reader)

            # Slicing (instead of pop(0)) tolerates a completely empty file.
            for row in rows[1:]:
                station = SamplingFeature.objects.get(id_by_provider=row[0])
                time_start = parse_datetime(row[2])
                time_end = time_start + timedelta(days=1)
                time_range = DateTimeTZRange(time_start, time_end)

                observation = Observation(phenomenon_time_range=time_range,
                                          observed_property=file_property,
                                          feature_of_interest=station,
                                          procedure=process,
                                          result=float(row[3]))
                observation.save()
Exemple #3
0
 def handle(self, *args, **options):
     """Create the process and property metadata, then print a confirmation."""
     for ensure_metadata in (get_or_create_processes, get_or_create_props):
         ensure_metadata()
     print('Metadata for EventObservations created')
Exemple #4
0
def load(station, day):
    """Load and save ALA observations for given station and date.

    Downloads the provider CSV for the interval from midnight of ``day`` to
    midnight of the following day (both in ``UTC_P0100``) and upserts one
    ``Observation`` per row and per property that has a column mapping for
    this station in ``props_to_provider_idx``.

    Precipitation is stored as a ``[previous row time, row time)`` range, so
    it is skipped for the first row. Every other property is stored as an
    instantaneous ``[time, time]`` range and is skipped for the last row.
    Empty or unparsable values are stored with ``result=None`` and a
    ``result_null_reason``.

    Args:
        station: station object; its ``id_by_provider`` selects the remote
            probe, the expected row interval and the column mapping.
        day: the date to download.
    """
    get_or_create_processes()
    process = Process.objects.get(name_id='measure')

    from_naive = datetime.combine(day, datetime.min.time())
    to_naive = datetime.combine(day + timedelta(1), datetime.min.time())

    from_aware = from_naive.replace(tzinfo=UTC_P0100)
    to_aware = to_naive.replace(tzinfo=UTC_P0100)

    from_s = int(from_aware.timestamp())
    to_s = int(to_aware.timestamp())

    url = 'http://a.la-a.la/chart/data_csvcz.php?probe={}&t1={}&t2={}'.format(
        station.id_by_provider, from_s, to_s)

    logger.info('Downloading {}'.format(url))
    props = get_or_create_props()

    with closing(requests.get(url, stream=True)) as r:
        reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'utf-8'),
                            delimiter=';')
        rows = list(reader)
        num_rows = len(rows)
        # One row per interval plus the closing midnight row.
        expected_rows = 24 * 60 * 60 // \
                        station_interval[station.id_by_provider] + 1
        if num_rows != expected_rows:
            logger.warning("Expected {} rows, found {}. Station {}.".format(
                expected_rows, num_rows, station.id_by_provider))
        prev_time = None

        for ridx, row in enumerate(rows, 1):
            time = parse(row[0], dayfirst=True).replace(tzinfo=UTC_P0100)
            for prop in props:
                is_precipitation = prop.name_id == 'precipitation'
                # Precipitation needs a previous timestamp to form a range.
                if prev_time is None and is_precipitation:
                    continue
                # The last row only closes the final precipitation range.
                if ridx == num_rows and not is_precipitation:
                    continue

                provider_idx = props_to_provider_idx[station.id_by_provider]
                if prop.name_id not in provider_idx:
                    continue

                time_from = prev_time if is_precipitation else time
                time_to = time
                time_range_boundary = '[]' if time_from == time_to else '[)'
                pt_range = DateTimeTZRange(time_from, time_to,
                                           time_range_boundary)

                # The provider uses a decimal comma.
                res_str = row[provider_idx[prop.name_id]].replace(',', '.')
                if res_str == '':
                    result = None
                    result_null_reason = 'empty string in CSV'
                else:
                    try:
                        result = Decimal(res_str)
                        result_null_reason = ''
                    except Exception as e:
                        result = None
                        result_null_reason = 'invalid string in CSV'
                        logger.error(e)
                if result is None:
                    logger.warning(
                        "Result_null_reason of measuring, station {}, "
                        "property {}, phenomenon time {}: {}".format(
                            station.id_by_provider, prop.name_id, time_from,
                            result_null_reason))
                try:
                    Observation.objects.update_or_create(
                        phenomenon_time_range=pt_range,
                        observed_property=prop,
                        feature_of_interest=station,
                        procedure=process,
                        defaults={
                            'result': result,
                            'result_null_reason': result_null_reason,
                        })
                except IntegrityError:
                    # Keep importing the remaining values, but leave a trace
                    # instead of silently dropping the observation.
                    logger.warning(
                        "Error in creating observation from station {}, "
                        "property {}, phenomenon time {}".format(
                            station.id_by_provider, prop.name_id, time_from),
                        exc_info=True)
            prev_time = time
Exemple #5
0
def load_hod(day):
    """Import watercourse observations from the ``HOD.dat`` file of ``day``.

    The file is looked up at ``basedir_def + YYYYMMDD + '/HOD.dat'`` and read
    as a space-delimited CSV. For each row the station (column 0) and the
    property mapped from the measure code (column 1, via
    ``props_data_types``) are resolved, and a ``WatercourseObservation`` with
    an instantaneous time range is upserted. Rows with an unknown station,
    unknown code or missing property are logged and skipped. A value in
    column 5 that cannot be converted to ``float`` is stored as ``None`` with
    a ``result_null_reason``.

    A missing file is logged as an error and nothing is imported.
    """
    day = day.strftime("%Y%m%d")

    get_or_create_processes()
    get_or_create_props()

    process = Process.objects.get(name_id='measure')

    path = basedir_def + day + '/HOD.dat'

    if not default_storage.exists(path):
        logger.error("Error file path: %s not found", path)
        return

    stored_file = default_storage.open(name=path, mode='r')
    text = stored_file.data.decode('utf-8')
    rows = list(csv.reader(io.StringIO(text), delimiter=' '))

    for row in rows:
        station_id = row[0]
        code = row[1]
        measure_date = row[2]
        measure_time = row[3]
        measure_id = row[4]

        try:
            result = float(row[5])
            result_null_reason = ''
        except Exception as e:
            logger.warning(e, exc_info=True)
            result = None
            result_null_reason = 'invalid value in CSV'

        try:
            station = WatercourseStation.objects.get(
                id_by_provider=station_id)
        except WatercourseStation.DoesNotExist:
            logger.warning(
                "WatercourseStation does not exist. Measure with values station_id {},"
                "code {}, measure_date {}, measure_time {}, measure_id {} not imported".format(
                    station_id,
                    code,
                    measure_date,
                    measure_time,
                    measure_id
                )
            )
            station = None
        if not station:
            continue

        data_type = props_data_types.get(code)
        if not data_type:
            logger.error('Unknown measure code %s', code)
            continue

        try:
            observed_property = Property.objects.get(name_id=data_type)
        except Property.DoesNotExist:
            logger.error('Property with name %s does not exist.', data_type)
            observed_property = None
        if not observed_property:
            continue

        # NOTE(review): strptime() yields a naive datetime here - confirm
        # which timezone the storage layer assumes for it.
        time_from = datetime.strptime(
            measure_date + ' ' + measure_time, "%d.%m.%Y %H:%M")
        pt_range = DateTimeTZRange(time_from, time_from, '[]')

        # The four identifying fields are used both for the lookup and,
        # together with the result fields, as the values to write.
        lookup = {
            'phenomenon_time_range': pt_range,
            'observed_property': observed_property,
            'feature_of_interest': station,
            'procedure': process,
        }
        defaults = dict(
            lookup,
            result=result,
            result_null_reason=result_null_reason,
        )

        try:
            WatercourseObservation.objects.update_or_create(
                defaults=defaults, **lookup)
        except IntegrityError as e:
            print(row)
            logger.warning(
                "Error in creating observation from station_id {},"
                "code {}, measure_date {}, measure_time {}, measure_id {}".format(
                    station_id,
                    code,
                    measure_date,
                    measure_time,
                    measure_id
                ),
                exc_info=True)
Exemple #6
0
def load_srazsae(day, basedir=basedir_def):
    """Import weather observations from the ``srazsae.dat`` file of ``day``.

    The file is looked up at ``basedir + YYYYMMDD + '/srazsae.dat'`` and read
    as a space-delimited CSV. Rows whose second column equals ``'32'`` are
    upserted as instantaneous air-temperature observations; all other rows as
    one-hour precipitation observations. Rows whose station is unknown are
    reported with ``print`` and skipped; an ``IntegrityError`` during the
    upsert is logged as a warning.

    A missing file is logged as an error and nothing is imported.
    """
    day = day.strftime("%Y%m%d")

    get_or_create_processes()
    get_or_create_props()
    measure = Process.objects.get(name_id="measure")
    air_temperature = Property.objects.get(name_id="air_temperature")
    precipitation = Property.objects.get(name_id="precipitation")

    path = basedir + day + '/srazsae.dat'

    if not default_storage.exists(path):
        logger.error("Error file path: %s not found", path)
        return

    stored_file = default_storage.open(name=path, mode='r')
    text = stored_file.data.decode('utf-8')
    rows = list(csv.reader(io.StringIO(text), delimiter=" "))

    for row in rows:
        try:
            result = row[5]
            station_id = row[0]
            weather_station = WeatherStation.objects.get(
                id_by_provider=station_id)
            # NOTE(review): strptime() returns a naive datetime, which
            # astimezone() interprets in the system local timezone -
            # confirm this is intended.
            moment = datetime.strptime(
                row[2] + " " + row[3], "%d.%m.%Y %H:%M"
            ).astimezone(UTC_P0100)

            if row[1] == '32':
                observed_property = air_temperature
                dt_range = DateTimeTZRange(moment, moment, bounds="[]")
            else:
                observed_property = precipitation
                dt_range = DateTimeTZRange(
                    moment, moment + timedelta(hours=1), bounds="[)")

            # The identifying fields serve as the lookup and, together with
            # the result, as the values to write.
            lookup = {
                'phenomenon_time_range': dt_range,
                'observed_property': observed_property,
                'feature_of_interest': weather_station,
                'procedure': measure,
            }
            defaults = dict(lookup, result=result)

            try:
                WeatherObservation.objects.update_or_create(
                    defaults=defaults, **lookup)
            except IntegrityError as e:
                logger.warning(
                    "Error in creating srazsae observation from station_id {},"
                    "measure_date {}, measure_id {}".format(
                        station_id,
                        moment,
                        row[4]
                    ), exc_info=True)

        except WeatherStation.DoesNotExist:
            print('Error STATION WITH ID NOT FOUND: ', row[0])
Exemple #7
0
    def handle(self, *args, **options):
        """Import hourly observations from every CSV file in a folder.

        ``options['path']`` is joined onto ``settings.IMPORT_ROOT`` and the
        resulting folder is listed through ``default_storage``. The name of
        each file (minus its directory and its last four characters) is
        lower-cased and matched against the ``name_id`` of the known
        properties; files without a match are reported and skipped.

        Each file is decoded as Windows-1250 and read as a ``;``-delimited
        CSV. The first row is a header whose cells from the third column on
        are matched against station ``id_by_provider`` values. In every later
        row column 0 is the date, column 1 the ending hour (1-24) and the
        remaining columns hold one value per station. One ``Observation`` is
        saved per value; an empty cell is stored as ``None`` with a
        ``result_null_reason``.

        Header columns that match no known station are reported and their
        values skipped, so the remaining columns stay attached to the right
        station.

        Raises:
            Exception: if ``options['path']`` is ``None``.
        """
        start = time.time()
        stations = get_or_create_ozp_stations()
        properties = get_or_create_props()
        processes = get_or_create_processes()
        arg = options['path']

        if arg is None:
            raise Exception("No path to folder defined!")

        ozp_process = next(
            (process for process in processes
             if process.name_id == 'measure'),
            None)

        # First station wins if a provider id were to occur more than once.
        stations_by_id = {}
        for station in stations:
            stations_by_id.setdefault(station.id_by_provider, station)

        folder = os.path.join(settings.IMPORT_ROOT, arg, '')
        listed = default_storage.listdir(folder)
        for file_count, filename in enumerate(listed, 1):
            object_name = filename.object_name
            char_ix = object_name.rfind('/') + 1
            # Drop the directory part and the 4-character extension.
            file_csv_name = object_name[char_ix:-4]
            wanted_name = file_csv_name.lower()
            file_property = next(
                (prop for prop in properties if prop.name_id == wanted_name),
                None)
            if file_property is None:
                print('Error: no property exists to match the file: {}'.
                      format(file_csv_name))
                continue
            print('Processing | Name: {} | File: {}'.format(
                file_property, file_count))

            csv_file = default_storage.open(name=object_name, mode='r')
            foo = csv_file.data.decode('Windows-1250')
            rows = list(csv.reader(io.StringIO(foo), delimiter=';'))

            # Map each data column to its station by column index. Indexing a
            # list of matched stations by position would shift every column
            # after an unknown station onto the wrong station.
            header = rows[0] if rows else []
            column_stations = {}
            for indx, station_id in enumerate(header):
                if indx < 2:
                    continue  # date and hour columns
                if station_id in stations_by_id:
                    column_stations[indx] = stations_by_id[station_id]
                else:
                    print('Error: no station exists to match the column: {}'.
                          format(station_id))

            for row in rows[1:]:
                if not row:
                    continue  # csv.reader yields [] for blank lines
                date = row[0]
                hour = int(row[1])
                start_hour = str(hour - 1) + ':00'
                end_hour = str(hour) + ':00'
                next_day = end_hour == '24:00'
                if next_day:
                    # "24:00" cannot be parsed; parse 23:59 and add a minute.
                    end_hour = '23:59'
                time_start = parse_time(date + ' ' + start_hour)
                time_end = parse_time(date + ' ' + end_hour)
                if next_day:
                    time_end = time_end + timedelta(0, 60)
                time_range = DateTimeTZRange(time_start, time_end)

                for indx, data in enumerate(row):
                    station = column_stations.get(indx)
                    if station is None:
                        continue

                    fields = {
                        'phenomenon_time_range': time_range,
                        'observed_property': file_property,
                        'feature_of_interest': station,
                        'procedure': ozp_process,
                    }
                    if data == '':
                        fields['result'] = None
                        fields['result_null_reason'] = 'empty string in CSV'
                    else:
                        # Values may use a decimal comma.
                        fields['result'] = float(data.replace(',', '.'))
                    Observation(**fields).save()

        end = round(((time.time() - start) / 60))
        print('Minutes: {}'.format(end))
        return