Ejemplo n.º 1
0
def main(args):
    """Create a local and remote sensor for each station row in a payload.

    Reads ``filename``, ``restfilenames``, ``metadata`` and ``measurand``
    (default ``"temperature"``) from ``args``. The metadata text is split on
    the two-character sequence backslash + ``n`` and each piece is passed
    through ``clean_str``. Rows with fewer than seven elements, or whose
    first element fails ``isdigit()``, are skipped.

    Returns a dict with ``message``, ``filename`` and ``restfilenames`` on
    success. On any exception ``secretmanager.complete_sequence`` is called
    with the remaining names and a dict with an ``error`` key is returned.
    """
    filename = args.get("filename")
    rest_names = args.get("restfilenames")
    content = args.get("metadata")
    measurand = args.get("measurand", "temperature")

    try:
        # NOTE: the separator is an escaped newline (backslash followed by
        # "n"), not a real line break.
        for raw_row in content.split("\\n"):
            fields = clean_str(raw_row)
            if not (len(fields) >= 7 and fields[0].isdigit()):
                continue

            # Only six leading columns are used; the height column is read
            # but not forwarded.
            station_id, _height, lat, lon, valid_from, valid_to, *_ = fields
            valid_from = to_osn_date(valid_from)
            if valid_to:
                valid_to = to_osn_date(valid_to)

            createLocalAndRemoteSensor(
                station_id,
                measurand,
                valid_from,
                valid_to,
                float(lat),
                float(lon),
            )
    except Exception as e:
        # Hand the remaining files on before reporting the failure.
        secretmanager.complete_sequence(rest_names)
        result = {
            "error":
            "failed metadata because of unkown error - jump to next file"
        }
        print(result, e)
        return result

    return {
        "message": "finished given metadata",
        "filename": filename,
        "restfilenames": rest_names,
    }
def main(args):
    """Download a zipped station file and return its ``produkt*`` member.

    Args:
        args: Mapping with ``filename`` (archive name appended to the FTP
            base URL), ``restfilenames`` (passed through unchanged) and an
            optional ``ftp_url`` path below ``ftp://ftp-cdc.dwd.de/``.

    Returns:
        ``{"error": "seuquence should be stopped"}`` when no filename is
        given. On success, a dict with ``csv`` (the member's bytes rendered
        via ``str()`` with the ``b'...'`` wrapper stripped, so line breaks
        stay as escaped text) and ``restfilenames``. On any failure
        ``secretmanager.complete_sequence`` is called and a dict with an
        ``error`` key is returned.
    """
    file_name = args.get("filename")
    rest_names = args.get("restfilenames")
    if file_name is None:
        return {"error": "seuquence should be stopped"}
    try:
        ftp_url = "ftp://ftp-cdc.dwd.de/" + args.get(
            "ftp_url",
            "climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/")
        inner_file_name = "COULD NOT GET FILENAME"

        # Buffer the archive in memory and release the FTP connection
        # right away instead of leaving the response open.
        with urlopen(ftp_url + file_name) as sensorzip:
            memfile = io.BytesIO(sensorzip.read())

        with ZipFile(memfile, 'r') as myzip:
            # No break: if several members match, the last one wins.
            for z_info in myzip.filelist:
                if z_info.filename.startswith("produkt"):
                    inner_file_name = z_info.filename
            # When nothing matched, the placeholder name makes open() raise,
            # which is reported by the handler below.
            with myzip.open(inner_file_name) as csv_file_value_data:
                raw_csv = csv_file_value_data.read()

        result = {"csv": str(raw_csv)[2:-1],
                  "restfilenames": rest_names}
        print("send in get csv")
        return result
    except Exception as e:
        secretmanager.complete_sequence(rest_names)
        result = {"error": "failed metadata because of unkown error - jump to next file"}
        print(result, e)
        return result
def main(args):
    """Log in to the API and hand the CSV rows to ``handle_content_data``.

    Returns ``{"error": "seuquence should be stopped"}`` when ``args`` has
    no ``csv`` entry. Otherwise the text is split with ``splitlines()``, the
    first row is passed as ``first_line`` and the rest as ``lines``. Any
    exception is printed and swallowed, ``secretmanager.complete_sequence``
    is always called with ``restfilenames``, and ``{"message": "finished"}``
    is returned.
    """
    csv = args.get("csv")
    rest_names = args.get("restfilenames")
    measurand = args.get("measurand", 'temperature')  # read but not used here

    if csv is None:
        return {"error": "seuquence should be stopped"}

    try:
        api.login(
            username=secretmanager.__OSNUSERNAME__,
            password=secretmanager.__OSNPASSWORD__,
        )
        rows = csv.splitlines()
        header = rows[0]
        body = rows[1:]
        handle_content_data(first_line=header, lines=body)
    except Exception as e:
        print("Exception {}".format(e))
    finally:
        # Runs on success and on failure alike.
        secretmanager.complete_sequence(rest_names)

    return {"message": "finished"}
def main(args):
    """Run ``parse_metadata`` on the payload and report the outcome.

    Reads ``filename``, ``restfilenames``, ``metadata`` and ``measurand``
    (default ``'temperature'``) from ``args``. On success the filename and
    remaining names are echoed back with a ``message``. On any exception
    ``secretmanager.complete_sequence`` is called and a dict with an
    ``error`` key is returned.
    """
    filename = args.get('filename')
    rest_names = args.get('restfilenames')
    content = args.get('metadata')
    measurand = args.get('measurand', 'temperature')

    try:
        parse_metadata(content, measurand)
    except Exception as e:
        secretmanager.complete_sequence(rest_names)
        failure = {
            'error':
            'failed metadata because of unkown error - jump to next file'
        }
        print(failure, e)
        return failure

    success = {
        'message': 'finished given metadata',
        'filename': filename,
        'restfilenames': rest_names,
    }
    return success
def main(args):
    """Download a zipped station file and return its metadata member.

    Args:
        args: Mapping with ``filename`` (archive name appended to the FTP
            base URL), ``restfilenames`` (passed through unchanged) and an
            optional ``ftp_url`` path below ``ftp://ftp-cdc.dwd.de/``.

    Returns:
        On success, a dict with ``metadata`` (the member's bytes rendered
        via ``str()`` with the ``b'...'`` wrapper stripped, so line breaks
        stay as escaped text), ``filename`` and ``restfilenames``. On any
        failure ``secretmanager.complete_sequence`` is called and a dict
        with a ``message`` key is returned.
    """
    inner_file_name = "COULD NOT GET FILENAME"
    file_name = args.get("filename")
    rest_names = args.get("restfilenames")
    try:
        ftp_url = "ftp://ftp-cdc.dwd.de/" + args.get(
            "ftp_url",
            "climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/"
        )

        # Buffer the archive in memory and release the FTP connection
        # right away instead of leaving the response open.
        with urlopen(ftp_url + file_name) as sensorzip:
            memfile = io.BytesIO(sensorzip.read())

        with ZipFile(memfile, 'r') as myzip:
            # No break: if several members match, the last one wins.
            for z_info in myzip.filelist:
                substrings = z_info.filename.split("_")
                # Comparing a slice avoids an IndexError for member names
                # without an underscore, which used to abort the scan.
                if (substrings[0] == "Stationsmetadaten"
                        or substrings[:2] == ["Metadaten", "Geographie"]):
                    inner_file_name = z_info.filename
            # When nothing matched, the placeholder name makes open() raise,
            # which is reported by the handler below.
            with myzip.open(inner_file_name) as meta_data:
                raw_metadata = meta_data.read()

        result = {
            "metadata": str(raw_metadata)[2:-1],
            "filename": file_name,
            "restfilenames": rest_names
        }
        print("send in get metadata", result)
        return result
    except Exception as e:
        secretmanager.complete_sequence(rest_names)
        result = {
            "message":
            "failed metadata because of unkown error - jump to next file"
        }
        # Surface the cause instead of dropping it silently.
        print(result, e)
        return result
Ejemplo n.º 6
0
def main(args):
    """Push the values of one CSV payload to OSN, sensor by sensor.

    Args:
        args: Mapping with ``csv`` (file content as a single string whose
            rows are separated by the escaped text backslash-r backslash-n),
            ``restfilenames`` (handed to ``secretmanager.complete_sequence``)
            and ``measurand`` (defaults to ``'temperature'``). The measurand
            selects which columns are pushed.

    Returns:
        ``{"error": "seuquence should be stopped"}`` when ``csv`` is
        missing. Otherwise ``{"message": "finished"}``, also when processing
        failed, because every exception is printed and swallowed.
        ``secretmanager.complete_sequence`` is always called once a payload
        was given.
    """
    csv = args.get("csv")
    rest_names = args.get("restfilenames")
    measurand = args.get("measurand", 'temperature')
    if csv is None:
        return {"error": "seuquence should be stopped"}
    try:
        logged_action = False
        lines = csv.split("\\r\\n")
        first_line = lines.pop(0)
        # The row directly after the header is dropped unread.
        # NOTE(review): confirm this row is never a data row.
        lines.pop(0)
        first_line = clean_str(first_line)
        len_first_line = len(first_line)
        field_defs = get_indices(first_line)
        print("fielddefs", field_defs)

        if len_first_line < 5:
            raise Exception(
                f'Nr of fields is lower than expected: {first_line}')

        # Quality, structure version, precipitation and sunshine indices are
        # unpacked but not used yet.
        (dwd_id_idx, date_idx, quality_idx, structure_version_idx,
         air_temperature_idx, humidity_idx, cloudiness_idx,
         precipitation_yes_no_idx, precipitation_amount_idx,
         precipitation_type_idx, air_pressure_nn_idx, air_pressure_idx,
         sunshine_mins_per_hour_idx, wind_speed_idx,
         wind_direction_idx) = field_defs

        if dwd_id_idx is None:
            raise Exception(
                f'File does not contain a dwd_id index: {field_defs}')
        if date_idx is None:
            raise Exception(
                f'File does not contain a Timestamp index: {field_defs}')

        print("len lines", len(lines))
        lines = [clean_str(x) for x in lines if not (x is None or x == '')]

        dwd_id = lines[0][dwd_id_idx]
        # Test for None first; calling .isdigit() on None would raise.
        if dwd_id is None or not dwd_id.isdigit():
            raise Exception(f'Not a valid dwd_id: {dwd_id}')
        print("dwdid", dwd_id)

        # Drop malformed rows before sorting, so the sort key never indexes
        # past the end of a truncated row.
        lines = [
            line for line in lines
            if len(line) > 5 and len(line) == len_first_line
        ]
        lines = sorted(lines, key=lambda x: x[date_idx])
        print("lenlines", len(lines))

        lines = list(zip(*lines))  # transpose: one tuple per column

        dates = []
        iso_dates = []
        for ts in lines[date_idx]:
            # Timestamps are sliced as YYYYMMDD plus a trailing two-digit hour.
            year, month, day, hour = ts[:4], ts[4:6], ts[6:8], ts[-2:]
            dates.append(f'{year}-{month}-{day}')
            iso_dates.append(
                datetime(int(year), int(month), int(day),
                         int(hour)).isoformat())

        print('-' * 50)

        valuebulk = {'collapsedMessages': []}
        messages = valuebulk['collapsedMessages']

        def _add_float_values(_idx: int, _i: int, _j: int, _osn_id: int):
            """Queue rows ``_i:_j`` of column ``_idx``, skipping -999.0."""
            for _iso_date, _value in zip(iso_dates[_i:_j], lines[_idx][_i:_j]):
                _value = to_float_or_none(_value)
                if _value is not None and _value != -999.0:
                    messages.append({
                        'sensorId': _osn_id,
                        'timestamp': _iso_date,
                        'numberValue': _value
                    })

        def _add_cloudiness_values(_idx: int, _i: int, _j: int, _osn_id: int):
            """Queue rows ``_i:_j`` of column ``_idx`` scaled by 1/8."""
            for _iso_date, _value in zip(iso_dates[_i:_j], lines[_idx][_i:_j]):
                _value = to_int_or_none(_value)
                # NOTE(review): values 0 and 8 are dropped by this strict
                # range check; confirm whether both bounds should be kept.
                if _value is not None and 0 < _value < 8:
                    _value *= 1 / 8  # map to float between 0 and 1
                    messages.append({
                        'sensorId': _osn_id,
                        'timestamp': _iso_date,
                        'numberValue': _value
                    })

        def _process_chunks(_idx: int, _chunks: tuple, _sensors: dict,
                            _measurand: str, _add_values: Callable):
            """Push column ``_idx`` in batches and record progress in Mongo."""
            nonlocal messages, logged_action
            print("chunks ", _chunks)
            _local_id = f'{dwd_id}-{_measurand}'
            with mongo_conn(mongo_db_url) as collection:
                if not logged_action:
                    # Count this invocation once, however many measurands run.
                    collection.update_one({"_id": 5},
                                          {"$inc": {"actionCount": 1}})
                    logged_action = True
                for _sensor_id in _chunks:
                    _sensor = _sensors[_sensor_id]
                    _osn_id = _sensor['osn_id']
                    _sensor_idx = _sensor['idx']
                    _chunk = _chunks[_sensor_id]
                    if _sensor['earliest_sent_value'] == '':
                        collection.update_one(
                            filter={'local_id': _local_id},
                            update={
                                '$set': {
                                    f'sensors.{_sensor_idx}.earliest_sent_value':
                                    dates[_chunk[0]]
                                }
                            })
                    for i, j in batchify(*_chunk, max_batch_size=3000):
                        _add_values(_idx, i, j, _osn_id)
                        t0 = time.time()
                        collection.update_one(
                            {"_id": 2},
                            {"$inc": {
                                "aimedValueCount": len(messages)
                            }})
                        if osn_push_valuebulk(valuebulk):
                            print(
                                f'Pushed {len(messages)} values to OSN. took: {round(time.time() - t0, 5)} sec'
                            )
                            collection.update_one(
                                filter={'local_id': _local_id},
                                update={
                                    '$set': {
                                        f'sensors.{_sensor_idx}.latest_sent_value':
                                        dates[j - 1]
                                    }
                                })
                            # count pushed values in whole process
                            collection.update_one(
                                {"_id": 2},
                                {"$inc": {
                                    "valueCount": len(messages)
                                }})
                        # Start a fresh batch whether or not the push worked;
                        # a failed batch is dropped, not retried.
                        valuebulk['collapsedMessages'] = []
                        messages = valuebulk['collapsedMessages']

        def _update(_idx: int, _measurand: str, _func: Callable):
            """Look up the sensors for ``_measurand`` and push column ``_idx``."""
            _sensors = mongo_sensors_by_local_id(dwd_id, _measurand)
            _chunks = seperate_by_sensor(dates, _sensors, dwd_id)
            _process_chunks(_idx, _chunks, _sensors, _measurand, _func)

        # Each requested measurand maps to the columns pushed for it, in
        # order. Every entry carries its own column index, so a series is
        # never read from the air temperature column by mistake.
        targets = {
            'temperature': (
                (air_temperature_idx, 'temperature', _add_float_values),
                (humidity_idx, 'humidity', _add_float_values),
            ),
            'cloudiness': (
                (cloudiness_idx, 'cloudiness', _add_cloudiness_values),
            ),
            'air_pressure': (
                (air_pressure_idx, 'air_pressure', _add_float_values),
                (air_pressure_nn_idx, 'air_pressure_nn', _add_float_values),
            ),
            'wind_speed': (
                (wind_speed_idx, 'wind_speed', _add_float_values),
                (wind_direction_idx, 'wind_direction', _add_float_values),
            ),
        }
        for _column_idx, _name, _adder in targets.get(measurand, ()):
            if _column_idx is not None:
                _update(_column_idx, _name, _adder)

    except Exception as e:
        print("Exception {}".format(e))
    finally:
        secretmanager.complete_sequence(rest_names)
    return {"message": "finished"}