Ejemplo n.º 1
0
    def scrape(self):
        """Scrape the NDBC station page for ``self.stationID`` and store it.

        Downloads the station page, parses it into ``self.tree`` and, when
        ``self.isValidStation()`` accepts it, extracts every labelled
        measurement through the ``grab*``/``construct*`` helpers. Fields for
        which a helper returned ``None`` are left out of the reading. The
        resulting ``Buoy`` and ``Reading`` documents are upserted into
        ``self.db.buoys`` and ``self.db.readings``, keyed on ``station_id``.

        Returns:
            None. Returns early, without writing to the database, when the
            page is not a valid station.

        Raises:
            Nothing is caught here: exceptions from ``requests.get``
            (including the timeout configured below) propagate to the caller.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        page = requests.get(
            'https://www.ndbc.noaa.gov/station_page.php?station=' +
            self.stationID,
            timeout=30)

        self.tree = html.fromstring(page.content)

        if not self.isValidStation():
            return

        # setup the dictionaries
        buoy = {'station_id': self.stationID}
        reading = {'station_id': self.stationID}

        number = self.grabNumberFromString
        direction = self.grabDirectionTupleFromString
        constructed = self.constructDirectionTupleFromString

        # (reading key, parser, label handed to grabFromTree), kept in the
        # same order the values were originally extracted.
        fields = (
            # Main box variables
            ('wind_direction', direction, '"Wind Direction (WDIR):"'),
            ('wind_speed', number, '"Wind Speed (WSPD):"'),
            ('wind_gust', number, '"Wind Gust (GST):"'),
            # this also seems to track 'Significant Wave Height'
            ('wave_height', number, '"Wave Height (WVHT):"'),
            ('dominant_period', number, '"Dominant Wave Period (DPD):"'),
            ('average_period', number, '"Average Period (APD):"'),
            ('wave_direction', direction, '"Mean Wave Direction (MWD):"'),
            ('air_temperature', number, '"Air Temperature (ATMP):"'),
            # Secondary box variables
            ('significant_wave_height', number,
             '"Significant Wave Height (WVHT):"'),
            ('swell_height', number, '"Swell Height (SwH):"'),
            ('swell_period', number, '"Swell Period (SwP):"'),
            ('swell_direction', constructed, '"Swell Direction (SwD):"'),
            ('wind_wave_height', number, '"Wind Wave Height (WWH):"'),
            ('wind_wave_period', number, '"Wind Wave Period (WWP):"'),
            ('wind_wave_direction', constructed,
             '"Wind Wave Direction (WWD):"'),
            ('average_wave_period', number, '"Average Wave Period (APD):"'),
        )
        for key, parse, label in fields:
            reading[key] = parse(self.grabFromTree(label))

        # Grab the time
        reading['first_time'] = self.grabLocalTime(True)
        reading['second_time'] = self.grabLocalTime(False)

        # Grab the buoy name
        buoyName = self.grabBuoyName()
        buoy['name'] = buoyName
        reading['buoy_name'] = buoyName

        # Remove nulls (identity test: only a literal None means "missing")
        reading = {k: v for k, v in reading.items() if v is not None}

        # Instantiate model objects
        buoyObject = Buoy(**buoy)
        readingObject = Reading(**reading)

        # Upsert one document per station. replace_one() is the supported
        # equivalent of the deprecated Collection.update() when the second
        # argument is a full replacement document.
        self.db.readings.replace_one({'station_id': self.stationID},
                                     readingObject.mongoDB(),
                                     upsert=True)
        self.db.buoys.replace_one({'station_id': self.stationID},
                                  buoyObject.mongoDB(),
                                  upsert=True)
Ejemplo n.º 2
0
    def update(self, stationID):
        """Refresh the stored buoy and latest reading for ``stationID``.

        Looks the station up in ``self.db.potentialBuoys``; unknown stations
        are ignored. Downloads the first 2000 bytes of the station's NDBC
        realtime ``.txt`` ("first box") and ``.spec`` ("second box") files,
        picks one data row from each, converts the values with the
        ``*FromReading`` helpers and upserts ``Buoy`` and ``Reading``
        documents keyed on ``station_id``.

        Args:
            stationID: station identifier string, e.g. ``'44013'``; it is
                appended to the download URLs and used as the upsert key.

        Returns:
            None. Returns early, without writing to the database, when the
            station is unknown, a download raises ``HTTPError``, or either
            file contains no complete data row.
        """
        from contextlib import closing

        potentialBuoy = self.db.potentialBuoys.find_one(
            {'station_id': stationID})

        if potentialBuoy is None:
            # This is not a valid stationID
            return
        buoyName = potentialBuoy['name']
        buoy = {'station_id': potentialBuoy['station_id'], 'name': buoyName}

        # gets you the first box:
        # https://www.ndbc.noaa.gov/data/realtime2/41013.txt
        # then this for second box:
        # https://www.ndbc.noaa.gov/data/realtime2/44013.spec
        baseUrl = "https://www.ndbc.noaa.gov/data/realtime2/" + stationID

        try:
            # closing() releases the connection even if read() fails.
            with closing(urllib2.urlopen(baseUrl + ".txt")) as response:
                firstBoxData = response.read(2000)
            with closing(urllib2.urlopen(baseUrl + ".spec")) as response:
                secondBoxData = response.read(2000)
        except urllib2.HTTPError:
            print('error caught trying to fetch ' + stationID)
            # TODO make this still update the reading as empty or something
            return

        def parseTable(rawText):
            """Return (column labels, complete data rows) for one file."""
            if not isinstance(rawText, str):
                # read() yields bytes on Python 3; on Python 2 it is already
                # str and is left untouched.
                rawText = rawText.decode('utf-8', 'replace')
            lines = rawText.split('\n')
            labels = lines[0].split()
            # lines[1] is the units row. read(2000) normally cuts the last
            # line mid-row, so rows shorter than the header are discarded
            # instead of being indexed out of range later.
            rows = [
                row for row in (line.split() for line in lines[2:])
                if labels and len(row) >= len(labels)
            ]
            return labels, rows

        # For some stations (e.g. 44008) the first box data is taken every 10 min,
        # but only 1 per hour (usually on the 50 min) has all the data.

        # First box columns and units:
        # YY  MM DD hh mm WDIR WSPD GST  WVHT   DPD   APD MWD   PRES  ATMP  WTMP  DEWP  VIS PTDY  TIDE
        # yr  mo dy hr mn degT m/s  m/s     m   sec   sec degT   hPa  degC  degC  degC  nmi  hPa    ft

        # Second box columns and units:
        # YY  MM DD hh mm WVHT  SwH  SwP  WWH  WWP SwD WWD  STEEPNESS  APD MWD
        # yr  mo dy hr mn    m    m  sec    m  sec  -  degT     -      sec degT

        firstLabels, firstRows = parseTable(firstBoxData)
        secondLabels, secondRows = parseTable(secondBoxData)

        if 'mm' not in firstLabels or not firstRows or not secondRows:
            # Empty or malformed download: nothing trustworthy to store.
            print('no usable data rows for ' + stationID)
            return

        minuteIndex = firstLabels.index('mm')

        # Prefer the newest 50-minute row; default to the latest row if no
        # 50 min readings are found.
        firstValues = next(
            (row for row in firstRows if row[minuteIndex] == '50'),
            firstRows[0])
        firstBoxDictionary = dict(zip(firstLabels, firstValues))

        # NOTE(review): the second box always uses its newest row, which may
        # carry a different timestamp than the first-box row chosen above.
        secondBoxDictionary = dict(zip(secondLabels, secondRows[0]))

        waveHeight = self.feetFromReading(firstBoxDictionary, 'WVHT')
        wavePeriod = self.floatFromReading(firstBoxDictionary, 'DPD')
        waveDirection = self.floatFromReading(firstBoxDictionary, 'MWD')
        swellHeight = self.feetFromReading(secondBoxDictionary, 'SwH')
        swellPeriod = self.floatFromReading(secondBoxDictionary, 'SwP')
        swellDirection = self.compassToDegreesFromReading(
            secondBoxDictionary, 'SwD')  # Are all of these Strings?
        # Timestamp is assembled as e.g. 2019-10-20T17:50:00Z
        try:
            wavesDatetime = (firstBoxDictionary['#YY'] + '-' +
                             firstBoxDictionary['MM'] + '-' +
                             firstBoxDictionary['DD'] + 'T' +
                             firstBoxDictionary['hh'] + ':' +
                             firstBoxDictionary['mm'] + ':00Z')
        except KeyError:
            wavesDatetime = None
        windDirection = self.floatFromReading(firstBoxDictionary, 'WDIR')
        windSpeed = self.knotsFromReading(firstBoxDictionary, 'WSPD')

        reading = {
            'station_id': stationID,
            'buoy_name': buoyName,
            'wind_direction': windDirection,
            'wind_speed': windSpeed,
            'wave_height': waveHeight,
            'dominant_period': wavePeriod,
            'wave_direction': waveDirection,
            'swell_height': swellHeight,
            'swell_period': swellPeriod,
            'swell_direction': swellDirection,
            'datetime': wavesDatetime
        }

        # Remove nulls (identity test: only a literal None means "missing")
        reading = {k: v for k, v in reading.items() if v is not None}

        # Instantiate model objects
        buoyObject = Buoy(**buoy)
        readingObject = Reading(**reading)

        # Upsert one document per station. replace_one() is the supported
        # equivalent of the deprecated Collection.update() when the second
        # argument is a full replacement document.
        self.db.readings.replace_one({'station_id': stationID},
                                     readingObject.mongoDB(),
                                     upsert=True)
        self.db.buoys.replace_one({'station_id': stationID},
                                  buoyObject.mongoDB(),
                                  upsert=True)