Ejemplo n.º 1
0
    def interpolate(self, timeseries, **args):
        """Fill gaps in ``timeseries`` with distance-weighted neighbor values.

        For every gap reported by ``MissingDataFinder``, one new row per
        missing time step is inserted. Each inserted row holds the weighted
        mean of the neighbor measurements, where neighbors closer to
        ``location`` receive a larger weight. If no neighbor has a usable
        value for a step, the last known ``corrected_score`` of the target is
        repeated and ``speed`` is set to NaN.

        Parameters
        ----------

        timeseries : structured array
                     rows with the fields 'date', 'corrected_score', 'speed'.
        timestep   : keyword, required
                     step between two consecutive 'date' values.
        location   : keyword, required
                     position of the target, passed to ``haversine``.
        neighbor_series : keyword, required
                     sequence of neighbor time series.
        neighbor_locations : keyword, required
                     positions of the neighbors, same order as
                     ``neighbor_series``.

        Returns
        -------

        structured array
            New array (fields 'date' int32, 'corrected_score' float32,
            'speed' float32) holding the original rows plus the
            interpolated rows.

        Raises
        ------

        KeyError
            If one of the required keywords is missing.
        Exception
            If a neighbor has distance 0 to ``location``.
        """
        cs = 'corrected_score'
        sp = 'speed'
        date = 'date'

        timestep = args['timestep']
        location = args['location']
        neighbor_series = args['neighbor_series']
        neighbor_locations = args['neighbor_locations']

        # Override missing data on neighbors; -1 is passed as the marker
        # value and is what the candidate search below tests against.
        ovm = OverrideMissing()
        ov_neighbor_series = []
        for i in range(len(neighbor_series)):
            ov_neighbor_series.append(
                ovm.override(neighbor_series[i], timestep, -1))

        # find missing data on target
        finder = MissingDataFinder()
        new_amount = timeseries.shape[0]
        misses = finder.find(timeseries, timestep)

        # calculating distances
        distances = []
        for i in range(len(neighbor_series)):
            dist = haversine(location, neighbor_locations[i])
            if dist == 0:
                # a zero distance would lead to a division by zero below
                raise Exception("distance is 0.")
            distances.append(dist)

        # Map each gap's start index to the number of rows to insert.
        # The amounts are cast to int so they can be used as array sizes
        # and range bounds.
        starts = {}
        for start, end, amount in misses:
            amount = int(amount)
            new_amount += amount
            starts[start] = amount

        # allocate new numpy array
        new_mat = zeros((new_amount,),
                        dtype=[('date', int32),
                               ('corrected_score', float32),
                               ('speed', float32)])

        current_index = 0
        for i in range(len(timeseries)):
            # every original measurement is kept
            new_mat[current_index] = timeseries[i]
            current_index += 1

            if i not in starts:
                continue

            # a gap starts after measurement i: insert the missing rows
            for j in range(1, starts[i] + 1):
                new_timestep = timeseries[i][date] + j * timestep

                # search for candidates with no missing data
                # NOTE(review): neighbors are indexed with i + j, i.e.
                # relative to the target's original index -- confirm this
                # stays aligned once earlier gaps have been filled.
                candidates = []
                sum_of_distances = 0
                for k in range(len(ov_neighbor_series)):
                    if ov_neighbor_series[k][i + j][cs] != -1:
                        candidates.append(k)
                        sum_of_distances += distances[k]

                if not candidates:
                    # no candidates available: copy old data
                    y = timeseries[i][cs]
                    new_mat[current_index] = (new_timestep, y, nan)
                    current_index += 1
                    continue

                # raw weight is the inverse of the neighbor's share of the
                # summed distances; its sum is used for normalization
                w_hat_k = {}
                sum_of_w_hat = 0
                for k in candidates:
                    w_hat_k[k] = 1.0 / (distances[k] / sum_of_distances)
                    sum_of_w_hat += w_hat_k[k]

                # calculation of label and speed as weighted means
                y = 0
                ws = 0
                for k in candidates:
                    # w_k is anti-proportional to the distance
                    w_k = w_hat_k[k] / sum_of_w_hat
                    y += w_k * ov_neighbor_series[k][i + j][cs]
                    ws += w_k * ov_neighbor_series[k][i + j][sp]

                new_mat[current_index] = (new_timestep, y, ws)
                current_index += 1

        return new_mat
    def interpolate(self, timeseries, **args):
        """Fill gaps in ``timeseries`` with distance-weighted neighbor values.

        For every gap reported by ``MissingDataFinder``, one new row per
        missing time step is inserted. Each inserted row holds the weighted
        mean of the neighbor measurements, where neighbors closer to
        ``location`` receive a larger weight. If no neighbor has a usable
        value for a step, the last known ``corrected_score`` of the target is
        repeated and ``speed`` is set to NaN.

        Parameters
        ----------

        timeseries : structured array
                     rows with the fields 'date', 'corrected_score', 'speed'.
        timestep   : keyword, required
                     step between two consecutive 'date' values.
        location   : keyword, required
                     position of the target, passed to ``haversine``.
        neighbor_series : keyword, required
                     sequence of neighbor time series.
        neighbor_locations : keyword, required
                     positions of the neighbors, same order as
                     ``neighbor_series``.

        Returns
        -------

        structured array
            New array (fields 'date' int32, 'corrected_score' float32,
            'speed' float32) holding the original rows plus the
            interpolated rows.

        Raises
        ------

        KeyError
            If one of the required keywords is missing.
        Exception
            If a neighbor has distance 0 to ``location``.
        """
        cs = 'corrected_score'
        sp = 'speed'
        date = 'date'

        timestep = args['timestep']
        location = args['location']
        neighbor_series = args['neighbor_series']
        neighbor_locations = args['neighbor_locations']

        # Override missing data on neighbors; -1 is passed as the marker
        # value and is what the candidate search below tests against.
        ovm = OverrideMissing()
        ov_neighbor_series = []
        for i in range(len(neighbor_series)):
            ov_neighbor_series.append(
                ovm.override(neighbor_series[i], timestep, -1))

        # find missing data on target
        finder = MissingDataFinder()
        new_amount = timeseries.shape[0]
        misses = finder.find(timeseries, timestep)

        # calculating distances
        distances = []
        for i in range(len(neighbor_series)):
            dist = haversine(location, neighbor_locations[i])
            if dist == 0:
                # a zero distance would lead to a division by zero below
                raise Exception("distance is 0.")
            distances.append(dist)

        # Map each gap's start index to the number of rows to insert.
        # The amounts are cast to int so they can be used as array sizes
        # and range bounds.
        starts = {}
        for start, end, amount in misses:
            amount = int(amount)
            new_amount += amount
            starts[start] = amount

        # allocate new numpy array
        new_mat = zeros((new_amount,),
                        dtype=[('date', int32),
                               ('corrected_score', float32),
                               ('speed', float32)])

        current_index = 0
        for i in range(len(timeseries)):
            # every original measurement is kept
            new_mat[current_index] = timeseries[i]
            current_index += 1

            if i not in starts:
                continue

            # a gap starts after measurement i: insert the missing rows
            for j in range(1, starts[i] + 1):
                new_timestep = timeseries[i][date] + j * timestep

                # search for candidates with no missing data
                # NOTE(review): neighbors are indexed with i + j, i.e.
                # relative to the target's original index -- confirm this
                # stays aligned once earlier gaps have been filled.
                candidates = []
                sum_of_distances = 0
                for k in range(len(ov_neighbor_series)):
                    if ov_neighbor_series[k][i + j][cs] != -1:
                        candidates.append(k)
                        sum_of_distances += distances[k]

                if not candidates:
                    # no candidates available: copy old data
                    y = timeseries[i][cs]
                    new_mat[current_index] = (new_timestep, y, nan)
                    current_index += 1
                    continue

                # raw weight is the inverse of the neighbor's share of the
                # summed distances; its sum is used for normalization
                w_hat_k = {}
                sum_of_w_hat = 0
                for k in candidates:
                    w_hat_k[k] = 1.0 / (distances[k] / sum_of_distances)
                    sum_of_w_hat += w_hat_k[k]

                # calculation of label and speed as weighted means
                y = 0
                ws = 0
                for k in candidates:
                    # w_k is anti-proportional to the distance
                    w_k = w_hat_k[k] / sum_of_w_hat
                    y += w_k * ov_neighbor_series[k][i + j][cs]
                    ws += w_k * ov_neighbor_series[k][i + j][sp]

                new_mat[current_index] = (new_timestep, y, ws)
                current_index += 1

        return new_mat
Ejemplo n.º 3
0
    def get_windpark_nearest(self, target_idx, n_nearest,\
                    year_from=0, year_to=0):
        """This method fetches and returns a windpark from NREL, which consists
        of the target turbine with the given target_idx and the surrounding
        n-nearest turbines around the target turbine. When called, the wind
        measurements for a given range of years are downloaded for every
        turbine in the park.

        Parameters
        ----------

        target_idx : int
                     see windml.datasets.nrel.park_id for example ids.
        n_nearest  : int
                     number of neighbor turbines to select; must be at
                     least 1.
        year_from  : int
                     2004 - 2006. With the default 0 no measurements are
                     attached to the turbines.
        year_to    : int
                     2004 - 2006. With the default 0 only year_from is
                     fetched.

        Returns
        -------

        Windpark
            The windpark for the target id, n-nearest, and time span. The
            neighbors are added in order of increasing distance and the
            target turbine is added as last element.

        Raises
        ------

        ValueError
            If year_to lies before year_from, or if n_nearest exceeds the
            number of turbines other than the target.
        IndexError
            If n_nearest is 0.
        """

        #if only one year is desired
        if year_to == 0:
            year_to = year_from

        if year_from != 0 and year_to < year_from:
            raise ValueError("year_to must not be smaller than year_from.")

        meta = self.fetch_nrel_meta_data_all()
        target = self.fetch_nrel_meta_data(target_idx)
        tlat, tlon = target[1], target[2]

        # meta row that is skipped as the target itself
        target_row = target_idx - 1

        # distances to the target do not change between selection rounds,
        # so they are computed only once
        all_distances = [haversine((tlat, tlon), (meta[t][1], meta[t][2]))
                         for t in range(meta.shape[0])]

        marked = set()
        nearest = []
        distances = []
        for _ in range(n_nearest):
            smallest = None
            smallest_d = None
            for t, d in enumerate(all_distances):
                if t == target_row or t in marked:
                    continue
                # '<=' keeps the original tie-breaking: among equally
                # distant turbines the one with the highest row wins
                if smallest is None or d <= smallest_d:
                    smallest = t
                    smallest_d = d

            if smallest is None:
                raise ValueError(
                    "n_nearest exceeds the number of available turbines.")

            marked.add(smallest)
            nearest.append(meta[smallest])
            distances.append(smallest_d)

        # second argument is the distance of the farthest selected neighbor
        result = Windpark(target_idx, distances[-1])

        def fetch_measurements(turbine_id):
            # measurements of all requested years, joined in year order
            yearly = [self.fetch_nrel_data(turbine_id, y,
                                           ['date','corrected_score','speed'])
                      for y in range(year_from, year_to + 1)]
            if len(yearly) == 1:
                return yearly[0]
            return np.concatenate(yearly)

        for row in nearest:
            newturbine = Turbine(row[0], row[1], row[2], row[3], row[4],
                                 row[5], row[6])
            if year_from != 0:
                newturbine.add_measurements(fetch_measurements(row[0]))
            result.add_turbine(newturbine)

        #add target turbine as last element
        newturbine = Turbine(target[0], target[1], target[2], target[3],
                             target[4], target[5], target[6])
        if year_from != 0:
            newturbine.add_measurements(fetch_measurements(target[0]))
        result.add_turbine(newturbine)

        return result
Ejemplo n.º 4
0
    def get_windpark_nearest(self, target_idx, n_nearest,\
                    year_from=0, year_to=0):
        """This method fetches and returns a windpark from NREL, which consists
        of the target turbine with the given target_idx and the surrounding
        n-nearest turbines around the target turbine. When called, the wind
        measurements for a given range of years are downloaded for every
        turbine in the park.

        Parameters
        ----------

        target_idx : int
                     see windml.datasets.nrel.park_id for example ids.
        n_nearest  : int
                     number of neighbor turbines to select; must be at
                     least 1.
        year_from  : int
                     2004 - 2006. With the default 0 no measurements are
                     attached to the turbines.
        year_to    : int
                     2004 - 2006. With the default 0 only year_from is
                     fetched.

        Returns
        -------

        Windpark
            The windpark for the target id, n-nearest, and time span. The
            neighbors are added in order of increasing distance and the
            target turbine is added as last element.

        Raises
        ------

        ValueError
            If year_to lies before year_from, or if n_nearest exceeds the
            number of turbines other than the target.
        IndexError
            If n_nearest is 0.
        """

        #if only one year is desired
        if year_to == 0:
            year_to = year_from

        if year_from != 0 and year_to < year_from:
            raise ValueError("year_to must not be smaller than year_from.")

        meta = self.fetch_nrel_meta_data_all()
        target = self.fetch_nrel_meta_data(target_idx)
        tlat, tlon = target[1], target[2]

        # meta row that is skipped as the target itself
        target_row = target_idx - 1

        # distances to the target do not change between selection rounds,
        # so they are computed only once
        all_distances = [haversine((tlat, tlon), (meta[t][1], meta[t][2]))
                         for t in range(meta.shape[0])]

        marked = set()
        nearest = []
        distances = []
        for _ in range(n_nearest):
            smallest = None
            smallest_d = None
            for t, d in enumerate(all_distances):
                if t == target_row or t in marked:
                    continue
                # '<=' keeps the original tie-breaking: among equally
                # distant turbines the one with the highest row wins
                if smallest is None or d <= smallest_d:
                    smallest = t
                    smallest_d = d

            if smallest is None:
                raise ValueError(
                    "n_nearest exceeds the number of available turbines.")

            marked.add(smallest)
            nearest.append(meta[smallest])
            distances.append(smallest_d)

        # second argument is the distance of the farthest selected neighbor
        result = Windpark(target_idx, distances[-1])

        def fetch_measurements(turbine_id):
            # measurements of all requested years, joined in year order
            yearly = [self.fetch_nrel_data(turbine_id, y,
                                           ['date','corrected_score','speed'])
                      for y in range(year_from, year_to + 1)]
            if len(yearly) == 1:
                return yearly[0]
            return np.concatenate(yearly)

        for row in nearest:
            newturbine = Turbine(row[0], row[1], row[2], row[3], row[4],
                                 row[5], row[6])
            if year_from != 0:
                newturbine.add_measurements(fetch_measurements(row[0]))
            result.add_turbine(newturbine)

        #add target turbine as last element
        newturbine = Turbine(target[0], target[1], target[2], target[3],
                             target[4], target[5], target[6])
        if year_from != 0:
            newturbine.add_measurements(fetch_measurements(target[0]))
        result.add_turbine(newturbine)

        return result