def interpolate(self, timeseries, **args):
    """Fill the gaps of ``timeseries`` from neighboring series.

    Every missing step of the target series is replaced by a weighted
    average of the neighbors' values at that step. The weight of a
    neighbor is anti-proportional to its haversine distance to the
    target location. When no neighbor has a valid value for a step, the
    last known ``corrected_score`` of the target is repeated and
    ``speed`` is set to NaN.

    Parameters
    ----------
    timeseries : numpy structured array
        Target series; the fields 'date', 'corrected_score' and 'speed'
        are read.
    **args
        Required keys:

        timestep
            Step between two consecutive measurements, in the units of
            the 'date' field.
        location
            Location of the target, passed to ``haversine``.
        neighbor_series
            Sequence of neighbor series.
        neighbor_locations
            Locations of the neighbors, in the same order as
            ``neighbor_series``.

    Returns
    -------
    numpy structured array
        New array with fields ('date', int32), ('corrected_score',
        float32) and ('speed', float32) holding the original rows plus
        one row per missing step.

    Raises
    ------
    Exception
        If a neighbor has distance 0 to the target location.
    KeyError
        If one of the required keys is missing in ``args``.
    """
    cs = 'corrected_score'
    sp = 'speed'
    date = 'date'

    timestep = args['timestep']
    location = args['location']
    neighbor_series = args['neighbor_series']
    neighbor_locations = args['neighbor_locations']

    # Mark missing entries of every neighbor with -1 so they can be
    # recognised (and skipped) during the weighting below.
    ovm = OverrideMissing()
    ov_neighbor_series = [ovm.override(series, timestep, -1)
                          for series in neighbor_series]

    # Locate the gaps of the target series.
    finder = MissingDataFinder()
    new_amount = timeseries.shape[0]
    misses = finder.find(timeseries, timestep)

    # Distance of every neighbor to the target. A distance of 0 would
    # lead to a division by zero in the weights, so it is rejected.
    distances = []
    for i in range(len(neighbor_series)):
        d = haversine(location, neighbor_locations[i])
        if d == 0:
            raise Exception("distance is 0.")
        distances.append(d)

    # Map the index at which a gap starts to the number of missing steps.
    # The amounts are cast to int because they are used as array size and
    # as range bound.
    starts = {}
    for start, end, amount in misses:
        amount = int(amount)
        new_amount += amount
        starts[start] = amount

    new_mat = zeros((new_amount,),
                    dtype=[('date', int32),
                           ('corrected_score', float32),
                           ('speed', float32)])

    current_index = 0
    for i in range(len(timeseries)):
        # The existing measurement is always kept.
        new_mat[current_index] = timeseries[i]
        current_index += 1

        if i not in starts:
            continue

        # A gap starts after row i: synthesise one row per missing step.
        for j in range(1, starts[i] + 1):
            new_timestep = timeseries[i][date] + j * timestep

            # NOTE(review): neighbors are addressed with the target's row
            # index plus the offset, which presumes that the overridden
            # neighbor series line up row-for-row with the target --
            # confirm against OverrideMissing.
            candidates = [k for k in range(len(ov_neighbor_series))
                          if ov_neighbor_series[k][i + j][cs] != -1]

            if not candidates:
                # No neighbor has data for this step: repeat the last
                # known score, the speed is unknown.
                new_mat[current_index] = (new_timestep,
                                          timeseries[i][cs], nan)
                current_index += 1
                continue

            sum_of_distances = sum(distances[k] for k in candidates)
            # Unnormalised weights, anti-proportional to the distance.
            w_hat = [1.0 / (distances[k] / sum_of_distances)
                     for k in candidates]
            sum_of_w_hat = sum(w_hat)

            y = 0
            ws = 0
            for k, w_hat_k in zip(candidates, w_hat):
                w_k = w_hat_k / sum_of_w_hat
                row = ov_neighbor_series[k][i + j]
                y += w_k * row[cs]
                ws += w_k * row[sp]

            new_mat[current_index] = (new_timestep, y, ws)
            current_index += 1

    return new_mat
def interpolate(self, timeseries, **args):
    """Fill the gaps of ``timeseries`` from neighboring series.

    Every missing step of the target series is replaced by a weighted
    average of the neighbors' values at that step. The weight of a
    neighbor is anti-proportional to its haversine distance to the
    target location. When no neighbor has a valid value for a step, the
    last known ``corrected_score`` of the target is repeated and
    ``speed`` is set to NaN.

    Parameters
    ----------
    timeseries : numpy structured array
        Target series; the fields 'date', 'corrected_score' and 'speed'
        are read.
    **args
        Required keys:

        timestep
            Step between two consecutive measurements, in the units of
            the 'date' field.
        location
            Location of the target, passed to ``haversine``.
        neighbor_series
            Sequence of neighbor series.
        neighbor_locations
            Locations of the neighbors, in the same order as
            ``neighbor_series``.

    Returns
    -------
    numpy structured array
        New array with fields ('date', int32), ('corrected_score',
        float32) and ('speed', float32) holding the original rows plus
        one row per missing step.

    Raises
    ------
    Exception
        If a neighbor has distance 0 to the target location.
    KeyError
        If one of the required keys is missing in ``args``.
    """
    cs = 'corrected_score'
    sp = 'speed'
    date = 'date'

    timestep = args['timestep']
    location = args['location']
    neighbor_series = args['neighbor_series']
    neighbor_locations = args['neighbor_locations']

    # Mark missing entries of every neighbor with -1 so they can be
    # recognised (and skipped) during the weighting below.
    ovm = OverrideMissing()
    ov_neighbor_series = [ovm.override(series, timestep, -1)
                          for series in neighbor_series]

    # Locate the gaps of the target series.
    finder = MissingDataFinder()
    new_amount = timeseries.shape[0]
    misses = finder.find(timeseries, timestep)

    # Distance of every neighbor to the target. A distance of 0 would
    # lead to a division by zero in the weights, so it is rejected.
    distances = []
    for i in range(len(neighbor_series)):
        d = haversine(location, neighbor_locations[i])
        if d == 0:
            raise Exception("distance is 0.")
        distances.append(d)

    # Map the index at which a gap starts to the number of missing steps.
    # The amounts are cast to int because they are used as array size and
    # as range bound.
    starts = {}
    for start, end, amount in misses:
        amount = int(amount)
        new_amount += amount
        starts[start] = amount

    new_mat = zeros((new_amount,),
                    dtype=[('date', int32),
                           ('corrected_score', float32),
                           ('speed', float32)])

    current_index = 0
    for i in range(len(timeseries)):
        # The existing measurement is always kept.
        new_mat[current_index] = timeseries[i]
        current_index += 1

        if i not in starts:
            continue

        # A gap starts after row i: synthesise one row per missing step.
        for j in range(1, starts[i] + 1):
            # The original fallback branch indexed the row with the
            # leftover distance variable instead of the 'date' field.
            new_timestep = timeseries[i][date] + j * timestep

            # NOTE(review): neighbors are addressed with the target's row
            # index plus the offset, which presumes that the overridden
            # neighbor series line up row-for-row with the target --
            # confirm against OverrideMissing.
            candidates = [k for k in range(len(ov_neighbor_series))
                          if ov_neighbor_series[k][i + j][cs] != -1]

            if not candidates:
                # No neighbor has data for this step: repeat the last
                # known score, the speed is unknown.
                new_mat[current_index] = (new_timestep,
                                          timeseries[i][cs], nan)
                current_index += 1
                continue

            sum_of_distances = sum(distances[k] for k in candidates)
            # Unnormalised weights, anti-proportional to the distance.
            w_hat = [1.0 / (distances[k] / sum_of_distances)
                     for k in candidates]
            sum_of_w_hat = sum(w_hat)

            y = 0
            ws = 0
            for k, w_hat_k in zip(candidates, w_hat):
                w_k = w_hat_k / sum_of_w_hat
                row = ov_neighbor_series[k][i + j]
                y += w_k * row[cs]
                ws += w_k * row[sp]

            new_mat[current_index] = (new_timestep, y, ws)
            current_index += 1

    return new_mat
def get_windpark_nearest(self, target_idx, n_nearest,
                         year_from=0, year_to=0):
    """This method fetches and returns a windpark from NREL, which consists
    of the target turbine with the given target_idx and the surrounding
    n-nearest turbines around the target turbine. When called, the wind
    measurements for a given range of years are downloaded for every
    turbine in the park.

    Parameters
    ----------

    target_idx : int
                 see windml.datasets.nrel.park_id for example ids.
    n_nearest  : int
                 number of neighbor turbines to include, at least 1.
    year_from  : int
                 2004 - 2006; 0 means that no measurements are fetched.
    year_to    : int
                 2004 - 2006; 0 means the same year as year_from.

    Returns
    -------

    Windpark
        An according windpark for target id, n-nearest, and time span.
        The neighbors are added in order of increasing distance, the
        target turbine is added as last element.

    Raises
    ------

    ValueError
        If fewer than n_nearest turbines besides the target are available.
    """

    # if only one year is desired
    if year_to == 0:
        year_to = year_from

    meta = self.fetch_nrel_meta_data_all()
    target = self.fetch_nrel_meta_data(target_idx)
    tlat, tlon = target[1], target[2]

    # The target occupies row target_idx - 1 of the meta data and must
    # never be chosen as its own neighbor.
    target_row = target_idx - 1

    # The distances do not change between selection rounds, so they are
    # computed only once.
    all_distances = [haversine((tlat, tlon), (meta[t][1], meta[t][2]))
                     for t in range(meta.shape[0])]

    marked = set()
    nearest = []
    distances = []
    for _ in range(n_nearest):
        smallest = None
        smallest_d = None
        for t, d in enumerate(all_distances):
            if t == target_row or t in marked:
                continue
            # '<=' lets a later row win on equal distance.
            if smallest is None or d <= smallest_d:
                smallest = t
                smallest_d = d

        if smallest is None:
            raise ValueError("not enough turbines available for "
                             "n_nearest=%d" % n_nearest)

        marked.add(smallest)
        nearest.append(meta[smallest])
        distances.append(smallest_d)

    def load_measurements(turbine_id):
        # Fetch one array per year and join them in chronological order.
        yearly = [self.fetch_nrel_data(turbine_id, y,
                                       ['date', 'corrected_score', 'speed'])
                  for y in range(year_from, year_to + 1)]
        if len(yearly) == 1:
            return yearly[0]
        return np.concatenate(yearly)

    # The second argument is the distance to the farthest selected
    # neighbor.
    result = Windpark(target_idx, distances[-1])

    # add target turbine as last element
    for row in nearest + [target]:
        newturbine = Turbine(row[0], row[1], row[2], row[3], row[4],
                             row[5], row[6])
        if year_from != 0:
            newturbine.add_measurements(load_measurements(row[0]))
        result.add_turbine(newturbine)

    return result
def get_windpark_nearest(self, target_idx, n_nearest,
                         year_from=0, year_to=0):
    """This method fetches and returns a windpark from NREL, which consists
    of the target turbine with the given target_idx and the surrounding
    n-nearest turbines around the target turbine. When called, the wind
    measurements for a given range of years are downloaded for every
    turbine in the park.

    Parameters
    ----------

    target_idx : int
                 see windml.datasets.nrel.park_id for example ids.
    n_nearest  : int
                 number of neighbor turbines to include, at least 1.
    year_from  : int
                 2004 - 2006; 0 means that no measurements are fetched.
    year_to    : int
                 2004 - 2006; 0 means the same year as year_from.

    Returns
    -------

    Windpark
        An according windpark for target id, n-nearest, and time span.
        The neighbors are added in order of increasing distance, the
        target turbine is added as last element.

    Raises
    ------

    ValueError
        If fewer than n_nearest turbines besides the target are available.
    """

    # if only one year is desired
    if year_to == 0:
        year_to = year_from

    meta = self.fetch_nrel_meta_data_all()
    target = self.fetch_nrel_meta_data(target_idx)
    tlat, tlon = target[1], target[2]

    # The target occupies row target_idx - 1 of the meta data and must
    # never be chosen as its own neighbor.
    target_row = target_idx - 1

    # The distances do not change between selection rounds, so they are
    # computed only once.
    all_distances = [haversine((tlat, tlon), (meta[t][1], meta[t][2]))
                     for t in range(meta.shape[0])]

    marked = set()
    nearest = []
    distances = []
    for _ in range(n_nearest):
        smallest = None
        smallest_d = None
        for t, d in enumerate(all_distances):
            if t == target_row or t in marked:
                continue
            # '<=' lets a later row win on equal distance. Testing
            # 'smallest is None' first also avoids comparing against a
            # not yet assigned distance when the target is row 0.
            if smallest is None or d <= smallest_d:
                smallest = t
                smallest_d = d

        if smallest is None:
            raise ValueError("not enough turbines available for "
                             "n_nearest=%d" % n_nearest)

        marked.add(smallest)
        nearest.append(meta[smallest])
        distances.append(smallest_d)

    def load_measurements(turbine_id):
        # Fetch one array per year and join them in chronological order.
        yearly = [self.fetch_nrel_data(turbine_id, y,
                                       ['date', 'corrected_score', 'speed'])
                  for y in range(year_from, year_to + 1)]
        if len(yearly) == 1:
            return yearly[0]
        return np.concatenate(yearly)

    # The second argument is the distance to the farthest selected
    # neighbor.
    result = Windpark(target_idx, distances[-1])

    # add target turbine as last element
    for row in nearest + [target]:
        newturbine = Turbine(row[0], row[1], row[2], row[3], row[4],
                             row[5], row[6])
        if year_from != 0:
            newturbine.add_measurements(load_measurements(row[0]))
        result.add_turbine(newturbine)

    return result