def transform(self, input_obs, column_names, *, smin=None):
    column_names = np.array(column_names)
    both_cols = npi.intersection(column_names, self.mm_colnames)
    obs_cols = npi.indices(column_names, both_cols)
    mm_cols = npi.indices(self.mm_colnames, both_cols)
    obs = input_obs[:, obs_cols]
    mm = self.mm[:, mm_cols]
    # the columns that did not make it
    not_mm_cols = npi.indices(self.mm_colnames,
                              npi.difference(self.mm_colnames, both_cols))
    # we can only work with rows that do not contain any "1"
    mm = mm[np.where(self.mm[:, not_mm_cols].sum(axis=1) == 0)[0], :]
    # clearing out "empty" rows
    mm_nonzero_rows = np.where(mm.sum(axis=1))[0]
    mm = mm[mm_nonzero_rows, :]
    # clearing out duplicate rows (out-features)
    mm = pd.DataFrame(mm).drop_duplicates().values
    # generate output names
    self.output_names = [
        '_'.join(map(lambda j: both_cols[j], np.where(mm[row, :])[0]))
        for row in range(mm.shape[0])
    ]
    if smin is None:
        res = map_features(obs, mm)
    else:
        res = map_features_smin(obs, mm, smin)
    return res
def _load_stock_prices(dates, symbol):
    print(f'Loading pricing data for stock {symbol.upper()}')
    symbol_dates = _filter_to_business_days(
        _load_dates_from_price_file(f'{_STOCK_DIR}/{symbol}.us.txt'))
    mat_idx_to_file_idx = npi.indices(symbol_dates, dates, missing=-1)
    missing_indices = np.where(mat_idx_to_file_idx == -1)[0]
    if len(missing_indices) > 0:
        missing_dates = [
            str(d)[:10] for d in sorted(set(dates[missing_indices]))
        ]
        print(f'Missing pricing data for stock {symbol.upper()} for '
              f'the following dates: {missing_dates}')
    file_prices = np.zeros(shape=(len(symbol_dates) + 1, ))
    file_prices[:-1] = np.loadtxt(
        f'Stocks/{symbol}.us.txt',
        skiprows=1,  # skip CSV header
        delimiter=',',
        usecols=4,
        dtype=np.float64)
    file_prices[-1] = np.nan
    return file_prices[mat_idx_to_file_idx]
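# A minimal sketch (toy values, not the project's price files) of the sentinel
# trick used above: npi.indices(..., missing=-1) points absent keys at the
# extra NaN slot appended to the value array.
import numpy as np
import numpy_indexed as npi

known = np.array([10, 20, 30])                 # stand-in for symbol_dates
queried = np.array([20, 99, 10])               # stand-in for dates; 99 is missing
idx = npi.indices(known, queried, missing=-1)  # -> [1, -1, 0]

values = np.zeros(len(known) + 1)
values[:-1] = [1.0, 2.0, 3.0]                  # stand-in for loaded prices
values[-1] = np.nan                            # sentinel slot selected by index -1
print(values[idx])                             # [2.0, nan, 1.0]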
def confidence_scores(self, X, annotator_ids=None, **kwargs):
    """Method returning the confidence scores for labelling the given samples.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Samples whose class labels are queried.
    annotator_ids: array-like, shape (n_queried_annotators)
        The indices of the annotators whose confidence scores are queried.

    Returns
    -------
    C: numpy.ndarray, shape (n_samples, n_annotators)
        Confidence scores of the queried annotators for labelling the given
        samples. The non-queried annotators should return np.nan values.
    """
    # check annotator_ids
    annotator_ids = check_indices(annotator_ids, self.n_annotators() - 1,
                                  'annotator_ids')

    # obtain ids of queried samples
    X = check_array(X)
    sample_ids = indices(self.X_, X, missing=-1)
    sample_ids_flag = sample_ids >= 0

    # confidence scores provided by queried annotators
    C = np.full((np.size(X, 0), self.n_annotators()), np.nan)
    C[sample_ids_flag, annotator_ids[:, None]] = self.C_[
        sample_ids[sample_ids_flag], annotator_ids[:, None]]

    return C
def create_pre_compute():
    with open('./Data/Trajectories.txt', 'r') as f:
        # trajectory_num = []
        trajectories_list = []
        for line in f:
            lst = re.split(r'[ ]', line)
            lst.pop(-1)
            tmp = [float(i) for i in lst]
            # trajectory_num.append(tmp.pop(-1))
            trajectories_list.append(tmp)

    trajectories = np.zeros([len(trajectories_list), 24])
    for i, trajectory in enumerate(trajectories_list):
        for j, stop_area in enumerate(trajectory):
            trajectories[i, j] = int(stop_area)

    unique_trajectories = npi.unique(trajectories)
    label_uniques = npi.indices(unique_trajectories, trajectories)
    np.save('./Data/label_uniques.npy', label_uniques)
    np.save('./Data/trajectories_uniques.npy', unique_trajectories)
    np.save('./Data/np_trajectories.npy', trajectories)
    # trajectories = np.load('./Data/np_trajectories.npy')
    # unique_trajectories = np.load('./Data/trajectories_uniques.npy')
    # label_uniques = np.load('./Data/label_uniques.npy')

    dist_pre_compute = np.zeros([len(unique_trajectories), len(unique_trajectories)])
    for i in range(0, len(unique_trajectories) - 1):
        for j in range(i + 1, len(unique_trajectories)):
            if j % 2000 == 0:
                print([i, j])
            dist_pre_compute[i, j] = dist_pre_compute[j, i] = levenshtein(
                unique_trajectories[i, :], unique_trajectories[j, :])
            # dist_pre_compute[i, j] = dist_pre_compute[j, i] = dtw(
            #     unique_trajectories[i, :], unique_trajectories[j, :])
    np.save('./Data/pre_compute_dtw', dist_pre_compute)
def normalize_x(self, features):
    np_all_features = np.array(self.features)
    np_features_to_normalize = np.array(features)
    columns_to_normalize = npi.indices(np_all_features, np_features_to_normalize)
    for column in columns_to_normalize:
        mean = self.x[:, column:column + 1].mean()
        sd = self.x[:, column:column + 1].std()
        self.x[:, column:column + 1] = (self.x[:, column:column + 1] - mean) / sd
def create_np():
    names_dict, label_list = points_dict()
    for file_tmp in os.listdir("./Data/CoordinatesInput"):
        if file_tmp.endswith(".txt"):
            str_file = file_tmp.title()
            str_file_pre = './Data/CoordinatesInput/%s.txt' % (str_file[:-4])
            str_file_np = './Data/npData/%s' % (str_file[:-4])
            with open(str_file_pre, 'r') as f:
                print(str_file)
                all_coordinates = []
                for line in f:
                    line = line.strip('\n')
                    tmp = ''
                    coordinates_list = []
                    for sign in line:
                        if sign != '[' and sign != ']' and sign != "'" and sign != '"' and sign != ' ':
                            if sign == ',':
                                coordinates_list.append(tmp)
                                tmp = ''
                            else:
                                tmp += sign
                    coordinates_list.append(tmp)
                    all_coordinates.append(coordinates_list)

            if len(all_coordinates) > 0:
                data = np.zeros([len(all_coordinates), 6])
                for idx, item1 in enumerate(all_coordinates):
                    for idy, item2 in enumerate(item1):
                        data[idx][idy] = float(item2)

                tmp = data[:, :2]
                unique_coordinate = npi.unique(tmp)
                label = npi.indices(unique_coordinate, tmp)
                max_cluster = np.max(label) + 1
                count = np.zeros(max_cluster)
                for i in label:
                    count[i] += 1
                temp = count
                idc = np.zeros(max_cluster)
                for i in range(max_cluster):
                    idx = np.argmax(temp)
                    idc[idx] = max_cluster - (i + 1)
                    temp[idx] = 0
                for idx, count_label in enumerate(idc):
                    if count_label < max_cluster - math.floor(max_cluster * 0.1):
                        label[np.where(label == idx)] = max_cluster

                j = 0
                for i in range(max_cluster + 1):
                    tmp = np.where(label == i)
                    if len(tmp[0]) > 0:
                        data[np.where(label == i), 5] = label_list[names_dict[str_file[:-4]]][j]
                        j += 1
                np.save(str_file_np, data)
def work(A, i):
    global out_pus
    X = A['rechit_x'][i]
    Y = A['rechit_y'][i]
    Z = A['rechit_z'][i]
    E = A['rechit_energy'][i]
    L = A['rechit_layer'][i]
    T = A['rechit_time'][i]
    D = A['rechit_detid'][i]
    H = A['simcluster_hits'][i]
    F = A['simcluster_fractions'][i]

    if not (len(X) == len(Y) == len(Z) == len(E) == len(L) == len(T)) or len(X) == 0:
        print("Error in number of entries")
        return

    num_entries = len(X)
    print("Hello, world!")

    # All features
    all_features = np.concatenate(
        (np.expand_dims(X, axis=1), np.expand_dims(Y, axis=1),
         np.expand_dims(Z, axis=1), np.expand_dims(E, axis=1),
         np.expand_dims(L, axis=1), np.expand_dims(T, axis=1)), axis=1)

    total_fractions = np.zeros(len(all_features))
    for k in range(len(H)):
        my_simcluster_hits = H[k]
        my_simcluster_frac = F[k]
        print('\tCluster %d - %d' % (k, len(my_simcluster_hits)))
        valid_indices = np.isin(my_simcluster_hits, D)
        cluster_frac = my_simcluster_frac[valid_indices]
        cluster_hits = my_simcluster_hits[valid_indices]
        cluster_hits = npi.indices(D, cluster_hits, missing='raise')
        total_fractions[cluster_hits] += cluster_frac

    print("Hello, world 2!")
    pu_indices = np.argwhere(total_fractions < 0.1)
    data_required = A[pu_indices]
    if len(pu_indices) > 300:
        out_pus.append(data_required)
        print("Length", len(out_pus))
    if len(out_pus) == 5:
        return True
    else:
        return False
def gethpz(ras, dec, red, nside, ipixlst, nzbin, zlims):
    npix = len(ipixlst)
    print('\nMaking healpix grid of nside =', nside, 'with', npix,
          'pixels in', nzbin, 'slices...')
    phi, theta = np.radians(ras), np.radians(90. - dec)
    ipix = hp.ang2pix(nside, theta, phi)
    izbin = np.digitize(red, zlims) - 1
    cut = (np.isin(ipix, ipixlst)) & (izbin >= 0) & (izbin < nzbin)
    ipix, izbin = ipix[cut], izbin[cut]
    ipix = npi.indices(ipixlst, ipix)
    dathpz, edges = np.histogramdd(np.vstack([izbin, ipix]).transpose(),
                                   bins=(nzbin, npix))
    return dathpz
def localised(self, x, y, xnew):
    windowloc = 200
    x = np.asarray(x)
    y = np.asarray(y)
    if x[0] > x[-1]:
        x = x[::-1]
        y = y[::-1]
    ynew = np.zeros(len(xnew))
    # Localised Radial Basis functions by looping through subsets of the data
    localisation = int(np.ceil(len(x) / windowloc))
    factor = np.ceil(len(x) / localisation)
    for idloc in range(localisation):
        # localisation allows for some overlapping to improve the consistency
        # upon reforming the total data
        # determines if the sub-data is the last sub-data group
        if idloc == localisation - 1:
            start = int((idloc * factor) - 5)
            end = int(len(x))
        # determines if the sub-data is the first sub-data group
        elif idloc == 0:
            start = 0
            end = (int(1 * factor) + 5)
        else:
            start = int((idloc * factor) - 5)
            end = int(((idloc + 1) * factor) + 5)
        ydata_loop = y[start:end]
        xdata_loop = x[start:end]
        # selects the region of x data to extract the new y data
        xnew_cut_idx = npi.indices(
            xnew, xnew[(xnew >= xdata_loop[0]) & (xnew < xdata_loop[-1])])
        xnew_cut = xnew[xnew_cut_idx]
        # carries out the radial basis function interpolation on the localised sub-data
        rbf = Rbf(xdata_loop, ydata_loop, function='linear', smooth=0)
        ynew[xnew_cut_idx] = rbf(xnew_cut)

    Data = pd.DataFrame()
    Data['x'] = xnew
    Data['y'] = ynew
    if self.BlackmanHarrisFiltervar.get() == 1:
        Data = Data.rolling(5, win_type='blackmanharris', min_periods=1,
                            center=True).mean()
        Data.dropna(how='any', inplace=True)
    return [Data['x'].values, Data['y'].values]
def replace_neighbours(classes, classified, classified_ID, org_neighbours):
    """
    Vectorized method to replace values in a list of arrays and calculate percentages.

    classes = array of same length as len of org_label with new labels
    classified = numpy array with new classified values
    classified_ID = merged regions with a unique ID
    org_neighbours = output of calc_neighbours function

    replaced = list of arrays with replaced neighbours per region
    tablemerged = array indicating the percentages of neighbours for each region.
        The first N columns are the unique classes. The last column is the
        classified value of that region.
    """
    # Set up dictionary for replacement of unique neighbour values with
    # classified neighbour values
    listv = np.unique(classes).tolist()
    keys = []
    values = []
    for i in listv:
        keys.append(classified_ID[classified == i])
        leng = classified_ID[classified == i]
        values.append([i] * len(leng))
    keys = np.concatenate(keys).tolist()
    values = np.concatenate(values).tolist()
    alldict = dict(zip(keys, values))

    # Replace list of unique neighbours with classified neighbours
    # First get keys and values of the final dict (as lists, so numpy can index them)
    keys_clas = list(alldict.keys())
    values_clas = list(alldict.values())

    import numpy_indexed as npi
    arr = np.concatenate(org_neighbours)
    idx = npi.indices(keys_clas, arr, missing='mask')
    remap = np.logical_not(idx.mask)
    arr[remap] = np.array(values_clas)[idx[remap]]
    replaced = np.array_split(arr, np.cumsum([len(a) for a in org_neighbours][:-1]))

    # Determine classification % of neighbours and generate array for attribute table
    idx = [np.ones(len(a)) * i for i, a in enumerate(replaced)]
    (rows, cols), table = npi.count_table(np.concatenate(idx), np.concatenate(replaced))
    table = table.astype(float)
    table = table / table.sum(axis=1, keepdims=True) * 100

    # Build classification table
    values_final = np.asarray(values_clas)
    tablemerged = np.column_stack((table, values_final))

    return replaced, tablemerged
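# A minimal sketch (toy values, not the project's data) of the masked-remap
# pattern used above: npi.indices(..., missing='mask') returns a masked array,
# so only elements found in `keys` are overwritten and the rest stay untouched.
import numpy as np
import numpy_indexed as npi

keys = [10, 20, 30]
values = [1, 2, 3]
arr = np.array([20, 99, 10, 30])            # 99 has no classified value

idx = npi.indices(keys, arr, missing='mask')
remap = np.logical_not(idx.mask)
arr[remap] = np.array(values)[idx[remap]]
print(arr)                                  # [ 2 99  1  3]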
def resample_array(data_mask, timestamp_list):
    groups = data_mask[:, 0]
    groups = np.array(groups, dtype=int)
    data = data_mask[:, 1]
    ts_list = np.array(np.unique(groups), dtype=int).tolist()
    value_list = [data[groups == ts_].sum() for ts_ in ts_list]
    res_array = np.zeros((len(timestamp_list), 2), dtype=float)
    res_array[:, 0] = timestamp_list
    match_index = npi.indices(timestamp_list, ts_list)
    res_array[match_index, 1] = value_list
    return res_array
def splitClusters(self):
    for k in range(len(self.Clusters[:, 0])):  # For every cluster
        # If the # of data points that the cluster has is greater than N
        if self.Clusters[k, 1] >= 2 * self.N:  # and self.Clusters[k,2]>2*self.r:
            X = self.buffered_data[self.buffered_data[:, 1] == self.Clusters[k, 0], 3:]  # data of cluster k
            tree = kdtree.create(X.tolist())  # construct kdtree with data of cluster k
            for l in range(len(X[:, 0])):  # for each data point of cluster k
                points = tree.search_nn_dist(X[l, :], self.r)  # find data points within radius r of point l
                if len(points) >= self.N:  # if the # of data points in the area is greater than N
                    center = self.calculate_cluster_center(np.array(points))  # centroid of candidate cluster
                    indices = npi.indices(X, points, missing='ignore')  # find these points in all data of cluster k
                    points2 = np.delete(X, indices, 0)  # remaining data of cluster k
                    if len(points2) >= self.N:
                        center2 = self.calculate_cluster_center(np.array(points2))
                        dis = euclidean_distances([center], [center2])
                        r1 = self.calculateRadius(points, center)
                        r2 = self.calculateRadius(points2, center2)
                        if float(dis) > r1 + r2 + 0.5 * self.r:
                            new_cluster_label = self.Clusters.shape[0] + 1
                            self.Clusters = np.vstack([
                                self.Clusters,
                                np.hstack([
                                    new_cluster_label,
                                    len(points), 1, self.r,
                                    np.mean(np.std(points, axis=0)), center
                                ])
                            ])
                            indices = np.isin(self.buffered_data[:, 3:], points)[:, 0]
                            self.buffered_data[indices == True, 1] = new_cluster_label
                            self.buffered_data[indices == True, 2] = 1
                            print("Cluster #%d is split." % (self.Clusters[k, 0]))
                            break
def select_nodes(coord, connect, nodes):
    """ Selects unique nodes to build the mesh.

    Args:
        coord (:obj:`numpy.array`): Mesh coordinates.
        connect (:obj:`numpy.array`): Element connectivity.
        nodes (:obj:`numpy.array`): Nodes to select.

    Returns:
        A tuple with coordinates and connectivity for the selected nodes.
    """
    nodes = np.array(nodes) - 1
    nodes = connect[nodes, 1:].flatten()
    index = npi.indices(coord[:, 0], np.unique(nodes))
    return coord[index, :], connect[nodes, :]
def read_particles(AHF_file, particle_file):
    """
    Reads the particles from the AHF dataset and splits them into a more
    intelligible structure, organised in a similar way to the ones that are
    stored alongside the particles.

    The big problem here is being able to match up the particle IDs with the
    location in the array where they exist; we actually need to re-sort the
    HaloIDs such that they line up exactly.
    """
    data = read_AHF_particles(AHF_file)
    switch = {"gas": 0, "dark_matter": 1, "stellar": 4}

    ids = {}
    with h5py.File(particle_file, "r") as handle:
        for name, particle_type in switch.items():
            full_particle_type = "PartType{}".format(particle_type)
            this_id_list = handle[full_particle_type]["ParticleIDs"][...]
            ids[name] = this_id_list

    # Now we can prepare the output arrays.
    output_data = {}
    for name, particle_type in switch.items():
        mask = data["Ptype"] == particle_type
        particle_ids = data["id"][mask]
        halo_ids = data["HaloID"][mask]

        # This finds the indices where the two ID arrays match up
        indicies = ni.indices(ids[name], particle_ids)

        # We need to re-order the AHF data to be in the same order as
        # the actual HDF5 data. We can do that by using these indices
        # as well as np.take.
        cleaned_halo_data = np.zeros_like(ids[name]) - 1
        cleaned_halo_data[indicies] = halo_ids

        this_data = {"HaloID": cleaned_halo_data, "ParticleIDs": ids[name]}
        output_data[name] = this_data

    return output_data
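# A minimal sketch (toy IDs, unrelated to any AHF/HDF5 file) of the cross-match
# performed above: for each halo-member particle ID, find its position in the
# snapshot's ID array and scatter the halo ID there, leaving -1 for field particles.
import numpy as np
import numpy_indexed as npi

snapshot_ids = np.array([101, 102, 103, 104, 105])
member_ids = np.array([104, 101])       # particles that belong to haloes
member_halo = np.array([7, 3])          # their halo IDs

halo_of_particle = np.zeros_like(snapshot_ids) - 1
halo_of_particle[npi.indices(snapshot_ids, member_ids)] = member_halo
print(halo_of_particle)                 # [ 3 -1 -1  7 -1]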
def hppixtogrid(nzbin, nside, denshpz, winhpz, ipixlst, zlims, dobound, rmin, rmax,
                dmin, dmax, nx, ny, nz, lx, ly, lz, x0, y0, z0, cosmo):
    rsets = 10        # Number of random sets to average
    nran = 10000000   # Number of points in each random set
    npix = len(ipixlst)
    print('\nMapping (healpix,redshift) binning to (x,y,z) binning...')
    print('Healpix grid with nzbin =', nzbin, 'npix =', npix, 'ntot =', nzbin * npix)
    print('Cuboid grid with nx =', nx, 'ny =', ny, 'nz =', nz, 'ntot =', nx * ny * nz)
    print('Sampling with random points...')
    print('rsets =', rsets)
    print('nran =', nran)
    countgrid, densgrid, wingrid, maskgrid = (np.zeros((nzbin, npix)),
                                              np.zeros((nx, ny, nz)),
                                              np.zeros((nx, ny, nz)),
                                              np.zeros((nx, ny, nz)))
    for iset in range(rsets):
        print('Generating random set', iset + 1, '...')
        # Generate random points in cuboid
        rxpos, rypos, rzpos = boxtools.genransim(nran, lx, ly, lz)
        # Convert random points to spherical co-ordinates
        rras, rdec, rred = boxtools.getradecred(dobound, rmin, rmax, dmin, dmax,
                                                rxpos, rypos, rzpos, x0, y0, z0, cosmo)
        # Cut points within redshift bins
        izbin = np.digitize(rred, zlims) - 1
        cut = (izbin >= 0) & (izbin < nzbin)
        rxpos, rypos, rzpos, rras, rdec, izbin = (rxpos[cut], rypos[cut], rzpos[cut],
                                                  rras[cut], rdec[cut], izbin[cut])
        # Cut points within healpix pixels
        rphi, rtheta = np.radians(rras), np.radians(90. - rdec)
        ipix = hp.ang2pix(nside, rtheta, rphi)
        cut = np.isin(ipix, ipixlst)
        rxpos, rypos, rzpos, izbin, ipix = (rxpos[cut], rypos[cut], rzpos[cut],
                                            izbin[cut], ipix[cut])
        # Re-index pixels to run from 1 to len(ipixlst)
        ipix = npi.indices(ipixlst, ipix)
        # Count numbers in each (healpix,redshift) cell
        tempgrid, edges = np.histogramdd(np.vstack([izbin + 0.5, ipix + 0.5]).transpose(),
                                         bins=(nzbin, npix))
        countgrid += tempgrid
        # Bin densities in each (x,y,z) cell
        rdens = denshpz[izbin, ipix]
        tempgrid, edges = np.histogramdd(np.vstack([rxpos, rypos, rzpos]).transpose(),
                                         bins=(nx, ny, nz),
                                         range=((0., lx), (0., ly), (0., lz)),
                                         normed=False, weights=rdens)
        densgrid += tempgrid
        # Bin window in each (x,y,z) cell
        rwin = winhpz[izbin, ipix]
        tempgrid, edges = np.histogramdd(np.vstack([rxpos, rypos, rzpos]).transpose(),
                                         bins=(nx, ny, nz),
                                         range=((0., lx), (0., ly), (0., lz)),
                                         normed=False, weights=rwin)
        wingrid += tempgrid
        # Count numbers in each (x,y,z) cell
        tempgrid, edges = np.histogramdd(np.vstack([rxpos, rypos, rzpos]).transpose(),
                                         bins=(nx, ny, nz),
                                         range=((0., lx), (0., ly), (0., lz)))
        maskgrid += tempgrid
    print('Number of randoms in healpix grid mean =', np.mean(countgrid),
          'std =', np.std(countgrid),
          'nullfrac =', float(len(countgrid[countgrid == 0])) / float(nzbin * len(ipixlst)))
    print('Number of randoms in cuboid grid mean =', np.mean(maskgrid[maskgrid > 0.]),
          'std =', np.std(maskgrid[maskgrid > 0.]))
    # Average densities and window on (x,y,z) grid
    densgrid = np.where(maskgrid > 0., densgrid / maskgrid, 0.)
    wingrid = np.where(maskgrid > 0., wingrid / maskgrid, 0.)
    return densgrid, wingrid
def all_faces(coord, connect):
    """ Gets vertices of all faces of the mesh.

    Args:
        coord (:obj:`numpy.array`): Coordinates of the element.
        connect (:obj:`numpy.array`): Element connectivity.

    Returns:
        Corresponding nodes.
    """
    nodes_per_face = np.array([connect[:, [1, 2, 3, 4]], connect[:, [5, 6, 7, 8]],
                               connect[:, [6, 7, 3, 2]], connect[:, [7, 8, 4, 3]],
                               connect[:, [6, 5, 1, 2]], connect[:, [5, 8, 4, 1]]]).reshape(-1, 4)

    ind_faces = npi.indices(coord[:, 0], nodes_per_face.flatten()).reshape(-1, 4)
    return ind_faces
def mat_arg(self, lat_link):
    """
    lat_link = whether 1D system or 2D system.
    Its argument comes from the latticeND() function.
    It generates the non-zero positions of the sparse matrix with amplitudes.
    """
    A = self.basisArray()
    for j in range(A.shape[0]):
        for sublink in lat_link:
            if A[j, sublink[0]] >= 1:
                hoppedVec = np.array(A[j]).tolist()
                hoppedVec[sublink[0]] -= 1
                hoppedVec[sublink[1]] += 1
                i = int(npi.indices(A, np.array([hoppedVec]), missing='mask'))
                amp = round(
                    math.sqrt((A[j, sublink[1]] + 1) * A[j, sublink[0]]) * -self.T, 3)
                yield [i, j, amp]
def free_faces(coord, connect):
    """ Gets vertices of the external faces of the mesh.

    Args:
        coord (:obj:`numpy.array`): Coordinates of the element.
        connect (:obj:`numpy.array`): Element connectivity.

    Returns:
        Corresponding nodes.
    """
    nodes_per_face = np.array([connect[:, [1, 2, 3, 4]], connect[:, [5, 6, 7, 8]],
                               connect[:, [6, 7, 3, 2]], connect[:, [7, 8, 4, 3]],
                               connect[:, [6, 5, 1, 2]], connect[:, [5, 8, 4, 1]]]).reshape(-1, 4)

    unique, counts = npi.count(nodes_per_face)
    unique = unique[counts < 2]
    ind_faces = npi.indices(coord[:, 0], unique.flatten()).reshape(-1, 4)
    return ind_faces
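# A minimal sketch (hypothetical two-element mesh, edges instead of quad faces)
# of the boundary-detection idea above: a face shared by two elements appears
# twice in the stacked face list, so npi.count == 1 flags the free faces.
import numpy as np
import numpy_indexed as npi

faces = np.array([[1, 2], [2, 3],     # edges of element A
                  [2, 3], [3, 4]])    # edges of element B; [2, 3] is shared
unique, counts = npi.count(faces)
print(unique[counts < 2])             # -> [[1 2] [3 4]], the unshared edges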
def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
    """Method returning the class labels of the given samples.

    If the query value is greater than zero, it updates the n_queries and
    queried sample statistics.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Samples whose class labels are queried.
    annotator_ids: array-like, shape (n_queried_annotators)
        The indices of the annotators whose class labels are queried.
    query_value: int
        The query value represents the increment of the query statistics of
        the queried annotators.

    Returns
    -------
    Y: numpy.ndarray, shape (n_samples, n_annotators)
        Class labels of the given samples which were provided by the queried
        annotators. The non-queried annotators return np.nan values.
    """
    # check annotator_ids
    annotator_ids = check_indices(annotator_ids, self.n_annotators() - 1,
                                  'annotator_ids')

    # obtain ids of queried samples
    X = check_array(X)
    sample_ids = indices(self.X_, X, missing=-1)
    sample_ids_flag = sample_ids >= 0

    # class labels provided by queried annotators
    Y = np.full((np.size(X, 0), self.n_annotators()), np.nan)
    Y[sample_ids_flag, annotator_ids[:, None]] = self.Y_[
        sample_ids[sample_ids_flag], annotator_ids[:, None]]

    # update query statistics
    if query_value > 0:
        self.queried_flags_[sample_ids, annotator_ids[:, None]] = True
        self.n_queries_[annotator_ids] += query_value

    return Y
def transform(self, y):
    return npi.indices(self.mapping, y)
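# A minimal sketch (hypothetical mapping) of what this encoder-style transform
# does: npi.indices maps each value in y to its position in self.mapping.
import numpy as np
import numpy_indexed as npi

mapping = np.array([10, 20, 30])   # stand-in for self.mapping
y = np.array([20, 10, 20])
print(npi.indices(mapping, y))     # [1 0 1]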
def polygons(zip_name, class_path, class_clouds, path_to_folder, save_class_path,
             save_imgs, poly_path, time_spaced):
    print("Current: ", zip_name)
    # grid code
    grid_img = zip_name[38:-16]
    # acquisition date
    year_img = zip_name[11:15]
    month_img = zip_name[15:17]
    day_img = zip_name[17:19]
    discriminator_img = zip_name[-6:]
    # predicted files names
    search_criteria = "*_predicted.tif"
    q = os.path.join(class_path, search_criteria)
    dem_fps = glob.glob(q)
    dem_fps.sort(key=os.path.getmtime, reverse=True)
    # dem_fps.sort(key=os.path.getmtime, reverse=False)
    # dem_fps.sort(key=os.path.getmtime)  ???? it always picks the first one, which is the day-18 image
    for fname in dem_fps:
        print("Teste: ", fname)
        if grid_img in fname:
            # test date
            name_file = fname[-74:-14]  # get the name of the file
            year_test = name_file[11:15]
            month_test = name_file[15:17]
            day_test = name_file[17:19]
            discriminator_test = name_file[-6:]
            path_shapes = (poly_path + '/' + str(year_img) + str(month_img) + str(day_img)
                           + 'T' + str(discriminator_img) + '_' + str(year_test)
                           + str(month_test) + str(day_test) + 'T' + str(discriminator_test)
                           + '_' + str(grid_img))
            # ------------------------
            # d1 = datetime.datetime(int(year_img), int(month_img), int(day_img))   # current image
            # d2 = datetime.datetime(int(year_test), int(month_test), int(day_test))  # test image
            # print(d1)
            # print(d2)
            # days_dif = d1 - d2
            # days_d = abs(days_dif.days)
            # if time_spaced is None:
            #     time_spaced = days_d
            #     #time_spaced = xxx
            # if days_d > time_spaced:
            #     print(f"Images spaced more than {time_spaced} days... Getting the next image.")
            #     continue
            # elif d1 < d2:
            #     print("Previous image found for grid:", grid_img)
            #     print("Date of current image:", day_img, "-", month_img, '-', year_img)
            #     print("Date of previous image:", day_test, "-", month_test, '-', year_test)
            #     break
            # ------------------------------------
            if os.path.isdir(path_shapes) is False:
                # compare date - date in yyyy/mm/dd format
                d1 = datetime.datetime(int(year_img), int(month_img), int(day_img))  # current image
                d2 = datetime.datetime(int(year_test), int(month_test), int(day_test))  # test image
                print(d1)
                print(d2)
                days_dif = d1 - d2
                days_d = abs(days_dif.days)
                if time_spaced is None:
                    time_spaced = days_d
                    #time_spaced = xxx
                time_spaced = days_d  # added for testing
                if days_d > time_spaced:
                    print(f"Images spaced more than {time_spaced} days... Getting the next image.")
                    continue
                elif d1 < d2:
                    # because the download is made from the most recent image to the oldest
                    print("Previous image found for grid:", grid_img)
                    print("Date of current image:", day_img, "-", month_img, '-', year_img)
                    print("Date of previous image:", day_test, "-", month_test, '-', year_test)

                    BSCL1 = cloud_masks(path_to_folder, save_class_path)  # current
                    EVI1 = evi(path_to_folder)
                    EVI1[BSCL1 == 9999] = np.nan  # masking EVI final image
                    # search for the path
                    path_to_folder_BSCL2 = save_imgs + '/' + name_file + '.SAFE/GRANULE/'
                    BSCL2 = cloud_masks(path_to_folder_BSCL2, save_class_path)  # old image
                    EVI0 = evi(path_to_folder_BSCL2)
                    EVI0[BSCL2 == 9999] = np.nan  # masking EVI initial image
                    #mask_BSCL = BSCL2 - BSCL1
                    #BSCL2[mask_BSCL == -9999] = 9999  # use BSCL2 as reference

                    # create polygons
                    actual_open = rasterio.open(class_path + '/' + zip_name + '_predicted.tif')
                    old_open = rasterio.open(class_path + '/' + name_file + '_predicted.tif')
                    actual = actual_open.read()
                    old = old_open.read()
                    difference = old - actual
                    difference[difference == 255] = 1    # deforestation (polygon is generated)
                    difference[difference == -255] = 0   # forest growth (polygon is not generated)
                    # insert masks that represent clouds
                    difference[BSCL2 == 9999] = 0  # where there are clouds, the polygon is not generated
                    difference[BSCL1 == 9999] = 0

                    # opening clouds masks from UNet
                    clouds_actual = rasterio.open(class_clouds + '/' + zip_name + '_predicted.tif')
                    clouds_old = rasterio.open(class_clouds + '/' + name_file + '_predicted.tif')
                    clouds_actual_raster = clouds_actual.read()
                    clouds_old_raster = clouds_old.read()
                    difference[clouds_actual_raster == 255] = 0  # where there are clouds, the polygon is not generated
                    difference[clouds_old_raster == 255] = 0

                    # calling growth_rate() function
                    dt = d1 - d2
                    dt = dt.days  # getting time interval between the two images in days
                    # -----------------------------------------------------------------------------
                    print('Applying growth rate function')
                    r_matrix, r_mask = growth_rate(EVI0[0, :, :], EVI1[0, :, :], dt, 1)
                    arr_diff = difference[0, :, :]
                    arr_old = old[0, :, :]
                    r_mask[(arr_diff == 0) & (arr_old == 0)] = 0  # not forested areas in both comparisons
                    r_mask[arr_diff == -255] = 0  # areas where there weren't forests but now there are
                    # excluding noisy pixels
                    img_r = copy.copy(r_mask)
                    kernel = np.ones((2, 2), np.uint8)
                    erosion_r = cv2.erode(img_r, kernel, iterations=1)
                    erosion_r = np.int8(erosion_r)
                    erosion_r = np.expand_dims(erosion_r, axis=0)  # expanding channels to: [:,:,:]
                    # -----------------------------------------------------------------------------
                    print('Applying clouds correction')
                    # Applying clouds corrections
                    clouds_defor = clouds_actual_raster + clouds_old_raster + difference
                    clouds_defor[clouds_defor > 0] = 1
                    clouds_sum = clouds_actual_raster + clouds_old_raster
                    clouds_sum[clouds_sum > 1] = 1
                    lw, num = measurements.label(clouds_defor[0, :, :])
                    sum_all = clouds_sum[0, :, :] + lw  # lw is the groups
                    clouds_inter = np.zeros((lw.shape[0], lw.shape[1]))  # has the clouds and the deforestation raster
                    clouds_inter = np.where((lw != sum_all), lw, 0)
                    clouds_inter[clouds_inter != 0]
                    uniques = np.unique(clouds_inter)
                    teste_list = np.array(uniques).tolist()  # 23690 classes as a list
                    teste_list[0] = 1  # drop 0 as a group
                    lw_list = np.array(lw).tolist()  # map with all the classes as a list
                    values = np.ones((len(teste_list))).tolist()
                    keys = teste_list  # 23690 classes as a list
                    # Changing the values that have clouds interference
                    arr = np.concatenate(lw_list)
                    idx = npi.indices(keys, arr, missing='mask')
                    remap = np.logical_not(idx.mask)
                    arr[remap] = np.array(values)[idx[remap]]
                    replaced = np.array_split(arr, np.cumsum([len(a) for a in lw_list][:-1]))
                    final = np.array(replaced)
                    final[final != 1] = 0  # pixels that were not replaced get 0
                    # mask in defor variable (where final is 1, defor gets 0)
                    difference = difference[0, :, :]
                    difference[final == 1] = 0
                    # excluding noisy pixels
                    img = copy.copy(difference)
                    kernel = np.ones((5, 5), np.uint8)
                    erosion = cv2.erode(img, kernel, iterations=1)
                    erosion = np.expand_dims(erosion, axis=0)  # expanding channels to: [:,:,:]
                    # -----------------------------------------------------------------------------
                    # selecting the polygons which have some pixel with negative growth
                    print('Evaluating false positives')
                    lw, num = measurements.label(erosion[0, :, :])  # group pixel zones
                    sum_all = erosion_r + lw
                    defor_mask = np.zeros((lw.shape[0], lw.shape[1]))
                    defor_mask = np.where((lw != sum_all), lw, 0)
                    defor_mask[defor_mask != 0]
                    uniques = np.unique(defor_mask)
                    teste_list = np.array(uniques).tolist()
                    teste_list[0] = 1  # drop 0 as a group
                    lw_list = np.array(lw).tolist()  # map with all the classes as a list
                    values = np.ones((len(teste_list))).tolist()
                    keys = teste_list
                    dictionary = dict(zip(keys, values))
                    arr = np.concatenate(lw_list)
                    idx = npi.indices(keys, arr, missing='mask')
                    remap = np.logical_not(idx.mask)
                    arr[remap] = np.array(values)[idx[remap]]
                    replaced = np.array_split(arr, np.cumsum([len(a) for a in lw_list][:-1]))
                    final = np.array(replaced)
                    final[final != 1] = 0  # pixels that were not replaced get 0
                    np.shape(final)
                    final = np.expand_dims(final, axis=0)
                    erosion[final == 0] = 0
                    # -----------------------------------------------------------------------------
                    # remove groups of pixels with an area smaller than 10 pixels
                    lw, num = measurements.label(erosion[0, :, :])  # grouping pixel clusters
                    area = measurements.sum(erosion[0, :, :], lw, index=arange(lw.max() + 1))
                    areaImg = area[lw]
                    areaImg = np.expand_dims(areaImg, axis=0)
                    erosion[areaImg < 10] = 0  # DIFFERENT VALUES CAN BE TESTED TO CHOOSE THE BEST ONE
                    # -----------------------------------------------------------------------------
                    # crs from a satellite image band - to save image metadata
                    listOfFiles = list()
                    for (dirpath, dirnames, filenames) in os.walk(path_to_folder):
                        listOfFiles += [os.path.join(dirpath, file) for file in filenames]
                    for bandname in listOfFiles:
                        if (bandname.endswith("B02_10m.jp2")):
                            # read metadata information
                            b2 = rasterio.open(bandname)

                            print('Creating shapefile')
                            # creating vectors of deforestation raster (erosion)
                            shapes = rasterio.features.shapes(erosion, transform=b2.transform)
                            records = [{"geometry": geometry, "properties": {"value": value}}
                                       for (geometry, value) in shapes if value == 1]
                            schema = {"geometry": "Polygon", "properties": {"value": "int"}}
                            os.mkdir(path_shapes)
                            with fiona.open(path_shapes + '/' + str(year_img) + str(month_img)
                                            + str(day_img) + '_' + str(year_test) + str(month_test)
                                            + str(day_test) + str(grid_img) + '.shp',
                                            "w", "ESRI Shapefile", crs=b2.crs.data,
                                            schema=schema) as out_file:
                                out_file.writerecords(records)
                            # -------------------------------------------------
                            print('Saving results')
                            with rasterio.open(poly_path + '/' + str(year_img) + str(month_img)
                                               + str(day_img) + '_' + str(year_test)
                                               + str(month_test) + str(day_test) + str(grid_img)
                                               + '_deforestation.tif', 'w', driver='Gtiff',
                                               width=b2.width, height=b2.height, count=1,
                                               crs=b2.crs, transform=b2.transform,
                                               dtype='uint8') as deforestation:
                                deforestation.write(erosion)
                                deforestation.close()
                            with rasterio.open(poly_path + '/' + str(year_img) + str(month_img)
                                               + str(day_img) + '_' + str(year_test)
                                               + str(month_test) + str(day_test) + str(grid_img)
                                               + '_negative_growth.tif', 'w', driver='Gtiff',
                                               width=b2.width, height=b2.height, count=1,
                                               crs=b2.crs, transform=b2.transform,
                                               dtype='int8') as rmask:
                                rmask.write(erosion_r)
                                rmask.close()
                            break  # exit the for loop?
def main():
    indices = [0, 100, 200, 400, 800, 1600, 4000]

    # Perform coarse-scale simulation with constant k.
    unc_Ts_constk = np.zeros((config.Nt_coarse, config.N_coarse + 2))
    unc_Ts_constk[0] = config.get_T0(config.nodes_coarse)
    for i in range(1, config.Nt_coarse):
        unc_Ts_constk[i] = physics.simulate(
            config.nodes_coarse, config.faces_coarse, unc_Ts_constk[i - 1],
            config.T_a, config.T_b, lambda x: np.ones_like(x) * config.k_ref,
            config.get_cV, config.rho, config.A, config.get_q_hat,
            np.zeros_like(config.nodes_coarse[1:-1]), config.dt_coarse,
            config.dt_coarse * (i - 1), config.dt_coarse * i, False)

    # Perform coarse-scale simulation.
    unc_Ts = np.zeros((config.Nt_coarse, config.N_coarse + 2))
    unc_Ts[0] = config.get_T0(config.nodes_coarse)
    for i in range(1, config.Nt_coarse):
        unc_Ts[i] = physics.simulate(
            config.nodes_coarse, config.faces_coarse, unc_Ts[i - 1],
            config.T_a, config.T_b, config.get_k, config.get_cV, config.rho,
            config.A, config.get_q_hat,
            np.zeros_like(config.nodes_coarse[1:-1]), config.dt_coarse,
            config.dt_coarse * (i - 1), config.dt_coarse * i, False)

    plt.figure()
    for index in indices:
        plt.plot(config.nodes_coarse, unc_Ts[index], label=index)
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(config.results_dir, 'debug_t/unc.pdf'),
                bbox_inches='tight')

    plt.figure()
    for index in indices:
        plt.plot(config.nodes_coarse, unc_Ts_constk[index], label=index)
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(config.results_dir, 'debug_t/unc_const.pdf'),
                bbox_inches='tight')

    # Perform fine-scale simulation.
    ref_Ts = np.zeros((config.Nt_fine, config.N_fine + 2))
    ref_Ts[0] = config.get_T0(config.nodes_fine)
    for i in range(1, config.Nt_fine):
        ref_Ts[i] = physics.simulate(
            config.nodes_fine, config.faces_fine, ref_Ts[i - 1], config.T_a,
            config.T_b, config.get_k, config.get_cV, config.rho, config.A,
            config.get_q_hat, np.zeros_like(config.nodes_fine[1:-1]),
            config.dt_fine, config.dt_fine * (i - 1), config.dt_fine * i, False)

    ref_Ts_downsampled = np.zeros((config.Nt_coarse, config.N_coarse + 2))
    counter = 0
    for time_level in range(0, config.Nt_fine, int(config.dt_coarse / config.dt_fine)):
        idx = npi.indices(np.around(config.nodes_fine, decimals=5),
                          np.around(config.nodes_coarse, decimals=5))
        for i in range(config.N_coarse + 2):
            ref_Ts_downsampled[counter][i] = ref_Ts[time_level][idx[i]]
        counter += 1

    error = unc_Ts - ref_Ts_downsampled
    error_constk = unc_Ts_constk - ref_Ts_downsampled

    plt.figure()
    for index in indices:
        plt.plot(config.nodes_coarse, ref_Ts_downsampled[index], label=index)
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(config.results_dir, 'debug_t/ref.pdf'),
                bbox_inches='tight')

    plt.figure()
    for index in indices:
        plt.plot(config.nodes_coarse, error[index], label=index)
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(config.results_dir, 'debug_t/err.pdf'),
                bbox_inches='tight')

    plt.figure()
    for index in indices:
        plt.plot(config.nodes_coarse, error_constk[index], label=index)
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(config.results_dir, 'debug_t/err_const.pdf'),
                bbox_inches='tight')
def facetsplit_mesh(mesh, imdata, seg, scar_marker, maxden, anisotropy,
                    transpose=False, unit_conversion=1.0):
    """
    Create splits in the mesh according to the image intensities in imdata
    where the scars are marked by the scar_markers in seg.
    """
    # Make sure elements are int
    mesh.elems = mesh.elems.astype(int)

    regist = ImageMeshLgeRegistration(mesh, imdata, seg, scar_marker,
                                      transpose, unit_conversion)

    facets = mesh.get_facets()
    facet_midpoints = np.mean(mesh.verts[facets], axis=1)
    elem_midpoints = np.mean(mesh.verts[mesh.elems], axis=1)

    facet_intensities = regist.register(facet_midpoints)
    elem_intensities = regist.register(elem_midpoints)
    assert (facet_intensities != 0).any()
    assert (elem_intensities != 0).any()

    lge_facets, lge_facet_elems = get_lge_facet_elements(mesh, scar_marker)
    local_global_facetmap = npi.indices(facets, lge_facets)

    # Map intensity data onto facets
    lge_facet_intense = facet_intensities[local_global_facetmap]
    lge_facet_intense = relative_intensity(lge_facet_intense,
                                           lge_facet_intense.min())
    lge_facet_fibres = mean_direction(mesh.fibres[lge_facet_elems])

    if mesh.edim == 3:
        # Cosine between fibre and edge
        costheta = get_fibre_edge_cosine(mesh.verts[lge_facets],
                                         lge_facet_fibres[:, :2])
    elif mesh.edim == 4:
        # Cosine between sheet normal and facet normal
        lge_facet_fibre_sheets = mean_direction(mesh.fibre_sheets[lge_facet_elems])
        lge_facet_sheet_normals = np.cross(lge_facet_fibres, lge_facet_fibre_sheets)
        facet_normals = np.cross(
            mesh.verts[lge_facets][:, 0] - mesh.verts[lge_facets][:, 1],
            mesh.verts[lge_facets][:, 0] - mesh.verts[lge_facets][:, 2])
        facet_normals /= np.linalg.norm(facet_normals, axis=1)[:, np.newaxis]
        costheta = np.abs(np.sum(lge_facet_sheet_normals * facet_normals, axis=1))

    # --------------------------------------------
    # The probability formula for the edge splitting
    p = maxden * (costheta**anisotropy) * lge_facet_intense
    # --------------------------------------------
    assert not np.isnan(p).any()

    # Determine which facets are to be split
    is_split_facet = p >= np.random.rand(len(p))
    # from IPython import embed; embed()
    split_facets = lge_facets[is_split_facet]

    return split_mesh_along_facets(mesh, split_facets)
def __init__(self, start_time, start_store, customerFile, storeFile):
    """
    Initiates an instance of the VRP problem.

    :param start_time: time at which the problem instance starts ('2014-03-13 15:00:00')
    :param start_store: store # 49
    :param customerFile: location of the file containing the delivery/customer data
    :param storeFile: location of the file containing the stores data
    :return: creates attributes of the problem instance:
        (graph/network, customers, stores, nodes)
    """
    # ** Use pandas to read data-sets:
    start_time = pd.to_datetime('2014-03-13 15:00:00')
    try:
        dt = pd.read_table(customerFile, sep='\t')
        stores = pd.read_table(storeFile, sep='\t')
    except FileNotFoundError as e:
        print(e)

    # **** Instantiating Delivery/Store(Node) Objects ****:
    nodes = []
    # ** Delivery/Customer Nodes:
    for i in range(dt.shape[0]):
        nodes.append(Node(dt.delivery_id[i], dt.latitude[i], dt.longitude[i], 1))
    # ** Store Nodes:
    store_list = []  # List of nodes containing stores
    for i in range(stores.shape[0]):
        nodes.append(Node(stores.store_id[i], stores.latitude[i],
                          stores.longitude[i], 0))
        store_list.append(Node(stores.store_id[i], stores.latitude[i],
                               stores.longitude[i], 0))

    # **** Converting due_dates to pandas datetime.
    # **** This code sets the start date/time as ZERO and transforms other due
    # **** dates as minutes elapsed from start-time.
    dt['Time'] = pd.to_datetime(dt.due_at)
    tm = dt['Time'] - start_time
    tmd = (tm / np.timedelta64(1, 's')) / 60.0
    dt['Time'] = tmd

    # ** Customer List and instantiating all the Customer/Delivery objects:
    customers = []
    for i in range(dt.shape[0]):
        customers.append(Customer(dt.delivery_id[i], dt.latitude[i],
                                  dt.longitude[i], dt.Time[i], dt.items_count[i]))

    # ***** Generating Vertex-List and Edge-List to create Graph object for a problem instance:
    # ***** The underlying graph is a complete graph having connections between every pair of nodes.
    ver_num = np.r_[np.c_[dt.delivery_id, np.ones((dt.delivery_id.shape[0], 1))],
                    np.c_[stores.store_id, np.zeros((stores.store_id.shape[0], 1))]]
    ver_lat = np.r_[dt.latitude, stores.latitude]
    ver_lon = np.r_[dt.longitude, stores.longitude]
    vertices = np.c_[ver_num[:, 0], ver_lat, ver_lon, ver_num[:, 1]]

    ed = np.array(list(combinations(vertices[:, 0], 2)))  # creating connections between every pair
    ind0 = npi.indices(vertices[:, 0], ed[:, 0])
    ind1 = npi.indices(vertices[:, 0], ed[:, 1])
    lat0 = vertices[ind0, 1]
    lon0 = vertices[ind0, 2]
    lat1 = vertices[ind1, 1]
    lon1 = vertices[ind1, 2]
    tmp1 = np.c_[lat0, lon0]
    tmp2 = np.c_[lat1, lon1]
    tmp3 = np.c_[tmp1, tmp2]
    edge = np.c_[ed, tmp3]
    dist = np.apply_along_axis(VRPTW.haversine, 1, edge[:, 2:6])
    drive_time = 5 * dist
    edges_tmp = np.c_[edge[:, 0:2], drive_time]
    edges_tmp_2 = np.c_[edges_tmp[:, 1], edges_tmp[:, 0], edges_tmp[:, 2]]
    edges = np.r_[edges_tmp, edges_tmp_2]

    # clear up the memory:
    del (tmp1, tmp2, tmp3, ind0, ind1, lat0, lat1, lon1, lon0, edge, dist,
         drive_time, edges_tmp, edges_tmp_2)

    self.graph = Graph(vertices, edges)
    self.customers = customers
    self.stores = store_list
    self.nodes = nodes
def split_mesh_along_facets(mesh, split_facets, remove_disconnected_elems=True):
    """
    A caveat for the disconnected elems. The algorithm removes elements that
    are edge-disconnected from the network. Some elements may be accessible
    via a single node and should be included.
    """
    assert len(split_facets) > 0
    t0_total = time.time()
    edim = mesh.elems.shape[1]

    split_facets = np.sort(split_facets, axis=1)
    C_elems = get_elem_connectivity(mesh.elems, split_facets)

    if remove_disconnected_elems:
        mesh, split_facets, C_elems = remove_disconnected_elems_from_mesh(
            mesh, split_facets, C_elems)

    # Create edge markers to indicate split facets
    facets = mesh.get_facets()
    facet_markers = np.zeros(len(facets))
    facet_markers[npi.indices(facets, split_facets)] = 1
    mesh.facets = facets
    mesh.facet_markers = facet_markers

    # Make vertex-element data structure
    split_verticies = np.unique(split_facets)
    v_elem = pd.DataFrame(np.vstack(
        (mesh.elems.flatten(),
         np.arange(len(mesh.elems) * edim) // edim)).transpose(),
        columns=["vnum", "elemnum"])
    v_elem = v_elem[npi.in_(v_elem["vnum"], split_verticies)]
    v_elem.loc[v_elem["vnum"].isin(split_verticies)]

    if edim == 3:
        connectivity_calc = ConnectivityCalculator2d(mesh, C_elems, split_facets)
    elif edim == 4:
        connectivity_calc = ConnectivityCalculator3d(mesh, C_elems, split_facets)
    else:
        raise Exception("Cannot handle {}D mesh".format(edim - 1))

    # Loop over every vertex that is to be split
    t0_facets = time.time()
    for vnum, vgroup in v_elem.groupby("vnum"):
        local_components = connectivity_calc.get_local_components(vgroup)
        for group in local_components[1:]:
            # Add a new vertex for every connected element group after the first
            mesh.verts = np.vstack((mesh.verts, [mesh.verts[vnum]]))
            new_vnum = len(mesh.verts) - 1
            # Update the group elements with the new vertex
            new_elems = mesh.elems[group]
            new_elems[new_elems == vnum] = new_vnum
            mesh.elems[group] = new_elems

    print("Finished edgesplitting mesh")
    print("Total time = {:.3f}".format(time.time() - t0_total))
    print("Facet growing time = {:.3f}".format(time.time() - t0_facets))

    return mesh
def put_points(self, points, asiter=False):
    it = ((self.push(p) if i < 0 else i)
          for (p, i) in zip(points, npi.indices(self[:self._length], points, 0, -1)))
    return it if asiter else list(it)
def indices(self, points):
    return npi.indices(self, points, 0)
def index(self, point):
    if self._length > 0:
        return npi.indices(self[:self._length], [point], 0)[0]
    raise KeyError('Not all keys in `that` are present in `this`')
def generate_trips(self, customers, stores, graph, trip_size=3, customer_pick='best'):
    """
    :param customers: list of customers
    :param stores: list of stores
    :param graph: underlying network
    :param trip_size: trips of size 1, 2, 3
    :param customer_pick: method of picking the customers to visit, either 'best'
        (closest in terms of due date) or 'randomize' (pick among the best 20)
    :return: returns top ten {or less if less than 10 unvisited customers remain}
        based on the best time criteria
    """
    curr_loc = self.get_current_location()
    stores = np.array(stores)
    # cust_data = Customer.due_date_list(customers)  # An np array [id, due, items]
    cust_dt = np.array([[c.get_id(), c.get_due_time(), c.get_item_count(),
                         c.get_visit_time()] for c in customers])
    cust_data = cust_dt[cust_dt[:, 3] == 0]  # ** Select unvisited customers (visit_time = 0)

    # Check whether the current location is a Store; if not, send the shopper to a nearby store.
    if np.sum(np.isin(stores, curr_loc)) == 0:
        # Randomly choose a method of sending the shopper to a nearby store:
        p = np.random.uniform(0, 1, 1)
        if p <= 0.5:
            self.go_to_store(stores, graph, distance='randomize')
        else:
            self.go_to_store(stores, graph, distance='shortest')
        curr_loc = self.get_current_location()

    visited_customers = Customer.find_visited_customers(customers)
    if visited_customers.size == len(customers):
        print("All Customers have been visited")
        return customers
    if visited_customers.size > len(customers) - trip_size:
        trip_size = len(customers) - visited_customers.size

    # -----------------------------------------
    e = graph.get_edges()  # graph edges
    potentials = e[(e[:, 0] == curr_loc) & (~np.isin(e[:, 1], stores)) &
                   (~np.isin(e[:, 1], visited_customers)), :]
    potential_cust = potentials[:, 1]

    # ************ Using Due Times Method to build the trips **************
    id = npi.indices(potential_cust, cust_data[:, 0], missing='ignore')
    idx = np.array(list(set(id)))
    potentials = cust_data[idx, :]
    # Adjust the due dates by the current shopper's time
    potentials[:, 1] -= self.get_current_time()
    potentials[:, 1] = abs(potentials[:, 1])  # ** Absolute difference between current time & due times
    customer_count = potentials.shape[0]

    if customer_pick == 'best':
        if customer_count >= 10:
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][0:10]  # ** top 10 closest due dates
        elif customer_count < 10 and customer_count >= trip_size:
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][0:customer_count]  # ** top closest due dates
        elif customer_count < trip_size and customer_count > 0:
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][0:customer_count]
        else:
            print("******* ZERO CUSTOMERS LEFT ; SOMETHING WENT WRONG *******")
            print("SIZE OF POTENTIALS = ", potentials.shape[0])
            return
    elif customer_pick == 'randomize':
        if customer_count >= 20:
            picks = random.sample(range(0, 20), 10)  # ** 10 random numbers 0 - 19
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][picks]
        elif customer_count < 20 and customer_count >= 10:
            picks = random.sample(range(0, customer_count), 10)  # ** 10 random numbers 1 - 20
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][picks]
        elif customer_count < 10 and customer_count >= trip_size:
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][0:customer_count]
        elif customer_count < trip_size and customer_count > 0:
            top_ten = potentials[np.argsort(potentials[:, 1]), 0][0:customer_count]
        else:
            print("$$$$$$ ZERO CUSTOMERS LEFT $$$$$$")
            return
    else:
        print("customer_pick must be either 'best' or 'randomize' ")
        return

    return top_ten
def create_datasets(cfg):
    """
    Purpose: Create datasets for supervised learning of data-driven correction
    models for the 1D heat equation.

    :return: dataset_train, dataset_val, dataset_test
    """
    # Data config.
    datasets_location = cfg.datasets_dir
    data_tag = cfg.data_tag

    # Load pickled simulation data, or create and pickle new data if none exists already.
    save_filepath = os.path.join(datasets_location, data_tag + ".sav")
    if os.path.exists(save_filepath) and False:
        simulation_data = joblib.load(save_filepath)
    else:
        unc_Ts = np.zeros((cfg.Nt_coarse, cfg.N_coarse + 2))
        unc_Ts[0] = cfg.get_T0(cfg.nodes_coarse)
        ref_Ts = np.zeros((cfg.Nt_coarse, cfg.N_coarse + 2))
        ref_Ts[0] = cfg.get_T0(cfg.nodes_coarse)
        IC_Ts = np.zeros((cfg.Nt_coarse, cfg.N_coarse + 2))
        IC_Ts[0] = cfg.get_T0(cfg.nodes_coarse)
        ref_Ts_full = np.zeros((cfg.Nt_coarse, cfg.N_fine + 2))
        ref_Ts_full[0] = cfg.get_T0(cfg.nodes_fine)
        idx = npi.indices(np.around(cfg.nodes_fine, decimals=10),
                          np.around(cfg.nodes_coarse, decimals=10))
        for i in range(1, cfg.Nt_coarse):
            old_time = np.around(cfg.dt_coarse * (i - 1), decimals=10)
            new_time = np.around(cfg.dt_coarse * i, decimals=10)
            if i <= cfg.Nt_coarse * (cfg.N_train_examples + cfg.N_val_examples) or (not cfg.do_simulation_test):
                unc_IC = ref_Ts[i - 1]
            else:
                unc_IC = unc_Ts[i - 1]
            IC_Ts[i] = unc_IC
            unc_Ts[i] = physics.simulate(
                cfg.nodes_coarse, cfg.faces_coarse, unc_IC, cfg.get_T_a,
                cfg.get_T_b, cfg.get_k_approx, cfg.get_cV, cfg.rho, cfg.A,
                cfg.get_q_hat_approx, np.zeros_like(cfg.nodes_coarse[1:-1]),
                cfg.dt_coarse, old_time, new_time, False)
            if cfg.exact_solution_available:
                ref_Ts[i] = cfg.get_T_exact(cfg.nodes_coarse, new_time)
            else:
                ref_Ts_full[i] = physics.simulate(
                    cfg.nodes_fine, cfg.faces_fine, ref_Ts_full[i - 1],
                    cfg.get_T_a, cfg.get_T_b, cfg.get_k, cfg.get_cV, cfg.rho,
                    cfg.A, cfg.get_q_hat, np.zeros_like(cfg.nodes_fine[1:-1]),
                    cfg.dt_fine, old_time, new_time, False)
                for j in range(cfg.N_coarse + 2):
                    ref_Ts[i][j] = ref_Ts_full[i][idx[j]]

        # Calculate correction source terms.
        sources = np.zeros((cfg.Nt_coarse, cfg.N_coarse))
        for i in range(1, cfg.Nt_coarse):  # Intentionally leaves the first entry all-zeros.
            old_time = np.around(cfg.dt_coarse * (i - 1), decimals=10)
            new_time = np.around(cfg.dt_coarse * i, decimals=10)
            sources[i] = physics.get_corrective_src_term(
                cfg.nodes_coarse, cfg.faces_coarse, ref_Ts[i], ref_Ts[i - 1],
                cfg.get_T_a, cfg.get_T_b, cfg.get_k_approx, cfg.get_cV,
                cfg.rho, cfg.A, cfg.get_q_hat_approx, cfg.dt_coarse, old_time,
                False)
            corrected = physics.simulate(
                cfg.nodes_coarse, cfg.faces_coarse, ref_Ts[i - 1], cfg.get_T_a,
                cfg.get_T_b, cfg.get_k_approx, cfg.get_cV, cfg.rho, cfg.A,
                cfg.get_q_hat_approx, sources[i], cfg.dt_coarse, old_time,
                new_time, False)
            np.testing.assert_allclose(corrected, ref_Ts[i], rtol=1e-10, atol=1e-10)
        print("Correction source terms generated and verified.")

        # Store data
        simulation_data = {
            'x': cfg.nodes_coarse,
            'ICs': IC_Ts,
            'unc': unc_Ts,
            'ref': ref_Ts,
            'src': sources
        }
        joblib.dump(simulation_data, save_filepath)

    # Remove data for t=0 from datasets.
    ICs = simulation_data['ICs'][1:, :]
    unc = simulation_data['unc'][1:, :]
    ref = simulation_data['ref'][1:, :]
    src = simulation_data['src'][1:, :]  # The entry removed here is all-zeros.
    times = np.linspace(cfg.dt_coarse, cfg.t_end, cfg.Nt_coarse - 1, endpoint=True)
    assert times[1] == 2 * cfg.dt_coarse

    # Shuffle data.
    assert ICs.shape[0] == unc.shape[0] == ref.shape[0] == src.shape[0] == times.shape[0]
    permutation = np.random.permutation(ICs.shape[0])
    ICs = ICs[permutation]
    unc = unc[permutation]
    ref = ref[permutation]
    src = src[permutation]
    times = times[permutation]

    # Split data into training, validation and test set.
    train_ICs = ICs[:cfg.N_train_examples, :]
    train_unc = unc[:cfg.N_train_examples, :]
    train_ref = ref[:cfg.N_train_examples, :]
    train_src = src[:cfg.N_train_examples, :]
    train_times = times[:cfg.N_train_examples]

    val_ICs = ICs[cfg.N_train_examples:cfg.N_train_examples + cfg.N_val_examples, :]
    val_unc = unc[cfg.N_train_examples:cfg.N_train_examples + cfg.N_val_examples, :]
    val_ref = ref[cfg.N_train_examples:cfg.N_train_examples + cfg.N_val_examples, :]
    val_src = src[cfg.N_train_examples:cfg.N_train_examples + cfg.N_val_examples, :]
    val_times = times[cfg.N_train_examples:cfg.N_train_examples + cfg.N_val_examples]

    test_ICs = ICs[cfg.N_train_examples + cfg.N_val_examples:, :]
    test_unc = unc[cfg.N_train_examples + cfg.N_val_examples:, :]
    test_ref = ref[cfg.N_train_examples + cfg.N_val_examples:, :]
    test_src = src[cfg.N_train_examples + cfg.N_val_examples:, :]
    test_times = times[cfg.N_train_examples + cfg.N_val_examples:]

    assert train_ICs.shape[0] == cfg.N_train_examples
    assert train_unc.shape[0] == cfg.N_train_examples
    assert train_ref.shape[0] == cfg.N_train_examples
    assert train_src.shape[0] == cfg.N_train_examples
    assert train_times.shape[0] == cfg.N_train_examples
    assert val_ICs.shape[0] == cfg.N_val_examples
    assert val_unc.shape[0] == cfg.N_val_examples
    assert val_ref.shape[0] == cfg.N_val_examples
    assert val_src.shape[0] == cfg.N_val_examples
    assert val_times.shape[0] == cfg.N_val_examples
    assert test_ICs.shape[0] == cfg.N_test_examples
    assert test_unc.shape[0] == cfg.N_test_examples
    assert test_ref.shape[0] == cfg.N_test_examples
    assert test_src.shape[0] == cfg.N_test_examples
    assert test_times.shape[0] == cfg.N_test_examples

    # Augment training data.
    if cfg.augment_training_data:
        # Shift augmentation.
        train_ICs_orig = train_ICs.copy()
        train_unc_orig = train_unc.copy()
        train_ref_orig = train_ref.copy()
        train_src_orig = train_src.copy()
        train_times_orig = train_times.copy()
        for i in range(cfg.N_shift_steps):
            # IC temperature
            train_ICs_aug = train_ICs_orig + (i + 1) * cfg.shift_step_size
            train_ICs = np.concatenate((train_ICs, train_ICs_aug), axis=0)
            # Uncorrected temperature
            train_unc_aug = train_unc_orig + (i + 1) * cfg.shift_step_size
            train_unc = np.concatenate((train_unc, train_unc_aug), axis=0)
            # Reference temperature
            train_ref_aug = train_ref_orig + (i + 1) * cfg.shift_step_size
            train_ref = np.concatenate((train_ref, train_ref_aug), axis=0)
            # Correction source term
            train_src = np.concatenate((train_src, train_src_orig), axis=0)
            # Time levels
            train_times = np.concatenate((train_times, train_times_orig), axis=0)

        # Mirror augmentation.
        # IC temperature
        train_ICs_mirror = np.flip(train_ICs, axis=1).copy()
        train_ICs = np.concatenate((train_ICs, train_ICs_mirror), axis=0)
        # Uncorrected temperature
        train_unc_mirror = np.flip(train_unc, axis=1).copy()
        train_unc = np.concatenate((train_unc, train_unc_mirror), axis=0)
        # Reference temperature
        train_ref_mirror = np.flip(train_ref, axis=1).copy()
        train_ref = np.concatenate((train_ref, train_ref_mirror), axis=0)
        # Correction source term
        train_src_mirror = np.flip(train_src, axis=1).copy()
        train_src = np.concatenate((train_src, train_src_mirror), axis=0)
        # Time levels
        train_times = np.concatenate((train_times, train_times), axis=0)

    # Calculate statistical properties of training data.
    train_unc_mean = np.mean(train_unc)
    train_ref_mean = np.mean(train_ref)
    train_src_mean = np.mean(train_src)
    train_unc_std = np.std(train_unc)
    train_ref_std = np.std(train_ref)
    train_src_std = np.std(train_src)

    # z_normalize data.
    train_unc_normalized = util.z_normalize(train_unc, train_unc_mean, train_unc_std)
    val_unc_normalized = util.z_normalize(val_unc, train_unc_mean, train_unc_std)
    test_unc_normalized = util.z_normalize(test_unc, train_unc_mean, train_unc_std)
    train_ref_normalized = util.z_normalize(train_ref, train_ref_mean, train_ref_std)
    val_ref_normalized = util.z_normalize(val_ref, train_ref_mean, train_ref_std)
    test_ref_normalized = util.z_normalize(test_ref, train_ref_mean, train_ref_std)
    train_src_normalized = util.z_normalize(train_src, train_src_mean, train_src_std)
    val_src_normalized = util.z_normalize(val_src, train_src_mean, train_src_std)
    test_src_normalized = util.z_normalize(test_src, train_src_mean, train_src_std)
    # Note that the ICs are not to be used in conjunction with the NN directly,
    # so there is no need to normalize them. Same goes for time levels.

    # Convert data from Numpy array to Torch tensor.
    train_ICs_tensor = torch.from_numpy(train_ICs)
    train_unc_tensor = torch.from_numpy(train_unc_normalized)
    train_ref_tensor = torch.from_numpy(train_ref_normalized)
    train_src_tensor = torch.from_numpy(train_src_normalized)
    train_times_tensor = torch.from_numpy(train_times)
    val_ICs_tensor = torch.from_numpy(val_ICs)
    val_unc_tensor = torch.from_numpy(val_unc_normalized)
    val_ref_tensor = torch.from_numpy(val_ref_normalized)
    val_src_tensor = torch.from_numpy(val_src_normalized)
    val_times_tensor = torch.from_numpy(val_times)
    test_ICs_tensor = torch.from_numpy(test_ICs)
    test_unc_tensor = torch.from_numpy(test_unc_normalized)
    test_ref_tensor = torch.from_numpy(test_ref_normalized)
    test_src_tensor = torch.from_numpy(test_src_normalized)
    test_times_tensor = torch.from_numpy(test_times)

    # Create array to store stats used for normalization.
    stats = np.asarray([
        train_unc_mean, train_ref_mean, train_src_mean, train_unc_std,
        train_ref_std, train_src_std
    ])

    # Pad with zeros to satisfy requirements of Torch's TensorDataset.
    # (Assumes that all datasets contain 6 or more data examples.)
    assert train_unc.shape[0] >= 6 and val_unc.shape[0] >= 6 and test_unc.shape[0] >= 6
    stats_train = np.zeros(train_unc.shape[0])
    stats_val = np.zeros(val_unc.shape[0])
    stats_test = np.zeros(test_unc.shape[0])
    stats_train[:6] = stats
    stats_val[:6] = stats
    stats_test[:6] = stats

    # Convert stats arrays to tensors
    stats_train_tensor = torch.from_numpy(stats_train)
    stats_val_tensor = torch.from_numpy(stats_val)
    stats_test_tensor = torch.from_numpy(stats_test)

    # Create datasets.
    dataset_train = torch.utils.data.TensorDataset(
        train_unc_tensor, train_ref_tensor, train_src_tensor,
        stats_train_tensor, train_ICs_tensor, train_times_tensor)
    dataset_val = torch.utils.data.TensorDataset(
        val_unc_tensor, val_ref_tensor, val_src_tensor, stats_val_tensor,
        val_ICs_tensor, val_times_tensor)
    dataset_test = torch.utils.data.TensorDataset(
        test_unc_tensor, test_ref_tensor, test_src_tensor, stats_test_tensor,
        test_ICs_tensor, test_times_tensor)

    return dataset_train, dataset_val, dataset_test