def convert_chapters(self, chapter_start: int, chapter_end: int):
    name = self.config.name or self.config.book
    file_name = "{} - Chapters {}-{}.txt".format(
        name if len(name) <= 20 else self.config.book.upper(),
        chapter_start,
        chapter_end,
    )
    output_file = utils.get_book_dir(self.config.book, _OUTPUT_DIR, file_name)
    title = f"{name} - Chapters {chapter_start}-{chapter_end}"
    utils.ensure_dir(utils.get_book_dir(self.config.book, _OUTPUT_DIR))
    progress = utils.ProgressBar(chapter_end - chapter_start + 1, "Converting")
    with open(output_file, "wb") as f:
        f.write("{}\n\n\n".format(title).encode())
        for ch in range(chapter_start, chapter_end + 1):
            progress.update()
            # if self.config.add_chapter_titles:
            #     f.write("Chapter {}\n\n".format(ch).encode())
            f.write(self.process_chapter(ch).encode())
            if ch != chapter_end:
                f.write(
                    "{0}{1}{0}".format(
                        "\n" * _LINES_BETWEEN_CHAPTERS, _CHAPTER_SEPERATOR
                    ).encode()
                )
    progress.finish()
    print("Result is in '{}'".format(output_file))
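# The snippets in this collection call utils.ProgressBar through several
# different interfaces (update/finish, make_a_step/stop, updateProgress,
# update_progress, print, complete, ...). A minimal sketch compatible with the
# update()/finish() pattern used in the snippet above -- hypothetical, not the
# real utils.ProgressBar:
import sys

class ProgressBar:
    """Text progress bar with a fixed total and a label (illustrative sketch)."""

    def __init__(self, total, label=""):
        self.total = total
        self.label = label
        self.count = 0

    def update(self, step=1):
        # Advance the counter and redraw the bar in place on one console line.
        self.count += step
        done = int(50 * self.count / self.total)
        sys.stdout.write("\r{} [{}{}] {}/{}".format(
            self.label, "#" * done, "." * (50 - done), self.count, self.total))
        sys.stdout.flush()

    def finish(self):
        # Move to the next line once the bar is complete.
        sys.stdout.write("\n")

# Usage mirroring the snippet above:
# bar = ProgressBar(10, "Converting")
# for _ in range(10):
#     bar.update()
# bar.finish()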
def evaluate(self, dataname, pred_f, batch_size=256):
    N = self.__dict__['nb_{}'.format(dataname)]
    n = math.ceil(N / batch_size)
    qid2prv = {}
    progress_bar = utils.ProgressBar(n, msg='{}'.format(dataname))
    y_true = []
    y_pred = []
    for batch, info in self.gen_data_by_batch(dataname, batch_size):
        qids, votes, ranks = info
        pred_y = pred_f(batch)
        y_pred.extend(pred_y)
        y_true.extend(self.scale_y(votes))
        for i in range(len(qids)):
            qid = qids[i]
            qid2prv.setdefault(qid, [])
            qid2prv[qid].append((pred_y[i], ranks[i], votes[i]))
        progress_bar.make_a_step()
    t = progress_bar.stop()
    if rank_metric:  # rank_metric is expected to be defined in the enclosing scope
        metric = Metric.mean([RankMetric(v) for v in qid2prv.values()])
    else:
        metric = ErrorMetric(y_true, y_pred)
    return metric, t
def convert_chapters(self, chapter_start: int, chapter_end: int):
    output_dir = utils.get_book_dir(self.config.book, _OUTPUT_DIR)
    utils.ensure_dir(output_dir)
    progress = utils.ProgressBar(chapter_end - chapter_start + 1, "Converting")
    for ch in range(chapter_start, chapter_end + 1):
        progress.update()
        links = {
            "title": self.config.book.upper() + " — " + utils.chapter_name(ch),
            "prev": utils.chapter_name(ch - 1) + ".html",
            "nxt": utils.chapter_name(ch + 1) + ".html",
        }
        with open(
            os.path.join(output_dir, utils.chapter_name(ch) + ".html"), "wb"
        ) as f:
            before = _HTML_BEFORE.format(**links)
            after = _HTML_AFTER.format(**links)
            output = before + self.process_chapter(ch) + after
            f.write(output.encode("utf-8"))
    progress.finish()
    print("Result is in '{}'".format(output_dir))
def rdm_spearman(vectors):
    matrix = np.zeros((len(vectors), len(vectors)))
    # Progress bar added because very large RDMs take a long time to compute
    progress_bar = utils.ProgressBar()
    for i, vec1 in enumerate(vectors):
        if len(vectors) > 100:
            progress_bar.updateProgress(i, len(vectors), prefix="Generating RDM:")
        for j, vec2 in enumerate(vectors):
            matrix[i, j] = 1 - stats.spearmanr(vec1, vec2)[0]
    return matrix
def generate_sd_grid_mapping_traj(ipath_sd, n_top_grid, ipath_top_grid,
                                  ipath_grid_block_gps_range, odir_sd,
                                  mapping_rate=1, mapping_bais=None):
    """Generate the grid-mapping trajectories for SD."""

    def random_sampling(grid_range):
        """Generate a sample point within a grid range."""
        x = np.random.uniform(grid_range[0][0], grid_range[1][0])
        y = np.random.uniform(grid_range[0][1], grid_range[1][1])
        return x, y

    # for pep8
    if mapping_bais is None:
        mapping_bais = {'lat': 0, 'lon': 0}  # privacy budget

    with open(ipath_sd) as fr_sd:
        sd = [eval(point) for point in fr_sd.readlines()]

    # C = n_top_grid ** 2
    # with open(ipath_top_grid) as fr_top_grid:
    #     M = eval(fr_top_grid.readline())

    with open(ipath_grid_block_gps_range) as fr_top_grid_block_gps_range:
        fstr = fr_top_grid_block_gps_range.readlines()
        grid_block_gps_range = eval(fstr[0])
        # top_grid_block_gps_range = eval(fstr[1])

    reverse_mapped_trajs = []
    for traj in sd:
        reverse_mapped_trajs.append(
            [random_sampling(grid_block_gps_range[i]) for i in traj])

    # write to files
    fcount = 0
    p = utils.ProgressBar(len(reverse_mapped_trajs), 'Generating sanitized dataset')
    for i in range(len(reverse_mapped_trajs)):
        p.update(i)
        with open(odir_sd + '/sd_traj' + str(fcount) + '.txt', 'w') as fw_traj:
            for point in reverse_mapped_trajs[i]:
                # mapping
                point = [
                    point[0] / mapping_rate + mapping_bais['lat'],
                    point[1] / mapping_rate + mapping_bais['lon']
                ]
                fw_traj.write(str(point[0]) + ',' + str(point[1]) + '\n')
        fcount += 1
def packages():
    if not CraftCore.cache.availablePackages:
        CraftCore.cache.availablePackages = []
        CraftCore.log.info("Updating search cache:")
        packages = CraftPackageObject.root().allChildren()
        total = len(packages)
        with utils.ProgressBar() as progress:
            for p in packages:
                package = SeachPackage(p)
                CraftCore.cache.availablePackages.append(package)
                progress.print(
                    int(len(CraftCore.cache.availablePackages) / total * 100))
    return CraftCore.cache.availablePackages
async def __download_chapters(self, chapter_list: List[int]):
    raw_dir = utils.get_raw_dir(self.config.book)
    utils.ensure_dir(raw_dir)
    async with aiohttp.ClientSession() as session:
        print("\nPreparing download ...\r", end="", flush=True)
        await self.config.website.prepare_download(self.config, session)
        self.progress = utils.ProgressBar(len(chapter_list), "Downloading")
        tasks = []
        for ch in chapter_list:
            tasks.append(self.__download_chapter(ch, session))
        await asyncio.gather(*tasks)
        self.progress.finish()
def run_experiment(param_dict):
    start = time.time()
    n_runs = np.arange(1e3)
    progbar = utils.ProgressBar()
    space = list(np.arange(param_dict['n']))
    k = int(param_dict['k'])
    # Run the experiment n_runs times
    for run in n_runs:
        progbar.update_progress(run / (n_runs.shape[0] - 1))
        get_sample(space, k)
    param_dict['elapsed_time'] = time.time() - start
    return param_dict
def start(self):
    self.is_started = True
    self.start_time = time.time()
    self.round_duration = len(color_set) * self.interval_duration
    self.pools = ddict(_get_color_pool)

    def color_builder():
        (r, i) = calculate_round_interval(self.start_time, self.interval_duration)
        return self.pools[r].pop()

    self.colors = ddict(lambda: ddict(color_builder))
    # graphics stuff
    self.prog_bar = utils.ProgressBar(self.size[0], self.size[1] / 10.0, 1.0,
                                      bg_color=background_color,
                                      border_width=5,
                                      border_color=(255, 69, 0))
def epoch(self, train=False, lr=0.1):
    if train:
        self.network.train()
        loader = self.loader_train
        forward = self.forward_train
    else:
        self.network.eval()
        loader = self.loader_test
        forward = self.forward_test
    loss_total = 0
    sample_error = 0
    sample_error5 = 0
    sample_total = 0
    progress = utils.ProgressBar(len(loader), '<progress bar is initialized.>')
    for batch_idx, (inputs, targets) in enumerate(loader):
        batchsize = targets.size(0)
        outputs, loss_batch = forward(inputs, targets)
        _, predicted = torch.max(outputs.data, 1)
        _, predicted5 = torch.topk(outputs.data, 5)
        sample_total += batchsize
        sample_error += batchsize - predicted.cpu().eq(targets).sum().item()
        loss_total += loss_batch.data.item() * batchsize
        loss = float(loss_total / sample_total)
        err = float(1. * sample_error / sample_total)
        # Top-5 error: a sample counts as correct if the target appears in
        # any of the five highest-scoring predictions.
        result = predicted5[:, 0].cpu().eq(targets)
        for i in range(4):
            result += predicted5[:, i + 1].cpu().eq(targets)
        result = result.sum().item()
        sample_error5 += batchsize - result
        err5 = float(1. * sample_error5 / sample_total)
        progress.update(
            '{}, top1 loss: {:0.4f}, err:{:5.2f}% ({:5d}/{:5d}), top5 err:{:5.2f}% ({:5d}/{:5d}), lr:{}'.format(
                'train' if train else ' test', loss, 100 * err,
                int(sample_error), int(sample_total), 100 * err5,
                int(sample_error5), int(sample_total), lr))
    return [err, loss]
def evaluate(self, dataname, pred_f, batch_size=256):
    n = int(np.ceil(len(self.data[dataname]) / batch_size))
    progress_bar = utils.ProgressBar(n, msg='{}'.format(dataname))
    qid2tp = {}
    yt = []
    yp = []
    for x, y, aids in self.gen_data_by_batch(dataname, batch_size):
        pred_y = pred_f(x)
        if self.rank:
            for i in range(len(aids)):
                qid = self.aid2qu[aids[i]][0]
                qid2tp.setdefault(qid, [])
                qid2tp[qid].append((y[i], pred_y[i]))
        else:
            yt.extend(y)
            yp.extend(pred_y)
        progress_bar.make_a_step()
    t = progress_bar.stop()
    if self.rank:
        metric = Metric.mean([RankMetric(v) for v in qid2tp.values()])
    else:
        metric = ErrorMetric(yt, yp)
    return metric, t
def main(args):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    utils.setup_experiment(args)
    utils.init_logging(args)

    # Build data loaders, a model and an optimizer
    model = models.build_model(args).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[50, 60, 70, 80, 90, 100], gamma=0.5)
    logging.info(f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters")

    if args.resume_training:
        state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
        global_step = state_dict['last_step']
        start_epoch = int(state_dict['last_step'] / (403200 / state_dict['args'].batch_size)) + 1
    else:
        global_step = -1
        start_epoch = 0

    train_loader, valid_loader, _ = data.build_dataset(args.dataset, args.data_path,
                                                       batch_size=args.batch_size)

    # Track moving average of loss values
    train_meters = {name: utils.RunningAverageMeter(0.98)
                    for name in ["train_loss", "train_psnr", "train_ssim"]}
    valid_meters = {name: utils.AverageMeter() for name in ["valid_psnr", "valid_ssim"]}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    for epoch in range(start_epoch, args.num_epochs):
        if args.resume_training:
            if epoch % 10 == 0:
                optimizer.param_groups[0]["lr"] /= 2
                print('learning rate reduced by factor of 2')

        train_bar = utils.ProgressBar(train_loader, epoch)
        for meter in train_meters.values():
            meter.reset()

        for batch_id, inputs in enumerate(train_bar):
            model.train()
            global_step += 1
            inputs = inputs.to(device)
            noise = utils.get_noise(inputs, mode=args.noise_mode,
                                    min_noise=args.min_noise / 255.,
                                    max_noise=args.max_noise / 255.,
                                    noise_std=args.noise_std / 255.)
            noisy_inputs = noise + inputs
            outputs = model(noisy_inputs)
            loss = F.mse_loss(outputs, inputs, reduction="sum") / (inputs.size(0) * 2)
            model.zero_grad()
            loss.backward()
            optimizer.step()

            train_psnr = utils.psnr(outputs, inputs)
            train_ssim = utils.ssim(outputs, inputs)
            train_meters["train_loss"].update(loss.item())
            train_meters["train_psnr"].update(train_psnr.item())
            train_meters["train_ssim"].update(train_ssim.item())
            train_bar.log(dict(**train_meters, lr=optimizer.param_groups[0]["lr"]), verbose=True)

            if writer is not None and global_step % args.log_interval == 0:
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], global_step)
                writer.add_scalar("loss/train", loss.item(), global_step)
                writer.add_scalar("psnr/train", train_psnr.item(), global_step)
                writer.add_scalar("ssim/train", train_ssim.item(), global_step)
                gradients = torch.cat([p.grad.view(-1) for p in model.parameters()
                                       if p.grad is not None], dim=0)
                writer.add_histogram("gradients", gradients, global_step)
                sys.stdout.flush()

        if epoch % args.valid_interval == 0:
            model.eval()
            for meter in valid_meters.values():
                meter.reset()

            valid_bar = utils.ProgressBar(valid_loader)
            for sample_id, sample in enumerate(valid_bar):
                with torch.no_grad():
                    sample = sample.to(device)
                    noise = utils.get_noise(sample, mode='S',
                                            noise_std=(args.min_noise + args.max_noise) / (2 * 255.))
                    noisy_inputs = noise + sample
                    output = model(noisy_inputs)
                    valid_psnr = utils.psnr(output, sample)
                    valid_meters["valid_psnr"].update(valid_psnr.item())
                    valid_ssim = utils.ssim(output, sample)
                    valid_meters["valid_ssim"].update(valid_ssim.item())

                    if writer is not None and sample_id < 10:
                        image = torch.cat([sample, noisy_inputs, output], dim=0)
                        image = torchvision.utils.make_grid(image.clamp(0, 1), nrow=3, normalize=False)
                        writer.add_image(f"valid_samples/{sample_id}", image, global_step)

            if writer is not None:
                writer.add_scalar("psnr/valid", valid_meters['valid_psnr'].avg, global_step)
                writer.add_scalar("ssim/valid", valid_meters['valid_ssim'].avg, global_step)
                sys.stdout.flush()

            logging.info(train_bar.print(dict(**train_meters, **valid_meters,
                                              lr=optimizer.param_groups[0]["lr"])))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=valid_meters["valid_psnr"].avg, mode="max")
        scheduler.step()

    logging.info(f"Done training! Best PSNR {utils.save_checkpoint.best_score:.3f} "
                 f"obtained after step {utils.save_checkpoint.best_step}.")
def merisfactor(self, dataobj, inc=0.0, wvl=np.pi * 4.):
    '''Write pi-factor from Li et al 2012 to a matrix / HDF5 object or a file directly.
    This is used when no lat/lon file is known. Bilinear interpolation is used.

    Args:
        * dataobj (str or HDF5 or np.array): Final output. If str, output is written to file.

    Kwargs:
        * inc (np.float): Incidence angle in degrees.
        * wvl (np.float): Wavelength in meters. Default output results in delay in meters.

    .. note::
        If dataobj is a string, output is written to the file.
        If np.array or HDF5 object, it should be of size (ny,nx).'''

    minAltp = self.dict['minAltP']

    # Incidence
    cinc = np.cos(inc * np.pi / 180.0)

    # Compute the two integrals
    WonT = self.Vi / self.Ti
    WonT2 = WonT / self.Ti
    S1 = intg.cumtrapz(WonT, x=self.hgt, axis=-1)
    val = 2 * S1[:, -1] - S1[:, -2]
    val = val[:, None]
    S1 = np.concatenate((S1, val), axis=-1)
    del WonT
    S2 = intg.cumtrapz(WonT2, x=self.hgt, axis=-1)
    val = 2 * S2[:, -1] - S2[:, -2]
    val = val[:, None]
    S2 = np.concatenate((S2, val), axis=-1)
    del WonT2
    Tm = S1 / S2
    self.Tm = Tm

    # Reading in the DEM
    if self.verb:
        print('PROGRESS: READING DEM')
    fin = open(self.hfile, 'rb')
    if self.fmt in ('HGT'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=self.nx * self.ny).reshape(self.ny, self.nx)
    elif self.fmt in ('RMG'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=2 * self.nx * self.ny).reshape(self.ny, 2 * self.nx)
        dem = dem[:, self.nx:]
    dem = np.round(dem).astype(int)
    fin.close()

    # Check output, and open file if necessary
    outFile = isinstance(dataobj, str)
    if outFile:
        fout = open(dataobj, 'wb')
        dout = np.zeros((self.ny, self.nx))
    else:
        assert ((dataobj.shape[0] == self.ny) &
                (dataobj.shape[1] == self.nx)), 'PyAPS: Not a valid data object.'
        dout = dataobj

    # Create the 1d interpolator
    if self.verb:
        print('PROGRESS: FINE INTERPOLATION OF HEIGHT LEVELS')
    intp_1d = si.interp1d(self.hgt, Tm, kind='cubic', axis=1)

    # Interpolate the Tm variable every meter
    dem[dem < minAltp] = minAltp
    minH = dem.min()
    maxH = dem.max() + 1
    kh = np.arange(minH, maxH)
    Tm_1m = intp_1d(kh)
    self.alti = kh

    # Reshape Tm
    Lonu = np.unique(self.lonlist)
    Latu = np.unique(self.latlist)
    nLon = len(Lonu)
    nLat = len(Latu)
    Tm_1m = np.reshape(Tm_1m.T, (len(kh), nLat, nLon))
    self.Tm_1m = Tm_1m

    # Build the x array
    xarr = np.arange(1., self.nx + 1.)

    # Create the cube interpolator for the bilinear method
    if self.verb:
        print('PROGRESS: CREATE THE BILINEAR INTERPOLATION FUNCTION')
    bilicube = processor.Bilinear2DInterpolator(Lonu, Latu, Tm_1m, cube=True)

    # Get the values from the dictionary
    k1 = self.dict['k1']
    k2 = self.dict['k2']
    k3 = self.dict['k3']
    mmO = self.dict['mmO']
    mmH = self.dict['mmH']
    mma = self.dict['mma']
    w = (2 * mmH + mmO) / mma
    Rv = self.dict['Rv']
    Rho = self.dict['Rho']

    # Loop on the lines
    if self.verb:
        toto = utils.ProgressBar(maxValue=self.ny)
        print('PROGRESS: MAPPING THE DELAY')
    for m in range(self.ny):
        if self.verb:
            toto.update(m, every=5)
        # Transfer (range, azimuth) to (lon, lat)
        yarr = (m + 1) * np.ones((xarr.shape))
        [loni, lati] = utils.rdr2glob(self.nx, self.ny, self.lat, self.lon, xarr, yarr)
        # Make the bilinear interpolation
        D = dem[m, :] - minH
        val = bilicube(loni, lati, D)
        val = 0.000001 * Rho * Rv * (k3 / val + k2 - w * k1) * np.pi * 4.0 / (cinc * wvl)
        if outFile:
            resy = val.astype(np.float32)
            resy.tofile(fout)
        else:
            dataobj[m, :] = val
    if self.verb:
        toto.close()

    # Close if outfile
    if outFile:
        fout.close()
def geomerisfactor(self, dataobj, lat=None, lon=None, inc=None, wvl=np.pi * 4.):
    '''Write pi-factor from Li et al 2012 to a matrix / HDF5 object or a file directly.
    This is used when the latitude and longitude values are available for each radar pixel.
    Incidence angle can be a constant or a file name. Bilinear interpolation is used.

    Args:
        * dataobj (str or HDF5 or np.array): Final output. If str, output is written to file.

    Kwargs:
        * lat (str): Path to the latitude file (np.float32).
        * lon (str): Path to the longitude file (np.float32).
        * inc (str or np.float): Path to incidence angle file in degrees (str) or a constant float.
        * wvl (np.float): Wavelength in meters. Default output results in delay in meters.

    .. note::
        If dataobj is a string, output is written to the file.
        If np.array or HDF5 object, it should be of size (ny,nx).'''

    assert lat is not None, 'PyAPS: Need a valid latitude file.'
    assert lon is not None, 'PyAPS: Need a valid longitude file.'

    if isinstance(inc, float) or isinstance(inc, np.float64) or isinstance(inc, np.float32):
        cinc = np.cos(inc * np.pi / 180.)
        incFileFlag = False
    else:
        assert inc is not None, 'PyAPS: Need a valid incidence angle file or constant.'
        incFileFlag = True
        incin = open(inc, 'rb')

    latin = open(lat, 'rb')
    lonin = open(lon, 'rb')
    minAltp = self.dict['minAltP']

    # Compute the two integrals
    WonT = self.Vi / self.Ti
    WonT2 = WonT / self.Ti
    S1 = intg.cumtrapz(WonT, x=self.hgt, axis=-1)
    val = 2 * S1[:, -1] - S1[:, -2]
    val = val[:, None]
    S1 = np.concatenate((S1, val), axis=-1)
    del WonT
    S2 = intg.cumtrapz(WonT2, x=self.hgt, axis=-1)
    val = 2 * S2[:, -1] - S2[:, -2]
    val = val[:, None]
    S2 = np.concatenate((S2, val), axis=-1)
    del WonT2
    Tm = S1 / S2
    self.Tm = Tm

    # Reading in the DEM
    if self.verb:
        print('PROGRESS: READING DEM')
    fin = open(self.hfile, 'rb')
    if self.fmt in ('HGT'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=self.nx * self.ny).reshape(self.ny, self.nx)
    elif self.fmt in ('RMG'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=2 * self.nx * self.ny).reshape(self.ny, 2 * self.nx)
        dem = dem[:, self.nx:]
    dem = np.round(dem).astype(int)
    fin.close()

    # Check output, and open file if necessary
    outFile = isinstance(dataobj, str)
    if outFile:
        fout = open(dataobj, 'wb')
        dout = np.zeros((self.ny, self.nx))
    else:
        assert ((dataobj.shape[0] == self.ny) &
                (dataobj.shape[1] == self.nx)), 'PyAPS: Not a valid data object.'
        dout = dataobj

    # Create the 1d interpolator
    if self.verb:
        print('PROGRESS: FINE INTERPOLATION OF HEIGHT LEVELS')
    intp_1d = si.interp1d(self.hgt, Tm, kind='cubic', axis=1)

    # Interpolate the Tm variable every meter
    dem[dem < minAltp] = minAltp
    minH = dem.min()
    maxH = dem.max() + 1
    kh = np.arange(minH, maxH)
    Tm_1m = intp_1d(kh)
    self.alti = kh

    # Reshape Tm
    Lonu = np.unique(self.lonlist)
    Latu = np.unique(self.latlist)
    nLon = len(Lonu)
    nLat = len(Latu)
    Tm_1m = np.reshape(Tm_1m.T, (len(kh), nLat, nLon))
    self.Tm_1m = Tm_1m

    # Build the x array
    xarr = np.arange(1., self.nx + 1.)

    # Create the cube interpolator for the bilinear method
    if self.verb:
        print('PROGRESS: CREATE THE BILINEAR INTERPOLATION FUNCTION')
    bilicube = processor.Bilinear2DInterpolator(Lonu, Latu, Tm_1m, cube=True)

    # Get the values from the dictionary
    k1 = self.dict['k1']
    k2 = self.dict['k2']
    k3 = self.dict['k3']
    mmO = self.dict['mmO']
    mmH = self.dict['mmH']
    mma = self.dict['mma']
    w = (2 * mmH + mmO) / mma
    Rv = self.dict['Rv']
    Rho = self.dict['Rho']

    # Loop on the lines
    if self.verb:
        toto = utils.ProgressBar(maxValue=self.ny)
        print('PROGRESS: MAPPING THE DELAY')
    for m in range(self.ny):
        if self.verb:
            toto.update(m, every=5)
        # Get latitude and longitude arrays
        lati = np.fromfile(file=latin, dtype=np.float32, count=self.nx)
        loni = np.fromfile(file=lonin, dtype=np.float32, count=self.nx)
        loni[loni < 0.] += 360.
        ii = np.where(np.isnan(lati))
        jj = np.where(np.isnan(loni))
        xx = np.union1d(ii, jj)
        lati[xx] = 0.0
        loni[xx] = 0.0
        # Get incidence if file provided
        if incFileFlag:
            incz = np.fromfile(file=incin, dtype=np.float32, count=self.nx)
            cinc = np.cos(incz * np.pi / 180.)
        # Make the bilinear interpolation
        D = dem[m, :] - minH
        val = bilicube(loni, lati, D)
        val = 0.000001 * Rho * Rv * (k3 / val + k2 - w * k1) * np.pi * 4.0 / (cinc * wvl)
        val[xx] = np.nan
        if outFile:
            resy = val.astype(np.float32)
            resy.tofile(fout)
        else:
            dataobj[m, :] = val
    if self.verb:
        toto.close()

    latin.close()
    lonin.close()
    if incFileFlag:
        incin.close()
    if outFile:
        fout.close()
def getdelay(self, dataobj, inc=0.0, wvl=4 * np.pi):
    '''Write delay to a matrix / HDF5 object or a file directly.
    Bilinear interpolation at each elevation level is used.

    Args:
        * dataobj (str or HDF5 or np.array): Final output. If str, output is written to file.

    Kwargs:
        * inc (np.float): Incidence angle in degrees. Default is vertical.
        * wvl (np.float): Wavelength in meters. Default output results in delay in meters.

    .. note::
        If dataobj is a string, output is written to the file.
        If np.array or HDF5 object, it should be of size (ny,nx).'''

    minAltp = self.dict['minAltP']

    # Reading in the DEM
    if self.verb:
        print('PROGRESS: READING DEM')
    fin = open(self.hfile, 'rb')
    if self.fmt in ('HGT'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=self.nx * self.ny).reshape(self.ny, self.nx)
    elif self.fmt in ('RMG'):
        dem = np.fromfile(file=fin, dtype=self.demtype,
                          count=2 * self.nx * self.ny).reshape(self.ny, 2 * self.nx)
        dem = dem[:, self.nx:]
    dem = np.round(dem).astype(int)
    fin.close()

    # Check output, and open file if necessary
    outFile = isinstance(dataobj, str)
    if outFile:
        fout = open(dataobj, 'wb')
        dout = np.zeros((self.ny, self.nx))
    else:
        assert ((dataobj.shape[0] == self.ny) &
                (dataobj.shape[1] == self.nx)), 'PyAPS: Not a valid data object.'
        dout = dataobj

    # Incidence
    cinc = np.cos(inc * np.pi / 180.0)

    # Create the 1d interpolator
    if self.verb:
        print('PROGRESS: FINE INTERPOLATION OF HEIGHT LEVELS')
    intp_1d = si.interp1d(self.hgt, self.Delfn, kind='cubic', axis=-1)

    # Interpolate the delay function every meter
    dem[dem < minAltp] = minAltp
    minH = dem.min()
    maxH = dem.max() + 1
    kh = np.arange(minH, maxH)
    Delfn_1m = intp_1d(kh)
    self.Delfn_interp = Delfn_1m.copy()
    self.alti = kh

    # Reshape Delfn
    Lonu = np.unique(self.lonlist)
    Latu = np.unique(self.latlist)
    nLon = len(Lonu)
    nLat = len(Latu)
    Delfn_1m = np.reshape(Delfn_1m.T, (len(kh), nLat, nLon))
    self.Delfn_1m = Delfn_1m

    # Build the x array
    xarr = np.arange(1., self.nx + 1.)

    # Create the cube interpolator for the bilinear method
    if self.verb:
        print('PROGRESS: CREATE THE BILINEAR INTERPOLATION FUNCTION')
    bilicube = processor.Bilinear2DInterpolator(Lonu, Latu, Delfn_1m, cube=True)

    # Loop on the lines
    if self.verb:
        toto = utils.ProgressBar(maxValue=self.ny)
        print('PROGRESS: MAPPING THE DELAY')
    for m in range(self.ny):
        if self.verb:
            toto.update(m, every=5)
        # Transfer (range, azimuth) to (lon, lat)
        yarr = (m + 1) * np.ones((xarr.shape))
        [loni, lati] = utils.rdr2glob(self.nx, self.ny, self.lat, self.lon, xarr, yarr)
        # Make the bilinear interpolation
        D = dem[m, :] - minH
        val = bilicube(loni, lati, D) * np.pi * 4.0 / (cinc * wvl)
        if outFile:
            resy = val.astype(np.float32)
            resy.tofile(fout)
        else:
            dataobj[m, :] = val
    if self.verb:
        toto.close()

    # Close if outfile
    if outFile:
        fout.close()
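# The val = ... * np.pi * 4.0 / (cinc * wvl) scaling above converts a zenith
# delay in meters into the requested output: dividing by cos(inc) projects the
# zenith delay onto the radar line of sight, and 4*pi/wvl converts meters of
# delay into radians of two-way phase. With the default wvl = 4*pi the factor
# reduces to 1/cos(inc), so the output stays in meters, as the docstrings note.
# A standalone sketch of that conversion (hypothetical helper, mirroring the
# line in the loop above):
import numpy as np

def zenith_to_slant(zenith_delay_m, inc_deg, wvl_m=4 * np.pi):
    # Project onto the line of sight, then scale meters -> two-way phase.
    cinc = np.cos(np.deg2rad(inc_deg))
    return zenith_delay_m * 4.0 * np.pi / (cinc * wvl_m)

# Example: 2.3 m zenith delay, 23 deg incidence, C-band (0.056 m) wavelength.
print(zenith_to_slant(2.3, 23.0, 0.056))   # two-way phase in radians
print(zenith_to_slant(2.3, 23.0))          # default wvl: slant delay in meters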
def getlindelay(self, dataobj, inc=0.0, wvl=4 * np.pi):
    '''Write delay to a matrix / HDF5 object or a file directly.
    LinearNDInterpolator is used, and is not really stable.

    Args:
        * dataobj (str or HDF5 or np.array): Final output. If str, output is written to file.

    Kwargs:
        * inc (np.float): Incidence angle in degrees. Default is vertical.
        * wvl (np.float): Wavelength in meters. Default output results in delay in meters.

    .. note::
        If dataobj is a string, output is written to the file.
        If np.array or HDF5 object, it should be of size (ny,nx).'''

    self.rdrfnc = processor.make3dintp(self.Delfn, self.lonlist, self.latlist,
                                       self.hgt, self.hgtscale)
    minAltp = self.dict['minAltP']
    xarr = np.arange(1., self.nx + 1.)
    fin = open(self.hfile, 'rb')

    outFile = isinstance(dataobj, str)
    if outFile:
        fout = open(dataobj, 'wb')
    else:
        assert ((dataobj.shape[0] == self.ny) &
                (dataobj.shape[1] == self.nx)), 'PyAPS: Not a valid data object.'

    cinc = np.cos(inc * np.pi / 180.0)
    toto = utils.ProgressBar(maxValue=self.ny)
    for m in range(self.ny):
        if self.fmt in ('HGT'):
            dem = np.fromfile(file=fin, dtype=self.demtype, count=self.nx)
        elif self.fmt in ('RMG'):
            dem = np.fromfile(file=fin, dtype=self.demtype, count=2 * self.nx)
            dem = dem[self.nx:]
        dem[dem < minAltp] = minAltp
        demy = dem.astype(np.float64)
        llh = np.zeros((self.nx, 3))
        yarr = (m + 1) * np.ones((xarr.shape))
        [xin, yin] = utils.rdr2glob(self.nx, self.ny, self.lat, self.lon, xarr, yarr)
        llh[:, 0] = xin
        llh[:, 1] = yin
        llh[:, 2] = demy / self.hgtscale
        res = self.rdrfnc(llh)
        res = res * np.pi * 4.0 / (cinc * wvl)
        res = res.flatten()
        if outFile:
            resy = res.astype(np.float32)
            resy.tofile(fout)
        else:
            dataobj[m, :] = res
        toto.update(m, every=5)
    toto.close()
    if outFile:
        fout.close()
    fin.close()
def make_minimap(self, save_to, scale=1 / 4):
    gx = self.GX * scale
    gy = self.GY * scale
    print('gx gy:', (gx, gy))
    xmax, ymax = self.xmax, self.ymax
    # xmax //= 2
    # ymax //= 2
    # Left most grid: (0, ymax)
    # Right most grid: (xmax, 0)
    # Top most grid: (0, 0)
    # Bottom most grid: (xmax, ymax)
    centerX, centerY = 0, 0
    open(save_to, 'wb').close()

    # Function to convert from grid to surface coordinates
    def to_spos(pos):
        x, y = pos
        return ((centerX + (x - y) * gx), ((x + y + 1) * gy))

    width = int(gx * 2 + minus(to_spos((xmax, 0)), to_spos((0, ymax)))[0])
    height = int(gy * 2 + minus(to_spos((xmax, ymax)), to_spos((0, 0)))[1])
    progress = utils.ProgressBar(xmax * ymax)
    surface = utils.new_surface((width, height))
    print('dest size:', surface.get_size())
    centerX, centerY = surface.get_rect().center
    utils.clear_surface(surface)
    tmpSize = tmpWidth, tmpHeight = (2 ** 12,) * 2
    print('tmp buffer size:', tmpSize)
    tmpScaleSize = (int(tmpWidth * scale), int(tmpHeight * scale))
    print('tmpScaleSize:', tmpScaleSize)
    tmpSurface = utils.new_surface(tmpSize)
    blockDx = blockDy = int(min(tmpWidth / self.GX / 2, tmpHeight / self.GY / 2))
    for blockX in range(0, xmax, blockDx):
        for blockY in range(0, ymax, blockDy):
            utils.clear_surface(tmpSurface)
            for x in range(blockDx):
                for y in range(blockDy):
                    if not 0 <= x < xmax or not 0 <= y < ymax:
                        continue
                    # Draw floor
                    texture = self.get_floor_texture((blockX + x, blockY + y)) \
                        or self.textures.get(0)
                    spos = (
                        tmpWidth / 2 - texture.xoff + (x - y) * self.GX,
                        -texture.yoff + (x + y + 1) * self.GY,
                    )
                    tmpSurface.blit(texture.image, spos)
                    # Draw other
                    texture = self.get_grid_texture((blockX + x, blockY + y))
                    if texture:
                        spos = (
                            tmpWidth / 2 - texture.xoff + (x - y) * self.GX,
                            -texture.yoff + (x + y + 1) * self.GY,
                        )
                        tmpSurface.blit(texture.image, spos)
                    progress.update()
            surface.blit(
                pg.transform.scale(tmpSurface, add((1, 1), tmpScaleSize)),
                minus(to_spos((blockX, blockY)), (tmpScaleSize[0] / 2, gy)))
            # print((blockX, blockY), to_spos((blockX, blockY)))
    pg.image.save(surface, save_to)
    print('\nfinished')
        print(f'Wrong directory {path}')
        sys.exit(1)
    return _images


def get_user_id():
    print('Enter a user id:')
    return input()


if __name__ == "__main__":
    directory, album, token, user = parse_args()
    images = find_images(directory)
    if not images:
        print(f"can't find images in {directory}")
        sys.exit(2)
    album_api = VKapi.AlbumApi(token, user)
    # TODO: add error handling
    album_id = album_api.find_album_by_name(album)
    progressBar = utils.ProgressBar(len(images))
    for image in images:
        upload_url = album_api.get_upload_server(album_id)
        response = album_api.upload_image(upload_url, image)
        album_api.save_photos(album_id, response['server'],
                              response['photos_list'], response['aid'],
                              response['hash'])
        progressBar.update()
def main(args):
    # gpu or cpu
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    args = utils.setup_experiment(args)
    utils.init_logging(args)

    # Loading models
    MODEL_PATH_LOAD = "../lidar_experiments/2d/lidar_unet2d/lidar-unet2d-Nov-08-16:29:49/checkpoints/checkpoint_best.pt"
    train_new_model = True

    # Build data loaders, a model and an optimizer
    if train_new_model:
        model = models.build_model(args).to(device)
    else:
        model = models.build_model(args)
        # Load the local checkpoint path defined above (was args.MODEL_PATH_LOAD,
        # which is never set on args)
        model.load_state_dict(torch.load(MODEL_PATH_LOAD)['model'][0])
        model.to(device)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[5, 15, 30, 50, 100, 250], gamma=0.5)
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters")

    if args.resume_training:
        state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
        global_step = state_dict['last_step']
        start_epoch = int(state_dict['last_step'] / (403200 / state_dict['args'].batch_size)) + 1
    else:
        global_step = -1
        start_epoch = 0

    ## Load the pts files
    # Loads as a list of numpy arrays
    scan_line_tensor = torch.load(args.data_path + 'scan_line_tensor.pts')
    train_idx_list = torch.load(args.data_path + 'train_idx_list.pts')
    valid_idx_list = torch.load(args.data_path + 'valid_idx_list.pts')
    sc = torch.load(args.data_path + 'sc.pts')

    # Dataloaders
    train_dataset = LidarLstmDataset(scan_line_tensor, train_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)
    valid_dataset = LidarLstmDataset(scan_line_tensor, valid_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               num_workers=4, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.batch_size,
                                               num_workers=4, shuffle=True)

    # Track moving average of loss values
    train_meters = {name: utils.RunningAverageMeter(0.98) for name in ["train_loss"]}
    valid_meters = {name: utils.AverageMeter() for name in ["valid_loss"]}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    ##################################################
    # TRAINING
    for epoch in range(start_epoch, args.num_epochs):
        if args.resume_training:
            if epoch % 1 == 0:
                optimizer.param_groups[0]["lr"] /= 2
                print('learning rate reduced by factor of 2')

        train_bar = utils.ProgressBar(train_loader, epoch)
        for meter in train_meters.values():
            meter.reset()

        # epoch_loss_sum = 0
        for batch_id, (clean, mask) in enumerate(train_bar):
            # dataloader returns a [clean, mask] list
            model.train()
            global_step += 1
            inputs = clean.to(device)
            mask_inputs = mask.to(device)
            # only use the masked part of the outputs
            raw_outputs = model(inputs, mask_inputs)
            outputs = (1 - mask_inputs[:, :3, :, :]) * raw_outputs \
                + mask_inputs[:, :3, :, :] * inputs[:, :3, :, :]
            if args.wtd_loss:
                loss = weighted_MSELoss(outputs, inputs[:, :3, :, :], sc) \
                    / (inputs.size(0) * (args.mask_pts_per_seq ** 2))
                # Regularization?
            else:
                # normalized by the number of masked points
                loss = F.mse_loss(outputs, inputs[:, :3, :, :], reduction="sum") \
                    / (inputs.size(0) * (args.mask_pts_per_seq ** 2))
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # epoch_loss_sum += loss * inputs.size(0)
            train_meters["train_loss"].update(loss)
            train_bar.log(dict(**train_meters, lr=optimizer.param_groups[0]["lr"]),
                          verbose=True)

            if writer is not None and global_step % args.log_interval == 0:
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], global_step)
                writer.add_scalar("loss/train", loss.item(), global_step)
                gradients = torch.cat([p.grad.view(-1) for p in model.parameters()
                                       if p.grad is not None], dim=0)
                writer.add_histogram("gradients", gradients, global_step)
                sys.stdout.flush()
        # epoch_loss = epoch_loss_sum / len(train_loader.dataset)

        if epoch % args.valid_interval == 0:
            model.eval()
            for meter in valid_meters.values():
                meter.reset()
            valid_bar = utils.ProgressBar(valid_loader)
            val_loss = 0
            for sample_id, (clean, mask) in enumerate(valid_bar):
                with torch.no_grad():
                    inputs = clean.to(device)
                    mask_inputs = mask.to(device)
                    # only use the masked part of the outputs
                    raw_output = model(inputs, mask_inputs)
                    output = (1 - mask_inputs[:, :3, :, :]) * raw_output \
                        + mask_inputs[:, :3, :, :] * inputs[:, :3, :, :]
                    # TODO: only run the loss on the masked part of the output
                    if args.wtd_loss:
                        val_loss = weighted_MSELoss(output, inputs[:, :3, :, :], sc) \
                            / (inputs.size(0) * (args.mask_pts_per_seq ** 2))
                    else:
                        # normalized by the number of masked points
                        val_loss = F.mse_loss(output, inputs[:, :3, :, :], reduction="sum") \
                            / (inputs.size(0) * (args.mask_pts_per_seq ** 2))
                    valid_meters["valid_loss"].update(val_loss.item())

            if writer is not None:
                writer.add_scalar("loss/valid", valid_meters['valid_loss'].avg, global_step)
                sys.stdout.flush()

            logging.info(train_bar.print(dict(**train_meters, **valid_meters,
                                              lr=optimizer.param_groups[0]["lr"])))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=valid_meters["valid_loss"].avg, mode="min")
        scheduler.step()

    logging.info(
        f"Done training! Best Loss {utils.save_checkpoint.best_score:.3f} "
        f"obtained after step {utils.save_checkpoint.best_step}.")
def plot_trials(exp, out_fn, cols_per_page=2, rows_per_page=5, n_colors=10,
                max_trials=None, decorations=None):
    """
    Plot the experiment raw data - the characters, as the subject wrote them -
    and save to a PDF file.

    :param exp: Experiment object
    :param out_fn: PDF file name
    :param cols_per_page: No. of trial columns in each page
    :param rows_per_page: No. of trial rows in each page
    :param n_colors: No. of colors to use to denote level of pressure
    :param max_trials: Plot only the first trials in the experiment
    """
    n_trials_per_page = cols_per_page * rows_per_page
    pdf = PdfPages(out_fn)
    trials = list(exp.sorted_trials)
    z_values = np.array([point.z for t in trials for point in t.on_paper_points])
    max_z = max(z_values)

    def get_z_levels(z):
        return _convert_z_to_level(z, max_z, n_colors)

    if max_trials is not None:
        trials = trials[:min(max_trials, len(trials))]

    n_pages = math.ceil(len(trials) / n_trials_per_page)
    progress = u.ProgressBar(len(trials), 'Preparing pdf...')
    n_done = 0
    while len(trials) > 0:
        curr_page_n_trials = min(n_trials_per_page, len(trials))
        fig, axes = plt.subplots(rows_per_page, cols_per_page)
        fig.subplots_adjust(hspace=.8, wspace=0.3)
        axes = np.reshape(axes, [n_trials_per_page])
        for i in range(curr_page_n_trials):
            trial = trials.pop(0)
            n_done += 1
            ax = axes[i]
            ax.get_yaxis().set_visible(False)
            ax.get_xaxis().set_visible(False)
            ax.set_title(_trial_title(trial), fontdict=dict(fontsize=5))
            plot_trial(trial, ax=ax, get_z_levels=get_z_levels, decorations=decorations)
        if curr_page_n_trials < n_trials_per_page:
            for i in range(curr_page_n_trials, n_trials_per_page):
                ax = axes[i]
                ax.get_yaxis().set_visible(False)
                ax.get_xaxis().set_visible(False)
        pdf.savefig(fig)
        plt.close(fig)
        progress.progress(n_done)

    pdf.close()
    if n_pages > 3:
        print('')
def generate_adaptive_grid(
        idir_traj,
        opath_top_grid,
        opath_grid_traj,
        opath_grid_block_gps_range,
        epsilon_alloc,
        epsilon_tot,
        gps_range,
        n_top_grid=7,
        add_noise=True,
        is_plot=False,
        beta_factor=80
):
    """
    Generate the adaptive grid and map trajectories onto it.

    :param idir_traj: input dir for trajectories, where each traj point is (lat, lon)
    :param opath_top_grid: output path for the top grid partition.
    :param opath_grid_traj: output path for the grid-partitioned traj.
    :param opath_grid_block_gps_range: output path for the grid range.
    :param n_top_grid: the number of top grid cells per side.
    :param epsilon_alloc: the privacy budget of the adaptive grid.
    :param epsilon_tot: the privacy budget of DP-Star.
    :param gps_range: the gps range of the trajectories, formatted like
        {'lon': (lon_min, lon_max), 'lat': (lat_min, lat_max)}
    :param add_noise: whether to add noise to the grid or not.
    :param is_plot: whether to plot the gps points and grid.
    :param beta_factor:
    :return:
    """

    def grid_boundary_judge(cal_grid_idx, boundary=n_top_grid):
        """Clamp a calculated grid idx that falls outside the grid boundary.

        E.g. when n_top_grid is 7, a point on the edge of the grid gets
        index 7; in that case we simply clamp the idx to n_top_grid - 1.
        """
        return cal_grid_idx if cal_grid_idx < boundary else boundary - 1

    assert len(gps_range) == 2, 'The format of gps_range is wrong!'

    def cal_point_idx(_point, _n_grid=n_top_grid, _step=None, _base=None):
        """
        Calculate the idx of a point in the grid.

        :param _point: the point whose idx is needed
        :param _n_grid: the number of grid cells per side
        :param _step: the grid block length
        :param _base: the bias or base of the point
        :return:
        """
        idx = grid_boundary_judge(int((_point[0] - _base['lat']) / _step['lat']), _n_grid) * _n_grid \
            + grid_boundary_judge(int((_point[1] - _base['lon']) / _step['lon']), _n_grid)
        return idx

    tot_traj = read_mdl_data(idir_traj)

    # grid param according to the paper
    beta = (epsilon_tot - epsilon_alloc) / beta_factor
    # the number of blocks in the top grid
    C = n_top_grid ** 2
    # the gps range for each top grid block
    top_block_gps_step = {
        'lon': (gps_range['lon'][1] - gps_range['lon'][0]) / n_top_grid,
        'lat': (gps_range['lat'][1] - gps_range['lat'][0]) / n_top_grid
    }

    # calculate the eta score for each top grid cell
    eta_score = [0 for _ in range(C)]
    for traj in tot_traj:
        for point in traj:
            C_idx = cal_point_idx(point, _step=top_block_gps_step,
                                  _base={'lon': gps_range['lon'][0],
                                         'lat': gps_range['lat'][0]})
            eta_score[C_idx] += 1 / len(traj) if len(traj) else 0

    # add Laplace noise
    if add_noise:
        lap_noise = np.random.laplace(0, 1 / epsilon_alloc, C)
        eta_score = [eta_score[i] + lap_noise[i] for i in range(C)]

    # puzzle: simply clamp negative values to 0
    for i in range(C):
        if eta_score[i] < 0:
            eta_score[i] = 0

    # the bottom grid count (per side) for each top grid block
    M = [np.sqrt(eta_score[i] * beta) for i in range(C)]
    for i in range(C):
        if M[i] < 1:
            # min grid num is 1
            M[i] = 1
        else:
            # rounding
            M[i] = int(np.rint(M[i]))

    # calculate the grid ranges
    grid_block_gps_range = {}
    for i in range(C):
        current_idx = 0
        for j in range(i):
            # get the current idx
            current_idx += M[j] ** 2
        # if there is only one grid cell
        if M[i] == 1:
            row = i // n_top_grid
            col = i - row * n_top_grid
            grid_block_gps_range[current_idx] = (
                ((row * top_block_gps_step['lat'] + gps_range['lat'][0],
                  col * top_block_gps_step['lon'] + gps_range['lon'][0]),
                 ((row + 1) * top_block_gps_step['lat'] + gps_range['lat'][0],
                  (col + 1) * top_block_gps_step['lon'] + gps_range['lon'][0]))
            )
        # if there is more than one grid cell
        else:
            row = i // n_top_grid
            col = i - row * n_top_grid
            start_point = (row * top_block_gps_step['lat'] + gps_range['lat'][0],
                           col * top_block_gps_step['lon'] + gps_range['lon'][0])
            end_point = ((row + 1) * top_block_gps_step['lat'] + gps_range['lat'][0],
                         (col + 1) * top_block_gps_step['lon'] + gps_range['lon'][0])
            for k in range(M[i] ** 2):
                bottom_block_gps_step = {
                    'lat': (end_point[0] - start_point[0]) / M[i],
                    'lon': (end_point[1] - start_point[1]) / M[i]
                }
                row = k // M[i]
                col = k - row * M[i]
                grid_block_gps_range[current_idx + k] = (
                    ((row * bottom_block_gps_step['lat'] + start_point[0],
                      col * bottom_block_gps_step['lon'] + start_point[1]),
                     ((row + 1) * bottom_block_gps_step['lat'] + start_point[0],
                      (col + 1) * bottom_block_gps_step['lon'] + start_point[1]))
                )
    # print(grid_block_gps_range)

    # calculate the top grid range
    top_grid_block_gps_range = []
    for i in range(C):
        row = i // n_top_grid
        col = i - row * n_top_grid
        top_grid_block_gps_range.append(
            ((row * top_block_gps_step['lat'] + gps_range['lat'][0],
              col * top_block_gps_step['lon'] + gps_range['lon'][0]),
             ((row + 1) * top_block_gps_step['lat'] + gps_range['lat'][0],
              (col + 1) * top_block_gps_step['lon'] + gps_range['lon'][0]))
        )

    # calculate the total grid cell count
    n_grid = 0
    for i in range(C):
        n_grid += M[i] ** 2
    print('Total number of grid cells: %d' % n_grid)

    # write to file
    with open(opath_top_grid, 'w') as fw_top_grid:
        fw_top_grid.writelines(str(M))
    with open(opath_grid_block_gps_range, 'w') as fw_grid_block_range:
        fw_grid_block_range.write(str(grid_block_gps_range) + '\n')
        fw_grid_block_range.write(str(top_grid_block_gps_range) + '\n')

    # map the trajectories into the grid
    p = utils.ProgressBar(len(tot_traj), 'Mapping trajectories to grid')
    mapped_trajs = []
    for i in range(len(tot_traj)):
        p.update(i)
        mapped_traj = []
        for point in tot_traj[i]:
            # cal the idx in the top grid
            # C_idx = cal_point_idx(point, _step=top_block_gps_step,
            #                       _base={'lon': gps_range['lon'][0], 'lat': gps_range['lat'][0]})
            #
            # # cal the idx in the bottom grid
            # m = M[C_idx]
            # for j in range(C_idx):
            #     # add the previous bottom grid num
            #     C_idx += M[j] ** 2 if M[j] == 1 else M[j] ** 2 - 1
            for k in range(n_grid):
                grid_range = grid_block_gps_range[k]
                if grid_range[1][0] >= point[0] >= grid_range[0][0] and \
                        grid_range[1][1] >= point[1] >= grid_range[0][1]:
                    mapped_traj.append(k)
        mapped_trajs.append(mapped_traj)

    # # reverse map to grid
    # reverse_mapped_trajs = []
    # for traj in mapped_trajs:
    #     reverse_mapped_trajs.append(
    #         [np.mean(grid_block_gps_range[i], axis=0).tolist() for i in traj])
    # print(reverse_mapped_trajs)

    # write to file
    with open(opath_grid_traj, 'w') as fw_grid_traj:
        for mt in mapped_trajs:
            fw_grid_traj.writelines(str(mt) + '\n')

    # plot the figure
    if is_plot:
        plt.figure(figsize=(6, 5))
        p = utils.ProgressBar(len(tot_traj), 'Plotting grid trajectories')
        for i in range(len(tot_traj)):
            p.update(i)
            plt.plot([x[0] for x in tot_traj[i]], [y[1] for y in tot_traj[i]])
            plt.scatter([x[0] for x in tot_traj[i]], [y[1] for y in tot_traj[i]])
        # plot top grid lines
        top_gird_lines = cal_split(
            (gps_range['lat'][0], gps_range['lat'][1]),
            (gps_range['lon'][0], gps_range['lon'][1]), n_top_grid)
        for line in top_gird_lines:
            plt.plot([x[0] for x in line], [y[1] for y in line], c='black')
        # plot bottom grid lines
        for i in range(C):
            if M[i] > 1:
                bottom_grid_lines = cal_split(
                    (top_grid_block_gps_range[i][0][0], top_grid_block_gps_range[i][1][0]),
                    (top_grid_block_gps_range[i][0][1], top_grid_block_gps_range[i][1][1]),
                    M[i]
                )
                for line in bottom_grid_lines:
                    plt.plot([x[0] for x in line], [y[1] for y in line], c='black')
                print(M[i])
        plt.xlim(gps_range['lat'][0], gps_range['lat'][1])
        plt.ylim(gps_range['lon'][1], gps_range['lon'][0])
        plt.xlabel('Lat')
        plt.ylabel('Lon')
        ax = plt.gca()
        ax.xaxis.set_ticks_position('top')
        plt.savefig('grid_traj')
        plt.show()
    return n_grid
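# A standalone, hypothetical illustration of the top-grid index calculation in
# cal_point_idx above: with a 7x7 top grid over lat/lon 0-7 the step is 1.0 per
# cell, so a point at (2.5, 3.9) lands in row 2, column 3, i.e. index
# 2 * 7 + 3 = 17; points exactly on the upper edge are clamped back into the
# last row/column, mirroring grid_boundary_judge.
def point_to_top_grid_idx(point, n_grid, step, base):
    def clamp(idx):
        # Edge points (idx == n_grid) fall into the last cell.
        return idx if idx < n_grid else n_grid - 1
    row = clamp(int((point[0] - base['lat']) / step['lat']))
    col = clamp(int((point[1] - base['lon']) / step['lon']))
    return row * n_grid + col

_step = {'lat': 1.0, 'lon': 1.0}
_base = {'lat': 0.0, 'lon': 0.0}
print(point_to_top_grid_idx((2.5, 3.9), 7, _step, _base))  # -> 17
print(point_to_top_grid_idx((7.0, 7.0), 7, _step, _base))  # edge point -> 48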
def scrap_state(uf, start_page=1, end_page=-1, append=True):
    """
    Extracts all physicians' profiles from a given state (uf).
    If start_page > 1, a file containing a previous scrape for that state is loaded.

    Arguments:
        uf {string} -- state abbreviation

    Keyword Arguments:
        start_page {int} -- which page to start the scraping (default: {1})
        end_page {int} -- which page to end the scraping (default: {-1})

    Returns:
        boolean -- whether or not the scrape was successful
    """
    print("Scraping ", uf)
    options = webdriver.chrome.options.Options()
    options.add_argument("--incognito")
    driver = webdriver.Chrome(options=options)
    try:
        # Setting driver and variables
        progbar = utils.ProgressBar(elapsed_time=True)
        url = "https://portal.cfm.org.br/index.php?option=com_medicos&nomeMedico=&ufMedico={}&crmMedico=&municipioMedico=&tipoInscricaoMedico=&situacaoMedico=&detalheSituacaoMedico=&especialidadeMedico=&areaAtuacaoMedico=&pagina=3"
        url = url.format(uf)
        # Start at the requested page (the template URL hardcodes page 3)
        url = re.sub(r"&pagina=\d+", "&pagina={}".format(start_page), url)
        driver.get(url)

        # Searching for the total number of pages to scrape
        result = re.search("Mostrando página \d de (\d+)", driver.page_source)
        if result is not None:
            print("Total number of pages to scrape: ", result.groups()[0])
        if end_page == -1:
            end_page = 15000 if result is None else int(result.groups()[0])
        print("Scraping from pages {} to {} at {}".format(start_page, end_page, uf))

        # If start_page > 1, check an existing file with a previous scrape
        df = pd.DataFrame(columns=[
            "page", "name", "crm", "state", "subscription_date",
            "subscription_type", "status", "second_subscription", "address",
            "phone", "photo_url"
        ])
        progbar.update_progress(0)
        for page in range(start_page, end_page, 1):
            progbar.update_progress((page - 1) / end_page)

            # Checking if any profiles are being shown
            if len(driver.find_elements_by_class_name("resultado-mobile-coluna")) == 0:
                input("No profiles found for page {}. Reenter the captcha:".format(page))

            perfis = driver.find_elements_by_class_name("resultado-mobile-coluna")
            photos = driver.find_elements_by_class_name("img-thumbnail")

            # Extracting information from each profile
            for i_profile, profile in enumerate(perfis):
                dados = profile.text.split('\n')
                name = dados[0]
                result = re.search("\d+", dados[1])
                crm = None if not result else result.group()
                result = re.search("\w{2}$", dados[1])
                state = None if not result else result.group()
                result = re.search("Data de Inscrição: (.*)", dados[2])
                subscription_date = None if result is None else result.groups()[0]
                result = re.search("Inscrição: (.*)", dados[3])
                subscription_type = None if result is None else result.groups()[0]
                result = re.search("Situação: (.*)", dados[4])
                status = None if result is None else result.groups()[0]
                result = re.search("Inscrições em outro estado: (.*)", dados[5])
                second_subscription = None if result is None else result.groups()[0]
                result = re.search("Especialidades/Áreas de Atuação: (.*)", dados[6])
                specialty = None if result is None else result.groups()[0]
                result = re.search("Endereço: (.*)", dados[7])
                address = None if result is None else result.groups()[0]
                result = re.search("Telefone\(s\): (.*)", dados[8])
                phone = None if result is None else result.groups()[0]
                photo_url = photos[i_profile].get_attribute("src")
                df.loc[df.shape[0]] = [
                    page, name, crm, state, subscription_date, subscription_type,
                    status, second_subscription, address, phone, photo_url
                ]

            # Accessing next page of profiles
            next_url = re.sub(r"&pagina=\d+", "&pagina={}".format(page + 1), url)
            driver.get(next_url)

        print("Number of extracted profiles for {}: {}".format(uf, df.shape[0]))
        filepath = "./data/profiles/df_{}.csv".format(uf)
        save_dataframe(df, filepath, append)
        driver.close()
        return True
    except Exception as e:
        print("Error scraping {} at page {}: {}".format(uf, page, e))
        save_dataframe(df, filepath, append)
        driver.close()
        return False
def getFile(url, destdir, filename='', quiet=None) -> bool:
    """Download file from 'url' into 'destdir'."""
    if quiet is None:
        quiet = CraftCore.settings.getboolean("ContinuousIntegration", "Enabled", False)
    CraftCore.log.debug("getFile called. url: %s" % url)
    if url == "":
        CraftCore.log.error("fetch: no url given")
        return False

    pUrl = urllib.parse.urlparse(url)
    if not filename:
        filename = os.path.basename(pUrl.path)

    utils.createDir(destdir)

    if pUrl.scheme == "s3":
        return s3File(url, destdir, filename)
    elif pUrl.scheme == "minio":
        return minioGet(pUrl.netloc + pUrl.path, destdir, filename)

    absFilename = Path(destdir) / filename
    # try the other methods as fallback if we are bootstrapping
    bootStrapping = not (CraftCore.standardDirs.etcDir() / "cacert.pem").exists()
    if not CraftCore.settings.getboolean("General", "NoWget"):
        if CraftCore.cache.findApplication("wget"):
            if wgetFile(url, destdir, filename, quiet):
                return True
            if not bootStrapping:
                return False

    if CraftCore.cache.findApplication("curl"):
        if curlFile(url, destdir, filename, quiet):
            return True
        if not bootStrapping:
            return False

    if bootStrapping and absFilename.exists():
        os.remove(absFilename)

    if absFilename.exists():
        return True

    CraftCore.log.info(f"Downloading: {url} to {absFilename}")
    with utils.ProgressBar() as progress:
        def dlProgress(count, blockSize, totalSize):
            if totalSize != -1:
                progress.print(int(count * blockSize * 100 / totalSize))
            else:
                sys.stdout.write(("\r%s bytes downloaded" % (count * blockSize)))
                sys.stdout.flush()

        try:
            urllib.request.urlretrieve(
                url, filename=absFilename,
                reporthook=dlProgress if CraftCore.debug.verbose() >= 0 else None)
        except Exception as e:
            CraftCore.log.warning(e)
            powershell = Powershell()
            if powershell.pwsh:
                return powershell.execute([
                    f"[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; (new-object net.webclient).DownloadFile(\"{url}\", \"{absFilename}\")"
                ])
            return False
    return True
# In[19]:

len(IM_train), len(y_train)

# In[20]:

from collections import Counter

# In[21]:

Counter(y_train)

# In[22]:

pb = utils.ProgressBar(worksum=len(IM_train))
pb.startjob()
features = []
targets = []
for one_image, one_target in zip(IM_train, y_train):
    one_feature = []
    if len(one_image.shape) != 3:
        continue
    if one_target == 0 and random.random() > 0.2:
        pb.complete(1)
        continue
    features.append(one_image)
    targets.append(one_target)
    pb.complete(1)

# In[23]:
    activation_func=Tanh(),
    flatten=True,
    dropout=0.8,
)
# pool = layers.PoolingLayer(pool_size=7, flatten=True)
dense = layers.DenseLayer(10, Linear(), weight_initialisation='glorot')

nn.add_layer(convLayer)
# nn.add_layer(pool)
nn.add_layer(dense)

optimizer = optimizers.MomentumSGD(learning_rate=0.01, momentum=0.90)
trainer = NN.Trainer(nn, optimizer)

# statistics
batch_size = 50
bar = utils.ProgressBar(len(X), batch_size)
trainer.add_batch_callback(bar)
# loss_history = utils.LossHistory(nn, avg_over=50)
# trainer.add_batch_callback(loss_history)
accuracy = utils.TrainAccuracy(nn, x_test[:500], y_test[:500])
trainer.add_epoch_callback(accuracy)
visualiser = utils.ChannelVisualiser(convLayer)
trainer.add_on_finish_callback(visualiser)

trainer.optimize(X, y, x_test, y_test, batch_size=batch_size, epochs=6)
def main(args):
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported.")

    utils.setup_experiment(args)
    utils.init_logging(args)

    train_loaders, valid_loaders = data.build_dataset(args.dataset, args.data_path,
                                                      batch_size=args.batch_size)
    model = models.build_model(args).cuda()
    optimizer = optim.build_optimizer(args, model.parameters())
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters() if p.requires_grad):,} parameters")

    meters = {name: utils.RunningAverageMeter(0.98)
              for name in ["loss", "context", "graph", "target"]}
    acc_names = ["overall"] + [f"task{idx}" for idx in range(len(valid_loaders))]
    acc_meters = {name: utils.AverageMeter() for name in acc_names}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    global_step = -1
    for epoch in range(args.num_epochs):
        acc_tasks = {f"task{idx}": None for idx in range(len(valid_loaders))}
        for task_id, train_loader in enumerate(train_loaders):
            for repeat in range(args.num_repeats_per_task):
                train_bar = utils.ProgressBar(train_loader, epoch, prefix=f"task {task_id}")
                for meter in meters.values():
                    meter.reset()

                for batch_id, (images, labels) in enumerate(train_bar):
                    model.train()
                    global_step += 1
                    images, labels = images.cuda(), labels.cuda()
                    outputs = model(images, labels, task_id=task_id)
                    if global_step == 0:
                        continue

                    loss = outputs["loss"]
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                    meters["loss"].update(loss.item())
                    meters["context"].update(outputs["context_loss"].item())
                    meters["target"].update(outputs["target_loss"].item())
                    meters["graph"].update(outputs["graph_loss"].item())
                    train_bar.log(dict(**meters, lr=optimizer.get_lr()))

                    if writer is not None:
                        writer.add_scalar("loss/train", loss.item(), global_step)
                        gradients = torch.cat([p.grad.view(-1) for p in model.parameters()
                                               if p.grad is not None], dim=0)
                        writer.add_histogram("gradients", gradients, global_step)

            model.eval()
            for meter in acc_meters.values():
                meter.reset()
            for idx, valid_loader in enumerate(valid_loaders):
                valid_bar = utils.ProgressBar(valid_loader, epoch, prefix=f"task {task_id}")
                for batch_id, (images, labels) in enumerate(valid_bar):
                    model.eval()
                    with torch.no_grad():
                        images, labels = images.cuda(), labels.cuda()
                        outputs = model.predict(images, labels, task_id=idx)
                        correct = outputs["preds"].eq(labels).sum().item()
                        acc_meters[f"task{idx}"].update(100 * correct, n=len(images))
                acc_meters["overall"].update(acc_meters[f"task{idx}"].avg)

            acc_tasks[f"task{task_id}"] = acc_meters[f"task{task_id}"].avg
            if writer is not None:
                for name, meter in acc_meters.items():
                    writer.add_scalar(f"accuracy/{name}", meter.avg, global_step)
            logging.info(train_bar.print(dict(**meters, **acc_meters, lr=optimizer.get_lr())))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=acc_meters["overall"].avg, mode="max")

    bwt = sum(acc_meters[task].avg - acc
              for task, acc in acc_tasks.items()) / (len(valid_loaders) - 1)
    logging.info(
        f"Done training! Final accuracy {acc_meters['overall'].avg:.4f}, "
        f"backward transfer {bwt:.4f}.")
def fit(self, args, args_i, args_n):
    self.args_i = args_i
    self.args_n = args_n
    self.args = utils.Object(**args)
    self.pre_fit()
    self.prt_info()
    self.make_model()

    unique_fn = '{}-{}'.format(logger.unique_fn, self.args_i)
    tensorboard_dir = 'tensorboard/{}'.format(unique_fn)
    self.tb_dirs.append(tensorboard_dir)
    train_writer = tf.summary.FileWriter(tensorboard_dir, self.sess.graph)
    saver = tf.train.Saver()

    summ_loss = tf.Summary()
    summ_loss_v = summ_loss.value.add()
    summ_loss_v.tag = 'loss_per_batch'
    summaries = tf.summary.merge_all()

    batch_cnt = 0
    best_vali = None
    brk = 0
    # ret, _ = self.data.evaluate('vali', self.predict)
    # print(ret)
    has_ckpt = False
    try:
        for epochs in range(self.max_epochs):
            loss = []
            progress_bar = utils.ProgressBar(self.args.batch_steps, msg='training')
            for step in range(self.args.batch_steps):
                batch = next(self.data_generator)
                data = dict(zip(self.train_inputs, batch))
                # print(len(self.train_inputs), len(batch)); input()
                # print(data); input()
                if step == 0 and summaries is not None:
                    summ = self.sess.run(summaries, data)
                    train_writer.add_summary(summ, global_step=batch_cnt)
                if self.minimize2 is None:
                    _, _loss = self.sess.run([self.minimize, self.loss], data)
                else:
                    _, _, _loss = self.sess.run(
                        [self.minimize, self.minimize2, self.loss], data)
                batch_cnt += 1
                loss.append(_loss)
                summ_loss_v.simple_value = _loss
                train_writer.add_summary(summ_loss, global_step=batch_cnt)
                progress_bar.make_a_step()
            self.good_log = True
            train_time = progress_bar.stop()

            vali, vali_time = self.data.evaluate('vali', self.predict)
            if vali.is_better_than(best_vali):
                brk = 0
                best_vali = vali
                saver.save(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
                has_ckpt = True
            else:
                brk += 1
            if self.run_test:
                test, test_time = self.data.evaluate('test', self.predict)
                vali = '{} {}'.format(vali, test)
                vali_time += test_time
            msg = '#{}/{}, loss: {:.5f}, vali: {}, brk: {}, time: {:.1f}s {:.1f}s'.format(
                epochs + 1, self.max_epochs, np.mean(loss), vali, brk,
                train_time, vali_time)
            log(msg, i=-1, red=(brk == 0))
            if self.early_stop > 0 and brk >= self.early_stop:
                break
    except KeyboardInterrupt:
        utils.timer.stop()
        log('KeyboardInterrupt')
    except Exception as e:
        utils.timer.stop()
        log('Exception: {}'.format(e), red=True)

    if has_ckpt:
        saver.restore(self.sess, 'save/{}_model.ckpt'.format(unique_fn))
    return self.after_fit()