def index_study_path(args):
    logger = logging.getLogger(__toolname__ + '.index_study_path_aecg')
    startmsg = f"Indexing: '{args.dir}' to: '{args.oxlsx}'"
    print(f"{startmsg}")
    logger.info(f",,{startmsg}")
    studyindex_info = aecg.tools.indexer.StudyInfo()
    studyindex_info.StudyDir = os.path.normpath(args.dir)
    studyindex_info.IndexFile = os.path.normpath(args.oxlsx)
    studyindex_info.Description = args.description
    studyindex_info.Version = aecg.__version__
    studyindex_info.AppType = args.apptype
    studyindex_info.AppNum = f"{int(args.appnum):06d}"
    studyindex_info.StudyID = args.studyid
    studyindex_info.NumSubj = args.numsubj
    studyindex_info.NECGSubj = args.necgsubj
    studyindex_info.TotalECGs = args.totalecgs
    studyindex_info.AnMethod = aecg.tools.indexer.AnnotationMethod[
        args.annmethod].name
    studyindex_info.AnLead = args.annlead
    studyindex_info.AnNbeats = args.nbeatsann
    studyindex_info.Sponsor = args.sponsor
    n_cores = args.nprocs
    pbar = tqdm(desc=f"Indexing {studyindex_info.StudyDir} directory")
    mycb = aecg.tools.indexer.IndexingProgressCallBack(pbar)
    studyindex_df = aecg.tools.indexer.index_study(
        studyindex_info, args.allintervals == "Y", n_cores, mycb)
    pbar.close()
    return studyindex_df
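# A possible invocation sketch, not from the original source: it only
# illustrates which attribute names `index_study_path` reads from `args`.
# All values are placeholders, and "RHYTHM" is a hypothetical
# AnnotationMethod member name.
import argparse

example_args = argparse.Namespace(
    dir="studies/demo", oxlsx="studies/demo_index.xlsx",
    description="demo study", apptype="NDA", appnum="123456",
    studyid="S-001", numsubj=10, necgsubj=10, totalecgs=100,
    annmethod="RHYTHM", annlead="II", nbeatsann=3, sponsor="ACME",
    nprocs=2, allintervals="N")
# studyindex_df = index_study_path(example_args)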
def validation(model, device, criterion, metrics, dataloader,
               pbar_desc="validation phase"):
    model.eval()
    val_loss = 0.0
    val_metrics = {k: 0.0 for k in metrics}
    for origins, masks in tqdm(dataloader, desc=pbar_desc):
        num = origins.size(0)
        origins = origins.to(device)
        masks = masks.to(device)
        with torch.no_grad():
            outs = model(origins)
            val_loss += criterion(outs, masks).item() * num
            val_metrics = {k: v + metrics[k](masks, outs).item() * num
                           for k, v in val_metrics.items()}
    val_loss = val_loss / len(dataloader.sampler)
    val_metrics = {k: v / len(dataloader.sampler)
                   for k, v in val_metrics.items()}
    return {
        "val_loss": val_loss,
        **val_metrics,
    }
def train(model, device, criterion, optimizer, dataloader,
          accumulation_steps, pbar_desc="train phase"):
    model.train()
    train_loss = 0.0
    for i, (origins, masks) in enumerate(tqdm(dataloader, desc=pbar_desc)):
        num = origins.size(0)
        origins = origins.to(device)
        masks = masks.to(device)
        outs = model(origins)
        loss = criterion(outs, masks)
        train_loss += loss.item() * num
        # scale the loss so gradients accumulated over several batches
        # average out to one effective optimizer step
        loss = loss / accumulation_steps
        loss.backward()
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
    train_loss = train_loss / len(dataloader.sampler)
    return {
        "train_loss": train_loss,
    }
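# A minimal sketch (not from the original source) showing how the `train`
# and `validation` helpers above might be combined into an epoch loop;
# `fit` and all of its arguments are assumed, not part of the project.
def fit(model, device, criterion, optimizer, metrics,
        train_loader, val_loader, epochs, accumulation_steps=1):
    history = []
    for epoch in range(epochs):
        train_logs = train(model, device, criterion, optimizer,
                           train_loader, accumulation_steps,
                           pbar_desc=f"train epoch {epoch}")
        val_logs = validation(model, device, criterion, metrics,
                              val_loader, pbar_desc=f"val epoch {epoch}")
        history.append({"epoch": epoch, **train_logs, **val_logs})
    return history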
def main():
    req = get(URL, stream=True)
    with open("data.zip", "wb") as file:
        for chunk in tqdm(
                req.iter_content(chunk_size=CHUNK_SIZE),
                total=ceil(int(req.headers['content-length']) / CHUNK_SIZE)):
            file.write(chunk)
    system("unzip data.zip")
    system("rm data.zip")
    system("rm -rf __MACOSX")
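# A portable alternative sketch: the system("unzip ...")/system("rm ...")
# calls above are Unix-only, so the same steps could be done with the
# standard library. This is an assumption-level rewrite, not the original
# author's code.
import os
import shutil
import zipfile

def extract_and_clean(archive="data.zip"):
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(".")  # replaces system("unzip data.zip")
    os.remove(archive)  # replaces system("rm data.zip")
    shutil.rmtree("__MACOSX", ignore_errors=True)  # replaces rm -rf __MACOSX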
def parse(self, progress_bar: bool = True):
    w = walk(self.path)
    self.data = np.array([[[r, os.path.join(r, f_)] for f_ in f]
                          for r, _, f in list(w)[1:]]).reshape(-1, 2)
    self.y = np.array(self.data[:, 0])
    self.x = np.array([
        cv2.cvtColor(cv2.resize(cv2.imread(i), (self.resize, self.resize)),
                     cv2.COLOR_BGR2RGB) / 255
        for i in (tqdm(self.data[:, 1]) if progress_bar else self.data[:, 1])
    ]).astype(np.float32)
    return self
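# A minimal host-class sketch: `parse` takes `self`, so it was clearly cut
# out of a class holding `path` and `resize`. `ImageFolderLoader` is a
# hypothetical name, not from the original source.
class ImageFolderLoader:
    def __init__(self, path, resize=128):
        self.path = path
        self.resize = resize
        self.data = self.x = self.y = None

    parse = parse  # reuse the function defined above as a method

# loader = ImageFolderLoader("data/train").parse()
# print(loader.x.shape, loader.y.shape)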
def wait(self, query={}):
    task_stat = self.task_stat(query)
    total = self._get_total(task_stat)
    last_undone_n = self._get_undone_n(task_stat)
    with tqdm(total=total, initial=total - last_undone_n) as pbar:
        while True:
            time.sleep(10)
            undone_n = self._get_undone_n(self.task_stat(query))
            pbar.update(last_undone_n - undone_n)
            last_undone_n = undone_n
            if undone_n == 0:
                break
def extract_content_list_wise(arxiv_url, url):
    content, status_code = get_content(url)
    if status_code != 200:
        return
    urls = content.find_all('a')
    urls = [x.get('href') for x in urls if '/list/' in str(x)]
    urls = [url for url in urls if 'recent' not in url]
    urls = [url for url in urls if '?' not in url]
    urls = [arxiv_url + x for x in urls]
    urls = list(set(urls))
    for url in tqdm(urls, "Years"):
        extract_arxiv_links(arxiv_url, url)
def download_file(path):
    with open(path, "r") as file:
        image_urls = file.read().split("\n")
    broken = []
    for line in tqdm(image_urls):
        if not line:  # skip blank lines (e.g. a trailing newline)
            continue
        name, url = line.split("\t")
        with open(f"./images/{name}.jpg", "wb") as image:
            with requests.get(url) as response:
                if response.status_code == 200:
                    image.write(response.content)
                else:
                    broken.append([name, url])
    return broken
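# A possible call site (assumptions: the TSV holds "name<TAB>url" rows,
# and images/ may not exist yet, hence the makedirs guard).
import os

if __name__ == "__main__":
    os.makedirs("images", exist_ok=True)
    failed = download_file("image_urls.tsv")
    print(f"{len(failed)} downloads failed")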
def main(config):
    """
    Responsible for the whole webscrape of the ARXIV website.

    :param config: YAML config file content
    :return: None
    """
    url_list = extract_subjects(config['Arxiv_Website'], config['Subjects'])
    logging.info("Number of subjects found is {}".format(len(url_list)))
    if len(url_list) != 0:
        if 'all' not in config['Subjects']:
            keys = list(config['Subjects'].keys())
            # pair each subject URL with its configured list of years
            for idx, url in enumerate(tqdm(url_list, desc="Subjects")):
                extract_content_year_wise(config['Arxiv_Website'], url,
                                          config['Subjects'][keys[idx]])
                logging.info("Done with {}".format(url))
        else:
            for url in tqdm(url_list, desc="Subjects"):
                extract_content_year_wise(config['Arxiv_Website'], url, None)
                logging.info("Done with {}".format(url))
    logging.info("Done")
def extract_content_year_wise(arxiv_url, url, years):
    content, status_code = get_content(url)
    if status_code != 200:
        return
    urls = content.find_all('a')
    if years is None:
        urls = [x.get('href') for x in urls if '/year/' in str(x)]
    else:
        years = [str(x) for x in years]
        urls = [
            x.get('href') for x in urls
            if str(x.text) in years and '/year/' in str(x)
        ]
    logging.info("Number of years found for {} is {}".format(url, len(urls)))
    urls = [arxiv_url + x for x in urls]
    for link in tqdm(urls, desc="Year for {}".format(url)):
        extract_content_list_wise(arxiv_url, link)
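# `get_content` is called throughout the scraper above but not shown in
# this section; a minimal sketch consistent with its call sites (parsed
# HTML plus the HTTP status code) might look like this. The BeautifulSoup
# parser choice is an assumption.
import requests
from bs4 import BeautifulSoup

def get_content(url):
    response = requests.get(url)
    return BeautifulSoup(response.text, "html.parser"), response.status_code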
def run_inference(
    config_filename,
    checkpoint_filename,
    output_folder,
    use_tta=False,
    out_shape=(640, 400),
):
    set_global_seed(42)
    output_folder = Path(output_folder)
    config_parser = ConfigParser(
        config_filename, False, **{
            "checkpoint.filename": checkpoint_filename,
            "checkpoint.model": True,
        })
    config = config_parser()

    dataloader = config.dataloaders.test
    device = config.device
    model = config.model
    model.eval()
    model = tta.TTAWrapper(model, tta.fliplr_image2mask) if use_tta else model

    print("Inference stage")
    filenames = []
    with torch.no_grad():
        for imgs, pos in tqdm(dataloader):
            imgs = imgs.to(device)
            outs = model(imgs)
            outs = outs.argmax(1).cpu()
            seqs = pos[0].long().tolist()
            orders = pos[1].long().tolist()
            for out, seq, order in zip(outs, seqs, orders):
                out = out.numpy().astype(np.uint8)
                out = cv2.resize(out, out_shape)
                filename = f"S_{seq}/{order}.npy"
                path = output_folder / filename
                path.parent.mkdir(parents=True, exist_ok=True)
                np.save(path, out)
                filenames.append(filename)

    with open(output_folder / "output.txt", "w") as output_file:
        output_file.write("\n".join(filenames))
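# Hypothetical invocation (the paths are placeholders, not from the source):
# run_inference(
#     config_filename="configs/inference.yml",
#     checkpoint_filename="logs/best.pth",
#     output_folder="predictions",
#     use_tta=True,
# )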
def main():
    system("mkdir annotations")
    system("mkdir images")
    print("[+] Downloading ")
    for i in tqdm(range(1, 411)):
        file = f"BloodImage_{i:05d}"
        url = f"{URL}/Annotations/{file}.xml"
        req = get(url)
        if req.status_code != 200:
            continue
        with open(f"annotations/{file}.xml", "wb") as xml:
            xml.write(req.content)
        url = f"{URL}/JPEGImages/{file}.jpg"
        req = get(url)
        if req.status_code != 200:
            continue
        with open(f"images/{file}.jpg", "wb") as jpg:
            jpg.write(req.content)
def calculatePageBreaks(self, lines: List[Line]):
    badness = np.full((len(lines) + 1, len(lines) + 1), inf)
    for i in irange(0, len(lines) - 1):
        for j in irange(i, len(lines)):
            badness[i, j] = (self.params.page_height -
                             sum(l.height for l in stripGaps(lines[i:j + 1])))**3
            if badness[i, j] < 0:
                badness[i, j] = inf
            elif lines[i].no_page_break:
                badness[i, j] += 1e50
            elif j == len(lines):
                badness[i, j] = 0

    scores = np.full((len(lines) + 1, len(lines)), inf)
    bps = {}
    j = len(lines)
    for n in tqdm(irange(0, len(lines) - 1)):
        for i in irange(len(lines) - 1, 0, -1):
            if n == 0:
                scores[i, n] = inf
                bps[(i, n)] = []
            else:
                min_score = badness[i, j]
                min_bps = []
                for x in irange(i + 1, j):
                    score = scores[x, n - 1] + badness[i, x - 1]
                    if score < min_score:
                        min_score = score
                        min_bps = [x] + bps[(x, n - 1)]
                scores[i, n] = min_score
                bps[(i, n)] = min_bps
    return bps[(0, len(lines) - 1)]
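# `irange` and `stripGaps` are used above but not defined in this section.
# A plausible inclusive-range helper consistent with calls such as
# irange(0, n - 1) and irange(n - 1, 0, -1) is sketched below; the real
# project may define it differently.
def irange(start, stop, step=1):
    """range() with an inclusive stop bound."""
    return range(start, stop + (1 if step > 0 else -1), step)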
def wait(self, query={}):
    """
    When multiprocessing, the main process may fetch nothing from
    TaskManager because there are still some running tasks. So the main
    process should wait until all tasks are trained well by other
    processes or machines.

    Args:
        query (dict, optional): the query dict. Defaults to {}.
    """
    task_stat = self.task_stat(query)
    total = self._get_total(task_stat)
    last_undone_n = self._get_undone_n(task_stat)
    if last_undone_n == 0:
        return
    self.logger.warning(
        f"Waiting for {last_undone_n} undone tasks. "
        "Please make sure they are running.")
    with tqdm(total=total, initial=total - last_undone_n) as pbar:
        while True:
            time.sleep(10)
            undone_n = self._get_undone_n(self.task_stat(query))
            pbar.update(last_undone_n - undone_n)
            last_undone_n = undone_n
            if undone_n == 0:
                break
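# Possible call-site sketch (assumptions: `tm` is the object owning the
# `wait` method above, and the query keys follow the task store's schema):
# tm.wait(query={"experiment": "exp-1"})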
import dropbox
from tqdm.cli import tqdm
from pathlib import Path

data_folder = Path("data")
archive_folder = Path("archive")
sparse_segm_folder = data_folder / "sparse-segm"
token_filename = archive_folder / "dropbox_token.txt"

with open(token_filename) as token_file:
    TOKEN = token_file.read()

dbx = dropbox.Dropbox(TOKEN)

folder_name = "/Openedsdata2020/openEDS2020-SparseSegmentation/participant/"
for entry in tqdm(dbx.files_list_folder(folder_name).entries):
    output_filename = str(
        sparse_segm_folder / entry.path_display.replace(folder_name, ""))
    if isinstance(entry, dropbox.files.FolderMetadata):
        dbx.files_download_zip_to_file(output_filename + ".zip",
                                       entry.path_lower)
    else:
        dbx.files_download_to_file(output_filename, entry.path_lower)
img = []
with open('input.txt') as file:
    enhancer = np.array(list(file.readline().strip())) == '#'
    file.readline()
    while line := file.readline().strip():
        img.append(np.array(list(line)) == '#')
img = np.array(img)

for line in img:
    print(''.join(['.', '#'][int(d)] for d in line))
print()

# it takes around 7 seconds
from tqdm.cli import tqdm

outer_value = False  # assumption: the infinite border starts dark
for i in tqdm(range(50)):
    points_that_might_change_plus_border = np.pad(
        img, 2, mode='constant', constant_values=outer_value)
    output = np.zeros(points_that_might_change_plus_border.shape)
    for x in range(1, points_that_might_change_plus_border.shape[0] - 1):
        for y in range(1, points_that_might_change_plus_border.shape[1] - 1):
            data = points_that_might_change_plus_border[x - 1:x + 2,
                                                        y - 1:y + 2]
            binary = ''.join(['0', '1'][int(d)] for d in data.flatten())
            index = int(binary, 2)
            output[x, y] = enhancer[index]
    img = output[1:-1, 1:-1]  # trim edge
    # assumption: the infinite border flips each step per the enhancer rule
    outer_value = enhancer[511] if outer_value else enhancer[0]
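# A possible vectorized alternative (assumption: SciPy is available). The
# per-pixel 3x3 loop above can be replaced by a single correlation with
# power-of-two weights, which maps every neighborhood directly to its
# 9-bit enhancer index.
from scipy.ndimage import correlate

weights = (2 ** np.arange(9)[::-1]).reshape(3, 3)  # top-left bit is 2**8

def enhance_once(img, enhancer, outer_value):
    padded = np.pad(img, 2, mode='constant', constant_values=outer_value)
    idx = correlate(padded.astype(int), weights,
                    mode='constant', cval=int(outer_value))
    return enhancer[idx][1:-1, 1:-1]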
dl = {
    'pixelphase': xy_pairs,
    'fitshape': [(31, 31)],
    'σ': [0, 50],
    'λ': np.linspace(30_000, 200_000, 3),
    'model_oversampling': [2],
    'model_degree': [3],
    'model_mode': ['grid'],
    'fit_accuracy': [1.49012e-08],
    'use_weights': [False, True],
    'return_imgs': [True]
}

from thesis_lib.util import DebugPool, dictoflists_to_listofdicts

with DebugPool() as p:
    # with mp.Pool() as p:
    results = pd.DataFrame.from_records(
        p.map(fit_models_dictarg, tqdm(create_arg_list(dl))))

results = transform_dataframe(results)
print(results.dev.describe())

# results = results[['noise', 'residual', 'use_weights', 'pixelphase']]
# results.noise = results.noise.transform(lambda n: (n.gauss_std, n.poisson_std))
# plot_fitshape(results[results.n_sources1d == 1])
plot_fitshape(results)
plot_xy_deviation(results)
plot_phase_vs_deviation(results)
plot_phase_vs_deviation3d(results)
plot_noise_vs_weights(results)
plt.show()
import json
from copy import copy
from io import BytesIO

from rh import _get_info, _rgetv
from tqdm.cli import tqdm

if __name__ == "__main__":
    i = 0
    next_url = "https://api.robinhood.com/orders/"
    orders = []
    while next_url is not None:
        resp = _get_info(next_url)
        next_url = resp["next"]
        orders.extend(resp["results"])
        i += 1
    with tqdm(total=len(orders)) as pbar:
        for order in orders:
            # replace instrument/position URLs with their fetched objects
            for key in ["instrument", "position", "instrument.splits"]:
                try:
                    _keys = key.split(".")
                    _url = _rgetv(order, _keys)
                    _rgetv(order, _keys[:-1])[_keys[-1]] = copy(_get_info(_url))
                except KeyError:
                    continue
            pbar.update(1)
    with open("summary.json", "w") as fp:
        json.dump(orders, fp)