def test_progressbar(self):
    u = MockBuffer()
    p = ProgressBar(u)
    p.set(10, 100)
    a = u.get()[-4:]
    assert a == '10%)'
    assert " ===== " in u.get()
def test_progressbar_simple(self):
    u = MockBuffer()
    p = ProgressBar(u, 100, 'test')
    for _ in range(10):
        p.increment_progress()
    a = u.get()[-4:]
    assert a == '10%)'
    assert " ===== " in u.get()
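
# The two tests above only pin down an interface: a writable buffer whose
# contents can be read back, and a ProgressBar that renders an '='-bar plus a
# "(done/total, NN%)" suffix. A minimal sketch of what they assume follows;
# the rendering details are guesses, not the project's real classes.
class MockBuffer(object):
    '''Captures writes so tests can assert on rendered terminal output.'''
    def __init__(self):
        self._data = ''

    def write(self, text):
        self._data += text

    def flush(self):
        pass

    def get(self):
        return self._data


class ProgressBar(object):
    '''Renders a 50-column '=' bar followed by a "(done/total, NN%)" suffix.'''
    def __init__(self, buf, total=None, name=''):
        self.buf = buf
        self.total = total
        self.name = name
        self.done = 0
        self.width = 50  # one '=' per 2% of progress, so 10% -> '====='

    def set(self, done, total):
        filled = int(self.width * done / total)
        bar = ('=' * filled).ljust(self.width)
        self.buf.write('\r %s (%d/%d, %d%%)' % (bar, done, total,
                                                100 * done // total))

    def increment_progress(self):
        self.done += 1
        self.set(self.done, self.total)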
def download_action(args):
    action = "download"
    allowed_states = ["SUBMITTED", "FINISHED"]

    def action_func_download(study_manager, remote):
        return study_manager.download(remote,
                                      force=args.force,
                                      compress_only=args.compress_only)

    def output_handler_download(output):
        pass

    progress_bar_download = ProgressBar("Downloading: ")
    state_action(args, action, allowed_states, action_func_download,
                 output_handler_download, progress_bar_download)
def upload_action(args):
    action = "upload"
    allowed_states = ["CREATED"]

    def action_func_upload(study_manager, remote):
        return study_manager.upload(remote,
                                    array_job=args.array_job,
                                    force=args.force)

    def output_handler_upload(output):
        pass

    progress_bar_upload = ProgressBar("Uploading: ")
    state_action(args, action, allowed_states, action_func_upload,
                 output_handler_upload, progress_bar_upload)
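
# Both actions feed the same dispatcher. The real state_action lives elsewhere
# in this project; this hypothetical skeleton (iter_remotes and the .state
# attribute are invented for illustration) only shows the shape the two call
# sites above rely on: filter by job state, run the action, hand the result
# to the output handler.
def state_action(args, action, allowed_states, action_func, output_handler,
                 progress_bar):
    for study_manager, remote in iter_remotes(args):  # hypothetical helper
        if study_manager.state not in allowed_states:
            continue
        output_handler(action_func(study_manager, remote))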
def enumerate(self, url, base_url_supplied, scanning_method,
              iterator_returning_method, iterator_len, max_iterator=500,
              threads=10, verb='head', timeout=15, hide_progressbar=False,
              imu=None):
    '''
    @param url base URL for the website.
    @param base_url_supplied base URL for themes or plugins,
        e.g. '%ssites/all/modules/%s/'.
    @param scanning_method see ScanningMethod.
    @param iterator_returning_method a function which returns an element
        that, when iterated, will return a full list of plugins.
    @param iterator_len the number of items the above iterator can return,
        regardless of user preference.
    @param max_iterator integer that will be passed to
        iterator_returning_method.
    @param threads number of threads.
    @param verb the HTTP verb to use. Valid options are 'get' and 'head'.
    @param timeout the time, in seconds, that requests should wait before
        throwing an exception.
    @param hide_progressbar if True, the progress bar will not be displayed.
    @param imu interesting module URLs: a list of tuples in the format
        [('readme.txt', 'default readme')].
    '''
    if common.is_string(base_url_supplied):
        base_urls = [base_url_supplied]
    else:
        base_urls = base_url_supplied

    requests_verb = getattr(self.session, verb)
    futures = []
    with ThreadPoolExecutor(max_workers=threads) as executor:
        for base_url in base_urls:
            plugins = iterator_returning_method(max_iterator)

            if scanning_method == ScanningMethod.not_found:
                url_template = base_url + self.module_common_file
            else:
                url_template = base_url

            for plugin_name in plugins:
                plugin_url = url_template % (url, plugin_name)
                future = executor.submit(requests_verb, plugin_url,
                                         timeout=timeout)

                if plugin_url.endswith('/'):
                    final_url = plugin_url
                else:
                    final_url = dirname(plugin_url) + "/"

                futures.append({
                    'base_url': base_url,
                    'future': future,
                    'plugin_name': plugin_name,
                    'plugin_url': final_url,
                })

        if not hide_progressbar:
            p = ProgressBar(sys.stderr)
            items_progressed = 0
            max_possible = max_iterator if int(max_iterator) < int(iterator_len) else iterator_len
            items_total = int(max_possible) * len(base_urls)

        no_results = True
        found = []
        for future_array in futures:
            if not hide_progressbar:
                items_progressed += 1
                p.set(items_progressed, items_total)

            r = future_array['future'].result()
            if r.status_code in [200, 403]:
                plugin_url = future_array['plugin_url']
                plugin_name = future_array['plugin_name']

                no_results = False
                found.append({
                    'name': plugin_name,
                    'url': plugin_url
                })
            elif r.status_code >= 500:
                self.out.warn('\rGot a 500 error. Is the server overloaded?')

        if not hide_progressbar:
            p.hide()

    # Use an identity check against None rather than `imu != None`.
    if imu is not None and not no_results:
        found = self._enumerate_plugin_if(found, verb, threads, imu)

    return found, no_results
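
# Worked expansion of the URL template above, assuming the docstring's example
# base URL and a module_common_file of 'LICENSE.txt' (the actual attribute
# value depends on the scanner subclass):
url_template = '%ssites/all/modules/%s/' + 'LICENSE.txt'
probe = url_template % ('http://example.com/', 'views')
assert probe == 'http://example.com/sites/all/modules/views/LICENSE.txt'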
def evaluate(args, model, tokenizer, writer):
    metric = SpanEntityScore(args.id2label)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    eval_features = load_examples(args, tokenizer, data_type='dev')
    print("***** Running eval *****")
    eval_loss = 0.0
    nb_eval_steps = 0
    pbar = ProgressBar(n_total=len(eval_features), desc="Evaluating")
    for step, f in enumerate(eval_features):
        # Evaluate one example at a time, truncated to its real length.
        input_lens = f.input_len
        input_ids = torch.tensor([f.input_ids[:input_lens]], dtype=torch.long).to(args.device)
        input_mask = torch.tensor([f.input_mask[:input_lens]], dtype=torch.long).to(args.device)
        segment_ids = torch.tensor([f.segment_ids[:input_lens]], dtype=torch.long).to(args.device)
        start_ids = torch.tensor([f.start_ids[:input_lens]], dtype=torch.long).to(args.device)
        end_ids = torch.tensor([f.end_ids[:input_lens]], dtype=torch.long).to(args.device)
        subjects = f.subjects
        model.eval()
        with torch.no_grad():
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "start_positions": start_ids,
                "end_positions": end_ids
            }
            if args.model_type != "distilbert":
                # Only BERT and XLNet use segment ids.
                inputs["token_type_ids"] = (segment_ids if args.model_type in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
        tmp_eval_loss, start_logits, end_logits = outputs[:3]
        R = bert_extract_item(start_logits, end_logits)
        T = subjects
        metric.update(true_subject=T, pred_subject=R)
        if args.n_gpu > 1:
            tmp_eval_loss = tmp_eval_loss.mean()
        eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        pbar(step)
    eval_loss = eval_loss / nb_eval_steps
    eval_info, entity_info = metric.result()
    results = {f'{key}': value for key, value in eval_info.items()}
    results['loss'] = eval_loss
    print("***** Eval results *****")
    info = "-".join([f' {key}: {value:.4f} ' for key, value in results.items()])
    print(info)
    for key, value in results.items():
        writer.add_scalar(f"Eval_{key}", value, args.eval_count)
    for key, value in entity_info.items():
        writer.add_scalar(f"Eval_class_{key}_f1", value['f1'], args.eval_count)
    for key in sorted(entity_info.keys()):
        print("******* %s results ********" % key)
        info = "-".join([
            f' {key}: {value:.4f} '
            for key, value in entity_info[key].items()
        ])
        print(info)
    args.eval_count += 1
    return results
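
# bert_extract_item above turns start/end logits into (label, start, end)
# triples. A minimal sketch of that decoding, assuming logits of shape
# (1, seq_len, num_labels) with label 0 meaning "no entity"; the project's
# real implementation may differ in detail.
import torch

def extract_spans(start_logits, end_logits):
    spans = []
    start_pred = torch.argmax(start_logits, -1).cpu().numpy()[0][1:-1]  # strip [CLS]/[SEP]
    end_pred = torch.argmax(end_logits, -1).cpu().numpy()[0][1:-1]
    for i, s_label in enumerate(start_pred):
        if s_label == 0:
            continue
        for j, e_label in enumerate(end_pred[i:]):
            if s_label == e_label:  # first matching end label closes the span
                spans.append((s_label, i, i + j))
                break
    return spans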
def enumerate(self, url, base_url_supplied, scanning_method,
              iterator_returning_method, max_iterator=500, threads=10,
              verb='head', timeout=15):
    '''
    @param url base URL for the website.
    @param base_url_supplied base URL for themes or plugins,
        e.g. '%ssites/all/modules/%s/'.
    @param scanning_method see ScanningMethod.
    @param iterator_returning_method a function which returns an element
        that, when iterated, will return a full list of plugins.
    @param max_iterator integer that will be passed to
        iterator_returning_method.
    @param threads number of threads.
    @param verb the HTTP verb to use. Valid options are 'get' and 'head'.
    @param timeout the time, in seconds, that requests should wait before
        throwing an exception.
    '''
    if common.is_string(base_url_supplied):
        base_urls = [base_url_supplied]
    else:
        base_urls = base_url_supplied

    requests_verb = getattr(self.session, verb)
    futures = []
    with ThreadPoolExecutor(max_workers=threads) as executor:
        for base_url in base_urls:
            plugins = iterator_returning_method(max_iterator)

            if scanning_method == ScanningMethod.not_found:
                url_template = base_url + self.module_readme_file
                expected_status = 200
            else:
                url_template = base_url
                expected_status = common.scan_http_status(scanning_method)

            for plugin_name in plugins:
                plugin_url = url_template % (url, plugin_name)
                future = executor.submit(requests_verb, plugin_url,
                                         timeout=timeout)
                futures.append({
                    'base_url': base_url,
                    'future': future,
                    'plugin_name': plugin_name,
                    'plugin_url': plugin_url,
                })

        p = ProgressBar(sys.stderr)
        items_progressed = 0
        items_total = len(base_urls) * int(max_iterator)
        no_results = True
        found = []
        for future_array in futures:
            items_progressed += 1
            p.set(items_progressed, items_total)

            r = future_array['future'].result()
            if r.status_code == expected_status:
                plugin_url = future_array['plugin_url']
                plugin_name = future_array['plugin_name']

                no_results = False
                found.append({'name': plugin_name, 'url': plugin_url})
            elif r.status_code >= 500:
                self.out.warn('Got a 500 error. Is the server overloaded?')

    p.hide()
    return found, no_results
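
# common.scan_http_status above maps a ScanningMethod to the HTTP status that
# counts as a hit. A plausible sketch, assuming ScanningMethod exposes
# forbidden/ok members; the real mapping lives in the project's common module.
def scan_http_status(scanning_method):
    if scanning_method == ScanningMethod.forbidden:
        return 403    # directory exists but listing is denied
    elif scanning_method == ScanningMethod.ok:
        return 200    # directory exists and responds normally
    raise RuntimeError('Unexpected scanning method: %s' % scanning_method)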
def __call__(self):
    self.logger.info('load DMS-seq scores from: {}'.format(self.dmsseq_file))
    dmsseq = GenomicData(self.dmsseq_file, ['dmsseq'])
    scores = dmsseq['dmsseq']
    cutoff1 = np.percentile(scores, self.percentile)
    cutoff2 = np.percentile(scores, 100 - self.percentile)
    self.logger.info('DMS-seq score cutoffs: {}-{}'.format(cutoff1, cutoff2))
    # Binarize the scores: values between the two percentile cutoffs are
    # discarded; the lower tail maps to 0 and the upper tail to 1.
    discard = np.logical_and(cutoff1 < scores, scores < cutoff2)
    scores[(scores <= cutoff1) & np.logical_not(discard)] = 0
    scores[(scores >= cutoff2) & np.logical_not(discard)] = 1

    fasta_f = IndexedFastaReader(self.sequence_file)
    # Calculate the base distribution around 0/1 positions at each offset.
    self.logger.info('calculate base distribution')
    self.offsets = range(-self.max_offset, self.max_offset + 1)
    base_dist = np.zeros([len(self.offsets), 2, 4], dtype='int64')
    progress = ProgressBar(len(dmsseq.names), title='')
    for name in dmsseq.names:
        seq = np.frombuffer(fasta_f[name], dtype='S1')
        values = dmsseq.feature('dmsseq', name)
        ind_valid = (np.logical_not(np.isnan(values)))[0]
        ind_one_ts = np.nonzero(values == 1)[0]
        ind_zero_ts = np.nonzero(values == 0)[0]
        for i_offset, offset in enumerate(self.offsets):
            # Shift the 0/1 positions by the offset and clip to the sequence.
            ind_one = ind_one_ts + offset
            ind_one = ind_one[(ind_one >= 0) & (ind_one < len(seq))]
            ind_zero = ind_zero_ts + offset
            ind_zero = ind_zero[(ind_zero >= 0) & (ind_zero < len(seq))]
            for i in range(len(self.alphabet)):
                if len(ind_zero) > 0:
                    base_dist[i_offset, 0, i] += (seq[ind_zero] == self.alphabet[i]).sum()
                if len(ind_one) > 0:
                    base_dist[i_offset, 1, i] += (seq[ind_one] == self.alphabet[i]).sum()
        progress.update()
    progress.finish()
    fasta_f.close()
    base_dist = base_dist.astype('float64')

    # Plot one normalized bar chart per (label, offset) pair.
    fig, axes = plt.subplots(nrows=2, ncols=len(self.offsets),
                             figsize=(20, 4), sharey=True)
    fig.tight_layout()
    for i, offset in enumerate(self.offsets):
        for label in (0, 1):
            self.logger.debug('plot_base_dist: {}, {}'.format(label, offset))
            base_dist[i, label, :] /= base_dist[i, label, :].sum()
            ax = axes[label, i]
            ax.bar(np.arange(len(self.alphabet)), base_dist[i, label, :],
                   color='k', edgecolor='none', align='center')
            ax.set_xticks(np.arange(len(self.alphabet)))
            ax.set_xticklabels(self.alphabet)
            ax.set_ylabel('Density')
            ax.set_title('({}, {})'.format(label, offset))
    self.logger.info('savefig: {}'.format(self.outfile))
    make_dir(os.path.dirname(self.outfile))
    plt.savefig(self.outfile, dpi=150, bbox_inches='tight')
def train(args, train_dataset, model, tokenizer, writer):
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)
    train_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Apply weight decay to all parameters except biases and LayerNorm weights.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=train_total)

    # Resume optimizer/scheduler state when continuing from a checkpoint.
    if os.path.isfile(os.path.join(args.pretrain_model_path, "optimizer.pt")) \
            and os.path.isfile(os.path.join(args.pretrain_model_path, "scheduler.pt")):
        optimizer.load_state_dict(
            torch.load(os.path.join(args.pretrain_model_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.pretrain_model_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex "
                "to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.fp16_opt_level)

    print("***** Running training *****")
    global_step = 0
    steps_trained_in_current_epoch = 0
    if os.path.exists(args.pretrain_model_path) and "checkpoint" in args.pretrain_model_path:
        # Recover the global step from a path like ".../checkpoint-500".
        global_step = int(args.pretrain_model_path.split("-")[-1].split("/")[0])
        epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
        steps_trained_in_current_epoch = global_step % (
            len(train_dataloader) // args.gradient_accumulation_steps)

    train_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            # Skip batches already seen when resuming mid-epoch.
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "start_positions": batch[3],
                "end_positions": batch[4],
            }
            inputs["token_type_ids"] = (batch[2] if args.model_type in ["bert"] else None)
            outputs = model(**inputs)
            loss = outputs[0]
            writer.add_scalar("Train_loss", loss.item(), step)
            if args.n_gpu > 1:
                loss = loss.mean()
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            pbar(step, {'loss': loss.item()})
            train_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                # Step the optimizer before the scheduler (the required order
                # since PyTorch 1.1; the original called them the other way
                # around, which skews the learning-rate schedule).
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1
                if args.local_rank in [-1, 0] and args.logging_steps > 0 \
                        and global_step % args.logging_steps == 0:
                    if args.local_rank == -1:
                        evaluate(args, model, tokenizer, writer)
                if args.local_rank in [-1, 0] and args.save_steps > 0 \
                        and global_step % args.save_steps == 0:
                    output_dir = os.path.join(args.output_dir,
                                              "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = (model.module if hasattr(model, "module") else model)
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    tokenizer.save_vocabulary(output_dir)
                    print("Saving model checkpoint to %s" % output_dir)
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
    return global_step, train_loss / global_step