예제 #1
0
    def test_progressbar(self):
        """Setting progress to 10 of 100 must render a bar ending in '10%)'."""
        buf = MockBuffer()
        bar = ProgressBar(buf)
        bar.set(10, 100)

        expected_tail = '10%)'
        # Only the last four characters carry the percentage readout.
        tail = buf.get()[-4:]

        assert tail == expected_tail
        assert " ===== " in buf.get()
예제 #2
0
    def test_progressbar(self):
        """A bar set to 10/100 should end its output with '10%)'."""
        sink = MockBuffer()
        progress = ProgressBar(sink)
        progress.set(10, 100)

        # The percentage readout is checked on the trailing four characters.
        suffix = sink.get()[-4:]
        wanted_suffix = '10%)'

        assert suffix == wanted_suffix
        assert " ===== " in sink.get()
예제 #3
0
    def test_progressbar_simple(self):
        """Ten increments on a bar built with 100 and 'test' must show '10%)'."""
        buf = MockBuffer()
        bar = ProgressBar(buf, 100, 'test')

        # Advance the bar ten times, one call per step.
        ticks_left = 10
        while ticks_left > 0:
            bar.increment_progress()
            ticks_left -= 1

        tail = buf.get()[-4:]
        expected_tail = '10%)'

        assert tail == expected_tail
        assert " ===== " in buf.get()
예제 #4
0
파일: __main__.py 프로젝트: edu159/paramate
def download_action(args):
    """Run the "download" action through ``state_action``.

    ``args`` must expose ``force`` and ``compress_only``; both are forwarded
    to ``study_manager.download``. The action is registered with the allowed
    states ``SUBMITTED`` and ``FINISHED`` and a progress bar labelled
    ``"Downloading: "``.
    """

    def action_func_download(study_manager, remote):
        # Forward the relevant command-line flags to the study manager.
        return study_manager.download(
            remote,
            force=args.force,
            compress_only=args.compress_only,
        )

    def output_handler_download(output):
        # The download output is intentionally ignored.
        return None

    bar = ProgressBar("Downloading: ")
    state_action(
        args,
        "download",
        ["SUBMITTED", "FINISHED"],
        action_func_download,
        output_handler_download,
        bar,
    )
예제 #5
0
파일: __main__.py 프로젝트: edu159/paramate
def upload_action(args):
    """Run the "upload" action through ``state_action``.

    ``args`` must expose ``array_job`` and ``force``; both are forwarded to
    ``study_manager.upload``. The action is registered with the single
    allowed state ``CREATED`` and a progress bar labelled ``"Uploading: "``.
    """

    def action_func_upload(study_manager, remote):
        # Forward the relevant command-line flags to the study manager.
        return study_manager.upload(
            remote,
            array_job=args.array_job,
            force=args.force,
        )

    def output_handler_upload(output):
        # The upload output is intentionally ignored.
        return None

    bar = ProgressBar("Uploading: ")
    state_action(
        args,
        "upload",
        ["CREATED"],
        action_func_upload,
        output_handler_upload,
        bar,
    )
예제 #6
0
    def enumerate(self, url, base_url_supplied, scanning_method,
            iterator_returning_method, iterator_len, max_iterator=500, threads=10,
            verb='head', timeout=15, hide_progressbar=False, imu=None):
        '''
            Requests every candidate plugin URL through a thread pool and
            collects the ones that answer with HTTP 200 or 403.

            @param url base URL for the website.
            @param base_url_supplied Base url for themes, plugins. E.g. '%ssites/all/modules/%s/'
                May be a single string or an iterable of such strings.
            @param scanning_method see ScanningMethod
            @param iterator_returning_method a function which returns an
                element that, when iterated, will return a full list of plugins
            @param iterator_len the number of items the above iterator can
                return, regardless of user preference.
            @param max_iterator integer that will be passed unto iterator_returning_method
            @param threads number of threads
            @param verb what HTTP verb. Valid options are 'get' and 'head'.
            @param timeout the time, in seconds, that requests should wait
                before throwing an exception.
            @param hide_progressbar if true, the progressbar will not be
                displayed.
            @param imu Interesting module urls. A list containing tuples in the
                following format [('readme.txt', 'default readme')].
            @return a tuple (found, no_results). found is a list of dicts with
                the keys 'name' and 'url'; when imu is given and there was at
                least one hit, found is first passed through
                self._enumerate_plugin_if. no_results is True when no request
                came back with status 200 or 403.
        '''
        if common.is_string(base_url_supplied):
            base_urls = [base_url_supplied]
        else:
            base_urls = base_url_supplied

        show_progress = not hide_progressbar
        requests_verb = getattr(self.session, verb)
        futures = []
        with ThreadPoolExecutor(max_workers=threads) as executor:
            for base_url in base_urls:
                plugins = iterator_returning_method(max_iterator)

                if scanning_method == ScanningMethod.not_found:
                    # Probe a file inside the module instead of its folder.
                    url_template = base_url + self.module_common_file
                else:
                    url_template = base_url

                for plugin_name in plugins:
                    plugin_url = url_template % (url, plugin_name)
                    future = executor.submit(requests_verb, plugin_url,
                            timeout=timeout)

                    # Always report the containing directory, even when a
                    # file inside it was the URL actually requested.
                    if plugin_url.endswith('/'):
                        final_url = plugin_url
                    else:
                        final_url = dirname(plugin_url) + "/"

                    futures.append({
                        'base_url': base_url,
                        'future': future,
                        'plugin_name': plugin_name,
                        'plugin_url': final_url,
                    })

            if show_progress:
                p = ProgressBar(sys.stderr)
                items_progressed = 0
                # The iterator cannot yield more than iterator_len items, so
                # cap the expected amount of work per base URL at that value.
                max_possible = max_iterator if int(max_iterator) < int(iterator_len) else iterator_len
                items_total = int(max_possible) * len(base_urls)

            no_results = True
            found = []
            for future_array in futures:
                if show_progress:
                    items_progressed += 1
                    p.set(items_progressed, items_total)

                r = future_array['future'].result()
                if r.status_code in [200, 403]:
                    plugin_url = future_array['plugin_url']
                    plugin_name = future_array['plugin_name']

                    no_results = False
                    found.append({
                        'name': plugin_name,
                        'url': plugin_url
                    })
                elif r.status_code >= 500:
                    self.out.warn('\rGot a 500 error. Is the server overloaded?')

            if show_progress:
                p.hide()

        # Identity comparison is the correct way to test against None.
        if imu is not None and not no_results:
            found = self._enumerate_plugin_if(found, verb, threads, imu)

        return found, no_results
예제 #7
0
def evaluate(args, model, tokenizer, writer):
    """Evaluate ``model`` on the 'dev' examples and log the metrics.

    Each feature returned by ``load_examples`` is fed to the model as a batch
    of one, truncated to ``f.input_len``. Predicted spans come from
    ``bert_extract_item`` and are accumulated in a ``SpanEntityScore``.

    Args:
        args: run configuration. Reads ``id2label``, ``output_dir``,
            ``local_rank``, ``device``, ``model_type``, ``n_gpu`` and
            ``eval_count``; ``eval_count`` is incremented before returning.
        model: the model to evaluate; switched to eval mode.
        tokenizer: forwarded to ``load_examples``.
        writer: object with ``add_scalar(tag, value, step)``.

    Returns:
        dict mapping each overall metric name to its value, plus ``'loss'``
        (the mean loss over all evaluated examples).

    Raises:
        ZeroDivisionError: if ``load_examples`` yields no features.
    """
    metric = SpanEntityScore(args.id2label)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    eval_features = load_examples(args, tokenizer, data_type='dev')
    print("***** Running eval *****")

    def as_batch(values, length):
        # Wrap one truncated example as a batch of size 1 on the target device.
        return torch.tensor([values[:length]],
                            dtype=torch.long).to(args.device)

    eval_loss = 0.0
    nb_eval_steps = 0
    pbar = ProgressBar(n_total=len(eval_features), desc="Evaluating")
    # Eval mode does not change between examples, so set it once up front
    # instead of on every iteration.
    model.eval()
    for step, f in enumerate(eval_features):
        input_lens = f.input_len
        input_ids = as_batch(f.input_ids, input_lens)
        input_mask = as_batch(f.input_mask, input_lens)
        segment_ids = as_batch(f.segment_ids, input_lens)
        start_ids = as_batch(f.start_ids, input_lens)
        end_ids = as_batch(f.end_ids, input_lens)
        subjects = f.subjects
        with torch.no_grad():
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "start_positions": start_ids,
                "end_positions": end_ids
            }
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = (segment_ids if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            tmp_eval_loss, start_logits, end_logits = outputs[:3]
            predicted = bert_extract_item(start_logits, end_logits)
            metric.update(true_subject=subjects, pred_subject=predicted)
            if args.n_gpu > 1:
                # Reduce the loss to a scalar before calling .item().
                tmp_eval_loss = tmp_eval_loss.mean()
            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        pbar(step)

    eval_loss = eval_loss / nb_eval_steps
    eval_info, entity_info = metric.result()
    results = {f'{key}': value for key, value in eval_info.items()}
    results['loss'] = eval_loss
    print("***** Eval results *****")
    info = "-".join(
        [f' {key}: {value:.4f} ' for key, value in results.items()])
    print(info)

    for key, value in results.items():
        writer.add_scalar(f"Eval_{key}", value, args.eval_count)

    for key, value in entity_info.items():
        writer.add_scalar(f"Eval_class_{key}_f1", value['f1'], args.eval_count)

    for entity in sorted(entity_info.keys()):
        print("******* %s results ********" % entity)
        # Distinct names here avoid shadowing the entity being reported.
        info = "-".join([
            f' {name}: {score:.4f} '
            for name, score in entity_info[entity].items()
        ])
        print(info)

    args.eval_count += 1
    return results
예제 #8
0
    def enumerate(self,
                  url,
                  base_url_supplied,
                  scanning_method,
                  iterator_returning_method,
                  max_iterator=500,
                  threads=10,
                  verb='head',
                  timeout=15):
        '''
            Requests every candidate plugin URL through a thread pool and
            collects the ones that answer with the expected HTTP status.

            @param url base URL for the website.
            @param base_url_supplied Base url for themes, plugins. E.g. '%ssites/all/modules/%s/'
                May be a single string or an iterable of such strings.
            @param scanning_method see ScanningMethod
            @param iterator_returning_method a function which returns an
                element that, when iterated, will return a full list of plugins
            @param max_iterator integer that will be passed unto iterator_returning_method
            @param threads number of threads
            @param verb what HTTP verb. Valid options are 'get' and 'head'.
            @param timeout the time, in seconds, that requests should wait
                before throwing an exception.
            @return a tuple (found, no_results). found is a list of dicts with
                the keys 'name' and 'url'; no_results is True when no request
                came back with the expected status.
        '''
        if common.is_string(base_url_supplied):
            base_urls = [base_url_supplied]
        else:
            base_urls = base_url_supplied

        requests_verb = getattr(self.session, verb)
        futures = []
        with ThreadPoolExecutor(max_workers=threads) as executor:
            for base_url in base_urls:
                plugins = iterator_returning_method(max_iterator)

                if scanning_method == ScanningMethod.not_found:
                    # Probe the module's readme file instead of its folder.
                    url_template = base_url + self.module_readme_file
                    expected_status = 200
                else:
                    url_template = base_url
                    expected_status = common.scan_http_status(scanning_method)

                for plugin_name in plugins:
                    plugin_url = url_template % (url, plugin_name)
                    future = executor.submit(requests_verb,
                                             plugin_url,
                                             timeout=timeout)

                    futures.append({
                        'base_url': base_url,
                        'future': future,
                        'plugin_name': plugin_name,
                        'plugin_url': plugin_url,
                    })

            p = ProgressBar(sys.stderr)
            items_progressed = 0
            # One progress tick is emitted per submitted request, so the
            # number of futures is the exact total. Deriving it from
            # max_iterator overstates the work whenever the iterator yields
            # fewer items, and the bar would then never reach 100%.
            items_total = len(futures)

            no_results = True
            found = []
            for future_array in futures:
                items_progressed += 1
                p.set(items_progressed, items_total)
                r = future_array['future'].result()
                if r.status_code == expected_status:
                    plugin_url = future_array['plugin_url']
                    plugin_name = future_array['plugin_name']

                    no_results = False
                    found.append({'name': plugin_name, 'url': plugin_url})
                elif r.status_code >= 500:
                    self.out.warn('Got a 500 error. Is the server overloaded?')

            p.hide()

        return found, no_results
예제 #9
0
파일: __init__.py 프로젝트: ss23/droopescan
    def enumerate(self, url, base_url_supplied, scanning_method, iterator_returning_method, max_iterator=500, threads=10, verb='head', timeout=15):
        '''
            Fires one HTTP request per candidate plugin URL on a thread pool
            and keeps the plugins whose response has the expected status.

            @param url base URL for the website.
            @param base_url_supplied Base url for themes, plugins. E.g. '%ssites/all/modules/%s/'
                May be a single string or an iterable of such strings.
            @param scanning_method see ScanningMethod
            @param iterator_returning_method a function which returns an
                element that, when iterated, will return a full list of plugins
            @param max_iterator integer that will be passed unto iterator_returning_method
            @param threads number of threads
            @param verb what HTTP verb. Valid options are 'get' and 'head'.
            @param timeout the time, in seconds, that requests should wait
                before throwing an exception.
            @return a tuple (found, no_results). found is a list of dicts with
                the keys 'name' and 'url'; no_results is True when no request
                came back with the expected status.
        '''
        if common.is_string(base_url_supplied):
            base_urls = [base_url_supplied]
        else:
            base_urls = base_url_supplied

        requests_verb = getattr(self.session, verb)
        futures = []
        with ThreadPoolExecutor(max_workers=threads) as executor:
            for base_url in base_urls:
                plugins = iterator_returning_method(max_iterator)

                if scanning_method == ScanningMethod.not_found:
                    # Probe the module's readme file instead of its folder.
                    url_template = base_url + self.module_readme_file
                    expected_status = 200
                else:
                    url_template = base_url
                    expected_status = common.scan_http_status(scanning_method)

                for plugin_name in plugins:
                    plugin_url = url_template % (url, plugin_name)
                    future = executor.submit(requests_verb, plugin_url,
                            timeout=timeout)

                    futures.append({
                        'base_url': base_url,
                        'future': future,
                        'plugin_name': plugin_name,
                        'plugin_url': plugin_url,
                    })

            p = ProgressBar(sys.stderr)
            items_progressed = 0
            # The result loop ticks once per future, so len(futures) is the
            # exact total. len(base_urls) * max_iterator is too large when
            # the iterator yields fewer than max_iterator items, which left
            # the bar short of 100%.
            items_total = len(futures)

            no_results = True
            found = []
            for future_array in futures:
                items_progressed += 1
                p.set(items_progressed, items_total)
                r = future_array['future'].result()
                if r.status_code == expected_status:
                    plugin_url = future_array['plugin_url']
                    plugin_name = future_array['plugin_name']

                    no_results = False
                    found.append({
                        'name': plugin_name,
                        'url': plugin_url
                    })
                elif r.status_code >= 500:
                    self.out.warn('Got a 500 error. Is the server overloaded?')

            p.hide()

        return found, no_results
예제 #10
0
 def __call__(self):
     """Plot the base composition around low- and high-scoring positions.

     Loads DMS-seq scores from ``self.dmsseq_file``, relabels the scores at
     or below the ``self.percentile``-th percentile as 0 and those at or
     above the ``100 - self.percentile``-th percentile as 1, then counts,
     for every offset in ``[-self.max_offset, self.max_offset]``, which
     letter of ``self.alphabet`` occurs at that offset from each labelled
     position. The normalised counts are drawn as a 2 x len(offsets) grid
     of bar charts (row = label) and saved to ``self.outfile``.

     Side effects: sets ``self.offsets`` and writes the figure file,
     creating its parent directory through ``make_dir``.
     """
     self.logger.info('load DMS-seq scores from: {}'.format(
         self.dmsseq_file))
     dmsseq = GenomicData(self.dmsseq_file, ['dmsseq'])
     scores = dmsseq['dmsseq']
     cutoff1 = np.percentile(scores, self.percentile)
     cutoff2 = np.percentile(scores, 100 - self.percentile)
     self.logger.info('DMS-seq score cutoffs: {}-{}'.format(
         cutoff1, cutoff2))
     # NOTE(review): the in-place writes below are presumably visible through
     # dmsseq.feature() further down (i.e. `scores` is a view) -- confirm.
     # NOTE(review): scores strictly between the cutoffs are left untouched,
     # so a raw score of exactly 0 or 1 inside that band would still match
     # the `values == 0` / `values == 1` tests below -- confirm intended.
     discard = np.logical_and(cutoff1 < scores, scores < cutoff2)
     scores[(scores <= cutoff1) & np.logical_not(discard)] = 0
     scores[(scores >= cutoff2) & np.logical_not(discard)] = 1
     fasta_f = IndexedFastaReader(self.sequence_file)
     try:
         # calculate base distribution
         self.logger.info('calculate base distribution')
         self.offsets = range(-self.max_offset, self.max_offset + 1)
         # Axes: (offset index, label 0/1, alphabet index).
         base_dist = np.zeros([len(self.offsets), 2, 4], dtype='int64')
         progress = ProgressBar(len(dmsseq.names), title='')
         for name in dmsseq.names:
             seq = np.frombuffer(fasta_f[name], dtype='S1')
             values = dmsseq.feature('dmsseq', name)
             ind_one_ts = np.nonzero(values == 1)[0]
             ind_zero_ts = np.nonzero(values == 0)[0]
             for i_offset, offset in enumerate(self.offsets):
                 # Shift the labelled positions and drop those that fall
                 # outside the sequence.
                 ind_one = ind_one_ts + offset
                 ind_one = ind_one[(ind_one >= 0) & (ind_one < len(seq))]
                 ind_zero = ind_zero_ts + offset
                 ind_zero = ind_zero[(ind_zero >= 0) & (ind_zero < len(seq))]
                 for i in range(len(self.alphabet)):
                     if len(ind_zero) > 0:
                         base_dist[i_offset, 0, i] += (
                             seq[ind_zero] == self.alphabet[i]).sum()
                     if len(ind_one) > 0:
                         base_dist[i_offset, 1, i] += (
                             seq[ind_one] == self.alphabet[i]).sum()
             progress.update()
         progress.finish()
     finally:
         # Release the FASTA reader even if counting fails part-way.
         fasta_f.close()
     base_dist = base_dist.astype('float64')
     # plot
     fig, axes = plt.subplots(nrows=2,
                              ncols=len(self.offsets),
                              figsize=(20, 4),
                              sharey=True)
     fig.tight_layout()
     for i, offset in enumerate(self.offsets):
         for label in (0, 1):
             self.logger.debug('plot_base_dist: {}, {}'.format(
                 label, offset))
             # Turn counts into fractions; an all-zero row divides by zero.
             base_dist[i, label, :] /= base_dist[i, label, :].sum()
             ax = axes[label, i]
             ax.bar(np.arange(len(self.alphabet)),
                    base_dist[i, label, :],
                    color='k',
                    edgecolor='none',
                    align='center')
             ax.set_xticks(np.arange(len(self.alphabet)))
             ax.set_xticklabels(self.alphabet)
             ax.set_ylabel('Density')
             ax.set_title('({}, {})'.format(label, offset))
     self.logger.info('savefig: {}'.format(self.outfile))
     make_dir(os.path.dirname(self.outfile))
     plt.savefig(self.outfile, dpi=150, bbox_inches='tight')
예제 #11
0
def train(args, train_dataset, model, tokenizer, writer):
    """Train ``model`` on ``train_dataset`` with AdamW and a linear schedule.

    Args:
        args: run configuration. Reads, among others,
            ``per_gpu_train_batch_size``, ``n_gpu``,
            ``gradient_accumulation_steps``, ``num_train_epochs``,
            ``weight_decay``, ``learning_rate``, ``adam_epsilon``,
            ``warmup_steps``, ``pretrain_model_path``, ``fp16``,
            ``fp16_opt_level``, ``device``, ``model_type``, ``max_grad_norm``,
            ``local_rank``, ``logging_steps``, ``save_steps`` and
            ``output_dir``. ``args.train_batch_size`` is set here.
        train_dataset: dataset wrapped in a ``DataLoader`` with a
            ``RandomSampler`` and the module's ``collate_fn``.
        model: model to train; called as ``model(**inputs)`` and expected to
            return the loss as its first output.
        tokenizer: forwarded to ``evaluate`` and saved with each checkpoint.
        writer: object with ``add_scalar(tag, value, step)``.

    Returns:
        ``(global_step, train_loss / global_step)``.

    Raises:
        ImportError: if ``args.fp16`` is set and apex is not installed.
        ZeroDivisionError: if no optimizer step was ever taken.
    """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    train_total = len(
        train_dataloader
    ) // args.gradient_accumulation_steps * args.num_train_epochs
    # Biases and LayerNorm weights are excluded from weight decay.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=train_total)

    # Restore optimizer/scheduler state when both files sit next to the
    # pretrained model.
    optimizer_path = os.path.join(args.pretrain_model_path, "optimizer.pt")
    scheduler_path = os.path.join(args.pretrain_model_path, "scheduler.pt")
    if os.path.isfile(optimizer_path) and os.path.isfile(scheduler_path):
        optimizer.load_state_dict(torch.load(optimizer_path))
        scheduler.load_state_dict(torch.load(scheduler_path))
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    print("***** Running training *****")

    global_step = 0
    steps_trained_in_current_epoch = 0

    if os.path.exists(args.pretrain_model_path
                      ) and "checkpoint" in args.pretrain_model_path:
        # The step count is parsed from a "...checkpoint-<step>" path.
        global_step = int(
            args.pretrain_model_path.split("-")[-1].split("/")[0])
        # NOTE(review): only the position inside the first epoch is skipped;
        # already-completed epochs are not, since the epoch loop below always
        # runs num_train_epochs times -- confirm this is intended.
        steps_trained_in_current_epoch = global_step % (
            len(train_dataloader) // args.gradient_accumulation_steps)

    train_loss = 0.0
    model.zero_grad()

    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "start_positions": batch[3],
                "end_positions": batch[4]
            }

            inputs["token_type_ids"] = (batch[2] if args.model_type
                                        in ["bert"] else None)
            outputs = model(**inputs)
            loss = outputs[0]

            if args.n_gpu > 1:
                loss = loss.mean()

            # Log only after the multi-GPU reduction: before it the loss may
            # hold one value per GPU and .item() would fail. The value is
            # still the undivided (pre-accumulation-scaling) loss.
            writer.add_scalar("Train_loss", loss.item(), step)

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            pbar(step, {'loss': loss.item()})
            train_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                # The optimizer must step before the scheduler; the reverse
                # order skips the first value of the learning-rate schedule.
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1
                # Evaluation only runs in the non-distributed setting.
                if (args.local_rank == -1 and args.logging_steps > 0
                        and global_step % args.logging_steps == 0):
                    evaluate(args, model, tokenizer, writer)
                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    output_dir = os.path.join(
                        args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Unwrap a parallel wrapper so the bare model is saved.
                    model_to_save = (model.module
                                     if hasattr(model, "module") else model)
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    tokenizer.save_vocabulary(output_dir)
                    # print() does not interpolate extra arguments, so the
                    # path has to be formatted into the message explicitly.
                    print("Saving model checkpoint to %s" % output_dir)
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))

        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
    return global_step, train_loss / global_step