Python ProgressBar Examples

Programming Language: Python

Namespace/Package Name: main.common.progressbar

Method/Function: ProgressBar

Examples at hotexamples.com: 6

Python ProgressBar - 6 examples found. These are the top-rated real-world Python examples of main.common.progressbar.ProgressBar extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

 def create_examples(self, lines, example_type, cached_file):
     """Build ``InputExample`` objects from raw records, with a disk cache.

     When ``cached_file`` exists, the examples are loaded from it with
     ``torch.load``. Otherwise they are built from ``lines`` and written
     to ``cached_file`` with ``torch.save``.

     Args:
         lines: Sized iterable of records; each record is read through the
             keys ``'HADM_ID'``, ``'token'``, ``'tags'``, ``'code'`` and
             ``'relations'``.
         example_type: Text used in the progress-bar description and as
             the prefix of every generated ``guid``.
         cached_file: Path-like object providing ``exists()``.

     Returns:
         The examples, either loaded from the cache or freshly built.
     """
     if cached_file.exists():
         tools.logger.info("Loading samples from cached files %s",
                           cached_file)
         return torch.load(cached_file)

     pbar = progressbar.ProgressBar(
         n_total=len(lines), desc=f'create {example_type} samples')
     examples = []
     for i, line in enumerate(lines):
         hadm_id = line['HADM_ID']
         # Float entries (presumably NaN placeholders for missing tokens --
         # TODO confirm) become a single space so the join below works.
         # isinstance() also covers float subclasses such as numpy.float64,
         # which an exact type comparison would let through to the join.
         tokens = [' ' if isinstance(t, float) else t
                   for t in line['token']]
         examples.append(
             InputExample(
                 guid='%s-%s-%d' % (example_type, hadm_id, i),
                 # text_a: the untokenized text of the first sequence. For
                 # single-sequence tasks only this one is specified.
                 text_a=' '.join(tokens),
                 text_b=None,
                 label=line['tags'],
                 code=line['code'],  # brat entity codes (T1, T2, ...)
                 relations=line['relations'],  # brat gold relations
                 hadm_id=hadm_id))
         pbar(step=i)
     tools.logger.info("Saving examples into cached file %s",
                       cached_file)
     torch.save(examples, cached_file)
     return examples

Example #2

Show file

File: NER_Trainer.py Project: ratavaa/NIHSS_IE

    def train_epoch(self, data_loader):
        """Run one pass over ``data_loader`` and report the tracked loss.

        Each batch is moved to ``self.device`` and fed to
        ``self.model.forward_loss``. The optimizer steps once every
        ``self.gradient_accumulation_steps`` batches.

        Args:
            data_loader: Sized iterable yielding batches of five tensors
                in the order (input_ids, input_mask, segment_ids,
                label_ids, input_lens).

        Returns:
            ``{'loss': <avg of the loss meter>}``.
        """
        progress = progressbar.ProgressBar(n_total=len(data_loader),
                                           desc='Training')
        loss_meter = tools.AverageMeter()
        for step, batch in enumerate(data_loader):
            self.model.train()
            on_device = tuple(t.to(self.device) for t in batch)
            (input_ids, input_mask, segment_ids, label_ids,
             input_lens) = on_device
            input_lens = input_lens.cpu().detach().numpy().tolist()
            _, loss = self.model.forward_loss(input_ids, segment_ids,
                                              input_mask, label_ids,
                                              input_lens)

            # ``n_gpu`` is a comma-separated string; two or more entries
            # means the loss is reduced with mean().
            gpu_ids = self.n_gpu.split(',')
            if len(gpu_ids) >= 2:
                loss = loss.mean()
            if self.gradient_accumulation_steps > 1:
                loss = loss / self.gradient_accumulation_steps

            # NOTE(review): when ``self.fp16`` is truthy no backward pass
            # runs at all -- confirm whether this is intentional.
            if not self.fp16:
                loss.backward()
                clip_grad_norm_(self.model.parameters(), self.grad_clip)

            window_complete = (
                (step + 1) % self.gradient_accumulation_steps == 0)
            if window_complete:
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.global_step += 1

            batch_loss = loss.item()
            loss_meter.update(batch_loss, n=1)
            progress(step=step, info={'loss': batch_loss})

        summary = {'loss': loss_meter.avg}
        if 'cuda' in str(self.device):
            torch.cuda.empty_cache()
        return summary

Example #3

Show file

File: NER_Trainer.py Project: ratavaa/NIHSS_IE

 def valid_epoch(self, data_loader):
     """Evaluate the model on every batch of ``data_loader``.

     For each batch the loss and predicted tags are computed under
     ``torch.no_grad()``. The gold and predicted label paths of every
     sequence (position 0 skipped, stopping at the first ``'[SEP]'``
     label) are fed to ``self.entity_score``. Metrics are aggregated once
     all batches have been processed.

     Args:
         data_loader: Sized iterable yielding batches of five tensors in
             the order (input_ids, input_mask, segment_ids, label_ids,
             input_lens).

     Returns:
         Tuple ``(info, class_info)``: ``info`` holds the overall metrics
         from ``self.entity_score.result()`` with keys prefixed by
         ``'valid_'`` plus ``'valid_loss'``; ``class_info`` is the second
         value returned by ``self.entity_score.result()``.
     """
     pbar = progressbar.ProgressBar(n_total=len(data_loader),
                                    desc='Evaluating')
     self.entity_score.reset()
     valid_loss = tools.AverageMeter()
     for step, batch in enumerate(data_loader):
         batch = tuple(t.to(self.device) for t in batch)
         input_ids, input_mask, segment_ids, label_ids, input_lens = batch
         input_lens = input_lens.cpu().detach().numpy().tolist()
         self.model.eval()
         with torch.no_grad():
             features, loss = self.model.forward_loss(
                 input_ids, segment_ids, input_mask, label_ids, input_lens)
             tags, _ = self.model.crf._obtain_labels(
                 features, self.id2label, input_lens)
         valid_loss.update(val=loss.item(), n=input_ids.size(0))
         print('tags[0]:', str(tags[0]))
         pbar(step=step, info={'loss': loss.item()})
         label_ids = label_ids.to('cpu').numpy().tolist()
         for i, label in enumerate(label_ids):
             gold_path = []
             pred_path = []
             for j, label_id in enumerate(label):
                 if j == 0:
                     # Position 0 is never scored.
                     continue
                 if label_id == self.label2id['[SEP]']:
                     self.entity_score.update(pred_paths=[pred_path],
                                              label_paths=[gold_path])
                     break
                 gold_path.append(self.id2label[label_id])
                 pred_path.append(tags[i][j])

     # Aggregate only after the whole loader has been consumed. Returning
     # from inside the loop would score just the first batch.
     valid_info, class_info = self.entity_score.result()
     info = {f'valid_{key}': value for key, value in valid_info.items()}
     info['valid_loss'] = valid_loss.avg
     if 'cuda' in str(self.device):
         torch.cuda.empty_cache()
     return info, class_info

Example #4

Show file

    def create_features(self, examples, max_seq_len, cached_file):
        """Convert examples into fixed-length ``InputFeature`` objects.

        When ``cached_file`` exists the features are loaded from it with
        ``torch.load``; otherwise they are built and then written to
        ``cached_file`` with ``torch.save``.

        For each example the space-separated ``text_a`` is tokenized word by
        word with ``self.tokenizer``, labels and codes are spread over the
        resulting sub-tokens, ``[CLS]``/``[SEP]`` are added, and every
        sequence is truncated/padded to exactly ``max_seq_len`` entries.

        Args:
            examples: Iterable of objects exposing ``hadm_id``, ``text_a``,
                ``label``, ``code``, ``relations`` and ``guid``.
            max_seq_len: Exact length of every produced sequence.
            cached_file: Path-like object providing ``exists()``.

        Returns:
            The list of features (loaded from cache or freshly built).
        """
        if cached_file.exists():
            tools.logger.info('Loading features from cached file %s',
                              cached_file)
            features = torch.load(cached_file)
        else:
            label_list = self.get_labels()
            label2id = {label: i for i, label in enumerate(label_list)}
            pbar = progressbar.ProgressBar(
                n_total=len(examples),
                desc='creating the specified features of examples')
            features = []
            for example_id, example in enumerate(examples):
                hamd_id = example.hadm_id
                text_list = example.text_a.split(' ')  # string
                # NOTE(review): idx_CR is computed but not used anywhere
                # else in this method.
                idx_CR = [
                    idx for idx, text in enumerate(text_list)
                    if text == '<CRLF>'
                ]
                # Rebinds label_list to this example's labels; label2id was
                # already built from the full label set above.
                label_list = example.label
                code_list = example.code
                relation_list = example.relations

                new_tokens = []
                new_segment_ids = []
                new_label_ids = []
                new_code = []

                # Every sequence starts with [CLS]; its code is '0'.
                new_tokens.append('[CLS]')
                new_segment_ids.append(0)
                new_label_ids.append(label2id['[CLS]'])
                new_code.append('0')

                for text, label, code in zip(text_list, label_list, code_list):
                    if text == '<CRLF>':
                        # Line-break markers are dropped from the features.
                        continue
                    else:
                        token_list = self.tokenizer.tokenize(text)
                        for idx, token in enumerate(token_list):
                            new_tokens.append(token)
                            new_segment_ids.append(0)
                            if idx == 0:
                                # First sub-token keeps the word's label.
                                new_label_ids.append(label2id[label])
                                new_code.append(code)
                            elif label == 'O':
                                new_label_ids.append(label2id[label])
                                new_code.append(code)
                            else:
                                # Later sub-tokens of a labelled word get
                                # 'I-' plus the part after the first '-'.
                                temp_l = 'I-' + label.split('-')[1]
                                new_label_ids.append(label2id[temp_l])
                                new_code.append(code)

                assert len(new_tokens) == len(new_segment_ids)
                assert len(new_tokens) == len(new_label_ids)
                assert len(new_tokens) == len(new_code)

                # Truncate to leave exactly one slot for the [SEP] below.
                if len(new_tokens) >= max_seq_len:
                    new_tokens = new_tokens[0:(max_seq_len - 1)]
                    new_segment_ids = new_segment_ids[0:(max_seq_len - 1)]
                    new_label_ids = new_label_ids[0:(max_seq_len - 1)]
                    new_code = new_code[0:(max_seq_len - 1)]

                new_tokens.append('[SEP]')
                new_segment_ids.append(0)
                new_label_ids.append(label2id['[SEP]'])
                new_code.append('0')

                input_ids = self.tokenizer.convert_tokens_to_ids(new_tokens)
                input_mask = [1] * len(input_ids)
                # Length before padding (includes [CLS] and [SEP]).
                input_len = len(new_label_ids)

                # Right-pad everything to max_seq_len; codes pad with '0'.
                if len(input_ids) < max_seq_len:
                    pad_zero = [0] * (max_seq_len - len(input_ids))
                    input_ids.extend(pad_zero)
                    input_mask.extend(pad_zero)
                    new_segment_ids.extend(pad_zero)
                    new_label_ids.extend(pad_zero)
                    new_code.extend(['0'] * len(pad_zero))

                assert len(input_ids) == max_seq_len
                assert len(input_mask) == max_seq_len
                assert len(new_segment_ids) == max_seq_len
                assert len(new_label_ids) == max_seq_len
                assert len(new_code) == max_seq_len

                # Group positions by code: each group yields
                # (code, first position, last position).
                # NOTE(review): first/last only describe a span if each code
                # occupies contiguous positions -- confirm.
                df_temp = pd.DataFrame({
                    'input_ids': input_ids,
                    'code': new_code
                })
                agg_fun = lambda s: (max(s['code']), s.index.tolist()[0],
                                     s.index.tolist()[-1])
                groupby_code = df_temp.groupby('code').apply(agg_fun)
                code_position = {}
                for key, start, end in groupby_code:
                    if key != '0':
                        # Positions are shifted by one, presumably to
                        # discount the leading [CLS] -- TODO confirm.
                        code_position[(start - 1, end - 1)] = key
                    else:
                        continue

                # Log the first two examples in full for inspection.
                if example_id < 2:
                    tools.logger.info('*** Examples: ***')
                    tools.logger.info("guid: %s" % (example.guid))
                    tools.logger.info("tokens: %s" %
                                      " ".join([str(x) for x in new_tokens]))
                    tools.logger.info("input_ids: %s" %
                                      " ".join([str(x) for x in input_ids]))
                    tools.logger.info("input_mask: %s" %
                                      " ".join([str(x) for x in input_mask]))
                    tools.logger.info(
                        "segment_ids: %s" %
                        " ".join([str(x) for x in new_segment_ids]))
                    tools.logger.info("old label name: %s " %
                                      " ".join(example.label))
                    tools.logger.info("new label ids: %s" %
                                      " ".join([str(x)
                                                for x in new_label_ids]))

                features.append(
                    InputFeature(
                        input_ids=input_ids,
                        input_mask=input_mask,
                        segment_ids=new_segment_ids,
                        label_id=new_label_ids,
                        input_len=input_len,
                        code=new_code,
                        new_tokens=new_tokens,
                        relations=relation_list,  # golden standard
                        hamd_id=hamd_id,
                        code_position=code_position))

                pbar(step=example_id)

            tools.logger.info('Saving features into cached file %s',
                              cached_file)
            torch.save(features, cached_file)
        return features

Example #5

Show file

def run_end2end_realtion_extration(args):
    """Run NER over the RE test set and export relation-extraction samples.

    The model is loaded from ``args.model_path`` and applied to each test
    feature individually. For every sample the function:

    1. scores predicted tags against the gold labels with a
       ``SeqEntityScore`` and reads the found entities from it;
    2. maps each found entity to its gold code via ``code_position`` or to
       a generated ``'TFP_<n>'`` code when no gold span matches;
    3. collects the gold ``'Has_Value'`` relations and adds randomly drawn
       pairs labelled ``'0'``;
    4. merges ``'##'`` word pieces back into whole tokens;
    5. appends the resulting record to the output list.

    The records are written as text to ``End2End_Test/re_e2e_test.txt`` and
    pickled to ``End2End_Test/re_e2e_test_by_fold_<fold>.pkl`` under
    ``base.config['data_dir']``.

    Args:
        args: Namespace providing ``model_path``, ``device``, ``data_name``,
            ``eval_max_seq_len`` and ``fold``. ``args.resume_path`` is set
            to ``args.model_path`` as a side effect.

    Returns:
        None.
    """
    from main.common import progressbar
    # NOTE(review): ``ner_utils`` is not referenced by name below, but
    # importing it presumably ensures ``common.ner_utils`` resolves --
    # confirm before removing.
    from main.common import ner_utils
    from main.common.tools import save_pickle
    from random import choice
    import pandas as pd
    import copy

    args.resume_path = args.model_path
    processor = BertProcessor(vocab_path=args.resume_path / 'vocab.txt',
                              do_lower_case=True)
    label_list = processor.get_labels()  # all labels
    label2id = {label: i for i, label in enumerate(label_list)}
    id2label = {i: label for i, label in enumerate(label_list)}
    model = BERTLSTMCRF
    tools.logger.info(f'loading prtrained model from {args.resume_path}')
    model = model.from_pretrained(args.resume_path,
                                  label2id=label2id,
                                  device=args.device)
    model.to(args.device)

    test_data_path = base.config['data_dir'] / 'RE_Test/re_test.pkl'
    test_data = processor.get_test(test_data_path)

    test_examples_cached_file = base.config[
        'data_dir'] / f'End2End_Test/cached_{args.data_name}_e2e_test_examples.pkl'
    test_examples = processor.create_examples(
        lines=test_data,
        example_type='test',
        cached_file=test_examples_cached_file)
    test_features_cached_file = base.config[
        'data_dir'] / f'End2End_Test/cached_{args.data_name}_e2e_test_features.pkl'
    test_features = processor.create_features(
        examples=test_examples,
        max_seq_len=args.eval_max_seq_len,
        cached_file=test_features_cached_file)

    test_datasets = test_features

    pbar = progressbar.ProgressBar(
        n_total=len(test_datasets),
        desc='Testing End2End relation extraction performace')

    entity_score = common.ner_utils.SeqEntityScore(id2label)
    entity_score.reset()

    ner_output_samples = []
    for step, one_sample in enumerate(test_datasets):
        # The scorer is reset per sample so ``founds`` below only holds
        # the entities of the current sample.
        entity_score.reset()
        hadm_id = one_sample.hamd_id
        input_ids = torch.tensor([one_sample.input_ids], dtype=torch.long)
        input_mask = torch.tensor([one_sample.input_mask], dtype=torch.long)
        segment_ids = torch.tensor([one_sample.segment_ids], dtype=torch.long)
        label_ids = torch.tensor([one_sample.label_id], dtype=torch.long)
        input_lens = torch.tensor([one_sample.input_len], dtype=torch.long)
        relations = one_sample.relations
        code_position = one_sample.code_position
        # Drop the first and last token ([CLS]/[SEP] as produced by
        # create_features -- presumably; confirm for truncated samples).
        new_tokens = one_sample.new_tokens[1:-1]

        batch = (input_ids, input_mask, segment_ids, label_ids, input_lens)
        batch = tuple(t.to(args.device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, input_lens = batch
        input_lens = input_lens.cpu().detach().numpy().tolist()
        model.eval()

        with torch.no_grad():
            features, loss = model.forward_loss(input_ids, segment_ids,
                                                input_mask, label_ids,
                                                input_lens)
            tags, _ = model.crf._obtain_labels(features, id2label, input_lens)

        label_ids = label_ids.to('cpu').numpy().tolist()
        pbar(step=step, info={'loss': loss.item()})

        # Feed gold/predicted label paths (position 0 skipped, cut at the
        # first [SEP] label) to the scorer.
        for i, label in enumerate(label_ids):
            temp_1 = []
            temp_2 = []
            for j, label_id in enumerate(label):
                if j == 0:
                    continue
                if label_id == label2id['[SEP]']:
                    entity_score.update(pred_paths=[temp_2],
                                        label_paths=[temp_1])
                    break
                temp_1.append(id2label[label_id])
                temp_2.append(tags[i][j])

        # Gold 'Has_Value' relations: kept as-is and indexed e1 -> [e2, ...].
        relation_pairs_NER = []
        relation_pairs_dict = {}

        count_y1 = 0
        for relation in relations:
            if relation[3] == 'Has_Value':
                relation_pairs_NER.append(relation)
                relation_pairs_dict.setdefault(relation[0],
                                               []).append(relation[1])
                count_y1 += 1

        # Attach a code to every found entity: the gold code when the span
        # matches a gold span, otherwise a generated 'TFP_<n>' code.
        found_entities = []
        found_e1_T_code = []
        found_e2_T_code = []
        TFN_idx = 1
        for entity in entity_score.founds:
            key = (entity[1], entity[2])
            e_type = entity[0]
            if key in code_position:
                T_code = code_position[key]
            else:
                T_code = 'TFP_' + str(TFN_idx)
                TFN_idx += 1
            found_entities.append((T_code, e_type, entity[1], entity[2]))
            if e_type == 'Measurement':
                found_e2_T_code.append(T_code)
            else:
                found_e1_T_code.append(T_code)

        pred_tags = tags[0][1:-1]
        assert len(new_tokens) == len(pred_tags)
        pred_codes = ['0'] * len(new_tokens)
        for e in found_entities:
            start = e[2]
            end = e[3]
            for idx in range(start, end + 1):
                pred_codes[idx] = e[0]

        # Draw random (e1, e2) pairs labelled '0'. At most count_y1 + b + 1
        # draws are made; a draw that repeats a known pair adds nothing.
        b = 1
        count_y0 = 0
        relations_temp = copy.deepcopy(relation_pairs_dict)
        while count_y0 <= (count_y1 + b) and len(found_e1_T_code) > 1:
            e1_random = choice(found_e1_T_code)
            try:
                e1_random_correspond_e2 = relations_temp[e1_random]
                other_e2_codes = list(
                    set(found_e2_T_code).difference(
                        set(e1_random_correspond_e2)))
                e2_random = choice(other_e2_codes)
            except (KeyError, IndexError):
                # KeyError: e1 has no recorded relation yet.
                # IndexError: every found e2 is already paired with e1.
                if len(found_e2_T_code) > 0:
                    e2_random = choice(found_e2_T_code)
                else:
                    count_y0 += 1
                    continue

            if e1_random not in relations_temp:
                relations_temp[e1_random] = [e2_random]
                relation_pairs_NER.append(
                    (e1_random, e2_random, 'RTN_' + str(count_y0), '0'))
            elif e2_random not in relations_temp[e1_random]:
                value_list = copy.deepcopy(relations_temp[e1_random])
                value_list.append(e2_random)
                relations_temp[e1_random] = value_list
                relation_pairs_NER.append(
                    (e1_random, e2_random, 'RTN_' + str(count_y0), '0'))
            count_y0 += 1

        # Merge '##' word pieces into the token that precedes them.
        # Column 0 is 'token' (dict insertion order below).
        single_sample = {
            'token': new_tokens,
            'codes': pred_codes,
            'tags': pred_tags
        }
        df_temp = pd.DataFrame(single_sample)
        n_rows = df_temp.shape[0]
        current_row = 0
        while current_row <= n_rows - 1:
            current_token = df_temp.iloc[current_row, 0]
            if not current_token.startswith('##'):
                current_row += 1
                continue

            i = 0
            temp_token = ''
            while (current_row + i) <= (n_rows - 1) and df_temp.iloc[
                    current_row + i, 0].startswith('##'):
                temp_token += df_temp.iloc[current_row + i, 0].replace(
                    '##', '')
                i += 1
            start_word_piece_position = current_row - 1
            # Write through a single .iloc[row, col] call: chained
            # ``df.iloc[row][0] += ...`` may update a temporary copy.
            df_temp.iloc[start_word_piece_position, 0] = (
                df_temp.iloc[start_word_piece_position, 0] + temp_token)
            current_row += i

        df_temp = df_temp[df_temp['token'].str.startswith('##') == False]
        new_tokens = df_temp['token'].values.tolist()
        pred_tags = df_temp['tags'].values.tolist()
        pred_codes = df_temp['codes'].values.tolist()

        # Recompute entity spans on the merged tokens: one tuple per code
        # with (code, tag of first row, first index, last index).
        df_temp = df_temp.reset_index(drop=True)
        agg_fun = lambda s: (max(s['codes']), s['tags'].iloc[0],
                             s.index.tolist()[0], s.index.tolist()[-1])
        groupby_code = df_temp.groupby('codes').apply(agg_fun)
        new_found_entities = []
        for key, e_type, start, end in groupby_code:
            if key != '0':
                e_type = e_type.split('-')[1]
                new_found_entities.append((key, e_type, start, end))

        sample = {
            'HADM_ID': hadm_id,
            'token': new_tokens,
            'tags': pred_tags,
            'relations': relation_pairs_NER,
            'entities': new_found_entities,
            'code': pred_codes
        }
        ner_output_samples.append(sample)

    content = str(ner_output_samples)
    file_path = base.config['data_dir'] / 'End2End_Test/re_e2e_test.txt'
    # The with-block closes the file; no explicit close() is needed.
    with open(file_path, 'w+') as new_f:
        new_f.write(content)

    e2e_test_file_path = base.config[
        'data_dir'] / f'End2End_Test/re_e2e_test_by_fold_{args.fold}.pkl'
    tools.logger.info(f'Saving e2e_test_file into {e2e_test_file_path}')
    save_pickle(ner_output_samples, e2e_test_file_path)

Example #6

Show file

def run_test(args):
    """Evaluate a saved model on the NER test set and record the scores.

    The model is loaded from ``args.model_path``, test examples and
    features are built (or loaded from their cache files), and every batch
    is scored with a ``SeqEntityScore``. The overall and per-entity metrics
    are logged and appended to a dated text file under
    ``base.config['result_dir'] / args.arch``.

    Args:
        args: Namespace providing ``model_path``, ``do_lower_case``,
            ``device``, ``data_name``, ``eval_max_seq_len``,
            ``eval_batch_size`` and ``arch``. ``args.resume_path`` is set
            to ``args.model_path`` as a side effect.

    Returns:
        None.
    """
    from main.common import progressbar
    # NOTE(review): ``ner_utils`` is not referenced by name below, but
    # importing it presumably ensures ``common.ner_utils`` resolves --
    # confirm before removing.
    from main.common import ner_utils
    args.resume_path = args.model_path
    processor = BertProcessor(args.resume_path / 'vocab.txt',
                              args.do_lower_case)
    label_list = processor.get_labels()  # all labels
    label2id = {label: i for i, label in enumerate(label_list)}
    id2label = {i: label for i, label in enumerate(label_list)}
    model = BERTLSTMCRF
    model = model.from_pretrained(args.resume_path,
                                  label2id=label2id,
                                  device=args.device)
    tools.logger.info(f'loaded model from {args.resume_path}')
    model.to(args.device)

    test_data_path = base.config['data_dir'] / 'test/new_nihss_ner_test.pkl'
    test_data = processor.get_test(test_data_path)
    test_examples_cached_file = base.config[
        'data_dir'] / f'test/cached/cached_{args.data_name}_test_examples'
    test_examples = processor.create_examples(
        lines=test_data,
        example_type='test',
        cached_file=test_examples_cached_file)
    test_features_cached_file = base.config[
        'data_dir'] / f'test/cached/cached_{args.data_name}_test_features_{args.eval_max_seq_len}'
    test_features = processor.create_features(
        examples=test_examples,
        max_seq_len=args.eval_max_seq_len,
        cached_file=test_features_cached_file)

    test_dataset = processor.create_dataset(test_features)
    test_sampler = SequentialSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=args.eval_batch_size)

    tools.logger.info('****** Running Testing Model ******')
    tools.logger.info(' Num test examples = %d', len(test_examples))

    pbar = progressbar.ProgressBar(n_total=len(test_dataloader),
                                   desc='Testing')

    entity_score = common.ner_utils.SeqEntityScore(id2label)
    entity_score.reset()
    test_loss = tools.AverageMeter()

    for step, batch in enumerate(test_dataloader):
        batch = tuple(t.to(args.device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, input_lens = batch
        input_lens = input_lens.cpu().detach().numpy().tolist()
        model.eval()
        with torch.no_grad():
            features, loss = model.forward_loss(input_ids, segment_ids,
                                                input_mask, label_ids,
                                                input_lens)
            tags, _ = model.crf._obtain_labels(features, id2label, input_lens)
        test_loss.update(val=loss.item(), n=input_ids.size(0))
        pbar(step=step, info={'loss': loss.item()})
        label_ids = label_ids.to('cpu').numpy().tolist()
        # Feed gold/predicted label paths (position 0 skipped, cut at the
        # first [SEP] label) to the scorer.
        for i, label in enumerate(label_ids):
            temp_1 = []
            temp_2 = []
            for j, label_id in enumerate(label):
                if j == 0:
                    continue
                if label_id == label2id['[SEP]']:
                    entity_score.update(pred_paths=[temp_2],
                                        label_paths=[temp_1])
                    break
                temp_1.append(id2label[label_id])
                temp_2.append(tags[i][j])
    test_info, class_info = entity_score.result()
    info = {f'test_{key}': value for key, value in test_info.items()}
    info['test_loss'] = test_loss.avg
    if 'cuda' in str(args.device):
        torch.cuda.empty_cache()

    logs = dict(**info)
    show_info = 'Test: ' + " -".join(
        [f' {key}: {value:.4f}' for key, value in logs.items()])
    tools.logger.info(show_info)
    tools.logger.info("The entity scores of test data : ")

    result_path = base.config['result_dir'] / args.arch
    # parents=True so a missing result_dir does not abort the run after
    # the whole evaluation has already been computed.
    result_path.mkdir(parents=True, exist_ok=True)
    result_file_path = result_path / f'{args.arch}_test_result_{str(datetime.date.today())}.txt'
    tools.logger.info(f'Saving test data to {result_file_path}')
    # The with-block closes the file; no explicit close() is needed.
    with open(str(result_file_path), 'a+') as f:
        f.write(show_info + '\n')

        for key, value in class_info.items():
            entity_line = f'Entity: {key} \t' + "-\t".join(
                [f' {key_}: {value_:.4f} ' for key_, value_ in value.items()])
            tools.logger.info(entity_line)
            f.write(entity_line + '\n')