def main():
    """Create a fresh timestamped log directory, build the train/val
    data loaders, and run training."""
    # Runs are logged under logs/YYYYMMDD_NN where NN counts today's runs.
    date_tag = datetime.date.today().strftime('%Y%m%d')
    run_index = len(glob.glob(f'logs/{date_tag}_??'))
    log_dir = f'logs/{date_tag}_{run_index:02}'

    models_dir = f'{log_dir}/models'
    if not os.path.exists(models_dir):
        os.makedirs(models_dir)

    writer = SummaryWriter(log_dir=log_dir)
    cfg = Config(learning_condition_filepath)
    cfg.save_dir = log_dir

    # Build both loaders from one table of (split, batch-size key, shuffle).
    loaders = {}
    for split, batch_key, do_shuffle in (
            ('train', 'train_batch_size', True),
            ('val', 'val_batch_size', False)):
        dataset = SetEdgeContestData(cfg, split=split)
        loaders[split] = DataLoader(dataset,
                                    batch_size=cfg.training[batch_key],
                                    shuffle=do_shuffle,
                                    num_workers=cfg.training['num_workers'])

    trainer = Trainer(writer, cfg)
    trainer.fit(loaders['train'], loaders['val'])
    writer.close()
Example #2
0
 def main_process(self):
     """
     zsnapd-cfgtest main process

     Parsing the dataset configuration is the entire test: any problem
     raises before the clean EX_OK exit below.
     """
     # Test configuration
     ds_settings = Config.read_ds_config()
     sys.exit(os.EX_OK)
Example #3
0
 def setUpClass(cls) -> None:
     """Build the shared config and deploy both test scores once per class."""
     super().setUpClass()
     cls.config = Config(*cls.getLocalEnvs())
     cls.owner = cls.config.owner
     cls.tx_handler = cls.config.tx_handler
     # Deploy each contract up front so every test can reuse it.
     multi_token_addr = deploy(cls.config, 'multi_token')
     cls.multi_token = Score(cls.tx_handler, multi_token_addr)
     receiver_addr = deploy(cls.config, 'token_receiver')
     cls.receiver = Score(cls.tx_handler, receiver_addr)
Example #4
0
    def main_process(self):
        """
        Main process for zfssnapd

        Sets up optional debugging hooks, logs process identity, then
        loops running Manager.run() until check_signals() says to stop,
        finally exiting with EX_OK.
        """
        if (settings['rpdb2_wait']):
            # a wait to attach with rpdb2...
            log_info('Waiting for rpdb2 to attach.')
            time.sleep(float(settings['rpdb2_wait']))

        log_info('program starting.')
        log_debug("The daemon_canary is: '{0}'".format(
            settings['daemon_canary']))
        # Do a nice output message to the log
        pwnam = pwd.getpwnam(settings['run_as_user'])
        if setproctitle_support:
            gpt_output = getproctitle()
        else:
            gpt_output = "no getproctitle()"
        # NOTE(review): '******' looks like a scrubbed placeholder — the
        # format string has no '{3}', so pwnam.pw_name is passed but never
        # printed. Confirm against the upstream source before "fixing".
        log_debug(
            "PID: {0} process name: '{1}' daemon: '{2}' User: '******' UID: {4} GID {5}"
            .format(os.getpid(), gpt_output, self.i_am_daemon(), pwnam.pw_name,
                    os.getuid(), os.getgid()))

        if (settings['memory_debug']):
            # Turn on memory debugging
            log_info('Turning on GC memory debugging.')
            gc.set_debug(gc.DEBUG_LEAK)

        # Create a Process object so that we can check in on ourself resource
        # wise
        self.proc_monitor = psutil.Process(pid=os.getpid())

        # Initialise  a few nice things for the loop
        debug_mark = get_boolean_setting('debug_mark')
        sleep_time = int(get_numeric_setting('sleep_time', float))
        debug_sleep_time = int(get_numeric_setting('debug_sleep_time', float))
        # In debug mode, substitute the (typically shorter) debug interval.
        sleep_time = debug_sleep_time if debug() else sleep_time

        # Initialise Manager stuff
        ds_settings = Config.read_ds_config()

        # Process Main Loop
        while (self.check_signals()):

            try:
                Manager.run(ds_settings, sleep_time)
            except Exception as ex:
                # Keep the daemon alive: log the error and carry on looping.
                log_error('Exception: {0}'.format(str(ex)))

            if debug_mark:
                log_debug(
                    "----MARK---- sleep({0}) seconds ----".format(sleep_time))
            self.main_sleep(sleep_time)

        log_info('Exited main loop - process terminating normally.')
        sys.exit(os.EX_OK)
Example #5
0
 def main_process(self):
     """
     zsnapd-trigger main process

     Requires root, reads the dataset configuration, fires the configured
     triggers, and exits EX_OK on success or EX_CONFIG on failure.
     """
     self.check_if_root()
     ds_settings = Config.read_ds_config()
     triggered = Manager.touch_trigger(ds_settings,
                                       settings['reachable_arg'],
                                       settings['do_trigger_arg'],
                                       *self.argv_left)
     sys.exit(os.EX_OK if triggered else os.EX_CONFIG)
def main():
    """Train BERT_BiLSTM_CRF for NER with checkpointing and early stopping.

    Loads the train/dev corpora, optionally resumes from
    ``config.checkpoint``, trains for up to ``config.epochs`` epochs,
    saves the best model by validation accuracy plus a per-epoch
    checkpoint, and finally plots the train/validation loss curves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('loading corpus')
    config = Config()
    vocab = load_vocab(config.vocab)
    label_dic = load_vocab(config.label_file)
    tagset_size = len(label_dic)
    train_data = read_corpus(config.train_file,
                             max_length=config.max_length,
                             label_dic=label_dic,
                             vocab=vocab)
    dev_data = read_corpus(config.dev_file,
                           max_length=config.max_length,
                           label_dic=label_dic,
                           vocab=vocab)

    train_ids = torch.LongTensor([temp.input_id for temp in train_data])
    train_masks = torch.LongTensor([temp.input_mask for temp in train_data])
    train_tags = torch.LongTensor([temp.label_id for temp in train_data])

    train_dataset = TensorDataset(train_ids, train_masks, train_tags)
    train_loader = DataLoader(train_dataset,
                              shuffle=True,
                              batch_size=config.batch_size)

    dev_ids = torch.LongTensor([temp.input_id for temp in dev_data])
    dev_masks = torch.LongTensor([temp.input_mask for temp in dev_data])
    dev_tags = torch.LongTensor([temp.label_id for temp in dev_data])

    dev_dataset = TensorDataset(dev_ids, dev_masks, dev_tags)
    # NOTE(review): shuffle=True on the dev set does not change the metrics,
    # but shuffle=False would make evaluation order deterministic.
    dev_loader = DataLoader(dev_dataset,
                            shuffle=True,
                            batch_size=config.batch_size)

    model = BERT_BiLSTM_CRF(tagset_size, config.bert_embedding,
                            config.rnn_hidden, config.rnn_layer,
                            config.dropout, config.pretrain_model_name,
                            device).to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.lr,
                                 weight_decay=config.weight_decay)
    # mode="max": the scheduler steps on validation accuracy (higher = better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.5,
                                                           patience=1)

    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot.
    epochs_count = []
    train_losses = []
    valid_losses = []

    # Continuing training from a checkpoint if one was given as argument.
    if config.checkpoint:
        checkpoint = torch.load(config.checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]

        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))

        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]

    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, start_estimator = valid(model, dev_loader)
    print(
        "\t* Validation loss before training: loss = {:.4f}, accuracy: {:.4f}%, recall: {:.4f}%, F1: {:.4f}%"
        .format(valid_loss, (start_estimator[0] * 100),
                (start_estimator[1] * 100), (start_estimator[2] * 100)))

    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=",
          "Training BERT_BiLSTM_CRF model on device: {}".format(device),
          20 * "=")

    patience_counter = 0
    for epoch in range(start_epoch, config.epochs + 1):
        epochs_count.append(epoch)

        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss = train(model, train_loader, optimizer,
                                       config.max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}".format(
            epoch_time, epoch_loss))

        epoch_time, valid_loss, valid_estimator = valid(model, dev_loader)
        # BUG FIX: the original appended the list to itself
        # (valid_losses.append(valid_losses)), corrupting both the saved
        # checkpoints and the loss-curve plot.
        valid_losses.append(valid_loss)
        print(
            "-> Valid time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%, recall: {:.4f}%, F1: {:.4f}%"
            .format(epoch_time, valid_loss, (valid_estimator[0] * 100),
                    (valid_estimator[1] * 100), (valid_estimator[2] * 100)))

        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(valid_estimator[0])

        # Early stopping on validation accuracy (estimator[0]).
        if valid_estimator[0] < best_score:
            patience_counter += 1
        else:
            best_score = valid_estimator[0]
            patience_counter = 0
            # Save the best model. The optimizer is not saved to avoid having
            # a checkpoint file that is too heavy to be shared. To resume
            # training from the best model, use the 'esim_*.pth.tar'
            # checkpoints instead.
            torch.save(
                {
                    "epoch": epoch,
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses
                }, os.path.join(config.target_dir, "RoBERTa_best.pth.tar"))

        # Save the model at each epoch.
        torch.save(
            {
                "epoch": epoch,
                "model": model.state_dict(),
                "best_score": best_score,
                "optimizer": optimizer.state_dict(),
                "epochs_count": epochs_count,
                "train_losses": train_losses,
                "valid_losses": valid_losses
            },
            os.path.join(config.target_dir,
                         "RoBERTa_NER_{}.pth.tar".format(epoch)))

        if patience_counter >= config.patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
    # Plotting of the loss curves for the train and validation sets.
    plt.figure()
    plt.plot(epochs_count, train_losses, "-r")
    plt.plot(epochs_count, valid_losses, "-b")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend(["Training loss", "Validation loss"])
    plt.title("Cross entropy loss")
    # BUG FIX: save before show() — show() blocks and the figure is cleared
    # on close, so the original saved an empty image.
    plt.savefig('../result/loss.png')
    plt.show()
Example #7
0
 def deploy(args):
     """Deploy *args.contract* against *args.endpoint* using the keystore.

     :param args: parsed CLI arguments with ``endpoint``, ``keystore``
         (an open file whose ``.name`` is the path) and ``contract``.
     """
     config = Config(args.endpoint, args.keystore.name)
     # NOTE(review): this call resolves to a module-level deploy() helper,
     # not to this function (inside a method body, a bare name is looked up
     # in module scope). If this is actually a top-level function it would
     # recurse infinitely — confirm against the full file.
     deploy(config, args.contract, print)
Example #8
0
 def setUp(self) -> None:
     """Prepare config, owner, tx handler, and a deployed multi_token score."""
     super().setUp()
     cfg = Config(*self.getLocalEnvs())
     self.config = cfg
     self.owner = cfg.owner
     self.tx_handler = cfg.tx_handler
     score_address = deploy(cfg, 'multi_token')
     self.score = Score(self.tx_handler, score_address)
Example #9
0
def predict(input_seq, max_length=128):
    """Run NER over a single sentence and print the entities found.

    :param input_seq: the input sentence (a string / sequence of characters)
    :param max_length: maximum model sequence length, including the
        [CLS]/[SEP] markers added here
    :return: (PER, LOC, ORG) lists of entity substrings (also printed);
        returning them is new and backward-compatible (was None)
    """
    config = Config()
    vocab = load_vocab(config.vocab)
    label_dic = load_vocab(config.label_file)
    tagset_size = len(label_dic)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = BERT_BiLSTM_CRF(tagset_size, config.bert_embedding,
                            config.rnn_hidden, config.rnn_layer,
                            config.dropout_ratio, config.dropout1,
                            config.pretrain_model_name, device).to(device)

    checkpoint = torch.load(config.checkpoint)
    model.load_state_dict(checkpoint["model"])

    # Tokenize character-by-character; truncate so [CLS]/[SEP] still fit.
    input_list = list(input_seq)[:max_length - 2]
    input_list = ['[CLS]'] + input_list + ['[SEP]']

    # Map tokens to vocabulary ids, falling back to [UNK].
    input_ids = [
        int(vocab[word]) if word in vocab else int(vocab['[UNK]'])
        for word in input_list
    ]
    input_mask = [1] * len(input_ids)

    # Right-pad ids and mask with zeros up to max_length.
    pad = max_length - len(input_ids)
    if pad > 0:
        input_ids.extend([0] * pad)
        input_mask.extend([0] * pad)
    assert len(input_ids) == max_length
    assert len(input_mask) == max_length

    # Add the batch dimension and move to the device. The CRF requires the
    # mask to be uint8 or bool, hence ByteTensor.
    input_ids = torch.LongTensor([input_ids]).to(device)
    input_mask = torch.ByteTensor([input_mask]).to(device)

    feats = model(input_ids, input_mask)
    # predict() returns one path per batch item; [1:-1] strips the
    # <START>/<EOS> markers.
    out_path = model.predict(feats, input_mask)[0][1:-1]
    res = find_all_tag(out_path)

    # Tag id -> destination list (1: person, 2: location, 3: organization).
    PER, LOC, ORG = [], [], []
    buckets = {1: PER, 2: LOC, 3: ORG}
    for name in res:
        bucket = buckets.get(name)
        if bucket is not None:
            for start, length in res[name]:
                bucket.append(input_seq[start:start + length])

    # Print the results (runtime string kept as-is).
    print('预测结果:', '\n', 'PER:', PER, '\n', 'ORG:', ORG, '\n', 'LOC:', LOC)
    return PER, LOC, ORG
Example #10
0
 def setUpClass(cls) -> None:
     """Deploy the multi_token score once for the whole test class."""
     super().setUpClass()
     # The config stays local on purpose; only owner/tx_handler/score are
     # published as class attributes.
     cfg = Config(*cls.getLocalEnvs())
     cls.owner = cfg.owner
     cls.tx_handler = cfg.tx_handler
     score_address = deploy(cfg, 'multi_token')
     cls.score = Score(cls.tx_handler, score_address)