Example #1
 def _fit_model(self, train_reader, valid_reader):
     best_score = 1e5
     tqdm_trange = trange(0, self.n_epoch)
     for epoch in tqdm_trange:
         self._train_model(train_reader)
         self._valid_model(valid_reader)
         train_loss = self.acc['train'][-1]
         valid_loss = self.acc['valid'][-1]
         train_loss_str = '{:.3f}'.format(train_loss)
         valid_loss_str = '{:.3f}'.format(valid_loss)
         tqdm_trange.set_description(
             f'tr=>epoch={epoch} Valid Loss: {valid_loss_str}, Train Loss: {train_loss_str}'
         )
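         # checkpoint payload: epoch, model weights, optimizer state and the running best score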
         unit = {
             'epoch': epoch,
             'state_dict': self.predictor.state_dict(),
             'score': valid_loss,
             'best_score': best_score,
             'optimizer': self.optimizer.state_dict()
         }
         if valid_loss < best_score:
             best_score = valid_loss
             unit['best_score'] = valid_loss
             self._save_checkpoint(unit, epoch, is_best=True)
         if epoch % self.n_epoch_saved == 0:
             self._save_checkpoint(unit, epoch, is_best=False)
         self._save_checkpoint(unit, -1, is_best=False)
Example #2
    def predict(self): 
        self.input_array /= 255
        for i in range(3):
            self.input_array[:,:,:,i] -= self.mean[i]
            self.input_array[:,:,:,i] /= self.std[i]
        if self.input_array.shape[0] <= self.batch_size:
            image_tensor = Variable(torch.from_numpy(self.input_array.copy().transpose((0, 3, 1, 2))).float().cuda())
            output = self.model(image_tensor)
            output_predict = F.softmax(output, dim=1)
            output_predict = output_predict.data.cpu().numpy()
        else:
            batch_count = self.input_array.shape[0]// self.batch_size
#            with tqdm(dataloaders[phase], desc = phase, file=sys.stdout, disable=not (self.verbose)) as iterator: 
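            # iterate over all full batches plus one trailing partial batch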
            for i in trange(batch_count + 1, disable=not self.verbose):
                top = i * self.batch_size
                bottom = min(self.input_array.shape[0], (i+1) * self.batch_size)
                if top < self.input_array.shape[0]:
                    image_tensor = Variable(torch.from_numpy(self.input_array[top:bottom, :, :, :].copy().transpose((0, 3, 1, 2))).float().cuda())
                    output = self.model(image_tensor)
                    output_predict_batch = F.softmax(output, dim=1)
                    output_predict_batch = output_predict_batch.data.cpu().numpy()
                    if i ==0 :
                        output_predict = output_predict_batch.copy()
                    else:
                        output_predict = np.row_stack((output_predict,output_predict_batch.copy()))
                    del image_tensor,output_predict_batch,output
                    gc.collect()
        return output_predict
Example #3
    def predict(self):
        self.input_array /= 255
        for i in range(3):
            self.input_array[:, :, :, i] -= self.mean[i]
            self.input_array[:, :, :, i] /= self.std[i]
        if self.input_array.shape[0] <= self.batch_size:
            output_predict = self.model.predict(self.input_array,
                                                batch_size=self.batch_size)

        else:
            batch_count = self.input_array.shape[0] // self.batch_size
            #            with tqdm(dataloaders[phase], desc = phase, file=sys.stdout, disable=not (self.verbose)) as iterator:
            for i in trange(batch_count + 1, disable=not self.verbose):
                top = i * self.batch_size
                bottom = min(self.input_array.shape[0],
                             (i + 1) * self.batch_size)
                if top < self.input_array.shape[0]:
                    output_predict_batch = self.model.predict(
                        self.input_array[top:bottom, :, :, :].copy(),
                        batch_size=self.batch_size)
                    if i == 0:
                        output_predict = output_predict_batch.copy()
                    else:
                        output_predict = np.row_stack(
                            (output_predict, output_predict_batch.copy()))
                    del output_predict_batch
                    gc.collect()
        return output_predict
Example #4
def evaluate_player(old_player,
                    new_player,
                    game_size=11,
                    each_times=40,
                    thresh_num=0.55,
                    competitive=False):
    newPlayerWin = 0
    # in competitive mode, new_player must win once as each colour to pass
    if competitive:
        game = HexGame(game_size)
        _, winner = comparePlay(new_player, old_player, game, competitive=True)
        if (winner == chess.blue):
            print('|competitive loss')
            return old_player, False
        game = HexGame(game_size)
        _, winner = comparePlay(old_player, new_player, game, competitive=True)
        if (winner == chess.red):
            print('|competitive loss')
            return old_player, False

    print('red evaluate: ')
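    # new_player plays as red first; count its wins over each_times games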
    for i in trange(int(each_times)):
        game = HexGame(game_size)
        _, winner = comparePlay(new_player, old_player, game)
        if (winner == chess.red):
            newPlayerWin += 1

    print('blue evaluate: ')
    for i in trange(int(each_times)):
        game = HexGame(game_size)
        _, winner = comparePlay(old_player, new_player, game)
        if (winner == chess.blue):
            newPlayerWin += 1

    print('|victory_pros:  ' + str(newPlayerWin /
                                   (int(each_times) * 2) * 100) + '%')
    if (newPlayerWin / (int(each_times) * 2) >= thresh_num):
        winPlayer = new_player
        isBeat = True
    else:
        winPlayer = old_player
        isBeat = False

    return winPlayer, isBeat
Example #5
    def genGraph(self):
        idx = 0

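        # connect each cell to its right, lower-left, lower and lower-right neighbours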
        for x in trange(self.h):
            for y in range(self.w):
                for (i, j) in [(x, y + 1), (x + 1, y - 1), (x + 1, y),
                               (x + 1, y + 1)]:
                    if (0 <= i < self.h) and (0 <= j < self.w):
                        self.addEdge(idx, (x, y), (i, j))
                        idx += 1
Example #6
 def Run(self):
     """execute function"""
     random.seed()
     self.Initialize(self.max_gen, self.env)
     for i in trange(self.max_gen):
         time.sleep(0.01)
         # logging.info("\n------This is the %d generation------" % i)
         random.shuffle(self.rat_list)
         self.Crossover_and_Mutation()
         # self.Select()
         self.Elite_select()
         self.rat_best.BestRatDisplay_log()
Example #7
    def segementRegion(self):
        """
        separating the region
        :return:
        """
        self.sortEdges()

        print("union regions...")
        for i in trange(self.G.edges_num):
            edge = self.G.edges[i, :]
            r_1 = self.regions.findRoot(edge[0])
            r_2 = self.regions.findRoot(edge[1])

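            # merge two regions when the connecting edge weight does not exceed either region's internal difference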
            if r_1 != r_2:
                if edge[2] <= min(self.int_dif[r_1], self.int_dif[r_2]):
                    self.regions.union(r_1, r_2)
                    root_idx = self.regions.findRoot(r_1)
                    self.int_dif[root_idx] = edge[2] + self.tau(
                        self.regions.getSetSize(root_idx))
Example #8
def data_get():
    n_class = 10
    data_name_list = os.listdir('/home/flr/Desktop/Dadou/')
    data_x = []
    y_list = []
    index = np.arange(7218)
    np.random.shuffle(index)
    for i in trange(len(data_name_list)):
        picture_data = cv2.imread(
            '/home/flr/Desktop/Dadou/' + data_name_list[i], 1)
        data_x.append(picture_data)
        y_list.append(data_name_list[i][0].split('-')[0])
    data_y = np_utils.to_categorical(y_list, 17)
    index_train = index[0:6000]
    index_test = index[6000:7218]
    data_x_train = np.array(data_x)[index_train, :, :, :]
    data_y_train = np.array(data_y)[index_train, :]
    data_x_test = np.array(data_x)[index_test, :, :, :]
    data_y_test = np.array(data_y)[index_test, :]
    return data_x_train, data_y_train, data_x_test, data_y_test, n_class
Example #9
def create_player(king_player,
                  base_player=None,
                  game_size=11,
                  total_times=1000,
                  reTrain=False,
                  train_only_win=False,
                  plot=False,
                  debug=False,
                  train_scale=9):
    train_data = []
    train_winner = []

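    # self-play with the current best player, accumulating board data and game winners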
    print("SelfPlay Progress:")
    for i in trange(total_times):
        #tqdm.set_description(f"SelfPlay Progress:")
        #print(i)
        if (debug == True):
            debug_str = 'self play progress : ' + str(i + 1) + '/' + str(
                total_times)
            print(debug_str)
        game = HexGame(game_size)
        new_data, new_winner = selfPlay(king_player, game)
        train_data += new_data
        train_winner.append(new_winner)

    if train_only_win:
        train_data = __pickUp_data(train_data, train_winner)

    if reTrain:
        new_model = __reTrain_model(train_data,
                                    game_size,
                                    plot=plot,
                                    train_scale=train_scale)
    else:
        new_model = __train_model(base_player.model,
                                  train_data,
                                  game_size,
                                  plot=plot,
                                  train_scale=train_scale)
    new_player = DeepLearningPlayer(new_model)
    return new_player
Example #10
def get_dissim_distribution(str_A, str_B):
    i = 0
    dissim_distribution = np.zeros((50, ))
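    # dissim_distribution[k] counts how often position k falls in a dissimilar region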
    for i in trange(len(str_A)):
        for j in range(0, len(str_B)):
            result = Levenshtein.editops(
                str_A[i],
                str_B[j])  # catch the editting steps of two sequences

            record = []  # Get the different region
            for item in result:
                if item[0] == 'delete' or item[0] == 'replace' or (
                        item[0] == 'insert' and item[2] == 50):
                    record.append(item[1])
                    record = list(set(record))  #remove repeating location

            tmp_dissim_region = []  # Get the dissimilar region in the str_A
            for k in range(0, 50):
                if k in record:
                    tmp_dissim_region.append(k)
            for item in tmp_dissim_region:
                dissim_distribution[item] = dissim_distribution[item] + 1
        print(i)
    return dissim_distribution
Example #11
    def get_exp_data(self,
                     sel_task='diagnose',
                     shuffle=True,
                     split_ratio=[0.64, 0.16, 0.2],
                     data_root='',
                     n_limit=-1):
        """
        Parameters

        ----------
             
        task : str, optional (default='phenotyping')
            name of current healthcare task
 
        shuffle : bool, optional (default=True) 
            determine whether shuffle data or not
            
        split_ratio : list, optional (default=[0.64,0.16,0.2])
            used for split whole data into train/valid/test
        
        data_root : str, (default='')
            use data in data_root
        
        n_limit : int, optional (default = -1)
            used for sample N-data not for all data, if n_limit==-1, use all data 
        """
        self.sel_task = sel_task

        if data_root == '':
            raise Exception('fill in correct data_root')

        all_list = []
        l_list = []
        episode_dir = os.path.join(data_root, 'x_data')
        feat_n, label_n = 0, 0
        label_seq = pd.read_csv(
            os.path.join(data_root, 'y_data', self.sel_task + '.csv')).values
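        # each row of the label file: [episode file name, label_1, ..., label_n]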
        for row_id in trange(len(label_seq)):
            if n_limit > 0 and row_id > n_limit:
                break
            time.sleep(0.01)
            row = label_seq[row_id, :]
            concrete_path = os.path.join(episode_dir, row[0])
            if os.path.exists(concrete_path) is False:
                continue
            all_list.append([concrete_path] + row[1:].astype(float).tolist())
            label_n = len(row[1:])
        # shuffle the list
        if shuffle:
            random.shuffle(all_list)
        N = len(all_list)
        x_list = []
        y_list = []
        for item in all_list:
            x_list.append(item[0])
            y_list.append(np.array(item[1:]).astype(float))

        train_ratio = split_ratio[0]
        valid_ratio = split_ratio[1]

        training_x = x_list[:int(train_ratio * N)]
        validing_x = x_list[int(train_ratio *
                                N):int((train_ratio + valid_ratio) * N)]
        testing_x = x_list[int((train_ratio + valid_ratio) * N):]

        training_y = y_list[:int(train_ratio * N)]
        validing_y = y_list[int(train_ratio *
                                N):int((train_ratio + valid_ratio) * N)]
        testing_y = y_list[int((train_ratio + valid_ratio) * N):]

        if os.path.exists(self.expdata_dir) is False:
            os.makedirs(self.expdata_dir)

        pickle.dump(training_x,
                    open(os.path.join(self.expdata_dir, 'train_x.pkl'), 'wb'))
        pickle.dump(validing_x,
                    open(os.path.join(self.expdata_dir, 'valid_x.pkl'), 'wb'))
        pickle.dump(testing_x,
                    open(os.path.join(self.expdata_dir, 'test_x.pkl'), 'wb'))
        print('finished generating X')
        pickle.dump(training_y,
                    open(os.path.join(self.expdata_dir, 'train_y.pkl'), 'wb'))
        pickle.dump(validing_y,
                    open(os.path.join(self.expdata_dir, 'valid_y.pkl'), 'wb'))
        pickle.dump(testing_y,
                    open(os.path.join(self.expdata_dir, 'test_y.pkl'), 'wb'))
        print('finished generating Y')

        expdata_statistic = {
            'task': self.sel_task,
            'ratio': split_ratio,
            'label_n': label_n,
            'len_train': len(training_x),
            'len_valid': len(validing_x),
            'len_test': len(testing_x)
        }
        pickle.dump(
            expdata_statistic,
            open(os.path.join(self.expdata_dir, 'expdata_statistic.pkl'),
                 'wb'))

        self.train = {'x': training_x, 'y': training_y, 'label_n': label_n}
        self.valid = {'x': validing_x, 'y': validing_y, 'label_n': label_n}
        self.test = {'x': testing_x, 'y': testing_y, 'label_n': label_n}

        print('generation finished')
        print('target Task:', expdata_statistic['task'])
        print('N of labels:', expdata_statistic['label_n'])
        print('N of TrainData:', expdata_statistic['len_train'])
        print('N of ValidData:', expdata_statistic['len_valid'])
        print('N of TestData:', expdata_statistic['len_test'])
Example #12
from tqdm import tqdm, trange
import time

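# trange(n) is shorthand for tqdm(range(n))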
for i in tqdm(range(50)):
    time.sleep(0.1)
    pass

for j in trange(50):
    time.sleep(0.05)
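
# a small extra usage sketch (not from the original snippet): trange forwards its
# keyword arguments to tqdm, so a description can be attached to the bar
for k in trange(20, desc='sleeping'):
    time.sleep(0.01)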
Example #13
rel2idx = {relation: i for i, relation in enumerate(relations)}

with open(ROOT_PATH+"/data/ent2idx.csv","w+") as f:
    [print(str(k)+","+str(v),file=f) for k,v in ent2idx.items()]

with open(ROOT_PATH+"/data/rel2idx.csv","w+") as f:
    [print(str(k)+","+str(v),file=f) for k,v in rel2idx.items()]

# df = df.head(10)
heads = df["head"]
tails = df["tail"]
relations = df["relation"]

idx_df = pd.DataFrame(columns=["head","tail","relation"])

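# replace every head/tail entity and relation with its integer index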
for i in trange(len(df)):
    # print(heads[i])
    heads[i] = ent2idx[heads[i]]
    # print(heads[i])
    tails[i] = ent2idx[tails[i]]
    relations[i] = rel2idx[relations[i]]

idx_df["head"] = heads
idx_df["tail"] = tails
idx_df["relation"] = relations
# idx_df = idx_df.join(heads)
# idx_df = idx_df.join(tails)
# idx_df = idx_df.join(relations)

idx_df.to_csv(ROOT_PATH+"/data/graph.csv",index=False)
Example #14
def test(tracker, cfg):
    def transform(data):
        dets = mot_reader.detection['bboxes'][data['index']]
        image = data['raw']

        if dets.size == 0:
            return image, dets, None, None

        if len(dets) > cfg.datasets.max_object:
            dets = dets[:cfg.datasets.max_object, :]
        input_dets = dets.copy()
        input_dets[:, 2:] += input_dets[:, :2]

        input_image = image.copy()
        input_image, _, input_dets, _, _ = augmentation(img_pre=input_image,
                                                        boxes_pre=input_dets)
        input_image = input_image.unsqueeze(0).to(cfg.solver.device[0])
        input_dets = input_dets.unsqueeze(0).to(cfg.solver.device[0])
        return image, dets, input_image, input_dets

    mot_reader = MOTReader(args.sequence_folder,
                           vis_thresh=cfg.datasets.min_visibility,
                           detection_thresh=cfg.datasets.detection_thresh)
    sequence_name = mot_reader.video_info['name']
    w, h = mot_reader.video_info['shape']
    print('=> Preparing DataLoader for {}...'.format(sequence_name))
    tracker.reset()
    augmentation = DANAugmentation(cfg, type='test')
    method = {'transform': transform}
    loader = DataLoader(os.path.join(args.sequence_folder, 'img1'),
                        max_size=20,
                        **method)
    loader.start()
    time.sleep(2)

    result_file = os.path.join(args.result_folder, sequence_name + '.txt')
    result_video = os.path.join(args.result_folder, sequence_name + '.avi')
    if args.save_video:
        video_writer = cv2.VideoWriter(
            result_video, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
            (w, h))

    print('=> Begin to Track {}...'.format(sequence_name))
    result = []
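    # step through the sequence frame by frame with a progress bar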
    for i in trange(len(mot_reader)):
        image, dets, input_image, input_dets = loader.getData()['output']
        if dets.size == 0:
            continue

        draw = tracker.update(image, dets, input_image, input_dets, i,
                              args.show_image)
        if args.show_image and image is not None:
            cv2.imshow(sequence_name, draw)
            cv2.waitKey(1)

        if args.save_video:
            video_writer.write(draw)

        for t in tracker.tracks:
            n = t.nodes[-1]
            if t.age == 1:
                b = n.get_box(tracker.frame_index - 1, tracker.recorder)
                result.append([i] + [t.id] + [b[0], b[1], b[2], b[3]] +
                              [-1, -1, -1, -1])

    np.savetxt(result_file, np.array(result).astype(int), fmt='%i')
    if args.evaluate and mot_reader.ground_truth is not None:
        mota, _, idf1, idsw = evaluation_mot(mot_reader.ground_truth, result)
        print('MOTA={:.4f}, IDF1={:.4f}, ID Sw.={}'.format(mota, idf1, idsw))

    loader.stop()
Example #15
#     #
#     ############################################################################
#
#     return {
#         'src':      np.asarray( src ),
#         'dst':      np.asarray( dst ),
#         'linear':   np.array( < double [ :3, :2 ] > linear ),
#         'weights':  np.array( < double [ :n, :2 ] > W ),
#         'be':       be[0],
#     }
################################################################################

cylst = []
cy2lst = []

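# outer loop sweeps the number of points n; the inner loop repeats each timing 10 times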
for n in trange(5, 100, 1, leave=True):
    np.random.seed(1337)

    src = np.random.rand(n, 2) * 10
    dst = src + np.random.rand(n, 2) * 0.1

    tmp1 = []
    tmp2 = []

    for r in trange(0, 10, 1, leave=False):
        with Capturing():
            with Timer("Cy") as t1:
                gCy(src, dst)

            tmp1.append(t1.interval)
Example #16
    def train(self, load_model=False, model_path=None):
        if load_model:
            if model_path is not None:
                self.load_weights(model_path)
        ## Training utterances
        all_input_ids, all_input_len, all_label_ids = convert_examples_to_features(
            self.train_examples, self.label_list, args.max_seq_length, self.tokenizer, args.max_turn_length)

        print('all input ids size: ', all_input_ids.size())
        num_train_batches = all_input_ids.size(0)
        num_train_steps = int(
            num_train_batches / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)

        logger.info("***** training *****")
        logger.info("  Num examples = %d", len(self.train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)

        all_input_ids, all_input_len, all_label_ids = all_input_ids.to(DEVICE), all_input_len.to(
            DEVICE), all_label_ids.to(DEVICE)

        train_data = TensorDataset(all_input_ids, all_input_len, all_label_ids)
        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = convert_examples_to_features(
            self.dev_examples, self.label_list, args.max_seq_length, self.tokenizer, args.max_turn_length)

        logger.info("***** validation *****")
        logger.info("  Num examples = %d", len(self.dev_examples))
        logger.info("  Batch size = %d", args.dev_batch_size)

        all_input_ids_dev, all_input_len_dev, all_label_ids_dev = \
            all_input_ids_dev.to(DEVICE), all_input_len_dev.to(DEVICE), all_label_ids_dev.to(DEVICE)

        dev_data = TensorDataset(all_input_ids_dev, all_input_len_dev, all_label_ids_dev)
        dev_sampler = SequentialSampler(dev_data)
        dev_dataloader = DataLoader(dev_data, sampler=dev_sampler, batch_size=args.dev_batch_size)

        logger.info("Loaded data!")

        if args.fp16:
            self.sumbt_model.half()
        self.sumbt_model.to(DEVICE)

        # ## Get domain-slot-type embeddings
        # slot_token_ids, slot_len = \
        #     get_label_embedding(self.processor.target_slot, args.max_label_length, self.tokenizer, DEVICE)

        # # for slot_idx, slot_str in zip(slot_token_ids, self.processor.target_slot):
        # #     self.idx2slot[slot_idx] = slot_str

        # ## Get slot-value embeddings
        # label_token_ids, label_len = [], []
        # for slot_idx, labels in zip(slot_token_ids, self.label_list):
        #     # self.idx2value[slot_idx] = {}
        #     token_ids, lens = get_label_embedding(labels, args.max_label_length, self.tokenizer, DEVICE)
        #     label_token_ids.append(token_ids)
        #     label_len.append(lens)
        #     # for label, token_id in zip(labels, token_ids):
        #     #     self.idx2value[slot_idx][token_id] = label

        # logger.info('embeddings prepared')

        # if USE_CUDA and N_GPU > 1:
        #     self.sumbt_model.module.initialize_slot_value_lookup(label_token_ids, slot_token_ids)
        # else:
        #     self.sumbt_model.initialize_slot_value_lookup(label_token_ids, slot_token_ids)

        def get_optimizer_grouped_parameters(model):
            param_optimizer = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01,
                 'lr': args.learning_rate},
                {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0,
                 'lr': args.learning_rate},
            ]
            return optimizer_grouped_parameters

        if not USE_CUDA or N_GPU == 1:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(self.sumbt_model)
        else:
            optimizer_grouped_parameters = get_optimizer_grouped_parameters(self.sumbt_model.module)

        t_total = num_train_steps

        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.fp16_loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.fp16_loss_scale)
            scheduler = None  # no LR scheduler in the fp16 branch; the LR is set manually below

        else:
            optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, correct_bias=False)
            scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_proportion*t_total, num_training_steps=t_total)
        logger.info(optimizer)

        # Training code
        ###############################################################################

        print(torch.cuda.memory_allocated())

        logger.info("Training...")

        global_step = 0
        last_update = None
        best_loss = None
        model = self.sumbt_model
        if args.do_not_use_tensorboard:
            summary_writer = None
        else:
            summary_writer = SummaryWriter("./tensorboard_summary/logs_1214/")

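        # training loop: trange tracks epochs, tqdm tracks batches within each epoch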
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            # Train
            model.train()
            tr_loss = 0
            nb_tr_examples = 0
            nb_tr_steps = 0

            for step, batch in enumerate(tqdm(train_dataloader)):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch
                # print(input_ids.size())

                # Forward
                if N_GPU == 1:
                    loss, loss_slot, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)
                else:
                    loss, _, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)

                    # average to multi-gpus
                    loss = loss.mean()
                    acc = acc.mean()
                    acc_slot = acc_slot.mean(0)

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                # Backward
                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                # tensorboard logging
                if summary_writer is not None:
                    summary_writer.add_scalar("Epoch", epoch, global_step)
                    summary_writer.add_scalar("Train/Loss", loss, global_step)
                    summary_writer.add_scalar("Train/JointAcc", acc, global_step)
                    if N_GPU == 1:
                        for i, slot in enumerate(self.processor.target_slot):
                            summary_writer.add_scalar("Train/Loss_%s" % slot.replace(' ', '_'), loss_slot[i],
                                                      global_step)
                            summary_writer.add_scalar("Train/Acc_%s" % slot.replace(' ', '_'), acc_slot[i], global_step)

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    # modify learning rate with the special warm-up BERT uses
                    lr_this_step = args.learning_rate * warmup_linear(global_step / t_total, args.warmup_proportion)
                    if summary_writer is not None:
                        summary_writer.add_scalar("Train/LearningRate", lr_this_step, global_step)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    if scheduler is not None:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()
                    if scheduler is not None:
                        scheduler.step()
                    optimizer.zero_grad()
                    global_step += 1


            # Perform evaluation on validation dataset
            model.eval()
            dev_loss = 0
            dev_acc = 0
            dev_loss_slot, dev_acc_slot = None, None
            nb_dev_examples, nb_dev_steps = 0, 0

            for step, batch in enumerate(tqdm(dev_dataloader, desc="Validation")):
                batch = tuple(t.to(DEVICE) for t in batch)
                input_ids, input_len, label_ids = batch
                if input_ids.dim() == 2:
                    input_ids = input_ids.unsqueeze(0)
                    input_len = input_len.unsqueeze(0)
                    label_ids = label_ids.unsqueeze(0)

                with torch.no_grad():
                    if N_GPU == 1:
                        loss, loss_slot, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)
                    else:
                        loss, _, acc, acc_slot, _ = model(input_ids, input_len, label_ids, N_GPU)

                        # average to multi-gpus
                        loss = loss.mean()
                        acc = acc.mean()
                        acc_slot = acc_slot.mean(0)

                num_valid_turn = torch.sum(label_ids[:, :, 0].view(-1) > -1, 0).item()
                dev_loss += loss.item() * num_valid_turn
                dev_acc += acc.item() * num_valid_turn

                if N_GPU == 1:
                    if dev_loss_slot is None:
                        dev_loss_slot = [l * num_valid_turn for l in loss_slot]
                        dev_acc_slot = acc_slot * num_valid_turn
                    else:
                        for i, l in enumerate(loss_slot):
                            dev_loss_slot[i] = dev_loss_slot[i] + l * num_valid_turn
                        dev_acc_slot += acc_slot * num_valid_turn

                nb_dev_examples += num_valid_turn


            dev_loss = dev_loss / nb_dev_examples
            dev_acc = dev_acc / nb_dev_examples

            if N_GPU == 1:
                dev_acc_slot = dev_acc_slot / nb_dev_examples

            # tensorboard logging
            if summary_writer is not None:
                summary_writer.add_scalar("Validate/Loss", dev_loss, global_step)
                summary_writer.add_scalar("Validate/Acc", dev_acc, global_step)
                if N_GPU == 1:
                    for i, slot in enumerate(self.processor.target_slot):
                        summary_writer.add_scalar("Validate/Loss_%s" % slot.replace(' ', '_'),
                                                  dev_loss_slot[i] / nb_dev_examples, global_step)
                        summary_writer.add_scalar("Validate/Acc_%s" % slot.replace(' ', '_'), dev_acc_slot[i],
                                                  global_step)

            dev_loss = round(dev_loss, 6)

            output_model_file = os.path.join(os.path.join(SUMBT_PATH, args.output_dir), "pytorch_model.bin")

            if last_update is None or dev_loss < best_loss:
                last_update = epoch
                best_loss = dev_loss
                best_acc = dev_acc
                if not USE_CUDA or N_GPU == 1:
                    torch.save(model.state_dict(), output_model_file)
                else:
                    torch.save(model.module.state_dict(), output_model_file)

                logger.info(
                    "*** Model Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d ***" % (
                        last_update, best_loss, best_acc, global_step))
            else:
                logger.info(
                    "*** Model NOT Updated: Epoch=%d, Validation Loss=%.6f, Validation Acc=%.6f, global_step=%d  ***" % (
                        epoch, dev_loss, dev_acc, global_step))

            if last_update + args.patience <= epoch:
                break
Example #17
 def stat(self):
     # bar = tqdm(self.urls)
     for _ in trange(len(self.urls) // 20):
         time.sleep(0.01)
Example #18
 def modelToDB(self):
     # read the generated SQL files
     if 'table' in self.flag or 'constraint' in self.flag or 'index' in self.flag:
         result = self.flag.split(',')
         for object in result:
             # table objects
             if object == 'table':
                 with open(self.filedir_tab + os.sep + '%s2%s_model.sql' %
                           (self.dbtype, self.dbtype_tag),
                           'r',
                           encoding='utf-8') as f:
                     lines = f.readlines()
                 array_tab = ''.join(lines).split(';')
                 # drop empty elements
                 while '' in array_tab:
                     array_tab.remove('')
                 if len(array_tab) > 0:
                     mdtool.log.info("Table creation progress:")
                     for elem in trange(len(array_tab)):
                         elem_sql = array_tab[elem]
                         try:
                             self.dbtag_executor.sql_execute(elem_sql)
                             self.dbtag_executor.dbclose()
                         except Exception as err:
                             mdtool.log.error("Failed table creation statement: %s;" % elem_sql)
                             mdtool.log.error("Table creation failed: " + str(err))
                     f.close()
                 else:
                     mdtool.log.warning("The model generator produced no table objects, please check that it ran correctly")
                 mdtool.log.info("Table creation finished")
             # index objects
             # primary keys can still be added after unique indexes, so run the index objects first
             elif object == 'index':
                 with open(
                         self.filedir_idx + os.sep +
                         '%s2%s_index_model.sql' %
                     (self.dbtype, self.dbtype_tag), 'r') as f:
                     lines = f.readlines()
                 array_idx = ''.join(lines).split(';')
                 while '' in array_idx:
                     array_idx.remove('')
                 if len(array_idx) > 0:
                     mdtool.log.info("Index creation progress:")
                     for elem in trange(len(array_idx)):
                         elem_sql = array_idx[elem]
                         try:
                             self.dbtag_executor.sql_execute(elem_sql)
                             self.dbtag_executor.dbclose()
                         except Exception as err:
                             mdtool.log.error("Failed index statement: %s;" % elem_sql)
                             mdtool.log.error("Index creation failed: " + str(err))
                     f.close()
                 else:
                     mdtool.log.warning("The model generator produced no index objects, please check that it ran correctly")
                 mdtool.log.info("Index creation finished")
             # constraint objects
             elif object == 'constraint':
                 # primary key, check and unique constraints
                 with open(
                         self.filedir_cst + os.sep +
                         '%s2%s_constraint_model.sql' %
                     (self.dbtype, self.dbtype_tag), 'r') as f:
                     lines = f.readlines()
                 array_cst = ''.join(lines).split(';')
                 while '' in array_cst:
                     array_cst.remove('')
                 if len(array_cst) > 0:
                     mdtool.log.info("Constraint creation progress:")
                     for elem in trange(len(array_cst)):
                         elem_sql = array_cst[elem]
                         try:
                             self.dbtag_executor.sql_execute(elem_sql)
                             self.dbtag_executor.dbclose()
                         except Exception as err:
                             mdtool.log.error("Failed constraint statement: %s;" % elem_sql)
                             mdtool.log.error("Constraint creation failed: " + str(err))
                     f.close()
                 else:
                     mdtool.log.warning("The model generator produced no constraint objects, please check that it ran correctly")
                 # foreign keys are handled last
                 with open(
                         self.filedir_cst + os.sep +
                         '%s2%s_constraint_fk_model.sql' %
                     (self.dbtype, self.dbtype_tag), 'r') as f:
                     lines = f.readlines()
                 array_fk = ''.join(lines).split(';')
                 while '' in array_fk:
                     array_fk.remove('')
                 if len(array_fk) > 0:
                     mdtool.log.info("Foreign key constraint creation progress:")
                     for elem in trange(len(array_fk)):
                         elem_sql = array_fk[elem]
                         try:
                             self.dbtag_executor.sql_execute(elem_sql)
                             self.dbtag_executor.dbclose()
                         except Exception as err:
                             mdtool.log.error("Failed foreign key constraint statement: %s;" % elem_sql)
                             mdtool.log.error("Foreign key constraint creation failed: " + str(err))
                     f.close()
                 else:
                     mdtool.log.warning("The model generator produced no foreign key constraint objects, please check that it ran correctly")
                 mdtool.log.info("Constraint (and foreign key) creation finished")