Example #1
def read_net_info(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    name_op_dict = {}
    raw_info = []
    op_list = []
    # Fields per line: name, input tensor shapes, output tensor shapes, parent idx, children idx
    for line in lines:
        com = line.split(" ")
        if len(com) < 3:
            utils.get_logger().warn(
                "net info file line has fewer than 3 fields: {}".format(com))
            continue
        new_com = [c.strip() for c in com]
        raw_info.append(new_com)
    for com in raw_info:
        name = com[0]
        op = net_struct.Operator(name)
        if com[1] != 'none':
            input_tensors = com[1].split(';')
            for it in input_tensors:
                if it.strip() != '':
                    [shape, addr] = it.split('@')
                    op.input_tensors.append((addr, shape))
        if com[2] != 'none':
            output_tensors = com[2].split(';')
            for ot in output_tensors:
                if ot.strip() != '':
                    [shape, addr] = ot.split('@')
                    op.output_tensors.append((addr, shape))
        # if com[3] != 'none':
        #     parents = com[3].split(';')
        #     for p in parents:
        #         if p.strip() != '':
        #             parents_name = raw_info[int(p.strip())][0]
        #             op.parents.add(parents_name)
        # if com[4] != 'none':
        #     children = com[4].split(';')
        #     for c in children:
        #         if c.strip() != '':
        #             child_name = raw_info[int(c.strip())][0]
        #             op.children.add(child_name)
        op_list.append(op)

    op_list = net_struct.build_op_relationship(op_list)
    for op in op_list:
        name_op_dict[op.name] = op
        # for op_name in name_list:
        #     print(name_op_dict[op_name].input_tensors)
    return [op.name for op in op_list], name_op_dict
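For context, the parsing above implies that each line of the net-info file has at least three space-separated fields: the op name, its input tensors, and its output tensors, where every tensor is written as shape@address, tensors are joined with ';', and 'none' marks an empty field. A hedged usage sketch, with a made-up file name and shape strings:

# Hypothetical net-info line: "<name> <shape@addr;...|none> <shape@addr;...|none> <parents|none> <children|none>"
#   conv1/Conv2D 1x224x224x3@0x1000 1x112x112x32@0x2000 none 1
op_names, name_op_dict = read_net_info('model_net_info.txt')
for name in op_names:
    op = name_op_dict[name]
    print(name, op.input_tensors, op.output_tensors)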
Example #2
def prepare_dataset(inter_list: list,
                    dataset_type: str,
                    dataset_path: str):
    logger = get_logger('prepare_dataset')

    logger.info(dataset_type)
    start_t = time.time()

    dataset = pd.read_csv('{}/dataset_raw_{}.csv'.format(dataset_path, dataset_type), sep=';')

    logger.info('Dealing with missing values, outliers, categorical features...')

    # Profiles
    dataset['age'] = dataset['age'].fillna(dataset['age'].median())
    dataset['gender'] = dataset['gender'].fillna(dataset['gender'].mode()[0])
    dataset.loc[~dataset['gender'].isin(['M', 'F']), 'gender'] = dataset['gender'].mode()[0]
    dataset['gender'] = dataset['gender'].map({'M': 1., 'F': 0.})
    dataset.loc[(dataset['age'] > 80) | (dataset['age'] < 7), 'age'] = round(dataset['age'].median())
    dataset.loc[dataset['days_between_fl_df'] < -1, 'days_between_fl_df'] = -1
    # Pings
    for period in range(1, len(inter_list) + 1):
        col = 'avg_min_ping_{}'.format(period)
        dataset.loc[(dataset[col] < 0) |
                    (dataset[col].isnull()), col] = dataset.loc[dataset[col] >= 0][col].median()
    # Sessions and everything else
    dataset.fillna(0, inplace=True)
    dataset.to_csv('{}dataset_{}.csv'.format(dataset_path, dataset_type), sep=';', index=False)

    logger.info('Dataset is successfully prepared and saved to {}, run time (dealing with bad values): {}'.format(
        dataset_path, time_format(time.time() - start_t)))
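A hedged usage sketch for the helper above: the raw file is expected at <dataset_path>/dataset_raw_<type>.csv, and because the output path concatenates dataset_path directly, a trailing slash is assumed; the period list below is made up, and only its length matters (it determines which avg_min_ping_<n> columns get cleaned).

# Hypothetical call with three ping periods (avg_min_ping_1..avg_min_ping_3).
inter_list = [(1, 7), (8, 14), (15, 21)]
prepare_dataset(inter_list, dataset_type='train', dataset_path='data/')
# reads data/dataset_raw_train.csv and writes data/dataset_train.csv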
Example #3
def main(_):
    config_file = os.path.join(FLAGS.output, 'config.json')
    log_file = os.path.join(FLAGS.output, 'model.log')

    config = load_config(config_file)
    config['init_checkpoint'] = FLAGS.init_checkpoint
    logger = get_logger(log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    map_file = os.path.join(FLAGS.output, 'maps.pkl')
    with open(map_file, "rb") as f:
        tag_to_id, id_to_tag = pickle.load(f)

    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model,
                             os.path.join(FLAGS.output, 'checkpoint'), config,
                             logger)
        text = "中国你好成都"
        result = model.evaluate_line(sess,
                                     input_from_line(text, FLAGS.max_seq_len,
                                                     tag_to_id),
                                     id_to_tag,
                                     export=True)
        print(result)
Example #4
    def __init__(self, config):
        self.logger = get_logger()
        self.writer = get_tb_writer(config["Logging"]["tb_logdir"])
        self.num_classes = config["Dataset"]["num_classes"]
        self.batch_size = config["Train"]["batch_size"]
        self.epochs = config["Train"]["epochs"]
        self.input_shape = config["Train"]["image_size"]
        self.channels = config["Train"]["channels"]
        self.ckpt_dir = config["Logging"]["ckpt_dir"]
        self.eval_per_epoch = config["Train"]["eval_per_epoch"]
        self.device = get_device(config["Train"]["device"])
        self.model = load_model(config["Train"]["arch"], self.num_classes,
                                self.device, config["Train"]["pretrained"],
                                self.channels, self.logger)
        self.optimizer = self.get_optimizer(
            config["Train"]["optimizer"],
            config["Train"]["lr_scheduler"]["lr_init"])
        self.scheduler = self.get_scheduler(config["Train"]["lr_scheduler"])
        self.train_loader, self.val_loader, self.classes = \
            create_dataloaders(train_data_path=config["Dataset"]["train_data_path"],
                               val_data_path=config["Dataset"]["val_data_path"],
                               classes_path=config["Dataset"]["classes_path"],
                               img_size=self.input_shape,
                               batch_size=self.batch_size,
                               augment=config["Augmentation"],
                               logger=self.logger)
        self.criterion = nn.CrossEntropyLoss()
Example #5
def main():
    config = get_config()
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    # logging to the file and stdout
    logger = get_logger(config.log_dir, config.exp_name)

    # fix random seed to reproduce results
    set_random_seed(config.random_seed)
    logger.info('Random seed: {:d}'.format(config.random_seed))
    logger.info(pprint.pformat(config))

    if config.method in ['src', 'jigsaw', 'rotate']:
        model = AuxModel(config, logger)
    elif config.method in ['cdan', 'cdan+e', 'dann']:
        model = CDANModel(config, logger)
    else:
        raise ValueError("Unknown method: %s" % config.method)

    # create data loaders
    src_loader, val_loader = get_train_val_dataloader(config.datasets.src)
    test_loader = get_test_dataloader(config.datasets.test)

    tar_loader = None
    if config.datasets.get('tar', None):
        tar_loader = get_target_dataloader(config.datasets.tar)

    # main loop
    if config.mode == 'train':
        model.train(src_loader, tar_loader, val_loader, test_loader)

    elif config.mode == 'test':
        model.test(test_loader)
Example #6
def evaluate_one():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, target_to_id, id_to_target, feature_to_id, id_to_feature = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, BiLSTMModel, config, load_word2vec, id_to_char, logger)
        while True:
            try:
                line = input("请输入测试句子: ")
                if line == "exit":
                    exit(0)
                features = dict()
                if config["use_other_features"]:
                    for feature_i in config["features"]:
                        if feature_i == "0":
                            continue
                        features[feature_i] = input("请输入 feature_" + feature_i + " : ").split()
                result = model.evaluate_one(sess, input_from_line(line, features, char_to_id, feature_to_id),
                                            id_to_target)
                print(result)
            except Exception as e:
                print(e)
                logger.info(e)
Example #7
def main():
    max_mIoU = 0.0
    ckpter = utils.Checkpointer(args)
    logger = utils.get_logger(args.working_dir)
    train_loader, test_loader = load_data(args)
    model = models.__dict__[args.arch](state_dict_dir=args.baseline_dir, n_classes=args.n_classes).to(device)
    if device == "cuda":
        model = nn.DataParallel(model)
        torch.backends.cudnn.benchmark = True
    print(model)
    print(model(torch.rand([2, 3, 512, 1024]).to(device)).shape)
    optimizer = optim.SGD(model.parameters(), lr=args.init_lr, momentum=args.momentum, weight_decay=args.weight_decay)
    loss_fn = nn.CrossEntropyLoss(ignore_index=args.ignore_index)
    for epoch in range(args.init_epoch, args.epochs):
        train(train_loader, model, optimizer, loss_fn, epoch, logger)
        mIoU = test(test_loader, model, loss_fn, logger)
        is_best = max_mIoU < mIoU
        max_mIoU = max(max_mIoU, mIoU)

        model_state_dict = model.module.state_dict() if device == 'cuda' else model.state_dict()
        state_dict = {
            "epoch": epoch,
            "max_mIoU": max_mIoU,
            "model_state_dict": model_state_dict,
            "optimizer_state_dict": optimizer.state_dict(),
        }
        ckpter.save_model(state_dict, epoch, is_best)
        logger.info("Max mIoU: {:.3f}".format(max_mIoU))
        break
Example #8
def train_main(cfg):
    MachineConfig(cfg["machine"])
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")

    if "name" not in cfg:
        cfg["name"] = "test" + run_id
    cfg['training']['log_path'] += cfg["name"]
    name = cfg['name']
    print('Start', name)

    expand_cfg_vars(cfg)

    logdir = cfg['training']['log_path']
    writer = SummaryWriter(log_dir=logdir, filename_suffix='.metrics')
    img_writer = SummaryWriter(log_dir=logdir, filename_suffix='.tensorboardimgs')

    print("RUNDIR: {}".format(logdir))
    with open(logdir + "/cfg.yml", 'w') as fp:
        yaml.dump(cfg, fp)

    logger = get_logger(logdir)
    logger.info("Let the games begin")

    trainer = Trainer(cfg, writer, img_writer, logger, os.path.join(name, str(run_id)))
    trainer.train()
Example #9
    def __init__(self, cfg, net_arch, loss_f, rank=0):
        self.cfg = cfg
        self.device = self.cfg.device
        self.net = net_arch.to(self.device)
        self.rank = rank
        if self.device != "cpu" and self.cfg.dist.gpus != 0:
            self.net = DDP(self.net, device_ids=[self.rank])
        self.input = None
        self.GT = None
        self.step = 0
        self.epoch = -1
        self._logger = get_logger(cfg, os.path.basename(__file__))

        # init optimizer
        optimizer_mode = self.cfg.train.optimizer.mode
        if optimizer_mode == "adam":
            self.optimizer = torch.optim.Adam(
                self.net.parameters(),
                **(self.cfg.train.optimizer[optimizer_mode]))
        else:
            raise Exception("%s optimizer not supported" % optimizer_mode)

        # init loss
        self.loss_f = loss_f
        self.log = OmegaConf.create()
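The **(self.cfg.train.optimizer[optimizer_mode]) expansion above implies a config in which the selected optimizer name points to a block of keyword arguments for the matching torch.optim class. A minimal OmegaConf layout under that assumption (every key except train.optimizer is illustrative):

from omegaconf import OmegaConf

# Hypothetical trainer config: "mode" picks the optimizer, and the block with the
# same name is unpacked verbatim into torch.optim.Adam(...).
cfg = OmegaConf.create({
    "device": "cpu",
    "dist": {"gpus": 0},
    "train": {
        "optimizer": {
            "mode": "adam",
            "adam": {"lr": 1e-3, "betas": [0.9, 0.999]},
        }
    },
})
print(OmegaConf.to_container(cfg.train.optimizer[cfg.train.optimizer.mode]))
# -> {'lr': 0.001, 'betas': [0.9, 0.999]}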
Example #10
def main():
    args = get_args()
    config = process_config(args.config)

    # create the experiments dirs
    create_dirs([config.cache_dir, config.model_dir,
        config.log_dir, config.img_dir])

    # logging to the file and stdout
    logger = get_logger(config.log_dir, config.exp_name)
    
    # Initialize the wandb run
    run = wandb.init(project="SemiSupervised", config=args)
    run.save()
    args.run_name = wandb.run.name

    # fix random seed to reproduce results
    random.seed(config.random_seed)
    logger.info('Random seed: {:d}'.format(config.random_seed))

    if config.method in ['src', 'jigsaw', 'rotate']:
        model = AuxModel(config, logger)
    else:
        raise ValueError("Unknown method: %s" % config.method)
    
    src_loader, val_loader = get_train_val_dataloader(config.datasets.src)
    # test_loader = get_test_dataloader(config.datasets.test)
    test_loader = None

    tar_loader = None
    if config.datasets.get('tar', None):
        tar_loader = get_target_dataloader(config.datasets.tar)

    if config.mode == 'train':
        model.train(src_loader, tar_loader, val_loader, test_loader)
Example #11
def build_trainer(cfg, experiment_name, tensorboard_in_subdir=True):
    global is_first_trainer
    cfg = deepcopy(cfg)
    name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + cfg["general"]["tag"]
    run_id = experiment_name + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    base_log_dir = cfg['training']['log_path']
    log_dir = f"{base_log_dir}/{experiment_name}/"
    os.makedirs(log_dir, exist_ok=True)

    cfg["name"] = name
    cfg["training"]["log_path"] = log_dir
    cfg['training']['disable_depth_estimator'] = not is_first_trainer or cfg['training'].get('disable_depth_estimator', False)

    if tensorboard_in_subdir:
        writer = SummaryWriter(log_dir=log_dir, filename_suffix=f'{experiment_name}.metrics')
        img_writer = SummaryWriter(log_dir=log_dir, filename_suffix=f'{experiment_name}.tensorboardimgs')
    else:
        writer = SummaryWriter(log_dir=base_log_dir, filename_suffix=f'{experiment_name}.metrics')
        img_writer = SummaryWriter(log_dir=base_log_dir, filename_suffix=f'{experiment_name}.tensorboardimgs')
    logger = get_logger(log_dir)
    with open(log_dir + "/cfg.yml", 'w') as fp:
        yaml.dump(cfg, fp)

    is_first_trainer = False

    return Trainer(cfg, writer, img_writer, logger, os.path.join(name, str(run_id)))
Example #12
def gather_model_profile(raw_info_file_path, data_trans_file_path, inference_latency_file_path, \
    CPU_thread_index, SCALE = 1.0, CPU_little_thread_index = None):
    data_trans_dict = read_data_trans(data_trans_file_path)
    op_name_list, latency_dict = read_latency(inference_latency_file_path, \
        CPU_thread_index, OP_LATENCY_SCALE = SCALE, CPU_little_thread_index=CPU_little_thread_index)
    op_name_list, name_op_dict = read_net_info(raw_info_file_path)
    net_def = net_struct.NetDef()
    utils.get_logger().info(data_trans_dict.keys())
    # Gather the three files into name_op_dict
    for op_name in op_name_list:
        op = name_op_dict[op_name]
        op_latency = latency_dict[op_name]
        op_type = op_name.split('/')[-1]
        op_def = net_struct.OperatorDef()
        op_def.type = op_type
        # Set the operator's data-transformation latency.
        # The data transformation latency is the sum of all input tensor transformation latencies.
        for (tensor_addr, tensor_shape) in op.input_tensors:
            # (TODO): measure the communication latency between CPU big and little cluster
            if CPU_little_thread_index is not None:
                op_latency.input_data_trans_latency[tensor_addr] = [0, 0]
                op_latency.Transpose_latency_NCHW_to_NHWC = 0
                op_latency.Transpose_latency_NHWC_to_NCHW = 0
                break
            # (TODO): Add support for different layout convert(NC4HW4<->Image, NHWC<->Image)
            if len(tensor_shape) >= 1:
                if tensor_shape in data_trans_dict:
                    op_latency.Transpose_latency_NCHW_to_NHWC += data_trans_dict[
                        tensor_shape][0]
                    op_latency.Transpose_latency_NHWC_to_NCHW += data_trans_dict[
                        tensor_shape][1]
                    op_latency.input_data_trans_latency[
                        tensor_addr] = data_trans_dict[tensor_shape]
                    # utils.get_logger().info("{} {} {} {}".format(op_name, tensor_addr, tensor_shape, data_trans_dict[tensor_shape]))
                else:
                    op_latency.Transpose_latency_NCHW_to_NHWC = TRANSFORM_OVERHEAD
                    op_latency.Transpose_latency_NHWC_to_NCHW = TRANSFORM_OVERHEAD
                    op_latency.input_data_trans_latency[tensor_addr] = [
                        TRANSFORM_OVERHEAD, TRANSFORM_OVERHEAD
                    ]

        op_def.operator_latency = op_latency
        op.op_def = op_def
        name_op_dict[op_name] = op
        net_def.op.append(op)

    return op_name_list, name_op_dict, net_def
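A hedged usage sketch for gather_model_profile; the file names, thread index, and SCALE value are made up, while CPU_latency and GPU_latency are the per-op fields that Example #18 below reads:

# Hypothetical profile files produced by the measurement steps of the same project.
op_names, name_op_dict, net_def = gather_model_profile(
    raw_info_file_path='model_net_info.txt',
    data_trans_file_path='model_data_trans.txt',
    inference_latency_file_path='model_latency.txt',
    CPU_thread_index=4,
    SCALE=1.0)
for op_name in op_names:
    lat = name_op_dict[op_name].op_def.operator_latency
    print(op_name, lat.CPU_latency, lat.GPU_latency)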
Example #13
def main_train():
    # load data sets
    # sentences = [[(words11, tag11), ...], [(word21, tag21), ...], ...]
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower, FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    # Updates train_sentences and test_sentences in place
    update_tag_scheme(train_sentences, FLAGS.tag_schema)
    update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    # Create or load the mapping dictionaries for characters, words, features and targets
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        # If a pre-trained embedding file exists, build the dictionary from both the pre-trained embeddings and the training set
        if FLAGS.pre_emb:
            # Count the characters in the training set and return a frequency dictionary
            dico_chars_train = char_mapping(train_sentences, FLAGS.lower)[0]
            # Augment the training-set character dictionary with characters from the pre-trained embeddings
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(),
                FLAGS.emb_file,
                list(itertools.chain.from_iterable(
                    [[w[0] for w in s] for s in test_sentences])
                )
            )
        # Otherwise build the dictionary from the training set only
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, target_to_id, id_to_target = tag_mapping(train_sentences)

        # Create the mapping dictionaries for the other features; all three return values are dicts
        _f, feature_to_id, id_to_feature = feature_mapping(train_sentences, FLAGS.features)

        # Save the character, target and feature mappings
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, target_to_id, id_to_target, feature_to_id, id_to_feature], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, target_to_id, id_to_target, feature_to_id, id_to_feature = pickle.load(f)

    # make paths for storing the log and model config if they do not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = create_config_model(FLAGS, char_to_id, target_to_id, feature_to_id)
    logger = get_logger(FLAGS.log_file)
    print_config(config, logger)

    train(config, train_sentences, dev_sentences, test_sentences, char_to_id, feature_to_id, target_to_id, id_to_char,
          id_to_target, logger)
Example #14
def run(args):
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'train_sgada.log'))
    logger.info(args)

    # data loaders
    dataset_root = os.environ["DATASETDIR"]
    source_train_loader = get_mscoco(dataset_root, args.batch_size, train=True)
    target_train_loader = get_flir(dataset_root, args.batch_size, train=True)
    target_val_loader = get_flir(dataset_root, args.batch_size, train=False)
    target_conf_train_loader = get_flir_from_list_wdomain(dataset_root, args.batch_size, train=True)

    args.classInfo = {'classes': torch.unique(torch.tensor(source_train_loader.dataset.targets)),
                    'classNames': source_train_loader.dataset.classes}

    logger.info('SGADA training')

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    if os.path.isfile(args.trained):
        c = torch.load(args.trained)
        source_cnn.load_state_dict(c['model'])
        logger.info('Loaded `{}`'.format(args.trained))
    for param in source_cnn.parameters():
        param.requires_grad = False

    # train target CNN
    target_cnn = CNN(in_channels=args.in_channels, target=True, srcTrain=False).to(args.device)
    target_cnn.load_state_dict(source_cnn.state_dict())
    for param in target_cnn.classifier.parameters():
        param.requires_grad = False
    optimizer = optim.Adam(
        target_cnn.encoder.parameters(), 
        lr=args.lr, betas=args.betas, 
        weight_decay=args.weight_decay)

    discriminator = Discriminator(args=args).to(args.device)
    criterion = nn.CrossEntropyLoss()
    d_optimizer = optim.Adam(
        discriminator.parameters(),
        lr=args.d_lr, betas=args.betas, weight_decay=args.weight_decay)
    best_acc, best_class, classNames = train_target_cnnP_domain(
        source_cnn, target_cnn, discriminator,
        criterion, optimizer, d_optimizer,
        source_train_loader, target_conf_train_loader, target_val_loader,
        logger, args=args)
    bestClassWiseDict = {}
    for cls_idx, clss in enumerate(classNames):
        bestClassWiseDict[clss] = best_class[cls_idx].item()
    logger.info('Best acc.: {}'.format(best_acc))
    logger.info('Best acc. (Classwise):')
    logger.info(bestClassWiseDict)
    
    return best_acc, bestClassWiseDict
Example #15
def test_model(cfg, model, test_loader, writer):
    logger = get_logger(cfg, os.path.basename(__file__))
    model.net.eval()
    total_test_loss = 0
    test_loop_len = 0
    total_test_accuracy = 0
    total_test = 0
    with torch.no_grad():
        for model_input, target in test_loader:
            model.feed_data(input=model_input, GT=target)
            output = model.run_network()
            loss_v = model.loss_f(output, model.GT)
            _, predicted = torch.max(output.data, 1)
            total_v = torch.tensor(int(target.size(0))).to('cuda')

            accuracy_v = torch.tensor(
                float(
                    (predicted == target.to('cuda')).sum().item())).to('cuda')

            # print(f"loss_v: {type(loss_v)}")
            # print(f"predicted: {type(accuracy_v)}")
            # print(f"target: {target.device}")
            # print(f"accuracy_v: {accuracy_v.device}")

            if cfg.dist.gpus > 0:
                # Aggregate loss_v from all GPUs. loss_v is set as the sum of all GPUs' loss_v.
                torch.distributed.all_reduce(loss_v)
                loss_v /= torch.tensor(float(cfg.dist.gpus))

                # Aggregate accuracy_v from all GPUs. accuracy_v is set as the sum of all GPUs' accuracy_v.
                torch.distributed.all_reduce(accuracy_v)
                torch.distributed.all_reduce(total_v)

            total_test += total_v.to("cpu").item()
            total_test_loss += loss_v.to("cpu").item()
            total_test_accuracy += accuracy_v.to("cpu").item()

            test_loop_len += 1
        # print(f"total_v = {total_test}")
        # print(f"accuracy_v = {total_test_accuracy}")
        total_test_loss /= test_loop_len
        total_test_accuracy /= total_test

        if writer is not None:
            writer.logging_with_step(total_test_accuracy, model.step,
                                     "test_accuracy")
            writer.logging_with_step(total_test_loss, model.step, "test_loss")
            writer.logging_with_epoch(total_test_accuracy, model.step,
                                      model.epoch,
                                      "total_test_accuracy_per_epoch")
            writer.logging_with_epoch(total_test_loss, model.step, model.epoch,
                                      "test_loss_per_epoch")
        if is_logging_process():
            logger.info("Test Loss %.04f at step %d" %
                        (total_test_loss, model.step))
Example #16
def main() -> None:
    utils.get_logger("bot")

    # Terminate if the config failed to load.
    if not utils.load_config():
        return

    # Set logging level to debug if verbose is true.
    if g.config["verbose"]:
        g.log.setLevel(logging.DEBUG)
        g.log.handlers[0].setLevel(logging.DEBUG)

    try:
        g.exchanges = exchanges.get_exchanges()
        g.db = Database()
        image.init()
        Twitter(callback)
    except Exception as e:
        g.log.critical(f"{type(e).__name__}: {e}")
        g.log.critical(traceback.format_exc())
Example #17
def evaluate_line():
    # Load the intent classification models

    id_to_cat = get_id_to_cat('{}/categories.txt'.format(data_path))

    print(
        "==========================Loading the Intention Classification model....=========================="
    )
    model_1 = ImportGraph('{}/model_cnn'.format(model_path))
    model_2 = ImportGraph('{}/model_rnn'.format(model_path))
    print("Model loaded..")
    flag = 0

    # Load the named entity recognition (NER) model
    print(
        "==========================Loading the NER model....=========================="
    )
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        # Recognition loop

        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            # Read the test sentence
            text = input("请输入要进行识别的句子:")

            # Intent classification
            id_text = process_text(text, '{}/vocab.txt'.format(data_path))
            pred_1 = model_1.run(id_text, 1.0)
            pred_2 = model_2.run(id_text, 1.0)
            pred = pred_1 + pred_2
            res = id_to_cat[int(np.argmax(pred))]
            print(res)

            # Named entity recognition
            result = model.evaluate_line(sess,
                                         input_from_line(text, char_to_id),
                                         id_to_tag)
            print(result)
Example #18
def get_ops_total_latency(op, name_op_dict):
    """Get the op's execution latency on CPU and GPU
  based on the device placement result of op's parents
  considering the communication latency
  """
    to_CPU_transpose_latency = 0.0
    to_GPU_transpose_latency = 0.0
    utils.get_logger().info(
        "op name: {}, input tensors:{}, data trans dict {}".format(
            op.name, op.input_tensors,
            op.op_def.operator_latency.input_data_trans_latency))
    for op_parent_name in op.parents:
        op_parent = name_op_dict[op_parent_name]
        for child_tensor_addr, child_tensor_shape in op.input_tensors:
            for parent_tensor_addr, parent_tensor_shape in op_parent.output_tensors:
                if child_tensor_addr == parent_tensor_addr:
                    utils.get_logger().info("{} {} {}".format(
                        op.name, op_parent.name, parent_tensor_shape))
                    if op_parent.op_def.device_type == net_struct.DeviceType.CPU:
                        to_GPU_transpose_latency += op.op_def.operator_latency.input_data_trans_latency[
                            child_tensor_addr][1]
                    elif op_parent.op_def.device_type == net_struct.DeviceType.GPU:
                        to_CPU_transpose_latency += op.op_def.operator_latency.input_data_trans_latency[
                            child_tensor_addr][0]
    utils.get_logger().info("{} {} {} {} {}".format(op.name, \
      op.op_def.operator_latency.CPU_latency, op.op_def.operator_latency.GPU_latency,\
       to_CPU_transpose_latency, to_GPU_transpose_latency))
    CPU_latency = op.op_def.operator_latency.CPU_latency + to_CPU_transpose_latency
    GPU_latency = op.op_def.operator_latency.GPU_latency + to_GPU_transpose_latency
    return CPU_latency, GPU_latency
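The returned pair lends itself to a per-op placement decision. The greedy pass below is only an illustration under that assumption (it is not the project's actual scheduler); the DeviceType values are the ones compared against above:

# Illustrative greedy placement: assign each op to whichever device gives the lower
# total latency, so later ops see their parents' placement when costing transfers.
def greedy_placement(op_name_list, name_op_dict):
    for op_name in op_name_list:
        op = name_op_dict[op_name]
        cpu_latency, gpu_latency = get_ops_total_latency(op, name_op_dict)
        if cpu_latency <= gpu_latency:
            op.op_def.device_type = net_struct.DeviceType.CPU
        else:
            op.op_def.device_type = net_struct.DeviceType.GPU
    return name_op_dict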
Example #19
def main():
    logger = utils.get_logger(__name__)
    texts, tags = read_wnut('../example_data/wnut17train.conll')
    #mlflow.set_tracking_uri(uri=settings.MLFLOW_URI)

    train_texts, test_texts, train_tags, test_tags = model_utils.train_test_split(texts, tags, test_size=.2)

    unique_tags = set(tag for doc in tags for tag in doc)
    tag2id = {tag: id for id, tag in enumerate(unique_tags)}
    id2tag = {id: tag for tag, id in tag2id.items()}

    params = bert.BertParams(unique_tags=unique_tags, tag2id=tag2id, id2tag=id2tag, epochs=100)
    trainer = bert.BertTokenTrainer(params=params, num_labels=len(unique_tags))

    trainer.train(train_texts, train_tags)
Example #20
def train_model(cfg, model, train_loader, writer):
    logger = get_logger(cfg, os.path.basename(__file__))
    model.net.train()
    for model_input, model_target in train_loader:
        model.optimize_parameters(model_input, model_target)
        loss = model.log.loss_v
        model.step += 1

        if is_logging_process() and (loss > 1e8 or math.isnan(loss)):
            logger.error("Loss exploded to %.02f at step %d!" %
                         (loss, model.step))
            raise Exception("Loss exploded")

        if model.step % cfg.log.summary_interval == 0:
            if writer is not None:
                writer.logging_with_step(loss, model.step, "train_loss")
            if is_logging_process():
                logger.info("Train Loss %.04f at step %d" % (loss, model.step))
Example #21
    def setup_method(self):
        # set log/checkpoint dir
        self.TEST_DIR = pathlib.Path(TEST_DIR)
        self.working_dir = self.TEST_DIR
        chkpt_dir = (self.TEST_DIR / "chkpt").resolve()
        os.makedirs(self.TEST_DIR, exist_ok=True)
        os.makedirs(chkpt_dir, exist_ok=True)

        # set cfg
        with initialize(config_path="../config"):
            self.cfg = compose(
                config_name="default", overrides=[f"working_dir={self.working_dir}"]
            )
        self.cfg.device = "cpu"
        self.cfg.log.chkpt_dir = str(chkpt_dir)
        self.cfg.log.use_wandb = False
        self.cfg.log.use_tensorboard = False

        # load job_logging_cfg
        project_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        hydra_conf = OmegaConf.load(
            os.path.join(project_root_path, "config/default.yaml")
        )
        job_logging_name = None
        for job_logging in hydra_conf.defaults:
            job_logging_name = job_logging.get("hydra/job_logging")
            if job_logging_name is not None:
                break
        job_logging_cfg_path = os.path.join(
            project_root_path,
            "config/hydra/job_logging",
            str(job_logging_name) + ".yaml",
        )
        if os.path.exists(job_logging_cfg_path):
            job_logging_cfg = OmegaConf.load(job_logging_cfg_path)
        else:
            job_logging_cfg = dict()
        with open_dict(self.cfg):
            self.cfg.job_logging_cfg = job_logging_cfg
        self.cfg.job_logging_cfg.handlers.file.filename = str(
            (self.working_dir / "trainer.log").resolve()
        )
        # set logger
        self.logger = get_logger(self.cfg, os.path.basename(__file__))
Example #22
    def __init__(self, cfg, net_arch, loss_f, rank=0):
        self.cfg = cfg
        self.device = self.cfg.device
        self.net = net_arch.to(self.device)
        self.rank = rank
        if self.device != "cpu" and self.cfg.dist.gpus != 0:
            self.net = DDP(self.net, device_ids=[self.rank])
        self.input = None
        self.GT = None
        self.step = 0
        self.epoch = -1
        self._logger = get_logger(cfg, os.path.basename(__file__))

        self.optimizer = get_optimizer(cfg, self.net)
        self.scheduler = get_scheduler(cfg, self.optimizer)

        # init loss
        self.loss_f = loss_f
        self.log = OmegaConf.create()
Example #23
def evaluate_line():
    config = load_config(args.config_file)
    logger = get_logger(args.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(args.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag, intent_to_id, id_to_intent = pickle.load(
            f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, args.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            try:
                line = input("请输入测试句子:")
                result = model.evaluate_line(sess,
                                             input_from_line(line, char_to_id),
                                             id_to_tag, id_to_intent)
                print(result)
            except Exception as e:
                logger.info(e)
Example #24
def main():
    logger = get_logger('make_prediction')

    with open('{}/baseline_xgb.pcl'.format(settings.MODEL_PATH), 'rb') as f:
        model = pickle.load(f)

    X = pd.read_csv('{}dataset_test.csv'.format(settings.DATASET_PATH),
                    sep=';')
    X_mm = X.drop(['user_id'], axis=1)
    X_mm = MinMaxScaler().fit_transform(X_mm)

    predict_test = model.predict(X_mm)
    X_pred = pd.DataFrame(X['user_id'])
    X_pred['is_churned'] = predict_test

    X_pred.to_csv('{}dataset_pred.csv'.format(settings.PREDICTION_PATH),
                  sep=';',
                  index=False)

    logger.info('Prediction is successfully saved to {}'.format(
        settings.PREDICTION_PATH))
Example #25
    def __init__(self, args, embeddings, tag2label, vocab, paths, config):
        self.batch_size = args.batch_size
        self.epoch_num = args.epoch
        self.hidden_dim = args.hidden_dim
        self.embeddings = embeddings
        self.CRF = args.CRF
        self.update_embedding = args.update_embedding
        self.dropout_keep_prob = args.dropout
        self.optimizer = args.optimizer
        self.lr = args.lr
        self.clip_grad = args.clip
        self.tag2label = tag2label
        self.num_tags = len(tag2label)
        self.vocab = vocab
        self.shuffle = args.shuffle
        self.model_path = paths['model_path']
        self.summary_path = paths['summary_path']
        self.logger = get_logger(paths['log_path'])
        self.result_path = paths['result_path']
        self.test_result_path = paths['test_result_path']
        self.config = config
Example #26
def evaluate_test():
    config = load_config(args.config_file)
    logger = get_logger(args.log_file)

    with open(args.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag, intent_to_id, id_to_intent = pickle.load(
            f)

    test_sentences = load_sentences(args.test_file, args.lower, args.zeros)
    update_tag_scheme(test_sentences, args.tag_schema)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                intent_to_id, args.lower)
    test_manager = BatchManager(test_data, 100)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, args.ckpt_path, load_word2vec,
                             config, id_to_char, logger)

        evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Example #27
def main(wandb):

    # create the experiments dirs
    create_dirs([
        config.cache_dir, config.model_dir, config.log_dir,
        config.best_model_dir
    ])

    # logging to the file and stdout
    logger = get_logger(config.log_dir, config.dataset)

    # fix random seed to reproduce results
    logger.info('Random seed: {:d}'.format(config.random_seed))

    # model = get_model(config)
    model = AuxModel(config, logger, wandb)
    src_loader, tar_loader, val_loader, test_loader = get_loaders(config)
    if config.mode == 'train':
        model.train(src_loader, tar_loader, val_loader, None)
    elif config.mode == 'val':
        model.test(val_loader)
    elif config.mode == 'test':
        model.test(test_loader)
Example #28
def test_model(cfg, model, test_loader, writer):
    logger = get_logger(cfg, os.path.basename(__file__))
    model.net.eval()
    total_test_loss = 0
    test_loop_len = 0
    with torch.no_grad():
        for model_input, model_target in test_loader:
            output = model.inference(model_input)
            loss_v = model.loss_f(output, model_target.to(cfg.device))
            if cfg.dist.gpus > 0:
                # Aggregate loss_v from all GPUs. loss_v is set as the sum of all GPUs' loss_v.
                torch.distributed.all_reduce(loss_v)
                loss_v /= torch.tensor(float(cfg.dist.gpus))
            total_test_loss += loss_v.to("cpu").item()
            test_loop_len += 1

        total_test_loss /= test_loop_len

        if writer is not None:
            writer.logging_with_step(total_test_loss, model.step, "test_loss")
        if is_logging_process():
            logger.info("Test Loss %.04f at step %d" %
                        (total_test_loss, model.step))
Example #29
def evaluate_line_ner():
    config = load_config(FLAGS.config_file)
    logger = get_logger(FLAGS.log_file)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    with open(FLAGS.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, FLAGS.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        while True:
            # try:
            #     line = input("请输入测试句子:")
            #     result = model.evaluate_line(sess, input_from_line(line, char_to_id), id_to_tag)
            #     print(result)
            # except Exception as e:
            #     logger.info(e)

            line = input("请输入测试句子:")
            result = model.evaluate_line(sess,
                                         input_from_line(line, char_to_id),
                                         id_to_tag)
            print(result)
Example #30
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse
import os
import xml.etree.ElementTree as ET

from jinja2 import Environment, FileSystemLoader

from utils import utils

logger = utils.get_logger('Summarize')


def parse_arguments():
    parser = argparse.ArgumentParser()

    xml_help = """
        Paths to xml summary files from layer tests.
        In case of entries intersection, results will
        be merged basing on timestamp - entry from latest
        report is be kept.
    """
    out_help = "Path where to save html report"
    report_tag = "Report tag"
    output_filename_help = "Output report filename"
    conformance_mode_help = "Allow to align test number"

    parser.add_argument("--xml", help=xml_help, nargs="*", required=True)
    parser.add_argument("--out", help=out_help, default="")
    parser.add_argument("--output_filename",
Example #31
import os
from itertools import tee

from gensim.models import Word2Vec

from utils.utils import get_logger

_logger = get_logger(__name__)


def _train_model(tokenized_lines, params):
    params_str = '_w' + str(params['win_size']) + '_m' + str(params['min_w_num']) + '_v' + str(params['vect_size'])

    _logger.info('Word2Vec model will be trained now. It can take long, so relax and have fun')
    _logger.info('Parameters for training: %s' % params_str)

    tokenized_lines_for_voc, tokenized_lines_for_train = tee(tokenized_lines)

    # NOTE: this is the old gensim API. In gensim >= 1.0 train() also needs
    # total_examples= and epochs=, and gensim 4.x renames size= to vector_size=.
    model = Word2Vec(window=int(params['win_size']), min_count=int(params['min_w_num']), size=int(params['vect_size']),
                     workers=int(params['workers_num']))
    model.build_vocab(tokenized_lines_for_voc)
    model.train(tokenized_lines_for_train)

    return model


def _save_model(model, model_filename):
    _logger.info('Trained model will now be saved as %s for later use' % model_filename)
    model.save(model_filename)
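A hedged usage sketch for the two helpers above, assuming the same pre-4.0 gensim API; the params keys mirror the ones read in _train_model, and the corpus and file name are made up:

# Tiny in-memory corpus; min_w_num=1 so every token makes it into the vocabulary.
params = {'win_size': 5, 'min_w_num': 1, 'vect_size': 100, 'workers_num': 4}
tokenized_lines = iter([['hello', 'world'], ['word', 'embeddings', 'example']])
model = _train_model(tokenized_lines, params)
_save_model(model, 'word2vec_w5_m1_v100.model')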