def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    infer_opts(parser)

    parser.add_argument("--pooling", choices=["mean", "max", "first", "last"], default="first",
                        help="Pooling type.")

    parser.add_argument("--labels_num", type=int, required=True,
                        help="Number of prediction labels.")

    tokenizer_opts(parser)

    parser.add_argument("--output_logits", action="store_true", help="Write logits to output file.")
    parser.add_argument("--output_prob", action="store_true", help="Write probabilities to output file.")

    args = parser.parse_args()

    # Load the hyperparameters from the config file.
    args = load_hyperparam(args)

    # Build tokenizer.
    args.tokenizer = str2tokenizer[args.tokenizer](args)

    # Build classification model and load parameters.
    args.soft_targets, args.soft_alpha = False, False
    model = Classifier(args)
    model = load_model(model, args.load_model_path)

    # For simplicity, we use DataParallel wrapper to use multiple GPUs.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    if torch.cuda.device_count() > 1:
        print("{} GPUs are available. Let's use them.".format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)

    dataset = read_dataset(args, args.test_path)

    src = torch.LongTensor([sample[0] for sample in dataset])
    seg = torch.LongTensor([sample[1] for sample in dataset])

    batch_size = args.batch_size
    instances_num = src.size()[0]

    print("The number of prediction instances: ", instances_num)

    model.eval()

    with open(args.prediction_path, mode="w", encoding="utf-8") as f:
        f.write("label")
        if args.output_logits:
            f.write("\t" + "logits")
        if args.output_prob:
            f.write("\t" + "prob")
        f.write("\n")
        for i, (src_batch, seg_batch) in enumerate(batch_loader(batch_size, src, seg)):
            src_batch = src_batch.to(device)
            seg_batch = seg_batch.to(device)
            with torch.no_grad():
                _, logits = model(src_batch, None, seg_batch)

            pred = torch.argmax(logits, dim=1)
            pred = pred.cpu().numpy().tolist()
            prob = nn.Softmax(dim=1)(logits)
            logits = logits.cpu().numpy().tolist()
            prob = prob.cpu().numpy().tolist()

            for j in range(len(pred)):
                f.write(str(pred[j]))
                if args.output_logits:
                    f.write("\t" + " ".join([str(v) for v in logits[j]]))
                if args.output_prob:
                    f.write("\t" + " ".join([str(v) for v in prob[j]]))
                f.write("\n")
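Both this inference script and the cross-validation variant below iterate over a batch_loader helper that is imported from the project's utilities and not shown in this excerpt. A minimal sketch of the behavior assumed here (consecutive fixed-size slices of the src/seg tensors, followed by a trailing partial batch), not the project's actual implementation:

import torch


def batch_loader(batch_size, src, seg):
    # Sketch of the assumed inference batch loader: yield consecutive
    # fixed-size slices of the src/seg tensors, then a trailing partial batch.
    instances_num = src.size(0)
    for i in range(instances_num // batch_size):
        yield (src[i * batch_size: (i + 1) * batch_size],
               seg[i * batch_size: (i + 1) * batch_size])
    if instances_num > instances_num // batch_size * batch_size:
        yield (src[instances_num // batch_size * batch_size:],
               seg[instances_num // batch_size * batch_size:])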
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path options.
    parser.add_argument("--load_model_path", default=None, type=str,
                        help="Path of the classifier model.")
    parser.add_argument("--vocab_path", default=None, type=str,
                        help="Path of the vocabulary file.")
    parser.add_argument("--spm_model_path", default=None, type=str,
                        help="Path of the sentence piece model.")
    parser.add_argument("--test_path", type=str,
                        help="Path of the testset.")
    parser.add_argument("--test_features_path", default=None, type=str,
                        help="Path of the test features for stacking.")
    parser.add_argument("--config_path", default="models/bert/base_config.json", type=str,
                        help="Path of the config file.")

    # Model options.
    model_opts(parser)
    parser.add_argument("--pooling", choices=["mean", "max", "first", "last"], default="first",
                        help="Pooling type.")

    # Inference options.
    parser.add_argument("--batch_size", type=int, default=64,
                        help="Batch size.")
    parser.add_argument("--seq_length", type=int, default=128,
                        help="Sequence length.")
    parser.add_argument("--labels_num", type=int, required=True,
                        help="Number of prediction labels.")

    # Tokenizer options.
    tokenizer_opts(parser)

    # Output options.
    parser.add_argument("--output_logits", action="store_true", help="Write logits to output file.")
    parser.add_argument("--output_prob", action="store_true", help="Write probabilities to output file.")

    # Cross validation options.
    parser.add_argument("--folds_num", type=int, default=5,
                        help="The number of folds for cross validation.")

    args = parser.parse_args()

    # Load the hyperparameters from the config file.
    args = load_hyperparam(args)

    # Build tokenizer.
    args.tokenizer = str2tokenizer[args.tokenizer](args)

    # Build classification model and load parameters.
    args.soft_targets, args.soft_alpha = False, False

    dataset = read_dataset(args, args.test_path)

    src = torch.LongTensor([sample[0] for sample in dataset])
    seg = torch.LongTensor([sample[1] for sample in dataset])

    batch_size = args.batch_size
    instances_num = src.size()[0]

    print("The number of prediction instances: ", instances_num)

    test_features = [[] for _ in range(args.folds_num)]
    for fold_id in range(args.folds_num):
        load_model_name = ".".join(args.load_model_path.split(".")[:-1])
        load_model_suffix = args.load_model_path.split(".")[-1]

        model = Classifier(args)
        model = load_model(model, load_model_name + "-fold_" + str(fold_id) + "." + load_model_suffix)

        # For simplicity, we use DataParallel wrapper to use multiple GPUs.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        if torch.cuda.device_count() > 1:
            print("{} GPUs are available. Let's use them.".format(torch.cuda.device_count()))
            model = torch.nn.DataParallel(model)
        model.eval()

        for i, (src_batch, seg_batch) in enumerate(batch_loader(batch_size, src, seg)):
            src_batch = src_batch.to(device)
            seg_batch = seg_batch.to(device)
            with torch.no_grad():
                _, logits = model(src_batch, None, seg_batch)
            prob = nn.Softmax(dim=1)(logits)
            prob = prob.cpu().numpy().tolist()
            test_features[fold_id].extend(prob)

    # Average the per-fold probabilities and save them as stacking features.
    test_features = np.array(test_features)
    test_features = np.mean(test_features, axis=0)
    np.save(args.test_features_path, test_features)
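The fold loop above collects one probability matrix per fold (one row per test instance, one column per label); np.mean(..., axis=0) then averages these matrices element-wise before they are saved as stacking features. A toy illustration with made-up numbers:

import numpy as np

# Made-up probabilities from 2 folds, 3 test instances, 2 labels.
test_features = np.array([
    [[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]],  # fold 0
    [[0.7, 0.3], [0.4, 0.6], [0.5, 0.5]],  # fold 1
])
print(np.mean(test_features, axis=0))
# -> roughly [[0.8, 0.2], [0.3, 0.7], [0.55, 0.45]]: one row per test instance.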
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    finetune_opts(parser)

    parser.add_argument("--pooling", choices=["mean", "max", "first", "last"], default="first",
                        help="Pooling type.")

    tokenizer_opts(parser)

    parser.add_argument("--soft_targets", action="store_true",
                        help="Train model with logits.")
    parser.add_argument("--soft_alpha", type=float, default=0.5,
                        help="Weight of the soft targets loss.")

    args = parser.parse_args()

    # Load the hyperparameters from the config file.
    args = load_hyperparam(args)

    set_seed(args.seed)

    # Count the number of labels.
    args.labels_num = count_labels_num(args.train_path)

    # Build tokenizer.
    args.tokenizer = str2tokenizer[args.tokenizer](args)

    # Build classification model.
    model = Classifier(args)

    # Load or initialize parameters.
    load_or_initialize_parameters(args, model)

    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(args.device)

    # Training phase.
    trainset = read_dataset(args, args.train_path)
    random.shuffle(trainset)
    instances_num = len(trainset)
    batch_size = args.batch_size

    src = torch.LongTensor([example[0] for example in trainset])
    tgt = torch.LongTensor([example[1] for example in trainset])
    seg = torch.LongTensor([example[2] for example in trainset])

    args.train_steps = int(instances_num * args.epochs_num / batch_size) + 1

    print("Batch size: ", batch_size)
    print("The number of training instances:", instances_num)

    optimizer, scheduler = build_optimizer(args, model)

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
        args.amp = amp

    if torch.cuda.device_count() > 1:
        print("{} GPUs are available. Let's use them.".format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)
    args.model = model

    total_loss, result, best_result = 0.0, 0.0, 0.0

    print("Start training.")

    for epoch in range(1, args.epochs_num + 1):
        model.train()
        for i, (src_batch, tgt_batch, seg_batch, _) in enumerate(batch_loader(batch_size, src, tgt, seg)):
            loss = train_model(args, model, optimizer, scheduler, src_batch, tgt_batch, seg_batch)
            total_loss += loss.item()
            if (i + 1) % args.report_steps == 0:
                print("Epoch id: {}, Training steps: {}, Avg loss: {:.3f}".format(epoch, i + 1, total_loss / args.report_steps))
                total_loss = 0.0

        result = evaluate(args, read_dataset(args, args.dev_path))
        if result > best_result:
            best_result = result
            save_model(model, args.output_model_path)

    # Evaluation phase.
    if args.test_path is not None:
        print("Test set evaluation.")
        if torch.cuda.device_count() > 1:
            args.model.module.load_state_dict(torch.load(args.output_model_path))
        else:
            args.model.load_state_dict(torch.load(args.output_model_path))
        evaluate(args, read_dataset(args, args.test_path))
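train_model and evaluate are imported from elsewhere in the project and are not shown in this excerpt. As a rough sketch of the training step the loop above assumes (device transfer, forward/backward pass, optimizer and scheduler update, with the apex AMP branch enabled by --fp16); the exact signature and loss handling are assumptions, not the project's actual helper:

import torch


def train_model(args, model, optimizer, scheduler, src_batch, tgt_batch, seg_batch):
    # Rough sketch, assuming the model returns (loss, logits) when a target
    # batch is given: move the batch to the device, run forward/backward,
    # then advance the optimizer and learning-rate scheduler.
    model.zero_grad()

    src_batch = src_batch.to(args.device)
    tgt_batch = tgt_batch.to(args.device)
    seg_batch = seg_batch.to(args.device)

    loss, _ = model(src_batch, tgt_batch, seg_batch)
    if torch.cuda.device_count() > 1:
        loss = torch.mean(loss)  # DataParallel returns one loss per GPU.

    if args.fp16:
        with args.amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()

    optimizer.step()
    scheduler.step()
    return loss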