Ejemplo n.º 1
0
        # Evaluate on the held-out set; keep a copy of the best-performing
        # weights in memory (the actual disk save happens after the loop).
        accr = model_eval(test_df, model, istransfer=True)
        if accr > high_acc:
            high_acc = accr
            best_model = model.state_dict()
            #            torch.save(model.state_dict(), 'maml/transfer')
            # NOTE(review): message says "saved", but at this point the state
            # dict is only retained in memory — confirm the wording is intended.
            print('model is saved')

        # Log epoch-level average loss and current learning rate to TensorBoard.
        writer.add_scalar("Loss/Train", total_loss / total_count, epoch + 1)
        writer.add_scalar("LearningRate/Train",
                          scheduler.get_last_lr()[0], epoch + 1)

        print("[Epoch {}/{}] Train Loss: {:.4f}, Learning Rate: {:.7f}".format(
            epoch + 1,
            epochs,
            total_loss / total_count,
            scheduler.get_last_lr()[0],
        ))

    # Persist the best checkpoint, zip it, and upload to object storage.
    torch.save(best_model, './reptile')
    #    compress_object('reptile.zip', './reptile')
    compress_object(args.transfer, './reptile')

    try:
        save_object(client, args.bucket, args.transfer)
    # NOTE(review): bare `except:` swallows everything, including
    # KeyboardInterrupt/SystemExit — prefer `except Exception as e` and log it.
    except:
        print("model  save error to minio")

    # Emit Kubeflow Pipelines UI metadata pointing the viewer at the
    # TensorBoard log directory.
    metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
    with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
        json.dump(metadata, fd)
Ejemplo n.º 2
0
        # Linear warmup-then-decay LR schedule over the whole training run.
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmupsteps,
            # NOTE(review): total steps = len(dataset) * epochs assumes one
            # optimizer step per sample; verify this matches the batch size
            # configured in `training_args`.
            num_training_steps=len(dataset) * args.epochs,
        )

    # HuggingFace Trainer wired to the externally built optimizer/scheduler.
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
        prediction_loss_only=True,
        optimizers=(optimizer, scheduler),
    )

    trainer.train()

    trainer.save_model("./pretrained")

    # Zip the saved model directory and upload it to object storage
    # (best effort — a failed upload only prints a message).
    compress_object(args.pretrained, "./pretrained")

    try:
        save_object(client, args.bucket, args.pretrained)
    # NOTE(review): bare `except:` hides the real failure; prefer
    # `except Exception as e` and log the exception.
    except:
        print("*****************model save error to minio*******************")
        #pass

    # Emit Kubeflow Pipelines UI metadata pointing the viewer at the
    # TensorBoard log directory.
    metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
    with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
        json.dump(metadata, fd)
Ejemplo n.º 3
0
    parser.add_argument("-K", "--secretkey", help="secret key")
    args = parser.parse_args()

    # Fetch the training corpus from object storage; failures are ignored so
    # an already-present local corpus file can still be used.
    try:
        client = connect_server(args.host, args.accesskey, args.secretkey)
        load_object(client, args.bucket, args.corpusdata)
#    except Exception as e:
#	print('error', e)
    # NOTE(review): bare `except: pass` swallows everything — and if
    # connect_server fails, `client` stays unbound, so the upload below would
    # raise NameError (also silently caught).
    except :
        pass

    os.makedirs("./pretrained", exist_ok=True)

    # Collect every copy of the corpus file anywhere under the working dir.
    paths = [str(x) for x in Path(".").glob("**/{}".format(args.corpusdata))]

    # Train a byte-level BPE tokenizer with RoBERTa-style special tokens and
    # write its vocab/merges files to ./pretrained.
    tokenizer = ByteLevelBPETokenizer()
    tokenizer.train(
        files=paths,
        vocab_size=args.vocabsize,
        min_frequency=50,
        special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>"],
    )
    tokenizer.save_model("./pretrained")

    # Zip the tokenizer artifacts and upload them (best effort).
    compress_object(args.tokenizer, "./pretrained")

    try:
        save_object(client, args.bucket, args.tokenizer)
    # NOTE(review): same bare-except pattern as above; prefer
    # `except Exception` and log the failure.
    except:
        pass
Ejemplo n.º 4
0
                param.requires_grad = True
    
        # First pass: downstream training at a halved learning rate.
        downstream_training(epochs=20, learning_rate=LEARN_RATE/2, denum=4)
    
        # Freeze the final 'fc.' (classifier) parameters and unfreeze the rest,
        # then run a second downstream-training pass.
        # NOTE(review): the code freezes 'fc.' and trains everything else, but
        # the original comment ("unfreezing the classifier") and the print
        # ("freezing model") describe the opposite — confirm which set of
        # parameters is meant to stay trainable.
        #unfreezing the classifier except model for fine tuning
        print("**********************freezing model**************************")
        for param, state in zip(model.parameters(), model.state_dict()) :
            if 'fc.' in state :
                param.requires_grad = False
            else :
                param.requires_grad = True
            
        downstream_training(epochs=10, learning_rate=LEARN_RATE, denum=4)

    # Re-enable gradients on every parameter before checkpointing.
    for param, state in zip(model.parameters(), model.state_dict()) :
        param.requires_grad = True

    # Save the fine-tuned weights, zip them, and upload (best effort).
    torch.save(model.state_dict(), './contra-downstream')
    compress_object('contrastive.zip', './contra-downstream')

    try:
        save_object(client, args.bucket, 'contrastive.zip')
    # NOTE(review): bare `except:` hides the real error; prefer
    # `except Exception as e` and log it.
    except:
        print("model  save error to minio") 


    # Emit Kubeflow Pipelines UI metadata pointing the viewer at the
    # TensorBoard log directory.
    metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
    with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
        json.dump(metadata, fd)

Ejemplo n.º 5
0
            tokenizer.encode(t, max_length=512, truncation=True) for t in text
        ]
        # Right-pad (or truncate) every encoding to exactly 512 ids; 0 is the
        # pad id here.
        padded_list = [
            e[:512] + [0] * (512 - len(e[:512])) for e in encoded_list
        ]
        sample = torch.tensor(padded_list)
        sample, label = sample.to(device), label.to(device)
        # NOTE(review): torch.tensor() on an existing tensor makes a copy and
        # emits a UserWarning; label.clone().detach() is the recommended form.
        labels = torch.tensor(label)
        outputs = model(sample, labels=labels)
        # Model returns (loss, logits) when labels are supplied.
        _, logits = outputs

        # NOTE(review): F.softmax without dim= is deprecated; since softmax is
        # monotonic per row, torch.argmax(logits, dim=1) would be equivalent.
        pred = torch.argmax(F.softmax(logits), dim=1)
        correct = pred.eq(labels)
        total_correct += correct.sum().item()
        total_len += len(labels)

    print("Test accuracy: ", total_correct / total_len)

    # Save the fine-tuned model, zip it, and upload (best effort).
    model.save_pretrained("./pretrained")

    compress_object(args.downstream, "./pretrained")

    try:
        save_object(client, args.bucket, args.downstream)
    # NOTE(review): bare `except: pass` silently discards upload failures.
    except:
        pass

    # Emit Kubeflow Pipelines UI metadata pointing the viewer at the
    # TensorBoard log directory.
    metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
    with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
        json.dump(metadata, fd)