print("Train Model")
    model = Model(binaryClassification=args["binaryClassification"],
                  model_str=tokenizer_model[1],
                  doLower=args["doLower"],
                  train_batchSize=args["train_batchSize"],
                  testval_batchSize=args["testval_batchSize"],
                  learningRate=args["learningRate"],
                  doLearningRateScheduler=args["doLearningRateScheduler"],
                  labelSentences=labelSentencesDict,
                  max_label_len=max_label_len,
                  device=device)
    model.run(train_data=train_df[data_column],
              train_target=train_df[args["targets"]],
              val_data=val_df[data_column],
              val_target=val_df[args["targets"]],
              test_data=test_df[data_column],
              test_target=test_df[args["targets"]],
              epochs=args["numEpochs"])

    wandb.log({'finished': True})

    run_infos = wandb_summarizer.download.get_results(wandb_project_name)
    names = []
    scores = []
    for run_info in run_infos:
        try:
            scores.append(run_info["end_test_macroAuc"])
            names.append(run_info["name"])
        except:
            pass
# --- Example #2 ---
    # The padding length for label sentences is the token count of the longest one.
    # NOTE(review): word_tokenize is presumably nltk's — confirm it is imported above.
    max_label_len = max(
        len(sentence_tokens)
        for sentence_tokens in (word_tokenize(s) for s in labelSentencesDict.values()))

    print("Train Model")
    # Assemble the model from the tuned hyper-parameters of this run.
    model = Model(args=tokenizer_model,
                  target_columns=args["targets"],
                  labelSentences=labelSentencesDict,
                  max_label_len=max_label_len,
                  doLower=args["doLower"],
                  smartBatching=args["smartBatching"],
                  train_batchSize=args["train_batchSize"],
                  testval_batchSize=args["testval_batchSize"],
                  learningRate=args["learningRate"],
                  doLearningRateScheduler=args["doLearningRateScheduler"],
                  device=device)

    # Fit on train, monitor on validation, score on test.
    model.run(train_data=train_data,
              train_target=train_target,
              val_data=val_data,
              val_target=val_target,
              test_data=test_data,
              test_target=test_target,
              epochs=args["numEpochs"])

    # Flag the wandb run as finished for downstream aggregation.
    wandb.log({'finished': True})

    # Checkpointing is deliberately disabled for now.
    #model.save(os.path.join(args["model_path"], "{}".format(wandb.run.name)))