Esempio n. 1
0
def hyperargs():  # type: () -> dict
    """
    Builds different sets of arguments for the classifier.  Must be the same for
    training and predicting.

    One ``ClassificationArgs`` object is created for every combination of
    window size (stored in ``max_seq_length``) and ``stride``; all other
    settings are identical across the returned configurations.  Each entry is
    keyed ``basic5epochs<window>win<stride * 10>stride``, for example
    ``basic5epochs128win7stride``.

    :return: the labeled arguments, keyed by configuration label
    :rtype: dict
    """

    retdict = {}

    for curwindow in [128, 64, 32, 256]:
        for curstride in [0.7, 0.8, 0.9]:
            # A fresh object per combination so the entries never share state.
            accargs = ClassificationArgs()
            accargs.num_train_epochs = 5
            accargs.fp16 = False
            accargs.overwrite_output_dir = True
            accargs.evaluate_during_training = False
            accargs.sliding_window = True
            accargs.max_seq_length = curwindow
            accargs.stride = curstride
            accargs.labels_list = [1, 0]
            accargs.save_eval_checkpoints = False
            accargs.save_model_every_epoch = False
            accargs.silent = True
            accargs.manual_seed = 18

            # Round before converting: int() alone truncates, so a float
            # product that lands just below a whole number would be labeled
            # one too low.
            stridetag = int(round(curstride * 10.0))
            label = 'basic5epochs{}win{}stride'.format(curwindow, stridetag)
            retdict[label] = accargs

    return retdict
Esempio n. 2
0
def buildbertargs():  # type: () -> ClassificationArgs
    """
    Creates the single classifier configuration.  Training and predicting
    must both use this same configuration.

    :return: the configured arguments
    :rtype: ClassificationArgs
    """

    # (attribute name, value) pairs, applied in the order listed.  The tuple is
    # rebuilt on every call, so each result gets its own labels list.
    settings = (
        ('num_train_epochs', 5),
        ('fp16', False),
        ('overwrite_output_dir', True),
        ('evaluate_during_training', False),
        ('sliding_window', True),
        ('max_seq_length', 256),
        ('stride', 0.9),
        ('labels_list', [1, 0]),
        ('save_model_every_epoch', False),
        ('silent', True),
        ('manual_seed', 18),
    )

    bertargs = ClassificationArgs()
    for option, value in settings:
        setattr(bertargs, option, value)

    return bertargs
]
# Wrap the training rows in a DataFrame and name its columns "text" and
# "labels".
# NOTE(review): train_data is defined earlier in the file; presumably a list of
# [text, label] rows like eval_data below - confirm.
train_df = pd.DataFrame(train_data)
train_df.columns = ["text", "labels"]

# Preparing eval data: two [text, label] rows whose labels are the strings
# "true" / "false".
eval_data = [
    ["Theoden was the king of Rohan", "true"],
    ["Merry was the king of Rohan", "false"],
]
eval_df = pd.DataFrame(eval_data)
eval_df.columns = ["text", "labels"]

# Module-level model configuration; train() passes it to ClassificationModel
# as `args=`.
model_args = ClassificationArgs()
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True
model_args.evaluate_during_training = True
model_args.manual_seed = 4
model_args.use_multiprocessing = True
model_args.train_batch_size = 16
model_args.eval_batch_size = 8
# Same label strings as used in the "labels" column of eval_data above.
model_args.labels_list = ["true", "false"]
# NOTE(review): presumably the Weights & Biases project the runs are logged
# under - confirm against the library documentation.
model_args.wandb_project = "Simple Sweep"


def train():
    # Initialize a new wandb run
    wandb.init()

    # Create a TransformerModel
    model = ClassificationModel("roberta", "roberta-base", use_cuda=True, args=model_args, sweep_config=wandb.config,)