# Example 1
# Show every field the tokenizer produced (input_ids, attention_mask, ...).
for field_name in tokens_pt2:
    field_value = tokens_pt2[field_name]
    print(f"{field_name}:\n\t{field_value}")

# Re-run the model on the re-encoded batch; the summed element-wise deltas
# against the earlier outputs/pooled tensors should be zero if encoding matched.
outputs2, pooled2 = model(**tokens_pt2)
output_delta = (outputs2 - outputs).sum()
pooled_delta = (pooled2 - pooled).sum()
print("Difference with previous code: ({}, {})".format(output_delta, pooled_delta))

# Encode a single sentence: one segment, so token_type_ids are all 0.
single_seg_input = tokenizer.encode_plus("This is a sample input.")

# Encode a sentence pair: two segments, distinguished by token_type_ids.
multi_seg_input = tokenizer.encode_plus("This is segment A",
                                        "This is segment B")

single_ids = single_seg_input['input_ids']
print("Single segment token (str): {}".format(
    tokenizer.convert_ids_to_tokens(single_ids)))
print("Single segment token (int): {}".format(single_ids))
print("Single segment type       : {}".format(
    single_seg_input['token_type_ids']))

# The two segments are concatenated into one input sequence; token_type_ids
# records which segment each token came from.
print()
multi_ids = multi_seg_input['input_ids']
print("Multi segment token (str): {}".format(
    tokenizer.convert_ids_to_tokens(multi_ids)))
print("Multi segment token (int): {}".format(multi_ids))
print("Multi segment type       : {}".format(
    multi_seg_input['token_type_ids']))

# Padding highlight
tokens = tokenizer.batch_encode_plus(
    ["This is a sample", "This is another longer sample text"],