# NOTE(review): this chunk arrived collapsed onto a single physical line, so
# the statement boundaries and nesting below are reconstructed from syntax
# alone -- confirm against the original notebook/script.
#
# The first four statements read `gold_label` and `row`, which are not
# assigned anywhere in this chunk; presumably they are the tail of a per-row
# loop whose header lies above. Re-indent them under that loop if so.
if gold_label == 'neutral':
    # Rows whose gold label is 'neutral' are given the float label 2/3.
    float_gold_label = 2 / 3.0
inp_example = InputExample(
    texts=[row['sentence1'], row['sentence2']],
    label=float_gold_label,
)
test_samples.append(inp_example)

print(len(test_samples))

print('Primeiro: ')
# Debug output: prints every training sample, then the first one again.
for t in train_samples:
    print(t)
print(train_samples[0])

# Open the gzip-compressed, tab-separated dataset as UTF-8 text. QUOTE_NONE
# tells the csv reader to do no special processing of quote characters.
# NOTE(review): `reader` is not consumed in this chunk, and the underlying
# file is closed when the `with` block exits -- any iteration over `reader`
# has to happen inside the block.
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)

# Print the size of the `fc1` output for `x` (`x` is defined outside this chunk).
print(word_embedding_model.fc1(x).size())

# Imports are kept where the original placed them (mid-script); the top of
# the file is outside this chunk, so they are not moved.
import math
from sentence_transformers import losses
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Train on the first 100,000 samples only; the loader shuffles them.
train_dataloader = DataLoader(train_samples[:100000], shuffle=True, batch_size=train_batch_size)
# train_dataloader = DataLoader(train_samples[:80000], shuffle=True, batch_size=train_batch_size)
train_loss = losses.CosineSimilarityLoss(model=word_embedding_model)

# Development set: Measure correlation between cosine score and gold labels
logging.info("Read SNLI benchmark dev dataset")