def __init__(self, embedder, encoding, bert_model='bert-base-chinese'):
    super(BertForWordSegmentation, self).__init__()
    self.embedder = embedder
    self.encoding = encoding
    self.tokenizer = BertTokenizer.from_pretrained(bert_model,
                                                   do_lower_case=False)
    self.model = cudaify(BertModel.from_pretrained(bert_model,
                                                   output_hidden_states=True))
    self.classifier = cudaify(
        DropoutClassifier(self.embedder.embedding_width(),
                          self.encoding.domain_size()))
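# The snippets in this section all lean on a cudaify helper defined elsewhere
# in the repo. A minimal sketch of what it plausibly does (an assumption, not
# the repo's actual implementation): move a module or tensor onto the GPU
# when one is available, and otherwise leave it on the CPU.
import torch

def cudaify_sketch(x):
    # Works for both nn.Module and torch.Tensor, since both expose .cuda().
    return x.cuda() if torch.cuda.is_available() else x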
def train_lemma_classifiers(vectorize, lemmas, n_fold, max_sample_size,
                            instance_db, cached, verbose=True):
    lemma_info_dict = defaultdict(tuple)
    for lemma in lemmas:
        print('Training classifier for: {}.'.format(lemma))
        # Use the two most frequent senses of the lemma as the binary classes.
        sense_hist = instance_db.sense_histogram(lemma)
        sense1 = sense_hist[0][1]
        sense2 = sense_hist[1][1]
        print(' ...sampling sense pairs.')
        data = instance_db.sample_sense_pairs(vectorize, max_sample_size // 2,
                                              lemma, sense1, sense2, n_fold)
        # Average the best-epoch accuracy across the n_fold train/test splits.
        sum_acc = 0
        fold_count = 0
        for training_data, test_data in data:
            print(' ...training fold {}.'.format(fold_count + 1))
            sum_acc += create_and_train_net(
                DropoutClassifier(2 * vectorize.dim(), 100, 2),
                training_data, test_data, verbose)
            fold_count += 1
        avg_acc = sum_acc / fold_count
        print(" ...best epoch accuracy average = {:.2f}".format(avg_acc))
        lemma_info_dict[lemma] = (avg_acc, sense1, sense2)
    return dict(lemma_info_dict)
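# DropoutClassifier is defined elsewhere in the repo, and its argument order
# is not consistent across these snippets (compare the (1536, 2, 200) call in
# create_and_train_net below with the (input, hidden, output) order here).
# A hypothetical sketch matching the (input_size, hidden_size, output_size)
# order used in this function:
import torch.nn as nn

class DropoutClassifierSketch(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, p=0.5):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(p),  # dropout regularizes the single hidden layer
            nn.Linear(hidden_size, output_size))

    def forward(self, x):
        return self.net(x)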
def __init__(self, embedder):
    super(BertForWordSegmentation, self).__init__()
    self.embedder = embedder
    self.tokenizer = BertTokenizer.from_pretrained(
        'bert-base-multilingual-cased', do_lower_case=False)
    self.model = cudaify(
        BertModel.from_pretrained('bert-base-multilingual-cased',
                                  output_hidden_states=True))
    self.classifier = cudaify(
        DropoutClassifier(self.embedder.embedding_width(), 2))
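# With output_hidden_states=True, recent versions of the transformers library
# expose one activation tensor per layer, which is what an embedder that
# mixes BERT layers needs. Standalone illustration (not repo code):
import torch
from transformers import BertTokenizer, BertModel

tok = BertTokenizer.from_pretrained('bert-base-multilingual-cased',
                                    do_lower_case=False)
bert = BertModel.from_pretrained('bert-base-multilingual-cased',
                                 output_hidden_states=True)
ids = torch.tensor([tok.encode("hello world")])
with torch.no_grad():
    out = bert(ids)
# out.hidden_states is a tuple of (num_layers + 1) tensors, each of shape
# (batch, sequence_length, 768), with the embedding layer first.
print(len(out.hidden_states), out.hidden_states[-1].shape)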
def create_and_train_net(training_data, test_data):
    training_data = cudaify(training_data)
    test_data = cudaify(test_data)
    print("training size:", training_data.shape)
    print("testing size:", test_data.shape)
    classifier = cudaify(DropoutClassifier(1536, 2, 200))
    return train_net(classifier, training_data, test_data,
                     lambda x, y: tensor_batcher(x, y, False),
                     batch_size=96,
                     n_epochs=12,
                     learning_rate=0.001,
                     verbose=True)
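# tensor_batcher is defined elsewhere in the repo. Judging from
# nth_dim_positive_data in test_training below, each data row looks like
# [label, feature_1, ..., feature_n], and the extra boolean passed here
# plausibly disables shuffling. A hypothetical sketch under those assumptions:
import torch

def tensor_batcher_sketch(data, batch_size, shuffle=True):
    rows = data[torch.randperm(data.shape[0])] if shuffle else data
    for i in range(0, rows.shape[0], batch_size):
        chunk = rows[i:i + batch_size]
        yield chunk[:, 1:], chunk[:, 0].long()  # (features, labels)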
def train_parser(train_csv, dev_csv):
    print('loading train')
    train = torch.tensor(pd.read_csv(train_csv).values).float()
    print('train size: {}'.format(train.shape[0]))
    print('loading dev')
    dev = torch.tensor(pd.read_csv(dev_csv).values).float()
    print('dev size: {}'.format(dev.shape[0]))
    classifier = DropoutClassifier(768 * 2, 200, 2)
    net = train_net(classifier, train, dev, tensor_batcher,
                    batch_size=96,
                    n_epochs=30,
                    learning_rate=0.001,
                    verbose=True)
    return net
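# The CSVs are loaded wholesale into float tensors, so every row must already
# be numeric. Assuming the label-in-column-0 convention and the 768 * 2 = 1536
# input width above, a hypothetical smoke test would look like:
import numpy as np
import pandas as pd

rows = np.hstack([np.random.randint(0, 2, size=(64, 1)),  # fake binary labels
                  np.random.randn(64, 768 * 2)])          # fake features
pd.DataFrame(rows).to_csv('tiny_train.csv', index=False)
pd.DataFrame(rows).to_csv('tiny_dev.csv', index=False)
net = train_parser('tiny_train.csv', 'tiny_dev.csv')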
def test_training(d, k):
    def nth_dim_positive_data(n, d, k):
        # k random d-dimensional points, labeled 1 when dimension n is
        # positive and 0 otherwise; after the transpose, each row is
        # [label, feature_1, ..., feature_d].
        data = torch.randn(d, k)
        u = torch.cat([torch.clamp(torch.sign(data[n:n + 1]), min=0), data])
        return u.t()
    train = nth_dim_positive_data(2, d, k)
    dev = nth_dim_positive_data(2, d, 500)
    #test = nth_dim_positive_data(2, d, 500)
    classifier = DropoutClassifier(d, 100, 2)
    train_net(classifier, train, dev, tensor_batcher,
              batch_size=96,
              n_epochs=30,
              learning_rate=0.001,
              verbose=True)
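# Sanity-check call: 10-dimensional points, 2000 training examples, labels
# determined by the sign of dimension 2. Since the task is linearly
# separable, a working training loop should approach 100% dev accuracy.
test_training(10, 2000)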
def createAndTrainNN(file_name, trainingData, testData):
    if torch.cuda.is_available():
        print("using gpu")
        cuda = torch.device('cuda:2')  # note: hardcoded to GPU 2
        FloatTensor = torch.cuda.FloatTensor
        LongTensor = torch.cuda.LongTensor
        def cudaify(model):
            return model.cuda(cuda)
    else:
        print("using cpu")
        cuda = torch.device('cpu')
        FloatTensor = torch.FloatTensor
        LongTensor = torch.LongTensor
        def cudaify(model):
            return model
    #trainingData, testData = sampleFromFileTwoSenses(num_pairs, file_name, 0.8, senses)
    trainingData = cudaify(trainingData)
    testData = cudaify(testData)
    print("training size:", trainingData.shape)
    print("testing size:", testData.shape)
    print(file_name)
    classifier = cudaify(DropoutClassifier(1536, 100, 2))
    train_net(classifier, trainingData, testData, tensor_batcher,
              batch_size=96,
              n_epochs=10,
              learning_rate=0.001,
              verbose=True)
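# The hardcoded torch.device('cuda:2') above ties the run to one particular
# machine. A more portable variant (a sketch, not the repo's convention;
# WSD_DEVICE is a hypothetical variable name) reads the device from the
# environment and falls back gracefully:
import os
import torch

device = torch.device(os.environ.get(
    'WSD_DEVICE',
    'cuda' if torch.cuda.is_available() else 'cpu'))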
def run(self):
    model = DropoutClassifier(self.generator.get_vocab(),
                              self.num_choices,
                              self.hidden_layer_size)
    # Replicate across visible GPUs; .cuda() moves parameters in place,
    # so cudaify's return value does not need to be reassigned.
    model = nn.DataParallel(model)
    cudaify(model)
    return self.batch_train(model)
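# nn.DataParallel replicates the wrapped module across all visible GPUs at
# forward time and splits each batch along dimension 0. Minimal standalone
# illustration with a toy module (not repo code):
import torch
import torch.nn as nn

toy = nn.DataParallel(nn.Linear(8, 2))
if torch.cuda.is_available():
    toy.cuda()                            # in-place parameter move
    out = toy(torch.randn(4, 8).cuda())  # batch split across GPUs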