def __init__(self, fold):
    """Initialize best-epoch tracking and save-path patterns for one fold.

    fold -- 1-based fold index used to build the results sub-directory.
    """
    self.bestEpoch = None
    self.maxF1 = 0
    self.persistence = ModelPersistence()
    # Base directory where this fold's models are persisted.
    foldDir = Config.Instance().resultsDir + '/fold_' + str(fold)
    # NOTE(review): the attribute names and file prefixes look swapped
    # ('Regular' -> 'model_', 'Epoch' -> 'regular_'); original on-disk
    # behavior is preserved here -- confirm the intended pairing.
    self.patternRegular = foldDir + '/model_'
    self.patternEpoch = foldDir + '/regular_'
    # Retention counts for the two save strategies.
    self.keepBestEpoch = 5
    self.keepFixedPoint = 3
    # Histories of files saved under each strategy (for later pruning).
    self.savedByBestEpoch = []
    self.savedForFixedPoint = []
def readConfig(file='/srl-config.json'):
    """Prepare and return the (Config, ModelConfig) singleton pair.

    file -- path suffix appended to config.srlConfig to locate the
            model-configuration JSON.
    """
    cfg = Config.Instance()
    cfg.prepare(Utils.getWorkingDirectory())
    mdl = ModelConfig.Instance()
    mdl.prepare(cfg.srlConfig + file)
    return cfg, mdl
def prepareEmbeddings(useWiki=False): config = Config.Instance() modelConfig = ModelConfig.Instance() tokens = extractAllTokens(config.convertedCorpusDir+'/propbank_full.csv') if useWiki: tokens.update(extractAllTokens(config.convertedCorpusDir+'/wiki.csv')) print '{} tokens found'.format(len(tokens)) predicates = extractAllTokens(config.convertedCorpusDir+'/propbank_full.csv', 'predicate') w2vFiles = { "npzFile":config.embeddingsDir+"/wordEmbeddings.npy", "npzModel":config.embeddingsDir+"/wordEmbeddings", "vecFile":__getVecFile(config.embeddingsDir, modelConfig.embeddingSize), "w2idxFile":config.embeddingsDir+"/vocabulary.json" } w2v = W2VModel() w2v.setResources(w2vFiles) loader = EmbeddingLoader(w2v) word2idx, idx2word, weights = loader.process() if modelConfig.embeddingType == 'w2v': return loader, loader sentHybridFiles = { "npzFile":config.embeddingsDir+"/sent_hybrid.npy", "npzModel":config.embeddingsDir+"/sent_hybrid", "w2idxFile":config.embeddingsDir+"/sent_hybrid.json" } sentHybrid = HybridModel() sentHybrid.setResources(sentHybridFiles) print 'creating sentence corpus' sentHybrid.generateCorpus(tokens, weights, word2idx) Hloader = EmbeddingLoader(sentHybrid) Hword2idx, Hidx2word, Hweights = Hloader.process() predHybridFiles = { "npzFile":config.embeddingsDir+"/pred_hybrid.npy", "npzModel":config.embeddingsDir+"/pred_hybrid", "w2idxFile":config.embeddingsDir+"/pred_hybrid.json" } predHybrid = HybridModel() predHybrid.setResources(predHybridFiles) print 'creating predicate corpus' predHybrid.generateCorpus(predicates, weights, word2idx) Ploader = EmbeddingLoader(predHybrid) Pword2idx, Pidx2word, Pweights = Ploader.process() return Hloader, Ploader
def deleteTrainingResources(k_folds=20):
    """Delete cached feature files and generated embedding artifacts.

    k_folds -- number of per-fold feature files to remove (1..k_folds).
    Removing these forces the next run to regenerate them from scratch.
    """
    # Config is a singleton, so resolving the resource dir once is safe.
    resourceDir = Config.Instance().resourceDir
    __delete(resourceDir + '/feature_file.npy')
    for fold in xrange(1, k_folds + 1):
        __delete(resourceDir + '/feature_file_' + str(fold) + '.npy')
    # Derived embedding artifacts, removed in the original order.
    embeddingFiles = (
        'vocabulary.json',
        'pred_hybrid.json',
        'pred_hybrid.npy',
        'sent_hybrid.json',
        'sent_hybrid.npy',
        'wordEmbeddings.npy',
    )
    for fileName in embeddingFiles:
        __delete(resourceDir + '/embeddings/' + fileName)
def getEmbeddings():
    """Load previously generated embedding loaders (no corpus generation).

    Returns (sentenceLoader, predicateLoader); for the 'w2v' embedding type
    a single word2vec loader is returned for both positions.
    """
    config = Config.Instance()
    modelConfig = ModelConfig.Instance()

    if modelConfig.embeddingType == 'w2v':
        w2v = W2VModel()
        w2v.setResources({
            "npzFile": config.embeddingsDir + "/wordEmbeddings.npy",
            "npzModel": config.embeddingsDir + "/wordEmbeddings",
            "vecFile": __getVecFile(config.embeddingsDir, modelConfig.embeddingSize),
            "w2idxFile": config.embeddingsDir + "/vocabulary.json",
        })
        loader = EmbeddingLoader(w2v)
        loader.process()
        return loader, loader

    def loadHybrid(prefix):
        # Hybrid resources were generated by an earlier preparation step;
        # here they are only loaded and processed.
        model = HybridModel()
        model.setResources({
            "npzFile": config.embeddingsDir + "/" + prefix + ".npy",
            "npzModel": config.embeddingsDir + "/" + prefix,
            "w2idxFile": config.embeddingsDir + "/" + prefix + ".json",
        })
        hybridLoader = EmbeddingLoader(model)
        hybridLoader.process()
        return hybridLoader

    return loadHybrid("sent_hybrid"), loadHybrid("pred_hybrid")
from model.configuration import Config from model.configuration.model_config import ModelConfig from utils.function_utils import Utils from utils import extractFeaturesFromSentence, toNNFormat from embeddings import getEmbeddings import pandas as pd print 'loading configuration' config = Config.Instance() config.prepare(Utils.getWorkingDirectory()) modelConfig = ModelConfig.Instance() modelConfig.prepare(config.srlConfig + '/srl-config.json') print 'configuration loaded' sentenceLoader, predicateLoader = getEmbeddings(config, modelConfig.embeddingType) wikiFile = pd.read_csv(config.convertedCorpusDir + '/wiki.csv') for i in xrange(0, len(wikiFile)): predicate = wikiFile['predicate'][i] sentence = wikiFile['sentence'][i] convertedSentence, convertedPredicate, allCaps, firstCaps, noCaps, context, distance = extractFeaturesFromSentence( sentence, predicate, sentenceLoader.word2idx, predicateLoader.word2idx) inputSentence, inputPredicate, inputAux = toNNFormat( convertedSentence, convertedPredicate, allCaps, firstCaps, noCaps, context, distance) print inputSentence.shape, inputPredicate.shape, inputAux.shape break
# Resume-training script: reload a saved model by CLI name/epoch/lr and
# continue the epoch loop. NOTE(review): `lrReducer` and `container` are not
# defined in this chunk -- presumably created earlier in the file; the epoch
# loop body also appears truncated here.
clr = CyclicLearningRate(base_lr=0.00020, max_lr=0.0012, step_size=(204. * 3), mode='exp_range', gamma=0.99996)
msaver = ModelEvaluation()
print 'prepared'
print 'creating neural network model'
# CLI arguments: model file stem, epoch to resume from, learning rate.
# (`file` shadows the builtin of the same name.)
file = str(sys.argv[1])
startingEpoch = int(sys.argv[2])
lrDefault = float(sys.argv[3])
print 'model to load : {} - epoch : {} - lr : {}'.format(
    file, startingEpoch, lrDefault)
model = LSTMModel(ModelConfig.Instance())
# Architecture comes from the .json, weights from the .h5py file.
nn = model.load(Config.Instance().resultsDir + '/' + file + '.json',
                Config.Instance().resultsDir + '/' + file + '.h5py')
nn.summary()
lrReducer.setNetwork(nn)
es = EarlyStopper()
lrReducer.setLearningRate(lrDefault)
print 'model loaded'
print 'start training'
number_of_epochs = ModelConfig.Instance().trainingEpochs
# Resume from the CLI-supplied epoch rather than 0.
for epoch in xrange(startingEpoch, number_of_epochs):
    print "--------- Epoch %d -----------" % (epoch)
    start_time = time.time()
    numIterations = len(container)
# Resume-training script (hard-coded from epoch 50): batch the training data,
# reload model_50, recompile, and continue training. NOTE(review):
# `trainingData`, `tagMap`, `tagList`, `testData`, `nnUtils`, `config` and
# `modelConfig` are not defined in this chunk -- presumably created earlier
# in the file; the epoch loop body also appears truncated here.
batcher = Batcher()
# trainingData slots 0-3: presumably sentences/predicates/aux/labels -- TODO confirm.
batcher.addAll(trainingData[0], trainingData[1], trainingData[2],
               trainingData[3])
container = batcher.getBatches()
inference = SRLInference(tagMap, tagList)
evaluator = Evaluator(testData, inference, nnUtils,
                      config.resultsDir + '/finalResult.json')
lrReducer = RateBasedLrReducer(modelConfig.trainingEpochs)
msaver = ModelEvaluation()
print 'prepared'
print 'creating neural network model'
mp = ModelPersistence()
# Checkpoint at epoch 50: architecture from .json, weights from .h5py.
nn = mp.load(
    Config.Instance().resultsDir + '/model_50.json',
    Config.Instance().resultsDir + '/model_50.h5py',
)
# Recompile after loading so the optimizer/loss come from the live config.
nn.compile(optimizer=modelConfig.optimizer, loss=modelConfig.lossFunction,
           metrics=['accuracy'])
nn.summary()
lrReducer.setNetwork(nn)
print 'model loaded'
print 'start training'
number_of_epochs = ModelConfig.Instance().trainingEpochs
# Continue for another `number_of_epochs` epochs starting at 50;
# the printed epoch number is 1-based (epoch + 1).
for epoch in xrange(50, 50 + number_of_epochs):
    print "--------- Epoch %d -----------" % (epoch + 1)
    start_time = time.time()