def main(self):
    """Construct the dataset and the model, then start training."""
    # The dataset must exist first: the model needs the vocabulary size.
    self.textData = TextData(self.args)
    vocab_size = self.textData.getVocabularySize()
    self.args['vocabularySize'] = vocab_size
    print(vocab_size)
    self.model = Model(self.args)
    self.train()
def set_up_things(self, args=None):
    """Populate default hyper-parameters and build dataset, model and TF session.

    The ``args`` parameter is accepted for signature compatibility but the
    configuration is always rebuilt from the hard-coded defaults below.
    """
    test_yes = True
    self.args = {
        'rootDir': os.getcwd(),  # Use the current working directory
        'corpus': 'cornell',
        'maxLength': 10,
        'hiddenSize': 256,
        'numLayers': 2,
        'embeddingSize': 32,
        'softmaxSamples': 0,
        'numEpochs': 50,
        'saveEvery': 5000,
        'batchSize': 10,
        'learningRate': 0.001,
        'reset': False,
        'interactive': test_yes,
        'test': test_yes,
    }

    # Update the self.modelDir and self.globStep, for now, not used when
    # loading Model (but need to be called before _getSummaryName)
    self.loadModelParams()

    self.textData = TextData(self.args)
    self.model = Model(self.args, self.textData)
    self.writer = tf.train.SummaryWriter(self.modelDir)

    if '12' in tf.__version__:
        # HACK: Solve new tf Saver V2 format
        self.saver = tf.train.Saver(max_to_keep=200, write_version=1)  # Arbitrary limit ?
    else:
        self.saver = tf.train.Saver(max_to_keep=200)

    self.sess = tf.Session()
    print('Initialize variables...')
    self.sess.run(tf.initialize_all_variables())
    self.managePreviousModel(self.sess)
def __init__(self, encoder_hidden_units, input_embedding_size, bath_size):
    """Store network dimensions, load the dataset and build the graph.

    Note: ``bath_size`` (sic, batch size) keeps its misspelled name so that
    existing keyword callers remain valid.
    """
    self.encoder_hidden_units = encoder_hidden_units
    self.input_embedding_size = input_embedding_size
    self.batch_size = bath_size
    # Hard-coded dataset configuration — presumably path/dir/limits; confirm
    # against the TextData constructor.
    self.textData = TextData("train.tsv", "data", 100, "test_", 800)
    self.vocab_size = self.textData.getVocabularySize()
    self.buildNetwork()
def main():
    """Entry point: parse command-line arguments and build the dataset.

    Fix: removed the dead trailing ``pass`` (a no-op after executable
    statements) and the unused local binding of the TextData instance —
    the constructor is kept for its dataset-building side effects.
    """
    print('Welcome to DeepQA v0.1 !')
    print()
    args = parseArgs()
    TextData(args)  # Built for its side effects; the instance is not used here
def main(self):
    """Configure hyper-parameters, load the CAIL corpus and train the
    architecture selected by ``args['model_arch']``.

    Mutates the module-level ``args`` dict; assumes 'model_arch', 'rootDir'
    and 'device' were set by the caller — TODO confirm.
    """
    args['datasetsize'] = 'big'
    # Per-architecture overrides of batch size / classification task.
    if args['model_arch'] in ['lstmgrid']:
        args['batchSize'] = 64
    elif args['model_arch'] in ['lstmibgan']:
        args['classify_type'] = 'single'
        args['batchSize'] = 64
    elif args['model_arch'] in ['lstmibgan_law']:
        args['classify_type'] = 'single'
        args['batchSize'] = 64
        args['task'] = 'law'
    elif args['model_arch'] in ['lstmibgan_toi']:
        args['classify_type'] = 'single'
        args['batchSize'] = 64
        args['task'] = 'toi'
    # Load the dataset and expose its vocabulary / label counts to args.
    self.textData = TextData('cail')
    self.start_token = self.textData.word2index['START_TOKEN']
    self.end_token = self.textData.word2index['END_TOKEN']
    args['vocabularySize'] = self.textData.getVocabularySize()
    if args['model_arch'] in ['lstmibgan_law']:
        args['chargenum'] = self.textData.getLawNum()
    elif args['model_arch'] in ['lstmibgan_toi']:
        args['chargenum'] = 11  # NOTE(review): magic constant — presumably the TOI class count; confirm
    else:
        args['chargenum'] = self.textData.getChargeNum()
    print(self.textData.getVocabularySize())
    # Dispatch on the requested architecture: each branch builds the model
    # and immediately starts training.
    if args['model_arch'] == 'lstm':
        print('Using LSTM model.')
        self.model = LSTM_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmatt':
        print('Using LSTM attention model.')
        self.model = LSTM_att_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'transformer':
        print('Using Transformer model.')
        self.model = TransformerModel(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmib':
        print('Using LSTM information bottleneck model.')
        self.model = LSTM_IB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'].startswith('lstmibgan'):
        # Matches all three lstmibgan* variants configured above.
        print('Using LSTM information bottleneck GAN model. Task: ' + args['task'])
        # Load a pre-trained language model and freeze its weights so it is
        # used as a fixed component during GAN training.
        LM = torch.load(args['rootDir'] + '/LM' + args['datasetsize'] + '.pkl', map_location=args['device'])
        for param in LM.parameters():
            param.requires_grad = False
        LSTM_IB_GAN.train(self.textData, LM)
    elif args['model_arch'] == 'lstmibcp':
        # NOTE(review): this string literal was split across lines in the
        # original source; reconstructed with an explicit \n — confirm.
        print('Using LSTM information bottleneck model. \n-- complete words')
        self.model = LSTM_IB_CP_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmcapib':
        print('Using LSTM capsule information bottleneck model.')
        self.model = LSTM_capsule_IB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmiterib':
        print('Using LSTM iteratively information bottleneck model.')
        self.model = LSTM_iterIB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmcap':
        print('Using LSTM capsule model.')
        self.model = LSTM_capsule_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmgrid':
        print('Using LSTM grid model.')
        self.model = LSTM_grid_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmgmib':
        print('Using LSTM Gaussian Mixture IB model.')
        self.model = LSTM_GMIB_Model(self.textData.word2index, self.textData.index2word)
        # Only this branch moves the model to args['device'] before training.
        self.model = self.model.to(args['device'])
        self.train()
def main(self):
    """Load the CAIL corpus and train the architecture selected by
    ``args['model_arch']``.

    Mutates the module-level ``args`` dict; assumes 'model_arch' was set by
    the caller — TODO confirm.
    """
    args['batchSize'] = 32
    # Load the dataset and expose its vocabulary / charge counts to args.
    self.textData = TextData('cail')
    self.start_token = self.textData.word2index['START_TOKEN']
    self.end_token = self.textData.word2index['END_TOKEN']
    args['vocabularySize'] = self.textData.getVocabularySize()
    args['chargenum'] = self.textData.getChargeNum()
    print(self.textData.getVocabularySize())
    # Dispatch on the requested architecture: each branch builds the model
    # and immediately starts training.
    if args['model_arch'] == 'lstm':
        print('Using LSTM model.')
        self.model = LSTM_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmatt':
        print('Using LSTM attention model.')
        self.model = LSTM_att_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'transformer':
        print('Using Transformer model.')
        self.model = TransformerModel(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmib':
        print('Using LSTM information bottleneck model.')
        self.model = LSTM_IB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmibgan':
        print('Using LSTM information bottleneck GAN model.')
        # NOTE(review): this branch never assigns self.model, so the
        # self.model.to(...) call at the end of this method would raise —
        # confirm whether this path is ever taken.
        LSTM_IB_GAN.train(self.textData)
    elif args['model_arch'] == 'lstmibcp':
        # NOTE(review): this string literal was split across lines in the
        # original source; reconstructed with an explicit \n — confirm.
        print('Using LSTM information bottleneck model. \n-- complete words')
        self.model = LSTM_IB_CP_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmcapib':
        print('Using LSTM capsule information bottleneck model.')
        self.model = LSTM_capsule_IB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmiterib':
        print('Using LSTM iteratively information bottleneck model.')
        self.model = LSTM_iterIB_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmcap':
        print('Using LSTM capsule model.')
        self.model = LSTM_capsule_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmgrid':
        print('Using LSTM grid model.')
        self.model = LSTM_grid_Model(self.textData.word2index, self.textData.index2word)
        self.train()
    elif args['model_arch'] == 'lstmgmib':
        print('Using LSTM Gaussian Mixture IB model.')
        self.model = nn.parallel.DataParallel(
            LSTM_GMIB_Model(self.textData.word2index, self.textData.index2word))
        self.train()
    # NOTE(review): the device is chosen and the model moved to it only
    # AFTER train() has already run in the branch above — training therefore
    # happens on the default device. This looks like a bug (device setup
    # probably belongs before the dispatch); left unchanged pending confirmation.
    args['device'] = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.model.to(args['device'])
def main(self, args=None):
    """
    Launch the training and/or the interactive mode
    """
    print('Welcome to DeepQA v0.1 !')
    print()
    print('TensorFlow detected: v{}'.format(tf.__version__))

    # General initialisation
    self.args = self.parseArgs(args)
    if not self.args.rootDir:
        self.args.rootDir = os.getcwd()  # Use the current working directory

    #tf.logging.set_verbosity(tf.logging.INFO)  # DEBUG, INFO, WARN (default), ERROR, or FATAL

    # Update the self.modelDir and self.globStep, for now, not used when
    # loading Model (but need to be called before _getSummaryName)
    self.loadModelParams()

    self.textData = TextData(self.args)
    # TODO: Add a mode where we can force the input of the decoder // Try to visualize the predictions for
    # each word of the vocabulary / decoder input
    # TODO: For now, the model are trained for a specific dataset (because of the maxLength which define the
    # vocabulary). Add a compatibility mode which allow to launch a model trained on a different vocabulary (
    # remap the word2id/id2word variables).
    if self.args.createDataset:
        # Dataset-only mode: nothing else to do once TextData has been built.
        print('Dataset created! Thanks for using this program')
        return  # No need to go further

    # Prepare the model
    with tf.device(self.getDevice()):
        self.model = Model(self.args, self.textData)

    # Saver/summaries
    self.writer = tf.summary.FileWriter(self._getSummaryName())
    self.saver = tf.train.Saver(max_to_keep=200)

    # TODO: Fixed seed (WARNING: If dataset shuffling, make sure to do that after saving the
    # dataset, otherwise, all which cames after the shuffling won't be replicable when
    # reloading the dataset). How to restore the seed after loading ??
    # Also fix seed for random.shuffle (does it works globally for all files ?)

    # Running session
    self.sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,  # Allows backup device for non GPU-available operations (when forcing GPU)
        log_device_placement=False)  # Too verbose ?
    )  # TODO: Replace all sess by self.sess (not necessary a good idea) ?

    if self.args.debug:
        # Wrap the session in the TF CLI debugger and flag NaN/Inf tensors.
        self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
        self.sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

    print('Initialize variables...')
    self.sess.run(tf.global_variables_initializer())

    # Reload the model eventually (if it exist.), on testing mode, the
    # models are not loaded here (but in predictTestset)
    if self.args.test != Chatbot.TestMode.ALL:
        self.managePreviousModel(self.sess)

    # Initialize embeddings with pre-trained word2vec vectors
    if self.args.initEmbeddings:
        self.loadEmbedding(self.sess)

    if self.args.test:
        if self.args.test == Chatbot.TestMode.INTERACTIVE:
            self.mainTestInteractive(self.sess)
        elif self.args.test == Chatbot.TestMode.ALL:
            print('Start predicting...')
            self.predictTestset(self.sess)
            print('All predictions done')
        elif self.args.test == Chatbot.TestMode.DAEMON:
            print('Daemon mode, running in background...')
        else:
            raise RuntimeError('Unknown test mode: {}'.format(self.args.test))  # Should never happen
    else:
        self.mainTrain(self.sess)

    # DAEMON mode keeps the session open for background use; all other
    # modes close it here.
    if self.args.test != Chatbot.TestMode.DAEMON:
        self.sess.close()
        print("The End! Thanks for using this program")