Example #1
    def main(self):
        self.textData = TextData(self.args)
        self.args['vocabularySize'] = self.textData.getVocabularySize()
        print(self.textData.getVocabularySize())
        self.model = Model(self.args)

        self.train()
Example #2
 def set_up_things(self, args=None):
     self.args = {}
     self.args['rootDir'] = os.getcwd()  # Use the current working directory
     self.args['corpus'] = 'cornell'
     self.args['maxLength'] = 10
     self.args['hiddenSize'] = 256
     self.args['numLayers'] = 2
     self.args['embeddingSize'] = 32
     self.args['softmaxSamples'] = 0
     self.args['numEpochs'] = 50
     self.args['saveEvery'] = 5000
     self.args['batchSize'] = 10
     self.args['learningRate'] = 0.001
     self.args['reset'] = False
     test_yes = True
     self.args['interactive'] = test_yes
     self.args['test'] = test_yes
     self.loadModelParams()  # Updates self.modelDir and self.globStep; not used when loading the Model for now, but must be called before _getSummaryName
     self.textData = TextData(self.args)
     self.model = Model(self.args, self.textData)
     self.writer = tf.train.SummaryWriter(self.modelDir)
     if '12' in tf.__version__:  # HACK: on TF 0.12, force the old Saver V1 checkpoint format
         self.saver = tf.train.Saver(max_to_keep=200,
                                     write_version=1)  # Arbitrary limit?
     else:
         self.saver = tf.train.Saver(max_to_keep=200)
     self.sess = tf.Session()
     print('Initialize variables...')
     self.sess.run(tf.initialize_all_variables())
     self.managePreviousModel(self.sess)
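Note: this example targets the pre-1.0 TensorFlow API (tf.train.SummaryWriter, tf.initialize_all_variables, and the version-string check to force the old Saver V1 format). A minimal sketch of the equivalent setup on TensorFlow 1.x, using the same calls Example #7 below already uses (self.modelDir is assumed to be set by loadModelParams):

     self.writer = tf.summary.FileWriter(self.modelDir)  # replaces tf.train.SummaryWriter
     self.saver = tf.train.Saver(max_to_keep=200)  # the V2 checkpoint format is the default, so no version hack is needed
     self.sess = tf.Session()
     self.sess.run(tf.global_variables_initializer())  # replaces tf.initialize_all_variables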
Example #3
 def __init__(self, encoder_hidden_units, input_embedding_size, batch_size):
     self.textData = TextData("train.tsv", "data", 100, "test_", 800)
     self.vocab_size = self.textData.getVocabularySize()
     self.input_embedding_size = input_embedding_size
     self.encoder_hidden_units = encoder_hidden_units
     self.batch_size = batch_size
     self.buildNetwork()
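Note: here TextData is built from positional arguments rather than an args dict as in the other examples; the parameter meanings suggested by the call site (training file, data directory, max length, test prefix, vocabulary limit) are an assumption. A minimal usage sketch, with the class name Seq2SeqEncoder being hypothetical:

     # Hypothetical class name; the constructor mirrors the example above.
     net = Seq2SeqEncoder(encoder_hidden_units=256,
                          input_embedding_size=32,
                          batch_size=10)
     # __init__ loads the data, reads the vocabulary size, and calls buildNetwork().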
Example #4
def main():
    print('Welcome to DeepQA v0.1 !')
    print()

    args = parseArgs()

    textData = TextData(args)

Example #5
    def main(self):
        args['datasetsize'] = 'big'
        if args['model_arch'] == 'lstmgrid':
            args['batchSize'] = 64
        elif args['model_arch'].startswith('lstmibgan'):
            args['classify_type'] = 'single'
            args['batchSize'] = 64
            if args['model_arch'] == 'lstmibgan_law':
                args['task'] = 'law'
            elif args['model_arch'] == 'lstmibgan_toi':
                args['task'] = 'toi'

        self.textData = TextData('cail')
        self.start_token = self.textData.word2index['START_TOKEN']
        self.end_token = self.textData.word2index['END_TOKEN']
        args['vocabularySize'] = self.textData.getVocabularySize()

        if args['model_arch'] in ['lstmibgan_law']:
            args['chargenum'] = self.textData.getLawNum()
        elif args['model_arch'] in ['lstmibgan_toi']:
            args['chargenum'] = 11
        else:
            args['chargenum'] = self.textData.getChargeNum()

        print(self.textData.getVocabularySize())

        if args['model_arch'] == 'lstm':
            print('Using LSTM model.')
            self.model = LSTM_Model(self.textData.word2index,
                                    self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmatt':
            print('Using LSTM attention model.')
            self.model = LSTM_att_Model(self.textData.word2index,
                                        self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'transformer':
            print('Using Transformer model.')
            self.model = TransformerModel(self.textData.word2index,
                                          self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmib':
            print('Using LSTM information bottleneck model.')
            self.model = LSTM_IB_Model(self.textData.word2index,
                                       self.textData.index2word)
            self.train()
        elif args['model_arch'].startswith('lstmibgan'):
            # args['task'] is only set above for the _law/_toi variants; the plain
            # 'lstmibgan' arch is assumed to get its task from a default elsewhere.
            print('Using LSTM information bottleneck GAN model. Task: ' +
                  args['task'])
            LM = torch.load(args['rootDir'] + '/LM' + args['datasetsize'] +
                            '.pkl',
                            map_location=args['device'])
            for param in LM.parameters():
                param.requires_grad = False

            LSTM_IB_GAN.train(self.textData, LM)
        elif args['model_arch'] == 'lstmibcp':
            print('Using LSTM information bottleneck model. -- complete words')
            self.model = LSTM_IB_CP_Model(self.textData.word2index,
                                          self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmcapib':
            print('Using LSTM capsule information bottleneck model.')
            self.model = LSTM_capsule_IB_Model(self.textData.word2index,
                                               self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmiterib':
            print('Using LSTM iteratively information bottleneck model.')
            self.model = LSTM_iterIB_Model(self.textData.word2index,
                                           self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmcap':
            print('Using LSTM capsule model.')
            self.model = LSTM_capsule_Model(self.textData.word2index,
                                            self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmgrid':
            print('Using LSTM grid model.')
            self.model = LSTM_grid_Model(self.textData.word2index,
                                         self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmgmib':
            print('Using LSTM Gaussian Mixture IB model.')
            self.model = LSTM_GMIB_Model(self.textData.word2index,
                                         self.textData.index2word)
            self.model = self.model.to(args['device'])
            self.train()
Example #6
    def main(self):
        args['batchSize'] = 32
        self.textData = TextData('cail')
        self.start_token = self.textData.word2index['START_TOKEN']
        self.end_token = self.textData.word2index['END_TOKEN']
        args['vocabularySize'] = self.textData.getVocabularySize()
        args['chargenum'] = self.textData.getChargeNum()
        print(self.textData.getVocabularySize())

        if args['model_arch'] == 'lstm':
            print('Using LSTM model.')
            self.model = LSTM_Model(self.textData.word2index,
                                    self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmatt':
            print('Using LSTM attention model.')
            self.model = LSTM_att_Model(self.textData.word2index,
                                        self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'transformer':
            print('Using Transformer model.')
            self.model = TransformerModel(self.textData.word2index,
                                          self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmib':
            print('Using LSTM information bottleneck model.')
            self.model = LSTM_IB_Model(self.textData.word2index,
                                       self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmibgan':
            print('Using LSTM information bottleneck GAN model.')
            LSTM_IB_GAN.train(self.textData)
        elif args['model_arch'] == 'lstmibcp':
            print('Using LSTM information bottleneck model. -- complete words')
            self.model = LSTM_IB_CP_Model(self.textData.word2index,
                                          self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmcapib':
            print('Using LSTM capsule information bottleneck model.')
            self.model = LSTM_capsule_IB_Model(self.textData.word2index,
                                               self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmiterib':
            print('Using LSTM iteratively information bottleneck model.')
            self.model = LSTM_iterIB_Model(self.textData.word2index,
                                           self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmcap':
            print('Using LSTM capsule model.')
            self.model = LSTM_capsule_Model(self.textData.word2index,
                                            self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmgrid':
            print('Using LSTM grid model.')
            self.model = LSTM_grid_Model(self.textData.word2index,
                                         self.textData.index2word)
            self.train()
        elif args['model_arch'] == 'lstmgmib':
            print('Using LSTM Gaussian Mixture IB model.')
            self.model = nn.parallel.DataParallel(
                LSTM_GMIB_Model(self.textData.word2index,
                                self.textData.index2word))
            # Select the device and move the model there before training, not after.
            args['device'] = "cuda:0" if torch.cuda.is_available() else "cpu"
            self.model.to(args['device'])
            self.train()
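Note: Examples #5 and #6 select the model through a long if/elif chain in which most branches differ only by the model class. A minimal dispatch-table sketch of the same selection, assuming the model classes from the examples above are importable:

    # Name-to-class registry; the names and classes come from the examples above.
    # 'lstmibgan*' is left out because the examples handle it separately
    # (module-level LSTM_IB_GAN.train instead of self.train()).
    MODELS = {
        'lstm': LSTM_Model,
        'lstmatt': LSTM_att_Model,
        'transformer': TransformerModel,
        'lstmib': LSTM_IB_Model,
        'lstmibcp': LSTM_IB_CP_Model,
        'lstmcapib': LSTM_capsule_IB_Model,
        'lstmiterib': LSTM_iterIB_Model,
        'lstmcap': LSTM_capsule_Model,
        'lstmgrid': LSTM_grid_Model,
        'lstmgmib': LSTM_GMIB_Model,
    }

    def build_model(arch, textData):
        try:
            cls = MODELS[arch]
        except KeyError:
            raise ValueError('Unknown model_arch: {}'.format(arch))
        return cls(textData.word2index, textData.index2word)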
Example #7
    def main(self, args=None):
        """
        Launch the training and/or the interactive mode
        """
        print('Welcome to DeepQA v0.1 !')
        print()
        print('TensorFlow detected: v{}'.format(tf.__version__))

        # General initialisation

        self.args = self.parseArgs(args)

        if not self.args.rootDir:
            self.args.rootDir = os.getcwd()  # Use the current working directory

        #tf.logging.set_verbosity(tf.logging.INFO) # DEBUG, INFO, WARN (default), ERROR, or FATAL

        self.loadModelParams()  # Updates self.modelDir and self.globStep; not used when loading the Model for now, but must be called before _getSummaryName

        self.textData = TextData(self.args)
        # TODO: Add a mode where we can force the input of the decoder // Try to visualize the
        # predictions for each word of the vocabulary / decoder input
        # TODO: For now, the models are trained for a specific dataset (because of the maxLength,
        # which defines the vocabulary). Add a compatibility mode which allows launching a model
        # trained on a different vocabulary (remap the word2id/id2word variables).
        if self.args.createDataset:
            print('Dataset created! Thanks for using this program')
            return  # No need to go further

        # Prepare the model
        with tf.device(self.getDevice()):
            self.model = Model(self.args, self.textData)

        # Saver/summaries
        self.writer = tf.summary.FileWriter(self._getSummaryName())
        self.saver = tf.train.Saver(max_to_keep=200)

        # TODO: Fixed seed (WARNING: if the dataset is shuffled, make sure to do that after saving
        # the dataset; otherwise everything that comes after the shuffling won't be replicable when
        # reloading the dataset). How to restore the seed after loading?
        # Also fix the seed for random.shuffle (does it work globally for all files?)

        # Running session
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,  # Allows a fallback device for ops without a GPU kernel (when forcing GPU)
            log_device_placement=False)  # Too verbose?
        )  # TODO: Replace all sess by self.sess (not necessarily a good idea)?

        if self.args.debug:
            self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
            self.sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

        print('Initialize variables...')
        self.sess.run(tf.global_variables_initializer())

        # Reload the model if it exists; in testing mode, the models are not loaded here (but in predictTestset)
        if self.args.test != Chatbot.TestMode.ALL:
            self.managePreviousModel(self.sess)

        # Initialize embeddings with pre-trained word2vec vectors
        if self.args.initEmbeddings:
            self.loadEmbedding(self.sess)

        if self.args.test:
            if self.args.test == Chatbot.TestMode.INTERACTIVE:
                self.mainTestInteractive(self.sess)
            elif self.args.test == Chatbot.TestMode.ALL:
                print('Start predicting...')
                self.predictTestset(self.sess)
                print('All predictions done')
            elif self.args.test == Chatbot.TestMode.DAEMON:
                print('Daemon mode, running in background...')
            else:
                raise RuntimeError('Unknown test mode: {}'.format(self.args.test))  # Should never happen
        else:
            self.mainTrain(self.sess)

        if self.args.test != Chatbot.TestMode.DAEMON:
            self.sess.close()
            print("The End! Thanks for using this program")