Beispiel #1
0
    def loadCorpus(self, dirName):
        """Load the preprocessed dataset from disk, or build and save it.

        If the samples file (``self.samplesName``) exists inside ``dirName``,
        the dataset is restored with ``self.loadDataset``. Otherwise the
        corpus selected by ``self.args.corpus`` is built from
        ``self.corpusDir``, passed to ``self.createCorpus`` and then
        ``self.saveDataset`` is called.

        Args:
            dirName (str): The directory where to load/save the model
        Raises:
            ValueError: If the dataset has to be created and
                ``self.args.corpus`` is not one of the supported corpora.
        """
        samplesPath = os.path.join(dirName, self.samplesName)

        if os.path.exists(samplesPath):
            print('Loading dataset from {}...'.format(dirName))
            self.loadDataset(dirName)
        else:  # First time we load the database: creating all files
            print('Training samples not found. Creating dataset...')
            # Corpus creation
            corpusName = self.args.corpus
            if corpusName == 'cornell':
                corpusData = CornellData(self.corpusDir)
            elif corpusName == 'opensubs':
                corpusData = OpensubsData(self.corpusDir)
            elif corpusName == 'fbdata':
                corpusData = FBData(self.corpusDir)
            else:
                # Fail loudly instead of silently saving an empty dataset
                # that every later run would then load from disk.
                raise ValueError(
                    'Unknown corpus {!r}: expected one of '
                    "'cornell', 'opensubs', 'fbdata'".format(corpusName))
            self.createCorpus(corpusData.getConversations())

            # Saving
            print('Saving dataset...')
            self.saveDataset(dirName)  # Saving tf samples

        # Sanity check on the padding token id (presumably the rest of the
        # pipeline relies on it being 0 -- confirm against the callers).
        assert self.padToken == 0
Beispiel #2
0
    def loadCorpus(self, dirName,
                   finetuneDir='/usr/users/zcollins/Data_Files/allfood/'):
        """Load the preprocessed dataset from disk, or build and save it.

        If the samples file (``self.samplesName``) exists inside ``dirName``,
        the dataset is restored with ``self.loadDataset``; when
        ``self.args.finetune`` is set and ``self.args.test`` is not, the
        training samples are then discarded and rebuilt from the meal data
        found in ``finetuneDir``. Otherwise the corpus selected by
        ``self.args.corpus`` is built from ``self.corpusDir``, passed to
        ``self.createCorpus`` and then ``self.saveDataset`` is called.

        Args:
            dirName (str): The directory where to load/save the model
            finetuneDir (str): Directory handed to ``MealData`` when
                rebuilding the training samples for fine-tuning. Defaults to
                the location that used to be hard-coded here.
        Raises:
            ValueError: If the dataset has to be created and
                ``self.args.corpus`` is not one of the supported corpora.
        """
        samplesPath = os.path.join(dirName, self.samplesName)

        if os.path.exists(samplesPath):
            print('Loading dataset from {}...'.format(dirName))
            self.loadDataset(dirName)
            if self.args.finetune and not self.args.test:
                # Replace the loaded training samples with ones created
                # from the fine-tuning meal data.
                self.trainingSamples = []
                finetuneData = MealData(finetuneDir)
                self.createCorpus(finetuneData.getMeals())
        else:  # First time we load the database: creating all files
            print('Training samples not found. Creating dataset...')
            # Corpus creation
            corpusName = self.args.corpus
            if corpusName == 'cornell':
                cornellData = CornellData(self.corpusDir)
                self.createCorpus(cornellData.getConversations())
            elif corpusName == 'nutrition':
                mealData = MealData(self.corpusDir)

                # The encode_* flags are checked in priority order; the
                # first one set decides what gets paired together.
                if self.args.encode_food_descrips:
                    samples = zip(mealData.getFoodDescrips(),
                                  mealData.getMeals())
                elif self.args.encode_single_food_descrip:
                    samples = zip(mealData.getSingleFoodDescrips(),
                                  mealData.getAlignments())
                elif self.args.encode_food_ids:
                    samples = zip(mealData.getFoodIDs(), mealData.getMeals())
                else:
                    samples = mealData.getMeals()
                self.createCorpus(samples)
            elif corpusName == 'healthy-comments':
                # Kept on the instance (not a local), as in the original.
                self.healthyData = HealthyData(
                    self.corpusDir, self.args.usda_vecs,
                    self.args.healthy_flag, self.args.augment,
                    self.args.motivate_only, self.args.advice_only,
                    self.args.all_data)
                if self.args.encode_food_ids:
                    samples = zip(self.healthyData.getFoodIDs(),
                                  self.healthyData.getResponses())
                else:
                    samples = zip(self.healthyData.getMeals(),
                                  self.healthyData.getResponses(),
                                  self.healthyData.getFoodEmb())
                self.createCorpus(samples)
            else:
                # Fail loudly instead of silently saving an empty dataset
                # that every later run would then load from disk.
                raise ValueError(
                    'Unknown corpus {!r}: expected one of '
                    "'cornell', 'nutrition', 'healthy-comments'".format(
                        corpusName))

            # Saving
            print('Saving dataset...')
            self.saveDataset(dirName)  # Saving tf samples

        # Sanity check on the padding token id (presumably the rest of the
        # pipeline relies on it being 0 -- confirm against the callers).
        assert self.padToken == 0