def json(self) -> dict:
    """Return the JSON data, loading it on first access.

    The value is read from ``self._path_json`` via ``Data.load`` only while
    ``self._json`` is still ``None``; afterwards the stored value is
    returned as-is.
    """
    cached = self._json
    if cached is not None:
        return cached

    # Nothing stored yet: load from disk and remember the result.
    self._json = Data.load(self._path_json)
    return self._json
def TeamDriveDict(self, user) -> dict:
    """Return the team-drive dictionary stored for ``user``.

    The per-user entry in ``self._TeamDriveDicts`` is filled on first
    access from the file at ``self.Path_TDDict(user)``. A falsy load
    result is stored as an empty dict.
    """
    cached = self._TeamDriveDicts.get(user)
    if cached is not None:
        return cached

    # No entry yet (or it is None): load it and fall back to {} when the
    # loader returns nothing usable.
    loaded = Data.load(self.Path_TDDict(user))
    self._TeamDriveDicts[user] = loaded or {}
    return self._TeamDriveDicts[user]
def load(self):
    """Load the feature, target, test and (optional) expected data.

    File paths are read from ``self.configuration``. The entries
    ``trainCSV``, ``testCSV``, ``featuresPKL``, ``targetPKL`` and
    ``testPKL`` are required; ``expectedCSV`` and ``expectedPKL`` are
    optional.

    If the pickled data frames exist they are loaded directly. Otherwise
    the raw CSV files are loaded, passed through
    ``self.preprocessor.execute`` and the results are saved as PKL files,
    which reduces load times on later runs.

    Side effects:
        Sets ``self.features``, ``self.target`` and ``self.test``. When
        expected data is found, also sets ``self.expected`` and
        ``self.hasExpected = True``. When it is not found,
        ``self.hasExpected`` is initialised to ``False`` only if the
        attribute does not exist yet; ``self.expected`` is left untouched.

    Raises:
        Whatever ``self.configuration[...]`` raises for a missing required
        entry (presumably ``KeyError`` -- confirm against the configuration
        type).
    """
    self.logger.info('Loading data')
    data = Data()

    trainCSV = self.configuration['trainCSV']
    testCSV = self.configuration['testCSV']
    featuresPKL = self.configuration['featuresPKL']
    targetPKL = self.configuration['targetPKL']
    testPKL = self.configuration['testPKL']

    # The expected-results entries are optional. Both lookups share one
    # try block (as before), so a missing 'expectedCSV' also leaves
    # expectedPKL unset.
    expectedCSV = None
    expectedPKL = None
    try:
        expectedCSV = self.configuration['expectedCSV']
        expectedPKL = self.configuration['expectedPKL']
    except KeyError:
        # Only a missing key is tolerated; anything else should surface.
        pass

    # If the dataframe (pickled) files exist, then load them.
    # Otherwise, load the CSVs, preprocess them, and then save the result
    # as PKL files, which will reduce load times.
    # All three files are required for the cached path: checking only the
    # features file would fail later on a partially written cache.
    cachedPKLs = (featuresPKL, targetPKL, testPKL)
    if all(os.path.exists(path) for path in cachedPKLs):
        self.logger.info('Loading train PKL: {0}'.format(featuresPKL))
        tmpFeatures = data.loadDataFrame(featuresPKL)
        self.logger.info('Loading target PKL: {0}'.format(targetPKL))
        tmpTarget = data.loadDataFrame(targetPKL)
        self.logger.info('Loading test PKL: {0}'.format(testPKL))
        tmpTest = data.loadDataFrame(testPKL)
    else:
        self.logger.info('Loading train CSV: {0}'.format(trainCSV))
        rawtrain = data.load(trainCSV)
        self.logger.info('Loading test CSV: {0}'.format(testCSV))
        rawtest = data.load(testCSV)

        # Preprocess the data
        tmpFeatures, tmpTarget, tmpTest = self.preprocessor.execute(
            rawtrain, rawtest)

        # Save the dataframes (lower load times)
        self.logger.info('Saving features PKL: {0}'.format(featuresPKL))
        data.saveDataFrame(tmpFeatures, featuresPKL)
        self.logger.info('Saving target PKL: {0}'.format(targetPKL))
        data.saveDataFrame(tmpTarget, targetPKL)
        self.logger.info('Saving test PKL: {0}'.format(testPKL))
        data.saveDataFrame(tmpTest, testPKL)

    # Track locally whether expected data was loaded in THIS call, so the
    # final assignment never touches an unbound tmpExpected.
    tmpExpected = None
    expectedLoaded = False
    if expectedPKL and os.path.exists(expectedPKL):
        self.logger.info('loading expected PKL: {0}'.format(expectedPKL))
        tmpExpected = data.loadDataFrame(expectedPKL)
        expectedLoaded = True
    elif expectedCSV and os.path.exists(expectedCSV):
        self.logger.info('Loading expected CSV: {0}'.format(expectedCSV))
        tmpExpected = data.load(expectedCSV)
        # expectedPKL may be unset when only 'expectedCSV' is configured;
        # there is then nowhere to cache the frame.
        if expectedPKL:
            self.logger.info(
                'Saving expected PKL: {0}'.format(expectedPKL))
            data.saveDataFrame(tmpExpected, expectedPKL)
        expectedLoaded = True

    self.features = tmpFeatures
    self.target = tmpTarget
    self.test = tmpTest

    if expectedLoaded:
        self.hasExpected = True
        self.expected = tmpExpected
    elif not hasattr(self, 'hasExpected'):
        # Make sure the flag exists for later readers without overriding
        # a value that was set elsewhere.
        self.hasExpected = False

    return