예제 #1
0
 def json(self) -> dict:
     """Return this object's JSON data, loading it on first access.

     While ``self._json`` is ``None`` the value is obtained from
     ``Data.load(self._path_json)`` and stored on the instance; once a
     non-``None`` value is stored it is returned without reloading.
     """
     cached = self._json
     if cached is not None:
         return cached
     # Nothing stored yet: load from the configured path and remember it.
     self._json = Data.load(self._path_json)
     return self._json
예제 #2
0
 def TeamDriveDict(self, user)->dict:
     """Return the dictionary stored for ``user``, loading it when absent.

     An entry is loaded via ``Data.load(self.Path_TDDict(user))`` whenever
     ``self._TeamDriveDicts`` has no entry (or a ``None`` entry) for
     ``user``. A falsy load result is replaced by a new empty dict before
     being stored.
     """
     cache = self._TeamDriveDicts
     if cache.get(user) is None:
         loaded = Data.load(self.Path_TDDict(user))
         # Store an empty dict instead of a falsy result so the entry is
         # never None after this call.
         cache[user] = loaded or {}
     return cache[user]
예제 #3
0
    def load(self):
        """Load the features, target, test and optional expected datasets.

        Required configuration keys: ``trainCSV``, ``testCSV``,
        ``featuresPKL``, ``targetPKL`` and ``testPKL``. The keys
        ``expectedCSV`` and ``expectedPKL`` are optional and are looked up
        independently of each other.

        If the features PKL file exists, the features, target and test data
        are loaded from their PKL files. Otherwise the train/test CSV files
        are loaded, run through ``self.preprocessor.execute`` and the
        results are saved to the PKL paths to reduce later load times.

        The expected data is loaded from ``expectedPKL`` when that file
        exists, else from ``expectedCSV`` when that file exists (and is then
        saved to ``expectedPKL`` if such a path is configured).

        Side effects: sets ``self.features``, ``self.target``, ``self.test``
        and ``self.hasExpected``; sets ``self.expected`` only when expected
        data was found.

        Raises:
            Whatever the configuration lookup raises for a missing required
            key, plus anything raised by the ``Data`` loader/saver or the
            preprocessor.
        """
        self.logger.info('Loading data')

        data = Data()
        config = self.configuration

        trainCSV = config['trainCSV']
        testCSV = config['testCSV']

        featuresPKL = config['featuresPKL']
        targetPKL = config['targetPKL']
        testPKL = config['testPKL']

        # Optional keys: look each one up on its own so a missing
        # 'expectedCSV' does not hide a configured 'expectedPKL', and only
        # swallow lookup failures rather than every possible exception.
        optional = {}
        for key in ('expectedCSV', 'expectedPKL'):
            try:
                optional[key] = config[key]
            except LookupError:
                optional[key] = None
        expectedCSV = optional['expectedCSV']
        expectedPKL = optional['expectedPKL']

        # If the dataframe (pickled) file exists, then load it
        # Otherwise, load the CSV, preprocess it, and then save it as a
        # PKL file which will reduce load times
        if os.path.exists(featuresPKL):
            self.logger.info('Loading train PKL: {0}'.format(featuresPKL))
            tmpFeatures = data.loadDataFrame(featuresPKL)
            self.logger.info('Loading target PKL: {0}'.format(targetPKL))
            tmpTarget = data.loadDataFrame(targetPKL)
            self.logger.info('Loading test PKL: {0}'.format(testPKL))
            tmpTest = data.loadDataFrame(testPKL)
        else:
            self.logger.info('Loading train CSV: {0}'.format(trainCSV))
            rawtrain = data.load(trainCSV)
            self.logger.info('Loading test CSV: {0}'.format(testCSV))
            rawtest = data.load(testCSV)

            # Preprocess the data
            tmpFeatures, tmpTarget, tmpTest = self.preprocessor.execute(rawtrain, rawtest)

            # Save the dataframe (lower load times)
            self.logger.info('Saving features PKL: {0}'.format(featuresPKL))
            data.saveDataFrame(tmpFeatures, featuresPKL)
            self.logger.info('Saving target PKL: {0}'.format(targetPKL))
            data.saveDataFrame(tmpTarget, targetPKL)
            self.logger.info('Saving test PKL: {0}'.format(testPKL))
            data.saveDataFrame(tmpTest, testPKL)

        # Track the outcome in a local flag so a stale self.hasExpected from
        # an earlier call can never lead to reading an unbound tmpExpected.
        tmpExpected = None
        foundExpected = False

        if expectedPKL and os.path.exists(expectedPKL):
            self.logger.info('loading expected PKL: {0}'.format(expectedPKL))
            tmpExpected = data.loadDataFrame(expectedPKL)
            foundExpected = True
        elif expectedCSV and os.path.exists(expectedCSV):
            self.logger.info('Loading expected CSV: {0}'.format(expectedCSV))
            tmpExpected = data.load(expectedCSV)
            # Only cache to a PKL file when a PKL path is actually configured.
            if expectedPKL:
                self.logger.info('Saving expected PKL: {0}'.format(expectedPKL))
                data.saveDataFrame(tmpExpected, expectedPKL)
            foundExpected = True

        self.features = tmpFeatures
        self.target = tmpTarget
        self.test = tmpTest

        # Always publish the flag so it reflects this call's result.
        self.hasExpected = foundExpected
        if foundExpected:
            self.expected = tmpExpected