def persistExternal(self, parentDirURI: str, fileName: str, df: DataFrame, partitionCols: List[str] = None, overwrite: bool = True, fileFormat: str = None, **kwargs):
    """Write ``df`` to external storage and return it re-read from there.

    The target location is ``parentDirURI`` joined with ``fileName`` by a
    single ``/``. The frame is written with ``self.write2ExtrFile``,
    unpersisted, and then loaded back with the ``self.read*`` method that
    matches the format.

    Args:
        parentDirURI: Directory path or URI to write under; a trailing
            ``/`` is optional.
        fileName: Name appended to ``parentDirURI``; ``None`` or ``""``
            targets the directory itself.
        df: Frame to write. Its schema is captured before the write and
            handed to the reader.
        partitionCols: Forwarded unchanged to ``self.write2ExtrFile``.
        overwrite: Forwarded unchanged to ``self.write2ExtrFile``.
        fileFormat: ``"parquet"``, ``"orc"`` or ``"csv"``. When falsy the
            instance's configured format is used. Any other value is
            still passed to the writer as-is but read back as parquet.
        **kwargs: Forwarded to both the writer and the reader.

    Returns:
        Whatever the selected ``self.read*`` method returns for the
        written location.
    """
    joiner = "" if parentDirURI.endswith("/") else "/"
    fullPath = "%s%s%s" % (parentDirURI, joiner, fileName or "")

    # Collapse doubled slashes in the path, but keep a leading "scheme://"
    # intact: a blanket replace would turn "s3://bucket" into "s3:/bucket".
    scheme, schemeSep, remainder = fullPath.partition("://")
    if schemeSep and scheme and "/" not in scheme:
        fullPath = scheme + schemeSep + remainder.replace("//", "/")
    else:
        fullPath = fullPath.replace("//", "/")

    # NOTE(review): on a pyspark.sql.DataFrame `schema` is a property, so
    # calling it raises TypeError. Accept both the property and the method
    # form -- confirm which DataFrame type is actually passed here.
    schma = df.schema
    if callable(schma):
        schma = schma()

    fileFormat = fileFormat or self.__fileFmt
    self.write2ExtrFile(fullPath=fullPath, fileFormat=fileFormat, df=df,
                        partitionCols=partitionCols, overwrite=overwrite, **kwargs)
    df.unpersist()

    # Look the reader up only after the write, and only the one needed.
    if fileFormat == "orc":
        reader = self.readOrc
    elif fileFormat == "csv":
        reader = self.readCSV
    else:
        # "parquet" and every unrecognised format are read as parquet.
        reader = self.readParquet
    return reader(uri=fullPath, schema=schma, **kwargs)
def persistExternal(self, parentDirURI: str, fileName: str, df: DataFrame, partitionCols: list[str] = None, overwrite: bool = True):
    """Persist the input DataFrame to external file storage and re-read it.

    The target location is ``parentDirURI`` joined with ``fileName`` by a
    single ``/``. The frame is written with ``write2ExtrFile`` in the
    instance's configured file format, unpersisted, and then loaded back
    with the ``self.read*`` method that matches that format.

    Args:
        parentDirURI: Directory path or URI to write under; a trailing
            ``/`` is optional.
        fileName: Name appended to ``parentDirURI``; ``None`` or ``""``
            targets the directory itself.
        df: Frame to write. Its schema is captured before the write and
            handed to the reader.
        partitionCols: Forwarded unchanged to ``write2ExtrFile``.
        overwrite: Forwarded unchanged to ``write2ExtrFile``.

    Returns:
        Whatever the selected ``self.read*`` method returns for the
        written location. Formats other than ``orc``, ``csv`` and
        ``avro`` are read back as parquet.
    """
    # Format first, then normalise. The previous code applied `.replace` to
    # the argument tuple (attribute access binds tighter than `%`), which
    # raised AttributeError on every call.
    joiner = "" if parentDirURI.endswith("/") else "/"
    fullPath = "%s%s%s" % (parentDirURI, joiner, fileName or "")

    # Collapse doubled slashes in the path, but keep a leading "scheme://"
    # intact: a blanket replace would turn "s3://bucket" into "s3:/bucket".
    scheme, schemeSep, remainder = fullPath.partition("://")
    if schemeSep and scheme and "/" not in scheme:
        fullPath = scheme + schemeSep + remainder.replace("//", "/")
    else:
        fullPath = fullPath.replace("//", "/")

    # NOTE(review): on a pyspark.sql.DataFrame `schema` is a property, so
    # calling it raises TypeError. Accept both the property and the method
    # form -- confirm which DataFrame type is actually passed here.
    schma = df.schema
    if callable(schma):
        schma = schma()

    fileFormat = self.__fileFmt
    # NOTE(review): called as a free function here, not as a method on
    # self -- confirm that a module-level write2ExtrFile is in scope.
    write2ExtrFile(fileFormat, path=fullPath, fileName=fileName, df=df,
                   partitionCols=partitionCols, overwrite=overwrite)

    # TODO: handling specific to "s3://" targets is not implemented yet.

    df.unpersist()

    # Look the reader up only after the write, and only the one needed.
    if fileFormat == 'orc':
        reader = self.readOrc
    elif fileFormat == 'csv':
        reader = self.readCSV
    elif fileFormat == 'avro':
        reader = self.readAvro
    else:
        # 'parquet' and every unrecognised format are read as parquet.
        reader = self.readParquet
    return reader(fullPath=fullPath, schma=schma)