Example #1
0
    def pushBundle(self, gitRepositoryPath, accessToken, gitHost="github.com", gitBranch="master", remoteStashPrefix="A", maxSizeMB=95):
        """Push bundle to remote stash git repository.

        The local stash tar file is split into parts no larger than maxSizeMB,
        committed to a local clone of the stash repository, and pushed when an
        access token is provided.

        Args:
            gitRepositoryPath (str): git repository path (e.g., rcsb/py-rcsb_exdb_assets_stash)
            accessToken (str): git repository access token
            gitHost (str, optional): git repository host name. Defaults to github.com.
            gitBranch (str, optional): git branch name. Defaults to master.
            remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')
            maxSizeMB (int, optional): maximum stash bundle file size that will be committed. Defaults to 95MB.

        Returns:
          bool:  True for success or False otherwise

        """
        try:
            ok = False
            gU = GitUtil(token=accessToken, repositoryHost=gitHost)
            fU = FileUtil()
            localRepositoryPath = os.path.join(self.__localBundlePath, "stash_repository")
            fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
            #
            # Update existing local repository, otherwise clone a new copy
            if fU.exists(localRepositoryPath):
                ok = gU.pull(localRepositoryPath, branch=gitBranch)
                logger.debug("After pull status %r", gU.status(localRepositoryPath))
            else:
                ok = gU.clone(gitRepositoryPath, localRepositoryPath, branch=gitBranch)
            #
            # Split the bundle into parts under a per-bundle directory; the
            # directory name drops the last 7 characters of the bundle file name
            # (presumably a ".tar.gz" suffix -- TODO confirm against __makeBundleFileName).
            mbSize = float(fU.size(self.__localStashTarFilePath)) / 1000000.0
            logger.info("Splitting bundle %r (%.3f MB/Max %d MB)", fn, mbSize, maxSizeMB)
            sj = SplitJoin()
            splitDirPath = os.path.join(localRepositoryPath, "stash", fn[:-7])
            sj.split(self.__localStashTarFilePath, splitDirPath, maxSizeMB=maxSizeMB)
            fU.remove(self.__localStashTarFilePath)
            # else:
            # fU.put(self.__localStashTarFilePath, os.path.join(localRepositoryPath, "stash", fn))

            # NOTE(review): each step below overwrites ok, so the return value
            # reflects only the last git operation performed, not the whole chain.
            ok = gU.addAll(localRepositoryPath, branch=gitBranch)
            ok = gU.commit(localRepositoryPath, branch=gitBranch)
            logger.debug("After commit status %r", gU.status(localRepositoryPath))
            #
            # Push only when credentials were supplied
            if accessToken:
                ok = gU.push(localRepositoryPath, branch=gitBranch)
                logger.info("After push status %r", gU.status(localRepositoryPath))
            #
            return ok
        except Exception as e:
            logger.exception("For %r %r failing with %s", gitHost, gitRepositoryPath, str(e))
        return False
Example #2
0
    def setUp(self):
        """Prepare a clean work area and record the test start time."""
        self.__workPath = os.path.join(HERE, "test-output")
        #
        self.__testLogFileMin = os.path.join(self.__workPath, "logfile-min.json")
        self.__testLogFileDetailed = os.path.join(self.__workPath, "logfile-detailed.json")
        # Remove any log files left over from a previous run
        fileU = FileUtil()
        for logPath in (self.__testLogFileMin, self.__testLogFileDetailed):
            fileU.remove(logPath)

        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
Example #3
0
 def testGitOps(self):
     """Test case - git clone/pull/add/commit (and push when a token is provided)."""
     try:
         branch = "test-branch"
         repositoryPath = "rcsb/py-rcsb_exdb_assets_stash_test.git"
         localRepositoryPath = os.path.join(self.__workPath, "test-stash")
         fU = FileUtil()
         fU.remove(localRepositoryPath)
         gitToken = None
         gU = GitUtil(gitToken)
         ok = gU.clone(repositoryPath, localRepositoryPath, branch=branch)
         self.assertTrue(ok)
         logger.info("status %r", gU.status(localRepositoryPath))
         #
         # Fix: capture the pull result; previously the stale ok from the
         # clone step was re-asserted and a failing pull went undetected.
         ok = gU.pull(localRepositoryPath, branch=branch)
         self.assertTrue(ok)
         logger.info("status %r", gU.status(localRepositoryPath))
         #
         testPath = os.path.join(localRepositoryPath, "stash",
                                 "TESTFILE.txt")
         self.__makeFile(testPath, 10)
         #
         ok = gU.addAll(localRepositoryPath, branch=branch)
         self.assertTrue(ok)
         logger.info("status %r",
                     gU.status(localRepositoryPath, branch=branch))
         #
         ok = gU.commit(localRepositoryPath, branch=branch)
         self.assertTrue(ok)
         logger.info("status %r",
                     gU.status(localRepositoryPath, branch=branch))
         #
         if gitToken:
             # Fix: capture the push result before asserting it.
             ok = gU.push(localRepositoryPath, branch=branch)
             self.assertTrue(ok)
             logger.info("status %r",
                         gU.status(localRepositoryPath, branch=branch))
         #
         # Fix: capture the final pull result before asserting it.
         ok = gU.pull(localRepositoryPath, branch=branch)
         self.assertTrue(ok)
         logger.info("status %r",
                     gU.status(localRepositoryPath, branch=branch))
         #
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
 def clearRawCache(self):
     """Remove the raw cache directory; return True on success, False otherwise."""
     try:
         fU = FileUtil()
         rawDirPath = os.path.join(self.__cachePath, self.__dirName + "-raw")
         return fU.remove(rawDirPath)
     except Exception:
         return False
    def testAABuildDependenciesAndStash(self):
        """Test case - build, stash and restore dependencies -"""
        try:
            ccsw = ChemCompSearchWrapper()
            # Use abbreviated test data unless the full-data flag is set
            ccUrlTarget = os.path.join(
                self.__dataPath,
                "components-abbrev.cif") if not self.__testFlagFull else None
            birdUrlTarget = os.path.join(
                self.__dataPath,
                "prdcc-abbrev.cif") if not self.__testFlagFull else None
            ok = ccsw.buildDependenices(ccUrlTarget=ccUrlTarget,
                                        birdUrlTarget=birdUrlTarget)
            self.assertTrue(ok)
            #
            if self.__testStash:
                url = "sftp://bl-east.rcsb.org"
                userName = ""
                pw = ""
                dirPath = "4-coastal"
                ok = ccsw.stashDependencies(url,
                                            dirPath,
                                            userName=userName,
                                            pw=pw)
                self.assertTrue(ok)
                #
                fileU = FileUtil()
                fileU.remove(self.__cachePath)
                #
                # Restore via HTTP
                url = "http://bl-east.rcsb.org"
                ok = ccsw.restoreDependencies(url, dirPath)
                # Fix: assert the HTTP restore result; previously it was
                # overwritten by the sftp restore below without being checked.
                self.assertTrue(ok)
                #
                fileU.remove(self.__cachePath)
                #
                # Restore via SFTP
                url = "sftp://bl-east.rcsb.org"
                ok = ccsw.restoreDependencies(url,
                                              dirPath,
                                              userName=userName,
                                              pw=pw)
                self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
 def __pharosFixture(self):
     """Stage abbreviated Pharos target files from the test data path into the cache.

     Returns:
         bool: True for success or False otherwise
     """
     try:
         fU = FileUtil()
         srcPath = os.path.join(self.__dataPath, "Pharos")
         dstPath = os.path.join(self.__cachePath, "Pharos-targets")
         for stem in ("drug_activity", "cmpd_activity", "protein"):
             gzPath = os.path.join(dstPath, stem + ".tdd.gz")
             fU.get(os.path.join(srcPath, stem + ".tdd.gz"), gzPath)
             fU.uncompress(gzPath, outputDir=dstPath)
             fU.remove(gzPath)
         fU.put(os.path.join(srcPath, "pharos-readme.txt"),
                os.path.join(dstPath, "pharos-readme.txt"))
         return True
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return False
 def __fetchFromSource(self, urlTarget):
     """Fetch the classification names and domain assignments from the ECOD repo."""
     fU = FileUtil()
     fp = os.path.join(self.__dirPath, fU.getFileName(urlTarget))
     if not fU.exists(fp):
         fU.get(urlTarget, fp)
     #
     # The version string is the last token on the third header line
     with open(fp, "r", encoding="utf-8") as ifh:
         for _ in range(3):
             line = ifh.readline()
         self.__version = line[:-1].split()[-1]
     #
     nmL = self.__mU.doImport(fp, fmt="list", uncomment=True)
     fU.remove(fp)
     #
     return nmL
 def clearCache(self):
     """Reset the in-memory caches and remove the cache directory.

     Returns:
         bool: True for success or False otherwise
     """
     try:
         self.__nameL = []
         self.__mapD = {}
         self.__cacheFieldD = {}
         self.__nameLegacyL = []
         self.__mapLegacyD = {}
         self.__cacheFieldLegacyD = {}
         fU = FileUtil()
         return fU.remove(os.path.join(self.__cachePath, self.__dirName))
     except Exception:
         return False
Example #9
0
class IoUtil(object):
    def __init__(self, **kwargs):
        """Create an IoUtil instance; keyword arguments are passed through to FileUtil."""
        self.__fileU = FileUtil(**kwargs)

    def serialize(self, filePath, myObj, fmt="pickle", **kwargs):
        """Public method to serialize format appropriate objects

        Args:
            filePath (str): local file path
            myObj (object): format appropriate object to be serialized
            fmt (str, optional): one of ['mmcif', 'mmcif-dict', 'json', 'list',
                'text-dump', 'fasta', 'csv', 'pickle' (default)]
            **kwargs: additional keyword arguments passed to worker methods -

        Returns:
            bool: status of serialization operation; true for success or false otherwise

        """
        fmt = str(fmt).lower()
        # Ensure the target directory exists before any writer runs
        ret = self.__fileU.mkdirForFile(filePath)
        if not ret:
            return ret
        if fmt in ["mmcif"]:
            ret = self.__serializeMmCif(filePath, myObj, **kwargs)
        elif fmt in ["json"]:
            ret = self.__serializeJson(filePath, myObj, **kwargs)
        elif fmt in ["pickle"]:
            ret = self.__serializePickle(filePath, myObj, **kwargs)
        elif fmt in ["list"]:
            ret = self.__serializeList(filePath,
                                       myObj,
                                       enforceAscii=True,
                                       **kwargs)
        elif fmt in ["mmcif-dict"]:
            ret = self.__serializeMmCifDict(filePath, myObj, **kwargs)
        elif fmt in ["text-dump"]:
            ret = self.__textDump(filePath, myObj, **kwargs)
        elif fmt in ["fasta"]:
            ret = self.__serializeFasta(filePath, myObj, **kwargs)
        elif fmt in ["csv"]:
            ret = self.__serializeCsv(filePath, myObj, **kwargs)
        else:
            # Fix: an unsupported format previously fell through and returned
            # the mkdirForFile() result (True) even though nothing was written.
            logger.error("Unsupported serialization format %r", fmt)
            ret = False

        return ret

    def deserialize(self, filePath, fmt="pickle", **kwargs):
        """Public method to deserialize objects in supported formats.

        Args:
            filePath (str): local file path
            fmt (str, optional): one of ['mmcif', 'mmcif-dict', 'json', 'list',
                'fasta', 'csv', 'tdd', 'xml', 'pickle' (default)]
            **kwargs:  additional keyword arguments passed to worker methods -

        Returns:
            object: deserialized object data, or None for an unsupported format

        """
        fmt = str(fmt).lower()
        if fmt in ["mmcif"]:
            ret = self.__deserializeMmCif(filePath, **kwargs)  # type: ignore
        elif fmt in ["json"]:
            ret = self.__deserializeJson(filePath, **kwargs)  # type: ignore
        elif fmt in ["pickle"]:
            ret = self.__deserializePickle(filePath, **kwargs)  # type: ignore
        elif fmt in ["list"]:
            ret = self.__deserializeList(filePath, enforceAscii=True,
                                         **kwargs)  # type: ignore
        elif fmt in ["mmcif-dict"]:
            ret = self.__deserializeMmCifDict(filePath,
                                              **kwargs)  # type: ignore
        elif fmt in ["fasta"]:
            ret = self.__deserializeFasta(filePath, **kwargs)  # type: ignore
        # elif fmt in ["vrpt-xml-to-cif"]:
        #    ret = self.__deserializeVrptToCif(filePath, **kwargs)  # type: ignore
        elif fmt in ["csv", "tdd"]:
            # Default delimiter follows the format: comma for csv, tab for tdd
            delimiter = kwargs.get("csvDelimiter",
                                   "," if fmt == "csv" else "\t")
            ret = self.__deserializeCsv(filePath,
                                        delimiter=delimiter,
                                        **kwargs)  # type: ignore
        elif fmt in ["xml"]:
            ret = self.__deserializeXml(filePath, **kwargs)  # type: ignore
        else:
            ret = None  # type: ignore

        return ret

    def __sliceInChunks(self, myList, numChunks):
        """Yield successive slices of myList divided into at most numChunks chunks.

        The chunk size is rounded up so that no more than numChunks slices are
        produced.  Fix: an empty input now yields nothing (previously raised
        ZeroDivisionError when the generator was iterated).
        """
        if not myList:
            return
        mc = min(len(myList), numChunks)
        chunkSize = -(-len(myList) // mc)  # ceiling division
        for i in range(0, len(myList), chunkSize):
            yield myList[i:i + chunkSize]

    def serializeInParts(self,
                         filePath,
                         myObj,
                         numParts,
                         fmt="json",
                         **kwargs):
        """Public method to serialize format appropriate (json, pickle) objects in multiple parts

        Args:
            filePath (str): local file path
            myObj (object): format appropriate object to be serialized
            numParts (int): divide the data into numParts segments
            fmt (str, optional): one of ['json' or 'pickle']. Defaults to json
            **kwargs: additional keyword arguments passed to worker methods -

        Returns:
            bool: True for success or False otherwise
        """
        if fmt not in ["json", "pickle"]:
            logger.error("Unsupported format for %s", fmt)
            return False
        dirPath, fileName = os.path.split(filePath)
        self.__fileU.mkdirForFile(dirPath)
        baseName, ext = os.path.splitext(fileName)
        ret = True
        if isinstance(myObj, list):
            # Each chunk of the list becomes one numbered part file
            for partNo, chunk in enumerate(self.__sliceInChunks(myObj, numParts), start=1):
                partPath = os.path.join(dirPath, "%s_part_%d%s" % (baseName, partNo, ext))
                ok = self.serialize(partPath, chunk, fmt=fmt, **kwargs)
                ret = ret and ok
        elif isinstance(myObj, dict):
            # Chunk the key list and emit one sub-dictionary per part
            for partNo, keyChunk in enumerate(self.__sliceInChunks(list(myObj.keys()), numParts), start=1):
                partPath = os.path.join(dirPath, "%s_part_%d%s" % (baseName, partNo, ext))
                subD = OrderedDict([(ky, myObj[ky]) for ky in keyChunk])
                ok = self.serialize(partPath, subD, fmt=fmt, **kwargs)
                ret = ret and ok
        else:
            logger.error("Unsupported data type for serialization in parts")
            ret = False
        #
        return ret

    def deserializeInParts(self, filePath, numParts, fmt="json", **kwargs):
        """Public method to deserialize objects in supported formats from multiple parts

        Args:
            filePath (str): local file path
            numParts (int): reconstruct the data object from numParts segments
            fmt (str, optional): one of ['json' or 'pickle']. Defaults to json
            **kwargs: additional keyword arguments passed to worker methods -

        Returns:
            object: deserialized object data
        """
        if fmt not in ["json", "pickle"]:
            logger.error("Unsupported format for %s", fmt)
            return None
        #
        dirPath, fileName = os.path.split(filePath)
        baseName, ext = os.path.splitext(fileName)
        # When numParts is falsy, count the matching part files on disk
        if not numParts:
            numParts = len(glob.glob(os.path.join(dirPath, baseName + "_part_*" + ext)))
        #
        rObj = None
        for partNo in range(1, numParts + 1):
            partPath = os.path.join(dirPath, "%s_part_%d%s" % (baseName, partNo, ext))
            tObj = self.deserialize(partPath, fmt=fmt, **kwargs)
            if isinstance(tObj, list):
                if not rObj:
                    rObj = []
                rObj.extend(tObj)
            elif isinstance(tObj, dict):
                if not rObj:
                    rObj = OrderedDict()
                rObj.update(tObj)
            else:
                logger.error(
                    "Unsupported data type for deserialization in parts")
        return rObj

    def exists(self, filePath, mode=os.R_OK):
        """Return True if filePath exists with the given access mode (delegates to FileUtil)."""
        return self.__fileU.exists(filePath, mode=mode)

    def mkdir(self, dirPath, mode=0o755):
        """Create directory dirPath with the given mode (delegates to FileUtil)."""
        return self.__fileU.mkdir(dirPath, mode=mode)

    def remove(self, pth):
        """Remove the file or directory at pth (delegates to FileUtil)."""
        return self.__fileU.remove(pth)

    def __deserializeFasta(self, filePath, **kwargs):
        """Read a FASTA file; return its sequence data or {} on failure."""
        try:
            style = kwargs.get("commentStyle", "uniprot")
            return FastaUtil().readFasta(filePath, commentStyle=style)
        except Exception as e:
            logger.error("Unable to deserialize %r %r ", filePath, str(e))
        return {}

    def __serializeFasta(self, filePath, myObj, **kwargs):
        """Write myObj to filePath in FASTA format; return True on success."""
        try:
            lineLength = int(kwargs.get("maxLineLength", 70))
            withComment = kwargs.get("makeComment", False)
            return FastaUtil().writeFasta(filePath,
                                          myObj,
                                          maxLineLength=lineLength,
                                          makeComment=withComment)
        except Exception as e:
            logger.error("Unable to serialize FASTA file %r  %r", filePath,
                         str(e))
        return False

    def __textDump(self, filePath, myObj, **kwargs):
        """Pretty-print myObj to filePath; return True on success.

        Keyword Args:
            indent (int, optional): pprint indentation. Defaults to 1.
            width (int, optional): pprint output width. Defaults to 120.
        """
        try:
            indent = kwargs.get("indent", 1)
            width = kwargs.get("width", 120)
            sOut = pprint.pformat(myObj, indent=indent, width=width)
            # Fix: write UTF-8 explicitly (io.open for py2 compatibility) so
            # non-ASCII content does not fail under a platform-default encoding.
            with io.open(filePath, "w", encoding="utf-8") as ofh:
                ofh.write("\n%s\n" % sOut)
            return True
        except Exception as e:
            logger.error("Unable to dump to %r  %r", filePath, str(e))
        return False

    def __serializePickle(self, filePath, myObj, **kwargs):
        """Pickle myObj to filePath; return True on success."""
        try:
            protocol = kwargs.get("pickleProtocol", pickle.DEFAULT_PROTOCOL)
            with open(filePath, "wb") as ofh:
                pickle.dump(myObj, ofh, protocol)
            return True
        except Exception as e:
            logger.error("Unable to serialize %r  %r", filePath, str(e))
        return False

    def __deserializePickle(self, filePath, **kwargs):
        """Unpickle filePath; return kwargs['default'] ({} by default) on failure."""
        fallback = kwargs.get("default", {})
        try:
            if sys.version_info[0] > 2:
                # py3 pickle.load accepts text decoding options for py2 pickles
                encoding = kwargs.get("encoding", "ASCII")
                errors = kwargs.get("errors", "strict")
                with open(filePath, "rb") as ifh:
                    return pickle.load(ifh, encoding=encoding, errors=errors)
            with open(filePath, "rb") as ifh:
                return pickle.load(ifh)
        except Exception as e:
            logger.warning("Unable to deserialize %r %r", filePath, str(e))
        return fallback

    def __serializeJson(self, filePath, myObj, **kwargs):
        """Internal method to serialize the input object as JSON.  An encoding
        helper class is included to handle selected python data types (e.g., datetime)
        """
        indent = kwargs.get("indent", 0)
        enforceAscii = kwargs.get("enforceAscii", True)
        try:
            # ASCII output uses a plain text handle; otherwise write UTF-8
            if enforceAscii:
                ofh = open(filePath, "w")
            else:
                ofh = io.open(filePath, "w", encoding="utf-8")
            with ofh:
                json.dump(myObj,
                          ofh,
                          indent=indent,
                          cls=JsonTypeEncoder,
                          ensure_ascii=enforceAscii)
            return True
        except Exception as e:
            logger.error("Unable to serialize %r  %r", filePath, str(e))
        return False

    def __deserializeJson(self, filePath, **kwargs):
        """Read a JSON file (optionally gzipped) preserving key order.

        Keyword Args:
            default (object, optional): value returned on failure. Defaults to {}.
            encoding (str, optional): text encoding for gzipped input. Defaults to "utf-8-sig".
            encodingErrors (str, optional): decode-error policy for gzipped input. Defaults to "ignore".

        Returns:
            object: deserialized data (OrderedDict-based) or the default value on failure
        """
        myDefault = kwargs.get("default", {})
        encoding = kwargs.get("encoding", "utf-8-sig")
        encodingErrors = kwargs.get("encodingErrors", "ignore")
        try:
            if filePath[-3:] == ".gz":
                if sys.version_info[0] > 2:
                    with gzip.open(filePath,
                                   "rt",
                                   encoding=encoding,
                                   errors=encodingErrors) as inpFile:
                        return json.load(inpFile,
                                         object_pairs_hook=OrderedDict)
                else:
                    # Py2 situation non-ascii encodings is problematic
                    # with gzip.open(filePath, "rb") as csvFile:
                    #    oL = self.__csvReader(csvFile, rowFormat, delimiter)
                    # Uncompress to a temporary path and read that as text
                    tPath = self.__fileU.uncompress(filePath, outputDir=None)
                    with io.open(tPath,
                                 newline="",
                                 encoding=encoding,
                                 errors="ignore") as inpFile:
                        return json.load(inpFile,
                                         object_pairs_hook=OrderedDict)
            else:
                # NOTE(review): the encoding/encodingErrors kwargs are not applied
                # here -- the plain-file branch opens with the platform default.
                with open(filePath, "r") as inpFile:
                    return json.load(inpFile, object_pairs_hook=OrderedDict)
        except Exception as e:
            logger.warning("Unable to deserialize %r %r", filePath, str(e))
        return myDefault

    def __hasMinSize(self, pth, minSize):
        """Return True when pth exists and is at least minSize bytes."""
        try:
            return os.stat(pth).st_size >= minSize
        except Exception:
            return False

    def __deserializeMmCif(self, locator, **kwargs):
        """Read an mmCIF resource (local or remote) and return its container list."""
        containerList = []
        try:
            workPath = kwargs.get("workPath", None)
            enforceAscii = kwargs.get("enforceAscii", True)
            raiseExceptions = kwargs.get("raiseExceptions", True)
            useCharRefs = kwargs.get("useCharRefs", True)
            minSize = kwargs.get("minSize", 5)
            #
            if self.__fileU.isLocal(locator):
                # Warn (but continue) when a local file looks suspiciously small
                if minSize >= 0 and not self.__hasMinSize(locator, minSize):
                    logger.warning("Minimum file size not satisfied for: %r",
                                   locator)
                adapter = IoAdapter(raiseExceptions=raiseExceptions,
                                    useCharRefs=useCharRefs)
                containerList = adapter.readFile(
                    locator, enforceAscii=enforceAscii,
                    outDirPath=workPath)  # type: ignore
            else:
                # myIo = IoAdapterPy(raiseExceptions=raiseExceptions, useCharRefs=useCharRefs)
                # containerList = myIo.readFile(locator, enforceAscii=enforceAscii, outDirPath=workPath)
                # Remote reads go through the retry-wrapped python adapter
                containerList = self.__deserializeMmCifRemote(
                    locator, useCharRefs, enforceAscii, workPath)

        except Exception as e:
            logger.error("Failing for %s with %s", locator, str(e))
        return containerList

    @retry((requests.exceptions.RequestException),
           maxAttempts=3,
           delaySeconds=1,
           multiplier=2,
           defaultValue=[],
           logger=logger)
    def __deserializeMmCifRemote(self, locator, useCharRefs, enforceAscii,
                                 workPath):
        """Read a remote mmCIF resource with the pure-python adapter.

        Request failures are retried by the decorator (3 attempts, exponential
        backoff); on exhaustion the decorator's defaultValue ([]) applies.
        """
        # Fix: removed the redundant ``except Exception as e: raise e`` wrapper,
        # which only rewrote the traceback; exceptions now propagate unchanged
        # to the retry decorator.
        myIo = IoAdapterPy(raiseExceptions=True, useCharRefs=useCharRefs)
        return myIo.readFile(locator,
                             enforceAscii=enforceAscii,
                             outDirPath=workPath)

    def __serializeMmCif(self, filePath, containerList, **kwargs):
        """Write containerList as mmCIF; gzip-compress when filePath ends in .gz."""
        ret = False
        try:
            workPath = kwargs.get("workPath", None)
            enforceAscii = kwargs.get("enforceAscii", True)
            raiseExceptions = kwargs.get("raiseExceptions", True)
            useCharRefs = kwargs.get("useCharRefs", True)
            #
            adapter = IoAdapter(raiseExceptions=raiseExceptions,
                                useCharRefs=useCharRefs)
            if filePath.endswith(".gz") and workPath:
                # Write to a random temporary name, then gzip into place
                alphabet = string.ascii_uppercase + string.digits
                tmpName = "".join(random.choice(alphabet) for _ in range(10))
                tPath = os.path.join(workPath, tmpName)
                ret = adapter.writeFile(tPath,
                                        containerList=containerList,
                                        enforceAscii=enforceAscii)
                ret = self.__fileU.compress(tPath,
                                            filePath,
                                            compressType="gzip")
            else:
                ret = adapter.writeFile(filePath,
                                        containerList=containerList,
                                        enforceAscii=enforceAscii)
        except Exception as e:
            logger.error("Failing for %s with %s", filePath, str(e))
        return ret

    def __deserializeMmCifDict(self, filePath, **kwargs):
        """Read an mmCIF dictionary file with the pure-python adapter."""
        containerList = []
        try:
            workPath = kwargs.get("workPath", None)
            enforceAscii = kwargs.get("enforceAscii", True)
            raiseExceptions = kwargs.get("raiseExceptions", True)
            useCharRefs = kwargs.get("useCharRefs", True)
            #
            adapter = IoAdapterPy(raiseExceptions=raiseExceptions,
                                  useCharRefs=useCharRefs)
            containerList = adapter.readFile(filePath,
                                             enforceAscii=enforceAscii,
                                             outDirPath=workPath)
        except Exception as e:
            logger.error("Failing for %s with %s", filePath, str(e))
        return containerList

    def __serializeMmCifDict(self, filePath, containerList, **kwargs):
        """Write an mmCIF dictionary container list with the pure-python adapter."""
        ret = False
        try:
            # workPath = kwargs.get('workPath', None)
            enforceAscii = kwargs.get("enforceAscii", True)
            raiseExceptions = kwargs.get("raiseExceptions", True)
            useCharRefs = kwargs.get("useCharRefs", True)
            #
            adapter = IoAdapterPy(raiseExceptions=raiseExceptions,
                                  useCharRefs=useCharRefs)
            ret = adapter.writeFile(filePath,
                                    containerList=containerList,
                                    enforceAscii=enforceAscii)
        except Exception as e:
            logger.error("Failing for %s with %s", filePath, str(e))
        return ret

    def __serializeList(self, filePath, aList, enforceAscii=True, **kwargs):
        """Write aList to filePath one item per line.

        Args:
            filePath (str): output file path
            aList (list): list of string items
            enforceAscii (bool, optional): escape non-ASCII characters using XML
                character references. Defaults to True.

        Returns:
            bool: True for success or False otherwise
        """
        try:
            _ = kwargs
            encoding = "ascii" if enforceAscii else "utf-8"
            #
            if sys.version_info[0] > 2:
                # Fix: open with the computed encoding (previously it was
                # ignored and the platform default used, which can raise
                # UnicodeEncodeError for non-ASCII items).
                with io.open(filePath, "w", encoding=encoding) as ofh:
                    if enforceAscii:
                        for st in aList:
                            ofh.write("%s\n" % st.encode(
                                "ascii", "xmlcharrefreplace").decode("ascii"))
                    else:
                        for st in aList:
                            ofh.write("%s\n" % st)
            else:
                # py2 legacy behavior preserved
                if enforceAscii:
                    with io.open(filePath, "w", encoding=encoding) as ofh:
                        for st in aList:
                            ofh.write("%s\n" % st.encode(
                                "ascii", "xmlcharrefreplace").decode("ascii"))
                else:
                    with open(filePath, "wb") as ofh:
                        for st in aList:
                            ofh.write("%s\n" % st)
            return True
        except Exception as e:
            logger.error("Unable to serialize %r %r", filePath, str(e))
        return False

    def __processList(self, ifh, enforceAscii=True, **kwargs):
        """Collect lines from ifh, skipping blanks and (optionally) '#' comments."""
        uncomment = kwargs.get("uncomment", True)
        rL = []
        for line in ifh:
            value = line[:-1]
            if enforceAscii:
                value = value.encode("ascii",
                                     "xmlcharrefreplace").decode("ascii")
            if not value or (uncomment and value.startswith("#")):
                continue
            rL.append(value)
        return rL

    def __deserializeList(self,
                          filePath,
                          enforceAscii=True,
                          encodingErrors="ignore",
                          **kwargs):
        """Read filePath (optionally gzipped) into a list of non-blank lines.

        Blank lines and comments are filtered via __processList().
        """
        rL = []
        _ = kwargs
        try:
            if filePath[-3:] == ".gz":
                if sys.version_info[0] > 2:
                    with gzip.open(filePath,
                                   "rt",
                                   encoding="utf-8-sig",
                                   errors=encodingErrors) as ifh:
                        rL = self.__processList(ifh,
                                                enforceAscii=enforceAscii,
                                                **kwargs)
                else:
                    # py2: uncompress first; reading non-ascii text directly
                    # through gzip.open is problematic on that interpreter
                    tPath = self.__fileU.uncompress(filePath, outputDir=None)
                    with io.open(tPath, encoding="utf-8-sig",
                                 errors="ignore") as ifh:
                        rL = self.__processList(ifh,
                                                enforceAscii=enforceAscii)
            else:
                with io.open(filePath, encoding="utf-8-sig",
                             errors="ignore") as ifh:
                    rL = self.__processList(ifh,
                                            enforceAscii=enforceAscii,
                                            **kwargs)
        except Exception as e:
            logger.error("Unable to deserialize %r %s", filePath, str(e))
        #
        logger.debug("Reading list length %d", len(rL))
        return rL

    def __csvReader(self, csvFile, rowFormat, delimiter, uncomment=True):
        """Materialize csvFile rows as dicts or lists according to rowFormat."""
        oL = []
        # Allow very large fields (default csv limit is small)
        csv.field_size_limit(sys.maxsize)
        if rowFormat == "dict":
            src = uncommentFilter(csvFile) if uncomment else csvFile
            oL = list(csv.DictReader(src, delimiter=delimiter))
        elif rowFormat == "list":
            src = uncommentFilter(csvFile) if uncomment else csvFile
            oL = list(csv.reader(src, delimiter=delimiter))
        return oL

    def deserializeCsvIter(self,
                           filePath,
                           delimiter=",",
                           rowFormat="dict",
                           encodingErrors="ignore",
                           uncomment=True,
                           **kwargs):
        """Return an iterator to input CSV format file.

        Args:
            filePath (str): input file path (plain text or gzip-compressed)
            delimiter (str, optional): CSV delimiter. Defaults to ",".
            rowFormat (str, optional): format for each process row (list or dict). Defaults to "dict".
            encodingErrors (str, optional): treatment of encoding errors. Defaults to "ignore".
            uncomment (bool, optional): flag to ignore leading comments. Defaults to True.
            encoding (str, optional): character encoding (kwarg). Defaults to "utf-8-sig".

        Returns:
            (iterator): iterator for rowwise access to processed CSV data
        """
        encoding = kwargs.get("encoding", "utf-8-sig")
        # Lift the field size cap so very wide records are not rejected.
        csv.field_size_limit(sys.maxsize)
        try:
            # newline="" is required by the csv module so that embedded
            # newlines inside quoted fields survive; both branches honor
            # the caller's encodingErrors setting.
            if filePath[-3:] == ".gz":
                ifh = gzip.open(filePath,
                                "rt",
                                encoding=encoding,
                                errors=encodingErrors,
                                newline="")
            else:
                ifh = io.open(filePath,
                              newline="",
                              encoding=encoding,
                              errors=encodingErrors)
            with ifh as csvFile:
                # Drop leading '#' comment lines before handing the stream
                # to the csv reader.
                startIt = itertools.dropwhile(
                    lambda x: x.startswith("#"),
                    csvFile) if uncomment else csvFile
                if rowFormat == "dict":
                    reader = csv.DictReader(startIt, delimiter=delimiter)
                elif rowFormat == "list":
                    reader = csv.reader(startIt, delimiter=delimiter)
                else:
                    raise ValueError("Unsupported rowFormat %r" % rowFormat)
                for row in reader:
                    yield row
        except Exception as e:
            logger.error("Unable to deserialize %r %s", filePath, str(e))

    def __deserializeCsv(self,
                         filePath,
                         delimiter=",",
                         rowFormat="dict",
                         encodingErrors="ignore",
                         uncomment=True,
                         **kwargs):
        """Deserialize a CSV format file into a list of rows.

        Args:
            filePath (str): input file path (plain text or gzip-compressed)
            delimiter (str, optional): CSV delimiter. Defaults to ",".
            rowFormat (str, optional): row representation (dict or list). Defaults to "dict".
            encodingErrors (str, optional): treatment of encoding errors. Defaults to "ignore".
            uncomment (bool, optional): flag to ignore leading comments. Defaults to True.
            encoding (str, optional): character encoding (kwarg). Defaults to "utf-8-sig".

        Returns:
            list: processed rows (empty on failure)
        """
        oL = []
        encoding = kwargs.get("encoding", "utf-8-sig")
        try:
            if filePath[-3:] == ".gz":
                if sys.version_info[0] > 2:
                    with gzip.open(filePath,
                                   "rt",
                                   encoding=encoding,
                                   errors=encodingErrors) as csvFile:
                        oL = self.__csvReader(csvFile,
                                              rowFormat,
                                              delimiter,
                                              uncomment=uncomment)
                else:
                    # Py2: gzip + non-ascii encodings is problematic --
                    # uncompress to a temporary path and read that as text.
                    tPath = self.__fileU.uncompress(filePath, outputDir=None)
                    with io.open(tPath,
                                 newline="",
                                 encoding=encoding,
                                 errors=encodingErrors) as csvFile:
                        oL = self.__csvReader(csvFile,
                                              rowFormat,
                                              delimiter,
                                              uncomment=uncomment)
            else:
                with io.open(filePath,
                             newline="",
                             encoding=encoding,
                             errors=encodingErrors) as csvFile:
                    oL = self.__csvReader(csvFile,
                                          rowFormat,
                                          delimiter,
                                          uncomment=uncomment)
        except Exception as e:
            logger.error("Unable to deserialize %r %s", filePath, str(e))
        #
        # Single exit so the success path is logged (was unreachable before).
        logger.debug("Reading list length %d", len(oL))
        return oL

    def __serializeCsv(self, filePath, rowDictList, fieldNames=None, **kwargs):
        """Write a list of row dictionaries to a CSV file.

        Args:
            filePath (str): output file path
            rowDictList (list): rows, each a dict keyed by field name
            fieldNames (list, optional): ordered output field names. Defaults
                to the keys of the first row.

        Returns:
            bool: True for success or False otherwise
        """
        _ = kwargs
        try:
            wD = {}
            ret = False
            fNames = fieldNames if fieldNames else list(rowDictList[0].keys())
            # newline="" lets the csv module control line endings and avoids
            # blank rows on platforms with \r\n translation.
            with open(filePath, "w", newline="") as csvFile:
                writer = csv.DictWriter(csvFile, fieldnames=fNames)
                writer.writeheader()
                for ii, rowDict in enumerate(rowDictList):
                    try:
                        # Drop any keys not in the declared field list.
                        wD = {k: v for k, v in rowDict.items() if k in fNames}
                        writer.writerow(wD)
                    except Exception as e:
                        logger.error(
                            "Skipping bad CSV record %d wD %r rowDict %r with %s",
                            ii + 1, wD, rowDict, str(e))
                        continue

            ret = True
        except Exception as e:
            logger.error("Failing for %s : %r with %s", filePath, wD, str(e))
        return ret

    def __csvEncoder(self,
                     csvData,
                     encoding="utf-8-sig",
                     encodingErrors="ignore"):
        """Handle encoding issues for gzipped data in Py2. (beware of the BOM chars)

        Args:
            csvData (text lines): uncompressed data from gzip open
            encoding (str, optional): character encoding. Defaults to "utf-8-sig".
            encodingErrors (str, optional): error treatment. Defaults to "ignore".

        Yields:
            bytes: each input line decoded as utf-8-sig and re-encoded
        """
        for row in csvData:
            # Decode with utf-8-sig to strip any BOM, then re-encode.
            decoded = row.decode("utf-8-sig", errors=encodingErrors)
            yield decoded.encode(encoding, errors=encodingErrors)

    def __deserializeXmlPrev(self, filePath, **kwargs):
        """Read the input XML file path and return an ElementTree data object instance.

        Args:
            filePath (string): input XML file path (plain or gzip-compressed)

        Returns:
            object: instance of an ElementTree tree object (None on failure)
        """
        _ = kwargs
        tree = None
        try:
            logger.debug("Parsing XML path %s", filePath)
            # Both branches parse binary streams; only the opener differs.
            opener = gzip.open if filePath[-3:] == ".gz" else open
            with opener(filePath, mode="rb") as ifh:
                tV = time.time()
                tree = ET.parse(ifh)
            logger.debug("Parsed %s in %.2f seconds", filePath,
                         time.time() - tV)
        except Exception as e:
            logger.error("Unable to deserialize %r %s", filePath, str(e))
        #
        return tree

    def __testGzip(self, filePath):
        """Return True if filePath appears to be a readable gzip file.

        Args:
            filePath (str): candidate gzip file path

        Returns:
            bool: True if the gzip header validates, False otherwise
                  (including unreadable or missing files)
        """
        ok = True
        try:
            # Reading a single byte forces gzip header validation without
            # decompressing the whole file. Opening inside the try makes
            # missing/unreadable paths report False rather than raising.
            # (The former "except gzip.BadGzipFile" clause was redundant with
            # the generic handler and broke on Python < 3.8.)
            with gzip.open(filePath, "r") as fh:
                fh.read(1)
        except Exception:
            ok = False
        logger.debug("Gzip file check %r", ok)
        return ok

    def __deserializeXml(self, filePath, **kwargs):
        """Read the input XML file path and return an ElementTree data object instance.

        Args:
            filePath (str): input XML file path (plain or gzip-compressed)
            encoding (str, optional): character encoding (kwarg). Defaults to "utf-8-sig".
            encodingErrors (str, optional): error treatment (kwarg). Defaults to "ignore".

        Returns:
            object: instance of an ElementTree tree object (None on failure)
        """
        # (removed the misleading "_ = kwargs" -- kwargs is consumed below)
        tree = None
        encoding = kwargs.get("encoding", "utf-8-sig")
        encodingErrors = kwargs.get("encodingErrors", "ignore")
        #
        try:
            logger.debug("Parsing XML path %s", filePath)
            # Fall through to the plain-text branch when the .gz suffix is
            # present but the payload is not actually gzip data.
            if filePath[-3:] == ".gz" and self.__testGzip(filePath):
                if sys.version_info[0] > 2:
                    with gzip.open(filePath,
                                   "rt",
                                   encoding=encoding,
                                   errors=encodingErrors) as ifh:
                        tV = time.time()
                        tree = ET.parse(ifh)
                else:
                    # Py2: decompress first, then read as text.
                    tPath = self.__fileU.uncompress(filePath, outputDir=None)
                    with io.open(tPath,
                                 encoding=encoding,
                                 errors=encodingErrors) as ifh:
                        tV = time.time()
                        tree = ET.parse(ifh)
            else:
                with io.open(filePath,
                             encoding=encoding,
                             errors=encodingErrors) as ifh:
                    tV = time.time()
                    tree = ET.parse(ifh)
            logger.debug("Parsed %s in %.2f seconds", filePath,
                         time.time() - tV)
        except Exception as e:
            logger.error("Unable to deserialize %r %s", filePath, str(e))
        #
        return tree
Example #10
0
class FileUtilTests(unittest.TestCase):
    """Tests for FileUtil local file operations, remote fetches, and archive handling.

    NOTE(review): the zip/ftp/https cases require network access to external
    services, and the fixture paths assume the project's mock-data layout under
    TOPDIR/HERE -- confirm these are available in the test environment.
    """
    def setUp(self):
        # Local mock-data fixtures and remote artifact URLs used across tests.
        self.__verbose = True
        self.__pathPdbxDictionaryFile = os.path.join(TOPDIR, "rcsb",
                                                     "mock-data",
                                                     "dictionaries",
                                                     "mmcif_pdbx_v5_next.dic")

        self.__pathTaxonomyFile = os.path.join(TOPDIR, "rcsb", "mock-data",
                                               "NCBI", "names.dmp.gz")
        self.__zipFileUrl = "https://inventory.data.gov/dataset/794cd3d7-4d28-4408-8f7d-84b820dbf7f2/resource/6b78ec0c-4980-4ad8-9cbd-2d6eb9eda8e7/download/myfoodapediadata.zip"
        self.__xzFile = os.path.join(TOPDIR, "rcsb", "mock-data",
                                     "MOCK_MODBASE_MODELS",
                                     "NP_001030614.1_1.pdb.xz")
        #
        self.__ftpFileUrl = "ftp://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        self.__httpsFileUrl = "https://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        #
        self.__workPath = os.path.join(HERE, "test-output")
        self.__inpDirPath = os.path.join(HERE, "test-data")
        self.__fileU = FileUtil()
        # Track wall-clock time per test (reported in tearDown).
        self.__startTime = time.time()
        logger.debug("Running tests on version %s", __version__)
        logger.debug("Starting %s at %s", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        # Log per-test elapsed wall-clock time.
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                     endTime - self.__startTime)

    def testTarBundling(self):
        """Test case for tarfile bundling and unbundling"""
        try:
            # Bundle a single directory recursively (gzip-compressed tar).
            tP = os.path.join(self.__workPath, "t0.tar.gz")
            dirPath = os.path.join(self.__inpDirPath, "topdir")

            ok = self.__fileU.bundleTarfile(tP, [dirPath],
                                            mode="w:gz",
                                            recursive=True)
            self.assertTrue(ok)

            numBytes = self.__fileU.size(tP)
            self.assertGreaterEqual(numBytes, 250)
            #
            md5 = self.__fileU.hash(tP, hashType="md5")
            self.assertTrue(md5 is not None)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            #
            # Bundle multiple subdirectories (gzip-compressed tar).
            tP = os.path.join(self.__workPath, "t1.tar.gz")
            dirPathList = [
                os.path.join(self.__inpDirPath, "topdir", "subdirA"),
                os.path.join(self.__inpDirPath, "topdir", "subdirB")
            ]

            ok = self.__fileU.bundleTarfile(tP,
                                            dirPathList,
                                            mode="w:gz",
                                            recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)

            # Repeat with an uncompressed tar archive.
            tP = os.path.join(self.__workPath, "t2.tar")
            dirPathList = [
                os.path.join(self.__inpDirPath, "topdir", "subdirA"),
                os.path.join(self.__inpDirPath, "topdir", "subdirB")
            ]

            ok = self.__fileU.bundleTarfile(tP,
                                            dirPathList,
                                            mode="w",
                                            recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testGetFile(self):
        """Test case for a local files and directories"""
        try:
            # Fetch a local fixture, then exercise exists/isLocal/path helpers.
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            dPath = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath)
            self.assertTrue(ok)
            # remove() of a nonexistent path is expected to succeed (best-effort).
            ok = self.__fileU.remove(";lakdjf")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testMoveAndCopyFile(self):
        """Test case for copying ("put") and moving ("replace") local files"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            # Test copy file
            dPath2 = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath2)
            self.assertTrue(ok)
            lPath2 = os.path.join(dPath2, fn)
            ok = self.__fileU.put(lPath, lPath2)
            self.assertTrue(ok)
            # Copy ("put") must leave the source in place.
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Remove copied file (to test moving file next)
            ok = self.__fileU.remove(lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertFalse(ok)
            # Test move file
            ok = self.__fileU.replace(lPath, lPath2)
            self.assertTrue(ok)
            # Move ("replace") must remove the source.
            ok = self.__fileU.exists(lPath)
            self.assertFalse(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Now clean up files and dirs
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath2)
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testZipUrl(self):
        """Test case for downloading remote zip file and extracting contents."""
        try:
            # Remote URL should not be classified as local.
            remoteLocator = self.__zipFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath,
                                 self.__fileU.getFileName(self.__zipFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            # Extract and verify the expected archive member path.
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith("Food_Display_Table.xlsx")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testFtpUrl(self):
        """Test case for downloading remote file ftp protocol and extracting contents."""
        try:
            remoteLocator = self.__ftpFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            dirPath = os.path.join(self.__workPath, "chem_comp_models")
            lPath = os.path.join(dirPath,
                                 self.__fileU.getFileName(self.__ftpFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            # Decompress the fetched .gz and verify the resulting file name.
            fp = self.__fileU.uncompress(lPath, outputDir=dirPath)
            ok = fp.endswith("chem_comp_model.cif")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testRemote(self):
        """Test case remote status"""
        try:
            # exists()/size() should work on a remote HTTPS locator without download.
            remoteLocator = self.__httpsFileUrl
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            ok = self.__fileU.exists(remoteLocator)
            self.assertTrue(ok)
            size = self.__fileU.size(remoteLocator)
            self.assertGreaterEqual(size, 1000)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("DrugBank example -- skipping")
    def testGetDrugBankUrl(self):
        """Test case for downloading drugbank master xml file"""
        try:
            # Skipped by default: requires real DrugBank credentials.
            remoteLocator = "https://www.drugbank.ca/releases/latest/downloads/all-full-database"
            un = "username"
            pw = "password"
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, "db-download.zip")
            ok = self.__fileU.get(remoteLocator,
                                  lPath,
                                  username=un,
                                  password=pw)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            self.__fileU.uncompress(lPath, outputDir=self.__workPath)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testXzFile(self):
        """Test case for extracting contents from xz file"""
        try:
            remoteLocator = self.__xzFile
            fn = self.__fileU.getFileName(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            # Decompress the .xz fixture and verify the resulting file suffix.
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith(".pdb")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #11
0
class MarshalUtil(object):
    """Wrapper for serialization and deserialization methods."""
    def __init__(self, **kwargs):
        self.__workPath = kwargs.get("workPath", ".")
        self.__workDirSuffix = kwargs.get("workDirSuffix", "marshall_")
        self.__workDirPrefix = kwargs.get("workDirSuffix", "_tempdir")
        #
        self.__fileU = FileUtil(workPath=self.__workPath)
        self.__ioU = IoUtil()

    def doExport(self,
                 locator,
                 obj,
                 fmt="list",
                 marshalHelper=None,
                 numParts=None,
                 **kwargs):
        """Serialize the input object at locator path in specified format.  The
        input object is optionally preprocessed by the helper method.

        Args:
            locator (str): target path or URI
            obj (object): data to be serialized
            fmt (str, optional): format for serialization (mmcif, tdd, csv, list). Defaults to "list".
            marshalHelper (method, optional): pre-processor method applied to input data object. Defaults to None.
            numParts (int, optional): serialize the data in parts. Defaults to None. (json and pickle formats)
        Returns:
            bool: True for success or False otherwise
        """
        try:
            ret = False
            localFlag = self.__fileU.isLocal(locator)
            # Optionally transform the payload before serialization.
            if marshalHelper:
                myObj = marshalHelper(obj, **kwargs)
            else:
                myObj = obj
            #
            # Multi-part export is only supported for local json/pickle targets.
            if localFlag and numParts and fmt in ["json", "pickle"]:
                localFilePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.serializeInParts(localFilePath,
                                                  myObj,
                                                  numParts,
                                                  fmt=fmt,
                                                  **kwargs)
            elif localFlag:
                localFilePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.serialize(localFilePath,
                                           myObj,
                                           fmt=fmt,
                                           workPath=self.__workPath,
                                           **kwargs)
            else:
                # Remote target: serialize into a temporary directory, then
                # copy ("put") the local file to the destination locator.
                with tempfile.TemporaryDirectory(
                        suffix=self.__workDirSuffix,
                        prefix=self.__workDirPrefix,
                        dir=self.__workPath) as tmpDirName:
                    # write a local copy then copy to destination -
                    #
                    localFilePath = os.path.join(
                        self.__workPath, tmpDirName,
                        self.__fileU.getFileName(locator))
                    ok1 = self.__ioU.serialize(localFilePath,
                                               myObj,
                                               fmt=fmt,
                                               workPath=self.__workPath,
                                               **kwargs)
                    ok2 = True
                    # Only attempt the copy if serialization succeeded.
                    if ok1:
                        ok2 = self.__fileU.put(localFilePath, locator,
                                               **kwargs)
                ret = ok1 and ok2
        except Exception as e:
            logger.exception("Exporting locator %r failing with %s", locator,
                             str(e))

        return ret

    def doImport(self,
                 locator,
                 fmt="list",
                 marshalHelper=None,
                 numParts=None,
                 **kwargs):
        """Deserialize data at the target locator in specified format. The deserialized
        data is optionally post-processed by the input helper method.

        Args:
            locator (str): path or URI to input data
            fmt (str, optional): format for deserialization (mmcif, tdd, csv, list). Defaults to "list".
            marshalHelper (method, optional): post-processor method applied to deserialized data object. Defaults to None.
            numParts (int, optional): deserialize the data in parts. Defaults to None. (json and pickle formats)
            tarMember (str, optional): name of a member of tar file bundle. Defaults to None. (tar file format)

        Returns:
            Any: format specific return type (None on failure)
        """
        try:
            tarMember = kwargs.get("tarMember", None)
            # Extracting a tar member always requires the fetch-then-read path.
            localFlag = self.__fileU.isLocal(locator) and not tarMember
            #
            # Multi-part import is only supported for local json/pickle sources.
            if localFlag and numParts and fmt in ["json", "pickle"]:
                filePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.deserializeInParts(filePath,
                                                    numParts,
                                                    fmt=fmt,
                                                    **kwargs)
            elif localFlag:
                filePath = self.__fileU.getFilePath(locator)
                ret = self.__ioU.deserialize(filePath,
                                             fmt=fmt,
                                             workPath=self.__workPath,
                                             **kwargs)
            else:
                #
                # mmcif deserialization handles remote locators directly.
                if fmt == "mmcif":
                    ret = self.__ioU.deserialize(locator,
                                                 fmt=fmt,
                                                 workPath=self.__workPath,
                                                 **kwargs)
                else:
                    # Other formats: download to a temporary directory first,
                    # then deserialize the local copy.
                    with tempfile.TemporaryDirectory(
                            suffix=self.__workDirSuffix,
                            prefix=self.__workDirPrefix,
                            dir=self.__workPath) as tmpDirName:
                        #
                        # Fetch first then read a local copy -
                        #
                        if tarMember:
                            localFilePath = os.path.join(
                                self.__workPath, tmpDirName, tarMember)
                        else:
                            localFilePath = os.path.join(
                                self.__workPath, tmpDirName,
                                self.__fileU.getFileName(locator))

                        # ---  Local copy approach ---
                        self.__fileU.get(locator, localFilePath, **kwargs)
                        ret = self.__ioU.deserialize(localFilePath,
                                                     fmt=fmt,
                                                     workPath=self.__workPath,
                                                     **kwargs)

            # Optionally post-process the deserialized payload.
            if marshalHelper:
                ret = marshalHelper(ret, **kwargs)
        except Exception as e:
            logger.exception("Importing locator %r failing with %s", locator,
                             str(e))
            ret = None
        return ret

    def exists(self, filePath, mode=os.R_OK):
        return self.__fileU.exists(filePath, mode=mode)

    def mkdir(self, dirPath, mode=0o755):
        return self.__fileU.mkdir(dirPath, mode=mode)

    def remove(self, pth):
        return self.__fileU.remove(pth)