Example #1
def get_config(var):
    ci = ConfigInfo()
    valsect = ci.get("VALIDATION_SERVICES")
    if valsect is not None and var in valsect:
        return valsect[var]
    else:
        return ci.get(var)
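
A minimal usage sketch: get_config first consults the "VALIDATION_SERVICES" section and only falls back to the top-level configuration when the key is absent there. The first key below is hypothetical; the second appears elsewhere in these examples.

val_timeout = get_config("VAL_TASK_TIMEOUT")   # hypothetical key; served from VALIDATION_SERVICES if defined there
db_host = get_config("SITE_DB_HOST_NAME")      # not in VALIDATION_SERVICES, so resolved from the top-level config
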
Example #2
    def __init__(self, log=sys.stderr, verbose=False):
        """ """
        self.__lfh = log
        self.__verbose = verbose
        self.__debug = True
        self.__siteId = getSiteId()
        cI = ConfigInfo()
        cIcommon = ConfigInfoAppCommon(self.__siteId)
        self.__schemaPath = cIcommon.get_site_da_internal_schema_path()
        self.__dbHost = cI.get("SITE_DB_HOST_NAME")
        self.__dbUser = cI.get("SITE_DB_USER_NAME")
        self.__dbPw = cI.get("SITE_DB_PASSWORD")
        self.__dbPort = str(cI.get("SITE_DB_PORT_NUMBER"))
        self.__dbSocket = cI.get("SITE_DB_SOCKET")
        self.__archivePath = cI.get("SITE_ARCHIVE_STORAGE_PATH")

        self.__dbName = "da_internal"
        self.__workPath = os.path.join(self.__archivePath, "archive")
        self.__mysql = "mysql "
        self.__dbLoader = cIcommon.get_db_loader_path()

        self.__mapping = self.__schemaPath

        self.__workDir = "dbdata"
        self._dbCon = None
Example #3
    def setUp(self):
        cI = ConfigInfo()
        self.__testFilePath = cI.get("TEST_FILE_PATH")
        self.__testFile = cI.get("TEST_FILE")
        self.__testFileGzip = cI.get("TEST_FILE_GZIP")
        self.__testFileZlib = cI.get("TEST_FILE_ZLIB")
        self.__testFileBzip = cI.get("TEST_FILE_BZIP")
        self.__outPath = TESTOUTPUT
        self.__outFileList = ["OUTPUT.dat.gz", "OUTPUT.dat", "OUTPUT.dat.bz2", "OUTPUT.dat.Z"]
        self.lfh = sys.stdout
Example #4
    def testGetSiteLocation(self):
        """Test case - return site location"""
        try:
            for siteId in self.__siteIdList:
                ci = ConfigInfo(siteId=siteId,
                                verbose=self.__verbose,
                                log=self.__lfh)
                siteName = ci.get("SITE_NAME", default=None)
                siteLoc = ci.get("WWPDB_SITE_LOC", default=None)
                logger.info(" siteId %-30s siteName %s siteLoc %s", siteId,
                            siteName, siteLoc)
        except Exception as e:  # pragma: no cover
            logger.exception("Unable to get group site location %s", str(e))
            self.fail()
Example #5
class ConfigInfoGroupDataSet(object):
    """
    Provides accessors for the correspondence between group deposition data identifiers and
    deposition and annotation sites (e.g. wwpdb_site_id).

    """
    def __init__(self, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        self.__verbose = verbose
        self.__debug = True
        self.__cI = ConfigInfo(siteId=None, verbose=self.__verbose)
        self.__groupIdAssignments = self.__cI.get(
            "SITE_GROUP_DATASET_ID_ASSIGNMENT_DICTIONARY")

    def getDefaultGroupIdRange(self, siteId):
        """Return the default upper and lower group deposition data set identifier codes
        assigned to the input siteId.

        Any site lacking a default range will return the tuple (-1, -1).

        Returns:   (lower bound, upper bound) for data set identifiers (int)
        """
        if siteId in self.__groupIdAssignments:
            GID_START, GID_STOP = self.__groupIdAssignments[siteId]
        # elif 'UNASSIGNED' in self.__groupIdAssignments:
        #    GID_START, GID_STOP = self.__groupIdAssignments['UNASSIGNED']
        else:
            GID_START, GID_STOP = (-1, -1)
        return (GID_START, GID_STOP)

    def getDefaultSiteId(self, groupId):
        """Get the default site assignment for the input group data set id."""
        return self.__getSiteIdForGroup(groupId)

    def __getSiteIdForGroup(self, groupId):
        """Return the siteId to which the input groupId is within the default
        code assignment range.

        Input may be either a string "G_xxxxxxx" or an integer/string "xxxxxx".

        """
        # check default group range assignment --
        try:
            if str(groupId).startswith("G_"):
                idVal = int(str(groupId)[2:])
            else:
                idVal = int(str(groupId))
            #
            for ky in self.__groupIdAssignments.keys():
                idMin, idMax = self.__groupIdAssignments[ky]
                if (idVal >= idMin) and (idVal <= idMax):
                    return ky
        except Exception as e:
            if self.__debug:
                logger.exception("failed checking group range for %r with %s",
                                 groupId, str(e))

        return None
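
A short usage sketch for the accessors above; the site and group identifiers are illustrative, and real values come from SITE_GROUP_DATASET_ID_ASSIGNMENT_DICTIONARY.

gds = ConfigInfoGroupDataSet()
lower, upper = gds.getDefaultGroupIdRange("WWPDB_DEPLOY_PRODUCTION_RU")
site = gds.getDefaultSiteId("G_1000001")  # accepts "G_xxxxxxx" or a bare integer/string
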
Example #6
class DensityWrapper:
    def __init__(self):
        self.__site_id = getSiteId()
        self.__cI = ConfigInfo(siteId=self.__site_id)
        self.__packagePath = self.__cI.get('SITE_PACKAGES_PATH')
        self.node_path = os.path.join(self.__packagePath, 'node', 'bin',
                                      'node')
        self.volume_server_pack = self.__cI.get('VOLUME_SERVER_PACK')
        self.volume_server_query = self.__cI.get('VOLUME_SERVER_QUERY')

    def convert_xray_density_map(self, coord_file, in_2fofc_map, in_fofc_map,
                                 out_binary_cif, working_dir):
        xray_conversion = XrayVolumeServerMap(
            coord_path=coord_file,
            binary_map_out=out_binary_cif,
            node_path=self.node_path,
            volume_server_query_path=self.volume_server_query,
            volume_server_pack_path=self.volume_server_pack,
            two_fofc_mmcif_map_coeff_in=in_2fofc_map,
            fofc_mmcif_map_coeff_in=in_fofc_map,
            working_dir=working_dir)
        return xray_conversion.run_process()

    def convert_em_volume(self, in_em_volume, out_binary_volume, working_dir):
        logging.debug(working_dir)
        rdb = RcsbDpUtility(tmpPath=working_dir,
                            siteId=self.__site_id,
                            verbose=True)
        rdb.imp(in_em_volume)
        rdb.op('em-density-bcif')
        rdb.exp(out_binary_volume)

        return bool(out_binary_volume) and os.path.exists(out_binary_volume)
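
A hedged usage sketch; the file paths below are hypothetical, and the conversion tools are resolved from site-config as shown above.

dw = DensityWrapper()
ok = dw.convert_em_volume(in_em_volume="/tmp/emd_0001.map",        # hypothetical input volume
                          out_binary_volume="/tmp/emd_0001.bcif",  # hypothetical output path
                          working_dir="/tmp/em_work")
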
Example #7
class TaxDbManager(object):
    """A class to manage updates to the various schemas"""
    def __init__(self, taxdbsize, maxsize, noop, useftp):
        self.__noop = noop
        self.__taxdbsize = taxdbsize
        self.__maxsize = maxsize
        self.__useftp = useftp
        self.__cI = ConfigInfo()

    def updatedb(self):
        mydb = MyConnectionBase()
        mydb.setResource(resourceName="STATUS")
        ok = mydb.openConnection()
        if not ok:
            print("ERROR: Could not open status db")
            return

        myq = MyDbQuery(dbcon=mydb._dbCon)
        query = "select count(ordinal) from taxonomy "

        rows = myq.selectRows(queryString=query)

        count = rows[0][0]

        mydb.closeConnection()

        if count >= self.__taxdbsize and count < self.__maxsize:
            print("Taxdb at least as big as expected")
            return

        if self.__useftp:
            command = "python -m wwpdb.apps.deposit.depui.taxonomy.loadTaxonomyFromFTP --write_sql"
        else:
            taxfile = self.__cI.get("TAXONOMY_FILE_NAME")
            if not taxfile:
                print("Could not find site-config TAXONOMY_FILE_NAME -- cannot load taxonomy")
                return

            command = "python -m wwpdb.apps.deposit.depui.taxonomy.loadData --input_csv {}".format(taxfile)
        self.__exec(command)


    def __exec(self, cmd):
        print(cmd)
        ret = 0
        if not self.__noop:
            ret = subprocess.call(cmd, shell=True)
        return ret
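
A minimal driver sketch; the size thresholds are illustrative, and with noop=True the load command is printed but not executed.

mgr = TaxDbManager(taxdbsize=2000000, maxsize=5000000, noop=True, useftp=False)
mgr.updatedb()
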
Example #8
class ArchiveIoBase(object):
    """A base class for for archive data transfer operation utilities."""
    def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
        self._raiseExceptions = kwargs.get("raiseExceptions", False)
        self._siteId = kwargs.get("siteId", getSiteId())
        self._serverId = kwargs.get("serverId", None)

        self.__cI = ConfigInfo(siteId=self._siteId)
        #
        cD = self.__cI.get(self._serverId, {})
        self._hostName = cD.get("HOST_NAME", None)
        self._userName = cD.get("HOST_USERNAME", None)
        self._password = cD.get("HOST_PASSWORD", None)
        self._hostPort = int(cD.get("HOST_PORT", 22))
        self._protocol = cD.get("HOST_PROTOCOL", None)
        self._rootPath = cD.get("HOST_ROOT_PATH", None)
        self._keyFilePath = cD.get("HOST_KEY_FILE_PATH", None)
        self._keyFileType = cD.get("HOST_KEY_FILE_TYPE", None)
        #

    def connect(self, hostName, userName, **kwargs):
        raise NotImplementedError("To be implemented in subclass")

    def mkdir(self, path, **kwargs):
        raise NotImplementedError("To be implemented in subclass")

    def stat(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def put(self, localPath, remotePath):
        raise NotImplementedError("To be implemented in subclass")

    def get(self, remotePath, localPath):
        raise NotImplementedError("To be implemented in subclass")

    def listdir(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def rmdir(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def remove(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def close(self):
        raise NotImplementedError("To be implemented in subclass")
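
A minimal subclass sketch, assuming only what the base class defines: every transfer primitive must be overridden. This local-filesystem variant is hypothetical, intended for tests without a remote server.

import os
import shutil

class ArchiveIoLocal(ArchiveIoBase):
    def connect(self, hostName, userName, **kwargs):
        return True  # no session needed for local copies

    def mkdir(self, path, **kwargs):
        os.makedirs(path)
        return True

    def stat(self, path):
        return os.stat(path)

    def put(self, localPath, remotePath):
        shutil.copyfile(localPath, remotePath)
        return True

    def get(self, remotePath, localPath):
        shutil.copyfile(remotePath, localPath)
        return True

    def listdir(self, path):
        return os.listdir(path)

    def rmdir(self, path):
        os.rmdir(path)
        return True

    def remove(self, path):
        os.remove(path)
        return True

    def close(self):
        return True
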
Example #9
    def testParseCutoff(self):
        """Tests if common built in definitions are set"""
        cI = ConfigInfo()
        val = cI.get("PROJECT_VAL_REL_CUTOFF")

        self.assertEqual(len(val), 2)

        time_t = self._parseTime(val["start"])
        self.assertEqual(time_t.hour, 9)
        self.assertEqual(time_t.minute, 0)
        self.assertEqual(time_t.second, 0)
        self.assertEqual(time_t.isoweekday(), 5)

        time_t = self._parseTime(val["end"])
        self.assertEqual(time_t.hour, 23)
        self.assertEqual(time_t.minute, 59)
        self.assertEqual(time_t.second, 59)
        self.assertEqual(time_t.isoweekday(), 5)
Example #10
    def __setResource(self, resource):
        """Loads resources for access"""

        cI = ConfigInfo(self.__siteId)
        if resource == "DA_INTERNAL":
            self.__dbServer = cI.get("SITE_DB_SERVER")
            self.__dbHost = cI.get("SITE_DB_HOST_NAME")
            self.__dbUser = cI.get("SITE_DB_USER_NAME")
            self.__dbPw = cI.get("SITE_DB_PASSWORD")
            self.__dbPort = str(cI.get("SITE_DB_PORT_NUMBER"))
            self.__dbSocket = cI.get("SITE_DB_SOCKET")
            self.__dbName = cI.get("SITE_DA_INTERNAL_DB_NAME")

        else:
            raise NameError("Unknown resource %s" % resource)
Example #11
class MessageQueueConnection(object):
    def __init__(self):
        self._siteId = getSiteId(defaultSiteId=None)
        self._cI = ConfigInfo(self._siteId)

    def _getDefaultConnectionUrl(self):
        """ Provide the connection URL appropriate for the configured protocol..
        """
        rbmqClientProtocol = self._cI.get('SITE_RBMQ_CLIENT_PROTOCOL',
                                          default='')
        if 'SSL' in rbmqClientProtocol:
            return self._getSslConnectionUrl()
        else:
            return self._getConnectionUrl()

    def _getDefaultConnectionParameters(self):
        """ Provide the connection parameters appropriate for the configured protocol..
        """
        rbmqClientProtocol = self._cI.get('SITE_RBMQ_CLIENT_PROTOCOL',
                                          default='')
        if 'SSL' in rbmqClientProtocol:
            return self._getSslConnectionParameters()
        else:
            return self._getConnectionParameters()

    def _getSslConnectionParameters(self):
        pObj, _url = self.__getSslConnectionParameters()
        return pObj

    def _getSslConnectionUrl(self):
        _pObj, url = self.__getSslConnectionParameters()
        return url

    def __getSslConnectionParameters(self):
        """  Return connection parameter object for SSL client connection -
        """
        parameters = None
        rbmqUrl = None
        try:
            rbmqServerHost = self._cI.get('SITE_RBMQ_SERVER_HOST')
            rbmqServerPort = self._cI.get('SITE_RBMQ_SSL_SERVER_PORT')
            rbmqUser = self._cI.get('SITE_RBMQ_USER_NAME')
            rbmqPassword = self._cI.get('SITE_RBMQ_PASSWORD')
            rbmqVirtualHost = self._cI.get('SITE_RBMQ_VIRTUAL_HOST')
            clientSslCaCertFile = self._cI.get('SITE_RBMQ_SSL_CA_CERT_FILE')
            clientSslKeyFile = self._cI.get('SITE_RBMQ_SSL_KEY_FILE')
            clientSslCertFile = self._cI.get('SITE_RBMQ_SSL_CERT_FILE')
            ssl_opts = urlencode({
                "ssl_options": {
                    "ca_certs": clientSslCaCertFile,
                    "keyfile": clientSslKeyFile,
                    "certfile": clientSslCertFile
                }
            })
            rbmqUrl = "amqps://%s:%s@%s:%d/%s?%s" % (
                rbmqUser, rbmqPassword, rbmqServerHost, int(rbmqServerPort),
                rbmqVirtualHost, ssl_opts)
            logger.debug("rbmq URL: %s " % rbmqUrl)
            parameters = pika.URLParameters(rbmqUrl)
        except Exception:
            logger.exception("Failing")

        return parameters, rbmqUrl
        #

    def _getConnectionParameters(self):
        """Return connection parameters for the standard TCP client connection."""
        pObj, _url = self.__getConnectionParameters()
        return pObj

    def _getConnectionUrl(self):
        """Return connection parameters as a URL for the standard TCP client connection."""
        _pObj, url = self.__getConnectionParameters()
        return url

    def __getConnectionParameters(self):
        """  Return connection parameter object for client connection using basic authentication.
        """
        parameters = None
        rbmqUrl = None
        try:
            rbmqServerHost = self._cI.get('SITE_RBMQ_SERVER_HOST')
            rbmqServerPort = self._cI.get('SITE_RBMQ_SERVER_PORT')
            rbmqUser = self._cI.get('SITE_RBMQ_USER_NAME')
            rbmqPassword = self._cI.get('SITE_RBMQ_PASSWORD')
            rbmqVirtualHost = self._cI.get('SITE_RBMQ_VIRTUAL_HOST')

            credentials = pika.PlainCredentials(rbmqUser, rbmqPassword)
            parameters = pika.ConnectionParameters(
                host=rbmqServerHost,
                port=int(rbmqServerPort),
                virtual_host=rbmqVirtualHost,
                credentials=credentials)
            rbmqUrl = "amqp://%s:%s@%s:%d/%s" % (
                rbmqUser, rbmqPassword, rbmqServerHost, int(rbmqServerPort),
                rbmqVirtualHost)
            logger.debug("rbmq URL: %s " % rbmqUrl)
        except Exception:
            logger.exception("Failing")

        return parameters, rbmqUrl
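
A minimal consumer-side sketch using the parameter object built above. It assumes a reachable RabbitMQ broker configured in site-config; the queue name is hypothetical.

import pika

mqc = MessageQueueConnection()
parameters = mqc._getDefaultConnectionParameters()
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.queue_declare(queue="test_queue", durable=True)  # hypothetical queue
connection.close()
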
Example #12
class LocalFTPPathInfo(object):
    def __init__(self, siteId=None):
        self.__siteId = siteId
        self.__cI = ConfigInfo(siteId=self.__siteId)

        self.ftp_pdb_root = self.__cI.get("SITE_PDB_FTP_ROOT_DIR")
        self.ftp_emdb_root = self.__cI.get("SITE_EMDB_FTP_ROOT_DIR")
        self.__mapping = {
            "model": "mmCIF",
            "structure_factors": "structure_factors",
            "chemical_shifts": "nmr_chemical_shifts",
            "nmr_data": "nmr_data",
        }

    def __get_mapping(self, file_type):
        return self.__mapping.get(file_type)

    def set_ftp_pdb_root(self, ftp_pdb_root):
        if ftp_pdb_root:
            self.ftp_pdb_root = ftp_pdb_root

    def set_ftp_emdb_root(self, ftp_emdb_root):
        if ftp_emdb_root is not None:
            self.ftp_emdb_root = ftp_emdb_root

    def get_ftp_pdb(self):
        if self.ftp_pdb_root:
            return os.path.join(self.ftp_pdb_root, "pdb", "data", "structures",
                                "all")
        return ""

    def get_ftp_emdb(self):
        if self.ftp_emdb_root:
            return os.path.join(self.ftp_emdb_root, "emdb", "structures")
        return ""

    def get_model_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("model"))

    def get_sf_path(self):
        return os.path.join(self.get_ftp_pdb(),
                            self.__get_mapping("structure_factors"))

    def get_cs_path(self):
        return os.path.join(self.get_ftp_pdb(),
                            self.__get_mapping("chemical_shifts"))

    def get_nmr_data_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("nmr_data"))

    def get_model_fname(self, accession):
        model_file_name = ReleaseFileNames().get_model(accession=accession,
                                                       for_release=False)
        return os.path.join(self.get_model_path(), model_file_name)

    def get_structure_factors_fname(self, accession):
        sf_file_name = ReleaseFileNames().get_structure_factor(
            accession=accession, for_release=False)
        return os.path.join(self.get_sf_path(), sf_file_name)

    def get_chemical_shifts_fname(self, accession):
        cs_file_name = ReleaseFileNames().get_chemical_shifts(
            accession=accession, for_release=False)
        return os.path.join(self.get_cs_path(), cs_file_name)

    def get_nmr_data_fname(self, accession):
        nmr_data_file_name = ReleaseFileNames().get_nmr_data(
            accession=accession, for_release=False)
        return os.path.join(self.get_nmr_data_path(), nmr_data_file_name)
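
A brief usage sketch; the accession code is hypothetical, and the actual file names are produced by ReleaseFileNames.

paths = LocalFTPPathInfo()
print(paths.get_model_path())
print(paths.get_model_fname("1abc"))               # hypothetical accession
print(paths.get_structure_factors_fname("1abc"))
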
Example #13
class BuildTools(object):
    def __init__(self, config_file, noop, build_version='v-5200'):
        self.__configfile = config_file
        self.__noop = noop
        self.__build_version = build_version

        # Infer topdir from where running from
        topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        cdict = {'topdir': topdir}

        self.__cparser = ConfigParser(cdict)
        self.__cparser.read(self.__configfile)
        self.__ci = ConfigInfo()

    def __exec(self, cmd, overridenoop=False):
        print(cmd)
        ret = 0
        if not self.__noop or overridenoop:
            ret = subprocess.call(cmd, shell=True)
        return ret

    def build(self):
        try:
            packages = self.__cparser.get('DEFAULT', 'buildupdates')
        except Exception:
            # Missing stanza - exit
            return

        packs = packages.split(' ')
        for pack in packs:
            p = pack.strip()
            if p:
                pbuild = "pkg_build_{}".format(p)
                print("About to build %s" % pbuild)
                self.run_build_command(pbuild=pbuild)

    def run_build_command(self, pbuild):
        onedep_build_dir = self.__ci.get('WWPDB_ONEDEP_BUILD')
        onedep_build_dir_version = os.path.join(onedep_build_dir,
                                                self.__build_version)

        cmd = ['#!/bin/bash']

        # set the environment and ensure we are up to date
        cmd.append('cd {}'.format(onedep_build_dir))
        cmd.append('git pull')
        cmd.append('. {}/utils/pkg-utils-v2.sh'.format(onedep_build_dir))
        cmd.append('get_environment')
        cmd.append('export FORCE_REBUILD="YES"')
        cmd.append(
            '. {}/packages/all-packages.sh'.format(onedep_build_dir_version))

        # clear out the existing distrib dir so files are re-fetched
        cmd.append('if [ -z "$DISTRIB_DIR" ]')
        cmd.append('then')
        cmd.append('echo "DISTRIB_DIR not defined - exiting"')
        cmd.append('exit 1')
        cmd.append('else')
        cmd.append(
            'echo "remove files from distrib_dir so that they are rebuilt"')
        cmd.append('rm -rf ${DISTRIB_DIR}/*')
        cmd.append('fi')

        # append the actual command
        cmd.append(pbuild)

        # write everything to a temp file
        working_dir = tempfile.mkdtemp()
        temp_file = os.path.join(working_dir, 'cmd.sh')
        print('writing out commands to: {}'.format(temp_file))

        with open(temp_file, 'w') as outFile:
            outFile.write('\n'.join(cmd))

        print('commands to run')
        print('\n'.join(cmd))

        # run the temp file
        cmd_string = 'chmod +x {0}; {0}; rm -rf {1}'.format(
            temp_file, working_dir)
        return self.__exec(cmd_string)
Example #14
    def testMock(self):
        cI = ConfigInfo()
        self.assertEqual(cI.get("DEPLOY_PATH"),
                         os.path.join(rwMockTopPath, "da_top"))
Example #15
def main():
    # adding a conservative permission mask for this
    # os.umask(0o022)
    #
    siteId = getSiteId(defaultSiteId=None)
    cI = ConfigInfo(siteId)

    #    topPath = cI.get('SITE_WEB_APPS_TOP_PATH')
    topSessionPath = cI.get("SITE_WEB_APPS_TOP_SESSIONS_PATH")

    #
    myFullHostName = platform.uname()[1]
    myHostName = str(myFullHostName.split(".")[0]).lower()
    #
    wsLogDirPath = os.path.join(topSessionPath, "ws-logs")

    #  Setup logging  --
    now = time.strftime("%Y-%m-%d", time.localtime())

    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("--start",
                      default=False,
                      action="store_true",
                      dest="startOp",
                      help="Start consumer client process")
    parser.add_option("--stop",
                      default=False,
                      action="store_true",
                      dest="stopOp",
                      help="Stop consumer client process")
    parser.add_option("--restart",
                      default=False,
                      action="store_true",
                      dest="restartOp",
                      help="Restart consumer client process")
    parser.add_option("--status",
                      default=False,
                      action="store_true",
                      dest="statusOp",
                      help="Report consumer client process status")

    # parser.add_option("-v", "--verbose", default=False, action="store_true", dest="verbose", help="Enable verbose output")
    parser.add_option("--debug",
                      default=1,
                      type="int",
                      dest="debugLevel",
                      help="Debug level (default=1) [0-3]")
    parser.add_option("--instance",
                      default=1,
                      type="int",
                      dest="instanceNo",
                      help="Instance number [1-n]")
    #
    (options, _args) = parser.parse_args()
    #
    pidFilePath = os.path.join(
        wsLogDirPath, myHostName + "_" + str(options.instanceNo) + ".pid")
    stdoutFilePath = os.path.join(
        wsLogDirPath,
        myHostName + "_" + str(options.instanceNo) + "_stdout.log")
    stderrFilePath = os.path.join(
        wsLogDirPath,
        myHostName + "_" + str(options.instanceNo) + "_stderr.log")
    wfLogFilePath = os.path.join(
        wsLogDirPath,
        myHostName + "_" + str(options.instanceNo) + "_" + now + ".log")
    #
    logger = logging.getLogger(name="root")  # pylint: disable=redefined-outer-name
    logging.captureWarnings(True)
    formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
    handler = logging.FileHandler(wfLogFilePath)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    #
    lt = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
    #
    if options.debugLevel > 2:
        logger.setLevel(logging.DEBUG)
    elif options.debugLevel > 0:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)
    #
    #
    myDP = MyDetachedProcess(pidFile=pidFilePath,
                             stdout=stdoutFilePath,
                             stderr=stderrFilePath,
                             wrkDir=wsLogDirPath)

    if options.startOp:
        sys.stdout.write(
            "+DetachedMessageConsumer() starting consumer service at %s\n" %
            lt)
        logger.info(
            "DetachedMessageConsumer() starting consumer service at %s", lt)
        myDP.start()
    elif options.stopOp:
        sys.stdout.write(
            "+DetachedMessageConsumer() stopping consumer service at %s\n" %
            lt)
        logger.info(
            "DetachedMessageConsumer() stopping consumer service at %s", lt)
        myDP.stop()
    elif options.restartOp:
        sys.stdout.write(
            "+DetachedMessageConsumer() restarting consumer service at %s\n" %
            lt)
        logger.info(
            "DetachedMessageConsumer() restarting consumer service at %s", lt)
        myDP.restart()
    elif options.statusOp:
        sys.stdout.write(
            "+DetachedMessageConsumer() reporting status for consumer service at %s\n"
            % lt)
        sys.stdout.write(myDP.status())
    else:
        pass
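
An illustrative invocation, assuming the module above is saved as a standalone script (the file name is hypothetical):

python detached_message_consumer.py --start --instance 1 --debug 2
python detached_message_consumer.py --status --instance 1
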
Example #16
class ArchiveIoSftpTests(unittest.TestCase):
    def setUp(self):
        self.__lfh = sys.stderr
        self.__verbose = False
        #
        self.__serverId = "BACKUP_SERVER_RDI2"
        self.__cI = ConfigInfo(siteId=getSiteId(),
                               verbose=self.__verbose,
                               log=self.__lfh)
        cD = self.__cI.get(self.__serverId, {})
        self.__hostName = cD.get("HOST_NAME")
        self.__userName = cD.get("HOST_USERNAME")
        self.__hostPort = int(cD.get("HOST_PORT"))
        # self.__protocol = cD.get("HOST_PROTOCOL")
        self.__rootPath = cD.get("HOST_ROOT_PATH")
        self.__keyFilePath = cD.get("HOST_KEY_FILE_PATH")
        self.__keyFileType = cD.get("HOST_KEY_FILE_TYPE")
        #
        self.__testLocalFilePath = "./data/TEST-FILE.DAT"
        self.__testLocalOutputFilePath = "./JUNK.JUNK"
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)\n", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                     endTime - self.__startTime)

    def testSftpConnect(self):
        """Test case - for connection-"""

        try:
            aio = ArchiveIoSftp()
            ok = aio.connect(self.__hostName,
                             self.__userName,
                             self.__hostPort,
                             keyFilePath=self.__keyFilePath,
                             keyFileType=self.__keyFileType)
            aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    #

    def testSftpStatOps(self):
        """Test case -  get directory list and stat details-"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            result = aio.listdir(".")
            logger.info("listdir: %r", result)
            result = aio.stat(".")
            logger.info("stat: %r", result)
            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testSftpDirOps(self):
        """Test case -  create and remove directory -"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            testPath = os.path.join(self.__rootPath, "test")
            ok = aio.mkdir(testPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir: %r", result)
            result = aio.stat(testPath)
            logger.info("stat good: %r", result)
            ok = aio.rmdir(testPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir after remove: %r", result)
            #
            testPathBad = os.path.join(self.__rootPath, "test_bad")
            result = aio.listdir(testPathBad)
            logger.debug("listdir bad : %r", result)
            result = aio.stat(testPathBad)
            logger.info("bad stat: %r", result)

            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testSftpTransferOps(self):
        """Test case -  transfer and remove files and directories -"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            testDirPath = os.path.join(self.__rootPath, "test")
            testFilePath1 = os.path.join(testDirPath, "TEST-FILE-1.DAT")
            testFilePath2 = os.path.join(testDirPath, "TEST-FILE-2.DAT")
            ok = aio.mkdir(testDirPath)
            ok = aio.put(self.__testLocalFilePath, testFilePath1)
            ok = aio.put(self.__testLocalFilePath, testFilePath2)
            #
            aio.get(testFilePath1, self.__testLocalOutputFilePath)
            aio.get(testFilePath2, self.__testLocalOutputFilePath)
            #
            result = aio.listdir(testDirPath)
            logger.debug("listdir: %r", result)
            ok = aio.remove(testFilePath1)
            ok = aio.remove(testFilePath2)
            #
            result = aio.listdir(testDirPath)
            logger.debug("listdir: %r", result)
            #
            ok = aio.rmdir(testDirPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir after remove: %r", result)
            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Example #17
class ConfigInfoAppBase(object):
    """Base class to provide common application lookups"""
    def __init__(self, siteId=None, verbose=True, log=sys.stderr):
        self._cI = ConfigInfo(siteId=siteId, verbose=verbose, log=log)
        self._resourcedir = None
        self._rwresourcedir = None
        self._referencedir = None
        self._site_archive_dir = None
        self._site_local_apps_path = None
        self._top_webapps_path = None
        self._top_sessions_path = None

    def _getlegacy(self, key, default=None, stacklevel=4):
        """Retrieves key from configuration.  If key is found, provide a warning"""
        val = self._cI.get(key)
        if val is not None:
            # the warning will repeat with each occurrence
            self.__warndeprecated(
                "Access key %s has been used but is deprecated" % key,
                stacklevel=stacklevel)
        else:
            val = default
        return val

    def _getValue(self, key, default=None):
        val = self._cI.get(key)
        if val is None:
            val = default
        return val

    def _getresourcedir(self):
        if self._resourcedir is None:
            self._resourcedir = self._cI.get("RO_RESOURCE_PATH")
        return self._resourcedir

    def _getrwresourcedir(self):
        """Returns the RW resource directory if set in site-config"""
        if self._rwresourcedir is None:
            self._rwresourcedir = self._cI.get("RW_RESOURCE_PATH")
        return self._rwresourcedir

    def _getreferencedir(self):
        if self._referencedir is None:
            self._referencedir = self._cI.get("REFERENCE_PATH")
        return self._referencedir

    def _get_site_archive_dir(self):
        if self._site_archive_dir is None:
            self._site_archive_dir = self._cI.get("SITE_ARCHIVE_STORAGE_PATH")
        return self._site_archive_dir

    def _get_site_local_apps(self):
        if self._site_local_apps_path is None:
            self._site_local_apps_path = self._cI.get("SITE_LOCAL_APPS_PATH")
        return self._site_local_apps_path

    def _get_top_web_apps_top_path(self):
        if self._top_webapps_path is None:
            self._top_webapps_path = self._cI.get("SITE_WEB_APPS_TOP_PATH")
        return self._top_webapps_path

    def _get_top_sessions_path(self):
        if self._top_sessions_path is None:
            self._top_sessions_path = self._cI.get(
                "SITE_WEB_APPS_TOP_SESSIONS_PATH")
        return self._top_sessions_path

    def get_site_packages_path(self):
        return self._getlegacy(
            "SITE_PACKAGES_PATH",
            os.path.join(self._get_site_local_apps(), "packages"))

    def __warndeprecated(self, msg, stacklevel=4):
        """Logs warning message"""
        # stacklevel is to get up high enough to get caller
        warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel)
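
A minimal subclass sketch following the pattern of get_site_packages_path above; the legacy key and tool name are hypothetical.

import os

class ConfigInfoAppExample(ConfigInfoAppBase):
    def get_example_tool_path(self):
        # Prefer the deprecated explicit key, then fall back to a path
        # derived from SITE_LOCAL_APPS_PATH (hypothetical key and layout).
        return self._getlegacy(
            "SITE_EXAMPLE_TOOL_PATH",
            os.path.join(self._get_site_local_apps(), "bin", "example-tool"))
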
Example #18
class FileUtils(FileUtilsBase):
    """
    Manage the presentation of project files for download.

    """

    def __init__(self, entryId, reqObj=None, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__reqObj = reqObj
        # Reassign siteId for the following special case --
        self.__entryId = entryId
        siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        # This supports viewing entries from the standalone validation server within the annotation system --
        if siteId in ["WWPDB_DEPLOY_PRODUCTION_RU", "WWPDB_DEPLOY_VALSRV_RU", "WWPDB_DEPLOY_TEST", "WWPDB_DEPLOY_INTERNAL_RU"] and entryId.startswith("D_90"):
            siteId = "WWPDB_DEPLOY_VALSRV_RU"
        #
        # Get inventory of file types
        super(FileUtils, self).__init__()
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        #
        self.__lfh.write("+FileUtils.__setup() starting with entryId %r adjusted WWPDB_SITE_ID %r\n" % (self.__entryId, self.__siteId))
        #
        self.__sObj = self.__reqObj.getSessionObj()
        self.__sessionId = self.__sObj.getId()
        self.__sessionPath = self.__sObj.getPath()
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        self.__cI = ConfigInfo(self.__siteId)
        self.__msL = self.__cI.get("CONTENT_MILESTONE_LIST")
        #

    def renderFileList(self, fileSource="archive", rDList=None, titlePrefix="", titleSuffix="", displayImageFlag=False):
        """"""
        if rDList is None:
            rDList = self._rDList

        htmlList = []
        nTot = 0
        if fileSource in ["archive", "deposit", "wf-archive"]:
            for ky in rDList:
                if ky not in self._rD:
                    continue
                ctList = self._rD[ky]
                title = titlePrefix + ky + titleSuffix
                fList = []
                fList.extend(ctList)
                for ct in ctList:
                    for ms in self.__msL:
                        mt = ct + "-" + ms
                        fList.append(mt)
                nF, oL = self.__renderContentTypeFileList(
                    self.__entryId, fileSource=fileSource, wfInstanceId=None, contentTypeList=fList, title=title, displayImageFlag=displayImageFlag
                )
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF

        if fileSource in ["archive", "wf-archive"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="archive", title="Archive Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF

        if fileSource in ["deposit"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="deposit", title="Deposit Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF

        #
        if fileSource in ["wf-instance", "instance"]:
            iTopPath = self.__pI.getInstanceTopPath(self.__entryId)
            fPattern = os.path.join(iTopPath, "*")
            wfInstancePathList = filter(os.path.isdir, glob.glob(fPattern))
            for wfInstancePath in wfInstancePathList:
                (_pth, wfInstId) = os.path.split(wfInstancePath)
                title = "Files in " + wfInstId
                nF, oL = self.__renderWfInstanceFileList(self.__entryId, wfInstancePath, title=title)
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF
        #
        return nTot, htmlList

    def __renderContentTypeFileList(self, entryId, fileSource="archive", wfInstanceId=None, contentTypeList=None, title=None, displayImageFlag=False):
        if contentTypeList is None:
            contentTypeList = ["model"]
        if self.__verbose:
            self.__lfh.write(
                "+FileUtils.renderContentTypeFileList() entryId %r fileSource %r wfInstanceId %r contentTypeList %r \n" % (entryId, fileSource, wfInstanceId, contentTypeList)
            )
        de = DataExchange(
            reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=wfInstanceId, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh
        )
        tupL = de.getContentTypeFileList(fileSource=fileSource, contentTypeList=contentTypeList)
        #
        rTupL = []
        for tup in tupL:
            href, fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
            if displayImageFlag and fN.startswith(entryId + "_img-emdb"):
                imgFile = os.path.join(self.__sessionPath, fN)
                if os.access(imgFile, os.F_OK):
                    os.remove(imgFile)
                #
                os.symlink(tup[0], imgFile)
                imgHtml = '<img src="/sessions/' + self.__sessionId + "/" + fN + '" border="0" alt="Image" width="400" height="400">'
                rTupL.append(("displayImage", imgHtml, ""))
            #
        #
        if title is None:
            cS = ",".join(contentTypeList)
            title = "File Source %s (%s)" % (fileSource, cS)
        nF, htmlList = self.__renderFileList(rTupL, title)

        return nF, htmlList

    def __renderWfInstanceFileList(self, entryId, wfPath, title=None):
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderWfInstanceFileList() wfPath %s\n" % wfPath)

        wfPattern = os.path.join(wfPath, "*")
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=None, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getMiscFileList(fPatternList=[wfPattern], sortFlag=True)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)

        if title is None:
            title = "Workflow instance files for %s" % entryId
        nF, htmlList = self.__renderFileList(rTupL, title)

        return nF, htmlList

    def __renderLogFileList(self, entryId, fileSource="archive", title=None):
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderLogFileList() entryId %r fileSource %r\n" % (entryId, fileSource))
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getLogFileList(entryId, fileSource=fileSource)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)

        if title is None:
            title = "Log Files in Source %s" % fileSource
        nF, htmlList = self.__renderFileList(rTupL, title)

        return nF, htmlList

    def __renderFileList(self, fileTupleList, title, embeddedTitle=True):
        #
        oL = []
        if len(fileTupleList) > 0:
            if embeddedTitle:
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">%s</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>' % title)
            else:
                oL.append("<h4>%s</h4>" % title)
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">Files</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>')
            for tup in fileTupleList:
                oL.append("<tr>")
                if tup[0] == "displayImage":
                    oL.append('<td align="center" colspan="3">%s</td>' % tup[1])
                else:
                    oL.append("<td>%s</td>" % tup[0])
                    oL.append("<td>%s</td>" % tup[1])
                    oL.append("<td>%s</td>" % tup[2])
                #
                oL.append("</tr>")
            #
            oL.append("</table>")
        #
        return len(fileTupleList), oL

    def __makeDownloadHref(self, filePath):
        _dP, fN = os.path.split(filePath)
        tS = "/service/review_v2/download_file?sessionid=" + self.__sessionId + "&file_path=" + filePath
        href = "<a class='my-file-downloadable' href='" + tS + "'>" + fN + "</a>"
        return href, fN
Example #19
class DataMaintenanceTests(unittest.TestCase):
    def setUp(self):
        self.__lfh = sys.stderr
        self.__verbose = True
        # Get siteId from the environment -
        self.__siteId = getSiteId()
        self.__cI = ConfigInfo(siteId=self.__siteId)
        # In test mode, no deletions are performed -
        self.__testMode = True
        #
        #  A data set ID list must be provided --
        self.__idListPath = "RELEASED.LIST"
        #
        self.__milestoneL = []
        self.__milestoneL.append(None)
        self.__milestoneL.extend(self.__cI.get("CONTENT_MILESTONE_LIST"))
        self.__cTBD = self.__cI.get("CONTENT_TYPE_BASE_DICTIONARY")
        self.__cTD = self.__cI.get("CONTENT_TYPE_DICTIONARY")
        self.__cTL = sorted(self.__cTBD.keys())
        # Example list of candidate content types for purging  -- this is based on system V15x for X-ray content types
        self.__cTypesOtherL = [
            "assembly-assign",
            "assembly-model",
            "assembly-model-xyz",
            "assembly-report",
            "chem-comp-assign",
            "chem-comp-assign-details",
            "chem-comp-assign-final",
            "chem-comp-depositor-info",
            "chem-comp-link",
            "component-image",
            "dcc-report",
            "dict-check-report",
            "dict-check-report-r4",
            "format-check-report",
            "geometry-check-report",
            "merge-xyz-report",
            "misc-check-report",
            "notes-from-annotator",
            "polymer-linkage-distances",
            "polymer-linkage-report",
            "secondary-structure-topology",
            "seq-align-data",
            "seq-assign",
            "seq-data-stats",
            "seqdb-match",
            "sequence-fasta",
            "sf-convert-report",
            "site-assign",
            "special-position-report",
            "validation-data",
            "validation-report",
            "validation-report-depositor",
            "validation-report-full",
            "validation-report-slider",
        ]
        #
        # Test snapshot directory required for recovery tests -
        self.__snapShotPath = "/net/wwpdb_da_data_archive/.snapshot/nightly.1/data"

    def tearDown(self):
        pass

    def __getIdList(self, fPath):
        if not os.access(fPath, os.R_OK):
            self.__lfh.write("__getIdList() Missing data set ID list file %s\n" % fPath)
            self.fail()
        #
        ifh = open(fPath, "r")
        fL = []
        #  D_10 00 00 00 01
        for line in ifh:
            tId = line[:-1]
            if len(tId) == 12 and tId.startswith("D_"):
                fL.append(tId)
        ifh.close()
        return fL

    def __getRecoveryInfo(self, purgeType="exp"):
        """Return the list of tuple describing content type and milestones to be recovered.

        return [{fileSource,contentType,formatType,mileStone,purgeType},]
        """
        rL = []
        if purgeType in ["exp"]:
            for ct in ["model"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["pdbx", "pdb"]:
                        for milestone in self.__milestoneL:
                            if milestone in ["release", "annotate", "review"]:
                                rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
            for ct in ["structure-factors"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["pdbx", "mtz"]:
                        for milestone in self.__milestoneL:
                            if milestone in ["release", "annotate", "review"]:
                                rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
        elif purgeType in ["other", "report"]:
            for ct in self.__cTypesOtherL:
                if ct not in ["validation-report", "validation-data", "validation-report-full"]:
                    continue
                for fs in ["archive", "deposit"]:
                    for fm in self.__cTD[ct][0]:
                        for milestone in self.__milestoneL:
                            if milestone in ["release", "annotate", "review"]:
                                rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "other"})
        return rL

    def testRecoverProductionList(self):
        """Test case for selected recovery of selected content types and milestone files from snapshot directory"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            idList = self.__getIdList(self.__idListPath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                recL = []
                for pType in ["exp", "other"]:
                    pTL = self.__getRecoveryInfo(purgeType=pType)
                    for pT in pTL:
                        vfL = dm.getVersionFileListSnapshot(
                            basePath=self.__snapShotPath,
                            dataSetId=id,
                            wfInstanceId=None,
                            fileSource=pT["fileSource"],
                            contentType=pT["contentType"],
                            formatType=pT["formatType"],
                            partitionNumber="1",
                            mileStone=pT["mileStone"],
                        )

                        self.__lfh.write("\n+testRecoverProductionList - id %13s cType %s\n" % (id, pT["contentType"]))
                        for ii, p in enumerate(vfL):
                            self.__lfh.write("+testRecoverProductionList- %4d  pair - %r\n" % (ii, p))
                        recL.extend(vfL)

                if len(recL) > 0:
                    for ii, p in enumerate(recL):
                        self.__lfh.write("+testRecoverProductionList- %4d  pairL - %r\n" % (ii, p))
                        shutil.copyfile(p[0], p[1])

        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail()

    def __getPurgeInfo(self, purgeType="exp"):
        """Return a list of tuples describing content types and milestone data files to be purged -

        return [{fileSource,contentType,formatType,mileStone,purgeType},]
        """
        rL = []
        if purgeType in ["exp"]:
            for ct in ["model"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["pdbx", "pdb"]:
                        for milestone in self.__milestoneL:
                            rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
            for ct in ["structure-factors", "em-structure-factors"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["pdbx", "mtz"]:
                        for milestone in self.__milestoneL:
                            rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
            for ct in ["nmr-chemical-shifts", "nmr-chemical-shifts-raw", "nmr-chemical-shifts-auth"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["pdbx", "nmr-star"]:
                        for milestone in self.__milestoneL:
                            rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
            for ct in ["em-volume", "em-mask-volume", "em-additional-volume"]:
                for fs in ["archive", "deposit"]:
                    for fm in ["map", "ccp4", "mrc2000"]:
                        for milestone in self.__milestoneL:
                            rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"})
        elif purgeType in ["other", "report"]:
            for ct in self.__cTypesOtherL:
                for fs in ["archive", "deposit"]:
                    for fm in self.__cTD[ct][0]:
                        for milestone in self.__milestoneL:
                            rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "other"})

        return rL

    def __removePathList(self, pthList):
        #
        for pth in pthList:
            try:
                if self.__testMode:
                    self.__lfh.write("__removePathList() TEST MODE skip removing path %s\n" % pth)
                else:
                    os.remove(pth)
            except Exception:
                pass

    def testCreatePurgeProductionList(self):
        """Test case for generating canditate files for purge -"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            idList = self.__getIdList(self.__idListPath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                rmLL = []
                for pType in ["exp", "other"]:
                    pTL = self.__getPurgeInfo(purgeType=pType)
                    for pT in pTL:
                        latest, rmL, gzL = dm.getPurgeCandidates(
                            id,
                            wfInstanceId=None,
                            fileSource=pT["fileSource"],
                            contentType=pT["contentType"],
                            formatType=pT["formatType"],
                            partitionNumber="1",
                            mileStone=pT["mileStone"],
                            purgeType=pT["purgeType"],
                        )
                        if latest is None:
                            continue
                        self.__lfh.write("\n+testPurgeCandidatesList - id %13s cType %s LATEST version %s\n" % (id, pT["contentType"], latest))
                        for ii, p in enumerate(rmL):
                            self.__lfh.write("+testPurgeCandidateList- %4d  rm - %r\n" % (ii, p))
                        for ii, p in enumerate(gzL):
                            self.__lfh.write("+testPurgeCandidateList- %4d  gz - %r\n" % (ii, p))
                        if len(rmL) > 0:
                            rmLL.extend(rmL)

                rmLL.extend(dm.getLogFiles(id, fileSource="deposit"))
                rmLL.extend(dm.getLogFiles(id, fileSource="archive"))
                if len(rmLL) > 0:
                    for ii, p in enumerate(rmLL):
                        self.__lfh.write("+testPurgeCandidateList- %4d  rmLL - %r\n" % (ii, p))

        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail()

    def testPurgeProductionList(self):
        """Preliminary version of purge operations post release -"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        try:

            idList = self.__getIdList(self.__idListPath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                rmLL = []
                for pType in ["exp", "other"]:
                    pTL = self.__getPurgeInfo(purgeType=pType)
                    for pT in pTL:
                        latest, rmL, gzL = dm.getPurgeCandidates(
                            id,
                            wfInstanceId=None,
                            fileSource=pT["fileSource"],
                            contentType=pT["contentType"],
                            formatType=pT["formatType"],
                            partitionNumber="1",
                            mileStone=pT["mileStone"],
                            purgeType=pT["purgeType"],
                        )
                        if latest is None:
                            continue
                        self.__lfh.write("\n+testPurgeCandidatesList - id %13s cType %s LATEST version %s\n" % (id, pT["contentType"], latest))
                        for ii, p in enumerate(rmL):
                            self.__lfh.write("+testPurgeCandidateList- %4d  rm - %r\n" % (ii, p))
                        for ii, p in enumerate(gzL):
                            self.__lfh.write("+testPurgeCandidateList- %4d  gz - %r\n" % (ii, p))
                        if len(rmL) > 0:
                            rmLL.extend(rmL)

                rmLL.extend(dm.getLogFiles(id, fileSource="deposit"))
                rmLL.extend(dm.getLogFiles(id, fileSource="archive"))
                if len(rmLL) > 0:
                    for ii, p in enumerate(rmLL):
                        self.__lfh.write("+testPurgeCandidateList- %4d  rmLL - %r\n" % (ii, p))
                    self.__removePathList(rmLL)

        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail()

    def testPurgeCandidatesList(self):
        """"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            idList = self.__getIdList(self.__idListPath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                latest, rmL, gzL = dm.getPurgeCandidates(id, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None)
                self.__lfh.write("\n\n+testPurgeCandidatesList - id %s LATEST version %s\n" % (id, latest))
                for ii, p in enumerate(rmL):
                    self.__lfh.write("+testPurgeCandidateList- %r  rm - %r\n" % (ii, p))
                for ii, p in enumerate(gzL):
                    self.__lfh.write("+testPurgeCandidateList- %r  gz - %r\n" % (ii, p))
            self.__lfh.write("%s\n" % "\n".join(self.__cTL))

        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail()

    def testVersionList(self):
        """"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        try:
            idList = self.__getIdList(self.__idListPath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                pL = dm.getVersionFileList(id, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None)
                self.__lfh.write("\n\n+testVersionList- id %s file list\n" % (id))
                for ii, p in enumerate(pL):
                    self.__lfh.write("+testVersionList- %r  %r\n" % (ii, p))
        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail()

    def __makeEntryPathList(self, archivePath):
        """Return the list of entries in the archive directory names and paths -"""
        pathList = []
        dataList = []
        for root, dirs, _files in scandir.walk(archivePath, topdown=False):
            for dirName in dirs:
                if dirName.startswith("D_") and len(dirName) == 12:
                    pathList.append(os.path.join(root, dirName))
                    dataList.append(dirName)
        return dataList, pathList

    def __splitFilePath(self, pth):
        id = None
        contentType = None
        fileFormat = None
        partNo = None
        versionNo = None
        try:
            dn, fn = os.path.split(pth)
            fFields = fn.split(".")
            fileName = fFields[0]
            fileFormat = fFields[1]
            if len(fFields) > 2:
                versionNo = int(fFields[2][1:])
            else:
                versionNo = 0

            fParts = fileName.split("_")
            if len(fParts) == 4:
                id = fParts[0] + "_" + fParts[1]
                contentType = fParts[2]
                partNo = int(fParts[3][1:])
            else:
                if len(fParts) > 2:
                    id = fParts[0] + "_" + fParts[1]
                else:
                    id = fileName
                if len(fParts) > 3:
                    contentType = fParts[2]
                else:
                    contentType = None
                partNo = None
            return id, contentType, fileFormat, partNo, versionNo
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=sys.stdout)
        return id, contentType, fileFormat, partNo, versionNo
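
    # Worked example for __splitFilePath() (the identifier is a placeholder):
    #   "archive/D_000000001/D_000000001_model_P1.cif.V3"
    #   -> ("D_000000001", "model", "cif", 1, 3)
    # An unversioned name such as "D_000000001_model_P1.cif" yields versionNo 0.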

    def testGetFileInventory(self):
        """Test case - inventory the files in each entry's archive directory."""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        try:
            idList, pathList = self.__makeEntryPathList(archivePath)
            dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh)
            for id in idList:
                dirPath = os.path.join(archivePath, "archive", id, "*")
                self.__lfh.write("+testGetFileInventoryList- inventory in directory  %s\n" % (dirPath))
                pL = dm.getMiscFileList(fPatternList=[dirPath], sortFlag=True)
                self.__lfh.write("\n\n+testGetFileInventoryList- id %s file list\n" % (id))
                for ii, p in enumerate(pL):
                    tup0 = self.__splitFilePath(p[0])
                    retR = list(tup0)
                    retR.append(p[1])
                    retR.append(p[2])
                    self.__lfh.write("+testGetFileInventoryList- %r  %r\n" % (ii, retR))
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=sys.stdout)
            self.fail()
Example #20
0
class DataExchange(object):

    """
    Implements common data exchange operations
    including: moving annotation data files between session
    and workflow storage, accessing files in workflow directories,
    and routine file maintenance operations.

    """

    def __init__(self, reqObj=None, depDataSetId=None, wfInstanceId=None, fileSource="archive", siteId=None, verbose=False, log=sys.stderr):

        self.__reqObj = reqObj
        self.__depDataSetId = depDataSetId
        self.__wfInstanceId = wfInstanceId
        self.__fileSource = fileSource
        self.__verbose = verbose
        self.__lfh = log
        #
        self.__debug = False
        self.__inputSessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")

        self.__sessionObj = self.__reqObj.getSessionObj()
        self.__sessionPath = self.__sessionObj.getPath()

        self.__cI = ConfigInfo(self.__siteId)
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)

        #
        if self.__debug:
            self.__lfh.write("+DataExchange.__setup() - session id   %s\n" % (self.__sessionObj.getId()))
            self.__lfh.write("+DataExchange.__setup() - session path %s\n" % (self.__sessionObj.getPath()))

            self.__lfh.write("+DataExchange.__setup() - data set %s  instance %s file source %s\n" % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource))
            self.__pI.setDebugFlag(flag=self.__debug)

    def setFileSource(self, fileSource):
        """Override fileSource="archive" """
        self.__fileSource = fileSource

    def setInputSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__inputSessionPath = inputSessionPath

    def purgeLogs(self):
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId, "log")
        if self.__verbose:
            self.__lfh.write("+DataExchange.purgeLogs() - purging logs in directory  %s\n" % (dirPath))

        pthList = []
        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() - purging pattern is %s\n" % (fpattern))

            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() candidate path length is %d\n" % len(pthList))
            #
            for pth in pthList:
                try:
                    os.remove(pth)
                except:  # noqa: E722 pylint: disable=bare-except
                    pass
            #
        return pthList

    def reversePurge(self, contentType, formatType="pdbx", partitionNumber=1):
        fn = self.__getArchiveFileName(contentType=contentType, formatType=formatType, version="none", partitionNumber=partitionNumber)

        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)
        if self.__verbose:
            self.__lfh.write("+DataExchange.__setup() - purging in directory  %s\n" % (dirPath))

        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write("+DataExchange.__setup() - purging pattern is %s\n" % (fpattern))

        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write("+DataExchange.__reversePurge() candidate length is %d\n" % len(pthList))
        #
        fList = []
        for pth in pthList:
            if not pth.endswith(".V1"):
                fList.append(pth)

        for pth in fList:
            try:
                os.remove(pth)
            except:  # noqa: E722 pylint: disable=bare-except
                pass
            #
        return fList

    def removeWorkflowDir(self):
        if (self.__depDataSetId is not None) and self.__depDataSetId.startswith("D_") and (len(self.__depDataSetId) > 7):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", self.__depDataSetId)
            if os.access(dirPath, os.W_OK):
                shutil.rmtree(dirPath)
                return True
            else:
                return False
        else:
            return False

    def createArchiveDir(self, purgeFlag=True):
        """Create new the archive directory if this is needed."""

        if self.__verbose:
            self.__lfh.write("+DataExchange.export() creating archive directory for data set %s\n" % self.__depDataSetId)

        try:
            archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)

            if not os.access(dirPath, os.W_OK):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.createArchiveDir() creating archive directory path %s\n" % dirPath)
                os.makedirs(dirPath)
                return True
            else:
                if purgeFlag:
                    if self.__verbose:
                        self.__lfh.write("+DataExchange.export() existing archive directory path purged: %s\n" % dirPath)
                    shutil.rmtree(dirPath)
                    os.makedirs(dirPath)
                    return True
                else:
                    if self.__verbose:
                        self.__lfh.write("+DataExchange.export() archive directory exists: %s\n" % dirPath)
                    return False
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def fetch(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the current session directory (session naming semantics follow source file object)

        Return the full path of the copied file or None

        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.fetch() source type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))

        try:
            if os.access(inpFilePath, os.R_OK):
                (_dirPath, fileName) = os.path.split(inpFilePath)
                # trim off the trailing version -
                # lastIdx=tfileName.rfind(".V")
                # if lastIdx > 0:
                #    fileName=tfileName[:lastIdx]
                # else:
                #    fileName=tfileName
                outFilePath = os.path.join(self.__sessionPath, fileName)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.fetch() destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.fetch() missing input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    def export(self, inpFilePath, contentType, formatType, version="latest", partitionNumber=1):
        """Copy input file to workflow instance or archival storage.

        Return True on success or False otherwise.

        """
        outFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.export() destination type %s format %s version %s path %s\n" % (contentType, formatType, version, outFilePath))

        try:
            if os.access(inpFilePath, os.R_OK) and (os.path.getsize(inpFilePath) > 0):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.export() destination file path %s\n" % outFilePath)
                if inpFilePath.endswith(".gz"):
                    self.__copyGzip(inpFilePath, outFilePath)
                else:
                    shutil.copyfile(inpFilePath, outFilePath)
                return True
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.export() missing or zero length input file at path %s\n" % inpFilePath)
                return False
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def __copyGzip(self, inpFilePath, outFilePath):
        """Uncompress the gzipped input file to the output path using the gzip command."""
        try:
            cmd = " gzip -cd  %s > %s " % (inpFilePath, outFilePath)
            status = os.system(cmd)
            return status == 0
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False
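
    # Hedged sketch, not part of the original class: an in-process variant of
    # __copyGzip using the standard-library gzip module, which avoids the shell
    # and surfaces failures as exceptions rather than exit codes.
    def __copyGzipAlt(self, inpFilePath, outFilePath):
        """Illustrative alternative to __copyGzip (hypothetical helper, stdlib only)."""
        try:
            import gzip  # local import keeps the sketch self-contained

            with gzip.open(inpFilePath, "rb") as fIn:
                with open(outFilePath, "wb") as fOut:
                    shutil.copyfileobj(fIn, fOut)
            return True
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False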

    def copyDirToSession(self, dirName):
        """Replicate the input diretory in the session directory -"""
        try:
            if self.__fileSource in ["archive", "wf-archive"]:
                pth = self.__pI.getArchivePath(self.__depDataSetId)
            elif self.__fileSource in ["deposit"]:
                pth = self.__pI.getDepositPath(self.__depDataSetId)
            elif self.__fileSource in ["wf-instance"]:
                pth = self.__pI.getInstancePath(self.__depDataSetId, self.__wfInstanceId)
            else:
                return False

            srcPath = os.path.join(pth, dirName)
            if not os.access(srcPath, os.R_OK):
                return False

            dstPath = os.path.join(self.__sessionPath, dirName)
            if not os.path.isdir(dstPath):
                os.makedirs(dstPath, 0o755)
            #
            fPattern = os.path.join(srcPath, "*")
            fpL = filter(os.path.isfile, glob.glob(fPattern))
            for fp in fpL:
                _dN, fN = os.path.split(fp)
                oP = os.path.join(dstPath, fN)
                shutil.copyfile(fp, oP)

            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() successful session copy of dirName %s\n" % (dirName))
            return True
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() fails for dirName %s\n" % (dirName))
                traceback.print_exc(file=self.__lfh)
            return False

    def copyToSession(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the session directory using archive naming conventions less version details.

        Return the full path of the session file or None

        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__debug:
            self.__lfh.write("+DataExchange.copyToSession() source file type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))

        try:
            outFilePath = None
            if os.access(inpFilePath, os.R_OK):
                fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
                outFilePath = os.path.join(self.__sessionPath, fn)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.copyToSession() content type %s format %s copied to session path %s\n" % (contentType, formatType, outFilePath))
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__debug:
                    self.__lfh.write("+DataExchange.copyToSession() missing input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyToSession() Failing for content type %s format %s with session path %s\n" % (contentType, formatType, outFilePath))
                traceback.print_exc(file=self.__lfh)
            return None

    def updateArchiveFromSession(self, contentType, formatType, version="next", partitionNumber=1):
        """Copy the input content object from the session directory stored using  archive naming conventions less version details
        to archive storage.

        Return the full path of the archive file or None

        """
        fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
        inpFilePath = os.path.join(self.__sessionPath, fn)
        if self.__verbose:
            self.__lfh.write("+DataExchange.updateArchiveDromSession() source file type %s format %s path %s\n" % (contentType, formatType, inpFilePath))

        try:
            if os.access(inpFilePath, os.R_OK):
                outFilePath = self.__getFilePath(fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.updateArchiveFromSession() archive destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            else:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.updateArchiveFrom() missing session input file at path %s\n" % inpFilePath)
                return None
        except:  # noqa: E722 pylint: disable=bare-except
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    ##
    def getVersionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]

        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)

            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileList([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getPartitionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", mileStone=None):
        """
        For the input content object return a list of file partitions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]

        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)

            fPattern = self.__pI.getFilePathPartitionTemplate(
                dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, formatType=formatType, fileSource=fileSource, mileStone=mileStone
            )
            tL = self.__getFileList([fPattern], sortFlag=True)
            if self.__debug:
                self.__lfh.write("+DataExchange.getPartionFileList() pattern %r\n" % fPattern)
                self.__lfh.write("+DataExchange.getPartionFileList() file list %r\n" % tL)
            #
            return tL
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self, fileSource="archive", contentTypeList=None):
        """
        For the input content type list return a list of matching files sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]

        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, fileSource=fileSource)

                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write("+DataExchange.getContentTypeFileList() patterns %r\n" % fPatternList)
            return self.__getFileList(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        else:
            return []
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then the file list is sorted by modification date (recent changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]

        """
        if fPatternList is None:
            fPatternList = ["*"]
        rTup = []
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))

            file_date_tuple_list = []
            for x in files:
                d = os.path.getmtime(x)
                s = float(os.path.getsize(x)) / 1000.0
                file_date_tuple = (x, d, s)
                file_date_tuple_list.append(file_date_tuple)

            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda x: x[1], reverse=True)

            for fP, mT, sZ in file_date_tuple_list:
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write("+DataExchange.__getFileList() failing for patternlist %r error %r\n" % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
        return rTup

    ##
    def __getArchiveFileName(self, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        (_fp, _d, f) = self.__targetFilePath(
            fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return f

    # def __getInstanceFileName(self, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (_fp, _d, f) = self.__targetFilePath(
    #         fileSource="wf-instance", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
    #     )
    #     return f

    def __getFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        (fp, _d, _f) = self.__targetFilePath(
            fileSource=fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return fp

    def __targetFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return the file path, directory path, and filename  for the input content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataExchange.__targetFilePath() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)

            return (None, None, None)
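

# A minimal usage sketch, assuming a site-specific request object `reqObj` and a
# placeholder deposition id (both hypothetical here): copy a model file into the
# session, edit it, and write it back to archive storage as the next version.
def _exampleModelRoundTrip(reqObj):
    dx = DataExchange(reqObj=reqObj, depDataSetId="D_000000001", fileSource="archive", verbose=True)
    sessionFilePath = dx.copyToSession(contentType="model", formatType="pdbx")
    if sessionFilePath is None:
        return None
    # ... edit the session copy in place ...
    return dx.updateArchiveFromSession(contentType="model", formatType="pdbx", version="next")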
Example #21
0
class DataFileReference(DataReferenceBase):
    """Encapsulates a reference to a data file within the archive, workflow, deposit, or session file systems."""
    def __init__(self, siteId=None, verbose=False, log=sys.stderr):
        super(DataFileReference, self).__init__()
        #
        self.__siteId = siteId
        self.__verbose = verbose
        self.__debug = False
        self.__lfh = log
        #
        self.__cI = ConfigInfo(siteId=self.__siteId,
                               verbose=self.__verbose,
                               log=self.__lfh)
        #
        self.__contentType = None
        """A supported content type:
           - model
           - structure-factors
           - nmr-restraints
           - em-volume
           - others to be enumerated
           """
        self.__fileFormat = None
        """A supported file format:
           - pdbx/mmcif
           - pdb
           - pdbml
           - nmr-star
           - others to be enumerated
           """
        self.__storageType = None
        """Storage type:
           - archive or wf-archive
           - wf-instance
           - wf-shared
           - deposit
           - tempdep
           - session or wf-session
           - inline
           - others to be enumerated
           """
        self.__versionId = None
        """Version identifier:
           - latest
           - original
           - next
           - previous
           - version number (1,2,...,)
           """
        #
        self.__filePartNumber = 1
        """Placeholder for future integer index for file partitioning.
        """
        #
        self.__depositionDataSetId = None
        """Deposition data set identifier (e.g. D_123456)
        """
        self.__workflowInstanceId = None
        """Workflow instance identifier (e.g. W_123456)
        """
        #
        self.__workflowNameSpace = None
        """Workflow name space identifier (alpha-numeric character string)
        """
        #
        self.__contentInfoD = self.__cI.get("CONTENT_TYPE_DICTIONARY")
        """Dictionary of supported file formats for each recognized content type.
           An acronym for each content type is included.
        """
        #
        self.__formatExtensionD = self.__cI.get(
            "FILE_FORMAT_EXTENSION_DICTIONARY")
        """Dictionary of recognized file formats and file name extensions"""
        #
        self.__storageTypeList = [
            "archive", "autogroup", "wf-archive", "wf-instance", "wf-shared",
            "session", "wf-session", "deposit", "inline", "tempdep", "uploads"
        ]
        """List of supported storage types/locations"""
        #
        self.__depositionDataSetIdPrefix = "D_"
        """A depostion data set identifier begins with this prefix and is followed
           by a string of digits (e.g. D_123456789)"""
        #
        self.__groupDataSetIdPrefix = "G_"
        """A group data set identifier begins with this prefix and is followed
           by a string of digits (e.g. G_1234567)"""
        #
        self.__workflowInstanceIdPrefix = "W_"
        """A workflow instance identifier begins with this prefix and is followed
           by a string of digits (e.g. W_123456789)"""
        #
        self.__versionNameList = [
            "latest", "original", "previous", "next", "none"
        ]
        self.__partitionNameList = [
            "latest", "original", "previous", "next", "none"
        ]
        #
        #
        self.__externalFilePath = None
        """Placeholder for referencing a file name that is *external* to the archive
           or workflow system.  Setting this path implies a content type of *external*
           and other attributes of the reference will be treated as unknown/unassignable.
        """
        self.__sessionPath = "."
        """Optional path used as a file system directory for any files with 'session' storage type.

           The default value for the session storage is the current directory.
        """
        self.__sessionDataSetId = None
        """Optional session data set identifier (e.g. 1abc)
        """

        #

    def getSitePrefix(self):
        """Returns:

        Current setting of the site prefix.

        """
        return self.__cI.get("SITE_PREFIX")

    def setSessionPath(self, dirPath=None):
        """Set the full directory path for 'session' type storage.  The 'session' feature provides
        a means to support workflow file naming conventions for applications with transient
        storage requirements.

        Returns True for any non-null argument.  No check is performed for the existence of
        this path on input.
        """
        if dirPath is not None:
            self.__sessionPath = dirPath
            return True
        else:
            return False

    def setExternalFilePath(self, filePath, fileFormat="any"):
        """Set the full file path for this reference outside of the archive/workflow system.
        Other identifying attributes of this file reference are ignored/by-passed when
        this path is set.   This feature is provided to permit external data with alternative
        file name conventions to be used within data file references.

        File format may be optionally specified and must correspond to a supported
        format or the default 'any'.

        Returns:

        True if the assignment was successful or False otherwise.

        """
        if (filePath is None) or (len(filePath) < 1):
            return False

        if fileFormat in self.__formatExtensionD.keys():
            self.__fileFormat = fileFormat
        else:
            return False
        #
        # reset attributes -
        #
        self.__contentType = None
        self.__storageType = None
        self.__versionId = None
        self.__filePartNumber = 1
        self.__depositionDataSetId = None
        self.__workflowInstanceId = None
        self.__workflowNameSpace = None
        #
        self.__externalFilePath = None
        #
        try:
            self.__externalFilePath = os.path.abspath(filePath)
            (pth, fn) = os.path.split(self.__externalFilePath)
            if pth is None or fn is None:
                return False
            return True
        except Exception as _e:  # noqa: F841
            pass

        return False

    def setContentTypeAndFormat(self, contentType, fileFormat):
        """Set the content type and file format for the file reference.

        Examples of supported content types include:
        - model
        - structure-factors
        - nmr-restraints
        - nmr-chemical-shifts
        - component-image
        - component-definition
        - validation-report
        - em-volume

        Supported formats for each content type are defined in file format
        dictionary (`self.__contentInfoD`).

        Returns:

        True for a recognized content type  or False otherwise.
        """
        tS = str(contentType).lower()
        try:
            self.__contentInfoD.keys()
        except Exception as e:
            logger.exception("Failing with %r", str(e))

        if tS in self.__contentInfoD.keys():
            self.__contentType = tS
            fS = str(fileFormat).lower()
            if (fS in self.__contentInfoD[tS][0]) or (
                    "any" in self.__contentInfoD[tS][0]):
                self.__contentType = tS
                self.__fileFormat = fS
                if self.__debug:
                    logger.debug(
                        "++setContentTypeAndFormat -- returning True with self.__contentType: %s",
                        self.__contentType)
                    logger.debug(
                        "++setContentTypeAndFormat -- returning True with self.__fileFormat: %s",
                        self.__fileFormat)
                self.setReferenceType("file")
                return True
            else:
                if self.__debug:
                    logger.debug(
                        "++setContentTypeAndFormat -- returning False with tS: %s",
                        tS)
                    logger.debug(
                        "++setContentTypeAndFormat -- returning False with fS: %s",
                        fS)
                return False
        else:
            if self.__debug:
                logger.debug(
                    "++setContentTypeAndFormat -- unrecognized cotentent type %r",
                    tS)
            return False

    def getStorageTypeList(self):
        return self.__storageTypeList

    def setStorageType(self, storageType):
        """Set the storage type for this file reference.

        Supported storage types include:
        - archive or wf-archive
        - wf-instance
        - wf-shared
        - deposit
        - tempdep
        - session or wf-session

        Returns:

        True for a recognized storage type or False otherwise.

        """
        tS = str(storageType).lower()
        if tS in self.__storageTypeList:
            self.__storageType = tS
            if tS not in ["inline", "constant"]:
                self.setReferenceType("file")
            return True
        else:
            return False

    def setVersionId(self, versionId):
        """Set the version identifier for this file reference.

        Supported version identifiers include:
        - latest, ...
        - original
        - an integer version number (1,2,...,)

        Returns:

        True for a valid version identifier or False otherwise.

        """
        tS = str(versionId).lower()
        if tS in self.__versionNameList:
            self.__versionId = tS
            return True
        elif self.__isInteger(tS):
            self.__versionId = tS
            return True
        else:
            return False

    def __isInteger(self, str_in):
        """ Is the given string an integer?	"""
        ok = True
        try:
            num = int(str_in)  # noqa: F841 pylint: disable=unused-variable
        except ValueError:
            ok = False
        return ok

    def setDepositionDataSetId(self, dId):
        """Set the deposition data set identifier.

        A deposition data set identifier begins with the prefix *D_* and is followed
        by a string of digits (e.g. D_123456789).

        Returns:

        True if the input identifier is a properly formed identifier
        or False otherwise.

        """
        tS = str(dId).upper()
        if (not tS.startswith(self.__depositionDataSetIdPrefix)) and (
                not tS.startswith(self.__groupDataSetIdPrefix)):
            return False
        tSL = tS.split("_")
        if (len(tSL) > 1) and self.__isInteger(tSL[1]):
            self.__depositionDataSetId = tS
            return True
        else:
            return False

    def setWorkflowInstanceId(self, wId):
        """Set the workflow instance identifier.

        A workflow instance identifier begins with the prefix *W_* and is followed
        by a string of digits (e.g. W_123456789)

        Returns:

        True if the input identifier is a properly formed identifier
        or False otherwise.

        """
        tS = str(wId).upper()
        if not tS.startswith(self.__workflowInstanceIdPrefix):
            return False
        tSL = tS.split("_")
        if (len(tSL) > 1) and self.__isInteger(tSL[1]):
            self.__workflowInstanceId = tS
            return True
        else:
            return False

    def setSessionDataSetId(self, sId):
        """Set the session data set identifier.

        Data set identifier applied for session storage. No conventions are
        assumed for this identifier.

        Returns:

        True if the input identifier is non-blank
        or False otherwise.

        """
        if sId is not None and len(sId) > 0:
            self.__sessionDataSetId = str(sId).upper()
            return True
        else:
            return False

    def setWorkflowNameSpace(self, wNameSpace):
        """Set the workflow name space identifier.

        This identifier must be an alphanumeric string containing only
        characters [a-zA-Z0-9].

        Returns:

        True if the input identifier is a properly formed identifier
        or False otherwise.

        """
        if (wNameSpace is None) or (len(str(wNameSpace)) < 1):
            return False
        for cv in str(wNameSpace):
            if (cv not in string.ascii_letters) and (cv not in string.digits):
                return False
        self.__workflowNameSpace = wNameSpace
        return True

    def setPartitionNumber(self, iPartitionNumber=1):
        """Set the integer file partition number.  This is used to identify the physical
        pieces of a single logical data file.

        Supported values for the partition include:
        - latest, ...
        - original
        - an integer partition number (1,2,...,)

        Returns:

        True if the input partition is properly formed or False otherwise.

        """
        ok = False
        try:
            tS = str(iPartitionNumber).lower()
            if tS in self.__partitionNameList:
                self.__filePartNumber = tS
                ok = True
            elif self.__isInteger(tS):
                self.__filePartNumber = int(tS)
                ok = True
            else:
                ok = False
        except Exception:
            ok = False
        if self.__debug:
            logger.debug(
                "+DataFileReference.setPartitionNumber() setting is  %r",
                self.__filePartNumber)
        return ok

    def getPartitionNumber(self):
        """Returns:

        The current partition number  or *1* if this is not set.
        """
        return self.__filePartNumber

    def getContentType(self):
        """Returns:

        The current content type or *None* if this is not set.
        """
        return self.__contentType

    def getFileFormat(self):
        """Returns:

        The current file format or *None* if this is not set.
        """
        return self.__fileFormat

    def getStorageType(self):
        """Returns:

        The current storage type or *None* if this is not set.
        """

        return self.__storageType

    def getVersionId(self):
        """Returns:

        The current version identifier or *None* if this is not set.
        """
        return self.__versionId

    def getDepositionDataSetId(self):
        """Returns:

        The current deposition data set identifier  or *None* if this is not set.
        """
        return self.__depositionDataSetId

    def getWorkflowInstanceId(self):
        """Returns:

        The current workflow instance identifier  or *None* if this is not set.
        """
        return self.__workflowInstanceId

    def getWorkflowNameSpace(self):
        """Returns:

        The current workflow name space identifier  or *None* if this is not set.
        """
        return self.__workflowNameSpace

    #
    # ------------------------------------------------------------------------------------------------------------------------------------
    #
    # --- The following public methods derive information from the settings in the previous methods --
    #

    def isReferenceValid(self):
        """Test if the reference information is complete and the data reference is valid.

        Valid references are:

        - A path external to the archive/workflow system
        - A fully defined internal reference consisting of identifiers,
          content type, storage type, format, and version.

        Note that this is NOT an existence test.  References may be defined and validated
        before the file objects which they reference are created.

        Returns:

        True for either a valid external or internal reference or False otherwise.

        """
        if self.__externalFilePath is not None:
            return True
        else:
            return self.__isInternalReferenceValid()

    def getDirPathReference(self):
        """Get the path to the directory containing the data file reference.

        Returns:

        The file system path to the directory containing the file reference or *None*
        if this cannot be determined.

        """
        if self.__externalFilePath is not None:
            return self.__externalFilePath
        # if (self.__isInternalReferenceValid()):
        #    return self.__getInternalPath()
        else:
            return self.__getInternalPath()

    def getFilePathReference(self):
        """Get the versioned file path for an internal data file reference or the path
        to an external data file reference.

        Returns:

        The file system path to the file reference or *None* if this cannot be determined.

        """
        if self.__externalFilePath is not None:
            return self.__externalFilePath
        if not self.__isInternalReferenceValid():
            return None
        return self.__getInternalFilePath()

    def getFilePathExists(self, fP):
        try:
            if os.access(fP, os.R_OK):
                return True
            else:
                return False
        except Exception as _e:  # noqa: F841
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def getFileVersionNumber(self):
        """Get the version number corresponding to the current data file reference.

        Returns:

        The version number 1-N of the current data reference or 0 otherwise.
        External references are treated as having no version and 0 is returned for
        these cases.

        """

        if self.__externalFilePath is not None:
            return 0
        if not self.__isInternalReferenceValid():
            return 0
        return self.__getInternalVersionNumber()
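
    # Hedged usage sketch (identifiers and the site id are placeholders): a
    # reference is assembled with the setters above and resolved against the
    # file system only when a path or version number is requested.
    #
    #     dfr = DataFileReference(siteId=mySiteId)
    #     dfr.setDepositionDataSetId("D_000000001")
    #     dfr.setStorageType("archive")
    #     dfr.setContentTypeAndFormat("model", "pdbx")
    #     dfr.setVersionId("latest")
    #     if dfr.isReferenceValid():
    #         pth = dfr.getFilePathReference()  # versioned archive path or None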

    #
    # ------------------------------------------------------------------------------------------------------------------------------------
    #
    # --- The following private worker methods support the public path and validation methods.
    #

    def __isInternalReferenceValid(self):
        """Test if the current reference information is complete for an internal reference.
        A reference is considered internal when it points within the archive, workflow
        instance, deposit, or session file systems.  Otherwise the reference is considered
        external and not subject to internal naming or path conventions.

        Note that this is NOT an existence test.  References may be defined and validated
        before the file objects which they reference are created.

        Returns:

        True if the internal reference is complete or False otherwise.
        """
        referenceType = self.getReferenceType()

        if referenceType == "file":
            if (self.__contentType is None) or (self.__fileFormat is None) or (
                    self.__storageType is None) or (self.__versionId is None):
                # logger.debug("self.__contentType is: %s", self.__contentType)
                # logger.debug("self.__fileFormat is: %s", self.__fileFormat)
                # logger.debug("self.__storageType is: %s", self.__storageType)
                # logger.debug("self.__versionId is: %s", self.__versionId)

                return False

            if (self.__storageType in [
                    "archive", "autogroup", "wf-archive", "wf-instance",
                    "wf-shared", "deposit", "tempdep"
            ]) and (self.__depositionDataSetId is None):
                logger.debug("self.__depositionDataSetId is: %s",
                             self.__depositionDataSetId)
                return False

            if (self.__storageType in [
                    "session", "wf-session"
            ]) and (self.__sessionDataSetId is None):
                return False

            if (self.__storageType
                    == "wf-instance") and (self.__workflowInstanceId is None):
                return False

            if (self.__storageType
                    == "wf-shared") and (self.__workflowNameSpace is None):
                return False

            return True

        elif referenceType == "directory":
            if self.__storageType is None:
                return False

            if (self.__storageType in [
                    "archive", "autogroup", "wf-archive", "wf-instance",
                    "wf-shared", "deposit", "tempdep"
            ]) and (self.__depositionDataSetId is None):
                return False

            if (self.__storageType
                    == "wf-instance") and (self.__workflowInstanceId is None):
                return False

            if (self.__storageType
                    == "wf-shared") and (self.__workflowNameSpace is None):
                return False

            if (self.__storageType in ["wf-session", "session"
                                       ]) and (self.__sessionPath is None):
                return False

            return True

        else:
            return False

    # def __getExternalPath(self):
    #     """Get the path of the current external file reference.

    #     Returns:

    #     The external file path.  *None* is returned on failure.
    #     """
    #     try:
    #         (pth, _fn) = os.path.split(self.__externalFilePath)
    #         return pth
    #     except Exception as _e:  # noqa: F841
    #         return None

    # def __getExternalFileNameBase(self):
    #     """Get the base file name for the current external file reference.

    #     Returns:

    #     The external base file name.  *None* is returned on failure.
    #     """
    #     try:
    #         (_pth, fn) = os.path.split(self.__externalFilePath)
    #         return fn
    #     except Exception as _e:  # noqa: F841
    #         return None

    def __getInternalPath(self):
        """Compute the path to the current file reference within the archive/workflow file system.

        The file path convention is:
        - archive files     = <SITE_ARCHIVE_STORAGE_PATH>/archive/<deposition data set id>/
        - deposit files     = <SITE_ARCHIVE_STORAGE_PATH>/deposit/<deposition data set id>/
        - temp deposit files     = <SITE_ARCHIVE_STORAGE_PATH>/tempdep/<deposition data set id>/
        - workflow shared   = <SITE_ARCHIVE_STORAGE_PATH>/workflow/<deposition data set id>/shared/<self.__workflowNameSpace>
        - workflow instance = <SITE_ARCHIVE_STORAGE_PATH>/workflow/<deposition data set id>/instance/<self.__workflowInstanceId>
        - session files     = session path/

        Top-level site-specific path details are obtained from the ConfigInfo() class.

        Returns:

        The path of the directory containing this data file reference.  *None* is returned on failure.

        """
        try:
            if self.__storageType == "archive" or self.__storageType == "wf-archive":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "archive", self.__depositionDataSetId)
            elif self.__storageType == "autogroup":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "autogroup", self.__depositionDataSetId)
            elif self.__storageType == "deposit":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "deposit", self.__depositionDataSetId)
            elif self.__storageType == "tempdep":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "tempdep", self.__depositionDataSetId)
            elif self.__storageType == "wf-shared":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "workflow", self.__depositionDataSetId,
                                    "shared", self.__workflowNameSpace)
            elif self.__storageType == "wf-instance":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "workflow", self.__depositionDataSetId,
                                    "instance", self.__workflowInstanceId)
            elif self.__storageType in ["session", "wf-session"]:
                tpth = self.__sessionPath
            elif self.__storageType == "uploads":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"),
                                    "deposit", "temp_files",
                                    "deposition_uploads",
                                    self.__depositionDataSetId)
            else:
                tpth = None
            pth = os.path.abspath(tpth)
        except Exception as e:
            logger.exception("Failing with %r", str(e))

            pth = None

        return pth
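
    # Worked example of the mapping above (the identifier is a placeholder):
    # storage type "archive" with deposition id D_000000001 resolves to
    # <SITE_ARCHIVE_STORAGE_PATH>/archive/D_000000001, made absolute by abspath().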

    def __getInternalFileNameBase(self):
        """Compute the base file name based on the current values of storage type, identifer, content type, file format.

        The file name convention is:
        - archive/shared  files  = <deposition data set id>_<content acronym>_<part number>.<format_extenstion>
        - instance files         = <deposition data set id>_<content acronym>_<part number>.<format_extension>
        - session  files         = <session data set id>_<content acronym>_<part number>.<format_extenstion>

        Returns:

        The base file name. This base file name lacks version details.

        """

        try:
            if self.getReferenceType() != "file":
                return None

            if self.__storageType in [
                    "archive", "autogroup", "wf-archive", "wf-shared",
                    "deposit", "tempdep"
            ]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P" +
                      str(self.__filePartNumber) + "." +
                      self.__formatExtensionD[self.__fileFormat])
            elif self.__storageType in ["session", "wf-session"]:
                fn = (self.__sessionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P" +
                      str(self.__filePartNumber) + "." +
                      self.__formatExtensionD[self.__fileFormat])
            elif self.__storageType in ["wf-instance"]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P" +
                      str(self.__filePartNumber) + "." +
                      self.__formatExtensionD[self.__fileFormat])
            else:
                fn = None
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            fn = None

        return fn
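
    # Worked example of the naming convention above (the acronym and extension
    # are illustrative; the authoritative values come from the configured
    # content-type and format-extension dictionaries): deposition id D_000000001,
    # content type "model", part 1, and format "pdbx" with extension "cif"
    # yield the base name D_000000001_model_P1.cif.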

    def __getInternalFilePath(self):
        """Compute the versioned file path for a file within the archive/worflow file system.

           If either the *latest*, *next*, or *previous* version of the referenced file is
           selected then a file system check is performed to determine the appropriate
           version number.

        Returns:

        File path including version or None on failure.
        """

        try:
            if self.getReferenceType() != "file":
                return None
            dirPath = self.__getInternalPath()
            fN = self.__getInternalFileNameVersioned()
            pth = os.path.join(dirPath, fN)
            return pth
        except Exception as _e:  # noqa: F841
            return None

    def getVersionIdSearchTarget(self):
        """Create a search target for the files containing any version identifier consistent with the
        current file settings.

        Returns the search target appropriate for glob() or None
        """
        try:
            if self.getReferenceType() != "file":
                return None
            baseName = self.__getInternalFileNameBase()
            vst = baseName + ".V*"
            return vst
        except Exception as _e:  # noqa: F841
            return None

    def __getInternalFileNameVersioned(self):
        """Compute the versioned file name for a file within the archive/worflow file system.

           If either the *latest*, *next*, or *previous* version of the referenced file is
           selected then a file system check is performed to determine the appropriate
           version number.

        Returns:

        File name including version or None on failure.
        """
        try:
            if self.getReferenceType() != "file":
                return None
            dirPath = self.__getInternalPath()
            #
            # First resolve any symbolic partition information -
            #
            self.__filePartNumber = self.__getInternalPartitionNumber()
            #
            baseName = self.__getInternalFileNameBase()
            if self.__versionId == "latest":
                iV = self.__latestVersion(dirPath, baseName)
                if iV == 0:
                    # No version exists so start at 1
                    fn = baseName + ".V1"
                else:
                    #
                    fn = baseName + ".V" + str(int(iV))

            elif self.__versionId == "next":
                iV = self.__latestVersion(dirPath, baseName)
                if iV == 0:
                    # No version exists so start at 1
                    fn = baseName + ".V1"
                else:
                    #
                    fn = baseName + ".V" + str(int(iV + 1))

            elif self.__versionId == "previous":
                iV = self.__latestVersion(dirPath, baseName)
                if iV <= 1:
                    # No previous version.
                    fn = None
                else:
                    #
                    fn = baseName + ".V" + str(int(iV - 1))

            elif self.__versionId == "original":
                fn = baseName + ".V1"
            elif self.__versionId == "none":
                fn = baseName
            else:
                fn = baseName + ".V" + str(int(self.__versionId))

            return fn
        except Exception as e:
            logger.exception("failure in getInternalFileNameVersioned %r",
                             str(e))
            return None

    def __getInternalVersionNumber(self):
        """Determine the version number corresponding to the current version Id setting.

           If either the *latest*, *next*, or *previous* version of the referenced file is
           selected then a file system check is performed to determine the appropriate
           version number.

        Returns:

        Returns a version number from 1-N, or 0 on failure.
        """
        try:
            if self.getReferenceType() != "file":
                return 0
            dirPath = self.__getInternalPath()
            self.__filePartNumber = self.__getInternalPartitionNumber()
            baseName = self.__getInternalFileNameBase()
            if self.__versionId == "latest":
                iV = self.__latestVersion(dirPath, baseName)
            elif self.__versionId == "next":
                iV = self.__latestVersion(dirPath, baseName)
                iV += 1
            elif self.__versionId == "previous":
                iV = self.__latestVersion(dirPath, baseName)
                iV -= 1
                if iV < 0:
                    iV = 0
            elif self.__versionId == "original":
                iV = 1
            else:
                iV = int(self.__versionId)
            return iV
        except Exception as e:
            if self.__debug:
                logger.exception("Failing with %r", str(e))
        return 0

    def __latestVersion(self, dirPath, baseName):
        """Get the latest version of file *baseName* in path *dirPath*.

        The convention for version numbering is <baseName>.V#

        Returns:

        The latest integer version number  or 0 if no versions exist.

        """
        try:
            fN = None
            if self.getReferenceType() != "file":
                return 0
            vList = []
            fileList = os.listdir(dirPath)
            for fN in fileList:
                # logger.debug("__latestVersion - baseName %s fN %s", baseName,fN)
                if fN.startswith(baseName):
                    fSp = fN.split(".V")
                    if (len(fSp) < 2) or (not fSp[1].isdigit()):
                        continue
                    vList.append(int(fSp[1]))
                    # logger.debug("__latestVersion - vList %r\n" % (fSp))
            if len(vList) > 0:
                vList.sort()
                return vList[-1]
            else:
                return 0
        except Exception as e:
            if self.__debug:
                logger.exception(
                    "Failing -dirPath %s  baseName %s fN %s with %s", dirPath,
                    baseName, fN, str(e))

        return 0
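
    # Illustration (a hedged sketch; file names borrowed from test data that
    # appears later in this listing): with "D_900002_model_P1.cif.V1" and
    # "D_900002_model_P1.cif.V3" present in dirPath,
    # __latestVersion(dirPath, "D_900002_model_P1.cif") returns 3, so a
    # versionId of "latest" resolves to V3, "next" to V4, and "previous" to V2.
    # With no matching files the method returns 0.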

    ##
    ##
    def __latestPartitionNumber(self, dirPath, searchTarget):
        """Get the latest partition number of file in path *dirPath*
        consistent with current file settings.

        Returns:

        The latest integer partition number  or 0 if no files exist.

        """
        try:
            fN = None
            if self.getReferenceType() != "file":
                return 0
            pList = []
            searchPath = os.path.join(dirPath, searchTarget)
            if self.__debug:
                logger.debug(
                    "+DataFileReference.__lastestPartitionNumber() search target %s",
                    searchPath)
            pathList = glob.glob(searchPath)
            for pth in pathList:
                if self.__debug:
                    logger.debug(
                        "+DataFileReference.__lastestPartitionNumber() search path %s",
                        pth)
                (_td, fN) = os.path.split(pth)
                fL1 = fN.split(".")
                fL2 = fL1[0].split("_")
                pList.append(int(fL2[3][1:]))

            if self.__debug:
                logger.debug(
                    "+DataFileReference.__lastestPartitionNumber() part number list  %r",
                    pList)
            if len(pList) > 0:
                pList.sort()
                return pList[-1]
            else:
                return 0
        except Exception as e:
            if self.__debug:
                logger.exception("Failing with %r", str(e))

        return 0

    def getPartitionNumberSearchTarget(self):
        """Create a search target for the files containing any partition number consistent with the
           current file settings.  The search target is independent of version identifier.

        The file name convention is:
        - archive/shared  files  = <deposition data set id>_<content acronym>_P*.<format_extension>
        - instance files         = <deposition data set id>_<content acronym>_P*.<format_extension>
        - session  files         = <session data set id>_<content acronym>_P*.<format_extension>

        Returns:

        A search string appropriate for glob().

        """

        try:
            if self.getReferenceType() != "file":
                return None

            if self.__storageType in [
                    "archive", "autogroup", "wf-archive", "wf-shared",
                    "deposit", "tempdep"
            ]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*." +
                      self.__formatExtensionD[self.__fileFormat] + "*")
            elif self.__storageType in ["session", "wf-session"]:
                fn = (self.__sessionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*." +
                      self.__formatExtensionD[self.__fileFormat] + "*")
            elif self.__storageType in ["wf-instance"]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*." +
                      self.__formatExtensionD[self.__fileFormat] + "*")
            else:
                fn = None
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            fn = None

        return fn
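
    # Illustration (a hedged sketch): for an archive reference with deposition
    # data set id "D_900002", content type "model" and the "pdbx" format
    # (extension "cif"), the target built above is "D_900002_model_P*.cif*",
    # which matches names such as "D_900002_model_P1.cif.V1".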

    def getContentTypeSearchTarget(self):
        """Create a search target for the files containing any variation consistent with the
           content type in current file settings.  The search target is independent of partition,
           format and version identifier.

        The file name convention is:
        - archive/shared  files  = <deposition data set id>_<content acronym>_P*
        - instance files         = <deposition data set id>_<content acronym>_P*
        - session  files         = <session data set id>_<content acronym>_P*

        Returns:

        A search string appropriate for glob().

        """

        try:
            # if (self.getReferenceType() != 'file'):
            #  return None

            if self.__storageType in [
                    "archive", "autogroup", "wf-archive", "wf-shared",
                    "deposit", "tempdep"
            ]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*")
            elif self.__storageType in ["session", "wf-session"]:
                fn = (self.__sessionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*")
            elif self.__storageType in ["wf-instance"]:
                fn = (self.__depositionDataSetId + "_" +
                      self.__contentInfoD[self.__contentType][1] + "_P*")
            else:
                fn = None
        except Exception as e:
            logger.exception(
                "Failing storage %r data set id %r content type %r with %r",
                self.__storageType, self.__depositionDataSetId,
                self.__contentType, str(e))
            fn = None

        return fn

    def __getInternalPartitionNumber(self):
        """Determine the partition number corresponding to the current partition number setting.

           If either the *latest*, *next*, or *previous* partition of the referenced file is
           selected then a file system check is performed to determine the appropriate
           partition number.

        Returns:

        Returns a partition number from 1-N, or 0 on failure.
        """
        try:
            if self.getReferenceType() != "file":
                return 0
            dirPath = self.__getInternalPath()
            searchTarget = self.getPartitionNumberSearchTarget()
            if self.__filePartNumber == "latest":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
            elif self.__filePartNumber == "next":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
                iP += 1
            elif self.__filePartNumber == "previous":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
                iP -= 1
                if iP < 0:
                    iP = 0
            elif self.__filePartNumber == "original":
                iP = 1
            else:
                iP = int(self.__filePartNumber)
            return iP
        except Exception as e:
            logger.exception("Failing with %r", str(e))

        return 0
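
# A minimal usage sketch for the search-target helpers above (hedged: the
# setter names below are assumptions based on the attributes used in this
# listing, not a confirmed API; the search-target method names are verbatim).
#
# dfr = DataFileReference(siteId=getSiteId(), verbose=True, log=sys.stderr)
# dfr.setDepositionDataSetId("D_900002")        # assumed setter
# dfr.setStorageType("archive")                 # assumed setter
# dfr.setContentTypeAndFormat("model", "pdbx")  # assumed setter
# dfr.setVersionId("latest")                    # assumed setter
# dfr.setPartitionNumber("1")                   # assumed setter
# print(dfr.getVersionIdSearchTarget())         # e.g. D_900002_model_P1.cif.V*
# print(dfr.getPartitionNumberSearchTarget())   # e.g. D_900002_model_P*.cif*
# print(dfr.getContentTypeSearchTarget())       # e.g. D_900002_model_P*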
Example #22
class DataMaintenance(object):
    """Collection of data maintenance utilities supporting
    purge and recovery of data files post release.

    This class duplicates some methods from class DataExchange for
    anticipated future use.

    """
    def __init__(self,
                 siteId=None,
                 testMode=False,
                 verbose=False,
                 log=sys.stderr):

        self.__verbose = verbose
        self.__lfh = log
        self.__siteId = siteId
        # In test mode no deletions are performed -
        self.__testMode = testMode
        self.__debug = False
        self.__sessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        self.__siteId = siteId
        self.__cI = ConfigInfo(self.__siteId)
        self.__sessionPath = None
        self.__pI = PathInfo(siteId=self.__siteId,
                             sessionPath=self.__sessionPath,
                             verbose=self.__verbose,
                             log=self.__lfh)

    def setSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__sessionPath = inputSessionPath

    def purgeLogs(self, dataSetId):
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId, "log")
        if self.__verbose:
            self.__lfh.write(
                "+DataMaintenance.purgeLogs() - purging logs in directory  %s\n"
                % (dirPath))

        # Ensure a defined result even when the log directory is not writable -
        pthList = []
        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.purgeLogs() - purging pattern is %s\n" %
                    (fpattern))

            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.purgeLogs() candidate path length is %d\n"
                    % len(pthList))
            #
            for pth in pthList:
                try:
                    if self.__testMode:
                        self.__lfh.write(
                            "+DataMaintenance.purgeLogs() TEST MODE skip remove %s\n"
                            % pth)
                    else:
                        os.remove(pth)
                except:  # noqa: E722 pylint: disable=bare-except
                    pass
            #
        return pthList

    def reversePurge(self,
                     dataSetId,
                     contentType,
                     formatType="pdbx",
                     partitionNumber=1):
        fn = self.__getArchiveFileName(dataSetId,
                                       contentType=contentType,
                                       formatType=formatType,
                                       version="none",
                                       partitionNumber=partitionNumber)

        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId)
        if self.__verbose:
            self.__lfh.write(
                "+DataMaintenance.__setup() - purging in directory  %s\n" %
                (dirPath))

        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write(
                "+DataMaintenance.__setup() - purging pattern is %s\n" %
                (fpattern))

        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write(
                "+DataMaintenance.__reversePurge() candidate length is %d\n" %
                len(pthList))
        #
        fList = []
        for pth in pthList:
            if not pth.endswith(".V1"):
                fList.append(pth)

        for pth in fList:
            try:
                if self.__testMode:
                    self.__lfh.write(
                        "+DataMaintenance.reversePurge() TEST MODE skip remove %s\n"
                        % pth)
                else:
                    os.remove(pth)
            except:  # noqa: E722 pylint: disable=bare-except
                pass
            #
        return fList

    def removeWorkflowDir(self, dataSetId):
        if (dataSetId is not None) and dataSetId.startswith("D_") and (
                len(dataSetId) > 10):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", dataSetId)
            if os.access(dirPath, os.W_OK):
                if self.__testMode:
                    self.__lfh.write(
                        "+DataMaintenance.removeWorkflowDir() TEST MODE skip remove %s\n"
                        % dirPath)
                else:
                    shutil.rmtree(dirPath)
                return True
            else:
                return False
        else:
            return False

    def getLogFiles(self, dataSetId, fileSource="archive"):
        pL = []
        if fileSource in ["archive"]:
            dirPath = self.__pI.getArchivePath(dataSetId)
        elif fileSource in ["deposit"]:
            dirPath = self.__pI.getDepositPath(dataSetId)
        else:
            return pL
        fpattern = os.path.join(dirPath, "*.log")
        pthList = glob.glob(fpattern)
        return pthList

    def getPurgeCandidates(self,
                           dataSetId,
                           wfInstanceId=None,
                           fileSource="archive",
                           contentType="model",
                           formatType="pdbx",
                           partitionNumber="1",
                           mileStone=None,
                           purgeType="exp"):
        """Return the latest version, and candidates for removal and compression.

        purgeType = 'exp'    strategy for experimental and model content -- keep V<last>, V2, and V1
                    'other'  strategy for other content types -- keep V<last> and V1

        """
        latestV = None
        rmL = []
        gzL = []
        vtL = self.getVersionFileList(dataSetId,
                                      wfInstanceId=wfInstanceId,
                                      fileSource=fileSource,
                                      contentType=contentType,
                                      formatType=formatType,
                                      partitionNumber=partitionNumber,
                                      mileStone=mileStone)
        n = len(vtL)
        if n > 0:
            latestV = vtL[0][0]
        if purgeType in ["exp"]:
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n == 3:
                gzL.append(vtL[1][0])
                gzL.append(vtL[2][0])
            elif n > 3:
                gzL.append(vtL[n - 2][0])
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 2):
                    rmL.append(vtL[i][0])
            else:
                pass
        elif purgeType in ["report", "other"]:
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n > 2:
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 1):
                    rmL.append(vtL[i][0])
            else:
                pass

        return latestV, rmL, gzL
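
    # Illustration (derived from the branches above): with versions V1..V5 on
    # disk (vtL sorted most recent first), purgeType="exp" returns latestV=V5,
    # rmL=[V4, V3] and gzL=[V2, V1], while purgeType="other" returns
    # latestV=V5, rmL=[V4, V3, V2] and gzL=[V1].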

    def getVersionFileListSnapshot(self,
                                   basePath,
                                   dataSetId,
                                   wfInstanceId=None,
                                   fileSource="archive",
                                   contentType="model",
                                   formatType="pdbx",
                                   partitionNumber="1",
                                   mileStone=None):
        """
        For the input content object return a list of file versions in a snapshot directory (recovery mode).

        Return:
              List of [(file path, modification date string,size),...]

        """
        pairL = []
        # basePath = '/net/wwpdb_da_data_archive/.snapshot/nightly.1/data'
        try:
            if fileSource == "archive":
                pth = self.__pI.getArchivePath(dataSetId)
                snPth = os.path.join(basePath, "archive", dataSetId)
            elif fileSource == "deposit":
                pth = self.__pI.getDepositPath(dataSetId)
                snPth = os.path.join(basePath, "deposit", dataSetId)

            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            _dir, fn = os.path.split(fPattern)
            altPattern = os.path.join(snPth, fn)
            srcL = self.__getFileListWithVersion([altPattern], sortFlag=True)
            for src in srcL:
                _d, f = os.path.split(src[0])
                dst = os.path.join(pth, f)
                if not os.access(dst, os.F_OK):
                    pairL.append((src[0], dst))

            return pairL

        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %s\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    ##

    def getVersionFileList(self,
                           dataSetId,
                           wfInstanceId=None,
                           fileSource="archive",
                           contentType="model",
                           formatType="pdbx",
                           partitionNumber="1",
                           mileStone=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]

        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)

            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileListWithVersion([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self,
                               dataSetId,
                               wfInstanceId,
                               fileSource="archive",
                               contentTypeList=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]

        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(
                    dataSetId=dataSetId,
                    wfInstanceId=wfInstanceId,
                    contentType=contentType,
                    fileSource=fileSource)

                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write(
                    "+DataMaintenance.getContentTypeFileList() patterns %r\n" %
                    fPatternList)
            return self.__getFileListWithVersion(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        else:
            return []
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileListWithVersion(self, fPatternList=None, sortFlag=False):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then file list is sorted by modification date (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]

        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))

            file_ver_tuple_list = []
            for f in files:
                tL = f.split(".")
                vId = tL[-1]
                if vId.startswith("V"):
                    if vId[-1] not in [
                            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
                    ]:
                        file_ver_tuple_list.append((f, int(vId[1:-1])))
                    else:
                        file_ver_tuple_list.append((f, int(vId[1:])))

            # Sort the tuple list by version id
            #
            if sortFlag:
                file_ver_tuple_list.sort(key=lambda x: x[1], reverse=True)

            return file_ver_tuple_list
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for pattern %r error %r\n"
                    % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then the file list is sorted by modification date (recent changes first).

        Return:
              List of [(file path, modification date string, KBytes),...]

        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))

            file_date_tuple_list = []
            for x in files:
                d = os.path.getmtime(x)
                s = float(os.path.getsize(x)) / 1000.0
                file_date_tuple = (x, d, s)
                file_date_tuple_list.append(file_date_tuple)

            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda x: x[1], reverse=True)
            rTup = []
            for fP, mT, sZ in file_date_tuple_list:
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for patter %r error %r\n"
                    % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    ##
    def __getArchiveFileName(self,
                             dataSetId,
                             wfInstanceId=None,
                             contentType="model",
                             formatType="pdbx",
                             version="latest",
                             partitionNumber="1",
                             mileStone=None):
        (_fp, _d, f) = self.__targetFilePath(
            dataSetId=dataSetId,
            wfInstanceId=wfInstanceId,
            fileSource="archive",
            contentType=contentType,
            formatType=formatType,
            version=version,
            partitionNumber=partitionNumber,
            mileStone=mileStone,
        )
        return f

    # def __getInstanceFileName(self, dataSetId, wfInstanceId=None, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (_fp, _d, f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource="wf-instance",
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return f

    # def __getFilePath(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (fp, _d, _f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource=fileSource,
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return fp

    def __targetFilePath(self,
                         dataSetId,
                         wfInstanceId=None,
                         fileSource="archive",
                         contentType="model",
                         formatType="pdbx",
                         version="latest",
                         partitionNumber="1",
                         mileStone=None):
        """Return the file path, directory path, and filen ame  for the input content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataMaintenance.__targetFilePath() failing for data set %s instance %s file source %s error %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)

            return (None, None, None)
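
# A minimal usage sketch for the maintenance utilities above (hedged: assumes
# the DataMaintenance class in this listing is in scope and a valid site
# configuration is available; with testMode=True the purge methods only report
# what would be removed).
#
# dm = DataMaintenance(siteId=getSiteId(), testMode=True, verbose=True)
# latestV, rmL, gzL = dm.getPurgeCandidates("D_900002", contentType="model",
#                                           formatType="pdbx", purgeType="exp")
# dm.purgeLogs("D_900002")
# dm.reversePurge("D_900002", contentType="model", formatType="pdbx")
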
class RcsbDpUtilityAnnotTests(unittest.TestCase):
    def setUp(self):
        self.__lfh = sys.stderr
        # Pick up site information from the environment or failover to the development site id.
        self.__siteId = getSiteId(defaultSiteId='WWPDB_DEPLOY_TEST')
        self.__lfh.write("\nTesting with site environment for:  %s\n" %
                         self.__siteId)
        #
        self.FILE_ROOT = os.path.dirname(os.path.realpath(__file__))
        self.__cI = ConfigInfo(self.__siteId)
        self.__siteWebAppsSessionsPath = self.__cI.get(
            'SITE_WEB_APPS_SESSIONS_PATH')
        self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)

        self.__testFilePath = os.path.join(self.FILE_ROOT, 'data')
        self.__testFileAnnotSS = 'rcsb070236.cif'
        self.__testFileAnnotSSTop = 'topology.txt'
        #
        self.__testFileAnnotLink = '3rij.cif'
        self.__testFileAnnotCisPeptide = '5hoh.cif'
        self.__testFileAnnotSolvent = 'D_900002_model_P1.cif'
        self.__testFileAnnotValidate = '3rij.cif'
        self.__testFileAnnotNA = '1o3q.cif'
        self.__testFileAnnotSite = '1xbb.cif'
        self.__testIdAnnotSite = '1xbb'
        #
        self.__testFileAnnotSiteAlt = 'D_1000200391_model_P1.cif.V27'
        self.__testIdAnnotSiteAlt = 'D_1000200391'

        #
        self.__testFileAnnotRcsb = 'rcsb033781.cif'
        self.__testFileAnnotRcsbEps = 'rcsb013067.cifeps'
        #
        self.__testFilePdbPisa = self.__cI.get('DP_TEST_FILE_PDB_PISA')
        self.__testFileCifPisa = self.__cI.get('DP_TEST_FILE_CIF_PISA')
        #
        self.__testFileStarCs = "star_16703_test_2.str"
        self.__testFileCsRelatedCif = "cor_16703_test.cif"
        #
        self.__testFileValidateXyz = "1cbs.cif"
        self.__testFileValidateSf = "1cbs-sf.cif"
        self.__testValidateIdList = ["1cbs", "3of4", "3oqp"]
        self.__testArchiveIdList = [("D_900002", "4EC0"), ("D_600000", "4F3R")]
        #
        self.__testFileCifSeq = "RCSB095269_model_P1.cif.V1"
        self.__testFileSeqAssign = "RCSB095269_seq-assign_P1.cif.V1"

        self.__testFileMtzBad = "mtz-bad.mtz"
        self.__testFileMtzGood = "mtz-good.mtz"

        self.__testFileMtzRunaway = "bad-runaway.mtz"
        self.__testFileXyzRunaway = "bad-runaway.cif"

        self.__testMapNormal = "normal.map"
        self.__testMapSpider = "testmap.spi"
        self.__testMapLarge = "large.map"

        # self.__testFilePrdSearch       = '3RUN.cif'
        self.__testFilePrdSearch = 'D_1200000237_model_P1.cif.V1'

        self.__testValidateXrayIdList = ['1cbs', '4hea', '4u4r']
        self.__testValidateNmrIdList = ['2MM4', '2MMZ']

        self.__testValidateXrayNeutronModel = 'D_1200007116_model-upload_P1.cif.V1'
        self.__testValidateXrayNeutronSF = 'D_1200007116_sf-upload_P1.cif.V1'
        #self.__testValidateXrayLargeIdList = ['4u4r']
        #self.__testValidateNmrIdList = ['2MM4']
        #self.__testValidateNmrLargeIdList = ['2MMZ']

        self.__testDccModelId = '4wpo'

        self.__testSpecialPosition = 'D_1000225739_model_P1.cif.V4'
        self.__testDepAssembly = "testassem.cif"

    def tearDown(self):
        pass
        # if os.path.exists(self.__tmpPath):
        #     shutil.rmtree(self.__tmpPath)

    def test_AnnotValidateGeometryCheck(self):
        """  Test of updating geometrical validation diagnostics -
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath, "annot-validate-geometry-check.cif")
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        inp_path = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inp_path)
        ret = dp.op("annot-validate-geometry")
        dp.expLog(
            os.path.join(self.__tmpPath,
                         "annot-validate-geometry-check-pdbx.log"))
        dp.exp(of)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def test_AnnotValidateGeometryCheckRemote(self):
        """  Test of updating geometrical validation diagnostics -
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))

        of = os.path.join(self.__tmpPath,
                          "annot-validate-geometry-check-remote.cif")
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        inp_path = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inp_path)
        # dp.setRunRemote()
        ret = dp.op("annot-validate-geometry")
        dp.expLog(
            os.path.join(self.__tmpPath,
                         "annot-validate-geometry-check-pdbx-remote.log"))
        dp.exp(of)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def testAnnotRcsb2PdbxRemote(self):
        """  RCSB CIF -> PDBx conversion  (Using the smaller application in the annotation package)

             Converting to RCSB to PDB id in _entry.id and related items.
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(
            self.__tmpPath,
            "annot-rcsb2pdbx-withpdbid-" + self.__testFileAnnotRcsb)
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        inpPath = os.path.join(self.__testFilePath, self.__testFileAnnotRcsb)
        dp.imp(inpPath)
        # dp.setRunRemote()
        ret = dp.op("annot-rcsb2pdbx-withpdbid")
        dp.expLog(os.path.join(self.__tmpPath, "annot-rcsb2pdbx.log"))
        dp.exp(of)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def testAnnotValidateListXrayTestRemote(self):
        """  Test create validation report for the test list of example PDB ids (x-ray examples)
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateXrayIdList:
            self.__tmpPath = tempfile.mkdtemp(
                dir=self.__siteWebAppsSessionsPath)
            self.__lfh.write("\nStarting {} in {}\n".format(
                pdbId, self.__tmpPath))
            ofpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt.pdf")
            ofxml = os.path.join(self.__tmpPath, pdbId + "-valdata.xml")
            offullpdf = os.path.join(self.__tmpPath,
                                     pdbId + "-valrpt_full.pdf")
            ofpng = os.path.join(self.__tmpPath, pdbId + "-val-slider.png")
            ofsvg = os.path.join(self.__tmpPath, pdbId + "-val-slider.svg")
            #
            testFileValidateXyz = pdbId + ".cif"
            testFileValidateSf = pdbId + "-sf.cif"
            dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                               siteId=self.__siteId,
                               verbose=True)
            # dp.setDebugMode(True)

            xyzPath = os.path.abspath(
                os.path.join(self.__testFilePath, testFileValidateXyz))
            sfPath = os.path.abspath(
                os.path.join(self.__testFilePath, testFileValidateSf))
            # dp.addInput(name="request_annotation_context", value="yes")
            dp.addInput(name="request_validation_mode", value="annotate")
            dp.addInput(name='run_dir',
                        value=os.path.join(
                            self.__siteWebAppsSessionsPath,
                            "validation_%s" % random.randrange(9999999)))
            # dp.addInput(name="request_validation_mode", value="server")
            dp.imp(xyzPath)
            dp.addInput(name="sf_file_path", value=sfPath)
            # dp.setRunRemote()
            ret = dp.op("annot-wwpdb-validate-all")
            dp.expLog(
                os.path.join(self.__tmpPath,
                             pdbId + "-annot-validate-test.log"))
            dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
            # dp.cleanup()

            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(ofpdf))
            self.assertTrue(os.path.exists(ofxml))
            self.assertTrue(os.path.exists(offullpdf))
            self.assertTrue(os.path.exists(ofpng))
            self.assertTrue(os.path.exists(ofsvg))

    def testAnnotValidateXrayNeutronRemote(self):
        """  Test create validation report for the test list of example PDB ids (x-ray examples)
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))

        self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
        self.__lfh.write("\nStarting x-ray neutron in {}\n".format(
            self.__tmpPath))
        ofpdf = os.path.join(self.__tmpPath, "test-valrpt.pdf")
        ofxml = os.path.join(self.__tmpPath, "test-valdata.xml")
        offullpdf = os.path.join(self.__tmpPath, "test-valrpt_full.pdf")
        ofpng = os.path.join(self.__tmpPath, "test-val-slider.png")
        ofsvg = os.path.join(self.__tmpPath, "test-val-slider.svg")
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        # dp.setDebugMode(True)

        xyzPath = os.path.abspath(
            os.path.join(self.__testFilePath,
                         self.__testValidateXrayNeutronModel))
        sfPath = os.path.abspath(
            os.path.join(self.__testFilePath,
                         self.__testValidateXrayNeutronSF))
        # dp.addInput(name="request_annotation_context", value="yes")
        dp.addInput(name="request_validation_mode", value="annotate")
        dp.addInput(name='run_dir',
                    value=os.path.join(
                        self.__siteWebAppsSessionsPath,
                        "validation_%s" % random.randrange(9999999)))
        # dp.addInput(name="request_validation_mode", value="server")
        dp.imp(xyzPath)
        dp.addInput(name="sf_file_path", value=sfPath)
        # dp.setRunRemote()
        ret = dp.op("annot-wwpdb-validate-all")
        dp.expLog(os.path.join(self.__tmpPath, "test-annot-validate-test.log"))
        dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(ofpdf))
        self.assertTrue(os.path.exists(ofxml))
        self.assertTrue(os.path.exists(offullpdf))
        self.assertTrue(os.path.exists(ofpng))
        self.assertTrue(os.path.exists(ofsvg))

    def testAnnotValidateListNmrTestRemote(self):
        """  Test create validation report for the test list of example PDB ids (NMR examples)
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateNmrIdList:
            self.__tmpPath = tempfile.mkdtemp(
                dir=self.__siteWebAppsSessionsPath)
            self.__lfh.write("\nStarting {} in {}\n".format(
                pdbId, self.__tmpPath))
            ofpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt.pdf")
            ofxml = os.path.join(self.__tmpPath, pdbId + "-valdata.xml")
            offullpdf = os.path.join(self.__tmpPath,
                                     pdbId + "-valrpt_full.pdf")
            ofpng = os.path.join(self.__tmpPath, pdbId + "-val-slider.png")
            ofsvg = os.path.join(self.__tmpPath, pdbId + "-val-slider.svg")
            #
            testFileValidateXyz = pdbId + ".cif"
            testFileValidateCs = pdbId + "-cs.cif"
            dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                               siteId=self.__siteId,
                               verbose=True)

            xyzPath = os.path.abspath(
                os.path.join(self.__testFilePath, testFileValidateXyz))
            csPath = os.path.abspath(
                os.path.join(self.__testFilePath, testFileValidateCs))
            dp.addInput(name="request_annotation_context", value="yes")
            dp.addInput(name='run_dir',
                        value=os.path.join(
                            self.__siteWebAppsSessionsPath,
                            "validation_%s" % random.randrange(9999999)))
            # adding explicit selection of steps --
            # Alternate
            #dp.addInput(name="step_list", value=" coreclust,chemicalshifts,writexml,writepdf ")
            dp.addInput(name='kind', value='nmr')
            dp.imp(xyzPath)
            dp.addInput(name="cs_file_path", value=csPath)
            # dp.setRunRemote()
            ret = dp.op("annot-wwpdb-validate-all")
            dp.expLog(
                os.path.join(self.__tmpPath,
                             pdbId + "-annot-validate-test.log"))
            dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
            # dp.cleanup()

            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(ofpdf))
            self.assertTrue(os.path.exists(ofxml))
            self.assertTrue(os.path.exists(offullpdf))
            self.assertTrue(os.path.exists(ofpng))
            self.assertTrue(os.path.exists(ofsvg))

    def testMapFixRemote(self):
        """  Test mapfix utility
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))

        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        #
        inpPath = os.path.join(self.__testFilePath, self.__testMapNormal)
        of = os.path.join(self.__tmpPath, self.__testMapNormal + "-fix.map")
        dp.imp(inpPath)
        pixelSize = 2.54
        #dp.addInput(name="pixel-spacing-x", value=pixelSize)
        #dp.addInput(name="pixel-spacing-y", value=pixelSize)
        #dp.addInput(name="pixel-spacing-z", value=pixelSize)
        dp.addInput(name="input_map_file_path", value=inpPath)
        dp.addInput(name="output_map_file_path", value=of)
        dp.addInput(name="label", value='test')
        dp.addInput(name="voxel", value='{0} {0} {0}'.format(pixelSize))
        # dp.setRunRemote()
        ret = dp.op("deposit-update-map-header-in-place")
        dp.expLog(os.path.join(self.__tmpPath, "mapfix-big.log"))
        dp.exp(of)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    # def testMapFixLargeMapRemote(self):
    #     """  Test mapfix utility
    #     """
    #     self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    #     self.__lfh.write("\nRunning in {}\n".format(self.__tmpPath))
    #
    #     dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
    #     #
    #     inpPath = os.path.join(self.__testFilePath, self.__testMapLarge)
    #     of = os.path.join(self.__tmpPath, self.__testMapLarge + "-fix.map")
    #     dp.imp(inpPath)
    #     pixelSize = 1.327
    #     dp.addInput(name="input_map_file_path", value=inpPath)
    #     dp.addInput(name="output_map_file_path", value=of)
    #     dp.addInput(name="label", value='test')
    #     dp.addInput(name="voxel", value='{0} {0} {0}'.format(pixelSize))
    #     # dp.setRunRemote()
    #     ret = dp.op("deposit-update-map-header-in-place")
    #     dp.expLog(os.path.join(self.__tmpPath, "mapfix-big.log"))
    #     dp.exp(of)
    #     # dp.cleanup()
    #
    #     self.assertTrue(ret == 0)
    #     self.assertTrue(os.path.exists(of))

    def testAnnotSiteRemote(self):
        """  Calculate site environment
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath,
                          "annot-site-" + self.__testFileAnnotSite)
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        inpPath = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inpPath)
        dp.addInput(name="block_id", value=self.__testIdAnnotSite)
        # dp.setRunRemote()
        ret = dp.op("annot-site")
        dp.expLog(os.path.join(self.__tmpPath, "annot-site.log"))
        dp.exp(of)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def test_AnnotMergeRemote(self):
        """  Test of updating geometrical validation diagnostics -
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateXrayIdList:
            self.__tmpPath = tempfile.mkdtemp(
                dir=self.__siteWebAppsSessionsPath)
            testFileValidateXyz = pdbId + ".cif"
            xyzPath = os.path.abspath(
                os.path.join(self.__testFilePath, testFileValidateXyz))

            dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                               siteId=self.__siteId,
                               verbose=True)
            of = os.path.join(self.__tmpPath, "annot-merge-xyz-remote.cif")
            dp.imp(xyzPath)
            dp.addInput(name="new_coordinate_file_path", value=xyzPath)
            dp.addInput(name="new_coordinate_format", value='cif')
            # dp.setRunRemote()
            ret = dp.op("annot-merge-xyz")
            dp.expLog(
                os.path.join(self.__tmpPath, "annot-merge-xyz-remote.log"))
            dp.exp(of)
            # dp.cleanup()

            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(of))

    def testAnnotMtz2PdbxGood(self):
        """  Test mtz to pdbx conversion  (good mtz)
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        diagfn = os.path.join(self.__tmpPath, "sf-convert-diags.cif")
        ciffn = os.path.join(self.__tmpPath, "sf-convert-datafile.cif")
        dmpfn = os.path.join(self.__tmpPath, "sf-convert-mtzdmp.log")
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        mtzPath = os.path.join(self.__testFilePath, self.__testFileMtzGood)
        dp.imp(mtzPath)
        dp.setTimeout(15)
        ret = dp.op("annot-sf-convert")
        dp.expLog(os.path.join(self.__tmpPath, "sf-convert.log"))
        dp.expList(dstPathList=[ciffn, diagfn, dmpfn])
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(ciffn))
        self.assertTrue(os.path.exists(diagfn))
        self.assertTrue(os.path.exists(dmpfn))

    def testCif2pdbx_public(self):
        """  Test cif to pdbx conversion  (good cif)
        """
        self.__lfh.write(
            "\nStarting %s %s\n" %
            (self.__class__.__name__, sys._getframe().f_code.co_name))
        cifout = os.path.join(self.__tmpPath, self.__testFileAnnotSiteAlt)
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath,
                           siteId=self.__siteId,
                           verbose=True)
        cifin = os.path.join(self.__testFilePath, self.__testFileAnnotSiteAlt)
        dp.imp(cifin)
        ret = dp.op("cif2pdbx-public")
        dp.expLog(os.path.join(self.__tmpPath, "cif2pdbx-public.log"))
        dp.exp(cifout)
        # dp.cleanup()

        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(cifin))
        self.assertTrue(os.path.exists(cifout))
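
# A minimal driver sketch (hedged): the suite above follows the standard
# unittest pattern, so a single case can be run in the usual way, e.g.:
#
# if __name__ == "__main__":
#     suite = unittest.TestSuite()
#     suite.addTest(RcsbDpUtilityAnnotTests("testAnnotMtz2PdbxGood"))
#     unittest.TextTestRunner(verbosity=2).run(suite)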
Example #24
class StatusHistoryExec(object):
    def __init__(self,
                 defSiteId="WWWDPB_INTERNAL_RU",
                 sessionId=None,
                 verbose=True,
                 log=sys.stderr):
        self.__lfh = log
        self.__verbose = verbose
        self.__setup(defSiteId=defSiteId, sessionId=sessionId)

    def __setup(self, defSiteId=None, sessionId=None):
        """Simulate the web application environment for managing session storage of  temporaty data files."""
        self.__siteId = getSiteId(defaultSiteId=defSiteId)
        #
        self.__cI = ConfigInfo(self.__siteId)
        self.__topPath = self.__cI.get("SITE_WEB_APPS_TOP_PATH")
        self.__topSessionPath = self.__cI.get(
            "SITE_WEB_APPS_TOP_SESSIONS_PATH")
        #
        self.__reqObj = InputRequest({},
                                     verbose=self.__verbose,
                                     log=self.__lfh)
        self.__reqObj.setValue("TopSessionPath", self.__topSessionPath)
        self.__reqObj.setValue("TopPath", self.__topPath)
        self.__reqObj.setValue("WWPDB_SITE_ID", self.__siteId)
        #
        self.__reqObj.setValue("SITE_DA_INTERNAL_DB_USER",
                               os.environ["SITE_DA_INTERNAL_DB_USER"])
        self.__reqObj.setValue("SITE_DA_INTERNAL_DB_PASSWORD",
                               os.environ["SITE_DA_INTERNAL_DB_PASSWORD"])

        os.environ["WWPDB_SITE_ID"] = self.__siteId
        if sessionId is not None:
            self.__reqObj.setValue("sessionid", sessionId)

        # retained due to side effects
        _sessionObj = self.__reqObj.newSessionObj()  # noqa: F841
        self.__reqObj.printIt(ofh=self.__lfh)
        #

    def doCreateStatusHistory(self, numProc=1, overWrite=False):
        """ """
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj,
                                     verbose=self.__verbose,
                                     log=self.__lfh)
            entryIdList = shu.getEntryIdList()
            if numProc > 1:
                rL = shu.createHistoryMulti(entryIdList,
                                            numProc=numProc,
                                            overWrite=overWrite)
            else:
                rL = shu.createHistory(entryIdList, overWrite=overWrite)
            self.__lfh.write(
                "StatusHistoryExec.doCreateStatusHistory() %d status files created.\n\n"
                % len(rL))
        except:  # noqa: E722  pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)

    def doLoadStatusHistory(self, numProc=1, newTable=False):
        """ """
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj,
                                     verbose=self.__verbose,
                                     log=self.__lfh)
            if numProc > 1:
                return shu.loadStatusHistoryMulti(numProc, newTable=newTable)
            else:
                return shu.loadStatusHistory(newTable=newTable)
        except:  # noqa: E722  pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)

        return False

    def doLoadEntryStatusHistory(self, entryId):
        """Load/reload status history file for the input entryId"""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj,
                                     verbose=self.__verbose,
                                     log=self.__lfh)
            return shu.loadEntryStatusHistory(entryIdList=[entryId])
        except:  # noqa: E722  pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
        return False

    def doCreateEntryStatusHistory(self, entryId, overWrite=False):
        """ """
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj,
                                     verbose=self.__verbose,
                                     log=self.__lfh)
            rL = shu.createHistory([entryId], overWrite=overWrite)
            self.__lfh.write(
                "StatusHistoryExec.doCreateEntryStatusHistory() %d status files created.\n\n"
                % len(rL))
        except:  # noqa: E722  pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)

    def doCreateStatusHistorySchema(self):
        """Create/recreate status history schema -"""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj,
                                     verbose=self.__verbose,
                                     log=self.__lfh)
            return shu.createStatusHistorySchema()
        except:  # noqa: E722  pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
        return False
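
# A minimal usage sketch (hedged: __setup() above reads
# SITE_DA_INTERNAL_DB_USER and SITE_DA_INTERNAL_DB_PASSWORD from the
# environment, so both must be set before construction).
#
# she = StatusHistoryExec(defSiteId="WWWDPB_INTERNAL_RU", verbose=True)
# she.doCreateStatusHistorySchema()
# she.doCreateStatusHistory(numProc=2, overWrite=False)
# she.doLoadStatusHistory(numProc=2, newTable=False)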
Example #25
def testCache(self):
    cI = ConfigInfo()
    self.assertEqual(cI.get("VARTEST"), "Hello")
    self.assertEqual(cI.get("TESTVAR1"), "1")
    self.assertEqual(cI.get("TESTVAR2"), "2")
class ConfigInfoDataSet(object):
    """
    Provides accessors for the correspondence between deposition data identifiers and
    deposition and annotation sites (e.g. wwpdb_site_id).

    """
    def __init__(self, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        self.__verbose = verbose
        self.__debug = True
        self.__cI = ConfigInfo(siteId=None, verbose=self.__verbose)
        self.__cIDepUI = ConfigInfoAppDepUI(siteId=getSiteId())
        # Default data set id range assignments
        self.__depIdAssignments = self.__cI.get(
            "SITE_DATASET_ID_ASSIGNMENT_DICTIONARY")
        self.__depTestIdAssignments = self.__cI.get(
            "SITE_DATASET_TEST_ID_ASSIGNMENT_DICTIONARY")
        self.__siteBackupD = self.__cI.get("SITE_BACKUP_DICT", default={})
        self.__dsLocD = None
        #
        self.__lockDirPath = self.__cI.get(
            "SITE_SERVICE_REGISTRATION_LOCKDIR_PATH", "/tmp")
        lockutils.set_defaults(self.__lockDirPath)

    def getSiteId(self, depSetId):
        """Return siteId for the input depSetId subject to site backup details -

        siteBackupD[prodSite] = [backupSite1, backupSite2,...]
        """
        siteId = self.__getSiteId(depSetId)
        mySiteId = self.__cI.get("SITE_PREFIX", default=None)
        #
        if mySiteId and siteId:
            # is mySiteId a backup for siteId?
            if siteId in self.__siteBackupD and mySiteId in self.__siteBackupD[
                    siteId]:
                if self.__debug:
                    logger.debug("using backup %s for %s", mySiteId, siteId)
                siteId = mySiteId

        return siteId
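
    # Illustration (a hedged sketch with made-up site ids): if depSetId maps
    # to "SITE_A", SITE_BACKUP_DICT contains {"SITE_A": ["SITE_B"]}, and the
    # local SITE_PREFIX is "SITE_B", then getSiteId() returns "SITE_B" in
    # place of "SITE_A".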

    def getDataSetLocationDict(self):
        d = {}
        try:
            d = self.__readLocationDictionary()
            return d
        except Exception as e:
            logger.error("failed reading data set location dictionary: %s",
                         str(e))
            if self.__debug:
                logger.exception("failed reading data set location dictionary")
        return d

    def getDataSetLocations(self, siteId):
        dsL = []
        try:
            d = self.__readLocationDictionary()
            for ky in d:
                if d[ky] == siteId:
                    dsL.append(ky)
            return dsL
        except Exception as e:
            logger.info("failed reading data set locations for site %r - %s",
                        siteId, str(e))
            if self.__debug:
                logger.exception(
                    "failed reading data set locations for site %r - %s",
                    siteId, str(e))
        return []

    def removeDataSets(self, dataSetIdList):
        try:
            d = self.__readLocationDictionary()
            for dsId in dataSetIdList:
                if dsId in d:
                    del d[dsId]
            return self.__writeLocationDictionary(d)
        except Exception as e:
            logger.error("failed %s", str(e))
            if self.__debug:
                logger.exception("failed")
        return False

    def writeLocationList(self, siteId, dataSetIdList):
        try:
            d = self.__readLocationDictionary()
            for dsId in dataSetIdList:
                d[dsId] = siteId
            return self.__writeLocationDictionary(d)
        except Exception as e:
            logger.error("failed data set locations for site %r - %s", siteId,
                         str(e))
            if self.__debug:
                logger.exception("failed data set locations for site %rs",
                                 siteId)
        return False

    def __readLocationDictionary(self):
        """Read the dictionary cotaining data set site location information.

        Returns: d[<data_set_id>] = <site_id> or an empty dictionary.
        """
        fp = self.__cIDepUI.get_site_dataset_siteloc_file_path()
        try:
            with open(fp, "r") as infile:
                return json.load(infile)
        except Exception as e:
            logger.error("failed reading json resource file %s - %s", fp,
                         str(e))
            if self.__debug:
                logger.exception("failed reading json resource file %s", fp)
        return {}
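
    # Shape of the JSON resource read above (site values hypothetical):
    #   {"D_8000200001": "SITE_A", "D_8000200002": "SITE_B"}
    # Identifiers follow the zero-padded "D_%010d" form used in __getSiteId().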

    @lockutils.synchronized("configdataset.exceptionfile-lock", external=True)
    def __writeLocationDictionary(self, dsLocD, backup=True):
        """Write the input dictionary cotaining exceptional data set to site correspondences,

        Returns: True for success or False otherwise
        """
        fp = self.__cIDepUI.get_site_dataset_siteloc_file_path()

        try:
            if backup:
                bp = fp + datetime.datetime.now().strftime(
                    "-%Y-%m-%d-%H-%M-%S")
                d = self.__readLocationDictionary()
                with open(bp, "w") as outfile:
                    json.dump(d, outfile, indent=4)
            #
            with open(fp, "w") as outfile:
                json.dump(dsLocD, outfile, indent=4)
            return True
        except Exception as e:
            logger.error("failed writing json resource file %s - %s", fp,
                         str(e))
            if self.__debug:
                logger.exception("failed writing json resource file %s", fp)
        return False

    def getDefaultIdRange(self, siteId):
        """Return the default upper and lower deposition data set identifier codes
        assigned to the input siteId.

        Any site lacking a default range will get the range assigned to the UNASSIGNED site.

        Returns:   (lower bound, upper bound) for data set identifiers (int)
        """
        if siteId in self.__depIdAssignments:
            DEPID_START, DEPID_STOP = self.__depIdAssignments[siteId]
        elif "UNASSIGNED" in self.__depIdAssignments:
            DEPID_START, DEPID_STOP = self.__depIdAssignments["UNASSIGNED"]
        else:
            DEPID_START, DEPID_STOP = (-1, -1)
        return (DEPID_START, DEPID_STOP)

    def getTestIdRange(self, siteId):
        """Return the upper and lower deposition data set identifier codes
        assigned to the input siteId.

        Any site lacking a default range will get the range (-1, -1).

        Returns:   (lower bound, upper bound) for data set identifiers (int)
        """
        if siteId in self.__depTestIdAssignments:
            DEPID_START, DEPID_STOP = self.__depTestIdAssignments[siteId]
        else:
            DEPID_START, DEPID_STOP = (-1, -1)
        return (DEPID_START, DEPID_STOP)

    def getDefaultSiteId(self, depSetId):
        """Get the default site assignment for the input data set id."""
        return self.__getSiteId(depSetId)

    def __getSiteId(self, depSetId):
        """Return the siteId to which the input depSetId is within the default
        code assignment range.

        Input may be either a string "D_xxxxxxxxxx" or an integer/string "xxxxxxxxxx".

        """
        # check for exceptional cases --
        try:
            if self.__dsLocD is None:
                self.__dsLocD = self.__readLocationDictionary()
            if str(depSetId)[:2] == "D_":
                if depSetId in self.__dsLocD:
                    return self.__dsLocD[depSetId]
            else:
                tId = "D_" + str("%010d" % int(depSetId))
                if tId in self.__dsLocD:
                    return self.__dsLocD[tId]
        except Exception as e:
            if self.__debug:
                logger.exception(
                    "failed checking for exception dictionary for %r - %s",
                    depSetId, str(e))
        #
        # check default range assignment --
        try:
            if str(depSetId).startswith("D_"):
                idVal = int(str(depSetId)[2:])
            else:
                idVal = int(str(depSetId))
            for ky in self.__depIdAssignments.keys():
                idMin, idMax = self.__depIdAssignments[ky]
                if (idVal >= idMin) and (idVal <= idMax):
                    return ky
        except Exception as e:
            if self.__debug:
                logger.exception(
                    "failed checking deposition range for %r - %s", depSetId,
                    str(e))
        return None
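
A short usage sketch for ConfigInfoDataSet; the deposition identifier is hypothetical, and the result depends entirely on the site-config range assignments and the exception file:

cids = ConfigInfoDataSet(verbose=True)
siteId = cids.getSiteId("D_1000000001")      # exception file first, then default ranges
lower, upper = cids.getDefaultIdRange(siteId)
print(siteId, lower, upper)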
Beispiel #27
0
 def testBuiltin(self):
     """Tests if common built in definitions are set"""
     cI = ConfigInfo()
     self.assertIsNotNone(cI.get("PROJECT_VAL_REL_CUTOFF"))
     self.assertIsNone(cI.get("PROJECT_RANDOM"))
Beispiel #28
0
class MyConnectionBase(object):
    def __init__(self, siteId=None, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        #
        self.__siteId = siteId
        self._cI = ConfigInfo(self.__siteId)
        self._dbCon = None
        self.__authD = {}
        self.__databaseName = None
        self.__dbHost = None
        self.__dbUser = None
        self.__dbPw = None
        self.__dbSocket = None
        self.__dbPort = 3306
        self.__dbServer = "mysql"

    def setResource(self, resourceName=None):
        #
        if resourceName == "PRD":
            self.__databaseName = self._cI.get("SITE_REFDATA_PRD_DB_NAME")
            self.__dbHost = self._cI.get("SITE_REFDATA_DB_HOST_NAME")
            self.__dbSocket = self._cI.get("SITE_REFDATA_DB_SOCKET")
            self.__dbPort = self._cI.get("SITE_REFDATA_DB_PORT_NUMBER")

            self.__dbUser = self._cI.get("SITE_REFDATA_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_REFDATA_DB_PASSWORD")

        elif resourceName == "CC":
            self.__databaseName = self._cI.get("SITE_REFDATA_CC_DB_NAME")
            self.__dbHost = self._cI.get("SITE_REFDATA_DB_HOST_NAME")
            self.__dbSocket = self._cI.get("SITE_REFDATA_DB_SOCKET")
            self.__dbPort = self._cI.get("SITE_REFDATA_DB_PORT_NUMBER")

            self.__dbUser = self._cI.get("SITE_REFDATA_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_REFDATA_DB_PASSWORD")

        elif resourceName == "RCSB_INSTANCE":
            self.__databaseName = self._cI.get("SITE_INSTANCE_DB_NAME")
            self.__dbHost = self._cI.get("SITE_INSTANCE_DB_HOST_NAME")
            self.__dbSocket = self._cI.get("SITE_INSTANCE_DB_SOCKET")
            self.__dbPort = self._cI.get("SITE_INSTANCE_DB_PORT_NUMBER")

            self.__dbUser = self._cI.get("SITE_INSTANCE_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_INSTANCE_DB_PASSWORD")

        elif resourceName == "DA_INTERNAL":
            self.__databaseName = self._cI.get("SITE_DA_INTERNAL_DB_NAME")
            self.__dbHost = self._cI.get("SITE_DA_INTERNAL_DB_HOST_NAME")
            self.__dbPort = self._cI.get("SITE_DA_INTERNAL_DB_PORT_NUMBER")
            self.__dbSocket = self._cI.get("SITE_DA_INTERNAL_DB_SOCKET")

            self.__dbUser = self._cI.get("SITE_DA_INTERNAL_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_DA_INTERNAL_DB_PASSWORD")

        elif resourceName == "DA_INTERNAL_COMBINE":
            self.__databaseName = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_NAME")
            self.__dbHost = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_HOST_NAME")
            self.__dbPort = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_PORT_NUMBER")
            self.__dbSocket = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_SOCKET")

            self.__dbUser = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_DA_INTERNAL_COMBINE_DB_PASSWORD")
        elif resourceName == "DISTRO":
            self.__databaseName = self._cI.get("SITE_DISTRO_DB_NAME")
            self.__dbHost = self._cI.get("SITE_DISTRO_DB_HOST_NAME")
            self.__dbPort = self._cI.get("SITE_DISTRO_DB_PORT_NUMBER")
            self.__dbSocket = self._cI.get("SITE_DISTRO_DB_SOCKET")

            self.__dbUser = self._cI.get("SITE_DISTRO_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_DISTRO_DB_PASSWORD")

        elif resourceName == "STATUS":
            self.__databaseName = self._cI.get("SITE_DB_DATABASE_NAME")
            self.__dbHost = self._cI.get("SITE_DB_HOST_NAME")
            self.__dbPort = self._cI.get("SITE_DB_PORT_NUMBER")
            self.__dbSocket = self._cI.get("SITE_DB_SOCKET")

            self.__dbUser = self._cI.get("SITE_DB_USER_NAME")
            self.__dbPw = self._cI.get("SITE_DB_PASSWORD")
        else:
            pass

        if self.__dbSocket is None or len(self.__dbSocket) < 2:
            self.__dbSocket = None

        if self.__dbPort is None:
            self.__dbPort = 3306
        else:
            self.__dbPort = int(str(self.__dbPort))

        logger.info(
            "+MyConnectionBase(setResource) %s resource name %s server %s dns %s host %s user %s socket %s port %r",
            self.__siteId,
            resourceName,
            self.__dbServer,
            self.__databaseName,
            self.__dbHost,
            self.__dbUser,
            self.__dbSocket,
            self.__dbPort,
        )
        #
        self.__authD["DB_NAME"] = self.__databaseName
        self.__authD["DB_HOST"] = self.__dbHost
        self.__authD["DB_USER"] = self.__dbUser
        self.__authD["DB_PW"] = self.__dbPw
        self.__authD["DB_SOCKET"] = self.__dbSocket
        self.__authD["DB_PORT"] = int(str(self.__dbPort))
        self.__authD["DB_SERVER"] = self.__dbServer
        #

    def getAuth(self):
        return self.__authD

    def setAuth(self, authD):
        try:
            self.__authD = authD
            self.__databaseName = self.__authD["DB_NAME"]
            self.__dbHost = self.__authD["DB_HOST"]
            self.__dbUser = self.__authD["DB_USER"]
            self.__dbPw = self.__authD["DB_PW"]
            self.__dbSocket = self.__authD["DB_SOCKET"]
            if "DB_PORT" in self.__authD:
                self.__dbPort = int(str(self.__authD["DB_PORT"]))
            else:
                self.__dbPort = 3306
            self.__dbServer = self.__authD["DB_SERVER"]
        except:  # noqa: E722 pylint: disable=bare-except
            pass

    def openConnection(self):
        """Create a database connection and return a connection object.

        Returns None on failure
        """
        #
        if self._dbCon is not None:
            # Close an open connection -
            logger.info("+MyDbConnect.connect() WARNING Closing an existing connection.")
            self.closeConnection()

        try:
            if self.__dbSocket is None:
                dbcon = MySQLdb.connect(
                    db="%s" % self.__databaseName, user="******" % self.__dbUser, passwd="%s" % self.__dbPw, host="%s" % self.__dbHost, port=self.__dbPort, local_infile=1
                )
            else:
                dbcon = MySQLdb.connect(
                    db="%s" % self.__databaseName,
                    user="******" % self.__dbUser,
                    passwd="%s" % self.__dbPw,
                    host="%s" % self.__dbHost,
                    port=self.__dbPort,
                    unix_socket="%s" % self.__dbSocket,
                    local_infile=1,
                )

            self._dbCon = dbcon
            return True
        except:  # noqa: E722 pylint: disable=bare-except
            logger.exception(
                "+MyDbConnect.connect() Connection error to server %s host %s dsn %s user %s pw %s socket %s port %d \n",
                self.__dbServer,
                self.__dbHost,
                self.__databaseName,
                self.__dbUser,
                self.__dbPw,
                self.__dbSocket,
                self.__dbPort,
            )
            self._dbCon = None

        return False

    def getConnection(self):
        return self._dbCon

    def closeConnection(self):
        """Close db session"""
        if self._dbCon is not None:
            self._dbCon.close()
            self._dbCon = None
            return True
        else:
            return False

    def getCursor(self):
        try:
            return self._dbCon.cursor()
        except:  # noqa: E722 pylint: disable=bare-except
            logger.exception("+MyConnectionBase(getCursor) failing.\n")

        return None
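
A minimal sketch of the intended call sequence for MyConnectionBase, assuming the STATUS resource is configured for the current site:

mc = MyConnectionBase()
mc.setResource(resourceName="STATUS")
if mc.openConnection():
    cur = mc.getCursor()
    if cur is not None:
        cur.execute("SELECT 1")  # trivial connectivity check
        cur.close()
    mc.closeConnection()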
Beispiel #29
0
TESTOUTPUT = os.path.join(HERE, "test-output", platform.python_version())
if not os.path.exists(TESTOUTPUT):
    os.makedirs(TESTOUTPUT)
mockTopPath = os.path.join(TOPDIR, "wwpdb", "mock-data")
rwMockTopPath = os.path.join(TESTOUTPUT)

# Must create config file before importing ConfigInfo
from wwpdb.utils.testing.SiteConfigSetup import SiteConfigSetup  # noqa: E402

SiteConfigSetup().setupEnvironment(TESTOUTPUT, mockTopPath)

from wwpdb.utils.config.ConfigInfo import ConfigInfo  # noqa: E402

cI = ConfigInfo()
packagedir = cI.get("SITE_PACKAGES_PATH")

if packagedir and os.path.exists(packagedir):
    toolsmissing = False
else:
    toolsmissing = True

dictlist = cI.get("SITE_PDBX_DICTIONARY_NAME_DICT")
if dictlist:
    dictsmissing = False
else:
    dictsmissing = True


class commonsetup(object):
    def __init__(self):
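
The excerpt above breaks off inside commonsetup, but the toolsmissing and dictsmissing flags it computes at import time are the usual inputs to test-skip decorators; a minimal sketch, assuming the standard unittest module:

import unittest

class ToolDependentTests(unittest.TestCase):  # hypothetical test case
    @unittest.skipIf(toolsmissing, "Tools are not installed")
    def testWithTools(self):
        pass

    @unittest.skipIf(dictsmissing, "Dictionaries are not configured")
    def testWithDicts(self):
        pass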
Beispiel #30
0
class UpdateManager(object):
    def __init__(self, config_file, noop):
        self.__configfile = config_file
        self.__noop = noop
        self.__ci = ConfigInfo()
        self.__ci_common = ConfigInfoAppCommon()

        self.__extraconf = self.get_variable("ADMIN_EXTRA_CONF",
                                             environment='INSTALL_ENVIRONMENT')
        self.__confvars = {}
        self.__extraconfdir = None

        if self.__extraconf is not None:
            self.__extraconfdir = os.path.abspath(
                os.path.dirname(self.__extraconf))
            self.__confvars["extraconfdir"] = self.__extraconfdir

        # Infer topdir from where running from
        topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        cdict = {'topdir': topdir}

        self.__cparser = ConfigParser(cdict)

        cfiles = self.__configfile
        if self.__extraconf is not None:
            cfiles = [self.__configfile, self.__extraconf]
        self.__cparser.read(cfiles)

        self.web_apps_path = self.get_variable('TOP_WWPDB_WEBAPPS_DIR')
        self.resources_ro_path = self.get_variable('RO_RESOURCE_PATH')

    def __exec(self, cmd, overridenoop=False, working_directory=None):
        print(cmd)
        ret = 0
        if not self.__noop or overridenoop:
            if working_directory:
                print('Working Directory= {}'.format(working_directory))
                original_wd = os.getcwd()
                os.chdir(working_directory)
                ret = subprocess.call(cmd, shell=True)
                os.chdir(original_wd)
            else:
                ret = subprocess.call(cmd, shell=True)
        return ret

    def get_variable(self, variable, environment=None):
        ret = None
        if environment:
            ret = self.__ci.get(environment, {}).get(variable)
        if not ret:
            ret = self.__ci.get(variable)
        if not ret:
            ret = os.getenv(variable)
        return ret
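
    # Lookup order in get_variable() above: a section-style dictionary in
    # site-config (here INSTALL_ENVIRONMENT), then a top-level site-config
    # variable, then the process environment. Roughly equivalent to:
    #   val = (self.__ci.get("INSTALL_ENVIRONMENT", {}) or {}).get("CS_USER") \
    #         or self.__ci.get("CS_USER") or os.getenv("CS_USER")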

    def updatepyenv(self, dev_build):
        cs_user = self.get_variable('CS_USER',
                                    environment='INSTALL_ENVIRONMENT')
        cs_pass = self.get_variable('CS_PW', environment='INSTALL_ENVIRONMENT')
        cs_url = self.get_variable('CS_URL', environment='INSTALL_ENVIRONMENT')

        script_dir = os.path.dirname(os.path.realpath(__file__))
        constraintfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/constraints.txt'))

        urlreq = urlparse(cs_url)
        urlpath = "{}://{}:{}@{}{}/dist/simple/".format(
            urlreq.scheme, cs_user, cs_pass, urlreq.netloc, urlreq.path)
        # pip_extra_urls = "--extra-index-url {} --trusted-host {} --extra-index-url https://pypi.anaconda.org/OpenEye/simple ".format(
        #                   urlpath, urlreq.netloc)

        self.__exec("pip config --site set global.trusted-host {}".format(
            urlreq.netloc))
        self.__exec(
            'pip config --site set global.extra-index-url "{} https://pypi.anaconda.org/OpenEye/simple"'
            .format(urlpath))
        self.__exec("pip config --site set global.no-cache-dir false")

        pip_extra_urls = '-c {}'.format(constraintfile)

        # pip installing from requirements.txt in base_packages

        reqfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/pre-requirements.txt'))

        command = 'pip install {} -r {}'.format(pip_extra_urls, reqfile)
        self.__exec(command)

        reqfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/requirements.txt'))
        command = 'pip install {} -r {}'.format(pip_extra_urls, reqfile)
        self.__exec(command)

        if self.__cparser.has_option('DEFAULT', 'pip_extra_reqs'):
            opt_req = self.__cparser.get('DEFAULT',
                                         'pip_extra_reqs',
                                         vars=self.__confvars)
        else:
            opt_req = None

        reqfile = self.__cparser.get('DEFAULT', 'piprequirements')
        if dev_build:
            # Clone and do pip edit install

            # Checking if source directory exist
            source_dir = os.path.abspath(
                os.path.join(self.web_apps_path, '../..'))
            if not os.path.isdir(source_dir):
                os.makedirs(source_dir)

            path_to_list_of_repo = os.path.abspath(
                os.path.join(
                    script_dir,
                    '../base_packages/requirements_wwpdb_dependencies.txt'))
            with open(path_to_list_of_repo) as list_of_repo:
                for repo in list_of_repo:
                    command = 'git clone --recursive git@github.com:wwPDB/{0}.git; cd {0}; git checkout develop; cd ..'.format(
                        repo.rstrip())
                    self.__exec(command, working_directory=source_dir)
                    command = 'pip install {} --edit {}'.format(
                        pip_extra_urls, repo)
                    self.__exec(command, working_directory=source_dir)
        else:
            command = 'pip install -U {} -r {}'.format(pip_extra_urls, reqfile)
            self.__exec(command)

        if opt_req:
            command = 'export CS_USER={}; export CS_PW={}; export CS_URL={}; export URL_NETLOC={}; export URL_PATH={}; pip install -U {} -r {}'.format(
                cs_user, cs_pass, cs_url, urlreq.netloc, urlreq.path,
                pip_extra_urls, opt_req)
            self.__exec(command)

    def updateresources(self):
        restag = self.__cparser.get('DEFAULT', 'resourcestag')
        if self.resources_ro_path:
            if not os.path.exists(self.resources_ro_path):
                command = 'git clone git@github.com:wwPDB/onedep-resources_ro.git {}'.format(
                    self.resources_ro_path)
                self.__exec(command)

            command = 'cd {}; git pull; git checkout master; git pull; git checkout {}; git pull origin {}'.format(
                self.resources_ro_path, restag, restag)
            self.__exec(command)

    def checkwebfe(self, overridenoop=False):
        webdir = os.path.abspath(os.path.join(self.web_apps_path, '..'))
        curdir = os.path.dirname(__file__)
        checkscript = os.path.join(curdir, 'ManageWebFE.py')
        webfecheck = self.__cparser.get('DEFAULT', 'webfeconf')

        command = 'python {} --webroot {} check -r {}'.format(
            checkscript, webdir, webfecheck)
        ret = self.__exec(command, overridenoop=overridenoop)
        if ret:
            print("ERROR: check of webfe directory failed")

    def updatewebfe(self):

        # Checking if source directory exist
        source_dir = os.path.abspath(os.path.join(self.web_apps_path, '../..'))
        if not os.path.isdir(source_dir):
            os.makedirs(source_dir)

        # Check if repo is cloned
        webfe_repo = os.path.abspath(os.path.join(self.web_apps_path, '..'))
        if not os.path.isdir(webfe_repo):
            command = 'git clone --recurse-submodules git@github.com:wwPDB/onedep-webfe.git'
            self.__exec(command, working_directory=source_dir)
            self.checkwebfe()

        webfetag = self.__cparser.get('DEFAULT', 'webfetag')

        command = 'cd {}; git pull; git checkout {}; git pull origin {}; git submodule init; git submodule update'.format(
            webfe_repo, webfetag, webfetag)
        self.__exec(command)

        # Now check the results
        self.checkwebfe()

    def updatetaxdb(self):
        # Checks the number of rows in db and decides if to update
        taxdbsize = int(self.__cparser.get('DEFAULT', 'taxdbminsize'))
        if self.__cparser.has_option('DEFAULT', 'taxdbmaxsize'):
            maxsize = int(self.__cparser.get('DEFAULT', 'taxdbmaxsize'))
        else:
            maxsize = 999999999

        taxuseftp = self.__cparser.has_option('DEFAULT', 'taxuseftp')
        if not taxuseftp:
            taxresource = self.get_variable('TAXONOMY_FILE_NAME')

            if not taxresource:
                print("ERROR: TAXONOMY_FILE_NAME is not set in site-config")
                return

        curdir = os.path.dirname(__file__)
        checkscript = os.path.join(curdir, 'ManageTaxDB.py')

        if taxuseftp:
            addftp = " --ftpload"
        else:
            addftp = ""

        if self.__noop:
            command = 'python {} --noop --maxsize {} --taxdbsize {}{}'.format(
                checkscript, maxsize, taxdbsize, addftp)
        else:
            command = 'python {} --maxsize {} --taxdbsize {}{}'.format(
                checkscript, maxsize, taxdbsize, addftp)
        self.__exec(command)

    def updateschema(self):
        dbs = DbSchemaManager(self.__noop)
        dbs.updateschema()

    def postflightdbcheck(self):
        dbs = DbSchemaManager(self.__noop)
        dbs.checkviews()

    def checktoolvers(self):
        #  vers_config_var,  configinfovar,             relative path    ConfiginfoAppMethod
        confs = [
            [
                'annotver', 'SITE_ANNOT_TOOLS_PATH', 'etc/bundleversion.json',
                'get_site_annot_tools_path'
            ],
            ['webfever', 'TOP_WWPDB_WEBAPPS_DIR', 'version.json', ''],
            ['resourcever', 'RO_RESOURCE_PATH', 'version.json', ''],
            [
                'cctoolsver', 'SITE_CC_APPS_PATH', 'etc/bundleversion.json',
                'get_site_cc_apps_path'
            ],
            [
                'sfvalidver', 'SITE_PACKAGES_PATH',
                'sf-valid/etc/bundleversion.json', 'get_site_packages_path'
            ],
            [
                'dictver', 'SITE_PACKAGES_PATH', 'dict/etc/bundleversion.json',
                'get_site_packages_path'
            ],
        ]

        for c in confs:
            varname = c[0]
            confvar = c[1]
            fpart = c[2]
            config_info_app_method = c[3]

            try:
                tvers = self.__cparser.get('DEFAULT', varname)
                if config_info_app_method:
                    class_method = getattr(self.__ci_common,
                                           config_info_app_method)
                    toolspath = class_method()
                else:
                    toolspath = self.get_variable(confvar)
                fname = os.path.join(toolspath, fpart)
                if not os.path.exists(fname):
                    print("WARNING: Tool out of date. %s not found" % fname)
                    continue
                with open(fname, 'r') as fin:
                    jdata = json.load(fin)
                    vstring = jdata['Version']
                    if vstring != tvers:
                        print("***ERROR: Version mismatch %s != %s in %s" %
                              (tvers, vstring, fname))
            except NoOptionError:
                # Option not in config file - continue
                pass
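
    # Each bundleversion.json consulted above is expected to carry at least
    # a top-level "Version" key, e.g. {"Version": "2.3.1"} (value illustrative),
    # which is compared against the tag pinned in the install config.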

    def buildtools(self, build_version='v-5200'):
        curdir = os.path.dirname(__file__)
        buildscript = os.path.join(curdir, 'BuildTools.py')

        command = 'python {} --config {} --build-version {}'.format(
            buildscript, self.__configfile, build_version)

        ret = self.__exec(command)
        if ret:
            print("ERROR: buildtools failed")

    def checkoelicense(self):
        try:
            # If not in config will fall through
            expdate = self.__cparser.get('DEFAULT', 'openeyeexp')
        except NoOptionError:
            # Option not in config file - continue
            return

        oelicfile = self.__ci_common.get_site_cc_oe_licence()
        # Might be in OS_ENVIRONMENT
        if not oelicfile:
            oelicfile = self.get_variable('SITE_CC_OE_LICENSE')
        if not oelicfile:
            print("***ERROR: Cannot determine open eye license from config")
            return

        with open(oelicfile, 'r') as fin:
            data = fin.readlines()
            for d in data:
                if "#EXP_DATE:" not in d:
                    continue
                edate = d.split(':')[1].strip()
                if edate != expdate:
                    print("ERROR: Openeye Licence expiration wrong  %s vs %s" %
                          (edate, expdate))
                    # Only need single report
                    return