def get_config(var):
    """Look up *var* in site configuration, preferring the VALIDATION_SERVICES
    section when it defines the key, falling back to the global namespace."""
    ci = ConfigInfo()
    section = ci.get("VALIDATION_SERVICES")
    if section is None or var not in section:
        return ci.get(var)
    return section[var]
def __init__(self, log=sys.stderr, verbose=False):
    """Collect da_internal database-loading settings from site configuration."""
    self.__lfh = log
    self.__verbose = verbose
    self.__debug = True
    self.__siteId = getSiteId()
    config = ConfigInfo()
    appCommon = ConfigInfoAppCommon(self.__siteId)
    # Schema definitions and loader tooling come from the app-common config.
    self.__schemaPath = appCommon.get_site_da_internal_schema_path()
    self.__dbLoader = appCommon.get_db_loader_path()
    # Database connection coordinates come from the core site config.
    self.__dbHost = config.get("SITE_DB_HOST_NAME")
    self.__dbUser = config.get("SITE_DB_USER_NAME")
    self.__dbPw = config.get("SITE_DB_PASSWORD")
    self.__dbPort = str(config.get("SITE_DB_PORT_NUMBER"))
    self.__dbSocket = config.get("SITE_DB_SOCKET")
    self.__archivePath = config.get("SITE_ARCHIVE_STORAGE_PATH")
    self.__dbName = "da_internal"
    self.__workPath = os.path.join(self.__archivePath, "archive")
    self.__mysql = "mysql "
    self.__mapping = self.__schemaPath
    self.__workDir = "dbdata"
    self._dbCon = None
def setUp(self):
    """Resolve test input file locations from site configuration."""
    config = ConfigInfo()
    self.__testFilePath = config.get("TEST_FILE_PATH")
    self.__testFile = config.get("TEST_FILE")
    self.__testFileGzip = config.get("TEST_FILE_GZIP")
    self.__testFileZlib = config.get("TEST_FILE_ZLIB")
    self.__testFileBzip = config.get("TEST_FILE_BZIP")
    self.__outPath = TESTOUTPUT
    self.__outFileList = ["OUTPUT.dat.gz", "OUTPUT.dat", "OUTPUT.dat.bz2", "OUTPUT.dat.Z"]
    self.lfh = sys.stdout
def testGetSiteLocation(self):
    """Verify each configured site reports its name and location without error."""
    try:
        for sid in self.__siteIdList:
            cfg = ConfigInfo(siteId=sid, verbose=self.__verbose, log=self.__lfh)
            name = cfg.get("SITE_NAME", default=None)
            loc = cfg.get("WWPDB_SITE_LOC", default=None)
            logger.info(" siteId %-30s siteName %s siteLoc %s", sid, name, loc)
    except Exception as e:  # pragma: no cover
        logger.exception("Unable to get group site location %s", str(e))
        self.fail()
class ConfigInfoGroupDataSet(object):
    """Accessors for the correspondence between group deposition data set
    identifiers and the deposition/annotation sites (e.g. wwpdb_site_id)
    that service them.
    """

    def __init__(self, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        self.__verbose = verbose
        self.__debug = True
        self.__cI = ConfigInfo(siteId=None, verbose=self.__verbose)
        # siteId -> (lower bound, upper bound) of assigned group data set ids
        self.__groupIdAssignments = self.__cI.get("SITE_GROUP_DATASET_ID_ASSIGNMENT_DICTIONARY")

    def getDefaultGroupIdRange(self, siteId):
        """Return the (lower, upper) group data set identifier bounds assigned
        to *siteId*; sites with no default range get (-1, -1)."""
        lower, upper = self.__groupIdAssignments.get(siteId, (-1, -1))
        return (lower, upper)

    def getDefaultSiteId(self, groupId):
        """Return the default site assignment for the input group data set id."""
        return self.__getSiteIdForGroup(groupId)

    def __getSiteIdForGroup(self, groupId):
        """Map *groupId* ("G_xxxxxxx" or a bare integer/string) to the site
        whose default assignment range contains it; None when no range matches."""
        try:
            text = str(groupId)
            idVal = int(text[2:]) if text.startswith("G_") else int(text)
            for siteKey, (idMin, idMax) in self.__groupIdAssignments.items():
                if idMin <= idVal <= idMax:
                    return siteKey
        except Exception as e:
            if self.__debug:
                logger.exception("failed checking group range for %r with %s", groupId, str(e))
        return None
class DensityWrapper:
    """Convert X-ray map coefficients and EM volumes into binary CIF density files."""

    def __init__(self):
        self.__site_id = getSiteId()
        self.__cI = ConfigInfo(siteId=self.__site_id)
        self.__packagePath = self.__cI.get('SITE_PACKAGES_PATH')
        # node binary plus the volume-server tools live under the site packages tree
        self.node_path = os.path.join(self.__packagePath, 'node', 'bin', 'node')
        self.volume_server_pack = self.__cI.get('VOLUME_SERVER_PACK')
        self.volume_server_query = self.__cI.get('VOLUME_SERVER_QUERY')

    def convert_xray_density_map(self, coord_file, in_2fofc_map, in_fofc_map, out_binary_cif, working_dir):
        """Run the volume-server conversion of 2Fo-Fc/Fo-Fc map coefficients
        to a binary CIF map; returns the converter's process result."""
        converter = XrayVolumeServerMap(
            coord_path=coord_file,
            binary_map_out=out_binary_cif,
            node_path=self.node_path,
            volume_server_query_path=self.volume_server_query,
            volume_server_pack_path=self.volume_server_pack,
            two_fofc_mmcif_map_coeff_in=in_2fofc_map,
            fofc_mmcif_map_coeff_in=in_fofc_map,
            working_dir=working_dir,
        )
        return converter.run_process()

    def convert_em_volume(self, in_em_volume, out_binary_volume, working_dir):
        """Convert an EM volume to binary CIF; True when the output file exists."""
        logging.debug(working_dir)
        dp = RcsbDpUtility(tmpPath=working_dir, siteId=self.__site_id, verbose=True)
        dp.imp(in_em_volume)
        dp.op('em-density-bcif')
        dp.exp(out_binary_volume)
        return bool(out_binary_volume and os.path.exists(out_binary_volume))
class TaxDbManager(object):
    """A class to manage updates to the various schemas"""

    def __init__(self, taxdbsize, maxsize, noop, useftp):
        self.__noop = noop
        self.__taxdbsize = taxdbsize
        self.__maxsize = maxsize
        self.__useftp = useftp
        self.__cI = ConfigInfo()

    def updatedb(self):
        """Reload the taxonomy table when its row count is outside the expected range."""
        db = MyConnectionBase()
        db.setResource(resourceName="STATUS")
        if not db.openConnection():
            print("ERROR: Could not open status db")
            return
        q = MyDbQuery(dbcon=db._dbCon)
        query = "select count(ordinal) from taxonomy "
        rows = q.selectRows(queryString=query)
        count = rows[0][0]
        db.closeConnection()
        # Within the expected window -- nothing to do.
        if self.__taxdbsize <= count < self.__maxsize:
            print("Taxdb at least as big as expected")
            return
        if self.__useftp:
            command = "python -m wwpdb.apps.deposit.depui.taxonomy.loadTaxonomyFromFTP --write_sql"
        else:
            taxfile = self.__cI.get("TAXONOMY_FILE_NAME")
            if not taxfile:
                print("Could not find site-config TAXONOMY_FILE_NAME -- cannot load taxonomy")
                return
            command = "python -m wwpdb.apps.deposit.depui.taxonomy.loadData --input_csv {}".format(taxfile)
        self.__exec(command)

    def __exec(self, cmd):
        """Echo *cmd* and run it in a shell unless noop mode is set; returns the exit code."""
        print(cmd)
        ret = 0
        if not self.__noop:
            ret = subprocess.call(cmd, shell=True)
        return ret
class ArchiveIoBase(object):
    """A base class for archive data transfer operation utilities.

    Connection coordinates (host, credentials, port, protocol, key file,
    root path) are resolved from the site configuration entry keyed by
    *serverId*; subclasses implement the transfer operations.
    """

    def __init__(self, *args, **kwargs):  # pylint: disable=unused-argument
        self._raiseExceptions = kwargs.get("raiseExceptions", False)
        self._siteId = kwargs.get("siteId", getSiteId())
        self._serverId = kwargs.get("serverId", None)
        # Fix: use the resolved siteId so an explicit siteId kwarg is honored
        # (previously the config was always built from getSiteId()).
        self.__cI = ConfigInfo(siteId=self._siteId)
        #
        # Server-specific connection dictionary; empty dict when serverId is unknown.
        cD = self.__cI.get(self._serverId, {})
        self._hostName = cD.get("HOST_NAME", None)
        self._userName = cD.get("HOST_USERNAME", None)
        self._password = cD.get("HOST_PASSWORD", None)
        self._hostPort = int(cD.get("HOST_PORT", 22))
        self._protocol = cD.get("HOST_PROTOCOL", None)
        self._rootPath = cD.get("HOST_ROOT_PATH", None)
        self._keyFilePath = cD.get("HOST_KEY_FILE_PATH", None)
        self._keyFileType = cD.get("HOST_KEY_FILE_TYPE", None)

    #
    # -- Abstract transfer interface: subclasses must implement all of these. --
    def connect(self, hostName, userName, **kwargs):
        raise NotImplementedError("To be implemented in subclass")

    def mkdir(self, path, **kwargs):
        raise NotImplementedError("To be implemented in subclass")

    def stat(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def put(self, localPath, remotePath):
        raise NotImplementedError("To be implemented in subclass")

    def get(self, remotePath, localPath):
        raise NotImplementedError("To be implemented in subclass")

    def listdir(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def rmdir(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def remove(self, path):
        raise NotImplementedError("To be implemented in subclass")

    def close(self):
        raise NotImplementedError("To be implemented in subclass")
def testParseCutoff(self):
    """Check the configured validation/release cutoff window parses to the
    expected Friday 09:00:00 - 23:59:59 interval."""
    cI = ConfigInfo()
    cutoff = cI.get("PROJECT_VAL_REL_CUTOFF")
    self.assertEqual(len(cutoff), 2)
    start = self._parseTime(cutoff["start"])
    for actual, expected in ((start.hour, 9), (start.minute, 0), (start.second, 0), (start.isoweekday(), 5)):
        self.assertEqual(actual, expected)
    end = self._parseTime(cutoff["end"])
    for actual, expected in ((end.hour, 23), (end.minute, 59), (end.second, 59), (end.isoweekday(), 5)):
        self.assertEqual(actual, expected)
def __setResource(self, resource):
    """Populate connection settings for the named *resource* from site config;
    raises NameError for unrecognized resources."""
    if resource != "DA_INTERNAL":
        raise NameError("Unknown resource %s" % resource)
    config = ConfigInfo(self.__siteId)
    self.__dbServer = config.get("SITE_DB_SERVER")
    self.__dbHost = config.get("SITE_DB_HOST_NAME")
    self.__dbUser = config.get("SITE_DB_USER_NAME")
    self.__dbPw = config.get("SITE_DB_PASSWORD")
    self.__dbPort = str(config.get("SITE_DB_PORT_NUMBER"))
    self.__dbSocket = config.get("SITE_DB_SOCKET")
    self.__dbName = config.get("SITE_DA_INTERNAL_DB_NAME")
class MessageQueueConnection(object):
    """Build pika connection parameters/URLs for the site RabbitMQ service,
    selecting SSL or plain TCP based on SITE_RBMQ_CLIENT_PROTOCOL."""

    def __init__(self):
        self._siteId = getSiteId(defaultSiteId=None)
        self._cI = ConfigInfo(self._siteId)

    def _getDefaultConnectionUrl(self):
        """Provide the connection URL appropriate for the configured protocol."""
        rbmqClientProtocol = self._cI.get('SITE_RBMQ_CLIENT_PROTOCOL', default='')
        if 'SSL' in rbmqClientProtocol:
            return self._getSslConnectionUrl()
        return self._getConnectionUrl()

    def _getDefaultConnectionParameters(self):
        """Provide the connection parameters appropriate for the configured protocol."""
        rbmqClientProtocol = self._cI.get('SITE_RBMQ_CLIENT_PROTOCOL', default='')
        if 'SSL' in rbmqClientProtocol:
            return self._getSslConnectionParameters()
        return self._getConnectionParameters()

    def _getSslConnectionParameters(self):
        pObj, _url = self.__getSslConnectionParameters()
        return pObj

    def _getSslConnectionUrl(self):
        _pObj, url = self.__getSslConnectionParameters()
        return url

    def __getSslConnectionParameters(self):
        """Return (parameters, url) for an SSL client connection;
        (None, None) when configuration lookup or URL construction fails."""
        parameters = None
        rbmqUrl = None
        try:
            rbmqServerHost = self._cI.get('SITE_RBMQ_SERVER_HOST')
            rbmqServerPort = self._cI.get('SITE_RBMQ_SSL_SERVER_PORT')
            rbmqUser = self._cI.get('SITE_RBMQ_USER_NAME')
            rbmqPassword = self._cI.get('SITE_RBMQ_PASSWORD')
            rbmqVirtualHost = self._cI.get('SITE_RBMQ_VIRTUAL_HOST')
            clientSslCaCertFile = self._cI.get('SITE_RBMQ_SSL_CA_CERT_FILE')
            clientSslKeyFile = self._cI.get('SITE_RBMQ_SSL_KEY_FILE')
            clientSslCertFile = self._cI.get('SITE_RBMQ_SSL_CERT_FILE')
            # Client certificate material is passed as a url-encoded query fragment.
            ssl_opts = urlencode({"ssl_options": {"ca_certs": clientSslCaCertFile, "keyfile": clientSslKeyFile, "certfile": clientSslCertFile}})
            rbmqUrl = "amqps://%s:%s@%s:%d/%s?%s" % (rbmqUser, rbmqPassword, rbmqServerHost, int(rbmqServerPort), rbmqVirtualHost, ssl_opts)
            logger.debug("rbmq URL: %s " % rbmqUrl)
            parameters = pika.URLParameters(rbmqUrl)
        except Exception:  # fix: was a bare except -- don't swallow SystemExit/KeyboardInterrupt
            logger.exception("Failing")
        return parameters, rbmqUrl

    #
    def _getConnectionParameters(self):
        """Return connection parameters for the standard TCP client connection."""
        pObj, _url = self.__getConnectionParameters()
        return pObj

    def _getConnectionUrl(self):
        """Return connection parameters as a URL for the standard TCP client connection."""
        _pObj, url = self.__getConnectionParameters()
        return url

    def __getConnectionParameters(self):
        """Return (parameters, url) for a client connection using basic
        authentication; (None, None) on failure."""
        parameters = None
        # fix: rbmqUrl was previously unbound (NameError) when an exception
        # fired before its assignment inside the try block.
        rbmqUrl = None
        try:
            rbmqServerHost = self._cI.get('SITE_RBMQ_SERVER_HOST')
            rbmqServerPort = self._cI.get('SITE_RBMQ_SERVER_PORT')
            rbmqUser = self._cI.get('SITE_RBMQ_USER_NAME')
            rbmqPassword = self._cI.get('SITE_RBMQ_PASSWORD')
            rbmqVirtualHost = self._cI.get('SITE_RBMQ_VIRTUAL_HOST')
            credentials = pika.PlainCredentials(rbmqUser, rbmqPassword)
            parameters = pika.ConnectionParameters(host=rbmqServerHost, port=int(rbmqServerPort), virtual_host=rbmqVirtualHost, credentials=credentials)
            rbmqUrl = "amqp://%s:%s@%s:%d/%s" % (rbmqUser, rbmqPassword, rbmqServerHost, int(rbmqServerPort), rbmqVirtualHost)
            logger.debug("rbmq URL: %s " % rbmqUrl)
        except Exception:  # fix: was a bare except
            logger.exception("Failing")
        return parameters, rbmqUrl
class LocalFTPPathInfo(object):
    """Resolve local FTP-tree directory and file paths for PDB and EMDB archives."""

    def __init__(self, siteId=None):
        self.__siteId = siteId
        self.__cI = ConfigInfo(siteId=self.__siteId)
        self.ftp_pdb_root = self.__cI.get("SITE_PDB_FTP_ROOT_DIR")
        self.ftp_emdb_root = self.__cI.get("SITE_EMDB_FTP_ROOT_DIR")
        # content type -> subdirectory name within the PDB FTP tree
        self.__mapping = {
            "model": "mmCIF",
            "structure_factors": "structure_factors",
            "chemical_shifts": "nmr_chemical_shifts",
            "nmr_data": "nmr_data",
        }

    def __get_mapping(self, file_type):
        return self.__mapping.get(file_type)

    def set_ftp_pdb_root(self, ftp_pdb_root):
        # Only a truthy value overrides the configured root.
        if ftp_pdb_root:
            self.ftp_pdb_root = ftp_pdb_root

    def set_ftp_emdb_root(self, ftp_emdb_root):
        # NOTE: unlike the PDB setter, any non-None value (including "") is accepted.
        if ftp_emdb_root is not None:
            self.ftp_emdb_root = ftp_emdb_root

    def get_ftp_pdb(self):
        """Path of the all-structures directory, or '' when no root is set."""
        root = self.ftp_pdb_root
        return os.path.join(root, "pdb", "data", "structures", "all") if root else ""

    def get_ftp_emdb(self):
        """Path of the EMDB structures directory, or '' when no root is set."""
        root = self.ftp_emdb_root
        return os.path.join(root, "emdb", "structures") if root else ""

    def get_model_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("model"))

    def get_sf_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("structure_factors"))

    def get_cs_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("chemical_shifts"))

    def get_nmr_data_path(self):
        return os.path.join(self.get_ftp_pdb(), self.__get_mapping("nmr_data"))

    def get_model_fname(self, accession):
        fname = ReleaseFileNames().get_model(accession=accession, for_release=False)
        return os.path.join(self.get_model_path(), fname)

    def get_structure_factors_fname(self, accession):
        fname = ReleaseFileNames().get_structure_factor(accession=accession, for_release=False)
        return os.path.join(self.get_sf_path(), fname)

    def get_chemical_shifts_fname(self, accession):
        fname = ReleaseFileNames().get_chemical_shifts(accession=accession, for_release=False)
        return os.path.join(self.get_cs_path(), fname)

    def get_nmr_data_fname(self, accession):
        fname = ReleaseFileNames().get_nmr_data(accession=accession, for_release=False)
        return os.path.join(self.get_nmr_data_path(), fname)
class BuildTools(object):
    """Drive OneDep package builds listed in a build-updates config file.

    Each package name in the DEFAULT section's 'buildupdates' option is turned
    into a pkg_build_<name> command executed inside the OneDep build tree.
    """

    def __init__(self, config_file, noop, build_version='v-5200'):
        self.__configfile = config_file
        self.__noop = noop
        self.__build_version = build_version
        # Infer topdir from where running from
        topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        cdict = {'topdir': topdir}
        self.__cparser = ConfigParser(cdict)
        self.__cparser.read(self.__configfile)
        self.__ci = ConfigInfo()

    def __exec(self, cmd, overridenoop=False):
        """Echo and run *cmd* through the shell; skipped (returns 0) in noop
        mode unless *overridenoop* is set. Returns the shell exit code."""
        print(cmd)
        ret = 0
        if not self.__noop or overridenoop:
            ret = subprocess.call(cmd, shell=True)
        return ret

    def build(self):
        """Build every package named in the DEFAULT 'buildupdates' option."""
        try:
            packages = self.__cparser.get('DEFAULT', 'buildupdates')
        except Exception:  # fix: was a bare except; missing stanza/option means nothing to build
            return
        for pack in packages.split(' '):
            p = pack.strip()
            if p:
                pbuild = "pkg_build_{}".format(p)
                print("About to build %s" % pbuild)
                self.run_build_command(pbuild=pbuild)

    def run_build_command(self, pbuild):
        """Write a one-shot shell script that sources the build environment,
        forces a rebuild, runs *pbuild*, and removes its own temp directory."""
        onedep_build_dir = self.__ci.get('WWPDB_ONEDEP_BUILD')
        onedep_build_dir_version = os.path.join(onedep_build_dir, self.__build_version)
        cmd = ['#!/bin/bash']
        # set the environment and ensure we are up to date
        cmd.append('cd {}'.format(onedep_build_dir))
        cmd.append('git pull')
        cmd.append('. {}/utils/pkg-utils-v2.sh'.format(onedep_build_dir))
        cmd.append('get_environment')
        cmd.append('export FORCE_REBUILD="YES"')
        cmd.append('. {}/packages/all-packages.sh'.format(onedep_build_dir_version))
        # clear out the existing distrib dir so files are re-fetched
        cmd.append('if [ -z "$DISTRIB_DIR" ]')
        cmd.append('then')
        cmd.append('echo "DISTRIB_DIR not defined - exiting"')
        cmd.append('exit 1')
        cmd.append('else')
        cmd.append('echo "remove files from distrib_dir so that they are rebuilt"')
        cmd.append('rm -rf ${DISTRIB_DIR}/*')
        cmd.append('fi')
        # append the actual command
        cmd.append(pbuild)
        # write everything to a temp file
        working_dir = tempfile.mkdtemp()
        temp_file = os.path.join(working_dir, 'cmd.sh')
        print('writing out commands to: {}'.format(temp_file))
        with open(temp_file, 'w') as outFile:
            outFile.write('\n'.join(cmd))
        print('commands to run')
        print('\n'.join(cmd))
        # run the temp file
        cmd_string = 'chmod +x {0}; {0}; rm -rf {1}'.format(temp_file, working_dir)
        return self.__exec(cmd_string)
def testMock(self):
    """Confirm the mock site configuration resolves DEPLOY_PATH under the test tree."""
    expected = os.path.join(rwMockTopPath, "da_top")
    self.assertEqual(ConfigInfo().get("DEPLOY_PATH"), expected)
def main():
    """Command-line entry point: start/stop/restart/query a detached message
    consumer process, with per-host, per-instance pid and log files placed
    under the site's web-session 'ws-logs' directory."""
    # adding a conservative permission mask for this
    # os.umask(0o022)
    #
    siteId = getSiteId(defaultSiteId=None)
    cI = ConfigInfo(siteId)
    # topPath = cI.get('SITE_WEB_APPS_TOP_PATH')
    topSessionPath = cI.get("SITE_WEB_APPS_TOP_SESSIONS_PATH")
    #
    myFullHostName = platform.uname()[1]
    # short, lower-cased host name -- used to key pid/log file names
    myHostName = str(myFullHostName.split(".")[0]).lower()
    #
    wsLogDirPath = os.path.join(topSessionPath, "ws-logs")
    # Setup logging --
    now = time.strftime("%Y-%m-%d", time.localtime())
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("--start", default=False, action="store_true", dest="startOp", help="Start consumer client process")
    parser.add_option("--stop", default=False, action="store_true", dest="stopOp", help="Stop consumer client process")
    parser.add_option("--restart", default=False, action="store_true", dest="restartOp", help="Restart consumer client process")
    parser.add_option("--status", default=False, action="store_true", dest="statusOp", help="Report consumer client process status")
    #
    parser.add_option("-v", "--verbose", default=False, action="store_true", dest="verbose", help="Enable verbose output")
    parser.add_option("--debug", default=1, type="int", dest="debugLevel", help="Debug level (default=1) [0-3]")
    parser.add_option("--instance", default=1, type="int", dest="instanceNo", help="Instance number [1-n]")
    #
    (options, _args) = parser.parse_args()
    #
    # pid/stdout/stderr/log paths are unique per host and per instance number
    pidFilePath = os.path.join(wsLogDirPath, myHostName + "_" + str(options.instanceNo) + ".pid")
    stdoutFilePath = os.path.join(wsLogDirPath, myHostName + "_" + str(options.instanceNo) + "_stdout.log")
    stderrFilePath = os.path.join(wsLogDirPath, myHostName + "_" + str(options.instanceNo) + "_stderr.log")
    wfLogFilePath = os.path.join(wsLogDirPath, myHostName + "_" + str(options.instanceNo) + "_" + now + ".log")
    #
    logger = logging.getLogger(name="root")  # pylint: disable=redefined-outer-name
    logging.captureWarnings(True)
    formatter = logging.Formatter("%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
    handler = logging.FileHandler(wfLogFilePath)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    #
    lt = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
    #
    # map the numeric --debug level onto a logging level
    if options.debugLevel > 2:
        logger.setLevel(logging.DEBUG)
    elif options.debugLevel > 0:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)
    #
    #
    myDP = MyDetachedProcess(pidFile=pidFilePath, stdout=stdoutFilePath, stderr=stderrFilePath, wrkDir=wsLogDirPath)
    if options.startOp:
        sys.stdout.write("+DetachedMessageConsumer() starting consumer service at %s\n" % lt)
        logger.info("DetachedMessageConsumer() starting consumer service at %s", lt)
        myDP.start()
    elif options.stopOp:
        sys.stdout.write("+DetachedMessageConsumer() stopping consumer service at %s\n" % lt)
        logger.info("DetachedMessageConsumer() stopping consumer service at %s", lt)
        myDP.stop()
    elif options.restartOp:
        sys.stdout.write("+DetachedMessageConsumer() restarting consumer service at %s\n" % lt)
        logger.info("DetachedMessageConsumer() restarting consumer service at %s", lt)
        myDP.restart()
    elif options.statusOp:
        sys.stdout.write("+DetachedMessageConsumer() reporting status for consumer service at %s\n" % lt)
        sys.stdout.write(myDP.status())
    else:
        # no operation requested -- nothing to do
        pass
class ArchiveIoSftpTests(unittest.TestCase):
    """Integration tests exercising ArchiveIoSftp against the configured backup
    server (connect, stat/list, directory and file transfer operations).
    NOTE(review): these tests require live SFTP connectivity to the server
    configured under BACKUP_SERVER_RDI2 -- confirm before running in CI."""

    def setUp(self):
        # Resolve server connection details from the site configuration entry
        # keyed by the server id.
        self.__lfh = sys.stderr
        self.__verbose = False
        #
        self.__serverId = "BACKUP_SERVER_RDI2"
        self.__cI = ConfigInfo(siteId=getSiteId(), verbose=self.__verbose, log=self.__lfh)
        cD = self.__cI.get(self.__serverId, {})
        self.__hostName = cD.get("HOST_NAME")
        self.__userName = cD.get("HOST_USERNAME")
        self.__hostPort = int(cD.get("HOST_PORT"))
        # self.__protocol = cD.get("HOST_PROTOCOL")
        self.__rootPath = cD.get("HOST_ROOT_PATH")
        self.__keyFilePath = cD.get("HOST_KEY_FILE_PATH")
        self.__keyFileType = cD.get("HOST_KEY_FILE_TYPE")
        #
        # Local fixture paths used by the transfer tests.
        self.__testLocalFilePath = "./data/TEST-FILE.DAT"
        self.__testLocalOutputFilePath = "./JUNK.JUNK"
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        # Report per-test wall-clock time.
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testSftpConnect(self):
        """Test case - for connection-"""
        try:
            aio = ArchiveIoSftp()
            ok = aio.connect(self.__hostName, self.__userName, self.__hostPort, keyFilePath=self.__keyFilePath, keyFileType=self.__keyFileType)
            aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    #
    def testSftpStatOps(self):
        """Test case - get directory list and stat details-"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            result = aio.listdir(".")
            logger.info("listdir: %r", result)
            result = aio.stat(".")
            logger.info("stat: %r", result)
            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testSftpDirOps(self):
        """Test case - create and remove directory -"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            testPath = os.path.join(self.__rootPath, "test")
            ok = aio.mkdir(testPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir: %r", result)
            result = aio.stat(testPath)
            logger.info("stat good: %r", result)
            ok = aio.rmdir(testPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir after remove: %r", result)
            #
            # Nonexistent-path cases: exercise listdir/stat on a missing directory.
            testPathBad = os.path.join(self.__rootPath, "test_bad")
            result = aio.listdir(testPathBad)
            logger.debug("listdir bad : %r", result)
            result = aio.stat(testPathBad)
            logger.info("bad stat: %r", result)
            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testSftpTransferOps(self):
        """Test case - transfer and remove files and directories -"""
        try:
            aio = ArchiveIoSftp(serverId=self.__serverId)
            ok = aio.connectToServer()
            testDirPath = os.path.join(self.__rootPath, "test")
            testFilePath1 = os.path.join(testDirPath, "TEST-FILE-1.DAT")
            testFilePath2 = os.path.join(testDirPath, "TEST-FILE-2.DAT")
            ok = aio.mkdir(testDirPath)
            ok = aio.put(self.__testLocalFilePath, testFilePath1)
            ok = aio.put(self.__testLocalFilePath, testFilePath2)
            #
            aio.get(testFilePath1, self.__testLocalOutputFilePath)
            aio.get(testFilePath2, self.__testLocalOutputFilePath)
            #
            result = aio.listdir(testDirPath)
            logger.debug("listdir: %r", result)
            ok = aio.remove(testFilePath1)
            ok = aio.remove(testFilePath2)
            #
            result = aio.listdir(testDirPath)
            logger.debug("listdir: %r", result)
            #
            ok = aio.rmdir(testDirPath)
            result = aio.listdir(self.__rootPath)
            logger.debug("listdir after remove: %r", result)
            ok = aio.close()
            self.assertEqual(ok, True)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
class ConfigInfoAppBase(object):
    """Base class to provide common application lookups"""

    def __init__(self, siteId=None, verbose=True, log=sys.stderr):
        self._cI = ConfigInfo(siteId=siteId, verbose=verbose, log=log)
        # Lazily-populated caches for the directory lookups below.
        self._resourcedir = None
        self._rwresourcedir = None
        self._referencedir = None
        self._site_archive_dir = None
        self._site_local_apps_path = None
        self._top_webapps_path = None
        self._top_sessions_path = None

    def _getlegacy(self, key, default=None, stacklevel=4):
        """Retrieve *key* from configuration; warn (deprecation) when the
        legacy key is still present, otherwise return *default*."""
        val = self._cI.get(key)
        if val is None:
            return default
        # logging will repeat with each occurrence
        self.__warndeprecated("Access key %s has been used but is deprecated" % key, stacklevel=stacklevel)
        return val

    def _getValue(self, key, default=None):
        """Retrieve *key* from configuration, falling back to *default*."""
        val = self._cI.get(key)
        return default if val is None else val

    def __cachedlookup(self, attr, key):
        """Fetch config *key* into instance attribute *attr* on first use."""
        if getattr(self, attr) is None:
            setattr(self, attr, self._cI.get(key))
        return getattr(self, attr)

    def _getresourcedir(self):
        return self.__cachedlookup("_resourcedir", "RO_RESOURCE_PATH")

    def _getrwresourcedir(self):
        """Returns the RW resource directory if set in site-config"""
        return self.__cachedlookup("_rwresourcedir", "RW_RESOURCE_PATH")

    def _getreferencedir(self):
        return self.__cachedlookup("_referencedir", "REFERENCE_PATH")

    def _get_site_archive_dir(self):
        return self.__cachedlookup("_site_archive_dir", "SITE_ARCHIVE_STORAGE_PATH")

    def _get_site_local_apps(self):
        return self.__cachedlookup("_site_local_apps_path", "SITE_LOCAL_APPS_PATH")

    def _get_top_web_apps_top_path(self):
        return self.__cachedlookup("_top_webapps_path", "SITE_WEB_APPS_TOP_PATH")

    def _get_top_sessions_path(self):
        return self.__cachedlookup("_top_sessions_path", "SITE_WEB_APPS_TOP_SESSIONS_PATH")

    def get_site_packages_path(self):
        return self._getlegacy("SITE_PACKAGES_PATH", os.path.join(self._get_site_local_apps(), "packages"))

    def __warndeprecated(self, msg, stacklevel=4):
        """Logs warning message"""
        # stacklevel is to get up high enough to get caller
        warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel)
class FileUtils(FileUtilsBase):
    """ Manage the presentation of project files for download. """

    def __init__(self, entryId, reqObj=None, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__reqObj = reqObj
        # Reassign siteId for the following special case --
        self.__entryId = entryId
        siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        # This is for viewing the entries from the standalone validation server from annotation --
        # D_90* entries on the listed annotation/test sites live on the validation server.
        if siteId in ["WWPDB_DEPLOY_PRODUCTION_RU", "WWPDB_DEPLOY_VALSRV_RU", "WWPDB_DEPLOY_TEST", "WWPDB_DEPLOY_INTERNAL_RU"] and entryId.startswith("D_90"):
            siteId = "WWPDB_DEPLOY_VALSRV_RU"
        #
        # Get inventory of file types
        super(FileUtils, self).__init__()
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Bind session, path-resolution and configuration helpers for the
        # (possibly reassigned) site id.
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        # self.__lfh.write("+FileUtils.__setup() starting with entryId %r adjusted WWPDB_SITE_ID %r\n" % (self.__entryId, self.__siteId))
        #
        self.__sObj = self.__reqObj.getSessionObj()
        self.__sessionId = self.__sObj.getId()
        self.__sessionPath = self.__sObj.getPath()
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        self.__cI = ConfigInfo(self.__siteId)
        # Milestone suffixes (e.g. -upload, -deposit) appended to content types below.
        self.__msL = self.__cI.get("CONTENT_MILESTONE_LIST")

    #
    def renderFileList(self, fileSource="archive", rDList=None, titlePrefix="", titleSuffix="", displayImageFlag=False):
        """Render HTML tables of downloadable files for *fileSource*;
        returns (total file count, list of HTML fragments)."""
        if rDList is None:
            rDList = self._rDList
        htmlList = []
        nTot = 0
        if fileSource in ["archive", "deposit", "wf-archive"]:
            for ky in rDList:
                if ky not in self._rD:
                    continue
                ctList = self._rD[ky]
                title = titlePrefix + ky + titleSuffix
                # Expand each content type with its milestone variants.
                fList = []
                fList.extend(ctList)
                for ct in ctList:
                    for ms in self.__msL:
                        mt = ct + "-" + ms
                        fList.append(mt)
                nF, oL = self.__renderContentTypeFileList(
                    self.__entryId, fileSource=fileSource, wfInstanceId=None, contentTypeList=fList, title=title, displayImageFlag=displayImageFlag
                )
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF
        if fileSource in ["archive", "wf-archive"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="archive", title="Archive Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF
        if fileSource in ["deposit"]:
            nF, oL = self.__renderLogFileList(self.__entryId, fileSource="deposit", title="Deposit Log Files")
            if nF > 0:
                htmlList.extend(oL)
                nTot += nF
        #
        if fileSource in ["wf-instance", "instance"]:
            # One table per workflow instance directory.
            iTopPath = self.__pI.getInstanceTopPath(self.__entryId)
            fPattern = os.path.join(iTopPath, "*")
            wfInstancePathList = filter(os.path.isdir, glob.glob(fPattern))
            for wfInstancePath in wfInstancePathList:
                (_pth, wfInstId) = os.path.split(wfInstancePath)
                title = "Files in " + wfInstId
                nF, oL = self.__renderWfInstanceFileList(self.__entryId, wfInstancePath, title=title)
                if nF > 0:
                    htmlList.extend(oL)
                    nTot += nF
        #
        return nTot, htmlList

    def __renderContentTypeFileList(self, entryId, fileSource="archive", wfInstanceId=None, contentTypeList=None, title=None, displayImageFlag=False):
        # Build (href, mtime, size-KB) rows for files of the given content types;
        # optionally inline EMDB image files into the table.
        if contentTypeList is None:
            contentTypeList = ["model"]
        if self.__verbose:
            self.__lfh.write(
                "+FileUtils.renderContentTypeFileList() entryId %r fileSource %r wfInstanceId %r contentTypeList %r \n" % (entryId, fileSource, wfInstanceId, contentTypeList)
            )
        de = DataExchange(
            reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=wfInstanceId, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh
        )
        tupL = de.getContentTypeFileList(fileSource=fileSource, contentTypeList=contentTypeList)
        #
        rTupL = []
        for tup in tupL:
            href, fN = self.__makeDownloadHref(tup[0])
            # Sizes >= 1 KB shown as integers; sub-KB sizes with 3 decimals.
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
            if displayImageFlag and fN.startswith(entryId + "_img-emdb"):
                # Link the image into the session directory so it can be served inline.
                imgFile = os.path.join(self.__sessionPath, fN)
                if os.access(imgFile, os.F_OK):
                    os.remove(imgFile)
                #
                os.symlink(tup[0], imgFile)
                imgHtml = '<img src="/sessions/' + self.__sessionId + "/" + fN + '" border="0" alt="Image" width="400" height="400">'
                rTupL.append(("displayImage", imgHtml, ""))
        #
        #
        if title is None:
            cS = ",".join(contentTypeList)
            title = "File Source %s (%s)" % (fileSource, cS)
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderWfInstanceFileList(self, entryId, wfPath, title=None):
        # Render all files found directly under a workflow instance directory.
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderWfInstanceFileList() wfPath %s\n" % wfPath)
        wfPattern = os.path.join(wfPath, "*")
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=None, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getMiscFileList(fPatternList=[wfPattern], sortFlag=True)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
        if title is None:
            title = "Workflow instance files for %s" % entryId
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderLogFileList(self, entryId, fileSource="archive", title=None):
        # Render the log files recorded for the entry in the given file source.
        if self.__verbose:
            self.__lfh.write("+FileUtils.renderLogFileList() entryId %r fileSource %r\n" % (entryId, fileSource))
        de = DataExchange(reqObj=self.__reqObj, depDataSetId=entryId, wfInstanceId=None, fileSource=fileSource, siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        tupL = de.getLogFileList(entryId, fileSource=fileSource)
        #
        rTupL = []
        for tup in tupL:
            href, _fN = self.__makeDownloadHref(tup[0])
            if tup[2] > 1:
                sz = "%d" % int(tup[2])
            else:
                sz = "%.3f" % tup[2]
            rTup = (href, tup[1], sz)
            rTupL.append(rTup)
        if title is None:
            title = "Log Files in Source %s" % fileSource
        nF, htmlList = self.__renderFileList(rTupL, title)
        return nF, htmlList

    def __renderFileList(self, fileTupleList, title, embeddedTitle=True):
        # Emit an HTML table for (href, mtime, size) tuples; "displayImage"
        # rows span all three columns. Returns (row count, HTML lines).
        #
        oL = []
        if len(fileTupleList) > 0:
            if embeddedTitle:
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">%s</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>' % title)
            else:
                oL.append("<h4>%s</h4>" % title)
                oL.append('<table class="table table-bordered table-striped table-condensed">')
                oL.append('<tr><th class="width50">Files</th><th class="width30">Modification Time</th><th class="width20">Size (KBytes)</th></tr>')
            for tup in fileTupleList:
                oL.append("<tr>")
                if tup[0] == "displayImage":
                    oL.append('<td align="center" colspan="3">%s</td>' % tup[1])
                else:
                    oL.append("<td>%s</td>" % tup[0])
                    oL.append("<td>%s</td>" % tup[1])
                    oL.append("<td>%s</td>" % tup[2])
                #
                oL.append("</tr>")
            #
            oL.append("</table>")
        #
        return len(fileTupleList), oL

    def __makeDownloadHref(self, filePath):
        # Build the review-service download anchor for *filePath*;
        # returns (anchor HTML, bare file name).
        _dP, fN = os.path.split(filePath)
        tS = "/service/review_v2/download_file?sessionid=" + self.__sessionId + "&file_path=" + filePath
        href = "<a class='my-file-downloadable' href='" + tS + "'>" + fN + "</a>"
        return href, fN
class DataMaintenanceTests(unittest.TestCase): def setUp(self): self.__lfh = sys.stderr self.__verbose = True # Get siteId from the environment - self.__siteId = getSiteId() self.__cI = ConfigInfo(siteId=self.__siteId) # In test mode, no deletions are performed - self.__testMode = True # # An data set ID list must be set -- self.__idListPath = "RELEASED.LIST" # self.__milestoneL = [] self.__milestoneL.append(None) self.__milestoneL.extend(self.__cI.get("CONTENT_MILESTONE_LIST")) self.__cTBD = self.__cI.get("CONTENT_TYPE_BASE_DICTIONARY") self.__cTD = self.__cI.get("CONTENT_TYPE_DICTIONARY") self.__cTL = sorted(self.__cTBD.keys()) # Example list of candidate content types for purging -- this is based on system V15x for X-ray content types self.__cTypesOtherL = [ "assembly-assign", "assembly-model", "assembly-model-xyz", "assembly-report", "chem-comp-assign", "chem-comp-assign-details", "chem-comp-assign-final", "chem-comp-depositor-info", "chem-comp-link", "component-image", "dcc-report", "dict-check-report", "dict-check-report-r4", "format-check-report", "geometry-check-report", "merge-xyz-report", "misc-check-report", "notes-from-annotator", "polymer-linkage-distances", "polymer-linkage-report", "secondary-structure-topology", "seq-align-data", "seq-assign", "seq-data-stats", "seqdb-match", "sequence-fasta", "sf-convert-report", "site-assign", "special-position-report", "validation-data", "validation-report", "validation-report-depositor", "validation-report-full", "validation-report-slider", ] # # Test snapshot directory required for recovery tests - self.__snapShotPath = "/net/wwpdb_da_data_archive/.snapshot/nightly.1/data" def tearDown(self): pass def __getIdList(self, fPath): if not os.access(fPath, os.R_OK): self.__lfh.write("__getIdList() Missing data set ID list file %s\n" % fPath) self.fail() # ifh = open(fPath, "r") fL = [] # D_10 00 00 00 01 for line in ifh: tId = line[:-1] if len(tId) == 12 and tId.startswith("D_"): fL.append(tId) ifh.close() return fL 
def __getRecoveryInfo(self, purgeType="exp"): """Return the list of tuple describing content type and milestones to be recovered. return [{fileSource,contentType,formatType,mileStone,purgeType},] """ rL = [] if purgeType in ["exp"]: for ct in ["model"]: for fs in ["archive", "deposit"]: for fm in ["pdbx", "pdb"]: for milestone in self.__milestoneL: if milestone in ["release", "annotate", "review"]: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) for ct in ["structure-factors"]: for fs in ["archive", "deposit"]: for fm in ["pdbx", "mtz"]: for milestone in self.__milestoneL: if milestone in ["release", "annotate", "review"]: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) elif purgeType in ["other", "report"]: for ct in self.__cTypesOtherL: if ct not in ["validation-report", "validation-data", "validation-report-full"]: continue for fs in ["archive", "deposit"]: for fm in self.__cTD[ct][0]: for milestone in self.__milestoneL: if milestone in ["release", "annotate", "review"]: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "other"}) return rL def testRecoverProductionList(self): """Test case for selected recovery of selected content types and milestone files from snapshot directory""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)) try: idList = self.__getIdList(self.__idListPath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh) for id in idList: recL = [] for pType in ["exp", "other"]: pTL = self.__getRecoveryInfo(purgeType=pType) for pT in pTL: vfL = dm.getVersionFileListSnapshot( basePath=self.__snapShotPath, dataSetId=id, wfInstanceId=None, fileSource=pT["fileSource"], contentType=pT["contentType"], formatType=pT["formatType"], partitionNumber="1", mileStone=pT["mileStone"], ) 
self.__lfh.write("\n+testRecoverProductionList - id %13s cType %s\n" % (id, pT["contentType"])) for ii, p in enumerate(vfL): self.__lfh.write("+testRecoverProductionList- %4d pair - %r\n" % (ii, p)) recL.extend(vfL) if len(recL) > 0: for ii, p in enumerate(recL): self.__lfh.write("+testRecoverProductionList- %4d pairL - %r\n" % (ii, p)) shutil.copyfile(p[0], p[1]) except: traceback.print_exc(file=sys.stdout) self.fail() def __getPurgeInfo(self, purgeType="exp"): """Return a list of tuples describing content types and milestone data files to be purged - return [{fileSource,contentType,formatType,mileStone,purgeType},] """ rL = [] if purgeType in ["exp"]: for ct in ["model"]: for fs in ["archive", "deposit"]: for fm in ["pdbx", "pdb"]: for milestone in self.__milestoneL: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) for ct in ["structure-factors", "em-structure-factors"]: for fs in ["archive", "deposit"]: for fm in ["pdbx", "mtz"]: for milestone in self.__milestoneL: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) for ct in ["nmr-chemical-shifts", "nmr-chemical-shifts-raw", "nmr-chemical-shifts-auth"]: for fs in ["archive", "deposit"]: for fm in ["pdbx", "nmr-star"]: for milestone in self.__milestoneL: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) for ct in ["em-volume", "em-mask-volume", "em-additional-volume"]: for fs in ["archive", "deposit"]: for fm in ["map", "ccp4", "mrc2000"]: for milestone in self.__milestoneL: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": milestone, "purgeType": "exp"}) elif purgeType in ["other", "report"]: for ct in self.__cTypesOtherL: for fs in ["archive", "deposit"]: for fm in self.__cTD[ct][0]: for milestone in self.__milestoneL: rL.append({"fileSource": fs, "contentType": ct, "formatType": fm, "mileStone": 
milestone, "purgeType": "other"}) return rL def __removePathList(self, pthList): # for pth in pthList: try: if self.__testMode: self.__lfh.write("__removePathList() TEST MODE skip removing path %s\n" % pth) else: os.remove(pth) except: pass def testCreatePurgeProductionList(self): """Test case for generating canditate files for purge -""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)) try: idList = self.__getIdList(self.__idListPath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh) for id in idList: rmLL = [] for pType in ["exp", "other"]: pTL = self.__getPurgeInfo(purgeType=pType) for pT in pTL: latest, rmL, gzL = dm.getPurgeCandidates( id, wfInstanceId=None, fileSource=pT["fileSource"], contentType=pT["contentType"], formatType=pT["formatType"], partitionNumber="1", mileStone=pT["mileStone"], purgeType=pT["purgeType"], ) if latest is None: continue self.__lfh.write("\n+testPurgeCandidatesList - id %13s cType %s LATEST version %s\n" % (id, pT["contentType"], latest)) for ii, p in enumerate(rmL): self.__lfh.write("+testPurgeCandidateList- %4d rm - %r\n" % (ii, p)) for ii, p in enumerate(gzL): self.__lfh.write("+testPurgeCandidateList- %4d gz - %r\n" % (ii, p)) if len(rmL) > 0: rmLL.extend(rmL) rmLL.extend(dm.getLogFiles(id, fileSource="deposit")) rmLL.extend(dm.getLogFiles(id, fileSource="archive")) if len(rmLL) > 0: for ii, p in enumerate(rmLL): self.__lfh.write("+testPurgeCandidateList- %4d rmLL - %r\n" % (ii, p)) except: traceback.print_exc(file=sys.stdout) self.fail() def testPurgeProductionList(self): """Preliminary version of purge operations post release -""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)) try: idList = self.__getIdList(self.__idListPath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh) for id in idList: rmLL = [] for 
pType in ["exp", "other"]: pTL = self.__getPurgeInfo(purgeType=pType) for pT in pTL: latest, rmL, gzL = dm.getPurgeCandidates( id, wfInstanceId=None, fileSource=pT["fileSource"], contentType=pT["contentType"], formatType=pT["formatType"], partitionNumber="1", mileStone=pT["mileStone"], purgeType=pT["purgeType"], ) if latest is None: continue self.__lfh.write("\n+testPurgeCandidatesList - id %13s cType %s LATEST version %s\n" % (id, pT["contentType"], latest)) for ii, p in enumerate(rmL): self.__lfh.write("+testPurgeCandidateList- %4d rm - %r\n" % (ii, p)) for ii, p in enumerate(gzL): self.__lfh.write("+testPurgeCandidateList- %4d gz - %r\n" % (ii, p)) if len(rmL) > 0: rmLL.extend(rmL) rmLL.extend(dm.getLogFiles(id, fileSource="deposit")) rmLL.extend(dm.getLogFiles(id, fileSource="archive")) if len(rmLL) > 0: for ii, p in enumerate(rmLL): self.__lfh.write("+testPurgeCandidateList- %4d rmLL - %r\n" % (ii, p)) self.__removePathList(rmLL) except: traceback.print_exc(file=sys.stdout) self.fail() def testPurgeCandidatesList(self): """""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)) try: idList = self.__getIdList(self.__idListPath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh) for id in idList: latest, rmL, gzL = dm.getPurgeCandidates(id, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None) self.__lfh.write("\n\n+testPurgeCandidatesList - id %s LATEST version %s\n" % (id, latest)) for ii, p in enumerate(rmL): self.__lfh.write("+testPurgeCandidateList- %r rm - %r\n" % (ii, p)) for ii, p in enumerate(gzL): self.__lfh.write("+testPurgeCandidateList- %r gz - %r\n" % (ii, p)) self.__lfh.write("%s\n" % "\n".join(self.__cTL)) except: traceback.print_exc(file=sys.stdout) self.fail() def testVersionList(self): """""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, 
sys._getframe().f_code.co_name)) try: idList = self.__getIdList(self.__idListPath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, verbose=self.__verbose, log=self.__lfh) for id in idList: pL = dm.getVersionFileList(id, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None) self.__lfh.write("\n\n+testVersionList- id %s file list\n" % (id)) for ii, p in enumerate(pL): self.__lfh.write("+testVersionList- %r %r\n" % (ii, p)) except: traceback.print_exc(file=sys.stdout) self.fail() def __makeEntryPathList(self, archivePath): """Return the list of entries in the archive directory names and paths -""" pathList = [] dataList = [] for root, dirs, files in scandir.walk(archivePath, topdown=False): for dir in dirs: if dir.startswith("D_") and len(dir) == 12: pathList.append(os.path.join(root, dir)) dataList.append(dir) return dataList, pathList def __splitFilePath(self, pth): id = None contentType = None fileFormat = None partNo = None versionNo = None try: dn, fn = os.path.split(pth) fFields = fn.split(".") fileName = fFields[0] fileFormat = fFields[1] if len(fFields) > 2: versionNo = int(fFields[2][1:]) else: versionNo = int(0) fParts = fileName.split("_") if len(fParts) == 4: id = fParts[0] + "_" + fParts[1] contentType = fParts[2] partNo = int(fParts[3][1:]) else: if len(fParts) > 2: id = fParts[0] + "_" + fParts[1] else: id = fileName if len(fParts) > 3: contentType = fParts[2] else: contentType = None partNo = None return id, contentType, fileFormat, partNo, versionNo except: traceback.print_exc(file=sys.stdout) return id, contentType, fileFormat, partNo, versionNo def testGetFileInventory(self): """""" self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name)) archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH") try: idList, pathList = self.__makeEntryPathList(archivePath) dm = DataMaintenance(siteId=self.__siteId, testMode=self.__testMode, 
verbose=self.__verbose, log=self.__lfh) for id in idList: dirPath = os.path.join(archivePath, "archive", id, "*") self.__lfh.write("+testGetFileInventoryList- inventory in directory %s\n" % (dirPath)) pL = dm.getMiscFileList(fPatternList=[dirPath], sortFlag=True) self.__lfh.write("\n\n+testGetFileInventoryList- id %s file list\n" % (id)) for ii, p in enumerate(pL): tup0 = self.__splitFilePath(p[0]) retR = [t for t in tup0] retR.append(p[1]) retR.append(p[2]) self.__lfh.write("+testGetFileInventoryList- %r %r\n" % (ii, retR)) except: traceback.print_exc(file=sys.stdout) self.fail()
class DataExchange(object):
    """
    Implements common data exchange operations including: moving annotation data files between session
    and workflow storage, accessing files in workflow directories, and routine file maintenance operations.
    """

    def __init__(self, reqObj=None, depDataSetId=None, wfInstanceId=None, fileSource="archive", siteId=None, verbose=False, log=sys.stderr):
        self.__reqObj = reqObj
        self.__depDataSetId = depDataSetId
        self.__wfInstanceId = wfInstanceId
        self.__fileSource = fileSource
        self.__verbose = verbose
        self.__lfh = log
        #
        self.__debug = False
        self.__inputSessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Resolve the site id from the caller or fall back to the request object -
        if siteId is not None:
            self.__siteId = siteId
        else:
            self.__siteId = self.__reqObj.getValue("WWPDB_SITE_ID")
        self.__sessionObj = self.__reqObj.getSessionObj()
        self.__sessionPath = self.__sessionObj.getPath()
        self.__cI = ConfigInfo(self.__siteId)
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)
        #
        if self.__debug:
            self.__lfh.write("+DataExchange.__setup() - session id %s\n" % (self.__sessionObj.getId()))
            self.__lfh.write("+DataExchange.__setup() - session path %s\n" % (self.__sessionObj.getPath()))
            self.__lfh.write("+DataExchange.__setup() - data set %s instance %s file source %s\n" % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource))
            self.__pI.setDebugFlag(flag=self.__debug)

    def setFileSource(self, fileSource):
        """Override fileSource="archive" """
        self.__fileSource = fileSource

    def setInputSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__inputSessionPath = inputSessionPath

    def purgeLogs(self):
        """Remove the '*log' files in the archive log directory of the current data set.

        Returns the list of candidate log file paths (empty when the directory is not writable).
        """
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId, "log")
        if self.__verbose:
            self.__lfh.write("+DataExchange.purgeLogs() - purging logs in directory %s\n" % (dirPath))

        # initialize here so the return below is always defined
        pthList = []
        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() - purging pattern is %s\n" % (fpattern))

            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write("+DataExchange.purgeLogs() candidate path length is %d\n" % len(pthList))
            #
            for pth in pthList:
                try:
                    os.remove(pth)
                except OSError:
                    # best-effort removal - the file may already be gone
                    pass
        #
        return pthList

    def reversePurge(self, contentType, formatType="pdbx", partitionNumber=1):
        """Remove every version of the archive file for contentType except version 1 (.V1).

        Returns the list of removed file paths.
        """
        fn = self.__getArchiveFileName(contentType=contentType, formatType=formatType, version="none", partitionNumber=partitionNumber)

        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)
        if self.__verbose:
            self.__lfh.write("+DataExchange.reversePurge() - purging in directory %s\n" % (dirPath))
        # guard against a degenerate directory path -
        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write("+DataExchange.reversePurge() - purging pattern is %s\n" % (fpattern))

        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write("+DataExchange.reversePurge() candidate length is %d\n" % len(pthList))
        #
        fList = [pth for pth in pthList if not pth.endswith(".V1")]
        for pth in fList:
            try:
                os.remove(pth)
            except OSError:
                # best-effort removal - the file may already be gone
                pass
        #
        return fList

    def removeWorkflowDir(self):
        """Remove the workflow directory of the current data set. Returns True only on removal."""
        if (self.__depDataSetId is not None) and self.__depDataSetId.startswith("D_") and (len(self.__depDataSetId) > 7):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", self.__depDataSetId)
            if os.access(dirPath, os.W_OK):
                shutil.rmtree(dirPath)
                return True
            return False
        return False

    def createArchiveDir(self, purgeFlag=True):
        """Create new the archive directory if this is needed.

        With purgeFlag=True an existing directory is removed and recreated empty.
        Returns True when a (new) empty directory was created.
        """
        if self.__verbose:
            self.__lfh.write("+DataExchange.createArchiveDir() creating archive directory for data set %s\n" % self.__depDataSetId)
        try:
            archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(archivePath, "archive", self.__depDataSetId)
            if not os.access(dirPath, os.W_OK):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.createArchiveDir() creating archive directory path %s\n" % dirPath)
                os.makedirs(dirPath)
                return True
            if purgeFlag:
                if self.__verbose:
                    self.__lfh.write("+DataExchange.createArchiveDir() existing archive directory path purged: %s\n" % dirPath)
                shutil.rmtree(dirPath)
                os.makedirs(dirPath)
                return True
            if self.__verbose:
                self.__lfh.write("+DataExchange.createArchiveDir() archive directory exists: %s\n" % dirPath)
            return False
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def fetch(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the current session directory (session naming semantics follow source file object)

        Return the full path of the copied file or None
        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.fetch() source type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))
        try:
            if os.access(inpFilePath, os.R_OK):
                (_dirPath, fileName) = os.path.split(inpFilePath)
                # the session copy retains the versioned source file name -
                outFilePath = os.path.join(self.__sessionPath, fileName)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.fetch() destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            if self.__verbose:
                self.__lfh.write("+DataExchange.fetch() missing input file at path %s\n" % inpFilePath)
            return None
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    def export(self, inpFilePath, contentType, formatType, version="latest", partitionNumber=1):
        """Copy input file to workflow instance or archival storage.

        Return True on success or False otherwise.
        """
        outFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__verbose:
            self.__lfh.write("+DataExchange.export() destination type %s format %s version %s path %s\n" % (contentType, formatType, version, outFilePath))
        try:
            if os.access(inpFilePath, os.R_OK) and (os.path.getsize(inpFilePath) > 0):
                if self.__verbose:
                    self.__lfh.write("+DataExchange.export() destination file path %s\n" % outFilePath)
                # gzipped input is decompressed on export -
                if inpFilePath.endswith(".gz"):
                    self.__copyGzip(inpFilePath, outFilePath)
                else:
                    shutil.copyfile(inpFilePath, outFilePath)
                return True
            if self.__verbose:
                self.__lfh.write("+DataExchange.export() missing or zero length input file at path %s\n" % inpFilePath)
            return False
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def __copyGzip(self, inpFilePath, outFilePath):
        """Decompress gzipped inpFilePath to outFilePath. Returns True on success."""
        try:
            # stdlib streaming decompression - avoids shelling out with unquoted paths
            import gzip

            with gzip.open(inpFilePath, "rb") as inpFile:
                with open(outFilePath, "wb") as outFile:
                    shutil.copyfileobj(inpFile, outFile)
            return True
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return False

    def copyDirToSession(self, dirName):
        """Replicate the input directory in the session directory -

        Returns True on success and False otherwise.
        """
        try:
            if self.__fileSource in ["archive", "wf-archive"]:
                pth = self.__pI.getArchivePath(self.__depDataSetId)
            elif self.__fileSource in ["deposit"]:
                pth = self.__pI.getDepositPath(self.__depDataSetId)
            elif self.__fileSource in ["wf-instance"]:
                pth = self.__pI.getInstancePath(self.__depDataSetId, self.__wfInstanceId)
            else:
                return False
            srcPath = os.path.join(pth, dirName)
            if not os.access(srcPath, os.R_OK):
                return False
            dstPath = os.path.join(self.__sessionPath, dirName)
            if not os.path.isdir(dstPath):
                os.makedirs(dstPath, 0o755)
            #
            # copy only the plain files in the top level of the source directory -
            fPattern = os.path.join(srcPath, "*")
            for fp in filter(os.path.isfile, glob.glob(fPattern)):
                _dN, fN = os.path.split(fp)
                shutil.copyfile(fp, os.path.join(dstPath, fN))
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() successful session copy of dirName %s\n" % (dirName))
            return True
        except Exception:
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyDirToSession() fails for dirName %s\n" % (dirName))
                traceback.print_exc(file=self.__lfh)
            return False

    def copyToSession(self, contentType, formatType, version="latest", partitionNumber=1):
        """Copy the input content object into the session directory using archive naming conventions less version details.

        Return the full path of the session file or None
        """
        inpFilePath = self.__getFilePath(fileSource=self.__fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
        if self.__debug:
            self.__lfh.write("+DataExchange.copyToSession() source file type %s format %s version %s path %s\n" % (contentType, formatType, version, inpFilePath))
        outFilePath = None
        try:
            if os.access(inpFilePath, os.R_OK):
                fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
                outFilePath = os.path.join(self.__sessionPath, fn)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.copyToSession() content type %s format %s copied to session path %s\n" % (contentType, formatType, outFilePath))
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            if self.__debug:
                self.__lfh.write("+DataExchange.copyToSession() missing input file at path %s\n" % inpFilePath)
            return None
        except Exception:
            if self.__verbose:
                self.__lfh.write("+DataExchange.copyToSession() Failing for content type %s format %s with session path %s\n" % (contentType, formatType, outFilePath))
                traceback.print_exc(file=self.__lfh)
            return None

    def updateArchiveFromSession(self, contentType, formatType, version="next", partitionNumber=1):
        """Copy the input content object from the session directory stored using archive naming
        conventions less version details to archive storage.

        Return the full path of the archive file or None
        """
        fn = self.__getArchiveFileName(contentType, formatType, version="none", partitionNumber=partitionNumber)
        inpFilePath = os.path.join(self.__sessionPath, fn)
        if self.__verbose:
            self.__lfh.write("+DataExchange.updateArchiveFromSession() source file type %s format %s path %s\n" % (contentType, formatType, inpFilePath))
        try:
            if os.access(inpFilePath, os.R_OK):
                outFilePath = self.__getFilePath(fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber)
                if self.__verbose:
                    self.__lfh.write("+DataExchange.updateArchiveFromSession() archive destination file path %s\n" % outFilePath)
                shutil.copyfile(inpFilePath, outFilePath)
                return outFilePath
            if self.__verbose:
                self.__lfh.write("+DataExchange.updateArchiveFromSession() missing session input file at path %s\n" % inpFilePath)
            return None
        except Exception:
            if self.__verbose:
                traceback.print_exc(file=self.__lfh)
            return None

    ##
    def getVersionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """
        For the input content object return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileList([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getPartitionFileList(self, fileSource="archive", contentType="model", formatType="pdbx", mileStone=None):
        """
        For the input content object return a list of file partitions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPattern = self.__pI.getFilePathPartitionTemplate(
                dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, formatType=formatType, fileSource=fileSource, mileStone=mileStone
            )
            tL = self.__getFileList([fPattern], sortFlag=True)
            if self.__debug:
                self.__lfh.write("+DataExchange.getPartitionFileList() pattern %r\n" % fPattern)
                self.__lfh.write("+DataExchange.getPartitionFileList() file list %r\n" % tL)
            #
            return tL
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getPartitionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self, fileSource="archive", contentTypeList=None):
        """
        For the input content types return a list of file versions sorted by modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(dataSetId=self.__depDataSetId, wfInstanceId=self.__wfInstanceId, contentType=contentType, fileSource=fileSource)
                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write("+DataExchange.getContentTypeFileList() patterns %r\n" % fPatternList)
            return self.__getFileList(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataExchange.getContentTypeFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        """Return the file list matching the input glob pattern list (defaults to all files)."""
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        """Return the list of log files (top-level *log and log/*) for entryId in the given file source."""
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
        else:
            return []
        patList = [os.path.join(pth, "*log"), os.path.join(pth, "log", "*")]
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """
        For the input glob compatible file pattern produce a file list sorted by modification date.

        If sortFlag is set then file list is sorted by modification date (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]
        """
        if fPatternList is None:
            fPatternList = ["*"]
        rTup = []
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))
            file_date_tuple_list = [(x, os.path.getmtime(x), float(os.path.getsize(x)) / 1000.0) for x in files]
            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda t: t[1], reverse=True)
            for fP, mT, sZ in file_date_tuple_list:
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write("+DataExchange.__getFileList() failing for patternlist %r error %r\n" % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
        return rTup

    ##
    def __getArchiveFileName(self, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return only the file name portion of the archive path for the input content object."""
        (_fp, _d, f) = self.__targetFilePath(
            fileSource="archive", contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return f

    def __getFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return only the full file path for the input content object."""
        (fp, _d, _f) = self.__targetFilePath(
            fileSource=fileSource, contentType=contentType, formatType=formatType, version=version, partitionNumber=partitionNumber, mileStone=mileStone
        )
        return fp

    def __targetFilePath(self, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return the file path, directory path, and filename  for the input content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__inputSessionPath is not None:
                self.__pI.setSessionPath(self.__inputSessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=self.__depDataSetId,
                wfInstanceId=self.__wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataExchange.__targetFilePath() failing for data set %s instance %s file source %s error %r\n"
                    % (self.__depDataSetId, self.__wfInstanceId, self.__fileSource, str(e))
                )
                traceback.print_exc(file=self.__lfh)
            return (None, None, None)
class DataFileReference(DataReferenceBase):
    """Reference to a data file inside the archive/workflow file system (or an
    external path), resolving identifiers, content type, format, storage type,
    version and partition selectors to concrete directory and file paths
    following wwPDB naming conventions.
    """

    def __init__(self, siteId=None, verbose=False, log=sys.stderr):
        super(DataFileReference, self).__init__()
        #
        self.__siteId = siteId
        self.__verbose = verbose
        self.__debug = False
        self.__lfh = log
        #
        self.__cI = ConfigInfo(siteId=self.__siteId, verbose=self.__verbose, log=self.__lfh)
        #
        # A supported content type: model, structure-factors, nmr-restraints, em-volume, others to be enumerated.
        self.__contentType = None
        # A supported file format: pdbx/mmcif, pdb, pdbml, nmr-star, others to be enumerated.
        self.__fileFormat = None
        # Storage type: archive or wf-archive, wf-instance, wf-shared, deposit, tempdep,
        # session or wf-session, inline, others to be enumerated.
        self.__storageType = None
        # Version identifier: latest, original, next, previous, or a version number (1,2,...,).
        self.__versionId = None
        # Integer (or symbolic) index for file partitioning.
        self.__filePartNumber = 1
        # Deposition data set identifier (e.g. D_123456).
        self.__depositionDataSetId = None
        # Workflow instance identifier (e.g. W_123456).
        self.__workflowInstanceId = None
        # Workflow name space identifier (alpha-numeric character string).
        self.__workflowNameSpace = None
        # Dictionary of supported file formats for each recognized content type.
        # An acronym for each content type is included.
        self.__contentInfoD = self.__cI.get("CONTENT_TYPE_DICTIONARY")
        # Dictionary of recognized file formats and file name extensions.
        self.__formatExtensionD = self.__cI.get("FILE_FORMAT_EXTENSION_DICTIONARY")
        # List of supported storage types/locations.
        self.__storageTypeList = [
            "archive", "autogroup", "wf-archive", "wf-instance", "wf-shared",
            "session", "wf-session", "deposit", "inline", "tempdep", "uploads"
        ]
        # A deposition data set identifier begins with this prefix and is followed
        # by a string of digits (e.g. D_123456789).
        self.__depositionDataSetIdPrefix = "D_"
        # A group data set identifier begins with this prefix and is followed
        # by a string of digits (e.g. G_1234567).
        self.__groupDataSetIdPrefix = "G_"
        # A workflow instance identifier begins with this prefix and is followed
        # by a string of digits (e.g. W_123456789).
        self.__workflowInstanceIdPrefix = "W_"
        # Symbolic names accepted for version and partition selectors.
        self.__versionNameList = ["latest", "original", "previous", "next", "none"]
        self.__partitionNameList = ["latest", "original", "previous", "next", "none"]
        #
        # Path of a file *external* to the archive or workflow system. Setting this
        # path implies a content type of *external* and other attributes of the
        # reference are treated as unknown/unassignable.
        self.__externalFilePath = None
        # Directory used for any files with 'session' storage type; defaults to the
        # current directory.
        self.__sessionPath = "."
        # Optional session data set identifier (e.g. 1abc).
        self.__sessionDataSetId = None

    def getSitePrefix(self):
        """Returns:
        Current setting of the site prefix.
        """
        return self.__cI.get("SITE_PREFIX")

    def setSessionPath(self, dirPath=None):
        """Set the full directory path for 'session' type storage.

        The 'session' feature provides a means to support workflow file naming
        conventions for applications with transient storage requirements.

        Returns True for any non-null argument.  No check is performed for the
        existence of this path on input.
        """
        if dirPath is not None:
            self.__sessionPath = dirPath
            return True
        else:
            return False

    def setExternalFilePath(self, filePath, fileFormat="any"):
        """Set the full file path for this reference outside of the archive/workflow system.

        Other identifying attributes of this file reference are ignored/by-passed
        when this path is set.  This feature is provided to permit external data
        with alternative file name conventions to be used within data file
        references.  File format may be optionally specified and must correspond
        to a supported format or the default 'any'.

        Returns:
        True if the assignment was successful or False otherwise.
        """
        if (filePath is None) or (len(filePath) < 1):
            return False
        if fileFormat in self.__formatExtensionD.keys():
            self.__fileFormat = fileFormat
        else:
            return False
        #
        # Reset internal-reference attributes -- an external path supersedes them.
        #
        self.__contentType = None
        self.__storageType = None
        self.__versionId = None
        self.__filePartNumber = 1
        self.__depositionDataSetId = None
        self.__workflowInstanceId = None
        self.__workflowNameSpace = None
        #
        self.__externalFilePath = None
        #
        try:
            self.__externalFilePath = os.path.abspath(filePath)
            (pth, fn) = os.path.split(self.__externalFilePath)
            if pth is None or fn is None:
                return False
            return True
        except Exception as _e:  # noqa: F841
            pass
        return False

    def setContentTypeAndFormat(self, contentType, fileFormat):
        """Set the content type and file format for the file reference.

        Examples of supported content types include:
        - model
        - structure-factors
        - nmr-restraints
        - nmr-chemical-shifts
        - component-image
        - component-definition
        - validation-report
        - em-volume

        Supported formats for each content type are defined in file format
        dictionary (`self.__contentInfoD`).

        Returns:
        True for a recognized content type or False otherwise.
        """
        tS = str(contentType).lower()
        # Probe the configuration dictionary first -- logs a diagnostic when the
        # CONTENT_TYPE_DICTIONARY could not be loaded (self.__contentInfoD is None).
        try:
            self.__contentInfoD.keys()
        except Exception as e:
            logger.exception("Failing with %r", str(e))
        if tS in self.__contentInfoD.keys():
            self.__contentType = tS
            fS = str(fileFormat).lower()
            if (fS in self.__contentInfoD[tS][0]) or ("any" in self.__contentInfoD[tS][0]):
                self.__contentType = tS
                self.__fileFormat = fS
                if self.__debug:
                    logger.debug("++setContentTypeAndFormat -- returning True with self.__contentType: %s", self.__contentType)
                    logger.debug("++setContentTypeAndFormat -- returning True with self.__fileFormat: %s", self.__fileFormat)
                self.setReferenceType("file")
                return True
            else:
                if self.__debug:
                    logger.debug("++setContentTypeAndFormat -- returning False with tS: %s", tS)
                    logger.debug("++setContentTypeAndFormat -- returning False with fS: %s", fS)
                return False
        else:
            if self.__debug:
                logger.debug("++setContentTypeAndFormat -- unrecognized content type %r", tS)
            return False

    def getStorageTypeList(self):
        """Returns:
        The list of supported storage types/locations.
        """
        return self.__storageTypeList

    def setStorageType(self, storageType):
        """Set the storage type for this file reference.

        Supported storage types include:
        - archive or wf-archive
        - wf-instance
        - wf-shared
        - deposit
        - tempdep
        - session or wf-session

        Returns:
        True for a recognized storage type or False otherwise.
        """
        tS = str(storageType).lower()
        if tS in self.__storageTypeList:
            self.__storageType = tS
            if tS not in ["inline", "constant"]:
                self.setReferenceType("file")
            return True
        else:
            return False

    def setVersionId(self, versionId):
        """Set the version identifier for this file reference.

        Supported version identifiers include:
        - latest, ...
        - original
        - an integer version number (1,2,...,)

        Returns:
        True for a valid version identifier or False otherwise.
        """
        tS = str(versionId).lower()
        # Test the normalized (lowercased) value so symbolic names are accepted
        # case-insensitively; previously the raw argument was tested.
        if tS in self.__versionNameList:
            self.__versionId = tS
            return True
        elif self.__isInteger(tS):
            self.__versionId = tS
            return True
        else:
            return False

    def __isInteger(self, str_in):
        """Return True if the given string parses as an integer, False otherwise."""
        try:
            int(str_in)
            return True
        except ValueError:
            return False

    def setDepositionDataSetId(self, dId):
        """Set the deposition data set identifier.

        A deposition data set identifier begins with the prefix *D_* (or the
        group prefix *G_*) and is followed by a string of digits
        (e.g. D_123456789).

        Returns:
        True if the input identifier is a properly formed identifier or
        False otherwise.
        """
        tS = str(dId).upper()
        # Require one of the recognized prefixes.  The previous test compared
        # against the truthiness of the prefix string itself ("G_" is always
        # truthy), so the prefix requirement was never enforced.
        if (not tS.startswith(self.__depositionDataSetIdPrefix)) and (not tS.startswith(self.__groupDataSetIdPrefix)):
            return False
        tSL = tS.split("_")
        if (len(tSL) > 1) and self.__isInteger(tSL[1]):
            self.__depositionDataSetId = tS
            return True
        else:
            return False

    def setWorkflowInstanceId(self, wId):
        """Set the workflow instance identifier.

        A workflow instance identifier begins with the prefix *W_* and is
        followed by a string of digits (e.g. W_123456789)

        Returns:
        True if the input identifier is a properly formed identifier or
        False otherwise.
        """
        tS = str(wId).upper()
        if not tS.startswith(self.__workflowInstanceIdPrefix):
            return False
        tSL = tS.split("_")
        if (len(tSL) > 1) and self.__isInteger(tSL[1]):
            self.__workflowInstanceId = tS
            return True
        else:
            return False

    def setSessionDataSetId(self, sId):
        """Set the session data set identifier.

        Data set identifier applied for session storage.  No conventions are
        assumed for this identifier.

        Returns:
        True if the input identifier is non-blank or False otherwise.
        """
        if sId is not None and len(sId) > 0:
            self.__sessionDataSetId = str(sId).upper()
            return True
        else:
            return False

    def setWorkflowNameSpace(self, wNameSpace):
        """Set the workflow name space identifier.

        This identifier must be an alpha numeric string containing only
        characters [a-zA-Z0-9].

        Returns:
        True if the input identifier is a properly formed identifier or
        False otherwise.
        """
        if (wNameSpace is None) or (len(str(wNameSpace)) < 1):
            return False
        for cv in str(wNameSpace):
            if (cv not in string.ascii_letters) and (cv not in string.digits):
                return False
        self.__workflowNameSpace = wNameSpace
        return True

    def setPartitionNumber(self, iPartitionNumber=1):
        """Set the integer file partition number.

        This is used to identify the physical pieces of a single logical data
        file.  Supported values for partition include:
        - latest, ...
        - original
        - an integer version number (1,2,...,)

        Returns:
        True if the input partition is properly formed or False otherwise.
        """
        ok = False
        try:
            tS = str(iPartitionNumber).lower()
            # Test the normalized value so symbolic names are accepted
            # case-insensitively; previously the raw argument was tested.
            if tS in self.__partitionNameList:
                self.__filePartNumber = tS
                ok = True
            elif self.__isInteger(tS):
                self.__filePartNumber = int(tS)
                ok = True
            else:
                ok = False
        except Exception:
            ok = False
        if self.__debug:
            logger.debug("+DataFileReference.setPartitionNumber() setting is %r", self.__filePartNumber)
        return ok

    def getPartitionNumber(self):
        """Returns:
        The current partition number or *1* if this is not set.
        """
        return self.__filePartNumber

    def getContentType(self):
        """Returns:
        The current content type or *None* if this is not set.
        """
        return self.__contentType

    def getFileFormat(self):
        """Returns:
        The current file format or *None* if this is not set.
        """
        return self.__fileFormat

    def getStorageType(self):
        """Returns:
        The current storage type or *None* if this is not set.
        """
        return self.__storageType

    def getVersionId(self):
        """Returns:
        The current version identifier or *None* if this is not set.
        """
        return self.__versionId

    def getDepositionDataSetId(self):
        """Returns:
        The current deposition data set identifier or *None* if this is not set.
        """
        return self.__depositionDataSetId

    def getWorkflowInstanceId(self):
        """Returns:
        The current workflow instance identifier or *None* if this is not set.
        """
        return self.__workflowInstanceId

    def getWorkflowNameSpace(self):
        """Returns:
        The current workflow name space identifier or *None* if this is not set.
        """
        return self.__workflowNameSpace

    #
    # ------------------------------------------------------------------------------------------------------------------------------------
    #
    # --- The following public methods derive information from the settings in the previous methods --
    #
    def isReferenceValid(self):
        """Test if the reference information is complete and the data reference is valid.

        Valid references are:
        - A path external to the archive/workflow system
        - A fully defined internal reference consisting of identifiers,
          content type, storage type, format, and version.

        Note that this is NOT an existence test.  References may be defined and
        validated before the file objects which they reference are created.

        Returns:
        True for either a valid external or internal reference or False otherwise.
        """
        if self.__externalFilePath is not None:
            return True
        else:
            return self.__isInternalReferenceValid()

    def getDirPathReference(self):
        """Get the path to the directory containing the data file reference.

        Returns:
        The file system path to the directory containing the file reference or
        *None* if this cannot be determined.
        """
        if self.__externalFilePath is not None:
            # NOTE(review): for external references this returns the full file
            # path, not its containing directory -- confirm callers expect this.
            return self.__externalFilePath
        else:
            return self.__getInternalPath()

    def getFilePathReference(self):
        """Get the versioned file path for an internal data file reference or the
        path to an external data file reference.

        Returns:
        The file system path to the file reference or *None* if this cannot be
        determined.
        """
        if self.__externalFilePath is not None:
            return self.__externalFilePath
        if not self.__isInternalReferenceValid():
            return None
        return self.__getInternalFilePath()

    def getFilePathExists(self, fP):
        """Return True if path *fP* exists and is readable, False otherwise."""
        try:
            if os.access(fP, os.R_OK):
                return True
            else:
                return False
        except Exception as _e:  # noqa: F841
            if self.__verbose:
                # print_exc takes the stream via the 'file' keyword; the first
                # positional argument is 'limit' and passing a stream there
                # raised a TypeError inside this handler.
                traceback.print_exc(file=self.__lfh)
            return False

    def getFileVersionNumber(self):
        """Get the version number corresponding to the current data file reference.

        Returns:
        The version number 1-N of the current data reference or 0 otherwise.
        External references are treated as having no version and 0 is returned
        for these cases.
        """
        if self.__externalFilePath is not None:
            return 0
        if not self.__isInternalReferenceValid():
            return 0
        return self.__getInternalVersionNumber()

    #
    # ------------------------------------------------------------------------------------------------------------------------------------
    #
    # --- The following private worker methods support the public path and validation methods.
    #
    def __isInternalReferenceValid(self):
        """Test if the current reference information is complete for an internal reference.

        A reference is considered internal which points within the archive,
        workflow instance, deposit or session file systems.  Otherwise the
        reference is considered external and not subject to internal naming or
        path conventions.

        Note that this is NOT an existence test.  References may be defined and
        validated before the file objects which they reference are created.

        Returns:
        True if the internal reference is complete or False otherwise.
        """
        referenceType = self.getReferenceType()
        if referenceType == "file":
            if (self.__contentType is None) or (self.__fileFormat is None) or (self.__storageType is None) or (self.__versionId is None):
                return False
            if (self.__storageType in ["archive", "autogroup", "wf-archive", "wf-instance", "wf-shared", "deposit", "tempdep"]) and (self.__depositionDataSetId is None):
                logger.debug("self.__depositionDataSetId is: %s", self.__depositionDataSetId)
                return False
            if (self.__storageType in ["session", "wf-session"]) and (self.__sessionDataSetId is None):
                return False
            if (self.__storageType == "wf-instance") and (self.__workflowInstanceId is None):
                return False
            if (self.__storageType == "wf-shared") and (self.__workflowNameSpace is None):
                return False
            return True
        elif referenceType == "directory":
            if self.__storageType is None:
                return False
            if (self.__storageType in ["archive", "autogroup", "wf-archive", "wf-instance", "wf-shared", "deposit", "tempdep"]) and (self.__depositionDataSetId is None):
                return False
            if (self.__storageType == "wf-instance") and (self.__workflowInstanceId is None):
                return False
            if (self.__storageType == "wf-shared") and (self.__workflowNameSpace is None):
                return False
            if (self.__storageType in ["wf-session", "session"]) and (self.__sessionPath is None):
                return False
            return True
        else:
            return False

    def __getInternalPath(self):
        """Compute the path to the current file reference within the archive/workflow file system.

        The file path convention is:
        - archive files        = <SITE_ARCHIVE_STORAGE_PATH>/archive/<deposition data set id>/
        - deposit files        = <SITE_ARCHIVE_STORAGE_PATH>/archive/<deposition data set id>/
        - temp deposit files   = <SITE_ARCHIVE_STORAGE_PATH>/tempdep/<deposition data set id>/
        - workflow shared      = <SITE_ARCHIVE_STORAGE_PATH>/workflow/<deposition data set id>/shared/<self.__workflowNameSpace>
        - workflow instance    = <SITE_ARCHIVE_STORAGE_PATH>/workflow/<deposition data set id>/instance/<self.__workflowInstanceId>
        - session files        = session path/

        Top-level site-specific path details are obtained from the SiteInfo() class.

        Returns:
        The path of the directory containing this data file reference.
        *None* is returned on failure.
        """
        try:
            if self.__storageType == "archive" or self.__storageType == "wf-archive":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "archive", self.__depositionDataSetId)
            elif self.__storageType == "autogroup":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "autogroup", self.__depositionDataSetId)
            elif self.__storageType == "deposit":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "deposit", self.__depositionDataSetId)
            elif self.__storageType == "tempdep":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "tempdep", self.__depositionDataSetId)
            elif self.__storageType == "wf-shared":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "workflow", self.__depositionDataSetId, "shared", self.__workflowNameSpace)
            elif self.__storageType == "wf-instance":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "workflow", self.__depositionDataSetId, "instance", self.__workflowInstanceId)
            elif self.__storageType in ["session", "wf-session"]:
                tpth = self.__sessionPath
            elif self.__storageType == "uploads":
                tpth = os.path.join(self.__cI.get("SITE_ARCHIVE_STORAGE_PATH"), "deposit", "temp_files", "deposition_uploads", self.__depositionDataSetId)
            else:
                tpth = None
            pth = os.path.abspath(tpth)
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            pth = None
        return pth

    def __getInternalFileNameBase(self):
        """Compute the base file name based on the current values of storage type,
        identifier, content type, file format.

        The file name convention is:
        - archive/shared files = <deposition data set id>_<content acronym>_<part number>.<format_extension>
        - instance files       = <deposition data set id>_<content acronym>_<part number>.<format_extension>
        - session files        = <session data set id>_<content acronym>_<part number>.<format_extension>

        Returns:
        The base file name.  This base file name lacks version details.
        """
        try:
            if self.getReferenceType() != "file":
                return None
            if self.__storageType in ["archive", "autogroup", "wf-archive", "wf-shared", "deposit", "tempdep"]:
                fn = (self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P" + str(self.__filePartNumber) + "." + self.__formatExtensionD[self.__fileFormat])
            elif self.__storageType in ["session", "wf-session"]:
                fn = (self.__sessionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P" + str(self.__filePartNumber) + "." + self.__formatExtensionD[self.__fileFormat])
            elif self.__storageType in ["wf-instance"]:
                fn = (self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P" + str(self.__filePartNumber) + "." + self.__formatExtensionD[self.__fileFormat])
            else:
                fn = None
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            fn = None
        return fn

    def __getInternalFilePath(self):
        """Compute the versioned file path for a file within the archive/workflow file system.

        If either the *latest*, *next*, or *previous* version of the referenced
        file is selected then a file system check is performed to determine the
        appropriate version number.

        Returns:
        File path including version or None on failure.
        """
        try:
            if self.getReferenceType() != "file":
                return None
            dirPath = self.__getInternalPath()
            fN = self.__getInternalFileNameVersioned()
            pth = os.path.join(dirPath, fN)
            return pth
        except Exception as _e:  # noqa: F841
            return None

    def getVersionIdSearchTarget(self):
        """Create a search target for the files containing any version identifier
        consistent with the current file settings.

        Returns a search target appropriate for glob() or None
        """
        try:
            if self.getReferenceType() != "file":
                return None
            baseName = self.__getInternalFileNameBase()
            vst = baseName + ".V*"
            return vst
        except Exception as _e:  # noqa: F841
            return None

    def __getInternalFileNameVersioned(self):
        """Compute the versioned file name for a file within the archive/workflow file system.

        If either the *latest*, *next*, or *previous* version of the referenced
        file is selected then a file system check is performed to determine the
        appropriate version number.

        Returns:
        File name including version or None on failure.
        """
        try:
            if self.getReferenceType() != "file":
                return None
            dirPath = self.__getInternalPath()
            #
            # First resolve any symbolic partition information -
            #
            self.__filePartNumber = self.__getInternalPartitionNumber()
            #
            baseName = self.__getInternalFileNameBase()
            if self.__versionId == "latest":
                iV = self.__latestVersion(dirPath, baseName)
                if iV == 0:
                    # No version exists so start at 1
                    fn = baseName + ".V1"
                else:
                    fn = baseName + ".V" + str(int(iV))
            elif self.__versionId == "next":
                iV = self.__latestVersion(dirPath, baseName)
                if iV == 0:
                    # No version exists so start at 1
                    fn = baseName + ".V1"
                else:
                    fn = baseName + ".V" + str(int(iV + 1))
            elif self.__versionId == "previous":
                iV = self.__latestVersion(dirPath, baseName)
                if iV <= 1:
                    # No previous version.
                    fn = None
                else:
                    fn = baseName + ".V" + str(int(iV - 1))
            elif self.__versionId == "original":
                fn = baseName + ".V1"
            elif self.__versionId == "none":
                fn = baseName
            else:
                fn = baseName + ".V" + str(int(self.__versionId))
            return fn
        except Exception as e:
            logger.exception("failure in getInternalFileNameVersioned %r", str(e))
            return None

    def __getInternalVersionNumber(self):
        """Determine the version number corresponding to the current version Id setting.

        If either the *latest*, *next*, or *previous* version of the referenced
        file is selected then a file system check is performed to determine the
        appropriate version number.

        Returns:
        Return a version number from 1-N or 0 failure.
        """
        try:
            if self.getReferenceType() != "file":
                return 0
            dirPath = self.__getInternalPath()
            self.__filePartNumber = self.__getInternalPartitionNumber()
            baseName = self.__getInternalFileNameBase()
            if self.__versionId == "latest":
                iV = self.__latestVersion(dirPath, baseName)
            elif self.__versionId == "next":
                iV = self.__latestVersion(dirPath, baseName)
                iV += 1
            elif self.__versionId == "previous":
                iV = self.__latestVersion(dirPath, baseName)
                iV -= 1
                if iV < 0:
                    iV = 0
            elif self.__versionId == "original":
                iV = 1
            else:
                iV = int(self.__versionId)
            return iV
        except Exception as e:
            if self.__debug:
                logger.exception("Failing with %r", str(e))
            return 0

    def __latestVersion(self, dirPath, baseName):
        """Get the latest version of file *baseName* in path *dirPath*.

        The convention for version numbering is <baseName>.V#

        Returns:
        The latest integer version number or 0 if no versions exist.
        """
        try:
            fN = None
            if self.getReferenceType() != "file":
                return 0
            vList = []
            fileList = os.listdir(dirPath)
            for fN in fileList:
                if fN.startswith(baseName):
                    fSp = fN.split(".V")
                    # Skip names without a numeric .V suffix.
                    if (len(fSp) < 2) or (not fSp[1].isdigit()):
                        continue
                    vList.append(int(fSp[1]))
            if len(vList) > 0:
                vList.sort()
                return vList[-1]
            else:
                return 0
        except Exception as e:
            if self.__debug:
                logger.exception("Failing -dirPath %s baseName %s fN %s with %s", dirPath, baseName, fN, str(e))
            return 0

    def __latestPartitionNumber(self, dirPath, searchTarget):
        """Get the latest partition number of file in path *dirPath* consistent with
        current file settings.

        Returns:
        The latest integer partition number or 0 if no files exist.
        """
        try:
            fN = None
            if self.getReferenceType() != "file":
                return 0
            pList = []
            searchPath = os.path.join(dirPath, searchTarget)
            if self.__debug:
                logger.debug("+DataFileReference.__lastestPartitionNumber() search target %s", searchPath)
            for pth in glob.glob(searchPath):
                if self.__debug:
                    logger.debug("+DataFileReference.__lastestPartitionNumber() search path %s", pth)
                (_td, fN) = os.path.split(pth)
                # File names look like <id>_<acronym>_P<part>.<ext>; the part
                # number is the digits after 'P' in the fourth '_' token.
                fL1 = fN.split(".")
                fL2 = fL1[0].split("_")
                pList.append(int(fL2[3][1:]))
            if self.__debug:
                logger.debug("+DataFileReference.__lastestPartitionNumber() part number list %r", pList)
            if len(pList) > 0:
                pList.sort()
                return pList[-1]
            else:
                return 0
        except Exception as e:
            if self.__debug:
                logger.exception("Failing with %r", str(e))
            return 0

    def getPartitionNumberSearchTarget(self):
        """Create a search target for the files containing any partition number
        consistent with the current file settings.  The search target is
        independent of version identifier.

        The file name convention is:
        - archive/shared files = <deposition data set id>_<content acronym>_P*.<format_extension>
        - instance files       = <deposition data set id>_<content acronym>_P*.<format_extension>
        - session files        = <session data set id>_<content acronym>_P*.<format_extension>

        Returns:
        A search string appropriate for glob().
        """
        try:
            if self.getReferenceType() != "file":
                return None
            if self.__storageType in ["archive", "autogroup", "wf-archive", "wf-shared", "deposit", "tempdep"]:
                fn = self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*" + "." + self.__formatExtensionD[self.__fileFormat] + "*"
            elif self.__storageType in ["session", "wf-session"]:
                fn = self.__sessionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*" + "." + self.__formatExtensionD[self.__fileFormat] + "*"
            elif self.__storageType in ["wf-instance"]:
                fn = self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*" + "." + self.__formatExtensionD[self.__fileFormat] + "*"
            else:
                fn = None
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            fn = None
        return fn

    def getContentTypeSearchTarget(self):
        """Create a search target for the files containing any variation consistent
        with the content type in current file settings.  The search target is
        independent of partition, format and version identifier.

        The file name convention is:
        - archive/shared files = <deposition data set id>_<content acronym>_P*
        - instance files       = <deposition data set id>_<content acronym>_P*
        - session files        = <session data set id>_<content acronym>_P*

        Returns:
        A search string appropriate for glob().
        """
        try:
            if self.__storageType in ["archive", "autogroup", "wf-archive", "wf-shared", "deposit", "tempdep"]:
                fn = self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*"
            elif self.__storageType in ["session", "wf-session"]:
                fn = self.__sessionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*"
            elif self.__storageType in ["wf-instance"]:
                fn = self.__depositionDataSetId + "_" + self.__contentInfoD[self.__contentType][1] + "_P*"
            else:
                fn = None
        except Exception as e:
            logger.exception("Failing storage %r data set id %r content type %r with %r", self.__storageType, self.__depositionDataSetId, self.__contentType, str(e))
            fn = None
        return fn

    def __getInternalPartitionNumber(self):
        """Determine the partition number corresponding to the current partition number setting.

        If either the *latest*, *next*, or *previous* version of the referenced
        file is selected then a file system check is performed to determine the
        appropriate partition number.

        Returns:
        Return a partition number from 1-N or 0 failure.
        """
        try:
            if self.getReferenceType() != "file":
                return 0
            dirPath = self.__getInternalPath()
            searchTarget = self.getPartitionNumberSearchTarget()
            if self.__filePartNumber == "latest":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
            elif self.__filePartNumber == "next":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
                iP += 1
            elif self.__filePartNumber == "previous":
                iP = self.__latestPartitionNumber(dirPath, searchTarget)
                iP -= 1
                if iP < 0:
                    iP = 0
            elif self.__filePartNumber == "original":
                iP = 1
            else:
                iP = int(self.__filePartNumber)
            return iP
        except Exception as e:
            logger.exception("Failing with %r", str(e))
            return 0
class DataMaintenance(object):
    """Collection of data maintenance utilities supporting purge and recovery of
    data files post release.

    This class duplicates some methods from class DataExchange for anticipated
    future use.
    """

    def __init__(self, siteId=None, testMode=False, verbose=False, log=sys.stderr):
        self.__verbose = verbose
        self.__lfh = log
        self.__siteId = siteId
        # In test mode no deletions are performed -
        self.__testMode = testMode
        self.__debug = False
        self.__sessionPath = None
        #
        self.__setup(siteId=siteId)

    def __setup(self, siteId=None):
        # Bind config and path resolution helpers for the input site id.
        self.__siteId = siteId
        self.__cI = ConfigInfo(self.__siteId)
        self.__sessionPath = None
        self.__pI = PathInfo(siteId=self.__siteId, sessionPath=self.__sessionPath, verbose=self.__verbose, log=self.__lfh)

    def setSessionPath(self, inputSessionPath=None):
        """Override the path to files with fileSource="session" """
        self.__sessionPath = inputSessionPath

    def purgeLogs(self, dataSetId):
        """Remove *log files from the archive log directory of the input data set.

        In test mode candidate files are only reported, not removed.

        Returns:
            List of candidate log file paths.
        """
        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId, "log")
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.purgeLogs() - purging logs in directory %s\n" % (dirPath))

        if os.access(dirPath, os.W_OK):
            fpattern = os.path.join(dirPath, "*log")
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.purgeLogs() - purging pattern is %s\n" % (fpattern))

            pthList = glob.glob(fpattern)
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.purgeLogs() candidate path length is %d\n" % len(pthList))
            #
            for pth in pthList:
                try:
                    if self.__testMode:
                        self.__lfh.write("+DataMaintenance.purgeLogs() TEST MODE skip remove %s\n" % pth)
                    else:
                        os.remove(pth)
                except:  # noqa: E722 pylint: disable=bare-except
                    # best-effort removal -- individual failures are ignored
                    pass
            #
        # NOTE(review): pthList is unbound when dirPath is not writable -- confirm
        # callers only invoke this on existing writable log directories.
        return pthList

    def reversePurge(self, dataSetId, contentType, formatType="pdbx", partitionNumber=1):
        """Remove all versions EXCEPT .V1 of the selected archive content object.

        In test mode candidate files are only reported, not removed.

        Returns:
            List of candidate file paths (versions other than .V1).
        """
        fn = self.__getArchiveFileName(dataSetId, contentType=contentType, formatType=formatType, version="none", partitionNumber=partitionNumber)

        archivePath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
        dirPath = os.path.join(archivePath, "archive", dataSetId)
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__setup() - purging in directory %s\n" % (dirPath))

        # guard against an accidental glob over a near-empty path
        if len(dirPath) < 2:
            return []
        fpattern = os.path.join(dirPath, fn + ".V*")
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__setup() - purging pattern is %s\n" % (fpattern))

        pthList = glob.glob(fpattern)
        if self.__verbose:
            self.__lfh.write("+DataMaintenance.__reversePurge() candidate length is %d\n" % len(pthList))
        #
        fList = []
        for pth in pthList:
            # keep the first version (.V1); everything else is a purge candidate
            if not pth.endswith(".V1"):
                fList.append(pth)

        for pth in fList:
            try:
                if self.__testMode:
                    self.__lfh.write("+DataMaintenance.reversePurge() TEST MODE skip remove %s\n" % pth)
                else:
                    os.remove(pth)
            except:  # noqa: E722 pylint: disable=bare-except
                # best-effort removal -- individual failures are ignored
                pass
        #
        return fList

    def removeWorkflowDir(self, dataSetId):
        """Remove the workflow directory tree for the input data set id.

        The id must look like a deposition id ("D_" prefix, length > 10) to
        avoid removing arbitrary paths.  Returns True on (attempted) removal.
        """
        if (dataSetId is not None) and dataSetId.startswith("D_") and (len(dataSetId) > 10):
            workflowPath = self.__cI.get("SITE_ARCHIVE_STORAGE_PATH")
            dirPath = os.path.join(workflowPath, "workflow", dataSetId)
            if os.access(dirPath, os.W_OK):
                if self.__testMode:
                    self.__lfh.write("+DataMaintenance.removeWorkflowDir() TEST MODE skip remove %s\n" % dirPath)
                else:
                    shutil.rmtree(dirPath)
                return True
            else:
                return False
        else:
            return False

    def getLogFiles(self, dataSetId, fileSource="archive"):
        """Return the list of *.log file paths in the archive or deposit
        directory of the input data set id (empty list otherwise)."""
        pL = []
        if fileSource in ["archive"]:
            dirPath = self.__pI.getArchivePath(dataSetId)
        elif fileSource in ["deposit"]:
            dirPath = self.__pI.getDepositPath(dataSetId)
        else:
            return pL
        fpattern = os.path.join(dirPath, "*.log")
        pthList = glob.glob(fpattern)
        return pthList

    def getPurgeCandidates(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None, purgeType="exp"):
        """Return the latest version, and candidates for removal and compression.

        purgeType =  'exp'    use strategy for experimental and model fileSource  V<last>, V2, V1
                     'other'  use strategy for other file types --  V<last> & V1

        Returns:
            (latest version path or None, list to remove, list to compress)
        """
        latestV = None
        rmL = []
        gzL = []
        vtL = self.getVersionFileList(dataSetId, wfInstanceId=wfInstanceId, fileSource=fileSource, contentType=contentType, formatType=formatType, partitionNumber=partitionNumber, mileStone=mileStone)
        n = len(vtL)
        # vtL is sorted by version, highest first
        if n > 0:
            latestV = vtL[0][0]

        if purgeType in ["exp"]:
            # keep latest; compress V1 and V2 (oldest two); remove everything between
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n == 3:
                gzL.append(vtL[1][0])
                gzL.append(vtL[2][0])
            elif n > 3:
                gzL.append(vtL[n - 2][0])
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 2):
                    rmL.append(vtL[i][0])
            else:
                pass
        elif purgeType in ["report", "other"]:
            # keep latest; compress only the oldest (V1); remove the middle versions
            if n < 2:
                return latestV, rmL, gzL
            elif n == 2:
                gzL.append(vtL[1][0])
            elif n > 2:
                gzL.append(vtL[n - 1][0])
                for i in range(1, n - 1):
                    rmL.append(vtL[i][0])
            else:
                pass
        return latestV, rmL, gzL

    def getVersionFileListSnapshot(self, basePath, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """For the input content object return a list of file versions in a
        snapshot directory (recovery mode).

        Only snapshot files that do NOT already exist at the live destination
        are returned (i.e. files needing recovery).

        Return:
              List of (snapshot source path, live destination path) pairs.
        """
        pairL = []
        # basePath = '/net/wwpdb_da_data_archive/.snapshot/nightly.1/data'
        try:
            # NOTE(review): pth/snPth are only assigned for 'archive' and
            # 'deposit'; any other fileSource raises NameError below and is
            # converted to an empty-list return by this handler.
            if fileSource == "archive":
                pth = self.__pI.getArchivePath(dataSetId)
                snPth = os.path.join(basePath, "archive", dataSetId)
            elif fileSource == "deposit":
                pth = self.__pI.getDepositPath(dataSetId)
                snPth = os.path.join(basePath, "deposit", dataSetId)

            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            # re-root the version-template file name onto the snapshot directory
            _dir, fn = os.path.split(fPattern)
            altPattern = os.path.join(snPth, fn)
            srcL = self.__getFileListWithVersion([altPattern], sortFlag=True)

            for src in srcL:
                _d, f = os.path.split(src[0])
                dst = os.path.join(pth, f)
                if not os.access(dst, os.F_OK):
                    pairL.append((src[0], dst))
            return pairL
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %s\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    ##
    def getVersionFileList(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", partitionNumber="1", mileStone=None):
        """For the input content object return a list of file versions sorted by
        modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fPattern = self.__pI.getFilePathVersionTemplate(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            return self.__getFileListWithVersion([fPattern], sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s err %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def getContentTypeFileList(self, dataSetId, wfInstanceId, fileSource="archive", contentTypeList=None):
        """For the input content object return a list of file versions sorted by
        modification time.

        Return:
              List of [(file path, modification date string,size),...]
        """
        if contentTypeList is None:
            contentTypeList = ["model"]
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fPatternList = []
            for contentType in contentTypeList:
                fPattern = self.__pI.getFilePathContentTypeTemplate(dataSetId=dataSetId, wfInstanceId=wfInstanceId, contentType=contentType, fileSource=fileSource)
                fPatternList.append(fPattern)
            if self.__debug:
                self.__lfh.write("+DataMaintenance.getContentTypeFileList() patterns %r\n" % fPatternList)
            return self.__getFileListWithVersion(fPatternList, sortFlag=True)
        except Exception as e:
            if self.__verbose:
                self.__lfh.write(
                    "+DataMaintenance.getVersionFileList() failing for data set %s instance %s file source %s error %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def getMiscFileList(self, fPatternList=None, sortFlag=True):
        """Return the file list matching the input glob pattern list
        (default: everything), optionally sorted by modification time."""
        if fPatternList is None:
            fPatternList = ["*"]
        return self.__getFileList(fPatternList=fPatternList, sortFlag=sortFlag)

    def getLogFileList(self, entryId, fileSource="archive"):
        """Return log files both alongside and inside the 'log' subdirectory of
        the archive or deposit path for the input entry id."""
        if fileSource in ["archive", "wf-archive"]:
            pth = self.__pI.getArchivePath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        elif fileSource in ["deposit"]:
            pth = self.__pI.getDepositPath(entryId)
            fpat1 = os.path.join(pth, "*log")
            fpat2 = os.path.join(pth, "log", "*")
            patList = [fpat1, fpat2]
        else:
            return []
        return self.__getFileList(fPatternList=patList, sortFlag=True)

    def __getFileListWithVersion(self, fPatternList=None, sortFlag=False):
        """For the input glob compatible file pattern produce a file list
        sorted by version number.

        If sortFlag is set then file list is sorted by version (highest first).

        Return:
              List of (file path, version number int) tuples.
        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))

            file_ver_tuple_list = []
            for f in files:
                tL = f.split(".")
                vId = tL[-1]
                if vId.startswith("V"):
                    # a trailing non-digit (e.g. a milestone letter) is stripped
                    # before parsing the numeric version
                    if vId[-1] not in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
                        file_ver_tuple_list.append((f, int(vId[1:-1])))
                    else:
                        file_ver_tuple_list.append((f, int(vId[1:])))
            # Sort the tuple list by version id
            #
            if sortFlag:
                file_ver_tuple_list.sort(key=lambda x: x[1], reverse=True)
            return file_ver_tuple_list
        except Exception as e:
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.getVersionFileList() failing for pattern %r error %r\n" % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    def __getFileList(self, fPatternList=None, sortFlag=True):
        """For the input glob compatible file pattern produce a file list
        sorted by modification date.

        If sortFlag is set then file list is sorted by modification date
        (e.g. recently changes first)

        Return:
              List of [(file path, modification date string, KBytes),...]
        """
        if fPatternList is None:
            fPatternList = ["*"]
        try:
            files = []
            for fPattern in fPatternList:
                if fPattern is not None and len(fPattern) > 0:
                    files.extend(filter(os.path.isfile, glob.glob(fPattern)))

            file_date_tuple_list = []
            for x in files:
                d = os.path.getmtime(x)
                s = float(os.path.getsize(x)) / 1000.0
                file_date_tuple = (x, d, s)
                file_date_tuple_list.append(file_date_tuple)

            # Sort the tuple list by the modification time (recent changes first)
            if sortFlag:
                file_date_tuple_list.sort(key=lambda x: x[1], reverse=True)
            rTup = []
            for fP, mT, sZ in file_date_tuple_list:
                tS = datetime.fromtimestamp(mT).strftime("%Y-%b-%d %H:%M:%S")
                rTup.append((fP, tS, sZ))
            return rTup
        except Exception as e:
            if self.__verbose:
                self.__lfh.write("+DataMaintenance.getVersionFileList() failing for patter %r error %r\n" % (fPatternList, str(e)))
                traceback.print_exc(file=self.__lfh)
            return []

    ##
    def __getArchiveFileName(self, dataSetId, wfInstanceId=None, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        # Resolve only the file name component of the archive content object.
        (_fp, _d, f) = self.__targetFilePath(
            dataSetId=dataSetId,
            wfInstanceId=wfInstanceId,
            fileSource="archive",
            contentType=contentType,
            formatType=formatType,
            version=version,
            partitionNumber=partitionNumber,
            mileStone=mileStone,
        )
        return f

    # def __getInstanceFileName(self, dataSetId, wfInstanceId=None, contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (_fp, _d, f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource="wf-instance",
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return f

    # def __getFilePath(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
    #     (fp, _d, _f) = self.__targetFilePath(
    #         dataSetId=dataSetId,
    #         wfInstanceId=wfInstanceId,
    #         fileSource=fileSource,
    #         contentType=contentType,
    #         formatType=formatType,
    #         version=version,
    #         partitionNumber=partitionNumber,
    #         mileStone=mileStone,
    #     )
    #     return fp

    def __targetFilePath(self, dataSetId, wfInstanceId=None, fileSource="archive", contentType="model", formatType="pdbx", version="latest", partitionNumber="1", mileStone=None):
        """Return the file path, directory path, and file name for the input
        content object if this object is valid.

        If the file path cannot be verified return None for all values
        """
        try:
            if fileSource == "session" and self.__sessionPath is not None:
                self.__pI.setSessionPath(self.__sessionPath)
            fP = self.__pI.getFilePath(
                dataSetId=dataSetId,
                wfInstanceId=wfInstanceId,
                contentType=contentType,
                formatType=formatType,
                fileSource=fileSource,
                versionId=version,
                partNumber=partitionNumber,
                mileStone=mileStone,
            )
            dN, fN = os.path.split(fP)
            return fP, dN, fN
        except Exception as e:
            if self.__debug:
                self.__lfh.write(
                    "+DataMaintenance.__targetFilePath() failing for data set %s instance %s file source %s error %r\n"
                    % (dataSetId, wfInstanceId, fileSource, str(e)))
                traceback.print_exc(file=self.__lfh)
            return (None, None, None)
class RcsbDpUtilityAnnotTests(unittest.TestCase):
    """Integration tests exercising RcsbDpUtility annotation operations
    (validation, format conversion, map handling) against packaged test files."""

    def setUp(self):
        self.__lfh = sys.stderr
        # Pick up site information from the environment or failover to the development site id.
        self.__siteId = getSiteId(defaultSiteId='WWPDB_DEPLOY_TEST')
        self.__lfh.write("\nTesting with site environment for: %s\n" % self.__siteId)
        #
        self.FILE_ROOT = os.path.dirname(os.path.realpath(__file__))
        self.__cI = ConfigInfo(self.__siteId)
        self.__siteWebAppsSessionsPath = self.__cI.get('SITE_WEB_APPS_SESSIONS_PATH')
        # scratch area under the site sessions path; see tearDown for (disabled) cleanup
        self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
        self.__testFilePath = os.path.join(self.FILE_ROOT, 'data')
        self.__testFileAnnotSS = 'rcsb070236.cif'
        self.__testFileAnnotSSTop = 'topology.txt'
        #
        self.__testFileAnnotLink = '3rij.cif'
        self.__testFileAnnotCisPeptide = '5hoh.cif'
        self.__testFileAnnotSolvent = 'D_900002_model_P1.cif'
        self.__testFileAnnotValidate = '3rij.cif'
        self.__testFileAnnotNA = '1o3q.cif'
        self.__testFileAnnotSite = '1xbb.cif'
        self.__testIdAnnotSite = '1xbb'
        #
        self.__testFileAnnotSiteAlt = 'D_1000200391_model_P1.cif.V27'
        self.__testIdAnnotSiteAlt = 'D_1000200391'
        #
        self.__testFileAnnotRcsb = 'rcsb033781.cif'
        self.__testFileAnnotRcsbEps = 'rcsb013067.cifeps'
        #
        self.__testFilePdbPisa = self.__cI.get('DP_TEST_FILE_PDB_PISA')
        self.__testFileCifPisa = self.__cI.get('DP_TEST_FILE_CIF_PISA')
        #
        self.__testFileStarCs = "star_16703_test_2.str"
        self.__testFileCsRelatedCif = "cor_16703_test.cif"
        #
        self.__testFileValidateXyz = "1cbs.cif"
        self.__testFileValidateSf = "1cbs-sf.cif"
        self.__testValidateIdList = ["1cbs", "3of4", "3oqp"]
        self.__testArchiveIdList = [("D_900002", "4EC0"), ("D_600000", "4F3R")]
        #
        self.__testFileCifSeq = "RCSB095269_model_P1.cif.V1"
        self.__testFileSeqAssign = "RCSB095269_seq-assign_P1.cif.V1"
        self.__testFileMtzBad = "mtz-bad.mtz"
        self.__testFileMtzGood = "mtz-good.mtz"
        self.__testFileMtzRunaway = "bad-runaway.mtz"
        self.__testFileXyzRunaway = "bad-runaway.cif"
        self.__testMapNormal = "normal.map"
        self.__testMapSpider = "testmap.spi"
        self.__testMapLarge = "large.map"
        #
        # self.__testFilePrdSearch = '3RUN.cif'
        self.__testFilePrdSearch = 'D_1200000237_model_P1.cif.V1'
        self.__testValidateXrayIdList = ['1cbs', '4hea', '4u4r']
        self.__testValidateNmrIdList = ['2MM4', '2MMZ']
        self.__testValidateXrayNeutronModel = 'D_1200007116_model-upload_P1.cif.V1'
        self.__testValidateXrayNeutronSF = 'D_1200007116_sf-upload_P1.cif.V1'
        #self.__testValidateXrayLargeIdList = ['4u4r']
        #self.__testValidateNmrIdList = ['2MM4']
        #self.__testValidateNmrLargeIdList = ['2MMZ']
        self.__testDccModelId = '4wpo'
        self.__testSpecialPosition = 'D_1000225739_model_P1.cif.V4'
        self.__testDepAssembly = "testassem.cif"

    def tearDown(self):
        # cleanup of the per-test temp directory is intentionally disabled
        # (useful for post-mortem inspection of run artifacts)
        pass
        # if os.path.exists(self.__tmpPath):
        #     shutil.rmtree(self.__tmpPath)

    def test_AnnotValidateGeometryCheck(self):
        """Test of updating geometrical validation diagnostics -"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath, "annot-validate-geometry-check.cif")
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        inp_path = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inp_path)
        ret = dp.op("annot-validate-geometry")
        dp.expLog(os.path.join(self.__tmpPath, "annot-validate-geometry-check-pdbx.log"))
        dp.exp(of)
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def test_AnnotValidateGeometryCheckRemote(self):
        """Test of updating geometrical validation diagnostics -"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath, "annot-validate-geometry-check-remote.cif")
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        inp_path = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inp_path)
        #
        dp.setRunRemote()
        ret = dp.op("annot-validate-geometry")
        dp.expLog(os.path.join(self.__tmpPath, "annot-validate-geometry-check-pdbx-remote.log"))
        dp.exp(of)
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def testAnnotRcsb2PdbxRemote(self):
        """RCSB CIF -> PDBx conversion  (Using the smaller application in the annotation package)

        Converting to RCSB to PDB id in _entry.id and related items.
        """
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath, "annot-rcsb2pdbx-withpdbid-" + self.__testFileAnnotRcsb)
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        inpPath = os.path.join(self.__testFilePath, self.__testFileAnnotRcsb)
        dp.imp(inpPath)
        #
        dp.setRunRemote()
        ret = dp.op("annot-rcsb2pdbx-withpdbid")
        dp.expLog(os.path.join(self.__tmpPath, "annot-rcsb2pdbx.log"))
        dp.exp(of)
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def testAnnotValidateListXrayTestRemote(self):
        """Test create validation report for the test list of example PDB ids (x-ray examples)"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateXrayIdList:
            # fresh scratch directory per entry
            self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
            self.__lfh.write("\nStarting {} in {}\n".format(pdbId, self.__tmpPath))
            ofpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt.pdf")
            ofxml = os.path.join(self.__tmpPath, pdbId + "-valdata.xml")
            offullpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt_full.pdf")
            ofpng = os.path.join(self.__tmpPath, pdbId + "-val-slider.png")
            ofsvg = os.path.join(self.__tmpPath, pdbId + "-val-slider.svg")
            #
            testFileValidateXyz = pdbId + ".cif"
            testFileValidateSf = pdbId + "-sf.cif"
            dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
            # dp.setDebugMode(True)

            xyzPath = os.path.abspath(os.path.join(self.__testFilePath, testFileValidateXyz))
            sfPath = os.path.abspath(os.path.join(self.__testFilePath, testFileValidateSf))
            #
            dp.addInput(name="request_annotation_context", value="yes")
            dp.addInput(name="request_validation_mode", value="annotate")
            dp.addInput(name='run_dir', value=os.path.join(self.__siteWebAppsSessionsPath, "validation_%s" % random.randrange(9999999)))
            # dp.addInput(name="request_validation_mode", value="server")
            dp.imp(xyzPath)
            dp.addInput(name="sf_file_path", value=sfPath)
            #
            dp.setRunRemote()
            ret = dp.op("annot-wwpdb-validate-all")
            dp.expLog(os.path.join(self.__tmpPath, pdbId + "-annot-validate-test.log"))
            dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
            # dp.cleanup()
            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(ofpdf))
            self.assertTrue(os.path.exists(ofxml))
            self.assertTrue(os.path.exists(offullpdf))
            self.assertTrue(os.path.exists(ofpng))
            self.assertTrue(os.path.exists(ofsvg))

    def testAnnotValidateXrayNeutronRemote(self):
        """Test create validation report for the test list of example PDB ids (x-ray examples)"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
        self.__lfh.write("\nStarting x-ray neutron in {}\n".format(self.__tmpPath))
        ofpdf = os.path.join(self.__tmpPath, "test-valrpt.pdf")
        ofxml = os.path.join(self.__tmpPath, "test-valdata.xml")
        offullpdf = os.path.join(self.__tmpPath, "test-valrpt_full.pdf")
        ofpng = os.path.join(self.__tmpPath, "test-val-slider.png")
        ofsvg = os.path.join(self.__tmpPath, "test-val-slider.svg")
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        # dp.setDebugMode(True)

        xyzPath = os.path.abspath(os.path.join(self.__testFilePath, self.__testValidateXrayNeutronModel))
        sfPath = os.path.abspath(os.path.join(self.__testFilePath, self.__testValidateXrayNeutronSF))
        #
        dp.addInput(name="request_annotation_context", value="yes")
        dp.addInput(name="request_validation_mode", value="annotate")
        dp.addInput(name='run_dir', value=os.path.join(self.__siteWebAppsSessionsPath, "validation_%s" % random.randrange(9999999)))
        # dp.addInput(name="request_validation_mode", value="server")
        dp.imp(xyzPath)
        dp.addInput(name="sf_file_path", value=sfPath)
        #
        dp.setRunRemote()
        ret = dp.op("annot-wwpdb-validate-all")
        dp.expLog(os.path.join(self.__tmpPath, "test-annot-validate-test.log"))
        dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(ofpdf))
        self.assertTrue(os.path.exists(ofxml))
        self.assertTrue(os.path.exists(offullpdf))
        self.assertTrue(os.path.exists(ofpng))
        self.assertTrue(os.path.exists(ofsvg))

    def testAnnotValidateListNmrTestRemote(self):
        """Test create validation report for the test list of example PDB ids (NMR examples)"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateNmrIdList:
            # fresh scratch directory per entry
            self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
            self.__lfh.write("\nStarting {} in {}\n".format(pdbId, self.__tmpPath))
            ofpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt.pdf")
            ofxml = os.path.join(self.__tmpPath, pdbId + "-valdata.xml")
            offullpdf = os.path.join(self.__tmpPath, pdbId + "-valrpt_full.pdf")
            ofpng = os.path.join(self.__tmpPath, pdbId + "-val-slider.png")
            ofsvg = os.path.join(self.__tmpPath, pdbId + "-val-slider.svg")
            #
            testFileValidateXyz = pdbId + ".cif"
            testFileValidateCs = pdbId + "-cs.cif"
            dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)

            xyzPath = os.path.abspath(os.path.join(self.__testFilePath, testFileValidateXyz))
            csPath = os.path.abspath(os.path.join(self.__testFilePath, testFileValidateCs))
            dp.addInput(name="request_annotation_context", value="yes")
            dp.addInput(name='run_dir', value=os.path.join(self.__siteWebAppsSessionsPath, "validation_%s" % random.randrange(9999999)))
            # adding explicit selection of steps --
            # Alternate
            #dp.addInput(name="step_list", value=" coreclust,chemicalshifts,writexml,writepdf ")
            dp.addInput(name='kind', value='nmr')
            dp.imp(xyzPath)
            dp.addInput(name="cs_file_path", value=csPath)
            #
            dp.setRunRemote()
            ret = dp.op("annot-wwpdb-validate-all")
            dp.expLog(os.path.join(self.__tmpPath, pdbId + "-annot-validate-test.log"))
            dp.expList(dstPathList=[ofpdf, ofxml, offullpdf, ofpng, ofsvg])
            # dp.cleanup()
            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(ofpdf))
            self.assertTrue(os.path.exists(ofxml))
            self.assertTrue(os.path.exists(offullpdf))
            self.assertTrue(os.path.exists(ofpng))
            self.assertTrue(os.path.exists(ofsvg))

    def testMapFixRemote(self):
        """Test mapfix utility"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        #
        inpPath = os.path.join(self.__testFilePath, self.__testMapNormal)
        of = os.path.join(self.__tmpPath, self.__testMapNormal + "-fix.map")
        dp.imp(inpPath)
        pixelSize = 2.54
        #dp.addInput(name="pixel-spacing-x", value=pixelSize)
        #dp.addInput(name="pixel-spacing-y", value=pixelSize)
        #dp.addInput(name="pixel-spacing-z", value=pixelSize)
        dp.addInput(name="input_map_file_path", value=inpPath)
        dp.addInput(name="output_map_file_path", value=of)
        dp.addInput(name="label", value='test')
        dp.addInput(name="voxel", value='{0} {0} {0}'.format(pixelSize))
        #
        dp.setRunRemote()
        ret = dp.op("deposit-update-map-header-in-place")
        dp.expLog(os.path.join(self.__tmpPath, "mapfix-big.log"))
        dp.exp(of)
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    # def testMapFixLargeMapRemote(self):
    #     """ Test mapfix utility
    #     """
    #     self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
    #     self.__lfh.write("\nRunning in {}\n".format(self.__tmpPath))
    #
    #     dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
    #
    #     #
    #     inpPath = os.path.join(self.__testFilePath, self.__testMapLarge)
    #     of = os.path.join(self.__tmpPath, self.__testMapLarge + "-fix.map")
    #     dp.imp(inpPath)
    #     pixelSize = 1.327
    #     dp.addInput(name="input_map_file_path", value=inpPath)
    #     dp.addInput(name="output_map_file_path", value=of)
    #     dp.addInput(name="label", value='test')
    #     dp.addInput(name="voxel", value='{0} {0} {0}'.format(pixelSize))
    #
    #     dp.setRunRemote()
    #     ret = dp.op("deposit-update-map-header-in-place")
    #     dp.expLog(os.path.join(self.__tmpPath, "mapfix-big.log"))
    #     dp.exp(of)
    #     # dp.cleanup()
    #
    #     self.assertTrue(ret == 0)
    #     self.assertTrue(os.path.exists(of))

    def testAnnotSiteRemote(self):
        """Calculate site environment"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        of = os.path.join(self.__tmpPath, "annot-site-" + self.__testFileAnnotSite)
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        inpPath = os.path.join(self.__testFilePath, self.__testFileAnnotSite)
        dp.imp(inpPath)
        dp.addInput(name="block_id", value=self.__testIdAnnotSite)
        #
        dp.setRunRemote()
        ret = dp.op("annot-site")
        dp.expLog(os.path.join(self.__tmpPath, "annot-site.log"))
        dp.exp(of)
        # dp.cleanup()
        self.assertTrue(ret == 0)
        self.assertTrue(os.path.exists(of))

    def test_AnnotMergeRemote(self):
        """Test of updating geometrical validation diagnostics -"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        for pdbId in self.__testValidateXrayIdList:
            self.__tmpPath = tempfile.mkdtemp(dir=self.__siteWebAppsSessionsPath)
            testFileValidateXyz = pdbId + ".cif"
            xyzPath = os.path.abspath(os.path.join(self.__testFilePath, testFileValidateXyz))
            dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
            of = os.path.join(self.__tmpPath, "annot-merge-xyz-remote.cif")
            # merge the same coordinate file onto itself -- exercises the op only
            dp.imp(xyzPath)
            dp.addInput(name="new_coordinate_file_path", value=xyzPath)
            dp.addInput(name="new_coordinate_format", value='cif')
            #
            dp.setRunRemote()
            ret = dp.op("annot-merge-xyz")
            dp.expLog(os.path.join(self.__tmpPath, "annot-merge-xyz-remote.log"))
            dp.exp(of)
            # dp.cleanup()
            self.assertTrue(ret == 0)
            self.assertTrue(os.path.exists(of))

    def testAnnotMtz2PdbxGood(self):
        """Test mtz to pdbx conversion  (good mtz)"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        diagfn = os.path.join(self.__tmpPath, "sf-convert-diags.cif")
        ciffn = os.path.join(self.__tmpPath, "sf-convert-datafile.cif")
        dmpfn = os.path.join(self.__tmpPath, "sf-convert-mtzdmp.log")
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        mtzPath = os.path.join(self.__testFilePath, self.__testFileMtzGood)
        dp.imp(mtzPath)
        dp.setTimeout(15)
        ret = dp.op("annot-sf-convert")
        dp.expLog(os.path.join(self.__tmpPath, "sf-convert.log"))
        dp.expList(dstPathList=[ciffn, diagfn, dmpfn])
        # dp.cleanup()
        self.assertTrue(ret == 0)
        # NOTE(review): these assert on the (always truthy) path strings, not on
        # os.path.exists() -- confirm whether existence checks were intended.
        self.assertTrue(ciffn)
        self.assertTrue(diagfn)
        self.assertTrue(dmpfn)

    def testCif2pdbx_public(self):
        """Test cif to pdbx conversion  (good cif)"""
        self.__lfh.write("\nStarting %s %s\n" % (self.__class__.__name__, sys._getframe().f_code.co_name))
        cifout = os.path.join(self.__tmpPath, self.__testFileAnnotSiteAlt)
        #
        dp = RcsbDpUtility(tmpPath=self.__tmpPath, siteId=self.__siteId, verbose=True)
        cifin = os.path.join(self.__testFilePath, self.__testFileAnnotSiteAlt)
        dp.imp(cifin)
        # NOTE(review): exp()/expLog() are invoked before op() here, unlike the
        # other tests in this class -- confirm the ordering is intentional.
        dp.exp(cifout)
        dp.expLog(os.path.join(self.__tmpPath, "cif2pdbx-public.log"))
        ret = dp.op("cif2pdbx-public")
        # dp.cleanup()
        self.assertTrue(ret == 0)
        # NOTE(review): asserts on truthy path strings, not file existence.
        self.assertTrue(cifin)
        self.assertTrue(cifout)
class StatusHistoryExec(object):
    """Driver wrapping StatusHistoryUtils operations (create/load status history
    files and schema) behind a simulated web-request environment."""

    def __init__(self, defSiteId="WWWDPB_INTERNAL_RU", sessionId=None, verbose=True, log=sys.stderr):
        self.__lfh = log
        self.__verbose = verbose
        self.__setup(defSiteId=defSiteId, sessionId=sessionId)

    def __setup(self, defSiteId=None, sessionId=None):
        """Simulate the web application environment for managing session storage of temporary data files."""
        self.__siteId = getSiteId(defaultSiteId=defSiteId)
        #
        self.__cI = ConfigInfo(self.__siteId)
        self.__topPath = self.__cI.get("SITE_WEB_APPS_TOP_PATH")
        self.__topSessionPath = self.__cI.get("SITE_WEB_APPS_TOP_SESSIONS_PATH")
        #
        self.__reqObj = InputRequest({}, verbose=self.__verbose, log=self.__lfh)
        self.__reqObj.setValue("TopSessionPath", self.__topSessionPath)
        self.__reqObj.setValue("TopPath", self.__topPath)
        self.__reqObj.setValue("WWPDB_SITE_ID", self.__siteId)
        #
        # DB credentials are required in the process environment (KeyError if absent)
        self.__reqObj.setValue("SITE_DA_INTERNAL_DB_USER", os.environ["SITE_DA_INTERNAL_DB_USER"])
        self.__reqObj.setValue("SITE_DA_INTERNAL_DB_PASSWORD", os.environ["SITE_DA_INTERNAL_DB_PASSWORD"])

        # propagate the resolved site id to downstream tooling
        os.environ["WWPDB_SITE_ID"] = self.__siteId
        if sessionId is not None:
            self.__reqObj.setValue("sessionid", sessionId)

        # retained due to side effects
        _sessionObj = self.__reqObj.newSessionObj()  # noqa: F841

        self.__reqObj.printIt(ofh=self.__lfh)
        #

    def doCreateStatusHistory(self, numProc=1, overWrite=False):
        """Create status history files for all entries (multi-process when numProc > 1)."""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
            entryIdList = shu.getEntryIdList()
            if numProc > 1:
                rL = shu.createHistoryMulti(entryIdList, numProc=numProc, overWrite=overWrite)
            else:
                rL = shu.createHistory(entryIdList, overWrite=overWrite)
            self.__lfh.write("StatusHistoryExec.doCreateStatusHistory() %d status files created.\n\n" % len(rL))
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)

    def doLoadStatusHistory(self, numProc=1, newTable=False):
        """Load status history content into the database; returns the loader's
        result, or False on any error."""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
            if numProc > 1:
                return shu.loadStatusHistoryMulti(numProc, newTable=newTable)
            else:
                return shu.loadStatusHistory(newTable=newTable)
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
        return False

    def doLoadEntryStatusHistory(self, entryId):
        """Load/reload status history file for the input entryId"""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
            return shu.loadEntryStatusHistory(entryIdList=[entryId])
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
        return False

    def doCreateEntryStatusHistory(self, entryId, overWrite=False):
        """Create the status history file for a single entry id."""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
            rL = shu.createHistory([entryId], overWrite=overWrite)
            self.__lfh.write("StatusHistoryExec.doCreateEntryStatusHistory() %d status files created.\n\n" % len(rL))
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)

    def doCreateStatusHistorySchema(self):
        """Create/recreate status history schema -"""
        try:
            shu = StatusHistoryUtils(reqObj=self.__reqObj, verbose=self.__verbose, log=self.__lfh)
            return shu.createStatusHistorySchema()
        except:  # noqa: E722 pylint: disable=bare-except
            traceback.print_exc(file=self.__lfh)
        return False
def testCache(self):
    """Verify that cached test configuration variables resolve to their expected values."""
    cfg = ConfigInfo()
    for key, expected in (("VARTEST", "Hello"), ("TESTVAR1", "1"), ("TESTVAR2", "2")):
        self.assertEqual(cfg.get(key), expected)
class ConfigInfoDataSet(object):
    """Provides accessors for the correspondence between deposition data
    identifiers and deposition and annotation sites (e.g. wwpdb_site_id).
    """

    def __init__(self, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        self.__verbose = verbose
        self.__debug = True
        self.__cI = ConfigInfo(siteId=None, verbose=self.__verbose)
        self.__cIDepUI = ConfigInfoAppDepUI(siteId=getSiteId())
        # Default data set id range assignments
        self.__depIdAssignments = self.__cI.get(
            "SITE_DATASET_ID_ASSIGNMENT_DICTIONARY")
        self.__depTestIdAssignments = self.__cI.get(
            "SITE_DATASET_TEST_ID_ASSIGNMENT_DICTIONARY")
        self.__siteBackupD = self.__cI.get("SITE_BACKUP_DICT", default={})
        # Lazy cache of the exceptional data-set -> site location dictionary.
        self.__dsLocD = None
        #
        # Directory used by the lockutils.synchronized decorator below.
        self.__lockDirPath = self.__cI.get(
            "SITE_SERVICE_REGISTRATION_LOCKDIR_PATH", "/tmp")
        lockutils.set_defaults(self.__lockDirPath)

    def getSiteId(self, depSetId):
        """Return siteId for the input depSetId subject to site backup details -

        siteBackupD[prodSite] = [backupSite1, backupSite2,...]
        """
        siteId = self.__getSiteId(depSetId)
        mySiteId = self.__cI.get("SITE_PREFIX", default=None)
        #
        if mySiteId and siteId:
            # is mySiteId a backup for siteId?
            if siteId in self.__siteBackupD and mySiteId in self.__siteBackupD[
                    siteId]:
                if self.__debug:
                    logger.debug("using backup %s for %s", mySiteId, siteId)
                siteId = mySiteId
        return siteId

    def getDataSetLocationDict(self):
        """Return the full data-set-id -> site-id dictionary ({} on read failure)."""
        d = {}
        try:
            d = self.__readLocationDictionary()
            return d
        except Exception as e:
            logger.error("failed reading data set location dictionary: %s",
                         str(e))
            if self.__debug:
                logger.exception("failed reading data set location dictionary")
        return d

    def getDataSetLocations(self, siteId):
        """Return the list of data set ids assigned to siteId ([] on failure)."""
        dsL = []
        try:
            d = self.__readLocationDictionary()
            for ky in d:
                if d[ky] == siteId:
                    dsL.append(ky)
            return dsL
        except Exception as e:
            logger.info("failed reading data set locations for site %r - %s",
                        siteId, str(e))
            if self.__debug:
                logger.exception(
                    "failed reading data set locations for site %r - %s",
                    siteId, str(e))
        return []

    def removeDataSets(self, dataSetIdList):
        """Remove the listed data set ids from the location dictionary; returns True on success."""
        try:
            d = self.__readLocationDictionary()
            for dsId in dataSetIdList:
                if dsId in d:
                    del d[dsId]
            return self.__writeLocationDictionary(d)
        except Exception as e:
            logger.error("failed %s", str(e))
            if self.__debug:
                logger.exception("failed")
        return False

    def writeLocationList(self, siteId, dataSetIdList):
        """Assign the listed data set ids to siteId and persist; returns True on success."""
        try:
            d = self.__readLocationDictionary()
            for dsId in dataSetIdList:
                d[dsId] = siteId
            return self.__writeLocationDictionary(d)
        except Exception as e:
            logger.error("failed data set locations for site %r - %s", siteId,
                         str(e))
            if self.__debug:
                logger.exception("failed data set locations for site %rs",
                                 siteId)
        return False

    def __readLocationDictionary(self):
        """Read the dictionary containing data set site location information.

        Returns: d[<data_set_id>] = <site_id> or a empty dictionary.
        """
        fp = self.__cIDepUI.get_site_dataset_siteloc_file_path()
        try:
            with open(fp, "r") as infile:
                return json.load(infile)
        except Exception as e:
            logger.error("failed reading json resource file %s - %s", fp,
                         str(e))
            if self.__debug:
                logger.exception("failed reading json resource file %s", fp)
        return {}

    @lockutils.synchronized("configdataset.exceptionfile-lock", external=True)
    def __writeLocationDictionary(self, dsLocD, backup=True):
        """Write the input dictionary containing exceptional data set to site
        correspondences,

        Returns: True for success or False otherwise
        """
        fp = self.__cIDepUI.get_site_dataset_siteloc_file_path()
        try:
            if backup:
                # Snapshot the current dictionary to a timestamped backup first.
                bp = fp + datetime.datetime.now().strftime(
                    "-%Y-%m-%d-%H-%M-%S")
                d = self.__readLocationDictionary()
                with open(bp, "w") as outfile:
                    json.dump(d, outfile, indent=4)
            #
            with open(fp, "w") as outfile:
                json.dump(dsLocD, outfile, indent=4)
            return True
        except Exception as e:
            logger.error("failed writing json resource file %s - %s", fp,
                         str(e))
            if self.__debug:
                logger.exception("failed writing json resource file %s", fp)
        return False

    def getDefaultIdRange(self, siteId):
        """Return the default upper and lower deposition data set identifier
        codes assigned to the input siteId.  Any site lacking a default range
        will get the range assigned to the UNASSIGNED site.

        Returns:   (lower bound, upper bound) for data set identifiers (int)
        """
        if siteId in self.__depIdAssignments:
            DEPID_START, DEPID_STOP = self.__depIdAssignments[siteId]
        elif "UNASSIGNED" in self.__depIdAssignments:
            DEPID_START, DEPID_STOP = self.__depIdAssignments["UNASSIGNED"]
        else:
            DEPID_START, DEPID_STOP = (-1, -1)
        return (DEPID_START, DEPID_STOP)

    def getTestIdRange(self, siteId):
        """Return the upper and lower deposition data set identifier codes
        assigned to the input siteId.  Any site lacking a default range will
        get the range (-1, -1)

        Returns:   (lower bound, upper bound) for data set identifiers (int)
        """
        if siteId in self.__depTestIdAssignments:
            DEPID_START, DEPID_STOP = self.__depTestIdAssignments[siteId]
        else:
            DEPID_START, DEPID_STOP = (-1, -1)
        return (DEPID_START, DEPID_STOP)

    def getDefaultSiteId(self, depSetId):
        """Get the default site assignment for the input data set id."""
        return self.__getSiteId(depSetId)

    def __getSiteId(self, depSetId):
        """Return the siteId to which the input depSetId is within the default
        code assignment range.  Input may be either a string "D_xxxxxxxxxx"
        or an integer/string "xxxxxxxxxx".
        """
        # check for exceptional cases --
        try:
            if self.__dsLocD is None:
                self.__dsLocD = self.__readLocationDictionary()
            if str(depSetId)[:2] == "D_":
                if depSetId in self.__dsLocD:
                    return self.__dsLocD[depSetId]
            else:
                # Normalize a bare numeric id to the zero-padded "D_" form.
                tId = "D_" + str("%010d" % int(depSetId))
                if tId in self.__dsLocD:
                    return self.__dsLocD[tId]
        except Exception as e:
            if self.__debug:
                logger.exception(
                    "failed checking for exception dictionary for %r - %s",
                    depSetId, str(e))
        #
        # check default range assignment --
        try:
            if str(depSetId).startswith("D_"):
                idVal = int(str(depSetId)[2:])
            else:
                idVal = int(str(depSetId))
            for ky in self.__depIdAssignments.keys():
                idMin, idMax = self.__depIdAssignments[ky]
                if (idVal >= idMin) and (idVal <= idMax):
                    return ky
        except Exception as e:
            if self.__debug:
                logger.exception(
                    "failed checking deposition range for %r - %s", depSetId,
                    str(e))
        return None
def testBuiltin(self):
    """Tests if common built in definitions are set"""
    cfg = ConfigInfo()
    cutoff = cfg.get("PROJECT_VAL_REL_CUTOFF")
    self.assertIsNotNone(cutoff)
    self.assertIsNone(cfg.get("PROJECT_RANDOM"))
class MyConnectionBase(object):
    """Base class managing a MySQL connection whose parameters are drawn from
    site configuration (ConfigInfo) resources or supplied directly via setAuth().
    """

    # Map of symbolic resource name -> (database-name config key, common key
    # prefix for the HOST_NAME/SOCKET/PORT_NUMBER/USER_NAME/PASSWORD keys).
    # Replaces seven near-identical if/elif branches in setResource().
    _RESOURCE_CONFIG = {
        "PRD": ("SITE_REFDATA_PRD_DB_NAME", "SITE_REFDATA_DB"),
        "CC": ("SITE_REFDATA_CC_DB_NAME", "SITE_REFDATA_DB"),
        "RCSB_INSTANCE": ("SITE_INSTANCE_DB_NAME", "SITE_INSTANCE_DB"),
        "DA_INTERNAL": ("SITE_DA_INTERNAL_DB_NAME", "SITE_DA_INTERNAL_DB"),
        "DA_INTERNAL_COMBINE": ("SITE_DA_INTERNAL_COMBINE_DB_NAME", "SITE_DA_INTERNAL_COMBINE_DB"),
        "DISTRO": ("SITE_DISTRO_DB_NAME", "SITE_DISTRO_DB"),
        "STATUS": ("SITE_DB_DATABASE_NAME", "SITE_DB"),
    }

    def __init__(self, siteId=None, verbose=False, log=sys.stderr):  # pylint: disable=unused-argument
        #
        self.__siteId = siteId
        self._cI = ConfigInfo(self.__siteId)
        self._dbCon = None
        self.__authD = {}
        self.__databaseName = None
        self.__dbHost = None
        self.__dbUser = None
        self.__dbPw = None
        self.__dbSocket = None
        # Default MySQL port (original code assigned None then immediately 3306;
        # the duplicate assignment is removed here).
        self.__dbPort = 3306
        self.__dbServer = "mysql"

    def setResource(self, resourceName=None):
        """Load connection parameters for the named configuration resource
        (e.g. "PRD", "CC", "DA_INTERNAL", "STATUS", ...) and refresh the
        authentication dictionary returned by getAuth().

        Unknown resource names leave the previously set parameters in place.
        """
        resTup = self._RESOURCE_CONFIG.get(resourceName)
        if resTup is not None:
            dbNameKey, prefix = resTup
            self.__databaseName = self._cI.get(dbNameKey)
            self.__dbHost = self._cI.get(prefix + "_HOST_NAME")
            self.__dbSocket = self._cI.get(prefix + "_SOCKET")
            self.__dbPort = self._cI.get(prefix + "_PORT_NUMBER")
            self.__dbUser = self._cI.get(prefix + "_USER_NAME")
            self.__dbPw = self._cI.get(prefix + "_PASSWORD")
        # Treat an unset/placeholder socket value as "no socket".
        if self.__dbSocket is None or len(self.__dbSocket) < 2:
            self.__dbSocket = None
        if self.__dbPort is None:
            self.__dbPort = 3306
        else:
            self.__dbPort = int(str(self.__dbPort))
        logger.info(
            "+MyConnectionBase(setResource) %s resource name %s server %s dns %s host %s user %s socket %s port %r",
            self.__siteId,
            resourceName,
            self.__dbServer,
            self.__databaseName,
            self.__dbHost,
            self.__dbUser,
            self.__dbSocket,
            self.__dbPort,
        )
        #
        self.__authD["DB_NAME"] = self.__databaseName
        self.__authD["DB_HOST"] = self.__dbHost
        self.__authD["DB_USER"] = self.__dbUser
        self.__authD["DB_PW"] = self.__dbPw
        self.__authD["DB_SOCKET"] = self.__dbSocket
        self.__authD["DB_PORT"] = int(str(self.__dbPort))
        self.__authD["DB_SERVER"] = self.__dbServer
        #

    def getAuth(self):
        """Return the current authentication/connection parameter dictionary."""
        return self.__authD

    def setAuth(self, authD):
        """Install connection parameters from *authD* (keys DB_NAME, DB_HOST,
        DB_USER, DB_PW, DB_SOCKET, DB_SERVER; DB_PORT optional, default 3306).

        Best-effort: a malformed dictionary leaves partially applied state,
        but the failure is now logged rather than silently swallowed.
        """
        try:
            self.__authD = authD
            self.__databaseName = self.__authD["DB_NAME"]
            self.__dbHost = self.__authD["DB_HOST"]
            self.__dbUser = self.__authD["DB_USER"]
            self.__dbPw = self.__authD["DB_PW"]
            self.__dbSocket = self.__authD["DB_SOCKET"]
            if "DB_PORT" in self.__authD:
                self.__dbPort = int(str(self.__authD["DB_PORT"]))
            else:
                self.__dbPort = 3306
            self.__dbServer = self.__authD["DB_SERVER"]
        except Exception:  # pylint: disable=broad-except
            logger.exception("+MyConnectionBase(setAuth) failed applying auth dictionary")

    def openConnection(self):
        """Create a database connection and store it in self._dbCon.

        Returns True on success, False on failure (self._dbCon is None then).
        """
        #
        if self._dbCon is not None:
            # Close an open connection -
            logger.info("+MyDbConnect.connect() WARNING Closing an existing connection.")
            self.closeConnection()
        try:
            # NOTE: the scrubbed 'user="******" % ...' format string (which
            # raised TypeError at call time) is restored to '%s' here.
            kwArgs = {
                "db": "%s" % self.__databaseName,
                "user": "%s" % self.__dbUser,
                "passwd": "%s" % self.__dbPw,
                "host": "%s" % self.__dbHost,
                "port": self.__dbPort,
                "local_infile": 1,
            }
            if self.__dbSocket is not None:
                kwArgs["unix_socket"] = "%s" % self.__dbSocket
            self._dbCon = MySQLdb.connect(**kwArgs)
            return True
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "+MyDbConnect.connect() Connection error to server %s host %s dsn %s user %s pw %s socket %s port %d \n",
                self.__dbServer,
                self.__dbHost,
                self.__databaseName,
                self.__dbUser,
                self.__dbPw,
                self.__dbSocket,
                self.__dbPort,
            )
            self._dbCon = None
            return False

    def getConnection(self):
        """Return the open connection object, or None if no connection exists."""
        return self._dbCon

    def closeConnection(self):
        """Close db session; returns True if a connection was closed, False otherwise."""
        if self._dbCon is not None:
            self._dbCon.close()
            self._dbCon = None
            return True
        else:
            return False

    def getCursor(self):
        """Return a cursor on the open connection, or None on failure."""
        try:
            return self._dbCon.cursor()
        except Exception:  # pylint: disable=broad-except
            logger.exception("+MyConnectionBase(getCursor) failing.\n")
            return None
TESTOUTPUT = os.path.join(HERE, "test-output", platform.python_version()) if not os.path.exists(TESTOUTPUT): os.makedirs(TESTOUTPUT) mockTopPath = os.path.join(TOPDIR, "wwpdb", "mock-data") rwMockTopPath = os.path.join(TESTOUTPUT) # Must create config file before importing ConfigInfo from wwpdb.utils.testing.SiteConfigSetup import SiteConfigSetup # noqa: E402 mockTopPath = os.path.join(TOPDIR, "wwpdb", "mock-data") SiteConfigSetup().setupEnvironment(TESTOUTPUT, mockTopPath) from wwpdb.utils.config.ConfigInfo import ConfigInfo # noqa: E402 cI = ConfigInfo() packagedir = cI.get("SITE_PACKAGES_PATH") if packagedir and os.path.exists(packagedir): toolsmissing = False else: toolsmissing = True dictlist = cI.get("SITE_PDBX_DICTIONARY_NAME_DICT") if dictlist: dictsmissing = False else: dictsmissing = True class commonsetup(object): def __init__(self):
class UpdateManager(object):
    """Orchestrates OneDep installation updates: python environment, webfe,
    read-only resources, taxonomy database, schemas, tool-version checks and
    the OpenEye license check.  Shell commands are echoed and, unless
    constructed with noop=True, executed via the shell.
    """

    def __init__(self, config_file, noop):
        # config_file: path to the install configuration; noop: echo commands only.
        self.__configfile = config_file
        self.__noop = noop
        self.__ci = ConfigInfo()
        self.__ci_common = ConfigInfoAppCommon()
        self.__extraconf = self.get_variable("ADMIN_EXTRA_CONF",
                                             environment='INSTALL_ENVIRONMENT')
        self.__confvars = {}
        self.__extraconfdir = None
        if self.__extraconf is not None:
            self.__extraconfdir = os.path.abspath(
                os.path.dirname(self.__extraconf))
            self.__confvars["extraconfdir"] = self.__extraconfdir
        # Infer topdir from where running from
        topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
        cdict = {'topdir': topdir}
        self.__cparser = ConfigParser(cdict)
        cfiles = self.__configfile
        if self.__extraconf is not None:
            cfiles = [self.__configfile, self.__extraconf]
        self.__cparser.read(cfiles)
        self.web_apps_path = self.get_variable('TOP_WWPDB_WEBAPPS_DIR')
        self.resources_ro_path = self.get_variable('RO_RESOURCE_PATH')

    def __exec(self, cmd, overridenoop=False, working_directory=None):
        """Echo *cmd* and run it through the shell (honouring noop unless
        overridenoop), optionally from *working_directory*; returns exit status."""
        print(cmd)
        ret = 0
        if not self.__noop or overridenoop:
            if working_directory:
                print('Working Directory= {}'.format(working_directory))
                original_wd = os.getcwd()
                os.chdir(working_directory)
                ret = subprocess.call(cmd, shell=True)
                os.chdir(original_wd)
            else:
                ret = subprocess.call(cmd, shell=True)
        return ret

    def get_variable(self, variable, environment=None):
        """Resolve *variable* from the optional ConfigInfo environment dict,
        then plain ConfigInfo, then the OS environment; None if unset."""
        ret = None
        if environment:
            ret = self.__ci.get(environment, {}).get(variable)
        if not ret:
            ret = self.__ci.get(variable)
        if not ret:
            ret = os.getenv(variable)
        return ret

    def updatepyenv(self, dev_build):
        """Configure pip for the private index and (re)install python
        requirements; dev_build clones wwPDB repos for editable installs."""
        cs_user = self.get_variable('CS_USER',
                                    environment='INSTALL_ENVIRONMENT')
        cs_pass = self.get_variable('CS_PW', environment='INSTALL_ENVIRONMENT')
        cs_url = self.get_variable('CS_URL',
                                   environment='INSTALL_ENVIRONMENT')
        script_dir = os.path.dirname(os.path.realpath(__file__))
        constraintfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/constraints.txt'))
        urlreq = urlparse(cs_url)
        # Embed the credentials in the extra index URL.
        urlpath = "{}://{}:{}@{}{}/dist/simple/".format(
            urlreq.scheme, cs_user, cs_pass, urlreq.netloc, urlreq.path)
        # pip_extra_urls = "--extra-index-url {} --trusted-host {} --extra-index-url https://pypi.anaconda.org/OpenEye/simple ".format(
        #     urlpath, urlreq.netloc)
        self.__exec("pip config --site set global.trusted-host {}".format(
            urlreq.netloc))
        self.__exec(
            'pip config --site set global.extra-index-url "{} https://pypi.anaconda.org/OpenEye/simple"'
            .format(urlpath))
        self.__exec("pip config --site set global.no-cache-dir false")
        pip_extra_urls = '-c {}'.format(constraintfile)
        # pip installing from requirements.txt in base_packages
        reqfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/pre-requirements.txt'))
        command = 'pip install {} -r {}'.format(pip_extra_urls, reqfile)
        self.__exec(command)
        reqfile = os.path.abspath(
            os.path.join(script_dir, '../base_packages/requirements.txt'))
        command = 'pip install {} -r {}'.format(pip_extra_urls, reqfile)
        self.__exec(command)
        if self.__cparser.has_option('DEFAULT', 'pip_extra_reqs'):
            opt_req = self.__cparser.get('DEFAULT',
                                         'pip_extra_reqs',
                                         vars=self.__confvars)
        else:
            opt_req = None
        reqfile = self.__cparser.get('DEFAULT', 'piprequirements')
        if dev_build:
            # Clone and do pip edit install
            # Checking if source directory exist
            source_dir = os.path.abspath(
                os.path.join(self.web_apps_path, '../..'))
            if not os.path.isdir(source_dir):
                os.makedirs(source_dir)
            path_to_list_of_repo = os.path.abspath(
                os.path.join(
                    script_dir,
                    '../base_packages/requirements_wwpdb_dependencies.txt'))
            with open(path_to_list_of_repo) as list_of_repo:
                for repo in list_of_repo:
                    command = 'git clone --recursive [email protected]:wwPDB/{0}.git; cd {0}; git checkout develop; cd ..'.format(
                        repo.rstrip())
                    self.__exec(command, working_directory=source_dir)
                    command = 'pip install {} --edit {}'.format(
                        pip_extra_urls, repo)
                    self.__exec(command, working_directory=source_dir)
        else:
            command = 'pip install -U {} -r {}'.format(pip_extra_urls, reqfile)
            self.__exec(command)
        if opt_req:
            # Credentials exported so the optional requirements file can
            # interpolate them.
            command = 'export CS_USER={}; export CS_PW={}; export CS_URL={}; export URL_NETLOC={}; export URL_PATH={}; pip install -U {} -r {}'.format(
                cs_user, cs_pass, cs_url, urlreq.netloc, urlreq.path,
                pip_extra_urls, opt_req)
            self.__exec(command)

    def updateresources(self):
        """Clone/update the read-only resources checkout to the configured tag."""
        restag = self.__cparser.get('DEFAULT', 'resourcestag')
        if self.resources_ro_path:
            if not os.path.exists(self.resources_ro_path):
                command = 'git clone [email protected]:wwPDB/onedep-resources_ro.git {}'.format(
                    self.resources_ro_path)
                self.__exec(command)
            command = 'cd {}; git pull; git checkout master; git pull; git checkout {}; git pull origin {}'.format(
                self.resources_ro_path, restag, restag)
            self.__exec(command)

    def checkwebfe(self, overridenoop=False):
        """Run the ManageWebFE check script against the webfe tree; prints an
        error on non-zero exit."""
        webdir = os.path.abspath(os.path.join(self.web_apps_path, '..'))
        curdir = os.path.dirname(__file__)
        checkscript = os.path.join(curdir, 'ManageWebFE.py')
        webfecheck = self.__cparser.get('DEFAULT', 'webfeconf')
        command = 'python {} --webroot {} check -r {}'.format(
            checkscript, webdir, webfecheck)
        ret = self.__exec(command, overridenoop=overridenoop)
        if ret:
            print("ERROR: check of webfe directory failed")

    def updatewebfe(self):
        """Clone/update the onedep-webfe checkout to the configured tag,
        verifying the tree before and after."""
        # Checking if source directory exist
        source_dir = os.path.abspath(os.path.join(self.web_apps_path, '../..'))
        if not os.path.isdir(source_dir):
            os.makedirs(source_dir)
        # Check if repo is cloned
        webfe_repo = os.path.abspath(os.path.join(self.web_apps_path, '..'))
        if not os.path.isdir(webfe_repo):
            command = 'git clone --recurse-submodules [email protected]:wwPDB/onedep-webfe.git'
            self.__exec(command, working_directory=source_dir)
        self.checkwebfe()
        webfetag = self.__cparser.get('DEFAULT', 'webfetag')
        command = 'cd {}; git pull; git checkout {}; git pull origin {}; git submodule init; git submodule update'.format(
            webfe_repo, webfetag, webfetag)
        self.__exec(command)
        # Now check the results
        self.checkwebfe()

    def updatetaxdb(self):
        """Run the ManageTaxDB script with configured size bounds (and
        optionally FTP loading)."""
        # Checks the number of rows in db and decides if to update
        taxdbsize = int(self.__cparser.get('DEFAULT', 'taxdbminsize'))
        if self.__cparser.has_option('DEFAULT', 'taxdbmaxsize'):
            maxsize = int(self.__cparser.get('DEFAULT', 'taxdbmaxsize'))
        else:
            maxsize = 999999999
        taxuseftp = self.__cparser.has_option('DEFAULT', 'taxuseftp')
        if not taxuseftp:
            taxresource = self.get_variable('TAXONOMY_FILE_NAME')
            if not taxresource:
                print("ERROR: TAXONOMY_FILE_NAME is not set in site-config")
                return
        curdir = os.path.dirname(__file__)
        checkscript = os.path.join(curdir, 'ManageTaxDB.py')
        if taxuseftp:
            addftp = " --ftpload"
        else:
            addftp = ""
        if self.__noop:
            command = 'python {} --noop --maxsize {} --taxdbsize {}{}'.format(
                checkscript, maxsize, taxdbsize, addftp)
        else:
            command = 'python {} --maxsize {} --taxdbsize {}{}'.format(
                checkscript, maxsize, taxdbsize, addftp)
        self.__exec(command)

    def updateschema(self):
        """Update database schemas via DbSchemaManager."""
        dbs = DbSchemaManager(self.__noop)
        dbs.updateschema()

    def postflightdbcheck(self):
        """Post-update database view check via DbSchemaManager."""
        dbs = DbSchemaManager(self.__noop)
        dbs.checkviews()

    def checktoolvers(self):
        """Compare installed tool bundle versions against the versions pinned
        in the configuration, reporting mismatches."""
        # vers_config_var, configinfovar, relative path ConfiginfoAppMethod
        confs = [
            [
                'annotver', 'SITE_ANNOT_TOOLS_PATH', 'etc/bundleversion.json',
                'get_site_annot_tools_path'
            ],
            ['webfever', 'TOP_WWPDB_WEBAPPS_DIR', 'version.json', ''],
            ['resourcever', 'RO_RESOURCE_PATH', 'version.json', ''],
            [
                'cctoolsver', 'SITE_CC_APPS_PATH', 'etc/bundleversion.json',
                'get_site_cc_apps_path'
            ],
            [
                'sfvalidver', 'SITE_PACKAGES_PATH',
                'sf-valid/etc/bundleversion.json', 'get_site_packages_path'
            ],
            [
                'dictver', 'SITE_PACKAGES_PATH',
                'dict/etc/bundleversion.json', 'get_site_packages_path'
            ],
        ]
        for c in confs:
            varname = c[0]
            confvar = c[1]
            fpart = c[2]
            config_info_app_method = c[3]
            try:
                tvers = self.__cparser.get('DEFAULT', varname)
                if config_info_app_method:
                    # Prefer the ConfigInfoAppCommon accessor when one is named.
                    class_method = getattr(self.__ci_common,
                                           config_info_app_method)
                    toolspath = class_method()
                else:
                    toolspath = self.get_variable(confvar)
                fname = os.path.join(toolspath, fpart)
                if not os.path.exists(fname):
                    print("WARNING: Tool out of date. %s not found" % fname)
                    continue
                with open(fname, 'r') as fin:
                    jdata = json.load(fin)
                vstring = jdata['Version']
                if vstring != tvers:
                    print("***ERROR: Version mismatch %s != %s in %s" %
                          (tvers, vstring, fname))
            except NoOptionError as e:  # noqa: F841
                # Option not in config file - continue
                pass

    def buildtools(self, build_version='v-5200'):
        """Run BuildTools.py for the given build version, reporting failure."""
        curdir = os.path.dirname(__file__)
        buildscript = os.path.join(curdir, 'BuildTools.py')
        command = 'python {} --config {} --build-version {}'.format(
            buildscript, self.__configfile, build_version)
        ret = self.__exec(command)
        if ret:
            print("ERROR: buildtools failed")
        pass

    def checkoelicense(self):
        """Compare the OpenEye license expiration date in the license file
        against the date pinned in the configuration."""
        try:
            # If not in config will fall through
            expdate = self.__cparser.get('DEFAULT', 'openeyeexp')
        except NoOptionError as e:  # noqa: F841
            # Option not in config file - continue
            return
        oelicfile = self.__ci_common.get_site_cc_oe_licence()
        # Might be in OS_ENVIRONMENT
        if not oelicfile:
            oelicfile = self.get_variable('SITE_CC_OE_LICENSE')
        if not oelicfile:
            print("***ERROR: Cannot determine open eye license from config")
            return
        with open(oelicfile, 'r') as fin:
            data = fin.readlines()
        for d in data:
            if "#EXP_DATE:" not in d:
                continue
            edate = d.split(':')[1].strip()
            if edate != expdate:
                print("ERROR: Openeye Licence expiration wrong %s vs %s" %
                      (edate, expdate))
            # Only need single report
            return