def main():
    """Run the ingest pipeline on the single job file named on the command line.

    Parses the command-line options, configures logging, parses the job XML
    file, creates the LTA and MoM clients from the selected credential sets
    and runs one ``IngestPipeline`` for that job.

    The process always ends via ``sys.exit``: status 0 after the pipeline has
    run, status 1 on a usage error, a missing job file, or any exception
    raised while parsing the job or running the pipeline.
    """
    import os.path
    from optparse import OptionParser
    from lofar.common import dbcredentials

    # Check the invocation arguments
    parser = OptionParser("%prog [options] <path_to_jobfile.xml>",
                          description='Run the ingestpipeline on a single jobfile.')
    parser.add_option('-q', '--broker', dest='broker', type='string',
                      default=DEFAULT_BROKER,
                      help='Address of the qpid broker, default: %default')
    parser.add_option('--busname', dest='busname', type='string',
                      default=DEFAULT_BUSNAME,
                      help='Name of the bus exchange on the qpid broker on which the ingest notifications are published, default: %default')
    parser.add_option("-u", "--user", dest="user", type="string",
                      default=getpass.getuser(),
                      help="username for to login on <host>, [default: %default]")
    parser.add_option('-s', '--minimal-SIP', dest='minimal_SIP', action='store_true',
                      help='create and upload a minimal SIP to the LTA catalogue when the normal SIP is not accepted.')
    parser.add_option('-V', '--verbose', dest='verbose', action='store_true',
                      help='verbose logging')
    parser.add_option('-t', '--timeout', dest='globus_timeout', type='int',
                      default=GLOBUS_TIMEOUT,
                      help='number of seconds (default=%default) to wait for globus-url-copy to finish after the transfer is done (while lta-site is computing checksums)')
    parser.add_option("-l", "--lta_credentials", dest="lta_credentials", type="string",
                      default='LTA' if isProductionEnvironment() else 'LTA_test',
                      help="Name of lofar credentials for lta user/pass (see ~/.lofar/dbcredentials) [default=%default]")
    parser.add_option("-m", "--mom_credentials", dest="mom_credentials", type="string",
                      default='MoM_site' if isProductionEnvironment() else 'MoM_site_test',
                      help="Name of credentials for MoM user/pass (see ~/.lofar/dbcredentials) [default=%default]")
    (options, args) = parser.parse_args()

    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG if options.verbose else logging.INFO)

    # exactly one positional argument is expected: the path to the job file
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)

    path = args[0]
    if not os.path.isfile(path):
        # this ends the run with a failure status, so report it as an error
        logger.error("No such file %s", path)
        sys.exit(1)

    try:
        job = parseJobXmlFile(path)
        job['filename'] = path
        logger.info("Parsed jobfile %s: %s", path, job)

        ltacreds = dbcredentials.DBCredentials().get(options.lta_credentials)
        ltaClient = LTAClient(ltacreds.user, ltacreds.password)

        momcreds = dbcredentials.DBCredentials().get(options.mom_credentials)
        momClient = MoMClient(momcreds.user, momcreds.password)

        jobPipeline = IngestPipeline(job, momClient, ltaClient,
                                     busname=options.busname,
                                     broker=options.broker,
                                     user=options.user,
                                     globus_timeout=options.globus_timeout,
                                     minimal_SIP=options.minimal_SIP)
        jobPipeline.run()
    except Exception as e:
        # top-level boundary: keep the traceback in the log, then fail the process
        logger.exception(e)
        sys.exit(1)

    # sys.exit (not the interactive `exit` helper from the site module), as used above
    sys.exit(0)
def dragnetDataPath(self, otdb_id, resource_name):
    """Return the ``hostname:/path`` of the data dir on DRAGNET for one task.

    ``resource_name`` must start with ``'drg'`` or ``'dragproc'``; anything
    else raises ``ValueError``. For a ``'drg'`` resource the host is built from
    the first five characters and the directory from the last five. Outside
    the production environment ``/test`` is appended to the directory.

    The result has the form ``'<host>:/<dirname>/L<otdb_id>'``.
    """
    on_drg_node = resource_name.startswith('drg')
    if not on_drg_node and not resource_name.startswith('dragproc'):
        raise ValueError(
            'dragnetDataPath: not a DRAGNET resource_name: %s' % resource_name)

    if on_drg_node:
        node = resource_name[:5]  # e.g. 'drg08'
        host = node + '-ib.dragnet.infiniband.lofar'
        dirname = resource_name[-5:]  # 'data1', 'data2', ...
        if not dirname.startswith('data'):
            # bug or test: log the error and still try as requested
            logger.error('dragnetDataPath: unexpected resource_name: %s',
                         resource_name)
    else:
        host = 'dragproc-10g.online.lofar'
        dirname = 'data'

    if not isProductionEnvironment():
        # separate dir: easy to identify, du(1) and auto-delete
        dirname += '/test'

    return '%s:/%s/L%s' % (host, dirname, otdb_id)
def getCep4LocationTestEnvPrefix(self):
    """Return the environment prefix: '' (production), 'test-' (test) or 'dev-' (otherwise)."""
    if not isProductionEnvironment():
        return 'test-' if isTestEnvironment() else 'dev-'
    return ''
def defaultDockerTag():
    """Return the docker image tag to use by default in this environment.

    Production uses ``"latest"``; every other environment uses the result of
    ``runCommand("docker-template", "${LOFAR_TAG}")``.
    """
    if not isProductionEnvironment():
        # test/dev environments share images with production, so they need
        # their own specific version instead of "latest"
        return runCommand("docker-template", "${LOFAR_TAG}")

    # "latest" refers to the current /production/ image
    return "latest"
def adaptNameToEnvironment(name):
    """Return the queue/bus ``name`` adapted to the current LOFAR environment.

    Production returns ``name`` unchanged, the test environment returns it
    prefixed with ``'test.'`` and every other environment with ``'devel.'``.
    """
    if isProductionEnvironment():
        # only PRODUCTION uses the original, unprefixed name
        return name

    template = 'test.%s' if isTestEnvironment() else 'devel.%s'
    return template % name
def cep4DataPath(self):
    """Return the path to the data dir on CEP4, depending on environment."""
    if isProductionEnvironment():
        projects_dir = "projects"
    elif isTestEnvironment():
        projects_dir = "test-projects"
    else:
        projects_dir = "dev-projects"
    return "CEP4:/data/" + projects_dir
def __init__(self, user=None, password=None):
    """Create the XML-RPC proxy for the LTA.

    :param user: LTA user name. When ``user`` or ``password`` is ``None``,
                 both are read from the lofar dbcredentials set ``'LTA'``
                 (production) or ``'LTA_test'`` (any other environment).
    :param password: password belonging to ``user``.

    The server URL is ``LTA_BASE_URL`` formatted with ``(user, password)``;
    it is logged after being passed through ``__hidePassword``.
    """
    # identity test: only a genuinely missing value triggers the lookup
    if user is None or password is None:
        # (mis)use dbcredentials to read user/pass from disk
        from lofar.common import dbcredentials
        dbc = dbcredentials.DBCredentials()
        creds = dbc.get('LTA' if isProductionEnvironment() else 'LTA_test')
        user = creds.user
        password = creds.password

    url = LTA_BASE_URL % (user, password)
    self.__rpc = xmlrpc.client.ServerProxy(url)
    logger.info('LTAClient connected to: %s', self.__hidePassword(url))
def __init__(self,
             mountpoint=CEP4_DATA_MOUNTPOINT,
             exchange=DEFAULT_BUSNAME,
             broker=DEFAULT_BROKER):
    """Set up the CEP4 data paths below ``mountpoint`` and the RADB/MoM RPCs.

    :param mountpoint: root directory under which the projects, scratch and
                       share directories are located.
    :param exchange: bus exchange passed to both RPC factories.
    :param broker: broker address passed to both RPC factories.
    """
    self.mountpoint = mountpoint

    # production uses 'projects'; every other environment uses 'test-projects'
    if isProductionEnvironment():
        projects_dirname = 'projects'
    else:
        projects_dirname = 'test-projects'
    self.projects_path = os.path.join(self.mountpoint, projects_dirname)

    self.scratch_path = os.path.join(self.mountpoint, 'scratch', 'pipeline')
    self.share_path = os.path.join(self.mountpoint, 'share', 'pipeline')

    rpc_kwargs = {'exchange': exchange, 'broker': broker}
    self.radbrpc = RADBRPC.create(**rpc_kwargs)
    self.momrpc = MoMQueryRPC.create(**rpc_kwargs)
def config():
    """Return the environment-dependent base URLs as JSON under the key 'config'.

    The MoM and LTA base URLs are filled in for the production and test
    environments and are empty strings otherwise; the inspection-plots and
    sky-view URLs are the same in every environment.
    """
    if isProductionEnvironment():
        mom_base_url = 'https://lofar.astron.nl/mom3'
        lta_base_url = 'http://lofar.target.rug.nl/'
    elif isTestEnvironment():
        mom_base_url = 'http://lofartest.control.lofar:8080/mom3'
        lta_base_url = 'http://lofar-test.target.rug.nl/'
    else:
        mom_base_url = ''
        lta_base_url = ''

    settings = {
        'mom_base_url': mom_base_url,
        'lta_base_url': lta_base_url,
        'inspection_plots_base_url': 'https://proxy.lofar.eu/inspect/HTML/',
        'sky_view_base_url': 'http://dop344.astron.nl:5000/uvis/id',
    }
    return jsonify({'config': settings})
def __init__(self, mom_base_url, user=None, password=None):
    """Store the MoM credentials and derive the login/logout URLs.

    :param mom_base_url: base URL of the MoM web application; the
                         'useradministration/user/...' paths are appended to
                         it as-is, so it is expected to end with a slash.
    :param user: MoM user name. When ``user`` or ``password`` is ``None``,
                 both are read from the lofar dbcredentials set ``'MoM_site'``
                 (production) or ``'MoM_site_test'`` (any other environment).
    :param password: password belonging to ``user``.

    No connection is made here; ``self.session`` starts out as ``None``.
    """
    # identity test: only a genuinely missing value triggers the lookup
    if user is None or password is None:
        # (mis)use dbcredentials to read user/pass from disk
        from lofar.common import dbcredentials
        dbc = dbcredentials.DBCredentials()
        creds = dbc.get(
            'MoM_site' if isProductionEnvironment() else 'MoM_site_test')
        user = creds.user
        password = creds.password

    self.mom_base_url = mom_base_url
    self.__user = user
    self.__password = password
    self.session = None

    self.__momURLlogin = self.mom_base_url + 'useradministration/user/systemlogin.do'
    self.__momUR_security_check = self.mom_base_url + 'useradministration/user/j_security_check'
    self.__momURLlogout = self.mom_base_url + 'useradministration/user/logout.do'
def GetStorageTicket(self):
    """Request a storage ticket from the LTA and store the ticket and its URIs.

    On success sets ``self.ticket``, ``self.PrimaryUri`` and
    ``self.SecondaryUri`` from the LTA result, and sets ``self.lta_site`` to
    'sara', 'juelich' or 'poznan' when the primary URI contains 'sara',
    'juelich' or 'psnc' respectively (``self.lta_site`` is left untouched
    when none of them matches).

    The "already in LTA" check is only requested in the production
    environment.

    :raises PipelineError: with ``PipelineAlreadyInLTAError`` when a ticket
        for this mom ID exists in a state that is not below
        ``IngestSuccessful``, or with ``PipelineNoProjectInLTAError`` when
        the LTA does not know the project.
    :raises Exception: for every other error reported by the LTA, including
        the case where an incomplete existing ticket had to be reset to
        ``IngestFailed``.
    """
    do_check_already_in_lta = isProductionEnvironment()
    result = self.ltaClient.GetStorageTicket(self.Project, self.FileName,
                                             self.FileSize, self.ArchiveId,
                                             self.JobId, self.ObsId,
                                             do_check_already_in_lta,
                                             self.Type)

    error = result.get('error')
    if error:
        # every path through this branch ends in a raise
        if ('StorageTicket with mom ID "%i"' % (self.ArchiveId)) in error:
            if 'existing_ticket_id' in result and 'existing_ticket_state' in result:
                logger.warning("Got a Tier 1 GetStorageTicket error for an incomplete storage ticket %s with status %s" % (result['existing_ticket_id'], result['existing_ticket_state']))
                if result['existing_ticket_state'] < IngestSuccessful:
                    try:
                        self.ticket = result['existing_ticket_id']
                        logger.warning("trying to repair status of StorageTicket %s" % self.ticket)
                        self.SendStatusToLTA(IngestFailed)
                    except Exception:
                        # best effort: the reset failure is logged, the error below is raised anyway
                        logger.exception('ResettingStatus IngestFailed failed for %s' % self.ticket)
                    raise Exception('Had to reset state for %s' % self.ticket)
                else:
                    raise PipelineError('GetStorageTicket error: Dataproduct already in LTA for %s' % (self.JobId), PipelineAlreadyInLTAError)
            else:
                # double-quoted so the apostrophe is part of the message
                # ('can''t' is two adjacent literals and yields "cant")
                raise Exception("GetStorageTicket error I can't interpret: %s" % result)

        if 'no storage resources defined for project' in error or "project does not exists" in error:
            raise PipelineError('GetStorageTicket error for project not known in LTA: %s' % error, PipelineNoProjectInLTAError)

        raise Exception('GetStorageTicket error: %s' % error)
    else:
        self.ticket = result.get('ticket')
        self.PrimaryUri = result.get('primary_uri_rnd')
        self.SecondaryUri = result.get('secondary_uri_rnd')

        # NOTE(review): a result without 'primary_uri_rnd' makes PrimaryUri
        # None and the membership tests below raise TypeError - confirm the
        # LTA always supplies it on success.
        if 'sara' in self.PrimaryUri:
            self.lta_site = 'sara'
        elif 'juelich' in self.PrimaryUri:
            self.lta_site = 'juelich'
        elif 'psnc' in self.PrimaryUri:
            self.lta_site = 'poznan'
from lofar.common import isProductionEnvironment, isTestEnvironment
""" Config file for specification services. """

# Messaging: service names of the specification services
VALIDATION_SERVICENAME = "specificationvalidationservice"
SPECIFICATION_SERVICENAME = "specificationservice"
SPECIFICATIONTRANSLATION_SERVICENAME = "specificationtranslationservice"

# TODO: mom.importxml does not prepend "test." on the test system?
MOMIMPORTXML_BUSNAME = "mom.importxml"

# Broker host for the mom.importxml bus, selected per environment
if isProductionEnvironment():
    MOMIMPORTXML_BROKER = "lcs023.control.lofar"
elif isTestEnvironment():
    MOMIMPORTXML_BROKER = "lcs028.control.lofar"
else:
    MOMIMPORTXML_BROKER = "localhost"

# XSD paths (for validation service)
TRIGGER_XSD = "$LOFARROOT/share/SAS/LofarTrigger.xsd"
LOFARSPEC_XSD = "$LOFARROOT/share/SAS/LofarSpecification.xsd"
MOMSPEC_XSD = "$LOFARROOT/share/MoM/LofarMoM2.xsd"

# Telescope Model XML paths (for xml generators used by translation service)
TELESCOPE_MODEL_TYPE1_XML = "$LOFARROOT/share/xml/telescope_model_type1_template.xml"
TELESCOPE_MODEL_TYPE2_XML = "$LOFARROOT/share/xml/telescope_model_type2_template.xml"
def __init__(self, user=None, password=None):
    """Initialise the base client with the environment's MoM URL and add the import-XML URL.

    :param user: passed on unchanged to the base class initialiser.
    :param password: passed on unchanged to the base class initialiser.
    """
    # production talks to the https MoM host, everything else to the test host
    if isProductionEnvironment():
        mom_base_url = 'https://lcs023.control.lofar:8443/'
    else:
        mom_base_url = 'http://lofartest.control.lofar:8080/'

    super().__init__(mom_base_url, user, password)

    import_xml_path = 'mom3/interface/importXML2.do'
    self.__momURLImportXML = self.mom_base_url + import_xml_path
import os import logging logger = logging.getLogger() import kombu # make default kombu/amqp logger less spammy logging.getLogger("amqp").setLevel(logging.INFO) from lofar.messaging import adaptNameToEnvironment from lofar.common import isProductionEnvironment, isTestEnvironment # the DEFAULT_BROKER that's used in lofar's messaging refers to the single # broker at either the production or test scu, depending on the runtime environment. # For a non-production/non-test env, just use localhost. DEFAULT_BROKER = "scu001.control.lofar" if isProductionEnvironment() else \ "scu199.control.lofar" if isTestEnvironment() else \ "localhost" if 'LOFAR_DEFAULT_BROKER' in os.environ.keys(): DEFAULT_BROKER = os.environ.get('LOFAR_DEFAULT_BROKER') DEFAULT_USER = os.environ.get('RABBITMQ_DEFAULT_USER', 'guest') DEFAULT_PASSWORD = os.environ.get('RABBITMQ_DEFAULT_PASS', 'guest') if isProductionEnvironment() or isTestEnvironment(): # import the user and password from RabbitMQ 'db'credentials try: from lofar.common.dbcredentials import DBCredentials _db_creds = DBCredentials().get("RabbitMQ") DEFAULT_USER = _db_creds.user DEFAULT_PASSWORD = _db_creds.password
from lofar.common import isProductionEnvironment from lofar.common import isTestEnvironment from socket import gethostname #server config is same as common config plus extra's from lofar.lta.ingest.common.config import * DEFAULT_INGEST_INCOMING_JOB_SUBJECT = DEFAULT_INGEST_PREFIX + ".incoming_job" DEFAULT_INGEST_JOB_FOR_TRANSFER_SUBJECT = DEFAULT_INGEST_PREFIX + ".job_for_transfer" DEFAULT_MOM_XMLRPC_HOST = hostnameToIp( 'lexar003.lexar.control.lofar' if isProductionEnvironment() and 'lexar' in gethostname() else 'lexar004.lexar.control.lofar' if isTestEnvironment( ) and 'lexar' in gethostname() else 'localhost') DEFAULT_MOM_XMLRPC_PORT = 2010 if isProductionEnvironment() else 2009 MOM_BASE_URL = 'https://lcs029.control.lofar:8443/' # if isProductionEnvironment() else 'http://lofartest.control.lofar:8080/' LTA_BASE_URL = 'https://%s:%[email protected]:9443/' if isProductionEnvironment( ) else 'https://%s:%[email protected]:19443/' JOBS_DIR = '/local/ingest/jobs' if isProductionEnvironment( ) else '/local/ingesttest/jobs' if isTestEnvironment() else '/tmp/ingest/jobs' MAX_NR_OF_RETRIES = 4 DEFAULT_JOB_PRIORITY = 4 MAX_NR_OF_JOBS = 40 MAX_USED_BANDWITH_TO_START_NEW_JOBS = 9.9e9 #Gbps NET_IF_TO_MONITOR = [ 'p2p1.2030', # outgoing traffic to Juelich 'p2p1.2033', # outgoing traffic to Poznan
def main():
    """Start the ingest transfer server and run it until it stops.

    Forces the process time zone to UTC, parses the command-line options,
    configures logging, logs the host name and the environment, then creates
    an ``IngestTransferServer`` and a ``BusListener`` for jobs-for-transfer
    messages and runs the server while the listener is active.
    """
    # make sure we run in UTC timezone
    import os
    import time
    os.environ['TZ'] = 'UTC'
    # setting TZ alone is not guaranteed to affect this process; tzset()
    # applies it (the function is not available on every platform)
    if hasattr(time, 'tzset'):
        time.tzset()

    from optparse import OptionParser

    # Check the invocation arguments
    parser = OptionParser(
        "%prog [options]",
        description=
        'runs the ingest transfer server which picks up as many jobs as it can handle from the given --ingest_job_queuename and tranfers the dataproducts to the LTA, updates the LTA catalogue, and updates MoM'
    )
    parser.add_option('-b', '--broker', dest='broker', type='string',
                      default=DEFAULT_BROKER,
                      help='Address of the qpid broker, default: %default')
    # help text describes this option (it used to describe a job queue name)
    parser.add_option("-p", "--max_nr_of_parallel_jobs",
                      dest="max_nr_of_parallel_jobs", type="int",
                      default=MAX_NR_OF_JOBS,
                      help="Maximum number of jobs to transfer in parallel. [default: %default]")
    parser.add_option(
        '-e', '--exchange', dest='exchange', type='string',
        default=DEFAULT_BUSNAME,
        help='Name of the common bus exchange on the broker, default: %default')
    parser.add_option(
        "-u", "--user", dest="user", type="string",
        default=getpass.getuser(),
        help="username for to login on data source host, [default: %default]")
    parser.add_option(
        "-l", "--lta_credentials", dest="lta_credentials", type="string",
        default='LTA' if isProductionEnvironment() else 'LTA_test',
        help=
        "Name of lofar credentials for lta user/pass (see ~/.lofar/dbcredentials) [default=%default]"
    )
    parser.add_option(
        "-m", "--mom_credentials", dest="mom_credentials", type="string",
        default='MoM_site' if isProductionEnvironment() else 'MoM_site_test',
        help=
        "Name of credentials for MoM user/pass (see ~/.lofar/dbcredentials) [default=%default]"
    )
    parser.add_option('-V', '--verbose', dest='verbose', action='store_true',
                      help='verbose logging')
    (options, args) = parser.parse_args()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s',
        level=logging.DEBUG if options.verbose else logging.INFO)

    logger.info('*****************************************')
    logger.info('Started ingest server on host %s', socket.gethostname())
    logger.info('*****************************************')

    logger.info("environment:")
    for k in sorted(os.environ):
        # never write credentials to the log: mask variables whose name
        # suggests a secret (e.g. RABBITMQ_DEFAULT_PASS)
        is_secret = any(marker in k.upper()
                        for marker in ('PASS', 'SECRET', 'TOKEN'))
        logger.info("%s=%s", k, '********' if is_secret else os.environ[k])
    logger.info('*****************************************')

    ltacreds = dbcredentials.DBCredentials().get(options.lta_credentials)
    momcreds = dbcredentials.DBCredentials().get(options.mom_credentials)

    transfer_server = IngestTransferServer(
        exchange=options.exchange,
        broker=options.broker,
        mom_credentials=momcreds,
        lta_credentials=ltacreds,
        max_nr_of_parallel_jobs=options.max_nr_of_parallel_jobs)

    incoming_jobs_listener = BusListener(
        IngestJobsForTransferHandler, {'transfer_server': transfer_server},
        exchange=options.exchange,
        routing_key="%s.#" % DEFAULT_INGEST_JOB_FOR_TRANSFER_SUBJECT)

    # the listener is only active while the server runs
    with incoming_jobs_listener:
        transfer_server.run()
class BaseMoMClient:
    """Base class for clients that talk to the MoM web application over HTTP.

    Handles logging in and out with a ``requests`` session and can be used as
    a context manager: entering logs in, leaving logs out.
    """

    # default MoM base URL for the current environment
    MOM_BASE_URL = 'https://lcs023.control.lofar:8443/' if isProductionEnvironment(
    ) else 'http://lofartest.control.lofar:8080/'

    def __init__(self, mom_base_url, user=None, password=None):
        """Store the MoM credentials and derive the login/logout URLs.

        :param mom_base_url: base URL of the MoM web application; the
                             'useradministration/user/...' paths are appended
                             to it as-is.
        :param user: MoM user name. When ``user`` or ``password`` is ``None``,
                     both are read from the lofar dbcredentials set
                     ``'MoM_site'`` (production) or ``'MoM_site_test'``
                     (any other environment).
        :param password: password belonging to ``user``.
        """
        # identity test: only a genuinely missing value triggers the lookup
        if user is None or password is None:
            # (mis)use dbcredentials to read user/pass from disk
            from lofar.common import dbcredentials
            dbc = dbcredentials.DBCredentials()
            creds = dbc.get(
                'MoM_site' if isProductionEnvironment() else 'MoM_site_test')
            user = creds.user
            password = creds.password

        self.mom_base_url = mom_base_url
        self.__user = user
        self.__password = password
        self.session = None

        self.__momURLlogin = self.mom_base_url + 'useradministration/user/systemlogin.do'
        self.__momUR_security_check = self.mom_base_url + 'useradministration/user/j_security_check'
        self.__momURLlogout = self.mom_base_url + 'useradministration/user/logout.do'

    def login(self):
        """Log in to MoM and keep the authenticated session in ``self.session``.

        An existing session is logged out first. ``self.session`` is only set
        once both login requests returned HTTP 200.

        :raises Exception: when logging in fails; the original error is
                           attached as ``__cause__``.
        """
        try:
            if self.session is not None:
                self.logout()

            logger.debug("logging in to MoM on url: %s", self.__momURLlogin)
            session = requests.session()
            try:
                # NOTE(review): verify=False disables TLS certificate
                # verification for these requests - confirm this is intended.
                r = session.get(self.__momURLlogin, verify=False)
                if 200 != r.status_code:
                    raise Exception(
                        "Logging into MoM on %s failed: http return code = %s"
                        % (self.__momURLlogin, r.status_code))

                r = session.post(self.__momUR_security_check,
                                 data={
                                     'j_username': self.__user,
                                     'j_password': self.__password
                                 },
                                 verify=False)
                if 200 != r.status_code:
                    raise Exception(
                        "Logging into MoM on %s failed: http return code = %s"
                        % (self.__momUR_security_check, r.status_code))
            except Exception:
                # do not leak the half-initialised session on a failed login
                session.close()
                raise

            logger.debug("logged in on MoM on url: %s", self.__momURLlogin)
            self.session = session
        except Exception as e:
            raise Exception("Logging into MoM on %s failed: %s" %
                            (self.__momURLlogin, str(e))) from e

    def logout(self):
        """Log out of MoM and drop the session; never raises.

        Does nothing when there is no session. Otherwise the session is always
        closed and ``self.session`` reset to ``None``, even when the logout
        request itself fails (which is only logged as a warning).
        """
        if self.session is None:
            return

        session, self.session = self.session, None
        try:
            logger.debug("logging out of MoM on url: %s", self.__momURLlogout)
            try:
                session.get(self.__momURLlogout, verify=False)
            finally:
                # release the connections whether or not the request succeeded
                session.close()
            logger.debug("logged out of MoM on url: %s", self.__momURLlogout)
        except Exception as e:
            logger.warning("Logging out of MoM failed: " + str(e))

    def __enter__(self):
        """Log in and return this client."""
        self.login()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Log out; exceptions from the ``with`` body are not suppressed."""
        self.logout()
def putTask(task_id):
    """Handle an update of one task, sent as JSON in the request body.

    The request must have a Content-Type starting with 'application/json',
    otherwise the request is aborted with 406. The JSON object must contain
    an 'id' equal to ``task_id`` and may contain:

    * 'starttime' / 'endtime': written to the otdb specification and to the
      radb task and its resource claims (refused with 403 in production);
    * 'status': set in otdb only, after which this handler waits up to
      10 seconds for both otdb and radb to report that status;
    * 'data_pinned': forwarded to ``curpc.setTaskDataPinned``.

    :param task_id: id of the task, taken from the URL.
    :return: ``("", 204)`` on success.

    NOTE(review): the outer ``except Exception`` is reached for every error
    raised in the body and answers with 404. If ``abort()`` signals by raising
    an ``Exception`` subclass (as Flask's does), the 400/403 aborts below are
    reported as 404 too - confirm and let those pass through if so.
    """
    if 'Content-Type' in request.headers and \
            request.headers['Content-Type'].startswith('application/json'):
        try:
            updatedTask = json_loads(request.data.decode('utf-8'))

            if task_id != int(updatedTask['id']):
                abort(404, 'task_id in url is not equal to id in request.data')

            #check if task is known
            task = radb().getTask(task_id)
            if not task:
                abort(404, "unknown task %s" % str(updatedTask))

            # first handle start- endtimes...
            if 'starttime' in updatedTask or 'endtime' in updatedTask:
                logger.info('starttime or endtime in updatedTask: %s', updatedTask)
                if isProductionEnvironment():
                    # name the properties the client tried to edit (the
                    # message used to be formatted with the `time` module)
                    edited_props = ' and '.join(
                        prop for prop in ('starttime', 'endtime')
                        if prop in updatedTask)
                    abort(
                        403,
                        'Editing of %s of tasks by users is not yet approved'
                        % (edited_props, ))

                #update dict for otdb spec
                spec_update = {}

                for timeprop in ['starttime', 'endtime']:
                    if timeprop in updatedTask:
                        try:
                            updatedTask[timeprop] = asDatetime(
                                updatedTask[timeprop])
                        except ValueError:
                            abort(
                                400, 'timestamp not in iso format: ' +
                                updatedTask[timeprop])
                        otdb_key = 'LOFAR.ObsSW.Observation.' + (
                            'startTime'
                            if timeprop == 'starttime' else 'stopTime')
                        spec_update[otdb_key] = updatedTask[timeprop].strftime(
                            '%Y-%m-%d %H:%M:%S')

                #update timestamps in both otdb and radb
                otdbrpc.taskSetSpecification(task['otdb_id'], spec_update)

                # update the task's (and its claims) start/endtime
                # do not update the tasks status directly via the radb. See few lines below. task status is routed via otdb (and then ends up in radb automatically)
                # it might be that editing the start/end time results in a (rabd)task status update (for example to 'conflict' due to conflicting claims)
                # that's ok, since we'll update the status to the requested status later via otdb (see few lines below)
                radb().updateTaskAndResourceClaims(
                    task_id,
                    starttime=updatedTask.get('starttime'),
                    endtime=updatedTask.get('endtime'))

            # ...then, handle status update which might trigger resource assignment,
            # for which the above updated times are needed
            if 'status' in updatedTask:
                if isProductionEnvironment(
                ) and task['type'] == 'observation' and updatedTask[
                        'status'] == 'prescheduled':
                    abort(
                        403,
                        'Scheduling of observations via the webscheduler by users is not (yet) allowed'
                    )

                try:
                    #update status in otdb only
                    #the status change will propagate automatically into radb via other services (by design)
                    otdbrpc.taskSetStatus(task['otdb_id'],
                                          updatedTask['status'])

                    #we expect the status in otdb/radb to eventually become what we asked for...
                    expected_status = updatedTask['status']

                    #block until radb and mom task status are equal to the expected_statuses (with timeout)
                    start_wait = datetime.utcnow()

                    while True:
                        task = radb().getTask(otdb_id=task['otdb_id'])
                        otdb_status = otdbrpc.taskGetStatus(task['otdb_id'])

                        logger.info(
                            'waiting for otdb/radb task status to be in [%s].... otdb:%s radb:%s',
                            expected_status, otdb_status, task['status'])

                        if (task['status'] == expected_status
                                and otdb_status == expected_status):
                            logger.info(
                                'otdb/radb task status now has the expected status %s otdb:%s radb:%s',
                                expected_status, otdb_status, task['status'])
                            break

                        if datetime.utcnow() - start_wait > timedelta(
                                seconds=10):
                            # give up waiting; the request still succeeds
                            logger.warning(
                                'timeout while waiting for otdb/radb task status to get the expected status %s otdb:%s radb:%s',
                                expected_status, otdb_status, task['status'])
                            break

                        time.sleep(0.1)
                except RPCException as e:
                    if 'does not exist' in str(e):
                        # task does not exist (anymore) in otdb
                        #so remove it from radb as well (with cascading deletes on specification)
                        logger.warning(
                            'task with otdb_id %s does not exist anymore in OTDB. removing task radb_id %s from radb',
                            task['otdb_id'], task['id'])
                        radb().deleteSpecification(task['specification_id'])

            if 'data_pinned' in updatedTask:
                # re-read the task: it may have been replaced or removed above
                task = radb().getTask(task_id)

                if not task:
                    abort(404, "unknown task %s" % str(updatedTask))

                curpc.setTaskDataPinned(task['otdb_id'],
                                        updatedTask['data_pinned'])

            return "", 204
        except Exception as e:
            logger.error(e)
            abort(404, str(e))
    abort(406)