def put_pd(self, pd):
    """Upload every data unit of *pd* into this pilot data's HDFS directory.

    Only local sources ("file://..." or absolute paths) are supported;
    directories are skipped with a warning, other URL schemes are logged
    as errors. Files are copied via WebHDFS copyFromLocal.
    """
    for unit in pd.list_data_units():
        source = unit.local_url
        remote_path = os.path.join(self.__get_pd_path(pd.id),
                                   os.path.basename(source))
        logger.debug("Put file: %s to %s"%(source, remote_path))
        # Guard: anything that is not a local path/URL is unsupported.
        if not (source.startswith("file://") or source.startswith("/")):
            logger.error("File URLs: %s not supported"%source)
            continue
        # NOTE(review): os.stat is called on the raw URL; for a
        # "file://"-prefixed value this would raise — presumably callers
        # pass plain absolute paths. TODO confirm.
        if stat.S_ISDIR(os.stat(source).st_mode):
            logger.warning("Path %s is a directory. Ignored."%source)
            continue
        self.__webhdfs.copyFromLocal(source, remote_path)
def __init__(self, pjs_url=None):
    """Create a PilotJobService object.

    Keyword arguments:
    pjs_url -- Don't create a new service, but connect to an existing
               one (optional; reconnect is currently not implemented)
    """
    self.__mjs = None
    self.pilot_jobs = []
    if pjs_url is None:  # new pjs: mint a fresh id and local URL
        self.id = self.PJS_ID_PREFIX + str(uuid.uuid1())
        self.url = "pilotjob://localhost/" + self.id
    else:
        # Reconnect path is a stub — only logs an error.
        logger.error("Reconnect to PJS currently not supported.")
def __init__(self, service_url):
    """Initialize from *service_url*, extracting host and path.

    Also sets the initial state to State.New and opens a WebHDFS client
    using the class-level HDFS_* connection constants.
    """
    self.service_url = service_url
    try:
        result = urlparse.urlparse(service_url)
        self.host = result.netloc
        self.path = result.path
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed; on parse failure host/path stay unset.
        logger.error("Error parsing URL.")
    self.__state = State.New
    self.__webhdfs = WebHDFS(self.HDFS_SERVICE_HOST,
                             self.HDFS_SERVICE_PORT,
                             self.HDFS_USER_NAME)
def copy_pd_to_url(self, pd, local_url, remote_url):
    """Download all files of Pilot Data *pd* from HDFS into a local directory.

    Arguments:
    pd         -- Pilot Data whose HDFS directory is listed
    local_url  -- base used to build the per-file source URL
                  (NOTE(review): passed as copyToLocal source — verify
                  against the WebHDFS client's expected argument)
    remote_url -- local destination; must be "file://..." or an
                  absolute path, otherwise the call is rejected
    """
    if not remote_url.startswith("file://") and not remote_url.startswith("/"):
        logger.error("Only local URLs supported")
        return
    result = urlparse.urlparse(remote_url)
    path = result.path
    # Create destination directory; narrowed from bare `except:` —
    # os.makedirs raises OSError, typically because the dir exists.
    try:
        os.makedirs(path)
    except OSError:
        logger.debug("Directory: %s already exists."%path)
    base_dir = self.__get_pd_path(pd.id)
    for filename in self.__webhdfs.listdir(base_dir):
        file_url = local_url + "/" + filename
        file_remote_url = remote_url + "/" + filename
        logger.debug("GET " + file_url + " to " + file_remote_url)
        self.__webhdfs.copyToLocal(file_url, file_remote_url)
def __create_remote_directory(self, target_url):
    """Create the directory *target_url* points to on its host, via SFTP.

    Connects with SSH (system host keys, auto-adding unknown hosts) and
    issues a single mkdir. Errors are logged with a traceback rather
    than raised. The SFTP session and SSH client are always closed,
    even on failure (the original leaked both when any step raised).
    """
    result = urlparse.urlparse(target_url)
    target_host = result.netloc
    target_path = result.path
    client = None
    sftp = None
    try:
        client = paramiko.SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(target_host)
        sftp = client.open_sftp()
        sftp.mkdir(target_path)
    except Exception:
        logger.error("Error creating directory: " + str(target_path)
                     + " at: " + str(target_host))
        self.__print_traceback()
    finally:
        # Guaranteed cleanup: close whatever was successfully opened.
        if sftp is not None:
            sftp.close()
        if client is not None:
            client.close()
def _scheduler_thread(self):
    """Background scheduler loop: drains pd_queue and wu_queue until stopped.

    Each iteration attempts (with a 1 s blocking timeout per queue) to:
      1. schedule a queued PilotData onto a pilot store; on success the
         pd is attached and moved to Running, otherwise it is requeued;
      2. schedule a queued WorkUnit onto a pilot job; on success its
         working directory is expanded and it is submitted, otherwise
         it is requeued.
    Unexpected errors are logged with a traceback and the loop backs
    off for 5 s before retrying.
    """
    while not self.stop.isSet():
        try:
            logging.debug("Scheduler Thread: " + str(self.__class__) + " Pilot Data")
            pd = self.pd_queue.get(True, 1)
            # check whether this is a real pd object
            if isinstance(pd, PilotData):
                ps = self._schedule_pd(pd)
                if ps is not None:
                    logging.debug("Transfer to PS finished.")
                    pd.add_pilot_store(ps)
                    pd.update_state(State.Running)
                else:
                    # no store available yet — try again later
                    self.pd_queue.put(pd)
        except Queue.Empty:
            pass
        try:
            logging.debug("Scheduler Thread: " + str(self.__class__) + " Pilot Job")
            wu = self.wu_queue.get(True, 1)
            if isinstance(wu, WorkUnit):
                pj = self._schedule_wu(wu)
                if pj is not None:
                    wu = self.__expand_working_directory(wu, pj)
                    pj._submit_wu(wu)
                else:
                    # BUG FIX: requeue the work unit itself — the
                    # original put `pd` (the pilot-data object from the
                    # first block) into the work-unit queue.
                    self.wu_queue.put(wu)
        except Queue.Empty:
            pass
        except Exception:
            # Narrowed from bare `except:`; log and back off.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            logger.error("*** print_tb:")
            traceback.print_tb(exc_traceback, limit=1, file=sys.stderr)
            logger.error("*** print_exception:")
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=2, file=sys.stderr)
            time.sleep(5)
    logging.debug("Re-Scheduler terminated")