def initialize_pilot_data(self):
    """Set up the file adaptor described by ``self.pilot_data_description``.

    Does nothing when no description is set.  Otherwise ``service_url`` and
    ``size`` are copied from the description onto the instance, a file
    adaptor is chosen from the URL scheme (``ssh:`` -> SSHFileAdaptor,
    ``http:`` -> WebHDFSFileAdaptor), and the adaptor is asked to initialize
    the pilot data and to report its size (the reported size is not stored
    here).

    Raises:
        ValueError: if the service URL uses a scheme without a file adaptor.
    """
    description = self.pilot_data_description
    if description is None:
        return

    # Both entries are required by this version of the initializer.
    self.service_url = description["service_url"]
    self.size = description["size"]

    # Select the file adaptor from the URL scheme.
    if self.service_url.startswith("ssh:"):
        logger.debug("Use SSH backend")
        self.__filemanager = SSHFileAdaptor(self.service_url)
    elif self.service_url.startswith("http:"):
        logger.debug("Use WebHDFS backend")
        self.__filemanager = WebHDFSFileAdaptor(self.service_url)
    else:
        # Fail here instead of falling through to an adaptor that was never
        # set (or to a stale one left over from an earlier initialization).
        raise ValueError(
            "No file adaptor available for service URL: %s" % self.service_url)

    self.__filemanager.initialize_pilotdata()
    self.__filemanager.get_pilotdata_size()
def __initialize_pilot_data(self, service_url):
    """Select the file-management adaptor that matches ``service_url``.

    The adaptor module for the matching scheme (``ssh:``, ``http:`` or
    ``go:``) is imported on demand and instantiated with the URL.  Any error
    raised while importing or constructing the adaptor is logged at debug
    level and suppressed, leaving the file manager unchanged; the SSH and
    Globus Online branches additionally print a traceback.  URLs with any
    other scheme are ignored without a message.

    Only ``Exception`` subclasses are suppressed, so ``KeyboardInterrupt``
    and ``SystemExit`` still propagate.
    """
    # Pilot Data API for File Management
    if service_url.startswith("ssh:"):
        logger.debug("Use SSH backend")
        try:
            from pilot.filemanagement.ssh_adaptor import SSHFileAdaptor
            self.__filemanager = SSHFileAdaptor(service_url)
        except Exception:
            logger.debug("SSH/Paramiko package not found.")
            self.__print_traceback()
    elif service_url.startswith("http:"):
        logger.debug("Use WebHDFS backend")
        try:
            from pilot.filemanagement.webhdfs_adaptor import WebHDFSFileAdaptor
            self.__filemanager = WebHDFSFileAdaptor(service_url)
        except Exception:
            logger.debug("WebHDFS package not found.")
    elif service_url.startswith("go:"):
        logger.debug("Use Globus Online backend")
        try:
            from pilot.filemanagement.globusonline_adaptor import GlobusOnlineFileAdaptor
            self.__filemanager = GlobusOnlineFileAdaptor(service_url)
        except Exception:
            logger.debug("Globus Online package not found.")
            self.__print_traceback()
def __initialize_pilot_data(self):
    """Set up the file adaptor described by ``self.pilot_data_description``.

    Does nothing when no description is set.  Otherwise ``service_url`` (and
    ``size``, when present) are copied from the description, a file adaptor
    is chosen from the URL scheme, the adaptor initializes the pilot data
    and is queried for its size, and finally ``self.security_context`` is
    replaced by the context the adaptor reports.

    Scheme to adaptor mapping:
        ssh:                 SSHFileAdaptor
        http:                WebHDFSFileAdaptor
        go:                  GlobusOnlineFileAdaptor
        gs:                  GSFileAdaptor
        irods:               iRodsFileAdaptor
        s3: walrus: swift:   S3FileAdaptor

    Raises:
        PilotError: if no adaptor matches the URL scheme.
    """
    description = self.pilot_data_description
    if description is None:
        return

    self.service_url = description["service_url"]
    # "size" is optional; the membership test works on Python 2 and 3,
    # unlike dict.has_key().
    if "size" in description:
        self.size = description["size"]

    # Select the file adaptor from the URL scheme.
    url = self.service_url
    if url.startswith("ssh:"):
        logger.debug("Use SSH backend")
        self.__filemanager = SSHFileAdaptor(
            url, self.security_context, description)
    elif url.startswith("http:"):
        logger.debug("Use WebHDFS backend")
        self.__filemanager = WebHDFSFileAdaptor(url)
    elif url.startswith("go:"):
        logger.debug("Use Globus Online backend")
        self.__filemanager = GlobusOnlineFileAdaptor(url)
    elif url.startswith("gs:"):
        logger.debug("Use Google Cloud Storage backend")
        self.__filemanager = GSFileAdaptor(url, self.security_context)
    elif url.startswith("irods:"):
        logger.debug("Use iRods Storage backend")
        self.__filemanager = iRodsFileAdaptor(url, self.security_context)
    elif url.startswith(("s3:", "walrus:", "swift:")):
        logger.debug(
            "Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
        self.__filemanager = S3FileAdaptor(
            url, self.security_context, description)
    else:
        raise PilotError("No File Plugin found.")

    self.__filemanager.initialize_pilotdata()
    self.__filemanager.get_pilotdata_size()

    # Update security context
    self.security_context = self.__filemanager.get_security_context()
class PilotData(PilotData):
    """ PilotData.

        Reserves a space of physical storage on the resource specified in
        the pilot_data_description.
    """

    PD_ID_PREFIX = "pd-"

    def __init__(self, pilot_data_service=None, pilot_data_description=None, pd_url=None):
        """
            Create a new PilotData or reconnect to an existing one.

            With a pilot_data_service and no pd_url, a new id is generated
            and the object is registered through the CoordinationAdaptor.
            With a pd_url, the attributes stored under the "pilot_data" key
            of the coordination entry are copied onto this instance.  With
            neither, an empty object is created (used by
            create_pilot_data_from_dict).

            The file adaptor is selected from the "service_url" entry of the
            description; "ssh:" and "http:" (WebHDFS) URLs are handled.
        """
        self.id = None
        self.url = None
        self.pilot_data_description = None
        self.service_url = None
        self.size = None
        self.data_unit_description = None
        self.data_units = {}

        if pd_url is None and pilot_data_service is not None:
            # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            base_url = CoordinationAdaptor.get_base_url(application_id)
            self.url = CoordinationAdaptor.add_pd(
                base_url + "/" + pilot_data_service.id, self)
        elif pd_url is not None:
            logger.warning("Reconnect to PilotData: %s", pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            pd_dict = dictionary["pilot_data"]
            for key in pd_dict:
                setattr(self, key, pd_dict[key])

        self.initialize_pilot_data()

    def initialize_pilot_data(self):
        """ Set up the file adaptor for the current description.

            Does nothing when no description is set.  Otherwise copies
            "service_url" and "size" from the description, selects the
            adaptor from the URL scheme, and lets the adaptor initialize the
            pilot data and report its size.

            Raises ValueError for a URL scheme without a file adaptor.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        self.size = description["size"]

        # initialize file adaptor
        if self.service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(self.service_url)
        elif self.service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(self.service_url)
        else:
            # Fail here instead of falling through to an adaptor that was
            # never set (or to a stale one from an earlier initialization).
            raise ValueError(
                "No file adaptor available for service URL: %s" % self.service_url)

        self.__filemanager.initialize_pilotdata()
        self.__filemanager.get_pilotdata_size()

    def __get_pd_id(self, pd_url):
        """ Return the "pd-..." path segment of pd_url.

            Raises ValueError when the URL does not contain the id prefix.
        """
        start = pd_url.index(self.PD_ID_PREFIX)
        end = pd_url.find("/", start)
        if end == -1:
            # The id is the last path segment (no trailing slash).
            return pd_url[start:]
        return pd_url[start:end]

    def cancel(self):
        """ Cancel PilotData

            Keyword arguments:
            None

            Currently a no-op: the call to the file manager's
            delete_pilotdata() is disabled.
        """
        pass

    def url_for_du(self, du):
        """ Return the URL of du inside this pilot data, or None if the
            data unit is not stored here. """
        if du.id in self.data_units:
            return self.service_url + "/" + str(du.id)
        return None

    def create_du(self, du):
        """ Ask the file manager to create the storage entry for du. """
        self.__filemanager.create_du(du.id)

    def put_du(self, du):
        """ Store du in this pilot data and publish the updated state. """
        logger.debug("Put PD: %s to PS: %s", du.id, self.service_url)
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_units[du.id] = du
        CoordinationAdaptor.update_pd(self)

    def remove_du(self, du):
        """ Remove a data unit from this pilot data.

            The file manager is only called when the data unit is known
            here; the coordination entry is updated in either case.
        """
        if du.id in self.data_units:
            self.__filemanager.remove_du(du)
            del self.data_units[du.id]
        CoordinationAdaptor.update_pd(self)

    def copy_du(self, du, pd_new):
        """ Copy du from this pilot data to pd_new. """
        pd_new.create_du(du)
        self.__filemanager.copy_du(du, pd_new)

        # update meta data at pd_new
        pd_new.data_units[du.id] = du
        CoordinationAdaptor.update_pd(pd_new)

    def list_data_units(self):
        """ Return the data units currently registered here as a list. """
        return list(self.data_units.values())

    def get_state(self):
        """ Return the state reported by the file manager. """
        return self.__filemanager.get_state()

    def wait(self):
        """ Block until every registered data unit is in a finished state.

            Each pass waits on every data unit, tallies the reported states
            for the debug log, and sleeps two seconds before the next pass
            if any unit has not finished.
        """
        while True:
            dus = list(self.data_units.values())
            finish_counter = 0
            result_map = {}
            for du in dus:
                du.wait()
                state = du.get_state()
                result_map[state] = result_map.get(state, 0) + 1
                if self.__has_finished(state):
                    finish_counter += 1
            logger.debug("PD ID: %s Total DUs: %s States: %s",
                         self.id, len(dus), result_map)
            if finish_counter == len(dus):
                break
            time.sleep(2)

    def export_du(self, du, target_url):
        """ Have the file manager export du to target_url. """
        self.__filemanager.get_du(du, target_url)

    def to_dict(self):
        """ Return the id, url and description as a plain dictionary. """
        pd_dict = {
            "id": self.id,
            "url": self.url,
            "pilot_data_description": self.pilot_data_description,
        }
        logger.debug("PS Dictionary: %s", pd_dict)
        return pd_dict

    def __repr__(self):
        # service_url is None until a description has been applied;
        # __repr__ must always return a string.
        return str(self.service_url)

    def __has_finished(self, state):
        """ Return True for the states wait() stops waiting on
            ("running", "failed", "canceled"; case-insensitive). """
        return state.lower() in ("running", "failed", "canceled")

    @classmethod
    def create_pilot_data_from_dict(cls, pd_dict):
        """ Build an instance from a dictionary of attribute values and
            initialize its file adaptor. """
        pd = cls()
        for key, value in pd_dict.items():
            setattr(pd, key, value)
        pd.initialize_pilot_data()
        logger.debug("created pd %s", pd)
        return pd
class PilotData(PilotData):
    """ PilotData.

        Reserves a space of physical storage on the resource specified in
        the pilot_data_description.
    """

    PS_ID_PREFIX = "ps-"

    def __init__(self, pilot_data_service=None, pilot_data_description=None, ps_url=None):
        """
            Create a new PilotData or reconnect to an existing one.

            With a pilot_data_service and no ps_url, a new id is generated
            and the object is registered through the CoordinationAdaptor.
            With a ps_url, the attributes stored under the "pilot_data" key
            of the coordination entry are copied onto this instance.  With
            neither, an empty object is created (used by
            create_pilot_data_from_dict).

            The file adaptor is selected from the "service_url" entry of the
            description; "ssh:" and "http:" (WebHDFS) URLs are handled.
        """
        self.id = None
        self.url = None
        self.pilot_data_description = None
        self.service_url = None
        self.size = None
        self.data_unit_description = None
        # Registry of stored data units, keyed by data unit id.  All
        # methods below use this attribute.
        self.data_units = {}

        if ps_url is None and pilot_data_service is not None:
            # new ps
            self.id = self.PS_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            base_url = CoordinationAdaptor.get_base_url(bigdata.application_id)
            self.url = CoordinationAdaptor.add_ps(
                base_url + "/" + pilot_data_service.id, self)
        elif ps_url is not None:
            logger.warning("Reconnect to PilotData: %s", ps_url)
            dictionary = CoordinationAdaptor.get_ps(ps_url)
            ps_dict = dictionary["pilot_data"]
            for key in ps_dict:
                setattr(self, key, ps_dict[key])

        self.initialize_pilot_data()

    def initialize_pilot_data(self):
        """ Set up the file adaptor for the current description.

            Does nothing when no description is set.  Otherwise copies
            "service_url" and "size" from the description, selects the
            adaptor from the URL scheme, and lets the adaptor initialize the
            pilot data and report its size.

            Raises ValueError for a URL scheme without a file adaptor.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        self.size = description["size"]

        # initialize file adaptor
        if self.service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(self.service_url)
        elif self.service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(self.service_url)
        else:
            # Fail here instead of falling through to an adaptor that was
            # never set (or to a stale one from an earlier initialization).
            raise ValueError(
                "No file adaptor available for service URL: %s" % self.service_url)

        self.__filemanager.initialize_pilotdata()
        self.__filemanager.get_pilotdata_size()

    def __get_ps_id(self, ps_url):
        """ Return the "ps-..." path segment of ps_url.

            Raises ValueError when the URL does not contain the id prefix.
        """
        start = ps_url.index(self.PS_ID_PREFIX)
        end = ps_url.find("/", start)
        if end == -1:
            # The id is the last path segment (no trailing slash).
            return ps_url[start:]
        return ps_url[start:end]

    def cancel(self):
        """ Cancel PilotData

            Keyword arguments:
            None

            Asks the file manager to delete the pilot data.
        """
        self.__filemanager.delete_pilotdata()

    def url_for_du(self, du):
        """ Return the URL of du inside this pilot data, or None if the
            data unit is not stored here. """
        if du.id in self.data_units:
            return self.service_url + "/" + str(du.id)
        return None

    def create_du(self, du):
        """ Ask the file manager to create the storage entry for du. """
        self.__filemanager.create_du(du.id)

    def put_du(self, du):
        """ Store du in this pilot data and publish the updated state. """
        logger.debug("Put PD: %s to PS: %s", du.id, self.service_url)
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_units[du.id] = du
        CoordinationAdaptor.update_ps(self)

    def remove_du(self, du):
        """ Remove a data unit from this pilot data.

            The file manager is only called when the data unit is known
            here; the coordination entry is updated in either case.
        """
        if du.id in self.data_units:
            self.__filemanager.remove_du(du)
            del self.data_units[du.id]
        CoordinationAdaptor.update_ps(self)

    def copy_du(self, du, ps_new):
        """ Copy du from this pilot data to ps_new. """
        ps_new.create_du(du)
        self.__filemanager.copy_du(du, ps_new)

        # update meta data at ps_new
        ps_new.data_units[du.id] = du
        CoordinationAdaptor.update_ps(ps_new)

    def list_pilotdata(self):
        """ Return the data units currently registered here as a list. """
        return list(self.data_units.values())

    def get_state(self):
        """ Return the state reported by the file manager. """
        return self.__filemanager.get_state()

    def export_du(self, du, target_url):
        """ Have the file manager export du to target_url. """
        self.__filemanager.get_du(du, target_url)

    def to_dict(self):
        """ Return the id, url and description as a plain dictionary. """
        ps_dict = {
            "id": self.id,
            "url": self.url,
            "pilot_data_description": self.pilot_data_description,
        }
        logger.debug("PS Dictionary: %s", ps_dict)
        return ps_dict

    def __repr__(self):
        # service_url is None until a description has been applied;
        # __repr__ must always return a string.
        return str(self.service_url)

    @classmethod
    def create_pilot_data_from_dict(cls, ps_dict):
        """ Build an instance from a dictionary of attribute values and
            initialize its file adaptor. """
        ps = cls()
        for key, value in ps_dict.items():
            setattr(ps, key, value)
        ps.initialize_pilot_data()
        logger.debug("created ps %s", ps)
        return ps