def __initialize_pilot_data(self):
    """Create the storage-backend adaptor for this Pilot Data.

    The adaptor is selected from the scheme of the ``service_url`` entry
    of the pilot-data description; after initialization the security
    context is refreshed from the adaptor.

    Raises:
        PilotError: if no file adaptor supports the URL scheme.
    """
    if self.pilot_data_description is not None:
        self.service_url = self.pilot_data_description["service_url"]
        # ROBUSTNESS FIX: "size" is optional in the description (the
        # sibling implementation guards it) -- avoid KeyError when absent.
        if "size" in self.pilot_data_description:
            self.size = self.pilot_data_description["size"]

        # initialize file adaptor
        if self.service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(self.service_url)
        elif self.service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(self.service_url)
        elif self.service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            # BUG FIX: previously constructed GSFileAdaptor although this
            # branch (and its log message) is for Globus Online; sibling
            # code in this file uses GlobusOnlineFileAdaptor here.
            self.__filemanager = GlobusOnlineFileAdaptor(self.service_url)
        elif self.service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(self.service_url,
                                               self.security_context)
        elif (self.service_url.startswith("s3:")
              or self.service_url.startswith("walrus:")
              or self.service_url.startswith("swift:")):
            logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(self.service_url,
                                               self.security_context,
                                               self.pilot_data_description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        self.__filemanager.get_pilotdata_size()

        # Update security context (the adaptor may create/refresh it)
        self.security_context = self.__filemanager.get_security_context()
def __initialize_pilot_data(self):
    """Instantiate and initialize the storage backend for this Pilot Data.

    The adaptor is chosen from the scheme of the ``service_url`` entry of
    the pilot-data description. After initialization the security context
    is refreshed from the adaptor (it may have been created there).

    Raises:
        PilotError: if the URL scheme matches no known file adaptor.
    """
    if self.pilot_data_description is not None:
        self.service_url = self.pilot_data_description["service_url"]
        # "size" is optional in the description.
        # IDIOM FIX: dict.has_key() is Python-2-only; `in` works in both.
        if "size" in self.pilot_data_description:
            self.size = self.pilot_data_description["size"]

        # initialize file adaptor
        if self.service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(self.service_url,
                                                self.security_context,
                                                self.pilot_data_description)
        elif self.service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(self.service_url)
        elif self.service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            self.__filemanager = GlobusOnlineFileAdaptor(self.service_url)
        elif self.service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(self.service_url,
                                               self.security_context)
        elif self.service_url.startswith("irods:"):
            logger.debug("Use iRods Storage backend")
            self.__filemanager = iRodsFileAdaptor(self.service_url,
                                                  self.security_context)
        elif self.service_url.startswith("s3:") \
                or self.service_url.startswith("walrus:") \
                or self.service_url.startswith("swift:"):
            logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(self.service_url,
                                               self.security_context,
                                               self.pilot_data_description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        self.__filemanager.get_pilotdata_size()

        # Update security context (the adaptor may create/refresh it)
        self.security_context = self.__filemanager.get_security_context()
class PilotData(PilotData):
    """ B{PilotData (PD).}

        This is the object that is returned by the PilotDataService when a
        new PilotData is created based on a PilotDataDescription.

        A PilotData represents a finite amount of physical space on a
        certain resource. It can be populated with L{DataUnit}s.

        The PilotData object can be used by the application to keep track
        of a pilot. A PilotData has state, can be queried, can be
        cancelled.
    """

    PD_ID_PREFIX = "pd-"

    def __init__(self, pilot_data_service=None, pilot_data_description=None,
                 pd_url=None):
        """ Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are
            supported.
        """
        self.id = None                      # pd-<uuid> once assigned
        self.url = pd_url                   # coordination URL of this PD
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None             # storage backend URL
        self.size = None
        self.data_unit_urls = []            # URL references only, no DU objects
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            self.url = CoordinationAdaptor.add_pd(
                CoordinationAdaptor.get_base_url(application_id) + ":"
                + pilot_data_service.id, self)
        elif pd_url is not None:            # reconnect to an existing pd
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # NOTE(review): eval() of coordination-service payloads is
            # unsafe if that backend can be tampered with; consider
            # ast.literal_eval.
            pd_dict = eval(dictionary["pilot_data"])
            for i in pd_dict:
                self.__setattr__(i, pd_dict[i])
            # A Pilot Data does not hold a direct reference to a Data Unit
            # (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)

    def cancel(self):
        """ Cancel PilotData  """
        #self.__filemanager.delete_pilotdata()
        pass

    def get_url(self):
        """ Get URL of PilotData. Used for reconnecting to PilotData """
        return self.url

    def url_for_du(self, du):
        """ Get full URL to DataUnit within PilotData """
        return self.service_url + "/" + str(du.id)

    def submit_data_unit(self, data_unit_description=None, data_unit=None):
        """ creates a data unit object and initially imports data specified
            in data_unit_description
        """
        # Reuse an already-constructed DU when one is passed in.
        if data_unit is not None:
            du = data_unit
        else:
            du = DataUnit(pilot_data=self,
                          data_unit_description=data_unit_description)
        self.data_unit_urls.append(du.get_url())
        du.add_pilot_data(self)
        return du

    def list_data_units(self):
        """ List all data units of Pilot Data """
        return self.data_unit_urls

    def get_state(self):
        """ Return current state of Pilot Data """
        return self.__filemanager.get_state()

    def get_du(self, du_url):
        """ Returns Data Unit if part of Pilot Data """
        if du_url in self.data_unit_urls:
            du = DataUnit(du_url=du_url)
            return du
        return None

    def wait(self):
        """ Wait until PD enters a final state (Done, Canceled or Failed)."""
        while 1:
            finish_counter = 0
            result_map = {}
            # BUG FIX: was self.data_units_urls (misspelled attribute);
            # __init__ defines self.data_unit_urls, so the old code raised
            # AttributeError here.
            for du_url in self.data_unit_urls:
                du = DataUnit(du_url=du_url)
                du.wait()
                state = du.get_state()
                if state not in result_map:
                    result_map[state] = 1
                else:
                    result_map[state] = result_map[state] + 1
                if self.__has_finished(state):
                    finish_counter = finish_counter + 1
            logger.debug("PD ID: " + str(self.id)
                         + " Total DUs: %s States: %s"
                         % (len(self.data_unit_urls), str(result_map)))
            if finish_counter == len(self.data_unit_urls):
                break
            time.sleep(2)

    def export_du(self, du, target_url):
        """ Export Data Unit to a local directory """
        if target_url.startswith("/") and not os.path.exists(target_url):
            os.mkdir(target_url)
        logger.debug("Export Data-Unit to %s" % target_url)
        self.__filemanager.get_du(du, target_url)

    def put_du(self, du):
        """Copy Data Unit to Pilot Data"""
        logger.debug("Put DU: %s to Pilot-Data: %s" % (du.id, self.service_url))
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def remove_du(self, du):
        """ Remove Data Unit from Pilot Data """
        if du.get_url() in self.data_unit_urls:
            self.__filemanager.remove_du(du)
            self.data_unit_urls.remove(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def copy_du(self, du, pd_new):
        """ Copy DataUnit to another Pilot Data """
        pd_new.create_du(du)
        self.__filemanager.copy_du(du, pd_new)

        # update meta data at pd_new
        #pd_new.data_units[du.id] = du
        pd_new.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(pd_new)

    # END API methods
    ###########################################################################
    # Auxillary Methods

    def create_du(self, du):
        """ Create a new Data Unit within Pilot """
        self.__filemanager.create_du(du.id)

    def __initialize_pilot_data(self):
        """Instantiate the storage adaptor matching the service URL scheme
        and refresh the security context from it.

        Raises:
            PilotError: if no adaptor supports the URL scheme.
        """
        if self.pilot_data_description is not None:
            self.service_url = self.pilot_data_description["service_url"]
            # ROBUSTNESS FIX: "size" is optional in the description --
            # do not raise KeyError when it is absent.
            if "size" in self.pilot_data_description:
                self.size = self.pilot_data_description["size"]

            # initialize file adaptor
            if self.service_url.startswith("ssh:"):
                logger.debug("Use SSH backend")
                self.__filemanager = SSHFileAdaptor(
                    self.service_url, self.security_context,
                    self.pilot_data_description)
            elif self.service_url.startswith("http:"):
                logger.debug("Use WebHDFS backend")
                self.__filemanager = WebHDFSFileAdaptor(self.service_url)
            elif self.service_url.startswith("go:"):
                logger.debug("Use Globus Online backend")
                self.__filemanager = GlobusOnlineFileAdaptor(self.service_url)
            elif self.service_url.startswith("gs:"):
                logger.debug("Use Google Cloud Storage backend")
                self.__filemanager = GSFileAdaptor(self.service_url,
                                                   self.security_context)
            elif self.service_url.startswith("irods:"):
                logger.debug("Use iRods Storage backend")
                self.__filemanager = iRodsFileAdaptor(self.service_url,
                                                      self.security_context)
            elif self.service_url.startswith("s3:") \
                    or self.service_url.startswith("walrus:") \
                    or self.service_url.startswith("swift:"):
                logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
                self.__filemanager = S3FileAdaptor(self.service_url,
                                                   self.security_context,
                                                   self.pilot_data_description)
            else:
                raise PilotError("No File Plugin found.")

            self.__filemanager.initialize_pilotdata()
            self.__filemanager.get_pilotdata_size()

            # Update security context (the adaptor may create/refresh it)
            self.security_context = self.__filemanager.get_security_context()

    def __get_pd_id(self, pd_url):
        """Extract the pd-<uuid> component from a Pilot Data URL."""
        start = pd_url.index(self.PD_ID_PREFIX)
        end = pd_url.index("/", start)
        return pd_url[start:end]

    def to_dict(self):
        """ Internal method that returns a dict with all data contained in
            this Pilot Data"""
        pd_dict = {}
        pd_dict["id"] = self.id
        pd_dict["url"] = self.url
        pd_dict["pilot_data_description"] = self.pilot_data_description
        logger.debug("PilotData Dictionary: " + str(pd_dict))
        return pd_dict

    def __repr__(self):
        """Returns Pilot Data URL"""
        return self.service_url

    def __has_finished(self, state):
        """Return True if the given DU state counts as final.

        NOTE(review): "running" is treated as final here -- preserved
        as-is (both implementations in this file agree), but confirm the
        intended semantics.
        """
        state = state.lower()
        return state == "running" or state == "failed" or state == "canceled"

    @classmethod
    def create_pilot_data_from_dict(cls, pd_dict):
        """Restore Pilot Data from dictionary"""
        pd = PilotData()
        for i in pd_dict:
            pd.__setattr__(i, pd_dict[i])
        pd.__initialize_pilot_data()
        logger.debug("created pd " + str(pd))
        return pd
class PilotData(PilotData):
    """ B{PilotData (PD).}

        This is the object that is returned by the PilotDataService when a
        new PilotData is created based on a PilotDataDescription.

        A PilotData represents a finite amount of physical space on a
        certain resource. It can be populated with L{DataUnit}s.

        The PilotData object can be used by the application to keep track
        of a pilot. A PilotData has state, can be queried, can be
        cancelled.
    """

    PD_ID_PREFIX = "pd-"

    def __init__(self, pilot_data_service=None, pilot_data_description=None,
                 pd_url=None):
        """ Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are
            supported.
        """
        self.id = None                      # pd-<uuid> once assigned
        self.url = pd_url                   # coordination URL of this PD
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None             # storage backend URL
        self.size = None
        self.data_unit_urls = []            # URL references only, no DU objects
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            self.url = CoordinationAdaptor.add_pd(
                CoordinationAdaptor.get_base_url(application_id) + ":"
                + pilot_data_service.id, self)
        elif pd_url is not None:            # reconnect to an existing pd
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # NOTE(review): eval() of coordination-service payloads is
            # unsafe if that backend can be tampered with; consider
            # ast.literal_eval.
            pd_dict = eval(dictionary["pilot_data"])
            for i in pd_dict:
                self.__setattr__(i, pd_dict[i])
            # A Pilot Data does not hold a direct reference to a Data Unit
            # (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)

    def cancel(self):
        """ Cancel PilotData  """
        #self.__filemanager.delete_pilotdata()
        pass

    def get_url(self):
        """ Get URL of PilotData. Used for reconnecting to PilotData """
        return self.url

    def url_for_du(self, du):
        """ Get full URL to DataUnit within PilotData """
        return self.service_url + "/" + str(du.id)

    def submit_data_unit(self, data_unit_description):
        """ creates a data unit object and initially imports data specified
            in data_unit_description
        """
        du = DataUnit(pilot_data=self,
                      data_unit_description=data_unit_description)
        self.data_unit_urls.append(du.get_url())
        du.add_pilot_data(self)
        return du

    def list_data_units(self):
        """ List all data units of Pilot Data """
        return self.data_unit_urls

    def get_state(self):
        """ Return current state of Pilot Data """
        return self.__filemanager.get_state()

    def get_du(self, du_url):
        """ Returns Data Unit if part of Pilot Data """
        if du_url in self.data_unit_urls:
            du = DataUnit(du_url=du_url)
            return du
        return None

    def wait(self):
        """ Wait until PD enters a final state (Done, Canceled or Failed)."""
        while 1:
            finish_counter = 0
            result_map = {}
            # BUG FIX: was self.data_units_urls (misspelled attribute);
            # __init__ defines self.data_unit_urls, so the old code raised
            # AttributeError here.
            for du_url in self.data_unit_urls:
                du = DataUnit(du_url=du_url)
                du.wait()
                state = du.get_state()
                if state not in result_map:
                    result_map[state] = 1
                else:
                    result_map[state] = result_map[state] + 1
                if self.__has_finished(state):
                    finish_counter = finish_counter + 1
            logger.debug("PD ID: " + str(self.id)
                         + " Total DUs: %s States: %s"
                         % (len(self.data_unit_urls), str(result_map)))
            if finish_counter == len(self.data_unit_urls):
                break
            time.sleep(2)

    def export_du(self, du, target_url):
        """ Export Data Unit to a local directory """
        if target_url.startswith("/") and not os.path.exists(target_url):
            os.mkdir(target_url)
        self.__filemanager.get_du(du, target_url)

    def put_du(self, du):
        """Copy Data Unit to Pilot Data"""
        logger.debug("Put DU: %s to Pilot-Data: %s" % (du.id, self.service_url))
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def remove_du(self, du):
        """ Remove Data Unit from Pilot Data """
        if du.get_url() in self.data_unit_urls:
            self.__filemanager.remove_du(du)
            self.data_unit_urls.remove(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def copy_du(self, du, pd_new):
        """ Copy DataUnit to another Pilot Data """
        pd_new.create_du(du)
        self.__filemanager.copy_du(du, pd_new)

        # update meta data at pd_new
        #pd_new.data_units[du.id] = du
        pd_new.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(pd_new)

    # END API methods
    ###########################################################################
    # Auxillary Methods

    def create_du(self, du):
        """ Create a new Data Unit within Pilot """
        self.__filemanager.create_du(du.id)

    def __initialize_pilot_data(self):
        """Instantiate the storage adaptor matching the service URL scheme
        and refresh the security context from it.

        Raises:
            PilotError: if no adaptor supports the URL scheme.
        """
        if self.pilot_data_description is not None:
            self.service_url = self.pilot_data_description["service_url"]
            # ROBUSTNESS FIX: "size" is optional in the description --
            # do not raise KeyError when it is absent.
            if "size" in self.pilot_data_description:
                self.size = self.pilot_data_description["size"]

            # initialize file adaptor
            if self.service_url.startswith("ssh:"):
                logger.debug("Use SSH backend")
                self.__filemanager = SSHFileAdaptor(self.service_url)
            elif self.service_url.startswith("http:"):
                logger.debug("Use WebHDFS backend")
                self.__filemanager = WebHDFSFileAdaptor(self.service_url)
            elif self.service_url.startswith("go:"):
                logger.debug("Use Globus Online backend")
                # BUG FIX: previously constructed GSFileAdaptor although
                # this branch (and its log message) is for Globus Online;
                # sibling code in this file uses GlobusOnlineFileAdaptor.
                self.__filemanager = GlobusOnlineFileAdaptor(self.service_url)
            elif self.service_url.startswith("gs:"):
                logger.debug("Use Google Cloud Storage backend")
                self.__filemanager = GSFileAdaptor(self.service_url,
                                                   self.security_context)
            elif self.service_url.startswith("s3:") \
                    or self.service_url.startswith("walrus:") \
                    or self.service_url.startswith("swift:"):
                logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
                self.__filemanager = S3FileAdaptor(self.service_url,
                                                   self.security_context,
                                                   self.pilot_data_description)
            else:
                raise PilotError("No File Plugin found.")

            self.__filemanager.initialize_pilotdata()
            self.__filemanager.get_pilotdata_size()

            # Update security context (the adaptor may create/refresh it)
            self.security_context = self.__filemanager.get_security_context()

    def __get_pd_id(self, pd_url):
        """Extract the pd-<uuid> component from a Pilot Data URL."""
        start = pd_url.index(self.PD_ID_PREFIX)
        end = pd_url.index("/", start)
        return pd_url[start:end]

    def to_dict(self):
        """ Internal method that returns a dict with all data contained in
            this Pilot Data"""
        pd_dict = {}
        pd_dict["id"] = self.id
        pd_dict["url"] = self.url
        pd_dict["pilot_data_description"] = self.pilot_data_description
        logger.debug("PilotData Dictionary: " + str(pd_dict))
        return pd_dict

    def __repr__(self):
        """Returns Pilot Data URL"""
        return self.service_url

    def __has_finished(self, state):
        """Return True if the given DU state counts as final.

        NOTE(review): "running" is treated as final here -- preserved
        as-is (both implementations in this file agree), but confirm the
        intended semantics.
        """
        state = state.lower()
        return state == "running" or state == "failed" or state == "canceled"

    @classmethod
    def create_pilot_data_from_dict(cls, pd_dict):
        """Restore Pilot Data from dictionary"""
        pd = PilotData()
        for i in pd_dict:
            pd.__setattr__(i, pd_dict[i])
        pd.__initialize_pilot_data()
        logger.debug("created pd " + str(pd))
        return pd