Beispiel #1
0
    def __initialize_pilot_data(self):
        """Set up the file adaptor described by ``self.pilot_data_description``.

        Does nothing when no description is set. Otherwise the method copies
        ``service_url`` (and ``size``, when present) from the description,
        instantiates the file adaptor that matches the URL scheme, asks the
        adaptor to initialize the pilot data and finally replaces
        ``self.security_context`` with the one reported by the adaptor.

        Raises:
            PilotError: if the URL scheme matches none of the known backends.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        # "size" is treated as optional so that a description without it
        # does not abort initialization with a KeyError.
        if "size" in description:
            self.size = description["size"]

        # initialize file adaptor
        service_url = self.service_url
        if service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(service_url)
        elif service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(service_url)
        elif service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            # NOTE(review): this branch instantiates GSFileAdaptor although
            # the log line mentions Globus Online - confirm this is intended.
            self.__filemanager = GSFileAdaptor(service_url)
        elif service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(service_url,
                                               self.security_context)
        elif service_url.startswith(("s3:", "walrus:", "swift:")):
            logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(service_url,
                                               self.security_context,
                                               description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        # The returned size is not used here; the call is kept as in the
        # original code.
        self.__filemanager.get_pilotdata_size()

        # Update security context
        self.security_context = self.__filemanager.get_security_context()
    def __initialize_pilot_data(self):
        """Set up the file adaptor described by ``self.pilot_data_description``.

        Does nothing when no description is set. Otherwise the method copies
        ``service_url`` (and ``size``, when present) from the description,
        instantiates the file adaptor that matches the URL scheme, asks the
        adaptor to initialize the pilot data and finally replaces
        ``self.security_context`` with the one reported by the adaptor.

        Raises:
            PilotError: if the URL scheme matches none of the known backends.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if "size" in description:
            self.size = description["size"]

        # initialize file adaptor
        service_url = self.service_url
        if service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(service_url,
                                                self.security_context,
                                                description)
        elif service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(service_url)
        elif service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            self.__filemanager = GlobusOnlineFileAdaptor(service_url)
        elif service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(service_url,
                                               self.security_context)
        elif service_url.startswith("irods:"):
            logger.debug("Use iRods Storage backend")
            self.__filemanager = iRodsFileAdaptor(service_url,
                                                  self.security_context)
        elif service_url.startswith(("s3:", "walrus:", "swift:")):
            logger.debug(
                "Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(service_url,
                                               self.security_context,
                                               description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        # The returned size is not used here; the call is kept as in the
        # original code.
        self.__filemanager.get_pilotdata_size()

        # Update security context
        self.security_context = self.__filemanager.get_security_context()
Beispiel #3
0
class PilotData(PilotData):
    """ B{PilotData (PD).}

        This is the object that is returned by the PilotDataService when a
        new PilotData is created based on a PilotDataDescription. A PilotData represents
        a finite amount of physical space on a certain resource. It can be populated
        with L{DataUnit}s.

        The PilotData object can be used by the application to keep track
        of a pilot. A PilotData has state, can be queried, can be cancelled.

    """

    # Prefix of every generated PilotData id (followed by a UUID1 string).
    PD_ID_PREFIX = "pd-"

    def __init__(self, pilot_data_service=None, pilot_data_description=None, pd_url=None):
        """
            Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are supported.

            Two modes are handled:

              - new PD: ``pd_url`` is None and ``pilot_data_service`` is given;
                a fresh id is generated and the PD is added through the
                CoordinationAdaptor.
              - reconnect: ``pd_url`` is given; the stored attributes are read
                back through the CoordinationAdaptor.

            @param pilot_data_service: service that owns this PD (its ``id`` is
                used to build the coordination URL of a new PD).
            @param pilot_data_description: mapping with at least "service_url".
            @param pd_url: URL of an existing PD to reconnect to.
        """
        self.id = None
        self.url = pd_url
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None
        self.size = None
        self.data_unit_urls = []
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            base_url = CoordinationAdaptor.get_base_url(application_id)
            self.url = CoordinationAdaptor.add_pd(
                base_url + ":" + pilot_data_service.id, self)
        elif pd_url is not None:
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # SECURITY NOTE(review): eval() executes whatever text the
            # coordination service returns. If that store can be written by
            # untrusted parties this is arbitrary code execution; consider
            # ast.literal_eval or a JSON encoding after confirming the format.
            pd_dict = eval(dictionary["pilot_data"])
            for key, value in pd_dict.items():
                setattr(self, key, value)
            # A Pilot Data does not hold a direct reference to a Data Unit (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)

    def cancel(self):
        """ Cancel PilotData.

            Currently a no-op: the call that would delete the pilot data
            through the file adaptor is disabled.
        """
        #self.__filemanager.delete_pilotdata()
        pass

    def get_url(self):
        """ Get URL of PilotData. Used for reconnecting to PilotData """
        return self.url

    def url_for_du(self, du):
        """ Get full URL to DataUnit within PilotData
            (``<service_url>/<du.id>``).
        """
        return self.service_url + "/" + str(du.id)

    def submit_data_unit(self, data_unit_description=None, data_unit=None):
        """ Creates a data unit object and initially imports data specified in
            data_unit_description.

            If ``data_unit`` is given it is used as is and
            ``data_unit_description`` is ignored. The DU's URL is recorded in
            this PD and the PD is added to the DU.

            @return: the (new or passed-in) DataUnit.
        """
        if data_unit is not None:
            du = data_unit
        else:
            du = DataUnit(pilot_data=self,
                          data_unit_description=data_unit_description)
        self.data_unit_urls.append(du.get_url())
        du.add_pilot_data(self)
        return du

    def list_data_units(self):
        """ List all data units of Pilot Data (returns the list of DU URLs) """
        return self.data_unit_urls

    def get_state(self):
        """ Return current state of Pilot Data as reported by the file adaptor """
        return self.__filemanager.get_state()

    def get_du(self, du_url):
        """ Returns Data Unit if part of Pilot Data, otherwise None """
        if du_url in self.data_unit_urls:
            return DataUnit(du_url=du_url)
        return None

    def wait(self):
        """ Wait until all Data Units of this PD are reported as finished.

            Every two seconds each DU is waited on and its state is checked
            with ``__has_finished``; the loop ends once every DU counts as
            finished (immediately if the PD has no DUs).
        """
        while True:
            finish_counter = 0
            result_map = {}
            # Fixed: the attribute is `data_unit_urls`; the previous spelling
            # `data_units_urls` raised AttributeError on every call.
            for du_url in self.data_unit_urls:
                du = DataUnit(du_url=du_url)
                du.wait()
                state = du.get_state()
                result_map[state] = result_map.get(state, 0) + 1
                if self.__has_finished(state):
                    finish_counter += 1
            logger.debug("PD ID: " + str(self.id) + " Total DUs: %s States: %s"
                         % (len(self.data_unit_urls), str(result_map)))
            if finish_counter == len(self.data_unit_urls):
                break
            time.sleep(2)

    def export_du(self, du, target_url):
        """ Export Data Unit to a local directory.

            When ``target_url`` is an absolute path that does not exist yet,
            the directory is created first (single level, via os.mkdir).
        """
        if target_url.startswith("/") and not os.path.exists(target_url):
            os.mkdir(target_url)
        logger.debug("Export Data-Unit to %s" % target_url)
        self.__filemanager.get_du(du, target_url)

    def put_du(self, du):
        """ Copy Data Unit to Pilot Data and record its URL """
        logger.debug("Put DU: %s to Pilot-Data: %s" % (du.id, self.service_url))
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def remove_du(self, du):
        """ Remove Data Unit from Pilot Data.

            The DU is only removed when its URL is registered with this PD;
            the coordination entry is updated in either case.
        """
        du_url = du.get_url()
        if du_url in self.data_unit_urls:
            self.__filemanager.remove_du(du)
            self.data_unit_urls.remove(du_url)
        CoordinationAdaptor.update_pd(self)

    def copy_du(self, du, pd_new):
        """ Copy DataUnit to another Pilot Data and register it there """
        pd_new.create_du(du)
        self.__filemanager.copy_du(du, pd_new)

        # update meta data at pd_new
        #pd_new.data_units[du.id] = du
        pd_new.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(pd_new)

    # END API methods
    ###########################################################################
    # Auxiliary Methods

    def create_du(self, du):
        """ Create a new Data Unit within Pilot """
        self.__filemanager.create_du(du.id)

    def __initialize_pilot_data(self):
        """ Set up the file adaptor described by ``pilot_data_description``.

            Does nothing when no description is set. Otherwise copies
            "service_url" (and "size", when present) from the description,
            instantiates the adaptor matching the URL scheme, lets it
            initialize the pilot data and takes over its security context.

            Raises PilotError if no adaptor matches the URL scheme.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        # "size" is treated as optional so that a description without it
        # does not abort initialization with a KeyError.
        if "size" in description:
            self.size = description["size"]

        # initialize file adaptor
        service_url = self.service_url
        if service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(service_url,
                                                self.security_context,
                                                description)
        elif service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(service_url)
        elif service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            self.__filemanager = GlobusOnlineFileAdaptor(service_url)
        elif service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(service_url, self.security_context)
        elif service_url.startswith("irods:"):
            logger.debug("Use iRods Storage backend")
            self.__filemanager = iRodsFileAdaptor(service_url, self.security_context)
        elif service_url.startswith(("s3:", "walrus:", "swift:")):
            logger.debug("Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(service_url,
                                               self.security_context,
                                               description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        # The returned size is not used here; the call is kept as in the
        # original code.
        self.__filemanager.get_pilotdata_size()

        # Update security context
        self.security_context = self.__filemanager.get_security_context()

    def __get_pd_id(self, pd_url):
        """ Extract the "pd-..." id from a PD URL.

            Returns the substring from PD_ID_PREFIX up to (excluding) the next
            "/". str.index raises ValueError when the prefix or a following
            "/" is missing.
        """
        start = pd_url.index(self.PD_ID_PREFIX)
        end = pd_url.index("/", start)
        return pd_url[start:end]

    def to_dict(self):
        """ Internal method that returns a dict with id, url and
            pilot_data_description of this Pilot Data.
        """
        pd_dict = {
            "id": self.id,
            "url": self.url,
            "pilot_data_description": self.pilot_data_description,
        }
        logger.debug("PilotData Dictionary: " + str(pd_dict))
        return pd_dict

    def __repr__(self):
        """ Returns Pilot Data service URL as a string """
        # __repr__ must return a string; service_url is None until a
        # description has been applied, so it is converted explicitly.
        return str(self.service_url)

    def __has_finished(self, state):
        """ Return True if ``state`` (case-insensitive) is "running",
            "failed" or "canceled".
        """
        # NOTE(review): "running" is counted as finished while "done" is not,
        # although wait() was documented as waiting for Done/Canceled/Failed -
        # confirm this matches the DataUnit state model.
        return state.lower() in ("running", "failed", "canceled")

    @classmethod
    def create_pilot_data_from_dict(cls, pd_dict):
        """ Restore Pilot Data from dictionary.

            Every key of ``pd_dict`` is set as an attribute on a freshly
            constructed instance, then the file adaptor is initialized.
        """
        pilot_data = cls()
        for key, value in pd_dict.items():
            setattr(pilot_data, key, value)
        pilot_data.__initialize_pilot_data()
        logger.debug("created pd " + str(pilot_data))
        return pilot_data
Beispiel #4
0
class PilotData(PilotData):
    """ B{PilotData (PD).}

        This is the object that is returned by the PilotDataService when a
        new PilotData is created based on a PilotDataDescription. A PilotData represents
        a finite amount of physical space on a certain resource. It can be populated
        with L{DataUnit}s.

        The PilotData object can be used by the application to keep track
        of a pilot. A PilotData has state, can be queried, can be cancelled.

    """

    # Prefix of every generated PilotData id (followed by a UUID1 string).
    PD_ID_PREFIX = "pd-"

    def __init__(self,
                 pilot_data_service=None,
                 pilot_data_description=None,
                 pd_url=None):
        """
            Initialize PilotData at given service url::

                ssh://<hostname>
                gsissh://<hostname>
                go://<hostname>
                gs://google.com
                s3://aws.amazon.com

            In the future more SAGA/Bliss URL schemes/adaptors are supported.

            Two modes are handled:

              - new PD: ``pd_url`` is None and ``pilot_data_service`` is given;
                a fresh id is generated and the PD is added through the
                CoordinationAdaptor.
              - reconnect: ``pd_url`` is given; the stored attributes are read
                back through the CoordinationAdaptor.

            @param pilot_data_service: service that owns this PD (its ``id`` is
                used to build the coordination URL of a new PD).
            @param pilot_data_description: mapping with at least "service_url".
            @param pd_url: URL of an existing PD to reconnect to.
        """
        self.id = None
        self.url = pd_url
        self.pilot_data_description = None
        self.pilot_data_service = pilot_data_service
        self.service_url = None
        self.size = None
        self.data_unit_urls = []
        self.security_context = None

        if pd_url is None and pilot_data_service is not None:  # new pd
            self.id = self.PD_ID_PREFIX + str(uuid.uuid1())
            self.pilot_data_description = pilot_data_description
            base_url = CoordinationAdaptor.get_base_url(application_id)
            self.url = CoordinationAdaptor.add_pd(
                base_url + ":" + pilot_data_service.id, self)
        elif pd_url is not None:
            logger.warn("Reconnect to PilotData: %s" % pd_url)
            dictionary = CoordinationAdaptor.get_pd(pd_url)
            if "security_context" in dictionary:
                self.security_context = dictionary["security_context"]
            # SECURITY NOTE(review): eval() executes whatever text the
            # coordination service returns. If that store can be written by
            # untrusted parties this is arbitrary code execution; consider
            # ast.literal_eval or a JSON encoding after confirming the format.
            pd_dict = eval(dictionary["pilot_data"])
            for key, value in pd_dict.items():
                setattr(self, key, value)
            # A Pilot Data does not hold a direct reference to a Data Unit (only URL refs are stored)
            self.data_unit_urls = eval(dictionary["data_unit_urls"])

        self.__initialize_pilot_data()
        CoordinationAdaptor.update_pd(self)

    def cancel(self):
        """ Cancel PilotData.

            Currently a no-op: the call that would delete the pilot data
            through the file adaptor is disabled.
        """
        #self.__filemanager.delete_pilotdata()
        pass

    def get_url(self):
        """ Get URL of PilotData. Used for reconnecting to PilotData """
        return self.url

    def url_for_du(self, du):
        """ Get full URL to DataUnit within PilotData
            (``<service_url>/<du.id>``).
        """
        return self.service_url + "/" + str(du.id)

    def submit_data_unit(self, data_unit_description):
        """ Creates a data unit object and initially imports data specified in
            data_unit_description.

            The new DU's URL is recorded in this PD and the PD is added to
            the DU.

            @return: the new DataUnit.
        """
        du = DataUnit(pilot_data=self,
                      data_unit_description=data_unit_description)
        self.data_unit_urls.append(du.get_url())
        du.add_pilot_data(self)
        return du

    def list_data_units(self):
        """ List all data units of Pilot Data (returns the list of DU URLs) """
        return self.data_unit_urls

    def get_state(self):
        """ Return current state of Pilot Data as reported by the file adaptor """
        return self.__filemanager.get_state()

    def get_du(self, du_url):
        """ Returns Data Unit if part of Pilot Data, otherwise None """
        if du_url in self.data_unit_urls:
            return DataUnit(du_url=du_url)
        return None

    def wait(self):
        """ Wait until all Data Units of this PD are reported as finished.

            Every two seconds each DU is waited on and its state is checked
            with ``__has_finished``; the loop ends once every DU counts as
            finished (immediately if the PD has no DUs).
        """
        while True:
            finish_counter = 0
            result_map = {}
            # Fixed: the attribute is `data_unit_urls`; the previous spelling
            # `data_units_urls` raised AttributeError on every call.
            for du_url in self.data_unit_urls:
                du = DataUnit(du_url=du_url)
                du.wait()
                state = du.get_state()
                result_map[state] = result_map.get(state, 0) + 1
                if self.__has_finished(state):
                    finish_counter += 1
            logger.debug("PD ID: " + str(self.id) +
                         " Total DUs: %s States: %s" %
                         (len(self.data_unit_urls), str(result_map)))
            if finish_counter == len(self.data_unit_urls):
                break
            time.sleep(2)

    def export_du(self, du, target_url):
        """ Export Data Unit to a local directory.

            When ``target_url`` is an absolute path that does not exist yet,
            the directory is created first (single level, via os.mkdir).
        """
        if target_url.startswith("/") and not os.path.exists(target_url):
            os.mkdir(target_url)
        self.__filemanager.get_du(du, target_url)

    def put_du(self, du):
        """ Copy Data Unit to Pilot Data and record its URL """
        logger.debug("Put DU: %s to Pilot-Data: %s" %
                     (du.id, self.service_url))
        self.__filemanager.create_du(du.id)
        self.__filemanager.put_du(du)
        self.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(self)

    def remove_du(self, du):
        """ Remove Data Unit from Pilot Data.

            The DU is only removed when its URL is registered with this PD;
            the coordination entry is updated in either case.
        """
        du_url = du.get_url()
        if du_url in self.data_unit_urls:
            self.__filemanager.remove_du(du)
            self.data_unit_urls.remove(du_url)
        CoordinationAdaptor.update_pd(self)

    def copy_du(self, du, pd_new):
        """ Copy DataUnit to another Pilot Data and register it there """
        pd_new.create_du(du)
        self.__filemanager.copy_du(du, pd_new)

        # update meta data at pd_new
        #pd_new.data_units[du.id] = du
        pd_new.data_unit_urls.append(du.get_url())
        CoordinationAdaptor.update_pd(pd_new)

    # END API methods
    ###########################################################################
    # Auxiliary Methods

    def create_du(self, du):
        """ Create a new Data Unit within Pilot """
        self.__filemanager.create_du(du.id)

    def __initialize_pilot_data(self):
        """ Set up the file adaptor described by ``pilot_data_description``.

            Does nothing when no description is set. Otherwise copies
            "service_url" (and "size", when present) from the description,
            instantiates the adaptor matching the URL scheme, lets it
            initialize the pilot data and takes over its security context.

            Raises PilotError if no adaptor matches the URL scheme.
        """
        description = self.pilot_data_description
        if description is None:
            return

        self.service_url = description["service_url"]
        # "size" is treated as optional so that a description without it
        # does not abort initialization with a KeyError.
        if "size" in description:
            self.size = description["size"]

        # initialize file adaptor
        service_url = self.service_url
        if service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(service_url)
        elif service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(service_url)
        elif service_url.startswith("go:"):
            logger.debug("Use Globus Online backend")
            # NOTE(review): this branch instantiates GSFileAdaptor although
            # the log line mentions Globus Online - confirm this is intended.
            self.__filemanager = GSFileAdaptor(service_url)
        elif service_url.startswith("gs:"):
            logger.debug("Use Google Cloud Storage backend")
            self.__filemanager = GSFileAdaptor(service_url,
                                               self.security_context)
        elif service_url.startswith(("s3:", "walrus:", "swift:")):
            logger.debug(
                "Use Amazon S3/Eucalyptus Walrus/SWIFT Storage backend")
            self.__filemanager = S3FileAdaptor(service_url,
                                               self.security_context,
                                               description)
        else:
            raise PilotError("No File Plugin found.")

        self.__filemanager.initialize_pilotdata()
        # The returned size is not used here; the call is kept as in the
        # original code.
        self.__filemanager.get_pilotdata_size()

        # Update security context
        self.security_context = self.__filemanager.get_security_context()

    def __get_pd_id(self, pd_url):
        """ Extract the "pd-..." id from a PD URL.

            Returns the substring from PD_ID_PREFIX up to (excluding) the next
            "/". str.index raises ValueError when the prefix or a following
            "/" is missing.
        """
        start = pd_url.index(self.PD_ID_PREFIX)
        end = pd_url.index("/", start)
        return pd_url[start:end]

    def to_dict(self):
        """ Internal method that returns a dict with id, url and
            pilot_data_description of this Pilot Data.
        """
        pd_dict = {
            "id": self.id,
            "url": self.url,
            "pilot_data_description": self.pilot_data_description,
        }
        logger.debug("PilotData Dictionary: " + str(pd_dict))
        return pd_dict

    def __repr__(self):
        """ Returns Pilot Data service URL as a string """
        # __repr__ must return a string; service_url is None until a
        # description has been applied, so it is converted explicitly.
        return str(self.service_url)

    def __has_finished(self, state):
        """ Return True if ``state`` (case-insensitive) is "running",
            "failed" or "canceled".
        """
        # NOTE(review): "running" is counted as finished while "done" is not,
        # although wait() was documented as waiting for Done/Canceled/Failed -
        # confirm this matches the DataUnit state model.
        return state.lower() in ("running", "failed", "canceled")

    @classmethod
    def create_pilot_data_from_dict(cls, pd_dict):
        """ Restore Pilot Data from dictionary.

            Every key of ``pd_dict`` is set as an attribute on a freshly
            constructed instance, then the file adaptor is initialized.
        """
        pilot_data = cls()
        for key, value in pd_dict.items():
            setattr(pilot_data, key, value)
        pilot_data.__initialize_pilot_data()
        logger.debug("created pd " + str(pilot_data))
        return pilot_data