@classmethod
def create_pilot_store_from_dict(cls, ps_dict):
    """Recreate a PilotStore from its dictionary representation."""
    ps = PilotStore()
    for key, value in ps_dict.items():
        setattr(ps, key, value)
    ps.initialize_pilot_store()
    logger.debug("created ps " + str(ps))
    return ps
def to_dict(self):
    """Return a dictionary representation of the PilotStore."""
    ps_dict = {}
    ps_dict["id"] = self.id
    ps_dict["url"] = self.url
    ps_dict["pilot_store_description"] = self.pilot_store_description
    logger.debug("PS Dictionary: " + str(ps_dict))
    return ps_dict
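# Usage sketch (hypothetical values): the two methods above are intended to
# round-trip a PilotStore through its dict form, e.g. for persisting it in a
# coordination backend:
#
#   ps_dict = ps.to_dict()
#   ps_copy = PilotStore.create_pilot_store_from_dict(ps_dict)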
def create_pd(self, pd_id):
    pd_dir = os.path.join(self.path, str(pd_id))
    logger.debug("mkdir: " + pd_dir)
    try:
        self.__sftp.mkdir(pd_dir)
    except IOError:
        pass  # directory already exists
def __init__(self, work_unit_description, work_data_service):
    self.id = self.WU_ID_PREFIX + str(uuid.uuid1())
    self.url = work_data_service.url + "/" + self.id
    self.state = State.New
    self.subjob = None  # reference to BigJob Subjob
    self.work_unit_description = work_unit_description  # WU Description
    self.subjob_description = self.__translate_wu_sj_description(work_unit_description)
    logger.debug("Created WU: %s" % self.url)
def copy_pd_to_url(self, pd, local_url, remote_url):
    base_dir = self.__get_path_for_pd(pd)
    self.__create_remote_directory(remote_url)
    for filename in self.__sftp.listdir(base_dir):
        file_url = local_url + "/" + filename
        file_remote_url = remote_url + "/" + filename
        logger.debug("Copy " + file_url + " to " + file_remote_url)
        self.__third_party_transfer_host(file_url, file_remote_url)
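# Usage sketch (hypothetical URLs): exporting a Pilot Data set file by file to
# another SSH-reachable location via third-party transfer:
#
#   adaptor.copy_pd_to_url(pd, "ssh://hostA/data/pd-123",
#                          "ssh://hostB/backup/pd-123")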
def put_pd(self, pd):
    for i in pd.list_data_units():
        remote_path = os.path.join(self.__get_pd_path(pd.id),
                                   os.path.basename(i.local_url))
        logger.debug("Put file: %s to %s" % (i.local_url, remote_path))
        if i.local_url.startswith("file://") or i.local_url.startswith("/"):
            # strip the file:// scheme before touching the local filesystem
            local_path = i.local_url[len("file://"):] if i.local_url.startswith("file://") else i.local_url
            if stat.S_ISDIR(os.stat(local_path).st_mode):
                logger.warning("Path %s is a directory. Ignored." % local_path)
                continue
            self.__webhdfs.copyFromLocal(local_path, remote_path)
        else:
            logger.error("File URL scheme not supported: %s" % i.local_url)
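# Usage sketch (hypothetical paths): each data unit's local_url must be a
# plain path or a file:// URL; any other scheme is rejected above:
#
#   du.local_url = "file:///tmp/input.dat"   # accepted, copied into HDFS
#   du.local_url = "http://host/input.dat"   # rejected with an error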
def initialize_pilot_store(self):
    if self.pilot_store_description is not None:
        self.service_url = self.pilot_store_description["service_url"]
        self.size = self.pilot_store_description["size"]
        # initialize file adaptor
        if self.service_url.startswith("ssh:"):
            logger.debug("Use SSH backend")
            self.__filemanager = SSHFileAdaptor(self.service_url)
        elif self.service_url.startswith("http:"):
            logger.debug("Use WebHDFS backend")
            self.__filemanager = WebHDFSFileAdaptor(self.service_url)
        else:
            logger.error("No file adaptor for URL scheme: %s" % self.service_url)
            return
        self.__filemanager.initialize_pilotstore()
        self.__filemanager.get_pilotstore_size()
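# Sketch of a pilot store description this method expects (keys taken from the
# lookups above; values are hypothetical):
#
#   pilot_store_description = {
#       "service_url": "ssh://localhost/tmp/pilotstore",  # selects SSHFileAdaptor
#       "size": 100,                                      # capacity, e.g. in MB
#   }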
def copy_pd_to_url(self, pd, local_url, remote_url):
    if not remote_url.startswith("file://") and not remote_url.startswith("/"):
        logger.error("Only local URLs supported")
        return
    result = urlparse.urlparse(remote_url)
    path = result.path
    # create local target directory
    try:
        os.makedirs(path)
    except OSError:
        logger.debug("Directory: %s already exists." % path)
    base_dir = self.__get_pd_path(pd.id)
    for filename in self.__webhdfs.listdir(base_dir):
        file_url = local_url + "/" + filename
        file_remote_url = remote_url + "/" + filename
        logger.debug("GET " + file_url + " to " + file_remote_url)
        self.__webhdfs.copyToLocal(file_url, file_remote_url)
def create_pd(self, pd_id):
    pd_dir = self.__get_pd_path(pd_id)
    logger.debug("mkdir: " + pd_dir)
    self.__webhdfs.mkdir(pd_dir)
import sys
import os
import stat
import urlparse

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + "/../../../webhdfs-py/")

from bigdata.troy.compute.api import State
from bigdata import logger

logger.debug(str(sys.path))

from webhdfs.webhdfs import WebHDFS


class WebHDFSFileAdaptor(object):

    HDFS_USER_NAME = "luckow"
    HDFS_SERVICE_HOST = "192.168.2.108"
    HDFS_SERVICE_PORT = 50070

    def __init__(self, service_url):
        self.service_url = service_url
        try:
            result = urlparse.urlparse(service_url)
            self.host = result.netloc
            self.path = result.path
        except ValueError:
            logger.error("Error parsing URL.")
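# Usage sketch (hypothetical endpoint): the adaptor is constructed with the
# WebHDFS service URL; host and path are split out for later REST calls:
#
#   adaptor = WebHDFSFileAdaptor("http://192.168.2.108:50070/pilotstore")
#   adaptor.initialize_pilotstore()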