class HDFSSErvice:
    """Chunked read/append access to files on an HDFS namenode.

    NOTE(review): the class name is misspelled (HDFSSErvice) but is kept
    unchanged for backward compatibility with existing callers.
    """

    namenode_host = "localhost"
    namenode_port = "9870"  # WebHDFS HTTP port
    root_folder = "/"
    # Bytes per chunk yielded by get(); misspelling kept (public attribute).
    chunck_size = 100000

    def __init__(self):
        # presumably pyhdfs.HdfsClient, imported elsewhere in this file
        self._client = HdfsClient(
            hosts=self.namenode_host + ":" + self.namenode_port,
            user_name="root",
        )

    def get(self, hdfs_path: str):
        """Yield the contents of *hdfs_path* in chunck_size-byte chunks.

        Yields nothing for an empty file.
        """
        file_size = self.get_file_size(hdfs_path)
        for offset in range(0, file_size, self.chunck_size):
            # BUG FIX: the original passed length=offset + chunck_size,
            # which re-read every earlier byte on each iteration and made
            # successive chunks overlap. Each read is exactly one chunk;
            # HDFS truncates the final short chunk at end-of-file.
            file_response = self._client.open(
                hdfs_path, offset=offset, length=self.chunck_size
            )
            yield file_response.read()

    def append(self, hdfs_path: str, data: bytes):
        """Append *data* to *hdfs_path*, creating the file first if needed."""
        self.create_if_not_exist(hdfs_path)
        self._client.append(hdfs_path, data)

    def create_if_not_exist(self, hdfs_path: str):
        """Create an empty file at *hdfs_path* unless it already exists."""
        if not self._client.exists(hdfs_path):
            self._client.create(hdfs_path, b"")

    def get_messages_number(self, hdfs_path: str) -> int:
        """Return the number of chunck_size slots the file occupies.

        Integer arithmetic replaces the original float division (same
        result for non-negative sizes, no float-precision risk on large
        files). NOTE(review): this reports one extra slot when the size
        is an exact multiple of chunck_size (and 1 for an empty file) —
        behavior preserved as callers may rely on it.
        """
        return self.get_file_size(hdfs_path) // self.chunck_size + 1

    def get_file_size(self, hdfs_path) -> int:
        """Return the file's length in bytes from the content summary."""
        file_infos = self._client.get_content_summary(hdfs_path)
        return file_infos.length

    def test(self):
        # Intentionally a no-op placeholder.
        pass
class hdfs(object):
    """HDFS-backed table store: each "table" is one file under self.path.

    Talks to the namenode via HdfsClient (WebHDFS; default port 50070).
    Kept Python-2.6+ compatible to match the surrounding code's dialect.
    """

    def __init__(self, cur_database_param):
        # cur_database_param carries the namenode address ('url') and the
        # base directory for this job's tables ('dbname').
        hdfsHost = cur_database_param['url']
        path = cur_database_param['dbname']
        self.hdfs = HdfsClient(hosts='{hdfs_host}'.format(hdfs_host=hdfsHost))
        self.host = hdfsHost
        self.path = path

    def append(self, path, data):
        """Append *data* to the file at *path* (file must already exist)."""
        self.hdfs.append(path, data)

    def concat(self, target, sources):
        # BUG FIX: the original called self.concat(target, sources),
        # recursing unconditionally until stack overflow. Delegate to the
        # underlying client instead.
        self.hdfs.concat(target, sources)

    def createTableByTaskJobId(self, taskJobId, tableName=None,
                               jobTemplateFieldList=None, data=None):
        """Create the table file, resolving the name from the job record
        when *tableName* is not supplied."""
        if tableName is None:
            taskJob = TaskJobDao.loadTaskById(taskJobId)
            tableName = taskJob.tableName
        path = self.path + '/' + tableName
        self.hdfs.create(path, data, replication=2)

    def hmkdirs(self, path):
        """Create directory *path* (and parents) on HDFS."""
        self.hdfs.mkdirs(path)

    def open(self, path):
        """Return a readable response object for the file at *path*."""
        return self.hdfs.open(path=path)

    def delete(self, path):
        """Delete the file or directory at *path*."""
        self.hdfs.delete(path=path)

    def listdir(self, rule):
        """List directory entries matching *rule*."""
        return self.hdfs.listdir(rule)

    def insert(self, jobid, tablename, column_dict, paramMap=None):
        """Append one row (*column_dict*) to the table's file, adding the
        bookkeeping columns and creating the file on first insert.

        NOTE(review): column_dict is a dict but is handed to append/create,
        which presumably expect bytes — verify serialization upstream.
        """
        if tablename is None:
            taskJob = TaskJobDao.loadTaskById(jobid)
            tablename = taskJob.tableName
        path = self.path + '/' + tablename
        createTime = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(time.time()))
        task_job_id_sequenceValue = (paramMap.get("task_job_id_sequence")
                                     if paramMap is not None else None)
        if task_job_id_sequenceValue is not None:
            column_dict.update(
                {"task_job_id_sequence": str(task_job_id_sequenceValue)})
        column_dict.update({
            "task_job_del_flag": "False",
            "task_job_create_time": createTime
        })
        if self.isTableExist(tablename):
            self.append(path, column_dict)
        else:
            self.createTableByTaskJobId(jobid, tablename, column_dict)

    def isTableExist(self, tablename):
        """Return True when the table's file exists under self.path."""
        path = self.path + '/' + tablename
        return self.hdfs.exists(path)

    def save_to_hdfs(self, jobid, path, data):
        """Append *data* to table *path*, creating the table if missing."""
        if self.isTableExist(path):
            self.append(path, data)
        else:
            self.createTableByTaskJobId(jobid, path, data)

    def save_to_hdfs2(self, path, data):
        """Append *data* to the absolute *path*, creating the file if missing."""
        if self.hdfs.exists(path):
            self.hdfs.append(path, data)
        else:
            self.hdfs.create(path, data, replication=2)

    def execute(self, sqls="append", path=None, data=None):
        """Dispatch one method name (or a list of them) against this
        instance, passing (path, data) to each.

        Raises a bare Exception on any failure after logging it
        (original caller-visible type preserved).
        """
        try:
            if isinstance(sqls, list) and len(sqls) > 0:
                for sql in sqls:
                    # getattr dispatch replaces the original eval() idea
                    method = getattr(self, sql)
                    method(path, data)
            else:
                method = getattr(self, sqls)
                method(path, data)
        except Exception as e:  # FIX: py2-only 'except Exception, e' syntax
            logging.error("hdfs,execute," + str(e))
            raise Exception()