def write_to_hdfs(rows: List[Tuple[str, str]]):
    conn: Connection = Connection.get_connection_from_secrets('local_hdfs')
    uri = conn.get_uri()
    pat = re.compile(r"http://(\w+(:\w+)?)?@")
    print(conn.get_uri())
    uri = pat.sub("http://", uri)
    print(uri)
    print(conn.login)
    client = InsecureClient(uri, user=conn.login)
    sch = avro.schema.make_avsc_object({
        'type': 'record',
        'name': 'Video',
        'fields': [
            {'type': {'type': 'string', 'avro.java.string': 'String'}, 'name': 'title'},
            {'type': ["null", {'type': 'string', 'avro.java.string': 'String'}], 'name': 'description'},
        ]
    })
    local_file_name = 'videos.avro'
    writer = DataFileWriter(open(local_file_name, "wb"), DatumWriter(), sch)
    for row in rows:
        print(row)
        writer.append({"title": row[0], "description": row[1]})
    writer.close()
    client.upload('/tmp/videos.avro', local_file_name)

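# Hedged sketch of reading the uploaded Avro file back for verification. It
# assumes the same 'local_hdfs' connection and the /tmp/videos.avro path used
# above; the local download name is an illustrative placeholder.
from avro.datafile import DataFileReader
from avro.io import DatumReader

def read_back_from_hdfs():
    conn = Connection.get_connection_from_secrets('local_hdfs')
    uri = re.sub(r"http://(\w+(:\w+)?)?@", "http://", conn.get_uri())
    client = InsecureClient(uri, user=conn.login)
    client.download('/tmp/videos.avro', 'videos_downloaded.avro', overwrite=True)
    with DataFileReader(open('videos_downloaded.avro', 'rb'), DatumReader()) as reader:
        for record in reader:
            print(record['title'], record['description'])
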
class DataProcessor:
    def __init__(self, data_path=None):
        if data_path is None:
            self.data_path = r'./config/connect_info.json'
        else:
            assert type(data_path) == str
            self.data_path = data_path
        if not os.path.exists(self.data_path):
            self.data_path = r'./connect_info.json'
        with open(self.data_path) as data_file:
            data = json.load(data_file)
        self.hdfs_client = InsecureClient(
            url='http://' + data['namenode_url'] + ':' + str(data['port']),
            user=data['user'],
            root=data['root_path'])
        self.img_dir = data['img_dir']
        if self.img_dir[-1] != '/':
            self.img_dir += '/'
        self.file_name = 1

    def InitImgDir(self):
        try:
            list_rslt = self.hdfs_client.list(self.img_dir)
            if len(list_rslt) > 0:
                for name in list_rslt:
                    file_path = self.img_dir + name
                    self.hdfs_client.delete(file_path)
        except util.HdfsError:
            self.hdfs_client.makedirs(self.img_dir)
            print("Mkdir ...")
        return True

    def DataProcess(self, data, append=False, file_name=None):
        assert type(data) == str
        if file_name is None:
            file_name = self.img_dir + str(self.file_name)
        else:
            assert type(file_name) == str
        print("start writing...")
        start = time.time()
        self.hdfs_client.write(file_name, data, overwrite=True, replication=1, append=append)
        delta = time.time() - start
        print("writing complete, time delta is " + str(delta))
        return True

    def Upload(self, remote_name, local_path):
        assert os.path.exists(local_path)
        remote_path = self.img_dir + remote_name
        self.hdfs_client.upload(remote_path, local_path, True)
        return True

def uploadHDFS(filename):
    # if ' ' in filename:
    #     aoi_file = rename(aoi_file)
    # else:
    aoi_file = filename
    client = InsecureClient('http://10.41.158.65:50070', user='******')
    fname1 = client.list(hdfs_path)
    # if aoi_file.split("@").count('-') >= 2:
    dt = str(aoi_file.split("@")[1].split("-", 1)[0][:8])
    # else:
    #     dt = str(aoi_file.split("_")[-1].split("-")[-2].split("@")[1][:8])
    #     dt = str(aoi_file.split("@")[1].split("-", 1)[0][:8])
    folder1 = dt
    if folder1 in fname1:
        client.upload(hdfs_path + folder1 + "/" + aoi_file, upload_path + aoi_file, overwrite=True)
        print("uploadHDFS ok")
        shutil.move(upload_path + aoi_file, backup_path + aoi_file)
    else:
        client.makedirs(hdfs_path + folder1)
        client.upload(hdfs_path + folder1 + "/" + aoi_file, upload_path + aoi_file, overwrite=True)
        print("uploadHDFS ok")
        shutil.move(upload_path + aoi_file, backup_path + aoi_file)

def post(self):
    image_name = int(time.time())  # use Python's time function so image names are always unique
    image_path = "/root/ZAGA/ZoraOD/Images_bbx/{}.jpg".format(image_name)
    with open(image_path, 'wb') as image:
        image.write(request.data)  # the image contained in request.data is saved locally

    # result is the output of the object detection. It can be the string the robot has to
    # pronounce, or the possible labels identifying an object in the image when the score
    # falls between two thresholds. An empty vector is returned if no object was found.
    result = obj_detection.find_result(image_path, image_name)

    # if HDFS is reachable, save the image there;
    # use the socket module to check whether the connection to the HDFS port is open
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port_result = sock.connect_ex(('localhost', 50070))
    # connect_ex returns 0 if the port is open, a non-zero value otherwise
    if port_result == 0:
        client_hdfs = InsecureClient('http://localhost:50070')
        # move the image into HDFS
        client_hdfs.upload('/zora-object-detection/images/{}.jpg'.format(image_name), image_path)
        os.remove(image_path)

    return result  # the result is sent back to the robot

def main():
    client = InsecureClient('http://127.0.0.1:50070/', user='******')

    # create directory in HDFS
    client.makedirs('/test')

    # list content
    ll = client.list('/')
    print(ll)

    # create file in HDFS
    data = [{"name": "Anne", "salary": 10000}, {"name": "Victor", "salary": 9500}]
    with client.write('/test/sample_file.json', encoding='utf-8') as json_file_in_hdfs:
        json.dump(data, json_file_in_hdfs)
    # OR
    client.write(os.path.join('/', 'test', 'sample_file2.json'), data=json.dumps(data), encoding='utf-8')

    # download file from HDFS
    client.download('/test/sample_file.json', './file_from_hadoop.json')

    # upload file to HDFS
    client.upload('/test/local_file_in_hadoop.json', './file_from_hadoop.json')

def upload_file(self):
    ip_address = self.ip_input.toPlainText()
    port_number = self.port_input.toPlainText()
    user_name = self.user_input.toPlainText()
    upload_file = self.dir_input.toPlainText()
    host_address = 'http://' + ip_address + ':' + port_number
    hadoop = InsecureClient(host_address, user_name)
    hadoop.upload('', upload_file)

def put_in_hdfs(hdfs_path, local_path):
    print('uploading...')
    client = InsecureClient('http://quickstart.cloudera:50070', user='******')
    client.upload(hdfs_path=hdfs_path,
                  local_path=local_path,
                  progress=lambda x, y: print(x, y),
                  overwrite=True,
                  temp_dir='/tmp/{}'.format(local_path))
    print('done!')

def upload_file(self):
    ip_address = self.ip_input.toPlainText()
    port_number = self.port_input.toPlainText()
    user_name = self.user_input.toPlainText()
    file_name = self.File_directory_display.toPlainText()
    dir_name = self.dir_input.toPlainText()
    host_address = 'http://' + ip_address + ':' + port_number
    hadoop = InsecureClient(host_address, user_name)
    hadoop.upload(dir_name, file_name)

class StorageClient(object):
    """Represent a storage client that supports saving results to HDFS

    Methods
    -------
    save(self, file_name, result)
        Save results permanently to persistent storage.
    """

    def __init__(self, namenode_url, username, submission_id, camera_id):
        """Initialize an internal client

        This constructor initializes an HDFS client.
        """
        self._internal_client = InsecureClient(
            namenode_url,
            user='******',
            root='/'.join(['/users', username, str(submission_id), str(camera_id)]))

    def save(self, file_name, result):
        """Save results permanently to persistent storage.

        This method saves results permanently to persistent storage so that
        they can be retrieved by the user later.

        This method currently accepts results as numpy.ndarray. If an instance
        with any other type is passed, the method will save the string
        representation of the instance. This enables the method to save
        strings, integers, and other primitive data types.

        Parameters
        ----------
        file_name : str
            The file name to be used to save the results.
        result : object
            The results to be saved. The `result` can be numpy.ndarray. If an
            instance with any other type is passed, the method will save the
            string representation of the instance. This enables the method to
            save strings, integers, and other primitive data types.
        """
        # Make sure the file name is legit
        file_name = file_name.replace('/', '.')

        # If the result is an OpenCV image, save it as an image.
        if isinstance(result, numpy.ndarray):
            # Create temp files
            temp_directory = tempfile.mkdtemp()
            temp_image_path = os.path.join(temp_directory, file_name)
            cv2.imwrite(temp_image_path, result)
            self._internal_client.upload(file_name, temp_image_path, overwrite=True)
            # Remove temp files
            os.remove(temp_image_path)
            os.rmdir(temp_directory)
        # Else, save the string representation of the object in a text file.
        else:
            self._internal_client.write(file_name, str(result), overwrite=True)

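# Minimal usage sketch for StorageClient above; the namenode URL, username,
# and IDs are made-up placeholders, not values from the original snippet.
if __name__ == '__main__':
    client = StorageClient('http://localhost:9870', 'alice', 42, 7)
    # An ndarray result is written as an image via a temporary file.
    client.save('frame_0001.png', numpy.zeros((64, 64, 3), dtype=numpy.uint8))
    # Any other type is stored as its string representation.
    client.save('summary.txt', {'detections': 3})
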
def load_data_to_hdfs():
    file = '/home/student/Project/Dataset.csv'
    if not os.path.exists(file):
        print("File not found")
        return
    hdfsclient = InsecureClient("http://localhost:50070", user="******")
    hdfs_path = "/"
    hdfsclient.upload(hdfs_path, file)  # Dump the file into Hadoop

class HDFSStorage(Storage):
    def __init__(self, bucket_name: str, folder_name: str):
        super().__init__(bucket_name, folder_name)
        self.client = InsecureClient(url=settings.HDFS_CONN, user=settings.HDFS_USERNAME)

    def setup(self) -> HDFSResource:
        super().setup()
        self.client.makedirs(f"{self.bucket_name}/{self.folder_name}")
        return HDFSResource(resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/")

    def put_file(self, file_path: Union[str, Path], rename: Optional[str] = None) -> HDFSResource:
        if isinstance(file_path, Path):
            file_path = str(file_path)
        file_name = Path(file_path).name if not rename else rename
        # copy file to task directory
        if not file_path.startswith(str(self.local_dir)):
            file_path = shutil.copy(file_path, Path(self.local_dir, file_name))
        try:
            self.client.upload(f"{self.bucket_name}/{self.folder_name}/{file_name}", file_path)
        except (gaierror, NewConnectionError):
            raise
        return HDFSResource(resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/{file_name}")

    def get_file(self, data_file: str) -> str:
        if not data_file.startswith("hdfs:"):
            raise NotValidScheme("Object file prefix is invalid: expected `hdfs:`")
        _, bucket_name, folder_name, file_name = data_file.split("/")
        file_path = Path(self.temp_dir, bucket_name, folder_name, file_name)
        if not file_path.is_file():
            try:
                self.client.download(data_file, file_path)
            except Exception as err:
                print(err)
        return str(file_path)

    def remove_remote_dir(self, omit_files: List[str] = None) -> None:
        pass

def load_enedis():
    client = InsecureClient('http://localhost:50070', user='******')
    client.makedirs('data')
    print(client.list('/user/cloudera'))
    # load 10 lines
    client.upload(
        '/user/cloudera/data',
        '/home/fitec/projet_fil_rouge/source_des_données/data/consommation_elec_regions_2019_l10.json',
        overwrite=True)

def handleHdfsUpload(file_path, proj_id, task_id):
    try:
        client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
        hdfs_path = "/neurolearn/files/" + proj_id + "/results/" + task_id
        client.makedirs(hdfs_path)
        client.upload(hdfs_path, file_path)
        print('Uploaded Images to HDFS.')
    except Exception as e:
        print(e)
        hdfs_path = ''
    return hdfs_path

class SavedModelUploader(object):
    """upload a saved model to hadoop file system"""

    def __init__(self, url, user, base_path=""):
        self._logger = logging.getLogger(self.__class__.__name__)
        self._url = url
        self._user_ = user
        self._base_path = base_path
        self._client = InsecureClient(url, user)
        if not self._exist(base_path):
            self._mkdir(base_path)

    def _exist(self, path):
        if self._client.content(path, strict=False):
            return True
        else:
            return False

    def _mkdir(self, path):
        self._client.makedirs(path)

    def _del(self, path):
        self._client.delete(path, recursive=True)

    def _upload(self, local_path, hdfs_path):
        self._client.upload(hdfs_path, local_path)

    def _logging_progress(self, local_path, nbytes):
        msg = None
        if nbytes > 0:
            msg = "uploading: '{}' [{} bytes]".format(local_path, nbytes)
        else:
            msg = "uploading: '{}' [done]".format(local_path)
        self._logger.info(msg)

    def upload(self, local_model_path, overwrite=False):
        hdfs_model_path = self._base_path + '/' + basename(local_model_path)
        existed = self._exist(hdfs_model_path)
        if overwrite and existed:
            self._del(hdfs_model_path)
        elif not overwrite and existed:
            raise RuntimeError("could not overwrite the model, already existed.")
        try:
            self._client.upload(self._base_path, local_model_path, progress=self._logging_progress)
        except HdfsError as e:
            self._logger.error(e)
        self._logger.info("model upload done")

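# Minimal usage sketch for SavedModelUploader above; the namenode URL, user,
# and paths are illustrative placeholders, not values from the original code.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    uploader = SavedModelUploader('http://localhost:9870', 'hdfs_user',
                                  base_path='/models/saved')
    # Re-uploading a model that already exists under base_path requires
    # overwrite=True, otherwise upload() raises RuntimeError.
    uploader.upload('./export/my_model', overwrite=True)
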
def uploadHDFS(filename):
    aoi_file = filename
    client = InsecureClient('http://10.41.158.65:50070', user='******')
    fname1 = client.list(hdfs_path)
    dt = str(aoi_file.split("@")[1].split("_")[0][:8])
    folder1 = dt
    if folder1 in fname1:
        client.upload(hdfs_path + folder1 + "/" + aoi_file, upload_path + aoi_file, overwrite=True)
        shutil.move(upload_path + aoi_file, backup_path + aoi_file)
    else:
        client.makedirs(hdfs_path + folder1)
        client.upload(hdfs_path + folder1 + "/" + aoi_file, upload_path + aoi_file, overwrite=True)
        shutil.move(upload_path + aoi_file, backup_path + aoi_file)

class DataProcessor:
    def __init__(self, data_path=None):
        if data_path is None:
            self.data_path = r'./config/connect_info.json'
        else:
            assert type(data_path) == str
            self.data_path = data_path
        if not os.path.exists(self.data_path):
            self.data_path = r'./connect_info.json'
        with open(self.data_path) as data_file:
            data = json.load(data_file)
        print("Data: ", data)
        self.hdfs_client = InsecureClient(
            url='http://' + data['namenode_url'] + ':' + str(data['port']),
            user=data['user'],
            root=data['root_path'])
        print("hdfs client: ", self.hdfs_client)
        self.img_dir = data['img_dir']
        print("img dir: ", self.img_dir)
        if self.img_dir[-1] != '/':
            self.img_dir += '/'
        self.file_name = 1

    def InitImgDir(self):
        try:
            list_rslt = self.hdfs_client.list(self.img_dir)
            if len(list_rslt) > 0:
                for name in list_rslt:
                    file_path = self.img_dir + name
                    self.hdfs_client.delete(file_path)
        except util.HdfsError:
            self.hdfs_client.makedirs(self.img_dir)
        return True

    def Upload(self, file_path, threads=2):
        print("FilePath: ", file_path)
        print("img_dir: ", self.img_dir[:-1])
        self.hdfs_client.upload(hdfs_path=self.img_dir[:-1],
                                local_path=file_path,
                                n_threads=threads,
                                overwrite=True)
        return 0

class HDFSService(object):
    def __init__(self):
        self.hdfs = InsecureClient('http://127.0.0.1:9870', user='******')
        self.base_path = '/users/root'

    def mkdir(self, path):
        return self.hdfs.makedirs(path)

    def list(self, path):
        try:
            return self.hdfs.list(path)
        except HdfsError as e:
            print(e)
            return []

    def get(self, path):
        pass

    def upload(self, path, local_path=None, data=None):
        path = self.base_path + path
        if data is not None:
            return self.hdfs.write(path, data=data)
        elif local_path is not None:
            return self.hdfs.upload(path, local_path)
        return False

    def download(self, path):
        path = self.base_path + path
        with self.hdfs.read(path) as reader:
            print(path)
            buf = reader.read()
            print(len(buf))
            return buf

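# Hedged usage sketch for HDFSService above; the paths are illustrative.
# upload() and download() resolve paths relative to base_path ('/users/root'),
# while list() takes an absolute HDFS path.
service = HDFSService()
service.upload('/reports/summary.txt', data='hello from hdfs')
print(service.list('/users/root/reports'))
print(service.download('/reports/summary.txt'))
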
def upload_directory(self, directory_path, archive_directory_data):
    '''Untars the archive_directory_data provided as input, and uploads all
    the contents of the tar to the directory path specified on HDFS.
    '''
    logger.log_info("Uploading the directory to HDFS")
    web_hdfs_url = Environment().get_web_hdfs_url()
    hdfs_file_base_url = Environment().get_hdfs_file_base_url()
    session = SwSessionManager().get_session()
    user_name = session.get_username()
    client = InsecureClient(web_hdfs_url, user_name)
    directory_name_with_path = "/" + directory_path
    directory_name = os.path.split(directory_path)[1]
    try:
        with tempfile.TemporaryDirectory() as temp:
            local_dir_path = temp + "/" + directory_name + ".tar.gz"
            with open(local_dir_path, "wb") as dir_archive:
                dir_archive.write(archive_directory_data)
            with tarfile.open(local_dir_path, "r:gz") as tar:
                tar.extractall(temp)
            os.remove(local_dir_path)
            response = client.upload(hdfs_path=directory_name_with_path, local_path=temp)
            logger.log_info("Successfully uploaded the directory {0} to HDFS".format(response))
            return hdfs_file_base_url + directory_name_with_path
    except Exception as e:
        raise ServiceError(
            "Uploading the directory to HDFS failed with the error: {0}".format(str(e)))

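# Hedged sketch of how a caller could produce the archive_directory_data bytes
# that upload_directory() above expects: a gzip-compressed tar of a local
# directory, read into memory. The directory path and the `uploader` object
# are made-up placeholders.
import io
import tarfile

def make_archive_bytes(local_directory):
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        # arcname="." keeps paths relative so extraction lands inside the temp dir
        tar.add(local_directory, arcname=".")
    return buf.getvalue()

# archive_bytes = make_archive_bytes("./model_artifacts")
# uploader.upload_directory("user/artifacts/run_001", archive_bytes)
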
class Storage:
    def __init__(self, protocol: str = 'webHDFS', *args, **kwargs):
        self.protocol, self.client = protocol.lower(), None
        if protocol.lower() == 'webHDFS'.lower():
            from hdfs import InsecureClient
            self.client = InsecureClient(*args, **kwargs)
        for f in 'upload download list status delete'.split():
            setattr(self, f, getattr(self, '%s_%s' % (f, protocol.lower())))

    def upload_webhdfs(self, local_path: str, remote_path: str, **kwargs):
        to_screen("upload %s -> %s" % (local_path, remote_path))
        return self.client.upload(local_path=local_path, hdfs_path=remote_path, **kwargs)

    def download_webhdfs(self, remote_path: str, local_path: str, **kwargs):
        mkdir_for(local_path)
        to_screen("download %s -> %s" % (remote_path, local_path))
        return self.client.download(local_path=local_path, hdfs_path=remote_path, overwrite=True, **kwargs)

    def list_webhdfs(self, remote_path: str, **kwargs):
        return self.client.list(hdfs_path=remote_path, **kwargs)

    def status_webhdfs(self, remote_path: str, **kwargs):
        return self.client.status(hdfs_path=remote_path, **kwargs)

    def delete_webhdfs(self, remote_path: str, **kwargs):
        return self.client.delete(hdfs_path=remote_path, **kwargs)

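# Hedged usage sketch for the Storage wrapper above: with the default 'webHDFS'
# protocol, the generic upload/download/list/status/delete names are aliased to
# their *_webhdfs implementations. The URL and paths are illustrative placeholders.
storage = Storage('webHDFS', 'http://localhost:9870', user='hdfs_user')
storage.upload(local_path='./data.csv', remote_path='/tmp/data.csv')
print(storage.list(remote_path='/tmp'))
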
def handle_uploaded_file(f, data_id, proj_id):
    file_name = str(f.name)
    with open(file_name, 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    data_content = pd.read_csv(file_name, encoding='utf-8')
    data_json = data_content.to_json()
    try:
        client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
        hdfs_path = "/neurolearn/files/" + proj_id + "/datasets/" + data_id
        client.makedirs(hdfs_path)
        client.upload(hdfs_path, file_name)
    except:
        hdfs_path = ''
    return data_json, hdfs_path

def move_files(tbl):
    from hdfs import InsecureClient
    client = InsecureClient('http://172.16.4.144:50070')
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_FL.hive")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_IL.hive")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_FL.sh")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_IL.sh")

def upload_img_to_hdfs(self, upload_file_path):
    print("test", upload_file_path)
    try:
        client = InsecureClient(self.HDFS_ADDR, user='******')
        client.upload(self.IMG_FOLDER,
                      upload_file_path,
                      overwrite=False,
                      n_threads=0,
                      temp_dir=None,
                      chunk_size=65536,
                      progress=None,
                      cleanup=True)
    except NameError as n:
        print(n)
        return {'error_msg': 'HDFS upload failed'}
    # show the HDFS path where the image was uploaded
    _ = upload_file_path.split('/')
    path_for_show = self.IMG_FOLDER + "/" + _[len(_) - 1]
    return {'success': "Upload succeeded, HDFS path: " + path_for_show}

def putFile(server, source, destination):
    """
    Uploads a file to HDFS.

    Args:
        server: hdfs server and port, example: "http://hadoop1:50070".
        source: local path of the file to upload.
        destination: remote file, including path.
    """
    from hdfs import InsecureClient
    client = InsecureClient(server)
    return client.upload(hdfs_path=destination, local_path=source)

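# Hedged usage sketch for putFile() above; the server URL and paths are
# illustrative placeholders. client.upload() returns the remote path on success.
remote = putFile("http://hadoop1:50070", "./report.csv", "/data/reports/report.csv")
print("uploaded to", remote)
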
def increment_load(tables, cur):
    for table in tables:
        tableName = table
        ts = datetime.datetime.now().strftime('_%Y%m%d_%H%M%S')
        query = ("COPY (SELECT * FROM " + tableName +
                 " where LastModifiedDate>(select run_time from control_table where table_name='" +
                 tableName + "')) TO '/tmp/" + tableName + "_CDC" + ts + ".csv'")
        cur.execute(query)
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('172.16.6.89', username='******', password='******')
        ftp = ssh.open_sftp()
        ftp.get("/tmp/" + tableName + "_CDC" + ts + ".csv", "Gp/" + tableName + "_CDC" + ts + ".csv")
        ftp.close()
        # Connect to Hadoop
        client = InsecureClient('http://172.16.4.144:50070', user='******')
        client.makedirs("/user/root/greenplum/source/" + tableName + "__ct", "0777")
        client.upload(
            "/user/root/greenplum/source/" + tableName + "__ct/",
            "F:/Srilatha/Attunity-POC/Greenplum/Gp/" + tableName + "_CDC" + ts + ".csv")

def on_data(self, data):
    try:
        if self.count <= 10000000:
            with open(self.outfile, 'a+') as f:
                f.write(data)
            self.count += len(data)
            return True
        else:
            hdfs_path = '/team40/stream_data/' + time.strftime(
                '%Y-%m-%d_%H-%M', time.localtime()) + self.outfile
            client = InsecureClient('http://115.146.86.32:50070', user='******')
            client.upload(hdfs_path, self.outfile)
            print(client.status(hdfs_path, strict=False))
            self.count = 0
            with open(self.outfile, 'w') as f:
                f.write(data)
            self.count += len(data)
            return True
    except BaseException as e:
        print("Error on_data: %s" % str(e))
        return True

def post(self):
    gen_log.info(self.request.headers)
    gen_log.info(self.request.body)
    # data = self.get_all_request_arguments()
    job_id = self.get_request_argument('jid', None)
    gen_log.info(job_id)
    if job_id and utils.is_object_id(job_id):
        job = yield self.db.jobs.find_one({"_id": ObjectId(job_id)})
        from hdfs import InsecureClient
        hdfs_client = InsecureClient("http://169.24.2.194:50070", user='******')
        content = hdfs_client.list("/tmp")
        gen_log.info(content)
        content = hdfs_client.list("/tmp")
        gen_log.info(content)
        work_dir = os.path.join(UPLOAD_DIR, job.get('uuid', None))
        data_dir = os.path.join(work_dir, "data")
        model_dir = os.path.join(work_dir, "model")
        # check whether the data file directory exists
        if not os.path.exists(data_dir):
            self.write_json("Data file not uploaded; please upload the data file", code=1)
            return
        # check whether the model file directory exists
        if not os.path.exists(model_dir):
            self.write_json("Model file not uploaded; please upload the model file", code=1)
            return
        # start uploading the data files
        remote_hdfs_data_dir, local_data_dir = job.get('input', "").split("#")
        hdfs_client.upload(remote_hdfs_data_dir, data_dir, overwrite=True)
        # switch to the model directory
        os.chdir(model_dir)

def upload_to_hdfs(input_dir, output_dir, chunk_size):
    # locate files in directory
    files = [
        os.path.abspath("{}/{}".format(input_dir, f))
        for f in listdir(input_dir) if isfile(join(input_dir, f))
    ]
    tmp_dir = "{}/tmp".format(input_dir)

    # set up temp dir
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    # split files into chunks of chunk_size MB
    for f in files:
        fs = FileSplit(file=f, splitsize=chunk_size * 1e6, output_dir=tmp_dir)
        fs.split(callback=split_callback)

    # upload to hdfs
    hdfs_client = InsecureClient("http://{}:9870".format(settings.HDFS_HOST_VALUE),
                                 user=settings.HDFS_USER_VALUE)

    # delete existing output dir
    if hdfs_client.content(output_dir, strict=False) is not None:
        hdfs_client.delete(output_dir, recursive=True)

    # upload files to tmp dir
    remote_path = hdfs_client.upload(hdfs_path="/tmp", local_path=tmp_dir, n_threads=-1, overwrite=True)

    # rename to output_dir
    hdfs_client.rename("/tmp", output_dir)

    print("{} files uploaded to hdfs host '{}{}' ({} file chunks total)".format(
        len(files),
        settings.HDFS_HOST_VALUE,
        output_dir,
        len(split_files),
    ))

    # delete temp files
    shutil.rmtree(tmp_dir)

    return hdfs_file_paths

def full_load(tables, cur):
    for table in tables:
        tableName = table
        ts = datetime.datetime.now().strftime('_%Y%m%d_%H%M%S')
        query = "COPY (SELECT * FROM " + tableName + ") TO '/tmp/" + tableName + "_FL" + ts + ".csv'"
        cur.execute(query)
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('172.16.6.89', username='******', password='******')
        ftp = ssh.open_sftp()
        ftp.get("/tmp/" + tableName + "_FL" + ts + ".csv", "Gp/" + tableName + "_FL" + ts + ".csv")
        ftp.close()
        # Connect to Hadoop
        client = InsecureClient('http://172.16.4.144:50070', user='******')
        client.delete("/user/root/greenplum/source/" + tableName, True)
        client.makedirs("/user/root/greenplum/source/" + tableName, "0777")
        client.upload(
            "/user/root/greenplum/source/" + tableName + "/",
            "F:/Srilatha/Attunity-POC/Greenplum/Gp/" + tableName + "_FL" + ts + ".csv")
        sql = "INSERT INTO control_table(table_name) VALUES(%s);"
        cur.execute(sql, (tableName, ))
        connection.commit()

def get(self):
    # Retrieve the dataset for evaluation
    df = get_data_cassandra()
    print(df.head())
    X = df['total_estimated_load'].values

    # evaluate parameters (p,d,q) <=> (AR, I, MA)
    p_values = 7
    d_values = 0
    q_values = 5
    # best_cfg, best_score = evaluate_models(X, p_values, d_values, q_values)
    best_cfg = (p_values, d_values, q_values)

    # Train the best model
    model = ARIMA(X, order=best_cfg)
    model_fit = model.fit()

    # save model
    if not os.path.exists(model_local_path):
        # Create the local export directory if it does not exist yet
        os.makedirs(model_local_path, exist_ok=False)
    model_fit.save(model_local_path + model_name)

    # Connect to the HDFS client
    client = InsecureClient(url='http://namenode:9870', user='******')
    # Create the directory where processed files are stored
    if client.status(model_hdfs_remote_path, strict=False) is None:
        client.makedirs(model_hdfs_remote_path)
    # Copy the model to HDFS
    remote_load_path = client.upload(model_hdfs_remote_path, model_local_path + model_name, overwrite=True)
    # print(remote_load_path)
    print(client.list(model_hdfs_remote_path))

    return {'best_cfg': best_cfg, 'status': 'Terminated'}

def find_labels(image_path, image_name, stub, request, model, n):
    """
    Args:
        image_path: path of the input image
        image_name: image name obtained with Python's time function
        stub: used for client-server communication
        request: prediction request to send to the server
        model: name of the object detection model, either the pet model or the people model
        n: maximum number of labels to consider
    """
    labels = []  # vector with the labels of the specific dataset
    bbx = []  # vector with the coordinates of the detected bounding boxes
    request.model_spec.name = model
    result = stub.Predict(request, 10.0)  # results of the prediction request, 10 secs timeout
    classes = result.outputs['detection_classes'].float_val  # ids of the detected classes, ordered from the highest score
    scores = result.outputs['detection_scores'].float_val  # class scores, from the highest score
    # print zip(classes, scores)
    boxes = result.outputs['detection_boxes'].float_val  # bounding box positions
    # reshape the vector so that each element is a 4-tuple identifying a bounding box
    boxes = np.reshape(boxes, [100, 4])

    # to save the image with the bounding boxes, open the image and use TensorFlow's vis_util library
    im = imageio.imread(image_path)  # reads the image as a multidimensional array
    if model == "pets_model":
        label_map_path = "Label_maps/pets_label_map.pbtxt"  # label map
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=37)
    else:
        label_map_path = "Label_maps/people_label_map.pbtxt"
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=2)
    category_index = label_map_util.create_category_index(categories)  # dictionary of ("id", "class name") pairs

    # an array (img_height, img_width, 3) is created with the bounding boxes overlaid
    image_vis = vis_util.visualize_boxes_and_labels_on_image_array(
        im,
        boxes,
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        max_boxes_to_draw=10,  # maximum number of bounding boxes to display
        min_score_thresh=.6,  # minimum score threshold for a box to be displayed
        use_normalized_coordinates=True,
        line_thickness=5)  # line width of the box outline

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port_result = sock.connect_ex(('localhost', 50070))
    client_hdfs = InsecureClient('http://localhost:50070')  # client used to access HDFS
    if model == "pets_model":
        imageio.imwrite("Images_bbx/{}_pets.jpg".format(image_name), image_vis)  # saves the array locally as a JPEG image
        if port_result == 0:  # if HDFS is reachable, move the image there
            client_hdfs.upload(
                '/zora-object-detection/images/{}_pets.jpg'.format(image_name),
                'Images_bbx/{}_pets.jpg'.format(image_name))
            os.remove("Images_bbx/{}_pets.jpg".format(image_name))
    else:
        imageio.imwrite("Images_bbx/{}_people.jpg".format(image_name), image_vis)
        if port_result == 0:
            client_hdfs.upload(
                '/zora-object-detection/images/{}_people.jpg'.format(image_name),
                'Images_bbx/{}_people.jpg'.format(image_name))
            os.remove("Images_bbx/{}_people.jpg".format(image_name))

    # put the labels found by the detection into a vector that is passed to the obj_detection script
    # to build the string the robot has to pronounce. The bounding box coordinates are instead saved
    # in the HDFS log file.
    boxes = boxes.tolist()  # converts the multidimensional array into a list
    for i in range(0, n):
        # consider only labels with a score >= 0.6 and skip those identifying a bounding box
        # already inserted with a higher score
        if scores[i] >= 0.6 and boxes[i] not in bbx:
            bbx.append(boxes[i])
            labels.append(str(category_index[int(classes[i])]['name']))
    return labels, bbx