from typing import List, Tuple
import re

import avro.schema
from avro.datafile import DataFileWriter
from avro.io import DatumWriter
from airflow.models import Connection
from hdfs import InsecureClient


def write_to_hdfs(rows: List[Tuple[str, str]]):
    # Resolve the WebHDFS endpoint from the Airflow connection 'local_hdfs'
    # and strip any login:password@ credentials embedded in the URI.
    conn: Connection = Connection.get_connection_from_secrets('local_hdfs')
    uri = conn.get_uri()
    pat = re.compile(r"http://(\w+(:\w+)?)?@")
    uri = pat.sub("http://", uri)
    client = InsecureClient(uri, user=conn.login)

    # Avro schema for the records: title is required, description is nullable.
    sch = avro.schema.make_avsc_object({
        'type': 'record',
        'name': 'Video',
        'fields': [
            {'type': {'type': 'string', 'avro.java.string': 'String'}, 'name': 'title'},
            {'type': ["null", {'type': 'string', 'avro.java.string': 'String'}], 'name': 'description'},
        ]
    })

    # Write the rows to a local Avro container file, then upload it to HDFS.
    local_file_name = 'videos.avro'
    writer = DataFileWriter(open(local_file_name, "wb"), DatumWriter(), sch)
    for row in rows:
        writer.append({"title": row[0], "description": row[1]})
    writer.close()
    client.upload('/tmp/videos.avro', local_file_name)
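A minimal invocation sketch for the function above, assuming an Airflow connection with id 'local_hdfs' that points at a WebHDFS endpoint; the sample rows are hypothetical:

rows = [
    ("Intro to HDFS", "A short WebHDFS walkthrough"),   # hypothetical sample data
    ("Avro basics", "Writing Avro container files"),
]
write_to_hdfs(rows)  # writes videos.avro locally, then uploads it to /tmp/videos.avro on HDFS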
Example #2
class DataProcessor:
    def __init__(self, data_path=None):
        if data_path is None:
            self.data_path = r'./config/connect_info.json'
        else:
            assert isinstance(data_path, str)
            self.data_path = data_path
        if not os.path.exists(self.data_path):
            self.data_path = r'./connect_info.json'

        with open(self.data_path) as data_file:
            data = json.load(data_file)
            self.hdfs_client = InsecureClient(
                url='http://' + data['namenode_url'] + ':' + str(data['port']),
                user=data['user'],
                root=data['root_path'])
            self.img_dir = data['img_dir']

        if self.img_dir[-1] != '/':
            self.img_dir += '/'

        self.file_name = 1

    def InitImgDir(self):
        try:
            list_rslt = self.hdfs_client.list(self.img_dir)
            if len(list_rslt) > 0:
                for name in list_rslt:
                    file_path = self.img_dir + name
                    self.hdfs_client.delete(file_path)

        except util.HdfsError:
            self.hdfs_client.makedirs(self.img_dir)
            print("Mkdir ...")

        return True

    def DataProcess(self, data, append=False, file_name=None):
        assert isinstance(data, str)
        if file_name is None:
            file_name = self.img_dir + str(self.file_name)
        else:
            assert isinstance(file_name, str)
        print("start writing...")
        start = time.time()
        if append:
            # hdfs.Client.write() does not accept overwrite or replication changes while appending
            self.hdfs_client.write(file_name, data, append=True)
        else:
            self.hdfs_client.write(file_name,
                                   data,
                                   overwrite=True,
                                   replication=1)
        delta = time.time() - start
        print("writing complete, time delta is " + str(delta))
        return True

    def Upload(self, remote_name, local_path):
        assert os.path.exists(local_path)

        remote_path = self.img_dir + remote_name
        self.hdfs_client.upload(remote_path, local_path, True)
        return True
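DataProcessor reads its connection settings from a JSON file. A minimal sketch of what that connect_info.json might contain, with hypothetical values for every field the constructor reads (namenode_url, port, user, root_path, img_dir):

import json

sample_config = {
    "namenode_url": "localhost",   # hypothetical WebHDFS host
    "port": 50070,                 # WebHDFS port (9870 on Hadoop 3.x)
    "user": "hdfs",
    "root_path": "/",
    "img_dir": "/data/images",
}

with open("./config/connect_info.json", "w") as config_file:
    json.dump(sample_config, config_file, indent=2)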
Example #3
def uploadHDFS(filename):
    aoi_file = filename
    client = InsecureClient('http://10.41.158.65:50070', user='******')
    fname1 = client.list(hdfs_path)
    # Target folder name is the date (YYYYMMDD) embedded in the file name.
    dt = str(aoi_file.split("@")[1].split("-", 1)[0][:8])
    folder1 = dt
    if folder1 not in fname1:
        client.makedirs(hdfs_path + folder1)
    client.upload(hdfs_path + folder1 + "/" + aoi_file,
                  upload_path + aoi_file,
                  overwrite=True)
    print("uploadHDFS ok")
    shutil.move(upload_path + aoi_file, backup_path + aoi_file)
Example #4
    def post(self):
        # Use the current timestamp as the image name so each image gets a unique name.
        image_name = int(time.time())
        image_path = "/root/ZAGA/ZoraOD/Images_bbx/{}.jpg".format(image_name)
        with open(image_path, 'wb') as image:
            # The image contained in request.data is saved locally.
            image.write(request.data)
        # result is the outcome of the object detection. It can be the string the robot
        # should pronounce, or the candidate labels for an object in the image when the
        # score falls between two thresholds. An empty vector is returned if no object
        # was found.
        result = obj_detection.find_result(image_path, image_name)

        # If HDFS is reachable, save the image there as well. The socket module is used
        # to check whether the HDFS port is open.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        port_result = sock.connect_ex(('localhost', 50070))
        # connect_ex returns 0 if the port is open, a non-zero value otherwise.
        if port_result == 0:
            client_hdfs = InsecureClient('http://localhost:50070')
            # Move the image into HDFS.
            client_hdfs.upload(
                '/zora-object-detection/images/{}.jpg'.format(image_name),
                image_path)
            os.remove(image_path)

        return result  # the result is sent back to the robot
Example #5
def main():

    client = InsecureClient('http://127.0.0.1:50070/', user='******')

    # create directory in HDFS
    client.makedirs('/test')

    #list content
    ll = client.list('/')
    print(ll)

    # create file in HDFS
    data = [{
        "name": "Anne",
        "salary": 10000
    }, {
        "name": "Victor",
        "salary": 9500
    }]
    with client.write('/test/sample_file.json',
                      encoding='utf-8') as json_file_in_hdfs:
        json.dump(data, json_file_in_hdfs)
    # OR
    client.write(os.path.join('/', 'test', 'sample_file2.json'),
                 data=json.dumps(data),
                 encoding='utf-8')

    # download file from HDFS
    client.download('/test/sample_file.json', './file_from_hadoop.json')

    # upload file to HDFS
    client.upload('/test/local_file_in_hadoop.json', './file_from_hadoop.json')
Example #6
    def upload_file(self):
        ip_address = self.ip_input.toPlainText()
        port_number = self.port_input.toPlainText()
        user_name = self.user_input.toPlainText()
        upload_file = self.dir_input.toPlainText()
        host_address = 'http://' + ip_address + ':' + port_number
        hadoop = InsecureClient(host_address, user_name)
        hadoop.upload('', upload_file)
Example #7
def put_in_hdfs(hdfs_path, local_path):
    print('uploading...')
    client = InsecureClient('http://quickstart.cloudera:50070', user='******')
    client.upload(hdfs_path=hdfs_path,
                  local_path=local_path,
                  progress=lambda x, y: print(x, y),
                  overwrite=True,
                  temp_dir='/tmp/{}'.format(local_path))
    print('done!')
Example #8
    def upload_file(self):
        ip_address = self.ip_input.toPlainText()
        port_number = self.port_input.toPlainText()
        user_name = self.user_input.toPlainText()
        file_name = self.File_directory_display.toPlainText()
        dir_name = self.dir_input.toPlainText()
        host_address = 'http://' + ip_address + ':' + port_number
        hadoop = InsecureClient(host_address, user_name)
        hadoop.upload(dir_name, file_name)
Example #9
class StorageClient(object):
    """Represent a storage client that supports saving results to HDFS

    Methods
    -------
    save(self, file_name, result)
        Save results permanently to persistent storage.

    """

    def __init__(self, namenode_url, username, submission_id, camera_id):
        """Initialize an internal client

        This constructor initializes an HDFS client.

        """

        self._internal_client = InsecureClient(namenode_url, user='******', root='/'.join(['/users', username, str(submission_id), str(camera_id)]))
        
    def save(self, file_name, result):
        """Save results permanently to persistent storage.

        This method saves results permanently to persistent storage so that they
        can be retrieved by the user later. This method currently accepts results as
        numpy.ndarray. If an instance with any other type is passed, the method
        will save the string representation of the instance. This enables the method
        to save strings, integers, and other primitive data types.

        Parameters
        ----------
        file_name : str
            The file name to be used to save the results.
        result : object
            The results to be saved. The `result` can be numpy.ndarray.
            If an instance with any other type is passed, the method will
            save the string representation of the instance. This enables the
            method to save strings, integers, and other primitive data types.

        """

        # Make sure the file name is legit
        file_name = file_name.replace('/', '.')
        # If the result is an OpenCV image, save it as an image.
        if isinstance(result, numpy.ndarray):
            # Create temp files
            temp_directory = tempfile.mkdtemp()
            temp_image_path = os.path.join(temp_directory, file_name)
            cv2.imwrite(temp_image_path, result)
            self._internal_client.upload(file_name, temp_image_path, overwrite=True)
            # Remove temp files
            os.remove(temp_image_path)
            os.rmdir(temp_directory)
        # Else, save the string representation of the object in a text file.
        else:
            self._internal_client.write(file_name, str(result), overwrite=True)
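A minimal usage sketch for StorageClient; the namenode URL and identifiers below are hypothetical:

import numpy

client = StorageClient('http://localhost:50070', 'alice', 42, 3)
frame = numpy.zeros((480, 640, 3), dtype=numpy.uint8)  # an OpenCV-style image array
client.save('frame_0001.png', frame)   # ndarrays are written with cv2.imwrite and uploaded
client.save('count.txt', 123)          # anything else is stored as its string representation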
Example #10
    def load_data_to_hdfs():
        # Check that the dataset exists before trying to upload it.
        file_path = '/home/student/Project/Dataset.csv'
        if not os.path.exists(file_path):
            print("File not found")
            return

        hdfsclient = InsecureClient("http://localhost:50070", user="******")
        hdfs_path = "/"
        hdfsclient.upload(hdfs_path, file_path)  # dump the file into Hadoop
Example #11
class HDFSStorage(Storage):
    def __init__(self, bucket_name: str, folder_name: str):
        super().__init__(bucket_name, folder_name)
        self.client = InsecureClient(url=settings.HDFS_CONN,
                                     user=settings.HDFS_USERNAME)

    def setup(self) -> HDFSResource:
        super().setup()

        self.client.makedirs(f"{self.bucket_name}/{self.folder_name}")

        return HDFSResource(
            resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/")

    def put_file(self,
                 file_path: Union[str, Path],
                 rename: Optional[str] = None) -> HDFSResource:
        if isinstance(file_path, Path):
            file_path = str(file_path)

        file_name = Path(file_path).name if not rename else rename

        # copy file to task directory
        if not file_path.startswith(str(self.local_dir)):
            file_path = shutil.copy(file_path, Path(self.local_dir, file_name))

        try:
            self.client.upload(
                f"{self.bucket_name}/{self.folder_name}/{file_name}",
                file_path)
        except (gaierror, NewConnectionError):
            raise

        return HDFSResource(
            resource=f"hdfs:/{self.bucket_name}/{self.folder_name}/{file_name}"
        )

    def get_file(self, data_file: str) -> str:
        if not data_file.startswith("hdfs:"):
            raise NotValidScheme(
                "Object file prefix is invalid: expected `hdfs:`")

        _, bucket_name, folder_name, file_name = data_file.split("/")
        file_path = Path(self.temp_dir, bucket_name, folder_name, file_name)

        if not file_path.is_file():
            try:
                self.client.download(data_file, file_path)
            except Exception as err:
                print(err)

        return str(file_path)

    def remove_remote_dir(self, omit_files: List[str] = None) -> None:
        pass
Example #12
def load_enedis():

    client = InsecureClient('http://localhost:50070', user='******')
    client.makedirs('data')
    print(client.list('/user/cloudera'))

    # load 10 lines
    client.upload(
        '/user/cloudera/data',
        '/home/fitec/projet_fil_rouge/source_des_données/data/consommation_elec_regions_2019_l10.json',
        overwrite=True)
Example #13
def handleHdfsUpload(file_path, proj_id, task_id):
    try:
        client = InsecureClient("http://hdfs.neurolearn.com:50070",
                                user="******")
        hdfs_path = "/neurolearn/files/" + proj_id + "/results/" + task_id
        client.makedirs(hdfs_path)
        client.upload(hdfs_path, file_path)
        print('Uploaded Images to HDFS.')
    except Exception as e:
        print(e)
        hdfs_path = ''
    return hdfs_path
Example #14
class SavedModelUploader(object):
    """upload a saved model to hadoop file system"""
    def __init__(self, url, user, base_path=""):
        self._logger = logging.getLogger(self.__class__.__name__)
        self._url = url
        self._user = user
        self._base_path = base_path
        self._client = InsecureClient(url, user)

        if not self._exist(base_path):
            self._mkdir(base_path)

    def _exist(self, path):
        if self._client.content(path, strict=False):
            return True
        else:
            return False

    def _mkdir(self, path):
        self._client.makedirs(path)

    def _del(self, path):
        self._client.delete(path, recursive=True)

    def _upload(self, local_path, hdfs_path):
        self._client.upload(hdfs_path, local_path)

    def _logging_progress(self, local_path, nbytes):
        msg = None
        if nbytes > 0:
            msg = "uploading: '{}' [{} bytes]".format(local_path, nbytes)
        else:
            msg = "uploading: '{}' [done]".format(local_path)
        self._logger.info(msg)

    def upload(self, local_model_path, overwrite=False):
        hdfs_model_path = self._base_path + '/' + basename(local_model_path)

        existed = self._exist(hdfs_model_path)
        if overwrite and existed:
            self._del(hdfs_model_path)
        elif not overwrite and existed:
            raise RuntimeError(
                "could not overwrite the model, already existed.")

        try:
            self._client.upload(self._base_path,
                                local_model_path,
                                progress=self._logging_progress)
        except HdfsError as e:
            self._logger.error(e)

        self._logger.info("model upload done")
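A minimal usage sketch for SavedModelUploader; the WebHDFS URL, user, and model path below are hypothetical:

uploader = SavedModelUploader('http://localhost:9870', 'hdfs', base_path='/models')
uploader.upload('./export/my_model', overwrite=True)  # uploads ./export/my_model under /models/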
Example #15
def uploadHDFS(filename):
    aoi_file = filename
    client = InsecureClient('http://10.41.158.65:50070', user='******')
    fname1 = client.list(hdfs_path)
    dt = str(aoi_file.split("@")[1].split("_")[0][:8])
    folder1 = dt
    if folder1 not in fname1:
        client.makedirs(hdfs_path + folder1)
    client.upload(hdfs_path + folder1 + "/" + aoi_file,
                  upload_path + aoi_file,
                  overwrite=True)
    shutil.move(upload_path + aoi_file, backup_path + aoi_file)
Example #16
class DataProcessor:
    def __init__(self, data_path=None):
        if data_path is None:
            self.data_path = r'./config/connect_info.json'
        else:
            assert isinstance(data_path, str)
            self.data_path = data_path
        if not os.path.exists(self.data_path):
            self.data_path = r'./connect_info.json'

        with open(self.data_path) as data_file:
            data = json.load(data_file)
            print("Data: ", data)
            self.hdfs_client = InsecureClient(
                url='http://' + data['namenode_url'] + ':' + str(data['port']),
                user=data['user'],
                root=data['root_path'])
            print("hdfs client: ", self.hdfs_client)
            self.img_dir = data['img_dir']
            print("img dir: ", self.img_dir)

        if self.img_dir[-1] != '/':
            self.img_dir += '/'

        self.file_name = 1

    def InitImgDir(self):
        try:
            list_rslt = self.hdfs_client.list(self.img_dir)
            if len(list_rslt) > 0:
                for name in list_rslt:
                    file_path = self.img_dir + name
                    self.hdfs_client.delete(file_path)

        except util.HdfsError:
            self.hdfs_client.makedirs(self.img_dir)

        return True

    def Upload(self, file_path, threads=2):
        print("FilePath: ", file_path)
        print("img_dir: ", self.img_dir[:-1])
        self.hdfs_client.upload(hdfs_path=self.img_dir[:-1],
                                local_path=file_path,
                                n_threads=threads,
                                overwrite=True)
        return 0
Example #17
class HDFSService(object):
    def __init__(self):
        self.hdfs = InsecureClient('http://127.0.0.1:9870', user='******')
        self.base_path = '/users/root'

    def mkdir(self, path):
        return self.hdfs.makedirs(path)

    def list(self, path):
        try:
            return self.hdfs.list(path)
        except HdfsError as e:
            print(e)
            return []

    def get(self, path):
        pass

    def upload(self, path, local_path=None, data=None):
        path = self.base_path + path
        if data is not None:
            return self.hdfs.write(path, data=data)
        elif local_path is not None:
            return self.hdfs.upload(path, local_path)
        return False

    def download(self, path):
        path = self.base_path + path
        with self.hdfs.read(path) as reader:
            print(path)
            buf = reader.read()
        print(len(buf))
        return buf
Example #18
    def upload_directory(self, directory_path, archive_directory_data):
        '''Untars the archive_directory_data provided as input,
        and uploads all the contents of the tar to the directory path
        specified on HDFS.
        '''
        logger.log_info("Uploading the directory to HDFS")
        web_hdfs_url = Environment().get_web_hdfs_url()
        hdfs_file_base_url = Environment().get_hdfs_file_base_url()
        session = SwSessionManager().get_session()
        user_name = session.get_username()
        client = InsecureClient(web_hdfs_url, user_name)
        directory_name_with_path = "/" + directory_path
        directory_name = os.path.split(directory_path)[1]
        try:
            with tempfile.TemporaryDirectory() as temp:
                local_dir_path = temp + "/" + directory_name + ".tar.gz"
                with open(local_dir_path, "wb") as dir_archive:
                    dir_archive.write(archive_directory_data)
                with tarfile.open(local_dir_path, "r:gz") as tar:
                    tar.extractall(temp)
                os.remove(local_dir_path)
                response = client.upload(hdfs_path=directory_name_with_path,
                                         local_path=temp)
                logger.log_info(
                    "Successfully uploaded the directory {0} to HDFS".format(
                        response))
            return hdfs_file_base_url + directory_name_with_path

        except Exception as e:
            raise ServiceError(
                "Uploading the directory to HDFS failed with the error: {0}".
                format(str(e)))
Example #19
class Storage:
    def __init__(self, protocol: str = 'webHDFS', *args, **kwargs):
        self.protocol, self.client = protocol.lower(), None
        if protocol.lower() == 'webHDFS'.lower():
            from hdfs import InsecureClient
            self.client = InsecureClient(*args, **kwargs)
            for f in 'upload download list status delete'.split():
                setattr(self, f, getattr(self,
                                         '%s_%s' % (f, protocol.lower())))

    def upload_webhdfs(self, local_path: str, remote_path: str, **kwargs):
        to_screen("upload %s -> %s" % (local_path, remote_path))
        return self.client.upload(local_path=local_path,
                                  hdfs_path=remote_path,
                                  **kwargs)

    def download_webhdfs(self, remote_path: str, local_path: str, **kwargs):
        mkdir_for(local_path)
        to_screen("download %s -> %s" % (remote_path, local_path))
        return self.client.download(local_path=local_path,
                                    hdfs_path=remote_path,
                                    overwrite=True,
                                    **kwargs)

    def list_webhdfs(self, remote_path: str, **kwargs):
        return self.client.list(hdfs_path=remote_path, **kwargs)

    def status_webhdfs(self, remote_path: str, **kwargs):
        return self.client.status(hdfs_path=remote_path, **kwargs)

    def delete_webhdfs(self, remote_path: str, **kwargs):
        return self.client.delete(hdfs_path=remote_path, **kwargs)
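A minimal usage sketch for the webHDFS-backed Storage wrapper; the URL and user below are hypothetical and are forwarded unchanged to hdfs.InsecureClient:

storage = Storage('webHDFS', 'http://localhost:9870', user='hdfs')
storage.upload('./report.csv', '/data/report.csv')  # dispatches to upload_webhdfs
print(storage.list('/data'))                        # dispatches to list_webhdfs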
Example #20
def handle_uploaded_file(f, data_id, proj_id):
    file_name = str(f.name)
    with open(file_name, 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    data_content = pd.read_csv(file_name, encoding='utf-8')
    data_json = data_content.to_json()

    try:
        client = InsecureClient("http://hdfs.neurolearn.com:50070",
                                user="******")
        hdfs_path = "/neurolearn/files/" + proj_id + "/datasets/" + data_id
        client.makedirs(hdfs_path)
        client.upload(hdfs_path, file_name)
    except Exception:
        hdfs_path = ''

    return data_json, hdfs_path
Example #21
def move_files(tbl):
    from hdfs import InsecureClient
    client = InsecureClient('http://172.16.4.144:50070')
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_FL.hive")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_IL.hive")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_FL.sh")
    client.upload("/user/root/greenplum/scripts/" + tbl,
                  "/root/srilatha/attunity_poc/gp_scripts/" + tbl + "_IL.sh")
Example #22
    def upload_img_to_hdfs(self, upload_file_path):
        print("test", upload_file_path)
        try:
            client = InsecureClient(self.HDFS_ADDR, user='******')
            client.upload(self.IMG_FOLDER,
                          upload_file_path,
                          overwrite=False,
                          n_threads=0,
                          temp_dir=None,
                          chunk_size=65536,
                          progress=None,
                          cleanup=True)
        except NameError as n:
            print(n)
            return {'error_msg': 'HDFS upload failed'}

        # Show the HDFS path the image was uploaded to.
        _ = upload_file_path.split('/')
        path_for_show = self.IMG_FOLDER + "/" + _[len(_) - 1]
        return {'success': "Upload succeeded, HDFS path: " + path_for_show}
Example #23
    def putFile(server, source, destination):
        """ Uploads a file to HDFS.

            Args:
                server: HDFS server and port, for example "http://hadoop1:50070".
                source: local path of the file to upload.
                destination: remote file, including path.
        """
        from hdfs import InsecureClient

        client = InsecureClient(server)
        return client.upload(hdfs_path=destination, local_path=source)
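A minimal call sketch for putFile (presumably a static helper on its class); the paths are hypothetical and the server follows the format given in the docstring:

putFile('http://hadoop1:50070', './data/events.csv', '/landing/events.csv')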
Example #24
def increment_load(tables, cur):
    for table in tables:
        tableName = table
        ts = datetime.datetime.now().strftime('_%Y%m%d_%H%M%S')
        query = "COPY (SELECT * FROM " + tableName + " where LastModifiedDate>(select run_time from control_table where table_name='" + tableName + "')) TO '/tmp/" + tableName + "_CDC" + ts + ".csv'"
        cur.execute(query)
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('172.16.6.89', username='******', password='******')
        ftp = ssh.open_sftp()
        ftp.get("/tmp/" + tableName + "_CDC" + ts + ".csv",
                "Gp/" + tableName + "_CDC" + ts + ".csv")
        ftp.close()
        #Connect To hadoop
        client = InsecureClient('http://172.16.4.144:50070', user='******')
        client.makedirs("/user/root/greenplum/source/" + tableName + "__ct",
                        "0777")
        client.upload(
            "/user/root/greenplum/source/" + tableName + "__ct/",
            "F:/Srilatha/Attunity-POC/Greenplum/Gp/" + tableName + "_CDC" +
            ts + ".csv")
Example #25
    def on_data(self, data):
        try:
            if self.count <= 10000000:
                with open(self.outfile, 'a+') as f:
                    f.write(data)
                self.count += len(data)
                return True
            else:
                hdfs_path = '/team40/stream_data/' + time.strftime(
                    '%Y-%m-%d_%H-%M', time.localtime()) + self.outfile
                client = InsecureClient('http://115.146.86.32:50070',
                                        user='******')
                client.upload(hdfs_path, self.outfile)
                print(client.status(hdfs_path, strict=False))
                self.count = 0
                with open(self.outfile, 'w') as f:
                    f.write(data)
                self.count += len(data)
                return True

        except BaseException as e:
            print("Error on_data: %s" % str(e))
        return True
Example #26
    def post(self):
        gen_log.info(self.request.headers)
        gen_log.info(self.request.body)
        #data = self.get_all_request_arguments()
        job_id = self.get_request_argument('jid', None)
        gen_log.info(job_id)
        if job_id and utils.is_object_id(job_id):
            job = yield self.db.jobs.find_one({"_id": ObjectId(job_id)})
            from hdfs import InsecureClient
            hdfs_client = InsecureClient("http://169.24.2.194:50070",
                                         user='******')
            content = hdfs_client.list("/tmp")
            gen_log.info(content)
            content = hdfs_client.list("/tmp")
            gen_log.info(content)

            work_dir = os.path.join(UPLOAD_DIR, job.get('uuid', None))
            data_dir = os.path.join(work_dir, "data")
            model_dir = os.path.join(work_dir, "model")

            # Check that the data directory exists
            if not os.path.exists(data_dir):
                self.write_json("Data files have not been uploaded; please upload them", code=1)
                return

            # Check that the model directory exists
            if not os.path.exists(model_dir):
                self.write_json("Model files have not been uploaded; please upload them", code=1)
                return

            # Start uploading the data files
            remote_hdfs_data_dir, local_data_dir = job.get('input',
                                                           "").split("#")
            hdfs_client.upload(remote_hdfs_data_dir, data_dir, overwrite=True)

            # Switch to the model directory
            os.chdir(model_dir)
Example #27
def upload_to_hdfs(input_dir, output_dir, chunk_size):
    # locate files in directory
    files = [
        os.path.abspath("{}/{}".format(input_dir, f))
        for f in listdir(input_dir) if isfile(join(input_dir, f))
    ]
    tmp_dir = "{}/tmp".format(input_dir)

    # setup temp dir
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.mkdir(tmp_dir)

    # split files into 128mb chunks
    for f in files:
        fs = FileSplit(file=f,
                       splitsize=(chunk_size) * 1e6,
                       output_dir=tmp_dir)
        fs.split(callback=split_callback)

    # upload to hdfs
    hdfs_client = InsecureClient("http://{}:9870".format(
        settings.HDFS_HOST_VALUE),
                                 user=settings.HDFS_USER_VALUE)

    # delete existing output dir
    if hdfs_client.content(output_dir, strict=False) is not None:
        hdfs_client.delete(output_dir, recursive=True)

    # upload files to tmp dir
    remote_path = hdfs_client.upload(hdfs_path="/tmp",
                                     local_path=tmp_dir,
                                     n_threads=-1,
                                     overwrite=True)

    # rename to output_dir
    hdfs_client.rename("/tmp", output_dir)

    print(
        "{} files uploaded to hdfs host '{}{}'  ({} file chunks total)".format(
            len(files),
            settings.HDFS_HOST_VALUE,
            output_dir,
            len(split_files),
        ))
    # delete temp files
    shutil.rmtree(tmp_dir)

    return hdfs_file_paths
Example #28
def full_load(tables, cur):
    for table in tables:
        tableName = table
        ts = datetime.datetime.now().strftime('_%Y%m%d_%H%M%S')
        query = "COPY (SELECT * FROM " + tableName + ") TO '/tmp/" + tableName + "_FL" + ts + ".csv'"
        cur.execute(query)
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('172.16.6.89', username='******', password='******')
        ftp = ssh.open_sftp()
        ftp.get("/tmp/" + tableName + "_FL" + ts + ".csv",
                "Gp/" + tableName + "_FL" + ts + ".csv")
        ftp.close()
        #Connect To hadoop
        client = InsecureClient('http://172.16.4.144:50070', user='******')
        client.delete("/user/root/greenplum/source/" + tableName, True)
        client.makedirs("/user/root/greenplum/source/" + tableName, "0777")
        client.upload(
            "/user/root/greenplum/source/" + tableName + "/",
            "F:/Srilatha/Attunity-POC/Greenplum/Gp/" + tableName + "_FL" + ts +
            ".csv")
        sql = "INSERT INTO control_table(table_name) VALUES(%s);"
        cur.execute(sql, (tableName, ))
        connection.commit()
Example #29
    def get(self):

        # Fetch the dataset used for evaluation
        df = get_data_cassandra()

        print(df.head())
        X = df['total_estimated_load'].values

        # evaluate parameters (p,d,q)  <=> (AR, I, MA)
        p_values = 7
        d_values = 0
        q_values = 5
        #best_cfg, best_score = evaluate_models(X, p_values, d_values, q_values)
        best_cfg = (p_values, d_values, q_values)

        # Train the best model
        model = ARIMA(X, order=best_cfg)
        model_fit = model.fit()

        # Save the model, creating the local export folder if it does not exist
        if not os.path.exists(model_local_path):
            os.makedirs(model_local_path, exist_ok=False)

        model_fit.save(model_local_path + model_name)

        # Connect to the HDFS client
        client = InsecureClient(url='http://namenode:9870', user='******')

        # Create the remote folder where processed files are stored
        if client.status(model_hdfs_remote_path, strict=False) is None:
            client.makedirs(model_hdfs_remote_path)

        # Copy the model to HDFS
        remote_load_path = client.upload(model_hdfs_remote_path,
                                         model_local_path + model_name,
                                         overwrite=True)
        #print(remote_load_path)

        print(client.list(model_hdfs_remote_path))

        return {'best_cfg': best_cfg, 'status': 'Terminated'}
Example #30
def find_labels(image_path, image_name, stub, request, model, n):
    """
    Args:
        image_path: path of the input image
        image_name: image name obtained with Python's time function
        stub: used for the client-server communication
        request: request to send to the server
        model: name of the object-detection model, either the pet model or the people model
        n: maximum number of labels to consider
    """
    labels = []  # vector with the labels of the specific dataset
    bbx = []  # vector with the coordinates of the bounding boxes found
    request.model_spec.name = model
    result = stub.Predict(
        request,
        10.0)  # results of the prediction request, 10 secs timeout
    classes = result.outputs[
        'detection_classes'].float_val  # ids of the detected classes, ordered from the highest-scoring class
    scores = result.outputs[
        'detection_scores'].float_val  # class scores, highest score first
    #print(zip(classes, scores))
    boxes = result.outputs[
        'detection_boxes'].float_val  # positions of the bounding boxes
    # reshape the vector so that each element is a 4-tuple identifying a bounding box
    boxes = np.reshape(boxes, [100, 4])

    # to save the image with the bounding boxes, open the image and use TensorFlow's vis_util library
    im = imageio.imread(
        image_path)  # reads the image as a multidimensional array
    if (model == "pets_model"):
        label_map_path = "Label_maps/pets_label_map.pbtxt"  # label map
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=37)
    else:
        label_map_path = "Label_maps/people_label_map.pbtxt"
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=2)
    category_index = label_map_util.create_category_index(
        categories)  # dictionary of key ("id"), value ("class name") pairs

    # build an array (img_height, img_width, 3) with the bounding boxes overlaid
    image_vis = vis_util.visualize_boxes_and_labels_on_image_array(
        im,
        boxes,
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        max_boxes_to_draw=10,  # max number of bounding boxes to display
        min_score_thresh=.6,  # minimum score for a bounding box to be displayed
        use_normalized_coordinates=True,
        line_thickness=5)  # line width of the box outlines

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port_result = sock.connect_ex(('localhost', 50070))
    client_hdfs = InsecureClient(
        'http://localhost:50070')  # client used to access HDFS
    if (model == "pets_model"):
        imageio.imwrite(
            "Images_bbx/{}_pets.jpg".format(image_name),
            image_vis)  # saves the array locally as a JPEG image
        if port_result == 0:  # if HDFS is reachable, move the image there
            client_hdfs.upload(
                '/zora-object-detection/images/{}_pets.jpg'.format(image_name),
                'Images_bbx/{}_pets.jpg'.format(image_name))
            os.remove("Images_bbx/{}_pets.jpg".format(image_name))
    else:
        imageio.imwrite("Images_bbx/{}_people.jpg".format(image_name),
                        image_vis)
        if port_result == 0:
            client_hdfs.upload(
                '/zora-object-detection/images/{}_people.jpg'.format(
                    image_name), 'Images_bbx/{}_people.jpg'.format(image_name))
            os.remove("Images_bbx/{}_people.jpg".format(image_name))

    # Put the labels found by the detection into a vector passed to the obj_detection script,
    # which builds the string the robot should pronounce. The bounding-box coordinates are
    # instead saved in the HDFS log file.
    boxes = boxes.tolist()  # turns the multidimensional array into a list
    for i in range(0, n):
        # only consider labels with a score >= 0.6, excluding those whose bounding box was
        # already added with a higher score
        if (scores[i] >= 0.6 and boxes[i] not in bbx):
            bbx.append(boxes[i])
            labels.append(str(category_index[int(classes[i])]['name']))

    return labels, bbx