Example 1
    def upload_file(fname, fpath):
        daemonname = fname.split(".")[0]
        i = 0
        fail_msg = ""

        while i <= MAX_RETRIES:
            try:
                svc = FileService(account_name=acctname, account_key=acctkey)

                l = [sonicversion, asicname, daemonname, hostname]
                e = []
                while len(e) != len(l):
                    e.append(l[len(e)])
                    svc.create_directory(sharename, "/".join(e))

                log_debug("Remote dir created: " + "/".join(e))

                svc.create_file_from_path(sharename, "/".join(l), fname, fpath)
                log_debug("Remote file created: name{} path{}".format(
                    fname, fpath))
                break

            except Exception as e:
                log_err("core uploader failed: Failed during upload (" +
                        str(e) + ")")
                fail_msg = str(e)
                i += 1
                if i >= MAX_RETRIES:
                    raise Exception("Failed while uploading. msg(" + fail_msg +
                                    ") after " + str(i) + " retries")
                time.sleep(PAUSE_ON_FAIL)
Example 2
    def upload_file(fname, fpath, coref):
        daemonname = fname.split(".")[0]
        i = 0
        fail_msg = ""

        while True:
            try:
                svc = FileService(account_name=acctname, account_key=acctkey)

                l = [sonicversion, asicname, daemonname, hostname]
                e = []
                while len(e) != len(l):
                    e.append(l[len(e)])
                    svc.create_directory(sharename, "/".join(e))

                log_debug("Remote dir created: " + "/".join(e))

                svc.create_file_from_path(sharename, "/".join(l), fname, fpath)
                log_debug("Remote file created: name{} path{}".format(
                    fname, fpath))
                newcoref = os.path.dirname(
                    coref) + "/" + UPLOAD_PREFIX + os.path.basename(coref)
                os.rename(coref, newcoref)
                break

            except Exception as ex:
                log_err("core uploader failed: Failed during upload (" +
                        coref + ") err: (" + str(ex) + ") retry:" + str(i))
                if not os.path.exists(fpath):
                    break
                i += 1
                time.sleep(PAUSE_ON_FAIL)
Example 3
def file_storage_connect():
    global file_service
    global file_storage_dir
    global file_storage_share
    global overwrite_remote_files
    file_storage_url = dbparameters['fs_server'].strip()
    file_storage_user = dbparameters['fs_username'].strip()
    file_storage_pwd = dbparameters['fs_password'].strip()
    file_storage_share = dbparameters['fs_share'].strip()
    file_storage_dir = dbparameters['fs_directory_prefix'].strip()
    overwrite_remote_files = dbparameters['overwrite_remote_files'].strip()

    file_service = FileService(account_name=file_storage_user,
                               account_key=file_storage_pwd)
    try:
        if file_service.exists(file_storage_share):
            print(
                'Connection to Azure file storage successfully established...')
            if len(file_storage_dir) > 0 and not file_service.exists(
                    file_storage_share, directory_name=file_storage_dir):
                subdirs = file_storage_dir.split('/')
                subdirfull = ""
                for subdir in subdirs:
                    subdirfull += subdir
                    file_service.create_directory(file_storage_share,
                                                  subdirfull)
                    subdirfull += "/"
                print('Created directory:' + file_storage_dir)
        else:
            print(
                'Failed to connect to Azure file storage, share does not exist: '
                + file_storage_share)
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
def prepare_azure_file_share_service(config, dataset_directory='dataset_directory'):
    # Create a file share
    service = FileService(config.storage_account_name, config.storage_account_key)
    service.create_share(config.workspace_file_share, fail_on_exist=False)

    # Create a directory in the file share
    service.create_directory(config.workspace_file_share, dataset_directory, fail_on_exist=False)

    return service
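
A minimal usage sketch for the helper above; the config object and its attribute names (storage_account_name, storage_account_key, workspace_file_share) are placeholders for illustration, not values from the original.

class WorkspaceConfig:
    # hypothetical placeholders - substitute real account details
    storage_account_name = 'mystorageaccount'
    storage_account_key = '<account-key>'
    workspace_file_share = 'workspace-share'

service = prepare_azure_file_share_service(WorkspaceConfig(), dataset_directory='datasets')
# the returned FileService can then be used for uploads, e.g.:
# service.create_file_from_path('workspace-share', 'datasets', 'data.csv', './data.csv')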
Example 5
 def upload(path: str):
     from azure.storage.file import FileService
     service = FileService(account_name=config['account_name'],
                           account_key=config['account_key'])
     # list_directories_and_files yields File/Directory objects, so compare names
     if shared_directory not in [
             item.name for item
             in service.list_directories_and_files(config['share_name'])]:
         service.create_directory(config['share_name'], shared_directory)
     service.create_file_from_path(config['share_name'], shared_directory,
                                   path.split('/')[-1], path)
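
A brief invocation sketch, assuming the module-level config dict and shared_directory that upload() references; all values are placeholders.

config = {'account_name': 'mystorageaccount',   # placeholder
          'account_key': '<account-key>',       # placeholder
          'share_name': 'myshare'}              # placeholder
shared_directory = 'uploads'

upload('/tmp/model.bin')   # creates uploads/ on the share if missing, then uploads model.bin into it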
Example 6
def create_azure_fileshare(share_prefix, account_name, account_key):
    """
    Generate a unique share name to avoid overlaps in shared infra
    :param share_prefix:
    :param account_name:
    :param account_key:
    :return:
    """

    # FIXME - Need to remove hardcoded directory link below

    d_dir = './WebInDeploy/bootstrap'
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    # archive_file_path = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        s = requests.Session()
        s.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=s)

        # print(file_service)
        if not file_service.exists(share_name):
            file_service.create_share(share_name)

        for d in ['config', 'content', 'software', 'license']:
            print('creating directory of type: {}'.format(d))
            if not file_service.exists(share_name, directory_name=d):
                file_service.create_directory(share_name, d)

            # FIXME - We only handle bootstrap files.  May need to handle other dirs

            if d == 'config':
                for filename in os.listdir(d_dir):
                    print('creating file: {0}'.format(filename))
                    file_service.create_file_from_path(
                        share_name, d, filename, os.path.join(d_dir, filename))

    except AttributeError as ae:
        # this can be returned on bad auth information
        print(ae)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except AzureException as ahe:
        print(ahe)
        return str(ahe)
    except ValueError as ve:
        print(ve)
        return str(ve)

    print('all done')
    return share_name
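
A hedged usage sketch for create_azure_fileshare(); the account name and key are placeholders, not values from the original.

result = create_azure_fileshare('webindeploy', 'mystorageaccount', '<account-key>')
# on success the generated share name is returned, otherwise an error message string
print(result)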
Example 7
def upload_scripts(config, job_name, filenames):
    service = FileService(config.storage_account['name'],
                          config.storage_account['key'])
    service.create_directory(config.fileshare_name,
                             job_name,
                             fail_on_exist=False)
    transfer_file = lambda fname: service.create_file_from_path(
        config.fileshare_name, job_name, os.path.basename(fname), fname)
    for filename in filenames:
        transfer_file(filename)
class S3AzureFileBypass(BaseS3Bypass):
    """
    Bypass executed by default when data source is an S3 bucket and data destination
    is an Azure share.
    It should be transparent to the user. Conditions are:

        - S3Reader and AzureFileWriter are used on configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureFileWriter does not have an items_limit set in the configuration.
        - AzureFileWriter uses the default items_per_buffer_write and size_per_buffer_write.
    """

    def __init__(self, config, metadata):
        super(S3AzureFileBypass, self).__init__(config, metadata)
        from azure.storage.file import FileService
        self.azure_service = FileService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))
        self.share = self.read_option('writer', 'share')
        self.filebase_path = self._format_filebase_path(self.read_option('writer', 'filebase'))
        self._ensure_path(self.filebase_path)

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureFileWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureFileBypass, cls).meets_conditions(config)

    def _format_filebase_path(self, filebase):
        filebase_with_date = datetime.datetime.now().strftime(filebase)
        # warning: we strip file prefix here, could be unexpected
        filebase_path, prefix = os.path.split(filebase_with_date)
        return filebase_path

    def _ensure_path(self, filebase):
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _copy_s3_key(self, key):
        file_name = key.name.split('/')[-1]
        self.azure_service.copy_file(
            self.share,
            self.filebase_path,
            file_name,
            key.generate_url(S3_URL_EXPIRES_IN)
        )
Example 9
def file():
    static_dir_path = r"D:\home\site\wwwroot\static"
    static_file_dir_path = static_dir_path + '\\' + 'files'
    account_name = 'hanastragetest'
    account_key = 'account_key'
    root_share_name = 'root'
    share_name = 'images'
    directory_url = 'https://hanastragetest.file.core.windows.net/' + root_share_name + '/' + share_name

    # create local save directory
    if not os.path.exists(static_file_dir_path):
        os.mkdir(static_file_dir_path)

    file_service = FileService(account_name=account_name,
                               account_key=account_key)
    # create share
    file_service.create_share(root_share_name)

    # create directory
    file_service.create_directory(root_share_name, share_name)

    files = os.listdir(static_dir_path)
    for file in files:
        # delete
        if file_service.exists(root_share_name, share_name, file):
            file_service.delete_file(root_share_name, share_name, file)

        # file upload
        file_service.create_file_from_path(
            root_share_name,
            share_name,  # directory under the root share where the files are uploaded
            file,
            static_dir_path + '\\' + file,
            content_settings=ContentSettings(content_type='image/png'))

    generator = file_service.list_directories_and_files(
        root_share_name, share_name)

    html = ""
    for file in generator:
        # file download (the generator yields File/Directory objects, so use .name)
        file_save_path = static_file_dir_path + '\\' + file.name
        file_service.get_file_to_path(root_share_name, share_name, file.name,
                                      file_save_path)
        html = "{}<img src='{}'>".format(html, file_save_path)

    result = {
        "result": True,
        "data": {
            "file_or_dir_name":
            [file_or_dir.name for file_or_dir in generator]
        }
    }
    return make_response(json.dumps(result, ensure_ascii=False) + html)
Example 10
class S3AzureFileBypass(BaseS3Bypass):
    """
    Bypass executed by default when data source is an S3 bucket and data destination
    is an Azure share.
    It should be transparent to the user. Conditions are:

        - S3Reader and AzureFileWriter are used on configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureFileWriter does not have an items_limit set in the configuration.
        - AzureFileWriter uses the default items_per_buffer_write and size_per_buffer_write.
        - AzureFileWriter uses the default write_buffer.
    """
    def __init__(self, config, metadata):
        super(S3AzureFileBypass, self).__init__(config, metadata)
        from azure.storage.file import FileService
        self.azure_service = FileService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))
        self.share = self.read_option('writer', 'share')
        self.filebase_path = self._format_filebase_path(
            self.read_option('writer', 'filebase'))
        self._ensure_path(self.filebase_path)

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureFileWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureFileBypass, cls).meets_conditions(config)

    def _format_filebase_path(self, filebase):
        filebase_with_date = datetime.datetime.now().strftime(filebase)
        # warning: we strip file prefix here, could be unexpected
        filebase_path, prefix = os.path.split(filebase_with_date)
        return filebase_path

    def _ensure_path(self, filebase):
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _copy_s3_key(self, key):
        file_name = key.name.split('/')[-1]
        self.azure_service.copy_file(self.share, self.filebase_path, file_name,
                                     key.generate_url(S3_URL_EXPIRES_IN))
Example 11
 def create_share_name(self, remote_folder):
     parse_url = _parse_url(remote_folder)
     key = self.storage_client.storage_accounts.list_keys(
         self.resource_group_name, parse_url.account).keys[0].value
     fs = FileService(account_name=parse_url.account, account_key=key)
     return fs.create_directory(
         share_name=parse_url.container_or_share_name,
         directory_name=parse_url.path)
Example 12
def create_azure_fileshare(files, share_prefix, account_name, account_key):
    # generate a unique share name to avoid overlaps in shared infra
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    archive_file_path = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        s = requests.Session()
        s.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=s)

        # print(file_service)
        if not file_service.exists(share_name):
            file_service.create_share(share_name)

        for d in ['config', 'content', 'software', 'license']:
            print('creating directory of type: {}'.format(d))
            if not file_service.exists(share_name, directory_name=d):
                file_service.create_directory(share_name, d)

            d_dir = os.path.join(archive_file_path, d)
            for filename in os.listdir(d_dir):
                print('creating file: {0}'.format(filename))
                file_service.create_file_from_path(
                    share_name, d, filename, os.path.join(d_dir, filename))

    except AttributeError as ae:
        # this can be returned on bad auth information
        print(ae)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except AzureException as ahe:
        print(ahe)
        return str(ahe)
    except ValueError as ve:
        print(ve)
        return str(ve)

    print('all done')
    return 'Azure file-share {} created successfully'.format(share_name)
Example 13
def saveModel(customer, modelName, model, storage_account_name,
              storage_account_key):
    fileService = FileService(account_name=storage_account_name,
                              account_key=storage_account_key)
    if not fileService.exists('trainedmodels', customer):
        fileService.create_share('trainedmodels')
        fileService.create_directory('trainedmodels', customer)

    if not fileService.exists('trainedmodels', customer + '/' + modelName):
        fileService.create_directory('trainedmodels',
                                     customer + '/' + modelName)

    modelPickle = pickle.dumps(model)
    timestr = time.strftime('%Y%m%d-%H%M%S')
    fileName = modelName + '_' + timestr + '.pkl'
    fileService.create_file_from_bytes('trainedmodels',
                                       customer + '/' + modelName, fileName,
                                       modelPickle)
    print(fileName + ' saved.')
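
A short usage sketch for saveModel(); any picklable object works, so a plain dict stands in for a trained model here and the credentials are placeholders.

dummy_model = {'weights': [0.1, 0.2, 0.3]}   # assumption: stands in for a real model object
saveModel('contoso', 'demo-model', dummy_model,
          storage_account_name='mystorageaccount',   # placeholder
          storage_account_key='<account-key>')       # placeholder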
Example 14
    def upload_to_share(self, region, resource_group_name,
                        storage_account_name, share_name, dir_name,
                        tar_gz_file_to_upload):

        logging.info(
            "Uploading contents of '{}' to 'https://{}.file.core.windows.net/{}/{}'"
            .format(tar_gz_file_to_upload, storage_account_name, share_name,
                    dir_name))

        self.create_storage_account_if_not_exists(region, resource_group_name,
                                                  storage_account_name)
        storage_account_name, storage_key = self.get_storage_credentials(
            resource_group_name, storage_account_name)
        share_service = FileService(account_name=storage_account_name,
                                    account_key=storage_key)
        self.create_share_if_not_exists(share_service, share_name)
        share_service.create_directory(share_name, dir_name)
        self.upload_tar_gz_contents(share_service, share_name, dir_name,
                                    tar_gz_file_to_upload)

        return storage_account_name, storage_key
Example 15
    def upload_to_file_storage():
        #init file manager
        fnm = FilenameManager()

        # get a list of pdf files in dir_pdfs
        template = dir_upload + "**"
        if operating_system == 'mac' or operating_system == 'linux':
            template += '/*.pdf'
        elif operating_system == 'windows':
            template += '\\*.pdf'
        lpdfs = glob.glob(template, recursive=True)
        lpdfs.sort()
        # os.chdir(dir_pdfs)  # needed because ftp.storbinary('STOR ...') works with filenames, not paths
        # connect to Azure file storage and upload files
        try:
            file_storage_url = dparameters['fs_server'].strip()
            file_storage_user = dparameters['fs_username'].strip()
            file_storage_pwd = dparameters['fs_password'].strip()
            file_storage_share = dparameters['fs_share'].strip()
            file_storage_dir = dparameters['fs_directory_prefix'].strip()
            file_service = FileService(account_name=file_storage_user,
                                       account_key=file_storage_pwd)
            try:
                if file_service.exists(file_storage_share):
                    print(
                        'Connection to Azure file storage successfully established...'
                    )
                    if len(file_storage_dir) > 0 and not file_service.exists(
                            file_storage_share,
                            directory_name=file_storage_dir):
                        file_service.create_directory(file_storage_share,
                                                      file_storage_dir)
                        print('Created directory:' + file_storage_dir)
                else:
                    print(
                        'Failed to connect to Azure file storage, share does not exist: '
                        + file_storage_share)
            except Exception as ex:
                print('Error connecting to Azure file storage: ', ex)

            for pdffile in lpdfs:
                file_details = db.readFileStatus(file_original_name=pdffile,
                                                 file_status='Uploaded')
                if file_details is None:
                    file_id = None
                    file_details = db.readFileStatus(
                        file_original_name=pdffile, file_status='Classified')
                    if file_details is not None:
                        file_id = file_details["id"]
                    dir, rpdffile = ntpath.split(pdffile)

                    destinationdir = ''
                    year = ''

                    if (dir + '\\') == dir_upload or (dir + '/') == dir_upload:
                        destinationdir = 'Unclassified'
                    else:
                        dir, year = ntpath.split(dir)
                        dir, destinationdir = ntpath.split(dir)

                    retries = 0
                    while retries < 3:
                        try:
                            path = pdffile
                            print('Uploading {}'.format(path))
                            filename = pdffile
                            remote_filename = fnm.azure_validate_filename(
                                rpdffile)
                            if not remote_filename:
                                return
                            if len(file_storage_dir) > 0:
                                directory = file_storage_dir + '/' + destinationdir
                            else:
                                directory = destinationdir
                            if not file_service.exists(
                                    file_storage_share,
                                    directory_name=directory):
                                file_service.create_directory(
                                    file_storage_share, directory)
                            if year:
                                directory += '/' + year
                            if not file_service.exists(
                                    file_storage_share,
                                    directory_name=directory):
                                file_service.create_directory(
                                    file_storage_share, directory)
                            print('Checking if {}/{} already exists'.format(
                                directory, remote_filename))
                            if file_service.exists(file_storage_share,
                                                   directory_name=directory,
                                                   file_name=remote_filename):
                                print('{}/{} already exists'.format(
                                    directory, remote_filename))
                                if file_id is None:
                                    db.saveFileStatus(
                                        script_name=script_name,
                                        file_original_name=pdffile,
                                        file_upload_path=directory,
                                        file_upload_name=remote_filename,
                                        file_status='Uploaded')
                                else:
                                    db.saveFileStatus(
                                        id=file_details["id"],
                                        file_upload_path=directory,
                                        file_upload_name=remote_filename,
                                        file_status='Uploaded')
                                os.remove(pdffile)
                                break
                            file_service.create_file_from_path(
                                file_storage_share,
                                directory,
                                remote_filename,
                                path,
                                content_settings=ContentSettings(
                                    content_type='application/pdf'))
                            if file_id is None:
                                db.saveFileStatus(
                                    script_name=script_name,
                                    file_original_name=pdffile,
                                    file_upload_path=directory,
                                    file_upload_name=remote_filename,
                                    file_status='Uploaded')
                            else:
                                db.saveFileStatus(
                                    id=file_details["id"],
                                    file_upload_path=directory,
                                    file_upload_name=remote_filename,
                                    file_status='Uploaded')
                            print('{}/{} uploaded'.format(
                                directory, remote_filename))
                            retries = 3
                            os.remove(pdffile)
                        except Exception as e:
                            print('Error uploading to Azure file storage,',
                                  str(e))
                            retries += 1
                else:
                    print('File {} was uploaded before'.format(
                        file_details["file_original_name"]))
                    os.remove(pdffile)
        except Exception as e:
            print(str(e))
            logging.critical(str(e))
Example 16
class Crawler:
    def __init__(self, config, section, script_name=None, error_message=None):
        self.script_name = script_name
        self.config = config
        self.db = DbCommunicator(config)
        self.error_message = error_message
        try:
            self.section = section
            self.dbparams = self.db.readProps('general')
            self.dbparams.update(self.db.readProps(section))
            self.downloads_path = self.get_property('downloads_path', section)
            self.overwrite_remote_files = self.get_property(
                'overwrite_remote_files', section, 'bool')
            if not os.path.exists(self.downloads_path):
                os.makedirs(self.downloads_path)
            elif not os.path.isdir(self.downloads_path):
                print(
                    'ERROR:{} downloads_path parameter points to file!'.format(
                        section))
                sys.exit(1)
            self.headless_mode = self.get_property('headless_mode', 'general',
                                                   'bool')
            if self.headless_mode:
                display = Display(visible=0, size=(1920, 1080))
                display.start()
            options = webdriver.ChromeOptions()
            options.add_argument("--no-sandbox")
            options.add_argument('--headless')
            prefs = {
                'download.default_directory': self.downloads_path,
                'download.prompt_for_download': False,
                'download.directory_upgrade': True,
                'plugins.always_open_pdf_externally': True,
            }
            options.add_experimental_option("prefs", prefs)
            self.browser = webdriver.Chrome(
                chrome_options=options,
                service_args=["--verbose", "--log-path=/tmp/selenium.log"])
            self.browser.implicitly_wait(10)
            self.browser.set_page_load_timeout(10000)
            self.browser.set_script_timeout(10000)
            # self.ftp_connect()
            self.file_storage_connect()
        except Exception as e:
            self.error_message = str(e)

    def get_property(self, prop, section, type='str'):
        if type == 'str':
            if self.dbparams is not None and prop in self.dbparams:
                return self.dbparams[prop]
            else:
                return self.config.get(section, prop).strip()
        elif type == 'bool':
            if self.dbparams is not None and prop in self.dbparams:
                return self.dbparams[prop] == 'True'
            else:
                return self.config.getboolean(section, prop, fallback=False)

    def file_storage_connect(self):
        self.file_storage_url = self.get_property('fs_server', 'general')
        self.file_storage_user = self.get_property('fs_username', 'general')
        self.file_storage_pwd = self.get_property('fs_password', 'general')
        self.file_storage_share = self.get_property('fs_share', 'general')
        self.file_storage_dir = self.get_property('fs_directory_prefix',
                                                  'general')
        self.file_service = FileService(account_name=self.file_storage_user,
                                        account_key=self.file_storage_pwd)
        try:
            if self.file_service.exists(self.file_storage_share):
                print(
                    'Connection to Azure file storage successfully established...'
                )
                if len(self.file_storage_dir
                       ) > 0 and not self.file_service.exists(
                           self.file_storage_share,
                           directory_name=self.file_storage_dir):
                    subdirs = self.file_storage_dir.split('/')
                    subdirfull = ""
                    for subdir in subdirs:
                        subdirfull += subdir
                        self.file_service.create_directory(
                            self.file_storage_share, subdirfull)
                        subdirfull += "/"
                    print('Created directory:' + self.file_storage_dir)
            else:
                print(
                    'Failed to connect to Azure file storage, share does not exist: '
                    + self.file_storage_share)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)

    def ftp_connect(self):
        self.ftp = FTP()
        self.ftp.connect(
            self.config.get('general', 'ftp_server').strip(),
            int(self.config.get('general', 'ftp_port')),
        )
        self.ftp.login(
            user=self.config.get('general', 'ftp_username').strip(),
            passwd=self.config.get('general', 'ftp_password').strip(),
        )
        print('Connection to ftp successfully established...')

    def get(self, url):
        self.browser.get(url)
        time.sleep(3)

    def assert_exists(self, selector):
        _ = self.browser.find_element_by_css_selector(selector)

    def get_elements(self, selector, root=None):
        if root is None:
            root = self.browser
        return root.find_elements_by_css_selector(selector)

    def wait_for_displayed(self, selector):
        element = self.browser.find_element_by_css_selector(selector)
        while not element.is_displayed():
            pass

    def click_by_text(self, text):
        self.browser.find_element_by_link_text(text).click()
        time.sleep(3)

    def click_xpath(self, path, single=True):
        if single:
            self.browser.find_element_by_xpath(path).click()
        else:
            for el in self.browser.find_elements_by_xpath(path):
                el.click()
        time.sleep(3)

    def click(self, selector, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            root.find_element_by_css_selector(selector).click()
        else:
            for el in root.find_elements_by_css_selector(selector):
                el.click()
        time.sleep(3)

    def send_keys(self, selector, keys):
        elem = self.browser.find_element_by_css_selector(selector)
        elem.clear()
        elem.send_keys(keys)
        time.sleep(3)

    def open_new_tab(self):
        self.browser.execute_script("window.open('');")
        self.browser.switch_to.window(self.browser.window_handles[1])

    def close_current_tab(self):
        self.browser.close()
        self.browser.switch_to.window(self.browser.window_handles[-1])

    def get_text(self, selector, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).text
        return [el.text for el in root.find_elements_by_css_selector(selector)]

    def get_attr(self, selector, attr, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).get_attribute(
                attr)
        return [
            el.get_attribute(attr)
            for el in root.find_elements_by_css_selector(selector)
        ]

    def execute(self, script):
        self.browser.execute_script(script, [])
        time.sleep(3)

    def deselect_all(self, selector):
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.deselect_all()
        time.sleep(3)

    def select_option(self, selector, option):
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.select_by_visible_text(option)
        time.sleep(3)

    def select_option_by_index(self, selector, index):
        select = Select(self.browser.find_element_by_css_selector(selector))
        if index < len(select.options):
            select.select_by_index(index)
            time.sleep(3)
            return True
        return False

    def back(self):
        self.browser.back()
        time.sleep(3)

    def close_dialog(self):
        try:
            alert = self.browser.switch_to.alert
            alert.dismiss()
            # alert.accept()
        except Exception as e:
            pass

    def close(self):
        if hasattr(self, 'browser'):
            self.browser.quit()
        if hasattr(self, 'db'):
            self.db.close()
        # self.ftp.quit()

    def download(self, url, filename, file_db_id=None):
        # print('Downloading', filename, self._get_remote_filename(filename))
        # return
        downloaded = False
        if url.startswith('https'):
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        else:
            ctx = None

        content_length = 1
        retry = 0
        file_size = 0
        file_name = ''
        while file_size != content_length and retry < 3:
            try:
                r = urllib.request.urlopen(url, context=ctx)
                content_length = r.length
                file_name = os.path.join(self.downloads_path, filename)
                with open(file_name, 'wb') as f:
                    f.write(r.read())
                    file_size = os.stat(file_name).st_size
                    retry += 1
                    # print('Attempt', retry, 'Downloaded', file_size, 'bytes of', content_length)
            except Exception as e:
                retry += 1
                print('Attempt', retry, 'ERROR: Downloading failed!', url,
                      str(e))
                try:
                    os.remove(file_name)
                except OSError:
                    pass
        if file_size == content_length:
            downloaded = True
            if file_db_id:
                self.db.saveFileStatus(id=file_db_id,
                                       script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='Downloaded')
            else:
                self.db.saveFileStatus(script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='Downloaded')
        else:
            if file_db_id:
                self.db.saveFileStatus(id=file_db_id,
                                       script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='None')
            else:
                self.db.saveFileStatus(script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='None')
        return downloaded

    def _get_remote_filename(self, local_filename):
        raise NotImplementedError

    def merge_files(self, filenames):
        pdfline = '"' + '" "'.join(filenames) + '"'
        res_filename = '"' + filenames[0].split(' part')[0] + '.pdf"'
        command = 'pdftk ' + pdfline + ' cat output ' + res_filename
        os.system(command)
        return res_filename

    def upload_to_ftp(self, filename):
        self.upload_to_file_storage(filename)

    def upload_to_ftp_old(self, filename):
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                # print('Uploading {}'.format(path))
                pdf_file = open(path, 'rb')
                remote_filename = self._get_remote_filename(filename)
                if not remote_filename:
                    return
                directory, filename = remote_filename
                try:
                    self.ftp.cwd('/{}'.format(directory))
                except Exception:
                    self.ftp.mkd('/{}'.format(directory))
                    self.ftp.cwd('/{}'.format(directory))
                if not self.overwrite_remote_files:
                    # print('Checking if {}/{} already exists'.format(directory, filename))
                    try:
                        self.ftp.retrbinary('RETR {}'.format(filename),
                                            lambda x: x)
                        return
                    except error_perm:
                        pass

                self.ftp.storbinary('STOR {}'.format(filename), pdf_file)
                # print('{} uploaded'.format(path))
                pdf_file.close()
                retries = 3
            except Exception as e:
                print('Error uploading to ftp,', str(e))
                retries += 1
                try:
                    self.ftp.voidcmd("NOOP")
                except Exception as ex:
                    self.ftp_connect()

    def move_to_another(self, filename):
        try:
            entity_type = filename.split('|')[1]
            remote_filename = self._get_remote_filename(filename)
            if not remote_filename:
                return
            if (entity_type == 'County') or (entity_type == 'City') or \
                    (entity_type == 'Township') or (entity_type == 'Village'):
                return
            directory, server_filename = remote_filename
            self.ftp.rename('/General Purpose/{}'.format(server_filename),
                            '/{}/{}'.format(directory, server_filename))
            print('Moved {} to {}'.format(server_filename, directory))
        except Exception as e:
            print(str(e))

    def upload_to_file_storage(self, filename):
        fnm = FilenameManager()
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Uploaded')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename,
                    file_status='Other',
                    notes='Uploaded Before')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Downloaded')
                print('Uploading {}'.format(path))
                remote_filename = self._get_remote_filename(filename)
                old_filename = filename
                directory = None
                year = None
                if not remote_filename:
                    return
                try:
                    directory, filename, year = remote_filename
                except ValueError:
                    directory, filename = remote_filename
                filename = fnm.azure_validate_filename(filename)
                if len(self.file_storage_dir) > 0:
                    directory = self.file_storage_dir + '/' + directory
                if not self.file_service.exists(self.file_storage_share,
                                                directory_name=directory):
                    self.file_service.create_directory(self.file_storage_share,
                                                       directory)
                if year:
                    directory += '/' + year
                    if not self.file_service.exists(self.file_storage_share,
                                                    directory_name=directory):
                        self.file_service.create_directory(
                            self.file_storage_share, directory)
                if not self.overwrite_remote_files:
                    print('Checking if {}/{} already exists'.format(
                        directory, filename))
                    if self.file_service.exists(self.file_storage_share,
                                                directory_name=directory,
                                                file_name=filename):
                        print('{}/{} already exists'.format(
                            directory, filename))
                        if file_details is None:
                            self.db.saveFileStatus(
                                script_name=self.script_name,
                                file_original_name=old_filename,
                                file_upload_path=directory,
                                file_upload_name=filename,
                                file_status='Other',
                                notes='Uploaded Before')
                        else:
                            self.db.saveFileStatus(id=file_details['id'],
                                                   file_upload_path=directory,
                                                   file_upload_name=filename,
                                                   file_status='Other',
                                                   notes='Uploaded Before')
                        return
                self.file_service.create_file_from_path(
                    self.file_storage_share,
                    directory,
                    filename,
                    path,
                    content_settings=ContentSettings(
                        content_type='application/pdf'))
                if file_details is None:
                    self.db.saveFileStatus(script_name=self.script_name,
                                           file_original_name=old_filename,
                                           file_upload_path=directory,
                                           file_upload_name=filename,
                                           file_status='Uploaded')
                else:
                    self.db.saveFileStatus(id=file_details['id'],
                                           file_upload_path=directory,
                                           file_upload_name=filename,
                                           file_status='Uploaded')
                print('{} uploaded'.format(path))
                retries = 3
            except Exception as e:
                print('Error uploading to Azure file storage,', str(e))
                filename = old_filename
                retries += 1
Example 17
  backup_bz2.add(repertoire_de_sauvegarde+'/docker-compose.yml')
  backup_bz2.close() # close the archive file

  print("File compression and backup OK!")
  logging.debug("File compression and backup OK!")
#  syslog.syslog(syslog.LOG_DEBUG,"File compression and backup OK!")

# Backup to Microsoft AZURE #

# Create a subdirectory: save_<today's date>

  print("Creating subdirectory save_"+str(BACKUP_DATE)+" on Microsoft AZURE ...")
  logging.debug("Creating subdirectory save_"+str(BACKUP_DATE)+" on Microsoft AZURE ...") # warning
#  syslog.syslog(syslog.LOG_DEBUG, "Creating subdirectory save_"+str(BACKUP_DATE)+" on Microsoft AZURE ...") # warning

  file_service.create_directory(AZURE_REP_BKP,'save_'+str(BACKUP_DATE))

  print("Subdirectory save_"+str(BACKUP_DATE)+" created on Microsoft AZURE OK!")
  logging.debug("Subdirectory save_"+str(BACKUP_DATE)+" created on Microsoft AZURE OK!") # warning
#  syslog.syslog(syslog.LOG_DEBUG, "Subdirectory save_"+str(BACKUP_DATE)+" created on Microsoft AZURE OK!") # warning

# copy the backup files to the Microsoft AZURE directory

  print("Copying backup files to the Microsoft AZURE directory ...")
  logging.debug("Copying backup files to the Microsoft AZURE directory ...") # warning
#  syslog.syslog(syslog.LOG_DEBUG,"Copying backup files to the Microsoft AZURE directory ...") # warning

  file_service.create_file_from_path(AZURE_REP_BKP,'save_'+str(BACKUP_DATE),'save_'+str(BACKUP_DATE)+'db.sql',repertoire_de_sauvegarde+'/save_'+str(BACKUP_DATE)+'db.sql')
  file_service.create_file_from_path(AZURE_REP_BKP,'save_'+str(BACKUP_DATE),'save_'+str(BACKUP_DATE)+'.tar.bz2',repertoire_de_sauvegarde+'/save_'+str(BACKUP_DATE)+'.tar.bz2')

  print("Backup files copied to the Microsoft AZURE directory OK!")
class StorageHelper(object):
    """Handle details related to a single storage account and share.
    Instantiate this object with information sufficient to
    uniquely identify a storage account and a file share within it.
    Then .account can be used to retrieve the Azure SDK for Python
    object corresponding to the account, and .key can be used to
    get an access key for it.
    For both those properties, if the value mentioned doesn't exist,
    it will be created upon first property access.
    """
    def __init__(self,
                 client_data,
                 resource_helper,
                 name,
                 account=None,
                 default_share='share'):
        self.name = name
        self.default_share = default_share
        self._account = account
        self._key = os.environ.get('AZURE_STORAGE_KEY')
        self.resource_helper = resource_helper
        self.client = StorageManagementClient(*client_data)
        self.file_service = FileService(
            account_name=self.account.name,
            account_key=self.key,
        )

    @property
    def account(self):
        """Return the managed StorageAccounts object.
        If no such account exists, create it first.
        """
        if self._account is None:
            print('Creating storage account...')
            # Creating a storage account that already exists is an error, so check name availability first
            name_check = self.client.storage_accounts.check_name_availability(
                self.name)
            if name_check.name_available:
                storage_creation = self.client.storage_accounts.create(
                    self.resource_helper.group.name, self.name,
                    StorageAccountCreateParameters(
                        sku=StorageAccountSku(StorageSkuName.standard_lrs),
                        kind=StorageKind.storage,
                        location=self.resource_helper.group.location,
                    ))
                storage = storage_creation.result()
            else:
                try:
                    storage = self.client.storage_accounts.get_properties(
                        self.resource_helper.group.name, self.name)
                except CloudError:
                    print('Storage account {} already exists'
                          ' in a resource group other than {}.'.format(
                              self.name, self.resource_helper.group.name))
            print('Got storage account:', storage.name)
            self._account = storage
        return self._account

    @property
    def key(self):
        """Get the first available storage key.
        This will crash if there are no available storage keys,
        which is unlikely since two are created along with a storage account.
        """
        if self._key is None:
            storage_keys = self.client.storage_accounts.list_keys(
                self.resource_helper.group.name, self.account.name)
            self._key = next(iter(storage_keys.keys)).value
        return self._key

    def upload_file(self, path, sharename):
        """Upload a file into the default share on the storage account.
        If the share doesn't exist, create it first.
        """

        self.file_service.create_file_from_path(
            self.default_share if sharename is None else sharename,
            None,
            os.path.basename(path),
            path,
        )
        return '/'.join([self.default_share, os.path.basename(path)])

    def download_file(self, sharename, filename):
        self.file_service.get_file_to_path(sharename, None, filename, filename)

    def delete_file(self, sharename, filename):
        self.file_service.delete_file(sharename, None, filename)

    def create_share(self, sharename):
        self.file_service.create_share(sharename)

    def create_directory(self, sharename, directoryname):
        self.file_service.create_directory(sharename, directoryname)

    def list_directories_and_files(self, sharename):
        generator = self.file_service.list_directories_and_files(sharename)
        return [file_or_dir.name for file_or_dir in generator]

    def list_shares(self):
        shares = list(self.file_service.list_shares(include_snapshots=True))
        sharelist = [fileshare.name for fileshare in shares]
        print(sharelist)
        return sharelist
Example 19
class AzureFileManager():
    def __init__(self):
        # fetch config data
        conf = Configuration()
        # create Azure File share service
        self.file_service = FileService(
            account_name=conf.account_name, account_key=conf.account_key)
        # set azure share file name (container)
        self.file_share = conf.file_share

    def upload_file(self, upload_path, file_path):
        if not os.path.isfile(file_path):
            print("Your file is not exists, check your file path and try again.")
            return
        filename = os.path.basename(file_path)
        # strip quotes from the path; if the path ends up empty, set upload_path to None so the file goes to the root directory
        upload_path = upload_path.strip().replace("'", '').replace('"', '')
        # strip a leading and trailing / or \
        if upload_path.endswith('/') or upload_path.endswith('\\'):
            upload_path = upload_path[:-1]
        if upload_path.startswith('/') or upload_path.startswith('\\'):
            upload_path = upload_path[1:]
        # sanity check
        upload_path = upload_path if len(upload_path) >= 1 else None

        print("Start uploading...")
        try:
            # create sub directories
            self.create_sub_directories(upload_path)
            # upload
            self.file_service.create_file_from_path(
                share_name=self.file_share,  # file_share name in azure
                directory_name=upload_path,  # server directories address. None => root directory
                file_name=filename,          # Name of file to create in azure
                local_file_path=file_path)
            print("'{0}' has been successfully uploaded".format(filename))
        except Exception:
            print("Failed to upload '{0}', please try again".format(filename))

    def download_file(self, file_path):
        """ download file from azure, enter file path in azure """
        # check file path was not empty
        file_path = file_path.strip().replace("'", '').replace('"', '')
        if len(file_path) == 0:
            print("Please enter a file path")
            return
        filename = os.path.basename(file_path)
        dir_path = os.path.dirname(file_path)
        # if parent path was not available, use None => root directory
        dir_path = dir_path if dir_path else None

        print("Downloading...")
        try:
            self.file_service.get_file_to_path(
                share_name=self.file_share,
                directory_name=dir_path,  # The path to the directory in azure
                file_name=filename,  # Name of existing file in azure
                # Path of file to write to local machine
                file_path="{0}".format(filename))
            print(
                "'{0}' has been successfully downloaded and saved in current directory.".format(filename))
        except Exception:
            print("Failed to download '{0}', either file doesn't exist or you are offline.".format(
                filename))

    def get_list_of_files(self, dir_name=None):
        """ show list of all files and all directories in azure"""
        generator = self.file_service.list_directories_and_files(
            share_name=self.file_share,
            directory_name=dir_name)
        parent = "" if dir_name == None else dir_name
        for file_or_dir in generator:
            if not re.match(r"(.[a-z]*[A-Z]*[0-9]*)$", file_or_dir.name):
                # file
                if len(parent) == 0:
                    print(file_or_dir.name)
                else:
                    print("{0}/{1}".format(parent, file_or_dir.name))
            else:
                # dir
                if len(parent) == 0:
                    self.get_list_of_files(file_or_dir.name)
                else:
                    self.get_list_of_files(
                        "{0}/{1}".format(parent, file_or_dir.name))

    def create_sub_directories(self, path):
        """ create sub directories in Azure """
        if path is None:
            return
        dirs = os.path.normpath(path).split(os.path.sep)
        parent = ''
        for dir in dirs:
            parent += dir if len(parent) == 0 else '/'+dir
            self.file_service.create_directory(self.file_share, parent)
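
A minimal usage sketch, assuming the Configuration object referenced above supplies account_name, account_key and file_share; paths and file names here are illustrative only.

manager = AzureFileManager()
manager.upload_file("reports/2021", "./summary.pdf")    # creates reports/2021 on the share, then uploads
manager.get_list_of_files()                             # recursively prints files and directories
manager.download_file("reports/2021/summary.pdf")       # saves summary.pdf into the current directory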
Example 20
class AzureFileWriter(FilebaseBaseWriter):
    """
    Writes items to Azure file shares. It is a file-based writer, so the filebase
    option is available.

        - account_name (str)
            Public access name of the Azure account.

        - account_key (str)
            Public access key to the Azure account.

        - share (str)
            File share name.

        - filebase (str)
            Base path to store the items in the share.

    """

    supported_options = {
        "account_name": {"type": six.string_types, "env_fallback": "EXPORTERS_AZUREWRITER_NAME"},
        "account_key": {"type": six.string_types, "env_fallback": "EXPORTERS_AZUREWRITER_KEY"},
        "share": {"type": six.string_types},
    }

    def __init__(self, options, meta, *args, **kw):
        from azure.storage.file import FileService

        super(AzureFileWriter, self).__init__(options, meta, *args, **kw)
        account_name = self.read_option("account_name")
        account_key = self.read_option("account_key")
        self.azure_service = FileService(account_name, account_key)
        self.share = self.read_option("share")
        self.azure_service.create_share(self.share)
        self.logger.info("AzureWriter has been initiated." "Writing to share {}".format(self.share))
        self.set_metadata("files_counter", Counter())
        self.set_metadata("files_written", [])

    def write(self, dump_path, group_key=None, file_name=None):
        if group_key is None:
            group_key = []
        self._write_file(dump_path, group_key, file_name)

    def _update_metadata(self, dump_path, filebase_path, file_name):
        buffer_info = self.write_buffer.metadata[dump_path]
        file_info = {
            "file_name": file_name,
            "filebase_path": filebase_path,
            "size": buffer_info["size"],
            "number_of_records": buffer_info["number_of_records"],
        }
        files_written = self.get_metadata("files_written")
        files_written.append(file_info)
        self.set_metadata("files_written", files_written)
        self.get_metadata("files_counter")[filebase_path] += 1

    def _ensure_path(self, filebase):
        path = filebase.split("/")
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = "/".join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _write_file(self, dump_path, group_key, file_name=None):
        filebase_path, file_name = self.create_filebase_name(group_key, file_name=file_name)
        self._ensure_path(filebase_path)
        self.azure_service.create_file_from_path(self.share, filebase_path, file_name, dump_path, max_connections=5)
        self._update_metadata(dump_path, filebase_path, file_name)

    def get_file_suffix(self, path, prefix):
        number_of_keys = self.get_metadata("files_counter").get(path, 0)
        suffix = "{}".format(str(number_of_keys))
        return suffix

    def _check_write_consistency(self):
        from azure.common import AzureMissingResourceHttpError

        for file_info in self.get_metadata("files_written"):
            try:
                afile = self.azure_service.get_file_properties(
                    self.share, file_info["filebase_path"], file_info["file_name"]
                )
                file_size = afile.properties.content_length
                if str(file_size) != str(file_info["size"]):
                    raise InconsistentWriteState(
                        "File {} has unexpected size. (expected {} - got {})".format(
                            file_info["file_name"], file_info["size"], file_size
                        )
                    )
            except AzureMissingResourceHttpError:
                raise InconsistentWriteState("Missing file {}".format(file_info["file_name"]))
        self.logger.info("Consistency check passed")
Example no. 21
def main(path, debug, remote_directory, typhoonname):
    initialize.setup_cartopy()
    start_time = datetime.now()
    print(
        '---------------------AUTOMATION SCRIPT STARTED---------------------------------'
    )
    print(str(start_time))
    #%% check for active typhoons
    print(
        '---------------------check for active typhoons---------------------------------'
    )
    print(str(start_time))
    remote_dir = remote_directory
    if debug:
        typhoonname = 'SURIGAE'
        remote_dir = '20210421120000'
        logger.info(f"DEBUGGING piepline for typhoon{typhoonname}")
        Activetyphoon = [typhoonname]
    else:
        # If passed typhoon name is None or empty string
        if not typhoonname:
            Activetyphoon = Check_for_active_typhoon.check_active_typhoon()
            if not Activetyphoon:
                logger.info("No active typhoon in PAR stop pipeline")
                sys.exit()
            logger.info(f"Running on active Typhoon(s) {Activetyphoon}")
        else:
            Activetyphoon = [typhoonname]
            remote_dir = remote_directory
            logger.info(f"Running on custom Typhoon {Activetyphoon}")

    Alternative_data_point = (start_time -
                              timedelta(hours=24)).strftime("%Y%m%d")

    date_dir = start_time.strftime("%Y%m%d%H")
    Input_folder = os.path.join(path, f'forecast/Input/{date_dir}/Input/')
    Output_folder = os.path.join(path, f'forecast/Output/{date_dir}/Output/')

    if not os.path.exists(Input_folder):
        os.makedirs(Input_folder)
    if not os.path.exists(Output_folder):
        os.makedirs(Output_folder)
    #download NOAA rainfall
    try:
        #Rainfall_data_window.download_rainfall_nomads(Input_folder,path,Alternative_data_point)
        Rainfall_data.download_rainfall_nomads(Input_folder, path,
                                               Alternative_data_point)
        rainfall_error = False
    except:
        traceback.print_exc()
        #logger.warning(f'Rainfall download failed, performing download in R script')
        logger.info(
            'Rainfall download failed, performing download in R script')
        rainfall_error = True
    ###### download UCL data

    try:
        ucl_data.create_ucl_metadata(path, os.environ['UCL_USERNAME'],
                                     os.environ['UCL_PASSWORD'])
        ucl_data.process_ucl_data(path, Input_folder,
                                  os.environ['UCL_USERNAME'],
                                  os.environ['UCL_PASSWORD'])
    except:
        logger.info('UCL download failed')
    #%%
    ##Create grid points to calculate Winfield
    cent = Centroids()
    cent.set_raster_from_pnt_bounds((118, 6, 127, 19), res=0.05)
    # this option is added to make the script scalable globally (TODO)
    #cent.set_raster_from_pnt_bounds((LonMin,LatMin,LonMax,LatMax), res=0.05)
    cent.check()
    cent.plot()
    ####
    admin = gpd.read_file(
        os.path.join(path, "./data-raw/phl_admin3_simpl2.geojson"))
    df = pd.DataFrame(data=cent.coord)
    df["centroid_id"] = "id" + (df.index).astype(str)
    centroid_idx = df["centroid_id"].values
    ncents = cent.size
    df = df.rename(columns={0: "lat", 1: "lon"})
    df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))
    #df.to_crs({'init': 'epsg:4326'})
    df.crs = {'init': 'epsg:4326'}
    df_admin = sjoin(df, admin, how="left").dropna()

    # Sometimes the ECMWF ftp server complains about too many requests
    # This code allows several retries with some sleep time in between
    n_tries = 0
    while True:
        try:
            logger.info("Downloading ECMWF typhoon tracks")
            bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=remote_dir)
            fcast = TCForecast()
            fcast.fetch_ecmwf(files=bufr_files)
        except ftplib.all_errors as e:
            n_tries += 1
            if n_tries >= ECMWF_MAX_TRIES:
                logger.error(
                    f' Data downloading from ECMWF failed: {e}, '
                    f'reached limit of {ECMWF_MAX_TRIES} tries, exiting')
                sys.exit()
            logger.error(
                f' Data downloading from ECMWF failed: {e}, retrying after {ECMWF_SLEEP} s'
            )
            time.sleep(ECMWF_SLEEP)
            continue
        break

    #%% filter data downloaded in the above step for active typhoons  in PAR
    # filter tracks with name of current typhoons and drop tracks with only one timestep
    fcast.data = [
        track_data_clean.track_data_clean(tr) for tr in fcast.data
        if (tr.time.size > 1 and tr.name in Activetyphoon)
    ]

    # fcast.data = [tr for tr in fcast.data if tr.name in Activetyphoon]
    # fcast.data = [tr for tr in fcast.data if tr.time.size>1]
    for typhoons in Activetyphoon:
        #typhoons=Activetyphoon[0]
        logger.info(f'Processing data {typhoons}')
        fname = open(
            os.path.join(path, 'forecast/Input/',
                         "typhoon_info_for_model.csv"), 'w')
        fname.write('source,filename,event,time' + '\n')
        if not rainfall_error:
            line_ = 'Rainfall,' + '%srainfall' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName #
            fname.write(line_ + '\n')

        line_ = 'Output_folder,' + '%s' % Output_folder + ',' + typhoons + ',' + date_dir  #StormName #
        #line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
        fname.write(line_ + '\n')

        #typhoons='SURIGAE'  # to run it manually for any typhoon
        # select windspeed for HRS model

        fcast.data = [tr for tr in fcast.data if tr.name == typhoons]
        tr_HRS = [tr for tr in fcast.data if (tr.is_ensemble == 'False')]

        if tr_HRS != []:
            # 0.84 is the conversion factor for ECMWF 10-min to 1-min average wind
            HRS_SPEED = (tr_HRS[0].max_sustained_wind.values / 0.84).tolist()
            dfff = tr_HRS[0].to_dataframe()
            dfff[['VMAX', 'LAT',
                  'LON']] = dfff[['max_sustained_wind', 'lat', 'lon']]
            dfff['YYYYMMDDHH'] = dfff.index.values
            dfff['YYYYMMDDHH'] = dfff['YYYYMMDDHH'].apply(
                lambda x: x.strftime("%Y%m%d%H%M"))
            dfff['STORMNAME'] = typhoons
            dfff[['YYYYMMDDHH', 'VMAX', 'LAT', 'LON',
                  'STORMNAME']].to_csv(os.path.join(Input_folder,
                                                    'ecmwf_hrs_track.csv'),
                                       index=False)
            line_ = 'ecmwf,' + '%secmwf_hrs_track.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName #
            #line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
            fname.write(line_ + '\n')
            # Adjust track time step
            data_forced = [
                tr.where(tr.time <= max(tr_HRS[0].time.values), drop=True)
                for tr in fcast.data
            ]
            # data_forced = [track_data_clean.track_data_force_HRS(tr,HRS_SPEED) for tr in data_forced] # forced with HRS windspeed

            #data_forced= [track_data_clean.track_data_clean(tr) for tr in fcast.data] # taking speed of ENS
            # interpolate to 3h steps from the original 6h
            #fcast.equal_timestep(3)
        else:
            len_ar = np.min([len(var.lat.values) for var in fcast.data])
            lat_ = np.ma.mean([var.lat.values[:len_ar] for var in fcast.data],
                              axis=0)
            lon_ = np.ma.mean([var.lon.values[:len_ar] for var in fcast.data],
                              axis=0)
            YYYYMMDDHH = pd.date_range(fcast.data[0].time.values[0],
                                       periods=len_ar,
                                       freq="H")
            vmax_ = np.ma.mean(
                [var.max_sustained_wind.values[:len_ar] for var in fcast.data],
                axis=0)
            d = {
                'YYYYMMDDHH': YYYYMMDDHH,
                "VMAX": vmax_,
                "LAT": lat_,
                "LON": lon_
            }
            dfff = pd.DataFrame(d)
            dfff['STORMNAME'] = typhoons
            dfff['YYYYMMDDHH'] = dfff['YYYYMMDDHH'].apply(
                lambda x: x.strftime("%Y%m%d%H%M"))
            dfff[['YYYYMMDDHH', 'VMAX', 'LAT', 'LON',
                  'STORMNAME']].to_csv(os.path.join(Input_folder,
                                                    'ecmwf_hrs_track.csv'),
                                       index=False)
            line_ = 'ecmwf,' + '%secmwf_hrs_track.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName #
            #line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
            fname.write(line_ + '\n')
            data_forced = fcast.data

        # calculate windfields for each ensemble
        threshold = 0  # threshold to filter the dataframe / reduce data
        df = pd.DataFrame(data=cent.coord)
        df["centroid_id"] = "id" + (df.index).astype(str)
        centroid_idx = df["centroid_id"].values
        ncents = cent.size
        df = df.rename(columns={0: "lat", 1: "lon"})

        # calculate wind field for each ensemble member
        list_intensity = []
        distan_track = []
        for tr in data_forced:
            logger.info(
                f"Running on ensemble # {tr.ensemble_number} for typhoon {tr.name}"
            )
            track = TCTracks()
            typhoon = TropCyclone()
            track.data = [tr]
            #track.equal_timestep(3)
            tr = track.data[0]
            typhoon.set_from_tracks(track, cent, store_windfields=True)
            # Make intensity plot using the high resolution member
            if tr.is_ensemble == 'False':
                logger.info("High res member: creating intensity plot")
                plot_intensity.plot_inensity(typhoon=typhoon,
                                             event=tr.sid,
                                             output_dir=Output_folder,
                                             date_dir=date_dir,
                                             typhoon_name=tr.name)
            windfield = typhoon.windfields
            nsteps = windfield[0].shape[0]
            centroid_id = np.tile(centroid_idx, nsteps)
            intensity_3d = windfield[0].toarray().reshape(nsteps, ncents, 2)
            intensity = np.linalg.norm(intensity_3d, axis=-1).ravel()
            timesteps = np.repeat(track.data[0].time.values, ncents)
            #timesteps = np.repeat(tr.time.values, ncents)
            timesteps = timesteps.reshape((nsteps, ncents)).ravel()
            inten_tr = pd.DataFrame({
                'centroid_id': centroid_id,
                'value': intensity,
                'timestamp': timesteps,
            })
            inten_tr = inten_tr[inten_tr.value > threshold]
            inten_tr['storm_id'] = tr.sid
            inten_tr['ens_id'] = tr.sid + '_' + str(tr.ensemble_number)
            inten_tr['name'] = tr.name
            inten_tr = (pd.merge(inten_tr,
                                 df_admin,
                                 how='outer',
                                 on='centroid_id').dropna().groupby(
                                     ['adm3_pcode', 'ens_id'],
                                     as_index=False).agg(
                                         {"value": ['count', 'max']}))
            inten_tr.columns = ['adm3_pcode', 'storm_id', 'value_count', 'v_max']
            list_intensity.append(inten_tr)
            distan_track1 = []
            for index, row in df.iterrows():
                dist = np.min(
                    np.sqrt(
                        np.square(tr.lat.values - row['lat']) +
                        np.square(tr.lon.values - row['lon'])))
                distan_track1.append(dist * 111)
            dist_tr = pd.DataFrame({
                'centroid_id': centroid_idx,
                'value': distan_track1
            })
            dist_tr['storm_id'] = tr.sid
            dist_tr['name'] = tr.name
            dist_tr['ens_id'] = tr.sid + '_' + str(tr.ensemble_number)
            dist_tr = (pd.merge(dist_tr,
                                df_admin,
                                how='outer',
                                on='centroid_id').dropna().groupby(
                                    ['adm3_pcode', 'name', 'ens_id'],
                                    as_index=False).agg({'value': 'min'}))
            dist_tr.columns = ['adm3_pcode', 'name', 'storm_id', 'dis_track_min']
            distan_track.append(dist_tr)
        df_intensity_ = pd.concat(list_intensity)
        distan_track1 = pd.concat(distan_track)

        typhoon_df = pd.merge(df_intensity_,
                              distan_track1,
                              how='left',
                              on=['adm3_pcode', 'storm_id'])

        typhoon_df.to_csv(os.path.join(Input_folder, 'windfield.csv'),
                          index=False)

        line_ = 'windfield,' + '%swindfield.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName #
        #line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
        fname.write(line_ + '\n')
        fname.close()

        #############################################################
        #### Run IBF model
        #############################################################
        os.chdir(path)

        if platform == "linux" or platform == "linux2":  #check if running on linux or windows os
            # linux
            try:
                p = subprocess.check_call(
                    ["Rscript", "run_model_V2.R",
                     str(rainfall_error)])
            except subprocess.CalledProcessError as e:
                logger.error('failed to execute R script')
                raise ValueError(str(e))
        elif platform == "win32":  #if OS is windows edit the path for Rscript
            try:
                p = subprocess.check_call([
                    "C:/Program Files/R/R-4.1.0/bin/Rscript", "run_model_V2.R",
                    str(rainfall_error)
                ])
            except subprocess.CalledProcessError as e:
                logger.error('failed to execute R script')
                raise ValueError(str(e))

        #############################################################
        # send email in case of landfall-typhoon
        #############################################################

        image_filenames = list(Path(Output_folder).glob('*.png'))
        data_filenames = list(Path(Output_folder).glob('*.csv'))

        if image_filenames or data_filenames:
            message_html = """\
            <html>
            <body>
            <h1>IBF model run result </h1>
            <p>Please find attached a map and data with updated model run</p>
            <img src="cid:Impact_Data">
            </body>
            </html>
            """
            Sendemail.sendemail(
                smtp_server=os.environ["SMTP_SERVER"],
                smtp_port=int(os.environ["SMTP_PORT"]),
                email_username=os.environ["EMAIL_LOGIN"],
                email_password=os.environ["EMAIL_PASSWORD"],
                email_subject='Updated impact map for a new Typhoon in PAR',
                from_address=os.environ["EMAIL_FROM"],
                to_address_list=os.environ["EMAIL_TO_LIST"].split(','),
                cc_address_list=os.environ["EMAIL_CC_LIST"].split(','),
                message_html=message_html,
                filename_list=image_filenames + data_filenames)
        else:
            raise FileNotFoundError(
                f'No .png or .csv found in {Output_folder}')
            ##################### upload model output to 510 data lake ##############

        file_service = FileService(
            account_name=os.environ["AZURE_STORAGE_ACCOUNT"],
            protocol='https',
            connection_string=os.environ["AZURE_CONNECTING_STRING"])
        file_service.create_share('forecast')
        OutPutFolder = date_dir
        file_service.create_directory('forecast', OutPutFolder)

        for img_file in image_filenames:
            file_service.create_file_from_path(
                'forecast',
                OutPutFolder,
                os.fspath(img_file.parts[-1]),
                img_file,
                content_settings=ContentSettings(content_type='image/png'))

        for data_file in data_filenames:
            file_service.create_file_from_path(
                'forecast',
                OutPutFolder,
                os.fspath(data_file.parts[-1]),
                data_file,
                content_settings=ContentSettings(content_type='text/csv'))

        ##################### upload model input (rainfall + wind intensity) to 510 data lake ##############
        # To DO

    print(
        '---------------------AUTOMATION SCRIPT FINISHED---------------------------------'
    )
    print(str(datetime.now()))
Example no. 22
# PG DUMP
try:
    COMMANDS = ['pg_dump', '-F', 'c', '-b', '-v', '-f', './%s' % FILENAME]
    print("Running: '%s'" % (' '.join(COMMANDS)))
    exit_code = subprocess.call(COMMANDS)
    if exit_code != 0:
        raise Exception('Could not Backup, please check logs')

    # AZURE CONNECTION
    file_service = FileService(endpoint_suffix=AZURE_ENDPOINT_SUFFIX,
                               account_name=AZURE_ACCOUNT_NAME,
                               account_key=AZURE_ACCOUNT_KEY)

    # Check if AZURE_BACKUP_FOLDER exists, if not create it
    if not file_service.exists(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER):
        file_service.create_directory(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER)

    # Upload
    print("uploading to: '%s/%s/%s'" %
          (AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER, FILENAME))
    file_service.create_file_from_path(AZURE_SHARE_NAME,
                                       AZURE_BACKUP_FOLDER,
                                       FILENAME,
                                       FILENAME,
                                       progress_callback=upload_callback)

    # Cleaning Backup Files
    backup_files = file_service.list_directories_and_files(
        AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER)
    filenames = []
    for file in backup_files:
Example no. 23
RECEIPTS_DIRECTORY = 'delivery-receipts'
RECEIPTS_URL = ('http://www.cmegroup.com/delivery_reports/'
                'deliverable-commodities-under-registration.xls')
RECEIPTS_FILENAME_SUFFIX = '-deliverable-commodities-under-registration.xls'

STOCKS_DIRECTORY = 'stocks-of-grain'
STOCKS_URL = 'http://www.cmegroup.com/delivery_reports/stocks-of-grain-updated-tuesday.xls'
STOCKS_FILENAME_SUFFIX = '-stocks-of-grain-updated-tuesday.xls'


def filename(suffix):
    return datetime.now().strftime('%Y%m%d') + suffix


def get_bytes(url):
    return requests.get(url).content


f = FileService(account_name=ACCOUNT_NAME, account_key=KEY)
f.create_share(SHARE_NAME)

f.create_directory(SHARE_NAME, RECEIPTS_DIRECTORY)
f.create_file_from_bytes(SHARE_NAME, RECEIPTS_DIRECTORY,
                         filename(RECEIPTS_FILENAME_SUFFIX),
                         get_bytes(RECEIPTS_URL))

f.create_directory(SHARE_NAME, STOCKS_DIRECTORY)
f.create_file_from_bytes(SHARE_NAME, STOCKS_DIRECTORY,
                         filename(STOCKS_FILENAME_SUFFIX),
                         get_bytes(STOCKS_URL))
Example no. 24
import os
import urllib.request
import zipfile

from azure.storage.blob import BlockBlobService, PublicAccess
from azure.storage.file import FileService

ACCOUNT_NAME = os.environ['StorageAccountName']
ACCOUNT_KEY = os.environ['StorageAccountKey']
CONTAINER_NAME = os.environ['TelemetryContainerName']

az_blob_service = BlockBlobService(account_name=ACCOUNT_NAME,
                                   account_key=ACCOUNT_KEY)

az_blob_service.create_container(CONTAINER_NAME, fail_on_exist=False)

file_service = FileService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
file_service.create_share(share_name='model', quota=1)

source = os.environ['ModelZipUrl']
dest = 'model.zip'

urllib.request.urlretrieve(source, dest)

with zipfile.ZipFile(dest, "r") as zip_ref:
    zip_ref.extractall("model")

for root, dirs, files in os.walk('model', topdown=True):
    directory = os.path.relpath(root, 'model')
    if directory != '.':
        file_service.create_directory('model', directory)
    for f in files:
        file_service.create_file_from_path('model', directory, f,
                                           os.path.join(root, f))
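Going the other way, a flat directory on the share can be pulled back down with list_directories_and_files plus get_file_to_path, both of which appear elsewhere in these examples. A rough sketch, assuming the remote directory contains only files (no nested subdirectories) and using placeholder names:

import os
from azure.storage.file import FileService

def download_directory(svc, share, remote_dir, local_dir):
    # Download every entry of a flat share directory into local_dir.
    os.makedirs(local_dir, exist_ok=True)
    for item in svc.list_directories_and_files(share, directory_name=remote_dir):
        svc.get_file_to_path(share, remote_dir, item.name,
                             os.path.join(local_dir, item.name))

# Hypothetical usage, mirroring the 'model' share above:
# svc = FileService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
# download_directory(svc, 'model', 'some-subdir', './model-download')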
Example no. 25
containername = os.environ['AZURE_CONTAINER_NAME']
subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
resource_group_params = {'location' : location}
sku = 'standard_ragrs'
kind = 'BlobStorage'
storage_account_params = {'sku': sku, 'kind': kind, 'location': location}

# Configure Credentials
credentials = ServicePrincipalCredentials(client_id=os.environ['AZURE_CLIENT_ID'],secret=os.environ['AZURE_CLIENT_SECRET'],tenant=os.environ['AZURE_TENANT_ID'])
resource_client = ResourceManagementClient(credentials, subscription_id)
storage_client = StorageManagementClient(credentials, subscription_id)

# Create Resource Group & Storage Account
resource_client.resource_groups.create_or_update(resourcegroupname, resource_group_params)
create_sa = storage_client.storage_accounts.create(resourcegroupname, storageaccountname, {'location':'eastus','kind':'storage','sku':{'name':'standard_ragrs'}})
create_sa.wait()

# Create Container
sak = storage_client.storage_accounts.list_keys(resourcegroupname, storageaccountname)
storageaccountkey = sak.keys[0].value
storage_client = CloudStorageAccount(storageaccountname, storageaccountkey)
blob_service = storage_client.create_block_blob_service()
blob_service.create_container(containername,public_access=PublicAccess.Blob)

# Copy Files
file_service = FileService(account_name=storageaccountname, account_key=storageaccountkey)
file_service.create_share(containername)
file_service.create_directory(containername, 'directory1')
file_service.create_file_from_path(containername, 'directory1', '55224azuresetup.ps1', '55224azuresetup.ps1')

Example no. 26
    smtp.starttls()
    smtp.login(username, password)
    smtp.sendmail(send_from, send_to, msg.as_string())
    smtp.close()
    print('Email sent')


# Gmail login details
username = '******'
password = '******'
default_address = []

# Login Details for Azure Storage
file_service = FileService(account_name='killianoneachtain', account_key='KEY')
file_service.create_share('security')
file_service.create_directory('security', 'securityPhotos')

cwd = os.getcwd()  # Get the current working directory (cwd)
path = cwd + "/securityPhotos"
# change file permissions
access_rights = 0o755

# create a photo directory if none exists
try:
    os.mkdir(path, access_rights)
except OSError:
    print("Creation of the directory %s failed. \nFolder already exists!" %
          path)
else:
    print("Successfully created the directory %s " % path)
Example no. 27
from azure.storage.blob import BlockBlobService
from azure.storage.blob import PublicAccess
from azure.storage.file import FileService

STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']

table_service = TableService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)

table_service.create_table('cluster')

file_service = FileService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)
file_service.create_share(share_name='azureml-project', quota=1)
file_service.create_share(share_name='azureml-share', quota=1)

file_service.create_directory('azureml-share', 'Solution1')
file_service.create_directory('azureml-share', 'Solution2')

block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME, account_key=STORAGE_ACCOUNT_KEY)

container_name ='telemetry'
block_blob_service.create_container(container_name)  

source=os.environ['AML_ASSETS_URL']
dest='azureml_project.zip'

urllib.request.urlretrieve(source, dest)

with zipfile.ZipFile(dest,"r") as zip_ref:
    zip_ref.extractall("azureml-project")
Example no. 28
class AzureFileWriter(FilebaseBaseWriter):
    """
    Writes items to Azure file shares. It is a file-based writer, so the filebase
    option is available.

        - account_name (str)
            Public access name of the Azure account.

        - account_key (str)
            Public access key to the Azure account.

        - share (str)
            File share name.

        - filebase (str)
            Base path to store the items in the share.

    """
    supported_options = {
        'account_name': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_NAME'
        },
        'account_key': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_KEY'
        },
        'share': {
            'type': six.string_types
        }
    }

    def __init__(self, options, meta, *args, **kw):
        from azure.storage.file import FileService
        super(AzureFileWriter, self).__init__(options, meta, *args, **kw)
        account_name = self.read_option('account_name')
        account_key = self.read_option('account_key')
        self.azure_service = FileService(account_name, account_key)
        self.share = self.read_option('share')
        self.azure_service.create_share(self.share)
        self.logger.info('AzureWriter has been initiated. '
                         'Writing to share {}'.format(self.share))
        self.set_metadata('files_counter', Counter())
        self.set_metadata('files_written', [])

    def write(self, dump_path, group_key=None, file_name=None):
        if group_key is None:
            group_key = []
        self._write_file(dump_path, group_key, file_name)

    def _update_metadata(self, dump_path, filebase_path, file_name):
        buffer_info = self.write_buffer.metadata[dump_path]
        file_info = {
            'file_name': file_name,
            'filebase_path': filebase_path,
            'size': buffer_info['size'],
            'number_of_records': buffer_info['number_of_records']
        }
        files_written = self.get_metadata('files_written')
        files_written.append(file_info)
        self.set_metadata('files_written', files_written)
        self.get_metadata('files_counter')[filebase_path] += 1

    def _ensure_path(self, filebase):
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _write_file(self, dump_path, group_key, file_name=None):
        filebase_path, file_name = self.create_filebase_name(
            group_key, file_name=file_name)
        self._ensure_path(filebase_path)
        self.azure_service.create_file_from_path(
            self.share,
            filebase_path,
            file_name,
            dump_path,
            max_connections=5,
        )
        self._update_metadata(dump_path, filebase_path, file_name)

    def get_file_suffix(self, path, prefix):
        number_of_keys = self.get_metadata('files_counter').get(path, 0)
        suffix = '{}'.format(str(number_of_keys))
        return suffix

    def _check_write_consistency(self):
        from azure.common import AzureMissingResourceHttpError
        for file_info in self.get_metadata('files_written'):
            try:
                afile = self.azure_service.get_file_properties(
                    self.share, file_info['filebase_path'],
                    file_info['file_name'])
                file_size = afile.properties.content_length
                if str(file_size) != str(file_info['size']):
                    raise InconsistentWriteState(
                        'File {} has unexpected size. (expected {} - got {})'.
                        format(file_info['file_name'], file_info['size'],
                               file_size))
            except AzureMissingResourceHttpError:
                raise InconsistentWriteState('Missing file {}'.format(
                    file_info['file_name']))
        self.logger.info('Consistency check passed')
Example no. 29
class AFSLoader():
    def __init__(self, local_root: Path, afs_creds: dict = None):
        if afs_creds is None:
            afs_creds = get_afs_creds()
        self.afs_name = afs_creds["AFS_NAME"]
        self.afs_key = afs_creds["AFS_KEY"]
        self.afs_share = afs_creds["AFS_SHARE"]
        self.file_service = FileService(account_name=self.afs_name,
                                        account_key=self.afs_key)
        self.local_root = Path(local_root)

    def get_afs_creds(self):
        return self.afs_name, self.afs_key, self.afs_share

    def upload_data_afs(self, data_path: Path, push_data: bool = False):
        """
        Copy data to the AFS directory.

        :param data_path: <Path>. Path to the local data folder.
        :param push_data: If True, upload the data even if it already exists on AFS.
        :return: path of the directory in the AFS share.
        """
        logging.info("Sending data to AFS")
        checksum = md5_dir(data_path)[:10]
        afs_path = time.strftime("%Y-%m-%d-%H.%M") + '-' + checksum

        list_folder = self.file_service.list_directories_and_files(
            self.afs_share)
        for folder in list_folder:
            if checksum == folder.name[-10:]:
                logging.info("Folder for data already exist!")
                afs_path = folder.name
                logging.info("Data is in the AFS {}".format(folder.name))
                if push_data:
                    logging.warning("Rewriting data")
                    afs_path = folder.name
                else:
                    return afs_path
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path)

        for file in Path(data_path).iterdir():
            progress_callback = lambda current, total: logbar(
                current, total, f"Uploading {file.name}")
            self.file_service.create_file_from_path(
                share_name=self.afs_share,
                directory_name=afs_path,
                file_name=file.name,
                local_file_path=str(file),
                max_connections=cpu_count(),
                progress_callback=progress_callback)
        logging.info("Sending is over")
        return afs_path

    def download_data_afs(self, afs_path: Path, dst_path: Path = None):
        afs_path = Path(afs_path)
        if not dst_path:
            assert self.local_root is not None
            dst_path = self.local_root

        list_folder = self.file_service.list_directories_and_files(
            self.afs_share, directory_name=afs_path)
        try:
            os.mkdir(self.local_root / afs_path)
        except FileExistsError:
            print(f"Directory {self.local_root / afs_path} was rewritten ")
        for file in list_folder:
            progress_callback = lambda current, total: logbar(
                current, total, f"Downloading {file.name}")
            self.file_service.get_file_to_path(
                share_name=self.afs_share,
                directory_name=afs_path,
                file_name=file.name,
                file_path=str(dst_path / afs_path / file.name),
                progress_callback=progress_callback)
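A hedged usage sketch for the loader above; the credential values and local paths are placeholders, and get_afs_creds, md5_dir and logbar are assumed to come from the surrounding project:

from pathlib import Path

creds = {"AFS_NAME": "mystorageaccount",   # keys mirror those read in __init__
         "AFS_KEY": "<account-key>",
         "AFS_SHARE": "myshare"}
loader = AFSLoader(local_root=Path("./data"), afs_creds=creds)

# Upload a local folder; the returned afs_path is the timestamp+checksum
# directory that was created (or reused) on the share.
afs_path = loader.upload_data_afs(Path("./data/train"), push_data=False)

# Later, pull the same folder back under local_root.
loader.download_data_afs(afs_path)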
Example no. 30
SAVER_FOLDER = "./" + SAVER
TRAIN_DATA = "data-04-zoo.csv"
RESULT_FILE = 'result.json'
FILE_SHARE = 'meltingpang-savedata'

for file in os.listdir(SAVER_FOLDER):
    os.remove(SAVER_FOLDER + "/" + file)

file_service = FileService(
    account_name='meltingpang',
    account_key=
    'aS/W8kwqNXDicJgQNnobqkWItcr9vNW3+KO8n+V5qRqY0X3Z+XGBS1sUeLfYtyneUwaose59rbhEziPpFs2qKw=='
)

file_service.get_file_to_path(FILE_SHARE, None, TRAIN_DATA, TRAIN_DATA)
file_service.create_directory(FILE_SHARE, SAVER)

xy = np.loadtxt(TRAIN_DATA, delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
nb_classes = 7

X = tf.placeholder(tf.float32, [None, 16])
Y = tf.placeholder(tf.int32, [None, 1])

Y_one_hot = tf.one_hot(Y, nb_classes)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
Example no. 31
class ModelGymClient:
    config = {}
    project_root = ""
    project_name = ""
    user = ""

    def __init__(self,
                 config=None,
                 config_path=MODELGYM_CONFIG["default_config_path"]):
        if config_path:
            self.config = self.__config_by_path(config_path)
        if isinstance(config, dict):
            self.config.update(config)
        elif config:
            raise TypeError("config must be a dictionary!")

        project_root = Path(self.config["local_project_root"]).expanduser()
        self.project_root = project_root
        self.project_name = Path(self.project_root.parts[-1])
        if not project_root.is_dir():
            project_root.mkdir(parents=True, exist_ok=True)
        user_folder = self.project_root / self.config["user"]
        self.user = self.config["user"]
        if not user_folder.is_dir():
            user_folder.mkdir(parents=True, exist_ok=True)

        # self.stub = new_client()
        self.file_service = FileService(
            account_name=self.config['azurefs_acc_name'],
            account_key=self.config['azurefs_acc_key'])
        self.afs_share = self.config['azurefs_share']
        self.__get_client_transport_credentials(
            str(Path(self.config["client_cert"]).expanduser()),
            str(Path(self.config["client_key"]).expanduser()),
            str(Path(self.config["ca_cert"]).expanduser()))
        self.channel = grpc.secure_channel(
            self.config["connect_to"],
            self.creds,
            options=(
                ('grpc.max_send_message_length',
                 self.config["max_msg_size_megabytes"]),
                ('grpc.max_receive_message_length',
                 self.config["max_msg_size_megabytes"]),
            ))
        self.stub = wonderland_pb2_grpc.WonderlandStub(self.channel)
        self.check_user()

    def check_user(self):
        list_folder = self.file_service.list_directories_and_files(
            self.afs_share)
        for folder in list_folder:
            if self.user == folder.name:
                return True
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=self.user)
        return True

    def __get_client_transport_credentials(self, client_cert_path,
                                           client_key_path, ca_cert_path):
        client_cert_path = Path(client_cert_path).expanduser()
        client_key_path = Path(client_key_path).expanduser()
        ca_cert_path = Path(ca_cert_path).expanduser()
        path_ok = [
            client_cert_path.exists(),
            client_key_path.exists(),
            ca_cert_path.exists()
        ]
        if not all(path_ok):
            raise ValueError("One of credentials files does not exist")
        self.creds = grpc.ssl_channel_credentials(
            ca_cert_path.read_bytes(), client_key_path.read_bytes(),
            client_cert_path.read_bytes())

    def __config_by_path(self, path):
        path = Path(path).expanduser()
        if path.exists():
            with path.open() as file:
                config = yaml.safe_load(file)
            return config
        else:
            raise FileNotFoundError(
                "Config {} doesn't exist !!! Check ~/.wonder/config.yaml".
                format(path))

    def eval_model(self, model_info, data_path):
        model_path = self.send_model(model_info)
        job = Job(input=json.dumps({
            "model_path": str(model_path),
            "data_path": str(data_path)
        }),
                  kind="hyperopt")
        job = self.stub.CreateJob(job)
        self.stub.GetJob(RequestWithId(id=job.id))
        return job.id

    def gather_results(self, job_id_list, timeout):
        job_completed = {job_id: Job.PENDING for job_id in job_id_list}
        deadline = time.time() + timeout
        while True:
            time.sleep(5)
            for id in job_id_list:
                job = self.stub.GetJob(RequestWithId(id=id))
                job_completed[id] = job.status
            if not any(s in job_completed.values()
                       for s in (Job.PENDING, Job.RUNNING, Job.PULLED)):
                break
            if time.time() > deadline:
                print("Timeout was expired!")
                break

        results = []
        for i, id in enumerate(job_id_list):
            job = self.stub.GetJob(RequestWithId(id=id))
            if job.status == Job.COMPLETED:
                results += [{}]
            else:
                results.append(None)
            files = {}
            if job.output != "":
                files = json.loads(job.output)
            for file, path in files.items():
                self.file_service.get_file_to_path(
                    share_name=self.afs_share,
                    directory_name=Path(path).parent,
                    file_name=Path(path).name,
                    file_path=str(self.project_root / path))
                if file == 'output':
                    with open(self.project_root / path, "r") as f:
                        results[i]['output'] = json.load(f)
                if file == 'result_model_path':
                    results[i]['result_model_path'] = self.project_root / path
                if file == 'error':
                    with open(self.project_root / path, "r") as f:
                        logging.warning(f.read())
        return results

    def send_model(self, model_info):
        folder = "model-" + ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(12)
        ])
        model_path = self.project_root / self.user / folder / MODELGYM_CONFIG[
            "model_file"]
        try:
            model_folder = model_path.parent
            model_folder.mkdir()
        except FileExistsError:
            logging.warning("Model folder {} already exists!".format(model_folder))
        except FileNotFoundError:
            logging.warning(
                "Model folder {} is missing!".format(model_folder))
        with (model_path).open(mode="w") as file:
            json.dump(model_info, file, cls=NumpyEncoder)
        afs_path = Path(self.user) / folder / MODELGYM_CONFIG["model_file"]
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path.parent)
        self.file_service.create_file_from_path(share_name=self.afs_share,
                                                directory_name=afs_path.parent,
                                                file_name=afs_path.name,
                                                local_file_path=model_path,
                                                max_connections=cpu_count())
        return afs_path

    def send_data(self, data_path, push_data=False):
        """
        Copy data to the AFS DATA directory.

        :param data_path: <string>. Path to the local data file, as a string.
        :return: path in the AFS share.
        """
        logging.info("Sending data to AFS")
        checksum = get_data_hash(data_path)[:10]
        data_folder = time.strftime("%Y-%m-%d-%H.%M") + '-' + checksum
        afs_path = Path(MODELGYM_CONFIG["data_folder"]
                        ) / data_folder / MODELGYM_CONFIG["data_file"]

        list_folder = self.file_service.list_directories_and_files(
            self.afs_share, directory_name="DATA")
        for folder in list_folder:
            if checksum == folder.name[-10:]:
                logging.info("Folder for data already exist!")
                afs_path = Path(
                    "DATA") / folder.name / MODELGYM_CONFIG["data_file"]
                logging.info("Data is in the AFS {}".format(folder.name))
                if push_data:
                    logging.warning("Rewriting data")
                    afs_path = Path(
                        MODELGYM_CONFIG["data_folder"]
                    ) / folder.name / MODELGYM_CONFIG["data_file"]
                else:
                    return afs_path
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path.parent)
        self.file_service.create_file_from_path(share_name=self.afs_share,
                                                directory_name=afs_path.parent,
                                                file_name=afs_path.name,
                                                local_file_path=data_path,
                                                max_connections=cpu_count(),
                                                progress_callback=logbar)
        logging.info("Sending is over")
        return afs_path

    def from_project_root_path(self, path):
        path = Path(path)
        # if not path.exists():
        # logging.warning("{} is missing !!".format(path))
        try:
            relative_path = path.relative_to(self.project_root.parent)
            return str(relative_path)
        except ValueError:
            logging.warning("Path doesn't have project_root folder {}".format(
                self.project_root))
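A configuration sketch for the client above, assuming the config dict (or ~/.wonder/config.yaml) carries the keys read in __init__; every value below is a placeholder:

client_config = {
    "local_project_root": "~/modelgym-project",
    "user": "alice",
    "azurefs_acc_name": "mystorageaccount",
    "azurefs_acc_key": "<account-key>",
    "azurefs_share": "myshare",
    "client_cert": "~/.wonder/client.crt",
    "client_key": "~/.wonder/client.key",
    "ca_cert": "~/.wonder/ca.crt",
    "connect_to": "wonderland.example.org:50051",
    "max_msg_size_megabytes": 32 * 1024 * 1024,  # passed straight to the gRPC size options
}

# Instantiation contacts both the gRPC endpoint and the Azure file share,
# so it only works with real credentials:
# client = ModelGymClient(config=client_config)
# job_id = client.eval_model(model_info, data_path=client.send_data("train.csv"))
# results = client.gather_results([job_id], timeout=3600)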
Example no. 32
table_service.create_table('cluster')

asset = {
    'PartitionKey': 'predictivemaintenance',
    'RowKey': 'predictivemaintenance',
    'Status': 'Not Created'
}
table_service.insert_or_merge_entity('cluster', asset)

file_service = FileService(account_name=STORAGE_ACCOUNT_NAME,
                           account_key=STORAGE_ACCOUNT_KEY)
file_service.create_share(share_name='azureml-project', quota=1)
file_service.create_share(share_name='azureml-share', quota=1)

source = os.environ['AML_ASSETS_URL']
dest = 'azureml_project.zip'

urllib.request.urlretrieve(source, dest)

with zipfile.ZipFile(dest, "r") as zip_ref:
    zip_ref.extractall("azureml-project")

for root, dirs, files in os.walk('azureml-project', topdown=True):
    directory = os.path.relpath(root, 'azureml-project')
    if directory != '.':
        file_service.create_directory('azureml-project', directory)
    for f in files:
        file_service.create_file_from_path('azureml-project', directory, f,
                                           os.path.join(root, f))