def upload_file(fname, fpath):
    daemonname = fname.split(".")[0]
    i = 0
    fail_msg = ""
    while i <= MAX_RETRIES:
        try:
            svc = FileService(account_name=acctname, account_key=acctkey)
            l = [sonicversion, asicname, daemonname, hostname]
            e = []
            while len(e) != len(l):
                e.append(l[len(e)])
                svc.create_directory(sharename, "/".join(e))
                log_debug("Remote dir created: " + "/".join(e))
            svc.create_file_from_path(sharename, "/".join(l), fname, fpath)
            log_debug("Remote file created: name{} path{}".format(fname, fpath))
            break
        except Exception as e:
            log_err("core uploader failed: Failed during upload (" + str(e) + ")")
            fail_msg = str(e)
            i += 1
            if i >= MAX_RETRIES:
                raise Exception("Failed while uploading. msg(" + fail_msg +
                                ") after " + str(i) + " retries")
            time.sleep(PAUSE_ON_FAIL)
def upload_file(fname, fpath, coref):
    daemonname = fname.split(".")[0]
    i = 0
    fail_msg = ""
    while True:
        try:
            svc = FileService(account_name=acctname, account_key=acctkey)
            l = [sonicversion, asicname, daemonname, hostname]
            e = []
            while len(e) != len(l):
                e.append(l[len(e)])
                svc.create_directory(sharename, "/".join(e))
                log_debug("Remote dir created: " + "/".join(e))
            svc.create_file_from_path(sharename, "/".join(l), fname, fpath)
            log_debug("Remote file created: name{} path{}".format(fname, fpath))
            newcoref = (os.path.dirname(coref) + "/" + UPLOAD_PREFIX +
                        os.path.basename(coref))
            os.rename(coref, newcoref)
            break
        except Exception as ex:
            log_err("core uploader failed: Failed during upload (" + coref +
                    ") err: (" + str(ex) + ") retry:" + str(i))
            if not os.path.exists(fpath):
                break
            i += 1
            time.sleep(PAUSE_ON_FAIL)
def file_storage_connect():
    global file_service
    global file_storage_dir
    global file_storage_share
    global overwrite_remote_files
    file_storage_url = dbparameters['fs_server'].strip()
    file_storage_user = dbparameters['fs_username'].strip()
    file_storage_pwd = dbparameters['fs_password'].strip()
    file_storage_share = dbparameters['fs_share'].strip()
    file_storage_dir = dbparameters['fs_directory_prefix'].strip()
    overwrite_remote_files = dbparameters['overwrite_remote_files'].strip()
    file_service = FileService(account_name=file_storage_user,
                               account_key=file_storage_pwd)
    try:
        if file_service.exists(file_storage_share):
            print('Connection to Azure file storage successfully established...')
            if len(file_storage_dir) > 0 and not file_service.exists(
                    file_storage_share, directory_name=file_storage_dir):
                # create the directory prefix one level at a time
                subdirs = file_storage_dir.split('/')
                subdirfull = ""
                for subdir in subdirs:
                    subdirfull += subdir
                    file_service.create_directory(file_storage_share, subdirfull)
                    subdirfull += "/"
                print('Created directory:' + file_storage_dir)
        else:
            print('Failed to connect to Azure file storage, share does not exist: '
                  + file_storage_share)
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
def prepare_azure_file_share_service(config, dataset_directory='dataset_directory'):
    # Create a file share
    service = FileService(config.storage_account_name, config.storage_account_key)
    service.create_share(config.workspace_file_share, fail_on_exist=False)
    # Create a directory in the file share
    service.create_directory(config.workspace_file_share, dataset_directory,
                             fail_on_exist=False)
    return service
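For context, a minimal usage sketch of the helper above; the config object and its attribute values are placeholders invented for illustration, not part of the original snippet:

from collections import namedtuple

# Hypothetical config; only the three attributes the helper reads are needed.
Config = namedtuple('Config', ['storage_account_name', 'storage_account_key',
                               'workspace_file_share'])
config = Config('mystorageaccount', '<account-key>', 'workspace')

service = prepare_azure_file_share_service(config, dataset_directory='mnist')
# Both calls use fail_on_exist=False, so re-running is safe; uploads such as
# service.create_file_from_path('workspace', 'mnist', 'data.csv', 'data.csv')
# can follow.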
def upload(path: str):
    from azure.storage.file import FileService
    service = FileService(account_name=config['account_name'],
                          account_key=config['account_key'])
    # list_directories_and_files yields File/Directory objects, so compare names
    existing = [item.name for item in
                service.list_directories_and_files(config['share_name'])]
    if shared_directory not in existing:
        service.create_directory(config['share_name'], shared_directory)
    service.create_file_from_path(config['share_name'], shared_directory,
                                  path.split('/')[-1], path)
def create_azure_fileshare(share_prefix, account_name, account_key):
    """
    Generate a unique share name to avoid overlaps in shared infra
    :param share_prefix:
    :param account_name:
    :param account_key:
    :return:
    """
    # FIXME - Need to remove hardcoded directory link below
    d_dir = './WebInDeploy/bootstrap'
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    # archive_file_path = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        s = requests.Session()
        s.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=s)
        # print(file_service)
        if not file_service.exists(share_name):
            file_service.create_share(share_name)

        for d in ['config', 'content', 'software', 'license']:
            print('creating directory of type: {}'.format(d))
            if not file_service.exists(share_name, directory_name=d):
                file_service.create_directory(share_name, d)

            # FIXME - We only handle bootstrap files. May need to handle other dirs
            if d == 'config':
                for filename in os.listdir(d_dir):
                    print('creating file: {0}'.format(filename))
                    file_service.create_file_from_path(
                        share_name, d, filename, os.path.join(d_dir, filename))

    except AttributeError as ae:
        # this can be returned on bad auth information
        print(ae)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except AzureException as ahe:
        print(ahe)
        return str(ahe)
    except ValueError as ve:
        print(ve)
        return str(ve)

    print('all done')
    return share_name
def upload_scripts(config, job_name, filenames):
    service = FileService(config.storage_account['name'],
                          config.storage_account['key'])
    service.create_directory(config.fileshare_name, job_name, fail_on_exist=False)
    transfer_file = lambda fname: service.create_file_from_path(
        config.fileshare_name, job_name, os.path.basename(fname), fname)
    for filename in filenames:
        transfer_file(filename)
class S3AzureFileBypass(BaseS3Bypass):
    """
    Bypass executed by default when the data source is an S3 bucket and the
    data destination is an Azure share. It should be transparent to the user.
    Conditions are:

        - S3Reader and AzureFileWriter are used in the configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureFileWriter does not have items_limit set in the configuration.
        - AzureFileWriter has the default items_per_buffer_write and
          size_per_buffer_write.
    """

    def __init__(self, config, metadata):
        super(S3AzureFileBypass, self).__init__(config, metadata)
        from azure.storage.file import FileService
        self.azure_service = FileService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))
        self.share = self.read_option('writer', 'share')
        self.filebase_path = self._format_filebase_path(
            self.read_option('writer', 'filebase'))
        self._ensure_path(self.filebase_path)

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureFileWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureFileBypass, cls).meets_conditions(config)

    def _format_filebase_path(self, filebase):
        filebase_with_date = datetime.datetime.now().strftime(filebase)
        # warning: we strip the file prefix here, which could be unexpected
        filebase_path, prefix = os.path.split(filebase_with_date)
        return filebase_path

    def _ensure_path(self, filebase):
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _copy_s3_key(self, key):
        file_name = key.name.split('/')[-1]
        self.azure_service.copy_file(
            self.share,
            self.filebase_path,
            file_name,
            key.generate_url(S3_URL_EXPIRES_IN)
        )
def file():
    static_dir_path = "D:\\home\\site\\wwwroot\\static"
    static_file_dir_path = static_dir_path + '\\' + 'files'
    account_name = 'hanastragetest'
    account_key = 'acount_key'
    root_share_name = 'root'
    share_name = 'images'
    directory_url = ('https://hanastragetest.file.core.windows.net/'
                     + root_share_name + '/' + share_name)

    # create local save directory
    if not os.path.exists(static_file_dir_path):
        os.mkdir(static_file_dir_path)

    file_service = FileService(account_name=account_name, account_key=account_key)
    # create share
    file_service.create_share(root_share_name)
    # create directory
    file_service.create_directory(root_share_name, share_name)

    files = os.listdir(static_dir_path)
    for file in files:
        # delete the remote copy if it already exists
        if file_service.exists(root_share_name, share_name, file):
            file_service.delete_file(root_share_name, share_name, file)
        # file upload into the 'images' directory of the share
        file_service.create_file_from_path(
            root_share_name,
            share_name,
            file,
            static_dir_path + '\\' + file,
            content_settings=ContentSettings(content_type='image/png'))

    generator = file_service.list_directories_and_files(root_share_name, share_name)
    html = ""
    for file in generator:
        # file download (list entries are objects, so use their .name)
        file_save_path = static_file_dir_path + '\\' + file.name
        file_service.get_file_to_path(root_share_name, share_name,
                                      file.name, file_save_path)
        html = "{}<img src='{}'>".format(html, file_save_path)
    result = {
        "result": True,
        "data": {
            "file_or_dir_name": [file_or_dir.name for file_or_dir in generator]
        }
    }
    return make_response(json.dumps(result, ensure_ascii=False) + html)
class S3AzureFileBypass(BaseS3Bypass):
    """
    Bypass executed by default when the data source is an S3 bucket and the
    data destination is an Azure share. It should be transparent to the user.
    Conditions are:

        - S3Reader and AzureFileWriter are used in the configuration.
        - No filter modules are set up.
        - No transform module is set up.
        - No grouper module is set up.
        - AzureFileWriter does not have items_limit set in the configuration.
        - AzureFileWriter has the default items_per_buffer_write and
          size_per_buffer_write.
        - AzureFileWriter has the default write_buffer.
    """

    def __init__(self, config, metadata):
        super(S3AzureFileBypass, self).__init__(config, metadata)
        from azure.storage.file import FileService
        self.azure_service = FileService(
            self.read_option('writer', 'account_name'),
            self.read_option('writer', 'account_key'))
        self.share = self.read_option('writer', 'share')
        self.filebase_path = self._format_filebase_path(
            self.read_option('writer', 'filebase'))
        self._ensure_path(self.filebase_path)

    @classmethod
    def meets_conditions(cls, config):
        if not config.writer_options['name'].endswith('AzureFileWriter'):
            cls._log_skip_reason('Wrong writer configured')
            return False
        return super(S3AzureFileBypass, cls).meets_conditions(config)

    def _format_filebase_path(self, filebase):
        filebase_with_date = datetime.datetime.now().strftime(filebase)
        # warning: we strip the file prefix here, which could be unexpected
        filebase_path, prefix = os.path.split(filebase_with_date)
        return filebase_path

    def _ensure_path(self, filebase):
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _copy_s3_key(self, key):
        file_name = key.name.split('/')[-1]
        self.azure_service.copy_file(self.share, self.filebase_path, file_name,
                                     key.generate_url(S3_URL_EXPIRES_IN))
def create_share_name(self, remote_folder):
    parse_url = _parse_url(remote_folder)
    key = self.storage_client.storage_accounts.list_keys(
        self.resource_group_name, parse_url.account).keys[0].value
    fs = FileService(account_name=parse_url.account, account_key=key)
    return fs.create_directory(
        share_name=parse_url.container_or_share_name,
        directory_name=parse_url.path)
def create_azure_fileshare(files, share_prefix, account_name, account_key):
    # generate a unique share name to avoid overlaps in shared infra
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    archive_file_path = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        s = requests.Session()
        s.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=s)
        # print(file_service)
        if not file_service.exists(share_name):
            file_service.create_share(share_name)

        for d in ['config', 'content', 'software', 'license']:
            print('creating directory of type: {}'.format(d))
            if not file_service.exists(share_name, directory_name=d):
                file_service.create_directory(share_name, d)

            d_dir = os.path.join(archive_file_path, d)
            for filename in os.listdir(d_dir):
                print('creating file: {0}'.format(filename))
                file_service.create_file_from_path(
                    share_name, d, filename, os.path.join(d_dir, filename))

    except AttributeError as ae:
        # this can be returned on bad auth information
        print(ae)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except AzureException as ahe:
        print(ahe)
        return str(ahe)
    except ValueError as ve:
        print(ve)
        return str(ve)

    print('all done')
    return 'Azure file-share {} created successfully'.format(share_name)
def saveModel(customer, modelName, model, storage_account_name, storage_account_key):
    fileService = FileService(account_name=storage_account_name,
                              account_key=storage_account_key)
    if not fileService.exists('trainedmodels', customer):
        fileService.create_share('trainedmodels')
        fileService.create_directory('trainedmodels', customer)
    if not fileService.exists('trainedmodels', customer + '/' + modelName):
        fileService.create_directory('trainedmodels', customer + '/' + modelName)
    modelPickle = pickle.dumps(model)
    timestr = time.strftime('%Y%m%d-%H%M%S')
    fileName = modelName + '_' + timestr + '.pkl'
    fileService.create_file_from_bytes('trainedmodels',
                                       customer + '/' + modelName,
                                       fileName, modelPickle)
    print(fileName + ' saved.')
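A short usage sketch for saveModel above, assuming a scikit-learn model; the customer, model, and account values are placeholders:

from sklearn.linear_model import LogisticRegression

# Toy model standing in for a real trained one.
model = LogisticRegression().fit([[0.0], [1.0]], [0, 1])
saveModel(customer='contoso',
          modelName='churn',
          model=model,
          storage_account_name='mystorageaccount',
          storage_account_key='<account-key>')
# Creates the 'trainedmodels' share and contoso/churn/ if missing, then
# writes churn_<timestamp>.pkl there via create_file_from_bytes.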
def upload_to_share(self, region, resource_group_name, storage_account_name,
                    share_name, dir_name, tar_gz_file_to_upload):
    logging.info(
        "Uploading contents of '{}' to 'https://{}.file.core.windows.net/{}/{}'"
        .format(tar_gz_file_to_upload, storage_account_name, share_name, dir_name))
    self.create_storage_account_if_not_exists(region, resource_group_name,
                                              storage_account_name)
    storage_account_name, storage_key = self.get_storage_credentials(
        resource_group_name, storage_account_name)
    share_service = FileService(account_name=storage_account_name,
                                account_key=storage_key)
    self.create_share_if_not_exists(share_service, share_name)
    share_service.create_directory(share_name, dir_name)
    self.upload_tar_gz_contents(share_service, share_name, dir_name,
                                tar_gz_file_to_upload)
    return storage_account_name, storage_key
def upload_to_file_storage():
    # init file manager
    fnm = FilenameManager()
    # get a list of pdf files in dir_pdfs
    template = dir_upload + "**"
    if operating_system == 'mac' or operating_system == 'linux':
        template += '/*.pdf'
    elif operating_system == 'windows':
        template += '\\*.pdf'
    lpdfs = glob.glob(template, recursive=True)
    lpdfs.sort()
    # os.chdir(dir_pdfs)  # needed for ftp.storbinary('STOR ...'), which works with filenames, not paths
    # connect to the file storage and upload files
    try:
        file_storage_url = dparameters['fs_server'].strip()
        file_storage_user = dparameters['fs_username'].strip()
        file_storage_pwd = dparameters['fs_password'].strip()
        file_storage_share = dparameters['fs_share'].strip()
        file_storage_dir = dparameters['fs_directory_prefix'].strip()
        file_service = FileService(account_name=file_storage_user,
                                   account_key=file_storage_pwd)
        try:
            if file_service.exists(file_storage_share):
                print('Connection to Azure file storage successfully established...')
                if len(file_storage_dir) > 0 and not file_service.exists(
                        file_storage_share, directory_name=file_storage_dir):
                    file_service.create_directory(file_storage_share, file_storage_dir)
                    print('Created directory:' + file_storage_dir)
            else:
                print('Failed to connect to Azure file storage, share does not exist: '
                      + file_storage_share)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)
        for pdffile in lpdfs:
            file_details = db.readFileStatus(file_original_name=pdffile,
                                             file_status='Uploaded')
            if file_details is None:
                file_id = None
                file_details = db.readFileStatus(file_original_name=pdffile,
                                                 file_status='Classified')
                if file_details is not None:
                    file_id = file_details["id"]
                dir, rpdffile = ntpath.split(pdffile)
                destinationdir = ''
                year = ''
                if (dir + '\\') == dir_upload or (dir + '/') == dir_upload:
                    destinationdir = 'Unclassified'
                else:
                    dir, year = ntpath.split(dir)
                    dir, destinationdir = ntpath.split(dir)
                retries = 0
                while retries < 3:
                    try:
                        path = pdffile
                        print('Uploading {}'.format(path))
                        filename = pdffile
                        remote_filename = fnm.azure_validate_filename(rpdffile)
                        if not remote_filename:
                            return
                        if len(file_storage_dir) > 0:
                            directory = file_storage_dir + '/' + destinationdir
                        else:
                            directory = destinationdir
                        if not file_service.exists(file_storage_share,
                                                   directory_name=directory):
                            file_service.create_directory(file_storage_share, directory)
                        # guard: 'Unclassified' uploads have no year subdirectory
                        if year:
                            directory += '/' + year
                            if not file_service.exists(file_storage_share,
                                                       directory_name=directory):
                                file_service.create_directory(file_storage_share, directory)
                        print('Checking if {}/{} already exists'.format(
                            directory, remote_filename))
                        if file_service.exists(file_storage_share,
                                               directory_name=directory,
                                               file_name=remote_filename):
                            print('{}/{} already exists'.format(directory, remote_filename))
                            if file_id is None:
                                db.saveFileStatus(script_name=script_name,
                                                  file_original_name=pdffile,
                                                  file_upload_path=directory,
                                                  file_upload_name=remote_filename,
                                                  file_status='Uploaded')
                            else:
                                db.saveFileStatus(id=file_details["id"],
                                                  file_upload_path=directory,
                                                  file_upload_name=remote_filename,
                                                  file_status='Uploaded')
                            os.remove(pdffile)
                            break
                        file_service.create_file_from_path(
                            file_storage_share, directory, remote_filename, path,
                            content_settings=ContentSettings(content_type='application/pdf'))
                        if file_id is None:
                            db.saveFileStatus(script_name=script_name,
                                              file_original_name=pdffile,
                                              file_upload_path=directory,
                                              file_upload_name=remote_filename,
                                              file_status='Uploaded')
                        else:
                            db.saveFileStatus(id=file_details["id"],
                                              file_upload_path=directory,
                                              file_upload_name=remote_filename,
                                              file_status='Uploaded')
                        print('{}/{} uploaded'.format(directory, remote_filename))
                        retries = 3
                        os.remove(pdffile)
                    except Exception as e:
                        print('Error uploading to Azure file storage,', str(e))
                        retries += 1
            else:
                print('File {} was uploaded before'.format(
                    file_details["file_original_name"]))
                os.remove(pdffile)
    except Exception as e:
        print(str(e))
        logging.critical(str(e))
class Crawler:
    def __init__(self, config, section, script_name=None, error_message=None):
        self.script_name = script_name
        self.config = config
        self.db = DbCommunicator(config)
        self.error_message = error_message
        try:
            self.section = section
            self.dbparams = self.db.readProps('general')
            self.dbparams.update(self.db.readProps(section))
            self.downloads_path = self.get_property('downloads_path', section)
            self.overwrite_remote_files = self.get_property(
                'overwrite_remote_files', section, 'bool')
            if not os.path.exists(self.downloads_path):
                os.makedirs(self.downloads_path)
            elif not os.path.isdir(self.downloads_path):
                print('ERROR:{} downloads_path parameter points to file!'.format(section))
                sys.exit(1)
            self.headless_mode = self.get_property('headless_mode', 'general', 'bool')
            if self.headless_mode:
                display = Display(visible=0, size=(1920, 1080))
                display.start()
            options = webdriver.ChromeOptions()
            options.add_argument("--no-sandbox")
            options.add_argument('--headless')
            prefs = {
                'download.default_directory': self.downloads_path,
                'download.prompt_for_download': False,
                'download.directory_upgrade': True,
                'plugins.always_open_pdf_externally': True,
            }
            options.add_experimental_option("prefs", prefs)
            self.browser = webdriver.Chrome(
                chrome_options=options,
                service_args=["--verbose", "--log-path=/tmp/selenium.log"])
            self.browser.implicitly_wait(10)
            self.browser.set_page_load_timeout(10000)
            self.browser.set_script_timeout(10000)
            # self.ftp_connect()
            self.file_storage_connect()
        except Exception as e:
            self.error_message = str(e)

    def get_property(self, prop, section, type='str'):
        if type == 'str':
            if self.dbparams is not None and prop in self.dbparams:
                return self.dbparams[prop]
            else:
                return self.config.get(section, prop).strip()
        elif type == 'bool':
            if self.dbparams is not None and prop in self.dbparams:
                return self.dbparams[prop] == 'True'
            else:
                return self.config.getboolean(section, prop, fallback=False)

    def file_storage_connect(self):
        self.file_storage_url = self.get_property('fs_server', 'general')
        self.file_storage_user = self.get_property('fs_username', 'general')
        self.file_storage_pwd = self.get_property('fs_password', 'general')
        self.file_storage_share = self.get_property('fs_share', 'general')
        self.file_storage_dir = self.get_property('fs_directory_prefix', 'general')
        self.file_service = FileService(account_name=self.file_storage_user,
                                        account_key=self.file_storage_pwd)
        try:
            if self.file_service.exists(self.file_storage_share):
                print('Connection to Azure file storage successfully established...')
                if len(self.file_storage_dir) > 0 and not self.file_service.exists(
                        self.file_storage_share,
                        directory_name=self.file_storage_dir):
                    # create the directory prefix one level at a time
                    subdirs = self.file_storage_dir.split('/')
                    subdirfull = ""
                    for subdir in subdirs:
                        subdirfull += subdir
                        self.file_service.create_directory(self.file_storage_share,
                                                           subdirfull)
                        subdirfull += "/"
                    print('Created directory:' + self.file_storage_dir)
            else:
                print('Failed to connect to Azure file storage, share does not exist: '
                      + self.file_storage_share)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)

    def ftp_connect(self):
        self.ftp = FTP()
        self.ftp.connect(
            self.config.get('general', 'ftp_server').strip(),
            int(self.config.get('general', 'ftp_port')),
        )
        self.ftp.login(
            user=self.config.get('general', 'ftp_username').strip(),
            passwd=self.config.get('general', 'ftp_password').strip(),
        )
        print('Connection to ftp successfully established...')

    def get(self, url):
        self.browser.get(url)
        time.sleep(3)

    def assert_exists(self, selector):
        _ = self.browser.find_element_by_css_selector(selector)

    def get_elements(self, selector, root=None):
        if root is None:
            root = self.browser
        return root.find_elements_by_css_selector(selector)

    def wait_for_displayed(self, selector):
        element = self.browser.find_element_by_css_selector(selector)
        while not element.is_displayed():
            pass

    def click_by_text(self, text):
        self.browser.find_element_by_link_text(text).click()
        time.sleep(3)

    def click_xpath(self, path, single=True):
        if single:
            self.browser.find_element_by_xpath(path).click()
        else:
            for el in self.browser.find_elements_by_xpath(path):
                el.click()
        time.sleep(3)

    def click(self, selector, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            root.find_element_by_css_selector(selector).click()
        else:
            for el in root.find_elements_by_css_selector(selector):
                el.click()
        time.sleep(3)

    def send_keys(self, selector, keys):
        elem = self.browser.find_element_by_css_selector(selector)
        elem.clear()
        elem.send_keys(keys)
        time.sleep(3)

    def open_new_tab(self):
        self.browser.execute_script("window.open('');")
        self.browser.switch_to.window(self.browser.window_handles[1])

    def close_current_tab(self):
        self.browser.close()
        self.browser.switch_to.window(self.browser.window_handles[-1])

    def get_text(self, selector, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).text
        return [el.text for el in root.find_elements_by_css_selector(selector)]

    def get_attr(self, selector, attr, single=True, root=None):
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).get_attribute(attr)
        return [el.get_attribute(attr)
                for el in root.find_elements_by_css_selector(selector)]

    def execute(self, script):
        self.browser.execute_script(script, [])
        time.sleep(3)

    def deselect_all(self, selector):
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.deselect_all()
        time.sleep(3)

    def select_option(self, selector, option):
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.select_by_visible_text(option)
        time.sleep(3)

    def select_option_by_index(self, selector, index):
        select = Select(self.browser.find_element_by_css_selector(selector))
        if index < len(select.options):
            select.select_by_index(index)
            time.sleep(3)
            return True
        return False

    def back(self):
        self.browser.back()
        time.sleep(3)

    def close_dialog(self):
        try:
            alert = self.browser.switch_to.alert
            alert.dismiss()
            # alert.accept()
        except Exception as e:
            pass

    def close(self):
        if hasattr(self, 'browser'):
            self.browser.quit()
        if hasattr(self, 'db'):
            self.db.close()
        # self.ftp.quit()

    def download(self, url, filename, file_db_id=None):
        # print('Downloading', filename, self._get_remote_filename(filename))
        # return
        downloaded = False
        if url.startswith('https'):
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        else:
            ctx = None
        content_length = 1
        retry = 0
        file_size = 0
        file_name = ''
        while file_size != content_length and retry < 3:
            try:
                r = urllib.request.urlopen(url, context=ctx)
                content_length = r.length
                file_name = os.path.join(self.downloads_path, filename)
                with open(file_name, 'wb') as f:
                    f.write(r.read())
                file_size = os.stat(file_name).st_size
                retry += 1
                # print('Attempt', retry, 'Downloaded', file_size, 'bytes of', content_length)
            except Exception as e:
                retry += 1
                print('Attempt', retry, 'ERROR: Downloading failed!', url, str(e))
                try:
                    os.remove(file_name)
                except OSError:
                    pass
        if file_size == content_length:
            downloaded = True
            if file_db_id:
                self.db.saveFileStatus(id=file_db_id,
                                       script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='Downloaded')
            else:
                self.db.saveFileStatus(script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='Downloaded')
        else:
            if file_db_id:
                self.db.saveFileStatus(id=file_db_id,
                                       script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='None')
            else:
                self.db.saveFileStatus(script_name=self.script_name,
                                       file_original_name=filename,
                                       file_status='None')
        return downloaded

    def _get_remote_filename(self, local_filename):
        raise NotImplementedError

    def merge_files(self, filenames):
        pdfline = '"' + '" "'.join(filenames) + '"'
        res_filename = '"' + filenames[0].split(' part')[0] + '.pdf"'
        command = 'pdftk ' + pdfline + ' cat output ' + res_filename
        os.system(command)
        return res_filename

    def upload_to_ftp(self, filename):
        self.upload_to_file_storage(filename)

    def upload_to_ftp_old(self, filename):
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                # print('Uploading {}'.format(path))
                pdf_file = open(path, 'rb')
                remote_filename = self._get_remote_filename(filename)
                if not remote_filename:
                    return
                directory, filename = remote_filename
                try:
                    self.ftp.cwd('/{}'.format(directory))
                except Exception:
                    self.ftp.mkd('/{}'.format(directory))
                    self.ftp.cwd('/{}'.format(directory))
                if not self.overwrite_remote_files:
                    # print('Checking if {}/{} already exists'.format(directory, filename))
                    try:
                        self.ftp.retrbinary('RETR {}'.format(filename), lambda x: x)
                        return
                    except error_perm:
                        pass
                self.ftp.storbinary('STOR {}'.format(filename), pdf_file)
                # print('{} uploaded'.format(path))
                pdf_file.close()
                retries = 3
            except Exception as e:
                print('Error uploading to ftp,', str(e))
                retries += 1
                try:
                    self.ftp.voidcmd("NOOP")
                except Exception as ex:
                    self.ftp_connect()

    def move_to_another(self, filename):
        try:
            entity_type = filename.split('|')[1]
            remote_filename = self._get_remote_filename(filename)
            if not remote_filename:
                return
            if (entity_type == 'County') or (entity_type == 'City') or \
                    (entity_type == 'Township') or (entity_type == 'Village'):
                return
            directory, server_filename = remote_filename
            self.ftp.rename('/General Purpose/{}'.format(server_filename),
                            '/{}/{}'.format(directory, server_filename))
            print('Moved {} to {}'.format(server_filename, directory))
        except Exception as e:
            print(str(e))

    def upload_to_file_storage(self, filename):
        fnm = FilenameManager()
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Uploaded')
                if file_details is not None:
                    print('File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Other',
                    notes='Uploaded Before')
                if file_details is not None:
                    print('File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Downloaded')
                print('Uploading {}'.format(path))
                remote_filename = self._get_remote_filename(filename)
                old_filename = filename
                directory = None
                if not remote_filename:
                    return
                try:
                    directory, filename, year = remote_filename
                except ValueError:
                    directory, filename = remote_filename
                    year = None
                filename = fnm.azure_validate_filename(filename)
                if len(self.file_storage_dir) > 0:
                    directory = self.file_storage_dir + '/' + directory
                if not self.file_service.exists(self.file_storage_share,
                                                directory_name=directory):
                    self.file_service.create_directory(self.file_storage_share,
                                                       directory)
                if year:
                    directory += '/' + year
                    if not self.file_service.exists(self.file_storage_share,
                                                    directory_name=directory):
                        self.file_service.create_directory(self.file_storage_share,
                                                           directory)
                if not self.overwrite_remote_files:
                    print('Checking if {}/{} already exists'.format(directory, filename))
                    if self.file_service.exists(self.file_storage_share,
                                                directory_name=directory,
                                                file_name=filename):
                        print('{}/{} already exists'.format(directory, filename))
                        if file_details is None:
                            self.db.saveFileStatus(script_name=self.script_name,
                                                   file_original_name=old_filename,
                                                   file_upload_path=directory,
                                                   file_upload_name=filename,
                                                   file_status='Other',
                                                   notes='Uploaded Before')
                        else:
                            self.db.saveFileStatus(id=file_details['id'],
                                                   file_upload_path=directory,
                                                   file_upload_name=filename,
                                                   file_status='Other',
                                                   notes='Uploaded Before')
                        return
                self.file_service.create_file_from_path(
                    self.file_storage_share, directory, filename, path,
                    content_settings=ContentSettings(content_type='application/pdf'))
                if file_details is None:
                    self.db.saveFileStatus(script_name=self.script_name,
                                           file_original_name=old_filename,
                                           file_upload_path=directory,
                                           file_upload_name=filename,
                                           file_status='Uploaded')
                else:
                    self.db.saveFileStatus(id=file_details['id'],
                                           file_upload_path=directory,
                                           file_upload_name=filename,
                                           file_status='Uploaded')
                print('{} uploaded'.format(path))
                retries = 3
            except Exception as e:
                print('Error uploading to Azure file storage,', str(e))
                filename = old_filename
                retries += 1
backup_bz2.add(repertoire_de_sauvegarde + '/docker-compose.yml')
backup_bz2.close()  # close the archive file

print("File compression and backup OK!")
logging.debug("File compression and backup OK!")
# syslog.syslog(syslog.LOG_DEBUG, "File compression and backup OK!")

# Backup to Microsoft Azure
#
# Create a save_<backup date> subdirectory
print("Creating subdirectory save_" + str(BACKUP_DATE) + " on Microsoft Azure...")
logging.debug("Creating subdirectory save_" + str(BACKUP_DATE) + " on Microsoft Azure...")  # warning
# syslog.syslog(syslog.LOG_DEBUG, "Creating subdirectory save_" + str(BACKUP_DATE) + " on Microsoft Azure...")  # warning
file_service.create_directory(AZURE_REP_BKP, 'save_' + str(BACKUP_DATE))
print("Subdirectory save_" + str(BACKUP_DATE) + " created on Microsoft Azure OK!")
logging.debug("Subdirectory save_" + str(BACKUP_DATE) + " created on Microsoft Azure OK!")  # warning
# syslog.syslog(syslog.LOG_DEBUG, "Subdirectory save_" + str(BACKUP_DATE) + " created on Microsoft Azure OK!")  # warning

# Copy the backup files to the Microsoft Azure directory
print("Copying the backup files to the Microsoft Azure directory...")
logging.debug("Copying the backup files to the Microsoft Azure directory...")  # warning
# syslog.syslog(syslog.LOG_DEBUG, "Copying the backup files to the Microsoft Azure directory...")  # warning
file_service.create_file_from_path(
    AZURE_REP_BKP, 'save_' + str(BACKUP_DATE),
    'save_' + str(BACKUP_DATE) + 'db.sql',
    repertoire_de_sauvegarde + '/save_' + str(BACKUP_DATE) + 'db.sql')
file_service.create_file_from_path(
    AZURE_REP_BKP, 'save_' + str(BACKUP_DATE),
    'save_' + str(BACKUP_DATE) + '.tar.bz2',
    repertoire_de_sauvegarde + '/save_' + str(BACKUP_DATE) + '.tar.bz2')
print("Copying the backup files to the Microsoft Azure directory OK!")
class StorageHelper(object):
    """Handle details related to a single storage account and share.

    Instantiate this object with information sufficient to
    uniquely identify a storage account and a file share within it.
    Then .account can be used to retrieve the Azure SDK for Python
    object corresponding to the account, and .key can be used to get
    an access key for it.

    For both those properties, if the value mentioned doesn't exist,
    it will be created upon first property access.
    """
    def __init__(self, client_data, resource_helper, name,
                 account=None, default_share='share'):
        self.name = name
        self.default_share = default_share
        self._account = account
        self._key = os.environ.get('AZURE_STORAGE_KEY')
        self.resource_helper = resource_helper
        self.client = StorageManagementClient(*client_data)
        self.file_service = FileService(
            account_name=self.account.name,
            account_key=self.key,
        )

    @property
    def account(self):
        """Return the managed StorageAccounts object.

        If no such account exists, create it first.
        """
        if self._account is None:
            print('Creating storage account...')
            # Error to create storage account if it already exists!
            name_check = self.client.storage_accounts.check_name_availability(self.name)
            if name_check.name_available:
                storage_creation = self.client.storage_accounts.create(
                    self.resource_helper.group.name,
                    self.name,
                    StorageAccountCreateParameters(
                        sku=StorageAccountSku(StorageSkuName.standard_lrs),
                        kind=StorageKind.storage,
                        location=self.resource_helper.group.location,
                    ))
                storage = storage_creation.result()
            else:
                try:
                    storage = self.client.storage_accounts.get_properties(
                        self.resource_helper.group.name, self.name)
                except CloudError:
                    print('Storage account {} already exists'
                          ' in a resource group other than {}.'.format(
                              self.name, self.resource_helper.group.name))
            print('Got storage account:', storage.name)
            self._account = storage
        return self._account

    @property
    def key(self):
        """Get the first available storage key.

        This will crash if there are no available storage keys,
        which is unlikely since two are created along with a storage account.
        """
        if self._key is None:
            storage_keys = self.client.storage_accounts.list_keys(
                self.resource_helper.group.name, self.account.name)
            self._key = next(iter(storage_keys.keys)).value
        return self._key

    def upload_file(self, path, sharename):
        """Upload a file into the default share on the storage account.

        If the share doesn't exist, create it first.
        """
        self.file_service.create_file_from_path(
            self.default_share if sharename is None else sharename,
            None,
            os.path.basename(path),
            path,
        )
        return '/'.join([self.default_share, os.path.basename(path)])

    def download_file(self, sharename, filename):
        self.file_service.get_file_to_path(sharename, None, filename, filename)

    def delete_file(self, sharename, filename):
        self.file_service.delete_file(sharename, None, filename)

    def create_share(self, sharename):
        self.file_service.create_share(sharename)

    def create_directory(self, sharename, directoryname):
        self.file_service.create_directory(sharename, directoryname)

    def list_directories_and_files(self, sharename):
        generator = self.file_service.list_directories_and_files(sharename)
        return [file_or_dir.name for file_or_dir in generator]

    def list_shares(self):
        shares = list(self.file_service.list_shares(include_snapshots=True))
        sharelist = [fileshare.name for fileshare in shares]
        print(sharelist)
        return sharelist
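A hedged sketch of how StorageHelper might be wired up; client_data and resource_helper come from surrounding infrastructure not shown in this snippet, and the names below are placeholders:

# client_data is assumed to be the (credentials, subscription_id) pair that
# StorageManagementClient expects; resource_helper must expose the .group
# attribute used by the class above.
helper = StorageHelper(client_data, resource_helper, name='mystorageacct')
helper.create_share('share')  # the default_share used by upload_file
remote_path = helper.upload_file('/tmp/report.pdf', None)  # None falls back to default_share
print(remote_path)                                  # 'share/report.pdf'
print(helper.list_directories_and_files('share'))   # ['report.pdf']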
class AzureFileManager():
    def __init__(self):
        # fetch config data
        conf = Configuration()
        # create the Azure File share service
        self.file_service = FileService(
            account_name=conf.account_name, account_key=conf.account_key)
        # set the Azure file share name (container)
        self.file_share = conf.file_share

    def upload_file(self, upload_path, file_path):
        if not os.path.isfile(file_path):
            print("Your file does not exist; check the file path and try again.")
            return
        filename = os.path.basename(file_path)
        # remove ' or " from the path; an empty path like "" or '' becomes
        # upload_path=None, which uploads the file to the root directory
        upload_path = upload_path.strip().replace("'", '').replace('"', '')
        # strip leading and trailing / or \
        if upload_path.endswith('/') or upload_path.endswith('\\'):
            upload_path = upload_path[:-1]
        if upload_path.startswith('/') or upload_path.startswith('\\'):
            upload_path = upload_path[1:]
        # sanity check
        upload_path = upload_path if len(upload_path) >= 1 else None
        print("Start uploading...")
        try:
            # create sub directories
            self.create_sub_directories(upload_path)
            # upload
            self.file_service.create_file_from_path(
                share_name=self.file_share,   # file share name in Azure
                directory_name=upload_path,   # server directory path; None => root directory
                file_name=filename,           # name of the file to create in Azure
                local_file_path=file_path)
            print("'{0}' has been successfully uploaded".format(filename))
        except Exception:
            print("Failed to upload '{0}', please try again".format(filename))

    def download_file(self, file_path):
        """ download a file from Azure, given its path in Azure """
        # check that the file path is not empty
        file_path = file_path.strip().replace("'", '').replace('"', '')
        if len(file_path) == 0:
            print("Please enter a file path")
            return
        filename = os.path.basename(file_path)
        dir_path = os.path.dirname(file_path)
        # if no parent path is given, use None => root directory
        dir_path = dir_path if dir_path else None
        print("Downloading...")
        try:
            self.file_service.get_file_to_path(
                share_name=self.file_share,
                directory_name=dir_path,  # the path to the directory in Azure
                file_name=filename,       # name of the existing file in Azure
                # path of the file to write to on the local machine
                file_path="{0}".format(filename))
            print("'{0}' has been successfully downloaded and saved in the current directory.".format(filename))
        except Exception:
            print("Failed to download '{0}'; either the file doesn't exist or you are offline.".format(filename))

    def get_list_of_files(self, dir_name=None):
        """ list all files and directories in Azure """
        generator = self.file_service.list_directories_and_files(
            share_name=self.file_share, directory_name=dir_name)
        parent = "" if dir_name is None else dir_name
        for file_or_dir in generator:
            # entries whose names match this pattern (no dot/extension) are treated as directories
            if not re.match(r"(.[a-z]*[A-Z]*[0-9]*)$", file_or_dir.name):
                # file
                if len(parent) == 0:
                    print(file_or_dir.name)
                else:
                    print("{0}/{1}".format(parent, file_or_dir.name))
            else:
                # dir
                if len(parent) == 0:
                    self.get_list_of_files(file_or_dir.name)
                else:
                    self.get_list_of_files("{0}/{1}".format(parent, file_or_dir.name))

    def create_sub_directories(self, path):
        """ create sub directories in Azure """
        if path is None:
            return
        dirs = os.path.normpath(path).split(os.path.sep)
        parent = ''
        for dir in dirs:
            parent += dir if len(parent) == 0 else '/' + dir
            self.file_service.create_directory(self.file_share, parent)
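A brief usage sketch of AzureFileManager, assuming Configuration supplies valid account_name, account_key, and file_share values; the paths are placeholders:

manager = AzureFileManager()
# upload_file creates 'reports' and then 'reports/2020' on the share before uploading
manager.upload_file("reports/2020", "/tmp/summary.pdf")
manager.get_list_of_files()                        # walk the share from the root
manager.download_file("reports/2020/summary.pdf")  # saved into the current directory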
class AzureFileWriter(FilebaseBaseWriter):
    """
    Writes items to azure file shares. It is a File Based writer, so it has
    the filebase option available

        - account_name (str)
            Public access name of the azure account.

        - account_key (str)
            Public access key to the azure account.

        - share (str)
            File share name.

        - filebase (str)
            Base path to store the items in the share.
    """

    supported_options = {
        "account_name": {"type": six.string_types, "env_fallback": "EXPORTERS_AZUREWRITER_NAME"},
        "account_key": {"type": six.string_types, "env_fallback": "EXPORTERS_AZUREWRITER_KEY"},
        "share": {"type": six.string_types},
    }

    def __init__(self, options, meta, *args, **kw):
        from azure.storage.file import FileService
        super(AzureFileWriter, self).__init__(options, meta, *args, **kw)
        account_name = self.read_option("account_name")
        account_key = self.read_option("account_key")
        self.azure_service = FileService(account_name, account_key)
        self.share = self.read_option("share")
        self.azure_service.create_share(self.share)
        self.logger.info("AzureWriter has been initiated."
                         " Writing to share {}".format(self.share))
        self.set_metadata("files_counter", Counter())
        self.set_metadata("files_written", [])

    def write(self, dump_path, group_key=None, file_name=None):
        if group_key is None:
            group_key = []
        self._write_file(dump_path, group_key, file_name)

    def _update_metadata(self, dump_path, filebase_path, file_name):
        buffer_info = self.write_buffer.metadata[dump_path]
        file_info = {
            "file_name": file_name,
            "filebase_path": filebase_path,
            "size": buffer_info["size"],
            "number_of_records": buffer_info["number_of_records"],
        }
        files_written = self.get_metadata("files_written")
        files_written.append(file_info)
        self.set_metadata("files_written", files_written)
        self.get_metadata("files_counter")[filebase_path] += 1

    def _ensure_path(self, filebase):
        path = filebase.split("/")
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = "/".join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _write_file(self, dump_path, group_key, file_name=None):
        filebase_path, file_name = self.create_filebase_name(group_key,
                                                             file_name=file_name)
        self._ensure_path(filebase_path)
        self.azure_service.create_file_from_path(
            self.share,
            filebase_path,
            file_name,
            dump_path,
            max_connections=5,
        )
        self._update_metadata(dump_path, filebase_path, file_name)

    def get_file_suffix(self, path, prefix):
        number_of_keys = self.get_metadata("files_counter").get(path, 0)
        suffix = "{}".format(str(number_of_keys))
        return suffix

    def _check_write_consistency(self):
        from azure.common import AzureMissingResourceHttpError
        for file_info in self.get_metadata("files_written"):
            try:
                afile = self.azure_service.get_file_properties(
                    self.share, file_info["filebase_path"], file_info["file_name"])
                file_size = afile.properties.content_length
                if str(file_size) != str(file_info["size"]):
                    raise InconsistentWriteState(
                        "File {} has unexpected size. (expected {} - got {})".format(
                            file_info["file_name"], file_info["size"], file_size))
            except AzureMissingResourceHttpError:
                raise InconsistentWriteState(
                    "Missing file {}".format(file_info["file_name"]))
        self.logger.info("Consistency check passed")
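The docstring above lists the writer's supported options; a hedged configuration sketch for the surrounding exporters framework (the module path and all values are assumptions for illustration, not taken from the snippet):

writer_config = {
    'name': 'exporters.writers.azure_file_writer.AzureFileWriter',  # assumed module path
    'options': {
        'account_name': 'mystorageaccount',  # or set EXPORTERS_AZUREWRITER_NAME
        'account_key': '<account-key>',      # or set EXPORTERS_AZUREWRITER_KEY
        'share': 'exports',
        'filebase': 'dumps/%Y-%m-%d/items',  # filebase comes from FilebaseBaseWriter
    },
}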
def main(path, debug, remote_directory, typhoonname):
    initialize.setup_cartopy()
    start_time = datetime.now()
    print('---------------------AUTOMATION SCRIPT STARTED---------------------------------')
    print(str(start_time))
    #%% check for active typhoons
    print('---------------------check for active typhoons---------------------------------')
    print(str(start_time))
    remote_dir = remote_directory
    if debug:
        typhoonname = 'SURIGAE'
        remote_dir = '20210421120000'
        logger.info(f"DEBUGGING pipeline for typhoon {typhoonname}")
        Activetyphoon = [typhoonname]
    else:
        # If the passed typhoon name is None or an empty string
        if not typhoonname:
            Activetyphoon = Check_for_active_typhoon.check_active_typhoon()
            if not Activetyphoon:
                logger.info("No active typhoon in PAR, stopping pipeline")
                sys.exit()
            logger.info(f"Running on active Typhoon(s) {Activetyphoon}")
        else:
            Activetyphoon = [typhoonname]
            remote_dir = remote_directory
            logger.info(f"Running on custom Typhoon {Activetyphoon}")

    Alternative_data_point = (start_time - timedelta(hours=24)).strftime("%Y%m%d")
    date_dir = start_time.strftime("%Y%m%d%H")
    Input_folder = os.path.join(path, f'forecast/Input/{date_dir}/Input/')
    Output_folder = os.path.join(path, f'forecast/Output/{date_dir}/Output/')
    if not os.path.exists(Input_folder):
        os.makedirs(Input_folder)
    if not os.path.exists(Output_folder):
        os.makedirs(Output_folder)

    # download NOAA rainfall
    try:
        #Rainfall_data_window.download_rainfall_nomads(Input_folder, path, Alternative_data_point)
        Rainfall_data.download_rainfall_nomads(Input_folder, path,
                                               Alternative_data_point)
        rainfall_error = False
    except:
        traceback.print_exc()
        #logger.warning('Rainfall download failed, performing download in R script')
        logger.info('Rainfall download failed, performing download in R script')
        rainfall_error = True

    ###### download UCL data
    try:
        ucl_data.create_ucl_metadata(path, os.environ['UCL_USERNAME'],
                                     os.environ['UCL_PASSWORD'])
        ucl_data.process_ucl_data(path, Input_folder, os.environ['UCL_USERNAME'],
                                  os.environ['UCL_PASSWORD'])
    except:
        logger.info('UCL download failed')

    #%%
    ## Create grid points on which to calculate the wind field
    cent = Centroids()
    cent.set_raster_from_pnt_bounds((118, 6, 127, 19), res=0.05)
    # this option is added to make the script scalable globally; To Do
    #cent.set_raster_from_pnt_bounds((LonMin, LatMin, LonMax, LatMax), res=0.05)
    cent.check()
    cent.plot()
    ####
    admin = gpd.read_file(os.path.join(path, "./data-raw/phl_admin3_simpl2.geojson"))
    df = pd.DataFrame(data=cent.coord)
    df["centroid_id"] = "id" + (df.index).astype(str)
    centroid_idx = df["centroid_id"].values
    ncents = cent.size
    df = df.rename(columns={0: "lat", 1: "lon"})
    df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat))
    #df.to_crs({'init': 'epsg:4326'})
    df.crs = {'init': 'epsg:4326'}
    df_admin = sjoin(df, admin, how="left").dropna()

    # Sometimes the ECMWF ftp server complains about too many requests
    # This code allows several retries with some sleep time in between
    n_tries = 0
    while True:
        try:
            logger.info("Downloading ECMWF typhoon tracks")
            bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=remote_dir)
            fcast = TCForecast()
            fcast.fetch_ecmwf(files=bufr_files)
        except ftplib.all_errors as e:
            n_tries += 1
            if n_tries >= ECMWF_MAX_TRIES:
                logger.error(f' Data downloading from ECMWF failed: {e}, '
                             f'reached limit of {ECMWF_MAX_TRIES} tries, exiting')
                sys.exit()
            logger.error(f' Data downloading from ECMWF failed: {e}, '
                         f'retrying after {ECMWF_SLEEP} s')
            time.sleep(ECMWF_SLEEP)
            continue
        break

    #%% filter the downloaded data for active typhoons in PAR:
    # keep tracks matching the current typhoon names and drop tracks with only one timestep
    fcast.data = [track_data_clean.track_data_clean(tr) for tr in fcast.data
                  if (tr.time.size > 1 and tr.name in Activetyphoon)]
    # fcast.data = [tr for tr in fcast.data if tr.name in Activetyphoon]
    # fcast.data = [tr for tr in fcast.data if tr.time.size > 1]

    for typhoons in Activetyphoon:
        #typhoons = Activetyphoon[0]
        logger.info(f'Processing data {typhoons}')
        fname = open(os.path.join(path, 'forecast/Input/',
                                  "typhoon_info_for_model.csv"), 'w')
        fname.write('source,filename,event,time' + '\n')
        if not rainfall_error:
            line_ = 'Rainfall,' + '%srainfall' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName
            fname.write(line_ + '\n')
        line_ = 'Output_folder,' + '%s' % Output_folder + ',' + typhoons + ',' + date_dir  #StormName
        fname.write(line_ + '\n')

        #typhoons = 'SURIGAE'  # to run it manually for any typhoon
        # select windspeed for the HRS model
        fcast.data = [tr for tr in fcast.data if tr.name == typhoons]
        tr_HRS = [tr for tr in fcast.data if (tr.is_ensemble == 'False')]
        if tr_HRS != []:
            # 0.84 is the conversion factor from the ECMWF 10-minute to the 1-minute average
            HRS_SPEED = (tr_HRS[0].max_sustained_wind.values / 0.84).tolist()
            dfff = tr_HRS[0].to_dataframe()
            dfff[['VMAX', 'LAT', 'LON']] = dfff[['max_sustained_wind', 'lat', 'lon']]
            dfff['YYYYMMDDHH'] = dfff.index.values
            dfff['YYYYMMDDHH'] = dfff['YYYYMMDDHH'].apply(
                lambda x: x.strftime("%Y%m%d%H%M"))
            dfff['STORMNAME'] = typhoons
            dfff[['YYYYMMDDHH', 'VMAX', 'LAT', 'LON', 'STORMNAME']].to_csv(
                os.path.join(Input_folder, 'ecmwf_hrs_track.csv'), index=False)
            line_ = 'ecmwf,' + '%secmwf_hrs_track.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName
            fname.write(line_ + '\n')
            # Adjust track time step
            data_forced = [tr.where(tr.time <= max(tr_HRS[0].time.values), drop=True)
                           for tr in fcast.data]
            # data_forced = [track_data_clean.track_data_force_HRS(tr, HRS_SPEED) for tr in data_forced]  # forced with HRS windspeed
            # data_forced = [track_data_clean.track_data_clean(tr) for tr in fcast.data]  # taking speed of ENS
            # interpolate to 3h steps from the original 6h
            #fcast.equal_timestep(3)
        else:
            len_ar = np.min([len(var.lat.values) for var in fcast.data])
            lat_ = np.ma.mean([var.lat.values[:len_ar] for var in fcast.data], axis=0)
            lon_ = np.ma.mean([var.lon.values[:len_ar] for var in fcast.data], axis=0)
            YYYYMMDDHH = pd.date_range(fcast.data[0].time.values[0],
                                       periods=len_ar, freq="H")
            vmax_ = np.ma.mean([var.max_sustained_wind.values[:len_ar]
                                for var in fcast.data], axis=0)
            d = {'YYYYMMDDHH': YYYYMMDDHH, "VMAX": vmax_, "LAT": lat_, "LON": lon_}
            dfff = pd.DataFrame(d)
            dfff['STORMNAME'] = typhoons
            dfff['YYYYMMDDHH'] = dfff['YYYYMMDDHH'].apply(
                lambda x: x.strftime("%Y%m%d%H%M"))
            dfff[['YYYYMMDDHH', 'VMAX', 'LAT', 'LON', 'STORMNAME']].to_csv(
                os.path.join(Input_folder, 'ecmwf_hrs_track.csv'), index=False)
            line_ = 'ecmwf,' + '%secmwf_hrs_track.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName
            fname.write(line_ + '\n')
            data_forced = fcast.data

        # calculate wind fields for each ensemble member
        threshold = 0  # threshold to filter the dataframe / reduce data
        df = pd.DataFrame(data=cent.coord)
        df["centroid_id"] = "id" + (df.index).astype(str)
        centroid_idx = df["centroid_id"].values
        ncents = cent.size
        df = df.rename(columns={0: "lat", 1: "lon"})

        # calculate the wind field for each ensemble member
        list_intensity = []
        distan_track = []
        for tr in data_forced:
            logger.info(f"Running on ensemble # {tr.ensemble_number} "
                        f"for typhoon {tr.name}")
            track = TCTracks()
            typhoon = TropCyclone()
            track.data = [tr]
            #track.equal_timestep(3)
            tr = track.data[0]
            typhoon.set_from_tracks(track, cent, store_windfields=True)
            # Make the intensity plot using the high-resolution member
            if tr.is_ensemble == 'False':
                logger.info("High res member: creating intensity plot")
                plot_intensity.plot_inensity(typhoon=typhoon, event=tr.sid,
                                             output_dir=Output_folder,
                                             date_dir=date_dir,
                                             typhoon_name=tr.name)
            windfield = typhoon.windfields
            nsteps = windfield[0].shape[0]
            centroid_id = np.tile(centroid_idx, nsteps)
            intensity_3d = windfield[0].toarray().reshape(nsteps, ncents, 2)
            intensity = np.linalg.norm(intensity_3d, axis=-1).ravel()
            timesteps = np.repeat(track.data[0].time.values, ncents)
            #timesteps = np.repeat(tr.time.values, ncents)
            timesteps = timesteps.reshape((nsteps, ncents)).ravel()
            inten_tr = pd.DataFrame({
                'centroid_id': centroid_id,
                'value': intensity,
                'timestamp': timesteps,
            })
            inten_tr = inten_tr[inten_tr.value > threshold]
            inten_tr['storm_id'] = tr.sid
            inten_tr['ens_id'] = tr.sid + '_' + str(tr.ensemble_number)
            inten_tr['name'] = tr.name
            inten_tr = (pd.merge(inten_tr, df_admin, how='outer', on='centroid_id')
                        .dropna()
                        .groupby(['adm3_pcode', 'ens_id'], as_index=False)
                        .agg({"value": ['count', 'max']}))
            inten_tr.columns = [x for x in
                                ['adm3_pcode', 'storm_id', 'value_count', 'v_max']]
            list_intensity.append(inten_tr)
            distan_track1 = []
            for index, row in df.iterrows():
                dist = np.min(np.sqrt(np.square(tr.lat.values - row['lat']) +
                                      np.square(tr.lon.values - row['lon'])))
                distan_track1.append(dist * 111)
            dist_tr = pd.DataFrame({'centroid_id': centroid_idx,
                                    'value': distan_track1})
            dist_tr['storm_id'] = tr.sid
            dist_tr['name'] = tr.name
            dist_tr['ens_id'] = tr.sid + '_' + str(tr.ensemble_number)
            dist_tr = (pd.merge(dist_tr, df_admin, how='outer', on='centroid_id')
                       .dropna()
                       .groupby(['adm3_pcode', 'name', 'ens_id'], as_index=False)
                       .agg({'value': 'min'}))
            dist_tr.columns = [x for x in
                               ['adm3_pcode', 'name', 'storm_id', 'dis_track_min']]  # join_left_df_.columns.ravel()
            distan_track.append(dist_tr)
        df_intensity_ = pd.concat(list_intensity)
        distan_track1 = pd.concat(distan_track)
        typhhon_df = pd.merge(df_intensity_, distan_track1, how='left',
                              on=['adm3_pcode', 'storm_id'])
        typhhon_df.to_csv(os.path.join(Input_folder, 'windfield.csv'), index=False)
        line_ = 'windfield,' + '%swindfield.csv' % Input_folder + ',' + typhoons + ',' + date_dir  #StormName
        fname.write(line_ + '\n')
        fname.close()

        #############################################################
        #### Run IBF model
        #############################################################
        os.chdir(path)
        if platform == "linux" or platform == "linux2":  # check if running on linux or windows os
            # linux
            try:
                p = subprocess.check_call(["Rscript", "run_model_V2.R",
                                           str(rainfall_error)])
            except subprocess.CalledProcessError as e:
                logger.error('failed to execute R script')
                raise ValueError(str(e))
        elif platform == "win32":  # if OS is windows edit the path for Rscript
            try:
                p = subprocess.check_call([
                    "C:/Program Files/R/R-4.1.0/bin/Rscript",
                    "run_model_V2.R", str(rainfall_error)])
            except subprocess.CalledProcessError as e:
                logger.error('failed to execute R script')
                raise ValueError(str(e))

        #############################################################
        # send email in case of landfall-typhoon
        #############################################################
        image_filenames = list(Path(Output_folder).glob('*.png'))
        data_filenames = list(Path(Output_folder).glob('*.csv'))
        if image_filenames or data_filenames:
            message_html = """\
<html>
<body>
<h1>IBF model run result </h1>
<p>Please find attached a map and data with updated model run</p>
<img src="cid:Impact_Data">
</body>
</html>
"""
            Sendemail.sendemail(
                smtp_server=os.environ["SMTP_SERVER"],
                smtp_port=int(os.environ["SMTP_PORT"]),
                email_username=os.environ["EMAIL_LOGIN"],
                email_password=os.environ["EMAIL_PASSWORD"],
                email_subject='Updated impact map for a new Typhoon in PAR',
                from_address=os.environ["EMAIL_FROM"],
                to_address_list=os.environ["EMAIL_TO_LIST"].split(','),
                cc_address_list=os.environ["EMAIL_CC_LIST"].split(','),
                message_html=message_html,
                filename_list=image_filenames + data_filenames)
        else:
            raise FileNotFoundError(f'No .png or .csv found in {Output_folder}')

        ##################### upload model output to the 510 datalake ##############
        file_service = FileService(
            account_name=os.environ["AZURE_STORAGE_ACCOUNT"],
            protocol='https',
            connection_string=os.environ["AZURE_CONNECTING_STRING"])
        file_service.create_share('forecast')
        OutPutFolder = date_dir
        file_service.create_directory('forecast', OutPutFolder)
        for img_file in image_filenames:
            file_service.create_file_from_path(
                'forecast', OutPutFolder, os.fspath(img_file.parts[-1]), img_file,
                content_settings=ContentSettings(content_type='image/png'))
        for data_file in data_filenames:
            file_service.create_file_from_path(
                'forecast', OutPutFolder, os.fspath(data_file.parts[-1]), data_file,
                content_settings=ContentSettings(content_type='text/csv'))

        ##################### upload model input (rainfall + wind intensity) to the 510 datalake ##############
        # To Do

    print('---------------------AUTOMATION SCRIPT FINISHED---------------------------------')
    print(str(datetime.now()))
# PG DUMP
try:
    COMMANDS = ['pg_dump', '-F', 'c', '-b', '-v', '-f', './%s' % FILENAME]
    print("Running: '%s'" % (' '.join(COMMANDS)))
    exit_code = subprocess.call(COMMANDS)
    if exit_code == 1:
        raise Exception('Could not Backup, please check logs')

    # AZURE CONNECTION
    file_service = FileService(endpoint_suffix=AZURE_ENDPOINT_SUFFIX,
                               account_name=AZURE_ACCOUNT_NAME,
                               account_key=AZURE_ACCOUNT_KEY)

    # Check if AZURE_BACKUP_FOLDER exists, if not create it
    if not file_service.exists(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER):
        file_service.create_directory(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER)

    # Upload
    print("uploading to: '%s/%s/%s'" % (AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER, FILENAME))
    file_service.create_file_from_path(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER,
                                       FILENAME, FILENAME,
                                       progress_callback=upload_callback)

    # Cleaning Backup Files
    backup_files = file_service.list_directories_and_files(AZURE_SHARE_NAME,
                                                           AZURE_BACKUP_FOLDER)
    filenames = []
    for file in backup_files:
RECEIPTS_DIRECTORY = 'delivery-receipts'
RECEIPTS_URL = ('http://www.cmegroup.com/delivery_reports/'
                'deliverable-commodities-under-registration.xls')
RECEIPTS_FILENAME_SUFFIX = '-deliverable-commodities-under-registration.xls'

STOCKS_DIRECTORY = 'stocks-of-grain'
STOCKS_URL = 'http://www.cmegroup.com/delivery_reports/stocks-of-grain-updated-tuesday.xls'
STOCKS_FILENAME_SUFFIX = '-stocks-of-grain-updated-tuesday.xls'


def filename(suffix):
    return datetime.now().strftime('%Y%m%d') + suffix


def get_bytes(url):
    return requests.get(url).content


f = FileService(account_name=ACCOUNT_NAME, account_key=KEY)
f.create_share(SHARE_NAME)

f.create_directory(SHARE_NAME, RECEIPTS_DIRECTORY)
f.create_file_from_bytes(SHARE_NAME, RECEIPTS_DIRECTORY,
                         filename(RECEIPTS_FILENAME_SUFFIX),
                         get_bytes(RECEIPTS_URL))

f.create_directory(SHARE_NAME, STOCKS_DIRECTORY)
f.create_file_from_bytes(SHARE_NAME, STOCKS_DIRECTORY,
                         filename(STOCKS_FILENAME_SUFFIX),
                         get_bytes(STOCKS_URL))
import os
import urllib.request
import zipfile

from azure.storage.blob import BlockBlobService, PublicAccess
from azure.storage.file import FileService

ACCOUNT_NAME = os.environ['StorageAccountName']
ACCOUNT_KEY = os.environ['StorageAccountKey']
CONTAINER_NAME = os.environ['TelemetryContainerName']

az_blob_service = BlockBlobService(account_name=ACCOUNT_NAME,
                                   account_key=ACCOUNT_KEY)
az_blob_service.create_container(CONTAINER_NAME, fail_on_exist=False)

file_service = FileService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
file_service.create_share(share_name='model', quota=1)

# Download and unpack the model archive, then mirror its tree into the file share
source = os.environ['ModelZipUrl']
dest = 'model.zip'
urllib.request.urlretrieve(source, dest)
with zipfile.ZipFile(dest, "r") as zip_ref:
    zip_ref.extractall("model")

for root, dirs, files in os.walk('model', topdown=True):
    directory = os.path.relpath(root, 'model')
    if directory != '.':
        file_service.create_directory('model', directory)
    for f in files:
        file_service.create_file_from_path('model', directory, f,
                                           os.path.join(root, f))
containername = os.environ['AZURE_CONTAINER_NAME']
subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
resource_group_params = {'location': location}
sku = 'standard_ragrs'  # original had a stray ')' in the literal
kind = 'BlobStorage'
storage_account_params = {'sku': sku, 'kind': kind, 'location': location}

# Configure Credentials
credentials = ServicePrincipalCredentials(client_id=os.environ['AZURE_CLIENT_ID'],
                                          secret=os.environ['AZURE_CLIENT_SECRET'],
                                          tenant=os.environ['AZURE_TENANT_ID'])
resource_client = ResourceManagementClient(credentials, subscription_id)
storage_client = StorageManagementClient(credentials, subscription_id)

# Create Resource Group & Storage Account
resource_client.resource_groups.create_or_update(resourcegroupname,
                                                 resource_group_params)
create_sa = storage_client.storage_accounts.create(
    resourcegroupname, storageaccountname,
    {'location': 'eastus', 'kind': 'storage', 'sku': {'name': 'standard_ragrs'}})
create_sa.wait()

# Create Container
sak = storage_client.storage_accounts.list_keys(resourcegroupname, storageaccountname)
storageaccountkey = sak.keys[0].value
storage_client = CloudStorageAccount(storageaccountname, storageaccountkey)
blob_service = storage_client.create_block_blob_service()
blob_service.create_container(containername, public_access=PublicAccess.Blob)

# Copy Files
file_service = FileService(account_name=storageaccountname,
                           account_key=storageaccountkey)
file_service.create_share(containername)
file_service.create_directory(containername, 'directory1')
file_service.create_file_from_path(containername, 'directory1',
                                   '55224azuresetup.ps1', '55224azuresetup.ps1')
smtp.starttls()
smtp.login(username, password)
smtp.sendmail(send_from, send_to, msg.as_string())
smtp.close()
print('Email sent')


# Gmail login details
username = '******'
password = '******'
default_address = []

# Login details for Azure Storage
file_service = FileService(account_name='killianoneachtain', account_key='KEY')
file_service.create_share('security')
file_service.create_directory('security', 'securityPhotos')

cwd = os.getcwd()  # get the current working directory (cwd)
path = cwd + "/securityPhotos"

# File permissions for the photo directory
access_rights = 0o755

# Create a photo directory if none exists
try:
    os.mkdir(path, access_rights)
except OSError:
    print("Creation of the directory %s failed. \nFolder already exists!" % path)
else:
    print("Successfully created the directory %s " % path)
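The snippet creates the 'securityPhotos' directory on the share but the upload itself is not shown here; a minimal sketch of pushing the local photos up, reusing the path and file_service names from the snippet above:

# Hedged sketch, not from the original source: upload every photo in the
# local securityPhotos folder to the matching directory on the share.
for photo in os.listdir(path):
    file_service.create_file_from_path('security', 'securityPhotos',
                                       photo, os.path.join(path, photo))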
from azure.storage.blob import BlockBlobService
from azure.storage.blob import PublicAccess
from azure.storage.file import FileService
from azure.storage.table import TableService  # assumed import (legacy azure-storage SDK); TableService is used below but was not imported

STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']

table_service = TableService(account_name=STORAGE_ACCOUNT_NAME,
                             account_key=STORAGE_ACCOUNT_KEY)
table_service.create_table('cluster')

file_service = FileService(account_name=STORAGE_ACCOUNT_NAME,
                           account_key=STORAGE_ACCOUNT_KEY)
file_service.create_share(share_name='azureml-project', quota=1)
file_service.create_share(share_name='azureml-share', quota=1)
file_service.create_directory('azureml-share', 'Solution1')
file_service.create_directory('azureml-share', 'Solution2')

block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                      account_key=STORAGE_ACCOUNT_KEY)
container_name = 'telemetry'
block_blob_service.create_container(container_name)

source = os.environ['AML_ASSETS_URL']
dest = 'azureml_project.zip'
urllib.request.urlretrieve(source, dest)
with zipfile.ZipFile(dest, "r") as zip_ref:
    zip_ref.extractall("azureml-project")
class AzureFileWriter(FilebaseBaseWriter):
    """
    Writes items to Azure file shares. It is a file-based writer, so the
    filebase option is available.

        - account_name (str)
            Public access name of the Azure account.

        - account_key (str)
            Public access key to the Azure account.

        - share (str)
            File share name.

        - filebase (str)
            Base path to store the items in the share.
    """

    supported_options = {
        'account_name': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_NAME'
        },
        'account_key': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_KEY'
        },
        'share': {
            'type': six.string_types
        }
    }

    def __init__(self, options, meta, *args, **kw):
        from azure.storage.file import FileService
        super(AzureFileWriter, self).__init__(options, meta, *args, **kw)
        account_name = self.read_option('account_name')
        account_key = self.read_option('account_key')
        self.azure_service = FileService(account_name, account_key)
        self.share = self.read_option('share')
        self.azure_service.create_share(self.share)
        self.logger.info('AzureWriter has been initiated. '
                         'Writing to share {}'.format(self.share))
        self.set_metadata('files_counter', Counter())
        self.set_metadata('files_written', [])

    def write(self, dump_path, group_key=None, file_name=None):
        if group_key is None:
            group_key = []
        self._write_file(dump_path, group_key, file_name)

    def _update_metadata(self, dump_path, filebase_path, file_name):
        buffer_info = self.write_buffer.metadata[dump_path]
        file_info = {
            'file_name': file_name,
            'filebase_path': filebase_path,
            'size': buffer_info['size'],
            'number_of_records': buffer_info['number_of_records']
        }
        files_written = self.get_metadata('files_written')
        files_written.append(file_info)
        self.set_metadata('files_written', files_written)
        self.get_metadata('files_counter')[filebase_path] += 1

    def _ensure_path(self, filebase):
        # Azure file shares have no recursive create, so build the
        # directory tree one level at a time.
        path = filebase.split('/')
        folders_added = []
        for sub_path in path:
            folders_added.append(sub_path)
            parent = '/'.join(folders_added)
            self.azure_service.create_directory(self.share, parent)

    @retry_long
    def _write_file(self, dump_path, group_key, file_name=None):
        filebase_path, file_name = self.create_filebase_name(
            group_key, file_name=file_name)
        self._ensure_path(filebase_path)
        self.azure_service.create_file_from_path(
            self.share,
            filebase_path,
            file_name,
            dump_path,
            max_connections=5,
        )
        self._update_metadata(dump_path, filebase_path, file_name)

    def get_file_suffix(self, path, prefix):
        number_of_keys = self.get_metadata('files_counter').get(path, 0)
        suffix = '{}'.format(str(number_of_keys))
        return suffix

    def _check_write_consistency(self):
        from azure.common import AzureMissingResourceHttpError
        for file_info in self.get_metadata('files_written'):
            try:
                afile = self.azure_service.get_file_properties(
                    self.share, file_info['filebase_path'],
                    file_info['file_name'])
                file_size = afile.properties.content_length
                if str(file_size) != str(file_info['size']):
                    raise InconsistentWriteState(
                        'File {} has unexpected size. (expected {} - got {})'.format(
                            file_info['file_name'], file_info['size'], file_size))
            except AzureMissingResourceHttpError:
                raise InconsistentWriteState(
                    'Missing file {}'.format(file_info['file_name']))
        self.logger.info('Consistency check passed')
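A minimal usage sketch for the writer above. The option values are placeholders, and the exact shape of the options dict and the meta argument come from the exporters framework, so the instantiation is left commented:

# Hedged sketch: standalone setup with placeholder credentials.
options = {
    'options': {
        'account_name': 'myaccount',    # placeholder
        'account_key': 'mykey',         # placeholder
        'share': 'exports',
        'filebase': 'dumps/%Y-%m-%d/'
    }
}
# writer = AzureFileWriter(options, meta)   # meta is supplied by the pipeline
# writer.write('/tmp/batch-0000.jl.gz')
# writer._check_write_consistency()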
class AFSLoader():
    def __init__(self, local_root: Path, afs_creds: dict = None):
        if afs_creds is None:
            afs_creds = get_afs_creds()
        self.afs_name = afs_creds["AFS_NAME"]
        self.afs_key = afs_creds["AFS_KEY"]
        self.afs_share = afs_creds["AFS_SHARE"]
        self.file_service = FileService(account_name=self.afs_name,
                                        account_key=self.afs_key)
        self.local_root = Path(local_root)

    def get_afs_creds(self):
        return self.afs_name, self.afs_key, self.afs_share

    def upload_data_afs(self, data_path: Path, push_data: bool = False):
        """
        Copy data to the AFS directory.

        :param data_path: <Path>. Path to the local data folder.
        :param push_data: If True, upload the data even if it already exists.
        :return: path of the directory in the AFS share.
        """
        logging.info("Sending data to AFS")
        checksum = md5_dir(data_path)[:10]
        afs_path = time.strftime("%Y-%m-%d-%H.%M") + '-' + checksum
        list_folder = self.file_service.list_directories_and_files(self.afs_share)
        for folder in list_folder:
            if checksum == folder.name[-10:]:
                logging.info("Folder for the data already exists!")
                afs_path = folder.name
                logging.info("Data is in the AFS {}".format(folder.name))
                if push_data:
                    logging.warning("Rewriting data")
                    afs_path = folder.name
                else:
                    return afs_path
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path)
        for file in Path(data_path).iterdir():
            progress_callback = lambda current, total: logbar(
                current, total, f"Uploading {file.name}")
            self.file_service.create_file_from_path(
                share_name=self.afs_share,
                directory_name=afs_path,
                file_name=file.name,
                local_file_path=str(file),
                max_connections=cpu_count(),
                progress_callback=progress_callback)
        logging.info("Sending is over")
        return afs_path

    def download_data_afs(self, afs_path: Path, dst_path: Path = None):
        afs_path = Path(afs_path)
        if not dst_path:
            assert self.local_root is not None
            dst_path = self.local_root
        list_folder = self.file_service.list_directories_and_files(
            self.afs_share, directory_name=afs_path)
        try:
            os.mkdir(self.local_root / afs_path)
        except FileExistsError:
            print(f"Directory {self.local_root / afs_path} already exists; its files will be overwritten")
        for file in list_folder:
            progress_callback = lambda current, total: logbar(
                current, total, f"Downloading {file.name}")
            self.file_service.get_file_to_path(
                share_name=self.afs_share,
                directory_name=afs_path,
                file_name=file.name,
                file_path=str(dst_path / afs_path / file.name),
                progress_callback=progress_callback)
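A short usage sketch for AFSLoader, assuming get_afs_creds() resolves AFS_NAME, AFS_KEY and AFS_SHARE as the constructor expects; the local paths are placeholders:

# Hedged sketch: round-trip a local dataset folder through the share.
loader = AFSLoader(local_root=Path("./workspace"))
remote_dir = loader.upload_data_afs(Path("./dataset"))  # e.g. "2020-01-01-12.00-<md5 prefix>"
loader.download_data_afs(remote_dir)                    # lands in ./workspace/<remote_dir>/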
SAVER_FOLDER = "./" + SAVER
TRAIN_DATA = "data-04-zoo.csv"
RESULT_FILE = 'result.json'
FILE_SHARE = 'meltingpang-savedata'

# Clear any stale checkpoint files before training.
for file in os.listdir(SAVER_FOLDER):
    os.remove(SAVER_FOLDER + "/" + file)

file_service = FileService(
    account_name='meltingpang',
    account_key='aS/W8kwqNXDicJgQNnobqkWItcr9vNW3+KO8n+V5qRqY0X3Z+XGBS1sUeLfYtyneUwaose59rbhEziPpFs2qKw==')
file_service.get_file_to_path(FILE_SHARE, None, TRAIN_DATA, TRAIN_DATA)
file_service.create_directory(FILE_SHARE, SAVER)

xy = np.loadtxt(TRAIN_DATA, delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
nb_classes = 7

X = tf.placeholder(tf.float32, [None, 16])
Y = tf.placeholder(tf.int32, [None, 1])
Y_one_hot = tf.one_hot(Y, nb_classes)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
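The snippet fetches the training data and pre-creates the SAVER directory on the share; a hedged sketch of the matching upload step once the checkpoints have been written into SAVER_FOLDER:

# Hedged sketch, not from the original source: push the saved checkpoint
# files back to the share directory created above.
for ckpt in os.listdir(SAVER_FOLDER):
    file_service.create_file_from_path(FILE_SHARE, SAVER, ckpt,
                                       os.path.join(SAVER_FOLDER, ckpt))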
class ModelGymClient:
    config = {}
    project_root = ""
    project_name = ""
    user = ""

    def __init__(self, config=None, config_path=MODELGYM_CONFIG["default_config_path"]):
        if config_path:
            self.config = self.__config_by_path(config_path)
        if type(config) is dict:
            self.config.update(config)
        else:
            if config:
                raise TypeError("config must be a dictionary!")
        project_root = Path(self.config["local_project_root"]).expanduser()
        self.project_root = project_root
        self.project_name = Path(self.project_root.parts[-1])
        if not project_root.is_dir():
            project_root.mkdir(parents=True, exist_ok=True)
        user_folder = self.project_root / self.config["user"]
        self.user = self.config["user"]
        if not user_folder.is_dir():
            user_folder.mkdir(parents=True, exist_ok=True)
        # self.stub = new_client()
        self.file_service = FileService(
            account_name=self.config['azurefs_acc_name'],
            account_key=self.config['azurefs_acc_key'])
        self.afs_share = self.config['azurefs_share']
        self.__get_client_transport_credentials(
            str(Path(self.config["client_cert"]).expanduser()),
            str(Path(self.config["client_key"]).expanduser()),
            str(Path(self.config["ca_cert"]).expanduser()))
        self.channel = grpc.secure_channel(
            self.config["connect_to"],
            self.creds,
            options=(
                ('grpc.max_send_message_length',
                 self.config["max_msg_size_megabytes"]),
                ('grpc.max_receive_message_length',
                 self.config["max_msg_size_megabytes"]),
            ))
        self.stub = wonderland_pb2_grpc.WonderlandStub(self.channel)
        self.check_user()

    def check_user(self):
        list_folder = self.file_service.list_directories_and_files(self.afs_share)
        for folder in list_folder:
            if self.user == folder.name:
                return True
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=self.user)
        return True

    def __get_client_transport_credentials(self, client_cert_path,
                                           client_key_path, ca_cert_path):
        client_cert_path = Path(client_cert_path).expanduser()
        client_key_path = Path(client_key_path).expanduser()
        ca_cert_path = Path(ca_cert_path).expanduser()
        path_ok = [
            client_cert_path.exists(),
            client_key_path.exists(),
            ca_cert_path.exists()
        ]
        if not all(path_ok):
            raise ValueError("One of the credentials files does not exist")
        self.creds = grpc.ssl_channel_credentials(ca_cert_path.read_bytes(),
                                                  client_key_path.read_bytes(),
                                                  client_cert_path.read_bytes())

    def __config_by_path(self, path):
        path = Path(path).expanduser()
        if path.exists():
            with path.open() as file:
                config = yaml.safe_load(file)  # safe_load avoids arbitrary object construction
            return config
        else:
            raise FileNotFoundError(
                "Config {} doesn't exist! Check ~/.wonder/config.yaml".format(path))

    def eval_model(self, model_info, data_path):
        model_path = self.send_model(model_info)
        job = Job(input=json.dumps({
            "model_path": str(model_path),
            "data_path": str(data_path)
        }),
                  kind="hyperopt")
        job = self.stub.CreateJob(job)
        self.stub.GetJob(RequestWithId(id=job.id))
        return job.id

    def gather_results(self, job_id_list, timeout):
        job_completed = {job_id: Job.PENDING for job_id in job_id_list}
        deadline = time.time() + timeout
        while True:
            time.sleep(5)
            for id in job_id_list:
                job = self.stub.GetJob(RequestWithId(id=id))
                job_completed[id] = job.status
            if not any(s in job_completed.values()
                       for s in (Job.PENDING, Job.RUNNING, Job.PULLED)):
                break
            if time.time() > deadline:
                print("Timeout has expired!")
                break
        results = []
        for i, id in enumerate(job_id_list):
            job = self.stub.GetJob(RequestWithId(id=id))
            if job.status == Job.COMPLETED:
                results += [{}]
            else:
                results.append(None)
            files = {}
            if job.output != "":
                files = json.loads(job.output)
            for file, path in files.items():
                self.file_service.get_file_to_path(
                    share_name=self.afs_share,
                    directory_name=Path(path).parent,
                    file_name=Path(path).name,
                    file_path=str(self.project_root / path))
                if file == 'output':
                    with open(self.project_root / path, "r") as f:
                        results[i]['output'] = json.load(f)
                if file == 'result_model_path':
                    results[i]['result_model_path'] = self.project_root / path
                if file == 'error':
                    with open(self.project_root / path, "r") as f:
                        logging.warning(f.read())
        return results

    def send_model(self, model_info):
        folder = "model-" + ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(12)
        ])
        model_path = self.project_root / self.user / folder / MODELGYM_CONFIG["model_file"]
        model_folder = model_path.parent
        try:
            model_folder.mkdir()
        except FileExistsError:
            logging.warning("Model folder {} already exists!".format(model_folder))
        except FileNotFoundError:
            logging.warning("Model folder {} is missing!".format(model_folder))
        with model_path.open(mode="w") as file:
            json.dump(model_info, file, cls=NumpyEncoder)
        afs_path = Path(self.user) / folder / MODELGYM_CONFIG["model_file"]
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path.parent)
        self.file_service.create_file_from_path(share_name=self.afs_share,
                                                directory_name=afs_path.parent,
                                                file_name=afs_path.name,
                                                local_file_path=model_path,
                                                max_connections=cpu_count())
        return afs_path

    def send_data(self, data_path, push_data=False):
        """
        Copy data to the AFS DATA directory.

        :param data_path: <string>. Your data path as a string.
        :param push_data: If True, upload the data even if it already exists.
        :return: path in the AFS share.
        """
        logging.info("Sending data to AFS")
        checksum = get_data_hash(data_path)[:10]
        data_folder = time.strftime("%Y-%m-%d-%H.%M") + '-' + checksum
        afs_path = Path(MODELGYM_CONFIG["data_folder"]) / data_folder / MODELGYM_CONFIG["data_file"]
        list_folder = self.file_service.list_directories_and_files(
            self.afs_share, directory_name="DATA")
        for folder in list_folder:
            if checksum == folder.name[-10:]:
                logging.info("Folder for the data already exists!")
                afs_path = Path("DATA") / folder.name / MODELGYM_CONFIG["data_file"]
                logging.info("Data is in the AFS {}".format(folder.name))
                if push_data:
                    logging.warning("Rewriting data")
                    afs_path = Path(MODELGYM_CONFIG["data_folder"]) / folder.name / MODELGYM_CONFIG["data_file"]
                else:
                    return afs_path
        self.file_service.create_directory(share_name=self.afs_share,
                                           directory_name=afs_path.parent)
        self.file_service.create_file_from_path(share_name=self.afs_share,
                                                directory_name=afs_path.parent,
                                                file_name=afs_path.name,
                                                local_file_path=data_path,
                                                max_connections=cpu_count(),
                                                progress_callback=logbar)
        logging.info("Sending is over")
        return afs_path

    def from_project_root_path(self, path):
        path = Path(path)
        # if not path.exists():
        #     logging.warning("{} is missing !!".format(path))
        try:
            relative_path = path.relative_to(self.project_root.parent)
            return str(relative_path)
        except ValueError:
            logging.warning("Path doesn't have the project_root folder {}".format(
                self.project_root))
table_service.create_table('cluster')
asset = {
    'PartitionKey': 'predictivemaintenance',
    'RowKey': 'predictivemaintenance',
    'Status': 'Not Created'
}
table_service.insert_or_merge_entity('cluster', asset)

file_service = FileService(account_name=STORAGE_ACCOUNT_NAME,
                           account_key=STORAGE_ACCOUNT_KEY)
file_service.create_share(share_name='azureml-project', quota=1)
file_service.create_share(share_name='azureml-share', quota=1)

source = os.environ['AML_ASSETS_URL']
dest = 'azureml_project.zip'
urllib.request.urlretrieve(source, dest)
with zipfile.ZipFile(dest, "r") as zip_ref:
    zip_ref.extractall("azureml-project")

# Mirror the extracted directory tree onto the file share.
for root, dirs, files in os.walk('azureml-project', topdown=True):
    directory = os.path.relpath(root, 'azureml-project')
    if directory != '.':
        file_service.create_directory('azureml-project', directory)
    for f in files:
        file_service.create_file_from_path('azureml-project', directory, f,
                                           os.path.join(root, f))