def remove(self, name):
		logger.debug(self._context('Removing vault'))
		if not is_file(self._file_name(name)):
			logger.warning(self._context('Vault does not exist'))
		else:
			delete_file(self._file_name(name), ignore_errors=True)
			if is_file(self._file_name(name)):
				logger.warning('Unable to remove vault')
def test_delete_files():
    setup_test_files()

    # Set the readonly.txt file to read/write so it can be deleted
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    # delete both the readonly.txt and the readwrite.txt files
    delete_files(f'{test_folder_path}/*.txt')

    # assert both files have been deleted
    assert is_file(f'{test_folder_path}/readwrite.txt') is False
    assert is_file(f'{test_folder_path}/readonly.txt') is False

    teardown_test_files()
def test_rename_file():
    setup_test_files()

    rename_file(f'{test_folder_path}/readwrite.txt', f'{test_folder_path}/readwrite_renamed.txt')
    assert is_file(f'{test_folder_path}/readwrite_renamed.txt') is True

    teardown_test_files()
Example #4
 def load(self, file_name, default_section_key=''):
     """Search across registered paths loading all instances of file in path order."""
     logger.info(f'Searching for {file_name} across: {self.path_names}')
     for path_name in self.path_names:
         load_file_name = f'{path_name}/{file_name}'
         if is_file(load_file_name):
             self.load_file(load_file_name, default_section_key)
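The load() pattern above (test each registered path with is_file() and pull in every match in path order) is easy to reproduce with the standard library alone. A minimal sketch, assuming os.path.isfile() stands in for the project's is_file() helper; the ConfigLoader name and the loaded list are illustrative, not part of the original code.

import os

class ConfigLoader:
    """Illustrative loader that searches registered paths in order."""

    def __init__(self, path_names):
        self.path_names = list(path_names)
        self.loaded = []

    def load(self, file_name):
        # load every instance of file_name found across path_names, in path order
        for path_name in self.path_names:
            candidate = f'{path_name}/{file_name}'
            if os.path.isfile(candidate):
                self.loaded.append(candidate)
        return self.loaded

# usage: files found in later paths can override settings loaded from earlier ones
loader = ConfigLoader(['conf', 'local'])
print(loader.load('app.ini'))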
	def connect(self, name, password):
		# assume failure
		self.name = name
		self.is_connected = False
		self.is_updated = False

		if not is_file(self._file_name(name)):
			warning_message = 'Vault does not exist'
			logger.warning(self._context(warning_message))
			raise self.exception(warning_message)
		else:
			logger.debug(self._context('Opening vault'))
			if not self._load_file():
				warning_message = self._context('Vault corrupt')
				logger.warning(warning_message)
				raise self.exception(warning_message)

			# key vault authentication
			if not self._authenticate():
				warning_message = self._context('Vault password authentication failed')
				logger.warning(warning_message)
				raise self.exception(warning_message)
			else:
				# we successfully connected to vault
				self.is_connected = True

				# remove password from secrets; we add back when we disconnect
				del self.secrets[self.password_key]
Example #6
	def _listen(self):
		if not self.endpoint:
			return ''
		else:
			if is_file(self.endpoint):
				return load_text(self.endpoint, '')
			else:
				return ''
	def find_file(self, file_name):
		# search for file across multiple paths/file types
		logger.info(f'Searching for {file_name} across: {self.path_names}')
		for path_name in self.path_names:
			load_file_name = f'{path_name}/{file_name}'
			if is_file(load_file_name):
				# return the first matching file name found in path order
				return load_file_name
		return None
def test_move_file():
    setup_test_files()

    move_file(f'{test_folder_path}/readwrite.txt',
              f'{test_folder_path}/working/readwrite_copy.txt')
    assert is_file(f'{test_folder_path}/working/readwrite_copy.txt') is True

    teardown_test_files()
def teardown_test_files():
    # set the readonly.txt file to read/write so it can be deleted if it exists
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    # clear the test/working folder
    # DISCUSS: We may want to remove entire folder and recreate in case there are sub-folders?
    # eg, clear_folder(test_folder_path)
    clear_folder(f'{test_folder_path}/')
def test_clear_folder():
    setup_test_files()
    # ToDo: add exception handling test
    # set the readonly.txt file to read/write so it can be deleted if it exists
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    clear_folder(f'{test_folder_path}/')
    assert len(os.listdir(f'{test_folder_path}/')) == 0

    teardown_test_files()
 def delete(self, blob_name):
     """Delete blob."""
     target_file_name = self._blob_file(blob_name)
     if not is_file(target_file_name):
         warning_message = f"Blob name does not exist"
         logger.warning(self._context(warning_message, blob_name))
         is_success = False
     else:
         logger.debug(self._context(f"Deleting blob", blob_name))
         delete_file(target_file_name)
         is_success = True
     return is_success
 def get(self, target_file_name, blob_name):
     """Download blob to target file name."""
     blob_folder = self._blob_folder()
     source_file_name = f"{blob_folder}/{blob_name}"
     if not is_file(source_file_name):
         logger.warning(self._context("Blob name does not exist", blob_name))
         is_success = False
     else:
         logger.debug(self._context(f"Getting {target_file_name}", blob_name))
         copy_file_if_exists(source_file_name, target_file_name)
         is_success = True
     return is_success
Example #13
    def load(self):
        if not is_file(self.file_name):
            # file doesn't exist, initialize object with default values
            logger.info(f'Initializing {self.file_name}')
            self.job_id = 1
            self.tables = dict()
        else:
            logger.info(f'Loading {self.file_name}')
            obj = load_jsonpickle(self.file_name)

            # load key attributes
            self.job_id = obj.job_id
            self.tables = obj.tables
    def on_mouse_move(self, *args):
        mouse_pos = args[1]
        if not self.mouse_disabled():
            for child in self.children:

                if child.focus:
                    child.background_color = child.focused_color

                elif child.collide_point(*child.to_widget(*mouse_pos))\
                        and not (is_file(child.attrs) and self.moving):
                    # do not change the background color of icons when moving files and the mouse doesn't point at a directory
                    child.background_color = child.active_color

                else:
                    child.background_color = child.unactive_color
Example #15
 def callback(self, path):
     _path, filename = split(path)
     # print('REMOTE SEARCH', self.text, path,  thumb_dir not in path and self.text in filename.lower())
     if thumb_dir not in path and self.text in filename.lower():
         try:
             attrs = get_dir_attrs(path, self.sftp)
         except Exception as ex:
             ex_log(f'Failed to get attrs {ex}')
         else:
             attrs.thumbnail = self.thumbnail
             if is_file(attrs):
                 attrs.path = path
             else:
                 attrs.path = _path
             self.search_list.append(attrs)
             self.files_queue.put(attrs)
Example #16
    def is_stale(self):
        """Returns True if pid file is stale (older than boot time or contains inactive pid)."""
        reason = ''

        if is_file(self.file_name):
            # pid file exists, so check whether it's stale
            pid = self.get_pid()
            pid_datetime = file_modify_datetime(self.file_name)
            if pid_datetime < boot_datetime():
                reason = f' - pid file older ({pid_datetime}) than boot time ({boot_datetime()})'
            elif not is_process(pid):
                reason = f' - pid ({pid}) does not exist'

        status = bool(reason)
        logger.debug(f'{self}:is_stale({status}){reason}')
        return status
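A hedged companion sketch for the staleness check above: it covers only the "contains inactive pid" half, using os.kill(pid, 0) as a POSIX existence probe; the boot-time comparison and the project's get_pid(), file_modify_datetime(), and is_process() helpers are not reproduced here.

import os

def is_pid_active(pid):
    """Return True if a process with this pid currently exists (POSIX only)."""
    try:
        os.kill(pid, 0)   # signal 0 performs the existence/permission check without signaling
    except ProcessLookupError:
        return False
    except PermissionError:
        return True       # process exists but belongs to another user
    return True

def is_stale_pid_file(file_name):
    """Stale if the pid file exists but its recorded process is no longer running."""
    if not os.path.isfile(file_name):
        return False
    try:
        pid = int(open(file_name).read().strip())
    except ValueError:
        return True       # unreadable pid content counts as stale
    return pid <= 0 or not is_pid_active(pid)

# usage: a missing pid file is not stale; a pid file pointing at a dead process is
print(is_stale_pid_file('/tmp/example.pid'))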
    def __init__(self, project_file=None):
        # session folder (acts as root path for job specific folders)
        self.session_folder = '../sessions'

        # configuration engines
        self.config = None
        self.option = None

        # project metadata
        self.project = None
        self.namespace = None

        # project resources
        self.database = None
        self.schedule = None

        # project dataset specific working folders
        self.state_folder = None
        self.work_folder = None
        self.publish_folder = None

        # project database connections (db_conn)
        self.source_db_conn = None
        self.target_db_conn = None

        # project file and name
        self.project_file = ''
        self.project_name = ''

        # if optional project file supplied use it; otherwise try command line
        if project_file:
            self.project_file = project_file
        elif len(sys.argv) > 1:
            self.project_file = sys.argv[1]

        # make sure we have a valid project file
        app_name = script_name()
        if not self.project_file:
            print(f'{app_name}: error - must specify project file')
        elif not is_file(f'../conf/{self.project_file}'):
            print(
                f'{app_name}: error - project file not found ({self.project_file})')
        else:
            # project file controls configuration
            self.project_name = just_file_stem(self.project_file)
    def __init__(self, tenant):
        # tenant
        self.tenant = force_local_path(tenant.lower())

        # make sure we have a tenant
        create_folder(self.tenant)

        self.ad_file_name = force_file_ext(f'{self.tenant}/ad', 'json')

        # start in a non-authenticated state
        self.identity = None
        self.is_authenticated = False

        # load our directory
        if is_file(self.ad_file_name):
            self._load()
        else:
            self.identities = dict()
	def create(self, name, password):
		# if already connected, then disconnect first
		if self.is_connected:
			self.disconnect()

		if is_file(self._file_name(name)):
			# update status
			logger.warning(self._context('Vault already exists'))
		else:
			# update status
			self.is_updated = True
			self.is_connected = True

			# create new key vault
			self.name = name
			self.password = password
			self.secrets = dict()
			self._save_file()
			logger.debug(self._context('Creating vault'))
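The create(), connect(), and remove() snippets on this page share one existence-guarded lifecycle: create only when the vault file is absent, connect only when it is present, and warn on remove when it is missing. A minimal standalone sketch of that pattern, using json and os.path.isfile() instead of the project's encrypted vault format, password authentication, and helper functions; the FileVault name and all of its internals are illustrative.

import json
import logging
import os

logger = logging.getLogger('vault_sketch')

class FileVault:
    """Illustrative stand-in; not the project's vault implementation."""

    def __init__(self, folder='.'):
        self.folder = folder
        self.secrets = {}
        self.is_connected = False

    def _file_name(self, name):
        return os.path.join(self.folder, f'{name}.vault')

    def create(self, name, secrets=None):
        # create only if the vault file does not already exist
        if os.path.isfile(self._file_name(name)):
            logger.warning('Vault already exists')
            return
        self.secrets = dict(secrets or {})
        with open(self._file_name(name), 'w') as f:
            json.dump(self.secrets, f)
        self.is_connected = True

    def connect(self, name):
        # connect only if the vault file exists
        if not os.path.isfile(self._file_name(name)):
            raise FileNotFoundError('Vault does not exist')
        with open(self._file_name(name)) as f:
            self.secrets = json.load(f)
        self.is_connected = True

    def remove(self, name):
        # warn (rather than fail) when asked to remove a vault that is not there
        if not os.path.isfile(self._file_name(name)):
            logger.warning('Vault does not exist')
        else:
            os.remove(self._file_name(name))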
    def put(self, source_file_name, blob_name):
        """"Upload source file name to blob."""
        if not is_file(source_file_name):
            warning_message = f"Source file does not exist ({source_file_name})"
            logger.warning(self._context(warning_message, blob_name))
            is_success = False
        else:
            logger.debug(self._context(f"Putting {source_file_name}", blob_name))

            # build blob target file and folder names
            blob_folder = self._blob_folder()
            target_file_name = f"{blob_folder}/{blob_name}"
            target_folder = just_path(target_file_name)

            # make sure the blob's target folder exists
            create_folder(target_folder)

            # then copy source file to blob container
            copy_file_if_exists(source_file_name, target_file_name)
            is_success = True
        return is_success
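Taken together, the delete(), get(), and put() methods above reduce to copying files in and out of a blob folder, guarded by is_file() checks. A minimal standalone sketch of that round trip, assuming the project's copy_file_if_exists(), create_folder(), and just_path() helpers roughly correspond to shutil.copy2(), os.makedirs(), and os.path.dirname(); the FileBlobStore name is illustrative.

import os
import shutil

class FileBlobStore:
    """Illustrative file-system blob store; not the project's BlobStore."""

    def __init__(self, blob_folder):
        self.blob_folder = blob_folder

    def put(self, source_file_name, blob_name):
        # copy a local file into the blob folder, creating target folders as needed
        if not os.path.isfile(source_file_name):
            return False
        target = os.path.join(self.blob_folder, blob_name)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(source_file_name, target)
        return True

    def get(self, target_file_name, blob_name):
        # copy a blob out of the blob folder to a local file
        source = os.path.join(self.blob_folder, blob_name)
        if not os.path.isfile(source):
            return False
        shutil.copy2(source, target_file_name)
        return True

    def delete(self, blob_name):
        # remove a blob if it exists
        target = os.path.join(self.blob_folder, blob_name)
        if not os.path.isfile(target):
            return False
        os.remove(target)
        return True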
Example #21
    def stage_file(self, archive_capture_file_name):
        logger.info(
            f"Getting {archive_capture_file_name} from archive blob store")

        # make sure work folder exists and is empty
        clear_folder(self.work_folder)

        # connect to the archive blobstore
        resource = self.config(self.project.blobstore_archive)
        bs_archive = BlobStore()
        bs_archive.connect(resource)

        # extract dataset name and job id from archive capture file name
        dataset_name, _, job_id = just_file_stem(
            archive_capture_file_name).partition("#")

        # copy archive_capture_file_name to our local working folder
        capture_file_name = just_file_name(archive_capture_file_name)
        local_work_file_name = f"{self.work_folder}/{capture_file_name}"
        archive_capture_file_blob_name = f"{archive_capture_file_name}"
        bs_archive.get(local_work_file_name, archive_capture_file_blob_name)
        bs_archive.disconnect()

        # unzip the capture file we retrieved from archive
        with zipfile.ZipFile(local_work_file_name) as zf:
            zf.extractall(self.work_folder)

        # create the file's dataset_name schema if missing
        self.target_db_conn.create_schema(dataset_name)

        # process all table files in our work folder
        for file_name in sorted(glob.glob(f"{self.work_folder}/*.table")):
            table_name = just_file_stem(file_name)
            logger.info(f"Processing {table_name} ...")

            # always load table objects
            table_object = load_jsonpickle(
                f"{self.work_folder}/{table_name}.table")

            # skip table if no schema file exists
            schema_file_name = f"{self.work_folder}/{table_name}.schema"
            if not is_file(schema_file_name):
                logger.warning(
                    f"Table skipped ({table_name}); schema file not found")
                continue

            # always load table schema
            table_schema = load_jsonpickle(schema_file_name)

            # always load table pk
            table_pk = load_text(f"{self.work_folder}/{table_name}.pk").strip()

            # extend table object with table name and column names from the table_schema object
            table_object.table_name = table_name
            table_object.column_names = [
                column_name for column_name in table_schema.columns
            ]

            # if drop_table, drop table and exit
            if table_object.drop_table:
                logger.info(f"Table drop request; table_drop=1")
                self.target_db_conn.drop_table(dataset_name, table_name)
                return

            # convert table schema to our target database and add extended column definitions
            extended_definitions = "udp_jobid int, udp_timestamp datetime2".split(
                ",")
            convert_to_mssql(table_schema, extended_definitions)

            # Future: support custom staging table type overrides
            # [table].table_type = < blank > | standard, columnar, memory, columnar - memory

            # handle cdc vs non-cdc table workflows differently
            logger.debug(
                f"{table_name}.cdc={table_object.cdc}, timestamp={table_object.timestamp}"
            )
            if (not table_object.cdc or table_object.cdc.lower() == "none"
                    or not table_pk):
                # if table cdc=none, drop the target table
                logger.info(
                    f"Table cdc=[{table_object.cdc}]; rebuilding table")
                self.target_db_conn.drop_table(dataset_name, table_name)

                # then re-create target table with latest schema
                # FUTURE: Add udp_pk, udp_nk, udp_nstk and other extended columns
                logger.info(
                    f"Re-creating non-CDC table: {dataset_name}.{table_name}")
                self.target_db_conn.create_table_from_table_schema(
                    dataset_name, table_name, table_schema,
                    extended_definitions)

                # no cdc in effect for this table - insert directly to target table
                work_folder_obj = pathlib.Path(self.work_folder)
                batch_number = 0
                for json_file in sorted(
                        work_folder_obj.glob(f"{table_name}#*.json")):
                    # load rows from json file
                    # input_stream = open(json_file)
                    # rows = json.load(input_stream)
                    # input_stream.close()
                    rows = load_jsonpickle(json_file)

                    # insert/upsert/merge *.json into target tables
                    if not rows:
                        logger.info(
                            f"Table {table_name} has 0 rows; no updates")
                    else:
                        batch_number += 1
                        logger.info(
                            f"Job {job_id}, batch {batch_number}, table {table_name}"
                        )
                        self.progress_message(
                            f"loading {just_file_stem(capture_file_name)}({table_name}.{batch_number:04}) ..."
                        )

                        # convert date/datetime columns to date/datetime values
                        convert_data_types(rows, table_schema)
                        self.target_db_conn.bulk_insert_into_table(
                            dataset_name, table_name, table_schema, rows)

            else:
                # table has cdc updates

                # create target table if it doesn't exist
                if not self.target_db_conn.does_table_exist(
                        dataset_name, table_name):
                    # FUTURE: Add udp_pk, udp_nk, udp_nstk and other extended columns
                    logger.info(f"Creating table: {dataset_name}.{table_name}")
                    self.target_db_conn.create_table_from_table_schema(
                        dataset_name, table_name, table_schema,
                        extended_definitions)

                # create temp table to receive captured changes
                # FUTURE: Create a database wrapper function for creating 'portable' temp table names vs hard-coding '#'.
                temp_table_name = f"_{table_name}"
                self.target_db_conn.drop_table(dataset_name, temp_table_name)
                self.target_db_conn.create_table_from_table_schema(
                    dataset_name, temp_table_name, table_schema,
                    extended_definitions)

                # insert captured updates into temp table
                work_folder_obj = pathlib.Path(self.work_folder)
                batch_number = 0
                for json_file in sorted(
                        work_folder_obj.glob(f"{table_name}#*.json")):
                    # load rows from json file
                    # input_stream = open(json_file)
                    # rows = json.load(input_stream)
                    # input_stream.close()
                    rows = load_jsonpickle(json_file)

                    # insert/upsert/merge *.json into target tables
                    if not rows:
                        logger.info(
                            f"Table {table_name} has 0 rows; no updates")
                        break
                    else:
                        batch_number += 1
                        logger.info(
                            f"Job {job_id}, batch {batch_number}, table {table_name}"
                        )
                        self.progress_message(
                            f"loading {just_file_stem(capture_file_name)}({table_name}.{batch_number:04}) ..."
                        )

                        # convert date/datetime columns to date/datetime values
                        convert_data_types(rows, table_schema)
                        self.target_db_conn.bulk_insert_into_table(
                            dataset_name, temp_table_name, table_schema, rows)
                else:
                    # merge (upsert) temp table to target table
                    merge_cdc = cdc_merge.MergeCDC(table_object,
                                                   extended_definitions)
                    sql_command = merge_cdc.merge(dataset_name, table_pk)

                    # TODO: Capture SQL commands in a sql specific log.
                    logger.debug(sql_command)
                    self.target_db_conn.cursor.execute(sql_command)

                # drop temp table after merge
                self.target_db_conn.drop_table(dataset_name, temp_table_name)
def test_is_file():
    setup_test_files()
    assert is_file(f'{test_folder_path}/readonly.txt') is True
    assert is_file(f'{test_folder_path}/ThisDoesNotExist.txt') is False
    teardown_test_files()
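The is_file() helper exercised throughout these examples is not shown on this page. A minimal sketch of how such a wrapper is commonly written, assuming it simply guards os.path.isfile() and treats invalid input as "not a file"; this is an assumption, not the project's implementation.

import os

def is_file(file_name):
    """Return True if file_name refers to an existing regular file."""
    try:
        return os.path.isfile(file_name)
    except (TypeError, ValueError):
        # e.g. None or an embedded NUL byte: treat as "not a file"
        return False

# usage
print(is_file(__file__))          # True when run as a script
print(is_file('no_such_file'))    # False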