Ejemplo n.º 1
0
    def process_one_file(self, url, desired_filename, filename_suff,
                         raw_xml_str):
        """Validate one TEI XML document and archive it.

        A valid document is hashed and written into the output zipfile,
        with its digests logged to the hashsums file; an invalid one is
        dumped into the bad-URLs directory for later inspection.
        Returns the collision-free archive filename in either case.
        """
        tree = etree.fromstring(raw_xml_str)
        xml_filename = check_for_filename_collision(
            url, desired_filename, filename_suff,
            self._assigned_filenames, self._tei_logger)
        out_filename = os_path_basename(xml_filename)

        try:
            self._validator.assert_(tree)
        except AssertionError as err:
            # Validation failed: log it and keep the raw bytes on disk.
            self._tei_logger.log('ERROR', 'TEI validation error:', url,
                                 out_filename, err)
            with open(os_path_join(self._bad_urls_dir, out_filename),
                      'wb') as fh:
                fh.write(raw_xml_str)
        else:
            digests = self._hasher.hash_file(BytesIO(raw_xml_str))
            self._zipfile.writestr(xml_filename, raw_xml_str)
            print(out_filename,
                  url,
                  *digests,
                  sep='\t',
                  file=self._hashsums_fh)

        return xml_filename
Ejemplo n.º 2
0
def process_single_scan_battery_keras(model_folder,
                                      source_scan_battery_dirname):
    """Process every target of one scan battery for a Keras model, then
    clean up the battery-level scratch folders and files.

    Parameters:
        model_folder: model directory containing a scan_batteries subtree.
        source_scan_battery_dirname: source battery folder (its basename
            selects the matching folder inside the model's scan_batteries).

    Raises:
        OSError: when either input folder does not exist, or cleanup fails.
        ValueError: when no target folders match the expected prefix.
    """
    # Make sure model_folder and source_scan_battery_dirname exist.
    if not os_path_isdir(model_folder):
        raise OSError('{}: model folder {} does not exist'.format(
            SCRIPT_FNAME, model_folder))
    if not os_path_isdir(source_scan_battery_dirname):
        raise OSError(
            '{}: source scan battery folder {} does not exist'.format(
                SCRIPT_FNAME, source_scan_battery_dirname))

    # model/scan_batteries folders.
    model_scan_batteries_dirname = os_path_join(model_folder,
                                                SCAN_BATTERIES_DIRNAME)
    model_scan_battery_dirname = os_path_join(
        model_scan_batteries_dirname,
        os_path_basename(source_scan_battery_dirname))

    # Grab all targets with glob (local renamed: was "mode_..." typo).
    model_scan_battery_target_prefix = os_path_join(
        model_scan_battery_dirname, TARGET_PREFIX + '*')
    target_dirnames = glob_glob(model_scan_battery_target_prefix)
    if not target_dirnames:
        raise ValueError('{}: no targets found with prefix {}'.format(
            SCRIPT_FNAME, model_scan_battery_target_prefix))

    for target_dirname in target_dirnames:
        process_single_target(target_dirname)

    # Remove scan battery-level folders.
    for folder in SCAN_BATTERY_FOLDERS_TO_REMOVE:
        folder_path = os_path_join(model_scan_battery_dirname, folder)
        if os_path_isdir(folder_path):
            try:
                shutil.rmtree(folder_path)
            # Was a bare `except:` (swallowed SystemExit/KeyboardInterrupt
            # and hid the cause); catch OSError and chain it.  The message
            # also wrongly said "file" for a folder.
            except OSError as err:
                raise OSError(
                    'Error: unable to remove folder {}'.format(
                        folder_path)) from err

    # Remove scan battery-level files.
    for file in SCAN_BATTERY_FILES_TO_REMOVE:
        file_path = os_path_join(model_scan_battery_dirname, file)
        if os_path_isfile(file_path):
            try:
                os_remove(file_path)
            except OSError as err:
                raise OSError(
                    'Error: unable to remove file {}'.format(
                        file_path)) from err
Ejemplo n.º 3
0
def guess_filename(obj):
    """Tries to guess the filename of the given object.

    Returns the basename of ``obj.name`` when it looks like a real path,
    or None for objects without a usable name (including pseudo-names
    like ``<stdin>`` that are wrapped in angle brackets).
    """
    name = getattr(obj, 'name', None)
    # `basestring` is Python 2 only and raises NameError on Python 3;
    # `str` is the correct check here.
    if (name and isinstance(name, str) and name[0] != '<' and
            name[-1] != '>'):
        return os_path_basename(name)
    return None
Ejemplo n.º 4
0
def address_from_address_family(address_family, single_address):
    """Resolve a SingleAddress within an AddressFamily to an Addresses object.

    The name falls back to the basename of the address's directory when no
    explicit name is given; an unknown name is reported through
    ``_raise_did_you_mean``.
    """
    target_name = single_address.name
    if target_name is None:
        target_name = os_path_basename(single_address.directory)
    if target_name not in address_family.objects_by_name:
        _raise_did_you_mean(address_family, single_address.name)
    return Addresses((Address(address_family.namespace, target_name),))
Ejemplo n.º 5
0
def address_from_address_family(address_family, single_address):
  """Look up *single_address* in *address_family* and wrap it in Addresses.

  When the address has no explicit name, its directory basename is used
  instead; a name missing from the family is reported via
  ``_raise_did_you_mean``.
  """
  resolved = (single_address.name
              if single_address.name is not None
              else os_path_basename(single_address.directory))
  if resolved not in address_family.objects_by_name:
    _raise_did_you_mean(address_family, single_address.name)
  return Addresses(tuple([Address(address_family.namespace, resolved)]))
Ejemplo n.º 6
0
def parse_with_depends(paths):
    """Build a module-info mapping keyed by module directory basename.

    Each entry holds the module's path, its evaluated manifest dict, and
    the manifest's 'depends' list (empty when absent).
    """
    result = {}
    for path in paths:
        name = os_path_basename(path)
        manifest_filename = is_module(path)
        # Use a context manager: the original `eval(open(...).read())`
        # leaked the file handle.
        with open(manifest_filename) as fh:
            # NOTE(review): eval() executes arbitrary code from the
            # manifest file; acceptable only for trusted local modules.
            # ast.literal_eval would be safer if manifests are pure
            # literals — confirm before switching.
            manifest = eval(fh.read())
        result[name] = {
            'path': path,
            'manifest': manifest,
            'depends': manifest.get('depends', []),
        }
    return result
Ejemplo n.º 7
0
 def goBlue(self):
     """Handle the blue key: open a virtual keyboard to rename the selection."""
     filename = self.SOURCELIST.getFilename()
     # Nothing selected: ignore the keypress.
     if not filename:
         return
     # NOTE(review): sourceDir is assigned but never used here — confirm
     # whether it is needed (e.g. by self.doRename) before removing.
     sourceDir = self.SOURCELIST.getCurrentDirectory()
     if os_path_isdir(filename):
         text = _("Rename directory")
         # Directory entries carry a trailing separator, so basename of
         # the dirname yields the directory's own name.
         filename = os_path_basename(os_path_dirname(filename))
     else:
         text = _("Rename file")
     self.session.openWithCallback(self.doRename, VirtualKeyBoard, title = text, text = filename)
Ejemplo n.º 8
0
def evaluate_one_model_keras(model_dirpath):
    """Evaluate a single trained Keras model over every scan battery.

    The model folder is renamed ``_trained`` -> ``_evaluating`` while it
    is being processed, and ``_evaluating`` -> ``_evaluated`` once all
    batteries are done.  Elapsed time is printed at the end.
    """
    evaluating_dirpath = model_dirpath.replace('_trained', '_evaluating')
    shutil_move(model_dirpath, evaluating_dirpath)
    model_name = os_path_basename(evaluating_dirpath)

    battery_copy_dirname = os_path_join(
        evaluating_dirpath, os_path_basename(SCAN_BATTERIES_DIRNAME))
    copy_anything(SCAN_BATTERIES_DIRNAME, battery_copy_dirname)

    time_start = time_time()
    for battery_dirname in glob_glob(
            os_path_join(SCAN_BATTERIES_DIRNAME, '*')):
        process_single_scan_battery_keras(evaluating_dirpath,
                                          battery_dirname)
    print('{}: it took {:.2f} to evaluate model {} for all scan batteries'.
          format(SCRIPT_FNAME,
                 time_time() - time_start, model_name))
    shutil_move(evaluating_dirpath,
                evaluating_dirpath.replace('_evaluating', '_evaluated'))
Ejemplo n.º 9
0
	def update_db(self):
		"""Apply pending versioned SQL update files to the database.

		Scans DB_DIR/Updates for files whose stem parses as a version
		(e.g. v0.0.0.sql), keeps the usable ones, and feeds them to mysql
		in ascending version order.  Raises on the first failed file.
		"""
		updates_dir = DB_DIR + "/Updates"
		if not os_path_exists(updates_dir):
			return

		# Collect candidate files whose name (sans extension) is a usable version.
		usable_updates = []
		for entry in os_listdir(updates_dir):
			entry_path = os_path_join(updates_dir, entry)
			stem = os_path_splitext(os_path_basename(entry_path))[0]
			version_string = Version.version_string(stem)
			if os_path_isfile(entry_path) and version_string \
			  and self.version_is_usable(Version(version_string)):
				usable_updates.append({"path": entry_path, "version": Version(version_string)})

		usable_updates.sort(key=lambda update: update["version"])

		# NOTE(review): the "<" redirection only works if call_shell_command
		# runs the argument list through a shell — confirm.
		for update in usable_updates:
			if self.call_shell_command(["sudo", "mysql", "-u", "root", "<", update["path"]]):
				raise Exception(f"Failed to update DB with file {update['path']}")
Ejemplo n.º 10
0
def filter_buildfile_paths(address_mapper, files):
  """Keep only the files whose basename matches the mapper's BUILD pattern."""
  matcher = address_mapper.build_pattern
  matching = [f for f in files.dependencies
              if matcher.match(os_path_basename(f.path))]
  return BuildFiles(tuple(matching))
Ejemplo n.º 11
0
def filter_buildfile_paths(address_mapper, directory_listing):
    """Filter a directory listing down to BUILD files.

    A file counts as a BUILD file when its basename matches the address
    mapper's configured build pattern.
    """
    is_build_file = address_mapper.build_pattern.match
    selected = tuple(f for f in directory_listing.files
                     if is_build_file(os_path_basename(f.path)))
    return BuildFilePaths(selected)
Ejemplo n.º 12
0
 def read_video(self, video_path: str):
     """Open *video_path* with OpenCV and remember its basename.

     Stores the capture on ``self.video_capture`` and the file's basename
     on ``self.video_name``, then returns the capture object.
     """
     capture = VideoCapture(video_path)
     self.video_capture = capture
     self.video_name = os_path_basename(video_path)
     return capture
Ejemplo n.º 13
0
def filter_buildfile_paths(address_mapper, directory_listing):
  """Return the BUILD-pattern-matching subset of a directory listing."""
  matcher = address_mapper.build_pattern
  return BuildFilePaths(tuple(
      f for f in directory_listing.files
      if matcher.match(os_path_basename(f.path))))
Ejemplo n.º 14
0
def document(infolder, outfolder, extension, loader, external_css=None,
             generate_toc=None, overwrite=False):
    """Generate documentation pages for every *extension* file in *infolder*.

    Uses a pickled table of contents (.cdoc_toc) plus a change-checker
    cache (.cdoc_cache) so that only files whose content or dependencies
    changed are reprocessed.  Output documents go to *outfolder*.

    external_css: optional stylesheet handed through to the builder.
    generate_toc: force TOC generation on/off; None means "only when
                  there is more than one page".
    overwrite:    when True, the checker-driven regeneration paths are
                  skipped (only new/uncached files are processed).
    """
    # Get previously generated TOC object
    TOC = os_path_join(infolder, '.cdoc_toc')
    try:
        with open(TOC, 'rb') as file:
            old_toc = pickle_load(file)
    except (FileNotFoundError, EOFError):
        # First run or truncated TOC: start from an empty 2D table.
        old_toc = table_Dict2D(OrderedDict)

    # Create new TOC object
    new_toc = table_Dict2D(OrderedDict)

    # TODO: do we really need a separate OrderedDict for pages ???
    pages = OrderedDict()
    anonym = iter_count()

    # TODO: Create real dependency graphs
    #       Document object:
    #           parents  = set()  # other documents depending on this document
    #           children = set()  # other documents this document depending on
    #
    #       If document changed:
    #           set all parents of document => changed
    #
    #       If any of its children changed:
    #           set all parents of child => changed
    #
    #       -- The loop should check if a document's change flag has already
    #          been set. If not, hash file, and set flag, and notify all
    #          dependencies (parents)

    # Load all pages
    with check_Checker(infolder, file='.cdoc_cache', lazy_update=True) as checker:
        # Go through all files
        for file in os_listdir(infolder):
            # If file has the proper extension
            if file.endswith(extension):
                # Create full file path
                filepath = os_path_join(infolder, file)
                # If file has been changed since last check
                if checker.ischanged(filepath) and not overwrite:
                    # Regenerate file
                    filename, pagename, depends = \
                        _process(infolder, file, filepath, pages, loader, anonym)
                # If file hasn't been changed
                else:
                    # If file has been cached before
                    try:
                        # Get previous infos
                        filename, depends = old_toc[filepath]
                        pagename = old_toc.otherkey(filepath)
                        pages[pagename] = None
                        # If any of the dependencies has changed
                        for dependency in depends:
                            if checker.ischanged(dependency) and not overwrite:
                                # Regenerate file
                                filename, pagename, depends = \
                                    _process(infolder, file, filepath, pages, loader, anonym)
                                break
                    # If file is new and hasn't been cached before
                    except KeyError:
                        # Generate it for the first time
                        filename, pagename, depends = \
                            _process(infolder, file, filepath, pages, loader, anonym)
                # Store new values
                new_toc[pagename:filepath] = filename, depends

    # If order changing, renaming, inserting, deleting, etc. happened
    if set(old_toc) - set(new_toc):
        for pagename, filepath in new_toc.keys():
            if pages[pagename] is None:
                # BUG FIX: the original call dropped the `infolder`
                # argument and passed the basename in its place; every
                # other _process call site is
                # (infolder, file, filepath, pages, loader, anonym).
                _process(infolder, os_path_basename(filepath), filepath,
                         pages, loader, anonym)

    # Write back TOC object
    with open(TOC, 'wb') as file:
        pickle_dump(new_toc, file, pickle_HIGHEST_PROTOCOL)
    # Generate Table of Content?
    if generate_toc is None:
        generate_toc = len(new_toc) > 1
    # Create documents
    _build(pages, outfolder, generate_toc, new_toc, external_css)
Ejemplo n.º 15
0
def filter_buildfile_paths(address_mapper, files):
    """Select the BUILD files from *files* by basename pattern match."""
    pattern_match = address_mapper.build_pattern.match
    build_files = tuple(f for f in files.dependencies
                        if pattern_match(os_path_basename(f.path)))
    return BuildFiles(build_files)
Ejemplo n.º 16
0
from time import time as time_time
from logging import basicConfig as logging_basicConfig, \
                    DEBUG as logging_DEBUG, \
                    INFO as logging_INFO, \
                    getLogger as logging_getLogger

from lib.process_single_scan_battery_keras import process_single_scan_battery_keras
from lib.utils import copy_anything

# SCAN_BATTERIES_TARGETS_GLOB_STRING = 'data/BEAM_Reverb_20181004_L74_70mm/target_*_SCR_*_0dB'

# SCAN_BATTERIES_DIRNAME = 'data/BEAM_Reverb_20181004_L74_70mm_selected'
SCAN_BATTERIES_DIRNAME = 'scan_batteries'
MODEL_SAVE_FNAME = 'model.joblib'
MODELS_DIRNAME = 'DNNs'
SCRIPT_FNAME = os_path_basename(__file__)
PROJECT_DIRNAME = os_path_dirname(__file__)
LIB_DIRNAME = os_path_join(PROJECT_DIRNAME, 'lib')


def evaluate_one_model_keras(model_dirpath):
    # rename _trained as _evaluating
    new_folder_name = model_dirpath.replace('_trained', '_evaluating')
    shutil_move(model_dirpath, new_folder_name)
    model_name = os_path_basename(new_folder_name)
    copied_scan_battery_dirname = os_path_join(
        new_folder_name, os_path_basename(SCAN_BATTERIES_DIRNAME))
    copy_anything(SCAN_BATTERIES_DIRNAME, copied_scan_battery_dirname)

    time_start = time_time()
                    isfile as os_path_isfile
from multiprocessing import Pool
from glob import glob as glob_glob
import shutil
from logging import info as logging_info

from scipy.io import loadmat

from lib.r2_dnn_stft import r2_dnn_stft
from lib.r3_dnn_apply import r3_dnn_apply
from lib.r4_dnn_istft import r4_dnn_istft
from lib.r5_dnn_image import r5_dnn_image
from lib.r6_dnn_image_display import r6_dnn_image_display
from lib.utils import get_mat_obj_from_h5py

SCRIPT_FNAME = os_path_basename(
    __file__)  # for error messages. File name can change.

OLD_STFT_FNAME = 'old_stft.mat'
CHANDAT_FNAME = 'chandat.mat'
SCAN_BATTERIES_DIRNAME = 'scan_batteries'
TARGET_PREFIX = 'target_'
PROCESS_SCRIPTS_DIRNAME = 'process_scripts'
TARGET_FILES_TO_REMOVE = [
    'old_stft.mat', 'new_stft.mat', 'chandat.mat', 'chandat_dnn.mat',
    'dnn_image.mat', 'box_left_max.txt', 'box_left_min.txt',
    'box_right_max.txt', 'box_right_min.txt', 'circle_out_radius.txt',
    'circle_out_xc.txt', 'circle_out_zc.txt', 'region_in.txt',
    'circle_radius.txt', 'circle_xc.txt', 'circle_zc.txt', 'chandat_clean.mat',
    'das_image.mat', 'masks.mat', 'masks.npz', 'startOffsets.mat'
]
SCAN_BATTERY_FOLDERS_TO_REMOVE = [
Ejemplo n.º 18
0
def document(infolder,
             outfolder,
             extension,
             loader,
             external_css=None,
             generate_toc=None,
             overwrite=False):
    """Generate documentation pages for every *extension* file in *infolder*.

    Uses a pickled table of contents (.cdoc_toc) plus a change-checker
    cache (.cdoc_cache) so that only files whose content or dependencies
    changed are reprocessed.  Output documents go to *outfolder*.

    external_css: optional stylesheet handed through to the builder.
    generate_toc: force TOC generation on/off; None means "only when
                  there is more than one page".
    overwrite:    when True, the checker-driven regeneration paths are
                  skipped (only new/uncached files are processed).
    """
    # Get previously generated TOC object
    TOC = os_path_join(infolder, '.cdoc_toc')
    try:
        with open(TOC, 'rb') as file:
            old_toc = pickle_load(file)
    except (FileNotFoundError, EOFError):
        # First run or truncated TOC: start from an empty 2D table.
        old_toc = table_Dict2D(OrderedDict)

    # Create new TOC object
    new_toc = table_Dict2D(OrderedDict)

    # TODO: do we really need a separate OrderedDict for pages ???
    pages = OrderedDict()
    anonym = iter_count()

    # TODO: Create real dependency graphs
    #       Document object:
    #           parents  = set()  # other documents depending on this document
    #           children = set()  # other documents this document depending on
    #
    #       If document changed:
    #           set all parents of document => changed
    #
    #       If any of its children changed:
    #           set all parents of child => changed
    #
    #       -- The loop should check if a document's change flag has already
    #          been set. If not, hash file, and set flag, and notify all
    #          dependencies (parents)

    # Load all pages
    with check_Checker(infolder, file='.cdoc_cache',
                       lazy_update=True) as checker:
        # Go through all files
        for file in os_listdir(infolder):
            # If file has the proper extension
            if file.endswith(extension):
                # Create full file path
                filepath = os_path_join(infolder, file)
                # If file has been changed since last check
                if checker.ischanged(filepath) and not overwrite:
                    # Regenerate file
                    filename, pagename, depends = \
                        _process(infolder, file, filepath, pages, loader, anonym)
                # If file hasn't been changed
                else:
                    # If file has been cached before
                    try:
                        # Get previous infos
                        filename, depends = old_toc[filepath]
                        pagename = old_toc.otherkey(filepath)
                        pages[pagename] = None
                        # If any of the dependencies has changed
                        for dependency in depends:
                            if checker.ischanged(dependency) and not overwrite:
                                # Regenerate file
                                filename, pagename, depends = \
                                    _process(infolder, file, filepath, pages, loader, anonym)
                                break
                    # If file is new and hasn't been cached before
                    except KeyError:
                        # Generate it for the first time
                        filename, pagename, depends = \
                            _process(infolder, file, filepath, pages, loader, anonym)
                # Store new values
                new_toc[pagename:filepath] = filename, depends

    # If order changing, renaming, inserting, deleting, etc. happened
    if set(old_toc) - set(new_toc):
        for pagename, filepath in new_toc.keys():
            if pages[pagename] is None:
                # BUG FIX: the original call dropped the `infolder`
                # argument and passed the basename in its place; every
                # other _process call site is
                # (infolder, file, filepath, pages, loader, anonym).
                _process(infolder, os_path_basename(filepath), filepath,
                         pages, loader, anonym)

    # Write back TOC object
    with open(TOC, 'wb') as file:
        pickle_dump(new_toc, file, pickle_HIGHEST_PROTOCOL)
    # Generate Table of Content?
    if generate_toc is None:
        generate_toc = len(new_toc) > 1
    # Create documents
    _build(pages, outfolder, generate_toc, new_toc, external_css)