Example #1
  def obsproc_run(self, j_id=None):
    '''
    run obsproc.exe
    '''
    obsproc_dir = os.path.join(self.config['filesystem']['wrfda_dir'],
                               'var/obsproc')
    # TODO: check if the output file is created and no errors have occurred
    if len(self.config['options_slurm']['slurm_obsproc.exe']):
      # run using slurm
      if j_id:
        mid = "--dependency=afterok:%d" %j_id
        obsproc_command = ['sbatch', mid, self.config['options_slurm']['slurm_obsproc.exe']]
      else:
        obsproc_command = ['sbatch', self.config['options_slurm']['slurm_obsproc.exe']]
      utils.check_file_exists(obsproc_command[-1])
      try:
        res = subprocess.check_output(obsproc_command, cwd=obsproc_dir,
                                      stderr=utils.devnull())
        j_id = int(res.split()[-1])  # slurm job-id
      except subprocess.CalledProcessError:
        logger.error('Obsproc failed: %s' %obsproc_command)
        raise  # re-raise exception
      return j_id  # return slurm job-id
    else:
      # run locally
      subprocess.check_call(os.path.join(obsproc_dir, 'obsproc.exe'), cwd=obsproc_dir,
                            stdout=utils.devnull(), stderr=utils.devnull())

      return None
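All of the wrfpy snippets on this page lean on the same small utils module. A minimal sketch of the two helpers used here, inferred from how they are called (check_cv5 in Example #17 passes boolean=True, and Example #22 catches IOError), not the project's actual code:

import os

def check_file_exists(filename, boolean=False):
    '''raise IOError if filename is not a readable file;
    with boolean=True, return True/False instead of raising'''
    readable = os.path.isfile(filename) and os.access(filename, os.R_OK)
    if boolean:
        return readable
    if not readable:
        raise IOError('file %s does not exist or is not readable' % filename)

def devnull():
    '''writable handle to os.devnull, used to silence subprocess output'''
    return open(os.devnull, 'w')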
Example #2
    def __init__(self):
        self.batch_size = 200
        self.num_classes = 10
        self.epochs = 50

        # check if a saved model exists
        # if it exists -> load, else -> train
        (x_train, y_train), (x_test, y_test) = mnist.load_data(get_cwd() + "/.keras/datasets/mnist.npz")

        self.x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32')
        self.x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32')
        self.x_train = self.x_train / 255
        self.x_test = self.x_test / 255
        self.y_train = keras.utils.to_categorical(y_train)
        self.y_test = keras.utils.to_categorical(y_test)

        print('x_train shape:', x_train.shape)
        print(x_train.shape[0], 'train_samples')
        print(x_test.shape[0], 'test_samples')

        self.model = None
        if check_file_exists(get_cwd() + "/.keras/models/model.h5") and check_file_exists(
                get_cwd() + "/.keras/models/model.yaml"):
            with open(get_cwd() + "/.keras/models/model.yaml", "r") as yaml_file:
                self.model: Sequential = keras.models.model_from_yaml(yaml_file.read())
            self.model.load_weights(get_cwd() + "/.keras/models/model.h5")
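The model.yaml/model.h5 pair loaded above would have been written by the training branch with something like the sketch below. Note that Sequential.to_yaml() only exists in older Keras releases (it was removed in recent TensorFlow/Keras versions); save_model and base_dir are placeholder names:

def save_model(model, base_dir):
    # architecture as YAML, weights as HDF5 (older Keras API)
    with open(base_dir + "/.keras/models/model.yaml", "w") as yaml_file:
        yaml_file.write(model.to_yaml())
    model.save_weights(base_dir + "/.keras/models/model.h5")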
Example #3
 def wrfvar_run(self, domain, j_id=None):
   '''
   run da_wrfvar.exe
   '''
   # set domain specific workdir
   wrfda_workdir = os.path.join(self.wrfda_workdir, "d0" + str(domain))
   logfile = os.path.join(wrfda_workdir, 'log.wrfda_d' + str(domain))
   if len(self.config['options_slurm']['slurm_wrfvar.exe']):
     if j_id:
       mid = "--dependency=afterok:%d" %j_id
       wrfvar_command = ['sbatch', mid, self.config['options_slurm']['slurm_wrfvar.exe']]
     else:
       wrfvar_command = ['sbatch', self.config['options_slurm']['slurm_wrfvar.exe']]
     utils.check_file_exists(wrfvar_command[-1])
     try:
       res = subprocess.check_output(wrfvar_command, cwd=wrfda_workdir,
                                     stderr=utils.devnull())
       j_id = int(res.split()[-1])  # slurm job-id
     except subprocess.CalledProcessError:
       logger.error('Wrfvar failed: %s' %wrfvar_command)
       raise  # re-raise exception
     while True:
       time.sleep(1)
       if not utils.testjob(j_id):
         break
   else:
     # run locally; send output to the domain logfile
     with open(logfile, 'w') as logf:
       subprocess.check_call(os.path.join(wrfda_workdir, 'da_wrfvar.exe'),
                             cwd=wrfda_workdir, stdout=logf, stderr=logf)
Example #4
 def _run_metgrid(self, j_id=None):
   '''
   run metgrid.exe (locally or using slurm script defined in config.json)
   '''
   if len(self.config['options_slurm']['slurm_metgrid.exe']):
     if j_id:
       mid = "--dependency=afterok:%d" %j_id
       metgrid_command = ['sbatch', mid, self.config['options_slurm']['slurm_metgrid.exe']]
     else:
       metgrid_command = ['sbatch', self.config['options_slurm']['slurm_metgrid.exe']]
     utils.check_file_exists(metgrid_command[-1])
     utils.silentremove(os.path.join(self.wps_workdir, 'metgrid', 'metgrid.exe'))
     os.symlink(os.path.join(self.config['filesystem']['wps_dir'],'metgrid','metgrid.exe'),
                os.path.join(self.wps_workdir, 'metgrid', 'metgrid.exe'))
     try:
       res = subprocess.check_output(metgrid_command, cwd=self.wps_workdir,
                                     stderr=utils.devnull())
       j_id = int(res.split()[-1])  # slurm job-id
     except subprocess.CalledProcessError:
       logger.error('Metgrid failed: %s' %metgrid_command)
       raise  # re-raise exception
     return j_id  # return slurm job-id
   else:
     metgrid_command = os.path.join(self.config['filesystem']['wps_dir'],
                                    'metgrid', 'metgrid.exe')
     utils.check_file_exists(metgrid_command)
     try:
       subprocess.check_call(metgrid_command, cwd=self.wps_workdir,
                             stdout=utils.devnull(), stderr=utils.devnull())
     except subprocess.CalledProcessError:
       logger.error('Metgrid failed: %s' %metgrid_command)
       raise  # re-raise exception
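The returned slurm job-id is what makes the stages chainable: each submission passes the previous id via --dependency=afterok, so a stage starts only after its predecessor exits successfully. An illustrative chain (the stage methods other than _run_metgrid are hypothetical here):

j_id = wps._run_geogrid()      # hypothetical first stage, no dependency
j_id = wps._run_ungrib(j_id)   # hypothetical; queued until geogrid succeeds
j_id = wps._run_metgrid(j_id)  # queued until ungrib succeeds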
Example #5
 def _check_wrf(self):
   '''
   check wrf options in json config file
   '''
   # verify that the config option is specified by the user
   assert (len(self.config['options_wrf']['namelist.input']) > 0), (
     'No WRF namelist.input specified in config file')
    # check if the specified namelist.input exists and is readable
   utils.check_file_exists(self.config['options_wrf']['namelist.input'])
   # check if namelist.input is in the required format and has all keys needed
   self._check_namelist_wrf()
Example #6
 def _connect_to_database(self):
     """
 check if database exists and try to connect to the database
 """
     utils.check_file_exists(self.database)  # check if database exists
     try:
         logger.debug("Connecting to database: %s" % self.database)
         self.connection = sqlite3.connect(
             self.database, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
         )
      except sqlite3.Error:
         message = "Database %s exists, but failed to connect" % self.database
         logger.error(message)
         raise
Example #7
 def _connect_to_database(self):
   '''
   check if database exists and try to connect to the database
   '''
   utils.check_file_exists(self.database)  # check if database exists
   try:
     logger.debug('Connecting to database: %s' %self.database)
     self.connection = sqlite3.connect(
       self.database,
       detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
    except sqlite3.Error:
     message = 'Database %s exists, but failed to connect' %self.database
     logger.error(message)
     raise
Example #8
 def _check_namelist_wps(self):
   '''
   check if namelist.wps is in the required format and has all keys needed
   '''
   # verify that example namelist.wps exists and is not removed by user
   basepath = utils.get_script_path()
   basepath = '/home/WUR/haren009/wrfpy'  # TODO: fix
   self.example_file = os.path.join(basepath, 'examples', 'namelist.wps')
   utils.check_file_exists(self.example_file)
   # load specified namelist
   self.user_nml = f90nml.read(self.config['options_wps']['namelist.wps'])
   # verify that all keys in self.user_nml are also in example namelist
   self._verify_namelist_wps_keys()
   # validate the key information specified
   self._validate_namelist_wps_keys()
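_verify_namelist_wps_keys and _validate_namelist_wps_keys are not shown. Since f90nml parses both namelists into nested dict-like objects, the key check could be as small as this sketch (a guess at the shape, not the project's code):

def _verify_namelist_wps_keys(self):
    example_nml = f90nml.read(self.example_file)
    for section in self.user_nml:
        for key in self.user_nml[section]:
            assert key in example_nml.get(section, {}), (
                'unknown namelist.wps key: %s/%s' % (section, key))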
Example #9
    def retrieve_csv(self, camera_url_file, duration, interval, result_path):
        """
        Reads camera urls from csv file and archives the images at the requested directory.
        """

        # verify file exists and can be read
        if not check_file_exists(camera_url_file):
            return -1

        with open(camera_url_file, 'r') as camera_file:
            camera_reader = csv.reader(camera_file)
            cam_id = 1
            cams = []
            for camera_url in camera_reader:
                # These cameras do not come from the database and so have no
                # ID; assign one so they can be placed in a result folder.
                camera_type = camera_url[0].split(".")[-1]
                if camera_type == "m3u8":
                    camera = StreamCamera(cam_id, duration, interval,
                                          camera_url[0])
                else:
                    camera = NonIPCamera(cam_id, duration, interval,
                                         camera_url[0])
                cam_id += 1
                cams.append(camera)
        if len(cams):
            self.__archive_cameras(cams, result_path)
Example #10
    def retrieve_db(self, camera_id_file, duration, interval, result_path):
        """
        Reads camera IDs from csv file, retrieves the associated camera objects from the database, and archives the images at the requested directory.
        """
        if not check_file_exists(camera_id_file):
            return -1

        with open(camera_id_file, 'r') as id_file:
            id_reader = csv.reader(id_file)
            cams = []
            for line in id_reader:
                try:
                    cam_id = int(line[0])
                except (ValueError, IndexError):
                    raise Exception(
                        "Error: No camera_id exists in line {} of input file \"{}\""
                        .format(line, camera_id_file))

                camera = self.__get_camera_from_db(cam_id, duration, interval)
                if camera is not None:
                    cams.append(camera)

        if len(cams):
            self.__archive_cameras(cams, result_path)
        return 0
Example #11
def init(params):
    '''
    Project initialization

    package.yml base conf
    - name
    - version
    - author
    '''
    # path to package.yml, inside the project dir if a name was given
    pkg_path = (os.path.join(params.name, package_file)
                if params.name else package_file)

    if not utils.check_file_exists(pkg_path):
        package_file_data = {}
        package_file_data['name'] = params.name if params.name else project_name
        package_file_data['author'] = getpass.getuser()
        # version format: [project version].[feature version].[bug version]
        package_file_data['version'] = '1.0.0'
        write_conf(pkg_path, package_file_data)

        if params.env:
            virtualenv = {
                'cmd': 'virtualenv',
                'args': [os.path.join(os.getcwd(),
                                      os.path.dirname(pkg_path), 'venv')]
            }

            if params.sys:
                virtualenv['args'].append('--system-site-packages')
            if params.nosys:
                virtualenv['args'].append('--no-site-packages')

            args = virtualenv['args']
            args.insert(0, virtualenv['cmd'])
            cmd_string = ' '.join(args)

            if not utils.cmd_with_check_os_value(cmd_string):
                cmd = 'source {0}'.format(
                    os.path.join(os.path.dirname(pkg_path),
                                 'venv', 'bin', 'activate'))
                print('Enter command \'{0}\' to start your project.'.format(cmd))
    else:
        print('package.yml already exists')
        exit(0)
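write_conf is project-specific; since package.yml is YAML, a plausible implementation is a plain safe_dump (an assumption, not the project's code):

import yaml

def write_conf(path, data):
    # dump the config dict as block-style YAML
    with open(path, 'w') as f:
        yaml.safe_dump(data, f, default_flow_style=False)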
Example #12
def compare_folders(folder1, folder2, output_file):
    """
    Compares the contents of two folders and writes the differences to the output file.
    """

    return_md = ""

    for lang in settings.languages:
        expected_files = ""

        generated_output_rst = settings.generated_output_rst.format(
            language=lang)
        generated_output_csv = settings.generated_output_csv.format(
            language=lang)

        # check if files exist in both folder1 and folder2
        if not utils.check_file_exists(f"{folder1}/{generated_output_rst}"):
            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder1}\n"
        if not utils.check_file_exists(f"{folder2}/{generated_output_rst}"):
            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder2}\n"
        if not utils.check_file_exists(f"{folder1}/{generated_output_csv}"):
            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder1}\n"
        if not utils.check_file_exists(f"{folder2}/{generated_output_csv}"):
            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder2}\n"

        if expected_files != "":
            print("Expected files are missing", file=sys.stderr)
            return_md += f"\n### {lang}\n\n#### Expected files are missing for {lang}\n{expected_files}\n"
            continue

        # compare contents of files
        cmp1 = compare_files(f"{folder1}/{generated_output_rst}",
                             f"{folder2}/{generated_output_rst}")
        cmp2 = compare_files(f"{folder1}/{generated_output_csv}",
                             f"{folder2}/{generated_output_csv}")

        if cmp1 != "" or cmp2 != "":
            print("Generated file contents are not matching", file=sys.stderr)
            return_md += f"\n### {lang}\n\n#### Generated file changes for {lang}\n\n"
            if cmp1 != "":
                return_md += f"- Changes to {generated_output_rst}:\n```diff\n{cmp1}```\n\n"
            if cmp2 != "":
                return_md += f"- Changes to {generated_output_csv}:\n```diff\n{cmp2}```\n\n"

    with open(output_file, 'w', newline='') as out:
        out.write(return_md)
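compare_files is assumed to return an empty string when the files match and a unified diff otherwise (the result is rendered inside a diff fence above); difflib covers that contract. A sketch, not necessarily the project's implementation:

import difflib

def compare_files(path1, path2):
    # unified diff of the two files; empty string when they are identical
    with open(path1) as f1, open(path2) as f2:
        diff = difflib.unified_diff(f1.readlines(), f2.readlines(),
                                    fromfile=path1, tofile=path2)
    return ''.join(diff)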
Example #13
def ask_input_image_uri(message):
    while True:
        uri = input(message)
        if not utils.check_file_exists(uri):
            print('URI not valid')
            continue  # keep asking until a valid path is given
        return uri
Example #15
 def _check_upp_dir(self):
   assert os.path.isdir(self.config['filesystem']['upp_dir']), (
     'upp directory %s not found' %self.config['filesystem']['upp_dir'])
   # create list of files to check
   files_to_check = [
     os.path.join(self.config['filesystem']['upp_dir'], filename) for
     filename in ['bin/unipost.exe', 'parm/wrf_cntrl.parm']]
   # check that all files in the list exist and are readable
   for filename in files_to_check:
     utils.check_file_exists(filename)
Example #16
 def _archive_output(self, current_time, thours, domain):
   '''
   rename unipost.exe output to wrfpost_d0${domain}_time.grb and archive
   '''
   import shutil
   # verify that domain is an int
   if not isinstance(domain, int):
     message = 'domain id should be an integer'
     logger.error(message)
     raise IOError(message)
   # define original and destination filename
   origname = 'WRFPRS%02d.tm00' %thours
   outname = 'wrfpost_d%02d_%s.grb' %(domain, current_time)
   # rename file and move to archive dir
   shutil.move(os.path.join(config['post_dir'], origname),
               os.path.join(config['upp_archive_dir'], outname))
   # check if file is indeed archived
   utils.check_file_exists(os.path.join(config['upp_archive_dir'], outname))
Example #17
 def check_cv5(self):
   '''
   return True if be.dat_d0{domain} is defined for each domain in config.json
    and all files exist, else return False
   '''
   return all([utils.check_file_exists(
     self.config['options_wrfda']['be.dat_d0' + str(domain)],
     boolean=True) for domain in range(1, self.max_dom+1)])
Example #18
def featurize_images_augmented(input_dir,
                               model_path,
                               output_dir,
                               batch_size,
                               downsample_encoder=True):
    """
    Compresses a set of augmented whole-slide images using a trained encoder network.

    :param input_dir: directory containing the vectorized images.
    :param model_path: path to trained encoder network.
    :param output_dir: destination folder to store the compressed images.
    :param batch_size: number of images to process in the GPU in one-go.
    :param downsample_encoder: if true downsample image from 128 to 64
    :return: nothing
    """

    # Output dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load encoder model
    encoder = keras.models.load_model(model_path, compile=False)

    # Downsample image to fit encoder needed for bigan encoder
    if downsample_encoder:
        encoder = downsample_encoder_128_to_64(encoder)

    image_list = get_file_list(input_dir, ext='_{item}.png')
    total_images = len(image_list)

    for index in range(total_images):
        filename = os.path.splitext(os.path.basename(image_list[index]))[0]
        filename_npy = input_dir + '/' + filename + '.npy'  # by convention on NIC it has to be an .npy
        wsi_pattern = input_dir + '/' + filename.split('_')[0] + '_{item}.npy'
        if check_file_exists(wsi_pattern.format(item='im_shape')):
            print(f'Processing image {filename}')
            encode_augment_wsi(wsi_pattern=filename_npy,
                               encoder=encoder,
                               output_dir=output_dir,
                               batch_size=batch_size,
                               aug_modes=[('none', 0), ('none', 90),
                                          ('none', 180), ('none', 270),
                                          ('horizontal', 0), ('vertical', 0),
                                          ('vertical', 90), ('vertical', 270)],
                               overwrite=False)
            print(
                f'Successfully vectorized {filename} : {total_images - index - 1} images left'
            )
        else:
            print('Vectorized file not found: {f}'.format(f=wsi_pattern.format(
                item='im_shape')),
                  flush=True)
    print('Finish Processing All images!')
Example #19
def vectorize_images(input_dir, mask_dir, output_dir, cache_dir, image_level,
                     patch_size):
    """
    Converts a set of whole-slide images into numpy arrays with valid tissue patches for fast processing.

    :param input_dir: folder containing the whole-slide images.
    :param mask_dir: folder containing the whole-slide masks.
    :param output_dir: destination folder to store the vectorized images.
    :param cache_dir: folder to store whole-slide images temporarily for fast access.
    :param image_level: image resolution to read the patches.
    :param patch_size: size of the read patches.
    :return: nothing
    """

    # Output dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Read image file names
    image_paths = get_file_list(input_dir,
                                ext='tif')  # get all the wsi.svs files

    # Read mask file names
    mask_paths = get_file_list(mask_dir)  # get all the mask files

    total_images = len(image_paths)

    for index in range(total_images):
        image_id = (os.path.basename(image_paths[index])).split('.')[0]
        output_pattern = output_dir + '/' + image_id + '_{item}.npy'  # by convention on NIC it has to be an .npy
        vectorized_png = output_dir + '/' + image_id + '_{item}.png'
        if not check_file_exists(vectorized_png):
            print(f'Processing image {image_id}')
            vectorize_wsi(image_path=cache_file(image_paths[index],
                                                cache_dir,
                                                overwrite=False),
                          mask_path=mask_paths[index],
                          output_pattern=output_pattern,
                          image_level=image_level,
                          mask_level=image_level,
                          patch_size=patch_size,
                          stride=patch_size,
                          downsample=1,
                          select_bounding_box=False)
            print(
                f'Successfully vectorized {image_id} : {total_images - index - 1} images left'
            )
        else:
            print(
                f'Already existing file {image_id} - {total_images - index - 1} images left'
            )
    print('Finish Processing All images!')
Example #20
def featurize_images(input_dir,
                     model_path,
                     output_dir,
                     batch_size,
                     downsample_encoder=True):
    """
    Featurizes vectorized whole-slide images using a trained encoder network.

    :param input_dir: directory containing the vectorized images.
    :param model_path: path to trained encoder network.
    :param output_dir: destination folder to store the compressed images.
    :param batch_size: number of images to process in the GPU in one-go.
    :param downsample_encoder: if true downsample image from 128 to 64
    :return: nothing
    """

    # Output dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load encoder model
    encoder = keras.models.load_model(model_path, compile=False)

    # Downsample image to fit encoder needed for bigan encoder
    if downsample_encoder:
        encoder = downsample_encoder_128_to_64(encoder)

    image_list = get_file_list(input_dir, ext='_{item}.png')
    total_images = len(image_list)

    for index in range(total_images):
        filename = os.path.splitext(os.path.basename(image_list[index]))[0]
        filename_npy = input_dir + '/' + filename + '.npy'  # by convention on NIC it has to be an .npy
        featurized_npy = output_dir + '/' + filename.split('_')[0] + '.npy'
        featurized_png = output_dir + '/' + filename.split('_')[0] + '.png'
        if not check_file_exists(featurized_npy):
            print(f'Processing image {filename}')
            encode_wsi_npy_simple(encoder,
                                  filename_npy,
                                  batch_size,
                                  featurized_npy,
                                  featurized_png,
                                  output_distance_map=True)
            print(
                f'Successfully vectorized {filename} : {total_images - index - 1} images left'
            )
        else:
            print(
                f'Already existing file {filename} - {total_images - index - 1} images left'
            )
    print('Finish Processing All images!')
Example #21
    def __init__(self, img_dir, xray_csv, bbox_csv, transform=None, masks=False):

        self.transform = transform
        self.path_to_images = img_dir
        self.df = pd.read_csv(xray_csv)
        self.masks = pd.read_csv(
            bbox_csv,
            names=["Image Index", "Finding Label", "x", "y", "w", "h", "_1", "_2", "_3"],
            skiprows=1)

        check_path_exists(self.path_to_images)
        check_file_exists(xray_csv)

        if masks:
            check_file_exists(bbox_csv)

        self.df = self.df.set_index("Image Index")

        self.diseases = [
            'Atelectasis',
            'Cardiomegaly',
            'Effusion',
            'Infiltration',
            'Mass',
            'Nodule',
            'Pneumonia',
            'Pneumothorax',
            'Consolidation',
            'Edema',
            'Emphysema',
            'Fibrosis',
            'Pleural_Thickening',
            'Hernia',
            'Enlarged_Cardiomediastinum',
            'Lung_Lesion',
            'Fracture',
            'Lung_Opacity']
Example #22
 def __init__(self, wrfpy_config=False):
   global logger
   wrfpy_dir = os.environ['HOME']
   logger = utils.start_logging(os.path.join(wrfpy_dir, 'wrfpy.log'))
   if not wrfpy_config:
     try:
       # get CYLC_SUITE_DEF_PATH environment variable
       wrfpy_dir = os.environ['CYLC_SUITE_DEF_PATH']
     except KeyError:
       # default back to user home dir in case CYLC is not used
       wrfpy_dir = os.environ['HOME']
     # config.json needs to be in base of wrfpy_dir
     self.configfile = os.path.join(wrfpy_dir, 'config.json')
   else:
     self.configfile = wrfpy_config
   try:
     logger.debug('Checking if configuration file exists: %s' %self.configfile)
     utils.check_file_exists(self.configfile)
   except IOError:
     # create config file
     self._create_empty_config()
     # TODO: exit and notify user to manually edit config file
   # read json config file
   self._read_json()
Example #23
 def _check_wrda_dir(self):
   '''
   check if the wrfda directory exists
   check if obsproc.exe and da_wrfvar.exe executables exist in the wrfda
   directory
   '''
   # TODO: find out if we can verify that WRFDA dir is 3dvar or 4dvar compiled
   assert os.path.isdir(self.config['filesystem']['wrfda_dir']), (
     'wrfda directory %s not found' %self.config['filesystem']['wrfda_dir'])
   # create list of files to check
   files_to_check = [
     os.path.join(self.config['filesystem']['wrfda_dir'], filename) for
     filename in ['var/obsproc/obsproc.exe', 'var/da/da_wrfvar.exe']]
   # check that all files in the list exist and are readable
   for filename in files_to_check:
     utils.check_file_exists(filename)
Example #24
def overwrite_files():
    for lang in settings.languages:
        repo_output_rst = settings.repo_output_rst.format(language=lang)
        repo_output_csv = settings.repo_output_csv.format(language=lang)

        generated_output_rst = settings.generated_output_rst.format(
            language=lang)
        generated_output_csv = settings.generated_output_csv.format(
            language=lang)

        exists = utils.check_file_exists(generated_output_rst)
        if not exists:
            print(f"Generated RST file {generated_output_rst} is missing",
                  file=sys.stderr)
            sys.exit(1)

        exists = utils.check_file_exists(generated_output_csv)
        if not exists:
            print(f"Generated RST file {generated_output_csv} is missing",
                  file=sys.stderr)
            sys.exit(1)

        shutil.move(generated_output_rst, repo_output_rst)
        shutil.move(generated_output_csv, repo_output_csv)
Example #25
    def release_fastq(self, fq_type):

        print('> release {} ...'.format(fq_type))

        if fq_type == 'raw':
            data_dir = 'RawData'
        elif fq_type == 'clean':
            data_dir = 'CleanData'
        else:
            exit('error fq_type')

        for sample in self.qc_lists:
            md5_list = []

            dest = '{Data}/{data_dir}/{sample}/'.format(
                **dict(self.__dict__, **locals()))
            dest_md5 = '{Data}/{data_dir}/{sample}/MD5.txt'.format(
                **dict(self.__dict__, **locals()))

            for lane in self.qc_lists[sample]['lanes']:
                for read in (1, 2):
                    fastq = '{analydir}/QC/{sample}/{sample}_{novoid}_{flowcell_lane}_{read}.clean.fq.gz'.format(
                        sample=sample,
                        read=read,
                        analydir=self.analydir,
                        **lane)

                    if fq_type == 'raw':
                        fastq = fastq.replace('clean.fq.gz', 'fq.gz').replace(
                            'QC', 'RawData')

                    self.link_data(fastq, dest)
                    fastq_md5 = fastq + '.MD5.txt'

                    if utils.check_file_exists(fastq_md5):
                        md5_list.append(fastq_md5)
            if md5_list:
                self.cat_md5(md5_list, dest_md5)
Example #26
def do_restore(color, progress, date, key, bucket, jobname, target):
    # Colors
    yellow = color_macro(color, colored.yellow)
    cyan = color_macro(color, colored.cyan)
    red = color_macro(color, colored.red)
    green = color_macro(color, colored.green)

    # First check if the given backup exists
    # If no date specified, use most recent backup
    puts(
        f"Trying to restore {cyan(jobname)} from AWS S3 bucket {yellow(bucket)} to {yellow(target)}"
    )

    if not check_folder_exists(target):
        raise RuntimeError(red(f"Folder {target} does not exist"))

    if not date:
        puts("No date supplied, trying to restore most recent backup")
        try:
            out = subprocess.check_output(
                ["aws", "s3", "ls",
                 s3_url(bucket, jobname + "/")]).decode("utf-8")
        except subprocess.CalledProcessError:
            raise RuntimeError(
                f"Could not list bucket {bucket}/{jobname}, please double check the name and jobname"
            )

        dates = [x.rsplit(" ", 1)[1].strip("/") for x in out.splitlines()]
        dates_sorted = sorted(dates)
        date = dates_sorted[-1]
        puts(f"Most recent backup: {yellow(date)}")
    else:
        try:
            datetime.strptime(date, "%Y-%m-%d_%H-%M-%S")
        except ValueError:
            raise RuntimeError(
                f"date ({date}) has invalid date format, expected %Y-%m-%d_%H-%M-%S"
            )

        try:
            puts(f"Checking if backup for {yellow(date)} exists...",
                 newline=False)
            # Check if backup with that date actually exists
            out = subprocess.check_call([
                "aws", "s3", "ls",
                s3_url(bucket, os.path.join(jobname, date))
            ],
                                        stdout=subprocess.DEVNULL,
                                        stderr=subprocess.DEVNULL)
            puts(green("OK"))
        except subprocess.CalledProcessError:
            print()
            raise click.BadOptionUsage("date",
                                       red(f"No backup found for date {date}"))

    # Next check files, determine if encrypted, compressed or both
    print(f"Checking files in {bucket}/{jobname}/{date}...", end="")

    try:
        backup_content_str = subprocess.check_output([
            "aws", "s3", "ls",
            s3_url(bucket,
                   os.path.join(jobname, date) + "/")
        ]).decode("utf-8")
        backup_content = [
            x.rsplit(" ", 1)[1].strip("/")
            for x in backup_content_str.splitlines()
        ]
        puts(green("DONE"))
    except subprocess.CalledProcessError:
        raise RuntimeError(
            f"Could not list files in {bucket}/{jobname}/{date}")

    encrypted = any(".meta.enc" in s for s in backup_content)
    compressed = any(".tar.zstd" in s for s in backup_content)

    print(
        f"Backup is{' not' if not encrypted else ''} encrypted and{' not' if not compressed else ''} compressed"
    )

    if encrypted:
        if not key or not check_file_exists(key):
            raise click.BadOptionUsage("key",
                                       "Key is missing, backup is encrypted")

        print("Downloading metafile...", end="", flush=True)
        try:
            metafile_url = s3_url(
                bucket, os.path.join(jobname, date, f"{jobname}.meta.enc"))
            # print(metafile_url)

            openssl = subprocess.Popen(
                ["openssl", "rsautl", "-decrypt", "-inkey", key],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            openssl_out = openssl.stdout

            aws = subprocess.Popen(["aws", "s3", "cp", metafile_url, "-"],
                                   stdout=openssl.stdin)
            aws.wait()
            print("yottek", flush=True)
            print(openssl_out.readline())
        except Exception:
            raise RuntimeError("Could not download/decrypt metafile")
Example #27
def runtest():
    """check command line interface"""

    # setup
    shutil.copyfile(Disks.recsgen, Disks.work)

    # disk image operations
    with open(Files.output, "w") as f1, open(Files.reference, "w") as f2:
        xdm(Disks.work, "-i", stdout=f2)
        xdm(Disks.work, "-q", stdout=f1)
    check_files_eq("CLI", Files.output, Files.reference, "DIS/VAR255")

    xdm(Disks.work, "-e", "PROG00255", "DV064X010", "DF002X001")
    xdm(Disks.work, "-e", "PROG00255", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255", "PROGRAM")
    xdm(Disks.work, "-e", "DV064X010", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010", "DIS/VAR64")
    xdm(Disks.work, "-e", "DF002X001", "-o", Files.output)
    check_files_eq("CLI", Files.output, "df002x001", "DIS/FIX 2")

    with open(Files.output, "w") as f1:
        xdm(Disks.work, "-p", "DV064X010", stdout=f1)
    check_files_eq("CLI", Files.output, "dv064x010", "DIS/VAR 64")

    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "-e", "INVALID", stderr=ferr, rc=1)

    xdm(Disks.work, "-S", "0x01", "-o", Files.output)
    check_files_eq("CLI", Files.output, os.path.join(Dirs.refs, "sector1"),
                   "DIS/VAR255")

    # add, rename, remove files
    shutil.copyfile(Disks.blank, Disks.work)
    xdm(Disks.work, "-a", "prog00255", "dv064x010", "df002x001")
    xdm(Disks.work, "-e", "PROG00255", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255", "PROGRAM")
    xdm(Disks.work, "-e", "DV064X010", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010", "PROGRAM")  #!

    shutil.copyfile(Disks.work, Disks.tifiles)
    xdm(Disks.work, "-e", "PROG00255", "-o", Files.reference)
    xdm(Disks.work, "-r", "PROG00255:OTHERNAME")
    xdm(Disks.work, "-e", "OTHERNAME", "-o", Files.output)
    check_files_eq("CLI", Files.output, Files.reference, "P")
    xdm(Disks.work, "-r", "OTHERNAME:PROG00255")
    check_files_eq("CLI", Disks.work, Disks.tifiles, "P")

    xdm(Disks.work, "-d", "PROG00255", "DV064X010", "DF002X001")
    with open(Files.output, "w") as f1, open(Files.reference, "w") as f2:
        xdm(Disks.work, "-i", stdout=f1)
        xdm(Disks.blank, "-i", stdout=f2)
    check_files_eq("CLI", Files.output, Files.reference, "DIS/VAR255")

    shutil.copyfile(Disks.recsgen, Disks.work)
    xdm(Disks.work, "-e", "DF127*", "PROG00001", "PROG00002")
    if (not os.path.isfile("df127x001") or not os.path.isfile("df127x010")
            or not os.path.isfile("df127x020p")):
        error("CLI", "DF127*: Missing files")

    xdm(Disks.work, "-d", "PROG*", "D?010X060")
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "-e", "PROG00255", stderr=ferr, rc=1)
        xdm(Disks.work, "-e", "DV010X060", stderr=ferr, rc=1)
        xdm(Disks.work, "-e", "DF010X060", stderr=ferr, rc=1)

    # multi-file naming
    xdm(Disks.work, "-n", "MULTI", "-a", "prog00001", "prog00255", "prog00002")
    xdm(Disks.work, "-e", "MULTI", "-o", Files.output)
    check_files_eq("CLI", "prog00001", Files.output, "P")
    xdm(Disks.work, "-e", "MULTJ", "-o", Files.output)
    check_files_eq("CLI", "prog00255", Files.output, "P")
    xdm(Disks.work, "-e", "MULTK", "-o", Files.output)
    check_files_eq("CLI", "prog00002", Files.output, "P")

    xdm("-T", "prog00001", "prog00255", "prog00002", "-n",
        "MULTFI")  # -n applies to internal names!
    xdm(Disks.work, "-t", "-a", "prog00001.tfi", "prog00255.tfi",
        "prog00002.tfi")
    xdm(Disks.work, "-e", "MULTFI", "-o", Files.output)
    check_files_eq("CLI", "prog00001", Files.output, "P")
    xdm(Disks.work, "-e", "MULTFJ", "-o", Files.output)
    check_files_eq("CLI", "prog00255", Files.output, "P")
    xdm(Disks.work, "-e", "MULTFK", "-o", Files.output)
    check_files_eq("CLI", "prog00002", Files.output, "P")

    xdm("-T", "prog00255", "prog00002", "-9", "-n", "MULV9T")
    xdm(Disks.work, "-9", "-a", "prog00255.v9t9", "prog00002.v9t9")
    xdm(Disks.work, "-e", "MULV9T", "-o", Files.output)
    check_files_eq("CLI", "prog00255", Files.output, "P")
    xdm(Disks.work, "-e", "MULV9U", "-o", Files.output)
    check_files_eq("CLI", "prog00002", Files.output, "P")

    ref = os.path.join(Dirs.refs, "glob")
    xdm(Disks.work, "-a", ref + "?", "-n", "GLOBA1", shell=True)
    xdm(Disks.work, "-e", "GLOBA1", "-o", Files.output)
    xdm(Disks.work, "-e", "GLOBA2", "-o", Files.output)
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "-e", "GLOBA3", "-o", Files.output, stderr=ferr, rc=1)
    xdm(Disks.work, "-d", "GLOB*", "-o", Files.output)
    xdm(Disks.work, "-a", ref + "*", "-n", "GLOBB1", shell=True)
    xdm(Disks.work, "-e", "GLOBB1", "-o", Files.output)
    xdm(Disks.work, "-e", "GLOBB2", "-o", Files.output)
    xdm(Disks.work, "-e", "GLOBB3", "-o", Files.output)

    # initialize disk
    xdm(Disks.work, "--initialize", "360", "-n", "SSSD")
    check_file_size(Disks.work, 360 * 256)
    check_files_eq("CLI", Disks.work, Disks.blank, "P")
    os.remove(Disks.work)
    xdm(Disks.work, "--initialize", "SSSD", "-n", "SSSD")
    check_file_size(Disks.work, 360 * 256)
    check_files_eq("CLI", Disks.work, Disks.blank, "P")
    xdm(Disks.work, "--initialize", "800", "-n", "INIT")
    with open(Files.output, "w") as f:
        xdm(Disks.work, "-i", stdout=f)
    check_file_matches(Files.output, [(0, r"\s2\s+used\s+798\s+free\s")])
    os.remove(Disks.work)
    xdm(Disks.work, "--initialize", "CF", "-n", "INIT")
    with open(Files.output, "w") as f:
        xdm(Disks.work, "-i", stdout=f)
    check_file_matches(Files.output, [(0, r"\s2\s+used\s+1598\s+free\s")])
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "--initialize", "1", stderr=ferr, rc=1)
        xdm(Disks.work, "--initialize", "1601", stderr=ferr, rc=1)
        xdm(Disks.work, "--initialize", "FOO", stderr=ferr, rc=1)
    f = os.path.join(Dirs.refs, "vardis")
    for n in ["AA", "BB"]:
        xdm(Disks.work, "--initialize", "SSSD", "-a", f, "-n", n)
        with open(Files.output, "w") as fout:
            xdm(Disks.work, "-i", stdout=fout)
        check_file_matches(Files.output, [(0, n + r"\s+"), (2, n + r"\s+")])

    # set geometry
    xdm(Disks.work, "--initialize", "1600", "-n", "GEO")
    for g, p in [("1S1D", "1S/1D\s+40T"), ("99T8D7S", "7S/8D\s+99T"),
                 ("22TDD", "7S/2D\s+22T"), ("DSSD", "2S/1D\s+22T"),
                 ("1T", "2S/1D\s+1T"), ("3D10T9S", "9S/3D\s+10T"),
                 ("SDDS", "2S/1D\s+10T"), ("SS", "1S/1D\s+10T")]:
        xdm(Disks.work, "--set-geometry", g)
        with open(Files.output, "w") as fout:
            xdm(Disks.work, "-i", "-q", stdout=fout)
        check_file_matches(Files.output, [(0, p)])

    # resize disk
    shutil.copyfile(Disks.recsgen, Disks.work)
    for s in ["800", "248", "1600"]:
        xdm(Disks.work, "-Z", s, "-q")
        for f in ["PROG02560", "DF129X010", "DV127X010", "DV255X015P"]:
            xdm(Disks.work, "-e", f, "-q", "-o", Files.output)
            xdm(Disks.recsgen, "-e", f, "-o", Files.reference)
            check_files_eq("CLI", Files.output, Files.reference, "PROGRAM")
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "-Z", "240", stderr=ferr, rc=1)
        xdm(Disks.work, "-Z", "1608", stderr=ferr, rc=1)

    # new geometry handling (v1.5.3)
    for c, g, p in [
        ("--initialize", "SSSD", r"358 free\s+90 KB\s+1S/1D\s+40T"),
        ("--resize", "DS1D", r"718 free\s+180 KB\s+2S/1D\s+40T"),
        ("--set-geometry", "80T",
         r"718 free\s+180 KB\s+2S/1D\s+80T"),  # geom mismatch
        ("--initialize", "408", r"406 free\s+102 KB\s+2S/1D\s+40T"),
        ("--resize", "DSSD80T", r"1438 free\s+360 KB\s+2S/1D\s+80T"),
        ("--resize", "2DSS", r"718 free\s+180 KB\s+1S/2D\s+40T"),
        ("-Z", "208", r"206 free\s+52 KB\s+1S/2D\s+40T"),
        ("--set-geometry", "SD80T", r"206 free\s+52 KB\s+1S/1D\s+80T"),
        ("-X", "DSSD80T", r"1438 free\s+360 KB\s+2S/1D\s+80T"),
        ("--set-geometry", "20T", r"1438 free\s+360 KB\s+2S/1D\s+20T")
    ]:  # geom mismatch
        xdm(Disks.work, c, g)
        with open(Files.output, "w") as fout:
            xdm(Disks.work, "-i", "-q", stdout=fout)
        check_file_matches(Files.output, [(0, p)])
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "--initialize", "SS80T", stderr=ferr, rc=1)
        xdm(Disks.work, "--resize", "2S", stderr=ferr, rc=1)
        xdm(Disks.work, "--resize", "80T", stderr=ferr, rc=1)
        xdm(Disks.work, "--set-geometry", "123", stderr=ferr, rc=1)

    # xdm99 vs real images
    rfile = os.path.join(Dirs.refs, "ti-text")  # TEXT D/V80
    with open(Files.output, "w") as fout, open(Files.error, "w") as ferr:
        xdm(Disks.work, "-X", "sssd", "-n", "TI-DISK", stderr=ferr, rc=0)
        xdm(Disks.work,
            "-a",
            rfile,
            "-n",
            "TEXT",
            "-f",
            "dv80",
            stderr=ferr,
            rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tisssd)
        xdm(Disks.work, "-X", "dsdd", "-n", "TI-DISK", stderr=ferr, rc=0)
        xdm(Disks.work,
            "-a",
            rfile,
            "-n",
            "TEXT",
            "-f",
            "dv80",
            stderr=ferr,
            rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tidsdd)
        xdm(Disks.work, "-Z", "sssd", stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tisssd)
        xdm(Disks.work, "--set-geometry", "ssdd", stderr=ferr, rc=0)  # warn
        check_file_len(Files.error, min_lines=1, max_lines=1)
        xdm(Disks.work, "-i", stdout=fout, stderr=ferr, rc=0)  # warn
        check_file_len(Files.error, min_lines=2, max_lines=2)
        xdm(Disks.work, "-Z", "dsdd", stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        check_disks_eq(Disks.work, Disks.tidsdd)
        xdm(Disks.work, "--set-geometry", "ssdd80t", stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        xdm(Disks.work, "-X", "dssd80t", "-n", "TI-DSSD80", stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        check_disks_eq(Disks.work, Disks.tidssd80)

    # repair disks
    shutil.copyfile(Disks.bad, Disks.work)
    with open(Files.output, "w") as f1, open(Files.reference, "w") as f2:
        xdm(Disks.work, "-C", stderr=f1, rc=1)
        xdm(Disks.work, "-R", stderr=f2)
    check_file_len(Files.output, min_lines=2)
    with open(Files.output, "w") as f1:
        xdm(Disks.work, "-C", stderr=f1)
    check_file_len(Files.output, max_lines=0)

    # FIAD operations
    shutil.copyfile(Disks.recsgen, Disks.work)
    xdm(Disks.work, "-e", "PROG00255", "DV064X010", "-t")
    xdm(Disks.work, "-e", "PROG00255", "-t", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255.tfi", "PROGRAM")
    xdm(Disks.work, "-e", "DV064X010", "-t", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010.tfi", "PROGRAM")

    with open(Files.output, "w") as f:
        xdm("-I", "prog00255.tfi", "dv064x010.tfi", stdout=f)

    xdm(Disks.work, "-e", "PROG00255", "DV064X010", "-9")
    xdm(Disks.work, "-e", "PROG00255", "-9", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255.v9t9", "PROGRAM")
    xdm(Disks.work, "-e", "DV064X010", "-9", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010.v9t9", "PROGRAM")

    with open(Files.output, "w") as f:
        xdm("-I", "prog00255.v9t9", "dv064x010.v9t9", stdout=f)

    xdm(Disks.work, "-e", "PROG00255")
    xdm("-T", "prog00255", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255.tfi", "PROGRAM",
                   Masks.TIFile)
    xdm("-T", "prog00255", "-9", "-o", Files.output)
    check_files_eq("CLI", Files.output, "prog00255.v9t9", "PROGRAM",
                   Masks.v9t9)

    xdm(Disks.work, "-e", "DV064X010", "-o", Files.reference)
    xdm("-F", "dv064x010.tfi")
    check_files_eq("CLI", "dv064x010", Files.reference, "DIS/VAR 64")
    xdm("-F", "dv064x010.tfi", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010", "PROGRAM")

    xdm("-F", "dv064x010.v9t9", "-9")
    check_files_eq("CLI", "dv064x010", Files.reference, "DIS/VAR 64")
    xdm("-F", "dv064x010.v9t9", "-o", Files.output)
    check_files_eq("CLI", Files.output, "dv064x010", "PROGRAM")

    xdm("-T", "dv064x010", "-o", Files.output, "-n", "DV064X010", "-f",
        "DIS/VAR 64")
    check_files_eq("CLI", Files.output, "dv064x010.tfi", "PROGRAM",
                   Masks.TIFile)
    os.remove("dv064x010.tfi")
    xdm("-T", "dv064x010", "-n", "DV064X010", "-f", "DIS/VAR 64")
    check_files_eq("CLI", "dv064x010.tfi", Files.output, "PROGRAM",
                   Masks.TIFile)

    xdm("-T", "dv064x010", "-9", "-o", Files.output, "-n", "DV064X010", "-f",
        "DIS/VAR 64")
    check_files_eq("CLI", Files.output, "dv064x010.v9t9", "PROGRAM",
                   Masks.v9t9)
    os.remove("dv064x010.v9t9")
    xdm("-T", "dv064x010", "-9", "-n", "DV064X010", "-f", "DIS/VAR 64")
    check_files_eq("CLI", "dv064x010.v9t9", Files.output, "PROGRAM",
                   Masks.v9t9)

    # TI names
    shutil.copyfile(Disks.recsdis, Disks.work)
    xdm(Disks.work, "-t", "-e", "F16", "V16")
    xdm(Disks.work, "-t", "-e", "F16", "V16", "--ti-names")
    check_files_eq("TI names", "F16", "f16.tfi", "PROGRAM")
    check_files_eq("TI names", "V16", "v16.tfi", "PROGRAM")
    xdm(Disks.work, "-9", "-e", "F1")
    xdm(Disks.work, "-9", "-e", "F1", "--ti-names")
    check_files_eq("TI names", "F1", "f1.v9t9", "PROGRAM")
    xdm(Disks.work, "-e", "V1", "-o", Files.reference)
    xdm(Disks.work, "-e", "V1", "--ti-names")
    check_files_eq("TI names", "V1", Files.reference, "PROGRAM")

    # conversion between TI/PC names ('.' vs '/')
    file1 = os.path.join(Dirs.refs, "vardis")
    with open(os.path.join(Dirs.tmp, "file.y.z"), "wb") as f:
        f.write(b"\xff" * 100)
    xdm(Disks.work, "-X", "sssd", "-a", file1, "-n", "FILE.X")
    xdm(Disks.work, "-a", os.path.join(Dirs.tmp, "file.y.z"))
    with open(Files.output, "w") as fout:
        xdm(Disks.work, "-i", stdout=fout, rc=0)
    check_lines_start(Files.output, ("FILE/X", "FILE/Y"), skip=1)

    xdm(Disks.work, "-r", "FILE/X:NEW.FILE/X")
    with open(Files.output, "w") as fout:
        xdm(Disks.work, "-i", stdout=fout, rc=0)
    check_lines_start(Files.output, ("NEW/FILE/X", "FILE/Y"), skip=1)

    xdm(Disks.work, "-e", "*")
    check_file_exists("new.file.x")
    os.remove("new.file.x")
    check_file_exists("file.y")
    os.remove("file.y")

    xdm(Disks.work, "-e", "FILE/Y", "-t")
    check_file_exists("file.y.tfi")
    os.remove("file.y.tfi")

    # rename disk (-n)
    xdm(Disks.work, "-X", "sssd", "-n", "FIRST.NAME")
    with open(Files.output, "w") as fout:
        xdm(Disks.work, "-i", stdout=fout, rc=0)
    check_lines_start(Files.output, ("FIRST/NAME", ))

    xdm(Disks.work, "-n", "SECND.NAME")
    with open(Files.output, "w") as fout:
        xdm(Disks.work, "-i", stdout=fout, rc=0)
    check_lines_start(Files.output, ("SECND/NAME", ))

    # output directory -o <dir>
    ref1 = os.path.join(Dirs.refs, "glob1")
    ref2 = os.path.join(Dirs.refs, "glob12")
    xdm(Disks.work, "-X", "sssd", "-a", ref1, ref2)
    xdm(Disks.work, "-e", "GLOB*", "-o", Dirs.tmp)
    check_file_exists(os.path.join(Dirs.tmp, "glob1"))
    os.remove(os.path.join(Dirs.tmp, "glob1"))
    check_file_exists(os.path.join(Dirs.tmp, "glob12"))
    os.remove(os.path.join(Dirs.tmp, "glob12"))

    xdm(Disks.work, "-X", "sssd", "-a", ref1, ref2)
    with open(Files.error, "w") as ferr:
        xdm(Disks.work, "-e", "GLOB*", "-o", Files.output, stderr=ferr, rc=1)

    # stdin and stdout
    ref = os.path.join(Dirs.refs, "vardis")
    with open(ref, "r") as fin:
        xdm(Disks.work,
            "--initialize",
            "sssd",
            "-a",
            "-",
            "-f",
            "dv40",
            stdin=fin)
    with open(Files.output, "w") as fout:
        xdm(Disks.work, "-e", "STDIN", "-o", "-", stdout=fout)
    check_files_eq("stdin/stdout", Files.output, ref, "DV")
    ref = os.path.join(Dirs.refs, "sector1")
    with open(Files.reference, "wb") as fout:
        xdm(Disks.work,
            "--initialize",
            "sssd",
            "-a",
            ref,
            "-n",
            "T",
            "-o",
            "-",
            stdout=fout)
    with open(Files.reference, "rb") as fin:
        xdm("-", "-e", "T", "-o", Files.output, stdin=fin)
    check_files_eq("stdin/stdout", Files.output, ref, "P")

    # usage errors
    with open(Files.error, "w") as ferr:
        xdm("-a", Files.output, stderr=ferr, rc=1)
        xdm("-T",
            "prog00001",
            "prog00002",
            "-o",
            Files.output,
            stderr=ferr,
            rc=1)
        xdm("-T",
            "prog00001",
            "prog00002",
            "-9",
            "-o",
            Files.output,
            stderr=ferr,
            rc=1)
        xdm("-F", "-o", Files.output, stderr=ferr, rc=2)

    # cleanup
    os.remove(Files.output)
    os.remove(Files.reference)
    os.remove(Files.error)
    os.remove(Disks.work)
    os.remove(Disks.tifiles)
    for fn in [
            "prog00001", "prog00002", "prog00255", "dv064x010", "df002x001",
            "df127x001", "df127x010", "df127x020p", "prog00001.tfi",
            "prog00002.tfi", "prog00255.tfi", "dv064x010.tfi",
            "prog00002.v9t9", "prog00255.v9t9", "dv064x010.v9t9", "F16", "V16",
            "f16.tfi", "v16.tfi", "F1", "f1.v9t9", "V1"
    ]:
        os.remove(fn)
Example #28
import argparse

import utils
from keras.applications import MobileNet

IMAGES_FOLDER = "../images"

# Parsing
parser = argparse.ArgumentParser()
parser.add_argument("--term",
                    help="Pass the term of the image you are looking",
                    type=str)
parser.add_argument("--build-index",
                    action='store_true',
                    help="Recreate file with image probabilities",
                    default=False)
args = parser.parse_args()

term = args.term
model = MobileNet(weights='imagenet')

images = utils.get_imgs_paths(IMAGES_FOLDER)
_id = utils.term_to_id(term)

if utils.check_file_exists() and not args.build_index:
    probs = utils.open_probs()
else:
    probs = utils.get_imgs_probs(model, images)
    utils.save_probs(probs)

probs_id = utils.get_probs_id(probs, _id)
top_imgs = utils.get_top_probs(probs_id, 3)
utils.show_imgs(images, top_imgs)
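get_top_probs only needs to rank one class column of the probability matrix; with NumPy that is a single argsort. A sketch of one plausible implementation, not the project's utils code:

import numpy as np

def get_top_probs(probs_id, k):
    # indices of the k largest probabilities, best first
    return np.argsort(probs_id)[::-1][:k]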
Example #29
def runtest():
    """check command line interface"""

    # setup
    shutil.copyfile(Disks.recsgen, Disks.work)

    # disk image operations
    with open(Files.output, 'w') as f1, open(Files.reference, 'w') as f2:
        xdm(Disks.work, '-i', stdout=f2)
        xdm(Disks.work, '-q', stdout=f1)
    check_files_eq('CLI', Files.output, Files.reference, 'DIS/VAR255')

    ref_prog = os.path.join(Dirs.refs, 'prog00255')
    xdm(Disks.work, '-e', 'PROG00255', '-o', Files.output)
    check_files_eq('CLI', Files.output, ref_prog, 'PROGRAM')
    ref_dv = os.path.join(Dirs.refs, 'dv064x010')
    xdm(Disks.work, '-e', 'DV064X010', '-o', Files.output)
    check_files_eq('CLI', Files.output, ref_dv, 'DIS/VAR64')
    ref_df = os.path.join(Dirs.refs, 'df002x001')
    xdm(Disks.work, '-e', 'DF002X001', '-o', Files.output)
    check_files_eq('CLI', Files.output, ref_df, 'DIS/FIX 2')

    with open(Files.output, 'w') as f1:
        xdm(Disks.work, '-p', 'DV064X010', stdout=f1)
    check_files_eq('CLI', Files.output, ref_dv, 'DIS/VAR 64')

    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-e', 'INVALID', stderr=ferr, rc=1)

    xdm(Disks.work, '-S', '0x01', '-o', Files.output)
    check_files_eq('CLI', Files.output, os.path.join(Dirs.refs, 'sector1'),
                   'DIS/VAR255')

    # add, rename, remove files
    shutil.copyfile(Disks.blank, Disks.work)
    xdm(Disks.work, '-a', ref_prog, ref_dv, ref_df)
    xdm(Disks.work, '-e', 'PROG00255', '-o', Files.output)
    check_files_eq('CLI', Files.output, ref_prog, 'PROGRAM')
    xdm(Disks.work, '-e', 'DV064X010', '-o', Files.output)
    check_files_eq('CLI', Files.output, ref_dv,
                   'PROGRAM')  # use PROGRAM here to compare!

    shutil.copyfile(Disks.work, Disks.tifiles)
    xdm(Disks.work, '-e', 'PROG00255', '-o', Files.reference)
    xdm(Disks.work, '-r', 'PROG00255:OTHERNAME')
    xdm(Disks.work, '-e', 'OTHERNAME', '-o', Files.output)
    check_files_eq('CLI', Files.output, Files.reference, 'P')
    xdm(Disks.work, '-r', 'OTHERNAME:PROG00255')
    check_files_eq('CLI', Disks.work, Disks.tifiles, 'P')

    xdm(Disks.work, '-d', 'PROG00255', 'DV064X010', 'DF002X001')
    with open(Files.output, 'w') as f1, open(Files.reference, 'w') as f2:
        xdm(Disks.work, '-i', stdout=f1)
        xdm(Disks.blank, '-i', stdout=f2)
    check_files_eq('CLI', Files.output, Files.reference, 'DIS/VAR255')

    shutil.copyfile(Disks.recsgen, Disks.work)
    xdm(Disks.work, '-e', 'DF127*', 'PROG00001', 'PROG00002')
    if (not os.path.isfile('df127x001') or not os.path.isfile('df127x010')
            or not os.path.isfile('df127x020p')):
        error('CLI', 'DF127*: Missing files')

    xdm(Disks.work, '-d', 'PROG*', 'D?010X060')
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-e', 'PROG00255', stderr=ferr, rc=1)
        xdm(Disks.work, '-e', 'DV010X060', stderr=ferr, rc=1)
        xdm(Disks.work, '-e', 'DF010X060', stderr=ferr, rc=1)

    # multi-file naming
    xdm(Disks.work, '-n', 'MULTI', '-a', 'prog00001', ref_prog, 'prog00002')
    xdm(Disks.work, '-e', 'MULTI', '-o', Files.output)
    check_files_eq('CLI', 'prog00001', Files.output, 'P')
    xdm(Disks.work, '-e', 'MULTJ', '-o', Files.output)
    check_files_eq('CLI', ref_prog, Files.output, 'P')
    xdm(Disks.work, '-e', 'MULTK', '-o', Files.output)
    check_files_eq('CLI', 'prog00002', Files.output, 'P')

    xdm('-T', 'prog00001', ref_prog, 'prog00002', '-n',
        'MULTFI')  # -n applies to internal names!
    xdm(Disks.work, '-t', '-a', 'prog00001.tfi', ref_prog + '.tfi',
        'prog00002.tfi')
    xdm(Disks.work, '-e', 'MULTFI', '-o', Files.output)
    check_files_eq('CLI', 'prog00001', Files.output, 'P')
    xdm(Disks.work, '-e', 'MULTFJ', '-o', Files.output)
    check_files_eq('CLI', ref_prog, Files.output, 'P')
    xdm(Disks.work, '-e', 'MULTFK', '-o', Files.output)
    check_files_eq('CLI', 'prog00002', Files.output, 'P')

    xdm('-T', ref_prog, 'prog00002', '-9', '-n', 'MULV9T')
    xdm(Disks.work, '-9', '-a', ref_prog + '.v9t9', 'prog00002.v9t9')
    xdm(Disks.work, '-e', 'MULV9T', '-o', Files.output)
    check_files_eq('CLI', ref_prog, Files.output, 'P')
    xdm(Disks.work, '-e', 'MULV9U', '-o', Files.output)
    check_files_eq('CLI', 'prog00002', Files.output, 'P')

    ref = os.path.join(Dirs.refs, 'glob')
    xdm(Disks.work, '-a', ref + '?', '-n', 'GLOBA1', shell=True)
    xdm(Disks.work, '-e', 'GLOBA1', '-o', Files.output)
    xdm(Disks.work, '-e', 'GLOBA2', '-o', Files.output)
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-e', 'GLOBA3', '-o', Files.output, stderr=ferr, rc=1)
    xdm(Disks.work, '-d', 'GLOB*', '-o', Files.output)
    xdm(Disks.work, '-a', ref + '*', '-n', 'GLOBB1', shell=True)
    xdm(Disks.work, '-e', 'GLOBB1', '-o', Files.output)
    xdm(Disks.work, '-e', 'GLOBB2', '-o', Files.output)
    xdm(Disks.work, '-e', 'GLOBB3', '-o', Files.output)

    # initialize disk
    xdm(Disks.work, '--initialize', '360', '-n', 'SSSD')
    check_file_size(Disks.work, 360 * 256)
    check_files_eq('CLI', Disks.work, Disks.blank, 'P')
    os.remove(Disks.work)
    xdm(Disks.work, '--initialize', 'SSSD', '-n', 'SSSD')
    check_file_size(Disks.work, 360 * 256)
    check_files_eq('CLI', Disks.work, Disks.blank, 'P')
    xdm(Disks.work, '--initialize', '800', '-n', 'INIT')
    with open(Files.output, 'w') as f:
        xdm(Disks.work, '-i', '-q', stdout=f)
    check_file_matches(Files.output, [(0, r'\s2\s+used\s+798\s+free\s')])
    os.remove(Disks.work)
    xdm(Disks.work, '--initialize', 'CF', '-n', 'INIT', '-q')
    with open(Files.output, 'w') as f:
        xdm(Disks.work, '-i', '-q', stdout=f)
    check_file_matches(Files.output, [(0, r'\s2\s+used\s+1598\s+free\s')])
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '--initialize', '1', stderr=ferr, rc=1)
        xdm(Disks.work, '--initialize', '1601', stderr=ferr, rc=1)
        xdm(Disks.work, '--initialize', 'FOO', stderr=ferr, rc=1)
    f = os.path.join(Dirs.refs, 'vardis')
    for n in ['AA', 'BB']:
        xdm(Disks.work, '--initialize', 'SSSD', '-a', f, '-n', n)
        with open(Files.output, 'w') as fout:
            xdm(Disks.work, '-i', stdout=fout)
        check_file_matches(Files.output, [(0, n + r'\s+'), (2, n + r'\s+')])

    # set geometry
    xdm(Disks.work, '--initialize', '1600', '-n', 'GEO')
    for g, p in [('1S1D', r'1S/1D\s+40T'), ('99T8D7S', r'7S/8D\s+99T'),
                 ('22TDD', r'7S/2D\s+22T'), ('DSSD', r'2S/1D\s+22T'),
                 ('1T', r'2S/1D\s+1T'), ('3D10T9S', r'9S/3D\s+10T'),
                 ('SDDS', r'2S/1D\s+10T'), ('SS', r'1S/1D\s+10T')]:
        xdm(Disks.work, '--set-geometry', g, '-q')
        with open(Files.output, 'w') as fout:
            xdm(Disks.work, '-i', '-q', stdout=fout)
        check_file_matches(Files.output, [(0, p)])

    # resize disk
    shutil.copyfile(Disks.recsgen, Disks.work)
    for s in ['800', '248', '1600']:
        xdm(Disks.work, '-Z', s, '-q')
        for f in ['PROG02560', 'DF129X010', 'DV127X010', 'DV255X015P']:
            xdm(Disks.work, '-e', f, '-q', '-o', Files.output)
            xdm(Disks.recsgen, '-e', f, '-o', Files.reference)
            check_files_eq('CLI', Files.output, Files.reference, 'PROGRAM')
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-Z', '240', stderr=ferr, rc=1)
        xdm(Disks.work, '-Z', '1608', stderr=ferr, rc=1)

    # new geometry handling (v1.5.3)
    for c, g, p in [
        ('--initialize', 'SSSD', r'358 free\s+90 KB\s+1S/1D\s+40T'),
        ('--resize', 'DS1D', r'718 free\s+180 KB\s+2S/1D\s+40T'),
        ('--set-geometry', '80T',
         r'718 free\s+180 KB\s+2S/1D\s+80T'),  # geom mismatch
        ('--initialize', '408', r'406 free\s+102 KB\s+2S/1D\s+40T'),
        ('--resize', 'DSSD80T', r'1438 free\s+360 KB\s+2S/1D\s+80T'),
        ('--resize', '2DSS', r'718 free\s+180 KB\s+1S/2D\s+40T'),
        ('-Z', '208', r'206 free\s+52 KB\s+1S/2D\s+40T'),
        ('--set-geometry', 'SD80T', r'206 free\s+52 KB\s+1S/1D\s+80T'),
        ('-X', 'DSSD80T', r'1438 free\s+360 KB\s+2S/1D\s+80T'),
        ('--set-geometry', '20T', r'1438 free\s+360 KB\s+2S/1D\s+20T')  # geom mismatch
    ]:
        xdm(Disks.work, c, g, '-q')
        with open(Files.output, 'w') as fout:
            xdm(Disks.work, '-i', '-q', stdout=fout)
        check_file_matches(Files.output, [(0, p)])
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '--initialize', 'SS80T', stderr=ferr, rc=1)
        xdm(Disks.work, '--resize', '2S', stderr=ferr, rc=1)
        xdm(Disks.work, '--resize', '80T', stderr=ferr, rc=1)
        xdm(Disks.work, '--set-geometry', '123', stderr=ferr, rc=1)

    # xdm99 vs real images
    rfile = os.path.join(Dirs.refs, 'ti-text')  # TEXT D/V80
    with open(Files.output, 'w') as fout, open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-X', 'sssd', '-n', 'TI-DISK', stderr=ferr, rc=0)
        xdm(Disks.work,
            '-a',
            rfile,
            '-n',
            'TEXT',
            '-f',
            'dv80',
            stderr=ferr,
            rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tisssd)
        xdm(Disks.work, '-X', 'dsdd', '-n', 'TI-DISK', stderr=ferr, rc=0)
        xdm(Disks.work,
            '-a',
            rfile,
            '-n',
            'TEXT',
            '-f',
            'dv80',
            stderr=ferr,
            rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tidsdd)
        xdm(Disks.work, '-Z', 'sssd', stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=0)
        check_disks_eq(Disks.work, Disks.tisssd)
        xdm(Disks.work, '--set-geometry', 'ssdd', stderr=ferr, rc=0)  # warn
        check_file_len(Files.error, min_lines=1, max_lines=1)
        xdm(Disks.work, '-i', stdout=fout, stderr=ferr, rc=0)  # warn
        check_file_len(Files.error, min_lines=2, max_lines=2)
        xdm(Disks.work, '-Z', 'dsdd', stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        check_disks_eq(Disks.work, Disks.tidsdd)
        xdm(Disks.work, '--set-geometry', 'ssdd80t', stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        xdm(Disks.work, '-X', 'dssd80t', '-n', 'TI-DSSD80', stderr=ferr, rc=0)
        check_file_len(Files.error, max_lines=2)
        check_disks_eq(Disks.work, Disks.tidssd80)

    # repair disks
    shutil.copyfile(Disks.bad, Disks.work)
    with open(Files.output, 'w') as f1, open(Files.reference, 'w') as f2:
        xdm(Disks.work, '-C', stderr=f1, rc=1)
        xdm(Disks.work, '-R', stderr=f2)
    check_file_len(Files.output, min_lines=2)
    with open(Files.output, 'w') as f1:
        xdm(Disks.work, '-C', stderr=f1)
    check_file_len(Files.output, max_lines=0)

    # FIAD operations
    shutil.copyfile(Disks.recsgen, Disks.work)
    xdm(Disks.work, '-e', 'PROG00255', 'DV064X010', '-t')
    xdm(Disks.work, '-e', 'PROG00255', '-t', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'prog00255.tfi', 'PROGRAM')
    xdm(Disks.work, '-e', 'DV064X010', '-t', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'dv064x010.tfi', 'PROGRAM')

    with open(Files.output, 'w') as f:
        xdm('-I', 'prog00255.tfi', 'dv064x010.tfi', stdout=f)

    xdm(Disks.work, '-e', 'PROG00255', 'DV064X010', '-9')
    xdm(Disks.work, '-e', 'PROG00255', '-9', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'prog00255.v9t9', 'PROGRAM')
    xdm(Disks.work, '-e', 'DV064X010', '-9', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'dv064x010.v9t9', 'PROGRAM')

    with open(Files.output, 'w') as f:
        xdm('-I', 'prog00255.v9t9', 'dv064x010.v9t9', stdout=f)

    xdm(Disks.work, '-e', 'PROG00255')
    xdm('-T', 'prog00255', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'prog00255.tfi', 'PROGRAM',
                   Masks.TIFile)
    xdm('-T', 'prog00255', '-9', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'prog00255.v9t9', 'PROGRAM',
                   Masks.v9t9)

    xdm(Disks.work, '-e', 'DV064X010', '-o', Files.reference)
    xdm('-F', 'dv064x010.tfi')
    check_files_eq('CLI', 'dv064x010', Files.reference, 'DIS/VAR 64')
    xdm('-F', 'dv064x010.tfi', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'dv064x010', 'PROGRAM')

    xdm('-F', 'dv064x010.v9t9', '-9')
    check_files_eq('CLI', 'dv064x010', Files.reference, 'DIS/VAR 64')
    xdm('-F', 'dv064x010.v9t9', '-o', Files.output)
    check_files_eq('CLI', Files.output, 'dv064x010', 'PROGRAM')

    xdm('-T', 'dv064x010', '-o', Files.output, '-n', 'DV064X010', '-f',
        'DIS/VAR 64')
    check_files_eq('CLI', Files.output, 'dv064x010.tfi', 'PROGRAM',
                   Masks.TIFile)
    os.remove('dv064x010.tfi')
    xdm('-T', 'dv064x010', '-n', 'DV064X010', '-f', 'DIS/VAR 64')
    check_files_eq('CLI', 'dv064x010.tfi', Files.output, 'PROGRAM',
                   Masks.TIFile)

    xdm('-T', 'dv064x010', '-9', '-o', Files.output, '-n', 'DV064X010', '-f',
        'DIS/VAR 64')
    check_files_eq('CLI', Files.output, 'dv064x010.v9t9', 'PROGRAM',
                   Masks.v9t9)
    os.remove('dv064x010.v9t9')
    xdm('-T', 'dv064x010', '-9', '-n', 'DV064X010', '-f', 'DIS/VAR 64')
    check_files_eq('CLI', 'dv064x010.v9t9', Files.output, 'PROGRAM',
                   Masks.v9t9)

    # TI names
    shutil.copyfile(Disks.recsdis, Disks.work)
    xdm(Disks.work, '-t', '-e', 'F16', 'V16')
    xdm(Disks.work, '-t', '-e', 'F16', 'V16', '--ti-names')
    check_files_eq('TI names', 'F16', 'f16.tfi', 'PROGRAM')
    check_files_eq('TI names', 'V16', 'v16.tfi', 'PROGRAM')
    xdm(Disks.work, '-9', '-e', 'F1')
    xdm(Disks.work, '-9', '-e', 'F1', '--ti-names')
    check_files_eq('TI names', 'F1', 'f1.v9t9', 'PROGRAM')
    xdm(Disks.work, '-e', 'V1', '-o', Files.reference)
    xdm(Disks.work, '-e', 'V1', '--ti-names')
    check_files_eq('TI names', 'V1', Files.reference, 'PROGRAM')

    # conversion between TI/PC names ('.' vs '/')
    file1 = os.path.join(Dirs.refs, 'vardis')
    with open(os.path.join(Dirs.tmp, 'file.y.z'), 'wb') as f:
        f.write(b'\xff' * 100)
    xdm(Disks.work, '-X', 'sssd', '-a', file1, '-n', 'FILE.X')
    xdm(Disks.work, '-a', os.path.join(Dirs.tmp, 'file.y.z'))
    with open(Files.output, 'w') as fout:
        xdm(Disks.work, '-i', stdout=fout, rc=0)
    check_lines_start(Files.output, ('FILE/X', 'FILE/Y'), skip=1)

    xdm(Disks.work, '-r', 'FILE/X:NEW.FILE/X')
    with open(Files.output, 'w') as fout:
        xdm(Disks.work, '-i', stdout=fout, rc=0)
    check_lines_start(Files.output, ('NEW/FILE/X', 'FILE/Y'), skip=1)

    xdm(Disks.work, '-e', '*')
    check_file_exists('new.file.x')
    os.remove('new.file.x')
    check_file_exists('file.y')
    os.remove('file.y')

    xdm(Disks.work, '-e', 'FILE/Y', '-t')
    check_file_exists('file.y.tfi')
    os.remove('file.y.tfi')

    # rename disk (-n)
    xdm(Disks.work, '-X', 'sssd', '-n', 'FIRST.NAME')
    with open(Files.output, 'w') as fout:
        xdm(Disks.work, '-i', stdout=fout, rc=0)
    check_lines_start(Files.output, ('FIRST/NAME', ))

    xdm(Disks.work, '-n', 'SECND.NAME')
    with open(Files.output, 'w') as fout:
        xdm(Disks.work, '-i', stdout=fout, rc=0)
    check_lines_start(Files.output, ('SECND/NAME', ))

    # output directory -o <dir>
    ref1 = os.path.join(Dirs.refs, 'glob1')
    ref2 = os.path.join(Dirs.refs, 'glob12')
    xdm(Disks.work, '-X', 'sssd', '-a', ref1, ref2)
    xdm(Disks.work, '-e', 'GLOB*', '-o', Dirs.tmp)
    check_file_exists(os.path.join(Dirs.tmp, 'glob1'))
    os.remove(os.path.join(Dirs.tmp, 'glob1'))
    check_file_exists(os.path.join(Dirs.tmp, 'glob12'))
    os.remove(os.path.join(Dirs.tmp, 'glob12'))

    xdm(Disks.work, '-X', 'sssd', '-a', ref1, ref2)
    with open(Files.error, 'w') as ferr:
        xdm(Disks.work, '-e', 'GLOB*', '-o', Files.output, stderr=ferr, rc=1)

    # stdin and stdout
    ref = os.path.join(Dirs.refs, 'vardis')
    with open(ref, 'r') as fin:
        xdm(Disks.work,
            '--initialize',
            'sssd',
            '-a',
            '-',
            '-f',
            'dv40',
            stdin=fin)
    with open(Files.output, 'w') as fout:
        xdm(Disks.work, '-e', 'STDIN', '-o', '-', stdout=fout)
    check_files_eq('stdin/stdout', Files.output, ref, 'DV')
    ref = os.path.join(Dirs.refs, 'sector1')
    with open(Files.reference, 'wb') as fout:
        xdm(Disks.work,
            '--initialize',
            'sssd',
            '-a',
            ref,
            '-n',
            'T',
            '-o',
            '-',
            stdout=fout)
    with open(Files.reference, 'rb') as fin:
        xdm('-', '-e', 'T', '-o', Files.output, stdin=fin)
    check_files_eq('stdin/stdout', Files.output, ref, 'P')

    # usage errors
    with open(Files.error, 'w') as ferr:
        xdm('-a', Files.output, stderr=ferr, rc=2)
        xdm('-T',
            'prog00001',
            'prog00002',
            '-o',
            Files.output,
            stderr=ferr,
            rc=1)
        xdm('-T',
            'prog00001',
            'prog00002',
            '-9',
            '-o',
            Files.output,
            stderr=ferr,
            rc=1)
        xdm('-F', '-o', Files.output, stderr=ferr, rc=2)

    # cleanup
    os.remove(Files.output)
    os.remove(Files.reference)
    os.remove(Files.error)
    os.remove(Disks.work)
    os.remove(Disks.tifiles)
    for fn in [
            'prog00001', 'prog00002', 'df127x001', 'df127x010', 'df127x020p',
            'prog00001.tfi', 'prog00002.tfi', 'prog00255.tfi', 'dv064x010.tfi',
            'prog00002.v9t9', 'prog00255.v9t9', 'dv064x010.v9t9', 'F16', 'V16',
            'f16.tfi', 'v16.tfi', 'F1', 'f1.v9t9', 'V1'
    ]:
        os.remove(fn)
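The calls above go through an xdm test helper that shells out to the xdm99 command-line tool and asserts on its return code. The helper below is a minimal sketch under assumptions, not the suite's actual implementation: the XDM99 path and the exact handling of the rc, shell, and stream keywords are guesses based on how the tests invoke it.

import subprocess
import sys

XDM99 = 'xdm99.py'  # assumed path to the CLI under test

def xdm(*args, rc=0, shell=False, stdin=None, stdout=None, stderr=None):
    """Run xdm99 with the given arguments and assert the expected return code."""
    cmd = [sys.executable, XDM99] + list(args)
    if shell:
        cmd = ' '.join(cmd)  # let the shell expand globs such as ref + '?'
    actual = subprocess.call(cmd, shell=shell,
                             stdin=stdin, stdout=stdout, stderr=stderr)
    if actual != rc:
        raise AssertionError(f'xdm99 returned {actual}, expected {rc}')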
Exemple #30
0
 def wrapper(*args):
     if utils.check_file_exists(package_file):
         return func(*args)
     else:
         print("'package.yml' does not exist")
         exit(0)
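This wrapper only makes sense as the inner function of a guard decorator that closes over func and package_file. A self-contained sketch of such a decorator follows; the name require_package_file and the use of os.path.isfile in place of utils.check_file_exists are assumptions for illustration.

import functools
import os
import sys

package_file = 'package.yml'  # assumed: the file the guard checks for

def require_package_file(func):
    """Run func only if package.yml is present; otherwise exit."""
    @functools.wraps(func)
    def wrapper(*args):
        if os.path.isfile(package_file):  # stand-in for utils.check_file_exists
            return func(*args)
        print("'package.yml' does not exist")
        sys.exit(0)  # as in the original; a non-zero code would be more conventional
    return wrapper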
Exemple #31
0
  def _prepare_post_dir(self):
    '''
    Create and prepare post_dir
    '''
    logger.debug('Preparing postprd directory: %s' %config['post_dir'])

    # create config['post_dir'] if it does not exist yet
    utils._create_directory(config['post_dir'])

    # Link all the relevant files needed to compute various diagnostics
    relpath_to_link = ['EmisCoeff/Big_Endian/EmisCoeff.bin',
                       'AerosolCoeff/Big_Endian/AerosolCoeff.bin',
                       'CloudCoeff/Big_Endian/CloudCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_g11.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_g11.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_g12.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_g12.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_g13.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_g13.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_g15.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_g15.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_mt1r.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_mt1r.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_mt2.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_mt2.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/imgr_insat3d.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/imgr_insat3d.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/amsre_aqua.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/amsre_aqua.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/tmi_trmm.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/tmi_trmm.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmi_f13.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmi_f13.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmi_f14.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmi_f14.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmi_f15.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmi_f15.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmis_f16.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmis_f16.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmis_f17.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmis_f17.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmis_f18.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmis_f18.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmis_f19.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmis_f19.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/ssmis_f20.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/ssmis_f20.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/seviri_m10.SpcCoeff.bin',
                       'TauCoeff/ODPS/Big_Endian/seviri_m10.TauCoeff.bin',
                       'SpcCoeff/Big_Endian/v.seviri_m10.SpcCoeff.bin']

    # abspath coefficients for crtm2 (simulated synthetic satellites)
    abspath_coeff = [os.path.join(config['crtm_dir'], relpath)
                     for relpath in relpath_to_link]
    # abspath wrf_cntrl param file
    abspath_pf = os.path.join(config['upp_domain_dir'], 'parm',
                              'wrf_cntrl.parm')
    # concatenate lists of paths
    abspath_to_link = abspath_coeff + [abspath_pf]
    # create a symlink for every file in abspath_to_link
    for fl in abspath_to_link:
      utils.check_file_exists(fl)  # check if file exists and is readable
      os.symlink(fl, os.path.join(config['post_dir'], os.path.basename(fl)))
    # symlink wrf_cntrl.parm to config['post_dir']/fort.14
    os.symlink(abspath_pf, os.path.join(config['post_dir'], 'fort.14'))
    # symlink microphysics tables; which table is used depends on the
    # mp_physics option in the wrfout file
    os.symlink(os.path.join(config['wrf_run_dir'], 'ETAMPNEW_DATA'),
               os.path.join(config['post_dir'], 'nam_micro_lookup.dat'))
    os.symlink(os.path.join(config['wrf_run_dir'],
                            'ETAMPNEW_DATA.expanded_rain'
                            ), os.path.join(config['post_dir'],
                                            'hires_micro_lookup.dat'))
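One caveat with the loop above: os.symlink raises FileExistsError if the link name already exists, so rerunning _prepare_post_dir against an existing post_dir fails. A minimal idempotent variant of the link step, assuming the same paths, could look like:

import os

def symlink_force(src, dst):
    """Create a symlink, first removing any stale entry left by a previous run."""
    if os.path.islink(dst) or os.path.exists(dst):
        os.remove(dst)
    os.symlink(src, dst)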
def generate_textid_corpus(args: argparse.Namespace) -> None:
    """
    Read raw files (in the specified directory), parse and filter them, then
    write the Bert token-ids for all files to another directory

    :param args: ArgumentParser-parsed arguments
    :return: None
    """

    if args.mode not in VALID_MODES:
        raise ValueError(f"The argument 'mode' needs to be one of "
                         f"{VALID_MODES}, got {args.mode}.")

    if platform.system() == "Darwin" and args.mode in MODES_NEEDING_BLINGFIRE:
        raise Exception(
            f"Got a mode requiring Blingfire (mode = {args.mode}), "
            "yet Blingfire doesn't support Macos.")

    if not blingfire:
        # If we aren't using blingfire, then we must use spacy
        # for sentence segmentation.
        try:
            spacy_model = spacy.load("en_core_web_sm")
        except OSError:
            print()
            print("Exception:")
            print("Didn't find the model for spacy.")
            print("Run 'python -m spacy download en_core_web_sm'")
            exit(-1)

    # Get list of input file paths
    in_list = sorted(glob.glob(os.path.join(args.input_dir, "*.txt")))
    if args.max_number_of_books:
        in_list = in_list[:args.max_number_of_books]

        logging.warning(
            f"{colorama.Fore.RED}>>> USING A MAX NUMBER OF BOOKS <<<"
            f"{colorama.Style.RESET_ALL}")

    if args.mode == "blingfire" and platform.system() == "Darwin":
        raise Exception("BlingFire is not compatible with macOS.")

    # Load the blingfire textid model
    idtok_model = None
    if blingfire and args.mode in MODES_NEEDING_BLINGFIRE:
        model_path = os.path.join(args.textid_dir, args.base_tok_file)
        utils.check_file_exists(model_path)
        idtok_model = blingfire.load_model(model_path)

    utils.check_file_exists(args.vocab_path)
    bert_full_tokenizer = tokenization.FullTokenizer(
        vocab_file=str(args.vocab_path), do_lower_case=False)

    if args.mode == "check":
        with open(args.vocab_path) as fin:
            ids_to_words = fin.read().strip().split("\n")
            words_to_ids = {word: i for i, word in enumerate(ids_to_words)}

    if args.mode != "blingfire":
        print("WARNING: This mode does not exclusively use Blingfire, "
              f"so processing will be slower.\nMode: {args.mode}")

    logging.info(f"Main Loop - {args.mode}")
    # Iterate through each raw file
    for i, in_file_path in enumerate(tqdm.tqdm(in_list)):
        # Generate output file path
        file_basename = os.path.splitext(os.path.basename(in_file_path))[0]
        out_file_path = os.path.join(args.output_dir, file_basename)

        # Read file chunk by chunk
        with open(in_file_path) as in_file:
            # We read the whole file, then cut it into chunks of
            # CHUNK_MAX_LEN characters. This seems like a more robust way
            # to guarantee that we correctly get full sentences.
            # A chunk length of 100k is the longest that doesn't break
            # spacy's sentence tokenizer.
            logging.debug("Loading a file >")
            file_text = in_file.read().strip()
            if not file_text:
                continue

            logging.debug("< Done loading a file")

            # Use a separate chunk index so the file index i from the
            # enclosing loop is not shadowed.
            for chunk_idx in range(len(file_text) // CHUNK_MAX_LEN):
                logging.debug("Chunking. >")
                chunk = file_text[chunk_idx * CHUNK_MAX_LEN:
                                  (chunk_idx + 1) * CHUNK_MAX_LEN]
                # Get the blingfire-processed sentences from this chunk
                # (NOTE: possibly redundant; consider removing if slow)
                sent_tok_start = time.time()
                logging.debug("< Done chunking.")

                logging.debug("Segmentizing sentence. >")
                if blingfire:
                    sentences = chunk_to_sentences(chunk)
                else:
                    sentences = [str(x) for x in spacy_model(chunk).sents]
                # Ignore the first and last sentences, as they've
                # likely been cut oddly by the chunking process.
                # We lose less than 1/1000th of all sentences by doing this
                # (with a CHUNK_MAX_LEN of 100k).
                logging.debug(f"Number of sentences: {len(sentences)}")
                sentences = sentences[1:-1]

                logging.debug(f"< Done segmentizing sentence. It took "
                              f"{time.time() - sent_tok_start} seconds.")
                # Additional filtering for plaintext sentences
                filter_time_start = time.time()
                logging.debug("Filtering sentences >")
                ft_sentences = filter_sentences(sentences)
                logging.debug(f"< Done filtering sentences. It took "
                              f"{time.time() - filter_time_start} seconds.")

                # Convert each sentence to their textid
                bpe_tok_time_start = time.time()
                logging.debug("Tokenizing sentences >")

                curr_ids = utils.TypedList(np.ndarray)
                for ft_sent in ft_sentences:
                    ids = None
                    if blingfire:
                        ids = blingfire.text_to_ids(idtok_model, ft_sent,
                                                    args.id_seq_length,
                                                    args.oov_id)

                    if args.mode == "bert-native" or args.mode == "check":
                        bert_tokens = bert_full_tokenizer.tokenize(ft_sent)
                        bert_tok_ids = bert_full_tokenizer.convert_tokens_to_ids(
                            bert_tokens)

                        # copy the plain list into a TypedList, zero-pad it
                        # up to id_seq_length, then truncate to exactly
                        # id_seq_length
                        bert_tok_ids_ = utils.TypedList(int)
                        for x in bert_tok_ids:
                            bert_tok_ids_.append(x)
                        bert_tok_ids = bert_tok_ids_

                        while len(bert_tok_ids) < args.id_seq_length:
                            bert_tok_ids.append(0)

                        bert_tok_ids = np.array(
                            list(bert_tok_ids),
                            dtype=np.int32)[:args.id_seq_length]

                        if args.mode == "bert-native":
                            ids = bert_tok_ids

                    if args.mode == "check":
                        # In the "check" mode, we test that both the
                        # bert native tokenizer and blingfire return
                        # the same thing.

                        utils.check_equal(ids.shape, bert_tok_ids.shape)
                        comp = ids == bert_tok_ids

                        if not np.all(comp):

                            def bert_decode(ids):
                                # join the non-padding ids back into words
                                # (could .replace(" ##", "") to merge wordpieces)
                                return " ".join(
                                    ids_to_words[wid] for wid in ids
                                    if wid != 0)

                            sep = "################################################"
                            print("\n" + sep)
                            print("Mismatch between decoders:")
                            print(f"\t Blingfire decoded: "
                                  f"\"{bert_decode(ids)}\"")
                            print(f"\t- Bert-native decoded: "
                                  f"\"{bert_decode(bert_tok_ids)}\"")
                            print(sep + "\n")

                            num_errors = np.sum(np.logical_not(comp))
                            out_of = max(np.sum(ids != 0),
                                         np.sum(bert_tok_ids != 0))

                            # only fail hard if every non-padding value differs
                            if num_errors / out_of >= 1:
                                raise ValueError(f"{num_errors} "
                                                 f"different out of {out_of} "
                                                 f"non-padding values")

                    curr_ids.append(ids)

                logging.debug(f"< Done tokenizing sentences. It took "
                              f"{time.time() - bpe_tok_time_start} seconds.")

                concat_time_start = time.time()
                logging.debug("Concatenating the ids. >")

                if not curr_ids:
                    logging.warning(">> Warning: empty cur_file_ids")

                id_mat = np.array(list(curr_ids), dtype=np.int32)

                logging.debug(f"< Done Concatenating the ids. Took "
                              f"{time.time() - concat_time_start} seconds.")
                if len(id_mat) == 0:
                    logging.warning(
                        f"We got an id_mat of size 0.\nFile index = {i}."
                        f"\nBook file path = {in_file_path}.")
                logging.debug("Saving >")
                path = pathlib.Path(out_file_path)
                np.save(path.parent / (f"{i}_" + str(path.name)), id_mat)
                logging.debug("< Done saving.")

    # Free model
    if blingfire:
        blingfire.free_model(idtok_model)
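For reference, a minimal driver that feeds generate_textid_corpus might look like the sketch below. The flag names mirror the attributes the function reads (args.mode, args.input_dir, and so on); the defaults are illustrative assumptions, not the original script's.

import argparse

def parse_args() -> argparse.Namespace:
    # Flag names follow the attributes used by generate_textid_corpus;
    # default values here are assumptions for illustration only.
    parser = argparse.ArgumentParser(
        description="Convert raw text files to Bert token-id matrices.")
    parser.add_argument("--mode", default="blingfire")  # one of VALID_MODES
    parser.add_argument("--input_dir", required=True)
    parser.add_argument("--output_dir", required=True)
    parser.add_argument("--textid_dir", default=".")
    parser.add_argument("--base_tok_file", default="bert_base_tok.bin")
    parser.add_argument("--vocab_path", required=True)
    parser.add_argument("--id_seq_length", type=int, default=128)
    parser.add_argument("--oov_id", type=int, default=100)
    parser.add_argument("--max_number_of_books", type=int, default=0)
    return parser.parse_args()

if __name__ == "__main__":
    generate_textid_corpus(parse_args())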