Example #1
def process(ffp, ctx):
    """
    process(ffp, ctx)

    File process that:

     * Builds dataset ID,
     * Retrieves file size,
     * Computes checksums,
     * Deduces the mapfile name,
     * Creates the output directory if it does not already exist,
     * Writes the corresponding mapfile entry into it.

    :param str ffp: The file full path
    :param esgprep.mapfile.main.ProcessingContext ctx: The processing context
    :return: The output mapfile full path, or False if the file is out of the version scope
    :rtype: *str*

    """
    # Instantiate file handler
    fh = File(ffp)
    # Matching between directory_format and file full path
    fh.load_attributes(ctx)
    # Silently stop process if not in desired version scope
    if not in_version_scope(fh, ctx):
        return False
    # Deduce dataset_id
    dataset_id = fh.get_dataset_id(ctx)
    # Deduce dataset_version
    dataset_version = fh.get_dataset_version(ctx)
    # Build mapfile name depending on the --mapfile flag and appropriate tokens
    outfile = get_output_mapfile(fh.attributes, dataset_id, dataset_version, ctx)
    # Generate the corresponding mapfile entry/line
    line = generate_mapfile_entry(dataset_id,
                                  dataset_version,
                                  ffp,
                                  fh.size,
                                  fh.mtime,
                                  fh.checksum(ctx.checksum_type, ctx.checksum_client),
                                  ctx)
    insert_mapfile_entry(outfile, line, ffp)
    # Return mapfile name
    return outfile
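The function above is meant to be mapped over many file paths with a shared processing context. A rough, hypothetical driver is sketched below; nothing beyond process() itself comes from esgprep, and it assumes the context object is picklable.

from functools import partial
from multiprocessing import Pool

def run_all(paths, ctx, nprocs=4):
    """Hypothetical driver: apply process(ffp, ctx) to every path in parallel."""
    with Pool(processes=nprocs) as pool:
        # partial() binds the shared context so the pool only has to pass the path
        results = pool.map(partial(process, ctx=ctx), paths)
    # process() returns False for files outside the version scope; keep real mapfiles only
    return [r for r in results if r]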
Example #2
def process(ffp):
    """
    Checks the time axis and rewrites it if needed.

    :param str ffp: The file full path to process
    :returns: 1 if the time axis check failed, 0 otherwise (None if the file was skipped)
    :rtype: *int*

    """
    # Get the processing context from the process global environment
    assert 'pctx' in globals().keys()
    pctx = globals()['pctx']
    # Block to avoid stopping the whole run if a worker fails
    try:
        # Instantiate file handler
        fh = File(ffp=ffp,
                  pattern=pctx.pattern,
                  ref_units=pctx.ref_units,
                  ref_calendar=pctx.ref_calendar,
                  input_start_timestamp=pctx.ref_start,
                  input_end_timestamp=pctx.ref_end)
        # Check time axis correctness
        wrong_timesteps = list()
        # Rebuild a theoretical time axis with appropriate precision
        fh.time_axis_rebuilt = trunc(fh.build_time_axis(), NDECIMALS)
        if not np.array_equal(fh.time_axis_rebuilt, fh.time_axis):
            fh.status.append(ERROR_TIME_AXIS_KO)
            time_axis_diff = (fh.time_axis_rebuilt == fh.time_axis)
            wrong_timesteps = list(np.where(time_axis_diff == False)[0])
        # Check time boundaries correctness
        wrong_bounds = list()
        if fh.has_bounds:
            fh.time_bounds_rebuilt = trunc(fh.build_time_bounds(), NDECIMALS)
            if not np.array_equal(fh.time_bounds_rebuilt, fh.time_bounds):
                fh.status.append(ERROR_TIME_BOUNDS_KO)
                time_bounds_diff = (fh.time_bounds_rebuilt == fh.time_bounds)
                wrong_bounds = list(np.where(np.all(time_bounds_diff, axis=1) == False)[0])
        # Get last theoretical date
        fh.last_num = fh.time_axis_rebuilt[-1]
        fh.last_date = fh.date_axis_rebuilt[-1]
        fh.last_timestamp = truncated_timestamp(str2dates(fh.last_date), fh.timestamp_length)
        # Check consistency between start date infile and start date from filename
        if fh.start_date_infile != fh.start_date_filename:
            if fh.start_date_infile < fh.start_date_filename:
                fh.status.append(ERROR_START_DATE_IN_VS_NAME)
            else:
                fh.status.append(ERROR_START_DATE_NAME_VS_IN)
        # Check consistency between end date infile and end date from filename
        if not pctx.on_fly and fh.end_date_infile != fh.end_date_filename:
            if fh.end_date_infile < fh.end_date_filename:
                fh.status.append(ERROR_END_DATE_IN_VS_NAME)
            else:
                fh.status.append(ERROR_END_DATE_NAME_VS_IN)
        # Check consistency between rebuilt end date and end date from filename
        if not pctx.on_fly and fh.last_date != fh.end_date_filename:
            if fh.last_date < fh.end_date_filename:
                fh.status.append(ERROR_END_DATE_REF_VS_NAME)
            else:
                fh.status.append(ERROR_END_DATE_NAME_VS_REF)
        # Check consistency between rebuilt end date and end date infile
        if not pctx.on_fly and fh.last_date != fh.end_date_infile:
            if fh.last_date < fh.end_date_infile:
                fh.status.append(ERROR_END_DATE_REF_VS_IN)
            else:
                fh.status.append(ERROR_END_DATE_IN_VS_REF)
        # Check file consistency between instant time and time boundaries
        if fh.is_instant and fh.has_bounds:
            fh.status.append(ERROR_TIME_BOUNDS_INS)
        # Check file consistency between averaged time and time boundaries
        if not fh.is_instant and not fh.has_bounds:
            fh.status.append(ERROR_TIME_BOUNDS_AVE)
        # Check time units consistency between file and ref
        if pctx.ref_units != fh.tunits:
            fh.status.append(ERROR_TIME_UNITS)
        # Check calendar consistency between file and ref
        if pctx.ref_calendar != fh.calendar:
            fh.status.append(ERROR_TIME_CALENDAR)
        # Exclude ignored codes from the status codes
        # (before correction, to avoid undesired operations)
        fh.status = [code for code in fh.status if code not in pctx.ignore_codes]
        correction = False
        # Rename file depending on checking
        if (pctx.write and (
                (
                        ({ERROR_END_DATE_NAME_VS_IN, ERROR_END_DATE_IN_VS_NAME}.intersection(set(fh.status)))
                        and
                        {ERROR_END_DATE_NAME_VS_REF, ERROR_END_DATE_REF_VS_NAME}.intersection(set(fh.status))
                )
                or
                (
                        ({ERROR_END_DATE_NAME_VS_REF, ERROR_END_DATE_REF_VS_NAME}.intersection(set(fh.status))
                         and
                         {ERROR_END_DATE_IN_VS_REF, ERROR_END_DATE_REF_VS_IN}.intersection(set(fh.status)))
                )
        )) or pctx.force:
            # Change filename and file full path dynamically
            fh.nc_file_rename(new_filename=re.sub(fh.end_timestamp_filename, fh.last_timestamp, fh.filename))
            correction = True
        # Remove time boundaries depending on checking
        if pctx.write and ERROR_TIME_BOUNDS_INS in fh.status:
            # Delete time bounds and bounds attribute from file if write or force mode
            fh.nc_var_delete(variable=fh.tbnds)
            fh.nc_att_delete(attribute='bounds', variable='time')
            correction = True
        # Rewrite time units depending on checking
        if pctx.write and ERROR_TIME_UNITS in fh.status:
            fh.nc_att_overwrite('units', variable='time', data=pctx.ref_units)
        # Rewrite time calendar depending on checking
        if pctx.write and ERROR_TIME_CALENDAR in fh.status:
            fh.nc_att_overwrite('calendar', variable='time', data=pctx.ref_calendar)
        # Rewrite time axis depending on checking
        if (pctx.write and {ERROR_TIME_AXIS_KO, ERROR_TIME_BOUNDS_KO}.intersection(set(fh.status))) or pctx.force:
            fh.nc_var_overwrite('time', fh.time_axis_rebuilt)
            # Rewrite time boundaries if needed
            if fh.has_bounds:
                fh.nc_var_overwrite(fh.tbnds, fh.time_bounds_rebuilt)
            correction = True
        # Diagnostic display
        msgval = {}
        msgval['file'] = COLORS.HEADER(fh.filename)
        if ERROR_TIME_UNITS in fh.status:
            msgval['infile_units'] = COLORS.FAIL(fh.tunits)
            msgval['ref_units'] = COLORS.SUCCESS(pctx.ref_units)
        else:
            msgval['infile_units'] = COLORS.SUCCESS(fh.tunits)
            msgval['ref_units'] = COLOR('cyan').bold(pctx.ref_units)
        if ERROR_TIME_CALENDAR in fh.status:
            msgval['infile_calendar'] = COLORS.FAIL(fh.calendar)
            msgval['ref_calendar'] = COLORS.SUCCESS(pctx.ref_calendar)
        else:
            msgval['infile_calendar'] = COLORS.SUCCESS(fh.calendar)
            msgval['ref_calendar'] = COLOR('cyan').bold(pctx.ref_calendar)
        if {ERROR_START_DATE_IN_VS_NAME, ERROR_START_DATE_NAME_VS_IN}.intersection(set(fh.status)):
            msgval['infile_start_timestamp'] = COLORS.FAIL(fh.start_timestamp_infile)
            msgval['infile_start_date'] = COLORS.FAIL(fh.start_date_infile)
            msgval['infile_start_num'] = COLORS.FAIL(str(fh.start_num_infile))
            msgval['ref_start_timestamp'] = COLORS.SUCCESS(fh.start_timestamp_filename)
            msgval['ref_start_date'] = COLORS.SUCCESS(fh.start_date_filename)
            msgval['ref_start_num'] = COLORS.SUCCESS(str(fh.start_num_filename))
        else:
            msgval['infile_start_timestamp'] = COLORS.SUCCESS(fh.start_timestamp_infile)
            msgval['infile_start_date'] = COLORS.SUCCESS(fh.start_date_infile)
            msgval['infile_start_num'] = COLORS.SUCCESS(str(fh.start_num_infile))
            msgval['ref_start_timestamp'] = COLOR('cyan').bold(fh.start_timestamp_filename)
            msgval['ref_start_date'] = COLOR('cyan').bold(fh.start_date_filename)
            msgval['ref_start_num'] = COLOR('cyan').bold(str(fh.start_num_filename))
        if {ERROR_END_DATE_NAME_VS_REF, ERROR_END_DATE_REF_VS_NAME}.intersection(set(fh.status)):
            msgval['filename_end_timestamp'] = COLORS.FAIL(fh.end_timestamp_filename)
            msgval['filename_end_date'] = COLORS.FAIL(fh.end_date_filename)
            msgval['filename_end_num'] = COLORS.FAIL(str(fh.end_num_filename))
        else:
            msgval['filename_end_timestamp'] = COLORS.SUCCESS(fh.end_timestamp_filename)
            msgval['filename_end_date'] = COLORS.SUCCESS(fh.end_date_filename)
            msgval['filename_end_num'] = COLORS.SUCCESS(str(fh.end_num_filename))
        if {ERROR_END_DATE_IN_VS_REF, ERROR_END_DATE_REF_VS_IN}.intersection(set(fh.status)):
            msgval['infile_end_timestamp'] = COLORS.FAIL(fh.end_timestamp_infile)
            msgval['infile_end_date'] = COLORS.FAIL(fh.end_date_infile)
            msgval['infile_end_num'] = COLORS.FAIL(str(fh.end_num_infile))
        else:
            msgval['infile_end_timestamp'] = COLORS.SUCCESS(fh.end_timestamp_infile)
            msgval['infile_end_date'] = COLORS.SUCCESS(fh.end_date_infile)
            msgval['infile_end_num'] = COLORS.SUCCESS(str(fh.end_num_infile))
        msgval['ref_end_timestamp'] = COLOR('cyan').bold(fh.last_timestamp)
        msgval['ref_end_date'] = COLOR('cyan').bold(fh.last_date)
        msgval['ref_end_num'] = COLOR('cyan').bold(str(fh.last_num))
        msgval['len'] = fh.length
        msgval['table'] = fh.table
        msgval['freq'] = fh.frequency
        msgval['step'] = fh.step
        msgval['units'] = fh.step_units
        msgval['instant'] = fh.is_instant
        msgval['clim'] = fh.is_climatology
        msgval['bnds'] = fh.has_bounds

        msg = """{file}
        Units:
            IN FILE -- {infile_units}
            REF     -- {ref_units}
        Calendar:
            IN FILE -- {infile_calendar}
            REF     -- {ref_calendar}
        Start: 
            IN FILE -- {infile_start_timestamp} = {infile_start_date} = {infile_start_num}
            REF     -- {ref_start_timestamp} = {ref_start_date} = {ref_start_num}
        End:
            FILENAME -- {filename_end_timestamp} = {filename_end_date} = {filename_end_num}
            IN FILE  -- {infile_end_timestamp} = {infile_end_date} = {infile_end_num}
            REBUILT  -- {ref_end_timestamp} = {ref_end_date} = {ref_end_num}
        Length: {len}
        MIP table: {table}
        Frequency: {freq} = {step} {units}
        Is instant: {instant}
        Is climatology: {clim}
        Has bounds: {bnds}""".format(**msgval)

        # Add status message
        if fh.status:
            for s in fh.status:
                msg += """\n        Status: {} """.format(COLORS.FAIL('Error {} -- {}'.format(s, STATUS[s])))
                if correction and s in ERROR_CORRECTED_SET:
                    msg += ' -- {}'.format(COLORS.SUCCESS('Corrected'))
        else:
            msg += """\n        Status: {}""".format(COLORS.SUCCESS(STATUS[ERROR_TIME_AXIS_OK]))
        # Display wrong time steps and/or bounds
        timestep_limit = pctx.limit if pctx.limit else len(wrong_timesteps)
        for i, v in enumerate(wrong_timesteps):
            if (i + 1) <= timestep_limit:
                msg += """\n        Wrong time step at index {}: IN FILE -- {} = {} vs. REBUILT -- {} = {}""".format(
                    COLORS.HEADER(str(v + 1)),
                    COLORS.FAIL(fh.date_axis[v]),
                    COLORS.FAIL(str(fh.time_axis[v]).ljust(10)),
                    COLORS.SUCCESS(fh.date_axis_rebuilt[v]),
                    COLORS.SUCCESS(str(fh.time_axis_rebuilt[v]).ljust(10)))
        bounds_limit = pctx.limit if pctx.limit else len(wrong_bounds)
        for i, v in enumerate(wrong_bounds):
            if (i + 1) <= bounds_limit:
                msg += """\n        Wrong time bounds at index {}: IN FILE -- {} = {} vs. REBUILT -- {} = {}""".format(
                    COLORS.HEADER(str(v + 1)),
                    COLORS.FAIL('[{} {}]'.format(fh.date_bounds[v][0], fh.date_bounds[v][1])),
                    COLORS.FAIL(str(fh.time_bounds[v]).ljust(20)),
                    COLORS.SUCCESS('[{} {}]'.format(fh.date_bounds_rebuilt[v][0], fh.date_bounds_rebuilt[v][1])),
                    COLORS.SUCCESS(str(fh.time_bounds_rebuilt[v]).ljust(20)))
        # Acquire lock to print result
        with pctx.lock:
            if fh.status:
                Print.error(msg, buffer=True)
            else:
                Print.success(msg, buffer=True)
        # Return an error flag if any status code remains
        if fh.status:
            return 1
        else:
            return 0
    except KeyboardInterrupt:
        raise
    except Exception:
        exc = traceback.format_exc().splitlines()
        msg = COLORS.HEADER('{}'.format(os.path.basename(ffp)))
        msg += """\n        Status: {}""".format(COLORS.FAIL('Skipped'))
        msg += """\n        {}""".format(exc[0])
        msg += """\n      """
        msg += """\n      """.join(exc[1:])
        with pctx.lock:
            Print.error(msg, buffer=True)
        return None
    finally:
        # Print progress
        with pctx.lock:
            pctx.progress.value += 1
            percentage = int(pctx.progress.value * 100 / pctx.nbfiles)
            msg = COLORS.OKBLUE('\rProcess netCDF file(s): ')
            msg += '{}% | {}/{} files'.format(percentage, pctx.progress.value, pctx.nbfiles)
            Print.progress(msg)
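The assert 'pctx' in globals() at the top of this example relies on the processing context being installed in each worker's global namespace before any file is handled. A minimal sketch of that pattern, assuming a standard multiprocessing pool initializer (the driver below is illustrative, not part of nctime):

from multiprocessing import Pool

def _initializer(context):
    """Store the shared processing context in the worker's global namespace."""
    global pctx
    pctx = context

def run_checks(paths, context, nprocs=4):
    """Hypothetical driver: each worker's process() call finds `pctx` in globals()."""
    with Pool(processes=nprocs, initializer=_initializer, initargs=(context,)) as pool:
        return pool.map(process, paths)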
Example #3
def process(ffp):
    """
    Checks the time axis and rewrites it if needed.

    :param str ffp: The file full path to process
    :returns: 1 if the time axis check failed, 0 otherwise (None if the file was skipped)
    :rtype: *int*

    """
    # Get the processing context from the process global environment
    assert 'pctx' in globals().keys()
    pctx = globals()['pctx']
    # Block to avoid stopping the whole run if a worker fails
    try:
        # Instantiate file handler
        fh = File(ffp=ffp,
                  pattern=pctx.pattern,
                  ref_units=pctx.ref_units,
                  ref_calendar=pctx.ref_calendar,
                  input_start_timestamp=pctx.ref_start,
                  input_end_timestamp=pctx.ref_end)
        # Check time axis correctness
        wrong_timesteps = list()
        # Rebuild a theoretical time axis with appropriate precision
        fh.time_axis_rebuilt = trunc(fh.build_time_axis(), NDECIMALS)
        if not np.array_equal(fh.time_axis_rebuilt, fh.time_axis):
            fh.status.append(ERROR_TIME_AXIS_KO)
            time_axis_diff = (fh.time_axis_rebuilt == fh.time_axis)
            wrong_timesteps = list(np.where(time_axis_diff == False)[0])
        # Check time boundaries correctness
        wrong_bounds = list()
        if fh.has_bounds:
            fh.time_bounds_rebuilt = trunc(fh.build_time_bounds(), NDECIMALS)
            if not np.array_equal(fh.time_bounds_rebuilt, fh.time_bounds):
                fh.status.append(ERROR_TIME_BOUNDS_KO)
                time_bounds_diff = (fh.time_bounds_rebuilt == fh.time_bounds)
                wrong_bounds = list(
                    np.where(np.all(time_bounds_diff, axis=1) == False)[0])
        # Get last theoretical date
        fh.last_num = fh.time_axis_rebuilt[-1]
        fh.last_date = fh.date_axis_rebuilt[-1]
        fh.last_timestamp = truncated_timestamp(str2dates(fh.last_date),
                                                fh.timestamp_length)
        # Check consistency between start date infile and start date from filename
        if fh.start_date_infile != fh.start_date_filename:
            if fh.start_date_infile < fh.start_date_filename:
                fh.status.append(ERROR_START_DATE_IN_VS_NAME)
            else:
                fh.status.append(ERROR_START_DATE_NAME_VS_IN)
        # Check consistency between end date infile and end date from filename
        if not pctx.on_fly and fh.end_date_infile != fh.end_date_filename:
            if fh.end_date_infile < fh.end_date_filename:
                fh.status.append(ERROR_END_DATE_IN_VS_NAME)
            else:
                fh.status.append(ERROR_END_DATE_NAME_VS_IN)
        # Check consistency between rebuilt end date and end date from filename
        if not pctx.on_fly and fh.last_date != fh.end_date_filename:
            if fh.last_date < fh.end_date_filename:
                fh.status.append(ERROR_END_DATE_REF_VS_NAME)
            else:
                fh.status.append(ERROR_END_DATE_NAME_VS_REF)
        # Check consistency between rebuilt end date and end date infile
        if not pctx.on_fly and fh.last_date != fh.end_date_infile:
            if fh.last_date < fh.end_date_infile:
                fh.status.append(ERROR_END_DATE_REF_VS_IN)
            else:
                fh.status.append(ERROR_END_DATE_IN_VS_REF)
        # Check file consistency between instant time and time boundaries
        if fh.is_instant and fh.has_bounds:
            fh.status.append(ERROR_TIME_BOUNDS_INS)
        # Check file consistency between averaged time and time boundaries
        if not fh.is_instant and not fh.has_bounds:
            fh.status.append(ERROR_TIME_BOUNDS_AVE)
        # Check time units consistency between file and ref
        if pctx.ref_units != fh.tunits:
            fh.status.append(ERROR_TIME_UNITS)
        # Check calendar consistency between file and ref
        if pctx.ref_calendar != fh.calendar:
            fh.status.append(ERROR_TIME_CALENDAR)
        # Exclude ignored codes from the status codes
        # (before correction, to avoid undesired operations)
        fh.status = [
            code for code in fh.status if code not in pctx.ignore_codes
        ]
        correction = False
        # Rename file depending on checking
        if (pctx.write and
            ((({ERROR_END_DATE_NAME_VS_IN, ERROR_END_DATE_IN_VS_NAME
                }.intersection(set(fh.status)))
              and {ERROR_END_DATE_NAME_VS_REF, ERROR_END_DATE_REF_VS_NAME
                   }.intersection(set(fh.status))) or
             (({ERROR_END_DATE_NAME_VS_REF, ERROR_END_DATE_REF_VS_NAME
                }.intersection(set(fh.status))
               and {ERROR_END_DATE_IN_VS_REF, ERROR_END_DATE_REF_VS_IN
                    }.intersection(set(fh.status)))))) or pctx.force:
            # Change filename and file full path dynamically
            fh.nc_file_rename(
                new_filename=re.sub(fh.orig_end_timestamp_filename,
                                    fh.last_timestamp, fh.filename))
            correction = True
        # Remove time boundaries depending on checking
        if pctx.write and ERROR_TIME_BOUNDS_INS in fh.status:
            # Delete time bounds and bounds attribute from file if write or force mode
            fh.nc_var_delete(variable=fh.tbnds)
            fh.nc_att_delete(attribute='bounds', variable='time')
            correction = True
        # Rewrite time units depending on checking
        if pctx.write and ERROR_TIME_UNITS in fh.status:
            fh.nc_att_overwrite('units', variable='time', data=pctx.ref_units)
        # Rewrite time calendar depending on checking
        if pctx.write and ERROR_TIME_CALENDAR in fh.status:
            fh.nc_att_overwrite('calendar',
                                variable='time',
                                data=pctx.ref_calendar)
        # Rewrite time axis depending on checking
        if (pctx.write and {ERROR_TIME_AXIS_KO, ERROR_TIME_BOUNDS_KO
                            }.intersection(set(fh.status))) or pctx.force:
            fh.nc_var_overwrite('time', fh.time_axis_rebuilt)
            # Rewrite time boundaries if needed
            if fh.has_bounds:
                fh.nc_var_overwrite(fh.tbnds, fh.time_bounds_rebuilt)
            correction = True
        # Diagnostic display
        msgval = {}
        msgval['file'] = COLORS.HEADER(fh.filename)
        if ERROR_TIME_UNITS in fh.status:
            msgval['infile_units'] = COLORS.FAIL(fh.tunits)
            msgval['ref_units'] = COLORS.SUCCESS(pctx.ref_units)
        else:
            msgval['infile_units'] = COLORS.SUCCESS(fh.tunits)
            msgval['ref_units'] = COLOR('cyan').bold(pctx.ref_units)
        if ERROR_TIME_CALENDAR in fh.status:
            msgval['infile_calendar'] = COLORS.FAIL(fh.calendar)
            msgval['ref_calendar'] = COLORS.SUCCESS(pctx.ref_calendar)
        else:
            msgval['infile_calendar'] = COLORS.SUCCESS(fh.calendar)
            msgval['ref_calendar'] = COLOR('cyan').bold(pctx.ref_calendar)
        if {ERROR_START_DATE_IN_VS_NAME,
                ERROR_START_DATE_NAME_VS_IN}.intersection(set(fh.status)):
            msgval['infile_start_timestamp'] = COLORS.FAIL(
                fh.start_timestamp_infile)
            msgval['infile_start_date'] = COLORS.FAIL(fh.start_date_infile)
            msgval['infile_start_num'] = COLORS.FAIL(str(fh.start_num_infile))
            msgval['ref_start_timestamp'] = COLORS.SUCCESS(
                fh.start_timestamp_filename)
            msgval['ref_start_date'] = COLORS.SUCCESS(fh.start_date_filename)
            msgval['ref_start_num'] = COLORS.SUCCESS(str(
                fh.start_num_filename))
        else:
            msgval['infile_start_timestamp'] = COLORS.SUCCESS(
                fh.start_timestamp_infile)
            msgval['infile_start_date'] = COLORS.SUCCESS(fh.start_date_infile)
            msgval['infile_start_num'] = COLORS.SUCCESS(
                str(fh.start_num_infile))
            msgval['ref_start_timestamp'] = COLOR('cyan').bold(
                fh.start_timestamp_filename)
            msgval['ref_start_date'] = COLOR('cyan').bold(
                fh.start_date_filename)
            msgval['ref_start_num'] = COLOR('cyan').bold(
                str(fh.start_num_filename))
        if {ERROR_END_DATE_NAME_VS_REF,
                ERROR_END_DATE_REF_VS_NAME}.intersection(set(fh.status)):
            msgval['filename_end_timestamp'] = COLORS.FAIL(
                fh.end_timestamp_filename)
            msgval['filename_end_date'] = COLORS.FAIL(fh.end_date_filename)
            msgval['filename_end_num'] = COLORS.FAIL(str(fh.end_num_filename))
        else:
            msgval['filename_end_timestamp'] = COLORS.SUCCESS(
                fh.end_timestamp_filename)
            msgval['filename_end_date'] = COLORS.SUCCESS(fh.end_date_filename)
            msgval['filename_end_num'] = COLORS.SUCCESS(
                str(fh.end_num_filename))
        if {ERROR_END_DATE_IN_VS_REF,
                ERROR_END_DATE_REF_VS_IN}.intersection(set(fh.status)):
            msgval['infile_end_timestamp'] = COLORS.FAIL(
                fh.end_timestamp_infile)
            msgval['infile_end_date'] = COLORS.FAIL(fh.end_date_infile)
            msgval['infile_end_num'] = COLORS.FAIL(str(fh.end_num_infile))
        else:
            msgval['infile_end_timestamp'] = COLORS.SUCCESS(
                fh.end_timestamp_infile)
            msgval['infile_end_date'] = COLORS.SUCCESS(fh.end_date_infile)
            msgval['infile_end_num'] = COLORS.SUCCESS(str(fh.end_num_infile))
        msgval['ref_end_timestamp'] = COLOR('cyan').bold(fh.last_timestamp)
        msgval['ref_end_date'] = COLOR('cyan').bold(fh.last_date)
        msgval['ref_end_num'] = COLOR('cyan').bold(str(fh.last_num))
        msgval['len'] = fh.length
        msgval['table'] = fh.table
        msgval['freq'] = fh.frequency
        msgval['step'] = fh.step
        msgval['units'] = fh.step_units
        msgval['instant'] = fh.is_instant
        msgval['clim'] = fh.is_climatology
        msgval['bnds'] = fh.has_bounds

        msg = """{file}
        Units:
            IN FILE -- {infile_units}
            REF     -- {ref_units}
        Calendar:
            IN FILE -- {infile_calendar}
            REF     -- {ref_calendar}
        Start: 
            IN FILE -- {infile_start_timestamp} = {infile_start_date} = {infile_start_num}
            REF     -- {ref_start_timestamp} = {ref_start_date} = {ref_start_num}
        End:
            FILENAME -- {filename_end_timestamp} = {filename_end_date} = {filename_end_num}
            IN FILE  -- {infile_end_timestamp} = {infile_end_date} = {infile_end_num}
            REBUILT  -- {ref_end_timestamp} = {ref_end_date} = {ref_end_num}
        Length: {len}
        MIP table: {table}
        Frequency: {freq} = {step} {units}
        Is instant: {instant}
        Is climatology: {clim}
        Has bounds: {bnds}""".format(**msgval)

        # Add status message
        if fh.status:
            for s in fh.status:
                msg += """\n        Status: {} """.format(
                    COLORS.FAIL('Error {} -- {}'.format(s, STATUS[s])))
                if correction and s in ERROR_CORRECTED_SET:
                    msg += ' -- {}'.format(COLORS.SUCCESS('Corrected'))
        else:
            msg += """\n        Status: {}""".format(
                COLORS.SUCCESS(STATUS[ERROR_TIME_AXIS_OK]))
        # Display wrong time steps and/or bounds
        timestep_limit = pctx.limit if pctx.limit else len(wrong_timesteps)
        for i, v in enumerate(wrong_timesteps):
            if (i + 1) <= timestep_limit:
                msg += """\n        Wrong time step at index {}: IN FILE -- {} = {} vs. REBUILT -- {} = {}""".format(
                    COLORS.HEADER(str(v + 1)), COLORS.FAIL(fh.date_axis[v]),
                    COLORS.FAIL(str(fh.time_axis[v]).ljust(10)),
                    COLORS.SUCCESS(fh.date_axis_rebuilt[v]),
                    COLORS.SUCCESS(str(fh.time_axis_rebuilt[v]).ljust(10)))
        bounds_limit = pctx.limit if pctx.limit else len(wrong_bounds)
        for i, v in enumerate(wrong_bounds):
            if (i + 1) <= bounds_limit:
                msg += """\n        Wrong time bounds at index {}: IN FILE -- {} = {} vs. REBUILT -- {} = {}""".format(
                    COLORS.HEADER(str(v + 1)),
                    COLORS.FAIL('[{} {}]'.format(fh.date_bounds[v][0],
                                                 fh.date_bounds[v][1])),
                    COLORS.FAIL(str(fh.time_bounds[v]).ljust(20)),
                    COLORS.SUCCESS('[{} {}]'.format(
                        fh.date_bounds_rebuilt[v][0],
                        fh.date_bounds_rebuilt[v][1])),
                    COLORS.SUCCESS(str(fh.time_bounds_rebuilt[v]).ljust(20)))
        # Acquire lock to print result
        with pctx.lock:
            if fh.status:
                Print.error(msg, buffer=True)
            else:
                Print.success(msg, buffer=True)
        # Return an error flag if any status code remains
        if fh.status:
            return 1
        else:
            return 0
    except KeyboardInterrupt:
        raise
    except Exception:
        exc = traceback.format_exc().splitlines()
        msg = COLORS.HEADER('{}'.format(os.path.basename(ffp)))
        msg += """\n        Status: {}""".format(COLORS.FAIL('Skipped'))
        msg += """\n        {}""".format(exc[0])
        msg += """\n      """
        msg += """\n      """.join(exc[1:])
        with pctx.lock:
            Print.error(msg, buffer=True)
        return None
    finally:
        # Print progress
        with pctx.lock:
            pctx.progress.value += 1
            percentage = int(pctx.progress.value * 100 / pctx.nbfiles)
            msg = COLORS.OKBLUE('\rProcess netCDF file(s): ')
            msg += '{}% | {}/{} files'.format(percentage, pctx.progress.value,
                                              pctx.nbfiles)
            Print.progress(msg)
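The core of the check in Examples 2 and 3 is a plain NumPy comparison between the in-file time axis and the rebuilt one. The standalone snippet below (with made-up values) shows how the wrong time steps and wrong bounds rows are located:

import numpy as np

time_axis = np.array([15.5, 45.0, 74.5])          # values read from the file
time_axis_rebuilt = np.array([15.5, 44.5, 74.5])  # theoretical axis

# Indices where the rebuilt axis disagrees with the in-file axis
wrong_timesteps = np.where(time_axis_rebuilt != time_axis)[0]
print(wrong_timesteps)  # -> [1]

time_bounds = np.array([[0., 31.], [31., 59.], [59., 90.]])
time_bounds_rebuilt = np.array([[0., 31.], [31., 58.], [59., 90.]])

# A bounds row is wrong as soon as one of its two values differs
row_ok = np.all(time_bounds_rebuilt == time_bounds, axis=1)
wrong_bounds = np.where(~row_ok)[0]
print(wrong_bounds)  # -> [1]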
Example #4
def process(source):
    """
    process(source)

    File process that:

     * Handles the file,
     * Deduces facet key/value pairs from the file attributes,
     * Checks facet values against the CV,
     * Applies the versioning,
     * Populates the DRS tree, creating the appropriate leaves,
     * Stores dataset statistics.


    :param str source: The file full path to process

    """
    # Get the processing context from the process global environment
    assert 'pctx' in globals().keys()
    pctx = globals()['pctx']
    # Block to avoid stopping the whole run if a worker fails
    try:
        # Instantiate file handler
        fh = File(source)
        # Ignore files from incoming
        if fh.filename in pctx.ignore_from_incoming:
            msg = TAGS.SKIP + COLORS.HEADER(source)
            with pctx.lock:
                Print.exception(msg, buffer=True)
            return None
        # Load attributes from the filename, netCDF attributes and the command line
        fh.load_attributes(root=pctx.root,
                           pattern=pctx.pattern,
                           set_values=pctx.set_values)
        # Checks the facet values provided by the loaded attributes
        fh.check_facets(facets=pctx.facets,
                        config=pctx.cfg,
                        set_keys=pctx.set_keys)
        # Get parts of DRS path
        parts = fh.get_drs_parts(pctx.facets)
        # Instantiate file DRS path handler
        fh.drs = DRSPath(parts)
        # Ensure that the called project section is ALWAYS part of the DRS path elements (case insensitive)
        if not fh.drs.path().lower().startswith(pctx.project.lower()):
            raise InconsistentDRSPath(pctx.project, fh.drs.path())
        # Compute file checksum
        if fh.drs.v_latest and not pctx.no_checksum:
            fh.checksum = checksum(fh.ffp, pctx.checksum_type)
        # Get file tracking id
        fh.tracking_id = get_tracking_id(fh.ffp, pctx.project)
        if fh.drs.v_latest:
            latest_file = os.path.join(fh.drs.path(latest=True, root=True),
                                       fh.filename)
            # Compute checksum of latest file version if exists
            if os.path.exists(latest_file) and not pctx.no_checksum:
                fh.latest_checksum = checksum(latest_file, pctx.checksum_type)
            # Get tracking_id of latest file version if exists
            if os.path.exists(latest_file):
                fh.latest_tracking_id = get_tracking_id(
                    latest_file, pctx.project)
        msg = TAGS.SUCCESS + 'Processing {}'.format(COLORS.HEADER(fh.ffp))
        Print.info(msg)
        return fh
    except KeyboardInterrupt:
        raise
    except Exception:
        exc = traceback.format_exc().splitlines()
        msg = TAGS.SKIP + COLORS.HEADER(source) + '\n'
        msg += '\n'.join(exc)
        with pctx.lock:
            Print.exception(msg, buffer=True)
        return None
    finally:
        with pctx.lock:
            pctx.progress.value += 1
            percentage = int(pctx.progress.value * 100 / pctx.nbsources)
            msg = COLORS.OKBLUE('\rScanning incoming file(s): ')
            msg += '{}% | {}/{} file(s)'.format(percentage,
                                                pctx.progress.value,
                                                pctx.nbsources)
            Print.progress(msg)
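The finally block above increments pctx.progress under pctx.lock so that many workers can print a consistent progress line. A minimal sketch of such a shared counter, assuming standard multiprocessing primitives (the Context class below is a stand-in, not the esgprep processing context):

import multiprocessing
import sys

class Context(object):
    """Stand-in holding only the attributes the progress report needs."""
    def __init__(self, nbsources):
        self.nbsources = nbsources
        self.progress = multiprocessing.Value('i', 0)  # shared integer counter
        self.lock = multiprocessing.Lock()             # serializes the printing

def report(ctx):
    """Increment the counter and rewrite the one-line progress message."""
    with ctx.lock:
        ctx.progress.value += 1
        percentage = int(ctx.progress.value * 100 / ctx.nbsources)
        sys.stdout.write('\rScanning incoming file(s): {}% | {}/{} file(s)'.format(
            percentage, ctx.progress.value, ctx.nbsources))
        sys.stdout.flush()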
Example #5
def process(source):
    """
    File process that:

     * Handles the file,
     * Harvests directory attributes,
     * Checks DRS attributes against the CV,
     * Builds the dataset ID,
     * Retrieves the file size,
     * Computes checksums,
     * Deduces the mapfile name,
     * Writes the corresponding mapfile entry.

    Any error leads to skipping the file; it does not stop the process.

    :param str source: The source to process; either a file path or a dataset ID
    :returns: The output mapfile full path
    :rtype: *str*

    """
    # Get the processing context from the process global environment
    assert 'pctx' in globals().keys()
    pctx = globals()['pctx']
    # Block to avoid stopping the whole run if a worker fails
    try:
        if pctx.source_type == 'file':
            # Instantiate the source handler as a file
            sh = File(source)
        else:
            # Instantiate source handler as dataset
            sh = Dataset(source)
        # Matching between directory_format and file full path
        sh.load_attributes(pattern=pctx.pattern)
        # Deduce dataset_id
        dataset_id = pctx.dataset_name
        if not pctx.dataset_name:
            sh.check_facets(facets=pctx.facets, config=pctx.cfg)
            dataset_id = sh.get_dataset_id(pctx.cfg.get('dataset_id',
                                                        raw=True))
        # Ensure that the first facet is ALWAYS the same as the called project section (case insensitive)
        if not dataset_id.lower().startswith(pctx.project.lower()):
            raise InconsistentDatasetID(pctx.project, dataset_id.lower())
        # Deduce dataset_version
        dataset_version = sh.get_dataset_version(pctx.no_version)
        # Build mapfile name depending on the --mapfile flag and appropriate tokens
        outfile = get_output_mapfile(outdir=pctx.outdir,
                                     attributes=sh.attributes,
                                     mapfile_name=pctx.mapfile_name,
                                     dataset_id=dataset_id,
                                     dataset_version=dataset_version,
                                     mapfile_drs=pctx.mapfile_drs,
                                     basename=pctx.basename)
        # Dry-run: don't write the mapfile, only show its path
        if pctx.action == 'make':
            # Generate the corresponding mapfile entry/line
            optional_attrs = dict()
            optional_attrs['mod_time'] = sh.mtime
            if not pctx.no_checksum:
                optional_attrs['checksum'] = get_checksum(
                    sh.source, pctx.checksum_type, pctx.checksums_from)
                optional_attrs['checksum_type'] = pctx.checksum_type.upper()
            optional_attrs['dataset_tech_notes'] = pctx.notes_url
            optional_attrs['dataset_tech_notes_title'] = pctx.notes_title
            line = mapfile_entry(dataset_id=dataset_id,
                                 dataset_version=dataset_version,
                                 ffp=source,
                                 size=sh.size,
                                 optional_attrs=optional_attrs)
            write(outfile, line)
            msg = TAGS.SUCCESS
            msg += '{}'.format(os.path.splitext(os.path.basename(outfile))[0])
            msg += ' <-- ' + COLORS.HEADER(source)
            with pctx.lock:
                Print.info(msg)
        # Return mapfile name
        return outfile
    # Catch any exception into the error log instead of stopping the run
    except KeyboardInterrupt:
        raise
    except Exception:
        exc = traceback.format_exc().splitlines()
        msg = TAGS.SKIP + COLORS.HEADER(source) + '\n'
        msg += '\n'.join(exc)
        with pctx.lock:
            Print.exception(msg, buffer=True)
        return None
    finally:
        with pctx.lock:
            pctx.progress.value += 1
            percentage = int(pctx.progress.value * 100 / pctx.nbsources)
            msg = COLORS.OKBLUE('\rMapfile(s) generation: ')
            msg += '{}% | {}/{} {}'.format(percentage, pctx.progress.value,
                                           pctx.nbsources,
                                           SOURCE_TYPE[pctx.source_type])
            Print.progress(msg)
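mapfile_entry() assembles one pipe-separated line per file. The helper below is only an illustration of that idea; the exact field layout is an assumption based on the usual ESGF mapfile format, not a copy of the esgprep implementation.

def build_mapfile_line(dataset_id, dataset_version, ffp, size, optional_attrs):
    """Build one pipe-separated mapfile line (illustrative layout)."""
    fields = ['{}#{}'.format(dataset_id, dataset_version.lstrip('v')), ffp, str(size)]
    # Optional attributes are appended as key=value pairs, skipping empty ones
    fields += ['{}={}'.format(k, v) for k, v in optional_attrs.items() if v]
    return ' | '.join(fields) + '\n'

line = build_mapfile_line('cmip5.output1.IPSL.historical', 'v20120101',
                          '/data/tas_day.nc', 123456,
                          {'mod_time': 1330000000.0,
                           'checksum': 'abc123',
                           'checksum_type': 'SHA256'})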
Example #6
def process(source):
    """
    process(source)

    File process that:

     * Handles the file,
     * Deduces facet key/value pairs from the file attributes,
     * Checks facet values against the CV,
     * Applies the versioning,
     * Populates the DRS tree, creating the appropriate leaves,
     * Stores dataset statistics.


    :param str source: The file full path to process

    """
    # Get the processing context from the process global environment
    assert 'pctx' in globals().keys()
    pctx = globals()['pctx']
    # Block to avoid stopping the whole run if a worker fails
    try:
        # Instantiate file handler
        fh = File(source)
        # Ignore files from incoming
        if fh.filename in pctx.ignore_from_incoming:
            msg = TAGS.SKIP + COLORS.HEADER(source)
            with pctx.lock:
                Print.exception(msg, buffer=True)
            return None
        # Loads attributes from filename, netCDF attributes, command-line
        fh.load_attributes(root=pctx.root,
                           pattern=pctx.pattern,
                           set_values=pctx.set_values)
        # Checks the facet values provided by the loaded attributes
        fh.check_facets(facets=pctx.facets,
                        config=pctx.cfg,
                        set_keys=pctx.set_keys)
        # Get parts of DRS path
        parts = fh.get_drs_parts(pctx.facets)
        # Instantiate file DRS path handler
        fh.drs = DRSPath(parts)
        # Ensure that the called project section is ALWAYS part of the DRS path elements (case insensitive)
        if not fh.drs.path().lower().startswith(pctx.project.lower()):
            raise InconsistentDRSPath(pctx.project, fh.drs.path())
        # Evaluate whether the processed file already exists in the latest dataset version (i.e., is a duplicate)
        # Default: fh.is_duplicate = False
        # 1. If a latest dataset version exists
        if fh.drs.v_latest:
            # Build corresponding latest file path
            latest_file = os.path.join(fh.drs.path(latest=True, root=True),
                                       fh.filename)
            # 2. Test if a file with the same filename exists in latest version
            if os.path.exists(latest_file):
                # Get tracking ID (None if not recorded into the file)
                fh.tracking_id = get_tracking_id(fh.ffp, pctx.project)
                latest_tracking_id = get_tracking_id(latest_file, pctx.project)
                # 3. Test if tracking IDs are different (i.e., keep is_duplicate = False)
                if fh.tracking_id == latest_tracking_id:
                    latest_size = os.stat(latest_file).st_size
                    # 4. Test if file sizes are different (i.e., keep is_duplicate = False)
                    if fh.size == latest_size and not pctx.no_checksum:
                        # Read or compute the checksums
                        fh.checksum = get_checksum(fh.ffp, pctx.checksum_type,
                                                   pctx.checksums_from)
                        latest_checksum = get_checksum(latest_file,
                                                       pctx.checksum_type,
                                                       pctx.checksums_from)
                        # store checksum
                        if fh.checksum == latest_checksum:
                            fh.is_duplicate = True
                        elif fh.tracking_id and latest_tracking_id:
                            # The checksums differ, so the tracking IDs (when present) must not be identical.
                            # Without tracking IDs, keep is_duplicate = False
                            raise UnchangedTrackingID(latest_file,
                                                      latest_tracking_id,
                                                      fh.ffp, fh.tracking_id)
                    elif fh.tracking_id and latest_tracking_id:
                        # The sizes differ, so the tracking IDs (when present) must not be identical.
                        # Without tracking IDs, keep is_duplicate = False
                        raise UnchangedTrackingID(latest_file,
                                                  latest_tracking_id, fh.ffp,
                                                  fh.tracking_id)
        msg = TAGS.SUCCESS + 'Processing {}'.format(COLORS.HEADER(fh.ffp))
        Print.info(msg)
        return fh
    except KeyboardInterrupt:
        raise
    except Exception:
        exc = traceback.format_exc().splitlines()
        msg = TAGS.SKIP + COLORS.HEADER(source) + '\n'
        msg += '\n'.join(exc)
        with pctx.lock:
            Print.exception(msg, buffer=True)
        return None
    finally:
        with pctx.lock:
            pctx.progress.value += 1
            percentage = int(pctx.progress.value * 100 / pctx.nbsources)
            msg = COLORS.OKBLUE('\rScanning incoming file(s): ')
            msg += '{}% | {}/{} file(s)'.format(percentage,
                                                pctx.progress.value,
                                                pctx.nbsources)
            Print.progress(msg)
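The duplicate detection in this example cascades from tracking ID to size to checksum. The function below condenses that decision into one place; it is a simplified sketch (the real code raises UnchangedTrackingID instead of returning False when sizes or checksums differ while the tracking IDs match), and the helper callables are placeholders rather than the esgprep API.

import os

def looks_duplicated(new_file, latest_file, checksum_of, tracking_id_of):
    """Return True if new_file appears identical to the latest published file."""
    if not os.path.exists(latest_file):
        return False
    if tracking_id_of(new_file) != tracking_id_of(latest_file):
        return False  # different tracking IDs: treat as a new file
    if os.stat(new_file).st_size != os.stat(latest_file).st_size:
        return False  # different sizes: not the same file
    # Same tracking ID and size: fall back to a checksum comparison
    return checksum_of(new_file) == checksum_of(latest_file)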
Example #7
def process(collector_input):
    """
    File process that:

     * Handles the file,
     * Harvests directory attributes,
     * Checks DRS attributes against the CV,
     * Builds the dataset ID,
     * Retrieves the file size,
     * Computes checksums,
     * Deduces the mapfile name,
     * Writes the corresponding mapfile entry.

    Any error leads to skipping the file; it does not stop the process.

    :param tuple collector_input: A tuple with the file path and the processing context
    :returns: The output mapfile full path
    :rtype: *str*

    """
    # Deserialize inputs from collector
    source, ctx = collector_input
    # Block to avoid program stop if a thread fails
    try:
        if ctx.source_type == 'file':
            # Instantiate the source handler as a file
            sh = File(source)
        else:
            # Instantiate source handler as dataset
            sh = Dataset(source)
        # Matching between directory_format and file full path
        sh.load_attributes(pattern=ctx.pattern)
        # Deduce dataset_id
        dataset_id = ctx.dataset_name
        if not ctx.dataset_name:
            sh.check_facets(facets=ctx.facets, config=ctx.cfg)
            dataset_id = sh.get_dataset_id(ctx.cfg.get('dataset_id', raw=True))
        # Ensure that the first facet is ALWAYS the same as the called project section (case insensitive)
        assert dataset_id.lower().startswith(ctx.project.lower()), 'Inconsistent dataset identifier. ' \
                                                                   'Must start with "{}/" ' \
                                                                   '(case-insensitive)'.format(ctx.project)
        # Deduce dataset_version
        dataset_version = sh.get_dataset_version(ctx.no_version)
        # Build mapfile name depending on the --mapfile flag and appropriate tokens
        outfile = get_output_mapfile(outdir=ctx.outdir,
                                     attributes=sh.attributes,
                                     mapfile_name=ctx.mapfile_name,
                                     dataset_id=dataset_id,
                                     dataset_version=dataset_version,
                                     mapfile_drs=ctx.mapfile_drs,
                                     basename=ctx.basename)
        # Dry-run: don't write the mapfile, only show its path
        if ctx.action == 'make':
            # Generate the corresponding mapfile entry/line
            optional_attrs = dict()
            optional_attrs['mod_time'] = sh.mtime
            if not ctx.no_checksum:
                optional_attrs['checksum'] = sh.checksum(ctx.checksum_type)
                optional_attrs['checksum_type'] = ctx.checksum_type.upper()
            optional_attrs['dataset_tech_notes'] = ctx.notes_url
            optional_attrs['dataset_tech_notes_title'] = ctx.notes_title
            line = mapfile_entry(dataset_id=dataset_id,
                                 dataset_version=dataset_version,
                                 ffp=source,
                                 size=sh.size,
                                 optional_attrs=optional_attrs)
            write(outfile, line)
            logging.info('{} <-- {}'.format(
                os.path.splitext(os.path.basename(outfile))[0], source))
        # Return mapfile name
        return outfile
    # Catch any exception into the error log instead of stopping the run
    except Exception as e:
        logging.error('{} skipped\n{}: {}'.format(source, e.__class__.__name__,
                                                  str(e)))
        return None
    finally:
        if ctx.pbar:
            ctx.pbar.update()
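Unlike the previous examples, this version receives a (source, ctx) tuple, so the caller has to pair every source with the same context before dispatching. A hypothetical driver is sketched below (using a thread pool so the context and its progress bar do not need to be pickled); the function name and pool choice are assumptions.

import itertools
from multiprocessing.dummy import Pool as ThreadPool  # thread-based pool

def run(sources, ctx, nthreads=4):
    """Hypothetical driver: feed (source, ctx) tuples to process()."""
    pool = ThreadPool(nthreads)
    try:
        # Pair every source with the same processing context
        outputs = pool.map(process, zip(sources, itertools.repeat(ctx)))
    finally:
        pool.close()
        pool.join()
    # Skipped sources return None; keep only the generated mapfile paths
    return [o for o in outputs if o]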
Example #8
def process(collector_input):
    """
    File process that:

     * Handles the file,
     * Harvests directory attributes,
     * Checks DRS attributes against the CV,
     * Builds the dataset ID,
     * Retrieves the file size,
     * Computes checksums,
     * Deduces the mapfile name,
     * Writes the corresponding mapfile entry.

    Any error leads to skipping the file; it does not stop the process.

    :param tuple collector_input: A tuple with the file path and the processing context
    :returns: The output mapfile full path
    :rtype: *str*

    """
    # Deserialize inputs from collector
    ffp, ctx = collector_input
    # Block to avoid program stop if a thread fails
    try:
        # Instantiate file handler
        fh = File(ffp)
        # Matching between directory_format and file full path
        fh.load_attributes(pattern=ctx.pattern)
        # Apply proper case to each attribute
        for key in fh.attributes:
            # Try to get the appropriate facet case from "category_defaults"
            try:
                fh.attributes[key] = ctx.cfg.get_options_from_pairs('category_defaults', key)
            except NoConfigKey:
                # If not specified keep facet case from local path, do nothing
                pass
        # Deduce dataset_id
        dataset_id = ctx.dataset
        if not ctx.dataset:
            fh.check_facets(facets=ctx.facets,
                            config=ctx.cfg)
            dataset_id = fh.get_dataset_id(ctx.cfg.get('dataset_id', raw=True))
        # Deduce dataset_version
        dataset_version = fh.get_dataset_version(ctx.no_version)
        # Build mapfile name depending on the --mapfile flag and appropriate tokens
        outfile = get_output_mapfile(outdir=ctx.outdir,
                                     attributes=fh.attributes,
                                     mapfile_name=ctx.mapfile_name,
                                     dataset_id=dataset_id,
                                     dataset_version=dataset_version,
                                     mapfile_drs=ctx.mapfile_drs)
        # Generate the corresponding mapfile entry/line
        optional_attrs = dict()
        optional_attrs['mod_time'] = fh.mtime
        if not ctx.no_checksum:
            optional_attrs['checksum'] = fh.checksum(ctx.checksum_type)
            optional_attrs['checksum_type'] = ctx.checksum_type.upper()
        optional_attrs['dataset_tech_notes'] = ctx.notes_url
        optional_attrs['dataset_tech_notes_title'] = ctx.notes_title
        line = mapfile_entry(dataset_id=dataset_id,
                             dataset_version=dataset_version,
                             ffp=ffp,
                             size=fh.size,
                             optional_attrs=optional_attrs)
        write(outfile, line)
        logging.info('{} <-- {}'.format(os.path.splitext(os.path.basename(outfile))[0], ffp))
        # Return mapfile name
        return outfile
    # Catch any exception into the error log instead of stopping the run
    except Exception as e:
        logging.error('{} skipped\n{}: {}'.format(ffp, e.__class__.__name__, str(e)))
        return None
    finally:
        ctx.pbar.update()
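fh.checksum(ctx.checksum_type) boils down to hashing the file with the requested algorithm. A minimal, self-contained sketch of such a helper (the function name and chunked-read strategy are assumptions, not the esgprep code):

import hashlib

def file_checksum(path, checksum_type='sha256', blocksize=1 << 20):
    """Return the hex digest of the file, read in chunks to spare memory."""
    h = hashlib.new(checksum_type)
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            h.update(block)
    return h.hexdigest()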
Example #9
def process(collector_input):
    """
    process(collector_input)

    File process that:

     * Handles the file,
     * Deduces facet key/value pairs from the file attributes,
     * Checks facet values against the CV,
     * Applies the versioning,
     * Populates the DRS tree, creating the appropriate leaves,
     * Stores dataset statistics.

    :param tuple collector_input: A tuple with the file path and the processing context
    :return: True on success
    :rtype: *boolean*

    """
    # Deserialize inputs from collector
    ffp, ctx = collector_input
    # Block to avoid program stop if a thread fails
    try:
        # Instantiate file handler
        fh = File(ffp)
        # Loads attributes from filename, netCDF attributes, command-line
        fh.load_attributes(root=ctx.root,
                           pattern=ctx.pattern,
                           set_values=ctx.set_values)
        # Apply proper case to each attribute
        for key in fh.attributes:
            # Try to get the appropriate facet case from "category_defaults"
            try:
                fh.attributes[key] = ctx.cfg.get_options_from_pairs('category_defaults', key)
            except NoConfigKey:
                # If not specified keep facet case from local path, do nothing
                pass
        fh.check_facets(facets=ctx.facets,
                        config=ctx.cfg,
                        set_keys=ctx.set_keys)
        # Get parts of DRS path
        parts = fh.get_drs_parts(ctx.facets)
        # Instantiate file DRS path handler
        fph = DRSPath(parts)
        # If a latest version already exists, run some checks FIRST to skip files that must not be processed
        if fph.v_latest:
            # Latest version should be older than upgrade version
            if int(DRSPath.TREE_VERSION[1:]) <= int(fph.v_latest[1:]):
                raise OlderUpgrade(DRSPath.TREE_VERSION, fph.v_latest)
            # Walk through the latest dataset version to check its uniqueness with file checksums
            dset_nid = fph.path(f_part=False, latest=True, root=True)
            if dset_nid not in ctx.tree.hash.keys():
                ctx.tree.hash[dset_nid] = dict()
                ctx.tree.hash[dset_nid]['latest'] = dict()
                for root, _, filenames in os.walk(fph.path(f_part=False, latest=True, root=True)):
                    for filename in filenames:
                        ctx.tree.hash[dset_nid]['latest'][filename] = checksum(os.path.join(root, filename),
                                                                               ctx.checksum_type)
            # Pickup the latest file version
            latest_file = os.path.join(fph.path(latest=True, root=True), fh.filename)
            # Check latest file if exists
            if os.path.exists(latest_file):
                latest_checksum = checksum(latest_file, ctx.checksum_type)
                current_checksum = checksum(fh.ffp, ctx.checksum_type)
                # Check if processed file is a duplicate in comparison with latest version
                if latest_checksum == current_checksum:
                    fh.is_duplicate = True
        # Start the tree generation
        if not fh.is_duplicate:
            # Add the processed file to the "vYYYYMMDD" node
            src = ['..'] * len(fph.items(d_part=False))
            src.extend(fph.items(d_part=False, file_folder=True))
            src.append(fh.filename)
            ctx.tree.create_leaf(nodes=fph.items(root=True),
                                 leaf=fh.filename,
                                 label='{}{}{}'.format(fh.filename, LINK_SEPARATOR, os.path.join(*src)),
                                 src=os.path.join(*src),
                                 mode='symlink',
                                 origin=fh.ffp)
            # Add the "latest" node for symlink
            ctx.tree.create_leaf(nodes=fph.items(f_part=False, version=False, root=True),
                                 leaf='latest',
                                 label='{}{}{}'.format('latest', LINK_SEPARATOR, fph.v_upgrade),
                                 src=fph.v_upgrade,
                                 mode='symlink')
            # Add the processed file to the "files" node
            ctx.tree.create_leaf(nodes=fph.items(file_folder=True, root=True),
                                 leaf=fh.filename,
                                 label=fh.filename,
                                 src=fh.ffp,
                                 mode=ctx.mode)
            if ctx.upgrade_from_latest:
                # Walk through the latest dataset version and create a symlink for each file with a different
                # filename than the processed one
                for root, _, filenames in os.walk(fph.path(f_part=False, latest=True, root=True)):
                    for filename in filenames:
                        # Add latest files as tree leaves with version to upgrade instead of latest version
                        # i.e., copy latest dataset leaves to Tree
                        if filename != fh.filename:
                            src = os.path.join(root, filename)
                            ctx.tree.create_leaf(nodes=fph.items(root=True),
                                                 leaf=filename,
                                                 label='{}{}{}'.format(filename, LINK_SEPARATOR, os.readlink(src)),
                                                 src=os.readlink(src),
                                                 mode='symlink',
                                                 origin=os.path.realpath(src))
        else:
            # Pickup the latest file version
            latest_file = os.path.join(fph.path(latest=True, root=True), fh.filename)
            if ctx.upgrade_from_latest:
                # If upgrade-from-latest is activated, raise an error: no duplicated files are allowed
                # because the incoming directory must only contain modified/corrected files
                raise DuplicatedFile(latest_file, fh.ffp)
            else:
                # In the default behavior, the incoming directory contains all data for a new version.
                # In the case of a duplicated file, just proceed with the expected symlink creation
                # and record the duplicated file for later removal, but only if the migration mode is
                # the default (i.e., moving files). With --copy or --link, keep duplicates
                # in place in the incoming directory
                src = os.readlink(latest_file)
                ctx.tree.create_leaf(nodes=fph.items(root=True),
                                     leaf=fh.filename,
                                     label='{}{}{}'.format(fh.filename, LINK_SEPARATOR, src),
                                     src=src,
                                     mode='symlink',
                                     origin=fh.ffp)
                if ctx.mode == 'move':
                    ctx.tree.duplicates.append(fh.ffp)
        # Record entry for list()
        incoming = {'src': fh.ffp,
                    'dst': fph.path(root=True),
                    'filename': fh.filename,
                    'latest': fph.v_latest or 'Initial',
                    'size': fh.size}
        if fph.path(f_part=False) in ctx.tree.paths.keys():
            ctx.tree.paths[fph.path(f_part=False)].append(incoming)
        else:
            ctx.tree.paths[fph.path(f_part=False)] = [incoming]
        logging.info('{} <-- {}'.format(fph.path(f_part=False), fh.filename))
        return True
    except Exception as e:
        logging.error('{} skipped\n{}: {}'.format(ffp, e.__class__.__name__, str(e)))
        return None
    finally:
        ctx.pbar.update()
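Two details of this example are easy to reproduce in isolation: the vYYYYMMDD comparison used to refuse upgrades older than the published latest version, and the relative "latest" symlink that the tree eventually materializes. A standalone sketch with made-up paths (not the DRSPath/Tree API):

import os

def is_older_or_equal(upgrade_version, latest_version):
    """True if the upgrade version (e.g. 'v20240101') is not newer than the latest one."""
    return int(upgrade_version[1:]) <= int(latest_version[1:])

def point_latest_to(dataset_dir, upgrade_version):
    """Re-create the 'latest' symlink so it targets the upgrade version directory."""
    link = os.path.join(dataset_dir, 'latest')
    if os.path.lexists(link):
        os.remove(link)
    os.symlink(upgrade_version, link)  # relative target, like the tree leaves above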