Ejemplo n.º 1
0
    def limit_col_default_range_to_cols(cls, values: Dict) -> Dict:
        """Validate that col_default_range is only set on column specs.

        Args:
            values: parsed spec fields; reads 'spec_type' and 'col_default_range'.
        Returns:
            values, unchanged, when valid.
        Raises:
            aborts (via comm.abort) when col_default_range is set on a record spec.
        """
        spec_type = values.get('spec_type')
        col_default_range = values.get('col_default_range')

        if col_default_range and spec_type not in ('incl_col', 'excl_col'):
            # f-prefix removed: the message has no placeholders
            comm.abort('Error: col_default_range set for a record spec')
        return values
Ejemplo n.º 2
0
    def load_from_file(self, file_name: str, dialect):
        """Populate header metadata from the first record of a csv file.

        Builds parallel name/position mappings for both the raw field names
        and the normalized, de-duplicated field names.

        Args:
            file_name: path to the csv file ('-' is rejected)
            dialect: csv dialect; dialect.has_header determines whether the
                     first record supplies names or names are generated.
        Raises:
            EOFError: if the file has no records.
        """
        if file_name == '-':
            comm.abort(f'Invalid file_name for Header: {file_name}')

        # with-block ensures the file handle is closed (it was leaked before)
        with open(file_name, newline='') as infile:
            reader = csv.reader(infile, dialect=dialect)
            for field_names in reader:
                break
            else:
                raise EOFError

        for field_sub, raw_field_name in enumerate(field_names):
            if dialect.has_header:
                field_name = self.format_raw_header_field_name(raw_field_name)
            else:
                raw_field_name = field_name = f'field_{field_sub}'

            field_name = self._make_field_name_unique(field_name, field_names)

            self.field_names.append(field_name)
            self.fields_by_position[field_sub] = field_name
            self.fields_by_name[field_name] = field_sub

            self.raw_field_names.append(raw_field_name)
            self.raw_fields_by_position[field_sub] = raw_field_name
            self.raw_fields_by_name[raw_field_name] = field_sub
Ejemplo n.º 3
0
 def _process_unknown_args(self, unknown_args: list) -> None:
     for arg in unknown_args:
         if arg in self.obsolete_options.keys():
             comm.abort('Error: obsolete option',
                        self.obsolete_options[arg])
         else:
             comm.abort(f'ERROR: Unknown option: {arg}')
Ejemplo n.º 4
0
    def _consolidate_configs(self, file_args: CONFIG_TYPE,
                             env_args: CONFIG_TYPE,
                             cli_args: CONFIG_TYPE) -> CONFIG_TYPE:
        """ Consolidates environmental and cli arguments.

        First all _app_metadata keys are added,
        Then values from matching environmental variable keys are overlaid,
        Finally values from matching cli arg keys are overlaid,
        """
        consolidated_args: Dict[str, Any] = {}

        for key in self._app_metadata:
            actual_key = self._app_metadata[key].get('dest', key)
            consolidated_args[actual_key] = None

        try:
            for key, val in file_args.items():
                actual_key = self._app_metadata[key].get('dest', key)
                consolidated_args[actual_key] = val
        except KeyError as e:
            if key in self.obsolete_options.keys():
                comm.abort('Error: obsolete option',
                           self.obsolete_options[key])
            else:
                comm.abort(f'ERROR: Unknown option: {key}')

        for key, val in env_args.items():
            actual_key = self._app_metadata[key].get('dest', key)
            consolidated_args[actual_key] = val

        for key, val in cli_args.items():
            if val is not None and val != []:
                consolidated_args[key] = val

        return consolidated_args
Ejemplo n.º 5
0
    def __init__(self, config_fn: str) -> None:
        """Load app args from the given config file.

        Args:
            config_fn: path to the config file.
        Raises:
            aborts (via comm.abort) if the file does not exist.
        """
        if not isfile(config_fn):
            # f-prefix removed from first message: it had no placeholders
            comm.abort('Error: config file not found!',
                       f'for config-fn: {config_fn}')
        self.config_fn = config_fn
        self.file_gristle_app_args = self._get_args()
Ejemplo n.º 6
0
    def _transform_key_field(self, header: Optional[csvhelper.Header],
                             key_field: str) -> str:
        """Normalize a sort-key spec into '<offset><type><direction>' form.

        key_field looks like:
            - [field][~][type][~][direction]
            - ex: 0sf
            - ex: 3ir
            - ex: home_statesf
            - ex: home_state~s~f
            - ex: 0~s~f
        Non-numeric field names are translated to their numeric position
        via the header; numeric fields are passed through as-is.
        """
        if key_field.count('~') == 2:
            field, keytype, direction = key_field.split('~')
        else:
            # last two characters are type and direction; the rest is the field
            field = key_field[:-2]
            keytype = key_field[-2]
            direction = key_field[-1]

        if comm.isnumeric(field):
            offset = field
        else:
            try:
                offset = str(header.get_field_position(field))
            except KeyError:
                comm.abort(
                    'Error: field name not found in header',
                    f'field name: {field}        '
                    f'header fields: {header.field_names}')

        return f'{offset}{keytype}{direction}'
Ejemplo n.º 7
0
def default_dialect(dialect: Dialect, delimiter: Optional[str],
                    quoting: Optional[str], quotechar: Optional[str],
                    has_header: Optional[bool], doublequote: Optional[bool],
                    escapechar: Optional[str],
                    skipinitialspace: Optional[bool]) -> Dialect:
    """Fill any unset (None) dialect attributes from the given fallback args.

    Mutates and returns the passed dialect.  Aborts when the resulting
    dialect enables both doublequoting and an escapechar.
    """
    assert isinstance(quoting, (str, type(None)))

    # Simple pass-through defaults: copy the fallback when the attr is unset.
    simple_defaults = (('delimiter', delimiter),
                       ('quotechar', quotechar),
                       ('has_header', has_header),
                       ('skipinitialspace', skipinitialspace),
                       ('doublequote', doublequote),
                       ('escapechar', escapechar))
    for attr_name, fallback in simple_defaults:
        if getattr(dialect, attr_name) is None:
            setattr(dialect, attr_name, fallback)

    # quoting needs translation from its string name to a number
    if dialect.quoting is None:
        dialect.quoting = get_quote_number(quoting)

    # Make sure we only have either doublequoting or escapechar turned on
    if dialect.doublequote and dialect.escapechar:
        comm.abort('Error: cannot have both doublequoting and escapechar')

    dialect.lineterminator = '\n'

    return dialect
Ejemplo n.º 8
0
    def limit_rec_default_range_to_recs(cls, values: Dict) -> Dict:
        """Validate that rec_default_range is only set on record specs.

        Args:
            values: parsed spec fields; reads 'spec_type' and 'rec_default_range'.
        Returns:
            values, unchanged, when valid.
        Raises:
            aborts (via comm.abort) when rec_default_range is set on a column spec.
        """
        spec_type = values.get('spec_type')
        rec_default_range = values.get('rec_default_range')

        if rec_default_range and spec_type not in ('incl_rec', 'excl_rec'):
            # f-prefix removed: the message has no placeholders
            comm.abort('Error: rec_default_range set for a column spec')
        return values
Ejemplo n.º 9
0
 def fix_types(record, schema):
     """Cast each field of a record to the type named in the schema.

     Args:
         record: list of string field values
         schema: dict with an 'items' list of per-field dicts; each may carry
                 a 'type' of 'integer', 'number', or anything else (left as-is)
     Returns:
         new list with fields converted where possible; values that fail to
         convert are kept as their original strings.
     Raises:
         aborts (via comm.abort) when the schema has fewer entries than the record.
     """
     new_record = []
     for i, field in enumerate(record):
         try:
             schema_field_type = schema['items'][i].get('type')
         except IndexError:
             comm.abort(
                 'Error: schema does not have entries for all fields',
                 f'field number {i} not found in schema')
         if schema_field_type == 'integer':
             try:
                 new_field = int(field)
             except ValueError:
                 new_field = field
         elif schema_field_type == 'number':
             try:
                 new_field = float(field)
             except ValueError:
                 new_field = field
         else:
             # plain assignment cannot raise ValueError - the old try/except
             # around this branch was dead code
             new_field = field
         new_record.append(new_field)
     return new_record
Ejemplo n.º 10
0
 def _read_old_csv(self) -> None:
     """ Reads next rec from new file into self.old_rec
     Args:    None
     Returns: Nothing
     Notes:
         - Confirms sort order of file
         - Will assign None to self.old_rec at eof
     """
     try:
         last_rec = self.old_rec
         self.old_rec = self.old_csv.__next__()
         if last_rec is None:  # first read priming
             last_rec = self.old_rec
         if len(last_rec) != len(self.old_rec):
             abort('old file has inconsistent number of fields',
                   f'old_rec = {self.new_rec}')
         for key in self.join_fields:
             if self.old_rec[key] > last_rec[key]:
                 self.old_read_cnt += 1
                 break  # good
             if self.old_rec[key] < last_rec[key]:
                 abort(
                     'ERROR: old file is not sorted correctly',
                     f'This refers to file {self.old_fqfn}, and key: {key}, and record: {self.old_rec} and last rec: {last_rec}'
                 )
     except StopIteration:
         self.old_rec = None
Ejemplo n.º 11
0
    def assign(self,
               outtype: str,
               outrec: RecordType,
               old_rec: RecordType,
               new_rec: RecordType) -> RecordType:
        """ Apply all assignment for a single rec.

        Args:
            outtype - one of insert, delete, same, chgold, chgnew
            outrec  - list of output record
            old_rec - list of old input record
            new_rec - list of new input record
        Returns:
            outrec - new version with assigned field
        Raises:
            sys.exit - if assignment fails
        """
        self.old_rec = old_rec
        self.new_rec = new_rec
        if outtype in self.assignments:
            for dest_field in self.assignments[outtype]:
                assigner = self.assignments[outtype][dest_field]
                try:
                    if assigner['src_type'] == 'literal':
                        outrec[dest_field] = assigner['src_val']
                    elif assigner['src_type'] == 'copy':
                        outrec[dest_field] = self._get_copy_value(assigner['src_file'],
                                                                  assigner['src_field'])
                    elif assigner['src_type'] == 'sequence':
                        outrec[dest_field] = self._get_seq_value(assigner['src_field'])
                    elif assigner['src_type'] == 'special':
                        outrec[dest_field] = self._get_special_value(assigner['src_val'])
                except (ValueError, IndexError) as err:
                    # Also catch IndexError (bad field offset), and pass abort a
                    # string plus context rather than the bare exception object.
                    msg = f'assignments={assigner}, dest_field={dest_field}, outtype={outtype}'
                    abort(repr(err), msg, verbosity='debug')
        return outrec
Ejemplo n.º 12
0
    def assign(self, outtype: str, outrec: RecordType, old_rec: RecordType,
               new_rec: RecordType) -> RecordType:
        """ Apply all assignment for a single rec.

        Args:
            outtype - one of insert, delete, same, chgold, chgnew
            outrec  - list of output record
            old_rec - list of old input record
            new_rec - list of new input record
        Returns:
            outrec - new version with assigned field
        Raises:
            sys.exit - if assignment fails
        """
        self.old_rec = old_rec
        self.new_rec = new_rec
        # no assignments registered for this outtype: return outrec untouched
        for dest_field in self.assignments.get(outtype, {}):
            assigner = self.assignments[outtype][dest_field]
            src_type = assigner['src_type']
            try:
                if src_type == 'literal':
                    outrec[dest_field] = assigner['src_val']
                elif src_type == 'copy':
                    outrec[dest_field] = self._get_copy_value(
                        assigner['src_file'], assigner['src_field'])
                elif src_type == 'sequence':
                    outrec[dest_field] = self._get_seq_value(
                        assigner['src_field'])
                elif src_type == 'special':
                    outrec[dest_field] = self._get_special_value(
                        assigner['src_val'])
            except (ValueError, IndexError) as err:
                msg = f'assignments={assigner}, dest_field={dest_field}, outtype={outtype}'
                abort(repr(err), msg, verbosity='debug')
        return outrec
Ejemplo n.º 13
0
    def set_sequence_starts(self, dialect: csvhelper.Dialect,
                            old_fqfn: str) -> None:
        """ Sets all sequences to their starting values.

        Args:
            dialect: csv dialect of input files
            old_fqfn: fully-qualified old file name
        Returns:
            None
        Raises:
            sys.exit: if invalid values found in csv sequence field
        """
        for key in self.seq:
            if self.seq[key]['start_val'] is None:
                break
        else:
            return  # all sequences already have a starting val

        old_rec_cnt = 0
        with open(old_fqfn, 'rt') as infile:
            reader = csv.reader(infile, dialect)
            for rec in reader:
                old_rec_cnt += 1
                for src_field in self.seq:
                    if self.seq[src_field]['last_val'] is not None:
                        continue  # set already by config
                    elif rec[src_field].strip() == '':
                        continue  # old file lacks good sequence val in this rec
                    try:
                        new_val = int(rec[src_field])
                    except ValueError:
                        abort('Non-integer value within sequence field: %s' %
                              rec[src_field])
                    # Bug fix: also initialize when start_val is still None --
                    # previously a blank value in the first record left start_val
                    # as None and the '>' comparison below raised TypeError.
                    if old_rec_cnt == 1 or self.seq[src_field]['start_val'] is None:
                        self.seq[src_field]['start_val'] = new_val
                    elif new_val > self.seq[src_field]['start_val']:
                        self.seq[src_field]['start_val'] = new_val

        # for any sequences set by the loop through the file, now we
        # can set their last_val:
        for src_field in self.seq:
            if (self.seq[src_field]['last_val'] is None
                    and self.seq[src_field]['start_val'] is not None):
                self.seq[src_field]['last_val'] = self.seq[src_field][
                    'start_val']

        # if any sequences are still None - it's because of an empty old file
        # or no valid starting sequences found in old file.
        # Set empty file to 0 otherwise abort.
        for src_field in self.seq:
            if (self.seq[src_field]['last_val'] is None
                    or self.seq[src_field]['start_val'] is None):
                if old_rec_cnt == 0:
                    self.seq[src_field]['last_val'] = 0
                    self.seq[src_field]['start_val'] = 0
                else:
                    abort(
                        'Logic Error: no starting sequence found in old file')
Ejemplo n.º 14
0
 def limit_steps_to_inclusions(cls, values: Dict) -> Dict:
     """Reject a non-default step on exclusion specs.

     Only inclusion specs (incl_rec, incl_col) may carry a step other than
     the default of 1.0; anything else aborts.
     """
     if values.get('spec_type') not in ('incl_rec', 'incl_col'):
         step = values.get('step')
         if step != 1.0:
             comm.abort(
                 f'Error: exclusion spec is not allowed to have steps: {step}'
             )
     return values
Ejemplo n.º 15
0
    def set_sequence_starts(self,
                            dialect: csvhelper.Dialect,
                            old_fqfn: str) -> None:
        """ Sets all sequences to their starting values.

        Args:
            dialect: csv dialect of input files
            old_fqfn: fully-qualified old file name
        Returns:
            None
        Raises:
            sys.exit: if invalid values found in csv sequence field
        """
        for key in self.seq:
            if self.seq[key]['start_val'] is None:
                break
        else:
            return # all sequences already have a starting val

        old_rec_cnt = 0
        with open(old_fqfn, 'rt') as infile:
            reader = csv.reader(infile, dialect)
            for rec in reader:
                old_rec_cnt += 1
                for src_field in self.seq:
                    if self.seq[src_field]['last_val'] is not None:
                        continue # set already by config
                    elif rec[src_field].strip() == '':
                        continue # old file lacks good sequence val in this rec
                    try:
                        new_val = int(rec[src_field])
                    except ValueError:
                        abort('Non-integer value within sequence field: %s' % rec[src_field])
                    # Bug fix: also initialize when start_val is still None --
                    # previously a blank value in the first record left start_val
                    # as None and the '>' comparison below raised TypeError.
                    if old_rec_cnt == 1 or self.seq[src_field]['start_val'] is None:
                        self.seq[src_field]['start_val'] = new_val
                    elif new_val > self.seq[src_field]['start_val']:
                        self.seq[src_field]['start_val'] = new_val

        # for any sequences set by the loop through the file, now we
        # can set their last_val:
        for src_field in self.seq:
            if (self.seq[src_field]['last_val'] is None
                    and self.seq[src_field]['start_val'] is not None):
                self.seq[src_field]['last_val'] = self.seq[src_field]['start_val']

        # if any sequences are still None - it's because of an empty old file
        # or no valid starting sequences found in old file.
        # Set empty file to 0 otherwise abort.
        for src_field in self.seq:
            if (self.seq[src_field]['last_val'] is None
                    or self.seq[src_field]['start_val'] is None):
                if old_rec_cnt == 0:
                    self.seq[src_field]['last_val'] = 0
                    self.seq[src_field]['start_val'] = 0
                else:
                    abort('Logic Error: no starting sequence found in old file')
Ejemplo n.º 16
0
    def _get_sort_values(self,
                         key_fields: List[Any],
                         rec: List[Union[str, int, float]],
                         primary_order: str) -> List[Any]:
        """Extract and transform the sort-key values for one record.

        Args:
            key_fields: key definitions, each with a .position attribute
            rec: the input record
            primary_order: overall sort direction passed through to transform()
        Returns:
            one transformed value per key field, in key-field order
        Raises:
            aborts (via comm.abort) if a key references a column the record lacks
        """
        try:
            sort_values = [transform(rec[key_field.position], key_field, primary_order)
                           for key_field in key_fields]
        except IndexError:
            # message grammar fixed (was 'references columns that does not exist')
            comm.abort('Error: key references a column that does not exist in record',
                       f'{rec=}')
        return sort_values
Ejemplo n.º 17
0
 def find_schema_file_on_path(schema_file):
     """Resolve a schema file reference to an existing path.

     Returns schema_file itself when it exists.  Otherwise, when it is a
     bare file name (no directory component), searches each directory on
     the global schema_path and returns the first hit, aborting when none
     is found.  A non-existent path that includes a directory component
     falls through and returns None.
     """
     if exists(schema_file):
         return schema_file
     if basename(schema_file) == schema_file:
         for schema_dir in schema_path:
             candidate = pjoin(schema_dir, schema_file)
             if exists(candidate):
                 return candidate
         comm.abort('Error: schema file not found',
                    f'File not found: {schema_file}')
Ejemplo n.º 18
0
 def _add_file_args():
     """Overlay config-file args onto consolidated_args (closure helper).

     Maps each key through its optional 'dest' alias and normalizes its
     value, aborting on unknown or obsolete option names.
     """
     for key, val in file_args.items():
         try:
             actual_key = self._app_metadata[key].get('dest', key)
             actual_val = _get_actual_value(key, val)
             consolidated_args[actual_key] = actual_val
         except KeyError:
             # membership test on the dict itself - .keys() was redundant
             if key in self.obsolete_options:
                 comm.abort('Error: obsolete option', self.obsolete_options[key])
             else:
                 comm.abort(f'ERROR: Unknown option: {key}')
     return consolidated_args
Ejemplo n.º 19
0
 def _make_field_name_unique(self, starting_field_name: str,
                             field_names: List[str]) -> str:
     temp_field_name = starting_field_name
     for count in range(999):
         if temp_field_name in self.field_names:
             temp_field_name = starting_field_name + f'__{count}'
         else:
             break
     else:
         comm.abort(
             'Error: cannot create unique header field name - 999 attempts failed'
         )
     return temp_field_name
Ejemplo n.º 20
0
    def validate_schema():
        """ Validates entire schema - with a few high-level checks, and by
            running validate_field_checks for each field validation set.
        """
        if 'items' not in schema:
            comm.abort("Error: invalid schema, missing 'items' key")
        # len() of the dict directly - .keys() was redundant
        if len(schema) != 1:
            comm.abort(
                "Error: invalid schema, incorrect number of 'items' keys")

        for field_checks in schema['items']:
            # iterate items() to avoid a second lookup per key
            for v_key, checks in field_checks.items():
                validate_field_checks(v_key, checks)
Ejemplo n.º 21
0
 def _get_special_value(self, src_val: str) -> str:
     """ Get special variable value.
     Args:
         src_val - name of special variable
     Returns:
         value - value associated with variable name
     Raises:
         aborts if src_val not found
     """
     try:
         return self.special_values[src_val]
     except KeyError:
         abort(f'Invalid special value in assignment: {src_val}',
               json.dumps(self.special_values))
Ejemplo n.º 22
0
 def _get_special_value(self, src_val: str) -> str:
     """ Get special variable value.
     Args:
         src_val - name of special variable
     Returns:
         value - value associated with variable name
     Raises:
         aborts if src_val not found
     """
     try:
         return self.special_values[src_val]
     except KeyError:
         pp(self.special_values)
         abort('Invalid special value referenced in assignment: %s' % src_val)
Ejemplo n.º 23
0
 def transform_name(self, val: Optional[str]) -> Optional[str]:
     """Translate a field-name or field-number spec into a position string.

     None passes through; numeric strings are returned as-is; names are
     looked up in the header.  Raises UnidentifiableNonNumericSpec when a
     name is given but no header exists; aborts when the name is unknown.
     """
     if val is None:
         return None
     if comm.isnumeric(val):
         return val
     if self.header is None:
         raise UnidentifiableNonNumericSpec(
             f'Do not know how to interpret: {val}')
     try:
         return str(self.header.get_field_position(val.strip()))
     except KeyError:
         comm.abort(f'Error: Invalid string in spec: {val}',
                    f'Not in header list: {self.header.field_names}')
Ejemplo n.º 24
0
 def start_stop_relationship(cls, values: Dict) -> Dict:
     """Ensure start/stop ordering matches the sign of step.

     Positive steps require start <= stop; negative steps require
     start >= stop.  Aborts on violation, otherwise returns values.
     """
     start, stop, step = values['start'], values['stop'], values['step']
     if step > 0 and start > stop:
         comm.abort(f'spec has start ({start}) after stop ({stop})')
     if step < 0 and start < stop:
         comm.abort(
             f'negative spec has start ({start}) before stop ({stop})',
             'negative specs require the start (start:stop:step) to be AFTER the stop'
         )
     return values
Ejemplo n.º 25
0
def binary_arg_fixer(app_metadata,
                     args):
    """ Returns a copy of the args in which:
        - keys are replaced by any destination key names if they exist
        - and store_const is used rather than value for bools

        The reason for this function is that unlike cli args, envvars
        and config files don't have flags whose mere existance indicates
        True or False: instead config files have bools which can be set
        either way and envvars just have strings - which might have a True
        or False, 1 or 0.  This code helps treat the envvars & config files
        like cli args.

        Args:
            app_metadata: option metadata keyed by option name; bool options
                          carry 'action'/'const' and an optional 'dest' alias
            args: raw envvar/config-file args to clean
        Raises:
            aborts (via comm.abort) on unknown options or non-true flag values
    """

    def get_bool_actual_value(key,
                              orig_config_val):
        # Normalize an envvar/config value for a store_const bool option.
        assert app_metadata[key]['type'] is bool
        assert app_metadata[key]['action'] == 'store_const'
        assert app_metadata[key]['const'] in (True, False)

        if type(orig_config_val) is bool:
            transformed_config_val = orig_config_val
        elif orig_config_val is None or orig_config_val.strip().lower() in ('none', 'null'):
            comm.abort(f'Config item {key} has a non-true value of {orig_config_val}',
                       'This is a flag type whose value is established by pgm metadata '
                       'and when provided via envvar or config file must always be set to true')
        else:
            transformed_config_val = orig_config_val.strip().lower() in ('true', 't', '1', '')

        if not transformed_config_val:
            # bug fix: added the missing trailing space between the two string
            # fragments (previously rendered as 'metadataand')
            comm.abort(f'Config item {key} has a non-true value of {orig_config_val}',
                       'This is a flag type whose value is established by pgm metadata '
                       'and when provided via envvar or config file must always be set to true')

        return app_metadata[key]['const']

    cleaned_args = {}
    for orig_key, val in args.items():
        try:
            # identity test for the type object - consistent with the asserts above
            if app_metadata[orig_key]['type'] is not bool:
                cleaned_args[orig_key] = val
            else:
                actual_key = app_metadata[orig_key].get('dest', orig_key)
                actual_val = get_bool_actual_value(orig_key, val)
                cleaned_args[actual_key] = actual_val
        except KeyError:
            comm.abort(f'Error: option {orig_key} is unknown')

    return cleaned_args
Ejemplo n.º 26
0
    def phase1__item_parsing(
            self, item: str) -> Tuple[Optional[str], Optional[str], str, bool]:
        """ Split a specification item string into separate parts.

        Args:
            item: spec string - 'start', 'start:stop', or 'start:stop:step'
        Returns:
            (start, stop, step, is_range) - stop is None when absent, step
            defaults to '1', is_range is True when a ':' was present.
        Raises:
            aborts (via comm.abort) when the item has more than 3 parts.
        """
        parts = item.split(':')
        if len(parts) > 3:
            comm.abort(f'Error: spec item has too many parts: {item}')

        # the boolean expression directly - 'True if ... else False' was redundant
        is_range = len(parts) > 1

        start = parts[0]
        stop = parts[1] if len(parts) > 1 else None
        step = parts[2] if len(parts) > 2 else '1'

        return start, stop, step, is_range
Ejemplo n.º 27
0
 def _get_special_value(self, src_val: str) -> str:
     """ Get special variable value.
     Args:
         src_val - name of special variable
     Returns:
         value - value associated with variable name
     Raises:
         aborts if src_val not found
     """
     try:
         return self.special_values[src_val]
     except KeyError:
         pp(self.special_values)
         abort('Invalid special value referenced in assignment: %s' %
               src_val)
Ejemplo n.º 28
0
    def must_process_in_memory(self) -> bool:
        """Return True when the rec specs require buffering all records in memory.

        Out-of-order (when ordering matters), repeating, or reverse record
        specs can only be honored by holding records in memory; aborts when
        that is required but the data cannot fit.
        """
        slicer = self.incl_rec_slicer
        needs_buffering = ((slicer.includes_out_of_order
                            and self.nconfig.any_order is False)
                           or slicer.includes_repeats
                           or slicer.includes_reverse)
        if not needs_buffering:
            return False

        if self.rec_index.is_valid or self.is_optimized_for_all_recs():
            return True

        comm.abort(
            'Error: There are out of order, reverse, or repeating rec specs but cannot fit into memory!',
            verbosity='debug')
        return False
Ejemplo n.º 29
0
def config_validation_detailed(schema):
    """ Run a very detailed verification of the config.

    We don't depend on this one alone since the resulting error messages
    are very difficult to read.

    Args:
        schema: a jsonschema dict to validate
    Raises:
        aborts (via comm.abort) if the schema fails jsonschema's own
        schema validation
    """
    # Bug fix: the original made an identical, unguarded validate call
    # before the try block, so a SchemaError escaped uncaught.  Only the
    # guarded call should exist.
    try:
        # Validating any instance (None here) forces jsonschema to first
        # check the schema itself, raising SchemaError if it is invalid.
        # NOTE(review): a valid schema that rejects None will raise an
        # uncaught ValidationError here - confirm that is intended.
        jsonschema.validate(instance=None, schema=schema)
    except jsonschema.exceptions.SchemaError as err:
        msgs = [
            'See jsonschema docs for help: https://json-schema.org/understanding-json-schema/'
        ]
        for msg in err.context:
            msgs.append(repr(msg))
        msg_str = '\n'.join(msgs)
        comm.abort('Error: schema is invalid - failed jsonschema validation',
                   msg_str)
Ejemplo n.º 30
0
    def generate_csv_dialect_config(self,
                                    override_filename: Optional[str]=None):
        """ Adds the csv dialect to the config.

            Added by calling programs within the extend_config method.
        """
        filenames = [override_filename] if override_filename else self.config['infiles']
        md = self._app_metadata
        try:
            autodetected = csvhelper.get_dialect(filenames,
                                                 verbosity=self.config['verbosity'])
        except FileNotFoundError:
            comm.abort('Error: File not found when generating csv dialect config',
                       f"One of these files was not found: {','.join(self.config['infiles'])}")

        # All dialect attributes handled by both the override and default steps.
        dialect_keys = ('delimiter', 'quoting', 'quotechar', 'has_header',
                        'doublequote', 'escapechar', 'skipinitialspace')

        # First override the auto-detected dialect with any explicit options.
        overridden = csvhelper.override_dialect(
            autodetected,
            **{key: self.config[key] for key in dialect_keys})

        # Finally apply any defaults needed - using the extended-defaults,
        # which exist because regular defaults would have been automatically
        # applied.
        defaulted = csvhelper.default_dialect(
            overridden,
            **{key: md[key]['extended_default'] for key in dialect_keys})

        assert csvhelper.is_valid_dialect(defaulted)

        self.update_config('dialect', defaulted)
Ejemplo n.º 31
0
    def get_bool_actual_value(key,
                              orig_config_val):
        """ Translate a config value for a flag option into its metadata const.

        Flag (store_const) options get their actual value from program
        metadata; a value supplied via envvar or config file only confirms
        the flag and must therefore be a true value.

        Args:
            key:             the config option name
            orig_config_val: the raw value from a config file or envvar
        Returns:
            the 'const' value from the option's metadata
        Raises:
            aborts if the supplied value is not a true value
        """
        assert app_metadata[key]['type'] is bool
        assert app_metadata[key]['action'] == 'store_const'
        assert app_metadata[key]['const'] in (True, False)

        if type(orig_config_val) is bool:
            transformed_config_val = orig_config_val
        elif orig_config_val is None or orig_config_val.strip().lower() in ('none', 'null'):
            # None/'none'/'null' are non-true: fall through to the common abort
            # below rather than duplicating its message (the duplicate copy was
            # also missing a space between concatenated string literals).
            transformed_config_val = False
        else:
            transformed_config_val = orig_config_val.strip().lower() in ('true', 't', '1', '')

        if not transformed_config_val:
            comm.abort(f'Config item {key} has a non-true value of {orig_config_val}',
                       'This is a flag type whose value is established by pgm metadata '
                       'and when provided via envvar or config file must always be set to true')

        return app_metadata[key]['const']
Ejemplo n.º 32
0
    def setup_stage2(self) -> None:
        """ Resolve specs against file counts, then build slicers and indexes.

        Specs with negative offsets or negative steps need record/column
        counts to resolve: if the first _setup_specs attempt raises for that
        reason, counts are gathered and the setup retried once, aborting on
        a second failure.
        """
        try:
            self._setup_specs()
        except (slicer.NegativeOffsetWithoutItemCountError,
                slicer.NegativeStepWithoutItemCountError) as err:
            # Counting requires re-reading the input: stdin must first be
            # spooled to a temp file and the config rebuilt against it.
            if self.are_infiles_from_stdin():
                self._write_stdin_to_file()
                self.nconfig, _ = self.config_manager.get_config(self.temp_fn)
            self._setup_counts()
            try:
                self._setup_specs()
            except (slicer.NegativeOffsetWithoutItemCountError,
                    slicer.NegativeStepWithoutItemCountError):
                comm.abort(
                    'Error: unable to count rows in file to resolve config references!',
                    f'Record count: {self.rec_cnt}, Column count: {self.col_cnt}',
                    verbosity='debug')

        self._setup_slicers()
        self.rec_index = RecIndexOptimization(self.incl_rec_slicer,
                                              self.excl_rec_slicer,
                                              self.nconfig.verbosity)
        self.col_index = ColIndexOptimization(self.incl_col_slicer,
                                              self.excl_col_slicer,
                                              self.is_optimized_for_all_cols(),
                                              self.nconfig.verbosity)

        # Report which stage2 optimizations were selected.
        self._pp(' ')
        self._pp('Stage2 Optimizations: ')
        self._pp(
            f'    is_optimized_for_all_recs: {self.is_optimized_for_all_recs()}'
        )
        self._pp(f'    is_optimized_with_rec_index: {self.rec_index.is_valid}')
        self._pp(
            f'    rec_index_optimization_stop_rec: {self.rec_index.stop_rec}')

        self._pp(
            f'    is_optimized_for_all_cols: {self.is_optimized_for_all_cols()}'
        )
        self._pp(f'    is_optimized_with_col_index: {self.col_index.is_valid}')
Ejemplo n.º 33
0
    def specs_cleaner(self) -> List[SpecRecord]:
        """ Returns a transformed version of the specs

        Returns:
            final_specs: List[SpecRecord]
            for specs that are empty   (ex: '') returns: []
            for specs that are default (ex: ':') returns: [SpecRecord]
        """

        # A single all-blank spec string is treated as the full default range.
        if len(self.specs_strings) == 1:
            if self.specs_strings[0].strip() == '':
                self.specs_strings[0] = ':'

        final_specs = []
        for item in self.specs_strings:

            try:
                # Phase 1: split 'start:stop:step' into its raw string parts.
                start, stop, step, is_range = self.phase1__item_parsing(item)

                # Phase 2: translate the raw string parts into numeric values.
                int_start, int_stop, float_step = self.phase2__translate_item_parts(
                    start, stop, step, is_range)

                # Phase 3: resolve dependencies and default-range flags.
                (int_start, int_stop, float_step, col_default_range,
                 rec_default_range) = self.phase3__resolve_deps(
                     int_start, int_stop, float_step, is_range)
                try:
                    final_rec = SpecRecord(start=int_start,
                                           stop=int_stop,
                                           step=float_step,
                                           spec_type=self.spec_type,
                                           col_default_range=col_default_range,
                                           rec_default_range=rec_default_range)
                except ValidationError as err:
                    # NOTE(review): if comm.abort ever returned instead of
                    # exiting, final_rec would be unbound on the next line -
                    # confirm abort always terminates the process.
                    comm.abort('Error: invalid specification',
                               f'{self.spec_type}: {start}:{stop}:{step}')
                final_specs.append(final_rec)
            except OutOfRangeError:
                # Out-of-range items are silently dropped from the results.
                continue
        return final_specs
Ejemplo n.º 34
0
 def _read_old_csv(self) -> None:
     """ Reads next rec from new file into self.old_rec
     Args:    None
     Returns: Nothing
     Notes:
         - Confirms sort order of file
         - Will assign None to self.old_rec at eof
     """
     try:
         last_rec = self.old_rec
         self.old_rec = self.old_csv.__next__()
         if last_rec is None: # first read priming
             last_rec = self.old_rec
         if len(last_rec) != len(self.old_rec):
             abort('old file has inconsistent number of fields')
         for key in self.join_fields:
             if self.old_rec[key] > last_rec[key]:
                 self.old_read_cnt += 1
                 break # good
             if self.old_rec[key] < last_rec[key]:
                 print(self.old_rec)
                 abort('old file is not sorted correctly')
     except StopIteration:
         self.old_rec = None
Ejemplo n.º 35
0
    def set_assignment(self,
                       dest_file: str,
                       dest_field: int,
                       src_type: str,
                       src_val: str = None,
                       src_file: str = None,
                       src_field: int = None) -> None:
        """ Write instructions for the assignment of a csv field in an output file.

        Args:
            dest_file: one of insert, delete, chgold or chgnew
            dest_field: the field position, given a zero-offset
            src_type: one of literal, copy, sequence, or special
            src_val: used by literal, lookup and sequence
            src_file: one of old, new or None
            src_field: the field position, given a zero-offset
        Returns:
            nothing
        Raises:
            ValueError if args are invalid
            sys.exit if sequence assignment is invalid
        """
        if dest_field:
            assert int(dest_field)
        if src_field:
            assert int(src_field)
        if dest_file not in ['insert', 'delete', 'chgold', 'chgnew']:
            raise ValueError('Invalid dest_file: %s' % dest_file)
        if not comm.isnumeric(dest_field):
            raise ValueError('Invalid dest_field: %s' % dest_field)
        if src_type not in ['literal', 'copy', 'sequence', 'special']:
            raise ValueError('Invalid src_type of %s' % src_type)
        # NOTE(review): 'lookup' can never reach this check - it is already
        # rejected by the src_type validation above.  Confirm whether
        # 'lookup' should be an accepted src_type.
        if src_type in ['literal', 'lookup'] and src_val is None:
            raise ValueError('Missing src_val')
        if src_type == 'copy' and (src_file is None or src_field is None):
            raise ValueError('Missing src_file or src_field')
        if src_file not in [None, 'old', 'new']:
            raise ValueError('Invalid src_file: %s' % src_file)

        if dest_file not in self.assignments:
            self.assignments[dest_file] = {}
        self.assignments[dest_file][int(dest_field)] = {'src_type':src_type,
                                                        'src_val':src_val,
                                                        'src_file':src_file,
                                                        'src_field':src_field}
        if src_type == 'sequence':
            # note that seq validation does not check to see if same sequence was
            # referenced twice with two different values.
            if src_file is not None and src_field is not None:
                tmp_val = None  # will get assigned based on file & field
            elif src_file is not None or src_field is not None:
                abort('Invalid sequence assignment config: src_file or src_field is None')
            elif src_val is None:
                tmp_val = 0
            elif comm.isnumeric(src_val):
                tmp_val = int(src_val)
            elif src_val in self.special_values:
                if comm.isnumeric(self.special_values[src_val]):
                    tmp_val = int(self.special_values[src_val])
                else:
                    # bug fix: the two concatenated literals were missing a
                    # separating space ("variableshould").
                    abort('Sequence refers to invalid special variable '
                          'should be unique.  Variable: %s   Its value: %s'
                          % (src_val, self.special_values[src_val]))
            else:
                abort('Invalid src_val from config, must be numeric for sequence: %s'
                      ' or refer to special variable name '% src_val)

            self.seq[src_field] = {'start_val': tmp_val, 'last_val':  tmp_val}