コード例 #1
0
ファイル: slice_specs.py プロジェクト: kenfar/DataGristle
    def transform_none_stop(self, start: int, stop: Optional[int], step: float,
                            is_range: bool) -> Tuple[int, bool, bool]:
        """ Resolve a missing stop value into a concrete integer stop.

        Args:
            start: start of the spec item; only used for non-range items
            stop: the stop value, or None when none was provided
            step: the step - its sign selects the direction
            is_range: True if the spec item is a range
        Returns:
            Tuple of (stop, col_default_range, rec_default_range).  The two
            flags tell whether DEFAULT_COL_RANGE_STOP or
            DEFAULT_REC_RANGE_STOP was substituted because
            self.infile_item_count is None.
        """
        assert stop is None or comm.isnumeric(stop)

        # A stop that was actually provided is passed through untouched.
        if comm.isnumeric(stop):
            assert isinstance(stop, int)
            return stop, False, False

        ascending = step >= 0

        # Single item: the stop sits one position past start, in step direction.
        if not is_range:
            return (start + 1 if ascending else start - 1), False, False

        # Open-ended range walking backwards stops just before position 0.
        if not ascending:
            return -1, False, False

        # Open-ended range walking forwards: use the item count when known.
        if self.infile_item_count is not None:
            return self.infile_item_count, False, False

        # Item count unknown: fall back to a default and flag which one.
        if self.spec_type in ('incl_rec', 'excl_rec'):
            return DEFAULT_REC_RANGE_STOP, False, True
        return DEFAULT_COL_RANGE_STOP, True, False
コード例 #2
0
ファイル: slice_specs.py プロジェクト: kenfar/DataGristle
    def transform_none_start(self, start: Union[int, None],
                             step: float) -> int:
        """ Resolve a missing start value into a concrete integer start.

        Example Sources:
            - -r 1
            - -r 3:
            - -r 3:4
            - -r :3
            - -r '::'
            - -r '::2'

        Args:
            start: the start value, or None when none was provided
            step: the step - its sign selects the direction
        Returns:
            start unchanged if it was provided; otherwise 0 for a
            non-negative step, or self.infile_item_count for a negative step
        Raises:
            NegativeStepWithoutItemCountError: if start is None, step is
                negative and self.infile_item_count is None
        """
        assert start is None or comm.isnumeric(start)

        if comm.isnumeric(start):
            assert isinstance(start, int)
            return start

        # start is None from here on - which is *always* a range (unlike stop)
        if step >= 0:
            return 0
        if self.infile_item_count is None:
            raise NegativeStepWithoutItemCountError
        return self.infile_item_count
コード例 #3
0
ファイル: slice_specs.py プロジェクト: kenfar/DataGristle
    def phase2__translate_item_parts(
            self, orig_start: Optional[str], orig_stop: Optional[str],
            orig_step: str,
            is_range: bool) -> Tuple[Optional[int], Optional[int], float]:
        """ Convert the textual parts of one spec item into numbers.

        Args:
            orig_start: raw start text, or None
            orig_stop: raw stop text, or None
            orig_step: raw step text
            is_range: True if the spec item is a range
        Returns:
            Tuple of (start, stop, step).  start and stop are None when the
            transformed value is None; step defaults to 1.0 in that case.
        Raises:
            UnidentifiableNonNumericSpec: if any part is still non-numeric
                after the transformations have been applied
        """
        # --- start: empty-string -> name -> negative-number handling ---
        start_txt = Specifications.transform_empty_string(orig_start)
        start_txt = self.transform_negative_start_number(
            self.transform_name(start_txt), is_range)
        if start_txt is None:
            start_num = None
        elif comm.isnumeric(start_txt):
            start_num = int(start_txt)
        else:
            raise UnidentifiableNonNumericSpec(
                f'Do not know how to interpret: {start_txt}')

        # --- stop: same pipeline plus the positive-number validation ---
        stop_txt = self.transform_name(
            self.transform_empty_string(orig_stop))
        stop_txt = self.validate_positive_number(
            self.transform_negative_stop_number(stop_txt, is_range), is_range)
        if stop_txt is None:
            stop_num = None
        elif comm.isnumeric(stop_txt):
            stop_num = int(stop_txt)
        else:
            raise UnidentifiableNonNumericSpec(
                f'Do not know how to interpret: {stop_txt}')

        # --- step: only the empty-string handling applies ---
        step_txt = self.transform_empty_string(orig_step)
        if step_txt is None:
            step_num = 1.0
        elif comm.isnumeric(step_txt):
            step_num = float(step_txt)
        else:
            raise UnidentifiableNonNumericSpec(
                f'Do not know how to interpret: {step_txt}')

        return start_num, stop_num, step_num
コード例 #4
0
ファイル: file_sorter.py プロジェクト: kenfar/DataGristle
    def _transform_key_field(self, header: Optional[csvhelper.Header],
                             key_field: str) -> str:
        """ Normalize a sort key so that its field part is a position.

        key_field looks like:
               - [field][~][type][~][direction]
               - ex: 0sf
               - ex: 3ir
               - ex: home_statesf
               - ex: home_state~s~f
               - ex: 0~s~f

        Args:
            header: used to translate a field name into a position; may be
                None, in which case only numeric fields can be handled
            key_field: the key in one of the forms shown above
        Returns:
            The key as '<position><type><direction>' without separators.
        """
        if key_field.count('~') == 2:
            field, keytype, direction = key_field.split('~')
        else:
            # Compact form: the last two characters are type and direction.
            field = key_field[:-2]
            keytype = key_field[-2]
            direction = key_field[-1]

        # NOTE(review): the error paths below rely on comm.abort() not
        # returning - confirm against its implementation.
        if comm.isnumeric(field):
            field_offset = field
        elif header is None:
            # A field name cannot be resolved without a header; report it
            # instead of failing with an AttributeError on None.
            comm.abort(
                'Error: field name used in key but no header is available',
                f'field name: {field}')
        else:
            try:
                field_offset = str(header.get_field_position(field))
            except KeyError:
                comm.abort(
                    'Error: field name not found in header',
                    f'field name: {field}        '
                    f'header fields: {header.field_names}')

        new_key_field = f'{field_offset}{keytype}{direction}'
        return new_key_field
コード例 #5
0
 def get_field_position_from_any(self, lookup: Union[str, int]) -> int:
     """ Return a field position for a lookup that is a name or a position.

     A numeric lookup is converted with int(); anything else is resolved
     through self.get_field_position().
     """
     if not comm.isnumeric(lookup):
         return self.get_field_position(lookup)
     return int(lookup)
コード例 #6
0
ファイル: file_type.py プロジェクト: sidhu177/DataGristle
def get_quote_name(quote_number: Union[int, str]) -> str:
    """ Return the name of the csv quoting constant with the given value.

    Used to help applications look up quote numbers typically provided by
    users.

    Args:
        quote_number: the numeric value of a csv QUOTE_* constant
    Returns:
        The constant's name, e.g. 'QUOTE_ALL'
    Raises:
        ValueError: if quote_number is not numeric or matches no csv
            QUOTE_* constant
    """
    if not comm.isnumeric(quote_number):
        raise ValueError('Invalid quote_number: %s' % quote_number)

    wanted = int(quote_number)
    # Only the QUOTE_* constants are candidates; other attributes of the
    # csv module must never be returned as a quoting name.
    for name, value in vars(csv).items():
        if name.startswith('QUOTE_') and value == wanted:
            return name
    raise ValueError('Invalid quote_number: %s' % quote_number)
コード例 #7
0
ファイル: file_type.py プロジェクト: kenfar/DataGristle
def get_quote_name(quote_number: Union[int, str]) -> str:
    """ Return the name of the csv quoting constant with the given value.

    Used to help applications look up quote numbers typically provided by
    users.

    Args:
        quote_number: the numeric value of a csv QUOTE_* constant
    Returns:
        The constant's name, e.g. 'QUOTE_ALL'
    Raises:
        ValueError: if quote_number is not numeric or matches no csv
            QUOTE_* constant
    """
    if not comm.isnumeric(quote_number):
        raise ValueError('Invalid quote_number: %s' % quote_number)

    wanted = int(quote_number)
    # Only the QUOTE_* constants are candidates; other attributes of the
    # csv module must never be returned as a quoting name.
    for name, value in vars(csv).items():
        if name.startswith('QUOTE_') and value == wanted:
            return name
    raise ValueError('Invalid quote_number: %s' % quote_number)
コード例 #8
0
ファイル: slice_specs.py プロジェクト: kenfar/DataGristle
 def transform_name(self, val: Optional[str]) -> Optional[str]:
     """ Translate a field name into its position, returned as a string.

     None and numeric values are returned unchanged.  Any other value is
     stripped and looked up in self.header.

     Raises:
         UnidentifiableNonNumericSpec: if val is a name and there is no
             header to resolve it with
     """
     # Nothing to translate for a missing or already-numeric value.
     if val is None or comm.isnumeric(val):
         return val

     if self.header is None:
         raise UnidentifiableNonNumericSpec(
             f'Do not know how to interpret: {val}')

     name = val.strip()
     try:
         position = str(self.header.get_field_position(name))
     except KeyError:
         comm.abort(f'Error: Invalid string in spec: {val}',
                    f'Not in header list: {self.header.field_names}')
     return position
コード例 #9
0
ファイル: file_type.py プロジェクト: kenfar/DataGristle
def get_quote_number(quote_name: str) -> int:
    """ Return the numeric value of a csv quoting constant given its name.

    Used to help applications look up quote names typically provided by
    users.  The name is upper-cased before the lookup.

    Inputs:
       - quote_name
    Outputs:
       - quote_number
    Raises:
       - ValueError if quote_name is numeric, is None, or is not found in
         the csv module namespace
    """
    if comm.isnumeric(quote_name) or quote_name is None:
        raise ValueError('Invalid quote_name: %s' % quote_name)

    csv_namespace = vars(csv)
    try:
        quote_value = csv_namespace[quote_name.upper()]
    except KeyError:
        raise ValueError('Invalid quote_name: %s' % quote_name)
    return int(quote_value)
コード例 #10
0
ファイル: file_type.py プロジェクト: sidhu177/DataGristle
def get_quote_number(quote_name: str) -> int:
    """ Return the numeric value of a csv quoting constant given its name.

    Used to help applications look up quote names typically provided by
    users.  The name is upper-cased before the lookup.

    Inputs:
       - quote_name
    Outputs:
       - quote_number
    Raises:
       - ValueError if quote_name is numeric, is None, or is not found in
         the csv module namespace
    """
    if comm.isnumeric(quote_name) or quote_name is None:
        raise ValueError('Invalid quote_name: %s' % quote_name)

    csv_namespace = vars(csv)
    try:
        quote_value = csv_namespace[quote_name.upper()]
    except KeyError:
        raise ValueError('Invalid quote_name: %s' % quote_name)
    return int(quote_value)
コード例 #11
0
def get_max_decimals(values: FreqType,
                     field_type: Optional[str] = None) -> Optional[int]:
    ''' Return the largest number of digits after the decimal point.

        Works on the string form of each value rather than on numeric
        methods, since those can easily expand the size of the decimals due
        to floating point characteristics.

        Args:
            values: sequence of entries whose first element is the value
            field_type: if 'integer' the answer is 0 without any inspection
        Returns:
            None if values is empty, otherwise the maximum count of
            characters after the last '.' among the numeric values (0 if
            none of them contains a '.')
    '''
    if not values:
        return None
    if field_type == 'integer':
        return 0

    most_decimals = 0
    for entry in values:
        raw = entry[0]
        if not common.isnumeric(raw):
            continue
        text = str(raw)
        if '.' in text:
            fraction = text.rsplit('.', 1)[-1]
            most_decimals = max(most_decimals, len(fraction))
    return most_decimals
コード例 #12
0
ファイル: file_type.py プロジェクト: sidhu177/DataGristle
def get_dialect(files: List[str], delimiter: str, quotename: str, quotechar: str, recdelimiter: str, has_header: bool)\
                -> csvhelper.Dialect:
    """ Gets a csv dialect for a csv file or set of attributes.

    If files are provided and are not '-' -then use files and run file_type.FileTyper
    to get csv - while passing rest of args to FileTyper.  Otherwise, manually construct
    csv dialect from non-files arguments.

    Args:
        files: a list of files to analyze.  Files are tried in order and the
               first one that is not empty supplies the dialect.
        delimiter: a single character
        quotename: one of QUOTE_MINIMAL, QUOTE_NONE, QUOTE_ALL, QUOTE_NONNUMERIC
        quotechar: a single character
        recdelimiter: a single character; only passed on to FileTyper - the
               stdin branch always uses a newline
        has_header: a boolean
    Returns:
        csv dialect object
    Raises:
        ValueError - if a file does not exist, or the resulting dialect has
                     no delimiter
        sys.exit - if all files are empty
    """
    assert isinstance(files, list)
    dialect = None

    if files[0] == '-':
        # dialect parameters needed for stdin - since the normal code can't
        # analyze this data.
        # NOTE(review): csvhelper.Dialect is bound without being called, so
        # the assignments below appear to set attributes on that shared
        # object itself - confirm this is intended.
        dialect = csvhelper.Dialect
        dialect.delimiter = delimiter
        dialect.quoting = get_quote_number(quotename)
        dialect.quotechar = quotechar
        dialect.lineterminator = '\n'  # naive assumption
        dialect.has_header = has_header
    else:
        for fn in files:
            if not isfile(fn):
                raise ValueError('file does not exist: %s' % fn)
            my_file = FileTyper(fn,
                                delimiter,
                                recdelimiter,
                                has_header,
                                quoting=quotename,
                                quote_char=quotechar,
                                read_limit=5000)
            try:
                my_file.analyze_file()
                dialect = my_file.dialect
                break
            except IOErrorEmptyFile:
                # An empty file is not an error yet: try the next one.
                continue
        # Don't exit with ENODATA unless all files are empty:
        if dialect is None:
            sys.exit(errno.ENODATA)

    # validate quoting & assign defaults:
    if dialect.quoting is None:
        dialect.quoting = get_quote_number('quote_minimal')
    assert dialect.quoting is not None and comm.isnumeric(dialect.quoting)

    # validate delimiter & assign defaults:
    if dialect.delimiter is None:
        raise ValueError("Invalid Delimiter: %s" % dialect.delimiter)

    return dialect
コード例 #13
0
    def set_assignment(self,
                       dest_file: str,
                       dest_field: int,
                       src_type: str,
                       src_val: str = None,
                       src_file: str = None,
                       src_field: int = None) -> None:
        """ Write instructions for the assignment of a csv field in an output file.

        Args:
            dest_file: one of insert, delete, chgold, chgnew or same
            dest_field: the field position, given a zero-offset
            src_type: one of literal, copy, sequence, or special
            src_val: used by literal, lookup and sequence
            src_file: one of old, new or None
            src_field: the field position, given a zero-offset
        Returns:
            nothing
        Raises:
            ValueError if args are invalid
            sys.exit if sequence assignment is invalid
        """
        # int() raises for values that cannot be converted.  The result is
        # deliberately not truth-tested: position 0 (including '0') is valid.
        if dest_field:
            int(dest_field)
        if src_field:
            int(src_field)
        if dest_file not in ['insert', 'delete', 'chgold', 'chgnew', 'same']:
            raise ValueError('Invalid dest_file: %s' % dest_file)
        if not comm.isnumeric(dest_field):
            raise ValueError('Invalid dest_field: %s' % dest_field)
        if src_type not in ['literal', 'copy', 'sequence', 'special']:
            raise ValueError('Invalid src_type of %s' % src_type)
        if src_type in ['literal', 'lookup'] and src_val is None:
            raise ValueError('Missing src_val')
        if src_type == 'copy' and (src_file is None or src_field is None):
            raise ValueError('Missing src_file or src_field')
        if src_file not in [None, 'old', 'new']:
            raise ValueError('Invalid src_file: %s' % src_file)

        if dest_file not in self.assignments:
            self.assignments[dest_file] = {}
        self.assignments[dest_file][int(dest_field)] = {
            'src_type': src_type,
            'src_val': src_val,
            'src_file': src_file,
            'src_field': src_field
        }
        if src_type == 'sequence':
            # note that seq validation does not check to see if same sequence was
            # referenced twice with two different values.
            # NOTE(review): the abort() branches below leave tmp_val unset,
            # so they rely on abort() not returning - confirm.
            if src_file is not None and src_field is not None:
                tmp_val = None  # will get assigned based on file & field
            elif src_file is not None or src_field is not None:
                abort(
                    'Invalid sequence assignment config: src_file or src_field is None'
                )
            elif src_val is None:
                tmp_val = 0
            elif comm.isnumeric(src_val):
                tmp_val = int(src_val)
            elif src_val in self.special_values:
                if comm.isnumeric(self.special_values[src_val]):
                    tmp_val = int(self.special_values[src_val])
                else:
                    abort('Sequence refers to invalid special variable '
                          'should be unique.  Variable: %s   Its value: %s' %
                          (src_val, self.special_values[src_val]))
            else:
                abort(
                    'Invalid src_val from config, must be numeric for sequence: %s'
                    ' or refer to special variable name ' % src_val)

            # Sequences are keyed by src_field (None when no source field).
            self.seq[src_field] = {'start_val': tmp_val, 'last_val': tmp_val}
コード例 #14
0
ファイル: file_type.py プロジェクト: kenfar/DataGristle
def get_dialect(files: List[str], delimiter: str, quotename: str, quotechar: str, recdelimiter: str, has_header: bool)\
                -> csvhelper.Dialect:
    """ Gets a csv dialect for a csv file or set of attributes.

    If files are provided and are not '-' -then use files and run file_type.FileTyper
    to get csv - while passing rest of args to FileTyper.  Otherwise, manually construct
    csv dialect from non-files arguments.

    Args:
        files: a list of files to analyze.  Files are tried in order and the
               first one that is not empty supplies the dialect.
        delimiter: a single character
        quotename: one of QUOTE_MINIMAL, QUOTE_NONE, QUOTE_ALL, QUOTE_NONNUMERIC
        quotechar: a single character
        recdelimiter: a single character; only passed on to FileTyper - the
               stdin branch always uses a newline
        has_header: a boolean
    Returns:
        csv dialect object
    Raises:
        ValueError - if a file does not exist, or the resulting dialect has
                     no delimiter
        sys.exit - if all files are empty
    """
    assert isinstance(files, list)
    dialect = None

    if files[0] == '-':
        # dialect parameters needed for stdin - since the normal code can't
        # analyze this data.
        # NOTE(review): csvhelper.Dialect is bound without being called, so
        # the assignments below appear to set attributes on that shared
        # object itself - confirm this is intended.
        dialect = csvhelper.Dialect
        dialect.delimiter = delimiter
        dialect.quoting = get_quote_number(quotename)
        dialect.quotechar = quotechar
        dialect.lineterminator = '\n'  # naive assumption
        dialect.has_header = has_header
    else:
        for fn in files:
            if not isfile(fn):
                raise ValueError('file does not exist: %s' % fn)
            my_file = FileTyper(fn,
                                delimiter,
                                recdelimiter,
                                has_header,
                                quoting=quotename,
                                quote_char=quotechar,
                                read_limit=5000)
            try:
                my_file.analyze_file()
                dialect = my_file.dialect
                break
            except IOErrorEmptyFile:
                # An empty file is not an error yet: try the next one.
                continue
        # Don't exit with ENODATA unless all files are empty:
        if dialect is None:
            sys.exit(errno.ENODATA)

    # validate quoting & assign defaults:
    if dialect.quoting is None:
        dialect.quoting = get_quote_number('quote_minimal')
    assert dialect.quoting is not None and comm.isnumeric(dialect.quoting)

    # validate delimiter & assign defaults:
    if dialect.delimiter is None:
        raise ValueError("Invalid Delimiter: %s" % dialect.delimiter)

    return dialect
コード例 #15
0
ファイル: file_delta.py プロジェクト: kenfar/DataGristle
    def set_assignment(self,
                       dest_file: str,
                       dest_field: int,
                       src_type: str,
                       src_val: str = None,
                       src_file: str = None,
                       src_field: int = None) -> None:
        """ Write instructions for the assignment of a csv field in an output file.

        Args:
            dest_file: one of insert, delete, chgold or chgnew
            dest_field: the field position, given a zero-offset
            src_type: one of literal, copy, sequence, or special
            src_val: used by literal, lookup and sequence
            src_file: one of old, new or None
            src_field: the field position, given a zero-offset
        Returns:
            nothing
        Raises:
            ValueError if args are invalid
            sys.exit if sequence assignment is invalid
        """
        # int() raises for values that cannot be converted.  The result is
        # deliberately not truth-tested: position 0 (including '0') is valid.
        if dest_field:
            int(dest_field)
        if src_field:
            int(src_field)
        if dest_file not in ['insert', 'delete', 'chgold', 'chgnew']:
            raise ValueError('Invalid dest_file: %s' % dest_file)
        if not comm.isnumeric(dest_field):
            raise ValueError('Invalid dest_field: %s' % dest_field)
        if src_type not in ['literal', 'copy', 'sequence', 'special']:
            raise ValueError('Invalid src_type of %s' % src_type)
        if src_type in ['literal', 'lookup'] and src_val is None:
            raise ValueError('Missing src_val')
        if src_type == 'copy' and (src_file is None or src_field is None):
            raise ValueError('Missing src_file or src_field')
        if src_file not in [None, 'old', 'new']:
            raise ValueError('Invalid src_file: %s' % src_file)

        if dest_file not in self.assignments:
            self.assignments[dest_file] = {}
        self.assignments[dest_file][int(dest_field)] = {'src_type':src_type,
                                                        'src_val':src_val,
                                                        'src_file':src_file,
                                                        'src_field':src_field}
        if src_type == 'sequence':
            # note that seq validation does not check to see if same sequence was
            # referenced twice with two different values.
            # NOTE(review): the abort() branches below leave tmp_val unset,
            # so they rely on abort() not returning - confirm.
            if src_file is not None and src_field is not None:
                tmp_val = None  # will get assigned based on file & field
            elif src_file is not None or src_field is not None:
                abort('Invalid sequence assignment config: src_file or src_field is None')
            elif src_val is None:
                tmp_val = 0
            elif comm.isnumeric(src_val):
                tmp_val = int(src_val)
            elif src_val in self.special_values:
                if comm.isnumeric(self.special_values[src_val]):
                    tmp_val = int(self.special_values[src_val])
                else:
                    abort('Sequence refers to invalid special variable '
                          'should be unique.  Variable: %s   Its value: %s'
                          % (src_val, self.special_values[src_val]))
            else:
                abort('Invalid src_val from config, must be numeric for sequence: %s'
                      ' or refer to special variable name '% src_val)

            # Sequences are keyed by src_field (None when no source field).
            self.seq[src_field] = {'start_val': tmp_val, 'last_val':  tmp_val}