def transform_none_stop(self,
                        start: int,
                        stop: Optional[int],
                        step: float,
                        is_range: bool) -> Tuple[int, bool, bool]:
    """ Resolve a missing stop value into a concrete integer.

    Args:
        start: the already-resolved start position
        stop: the stop position, or None when the spec did not provide one
        step: the step; only its sign (>= 0 or not) is consulted here
        is_range: True when the spec item is a range rather than a single item

    Returns:
        A tuple of (stop, col_default_range, rec_default_range).  The two
        flags report whether DEFAULT_COL_RANGE_STOP or DEFAULT_REC_RANGE_STOP
        was substituted because self.infile_item_count is None.
    """
    assert stop is None or comm.isnumeric(stop)

    # A provided stop is passed straight through; no default was applied.
    if comm.isnumeric(stop):
        assert (isinstance(stop, int))
        return stop, False, False

    ascending = step >= 0

    # Single item: the stop sits one position past the start, in step direction.
    if not is_range:
        return (start + 1 if ascending else start - 1), False, False

    # Open-ended range walking backwards.
    if not ascending:
        return -1, False, False

    # Open-ended range walking forwards: stop at the known item count ...
    if self.infile_item_count is not None:
        return self.infile_item_count, False, False

    # ... or fall back to a default, flagged by spec type, when it is unknown.
    if self.spec_type in ('incl_rec', 'excl_rec'):
        return DEFAULT_REC_RANGE_STOP, False, True
    return DEFAULT_COL_RANGE_STOP, True, False
def transform_none_start(self,
                         start: Union[int, None],
                         step: float) -> int:
    """ Resolve a missing start value into a concrete integer.

    Example Sources:
        - -r 1
        - -r 3:
        - -r 3:4
        - -r :3
        - -r '::'
        - -r '::2'

    Args:
        start: the start position, or None when the spec did not provide one
        step: the step; only its sign (>= 0 or not) is consulted here

    Returns:
        The provided start, 0 for a non-negative step, or
        self.infile_item_count for a negative step.

    Raises:
        NegativeStepWithoutItemCountError: if start is None, step is negative
            and self.infile_item_count is None.
    """
    assert start is None or comm.isnumeric(start)

    if comm.isnumeric(start):
        assert (isinstance(start, int))
        return start

    # Start=None - which is *always* a range (unlike stop)
    if step >= 0:
        return 0

    # Walking backwards needs the item count to know where to begin.
    if self.infile_item_count is None:
        raise NegativeStepWithoutItemCountError
    return self.infile_item_count
def phase2__translate_item_parts(
        self,
        orig_start: Optional[str],
        orig_stop: Optional[str],
        orig_step: str,
        is_range: bool) -> Tuple[Optional[int], Optional[int], float]:
    """ Translate the specification item parts into numeric forms.

    Args:
        orig_start: the raw start part of the spec item
        orig_stop: the raw stop part of the spec item
        orig_step: the raw step part of the spec item
        is_range: passed through to the negative-number and positive-number
            helpers for start and stop

    Returns:
        A tuple of (start, stop, step).  Start and stop are ints, or None
        when the part resolved to None; step is a float defaulting to 1.0.

    Raises:
        UnidentifiableNonNumericSpec: if any part is still non-numeric after
            its transformations have run.
    """
    def _reject_non_numeric(part: Optional[str]) -> None:
        # None is allowed through; anything else must look numeric by now.
        if part is None or comm.isnumeric(part):
            return
        raise UnidentifiableNonNumericSpec(
            f'Do not know how to interpret: {part}')

    # --- start ---
    start_part = Specifications.transform_empty_string(orig_start)
    start_part = self.transform_name(start_part)
    start_part = self.transform_negative_start_number(start_part, is_range)
    _reject_non_numeric(start_part)
    int_start = None if start_part is None else int(start_part)

    # --- stop ---
    stop_part = self.transform_empty_string(orig_stop)
    stop_part = self.transform_name(stop_part)
    stop_part = self.transform_negative_stop_number(stop_part, is_range)
    stop_part = self.validate_positive_number(stop_part, is_range)
    _reject_non_numeric(stop_part)
    int_stop = None if stop_part is None else int(stop_part)

    # --- step ---
    step_part = self.transform_empty_string(orig_step)
    _reject_non_numeric(step_part)
    float_step = 1.0 if step_part is None else float(step_part)

    return int_start, int_stop, float_step
def _transform_key_field(self,
                         header: Optional[csvhelper.Header],
                         key_field: str) -> str:
    """ Rewrite a key field so that its field part is a numeric position.

    key_field looks like:
        - [field][~][type][~][direction]
        - ex: 0sf
        - ex: 3ir
        - ex: home_statesf
        - ex: home_state~s~f
        - ex: 0~s~f

    Args:
        header: used to look up the position of a field name; may be None
        key_field: the key specification in one of the forms shown above

    Returns:
        The key as '<position><type><direction>' with no '~' separators.

    Aborts (via comm.abort):
        - if the field is a name and no header was supplied
        - if the field name is not found in the header
    """
    if key_field.count('~') == 2:
        field, keytype, direction = key_field.split('~')
    else:
        # Compact form: the last two characters are type and direction.
        direction = key_field[-1]
        keytype = key_field[-2]
        field = key_field[:-2]

    if comm.isnumeric(field):
        field_offset = field
    else:
        # A field name can only be resolved through a header.  Without this
        # guard a missing header surfaces as an opaque AttributeError.
        if header is None:
            comm.abort(
                'Error: field name used in key but no header is available',
                f'field name: {field}')
        try:
            field_offset = str(header.get_field_position(field))
        except KeyError:
            comm.abort(
                'Error: field name not found in header',
                f'field name: {field} '
                f'header fields: {header.field_names}')

    new_key_field = f'{field_offset}{keytype}{direction}'
    return new_key_field
def get_field_position_from_any(self, lookup: Union[str, int]) -> int:
    """ Returns a field position given either a field name or position.

    Args:
        lookup: a numeric position, or a field name

    Returns:
        int(lookup) when lookup is numeric, otherwise the result of
        self.get_field_position(lookup).
    """
    if not comm.isnumeric(lookup):
        return self.get_field_position(lookup)
    return int(lookup)
def get_quote_name(quote_number: Union[int, str]) -> str:
    """ Look up the name of a csv module attribute from its numeric value.

    Used to help applications look up quote numbers typically provided by
    users.

    Args:
        quote_number: the numeric quoting value, as an int or numeric string

    Returns:
        The name of the first attribute of the csv module whose value equals
        the number.

    Raises:
        ValueError: if quote_number is not numeric or matches no attribute.
    """
    if not comm.isnumeric(quote_number):
        raise ValueError('Invalid quote_number: %s' % quote_number)

    target = int(quote_number)
    for attr_name, attr_value in csv.__dict__.items():
        if attr_value == target:
            return attr_name

    # Nothing in the csv module carries this value.
    raise ValueError('Invalid quote_number: %s' % quote_number)
def transform_name(self, val: Optional[str]) -> Optional[str]:
    """ Translate a field name into its position, as a string.

    Args:
        val: None, a numeric string, or a field name

    Returns:
        val unchanged when it is None or numeric; otherwise the position of
        the stripped name in self.header, converted to a string.

    Raises:
        UnidentifiableNonNumericSpec: if val is a name and self.header is None.

    Aborts (via comm.abort) if the name is not found in the header.
    """
    # Nothing to translate for a missing or already-numeric value.
    if val is None or comm.isnumeric(val):
        return val

    if self.header is None:
        raise UnidentifiableNonNumericSpec(
            f'Do not know how to interpret: {val}')

    try:
        position = str(self.header.get_field_position(val.strip()))
    except KeyError:
        comm.abort(f'Error: Invalid string in spec: {val}',
                   f'Not in header list: {self.header.field_names}')
    return position
def get_quote_number(quote_name: str) -> int:
    """ Look up the numeric value of a csv module attribute from its name.

    Used to help applications look up quote names typically provided by
    users.

    Inputs:
        - quote_name: looked up in the csv module after upper-casing
    Outputs:
        - quote_number
    Raises:
        ValueError: if quote_name is numeric, is None, or is not the name of
            an attribute of the csv module.
    """
    # Numbers and None are not names; both get the same rejection.
    if comm.isnumeric(quote_name) or quote_name is None:
        raise ValueError('Invalid quote_name: %s' % quote_name)

    lookup_key = quote_name.upper()
    try:
        return int(csv.__dict__[lookup_key])
    except KeyError:
        raise ValueError('Invalid quote_name: %s' % quote_name)
def get_max_decimals(values: FreqType,
                     field_type: Optional[str] = None) -> Optional[int]:
    ''' Returns the maximum number of decimal places on any value.

    Works on the string form of each value rather than on typical numeric
    methods, since those can easily expand the size of the decimals due to
    floating point characteristics.

    Args:
        values: a sequence whose entries carry the value in position 0
                (presumably (value, count) pairs - confirm against FreqType)
        field_type: when 'integer' the answer is always 0

    Returns:
        None if values is empty, otherwise the longest run of characters
        found after the last '.' of any numeric value (0 if there is none).
    '''
    if not values:
        return None
    if field_type == 'integer':
        return 0

    longest = 0
    for entry in values:
        raw = entry[0]
        if not common.isnumeric(raw):
            continue
        text = str(raw)
        if '.' not in text:
            continue
        # Everything after the last '.' counts as the decimal portion.
        longest = max(longest, len(text.rsplit('.', 1)[-1]))
    return longest
def get_dialect(files: List[str],
                delimiter: str,
                quotename: str,
                quotechar: str,
                recdelimiter: str,
                has_header: bool) -> csvhelper.Dialect:
    """ Gets a csv dialect for a csv file or set of attributes.

    If the first entry of files is '-' the dialect is constructed manually
    from the non-files arguments.  Otherwise each file in turn is handed to
    FileTyper, along with the rest of the arguments, until one of them
    produces a dialect.

    Args:
        files: a list of files to analyze; analysis stops at the first file
               that is not empty
        delimiter: a single character
        quotename: one of QUOTE_MINIMAL, QUOTE_NONE, QUOTE_ALL,
                   QUOTE_NONNUMERIC
        quotechar: a single character
        recdelimiter: passed to FileTyper; not used for '-' input, where the
                      line terminator is fixed at a newline
        has_header: a boolean
    Returns:
        csv dialect object
    Raises:
        ValueError - if a file does not exist or the delimiter is None
        sys.exit - with errno.ENODATA if all files are empty
    """
    assert isinstance(files, list)
    dialect = None

    if files[0] != '-':
        for file_name in files:
            if not isfile(file_name):
                raise ValueError('file does not exist: %s' % file_name)
            typer = FileTyper(file_name,
                              delimiter,
                              recdelimiter,
                              has_header,
                              quoting=quotename,
                              quote_char=quotechar,
                              read_limit=5000)
            try:
                typer.analyze_file()
                dialect = typer.dialect
                break
            except IOErrorEmptyFile:
                # An empty file tells us nothing; move on to the next one.
                continue
        else:
            # Reached only when no file broke out of the loop, i.e. every
            # file was empty.
            sys.exit(errno.ENODATA)
    else:
        # Dialect parameters are needed for stdin, since the analysis above
        # cannot be run on that data.
        # NOTE(review): this binds the Dialect class itself (it is not
        # called), so the attributes below are set on the class - confirm
        # that this is intended.
        dialect = csvhelper.Dialect
        dialect.delimiter = delimiter
        dialect.quoting = get_quote_number(quotename)
        dialect.quotechar = quotechar
        dialect.lineterminator = '\n'  # naive assumption
        dialect.has_header = has_header

    # Don't exit with ENODATA unless all files are empty:
    if dialect is None:
        sys.exit(errno.ENODATA)

    # Quoting: default to minimal when unset, then insist it is numeric.
    if dialect.quoting is None:
        dialect.quoting = get_quote_number('quote_minimal')
    assert dialect.quoting is not None and comm.isnumeric(dialect.quoting)

    # Delimiter: there is no sensible default, so a missing one is an error.
    if dialect.delimiter is None:
        raise ValueError("Invalid Delimiter: %s" % dialect.delimiter)

    return dialect
def set_assignment(self,
                   dest_file: str,
                   dest_field: int,
                   src_type: str,
                   src_val: str = None,
                   src_file: str = None,
                   src_field: int = None) -> None:
    """ Write instructions for the assignment of a csv field in an output file.

    Args:
        dest_file: one of insert, delete, chgold, chgnew or same
        dest_field: the field position, given a zero-offset
        src_type: one of literal, copy, sequence, or special
        src_val: used by literal, lookup and sequence
        src_file: one of old, new or None
        src_field: the field position, given a zero-offset
    Returns:
        nothing
    Raises:
        ValueError if args are invalid
        abort() is called if a sequence assignment is invalid (presumably
        exits the process - confirm against abort's definition)
    """
    # Field positions must be integer-like.  This is an explicit check rather
    # than `assert int(x)`: that form rejects the valid position '0' (int('0')
    # is falsy) and is skipped entirely when python runs with -O.
    for label, position in (('dest_field', dest_field),
                            ('src_field', src_field)):
        if position:
            try:
                int(position)
            except (TypeError, ValueError):
                raise ValueError('Invalid %s: %s' % (label, position)) from None

    if dest_file not in ['insert', 'delete', 'chgold', 'chgnew', 'same']:
        raise ValueError('Invalid dest_file: %s' % dest_file)
    if not comm.isnumeric(dest_field):
        raise ValueError('Invalid dest_field: %s' % dest_field)
    if src_type not in ['literal', 'copy', 'sequence', 'special']:
        raise ValueError('Invalid src_type of %s' % src_type)
    if src_type in ['literal', 'lookup'] and src_val is None:
        raise ValueError('Missing src_val')
    if src_type == 'copy' and (src_file is None or src_field is None):
        raise ValueError('Missing src_file or src_field')
    if src_file not in [None, 'old', 'new']:
        raise ValueError('Invalid src_file: %s' % src_file)

    if dest_file not in self.assignments:
        self.assignments[dest_file] = {}
    self.assignments[dest_file][int(dest_field)] = {
        'src_type': src_type,
        'src_val': src_val,
        'src_file': src_file,
        'src_field': src_field
    }

    if src_type == 'sequence':
        # Note that seq validation does not check to see if the same sequence
        # was referenced twice with two different values.
        if src_file is not None and src_field is not None:
            tmp_val = None  # will get assigned based on file & field
        elif src_file is not None or src_field is not None:
            # Exactly one of the pair was supplied.
            abort(
                'Invalid sequence assignment config: src_file or src_field is None'
            )
        elif src_val is None:
            tmp_val = 0
        elif comm.isnumeric(src_val):
            tmp_val = int(src_val)
        elif src_val in self.special_values:
            if comm.isnumeric(self.special_values[src_val]):
                tmp_val = int(self.special_values[src_val])
            else:
                # NOTE(review): the two adjacent literals join without a
                # space ("...variableshould be unique"); left as-is because
                # it is runtime text.
                abort('Sequence refers to invalid special variable'
                      'should be unique.  Variable: %s   Its value: %s' %
                      (src_val, self.special_values[src_val]))
        else:
            abort(
                'Invalid src_val from config, must be numeric for sequence: %s'
                ' or refer to special variable name ' % src_val)

        # NOTE(review): keyed by src_field, which is None whenever the start
        # value came from src_val - confirm this key is intended.
        self.seq[src_field] = {'start_val': tmp_val, 'last_val': tmp_val}
def set_assignment(self,
                   dest_file: str,
                   dest_field: int,
                   src_type: str,
                   src_val: str = None,
                   src_file: str = None,
                   src_field: int = None) -> None:
    """ Write instructions for the assignment of a csv field in an output file.

    Args:
        dest_file: one of insert, delete, chgold or chgnew
        dest_field: the field position, given a zero-offset
        src_type: one of literal, copy, sequence, or special
        src_val: used by literal, lookup and sequence
        src_file: one of old, new or None
        src_field: the field position, given a zero-offset
    Returns:
        nothing
    Raises:
        ValueError if args are invalid
        abort() is called if a sequence assignment is invalid (presumably
        exits the process - confirm against abort's definition)
    """
    # Field positions must be integer-like.  This is an explicit check rather
    # than `assert int(x)`: that form rejects the valid position '0' (int('0')
    # is falsy) and is skipped entirely when python runs with -O.
    for label, position in (('dest_field', dest_field),
                            ('src_field', src_field)):
        if position:
            try:
                int(position)
            except (TypeError, ValueError):
                raise ValueError('Invalid %s: %s' % (label, position)) from None

    if dest_file not in ['insert', 'delete', 'chgold', 'chgnew']:
        raise ValueError('Invalid dest_file: %s' % dest_file)
    if not comm.isnumeric(dest_field):
        raise ValueError('Invalid dest_field: %s' % dest_field)
    if src_type not in ['literal', 'copy', 'sequence', 'special']:
        raise ValueError('Invalid src_type of %s' % src_type)
    if src_type in ['literal', 'lookup'] and src_val is None:
        raise ValueError('Missing src_val')
    if src_type == 'copy' and (src_file is None or src_field is None):
        raise ValueError('Missing src_file or src_field')
    if src_file not in [None, 'old', 'new']:
        raise ValueError('Invalid src_file: %s' % src_file)

    if dest_file not in self.assignments:
        self.assignments[dest_file] = {}
    self.assignments[dest_file][int(dest_field)] = {'src_type': src_type,
                                                    'src_val': src_val,
                                                    'src_file': src_file,
                                                    'src_field': src_field}

    if src_type == 'sequence':
        # Note that seq validation does not check to see if the same sequence
        # was referenced twice with two different values.
        if src_file is not None and src_field is not None:
            tmp_val = None  # will get assigned based on file & field
        elif src_file is not None or src_field is not None:
            # Exactly one of the pair was supplied.
            abort('Invalid sequence assignment config: src_file or src_field is None')
        elif src_val is None:
            tmp_val = 0
        elif comm.isnumeric(src_val):
            tmp_val = int(src_val)
        elif src_val in self.special_values:
            if comm.isnumeric(self.special_values[src_val]):
                tmp_val = int(self.special_values[src_val])
            else:
                # NOTE(review): the two adjacent literals join without a
                # space ("...variableshould be unique"); left as-is because
                # it is runtime text.
                abort('Sequence refers to invalid special variable'
                      'should be unique.  Variable: %s   Its value: %s'
                      % (src_val, self.special_values[src_val]))
        else:
            abort('Invalid src_val from config, must be numeric for sequence: %s'
                  ' or refer to special variable name ' % src_val)

        # NOTE(review): keyed by src_field, which is None whenever the start
        # value came from src_val - confirm this key is intended.
        self.seq[src_field] = {'start_val': tmp_val, 'last_val': tmp_val}