def upsert_instance(cls, hasAward: 'Award' = None, yearHeld: str = None,
                    hasEditionNumber: int = None,
                    yearScreened: int = None) -> 'AwardCeremony':
    """Return the instance matching the given fields, creating it if absent.

    Only the arguments that are not None take part in the lookup and in the
    creation (filtered through `select_not_null`).

    NOTE(review): presumably used as a classmethod on a Django-style model
    (it relies on `cls.objects` and `cls.DoesNotExist`); the decorator and
    the enclosing class are not visible here -- confirm.

    Args:
        hasAward: Award the ceremony belongs to, or None to ignore.
        yearHeld: Year the ceremony was held, or None to ignore.
        hasEditionNumber: Edition number, or None to ignore.
        yearScreened: Year screened, or None to ignore.

    Returns:
        The existing instance if the lookup finds one, otherwise a newly
        created one.
    """
    criteria = select_not_null({
        'hasAward': hasAward,
        'yearHeld': yearHeld,
        'hasEditionNumber': hasEditionNumber,
        'yearScreened': yearScreened,
    })
    try:
        return cls.objects.get(**criteria)
    except cls.DoesNotExist:
        pass
    # Create through `cls` (not a hard-coded class) so the lookup and the
    # creation always target the same model, including when called on a
    # subclass.
    return cls.objects.create(**criteria)
def read_xsv(
    file: IO,
    dialect: str,
    fieldnames: List[str] = None,
    first_line_is_column_header: bool = True,
    discard: int = None,
    load_at_most: int = None,
) -> Iterable[Dict]:
    """Returns an iterable of dict, one per data row.

    Must be iterated while file is still open.

    Args:
        file: An open text file. It must support `tell`/`seek` when
            `first_line_is_column_header` is False and `fieldnames` is None,
            because the first line is peeked to count the columns.
        dialect: As used in built-in module `csv`.
        fieldnames: Column names to use as dict keys when the first line is
            not a header. If None and there is no header line, names of the
            form 'Column 1', 'Column 2', ... are generated.
        first_line_is_column_header: If True, parses first line as column
            headers.
        discard: Non-negative integer or None. Initial rows of _data_ to
            discard.
        load_at_most: Non-negative integer or None. Rows of _data_ to load.

    Returns:
        A lazy iterator over the selected rows.

    Raises:
        NotImplementedError: If `fieldnames` is given while the first line is
            also declared to be the column header.

    Notes:
        Use 'excel' dialect for CSV. Use 'excel-tab' for TSV.

    Warnings:
        Must be iterated while file is still open.
    """
    if first_line_is_column_header and fieldnames is not None:
        raise NotImplementedError(
            "Changing column names isn't supported for simplicity")

    # Only forward options that were actually supplied so the csv module's
    # own defaults apply otherwise.
    reader_kwargs = {}
    if dialect is not None:
        reader_kwargs['dialect'] = dialect

    if fieldnames is not None:
        reader_kwargs['fieldnames'] = fieldnames
    elif not first_line_is_column_header:
        # use 'Column X' as fieldnames like in OpenRefine
        # Peek at the whole first line, then restore the position so that
        # the line is still returned as data.
        position = file.tell()
        first_line = file.readline()
        file.seek(position)
        # Parse with the csv module (not str.split) so quoted fields that
        # contain the delimiter are counted as a single column.
        first_row = next(csv.reader([first_line], **reader_kwargs), [])
        reader_kwargs['fieldnames'] = [
            f'Column {number}' for number in range(1, len(first_row) + 1)
        ]

    reader = csv.DictReader(file, **reader_kwargs)

    stop = None
    if load_at_most is not None:
        stop = load_at_most + (discard or 0)
    return islice(reader, discard, stop)
def read_xsv_file(
    filename: Union[str, Path],
    dialect: str,
    *,
    encoding: str = None,
    fieldnames: List[str] = None,
    first_line_is_column_header: bool = True,
    discard: int = None,
    load_at_most: int = None,
) -> List[Dict]:
    """Returns a list of dicts.

    Convenience method for `read_xsv`: opens `filename`, reads the selected
    rows and closes the file again.

    Args:
        filename: The filename to open.
        dialect: As used in built-in module `csv`.
        encoding: Encoding of the file to open. None uses `open`'s default.
        fieldnames: Column names forwarded to `read_xsv`; None lets
            `read_xsv` decide.
        first_line_is_column_header: If True, parses first line as column
            headers.
        discard: Non-negative integer or None. Initial rows of _data_ to
            discard.
        load_at_most: Non-negative integer or None. Rows of _data_ to load.

    Returns:
        The rows produced by `read_xsv`, materialised into a list.

    Notes:
        Use 'excel' dialect for CSV. Use 'excel-tab' for TSV.
    """
    reader_options = {
        'fieldnames': fieldnames,
        'first_line_is_column_header': first_line_is_column_header,
        'discard': discard,
        'load_at_most': load_at_most,
    }
    # newline='' is what the csv module asks for: it leaves line endings
    # untranslated so newlines embedded in quoted fields are parsed
    # correctly. encoding=None is `open`'s own default, so it can be passed
    # through unconditionally.
    with open(filename, 'r', encoding=encoding, newline='') as file:
        # must be iterated now because the file will be closed
        return list(read_xsv(file, dialect,
                             **select_not_null(reader_options)))
def write_model(file: Union[IO, str, Path], model_name: str,
                data: List[Dict[str, Any]], *, gen_pk: bool = True,
                pk_cols: List[str] = None):
    """Render `data` as a `Table` named `model_name` and write it to `file`.

    The records are loaded into a pandas DataFrame, wrapped in a `Table`,
    and the result of `Table.as_python()` is handed to
    `open_and_write_file`.

    Args:
        file: A filename/path or an opened file, as accepted by
            `open_and_write_file`.
        model_name: Name passed to `Table` as its first argument.
        data: Records used to build the DataFrame.
        gen_pk: Forwarded to `Table` when not None.
        pk_cols: Forwarded to `Table` when not None.

    NOTE(review): `as_python()` presumably returns Python source text for the
    model -- confirm against `Table`.
    """
    frame = pd.DataFrame(data)
    table_options = select_not_null({'pk_cols': pk_cols, 'gen_pk': gen_pk})
    rendered = Table(model_name, frame, **table_options).as_python()
    open_and_write_file(file, rendered)
def open_and_write_file(file: Union[IO, str, Path], s: str, *,
                        mode: str = 'w', buffering: int = None,
                        encoding: str = None, errors: str = None,
                        newline: str = None,
                        closefd: bool = None) -> None:
    """Convenience function to write to file.

    Writes `s` to `file`. If `file` has a `write` attribute it is used
    directly; otherwise, if `file` is a `str` or `Path`, the file is opened
    with the given options, written to and closed.

    Args:
        file: A filename/path or an opened file (IO object).
        s: The string to write to the file.
        mode: Mode used to open the file when `file` is a path.
        buffering: Passed to `open` when not None.
        encoding: Passed to `open`; defaults to 'utf-8' when None.
        errors: Passed to `open` when not None.
        newline: Passed to `open` when not None.
        closefd: Passed to `open` when not None.

    Returns:
        None

    Raises:
        OSError: When `file` is an IO that doesn't support writing, or when
            opening/writing the path fails.
        AssertionError: When `file` is neither writable nor a `str`/`Path`.
    """
    # Dispatch explicitly instead of via `except AttributeError`: that way an
    # error raised while opening the path is not reported as happening
    # "during handling" of an unrelated AttributeError, and an AttributeError
    # raised inside a real write() is not misreported.
    write = getattr(file, 'write', None)
    if write is not None:
        write(s)
        return

    if not isinstance(file, (str, Path)):
        raise AssertionError(
            'expected an opened file, str or Path, '
            f'got {type(file).__name__}')

    options = {
        'mode': mode,
        'buffering': buffering,
        'encoding': encoding if encoding is not None else 'utf-8',
        'errors': errors,
        'newline': newline,
        'closefd': closefd,
    }
    # Drop unset options so `open` falls back to its own defaults.
    open_kwargs = {
        key: value for key, value in options.items() if value is not None
    }
    with open(file, **open_kwargs) as f:
        f.write(s)
def parse_tsv(file: IO, chunksize: int = None) -> pd.DataFrame:
    """Parse tab-separated data with pandas using the 'excel-tab' dialect.

    Args:
        file: An open file (or anything `pandas.read_csv` accepts).
        chunksize: Number of rows per chunk, or None to read everything at
            once.

    Returns:
        A DataFrame when `chunksize` is None. When `chunksize` is given,
        `pandas.read_csv` returns an iterator of DataFrame chunks instead.
    """
    # Pass the option by name: building the kwargs as `{chunksize: chunksize}`
    # used the *value* as the key, which fails for any non-None chunksize.
    # None is read_csv's own default, so it can be forwarded as-is.
    return pd.read_csv(file, dialect='excel-tab', chunksize=chunksize)