def determine_date_output_style(unhandled_hints: Set[str],
                                hints: ValidatedRecordsHints,
                                fail_if_cant_handle_hint: bool) -> \
        Tuple[DateOutputStyle, Optional[DateOrderStyle]]:
    """Pick the Postgres date output style matching the records hints.

    Returns a (date_output_style, date_order_style) tuple.  Only the
    'ISO' output style is currently expressible by the records spec's
    numeric timezone offsets; any other hint combination is routed
    through cant_handle_hint(), which raises or merely warns depending
    on fail_if_cant_handle_hint.
    """
    # see docs in the types module

    dateformat = hints.dateformat
    timeonlyformat = hints.timeonlyformat
    datetimeformattz = hints.datetimeformattz
    datetimeformat = hints.datetimeformat

    date_order_style: Optional[DateOrderStyle] = None
    # Default/fallback up front: previously this was only assigned
    # inside the 'if' branch, so when the hints didn't match and
    # cant_handle_hint() only warned (fail_if_cant_handle_hint=False),
    # the return below raised UnboundLocalError.
    date_output_style: DateOutputStyle = 'ISO'

    if (dateformat == 'YYYY-MM-DD'
            and timeonlyformat in ['HH24:MI:SS', 'HH:MI:SS']
            and datetimeformattz
            in ['YYYY-MM-DD HH:MI:SSOF', 'YYYY-MM-DD HH24:MI:SSOF']
            and datetimeformat
            in ['YYYY-MM-DD HH24:MI:SS', 'YYYY-MM-DD HH:MI:SS']):
        # date_order_style doesn't really matter, as ISO is not ambiguous
        pass
    else:
        # 'SQL', 'Postgres' and 'German' all support only alphabetic
        # timezone indicators, which aren't yet supported in the
        # records spec
        cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformattz', hints)

    quiet_remove(unhandled_hints, 'dateformat')
    quiet_remove(unhandled_hints, 'timeonlyformat')
    quiet_remove(unhandled_hints, 'datetimeformattz')
    quiet_remove(unhandled_hints, 'datetimeformat')

    return (date_output_style, date_order_style)
 # NOTE(review): this appears to be a stray, mis-indented duplicate of
 # the nested helper defined inside determine_input_date_order_style()
 # later in this file; `nonlocal date_order_style` has no enclosing
 # function scope here, so this fragment cannot execute as written --
 # confirm against version control and remove during cleanup.
 def upgrade_date_order_style(style: DateOrderStyle,
                              hint_name: str) -> None:
     nonlocal date_order_style
     if date_order_style not in (None, style):
         cant_handle_hint(fail_if_cant_handle_hint, hint_name, hints)
     else:
         date_order_style = style
         quiet_remove(unhandled_hints, hint_name)
# Example #3
# 0
def postgres_copy_options_common(unhandled_hints: Set[str],
                                 hints: ValidatedRecordsHints,
                                 fail_if_cant_handle_hint: bool,
                                 original_postgres_options: PostgresCopyOptions) ->\
        PostgresCopyOptions:
    """Fill in the COPY options shared by the text and csv formats.

    Returns a copy of original_postgres_options augmented with the
    encoding, header and delimiter settings derived from hints.  Each
    hint handled here is checked off unhandled_hints; an encoding with
    no Postgres equivalent is routed through cant_handle_hint().
    """
    copy_options = original_postgres_options.copy()

    # ENCODING: name the file's encoding explicitly instead of relying
    # on the current client encoding.
    try:
        copy_options['encoding'] = postgres_encoding_names[hints.encoding]
    except KeyError:
        # No Postgres name for this records-spec encoding.
        cant_handle_hint(fail_if_cant_handle_hint, 'encoding', hints)
    else:
        quiet_remove(unhandled_hints, 'encoding')

    # HEADER: on output, emit a leading line of column names; on
    # input, skip the first line.  (Allowed only in csv format.)
    quiet_remove(unhandled_hints, 'header-row')
    copy_options['header'] = hints.header_row

    # DELIMITER: the single one-byte column separator (Postgres
    # defaults: tab for text format, comma for csv format).
    copy_options['delimiter'] = hints.field_delimiter
    quiet_remove(unhandled_hints, 'field-delimiter')

    return copy_options
# Example #4
# 0
def _convert_series_or_index(
        series_or_index: T, field: RecordsSchemaField,
        records_format: DelimitedRecordsFormat,
        processing_instructions: ProcessingInstructions) -> Optional[T]:
    """Render a date- or time-typed pandas Series/Index as formatted strings.

    Returns the converted Series/Index, or None when the field type
    needs no conversion (or when the values aren't pd.Timestamp, in
    which case only a warning is logged).  The two datetime branches
    previously duplicated the same logic inline; it now lives in
    _strftime_series_or_index with identical behavior.
    """
    if field.field_type == 'date':
        return _strftime_series_or_index(
            series_or_index, records_format, processing_instructions,
            hint_name='dateformat',
            formats_from_hints=python_date_format_from_hints,
            fallback_format='%Y-%m-%d')
    elif field.field_type == 'time':
        return _strftime_series_or_index(
            series_or_index, records_format, processing_instructions,
            hint_name='timeonlyformat',
            formats_from_hints=python_time_format_from_hints,
            fallback_format='%H:%M:%S')
    else:
        logger.debug(f"Not converting field type {field.field_type}")
    return None


def _strftime_series_or_index(series_or_index: T,
                              records_format: DelimitedRecordsFormat,
                              processing_instructions: ProcessingInstructions,
                              hint_name: str,
                              formats_from_hints,
                              fallback_format: str) -> Optional[T]:
    """Format datetime64 data as strings per the named records hint.

    Shared implementation for the 'date' and 'time' branches of
    _convert_series_or_index.  formats_from_hints maps hint values to
    strftime format strings; fallback_format is used (after a
    cant_handle_hint complaint) when the hint value is unknown.
    """
    if not isinstance(series_or_index[0], pd.Timestamp):
        logger.warning(f"Found {series_or_index.name} as unexpected type "
                       f"{type(series_or_index[0])}")
        return None
    logger.info(
        f"Converting {series_or_index.name} from np.datetime64 to "
        "string in CSV's format")
    hint_format = records_format.hints[hint_name]
    assert isinstance(hint_format, str)
    pandas_format = formats_from_hints.get(hint_format)  # type: ignore
    if pandas_format is None:
        # No known strftime equivalent; complain (or warn) and fall
        # back to the records-spec default format.
        cant_handle_hint(
            processing_instructions.fail_if_cant_handle_hint,
            hint_name, records_format.hints)
        pandas_format = fallback_format
    if isinstance(series_or_index, pd.Series):
        # A Series needs the .dt accessor; a DatetimeIndex exposes
        # strftime() directly.
        return series_or_index.dt.strftime(pandas_format)
    else:
        return series_or_index.strftime(pandas_format)
def mysql_load_options(unhandled_hints: Set[str],
                       records_format: DelimitedRecordsFormat,
                       fail_if_cant_handle_hint: bool) -> MySqlLoadOptions:
    """Translate delimited-records hints into MySQL LOAD DATA options.

    Hints that map onto LOAD DATA clauses are checked off
    unhandled_hints; anything MySQL can't express is routed through
    cant_handle_hint(), which raises or merely warns depending on
    fail_if_cant_handle_hint (a best-effort default is used when it
    only warns).  Reference:
    https://dev.mysql.com/doc/refman/8.0/en/load-data.html
    """
    hints = records_format.validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)

    # CHARACTER SET: the server otherwise interprets the file using
    # the character_set_database system variable (SET NAMES /
    # character_set_client do not affect input).  If the file's
    # encoding differs, specify it explicitly; "binary" means
    # "no conversion."
    hint_encoding: HintEncoding = hints.encoding
    character_set = MYSQL_CHARACTER_SETS_FOR_LOAD.get(hint_encoding)
    if character_set is not None:
        mysql_character_set = character_set
    else:
        cant_handle_hint(fail_if_cant_handle_hint, 'encoding', records_format.hints)
        mysql_character_set = 'utf8'
    quiet_remove(unhandled_hints, 'encoding')

    field_terminator: HintFieldDelimiter = hints.field_delimiter
    mysql_fields_terminator = field_terminator
    quiet_remove(unhandled_hints, 'field-delimiter')

    # Typical CSV loading per the LOAD DATA docs:
    #
    # LOAD DATA INFILE 'data.txt' INTO TABLE tbl_name
    #  FIELDS TERMINATED BY ',' ENCLOSED BY '"'
    #  LINES TERMINATED BY '\r\n'
    #  IGNORE 1 LINES;
    mysql_fields_enclosed_by = None
    mysql_fields_optionally_enclosed_by = None
    hint_quotechar: HintQuoteChar = hints.quotechar
    hint_quoting: HintQuoting = hints.quoting
    if hint_quoting == 'all':
        mysql_fields_enclosed_by = hint_quotechar
    elif hint_quoting == 'minimal':
        # "If the input values are not necessarily enclosed within
        # quotation marks, use OPTIONALLY before the ENCLOSED BY option."
        #
        # This implies to me that parsing here is permissive -
        # otherwise unambiguous strings without double quotes around
        # them will be understood as a string, not rejected.
        mysql_fields_optionally_enclosed_by = hint_quotechar
    elif hint_quoting == 'nonnumeric':
        mysql_fields_optionally_enclosed_by = hint_quotechar
    elif hint_quoting is None:
        pass
    else:
        # Exhaustiveness check must name the value this chain
        # dispatches on (was mistakenly passed hint_quotechar).
        _assert_never(hint_quoting)
    quiet_remove(unhandled_hints, 'quoting')
    quiet_remove(unhandled_hints, 'quotechar')

    # Occurrences of the ENCLOSED BY character within a field value
    # can be doubled and are interpreted as a single instance of the
    # character - i.e., when quoting is in play MySQL only supports
    # doublequote-style escaping.
    hint_doublequote: HintDoublequote = hints.doublequote
    if hint_quoting is not None:
        # We need to ignore flake8's "is vs ==" check because 'is'
        # doesn't work currently with MyPy's Literal[] case checking
        if hint_doublequote == True:  # noqa: E712
            pass
        elif hint_doublequote == False:  # noqa: E712
            cant_handle_hint(fail_if_cant_handle_hint, 'doublequote', records_format.hints)
        else:
            _assert_never(hint_doublequote)
    quiet_remove(unhandled_hints, 'doublequote')

    # FIELDS ESCAPED BY controls how special characters are read: if
    # non-empty, occurrences of that character are stripped and the
    # following character taken literally; if empty, escape-sequence
    # interpretation does not occur.
    hint_escape: HintEscape = hints.escape
    if hint_escape is None:
        mysql_fields_escaped_by = None
    elif hint_escape == '\\':
        mysql_fields_escaped_by = '\\'
    else:
        # Exhaustiveness check must name the value this chain
        # dispatches on (was mistakenly passed hint_quoting).
        _assert_never(hint_escape)
    quiet_remove(unhandled_hints, 'escape')

    mysql_lines_starting_by = ''

    hint_record_terminator: HintRecordTerminator = hints.record_terminator
    mysql_lines_terminated_by = hint_record_terminator
    quiet_remove(unhandled_hints, 'record-terminator')

    hint_header_row: HintHeaderRow = hints.header_row
    # We need to ignore flake8's "is vs ==" check because 'is'
    # doesn't work currently with MyPy's Literal[] case checking
    if hint_header_row == True:  # noqa: E712
        mysql_ignore_n_lines = 1
    elif hint_header_row == False:  # noqa: E712
        mysql_ignore_n_lines = 0
    else:
        _assert_never(hint_header_row)
    quiet_remove(unhandled_hints, 'header-row')

    # LOAD DATA reads uncompressed files only.
    hint_compression: HintCompression = hints.compression
    if hint_compression is not None:
        cant_handle_hint(fail_if_cant_handle_hint, 'compression', records_format.hints)
    quiet_remove(unhandled_hints, 'compression')

    # Date/time parsing in MySQL seems to be permissive enough to
    # accept all formats we've sent at it in integration tests.  That
    # said, DD/MM support is unlikely to work if the server isn't set
    # to a compatible locale.  This is true for a number of the
    # database drivers; the backlog item to address is here:
    #
    # https://github.com/bluelabsio/records-mover/issues/75
    #
    # To address, we'll want to look into "set trade_date" per
    # https://stackoverflow.com/questions/44171283/load-data-local-infile-with-sqlalchemy-and-pymysql
    quiet_remove(unhandled_hints, 'dateformat')
    quiet_remove(unhandled_hints, 'timeonlyformat')
    quiet_remove(unhandled_hints, 'datetimeformat')
    quiet_remove(unhandled_hints, 'datetimeformattz')

    return MySqlLoadOptions(character_set=mysql_character_set,
                            fields_terminated_by=mysql_fields_terminator,
                            fields_enclosed_by=mysql_fields_enclosed_by,
                            fields_optionally_enclosed_by=mysql_fields_optionally_enclosed_by,
                            fields_escaped_by=mysql_fields_escaped_by,
                            lines_starting_by=mysql_lines_starting_by,
                            lines_terminated_by=mysql_lines_terminated_by,
                            ignore_n_lines=mysql_ignore_n_lines)
# Example #6
# 0
def postgres_copy_options_csv(unhandled_hints: Set[str],
                              hints: ValidatedRecordsHints,
                              fail_if_cant_handle_hint: bool,
                              mode: CopyOptionsModeType) ->\
        PostgresCopyOptions:
    """Build the COPY options for Postgres' "csv" format.

    Maps records-format hints onto COPY ... WITH (FORMAT csv, ...)
    options for either loading (COPY FROM) or unloading (COPY TO),
    checking handled hints off unhandled_hints and routing anything
    csv format can't express through cant_handle_hint().  Options
    shared with the text format are added by
    postgres_copy_options_common().
    """
    postgres_options: PostgresCopyOptions = {}

    # FORMAT
    #
    #  Selects the data format to be read or written: text, csv (Comma
    #  Separated Values), or binary. The default is text.
    postgres_options['format'] = 'csv'

    # QUOTE
    #
    #  Specifies the quoting character to be used when a data value is
    #  quoted. The default is double-quote. This must be a single
    #  one-byte character. This option is allowed only when using CSV
    #  format.
    postgres_options['quote'] = hints.quotechar
    quiet_remove(unhandled_hints, 'quotechar')

    # ESCAPE
    #
    #  Specifies the character that should appear before a data
    #  character that matches the QUOTE value. The default is the same
    #  as the QUOTE value (so that the quoting character is doubled if
    #  it appears in the data) -- i.e., Postgres' csv format only
    #  supports doublequote-style escaping, not a separate escape char.
    if not hints.doublequote:
        cant_handle_hint(fail_if_cant_handle_hint, 'doublequote', hints)
    else:
        quiet_remove(unhandled_hints, 'doublequote')

    if hints.escape is not None:
        cant_handle_hint(fail_if_cant_handle_hint, 'escape', hints)
    else:
        quiet_remove(unhandled_hints, 'escape')

    # FORCE_QUOTE
    #
    #  Forces quoting to be used for all non-NULL values in each
    #  specified column. NULL output is never quoted. If * is
    #  specified, non-NULL values will be quoted in all columns. This
    #  option is allowed only in COPY TO, and only when using CSV
    #  format.
    if mode is CopyOptionsMode.LOADING:
        # On load there is no quoting knob; Postgres parses quotes
        # permissively, which matches only the 'minimal' hint.
        if hints.quoting != 'minimal':
            cant_handle_hint(fail_if_cant_handle_hint, 'quoting', hints)
        else:
            quiet_remove(unhandled_hints, 'quoting')
    elif mode is CopyOptionsMode.UNLOADING:
        # On unload, values are quoted only when needed (the default,
        # matching 'minimal') unless FORCE_QUOTE '*' is given ('all').
        if hints.quoting == 'minimal':
            pass  # default
        elif hints.quoting == 'all':
            postgres_options['force_quote'] = '*'
        else:
            cant_handle_hint(fail_if_cant_handle_hint, 'quoting', hints)
    else:
        _assert_never(mode)
    quiet_remove(unhandled_hints, 'quoting')

    # The 9.2 docs don't state which newline styles csv mode accepts
    # (https://www.postgresql.org/docs/9.2/sql-copy.html#AEN67247), so
    # we tested: unix2mac/unix2dos conversions of
    # tests/integration/resources/delimited-bigquery-with-header.csv
    # all load fine via mvrec file2table, i.e. \n, \r and \r\n inputs
    # are all accepted.
    if mode is CopyOptionsMode.LOADING:
        if hints.record_terminator in ("\n", "\r\n", "\r", None):
            quiet_remove(unhandled_hints, 'record-terminator')
        else:
            # Hint name fixed: was the nonexistent 'records-terminator'.
            cant_handle_hint(fail_if_cant_handle_hint, 'record-terminator', hints)
    elif mode is CopyOptionsMode.UNLOADING:
        # No control for this is given - exports appear with unix
        # newlines.
        if hints.record_terminator == "\n":
            quiet_remove(unhandled_hints, 'record-terminator')
        else:
            cant_handle_hint(fail_if_cant_handle_hint, 'record-terminator', hints)
    else:
        _assert_never(mode)

    # COPY itself neither compresses nor decompresses.
    if hints.compression is not None:
        cant_handle_hint(fail_if_cant_handle_hint, 'compression', hints)
    else:
        quiet_remove(unhandled_hints, 'compression')

    return postgres_copy_options_common(unhandled_hints,
                                        hints,
                                        fail_if_cant_handle_hint,
                                        postgres_options)
def determine_input_date_order_style(unhandled_hints: Set[str],
                                     hints: ValidatedRecordsHints,
                                     fail_if_cant_handle_hint: bool) ->\
        Optional[DateOrderStyle]:
    """Determine the DateStyle ordering Postgres needs to parse input dates.

    Postgres accepts date/time input "in almost any reasonable
    format", but for ambiguous formats the DateStyle parameter must be
    set to MDY, DMY or YMD:
    https://www.postgresql.org/docs/9.5/datatype-datetime.html#DATATYPE-DATETIME-INPUT
    (the dockerized test server defaults to "ISO, MDY").

    Returns None when every supported hint parses unambiguously under
    any ordering; otherwise the single ordering all hints agree on.
    Unsupported values and conflicting orderings are routed through
    cant_handle_hint(), which raises or warns depending on
    fail_if_cant_handle_hint.

    The psql snippets in the comments below record the behavior
    observed against a dockerized Postgres via './itest shell'.
    """
    date_order_style: Optional[DateOrderStyle] = None

    def upgrade_date_order_style(style: DateOrderStyle,
                                 hint_name: str) -> None:
        # Record that hint_name needs `style`; complain if an earlier
        # hint already locked in a different ordering.
        nonlocal date_order_style
        if date_order_style not in (None, style):
            cant_handle_hint(fail_if_cant_handle_hint, hint_name, hints)
        else:
            date_order_style = style
            quiet_remove(unhandled_hints, hint_name)

    datetimeformattz = hints.datetimeformattz

    # datetimeformattz: Valid values: "YYYY-MM-DD HH:MI:SSOF",
    # "YYYY-MM-DD HH:MI:SS", "YYYY-MM-DD HH24:MI:SSOF", "MM/DD/YY
    # HH24:MI".  See Redshift docs for more information (HH: is
    # equivalent to HH24:, and times without an offset (OF) are
    # assumed to be in UTC).  Default is "YYYY-MM-DD HH:MI:SSOF".
    if datetimeformattz in [
            'YYYY-MM-DD HH:MI:SSOF', 'YYYY-MM-DD HH24:MI:SSOF'
    ]:
        # Unambiguous under any DateStyle:
        #   select timestamptz '2020-01-01 23:01:01-10'
        #     -> 2020-01-02 09:01:01+00
        quiet_remove(unhandled_hints, 'datetimeformattz')
    elif datetimeformattz == 'YYYY-MM-DD HH:MI:SS':
        # Unambiguous under any DateStyle:
        #   select timestamptz '2020-01-01 23:01:01'
        #     -> 2020-01-01 23:01:01+00
        quiet_remove(unhandled_hints, 'datetimeformattz')
    elif datetimeformattz == "MM/DD/YY HH24:MI":
        # Ambiguous day/month ordering - requires MDY:
        #   select timestamptz '01/02/2999 23:01'
        #     -> 2999-01-02 23:01:00+00
        upgrade_date_order_style('MDY', 'datetimeformattz')
    else:
        cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformattz', hints)

    # datetimeformat: Valid values: "YYYY-MM-DD HH24:MI:SS",
    # "YYYY-MM-DD HH12:MI AM", "MM/DD/YY HH24:MI". See Redshift docs
    # for more information.
    datetimeformat = hints.datetimeformat

    if datetimeformat in ("YYYY-MM-DD HH24:MI:SS", "YYYY-MM-DD HH:MI:SS"):
        # Unambiguous under any DateStyle:
        #   select timestamp '2020-01-02 15:13:12' -> 2020-01-02 15:13:12
        quiet_remove(unhandled_hints, 'datetimeformat')
    elif datetimeformat == "YYYY-MM-DD HH12:MI AM":
        # Unambiguous under any DateStyle:
        #   select timestamp '2020-01-02 1:13 PM' -> 2020-01-02 13:13:00
        quiet_remove(unhandled_hints, 'datetimeformat')
    elif datetimeformat == "MM/DD/YY HH24:MI":
        # Ambiguous day/month ordering - requires MDY:
        #   select timestamp '01/02/20 15:23' -> 2020-01-02 15:23:00
        upgrade_date_order_style('MDY', 'datetimeformat')
    else:
        cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformat', hints)

    timeonlyformat = hints.timeonlyformat

    # timeonlyformat: Valid values: "HH12:MI AM" (e.g., "1:00 PM"),
    # "HH24:MI:SS" (e.g., "13:00:00")
    if timeonlyformat == "HH12:MI AM":
        # Supported: select time '1:00 PM' -> 13:00:00
        quiet_remove(unhandled_hints, 'timeonlyformat')
    elif timeonlyformat == "HH24:MI:SS":
        # Supported: select time '13:00:00' -> 13:00:00
        quiet_remove(unhandled_hints, 'timeonlyformat')
    else:
        # Hint name fixed: this branch previously complained about
        # 'datetimeformat' even though it validates timeonlyformat.
        cant_handle_hint(fail_if_cant_handle_hint, 'timeonlyformat', hints)

    # dateformat: Valid values: null, "YYYY-MM-DD", "MM-DD-YYYY", "DD-MM-YYYY", "MM/DD/YY".
    dateformat = hints.dateformat

    if dateformat == "YYYY-MM-DD":
        # Unambiguous under any DateStyle:
        #   select date '1999-01-02' -> 1999-01-02
        quiet_remove(unhandled_hints, 'dateformat')
    elif dateformat == "MM-DD-YYYY":
        # Ambiguous day/month ordering - requires MDY:
        #   select date '01-02-1999' -> 1999-01-02
        upgrade_date_order_style('MDY', 'dateformat')
    elif dateformat == "DD-MM-YYYY":
        # Not supported by the default MDY; needs DMY:
        #   select date '02-01-1999' -> 1999-02-01
        upgrade_date_order_style('DMY', 'dateformat')
    elif dateformat == "MM/DD/YY":
        # Ambiguous day/month ordering - requires MDY:
        #   select date '01/02/99' -> 1999-01-02
        upgrade_date_order_style('MDY', 'dateformat')
    elif dateformat is None:
        # null implies that that date format is unknown, and that the
        # implementation SHOULD generate using their default value and
        # parse permissively.
        #
        # ...which is what Postgres does!
        quiet_remove(unhandled_hints, 'dateformat')
    else:
        cant_handle_hint(fail_if_cant_handle_hint, 'dateformat', hints)

    return date_order_style
# Example #8
# 0
def postgres_copy_options_text(unhandled_hints: Set[str],
                               hints: ValidatedRecordsHints,
                               fail_if_cant_handle_hint: bool,
                               mode: CopyOptionsModeType) ->\
        PostgresCopyOptions:
    """Build the COPY options for Postgres' plain "text" format.

    Hints that map cleanly onto text-format COPY semantics are checked
    off unhandled_hints; anything the text format cannot express is
    routed through cant_handle_hint(), which raises or warns per
    fail_if_cant_handle_hint.  Options shared with the csv format are
    filled in by postgres_copy_options_common().
    """
    def handled(hint_name: str) -> None:
        quiet_remove(unhandled_hints, hint_name)

    def unsupported(hint_name: str) -> None:
        cant_handle_hint(fail_if_cant_handle_hint, hint_name, hints)

    # FORMAT: "text" is Postgres' default tab-delimited representation.
    copy_options: PostgresCopyOptions = {'format': 'text'}

    # Text format always backslash-escapes backslash itself, newline,
    # carriage return and the delimiter within values, so the records
    # format must declare an escape character.
    if hints.escape is None:
        unsupported('escape')
    else:
        handled('escape')

    # QUOTE applies only to csv format; text format never quotes.
    handled('quotechar')

    # Doubled-quote escaping (ESCAPE defaulting to QUOTE) is likewise
    # a csv-only concept, so a doublequote hint can't be honored here.
    if hints.doublequote:
        unsupported('doublequote')
    else:
        handled('doublequote')

    # Text format performs no quoting at all (FORCE_QUOTE is csv/COPY
    # TO only), so only a None quoting hint is expressible.
    if hints.quoting is not None:
        unsupported('quoting')
    else:
        handled('quoting')

    # COPY FROM accepts newline, carriage-return or CRLF line endings
    # (and complains if they're mixed); COPY TO STDOUT always emits
    # "\n" regardless of server platform.
    if mode is CopyOptionsMode.LOADING:
        if hints.record_terminator in ("\n", "\r", "\r\n"):
            handled('record-terminator')
        else:
            unsupported('record-terminator')
    elif mode is CopyOptionsMode.UNLOADING:
        if hints.record_terminator == "\n":
            handled('record-terminator')
        else:
            unsupported('record-terminator')
    else:
        _assert_never(mode)

    # COPY itself neither compresses nor decompresses.
    if hints.compression is not None:
        unsupported('compression')
    else:
        handled('compression')

    return postgres_copy_options_common(unhandled_hints, hints,
                                        fail_if_cant_handle_hint,
                                        copy_options)