Example #1
0
 def copy(src):
     """Return a deep, serialization-friendly copy of src.

     Dicts become OrderedDicts with sorted keys, lists/tuples/sets become
     lists (sets via _sorted_set for a stable order), datetime/date/time
     become lists of their integer components, and timedelta becomes a
     float of seconds.  On Python 2, bytes are promoted to unicode.
     Anything else must be a plain scalar and is returned as-is.
     """
     if isinstance(src, dict):
         result = OrderedDict()
         # Sorted keys give a deterministic ordering.
         for key in sorted(src):
             result[key] = copy(src[key])
         return result
     if isinstance(src, (list, tuple)):
         return [copy(item) for item in src]
     if isinstance(src, set):
         return [copy(item) for item in _sorted_set(src)]
     # datetime must be tested before date (datetime subclasses date).
     if isinstance(src, datetime):
         return [
             src.year, src.month, src.day,
             src.hour, src.minute, src.second, src.microsecond,
         ]
     if isinstance(src, date):
         return [src.year, src.month, src.day]
     if isinstance(src, time):
         # Dummy epoch date so times serialize with the same shape as datetimes.
         return [1970, 1, 1, src.hour, src.minute, src.second, src.microsecond]
     if isinstance(src, timedelta):
         return src.total_seconds()
     if PY2 and isinstance(src, bytes):
         return uni(src)
     assert isinstance(src, (str, unicode, int, float, long,
                             bool)) or src is None, type(src)
     return src
Example #2
0
def namefix(d, name):
    """Return a sanitized version of name that does not collide with keys in d.

    Characters outside a safe alphanumeric/._- set become '_'.  The name
    'default' is reserved while chaining is enabled, and underscores are
    appended until the result is absent from d.
    """
    allowed = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._-'
    cleaned = ''.join(ch if ch in allowed else '_' for ch in uni(name))
    if cleaned == 'default' and options.chaining != 'off':
        cleaned = 'default_'
    while cleaned in d:
        cleaned += '_'
    return cleaned
Example #3
0
File: extras.py  Project: eBay/accelerator
	def typefix(e):
		"""Recursively normalize container types.

		Dicts are rebuilt as dict_type with fixed keys and values,
		lists/tuples/sets become plain lists, Python 2 byte strings are
		promoted to unicode, and everything else passes through unchanged.
		"""
		if isinstance(e, dict):
			return dict_type((typefix(k), typefix(v)) for k, v in iteritems(e))
		if isinstance(e, (list, tuple, set)):
			return [typefix(item) for item in e]
		if PY2 and isinstance(e, bytes):
			return uni(e)
		return e
Example #4
0
 def _options(self, optionsdict, title='Options'):
     if not optionsdict:
         return
     self.println(title)
     maxlen = max(len(k) for k in optionsdict)
     for k, v in sorted(optionsdict.items()):
         k = uni(k).ljust(maxlen)
         if isinstance(v, (list, tuple)):
             self.println('  %s :' % (k, ))
             for t in v:
                 self.println('  %s   %s' % (
                     ' ' * maxlen,
                     uni(t),
                 ))
         else:
             self.println("  %s : %s " % (
                 k,
                 uni(v),
             ))
Example #5
0
 def printvec(self, vec, columns):
     """Write the elements of vec in `columns` columns, each entry prefixed
     with its 3-digit index and padded so a full row is ~80 characters.

     A newline is written after each full row, and once more at the end if
     the last row was left incomplete.

     Fix: the original raised NameError on an empty vec, because `ix` was
     only bound inside the loop but read after it.  A sentinel keeps the
     trailing-newline check safe (and emits nothing for an empty vec).
     """
     spacing = 80 // columns - 6
     ix = -1  # sentinel: -1 % columns == columns - 1, so no trailing newline
     for ix, x in enumerate(vec):
         self.write('  %3d %s' % (
             ix,
             uni(x).ljust(spacing),
         ))
         if ix % columns == columns - 1:
             self.write('\n')
     if ix >= 0 and ix % columns != columns - 1:
         self.write('\n')
Example #6
0
def char2int(name, empty_value, specials="empty"):
	"""Resolve the option called name to its iso-8859-1 byte value.

	A falsy option value yields empty_value.  Otherwise the value must be
	a single character encodable as iso-8859-1; the encoded byte is passed
	to the C backend.  Raises Exception on an unencodable value, asserts
	on a multi-character one.
	"""
	value = options.get(name)
	if not value:
		return empty_value
	msg = "%s must be a single iso-8859-1 character (or %s)" % (name, specials,)
	# Normalize bytes -> text before the round trip through iso-8859-1.
	if isinstance(value, bytes):
		value = uni(value)
	try:
		value = value.encode("iso-8859-1")
	except UnicodeEncodeError:
		raise Exception(msg)
	assert len(value) == 1, msg
	return cstuff.backend.char2int(value)
Example #7
0
 def __exit__(self, type, value, tb):
     # Save the report on context exit even if the body raised an exception.
     # If saving the report itself fails, that failure is suppressed so the
     # original exception (if any) propagates instead; only when there was
     # no original exception do we re-raise the save error.
     try:
         if tb is None:
             # Clean exit only; presumably finalizes the report text —
             # see line() elsewhere in this class.
             self.line()
         with open('report.txt', 'w', encoding='utf-8') as F:
             F.write(uni(self.s))
         if self.stdout:
             print(self.s)
     except Exception:
         # This logic looks backwards, but it isn't: re-raise the save
         # failure only when there is no original exception to preserve.
         if tb is None:
             raise
     finally:
         # Mark closed no matter what, so write() refuses further output.
         self._closed = True
def synthesis(job):
    """Exercise csvimport against many malformed/edge-case inputs.

    Each check_good_file call feeds one crafted byte blob through
    csvimport and verifies the resulting dataset (expected rows keyed by
    the "ix" column), plus any expected bad/skipped line datasets.
    check_array / check_bad_file / check_no_separator cover value-level
    quoting, oversized lines, structurally broken files and separator
    detection respectively.
    """
    # Line-ending, quoting and field-count edge cases.
    check_good_file(job, "mixed line endings",
                    b"ix,0,1\r\n1,a,a\n2,b,b\r\n3,c,c", {
                        1: b"a",
                        2: b"b",
                        3: b"c"
                    })
    check_good_file(job, "ignored quotes",
                    b"ix,0,1\n1,'a,'a\n2,'b','b'\n3,\"c\",\"c\"\n4,d',d'\n", {
                        1: b"'a",
                        2: b"'b'",
                        3: b'"c"',
                        4: b"d'"
                    })
    check_good_file(job,
                    "ignored quotes and extra fields",
                    b"ix,0,1\n1,\"a,\"a\n2,'b,c',d\n3,d\",d\"\n", {
                        1: b'"a',
                        3: b'd"'
                    },
                    allow_bad=True,
                    d_bad={3: b"2,'b,c',d"})
    check_good_file(
        job,
        "spaces and quotes",
        b"ix,0,1\none,a,a\ntwo, b, b\n three,c,c\n4,\"d\"\"\",d\"\n5, 'e',\" 'e'\"\n",
        {
            b"one": b"a",
            b"two": b" b",
            b" three": b"c",
            4: b'd"',
            5: b" 'e'"
        },
        quotes=True)
    check_good_file(job,
                    "empty fields",
                    b"ix,0,1\n1,,''\n2,,\n3,'',\n4,\"\",", {
                        1: b"",
                        2: b"",
                        3: b"",
                        4: b""
                    },
                    quotes=True)
    # Label manipulation: renaming, discarding, overriding.
    check_good_file(job,
                    "renamed fields",
                    b"0,1,2\n0,foo,foo", {0: b"foo"},
                    rename={
                        "0": "ix",
                        "2": "0"
                    })
    check_good_file(job,
                    "discarded field",
                    b"ix,0,no,1\n0,yes,no,yes\n1,a,'foo,bar',a", {
                        0: b"yes",
                        1: b"a"
                    },
                    quotes=True,
                    discard={"no"})
    check_good_file(
        job,
        "bad quotes",
        b"""ix,0,1\n1,a,a\n2,"b,"b\n\n3,'c'c','c'c'\n4,"d",'d'\n""", {
            1: b"a",
            4: b"d"
        },
        quotes=True,
        allow_bad=True,
        d_bad={
            3: b'2,"b,"b',
            4: b"",
            5: b"3,'c'c','c'c'"
        })
    # Comment handling (and proving it's off by default).
    check_good_file(job,
                    "comments",
                    b"""# blah\nix,0,1\n1,a,a\n2,b,b\n#3,c,c\n4,#d,#d\n""", {
                        1: b"a",
                        2: b"b",
                        4: b"#d"
                    },
                    comment="#",
                    d_skipped={
                        1: b"# blah",
                        5: b"#3,c,c"
                    })
    check_good_file(job, "not comments",
                    b"""ix,0,1\n1,a,a\n2,b,b\n#3,c,c\n4,#d,#d\n""", {
                        1: b"a",
                        2: b"b",
                        b"#3": b"c",
                        4: b"#d"
                    })
    check_good_file(
        job,
        "a little of everything",
        b""";not,1,labels\na,2,1\n;a,3,;a\n";b",4,;b\n'c,5,c'\r\n d,6,' d'\ne,7,e,\n,8,""",
        {
            4: b";b",
            6: b" d",
            8: b""
        },
        allow_bad=True,
        rename={
            "a": "0",
            "2": "ix"
        },
        quotes=True,
        comment=";",
        d_bad={
            5: b"'c,5,c'",
            7: b"e,7,e,"
        },
        d_skipped={
            1: b";not,1,labels",
            3: b";a,3,;a"
        })
    # skip_lines interaction with labels, bad lines and empty files.
    check_good_file(job,
                    "skipped lines",
                    b"""just some text\n\nix,0,1\n1,a,a\n2,b,b""", {
                        1: b"a",
                        2: b"b"
                    },
                    skip_lines=2,
                    d_skipped={
                        1: b"just some text",
                        2: b""
                    })
    check_good_file(job,
                    "skipped and bad lines",
                    b"""not data here\nnor here\nix,0,1\n1,a,a\n2,b\n3,c,c""",
                    {
                        1: b"a",
                        3: b"c"
                    },
                    skip_lines=2,
                    allow_bad=True,
                    d_bad={5: b"2,b"},
                    d_skipped={
                        1: b"not data here",
                        2: b"nor here"
                    })
    check_good_file(job,
                    "override labels",
                    b"""a,b,c\n0,foo,foo""", {0: b"foo"},
                    labels=["ix", "0", "1"])
    check_good_file(job, "only labels", b"""ix,0,1""", {})
    check_good_file(job, "empty file", b"", {}, labels=["ix", "0", "1"])
    # lineno_label: line numbers must stay accurate across bad/skipped lines.
    check_good_file(job,
                    "lineno with bad lines",
                    b"ix,0,1\n2,a,a\n3,b\nc\n5,d,d\n6,e,e\n7\n8,g,g\n\n", {
                        2: b"a",
                        5: b"d",
                        6: b"e",
                        8: b"g"
                    },
                    d_bad={
                        3: b"3,b",
                        4: b"c",
                        7: b"7",
                        9: b""
                    },
                    allow_bad=True,
                    lineno_label="num")
    check_good_file(job,
                    "lineno with skipped lines",
                    b"a\nb\n3,c,c\n4,d,d", {
                        3: b"c",
                        4: b"d"
                    },
                    lineno_label="l",
                    labels=["ix", "0", "1"],
                    labelsonfirstline=False,
                    skip_lines=2,
                    d_skipped={
                        1: b"a",
                        2: b"b"
                    })
    check_good_file(job,
                    "lineno with comment lines",
                    b"ix,0,1\n2,a,a\n3,b,b\n#4,c,c\n5,d,d", {
                        2: b"a",
                        3: b"b",
                        5: b"d"
                    },
                    lineno_label="another name",
                    comment="#",
                    d_skipped={4: b"#4,c,c"})
    check_good_file(job,
                    "strip labels",
                    b" ix , 0 , 1 \n1,a,a\n2,b ,b ", {
                        1: b"a",
                        2: b"b "
                    },
                    strip_labels=True)
    # allow_extra_empty: trailing empty (possibly quoted) columns are dropped.
    check_good_file(job,
                    "allow extra empty",
                    b"ix,0,1,,,,\n1,a,a\n2,b,b,,\n3,,,", {
                        1: b"a",
                        2: b"b",
                        3: b""
                    },
                    allow_extra_empty=True)
    check_good_file(job,
                    "allow extra empty quoted",
                    b"ix,_0_,1,,,__,\n1,a,a\n_2_,b,b,__,\n3,c,c,__", {
                        1: b"a",
                        2: b"b",
                        3: b"c"
                    },
                    allow_extra_empty=True,
                    quotes='_')
    check_good_file(
        job,
        "allow extra empty quoted bad",
        b"ix,0,1,,,'',\"\"\n1,a,a\n'2',b,b,'',\n3,c,c,\"\"\n4,d,d,'\"\n5,'',\"\",'",
        {
            1: b"a",
            2: b"b",
            3: b"c"
        },
        allow_extra_empty=True,
        quotes=True,
        allow_bad=True,
        d_bad={
            5: b"4,d,d,'\"",
            6: b"5,'',\"\",'"
        })
    # skip_empty_lines, alone and combined with comments / bad lines.
    check_good_file(job,
                    "skip empty lines",
                    b"\nix,0,1\n\n\n1,a,a\n", {1: b"a"},
                    skip_empty_lines=True)
    check_good_file(job,
                    "skip empty lines and comments",
                    b"\r\nix,0,1\n\n\n5,a,a\n#6,b,b\n7,c,c\n#", {
                        5: b"a",
                        7: b"c"
                    },
                    skip_empty_lines=True,
                    comment="#",
                    d_skipped={
                        1: b"",
                        3: b"",
                        4: b"",
                        6: b"#6,b,b",
                        8: b"#"
                    },
                    lineno_label="line")
    check_good_file(job,
                    "skip empty lines and bad",
                    b"\n\nix,0,1\n4,a,a\n \n6,b,b\n\r\n", {
                        4: b"a",
                        6: b"b"
                    },
                    skip_empty_lines=True,
                    comment="#",
                    d_skipped={
                        1: b"",
                        2: b"",
                        7: b""
                    },
                    d_bad={5: b" "},
                    allow_bad=True,
                    lineno_label="line")

    # Value-level quote parsing: lines that must be rejected vs accepted
    # (the accepted tuples are (raw, expected-parsed-value)).
    bad_lines = [
        b"bad,bad",
        b",",
        b"bad,",
        b",bad",
        b"',',",
        b"'lo there broken line",
        b"'nope\"",
        b"'bad quotes''",
        b'"bad quote " inside"',
        b'"more ""bad"" quotes """ inside"',
    ]
    good_lines = [
        b"\x00", (b"'good, good'", b"good, good"),
        (b'"also good, yeah!"', b"also good, yeah!"),
        (b"'single quote''s inside'", b"single quote's inside"),
        (b"'single quote at end: '''", b"single quote at end: '"),
        (b'"""double quotes around"""', b'"double quotes around"'),
        (b'"double quote at end: """', b'double quote at end: "'),
        (b'" I\'m special "', b" I'm special "), b"I'm not",
        b" unquoted but with spaces around ", (b"','", b","), b"\x00\xff",
        b"\xff\x00\x08\x00",
        (b"'lot''s of ''quotes'' around here: '''''''' '",
         b"lot's of 'quotes' around here: '''' ")
    ]
    check_array(job, good_lines, "strange values.txt", bad_lines, quotes=True)
    # The lines will be 2 * length + 3 bytes (plus lf)
    long_lines = [
        b"a" * length for length in (64 * 1024 - 2, 999, 999, 1999, 3000,
                                     65000, 8 * 1024 * 1024 - 99)
    ]
    check_array(job, long_lines, "long lines.txt")
    check_bad_file(job, "extra field", b"foo,bar\nwith,extra,field\nok,here\n")
    check_bad_file(job, "missing field", b"foo,bar\nmissing\nok,here\n")
    check_bad_file(job, "no valid lines", b"foo\nc,\n")

    # let's also check some really idiotic combinations
    # (separator / newline / comment drawn from NUL, LF, CR and 0xff).
    for combo in permutations([0, 10, 13, 255], 3):
        name = "idiotic.%d.%d.%d" % combo
        sep, newline, comment = (uni(chr(x)) for x in combo)
        data = [
            comment,
            sep.join(["ix", "0", "1"]),
            sep.join(["0", "a", "a"]),
            sep.join([comment + "1", "b", "b"]),
            sep.join(["2", "", ""]),
            comment + sep,
            sep.join(["", "", ""]),
            sep.join(["4", ",", ","]),
            comment,
        ]
        check_good_file(
            job,
            name,
            data=newline.join(data).encode("iso-8859-1"),
            d={
                0: b"a",
                2: b"",
                b"": b"",
                4: b","
            },
            d_skipped={
                k: data[k - 1].encode("iso-8859-1")
                for k in (1, 4, 6, 9)
            },
            separator=sep,
            newline=newline,
            comment=comment,
        )

    check_no_separator(job)
def check_one(job,
              newline,
              sep,
              data,
              want_res=None,
              prefix="",
              quotes=False,
              leave_bad=False):
    """Write data as a csv file, csvimport it, and verify labels and rows.

    data is a list of rows (lists of str); row 0 holds the labels.  The
    file is written with separator byte `sep` and the given newline in
    iso-8859-1; when quotes is set every field is quoted with doubled
    embedded quote characters.  want_res defaults to data[1:] with each
    field ascii-encoded.

    Raises CSVImportException if the csvimport subjob fails,
    WrongLabelsException on a label mismatch and WrongDataException on a
    row mismatch.
    """
    sep_c = uni(chr(sep))
    # Can't have separator character in unquoted values
    if not quotes and not leave_bad:
        data = [[el.replace(sep_c, "") for el in line] for line in data]
    if not want_res:
        want_res = [
            tuple(s.encode("ascii") for s in line) for line in data[1:]
        ]
    # Encode newline into the filename ("CRLF" or the ordinal) for uniqueness.
    filename = "%s_csv.%d.%s.txt" % (prefix, sep, "CRLF"
                                     if newline == "\r\n" else ord(newline))
    newline = uni(newline)
    with job.open(filename, "w", encoding="iso-8859-1", temp=True) as fh:
        for line in data:
            if quotes:
                # Quote each field, doubling embedded quote characters.
                line = [
                    quotes + el.replace(quotes, quotes + quotes) + quotes
                    for el in line
                ]
            fh.write(sep_c.join(line))
            fh.write(newline)
    try:
        jid = subjobs.build("csvimport",
                            options=dict(
                                filename=job.filename(filename),
                                separator=sep_c,
                                quotes=quotes,
                                newline='' if "\n" in newline else newline,
                            ))
    except JobError as e:
        raise CSVImportException(
            "Failed to csvimport for separator %d with newline %r, csvimport error was:\n%s"
            % (sep, newline, e.format_msg()))
    ds = Dataset(jid)
    labels = sorted(ds.columns)
    if labels != data[0]:
        raise WrongLabelsException(
            "csvimport gave wrong labels for separator %d with newline %r: %r (expected %r)"
            % (
                sep,
                newline,
                labels,
                data[0],
            ))
    res = list(ds.iterate(None, data[0]))
    if res != want_res:
        raise WrongDataException(
            "csvimport gave wrong data for separator %d with newline %r: %r (expected %r)"
            % (
                sep,
                newline,
                res,
                want_res,
            ))
Example #10
0
 def write(self, s):
     """Append s (coerced to text via uni) to the accumulated report.

     Asserts that the report has not been closed yet.
     """
     assert not self._closed, 'Closed.'
     self.s = self.s + uni(s)