Exemple #1
0
def test_coerce_types():
    """Check coerce_types on rows whose second column holds digit strings.

    The result must keep all four rows, leave the first-column strings
    "happy" and "4" untouched, and turn "9" into a value equal to 9.
    """
    rows = [
        ["happy", "9"],
        ["day", "8"],
        ["4", "7"],
        ["YOU!", "6"],
    ]

    coerced = coerce_types(rows)

    assert len(coerced) == 4
    assert coerced[0][0] == "happy"
    assert coerced[0][1] == 9
    assert coerced[2][0] == "4"
Exemple #2
0
def test_coerce_types_with_mix_floats_ints():
    """Check coerce_types when the second column mixes "9" and "8.7" forms.

    Each coerced second-column value is compared numerically, so 9.0 and 7
    are accepted for the inputs "9" and "7.0" respectively.
    """
    rows = [
        ["happy", "9"],
        ["day", "8.7"],
        ["4", "7.0"],
        ["YOU!", "6"],
    ]

    coerced = coerce_types(rows)

    assert len(coerced) == 4
    assert coerced[0][0] == "happy"
    assert coerced[0][1] == 9.0
    assert coerced[1][1] == 8.7
    assert coerced[2][1] == 7
Exemple #3
0
def test_coerce_types():
    """Verify coerce_types on four [label, digit-string] rows.

    Expects four rows back, the labels "happy" and "4" still as strings,
    and the first row's second value equal to 9.
    """
    labels = ["happy", "day", "4", "YOU!"]
    digits = ["9", "8", "7", "6"]
    table = [[label, digit] for label, digit in zip(labels, digits)]

    result = coerce_types(table)

    assert len(result) == 4
    assert result[0][0] == "happy"
    assert result[0][1] == 9
    assert result[2][0] == "4"
Exemple #4
0
def test_coerce_types_with_mix_floats_ints():
    """Verify coerce_types when numeric strings mix int and float forms.

    The second-column values are checked by numeric equality: "9" against
    9.0, "8.7" against 8.7 and "7.0" against 7.
    """
    labels = ["happy", "day", "4", "YOU!"]
    numbers = ["9", "8.7", "7.0", "6"]
    table = [[label, number] for label, number in zip(labels, numbers)]

    result = coerce_types(table)

    assert len(result) == 4
    assert result[0][0] == "happy"
    assert result[0][1] == 9.0
    assert result[1][1] == 8.7
    assert result[2][1] == 7
def tokens2json(ids, names, field, split, sep, token_docs):
    '''Convert a set of token documents into a
    JSON array of document objects.

    Each entry of ``token_docs`` becomes one object with the keys ``id``,
    ``name`` and ``field`` (in that order). The resulting list is serialised
    with ``json.dumps(..., indent=2)`` and handed to ``output``; nothing is
    returned.

    Parameters:
        ids: argument forwarded to ``read_names``; the sequence it returns
            supplies the per-document ``id`` values.
        names: argument forwarded to ``read_names``; the sequence it returns
            supplies the per-document ``name`` values.
        field: key under which a document's content is stored.
        split: when truthy, the document is parsed with ``read_csv`` using
            ``sep`` and then passed through ``coerce_types``; otherwise it
            is read with ``read_tokens``.
        sep: separator forwarded to ``read_csv`` (only used when ``split``
            is truthy).
        token_docs: iterable of file paths; the path ``'-'`` means read
            from standard input.
    '''

    docs = []

    names = read_names(names)
    ids = read_names(ids)

    for idx, path in enumerate(token_docs):
        from_stdin = path == '-'
        tokens_doc = sys.stdin if from_stdin else open(path, 'r')
        try:
            if split:
                content = read_csv(tokens_doc, sep)
                content = coerce_types(content)
            else:
                content = read_tokens(tokens_doc)
        finally:
            # Release the handle even when parsing fails. Only close files
            # opened here; sys.stdin belongs to the process, not to us.
            if not from_stdin:
                tokens_doc.close()

        # ordered so that these attributes stay at the top
        doc = OrderedDict()

        # NOTE(review): the "- 1" bound means the last entry of ids/names is
        # never used. That is correct only if read_names yields a trailing
        # blank entry; otherwise it is an off-by-one. Confirm against
        # read_names before changing.
        if idx < len(ids) - 1:
            doc['id'] = ids[idx]
        else:
            # No usable id for this position: fall back to the path.
            doc['id'] = path

        if idx < len(names) - 1:
            doc['name'] = names[idx]
        else:
            # No usable name for this position: fall back to the path.
            doc['name'] = path

        doc[field] = content
        docs.append(doc)

    out_content = json.dumps(docs, indent=2)
    output(out_content)