def _to_doc(docfields): doc = {} for key in docfields: value = docfields[key] key = util.decode(key) if isinstance(value, list): value = list(map(util.decode, value)) else: value = util.decode(value) if key == "tag" and not isinstance(value, list): value = [value] doc[key] = value return doc
def test_index_pprint_default(capsys, index_one):
    """Default ``pprint`` output starts with the first document's id line."""
    index_one.pprint()

    first_doc = list(index_one)[0]
    expected_prefix = "id: " + util.decode(first_doc["id"]) + "\n"

    captured_out, captured_err = capsys.readouterr()
    # Nothing may be written to stderr, and stdout must lead with the id.
    assert not captured_err
    assert captured_out.startswith(expected_prefix)
def conf():
    """Return a config parser holding ``main.data = /app/data``.

    ``SafeConfigParser`` is used when the ``configparser`` module provides
    it; otherwise ``ConfigParser`` is used instead.
    """
    # Fall back to ConfigParser where SafeConfigParser is not available.
    parser_cls = getattr(
        configparser, "SafeConfigParser", configparser.ConfigParser
    )
    parser = parser_cls()
    parser.add_section("main")
    parser.set("main", "data", util.decode("/app/data"))
    return parser
def _add(writer, **kwargs):
    """Normalise document fields and hand them to ``writer.update_document``.

    * ``text`` must not be supplied; it is derived here from the tags and
      the content.
    * ``_no_update`` (default ``False``) is removed from the fields; when
      it is true the ``updated`` timestamp is left untouched.
    * A missing or empty ``id`` is filled in with
      ``schema.identifier(kwargs)``.
    * ``tag`` may be a list/tuple or a comma-separated string; it is
      stored as a list of stripped, non-empty tags.
    * Every field value is passed through ``util.strip`` and then
      ``util.decode`` before being written.
    """
    assert "text" not in kwargs
    skip_timestamp = kwargs.pop("_no_update", False)

    if not kwargs.get("id"):
        kwargs["id"] = schema.identifier(kwargs)

    text_parts = []
    if "tag" in kwargs:
        raw_tags = kwargs["tag"]
        if not isinstance(raw_tags, (list, tuple)):
            raw_tags = raw_tags.split(",")
        stripped_tags = (tag.strip() for tag in raw_tags)
        tags = [tag for tag in stripped_tags if tag]
        # Repeat the tags in the text so they are weighted high.
        text_parts += tags * 4
        kwargs["tag"] = tags
    if "content" in kwargs:
        text_parts.append(kwargs["content"].strip())
    kwargs["text"] = " ".join(text_parts)

    if not skip_timestamp:
        kwargs["updated"] = datetime.now(pytz.utc)

    cleaned = {
        field: util.decode(util.strip(value))
        for field, value in kwargs.items()
    }
    writer.update_document(**cleaned)
def test_decode_iso88591():
    """Bytes that are ISO-8859-1 but not valid UTF-8 still decode."""
    expected = "café"
    latin1_bytes = expected.encode("iso-8859-1")

    # Confirm the input really is rejected by a plain UTF-8 decode.
    with pytest.raises(UnicodeDecodeError):
        latin1_bytes.decode("utf8")

    assert util.decode(latin1_bytes) == expected
def test_decode_utf8():
    """UTF-8 encoded bytes decode back to the original text."""
    expected = "चतरस"
    utf8_bytes = expected.encode("utf8")
    assert isinstance(utf8_bytes, six.binary_type)

    decoded = util.decode(utf8_bytes)
    assert isinstance(decoded, six.text_type)
    assert decoded == expected
def test_decode_ascii():
    """ASCII encoded bytes decode to the equivalent text."""
    expected = "asdf"
    ascii_bytes = expected.encode("ascii")
    assert isinstance(ascii_bytes, six.binary_type)

    decoded = util.decode(ascii_bytes)
    assert isinstance(decoded, six.text_type)
    assert decoded == expected
def test_decode_nonstr():
    """A non-string input (here the integer 0) decodes to an equal value."""
    result = util.decode(0)
    assert result == 0