def test_basic_content(self):
    """Check token features for a note with one div of plain text.

    The markup holds the words "Hi there"; the test expects exactly one
    lowercase ``CONTENT-TOKEN-*`` key per word, each mapped to 1.
    """
    note_body = StringIO("<en-note><div>Hi there</div></en-note>")

    extracted = {}
    features.add_content_features(extracted, note_body)

    wanted = {
        "CONTENT-TOKEN-hi": 1,
        "CONTENT-TOKEN-there": 1,
    }
    self.assertEqual(extracted, wanted)
def test_entities(self):
    """Check token features when the text includes a '>' character.

    Two divs are supplied; "test" occurs in both and is still expected to
    map to 1, alongside "test2" and the literal ">" token.
    """
    # NOTE(review): despite the test name, the markup below contains a bare
    # '>' and no entity references -- confirm whether entity-encoded text
    # (e.g. '&gt;') was intended here.
    note_body = StringIO('<en-note><div>test ></div>'
                         '<div>test test2</div></en-note>')

    extracted = {}
    features.add_content_features(extracted, note_body)

    wanted = {
        "CONTENT-TOKEN-test": 1,
        "CONTENT-TOKEN-test2": 1,
        "CONTENT-TOKEN->": 1,
    }
    self.assertEqual(extracted, wanted)
def test_complex_content(self):
    """Check token features for text spread over nested and sibling tags.

    "Hi" appears in two separate divs and "there" sits inside a ``<b>``
    element; the test expects one key per distinct lowercase word, each
    mapped to 1.
    """
    note_body = StringIO("<en-note><div>Hi <b>there</b> friend</div>"
                         "<div> Hi</div>"
                         "</en-note>")

    extracted = {}
    features.add_content_features(extracted, note_body)

    wanted = {
        "CONTENT-TOKEN-hi": 1,
        "CONTENT-TOKEN-there": 1,
        "CONTENT-TOKEN-friend": 1,
    }
    self.assertEqual(extracted, wanted)
def test_unicode(self):
    """Check token features for a UTF-8 encoded note with non-ASCII text.

    The document carries an XML declaration and a DOCTYPE, and is encoded
    to UTF-8 before being wrapped in a file-like object. The non-ASCII
    word is expected to come back as a unicode feature key.
    """
    document = ('<?xml version="1.0" encoding="UTF-8"?>'
                '<!DOCTYPE en-note SYSTEM '
                '"http://xml.evernote.com/pub/enml.dtd">'
                u'<en-note><div>hi abcdé</div></en-note>')
    encoded = document.encode("utf-8")
    note_body = StringIO(encoded)

    extracted = {}
    features.add_content_features(extracted, note_body)

    wanted = {
        "CONTENT-TOKEN-hi": 1,
        u"CONTENT-TOKEN-abcdé": 1,
    }
    self.assertEqual(extracted, wanted)
def note_featuredict(note, content):
    """Build the feature dictionary for a single note.

    The dictionary always starts with a "DEFAULT" feature set to 1 and is
    then filled in by ``features.add_metadata_features``. Content features
    are added only when ``note.attributes.contentClass`` is None.

    Args:
        note: Note object.
        content: File-like object containing the note content.

    Returns:
        A dictionary where keys are feature names and values are feature
        values.
    """
    result = {"DEFAULT": 1}
    features.add_metadata_features(result, note)

    # Notes that carry a content class skip content-based features.
    if note.attributes.contentClass is not None:
        return result

    features.add_content_features(result, content)
    return result
def test_full_content(self):
    """Check every kind of content feature on one note.

    The markup mixes plain text, two media elements, a todo element, two
    links with an ``href`` and one without. The test expects token, link,
    todo, media and has-link keys, each mapped to 1.
    """
    note_body = StringIO('<en-note><div>test</div> '
                         '<en-media type="image/jpeg"/>'
                         '<en-todo/>'
                         '<a href="http://test1/path">link1</a> '
                         '<a href="http://test2/path">link2</a> '
                         '<a>link3</a>'
                         '<en-media type="image/png"/>'
                         '</en-note>')

    extracted = {}
    features.add_content_features(extracted, note_body)

    wanted = {
        "CONTENT-TOKEN-test": 1,
        "CONTENT-TOKEN-link1": 1,
        "CONTENT-TOKEN-link2": 1,
        "CONTENT-TOKEN-link3": 1,
        "CONTENT-LINK-test1": 1,
        "CONTENT-LINK-test2": 1,
        "CONTENT-TODO": 1,
        "CONTENT-MEDIA-image/jpeg": 1,
        "CONTENT-MEDIA-image/png": 1,
        "CONTENT-HASLINK": 1,
    }
    self.assertEqual(extracted, wanted)
def test_empty_content(self):
    """Check that whitespace-only markup adds no features.

    The note contains only spaces inside nested tags, and the feature
    dictionary is expected to stay empty.
    """
    note_body = StringIO("<en-note><div> <b> </b> </div>"
                         "</en-note>")

    extracted = {}
    features.add_content_features(extracted, note_body)

    nothing_expected = {}
    self.assertEqual(extracted, nothing_expected)