def testAddSame(self):
    """
    Concatenate a collection with a second copy read from the same file,
    passing check_duplicate=True, and check the resulting sets and the
    number of characters in each set.

    NOTE(review): the expected counts (3, 2, 1, 0) are presumably the counts
    of the fixture on its own, i.e. duplicates are not added a second time;
    confirm against data/collection/test.charcol.
    """
    path = os.path.join(self.currdir, "data", "collection", "test.charcol")

    # Two independent collections loaded from the very same fixture file.
    charcol = CharacterCollection()
    charcol.read(path)
    charcol2 = CharacterCollection()
    charcol2.read(path)

    charcol3 = charcol.concatenate(charcol2, check_duplicate=True)

    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias of the same method.
    self.assertEqual(charcol3.get_set_list(), ["一", "三", "二", "四"])
    self.assertEqual(len(charcol3.get_characters("一")), 3)
    self.assertEqual(len(charcol3.get_characters("三")), 2)
    self.assertEqual(len(charcol3.get_characters("二")), 1)
    self.assertEqual(len(charcol3.get_characters("四")), 0)
def setUp(self):
    """
    Load the fixtures used by the tests.

    Sets self.currdir to the directory containing this file, self.cc to a
    CharacterCollection read from data/collection/test.charcol and self.c
    to a Character read from data/character.xml.
    """
    self.currdir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(self.currdir, "data")

    # Character collection fixture.
    charcol_path = os.path.join(data_dir, "collection", "test.charcol")
    self.cc = CharacterCollection()
    self.cc.read(charcol_path)

    # Single character fixture.
    character_path = os.path.join(data_dir, "character.xml")
    self.c = Character()
    self.c.read(character_path)
def get_aggregated_charcol(tuples, dbpath=None):
    """
    Create a character collection out of other character collections,
    character directories, tomoe dictionaries or kuchibue databases.

    tuples: a list of tuples (TYPE, path list)
    dbpath: optional path. When it ends with ".chardb" the sources are
            merged into CharacterCollection(dbpath); otherwise they are
            merged into an in-memory collection.

    If dbpath already exists, the user is asked on the console whether to
    continue (anything other than "y" terminates the program) and whether
    the existing file should be deleted before merging.

    Returns the resulting collection. When exactly one source path is given
    and dbpath is None, the value of _get_charcol() for that source is
    returned directly, without merging.
    """
    # Local import so this function does not depend on the `exit` helper
    # that the `site` module injects into builtins (absent with `python -S`
    # or in frozen applications).
    import sys

    # number of files for each character collection type
    n_files = [len(t[1]) for t in tuples]

    # we don't need to merge character collections if only one is provided
    # this can save a lot of time for large collections
    if sum(n_files) == 1 and dbpath is None:
        idx = n_files.index(1)
        return _get_charcol(tuples[idx][0], tuples[idx][1][0])

    if dbpath is not None and dbpath.endswith(".chardb"):
        if os.path.exists(dbpath):
            # Single-argument, parenthesized print: same output whether or
            # not print is a statement.
            print("%s exists already." % dbpath)
            print("Continuing will modify it...")
            answer = raw_input("Continue anyway? (y/N)")
            if answer != "y":
                sys.exit()

            print("Overwrite to concatenate collections together "
                  "in a new database")
            print("Don't overwrite to append new characters or "
                  "filter (-i,-e,-m) existing database")
            answer = raw_input("Overwrite it? (y/N)")
            if answer == "y":
                os.unlink(dbpath)

        charcol = CharacterCollection(dbpath)
    else:
        # NOTE(review): a dbpath that does not end with ".chardb" is ignored
        # here and the result only lives in memory -- confirm this is wanted.
        charcol = CharacterCollection()  # in memory db

    charcols = [_get_charcol(typ, path)
                for typ, paths in tuples for path in paths]

    charcol.merge(charcols)

    return charcol
def testAdd(self):
    """
    Add two collections read from different fixture files with the `+`
    operator and check the set list and the number of characters per set
    of the result.
    """
    path = os.path.join(self.currdir, "data", "collection", "test.charcol")
    charcol = CharacterCollection()
    charcol.read(path)

    path2 = os.path.join(self.currdir, "data", "collection",
                         "test2.charcol")
    charcol2 = CharacterCollection()
    charcol2.read(path2)

    charcol3 = charcol + charcol2

    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias of the same method.
    self.assertEqual(charcol3.get_set_list(),
                     ["一", "三", "二", "四", "a", "b", "c", "d"])

    # Sets expected from test.charcol.
    self.assertEqual(len(charcol3.get_characters("一")), 3)
    self.assertEqual(len(charcol3.get_characters("三")), 2)
    self.assertEqual(len(charcol3.get_characters("二")), 1)
    self.assertEqual(len(charcol3.get_characters("四")), 0)

    # Sets expected from test2.charcol.
    self.assertEqual(len(charcol3.get_characters("a")), 3)
    self.assertEqual(len(charcol3.get_characters("b")), 2)
    self.assertEqual(len(charcol3.get_characters("c")), 1)
    self.assertEqual(len(charcol3.get_characters("d")), 0)
def _get_charcol(charcol_type, charcol_path):
    """
    Build a character collection from a single source.

    charcol_type: one of the TYPE_* constants, selecting how charcol_path
                  is loaded
    charcol_path: path of the source (a directory for TYPE_DIRECTORY)

    Returns the loaded collection, or None when charcol_type matches none
    of the handled types.
    """
    if charcol_type == TYPE_DIRECTORY:
        # charcol_path is actually a directory here
        return CharacterCollection.from_character_directory(charcol_path)

    if charcol_type in (TYPE_CHARCOL, TYPE_CHARCOL_DB):
        return CharacterCollection(charcol_path)

    if charcol_type == TYPE_TOMOE:
        return tomoe_dict_to_character_collection(charcol_path)

    if charcol_type == TYPE_KUCHIBUE:
        return kuchibue_to_character_collection(charcol_path)

    # Unhandled type: nothing to load.
    return None
def get_character_collection(self):
    """
    Build a CharacterCollection from self._characters and self._labels.

    The i-th label is applied to the i-th character with set_utf8() (the
    stored character objects are modified in place). Characters sharing a
    label are grouped into one set named after that label.

    Returns the new CharacterCollection.
    """
    charcol = CharacterCollection()

    assert(len(self._labels) == len(self._characters))

    # group characters with the same label into sets
    sets = {}
    for i, character in enumerate(self._characters):
        utf8 = self._labels[i]
        character.set_utf8(utf8)
        # Append in place instead of rebuilding the list for every
        # character, which made the grouping quadratic per label.
        sets.setdefault(utf8, []).append(character)

    charcol.add_sets(sets.keys())

    for set_name, characters in sets.items():
        charcol.append_characters(set_name, characters)

    return charcol
def __init__(self):
    """Store a CharacterCollection created with no arguments in self._charcol."""
    self._charcol = CharacterCollection()
def testToXML(self):
    """
    Round-trip self.cc through to_xml() and read_string() and check that
    the set list and the characters of the copy equal the original's.
    """
    charcol2 = CharacterCollection()
    charcol2.read_string(self.cc.to_xml())

    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias of the same method.
    self.assertEqual(self.cc.get_set_list(), charcol2.get_set_list())
    self.assertEqual(self.cc.get_all_characters(),
                     charcol2.get_all_characters())
def testWriteBz2String(self):
    """
    Round-trip self.cc through write_string(bz2=True) and
    read_string(..., bz2=True) and check that the set list and the
    characters of the copy equal the original's.
    """
    charcol2 = CharacterCollection()
    charcol2.read_string(self.cc.write_string(bz2=True), bz2=True)

    # assertEqual is the supported spelling; assertEquals is a deprecated
    # alias of the same method.
    self.assertEqual(self.cc.get_set_list(), charcol2.get_set_list())
    self.assertEqual(self.cc.get_all_characters(),
                     charcol2.get_all_characters())