def group_best():
    """
    Merge a selection of character databases stored under "unipen_db/"
    into a single collection.

    The total number of characters of the merged collection is printed,
    then the collection is saved as "unipen_db/best_1c.chardb".
    """
    # Candidate selections of database names. Only best_1c is used below;
    # the other two are kept for reference.
    best_1a = ['aga', 'apa', 'apb', 'app', 'art', 'ced', 'gmd', 'ibm',
               'ipm', 'pri', 'syn', 'uqb', 'val']
    best_1b = ['aga', 'apa', 'apb', 'app', 'art', 'ced', 'ibm', 'kai',
               'lou', 'pri', 'syn', 'tos', 'upb', 'val', 'cea', 'ceb']
    best_1c = ['aga', 'apa', 'apb', 'app', 'art', 'cea', 'ceb', 'ced',
               'gmd', 'ibm', 'imp', 'kai', 'lav', 'lou', 'mot', 'pri',
               'sie', 'syn', 'tos', 'val']

    selected_names = best_1c

    # One collection per database file, opened in list order.
    # (To debug, print each name with its get_total_n_characters().)
    sources = [CharacterCollection('unipen_db/' + name + '.chardb')
               for name in selected_names]

    merged = CharacterCollection()
    merged.merge(sources)

    # Single-argument print: same output as the print statement.
    print(merged.get_total_n_characters())
    merged.save("unipen_db/best_1c.chardb")


# unipen_to_sqlite()
# group_unipen_db()
# group_best()
def get_aggregated_charcol(tuples, dbpath=None):
    """
    Create a character collection out of other character collections,
    character directories, tomoe dictionaries or kuchibue databases.

    tuples: a list of tuples (TYPE, path list)
    dbpath: optional path of a database to aggregate into. It is only
            used when it ends with ".chardb"; otherwise the aggregated
            collection is created in memory.

    If dbpath already exists, the user is asked on the console whether
    to continue and whether to overwrite the file. Declining to continue
    terminates the program (SystemExit).

    Returns the resulting character collection. When exactly one path is
    given and dbpath is None, the collection for that path is returned
    directly, without merging.
    """
    # Function-scope import so the block does not depend on the file's
    # import section.
    import sys

    # number of files for each character collection type
    n_files = [len(t[1]) for t in tuples]

    # we don't need to merge character collections if only one is provided
    # this can save a lot of time for large collections
    if sum(n_files) == 1 and dbpath is None:
        idx = n_files.index(1)
        return _get_charcol(tuples[idx][0], tuples[idx][1][0])

    if dbpath is not None and dbpath.endswith(".chardb"):
        if os.path.exists(dbpath):
            print("%s exists already." % dbpath)
            print("Continuing will modify it...")
            answer = raw_input("Continue anyway? (y/N)")
            if answer != "y":
                # sys.exit() rather than the site-provided exit(): the
                # latter is an interactive helper that is absent when the
                # site module is not loaded.
                sys.exit()

            print("Overwrite to concatenate collections together "
                  "in a new database")
            print("Don't overwrite to append new characters or "
                  "filter (-i,-e,-m) existing database")
            answer = raw_input("Overwrite it? (y/N)")
            if answer == "y":
                # Start from a fresh database file.
                os.unlink(dbpath)
            # Otherwise keep the file: new characters get merged into it.

        charcol = CharacterCollection(dbpath)
        #charcol.WRITE_BACK = False
        #charcol.AUTO_COMMIT = True
    else:
        charcol = CharacterCollection()  # in memory db

    # Load every path of every type, in the order given by the caller.
    charcols = [_get_charcol(typ, path)
                for typ, paths in tuples
                for path in paths]

    charcol.merge(charcols)

    return charcol