def fieldUnique(self, name):
    (ord, conf) = self._fmap[name]
    if not conf['uniq']:
        return True
    val = self[name]
    if not val:
        return True
    csum = fieldChecksum(val)
    # the query below already excludes this fact's own id positionally
    fids = self.deck.db.list(
        "select fid from fsums where csum = ? and fid != ? and mid = ?",
        csum, self.id or 0, self.mid)
    if not fids:
        return True
    # grab facts with the same checksums, and see if they're actually
    # duplicates
    for flds in self.deck.db.list("select flds from facts where id in " +
                                  ids2str(fids)):
        fields = splitFields(flds)
        if fields[ord] == val:
            return False
    return True

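# Every snippet in this section leans on fieldChecksum. For orientation, a
# minimal sketch of what the helper computes (reconstructed from anki.utils;
# the exact HTML/media stripping rules are an assumption, and older releases
# hashed the raw field text without stripping):
import hashlib
import re

def stripHTMLMedia(txt):
    # crude approximation: drop [sound:...] references and tags
    txt = re.sub(r"\[sound:[^]]+\]", "", txt)
    return re.sub(r"<[^>]+>", "", txt)

def checksum(data):
    if isinstance(data, str):
        data = data.encode("utf-8")
    return hashlib.sha1(data).hexdigest()

def fieldChecksum(data):
    # 32-bit unsigned int from the first 8 hex digits of the SHA-1 digest;
    # this is what the notes.csum column stores.
    return int(checksum(stripHTMLMedia(data))[:8], 16)
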
def flush(self, mod=None): "If fields or tags have changed, write changes to disk." assert self.scm == self.col.scm self._preFlush() sfld = stripHTMLMedia(self.fields[self.col.models.sortIdx(self._model)]) tags = self.stringTags() fields = self.joinedFields() if not mod and self.col.db.scalar( "select 1 from notes where id = ? and tags = ? and flds = ?", self.id, tags, fields ): return csum = fieldChecksum(self.fields[0]) self.mod = mod if mod else intTime() self.usn = self.col.usn() res = self.col.db.execute( """ insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)""", self.id, self.guid, self.mid, self.mod, self.usn, tags, fields, sfld, csum, self.flags, self.data, ) self.col.tags.register(self.tags) self._postFlush()
def flush(self, mod=None):
    assert self.scm == self.col.scm
    self._preFlush()
    self.mod = mod if mod else intTime()
    self.usn = self.col.usn()
    sfld = stripHTML(self.fields[self.col.models.sortIdx(self._model)])
    tags = self.stringTags()
    csum = fieldChecksum(self.fields[0])
    res = self.col.db.execute(
        """insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)""",
        self.id, self.guid, self.mid, self.mod, self.usn, tags,
        self.joinedFields(), sfld, csum, self.flags, self.data)
    self.col.tags.register(self.tags)
    self._postFlush()

def scmhash(self, m):
    "Return a hash of the schema, to see if models are compatible."
    s = ""
    for f in m['flds']:
        s += f['name']
    for t in m['tmpls']:
        s += t['name']
        s += t['qfmt']
        s += t['afmt']
    return fieldChecksum(s)

def updateFieldChecksums(self):
    self.deck.db.execute("delete from fsums where fid = ?", self.id)
    d = []
    for (ord, conf) in self._fmap.values():
        if not conf['uniq']:
            continue
        val = self.fields[ord]
        if not val:
            continue
        d.append((self.id, self.mid, fieldChecksum(val)))
    self.deck.db.executemany("insert into fsums values (?, ?, ?)", d)

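# The fsums rows written above are (fid, mid, csum) triples, one per
# unique-flagged field. A sketch of the table this fact-era code assumes,
# reconstructed from the insert/delete statements (schema and index names are
# assumptions, not taken from the source):
import sqlite3

db = sqlite3.connect(":memory:")
db.execute("create table fsums (fid integer, mid integer, csum integer)")
db.execute("create index ix_fsums_fid on fsums (fid)")    # for the delete above
db.execute("create index ix_fsums_csum on fsums (csum)")  # for dupe lookups
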
def isDupe(self, data):
    "Takes field, model and returns True if the field is a dupe and False otherwise."
    # find any matching csums and compare
    csum = fieldChecksum(data["field"])
    mid = mw.col.models.byName(data["model"])["id"]
    for flds in mw.col.db.list(
            "select flds from notes where csum = ? and id != ? and mid = ?",
            csum, 0, mid):
        if splitFields(flds)[0] == data["field"]:
            return True
    return False

def updateFieldCache(self, nids):
    "Update field checksums and sort cache, after find&replace, etc."
    snids = ids2str(nids)
    r = []
    for (nid, mid, flds) in self._fieldData(snids):
        fields = splitFields(flds)
        model = self.models.get(mid)
        r.append((stripHTML(fields[self.models.sortIdx(model)]),
                  fieldChecksum(fields[0]),
                  nid))
    # apply, relying on calling code to bump usn+mod
    self.db.executemany("update notes set sfld=?, csum=? where id=?", r)

def dupeOrEmpty(self):
    "1 if first is empty; 2 if first is a duplicate, False otherwise."
    val = self.fields[0]
    if not val.strip():
        return 1
    csum = fieldChecksum(val)
    # find any matching csums and compare
    for flds in self.col.db.list(
            "select flds from notes where csum = ? and id != ? and mid = ?",
            csum, self.id or 0, self.mid):
        if splitFields(flds)[0] == self.fields[0]:
            return 2
    return False

def dupeOrEmpty(self):
    "1 if first is empty; 2 if first is a duplicate, False otherwise."
    val = self.fields[0]
    if not val.strip():
        return 1
    csum = fieldChecksum(val)
    # find any matching csums and compare
    for flds in self.col.db.list(
            "select flds from notes where csum = ? and id != ? and mid = ?",
            csum, self.id or 0, self.mid):
        if stripHTMLMedia(splitFields(flds)[0]) == stripHTMLMedia(
                self.fields[0]):
            return 2
    return False

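# The two dupeOrEmpty variants above differ only in whether first fields are
# compared raw or after stripHTMLMedia; the csum match alone is never trusted.
# A standalone sketch of that two-step pattern (hypothetical helper name,
# same query shape as the snippets):
def find_first_field_dupes(col, mid, nid, first_field):
    csum = fieldChecksum(first_field)
    target = stripHTMLMedia(first_field)
    return [
        other_nid
        for other_nid, flds in col.db.execute(
            "select id, flds from notes where csum = ? and id != ? and mid = ?",
            csum, nid or 0, mid)
        if stripHTMLMedia(splitFields(flds)[0]) == target
    ]
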
def _findDupes(self, val):
    # caller must call stripHTMLMedia on passed val
    try:
        mid, val = val.split(",", 1)
    except ValueError:
        # no "mid,value" separator; not a valid dupe search
        return
    csum = fieldChecksum(val)
    nids = []
    for nid, flds in self.col.db.execute(
            "select id, flds from notes where mid=? and csum=?",
            mid, csum):
        if stripHTMLMedia(splitFields(flds)[0]) == val:
            nids.append(nid)
    return "n.id in %s" % ids2str(nids)

def updateFieldCache(self, nids):
    "Update field checksums and sort cache, after find&replace, etc."
    snids = ids2str(nids)
    r = []
    for (nid, mid, flds) in self._fieldData(snids):
        fields = splitFields(flds)
        model = self.models.get(mid)
        if not model:
            # note points to invalid model
            continue
        r.append((stripHTML(fields[self.models.sortIdx(model)]),
                  fieldChecksum(fields[0]),
                  nid))
    # apply, relying on calling code to bump usn+mod
    self.db.executemany("update notes set sfld=?, csum=? where id=?", r)

def updateFieldCache(self, fids, csum=True):
    "Update field checksums and sort cache, after find&replace, etc."
    sfids = ids2str(fids)
    mods = self.models()
    r = []
    r2 = []
    for (fid, mid, flds) in self._fieldData(sfids):
        fields = splitFields(flds)
        model = mods[mid]
        if csum:
            for f in model.fields:
                if f['uniq'] and fields[f['ord']]:
                    r.append((fid, mid, fieldChecksum(fields[f['ord']])))
        r2.append((stripHTML(fields[model.sortIdx()]), fid))
    if csum:
        self.db.execute("delete from fsums where fid in " + sfids)
        self.db.executemany("insert into fsums values (?,?,?)", r)
    self.db.executemany("update facts set sfld = ? where id = ?", r2)

def flush(self, mod=None): "If fields or tags have changed, write changes to disk." assert self.scm == self.col.scm self._preFlush() sfld = stripHTMLMedia(self.fields[self.col.models.sortIdx( self._model)]) tags = self.stringTags() fields = self.joinedFields() if not mod and self.col.db.scalar( "select 1 from notes where id = ? and tags = ? and flds = ?", self.id, tags, fields): return csum = fieldChecksum(self.fields[0]) self.mod = mod if mod else intTime() self.usn = self.col.usn() res = self.col.db.execute( """ insert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)""", self.id, self.guid, self.mid, self.mod, self.usn, tags, fields, sfld, csum, self.flags, self.data) self.col.tags.register(self.tags) self._postFlush()
def dupeOrEmptyWithOrds(self):
    """
    Returns a tuple. The contents of each element are as follows:
    1) 1 if first is empty; 2 if first is a duplicate, False otherwise.
    2) For a duplicate (2), the list of ordinals that make up the key.
       Otherwise this is None.
    """
    val = self.fields[0]
    if not val.strip():
        return 1, None
    csum = fieldChecksum(val)
    # find any matching csums and compare
    for flds in self.col.db.list(
            "select flds from notes where csum = ? and id != ? and mid = ?",
            csum, self.id or 0, self.mid):
        model = self.model()
        field_ords = [0]
        for fld in model["flds"]:
            if fld["ord"] == 0:
                continue
            elif fld["name"].endswith(KEY_SUFFIX):
                field_ords.append(fld["ord"])
        all_fields_equal = True
        fields_split = splitFields(flds)
        for field_ord in field_ords:
            if stripHTMLMedia(fields_split[field_ord]) != stripHTMLMedia(
                    self.fields[field_ord]):
                all_fields_equal = False
        if all_fields_equal:
            return 2, field_ords
    return False, None

def updateCompleteDeck(self, data):
    self.startEditing()
    did = self.decks().id(data["deck"])
    self.decks().flush()
    model_manager = self.collection().models
    for _, card in data["cards"].items():
        self.database().execute(
            "replace into cards (id, nid, did, ord, type, queue, due, ivl, factor, reps, lapses, left, "
            "mod, usn, odue, odid, flags, data) "
            "values (" + "?," * (12 + 6 - 1) + "?)",
            card["id"], card["nid"], did, card["ord"], card["type"],
            card["queue"], card["due"], card["ivl"], card["factor"],
            card["reps"], card["lapses"], card["left"],
            intTime(), -1, 0, 0, 0, 0
        )
        note = data["notes"][str(card["nid"])]
        tags = self.collection().tags.join(
            self.collection().tags.canonify(note["tags"]))
        self.database().execute(
            "replace into notes(id, mid, tags, flds,"
            "guid, mod, usn, flags, data, sfld, csum) values ("
            + "?," * (4 + 7 - 1) + "?)",
            note["id"], note["mid"], tags, joinFields(note["fields"]),
            guid64(), intTime(), -1, 0, 0, "",
            fieldChecksum(note["fields"][0])
        )
        model = data["models"][str(note["mid"])]
        if not model_manager.get(model["id"]):
            model_o = model_manager.new(model["name"])
            for field_name in model["fields"]:
                field = model_manager.newField(field_name)
                model_manager.addField(model_o, field)
            for template_name in model["templateNames"]:
                template = model_manager.newTemplate(template_name)
                model_manager.addTemplate(model_o, template)
            model_o["id"] = model["id"]
            model_manager.update(model_o)
            model_manager.flush()
    self.stopEditing()

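# The "?," * (12 + 6 - 1) + "?" expressions above build the placeholder lists
# for the 18-column cards row and the 11-column notes row. An equivalent,
# arguably clearer helper (hypothetical, not part of the snippet's codebase):
def placeholders(n):
    # "?,?,...,?" with n question marks, for an SQL values clause
    return ",".join(["?"] * n)

assert placeholders(18) == "?," * (12 + 6 - 1) + "?"
assert placeholders(11) == "?," * (4 + 7 - 1) + "?"
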
def scmhash(self, m):
    "Return a hash of the schema, to see if models are compatible."
    s = ""
    for f in m['flds']:
        s += f['name']
    return fieldChecksum(s)

def updateNotes(allDb):
    t_0, now, db = time.time(), intTime(), mw.col.db
    TAG = mw.col.tags  # type: TagManager
    ds, nid2mmi = [], {}
    mw.progress.start(label='Updating data', immediate=True)
    fidDb = allDb.fidDb(recalc=True)
    loc_db = allDb.locDb(recalc=False)  # type: Dict[Location, Set[Morpheme]]

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, \
        tooShortTag, tooLongTag, frequencyTag = tagNames = (
            cfg('Tag_Comprehension'), cfg('Tag_Vocab'), cfg('Tag_Fresh'),
            cfg('Tag_NotReady'), cfg('Tag_AlreadyKnown'), cfg('Tag_Priority'),
            cfg('Tag_TooShort'), cfg('Tag_TooLong'), cfg('Tag_Frequency'))
    TAG.register(tagNames)
    badLengthTag = cfg('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg('path_frequency')
    frequency_map = {}
    frequency_has_morphemes = False
    try:
        with io.open(frequencyListPath, encoding='utf-8-sig') as csvfile:
            csvreader = csv.reader(csvfile, delimiter="\t")
            rows = [row for row in csvreader]
            if rows[0][0] == "#study_plan_frequency":
                frequency_has_morphemes = True
                frequency_map = dict(
                    zip([Morpheme(row[0], row[1], row[2], row[3], row[4], row[5])
                         for row in rows[1:]],
                        itertools.count(0)))
            else:
                frequency_map = dict(
                    zip([row[0] for row in rows], itertools.count(0)))
    except FileNotFoundError:
        pass
    frequencyListLength = len(frequency_map)

    # prefetch cfg for fields
    field_focus_morph = cfg('Field_FocusMorph')
    field_unknown_count = cfg('Field_UnknownMorphCount')
    field_unmature_count = cfg('Field_UnmatureMorphCount')
    field_morph_man_index = cfg('Field_MorphManIndex')
    field_unknowns = cfg('Field_Unknowns')
    field_unmatures = cfg('Field_Unmatures')
    field_unknown_freq = cfg('Field_UnknownFreq')
    field_focus_morph_pos = cfg("Field_FocusMorphPos")

    skip_comprehension_cards = cfg('Option_SkipComprehensionCards')
    skip_fresh_cards = cfg('Option_SkipFreshVocabCards')

    # Find all morphs that changed maturity and the notes that refer to them.
    last_maturities = allDb.meta.get('last_maturities', {})
    new_maturities = {}
    refresh_notes = set()

    # Recompute everything if preferences changed.
    last_preferences = allDb.meta.get('last_preferences', {})
    if not last_preferences == get_preferences():
        print("Preferences changed. Updating all notes...")
        last_updated = 0
    else:
        last_updated = allDb.meta.get('last_updated', 0)

    # Todo: Remove this forced 0 once we add checks for other changes like
    # new frequency.txt files.
    last_updated = 0

    # If we're updating everything anyway, clear the notes set.
    if last_updated > 0:
        for m, locs in allDb.db.items():
            maturity_bits = 0
            if seenDb.matches(m):
                maturity_bits |= 1
            if knownDb.matches(m):
                maturity_bits |= 2
            if matureDb.matches(m):
                maturity_bits |= 4

            new_maturities[m] = maturity_bits

            if last_maturities.get(m, -1) != maturity_bits:
                for loc in locs:
                    if isinstance(loc, AnkiDeck):
                        refresh_notes.add(loc.noteId)

    included_types, include_all = getModifyEnabledModels()
    included_mids = [m['id'] for m in mw.col.models.all()
                     if include_all or m['name'] in included_types]

    query = '''
        select id, mid, flds, guid, tags from notes
        WHERE mid IN ({0}) and ( mod > {2} or id in ({1}) )
        '''.format(','.join([str(m) for m in included_mids]),
                   ','.join([str(id) for id in refresh_notes]),
                   last_updated)
    query_results = db.execute(query)

    N_notes = len(query_results)
    mw.progress.finish()
    mw.progress.start(label='Updating notes', max=N_notes, immediate=True)

    for i, (nid, mid, flds, guid, tags) in enumerate(query_results):
        ts = TAG.split(tags)

        if i % 500 == 0:
            mw.progress.update(value=i)

        C = partial(cfg, model_id=mid)

        notecfg = getFilterByMidAndTags(mid, ts)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(loc_db[loc])
            except KeyError:
                continue

        proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown')

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, new_knowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if proper_nouns_known and morpheme.isProperNoun():
                continue
            if not seenDb.matches(morpheme):
                unseens.add(morpheme)
            if not knownDb.matches(morpheme):
                unknowns.add(morpheme)
            if not matureDb.matches(morpheme):
                unmatures.add(morpheme)
                if knownDb.matches(morpheme):
                    new_knowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # add bonus for morphs in priority.db and frequency.txt
        frequencyBonus = C('frequency.txt bonus')
        isPriority = False
        isFrequency = False

        focusMorph = None

        F_k = 0
        usefulness = 0
        for focusMorph in unknowns:
            F_k += allDb.frequency(focusMorph)

            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            if frequency_has_morphemes:
                focusMorphIndex = frequency_map.get(focusMorph, -1)
            else:
                focusMorphIndex = frequency_map.get(focusMorph.base, -1)

            if focusMorphIndex >= 0:
                isFrequency = True
                # The bigger this number, the lower mmi becomes
                usefulness += int(round(
                    frequencyBonus * (1 - focusMorphIndex / frequencyListLength)))

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness += F_k_avg

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in new_knowns:
            locs = knownDb.getMatchingLocs(morpheme)
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == '動詞' for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # Fill in various fields/tags on the note based on cfg
        fs = splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [t for t in ts
              if t not in (notReadyTag, compTag, vocabTag, freshTag)]

        # determine card type
        if N_m == 0:
            # sentence comprehension card, m+0
            ts.append(compTag)
            if skip_comprehension_cards:
                # Add a penalty to put these cards at the end of the queue
                usefulness += 1000000
        elif N_k == 1:
            # new vocab card, k+1
            ts.append(vocabTag)
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        elif N_k > 1:
            # M+1+ and K+2+
            ts.append(notReadyTag)
        elif N_m == 1:
            # we have k+0 and m+1, so this card does not introduce new
            # vocabulary -> card for a newly learned morpheme
            ts.append(freshTag)
            if skip_fresh_cards:
                # Add a penalty to put these cards at the end of the queue
                usefulness += 1000000
            focusMorph = next(iter(unmatures))
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        else:
            # only case left: we have k+0 but m+2 or higher, so this card does
            # not introduce new vocabulary -> card for newly learned morphemes
            ts.append(freshTag)
            if skip_fresh_cards:
                # Add a penalty to put these cards at the end of the queue
                usefulness += 1000000

        # calculate mmi
        mmi = 100000 * N_k + 1000 * lenDiff + int(round(usefulness))
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # set type agnostic fields
        setField(mid, fs, field_unknown_count, '%d' % N_k)
        setField(mid, fs, field_unmature_count, '%d' % N_m)
        setField(mid, fs, field_morph_man_index, '%d' % mmi)
        setField(mid, fs, field_unknowns, ', '.join(u.base for u in unknowns))
        setField(mid, fs, field_unmatures, ', '.join(u.base for u in unmatures))
        setField(mid, fs, field_unknown_freq, '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not cfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append((tags_, flds_, sfld, csum, now, mw.col.usn(), nid))

    mw.progress.update(label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=?, flds=?, sfld=?, csum=?, mod=?, usn=? where id=?',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append((due_, now, mw.col.usn(), cid))
    mw.col.db.executemany('update cards set due=?, mod=?, usn=? where id=?', ds)
    mw.reset()

    allDb.meta['last_preferences'] = get_preferences()
    allDb.meta['last_maturities'] = new_maturities
    allDb.meta['last_updated'] = int(time.time() + 0.5)

    printf('Updated %d notes in %f sec' % (N_notes, time.time() - t_0))

    if cfg('saveDbs'):
        mw.progress.update(label='Saving all/seen/known/mature dbs')
        allDb.save(cfg('path_all'))
        seenDb.save(cfg('path_seen'))
        knownDb.save(cfg('path_known'))
        matureDb.save(cfg('path_mature'))
        printf('Updated %d notes + saved dbs in %f sec' % (N_notes, time.time() - t_0))

    mw.progress.finish()
    return knownDb

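# The maturity scan in updateNotes above packs seen/known/mature into a bit
# mask so one integer comparison against the cached value detects any change.
# The same encoding in isolation (names are illustrative only; the bit values
# match the |= 1 / |= 2 / |= 4 lines above):
SEEN, KNOWN, MATURE = 1, 2, 4

def maturity_bits(seen, known, mature):
    bits = 0
    if seen:
        bits |= SEEN
    if known:
        bits |= KNOWN
    if mature:
        bits |= MATURE
    return bits

# seen and known but not yet mature -> 3; any transition changes the integer
assert maturity_bits(True, True, False) == SEEN | KNOWN
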
def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, \
        tooShortTag, tooLongTag = tagNames = (
            jcfg('Tag_Comprehension'), jcfg('Tag_Vocab'), jcfg('Tag_Fresh'),
            jcfg('Tag_NotReady'), jcfg('Tag_AlreadyKnown'),
            jcfg('Tag_Priority'), jcfg('Tag_TooShort'), jcfg('Tag_TooLong'))
    TAG.register(tagNames)
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)

        C = partial(cfg, mid, None)

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(locDb[loc])
            except KeyError:
                continue

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if morpheme not in seenDb.db:
                unseens.add(morpheme)
            if morpheme not in knownDb.db:
                unknowns.add(morpheme)
            if morpheme not in matureDb.db:
                unmatures.add(morpheme)
            if morpheme not in matureDb.db and morpheme in knownDb.db:
                newKnowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += allDb.frequency(focusMorph)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[morpheme]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == u'動詞' for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [t for t in ts
              if t not in [notReadyTag, compTag, vocabTag, freshTag]]

        # determine card type
        if N_m == 0:
            # sentence comprehension card, m+0
            ts = ts + [compTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_k == 1:
            # new vocab card, k+1
            ts = ts + [vocabTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'%s' % focusMorph.base)
        elif N_k > 1:
            # M+1+ and K+2+
            ts = ts + [notReadyTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_m == 1:
            # we have k+0 and m+1, so this card does not introduce new
            # vocabulary -> card for a newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'),
                     u'%s' % list(unmatures)[0].base)
        else:
            # only case left: we have k+0 but m+2 or higher, so this card does
            # not introduce new vocabulary -> card for newly learned morphemes
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')

        # set type agnostic fields
        setField(mid, fs, jcfg('Field_UnknownMorphCount'), u'%d' % N_k)
        setField(mid, fs, jcfg('Field_UnmatureMorphCount'), u'%d' % N_m)
        setField(mid, fs, jcfg('Field_MorphManIndex'), u'%d' % mmi)
        setField(mid, fs, jcfg('Field_Unknowns'),
                 u', '.join(u.base for u in unknowns))
        setField(mid, fs, jcfg('Field_Unmatures'),
                 u', '.join(u.base for u in unmatures))
        setField(mid, fs, jcfg('Field_UnknownFreq'), u'%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({'now': now, 'tags': tags_, 'flds': flds_, 'sfld': sfld,
                       'csum': csum, 'usn': mw.col.usn(), 'nid': nid})

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({'now': now, 'due': due_,
                           'usn': mw.col.usn(), 'cid': cid})
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb

def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checks for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    updateLog = []
    updateLogTxt = _("Update as first field matched: %s")
    dupeLogTxt = _("Added duplicate with first field: %s")
    new = []
    self._ids = []
    self._cards = []
    self._emptyNotes = False
    for n in notes:
        if not self.allowHTML:
            for c in range(len(n.fields)):
                n.fields[c] = cgi.escape(n.fields[c])
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != 2:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == 0:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(updateLogTxt % fld0)
                            found = True
                        break
                    elif self.importMode == 2:
                        # allow duplicates in this case
                        updateLog.append(dupeLogTxt % fld0)
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    self.col.sched.maybeRandomizeDeck()
    part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
    part2 = ngettext("%d note updated", "%d notes updated",
                     self.updateCount) % self.updateCount
    self.log.append("%s, %s." % (part1, part2))
    self.log.extend(updateLog)
    if self._emptyNotes:
        self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
    self.total = len(self._ids)

def _getOriginal(note):
    # Assumes a note with the same first-field checksum exists; uses bound
    # parameters instead of string formatting for the query values.
    csum = fieldChecksum(note.fields[0])
    original_nid = mw.col.db.first(
        "select id from notes where csum = ? and id != ? and mid = ?",
        csum, note.id, note.mid)[0]
    return mw.col.getNote(original_nid)

def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checks for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    updateLog = []
    updateLogTxt = _("First field matched: %s")
    dupeLogTxt = _("Added duplicate with first field: %s")
    new = []
    self._ids = []
    self._cards = []
    self._emptyNotes = False
    dupeCount = 0
    dupes = []
    for n in notes:
        for c in range(len(n.fields)):
            if not self.allowHTML:
                n.fields[c] = html.escape(n.fields[c], quote=False)
            n.fields[c] = n.fields[c].strip()
            if not self.allowHTML:
                n.fields[c] = n.fields[c].replace("\n", "<br>")
            n.fields[c] = unicodedata.normalize("NFC", n.fields[c])
        n.tags = [unicodedata.normalize("NFC", t) for t in n.tags]
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(
                _("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != 2:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == 0:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(updateLogTxt % fld0)
                            dupeCount += 1
                            found = True
                    elif self.importMode == 1:
                        dupeCount += 1
                    elif self.importMode == 2:
                        # allow duplicates in this case
                        if fld0 not in dupes:
                            # only show message once, no matter how many
                            # duplicates are in the collection already
                            updateLog.append(dupeLogTxt % fld0)
                            dupes.append(fld0)
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(
            0, _("Empty cards found. Please run Tools>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    # we randomize or order here, to ensure that siblings
    # have the same due#
    did = self.col.decks.selected()
    conf = self.col.decks.confForDid(did)
    # in order due?
    if conf['new']['order'] == NEW_CARDS_RANDOM:
        self.col.sched.randomizeCards(did)
    part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
    part2 = ngettext("%d note updated", "%d notes updated",
                     self.updateCount) % self.updateCount
    if self.importMode == 0:
        unchanged = dupeCount - self.updateCount
    elif self.importMode == 1:
        unchanged = dupeCount
    else:
        unchanged = 0
    part3 = ngettext("%d note unchanged", "%d notes unchanged",
                     unchanged) % unchanged
    self.log.append("%s, %s, %s." % (part1, part2, part3))
    self.log.extend(updateLog)
    if self._emptyNotes:
        self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
    self.total = len(self._ids)

def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checks for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    updateLog = []
    updateLogTxt = _("First field matched: %s")
    dupeLogTxt = _("Added duplicate with first field: %s")
    new = []
    self._ids = []
    self._cards = []
    self._emptyNotes = False
    dupeCount = 0
    dupes = []
    for n in notes:
        for c in range(len(n.fields)):
            if not self.allowHTML:
                n.fields[c] = cgi.escape(n.fields[c])
            n.fields[c] = n.fields[c].strip().replace("\n", "<br>")
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != 2:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == 0:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(updateLogTxt % fld0)
                            dupeCount += 1
                            found = True
                    elif self.importMode == 1:
                        dupeCount += 1
                    elif self.importMode == 2:
                        # allow duplicates in this case
                        if fld0 not in dupes:
                            # only show message once, no matter how many
                            # duplicates are in the collection already
                            updateLog.append(dupeLogTxt % fld0)
                            dupes.append(fld0)
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    # we randomize or order here, to ensure that siblings
    # have the same due#
    did = self.col.decks.selected()
    conf = self.col.decks.confForDid(did)
    # in order due?
    if conf['new']['order'] == NEW_CARDS_RANDOM:
        self.col.sched.randomizeCards(did)
    else:
        self.col.sched.orderCards(did)
    part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
    part2 = ngettext("%d note updated", "%d notes updated",
                     self.updateCount) % self.updateCount
    if self.importMode == 0:
        unchanged = dupeCount - self.updateCount
    elif self.importMode == 1:
        unchanged = dupeCount
    else:
        unchanged = 0
    part3 = ngettext("%d note unchanged", "%d notes unchanged",
                     unchanged) % unchanged
    self.log.append("%s, %s, %s." % (part1, part2, part3))
    self.log.extend(updateLog)
    if self._emptyNotes:
        self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
    self.total = len(self._ids)

def importNotes(self, notes: list[ForeignNote]) -> None:
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checks for duplicate comparison
    csums: dict[str, list[NoteId]] = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model["id"]):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts: dict[str, bool] = {}
    fld0idx = self.mapping.index(self.model["flds"][0]["name"])
    self._fmap = self.col.models.field_map(self.model)
    self._nextID = NoteId(timestampID(self.col.db, "notes"))
    # loop through the notes
    updates: list[Updates] = []
    updateLog = []
    new = []
    self._ids: list[NoteId] = []
    self._cards: list[tuple] = []
    dupeCount = 0
    dupes: list[str] = []
    for n in notes:
        for c, field in enumerate(n.fields):
            # work on n.fields[c] throughout, so the escaped value is not
            # overwritten by the original loop variable
            if not self.allowHTML:
                n.fields[c] = html.escape(field, quote=False)
            n.fields[c] = n.fields[c].strip()
            if not self.allowHTML:
                n.fields[c] = n.fields[c].replace("\n", "<br>")
        fld0 = unicodedata.normalize("NFC", n.fields[fld0idx])
        # first field must exist
        if not fld0:
            self.log.append(
                self.col.tr.importing_empty_first_field(
                    val=" ".join(n.fields)))
            continue
        csum = fieldChecksum(fld0)
        # earlier in import?
        if fld0 in firsts and self.importMode != ADD_MODE:
            # duplicates in source file; log and ignore
            self.log.append(
                self.col.tr.importing_appeared_twice_in_file(val=fld0))
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == UPDATE_MODE:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(
                                self.col.tr.importing_first_field_matched(
                                    val=fld0))
                            dupeCount += 1
                            found = True
                    elif self.importMode == IGNORE_MODE:
                        dupeCount += 1
                    elif self.importMode == ADD_MODE:
                        # allow duplicates in this case
                        if fld0 not in dupes:
                            # only show message once, no matter how many
                            # duplicates are in the collection already
                            updateLog.append(
                                self.col.tr.
                                importing_added_duplicate_with_first_field(
                                    val=fld0,
                                ))
                            dupes.append(fld0)
                        found = False
        # newly add
        if not found:
            new_data = self.newData(n)
            if new_data:
                new.append(new_data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # generate cards + update field cache
    self.col.after_note_updates(self._ids, mark_modified=False)
    # apply scheduling updates
    self.updateCards()
    # we randomize or order here, to ensure that siblings
    # have the same due#
    did = self.col.decks.selected()
    conf = self.col.decks.config_dict_for_deck_id(did)
    # in order due?
    if not conf["dyn"] and conf["new"]["order"] == NEW_CARDS_RANDOM:
        self.col.sched.randomizeCards(did)
    part1 = self.col.tr.importing_note_added(count=len(new))
    part2 = self.col.tr.importing_note_updated(count=self.updateCount)
    if self.importMode == UPDATE_MODE:
        unchanged = dupeCount - self.updateCount
    elif self.importMode == IGNORE_MODE:
        unchanged = dupeCount
    else:
        unchanged = 0
    part3 = self.col.tr.importing_note_unchanged(count=unchanged)
    self.log.append(f"{part1}, {part2}, {part3}.")
    self.log.extend(updateLog)
    self.total = len(self._ids)

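# Across the importNotes revisions, the numeric importMode values and the
# named constants in the typed version line up as below (values inferred from
# the older snippets' use of 0 and 2; treat this mapping as an assumption):
UPDATE_MODE = 0  # first-field match overwrites the existing note
IGNORE_MODE = 1  # first-field match is skipped and counted as unchanged
ADD_MODE = 2     # duplicates are imported as new notes anyway
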
def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db
    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Calculating frequency information')
    pops = [len(locs) for locs in allDb.db.values()]
    pops = [n for n in pops if n > 1]

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'):
            continue

        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[(nid, guid, fieldName)]
                ms.update(locDb[loc])
            except KeyError:
                continue
        ms = [m for m in ms if m.pos not in C('morph_blacklist')]

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for m in ms:
            if m not in seenDb.db:
                unseens.add(m)
            if m not in knownDb.db:
                unknowns.add(m)
            if m not in matureDb.db:
                unmatures.add(m)
            if m not in matureDb.db and m in knownDb.db:
                newKnowns.add(m)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(ms), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += len(allDb.db[focusMorph])
        F_k_avg = F_k / N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[m]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') / ivl

        if any(m.pos == u'動詞' for m in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length (too little context vs long sentence)
        lenDiff = max(0, min(9, abs(C('optimal sentence length') - N) - 2))
        tooLong = N > C('optimal sentence length')

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)

        # determine card type
        compTag, vocabTag, notReadyTag, alreadyKnownTag, priorityTag, \
            badLengthTag, tooLongTag = tagNames = (
                C('tag_comprehension'), C('tag_vocab'), C('tag_notReady'),
                C('tag_alreadyKnown'), C('tag_priority'), C('tag_badLength'),
                C('tag_tooLong'))
        if N_m == 0:
            # sentence comprehension card, m+0
            ts = [compTag] + [t for t in ts if t not in [vocabTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'')
        elif N_k == 1:
            # new vocab card, k+1
            ts = [vocabTag] + [t for t in ts if t not in [compTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'%s' % focusMorph.base)
        elif N_k > 1:
            # M+1+ and K+2+
            ts = [notReadyTag] + [t for t in ts if t not in [compTag, vocabTag]]

        # set type agnostic fields
        setField(mid, fs, C('k+N'), u'%d' % N_k)
        setField(mid, fs, C('m+N'), u'%d' % N_m)
        setField(mid, fs, C('morphManIndex'), u'%d' % mmi)
        setField(mid, fs, C('unknowns'), u', '.join(u.base for u in unknowns))
        setField(mid, fs, C('unmatures'), u', '.join(u.base for u in unmatures))
        setField(mid, fs, C('unknownFreq'), u'%d' % F_k_avg)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)
        if badLengthTag in ts:
            ts.remove(badLengthTag)
        if lenDiff:
            ts.append(badLengthTag)
        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if tooLong:
            ts.append(tooLongTag)

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({'now': now, 'tags': tags_, 'flds': flds_, 'sfld': sfld,
                       'csum': csum, 'usn': mw.col.usn(), 'nid': nid})

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)
    TAG.register(tagNames)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []
    for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({'now': now, 'due': due_,
                           'usn': mw.col.usn(), 'cid': cid})
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb

def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, \
        tooShortTag, tooLongTag, frequencyTag = tagNames = (
            jcfg('Tag_Comprehension'), jcfg('Tag_Vocab'), jcfg('Tag_Fresh'),
            jcfg('Tag_NotReady'), jcfg('Tag_AlreadyKnown'),
            jcfg('Tag_Priority'), jcfg('Tag_TooShort'), jcfg('Tag_TooLong'),
            jcfg('Tag_Frequency'))
    TAG.register(tagNames)
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg1('path_frequency')
    try:
        with codecs.open(frequencyListPath, 'r', 'utf-8') as f:
            frequencyList = [line.strip() for line in f.readlines()]
            frequencyListLength = len(frequencyList)
    except FileNotFoundError:
        pass  # User does not have a frequency.txt

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)

        C = partial(cfg, mid, None)

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(locDb[loc])
            except KeyError:
                continue

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if morpheme not in seenDb.db:
                unseens.add(morpheme)
            if morpheme not in knownDb.db:
                unknowns.add(morpheme)
            if morpheme not in matureDb.db:
                unmatures.add(morpheme)
            if morpheme not in matureDb.db and morpheme in knownDb.db:
                newKnowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += allDb.frequency(focusMorph)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db and frequency.txt
        isPriority = False
        isFrequency = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            focusMorphString = focusMorph.show().split()[0]
            try:
                focusMorphIndex = frequencyList.index(focusMorphString)
                isFrequency = True

                frequencyWeight = C('frequency.txt weight scale')

                # The bigger this number, the lower mmi becomes
                usefulness += (frequencyListLength - focusMorphIndex) * frequencyWeight
            except (NameError, ValueError):
                # NameError: no frequency.txt was loaded above;
                # ValueError: the morph is not in the list
                pass

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[morpheme]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == '動詞' for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 100000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [t for t in ts
              if t not in [notReadyTag, compTag, vocabTag, freshTag]]

        # determine card type
        if N_m == 0:
            # sentence comprehension card, m+0
            ts = ts + [compTag]
        elif N_k == 1:
            # new vocab card, k+1
            ts = ts + [vocabTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), '%s' % focusMorph.base)
        elif N_k > 1:
            # M+1+ and K+2+
            ts = ts + [notReadyTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), '')
        elif N_m == 1:
            # we have k+0 and m+1, so this card does not introduce new
            # vocabulary -> card for a newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'),
                     '%s' % list(unmatures)[0].base)
        else:
            # only case left: we have k+0 but m+2 or higher, so this card does
            # not introduce new vocabulary -> card for newly learned morphemes
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), '')

        # set type agnostic fields
        setField(mid, fs, jcfg('Field_UnknownMorphCount'), '%d' % N_k)
        setField(mid, fs, jcfg('Field_UnmatureMorphCount'), '%d' % N_m)
        setField(mid, fs, jcfg('Field_MorphManIndex'), '%d' % mmi)
        setField(mid, fs, jcfg('Field_Unknowns'),
                 ', '.join(u.base for u in unknowns))
        setField(mid, fs, jcfg('Field_Unmatures'),
                 ', '.join(u.base for u in unmatures))
        setField(mid, fs, jcfg('Field_UnknownFreq'), '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({'now': now, 'tags': tags_, 'flds': flds_, 'sfld': sfld,
                       'csum': csum, 'usn': mw.col.usn(), 'nid': nid})

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({'now': now, 'due': due_,
                           'usn': mw.col.usn(), 'cid': cid})
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb

def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db
    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Calculating frequency information')
    pops = [len(locs) for locs in allDb.db.values()]
    pops = [n for n in pops if n > 1]

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'):
            continue

        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[(nid, guid, fieldName)]
                ms.update(locDb[loc])
            except KeyError:
                continue
        ms = [m for m in ms if m.pos not in C('morph_blacklist')]

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for m in ms:
            if m not in seenDb.db:
                unseens.add(m)
            if m not in knownDb.db:
                unknowns.add(m)
            if m not in matureDb.db:
                unmatures.add(m)
            if m not in matureDb.db and m in knownDb.db:
                newKnowns.add(m)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(ms), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += len(allDb.db[focusMorph])
        F_k_avg = F_k / N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[m]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') / ivl

        if any(m.pos == u'動詞' for m in unknowns):  # FIXME: this isn't working???
usefulness += C('verb bonus') usefulness = 999 - min( 999, usefulness ) # difference from optimal length (too little context vs long sentence) lenDiff = max( 0, min( 9, abs( C('optimal sentence length') - N ) -2 ) ) # calculate mmi mmi = 10000*N_k + 1000*lenDiff + usefulness if C('set due based on mmi'): nid2mmi[ nid ] = mmi # Fill in various fields/tags on the note based on cfg ts, fs = TAG.split( tags ), splitFields( flds ) # determine card type compTag, vocabTag, notReadyTag, alreadyKnownTag, priorityTag = tagNames = C('tag_comprehension'), C('tag_vocab'), C('tag_notReady'), C('tag_alreadyKnown'), C('tag_priority') if N_m == 0: # sentence comprehension card, m+0 ts = [ compTag ] + [ t for t in ts if t not in [ vocabTag, notReadyTag ] ] setField( mid, fs, C('focusMorph'), u'' ) elif N_k == 1: # new vocab card, k+1 ts = [ vocabTag ] + [ t for t in ts if t not in [ compTag, notReadyTag ] ] setField( mid, fs, C('focusMorph'), u'%s' % focusMorph.base ) elif N_k > 1: # M+1+ and K+2+ ts = [ notReadyTag ] + [ t for t in ts if t not in [ compTag, vocabTag ] ] # set type agnostic fields setField( mid, fs, C('k+N'), u'%d' % N_k ) setField( mid, fs, C('m+N'), u'%d' % N_m ) setField( mid, fs, C('morphManIndex'), u'%d' % mmi ) setField( mid, fs, C('unknowns'), u', '.join( u.base for u in unknowns ) ) setField( mid, fs, C('unmatures'), u', '.join( u.base for u in unmatures ) ) setField( mid, fs, C('unknownFreq'), u'%d' % F_k_avg ) # other tags if priorityTag in ts: ts.remove( priorityTag ) if isPriority: ts.append( priorityTag ) # update sql db tags_ = TAG.join( TAG.canonify( ts ) ) flds_ = joinFields( fs ) if flds != flds_ or tags != tags_: # only update notes that have changed csum = fieldChecksum( fs[0] ) sfld = stripHTML( fs[ getSortFieldIndex( mid ) ] ) ds.append( { 'now':now, 'tags':tags_, 'flds':flds_, 'sfld':sfld, 'csum':csum, 'usn':mw.col.usn(), 'nid':nid } ) mw.progress.update( value=i, label='Updating anki database...' ) mw.col.db.executemany( 'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid', ds ) TAG.register( tagNames ) # Now reorder new cards based on MMI mw.progress.update( value=i, label='Updating new card ordering...' ) ds = [] for ( cid, nid, due ) in db.execute( 'select id, nid, due from cards where type = 0' ): if nid in nid2mmi: # owise it was disabled due_ = nid2mmi[ nid ] if due != due_: # only update cards that have changed ds.append( { 'now':now, 'due':due_, 'usn':mw.col.usn(), 'cid':cid } ) mw.col.db.executemany( 'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds ) mw.reset() printf( 'Updated notes in %f sec' % ( time.time() - t_0 ) ) mw.progress.finish()
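# --- Illustrative sketch (not from the add-on): the scoring above reduces to
# a single sortable integer. The names below are mine; the value ranges follow
# the clamps used in updateNotes (lenDiff in [0, 9], usefulness in [0, 999]).
def morph_man_index(n_unknown, len_diff, usefulness):
    # Unknown-morpheme count dominates, the sentence-length penalty breaks
    # ties, and the inverted usefulness score breaks ties among those.
    return 10000 * n_unknown + 1000 * len_diff + usefulness

# Consequence of the weighting: a k+1 card always sorts before any k+2 card,
# no matter how awkward its length or how useless its morphs.
assert morph_man_index(1, 9, 999) < morph_man_index(2, 0, 0)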
def _ignore_dupes(self_note=None, self_expression=None, self_deck=None):
    """We will override Anki's Note.dupeOrEmpty function with this function.

    This method is meant to return
        1     if self.fields[0] is empty,
        2     if the note is a duplicate,
        False otherwise (i.e. a "nice" note).

    :param self_note: Anki note object.
    :param self_expression: String. Will overwrite note.fields[0]
    :param self_deck: Deck the note belongs to.
    """
    # Nomenclature: We compare the note given as argument to other notes.
    # Everything that has to do with that initial note has the prefix 'self',
    # everything that has to do with one of the other notes has the prefix
    # 'other'.
    # Some explanation for abbreviations used in Anki:
    # * id:  Note id (as variable belonging to a note)
    # * nid: Note id (as variable belonging to a card)
    # * did: Deck id
    # * mid: Model id

    # 1. Default values & Co.
    if self_note is None and self_expression is None:
        # maybe we should raise a ValueError instead, but well...
        return False
    if self_note:
        self_search_value = self_note.fields[0]  # might be None!
        self_note_id = self_note.id
        self_note_mid = self_note.mid
    else:
        self_search_value = None
        self_note_id = None
        self_note_mid = None
    if self_expression:
        # Note: If self_note was given as well, self_search_value will be
        # overwritten.
        self_search_value = self_expression

    # 2. Check if we have a key field/Expression
    logger.debug("key field = '%s'" % self_search_value)
    if not self_search_value or isinstance(self_search_value, str) and not \
            self_search_value.strip():
        # note that self_note.fields[0] might be None!
        logger.debug("Key field empty.")
        return 1

    # 3. Get note ids of notes that might be duplicates.
    csum = fieldChecksum(self_search_value)
    if self_note_mid:
        # We don't have to check for the note id, because it defaults to 0 in
        # the search query (mostly copied from Anki's source).
        # Select all note ids from notes
        # 1. whose key field has the same checksum,
        # 2. whose note id is different (i.e. we're excluding self_note),
        # 3. whose model id is the same.
        other_note_ids = mw.col.db.list(
            "select id from notes where csum = ? and id != ? and mid = ?",
            csum, self_note_id or 0, self_note_mid)
    else:
        # Don't apply any criteria for note id and model id; just search
        # for the checksum.
        other_note_ids = mw.col.db.list("select id from notes where csum = ?", csum)
    logger.debug("other_note_ids: {}".format(other_note_ids))
    if not other_note_ids:
        logger.debug("Did not find any notes with the same key field checksum as self.")
        return False

    # 4. Get the deck ids from the decks the self card belonged to
    if self_deck:
        # use the deck supplied as argument
        self_deck_ids = did_from_dname(self_deck)
    else:
        # try to get the deck from Anki
        self_deck_ids = mw.col.db.list("select did from cards where nid = ?",
                                       self_note_id)
        if not self_deck_ids:
            # We tried to get the deck name from Anki, but the result was
            # None; use the deck id of the currently active deck.
            self_deck_ids = [mw.col.conf['curDeck']]
    logger.debug("self_deck_ids {}".format(self_deck_ids))

    # 5. Loop over the other_note_ids
    for other_note_id in other_note_ids:
        # 5a. Get the field values of the note with other_note_id
        other_fields = mw.col.db.list("select flds from notes where id = ?",
                                      other_note_id)
        if not other_fields:
            # note with no fields
            logger.debug("No fields.")
            return False
        # 5b. Get the deck ids of all the cards of the note with
        # other_note_id (one note can have multiple cards in different decks)
        other_deck_ids = mw.col.db.list("select did from cards where nid = ?",
                                        other_note_id)
        logger.debug("other_deck_ids {}".format(other_deck_ids))
        if not other_deck_ids:
            logger.debug("No cards with matching checksum.")
            return False
        # 5c. Check that the key fields match.
        if not stripHTMLMedia(splitFields(other_fields[0])[0]) == \
                stripHTMLMedia(self_search_value):
            logger.debug("Expressions didn't match after all.")
            return False
        # 5d. Check if we want to ignore that case.
        # Normally a card would be flagged as a duplicate here.
        for self_deck_id in self_deck_ids:
            for other_deck_id in other_deck_ids:
                self_name = dname_from_did(self_deck_id)
                other_name = dname_from_did(other_deck_id)
                if ignore_duplicates(self_name, other_name):
                    # don't do anything!
                    logger.debug("Duplicate! deck1 = '%s', deck2 = '%s' ==> "
                                 "Ignored." % (self_name, other_name))
                else:
                    logger.debug("Duplicate! deck1 = '%s', deck2 = '%s' ==> "
                                 "Flagged." % (self_name, other_name))
                    return 2
    return False
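# --- Hypothetical wiring (not part of the original snippet): the docstring
# above says this function is meant to override Anki's Note.dupeOrEmpty. From
# an add-on, that monkey-patch would look roughly like this; the wrapper name
# is invented here.
from anki.notes import Note

def _dupeOrEmpty_wrapper(self):
    # Delegate to the deck-aware duplicate check defined above; Anki's
    # editor then shows or hides its duplicate warning based on the result.
    return _ignore_dupes(self_note=self)

Note.dupeOrEmpty = _dupeOrEmpty_wrapper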
def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # gather checksums for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    new = []
    self._ids = []
    self._cards = []
    for n in notes:
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.update:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            found = True
                    break
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Maintenance>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    self.log.append(_("%(a)d notes added, %(b)d notes updated.") %
                    dict(a=len(new), b=self.updateCount))
    self.total = len(self._ids)
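# --- Standalone sketch (assumed names, independent of Anki): the importer
# above uses checksums only as a hash-bucket prefilter. A bucket hit is never
# trusted on its own ("csum is not a guarantee"); every candidate is still
# compared exactly. The same idea, with Python's built-in hash as a stand-in
# checksum:
def find_duplicate(buckets, key, checksum=hash):
    # buckets: dict mapping checksum -> list of stored first-field values.
    for stored in buckets.get(checksum(key), []):
        if stored == key:  # exact comparison resolves checksum collisions
            return stored
    return None

buckets = {}
for key in ["犬", "猫"]:
    buckets.setdefault(hash(key), []).append(key)
assert find_duplicate(buckets, "犬") == "犬"
assert find_duplicate(buckets, "鳥") is None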
def maybeChecksum(self, data, unique):
    if not unique:
        return ""
    return fieldChecksum(data)
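# --- Illustrative behaviour check (assumed, not from the source): with the
# fsums-style schema used by updateFieldChecksums, only fields configured as
# unique receive a real checksum; non-unique fields store "" so they never
# match anything. The dummy receiver below is mine; self is unused.
class _Demo:
    pass

assert maybeChecksum(_Demo(), "犬", True) == fieldChecksum("犬")
assert maybeChecksum(_Demo(), "dog", False) == ""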
def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    print "importNotes()"
    print notes
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checksums for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    updateLog = []
    updateLogTxt = _("First field matched: %s")
    dupeLogTxt = _("Added duplicate with first field: %s")
    new = []
    self._ids = []
    self._cards = []
    self._emptyNotes = False
    for n in notes:
        if not self.allowHTML:
            for c in range(len(n.fields)):
                n.fields[c] = cgi.escape(n.fields[c])
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != 2:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == 0:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(updateLogTxt % fld0)
                            found = True
                        break
                    elif self.importMode == 2:
                        # allow duplicates in this case
                        updateLog.append(dupeLogTxt % fld0)
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    did = self.col.decks.id(self.deck)
    self.col.decks.select(did)
    # SUPER IMPORTANT: set the associated deck on the model
    self.model['did'] = did
    print "Selected: ", self.col.decks.get(self.col.decks.selected())
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    # we randomize or order here, to ensure that siblings
    # have the same due#
    # m = self.col.models.byName("Basic")
    # deck = self.col.decks.get(did)
    # deck['mid'] = m['id']
    # self.col.decks.save(deck)
    # print "Deck:", self.col.decks.byName(self.deck)
    # print "DID:", did
    # save tags to model
    # m = self.note.model()
    # m['tags'] = self.note.tags
    # self.mw.col.models.save(m)
    conf = self.col.decks.confForDid(did)
    # print "Conf: ", conf
    # in order due?
    if conf['new']['order'] == NEW_CARDS_RANDOM:
        self.col.sched.randomizeCards(did)
    else:
        self.col.sched.orderCards(did)
    part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
    part2 = ngettext("%d note updated", "%d notes updated",
                     self.updateCount) % self.updateCount
    self.log.append("%s, %s." % (part1, part2))
    print part1, part2, "on deck: [", self.deck, "]"
    self.log.extend(updateLog)
    if self._emptyNotes:
        print "there were empty notes"
        self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
    self.total = len(self._ids)
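# --- Condensed sketch (the helper name is mine): this importer and the typed
# one at the end of this section both reorder newly imported cards from the
# deck's 'new' config, using the NEW_CARDS_RANDOM constant from anki.consts.
from anki.consts import NEW_CARDS_RANDOM

def apply_new_card_order(col, did):
    # Random-order decks get shuffled dues; ordered decks get sequential
    # dues. Both keep the sibling cards of one note together.
    conf = col.decks.confForDid(did)
    if conf['new']['order'] == NEW_CARDS_RANDOM:
        col.sched.randomizeCards(did)
    else:
        col.sched.orderCards(did)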
        try:
            if re.search("(?i)^" + regex + "$", strg):
                nids.append(id)
        except sre_constants.error:
            return
    if not nids:
        return "0"
    return "n.id in %s" % ids2str(nids)

def _findDupes(self, (val, args)):
    # caller must call stripHTMLMedia on passed val
    try:
        mid, val = val.split(",", 1)
    except ValueError:  # was OSError, which str.split never raises
        return
    csum = fieldChecksum(val)
    nids = []
    for nid, flds in self.col.db.execute(
            "select id, flds from notes where mid=? and csum=?", mid, csum):
        if stripHTMLMedia(splitFields(flds)[0]) == val:
            nids.append(nid)
    return "n.id in %s" % ids2str(nids)

# Find and replace
##########################################################################

def findReplace(col, nids, src, dst, regex=False, field=None, fold=True):
    "Find and replace fields in a note."
    mmap = {}
    if field:
def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checksums for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    new = []
    self._ids = []
    self._cards = []
    self._emptyNotes = False
    for n in notes:
        if not self.allowHTML:
            for c in range(len(n.fields)):
                n.fields[c] = cgi.escape(n.fields[c])
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != 2:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == 0:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            found = True
                        break
                    elif self.importMode == 2:
                        # allow duplicates in this case
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
    part2 = ngettext("%d note updated", "%d notes updated",
                     self.updateCount) % self.updateCount
    self.log.append("%s, %s." % (part1, part2))
    if self._emptyNotes:
        self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
    self.total = len(self._ids)
def importNotes(self, notes):
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # gather checksums for duplicate comparison
    csums = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts = {}
    fld0idx = self.mapping.index(self.model['flds'][0]['name'])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    new = []
    self._ids = []
    self._cards = []
    for n in notes:
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(_("Empty first field: %s") % " ".join(n.fields))
            continue
        # earlier in import?
        if fld0 in firsts:
            # duplicates in source file; log and ignore
            self.log.append(_("Appeared twice in file: %s") % fld0)
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.update:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            found = True
                    break
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # make sure to update sflds, etc
    self.col.updateFieldCache(self._ids)
    # generate cards
    if self.col.genCards(self._ids):
        self.log.insert(0, _(
            "Empty cards found. Please run Tools>Maintenance>Empty Cards."))
    # apply scheduling updates
    self.updateCards()
    self.log.append(_("%(a)d notes added, %(b)d notes updated.") %
                    dict(a=len(new), b=self.updateCount))
    self.total = len(self._ids)
def updateNotes(allDb):
    t_0, now, db = time.time(), intTime(), mw.col.db
    TAG = mw.col.tags  # type: TagManager
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb(recalc=True)
    loc_db = allDb.locDb(recalc=False)  # type: Dict[Location, Set[Morpheme]]

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, tooShortTag, tooLongTag, frequencyTag = tagNames = (
        cfg('Tag_Comprehension'), cfg('Tag_Vocab'), cfg('Tag_Fresh'),
        cfg('Tag_NotReady'), cfg('Tag_AlreadyKnown'), cfg('Tag_Priority'),
        cfg('Tag_TooShort'), cfg('Tag_TooLong'), cfg('Tag_Frequency'))
    TAG.register(tagNames)
    badLengthTag = cfg('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg('path_frequency')
    try:
        with codecs.open(frequencyListPath, encoding='utf-8') as f:
            # create a dictionary; the key is the word, the value is its
            # position in the file
            frequency_list = dict(
                zip([line.strip().split('\t')[0] for line in f.readlines()],
                    itertools.count(0)))
    except FileNotFoundError:
        frequency_list = dict()
    frequencyListLength = len(frequency_list)

    if cfg('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg('path_seen'))
        knownDb.save(cfg('path_known'))
        matureDb.save(cfg('path_mature'))

    mw.progress.update(label='Updating notes')

    # prefetch cfg for fields
    field_focus_morph = cfg('Field_FocusMorph')
    field_unknown_count = cfg('Field_UnknownMorphCount')
    field_unmature_count = cfg('Field_UnmatureMorphCount')
    field_morph_man_index = cfg('Field_MorphManIndex')
    field_unknowns = cfg('Field_Unknowns')
    field_unmatures = cfg('Field_Unmatures')
    field_unknown_freq = cfg('Field_UnknownFreq')
    field_focus_morph_pos = cfg("Field_FocusMorphPos")

    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        ts = TAG.split(tags)
        if i % 500 == 0:
            mw.progress.update(value=i)

        C = partial(cfg, model_id=mid)
        notecfg = getFilterByMidAndTags(mid, ts)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(loc_db[loc])
            except KeyError:
                continue

        proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown')

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, new_knowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if proper_nouns_known and morpheme.isProperNoun():
                continue
            if not seenDb.matches(morpheme):
                unseens.add(morpheme)
            if not knownDb.matches(morpheme):
                unknowns.add(morpheme)
            if not matureDb.matches(morpheme):
                unmatures.add(morpheme)
                if knownDb.matches(morpheme):
                    new_knowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # add bonus for morphs in priority.db and frequency.txt
        frequencyBonus = C('frequency.txt bonus')
        isPriority = False
        isFrequency = False
        focusMorph = None
        F_k = 0
        usefulness = 0
        for focusMorph in unknowns:
            F_k += allDb.frequency(focusMorph)
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')
            focusMorphString = focusMorph.base
            try:
                focusMorphIndex = frequency_list[focusMorphString]
                isFrequency = True
                # The bigger this number, the lower mmi becomes
                usefulness += int(round(
                    frequencyBonus * (1 - focusMorphIndex / frequencyListLength)))
            except KeyError:
                pass

        # average frequency of unknowns (i.e. how common the word is within
        # your collection)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness += F_k_avg

        # add bonus for studying recently learned knowns (reinforce)
        for morpheme in new_knowns:
            locs = knownDb.getMatchingLocs(morpheme)
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesn't favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == '動詞' for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 100000 * N_k + 1000 * lenDiff + int(round(usefulness))
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        fs = splitFields(flds)

        # clear any 'special' tags; the appropriate ones will be set in the
        # next few lines
        ts = [t for t in ts if t not in (notReadyTag, compTag, vocabTag, freshTag)]

        # determine card type
        if N_m == 0:    # sentence comprehension card, m+0
            ts.append(compTag)
        elif N_k == 1:  # new vocab card, k+1
            ts.append(vocabTag)
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        elif N_k > 1:   # M+1+ and K+2+
            ts.append(notReadyTag)
        elif N_m == 1:  # k+0 and m+1: the card introduces no new vocabulary,
                        # so it is a card for a newly learned morpheme
            ts.append(freshTag)
            focusMorph = next(iter(unmatures))
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        else:           # only case left: k+0 but m+2 or higher; this card
                        # also introduces no new vocabulary
            ts.append(freshTag)

        # set type agnostic fields
        setField(mid, fs, field_unknown_count, '%d' % N_k)
        setField(mid, fs, field_unmature_count, '%d' % N_m)
        setField(mid, fs, field_morph_man_index, '%d' % mmi)
        setField(mid, fs, field_unknowns, ', '.join(u.base for u in unknowns))
        setField(mid, fs, field_unmatures, ', '.join(u.base for u in unmatures))
        setField(mid, fs, field_unknown_freq, '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not cfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append((tags_, flds_, sfld, csum, now, mw.col.usn(), nid))

    mw.progress.update(label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=?, flds=?, sfld=?, csum=?, mod=?, usn=? where id=?', ds)

    # Now reorder new cards based on MMI
    mw.progress.update(label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [supposedly learning; in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid, due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # otherwise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append((due_, now, mw.col.usn(), cid))
    mw.col.db.executemany('update cards set due=?, mod=?, usn=? where id=?', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb
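# --- Standalone sketch (names are mine): the frequency.txt bonus above is a
# linear falloff over the word's rank in the list, weight * (1 - rank/length).
def frequency_bonus(rank, list_length, weight):
    # Rank 0 (the most frequent word) earns the full weight; the last entry
    # earns almost nothing. Mirrors the expression used in updateNotes.
    return int(round(weight * (1 - rank / list_length)))

assert frequency_bonus(0, 10000, 10000) == 10000
assert frequency_bonus(9999, 10000, 10000) == 1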
        try:
            if re.search("(?i)^" + regex + "$", strg):
                nids.append(id)
        except sre_constants.error:
            return
    if not nids:
        return "0"
    return "n.id in %s" % ids2str(nids)

def _findDupes(self, (val, args)):
    # caller must call stripHTMLMedia on passed val
    try:
        mid, val = val.split(",", 1)
    except ValueError:  # was OSError, which str.split never raises
        return
    csum = fieldChecksum(val)
    nids = []
    for nid, flds in self.col.db.execute(
            "select id, flds from notes where mid=? and csum=?", mid, csum):
        if stripHTMLMedia(splitFields(flds)[0]) == val:
            nids.append(nid)
    return "n.id in %s" % ids2str(nids)

# Find and replace
##########################################################################

def findReplace(col, nids, src, dst, regex=False, field=None, fold=True):
    "Find and replace fields in a note."
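# --- Hedged usage note: in Anki's Finder this handler backs the "dupe:"
# search token, so the duplicate check can be driven through an ordinary note
# search. The model id and field text below are illustrative only; the token
# value is "mid,text", which _findDupes splits on the first comma.
example_nids = mw.col.findNotes('dupe:1234567890,犬')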
def importNotes(self, notes: List[ForeignNote]) -> None:
    "Convert each card into a note, apply attributes and add to col."
    assert self.mappingOk()
    # note whether tags are mapped
    self._tagsMapped = False
    for f in self.mapping:
        if f == "_tags":
            self._tagsMapped = True
    # gather checksums for duplicate comparison
    csums: Dict[str, List[int]] = {}
    for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model["id"]):
        if csum in csums:
            csums[csum].append(id)
        else:
            csums[csum] = [id]
    firsts: Dict[str, bool] = {}
    fld0idx = self.mapping.index(self.model["flds"][0]["name"])
    self._fmap = self.col.models.fieldMap(self.model)
    self._nextID = timestampID(self.col.db, "notes")
    # loop through the notes
    updates = []
    updateLog = []
    new = []
    self._ids: List[int] = []
    self._cards: List[Tuple] = []
    dupeCount = 0
    dupes: List[str] = []
    for n in notes:
        for c in range(len(n.fields)):
            if not self.allowHTML:
                n.fields[c] = html.escape(n.fields[c], quote=False)
            n.fields[c] = n.fields[c].strip()
            if not self.allowHTML:
                n.fields[c] = n.fields[c].replace("\n", "<br>")
        fld0 = n.fields[fld0idx]
        csum = fieldChecksum(fld0)
        # first field must exist
        if not fld0:
            self.log.append(
                self.col.tr(TR.IMPORTING_EMPTY_FIRST_FIELD,
                            val=" ".join(n.fields)))
            continue
        # earlier in import?
        if fld0 in firsts and self.importMode != ADD_MODE:
            # duplicates in source file; log and ignore
            self.log.append(
                self.col.tr(TR.IMPORTING_APPEARED_TWICE_IN_FILE, val=fld0))
            continue
        firsts[fld0] = True
        # already exists?
        found = False
        if csum in csums:
            # csum is not a guarantee; have to check
            for id in csums[csum]:
                flds = self.col.db.scalar(
                    "select flds from notes where id = ?", id)
                sflds = splitFields(flds)
                if fld0 == sflds[0]:
                    # duplicate
                    found = True
                    if self.importMode == UPDATE_MODE:
                        data = self.updateData(n, id, sflds)
                        if data:
                            updates.append(data)
                            updateLog.append(
                                self.col.tr(TR.IMPORTING_FIRST_FIELD_MATCHED,
                                            val=fld0))
                            dupeCount += 1
                            found = True
                    elif self.importMode == IGNORE_MODE:
                        dupeCount += 1
                    elif self.importMode == ADD_MODE:
                        # allow duplicates in this case
                        if fld0 not in dupes:
                            # only show message once, no matter how many
                            # duplicates are in the collection already
                            updateLog.append(
                                self.col.tr(
                                    TR.IMPORTING_ADDED_DUPLICATE_WITH_FIRST_FIELD,
                                    val=fld0))
                            dupes.append(fld0)
                        found = False
        # newly add
        if not found:
            data = self.newData(n)
            if data:
                new.append(data)
                # note that we've seen this note once already
                firsts[fld0] = True
    self.addNew(new)
    self.addUpdates(updates)
    # generate cards + update field cache
    self.col.after_note_updates(self._ids, mark_modified=False)
    # apply scheduling updates
    self.updateCards()
    # we randomize or order here, to ensure that siblings
    # have the same due#
    did = self.col.decks.selected()
    conf = self.col.decks.confForDid(did)
    # in order due?
    if conf["new"]["order"] == NEW_CARDS_RANDOM:
        self.col.sched.randomizeCards(did)
    part1 = self.col.tr(TR.IMPORTING_NOTE_ADDED, count=len(new))
    part2 = self.col.tr(TR.IMPORTING_NOTE_UPDATED, count=self.updateCount)
    if self.importMode == UPDATE_MODE:
        unchanged = dupeCount - self.updateCount
    elif self.importMode == IGNORE_MODE:
        unchanged = dupeCount
    else:
        unchanged = 0
    part3 = self.col.tr(TR.IMPORTING_NOTE_UNCHANGED, count=unchanged)
    self.log.append(f"{part1}, {part2}, {part3}.")
    self.log.extend(updateLog)
    self.total = len(self._ids)
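# --- Reference for the mode constants branched on above. To the best of my
# knowledge they are defined in anki.importing.noteimp in Anki 2.1: 0 updates
# the matching note in place, 1 skips the incoming duplicate, 2 adds it
# anyway (matching the raw 0/2 comparisons in the older importers above).
from anki.importing.noteimp import ADD_MODE, IGNORE_MODE, UPDATE_MODE

assert (UPDATE_MODE, IGNORE_MODE, ADD_MODE) == (0, 1, 2)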