Ejemplo n.º 1
0
 def fieldUnique(self, name):
     """Return True if field `name` does not duplicate the same field elsewhere.

     A field whose config has a falsy 'uniq' entry, or whose value is
     empty, is always reported as unique.  Otherwise the fsums table is
     searched for other facts of this model carrying the same checksum,
     and the stored field text of each hit is compared against the value
     so that a mere checksum collision is not reported as a duplicate.
     """
     (idx, conf) = self._fmap[name]
     if not conf['uniq']:
         return True
     val = self[name]
     if not val:
         return True
     csum = fieldChecksum(val)
     # `self.id or 0` substitutes 0 when the fact has no id yet
     # (presumably no stored fact uses id 0 -- verify against the schema)
     fids = self.deck.db.list(
         "select fid from fsums where csum = ? and fid != ? and mid = ?",
         csum, self.id or 0, self.mid)
     if not fids:
         return True
     # grab facts with the same checksums, and see if they're actually
     # duplicates
     for flds in self.deck.db.list("select flds from facts where id in "+
                                   ids2str(fids)):
         if splitFields(flds)[idx] == val:
             return False
     return True
Ejemplo n.º 2
0
Archivo: notes.py Proyecto: dae/anki
    def flush(self, mod=None):
        """Write the note to the database if its fields or tags changed.

        When `mod` is falsy and a row with the same id, tags and joined
        fields already exists, nothing is written.  Otherwise the note row
        is inserted or replaced, using `mod` (or the current time when it
        is falsy) as the modification stamp, and the tags are registered.
        """
        assert self.scm == self.col.scm
        self._preFlush()
        sort_field = self.fields[self.col.models.sortIdx(self._model)]
        sfld = stripHTMLMedia(sort_field)
        tags = self.stringTags()
        fields = self.joinedFields()
        if not mod:
            already_stored = self.col.db.scalar(
                "select 1 from notes where id = ? and tags = ? and flds = ?",
                self.id, tags, fields)
            if already_stored:
                return
        csum = fieldChecksum(self.fields[0])
        self.mod = mod or intTime()
        self.usn = self.col.usn()
        row = (self.id, self.guid, self.mid, self.mod, self.usn, tags,
               fields, sfld, csum, self.flags, self.data)
        self.col.db.execute(
            "\ninsert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            *row)
        self.col.tags.register(self.tags)
        self._postFlush()
Ejemplo n.º 3
0
    def flush(self, mod=None):
        """Insert or replace this note's row in the notes table.

        `mod` is used as the modification stamp; when it is falsy the
        current time from intTime() is used instead.  The note's tags are
        registered with the collection afterwards.
        """
        assert self.scm == self.col.scm
        self._preFlush()
        self.mod = mod or intTime()
        self.usn = self.col.usn()
        sort_field = self.fields[self.col.models.sortIdx(self._model)]
        sfld = stripHTML(sort_field)
        tags = self.stringTags()
        csum = fieldChecksum(self.fields[0])
        row = (self.id, self.guid, self.mid, self.mod, self.usn, tags,
               self.joinedFields(), sfld, csum, self.flags, self.data)
        self.col.db.execute(
            "\ninsert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            *row)
        self.col.tags.register(self.tags)
        self._postFlush()
Ejemplo n.º 4
0
 def scmhash(self, m):
     """Return a hash of the schema, to see if models are compatible.

     The hash input is every field name in m['flds'] followed by the
     name, question format and answer format of every template in
     m['tmpls'], concatenated in order and passed to fieldChecksum().
     """
     # collect the pieces and join once instead of growing a string
     parts = [f['name'] for f in m['flds']]
     for t in m['tmpls']:
         parts.extend((t['name'], t['qfmt'], t['afmt']))
     return fieldChecksum("".join(parts))
Ejemplo n.º 5
0
 def updateFieldChecksums(self):
     """Rebuild this fact's rows in the fsums table.

     Existing rows for the fact are deleted, then one
     (fact id, model id, checksum) row is inserted for every field that
     is marked 'uniq' in its config and has a non-empty value.
     """
     db = self.deck.db
     db.execute("delete from fsums where fid = ?", self.id)
     rows = [
         (self.id, self.mid, fieldChecksum(self.fields[idx]))
         for (idx, conf) in self._fmap.values()
         if conf['uniq'] and self.fields[idx]
     ]
     db.executemany("insert into fsums values (?, ?, ?)", rows)
 def isDupe(self, data):
     """Report whether data["field"] duplicates the first field of a note.

     `data` must carry a "field" value and a "model" name.  Notes of that
     model with a matching checksum are fetched and their first field is
     compared with the given value; returns True on a match, else False.
     """
     candidate = data["field"]
     csum = fieldChecksum(candidate)
     mid = mw.col.models.byName(data["model"])["id"]
     # the checksum only narrows the search; confirm on the actual text
     rows = mw.col.db.list(
         "select flds from notes where csum = ? and id != ? and mid = ?",
         csum, 0, mid)
     return any(splitFields(flds)[0] == candidate for flds in rows)
Ejemplo n.º 7
0
 def updateFieldCache(self, nids):
     """Update field checksums and sort cache, after find&replace, etc.

     For every note id in `nids` the sort field (HTML stripped) and the
     checksum of the first field are recomputed and written back to the
     notes table.  Notes whose model cannot be looked up are skipped.
     """
     snids = ids2str(nids)
     r = []
     for (nid, mid, flds) in self._fieldData(snids):
         model = self.models.get(mid)
         if not model:
             # note points to invalid model; sortIdx() cannot be resolved
             continue
         fields = splitFields(flds)
         r.append((stripHTML(fields[self.models.sortIdx(model)]),
                   fieldChecksum(fields[0]),
                   nid))
     # apply, relying on calling code to bump usn+mod
     self.db.executemany("update notes set sfld=?, csum=? where id=?", r)
Ejemplo n.º 8
0
 def dupeOrEmpty(self):
     """Classify the first field: 1 if empty, 2 if duplicated, else False.

     "Empty" means blank after stripping whitespace.  A duplicate is
     another note of the same model whose first field text equals this
     note's first field exactly.
     """
     first = self.fields[0]
     if not first.strip():
         return 1
     # the checksum only narrows the candidates; compare the real text
     candidates = self.col.db.list(
         "select flds from notes where csum = ? and id != ? and mid = ?",
         fieldChecksum(first), self.id or 0, self.mid)
     if any(splitFields(flds)[0] == first for flds in candidates):
         return 2
     return False
Ejemplo n.º 9
0
Archivo: notes.py Proyecto: Stvad/anki
 def dupeOrEmpty(self):
     """Classify the first field: 1 if empty, 2 if duplicated, else False.

     "Empty" means blank after stripping whitespace.  A duplicate is
     another note of the same model whose first field equals this note's
     first field once both have been passed through stripHTMLMedia().
     """
     first = self.fields[0]
     if not first.strip():
         return 1
     # the checksum only narrows the candidates; compare the real text
     candidates = self.col.db.list(
         "select flds from notes where csum = ? and id != ? and mid = ?",
         fieldChecksum(first), self.id or 0, self.mid)
     matched = any(
         stripHTMLMedia(splitFields(flds)[0]) == stripHTMLMedia(first)
         for flds in candidates)
     return 2 if matched else False
Ejemplo n.º 10
0
 def _findDupes(self, val):
     # caller must call stripHTMLMedia on passed val
     try:
         mid, val = val.split(",", 1)
     except OSError:
         return
     csum = fieldChecksum(val)
     nids = []
     for nid, flds in self.col.db.execute(
             "select id, flds from notes where mid=? and csum=?", mid,
             csum):
         if stripHTMLMedia(splitFields(flds)[0]) == val:
             nids.append(nid)
     return "n.id in %s" % ids2str(nids)
Ejemplo n.º 11
0
Archivo: find.py Proyecto: fingul/anki
 def _findDupes(self, val):
     # caller must call stripHTMLMedia on passed val
     try:
         mid, val = val.split(",", 1)
     except OSError:
         return
     csum = fieldChecksum(val)
     nids = []
     for nid, flds in self.col.db.execute(
             "select id, flds from notes where mid=? and csum=?",
             mid, csum):
         if stripHTMLMedia(splitFields(flds)[0]) == val:
             nids.append(nid)
     return "n.id in %s" % ids2str(nids)
Ejemplo n.º 12
0
 def updateFieldCache(self, nids):
     """Refresh the cached sort field and checksum of the given notes.

     Intended for use after bulk edits such as find&replace.  Notes whose
     model cannot be looked up are left untouched.
     """
     updates = []
     for (nid, mid, flds) in self._fieldData(ids2str(nids)):
         fields = splitFields(flds)
         model = self.models.get(mid)
         if not model:
             # note points to invalid model
             continue
         sort_text = stripHTML(fields[self.models.sortIdx(model)])
         first_csum = fieldChecksum(fields[0])
         updates.append((sort_text, first_csum, nid))
     # apply, relying on calling code to bump usn+mod
     self.db.executemany(
         "update notes set sfld=?, csum=? where id=?", updates)
Ejemplo n.º 13
0
    def flush(self, mod=None):
        """Insert or replace this note's row in the notes table.

        `mod` is used as the modification stamp; when it is falsy the
        current time from intTime() is used instead.  The note's tags are
        registered with the collection afterwards.
        """
        assert self.scm == self.col.scm
        self._preFlush()
        self.mod = mod or intTime()
        self.usn = self.col.usn()
        sort_field = self.fields[self.col.models.sortIdx(self._model)]
        sfld = stripHTML(sort_field)
        tags = self.stringTags()
        csum = fieldChecksum(self.fields[0])
        row = (
            self.id,
            self.guid,
            self.mid,
            self.mod,
            self.usn,
            tags,
            self.joinedFields(),
            sfld,
            csum,
            self.flags,
            self.data,
        )
        self.col.db.execute(
            "\ninsert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            *row)
        self.col.tags.register(self.tags)
        self._postFlush()
Ejemplo n.º 14
0
Archivo: deck.py Proyecto: ChYi/libanki
 def updateFieldCache(self, fids, csum=True):
     """Refresh the sort cache, and optionally the checksums, of facts.

     The sort field of every fact in `fids` is rewritten with HTML
     stripped.  When `csum` is true the facts' fsums rows are also
     deleted and re-created for each non-empty field marked 'uniq'.
     """
     sfids = ids2str(fids)
     mods = self.models()
     sums = []
     sort_rows = []
     for (fid, mid, flds) in self._fieldData(sfids):
         fields = splitFields(flds)
         model = mods[mid]
         if csum:
             sums.extend(
                 (fid, mid, fieldChecksum(fields[f['ord']]))
                 for f in model.fields
                 if f['uniq'] and fields[f['ord']])
         sort_rows.append((stripHTML(fields[model.sortIdx()]), fid))
     if csum:
         # replace the old checksum rows wholesale
         self.db.execute("delete from fsums where fid in "+sfids)
         self.db.executemany("insert into fsums values (?,?,?)", sums)
     self.db.executemany(
         "update facts set sfld = ? where id = ?", sort_rows)
Ejemplo n.º 15
0
    def flush(self, mod=None):
        """Write the note to the database if its fields or tags changed.

        When `mod` is falsy and a row with the same id, tags and joined
        fields already exists, nothing is written.  Otherwise the note row
        is inserted or replaced, using `mod` (or the current time when it
        is falsy) as the modification stamp, and the tags are registered.
        """
        assert self.scm == self.col.scm
        self._preFlush()
        sort_field = self.fields[self.col.models.sortIdx(self._model)]
        sfld = stripHTMLMedia(sort_field)
        tags = self.stringTags()
        fields = self.joinedFields()
        if not mod:
            already_stored = self.col.db.scalar(
                "select 1 from notes where id = ? and tags = ? and flds = ?",
                self.id, tags, fields)
            if already_stored:
                return
        csum = fieldChecksum(self.fields[0])
        self.mod = mod or intTime()
        self.usn = self.col.usn()
        row = (
            self.id,
            self.guid,
            self.mid,
            self.mod,
            self.usn,
            tags,
            fields,
            sfld,
            csum,
            self.flags,
            self.data,
        )
        self.col.db.execute(
            "\ninsert or replace into notes values (?,?,?,?,?,?,?,?,?,?,?)",
            *row)
        self.col.tags.register(self.tags)
        self._postFlush()
Ejemplo n.º 16
0
def dupeOrEmptyWithOrds(self):
    """
    Returns a tuple. The contents of each element are as follows:

    1) 1 if first is empty; 2 if first is a duplicate, False otherwise.
    2) For a duplicate (2), this returns the list of ordinals that make up the key.
       Otherwise this is None.

    The key is the first field plus every other field whose name ends with
    KEY_SUFFIX; a note is a duplicate only when all key fields match after
    stripHTMLMedia().
    """

    val = self.fields[0]
    if not val.strip():
        return 1, None
    csum = fieldChecksum(val)

    # The key ordinals depend only on the model, not on the candidate row,
    # so they are built once instead of once per row.
    field_ords = [0] + [
        fld["ord"] for fld in self.model()["flds"]
        if fld["ord"] != 0 and fld["name"].endswith(KEY_SUFFIX)
    ]

    # find any matching csums and compare
    for flds in self.col.db.list(
            "select flds from notes where csum = ? and id != ? and mid = ?",
            csum, self.id or 0, self.mid):
        fields_split = splitFields(flds)
        # all() stops at the first key field that differs
        if all(stripHTMLMedia(fields_split[field_ord]) == stripHTMLMedia(
                self.fields[field_ord]) for field_ord in field_ords):
            return 2, field_ords

    return False, None
Ejemplo n.º 17
0
    def updateCompleteDeck(self, data):
        """Write the cards, notes and models described by `data` into the collection.

        `data` must provide "deck" (a deck name), "cards" (a mapping of
        card dicts), "notes" and "models" (mappings keyed by the string
        form of the note id / model id).  Each card row and its note row
        are written with "replace into"; a model that the model manager
        does not know yet is created from its field and template names.
        """
        card_sql = (
            "replace into cards (id, nid, did, ord, type, queue, due, ivl, factor, reps, lapses, left, "
            "mod, usn, odue, odid, flags, data) "
            "values (" + ",".join("?" * 18) + ")"
        )
        note_sql = (
            "replace into notes(id, mid, tags, flds,"
            "guid, mod, usn, flags, data, sfld, csum) values (" + ",".join("?" * 11) + ")"
        )
        copied_card_keys = ("ord", "type", "queue", "due", "ivl", "factor",
                            "reps", "lapses", "left")

        self.startEditing()
        did = self.decks().id(data["deck"])
        self.decks().flush()
        model_manager = self.collection().models
        for card in data["cards"].values():
            card_row = [card["id"], card["nid"], did]
            card_row.extend(card[key] for key in copied_card_keys)
            # mod, usn, odue, odid, flags, data
            card_row.extend((intTime(), -1, 0, 0, 0, 0))
            self.database().execute(card_sql, *card_row)

            note = data["notes"][str(card["nid"])]
            tags = self.collection().tags.join(self.collection().tags.canonify(note["tags"]))
            note_row = (
                note["id"], note["mid"], tags, joinFields(note["fields"]),
                # guid, mod, usn, flags, data, sfld, csum
                guid64(), intTime(), -1, 0, 0, "", fieldChecksum(note["fields"][0]),
            )
            self.database().execute(note_sql, *note_row)

            model = data["models"][str(note["mid"])]
            if model_manager.get(model["id"]):
                continue
            # unknown model: rebuild it from the transmitted names
            model_o = model_manager.new(model["name"])
            for field_name in model["fields"]:
                model_manager.addField(model_o, model_manager.newField(field_name))
            for template_name in model["templateNames"]:
                model_manager.addTemplate(model_o, model_manager.newTemplate(template_name))
            model_o["id"] = model["id"]
            model_manager.update(model_o)
            model_manager.flush()

        self.stopEditing()
Ejemplo n.º 18
0
    def updateCompleteDeck(self, data):
        """Write the cards, notes and models described by `data` into the collection.

        `data` must provide 'deck' (a deck name), 'cards' (a mapping of
        card dicts), 'notes' and 'models' (mappings keyed by the string
        form of the note id / model id).  Each card row and its note row
        are written with "replace into"; a model that the model manager
        does not know yet is created from its field and template names.
        """
        card_sql = (
            'replace into cards (id, nid, did, ord, type, queue, due, ivl, factor, reps, lapses, left, '
            'mod, usn, odue, odid, flags, data) '
            'values (' + ','.join('?' * 18) + ')'
        )
        note_sql = (
            'replace into notes(id, mid, tags, flds,'
            'guid, mod, usn, flags, data, sfld, csum) values (' + ','.join('?' * 11) + ')'
        )
        copied_card_keys = ('ord', 'type', 'queue', 'due', 'ivl', 'factor',
                            'reps', 'lapses', 'left')

        self.startEditing()
        did = self.decks().id(data['deck'])
        self.decks().flush()
        model_manager = self.collection().models
        for card in data['cards'].values():
            card_row = [card['id'], card['nid'], did]
            card_row.extend(card[key] for key in copied_card_keys)
            # mod, usn, odue, odid, flags, data
            card_row.extend((intTime(), -1, 0, 0, 0, 0))
            self.database().execute(card_sql, *card_row)

            note = data['notes'][str(card['nid'])]
            tags = self.collection().tags.join(self.collection().tags.canonify(note['tags']))
            note_row = (
                note['id'], note['mid'], tags, joinFields(note['fields']),
                # guid, mod, usn, flags, data, sfld, csum
                guid64(), intTime(), -1, 0, 0, '', fieldChecksum(note['fields'][0]),
            )
            self.database().execute(note_sql, *note_row)

            model = data['models'][str(note['mid'])]
            if model_manager.get(model['id']):
                continue
            # unknown model: rebuild it from the transmitted names
            model_o = model_manager.new(model['name'])
            for field_name in model['fields']:
                model_manager.addField(model_o, model_manager.newField(field_name))
            for template_name in model['templateNames']:
                model_manager.addTemplate(model_o, model_manager.newTemplate(template_name))
            model_o['id'] = model['id']
            model_manager.update(model_o)
            model_manager.flush()

        self.stopEditing()
Ejemplo n.º 19
0
 def scmhash(self, m):
     """Return a hash of the schema, to see if models are compatible.

     The hash input is the concatenation, in order, of every field name
     in m['flds'], passed to fieldChecksum().
     """
     # join once instead of growing a string inside a loop
     return fieldChecksum("".join(f['name'] for f in m['flds']))
Ejemplo n.º 20
0
def updateNotes(allDb):
    """Recompute the MorphMan fields, tags and new-card order of notes.

    Derives seen/known/mature databases from `allDb`, scores every
    selected note (the "MMI" value), rewrites the configured fields and
    tags of notes that changed, and sets the due value of new cards to
    the note's MMI where that option is enabled.  Finally stores
    bookkeeping in `allDb.meta` and, if cfg('saveDbs') is set, saves the
    databases.

    Args:
        allDb: morpheme database; this function uses its fidDb(), locDb(),
            db, meta, frequency() and save() members.

    Returns:
        The "known" database produced by
        filterDbByMat(allDb, cfg('threshold_known')).
    """
    t_0, now, db = time.time(), intTime(), mw.col.db

    TAG = mw.col.tags  # type: TagManager
    ds, nid2mmi = [], {}
    mw.progress.start(label='Updating data', immediate=True)
    fidDb = allDb.fidDb(recalc=True)
    loc_db = allDb.locDb(recalc=False)  # type: Dict[Location, Set[Morpheme]]

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, tooShortTag, tooLongTag, frequencyTag = tagNames = cfg(
        'Tag_Comprehension'), cfg('Tag_Vocab'), cfg('Tag_Fresh'), cfg(
            'Tag_NotReady'), cfg('Tag_AlreadyKnown'), cfg('Tag_Priority'), cfg(
                'Tag_TooShort'), cfg('Tag_TooLong'), cfg('Tag_Frequency')
    TAG.register(tagNames)
    badLengthTag = cfg('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg('path_frequency')
    frequency_map = {}
    frequency_has_morphemes = False

    # The frequency list is optional: a missing file leaves frequency_map
    # empty.  Each entry maps to its zero-based row position in the file.
    try:
        with io.open(frequencyListPath, encoding='utf-8-sig') as csvfile:
            csvreader = csv.reader(csvfile, delimiter="\t")
            rows = [row for row in csvreader]

            # NOTE(review): rows[0][0] raises IndexError when the file has
            # no rows or an empty first row -- confirm whether that can occur.
            if rows[0][0] == "#study_plan_frequency":
                frequency_has_morphemes = True
                frequency_map = dict(
                    zip([
                        Morpheme(row[0], row[1], row[2], row[3], row[4],
                                 row[5]) for row in rows[1:]
                    ], itertools.count(0)))
            else:
                frequency_map = dict(
                    zip([row[0] for row in rows], itertools.count(0)))

    except FileNotFoundError:
        pass

    frequencyListLength = len(frequency_map)

    # prefetch cfg for fields
    field_focus_morph = cfg('Field_FocusMorph')
    field_unknown_count = cfg('Field_UnknownMorphCount')
    field_unmature_count = cfg('Field_UnmatureMorphCount')
    field_morph_man_index = cfg('Field_MorphManIndex')
    field_unknowns = cfg('Field_Unknowns')
    field_unmatures = cfg('Field_Unmatures')
    field_unknown_freq = cfg('Field_UnknownFreq')
    field_focus_morph_pos = cfg("Field_FocusMorphPos")

    skip_comprehension_cards = cfg('Option_SkipComprehensionCards')
    skip_fresh_cards = cfg('Option_SkipFreshVocabCards')

    # Find all morphs that changed maturity and the notes that refer to them.
    last_maturities = allDb.meta.get('last_maturities', {})
    new_maturities = {}
    refresh_notes = set()

    # Recompute everything if preferences changed.
    last_preferences = allDb.meta.get('last_preferences', {})
    if not last_preferences == get_preferences():
        print("Preferences changed.  Updating all notes...")
        last_updated = 0
    else:
        last_updated = allDb.meta.get('last_updated', 0)

    # Todo: Remove this forced 0 once we add checks for other changes like new frequency.txt files.
    # While this override is in place the maturity scan below never runs,
    # so refresh_notes and new_maturities stay empty.
    last_updated = 0

    # If we're updating everything anyway, clear the notes set.
    if last_updated > 0:
        for m, locs in allDb.db.items():
            # bit 0 = seen, bit 1 = known, bit 2 = mature
            maturity_bits = 0
            if seenDb.matches(m):
                maturity_bits |= 1
            if knownDb.matches(m):
                maturity_bits |= 2
            if matureDb.matches(m):
                maturity_bits |= 4

            new_maturities[m] = maturity_bits

            if last_maturities.get(m, -1) != maturity_bits:
                for loc in locs:
                    if isinstance(loc, AnkiDeck):
                        refresh_notes.add(loc.noteId)

    included_types, include_all = getModifyEnabledModels()
    included_mids = [
        m['id'] for m in mw.col.models.all()
        if include_all or m['name'] in included_types
    ]

    # Select notes of the enabled models that were modified after
    # last_updated or are listed in refresh_notes.
    query = '''
        select id, mid, flds, guid, tags from notes
        WHERE mid IN ({0}) and ( mod > {2} or id in ({1}) )
        '''.format(','.join([str(m) for m in included_mids]),
                   ','.join([str(id) for id in refresh_notes]), last_updated)
    query_results = db.execute(query)

    # NOTE(review): len() assumes db.execute returns a sized sequence
    # rather than a cursor -- confirm for the targeted Anki version.
    N_notes = len(query_results)
    mw.progress.finish()
    mw.progress.start(label='Updating notes', max=N_notes, immediate=True)

    for i, (nid, mid, flds, guid, tags) in enumerate(query_results):
        ts = TAG.split(tags)
        if i % 500 == 0:
            mw.progress.update(value=i)

        # per-model config lookup
        C = partial(cfg, model_id=mid)

        notecfg = getFilterByMidAndTags(mid, ts)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(loc_db[loc])
            except KeyError:
                continue

        proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown')

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, new_knowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if proper_nouns_known and morpheme.isProperNoun():
                continue
            if not seenDb.matches(morpheme):
                unseens.add(morpheme)
            if not knownDb.matches(morpheme):
                unknowns.add(morpheme)
            if not matureDb.matches(morpheme):
                unmatures.add(morpheme)
                if knownDb.matches(morpheme):
                    new_knowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(
            unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # add bonus for morphs in priority.db and frequency.txt
        frequencyBonus = C('frequency.txt bonus')
        isPriority = False
        isFrequency = False

        focusMorph = None

        F_k = 0
        usefulness = 0
        for focusMorph in unknowns:
            F_k += allDb.frequency(focusMorph)
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            if frequency_has_morphemes:
                focusMorphIndex = frequency_map.get(focusMorph, -1)
            else:
                focusMorphIndex = frequency_map.get(focusMorph.base, -1)

            if focusMorphIndex >= 0:
                isFrequency = True

                # The bigger this number, the lower mmi becomes
                usefulness += int(
                    round(frequencyBonus *
                          (1 - focusMorphIndex / frequencyListLength)))

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness += F_k_avg

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in new_knowns:
            locs = knownDb.getMatchingLocs(morpheme)
            if locs:
                # NOTE(review): min(1, ...) caps ivl at 1 instead of flooring
                # it, and a maturity of 0 would make the division below fail;
                # max(1, ...) may have been intended -- confirm before changing.
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == '動詞'
               for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        # invert and clamp: a higher raw usefulness yields a smaller value
        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        # negative when shorter than the minimum, positive when longer than
        # the maximum (assuming min <= max in the config)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # Fill in various fields/tags on the note based on cfg
        fs = splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [
            t for t in ts
            if t not in (notReadyTag, compTag, vocabTag, freshTag)
        ]

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts.append(compTag)
            if skip_comprehension_cards:
                usefulness += 1000000  # Add a penalty to put these cards at the end of the queue
        elif N_k == 1:  # new vocab card, k+1
            ts.append(vocabTag)
            # unknowns holds exactly one morpheme here, so focusMorph (left
            # over from the loop over unknowns above) is that morpheme
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        elif N_k > 1:  # M+1+ and K+2+
            ts.append(notReadyTag)
        elif N_m == 1:  # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts.append(freshTag)
            if skip_fresh_cards:
                usefulness += 1000000  # Add a penalty to put these cards at the end of the queue
            focusMorph = next(iter(unmatures))
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)

        else:  # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts.append(freshTag)
            if skip_fresh_cards:
                usefulness += 1000000  # Add a penalty to put these cards at the end of the queue

        # calculate mmi
        mmi = 100000 * N_k + 1000 * lenDiff + int(round(usefulness))
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # set type agnostic fields
        setField(mid, fs, field_unknown_count, '%d' % N_k)
        setField(mid, fs, field_unmature_count, '%d' % N_m)
        setField(mid, fs, field_morph_man_index, '%d' % mmi)
        setField(mid, fs, field_unknowns, ', '.join(u.base for u in unknowns))
        setField(mid, fs, field_unmatures,
                 ', '.join(u.base for u in unmatures))
        setField(mid, fs, field_unknown_freq, '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not cfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append((tags_, flds_, sfld, csum, now, mw.col.usn(), nid))

    mw.progress.update(label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=?, flds=?, sfld=?, csum=?, mod=?, usn=? where id=?',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append((due_, now, mw.col.usn(), cid))

    mw.col.db.executemany('update cards set due=?, mod=?, usn=? where id=?',
                          ds)

    mw.reset()

    # remember the state this run was based on for the next invocation
    allDb.meta['last_preferences'] = get_preferences()
    allDb.meta['last_maturities'] = new_maturities
    allDb.meta['last_updated'] = int(time.time() + 0.5)

    printf('Updated %d notes in %f sec' % (N_notes, time.time() - t_0))

    if cfg('saveDbs'):
        mw.progress.update(label='Saving all/seen/known/mature dbs')
        allDb.save(cfg('path_all'))
        seenDb.save(cfg('path_seen'))
        knownDb.save(cfg('path_known'))
        matureDb.save(cfg('path_mature'))
        printf('Updated %d notes + saved dbs in %f sec' %
               (N_notes, time.time() - t_0))

    mw.progress.finish()
    return knownDb
Ejemplo n.º 21
0
def updateNotes( allDb ):
    """Recalculate morpheme data for every note and re-order new cards.

    For each note whose filter allows modification, this works out which of
    the note's morphemes are missing from the seen/known/mature databases
    derived from ``allDb``, computes the Morph Man Index (MMI), refreshes the
    MorphMan fields and tags, and queues the note for a batched ``update``
    if its fields or tags changed. Afterwards, new cards (``type = 0``) whose
    note has a stored MMI get their ``due`` value set to that MMI.

    Args:
        allDb: database of all morphemes; this function uses its ``fidDb()``,
            ``locDb()``, ``frequency()`` and ``db`` members.

    Returns:
        The database produced by
        ``filterDbByMat( allDb, cfg1('threshold_known') )``.
    """
    t_0, now, db, TAG   = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi         = [], {}
    N_notes             = db.scalar( 'select count() from notes' )
    mw.progress.start( label='Updating data', max=N_notes, immediate=True )
    fidDb   = allDb.fidDb()
    locDb   = allDb.locDb( recalc=False ) # fidDb() already forces locDb recalc

    # read tag names
    tagNames = (
        jcfg('Tag_Comprehension'), jcfg('Tag_Vocab'), jcfg('Tag_Fresh'),
        jcfg('Tag_NotReady'), jcfg('Tag_AlreadyKnown'), jcfg('Tag_Priority'),
        jcfg('Tag_TooShort'), jcfg('Tag_TooLong'),
    )
    ( compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag,
      priorityTag, tooShortTag, tooLongTag ) = tagNames
    TAG.register( tagNames )
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update( label='Creating seen/known/mature from all.db' )
    seenDb      = filterDbByMat( allDb, cfg1('threshold_seen') )
    knownDb     = filterDbByMat( allDb, cfg1('threshold_known') )
    matureDb    = filterDbByMat( allDb, cfg1('threshold_mature') )
    mw.progress.update( label='Loading priority.db' )
    priorityDb  = MorphDb( cfg1('path_priority'), ignoreErrors=True ).db

    if cfg1('saveDbs'):
        mw.progress.update( label='Saving seen/known/mature dbs' )
        seenDb.save( cfg1('path_seen') )
        knownDb.save( cfg1('path_known') )
        matureDb.save( cfg1('path_mature') )

    mw.progress.update( label='Updating notes' )
    # `i` is reported to the progress bar after the loop; give it a value up
    # front so an empty collection does not raise NameError there.
    i = 0
    for i,( nid, mid, flds, guid, tags ) in enumerate( db.execute( 'select id, mid, flds, guid, tags from notes' ) ):
        if i % 500 == 0:    mw.progress.update( value=i )
        C = partial( cfg, mid, None )

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']: continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[ ( nid, guid, fieldName ) ]
                morphemes.update( locDb[ loc ] )
            except KeyError: continue

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if morpheme not in seenDb.db:      unseens.add( morpheme )
            if morpheme not in knownDb.db:     unknowns.add( morpheme )
            if morpheme not in matureDb.db:    unmatures.add( morpheme )
            if morpheme not in matureDb.db and morpheme in knownDb.db:
                newKnowns.add( morpheme )

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len( morphemes ), len( unseens ), len( unknowns ), len( unmatures )

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'): continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for unknown in unknowns:
            F_k += allDb.frequency( unknown )
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for unknown in unknowns:
            if unknown in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[ morpheme ]
            if locs:
                # NOTE(review): min( 1, ... ) caps the divisor at 1 and lets it
                # reach 0 (ZeroDivisionError) if the highest maturity is 0;
                # max( 1, ... ) may have been intended - confirm before changing.
                ivl = min( 1, max( loc.maturity for loc in locs ) )
                usefulness += C('reinforce new vocab weight') // ivl #TODO: maybe average this so it doesnt favor long sentences

        if any( morpheme.pos == u'動詞' for morpheme in unknowns ): #FIXME: this isn't working???
            usefulness += C('verb bonus')

        # invert so that a more useful note gets a smaller value
        usefulness = 999 - min( 999, usefulness )

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 10000*N_k + 1000*lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[ nid ] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split( tags ), splitFields( flds )

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [ t for t in ts if t not in [ notReadyTag, compTag, vocabTag, freshTag ] ]

        # determine card type
        if N_m == 0:    # sentence comprehension card, m+0
            ts = ts + [ compTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'' )
        elif N_k == 1:  # new vocab card, k+1
            ts = ts + [ vocabTag ]
            # exactly one unknown here; pick it explicitly rather than relying
            # on a loop variable left over from the loops above
            focusMorph = next( iter( unknowns ) )
            setField( mid, fs, jcfg('Field_FocusMorph'), u'%s' % focusMorph.base )
        elif N_k > 1:   # M+1+ and K+2+
            ts = ts + [ notReadyTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_m == 1: # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [ freshTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'%s' % list(unmatures)[0].base)
        else: # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [ freshTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'')

        # set type agnostic fields
        setField( mid, fs, jcfg('Field_UnknownMorphCount'), u'%d' % N_k )
        setField( mid, fs, jcfg('Field_UnmatureMorphCount'), u'%d' % N_m )
        setField( mid, fs, jcfg('Field_MorphManIndex'), u'%d' % mmi )
        setField( mid, fs, jcfg('Field_Unknowns'), u', '.join( u.base for u in unknowns ) )
        setField( mid, fs, jcfg('Field_Unmatures'), u', '.join( u.base for u in unmatures ) )
        setField( mid, fs, jcfg('Field_UnknownFreq'), u'%d' % F_k_avg )

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove( badLengthTag )

        # other tags
        if priorityTag in ts:   ts.remove( priorityTag )
        if isPriority:          ts.append( priorityTag )

        if tooShortTag in ts:   ts.remove( tooShortTag )
        if lenDiffRaw < 0:      ts.append( tooShortTag )

        if tooLongTag in ts:    ts.remove( tooLongTag )
        if lenDiffRaw > 0:      ts.append( tooLongTag )

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join( TAG.canonify( ts ) )
        flds_ = joinFields( fs )
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum( fs[0] )
            sfld = stripHTML( fs[ getSortFieldIndex( mid ) ] )
            ds.append( { 'now':now, 'tags':tags_, 'flds':flds_, 'sfld':sfld, 'csum':csum, 'usn':mw.col.usn(), 'nid':nid } )

    mw.progress.update( value=i, label='Updating anki database...' )
    mw.col.db.executemany( 'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid', ds )

    # Now reorder new cards based on MMI
    mw.progress.update( value=i, label='Updating new card ordering...' )
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for ( cid, nid, due ) in db.execute( 'select id, nid, due from cards where type = 0' ):
        if nid in nid2mmi: # owise it was disabled
            due_ = nid2mmi[ nid ]
            if due != due_: # only update cards that have changed
                ds.append( { 'now':now, 'due':due_, 'usn':mw.col.usn(), 'cid':cid } )
    mw.col.db.executemany( 'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds )
    mw.reset()

    printf( 'Updated notes in %f sec' % ( time.time() - t_0 ) )
    mw.progress.finish()
    return knownDb
Ejemplo n.º 22
0
    def importNotes(self, notes):
        "Convert each card into a note, apply attributes and add to col."
        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums = {}
        for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts = {}
        fld0idx = self.mapping.index(self.model['flds'][0]['name'])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # loop through the notes
        updates = []
        updateLog = []
        updateLogTxt = _("Update as first field matched: %s")
        dupeLogTxt = _("Added duplicate with first field: %s")
        new = []
        self._ids = []
        self._cards = []
        self._emptyNotes = False
        for n in notes:
            if not self.allowHTML:
                for c in range(len(n.fields)):
                    n.fields[c] = cgi.escape(n.fields[c])
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # first field must exist
            if not fld0:
                self.log.append(_("Empty first field: %s") %
                                " ".join(n.fields))
                continue
            # earlier in import?
            if fld0 in firsts and self.importMode != 2:
                # duplicates in source file; log and ignore
                self.log.append(_("Appeared twice in file: %s") %
                                fld0)
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == 0:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                updateLog.append(updateLogTxt % fld0)
                                found = True
                            break
                        elif self.importMode == 2:
                            # allow duplicates in this case
                            updateLog.append(dupeLogTxt % fld0)
                            found = False
            # newly add
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
                    # note that we've seen this note once already
                    firsts[fld0] = True
        self.addNew(new)
        self.addUpdates(updates)
        # make sure to update sflds, etc
        self.col.updateFieldCache(self._ids)
        # generate cards
        if self.col.genCards(self._ids):
            self.log.insert(0, _(
                "Empty cards found. Please run Tools>Empty Cards."))
        # apply scheduling updates
        self.updateCards()
        self.col.sched.maybeRandomizeDeck()
        part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
        part2 = ngettext("%d note updated", "%d notes updated", self.updateCount) % self.updateCount
        self.log.append("%s, %s." % (part1, part2))
        self.log.extend(updateLog)
        if self._emptyNotes:
            self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
        self.total = len(self._ids)
Ejemplo n.º 23
0
 def scmhash(self, m):
     """Return a checksum over the model's field names, in field order.

     The names in ``m["flds"]`` are concatenated without a separator and
     passed to ``fieldChecksum``; the result is used to see whether models
     are compatible.
     """
     fieldNames = "".join(fld["name"] for fld in m["flds"])
     return fieldChecksum(fieldNames)
Ejemplo n.º 24
0
def _getOriginal(note):
    """Return another note with the same first-field checksum and model.

    Looks up the first note in the collection whose ``csum`` equals the
    checksum of ``note.fields[0]``, whose ``mid`` equals ``note.mid`` and
    whose id differs from ``note.id``, then loads it with ``getNote``.

    NOTE(review): the query result is indexed without a check, so this
    presumably raises when no such note exists - confirm against callers.
    """
    csum = fieldChecksum(note.fields[0])
    # Bind the values as parameters instead of formatting them into the SQL
    # text, as the other queries against this database do.
    row = mw.col.db.first(
        "select id from notes where csum == ? and id != ? and mid == ?",
        csum, note.id, note.mid)
    original_nid = row[0]
    return mw.col.getNote(original_nid)
Ejemplo n.º 25
0
    def importNotes(self, notes):
        "Convert each card into a note, apply attributes and add to col."
        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums = {}
        for csum, id in self.col.db.execute(
                "select csum, id from notes where mid = ?", self.model['id']):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts = {}
        fld0idx = self.mapping.index(self.model['flds'][0]['name'])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # loop through the notes
        updates = []
        updateLog = []
        updateLogTxt = _("First field matched: %s")
        dupeLogTxt = _("Added duplicate with first field: %s")
        new = []
        self._ids = []
        self._cards = []
        self._emptyNotes = False
        dupeCount = 0
        dupes = []
        for n in notes:
            for c in range(len(n.fields)):
                if not self.allowHTML:
                    n.fields[c] = html.escape(n.fields[c], quote=False)
                n.fields[c] = n.fields[c].strip()
                if not self.allowHTML:
                    n.fields[c] = n.fields[c].replace("\n", "<br>")
                n.fields[c] = unicodedata.normalize("NFC", n.fields[c])
            n.tags = [unicodedata.normalize("NFC", t) for t in n.tags]
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # first field must exist
            if not fld0:
                self.log.append(
                    _("Empty first field: %s") % " ".join(n.fields))
                continue
            # earlier in import?
            if fld0 in firsts and self.importMode != 2:
                # duplicates in source file; log and ignore
                self.log.append(_("Appeared twice in file: %s") % fld0)
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == 0:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                updateLog.append(updateLogTxt % fld0)
                                dupeCount += 1
                                found = True
                        elif self.importMode == 1:
                            dupeCount += 1
                        elif self.importMode == 2:
                            # allow duplicates in this case
                            if fld0 not in dupes:
                                # only show message once, no matter how many
                                # duplicates are in the collection already
                                updateLog.append(dupeLogTxt % fld0)
                                dupes.append(fld0)
                            found = False
            # newly add
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
                    # note that we've seen this note once already
                    firsts[fld0] = True
        self.addNew(new)
        self.addUpdates(updates)
        # make sure to update sflds, etc
        self.col.updateFieldCache(self._ids)
        # generate cards
        if self.col.genCards(self._ids):
            self.log.insert(
                0, _("Empty cards found. Please run Tools>Empty Cards."))
        # apply scheduling updates
        self.updateCards()
        # we randomize or order here, to ensure that siblings
        # have the same due#
        did = self.col.decks.selected()
        conf = self.col.decks.confForDid(did)
        # in order due?
        if conf['new']['order'] == NEW_CARDS_RANDOM:
            self.col.sched.randomizeCards(did)

        part1 = ngettext("%d note added", "%d notes added",
                         len(new)) % len(new)
        part2 = ngettext("%d note updated", "%d notes updated",
                         self.updateCount) % self.updateCount
        if self.importMode == 0:
            unchanged = dupeCount - self.updateCount
        elif self.importMode == 1:
            unchanged = dupeCount
        else:
            unchanged = 0
        part3 = ngettext("%d note unchanged", "%d notes unchanged",
                         unchanged) % unchanged
        self.log.append("%s, %s, %s." % (part1, part2, part3))
        self.log.extend(updateLog)
        if self._emptyNotes:
            self.log.append(
                _("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
        self.total = len(self._ids)
Ejemplo n.º 26
0
    def importNotes(self, notes):
        "Convert each card into a note, apply attributes and add to col."
        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums = {}
        for csum, id in self.col.db.execute(
                "select csum, id from notes where mid = ?", self.model['id']):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts = {}
        fld0idx = self.mapping.index(self.model['flds'][0]['name'])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # loop through the notes
        updates = []
        updateLog = []
        updateLogTxt = _("First field matched: %s")
        dupeLogTxt = _("Added duplicate with first field: %s")
        new = []
        self._ids = []
        self._cards = []
        self._emptyNotes = False
        dupeCount = 0
        dupes = []
        for n in notes:
            for c in range(len(n.fields)):
                if not self.allowHTML:
                    n.fields[c] = cgi.escape(n.fields[c])
                n.fields[c] = n.fields[c].strip().replace("\n", "<br>")
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # first field must exist
            if not fld0:
                self.log.append(_("Empty first field: %s") %
                                " ".join(n.fields))
                continue
            # earlier in import?
            if fld0 in firsts and self.importMode != 2:
                # duplicates in source file; log and ignore
                self.log.append(_("Appeared twice in file: %s") %
                                fld0)
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == 0:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                updateLog.append(updateLogTxt % fld0)
                                dupeCount += 1
                                found = True
                        elif self.importMode == 1:
                            dupeCount += 1
                        elif self.importMode == 2:
                            # allow duplicates in this case
                            if fld0 not in dupes:
                                # only show message once, no matter how many
                                # duplicates are in the collection already
                                updateLog.append(dupeLogTxt % fld0)
                                dupes.append(fld0)
                            found = False
            # newly add
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
                    # note that we've seen this note once already
                    firsts[fld0] = True
        self.addNew(new)
        self.addUpdates(updates)
        # make sure to update sflds, etc
        self.col.updateFieldCache(self._ids)
        # generate cards
        if self.col.genCards(self._ids):
            self.log.insert(0, _(
                "Empty cards found. Please run Tools>Empty Cards."))
        # apply scheduling updates
        self.updateCards()
        # we randomize or order here, to ensure that siblings
        # have the same due#
        did = self.col.decks.selected()
        conf = self.col.decks.confForDid(did)
        # in order due?
        if conf['new']['order'] == NEW_CARDS_RANDOM:
            self.col.sched.randomizeCards(did)
        else:
            self.col.sched.orderCards(did)

        part1 = ngettext("%d note added", "%d notes added",
                         len(new)) % len(new)
        part2 = ngettext("%d note updated", "%d notes updated",
                         self.updateCount) % self.updateCount
        if self.importMode == 0:
            unchanged = dupeCount - self.updateCount
        elif self.importMode == 1:
            unchanged = dupeCount
        else:
            unchanged = 0
        part3 = ngettext("%d note unchanged", "%d notes unchanged",
                         unchanged) % unchanged
        self.log.append("%s, %s, %s." % (part1, part2, part3))
        self.log.extend(updateLog)
        if self._emptyNotes:
            self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
        self.total = len(self._ids)
Ejemplo n.º 27
0
    def importNotes(self, notes: list[ForeignNote]) -> None:
        """Convert each foreign note into a note and add it to the collection.

        Each field is cleaned first: HTML-escaped unless ``self.allowHTML``
        is set, stripped, and (again only without ``allowHTML``) newlines are
        replaced by ``<br>``. Notes are then matched against existing notes
        of the selected model by their NFC-normalised first field:

        * ``UPDATE_MODE``: a match is passed to ``updateData``;
        * ``IGNORE_MODE``: a match is counted as a duplicate and not added;
        * ``ADD_MODE``: the incoming note is added even if it matches.

        A summary line with added / updated / unchanged counts and the
        per-note messages are appended to ``self.log``; ``self.total`` is set
        to the number of ids collected in ``self._ids``.
        """
        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums: dict[str, list[NoteId]] = {}
        for csum, id in self.col.db.execute(
                "select csum, id from notes where mid = ?", self.model["id"]):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts: dict[str, bool] = {}
        fld0idx = self.mapping.index(self.model["flds"][0]["name"])
        self._fmap = self.col.models.field_map(self.model)
        self._nextID = NoteId(timestampID(self.col.db, "notes"))
        # loop through the notes
        updates: list[Updates] = []
        updateLog = []
        new = []
        self._ids: list[NoteId] = []
        self._cards: list[tuple] = []
        dupeCount = 0
        dupes: list[str] = []
        for n in notes:
            for c, field in enumerate(n.fields):
                # Each step must build on the previous step's result. Applying
                # every step to the untouched loop value would make the last
                # assignment win and discard the escaping and stripping.
                if not self.allowHTML:
                    field = html.escape(field, quote=False)
                field = field.strip()
                if not self.allowHTML:
                    field = field.replace("\n", "<br>")
                n.fields[c] = field
            fld0 = unicodedata.normalize("NFC", n.fields[fld0idx])
            # first field must exist
            if not fld0:
                self.log.append(
                    self.col.tr.importing_empty_first_field(
                        val=" ".join(n.fields)))
                continue
            csum = fieldChecksum(fld0)
            # earlier in import?
            if fld0 in firsts and self.importMode != ADD_MODE:
                # duplicates in source file; log and ignore
                self.log.append(
                    self.col.tr.importing_appeared_twice_in_file(val=fld0))
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == UPDATE_MODE:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                updateLog.append(
                                    self.col.tr.importing_first_field_matched(
                                        val=fld0))
                                dupeCount += 1
                                found = True
                        elif self.importMode == IGNORE_MODE:
                            dupeCount += 1
                        elif self.importMode == ADD_MODE:
                            # allow duplicates in this case
                            if fld0 not in dupes:
                                # only show message once, no matter how many
                                # duplicates are in the collection already
                                updateLog.append(
                                    self.col.tr.
                                    importing_added_duplicate_with_first_field(
                                        val=fld0, ))
                                dupes.append(fld0)
                            found = False
            # newly add
            if not found:
                new_data = self.newData(n)
                if new_data:
                    new.append(new_data)
                    # note that we've seen this note once already
                    firsts[fld0] = True
        self.addNew(new)
        self.addUpdates(updates)
        # generate cards + update field cache
        self.col.after_note_updates(self._ids, mark_modified=False)
        # apply scheduling updates
        self.updateCards()
        # we randomize or order here, to ensure that siblings
        # have the same due#
        did = self.col.decks.selected()
        conf = self.col.decks.config_dict_for_deck_id(did)
        # in order due?
        if not conf["dyn"] and conf["new"]["order"] == NEW_CARDS_RANDOM:
            self.col.sched.randomizeCards(did)

        part1 = self.col.tr.importing_note_added(count=len(new))
        part2 = self.col.tr.importing_note_updated(count=self.updateCount)
        if self.importMode == UPDATE_MODE:
            unchanged = dupeCount - self.updateCount
        elif self.importMode == IGNORE_MODE:
            unchanged = dupeCount
        else:
            unchanged = 0
        part3 = self.col.tr.importing_note_unchanged(count=unchanged)
        self.log.append(f"{part1}, {part2}, {part3}.")
        self.log.extend(updateLog)
        self.total = len(self._ids)
Ejemplo n.º 28
0
def updateNotes(allDb):
    """Recompute morpheme data for every enabled note and reorder new cards.

    For each note whose model config has 'enabled' set, the note's morphemes
    are looked up through ``allDb`` and compared against the known and mature
    databases derived from it with ``filterDbByMat``.  From that a "Morph Man
    Index" (mmi) is computed out of the number of unknown morphemes, the
    distance from the configured optimal sentence length and a usefulness
    score.  The configured fields and tags are rewritten, changed notes are
    written back in one bulk update, and new cards (``type = 0``) get their
    ``due`` set to the note's mmi when 'set due based on mmi' is enabled.

    :param allDb: morpheme database; must provide ``fidDb()``, ``locDb()``
        and a ``db`` mapping from morpheme to its locations.
    :returns: the known-morpheme database built from ``allDb``.
    """
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    # Tag names come from per-model config, so collect every name seen while
    # looping.  This also keeps the register() call after the loop valid when
    # no note is processed at all (previously a NameError).
    tagNames = set()
    # `i` is read after the loop; give it a value for an empty notes table.
    i = 0
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'):
            continue

        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[(nid, guid, fieldName)]
                ms.update(locDb[loc])
            except KeyError:
                continue
        ms = [m for m in ms if m.pos not in C('morph_blacklist')]

        # Determine un-known/mature and i+N
        unknowns, unmatures, newKnowns = set(), set(), set()
        for m in ms:
            isKnown = m in knownDb.db
            if not isKnown:
                unknowns.add(m)
            if m not in matureDb.db:
                unmatures.add(m)
                if isKnown:
                    # known but not yet mature: recently learned
                    newKnowns.add(m)

        # Determine MMI - Morph Man Index
        N, N_k, N_m = len(ms), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within
        # your collection); floor division keeps the value integral on both
        # Python 2 and Python 3
        F_k = sum(len(allDb.db[m]) for m in unknowns)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for m in unknowns:
            if m in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[m]
            if locs:
                # NOTE(review): min() caps ivl at 1, so a maturity of 0 would
                # divide by zero here - confirm whether max() was intended.
                ivl = min(1, max(loc.maturity for loc in locs))
                usefulness += C(
                    'reinforce new vocab weight'
                ) / ivl  #TODO: maybe average this so it doesnt favor long sentences

        if any(m.pos == u'動詞'
               for m in unknowns):  #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length (too little context vs long sentence)
        optimalLength = C('optimal sentence length')
        lenDiff = max(0, min(9, abs(optimalLength - N) - 2))
        tooLong = N > optimalLength

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)
        noteTagNames = (C('tag_comprehension'), C('tag_vocab'),
                        C('tag_notReady'), C('tag_alreadyKnown'),
                        C('tag_priority'), C('tag_badLength'),
                        C('tag_tooLong'))
        (compTag, vocabTag, notReadyTag, alreadyKnownTag, priorityTag,
         badLengthTag, tooLongTag) = noteTagNames
        tagNames.update(noteTagNames)

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts = [compTag
                  ] + [t for t in ts if t not in [vocabTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'')
        elif N_k == 1:  # new vocab card, k+1
            ts = [vocabTag
                  ] + [t for t in ts if t not in [compTag, notReadyTag]]
            # exactly one unknown morpheme: that is the focus morph
            focusMorph = next(iter(unknowns))
            setField(mid, fs, C('focusMorph'), u'%s' % focusMorph.base)
        elif N_k > 1:  # M+1+ and K+2+
            ts = [notReadyTag
                  ] + [t for t in ts if t not in [compTag, vocabTag]]

        # set type agnostic fields
        setField(mid, fs, C('k+N'), u'%d' % N_k)
        setField(mid, fs, C('m+N'), u'%d' % N_m)
        setField(mid, fs, C('morphManIndex'), u'%d' % mmi)
        setField(mid, fs, C('unknowns'), u', '.join(u.base for u in unknowns))
        setField(mid, fs, C('unmatures'),
                 u', '.join(u.base for u in unmatures))
        setField(mid, fs, C('unknownFreq'), u'%d' % F_k_avg)

        # other tags: drop the old state first, then re-add when it applies
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if badLengthTag in ts:
            ts.remove(badLengthTag)
        if lenDiff:
            ts.append(badLengthTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if tooLong:
            ts.append(tooLongTag)

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({
                'now': now,
                'tags': tags_,
                'flds': flds_,
                'sfld': sfld,
                'csum': csum,
                'usn': mw.col.usn(),
                'nid': nid
            })

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)
    TAG.register(list(tagNames))

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({
                    'now': now,
                    'due': due_,
                    'usn': mw.col.usn(),
                    'cid': cid
                })
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb
Ejemplo n.º 29
0
def updateNotes(allDb):
    """Recompute morpheme data for every modifiable note and reorder new cards.

    For each note whose filter (``getFilter``) exists and has 'Modify' set,
    the note's morphemes are looked up through ``allDb`` and compared against
    the known and mature databases derived from it with ``filterDbByMat``.
    A "Morph Man Index" (mmi) is computed from the number of unknown
    morphemes, the distance from the configured good sentence length range
    and a usefulness score that includes bonuses for priority.db and
    frequency.txt entries.  The configured fields and tags are rewritten,
    changed notes are written back in one bulk update, and new cards
    (``type = 0``) get their ``due`` set to the note's mmi when
    'set due based on mmi' is enabled.

    :param allDb: morpheme database; must provide ``fidDb()``, ``locDb()``,
        ``frequency()`` and a ``db`` mapping from morpheme to its locations.
    :returns: the known-morpheme database built from ``allDb``.
    """
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    # `i` is read after the loop; give it a value for an empty notes table.
    i = 0
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # read tag names
    tagNames = (jcfg('Tag_Comprehension'), jcfg('Tag_Vocab'),
                jcfg('Tag_Fresh'), jcfg('Tag_NotReady'),
                jcfg('Tag_AlreadyKnown'), jcfg('Tag_Priority'),
                jcfg('Tag_TooShort'), jcfg('Tag_TooLong'),
                jcfg('Tag_Frequency'))
    (compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag,
     tooShortTag, tooLongTag, frequencyTag) = tagNames
    TAG.register(tagNames)
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg1('path_frequency')
    # Default to an empty list so a missing frequency.txt simply yields no
    # frequency bonus, instead of relying on a swallowed NameError later on.
    frequencyList = []
    try:
        with codecs.open(frequencyListPath, 'r', 'utf-8') as f:
            frequencyList = [line.strip() for line in f]
    except FileNotFoundError:
        pass  # User does not have a frequency.txt
    frequencyListLength = len(frequencyList)
    # word -> position of its first occurrence (same result as list.index,
    # without a linear scan for every unknown morpheme of every note)
    frequencyIndex = {}
    for position, word in enumerate(frequencyList):
        frequencyIndex.setdefault(word, position)

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)
        C = partial(cfg, mid, None)

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(locDb[loc])
            except KeyError:
                continue

        # Determine un-known/mature and i+N
        unknowns, unmatures, newKnowns = set(), set(), set()
        for morpheme in morphemes:
            isKnown = morpheme in knownDb.db
            if not isKnown:
                unknowns.add(morpheme)
            if morpheme not in matureDb.db:
                unmatures.add(morpheme)
                if isKnown:
                    # known but not yet mature: recently learned
                    newKnowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_k, N_m = len(morphemes), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = sum(allDb.frequency(morpheme) for morpheme in unknowns)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db and frequency.txt
        isPriority = False
        isFrequency = False
        for morpheme in unknowns:
            if morpheme in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')
            focusMorphIndex = frequencyIndex.get(morpheme.show().split()[0])
            if focusMorphIndex is not None:
                isFrequency = True
                # The bigger this number, the lower mmi becomes
                usefulness += ((frequencyListLength - focusMorphIndex) *
                               C('frequency.txt weight scale'))

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[morpheme]
            if locs:
                # NOTE(review): min() caps ivl at 1, so a maturity of 0 would
                # divide by zero here - confirm whether max() was intended.
                ivl = min(1, max(loc.maturity for loc in locs))
                usefulness += C(
                    'reinforce new vocab weight'
                ) // ivl  #TODO: maybe average this so it doesnt favor long sentences

        if any(morpheme.pos == '動詞'
               for morpheme in unknowns):  #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long
        # sentence): negative when too short, positive when too long
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 100000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [
            t for t in ts
            if t not in [notReadyTag, compTag, vocabTag, freshTag]
        ]

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts = ts + [compTag]
        elif N_k == 1:  # new vocab card, k+1
            ts = ts + [vocabTag]
            # exactly one unknown morpheme: that is the focus morph
            focusMorph = next(iter(unknowns))
            setField(mid, fs, jcfg('Field_FocusMorph'), '%s' % focusMorph.base)
        elif N_k > 1:  # M+1+ and K+2+
            ts = ts + [notReadyTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), '')
        elif N_m == 1:  # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'),
                     '%s' % list(unmatures)[0].base)
        else:  # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), '')

        # set type agnostic fields
        setField(mid, fs, jcfg('Field_UnknownMorphCount'), '%d' % N_k)
        setField(mid, fs, jcfg('Field_UnmatureMorphCount'), '%d' % N_m)
        setField(mid, fs, jcfg('Field_MorphManIndex'), '%d' % mmi)
        setField(mid, fs, jcfg('Field_Unknowns'),
                 ', '.join(u.base for u in unknowns))
        setField(mid, fs, jcfg('Field_Unmatures'),
                 ', '.join(u.base for u in unmatures))
        setField(mid, fs, jcfg('Field_UnknownFreq'), '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags: drop the old state first, then re-add when it applies
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({
                'now': now,
                'tags': tags_,
                'flds': flds_,
                'sfld': sfld,
                'csum': csum,
                'usn': mw.col.usn(),
                'nid': nid
            })

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({
                    'now': now,
                    'due': due_,
                    'usn': mw.col.usn(),
                    'cid': cid
                })
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb
Ejemplo n.º 30
0
def updateNotes(allDb):
    """Recompute morpheme data for every enabled note and reorder new cards.

    For each note whose model config has 'enabled' set, the note's morphemes
    are looked up through ``allDb`` and compared against the known and mature
    databases derived from it with ``filterDbByMat``.  From that a "Morph Man
    Index" (mmi) is computed out of the number of unknown morphemes, the
    distance from the configured optimal sentence length and a usefulness
    score.  The configured fields and tags are rewritten, changed notes are
    written back in one bulk update, and new cards (``type = 0``) get their
    ``due`` set to the note's mmi when 'set due based on mmi' is enabled.

    :param allDb: morpheme database; must provide ``fidDb()``, ``locDb()``
        and a ``db`` mapping from morpheme to its locations.
    """
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    # Tag names come from per-model config, so collect every name seen while
    # looping.  This also keeps the register() call after the loop valid when
    # no note is processed at all (previously a NameError).
    tagNames = set()
    # `i` is read after the loop; give it a value for an empty notes table.
    i = 0
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0:
            mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'):
            continue

        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[(nid, guid, fieldName)]
                ms.update(locDb[loc])
            except KeyError:
                continue
        ms = [m for m in ms if m.pos not in C('morph_blacklist')]

        # Determine un-known/mature and i+N
        unknowns, unmatures, newKnowns = set(), set(), set()
        for m in ms:
            isKnown = m in knownDb.db
            if not isKnown:
                unknowns.add(m)
            if m not in matureDb.db:
                unmatures.add(m)
                if isKnown:
                    # known but not yet mature: recently learned
                    newKnowns.add(m)

        # Determine MMI - Morph Man Index
        N, N_k, N_m = len(ms), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # average frequency of unknowns (ie. how common the word is within
        # your collection); floor division keeps the value integral on both
        # Python 2 and Python 3
        F_k = sum(len(allDb.db[m]) for m in unknowns)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for m in unknowns:
            if m in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

        # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[m]
            if locs:
                # NOTE(review): min() caps ivl at 1, so a maturity of 0 would
                # divide by zero here - confirm whether max() was intended.
                ivl = min(1, max(loc.maturity for loc in locs))
                usefulness += C('reinforce new vocab weight') / ivl  #TODO: maybe average this so it doesnt favor long sentences

        if any(m.pos == u'動詞' for m in unknowns):  #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length (too little context vs long sentence)
        lenDiff = max(0, min(9, abs(C('optimal sentence length') - N) - 2))

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)
        noteTagNames = (C('tag_comprehension'), C('tag_vocab'),
                        C('tag_notReady'), C('tag_alreadyKnown'),
                        C('tag_priority'))
        (compTag, vocabTag, notReadyTag, alreadyKnownTag,
         priorityTag) = noteTagNames
        tagNames.update(noteTagNames)

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts = [compTag] + [t for t in ts if t not in [vocabTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'')
        elif N_k == 1:  # new vocab card, k+1
            ts = [vocabTag] + [t for t in ts if t not in [compTag, notReadyTag]]
            # exactly one unknown morpheme: that is the focus morph
            focusMorph = next(iter(unknowns))
            setField(mid, fs, C('focusMorph'), u'%s' % focusMorph.base)
        elif N_k > 1:  # M+1+ and K+2+
            ts = [notReadyTag] + [t for t in ts if t not in [compTag, vocabTag]]

        # set type agnostic fields
        setField(mid, fs, C('k+N'), u'%d' % N_k)
        setField(mid, fs, C('m+N'), u'%d' % N_m)
        setField(mid, fs, C('morphManIndex'), u'%d' % mmi)
        setField(mid, fs, C('unknowns'), u', '.join(u.base for u in unknowns))
        setField(mid, fs, C('unmatures'), u', '.join(u.base for u in unmatures))
        setField(mid, fs, C('unknownFreq'), u'%d' % F_k_avg)

        # other tags: drop the old state first, then re-add when it applies
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({
                'now': now,
                'tags': tags_,
                'flds': flds_,
                'sfld': sfld,
                'csum': csum,
                'usn': mw.col.usn(),
                'nid': nid
            })

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)
    TAG.register(list(tagNames))

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []
    for (cid, nid, due) in db.execute(
            'select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({
                    'now': now,
                    'due': due_,
                    'usn': mw.col.usn(),
                    'cid': cid
                })
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
Ejemplo n.º 31
0
def _ignore_dupes(self_note=None, self_expression=None, self_deck=None):
    """Replacement for Anki's ``Note.dupeOrEmpty`` that can ignore duplicates.

    The key field (``self_note.fields[0]``, or ``self_expression`` when
    given) is compared against the first field of every other note with the
    same checksum.  A match only counts as a duplicate if
    ``ignore_duplicates`` does not tell us to ignore the pair of decks
    involved.

    :param self_note: Anki note object.
    :param self_expression: String. Overrides ``self_note.fields[0]``.
    :param self_deck: Name of the deck the note belongs to.
    :returns:
        1       if the key field is empty,
        2       if the note is a duplicate that must be flagged,
        False   otherwise (i.e. a "nice" note).
    """

    # Nomenclature: We compare the note given as argument to other notes.
    # Everything that has to do with that initial note has the prefix 'self',
    # everything that has to do with one of the other notes has the prefix
    # other.

    # Some explanation for abbreviations used in Anki:
    # * id: Note id (as variable belonging to a note)
    # * nid: Note id (as variable belonging to a card)
    # * did: Deck id
    # * mid: Model id

    # 1. Default values & Co.

    if self_note is None and self_expression is None:
        # maybe we should raise a ValueError instead, but well...
        return False

    if self_note:
        self_search_value = self_note.fields[0]  # might be None!
        self_note_id = self_note.id
        self_note_mid = self_note.mid
    else:
        self_search_value = None
        self_note_id = None
        self_note_mid = None

    if self_expression:
        # Note: If self_note was given as well, self_search_value will be
        # overwritten.
        self_search_value = self_expression

    # 2. Check if we have a key field/Expression

    logger.debug("key field = '%s'", self_search_value)
    if not self_search_value or isinstance(self_search_value, str) and not \
            self_search_value.strip():
        # note that self_note.fields[0] might be None!
        logger.debug("Key field empty.")
        return 1

    # 3. Get Note Ids of notes that might be duplicates.

    csum = fieldChecksum(self_search_value)

    if self_note_mid:
        # we don't have to check for the note id, because it defaults to 0 in
        # the search query (mostly copied from anki's source).
        # Select all note ids from notes
        # 1. whose key field has the same check sum
        # 2. whose note id is different (i.e. we're excluding self_note)
        # 3. whose model id is the same
        other_note_ids = mw.col.db.list(
            "select id from notes where csum = ? "
            "and id != ? and mid = ?", csum, self_note_id or 0, self_note_mid)
    else:
        # don't apply any criteria for note id and model id, just search
        # for the checksum.
        other_note_ids = mw.col.db.list("select id from notes where csum = ?",
                                        csum)
    logger.debug("other_note_ids: %s", other_note_ids)

    if not other_note_ids:
        logger.debug("Did not find any notes with the same key field checksum "
                     "as self.")
        return False

    # 4. get the deck ids of the decks the self card belongs to

    if self_deck:
        # use the deck supplied as argument
        self_deck_ids = did_from_dname(self_deck)
    else:
        # try to get the deck from anki
        self_deck_ids = mw.col.db.list("select did from cards where nid = ?",
                                       self_note_id)

    if not self_deck_ids:
        # We tried to get the deck from anki, but the result was empty.
        # Use the id of the currently active deck.
        self_deck_ids = [mw.col.conf['curDeck']]

    logger.debug("self_deck_ids %s", self_deck_ids)

    # 5. Loop over the other_note_ids
    #
    # A candidate that turns out not to be a duplicate must not end the
    # search: a later candidate may still be a real duplicate, so every
    # rejection below moves on to the next candidate.

    self_key = stripHTMLMedia(self_search_value)

    for other_note_id in other_note_ids:
        # 5a. Get the field values of note with other_note_id
        other_fields = mw.col.db.list("select flds from notes where id = ?",
                                      other_note_id)
        if not other_fields:
            # note with no fields
            logger.debug("No fields.")
            continue

        # 5b. Get the deck ids of all the cards of the note with
        # other_note_id (one note can have multiple cards in different decks)
        other_deck_ids = mw.col.db.list("select did from cards where nid = ?",
                                        other_note_id)
        logger.debug("other_deck_ids %s", other_deck_ids)
        if not other_deck_ids:
            logger.debug("No cards with matching checksum.")
            continue

        # 5c. Check that the key fields match (the checksum is no guarantee).
        if stripHTMLMedia(splitFields(other_fields[0])[0]) != self_key:
            logger.debug("Expressions didn't match after all.")
            continue

        # 5d. Check if we want to ignore that case.
        # Normally a card would be flagged as a duplicate here.
        for self_deck_id in self_deck_ids:
            self_name = dname_from_did(self_deck_id)
            for other_deck_id in other_deck_ids:
                other_name = dname_from_did(other_deck_id)
                if ignore_duplicates(self_name, other_name):
                    # don't do anything!
                    logger.debug("Duplicate! deck1 = '%s', deck2 = '%s' ==> "
                                 "Ignored.", self_name, other_name)
                else:
                    logger.debug("Duplicate! deck1 = '%s', deck2 = '%s' ==> "
                                 "Flagged.", self_name, other_name)
                    return 2

    return False
Ejemplo n.º 32
0
 def importNotes(self, notes):
     """Convert each card into a note, apply attributes and add to col.

     Notes with an empty first field, or whose first field already appeared
     earlier in the same import, are logged and skipped.  A note whose first
     field equals that of an existing note of the same model is treated as a
     duplicate: it is updated when self.update is set and otherwise dropped.
     Everything else is added as a new note.
     """
     assert self.mappingOk()
     # gather checksums for duplicate comparison: csum -> ids of existing
     # notes of this model
     existing = {}
     rows = self.col.db.execute(
         "select csum, id from notes where mid = ?", self.model['id'])
     for rowSum, noteId in rows:
         existing.setdefault(rowSum, []).append(noteId)
     seen = {}
     fld0idx = self.mapping.index(self.model['flds'][0]['name'])
     self._fmap = self.col.models.fieldMap(self.model)
     self._nextID = timestampID(self.col.db, "notes")
     # loop through the notes
     updates, new = [], []
     self._ids = []
     self._cards = []
     for n in notes:
         fld0 = n.fields[fld0idx]
         checksum = fieldChecksum(fld0)
         # first field must exist
         if not fld0:
             message = _("Empty first field: %s") % " ".join(n.fields)
             self.log.append(message)
             continue
         # duplicates in source file; log and ignore
         if fld0 in seen:
             message = _("Appeared twice in file: %s") % fld0
             self.log.append(message)
             continue
         seen[fld0] = True
         # already exists?  csum is not a guarantee, so compare the actual
         # first field of every candidate
         isDupe = False
         for noteId in existing.get(checksum, ()):
             stored = splitFields(self.col.db.scalar(
                 "select flds from notes where id = ?", noteId))
             if fld0 != stored[0]:
                 continue
             isDupe = True
             if not self.update:
                 continue
             changed = self.updateData(n, noteId, stored)
             if changed:
                 updates.append(changed)
             break
         if isDupe:
             continue
         # newly add
         fresh = self.newData(n)
         if fresh:
             new.append(fresh)
             # note that we've seen this note once already
             seen[fld0] = True
     self.addNew(new)
     self.addUpdates(updates)
     self.col.updateFieldCache(self._ids)
     # generate cards
     hasEmptyCards = self.col.genCards(self._ids)
     if hasEmptyCards:
         self.log.insert(0, _(
             "Empty cards found. Please run Tools>Maintenance>Empty Cards."))
     # apply scheduling updates
     self.updateCards()
     # report what happened
     counts = {"a": len(new), "b": self.updateCount}
     self.log.append(_("%(a)d notes added, %(b)d notes updated.") % counts)
     self.total = len(self._ids)
Ejemplo n.º 33
0
 def maybeChecksum(self, data, unique):
     "Return the checksum of data if unique is set, else an empty string."
     return fieldChecksum(data) if unique else ""
Ejemplo n.º 34
0
    def importNotes(self, notes):
        "Convert each card into a note, apply attributes and add to col."
        print "importNotes()"
        print notes

        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums = {}
        for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts = {}
        fld0idx = self.mapping.index(self.model['flds'][0]['name'])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # loop through the notes
        updates = []
        updateLog = []
        updateLogTxt = _("First field matched: %s")
        dupeLogTxt = _("Added duplicate with first field: %s")
        new = []
        self._ids = []
        self._cards = []
        self._emptyNotes = False
        for n in notes:
            if not self.allowHTML:
                for c in range(len(n.fields)):
                    n.fields[c] = cgi.escape(n.fields[c])
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # first field must exist
            if not fld0:
                self.log.append(_("Empty first field: %s") %
                                " ".join(n.fields))
                continue
            # earlier in import?
            if fld0 in firsts and self.importMode != 2:
                # duplicates in source file; log and ignore
                self.log.append(_("Appeared twice in file: %s") %
                                fld0)
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == 0:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                updateLog.append(updateLogTxt % fld0)
                                found = True
                            break
                        elif self.importMode == 2:
                            # allow duplicates in this case
                            updateLog.append(dupeLogTxt % fld0)
                            found = False
            # newly add
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
                    # note that we've seen this note once already
                    firsts[fld0] = True

        did = self.col.decks.id(self.deck)
        self.col.decks.select(did)
        #SUPER IMPORTANT (setting the associated deck to the model)
        self.model['did'] = did

        print "Selected: ", self.col.decks.get(self.col.decks.selected());

        self.addNew(new)
        self.addUpdates(updates)
        # make sure to update sflds, etc
        self.col.updateFieldCache(self._ids)
        # generate cards
        if self.col.genCards(self._ids):
            self.log.insert(0, _(
                "Empty cards found. Please run Tools>Empty Cards."))
        # apply scheduling updates
        self.updateCards()
        # we randomize or order here, to ensure that siblings
        # have the same due#
        

        # m = self.col.models.byName("Basic")
        # deck = self.col.decks.get(did)
        # deck['mid'] = m['id']
        # self.col.decks.save(deck)

        # print "Deck:", self.col.decks.byName(self.deck)
        # print "DID:", did
        
        # save tags to model
        # m = self.note.model()
        # m['tags'] = self.note.tags
        # self.mw.col.models.save(m)

        conf = self.col.decks.confForDid(did)
        # print "Conf: ",conf
        # in order due?
        if conf['new']['order'] == NEW_CARDS_RANDOM:
            self.col.sched.randomizeCards(did)
        else:
            self.col.sched.orderCards(did)
        part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
        part2 = ngettext("%d note updated", "%d notes updated", self.updateCount) % self.updateCount
        self.log.append("%s, %s." % (part1, part2))
        print part1, part2, "on deck: [", self.deck, "]"
        self.log.extend(updateLog)
        if self._emptyNotes:
            print "there were empty notes"
            self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
        self.total = len(self._ids)
Ejemplo n.º 35
0
            try:
                if re.search("(?i)^" + regex + "$", strg):
                    nids.append(id)
            except sre_constants.error:
                return
        if not nids:
            return "0"
        return "n.id in %s" % ids2str(nids)

    def _findDupes(self, (val, args)):
        # caller must call stripHTMLMedia on passed val
        try:
            mid, val = val.split(",", 1)
        except OSError:
            return
        csum = fieldChecksum(val)
        nids = []
        for nid, flds in self.col.db.execute("select id, flds from notes where mid=? and csum=?", mid, csum):
            if stripHTMLMedia(splitFields(flds)[0]) == val:
                nids.append(nid)
        return "n.id in %s" % ids2str(nids)


# Find and replace
##########################################################################


def findReplace(col, nids, src, dst, regex=False, field=None, fold=True):
    "Find and replace fields in a note."
    mmap = {}
    if field:
Ejemplo n.º 36
0
    def importNotes(self, notes):
        "Convert each card into a note, apply attributes and add to col."
        assert self.mappingOk()
        # note whether tags are mapped
        self._tagsMapped = False
        for f in self.mapping:
            if f == "_tags":
                self._tagsMapped = True
        # gather checks for duplicate comparison
        csums = {}
        for csum, id in self.col.db.execute(
            "select csum, id from notes where mid = ?", self.model['id']):
            if csum in csums:
                csums[csum].append(id)
            else:
                csums[csum] = [id]
        firsts = {}
        fld0idx = self.mapping.index(self.model['flds'][0]['name'])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # loop through the notes
        updates = []
        new = []
        self._ids = []
        self._cards = []
        self._emptyNotes = False
        for n in notes:
            if not self.allowHTML:
                for c in range(len(n.fields)):
                    n.fields[c] = cgi.escape(n.fields[c])
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # first field must exist
            if not fld0:
                self.log.append(_("Empty first field: %s") %
                                " ".join(n.fields))
                continue
            # earlier in import?
            if fld0 in firsts and self.importMode != 2:
                # duplicates in source file; log and ignore
                self.log.append(_("Appeared twice in file: %s") %
                                fld0)
                continue
            firsts[fld0] = True
            # already exists?
            found = False
            if csum in csums:
                # csum is not a guarantee; have to check
                for id in csums[csum]:
                    flds = self.col.db.scalar(
                        "select flds from notes where id = ?", id)
                    sflds = splitFields(flds)
                    if fld0 == sflds[0]:
                        # duplicate
                        found = True
                        if self.importMode == 0:
                            data = self.updateData(n, id, sflds)
                            if data:
                                updates.append(data)
                                found = True
                            break
                        elif self.importMode == 2:
                            # allow duplicates in this case
                            found = False
            # newly add
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
                    # note that we've seen this note once already
                    firsts[fld0] = True
        self.addNew(new)
        self.addUpdates(updates)
        self.col.updateFieldCache(self._ids)
        # generate cards
        if self.col.genCards(self._ids):
            self.log.insert(0, _(
                "Empty cards found. Please run Tools>Empty Cards."))
        # apply scheduling updates
        self.updateCards()
        # make sure to update sflds, etc
        part1 = ngettext("%d note added", "%d notes added", len(new)) % len(new)
        part2 = ngettext("%d note updated", "%d notes updated", self.updateCount) % self.updateCount
        self.log.append("%s, %s." % (part1, part2))
        if self._emptyNotes:
            self.log.append(_("""\
One or more notes were not imported, because they didn't generate any cards. \
This can happen when you have empty fields or when you have not mapped the \
content in the text file to the correct fields."""))
        self.total = len(self._ids)
Ejemplo n.º 37
0
 def importNotes(self, notes):
     """Turn the foreign notes into collection notes, adding or updating them.

     Existing notes of the model are matched on their first field.  A match
     is updated through ``updateData`` when ``self.update`` is truthy and is
     otherwise skipped.  A summary is appended to ``self.log`` and
     ``self.total`` is set to the number of ids collected in ``self._ids``.
     """
     assert self.mappingOk()
     # index the model's existing notes by first-field checksum
     csums = {}
     for existingCsum, existingId in self.col.db.execute(
             "select csum, id from notes where mid = ?", self.model['id']):
         csums.setdefault(existingCsum, []).append(existingId)
     seenFirsts = set()
     fld0idx = self.mapping.index(self.model['flds'][0]['name'])
     self._fmap = self.col.models.fieldMap(self.model)
     self._nextID = timestampID(self.col.db, "notes")
     # rows collected for addNew() / addUpdates()
     updates = []
     new = []
     self._ids = []
     self._cards = []
     for n in notes:
         fld0 = n.fields[fld0idx]
         csum = fieldChecksum(fld0)
         # a note without a first field cannot be imported
         if not fld0:
             self.log.append(
                 _("Empty first field: %s") % " ".join(n.fields))
             continue
         # repeated inside the file: log and drop
         if fld0 in seenFirsts:
             self.log.append(_("Appeared twice in file: %s") % fld0)
             continue
         seenFirsts.add(fld0)
         # compare against the notes already in the collection
         isDupe = False
         for nid in csums.get(csum, ()):
             existing = splitFields(self.col.db.scalar(
                 "select flds from notes where id = ?", nid))
             if fld0 != existing[0]:
                 # same checksum, different text
                 continue
             isDupe = True
             if self.update:
                 row = self.updateData(n, nid, existing)
                 if row:
                     updates.append(row)
                 break
         # not a duplicate: add as a new note
         if not isDupe:
             row = self.newData(n)
             if row:
                 new.append(row)
     self.addNew(new)
     self.addUpdates(updates)
     # make sure to update sflds, etc
     self.col.updateFieldCache(self._ids)
     # generate cards
     if self.col.genCards(self._ids):
         self.log.insert(
             0,
             _("Empty cards found. Please run Tools>Maintenance>Empty Cards."
               ))
     # apply scheduling updates
     self.updateCards()
     # summary line for the import log
     counts = dict(a=len(new), b=self.updateCount)
     self.log.append(_("%(a)d notes added, %(b)d notes updated.") % counts)
     self.total = len(self._ids)
Ejemplo n.º 38
0
def updateNotes(allDb):
    """Recompute morpheme statistics for every note and write the results back.

    For each note whose filter (``getFilterByMidAndTags``) has ``'Modify'``
    set, the function:

    * gathers the note's morphemes from ``allDb.fidDb`` / ``allDb.locDb``,
    * counts how many of them are not matched by the seen / known / mature
      databases derived from ``allDb`` with ``filterDbByMat``,
    * computes the "Morph Man Index" (mmi) from those counts, a usefulness
      score and the sentence length,
    * rewrites the configured fields and tags of the note.

    Changed notes are written with one ``executemany``.  After that, the
    ``due`` of cards with ``type = 0`` is set to the mmi of their note, for
    notes where ``'set due based on mmi'`` is configured.

    Args:
        allDb: morpheme database; this function calls ``fidDb``, ``locDb``
            and ``frequency`` on it and passes it to ``filterDbByMat``.

    Returns:
        The "known" database, ``filterDbByMat(allDb, cfg('threshold_known'))``.
    """
    t_0, now, db = time.time(), intTime(), mw.col.db

    TAG = mw.col.tags  # type: TagManager
    # ds collects the rows for the bulk update; nid2mmi maps note id -> mmi
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    # fidDb is indexed below with (nid, guid, fieldName) keys
    fidDb = allDb.fidDb(recalc=True)
    loc_db = allDb.locDb(recalc=False)  # type: Dict[Location, Set[Morpheme]]

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, tooShortTag, tooLongTag, frequencyTag = tagNames = cfg(
        'Tag_Comprehension'), cfg('Tag_Vocab'), cfg('Tag_Fresh'), cfg(
            'Tag_NotReady'), cfg('Tag_AlreadyKnown'), cfg('Tag_Priority'), cfg(
                'Tag_TooShort'), cfg('Tag_TooLong'), cfg('Tag_Frequency')
    TAG.register(tagNames)
    # deprecated tag; it is only ever removed from notes below
    badLengthTag = cfg('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg('path_priority'), ignoreErrors=True).db

    mw.progress.update(label='Loading frequency.txt')
    frequencyListPath = cfg('path_frequency')
    try:
        with codecs.open(frequencyListPath, encoding='utf-8') as f:
            # create a dictionary. key is word, value is its position in the file
            # (only the text before the first tab of each line is used as the word)
            frequency_list = dict(
                zip([line.strip().split('\t')[0] for line in f.readlines()],
                    itertools.count(0)))
    except FileNotFoundError:
        # no frequency file: no note gets a frequency bonus
        frequency_list = dict()

    frequencyListLength = len(frequency_list)

    if cfg('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg('path_seen'))
        knownDb.save(cfg('path_known'))
        matureDb.save(cfg('path_mature'))

    mw.progress.update(label='Updating notes')

    # prefetch cfg for fields
    field_focus_morph = cfg('Field_FocusMorph')
    field_unknown_count = cfg('Field_UnknownMorphCount')
    field_unmature_count = cfg('Field_UnmatureMorphCount')
    field_morph_man_index = cfg('Field_MorphManIndex')
    field_unknowns = cfg('Field_Unknowns')
    field_unmatures = cfg('Field_Unmatures')
    field_unknown_freq = cfg('Field_UnknownFreq')
    field_focus_morph_pos = cfg("Field_FocusMorphPos")

    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        ts = TAG.split(tags)
        # refresh the progress bar every 500 notes
        if i % 500 == 0:
            mw.progress.update(value=i)

        # cfg lookup bound to this note's model id
        C = partial(cfg, model_id=mid)

        # skip notes that no filter matches or whose filter is not marked 'Modify'
        notecfg = getFilterByMidAndTags(mid, ts)
        if notecfg is None or not notecfg['Modify']:
            continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(loc_db[loc])
            except KeyError:
                # this field has no entry in fidDb / loc_db
                continue

        # NOTE(review): called with the same arguments on every iteration;
        # could be hoisted out of the loop if cfg() is a pure lookup
        proper_nouns_known = cfg('Option_ProperNounsAlreadyKnown')

        # Determine un-seen/known/mature and i+N
        # new_knowns: morphemes matched by knownDb but not by matureDb
        unseens, unknowns, unmatures, new_knowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if proper_nouns_known and morpheme.isProperNoun():
                continue
            if not seenDb.matches(morpheme):
                unseens.add(morpheme)
            if not knownDb.matches(morpheme):
                unknowns.add(morpheme)
            if not matureDb.matches(morpheme):
                unmatures.add(morpheme)
                if knownDb.matches(morpheme):
                    new_knowns.add(morpheme)

        # Determine MMI - Morph Man Index
        # (N_s is computed but not used further down)
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(
            unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'):
            continue

        # add bonus for morphs in priority.db and frequency.txt
        frequencyBonus = C('frequency.txt bonus')
        isPriority = False
        isFrequency = False

        # stays None when there are no unknowns; otherwise it ends up as the
        # last morpheme visited by the loop below
        focusMorph = None

        # F_k: summed allDb frequency of the unknown morphemes
        F_k = 0
        usefulness = 0
        for focusMorph in unknowns:
            F_k += allDb.frequency(focusMorph)
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')
            focusMorphString = focusMorph.base
            try:
                focusMorphIndex = frequency_list[focusMorphString]
                isFrequency = True

                # The bigger this number, the lower mmi becomes
                # (words nearer the top of frequency.txt get a larger share of the bonus)
                usefulness += int(
                    round(frequencyBonus *
                          (1 - focusMorphIndex / frequencyListLength)))

            except KeyError:
                # morpheme is not listed in frequency.txt
                pass

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness += F_k_avg

        # add bonus for studying recent learned knowns (reinforce)
        for morpheme in new_knowns:
            locs = knownDb.getMatchingLocs(morpheme)
            if locs:
                # NOTE(review): min(1, ...) never yields more than 1, so the
                # divisor below cannot scale the bonus down, and it would be 0
                # (ZeroDivisionError) if the largest maturity were 0.
                # max(1, ...) may have been intended - confirm before changing.
                ivl = min(1, max(loc.maturity for loc in locs))
                # TODO: maybe average this so it doesnt favor long sentences
                usefulness += C('reinforce new vocab weight') // ivl

        if any(morpheme.pos == '動詞'
               for morpheme in unknowns):  # FIXME: this isn't working???
            usefulness += C('verb bonus')

        # invert and clamp to 0..99999: a higher usefulness gives a lower value
        usefulness = 99999 - min(99999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        # negative: shorter than the minimum; positive: longer than the maximum
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        # the unknown count dominates, then the length penalty, then usefulness
        mmi = 100000 * N_k + 1000 * lenDiff + int(round(usefulness))
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        fs = splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [
            t for t in ts
            if t not in (notReadyTag, compTag, vocabTag, freshTag)
        ]

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts.append(compTag)
        elif N_k == 1:  # new vocab card, k+1
            ts.append(vocabTag)
            # focusMorph is the single unknown left over from the loop above
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)
        elif N_k > 1:  # M+1+ and K+2+
            ts.append(notReadyTag)
        elif N_m == 1:  # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts.append(freshTag)
            focusMorph = next(iter(unmatures))
            setField(mid, fs, field_focus_morph, focusMorph.base)
            setField(mid, fs, field_focus_morph_pos, focusMorph.pos)

        else:  # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts.append(freshTag)

        # set type agnostic fields
        setField(mid, fs, field_unknown_count, '%d' % N_k)
        setField(mid, fs, field_unmature_count, '%d' % N_m)
        setField(mid, fs, field_morph_man_index, '%d' % mmi)
        setField(mid, fs, field_unknowns, ', '.join(u.base for u in unknowns))
        setField(mid, fs, field_unmatures,
                 ', '.join(u.base for u in unmatures))
        setField(mid, fs, field_unknown_freq, '%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

        # other tags
        # each of these is removed first and re-added only if it applies now
        if priorityTag in ts:
            ts.remove(priorityTag)
        if isPriority:
            ts.append(priorityTag)

        if frequencyTag in ts:
            ts.remove(frequencyTag)
        if isFrequency:
            ts.append(frequencyTag)

        if tooShortTag in ts:
            ts.remove(tooShortTag)
        if lenDiffRaw < 0:
            ts.append(tooShortTag)

        if tooLongTag in ts:
            ts.remove(tooLongTag)
        if lenDiffRaw > 0:
            ts.append(tooLongTag)

        # remove unnecessary tags
        # NOTE(review): another lookup with identical arguments on every iteration
        if not cfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            # csum and sfld are derived from the fields, so they are rewritten too
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append((tags_, flds_, sfld, csum, now, mw.col.usn(), nid))

    mw.progress.update(label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=?, flds=?, sfld=?, csum=?, mod=?, usn=? where id=?',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(label='Updating new card ordering...')
    # ds is reused, now for the card rows
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append((due_, now, mw.col.usn(), cid))

    mw.col.db.executemany('update cards set due=?, mod=?, usn=? where id=?',
                          ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb
Ejemplo n.º 39
0
            try:
                if re.search("(?i)^" + regex + "$", strg):
                    nids.append(id)
            except sre_constants.error:
                return
        if not nids:
            return "0"
        return "n.id in %s" % ids2str(nids)

    def _findDupes(self, (val, args)):
        # caller must call stripHTMLMedia on passed val
        try:
            mid, val = val.split(",", 1)
        except OSError:
            return
        csum = fieldChecksum(val)
        nids = []
        for nid, flds in self.col.db.execute(
                "select id, flds from notes where mid=? and csum=?", mid,
                csum):
            if stripHTMLMedia(splitFields(flds)[0]) == val:
                nids.append(nid)
        return "n.id in %s" % ids2str(nids)


# Find and replace
##########################################################################


def findReplace(col, nids, src, dst, regex=False, field=None, fold=True):
    "Find and replace fields in a note."
Ejemplo n.º 40
0
    def importNotes(self, notes: List[ForeignNote]) -> None:
        """Turn the foreign notes into collection notes, adding or updating them.

        Every field is stripped; unless ``self.allowHTML`` is set it is also
        HTML-escaped and its newlines become ``<br>``.  Existing notes of the
        model are matched on their first field, and ``self.importMode``
        decides what happens to a match:

        * ``UPDATE_MODE``: every matching note is passed to ``updateData``;
        * ``IGNORE_MODE``: the incoming note is skipped;
        * ``ADD_MODE``: the incoming note is added anyway, and one message
          per distinct first field is logged.

        A summary (added / updated / unchanged) is appended to ``self.log``
        and ``self.total`` is set to ``len(self._ids)``.
        """
        assert self.mappingOk()
        # remember whether one of the mapped columns is the tags column
        self._tagsMapped = any(f == "_tags" for f in self.mapping)
        # index the model's existing notes by first-field checksum
        csums: Dict[str, List[int]] = {}
        for existing_csum, existing_id in self.col.db.execute(
                "select csum, id from notes where mid = ?", self.model["id"]):
            csums.setdefault(existing_csum, []).append(existing_id)
        firsts: Dict[str, bool] = {}
        fld0idx = self.mapping.index(self.model["flds"][0]["name"])
        self._fmap = self.col.models.fieldMap(self.model)
        self._nextID = timestampID(self.col.db, "notes")
        # rows and log lines collected while walking the notes
        updates = []
        updateLog = []
        new = []
        self._ids: List[int] = []
        self._cards: List[Tuple] = []
        dupeCount = 0
        dupes: List[str] = []
        for n in notes:
            # normalise every field in place
            for pos, text in enumerate(n.fields):
                if not self.allowHTML:
                    text = html.escape(text, quote=False)
                text = text.strip()
                if not self.allowHTML:
                    text = text.replace("\n", "<br>")
                n.fields[pos] = text
            fld0 = n.fields[fld0idx]
            csum = fieldChecksum(fld0)
            # a note without a first field cannot be imported
            if not fld0:
                self.log.append(
                    self.col.tr(TR.IMPORTING_EMPTY_FIRST_FIELD,
                                val=" ".join(n.fields)))
                continue
            # repeated inside the file: logged and dropped unless adding
            if fld0 in firsts and self.importMode != ADD_MODE:
                self.log.append(
                    self.col.tr(TR.IMPORTING_APPEARED_TWICE_IN_FILE, val=fld0))
                continue
            firsts[fld0] = True
            # compare against the notes already in the collection
            found = False
            for nid in csums.get(csum, ()):
                sflds = splitFields(self.col.db.scalar(
                    "select flds from notes where id = ?", nid))
                if fld0 != sflds[0]:
                    # same checksum, different text
                    continue
                # duplicate; only ADD_MODE lets it through as a new note
                found = self.importMode != ADD_MODE
                if self.importMode == UPDATE_MODE:
                    data = self.updateData(n, nid, sflds)
                    if data:
                        updates.append(data)
                        updateLog.append(
                            self.col.tr(TR.IMPORTING_FIRST_FIELD_MATCHED,
                                        val=fld0))
                        dupeCount += 1
                elif self.importMode == IGNORE_MODE:
                    dupeCount += 1
                elif self.importMode == ADD_MODE and fld0 not in dupes:
                    # only show message once, no matter how many
                    # duplicates are in the collection already
                    updateLog.append(
                        self.col.tr(
                            TR.IMPORTING_ADDED_DUPLICATE_WITH_FIRST_FIELD,
                            val=fld0,
                        ))
                    dupes.append(fld0)
            # not a duplicate (or duplicates allowed): add as a new note
            if not found:
                data = self.newData(n)
                if data:
                    new.append(data)
        self.addNew(new)
        self.addUpdates(updates)
        # generate cards + update field cache
        self.col.after_note_updates(self._ids, mark_modified=False)
        # apply scheduling updates
        self.updateCards()
        # we randomize or order here, to ensure that siblings
        # have the same due#
        did = self.col.decks.selected()
        conf = self.col.decks.confForDid(did)
        # in order due?
        if conf["new"]["order"] == NEW_CARDS_RANDOM:
            self.col.sched.randomizeCards(did)

        part1 = self.col.tr(TR.IMPORTING_NOTE_ADDED, count=len(new))
        part2 = self.col.tr(TR.IMPORTING_NOTE_UPDATED, count=self.updateCount)
        # duplicates that were seen but left as they are
        unchanged = 0
        if self.importMode == UPDATE_MODE:
            unchanged = dupeCount - self.updateCount
        elif self.importMode == IGNORE_MODE:
            unchanged = dupeCount
        part3 = self.col.tr(TR.IMPORTING_NOTE_UNCHANGED, count=unchanged)
        self.log.append(f"{part1}, {part2}, {part3}.")
        self.log.extend(updateLog)
        self.total = len(self._ids)
Ejemplo n.º 41
0
 def maybeChecksum(self, data, unique):
     """Return ``fieldChecksum(data)`` when ``unique`` is truthy, else ``""``."""
     return fieldChecksum(data) if unique else ""