Example #1
def main():
    # load existing all.db
    mw.progress.start( label='Loading existing all.db', immediate=True )
    t_0 = time.time()
    cur = util.allDb() if cfg1('loadAllDb') else None
    printf( 'Loaded all.db in %f sec' % ( time.time() - t_0 ) )
    mw.progress.finish()

    # update all.db
    allDb = mkAllDb( cur )

    # merge in external.db
    mw.progress.start( label='Merging ext.db', immediate=True )
    ext = MorphDb( cfg1('path_ext'), ignoreErrors=True )
    allDb.merge( ext )
    mw.progress.finish()

    # update notes
    knownDb = updateNotes( allDb )
    
    # update stats and refresh display
    stats.updateStats( knownDb )
    mw.toolbar.draw()

    # set global allDb
    util._allDb = allDb
Example #2
def highlight(txt, extra, fieldDict, field, mod_field):
    '''When a field is marked with the 'focusMorph' command, we format it by
    wrapping all the morphemes in <span>s with attributes set to their maturity'''

    # must avoid formatting a smaller morph that is contained in a bigger morph
    # => do largest subs first and don't sub anything already in <span>
    def nonSpanSub(sub, repl, string):
        return u''.join(
            re.sub(sub, repl, s) if not s.startswith('<span') else s
            for s in re.split('(<span.*?</span>)', string))

    from morphemes import getMorphemes
    ms = getMorphemes(txt)
    for m in sorted(ms, key=lambda x: len(x.inflected),
                    reverse=True):  # largest subs first
        locs = allDb().db.get(m, set())
        mat = max(loc.maturity for loc in locs) if locs else 0

        if mat >= cfg1('threshold_mature'): mtype = 'mature'
        elif mat >= cfg1('threshold_known'): mtype = 'known'
        elif mat >= cfg1('threshold_seen'): mtype = 'seen'
        else: mtype = 'unknown'
        repl = u'<span class="morphHighlight" mtype="{mtype}" mat="{mat}">{morph}</span>'.format(
            morph=m.inflected, mtype=mtype, mat=mat)
        txt = nonSpanSub(m.inflected, repl, txt)
    return txt
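
The key idea in `highlight` is the pairing of longest-first substitution with `nonSpanSub`, which splits on existing `<span>…</span>` chunks and only substitutes in the text outside them, so a short morph can never mangle the markup already produced for a longer one. A minimal, self-contained sketch of that behaviour using plain `re` and made-up strings (no MorphMan imports):

import re

def non_span_sub(sub, repl, string):
    # Substitute only in the pieces that are not already <span>...</span> chunks.
    return u''.join(
        re.sub(sub, repl, s) if not s.startswith('<span') else s
        for s in re.split('(<span.*?</span>)', string))

txt = u'食べた'
for morph in sorted([u'た', u'食べた'], key=len, reverse=True):  # largest subs first
    txt = non_span_sub(morph, u'<span class="morphHighlight">%s</span>' % morph, txt)

print(txt)  # <span class="morphHighlight">食べた</span> -- the inner 'た' is left untouched
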
Example #3
def main():
    # load existing all.db
    mw.progress.start(label='Loading existing all.db', immediate=True)
    t_0 = time.time()
    cur = util.allDb() if cfg1('loadAllDb') else None
    printf('Loaded all.db in %f sec' % (time.time() - t_0))
    mw.progress.finish()

    # update all.db
    allDb = mkAllDb(cur)

    # merge in external.db
    mw.progress.start(label='Merging ext.db', immediate=True)
    ext = MorphDb(cfg1('path_ext'), ignoreErrors=True)
    allDb.merge(ext)
    mw.progress.finish()

    # update notes
    knownDb = updateNotes(allDb)

    # update stats and refresh display
    stats.updateStats(knownDb)
    mw.toolbar.draw()

    # set global allDb
    util._allDb = allDb
Example #4
def run( duelingSubsPath, outputSubsPath, morphemizer, matureFmt, knownFmt, unknownFmt ):
    # Load files
    kdb = MorphDb( cfg1('path_known') )
    mdb = MorphDb( cfg1('path_mature') )
    subFileLines = codecs.open( duelingSubsPath, 'r', 'utf-8' ).readlines()

    # Get dueling subs
    dialogueLines = [ l for l in subFileLines if l.startswith( u'Dialogue' ) ]
    header = subFileLines[ : subFileLines.index( dialogueLines[0] ) ]
    assert len( dialogueLines ) % 2 == 0, 'Should be an even number of dialogue lines'

    lines = []
    for i in xrange( 0, len( dialogueLines ), 2 ):
        target, native = dialogueLines[i:i+2]
        target, native, pre = getText( target ), getText( native ), getPreText( target )

        # get unknowns
        ms = getMorphemes(morphemizer, target)
        unknowns, N_k = getNotInDb( ms, kdb.db )
        unmatures, N_m = getNotInDb( ms, mdb.db )
        d = { 'target':target, 'native':native, 'N_k':N_k, 'N_m':N_m, 'unknowns':unknowns, 'unmatures':unmatures }

        if N_m == 0:
            lines.append( pre + matureFmt % d )
        elif N_k == 0:
            lines.append( pre + knownFmt % d )
        else:
            lines.append( pre + unknownFmt % d )

    outFile = codecs.open( outputSubsPath, 'w', 'utf-8' )
    outFile.write( u''.join( header ) )
    outFile.write( u'\n'.join( lines ) )
    outFile.close()
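
The three `*Fmt` arguments are old-style `%` format strings applied to the per-line dict `d`, so any of the keys built above (`target`, `native`, `N_k`, `N_m`, `unknowns`, `unmatures`) can appear as a placeholder. A hypothetical example of how such a format string expands (the sentence, counts, and format string are invented; only the key names come from the snippet):

# Hypothetical format string and data; %(key)s placeholders map to the dict d above.
unknownFmt = u'%(target)s  [k+%(N_k)d: %(unknowns)s]'

d = {'target': u'これはペンです', 'native': u'This is a pen.',
     'N_k': 1, 'N_m': 2, 'unknowns': u'ペン', 'unmatures': u'ペン'}

print(unknownFmt % d)  # これはペンです  [k+1: ペン]
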
Example #5
def updateStats( knownDb=None ):
    mw.progress.start( label='Updating stats', immediate=True )

    from morphemes import MorphDb
    d = {}

    # Load known.db and get total morphemes known
    if knownDb is None:
        knownDb = MorphDb( cfg1('path_known'), ignoreErrors=True )

    d['totalKnown'] = len( knownDb.db )

    # Load Goal.*.db dbs, get morphemes required, and compare vs known.db
    d['goals'] = {}
    goalDbPaths = glob.glob( os.path.join( cfg1('path_dbs'), 'Goal.*.db' ) )

    for path in goalDbPaths:
        name = os.path.basename( path )[5:][:-3]
        gdb = MorphDb( path )

        # track total unique morphemes + when weighted by frequency
        # NOTE: a morpheme may occur multiple times within the same sentence, but this frequency is wrt note fields
        numUniqueReq, numUniqueKnown, numFreqReq, numFreqKnown = 0, 0, 0, 0
        for m,locs in gdb.db.iteritems():
            numUniqueReq += 1
            numFreqReq   += len( locs )
            if m in knownDb.db:
                numUniqueKnown += 1
                numFreqKnown   += len( locs )
        
        d['goals'][ name ] = { 'total':numUniqueReq, 'known':numUniqueKnown, 'freqTotal':numFreqReq, 'freqKnown':numFreqKnown }

    saveStats( d )
    mw.progress.finish()
    return d
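
The returned dict has a fixed shape: a `totalKnown` count plus one entry per `Goal.*.db` with unique and frequency-weighted totals. A small hypothetical consumer that turns one goal entry into coverage percentages (goal name and numbers are invented):

# Invented stats dict in the shape built by updateStats() above.
d = {
    'totalKnown': 4200,
    'goals': {
        'Core2k': {'total': 1800, 'known': 1500, 'freqTotal': 9000, 'freqKnown': 8100},
    },
}

for name, g in sorted(d['goals'].items()):
    uniq_pct = 100.0 * g['known'] / g['total'] if g['total'] else 0.0
    freq_pct = 100.0 * g['freqKnown'] / g['freqTotal'] if g['freqTotal'] else 0.0
    print('%s: %.1f%% of unique morphs known, %.1f%% frequency-weighted' % (name, uniq_pct, freq_pct))
# Core2k: 83.3% of unique morphs known, 90.0% frequency-weighted
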
Example #6
def run( duelingSubsPath, outputSubsPath, whitelist, blacklist, matureFmt, knownFmt, unknownFmt ):
    # Load files
    kdb = MorphDb( cfg1('path_known') )
    mdb = MorphDb( cfg1('path_mature') )
    subFileLines = codecs.open( duelingSubsPath, 'r', 'utf-8' ).readlines()

    # Get dueling subs
    dialogueLines = [ l for l in subFileLines if l.startswith( u'Dialogue' ) ]
    header = subFileLines[ : subFileLines.index( dialogueLines[0] ) ]
    assert len( dialogueLines ) % 2 == 0, 'Should be an even number of dialogue lines'

    lines = []
    for i in xrange( 0, len( dialogueLines ), 2 ):
        jpn, eng = dialogueLines[i:i+2]
        jpn, eng, pre = getText( jpn ), getText( eng ), getPreText( jpn )

        # get unknowns
        ms = getMorphemes( jpn, whitelist, blacklist )
        unknowns, N_k = getNotInDb( ms, kdb.db )
        unmatures, N_m = getNotInDb( ms, mdb.db )
        d = { 'jpn':jpn, 'eng':eng, 'N_k':N_k, 'N_m':N_m, 'unknowns':unknowns, 'unmatures':unmatures }

        if N_m == 0:
            lines.append( pre + matureFmt % d )
        elif N_k == 0:
            lines.append( pre + knownFmt % d )
        else:
            lines.append( pre + unknownFmt % d )

    outFile = codecs.open( outputSubsPath, 'w', 'utf-8' )
    outFile.write( u''.join( header ) )
    outFile.write( u'\n'.join( lines ) )
    outFile.close()
Example #7
def my_reviewer_keyHandler(self, evt):
    ''' :type self: aqt.reviewer.Reviewer '''
    key = unicode(evt.text())
    key_browse, key_skip = cfg1('browse same focus key'), cfg1(
        'set known and skip key')
    if key == key_skip: setKnownAndSkip(self)
    elif key == key_browse: browseSameFocus(self)
Example #8
def mkAllDb( allDb=None ):
    t_0, db, TAG = time.time(), mw.col.db, mw.col.tags
    N_notes = db.scalar( 'select count() from notes' )
    mw.progress.start( label='Prep work for all.db creation', max=N_notes, immediate=True )

    if not allDb: allDb = MorphDb()
    fidDb   = allDb.fidDb()
    locDb   = allDb.locDb( recalc=False )   # fidDb() already forces locDb recalc

    mw.progress.update( label='Generating all.db data' )
    for i,( nid, mid, flds, guid, tags ) in enumerate( db.execute( 'select id, mid, flds, guid, tags from notes' ) ):
        if i % 500 == 0:    mw.progress.update( value=i )
        C = partial( cfg, mid, None )
        if not C('enabled'): continue
        mats = [ ( 0.5 if ivl == 0 and ctype == 1 else ivl ) for ivl, ctype in db.execute( 'select ivl, type from cards where nid = :nid', nid=nid ) ]
        ts, alreadyKnownTag = TAG.split( tags ), C('tag_alreadyKnown')
        if alreadyKnownTag in ts:
            mats += [ C('threshold_mature')+1 ]

        for fieldName in C('morph_fields'):
            try: # if doesn't have field, continue
                #fieldValue = normalizeFieldValue( getField( fieldName, flds, mid ) )
                fieldValue = getMecabField( fieldName, flds, mid )
            except KeyError: continue
            except TypeError:
                mname = mw.col.models.get( mid )[ 'name' ]
                errorMsg( u'Failed to get field "{field}" from a note of model "{model}". Please fix your config.py file to match your collection appropriately and ignore the following error.'.format( model=mname, field=fieldName ) )
                raise

            loc = fidDb.get( ( nid, guid, fieldName ), None )
            if not loc:
                loc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                ms = getMorphemes( fieldValue )
                if ms: #TODO: this needed? should we change below too then?
                    #printf( '    .loc for %d[%s]' % ( nid, fieldName ) )
                    locDb[ loc ] = ms
            else:
                # mats changed -> new loc (new mats), move morphs
                if loc.fieldValue == fieldValue and loc.maturities != mats:
                    printf( '    .mats for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                    locDb[ newLoc ] = locDb.pop( loc )
                # field changed -> new loc, new morphs
                elif loc.fieldValue != fieldValue:
                    printf( '    .morphs for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                    ms = getMorphemes( fieldValue )
                    locDb.pop( loc )
                    locDb[ newLoc ] = ms
    printf( 'Processed all %d notes in %f sec' % ( N_notes, time.time() - t_0 ) )
    mw.progress.update( value=i, label='Creating all.db object' )
    allDb.clear()
    allDb.addFromLocDb( locDb )
    if cfg1('saveDbs'):
        mw.progress.update( value=i, label='Saving all.db to disk' )
        allDb.save( cfg1('path_all') )
        printf( 'Processed all %d notes + saved all.db in %f sec' % ( N_notes, time.time() - t_0 ) )
    mw.progress.finish()
    return allDb
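
The maturity list `mats` comes straight from the card rows: a learning card (`type == 1`) that still has `ivl == 0` counts as 0.5, every other card contributes its interval, and a note carrying the already-known tag gets an extra value above the mature threshold. A standalone sketch of just that rule, with invented card rows and a stand-in threshold:

# Invented (ivl, type) rows and threshold; only the rule comes from the snippet above.
card_rows = [(0, 0), (0, 1), (3, 1), (21, 2)]  # new, learning, learning, review
threshold_mature = 21
already_known = True  # note carries the tag_alreadyKnown tag

mats = [(0.5 if ivl == 0 and ctype == 1 else ivl) for ivl, ctype in card_rows]
if already_known:
    mats += [threshold_mature + 1]

print(mats)  # [0, 0.5, 3, 21, 22]
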
Example #9
def per( st, n ):
    mats = mw.col.db.list( 'select ivl from cards where nid = :nid', nid=n.id )
    for f in cfg( n.mid, None, 'morph_fields' ):
        ms = getMorphemes( n[ f ], None, cfg1('morph_blacklist') )
        loc = AnkiDeck( n.id, f, n[ f ], n.guid, mats )
        st['morphDb'].addMsL( ms, loc )
    return st
Example #10
def getMorphemesMecab(e):
    ms = [tuple(m.split('\t')) for m in interact(e).split('\r')]  # morphemes
    ms = [Morpheme(*m) for m in ms
          if len(m) == MECAB_NODE_LENGTH]  # filter garbage
    #if whitelist: ms = [ m for m in ms if m.pos in whitelist ]
    blacklist = cfg1('mecab_blacklist')
    if blacklist: ms = [m for m in ms if m.pos not in blacklist]
    ms = [fixReading(m) for m in ms]
    return ms
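
Once the MeCab output is split into per-morpheme tuples, the only filtering shown here is by part of speech against `mecab_blacklist`. A tiny sketch of that filter with a stand-in namedtuple instead of the real `Morpheme` class (the POS strings and blacklist are illustrative):

from collections import namedtuple

Morpheme = namedtuple('Morpheme', ['base', 'inflected', 'pos'])  # stand-in, not the add-on's class

ms = [Morpheme(u'食べる', u'食べた', u'動詞'),
      Morpheme(u'た', u'た', u'助動詞'),
      Morpheme(u'!', u'!', u'記号')]

blacklist = [u'記号']  # e.g. drop symbols
if blacklist:
    ms = [m for m in ms if m.pos not in blacklist]

print(u', '.join(m.inflected for m in ms))  # 食べた, た
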
Example #11
def highlight( txt, extra, fieldDict, field, mod_field ):
    '''When a field is marked with the 'focusMorph' command, we format it by
    wrapping all the morphemes in <span>s with attributes set to their maturity'''
    from util import getFilterByTagsAndType
    from morphemizer import getMorphemizerByName
    from morphemes import getMorphemes

    # must avoid formatting a smaller morph that is contained in a bigger morph
    # => do largest subs first and don't sub anything already in <span>
    def nonSpanSub( sub, repl, string ):
        return u''.join( re.sub( sub, repl, s ) if not s.startswith('<span') else s for s in re.split( '(<span.*?</span>)', string ) )

    # find morphemizer; because no note/card information is exposed through arguments, we have to find morphemizer based on tags alone
    #from aqt.qt import debug; debug()
    #
    #if mw.reviewer.card is None: return txt
    #note = mw.reviewer.card.note()
    #if not isNoteSame(note, fieldDict): return txt
    #from aqt.qt import debug; debug()

    tags = fieldDict['Tags'].split()
    filter = getFilterByTagsAndType(fieldDict['Type'], tags)
    if filter is None:
        return txt
    morphemizer = getMorphemizerByName(filter['Morphemizer'])
    if morphemizer is None:
        return txt
    ms = getMorphemes(morphemizer, txt, tags)

    for m in sorted( ms, key=lambda x: len(x.inflected), reverse=True ): # largest subs first
        locs = allDb().db.get( m, set() )
        mat = max( loc.maturity for loc in locs ) if locs else 0

        if   mat >= cfg1( 'threshold_mature' ):  mtype = 'mature'
        elif mat >= cfg1( 'threshold_known' ):   mtype = 'known'
        elif mat >= cfg1( 'threshold_seen' ):    mtype = 'seen'
        else:                                    mtype = 'unknown'
        repl = u'<span class="morphHighlight" mtype="{mtype}" mat="{mat}">{morph}</span>'.format(
                morph = m.inflected,
                mtype = mtype,
                mat = mat
                )
        txt = nonSpanSub( m.inflected, repl, txt )
    return txt
Example #12
def per( st, n ): # :: State -> Note -> State
    #n.delTag( st['tags'] ) # clear tags if they already exist?
    
    for field in cfg( n.mid, None, 'morph_fields' ):
        for m in getMorphemes( n[ field ], None, cfg1('morph_blacklist') ):
            if m in st['db'].db:
                n.addTag( st['tags'] )
                break
    n.flush()
    return st
Example #13
def onExtractTxtFile( self ):
    srcPath = QFileDialog.getOpenFileName( caption='Text file to extract from?', directory=dbsPath )
    if not srcPath: return
    destPath = QFileDialog.getSaveFileName( caption='Save morpheme db to?', directory=dbsPath + os.sep + 'textFile.db' )
    if not destPath: return
    mat = cfg1('text file import maturity')
    db = MorphDb.mkFromFile( str(srcPath), getAllMorphemizers()[self.morphemizerComboBox.currentIndex()], mat )
    if db:
        db.save( str(destPath) )
        infoMsg( 'Extracted successfully' )
Example #14
def onExtractTxtFile( self ):
    srcPath = QFileDialog.getOpenFileName( caption='Text file to extract from?', directory=dbsPath )
    if not srcPath: return
    destPath = QFileDialog.getSaveFileName( caption='Save morpheme db to?', directory=dbsPath + os.sep + 'textFile.db' )
    if not destPath: return
    mat = cfg1('text file import maturity')
    db = MorphDb.mkFromFile( str(srcPath), mat )
    if db:
        db.save( str(destPath) )
        infoMsg( 'Extracted successfully' )
Example #15
def per(st, n):  # :: State -> Note -> State
    #n.delTag( st['tags'] ) # clear tags if they already exist?

    for field in cfg(n.mid, None, 'morph_fields'):
        for m in getMorphemes(n[field], None, cfg1('morph_blacklist')):
            if m in st['db'].db:
                n.addTag(st['tags'])
                break
    n.flush()
    return st
Example #16
def updateStats(knownDb=None):
    mw.progress.start(label='Updating stats', immediate=True)

    from morphemes import MorphDb
    d = {}

    # Load known.db and get total morphemes known
    if knownDb is None:
        knownDb = MorphDb(cfg1('path_known'), ignoreErrors=True)

    d['totalKnown'] = len(knownDb.db)

    # Load Goal.*.db dbs, get morphemes required, and compare vs known.db
    d['goals'] = {}
    goalDbPaths = glob.glob(os.path.join(cfg1('path_dbs'), 'Goal.*.db'))

    for path in goalDbPaths:
        name = os.path.basename(path)[5:][:-3]
        gdb = MorphDb(path)

        # track total unique morphemes + when weighted by frequency
        # NOTE: a morpheme may occur multiple times within the same sentence, but this frequency is wrt note fields
        numUniqueReq, numUniqueKnown, numFreqReq, numFreqKnown = 0, 0, 0, 0
        for m in gdb.db.iterkeys():
            freq = gdb.db.frequency(m)
            numUniqueReq += 1
            numFreqReq += freq
            if m in knownDb.db:
                numUniqueKnown += 1
                numFreqKnown += freq

        d['goals'][name] = {
            'total': numUniqueReq,
            'known': numUniqueKnown,
            'freqTotal': numFreqReq,
            'freqKnown': numFreqKnown
        }

    saveStats(d)
    mw.progress.finish()
    return d
Example #17
def per( st, n ):
    n.delTag( st['tags'] )

    if n['k+N'] == '1': # FIXME this special but commonly wanted logic must be a cfg option
        ms = getMorphemes( n['focusMorph'], None, cfg1('morph_blacklist') )
        for m in ms:
            if m in st['db'].db:
                n.addTag( st['tags'] )
                break

    n.flush()
    return st
Example #18
def markFocusSeen( self, n ):
    '''Mark a focusMorph as already seen so future new cards with the same focus
    will be skipped. Also prints number of cards to be skipped if enabled'''
    global seenMorphs
    try:
        if not focus( n ): return
        q = u'%s:%s' % ( focusName( n ), focus( n ) )
    except KeyError: return
    seenMorphs.add( focus(n) )
    numSkipped = len( self.mw.col.findNotes( q ) ) -1
    if numSkipped and cfg1('print number of alternatives skipped'):
        tooltip( _( '%d alternatives will be skipped' % numSkipped ) )
Example #19
def markFocusSeen( self, n ):
    '''Mark a focusMorph as already seen so future new cards with the same focus
    will be skipped. Also prints number of cards to be skipped if enabled'''
    global seenMorphs
    try:
        if not focus( n ): return
        q = u'%s:%s' % ( focusName( n ), focus( n ) )
    except KeyError: return
    seenMorphs.add( focus(n) )
    numSkipped = len( self.mw.col.findNotes( q ) ) -1
    if numSkipped and cfg1('print number of alternatives skipped'):
        tooltip( _( '%d alternatives will be skipped' % numSkipped ) )
Example #20
def highlight( txt, extra, fieldDict, field, mod_field ):
    '''When a field is marked with the 'focusMorph' command, we format it by
    wrapping all the morphemes in <span>s with attributes set to their maturity'''
    # must avoid formatting a smaller morph that is contained in a bigger morph
    # => do largest subs first and don't sub anything already in <span>
    def nonSpanSub( sub, repl, string ):
        return u''.join( re.sub( sub, repl, s ) if not s.startswith('<span') else s for s in re.split( '(<span.*?</span>)', string ) )
    from morphemes import getMorphemes
    ms = getMorphemes( txt )
    for m in sorted( ms, key=lambda x: len(x.inflected), reverse=True ): # largest subs first
        locs = allDb().db.get( m, set() )
        mat = max( loc.maturity for loc in locs ) if locs else 0

        if   mat >= cfg1( 'threshold_mature' ):  mtype = 'mature'
        elif mat >= cfg1( 'threshold_known' ):   mtype = 'known'
        elif mat >= cfg1( 'threshold_seen' ):    mtype = 'seen'
        else:                                    mtype = 'unknown'
        repl = u'<span class="morphHighlight" mtype="{mtype}" mat="{mat}">{morph}</span>'.format(
                morph = m.inflected,
                mtype = mtype,
                mat = mat
                )
        txt = nonSpanSub( m.inflected, repl, txt )
    return txt
Example #21
def getStatsPath(): return cfg1('path_stats')

def loadStats():
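
This snippet is cut off right after the `def loadStats():` line. Given that `saveStats(d)` and `getStatsPath()` appear in the other examples, a plausible counterpart is a pickle load guarded against a missing file; the following is only a hedged reconstruction of that shape, not the add-on's actual code:

import pickle

def loadStats():
    # Hypothetical sketch: read back whatever saveStats() wrote, assuming pickle storage.
    try:
        with open(getStatsPath(), 'rb') as f:
            return pickle.load(f)
    except IOError:  # no stats file yet
        return None
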
Example #22
def mkAllDb(allDb=None):
    t_0, db, TAG = time.time(), mw.col.db, mw.col.tags
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Prep work for all.db creation',
                      max=N_notes,
                      immediate=True)

    if not allDb: allDb = MorphDb()
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    mw.progress.update(label='Generating all.db data')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0: mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'): continue

        mats = [(0.5 if ivl == 0 and ctype == 1 else ivl)
                for ivl, ctype in db.execute(
                    'select ivl, type from cards where nid = :nid', nid=nid)]
        if C('ignore maturity'):
            mats = [0 for mat in mats]
        ts, alreadyKnownTag = TAG.split(tags), C('tag_alreadyKnown')
        if alreadyKnownTag in ts:
            mats += [C('threshold_mature') + 1]

        for fieldName in C('morph_fields'):
            try:  # if doesn't have field, continue
                #fieldValue = normalizeFieldValue( getField( fieldName, flds, mid ) )
                fieldValue = getMecabField(fieldName, flds, mid)
            except KeyError:
                continue
            except TypeError:
                mname = mw.col.models.get(mid)['name']
                errorMsg(
                    u'Failed to get field "{field}" from a note of model "{model}". Please fix your config.py file to match your collection appropriately and ignore the following error.'
                    .format(model=mname, field=fieldName))
                raise

            loc = fidDb.get((nid, guid, fieldName), None)
            if not loc:
                loc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                ms = getMorphemes(fieldValue)
                if ms:  #TODO: this needed? should we change below too then?
                    #printf( '    .loc for %d[%s]' % ( nid, fieldName ) )
                    locDb[loc] = ms
            else:
                # mats changed -> new loc (new mats), move morphs
                if loc.fieldValue == fieldValue and loc.maturities != mats:
                    printf('    .mats for %d[%s]' % (nid, fieldName))
                    newLoc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                    locDb[newLoc] = locDb.pop(loc)
                # field changed -> new loc, new morphs
                elif loc.fieldValue != fieldValue:
                    printf('    .morphs for %d[%s]' % (nid, fieldName))
                    newLoc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                    ms = getMorphemes(fieldValue)
                    locDb.pop(loc)
                    locDb[newLoc] = ms
    printf('Processed all %d notes in %f sec' % (N_notes, time.time() - t_0))
    mw.progress.update(value=i, label='Creating all.db object')
    allDb.clear()
    allDb.addFromLocDb(locDb)
    if cfg1('saveDbs'):
        mw.progress.update(value=i, label='Saving all.db to disk')
        allDb.save(cfg1('path_all'))
        printf('Processed all %d notes + saved all.db in %f sec' %
               (N_notes, time.time() - t_0))
    mw.progress.finish()
    return allDb
Example #23
def updateNotes( allDb ):
    t_0, now, db, TAG   = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi         = [], {}
    N_notes             = db.scalar( 'select count() from notes' )
    mw.progress.start( label='Updating data', max=N_notes, immediate=True )
    fidDb   = allDb.fidDb()
    locDb   = allDb.locDb( recalc=False ) # fidDb() already forces locDb recalc

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, tooShortTag, tooLongTag = tagNames = jcfg('Tag_Comprehension'), jcfg('Tag_Vocab'), jcfg('Tag_Fresh'), jcfg('Tag_NotReady'), jcfg('Tag_AlreadyKnown'), jcfg('Tag_Priority'), jcfg('Tag_TooShort'), jcfg('Tag_TooLong')
    TAG.register( tagNames )
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update( label='Creating seen/known/mature from all.db' )
    seenDb      = filterDbByMat( allDb, cfg1('threshold_seen') )
    knownDb     = filterDbByMat( allDb, cfg1('threshold_known') )
    matureDb    = filterDbByMat( allDb, cfg1('threshold_mature') )
    mw.progress.update( label='Loading priority.db' )
    priorityDb  = MorphDb( cfg1('path_priority'), ignoreErrors=True ).db

    if cfg1('saveDbs'):
        mw.progress.update( label='Saving seen/known/mature dbs' )
        seenDb.save( cfg1('path_seen') )
        knownDb.save( cfg1('path_known') )
        matureDb.save( cfg1('path_mature') )

    mw.progress.update( label='Updating notes' )
    for i,( nid, mid, flds, guid, tags ) in enumerate( db.execute( 'select id, mid, flds, guid, tags from notes' ) ):
        if i % 500 == 0:    mw.progress.update( value=i )
        C = partial( cfg, mid, None )

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']: continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[ ( nid, guid, fieldName ) ]
                morphemes.update( locDb[ loc ] )
            except KeyError: continue

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if morpheme not in seenDb.db:      unseens.add( morpheme )
            if morpheme not in knownDb.db:     unknowns.add( morpheme )
            if morpheme not in matureDb.db:    unmatures.add( morpheme )
            if morpheme not in matureDb.db and morpheme in knownDb.db:
                newKnowns.add( morpheme )

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len( morphemes ), len( unseens ), len( unknowns ), len( unmatures )

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'): continue

            # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns: # focusMorph used outside loop
            F_k += allDb.frequency(focusMorph)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

            # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[ morpheme ]
            if locs:
                ivl = min( 1, max( loc.maturity for loc in locs ) )
                usefulness += C('reinforce new vocab weight') // ivl #TODO: maybe average this so it doesnt favor long sentences

        if any( morpheme.pos == u'動詞' for morpheme in unknowns ): #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min( 999, usefulness )

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

            # calculate mmi
        mmi = 10000*N_k + 1000*lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[ nid ] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split( tags ), splitFields( flds )

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [ t for t in ts if t not in [ notReadyTag, compTag, vocabTag, freshTag ] ]

        # determine card type
        if N_m == 0:    # sentence comprehension card, m+0
            ts = ts + [ compTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'' )
        elif N_k == 1:  # new vocab card, k+1
            ts = ts + [ vocabTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'%s' % focusMorph.base )
        elif N_k > 1:   # M+1+ and K+2+
            ts = ts + [ notReadyTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_m == 1: # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [ freshTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'%s' % list(unmatures)[0].base)
        else: # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [ freshTag ]
            setField( mid, fs, jcfg('Field_FocusMorph'), u'')


            # set type agnostic fields
        setField( mid, fs, jcfg('Field_UnknownMorphCount'), u'%d' % N_k )
        setField( mid, fs, jcfg('Field_UnmatureMorphCount'), u'%d' % N_m )
        setField( mid, fs, jcfg('Field_MorphManIndex'), u'%d' % mmi )
        setField( mid, fs, jcfg('Field_Unknowns'), u', '.join( u.base for u in unknowns ) )
        setField( mid, fs, jcfg('Field_Unmatures'), u', '.join( u.base for u in unmatures ) )
        setField( mid, fs, jcfg('Field_UnknownFreq'), u'%d' % F_k_avg )

            # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove( badLengthTag )

            # other tags
        if priorityTag in ts:   ts.remove( priorityTag )
        if isPriority:          ts.append( priorityTag )

        if tooShortTag in ts:   ts.remove( tooShortTag )
        if lenDiffRaw < 0:      ts.append( tooShortTag )

        if tooLongTag in ts:    ts.remove( tooLongTag )
        if lenDiffRaw > 0:      ts.append( tooLongTag )

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

            # update sql db
        tags_ = TAG.join( TAG.canonify( ts ) )
        flds_ = joinFields( fs )
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum( fs[0] )
            sfld = stripHTML( fs[ getSortFieldIndex( mid ) ] )
            ds.append( { 'now':now, 'tags':tags_, 'flds':flds_, 'sfld':sfld, 'csum':csum, 'usn':mw.col.usn(), 'nid':nid } )

    mw.progress.update( value=i, label='Updating anki database...' )
    mw.col.db.executemany( 'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid', ds )

    # Now reorder new cards based on MMI
    mw.progress.update( value=i, label='Updating new card ordering...' )
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for ( cid, nid, due ) in db.execute( 'select id, nid, due from cards where type = 0' ):
        if nid in nid2mmi: # owise it was disabled
            due_ = nid2mmi[ nid ]
            if due != due_: # only update cards that have changed
                ds.append( { 'now':now, 'due':due_, 'usn':mw.col.usn(), 'cid':cid } )
    mw.col.db.executemany( 'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds )
    mw.reset()

    printf( 'Updated notes in %f sec' % ( time.time() - t_0 ) )
    mw.progress.finish()
    return knownDb
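
The MorphMan Index packs three signals into one sortable integer: `10000*N_k` dominates, the length penalty fills the thousands digit, and `usefulness` (999 minus the frequency and bonus score, so more frequent unknowns end up with a smaller remainder) fills the last three digits; new cards are then given this number as their due value, so a lower MMI means the card is studied sooner. A worked example with invented inputs and the formula copied from above:

# Invented inputs; the formula is the one used in updateNotes() above.
N_k = 1               # one unknown morpheme (k+1 card)
lenDiff = 2           # two morphemes outside the good length range
F_k_avg = 120         # average collection frequency of the unknowns
priority_bonus = 200  # hypothetical 'priority.db weight'

usefulness = 999 - min(999, F_k_avg + priority_bonus)  # 999 - 320 = 679
mmi = 10000 * N_k + 1000 * lenDiff + usefulness

print(mmi)  # 12679
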
Example #24
def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Calculating frequency information')
    pops = [len(locs) for locs in allDb.db.values()]
    pops = [n for n in pops if n > 1]

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0: mw.progress.update(value=i)
        C = partial(cfg, mid, None)
        if not C('enabled'): continue
        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[(nid, guid, fieldName)]
                ms.update(locDb[loc])
            except KeyError:
                continue
        ms = [m for m in ms if m.pos not in C('morph_blacklist')]

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for m in ms:
            if m not in seenDb.db: unseens.add(m)
            if m not in knownDb.db: unknowns.add(m)
            if m not in matureDb.db: unmatures.add(m)
            if m not in matureDb.db and m in knownDb.db:
                newKnowns.add(m)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(ms), len(unseens), len(unknowns), len(unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'): continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += len(allDb.db[focusMorph])
        F_k_avg = F_k / N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[m]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                usefulness += C(
                    'reinforce new vocab weight'
                ) / ivl  #TODO: maybe average this so it doesnt favor long sentences

        if any(m.pos == u'動詞'
               for m in unknowns):  #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length (too little context vs long sentence)
        lenDiff = max(0, min(9, abs(C('optimal sentence length') - N) - 2))
        tooLong = N > C('optimal sentence length')

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)
        # determine card type
        compTag, vocabTag, notReadyTag, alreadyKnownTag, priorityTag, badLengthTag, tooLongTag = tagNames = C(
            'tag_comprehension'), C('tag_vocab'), C('tag_notReady'), C(
                'tag_alreadyKnown'), C('tag_priority'), C('tag_badLength'), C(
                    'tag_tooLong')
        if N_m == 0:  # sentence comprehension card, m+0
            ts = [compTag
                  ] + [t for t in ts if t not in [vocabTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'')
        elif N_k == 1:  # new vocab card, k+1
            ts = [vocabTag
                  ] + [t for t in ts if t not in [compTag, notReadyTag]]
            setField(mid, fs, C('focusMorph'), u'%s' % focusMorph.base)
        elif N_k > 1:  # M+1+ and K+2+
            ts = [notReadyTag
                  ] + [t for t in ts if t not in [compTag, vocabTag]]

            # set type agnostic fields
        setField(mid, fs, C('k+N'), u'%d' % N_k)
        setField(mid, fs, C('m+N'), u'%d' % N_m)
        setField(mid, fs, C('morphManIndex'), u'%d' % mmi)
        setField(mid, fs, C('unknowns'), u', '.join(u.base for u in unknowns))
        setField(mid, fs, C('unmatures'),
                 u', '.join(u.base for u in unmatures))
        setField(mid, fs, C('unknownFreq'), u'%d' % F_k_avg)

        # other tags
        if priorityTag in ts: ts.remove(priorityTag)
        if isPriority: ts.append(priorityTag)

        if badLengthTag in ts: ts.remove(badLengthTag)
        if lenDiff: ts.append(badLengthTag)

        if tooLongTag in ts: ts.remove(tooLongTag)
        if tooLong: ts.append(tooLongTag)

        # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({
                'now': now,
                'tags': tags_,
                'flds': flds_,
                'sfld': sfld,
                'csum': csum,
                'usn': mw.col.usn(),
                'nid': nid
            })

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)
    TAG.register(tagNames)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({
                    'now': now,
                    'due': due_,
                    'usn': mw.col.usn(),
                    'cid': cid
                })
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb
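
This variant measures the length penalty as distance from a single 'optimal sentence length', with a two-morpheme grace zone and a cap of 9, rather than the min/max good-length range used in some of the other examples. A quick check of that formula with invented lengths and an optimal length of 6:

optimal = 6  # hypothetical value of C('optimal sentence length')

for N in (4, 6, 8, 9, 20):
    lenDiff = max(0, min(9, abs(optimal - N) - 2))
    print('%d -> %d' % (N, lenDiff))
# 4 -> 0   (within the +/-2 grace zone)
# 6 -> 0
# 8 -> 0
# 9 -> 1
# 20 -> 9  (capped at 9)
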
Example #25
def mkAllDb(allDb=None):
    import config
    reload(config)
    t_0, db, TAG = time.time(), mw.col.db, mw.col.tags
    N_notes = db.scalar('select count() from notes')
    N_enabled_notes = 0  # for providing an error message if there is no note that is used for processing
    mw.progress.start(label='Prep work for all.db creation',
                      max=N_notes,
                      immediate=True)

    if not allDb: allDb = MorphDb()
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    mw.progress.update(label='Generating all.db data')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0: mw.progress.update(value=i)
        C = partial(cfg, mid, None)

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None: continue
        morphemizer = getMorphemizerByName(notecfg['Morphemizer'])

        N_enabled_notes += 1

        mats = [(0.5 if ivl == 0 and ctype == 1 else ivl)
                for ivl, ctype in db.execute(
                    'select ivl, type from cards where nid = :nid', nid=nid)]
        if C('ignore maturity'):
            mats = [0 for mat in mats]
        ts, alreadyKnownTag = TAG.split(tags), jcfg('Tag_AlreadyKnown')
        if alreadyKnownTag in ts:
            mats += [C('threshold_mature') + 1]

        for fieldName in notecfg['Fields']:
            try:  # if doesn't have field, continue
                #fieldValue = normalizeFieldValue( getField( fieldName, flds, mid ) )
                fieldValue = extractFieldData(fieldName, flds, mid)
            except KeyError:
                continue
            except TypeError:
                mname = mw.col.models.get(mid)['name']
                errorMsg(
                    u'Failed to get field "{field}" from a note of model "{model}". Please fix your config.py file to match your collection appropriately and ignore the following error.'
                    .format(model=mname, field=fieldName))
                raise

            loc = fidDb.get((nid, guid, fieldName), None)
            if not loc:
                loc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                ms = getMorphemes(morphemizer, fieldValue, ts)
                if ms:  #TODO: this needed? should we change below too then?
                    #printf( '    .loc for %d[%s]' % ( nid, fieldName ) )
                    locDb[loc] = ms
            else:
                # mats changed -> new loc (new mats), move morphs
                if loc.fieldValue == fieldValue and loc.maturities != mats:
                    #printf( '    .mats for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                    locDb[newLoc] = locDb.pop(loc)
                # field changed -> new loc, new morphs
                elif loc.fieldValue != fieldValue:
                    #printf( '    .morphs for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck(nid, fieldName, fieldValue, guid, mats)
                    ms = getMorphemes(morphemizer, fieldValue, ts)
                    locDb.pop(loc)
                    locDb[newLoc] = ms

    if N_enabled_notes == 0:
        mw.progress.finish()
        errorMsg(
            u'There is no card that can be analyzed or be moved. Add cards or (re-)check your configuration under "Tools -> MorphMan Preferences" or in "Anki/addons/morph/config.py" for mistakes.'
        )
        return None

    printf('Processed all %d notes in %f sec' % (N_notes, time.time() - t_0))
    mw.progress.update(value=i, label='Creating all.db object')
    allDb.clear()
    allDb.addFromLocDb(locDb)
    if cfg1('saveDbs'):
        mw.progress.update(value=i, label='Saving all.db to disk')
        allDb.save(cfg1('path_all'))
        printf('Processed all %d notes + saved all.db in %f sec' %
               (N_notes, time.time() - t_0))
    mw.progress.finish()
    return allDb
Example #26
def updateNotes( allDb ):
    t_0, now, db, TAG   = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi         = [], {}
    N_notes             = db.scalar( 'select count() from notes' )
    mw.progress.start( label='Updating data', max=N_notes, immediate=True )
    fidDb   = allDb.fidDb()
    locDb   = allDb.locDb( recalc=False ) # fidDb() already forces locDb recalc

    # handle secondary databases
    mw.progress.update( label='Creating seen/known/mature from all.db' )
    seenDb      = filterDbByMat( allDb, cfg1('threshold_seen') )
    knownDb     = filterDbByMat( allDb, cfg1('threshold_known') )
    matureDb    = filterDbByMat( allDb, cfg1('threshold_mature') )
    mw.progress.update( label='Loading priority.db' )
    priorityDb  = MorphDb( cfg1('path_priority'), ignoreErrors=True ).db

    if cfg1('saveDbs'):
        mw.progress.update( label='Saving seen/known/mature dbs' )
        seenDb.save( cfg1('path_seen') )
        knownDb.save( cfg1('path_known') )
        matureDb.save( cfg1('path_mature') )

    mw.progress.update( label='Calculating frequency information' )
    pops = [ len( locs ) for locs in allDb.db.values() ]
    pops = [ n for n in pops if n > 1 ]

    mw.progress.update( label='Updating notes' )
    for i,( nid, mid, flds, guid, tags ) in enumerate( db.execute( 'select id, mid, flds, guid, tags from notes' ) ):
        if i % 500 == 0:    mw.progress.update( value=i )
        C = partial( cfg, mid, None )
        if not C('enabled'): continue
        # Get all morphemes for note
        ms = set()
        for fieldName in C('morph_fields'):
            try:
                loc = fidDb[ ( nid, guid, fieldName ) ]
                ms.update( locDb[ loc ] )
            except KeyError: continue
        ms = [ m for m in ms if m.pos not in C('morph_blacklist') ]

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for m in ms:
            if m not in seenDb.db:      unseens.add( m )
            if m not in knownDb.db:     unknowns.add( m )
            if m not in matureDb.db:    unmatures.add( m )
            if m not in matureDb.db and m in knownDb.db:
                newKnowns.add( m )

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len( ms ), len( unseens ), len( unknowns ), len( unmatures )

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'): continue

            # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns: # focusMorph used outside loop
            F_k += len( allDb.db[ focusMorph ] )
        F_k_avg = F_k / N_k if N_k > 0 else F_k
        usefulness = F_k_avg

            # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            # add bonus for studying recent learned knowns (reinforce)
        for m in newKnowns:
            locs = allDb.db[ m ]
            if locs:
                ivl = min( 1, max( loc.maturity for loc in locs ) )
                usefulness += C('reinforce new vocab weight') / ivl #TODO: maybe average this so it doesnt favor long sentences

        if any( m.pos == u'動詞' for m in unknowns ): #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min( 999, usefulness )

            # difference from optimal length (too little context vs long sentence)
        lenDiff = max( 0, min( 9, abs( C('optimal sentence length') - N ) -2 ) )

            # calculate mmi
        mmi = 10000*N_k + 1000*lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[ nid ] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split( tags ), splitFields( flds )
            # determine card type
        compTag, vocabTag, notReadyTag, alreadyKnownTag, priorityTag = tagNames = C('tag_comprehension'), C('tag_vocab'), C('tag_notReady'), C('tag_alreadyKnown'), C('tag_priority')
        if N_m == 0:    # sentence comprehension card, m+0
            ts = [ compTag ] + [ t for t in ts if t not in [ vocabTag, notReadyTag ] ]
            setField( mid, fs, C('focusMorph'), u'' )
        elif N_k == 1:  # new vocab card, k+1
            ts = [ vocabTag ] + [ t for t in ts if t not in [ compTag, notReadyTag ] ]
            setField( mid, fs, C('focusMorph'), u'%s' % focusMorph.base )
        elif N_k > 1:   # M+1+ and K+2+
            ts = [ notReadyTag ] + [ t for t in ts if t not in [ compTag, vocabTag ] ]

            # set type agnostic fields
        setField( mid, fs, C('k+N'), u'%d' % N_k )
        setField( mid, fs, C('m+N'), u'%d' % N_m )
        setField( mid, fs, C('morphManIndex'), u'%d' % mmi )
        setField( mid, fs, C('unknowns'), u', '.join( u.base for u in unknowns ) )
        setField( mid, fs, C('unmatures'), u', '.join( u.base for u in unmatures ) )
        setField( mid, fs, C('unknownFreq'), u'%d' % F_k_avg )

            # other tags
        if priorityTag in ts:   ts.remove( priorityTag )
        if isPriority:          ts.append( priorityTag )

            # update sql db
        tags_ = TAG.join( TAG.canonify( ts ) )
        flds_ = joinFields( fs )
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum( fs[0] )
            sfld = stripHTML( fs[ getSortFieldIndex( mid ) ] )
            ds.append( { 'now':now, 'tags':tags_, 'flds':flds_, 'sfld':sfld, 'csum':csum, 'usn':mw.col.usn(), 'nid':nid } )

    mw.progress.update( value=i, label='Updating anki database...' )
    mw.col.db.executemany( 'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid', ds )
    TAG.register( tagNames )

    # Now reorder new cards based on MMI
    mw.progress.update( value=i, label='Updating new card ordering...' )
    ds = []
    for ( cid, nid, due ) in db.execute( 'select id, nid, due from cards where type = 0' ):
        if nid in nid2mmi: # owise it was disabled
            due_ = nid2mmi[ nid ]
            if due != due_: # only update cards that have changed
                ds.append( { 'now':now, 'due':due_, 'usn':mw.col.usn(), 'cid':cid } )
    mw.col.db.executemany( 'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds )
    mw.reset()

    printf( 'Updated notes in %f sec' % ( time.time() - t_0 ) )
    mw.progress.finish()
Example #27
def mkAllDb( allDb=None ):
    import config; reload(config)
    t_0, db, TAG = time.time(), mw.col.db, mw.col.tags
    N_notes = db.scalar( 'select count() from notes' )
    N_enabled_notes = 0 # for providing an error message if there is no note that is used for processing
    mw.progress.start( label='Prep work for all.db creation', max=N_notes, immediate=True )

    if not allDb: allDb = MorphDb()
    fidDb   = allDb.fidDb()
    locDb   = allDb.locDb( recalc=False )   # fidDb() already forces locDb recalc

    mw.progress.update( label='Generating all.db data' )
    for i,( nid, mid, flds, guid, tags ) in enumerate( db.execute( 'select id, mid, flds, guid, tags from notes' ) ):
        if i % 500 == 0:    mw.progress.update( value=i )
        C = partial( cfg, mid, None )

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None: continue
        morphemizer = getMorphemizerByName(notecfg['Morphemizer'])

        N_enabled_notes += 1

        mats = [ ( 0.5 if ivl == 0 and ctype == 1 else ivl ) for ivl, ctype in db.execute( 'select ivl, type from cards where nid = :nid', nid=nid ) ]
        if C('ignore maturity'):
            mats = [ 0 for mat in mats ]
        ts, alreadyKnownTag = TAG.split( tags ), jcfg('Tag_AlreadyKnown')
        if alreadyKnownTag in ts:
            mats += [ C('threshold_mature')+1 ]

        for fieldName in notecfg['Fields']:
            try: # if doesn't have field, continue
                #fieldValue = normalizeFieldValue( getField( fieldName, flds, mid ) )
                fieldValue = extractFieldData( fieldName, flds, mid )
            except KeyError: continue
            except TypeError:
                mname = mw.col.models.get( mid )[ 'name' ]
                errorMsg( u'Failed to get field "{field}" from a note of model "{model}". Please fix your config.py file to match your collection appropriately and ignore the following error.'.format( model=mname, field=fieldName ) )
                raise

            loc = fidDb.get( ( nid, guid, fieldName ), None )
            if not loc:
                loc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                ms = getMorphemes(morphemizer, fieldValue, ts)
                if ms: #TODO: this needed? should we change below too then?
                    #printf( '    .loc for %d[%s]' % ( nid, fieldName ) )
                    locDb[ loc ] = ms
            else:
                # mats changed -> new loc (new mats), move morphs
                if loc.fieldValue == fieldValue and loc.maturities != mats:
                    #printf( '    .mats for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                    locDb[ newLoc ] = locDb.pop( loc )
                # field changed -> new loc, new morphs
                elif loc.fieldValue != fieldValue:
                    #printf( '    .morphs for %d[%s]' % ( nid, fieldName ) )
                    newLoc = AnkiDeck( nid, fieldName, fieldValue, guid, mats )
                    ms = getMorphemes(morphemizer, fieldValue, ts)
                    locDb.pop( loc )
                    locDb[ newLoc ] = ms

    if N_enabled_notes == 0:
        mw.progress.finish()
        errorMsg(u'There is no card that can be analyzed or be moved. Add cards or (re-)check your configuration under "Tools -> MorphMan Preferences" or in "Anki/addons/morph/config.py" for mistakes.')
        return None

    printf( 'Processed all %d notes in %f sec' % ( N_notes, time.time() - t_0 ) )
    mw.progress.update( value=i, label='Creating all.db object' )
    allDb.clear()
    allDb.addFromLocDb( locDb )
    if cfg1('saveDbs'):
        mw.progress.update( value=i, label='Saving all.db to disk' )
        allDb.save( cfg1('path_all') )
        printf( 'Processed all %d notes + saved all.db in %f sec' % ( N_notes, time.time() - t_0 ) )
    mw.progress.finish()
    return allDb
Example #28
def post( st ):
    ms = getMorphemes( st['txt'], None, cfg1('morph_blacklist') )
    s = ms2str( ms )
    infoMsg( '----- All -----\n' + s )
Example #29
def my_reviewer_keyHandler( self, evt ):
    key = unicode( evt.text() )
    key_browse, key_skip = cfg1('browse same focus key'), cfg1('set known and skip key')
    if   key == key_skip:   setKnownAndSkip( self )
    elif key == key_browse: browseSameFocus( self )
Example #30
def getStatsPath():
    return cfg1('path_stats')
Example #31
def updateNotes(allDb):
    t_0, now, db, TAG = time.time(), intTime(), mw.col.db, mw.col.tags
    ds, nid2mmi = [], {}
    N_notes = db.scalar('select count() from notes')
    mw.progress.start(label='Updating data', max=N_notes, immediate=True)
    fidDb = allDb.fidDb()
    locDb = allDb.locDb(recalc=False)  # fidDb() already forces locDb recalc

    # read tag names
    compTag, vocabTag, freshTag, notReadyTag, alreadyKnownTag, priorityTag, tooShortTag, tooLongTag = tagNames = jcfg(
        'Tag_Comprehension'), jcfg('Tag_Vocab'), jcfg('Tag_Fresh'), jcfg(
            'Tag_NotReady'), jcfg('Tag_AlreadyKnown'), jcfg(
                'Tag_Priority'), jcfg('Tag_TooShort'), jcfg('Tag_TooLong')
    TAG.register(tagNames)
    badLengthTag = jcfg2().get('Tag_BadLength')

    # handle secondary databases
    mw.progress.update(label='Creating seen/known/mature from all.db')
    seenDb = filterDbByMat(allDb, cfg1('threshold_seen'))
    knownDb = filterDbByMat(allDb, cfg1('threshold_known'))
    matureDb = filterDbByMat(allDb, cfg1('threshold_mature'))
    mw.progress.update(label='Loading priority.db')
    priorityDb = MorphDb(cfg1('path_priority'), ignoreErrors=True).db

    if cfg1('saveDbs'):
        mw.progress.update(label='Saving seen/known/mature dbs')
        seenDb.save(cfg1('path_seen'))
        knownDb.save(cfg1('path_known'))
        matureDb.save(cfg1('path_mature'))

    mw.progress.update(label='Updating notes')
    for i, (nid, mid, flds, guid, tags) in enumerate(
            db.execute('select id, mid, flds, guid, tags from notes')):
        if i % 500 == 0: mw.progress.update(value=i)
        C = partial(cfg, mid, None)

        note = mw.col.getNote(nid)
        notecfg = getFilter(note)
        if notecfg is None or not notecfg['Modify']: continue

        # Get all morphemes for note
        morphemes = set()
        for fieldName in notecfg['Fields']:
            try:
                loc = fidDb[(nid, guid, fieldName)]
                morphemes.update(locDb[loc])
            except KeyError:
                continue

        # Determine un-seen/known/mature and i+N
        unseens, unknowns, unmatures, newKnowns = set(), set(), set(), set()
        for morpheme in morphemes:
            if morpheme not in seenDb.db: unseens.add(morpheme)
            if morpheme not in knownDb.db: unknowns.add(morpheme)
            if morpheme not in matureDb.db: unmatures.add(morpheme)
            if morpheme not in matureDb.db and morpheme in knownDb.db:
                newKnowns.add(morpheme)

        # Determine MMI - Morph Man Index
        N, N_s, N_k, N_m = len(morphemes), len(unseens), len(unknowns), len(
            unmatures)

        # Bail early for lite update
        if N_k > 2 and C('only update k+2 and below'): continue

        # average frequency of unknowns (ie. how common the word is within your collection)
        F_k = 0
        for focusMorph in unknowns:  # focusMorph used outside loop
            F_k += allDb.frequency(focusMorph)
        F_k_avg = F_k // N_k if N_k > 0 else F_k
        usefulness = F_k_avg

        # add bonus for morphs in priority.db
        isPriority = False
        for focusMorph in unknowns:
            if focusMorph in priorityDb:
                isPriority = True
                usefulness += C('priority.db weight')

            # add bonus for studying recent learned knowns (reinforce)
        for morpheme in newKnowns:
            locs = allDb.db[morpheme]
            if locs:
                ivl = min(1, max(loc.maturity for loc in locs))
                usefulness += C(
                    'reinforce new vocab weight'
                ) // ivl  #TODO: maybe average this so it doesnt favor long sentences

        if any(morpheme.pos == u'動詞'
               for morpheme in unknowns):  #FIXME: this isn't working???
            usefulness += C('verb bonus')

        usefulness = 999 - min(999, usefulness)

        # difference from optimal length range (too little context vs long sentence)
        lenDiffRaw = min(N - C('min good sentence length'),
                         max(0, N - C('max good sentence length')))
        lenDiff = min(9, abs(lenDiffRaw))

        # calculate mmi
        mmi = 10000 * N_k + 1000 * lenDiff + usefulness
        if C('set due based on mmi'):
            nid2mmi[nid] = mmi

        # Fill in various fields/tags on the note based on cfg
        ts, fs = TAG.split(tags), splitFields(flds)

        # clear any 'special' tags, the appropriate will be set in the next few lines
        ts = [
            t for t in ts
            if t not in [notReadyTag, compTag, vocabTag, freshTag]
        ]

        # determine card type
        if N_m == 0:  # sentence comprehension card, m+0
            ts = ts + [compTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_k == 1:  # new vocab card, k+1
            ts = ts + [vocabTag]
            setField(mid, fs, jcfg('Field_FocusMorph'),
                     u'%s' % focusMorph.base)
        elif N_k > 1:  # M+1+ and K+2+
            ts = ts + [notReadyTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')
        elif N_m == 1:  # we have k+0, and m+1, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'),
                     u'%s' % list(unmatures)[0].base)
        else:  # only case left: we have k+0, but m+2 or higher, so this card does not introduce a new vocabulary -> card for newly learned morpheme
            ts = ts + [freshTag]
            setField(mid, fs, jcfg('Field_FocusMorph'), u'')

            # set type agnostic fields
        setField(mid, fs, jcfg('Field_UnknownMorphCount'), u'%d' % N_k)
        setField(mid, fs, jcfg('Field_UnmatureMorphCount'), u'%d' % N_m)
        setField(mid, fs, jcfg('Field_MorphManIndex'), u'%d' % mmi)
        setField(mid, fs, jcfg('Field_Unknowns'),
                 u', '.join(u.base for u in unknowns))
        setField(mid, fs, jcfg('Field_Unmatures'),
                 u', '.join(u.base for u in unmatures))
        setField(mid, fs, jcfg('Field_UnknownFreq'), u'%d' % F_k_avg)

        # remove deprecated tag
        if badLengthTag is not None and badLengthTag in ts:
            ts.remove(badLengthTag)

            # other tags
        if priorityTag in ts: ts.remove(priorityTag)
        if isPriority: ts.append(priorityTag)

        if tooShortTag in ts: ts.remove(tooShortTag)
        if lenDiffRaw < 0: ts.append(tooShortTag)

        if tooLongTag in ts: ts.remove(tooLongTag)
        if lenDiffRaw > 0: ts.append(tooLongTag)

        # remove unnecessary tags
        if not jcfg('Option_SetNotRequiredTags'):
            unnecessary = [priorityTag, tooShortTag, tooLongTag]
            ts = [tag for tag in ts if tag not in unnecessary]

            # update sql db
        tags_ = TAG.join(TAG.canonify(ts))
        flds_ = joinFields(fs)
        if flds != flds_ or tags != tags_:  # only update notes that have changed
            csum = fieldChecksum(fs[0])
            sfld = stripHTML(fs[getSortFieldIndex(mid)])
            ds.append({
                'now': now,
                'tags': tags_,
                'flds': flds_,
                'sfld': sfld,
                'csum': csum,
                'usn': mw.col.usn(),
                'nid': nid
            })

    mw.progress.update(value=i, label='Updating anki database...')
    mw.col.db.executemany(
        'update notes set tags=:tags, flds=:flds, sfld=:sfld, csum=:csum, mod=:now, usn=:usn where id=:nid',
        ds)

    # Now reorder new cards based on MMI
    mw.progress.update(value=i, label='Updating new card ordering...')
    ds = []

    # "type = 0": new cards
    # "type = 1": learning cards [is supposed to be learning: in my case no learning card had this type]
    # "type = 2": review cards
    for (cid, nid,
         due) in db.execute('select id, nid, due from cards where type = 0'):
        if nid in nid2mmi:  # owise it was disabled
            due_ = nid2mmi[nid]
            if due != due_:  # only update cards that have changed
                ds.append({
                    'now': now,
                    'due': due_,
                    'usn': mw.col.usn(),
                    'cid': cid
                })
    mw.col.db.executemany(
        'update cards set due=:due, mod=:now, usn=:usn where id=:cid', ds)
    mw.reset()

    printf('Updated notes in %f sec' % (time.time() - t_0))
    mw.progress.finish()
    return knownDb