Ejemplo n.º 1
0
Archivo: q.py Proyecto: yousong/gists
def drv(col):
    cwd = os.getcwd()
    flist = glob.glob('????.txt')
    for inf in flist:
        textf = 'economist.words.' + inf
        deckname = textf.rstrip('.txt')
        textf = os.path.join(cwd, textf)
        print(textf)
        deckid = col.decks.id(deckname)

        ti = TextImporter(col, textf)
        ti.model['did'] = deckid
        col.decks.select(deckid)

        ti.delimiter = '\t'
        ti.initMapping()
        ti.run()
Ejemplo n.º 2
0
def run_importer(file_path, tag_name, deck_name, collection):
    if not os.stat(file_path).st_size > 0:
        print "Nothing to import!"
        sys.exit()
    ti = TextImporter(collection, file_path)
    ti.delimiter = get_delimiter()
    ti.allowHTML = True
    ti.tagsToAdd = [tag_name]
    ti.initMapping()
    ti.run()

    # BUGFIX: anki doesn't add to selected deck
    did = collection.decks.id(deck_name)
    num_cards_added = ti.total
    ids = get_card_ids(collection, tag_name)
    ids = sorted(ids, reverse=True)
    for id in ids[:num_cards_added]:
        collection.db.execute("update cards set did = ? where id = ?",
                              did, id)
Ejemplo n.º 3
0
def ImportToAnki(model_name, import_to_deck, *args, **kwargs):
    # get file
    file = kwargs.get("file", None)
    if not file:
        file = getFile(mw,
                       _("Import"),
                       None,
                       key="import",
                       filter=Importers[0][0])
    if not file:
        return
    file = str(file)

    # check default model
    try:
        model = mw.col.models.byName(model_name)
        if not model:
            raise Exception("没有找到【{}】".format(model_name))
    except:
        importFile(mw, settings.deck_template_file)
        try:
            model = mw.col.models.byName(model_name)
        except:
            model = None

    importer = TextImporter(mw.col, file)
    importer.delimiter = "\t"
    importer.importMode = 0
    importer.allowHTML = True
    importer.model = model

    did = mw.col.decks.id(import_to_deck)
    mw.col.conf['curDeck'] = did
    importer.model['did'] = did
    mw.col.decks.select(did)
    importer.mapping = [kwargs.get("first")]
    importer.run()
    mw.reset()
    txt = _("Importing complete.") + "\n"
    if importer.log:
        txt += "\n".join(importer.log)
    showText(txt)
Ejemplo n.º 4
0
def importMHT():
    # Ask for the .mht file.
    file_path = getFile(mw, _("Import mht file"), None, key="import")
    if not file_path:
        return
    file_path = unicode(file_path)

    # Convert mht
    parser = Parser(file_path)
    output = parser.run()

    # Creates a temp dir instead of file since windows
    # won't allow subprocesses to access it otherwise.
    # https://stackoverflow.com/questions/15169101/how-to-create-a-temporary-file-that-can-be-read-by-a-subprocess
    try:
        temp_dir = mkdtemp()
        path = os.path.join(temp_dir, 'import.html')

        with open(path, 'w+') as html:
            html.write(output)
            # Move temp images to collection.media
            media_dir = os.path.join(mw.pm.profileFolder(), "collection.media")

            for meta in parser.file_map.values():
                temp_path = meta.get('path')
                new_path = os.path.join(media_dir, meta.get('filename'))
                shutil.move(temp_path, new_path)

        # import into the collection
        ti = TextImporter(mw.col, path)
        ti.delimiter = '\t'
        ti.allowHTML = True
        ti.initMapping()
        MHTImportDialog(mw, ti)

        # Remove file
        os.remove(path)
    finally:
        os.rmdir(temp_dir)
Ejemplo n.º 5
0
def importFile(deckId, notes, noteType):
    if len(notes) == 0:
        return


    with open(u"tmp.txt", "w") as f:
        for item in notes:
            f.write("%s\n" % item)

    m = mw.col.models.byName(noteType)
    deck = mw.col.decks.get(deckId)
    deck['mid'] = m['id']
    m['did'] = deckId
    mw.col.decks.save(deck)
    mw.col.models.save(m)


    ti = TextImporter(mw.col, u"tmp.txt")
    ti.delimiter = ","
    ti.initMapping()
    ti.run()

    os.remove(u"tmp.txt")
Ejemplo n.º 6
0
def import_data():
    mw.progress.start(immediate=True)
    mw.checkpoint(_("Importing..."))
    txt = ''
    url = GOOGLE_SHEETS_URL
    response = urllib2.urlopen(url)
    data = csv.reader(response)

    for entry in data:
        deck_name, doc_title, import_flag, doc_url = entry
        # Exclude header
        if (deck_name == 'Deck Name') or (import_flag == 'FALSE'):
            continue

        # Update progress
        mw.checkpoint(_("Importing " + str(deck_name)))

        request = urllib2.urlopen(doc_url)
        soup = BeautifulSoup(request)
        # remove scripts
        for script in soup.findAll('script'):
            script.extract()
        # showText(unicode(soup))
        # inline_unicode_html = pynliner.fromString(str(soup))
        inline_html = ClaSS2Style(unicode(soup)).transform()

        # #replace undesirable style that hides bullet points
        # undesirable = "list-style-type:none"
        # cleaned_inline_html = string.replace(inline_unicode_html, undesirable , "")

        # clean html stling
        inline_soup = BeautifulSoup(inline_html)
        # showText(unicode(inline_soup))
        output = parse_html(inline_soup)

        if output.find("\t") == -1:
            # runs if there are no card rows detected for importing
            continue

        # Write media to collection, write output to temp file
        output = write_img_to_media_col(output, doc_url)
        temp_html_path = write_output_to_html_file(output)

        # select deck by name
        deck_id = mw.col.decks.id(deck_name)
        mw.col.decks.select(deck_id)

        # set model id
        model = mw.col.models.byName("Basic")
        deck = mw.col.decks.get(deck_id)
        deck['mid'] = model['id']
        mw.col.decks.save(deck)

        # import into the collection

        ti = TextImporter(mw.col, temp_html_path)
        ti.delimiter = '\t'
        ti.allowHTML = True
        ti.importMode = 2
        mw.pm.profile['importMode'] = 2

        # #check if deck model and TextImporter model matches
        if deck_id != ti.model['did']:
            ti.model['did'] = deck_id
            mw.col.models.save(ti.model)
        # run text importer
        ti.initMapping()
        ti.run()
        txt += "Import Complete for " + deck_name + ".\n"
        if ti.log:
            # manipulate log to show only necessary fields

            for i in ti.log:
                if i.find("added") != -1:
                    txt += i + "\n\n"
            # txt +=  "".join(ti.log) + "\n"

        # Cleanup
        os.remove(temp_html_path)
        temp_dir = os.path.dirname(temp_html_path)
        os.rmdir(temp_dir)
        mw.col.save()
        mw.reset()
    del_log = ''
    del_log += updated_oldest_card_and_remove_new_duplicates()
    del_log += delete_empty_cards()
    txt += del_log
    mw.progress.finish()
    showText(txt)

    mw.reset()
Ejemplo n.º 7
0
def do_import_notes(bcol,
                    deck_name,
                    data_file,
                    note_type,
                    delimiter="\t",
                    import_mode=0):
    col = bcol.col

    existingNotes = {}

    #load existing notes
    bdeck = None
    for dummy, bdk in sorted(bcol.bdecks.items()):
        if deck_name == bdk.name:
            bdeck = bdk
    assert bdeck
    notes = bdeck.queryNotes()
    for n in notes:
        note_id, note_subject, note_content, note_tags = n
        note_content = note_content.split("\x1f")[1]
        note_tags = note_tags.strip()
        existingNotes[note_subject] = [
            note_id, note_subject, note_content, note_tags
        ]

    nochangeNotes = {}
    toBeImportNotes = {}
    toBeUpdatedNotes = {}
    #load data fiel
    fp = open(data_file, "r")
    for line in fp.readlines():
        line = line.strip()
        parts = line.split("\t")
        subject = parts[0]
        content = parts[1]
        if len(content) > int(131072 * 0.8):  #131072 is limit of ANKI field
            logging.error("Content too long to import: %d, note: %s",
                          len(content), subject)
            sys.exit(1)
        if len(parts) == 3:
            tags = parts[2]
            tags = tags.strip()
        else:
            tags = ""
        if subject in existingNotes:
            #compare content and tags
            exist_note = existingNotes[subject]
            if content == exist_note[2] and tags == exist_note[3]:
                #doesn't need to be updated
                nochangeNotes[subject] = True
                pass
            else:
                logging.info("Updated note: %s", subject)
                toBeUpdatedNotes[subject] = [
                    subject, content, tags, exist_note[0]
                ]
        else:
            logging.info("New note: %s", subject)
            toBeImportNotes[subject] = [subject, content, tags]
    fp.close()

    logging.info("%d notes wll be kept without any change", len(nochangeNotes))
    logging.info("%d notes need to be updated.", len(toBeUpdatedNotes))
    logging.info("%d notes need to be added.", len(toBeImportNotes))

    if not toBeUpdatedNotes and not toBeImportNotes:
        col.close()
        logging.info("No new note need to be imported! Bye!")
        sys.exit(1)

    new_data_file = filter_import_data_file(data_file, toBeImportNotes,
                                            toBeUpdatedNotes)
    assert new_data_file

    #set current model
    logging.info("setting current deck name: %s", deck_name)
    deck_id = col.decks.id(deck_name)
    logging.info("setting current deck id: %s", deck_id)
    logging.info("setting note_type : %s", note_type)
    model = col.models.byName(note_type)

    #select deck
    col.decks.select(deck_id)

    #update deck
    deck = col.decks.get(deck_id)
    deck['mid'] = model['id']
    col.decks.save(deck)

    #update model
    model['did'] = deck_id
    col.models.save(model)
    col.models.setCurrent(model)

    logging.info("directly import: %s", new_data_file)
    ti = TextImporter(col, new_data_file)
    ti.allowHTML = True
    ti.needDelimiter = True
    ti.delimiter = "\t"
    ti.importMode = import_mode  #0, UPDATE_MODE; 1, IGNORE_MODE; 2, ADD_MODE
    ti.initMapping()
    ti.run()

    col.save()
    col.close()

    logging.info("Total %d imported,%d updated successfully." %
                 (len(toBeImportNotes), len(toBeUpdatedNotes)))

    return
Ejemplo n.º 8
0
        def makeDeck(parent,prefix,deck):
            name = deck.csvname
            csvfile = "%s%s%s.csv" % (tmpPath,prefix,name)

            if not os.path.exists(csvfile) and deck.cardType != None:
                print('Skipping deck "%s" because no file "%s" was found.' % (name, csvfile))
                return
                # raise Exception('No csv file "' + csvfile + '" found.')

            did = tcol.decks.id(parent + deck.name)
            d = tcol.decks.get(did)
            tcol.decks.select(did)

            confId = tcol.decks.confId(parent + deck.name, cloneFrom=deck.conf)

            if not deck.cardType:
                conf = tcol.decks.getConf(confId)
                conf['new']['perDay'] = 999
                tcol.decks.updateConf(conf)
            elif deck.perDay:
                conf = tcol.decks.getConf(confId)
                conf['new']['perDay'] = deck.perDay
                tcol.decks.updateConf(conf)

            tcol.decks.setConf(d,confId)

            if deck.cardType:
                ct = deck.cardType

                if not tcol.models.byName(ct.name):
                    m = tcol.models.new(ct.name)
                    m['req'] = [[0, 'all', [0]]]
                    m['css'] = ct.css()
                    m['tmpls'] = [
                        {
                            'name': 'Card 1',
                            'qfmt': ct.front(),
                            'afmt': ct.back(),
                            'bfont': 'Lucida Sans Unicode',
                            'bamft': '',
                            'bqmft': '',
                            'ord': 0,
                            'did': None,
                            'bsize': 12
                        }
                    ]
                    tcol.models.add(m)

                    for i,field in enumerate(ct.fields):
                        f = tcol.models.newField(field.anki_name)
                        f['ord'] = i
                        tcol.models.addField(m,f)
                else:
                    m = tcol.models.byName(ct.name)

                # So that we can reuse already-present models
                # todo: this doesn't actually work but would be a big part of
                # updating
                # if m['id'] != ct.mid:
                # 	m = tcol.models.get(m['id'])
                # 	m['id'] = ct.mid
                # 	m.save(m)

                tcol.save()

                m['did'] = did
                tcol.decks.select(did)
                ti = TextImporter(tcol,csvfile)
                ti.model = m
                ti.allowHTML = True
                ti.initMapping()
                ti.delimiter = "\t"
                ti.updateDelimiter()

                ti.run()
                tcol.save()

            for sd in deck.subdecks:
                makeDeck(parent + deck.name + '::',prefix + name + '-', sd)
Ejemplo n.º 9
0
def advimport():
    Log('-'*80)

    filename = getFile(mw, "Select file to import", None, key="import")
    
    if len(filename) == 0:
        showText("invalid filename", mw, type="text", run=True)
        return 
    
    lines = []
    n = 0
    
    with open(filename) as f:
        reader = unicode_csv_reader(f)
        
        for i in range(N_HEADER_LINES):
            n += 1
            reader.next()
            
        for row in reader:
            #print row
            n += 1
            lines.append((n, row))


    for n, line in lines:
        
        #Log("--"*5)

        data = []
        
        _chapt = line[0]
        _sect = line[1]
        _keywords = line[2]
        _question = line[3]
        _solution = line[4]
        
        _type = line[5]
        _subtype = line[6]
        _symb = SYMBOLS.get(_type, "")
        
        _rests = line[7:]
        
        print "L%03i:"%n,
        
        if not _type:
            print "!!! No type, skipping"
            continue


        elif _type == u"rule":
            print "   Rule        ",
            
            model = "Rule"
            key = _question
            
            data = [key, _question, _solution, _chapt, _sect, _type, _symb]


        elif _type == u"pron":
            print "   Pronoun     ",
            
            model = "Simple"
            key = _solution
            
            data = [key, _question, _solution, _chapt, _sect, _type, _symb]


        elif _type == u"wend":
            print "   Sentence    ",
            
            model = "Simple"
            key = _solution
            
            data = [key, _question, _solution, _chapt, _sect, _type, _symb]
            

        elif _type == u"prep":
            print "   Prepos      ",
            
            model = "Simple"
            key = _solution
            
            data = [key, _question, _solution, _chapt, _sect, _type, _symb]


        elif _type == u"adv":
            print "   Adverb      ",
            
            model = "Simple"
            key = _solution
            
            data = [key, _question, _solution, _chapt, _sect, _type, _symb]


        elif _type == u"nom": # Noun
            print "   Noun        ",
            
            model = "Noun"
            
            key = _solution
            
            lst = _solution.split(' ')
            
            art = lst.pop(0)
            noun = " ".join(lst)
            
            
            if not _subtype or _subtype == u"":
                if   art == "el":    _subtype = u"♂"
                elif art == "la":    _subtype = u"♀"
                elif art == "los":   _subtype = u"♂♂/♂♀"
                elif art == "las":   _subtype = u"♀♀"
                elif art == "el/la": _subtype = u"♂/♀"
            elif _subtype[0] in ["F", "f"]: _subtype = u"♀"
            elif _subtype[0] in ["M", "m"]: _subtype = u"♂"
            
            data = [key, _question, _solution, _chapt, _sect, _type, _subtype, _symb]

            
        elif _type == u"verb":
            print "   Verb        ", 
            
            modus = _rests[0]
            temp = _rests[1]
            forms = _rests[2:]
            
            for ii, f in enumerate(forms):
                _ = f.split('|')
                if len(_)==2:
                    for i, (c, e) in enumerate(zip(["stem", "ext"], _)):
                        _[i] = '<span class="%s">%s</span>' % (c, e)
                
                for i, x in enumerate(_):
                    _[i] = _[i].replace("[", '<span class="irr">')
                    _[i] = _[i].replace("]", '</span>')
                    
                forms[ii] = "".join(_)
            
            model = "Verb"
            key = "%s (%s; %s)" % (_solution, modus, temp)
            jsforms = '''{'sg1':'%s','sg2':'%s','sg3':'%s','pl1':'%s','pl2':'%s','pl3':'%s'}''' % tuple(forms)
            #Log("JSF", jsforms)

            _question = _question.replace("[", '<span class="prp">')
            _question = _question.replace("]", '</span>')
            _solution = _solution.replace("[", '<span class="prp">')
            _solution = _solution.replace("]", '</span>')
            
            #print _question

            data = [key, _question, _solution, _chapt, _sect, _type, _subtype, _symb, modus, temp, jsforms]
          
          
        elif _type == u"adj":
            print "   Adjective   ",
            
            s = _solution
            
            def decline(stem, exts=['_o', '_a', '_os', '_as'], wrap=('<b>', '</b>')):
                return [stem+wrap[0]+_+wrap[1] for _ in exts]
            
            if '[' in s:
                _subtype = 'IRR'
                i = s.find('[')
                stem = s[:i]
                exts = s[i+1:s.find(']')].split('|')
                #Log("ir1: ", i, stem, exts, len(exts))
                
                if len(exts)==4:
                    pass
                elif len(exts)==2:
                    exts = [exts[0], exts[0], exts[1], exts[1]]
                elif len(exts)==3:
                    exts = [exts[0], exts[1], exts[2], exts[2]]
                else:
                    #TODO
                    exts = ['???']*4
                    
            elif '|' in s:
                _subtype = 'IRR'
                stem = ''
                exts = s.split('|')

                if len(exts)==4:
                    pass
                elif len(exts)==2:
                    exts = [exts[0], exts[0], exts[1], exts[1]]
                elif len(exts)==3:
                    exts = [exts[0], exts[1], exts[2], exts[2]]
                else:
                    exts = ['???']*4
                
            elif s[-1]=='o':
                _subtype = '-o'
                stem = s[:-1]
                exts = ['_o', '_a', '_os', '_as']

            elif s[-1]=='e':
                _subtype = '-e'
                stem = s[:-1]
                exts = ['e', 'e', '_es', '_es']
                
            elif s[-4:]=='ista':
                _subtype = '-ista'
                stem = s[:-4]
                exts = ['ist_a', 'ist_a', 'ist_as', 'ist_as']
            
            elif s[-2:] == u'ón':
                _subtype = u'-ón'
                stem = s[:-2]
                exts = [u'*ón_', '*on_a', '*on_es', '*on_as']

            elif s[-5:] == 'erior':
                _subtype = '-erior'
                stem = s[:-5]
                exts = [u'erior', 'erior', 'erior_s', 'erior_s']

            elif s[-2:] == u'or':
                _subtype = '-or'
                stem = s[:-2]
                exts = [u'or', 'or_a', 'or_es', 'or_as']
                
            elif s[-1] == 'z':
                _subtype = '-z'
                stem = s[:-1]
                exts = [u'*z', '*z', '*c_es', '*c_es']
                
            else: # consonant at end:
                _subtype = '-CONS'
                stem = s
                exts = ['', '', '_es', '_es']
                print '!!!! >> check this:', stem, exts ,"\n                     ",
                
                
            #decl = decline(stem, exts, wrap=('<span class="ext">', '</span>'))
            decl = decline(stem, exts, wrap=('', ''))
            #decl = [_.replace('_', '') for _ in decl]
            
            for i, d in enumerate(decl):
                while d.find('*')>=0:
                    fi = d.find('*')
                    #print fi, d
                    d = d[:fi] + '<span class="irr">' + d[fi+1] + '</span>' + d[fi+2:]
                if '_' in d:
                    d = d.replace('_', '<span class="ext">') + '</span>'
                decl[i] = d
            #print decl
            
            #Log(stem, exts, decl)
                
            model = "Adjectiv"
            key = stem + exts[0] # use masculine form sg as key
            key = key.replace('*', '').replace('_','')
            jsforms = '''{'MSg':'%s','FSg':'%s','MPl':'%s','FPl':'%s'}''' % tuple(decl)
            
            data = [key, _question, key, _chapt, _sect, _type, _subtype, _symb, jsforms]
        
        
        else:
            print "!!! Unknown type, skipping"
            continue
        
        
        
        if len(data) > 0:
            print data[1], " | ", data[2]
            with codecs.open('multiimport.tsv', 'w', encoding='utf-8') as f:
                #data = [_.encode("utf8") for _ in data]
                s = "\t".join(data)
                #f.write(s.decode("utf8"))
                f.write(s)
                #print s
            
            did = mw.col.decks.byName(deck_name)['id']
            mw.col.decks.select(did)

            m = mw.col.models.byName(model)
            mw.col.conf['curModel'] = m['id']
            cdeck = mw.col.decks.current()
            cdeck['mid'] = m['id']
            mw.col.decks.save(cdeck)
            mw.col.models.setCurrent(m)
            m['did'] = did
            mw.col.models.save(m)
            mw.reset()
            
            ti = TextImporter(mw.col,'multiimport.tsv')
            ti.delimiter = '\t'
            ti.allowHTML = True
            ti.initMapping()
            ti.run()
            #os.remove('multiimport.tsv')

      
    print('-'*80)