class Mnemosyne10Importer(Importer):
    """Importer that turns a pickled Mnemosyne database into ForeignCards.

    Every item in the database becomes one card with two fields (the
    item's ``q`` and ``a`` texts) plus converted scheduling data.
    """

    multipleCardsAllowed = False

    def foreignCards(self):
        """Return a list of ForeignCard objects read from ``self.file``.

        The file is expected to hold one leading line followed by a pickled
        sequence; element 0 of that sequence carries the start time
        (``.time``) and element 2 is the iterable of items.

        Raises:
            ImportFormatError: ``type="systemError"`` when the file cannot
                be opened; ``type="invalidFile"`` when unpickling raises
                EOFError or KeyError.
        """
        # empty objects so we can load the native mnemosyne file
        class MnemosyneModule(object):
            class StartTime:
                pass

            class Category:
                pass

            class Item:
                pass

        for module in ('mnemosyne',
                       'mnemosyne.core',
                       'mnemosyne.core.mnemosyne_core'):
            sys.modules[module] = MnemosyneModule()

        try:
            source = open(self.file, "rb")
        except (IOError, OSError) as e:
            raise ImportFormatError(type="systemError", info=str(e))
        try:
            # The pickled data starts after the first line; skip that line.
            source.readline()
            # NOTE(security): pickle.load can run arbitrary code from the
            # file being read. Only import databases from a trusted source.
            try:
                struct = pickle.load(source)
            except (EOFError, KeyError):
                raise ImportFormatError(type="invalidFile")
        finally:
            # Always release the handle, including on the error paths.
            source.close()

        # Sample the clock once so every card is scheduled against the
        # same reference time.
        now = time.time()
        startTime = struct[0].time
        daysPassed = (now - startTime) / 86400.0

        # gather cards
        cards = []
        for item in struct[2]:
            card = ForeignCard()
            card.fields.append(self.fudgeText(item.q))
            card.fields.append(self.fudgeText(item.a))
            # scheduling data: next_rep/last_rep are day counts relative to
            # the database start time, so the offset from today is
            # (next_rep - daysPassed) days.
            card.interval = item.next_rep - item.last_rep
            secDelta = (item.next_rep - daysPassed) * 86400.0
            card.due = card.nextTime = now + secDelta
            card.factor = item.easiness
            # for some reason mnemosyne starts cards off on 1 instead of 0
            card.successive = max(
                item.acq_reps_since_lapse + item.ret_reps_since_lapse - 1, 0)
            card.yesCount = max(item.acq_reps + item.ret_reps - 1, 0)
            card.noCount = item.lapses
            card.reps = card.yesCount + card.noCount
            if item.cat.name != u"<default>":
                # tags are space separated, so a category name must not
                # contain spaces
                card.tags = item.cat.name.replace(" ", "_")
            if card.reps:
                card.type = 1
            cards.append(card)
        return cards
def foreignCards(self):
    """Build one ForeignCard per distinct question found in "test.txt".

    The text is split into exams on lines matching ``Exam <letter>`` and
    each exam into questions on ``QUESTION <1-3 digits>`` headings. Every
    question chunk is passed to ``self.format_question``, whose result is
    read through the keys 'question', 'answers', 'answer' (indices into
    'answers') and 'explanation'.

    Questions whose text was already seen (compared by SHA-1 digest) are
    skipped. Newlines in both card faces are converted to ``<br />``.

    Returns:
        list of ForeignCard, each with two unicode fields: front and back.
    """
    # NOTE(review): the input path is hard-coded rather than taken from the
    # importer instance -- confirm this is intended.
    with open("test.txt", 'r') as f:
        text = f.read()

    exams = [
        x.strip()
        for x in re.split(r'^Exam [A-Z]$', text, flags=re.MULTILINE)[1:]
    ]
    exams = [
        re.split(r'^QUESTION [0-9][0-9]?[0-9]?', x, flags=re.MULTILINE)[1:]
        for x in exams
    ]
    exams = [[self.format_question(q.strip()) for q in x] for x in exams]

    n = '\n'
    br = '<br />'
    seen = set()  # digests of question texts already turned into cards
    cards = []
    for e in exams:
        for q in e:
            the_hash = hashlib.sha1(q['question']).digest()
            if the_hash in seen:
                continue
            seen.add(the_hash)

            answers = q['answers']

            def labelled(index, answers=answers):
                # "A. text", "B. text", ... with the letter derived from
                # self.orda plus the answer's position.
                return chr(index + self.orda) + '. ' + answers[index]

            front = (q['question'] + n * 3 + 'Answers:\n\n' +
                     (n * 2).join([labelled(x)
                                   for x in range(len(answers))]))
            back = ((n * 2).join([labelled(a) for a in q['answer']]) +
                    n * 3 + q['explanation'])

            card = ForeignCard()
            card.fields = [
                unicode(front.replace(n, br), "utf-8"),
                unicode(back.replace(n, br), "utf-8")
            ]
            cards.append(card)
    return cards
def cardFromFields(self, fields):
    "Return a new ForeignCard holding each given field value, stripped."
    newCard = ForeignCard()
    stripped = []
    for value in fields:
        stripped.append(value.strip())
    newCard.fields.extend(stripped)
    return newCard
def addItemToCards(self, item):
    """Convert one source item into a ForeignCard and append it to self.cards.

    The card gets two fields (cleaned Question and Answer). Unless
    ``self.META.resetLearningData`` is set, scheduling data is migrated
    from the item. Tags are derived from the item's title path when
    ``self.META.tagAllTopics`` is set or the path contains one of
    ``self.META.pathsToBeTagged``; " Memorized" is added for items with a
    positive interval when ``self.META.tagMemorizedItems`` is set.

    Side effects: when tags are derived, ``item.lTitle`` is replaced by its
    ASCII-normalized form; the resulting tag string is logged at level 3.

    Raises:
        ValueError: if ``item.LastRepetition`` is not in ``%d.%m.%Y`` form
            (raised by ``time.strptime``, regardless of resetLearningData).
    """
    # new anki card
    card = ForeignCard()

    # clean Q and A
    card.fields.append(
        self._fudgeText(self._decode_htmlescapes(item.Question)))
    card.fields.append(
        self._fudgeText(self._decode_htmlescapes(item.Answer)))
    card.tags = u""

    # pre-process scheduling data
    tLastrep = time.mktime(time.strptime(item.LastRepetition, '%d.%m.%Y'))

    # convert learning data
    if not self.META.resetLearningData:
        # AFactor uses a decimal comma; parse it once for both attributes
        factor = float(item.AFactor.replace(',', '.'))
        card.interval = item.Interval
        card.successive = item.Repetitions
        # due date = last repetition plus the interval (in days)
        card.due = tLastrep + (float(item.Interval) * 86400.0)
        card.lastDue = 0
        card.factor = factor
        card.lastFactor = factor

        # SM is not exporting all the information Anki keeps track of, so
        # it needs to be fudged
        card.youngEase0 = item.Lapses
        card.youngEase3 = item.Repetitions + item.Lapses
        card.yesCount = item.Repetitions
        card.noCount = item.Lapses
        card.reps = card.yesCount + card.noCount
        card.spaceUntil = card.due
        card.combinedDue = card.due

    # categories & tags
    # Storing every theme (tree structure of the collection) in tags is
    # often useful but not always; META decides whether all topics are
    # tagged or only those whose path contains one of the given patterns.
    hasTitle = item.lTitle is not None
    tagTitle = False
    if hasTitle and self.META.pathsToBeTagged:
        titlePath = u" ".join(item.lTitle).lower()
        tagTitle = any(pattern.lower() in titlePath
                       for pattern in self.META.pathsToBeTagged)

    # Without a title path there is nothing to derive tags from, even when
    # tagAllTopics is set.
    if hasTitle and (tagTitle or self.META.tagAllTopics):
        # normalize - remove diacritic punctuation from unicode chars
        item.lTitle = [self._unicode2ascii(topic) for topic in item.lTitle]

        # Transform each element of the title path into a camelCase tag:
        #  - drop bracketed numbers such as [999], turn '_' into spaces
        #  - turn every remaining non-word character into a space
        #  - discard elements made only of digits and spaces
        #  - capitalize each word, remove the spaces, lower the first char
        # Tags keep the order of the title path; duplicates are dropped.
        tags = []
        for topic in item.lTitle:
            tag = re.sub(r'(\[[0-9]+\])', ' ', topic).replace('_', ' ')
            tag = re.sub(r'(\W)', ' ', tag)
            tag = re.sub('^[0-9 ]+$', '', tag)
            tag = capwords(tag).replace(' ', '')
            if not tag.strip():
                continue
            tag = tag[0].lower() + tag[1:]
            if tag not in tags:
                tags.append(tag)
        card.tags += u" ".join(tags)

    if self.META.tagMemorizedItems and item.Interval > 0:
        card.tags += " Memorized"

    self.logger(u'Element tags\t- ' + card.tags, level=3)
    self.cards.append(card)
def foreignCards(self):
    """Return a list of foreign cards for importing.

    Lines are read from ``self.fh`` and fed through a small state machine
    kept in ``self.curState``:

    * ``collecting_globals`` - accepts blank lines and ``st: <tags>``
      (shared tags) until the first ``m: <model>`` line.
    * ``between_facts`` - accepts ``st:`` and ``m:`` lines; any other
      non-empty line starts a fact.
    * ``in_fact`` - each line is one field, a lone backtick opens a
      multiline field, ``t: <tags>`` adds per-fact tags and a lone ``.``
      terminates the fact.

    A state name suffixed with ``_reinterpret`` means "process the line
    that was just read again in the un-suffixed state".

    Text from ``#`` to end of line and trailing whitespace are removed
    from every line, including lines inside a multiline field.

    Returns:
        None if ``__END__`` was seen on an earlier call; otherwise a list
        holding the single fact completed by this call, or an empty list
        when end of file or ``__END__`` was reached first.

    Raises:
        ImportFormatError: (type "systemError") when the input ends in the
            middle of a fact, a global line is not understood, or an
            ``m:`` line names a model the deck does not have.
    """
    if self.iHaveSeenTheEnd:
        return None
    self.cards = []
    curCard = ForeignCard()
    # States in which the input is allowed to end.
    restingStates = ('collecting_globals', 'between_facts')
    while True:
        logger.debug("here we go (again); state is " + self.curState)
        match = re.search("_reinterpret$", self.curState)
        if match:
            # Keep `line` from the previous iteration and handle it again
            # in the state named without the suffix.
            logger.debug("it's a reinterpret, so, pre-sub: " + self.curState)
            self.curState = re.sub("_reinterpret", "", self.curState)
            logger.debug("post-sub: " + self.curState)
        else:
            line = self.fh.readline()
            logger.debug("pristine line: '''" + line + "'''")
            if line == "":
                # readline() returns "" only at end of file
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                break
            line = re.sub("#.*$", "", line)
            logger.debug("line with comments removed: '''" + line + "'''")
            line = line.rstrip()
            logger.debug("stripped line: '''" + line + "'''")
            if line == "__END__":
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                # return whatever we've gathered so far, but flag so that
                # we don't re-enter the state machine next time we get
                # called
                self.iHaveSeenTheEnd = True
                break

        logger.debug("state is " + self.curState)

        if self.curState == 'collecting_globals':
            if re.search(r"^\s*$", line):
                continue
            match = re.search("^st: (?P<tags>.*)$", line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search("^m: (?P<modelname>.+)$", line)
            if match:
                # The model line itself is handled by between_facts.
                self.curState = 'between_facts_reinterpret'
                logger.debug("found an m: line; reinterpreting in state " +
                             self.curState)
                continue
            raise ImportFormatError(
                type="systemError",
                info="expected m: or st: but got " + line)

        if self.curState == 'between_facts':
            logger.debug("between facts handler")
            match = re.search("^st: (?P<tags>.*)$", line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search("^m: (?P<modelname>.+)$", line)
            if match:
                logger.debug("found an m: line")
                self.curModel = None
                for m in self.deck.models:
                    logger.debug("looking for model named '" +
                                 match.group('modelname') + "' ok")
                    if m.name == match.group('modelname'):
                        self.curModel = m
                        logger.debug("chose model " + self.curModel.name +
                                     " with " + str(self.numFields) +
                                     " fields, ok")
                        break
                if not self.curModel:
                    # List the names that would have been accepted.
                    available = ", ".join(
                        [m.name for m in self.deck.models])
                    raise ImportFormatError(
                        type="systemError",
                        info="model " + match.group('modelname') +
                        " doesn't exist; choose from " + available)
                continue
            if line != "":
                # special case: go back around and reinterpret same line
                # in different context
                logger.debug("non-empty, reinterpreting...")
                self.curState = 'in_fact_reinterpret'
            # Blank lines between facts are simply skipped.
            continue

        if self.curState == 'in_fact':
            logger.debug("in_fact handler")
            # Sub-state - are we in a multiline field?
            logger.debug("are we in multiline?")
            if self.endOfMultilineFieldMarker != "":
                logger.debug("we are in multiline")
                # Any line *ending* with the marker closes the field; the
                # closing line itself is not added to the field.
                match = re.search(
                    re.escape(self.endOfMultilineFieldMarker) + "$", line)
                if match:
                    logger.debug("ending multiline")
                    self.endOfMultilineFieldMarker = ""
                else:
                    logger.debug("still collecting in multiline")
                    curCard.fields[-1] += line + self.multilineEOL
                continue

            logger.debug("not in multiline; perhaps terminating fact?")
            if line == ".":
                # Fact terminator
                logger.debug("we are terminating fact")
                curCard.tags += self.sharedTags
                # NOTE(review): unicode() without an encoding uses the
                # default codec; confirm the fields never hold non-ASCII
                # byte strings.
                curCard.fields = [
                    unicode(nonUTF) for nonUTF in curCard.fields
                ]
                # Pad out any remaining fields with empty strings.
                # Perhaps at a later point we'll support syntax for
                # specifying a field by "^<s>: " where s is some
                # unambiguous prefix of the field name, at which point
                # we'll need to specify our own mappings.
                logger.debug("purely simply collected fields: " +
                             str(curCard.fields))
                have = len(curCard.fields)
                want = len(self.curModel.fieldModels)
                logger.debug("numFields: %d, have: %d, want: %d" %
                             (self.numFields, have, want))
                curCard.fields[have:want] = [u""] * (want - have)
                # ForeignCard combines .tags with the contents of a
                # special tags field, and the default mapping expects that
                # field last - so always supply an empty one.
                curCard.fields.append(u"")
                logger.debug(
                    "full set of fields with trailing null strings and "
                    "empty tags: " + str(curCard.fields))
                self.numFields = len(curCard.fields)
                # Setting model computes mapping.
                self.model = self.curModel
                self.cards.append(curCard)
                self.curState = 'between_facts'
                # Fresh card
                curCard = ForeignCard()
                # Stop here and return what we have.
                break

            logger.debug("not terminating fact; perhaps starting multiline?")
            # Single-line field, or start of a multiline one.
            #   `
            #   this is a
            #   multiline string
            #   `
            # collects the enclosed lines, each followed by multilineEOL.
            match = re.search("^(?P<eof>`)(?P<flags>[b]?)$", line)
            if match:
                logger.debug("starting `multiline")
                self.endOfMultilineFieldMarker = match.group('eof')
                # FIXME: Anki seems to mangle this if not set to <br/>.
                # The optional "b" flag is parsed but currently ignored;
                # lines are always joined with "\n".
                self.multilineEOL = "\n"
                # Start off with new empty field, added to as we collect
                # constituent lines
                curCard.fields.append("")
                continue
            match = re.search("^t:(?P<facttags> .*)$", line)
            if match:
                logger.debug("adding per-fact tags")
                curCard.tags += match.group('facttags')
                continue
            # Single-line.
            logger.debug("collecting single line into single field")
            curCard.fields.append(line)
            continue

        raise ImportFormatError(type="systemError",
                                info="ended up in unhandled state")

    logger.debug("stick a fork in me: ")
    logger.debug(self.cards)
    return self.cards