def build(self, paths):
    """Make a card for every path that is not on the ignore list.

    Stores *paths* on the instance, logs them through ``debugPaths``,
    calls ``makeCardFromPath`` for each path whose position is not in
    ``self.indexesToIgnore``, then calls ``finalizeFront()`` on every
    card in ``self.cards``.

    Args:
        paths: indexable sequence of paths; kept as ``self.paths``.

    Returns:
        ``self.cards`` after all fronts have been finalized.
    """
    self.paths = paths
    debug("\n\nMaking cards from paths!\n========================")
    self.debugPaths()

    # Iterate by position and re-read self.paths / self.indexesToIgnore on
    # every pass: checkForForce() adds to the ignore list, so (presumably
    # via makeCardFromPath -- confirm) it can grow while this loop runs.
    for i in range(len(self.paths)):
        # Plain membership test instead of list.index() inside a bare
        # ``except:``, which would also have hidden unrelated errors.
        if i not in self.indexesToIgnore:
            self.makeCardFromPath(self.paths[i])

    # adds the '(#)' to the end of cards if needed
    for card in self.cards:
        card.finalizeFront()
    return self.cards
def makeCardFromSubset(self, indexes, backIndex):
    """Append one card that merges the backs of several paths.

    For each position in *indexes*, the node at *backIndex* of that path
    is collected; duplicates are dropped and first-seen order is kept.
    The collected nodes are joined with ``constants.htmlBr`` and every
    ``{{...}}`` marker is stripped to form the back. The front comes from
    ``self.makeFront`` applied to the nodes before *backIndex* of the
    first listed path, with `` (N)`` appended when there are N > 1
    distinct backs. The result is appended to ``self.cards`` as
    ``RawCard(front, back, 1)``.

    Args:
        indexes: positions into ``self.paths``. Must not be empty, since
            ``indexes[0]`` is read to build the front.
        backIndex: position, within each path, of the node used as back.
    """
    debug("makeCardFromSubset()")
    debug(indexes)
    debug("backIndex: %d" % backIndex)

    uniqueBacks = []
    for i in indexes:
        candidate = self.paths[i][backIndex]
        # Membership test replaces list.index() wrapped in a bare
        # ``except:``; a list is kept so the order stays first-seen.
        if candidate not in uniqueBacks:
            uniqueBacks.append(candidate)

    back = constants.htmlBr.join(uniqueBacks)
    back = re.sub(r"{{.+?}}", "", back)

    # any of the paths should do, since they should all start the same
    front = self.makeFront(self.paths[indexes[0]][:backIndex])
    count = len(uniqueBacks)
    if count > 1:
        front += " (%d)" % count

    self.cards.append(RawCard(front, back, 1))
def checkForForce(self, path):
    """Process a ``{{force}}`` marker in *path* and say whether to go on.

    Nodes are scanned in order. For the first node that contains
    ``{{force}}`` and for which ``getMatchingSubset`` returns a non-empty
    list, the marker is removed (``removeForce``), a combined card is made
    (``makeCardFromSubset``) and ``addIgnores`` is called; the method then
    returns immediately.

    Args:
        path: sequence of node strings; expected to be an element of
            ``self.paths`` (it is looked up with ``self.paths.index``).

    Returns:
        False if a force marker was processed and the position of *path*
        is now in ``self.indexesToIgnore``; True otherwise, including
        when no marker was processed at all.

    Raises:
        ValueError: if a marker is processed and *path* is not found in
            ``self.paths``.
    """
    for nodeIndex, node in enumerate(path):
        if "{{force}}" not in node:
            continue

        debug("Force found in %d of path:" % nodeIndex)
        debug(path)
        matchingIndexes = self.getMatchingSubset(path, nodeIndex)

        # if the '*' is used inappropriately, it is possible for
        # matchingIndexes to be an empty list. check for it
        # NOTE(review): the ignore check below is assumed to belong to the
        # non-empty case only, so an empty subset moves on to the next
        # node -- confirm against the intended behaviour.
        if not matchingIndexes:
            continue

        self.removeForce(matchingIndexes, nodeIndex)
        self.makeCardFromSubset(matchingIndexes, nodeIndex + 1)
        self.addIgnores(matchingIndexes, nodeIndex + 2)

        # check to see if the current card was added to the ignore list;
        # if it was, do not continue with card creation
        currentIndex = self.paths.index(path)  # this should never fail
        # Direct membership test instead of list.index() inside a bare
        # ``except:`` that doubled as the "not ignored" branch.
        return currentIndex not in self.indexesToIgnore

    return True
def parse(self, p):
    """Parse paragraph element *p* into fragments and finalize them.

    Resets the parser, then walks ``p.iter()``. Element text is added
    either plainly or wrapped in span markup, depending on how the span
    style (after inheriting from the paragraph style) compares with the
    paragraph style. Line breaks, ``s``, ``tab`` and ``frame`` elements
    get their own replacements, and each element's tail text is added
    last. After the walk, the span returned by ``getSpan(p)`` is applied
    if there is one, followed by ``makePlainText``, ``strip``,
    ``addCodes`` and ``finalize`` in that order.

    Args:
        p: element exposing ``iter()`` whose items have ``tag``, ``text``
            and ``tail`` (presumably an ElementTree-style paragraph
            element -- confirm). Tags are compared against
            ``self.names[...]``.

    Returns:
        None; the result is kept in the parser's own state.
    """
    self.reset()
    paragraphStyle = self.getStyle(p)
    debug("\nParsing paragraph with style: %s" % paragraphStyle.name)
    debug(paragraphStyle.prettyPrint())

    for e in p.iter():
        debug("Parsing element with text: %s" % e.text)

        # Identity tests replace the original ``!= None`` comparisons.
        if e.text is not None:
            if e.tag != self.names['span']:
                self.add(e.text, True)
            else:
                spanStyle = self.getStyle(e)
                debug("Span found: %s" % spanStyle.name)
                debug(spanStyle.prettyPrint())
                style = spanStyle.inherit(paragraphStyle)
                debug("inherited %s" % style.prettyPrint())
                isEqual = style.equals(paragraphStyle)
                debug("paragraph equal span style? %s" % isEqual)

                if isEqual:
                    # The span adds nothing beyond the paragraph style.
                    self.add(e.text, True)
                else:
                    # Looked up before the isDefault() test to keep the
                    # original call order.
                    openSpan = self.getOpenSpan(style)
                    if not paragraphStyle.isDefault():
                        outerSpan = self.getOpenSpan(paragraphStyle)
                        debug("This should be the paragraph span: %s" % outerSpan)
                        self.addInnerSpan(openSpan, outerSpan, e.text)
                    elif openSpan:
                        # there is no paragraph style, but there is a
                        # span style
                        self.addSpan(openSpan, e.text)
                    else:
                        self.add(e.text, True)

        # The tag tests are kept as independent ``if`` statements, as in
        # the original, instead of being chained with ``elif``.
        if e.tag == self.names['line-break']:
            debug('*the line break*')
            self.add(constants.htmlBr)

        # google docs uses this heavily
        if e.tag == self.names['s']:
            self.add(' ', True)

        # not sure if this is a good idea
        # NOTE(review): the replacement is a single space as found in the
        # source; confirm that this is the intended width for a tab.
        if e.tag == self.names['tab']:
            self.add(' ')

        if e.tag == self.names['frame']:
            debug("text mode @ img? %s" % self.isTextMode)
            if self.isTextMode:
                self.add("[img_removed]", True)
            else:
                self.add(self.getImageHtml(e))

        if e.tail is not None:
            debug("*the tail %s" % e.tail)
            self.add(e.tail, True)

    debug(self.frags)

    prefix = self.getSpan(p)
    if prefix:
        self.addSpan(prefix)

    self.makePlainText()
    self.strip()
    self.addCodes()
    self.finalize()
def debugPaths(self):
    """Log every path in ``self.paths``, each preceded by its position."""
    for position, entry in enumerate(self.paths):
        debug(position)
        debug(entry)