def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.

    Generator. For every ``*.pdf`` matched by ``CACHE.glob`` whose parsed
    notes have a non-empty ``'author'`` entry, yields the tuple
    ``(meta, d, pdfminer(filename))`` where ``meta`` is the result of
    ``Document.parse_notes()`` and ``d`` is the ``Document`` itself.

    When ``verbose`` is true, a banner with the file URL, the title and the
    authors is printed before the pdf is handed to ``pdfminer``.

    Files for which ``pdfminer`` raises an ``Exception`` are skipped.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        d = Document(filename)
        meta = d.parse_notes()
        if not meta['author']:
            continue

        if verbose:
            ff = ' file://' + filename
            # ``print('')`` rather than ``print()``: under the Python 2 print
            # statement the latter would output an empty tuple, not a blank
            # line.
            print('')
            print(red % ('#' + '_' * len(ff)))
            print(red % ('#' + ff))
            print('')
            print(('%s: %s' % (yellow % 'meta', meta['title'])).encode('utf8'))
            print(('%s: %s' % (yellow % 'meta',
                               ' ; '.join(meta['author']))).encode('utf8'))
            print('')

        try:
            extracted = pdfminer(filename)
        except Exception:
            # XXX: silently skips examples which cause pdfminer to throw an
            # exception.
            continue

        # The yield sits outside the ``try`` so that only pdfminer failures
        # are skipped; an exception thrown into the generator at this point
        # propagates instead of being swallowed.
        yield (meta, d, extracted)
def data():
    """
    Build an author index over the pdfs in ``CACHE``.

    Every ``*.pdf`` matched by ``CACHE.glob`` is wrapped in a ``Document`` and
    the result of its ``parse_notes()`` is stored on the document as ``meta``.
    Documents whose ``meta['author']`` is empty are ignored.

    Returns a pair ``(ix, docs)``:

    * ``ix`` -- ``defaultdict(list)`` mapping ``simplify(author)`` to the
      documents that list that author;
    * ``docs`` -- list of the documents with authors annotated, in the order
      the glob produced them.
    """
    by_author = defaultdict(list)
    annotated = []

    for pdf in CACHE.glob('*.pdf'):
        doc = Document(pdf)
        doc.meta = doc.parse_notes()

        names = doc.meta['author']
        if not names:
            continue

        annotated.append(doc)
        for name in names:
            by_author[simplify(name)].append(doc)

    return by_author, annotated
def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.

    Generator. Walks the ``*.pdf`` files matched by ``CACHE.glob`` and, for
    each one whose parsed notes carry a non-empty ``'author'`` entry, yields
    ``(meta, doc, pdfminer(filename))``. With ``verbose`` set, a banner
    showing the file URL, title and authors is printed first.

    Exceptions raised by ``pdfminer`` are not caught here.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        doc = Document(filename)
        meta = doc.parse_notes()

        # Only documents with annotated authors are of interest.
        if not meta['author']:
            continue

        if verbose:
            url = ' file://' + filename
            rule = '#' + '_' * len(url)
            label = yellow % 'meta'
            title_line = '%s: %s' % (label, meta['title'])
            author_line = '%s: %s' % (label, ' ; '.join(meta['author']))

            # ``print('')`` emits a blank line under the Python 2 print
            # statement, where ``print()`` would output an empty tuple.
            print('')
            print(red % rule)
            print(red % ('#' + url))
            print('')
            print(title_line.encode('utf8'))
            print(author_line.encode('utf8'))
            print('')

        yield (meta, doc, pdfminer(filename))
def data(verbose=True):
    """
    Get a list of skid pdfs which have authors annotated.

    Generator. For every ``*.pdf`` matched by ``CACHE.glob`` whose parsed
    notes have a non-empty ``'author'`` entry, yields the tuple
    ``(meta, d, pdfminer(filename))`` where ``meta`` is the result of
    ``Document.parse_notes()`` and ``d`` is the ``Document`` itself.

    When ``verbose`` is true, a coloured banner with the file URL, the title
    and the authors is printed before the pdf is handed to ``pdfminer``.

    Files for which ``pdfminer`` raises an ``Exception`` are skipped.
    """
    for filename in iterview(CACHE.glob('*.pdf')):
        d = Document(filename)
        meta = d.parse_notes()
        if not meta['author']:
            continue

        if verbose:
            ff = ' file://' + filename
            label = colors.yellow % 'meta'
            print()
            print(colors.red % ('#' + '_' * len(ff)))
            print(colors.red % ('#' + ff))
            print()
            # Print the text itself: passing ``str.encode(...)`` bytes to
            # ``print()`` shows their ``b'...'`` repr with escaped colour
            # codes instead of the formatted line.
            print('%s: %s' % (label, meta['title']))
            print('%s: %s' % (label, ' ; '.join(meta['author'])))
            print()

        try:
            extracted = pdfminer(filename)
        except Exception:
            # XXX: silently skips examples which cause pdfminer to throw an
            # exception.
            continue

        # The yield sits outside the ``try`` so that only pdfminer failures
        # are skipped; an exception thrown into the generator at this point
        # propagates instead of being swallowed.
        yield (meta, d, extracted)