def author_count(obj):
    """Print how many of the given PDF files list each author.

    The author metadata string of every file is split on ", " and each
    resulting name is tallied. The result is written to standard output as
    a header line followed by one "name<TAB>count" line per distinct author.

    Args:
        obj: A list of paths to PDF files. Any value that is not a list is
            ignored and nothing is printed.

    Returns:
        None. The tally is only printed.
    """
    if not isinstance(obj, list):
        return

    author_map = {}
    for filename in obj:
        # Pull the metadata out while the file is still open, then let the
        # context manager close the handle instead of leaking it.
        with open(filename, 'rb') as pdf_file:
            info = PdfFileReader(pdf_file).getDocumentInfo()
            authors = info.author if info is not None else None
        if authors is None:
            # No info dictionary or no author entry: nothing to count for
            # this file (previously this crashed on None.split).
            continue
        for author in authors.split(", "):
            author_map[author] = author_map.get(author, 0) + 1

    print("author\tcount:")
    for author, count in author_map.items():
        line = u"%s\t%d" % (author, count)
        if str is bytes:
            # Python 2 only: emit UTF-8 bytes so non-ASCII names do not fail
            # when stdout has no encoding (e.g. when piped).
            line = line.encode("utf-8")
        print(line)
def keyword_count(obj):
    """Print how many of the given PDF files carry each keyword.

    The "/Keywords" metadata string of every file is split on ", ", each
    keyword is lower-cased, and the occurrences are tallied. Files without
    keyword metadata are counted separately. The output on standard output
    is the number of files without keywords, a header line, and then one
    "keyword<TAB>count" line per distinct keyword.

    Args:
        obj: A list of paths to PDF files. Any value that is not a list is
            ignored and nothing is printed.

    Returns:
        None. The tally is only printed.
    """
    if not isinstance(obj, list):
        return

    keyword_map = {}
    not_found = 0
    for filename in obj:
        # Pull the metadata out while the file is still open, then let the
        # context manager close the handle instead of leaking it.
        with open(filename, 'rb') as pdf_file:
            info = PdfFileReader(pdf_file).getDocumentInfo()
            # A PDF without an info dictionary is treated the same as one
            # without a "/Keywords" entry.
            keywords = info.getText("/Keywords") if info is not None else None
        if keywords is None:
            not_found += 1
            continue
        for keyword in keywords.split(", "):
            keyword = keyword.lower()
            keyword_map[keyword] = keyword_map.get(keyword, 0) + 1

    print("not found keywords %d" % not_found)
    print("keyword\tcount:")
    for keyword, count in keyword_map.items():
        line = u"%s\t%d" % (keyword, count)
        if str is bytes:
            # Python 2 only: emit UTF-8 bytes so non-ASCII keywords do not
            # fail when stdout has no encoding (e.g. when piped).
            line = line.encode("utf-8")
        print(line)