예제 #1
0
def author_count(obj):
    """Print how many of the given PDF files credit each author.

    Each file's author metadata is split on ", " and every resulting name
    is tallied. A header line is printed, followed by one
    "<author>\\t<count>" line per distinct name.

    Args:
        obj: A list of paths to PDF files. Any non-list value is ignored
            and nothing is printed.

    Files that carry no author metadata (no document info, or a missing or
    empty author field) are skipped instead of raising AttributeError.
    """
    if not isinstance(obj, list):
        return

    from collections import Counter

    author_map = Counter()
    for filename in obj:
        # Read the metadata while the file is still open, then let the
        # context manager close the handle (the original leaked it).
        with open(filename, 'rb') as pdf_file:
            info = PdfFileReader(pdf_file).getDocumentInfo()
            authors = info.author if info is not None else None
        if not authors:
            continue
        author_map.update(authors.split(", "))

    print("author\tcount:")
    for author, count in author_map.items():
        line = u"%s\t%d" % (author, count)
        if str is bytes:
            # Python 2: emit UTF-8 bytes explicitly so non-ASCII names do
            # not depend on the encoding of stdout.
            line = line.encode("utf-8")
        print(line)
예제 #2
0
def keyword_count(obj):
    """Print how many of the given PDF files use each keyword.

    Each file's "/Keywords" metadata is split on ", ", lower-cased and
    tallied. The output is the number of files without keywords, a header
    line, and then one "<keyword>\\t<count>" line per distinct keyword.

    Args:
        obj: A list of paths to PDF files. Any non-list value is ignored
            and nothing is printed.

    A file counts as "not found" when it has no document info, no
    "/Keywords" entry, or an empty keywords string.
    """
    if not isinstance(obj, list):
        return

    from collections import Counter

    keyword_map = Counter()
    not_found = 0
    for filename in obj:
        # Read the metadata while the file is still open, then let the
        # context manager close the handle (the original leaked it).
        with open(filename, 'rb') as pdf_file:
            info = PdfFileReader(pdf_file).getDocumentInfo()
            keywords = info.getText("/Keywords") if info is not None else None
        if not keywords:
            not_found += 1
            continue
        # Lower-case so that differently capitalised spellings share a count.
        keyword_map.update(keyword.lower() for keyword in keywords.split(", "))

    print("not found keywords %d" % not_found)
    print("keyword\tcount:")
    for keyword, count in keyword_map.items():
        line = u"%s\t%d" % (keyword, count)
        if str is bytes:
            # Python 2: emit UTF-8 bytes explicitly so non-ASCII keywords do
            # not depend on the encoding of stdout.
            line = line.encode("utf-8")
        print(line)