コード例 #1
0
ファイル: read.py プロジェクト: zhenchentl/MentorRec
class Docs():
    """Build per-author, per-year text documents from paper data held in Redis.

    All paper data is read through a ``RedisHelper`` instance created in the
    constructor and stored on ``self.redis``.
    """

    def __init__(self):
        logging.info('conduct docs firstly')
        self.redis = RedisHelper()

    def conductDocs(self):
        """Write one document line per (author, year) to ``PATH_DOC_AUTHOR``.

        For every author returned by ``getAuthorList``, the text of each paper
        whose year is not later than ``TEST_DATA_YEAR`` is grouped by year.
        A paper contributes its abstract, or its title when the abstract is
        shorter than three characters. Each group is written as a single
        line, and the zero-based line number is registered together with the
        author and year through ``addDocAuthorYear``.

        The running line number is printed every 10000 documents as a simple
        progress indicator.
        """
        index = 0  # zero-based line number of the next document in the file
        # The context manager closes the file even when a Redis call,
        # ``int(year)`` or a write raises part-way through the export.
        with open(PATH_DOC_AUTHOR, 'w') as docWriter:
            for author in self.redis.getAuthorList():
                # year --> list of text fragments of this author's papers
                yearParts = {}
                for paper in self.redis.getAuthorPapers(author):
                    year = self.redis.getPaperYear(paper)
                    # Skip papers published after the cutoff year.
                    if int(year) > TEST_DATA_YEAR:
                        continue
                    content = self.redis.getPaperAbstract(paper)
                    if len(content) < 3:
                        # No usable abstract: fall back to the title.
                        content = self.redis.getPaperTitle(paper)
                    yearParts.setdefault(year, []).append(content)

                for year, parts in yearParts.items():
                    if index % 10000 == 0:
                        # Parenthesised form behaves the same on Python 2
                        # and also parses on Python 3.
                        print(index)
                    # NOTE(review): fragments are joined without a separator,
                    # exactly as before; confirm that adjacent papers fusing
                    # their boundary words is intended.
                    docWriter.write(''.join(parts) + '\n')
                    self.redis.addDocAuthorYear(index, author, year)
                    index += 1