コード例 #1
0
# Lookup table: journal['中文名称'] -> journal.get('复合影响因子')
# (the value is None for journals that lack the second key).
jours = {
    journal['中文名称']: journal.get('复合影响因子')
    for journal in journals
}
# Key view of the lookup table, used for membership tests on journal names.
jours_set = jours.keys()

# Stage switches: each flag enables the matching `if STEP_...:` section below.
STEP_ONE = False
STEP_TWO = True
# 2. Drive the CNKI website
# The calls below are order-dependent: configure the search, run it, then
# collect and export the results.
if STEP_ONE:
    cnki_obj = Cnki()
    # Alternative constructor call that picks a random entry from PROXY_LIST:
    #cnki_obj = Cnki(PROXY_LIST[random.randint(0,len(PROXY_LIST)-1)])
    # Search criteria: query string, publication period, subject areas.
    cnki_obj.set_query(QUERY_STRING)
    cnki_obj.set_period(start_period=START_PERIOD,end_period=END_PERIOD)
    cnki_obj.set_subject(subjects=SUBJECTS)
    # Run the search and sort the result list (default sort order of Cnki.sort).
    cnki_obj.submit()
    cnki_obj.sort()
    # Select every listed paper, run the follow-up operation, then load more.
    cnki_obj.select_all_literature()
    cnki_obj.child_operation()
    cnki_obj.get_more()

    # Persist the collected results to LITERATURE_JSON_FILE and shut down.
    cnki_obj.export_to_json(file=LITERATURE_JSON_FILE)
    cnki_obj.close()

# Post-process the exported literature: attach a title and a 'rate' string
# to every paper record loaded from LITERATURE_JSON_FILE.
if STEP_TWO:
    # NOTE(review): the file object returned by open() is never closed
    # explicitly; a `with open(...)` block would release it deterministically.
    literatures = json.load(open(LITERATURE_JSON_FILE))
    for liter in literatures:
        # The mapping key is used as the paper title and copied into the record.
        paper_dict = literatures[liter]
        paper_dict['title'] = liter
        print(paper_dict)
        # Known journal: rate is "<value from jours>-<ISBN/ISSN>".
        if paper_dict['journal'] in jours_set:
            paper_dict['rate'] = ''.join([str(jours[paper_dict['journal']]),'-',paper_dict['ISBN/ISSN']])
        else:
コード例 #2
0
class CnkiLiterature:
    """Automate a CNKI literature search and render the results.

    Typical flow: :meth:`query` drives the wrapped ``Cnki`` object and stores
    the exported papers, :meth:`sort_by` ranks them using a field of the
    journal database, :meth:`export_to_pdf` renders them through a LaTeX
    template, and :meth:`close` shuts the browser down.
    """

    # Template used by export_to_pdf() when the caller does not supply one.
    _DEFAULT_TEMPLATE = r'E:\gitrobot\files\latex_template\article_template_01.tex'

    def __init__(self, proxy=None):
        """Create the journal database handle and the CNKI wrapper.

        :param proxy: optional proxy forwarded to ``Cnki``; when ``None`` the
            wrapper is constructed without a proxy argument
        """
        self.__journal_db = ChinaJournalDatabase()
        # None until query() runs; afterwards whatever export_to_dict()
        # returned (handled as a title -> paper mapping), and a ranked list
        # of paper dicts once sort_by() has been called.
        self.__literatures = None
        if proxy is None:
            self.cnki_obj = Cnki()
        else:
            self.cnki_obj = Cnki(proxy=proxy)

    def query(self, query_str=None, start_period=None, end_period=None, sort_by='被引',
              limit=4, subjects=None):
        """Run a search on CNKI and store the exported result.

        :param str query_str: query string
        :param str start_period: first year of the search period
        :param str end_period: last year of the search period
        :param str sort_by: sort criterion passed to ``Cnki.sort``
        :param int limit: limit passed to ``Cnki.get_more``
        :param list subjects: subject areas; defaults to ``['经济与管理科学']``
        :return: None
        """
        # A fresh list per call avoids sharing one mutable default object
        # between invocations.
        if subjects is None:
            subjects = ['经济与管理科学']

        browser = self.cnki_obj
        # Search criteria.
        browser.set_query(query_str)
        browser.set_period(start_period=start_period, end_period=end_period)
        browser.set_subject(subjects=subjects)
        # Run the search and order the result list.
        browser.submit()
        browser.sort(by=sort_by)
        # Select every listed paper, run the follow-up step, then load more.
        browser.select_all_literature()
        browser.child_operation()
        browser.get_more(limit=limit)

        self.__literatures = browser.export_to_dict()

    def _papers(self):
        """Return the stored papers as a list of dicts that carry 'title'.

        Works for both storage shapes: the mapping produced by query() (each
        key is copied into the record as its title) and the list produced by
        sort_by().  Returns an empty list when nothing has been queried yet.
        """
        store = self.__literatures
        if store is None:
            return []
        if isinstance(store, dict):
            papers = []
            for title, paper in store.items():
                paper['title'] = title
                papers.append(paper)
            return papers
        return list(store)

    @staticmethod
    def _rank_key(value, title):
        """Build a sort key that compares numeric ratings by magnitude.

        Numeric values (or numeric strings) sort above everything else and
        among themselves by value, so 10.2 ranks above 9.5.  Non-numeric
        values fall back to their text; a missing value sorts lowest.  The
        title is the final tie-breaker.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            number = None
        # NaN compares false with itself and cannot be ordered reliably.
        if number is None or number != number:
            return (0, 0.0, '' if value is None else str(value), title)
        return (1, number, '', title)

    def sort_by(self, by=None):
        """Rank the stored papers, best first, by a journal field.

        Each paper's journal is looked up in the journal database and the
        value ``matches[0][by]`` of the first match is used as its rating.
        Papers whose journal is not found are ranked last.  After this call
        the literature is stored as a list of paper dicts.  Calling it again
        re-ranks the list, and calling it before query() is a no-op.

        :param str by: key used to index the first matching journal record
        :return: None
        """
        if self.__literatures is None:
            return

        def rank(paper):
            matches = self.__journal_db.getByName(journal_name=paper['journal'], auto=True)
            value = None if len(matches) < 1 else matches[0][by]
            return self._rank_key(value, paper['title'])

        self.__literatures = sorted(self._papers(), key=rank, reverse=True)

    def export_to_pdf(self, out_file, title='', abstract='', template=None):
        """Write the stored papers to a LaTeX file and a PDF.

        One section is emitted per paper (title, "journal---year", abstract),
        in the current storage order.  With no papers stored, only the
        template content is generated.

        :param out_file: output document name
        :param title: document title substituted into the template
        :param abstract: document abstract substituted into the template
        :param template: path of the LaTeX template; defaults to the
            built-in template path
        :return: None
        """
        if template is None:
            template = self._DEFAULT_TEMPLATE
        # Keys are passed through unchanged to Article together with the
        # template.
        replace_word = {'articleTitle': title, 'arcticleabstract': abstract}
        doc = Article(template, replace_word)

        for paper in self._papers():
            doc.document.add_section(paper['title'], 3)
            doc.document.add_list(['---'.join([paper['journal'], paper['year']])], type=1)
            # The UTF-8 round trip yields a plain str copy of the abstract.
            # NOTE(review): presumably to normalise str subclasses - confirm.
            doc.document.append(paper['abstract'].encode().decode())

        doc.document.generate_tex(out_file)
        doc.document.generate_pdf(out_file)

    @property
    def literatrues(self):
        """Stored literature (historical spelling, kept for compatibility).

        :return: None before query(), the exported mapping after query(),
            or the ranked list after sort_by()
        """
        return self.__literatures

    @property
    def literatures(self):
        """Correctly spelled alias of :attr:`literatrues`.

        :return: same object as :attr:`literatrues`
        """
        return self.__literatures

    def close(self):
        """Close the browser held by the CNKI wrapper.

        :return: None
        """
        self.cnki_obj.close()