Python Paper.authors 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: models

클래스/타입: Paper

메소드/함수: authors

hotexamples.com에서의 예제들: 2

Python Paper.authors - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 models.Paper.authors에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Paper(24)

save(10)

select(9)

title(7)

get(4)

create(4)

find(4)

doi(3)

abstract(3)

findAll(3)

set_self_citations(2)

publisher(2)

jeeves_get_private_author(2)

id(2)

get_by_id(2)

findNumber(2)

update(2)

authors(2)

all(2)

doctype(1)

author(1)

publish_origin(1)

publish_time(1)

publish_year(1)

bibcode(1)

put(1)

self_score(1)

pubdate(1)

arxiv_identifier(1)

start_referencing(1)

table_exists(1)

tag(1)

to_json(1)

type(1)

publication(1)

permalink(1)

delete_papers(1)

book_title(1)

create_table(1)

comment(1)

cited_num(1)

first_author(1)

full_clean(1)

cite_num(1)

citation_num(1)

_make(1)

pdf_downloaded(1)

isbn(1)

bibtex(1)

journal(1)

예제 #1

파일 보기

    def write_db(self):

        print "len of entry list " + str(len(self.entry_list))

        for entry in self.entry_list:
            paper = Paper()
            if entry.has_key("id"):
                paper.id = entry["id"]
            if entry.has_key("type"):
                paper.type = entry["type"]
            if entry.has_key("title"):
                paper.title = entry["title"]
            if entry.has_key("author"):
                paper.authors = entry["author"]
            if entry.has_key("year"):
                paper.year = int(entry["year"])
            if entry.has_key("journal"):
                paper.journal = entry["journal"]
            if entry.has_key("booktitle"):
                paper.book_title = entry["booktitle"]
            if entry.has_key("publisher"):
                paper.publisher = entry["publisher"]
            if entry.has_key("institution"):
                paper.institution = entry["institution"]
            if entry.has_key("volume"):
                paper.volume = int(entry["volume"])
            if entry.has_key("number"):
                paper.number = int(entry["number"])
            if entry.has_key("pages"):
                paper.pages = entry["pages"]
            if entry.has_key("url"):
                paper.url = entry["url"]
            if entry.has_key("doi"):
                paper.doi = entry["doi"]
            if entry.has_key("isbn"):
                paper.isbn = entry["isbn"]

            paper.save()

예제 #2

파일 보기

파일: spiders.py 프로젝트: Spico197/CitationPace

def get_references_citations_by_id(profile_id):
    if isinstance(profile_id, dict):
        profile_id = profile_id.get('profile_id')
        if MONGO:
            if data_collection.find({"id": profile_id}).count() > 0:
                # 说明这个数据已经被爬取过了
                return []
    print('func2')
    if not profile_id:
        return -1
    headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    session = requests.Session()
    while True:
        try:
            response = session.get(
                'https://cn.bing.com/academic/profile?id={}&encoded=0&v=paper_preview&mkt=zh-cn'
                .format(profile_id),
                headers=headers)
            response.raise_for_status()
            response.encoding = 'utf-8'
            break
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        except Exception as e:
            time.sleep(3.0)
            print(e)
    result = re.search(r'IG:"(.*?)"', response.text)
    if result:
        ig = result.group(1)
    result = re.search(
        r'被 引 量</span></span><span class="aca_content"><div>(\d*)</div>',
        response.text)
    if result:
        citation_num = result.group(1)

    html = etree.HTML(response.text)

    paper = Paper(save2mongo=MONGO)
    try:
        paper.title = html.xpath('//li[@class="aca_title"]/text()')[0]
        paper.id = profile_id
        paper.citation_num = citation_num
        result = re.search(
            r'<span class="aca_label">DOI</span></span><span class="aca_content"><div>(.*?)</div>',
            response.text)
        if result:
            paper.doi = result.group(1)
        paper.authors = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span//a/text()')
        paper.abstract = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span[1]//text()')[-1]
        result = re.search(
            r'<span class="aca_label">发表日期</span></span><span class="aca_content"><div>(\d*)</div>',
            response.text)
        if result:
            paper.publish_year = result.group(1)

        base_url = 'https://cn.bing.com/academic/papers?ajax=scroll&infscroll=1&id={id}&encoded=0&v=paper_preview&mkt=zh-cn&first={first}&count={count}&IG={ig}&IID=morepage.{num}&SFX={num}&rt={rt}'

        count = 9
        citation_links = list()
        for i in range(1, int(citation_num) // count):
            ajax_url = base_url.format(id=profile_id,
                                       first=i * (count + 1),
                                       count=count + 1,
                                       ig=ig,
                                       num=i,
                                       rt='2')
            while True:
                try:
                    response = session.get(ajax_url, headers=headers)
                    response.raise_for_status()
                    response.encoding = 'utf-8'
                    break
                except KeyboardInterrupt:
                    raise KeyboardInterrupt
                except Exception as e:
                    time.sleep(3.0)
                    print(e)
            html = etree.HTML(response.text)
            citation_links.extend(html.xpath('//a[@target="_blank"]/@href'))
        print('number of citation_links', len(citation_links), 'citation_num',
              citation_num)
        if len(citation_links) >= 0:
            for i, citation_link in enumerate(citation_links):
                profile_id = get_profile_id(citation_link)
                if profile_id.get('title', False):
                    paper.citations.append(profile_id)
                print('get_profile_id: {}/{}\r'.format(i + 1,
                                                       len(citation_links)),
                      end='')
        print('\nnumber of ids:', len(paper.citations))
    except KeyboardInterrupt:
        raise KeyboardInterrupt
    except Exception as e:
        print(e)
    paper.save()
    # for profile_id in paper.citations:
    #     get_references_citations_by_id(profile_id)
    return paper.citations