def start_get_paper_thread(id, author_id):
    # Skip the fetch entirely if the paper is already stored.
    with db_session:
        p = Paper.get(paper_id=id)
        if p:
            logger.debug('paper already exists, paper_id=%s', id)
            return
    # Fetch and persist the paper on a worker thread.
    executor.submit(get_paper, id, author_id)
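
# Usage sketch (assumptions: `executor` is a module-level
# concurrent.futures.ThreadPoolExecutor and `get_paper` performs the actual
# fetch + upsert; both are defined elsewhere in this module):
#
#     from concurrent.futures import ThreadPoolExecutor
#     executor = ThreadPoolExecutor(max_workers=4)
#     start_get_paper_thread('<paper-id>', '<author-id>')
#
# Callers fire and forget: the function returns nothing, and duplicate
# submissions are filtered by the Paper.get check above.
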
def upsert_paper(info, author_id):
    # Create the publisher first so the paper below can reference it.
    try:
        with db_session:
            publisher_id = info['publisher_id']
            if publisher_id:
                publisher = Publisher.get(publisher_id=publisher_id)
                if publisher is None:
                    publisher = Publisher(publisher_id=publisher_id)
                    publisher.name = info['publishername']
    except Exception:
        logger.exception('failed to upsert publisher, publisher_id=%s',
                         info.get('publisher_id'))
    with db_session:
        p = Paper.get(paper_id=info['paper_id'])
        if p:
            logger.debug('paper already exists, paper_id=%s', info['paper_id'])
            return
        paper = Paper(paper_id=info['paper_id'])
        paper.title = info['title']
        paper.abstract = info['abstract']
        paper.cite_num = info['cite_num']
        paper.cited_num = info['cited_num']
        publisher_id = info['publisher_id']
        if publisher_id:
            publisher = Publisher.get(publisher_id=publisher_id)
            if publisher:
                paper.publisher = publisher
        if author_id is None:
            return
        # Link an existing author, or pull the author record from the API.
        a = Author.get(author_id=author_id)
        if a:
            paper.authors.add(a)
        else:
            a_info = api.get_author(author_id)
            author = Author(author_id=a_info['author_id'])
            author.name = a_info['name']
            author.image_url = a_info['image_url']
            author.organization = a_info['organization']
            author.home_page = a_info['home_page']
            author.paper_count = a_info['paper_count']
            # NB: the model field is spelled 'citied_count'; kept as-is to
            # match the existing schema.
            author.citied_count = a_info['cited_count']
            paper.authors.add(author)
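
# Shape of the `info` dict consumed by upsert_paper, inferred from the key
# accesses above (values are illustrative placeholders, not real data):
#
#     info = {
#         'paper_id': '<paper-id>',
#         'title': '<title>',
#         'abstract': '<abstract>',
#         'cite_num': 0,
#         'cited_num': 0,
#         'publisher_id': '<publisher-id or None>',
#         'publishername': '<publisher name>',
#     }
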
def add(search_query, author, title):
    # Fields to pull back from the ADS search API.
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title',
          'abstract', 'doi', 'pubdate', 'pub', 'keyword', 'doctype',
          'identifier', 'links_data']
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        print("no papers found for query")
        exit(1)
    elif len(papers) == 1:
        selection = papers[0]  # type: ads.search.Article
    else:
        # Show the first ten hits and let the user pick one.
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type: ads.search.Article

    assert len(selection.doi) == 1
    doi = selection.doi[0]
    try:
        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)
    except peewee.DoesNotExist:
        pass

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")
    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    # Assumes at least one identifier of the form "arXiv:YYMM.NNNNN".
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()

    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)

    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size / chunk_size)
        with click.progressbar(r.iter_content(chunk_size=chunk_size),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
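
# Setup sketch for the `ads` client used by add(): the library needs an API
# token before SearchQuery/ExportQuery will work. Assuming standard
# ads-package behaviour, it can be set directly (otherwise the package falls
# back to the ADS_DEV_KEY environment variable or ~/.ads/dev_key):
#
#     import os
#     import ads
#     ads.config.token = os.environ.get('ADS_DEV_KEY')
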
def crawl_category(term='cs.LG', start_index=0, end_index=10000):
    index_iteration = 500
    logging.info("Crawling category : %s", term)
    for index in range(start_index, end_index, index_iteration):
        logging.info("Batch : %d-%d", index, index + index_iteration)
        articles = arxivpy.query(search_query=[term],
                                 start_index=index,
                                 max_index=index + index_iteration,
                                 results_per_iteration=index_iteration,
                                 wait_time=0.2,
                                 sort_by='lastUpdatedDate')
        article_batch_count = len(articles)
        if article_batch_count == 0:
            logging.warning('Article not found in batch %d - %d',
                            index, index + index_iteration)
        for idx, article in tqdm(enumerate(articles), total=article_batch_count):
            arvixID = article['id'].split('v')[0]
            query = Paper.select().where(Paper.arvixID == arvixID)
            if query.exists():
                # Paper already stored; just record the extra category.
                paper = Paper.get(Paper.arvixID == arvixID)
                categories = paper.category
                if term not in categories:
                    categories.append(term)
                    Paper.update(category=categories).where(
                        Paper.arvixID == arvixID).execute()
                continue
            success, article_meta = get_arvixpaper_semantic_scholar(arvixID)
            if success is False:
                logging.debug("Paper does not exist in Semantic Scholar, arvixID : %s",
                              arvixID)
                continue
            authorIDList = [
                int(author['authorId']) if author['authorId'] is not None else -1
                for author in article_meta['authors']
            ]
            authorNames = [article['main_author']]
            authorCount = len(article_meta['authors'])
            if authorCount > 1:
                other_author = [
                    name.strip() for name in article['authors'].split(',')
                    if len(name) > 1 and name != article['main_author']
                ]
                authorNames += other_author
            paper_category = [article['term']]
            if article['term'] != term:
                paper_category.append(term)
            try:
                paper = Paper.create(
                    indexID=idx + index,
                    arvixID=arvixID,
                    paperId=article_meta['paperId'],
                    doiID=str(article_meta['doi']),
                    title=article['title'],
                    summary=article['abstract'],
                    category=paper_category,
                    comments=article['comment'],
                    journal_ref=article['journal_ref'],
                    url=article['url'],
                    authorID=authorIDList,
                    authorName=authorNames,
                    authorCount=authorCount,
                    publishedDate=article['publish_date'],
                    citationVelocity=article_meta['citationVelocity'],
                    referencesCount=len(article_meta['references']),
                    topics=article_meta['topics'],
                    venue=str(article_meta['venue']),
                    year=article_meta['year'],
                    influentialCitationCount=article_meta['influentialCitationCount'],
                    citationCount=len(article_meta['citations']),
                    citations=article_meta['citations'],
                )
                try:
                    # Parse page/figure/table counts out of the free-text
                    # comment, e.g. "10 pages, 3 figures, 2 tables".
                    for meta in ['page', 'figure', 'table']:
                        if meta in article['comment']:
                            comment = article['comment'].replace(';', ',')
                            for segment in comment.split(','):
                                if meta in segment:
                                    count_prefix = segment.split(meta)[0]
                                    if meta == 'page':
                                        paper.pages = int(count_prefix.strip())
                                    elif meta == 'figure':
                                        paper.figures = int(count_prefix.strip())
                                    elif meta == 'table':
                                        paper.table = int(count_prefix.strip())
                                    break
                except Exception:
                    logging.debug("Error in parsing meta data")
                paper.save()
            except BaseException as e:
                logging.warning("Error in arvix id %s, error: %s", arvixID, str(e))
            # Throttle per-article Semantic Scholar lookups.
            time.sleep(0.3)
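
# Possible driver for the crawler above (an assumed entry point, not part of
# the original module): crawl a handful of arXiv categories back to back.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    for category in ['cs.LG', 'cs.CV', 'cs.CL']:
        crawl_category(term=category, start_index=0, end_index=2000)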