def download_extract(paper, extract_figure=False, extract_table=False):
    """Download a paper's PDF and back-fill its page/table counts in the DB.

    Fetches the PDF via the arXiv API, optionally counts tables (tabula
    ``wrapper.read_pdf``) and pages/figures (``get_figure_count``), writes any
    newly-discovered counts back to the ``Paper`` row, and always deletes the
    downloaded file afterwards.

    Args:
        paper: a ``Paper`` model row; ``pages``/``table`` use -1 as "unknown".
        extract_figure: when True, run figure/page extraction on the PDF.
        extract_table: when True, run table extraction on the PDF.

    Returns:
        bool: True if the DB row was updated, False otherwise (including when
        both counts were already known or the arXiv API returned no pdf_url).
    """
    # Nothing to do when both counts are already known.
    if paper.pages >= 0 and paper.table >= 0:
        return False

    api_paper = arxiv.query(id_list=[paper.arvixID])[0]
    if 'pdf_url' not in api_paper:
        return False
    pdf_url = api_paper['pdf_url']

    file_path = os.path.join(store_path, paper.paperId + '.pdf')
    urllib.request.urlretrieve(pdf_url, file_path)

    # Default to the currently stored values so the code below never hits a
    # NameError when one of the extract_* flags is off (the original only
    # assigned these inside the conditional branches).
    table_count = paper.table
    page_count = paper.pages

    if extract_table:
        df = wrapper.read_pdf(file_path, multiple_tables=True, pages='all')
        table_count = len(df)
        del df  # free the parsed tables eagerly; only the count is needed
    if extract_figure:
        figure_count, page_count = get_figure_count(file_path)

    modified = False
    if paper.pages == -1:
        modified = True
        paper.pages = page_count
    if paper.table == -1:
        modified = True
        paper.table = table_count

    # The PDF is only needed transiently for extraction.
    if os.path.exists(file_path):
        os.remove(file_path)

    if modified:
        Paper.update(table=table_count, pages=page_count).where(
            Paper.arvixID == paper.arvixID).execute()
    # NOTE(review): the original had an affiliation-extraction section
    # (extract_text + email-domain regex + Paper.update(affiliation=...))
    # after this return; it was unreachable dead code and has been removed.
    return modified
def crawl_category(term='cs.LG'):
    """Crawl arXiv listings for *term* and upsert papers into the database.

    Walks the module-level ``[start_index, end_index)`` window in batches of
    500 via ``arxivpy.query``. For papers already stored, the category list is
    merged; new papers are enriched with Semantic Scholar metadata and created
    as ``Paper`` rows. Page/figure/table counts are best-effort parsed from
    the arXiv comment field (e.g. "10 pages, 3 figures").

    Args:
        term: arXiv category identifier to search, e.g. ``'cs.LG'``.
    """
    index_iteration = 500
    logging.info("Crawling category : %s", term)
    for index in range(start_index, end_index, index_iteration):
        logging.info("\nBatch : %d-%d" % (index, index + index_iteration))
        articles = arxivpy.query(search_query=[term],
                                 start_index=index,
                                 max_index=index + index_iteration,
                                 results_per_iteration=index_iteration,
                                 wait_time=0.2,
                                 sort_by='lastUpdatedDate')
        article_batch_count = len(articles)
        if article_batch_count == 0:
            logging.warning('Article not found in batch %d - %d' %
                            (index, index + index_iteration))
        for idx, article in tqdm(enumerate(articles),
                                 total=article_batch_count):
            # arXiv ids look like "1234.5678v2"; drop the version suffix.
            arvixID = article['id'].split('v')[0]
            query = Paper.select().where(Paper.arvixID == arvixID)
            if query.exists():
                # Known paper: just merge this category into its list.
                paper = Paper.get(Paper.arvixID == arvixID)
                categories = paper.category
                if term not in categories:
                    categories.append(term)
                    Paper.update(category=categories).where(
                        Paper.arvixID == arvixID).execute()
                continue
            success, article_meta = get_arvixpaper_semantic_scholar(arvixID)
            if success is False:
                logging.debug(
                    "Paper not exists in semantic scholar, arvixID : %s"
                    % arvixID)
                continue
            # Semantic Scholar may return null author ids; use -1 sentinel.
            authorIDList = [
                int(author['authorId'])
                if author['authorId'] is not None else -1
                for author in article_meta['authors']
            ]
            authorNames = [article['main_author']]
            authorCount = len(article_meta['authors'])
            if authorCount > 1:
                other_author = [
                    name.strip()
                    for name in article['authors'].split(',')
                    if len(name) > 1 and name != article['main_author']
                ]
                authorNames += other_author
            paper_category = [article['term']]
            if article['term'] != term:
                paper_category.append(term)
            try:
                paper = Paper.create(
                    indexID=idx + index,
                    arvixID=arvixID,
                    paperId=article_meta['paperId'],
                    doiID=str(article_meta['doi']),
                    title=article['title'],
                    summary=article['abstract'],
                    category=paper_category,
                    comments=article['comment'],
                    journal_ref=article['journal_ref'],
                    url=article['url'],
                    authorID=authorIDList,
                    authorName=authorNames,
                    authorCount=authorCount,
                    publishedDate=article['publish_date'],
                    citationVelocity=article_meta['citationVelocity'],
                    referencesCount=len(article_meta['references']),
                    topics=article_meta['topics'],
                    venue=str(article_meta['venue']),
                    year=article_meta['year'],
                    influentialCitationCount=article_meta[
                        'influentialCitationCount'],
                    citationCount=len(article_meta['citations']),
                    citations=article_meta['citations'],
                )
                try:
                    # Best-effort parse of "N pages, M figures, K tables"
                    # style comments; a failure here must not lose the row.
                    for meta in ['page', 'figure', 'table']:
                        if meta in article['comment']:
                            comment = article['comment'].replace(';', ',')
                            for segment in comment.split(','):
                                if meta in segment:
                                    page_prefix = segment.split(meta)[0]
                                    if meta == 'page':
                                        paper.pages = int(page_prefix.strip())
                                    elif meta == 'figure':
                                        paper.figures = int(
                                            page_prefix.strip())
                                    elif meta == 'table':
                                        paper.table = int(page_prefix.strip())
                                    break
                # Was a bare `except:`; narrowed so Ctrl-C still aborts.
                except (TypeError, ValueError, KeyError):
                    logging.debug("Error in parsing meta data")
                paper.save()
            # Was `except BaseException`; Exception keeps SystemExit and
            # KeyboardInterrupt propagating as they should.
            except Exception as e:
                logging.warning("Error in arvix id %s, error: %s" %
                                (arvixID, str(e)))
            time.sleep(0.3)  # rate-limit between per-article DB/API work