def setUp(self):
        # set up the test DB
        self.db = tested_db
        self.db.create_all()
        self.db.session.add(Paper(id=1, title="Software", content="Computer project", student_id=1))
        self.db.session.add(Paper(id=2, title="Art", content="Art project", student_id=2))
        self.db.session.commit()

        self.app = tested_app.test_client()
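
A matching tearDown is implied but not shown; a minimal sketch for the Flask-SQLAlchemy fixture above:

def tearDown(self):
        # undo setUp: discard the session and drop the test tables
        self.db.session.remove()
        self.db.drop_all()
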
def handle_paper(l, out_dir, http, year, no_pdf):
    l = re.sub("<var>([^\<]+)</var>", "\1", l)
    match = re.search('<li><a href="([^\"]+)">([^<]+)</a> (.*)</li>', l)

    if match is None:
        print("Bad line",l)
        return None

    authors_strs = match.group(3)[1:-1].split(">, <")
    title = match.group(2)

    authors = []
    for author_str in authors_strs:
        match2 = re.search('a [^>]+>([^<]+)</a', author_str)
        if match2 is None:
            print("No authors found for", title)
        else:
            authors.append(match2.group(1))

    data = handle_url2(match.group(1), out_dir, http, year, no_pdf)

    if data is None:
        return None

    reviews, paper_id, publication_type, abstract = data

    conference = "NIPS" + str(year)
    p = Paper(title, abstract, paper_id, reviews, authors, conference, True, None, publication_type)

    p.to_json(out_dir + "/reviews" + str(paper_id) + ".json")

    return True
Example #3
def test_author(self):
    with db_session:
        a = Author(author_id='3333')
        a.name = 'given'
        p = Paper(paper_id='qqqq')
        p.title = 'titttttttle'
        a.papers.add(p)
Example #4
def __init__(self, file, test_tsv=None):
    self.file = file
    self.uni_rank = ReadPickle('uni_rank.pickle')
    self.sjr = ReadPickle('journal_dictionary.pkl')
    self.document = test_tsv
    self.paper = Paper()
    with open(file, 'rb') as tei:
        self.soup = BeautifulSoup(tei, features="lxml")
Example #5
def api_create_paper(request, *, name, brief, tag):
    check_admin(request)
    if not name or not name.strip():
        raise APIValueError('name', 'name cannot be empty.')
    if not tag or not tag.strip():
        raise APIValueError('tag', 'tag cannot be empty.')
    paper = Paper(name=name.strip(), brief=brief.strip(), tag=tag.strip(), munber=0, total=0)
    yield from paper.save()
    return paper
Example #6
def createTestPaper():
    for i in range(3):
        paper = Paper(name="paper" + str(i),
                      description="paper" + str(i) + "...",
                      state='A')
        paper.save()
        userpaper = UserPaper(user=testUser,
                              paper=paper,
                              finish_state="finished")
        userpaper.save()
    for i in range(4, 7):
        paper = Paper(name="paper" + str(i),
                      description="paper" + str(i) + "...",
                      state='A')
        paper.save()
        userpaper = UserPaper(user=testUser,
                              paper=paper,
                              finish_state="unfinished")
        userpaper.save()
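
A hedged sanity check for this fixture (plain Django ORM; testUser is the user object the module defines elsewhere):

def checkTestPaper():
    # range(3) creates three finished papers, range(4, 7) three unfinished ones
    assert UserPaper.objects.filter(user=testUser, finish_state="finished").count() == 3
    assert UserPaper.objects.filter(user=testUser, finish_state="unfinished").count() == 3
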
Example #7
def AbstractPool(request, review_name_slug):
    review = Review.objects.get(user=request.user, slug=review_name_slug)
    if request.method == "POST":
        if request.POST.get('results') is None:
            q = request.POST.get('queryField')
            s = request.POST.get('sortType')
            n = request.POST.get('noResults')
            abstractList = search.main(q, s, n)
            # iterate over a copy: removing from the list while iterating it
            # directly would skip the element after each removal
            for document in list(abstractList):
                documentURL = document.get("url")
                if Paper.objects.filter(paper_url=documentURL,
                                        review=review).exists():
                    abstractList.remove(document)
        else:
            # the POSTed results are a serialized Python literal;
            # ast.literal_eval (import ast) parses it without eval's
            # arbitrary-code-execution risk
            abstractList = ast.literal_eval(request.POST.get('results'))
            q = request.POST.get('queryField')
        relevant = "Unchecked"
        if request.POST.get("relevanceField") == "relevant":
            relevant = "Relevant"
        else:
            if request.POST.get("relevanceField") == "irrelevant":
                relevant = "Not Relevant"
        if relevant != "Unchecked":
            print "traceA"
            compareCount_value = int(request.POST.get("hiddenCompareCount"))
            for s in abstractList:
                if s.get('compareCount') == compareCount_value:
                    currentDoc = s
                    paper = Paper(review=review,
                                  title=currentDoc["title"],
                                  paper_url=currentDoc["url"],
                                  full_text=currentDoc['fullText'],
                                  abstract=currentDoc["abstract"],
                                  authors=currentDoc["author"],
                                  abstract_relevance=relevant)
                    paper.save()
            if len(abstractList) > 1:
                for abstract in abstractList:
                    if int(abstract.get(
                            'compareCount')) > compareCount_value - 1:
                        abstract['compareCount'] -= 1
                del abstractList[compareCount_value - 1]
            else:
                abstractList = []
                #for abstract in abstractList:
                #if int(abstract.get('compareCount')) > compareCount_value:
                #abstract['compareCount'] -= 1
                #del abstractList[compareCount_value]
        return render(
            request, 'ultimatereview/AbstractPool.html', {
                "Abstracts": abstractList,
                'query': q,
                'review': review.title,
                'slug': review_name_slug
            })
Example #8
def put_paper():
    # get the title first, if no title then fail
    title = request.form.get("title")
    content = request.form.get("content")
    if not title:
        return make_response(jsonify({"code": 403,
                                      "msg": "Cannot put paper. Missing mandatory fields."}), 403)
    paper_id = request.form.get("id")
    if not paper_id:
        p = Paper(title=title, content=content)
    else:
        p = Paper(id=paper_id, title=title, content=content, student_id=paper_id)

    db.session.add(p)
    try:
        db.session.commit()
    except sqlalchemy.exc.SQLAlchemyError as e:
        error = "Cannot put student. "
        print(app.config.get("DEBUG"))
        if app.config.get("DEBUG"):
            error += str(e)
        return make_response(jsonify({"code": 404, "msg": error}), 404)
    return jsonify({"code": 200, "msg": "success"})
Example #9
def upsert_paper(info, author_id):
    try:
        with db_session:
            publisher_id = info['publisher_id']
            if publisher_id and not Publisher.get(publisher_id=publisher_id):
                publisher = Publisher(publisher_id=publisher_id)
                publisher.name = info['publishername']
    except Exception:
        pass
    with db_session:
        p = Paper.get(paper_id=info['paper_id'])
        if p:
            logger.debug('paper has existed, paper_id=%s', info['paper_id'])
            return
        paper = Paper(paper_id=info['paper_id'])
        paper.title = info['title']
        paper.abstract = info['abstract']
        paper.cite_num = info['cite_num']
        paper.cited_num = info['cited_num']

        publisher_id = info['publisher_id']
        if publisher_id:
            publisher = Publisher.get(publisher_id=publisher_id)
            if publisher:
                paper.publisher = publisher

        if author_id is None:
            return
        a = Author.get(author_id=author_id)
        if a:
            paper.authors.add(a)
        else:
            a_info = api.get_author(author_id)
            author = Author(author_id=a_info['author_id'])
            author.name = a_info['name']
            author.image_url = a_info['image_url']
            author.organization = a_info['organization']
            author.home_page = a_info['home_page']
            author.paper_count = a_info['paper_count']
            author.citied_count = a_info['cited_count']
            paper.authors.add(author)
Example #10
    def post(self, user_id, paper_id=None, create=None):
        if user_id == 0:
            user_id = g.user
        assert_permission([MASTER, GRANDMASTER], user_id=user_id)

        if create:
            from async_tasks.fetch_paper import fetch
            worker = fetch.delay(paper_id)
            paper_data = worker.get(10)[0]
            if Paper.query.filter_by(
                    id=Paper.url_to_id(paper_data['id'])).first() is None:
                paper = Paper(**paper_data)
                db.session.add(paper)
                db.session.commit()
                paper_id = paper.id
        has_paper = User.get_by_primary_key(user_id).add_paper(paper_id)
        if has_paper:
            return has_paper
        return error('Subscribe failed.'), 400
Example #11
def ingest_csv(csv_file_name, index_name):
    with open(csv_file_name, "r") as csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader)
        # Normalize header titles to become valid attribute names
        headers = [re.sub(r'\W+', '', h.strip().replace(' ', '_')).lower()
                   for h in headers]
        for row in reader:
            paper = Paper(meta={'id': row[0], 'index': index_name})
            for ind, header in enumerate(headers):
                setattr(paper, header, row[ind])
            try:
                paper.save(refresh=True)
                paper._index.refresh()
            except ValidationException as e:
                # a few records have blank publish_time values, which fail validation
                print(f"Unable to save record with id {row[0]}")
                print(e)
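
The loop above assumes a Paper mapping declared with elasticsearch-dsl; a minimal sketch of such a Document (field names are illustrative, since the real attributes come from the CSV headers):

from elasticsearch_dsl import Date, Document, Text, connections

# connect the default client the Document will save through
connections.create_connection(hosts=["localhost:9200"])

class Paper(Document):
    title = Text()
    abstract = Text()
    publish_time = Date()  # blank CSV values for this field raise ValidationException

    class Index:
        name = "papers"
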
Example #12
    def save(self, user):
        d = self.cleaned_data
        
        paper = Paper()
        paper.save()

        # the submitting user plus any coauthors from the cleaned form data
        paper.authors.add(user)
        for coauthor in d['coauthors']:
            paper.authors.add(coauthor)
        paper.save()

        d['contents'].name = '%030x' % random.randrange(16**30) + ".pdf"

        paper_version = PaperVersion(
            paper = paper,
            title = d['title'],
            abstract = d['abstract'],
            contents = d['contents'],
        )
        paper_version.save()

        # need to save paper twice since paper and paper_version point to each other...
        paper.latest_version = paper_version
        paper.save()

        for conflict_username in d['conflicts']:
            ra = ReviewAssignment()
            ra.user = User.objects.get(username=conflict_username)
            ra.paper = paper
            ra.type = 'conflict'
            ra.save()

        return paper
Example #13
    def write_db(self):

        print("len of entry list " + str(len(self.entry_list)))

        for entry in self.entry_list:
            paper = Paper()
            if "id" in entry:
                paper.id = entry["id"]
            if "type" in entry:
                paper.type = entry["type"]
            if "title" in entry:
                paper.title = entry["title"]
            if "author" in entry:
                paper.authors = entry["author"]
            if "year" in entry:
                paper.year = int(entry["year"])
            if "journal" in entry:
                paper.journal = entry["journal"]
            if "booktitle" in entry:
                paper.book_title = entry["booktitle"]
            if "publisher" in entry:
                paper.publisher = entry["publisher"]
            if "institution" in entry:
                paper.institution = entry["institution"]
            if "volume" in entry:
                paper.volume = int(entry["volume"])
            if "number" in entry:
                paper.number = int(entry["number"])
            if "pages" in entry:
                paper.pages = entry["pages"]
            if "url" in entry:
                paper.url = entry["url"]
            if "doi" in entry:
                paper.doi = entry["doi"]
            if "isbn" in entry:
                paper.isbn = entry["isbn"]

            paper.save()
Example #14
def upload(request):
    if request.method == 'POST':
        paper_title = request.FILES['ups'].name
        uploaded = request.FILES['ups']
        tags = request.POST['tags']
        t = tags.split(',')
        p = []

        # skips the final element, presumably an empty string left by a
        # trailing comma in the tags field
        for i in range(len(t) - 1):
            l = Topic.objects.get(subject=t[i].encode('ascii', 'replace'))
            p.append(l.id)

        username = get_user_fromcookie(request)
        obj = Paper(title=paper_title, paper_file=uploaded, user=username)
        obj.save()
        pap = Paper.objects.get(title=paper_title)
        for i in p:
            pap.tags.add(int(i))
        convert(paper_title)
        return render(request, 'thanks.html',
                      {'message': 'Research Paper Uploaded'})
Example #15
# create_engine accepts echo=True to log the generated SQL
engine = create_engine('sqlite:///my_papers.db')
Session = sessionmaker(bind=engine)
session = Session()

Base.metadata.create_all(engine)

jchem_phys = Journal(name='J. Chem. Phys', publisher='AIP')
jcim = Journal(name='J. Chem. Inf. Model', publisher='ACS')

# Add a paper to the DB
molssi_paper = Paper(
    DOI='10.1063/1.5052551',
    paper_title='Perspective: Computational chemistry software and its advancement as illustrated through three grand challenge cases for molecular science',
    journal=jchem_phys,
    publication_year=2018,
    authors='Anna Krylov, Theresa L. Windus, Taylor Barnes, Eliseo Marin-Rimoldi, Jessica A. Nash, Benjamin Pritchard, Daniel G.A. Smith, Doaa Altarawy, Paul Saxe, Cecilia Clementi, T. Daniel Crawford, Robert J. Harrison, Shantenu Jha, Vijay S. Pande, Teresa Head-Gordon')

# Add another paper
bse_paper = Paper(
    DOI='10.1021/acs.jcim.9b00725',
    paper_title='New Basis Set Exchange: An Open, Up-to-Date Resource for the Molecular Sciences Community',
    journal=jcim,
    publication_year=2019,
    authors='Benjamin P. Pritchard, Doaa Altarawy, Brett Didier, Tara D. Gibson, Theresa L. Windus')
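
The listing stops before anything is persisted; a minimal continuation using the same session and models might be:

# stage both journals and both papers, then write them in one transaction
session.add_all([jchem_phys, jcim, molssi_paper, bse_paper])
session.commit()

# quick check that the rows landed
for paper in session.query(Paper).all():
    print(paper.DOI, paper.paper_title)
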
Example #16
    def save_papers(self,
                    pdf_files_dir,
                    html_files_dir,
                    doi_pdf_map,
                    collection='papers',
                    overwrite=False,
                    file_locs=[],
                    para_classifier=None):
        self.dl_doi_pdf_map = loads(open(doi_pdf_map, 'rb').read())

        for filename in file_locs:
            doi = ''
            is_html_file = filename.endswith('html')
            if is_html_file:
                if filename[:-5] + '.pdf' in self.dl_doi_pdf_map:
                    doi = self.dl_doi_pdf_map[filename[:-5] + '.pdf']
            else:  #PDF file
                if filename[:-2] in self.dl_doi_pdf_map:
                    doi = self.dl_doi_pdf_map[filename[:-2]]

            used_backup_doi = False
            if doi == '':
                used_backup_doi = True
                if not is_html_file:
                    # strip the file suffix and use safe_doi as doi
                    doi = filename[:-6]
                else:
                    doi = filename[:-5]
                self.__logger.info("INFO: Used backup DOI (not in map): " +
                                   str(doi))

            if self.connection[self.db][collection].find({
                    'doi': doi
            }).count() == 1 and not overwrite:
                self.__logger.info(
                    "SKIPPED: Not overwriting and paper already in DB: " +
                    str(doi))
                continue

            try:
                paper = open(path.join(pdf_files_dir, filename,
                                       'docseg.json')).read()
                try:
                    plaintext = loads(unicode(paper), strict=False)
                except Exception:
                    self.__logger.warning(
                        "FAILURE: Invalid JSON from watr-works: " + str(doi))
                    continue
            except Exception:
                self.__logger.warning(
                    "FAILURE: No docseg found from watr-works: " + str(doi))
                continue

            safe_doi = str(doi).translate(None, '/.()')

            title_match = self.connection[
                self.db].doi_title_abstract_map.find_one({'doi': doi},
                                                         {'title': True})
            title = title_match['title'] if title_match is not None else None

            if title is None:
                title = self.ad.get_title_from_doi(doi, 'crossref')
            if title is None:
                title = unicode('')

            if not self.is_title_relevant(title):
                self.__logger.info(
                    "WARNING: Irrelevant title detected; paper skipped: " +
                    str(doi))
                continue

            try:
                abstract = unicode('')

                abstract_match = self.connection[
                    self.db].doi_title_abstract_map.find_one(
                        {'doi': doi}, {'abstract': True})
                if abstract_match is not None:
                    abstract = abstract_match['abstract']

                if abstract == '':
                    # use DOI prefixes to avoid pointless download attempts
                    els_dois = [r'10\.1016', r'10\.1006']

                    if any(search(d, doi) for d in els_dois):
                        abstract = self.ad.get_abstract_from_doi(
                            doi, 'elsevier')

                if abstract is None:
                    abstract = unicode('')

                new_paper = Paper()
                # delete _id to prevent duplication; ID is assigned on insertion
                del new_paper['_id']
                new_paper['doi'] = doi
                new_paper['abstract'] = abstract
                new_paper['title'] = title
                if not is_html_file:
                    new_paper['pdf_loc'] = unicode(
                        path.join(pdf_files_dir, filename, safe_doi + '.pdf'))
                else:
                    new_paper['pdf_loc'] = unicode(
                        path.join(html_files_dir, filename,
                                  safe_doi + '.html'))
                new_paper['modified'] = int(time.time())
                new_paper['paragraphs'] = []

                #Compute paragraphs
                html_paragraphs_used = False
                recipe_found = False

                #Override to use HTML paragraphs when available
                if path.isfile(path.join(html_files_dir, safe_doi + '.html')):
                    html_text = open(
                        path.join(html_files_dir, safe_doi + '.html'),
                        'rb').read()
                    soup = BeautifulSoup(html_text, 'html.parser')
                    paragraphs = soup.find_all('p') + soup.find_all(
                        'div', {'class': 'NLM_p'}) + soup.find_all('span')
                    paragraphs = [p.getText() for p in paragraphs]
                    paragraphs = [
                        p.replace('\n', '').replace('\t', '')
                        for p in paragraphs
                    ]
                    paragraphs = [p for p in paragraphs if len(p) > 80]

                    if len(paragraphs) > 20:
                        for paragraph in paragraphs:
                            new_paragraph = Paragraph()
                            new_paragraph['_id'] = unicode(ObjectId())
                            if para_classifier.predict_one(paragraph):
                                new_paragraph['type'] = unicode('recipe')
                                recipe_found = True
                            new_paragraph['text'] = paragraph
                            new_paper['paragraphs'].append(new_paragraph)

                        html_paragraphs_used = True
                        self.__logger.info(
                            "INFO: Used HTML paragraphs for paper: " +
                            str(doi))

                if not html_paragraphs_used:
                    para_label_ids = []
                    for line in plaintext['labels']:
                        if line[0] == 'ds:para-begin':
                            para_label_ids.append(line[1][0])

                    para_label_iter = iter(para_label_ids)
                    try:
                        next_label = next(para_label_iter)
                    except StopIteration:
                        self.__logger.warning(
                            "WARNING: No paragraphs detected in file: " +
                            str(doi))
                        continue

                    current_para = ''
                    for line in plaintext['lines']:
                        if line[2] == next_label:
                            if current_para != '':
                                new_paragraph = Paragraph()
                                new_paragraph['_id'] = unicode(ObjectId())
                                if para_classifier.predict_one(current_para):
                                    new_paragraph['type'] = unicode('recipe')
                                    recipe_found = True
                                new_paragraph['text'] = current_para
                                new_paper['paragraphs'].append(new_paragraph)

                                current_para = ''

                            try:
                                next_label = next(para_label_iter)
                            except StopIteration:
                                break

                        for token in line[0]:
                            if search('{.*?}', token) is not None:
                                token = sub('[{}_^]', '', token)
                            current_para += token + ' '

            except Exception as e:
                self.__logger.warning('FAILURE: Unable to save paper: ' +
                                      str(doi))
                self.__logger.warning('ERR_MSG: ' + str(e))
                continue

            if len(new_paper['paragraphs']) == 0:
                self.__logger.warning(
                    'WARNING: No paragraphs found; skipping paper: ' +
                    str(doi))
                continue

            if not recipe_found:
                self.__logger.warning(
                    'WARNING: No recipe found; skipping paper: ' + str(doi))
                continue

            if self.connection[self.db][collection].find({
                    'doi': doi
            }).count() == 0:
                self.connection[self.db][collection].insert_one(new_paper)
            elif self.connection[self.db][collection].find({
                    'doi': doi
            }).count() == 1 and overwrite and not used_backup_doi:
                self.connection[self.db][collection].update_one(
                    {'doi': doi}, {'$set': new_paper})

            if self.connection[self.db].doi_title_abstract_map.find({
                    'doi': doi
            }).count() == 0:
                self.connection[self.db].doi_title_abstract_map.insert_one({
                    'doi':
                    new_paper['doi'],
                    'title':
                    new_paper['title'],
                    'abstract':
                    new_paper['abstract']
                })
Example #17
def get_portfolio_papers(portfolio_id: int):
    return [Paper(row['id']) for row in query.fetch('shares', ['id'], portfolio_id=portfolio_id)]
Example #18
from models import Paper
import re
import handlers.keyboard as kb
from aiogram import types
from aiogram.dispatcher.filters.state import State, StatesGroup

# dp, bot and messages are assumed to come from the bot's setup module


# A bunch of handlers for creating a new paper.
class PaperFSM(StatesGroup):
    portfolio = State()
    stock = State()
    ticker = State()
    amount = State()
    price = State()


""" New instance of Paper() class to save data """
new_paper = Paper()

""" Start of FSM """
@dp.message_handler(commands=['newpaper'])
async def newpaper(message: types.Message):
    await PaperFSM.portfolio.set()
    answer_message = 'Choose your portfolio or create new:\nAvoid messages with /\n'
    answer_message += messages.portfolios(message.from_user)
    await bot.send_message(message.chat.id, answer_message, reply_markup=kb.cancel_kb)


""" When choosing portfolio message doesn't start with /port """
@dp.message_handler(lambda message: not message.text.startswith('/port_'), state=PaperFSM.portfolio)
async def wrong_input(message: types.Message):
    return await message.reply("Choose portfolio by clicking on /port_ command",
                               reply_markup=kb.cancel_kb)
Example #19
from typing import Optional

def get_paper(ticker: str, portfolio_id: int, holder_id: int) -> Optional[Paper]:
    try:
        return Paper(query.fetch('shares', ['id'], ticker=ticker, holder_id=holder_id, portfolio_id=portfolio_id)[0]['id'])
    except IndexError:
        # no matching share row
        return None
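
Hypothetical usage, handling the None returned on a miss (the ticker and ids are made up):

paper = get_paper('AAPL', portfolio_id=1, holder_id=42)
if paper is None:
    print('no such share in this portfolio')
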
Example #20
def test_paper(self):
    with db_session:
        p = Paper(paper_id='666aaaaa66')
        p.title = 'hehehehhehe'
Example #21
def get_references_citations_by_id(profile_id):
    if isinstance(profile_id, dict):
        profile_id = profile_id.get('profile_id')
        if MONGO:
            if data_collection.find({"id": profile_id}).count() > 0:
                # this profile has already been crawled
                return []
    if not profile_id:
        return -1
    headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    session = requests.Session()
    while True:
        try:
            response = session.get(
                'https://cn.bing.com/academic/profile?id={}&encoded=0&v=paper_preview&mkt=zh-cn'
                .format(profile_id),
                headers=headers)
            response.raise_for_status()
            response.encoding = 'utf-8'
            break
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        except Exception as e:
            time.sleep(3.0)
            print(e)
    result = re.search(r'IG:"(.*?)"', response.text)
    if result:
        ig = result.group(1)
    result = re.search(
        r'被 引 量</span></span><span class="aca_content"><div>(\d*)</div>',
        response.text)
    if result:
        citation_num = result.group(1)

    html = etree.HTML(response.text)

    paper = Paper(save2mongo=MONGO)
    try:
        paper.title = html.xpath('//li[@class="aca_title"]/text()')[0]
        paper.id = profile_id
        paper.citation_num = citation_num
        result = re.search(
            r'<span class="aca_label">DOI</span></span><span class="aca_content"><div>(.*?)</div>',
            response.text)
        if result:
            paper.doi = result.group(1)
        paper.authors = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span//a/text()')
        paper.abstract = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span[1]//text()')[-1]
        result = re.search(
            r'<span class="aca_label">发表日期</span></span><span class="aca_content"><div>(\d*)</div>',
            response.text)
        if result:
            paper.publish_year = result.group(1)

        base_url = 'https://cn.bing.com/academic/papers?ajax=scroll&infscroll=1&id={id}&encoded=0&v=paper_preview&mkt=zh-cn&first={first}&count={count}&IG={ig}&IID=morepage.{num}&SFX={num}&rt={rt}'

        count = 9
        citation_links = list()
        for i in range(1, int(citation_num) // count):
            ajax_url = base_url.format(id=profile_id,
                                       first=i * (count + 1),
                                       count=count + 1,
                                       ig=ig,
                                       num=i,
                                       rt='2')
            while True:
                try:
                    response = session.get(ajax_url, headers=headers)
                    response.raise_for_status()
                    response.encoding = 'utf-8'
                    break
                except KeyboardInterrupt:
                    raise KeyboardInterrupt
                except Exception as e:
                    time.sleep(3.0)
                    print(e)
            html = etree.HTML(response.text)
            citation_links.extend(html.xpath('//a[@target="_blank"]/@href'))
        print('number of citation_links', len(citation_links), 'citation_num',
              citation_num)
        if citation_links:
            for i, citation_link in enumerate(citation_links):
                profile_id = get_profile_id(citation_link)
                if profile_id.get('title', False):
                    paper.citations.append(profile_id)
                print('get_profile_id: {}/{}\r'.format(i + 1,
                                                       len(citation_links)),
                      end='')
        print('\nnumber of ids:', len(paper.citations))
    except KeyboardInterrupt:
        raise KeyboardInterrupt
    except Exception as e:
        print(e)
    paper.save()
    # for profile_id in paper.citations:
    #     get_references_citations_by_id(profile_id)
    return paper.citations
Example #22
def add(search_query, author, title):
    fl = [
        'id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title',
        'abstract', 'doi', 'pubdate', "pub", "keyword", "doctype",
        "identifier", "links_data"
    ]
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        print("no matching papers found")
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type:ads.search.Article
    else:
        # first_ten = itertools.islice(papers, 10)
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type:ads.search.Article

    assert len(selection.doi) == 1
    doi = selection.doi[0]

    try:

        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)

    except peewee.DoesNotExist:
        pass

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")

    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)
    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size / chunk_size)
        # stream the PDF in chunk_size pieces so the bar length matches
        with click.progressbar(r.iter_content(chunk_size=chunk_size),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
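
A hedged sketch of the peewee Paper model this command assumes; the fields are inferred from the attribute accesses above (foreign keys to Author, Publication and Doctype omitted), not taken from the real project:

import peewee

db = peewee.SqliteDatabase('papers.db')

class Paper(peewee.Model):
    doi = peewee.CharField(unique=True)
    title = peewee.TextField()
    abstract = peewee.TextField(null=True)
    bibcode = peewee.CharField()
    year = peewee.IntegerField()
    pubdate = peewee.CharField()
    pdf_downloaded = peewee.BooleanField(default=False)
    arxiv_identifier = peewee.CharField(null=True)
    bibtex = peewee.TextField(null=True)

    class Meta:
        database = db
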
Example #23
       end="",
       flush=True)
 print("\r", end="", flush=True)
 for item in result['items']:
     # paper_mapper(item)
     pubname = item['publisher'] or 'Unknown'
     pub = get_or_create_record(Publisher, name=pubname)
     pubfreq = item['frequency'] or 'Unknown'
     freq = get_or_create_record(Frequency, frequency=pubfreq)
     if not Paper.query.filter_by(lccn=item['lccn']).first():
         paper = Paper(
             title=item['title'],
             place_of_publication=item['place_of_publication'],
             start_year=item['start_year'],
             end_year=item['end_year'],
             notes='\n'.join(item['note']),
             alt_titles='\n'.join(item['alt_title']),
             lccn=item['lccn'],
             page=cur_page,
             publisher_id=pub.id,
             frequency_id=freq.id)
         session.add(paper)
     else:
         paper = Paper.query.filter_by(lccn=item['lccn']).first()
     #Many-to-one relationships
     #Many-to-Many relationships - verifying integrity due to duplicates in data, then appending
     for state in item['state']:
         s = get_or_create_record(State, name=state)
         if s not in paper.states.all():
             s.papers.append(paper)
     for place in item['place']:
Example #24
    def post(self, request):
        param = QueryDict(request.body)

        uuid = param.get('uuid')
        title = param.get('title')
        time = param.get('time')
        origin = param.get('origin')
        _authors = param.getlist('authors')
        link = param.get('link')
        _tags = param.getlist('tags')
        content = param.get('content')
        refer_to = param.getlist('reference')
        score = param.get('score')

        try:
            year, month = time.split('-')
            year, month = int(year), int(month)
            publish_time = datetime.date(year, month, 1)
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': 'Invalid date: {}'.format(time)}, status=500)

        for _tag in _tags:
            try:
                _tag = int(_tag)
                _ = ResearchTag.objects.get(research_tag_id=_tag)
            except Exception:
                logger.error(traceback.format_exc())
                return JsonResponse({'msg': 'Invalid tag: {}'.format(_tag)},
                                    status=500)
        tags = ResearchTag.objects.filter(
            research_tag_id__in=[int(_t) for _t in _tags])

        author_ids = []
        for _author in _authors:
            if _author.isdigit():
                author_ids.append(int(_author))
            elif Author.objects.filter(name=_author).exists():
                a = Author.objects.get(name=_author).author_id
                author_ids.append(a)
            else:
                a = Author(name=_author)
                a.save()
                author_ids.append(a.author_id)

        authors = Author.objects.filter(author_id__in=author_ids)

        try:
            score = int(score)
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': 'Invalid score format'}, status=500)

        if not Paper.objects.filter(paper_uuid=uuid).exists():
            # create: no paper with this uuid yet
            try:
                comment = PaperComment(content=content)
                comment.save()
                paper = Paper(paper_uuid=uuid,
                              title=title,
                              publish_origin=origin,
                              publish_time=publish_time,
                              author=authors,
                              link=link,
                              tag=tags,
                              comment=comment,
                              self_score=score)
                paper.save()
                redis.set(self.LATEST_KEY, str(uuid_gen.uuid4()))
            except Exception:
                logger.error(traceback.format_exc())
                return JsonResponse({'msg': 'Save failed'}, status=500)
            else:
                return JsonResponse({
                    'next':
                    reverse('paperdb.detail',
                            kwargs={'paper_uuid': paper.paper_uuid})
                })

        try:
            # edit: the paper already exists
            paper = Paper.objects.get(paper_uuid=uuid)
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': 'Bad uuid: no matching paper found'}, status=404)
        else:
            paper.title = title
            paper.publish_time = publish_time
            paper.publish_origin = origin
            paper.author = authors
            paper.link = link
            paper.tag = tags
            paper.self_score = score

            try:
                paper.save()
            except Exception:
                logger.error(traceback.format_exc())
                return JsonResponse({'msg': 'Save failed'}, status=500)

            if paper.comment is None:
                if content != '':
                    comment = PaperComment(content=content)
                    comment.save()
                    paper.comment = comment
                    paper.save()
            elif content != paper.comment.content.replace(
                    '\r\n', '\n'):  # normalize CRLF vs LF line endings
                paper.comment.content = content
                paper.comment.save()

        for refer_to_paper in Paper.objects.filter(paper_uuid__in=refer_to):
            if not Reference.objects.filter(
                    reference_src=paper,
                    reference_trg=refer_to_paper).exists():
                reference = Reference(reference_src=paper,
                                      reference_trg=refer_to_paper)
                reference.save()

        return JsonResponse({
            'next':
            reverse('paperdb.detail', kwargs={'paper_uuid': paper.paper_uuid})
        })