def AbstractPool(request, review_name_slug):
    review = Review.objects.get(user=request.user, slug=review_name_slug)
    if request.method == "POST":
        if request.POST.get('results') is None:
            q = request.POST.get('queryField')
            s = request.POST.get('sortType')
            n = request.POST.get('noResults')
            abstractList = search.main(q, s, n)
            # Drop papers already attached to this review. Filtering into a
            # new list avoids the skipped-element bug that comes from calling
            # remove() on a list while iterating over it.
            abstractList = [
                document for document in abstractList
                if not Paper.objects.filter(paper_url=document.get("url"),
                                            review=review).exists()
            ]
        else:
            # NOTE: eval() on request data is unsafe; the posted results
            # should be serialized as JSON instead (see the sketch after
            # this function).
            abstractList = eval(request.POST.get('results'))
            q = request.POST.get('queryField')
            relevant = "Unchecked"
            if request.POST.get("relevanceField") == "relevant":
                relevant = "Relevant"
            elif request.POST.get("relevanceField") == "irrelevant":
                relevant = "Not Relevant"
            if relevant != "Unchecked":
                print("traceA")
                compareCount_value = int(request.POST.get("hiddenCompareCount"))
                for s in abstractList:
                    if s.get('compareCount') == compareCount_value:
                        currentDoc = s
                paper = Paper(review=review,
                              title=currentDoc["title"],
                              paper_url=currentDoc["url"],
                              full_text=currentDoc['fullText'],
                              abstract=currentDoc["abstract"],
                              authors=currentDoc["author"],
                              abstract_relevance=relevant)
                paper.save()
                if len(abstractList) > 1:
                    for abstract in abstractList:
                        if int(abstract.get('compareCount')) > compareCount_value - 1:
                            abstract['compareCount'] -= 1
                    del abstractList[compareCount_value - 1]
                else:
                    abstractList = []
        return render(request, 'ultimatereview/AbstractPool.html',
                      {"Abstracts": abstractList, 'query': q,
                       'review': review.title, 'slug': review_name_slug})
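# A safer alternative to the eval() call above: serialize the result list to
# JSON in the template's hidden field and parse it with json.loads on the
# way back in. A minimal round-trip sketch; the hidden-field plumbing in the
# template is assumed, and the sample data is hypothetical.
import json

abstracts = [{"title": "A", "url": "http://example.org/a", "compareCount": 1}]
posted = json.dumps(abstracts)       # what the hidden form field would hold
assert json.loads(posted) == abstracts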
def createTestPaper():
    # Papers 0-2 are marked finished and papers 4-6 unfinished for testUser
    # (paper3 is intentionally absent from the fixture).
    for i in range(3):
        paper = Paper(name="paper" + str(i),
                      description="paper" + str(i) + "...",
                      state='A')
        paper.save()
        userpaper = UserPaper(user=testUser, paper=paper,
                              finish_state="finished")
        userpaper.save()
    for i in range(4, 7):
        paper = Paper(name="paper" + str(i),
                      description="paper" + str(i) + "...",
                      state='A')
        paper.save()
        userpaper = UserPaper(user=testUser, paper=paper,
                              finish_state="unfinished")
        userpaper.save()
def dbSavePapersAndAuthors(papers, latestMailing=True):
    """Saves an array of paper information into the database.

    Returns the numbers of new papers and authors added.

    If latestMailing is true, sets the paper dates to either today or
    tomorrow, regardless of the date from the arXiv: today if the function
    runs before 8pm ET, tomorrow otherwise. The idea is that this function
    should be run regularly every day, the night the mailing goes out. If
    run late in the day, before midnight, the mailing has tomorrow's date.
    If run early in the day (e.g., because it didn't run when it should
    have), the mailing went out yesterday and is for today.
    """
    if latestMailing:
        latestMailingDate = datetime.date.today()
        now = datetime.datetime.now(pytz.timezone('US/Eastern'))
        cutoff = now.replace(hour=20, minute=0, second=0, microsecond=0)
        if now > cutoff:
            latestMailingDate += datetime.timedelta(days=+1)
        # Note: the official mailing date is the day the email goes out,
        # a few hours after the paper was made available.
    numNewPapersAdded = numNewAuthorsAdded = 0
    for paper in papers:
        authors = []
        for author in paper['authors']:
            authorsWithSameName = Author.objects.filter(name=author)
            if authorsWithSameName:
                # An author with the same name already exists in the
                # database; don't add a duplicate. There might be several
                # duplicates --- take the first (maybe fix later).
                a = authorsWithSameName[0]
            else:
                a = Author(name=author)
                a.save()
                numNewAuthorsAdded += 1
            authors.append(a)
        if Paper.objects.filter(arxivId=paper['arxivId']):
            # NOTE: if we make a mistake adding the paper the first time,
            # this check keeps the code below from ever running to fix it.
            continue
        if latestMailing:
            mailing_date = latestMailingDate
        else:
            mailing_date = mailingDate(paper['datePublished'])
        p = Paper(
            arxivId=paper['arxivId'],
            title=paper['title'],
            abstract=paper['abstract'],
            date_published=paper['datePublished'],
            date_mailed=mailing_date,
            # authors is a ManyToManyField and is set up after the save
            category=paper['category'],
            categories=paper['categories'],
            version=paper['version'],
            linkAbsPage=paper['linkAbsPage'],
            linkPdf=paper['linkPdf']
        )
        p.save()  # must save before setting up the ManyToMany field of authors
        for author in authors:
            # alternatively, to clear a ManyToMany field, use p.authors.clear()
            p.authors.add(author)
        p.save()
        numNewPapersAdded += 1
    print("%d new papers, %d new authors added"
          % (numNewPapersAdded, numNewAuthorsAdded))
    return numNewPapersAdded, numNewAuthorsAdded
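# Worked example of the 8pm-ET cutoff logic from the docstring above: run at
# 9pm ET the mailing date moves to tomorrow; run at noon it stays today.
# Standalone sketch using the same pytz calls as the function.
import datetime
import pytz

now = datetime.datetime.now(pytz.timezone('US/Eastern'))
cutoff = now.replace(hour=20, minute=0, second=0, microsecond=0)
mailing_date = datetime.date.today()
if now > cutoff:
    mailing_date += datetime.timedelta(days=1)
print(mailing_date)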
def api_create_paper(request, *, name, brief, tag):
    check_admin(request)
    if not name or not name.strip():
        raise APIValueError('name', 'name cannot be empty.')
    if not tag or not tag.strip():
        raise APIValueError('tag', 'tag cannot be empty.')
    paper = Paper(name=name.strip(), brief=brief.strip(), tag=tag.strip(),
                  munber=0, total=0)
    # save() is a coroutine in this asyncio-style web framework, so the
    # handler is itself driven as a generator via `yield from`
    yield from paper.save()
    return paper
def ingest_csv(csv_file_name, index_name):
    with open(csv_file_name, "r") as csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader)
        # Normalize header titles to become valid attribute names
        headers = [(re.sub(r'\W+', '', h.strip().replace(' ', '_'))).lower()
                   for h in headers]
        for row in reader:
            paper = Paper(meta={'id': row[0], 'index': index_name})
            for ind, header in enumerate(headers):
                setattr(paper, header, row[ind])
            try:
                paper.save(refresh=True)
                paper._index.refresh()
            except ValidationException as e:
                # There are a few blank publish_time values; didn't find
                # time to make the field optional
                print(f"Unable to save record with id {row[0]}")
                print(e)
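# Quick check of the header normalization used above: spaces become
# underscores, remaining non-word characters are stripped, and the result is
# lower-cased, so "Publish Time" maps to the attribute name "publish_time".
# The sample headers are illustrative only.
import re

for h in ["Publish Time ", "DOI", "Source X?"]:
    print(re.sub(r'\W+', '', h.strip().replace(' ', '_')).lower())
# -> publish_time, doi, source_x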
def _paperAdd(requestData, user):
    '''Concrete handling for adding a new questionnaire (paper).'''
    # Collect the attribute names present in the request
    keys = requestData.keys()
    data = {}
    # fields = zip(*Paper._meta.get_fields_with_model())[0]
    for field in getModelFields(Paper):
        # Skip fields that the framework creates automatically
        if field.auto_created:
            continue
        # Read the value from the request data
        value = requestData.get(field.name, None)
        # Special handling for JSON boolean values
        if type(field) == BooleanField:
            value = jsonBoolean2Python(value)
        # Special handling for the creator and modifier fields
        if field.name in [USER_CREATE_BY_FIELD_NAME, USER_MODIFY_BY_FIELD_NAME]:
            value = user
        # If the caller did not explicitly set the field to empty, leave it
        # out of data so the model's default value takes effect. This check
        # must not come before the createBy/modifyBy handling above.
        if value is None and field.name not in keys:
            continue
        # Add the validated value to data, ready for the database insert
        data[field.name] = value
    paper = Paper(**data)
    # Validate the data
    try:
        paper.full_clean()
    except ValidationError as exception:
        return packageResult(
            RESULT_CODE.ERROR, RESULT_MESSAGE.VALIDATION_ERROR,
            {'validationMessage': exception.message_dict})
    # Save to the database
    paper.save()
    return packageResult(RESULT_CODE.SUCCESS, RESULT_MESSAGE.SUCCESS,
                         {'paperId': paper.id})
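# getModelFields is not defined in this snippet; a minimal sketch, assuming
# it simply wraps Django's model meta API (hypothetical helper, not the
# project's actual implementation):
def getModelFields(model):
    # model._meta.fields covers the concrete local fields, including the
    # auto-created primary key that the loop above skips
    return model._meta.fields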
def write_db(self):
    print("len of entry list " + str(len(self.entry_list)))
    for entry in self.entry_list:
        paper = Paper()
        # dict.has_key() was removed in Python 3; use the `in` operator
        if "id" in entry:
            paper.id = entry["id"]
        if "type" in entry:
            paper.type = entry["type"]
        if "title" in entry:
            paper.title = entry["title"]
        if "author" in entry:
            paper.authors = entry["author"]
        if "year" in entry:
            paper.year = int(entry["year"])
        if "journal" in entry:
            paper.journal = entry["journal"]
        if "booktitle" in entry:
            paper.book_title = entry["booktitle"]
        if "publisher" in entry:
            paper.publisher = entry["publisher"]
        if "institution" in entry:
            paper.institution = entry["institution"]
        if "volume" in entry:
            paper.volume = int(entry["volume"])
        if "number" in entry:
            paper.number = int(entry["number"])
        if "pages" in entry:
            paper.pages = entry["pages"]
        if "url" in entry:
            paper.url = entry["url"]
        if "doi" in entry:
            paper.doi = entry["doi"]
        if "isbn" in entry:
            paper.isbn = entry["isbn"]
        paper.save()
def upload(request):
    if request.method == 'POST':
        paper_title = request.FILES['ups'].name
        uploaded = request.FILES['ups']
        print(uploaded)
        print(uploaded.size)
        username = get_user_fromcookie(request)
        data = {'title': paper_title, 'paper_file': uploaded}
        form = UploadForm(data)  # built but never validated; the model is saved directly
        obj = Paper(title=paper_title, paper_file=uploaded, user=username)
        obj.save()
    return HttpResponse('Research Paper uploaded')
def upload(request):
    if request.method == 'POST':
        paper_title = request.FILES['ups'].name
        uploaded = request.FILES['ups']
        tags = request.POST['tags']
        t = tags.split(',')
        print(t)
        p = []
        # Resolve each tag to a Topic id; the empty element after the
        # trailing comma is skipped, hence len(t) - 1
        for i in range(len(t) - 1):
            topic = Topic.objects.get(subject=t[i].encode('ascii', 'replace'))
            p.append(topic.id)
        print(p)
        username = get_user_fromcookie(request)
        data = {'title': paper_title, 'paper_file': uploaded}
        form = UploadForm(data)  # built but never validated; the model is saved directly
        obj = Paper(title=paper_title, paper_file=uploaded, user=username)
        obj.save()
        pap = Paper.objects.get(title=paper_title)
        for i in p:
            pap.tags.add(int(i))
        convert(paper_title)
    return render(request, 'thanks.html',
                  {'message': 'Research Paper Uploaded'})
def save(self, user):
    d = self.cleaned_data
    authors = [user]
    if 'coauthor1' in d:
        authors.append(d['coauthor1'])
    if 'coauthor2' in d:
        authors.append(d['coauthor2'])
    if 'coauthor3' in d:
        authors.append(d['coauthor3'])
    # NOTE: the authors list built above is never used; the ManyToMany field
    # is populated from user and d['coauthors'] below
    paper = Paper()
    paper.save()
    paper.authors.add(user)
    for coauthor in d['coauthors']:
        paper.authors.add(coauthor)
    paper.save()
    # give the uploaded PDF a random 30-hex-digit file name
    d['contents'].name = '%030x' % random.randrange(16 ** 30) + ".pdf"
    paper_version = PaperVersion(
        paper=paper,
        title=d['title'],
        abstract=d['abstract'],
        contents=d['contents'],
    )
    paper_version.save()
    # need to save paper twice since paper and paper_version point to
    # each other
    paper.latest_version = paper_version
    paper.save()
    for conflict_username in d['conflicts']:
        ra = ReviewAssignment()
        ra.user = User.objects.get(username=conflict_username)
        ra.paper = paper
        ra.type = 'conflict'
        ra.save()
    return paper
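# The double save above is forced by a circular relationship between the two
# models. A minimal sketch of the assumed shape; field names are inferred
# from the code, not taken from the project's real models.py:
from django.db import models

class Paper(models.Model):
    latest_version = models.ForeignKey('PaperVersion', null=True, blank=True,
                                       on_delete=models.SET_NULL,
                                       related_name='+')

class PaperVersion(models.Model):
    paper = models.ForeignKey(Paper, on_delete=models.CASCADE)
    title = models.CharField(max_length=255)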
def add(search_query, author, title):
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title',
          'abstract', 'doi', 'pubdate', 'pub', 'keyword', 'doctype',
          'identifier', 'links_data']
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        # no results found; abort
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type: ads.search.Article
    else:
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type: ads.search.Article
    assert len(selection.doi) == 1
    doi = selection.doi[0]
    try:
        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)
    except peewee.DoesNotExist:
        pass
    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()
    print("saving in db")
    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [Keyword.get_or_create(keyword=keyword)[0]
                for keyword in selection.keyword]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)
    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size / chunk_size)
        # stream the PDF in chunk_size-byte chunks so the progress bar
        # length matches the number of chunks actually written
        with click.progressbar(r.iter_content(chunk_size=chunk_size),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
def post(self, request):
    param = QueryDict(request.body)
    uuid = param.get('uuid')
    title = param.get('title')
    time = param.get('time')
    origin = param.get('origin')
    _authors = param.getlist('authors')
    link = param.get('link')
    _tags = param.getlist('tags')
    content = param.get('content')
    refer_to = param.getlist('reference')
    score = param.get('score')
    try:
        year, month = time.split('-')
        year, month = int(year), int(month)
        publish_time = datetime.date(year, month, 1)
    except Exception:
        logger.error(traceback.format_exc())
        return JsonResponse({'msg': '提供的日期{}有误'.format(time)}, status=500)
    for _tag in _tags:
        try:
            _tag = int(_tag)
            _ = ResearchTag.objects.get(research_tag_id=_tag)
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': '错误的标签{}'.format(_tag)}, status=500)
    tags = ResearchTag.objects.filter(
        research_tag_id__in=[int(_t) for _t in _tags])
    author_ids = []
    for _author in _authors:
        if _author.isdigit():
            author_ids.append(int(_author))
        elif Author.objects.filter(name=_author).exists():
            a = Author.objects.get(name=_author).author_id
            author_ids.append(a)
        else:
            a = Author(name=_author)
            a.save()
            author_ids.append(a.author_id)
    authors = Author.objects.filter(author_id__in=author_ids)
    try:
        score = int(score)
    except Exception:
        logger.error(traceback.format_exc())
        return JsonResponse({'msg': '错误的评分分数格式'}, status=500)
    if not Paper.objects.filter(paper_uuid=uuid).exists():
        # Create case
        try:
            comment = PaperComment(content=content)
            comment.save()
            paper = Paper(paper_uuid=uuid, title=title,
                          publish_origin=origin, publish_time=publish_time,
                          author=authors, link=link, tag=tags,
                          comment=comment, self_score=score)
            paper.save()
            redis.set(self.LATEST_KEY, str(uuid_gen.uuid4()))
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': '保存失败'}, status=500)
        else:
            return JsonResponse({
                'next': reverse('paperdb.detail',
                                kwargs={'paper_uuid': paper.paper_uuid})
            })
    try:
        # Edit case
        paper = Paper.objects.get(paper_uuid=uuid)
    except Exception:
        logger.error(traceback.format_exc())
        return JsonResponse({'msg': '错误的uuid/未找到相关论文记录'}, status=404)
    else:
        paper.title = title
        paper.publish_time = publish_time
        paper.publish_origin = origin
        paper.author = authors
        paper.link = link
        paper.tag = tags
        paper.self_score = score
        try:
            paper.save()
        except Exception:
            logger.error(traceback.format_exc())
            return JsonResponse({'msg': '保存失败'}, status=500)
        if paper.comment is None:
            if content != '':
                comment = PaperComment(content=content)
                comment.save()
                paper.comment = comment
                paper.save()
        elif content != paper.comment.content.replace('\r\n', '\n'):
            # line-ending mismatch from the 'traditional' editor mode
            paper.comment.content = content
            paper.comment.save()
        for refer_to_paper in Paper.objects.filter(paper_uuid__in=refer_to):
            if not Reference.objects.filter(
                    reference_src=paper,
                    reference_trg=refer_to_paper).exists():
                reference = Reference(reference_src=paper,
                                      reference_trg=refer_to_paper)
                reference.save()
        return JsonResponse({
            'next': reverse('paperdb.detail',
                            kwargs={'paper_uuid': paper.paper_uuid})
        })
def get_references_citations_by_id(profile_id):
    if isinstance(profile_id, dict):
        profile_id = profile_id.get('profile_id')
    if MONGO:
        if data_collection.find({"id": profile_id}).count() > 0:
            # this profile has already been crawled
            return []
    print('func2')
    if not profile_id:
        return -1
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/77.0.3865.75 Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9'
    }
    session = requests.Session()
    while True:
        try:
            response = session.get(
                'https://cn.bing.com/academic/profile?id={}&encoded=0'
                '&v=paper_preview&mkt=zh-cn'.format(profile_id),
                headers=headers)
            response.raise_for_status()
            response.encoding = 'utf-8'
            break
        except KeyboardInterrupt:
            raise
        except Exception as e:
            time.sleep(3.0)
            print(e)
    result = re.search(r'IG:"(.*?)"', response.text)
    if result:
        ig = result.group(1)
    result = re.search(
        r'被 引 量</span></span><span class="aca_content"><div>(\d*)</div>',
        response.text)
    if result:
        citation_num = result.group(1)
    html = etree.HTML(response.text)
    paper = Paper(save2mongo=MONGO)
    try:
        paper.title = html.xpath('//li[@class="aca_title"]/text()')[0]
        paper.id = profile_id
        paper.citation_num = citation_num
        result = re.search(
            r'<span class="aca_label">DOI</span></span>'
            r'<span class="aca_content"><div>(.*?)</div>',
            response.text)
        if result:
            paper.doi = result.group(1)
        paper.authors = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span//a/text()')
        paper.abstract = html.xpath(
            '//div[@class="aca_desc b_snippet"]/span[1]//text()')[-1]
        result = re.search(
            r'<span class="aca_label">发表日期</span></span>'
            r'<span class="aca_content"><div>(\d*)</div>',
            response.text)
        if result:
            paper.publish_year = result.group(1)
        # page through the citation list via Bing's infinite-scroll AJAX URL
        base_url = ('https://cn.bing.com/academic/papers?ajax=scroll'
                    '&infscroll=1&id={id}&encoded=0&v=paper_preview'
                    '&mkt=zh-cn&first={first}&count={count}&IG={ig}'
                    '&IID=morepage.{num}&SFX={num}&rt={rt}')
        count = 9
        citation_links = list()
        for i in range(1, int(citation_num) // count):
            ajax_url = base_url.format(id=profile_id, first=i * (count + 1),
                                       count=count + 1, ig=ig, num=i, rt='2')
            while True:
                try:
                    response = session.get(ajax_url, headers=headers)
                    response.raise_for_status()
                    response.encoding = 'utf-8'
                    break
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    time.sleep(3.0)
                    print(e)
            html = etree.HTML(response.text)
            citation_links.extend(html.xpath('//a[@target="_blank"]/@href'))
        print('number of citation_links', len(citation_links),
              'citation_num', citation_num)
        if len(citation_links) >= 0:
            for i, citation_link in enumerate(citation_links):
                profile_id = get_profile_id(citation_link)
                if profile_id.get('title', False):
                    paper.citations.append(profile_id)
                print('get_profile_id: {}/{}\r'.format(
                    i + 1, len(citation_links)), end='')
            print('\nnumber of ids:', len(paper.citations))
    except KeyboardInterrupt:
        raise
    except Exception as e:
        print(e)
    paper.save()
    return paper.citations
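# Quick check of the IG extraction above: the page embeds an IG token in
# inline JavaScript, and the regex pulls it out of the raw HTML. The sample
# string is illustrative, not real Bing markup.
import re

sample = 'var params = {IG:"A1B2C3D4", other: 1};'
match = re.search(r'IG:"(.*?)"', sample)
print(match.group(1))  # -> A1B2C3D4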