def test_callable(self):
    "Regression for #10349: A callable can be provided as the initial value for an m2m field"

    # Set up a callable initial value
    def formfield_for_dbfield(db_field, **kwargs):
        if db_field.name == 'publications':
            kwargs['initial'] = lambda: Publication.objects.all().order_by(
                'date_published')[:2]
        return db_field.formfield(**kwargs)

    # Set up some Publications to use as data
    Publication(title="First Book", date_published=date(2007, 1, 1)).save()
    Publication(title="Second Book", date_published=date(2008, 1, 1)).save()
    Publication(title="Third Book", date_published=date(2009, 1, 1)).save()

    # Create a ModelForm, instantiate it, and check that the output is as expected
    ModelForm = modelform_factory(Article, formfield_callback=formfield_for_dbfield)
    form = ModelForm()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        form.as_ul(),
        u"""<li><label for="id_headline">Headline:</label> <input id="id_headline" type="text" name="headline" maxlength="100" /></li>
<li><label for="id_publications">Publications:</label> <select multiple="multiple" name="publications" id="id_publications">
<option value="1" selected="selected">First Book</option>
<option value="2" selected="selected">Second Book</option>
<option value="3">Third Book</option>
</select> Hold down "Control", or "Command" on a Mac, to select more than one.</li>"""
    )
def save_models(pub_list, requested_author):
    """Saves a publication and the author for this particular publication into the DB"""
    ses = get_session()
    pub_dict = prepare_pubs_for_db(pub_list)
    for pub in pub_dict:
        try:
            p = ses.query(Publication).filter_by(title=pub['title']).one()
        except NoResultFound:
            # New publication: notify subscribers, then create it.
            tasks.notify_subscribed_users(pub)
            p = Publication(pub['title'], pub['venue'], pub['year'], pub['url'])
            p.date_added = datetime.datetime.utcnow().date()
        if len(requested_author.split()) == 1:
            name = ''
            surname = requested_author.split()[0]
        else:
            name = requested_author.split()[0]
            surname = requested_author.split()[-1]
        try:
            author = ses.query(Author).filter_by(name=name).filter_by(surname=surname).one()
        except NoResultFound:
            author = Author(name, surname)
        p.authors.append(author)
        ses.add(p)
        ses.commit()
def monuser():
    from faker import Faker
    fake = Faker()
    user = User.create(username='******', password='******',
                       first_name=fake.first_name(),
                       last_name=fake.last_name(),
                       email=fake.email())
    Publication.create(title=fake.sentence(), body=fake.text(),
                       user_created=user)
def fakedata():
    from faker import Faker
    fake = Faker()
    for user_ex in range(0, 5):
        user = User.create(username=fake.last_name(),
                           password=fake.password(),
                           first_name=fake.first_name(),
                           last_name=fake.last_name(),
                           email=fake.email())
        for publications_ex in range(0, 10):
            Publication.create(title=fake.sentence(), body=fake.text(),
                               user_created=user)
def get(self):
    disease_counts = Publication.get_disease_counts()
    authors = Author.all()
    pubs = Publication.all()
    template_values = {
        "authors": authors,
        "disease_counts": disease_counts,
        "pubs": pubs
    }
    template = jinja_environment.get_template('admin.html')
    self.response.out.write(template.render(template_values))
def post(self, pub_no=None):
    try:
        pub = Publication()
        pub_json = json.loads(self.request.body)
        pub.texte = pub_json['texte']
        pub.date = datetime.datetime.strptime(pub_json['date'], '%Y-%m-%d')
        # The original also assigned pub.no_babillard from the same key;
        # that duplicate (and differently named) assignment is dropped.
        pub.noBabillard = int(pub_json['noBabillard'])
        pub.noCreateur = int(pub_json['noCreateur'])
        cle_pub = pub.put()
        self.response.set_status(201)
        self.response.headers['Location'] = (self.request.url + '/' +
                                             str(cle_pub.id()))
        self.response.headers['Content-Type'] = ('application/json;' +
                                                 ' charset=utf-8')
        pub_dict = pub.to_dict()
        pub_dict["no"] = pub.key.id()
        pub_json = json.dumps(pub_dict, default=serialiser_pour_json)
        self.response.out.write(pub_json)
    except (db.BadValueError, ValueError, KeyError):
        # Errors caused by the supplied data.
        logging.error("%s", traceback.format_exc())
        self.error(400)
    except Exception:
        # Serious errors while executing the code.
        logging.error("%s", traceback.format_exc())
        self.error(500)
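A minimal sketch of how a client would call the handler above. The field names and the '%Y-%m-%d' date format come from the code; the endpoint URL and values are illustrative assumptions.

import json
import requests

body = {
    'texte': 'Bonjour!',    # publication text
    'date': '2021-05-01',   # must match the '%Y-%m-%d' format parsed above
    'noBabillard': 3,       # target board number
    'noCreateur': 1,        # creating member's number
}
# Hypothetical route; the URL mapping is not shown in this snippet.
requests.post('http://localhost:8080/api/publications', data=json.dumps(body))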
def create_comment(request, form, reply_to):
    if not form.is_valid():
        return None
    text = form.cleaned_data['text']
    title = form.cleaned_data['title']
    comment = Comment(title=title, text=text)
    comment.save()
    reply_pub = Publication.objects.get(pk=reply_to)
    pub = Publication(content=comment,
                      reply_to_pub=reply_pub,
                      is_public=True,
                      published_by=request.user.get_profile())
    pub.save()
    return pub
def create_snippet(request, form):
    if not form.is_valid():
        return None
    text = form.cleaned_data['text']
    title = form.cleaned_data['title']
    is_public = form.cleaned_data['is_public']
    snippet = Snippet(title=title, text=text)
    snippet.save()
    add_tags(snippet, form.cleaned_data['tags'])
    pub = Publication(content=snippet,
                      reply_to_pub=None,
                      is_public=is_public,
                      published_by=request.user.get_profile())
    pub.save()
    return pub
def test():
    username = "******"
    user = User.select().where(User.username == username).get()
    publications = Publication.select().where(
        Publication.user_created == user.id)
    for publi in publications:
        print(publi)
def addActivity(request):
    activity = request.POST.get("Identificador", '')
    try:
        page = UsersPage.objects.get(user=str(request.user))
    except UsersPage.DoesNotExist:
        page = UsersPage(user=str(request.user),
                         name="pagina de " + str(request.user))
        page.save()
    cts = Activitie.objects.get(id=activity)
    page.activities.add(cts)
    if not exitPub(str(request.user), activity):
        usr = UsersPage.objects.get(user=str(request.user))
        act = Activitie.objects.get(id=activity)
        dat = datetime.now()
        pub = Publication(date=dat, user=usr, activities=act)
        pub.save()
    return HttpResponseRedirect("/todas")
def main():
    if not config_found:
        print('config.py file must be present')
        return
    print('config.py found')
    create_publications()
    if len(sys.argv) < 2 and config.fetching_config['publication'] is None:
        print('You need to pass publication_name as argv. For example:')
        print('python fetch_wordpress_articles.py "CNN esp"')
        return
    if len(sys.argv) == 2:
        pub_name = sys.argv[1]
    else:
        pub_name = config.fetching_config['publication']
    pub = Publication.objects(name=pub_name).get()
    print(f'url to fetch: {pub.api_url}')
    # Fixed: the original compared the Publication object itself to the
    # string, so the iProfesional branch was never taken.
    if pub.name == 'iProfesional':
        art_to_db = iProfesional_to_db
        get_url = get_iProfesional_url
        get_articles = get_iProfesional_articles
    else:
        art_to_db = wordpress_to_db
        get_url = get_wp_url
        get_articles = get_wp_articles
    fetch_articles(pub_name,
                   art_to_db=art_to_db,
                   get_url=get_url,
                   get_articles=get_articles,
                   api_url=pub.api_url,
                   date_after=config.fetching_config['date_after'],
                   date_before=config.fetching_config['date_before'])
def get(self):
    self.templateVars["institutions"] = Institution.query().fetch()
    self.templateVars["authors"] = Author.query().fetch()
    self.templateVars["conferences"] = Conference.query().fetch()
    self.templateVars["publications"] = Publication.query().fetch()
    self.templateVars["contents"] = Content.query().fetch()
    return self.render("admin.html")
def publication(username=None):
    if username:
        # A username was passed in: select that user...
        user = User.select().where(User.username == username).get()
        # ...and take the list of their publications.
        publications = Publication.select().where(
            Publication.user_created == user.id).order_by(
                Publication.created_date.desc())
    else:
        # No username was passed: take the list of all publications.
        publications = Publication.select().order_by(
            Publication.created_date.desc())
    if publications.count() == 0:
        flash("Aucune publication trouvée")
    return object_list('publications/list.html',
                       publications,
                       paginate_by=3,
                       check_bounds=False)
def get_articles():
    google = int(flask.request.args.get('google') or 0)
    aws = int(flask.request.args.get('aws') or 0)
    azure = int(flask.request.args.get('azure') or 0)
    selected_pub = (
        flask.request.args.get('pub')
        or Publication.objects(name=config.active_publication).first().id)
    page_num = int(flask.request.args.get('page') or 1)
    per_page = int(flask.request.args.get('count') or 10)
    from_date = flask.request.args.get('from') or '2000-01-01'
    to_date = flask.request.args.get('to') or datetime.now().strftime(
        "%Y-%m-%d")
    cloud_args = {}
    if google == 1:
        cloud_args['ner_google_id__ne'] = None
    if aws == 1:
        cloud_args['ner_aws_id__ne'] = None
    if azure == 1:
        cloud_args['ner_azure_id__ne'] = None
    print(datetime.strptime(from_date, "%Y-%m-%d").date())
    articles_page = Article.objects(
        publication=selected_pub,
        **cloud_args,
        publish_date__gte=datetime.strptime(from_date, "%Y-%m-%d").date(),
        publish_date__lte=datetime.strptime(
            to_date, "%Y-%m-%d").date()).order_by('-publish_date').paginate(
                page=page_num, per_page=per_page)
    pubs = []
    for pub in Publication.objects():
        if selected_pub == str(pub.id):
            selected = 'selected'
        else:
            selected = ''
        pubs.append([pub, selected])
    return articles_page, pubs, cloud_args, from_date, to_date
def publish_book(body):
    user = select(user for user in User
                  if user.name == body['username']).first()
    new_book = Publication(title=body['title'],
                           author=user,
                           content=body['content'],
                           status=2)
    print(new_book.title, new_book.content)
def publication_delete(id=None):
    if id:
        try:
            publication = Publication.get(id)
        except:
            flash("error")
            return redirect(url_for('publication'))
        publication.delete_instance()
        flash("success")
    return redirect(url_for('publication'))
def main(*args, **kwargs):
    for publication in Publication.select():
        yield [[
            publication.year,
            publication.title,
            publication.citation_count,
            publication.author,
        ]]
    # PEP 479: raising StopIteration inside a generator becomes a
    # RuntimeError on Python 3.7+; simply returning ends the generator.
    return
def create_message(request, form):
    if not form.is_valid():
        return None
    text = form.cleaned_data['text']
    title = form.cleaned_data['title']
    to = form.cleaned_data['to']
    try:
        referencedUser = User.objects.get(username=to)
    except User.DoesNotExist:
        # Narrowed from a bare except: only a missing user should abort here.
        return None
    comment = Comment(title=title, text=text)
    comment.save()
    pub = Publication(content=comment,
                      reply_to_pub=None,
                      is_public=False,
                      published_by=request.user.get_profile())
    pub.save()
    pub.to.add(referencedUser.get_profile())
    return pub
def get_citation_count_for_queries(queries, api_key):
    # Create a new fetch index.
    last_fetch_index = Publication.select(
        fn.Max(Publication.fetch_index)).scalar() or 0
    fetch_index = last_fetch_index + 1
    for query in queries:
        # Fetch the citation count!
        get_citation_count(query, fetch_index, api_key)
def get_citation_count(query, fetch_index, api_key):
    # Request citation counts for the publication
    params = DEFAULT_PARAMS.copy()
    params['expr'] = (
        "AND(" +                 # we will search based on two criteria:
        "Ti=\'{title}\'...," +   # the title prefix
        "Y={year})"              # the publication year
    ).format(title=query['title'], year=int(query['year']))
    response = make_request(
        default_requests_session.get,
        URL,
        params=params,
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    # Enforce a pause between fetches to be respectful to the API.
    time.sleep(REQUEST_DELAY)

    # Go no further if the call failed
    if not response:
        return

    publications = response.json()['entities']
    if len(publications) == 0:
        # logger.warning replaces the deprecated logger.warn alias.
        logger.warning("No publications found for title: %s", query['title'])
        return

    # Store data from the fetched publications
    first_publication = publications[0]
    authors = ','.join([author['AuN'] for author in first_publication['AA']])
    Publication.create(
        fetch_index=fetch_index,
        citation_count=first_publication['CC'],
        author=authors,
        year=first_publication['Y'],
        title=first_publication['Ti'],
    )
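A minimal driver sketch for the two functions above. Each query is a dict with 'title' and 'year' keys, which is how get_citation_count() reads it; the environment variable name is a hypothetical stand-in for however the key is actually supplied.

import os

if __name__ == '__main__':
    sample_queries = [
        {'title': 'A sample paper title', 'year': 2015},
        {'title': 'Another paper', 'year': 2017},
    ]
    # Hypothetical variable name; any means of providing the key works.
    api_key = os.environ['ACADEMIC_API_KEY']
    get_citation_count_for_queries(sample_queries, api_key)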
def get_article_by_cms_id(publication_name, cms_id):
    """ Gets an article from the publication API by cms_id

    Gets an article from a publication API (like wordpress, for example).
    At the moment only wordpress is supported.

    Parameters:
    publication_name (str): name of publication
    cms_id (str): id of article to fetch

    Returns:
    json: article as JSON
    """
    api_url = Publication.objects(name=publication_name).get()['api_url']
    url_by_ids = get_wp_url_by_ids(api_url, [cms_id])
    response = requests.get(url_by_ids)
    return response.json()[0]
def wordpress_to_db(art, publication_name):
    """ Converts wordpress data to mongodb format

    Parameters:
    art (json): wordpress article as JSON
    publication_name (str): name of publication

    Returns:
    Article: the article as an Article instance, or None if it is a
    duplicate or has no content
    """
    publication = Publication.objects(name=publication_name).get()
    query = Article.objects(
        Q(url=art['link'])
        | Q(publication=publication, pub_art_id=str(art['id'])))
    if len(query) > 0:
        # Duplicate!!!
        return None
    if type(art['content']['rendered']) == bool:
        print('Article with no content')
        return None
    article = Article()
    article.title = art['title']['rendered']
    article.summary = art['excerpt']['rendered']
    article.text = art['content']['rendered']
    article.publish_date = datetime.datetime.strptime(art['date'],
                                                      "%Y-%m-%dT%H:%M:%S")
    article.url = art['link']
    if type(art['author']) == str:
        article.author = [art['author']]
    elif type(art['author']) == list:
        article.author = art['author']
    elif type(art['author']) == int:
        article.author = [str(art['author'])]
    else:
        print('author error')
    # article.keywords = art['keywords']
    # article.categories = art['title']
    article.publication = publication
    publication_id = str(art['id'])
    article.pub_art_id = publication_id
    return article
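For reference, a sketch of the WordPress REST payload shape this converter reads. Only the fields accessed above are included; the values are illustrative assumptions, and a Publication named 'CNN esp' is assumed to exist already (see create_publications below).

sample_art = {
    'id': 12345,
    'link': 'https://example.com/2020/01/some-post/',
    'date': '2020-01-15T10:30:00',
    'title': {'rendered': 'Some post title'},
    'excerpt': {'rendered': 'A short summary'},
    'content': {'rendered': '<p>Full article body</p>'},
    'author': 7,
}
article = wordpress_to_db(sample_art, 'CNN esp')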
def iProfesional_to_db(art, publication_name='iProfesional'):
    """ Converts iProfesional data to mongodb format

    Parameters:
    art (json): iProfesional article as JSON
    publication_name (str): should be iProfesional

    Returns:
    Article: the article as an Article instance, or None if it is a duplicate
    """
    publication = Publication.objects(name=publication_name).get()
    publication_id = str(art['id'])
    query = Article.objects(
        Q(url=art['absoluteUrl'])
        | Q(publication=publication, pub_art_id=publication_id))
    if len(query) > 0:
        # Duplicate!!!
        return None
    article = Article()
    article.title = art['title']
    article.summary = art['summary']
    article.text = art['text']
    article.publish_date = art['publication']
    article.url = art['absoluteUrl']
    if type(art['author']) == str:
        article.author = [art['author']]
    elif type(art['author']) == list:
        article.author = art['author']
    else:
        print('author error')
    # article.keywords = art['keywords']
    # article.categories = art['title']
    article.publication = publication
    article.pub_art_id = publication_id
    return article
def put(self, pub_no):
    try:
        cle = ndb.Key("Publication", int(pub_no))
        pub = cle.get()
        pub_json = json.loads(self.request.body)
        if pub is None:
            status = 201
            pub = Publication(key=cle)
        else:
            if int(pub_json['noCreateur']) == int(pub.noCreateur):
                status = 200
                pub.texte = str(pub_json['texte'])
                pub.date = datetime.datetime.strptime(pub_json["date"],
                                                      '%Y-%m-%d')
                # Fixed: the original read pub_json['noCreateur'] here,
                # overwriting the board number with the creator's number.
                pub.noBabillard = int(pub_json['noBabillard'])
                pub.put()
            else:
                status = 400
                return
        pub_dict = pub.to_dict()
        pub_dict["no"] = pub.key.id()
        pub_json = json.dumps(pub_dict, default=serialiser_pour_json)
        self.response.set_status(status)
        self.response.headers['Content-Type'] = ('application/json;' +
                                                 ' charset=utf-8')
        self.response.out.write(pub_json)
    # Exceptions related to the supplied data.
    except (db.BadValueError, ValueError, KeyError):
        logging.error("%s", traceback.format_exc())
        self.error(400)
    # Serious exceptions raised while executing the code.
    except Exception:
        logging.error("%s", traceback.format_exc())
        self.error(500)
def index(request):
    """ Index view

    It can be accessed via GET (when we first access the web page) or via
    POST (when the form is submitted). The view's behaviour depends on the
    HTTP method making the call.
    """
    # If it is called through the POST method, the publication has to be registered
    if request.method == 'POST':
        # getting the arguments via POST
        username = request.POST.get('username', '')
        email = request.POST.get('email', '')
        note = request.POST.get('note', '')
        # Check whether or not the fields have a real value. This has to be
        # checked server-side too, because front-end validation is easy to
        # bypass through the browser console.
        if username.strip() and email.strip():
            # if the user has not filled the note out, a generic message is added
            if not note.strip():
                note = 'This user did not want to write a note!'
            # this is the Django ORM method used to insert elements into the database
            Publication(username=username, email=email, note=note).save()
        # redirect to the publications' page
        return redirect("/test/publications")
    # application context: in this case it only contains the form
    context = {'form': PublicationForm()}
    # if it is called via GET, the landing page is shown normally
    return render(request, 'index.html', context)
def post(self, request, *args, **kwargs):
    # Leftover commented-out debug code from the original has been removed.
    self.form = self.form_class(request.POST)
    if self.form.is_valid():
        cd = self.form.cleaned_data
        self.db_error = False
        try:
            Publication(date=datetime.now(),
                        text=cd['message'],
                        author=request.user).save()
        except DatabaseError:
            self.form = self.form_class(request.POST)
            self.db_error = exc_info()[1].message
    return super(BlogMainView, self).get(request)
def publications_form(id=None):
    if id:
        publication = Publication.get(id)
    else:
        publication = Publication()
        publication.user_created = session["id"]
    if request.method == 'POST':
        form = (PublicationForm(request.form, obj=publication)
                if id else PublicationForm(request.form))
        if form.validate():
            form.populate_obj(publication)
            publication.save()
            flash('Your publication has been saved')
            return redirect(url_for('publication'))
    else:
        form = PublicationForm(obj=publication) if id else PublicationForm()
    return render_template('publications/form.html',
                           form=form,
                           publication=publication)
def create_publications():
    connect(config.database['db_name'],
            host=config.database['host'],
            port=config.database['port'])
    print('Checking publications')
    for pub_dict in config.publications:
        pub_list = Publication.objects(name=pub_dict.get('name'))
        if len(pub_list) == 0:
            new_pub = Publication(name=pub_dict.get('name'),
                                  url=pub_dict.get('url'),
                                  location=pub_dict.get('location'),
                                  fetch_method=pub_dict.get('fetch_method'),
                                  api_url=pub_dict.get('api_url'))
            new_pub.save()
            print(f'Publication created: {pub_dict.get("name")}')
        else:
            existent_pub = pub_list.get()
            existent_pub.name = pub_dict.get('name')
            existent_pub.url = pub_dict.get('url')
            existent_pub.location = pub_dict.get('location')
            existent_pub.fetch_method = pub_dict.get('fetch_method')
            existent_pub.api_url = pub_dict.get('api_url')
            existent_pub.save()
            print(f'Publication updated: {pub_dict.get("name")}')
    pubs = Publication.objects()
    print(f'Total publications in the db: {len(pubs)}')
    for p in pubs:
        print(f'- {p.name}')
    print('#################################')
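A sketch of the config.py shape this function expects, inferred from the keys it reads above. The key names are from the code; every value is an illustrative assumption ('CNN esp' is the sample name used in main()'s usage message).

# Hypothetical config.py contents
database = {'db_name': 'articles', 'host': 'localhost', 'port': 27017}
publications = [
    {
        'name': 'CNN esp',
        'url': 'https://cnnespanol.cnn.com/',
        'location': 'AR',
        'fetch_method': 'wordpress',
        'api_url': 'https://cnnespanol.cnn.com/wp-json/wp/v2/',  # assumed WP REST base
    },
]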
def save_training_tokens(self):
    already_tokenized_ids, n_files = self.get_tokenized_articles_list()
    print(f'Found {len(already_tokenized_ids)} already tokenized articles')
    try:
        publication = Publication.objects(name=self.publication_name).get()
    except:
        print('Problem looking up the publication. Does it need to be '
              'created? It must have the same name in config and config_train')
        return -1
    print(publication)
    articles = Article.objects(
        publication=publication).order_by('-publish_date')
    N = articles.count()
    if N == 0:
        print('No articles in the database. Run fetch_articles first')
        return -1
    else:
        print(f'Total number to tokenize: {N}')
    # N_chunks = np.ceil(N/self.chunk_size)
    texts = []
    titles = []
    texts_titles = []
    ids = []
    # Needed so numbering resumes at the count of files that already existed + 1
    chunk = n_files
    training_data_folder = self.get_training_folder()
    if not os.path.exists(training_data_folder):
        os.makedirs(training_data_folder)
    for i, article in enumerate(articles):
        if len(ids) % self.chunk_size == 0 and len(ids) != 0:
            chunk += 1
            file_name = f'{training_data_folder}all_{chunk}.npy'
            np.save(file_name, texts_titles)
            texts_titles = []
            file_name = f'{training_data_folder}titles_{chunk}.npy'
            np.save(file_name, titles)
            titles = []
            file_name = f'{training_data_folder}content_{chunk}.npy'
            np.save(file_name, texts)
            texts = []
            print()
            print(f'{file_name} saved!')
            file_name_ids = f'{training_data_folder}ids_{chunk}.npy'
            np.save(file_name_ids, ids)
            ids = []
        if str(article['id']) not in already_tokenized_ids:
            text, title = Train.article2text(article)
            print(f'\r{i}/{N}', end=' ')
            doc_text = self.nlp(text)
            doc_title = self.nlp(title)
            tokens_text = RelatedArticles.doc2tokens(doc_text)
            tokens_title = RelatedArticles.doc2tokens(doc_title)
            texts.append(tokens_text)
            titles.append(tokens_title)
            texts_titles.append(tokens_title + ['\n'] + tokens_text)
            ids.append(str(article['id']))
    if len(ids) > 0:
        chunk += 1
        file_name = f'{training_data_folder}all_{chunk}.npy'
        np.save(file_name, texts_titles)
        texts_titles = []
        file_name = f'{training_data_folder}titles_{chunk}.npy'
        np.save(file_name, titles)
        titles = []
        file_name = f'{training_data_folder}content_{chunk}.npy'
        np.save(file_name, texts)
        texts = []
        print()
        print(f'{file_name} saved!')
        file_name_ids = f'{training_data_folder}ids_{chunk}.npy'
        np.save(file_name_ids, ids)
    return 0
def post(self):
    if not self.user.administrator:
        return webapp2.redirect("/")
    mode = self.request.POST["mode"]
    if mode == "0":
        # Institution
        institution = Institution(name=self.request.POST["name"],
                                  website=self.request.POST["website"])
        institution.put()
    elif mode == "1":
        # Author
        thumbnail_url = self.request.POST["thumbnail"]
        try:
            content = urllib2.urlopen(thumbnail_url)
            image = content.read()
        except urllib2.HTTPError:
            logging.warning("URL: " + thumbnail_url + " was not found.")
            image = ""
        institution = ndb.Key(urlsafe=self.request.POST["institution"])
        author = Author(
            name=self.request.POST["name"],
            website=self.request.POST["website"],
            thumbnail=image,
            institution=institution,
        )
        author.put()
    elif mode == "2":
        # Conference
        conference = Conference(name=self.request.POST["name"],
                                acronym=self.request.POST["acronym"])
        conference.put()
    elif mode == "3":
        # Publication
        date = datetime.strptime(self.request.POST["date"], "%Y-%m-%d")
        # A bit messy: applies the requested author order.
        authors = self.request.params.getall("authors")
        author_order = [
            int(order_idx)
            for order_idx in self.request.POST["order"].split(",")
        ]
        ordered_authors = []
        for author_idx in range(len(authors)):
            ordered_authors.append(
                ndb.Key(urlsafe=authors[author_order[author_idx] - 1]))
        conference = ndb.Key(urlsafe=self.request.POST["conference"])
        pdf_image_url = self.request.POST["pdfimage"]
        image = ""
        if pdf_image_url:
            try:
                content = urllib2.urlopen(pdf_image_url)
                image = content.read()
            except urllib2.HTTPError:
                logging.warning("URL: " + pdf_image_url + " was not found.")
        publication = Publication(
            title=self.request.POST["title"],
            abstract=self.request.POST["abstract"],
            date=date,
            authors=ordered_authors,
            citation=self.request.POST["citation"],
            conference=conference,
            pdf=self.request.POST["pdf"],
            pdf_image=image,
            arxiv_link=self.request.POST["arxiv"],
            project_page=self.request.POST["projectpage"],
        )
        publication.put()
    elif mode == "4":
        # Content
        content = Content(name=self.request.POST["name"],
                          content=self.request.POST["content"])
        content.put()
    elif mode == "5":
        # Project
        authors = []
        for author in self.request.params.getall("authors"):
            authors.append(ndb.Key(urlsafe=author))
        image_url = self.request.POST["image"]
        if image_url:
            try:
                content = urllib2.urlopen(image_url)
                image = content.read()
            except urllib2.HTTPError:
                logging.warning("URL: " + image_url + " was not found.")
                image = ""
        else:
            image = ""
        publications = []
        for publication in self.request.params.getall("publications"):
            publications.append(ndb.Key(urlsafe=publication))
        contents = []
        for content in self.request.params.getall("contents"):
            contents.append(ndb.Key(urlsafe=content))
        tags = []
        for tag in self.request.POST["tags"].split(","):
            # Try to find the tag.
            stripped_tag = tag.strip()
            query = Tag.query(Tag.name == stripped_tag)
            if query.count() == 1:
                query_tag = query.get(keys_only=True)
                tags.append(query_tag)
            elif query.count() == 0:
                query_tag = Tag(name=stripped_tag)
                tags.append(query_tag.put())
            else:
                logging.error("Tag count > 1 | < 0 (%s)." % stripped_tag)
        project = Project(
            title=self.request.POST["title"],
            description=self.request.POST["description"],
            authors=authors,
            image=image,
            publications=publications,
            extra_content=contents,
            tags=tags,
        )
        project.put()
    return self.get()
def get_publications(self, disease_slug):
    publications = Publication.get_for_disease(disease_slug)
    return publications
def create():
    form = RegistrationForm()
    if form.validate() is False:
        return render_template('news/new.html', form=form)
    else:
        publication = Publication()
        publication.title = form.title.data
        subject_query = PublicationSubject.query.filter_by(
            name=form.subject.data)
        if subject_query.first():
            publication.subject_id = subject_query.first().id
        else:
            subject = PublicationSubject()
            subject.name = form.subject.data
            db.session.add(subject)
            db.session.commit()
            publication.subject_id = subject.id
        publication.text_content = form.text_content.data
        publication.text_call = form.text_call.data
        publication.last_modification = datetime.now().strftime(
            '%Y-%m-%d %H:%M:%S')
        publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
        publication.show_home = form.show_home.data
        publication.thumb = form.thumb.data
        publication.active = 0
        publication.author = form.author.data
        db.session.add(publication)
        db.session.commit()
        message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
        flash(message, 'success')
        return redirect(url_for('news.admin'))
def scrape_pubmed(disease, query):
    publication_list_url = "%s&term=%s&retmax=%s" % (base_url, query, ret_max)
    dom = get_dom(publication_list_url)
    count = int(extract_value(get_text(dom, "Count"), "Count"))
    num_results = min(ret_max, count)
    for i in range(num_results):
        # retrieve the first xml tag (<tag>data</tag>) that the parser finds
        # with name tagName:
        publication_id = get_value(dom, "Id", i)
        publication_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=%s" % publication_id
        publication_dom = get_dom(publication_url)
        # extract features
        title = get_value(publication_dom, "ArticleTitle")
        affiliation = get_value(publication_dom, "Affiliation")
        year = int(get_value(publication_dom, "Year"))
        month = int(get_value(publication_dom, "Month"))
        day = int(get_value(publication_dom, "Day"))
        pub_date = date(year, month, day)

        # get all author_names
        author_names = []

        def getText(nodelist):
            rc = []
            for node in nodelist:
                if node.nodeType == node.TEXT_NODE:
                    rc.append(node.data)
            return ''.join(rc)

        def handleLastName(lastName):
            lastNameOut = getText(lastName.childNodes)
            return lastNameOut

        def handleForeName(foreName):
            foreNameOut = getText(foreName.childNodes)
            return foreNameOut

        def processAuthorElem(elem):
            if len(elem.getElementsByTagName("LastName")) != 0:
                last_name = handleLastName(
                    elem.getElementsByTagName("LastName")[0])
                fore_name = handleForeName(
                    elem.getElementsByTagName("ForeName")[0])
                author_names.append("%s, %s" % (last_name, fore_name))

        def processAuthors(dom):
            author_elems = dom.getElementsByTagName("Author")
            for elem in author_elems:
                processAuthorElem(elem)

        processAuthors(publication_dom)
        # create author models
        for name in author_names:
            author = Author.get_or_insert(name, name=name)
            # TODO: fix this so it's appending instead of overwriting
            author.diseases = [disease]
            author.put()
        # create publication model
        pub = Publication.get_or_insert(publication_id,
                                        pubmed_id=publication_id)
        pub.title = title
        pub.publication_date = pub_date
        pub.author_names = author_names
        pub.affiliation = affiliation
        # TODO: fix this so it's appending instead of overwriting
        pub.diseases = [disease]
        pub.put()
def add(search_query, author, title):
    fl = [
        'id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title',
        'abstract', 'doi', 'pubdate', "pub", "keyword", "doctype",
        "identifier", "links_data"
    ]
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        selection = ads.search.Article
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type: ads.search.Article
    else:
        # first_ten = itertools.islice(papers, 10)
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type: ads.search.Article
    assert len(selection.doi) == 1
    doi = selection.doi[0]
    try:
        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)
    except peewee.DoesNotExist:
        pass
    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()
    print("saving in db")
    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)
    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size // chunk_size)
        with click.progressbar(r.iter_content(chunk_size=20),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
def testdb():
    for user in User.select():
        for publi in Publication.select():
            if publi.user_created == user:
                print(publi.user_created.id)
def fetch_articles(publication_name,
                   art_to_db=wordpress_to_db,
                   get_url=get_wp_url,
                   get_articles=get_wp_articles,
                   api_url=None,
                   per_page=50,
                   starting_page=1,
                   date_after=None,
                   date_before=None):
    """ Fetch articles from publication

    Fetch articles from a publication by date, independent of the
    publication backend (wordpress, iProfesional), and save them to
    the mongodb database.

    Parameters:
    publication_name (str): name of publication in db. Needed to get the api url from db
    art_to_db (func): iProfesional_to_db or wordpress_to_db are the only ones supported for the moment
    get_url (func): get_wp_url or get_iProfesional_url are the only ones supported for the moment
    get_articles (func): get_iProfesional_articles or get_wp_articles are the only ones supported for the moment
    api_url (str): if not None, rewrite api_url in db
    per_page (int): number of articles per page
    starting_page (int): page number - first page is 1
    date_after (str): get articles after this date, format '%Y-%m-%d'
    date_before (str): get articles before this date, format '%Y-%m-%d'

    Returns:
    None
    """
    publication = Publication.objects(name=publication_name).get()
    if api_url is not None:
        publication.api_url = api_url
        publication.save()
    articles = Article.objects(
        publication=publication).order_by('-publish_date').limit(1).first()
    # Fixed: the original also tested len(articles) == 0, which raises
    # TypeError on a single Article document.
    if articles is None:
        # No articles yet
        print('No articles')
        if date_after is None:
            date_after = datetime.date.fromtimestamp(-10000000000)
    else:
        if date_after is None:
            date_after = articles['publish_date']
    if date_before is None:
        date_before = datetime.datetime.now()
    url_endpoint = publication.api_url
    if url_endpoint is None:
        print('api_url not defined in publication')
        return
    page = starting_page
    total_pages = None
    while True:
        url = get_url(url_endpoint, page, per_page, date_after, date_before)
        if total_pages:
            print(f'\rPage: {page}/{total_pages} - {url}', end='')
        else:
            print(f'\rPage: {page} - {url}', end='')
        response = requests.get(url)
        articles, total_pages = get_articles(response)
        for article in articles:
            art = art_to_db(article, publication_name)
            if art is not None:
                art.save()
            else:
                print('\rAlready in DB')
        if page == total_pages or total_pages == 0:
            break
        page += 1
        if 'code' in articles:
            print()
            print(articles['code'])
            break
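A minimal invocation sketch, mirroring how main() calls this function. With the defaults it fetches a WordPress-backed publication; the name 'CNN esp' is the sample from main()'s usage message, so treat it as an assumption.

# Fetch everything newer than the most recent article already stored:
fetch_articles('CNN esp')

# Or restrict the window explicitly (dates in '%Y-%m-%d', per the docstring):
fetch_articles('CNN esp', date_after='2021-01-01', date_before='2021-06-30')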
def create():
    form = RegistrationForm()
    if form.validate() is False:
        form.set_choices()
        return render_template('news/new.html', form=form)
    else:
        publication = Publication()
        publication.title = form.title.data
        publication.text_call = form.text_call.data
        publication.last_modification = datetime.now().strftime(
            '%Y-%m-%d %H:%M:%S')
        publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
        publication.show_home = form.show_home.data
        publication.active = 0
        publication.author = form.author.data
        publication.language = form.language.data
        publication.add_subjects(form.subject.data, form.language.data)
        if form.thumb_src.data:
            publication.thumb_src = form.thumb_src.data
        db.session.add(publication)
        db.session.flush()
        text_content = upload_images_to_s3(form.text_content.data, mod.name,
                                           publication.id)
        Publication.query.get(publication.id).text_content = text_content
        clean_s3_folder(text_content, mod.name, publication.id)
        if len(form.thumb.data.split(',')) > 1:
            upload_folder = os.path.join(app.config['UPLOAD_FOLDER'],
                                         mod.name, str(publication.id),
                                         'images')
            publication.thumb = save_b64_image(
                form.thumb.data.split(',')[1], upload_folder, 'thumb')
        db.session.commit()
        log_operation(module=mod.name,
                      operation='create',
                      user=(g.user.id, g.user.email),
                      objs=[(publication.id, publication.title)])
        message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
        flash(message, 'success')
        return redirect(url_for('news.admin'))
def publi_create():
    publi = Publication.create(title='osef', body='essai', username=user)
    # Fixed: the original compared against the function object itself
    # (missing call parentheses). Note that exact equality with now() is
    # still fragile as a test assertion.
    assert publi.user_created == datetime.datetime.now()
def crawl_publication():
    """ Crawls Google Scholar in order to retrieve information about a publication. """
    # The ID of the publication in Google Scholar.
    scholar_id = request.form['scholar_id']
    print('Crawl publication ' + scholar_id + '.')
    publication = Publication.query.filter_by(scholar_id=scholar_id).first()
    if publication is None:
        publication = Publication()
    cookie_jar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar))
    install_opener(opener)
    url = 'https://scholar.google.com/citations'
    params = urlencode({
        'hl': 'en',
        'view_op': 'view_citation',
        'citation_for_view': scholar_id
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)
    publication.scholar_id = scholar_id
    ntitle = doc.find('.//a[@class="gsc_title_link"]')
    if ntitle is not None:
        # The title of the publication.
        publication.title = ntitle.text_content()
    ntype = doc.find('.//div[@class="gs_scl"][3]//div[@class="gsc_field"]')
    if ntype is not None:
        # The type of the publication.
        publication.type = ntype.text_content()
        if publication.type == 'Description':
            publication.type = 'Other'
    nyear = doc.xpath(
        './/div[text()="Publication date"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]'
    )
    if nyear is not None and len(nyear):
        # The year of the publication.
        publication.year_of_publication = int(nyear[0].text.split('/')[0])
    ncitations = doc.xpath(
        './/div[text()="Total citations"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]//a'
    )
    if ncitations is not None and len(ncitations):
        # The total citations for the publication.
        publication.total_citations = ncitations[0].text.split(' ')[-1]
    nauthors = doc.xpath(
        './/div[text()="Authors"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]'
    )
    if nauthors is not None and len(nauthors):
        # The authors of the publication.
        publication.author_names = nauthors[0].text
    # The citations per year for the publication.
    publication_citations_per_year = []
    nhistogram = doc.find('.//div[@id="gsc_graph_bars"]')
    if nhistogram is not None:
        years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
        for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
            i = int(a.get('style').split('z-index:')[1])
            year = int(years[-i])
            citations_per_year = PublicationCitationsPerYear.query.filter_by(
                publication_id=publication.id, year=year).first()
            if citations_per_year is None:
                citations_per_year = PublicationCitationsPerYear()
            citations_per_year.year = int(years[-i])
            citations_per_year.citations = int(
                a.xpath('./span[@class="gsc_g_al"]')[0].text)
            publication_citations_per_year.append(citations_per_year)
    publication.citations_per_year = publication_citations_per_year
    # When information about the publication was retrieved from Google Scholar.
    publication.retrieved_at = datetime.datetime.now()
    db.session.add(publication)
    db.session.commit()
    print('Crawled publication ' + scholar_id + '.')
    return 'Done.'
def labadmin(request):
    try:
        if request.user.get_profile().utype < 2:
            return HttpResponseForbidden()
    except:
        return HttpResponseForbidden()
    pinameAdmin = request.user.username
    form = UeditorModelForm(request.POST)
    coverselect = Cover.objects.filter(username=pinameAdmin)
    articleselect = Article.objects.filter(username=pinameAdmin)
    try:
        labobj = Lab.objects.get(pi=pinameAdmin)
    except:
        labobj = Lab(pi=pinameAdmin)
        labobj.save()
    if 'cover' in request.FILES:
        img = request.FILES['cover']
        if coverselect:
            coverobj = Cover.objects.get(username=pinameAdmin)
            coverobj.coverimg = img
            coverobj.save()
        else:
            coverobj = Cover(username=pinameAdmin, coverimg=img)
            coverobj.save()
            labobj.cover_set.add(coverobj)
            labobj.save()
    if 'content' in request.POST:
        text = request.POST['content']
        if articleselect:
            articleobj = Article.objects.get(username=pinameAdmin)
            articleobj.content = text
            articleobj.save()
            return render_to_response("lab/labadmin.html", locals(),
                                      context_instance=RequestContext(request))
        else:
            articleobj = Article(username=pinameAdmin, content=text)
            articleobj.save()
            labobj.article_set.add(articleobj)
            labobj.save()
            return render_to_response("lab/labadmin.html", locals(),
                                      context_instance=RequestContext(request))
    if 'author' in request.POST:
        authorraw = request.POST['author']
        paperraw = request.POST['paper']
        urlraw = request.POST['url']
        cellraw = request.POST['cell']
        pmidraw = request.POST['pmid']
        if 'id' in request.POST:
            idraw = request.POST['id']
            pubobj = Publication.objects.get(id=idraw)
            pubobj.author = authorraw
            pubobj.paper = paperraw
            pubobj.url = urlraw
            pubobj.cell = cellraw
            pubobj.pmid = pmidraw
            pubobj.save()
        else:
            pubobj = Publication(username=pinameAdmin, author=authorraw,
                                 paper=paperraw, url=urlraw, cell=cellraw,
                                 pmid=pmidraw)
            pubobj.save()
            labobj.publication_set.add(pubobj)
            labobj.save()
    if 'album' in request.FILES:
        photoraw = request.FILES['album']
        albumobj = Album(username=pinameAdmin, photo=photoraw)
        albumobj.save()
        labobj.album_set.add(albumobj)
        labobj.save()
    if coverselect:
        covertarget = Cover.objects.get(username=pinameAdmin)
    else:
        covertarget = 0
    if articleselect:
        articletarget = Article.objects.get(username=pinameAdmin)
    else:
        articletarget = 0
    pubtarget = Publication.objects.filter(username__contains=pinameAdmin)
    labobj = Lab.objects.get(pi=pinameAdmin)
    membertarget = labobj.labrelated.all()
    albumtarget = Album.objects.filter(username__contains=pinameAdmin)
    return render_to_response("lab/labadmin.html", locals(),
                              context_instance=RequestContext(request))
def crawl_author():
    """ Crawls Google Scholar in order to retrieve information about an author. """
    # The ID of the author in Google Scholar.
    scholar_id = request.form['scholar_id']
    print('Crawl author ' + scholar_id + '.')
    # Retrieve the author with that ID (if any).
    author = Author.query.filter_by(scholar_id=scholar_id).first()
    if author is None:
        author = Author()
    cookie_jar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar))
    install_opener(opener)
    url = 'https://scholar.google.com/citations'
    params = urlencode({
        'hl': 'en',
        'view_op': 'list_works',
        'sortby': 'pubdate',
        'user': scholar_id,
        'cstart': 0,
        'pagesize': 20
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)
    no_content = doc.xpath(
        './/div[contains(text(), "Sorry, no content found for this URL")]')
    if len(no_content):
        print('Author ' + scholar_id + ' not found.')
        return 'Done.'
    author.scholar_id = scholar_id
    nname = doc.find('.//div[@id="gsc_prf_in"]')
    if nname is not None:
        # The name of the author.
        author.name = nname.text_content()
    nemaildomain = doc.find('.//div[@id="gsc_prf_ivh"]')
    if nemaildomain is not None:
        # The domain where the author has an email.
        author.email_domain = nemaildomain.text_content().split(
            " - ")[0].split()[-1]
    ncitations = doc.find('.//table[@id="gsc_rsb_st"]')
    if ncitations is not None:
        # The total citations for the author.
        author.total_citations = ncitations.xpath('.//tr[2]/td')[1].text
        # The h-index for the author.
        author.h_index = ncitations.xpath('.//tr[3]/td')[1].text
        # The i10-index for the author.
        author.i10_index = ncitations.xpath('.//tr[4]/td')[1].text
    params = urlencode({
        'hl': 'en',
        'view_op': 'citations_histogram',
        'user': scholar_id
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)
    # The citations per year for the author.
    author_citations_per_year = []
    nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
    if nhistogram is not None:
        years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
        for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
            i = int(a.get('style').split('z-index:')[1])
            year = int(years[-i])
            citations_per_year = AuthorCitationsPerYear.query.filter_by(
                author_id=author.id, year=year).first()
            if citations_per_year is None:
                citations_per_year = AuthorCitationsPerYear()
            citations_per_year.year = int(years[-i])
            citations_per_year.citations = int(
                a.xpath('./span[@class="gsc_g_al"]')[0].text)
            author_citations_per_year.append(citations_per_year)
    author.citations_per_year = author_citations_per_year
    params = urlencode({
        'hl': 'en',
        'view_op': 'list_colleagues',
        'user': scholar_id
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)
    # The co-authors of the author.
    author_coauthors = []
    for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
        # Reconstructed from a redacted span in the source: take the value
        # between 'user=' and '&hl' in the href.
        co_scholar_id = a.get('href').split('user=')[1].split('&hl')[0]
        coauthor = Author.query.filter_by(scholar_id=co_scholar_id).first()
        if coauthor is None:
            coauthor = Author()
            coauthor.scholar_id = co_scholar_id
        author_coauthors.append(coauthor)
    author.coauthors = author_coauthors
    # The publications.
    author_publications = []
    cstart = 0
    pagesize = 100
    while True:
        params = urlencode({
            'hl': 'en',
            'view_op': 'list_works',
            'sortby': 'pubdate',
            'user': scholar_id,
            'cstart': cstart,
            'pagesize': pagesize
        })
        req = Request(url + '?' + params)
        opener.open(req)
        res = opener.open(req)
        doc = html.parse(res)
        for tr in doc.xpath('.//tr[@class="gsc_a_tr"]'):
            a = tr.find('.//td[@class="gsc_a_t"]//a')
            # NOTE: When there are no publications, there is a single tr:
            # <tr class="gsc_a_tr"><td class="gsc_a_e" colspan="3">There are no articles in this profile.</td></tr>
            if a is None:
                continue
            purl = a.get('href')
            # The ID of the publication in Google Scholar.
            pub_scholar_id = purl.split('citation_for_view=')[1]
            # Retrieve the publication with that ID (if any).
            publication = Publication.query.filter_by(
                scholar_id=pub_scholar_id).first()
            if publication is None:
                publication = Publication()
                publication.scholar_id = pub_scholar_id
            # The title of the publication.
            publication.title = a.text_content()
            pub_nyear = tr.find('.//td[@class="gsc_a_y"]//span')
            if pub_nyear is not None:
                year_of_publication = pub_nyear.text_content().strip()
                if year_of_publication:
                    # The year of the publication.
                    publication.year_of_publication = int(year_of_publication)
            pub_ncitations = tr.find('.//a[@class="gsc_a_ac"]')
            if pub_ncitations is not None:
                total_citations = pub_ncitations.text_content().strip()
                if total_citations:
                    # The total citations for the publication.
                    publication.total_citations = int(total_citations)
            author_publications.append(publication)
        if doc.xpath('.//button[@id="gsc_bpf_next"]')[0].get("disabled"):
            break
        cstart += 100
    author.publications = author_publications
    # When information about the author was retrieved from Google Scholar.
    author.retrieved_at = datetime.datetime.now()
    db.session.add(author)
    db.session.commit()
    print('Crawled author ' + scholar_id + '.')
    return 'Done.'
def create():
    form = RegistrationForm()
    if form.validate() is False:
        return render_template('news/new.html', form=form)
    else:
        publication = Publication()
        publication.title = form.title.data
        subject_query = PublicationSubject.query.filter_by(
            name=form.subject.data)
        if subject_query.first():
            publication.subject_id = subject_query.first().id
        else:
            subject = PublicationSubject()
            subject.name = form.subject.data
            db.session.add(subject)
            db.session.commit()
            publication.subject_id = subject.id
        publication.text_content = form.text_content.data
        publication.text_call = form.text_call.data
        publication.last_modification = datetime.now().strftime(
            '%Y-%m-%d %H:%M:%S')
        publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
        publication.show_home = form.show_home.data
        publication.active = 0
        publication.author = form.author.data
        db.session.add(publication)
        db.session.flush()
        if len(form.thumb.data.split(',')) > 1:
            upload_folder = os.path.join(app.config['UPLOAD_FOLDER'],
                                         mod.name, str(publication.id),
                                         'images')
            publication.thumb = save_b64_image(
                form.thumb.data.split(',')[1], upload_folder, 'thumb')
        db.session.commit()
        message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
        flash(message, 'success')
        return redirect(url_for('news.admin'))
def publications_detail(id):
    publication = Publication.get(id)
    return render_template('publications/details.html',
                           publication=publication)
def post(self):
    with open("twitface.json") as json_file:
        json_data = json.load(json_file)
    ndb.delete_multi(Membre.query().fetch(keys_only=True))
    ndb.delete_multi(DemandeAmi.query().fetch(keys_only=True))
    ndb.delete_multi(Ami.query().fetch(keys_only=True))
    ndb.delete_multi(Publication.query().fetch(keys_only=True))
    lst_membre = []
    lst_demande_ami = []
    lst_publication = []
    lst_ami = []
    for membre in json_data["membres"]:
        cle = ndb.Key("Membre", int(membre["MemNo"]))
        m = Membre(key=cle,
                   nom=membre["MemNom"].split(' ')[1],
                   prenom=membre["MemNom"].split(' ')[0],
                   sexe=membre["MemSexe"],
                   dateNaissance=datetime.datetime.strptime(
                       membre["MemDateNaissance"], '%Y-%m-%d'),
                   villeOrigine=membre["MemVilleOrigine"],
                   villeActuelle=membre["MemVilleActuelle"],
                   courriel=membre["MemCourriel"],
                   nomUtil=membre["MemNomUtil"],
                   motPasse=membre["MemMotPasse"])
        lst_membre.append(m)
    # Fixed: the original appended friends, friend requests and publications
    # to lst_membre, so the dedicated lists were empty when put_multi() ran.
    for ami in json_data["amis"]:
        cle1 = ndb.Key('Membre', int(ami["MemNo1"]))
        cle2 = ndb.Key('Membre', int(ami["MemNo2"]))
        a = Ami(noAmi1=cle1,
                noAmi2=cle2,
                dateAmite=datetime.datetime.strptime(ami["DateAmitie"],
                                                     '%Y-%m-%d'))
        lst_ami.append(a)
    for demAmi in json_data["demandesAmis"]:
        cle = ndb.Key("Membre", int(demAmi["MemNoInvite"]), "DemandeAmi",
                      int(demAmi["DemAmiNo"]))
        d = DemandeAmi(key=cle,
                       noDemandeur=int(demAmi["MemNoDemandeur"]),
                       amiDate=datetime.datetime.strptime(
                           demAmi["DemAmiDate"], '%Y-%m-%d'))
        lst_demande_ami.append(d)
    for pub in json_data["publications"]:
        cle = ndb.Key("Publication", int(pub["PubNo"]))
        p = Publication(key=cle,
                        texte=pub["PubTexte"],
                        noCreateur=int(pub["MemNoCreateur"]),
                        noBabillard=int(pub["MemNoBabillard"]),
                        date=datetime.datetime.strptime(pub["PubDate"],
                                                        '%Y-%m-%d'))
        lst_publication.append(p)
    ndb.put_multi(lst_membre)
    ndb.put_multi(lst_demande_ami)
    ndb.put_multi(lst_publication)
    ndb.put_multi(lst_ami)
    self.response.set_status(200)