Ejemplo n.º 1
0
    def test_callable(self):
        "Regression for #10349: A callable can be provided as the initial value for an m2m field"

        # Set up a callable initial value
        def formfield_for_dbfield(db_field, **kwargs):
            if db_field.name == 'publications':
                kwargs['initial'] = lambda: Publication.objects.all().order_by(
                    'date_published')[:2]
            return db_field.formfield(**kwargs)

        # Set up some Publications to use as data
        Publication(title="First Book", date_published=date(2007, 1, 1)).save()
        Publication(title="Second Book", date_published=date(2008, 1,
                                                             1)).save()
        Publication(title="Third Book", date_published=date(2009, 1, 1)).save()

        # Create a ModelForm, instantiate it, and check that the output is as expected
        ModelForm = modelform_factory(Article,
                                      formfield_callback=formfield_for_dbfield)
        form = ModelForm()
        self.assertEquals(
            form.as_ul(),
            u"""<li><label for="id_headline">Headline:</label> <input id="id_headline" type="text" name="headline" maxlength="100" /></li>
<li><label for="id_publications">Publications:</label> <select multiple="multiple" name="publications" id="id_publications">
<option value="1" selected="selected">First Book</option>
<option value="2" selected="selected">Second Book</option>
<option value="3">Third Book</option>
</select>  Hold down "Control", or "Command" on a Mac, to select more than one.</li>"""
        )
def save_models(pub_list, requested_author):
    """Save each publication in *pub_list* and link it to *requested_author*.

    New publications trigger a subscriber notification; the author record is
    reused when one with the same name/surname already exists.
    """
    ses = get_session()
    # Parse the author name once — it is the same for every publication.
    # A single token is treated as a surname with an empty first name.
    parts = requested_author.split()
    if len(parts) == 1:
        name, surname = '', parts[0]
    else:
        name, surname = parts[0], parts[-1]
    pub_dict = prepare_pubs_for_db(pub_list)
    for pub in pub_dict:
        try:
            p = ses.query(Publication).filter_by(title=pub['title']).one()
        except NoResultFound:
            # New publication: notify subscribers and create the record.
            tasks.notify_subscribed_users(pub)
            p = Publication(pub['title'],
                    pub['venue'], pub['year'], pub['url'])
            p.date_added = datetime.datetime.utcnow().date()
        try:
            author = ses.query(Author).filter_by(name=name).filter_by(surname=surname).one()
        except NoResultFound:
            author = Author(name, surname)
        p.authors.append(author)
        ses.add(p)
        # Commit per publication so the author lookup above can see an
        # author created in a previous iteration.
        ses.commit()
Ejemplo n.º 3
0
def monuser():
    """Create a single (masked-credential) user plus one fake publication."""
    from faker import Faker
    generator = Faker()
    new_user = User.create(
        username='******',
        password='******',
        first_name=generator.first_name(),
        last_name=generator.last_name(),
        email=generator.email(),
    )
    Publication.create(
        title=generator.sentence(),
        body=generator.text(),
        user_created=new_user,
    )
Ejemplo n.º 4
0
def fakedata():
    """Seed the database with 5 fake users, each owning 10 fake publications."""
    from faker import Faker
    generator = Faker()
    for _ in range(5):
        owner = User.create(username=generator.last_name(),
                            password=generator.password(),
                            first_name=generator.first_name(),
                            last_name=generator.last_name(),
                            email=generator.email())
        for _ in range(10):
            Publication.create(title=generator.sentence(),
                               body=generator.text(),
                               user_created=owner)
Ejemplo n.º 5
0
    def get(self):
        """Render the admin page with disease counts, authors and publications."""
        disease_counts = Publication.get_disease_counts()
        context = {
            "authors": Author.all(),
            "disease_counts": disease_counts,
            "pubs": Publication.all(),
        }
        rendered = jinja_environment.get_template('admin.html').render(context)
        self.response.out.write(rendered)
    def post(self, pub_no=None):
        """Create a Publication from the JSON request body.

        On success: 201 Created, a Location header pointing at the new
        entity, and the created entity echoed back as JSON. Malformed input
        yields 400; anything unexpected yields 500.
        """
        try:
            pub = Publication()
            # Decode the JSON payload supplied by the client.
            pub_json = json.loads(self.request.body)
            # NOTE(review): both no_babillard and noBabillard are assigned
            # from the same JSON field below — one of the two attribute
            # names looks like a leftover; confirm against the model.
            pub.no_babillard = int(pub_json['noBabillard'])
            pub.texte = pub_json['texte']
            pub.date = datetime.datetime.strptime(pub_json['date'], '%Y-%m-%d')
            pub.noBabillard = int(pub_json['noBabillard'])
            pub.noCreateur = int(pub_json['noCreateur'])
            cle_pub = pub.put()

            # 201 Created + Location of the newly persisted entity.
            self.response.set_status(201)
            self.response.headers['Location'] = (self.request.url +
                                                 '/' + str(cle_pub.id()))
            self.response.headers['Content-Type'] = ('application/json;' +
                                                     ' charset=utf-8')
            # Echo the entity back, adding its datastore id under "no".
            pub_dict = pub.to_dict()
            pub_dict["no"] = pub.key.id()
            pub_json = json.dumps(pub_dict, default=serialiser_pour_json)
            self.response.out.write(pub_json)

        # Errors caused by the supplied data.
        except (db.BadValueError, ValueError, KeyError):
            logging.error("%s", traceback.format_exc())
            self.error(400)
        # Unexpected errors while executing the handler.
        except Exception:
            logging.error("%s", traceback.format_exc())
            self.error(500)
Ejemplo n.º 7
0
def create_comment(request, form, reply_to):
    """Save a Comment from *form* and publish it as a public reply to the
    publication with pk *reply_to*. Returns the Publication, or None when
    the form is invalid."""
    if not form.is_valid():
        return None

    cleaned = form.cleaned_data
    comment = Comment(title=cleaned['title'], text=cleaned['text'])
    comment.save()

    parent = Publication.objects.get(pk=reply_to)
    publication = Publication(content=comment,
                              reply_to_pub=parent,
                              is_public=True,
                              published_by=request.user.get_profile())
    publication.save()
    return publication
Ejemplo n.º 8
0
def create_snippet(request, form):
    """Save a Snippet from *form*, tag it, and publish it (public or not per
    the form). Returns the Publication, or None when the form is invalid."""
    if not form.is_valid():
        return None

    cleaned = form.cleaned_data
    snippet = Snippet(title=cleaned['title'], text=cleaned['text'])
    snippet.save()
    add_tags(snippet, cleaned['tags'])

    publication = Publication(content=snippet,
                              reply_to_pub=None,
                              is_public=cleaned['is_public'],
                              published_by=request.user.get_profile())
    publication.save()
    return publication
Ejemplo n.º 9
0
def test():
    """Print every publication created by the (masked) test user."""
    username = "******"
    user = User.select().where(User.username == username).get()
    user_pubs = Publication.select().where(
        Publication.user_created == user.id)
    for publication in user_pubs:
        print(publication)
Ejemplo n.º 10
0
def addActivity(request):
    """Attach the requested activity to the current user's page and, if not
    already published, create a Publication for it. Redirects to /todas."""
    activity = request.POST.get("Identificador", '')
    username = str(request.user)
    try:
        page = UsersPage.objects.get(user=username)
    except UsersPage.DoesNotExist:
        # First activity for this user: create their page on the fly.
        page = UsersPage(user=username, name="pagina de " + username)
        page.save()
    cts = Activitie.objects.get(id=activity)
    page.activities.add(cts)
    if not exitPub(username, activity):
        # Reuse the page and activity fetched above instead of re-querying
        # the database for the same rows.
        pub = Publication(date=datetime.now(), user=page, activities=cts)
        pub.save()
    return HttpResponseRedirect("/todas")
Ejemplo n.º 11
0
def main():
    """Entry point: fetch articles for a publication named on the command
    line (or configured in config.py) and store them in the database."""
    if not config_found:
        print('config.py file must be present')
        return

    print('config.py found')
    create_publications()
    if len(sys.argv) < 2 and config.fetching_config['publication'] is None:
        print('You need to pass publication_name as argv. For example:')
        print('python fetch_wordpress_articles.py "CNN esp"')
        return

    # A command-line argument wins over the configured default.
    if len(sys.argv) == 2:
        pub_name = sys.argv[1]
    else:
        pub_name = config.fetching_config['publication']

    pub = Publication.objects(name=pub_name).get()
    print(f'url ro fetch: {pub.api_url}')
    # Bug fix: the original compared the Publication object itself against
    # the string 'iProfesional', which is never true — compare the name.
    if pub_name == 'iProfesional':
        art_to_db = iProfesional_to_db
        get_url = get_iProfesional_url
        get_articles = get_iProfesional_articles
    else:
        art_to_db = wordpress_to_db
        get_url = get_wp_url
        get_articles = get_wp_articles
    fetch_articles(pub_name,
                   art_to_db=art_to_db,
                   get_url=get_url,
                   get_articles=get_articles,
                   api_url=pub.api_url,
                   date_after=config.fetching_config['date_after'],
                   date_before=config.fetching_config['date_before'])
Ejemplo n.º 12
0
    def get(self):
        """Load every entity type into the template context and render the
        admin page."""
        entity_models = {
            "institutions": Institution,
            "authors": Author,
            "conferences": Conference,
            "publications": Publication,
            "contents": Content,
        }
        for key, model in entity_models.items():
            self.templateVars[key] = model.query().fetch()
        return self.render("admin.html")
Ejemplo n.º 13
0
def publication(username=None):
    """List publications newest-first, optionally filtered by *username*.

    Flashes a message when nothing matches, then renders a paginated list.
    """
    if username:
        # Restrict to publications created by the requested user.
        owner = User.select().where(User.username == username).get()
        publications = (Publication
                        .select()
                        .where(Publication.user_created == owner.id)
                        .order_by(Publication.created_date.desc()))
    else:
        # No user given: list every publication.
        publications = (Publication
                        .select()
                        .order_by(Publication.created_date.desc()))
    if publications.count() == 0:
        flash("Aucune publication trouvée")
    return object_list('publications/list.html',
                       publications,
                       paginate_by=3,
                       check_bounds=False)
Ejemplo n.º 14
0
def get_articles():
    """Read filter arguments from the query string and return a page of
    articles plus the publication list for the selector.

    Returns:
        tuple: (articles_page, pubs, cloud_args, from_date, to_date) where
        pubs is a list of [publication, 'selected'|''] pairs.
    """
    google = int(flask.request.args.get('google') or 0)
    aws = int(flask.request.args.get('aws') or 0)
    azure = int(flask.request.args.get('azure') or 0)
    selected_pub = (
        flask.request.args.get('pub')
        or Publication.objects(name=config.active_publication).first().id)
    page_num = int(flask.request.args.get('page') or 1)
    per_page = int(flask.request.args.get('count') or 10)
    from_date = flask.request.args.get('from') or '2000-01-01'
    to_date = flask.request.args.get('to') or datetime.now().strftime(
        "%Y-%m-%d")

    # Only include articles already processed by the requested cloud NER
    # services (a __ne None filter per enabled provider).
    cloud_args = {}
    if google == 1:
        cloud_args['ner_google_id__ne'] = None
    if aws == 1:
        cloud_args['ner_aws_id__ne'] = None
    if azure == 1:
        cloud_args['ner_azure_id__ne'] = None

    articles_page = Article.objects(
        publication=selected_pub,
        **cloud_args,
        publish_date__gte=datetime.strptime(from_date, "%Y-%m-%d").date(),
        publish_date__lte=datetime.strptime(
            to_date, "%Y-%m-%d").date()).order_by('-publish_date').paginate(
                page=page_num, per_page=per_page)

    # Compare ids as strings: when selected_pub came from the fallback
    # branch above it is an ObjectId, which would never equal str(pub.id).
    selected_id = str(selected_pub)
    pubs = [[pub, 'selected' if selected_id == str(pub.id) else '']
            for pub in Publication.objects()]
    return articles_page, pubs, cloud_args, from_date, to_date
Ejemplo n.º 15
0
def publish_book(body):
    """Create a status-2 Publication titled body['title'] whose author is the
    user named body['username']."""
    author = select(u for u in User
                    if u.name == body['username']).first()

    book = Publication(title=body['title'],
                       author=author,
                       content=body['content'],
                       status=2)

    print(book.title, book.content)
Ejemplo n.º 16
0
def publication_delete(id=None):
    """Delete the publication with the given id, then redirect to the list.

    Flashes "error" when the publication cannot be loaded and "success"
    after a deletion; with no id the view just redirects.
    """
    if id:
        try:
            publication = Publication.get(id)
        except Exception:
            # Narrowed from a bare except: any lookup failure still flashes
            # "error", but SystemExit/KeyboardInterrupt are no longer trapped.
            flash("error")
            return redirect(url_for('publication'))
        publication.delete_instance()
        flash("success")
    return redirect(url_for('publication'))
Ejemplo n.º 17
0
def main(*args, **kwargs):
    """Yield one [[year, title, citation_count, author]] row per publication."""
    for publication in Publication.select():
        yield [[
            publication.year,
            publication.title,
            publication.citation_count,
            publication.author,
        ]]
    # PEP 479: raising StopIteration inside a generator is converted to a
    # RuntimeError on Python 3.7+; a plain return ends the generator cleanly.
    return
Ejemplo n.º 18
0
def create_message(request, form):
    """Create a private message: a non-public Publication wrapping a Comment,
    addressed to the user named in the form's 'to' field.

    Returns the saved Publication, or None when the form is invalid or the
    recipient cannot be resolved.
    """
    if not form.is_valid():
        return None

    text = form.cleaned_data['text']
    title = form.cleaned_data['title']
    to = form.cleaned_data['to']
    try:
        referencedUser = User.objects.get(username=to)
    except Exception:
        # Narrowed from a bare except (which also swallowed SystemExit /
        # KeyboardInterrupt); any lookup failure means "no recipient".
        return None

    comment = Comment(title=title, text=text)
    comment.save()

    pub = Publication(content=comment, reply_to_pub=None, is_public=False,
                      published_by=request.user.get_profile())
    pub.save()

    pub.to.add(referencedUser.get_profile())
    return pub
Ejemplo n.º 19
0
def main(*args, **kwargs):
    """Yield one [[year, title, citation_count, author]] row per publication."""
    for publication in Publication.select():
        yield [[
            publication.year,
            publication.title,
            publication.citation_count,
            publication.author,
        ]]
    # PEP 479: raising StopIteration inside a generator is converted to a
    # RuntimeError on Python 3.7+; a plain return ends the generator cleanly.
    return
Ejemplo n.º 20
0
def get_citation_count_for_queries(queries, api_key):
    """Fetch a citation count for every query, all grouped under a fresh
    fetch index."""
    # The new fetch index is one past the largest index already stored
    # (or 1 when the table is empty).
    max_existing_index = Publication.select(
        fn.Max(Publication.fetch_index)).scalar() or 0
    new_index = max_existing_index + 1

    for query in queries:
        # Fetch the citation count for this query.
        get_citation_count(query, new_index, api_key)
Ejemplo n.º 21
0
def get_citation_count(query, fetch_index, api_key):
    """Fetch the citation count for one publication query and store the
    first match as a Publication record.

    Parameters:
        query (dict): must contain 'title' and 'year'.
        fetch_index (int): index grouping this batch of fetches.
        api_key (str): Ocp-Apim-Subscription-Key for the API.
    """
    # Request for citation counts for the publication
    params = DEFAULT_PARAMS.copy()
    params['expr'] = (
        "AND(" +  # we will search based on two criteria:
        "Ti=\'{title}\'...," +  # the title prefix
        "Y={year})"  # the publication year
        ).format(title=query['title'], year=int(query['year']))
    response = make_request(
        default_requests_session.get,
        URL,
        params=params,
        headers={'Ocp-Apim-Subscription-Key': api_key},
    )
    time.sleep(REQUEST_DELAY)  # enforce a pause between each fetch to be respectful to API

    # Go no further if the call failed
    if not response:
        return

    publications = response.json()['entities']
    if len(publications) == 0:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning("No publications found for title: %s", query['title'])
        return

    # Store data from the first (best-ranked) fetched publication.
    first_publication = publications[0]
    authors = ','.join([author['AuN'] for author in first_publication['AA']])
    Publication.create(
        fetch_index=fetch_index,
        citation_count=first_publication['CC'],
        author=authors,
        year=first_publication['Y'],
        title=first_publication['Ti'],
    )
Ejemplo n.º 22
0
def get_article_by_cms_id(publication_name, cms_id):
    """Fetch a single article from a publication's CMS API by its id.

    At the moment only WordPress-style APIs are supported.

    Parameters:
        publication_name (str): name of the publication.
        cms_id (str): id of the article to fetch.

    Returns:
        dict: the article decoded from JSON.
    """
    publication = Publication.objects(name=publication_name).get()
    url = get_wp_url_by_ids(publication['api_url'], [cms_id])
    response = requests.get(url)
    return response.json()[0]
Ejemplo n.º 23
0
def wordpress_to_db(art, publication_name):
    """Convert a WordPress article payload into an Article document.

    Parameters:
        art (dict): WordPress article as decoded JSON.
        publication_name (str): name of the publication.

    Returns:
        Article | None: the new (unsaved) Article, or None when the article
        already exists or has no content.
    """
    publication = Publication.objects(name=publication_name).get()
    # Duplicate check: same URL, or same (publication, source id) pair.
    duplicates = Article.objects(
        Q(url=art['link'])
        | Q(publication=publication, pub_art_id=str(art['id'])))
    if duplicates:
        return None

    # WordPress serializes an empty body as boolean False.
    if type(art['content']['rendered']) is bool:
        print('Article with no content')
        return None

    article = Article()
    article.title = art['title']['rendered']
    article.summary = art['excerpt']['rendered']
    article.text = art['content']['rendered']
    article.publish_date = datetime.datetime.strptime(art['date'],
                                                      "%Y-%m-%dT%H:%M:%S")
    article.url = art['link']
    # The author field may arrive as a single value or a list.
    author = art['author']
    if type(author) is str:
        article.author = [author]
    elif type(author) is list:
        article.author = author
    elif type(author) is int:
        article.author = [str(author)]
    else:
        print('author error')
    article.publication = publication
    article.pub_art_id = str(art['id'])
    return article
Ejemplo n.º 24
0
def iProfesional_to_db(art, publication_name='iProfesional'):
    """Convert an iProfesional article payload into an Article document.

    Parameters:
        art (dict): iProfesional article as decoded JSON.
        publication_name (str): should be 'iProfesional'.

    Returns:
        Article | None: the new (unsaved) Article, or None for duplicates.
    """
    publication = Publication.objects(name=publication_name).get()
    publication_id = str(art['id'])
    # Duplicate check: same URL, or same (publication, source id) pair.
    duplicates = Article.objects(
        Q(url=art['absoluteUrl'])
        | Q(publication=publication, pub_art_id=publication_id))
    if duplicates:
        return None

    article = Article()
    article.title = art['title']
    article.summary = art['summary']
    article.text = art['text']
    article.publish_date = art['publication']
    article.url = art['absoluteUrl']
    # The author field may arrive as a single value or a list.
    author = art['author']
    if type(author) is str:
        article.author = [author]
    elif type(author) is list:
        article.author = author
    else:
        print('author error')
    article.publication = publication
    article.pub_art_id = publication_id
    return article
    def put(self, pub_no):
        """Create or update the Publication identified by *pub_no*.

        201 when a new entity is created under the given key, 200 on a
        successful update, 400 when the caller is not the creator or the
        payload is invalid, 500 on unexpected errors. The entity is echoed
        back as JSON.
        """
        try:
            cle = ndb.Key("Publication", int(pub_no))
            pub = cle.get()
            pub_json = json.loads(self.request.body)

            if pub is None:
                # Entity does not exist yet: create it under the given key.
                # NOTE(review): the new entity is serialized below without
                # its fields being populated or put() called — confirm that
                # this is intended.
                status = 201
                pub = Publication(key=cle)
            else:
                if int(pub_json['noCreateur']) == int(pub.noCreateur):
                    status = 200
                    pub.texte = str(pub_json['texte'])
                    pub.date = datetime.datetime.strptime(pub_json["date"],
                                                          '%Y-%m-%d')
                    # Bug fix: noBabillard was previously populated from
                    # 'noCreateur' (copy-paste error); the POST handler
                    # reads it from 'noBabillard'.
                    pub.noBabillard = int(pub_json['noBabillard'])
                    pub.put()
                else:
                    # Only the creator may modify a publication. Bug fix:
                    # the original assigned status = 400 and returned
                    # without ever sending that status.
                    self.error(400)
                    return

            pub_dict = pub.to_dict()
            pub_dict["no"] = pub.key.id()
            pub_json = json.dumps(pub_dict, default=serialiser_pour_json)

            self.response.set_status(status)
            self.response.headers['Content-Type'] = ('application/json;' +
                                                     ' charset=utf-8')
            self.response.out.write(pub_json)

        # Errors caused by the supplied data.
        except (db.BadValueError, ValueError, KeyError):
            logging.error("%s", traceback.format_exc())
            self.error(400)

        # Unexpected errors while executing the handler.
        except Exception:
            logging.error("%s", traceback.format_exc())
            self.error(500)
Ejemplo n.º 26
0
def index(request):
    """Landing page view.

    GET renders the form; POST validates the submitted fields, stores a new
    Publication, and redirects to the publications listing.
    """
    if request.method == 'POST':
        # Arguments submitted with the form.
        username = request.POST.get('username', '')
        email = request.POST.get('email', '')
        note = request.POST.get('note', '')

        # Server-side validation: the front-end checks are easy to bypass
        # from the browser console, so required fields are re-checked here.
        if username.strip() and email.strip():
            if not note.strip():
                # Fall back to a generic message when no note was written.
                note = 'This user did not want to write a note!'

            # Persist via the Django ORM.
            Publication(username=username, email=email, note=note).save()

            return redirect("/test/publications")

    # GET (or invalid POST): show the landing page with a blank form.
    return render(request, 'index.html', {'form': PublicationForm()})
Ejemplo n.º 27
0
 def post(self, request, *args, **kwargs):
     """Handle a posted blog message.

     Valid form: save a new Publication and redirect to 'blogclass'.
     Database failure: re-render the page with the error text stored in
     self.db_error. Invalid form: re-render the page via the GET handler.
     """
     self.form = self.form_class(request.POST)
     if self.form.is_valid():
         cd = self.form.cleaned_data
         self.db_error = False
         try:
             Publication(date=datetime.now(),
                         text=cd['message'],
                         author=request.user).save()
         except DatabaseError:
             # Keep the submitted data and surface the DB error message.
             # NOTE(review): exc_info()[1].message is a Python 2 idiom;
             # on Python 3 this attribute does not exist — verify.
             self.form = self.form_class(request.POST)
             self.db_error = exc_info()[1].message
             return super(BlogMainView, self).get(
                 request
             )
         return HttpResponseRedirect(reverse('blogclass'))
     # Invalid form: fall through to the normal GET rendering.
     return super(BlogMainView, self).get(
         request
     )
Ejemplo n.º 28
0
def publications_form(id=None):
    """Create (no id) or edit (with id) a publication via PublicationForm.

    On a valid POST the publication is saved and the user is redirected to
    the listing; otherwise the form template is rendered.
    """
    if id:
        publication = Publication.get(id)
    else:
        # New publication, owned by the logged-in user.
        publication = Publication()
        publication.user_created = session["id"]

    if request.method == 'POST':
        if id:
            form = PublicationForm(request.form, obj=publication)
        else:
            form = PublicationForm(request.form)
        if form.validate():
            form.populate_obj(publication)
            publication.save()
            flash('Your publication has been saved')
            return redirect(url_for('publication'))
    else:
        if id:
            form = PublicationForm(obj=publication)
        else:
            form = PublicationForm()

    return render_template('publications/form.html',
                           form=form,
                           publication=publication)
Ejemplo n.º 29
0
def create_publications():
    """Sync the publications declared in config.publications into the DB.

    Missing publications are created; existing ones (matched by name) have
    their fields refreshed from the config. Prints a summary at the end.
    """
    connect(config.database['db_name'], host=config.database['host'],
            port=config.database['port'])
    print('Verificando publicaciones')
    # All Publication fields mirrored from each config dict.
    fields = ('name', 'url', 'location', 'fetch_method', 'api_url')
    for pub_dict in config.publications:
        pub_list = Publication.objects(name=pub_dict.get('name'))
        if not pub_list:
            new_pub = Publication(**{f: pub_dict.get(f) for f in fields})
            new_pub.save()
            print(f'Publication creada: {pub_dict.get("name")}')
        else:
            existent_pub = pub_list.get()
            for f in fields:
                setattr(existent_pub, f, pub_dict.get(f))
            existent_pub.save()
            print(f'Publication Modificada: {pub_dict.get("name")}')
    pubs = Publication.objects()
    print(f'Total de publicaciones en la db: {len(pubs)}')
    for p in pubs:
        print(f'- {p.name}')
    print('#################################')
Ejemplo n.º 30
0
    def _save_chunk(self, folder, chunk, texts_titles, titles, texts, ids):
        """Write one chunk of tokenized data as .npy files (all/titles/content/ids)."""
        np.save(f'{folder}all_{chunk}.npy', texts_titles)
        np.save(f'{folder}titles_{chunk}.npy', titles)
        file_name = f'{folder}content_{chunk}.npy'
        np.save(file_name, texts)
        print()
        print(f'{file_name} saved!')
        np.save(f'{folder}ids_{chunk}.npy', ids)

    def save_training_tokens(self):
        """Tokenize every not-yet-tokenized article of this publication and
        save the tokens to disk in chunks of self.chunk_size.

        Returns 0 on success, -1 when the publication cannot be found or
        there are no articles in the database.
        """
        already_tokenized_ids, n_files = self.get_tokenized_articles_list()
        print(f'Found {len(already_tokenized_ids)} already tokenized articles')
        try:
            publication = Publication.objects(name=self.publication_name).get()
        except Exception:
            # Narrowed from a bare except; lookup failure still aborts.
            print(
                'Problemas buscando publicación. Creo la publicación? Tiene el mismo nombre en config y config_train'
            )
            return -1
        print(publication)
        articles = Article.objects(
            publication=publication).order_by('-publish_date')
        N = articles.count()

        if N == 0:
            print(
                f'No hay articulos en la base de datos. Correr fetch_articles')
            return -1
        else:
            print(f'Total number to tokenize: {N}')

        texts, titles, texts_titles, ids = [], [], [], []
        # Continue chunk numbering after the files that already exist.
        chunk = n_files
        training_data_folder = self.get_training_folder()
        if not os.path.exists(training_data_folder):
            os.makedirs(training_data_folder)

        for i, article in enumerate(articles):
            # Flush a full chunk to disk before processing the next article.
            if len(ids) % self.chunk_size == 0 and len(ids) != 0:
                chunk += 1
                self._save_chunk(training_data_folder, chunk, texts_titles,
                                 titles, texts, ids)
                texts, titles, texts_titles, ids = [], [], [], []
            if str(article['id']) not in already_tokenized_ids:
                text, title = Train.article2text(article)
                print(f'\r{i}/{N}', end=' ')
                tokens_text = RelatedArticles.doc2tokens(self.nlp(text))
                tokens_title = RelatedArticles.doc2tokens(self.nlp(title))
                texts.append(tokens_text)
                titles.append(tokens_title)
                texts_titles.append(tokens_title + ['\n'] + tokens_text)
                ids.append(str(article['id']))

        # Flush the final, possibly partial, chunk.
        if len(ids) > 0:
            chunk += 1
            self._save_chunk(training_data_folder, chunk, texts_titles,
                             titles, texts, ids)

        return 0
Ejemplo n.º 31
0
    def post(self):
        """Admin-only dispatcher that creates one entity per request.

        The "mode" POST field selects what gets created:
        "0" Institution, "1" Author, "2" Conference, "3" Publication,
        "4" Content, "5" Project. Non-administrators are redirected to "/".
        Finishes by re-rendering the admin page via self.get().
        """
        if not self.user.administrator:
            return webapp2.redirect("/")

        mode = self.request.POST["mode"]

        if mode == "0":
            # Institution
            institution = Institution(name=self.request.POST["name"], website=self.request.POST["website"])
            institution.put()
        elif mode == "1":
            # Author: download the thumbnail image; fall back to "" on 404.
            thumbnail_url = self.request.POST["thumbnail"]
            try:
                content = urllib2.urlopen(thumbnail_url)
                image = content.read()
            except urllib2.HTTPError:
                # NOTE(review): the log message is missing a space before
                # "was" — runtime string left untouched here.
                logging.warning("URL: " + thumbnail_url + "was not found.")
                image = ""

            institution = ndb.Key(urlsafe=self.request.POST["institution"])

            author = Author(
                name=self.request.POST["name"],
                website=self.request.POST["website"],
                thumbnail=image,
                institution=institution,
            )
            author.put()
        elif mode == "2":
            # Conference
            conference = Conference(name=self.request.POST["name"], acronym=self.request.POST["acronym"])
            conference.put()
            pass
        elif mode == "3":
            # Publication
            date = datetime.strptime(self.request.POST["date"], "%Y-%m-%d")

            # A bit messy, does author order: "order" holds 1-based positions
            # into the submitted "authors" list.
            authors = self.request.params.getall("authors")
            idx = 0  # NOTE(review): unused variable.
            author_order = [int(order_idx) for order_idx in self.request.POST["order"].split(",")]
            ordered_authors = []
            for author_idx in range(len(authors)):
                ordered_authors.append(ndb.Key(urlsafe=authors[author_order[author_idx] - 1]))

            conference = ndb.Key(urlsafe=self.request.POST["conference"])

            # Optional PDF preview image, downloaded like the thumbnail.
            pdf_image_url = self.request.POST["pdfimage"]
            image = ""
            if pdf_image_url:
                try:
                    content = urllib2.urlopen(pdf_image_url)
                    image = content.read()
                except urllib2.HTTPError:
                    logging.warning("URL: " + pdf_image_url + "was not found.")

            publication = Publication(
                title=self.request.POST["title"],
                abstract=self.request.POST["abstract"],
                date=date,
                authors=ordered_authors,
                citation=self.request.POST["citation"],
                conference=conference,
                pdf=self.request.POST["pdf"],
                pdf_image=image,
                arxiv_link=self.request.POST["arxiv"],
                project_page=self.request.POST["projectpage"],
            )
            publication.put()
        elif mode == "4":
            # Content
            content = Content(name=self.request.POST["name"], content=self.request.POST["content"])
            content.put()
        elif mode == "5":
            # Project: resolve author/publication/content keys from the
            # submitted urlsafe strings.
            authors = []
            for author in self.request.params.getall("authors"):
                authors.append(ndb.Key(urlsafe=author))

            image_url = self.request.POST["image"]
            if image_url:
                try:
                    content = urllib2.urlopen(image_url)
                    image = content.read()
                except urllib2.HTTPError:
                    logging.warning("URL: " + image_url + "was not found.")
                    image = ""
            else:
                image = ""

            publications = []
            for publication in self.request.params.getall("publications"):
                publications.append(ndb.Key(urlsafe=publication))

            contents = []
            for content in self.request.params.getall("contents"):
                contents.append(ndb.Key(urlsafe=content))

            # Tags are comma-separated; reuse an existing Tag when one with
            # the same name exists, otherwise create it.
            tags = []
            for tag in self.request.POST["tags"].split(","):
                # Try to find tag.
                stripped_tag = tag.strip()
                query = Tag.query(Tag.name == stripped_tag)
                if query.count() == 1:
                    query_tag = query.get(keys_only=True)
                    tags.append(query_tag)
                elif query.count() == 0:
                    query_tag = Tag(name=stripped_tag)
                    tags.append(query_tag.put())
                else:
                    logging.error("Tag count > 1 | < 0 (%s)." % stripped_tag)

            project = Project(
                title=self.request.POST["title"],
                description=self.request.POST["description"],
                authors=authors,
                image=image,
                publications=publications,
                extra_content=contents,
                tags=tags,
            )
            project.put()
        return self.get()
Ejemplo n.º 32
0
 def get_publications(self, disease_slug):
     """Return every publication associated with the given disease slug."""
     return Publication.get_for_disease(disease_slug)
Ejemplo n.º 33
0
def create():
    """Validate the news submission form and persist a new Publication.

    Re-renders the form on validation failure; otherwise stores the
    publication (inactive by default) and redirects to the admin page.
    """
    form = RegistrationForm()
    if not form.validate():
        return render_template('news/new.html', form=form)

    publication = Publication()
    publication.title = form.title.data

    # Look the subject up once and create it when absent (previously the
    # query's .first() was executed twice, issuing two round trips).
    subject = PublicationSubject.query.filter_by(
        name=form.subject.data).first()
    if subject is None:
        subject = PublicationSubject()
        subject.name = form.subject.data
        db.session.add(subject)
        db.session.commit()
    publication.subject_id = subject.id

    publication.text_content = form.text_content.data
    publication.text_call = form.text_call.data
    publication.last_modification = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
    publication.show_home = form.show_home.data
    publication.thumb = form.thumb.data
    # New submissions start inactive until approved.
    publication.active = 0
    publication.author = form.author.data

    db.session.add(publication)
    db.session.commit()

    message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
    flash(message, 'success')
    return redirect(url_for('news.admin'))
Ejemplo n.º 34
0
def scrape_pubmed(disease, query):
    """Search PubMed (NCBI eutils) for `query` and persist the results.

    Fetches up to `ret_max` publication IDs from the search endpoint, then
    one efetch document per publication; creates/updates Author and
    Publication models tagged with `disease`.  Relies on module-level
    `base_url` and `ret_max` (not visible in this chunk).
    """
    publication_list_url = "%s&term=%s&retmax=%s" % (base_url, query, ret_max)

    dom = get_dom(publication_list_url)
    count = int(extract_value(get_text(dom, "Count"), "Count"))

    # Never request more documents than the API reported as available.
    num_results = min(ret_max, count)
    for i in range(num_results):

        #retrieve the first xml tag (<tag>data</tag>) that the parser finds with name tagName:
        publication_id = get_value(dom, "Id", i)
        publication_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=%s" % publication_id
        publication_dom = get_dom(publication_url)

        # extract features
        title = get_value(publication_dom, "ArticleTitle")
        affiliation = get_value(publication_dom, "Affiliation")

        year = int(get_value(publication_dom, "Year"))
        month = int(get_value(publication_dom, "Month"))
        day = int(get_value(publication_dom, "Day"))
        pub_date = date(year, month, day)

        # get all author_names
        author_names = []

        # The helpers below close over `author_names`, so they are
        # (re)defined on every loop iteration.
        def getText(nodelist):
            # Concatenate the text nodes of a DOM node list.
            rc = []
            for node in nodelist:
                if node.nodeType == node.TEXT_NODE:
                    rc.append(node.data)
            return ''.join(rc)

        def handleLastName(lastName):
            lastNameOut = getText(lastName.childNodes)
            return lastNameOut

        def handleForeName(foreName):
            foreNameOut = getText(foreName.childNodes)
            return foreNameOut

        def processAuthorElem(elem):
            # Only authors that carry a LastName element are recorded.
            if len(elem.getElementsByTagName("LastName")) != 0:
                last_name = handleLastName(elem.getElementsByTagName("LastName")[0])
                fore_name = handleForeName(elem.getElementsByTagName("ForeName")[0])
                author_names.append("%s, %s" % (last_name, fore_name))

        def processAuthors(dom):
            author_elems = dom.getElementsByTagName("Author")
            for elem in author_elems:
                processAuthorElem(elem)

        processAuthors(publication_dom)

        # create author models
        for name in author_names:
            author = Author.get_or_insert(name, name=name)
            # TODO: fix this so it's appending instead of overwriting
            author.diseases = [disease]

            author.put()

        # create publication model
        pub = Publication.get_or_insert(publication_id, pubmed_id=publication_id)
        pub.title = title
        pub.publication_date = pub_date
        pub.author_names = author_names
        pub.affiliation = affiliation
        # TODO: fix this so it's appending instead of overwriting
        pub.diseases = [disease]

        pub.put()
Ejemplo n.º 35
0
def create():
    """Validate the news submission form and persist a new Publication.

    Re-renders the form on validation failure; otherwise stores the
    publication (inactive by default) and redirects to the admin page.
    """
    form = RegistrationForm()
    if not form.validate():
        return render_template('news/new.html', form=form)

    publication = Publication()
    publication.title = form.title.data

    # Resolve the subject with a single query; create it if missing
    # (the original called .first() twice, issuing two round trips).
    subject = PublicationSubject.query.filter_by(
        name=form.subject.data).first()
    if subject is None:
        subject = PublicationSubject()
        subject.name = form.subject.data
        db.session.add(subject)
        db.session.commit()
    publication.subject_id = subject.id

    publication.text_content = form.text_content.data
    publication.text_call = form.text_call.data
    publication.last_modification = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
    publication.show_home = form.show_home.data
    publication.thumb = form.thumb.data
    # New submissions start inactive until approved.
    publication.active = 0
    publication.author = form.author.data

    db.session.add(publication)
    db.session.commit()

    message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
    flash(message, 'success')
    return redirect(url_for('news.admin'))
Ejemplo n.º 36
0
def add(search_query, author, title):
    """Search ADS for a paper, let the user pick one, and store it locally.

    Builds an ADS query from `search_query` plus optional `author`/`title`
    filters, fetches the bibtex entry and the arXiv PDF, and persists the
    Paper together with its authors and keywords.
    """
    fl = [
        'id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title',
        'abstract', 'doi', 'pubdate', "pub", "keyword", "doctype",
        "identifier", "links_data"
    ]
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        # No results: nothing to select.  (A dead assignment of the
        # Article class that preceded exit() has been removed.)
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type:ads.search.Article
    else:
        # Show at most the first ten hits and let the user choose one.
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type:ads.search.Article

    assert len(selection.doi) == 1
    doi = selection.doi[0]

    try:
        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)
    except peewee.DoesNotExist:
        pass

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")

    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)
    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    # Name the file after the paper's DB id.  The old format string had no
    # placeholder, so every download overwrote the same file.
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        # True division here: `file_size // chunk_size` already floors, so
        # wrapping it in ceil() was a no-op and undercounted by one chunk.
        progress_length = math.ceil(file_size / chunk_size)
        # Iterate with the same chunk size used to size the progress bar;
        # the old hard-coded 20 made the bar advance at the wrong rate.
        with click.progressbar(r.iter_content(chunk_size=chunk_size),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
Ejemplo n.º 37
0
def crawl_author():
  """
  Crawls Google Scholar in order to retrieve information about an author.

  NOTE: Python 2 syntax (print statements).  Reads the author's Scholar ID
  from the request form, scrapes profile, citation histogram, co-authors
  and the paginated publication list, then upserts everything via the ORM.
  """

  # The ID of the author in Google Scholar.
  scholar_id = request.form['scholar_id']

  print 'Crawl author ' + scholar_id + '.'

  # Retrieve the author with that ID (if any).
  author = Author.query.filter_by(scholar_id = scholar_id).first()
  if author is None:
    author = Author()

  cookie_jar = CookieJar()
  opener = build_opener(HTTPCookieProcessor(cookie_jar))
  install_opener(opener)

  url = 'https://scholar.google.com/citations';
  params = urlencode({'hl': 'en', 'view_op': 'list_works', 'sortby': 'pubdate',
                      'user': scholar_id, 'cstart': 0, 'pagesize': 20})
  req = Request(url + '?' + params)
  # NOTE(review): each request is issued twice and only the second response
  # is parsed — presumably a cookie warm-up round trip; confirm intent.
  opener.open(req)
  res = opener.open(req)
  doc = html.parse(res)

  no_content = doc.xpath('.//div[contains(text(), "Sorry, no content found for this URL")]')
  if len(no_content):
    print 'Author ' + scholar_id + ' not found.'
    return 'Done.'

  author.scholar_id = scholar_id

  nname = doc.find('.//div[@id="gsc_prf_in"]')
  if nname is not None:

    # The name of the author.
    author.name = nname.text_content()

  nemaildomain = doc.find('.//div[@id="gsc_prf_ivh"]')
  if nemaildomain is not None:

    # The domain where the author has an email.
    author.email_domain = nemaildomain.text_content().split(" - ")[0].split()[-1]

  ncitations = doc.find('.//table[@id="gsc_rsb_st"]')
  if ncitations is not None:

    # The total citations for the author.
    author.total_citations = ncitations.xpath('.//tr[2]/td')[1].text

    # The h-index for the author.
    author.h_index = ncitations.xpath('.//tr[3]/td')[1].text

    # The i10-index for the author.
    author.i10_index = ncitations.xpath('.//tr[4]/td')[1].text

  params = urlencode({'hl': 'en', 'view_op': 'citations_histogram',
                      'user': scholar_id})
  req = Request(url + '?' + params)
  opener.open(req)
  res = opener.open(req)
  doc = html.parse(res)

  # The citations per year for the author.
  author_citations_per_year = []
  nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
  if nhistogram is not None:
    years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
    for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
      # The bar's z-index is used as a reverse index into the year labels.
      i = int(a.get('style').split('z-index:')[1])
      year = int(years[-i])
      citations_per_year = AuthorCitationsPerYear.query.filter_by(author_id = author.id, year = year).first()
      if citations_per_year is None:
        citations_per_year = AuthorCitationsPerYear()
      citations_per_year.year = int(years[-i])
      citations_per_year.citations = int(a.xpath('./span[@class="gsc_g_al"]')[0].text)
      author_citations_per_year.append(citations_per_year)
  author.citations_per_year = author_citations_per_year

  params = urlencode({'hl': 'en', 'view_op': 'list_colleagues', 'user': scholar_id})
  req = Request(url + '?' + params)
  opener.open(req)
  res = opener.open(req)
  doc = html.parse(res)

  # The co-authors of the author.
  author_coauthors = []
  for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
    # NOTE(review): the next line appears corrupted ('user='******'&hl' is
    # not valid Python — looks like a credential-scrubber artifact).  The
    # original likely split on 'user=' and then on '&hl'; restore before use.
    co_scholar_id = a.get('href').split('user='******'&hl')[0]
    coauthor = Author.query.filter_by(scholar_id = co_scholar_id).first()
    if coauthor is None:
      coauthor = Author()
    coauthor.scholar_id = co_scholar_id
    author_coauthors.append(coauthor)
  author.coauthors = author_coauthors

  # The publications.
  author_publications = []
  cstart = 0
  pagesize = 100
  while True:
    params = urlencode({'hl': 'en', 'view_op': 'list_works', 'sortby': 'pubdate',
                        'user': scholar_id, 'cstart': cstart, 'pagesize': pagesize})
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    for tr in doc.xpath('.//tr[@class="gsc_a_tr"]'):
      a = tr.find('.//td[@class="gsc_a_t"]//a')
      # NOTE: When there are no publications, there is a single tr.
      # <tr class="gsc_a_tr"><td class="gsc_a_e" colspan="3">There are no articles in this profile.</td></tr>
      if a is None:
        continue
      purl = a.get('href')

      # The ID of the publication in Google Scholar.
      pub_scholar_id = purl.split('citation_for_view=')[1]

      # Retrieve the publication with that ID (if any).
      publication = Publication.query.filter_by(scholar_id = pub_scholar_id).first()
      if publication is None:
        publication = Publication()
      publication.scholar_id = pub_scholar_id

      # The title of the publication.
      publication.title = a.text_content()

      pub_nyear = tr.find('.//td[@class="gsc_a_y"]//span')
      if pub_nyear is not None:
        year_of_publication = pub_nyear.text_content().strip()
        if year_of_publication:

          # The year of the publication.
          publication.year_of_publication = int(year_of_publication)

      pub_ncitations = tr.find('.//a[@class="gsc_a_ac"]')

      if pub_ncitations is not None:
        total_citations = pub_ncitations.text_content().strip()
        if total_citations:

          # The total citations for the publication.
          publication.total_citations = int(total_citations)

      author_publications.append(publication)

    # Stop when the "next" button is disabled; otherwise advance one page.
    if doc.xpath('.//button[@id="gsc_bpf_next"]')[0].get("disabled"):
      break

    cstart += 100
  author.publications = author_publications

  # When information about the author was retrieved from Google Scholar.
  author.retrieved_at = datetime.datetime.now()

  db.session.add(author)
  db.session.commit()

  print 'Crawled author ' + scholar_id + '.'
  return 'Done.'
Ejemplo n.º 38
0
def testdb():
    """Print the creator id of every publication belonging to each user."""
    for owner in User.select():
        owned = (pub for pub in Publication.select()
                 if pub.user_created == owner)
        for pub in owned:
            print(pub.user_created.id)
Ejemplo n.º 39
0
def fetch_articles(publication_name,
                   art_to_db=wordpress_to_db,
                   get_url=get_wp_url,
                   get_articles=get_wp_articles,
                   api_url=None,
                   per_page=50,
                   starting_page=1,
                   date_after=None,
                   date_before=None):
    """
    Fetch articles from publication
  
    Fetch articles from publication by date and independent of publication (wordpress, iProfesional). Saves them to mongodb database
  
    Parameters: 
    publication_name (str): name of publication in db. Needs it to get api url from db
    art_to_db (func): iProfesional_to_db or wordpress_to_db are the only supported for the moment 
    get_url (func):  get_wp_url or get_iProfesional_url are the only supported for the moment 
    get_articles (func): get_iProfesional_articles or get_wp_articles are the only supported for the moment 
    api_url (str): if not None rewrite api_url in db
    per_page (int): number of articles per page
    starting_page (int): page number - First page is 1
    date_after (str): Get articles after this date with format: '%Y-%m-%d'
    date_before (str): Get articles before this date with format: '%Y-%m-%d'

    Returns: 
    None
    """
    publication = Publication.objects(name=publication_name).get()
    if api_url is not None:
        # Caller-supplied endpoint overrides (and persists over) the stored one.
        publication.api_url = api_url
        publication.save()

    # Most recently stored article for this publication (single doc or None).
    articles = Article.objects(
        publication=publication).order_by('-publish_date').limit(1).first()
    # NOTE(review): `articles` is a single document here, not a list —
    # len(articles) only works if the document type defines __len__; confirm.
    if articles is None or len(articles) == 0:
        # No hay articulos
        print('No articles')
        if date_after is None:
            # Far-in-the-past sentinel so every article qualifies.
            date_after = datetime.date.fromtimestamp(-10000000000)
    else:
        if date_after is None:
            # Resume from the newest article already stored.
            date_after = articles['publish_date']

    if date_before is None:
        date_before = datetime.datetime.now()

    url_endpoint = publication.api_url
    if url_endpoint is None:
        print('api_url not defined in publication')
        return
    page = starting_page
    total_pages = None
    while True:
        url = get_url(url_endpoint, page, per_page, date_after, date_before)
        # url = f'{url_endpoint}posts?page={page}&per_page={per_page}&orderby=date&order=asc&after={date_str}'

        if total_pages:
            print(f'\rPage: {page}/{total_pages} - {url}', end='')
        else:
            print(f'\rPage: {page} - {url}', end='')

        response = requests.get(url)
        articles, total_pages = get_articles(
            response)  # int(response.headers['X-WP-TotalPages'])

        for article in articles:
            art = art_to_db(article, publication_name)
            # print(art.publish_date)
            if art is not None:
                art.save()
            else:
                print('\rAlready in DB')

        if page == total_pages or total_pages == 0:
            break
        page += 1
        # NOTE(review): this API-error check runs only after the page counter
        # advances and articles were already iterated — confirm the ordering
        # is intentional (an error payload would be a dict with a 'code' key).
        if 'code' in articles:
            print()
            print(articles['code'])
            break
Ejemplo n.º 40
0
def crawl_publication():
  """
  Crawls Google Scholar in order to retrieve information about a publication.

  NOTE: Python 2 syntax (print statements).  Reads the publication's Scholar
  ID from the request form, scrapes the citation view, and upserts the
  Publication plus its per-year citation counts.
  """

  # The ID of the publication in Google Scholar.
  scholar_id = request.form['scholar_id']

  print 'Crawl publication ' + scholar_id + '.'

  # NOTE(review): `url` is reassigned identically below; this first
  # assignment is redundant.
  url = 'https://scholar.google.com/citations';

  publication = Publication.query.filter_by(scholar_id = scholar_id).first()
  if publication is None:
    publication = Publication()

  cookie_jar = CookieJar()
  opener = build_opener(HTTPCookieProcessor(cookie_jar))
  install_opener(opener)

  url = 'https://scholar.google.com/citations';
  params = urlencode({'hl': 'en', 'view_op': 'view_citation', 'citation_for_view': scholar_id})
  req = Request(url + '?' + params)
  # NOTE(review): the request is issued twice and only the second response is
  # parsed — presumably a cookie warm-up round trip; confirm intent.
  opener.open(req)
  res = opener.open(req)
  doc = html.parse(res)

  publication.scholar_id = scholar_id

  ntitle = doc.find('.//a[@class="gsc_title_link"]')
  if ntitle is not None:

    # The title of the publication.
    publication.title = ntitle.text_content()

  ntype = doc.find('.//div[@class="gs_scl"][3]//div[@class="gsc_field"]')
  if ntype is not None:

    # The type of the publication.
    publication.type = ntype.text_content()
    if publication.type == 'Description':
      publication.type = 'Other'

  nyear = doc.xpath('.//div[text()="Publication date"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]')
  if nyear is not None and len(nyear):

    # The year of the publication.
    publication.year_of_publication = int(nyear[0].text.split('/')[0])

  ncitations = doc.xpath('.//div[text()="Total citations"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]//a')
  if ncitations is not None and len(ncitations):

    # The total citations for the publication.
    publication.total_citations = ncitations[0].text.split(' ')[-1]

  nauthors = doc.xpath('.//div[text()="Authors"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]')
  if nauthors is not None and len(nauthors):

    # The authors of the publication.
    publication.author_names = nauthors[0].text

  # The citations per year for the publication.
  publication_citations_per_year = []
  nhistogram = doc.find('.//div[@id="gsc_graph_bars"]')
  if nhistogram is not None:
    years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
    for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
      # The bar's z-index is used as a reverse index into the year labels.
      i = int(a.get('style').split('z-index:')[1])
      year = int(years[-i])
      citations_per_year = PublicationCitationsPerYear.query.filter_by(publication_id = publication.id, year = year).first()
      if citations_per_year is None:
        citations_per_year = PublicationCitationsPerYear()
      citations_per_year.year = int(years[-i])
      citations_per_year.citations = int(a.xpath('./span[@class="gsc_g_al"]')[0].text)
      publication_citations_per_year.append(citations_per_year)
  publication.citations_per_year = publication_citations_per_year

  # When information about the author was retrieved from Google Scholar.
  publication.retrieved_at = datetime.datetime.now()

  db.session.add(publication)
  db.session.commit()

  print 'Crawled publication ' + scholar_id + '.'
  return 'Done.'
Ejemplo n.º 41
0
def create():
    """Validate the news form, upload its assets, and persist a Publication.

    On validation failure repopulates the form choices and re-renders;
    otherwise stores the publication (inactive), uploads embedded images and
    the thumbnail, logs the operation, and redirects to the admin page.
    """
    form = RegistrationForm()
    if not form.validate():
        form.set_choices()
        return render_template('news/new.html', form=form)

    publication = Publication()
    publication.title = form.title.data
    publication.text_call = form.text_call.data
    publication.last_modification = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
    publication.show_home = form.show_home.data
    # New submissions start inactive until approved.
    publication.active = 0
    publication.author = form.author.data
    publication.language = form.language.data
    publication.add_subjects(form.subject.data, form.language.data)

    if form.thumb_src.data:
        publication.thumb_src = form.thumb_src.data

    db.session.add(publication)
    # Flush so publication.id is assigned before it is used in upload paths.
    db.session.flush()

    text_content = upload_images_to_s3(form.text_content.data, mod.name,
                                       publication.id)
    # Assign directly: the session identity map means the previous
    # Publication.query.get(publication.id) returned this same instance.
    publication.text_content = text_content
    clean_s3_folder(text_content, mod.name, publication.id)

    # A data-URI thumbnail arrives as "<header>,<base64>"; only the payload
    # after the first comma is decoded and saved.
    if len(form.thumb.data.split(',')) > 1:
        upload_folder = os.path.join(app.config['UPLOAD_FOLDER'], mod.name,
                                     str(publication.id), 'images')
        publication.thumb = save_b64_image(
            form.thumb.data.split(',')[1], upload_folder, 'thumb')

    db.session.commit()
    log_operation(module=mod.name,
                  operation='create',
                  user=(g.user.id, g.user.email),
                  objs=[(publication.id, publication.title)])
    message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
    flash(message, 'success')
    return redirect(url_for('news.admin'))
Ejemplo n.º 42
0
def publi_create():
    # Creates a Publication and asserts on its user_created field.
    publi = Publication.create(title='osef', body='essai', username=user)
    # NOTE(review): `datetime.datetime.now` is not called here (no parens),
    # so this compares against the function object itself and will normally
    # be False.  Elsewhere in this file `user_created` holds a User, not a
    # timestamp — confirm which field/value this assertion should check.
    assert publi.user_created == datetime.datetime.now
Ejemplo n.º 43
0
def crawl_publication():
    """
  Crawls Google Scholar in order to retrieve information about a publication.

  NOTE: Python 2 syntax (print statements).  Reads the publication's Scholar
  ID from the request form, scrapes the citation view, and upserts the
  Publication plus its per-year citation counts.
  """

    # The ID of the publication in Google Scholar.
    scholar_id = request.form['scholar_id']

    print 'Crawl publication ' + scholar_id + '.'

    # NOTE(review): `url` is reassigned identically below; this first
    # assignment is redundant.
    url = 'https://scholar.google.com/citations'

    publication = Publication.query.filter_by(scholar_id=scholar_id).first()
    if publication is None:
        publication = Publication()

    cookie_jar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar))
    install_opener(opener)

    url = 'https://scholar.google.com/citations'
    params = urlencode({
        'hl': 'en',
        'view_op': 'view_citation',
        'citation_for_view': scholar_id
    })
    req = Request(url + '?' + params)
    # NOTE(review): the request is issued twice and only the second response
    # is parsed — presumably a cookie warm-up round trip; confirm intent.
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    publication.scholar_id = scholar_id

    ntitle = doc.find('.//a[@class="gsc_title_link"]')
    if ntitle is not None:

        # The title of the publication.
        publication.title = ntitle.text_content()

    ntype = doc.find('.//div[@class="gs_scl"][3]//div[@class="gsc_field"]')
    if ntype is not None:

        # The type of the publication.
        publication.type = ntype.text_content()
        if publication.type == 'Description':
            publication.type = 'Other'

    nyear = doc.xpath(
        './/div[text()="Publication date"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]'
    )
    if nyear is not None and len(nyear):

        # The year of the publication.
        publication.year_of_publication = int(nyear[0].text.split('/')[0])

    ncitations = doc.xpath(
        './/div[text()="Total citations"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]//a'
    )
    if ncitations is not None and len(ncitations):

        # The total citations for the publication.
        publication.total_citations = ncitations[0].text.split(' ')[-1]

    nauthors = doc.xpath(
        './/div[text()="Authors"]/ancestor::div[@class="gs_scl"]//div[@class="gsc_value"]'
    )
    if nauthors is not None and len(nauthors):

        # The authors of the publication.
        publication.author_names = nauthors[0].text

    # The citations per year for the publication.
    publication_citations_per_year = []
    nhistogram = doc.find('.//div[@id="gsc_graph_bars"]')
    if nhistogram is not None:
        years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
        for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
            # The bar's z-index is used as a reverse index into year labels.
            i = int(a.get('style').split('z-index:')[1])
            year = int(years[-i])
            citations_per_year = PublicationCitationsPerYear.query.filter_by(
                publication_id=publication.id, year=year).first()
            if citations_per_year is None:
                citations_per_year = PublicationCitationsPerYear()
            citations_per_year.year = int(years[-i])
            citations_per_year.citations = int(
                a.xpath('./span[@class="gsc_g_al"]')[0].text)
            publication_citations_per_year.append(citations_per_year)
    publication.citations_per_year = publication_citations_per_year

    # When information about the author was retrieved from Google Scholar.
    publication.retrieved_at = datetime.datetime.now()

    db.session.add(publication)
    db.session.commit()

    print 'Crawled publication ' + scholar_id + '.'
    return 'Done.'
Ejemplo n.º 44
0
def labadmin(request):
    """Lab administration view for a PI (principal investigator).

    Processes, in order: cover-image upload, article content, publication
    create/update and album photo upload, then renders the admin template
    with the lab's current data (passed via ``locals()``).
    """
    try:
        if request.user.get_profile().utype < 2:
            return HttpResponseForbidden()
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit /
        # KeyboardInterrupt); any profile failure still means "forbidden".
        return HttpResponseForbidden()
    pinameAdmin = request.user.username
    form = UeditorModelForm(request.POST)
    coverselect = Cover.objects.filter(username=pinameAdmin)
    articleselect = Article.objects.filter(username=pinameAdmin)
    try:
        labobj = Lab.objects.get(pi=pinameAdmin)
    except Exception:
        # Narrowed from a bare `except:`; create the Lab on first visit.
        labobj = Lab(pi=pinameAdmin)
        labobj.save()
    if 'cover' in request.FILES:
        img = request.FILES['cover']
        if coverselect:
            # Replace the existing cover image in place.
            coverobj = Cover.objects.get(username=pinameAdmin)
            coverobj.coverimg = img
            coverobj.save()
        else:
            coverobj = Cover(username=pinameAdmin, coverimg=img)
            coverobj.save()
            labobj.cover_set.add(coverobj)
            labobj.save()
    if 'content' in request.POST:
        text = request.POST['content']
        if articleselect:
            articleobj = Article.objects.get(username=pinameAdmin)
            articleobj.content = text
            articleobj.save()
            return render_to_response("lab/labadmin.html", locals(), context_instance=RequestContext(request))
        else:
            articleobj = Article(username=pinameAdmin, content=text)
            articleobj.save()
            labobj.article_set.add(articleobj)
            labobj.save()
            return render_to_response("lab/labadmin.html", locals(), context_instance=RequestContext(request))
    if 'author' in request.POST:
        authorraw = request.POST['author']
        paperraw = request.POST['paper']
        urlraw = request.POST['url']
        cellraw = request.POST['cell']
        pmidraw = request.POST['pmid']
        if 'id' in request.POST:
            # An 'id' means "update this existing publication in place".
            idraw = request.POST['id']
            pubobj = Publication.objects.get(id=idraw)
            pubobj.author = authorraw
            pubobj.paper = paperraw
            pubobj.url = urlraw
            pubobj.cell = cellraw
            pubobj.pmid = pmidraw
            pubobj.save()
        else:
            pubobj = Publication(username=pinameAdmin, author=authorraw, paper=paperraw, url=urlraw, cell=cellraw, pmid=pmidraw)
            pubobj.save()
            labobj.publication_set.add(pubobj)
            labobj.save()
    if 'album' in request.FILES:
        photoraw = request.FILES['album']
        albumobj = Album(username=pinameAdmin, photo=photoraw)
        albumobj.save()
        labobj.album_set.add(albumobj)
        labobj.save()

    if coverselect:
        covertarget = Cover.objects.get(username=pinameAdmin)
    else:
        covertarget = 0
    if articleselect:
        articletarget = Article.objects.get(username=pinameAdmin)
    else:
        articletarget = 0
    pubtarget = Publication.objects.filter(username__contains=pinameAdmin)
    # Re-fetch the lab so anything added above is reflected in the render.
    labobj = Lab.objects.get(pi=pinameAdmin)
    membertarget = labobj.labrelated.all()
    albumtarget = Album.objects.filter(username__contains=pinameAdmin)
    return render_to_response("lab/labadmin.html", locals(), context_instance=RequestContext(request))
Ejemplo n.º 45
0
def crawl_author():
    """
  Crawls Google Scholar in order to retrieve information about an author.
  """

    # The ID of the author in Google Scholar.
    scholar_id = request.form['scholar_id']

    print 'Crawl author ' + scholar_id + '.'

    # Retrieve the author with that ID (if any).
    author = Author.query.filter_by(scholar_id=scholar_id).first()
    if author is None:
        author = Author()

    cookie_jar = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cookie_jar))
    install_opener(opener)

    url = 'https://scholar.google.com/citations'
    params = urlencode({
        'hl': 'en',
        'view_op': 'list_works',
        'sortby': 'pubdate',
        'user': scholar_id,
        'cstart': 0,
        'pagesize': 20
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    no_content = doc.xpath(
        './/div[contains(text(), "Sorry, no content found for this URL")]')
    if len(no_content):
        print 'Author ' + scholar_id + ' not found.'
        return 'Done.'

    author.scholar_id = scholar_id

    nname = doc.find('.//div[@id="gsc_prf_in"]')
    if nname is not None:

        # The name of the author.
        author.name = nname.text_content()

    nemaildomain = doc.find('.//div[@id="gsc_prf_ivh"]')
    if nemaildomain is not None:

        # The domain where the author has an email.
        author.email_domain = nemaildomain.text_content().split(
            " - ")[0].split()[-1]

    ncitations = doc.find('.//table[@id="gsc_rsb_st"]')
    if ncitations is not None:

        # The total citations for the author.
        author.total_citations = ncitations.xpath('.//tr[2]/td')[1].text

        # The h-index for the author.
        author.h_index = ncitations.xpath('.//tr[3]/td')[1].text

        # The i10-index for the author.
        author.i10_index = ncitations.xpath('.//tr[4]/td')[1].text

    params = urlencode({
        'hl': 'en',
        'view_op': 'citations_histogram',
        'user': scholar_id
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    # The citations per year for the author.
    author_citations_per_year = []
    nhistogram = doc.find('.//div[@id="gsc_md_hist_b"]')
    if nhistogram is not None:
        years = [x.text for x in nhistogram.xpath('.//span[@class="gsc_g_t"]')]
        for a in nhistogram.xpath('.//a[@class="gsc_g_a"]'):
            i = int(a.get('style').split('z-index:')[1])
            year = int(years[-i])
            citations_per_year = AuthorCitationsPerYear.query.filter_by(
                author_id=author.id, year=year).first()
            if citations_per_year is None:
                citations_per_year = AuthorCitationsPerYear()
            citations_per_year.year = int(years[-i])
            citations_per_year.citations = int(
                a.xpath('./span[@class="gsc_g_al"]')[0].text)
            author_citations_per_year.append(citations_per_year)
    author.citations_per_year = author_citations_per_year

    params = urlencode({
        'hl': 'en',
        'view_op': 'list_colleagues',
        'user': scholar_id
    })
    req = Request(url + '?' + params)
    opener.open(req)
    res = opener.open(req)
    doc = html.parse(res)

    # The co-authors of the author.
    author_coauthors = []
    for a in doc.xpath('.//h3[@class="gsc_1usr_name"]//a'):
        co_scholar_id = a.get('href').split('user='******'&hl')[0]
        coauthor = Author.query.filter_by(scholar_id=co_scholar_id).first()
        if coauthor is None:
            coauthor = Author()
        coauthor.scholar_id = co_scholar_id
        author_coauthors.append(coauthor)
    author.coauthors = author_coauthors

    # The publications.
    author_publications = []
    cstart = 0
    pagesize = 100
    while True:
        params = urlencode({
            'hl': 'en',
            'view_op': 'list_works',
            'sortby': 'pubdate',
            'user': scholar_id,
            'cstart': cstart,
            'pagesize': pagesize
        })
        req = Request(url + '?' + params)
        opener.open(req)
        res = opener.open(req)
        doc = html.parse(res)

        for tr in doc.xpath('.//tr[@class="gsc_a_tr"]'):
            a = tr.find('.//td[@class="gsc_a_t"]//a')
            # NOTE: When there are no publications, there is a single tr.
            # <tr class="gsc_a_tr"><td class="gsc_a_e" colspan="3">There are no articles in this profile.</td></tr>
            if a is None:
                continue
            purl = a.get('href')

            # The ID of the publication in Google Scholar.
            pub_scholar_id = purl.split('citation_for_view=')[1]

            # Retrieve the publication with that ID (if any).
            publication = Publication.query.filter_by(
                scholar_id=pub_scholar_id).first()
            if publication is None:
                publication = Publication()
            publication.scholar_id = pub_scholar_id

            # The title of the publication.
            publication.title = a.text_content()

            pub_nyear = tr.find('.//td[@class="gsc_a_y"]//span')
            if pub_nyear is not None:
                year_of_publication = pub_nyear.text_content().strip()
                if year_of_publication:

                    # The year of the publication.
                    publication.year_of_publication = int(year_of_publication)

            pub_ncitations = tr.find('.//a[@class="gsc_a_ac"]')

            if pub_ncitations is not None:
                total_citations = pub_ncitations.text_content().strip()
                if total_citations:

                    # The total citations for the publication.
                    publication.total_citations = int(total_citations)

            author_publications.append(publication)

        if doc.xpath('.//button[@id="gsc_bpf_next"]')[0].get("disabled"):
            break

        cstart += 100
    author.publications = author_publications

    # When information about the author was retrieved from Google Scholar.
    author.retrieved_at = datetime.datetime.now()

    db.session.add(author)
    db.session.commit()

    print 'Crawled author ' + scholar_id + '.'
    return 'Done.'
Ejemplo n.º 46
0
def create():
    """Create a news publication from the submitted registration form.

    On validation failure re-renders the creation template with the bound
    form. On success builds a ``Publication``, uploads inline images and
    the optional base64 thumbnail, logs the operation, flashes a success
    message and redirects to the news admin page.
    """
    form = RegistrationForm()
    if not form.validate():
        form.set_choices()
        return render_template('news/new.html', form=form)

    publication = Publication()
    publication.title = form.title.data
    publication.text_call = form.text_call.data
    publication.last_modification = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
    publication.show_home = form.show_home.data
    publication.active = 0  # new submissions start inactive, pending review
    publication.author = form.author.data
    publication.language = form.language.data
    publication.add_subjects(form.subject.data, form.language.data)

    if form.thumb_src.data:
        publication.thumb_src = form.thumb_src.data

    db.session.add(publication)
    # Flush so publication.id is assigned before building storage paths.
    db.session.flush()

    text_content = upload_images_to_s3(
        form.text_content.data, mod.name, publication.id)
    # `publication` is already the identity-mapped instance, so assign
    # directly instead of re-fetching it via Publication.query.get().
    publication.text_content = text_content
    clean_s3_folder(text_content, mod.name, publication.id)

    # A data-URI thumb looks like 'data:image/...;base64,<payload>'; only
    # save when a payload part is actually present.
    if len(form.thumb.data.split(',')) > 1:
        upload_folder = os.path.join(
            app.config['UPLOAD_FOLDER'], mod.name, str(publication.id),
            'images')
        publication.thumb = save_b64_image(
            form.thumb.data.split(',')[1], upload_folder, 'thumb')

    db.session.commit()
    log_operation(module=mod.name, operation='create',
                  user=(g.user.id, g.user.email),
                  objs=[(publication.id, publication.title)])
    message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
    flash(message, 'success')
    return redirect(url_for('news.admin'))
Ejemplo n.º 47
0
def create():
    """Create a news publication from the submitted registration form.

    On validation failure re-renders the creation template with the bound
    form. On success stores the publication (creating its subject row only
    when missing), saves the optional base64 thumbnail, flashes a success
    message and redirects to the news admin page.
    """
    form = RegistrationForm()
    if not form.validate():
        return render_template('news/new.html', form=form)

    publication = Publication()
    publication.title = form.title.data

    # Reuse an existing subject row, creating one only when missing.
    # (The original issued the query twice via repeated .first() calls.)
    subject = PublicationSubject.query.filter_by(
        name=form.subject.data).first()
    if subject is None:
        subject = PublicationSubject()
        subject.name = form.subject.data
        db.session.add(subject)
        db.session.commit()
    publication.subject_id = subject.id

    publication.text_content = form.text_content.data
    publication.text_call = form.text_call.data
    publication.last_modification = datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    publication.publish_date = form.publish_date.data.strftime('%Y-%m-%d')
    publication.show_home = form.show_home.data
    publication.active = 0  # new submissions start inactive, pending review
    publication.author = form.author.data

    db.session.add(publication)
    # Flush so publication.id exists for the upload folder path below.
    db.session.flush()

    # A data-URI thumb looks like 'data:image/...;base64,<payload>'; only
    # save when a payload part is actually present.
    if len(form.thumb.data.split(',')) > 1:
        upload_folder = os.path.join(
            app.config['UPLOAD_FOLDER'], mod.name, str(publication.id),
            'images')
        publication.thumb = save_b64_image(
            form.thumb.data.split(',')[1], upload_folder, 'thumb')

    db.session.commit()

    message = u'Muito obrigado! Sua notícia foi submetida com sucesso!'
    flash(message, 'success')
    return redirect(url_for('news.admin'))
Ejemplo n.º 48
0
def publications_detail(id):
    """Render the details page for the publication with the given id."""
    found = Publication.get(id)
    return render_template(
        'publications/details.html', publication=found)
    def post(self):
        """Reset the datastore from the twitface.json fixture.

        Deletes every Membre, DemandeAmi, Ami and Publication entity, then
        re-creates them all from the JSON file and writes them back in
        bulk. Responds with HTTP 200 on completion.
        """
        with open("twitface.json") as json_file:
            json_data = json.load(json_file)

        # Wipe all existing entities before re-importing.
        ndb.delete_multi(Membre.query().fetch(keys_only=True))
        ndb.delete_multi(DemandeAmi.query().fetch(keys_only=True))
        ndb.delete_multi(Ami.query().fetch(keys_only=True))
        ndb.delete_multi(Publication.query().fetch(keys_only=True))

        lst_membre = []
        lst_demande_ami = []
        lst_publication = []
        lst_ami = []

        for membre in json_data["membres"]:
            cle = ndb.Key("Membre", int(membre["MemNo"]))
            # "MemNom" holds '<first> <last>'; split it into the two fields.
            m = Membre(key=cle,
                       nom=membre["MemNom"].split(' ')[1],
                       prenom=membre["MemNom"].split(' ')[0],
                       sexe=membre["MemSexe"],
                       dateNaissance=datetime.datetime.strptime(
                           membre["MemDateNaissance"], '%Y-%m-%d'),
                       villeOrigine=membre["MemVilleOrigine"],
                       villeActuelle=membre["MemVilleActuelle"],
                       courriel=membre["MemCourriel"],
                       nomUtil=membre["MemNomUtil"],
                       motPasse=membre["MemMotPasse"])
            lst_membre.append(m)

        for ami in json_data["amis"]:
            cle1 = ndb.Key('Membre', int(ami["MemNo1"]))
            cle2 = ndb.Key('Membre', int(ami["MemNo2"]))
            a = Ami(noAmi1=cle1,
                    noAmi2=cle2,
                    dateAmite=datetime.datetime.strptime(
                        ami["DateAmitie"], '%Y-%m-%d'))
            # BUG FIX: was appended to lst_membre, leaving lst_ami empty
            # and its put_multi a no-op.
            lst_ami.append(a)

        for demAmi in json_data["demandesAmis"]:
            # Friend requests are keyed under the invited member's key.
            cle = ndb.Key("Membre", int(demAmi["MemNoInvite"]),
                          "DemandeAmi", int(demAmi["DemAmiNo"]))
            d = DemandeAmi(key=cle,
                           noDemandeur=int(demAmi["MemNoDemandeur"]),
                           amiDate=datetime.datetime.strptime(
                               demAmi["DemAmiDate"], '%Y-%m-%d'))
            # BUG FIX: was appended to lst_membre, leaving lst_demande_ami
            # empty and its put_multi a no-op.
            lst_demande_ami.append(d)

        for pub in json_data["publications"]:
            cle = ndb.Key("Publication", int(pub["PubNo"]))
            p = Publication(key=cle,
                            texte=pub["PubTexte"],
                            noCreateur=int(pub["MemNoCreateur"]),
                            noBabillard=int(pub["MemNoBabillard"]),
                            date=datetime.datetime.strptime(
                                pub["PubDate"], '%Y-%m-%d'))
            # BUG FIX: was appended to lst_membre, leaving lst_publication
            # empty and its put_multi a no-op.
            lst_publication.append(p)

        ndb.put_multi(lst_membre)
        ndb.put_multi(lst_demande_ami)
        ndb.put_multi(lst_publication)
        ndb.put_multi(lst_ami)

        self.response.set_status(200)