Example #1
 def user_upvote_based_recommend(self, upvotes, target_content):
     """
     @data: lists of recommended content ids, keyed by user id
     """
     engine = UserUpvoteBasedRecommendEngine(self)
     data = engine.run(Upvotes(upvotes), Content(target_content))
     return data
Example #2
    def __scrape(self, page_source, minimumUpvotes, minimumComments):
        results = []
        soup = BeautifulSoup(page_source, "html.parser")
        # save source
        # self.gather_web(soup.prettify())
        articles = soup.find_all("article", "badge-entry-container")
        for ele in articles:
            try:
                # skip posts whose media is hidden behind a "read more" link
                read_more_link = ele.find("a", {'class': 'post-read-more'})
                if read_more_link is not None:
                    continue
                upvotes = ele.find("span", {'class': 'badge-item-love-count'})
                comments = ele.find("a", {'class': 'comment'})
                if upvotes is not None:
                    likes = int(upvotes.text.replace(",", ""))
                    if likes > minimumUpvotes or \
                            (comments is not None and int(comments.text.replace(" comments", "")) > minimumComments):
                        title = ele.find("h2", {'class': 'badge-item-title'})
                        content = self.__get_image_or_video(ele)
                        if content is not None and title is not None:
                            src = content.src
                            post = PostModel(title.text, src, content.type, src, likes, content.thumbnail)
                            results.append(post)
            except Exception as ex:
                print('Exception has occurred when scraping data! ' + str(ex))
        return results
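The count parsing above turns text like "1,234" into an int by stripping the thousands separator first. A standalone sketch of that step (the HTML fragment is invented for illustration):

    from bs4 import BeautifulSoup

    html = ('<article class="badge-entry-container">'
            '<span class="badge-item-love-count">1,234</span></article>')
    soup = BeautifulSoup(html, "html.parser")
    count = soup.find("span", {'class': 'badge-item-love-count'})
    likes = int(count.text.replace(",", ""))
    print(likes)  # 1234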
Example #3
    def __scrape(self, posts, minimumUpvotes):
        results = []
        ele = posts
        html = ele.get_attribute('innerHTML')
        soup = BeautifulSoup(html, "html.parser")
        try:
            upvotes = soup.find("div", {'class': 'ratingblock'})
            if upvotes is not None:
                # e.g. "Rating: 3.5/5 (...)" -> "3.5"; ugly but easy.
                parseRating = upvotes.text.split("Rating: ")[1].split("(")[0].split("/")[0]
                likes = float(parseRating)
                if likes > minimumUpvotes:
                    title = soup.find("h1")
                    content = self.__get_image_or_video(soup)
                    likes = int(likes * 1000)  # scale the x/5 rating up to a like-sized count
                    if content is not None and title is not None:
                        src = content.src
                        post = PostModel(title.text, src, content.type, src, likes, content.thumbnail)
                        results.append(post)
        except Exception as ex:
            print('Exception has occurred when scraping data! ' + str(ex))
        return results
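The chained split() calls above peel the leading number out of the rating widget's text. A standalone sketch with a sample string (the exact widget text is an assumption about the site's markup):

    text = "Rating: 3.5/5 (12 votes cast)"
    rating = float(text.split("Rating: ")[1].split("(")[0].split("/")[0])
    print(rating)  # 3.5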
Example #4
 def user_comment_based_recommend(self, comments, target_content):
     """
     @data: lists of recommended content ids, keyed by user id
     """
     engine = UserCommentBasedRecommendEngine(self)
     data = engine.run(Contents(comments), Content(target_content))
     return data
Example #5
 def user_viewed_based_recommend(self, viewHistories, target_content):
     """
     @data: lists of recommended content ids, keyed by user id
     """
     engine = UserViewedBasedRecommendEngine(self)
     data = engine.run(ViewHistories(viewHistories), Content(target_content))
     return data
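Examples #1, #4 and #5 share one shape: wrap the raw signal in a domain type, hand it to a dedicated engine together with the target Content, and return the resulting ids. A minimal sketch of the engine interface those wrappers assume (the base class and its names are hypothetical; the real engines are not shown here):

    class RecommendEngineBase:
        # Hypothetical common base for the User*BasedRecommendEngine classes.
        def __init__(self, service):
            self.service = service  # the object whose method constructs the engine

        def run(self, signals, target_content):
            # rank candidate contents against target_content using the signals
            raise NotImplementedError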
Example #6
    def __scrape(self, posts, minimumUpvotes):
        results = []
        for ele in posts:
            html = ele.get_attribute('innerHTML')
            soup = BeautifulSoup(html, "html.parser")
            #self.gather_web(soup.prettify())
            try:
                upvotes = soup.find("div", {'class': 'score unvoted'})
                if upvotes is not None:
                    # '•' means the score is still hidden, so skip the post
                    if upvotes.text == '•':
                        continue
                    likes = int(upvotes.text)

                    if likes > minimumUpvotes:
                        title = soup.find("a", {'class': 'title'})
                        content = self.__get_image_or_video(soup)
                        if content is not None and title is not None:
                            src = content.src
                            post = PostModel(title.text, src, content.type,
                                             src, likes, content.thumbnail)
                            results.append(post)
            except Exception as ex:
                print('Exception has occurred when scraping data! ' + str(ex))
        return results
Example #7
def when_get_contents_with_one_far_around_keyword_returns_no_result(app):
    # given
    content1 = Content(url="http://content1.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Can hipster-neo-farmers save the world ?")
    content2 = Content(url="http://content2.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Do we have enough quinoa for all the children ?")

    ApiHandler.save(content1, content2)

    # when
    contents = filter_contents_with_keywords('hipsterssss').all()

    # then
    assert len(contents) == 0
Example #8
 def __get_image_or_video(self, soup):
     content = Content()
     item = soup.find('img', {'class': 'post-image'})
     if item is not None:
         src = item.get('src')
         content.src = src
         if src.endswith(".gif"):
             content.type = "gif"
         else:
             content.type = "image"
         return content
     else:
         return None
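All of the __get_image_or_video variants in this listing (Examples #8, #9, #13 and #14) fill the same three fields. A minimal stand-in for the Content holder they populate, enough to run the snippets in isolation (the real class is not part of these examples):

    class Content:
        # Hypothetical stand-in: a bare holder for the fields the scrapers set.
        def __init__(self):
            self.src = None
            self.type = None
            self.thumbnail = ''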
Example #9
 def __get_image_or_video(self, soup): #more like get video
     content = Content()
     video = soup.find('video')
     if video is not None:
         src = soup.find_all('source')[1]  # the second <source>: the mp4 variant
         src = src.get("src")
         content.src = src
         content.type = 'video/mp4'
         thumbnail = video.get("poster")
         content.thumbnail = self.website + thumbnail
         return content
     else:
         return None
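Indexing find_all('source')[1] above assumes the page lists at least two <source> children and that the second one is the mp4 the scraper wants. A sketch with made-up markup showing that assumption:

    from bs4 import BeautifulSoup

    html = ('<video poster="/thumbs/abc.jpg">'
            '<source src="/vid/abc.webm" type="video/webm">'
            '<source src="/vid/abc.mp4" type="video/mp4">'
            '</video>')
    soup = BeautifulSoup(html, "html.parser")
    print(soup.find_all('source')[1].get('src'))  # /vid/abc.mp4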
Example #10
def when_get_contents_with_keyword_tag_returns_result(app):
    # given
    content1 = Content(url="http://content1.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Can hipster-neo-farmers save the world ?")
    content2 = Content(url="http://content2.com",
                       authors=None,
                       summary=None,
                       tags=None,
                       title="Do we have enough quinoa for all the children ?")
    tag1 = Tag(label="Climate")
    content_tag1 = ContentTag(content=content1, tag=tag1)

    ApiHandler.save(content1, content2, content_tag1)

    # when
    contents = filter_contents_with_keywords('clim').all()

    # then
    assert len(contents) == 1
    assert content1 in contents
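Neither test defines filter_contents_with_keywords, but together they pin down its behavior: a whole made-up word matches nothing, while a fragment of a tag label ('clim' against 'Climate') matches. A minimal sketch of a filter with that behavior, assuming SQLAlchemy models like the ones used above (the join and column names are assumptions, not the project's actual code):

    from sqlalchemy import or_

    def filter_contents_with_keywords(keywords):
        pattern = '%{}%'.format(keywords)
        return Content.query \
            .outerjoin(ContentTag).outerjoin(Tag) \
            .filter(or_(Content.title.ilike(pattern),
                        Tag.label.ilike(pattern)))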
Example #11
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            content = Content(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            chars, locs, orgs = content.char_recognition()
            return render_template('uploading.html',
                                   name=filename,
                                   chars=chars,
                                   locs=locs,
                                   orgs=orgs)
        elif file and not allowed_file(file.filename):
            return redirect('/bad_extension')
    return '''
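upload_file leans on an allowed_file helper that is not shown. A minimal sketch of one, following the usual Flask extension-whitelist pattern (the allowed set is an assumption):

    ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}

    def allowed_file(filename):
        return ('.' in filename and
                filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)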
Example #12
 def post(self, args):
     ## Check that the carousel exists
     carousel = Carousel.query.filter_by(name=args['carousel']).first()
     if carousel is None:
         abort(404, message="Carousel not found")
     ## Parse the parameters into mappable data
     contentData = {}
     contentData['name'] = args['name']
     contentData['content_type'] = args['type']
     contentData['content_location'] = args['location']
     contentData['slide_interval'] = args['interval']
     contentData['is_enabled'] = args['enabled']
     ## Create the content instance
     content = Content(**contentData)
     content.carousel_id = carousel.identifier
     ## Insert the Content instance into the database
     db.session.add(content)
     db.session.commit()
     ## Return that the resource has been created
     return "", 201
Example #13
 def __get_image_or_video(self, soup):
     content = Content()
     link = soup.find("a", {'class': 'title'})
     if link is not None:
         #expand = soup.find("div", {'class': 'expando-button'})  # this means it's a gif or video
         src = link.get('href')
         thumbnail_placeholder = soup.find("a", {'class': 'thumbnail'})
         thumbnail_src = ''
         if thumbnail_placeholder is not None:
             thumbnail_src = thumbnail_placeholder.find("img").get('src')
         if not src.strip().endswith(
             ('.jpg', '.jpeg', '.gif', '.png', '.bmp', '.tiff', '.tif',
              '.jpe', '.jfif')):
             content.src = src
             content.type = 'video/mp4'
             content.thumbnail = thumbnail_src
         else:
             content.src = link.get('href')
             content.type = 'image'
             content.thumbnail = thumbnail_src
     return content
Example #14
 def __get_image_or_video(self, ele):
     content = Content()
     video = ele.find("source")
     if video is not None:
         content.type = 'video/mp4'
         content.src = video.get('src')
         thumbnail = ele.find("img", {'class': 'badge-item-img'})
         if thumbnail is not None:
             thumbnail = thumbnail.get('src')
             content.thumbnail = thumbnail
         else:
             content.thumbnail = ''
         return content
     else:
         image = ele.find("img", {'class': 'badge-item-img'})
         if image is not None:
             content.type = 'image'
             content.src = image.get('src')
             content.thumbnail = ''
             return content
         else:
             return None
Example #15
 def __scrape(self, posts, minimumUpvotes, __blank):
     results = []
     for ele in posts:
         html = ele.get_attribute('innerHTML')
         soup = BeautifulSoup(html, "html.parser")
         try:
             upvotes = soup.find("div", {'class': 'sharecounts'})
             if upvotes is not None:
                 upvotes = upvotes.p
             if upvotes is not None:
                 likes = int(
                     upvotes.text.replace(",", "").replace(" shares", ""))
                 if likes > minimumUpvotes:
                     title = soup.find("h2", {'class': 'post-title'})
                     content = self.__get_image_or_video(soup)
                     if content is not None and title is not None:
                         src = content.src
                         post = PostModel(title.text, src, content.type,
                                          src, likes)
                         results.append(post)
         except Exception as ex:
             print('Exception has occurred when scraping data! ' + str(ex))
     return results
Example #16
import sys
import os

sys.path.insert(0, os.path.abspath('..'))
from models.content import Content

content = Content("https://en.wikipedia.org/wiki/The_Royal_Opera")
content.push()
Example #17
def create_contents(with_capture=False):
    logger.info('create_contents')

    contents = []

    contents.append(Content(
        authors='James Delingpole',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        publishedDate=strftime(TODAY),
        summary='Like the thriving polar bear, like the recovering ice caps, like the doing-just-fine Pacific islands, the Great Barrier Reef has become a totem for the liberal-left not because it’s in any kind of danger but because it’s big and famous and photogenic and lots and lots of people would be really sad if it disappeared.',
        tags='great barrier;',
        theme='Climate',
        title='Great Barrier Reef Still Not Dying, Whatever Washington Post Says…',
        url='https://www.breitbart.com/big-government/2017/03/20/delingpole-great-barrier-reef-still-not-dying-whatever-washington-post-says'
    ))

    contents.append(Content(
        authors='David Rose',
        externalThumbUrl=API_URL + '/static/logo.png',
        summary='The Mail on Sunday TODAY reveals astonishing evidence that the organisation that is the world’s leading source of climate data rushed to publish a landmark paper that exaggerated global warming and was timed to influence the historic Paris Agreement on climate change.',
        tags='data',
        title='Daily Mail inflates disagreement between scientists about data handling to make unsupported accusation of data manipulation',
        url='http://www.dailymail.co.uk/sciencetech/article-4192182/World-leaders-duped-manipulated-global-warming-data.html'
    ))

    contents.append(Content(
        authors='Chris Mooney',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        publishedDate=strftime(TODAY + timedelta(days=-1)),
        summary='A large research synthesis, published in one of the world’s most influential scientific journals, has detected a decline in the amount of dissolved oxygen in oceans around the world — a long-predicted result of climate change that could have severe consequences for marine organisms if it continues.',
        tags='ocean;oxygen',
        title='Scientists have just detected a major change to the Earth’s oceans linked to a warming climate',
        url='https://www.washingtonpost.com/news/energy-environment/wp/2017/02/15/its-official-the-oceans-are-losing-oxygen-posing-growing-threats-to-marine-life'
    ))

    contents.append(Content(
        authors='Pascal Santi;Sandrine Cabut',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        summary='Selon plusieurs organisations, les recommandations de prise en charge de cette infection bactérienne sont inadaptées.',
        tags='lyme;',
        theme='Health',
        title='Maladie de Lyme : fronde contre la Haute Autorité de santé',
        url='https://www.lemonde.fr/sciences/content/2018/07/24/maladie-de-lyme-fronde-contre-la-haute-autorite-de-sante_5335369_1650684.html'
    ))

    contents.append(Content(
        authors='C. Le Quéré1, R. Moriarty1, R. M. Andrew2, G. P. Peters2, P. Ciais3, P. Friedlingstein4, S. D. Jones1, S. Sitch5, P. Tans6, A. Arneth7, T. A. Boden8, L. Bopp3, Y. Bozec9,10, J. G. Canadell11, L. P. Chini12, F. Chevallier3, C. E. Cosca13, I. Harris14, M. Hoppema15, R. A. Houghton16, J. I. House17, A. K. Jain18, T. Johannessen19,20, E. Kato21,22, R. F. Keeling23, V. Kitidis24, K. Klein Goldewijk25, C. Koven26, C. S. Landa19,20, P. Landschützer27, A. Lenton28, I. D. Lima29, G. Marland30, J. T. Mathis13, N. Metzl31, Y. Nojiri21, A. Olsen19,20, T. Ono32, S. Peng3, W. Peters33, B. Pfeil19,20, B. Poulter34, M. R. Raupach35,†, P. Regnier36, C. Rödenbeck37, S. Saito38, J. E. Salisbury39, U. Schuster5, J. Schwinger19,20, R. Séférian40, J. Segschneider41, T. Steinhoff42, B. D. Stocker43,44, A. J. Sutton13,45, T. Takahashi46, B. Tilbrook47, G. R. van der Werf48, N. Viovy3, Y.-P. Wang49, R. Wanninkhof50, A. Wiltshire51, and N. Zeng',
        externalThumbUrl=API_URL + '/static/logo.png',
        isReviewable=True,
        summary='Accurate assessment of anthropogenic carbon dioxide (CO2) emissions and their redistribution among the atmosphere, ocean, and terrestrial biosphere is important to better understand the global carbon cycle, support the development of climate policies, and project future climate change. Here we describe data sets and a methodology to quantify all major components of the global carbon budget, including their uncertainties, based on the combination of a range of data, algorithms, statistics, and model estimates and their interpretation by a broad scientific community. We discuss changes compared to previous estimates, consistency within and among components, alongside methodology and data limitations. CO2 emissions from fossil fuel combustion and cement production (EFF) are based on energy statistics and cement production data, respectively, while emissions from land-use change (ELUC), mainly deforestation, are based on combined evidence from land-cover-change data, fire activity associated with deforestation, and models. The global atmospheric CO2 concentration is measured directly and its rate of growth (GATM) is computed from the annual changes in concentration. The mean ocean CO2 sink (SOCEAN) is based on observations from the 1990s, while the annual anomalies and trends are estimated with ocean models. The variability in SOCEAN is evaluated with data products based on surveys of ocean CO2 measurements. The global residual terrestrial CO2 sink (SLAND) is estimated by the difference of the other terms of the global carbon budget and compared to results of independent dynamic global vegetation models forced by observed climate, CO2, and land-cover-change (some including nitrogen–carbon interactions). We compare the mean land and ocean fluxes and their variability to estimates from three atmospheric inverse methods for three broad latitude bands. All uncertainties are reported as ±1σ, reflecting the current capacity to characterise the annual estimates of each component of the global carbon budget. For the last decade available (2004–2013), EFF was 8.9 ± 0.4 GtC yr−1, ELUC 0.9 ± 0.5 GtC yr−1, GATM 4.3 ± 0.1 GtC yr−1, SOCEAN 2.6 ± 0.5 GtC yr−1, and SLAND 2.9 ± 0.8 GtC yr−1. For year 2013 alone, EFF grew to 9.9 ± 0.5 GtC yr−1, 2.3% above 2012, continuing the growth trend in these emissions, ELUC was 0.9 ± 0.5 GtC yr−1, GATM was 5.4 ± 0.2 GtC yr−1, SOCEAN was 2.9 ± 0.5 GtC yr−1, and SLAND was 2.5 ± 0.9 GtC yr−1. GATM was high in 2013, reflecting a steady increase in EFF and smaller and opposite changes between SOCEAN and SLAND compared to the past decade (2004–2013). The global atmospheric CO2 concentration reached 395.31 ± 0.10 ppm averaged over 2013. We estimate that EFF will increase by 2.5% (1.3–3.5%) to 10.1 ± 0.6 GtC in 2014 (37.0 ± 2.2 GtCO2 yr−1), 65% above emissions in 1990, based on projections of world gross domestic product and recent changes in the carbon intensity of the global economy. From this projection of EFF and assumed constant ELUC for 2014, cumulative emissions of CO2 will reach about 545 ± 55 GtC (2000 ± 200 GtCO2) for 1870–2014, about 75% from EFF and 25% from ELUC. This paper documents changes in the methods and data sets used in this new carbon budget compared with previous publications of this living data set (Le Quéré et al., 2013, 2014). All observations presented here can be downloaded from the Carbon Dioxide Information Analysis Center (doi:10.3334/CDIAC/GCP_2014).',
        tags='carbon;PeerVerified',
        title='Global carbon budget 2014',
        url='https://www.earth-syst-sci-data.net/7/47/2015/essd-7-47-2015.html'
    ))

    contents.append(Content(
        authors='Clarisse Fabre',
        isReviewable=False,
        externalThumbUrl=API_URL + '/static/logo.png',
        publishedDate=strftime(TODAY + timedelta(hours=-2)),
        summary='C’est l’histoire d’un garçon qui voulait être Iggy Pop. A Mulhouse, dans les années 1980, il s’imaginait torse nu, le pantalon taille basse, électrisant les foules et se roulant par terre. Mais le rêve post-punk s’est dissous dans les paillettes des combinaisons disco. Et Fred Poulet s’est mis à écrire des chansons, tout en gagnant sa vie comme peintre sur des tournages de film. « C’est pour continuer à rêver que j’écris depuis une trentaine d’années. C’est un peu l’histoire de ma vie », résume le chanteur, emmitouflé dans son imperméable. A 57 ans,il revendique « la désinvolture » comme attitude, au sens de la liberté et de l’élégance.',
        tags='KFC;OnEstChampion;',
        title='Cocorico, Fred Poulet revient à la chanson',
        url='https://www.lemonde.fr/cinema/content/2019/01/10/cocorico-fred-poulet-revient-a-la-chanson_5407141_3476.html'
    ))

    ApiHandler.save(*contents)

    if with_capture:
        for content in contents:
            if content.url:
                logger.info('capture screenshot for {}...'.format(content.url))
                thumb = capture(content.url)
                save_thumb(content, thumb, 0)
                logger.info('capture screenshot for {}...Done.'.format(content.url))

    logger.info('created {} contents'.format(len(contents)))
Example #18
#! /usr/bin/env python

from models.source import Source
from models.content import Content
import random


# Main
if __name__ == "__main__":
    source = Source()
    content = Content(source.url)
    content.save()

    content.push()
Example #19
def crowdtangle_test(url):
    content = Content(url=url)
    ApiHandler.save(content)
    attach_crowdtangle_entities_from_content(content,
                                             request_start_date='2019-09-01')
    return content