Esempio n. 1
0
from creds import db
from newspaper import Article
from contextlib import closing

with closing(db.cursor()) as cur:
    cur.execute(
        """
    select url, id,headline from database.table
    where url != '' and url is not null and scraped_content is null
    order by id
    """
    )
    rows = cur.fetchall()
    for article in rows:
        url = article[0]
        lang = "en"
        int_id = article[1]
        headline = article[2]
        print str(int_id)
        try:
            print "getting article..."
            article_get = Article(url=url, language=lang)
            print "downloading..."
            article_get.download()
            print "parsing..."
            article_get.parse()
            print "setting scraped content..."
            scraped_content = article_get.text
            print "importing data..."
            cur.execute(
                """
Esempio n. 2
0
        end_date=date,
        metrics=
        'ga:pageviews,ga:sessionDuration,ga:pageviewsPerSession,ga:bounces,ga:sessions',
        dimensions=
        'ga:channelGrouping',  # sort='-ga:visits',  #filters='ga:medium==organic',  #start_index='1',
        max_results='25').execute()

    json_str = json.dumps(response)
    json_dict = json.loads(json_str)

    data = json_dict['rows']
    for record in data:
        channel = record[0]
        page_views = record[1]
        avg_session_dur = record[2]
        views_per_session = record[3]
        bounces = record[4]
        sessions = record[5]

        with closing(db.cursor()) as cur:
            cur.execute(
                """
			insert ignore into database.table (date, channel,type, page_views, bounces, avg_session_dur, views_per_session,sessions)
			values (%s,%s,%s,%s,%s,%s,%s,%s)
			on duplicate key update page_views = %s, bounces = %s, 
			avg_session_dur = %s, views_per_session = %s, sessions = %s
			""", (date, channel, type, page_views, bounces, avg_session_dur,
            views_per_session, sessions, page_views, bounces, avg_session_dur,
            views_per_session, sessions))
            db.commit()
Esempio n. 3
0
    def insert_time(self):
        """Insert this object's post/author fields as one row of ``gnip.twitter``.

        Executes a parameterized ``insert ignore`` whose 36 values are read
        from attributes on ``self`` and then commits on ``db``. ``db`` is not
        defined in this method; it is expected to be a DB-API connection
        available in the enclosing scope.

        A ``MySQLdb.Error`` raised while inserting or committing is not
        propagated: it is logged, the call blocks for 60 seconds, and the
        method returns normally.

        Returns:
            None.
        """
        try:
            with closing(db.cursor()) as cur:
                cur.execute("""
                insert ignore into gnip.twitter (post_id,
                author_name,
                author_username,
                author_link,
                author_created,
                author_profile_image,
                author_personal_url,
                author_followers,
                author_following,
                author_lists_count,
                author_statuses_count,
                author_time_zone,
                author_verified,
                author_languages,
                author_favorites_count,
                post_type,
                post_date,
                post_source,
                post_link,
                post_content,
                post_favorites_count,
                post_hashtags,
                post_trends,
                post_urls,
                post_mentions,
                post_symbols,
                media_urls,
                post_retweet_count,
                post_language,
                post_tags,
                post_clients,
                author_location,
                author_bio,
                post_channel,
                post_lat,
                post_long )

                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                """, (
                    # Order must match the column list above, one value per
                    # placeholder.
                    self.post_id,
                    self.author_name,
                    self.author_username,
                    self.author_link,
                    self.author_created,
                    self.author_profile_image,
                    self.author_personal_url,
                    self.author_followers,
                    self.author_following,
                    self.author_lists_count,
                    self.author_statuses_count,
                    self.author_time_zone,
                    self.author_verified,
                    self.author_languages,
                    self.author_favorites_count,
                    self.post_type,
                    self.post_date,
                    self.post_source,
                    self.post_link,
                    self.post_content,
                    self.post_favorites_count,
                    self.post_hashtags,
                    self.post_trends,
                    self.post_urls,
                    self.post_mentions,
                    self.post_symbols,
                    self.media_urls,
                    self.post_retweet_count,
                    self.post_language,
                    self.topic_tags,      # stored in column post_tags
                    self.client_tags,     # stored in column post_clients
                    self.author_location,
                    self.author_bio,
                    self.post_action,     # stored in column post_channel
                    self.lat,             # stored in column post_lat
                    self.long,            # stored in column post_long
                ))
                db.commit()
        except MySQLdb.Error as e:
            # Timestamp and exception are passed as %-args: concatenating a
            # str with the exception object would raise TypeError right here
            # in the handler and mask the SQL error.
            logging.error('%s SQL Error: %s', datetime.datetime.now(), e)
            # Pause before handing control back to the caller.
            time.sleep(60)
            # No `continue` here: this method body is not inside a loop, so
            # the statement would be a SyntaxError. Retrying is up to the
            # caller.
Esempio n. 4
0
    def insert_time(self):
        """Write one ``gnip.twitter`` row built from this object's attributes.

        Issues a parameterized ``insert ignore`` with 36 values taken from
        ``self`` and commits on ``db``. ``db`` is not defined in this method;
        it is expected to be a DB-API connection available in the enclosing
        scope.

        Errors of type ``MySQLdb.Error`` are handled here rather than raised:
        the error is logged, the call sleeps for 60 seconds, and the method
        then returns normally.

        Returns:
            None.
        """
        try:
            with closing(db.cursor()) as cur:
                cur.execute(
                    """
                insert ignore into gnip.twitter (post_id,
                author_name,
                author_username,
                author_link,
                author_created,
                author_profile_image,
                author_personal_url,
                author_followers,
                author_following,
                author_lists_count,
                author_statuses_count,
                author_time_zone,
                author_verified,
                author_languages,
                author_favorites_count,
                post_type,
                post_date,
                post_source,
                post_link,
                post_content,
                post_favorites_count,
                post_hashtags,
                post_trends,
                post_urls,
                post_mentions,
                post_symbols,
                media_urls,
                post_retweet_count,
                post_language,
                post_tags,
                post_clients,
                author_location,
                author_bio,
                post_channel,
                post_lat,
                post_long )

                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                """,
                    # Values are positional: their order must follow the
                    # column list in the statement above.
                    (self.post_id, self.author_name, self.author_username,
                     self.author_link, self.author_created,
                     self.author_profile_image, self.author_personal_url,
                     self.author_followers, self.author_following,
                     self.author_lists_count, self.author_statuses_count,
                     self.author_time_zone, self.author_verified,
                     self.author_languages, self.author_favorites_count,
                     self.post_type, self.post_date, self.post_source,
                     self.post_link, self.post_content,
                     self.post_favorites_count, self.post_hashtags,
                     self.post_trends, self.post_urls, self.post_mentions,
                     self.post_symbols, self.media_urls,
                     self.post_retweet_count, self.post_language,
                     # topic_tags -> post_tags, client_tags -> post_clients
                     self.topic_tags, self.client_tags, self.author_location,
                     # post_action -> post_channel, lat/long -> post_lat/post_long
                     self.author_bio, self.post_action, self.lat, self.long))
                db.commit()
        except MySQLdb.Error as e:
            # Use lazy %-formatting: the first argument of logging.error is
            # the format string, and 'SQL Error: ' + e would raise TypeError
            # because e is an exception object, not a str.
            logging.error('%s SQL Error: %s', datetime.datetime.now(), e)
            # Wait a minute before returning to the caller.
            time.sleep(60)
            # Deliberately no `continue`: there is no enclosing loop in this
            # method, so it would be a SyntaxError. The caller decides
            # whether to retry.