def fb_scrape(url):
    """
    Scrape facebook url, get likes, talking_about_count and checkins

    @type url: string
    @param url: facebook url
    @rtype: dict
    @return: likes, talking_about_count and checkins dict data
    """
    data = {'likes': 0, 'talking_about_count': 0, 'checkins': 0}
    facebook_data = None
    if check_url(url, 'www.facebook.com'):
        facebook_id = get_facebook_id(url)
        try:
            facebook_data = facebook_api.request(facebook_id)
        except Exception as e:
            log.error(e)
    if facebook_data:
        # Missing or falsy fields fall back to the zero defaults above.
        data['likes'] = facebook_data.get('likes') or 0
        data['talking_about_count'] = facebook_data.get('talking_about_count') or 0
        data['checkins'] = facebook_data.get('were_here_count') or 0
    return data
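# Illustrative usage (not in the original source). Assumes this module's
# helpers (check_url, get_facebook_id, facebook_api, log) are importable;
# the page URL below is hypothetical. Because fb_scrape always returns the
# zero-filled dict on failure, callers need no None checks:
#
#   stats = fb_scrape('https://www.facebook.com/SomeExamplePage')
#   total_engagement = stats['likes'] + stats['talking_about_count']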
def fblogin():
    """Log in to Facebook and install the authenticated opener, so that
    subsequent urllib2.urlopen calls carry the session cookies."""
    try:
        # Use a local name distinct from the function to avoid shadowing it.
        fb_login = FacebookLogin(fb_username, fb_password)
        urllib2.install_opener(fb_login.opener)
    except Exception as e:
        log.error(e)
def scrap_facebook_raw_data(url):
    """Scrape likes, talking_about_count and checkins straight from the
    public page HTML, for when the Graph API is unavailable."""
    data = {'likes': 0, 'talking_about_count': 0, 'checkins': 0}
    if check_url(url, 'www.facebook.com'):
        number_pat = r'[0-9]+'
        stat_pat = (r'<div class="fsm fwn fcg"><div class="fsm fwn fcg">'
                    r'([0-9]+)(.*)([0-9]+)(.*)([0-9]+)(.*)\w+</div></div>')
        try:
            with closing(urllib2.urlopen(url=url, timeout=30)) as page:
                content = page.read()
            # Strip thousands separators so the numbers match [0-9]+.
            content = re.sub(',', '', content)
            result = re.search(stat_pat, content)
            if result:
                result = re.findall(number_pat, result.group())
                if len(result) >= 3:
                    data['likes'] = int(result[0])
                    data['talking_about_count'] = int(result[1])
                    data['checkins'] = int(result[2])
                    return data
                elif len(result) >= 2:
                    data['likes'] = int(result[0])
                    data['talking_about_count'] = int(result[1])
        except Exception as e:
            log.error('Facebook url %s scrape error!' % url)
            log.error(e)
    return data
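# Self-contained sketch (added for illustration) of the extraction logic
# above: the stat_pat div is Facebook's old public stats block, and after
# stripping thousands separators the three embedded integers are likes,
# talking_about_count and checkins, in page order. The sample HTML below
# is fabricated to mirror that markup.
def _demo_stat_extraction():
    import re
    sample = ('<div class="fsm fwn fcg"><div class="fsm fwn fcg">'
              '12,345 likes 678 talking about this 90 were here</div></div>')
    sample = re.sub(',', '', sample)
    numbers = re.findall('[0-9]+', sample)
    # -> ['12345', '678', '90']: likes, talking_about_count, checkins
    return [int(n) for n in numbers]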
def cal_fb_hm(fb_likes, fb_talking_about_count, fb_checkins):
    """Calculate the facebook health metrics from the raw counts."""
    fb_metrics = {
        'fb_likes': fb_likes,
        'fb_talking_about_count': fb_talking_about_count,
        'fb_checkins': fb_checkins,
        'fb_tl': 0,
        'fb_chl': 0,
        'fb_combined': 0,
        'fb_likes_sqrt': 0,
        'fb_tchk_sqrt': 0,
        'fb_health': 0
    }
    if fb_likes == 0 and fb_talking_about_count == 0 and fb_checkins == 0:
        return fb_metrics
    try:
        # Talking-about and checkin rates, scaled relative to the fan base.
        fb_tl = float(fb_talking_about_count) / float(fb_likes) * 1500
        fb_chl = float(fb_checkins) / float(fb_likes) * 100
        fb_combined = fb_tl + fb_chl
        fb_likes_sqrt = float(fb_likes) ** 0.9
        fb_tchk_sqrt = (float(fb_talking_about_count) + float(fb_checkins)) ** 0.2
        fb_health = (((fb_combined ** 0.5) * fb_likes_sqrt * fb_tchk_sqrt)
                     / 30000000) ** 0.65
        fb_metrics['fb_tl'] = fb_tl
        fb_metrics['fb_chl'] = fb_chl
        fb_metrics['fb_combined'] = fb_combined
        fb_metrics['fb_likes_sqrt'] = fb_likes_sqrt
        fb_metrics['fb_tchk_sqrt'] = fb_tchk_sqrt
        fb_metrics['fb_health'] = fb_health
    except Exception as e:
        # fb_likes == 0 with nonzero activity raises ZeroDivisionError;
        # fall through and return the zeroed metrics.
        log.error(e)
    return fb_metrics
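# Worked example (added for illustration) of the health formula above,
# runnable on its own. With likes=100000, talking_about=2000, checkins=500:
#   fb_tl       = 2000/100000 * 1500 = 30.0
#   fb_chl      = 500/100000 * 100   = 0.5
#   fb_combined = 30.5
#   fb_health   = ((30.5**0.5 * 100000**0.9 * 2500**0.2) / 3e7) ** 0.65
def _demo_fb_health(likes=100000.0, talking=2000.0, checkins=500.0):
    combined = talking / likes * 1500 + checkins / likes * 100
    return (((combined ** 0.5) * likes ** 0.9 * (talking + checkins) ** 0.2)
            / 30000000) ** 0.65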
def yt_scrape(url):
    """
    Scrape youtube url, get view_count, subscriber_count

    @type url: string
    @param url: youtube url
    @rtype: dict
    @return: view_count, subscriber_count dict data
    """
    data = {'view_count': 0, 'subscriber_count': 0}
    youtube_data = None
    if check_url(url, 'www.youtube.com'):
        youtube_id = get_youtube_id(url)
        try:
            youtube_data = youtube_api.GetYouTubeUserEntry(username=youtube_id)
        except Exception as e:
            log.error('Youtube %s scrape error' % url)
            log.error(e)
    if youtube_data:
        data['view_count'] = int(youtube_data.statistics.view_count)
        data['subscriber_count'] = int(youtube_data.statistics.subscriber_count)
    return data
def reSchedule(seconds=86400):
    '''
    Re-schedule the job with a new interval.

    @type seconds: int
    @param seconds: the new interval in seconds
    @rtype: None
    @return: None
    '''
    log.debug('job reschedule seconds %d' % seconds)
    try:
        sched.unschedule_func(doJob)
    except Exception as e:
        # doJob may not be scheduled yet; log and schedule it fresh below.
        log.error(e)
    sched.add_interval_job(doJob, seconds=seconds)
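# Usage sketch (illustrative): `sched` above is assumed to be an APScheduler
# 2.x Scheduler, whose unschedule_func raises when the callable is not
# currently scheduled -- hence the try/except before re-adding. E.g.:
#
#   reSchedule(seconds=3600)   # poll hourly instead of daily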
def tw_scrape(url):
    """
    Scrape twitter url, get followers_count, tweets

    @type url: string
    @param url: twitter url
    @rtype: dict
    @return: followers_count, tweets dict data
    """
    data = {'twitter_id': '', 'followers_count': 0, 'tweets': 0}
    twitter_data = None
    if check_url(url, 'twitter.com'):
        twitter_id = get_twitter_id(url)
        data['twitter_id'] = twitter_id
        try:
            twitter_data = twitter_api.GetUser(twitter_id)
        except Exception as e:
            log.error(e)
    if twitter_data:
        data['followers_count'] = twitter_data.followers_count
        data['tweets'] = twitter_data.statuses_count
    return data
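# Illustrative sketch: get_twitter_id above is defined elsewhere in the
# codebase; a minimal version would just take the first path segment of the
# profile URL. The handle below is hypothetical.
def _demo_twitter_id(url='https://twitter.com/example_handle'):
    from urlparse import urlparse   # Python 2 stdlib, as used in this module
    return urlparse(url).path.strip('/').split('/')[0]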
import json

def get_tw_data(twitter_id):
    """Fetch impact/engagement/influence metrics from the Twitalyzer API."""
    data = {'impact': 0, 'engagement': 0, 'influence': 0,
            'retweeted': 0, 'klout_truereach': 0}
    url = ('http://www.twitalyzer.com/api/2/user.asp?k=%s&u=%s&f=JSON'
           % (api_key, twitter_id))
    try:
        tw_api_data = urllib2.urlopen(url).read()
        if 'error' in tw_api_data:
            log.error('TWITTER NAME: %s:%s' % (twitter_id, tw_api_data))
        # The API returns a one-element JSON array; parse it properly
        # instead of splitting on ':' (which breaks on values that
        # themselves contain colons, e.g. URLs).
        for key, value in json.loads(tw_api_data)[0].items():
            if key.strip() in data:
                data[key.strip()] = value
    except Exception as e:
        log.error('Get twitter data error for %s' % twitter_id)
        log.error(e)
    return data
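# Self-contained sketch (added for illustration) of the response handling:
# the Twitalyzer payload is assumed to be a one-element JSON array, so a
# fabricated sample round-trips through the same parsing path.
def _demo_twitalyzer_parse():
    import json
    sample = '[{"impact": 0.42, "engagement": 0.07, "handle": "example"}]'
    data = {'impact': 0, 'engagement': 0, 'influence': 0,
            'retweeted': 0, 'klout_truereach': 0}
    for key, value in json.loads(sample)[0].items():
        if key in data:          # ignore fields we do not track
            data[key] = value
    return data                  # impact/engagement filled, rest stay 0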
def write_db(self, company_list, db_filename):
    """
    write CompanySocialMedia object list into sqlite3 database

    @type company_list: list
    @param company_list: the CompanySocialMedia object list
    @type db_filename: string
    @param db_filename: the sqlite database file name
    @rtype: int
    @return: insert total count
    """
    conn = sqlite3.connect(db_filename)
    c = conn.cursor()
    # Create table
    c.execute('''CREATE TABLE IF NOT EXISTS COMPANY (
        COMPANY_NAME TEXT,
        FB_LIKES INTEGER,
        FB_TALKING_ABOUT_COUNT INTEGER,
        FB_CHECKINS INTEGER,
        FB_TL REAL,
        FB_CHL REAL,
        FB_COMBINED REAL,
        FB_LIKES_SQRT REAL,
        FB_TCHK_SQRT REAL,
        FB_HEALTH REAL,
        TW_FOLLOWERS_COUNT INTEGER,
        TW_TWEETS INTEGER,
        TW_IMPACT REAL,
        TW_ENGAGEMENT REAL,
        TW_INFLUENCE REAL,
        TW_RETWEETED REAL,
        TW_KLOUT_TRUEREACH REAL,
        TW_HEALTH REAL,
        YT_SUBSCRIBER_COUNT INTEGER,
        YT_VIEW_COUNT INTEGER,
        YT_HEALTH REAL,
        TSSH_RAW REAL,
        TSSH_PWR_REDUCED REAL,
        FB_PERCENT REAL,
        TW_PERCENT REAL,
        YT_PERCENT REAL,
        FB_ABS REAL,
        TW_ABS REAL,
        YT_ABS REAL,
        TIME_TAKEN TIMESTAMP
    )''')
    count = 0
    for company in company_list:
        # Insert a row of data; log and skip rows that fail to bind.
        try:
            c.execute(
                "INSERT INTO COMPANY VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, "
                "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (company.company_name,
                 company.fb_likes,
                 company.fb_talking_about_count,
                 company.fb_checkins,
                 company.fb_metrics['fb_tl'],
                 company.fb_metrics['fb_chl'],
                 company.fb_metrics['fb_combined'],
                 company.fb_metrics['fb_likes_sqrt'],
                 company.fb_metrics['fb_tchk_sqrt'],
                 company.fb_metrics['fb_health'],
                 company.tw_followers_count,
                 company.tw_tweets,
                 company.tw_metrics['impact'],
                 company.tw_metrics['engagement'],
                 company.tw_metrics['influence'],
                 company.tw_metrics['retweeted'],
                 company.tw_metrics['klout_truereach'],
                 company.tw_metrics['tw_health'],
                 company.yt_subscriber_count,
                 company.yt_view_count,
                 company.yt_metrics['yt_health'],
                 company.micro_metrics['tssh_raw'],
                 company.micro_metrics['tssh_pwr_reduced'],
                 company.micro_metrics['fb_percent'],
                 company.micro_metrics['tw_percent'],
                 company.micro_metrics['yt_percent'],
                 company.micro_metrics['fb_abs'],
                 company.micro_metrics['tw_abs'],
                 company.micro_metrics['yt_abs'],
                 company.time_taken))
            count += 1
        except Exception as e:
            log.error(e)
    conn.commit()
    c.close()
    conn.close()
    return count
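# Companion sketch (illustrative): reading rows back out of the COMPANY
# table written above, using only the stdlib. The database file name is a
# placeholder.
def _demo_read_company_db(db_filename='company.db'):
    import sqlite3
    conn = sqlite3.connect(db_filename)
    try:
        cursor = conn.execute(
            'SELECT COMPANY_NAME, FB_HEALTH, TW_HEALTH, YT_HEALTH '
            'FROM COMPANY ORDER BY TSSH_RAW DESC')
        return cursor.fetchall()   # [(name, fb, tw, yt), ...]
    finally:
        conn.close()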
from contextlib import closing
from urlparse import urlparse
import re
import urllib2

from logUtil import log

import twitter
twitter_api = twitter.Api()

import gdata.youtube.service
youtube_api = gdata.youtube.service.YouTubeService()

import facebook
try:
    #access_token = facebook.get_app_access_token('193618104088301', '659217362b250bbdae0b61d1e437e8ca')
    access_token = None
except Exception as e:
    log.error(e)
    access_token = None
facebook_api = facebook.GraphAPI(access_token)


def check_url(url, netloc):
    """Return a truthy value only when url is non-empty and its network
    location matches netloc."""
    return url and urlparse(url).netloc == netloc


def get_facebook_id(url):
    """
    Get facebook id or name from the url

    @type url: string
    @param url: facebook url
    @rtype: string
    @return: facebook id or name