def TwitterScan(encoded_url):
    """Return the Twitter interaction count for a URL, or False on failure.

    Requests the Twitter URL-count endpoint for ``encoded_url`` and returns
    the value stored under the ``"count"`` key of the JSON response.

    Any failure (the request itself, an invalid JSON body, or a body without
    a ``"count"`` key) is stored in the ``Error`` model, reported on stdout,
    and signalled to the caller by returning ``False``.

    NOTE: a count of 0 is a valid result and compares equal to ``False``
    with ``==``; callers should test the failure case with ``is False``.

    :param encoded_url: URL to look up; it is interpolated as-is into the
        query string, so it must already be URL-encoded.
    :returns: the value of ``"count"`` from the API response, or ``False``.
    """
    # Try to request the API; a failure here never yields a response object.
    try:
        twitter_req = urllib.urlopen(
            "http://urls.api.twitter.com/1/urls/count.json?url={url}".format(url=encoded_url)
        )
    except Exception as e:
        # urllib.urlopen() raised: store the exception in the Error model.
        error = Error(
            error="URL: {url}\nTwitter API error: {error}".format(error=e, url=encoded_url),
            created_by="social_sync.py",
        )
        error.save()
        print(" This one failed! Automatically saved in errors model")
        # Return False so the caller can know it failed.
        return False

    # The request succeeded; decode the body and pull out the count.
    # ValueError: body is not valid JSON. KeyError/TypeError: the JSON does
    # not have the expected {"count": ...} shape.
    try:
        twitter_json = json.load(twitter_req)
        count = twitter_json["count"]
    except (ValueError, KeyError, TypeError) as e:
        error = Error(
            error="URL: {url}\nTwitter API json error: {error}".format(error=e, url=encoded_url),
            created_by="social_sync.py",
        )
        error.save()
        print(" This one failed! Automatically saved in errors model")
        return False
    finally:
        # Always release the underlying connection, on success or failure.
        twitter_req.close()

    return count
def _record_facebook_error(message):
    """Store ``message`` in the Error model, attributed to social_sync.py."""
    error = Error(error=message, created_by="social_sync.py")
    error.save()


def FacebookScan(encoded_url, use_proxies):
    """Return the Facebook share count for a URL, 0 if unknown, or False on failure.

    Requests the Facebook Graph API for ``encoded_url`` (optionally through
    the least recently used proxy from ``OldestProxyDic()``) and returns
    ``facebook_json["share"]["share_count"]``.

    Outcomes:

    * ``False`` - no proxy could be obtained, the request raised, the body
      was not JSON, or the API answered with an ``"error"`` object. Each of
      these (except the missing proxy) is stored in the ``Error`` model.
    * ``0`` - the API answered without a ``"share"`` key. This is also
      stored in the ``Error`` model, but 0 is returned so the caller does not
      treat the URL as failed.
    * otherwise the share count from the response.

    NOTE: 0 compares equal to ``False`` with ``==``; callers should test the
    failure case with ``is False``.

    :param encoded_url: URL to look up. It is passed through ``urllib.quote``
        before being placed in the request path.
    :param use_proxies: when true, route the request through a proxy and
        disable that proxy if it appears to be broken.
    """
    if use_proxies:
        # Maximum time in seconds to wait for urllib.urlopen() below.
        # NOTE: this changes the process-wide socket default, not just this call.
        socket.setdefaulttimeout(15)

        # Get the proxy that was used longest ago, plus the proxies mapping
        # to hand to urllib.urlopen().
        oldest_proxy, oldest_proxy_dic = OldestProxyDic()

        # Fetching a proxy failed; without one this mode cannot continue.
        if oldest_proxy is False:
            return False
    else:
        # No proxy in use. Bind oldest_proxy explicitly so the error handlers
        # below can tell "no proxy" apart from a real proxy object instead of
        # hitting an unbound local variable.
        oldest_proxy = None
        # Placeholder mapping kept from the original code: it has no entry for
        # the "https" scheme, so urllib is expected to connect directly.
        oldest_proxy_dic = {"": ""}

    # Quote the URL: if it contains things like "?X=" the Facebook API would
    # otherwise think we are passing parameters to it.
    encoded_url = urllib.quote(encoded_url)

    # IMPORTANT NOTE: the idea of using proxies was to bypass the API request
    # limit. Combining proxies with an access_token defeats that purpose, but
    # the JSON returned without an access token has a different shape, so a
    # separate function would be needed for token-less proxy requests.
    try:
        facebook_req = urllib.urlopen(
            "https://graph.facebook.com/v2.3/{url}?access_token="
            "CONFIGURE-ME: Add your own Facebook access token".format(url=encoded_url),
            proxies=oldest_proxy_dic,
        )
    except Exception as e:
        _record_facebook_error(
            "URL: {url}\nurllib.urlopen() error: {error}".format(error=e, url=encoded_url)
        )

        # These error texts indicate a broken proxy; mark it as obsolete.
        # Only applies when a proxy was actually used.
        reason = str(e)
        proxy_failed = "[Errno socket error]" in reason or "http protocol error" in reason
        if oldest_proxy is not None and proxy_failed:
            DisableProxy(oldest_proxy, e)

        print(" This one failed! Automatically saved in errors model")
        return False

    # Check that the returned body is actually JSON. If it is not, the proxy
    # (when one was used) is considered obsolete.
    try:
        facebook_json = json.load(facebook_req)
    except Exception as e:
        if oldest_proxy is not None:
            proxy_ip = oldest_proxy.ip
        else:
            proxy_ip = "none (direct connection)"

        _record_facebook_error(
            ("URL: {url}\n"
             "json.load() error: {error}\n"
             "Proxy used during error: {proxy_ip}").format(
                error=e, url=encoded_url, proxy_ip=proxy_ip
            )
        )

        # Mark the proxy as obsolete in the FreeProxy model.
        if oldest_proxy is not None:
            DisableProxy(oldest_proxy, e)

        print(" This one failed! Automatically saved in errors model")
        return False
    finally:
        # Always release the underlying connection, on success or failure.
        facebook_req.close()

    # An "error" key means the Facebook API itself reported a failure.
    if "error" in facebook_json:
        api_message = facebook_json["error"]["message"]

        print(" Facebook API returned exception:")
        print(" {exception}".format(exception=api_message))

        _record_facebook_error(
            "URL: {url}\nFacebook API returned exception:\n{error}".format(
                error=api_message, url=encoded_url
            )
        )
        return False

    # PLEASE READ BEFORE ADDING MORE CHECKS: the "share" check below must stay
    # last. It returns 0 rather than False, so every check that detects a
    # real error has to run before it.
    #
    # The API returned nothing for the requested URL. This is known to happen
    # when the URL is from Facebook itself.
    if "share" not in facebook_json:
        # This may fill the Error model with repeated entries, which is accepted.
        _record_facebook_error(
            "URL: {url}\n"
            "The Facebook API returned no shares.\n"
            "This is known to happen if the URL in question is from Facebook itself".format(
                url=encoded_url
            )
        )

        print(" The Facebook API returned no shares, setting it to 0")

        # Return 0 instead of False so the social interaction entry is kept:
        # the Facebook count is unavailable, but the Twitter one is still of
        # interest.
        return 0

    # Everything went fine; return the total interactions.
    return facebook_json["share"]["share_count"]