def main():
    """Query AWIS for the sites given on the command line and print the result.

    Parses ``--key-id``, ``--secret-key`` and ``--sites``, calls
    ``AwisApi.url_info`` with the ``Rank``, ``LinksInCount`` and ``Speed``
    arguments, prints the serialized response, and then prints the tag and
    text of every ``UrlInfoResult`` together with its ``DataUrl`` and ``Rank``
    elements when they are present.

    Raises:
        AssertionError: if the response has no ``StatusCode`` element or its
            text is not ``'Success'``.
    """
    argparser = configargparse.ArgumentParser(
        description="AWIS API Proof of Concept")
    argparser.add_argument('--key-id', required=True)
    argparser.add_argument('--secret-key', required=True)
    argparser.add_argument('--sites', required=True, nargs='+')
    args = argparser.parse_args()

    client = AwisApi(args.key_id, args.secret_key)
    tree = client.url_info(args.sites, "Rank", "LinksInCount", "Speed")
    print(etree_tostring(tree))
    # Two spaces after the colon reproduce the original
    # `print "client ns_prefixes: ", value` output exactly.
    print("client ns_prefixes:  %s" % (client.NS_PREFIXES,))

    alexa_prefix = client.NS_PREFIXES['alexa']
    awis_prefix = client.NS_PREFIXES['awis']

    # Explicit check instead of `assert`: it is not stripped under `python -O`
    # and a missing StatusCode no longer surfaces as an AttributeError.
    status = tree.find('//{%s}StatusCode' % alexa_prefix)
    if status is None or status.text != 'Success':
        raise AssertionError(
            'AWIS request did not succeed: %s' % etree_tostring(tree))

    for elem_result in tree.findall('//{%s}UrlInfoResult' % awis_prefix):
        print("elem_result tag: %s, text: %s"
              % (elem_result.tag, elem_result.text))
        # Wrap the element so the '//' search is scoped to this result only.
        tree_result = ElementTree(elem_result)

        elem_url = tree_result.find('//{%s}DataUrl' % awis_prefix)
        if elem_url is not None:
            print("elem_url tag: %s, text: %s" % (elem_url.tag, elem_url.text))

        elem_metric = tree_result.find('//{%s}Rank' % awis_prefix)
        if elem_metric is not None:
            print("elem_metric tag: %s, text: %s "
                  % (elem_metric.tag, elem_metric.text))
def get_metrics(cls, domains, metrics, options):
    """Fetch the requested AWIS metrics for every domain.

    Args:
        domains: indexable sequence of domains passed to
            ``AwisApi.url_info``; the n-th ``UrlInfoResult`` is checked
            against ``domains[n]``.
        metrics: metric names; each is passed to ``url_info`` and then looked
            up as an element name inside every ``UrlInfoResult``.
        options: object providing ``key_id`` and ``secret_key`` attributes.

    Returns:
        A list with one dict per ``UrlInfoResult``, mapping each metric name
        to the text of the matching element.

    Raises:
        UserWarning: if the response status is missing or not ``'Success'``,
            or if a metric element is missing from a result.
        AssertionError: if a result's ``DataUrl`` (ignoring one trailing
            slash) does not match the domain at the same position.
    """
    awis_client = AwisApi(options.key_id, options.secret_key)
    tree = awis_client.url_info(domains, *metrics)

    alexa_prefix = awis_client.NS_PREFIXES['alexa']
    awis_prefix = awis_client.NS_PREFIXES['awis']

    # A missing StatusCode is reported the same way as a failed status
    # instead of crashing with AttributeError on `None.text`.
    elem = tree.find('//{%s}StatusCode' % alexa_prefix)
    if elem is None or elem.text != 'Success':
        raise UserWarning('unable to get metrics: %s' % etree_tostring(tree))

    metric_values = []
    elems_results = enumerate(
        tree.findall('//{%s}UrlInfoResult' % awis_prefix))
    for result_count, elem_result in elems_results:
        # Wrap the element so the '//' searches are scoped to this result.
        tree_result = ElementTree(elem_result)

        elem_url = tree_result.find('//{%s}DataUrl' % awis_prefix)
        if elem_url is not None:
            # Empty/None text is treated as '' so the slash test cannot fail
            # with IndexError/TypeError; the mismatch is reported below.
            domain = elem_url.text or ""
            if domain.endswith("/"):
                domain = domain[:-1]
            # Explicit raise rather than `assert`, so the ordering check is
            # still enforced under `python -O`.
            # NOTE(review): the original layout was flattened; this check is
            # assumed to apply only when DataUrl is present — confirm.
            if domain != domains[result_count]:
                raise AssertionError(
                    "sanity check %s == %s" % (domain, domains[result_count]))

        domain_metrics = {}
        for metric in metrics:
            elem_metric = tree_result.find('//{%s}%s' % (awis_prefix, metric))
            if elem_metric is None:
                raise UserWarning(
                    'unable to find metric within UrlInfoResult: %s'
                    % etree_tostring(tree_result))
            domain_metrics[metric] = elem_metric.text
        metric_values.append(domain_metrics)

    print("success: %s" % metric_values)
    return metric_values
# AUTHOR: JOHN SKANDALAKIS
# USE: This program queries the Alexa API to get the URLs associated with an
# Alexa category. It requires an Alexa API key file, as provided by AWS,
# located in the same directory as this script.
from awis import AwisApi
import os.path

# Read the API key id and secret from the key file (first and second line,
# value after the '=') and create the Alexa client object.
ACCESS_ID = None
SECRET_ACCESS_KEY = None
with open("rootkey.csv", "r") as keyfile:
    ACCESS_ID = keyfile.readline().split("=")[1].strip()
    SECRET_ACCESS_KEY = keyfile.readline().split("=")[1].strip()

api = AwisApi(ACCESS_ID, SECRET_ACCESS_KEY)

# Walk the list of categories to fetch from Alexa, one per line.
with open('categories.csv', 'r') as c:
    for category in c:
        category = category.strip()
        # Output file name: the category path with '/' replaced by '-'.
        fname = "categories/%s" % category.replace('/', '-')

        # Skip categories that were already fetched, so that no money is
        # spent on repeating the same query.
        if os.path.isfile(fname):
            print("%s already exists" % fname)
            continue

        # NOTE(review): `i` is not used within the visible code.
        i = 1
def start(self, baseUrl):
    """Build a report for ``baseUrl`` from AWIS data and pass it on.

    Calls ``AwisApi.url_info`` for ``'http://' + baseUrl`` and, when the
    response status is ``'Success'``, fills a ``content`` dict with:

    * ``visitorsLocation`` - HTML list built from the first three
      ``aws:Country`` entries that carry a rank,
    * ``relatedLinks`` - HTML list built from the first five complete
      ``aws:RelatedLink`` entries,
    * ``worldRank`` - text of the first ``Rank`` element (``None`` if absent),
    * ``loadTimeMs`` - ``MedianLoadTime`` as an integer, only when present.

    The dict (empty when the request did not succeed) is then handed to
    ``self.sendAndSaveReport(baseUrl, content)``.

    Args:
        baseUrl: host name without scheme; ``'http://'`` is prepended.
    """
    # Stdlib escaping helper that exists on both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    attr_entities = {'"': '&quot;'}  # extra escaping for attribute values
    rank_item_template = (
        '<li>%(rank)s<sup>th</sup> most visited website in '
        '<img src="/images/flags/%(countryCode)s.png" '
        'alt="%(countryName)s flag" /> %(countryName)s</li>')
    related_item_template = (
        '<li><a href="%s" rel="nofollow" class="external" '
        'target="_blank">%s</a></li>')

    def first_text(node, tag_name):
        """Return the text of the first ``tag_name`` descendant, or None."""
        matches = node.getElementsByTagName(tag_name)
        if len(matches) > 0 and matches[0].firstChild is not None:
            return matches[0].firstChild.nodeValue
        return None

    queryUrl = 'http://' + baseUrl
    content = {}
    api = AwisApi(aws_config['accessKeyId'], aws_config['secretAccessKey'])
    respXml = api.url_info(queryUrl, 'RelatedLinks', 'Categories', 'Rank',
                           'RankByCountry', 'UsageStats', 'ContactInfo',
                           'Speed', 'Language', 'Keywords', 'OwnedDomains',
                           'LinksInCount', 'SiteData')
    alexa_ns = api.NS_PREFIXES['alexa']
    awis_ns = api.NS_PREFIXES['awis']

    # A missing StatusCode is treated as "not successful" instead of raising
    # AttributeError on `None.text`.
    status_elem = respXml.find('//{%s}StatusCode' % alexa_ns)
    if status_elem is not None and status_elem.text == 'Success':
        dom_doc = parseString(etree.tostring(respXml, encoding='UTF-8'))

        rank_list_items = []
        for country in dom_doc.getElementsByTagName('aws:Country'):
            country_code = country.getAttribute('Code')
            country_name = country_name_by_code(country_code)
            rank = first_text(country, 'aws:Rank')
            if rank is None:
                continue
            try:
                # Values taken from the API response are escaped before they
                # are placed into markup.
                rank_list_items.append(rank_item_template % {
                    'countryCode': escape(country_code.lower(),
                                          attr_entities),
                    'countryName': country_name,
                    'rank': escape(rank),
                })
            except Exception:
                # Best effort: an entry that cannot be rendered is skipped.
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
        content['visitorsLocation'] = (
            '<ul>' + ''.join(rank_list_items[:3]) + '</ul>')

        related_list_items = []
        for related in dom_doc.getElementsByTagName('aws:RelatedLink'):
            related_url = first_text(related, 'aws:NavigableUrl')
            related_title = first_text(related, 'aws:Title')
            # Same guard as for ranks: skip entries lacking a URL or title
            # rather than failing with IndexError/AttributeError.
            if related_url is None or related_title is None:
                continue
            related_list_items.append(related_item_template % (
                escape(related_url, attr_entities), escape(related_title)))
        content['relatedLinks'] = (
            '<ul>' + ''.join(related_list_items[:5]) + '</ul>')

        rank_elem = respXml.find('//{%s}Rank' % awis_ns)
        content['worldRank'] = (
            rank_elem.text if rank_elem is not None else None)

        load_elem = respXml.find('//{%s}MedianLoadTime' % awis_ns)
        if load_elem is not None and load_elem.text is not None:
            # `long` is the Python 2 builtin this code was written against.
            content['loadTimeMs'] = long(load_elem.text)

    # NOTE(review): the original layout was flattened; the report is assumed
    # to be sent whether or not the request succeeded — confirm.
    self.sendAndSaveReport(baseUrl, content)