Beispiel #1
0
def main():
    """Proof of concept: query AWIS for a few sites and print the response.

    Command-line options (all required): ``--key-id``, ``--secret-key`` and
    ``--sites`` (one or more values).

    Prints the full response, the client's namespace prefixes and, for every
    ``UrlInfoResult`` element, the tag/text of the result itself and of its
    ``DataUrl`` and ``Rank`` elements when those are present.

    Raises:
        RuntimeError: if the response has no ``StatusCode`` element or its
            text is not ``'Success'``.
    """
    argparser = configargparse.ArgumentParser(
        description="AWIS API Proof of Concept")

    argparser.add_argument('--key-id', required=True)
    argparser.add_argument('--secret-key', required=True)
    argparser.add_argument('--sites', required=True, nargs='+')
    args = argparser.parse_args()

    client = AwisApi(args.key_id, args.secret_key)

    tree = client.url_info(args.sites, "Rank", "LinksInCount", "Speed")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(etree_tostring(tree))

    # Two spaces after the colon reproduce the separator that the former
    # two-item print statement emitted.
    print("client ns_prefixes:  %s" % (client.NS_PREFIXES,))
    alexa_prefix = client.NS_PREFIXES['alexa']
    awis_prefix = client.NS_PREFIXES['awis']

    # Explicit check instead of `assert`, which is stripped under `python -O`
    # and would fail with AttributeError when the element is missing.
    elem = tree.find('//{%s}StatusCode' % alexa_prefix)
    status = None if elem is None else elem.text
    if status != 'Success':
        raise RuntimeError('AWIS request failed, status: %s' % (status,))

    for elem_result in tree.findall('//{%s}UrlInfoResult' % awis_prefix):
        print("elem_result tag: %s, text: %s" % (elem_result.tag,
                                                 elem_result.text))

        # Wrap the result element in its own tree so the searches below are
        # rooted at this result.
        tree_result = ElementTree(elem_result)
        elem_url = tree_result.find('//{%s}DataUrl' % awis_prefix)
        if elem_url is not None:
            print("elem_url tag: %s, text: %s" % (elem_url.tag, elem_url.text))
        elem_metric = tree_result.find('//{%s}Rank' % awis_prefix)
        if elem_metric is not None:
            print("elem_metric tag: %s, text: %s " % (elem_metric.tag,
                                                      elem_metric.text))
Beispiel #2
0
    def get_metrics(cls, domains, metrics, options):
        """Fetch the requested AWIS metrics for each domain.

        Args:
            domains: domains to query; also indexed by position to
                cross-check the order of the returned results.
            metrics: names of the metrics to request; each name is also the
                tag looked up inside every ``UrlInfoResult`` element.
            options: object providing ``key_id`` and ``secret_key``.

        Returns:
            A list with one ``{metric: text}`` dict per ``UrlInfoResult``
            element, in response order.

        Raises:
            UserWarning: if the response status is missing or is not
                ``'Success'``, or if a requested metric is absent from a
                result.
            AssertionError: if a result's ``DataUrl`` (minus one trailing
                ``/``) differs from the domain at the same position in
                ``domains``.
        """
        awis_client = AwisApi(options.key_id, options.secret_key)

        tree = awis_client.url_info(domains, *metrics)

        alexa_prefix = awis_client.NS_PREFIXES['alexa']
        awis_prefix = awis_client.NS_PREFIXES['awis']

        # A response without a StatusCode element is reported like any other
        # failed request instead of surfacing as an AttributeError.
        elem = tree.find('//{%s}StatusCode' % alexa_prefix)
        if elem is None or elem.text != 'Success':
            raise UserWarning('unable to get metrics: %s' %
                              etree_tostring(tree))

        metric_values = []
        results = tree.findall('//{%s}UrlInfoResult' % awis_prefix)
        for result_count, elem_result in enumerate(results):
            # Wrap the result element in its own tree so the searches below
            # are rooted at this result.
            tree_result = ElementTree(elem_result)

            domain = None
            elem_url = tree_result.find('//{%s}DataUrl' % awis_prefix)
            if elem_url is not None:
                domain = elem_url.text
                # Guard against empty/missing text before stripping a single
                # trailing slash.
                if domain and domain.endswith("/"):
                    domain = domain[:-1]

            # Raised explicitly (rather than via `assert`) so the check still
            # runs under `python -O`; exception type and message are kept.
            if domain != domains[result_count]:
                raise AssertionError(
                    "sanity check %s == %s" % (domain, domains[result_count]))

            domain_metrics = {}
            for metric in metrics:
                elem_metric = tree_result.find('//{%s}%s' %
                                               (awis_prefix, metric))
                if elem_metric is None:
                    raise UserWarning(
                        'unable to find metric within UrlInfoResult: %s'
                        % etree_tostring(tree_result))
                domain_metrics[metric] = elem_metric.text
            metric_values.append(domain_metrics)

        print("success: %s" % metric_values)
        return metric_values
Beispiel #3
0
# AUTHOR: JOHN SKANDALAKIS
# USE: This program queries the Alexa API to get the URLs associated with an
# Alexa category. To use this file you need an Alexa API key file as provided
# by AWS (rootkey.csv). It is opened by relative path, so it must be located
# in the directory the script is run from.

from awis import AwisApi
import os.path

# Read the access key id and the secret key from the AWS key file and create
# the AWIS client. The first line of the file supplies the id and the second
# line the secret, each taken from the text after the first "=".
with open("rootkey.csv", "r") as keyfile:
    # maxsplit=1 keeps any "=" that is part of the value itself.
    ACCESS_ID = keyfile.readline().split("=", 1)[1].strip()
    SECRET_ACCESS_KEY = keyfile.readline().split("=", 1)[1].strip()
api = AwisApi(ACCESS_ID, SECRET_ACCESS_KEY)

# Walk through categories.csv, which lists the categories to fetch from
# Alexa, one category per line.
with open('categories.csv', 'r') as c:

    for category in c:
        # Drop the trailing newline and any surrounding whitespace.
        category = category.strip()

        # Replace "/" in the category so it maps to a single file name
        # inside the categories/ directory.
        fname = "categories/" + category.replace('/', '-')

        # Skip categories whose output file already exists, so that
        # requests which cost money are not repeated.
        if os.path.isfile(fname):
            print fname, "already exists"
            continue

        # NOTE(review): the code that uses `i` is not visible in this
        # excerpt -- presumably a counter for the requests that follow.
        i = 1
Beispiel #4
0
    def start(self, baseUrl):
        """Query AWIS for ``baseUrl`` and pass the extracted data on.

        Requests a fixed set of response groups for ``'http://' + baseUrl``.
        When the response status is ``Success`` these ``content`` keys are
        filled in:

        * ``visitorsLocation`` -- HTML ``<ul>`` with up to three per-country
          rank entries.
        * ``relatedLinks`` -- HTML ``<ul>`` with up to five related links.
        * ``worldRank`` -- text of the first matching ``Rank`` element, or
          ``None`` when there is none.
        * ``loadTimeMs`` -- median load time, only set when the response
          carries a value for it.

        Finally ``self.sendAndSaveReport(baseUrl, content)`` is called;
        ``content`` is empty when the status is missing or not ``Success``.

        Args:
            baseUrl: host part of the URL to look up, without the scheme.
        """
        # Local import: response text is interpolated into HTML below.
        from xml.sax.saxutils import escape

        def html_escape(value):
            # Escape &, <, > and double quotes so the text is safe both as
            # element content and inside a double-quoted attribute.
            return escape('%s' % (value,), {'"': '&quot;'})

        def ordinal_suffix(value):
            # English ordinal suffix for a rank ("st", "nd", "rd", "th");
            # falls back to "th" when the rank is not an integer.
            try:
                number = int(value)
            except (TypeError, ValueError):
                return 'th'
            if 10 <= number % 100 <= 20:
                return 'th'
            return {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')

        def first_child_value(parent, tag_name):
            # Value of the first child node of the first `tag_name`
            # descendant, or None when the element is missing or empty.
            nodes = parent.getElementsByTagName(tag_name)
            if nodes and nodes[0].firstChild is not None:
                return nodes[0].firstChild.nodeValue
            return None

        queryUrl = 'http://' + baseUrl

        content = {}

        api = AwisApi(aws_config['accessKeyId'], aws_config['secretAccessKey'])
        respXml = api.url_info(queryUrl, 'RelatedLinks', 'Categories', 'Rank',
                               'RankByCountry', 'UsageStats', 'ContactInfo',
                               'Speed', 'Language', 'Keywords', 'OwnedDomains',
                               'LinksInCount', 'SiteData')

        def find_text(prefix_key, tag_name):
            # Text of the first matching element in the response, or None
            # when the element is absent.
            node = respXml.find('//{%s}%s' %
                                (api.NS_PREFIXES[prefix_key], tag_name))
            return None if node is None else node.text

        if find_text('alexa', 'StatusCode') == 'Success':
            dom_doc = parseString(etree.tostring(respXml, encoding='UTF-8'))

            rank_item_template = (
                '<li>%(rank)s<sup>%(suffix)s</sup> most visited website in '
                '<img src="/images/flags/%(countryCode)s.png" '
                'alt="%(countryName)s flag" /> %(countryName)s</li>')
            rank_list_items = []
            for country in dom_doc.getElementsByTagName('aws:Country'):
                country_code = country.getAttribute('Code')
                country_name = country_name_by_code(country_code)
                rank = first_child_value(country, 'aws:Rank')
                if rank is None:
                    continue
                try:
                    rank_list_items.append(rank_item_template % {
                        'countryCode': html_escape(country_code.lower()),
                        'countryName': html_escape(country_name),
                        'rank': html_escape(rank),
                        'suffix': ordinal_suffix(rank),
                    })
                except Exception:
                    # Best effort: an entry that cannot be formatted is
                    # skipped rather than failing the whole report.
                    continue
            content['visitorsLocation'] = '<ul>' + ''.join(
                rank_list_items[:3]) + '</ul>'

            related_item_template = (
                '<li><a href="%s" rel="nofollow" class="external" '
                'target="_blank">%s</a></li>')
            related_list_items = []
            for related in dom_doc.getElementsByTagName('aws:RelatedLink'):
                related_url = first_child_value(related, 'aws:NavigableUrl')
                related_title = first_child_value(related, 'aws:Title')
                # Skip incomplete entries instead of raising on them.
                if related_url is None or related_title is None:
                    continue
                related_list_items.append(
                    related_item_template
                    % (html_escape(related_url), html_escape(related_title)))
            content['relatedLinks'] = '<ul>' + ''.join(
                related_list_items[:5]) + '</ul>'

            content['worldRank'] = find_text('awis', 'Rank')

            load_time = find_text('awis', 'MedianLoadTime')
            if load_time is not None:
                # `long` is the Python 2 builtin integer type.
                content['loadTimeMs'] = long(load_time)

        self.sendAndSaveReport(baseUrl, content)