Example #1
import json
from itertools import cycle
from time import sleep

from mozscape import Mozscape, MozscapeError


def Clients():
    A = GoogleQuery()  # assumed helper returning Google result dicts
    Mozarr = []
    with open('credentials.json') as listO:
        B = json.load(listO)
    # Pair every result with a credential, cycling the shorter list so each
    # request can use a different API key.
    zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
    for zipList in zip_list:
        try:
            print zipList[0]['rootDomain']
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(zipList[0]['root_domain'],
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                zipList[0]['rootDomain'], Mozscape.UMCols.pageAuthority
                | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
            internal_dictionary = {
                'backURL': zipList[0]['rootDomain'],
                'PA': Links['upa'],
                'DA': authorities['pda'],
                'MozRank': Links['umrp'],
                'links': Links['uid'],
            }
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            # Free-tier rate limit: wait before the next request.
            sleep(11)
            continue
    with open('mozscapedata.json', 'wb') as outfile:
        json.dump(Mozarr, outfile, indent=4)


def MozscapeData(query):
    urls = Google.query.filter_by(googleQuery=query).all()
    A = []
    Mozarr = []
    for items in urls:
        A.append({
            'full_url': items.googleFullURL,
            'root_domain': items.googleRootDomain,
        })
    with open('credentials.json') as listO:
        B = json.load(listO)
    for zipList in zip(A, cycle(B)):
        try:
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(str(zipList[0]['root_domain']),
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                str(zipList[0]['full_url']), Mozscape.UMCols.pageAuthority
                | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
            internal_dictionary = {
                'root_domain': zipList[0]['root_domain'],
                'backURL': zipList[0]['full_url'],
                'PA': Links['upa'],
                'DA': authorities['pda'],
                'MozRank': Links['umrp'],
                'links': Links['uid'],
            }
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            print "Moz threw error!"
            # Free-tier rate limit: wait before the next request.
            sleep(11)
            continue

    for updateMoz in Mozarr:
        update = Google.query.filter_by(
            googleRootDomain=updateMoz['root_domain']).first()
        if update is None:
            continue
        update.Links = updateMoz['links']
        update.PA = updateMoz['PA']
        update.DA = updateMoz['DA']
        update.moz_rank = updateMoz['MozRank']
        db.session.commit()
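
The zip/cycle pairing used in both functions above is worth isolating: itertools.cycle repeats the shorter sequence so every item in the longer one still gets a partner. A minimal sketch with illustrative names:

from itertools import cycle

urls = ['a.com', 'b.com', 'c.com']              # hypothetical result list
creds = [{'key': 'id1', 'value': 's1'},         # hypothetical credential pool
         {'key': 'id2', 'value': 's2'}]
pairs = zip(urls, cycle(creds)) if len(urls) > len(creds) else zip(cycle(urls), creds)
for url, cred in pairs:
    print url, cred['key']  # each URL is served by a rotating credential
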
Example #3
import os
import time

from pymongo import MongoClient
from mozscape import Mozscape

# configs and saveBacklinkCount are assumed project-local helpers.


def calBackLinkCount(index):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    # for testing purpose to limit to 10.
    # file = os.path.join(__location__, '../Sites/sites10.txt')
    file = configs.getSiteFile(index)
    client = MongoClient()
    # A new ACCESS_KEY & SECRET_KEY has to be generated for each new user
    # "https://moz.com/community/join"
    mozClient = Mozscape(
        'mozscape-211f9c0fa5',  # ACCESS ID
        'f38b3766d3d991a4054f38f9746d5d2d'  # SECRET_KEY
    )

    with open(file) as f:
        totalLength = sum(1 for line in f)
    x = 0
    tempTenSites = []
    counter = 0
    # backLinkObjects = []

    # file is opened and each url is read
    with open(file) as sites:
        for site in sites:
            counter = counter + 1
            site = site.rstrip('\n')

            # Collect 10 websites from the file before calling the API.
            if x < 9 or counter == totalLength - 1:
                tempTenSites.append(site)
                x = x + 1
            else:
                print("Completed for 10 websites..!")
                tempTenSites.append(site)
                # resetting the counter
                x = 0
                # The Moz API free tier allows one call every 10 seconds;
                # each call can process up to 10 websites.
                if counter > 11:
                    time.sleep(10)
                urlMetrics = mozClient.urlMetrics(
                    tempTenSites)  # MOZ API call for url metrics
                for urlMetric in urlMetrics:
                    # backLinkObjects.append(urlMetric)
                    saveBacklinkCount(urlMetric['uid'], urlMetric['uu'],
                                      client)
                # empty the temp array
                tempTenSites = []

    print("Backlink calculation completed!!")
    print("Backlink count has been added to innovationCount.")
    client.close()
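
Since each user needs their own ACCESS_KEY and SECRET_KEY (see the comment above), a hedged alternative to hardcoding them is to read them from the environment; the variable names here are illustrative:

import os
from mozscape import Mozscape

mozClient = Mozscape(os.environ['MOZ_ACCESS_ID'],     # illustrative names
                     os.environ['MOZ_SECRET_KEY'])
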
Example #4
import time

from mozscape import Mozscape


def retrieve_mozrank(url, accessid, secret_key):
    # If access id or secret is not provided, don't query Mozscape metrics.
    # https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
    if not accessid or not secret_key:
        return {}
    # If URL is local, skip it too.
    client = Mozscape(accessid, secret_key)
    # Retry up to three times, waiting out the rate limit between attempts.
    for i in range(3):
        try:
            return client.urlMetrics([url])[0]
        except Exception as e:
            print("mozscape failed trial %s: %s (%s)" % (i, url, str(e)))
            time.sleep(11)
    return {}
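
A usage sketch for retrieve_mozrank, again with illustrative environment-variable names; an empty dict signals missing credentials or three failed attempts:

import os

metrics = retrieve_mozrank('https://example.com/',
                           os.environ.get('MOZ_ACCESS_ID'),
                           os.environ.get('MOZ_SECRET_KEY'))
print(metrics.get('pda'))  # domain authority, if the lookup succeeded
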
Example #5
from mozscape import Mozscape, MozscapeError


def moz_metrics(url):
    """
    Return link, DA and PA data from moz.com for the exact URL.
    """
    try:
        client = Mozscape('mozscape-d6c8be9444', '3801b7906546108d881d9153131b412e')
        metrics = client.urlMetrics(
            [url],
            Mozscape.UMCols.links | Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

        metrics = metrics[0]
        metrics['pda'] = round(metrics['pda'], 2)
        metrics['upa'] = round(metrics['upa'], 2)

    except MozscapeError:  # maybe use general exception
        metrics = {'uid': None, 'pda': None, 'upa': None}

    return metrics
Example #7
# Fragment: biggerArr, the credential cycle d, and the imports come from the
# full OutReacherDesk listing shown under Example #9 below.
for items in biggerArr[:2]:
    eachQuery = items
    domainArray = []
    eachPageWhoisResult = []
    async_list = []
    url_list = []
    for eachQueryString in eachQuery:
        bingDictionary = {}

        bingDictionary['prospect_url'] = eachQueryString['displayUrl']
        try:
            defined = d.next()
            client = Mozscape(str(defined[0]), str(defined[1]))

            metrics = client.urlMetrics(str(bingDictionary['prospect_url']))
            bingDictionary['PA'] = metrics['upa']
            bingDictionary['DA'] = metrics['pda']
            # 'umrp' is the normalized MozRank; 'ut' is the page title.
            bingDictionary['MozRank'] = metrics['umrp']
        except Exception:
            bingDictionary['PA'] = 0
            bingDictionary['DA'] = 0
            bingDictionary['MozRank'] = 0
        try:
            if "https://" in str(bingDictionary['prospect_url']):
                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
                print 'Facebookgraph takes time'
                loadAsJson = json.loads(response.text)
        except (ValueError, requests.RequestException):
            # Added guard: the original snippet is cut off at this point.
            pass
Example #8
#!/usr/bin/env python

from mozscape import Mozscape
from datetime import datetime
import csv

# Mozscape API Details
client = Mozscape('mozscape-f03b16db58', '5f7418e041cf61841d72ef26c6f7a905')

domain = "smythson.com"

smythsonMetrics = client.urlMetrics(domain)
smythsonDA = smythsonMetrics['pda']

# smythsonMetrics contains other metrics as well as DA.
# Alternatively, un-comment the following to fetch PA too:
# authorities = client.urlMetrics(
#    ['www.smythson.com'],
#    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# Get current month in user-friendly format.
now = datetime.now()
month = now.strftime("%B-%y")

update = [month, domain, str(smythsonDA)]
print(update)

with open('smythsonda.csv', 'a') as fd:
    wr = csv.writer(fd, delimiter=',')
    wr.writerow(update)
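
If smythsonda.csv might not exist yet, a small guard (a sketch, not part of the original script; the header labels are illustrative) can emit a header row on the first write:

import os

write_header = not os.path.exists('smythsonda.csv')
with open('smythsonda.csv', 'a') as fd:
    wr = csv.writer(fd)
    if write_header:
        wr.writerow(['month', 'domain', 'da'])  # illustrative header
    wr.writerow(update)
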
Example #9
#!/usr/bin/env python

from mozscape import Mozscape

client = Mozscape('my-access-id', 'my-secret-key')

# As you may have noticed, there are lots of columns available
# I did what I could to make them easily-accessible, but there
# are a lot, and the names are long. So, the API calls have
# defaults

# Let's get some URL metrics. Results are now an array of dictionaries
# the i'th dictionary is the results for the i'th URL
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])
# Now let's say we only want specific columns in the results
authorities = client.urlMetrics(['www.moz.com'],
                                Mozscape.UMCols.domainAuthority
                                | Mozscape.UMCols.pageAuthority)
# Or if you just need results for one URL
mozMetrics = client.urlMetrics('www.moz.com')

# Now for some anchor text results
anchorResults = client.anchorText('www.moz.com/blog')
# Or for just specific columns
anchorTermResults = client.anchorText('www.moz.com/blog',
                                      cols=Mozscape.ATCols.term)

# Now for some links results
links = client.links('www.moz.com')
# The links API has more columns to specify, as well as sort, scope, etc.
links = client.links('www.moz.com',
                     scope='page_to_domain', sort='domain_authority',
                     filters=['external', 'nofollow'],
                     targetCols=Mozscape.UMCols.url)


import json
import re
from itertools import cycle
from urlparse import urlparse

import lepl.apps.rfc3696
import requests
from bs4 import BeautifulSoup
from mozscape import Mozscape


def OutReacherDesk(query):
    m_dictionary = {}
    m_dictionary['member-79ea116cb0'] = '43053334ef958fa5668a8afd8018195b'
    m_dictionary['member-89df24f83c'] = '0d08685d31a8f724047decff5e445861'
    m_dictionary['member-aad6e04a94'] = '8a08a4f2477b3eda0a7b3afa8eb6faaf'
    m_dictionary['member-1e51eae111'] = '4f1deaa49d0f4ec8f36778b80a58dba5'
    m_dictionary['member-c1d37816b1'] = '47501159d505413721caac9687818f68'
    m_dictionary['member-700eebf334'] = '0e7136b3468cd832f6dda555aa917661'
    m_dictionary['member-774cfbde7e'] = '481981b24f4a4f08d7c7dc9d5038428f'
    m_dictionary['member-34c9052fba'] = '999d2d727bfc11256421c42c529331de'
    m_dictionary['member-587eb1767c'] = '8c36e3b36b7d6d352fd943429d97837e'
    m_dictionary['member-5fa34d7383'] = '3986edd244ae54e1aa96c71404914578'

    headers = {
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 10_1 like Mac OS X)AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0'
    }
    arr = ['0', '23', '37', '51', '65', '79']  # Bing result-page offsets
    appendArr = []
    biggerArr = []
    for i in arr:
        response = requests.get(
            'https://c.bingapis.com/api/custom/opal/otherpage/search?q=' +
            str(query) + '&first=' + str(i) + '&rnoreward=1',
            headers=headers).text
        LoadAsJson = json.loads(response)
        with open('check_thisoutput.json', 'wb') as outfile:
            json.dump(LoadAsJson, outfile, indent=4)
        actualItem = LoadAsJson['answers'][0]['webResults']
        appendArr.append(actualItem)

    # Flatten the collected result pages into a single list.
    biggerArr.append([result for page in appendArr for result in page])

    with open("check_output.json", 'wb') as outfile:
        json.dump(biggerArr, outfile, indent=4)

    rearr = []
    d = cycle(m_dictionary.iteritems())  # round-robin over the API credentials
    for items in biggerArr:
        eachQuery = items
        domainArray = []
        eachPageWhoisResult = []
        async_list = []
        url_list = []
        for eachQueryString in eachQuery:
            try:
                ## moz goes here

                bingDictionary = {}

                bingDictionary['prospect_url'] = eachQueryString['displayUrl']
                try:
                    defined = d.next()
                    client = Mozscape(str(defined[0]), str(defined[1]))

                    metrics = client.urlMetrics(
                        str(eachQueryString['displayUrl']))
                    print metrics
                    bingDictionary['PA'] = metrics['upa']
                    bingDictionary['DA'] = metrics['pda']
                    # 'umrp' is the normalized MozRank; 'ut' is the page title.
                    bingDictionary['MozRank'] = metrics['umrp']
                except Exception:
                    bingDictionary['PA'] = "none"
                    bingDictionary['DA'] = "none"
                    bingDictionary['MozRank'] = "none"

                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
                loadAsJson = json.loads(response.text)
                try:
                    bingDictionary['facebook_shares'] = loadAsJson['share'][
                        'share_count']
                except:
                    bingDictionary['facebook_shares'] = "null"

                bingDictionary['meta_title'] = eachQueryString[
                    'shortTitle'].encode('ascii', 'ignore')
                url = urlparse(eachQueryString['url'])
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=url)
                bingDictionary['root_domain'] = domain
                likes = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:facebook.com%20'
                    + str(domain)).text
                loadAsJson = json.loads(likes)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    for items in parse_likes:
                        if "likes" in items['snippet']:
                            list_of_words = items['snippet'].split()
                            next_word = list_of_words[
                                list_of_words.index("likes") - 1]
                            bingDictionary['facebook_likes'] = next_word
                            bingDictionary['facebook_url'] = items['url']
                except:
                    bingDictionary['facebook_likes'] = None
                    bingDictionary['facebook_url'] = None

                twitter = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:twitter.com%20'
                    + str(domain)).text
                loadAsJson = json.loads(twitter)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    bingDictionary['twitter_url'] = parse_likes[0]['url']
                    bingDictionary['twitter_followers'] = parse_likes[0][
                        'formattedFacts'][0]['items'][0]['text']
                except:
                    bingDictionary['twitter_followers'] = None
                    bingDictionary['twitter_url'] = None

                # Search Google+ for the current domain, matching the
                # Facebook and Twitter lookups above.
                google_plus = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:https://plus.google.com%20'
                    + str(domain)).text
                loadAsJson = json.loads(google_plus)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    for result in parse_likes:
                        list_of_words = result['snippet'].split()
                        for word in list_of_words:
                            if "follower" in word:
                                next_word = list_of_words[
                                    list_of_words.index(word) - 1]
                                bingDictionary[
                                    'google_plus_followers'] = next_word
                                bingDictionary['google_plus_url'] = result[
                                    'url']
                except Exception:
                    bingDictionary['google_plus_followers'] = None
                    bingDictionary['google_plus_url'] = None
                formatDomain = str(domain).replace('http://',
                                                   '').replace('https://', '')
                fixedDomain = formatDomain.split('/')[0].replace(
                    'https://www.', '').replace('http://www.',
                                                '').replace('www.', '')
                print fixedDomain
                whoisAPI = 'http://api.whoxy.com/?key=f5bd9ed47568013u5c00d35155ec3884&whois=' + \
                    str(fixedDomain)
                domainArray.append(whoisAPI)
                bingDictionary['whoisData'] = "None"
                bingDictionary['social_shares'] = "None"
                miniArz = []
                loadAsJson = {}
                try:
                    response = requests.get('http://104.131.43.184/whois/' +
                                            str(fixedDomain)).text
                    min_text = 'http://104.131.43.184/whois/' + str(
                        fixedDomain)
                    url_list.append(str(min_text))
                    loadAsJson = json.loads(response)
                except Exception:
                    pass
                registrant = loadAsJson.get('registrant', {})
                whoisDictionary = {
                    'domain_name': loadAsJson.get('domain_name', "None"),
                    'whois_full_name': registrant.get('name', "None"),
                    'whois_city_name': registrant.get('city_name', "None"),
                    'whois_country_code': registrant.get(
                        'country_code', "None"),
                    'whois_email_address': registrant.get('email', "None"),
                    'whois_phone_number': registrant.get(
                        'phone_number', "None"),
                }
                email_response = ''
                try:
                    email_response = requests.get(domain).text
                except Exception:
                    pass
                phoneRegex = re.compile(
                    r'''
                    # 415-555-0000, 555-9999, (415) 555-0000, 555-000 ext 12345, ext. 12345 x12345
                    (
                    ((\d\d\d) | (\(\d\d\d\)))?          # area code (optional)
                    (\s|-)                              # first separator
                    \d\d\d                              # first 3 digits
                    -                                   # second separator
                    \d\d\d\d                            # last 4 digits
                    (((ext(\.)?\s) |x)                  # extension word-part (optional)
                    (\d{2,5}))?                         # extension number-part (optional)
                    )
                    ''', re.VERBOSE)

                soup = BeautifulSoup(email_response, 'html.parser')
                extractedPhone = phoneRegex.findall(str(soup))
                RSS_ARR = []
                for link in soup.find_all("link",
                                          {"type": "application/rss+xml"}):
                    href = link.get('href')
                    RSS_ARR.append(href)
                allPhoneNumbers = []
                for phoneNumber in extractedPhone:
                    allPhoneNumbers.append(phoneNumber[0])
                email_arr = []
                bingDictionary['phone_numbers'] = allPhoneNumbers
                bingDictionary['RSS_URL'] = RSS_ARR
                emails = re.search(r'[\w\.-]+@[\w\.-]+', str(soup))
                if emails:
                    candidate = emails.group()
                    if candidate.endswith('.'):
                        # Drop a trailing dot picked up by the regex.
                        candidate = candidate[:-1]
                    email_validator = lepl.apps.rfc3696.Email()
                    if email_validator(candidate):
                        email_arr.append(candidate)
                bingDictionary['emails'] = email_arr
                miniArz.append(whoisDictionary)
                bingDictionary['whoisData'] = miniArz
                rearr.append(bingDictionary)
            except (KeyError, RuntimeError):
                pass
Example #11
def querygoogle():
    # Flask view; `headers`, `proxies`, the Google model and `db` are assumed
    # module-level globals, as in the earlier examples.
    form = QueryGoogle(request.form)
    arr = []
    index = 0
    if form.validate_on_submit():
        arr = []
        query = form.QueryGoogle.data
        response = requests.get('https://www.google.com/search?num=3&q=' +
                                query + '&oq=' + query + '&&start=10',
                                headers=headers,
                                proxies=proxies).text
        soup = BeautifulSoup(response)
        title = soup.findAll('div', attrs={'class': 'g'})
        for titles in title:
            try:
                dictionary = {}
                index += 1
                dictionary['index#'] = str(index)
                dictionary['meta_title'] = titles.find('h3').text
                dictionary['full_url'] = titles.find('a')['href']
                rootDomain = dictionary['full_url'].replace('/url?q=', '')
                parsed_uri = urlparse(rootDomain)
                dictionary['rootDomain'] = rootDomain
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
                dictionary['root_domain'] = domain
                dictionary['description'] = titles.find(
                    'span', attrs={'class': 'st'}).text
                addUser = Google(googleQuery=query,
                                 googleMetaTitle=dictionary['meta_title'],
                                 googleFullURL=dictionary['rootDomain'],
                                 googleRootDomain=dictionary['root_domain'])
                db.session.add(addUser)
                arr.append(dictionary)
            except AttributeError:
                continue
        db.session.commit()

        Mozarr = []
        secondaryURL = []
        with open('credentials.json') as listO:
            B = json.load(listO)
        A = arr
        zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
        for zipList in zip_list:
            try:
                client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
                authorities = client.urlMetrics(
                    zipList[0]['root_domain'], Mozscape.UMCols.domainAuthority)
                Links = client.urlMetrics(
                    zipList[0]['rootDomain'], Mozscape.UMCols.pageAuthority
                    | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
                internal_dictionary = {
                    'backURL': zipList[0]['rootDomain'],
                    'PA': Links['upa'],
                    'DA': authorities['pda'],
                    'MozRank': Links['umrp'],
                    'links': Links['uid'],
                }
                Mozarr.append(internal_dictionary)
                secondaryURL.append(zipList[0]['rootDomain'])
            except MozscapeError:
                # Free-tier rate limit: wait before the next request.
                sleep(11)
                continue

        removeDuplicates = list(set(secondaryURL))
        for addtoDb in removeDuplicates:
            updateDatabase = Google.query.filter_by(
                googleRootDomain=addtoDb).all()
            print updateDatabase  # debug only; nothing is written back here

        return render_template('google.html', form=form, arr=arr)
    return render_template('google.html', form=form, arr=arr)
import math
import socket
import time

import pandas as pd
from mozscape import Mozscape

# to_file is assumed to be a local helper that writes the DataFrame out.


def parse_sites(sites_file):

    # Get the list of websites for testing. str.lstrip strips a character
    # set, not a prefix, so use replace to drop the scheme.
    with open(sites_file) as sl:
        sites = [x.strip().replace('http://', '', 1).rstrip('/')
                 for x in sl.readlines()]

    print(sites)

    # Get Moz API credentials.
    with open('keys.txt') as fp:
        credentials = [x.strip() for x in fp.readlines()]

    moz_id = credentials[0]
    moz_key = credentials[1]

    # Access the Moz client using the credentials.
    client = Mozscape(moz_id, moz_key)

    # New lists for storing each website's DA, backlink and IP data.
    sites_da = []
    sites_backlinks = []
    sites_ips = []

    for i in sites:
        print 'Getting DA data for ', i
        metrics = client.urlMetrics([i], Mozscape.UMCols.domainAuthority)
        sites_da.append(math.ceil(metrics[0]['pda']))
        print 'Complete. Going to next domain in list'
        print '...'
        time.sleep(5)

    for k in sites:
        print 'Getting total backlinks for ', k
        metrics = client.urlMetrics([k], Mozscape.UMCols.links)
        sites_backlinks.append(metrics[0]['uid'])
        print 'Complete. Going to next domain in list'
        print '...'
        time.sleep(5)

    for s in sites:
        try:
            print 'Getting the ip address of ', s
            sites_ips.append(socket.gethostbyname(s))
        except socket.error:
            print 'Could not get ip address for ', s
            sites_ips.append('Error getting ip')
            continue

    # Map each website to its DA score, backlink count and IP address.
    sites_metrics = dict(zip(sites, zip(sites_da, sites_backlinks, sites_ips)))

    print sites_metrics

    df = pd.DataFrame.from_dict(data=sites_metrics, orient='index')
    df.columns = ['Domain Authority', 'Backlinks', 'IP']

    print(df)

    to_file(df)
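
The two Mozscape loops above cost one request per site per metric. Because urlMetrics accepts a list of URLs plus a column bitmask, both metrics could come back in a single batched call per group of sites; a sketch under the same client:

batched = client.urlMetrics(
    sites, Mozscape.UMCols.domainAuthority | Mozscape.UMCols.links)
# batched[i]['pda'] and batched[i]['uid'] line up with sites[i]
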
Example #13
from mozscape import Mozscape, MozscapeError
client = Mozscape('member-79ea116cb0', '43053334ef958fa5668a8afd8018195b')
Links = client.urlMetrics(
    'https://www.benfolds.com/&sa=U&ved=0ahUKEwjSxZKtiP_PAhXIqFQKHaomCSsQFggsMAM&usg=AFQjCNGQFScP5dGElKPvz8zL1VyZvkQlxg',
    Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank
    | Mozscape.UMCols.links)
DA = client.urlMetrics('https://www.benfolds.com/',
                       Mozscape.UMCols.domainAuthority)
print DA
Example #14
from itertools import cycle

from mozscape import Mozscape

# B (a list of URLs) and dictionary (access-id -> secret-key pairs) are
# assumed to be defined earlier in the original script.
d = cycle(dictionary.iteritems())
for items in B:
    defined = d.next()
    try:
        client = Mozscape(defined[0], defined[1])
        metrics = client.urlMetrics(str(items))
        print metrics
    except Exception:
        # Log the credential pair and URL that failed.
        print defined[0] + ' ' + defined[1] + ' ' + items
Example #15
#!/usr/bin/env python

from mozscape import Mozscape
from datetime import datetime
import csv

client = Mozscape('mozscape-f03b16db58', '5f7418e041cf61841d72ef26c6f7a905')

domain = input()  # e.g. "example.com"

Metrics = client.urlMetrics(domain)
DA = Metrics['pda']

# To fetch Page Authority as well:
# authorities = client.urlMetrics(
#    [domain],
#    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

now = datetime.now()
month = now.strftime("%B-%y")
print(month)
print(DA)

update = [month, domain, str(DA)]

with open('daquery.csv', 'a') as fd:
    wr = csv.writer(fd, delimiter=',')
    wr.writerow(update)
Example #16
from mozscape import Mozscape
from contextlib import closing
import traceback
import time
import pandas as pd

# Input your member ID and secret key here
client = Mozscape('mozscape-d3909f420d', 'a223a4a9f87f71c8ba7c353598ec43ff')

url = 'https://filecoin.io/'

# urlMetrics on a single URL returns one dict of metrics; wrap it in a list
# so pandas builds a one-row frame rather than raising on scalar values.
mozMetrics = client.urlMetrics(url)

df = pd.DataFrame([mozMetrics])

# https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
# fmrp: normalized measure of the MozRank of the subdomain of the target URL
# fmrr: raw measure of the MozRank of the subdomain of the target URL
# pda: domain authority of the target URL's paid-level domain
# ueid: The number of external equity links from other root domains to pages on the target URL's root domain
# uid: number of internal and external, equity and non-equity links to the subdomain of the target URL
# umrp: normalized (ten-point, logarithmically-scaled) MozRank of the target URL
# umrr: raw (zero to one, linearly-scaled) MozRank of the target URL
# upa: normalized (zero to one hundred, logarithmically-scaled) page authority of the target URL
# us: HTTP status of the target URL
# ut: title of the target URL, if a title is available
# uu: canonical form of the source URL

df = df.rename(index=str,
               columns={'pda': 'domain_authority',  # illustrative mapping
                        'upa': 'page_authority',    # based on the codes
                        'uid': 'links',             # documented above
                        'ueid': 'equity_links',
                        'umrp': 'mozrank',
                        'us': 'status_code',
                        'ut': 'title',
                        'uu': 'canonical_url'})
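
The same column codes can be requested selectively instead of renamed afterwards; a sketch using the bitmask API shown in the earlier examples:

authorities = client.urlMetrics(
    [url], Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)
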
Example #17
        "subdomain_mozrank": subdomain_mozrank,
        "da": da,
        "pa": pa,
        "equity_links": equity_links,
        "links": links,
        "status_code": status_code,
        "moz_time_last_crawled": moz_time_last_crawled
    }
    return result


# client, domain_chunks and the log / log_domain / write_chunk helpers are
# defined earlier in the original script.
for chunk in domain_chunks:
    log('--- Starting request ---')
    metrics = None
    try:
        metrics = client.urlMetrics(chunk)
    except MozscapeError as e:
        log('ERROR! : %s' % (e))
        continue

    results = []
    for idx, domain in enumerate(chunk):
        metric = metrics[idx]

        result = get_result(metric)

        log_domain(result)
        results.append(result)

    write_chunk(results)
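
A hedged sketch of how domain_chunks could be built upstream (names are illustrative; 10 URLs per chunk matches the free-tier batch size noted in the earlier examples):

domains = ['a.com', 'b.com', 'c.com']  # hypothetical input list
domain_chunks = [domains[i:i + 10] for i in range(0, len(domains), 10)]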