def Clients():
    A = GoogleQuery()
    Mozarr = []
    with open('credentials.json') as listO:
        B = json.load(listO)
    # Cycle the shorter list so every entry of the longer one gets a partner.
    zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
    for zipList in zip_list:
        try:
            print(zipList[0]['rootDomain'])
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(zipList[0]['root_domain'],
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                zipList[0]['rootDomain'],
                Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank |
                Mozscape.UMCols.links)
            internal_dictionary = {}
            internal_dictionary['backURL'] = zipList[0]['rootDomain']
            internal_dictionary['PA'] = Links['upa']
            internal_dictionary['DA'] = authorities['pda']
            internal_dictionary['MozRank'] = Links['umrp']
            internal_dictionary['links'] = Links['uid']
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            # Back off past the free-tier rate limit before continuing.
            sleep(11)
            continue
    with open('mozscapedata.json', 'w') as outfile:
        json.dump(Mozarr, outfile, indent=4)
def MozscapeData(query):
    urls = Google.query.filter_by(googleQuery=query).all()
    A = []
    Mozarr = []
    for items in urls:
        mozMiniDict = {}
        mozMiniDict['full_url'] = items.googleFullURL
        mozMiniDict['root_domain'] = items.googleRootDomain
        A.append(mozMiniDict)
    with open('credentials.json') as listO:
        B = json.load(listO)
    for zipList in zip(A, cycle(B)):
        try:
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(str(zipList[0]['root_domain']),
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                str(zipList[0]['full_url']),
                Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank |
                Mozscape.UMCols.links)
            internal_dictionary = {}
            internal_dictionary['root_domain'] = zipList[0]['root_domain']
            internal_dictionary['backURL'] = zipList[0]['full_url']
            internal_dictionary['PA'] = Links['upa']
            internal_dictionary['DA'] = authorities['pda']
            internal_dictionary['MozRank'] = Links['umrp']
            internal_dictionary['links'] = Links['uid']
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            print("Moz threw error!")
            sleep(11)
            continue
    for updateMoz in Mozarr:
        update = Google.query.filter_by(
            googleRootDomain=updateMoz['root_domain']).first()
        update.Links = updateMoz['links']
        update.PA = updateMoz['PA']
        update.DA = updateMoz['DA']
        update.moz_rank = updateMoz['MozRank']
        db.session.commit()
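# Both functions above spread request quota across several Mozscape accounts
# by pairing each URL with the next credential pair via zip()/cycle(). A
# minimal, self-contained sketch of that rotation pattern; the key/secret
# values and URLs here are hypothetical placeholders:
from itertools import cycle

from mozscape import Mozscape

credentials = [
    {'key': 'member-aaaaaaaaaa', 'value': 'secret-a'},  # placeholder pair
    {'key': 'member-bbbbbbbbbb', 'value': 'secret-b'},  # placeholder pair
]
urls = ['example.com', 'example.org', 'example.net']

for url, cred in zip(urls, cycle(credentials)):
    # Each URL is fetched with the next credential pair in round-robin order.
    client = Mozscape(cred['key'], cred['value'])
    metrics = client.urlMetrics(url)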
def calBackLinkCount(index):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    # For testing purposes, limit to 10 sites:
    # file = os.path.join(__location__, '../Sites/sites10.txt')
    file = configs.getSiteFile(index)
    client = MongoClient()
    # A new ACCESS_KEY & SECRET_KEY has to be generated for each new user:
    # https://moz.com/community/join
    mozClient = Mozscape(
        'mozscape-211f9c0fa5',  # ACCESS ID
        'f38b3766d3d991a4054f38f9746d5d2d'  # SECRET_KEY
    )
    totalLength = sum(1 for line in open(file))
    x = 0
    tempTenSites = []
    counter = 0
    # The file is opened and each URL is read.
    with open(file) as sites:
        for site in sites:
            counter = counter + 1
            site = site.rstrip('\n')
            # Collect 10 websites from the file before calling the API.
            if x < 9 or counter == totalLength - 1:
                tempTenSites.append(site)
                x = x + 1
            else:
                print("Completed for 10 websites..!")
                tempTenSites.append(site)
                # Reset the counter.
                x = 0
                # The Moz API free tier allows one request every 10 seconds;
                # each call can process up to 10 websites.
                if counter > 11:
                    time.sleep(10)
                urlMetrics = mozClient.urlMetrics(
                    tempTenSites)  # Moz API call for URL metrics
                for urlMetric in urlMetrics:
                    saveBacklinkCount(urlMetric['uid'], urlMetric['uu'], client)
                # Empty the temp list.
                tempTenSites = []
    print("Backlink calculation completed!!")
    print("Backlink count has been added to innovationCount.")
    client.close()
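# The batching logic above is easy to get wrong (the last partial batch needs
# a special case). A hedged alternative sketch using a generic chunking
# helper; chunked(), backlink_counts(), and RATE_LIMIT_SECONDS are
# illustrative names, not part of the original code:
import time

RATE_LIMIT_SECONDS = 10  # assumed free-tier limit: one request per 10 seconds

def chunked(items, size=10):
    """Yield consecutive slices of at most `size` items."""
    for start in range(0, len(items), size):
        yield items[start:start + size]

def backlink_counts(moz_client, sites):
    for batch in chunked(sites, 10):
        metrics = moz_client.urlMetrics(batch)  # one API call per 10 URLs
        for m in metrics:
            yield m['uu'], m['uid']  # canonical URL, total link count
        time.sleep(RATE_LIMIT_SECONDS)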
def retrieve_mozrank(url, accessid, secret_key):
    # If the access ID or secret key is not provided, don't query Mozscape.
    # https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
    if not accessid or not secret_key:
        return {}
    # If the URL is local, skip it too.
    client = Mozscape(accessid, secret_key)
    for i in range(0, 3):
        try:
            return client.urlMetrics([url])[0]
        except Exception as e:
            print("mozscape failed trial %s: %s (%s)" % (i, url, str(e)))
            time.sleep(11)
    return {}
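# A short usage sketch for retrieve_mozrank (the credentials shown are
# placeholders). It returns an empty dict when credentials are missing or
# when all three attempts fail, so callers can read fields with .get():
metrics = retrieve_mozrank('www.example.com', 'my-access-id', 'my-secret-key')
domain_authority = metrics.get('pda')  # None if the lookup failed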
def moz_metrics(url):
    """Return data from moz.com for the exact URL."""
    try:
        client = Mozscape('mozscape-d6c8be9444',
                          '3801b7906546108d881d9153131b412e')
        metrics = client.urlMetrics(
            [url],
            Mozscape.UMCols.links | Mozscape.UMCols.domainAuthority |
            Mozscape.UMCols.pageAuthority)
        metrics = metrics[0]
        metrics['pda'] = round(metrics['pda'], 2)
        metrics['upa'] = round(metrics['upa'], 2)
    except MozscapeError:  # maybe use a general Exception instead
        metrics = {'uid': None, 'pda': None, 'upa': None}
    return metrics
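# Usage sketch for moz_metrics (the URL is illustrative). Because the
# requested columns are links, domainAuthority, and pageAuthority, the
# returned dict carries the 'uid', 'pda', and 'upa' keys, with None values
# when the API call failed:
result = moz_metrics('www.example.com')
print(result['pda'], result['upa'], result['uid'])  # DA, PA, link count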
for items in biggerArr[:2]:
    eachQuery = items
    domainArray = []
    eachPageWhoisResult = []
    async_list = []
    url_list = []
    for eachQueryString in eachQuery:
        bingDictionary = {}
        bingDictionary['prospect_url'] = eachQueryString['displayUrl']
        try:
            defined = next(d)
            client = Mozscape(str(defined[0]), str(defined[1]))
            metrics = client.urlMetrics(str(bingDictionary['prospect_url']))
            bingDictionary['PA'] = metrics['upa']
            bingDictionary['DA'] = metrics['pda']
            # Note: 'ut' is the URL title column; MozRank itself is 'umrp'.
            bingDictionary['MozRank'] = metrics['ut']
        except Exception:
            bingDictionary['PA'] = 0
            bingDictionary['DA'] = 0
            bingDictionary['MozRank'] = 0
        try:
            if "https://" in str(bingDictionary['prospect_url']):
                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
                print('The Facebook Graph call takes time')
                loadAsJson = json.loads(response.text)
                # print loadAsJson
#!/usr/bin/env python
from mozscape import Mozscape
from datetime import datetime
import csv

# Mozscape API details
client = Mozscape('mozscape-f03b16db58', '5f7418e041cf61841d72ef26c6f7a905')

domain = "smythson.com"
smythsonMetrics = client.urlMetrics(domain)
smythsonDA = smythsonMetrics['pda']
# smythsonMetrics contains other metrics as well as DA. Alternatively,
# un-comment the following to find PA:
# authorities = client.urlMetrics(
#     ('www.smythson.com'),
#     Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# Get the current month in a user-friendly format.
now = datetime.now()
month = now.strftime("%B-%y")

update = [month, domain, str(smythsonDA)]
print(update)

with open('smythsonda.csv', 'a') as fd:
    wr = csv.writer(fd, delimiter=',')
    wr.writerow(update)
#!/usr/bin/env python
from mozscape import Mozscape

client = Mozscape('my-access-id', 'my-secret-key')

# As you may have noticed, there are lots of columns available. I did what
# I could to make them easily accessible, but there are a lot, and the
# names are long. So, the API calls have defaults.

# Let's get some URL metrics. Results are now an array of dictionaries;
# the i'th dictionary is the result for the i'th URL.
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])

# Now let's say we only want specific columns in the results:
authorities = client.urlMetrics(
    ['www.moz.com'],
    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# Or if you just need results for one URL:
mozMetrics = client.urlMetrics('www.moz.com')

# Now for some anchor text results:
anchorResults = client.anchorText('www.moz.com/blog')

# Or for just specific columns:
anchorTermResults = client.anchorText('www.moz.com/blog',
                                      cols=Mozscape.ATCols.term)

# Now for some links results:
links = client.links('www.moz.com')

# The links API has more columns to specify, as well as sort, scope, etc.
# For example (the scope/sort values shown here are illustrative):
links = client.links('www.moz.com', scope='page_to_domain',
                     sort='domain_authority')
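# Since urlMetrics returns results in the same order as the input list, a
# handy follow-up pattern is to zip the inputs back together with the outputs:
for url, m in zip(['www.moz.com', 'www.moz.com/blog'], metrics):
    print(url, m.get('pda'), m.get('upa'))  # domain authority, page authority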
def OutReacherDesk(query):
    m_dictionary = {}
    m_dictionary['member-79ea116cb0'] = '43053334ef958fa5668a8afd8018195b'
    m_dictionary['member-89df24f83c'] = '0d08685d31a8f724047decff5e445861'
    m_dictionary['member-aad6e04a94'] = '8a08a4f2477b3eda0a7b3afa8eb6faaf'
    m_dictionary['member-1e51eae111'] = '4f1deaa49d0f4ec8f36778b80a58dba5'
    m_dictionary['member-c1d37816b1'] = '47501159d505413721caac9687818f68'
    m_dictionary['member-700eebf334'] = '0e7136b3468cd832f6dda555aa917661'
    m_dictionary['member-774cfbde7e'] = '481981b24f4a4f08d7c7dc9d5038428f'
    m_dictionary['member-34c9052fba'] = '999d2d727bfc11256421c42c529331de'
    m_dictionary['member-587eb1767c'] = '8c36e3b36b7d6d352fd943429d97837e'
    m_dictionary['member-5fa34d7383'] = '3986edd244ae54e1aa96c71404914578'
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_1 like Mac OS X) '
                      'AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0'
    }
    arr = ['0', '23', '37', '51', '65', '79']
    appendArr = []
    biggerArr = []
    for i in arr:
        response = requests.get(
            'https://c.bingapis.com/api/custom/opal/otherpage/search?q=' +
            str(query) + '&first=' + str(i) + '&rnoreward=1',
            headers=headers).text
        LoadAsJson = json.loads(response)
        with open('check_thisoutput.json', 'w') as outfile:
            json.dump(LoadAsJson, outfile, indent=4)
        actualItem = LoadAsJson['answers'][0]['webResults']
        appendArr.append(actualItem)
    try:
        biggerArr.append(appendArr[0] + appendArr[1] + appendArr[2] +
                         appendArr[3] + appendArr[4] + appendArr[5])
    except IndexError:
        pass
    with open("check_output.json", 'w') as outfile:
        json.dump(biggerArr, outfile, indent=4)
    rearr = []
    d = cycle(m_dictionary.items())
    for items in biggerArr:
        eachQuery = items
        domainArray = []
        eachPageWhoisResult = []
        async_list = []
        url_list = []
        for eachQueryString in eachQuery:
            try:
                # Moz metrics go here.
                bingDictionary = {}
                bingDictionary['prospect_url'] = eachQueryString['displayUrl']
                try:
                    defined = next(d)
                    client = Mozscape(str(defined[0]), str(defined[1]))
                    metrics = client.urlMetrics(
                        str(eachQueryString['displayUrl']))
                    print(metrics)
                    bingDictionary['PA'] = metrics['upa']
                    bingDictionary['DA'] = metrics['pda']
                    # Note: 'ut' is the URL title column, not MozRank ('umrp').
                    bingDictionary['MozRank'] = metrics['ut']
                except Exception:
                    bingDictionary['PA'] = "none"
                    bingDictionary['DA'] = "none"
                    bingDictionary['MozRank'] = "none"
                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
                loadAsJson = json.loads(response.text)
                try:
                    bingDictionary['facebook_shares'] = \
                        loadAsJson['share']['share_count']
                except Exception:
                    bingDictionary['facebook_shares'] = "null"
                bingDictionary['meta_title'] = eachQueryString[
                    'shortTitle'].encode('ascii', 'ignore').decode('ascii')
                url = urlparse(eachQueryString['url'])
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=url)
                bingDictionary['root_domain'] = domain
                likes = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search'
                    '?q=site:facebook.com%20' + str(domain)).text
                loadAsJson = json.loads(likes)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    for items in parse_likes:
                        if "likes" in items['snippet']:
                            list_of_words = items['snippet'].split()
                            next_word = list_of_words[
                                list_of_words.index("likes") - 1]
                            bingDictionary['facebook_likes'] = next_word
                            bingDictionary['facebook_url'] = items['url']
                except Exception:
                    bingDictionary['facebook_likes'] = None
                    bingDictionary['facebook_url'] = None
                twitter = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search'
                    '?q=site:twitter.com%20' + str(domain)).text
                loadAsJson = json.loads(twitter)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    bingDictionary['twitter_url'] = parse_likes[0]['url']
                    bingDictionary['twitter_followers'] = parse_likes[0][
                        'formattedFacts'][0]['items'][0]['text']
                except Exception:
                    bingDictionary['twitter_followers'] = None
                    bingDictionary['twitter_url'] = None
                # Note: the Google+ query below hardcodes a specific domain.
                google_plus = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search'
                    '?q=site:https://plus.google.com%20http://www.jackdaniels.com/'
                ).text
                loadAsJson = json.loads(google_plus)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    for items in parse_likes:
                        list_of_words = items['snippet'].split()
                        for word in list_of_words:
                            if "follower" in word:
                                next_word = list_of_words[
                                    list_of_words.index(word) - 1]
                                bingDictionary['google_plus_followers'] = \
                                    next_word
                                bingDictionary['google_plus_url'] = items['url']
                except Exception:
                    bingDictionary['google_plus_followers'] = None
                    bingDictionary['google_plus_url'] = None
                formatDomain = str(domain).replace('http://', '').replace(
                    'https://', '')
                fixedDomain = formatDomain.split('/')[0].replace(
                    'https://www.', '').replace('http://www.', '').replace(
                        'www.', '')
                print(fixedDomain)
                whoisAPI = ('http://api.whoxy.com/?key='
                            'f5bd9ed47568013u5c00d35155ec3884&whois=' +
                            str(fixedDomain))
                domainArray.append(whoisAPI)
                bingDictionary['whoisData'] = "None"
                bingDictionary['social_shares'] = "None"
                miniArz = []
                try:
                    response = requests.get('http://104.131.43.184/whois/' +
                                            str(fixedDomain)).text
                    min_text = 'http://104.131.43.184/whois/' + str(fixedDomain)
                    url_list.append(str(min_text))
                    loadAsJson = json.loads(response)
                except Exception:
                    pass
                whoisDictionary = {}
                try:
                    whoisDictionary['domain_name'] = loadAsJson['domain_name']
                except Exception:
                    whoisDictionary['domain_name'] = "None"
                try:
                    whoisDictionary['whois_full_name'] = loadAsJson[
                        'registrant']['name']
                except Exception:
                    whoisDictionary['whois_full_name'] = "None"
                try:
                    whoisDictionary['whois_city_name'] = loadAsJson[
                        'registrant']['city_name']
                except Exception:
                    whoisDictionary['whois_city_name'] = "None"
                try:
                    whoisDictionary['whois_country_code'] = loadAsJson[
                        'registrant']['country_code']
                except Exception:
                    whoisDictionary['whois_country_code'] = "None"
                try:
                    whoisDictionary['whois_email_address'] = loadAsJson[
                        'registrant']['email']
                except Exception:
                    whoisDictionary['whois_email_address'] = "None"
                try:
                    whoisDictionary['whois_phone_number'] = loadAsJson[
                        'registrant']['phone_number']
                except Exception:
                    whoisDictionary['whois_phone_number'] = "None"
                email_response = ''
                try:
                    email_response = requests.get(domain).text
                except Exception:
                    pass
                phoneRegex = re.compile(
                    r'''
                    # 415-555-0000, 555-9999, (415) 555-0000,
                    # 555-000 ext 12345, ext. 12345, x12345
                    (
                    ((\d\d\d) | (\(\d\d\d\)))?  # area code (optional)
                    (\s|-)                      # first separator
                    \d\d\d                      # first 3 digits
                    -                           # second separator
                    \d\d\d\d                    # last 4 digits
                    (((ext(\.)?\s) | x)         # extension word part (optional)
                    (\d{2,5}))?                 # extension number part (optional)
                    )
                    ''', re.VERBOSE)
                soup = BeautifulSoup(email_response, 'html.parser')
                extractedPhone = phoneRegex.findall(str(soup))
                RSS_ARR = []
                for link in soup.find_all("link",
                                          {"type": "application/rss+xml"}):
                    href = link.get('href')
                    RSS_ARR.append(href)
                allPhoneNumbers = []
                for phoneNumber in extractedPhone:
                    allPhoneNumbers.append(phoneNumber[0])
                email_arr = []
                bingDictionary['phone_numbers'] = allPhoneNumbers
                bingDictionary['RSS_URL'] = RSS_ARR
                emails = re.search(r'[\w\.-]+@[\w\.-]+', str(soup))
                if emails:
                    if "." in emails.group()[-1]:
                        # Drop a trailing dot before validating.
                        new_emails = emails.group()[:-1]
                        email_validator = lepl.apps.rfc3696.Email()
                        if email_validator(new_emails):
                            email_arr.append(new_emails)
                    else:
                        email_string = emails.group()
                        email_validator = lepl.apps.rfc3696.Email()
                        if email_validator(email_string):
                            email_arr.append(email_string)
                bingDictionary['emails'] = email_arr
                miniArz.append(whoisDictionary)
                bingDictionary['whoisData'] = miniArz
                rearr.append(bingDictionary)
            except (KeyError, RuntimeError):
                pass
def querygoogle():
    form = QueryGoogle(request.form)
    arr = []
    index = 0
    if form.validate_on_submit():
        arr = []
        query = form.QueryGoogle.data
        response = requests.get('https://www.google.com/search?num=3&q=' +
                                query + '&oq=' + query + '&&start=10',
                                headers=headers, proxies=proxies).text
        soup = BeautifulSoup(response, 'html.parser')
        title = soup.findAll('div', attrs={'class': 'g'})
        for titles in title:
            try:
                dictionary = {}
                index += 1
                dictionary['index#'] = str(index)
                dictionary['meta_title'] = titles.find('h3').text
                dictionary['full_url'] = titles.find('a')['href']
                rootDomain = dictionary['full_url'].replace('/url?q=', '')
                parsed_uri = urlparse(rootDomain)
                dictionary['rootDomain'] = rootDomain
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
                dictionary['root_domain'] = domain
                dictionary['description'] = titles.find(
                    'span', attrs={'class': 'st'}).text
                addUser = Google(googleQuery=query,
                                 googleMetaTitle=dictionary['meta_title'],
                                 googleFullURL=dictionary['rootDomain'],
                                 googleRootDomain=dictionary['root_domain'])
                db.session.add(addUser)
                arr.append(dictionary)
            except AttributeError:
                continue
        db.session.commit()
        Mozarr = []
        secondaryURL = []
        with open('credentials.json') as listO:
            B = json.load(listO)
        A = arr
        # Cycle the shorter list so every entry of the longer one is paired.
        zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
        for zipList in zip_list:
            try:
                client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
                authorities = client.urlMetrics(
                    zipList[0]['root_domain'],
                    Mozscape.UMCols.domainAuthority)
                Links = client.urlMetrics(
                    zipList[0]['rootDomain'],
                    Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank |
                    Mozscape.UMCols.links)
                internal_dictionary = {}
                internal_dictionary['backURL'] = zipList[0]['rootDomain']
                internal_dictionary['PA'] = Links['upa']
                internal_dictionary['DA'] = authorities['pda']
                internal_dictionary['MozRank'] = Links['umrp']
                internal_dictionary['links'] = Links['uid']
                Mozarr.append(internal_dictionary)
                secondaryURL.append(zipList[0]['rootDomain'])
            except MozscapeError:
                sleep(11)
                continue
        removeDuplicates = list(set(secondaryURL))
        for addtoDb in removeDuplicates:
            updateDatabase = Google.query.filter_by(
                googleRootDomain=addtoDb).all()
            print(updateDatabase)
        return render_template('google.html', form=form, arr=arr)
    return render_template('google.html', form=form, arr=arr)
def parse_sites(sites_file):
    # Get the list of websites for testing. Note: str.lstrip strips a *set
    # of characters*, not a prefix, so the original lstrip('http://') could
    # eat leading letters of the host; strip the scheme explicitly instead.
    with open(sites_file) as sl:
        sites = []
        for x in sl.readlines():
            site = x.strip().rstrip('/')
            for prefix in ('http://', 'https://'):
                if site.startswith(prefix):
                    site = site[len(prefix):]
            sites.append(site)
    print(sites)
    # Get Moz API credentials.
    with open('keys.txt') as fp:
        credentials = [x.strip() for x in fp.readlines()]
    moz_id = credentials[0]
    moz_key = credentials[1]
    # Access the Moz client using the credentials.
    client = Mozscape(moz_id, moz_key)
    # New lists for storing the websites' Moz DA data.
    sites_da = []
    sites_backlinks = []
    sites_ips = []
    for i in sites:
        print('Getting DA data for', i)
        i = client.urlMetrics([i], Mozscape.UMCols.domainAuthority)
        sites_da.append(math.ceil(i[0]['pda']))
        print('Complete. Going to next domain in list')
        print('...')
        time.sleep(5)
    for k in sites:
        print('Getting total backlinks for', k)
        k = client.urlMetrics([k], Mozscape.UMCols.links)
        sites_backlinks.append(k[0]['uid'])
        print('Complete. Going to next domain in list')
        print('...')
        time.sleep(5)
    for s in sites:
        try:
            print('Getting the IP address of', s)
            s = socket.gethostbyname(s)
            sites_ips.append(s)
        except socket.error:
            print('Could not get IP address for', s)
            sites_ips.append('Error getting ip')
            continue
    # Create a dictionary mapping each website to its DA score, number of
    # backlinks, and IP address.
    sites_metrics = dict(zip(sites, zip(sites_da, sites_backlinks, sites_ips)))
    print(sites_metrics)
    df = pd.DataFrame.from_dict(data=sites_metrics, orient='index')
    df.columns = ['Domain Authority', 'Backlinks', 'IP']
    print(df)
    to_file(df)
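# Why the original lstrip('http://') was a bug: lstrip treats its argument as
# a set of characters to remove, not a prefix. A quick illustration with a
# hypothetical URL:
print('http://html5rocks.com'.lstrip('http://'))       # 'ml5rocks.com' - wrong
print('http://html5rocks.com'.removeprefix('http://'))  # Python 3.9+: correct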
from mozscape import Mozscape, MozscapeError

client = Mozscape('member-79ea116cb0', '43053334ef958fa5668a8afd8018195b')

Links = client.urlMetrics(
    'https://www.benfolds.com/&sa=U&ved=0ahUKEwjSxZKtiP_PAhXIqFQKHaomCSsQFggsMAM&usg=AFQjCNGQFScP5dGElKPvz8zL1VyZvkQlxg',
    Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank |
    Mozscape.UMCols.links)
DA = client.urlMetrics('https://www.benfolds.com/',
                       Mozscape.UMCols.domainAuthority)
print(DA)
# Rotate through the credential pairs in `dictionary`, fetching URL metrics
# for each item in B; on failure, print the credentials and item that failed.
d = cycle(dictionary.items())
for items in B:
    defined = next(d)
    try:
        client = Mozscape(defined[0], defined[1])
        metrics = client.urlMetrics(str(items))
        print(metrics)
    except Exception:
        print(defined[0] + ' ' + defined[1] + ' ' + items)
#!/usr/bin/env python
from mozscape import Mozscape
from datetime import datetime
import csv

client = Mozscape('mozscape-f03b16db58', '5f7418e041cf61841d72ef26c6f7a905')

domain = input()
Metrics = client.urlMetrics(domain)
DA = Metrics['pda']
# For Page Authority too:
# authorities = client.urlMetrics(
#     (domain),
#     Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

now = datetime.now()
month = now.strftime("%B-%y")
print(month)
print(DA)

update = [month, domain, str(DA)]
with open('daquery.csv', 'a') as fd:
    wr = csv.writer(fd, delimiter=',')
    wr.writerow(update)
from mozscape import Mozscape
from contextlib import closing
import traceback
import time
import pandas as pd

# Input your member ID and secret key here.
client = Mozscape('mozscape-d3909f420d', 'a223a4a9f87f71c8ba7c353598ec43ff')

url = 'https://filecoin.io/'
# Results for a single URL come back as one dict; wrap it in a list so
# pandas builds a one-row frame instead of raising on scalar values.
mozMetrics = client.urlMetrics(url)
df = pd.DataFrame([mozMetrics])

# https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
# fmrp: normalized measure of the MozRank of the subdomain of the target URL
# fmrr: raw measure of the MozRank of the subdomain of the target URL
# pda: domain authority of the target URL's paid-level domain
# ueid: the number of external equity links from other root domains to pages
#       on the target URL's root domain
# uid: number of internal and external, equity and non-equity links to the
#      subdomain of the target URL
# umrp: normalized (ten-point, logarithmically-scaled) MozRank of the target URL
# umrr: raw (zero to one, linearly-scaled) MozRank of the target URL
# upa: normalized (zero to one hundred, logarithmically-scaled) page authority
#      of the target URL
# us: HTTP status of the target URL
# ut: title of the target URL, if a title is available
# uu: canonical form of the source URL

# Rename the terse column codes to friendlier names; the mapping below is one
# reasonable reconstruction from the reference comments above.
df = df.rename(index=str, columns={
    'fmrp': 'subdomain_mozrank',
    'umrp': 'mozrank',
    'pda': 'da',
    'upa': 'pa',
    'ueid': 'equity_links',
    'uid': 'links',
    'us': 'status_code',
    'ut': 'title',
    'uu': 'canonical_url',
})
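# Why the list wrapper around the metrics dict matters: pandas raises
# "If using all scalar values, you must pass an index" when handed a dict of
# scalars directly. A minimal illustration with dummy metric values:
import pandas as pd

row = {'pda': 57.0, 'upa': 49.0, 'uid': 1234}
# pd.DataFrame(row)       # ValueError: all scalar values need an index
df = pd.DataFrame([row])  # one-row frame with columns pda, upa, uid
print(df)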
"subdomain_mozrank": subdomain_mozrank, "da": da, "pa": pa, "equity_links": equity_links, "links": links, "status_code": status_code, "moz_time_last_crawled": moz_time_last_crawled } return result for chunk in domain_chunks: log('--- Starting request ---') metrics = None try: metrics = client.urlMetrics(chunk) except MozscapeError as e: log('ERROR! : %s' % (e)) continue results = [] for idx, domain in enumerate(chunk): metric = metrics[idx] result = get_result(metric) log_domain(result) results.append(result) write_chunk(results) results = []