#!/usr/bin/env python
from mozscape import Mozscape
import traceback

from url_utility import URLUtility  # assumed import path: project-local URL helper


class Moz_Search(object):
    def __init__(self, access_id, secret_key):
        self.client = Mozscape(access_id, secret_key)

    def search_backlinks(self, url, limit=5):
        """Return a list of normalized backlink URLs.

        Args:
            url: page whose backlinks to fetch.
            limit: maximum number of results to return.
        """
        urls = []
        try:
            results = self.client.links(url, scope="page_to_page",
                                        sort="page_authority",
                                        filters=["external"], limit=limit)
            # Alternative queries:
            # results = self.client.links(url, scope="page_to_page", sort="spam_score", filters=["external"], limit=limit)
            # results = self.client.links(url, scope="page_to_page", sort="page_authority")
            for res in results:
                if 'uu' in res:
                    normalized = URLUtility.normalize(res['uu'])
                    if normalized:
                        urls.append(normalized)
                else:
                    print("Error: key does not exist")
                    print(res)
        except Exception:
            traceback.print_exc()
        return urls
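# A minimal usage sketch for the class above. The credentials are
# placeholders, and URLUtility must be importable as assumed.
searcher = Moz_Search('my-access-id', 'my-secret-key')
for backlink in searcher.search_backlinks('www.moz.com', limit=5):
    print(backlink)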
from mozscape import Mozscape

client = Mozscape('my-access-id', 'my-secret-key')

# As you may have noticed, there are lots of columns available.
# I did what I could to make them easily accessible, but there
# are a lot, and the names are long. So, the API calls have defaults.

# Let's get some URL metrics. Results are now an array of dictionaries;
# the i'th dictionary is the results for the i'th URL.
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])

# Now let's say we only want specific columns in the results
authorities = client.urlMetrics(
    ['www.moz.com'],
    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# Or if you just need results for one URL
mozMetrics = client.urlMetrics('www.moz.com')

# Now for some anchor text results
anchorResults = client.anchorText('www.moz.com/blog')

# Or for just specific columns
anchorTermResults = client.anchorText(
    'www.moz.com/blog', cols=Mozscape.ATCols.term)

# Now for some links results
links = client.links('www.moz.com')

# The links API has more columns to specify, as well as sort, scope, etc.
links = client.links(
    'www.moz.com', scope='domain_to_domain', sort='domain_authority',
    filters=['external', 'nofollow'], targetCols=Mozscape.UMCols.url)
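# A short sketch of reading the responses above. Each result is a plain dict
# keyed by Mozscape's short field names; 'uu' (URL) and 'upa' (page authority)
# appear elsewhere in this repo, and which keys are present depends on the
# columns you requested, so .get() is used defensively here.
for result in metrics:
    print(result.get('uu'), result.get('upa'))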
#!/usr/bin/env python
from mozscape import Mozscape
import pandas as pd

client = Mozscape('my-access-id', 'my-secret-key')

# The links API has more columns to specify, as well as sort, scope, etc.
links = client.links('ENTER URL', scope='page_to_page',
                     sort='page_authority', filters=['internal'],
                     targetCols=Mozscape.UMCols.url)

## PUT ABOVE INTO A PANDAS DATAFRAME ##
# df = pd.DataFrame(authorities.items())
df = pd.DataFrame(links)
df = df.rename(columns={'upa': 'Page Authority', 'uu': 'URL'})
print(df)
# Use .xlsx to avoid the row-export limit; see the csv module as an alternative.
df.to_excel('mozLinks.xls', index=False)

anchorTermResults = client.anchorText('ENTER_URL',
                                      cols=Mozscape.ATCols.freeCols2)
df2 = pd.DataFrame(anchorTermResults)
df2 = df2.rename(
    columns={})  # original snippet is truncated here; intended column mapping unknown
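# A possible extension, not in the original script: write both frames into one
# .xlsx workbook with pandas' ExcelWriter instead of separate files. The
# filename 'mozReport.xlsx' is a made-up example.
with pd.ExcelWriter('mozReport.xlsx') as writer:
    df.to_excel(writer, sheet_name='Links', index=False)
    df2.to_excel(writer, sheet_name='Anchor Text', index=False)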
#!/usr/bin/env python
import random
import time

import pandas as pd
from mozscape import Mozscape

client = Mozscape('my-access-id', 'my-secret-key')  # placeholder credentials

data = pd.read_csv('ICO-URL-Symbol.csv', encoding="utf8")
url_column = data['URL']

remove_row_index = []
urls = data.drop(data.index[remove_row_index])
url_columns = urls['URL']

clean_list = []
for i in url_columns:
    clean_list.append(i)

# Keep only the host part of each URL (everything before the first '/').
clean_urls = []
for i in clean_list:
    clean_urls.append(i.split('/', 1)[0])

urls['index_URL'] = clean_urls

urls_10 = clean_urls
for i in urls_10:
    # metrics = client.urlMetrics(i)
    try:
        print(client.links(i))
        # Two short pauses (4-6 seconds total) between requests to stay
        # under the API rate limit.
        time.sleep(random.randint(2, 3))
        time.sleep(random.randint(2, 3))
    except Exception:
        print(i + " could not be looked up due to rate limiting.")
        time.sleep(10)
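# A hedged alternative to the bare try/except above: a small retry helper with
# exponential backoff. fetch_links() is a hypothetical wrapper written for this
# sketch, not part of the mozscape library.
def fetch_links(client, url, retries=3, base_delay=10):
    """Try the lookup a few times, doubling the wait after each failure."""
    for attempt in range(retries):
        try:
            return client.links(url)
        except Exception:
            time.sleep(base_delay * (2 ** attempt))
    return None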