예제 #1
0
class Moz_Search(object):
    """Small wrapper around a Mozscape client for backlink lookups."""

    def __init__(self, access_id, secret_key):
        """
        Build the underlying Mozscape client.
        Args:
            access_id: Mozscape API access id
            secret_key: Mozscape API secret key
        """
        self.client = Mozscape(access_id, secret_key)

    def search_backlinks(self, url, limit=5):
        """
        Return a list of normalized urls taken from the links reported for `url`.

        The links API is queried with page-to-page scope, sorted on
        page authority and filtered to external links. Each result's
        'uu' value is passed through URLUtility.normalize; falsy
        results of that call are dropped.

        Args:
            url: the url whose backlinks are requested
            limit: maximum number of results to return
        Returns:
            A list of urls. Any error raised while querying or processing
            is printed as a traceback and whatever was collected so far
            (possibly an empty list) is returned.
        """
        urls = []
        try:
            results = self.client.links(url,
                                        scope="page_to_page",
                                        sort="page_authority",
                                        filters=["external"],
                                        limit=limit)

            for res in results:
                if 'uu' not in res:
                    # Report the malformed entry and move on to the next one.
                    print("Error: key does not exisit")
                    print(res)
                    continue
                # Use a separate name so the `url` argument is not clobbered.
                backlink = URLUtility.normalize(res['uu'])
                if backlink:
                    urls.append(backlink)
        except Exception:
            # Best effort: keep partial results, but let KeyboardInterrupt
            # and SystemExit propagate instead of being swallowed.
            traceback.print_exc()

        return urls
예제 #2
0
    'my-access-id',
    'my-secret-key')

# The API exposes a large number of result columns, and their names
# are long. To keep calls short, every API call falls back to a
# default set of columns when none are requested.

# URL metrics for several URLs at once. The result is an array of
# dictionaries: entry i holds the results for the i-th URL passed in.
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])

# Request only specific columns by OR-ing the column flags together.
authorities = client.urlMetrics(
    ['www.moz.com'],
    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority,
)

# A single URL may be passed directly instead of a list.
mozMetrics = client.urlMetrics('www.moz.com')

# Anchor text results with the default columns...
anchorResults = client.anchorText('www.moz.com/blog')

# ...or restricted to specific columns.
anchorTermResults = client.anchorText(
    'www.moz.com/blog',
    cols=Mozscape.ATCols.term,
)

# Links results with the defaults.
links = client.links('www.moz.com')

# The links API takes more options: scope, sort order, filters and
# the columns to return.
links = client.links(
    'www.moz.com',
    scope='domain_to_domain',
    sort='domain_authority',
    filters=['external', 'nofollow'],
    targetCols=Mozscape.UMCols.url,
)
예제 #3
0
# There are many result columns available and their names are long,
# so each API call uses a default column selection unless told
# otherwise.

# Fetch URL metrics for a list of URLs; the result is an array of
# dictionaries, with the i-th dictionary describing the i-th URL.
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])

# Limit the response to chosen columns by combining column flags.
authorities = client.urlMetrics(
    ['www.moz.com'],
    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# For one URL, pass it on its own rather than wrapped in a list.
mozMetrics = client.urlMetrics('www.moz.com')

# Anchor text results, first with default columns...
anchorResults = client.anchorText('www.moz.com/blog')

# ...then with an explicit column selection.
anchorTermResults = client.anchorText(
    'www.moz.com/blog', cols=Mozscape.ATCols.term)

# Links results using the defaults.
links = client.links('www.moz.com')

# The links call additionally accepts scope, sort and filter options
# as well as the columns to return.
links = client.links(
    'www.moz.com', scope='domain_to_domain', sort='domain_authority',
    filters=['external', 'nofollow'], targetCols=Mozscape.UMCols.url)
#!/usr/bin/env python

from mozscape import Mozscape
import argparse
import csv
import sys
import pandas as pd

# Placeholder credentials; substitute a real access id and secret key.
client = Mozscape('my-access-id', 'my-secret-key')

# Query the links API for the placeholder target: page-to-page scope,
# sorted on page_authority, internal links only, requesting the URL
# column via targetCols.
links = client.links(
    'ENTER URL',
    scope='page_to_page',
    sort='page_authority',
    filters=['internal'],
    targetCols=Mozscape.UMCols.url,
)

# Load the link results into a DataFrame and give the 'upa' and 'uu'
# columns readable headers.
df = pd.DataFrame(links).rename(
    columns={'upa': 'Page Authority', 'uu': 'URL'})

print(df)
# NOTE: switch to xlsx to stop the row export being limited.
df.to_excel('mozLinks.xls', index=False)

# Anchor text results for the placeholder target.
anchorTermResults = client.anchorText(
    'ENTER_URL',
    cols=Mozscape.ATCols.freeCols2,
)

df2 = pd.DataFrame(anchorTermResults)
df2 = df2.rename(
예제 #5
0
# NOTE(review): API credentials are hard-coded in source. They should be
# rotated and loaded from configuration or the environment instead.
client = Mozscape('mozscape-c75d15e616', '5128f75efae32ed916a6eac395831ee3')

# Read the input spreadsheet; the 'URL' column holds the lookup targets.
data = pd.read_csv('ICO-URL-Symbol.csv', encoding="utf8")
url_column = data['URL']

# Positional row indices to exclude; empty means every row is kept.
remove_row_index = []
urls = data.drop(data.index[remove_row_index])
url_columns = urls['URL']
clean_list = list(url_columns)

# Keep only the text before the first '/' of each entry.
clean_urls = [entry.split('/', 1)[0] for entry in clean_list]
urls['index_URL'] = clean_urls
urls_10 = clean_urls

for target in urls_10:
    try:
        print(client.links(target))
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C still stops
        # the run; show the real error since not every failure is
        # necessarily caused by rate limiting.
        print(target + " could not be looked up due to rate limiting.")
        print(repr(exc))
        time.sleep(10)
    else:
        # Two consecutive random pauses (4-6 seconds in total) between
        # successful requests.
        time.sleep(random.randint(2, 3))
        time.sleep(random.randint(2, 3))