-
Notifications
You must be signed in to change notification settings - Fork 9
/
google_url_scrapper.py
46 lines (37 loc) · 1.45 KB
/
google_url_scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os, time, random, datetime, sys, requests
from xgoogle.search import GoogleSearch, SearchError
class google_url_scrapper:
    """Collect Google result URLs and look up Majestic SEO backlink metrics.

    Instance state:
      urls -- list of (url, title) tuples; every scrape() call appends to it,
              the list is never reset.
      seo  -- dict returned by the latest successful MajesticSEO_API() call
              ('' until one succeeds).
    """

    def __init__(self):
        self.urls = []
        self.seo = ''

    def scrape(self, keyword, pages=2):
        """Search Google for *keyword* and record the result URLs.

        Args:
            keyword: search query handed to GoogleSearch.
            pages: maximum number of result pages (10 results each) to fetch.

        Returns:
            self.urls -- the accumulated list of (utf-8 encoded url, title)
            tuples, including entries from earlier calls.

        A SearchError is reported on stdout instead of being raised; whatever
        was collected before the failure is still returned.
        """
        try:
            gs = GoogleSearch(keyword)
            gs.results_per_page = 10
            gs.page = 0
            # NOTE(review): relies on xgoogle moving to the next page on each
            # get_results() call -- confirm against the installed xgoogle.
            for _ in range(pages):
                results = gs.get_results()
                if not results:
                    # Nothing (more) to read; stop issuing requests.
                    break
                for res in results:
                    self.urls.append((res.url.encode('utf8'), res.title))
        except SearchError as e:
            print("Search failed: %s" % e)
        return self.urls

    @staticmethod
    def _parse_metrics(text):
        """Turn a header line plus one value line of CSV text into a dict.

        Fields are split on plain commas, exactly as the response was always
        interpreted here; quoted fields are not given special treatment.

        Raises:
            ValueError: if *text* has fewer than two lines.
        """
        lines = text.splitlines()
        if len(lines) < 2:
            raise ValueError(
                "Expected a header line and a data line, got %d line(s)"
                % len(lines))
        return dict(zip(lines[0].split(','), lines[1].split(',')))

    def MajesticSEO_API(self, url, timeout=30):
        """Get metrics for a single URL.

        Posts *url* to the Majestic SEO bulk-backlinks CSV endpoint and maps
        the header row of the reply onto its first data row.

        Args:
            url: the item to request metrics for (sent as 'item0').
            timeout: seconds to wait for the HTTP request before requests
                gives up, so a stalled connection cannot block forever.

        Returns:
            dict of column name -> value (all strings); also stored in
            self.seo.

        Raises:
            Exception: if the reply contains the "could not determine your
                user" marker text.
            ValueError: if the reply has fewer than two lines.
        """
        data = {
            'getcsv': 'Get backlink counts as .CSV',
            'SortBy': '-1',
            'items': '1',
            'Datasource': 'Fresh',
            'item0': url,
        }
        response = requests.post(
            'https://csv.majesticseo.com/getcsvdata/bulkbacklinks',
            data=data,
            cookies={'STOK': 'anything'},
            timeout=timeout,
        )
        # The marker is matched verbatim against the response body, spelling
        # included -- do not "correct" it.
        if 'We could not determine your user creditials' in response.text:
            raise Exception("Couldn't authenticate without proper session variable")
        self.seo = self._parse_metrics(response.text)
        return self.seo