forked from kevinmel2000/autologin
/
autologin.py
85 lines (70 loc) · 3.29 KB
/
autologin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from scrapy.utils.project import get_project_settings
from crawler.logincrawl.spiders.login_finder import LoginFinderSpider
import sys
from auth_analysis.auth_analysis import AuthHeaderFinder
from scrapy.xlib.pydispatch import dispatcher
import crawler.logincrawl.settings
import json
import os
os.environ["SCRAPY_SETTINGS_MODULE"] = "crawler.logincrawl.settings"
import pickledb
from scrapy import Request as Request
from scrapy.settings import Settings
import traceback
from scrapy.crawler import CrawlerRunner
#import importlib
#settings_module = importlib.import_module('crawler.logincrawl.settings')
#settings = Settings(settings_module)
#crawler_settings = CrawlerSettings(settings_module)
class AutoLogin(object):
    """Reads authentication headers previously captured by the login_finder
    spider from a pickledb database and builds authenticated scrapy Requests.

    The login_finder spider must have been run first (see run_login_spider)
    so the database at ``db_name`` is populated with AuthInfoItems.
    """
    def __init__(self, db_name):
        # Path of the pickledb database produced by the login spider.
        self.db_name = db_name
    def get_auth_headers_and_redirect_url(self):
        """Return the auth-info dict selected by AuthHeaderFinder.

        The dict carries at least the keys "auth_headers" and "response_url".
        Raises Exception when no valid login headers are found in the db.
        """
        ahf = AuthHeaderFinder(self.db_name)
        try:
            auth_info = ahf.get_auth_header()
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed; the original behavior (log + re-raise) is kept.
        except Exception:
            log.msg("No valid login headers found. Here is the traceback: ", level = log.CRITICAL)
            traceback.print_exc()
            raise Exception("No valid login headers found.")
        log.msg("Got auth headers %s" % json.dumps(auth_info["auth_headers"]))
        return auth_info
    def return_authenticated_request_item(self, callback = None, meta = None):
        """Build a scrapy Request aimed at the post-login redirect URL,
        carrying the stored authentication headers.

        ``callback`` and ``meta`` are forwarded to the Request unchanged.
        """
        # BUG FIX: the original tuple-unpacked the returned dict
        # (``a, b = self.get_...()``), which yields the dict's *keys*,
        # not the header/url values. Index the dict explicitly instead.
        auth_info = self.get_auth_headers_and_redirect_url()
        auth_headers = auth_info["auth_headers"]
        redirected_to = auth_info["response_url"]
        log.msg("Returning auth headers %s and redirected_to url %s" % (json.dumps(auth_headers), str(redirected_to)), level = log.INFO)
        if callback:
            return Request(redirected_to, callback = callback, meta = meta, headers = auth_headers)
        else:
            return Request(redirected_to, meta = meta, headers = auth_headers)
#usage: the login_finder spider must be run and a database populated with AuthInfoItems for AutoLogin object to work
def init_db(db_name):
    """Create a fresh, empty pickledb database at ``db_name``.

    Any pre-existing database file is removed first so each crawl starts
    from a clean slate.
    """
    try:
        os.remove(db_name)
    # Narrowed from a bare ``except:``: only "file missing / not removable"
    # errors are expected here; anything else should surface.
    except OSError:
        pass
    db = pickledb.load(db_name, False)
    # dump() forces the (empty) database file to be created on disk.
    db.dump()
def run_login_spider(seed_url, username, password, db_name, logfile = "results.log"):
    """Crawl ``seed_url`` with LoginFinderSpider using the given credentials,
    storing the captured auth info in the pickledb database at ``db_name``.

    Blocks until the crawl finishes (runs the twisted reactor), logging to
    ``logfile`` at DEBUG level.
    """
    init_db(db_name)
    project_settings = get_project_settings()
    crawl_runner = CrawlerRunner(project_settings)
    crawl_deferred = crawl_runner.crawl(LoginFinderSpider, seed_url = seed_url, username = username, password = password)
    # Stop the reactor when the crawl ends, whether it succeeded or failed.
    crawl_deferred.addBoth(lambda _: reactor.stop())
    log.start(loglevel=log.DEBUG, logfile=logfile)
    log.msg("Item pipelines enabled: %s" % str(project_settings.get("ITEM_PIPELINES")), level = log.INFO)
    reactor.run()
if __name__ == "__main__":
    # Demo run: crawl the seed site to capture auth headers, then build an
    # authenticated request from what the spider stored in the database.
    db_name = "eawfwefawefaewweeawf.db"
    run_login_spider("https://www.signupgenius.com/", "actest@hyperiongray.com", "passpasspass123", db_name, logfile = "results.log")
    # BUG FIX: AutoLogin.__init__ requires db_name, but the original called
    # AutoLogin() with no arguments, which raises TypeError at runtime.
    # Reuse the same db the spider just populated.
    al = AutoLogin(db_name)
    req = al.return_authenticated_request_item()
    log.msg("Request object returned: %s" % req.url)
    log.msg("Request object returned: %s" % req.headers)
    reactor.stop()