-
Notifications
You must be signed in to change notification settings - Fork 0
/
start.py
107 lines (90 loc) · 2.37 KB
/
start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#! /usr/bin/python
import config as c
import requests
import func as f
# Module-level database handle, opened once when the module is loaded;
# sendData() passes it to f.sql() for every query.
# NOTE(review): c.DB_FILE is presumably the path of the database file - confirm.
lite = f.sql_connect(c.DB_FILE)
# Fetches a page (str)
# untested
def getPage(url):
    """Download ``url`` with an HTTP GET and return the response body.

    NOTE(review): ``verify=False`` switches off TLS certificate verification
    and no ``timeout`` is passed, so a stalled server blocks the caller
    indefinitely - confirm both are intentional.
    NOTE(review): on Python 3 ``Response.content`` is ``bytes``, while the
    callers search the result with ``str`` markers - confirm the target
    interpreter.
    """
    response = requests.get(url, verify=False)
    body = response.content
    return body
# Crops the page (str)
# untested
def findStart(page, start, stop):
    """Return the part of ``page`` between the ``start`` and ``stop`` markers.

    The stop marker is searched for only after the end of the start marker,
    so an earlier occurrence of ``stop`` cannot produce an empty result.

    Args:
        page: Text to crop.
        start: Marker that precedes the wanted region (not included).
        stop: Marker that follows the wanted region (not included).

    Returns:
        The text between the markers. An empty value when ``start`` does not
        occur in ``page``; everything after ``start`` when ``stop`` does not
        occur after it.
    """
    begin = page.find(start)
    if begin < 0:
        # Without the start marker there is no region to crop to; adding
        # len(start) to -1 would silently slice from an arbitrary offset.
        return page[:0]
    begin += len(start)

    end = page.find(stop, begin)
    if end < 0:
        # Using -1 as a slice bound would drop the last character.
        return page[begin:]
    return page[begin:end]
# Splits the page into lines (list)
# untested
def findLines(page, start, stop):
    """Collect every piece of ``page`` enclosed by ``start`` ... ``stop``.

    Scanning proceeds left to right. After each match the search for the next
    start marker resumes at the position of the stop marker that ended the
    match, so a start marker may overlap the preceding stop marker.

    Args:
        page: Text to split.
        start: Marker that opens a line (not included in the result).
        stop: Marker that closes a line (not included); converted with
            ``str()`` before use.

    Returns:
        List of the enclosed pieces in order of appearance. Scanning ends at
        the first start marker that is missing or has no stop marker after
        it. An empty ``start`` marker yields an empty list.
    """
    lines = []
    if not start:
        # An empty start marker matches at every position without consuming
        # anything, so the scan could never advance.
        return lines

    stop = str(stop)
    pos = 0
    while True:
        begin = page.find(start, pos)
        if begin < 0:
            # Must be tested before len(start) is added, otherwise the -1
            # "not found" result is masked and the loop never ends.
            break
        begin += len(start)
        end = page.find(stop, begin)
        if end < 0:
            break
        lines.append(page[begin:end])
        pos = end
    return lines
# Extracts data (str)
def findData(line, start, stop):
    """Extract the text of ``line`` between the ``start`` and ``stop`` markers.

    The stop marker is searched for only after the end of the start marker,
    which makes marker pairs such as ``href="`` / ``"`` work even though the
    stop marker also occurs inside the start marker.

    Args:
        line: Text to extract from.
        start: Marker that precedes the value (not included).
        stop: Marker that follows the value (not included).

    Returns:
        The text between the markers. ``""`` when either marker is empty or
        ``None``, or when ``start`` does not occur in ``line``. Everything
        after ``start`` when ``stop`` does not occur after it.
    """
    if not start or not stop:
        return ""

    begin = line.find(start)
    if begin < 0:
        return ""
    begin += len(start)

    end = line.find(stop, begin)
    if end < 0:
        # Using -1 as a slice bound would drop the last character.
        return line[begin:]
    return line[begin:end]
def sendData(url, data, title, cat):
    """Store ``url`` in the ``ergebnisse`` table unless it is already there.

    All four arguments are passed through ``f.no_inject``; only ``url`` is
    used in the queries.

    In debug mode (``c.DEBUG``) the number of rows returned by the lookup and
    the INSERT statement are printed, nothing is inserted, and True is
    returned.

    Returns:
        True when the URL was inserted (or in debug mode), False when a row
        with this URL already exists.

    NOTE(review): the SQL is assembled by string concatenation and relies
    entirely on ``f.no_inject`` for escaping; switch to bound parameters if
    ``f.sql`` supports them.
    """
    url = f.no_inject(url)
    # Kept from the original: these values are sanitised but not used below.
    data = f.no_inject(data)
    title = f.no_inject(title)
    cat = f.no_inject(cat)
    # URLs scraped out of HTML carry escaped ampersands; decode them so the
    # same link is always stored in one form. (Replacing "&" with "&" did
    # nothing.)
    url = url.replace("&amp;", "&")

    lookup = "SELECT * FROM ergebnisse WHERE `url` = '" + url + "'"
    if c.DEBUG:
        print(len(f.sql(lite, lookup)))
        print("INSERT INTO ergebnisse (url) VALUES ('" + url + "')")
        return True

    if len(f.sql(lite, lookup)) <= 0:
        f.sql(lite, "INSERT INTO ergebnisse (`url`) VALUES ('" + url + "')")
        return True
    return False
def updateData(url, data, title, cat):
    """POST one scraped result to ``c.UPLOAD_PATH``.

    In debug mode (``c.DEBUG``) nothing is sent and True is returned.

    Returns:
        True when the response text is exactly ``"OK"``, otherwise False.
    """
    if c.DEBUG:
        return True

    payload = {'url': url, 'data': data, 'title': title, 'cat': cat}
    response = requests.post(c.UPLOAD_PATH, data=payload)
    return response.text == "OK"
def doStuff(i):
    """Process one seed: fetch its page, cut it into lines, store each result.

    ``i`` is a mapping read with the keys ``url``, ``pStart``/``pStop`` (page
    crop markers), ``lStart``/``lStop`` (line markers), ``uStart``/``uStop``,
    ``dStart``/``dStop``, ``tStart``/``tStop`` (markers for url, data and
    title inside a line), ``root`` and ``cat``.

    For every line the url, data and title are extracted; a url that does not
    start with ``"http"`` is prefixed with ``root`` and a slash. The result is
    handed to ``sendData`` and, when that returns True, to ``updateData``.
    With ``c.DEBUG`` set, the cropped page and the line list are printed.
    """
    cropped = findStart(getPage(i['url']), i['pStart'], i['pStop'])
    if c.DEBUG:
        print(cropped)
        print("----------------------------------------------------------------------")

    lines = findLines(cropped, i['lStart'], i['lStop'])
    if c.DEBUG:
        print(lines)
        print("-----------------------------------------------------------------------")

    for line in lines:
        url = findData(line, i['uStart'], i['uStop'])
        data = findData(line, i['dStart'], i['dStop'])
        title = findData(line, i['tStart'], i['tStop'])
        if not url.startswith("http"):
            # Relative link: make it absolute against the seed's root.
            url = i['root'] + "/" + url
        if not sendData(url, data, title, i['cat']):
            continue
        updateData(url, data, title, i['cat'])
# Entry point: c.SEEDS is either a single seed mapping or an iterable of them.
# isinstance() also accepts dict subclasses (e.g. OrderedDict), which an exact
# type comparison would wrongly iterate key by key.
if isinstance(c.SEEDS, dict):
    doStuff(c.SEEDS)
else:
    for i in c.SEEDS:
        doStuff(i)