def get_attrs(src, tag):
    """Collect double-quoted attribute values from ``src``, grouped per element.

    The markup is scanned left to right with plain string searches: each
    attribute is taken as the text between a space and the next ``=``, and
    its value as the text between the following pair of double quotes.

    The number of groups is the number of ``'<a'`` occurrences in ``src``;
    every group is given the same number of attributes, namely the total
    count of ``="`` divided (rounded down) by the number of groups.

    Args:
        src: Markup text to scan.
        tag: Tag name handed to ``mcscrp().scrp`` to obtain the texts that
            are used as keys of the result.

    Returns:
        A dict with one entry per group. The key is the matching entry of
        ``mcscrp().scrp(src, tag)['txt']`` or, when that entry is empty,
        ``'a<index>'``. The value is a dict mapping attribute name to
        attribute value. Groups that end up with the same key overwrite
        each other.

    Raises:
        IndexError: If ``'<a'`` occurs more often than there are entries in
            the ``'txt'`` sequence.
    """
    attr_total = src.count('="')
    # NOTE(review): groups are counted with a hard-coded '<a' even though the
    # caller passes ``tag`` -- looks like only anchors were ever intended (or
    # this should use ``tag``); confirm before generalizing.
    group_total = src.count('<a')
    texts = mcscrp().scrp(src, tag)['txt']

    result = {}
    # Index of the closing quote of the most recently parsed value. It is
    # deliberately carried across groups so the scan keeps moving forward.
    value_end = 0
    for index in range(group_total):
        attrs = {}
        # group_total is never zero here: the loop body only runs when at
        # least one '<a' was found.
        for _ in range(attr_total // group_total):
            name_start = src.find(' ', value_end + 1)
            equals_at = src.find('=', name_start + 1)
            value_start = src.find('"', equals_at + 1)
            value_end = src.find('"', value_start + 1)
            attrs[src[name_start + 1:equals_at]] = src[value_start + 1:value_end]

        text = texts[index]
        key = text if text else 'a{}'.format(index)
        result[key] = attrs
    return result
def scrpping(self):
    """Fetch the page named in ``self.text`` and show its tag texts in the table.

    The page is downloaded with ``get``, the tag names are taken from
    ``mcscrp().get_tags`` and each tag becomes one column of
    ``self.tableWidget`` whose cells are the entries of
    ``mcscrp().scrp(data, tag)['txt']``, converted with ``str``.

    The table gets one row per entry of the longest column. Sizing the rows
    by the number of tags (as was done before) drops every text whose
    position within its tag is not smaller than the number of tags, because
    items placed outside the table's row range are not stored.
    """
    sc = mcscrp()
    # presumably self.text is a line-edit style widget holding the URL, and
    # ``get`` is requests.get -- verify against the enclosing module.
    data = get(self.text.text()).text
    tags = sc.get_tags(data)

    # Collect every column up front so the row count can be derived from
    # the data instead of from the number of tags.
    columns = [list(sc.scrp(data, tag)['txt']) for tag in tags]

    table = self.tableWidget
    table.setColumnCount(len(tags))
    table.setRowCount(max((len(texts) for texts in columns), default=0))
    table.setHorizontalHeaderLabels(tags)

    for col, texts in enumerate(columns):
        for row, text in enumerate(texts):
            table.setItem(row, col, QtWidgets.QTableWidgetItem(str(text)))
from McScrp import mcscrp
import requests

# Download the Simple English Wikipedia colour list, take the first <tbody>
# found in it and print the result of get_attr(..., 'href') for each of its
# <tr> rows.
page = requests.get('https://simple.wikipedia.org/wiki/List_of_colors').text
scraper = mcscrp()

first_tbody = scraper.scrp(page, 'tbody')['tag'][0]
for row in scraper.scrp(first_tbody, 'tr')['tag']:
    print(scraper.get_attr(row, 'href'))
from McScrp import mcscrp
from requests import get

# Google results page for the query "python"; the remaining query-string
# parameters are kept exactly as captured.
SEARCH_URL = 'https://www.google.com/search?source=hp&ei=0o1VXJPjM5KvgweJ4IPYBg&q=python&btnK=%D8%A8%D8%AD%D8%AB+Google%E2%80%8F&oq=python&gs_l=psy-ab.3..35i39l2j0i203l8.289.2048..2350...1.0..0.344.2075.0j2j4j2......0....1..gws-wiz.....0..0j0i131.j8hrx2vNR3s'

scraper = mcscrp()
html = get(SEARCH_URL).text

# Print every text entry the scraper reports, one tag at a time.
for tag_name in scraper.get_tags(html):
    for text in scraper.scrp(html, tag_name)['txt']:
        print(text)