def detect(self, observer_id, url, language):
    """Scan the HAR capture of *url* for third-party requests and queue
    unknown ones as "gray" records on the observer.

    A request is recorded as gray when its origin differs from the page
    origin, it is not already blacklisted via the 'blu' source, its domain
    IS in the domain blacklist, the page origin is not whitelisted for that
    domain, and no "normal" record exists for the sub-domain yet.

    Regardless of parsing errors, the observer run is marked 'done' at the end.

    :param observer_id: id of the observer run to update
    :param url: page URL being observed
    :param language: language tag stored on each gray record
    """
    origin = self.urlUtil.get_origin(url)
    data = self.har.capture(url)
    # BUGFIX: create the Observer before the try block. The original built it
    # inside, so any exception raised before that point (e.g. a malformed HAR
    # missing 'log'/'entries') made the final update_observer() call crash
    # with NameError instead of marking the run done.
    observer = Observer()
    try:
        for entry in data['log']['entries']:
            req = entry['request']
            req_url = req['url']
            req_origin = self.urlUtil.get_origin(req_url)
            info = self.urlUtil.url_parse(req_url)
            info['query_string'] = req.get('queryString', {})
            if origin == req_origin:
                continue  # first-party request — nothing to do
            black = observer.get_black(sub_domain=info['sub_domain'], src='blu')
            if black is not None:
                continue  # already known black via the 'blu' source
            b = observer.get_black(domain=req_origin)
            if b is None:
                continue  # domain not blacklisted at all — skip
            if origin in b.get('whites', []):
                print('is white')
            else:
                # Blacklisted domain, origin not whitelisted: store a gray
                # record unless a "normal" record already covers the sub-domain.
                n = observer.get_normal(info['sub_domain'])
                if n is None:
                    gray = {
                        'url': req_url,
                        'observer_id': observer_id,
                        'sub_domain': info['sub_domain'],
                        'domain': req_origin,
                        'observer_url': url,
                        'path': info['path'],
                        'query_string': info['query_string'],
                        'language': language,
                    }
                    observer.add_gray(gray)
    except Exception as e:
        # Best-effort scan: log the failure but still close out the run below.
        print(e)
    data = {'status': 'done'}
    observer.update_observer(observer_id, data)
def capture(self, url):
    """Capture the HAR of *url* and store a labelled dataset entry.

    Each third-party request is classified against the observer's black/white
    lists: whitelisted (or first-party) requests go to ``label_f``, tracking
    candidates to ``label_t``. The resulting label dict is persisted via
    ``DataSet.add_data``. Errors are logged and swallowed (best-effort).

    :param url: page URL to capture and label
    """
    try:
        origin = self.urlUtil.get_origin(url)
        p = self.urlUtil.url_parse(url)
        label = {
            'host': p['sub_domain'],
            'path': p['path'],
            'label_f': None,
            'label_t': None,
        }
        label_t = []  # tracking candidates (black, not whitelisted)
        label_f = []  # harmless (first-party or whitelisted)
        data = self.har.capture(url)
        # PERF: one Observer for the whole capture; the original constructed
        # a fresh instance for every single HAR entry inside the loop.
        observer = Observer()
        for entry in data['log']['entries']:
            try:
                req = entry['request']
                # BUGFIX: bind to req_url instead of rebinding `url`, which
                # shadowed the page-URL parameter from the second entry on.
                req_url = req['url']
                req_origin = self.urlUtil.get_origin(req_url)
                info = self.urlUtil.url_parse(req_url)
                info['query_string'] = req.get('queryString', {})
                if origin == req_origin:
                    label_f.append(info)  # first-party request
                    continue
                black = observer.get_black(sub_domain=req_origin)
                if black is None:
                    # Fall back to the 'blu'-sourced blacklist on the sub-domain.
                    b = observer.get_black(sub_domain=info['sub_domain'], src='blu')
                    if b is not None and origin in b.get('whites', []):
                        label_f.append(info)
                    else:
                        label_t.append(info)
                elif origin in black.get('whites', []):
                    label_f.append(info)
                else:
                    label_t.append(info)
            except Exception:
                traceback.print_exc()
                continue
        if len(label_f) > 0:
            label['label_f'] = label_f
        if len(label_t) > 0:
            label['label_t'] = label_t
        dataset = DataSet()
        dataset.add_data(label)
    except Exception:
        traceback.print_exc()
from tld import get_tld, get_fld from urllib.parse import urlparse from capture.har import Har from database.observer import Observer from database.site import Site import json from os.path import splitext # COUNTRIES = ['KR', 'US', 'VN', 'ID'] COUNTRIES = ['KR'] har = Har() observer = Observer() site = Site() # f = open("label_f_t2.txt", "w") d = [] def capture(url): origin = get_fld('http://%s' % url, fail_silently=True) if origin is None: print("error: TldDomainNotFount") pass # print(origin) label = { 'domain': url,
from database.observer import Observer

# Smoke test: attach a whitelisted origin to an existing black-list host.
api = Observer()
black_host = 'www.xxx.com'
white_origin = 'bbb.com'
try:
    result = api.add_white(black_host, white_origin)
except Exception as err:
    print(err)
else:
    print(result)
from database.observer import Observer

# Smoke test: fetch the black-list record for a single domain and print it.
api = Observer()
domain = "facebook.com"
try:
    record = api.get_black(domain)
    print(record)
except Exception as err:
    print(err)
from database.observer import Observer
from dataset.black_list import BlackList
from dataset.white_list import WhiteList

# Seed the observer store from the bundled disconnect black list,
# then attach each whitelisted resource to its white-list domain.
observer = Observer()
bl = BlackList().get_list()
wl = WhiteList().get_list()

for sub_domain in bl:
    print(observer.add_black({'sub_domain': sub_domain, 'src': 'disconnect'}))

for white_domain in wl:
    for resource in wl[white_domain]:
        outcome = observer.add_white(resource, white_domain)
        print(white_domain, outcome)
from database.observer import Observer

# Smoke test: insert a black-list record that has no whitelisted origins yet.
api = Observer()
entry = {'domain': "www.xxx.com", 'lang': "en", 'whites': None}
try:
    outcome = api.add_black(entry)
    print(outcome)
except Exception as err:
    print(err)