def main(): for user in create_login_payload(): logging.info('Opening a Grafana session...') session = Session() login(session, user) if check_initialized(session): logging.info('Grafana has already been initialized, skipping!') return logging.info('Attempting to add configured datasource...') r = session.post('{url}/api/datasources'.format(url=GRAFANA_URL), json=create_datasource_payload()) logging.debug('Response: %r', r.json()) r.raise_for_status() for path in sorted(glob.glob('{dir}/*.json'.format(dir=DASHBOARDS_DIR))): logging.info('Creating dashboard from file: {path}'.format(path=path)) r = session.post('{url}/api/dashboards/db'.format(url=GRAFANA_URL), json=create_dashboard_payload(path)) logging.debug('Response: %r', r.json()) r.raise_for_status() logging.info('Ending %r session...', user.get('user')) session.get('{url}/logout'.format(url=GRAFANA_URL)) logging.info('Finished successfully.')
def vote_login(username, passwd):
    """Log in to the univs site.

    Args:
        username: the account name
        passwd: the password
    Returns:
        the logged-in session, None if the SSO login is rejected,
        or 1 on a connection error / timeout
    """
    try:
        s = Session()
        sso_url = "http://uzone.univs.cn/sso.action"
        sso_data = {"gUser.loginName": username, "gUser.password": passwd}
        r = s.post(sso_url, data=sso_data)
        if "<code>0</code>" not in r.text:  # SSO did not report success
            return None
        res1 = s.get("http://mzml.univs.cn:8081/common/checkcode")
        code = json.loads(res1.content)
        check_sso_url = ("http://uzone.univs.cn/checkSSOLogin.action"
                         "?token=%s&subSiteId=%s&checkCode=%s"
                         "&returnUrl=http://mzml.univs.cn:8081/land.html")
        res2 = s.get(check_sso_url % (code["data"]["date"],
                                      code["data"]["subSiteId"],
                                      code["data"]["checkout"]))
        codes = res2.url
        sign_in = "http://mzml.univs.cn:8081/user/sigin"
        query = codes.split("?")[1].split("&")
        sign_data = {
            "uid": query[1].split("=")[1],
            "token": code["data"]["date"],
            "checkcode": query[0].split("=")[1],
        }
        s.post(sign_in, data=sign_data)
        return s
    except (ConnectionError, Timeout):
        logging.exception("Timeout: " + username)
        return 1
def sendMicrosoft(filename, help_text, email, name): br = Session() hostUrl = "https://www.microsoft.com/en-us/security/portal/submission/submit.aspx" br.headers.update({'referer': hostUrl}) page = br.get(hostUrl) br.get("http://c.microsoft.com/trans_pixel.aspx") # get additional cookies page = BeautifulSoup(page.text, 'html.parser') form = page.find('form', id='Newsubmission') form_data = dict([(el['name'], el.get('value', None)) for el in form.find_all('input') if el.has_attr('name')]) form_data["Name"] = email form_data["Product"] = "Windows Server Antimalware" form_data["Comments"] = help_text form_data["Priority"] = 2 response = br.post( hostUrl, data=form_data, files={u'File': open(filename, 'rb')}) text = response.text.encode('utf-8') result = text.find('window.location.href="SubmissionHistory.aspx') if result != -1: sub_url = text[result + 44:] sub_url = "/SubmissionHistory.aspx" + sub_url[:sub_url.find('"')] url = response.url[:response.url.rfind('/')] + sub_url return 0, "Success! Your status is <a href='%s'>here</a>" % url else: logger.warning("Microsoft error: %s" % text) return 1, "Something wrong: %s" % text
def hit_example_com(self):
    try:
        start_time = time()
        session = Session()
        http_adapter = HTTPAdapter(max_retries=0)
        session.mount('http://', http_adapter)
        session.mount('https://', http_adapter)
        session.get("http://www.example.com", timeout=30)
        # # print("Doing a task that is not a request...")
        # login = Login()
        # r = login.sw_valid_login(GC.USERNAME, GC.PASSWORD, "http://www.sowatest.com")
        stats_latency['latency'].append(time() - start_time)
        events.request_success.fire(request_type="Transaction",
                                    name="hit_sowatest",
                                    response_time=time() - start_time,
                                    response_length=0)
        session.close()
        # # Assert Section
        # assert r.status_code == 200
        # assert "Access Denied" in str(html.fromstring(r.text).xpath("//title/text()"))
        # assert '<div id="blockedBanner">' in r.text
    except Exception as e:
        """
        * *request_type*: Request type method used
        * *name*: Path to the URL that was called (or override name if it was used in the call to the client)
        * *response_time*: Time in milliseconds until exception was thrown
        * *exception*: Exception instance that was thrown
        """
        events.request_failure.fire(request_type="Transaction",
                                    name="hit_sowatest",
                                    response_time=time() - start_time,
                                    exception=e)
def get(self): session = Session() availabilityJSON = session.get(iPhone6AvailabilityURL).content availabilityDict = json.loads(availabilityJSON) caStoresXML = session.get(appleCAStoreURL).content # self.write(caStoresXML) storesDict = xmltodict.parse(caStoresXML)["records"]["country"] # self.dumpJSON(storesDict) ontarioStoresList = [] for eachStateDict in storesDict["state"]: if eachStateDict["@name"] == "Ontario": ontarioStoresList = eachStateDict["store"] # self.dumpJSON(ontarioStoresList) # self.write(storeNameForStoreID(ontarioStoresList, "R447")) # # logging.info(availabilityDict) lastUpdatedTimestamp = availabilityDict.pop("updated") storeIDs = availabilityDict.keys() for storeID in storeIDs: phonesDictInThisStore = availabilityDict[storeID] phoneKeys = phonesDictInThisStore.keys() for eachPhoneKey in phoneKeys: if (phonesDictInThisStore[eachPhoneKey] == True) and (eachPhoneKey in targetModels) and (storeID in targetStores): sendEmail(storeNameForStoreID(ontarioStoresList, storeID), eachPhoneKey) if eachPhoneKey in iphone6Dictionary: replaceKeyInDictionary(phonesDictInThisStore, eachPhoneKey, iphone6Dictionary[eachPhoneKey]) for storeID in storeIDs: replaceKeyInDictionary(availabilityDict, storeID, storeNameForStoreID(ontarioStoresList, storeID)) availabilityDict["_updated"] = lastUpdatedTimestamp orderedDict = collections.OrderedDict(sorted(availabilityDict.items())) self.dumpJSON(orderedDict)
class JoolaBaseClient(object):
    def __init__(self, base_url, credentials=None, api_token=None, **kwargs):
        self.base_url = str(base_url)
        self.session = Session()
        self.session.mount('http://', CachingHTTPAdapter())
        self.session.mount('https://', CachingHTTPAdapter())
        if api_token:
            self.session.auth = APITokenAuth(api_token)
        elif credentials:
            self.session.auth = credentials

    def list(self):
        return self.session.get(self.base_url)

    def get(self, lookup):
        return self.session.get('%s%s' % (self.base_url, str(lookup)))

    def insert(self, **kwargs):
        return self.session.post(self.base_url, data=kwargs)

    def patch(self, lookup, **kwargs):
        return self.session.patch('%s%s' % (self.base_url, str(lookup)), data=kwargs)

    def delete(self, lookup):
        return self.session.delete('%s%s' % (self.base_url, str(lookup)))
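# A minimal usage sketch for JoolaBaseClient above. The base URL and token are
# placeholders, and APITokenAuth / CachingHTTPAdapter are assumed to come from
# the surrounding project, not from requests itself.
client = JoolaBaseClient('https://api.example.com/v1/items/', api_token='my-token')
all_items = client.list().json()        # GET the whole collection
one_item = client.get('42').json()      # GET a single resource by lookup key
client.insert(name='new item')          # POST form-encoded data
client.patch('42', name='renamed')      # PATCH an existing resource
client.delete('42')                     # DELETE it again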
def get_all_setlists(artist, page_number, sets_per_page): headers = {'Accept': 'application/json'} url = "http://api.setlist.fm/rest/0.1/search/setlists?artistName={0}&p={1}".format(artist, page_number) session = Session() response = session.get(url, headers=headers) data = response.json() setlists = data['setlists']['setlist'] total = data['setlists']['@total'] total_pages = math.ceil(int(total) / sets_per_page) # Continue to make requests until max setlists are downloaded for page in range(page_number + 1, total_pages + 1): print('{0} Page {1}'.format(artist, page)) url = "http://api.setlist.fm/rest/0.1/search/setlists?artistName={0}&p={1}".format(artist, page) response = session.get(url, headers=headers) data = response.json() # If more than one result, concatenate lists, else append element to list. if type(data['setlists']['setlist']) is list: setlists = setlists + data['setlists']['setlist'] elif type(data['setlists']['setlist']) is dict: setlists.append(data['setlists']['setlist']) return setlists
def search(apn): s = Session() s.mount('https://', HTTPSAdapter()) url = 'https://www.acgov.org/ptax_pub_app/RealSearch.do' data = { 'displayApn': apn, 'situsStreetNumber': '', 'situsStreetName': '', 'situsStreetSuffix': '', 'situsUnitNumber': '', 'situsCity': '', 'searchBills': 'Search', 'showHistory': 'N', } headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36', } s.get('https://www.acgov.org/ptax_pub_app/RealSearchInit.do?showSearchParmsFromLookup=true', headers = headers) r = s.post(url, data = data) try: os.mkdir('results') except OSError: pass f = open(os.path.join('results', apn), 'w') f.write(r.text) f.close()
def upload(cls, url, f, filename=None, session=None): if session is None: session = Session() should_close = True if isinstance(f, six.string_types): if not filename: filename = os.path.basename(f) f = open(f, 'rb') should_close = True try: if 'csrftoken' not in session.cookies: session.get('http://databasin.org') r = session.post( url, data={'csrfmiddlewaretoken': session.cookies['csrftoken']}, files={'file': (filename, f)} ) raise_for_authorization(r, session.client.username is not None) r.raise_for_status() o = urlparse(url) return cls.get( '{0}://{1}{2}'.format(o.scheme, o.netloc, TEMPORARY_FILE_DETAIL_PATH.format(uuid=r.json()['uuid'])), session=session, lazy=False ) finally: if should_close: f.close()
def gen_session():
    session = Session()
    url = 'http://www.sccredit.gov.cn/queryInfo.do?behavior=enterSearch&panel=corp'
    try:
        session.get(url, timeout=3)
        return session
    except Exception:
        # Swallow connection problems; the caller gets None when the warm-up request fails.
        pass
def test_login_logout():
    s = Session()
    rt = s.get(url('fl'))
    assert_response(rt, 'Login OK')
    rt = s.get(url('whoami'))
    assert_response(rt, common.FAKE_USER)
    s.get(url('logout'))
    rt = s.get(url('whoami'))
    assert_response(rt, None, status=401)
def update_analytics(self): session = Session() if 'TWITLOG_COOKIES' in os.environ: cookies = json.loads(os.environ['TWITLOG_COOKIES']) session.cookies.update(cookies) else: print 'Fetching homepage for auth token' res = session.get('https://twitter.com') body = BeautifulSoup(res.text) input_ = body.find(lambda tag: tag.name == 'input' and tag.get('name') == 'authenticity_token') authe_token = input_['value'] print 'Logging into account' res = session.post('https://twitter.com/sessions', data={ 'session[username_or_email]': self.args.username, 'session[password]': self.args.password, 'return_to_ssl': 'true', 'scribe_log': '', 'redirect_after_login': '******', 'authenticity_token': authe_token, }) cookies = dict(session.cookies.iteritems()) print print 'export TWITLOG_COOKIES=\'%s\'' % json.dumps(cookies) print with self.db.connect() as con: for tid, old_json in con.execute(''' SELECT tweet.id, last.json FROM tweets as tweet LEFT JOIN tweet_metrics as last ON tweet.last_metrics_id = last.id WHERE tweet.last_metrics_id IS NULL OR last.created_at > datetime('now','-1 day') ORDER BY tweet.id DESC '''): res = session.get('https://twitter.com/i/tfb/v1/tweet_activity/web/poll/%s' % tid) new_metrics = {k: int(v) for k, v in res.json()['metrics']['all'].iteritems()} new_metrics.pop('Engagements', None) # Just a total of the others. new_json = json.dumps(new_metrics, sort_keys=True) changed = new_json != old_json print tid, new_json if changed else 'unchanged' if changed: mid = con.insert('tweet_metrics', { 'tweet_id': tid, 'json': new_json, }) con.update('tweets', {'last_metrics_id': mid}, {'id': tid}) con.commit()
class Compranet(FishFinder): def setup(self): self.session = Session() self.session.get('http://compranet-pa.funcionpublica.gob.mx/PAAASOP/buscador.jsp') self.post_url = 'http://compranet-pa.funcionpublica.gob.mx/PAAASOP/DownloadArchivo' def write_xls(self, data, filename): with open(filename, 'wb') as f: f.write(data) def search(self, query): """ Submit a search query and return results """ params = { 'ocultarParam':'0', 'ocultarDetalle':'1', 'cveEntFederativa':'0', 'cveDependencia':'0', 'concepto': query, 'valCompraDirPyme':'1000', 'entidadesSelect':'0', 'dependenciasSelect':'0' } r = self.session.post(self.post_url, params = params) return r.content def test(self, result, query): """ With our results, test whethere the query was legitimate. 0 = No Results 1 = Pass 2 = Needs More """ xls = xlrd.open_workbook(file_contents=result) sheet = xls.sheet_by_index(0) nrows = sheet.nrows if nrows == 1: print "%s has no results" % query return 0 elif nrows < 2501: filename = "data/%s.xls" % query print "Writing %s" % filename self.write_xls(result, filename) return 1 else: print "%s has too many results" % query return 2
class TheSubDBProvider(Provider): """TheSubDB Provider.""" languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes} required_hash = 'thesubdb' server_url = 'http://api.thesubdb.com/' subtitle_class = TheSubDBSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' % __short_version__) def terminate(self): self.session.close() def query(self, hash): # make the query params = {'action': 'search', 'hash': hash} logger.info('Searching subtitles %r', params) r = self.session.get(self.server_url, params=params, timeout=10) # handle subtitles not found and errors if r.status_code == 404: logger.debug('No subtitles found') return [] r.raise_for_status() # loop over languages subtitles = [] for language_code in r.text.split(','): language = Language.fromthesubdb(language_code) subtitle = self.subtitle_class(language, hash) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages] def download_subtitle(self, subtitle): logger.info('Downloading subtitle %r', subtitle) params = {'action': 'download', 'hash': subtitle.hash, 'language': subtitle.language.alpha2} r = self.session.get(self.server_url, params=params, timeout=10) r.raise_for_status() subtitle.content = fix_line_ending(r.content)
class HTTPRestore(object):
    def __init__(self, host, port, site, pgdata=None):
        self.log = logging.getLogger("HTTPRestore")
        self.host = host
        self.port = port
        self.site = site
        self.pgdata = pgdata
        self.session = Session()

    def list_basebackups(self):
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/basebackups"
        response = self.session.get(uri)
        basebackups = []
        for basebackup, values in response.json()["basebackups"].items():
            basebackups.append({"name": basebackup, "size": values["size"]})
        return basebackups

    def show_basebackup_list(self):
        basebackups = self.list_basebackups()
        line = "Available %r basebackups:" % self.site
        print(line)
        print("=" * len(line))
        print("basebackup\t\tsize")
        for r in basebackups:
            print("{}\t{}".format(r["name"], r["size"]))

    def get_basebackup_file(self, basebackup):
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/basebackups/" + basebackup
        response = self.session.get(uri, stream=True)
        if response.status_code != 200:
            raise Error("Incorrect basebackup: {!r} or site: {!r} defined".format(basebackup, self.site))
        basebackup_path = os.path.join(self.pgdata, "base.tar.xz")
        store_response_to_file(basebackup_path, response)
        tar = tarfile.TarFile(fileobj=open(basebackup_path, "rb"))
        return response.headers["x-pghoard-start-wal-segment"], basebackup_path, tar

    def get_archive_file(self, filename, target_path, path_prefix=None):
        start_time = time.time()
        self.log.debug("Getting archived file: %r, target_path: %r, path_prefix: %r",
                       filename, target_path, path_prefix)
        uri = "http://" + self.host + ":" + str(self.port) + "/" + self.site + "/" + filename
        if not path_prefix:
            final_target_path = os.path.join(os.getcwd(), target_path)
        else:
            final_target_path = os.path.join(path_prefix, target_path)
        headers = {"x-pghoard-target-path": final_target_path}
        response = self.session.get(uri, headers=headers, stream=True)
        self.log.debug("Got archived file: %r, %r status_code: %r took: %.2fs",
                       filename, target_path, response.status_code, time.time() - start_time)
        return response.status_code in (200, 206)
class ModelRegistryClient(object):
    def __init__(self, host_and_port):
        self.endpoint = "http://%s/api/v1/models/" % host_and_port
        self.session = Session()

    def retrieve_model(self, slug, timestamp, destination):
        response = self.session.get("%s/%s/%s/default/model.bin" % (self.endpoint, slug, timestamp))
        response.raise_for_status()
        data = StringIO.StringIO(response.content)
        z = zipfile.ZipFile(data)
        z.extractall(destination)
        metadata = self.session.get("%s/%s/%s/default/metadata" % (self.endpoint, slug, timestamp))
        metadata.raise_for_status()
        return metadata.json()
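# A minimal usage sketch for ModelRegistryClient above; the host, slug and
# timestamp are placeholders for whatever the registry actually serves.
client = ModelRegistryClient("registry.example.com:8080")
metadata = client.retrieve_model("my-model", "20200101T000000", "/tmp/my-model")
print(metadata)  # JSON metadata returned by the registry for the extracted model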
def establish_moodle_session(user, passwd): session = Session() response = session.get('https://www.moodle.tum.de') response = session.get('https://www.moodle.tum.de/Shibboleth.sso/Login?providerId=https://tumidp.lrz.de/idp/shibboleth&target=https://www.moodle.tum.de/auth/shibboleth/index.php') response = session.post('https://tumidp.lrz.de/idp/profile/SAML2/Redirect/SSO?execution=e1s1', data={'j_username': user, 'j_password': passwd, '_eventId_proceed':''}) parsed = html.fromstring(response.text) session.post('https://www.moodle.tum.de/Shibboleth.sso/SAML2/POST', data={'RelayState': parsed.forms[0].fields['RelayState'], 'SAMLResponse': parsed.forms[0].fields['SAMLResponse']}) return session
class Uploader:
    def __init__(self):
        self.__s = Session()
        self.__header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.10 Safari/537.36"}
        self.__url = "http://webhd1.ttu.edu.tw/"
        self.__index = "index.php"
        self.__main = "main/"
        self.__showhd = "showhd.php"
        self.__upload = "upload.php"
        self.__sharefile = "sharefile.php"
        self.__login_data = {"ID": "", "PWD": "", "Submit": "登入"}
        self.__upload_form = {"GoUpload": "1", "MAX_FILE_SIZE": "102400000",
                              "fname1": "", "fname2": "", "fname3": "", "fname4": "",
                              "orgfn1": "", "orgfn2": "", "orgfn3": "", "orgfn4": ""}
        self.__file_path = "C:\\fakepath\\"
        self.__file_name = ""
        self.__files = {"userfile1": "", "userfile2": "", "userfile3": "", "userfile4": ""}
        self.__share_form = {"fname": "", "idname": "", "ShareKey": "", "Confirm": "確認送出"}
        self.__share_idname = ""
        self.__s.headers.update(self.__header)

    def login(self):
        self.__response = self.__s.get(self.__url + self.__index)
        self.__login_data["ID"] = ""
        self.__login_data["PWD"] = ""
        self.__response = self.__s.post(self.__url + self.__index, data=self.__login_data)

    def upload(self):
        self.__response = self.__s.get(self.__url + self.__main + self.__showhd,
                                       params={"Action": "Upload"})
        self.__upload_form["fname1"] = self.__file_name
        self.__upload_form["orgfn1"] = self.__file_path + self.__file_name
        self.__files["userfile1"] = open(self.__file_name, "rb")
        self.__response = self.__s.post(self.__url + self.__main + self.__upload,
                                        data=self.__upload_form, files=self.__files)

    def share(self):
        self.__response = self.__s.get(self.__url + self.__main + self.__showhd,
                                       params={"Action": "ShareFile", "fname": self.__file_name})
        self.__share_form["fname"] = self.__file_name
        self.__share_form["idname"] = self.__share_idname
        self.__response = self.__s.post(self.__url + self.__main + self.__sharefile,
                                        data=self.__share_form)
def test_saves_content_as_gzip(self): s = Session() cassette_name = 'handles_digest_auth' match = ['method', 'uri', 'digest-auth'] with Betamax(s).use_cassette(cassette_name, match_requests_on=match): r = s.get('https://httpbin.org/digest-auth/auth/user/passwd', auth=HTTPDigestAuth('user', 'passwd')) assert r.ok assert r.history[0].status_code == 401 s = Session() with Betamax(s).use_cassette(cassette_name, match_requests_on=match): r = s.get('https://httpbin.org/digest-auth/auth/user/passwd', auth=HTTPDigestAuth('user', 'passwd')) assert r.json() is not None
def get_maomaos(location_id=12030): start = 0 maomaos = [] ids = [] while(True): print(start) if start > 29: break sess = Session() res = sess.get(api(start=start, location_id=12030), headers=HEADERS, timeout=50, allow_redirects=False) table = BeautifulSoup(res.content, "html.parser").find('table') trs = table.findAll('tr') for i, tr in enumerate(trs): img_url = to_img_full_url(tr.find('img').attrs['src']) detail_url = BASE_URL + tr.find('a').attrs['href'] desc, _, location, _, breed, _ = tr.find( 'div').text.strip().split('\n\t\t\t\t\t\t\t') detail_res = sess.get( detail_url, headers=HEADERS, timeout=50, allow_redirects=False) if detail_res.status_code == 301: continue soup = BeautifulSoup(detail_res.content, "html.parser") detail_table, info_table, _ = soup.findAll('table') img_tr, content_tr = detail_table.findAll("tr") if len( detail_table.findAll("tr")) == 2 else [[]] + detail_table.findAll("tr") info = [td.text.strip() for td in info_table.findAll("td")] info_dict = dict(zip(info[0::2], info[1::2])) posted = to_str_date(info_dict['Posted']) print(desc) img_urls = [to_img_full_url(img.attrs['src']) for img in img_tr.findAll('img')] if not isinstance(img_tr, list) else ["https://ws1.sinaimg.cn/large/006tNc79gy1fovo1rjkghj305o05p40b.jpg"] id = detail_url.split('/')[3] if id in ids: break ids.append(id) maomaos.append({'id': detail_url.split('/')[3], 'title': tr.find('img').attrs['alt'], "desc": desc, 'location': location, 'order': i + start * 15, 'posted': to_str_date(info_dict['Posted']), 'updated': to_str_date(info_dict['Updated']), 'breed': breed, 'price': tr.find('span').text, 'content': content_tr.text.strip(), 'main_img_url': img_url, 'detail_url': detail_url, 'img_urls': img_urls, 'is_bicolor': is_bi_color((desc + ' ' + tr.find('img').attrs['alt']).lower())}) time.sleep(2) write_json("data/maomaos.json", maomaos) leancloud_objects = [leancloud_object( "Maomao", maomao, id_key="id") for maomao in maomaos] leancloud.Object.save_all([leancloud_object( "Maomao", maomao, id_key="id") for maomao in maomaos]) start += 10
class LocustioWebActions: def __init__(self): self.session = Session() def start_locust(self): if not os.path.exists(GC.RESULTS_BASE_PATH): os.makedirs(GC.RESULTS_BASE_PATH) current_date_time = "{}_{}".format(strftime("%x").replace("/", "."), strftime("%X")) latest_result_folder = "{}/{}".format(GC.RESULTS_BASE_PATH, current_date_time) os.makedirs(latest_result_folder) form_data = {"locust_count": LC.USERS, "hatch_rate": LC.RAMPUP} self.session.post("http://localhost:8089/swarm", data=form_data) return latest_result_folder def stop_locust(self): response = self.session.get("http://localhost:8089/stop") print("Response from stop: {}".format(response.content)) def reset_locust(self): response = self.session.get("http://localhost:8089/stats/reset") print("Response from Reset stats: {}".format(response.content)) def get_request_stats_csv(self): response = self.session.get("http://localhost:8089/stats/requests/csv") return response.content def get_stats_distribution_csv(self): response = self.session.get("http://localhost:8089/stats/distribution/csv") return response.content def get_exceptions_csv(self): response = self.session.get("http://localhost:8089/exceptions/csv") return response.content def get_starting_info(self): with open(GC.STARTING_INFO_FILE_PATH, "rb") as f: starting_info = pickle.load(f) return starting_info def kill_master(self): starting_info = self.get_starting_info() os.kill(starting_info["pid"], signal.SIGTERM)
class BaseClass:
    """Base crawler for Baidu search result pages."""

    def __init__(self, keywords):
        self.session = Session()
        self.headers = {
            "User-Agent": generate_user_agent(),
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive"
        }
        self.session.headers.update(self.headers)  # attach the prepared headers to the session
        self.base_url = 'http://baidu.com/s?'
        self.key_words = 'ie=utf-8&wd={}&rn=50'.format(keywords)

    def request(self, url):
        """Fetch a page and return its raw content."""
        response = self.session.get(url)
        response.encoding = 'utf-8'
        return response.content

    @staticmethod
    def parser(html, xpath):
        """Parse the page source with the given XPath expression."""
        html = re.sub('<em>|</em>|<em class>', '', html)
        source = etree.HTML(html)
        return source.xpath(xpath)

    def output(self, url, xpath):
        """Return the HTML tags matched by the XPath expression."""
        return self.parser(self.request(url).decode(), xpath)
def get_response(url, **kwargs):
    header_info = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/44.0.2403.157 Safari/537.36'
    }
    if 'retries' in kwargs:
        retries = kwargs.pop('retries')
    else:
        retries = 3
    kwargs.setdefault('headers', header_info)  # use the default headers unless the caller supplied some
    if 'sess' in kwargs:
        sess = kwargs.pop('sess')
    else:
        sess = Session()
    if 'timeout' not in kwargs:
        kwargs['timeout'] = 10
    response = None
    try:
        response = sess.get(url, **kwargs)
    except Timeout as e:
        if retries > 0:
            # Retry with the same session and one fewer attempt remaining.
            kwargs['retries'] = retries - 1
            kwargs['sess'] = sess
            response = get_response(url, **kwargs)
        else:
            print(e)
    return response
def _serializeIngredients(self): """Convert children ingredients in triples""" res = [] session = Session() nutritionalInformations = {} ingredients = [] for ingredient in self.ingredients: response = session.get(config.USDA_API.format(config.USDA_API_KEY, ingredient['food'])).json() ing = Ingredient(name=response.get('report').get('food').get('name'), quantity=ingredient['quantity'], nutrients=response.get('report').get('food').get('nutrients')) nutritionalInformations = self._calculateNutrients(ingredient=ing, data=nutritionalInformations) ingredients.append(ing) ingredientList = IngredientList(ingredients) res.append((self.uri, SFO.ingredients, ingredientList.getURI())) res.extend(ingredientList.serialize()) res.extend(self._parseNutritionTable(nutritionalInformations, res)) return res
def login(self, username, password): s = Session() # login over bogi.ru params = {"login": username, "password": password} r1 = s.post(self.login_url, params, verify=False) # in case of failed login, bogi redirects to: # http://www.lostfilm.tv/blg.php?code=6&text=incorrect%20login/password if r1.request.url != self.login_url: url = urlparse(r1.url) if url.netloc == self.netloc: query = parse_qs(url.query) code = int(query.get('code', ['-1'])[0]) text = query.get('text', "-") r1.encoding = 'windows-1251' message = r1.text raise LostFilmTVLoginFailedException(code, text, message) else: raise LostFilmTVLoginFailedException(-1, None, None) # callback to lostfilm.tv soup = BeautifulSoup(r1.text) inputs = soup.findAll("input") action = soup.find("form")['action'] cparams = dict([(i['name'], i['value']) for i in inputs if 'value' in i.attrs]) s.post(action, cparams, verify=False) # call to profile page r3 = s.get(self.profile_url) # read required params self.c_uid = s.cookies['uid'] self.c_pass = s.cookies['pass'] self.c_usess = self.search_usess_re.findall(r3.text)[0]
class Site: def __init__(self, username, password): self.username = username self.password = password self.session = None def url(self): return "http://{}/collection/all".format(HOST) def login(self): self.session = Session() # drupal requires that you first GET the form r = self.session.get(self.url()) # then POST to it s = self.session.post( self.url(), data={ 'name': self.username, 'pass': self.password, 'form_id': 'user_login', 'op': 'Log in', }, headers={ 'referer': self.url(), } ) print("=== logged in ===") return self.session def get_session(self): if self.session is not None: return self.session self.session = self.login() return self.session def get_collection_page(self, page): return CollectionPage(self.session, page)
def get_url_page(self, url="http://www.optimalstackfacts.org/"):
    for _ in range(3):
        # proxies_url = choice(self.proxies_url_list)
        proxies_url = "http://82.209.49.200:8080"
        proxies = {
            # "http": "http://*****:*****@93.127.146.106:80/",
            "http": proxies_url,
            "https": proxies_url
        }
        try:
            session = Session()
            r = session.get(url, proxies=proxies, headers=self.headers, timeout=10)
            # r = requests.get(url, proxies=proxies,)
            print(r.status_code)
            if r.status_code in [200, 301]:
                page = r.content
                r.cookies.clear()
                r.close()
                return page
            else:
                r.cookies.clear()
                r.close()
        except Exception:
            # Ignore proxy/connection failures and retry on the next loop iteration.
            pass
def get_service(hass, config): """ Get the NMA notification service. """ if not validate_config(config, {DOMAIN: [CONF_API_KEY]}, _LOGGER): return None try: # pylint: disable=unused-variable from requests import Session except ImportError: _LOGGER.exception( "Unable to import requests. " "Did you maybe not install the 'Requests' package?") return None nma = Session() response = nma.get(_RESOURCE + 'verify', params={"apikey": config[DOMAIN][CONF_API_KEY]}) tree = ET.fromstring(response.content) if tree[0].tag == 'error': _LOGGER.error("Wrong API key supplied. %s", tree[0].text) else: return NmaNotificationService(config[DOMAIN][CONF_API_KEY])
class NmaNotificationService(BaseNotificationService): """ Implements notification service for NMA. """ def __init__(self, api_key): # pylint: disable=no-name-in-module, unused-variable from requests import Session self._api_key = api_key self._data = {"apikey": self._api_key} self.nma = Session() def send_message(self, message="", **kwargs): """ Send a message to a user. """ title = kwargs.get(ATTR_TITLE) self._data['application'] = 'home-assistant' self._data['event'] = title self._data['description'] = message self._data['priority'] = 0 response = self.nma.get(_RESOURCE + 'notify', params=self._data) tree = ET.fromstring(response.content) if tree[0].tag == 'error': _LOGGER.exception( "Unable to perform request. Error: %s", tree[0].text)
def getReferer(url, referer):
    useragent = (
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    )
    session = Session()
    session.headers.update({"referer": referer, "user-agent": useragent})
    return session.get(url)
def test_session():
    s = Session()
    s.proxies = proxies
    s.get(url_get)
class HosszupuskaProvider(Provider, ProviderSubtitleArchiveMixin): """Hosszupuska Provider.""" languages = {Language('hun', 'HU')} | {Language(l) for l in [ 'hun', 'eng' ]} video_types = (Episode,) server_url = 'http://hosszupuskasub.com/' subtitle_class = HosszupuskaSubtitle hearing_impaired_verifiable = False multi_result_throttle = 2 # seconds def initialize(self): self.session = Session() self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")} def terminate(self): self.session.close() def get_language(self, text): if text == '1.gif': return Language.fromhosszupuska('hu') if text == '2.gif': return Language.fromhosszupuska('en') return None def query(self, series, season, episode, year=None, video=None): # Search for s01e03 instead of s1e3 seasona = "%02d" % season episodea = "%02d" % episode series = fix_inconsistent_naming(series) seriesa = series.replace(' ', '+').replace('\'', '') # get the episode page logger.info('Getting the page for episode %s', episode) url = self.server_url + "sorozatok.php?cim=" + seriesa + "&evad="+str(seasona) + \ "&resz="+str(episodea)+"&nyelvtipus=%25&x=24&y=8" logger.info('Url %s', url) r = self.session.get(url, timeout=10).content i = 0 soup = ParserBeautifulSoup(r, ['lxml']) table = soup.find_all("table")[9] subtitles = [] # loop over subtitles rows for row in table.find_all("tr"): i = i + 1 if "this.style.backgroundImage='url(css/over2.jpg)" in str(row) and i > 5: datas = row.find_all("td") # Currently subliminal not use these params, but maybe later will come in handy # hunagrian_name = re.split('s(\d{1,2})', datas[1].find_all('b')[0].getText())[0] # Translator of subtitle # sub_translator = datas[3].getText() # Posting date of subtitle # sub_date = datas[4].getText() sub_year = sub_english_name = sub_version = None # Handle the case when '(' in subtitle if datas[1].getText().count('(') == 2: sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText())[3] if datas[1].getText().count('(') == 3: sub_year = re.findall(r"(?<=\()(\d{4})(?=\))", datas[1].getText().strip())[0] sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText().split('(')[0])[0] if not sub_english_name: continue sub_season = int((re.findall('s(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0]) .lstrip('0')) sub_episode = int((re.findall('e(\d{1,2})', datas[1].find_all('b')[0].getText(), re.VERBOSE)[0]) .lstrip('0')) if sub_season == season and sub_episode == episode: sub_language = self.get_language(datas[2].find_all('img')[0]['src'].split('/')[1]) sub_downloadlink = datas[6].find_all('a')[1]['href'] sub_id = sub_downloadlink.split('=')[1].split('.')[0] if datas[1].getText().count('(') == 2: sub_version = datas[1].getText().split('(')[1].split(')')[0] if datas[1].getText().count('(') == 3: sub_version = datas[1].getText().split('(')[2].split(')')[0] # One subtitle can be used for several releases sub_releases = [s.strip() for s in sub_version.split(',')] subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id, sub_english_name.strip(), sub_season, sub_episode, sub_version, sub_releases, sub_year, asked_for_release_group=video.release_group, asked_for_episode=episode) logger.debug('Found subtitle: %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): titles = [video.series] + video.alternative_series for title in titles: subs = self.query(title, video.season, video.episode, video.year, video=video) if subs: return subs time.sleep(self.multi_result_throttle) 
def download_subtitle(self, subtitle): r = self.session.get(subtitle.page_link, timeout=10) r.raise_for_status() # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Archive identified as rar') archive = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Archive identified as zip') archive = ZipFile(archive_stream) else: raise ProviderError('Unidentified archive type') subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
from utils import choose_mod parser = ArgumentParser(add_help=False) parser.add_argument( "url", nargs="?", default="https://courses.softlab.ntua.gr/pl2/2019b/exercises/combmod.php", ) args = parser.parse_args() session = Session() i = 1 while True: response = session.get(args.url) soup = BeautifulSoup(markup=response.text, features="lxml") N = int(soup.find(id="N").text) K = int(soup.find(id="K").text) P = int(soup.find(id="P").text) print(f"Round {i}, C({N}, {K}) modulo {P}") answer = choose_mod(N, K, P) print(f"Answer: {answer}") response = session.post(args.url, data={"answer": answer}) soup = BeautifulSoup(markup=response.text, features="lxml") right = soup.find(attrs={"class": "right"}) if right: print(right.text) else:
def main(): sess = Session() # 存放此次登录的 cookie # === read xls === speed_level = input("搜索速度等级(1至60,默认为20):") if not speed_level: speed_level = "20" print(speed_level) print("读xls电话列…") if len(sys.argv)<=1: raise Exception("没有输入 xls 文件") print("文件名: " + sys.argv[1]) wb = xlrd.open_workbook(sys.argv[1]) sheet1 = wb.sheet_by_index(0) tels = filter_tels(sheet1) print("搜寻到可用的电话号码数: " + str(len(tels))) # === logging === print("登录账户…") verify_code = input("输入你当前的验证码:") while not verify_code: verify_code = input("输入你当前的验证码:") LOGIN_INFO.update({"code": verify_code}) resp = sess.post(URL_LOGIN, data=LOGIN_INFO, headers=req_headers) if not is_success(resp.status_code): raise Exception("登录失败。(%s)" % resp.status_code) # === requests === print("查询数据…") print("设置时间起始终止, 输入格式为:年年年年-月月-日日, 然后回车。") time_begin = input("起始日期: ") time_end = input("终止日期: ") if time_begin: time_begin = arrow.get(time_begin) time_begin = time_begin.format("YYYY-MM-DD HH:mm:ss") print("起始时间为: " + time_begin) if time_end: time_end = arrow.get(time_end) time_end = time_end.format("YYYY-MM-DD HH:mm:ss") print("结束时间为: " + time_end) # 产生文件名,然后写入 xls 表的首行 file_name = generate_new_xls_filename() print("输出文件: " + file_name) doc = xlwt.Workbook() sheet = doc.add_sheet("sheet1") # 写入第一行,列名 for n in range(len(PAGE_ROW)): sheet.write(0,n,PAGE_ROW[n]) doc.save(file_name) current_line = 1 # 当前 xls 写的行数 for current_tel in tels: # FIXME only fetch the first page resp = sess.get(URL_QUERY_ACCOUNT_PURCHASE_INFO_WITH_TIME_RANGE, params={ "purchaseDatebegin":time_begin, "purchaseDateend":time_end, "account": current_tel }, headers=req_headers, timeout=REQ_TIMEOUT) if not is_success(resp.status_code): raise Exception("请求数据时返回状态错误, code: {code}, account: {account}".format( code=resp.status_code, account=current_tel )) rst = parse_account_info(resp.content) for an_order in rst: order_id = an_order[1] data_to_write = [current_tel, order_id] print((current_tel, order_id)) resp = sess.get(URL_QUERY_ORDER_DETAIL, params={"account":current_tel, "id":order_id}, headers=req_headers, timeout=REQ_TIMEOUT) new_rst = parse_purchase_info(resp.content) data_to_write.append(new_rst[1]) data_to_write += [an_order[0], an_order[2], an_order[3], an_order[4]] data_to_write.append(new_rst[0]) for j in range(len(data_to_write)): sheet.write(current_line, j, data_to_write[j]) current_line += 1 # 行数增加 doc.save(file_name) print("写入%s" % current_tel) random_pause(speed_level)
class Icinga2Api(object): """ Main Class to implement the Icinga2 API Client """ module = None def __init__(self): """ Initialize all needed Variables """ self.icinga_host = module.params.get("host") self.icinga_port = module.params.get("port") self.icinga_username = module.params.get("username") self.icinga_password = module.params.get("password") self.state = module.params.get("state") self.hostname = module.params.get("hostname") self.hostnames = module.params.get("hostnames") self.start_time = module.params.get("start_time") self.end_time = module.params.get("end_time") self.duration = module.params.get("duration") self.object_type = module.params.get("object_type") self.all_services = module.params.get("all_services") self.author = module.params.get("author") self.comment = module.params.get("comment") self.fixed = module.params.get("fixed") self.filter_vars = None self.trigger_name = None self.icinga_url = "{0}:{1}/v1".format(self.icinga_host, self.icinga_port) self.connection = Session() self.connection.headers.update({'Accept': 'application/json'}) self.connection.auth = (self.icinga_username, self.icinga_password) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def run(self): res = dict(changed=False, ansible_module_results="none") print("hostname : {} ({})".format(self.hostname, type(self.hostname))) print("hostnames : {} ({})".format(self.hostnames, type(self.hostnames))) if self.hostname and self.hostnames: module.fail_json(msg=("Please choose whether to set downtimes for " "'hostname' or 'hostnames'. " "Both at the same time is not supported.")) if len(self.hostnames) != 0: res['changed'] = True r = dict() if iter(self.hostnames): for h in self.hostnames: r[h] = dict() if self.__host_exists(h): """ """ payload = { 'type': self.object_type, 'filter': "host.name == \"{}\"".format(h), 'author': self.author, 'comment': self.comment, 'start_time': self.start_time, 'end_time': self.end_time, 'duration': self.duration } if self.fixed: payload.update(fixed=True) else: payload.update(fixed=False) if self.filter_vars: payload.update(filter_vars=self.filter_vars) if self.trigger_name: payload.update(trigger_name=self.trigger_name) if self.object_type == 'Host' and self.all_services is True: payload.update(all_services=True) module.log(msg="downtime for: {}".format(h)) module.log(msg="payload: {}".format(payload)) code, msg = self.__schedule_downtime(payload) module.log(msg="{}: {}".format(code, msg)) r[h] = dict( msg=msg, status_code=code, ) else: module.log(msg="404: host {} is not known".format(h)) r[h] = dict( msg="host {} is not known".format(h), status_code=404, ) res['result'] = r elif len(self.hostname) != 0: pass else: print("hoo") # print(res) # result = dict(changed=True, # ansible_module_results="Downtimes removed", # result=dict(req.json(), status_code=req.status_code)) return res def __call_url(self, method='GET', path=None, data=None, headers=None): """ """ if headers is None: headers = { 'Accept': 'application/json', 'X-HTTP-Method-Override': method, } url = "{0}/{1}".format(self.icinga_url, path) print(url) self.connection.headers.update(headers) try: if (method == 'GET'): ret = self.connection.get(url, verify=False) self.connection.close() elif (method == 'POST'): self.connection.close() ret = self.connection.post(url, data=data, verify=False) else: print("unsupported") ret.raise_for_status() # print("------------------------------------------------------------------") # print(" text : {}".format(ret.text)) # print(" headers : {}".format(ret.headers)) 
# print(" code : {}".format(ret.status_code)) # print("------------------------------------------------------------------") return ret.status_code, json.loads(ret.text) except Exception as e: print(e) raise def __host_exists(self, hostname): """ """ code = 0 data = { "type": "Host", "attrs": ["name"], "filter": "match(\"{0}\", host.name)".format(hostname), } code, ret = self.__call_url(method='POST', path="objects/hosts", data=module.jsonify(data), headers={ 'Accept': 'application/json', 'X-HTTP-Method-Override': 'GET' }) results = ret['results'] if (code == 200 and len(results) != 0): # code = results[0]['code'] # status = results[0]['status'] attrs = results[0]['attrs'] if attrs.get('name') == hostname: return True return False def __schedule_downtime(self, data): """ """ code = 0 status = "no status available" path = 'actions/schedule-downtime' code, ret = self.__call_url(method='POST', path=path, data=module.jsonify(data), headers={ 'Accept': 'application/json', 'X-HTTP-Method-Override': 'POST' }) results = ret['results'] if (len(results) != 0): # print(json.dumps(results[0])) code = int(results[0]['code']) status = results[0]['status'] return code, status
class User(object): """ class for user credentials and sending and posting requests Attributes ---------- session : requests.Session username : str logged_in : bool Boolean standing for login state. True if logged in """ def __init__(self): super(User, self).__init__() self.session = Session() self.logged_in = False self.username = None def __del__(self): try: self.session.close() except TypeError: pass def login(self, username): """ Sets the attributes according to login """ self.username = username self.logged_in = True return self def logout(self): """ When logging out """ self.logged_in = False def check_login(self): """ Raise an error if user is not logged in """ if self.logged_in is False: raise AUTHError('%s is not logged in.' % self.username) def post(self, url, **kwargs): """ Wrap session post """ response = self.session.post(url, **kwargs) return response def get(self, url, **kwargs): """ Wrap session get """ response = self.session.get(url, **kwargs) return response @staticmethod def check_response(response): """ Check for errors in a REST call """ if response.ok: return response.json() else: response.raise_for_status()
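# A minimal usage sketch for the User wrapper above; the URL is a placeholder
# and AUTHError is assumed to come from the surrounding project.
user = User().login("alice")
user.check_login()                                   # raises AUTHError if not logged in
data = User.check_response(user.get("https://httpbin.org/get"))
user.logout()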
class Base: """访问教务系统的基类""" def __init__(self, user_dictionary, debug=False): if debug: self._open_debug() self.cache = Session() # 登陆教务系统 message = self.cache.post( # "http://mjwgl.ahnu.edu.cn/login/check.shtml", # 旧接口 "http://mjwgl.ahnu.edu.cn/login/remotelogin", data={ "username": user_dictionary["username"], "password": user_dictionary["password"], "usertype": "stu", "device": "aphone", }, headers={ "Host": "mjwgl.ahnu.edu.cn", }) self.sessionid = message.cookies["PHPSESSID"] assert message.json()["success"] == "success", message.json( )["message"] def _open_debug(self): logging.basicConfig( level=logging.DEBUG, format='[%(levelname)s]-[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S', ) def get_page(self, target_url): """GET 获取页面内容""" if target_url.startswith("http://"): rep = self.cache.get(target_url, allow_redirects=False) else: rep = self.cache.get("http://mjwgl.ahnu.edu.cn/" + target_url, allow_redirects=False) assert rep.status_code == 200, "未登陆" return rep.content.decode("UTF-8") def post_data(self, target_url, data=None, json=None, **kwargs): if target_url.startswith("http://"): rep = self.cache.post(target_url, data=data, json=json, allow_redirects=False, **kwargs) else: rep = self.cache.post("http://mjwgl.ahnu.edu.cn" + target_url, data=data, json=json, allow_redirects=False, **kwargs) assert rep.status_code == 200, "未登陆" return rep.json() def get_url(self, op: str) -> str: """ 根据操作的拼音缩写获取对应的URL 如: 课表查询: kbcx """ rep = self.cache.post("http://mjwgl.ahnu.edu.cn/appdata.shtml", { "requesttype": op, "sessionid": self.sessionid }, allow_redirects=False) self.sessionid = rep.cookies["PHPSESSID"] return rep.headers["Location"]
class LoginGithub(object): def __init__(self, username="******", password="******"): self.username = username self.password = password self.login_url = 'https://github.com/login' self.post_url = 'https://github.com/session' self.profile_url = 'https://github.com/settings/profile' # session维持会话, 不用cookies self.session = Session() self.headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', 'Referer': 'https://github.com', } def token(self): response = self.session.get(self.login_url, headers=self.headers) selector = etree.HTML(response.text) token_value = selector.xpath( '//div[@id="login"]/form/input[2]/@value')[0] print(token_value) return token_value def login(self): post_data = { 'authenticity_token': self.token(), 'commit': 'Sign in', 'login': self.username, 'password': self.password, 'utf8': '✓', } response = self.session.post(self.post_url, data=post_data, headers=self.headers) if response.status_code == 200: print('登录成功') self.parse_html(response.text) response = self.session.get(self.profile_url, headers=self.headers) if response.status_code == 200: self.profile(response.text) def parse_html(self, html): select = etree.HTML(html) # obj = BeautifulSoup(html, 'html.parser') # # //*[@id="dashboard"]/div[2]/div[7] # dynamics = select.xpath( # '//*[@id="dashboard"]/div[2]/div[contains(@class, "watch_started")]') # div = obj.find('div', {'id': 'dashboard'}) # print(div) # l2_div = div.find('div', {'class': 'news'}) # print(l2_div) # divs = l2_div.find_all('div', {'class': 'watch_started'}) # print(divs) # dashboard > div.news.column.two-thirds > div:nth-child(7) if dynamics: for dynamic in dynamics: """只看started, 还有follow 类的""" user = dynamic.xpath( './/div[contains(@class, "width-full")]/div[contains(@class, "flex-items-baseline")]/div/a[1]/text()' ).strip() started_item = dynamic.xpath( './/div[contains(@class, "width-full")]/div[contains(@class, "flex-items-baseline")]/div/a[2]/text()' ).strip() print('{} started {}'.format(user, started_item)) def profile(self, html): selector = etree.HTML(html) user_profile_email = selector.xpath( '//select[@id="user_profile_email"]/option[last()]/text()') print('Your email is {}'.format(user_profile_email))
IGNORED_UIDS = [ uid for uid, username in PLEX_USERS.items() if username.lower() in USERNAME_IGNORE ] IGNORED_UIDS.extend((int(ACCOUNT.id), 0)) # Get the Tautulli history. PARAMS = { 'cmd': 'get_users_table', 'order_column': 'last_seen', 'order_dir': 'asc', 'length': 200, 'apikey': TAUTULLI_APIKEY } TAUTULLI_USERS = [] try: GET = SESSION.get(TAUTULLI_URL.rstrip('/') + '/api/v2', params=PARAMS).json()['response']['data']['data'] for user in GET: if user['user_id'] in IGNORED_UIDS: continue elif IGNORE_NEVER_SEEN and not user['last_seen']: continue TAUTULLI_USERS.append(user) except Exception as e: exit("Tautulli API 'get_users_table' request failed. Error: {}.".format(e)) def time_format(total_seconds): # Display user's last history entry days = total_seconds // 86400 hours = (total_seconds - days * 86400) // 3600 minutes = (total_seconds - days * 86400 - hours * 3600) // 60
import pandas as pd symbols = [] topCoins = [] url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest' parameters = {'start': '1', 'limit': '1000', 'convert': 'USD'} headers = { 'Accepts': 'application/json', 'X-CMC_PRO_API_KEY': '7b96af50-71b9-48c8-8211-3396385f4b08', } session = Session() session.headers.update(headers) try: response = session.get(url, params=parameters) data = json.loads(response.text) d = data['data'] #f=d[0]['slug'] for index in range(len(d)): topCoins.append([d[index]['slug'], d[index]['symbol']]) for index in range(len(d)): symbols.append(d[index]['symbol']) #print(topCoins) print(len(topCoins)) print(topCoins) except (ConnectionError, Timeout, TooManyRedirects) as e: print(e) with open('TopCoins.csv', 'w') as filehandle:
def _collect_sapcloudconnector(self): # # Uses monitoring API: # https://help.sap.com/viewer/cca91383641e40ffbe03bdc78f00f681/Cloud/en-US/f6e7a7bc6af345d2a334c2427a31d294.html # # Configuring : Make port 8443 available. add this to users.xml and restart SCC. # # <user username="******" password="******" roles="sccmonitoring"/> # cloud_connector_url = "{0}:{1}/".format(self.url, "8443").replace( "http://", "https://") self.log.debug( "{0}: Trying to connect to sapcloudconnector on url: {1}".format( self.host, cloud_connector_url)) health_url = cloud_connector_url + "exposed?action=ping" # # 1 second timeout to connect, 30 to read data. # status_code = 0 session = Session() session.auth = HTTPBasicAuth(self.user, self.password) session.timeout = (1, 30) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) try: health = session.get(cloud_connector_url) status_code = health.status_code except Exception: self.log.debug( "{0}: No SAP Cloud connector found on url: {1}".format( self.host, health_url)) status_code = 500 if status_code == 200: self.log.info( "{0}: Got health from cloud connector on url: {1}".format( self.host, health_url)) external_id = str(self._scc_external_id()) component_data = { "name": "SCC", "description": "SAP Cloud Connector", # "type": "SAP Cloud Connector", # "sid": "SCC", "host": self.host, # "system_number": "99", # "version": "v1", "domain": self.domain, "environment": self.stackstate_environment, "tags": self.tags # "labels": [] } self.log.debug("{0}: -----> component_data : {1}".format( self.host, component_data)) self.log.debug("{0}: -----> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-cloud-connector", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._host_external_id() relation_data = {} self.relation(source_id, target_id, "is hosted on", relation_data) # define scc status event self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:scc state", # "source_type_name": "SAP:host instance", "msg_title": "SCC status update.", "msg_text": "", "host": self.host, "tags": ["instance_id:99", "status:sapcontrol-green"] }) # # Lists sub accounts to the SAP Cloud and connection tunnels # subaccount_url = cloud_connector_url + "api/monitoring/subaccounts" subaccount_reply = session.get(subaccount_url) if subaccount_reply.status_code == 200: reply = subaccount_reply.text.encode('utf-8') self.log.debug( "{0}: Sub accounts reply from cloud connector : {1}". 
format(self.host, reply)) subaccounts = json.loads(subaccount_reply.text) self.log.debug( "{0}: JSON sub accounts from cloud connector : {1}".format( self.host, subaccounts)) for subaccount in subaccounts["subaccounts"]: self.log.debug("{0}: subaccount: {1}".format( self.host, subaccount)) # define cloud connector component subaccount_name = str(subaccount.get("displayName")) # display name is not always setup if subaccount_name == "None": subaccount_name = str(subaccount.get("subaccount")) external_id = str( self._scc_subaccount_external_id( subaccount.get("subaccount"))) tunnel = subaccount.get("tunnel") component_data = { "name": subaccount_name, "description": str(subaccount.get("description")), "state": str(tunnel.get("state")), "connectedSince": str(tunnel.get("connectedSince")), "connections": str(tunnel.get("connections")), "user": str(tunnel.get("user")), "regionHost": str(subaccount.get("regionHost")), "subaccount": str(subaccount.get("subaccount")), "locationID": str(subaccount.get("locationID")), "layer": "SAP SCC Sub Accounts", "domain": self.domain, "environment": self.stackstate_environment, "host": self.host, "tags": self.tags # "labels": [] } self.log.debug("{0}: -----> component_data : {1}".format( self.host, component_data)) self.log.debug("{0}: -----> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-scc-subaccount", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._scc_external_id() relation_data = {} self.relation(source_id, target_id, "is_setup_on", relation_data) # define cloud connector status event tunnel_status = self._scc_subaccount_status( tunnel.get("state")) self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:scc subaccount state", "msg_title": "SAP Cloud Connector '{0}' status update.".format( subaccount_name), "msg_text": "", "host": self.host, "tags": [ "status:{0}".format(tunnel_status), "subaccount_name:{0}".format(subaccount_name) ] }) else: if subaccount_reply.status_code == 400: msg = "{0}: SAP Cloud connector monitoring sub account page not " \ "supported in this version of SCC.".format(self.host) self.log.info(msg) else: status = subaccount_reply.status_code self.log.error( "{0}: No SAP Cloud connector sub account found. Status code: {1}" .format(self.host, status)) # # List backend SAP systems and virtual names. 
# backends_url = cloud_connector_url + "api/monitoring/connections/backends" backends_reply = session.get(backends_url) if backends_reply.status_code == 200: reply = backends_reply.text.encode('utf-8') self.log.debug( "{0}: Backends reply from cloud connector : {1}".format( self.host, reply)) backends = json.loads(backends_reply.text) self.log.info( "{0}: JSON backends from cloud connector : {1}".format( self.host, backends)) for subaccount in backends["subaccounts"]: # subaccount["regionHost"] # subaccount["subaccount"] # subaccount["locationID"] virtualbackend = str(subaccount.get("virtualBackend")) for backend in subaccount["backendConnections"]: external_id = self._scc_backend_external_id( subaccount["subaccount"], virtualbackend) component_data = { "virtualBackend": virtualbackend, "internalBackend": str(backend.get("internalBackend")), "protocol": str(backend.get("protocol")), "idle": str(backend.get("idle")), "active": str(backend.get("active")), "labels": [], "layer": "SAP SCC Back-ends", "domain": self.domain, "environment": self.stackstate_environment, "tags": self.tags } self.log.debug("{0}: ------> external_id : {1}".format( self.host, external_id)) self.component(external_id, "sap-cloud", component_data) # define relation cloud connector --> host # is hosted on source_id = external_id target_id = self._scc_subaccount_external_id( subaccount["subaccount"]) relation_data = {} self.relation(source_id, target_id, "is connected to", relation_data) self.event({ "timestamp": int(time.time()), "source_type_name": "SAP:cloud component state", "msg_title": "SAP Cloud Connector '{0}' status update.".format( backend["virtualBackend"]), "msg_text": "", "host": self.host, "tags": [ "active:{0}".format(backend["active"]), "idle:{0}".format(backend["idle"]) ] }) else: if backends_reply.status_code == 400: msg = "{0}: SAP Cloud connector monitoring backend page not supported " \ "in this version of SCC.".format(self.host) self.log.info(msg) else: status = backends_reply.status_code self.log.error( "{0}: No SAP Cloud connector backends found. Status code: {1}" .format(self.host, status)) if status_code == 401: msg = "{0}: Authentication failed, check your config.yml and SCC users.xml " \ "for corresponding username and password.".format(self.host) self.log.error(msg) session.close()
class Crawler: '''Blueprint for creating new crawlers''' def __init__(self): self._destroyed = False self.executor = futures.ThreadPoolExecutor(max_workers=3) # Initialize cloudscrapper try: self.scraper = cloudscraper.create_scraper(browser={ 'platform': 'linux', 'mobile': False }) except Exception as err: logger.exception('Failed to initialize cloudscraper') self.scraper = Session() # end try # Must resolve these fields inside `read_novel_info` self.novel_title = 'N/A' self.novel_author = 'N/A' self.novel_cover = None self.is_rtl = False # Each item must contain these keys: # `id` - 1 based index of the volume # `title` - the volume title (can be ignored) self.volumes = [] # Each item must contain these keys: # `id` - 1 based index of the chapter # `title` - the title name # `volume` - the volume id of this chapter # `volume_title` - the volume title (can be ignored) # `url` - the link where to download the chapter self.chapters = [] # Other stuffs - not necessary to resolve from crawler instance. self.home_url = '' self.novel_url = '' self.last_visited_url = None # end def def destroy(self): self._destroyed = True self.volumes.clear() self.chapters.clear() self.scraper.close() self.executor.shutdown(False) # end def # ------------------------------------------------------------------------- # # Implement these methods # ------------------------------------------------------------------------- # @abstractmethod def initialize(self): pass # end def @abstractmethod def login(self, email, password): pass # end def @abstractmethod def logout(self): pass # end def @abstractmethod def search_novel(self, query): '''Gets a list of results matching the given query''' pass # end def @abstractmethod def read_novel_info(self): '''Get novel title, autor, cover etc''' pass # end def @abstractmethod def download_chapter_body(self, chapter): '''Download body of a single chapter and return as clean html format.''' pass # end def def get_chapter_index_of(self, url): '''Return the index of chapter by given url or 0''' url = (url or '').strip().strip('/') for chapter in self.chapters: if chapter['url'] == url: return chapter['id'] # end if # end for return 0 # end def # ------------------------------------------------------------------------- # # Helper methods to be used # ------------------------------------------------------------------------- # @property def headers(self): return self.scraper.headers.copy() # end def @property def cookies(self): return {x.name: x.value for x in self.scraper.cookies} # end def def absolute_url(self, url, page_url=None): url = (url or '').strip() if not page_url: page_url = self.last_visited_url # end if if not url or len(url) == 0: return None elif url.startswith('//'): return self.home_url.split(':')[0] + ':' + url elif url.find('//') >= 0: return url elif url.startswith('/'): return self.home_url + url[1:] elif page_url: return page_url.strip('/') + '/' + url else: return self.home_url + url # end if # end def def is_relative_url(self, url): page = urlparse(self.novel_url) url = urlparse(url) return (page.hostname == url.hostname and url.path.startswith(page.path)) # end def def get_response(self, url, **kargs): if self._destroyed: return None # end if kargs = kargs or dict() # kargs['verify'] = kargs.get('verify', False) kargs['timeout'] = kargs.get('timeout', 150) # in seconds self.last_visited_url = url.strip('/') response = self.scraper.get(url, **kargs) response.encoding = 'utf-8' self.cookies.update({x.name: x.value for x in response.cookies}) 
response.raise_for_status() return response # end def def submit_form(self, url, data={}, multipart=False, headers={}): '''Submit a form using post request''' if self._destroyed: return None # end if headers.update({ 'Content-Type': 'multipart/form-data' if multipart else 'application/x-www-form-urlencoded; charset=UTF-8', }) response = self.scraper.post(url, data=data, headers=headers) response.encoding = 'utf-8' self.cookies.update({x.name: x.value for x in response.cookies}) response.raise_for_status() return response # end def def get_soup(self, *args, **kwargs): parser = kwargs.pop('parser', None) response = self.get_response(*args, **kwargs) return self.make_soup(response, parser) # end def def make_soup(self, response, parser=None): html = response.content.decode('utf-8', 'ignore') soup = BeautifulSoup(html, parser or 'lxml') if not soup.find('body'): raise ConnectionError('HTML document was not loaded properly') # end if return soup # end def def get_json(self, *args, **kargs): response = self.get_response(*args, **kargs) return response.json() # end def def download_cover(self, output_file): response = self.get_response(self.novel_cover) with open(output_file, 'wb') as f: f.write(response.content) # end with # end def # ------------------------------------------------------------------------- # blacklist_patterns = [ r'^[\W\D]*(volume|chapter)[\W\D]+\d+[\W\D]*$', ] bad_tags = [ 'noscript', 'script', 'iframe', 'form', 'hr', 'img', 'ins', 'button', 'input', 'amp-auto-ads', 'pirate' ] block_tags = ['h3', 'div', 'p'] def is_blacklisted(self, text): if len(text.strip()) == 0: return True # end if for pattern in self.blacklist_patterns: if re.search(pattern, text, re.IGNORECASE): return True # end if # end for return False # end def def clean_contents(self, div): if not div: return div # end if div.attrs = {} for tag in div.find_all(True): if isinstance(tag, Comment): tag.extract() # Remove comments elif tag.name == 'br': next_tag = getattr(tag, 'next_sibling') if next_tag and getattr(next_tag, 'name') == 'br': tag.extract() # end if elif tag.name in self.bad_tags: tag.extract() # Remove bad tags elif not tag.text.strip(): tag.extract() # Remove empty tags elif self.is_blacklisted(tag.text): tag.extract() # Remove blacklisted contents elif hasattr(tag, 'attrs'): tag.attrs = {} # Remove attributes # end if # end for return div # end def def extract_contents(self, tag, level=0): body = [] if level == 0: self.clean_contents(tag) # end if for elem in tag.contents: if self.block_tags.count(elem.name): body += self.extract_contents(elem, level + 1) continue # end if text = '' if not elem.name: text = str(elem).strip() else: text = '<%s>%s</%s>' text = text % (elem.name, elem.text.strip(), elem.name) # end if if text: body.append(text) # end if # end for if level > 0: return body else: return [x for x in body if len(x.strip())] # end if # end def def cleanup_text(self, text): return re.sub(u'[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]', '', str(text), flags=re.UNICODE)
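# A hedged sketch of how a site-specific crawler might build on the Crawler
# base class above. The URL and CSS selectors are hypothetical placeholders,
# and the remaining abstract methods (login, logout, search_novel) are omitted
# for brevity.
class ExampleCrawler(Crawler):
    base_url = 'https://example.com/'

    def initialize(self):
        self.home_url = self.base_url

    def read_novel_info(self):
        soup = self.get_soup(self.novel_url)
        self.novel_title = soup.select_one('h1.title').text.strip()
        self.novel_author = soup.select_one('a.author').text.strip()
        self.volumes.append({'id': 1})
        for idx, a in enumerate(soup.select('ul.chapter-list a')):
            self.chapters.append({
                'id': idx + 1,
                'volume': 1,
                'title': a.text.strip(),
                'url': self.absolute_url(a['href']),
            })

    def download_chapter_body(self, chapter):
        soup = self.get_soup(chapter['url'])
        content = soup.select_one('div.chapter-content')
        return ''.join(self.extract_contents(content))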
class OneDrive: """ Downloads shared file/folder to localhost with persisted structure. params: `str:url`: url to the shared one drive folder or file `str:path`: local filesystem path methods: `download() -> None`: fire async download of all files found in URL """ def __init__(self, url=None, path=None): if not (url and path): raise ValueError("URL to shared resource or path to download is missing.") self.url = url self.path = path self.prefix = "https://api.onedrive.com/v1.0/shares/" self.suffix = "/root?expand=children" self.session = Session() self.session.headers.update( { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" " (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" } ) def _token(self, url): return "u!" + b64encode(url.encode()).decode() def _traverse_url(self, url, name=""): """ Traverse the folder tree and store leaf urls with filenames """ r = self.session.get(f"{self.prefix}{self._token(url)}{self.suffix}").json() name = name + os.sep + r["name"] # shared file if not r["children"]: file = {} file["name"] = name.lstrip(os.sep) file["url"] = r["@content.downloadUrl"] self.to_download.append(file) print(f"Found {file['name']}") # shared folder for child in r["children"]: if "folder" in child: self._traverse_url(child["webUrl"], name) if "file" in child: file = {} file["name"] = (name + os.sep + child["name"]).lstrip(os.sep) file["url"] = child["@content.downloadUrl"] self.to_download.append(file) print(f"Found {file['name']}") async def _download_file(self, file, session): async with session.get(file["url"], timeout=None) as r: filename = os.path.join(self.path, file["name"]) os.makedirs(os.path.dirname(filename), exist_ok=True) async with aiofiles.open(filename, "wb") as f: async for chunk in r.content.iter_chunked(1024 * 16): if chunk: await f.write(chunk) self.downloaded += 1 progress = int(self.downloaded / len(self.to_download) * 100) print(f"Download progress: {progress}%") async def _downloader(self): async with aiohttp.ClientSession() as session: await asyncio.wait( [self._download_file(file, session) for file in self.to_download] ) def download(self): print("Traversing public folder\n") self.to_download = [] self.downloaded = 0 self._traverse_url(self.url) print("\nStarting async download\n") asyncio.get_event_loop().run_until_complete(self._downloader())
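# Hedged usage sketch for the OneDrive helper above. The shared link is a
# placeholder; any publicly shared OneDrive file or folder URL should work,
# and the local path is assumed to be writable.
if __name__ == "__main__":
    shared_url = "https://1drv.ms/f/s!EXAMPLE-SHARE-TOKEN"  # hypothetical shared-folder link
    OneDrive(url=shared_url, path="./onedrive_download").download()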
from bs4 import BeautifulSoup as bs
from requests import Session

url = 'http://torlinkbgs6aabns.onion/'
s = Session()
s.proxies = {'http': 'socks5h://127.0.0.1:9052'}
tor_links = s.get(url)
page = bs(tor_links.text, 'html.parser')  # parse the response body, not the Response object
links = page.find('div', {'id': 'links'})
xpto = links.find_all(['h3', 'a'])
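# A small follow-up sketch: walk the collected <h3> section headings and <a>
# anchors and print the onion links found under each heading. Assumes the
# page layout observed above (a "links" container with headings and anchors).
for tag in xpto:
    if tag.name == 'h3':
        print('== {0} =='.format(tag.get_text(strip=True)))
    else:
        print('{0} -> {1}'.format(tag.get_text(strip=True), tag.get('href')))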
class ThreatConnect: """ """ def __init__(self, api_aid=None, api_sec=None, api_org=None, api_url=None, api_token=None, api_token_expires=None): """ """ # logger self.log_level = { 'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL } self.formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s (%(funcName)s:%(lineno)d)' ) self.tcl = tc_logger() # debugging self._memory_monitor = True # credentials self._api_aid = api_aid self._api_sec = api_sec self._api_token = api_token self._api_token_expires = api_token_expires # user defined values self._api_org = api_org self._api_url = api_url self._api_result_limit = 200 # default values self._activity_log = False self._api_request_timeout = 30 self._api_retries = 5 # maximum of 5 minute window self._api_sleep = 59 # seconds self._bulk_on_demand = False self._enable_report = False self._indicators_regex = indicators_regex self._proxies = {'https': None} self._retype = type(re.compile('')) # config items self._report = [] self._verify_ssl = False # initialize request session handle self._session = Session() # instantiate report object self.report = Report() # save custom types for later self._indicator_parser = IndicatorObjectParser(self) # # Memory Testing # # self._p = psutil.Process(os.getpid()) # self._memory = self._p.memory_info().rss # @property def indicator_parser(self): return self._indicator_parser def _renew_token(self): """ { "success":true, "apiToken":"2:1:-1:1474673195:poZAT:syqtNUKnGn9ZijE5hQ5/D99aD8dIEdgdDCIMbjk2Poc\u003d", "apiTokenExpires":"1474673195" } """ # make api call to get new token url = '{0!s}{1!s}'.format(self._api_url, '/appAuth') payload = {'expiredToken': self._api_token} token_response = self._session.get(url, params=payload, verify=self._verify_ssl, timeout=self._api_request_timeout, proxies=self._proxies, stream=False) if token_response.status_code == 401: if 'application/json' in token_response.headers['content-type']: err_data = token_response.json().get('message') else: err_data = token_response.text err = 'Could not refresh ThreatConnect Token ({}).'.format( err_data) raise RuntimeError(err) # bcs - return new token and set expiration date token_data = token_response.json() self._api_token = token_data['apiToken'] self._api_token_expires = int(token_data['apiTokenExpires']) def _api_request_headers(self, ro): """ """ timestamp = int(time.time()) if self._api_token is not None and self._api_token_expires is not None: window_padding = 15 # bcs - possible configuration option current_time = int(time.time()) - window_padding if (int(self._api_token_expires) < current_time): self._renew_token() authorization = 'TC-Token {0}'.format(self._api_token) elif self._api_aid is not None and self._api_sec is not None: signature = "{0}:{1}:{2}".format(ro.path_url, ro.http_method, timestamp) # python 2.7, does not work on 3.x and not tested on 2.6 # hmac_signature = hmac.new(self._api_sec, signature, digestmod=hashlib.sha256).digest() # authorization = 'TC {0}:{1}'.format(self._api_aid, base64.b64encode(hmac_signature)) # python 3.x hmac_signature = hmac.new(self._api_sec.encode(), signature.encode(), digestmod=hashlib.sha256).digest() authorization = 'TC {0}:{1}'.format( self._api_aid, base64.b64encode(hmac_signature).decode()) ro.add_header('Timestamp', timestamp) ro.add_header('Authorization', authorization) def api_filter_handler(self, resource_obj, filter_objs): """ """ data_set = None if not filter_objs: # build api 
call (no filters) default_request_object = resource_obj.default_request_object data_set = self.api_response_handler(resource_obj, default_request_object) else: # # process each filter added to the resource object for retrieve # first_run = True # # each resource object can have x filter objects with an operator to join or intersect results # for filter_obj in filter_objs: obj_list = [ ] # temp storage for results on individual filter objects owners = filter_obj.owners if len(owners) == 0: # handle filters with no owners owners = [self._api_org] # use default org # iterate through all owners for o in owners: self.tcl.debug('owner: {0!s}'.format(o)) if len(filter_obj) > 0: # request object are for api filters for ro in filter_obj: if ro.owner_allowed: ro.set_owner(o) if hasattr(filter_obj, 'api_entity'): results = self.api_response_handler( resource_obj, ro, api_entity=filter_obj.api_entity) else: results = self.api_response_handler( resource_obj, ro) if ro.resource_type not in [ ResourceType.OWNERS, ResourceType.VICTIMS, ResourceType.BATCH_JOBS ]: # TODO: should this be done? # post filter owners for obj in results: if obj.owner_name.upper() != o.upper(): results.remove(obj) obj_list.extend(results) else: ro = filter_obj.default_request_object if ro.owner_allowed: ro.set_owner(o) if hasattr(filter_obj, 'api_entity'): results = self.api_response_handler( resource_obj, ro, api_entity=filter_obj.api_entity) else: results = self.api_response_handler( resource_obj, ro) if ro.resource_type not in [ ResourceType.OWNERS, ResourceType.VICTIMS ]: # TODO: should this be done? # post filter owners for obj in results: if obj.owner_name.upper() != o.upper(): results.remove(obj) obj_list.extend(results) # # post filters # pf_obj_set = set(obj_list) self.tcl.debug('count before post filter: {0:d}'.format( len(obj_list))) for pfo in filter_obj.post_filters: self.tcl.debug('pfo: {0!s}'.format(pfo)) # # Report Entry # report_entry = ReportEntry() report_entry.add_post_filter_object(pfo) # current post filter method filter_method = getattr(resource_obj, pfo.method) # current post filter results post_filter_results = set( filter_method(pfo.filter, pfo.operator, pfo.description)) pf_obj_set = pf_obj_set.intersection( post_filter_results) self.report.add(report_entry) # set obj_list to post_filter results if filter_obj.post_filters_len > 0: obj_list = list(pf_obj_set) self.tcl.debug('count after post filter: {0:d}'.format( len(obj_list))) # no need to join or intersect on first run if first_run: data_set = set(obj_list) first_run = False continue # # depending on the filter type the result will be intersected or joined # if filter_obj.operator is FilterSetOperator.AND: data_set = data_set.intersection(obj_list) elif filter_obj.operator is FilterSetOperator.OR: data_set.update(set(obj_list)) # # only add to report if these results should be tracked (exclude attribute, tags, etc) # self.report.add_filtered_results(len(data_set)) # # after intersection or join add the objects to the resource object # for obj in data_set: resource_obj.add_obj(obj) def api_request(self, ro, log=True): """ """ api_response = None fail_msg = None h_content_length = None h_content_type = None start = datetime.now() # # enable activity log # if self._activity_log: ro.enable_activity_log() # # prepare request # url = '{0!s}{1!s}'.format(self._api_url, ro.request_uri) api_request = Request(ro.http_method, url, data=ro.body, params=ro.payload) request_prepped = api_request.prepare() # # generate headers # 
ro.set_path_url(request_prepped.path_url) self._api_request_headers(ro) request_prepped.prepare_headers(ro.headers) # # Debug # if log: self.tcl.debug('request_object: {0!s}'.format(ro)) self.tcl.debug('url: {0!s}'.format(url)) self.tcl.debug('path url: {0!s}'.format(request_prepped.path_url)) # # api request (gracefully handle temporary communications issues with the API) # for i in range(1, self._api_retries + 1, 1): try: api_response = self._session.send( request_prepped, verify=self._verify_ssl, timeout=self._api_request_timeout, proxies=self._proxies, stream=ro.stream) break except exceptions.ReadTimeout as e: self.tcl.error('Error: {0!s}'.format(e)) self.tcl.error( 'The server may be experiencing delays at the moment.') self.tcl.info( 'Pausing for {0!s} seconds to give server time to catch up.' .format(self._api_sleep)) time.sleep(self._api_sleep) self.tcl.info('Retry {0!s} ....'.format(i)) if i == self._api_retries: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) except exceptions.ConnectionError as e: self.tcl.error('Error: {0!s}'.format(e)) self.tcl.error('Connection Error. The server may be down.') self.tcl.info( 'Pausing for {0!s} seconds to give server time to catch up.' .format(self._api_sleep)) time.sleep(self._api_sleep) self.tcl.info('Retry {0!s} ....'.format(i)) if i == self._api_retries: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) except socket.error as e: self.tcl.critical('Exiting: {0!s}'.format(e)) raise RuntimeError(e) # # header values # if 'content-length' in api_response.headers: h_content_length = api_response.headers['content-length'] if 'content-type' in api_response.headers: h_content_type = api_response.headers['content-type'] # # raise exception on *critical* errors # non_critical_errors = [ b'The MD5 for this File is invalid, a File with this MD5 already exists', # 400 (application/json) b'The SHA-1 for this File is invalid, a File with this SHA-1 already exists', # 400 (application/json) b'The SHA-256 for this File is invalid, a File with this SHA-256 already exists', # 400 (application/json) b'The requested resource was not found', # 404 (application/json) b'Could not find resource for relative', # 500 (text/plain) b'The requested Security Label was not removed - access was denied', # 401 (application/json) ] # # TODO: work out some logic to improve the API error handling, possible area where API could improve # # valid status codes 200, 201, 202 # if api_response.status_code in [400, 401, 403, 500, 503]: if api_response.status_code not in [200, 201, 202]: # check for non critical errors that have bad status codes nce_found = False fail_msg = api_response.content for nce in non_critical_errors: # api_response_dict['message'] not in non_critical_errors: if re.findall(nce, api_response.content): nce_found = True break if ro.failure_callback is not None: ro.failure_callback(api_response.status_code) # raise error on bad status codes that are not defined as nce if not nce_found: self.tcl.critical('Status Code: {0:d}'.format( api_response.status_code)) self.tcl.critical('Failed API Response: {0!s}'.format( api_response.content)) if ro.failure_callback is not None: ro.failure_callback(api_response.status_code) raise RuntimeError(api_response.content) # # set response encoding (best guess) # if api_response.encoding is None: ## api_response.encoding = api_response.apparent_encoding api_response.encoding = 'utf-8' # using apparent encoding is costly with bulk # # Debug # if log: self.tcl.debug('url: %s', 
api_response.url) self.tcl.debug('status_code: %s', api_response.status_code) self.tcl.debug('content-length: %s', h_content_length) self.tcl.debug('content-type: %s', h_content_type) # # Report # self.report.add_api_call() # count api calls self.report.add_request_time(datetime.now() - start) if log: self.tcl.debug('Request Time: {0!s}'.format(datetime.now() - start)) if self._enable_report: report_entry = ReportEntry() report_entry.add_request_object(ro) report_entry.set_request_url(api_response.url) report_entry.set_status_code(api_response.status_code) report_entry.set_failure_msg(fail_msg) self.report.add(report_entry) # # return response # return api_response def api_response_handler(self, resource_obj, ro, api_entity=None): """ """ # # initialize vars # api_response_dict = {} obj_list = [] # only track filter counts on request from this method ro.enable_track() # # debug # self.tcl.debug('Results Limit: {0!s}'.format(self._api_result_limit)) # only resource supports pagination if ro.resource_pagination: ro.set_result_limit(self._api_result_limit) ro.set_result_start(0) else: ro.set_remaining_results(1) while ro.remaining_results > 0: # # api request # api_response = self.api_request(ro) # self.tcl.debug('Results Content: {0!s}'.format(api_response.content)) self.tcl.debug('Status Code: {0!s}'.format( api_response.status_code)) self.tcl.debug('Content Type: {0!s}'.format( api_response.headers['content-type'])) # # Process API response # if api_response.headers['content-type'] == 'application/json': api_response_dict = api_response.json() # try and free memory for next api request api_response.close() del api_response # doesn't appear to clear memory # # BULK INDICATOR (does not have status) # if 'indicator' in api_response_dict: if ro.resource_type == ResourceType.INDICATORS: data = api_response_dict['indicator'] for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) if len(obj_list) % 500 == 0: self.tcl.debug('obj_list len: {0!s}'.format( len(obj_list))) elif api_response_dict['status'] == 'Failure': # handle failed request (404 Resource not Found) if 'message' in api_response_dict: self.tcl.error('{0!s} "{1!s}"'.format( api_response_dict['message'], ro.description)) ro.set_remaining_results(0) continue # # ADVERSARIES # elif ro.resource_type == ResourceType.ADVERSARIES: data = api_response_dict['data']['adversary'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.ADVERSARIES, resource_obj, ro.description, ro.request_uri)) # # CAMPAIGNS # elif ro.resource_type == ResourceType.CAMPAIGNS: data = api_response_dict['data']['campaign'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.CAMPAIGNS, resource_obj, ro.description, ro.request_uri)) # # INDICATORS # elif ro.resource_type == ResourceType.INDICATORS: data = api_response_dict['data']['indicator'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # ADDRESSES # elif ro.resource_type == ResourceType.ADDRESSES: data = api_response_dict['data']['address'] if not isinstance(data, list): data = [data] # for single results to be a 
list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # DOCUMENTS # elif ro.resource_type == ResourceType.DOCUMENTS: data = api_response_dict['data']['document'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.DOCUMENTS, resource_obj, ro.description, ro.request_uri)) # # EMAILS # elif ro.resource_type == ResourceType.EMAILS: data = api_response_dict['data']['email'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.EMAILS, resource_obj, ro.description, ro.request_uri)) # # EMAIL ADDRESSES # elif ro.resource_type == ResourceType.EMAIL_ADDRESSES: data = api_response_dict['data']['emailAddress'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # CUSTOM INDICATORS # elif ro.resource_type == ResourceType.CUSTOM_INDICATORS: # api_entity MUST be provided for Custom Indicators data = api_response_dict['data'][api_entity] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # GROUPS # elif ro.resource_type == ResourceType.GROUPS: data = api_response_dict['data']['group'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: if item.get('type') in self.group_types: obj_list.append( parse_group(item, ResourceType.GROUPS, resource_obj, ro.description, ro.request_uri)) else: self.tcl.debug('Skipping unsupported Group Type') # # FILES # elif ro.resource_type == ResourceType.FILES: data = api_response_dict['data']['file'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # HOSTS # elif ro.resource_type == ResourceType.HOSTS: data = api_response_dict['data']['host'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # DNSResolutions # elif ro.resource_type == ResourceType.DNS_RESOLUTIONS: data = api_response_dict['data']['dnsResolution'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: if 'addresses' in item: # don't process dns resolutions that have no addresses obj_list.append(parse_dns_resolution(item)) # # INCIDENTS # elif ro.resource_type == ResourceType.INCIDENTS: data = api_response_dict['data']['incident'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.INCIDENTS, resource_obj, ro.description, ro.request_uri)) # # METRICS # # elif ro.resource_type == ResourceType.OWNER_METRICS: # data = api_response_dict['data']['ownerMetric'] # if not isinstance(data, list): # data = [data] # for single 
results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # MINE # # elif ro.resource_type == ResourceType.OWNER_MINE: # data = api_response_dict['data']['owner'] # if not isinstance(data, list): # data = [data] # for single results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # MEMBERS # # elif ro.resource_type == ResourceType.OWNER_MEMBERS: # data = api_response_dict['data']['user'] # if not isinstance(data, list): # data = [data] # for single results to be a list # for item in data: # obj_list.append( # parse_metrics(item, resource_obj, ro.description, ro.request_uri)) # # OWNERS # elif ro.resource_type == ResourceType.OWNERS: data = api_response_dict['data']['owner'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_owner(item, resource_obj, ro.description, ro.request_uri)) # # SIGNATURES # elif ro.resource_type == ResourceType.SIGNATURES: data = api_response_dict['data']['signature'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.SIGNATURES, resource_obj, ro.description, ro.request_uri)) # # TASKS # elif ro.resource_type == ResourceType.TASKS: data = api_response_dict['data']['task'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_task(item, ResourceType.TASKS, resource_obj, ro.description, ro.request_uri)) # # THREATS # elif ro.resource_type == ResourceType.THREATS: data = api_response_dict['data']['threat'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_group(item, ResourceType.THREATS, resource_obj, ro.description, ro.request_uri)) # # URLS # elif ro.resource_type == ResourceType.URLS: data = api_response_dict['data']['url'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: obj_list.append( parse_typed_indicator( item, resource_obj, ro.description, ro.request_uri, self._indicators_regex, indicator_parser=self.indicator_parser)) # # VICTIMS # elif ro.resource_type == ResourceType.VICTIMS: data = api_response_dict['data']['victim'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: # victims data comes back with no owner, manually add owner here item['owner'] = ro.owner obj_list.append( parse_victim(item, resource_obj, ro.description, ro.request_uri)) # # BatchJobs # elif ro.resource_type == ResourceType.BATCH_JOBS: data = api_response_dict['data']['batchStatus'] if not isinstance(data, list): data = [data] # for single results to be a list for item in data: # victims data comes back with no owner, manually add owner here item['owner'] = ro.owner obj_list.append( parse_batch_job(item, resource_obj, ro.description, ro.request_uri)) elif api_response.headers['content-type'] == 'text/plain': self.tcl.error('{0!s} "{1!s}"'.format(api_response.content, ro.description)) ro.set_remaining_results(0) continue # add_obj resource_pagination if required if ro.resource_pagination: # get the number of results returned by the api if ro.result_start == 0: ro.set_remaining_results( api_response_dict['data']['resultCount'] - ro.result_limit) else: ro.set_remaining_results(ro.remaining_results - ro.result_limit) # increment the start position 
ro.set_result_start(ro.result_start + ro.result_limit) else: ro.set_remaining_results(0) self.tcl.debug('Result Count: {0!s}'.format(len(obj_list))) self.report.add_unfiltered_results(len(obj_list)) return obj_list # # api / sdk settings # def result_pagination(self, ro, identifier): data = [] ro.set_result_limit(self._api_result_limit) ro.set_result_start(0) while ro.remaining_results > 0: api_response = self.api_request(ro) if api_response.headers['content-type'] != 'application/json': break api_response_dict = api_response.json() if api_response_dict['status'] != 'Success': break data.extend(api_response_dict['data'][identifier]) # get the number of results returned by the api if ro.result_start == 0: ro.set_remaining_results( api_response_dict['data']['resultCount'] - ro.result_limit) else: ro.set_remaining_results(ro.remaining_results - ro.result_limit) # increment the start position ro.set_result_start(ro.result_start + ro.result_limit) return data def report_enable(self): """ """ self._enable_report = True def report_disable(self): """ """ self._enable_report = False def set_activity_log(self, data_bool): """ enable or disable api activity log """ if isinstance(data_bool, bool): self._activity_log = data_bool def set_api_request_timeout(self, data_int): """ set timeout value for the requests module """ if isinstance(data_int, int): self._api_request_timeout = data_int else: raise AttributeError(ErrorCodes.e0110.value.format(data_int)) def set_api_retries(self, data): """ set the number of api retries before exception is raised """ if isinstance(data, int): self._api_retries = data else: raise AttributeError(ErrorCodes.e0120.value.format(data)) def set_api_sleep(self, data): """ set the amount of time between retries """ if isinstance(data, int): self._api_sleep = data else: raise AttributeError(ErrorCodes.e0130.value.format(data)) def set_api_result_limit(self, data_int): """ set the number of result to return per api request (500 max) """ if isinstance(data_int, int): self._api_result_limit = data_int else: raise AttributeError(ErrorCodes.e0140.value.format(data_int)) def set_proxies(self, proxy_address, proxy_port, proxy_user=None, proxy_pass=None): """ define proxy server to use with the requests module """ # "http": "http://*****:*****@10.10.1.10:3128/", # accept host with http(s) or without proxy_method = 'http://' if re.match('^http', proxy_address): proxy_method, proxy_host = proxy_address.split('//') proxy_method += '//' proxy_address = proxy_host # TODO: add validation if proxy_user is not None and proxy_pass is not None: self._proxies['https'] = '{0!s}{1!s}:{2!s}@{3!s}:{4!s}'.format( proxy_method, proxy_user, proxy_pass, proxy_address, proxy_port) else: self._proxies['https'] = '{0!s}{1!s}:{2!s}'.format( proxy_method, proxy_address, proxy_port) def get_proxies(self): """ get proxy settings """ return self._proxies def set_tcl_file(self, fqpn, level='info'): """ set the log file destination and log level """ file_path = os.path.dirname(fqpn) if os.access(file_path, os.W_OK): if self.tcl.level > self.log_level[level]: self.tcl.setLevel(self.log_level[level]) if self._api_token is not None: fh = ApiLoggingHandler(fqpn, self) else: fh = FileHandler(fqpn) # fh.set_name('tc_log_file') # not supported in python 2.6 if level in self.log_level.keys(): fh.setLevel(self.log_level[level]) else: fh.setLevel(self.log_level['info']) fh.setFormatter(self.formatter) self.tcl.addHandler(fh) # def set_tcl_level(self, level): # """ """ # if level in self.log_level.keys(): # if self.tcl.level 
> self.log_level[level]: # self.tcl.setLevel(self.log_level[level]) # self.tcl.handlers[0].setLevel(self.log_level[level]) def set_tcl_console_level(self, level): """ set the console log level """ if level in self.log_level.keys(): if self.tcl.level > self.log_level[level]: self.tcl.setLevel(self.log_level[level]) ch = logging.StreamHandler() # ch.set_name('console') # not supported in python 2.6 ch.setLevel(self.log_level[level]) ch.setFormatter(self.formatter) self.tcl.addHandler(ch) def set_indicator_regex(self, type_enum, compiled_regex): """ overwrite default SDK regex """ self.tcl.debug('overwrite regex for {0!s}'.format(type_enum.name)) if not isinstance(type_enum, IndicatorType): raise AttributeError(ErrorCodes.e0150.value.format(type_enum)) if not isinstance(compiled_regex, list): compiled_regex = [compiled_regex] cr_list = [] for cr in compiled_regex: if isinstance(cr, self._retype): cr_list.append(cr) else: raise AttributeError(ErrorCodes.e0160.value.format(cr)) self._indicators_regex[type_enum.name] = cr_list @property def group_types(self): """Return all defined ThreatConnect Group types. Returns: (list): A list of ThreatConnect Group types. """ return [ 'Adversary', 'Campaign', 'Document', 'Email', # 'Event', 'Incident', # 'Intrusion Set', 'Signature', # 'Report', 'Threat', 'Task' ] # # Resources # def adversaries(self): """ return an adversary container object """ self._indicator_parser.init() return Adversaries(self) def bulk(self): """ return a bulk container object """ self._indicator_parser.init() return Bulk(self) def bulk_indicators(self, on_demand=False): """ return a bulk indicator container object """ self._indicator_parser.init() return BulkIndicators(self, on_demand) def campaigns(self): """ return an adversary container object """ self._indicator_parser.init() return Campaigns(self) def documents(self): """ return a document container object """ self._indicator_parser.init() return Documents(self) def emails(self): """ return an email container object """ self._indicator_parser.init() return Emails(self) def groups(self): """ return an group container object """ self._indicator_parser.init() return Groups(self) def incidents(self): """ return an incident container object """ self._indicator_parser.init() return Incidents(self) def indicators(self): """ return an indicator container object """ self._indicator_parser.init() return Indicators(self) def owners(self): """ return an owner container object """ self._indicator_parser.init() return Owners(self) def signatures(self): """ return a signature container object """ self._indicator_parser.init() return Signatures(self) def tasks(self): """ return a task container object """ self._indicator_parser.init() return Tasks(self) def threats(self): """ return a threat container object """ self._indicator_parser.init() return Threats(self) def victims(self): """ return a victim container object """ self._indicator_parser.init() return Victims(self) def batch_jobs(self): """ return a batch container object """ self._indicator_parser.init() return BatchJobs(self)
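# Hedged usage sketch for the ThreatConnect client above. The URL, credentials
# and proxy values are placeholders; only methods defined in the class are used.
tc = ThreatConnect(api_aid='00000000-0000-0000-0000-000000000000',  # placeholder access id
                   api_sec='placeholder-secret-key',
                   api_org='Example Org',
                   api_url='https://api.threatconnect.example/api')
tc.set_api_result_limit(500)        # page size per API request (500 max)
tc.set_api_request_timeout(60)      # seconds
tc.set_proxies('10.10.1.10', 3128, proxy_user='user', proxy_pass='pass')
tc.set_tcl_console_level('debug')   # verbose SDK logging to the console
tc.report_enable()                  # collect per-request report entries
indicators = tc.indicators()        # container object; filters are added before retrieval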
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin): subtitle_class = TitloviSubtitle languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')} server_url = 'https://titlovi.com' search_url = server_url + '/titlovi/?' download_url = server_url + '/download/?type=1&mediaid=' def initialize(self): self.session = Session() self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \ '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' logger.debug('User-Agent set to %s', self.session.headers['User-Agent']) self.session.headers['Referer'] = self.server_url logger.debug('Referer set to %s', self.session.headers['Referer']) def terminate(self): self.session.close() def query(self, languages, title, season=None, episode=None, year=None, video=None): items_per_page = 10 current_page = 1 used_languages = languages lang_strings = [str(lang) for lang in used_languages] # handle possible duplicate use of Serbian Latin if "sr" in lang_strings and "sr-Latn" in lang_strings: logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages') used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages) logger.info('Filtered language list %r', used_languages) # convert list of languages into search string langs = '|'.join(map(str, [l.titlovi for l in used_languages])) # set query params params = {'prijevod': title, 'jezik': langs} is_episode = False if season and episode: is_episode = True params['s'] = season params['e'] = episode if year: params['g'] = year # loop through paginated results logger.info('Searching subtitles %r', params) subtitles = [] while True: # query the server try: r = self.session.get(self.search_url, params=params, timeout=10) r.raise_for_status() soup = BeautifulSoup(r.content, 'lxml') # number of results result_count = int(soup.select_one('.results_count b').string) except: result_count = None # exit if no results if not result_count: if not subtitles: logger.debug('No subtitles found') else: logger.debug("No more subtitles found") break # number of pages with results pages = int(math.ceil(result_count / float(items_per_page))) # get current page if 'pg' in params: current_page = int(params['pg']) try: sublist = soup.select('section.titlovi > ul.titlovi > li') for sub in sublist: # subtitle id sid = sub.find(attrs={'data-id': True}).attrs['data-id'] # get download link download_link = self.download_url + sid # title and alternate title match = title_re.search(sub.a.string) if match: _title = match.group('title') alt_title = match.group('altitle') else: continue # page link page_link = self.server_url + sub.a.attrs['href'] # subtitle language match = lang_re.search(sub.select_one('.lang').attrs['src']) if match: try: # decode language lang = Language.fromtitlovi(match.group('lang')+match.group('script')) except ValueError: continue # relase year or series start year match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string) if match: r_year = int(match.group('year')) # fps match = fps_re.search(sub.select_one('.fps').string) if match: fps = match.group('fps') # releases releases = str(sub.select_one('.fps').parent.contents[0].string) # handle movies and series separately if is_episode: # season and episode info sxe = sub.select_one('.s0xe0y').string r_season = None r_episode = None if sxe: match = season_re.search(sxe) if match: r_season = int(match.group('season')) match = episode_re.search(sxe) 
if match: r_episode = int(match.group('episode')) subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title, alt_title=alt_title, season=r_season, episode=r_episode, year=r_year, fps=fps, asked_for_release_group=video.release_group, asked_for_episode=episode) else: subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title, alt_title=alt_title, year=r_year, fps=fps, asked_for_release_group=video.release_group) logger.debug('Found subtitle %r', subtitle) # prime our matches so we can use the values later subtitle.get_matches(video) # add found subtitles subtitles.append(subtitle) finally: soup.decompose() # stop on last page if current_page >= pages: break # increment current page params['pg'] = current_page + 1 logger.debug('Getting page %d', params['pg']) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): title = video.series season = video.season episode = video.episode else: title = video.title return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode, year=video.year, video=video)] def download_subtitle(self, subtitle): r = self.session.get(subtitle.download_link, timeout=10) r.raise_for_status() # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Archive identified as rar') archive = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Archive identified as zip') archive = ZipFile(archive_stream) else: subtitle.content = r.content if subtitle.is_valid(): return subtitle.content = None raise ProviderError('Unidentified archive type') subs_in_archive = archive.namelist() # if Serbian lat and cyr versions are packed together, try to find right version if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'): self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive) else: # use default method for everything else subtitle.content = self.get_subtitle_from_archive(subtitle, archive) def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive): sr_lat_subs = [] sr_cyr_subs = [] sub_to_extract = None for sub_name in subs_in_archive: if not ('.cyr' in sub_name or '.cir' in sub_name): sr_lat_subs.append(sub_name) if ('.cyr' in sub_name or '.cir' in sub_name) and not '.lat' in sub_name: sr_cyr_subs.append(sub_name) if subtitle.language == 'sr': if len(sr_lat_subs) > 0: sub_to_extract = sr_lat_subs[0] if subtitle.language == 'sr-Cyrl': if len(sr_cyr_subs) > 0: sub_to_extract = sr_cyr_subs[0] logger.info(u'Using %s from the archive', sub_to_extract) subtitle.content = fix_line_ending(archive.read(sub_to_extract))
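# A standalone sketch of the Latin/Cyrillic selection rule used in
# get_subtitle_from_bundled_archive above, applied to a plain list of names.
def pick_serbian_subtitle(names, want_cyrillic=False):
    latin = [n for n in names if '.cyr' not in n and '.cir' not in n]
    cyrillic = [n for n in names if ('.cyr' in n or '.cir' in n) and '.lat' not in n]
    candidates = cyrillic if want_cyrillic else latin
    return candidates[0] if candidates else None

print(pick_serbian_subtitle(['movie.lat.srt', 'movie.cir.srt']))        # movie.lat.srt
print(pick_serbian_subtitle(['movie.lat.srt', 'movie.cir.srt'], True))  # movie.cir.srt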
from requests import Session ses = Session() ses.trust_env = False r = ses.get('http://127.0.0.1:2600/salt') if r.status_code >= 400: raise ValueError(f'Get {r.status_code} {r.reason}')
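# An equivalent sketch using requests' built-in helper instead of the manual
# status check above: raise_for_status() raises HTTPError for any 4xx/5xx reply.
from requests import HTTPError

try:
    r = ses.get('http://127.0.0.1:2600/salt')
    r.raise_for_status()
except HTTPError as err:
    raise ValueError(str(err))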
class EDSM: _TIMEOUT = 10 FAKE = ['CQC', 'Training', 'Destination'] # Fake systems that shouldn't be sent to EDSM def __init__(self): self.result = {'img': None, 'url': None, 'done': True} self.syscache = set() # Cache URLs of systems with known coordinates self.session = Session() self.lastship = None # Description of last ship that we sent to EDSM # Can't be in class definition since can only call PhotoImage after window is created EDSM._IMG_KNOWN = tk.PhotoImage( data= 'R0lGODlhEAAQAMIEAFWjVVWkVWS/ZGfFZ////////////////yH5BAEKAAQALAAAAAAQABAAAAMvSLrc/lAFIUIkYOgNXt5g14Dk0AQlaC1CuglM6w7wgs7rMpvNV4q932VSuRiPjQQAOw==' ) # green circle EDSM._IMG_UNKNOWN = tk.PhotoImage( data= 'R0lGODlhEAAQAKEDAGVLJ+ddWO5fW////yH5BAEKAAMALAAAAAAQABAAAAItnI+pywYRQBtA2CtVvTwjDgrJFlreEJRXgKSqwB5keQ6vOKq1E+7IE5kIh4kCADs=' ) # red circle EDSM._IMG_NEW = tk.PhotoImage( data= 'R0lGODlhEAAQAMZwANKVHtWcIteiHuiqLPCuHOS1MN22ZeW7ROG6Zuu9MOy+K/i8Kf/DAuvCVf/FAP3BNf/JCf/KAPHHSv7ESObHdv/MBv/GRv/LGP/QBPXOPvjPQfjQSvbRSP/UGPLSae7Sfv/YNvLXgPbZhP7dU//iI//mAP/jH//kFv7fU//fV//ebv/iTf/iUv/kTf/iZ/vgiP/hc/vgjv/jbfriiPriiv7ka//if//jd//sJP/oT//tHv/mZv/sLf/rRP/oYv/rUv/paP/mhv/sS//oc//lkf/mif/sUf/uPv/qcv/uTv/uUv/vUP/qhP/xP//pm//ua//sf//ubf/wXv/thv/tif/slv/tjf/smf/yYP/ulf/2R//2Sv/xkP/2av/0gP/ylf/2df/0i//0j//0lP/5cP/7a//1p//5gf/7ev/3o//2sf/5mP/6kv/2vP/3y//+jP///////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAQABAAAAePgH+Cg4SFhoJKPIeHYT+LhVppUTiPg2hrUkKPXWdlb2xHJk9jXoNJQDk9TVtkYCUkOy4wNjdGfy1UXGJYOksnPiwgFwwYg0NubWpmX1ArHREOFYUyWVNIVkxXQSoQhyMoNVUpRU5EixkcMzQaGy8xhwsKHiEfBQkSIg+GBAcUCIIBBDSYYGiAAUMALFR6FAgAOw==' ) EDSM._IMG_ERROR = tk.PhotoImage( data= 'R0lGODlhEAAQAKEBAAAAAP///////////yH5BAEKAAIALAAAAAAQABAAAAIwlBWpeR0AIwwNPRmZuVNJinyWuClhBlZjpm5fqnIAHJPtOd3Hou9mL6NVgj2LplEAADs=' ) # BBC Mode 5 '?' 
# Call an EDSM endpoint with args (which should be quoted) def call(self, endpoint, args, check_msgnum=True): try: url = 'https://www.edsm.net/%s?commanderName=%s&apiKey=%s&fromSoftware=%s&fromSoftwareVersion=%s' % ( endpoint, urllib2.quote(config.get('edsm_cmdrname').encode('utf-8')), urllib2.quote(config.get('edsm_apikey')), urllib2.quote(applongname), urllib2.quote(appversion), ) + args r = self.session.get(url, timeout=EDSM._TIMEOUT) r.raise_for_status() reply = r.json() if not check_msgnum: return reply (msgnum, msg) = reply['msgnum'], reply['msg'] except: if __debug__: print_exc() raise Exception(_("Error: Can't connect to EDSM")) # Message numbers: 1xx = OK, 2xx = fatal error, 3xx = error (but not generated in practice), 4xx = ignorable errors if msgnum // 100 not in (1, 4): raise Exception(_('Error: EDSM {MSG}').format(MSG=msg)) else: return reply # Just set link without doing a lookup def link(self, system_name): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } else: self.result = { 'img': '', 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } def lookup(self, system_name, known=0): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } elif known or system_name in self.syscache: self.result = { 'img': EDSM._IMG_KNOWN, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } else: self.result = { 'img': EDSM._IMG_ERROR, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } data = self.call('api-v1/system', '&sysname=%s&coords=1' % urllib2.quote(system_name), check_msgnum=False) if data == -1 or not data: # System not present - but don't create it on the assumption that the caller will self.result['img'] = EDSM._IMG_NEW self.result['uncharted'] = True elif data.get('coords'): self.result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) else: self.result['img'] = EDSM._IMG_UNKNOWN self.result['uncharted'] = True # Asynchronous version of the above def start_lookup(self, system_name, known=0): self.cancel_lookup() if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } elif known or system_name in self.syscache: self.result = { 'img': EDSM._IMG_KNOWN, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } else: self.result = { 'img': '', 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': False, 'uncharted': False } self.thread = threading.Thread(target=self.worker, name='EDSM worker', args=(system_name, self.result)) self.thread.daemon = True self.thread.start() def cancel_lookup(self): self.thread = None # orphan any existing thread self.result = { 'img': '', 'url': None, 'done': True } # orphan existing thread's results def worker(self, system_name, result): try: data = self.call('api-v1/system', '&sysname=%s&coords=1' % urllib2.quote(system_name), check_msgnum=False) if data == -1 or not data: # System not present - create it result['img'] = EDSM._IMG_NEW result['uncharted'] = True elif data.get('coords'): result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) else: result['img'] = EDSM._IMG_UNKNOWN result['uncharted'] = True except: if __debug__: print_exc() 
result['img'] = EDSM._IMG_ERROR result['done'] = True # Send flight log and also do lookup def writelog(self, timestamp, system_name, coordinates, shipid=None): if system_name in self.FAKE: self.result = { 'img': '', 'url': None, 'done': True, 'uncharted': False } return self.result = { 'img': EDSM._IMG_ERROR, 'url': 'https://www.edsm.net/show-system?systemName=%s' % urllib2.quote(system_name), 'done': True, 'uncharted': False } args = '&systemName=%s&dateVisited=%s' % ( urllib2.quote(system_name), urllib2.quote( time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp))), ) if coordinates: args += '&x=%.3f&y=%.3f&z=%.3f' % coordinates if shipid: args += '&shipId=%d' % shipid reply = self.call('api-logs-v1/set-log', args) if reply.get('systemCreated'): self.result['img'] = EDSM._IMG_NEW else: self.result['img'] = EDSM._IMG_KNOWN self.syscache.add(system_name) def setranks(self, ranks): args = '' if ranks: for k, v in ranks.iteritems(): if v is not None: args += '&%s=%s' % (k, urllib2.quote('%d;%d' % v)) if args: self.call('api-commander-v1/set-ranks', args) def setcredits(self, credits): if credits: self.call('api-commander-v1/set-credits', '&balance=%d&loan=%d' % credits) def setshipid(self, shipid): if shipid is not None: self.call('api-commander-v1/set-ship-id', '&shipId=%d' % shipid) def updateship(self, shipid, shiptype, props=[]): if shipid is not None and shiptype: args = '&shipId=%d&type=%s' % (shipid, shiptype) for (slot, thing) in props: args += '&%s=%s' % (slot, urllib2.quote(unicode(thing))) self.call('api-commander-v1/update-ship', args) def sellship(self, shipid): if shipid is not None: self.call('api-commander-v1/sell-ship', '&shipId=%d' % shipid)
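# Hedged usage sketch for the EDSM helper above. It assumes a Tk root window
# already exists (the PhotoImage icons are created in __init__) and that
# 'edsm_cmdrname' and 'edsm_apikey' are set in the plugin config; the system
# name and coordinates below are placeholder values.
edsm = EDSM()
edsm.start_lookup('Shinrarta Dezhra')             # asynchronous lookup in a worker thread
while not edsm.result['done']:
    time.sleep(0.1)                               # poll until the worker fills in the result
print('%s -> %s' % (edsm.result['url'], 'uncharted' if edsm.result['uncharted'] else 'charted'))
edsm.writelog(time.time(), 'Shinrarta Dezhra', (55.7, 17.6, 27.2))   # placeholder coordinates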
class PodnapisiProvider(Provider): """Podnapisi Provider.""" languages = ({Language('por', 'BR'), Language('srp', script='Latn')} | { Language.fromalpha2(l) for l in language_converters['alpha2'].codes }) server_url = 'https://www.podnapisi.net/subtitles/' subtitle_class = PodnapisiSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers['User-Agent'] = self.user_agent def terminate(self): self.session.close() def query(self, language, keyword, season=None, episode=None, year=None): # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652 params = {'sXML': 1, 'sL': str(language), 'sK': keyword} is_episode = False if season and episode: is_episode = True params['sTS'] = season params['sTE'] = episode if year: params['sY'] = year # loop over paginated results logger.info('Searching subtitles %r', params) subtitles = [] pids = set() while True: # query the server r = self.session.get(self.server_url + 'search/old', params=params, timeout=10) r.raise_for_status() xml = etree.fromstring(r.content) # exit if no results if not int(xml.find('pagination/results').text): logger.debug('No subtitles found') break # loop over subtitles for subtitle_xml in xml.findall('subtitle'): # read xml elements pid = subtitle_xml.find('pid').text # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321 if pid in pids: continue language = Language.fromietf( subtitle_xml.find('language').text) hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '') page_link = subtitle_xml.find('url').text releases = [] if subtitle_xml.find('release').text: for release in subtitle_xml.find('release').text.split(): release = re.sub(r'\.+$', '', release) # remove trailing dots release = ''.join( filter(lambda x: ord(x) < 128, release)) # remove non-ascii characters releases.append(release) title = subtitle_xml.find('title').text season = int(subtitle_xml.find('tvSeason').text) episode = int(subtitle_xml.find('tvEpisode').text) year = int(subtitle_xml.find('year').text) if is_episode: subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title, season=season, episode=episode, year=year) else: subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title, year=year) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) pids.add(pid) # stop on last page if int(xml.find('pagination/current').text) >= int( xml.find('pagination/count').text): break # increment current page params['page'] = int(xml.find('pagination/current').text) + 1 logger.debug('Getting page %d', params['page']) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): titles = [video.series] + video.alternative_series season = video.season episode = video.episode else: titles = [video.title] + video.alternative_titles for title in titles: subtitles = [ s for l in languages for s in self.query( l, title, season=season, episode=episode, year=video.year) ] if subtitles: return subtitles return [] def download_subtitle(self, subtitle): # download as a zip logger.info('Downloading subtitle %r', subtitle) r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10) r.raise_for_status() # open the zip with ZipFile(io.BytesIO(r.content)) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle.content = 
fix_line_ending(zf.read(zf.namelist()[0]))
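# Hedged usage sketch for PodnapisiProvider. The keyword, language and year are
# example values; query() only needs a language and a search keyword, so no
# Video object is required here.
provider = PodnapisiProvider()
provider.initialize()
try:
    found = provider.query(Language('por', 'BR'), 'Example Movie', year=2015)
    for sub in found[:3]:
        provider.download_subtitle(sub)   # fills sub.content with the unzipped subtitle
finally:
    provider.terminate()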
class PlateResolver: def __init__(self, canton, stat_queue): if canton not in ['AG', 'LU', 'SH', 'ZG', 'ZH']: raise ValueError('unsupported canton') self.__canton = canton self.__session = Session() self.__session.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64, x64; Trident/7.0; rv:11.0) like Gecko'} self.__submit_page = None self.__result_page = None self.__stat_queue = stat_queue def __get_auth_token(self): return self.__session.cookies.get_dict().get('.AUTOINDEXAUTH') def __check_auth_token(self): if self.__get_auth_token() is None: raise RuntimeError('token expired') def __get_remaining_tries(self): tries_search = re.search(r'(\d+)/(\d+)$', self.__submit_page.find('span', id='LabelAnzahl').contents[0]) return int(tries_search.group(2)) - int(tries_search.group(1)) def __login(self): while self.__get_auth_token() is None: login_page = None solution = None is_first_solution = None while solution is None: response = self.__session.get('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton) login_page = BeautifulSoup(response.text, 'lxml') captcha_oracle = CaptchaOracle() start_time = int(time()) while solution is None and int(time()) - start_time < 60: response = self.__session.get( 'https://www.viacar.ch/eindex/' + login_page.find('img', id='SecBild').get('src'), headers={'Referer': 'https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton}, stream=True) if response.status_code == 200: recognized_text = Captcha(response.raw).solve() if captcha_oracle.add_possible_solution(recognized_text): (solution, is_first_solution) = captcha_oracle.guess_solution() sleep(3) response = self.__session.post('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton, data={ '__VIEWSTATE': login_page.find('input', id='__VIEWSTATE').get('value'), '__VIEWSTATEGENERATOR': login_page.find('input', id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': login_page.find('input', id='__EVENTVALIDATION').get( 'value'), login_page.find('input', type='text').get('id'): solution }) if self.__get_auth_token() is not None: self.__stat_queue.put(1 if is_first_solution else 2) self.__submit_page = BeautifulSoup(response.text, 'lxml') else: self.__stat_queue.put(0) def __reset_remaining_tries(self): auth_token = self.__get_auth_token() self.__session.cookies.set('ViaInd' + self.__canton, 'Anzahl=0&Date=' + date.today().strftime('%d.%m.%Y') + '&de-CH=de-CH', domain='www.viacar.ch', path='/') self.__session.get('https://www.viacar.ch/eindex/Login.aspx?Kanton=' + self.__canton) self.__session.cookies.set('.AUTOINDEXAUTH', auth_token, domain='www.viacar.ch', path='/') sleep(3) def __request_submit_page(self): if self.__get_remaining_tries() <= 1: self.__reset_remaining_tries() response = self.__session.post('https://www.viacar.ch/eindex/Result.aspx', data={ '__VIEWSTATE': self.__result_page.find('input', id='__VIEWSTATE').get( 'value'), '__VIEWSTATEGENERATOR': self.__result_page.find('input', id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': self.__result_page.find('input', id='__EVENTVALIDATION').get( 'value') }) self.__check_auth_token() self.__submit_page = BeautifulSoup(response.text, 'lxml') def __prepare_submit(self): self.__login() if self.__get_auth_token() is None else self.__request_submit_page() def __submit(self, plate): self.__session.post('https://www.viacar.ch/eindex/Search.aspx', data={ '__VIEWSTATE': self.__submit_page.find('input', id='__VIEWSTATE').get('value'), '__VIEWSTATEGENERATOR': self.__submit_page.find('input', 
id='__VIEWSTATEGENERATOR').get( 'value'), '__EVENTVALIDATION': self.__submit_page.find('input', id='__EVENTVALIDATION').get( 'value'), 'TextBoxKontrollschild': plate }) self.__check_auth_token() response = self.__session.get('https://www.viacar.ch/eindex/Result.aspx') self.__check_auth_token() self.__result_page = BeautifulSoup(response.text, 'lxml') def __parse_result_page(self): if self.__result_page.find(string=re.compile('key was not present in the dictionary')) is not None: self.__submit_page = self.__result_page return None owners = [] for owner in self.__result_page.find_all(bgcolor='whitesmoke'): owners.append(VehicleOwner(owner)) return owners def get_vehicle_owner(self, plate): if plate < 1 or plate > 999999: raise ValueError('plate must be in range [1,999999]') while True: try: self.__prepare_submit() owners = None while owners is None: self.__submit(plate) owners = self.__parse_result_page() return owners except RuntimeError: self.__session.cookies.clear()
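# Hedged usage sketch for PlateResolver. The stat queue collects CAPTCHA
# statistics (0 = failed login attempt, 1 = first guess correct, 2 = later
# guess correct); get_vehicle_owner() returns a list of VehicleOwner objects.
from queue import Queue

stats = Queue()
resolver = PlateResolver('ZH', stats)
owners = resolver.get_vehicle_owner(12345)   # numeric plate in the range [1, 999999]
for owner in owners:
    print(owner)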
class Client: def __init__(self, server, dataset=None, token=None, verify=True): """ Client constructor. The first ``Client`` you create will be stored as the default ``Client`` to be used with all ``neuprint-python`` functions if you don't explicitly specify one. Args: server: URL of neuprintHttp server token: neuPrint token. Either pass explitily as an argument or set as ``NEUPRINT_APPLICATION_CREDENTIALS`` environment variable. Your token can be retrieved by clicking on your account in the NeuPrint web interface. verify: If ``True`` (default), enforce signed credentials. dataset: The dataset to run all queries against, e.g. 'hemibrain'. If not provided, the server will use a default dataset for all queries. """ if not token: token = os.environ.get('NEUPRINT_APPLICATION_CREDENTIALS') if not token: raise RuntimeError("No token provided. Please provide one or set NEUPRINT_APPLICATION_CREDENTIALS") if ':' in token: try: token = ujson.loads(token)['token'] except Exception: raise RuntimeError("Did not understand token. Please provide the entire JSON document or (only) the complete token string") token = token.replace('"', '') if '://' not in server: server = 'https://' + server elif server.startswith('http://'): raise RuntimeError("Server must be https, not http") elif not server.startswith('https://'): protocol = server.split('://')[0] raise RuntimeError(f"Unknown protocol: {protocol}") # Remove trailing backslash while server.endswith('/'): server = server[:-1] self.server = server self.session = Session() self.session.headers.update({"Authorization": "Bearer " + token, "Content-type": "application/json"}) # If the connection fails, retry a couple times. retries = Retry(connect=2, backoff_factor=0.1) self.session.mount('https://', HTTPAdapter(max_retries=retries)) self.verify = verify if not verify: urllib3.disable_warnings(InsecureRequestWarning) all_datasets = [*self.fetch_datasets().keys()] if len(all_datasets) == 0: raise RuntimeError(f"The neuprint server {self.server} has no datasets!") if len(all_datasets) == 1 and not dataset: self.dataset = all_datasets[0] logger.info(f"Initializing neuprint.Client with dataset: {self.dataset}") elif dataset in all_datasets: self.dataset = dataset else: raise RuntimeError(f"Dataset '{dataset}' does not exist on" f" the neuprint server ({self.server}).\n" f"Available datasets: {all_datasets}") # Set this as the default client if there isn't one already global DEFAULT_NEUPRINT_CLIENT if DEFAULT_NEUPRINT_CLIENT is None: set_default_client(self) from .queries.general import fetch_meta from .queries.rois import _all_rois_from_meta # Pre-cache these metadata fields, # to avoid re-fetching them for many queries that need them. 
self.meta = fetch_meta(client=self) self.primary_rois = sorted(self.meta['primaryRois']) self.all_rois = _all_rois_from_meta(self.meta) def __repr__(self): s = f'Client("{self.server}", "{self.dataset}"' if not self.verify: s += ", verify=False" s += ")" return s @verbose_errors def _fetch(self, url, json=None, ispost=False): if ispost: r = self.session.post(url, json=json, verify=self.verify) else: assert json is None, "Can't provide a body via GET method" r = self.session.get(url, verify=self.verify) r.raise_for_status() return r def _fetch_raw(self, url, json=None, ispost=False): return self._fetch(url, json=json, ispost=ispost).content def _fetch_json(self, url, json=None, ispost=False): r = self._fetch(url, json=json, ispost=ispost) return ujson.loads(r.content) ## ## CUSTOM QUERIES ## ## Note: Transaction queries are not implemented here. See admin.py ## def fetch_custom(self, cypher, dataset="", format='pandas'): """ Query the neuprint server with a custom Cypher query. Args: cypher: A cypher query string dataset: *Deprecated. Please provide your dataset as a Client constructor argument.* Which neuprint dataset to query against. If None provided, the client's default dataset is used. format: Either ``'pandas'`` or ``'json'``. Whether to load the results into a ``pandas.DataFrame``, or return the server's raw JSON response as a Python ``dict``. Returns: Either json or DataFrame, depending on ``format``. """ url = f"{self.server}/api/custom/custom" return self._fetch_cypher(url, cypher, dataset, format) def _fetch_cypher(self, url, cypher, dataset, format='pandas'): """ Fetch cypher from an endpoint. Called by fetch_custom and by Transaction queries. """ assert format in ('json', 'pandas') if set("‘’“”").intersection(cypher): msg = ("Your cypher query contains 'smart quotes' (e.g. ‘foo’ or “foo”)," " which are not valid characters in cypher." " Please replace them with ordinary quotes (e.g. 'foo' or \"foo\").\n" "Your query was:\n" + cypher) raise RuntimeError(msg) dataset = dataset or self.dataset cypher = indent(dedent(cypher), ' ') logger.debug(f"Performing cypher query against dataset '{dataset}':\n{cypher}") result = self._fetch_json(url, json={"cypher": cypher, "dataset": dataset}, ispost=True) if format == 'json': return result df = pd.DataFrame(result['data'], columns=result['columns']) return df ## ## API-META ## def fetch_available(self): """ Fetch the list of REST API endpoints supported by the server. """ return self._fetch_json(f"{self.server}/api/available") def fetch_help(self): """ Fetch auto-generated REST API documentation, as YAML text. """ return self._fetch_raw(f"{self.server}/api/help/swagger.yaml").decode('utf-8') def fetch_server_info(self): """ Returns whether or not the server is public. """ return self._fetch_json(f"{self.server}/api/serverinfo")['IsPublic'] def fetch_version(self): """ Returns the version of the ``neuPrintHTTP`` server. """ return self._fetch_json(f"{self.server}/api/version")['Version'] @lru_cache(None) def fetch_neuron_keys(self): """ Returns all available :Neuron properties in the database. Cached. """ # Fetch available keys c = """ MATCH (n :`Neuron`) UNWIND KEYS(n) AS k RETURN DISTINCT k AS neuron_fields """ raw = self.fetch_custom(c, format='json') return [r[0] for r in raw['data']] ## ## DB-META ## def fetch_database(self): """ Fetch the address of the neo4j database that the neuprint server is using. 
""" return self._fetch_json(f"{self.server}/api/dbmeta/database") def fetch_datasets(self): """ Fetch basic information about the available datasets on the server. """ return self._fetch_json(f"{self.server}/api/dbmeta/datasets") def fetch_instances(self): """ Fetch secondary data instances avaiable through neupint http """ return self._fetch_json(f"{self.server}/api/dbmeta/instances") def fetch_db_version(self): """ Fetch the database version """ return self._fetch_json(f"{self.server}/api/dbmeta/version")['Version'] ## ## USER ## def fetch_profile(self): """ Fetch basic information about your user profile, including your access level. """ return self._fetch_json(f"{self.server}/profile") def fetch_token(self): """ Fetch your user authentication token. Note: This method just echoes the token back to you for debug purposes. To obtain your token for the first time, use the neuprint explorer web UI to login and obtain your token as explained elsewhere in this documentation. """ return self._fetch_json(f"{self.server}/token")['token'] ## ## Cached ## def fetch_daily_type(self, format='pandas'): """ Return information about today's cell type of the day. The server updates the completeness numbers each day. A different cell type is randomly picked and an exemplar is chosen from this type. Returns: If ``format='json'``, a dictionary is returned with keys ``['info', 'connectivity', 'skeleton']``. If ``format='pandas'``, three values are returned: ``(info, connectivity, skeleton)``, where ``connectivity`` and ``skeleton`` are DataFrames. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/dailytype?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result conn_df = pd.DataFrame(result['connectivity']['data'], columns=result['connectivity']['columns']) skel_df = pd.DataFrame(result['skeleton']['data'], columns=result['skeleton']['columns']) return result['info'], conn_df, skel_df def fetch_roi_completeness(self, format='pandas'): """ Fetch the pre-computed traced "completeness" statistics for each primary ROI in the dataset. The completeness statistics indicate how many synapses belong to Traced neurons. Note: These results are not computed on-the-fly. They are computed periodically and cached. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/roicompleteness?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result df = pd.DataFrame(result['data'], columns=result['columns']) return df def fetch_roi_connectivity(self, format='pandas'): """ Fetch the pre-computed connectivity statistics between primary ROIs in the dataset. Note: These results are not computed on-the-fly. They are computed periodically and cached. """ assert format in ('json', 'pandas') url = f"{self.server}/api/cached/roiconnectivity?dataset={self.dataset}" result = self._fetch_json(url, ispost=False) if format == 'json': return result # Example result: # { # "roi_names": [['ME(R)', "a'L(L)", 'aL(L)', ...]], # "weights": { # 'EPA(R)=>gL(L)': {'count': 7, 'weight': 1.253483174941712}, # 'EPA(R)=>gL(R)': {'count': 29, 'weight': 2.112117795621343}, # 'FB=>AB(L)': {'count': 62, 'weight': 230.11732347331355}, # 'FB=>AB(R)': {'count': 110, 'weight': 496.733276906109}, # ... 
# } # } weights = [(*k.split('=>'), v['count'], v['weight']) for k,v in result["weights"].items()] df = pd.DataFrame(weights, columns=['from_roi', 'to_roi', 'count', 'weight']) return df ## ## ROI MESHES ## def fetch_roi_mesh(self, roi, export_path=None): """ Fetch a mesh for the given ROI, in ``.obj`` format. Args: roi: Name of an ROI export_path: Optional. Writes the ``.obj`` file to the given path. Returns: bytes The contents of the fetched ``.obj`` mesh file. Note: ROI meshes are intended for visualization only. (They are not suitable for quantitative analysis.) """ url = f"{self.server}/api/roimeshes/mesh/{self.dataset}/{roi}" data = self._fetch_raw(url, ispost=False) if export_path: with open(export_path, 'wb') as f: f.write(data) return data ## ## SKELETONS ## def fetch_skeleton(self, body, heal=False, export_path=None, format='pandas', with_distances=False): """ Fetch the skeleton for a neuron or segment. Args: body (int): A neuron or segment ID heal (bool): If ``True`` and the skeleton is fragmented, 'heal' it by connecting its fragments into a single tree. The fragments are joined by selecting the minimum spanning tree after joining all fragments via their pairwise nearest neighbors. See :py:func:`.heal_skeleton()` for more details. If you want the healing procedure to refrain from connecting very distant fragments, set ``heal`` to a maximum allowed distance, e.g. ``heal=1000.0`` format (str): Either 'pandas', 'swc' (similar to CSV), or 'nx' (``networkx.DiGraph``). export_path (str): Optional. Writes the ``.swc`` file to disk. (SWC format is written, regardless of the returned ``format``.) with_distances: Only valid when format is ``pandas`` or ``nx``. If True, a 'distance' column (or edge attribute) will be added to the dataframe (or nx.Graph), indicating the distances from each node to its parent node. In DataFrame results, root nodes will be assigned a distance of ``np.inf``. Distances are computed AFTER healing is performed. Distances will not be present in any exported SWC file. Returns: Either a string (swc), a DataFrame (pandas), or ``networkx.DiGraph`` (nx). See also: - :py:func:`.heal_skeleton()` - :py:func:`.skeleton_df_to_nx()` - :py:func:`.skeleton_df_to_swc()` """ from .skeleton import skeleton_df_to_nx, heal_skeleton, skeleton_df_to_swc, skeleton_swc_to_df, calc_segment_distances try: body = int(body) except ValueError: raise RuntimeError(f"Please pass an integer body ID, not '{body}'") assert format in ('swc', 'pandas', 'nx'), f'Invalid format: {format}' assert not with_distances or format in ('pandas', 'nx'), \ f"The with_distances option can only be used with the 'pandas' or 'nx' output formats, not {format}" url = f"{self.server}/api/skeletons/skeleton/{self.dataset}/{body}?format=swc" swc = self._fetch_raw(url, ispost=False).decode('utf-8') if heal or format != 'swc': df = skeleton_swc_to_df(swc) if heal: df = heal_skeleton(df, heal) if export_path or format == 'swc': swc = skeleton_df_to_swc(df) if export_path: with open(export_path, 'w') as f: f.write(swc) if format == 'swc': return swc if format == 'pandas': if with_distances: df['distance'] = calc_segment_distances(df) return df if format == 'nx': return skeleton_df_to_nx(df, with_distances=with_distances) raise AssertionError('Should not get here.') ## ## RAW KEY-VALUE ## def fetch_raw_keyvalue(self, instance, key): """ Fetch a value from the ``neuprintHTTP`` server. The data address is given by both the instance name and key. (For admins and experts only.) 
""" url = f"{self.server}/api/raw/keyvalue/key/{instance}/{key}" return self._fetch_raw(url, ispost=False) def post_raw_keyvalue(self, instance, key, value): """ Post a value from the ``neuprintHTTP`` server. The data address is given by both the instance name and key. (For admins and experts only.) """ assert isinstance(value, bytes) url = f"{self.server}/api/raw/keyvalue/key/{instance}/{key}" r = self.session.post(url, data=value, verify=self.verify) r.raise_for_status()
class TVsubtitlesProvider(Provider): """TVsubtitles Provider.""" languages = {Language('por', 'BR')} | { Language(l) for l in [ 'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho' ] } video_types = (Episode, ) server_url = 'http://www.tvsubtitles.net/' subtitle_class = TVsubtitlesSubtitle def __init__(self): self.session = None def initialize(self): self.session = Session() self.session.headers[ 'User-Agent'] = 'Subliminal/%s' % __short_version__ def terminate(self): self.session.close() @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME) def search_show_id(self, series, year=None): """Search the show id from the `series` and `year`. :param str series: series of the episode. :param year: year of the series, if any. :type year: int :return: the show id, if any. :rtype: int """ # make the search logger.info('Searching show id for %r', series) r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10) r.raise_for_status() # get the series out of the suggestions soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) show_id = None for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'): match = link_re.match(suggestion.text) if not match: logger.error('Failed to match %s', suggestion.text) continue if match.group('series').lower() == series.lower(): if year is not None and int(match.group('first_year')) != year: logger.debug('Year does not match') continue show_id = int(suggestion['href'][8:-5]) logger.debug('Found show id %d', show_id) break return show_id @region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME) def get_episode_ids(self, show_id, season): """Get episode ids from the show id and the season. :param int show_id: show id. :param int season: season of the episode. :return: episode ids per episode number. 
:rtype: dict """ # get the page of the season of the show logger.info('Getting the page of show id %d, season %d', show_id, season) r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10) soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # loop over episode rows episode_ids = {} for row in soup.select('table#table5 tr'): # skip rows that do not have a link to the episode page if not row('a', href=episode_id_re): continue # extract data from the cells cells = row('td') episode = int(cells[0].text.split('x')[1]) episode_id = int(cells[1].a['href'][8:-5]) episode_ids[episode] = episode_id if episode_ids: logger.debug('Found episode ids %r', episode_ids) else: logger.warning('No episode ids found') return episode_ids def query(self, show_id, series, season, episode, year=None): # get the episode ids episode_ids = self.get_episode_ids(show_id, season) if episode not in episode_ids: logger.error('Episode %d not found', episode) return [] # get the episode page logger.info('Getting the page for episode %d', episode_ids[episode]) r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10) soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) # loop over subtitles rows subtitles = [] for row in soup.select('.subtitlen'): # read the item language = Language.fromtvsubtitles(row.h5.img['src'][13:-4]) subtitle_id = int(row.parent['href'][10:-5]) page_link = self.server_url + 'subtitle-%d.html' % subtitle_id rip = row.find('p', title='rip').text.strip() or None release = row.find('h5').text.strip() or None subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip, release) logger.debug('Found subtitle %s', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): # lookup show_id titles = [video.series] + video.alternative_series show_id = None for title in titles: show_id = self.search_show_id(title, video.year) if show_id is not None: break # query for subtitles with the show_id if show_id is not None: subtitles = [ s for s in self.query(show_id, title, video.season, video.episode, video.year) if s.language in languages and s.episode == video.episode ] if subtitles: return subtitles else: logger.error('No show id found for %r (%r)', video.series, {'year': video.year}) return [] def download_subtitle(self, subtitle): # download as a zip logger.info('Downloading subtitle %r', subtitle) r = self.session.get(self.server_url + 'download-%d.html' % subtitle.subtitle_id, timeout=10) r.raise_for_status() # open the zip with ZipFile(io.BytesIO(r.content)) as zf: if len(zf.namelist()) > 1: raise ProviderError('More than one file to unzip') subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))
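# A rough usage sketch for TVsubtitlesProvider above; in subliminal the provider is
# normally driven through list_subtitles()/download_subtitle() with a Video object,
# so the series name, season, and episode below are placeholders.
provider = TVsubtitlesProvider()
provider.initialize()
try:
    show_id = provider.search_show_id('Breaking Bad')
    if show_id is not None:
        subs = provider.query(show_id, 'Breaking Bad', 1, 1)
        if subs:
            provider.download_subtitle(subs[0])   # fills subs[0].content
finally:
    provider.terminate()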
def main(): sock = None udp_remote_address = None rxbuff = list() # UDP incoming buffer txbuff = list() # UDP outcoming buffer # Loop with authentication to TecoRoute service while True: try: user = '******' password = '******' plc = 'L2_0202' session = Session() rs = session.get('http://77.236.203.188:61682/INDEX.XML', headers={ 'User-Agent': 'tecoroute', 'x-aplic': 'AKRCON tecoroute', 's-tcm': 'NT_Key', 'n-user': tc_secret(user) }) print(rs.content) hash1 = sha1((rs.text[:8] + password).encode()).hexdigest().upper() rs = session.put('http://77.236.203.188:61682/IAM.TXT', data=hash1 + '\r\n', headers={'User-Agent': 'tecoroute'}) print(rs.content) rs = session.put('http://77.236.203.188:61682/PLC.TXT', data=tc_secret(plc) + '\r\n', headers={'User-Agent': 'tecoroute'}) print(rs.content) sock = socket.socket(type=socket.SOCK_DGRAM) sock.setblocking(False) sock.bind(('', 50000)) sleep(10) # Loop for send/receive data while True: # Receive UDP data receiving = True while receiving: try: data, udp_remote_address = sock.recvfrom( 65507) # Max UDP packet size except BlockingIOError: receiving = False else: rxbuff.append(data) # Send UDP data if udp_remote_address and txbuff: for data in txbuff: sock.sendto(data, udp_remote_address) del txbuff[:] rxdata = b'' for data in rxbuff: rxdata += tc_encode(data) #if not rxdata: # rxdata = b"\x01\x00\x01\x00\xf9\xf8\xe7\xe6\x81\x17\x16\xff" print('Sending', rxdata[:10]) rs = session.get( 'http://77.236.203.188:61682/DATA.BIN', data=rxdata, headers={ 'Cache-Control': 'no-cache', 'Content-Type': 'binary', 'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', 'u-tcm': 'U-TCM' }) txbuff.append(tc_decode(rs.content)) print('Received', txbuff[-1][:10]) sleep(1) except ConnectionError as e: if sock: sock.close() raise e sleep(10)
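# The TecoRoute login in main() above is a simple challenge-response: the first
# eight characters of the INDEX.XML response act as the challenge, and the client
# answers with the uppercase hex SHA-1 of challenge + password via PUT /IAM.TXT.
# A standalone sketch of just that hashing step (the values are made up):
from hashlib import sha1

challenge = 'A1B2C3D4'   # stands in for rs.text[:8] from INDEX.XML
password = 'secret'      # placeholder PLC user password
answer = sha1((challenge + password).encode()).hexdigest().upper()
print(answer)            # this string, followed by '\r\n', is PUT to /IAM.TXT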
def get_remaining_meals(s: Session): page = bs(s.get(meal_url).content, 'html.parser') status = page.find_all('td', attrs={'data-title': 'Status:'}) any10 = re.search(r'.*=([0-9]+)', status[0].text).group(1) meal50 = re.search(r'.*= ([0-9]+)', status[1].text).group(1) return any10, meal50
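# Hedged usage sketch: get_remaining_meals() expects an already-authenticated
# requests Session and relies on a module-level `meal_url` (and `bs`/`re` imports)
# defined elsewhere in this snippet's module, so the login step is only indicated.
s = Session()
# ... perform the site's login flow on `s` here ...
any10, meal50 = get_remaining_meals(s)
print('any-10 meals remaining:', any10, '| meal-50 remaining:', meal50)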
class DNACHelper(object): """Utility class for interacting with Onboarding service on DNA-C. The only class level attribute stored is the Northbound REST API client. """ address = '' user = '' password = '' client = None _rest_base_api = 'api/v1' _auth_uri = 'api/system/v1/auth/login' def __init__(self, address='', user='', password=''): """Initializes NB REST API client for DNA-C. Performs initial auth login and token exchange. Specify DNA-C instance by providing pyATS device instance OR all of the following: address, user, password. Args: device (Device, optional): pyATS device object for DNA-C cluster address (str, optional): DNA-C address user (str, optional): DNA-C login user password (str, optional): DNA-C login password Raises: ConnectionError: failed to authenticate client """ self.address = address self.user = user self.password = password self._create_client() def _create_client(self): """Initializes REST client Raises: ConnectionError: failed to authenticate client """ self.client = Session() # Disable server authentication from client-side self.client.verify = False self._gen_token() def _gen_token(self): """Performs auth login, extracts JWT, and sets JWT in header""" # Set login authorization required for token generation self.client.auth = HTTPBasicAuth(self.user, self.password) # Attempt login self.client.headers.update({'Content-Type': 'application/json'}) resp = self.client.get('https://{}/{}'.format(self.address, self._auth_uri)) if (resp.status_code != 200) or ('set-cookie' not in resp.headers): logger.error('Failed to initialize client') logger.debug(resp) raise ConnectionRefusedError("HTTP Status %s" % resp.status_code) # Set session cookie to JWT retrieved from response header cookie = SimpleCookie() cookie.load(resp.headers['set-cookie']) client_cookies = {key: morsel.value for key, morsel in cookie.items()} self.client.cookies.update(client_cookies) def _call_api(self, request_type, path, params=None, data=None, files=None): """Calls REST API with provided information Args: request_type (str): GET, POST, PUT, DELETE, HEAD, and OPTIONS path (str): REST API path (e.g. 
onboarding/pnp-device) params (dict, optional): Dictionary to send in the query string data (dict, optional): Dictionary to send in the body of the Request Returns: requests.Response: requests response object None: upon call failure """ url = 'https://{}/{}/{}'.format(self.address, self._rest_base_api, path) if not hasattr(self.client, request_type.lower()): raise ValueError("request_type (%s) unsupported" % request_type) send_kwargs = dict(url=url, params=params, data=data, files=files) if files: if isinstance(send_kwargs["files"], dict): fd = send_kwargs["files"] if len(fd) == 1 and isinstance(list(fd.values())[0], tuple): ft = fd[list(fd.keys())[0]] fd[list(fd.keys())[0]] = ft[:1] + (open(ft[1], "rb"), ) + ft[2:] else: send_kwargs["files"] = { key: open(val, "rb") for key, val in fd.items() } if isinstance(send_kwargs["files"], str): send_kwargs["files"] = open(send_kwargs["files"], "rb") if self.client.headers.get("Content-Type") == "application/json": self.client.headers.pop("Content-Type") response = getattr(self.client, request_type.lower())(**send_kwargs) self.client.headers.update({"Content-Type": "application/json"}) if not response or response.status_code not in (200, 204): logger.error("API call failed") logger.debug(response) return None return response def get_device(self, serialnumber=None, state=None, limit=None): request_type = 'GET' path = 'onboarding/pnp-device' payload = {} if serialnumber: payload.update(serialNumber=serialnumber) if state: payload.update(state=state) if limit: payload.update(limit=limit) response = self._call_api(request_type, path, params=payload) if not response or response.status_code not in (200, 204): return '' response_body = response.json() if not response_body: return '' return response_body def delete_device(self, device_id): """Deletes the specified device from DNA-C database API: onboarding/pnp-device Args: device_id (str): Device ID Returns: bool: True if deleted successfully, False otherwise """ request_type = 'DELETE' path = 'onboarding/pnp-device/{deviceId}'.format(deviceId=device_id) response = self._call_api(request_type, path) if not response or response.status_code not in (200, 204): return False return True def post_file(self, namespace, file): request_type = 'POST' path = 'file/' + namespace data = {} files = {"fileUpload": file} # self.client.headers['Content-Type']='multipart/form-data' response = self._call_api(request_type, path, files=files) if not response or response.status_code not in (200, 204): return '' response_body = response.json() if not response_body: return '' return response_body def get_files(self, namespace): request_type = 'GET' path = 'file/namespace/' + namespace response = self._call_api(request_type, path) if not response or response.status_code not in (200, 204): return "" return response.json()['response'] def post_workflow(self, workflow): request_type = 'POST' path = 'onboarding/pnp-workflow' response = self._call_api(request_type, path, data=json.dumps(workflow)) if not response or response.status_code not in (200, 204): return response return response.json() def post_project(self, project): request_type = 'POST' path = 'onboarding/pnp-project' response = self._call_api(request_type, path, data=json.dumps(project)) if not response or response.status_code not in (200, 204): return response return response.json() def post_device_claim(self, claim_data): request_type = 'POST' path = 'onboarding/pnp-device/claim' response = self._call_api(request_type, path, data=json.dumps(claim_data)) if not response or 
response.status_code not in (200, 204): return response return response.json() def delete_config(self, id): request_type = 'DELETE' path = 'file/{fileId}'.format(fileId=id) response = self._call_api(request_type, path)
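# Hedged usage sketch for DNACHelper above. The address, credentials, and the
# 'Unclaimed' state value are placeholders; _create_client() runs automatically in
# __init__ and raises ConnectionRefusedError if the token exchange fails.
dnac = DNACHelper(address='10.0.0.1', user='admin', password='password')
devices = dnac.get_device(state='Unclaimed')
print(devices)                               # raw JSON body, or '' on failure
uploaded = dnac.post_file('config', '/tmp/device-config.txt')   # placeholder path
print(uploaded)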
def close(self): pass if __name__ == '__main__': #main() d = b"\x09\x08\x05\x04\x03\xcc\x63\x32\x01\x98\x97\x95\x28\x1c\x18\x03\xc5\xc0\xbc\xa3\x65\x60\x5c\x3f\x01\xfc\xf8\xd7\x99\x94\x90\x6b\x2d\x2b\x27\xf1\xb3\xae\xaa\x70\x32\x2d\x29\xeb\xad\xab\xa7\x68\x2a\x28\x24\xe2\xa4\x9f\x9b\x55\x17\x12\xbb\xa4" print(tc_decode(d + d)) exit() ses = Session() ses.mount('http+teco', TecoHttpAdapter()) #rs = ses.get('http+teco://77.236.203.188:61682/INDEX.XML', # headers={'User-Agent': 'tecoroute', 'x-aplic': 'AKRCON tecoroute', 's-tcm': 'NT_Key', # 'n-user': tc_secret('TRCtest')}) rs = ses.get('http+teco://www.example.com') print(rs) """string = b"\x01\x00\xfd\xfc\xfb\xec\x83\x7a\x71\x08\x07\x05\x98\x8c\x88\xa9\x82\x4f\x9d\x86" dec = tc_decode(string) for i in range(len(dec)): print(i, hex(dec[i])) exit()""" """# These two lines enable debugging at httplib level (requests->urllib3->http.client) # You will see the REQUEST, including HEADERS and DATA, and RESPONSE with HEADERS but without DATA. # The only thing missing will be the response.body which is not logged. try: import http.client as http_client except ImportError: # Python 2 import httplib as http_client http_client.HTTPConnection.debuglevel = 1
class LegendasTVProvider(Provider): """LegendasTV Provider. :param str username: username. :param str password: password. """ languages = { Language.fromlegendastv(l) for l in language_converters['legendastv'].codes } server_url = 'http://legendas.tv/' subtitle_class = LegendasTVSubtitle def __init__(self, username=None, password=None): # Provider needs UNRAR installed. If not available raise ConfigurationError try: rarfile.custom_check(rarfile.UNRAR_TOOL) except rarfile.RarExecError: raise ConfigurationError('UNRAR tool not available') if any((username, password)) and not all((username, password)): raise ConfigurationError('Username and password must be specified') self.username = username self.password = password self.logged_in = False self.session = None def initialize(self): self.session = Session() self.session.headers[ 'User-Agent'] = 'Subliminal/%s' % __short_version__ # login if self.username and self.password: logger.info('Logging in') data = { '_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password } r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10) raise_for_status(r) soup = ParserBeautifulSoup(r.content, ['html.parser']) if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')): raise AuthenticationError(self.username) logger.debug('Logged in') self.logged_in = True def terminate(self): # logout if self.logged_in: logger.info('Logging out') r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10) raise_for_status(r) logger.debug('Logged out') self.logged_in = False self.session.close() @staticmethod def is_valid_title(title, title_id, sanitized_title, season, year): """Check if is a valid title.""" sanitized_result = sanitize(title['title']) if sanitized_result != sanitized_title: logger.debug("Mismatched title, discarding title %d (%s)", title_id, sanitized_result) return # episode type if season: # discard mismatches on type if title['type'] != 'episode': logger.debug( "Mismatched 'episode' type, discarding title %d (%s)", title_id, sanitized_result) return # discard mismatches on season if 'season' not in title or title['season'] != season: logger.debug('Mismatched season %s, discarding title %d (%s)', title.get('season'), title_id, sanitized_result) return # movie type else: # discard mismatches on type if title['type'] != 'movie': logger.debug( "Mismatched 'movie' type, discarding title %d (%s)", title_id, sanitized_result) return # discard mismatches on year if year is not None and 'year' in title and title['year'] != year: logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, sanitized_result) return return True @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value) def search_titles(self, title, season, title_year): """Search for titles matching the `title`. For episodes, each season has it own title :param str title: the title to search for. :param int season: season of the title :param int title_year: year of the title :return: found titles. 
:rtype: dict """ titles = {} sanitized_titles = [sanitize(title)] ignore_characters = {'\'', '.'} if any(c in title for c in ignore_characters): sanitized_titles.append( sanitize(title, ignore_characters=ignore_characters)) for sanitized_title in sanitized_titles: # make the query if season: logger.info('Searching episode title %r for season %r', sanitized_title, season) else: logger.info('Searching movie title %r', sanitized_title) r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10) raise_for_status(r) results = json.loads(r.text) # loop over results for result in results: source = result['_source'] # extract id title_id = int(source['id_filme']) # extract type title = {'type': type_map[source['tipo']]} # extract title, year and country name, year, country = title_re.match( source['dsc_nome']).groups() title['title'] = name # extract imdb_id if source['id_imdb'] != '0': if not source['id_imdb'].startswith('tt'): title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7) else: title['imdb_id'] = source['id_imdb'] # extract season if title['type'] == 'episode': if source['temporada'] and source['temporada'].isdigit(): title['season'] = int(source['temporada']) else: match = season_re.search(source['dsc_nome_br']) if match: title['season'] = int(match.group('season')) else: logger.debug( 'No season detected for title %d (%s)', title_id, name) # extract year if year: title['year'] = int(year) elif source['dsc_data_lancamento'] and source[ 'dsc_data_lancamento'].isdigit(): # year is based on season air date hence the adjustment title['year'] = int( source['dsc_data_lancamento']) - title.get( 'season', 1) + 1 # add title only if is valid # Check against title without ignored chars if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year): titles[title_id] = title logger.debug('Found %d titles', len(titles)) return titles @region.cache_on_arguments( expiration_time=timedelta(minutes=15).total_seconds()) def get_archives(self, title_id, language_code, title_type, season, episode): """Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`. :param int title_id: title id. :param int language_code: language code. :param str title_type: episode or movie :param int season: season :param int episode: episode :return: the archives. 
:rtype: list of :class:`LegendasTVArchive` """ archives = [] page = 0 while True: # get the archive page url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format( language=language_code, page=page, title=title_id) r = self.session.get(url) raise_for_status(r) # parse the results soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) for archive_soup in soup.select( 'div.list_element > article > div > div.f_left'): # create archive archive = LegendasTVArchive( archive_soup.a['href'].split('/')[2], archive_soup.a.text, 'pack' in archive_soup.parent['class'], 'destaque' in archive_soup.parent['class'], self.server_url + archive_soup.a['href'][1:]) # clean name of path separators and pack flags clean_name = archive.name.replace('/', '-') if archive.pack and clean_name.startswith('(p)'): clean_name = clean_name[3:] # guess from name guess = guessit(clean_name, {'type': title_type}) # episode if season and episode: # discard mismatches on episode in non-pack archives # Guessit may return int for single episode or list for multi-episode # Check if archive name has multiple episodes releases on it if not archive.pack and 'episode' in guess: wanted_episode = set(episode) if isinstance( episode, list) else {episode} archive_episode = guess['episode'] if isinstance( guess['episode'], list) else {guess['episode']} if not wanted_episode.intersection(archive_episode): logger.debug( 'Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name) continue # extract text containing downloads, rating and timestamp data_text = archive_soup.find('p', class_='data').text # match downloads archive.downloads = int( downloads_re.search(data_text).group('downloads')) # match rating match = rating_re.search(data_text) if match: archive.rating = int(match.group('rating')) # match timestamp and validate it time_data = { k: int(v) for k, v in timestamp_re.search( data_text).groupdict().items() } archive.timestamp = pytz.timezone( 'America/Sao_Paulo').localize(datetime(**time_data)) if archive.timestamp > datetime.utcnow().replace( tzinfo=pytz.utc): raise ProviderError('Archive timestamp is in the future') # add archive logger.info( 'Found archive for title %d and language %d at page %s: %s', title_id, language_code, page, archive) archives.append(archive) # stop on last page if soup.find('a', attrs={'class': 'load_more'}, string='carregar mais') is None: break # increment page count page += 1 logger.debug('Found %d archives', len(archives)) return archives def download_archive(self, archive): """Download an archive's :attr:`~LegendasTVArchive.content`. :param archive: the archive to download :attr:`~LegendasTVArchive.content` of. 
:type archive: :class:`LegendasTVArchive` """ logger.info('Downloading archive %s', archive.id) r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id)) raise_for_status(r) # open the archive archive_stream = io.BytesIO(r.content) if is_rarfile(archive_stream): logger.debug('Identified rar archive') archive.content = RarFile(archive_stream) elif is_zipfile(archive_stream): logger.debug('Identified zip archive') archive.content = ZipFile(archive_stream) else: raise ValueError('Not a valid archive') def query(self, language, title, season=None, episode=None, year=None): # search for titles titles = self.search_titles(title, season, year) subtitles = [] # iterate over titles for title_id, t in titles.items(): logger.info('Getting archives for title %d and language %d', title_id, language.legendastv) archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode) if not archives: logger.info('No archives found for title %d and language %d', title_id, language.legendastv) # iterate over title's archives for a in archives: # compute an expiration time based on the archive timestamp expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds() # attempt to get the releases from the cache cache_key = releases_key.format(archive_id=a.id, archive_name=a.name) releases = region.get(cache_key, expiration_time=expiration_time) # the releases are not in cache or cache is expired if releases == NO_VALUE: logger.info('Releases not found in cache') # download archive self.download_archive(a) # extract the releases releases = [] for name in a.content.namelist(): # discard the legendastv file if name.startswith('Legendas.tv'): continue # discard hidden files if os.path.split(name)[-1].startswith('.'): continue # discard non-subtitle files if not name.lower().endswith(SUBTITLE_EXTENSIONS): continue releases.append(name) # cache the releases region.set(cache_key, releases) # iterate over releases for r in releases: subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'), t.get('season'), a, r) logger.debug('Found subtitle %r', subtitle) subtitles.append(subtitle) return subtitles def list_subtitles(self, video, languages): season = episode = None if isinstance(video, Episode): titles = [video.series] + video.alternative_series season = video.season episode = video.episode else: titles = [video.title] + video.alternative_titles for title in titles: subtitles = [ s for l in languages for s in self.query( l, title, season=season, episode=episode, year=video.year) ] if subtitles: return subtitles return [] def download_subtitle(self, subtitle): # download archive in case we previously hit the releases cache and didn't download it if subtitle.archive.content is None: self.download_archive(subtitle.archive) # extract subtitle's content subtitle.content = fix_line_ending( subtitle.archive.content.read(subtitle.name))
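# A rough usage sketch for LegendasTVProvider above, assuming UNRAR is installed
# and valid legendas.tv credentials; the title, season, episode, and year values
# are placeholders. Language comes from babelfish with the 'legendastv' converter,
# as used inside the class.
provider = LegendasTVProvider(username='user', password='pass')
provider.initialize()
try:
    subs = provider.query(Language('por', 'BR'), 'Westworld',
                          season=1, episode=1, year=2016)
    if subs:
        provider.download_subtitle(subs[0])   # extracts content from the archive
finally:
    provider.terminate()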
class Kaufland(Shop): search_url_prefix = 'https://shop.kaufland.de/search?pageSize=48&sort=relevance&text=' def __init__(self, email, password, captcha_service, cookie_file="kl_cookies"): self.logger = Logger('Kaufland') self.captcha_service = captcha_service self.base_url = 'https://shop.kaufland.de' self.login_url = "https://shop.kaufland.de/login" self.account_url = "https://shop.kaufland.de/my-account" self.take_url = 'https://shop.kaufland.de/cart/modify' self.basket_url = 'https://shop.kaufland.de/cart' self.driver = webdriver.PhantomJS(executable_path='/usr/local/bin/phantomjs') # self.driver = webdriver.Chrome('./chromedriver') self.driver.set_window_size(1280, 1024) Shop.__init__(self, email, password, cookie_file) @staticmethod def search_url(name): return Kaufland.search_url_prefix + quote(name.encode('utf-8')) def login(self): self.logger.info("Logging in...") self.session = Session() self.driver.get(self.account_url) time.sleep(2) x = self.driver.find_element_by_id('kLoginForm') x.find_element_by_id('j_username').send_keys(self.email) x.find_element_by_id('j_password').send_keys(self.password) x.find_element_by_tag_name('button').click() time.sleep(3) self.new_session_with_cookies(self.driver.get_cookies()) self.save_session() def is_logged_in(self, html=None): if not html: html = self.session.get(self.account_url).text return html.find('Abmelden') > 0 def save_session(self): with open(self.cookie_file, 'w') as f: pickle.dump(self.session.cookies, f) def load_session(self): try: with open(self.cookie_file) as f: cookies = pickle.load(f) self.session = Session() self.session.cookies = cookies return self.is_logged_in() except IOError: return False def get(self, url): html = self.session.get(url).text if self.is_logged_in(html): self.save_session() return html self.login() html = self.session.get(url).text if self.is_logged_in(html): self.save_session() return html self.logger.error("Can not log in") exit(1) def cart(self): blob = BeautifulSoup(self.get(self.basket_url), "html.parser") r = blob.select('section.product-list') if len(r) == 0: return [] r = r[0] ids = [] for i in r.findAll('article'): a = i.find('a') link = urllib.parse.urljoin(self.base_url, a['href']) title = i.find('p', {'class': 'product-list__title'}).text.strip() amount = i.find('div', {'class': 'product-list__amount'}) article_id = amount['data-dynamicblock'] amount = int(amount.find('input', {'name': 'quantity'}).get('value')) price = i.find('div', {'data-dynamiccontent': 'prices'}) red = price.find('span', {'class': 'product-list__reduced-price'}) if red: price = red price = price.text.replace('€', '').strip() price = int(float(price) * 100) title = unicodedata.normalize('NFKC', title) item = ShopItem(article_id, amount, title, price, link) ids.append(item) return ids def search(self, term, sub_term=None): html = self.get(Kaufland.search_url(term)) ids = self.parse_search(html) split_terms = [x for x in re.split('-| |\n', term) if len(x) > 1] if 0 < len(ids) < 48: return self.order_by_matches(split_terms, ids) if sub_term and len(ids) == 0: return self.search(term + " " + sub_term) if len(split_terms) > 1: ids = [] for criteria in split_terms: if len(criteria) > 1: ids += self.search(criteria) return self.order_by_matches(split_terms, ids, max=20, perfect=0.6, cut_off=0.25) def parse_search(self, html): blob = BeautifulSoup(html, "html.parser") ids = [] r = blob.select('div.productmatrix') if len(r) > 0: r = r[0] for i in r.findAll('article'): a = i.find('a') article_id = 
i['data-dynamicblock'].split('_')[0] link = urllib.parse.urljoin(self.base_url, a['href']) title = a.find('p', {'class': 'product-tile__infos--title'}).text.strip() price = a.find('div', {'class': 'product-tile__price--regular'}) if not price: price = a.find('div', {'class': 'product-tile__price--reduced'}) price = price.text.replace('€', '').strip() price = int(float(price) * 100) title = unicodedata.normalize('NFKC', title) item = ShopItem(article_id, 1, title, price, link) ids.append(item) return ids def order_by_matches(self, terms, ids, max=None, perfect=None, cut_off=None): if len(ids) == 0: return [] normal_fit = {} perfect_fit = {} normal_ids = [] perfect_ids = [] for item in ids: if item in normal_ids or item in perfect_ids: continue match = len([x for x in terms if x.lower() in item.name.lower()]) if not cut_off or match > len(terms) * cut_off: normal_ids.append(item) normal_fit[item] = match if perfect and match > len(terms) * perfect: perfect_ids.append(item) perfect_fit[item] = match if len(perfect_fit) > 0: normal_ids = perfect_ids normal_fit = perfect_fit ordered = sorted(normal_ids, key=normal_fit.__getitem__, reverse=True) if max: ordered = ordered[:max] return ordered def take(self, item): html = self.get(Kaufland.search_url(item.name)) blob = BeautifulSoup(html, "html.parser") token = blob.find('input', {'name': 'CSRFToken'}).get('value') self.session.post(self.take_url, data=[ ('qty', item.amount), ('productCodePost', item.article_id), ('pageTemplate', 'producttile'), ('CSRFToken', token), ]) self.save_session() def shelf_life(self, item_link): pass
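# Hedged usage sketch for the Kaufland shop client above. Login is driven through
# PhantomJS, so the executable path hard-coded in __init__ must exist; the email,
# password, and search term are placeholders, and the captcha service is unused in
# the code shown, so None is passed.
shop = Kaufland('user@example.com', 'secret', captcha_service=None)
items = shop.search('Milch') or []
for item in items[:5]:
    print(item.name)
if items:
    shop.take(items[0])      # adds the best match to the cart
print(shop.cart())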
class Provider(BaseProvider): """ he.net provider """ def __init__(self, config): super(Provider, self).__init__(config) self.domain = self.domain self.domain_id = None def authenticate(self): """ """ # Create the session GET the login page to retrieve a session cookie self.session = Session() self.session.get("https://dns.he.net/") # Hit the login page with authentication info to login the session login_response = self.session.post( "https://dns.he.net", data={ "email": self._get_provider_option('auth_username') or '', "pass": self._get_provider_option('auth_password') or '' }) # Parse in the HTML, if the div containing the error message is found, error html = BeautifulSoup(login_response.content, "html.parser") if html.find("div", {"id": "dns_err"}) is not None: logger.warning("HE login failed, check HE_USER and HE_PASS") return False # Make an authenticated GET to the DNS management page zones_response = self.session.get("https://dns.he.net") html = BeautifulSoup(zones_response.content, "html.parser") zone_img = html.find("img", {"name": self.domain, "alt": "delete"}) # If the tag couldn't be found, error, otherwise, return the value of the tag if zone_img is None: logger.warning("Domain {0} not found in account".format( self.domain)) raise AssertionError("Domain {0} not found in account".format( self.domain)) self.domain_id = zone_img["value"] logger.debug("HENET domain ID: {}".format(self.domain_id)) return True # Create record. If record already exists with the same content, do nothing def create_record(self, type, name, content): logger.debug("Creating record for zone {0}".format(name)) # Pull a list of records and check for ours records = self.list_records(type=type, name=name, content=content) if len(records) >= 1: logger.warning("Duplicate record {} {} {}, NOOP".format( type, name, content)) return True data = { "account": "", "menu": "edit_zone", "Type": type, "hosted_dns_zoneid": self.domain_id, "hosted_dns_recordid": "", "hosted_dns_editzone": "1", "Priority": "", "Name": name, "Content": content, "TTL": "3600", "hosted_dns_editrecord": "Submit" } ttl = self._get_lexicon_option('ttl') if ttl: if ttl <= 0: data['TTL'] = "3600" else: data['TTL'] = str(ttl) prio = self._get_lexicon_option('priority') if prio: if prio <= 0: data['Priority'] = "10" else: data['Priority'] = str(prio) create_response = self.session.post("https://dns.he.net/index.cgi", data=data) # Pull a list of records and check for ours records = self.list_records(name=name) if len(records) >= 1: logger.info("Successfully added record {}".format(name)) return True else: logger.info("Failed to add record {}".format(name)) return False # List all records. Return an empty list if no records found. # type, name and content are used to filter records. # If possible filter during the query, otherwise filter after response is # received. 
def list_records(self, type=None, name=None, content=None, id=None): records = [] # Make an authenticated GET to the DNS management page edit_response = self.session.get( "https://dns.he.net/?hosted_dns_zoneid={0}&menu=edit_zone&hosted_dns_editzone" .format(self.domain_id)) # Parse the HTML response, and list the table rows for DNS records html = BeautifulSoup(edit_response.content, "html.parser") def is_dns_tr_type(klass): return klass and re.compile("dns_tr").search(klass) records = html.findAll("tr", class_=is_dns_tr_type) # If the tag couldn't be found, error, otherwise, return the value of the tag if records is None or len(records) == 0: logger.warning("Domains not found in account") else: new_records = [] for dns_tr in records: tds = dns_tr.findAll("td") # Process HTML in the TR children to derive each object rec = {} rec['zone_id'] = tds[0].string rec['id'] = tds[1].string rec['name'] = tds[2].string # the 4th entry is a comment type_elem = tds[3].find("span", class_='rrlabel') if type_elem: rec['type'] = type_elem.string else: rec['type'] = None rec['ttl'] = tds[4].string if tds[5].string != '-': rec['priority'] = tds[5] rec['content'] = tds[6].string if tds[7].string == '1': rec['is_dynamic'] = True else: rec['is_dynamic'] = False rec = self._clean_TXT_record(rec) new_records.append(rec) records = new_records if id: logger.debug("Filtering {} records by id: {}".format( len(records), id)) records = [record for record in records if record['id'] == id] if type: logger.debug("Filtering {} records by type: {}".format( len(records), type)) records = [ record for record in records if record['type'] == type ] if name: logger.debug("Filtering {} records by name: {}".format( len(records), name)) if name.endswith('.'): name = name[:-1] records = [ record for record in records if name in record['name'] ] if content: logger.debug("Filtering {} records by content: {}".format( len(records), content.lower())) records = [ record for record in records if record['content'].lower() == content.lower() ] logger.debug("Final records ({}): {}".format( len(records), records)) return records # Create or update a record. def update_record(self, identifier, type=None, name=None, content=None): # Delete record if it exists self.delete_record(identifier, type, name, content) return self.create_record(type, name, content) # Delete an existing record. # If record does not exist, do nothing. def delete_record(self, identifier=None, type=None, name=None, content=None): delete_record_ids = [] if not identifier: records = self.list_records(type, name, content) delete_record_ids = [record['id'] for record in records] else: delete_record_ids.append(identifier) logger.debug("Record IDs to delete: {}".format(delete_record_ids)) for rec_id in delete_record_ids: # POST to the DNS management UI with form values to delete the record delete_response = self.session.post("https://dns.he.net/index.cgi", data={ "menu": "edit_zone", "hosted_dns_zoneid": self.domain_id, "hosted_dns_recordid": rec_id, "hosted_dns_editzone": "1", "hosted_dns_delrecord": "1", "hosted_dns_delconfirm": "delete" }) # Parse the HTML response, if the <div> tag indicating success isn't found, error html = BeautifulSoup(delete_response.content, "html.parser") if html.find("div", {"id": "dns_status"}) is None: logger.warning("Unable to delete record {}".format(rec_id)) return False return True
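# Hedged sketch of how the he.net provider above is typically driven. In lexicon,
# `config` is constructed by the framework (it supplies auth_username/auth_password
# and the domain); the variable below is only a stand-in to illustrate the call
# order, and the record name/content values are placeholders.
provider = Provider(config)   # `config` is supplied by lexicon, not shown here
if provider.authenticate():
    provider.create_record('TXT', '_acme-challenge.example.com', 'token-value')
    print(provider.list_records(type='TXT'))
    provider.delete_record(type='TXT', name='_acme-challenge.example.com')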
def get_subway_realtime_position(self, subway_name: str, start_index: int = 0, end_index: int = 1000) -> List[Train]: """Get realtime train position for subway line. Specification: http://data.seoul.go.kr/dataList/OA-12601/A/1/datasetView.do """ if self.api_key == SAMPLE_API_KEY: start_index = 0 end_index = 5 s = Session() retries = Retry(status_forcelist=[503]) s.mount(SUBWAY_BASE_URL, HTTPAdapter(max_retries=retries)) url = SUBWAY_BASE_URL + SUBWAY_REALTIME_POSITION_URL.format( api_key=self.api_key, format='json', subway_name=subway_name, start_index=start_index, end_index=end_index, ) r = s.get(url) d = r.json() if 'realtimePositionList' not in d: """ code: 'INFO-200' message: '해당하는 데이터가 없습니다.' Returned when the subway line has ended operations for the day. """ if d['status'] == 500 and d['code'] == 'INFO-200': return [] raise Exception(f"{d['code']}: {d['message']}") else: trains = [] for t in d['realtimePositionList']: data = { 'subway_id': t['subwayId'], 'subway_name': t['subwayNm'], 'station_id': t['statnId'], 'station_name': t['statnNm'], 'terminal_station_id': t['statnTid'], 'terminal_station_name': t['statnTnm'], 'number': t['trainNo'], 'status': t['trainSttus'], 'direction': Direction(int(t['updnLine'])), 'updated_at': datetime.strptime(t['recptnDt'], '%Y-%m-%d %H:%M:%S'), 'express': t['directAt'] == '1', 'last': t['lstcarAt'] == '1', } trains.append(Train(**data)) return trains
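# Hedged usage sketch for get_subway_realtime_position() above. It is a method, so
# an owning client class with `api_key` set is assumed; `SeoulSubwayClient` is a
# hypothetical name, and the line name is passed exactly as the Seoul open-data API
# expects it. SAMPLE_API_KEY is the module-level constant referenced in the method.
client = SeoulSubwayClient(api_key=SAMPLE_API_KEY)   # hypothetical class name
for train in client.get_subway_realtime_position('2호선'):
    print(train.number, train.station_name, train.direction)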