def new_instance(
    host,
    index,
    type=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    RETURN A JSON-QUERY CONTAINER FOR THE GIVEN ELASTICSEARCH CLUSTER

    THE CONTAINER CLASS IS CHOSEN BY PROBING THE CLUSTER VERSION; THE CHOICE
    IS CACHED IN known_hosts SO EACH (host, port) PAIR IS PROBED ONLY ONCE.
    """
    try:
        # REUSE THE CONTAINER CLASS DISCOVERED ON A PREVIOUS CALL
        cached_type = known_hosts.get((host, port))
        if cached_type:
            return cached_type(kwargs=kwargs)

        # ASK THE CLUSTER WHAT VERSION IT IS RUNNING
        probe = URL(host)
        probe.port = port
        about = http.get_json(probe, stream=False)
        es_version = about.version.number

        if es_version.startswith(("5.", "6.")):
            from jx_elasticsearch.es52 import ES52  # DELAY IMPORT UNTIL VERSION IS KNOWN

            type2container.setdefault("elasticsearch", ES52)
            known_hosts[(host, port)] = ES52
            return ES52(kwargs=kwargs)
        else:
            Log.error("No jx interpreter for Elasticsearch {{version}}", version=es_version)
    except Exception as e:
        Log.error("Can not make an interpreter for Elasticsearch", cause=e)
def get(url):
    """
    USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON

    :param url: ABSOLUTE URL (WITH PROTOCOL) POINTING AT A JSON DOCUMENT
    :return: THE DOCUMENT, WITH $ref PROPERTIES EXPANDED
    """
    url = text_type(url)
    if url.find("://") == -1:
        # FIX: ORIGINAL MESSAGE MISSPELLED "protocol" AS "prototcol"
        Log.error("{{url}} must have a protocol (eg http://) declared", url=url)

    base = URL("")  # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    # USE SLICING (url[7:8]) RATHER THAN INDEXING SO A DEGENERATE URL WITH
    # NOTHING AFTER THE SCHEME DOES NOT RAISE IndexError
    if url.startswith("file://") and url[7:8] != "/":
        # RELATIVE FILE REFERENCE: ANCHOR IT AT THE CURRENT WORKING DIRECTORY
        if os.sep == "\\":
            base = URL("file:///" + os.getcwd().replace(os.sep, "/").rstrip("/") + "/.")
        else:
            base = URL("file://" + os.getcwd().rstrip("/") + "/.")
    elif url[url.find("://") + 3:url.find("://") + 4] != "/":
        Log.error("{{url}} must be absolute", url=url)

    phase1 = _replace_ref(wrap({"$ref": url}), base)
    try:
        phase2 = _replace_locals(phase1, [phase1])
        return wrap(phase2)
    except Exception as e:
        Log.error("problem replacing locals in\n{{phase1}}", phase1=phase1, cause=e)
def _replace_ref(node, url):
    """
    RECURSIVELY EXPAND ANY "$ref" PROPERTY FOUND IN node

    :param node: DATA STRUCTURE TO SCAN (Mapping, list, OR SCALAR)
    :param url: URL OF THE DOCUMENT node CAME FROM; USED TO RESOLVE
                SCHEME-RELATIVE REFERENCES
    :return: COPY OF node WITH REFERENCED CONTENT MERGED IN; LOCAL
             (FRAGMENT-ONLY) REFS ARE LEFT FOR A LATER PASS
    """
    if url.path.endswith("/"):
        # NORMALIZE: DROP TRAILING SLASH SO PATH JOINS BEHAVE
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        # EXPAND CHILDREN FIRST; CAPTURE ANY $ref FOR HANDLING BELOW
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # A FRAGMENT SELECTS A SUB-PROPERTY OF THE LOADED DOCUMENT
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note(
            "Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            # NO SIBLING PROPERTIES: THE LOADED VALUE REPLACES THE NODE
            output = new_value
        elif isinstance(output, text_type):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # SIBLING PROPERTIES WIN OVER LOADED ONES (set_default SEMANTICS)
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def _replace_ref(node, url):
    """
    RECURSIVELY EXPAND ANY "$ref" PROPERTY FOUND IN node
    (DUPLICATE OF THE VERSION ABOVE; SEE THAT COPY FOR DETAIL)

    :param node: DATA STRUCTURE TO SCAN (Mapping, list, OR SCALAR)
    :param url: URL OF THE DOCUMENT node CAME FROM
    :return: COPY OF node WITH REFERENCED CONTENT MERGED IN
    """
    if url.path.endswith("/"):
        # NORMALIZE: DROP TRAILING SLASH
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        # EXPAND CHILDREN FIRST; CAPTURE ANY $ref FOR HANDLING BELOW
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            # A FRAGMENT SELECTS A SUB-PROPERTY OF THE LOADED DOCUMENT
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            # NO SIBLING PROPERTIES: THE LOADED VALUE REPLACES THE NODE
            output = new_value
        elif isinstance(output, text_type):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # SIBLING PROPERTIES WIN OVER LOADED ONES (set_default SEMANTICS)
            output = unwrap(set_default(output, new_value))

        DEBUG and Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def setUpClass(self):
    """
    WAIT UNTIL THE BACKEND ELASTICSEARCH ANSWERS, THEN REMOVE STALE TEST INDEXES
    """
    while True:
        try:
            es = test_jx.global_settings.backend_es
            http.get_json(URL(es.host, port=es.port))
            break
        except Exception as e:
            e = Except.wrap(e)
            if "No connection could be made because the target machine actively refused it" in e or "Connection refused" in e:
                Log.alert("Problem connecting")
                # FIX: PAUSE BEFORE RETRY; THE ORIGINAL RETRIED IMMEDIATELY,
                # HAMMERING A DOWN SERVER IN A HOT BUSY-LOOP
                import time
                time.sleep(5)
            else:
                Log.error("Server raised exception", e)

    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
    aliases = cluster.get_aliases()
    for a in aliases:
        try:
            if a.index.startswith("testing_"):
                # INDEX NAMES EMBED THEIR CREATION TIME,
                # EXAMPLE testing_0ef53e45b320160118_180420
                create_time = Date(a.index[-15:], "%Y%m%d_%H%M%S")
                if create_time < Date.now() - 10 * MINUTE:
                    cluster.delete_index(a.index)
        except Exception as e:
            Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)
def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
    """
    A RATE-LIMITED, SQLITE-BACKED CACHING PROXY IN FRONT OF hg.mozilla.org

    :param rate: MAX REQUESTS PER SECOND TO THE UPSTREAM SERVER
    :param amortization_period: WINDOW OVER WHICH rate IS AVERAGED
    :param source: CONFIG WITH THE UPSTREAM url
    :param database: SQLITE DATABASE (FILE) USED FOR THE PERSISTENT CACHE
    :param kwargs: ALL PARAMETERS (INJECTED BY @override-STYLE CONFIG)
    """
    self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
    self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
    self.cache_locker = Lock()
    self.cache = {}  # MAP FROM url TO (ready, headers, response, timestamp) PAIR
    self.no_cache = {}  # VERY SHORT TERM CACHE
    self.workers = []
    self.todo = Queue(APP_NAME + " todo")
    # BOUND THE REQUEST QUEUE SO ADMISSION ITSELF ENFORCES THE RATE LIMIT
    self.requests = Queue(APP_NAME + " requests", max=int(self.rate * self.amortization_period.seconds))
    self.url = URL(source.url)
    self.db = Sqlite(database)
    self.inbound_rate = RateLogger("Inbound")
    self.outbound_rate = RateLogger("hg.mo")

    # FIRST RUN: CREATE THE PERSISTENT CACHE TABLE IF THE DB IS EMPTY
    if not self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data:
        with self.db.transaction() as t:
            t.execute(
                "CREATE TABLE cache ("
                " path TEXT PRIMARY KEY, "
                " headers TEXT, "
                " response TEXT, "
                " timestamp REAL "
                ")"
            )

    # BACKGROUND MACHINERY: WORKER POOL, RATE LIMITER, AND CACHE CLEANER
    self.threads = [
        Thread.run(APP_NAME + " worker" + text_type(i), self._worker)
        for i in range(CONCURRENCY)
    ]
    self.limiter = Thread.run(APP_NAME + " limiter", self._rate_limiter)
    self.cleaner = Thread.run(APP_NAME + " cleaner", self._cache_cleaner)
def expand(doc, doc_url="param://", params=None):
    """
    ASSUMING YOU ALREADY PULLED THE doc FROM doc_url, YOU CAN STILL USE THE
    EXPANDING FEATURE

    USE mo_json_config.expand({}) TO ASSUME CURRENT WORKING DIRECTORY

    :param doc: THE DATA STRUCTURE FROM JSON SOURCE
    :param doc_url: THE URL THIS doc CAME FROM (DEFAULT USES params AS A DOCUMENT SOURCE)
    :param params: EXTRA PARAMETERS NOT FOUND IN THE doc_url PARAMETERS (WILL SUPERSEDE PARAMETERS FROM doc_url)
    :return: EXPANDED JSON-SERIALIZABLE STRUCTURE
    """
    if doc_url.find("://") == -1:
        # FIX: ORIGINAL MESSAGE MISSPELLED "protocol" AS "prototcol"
        Log.error("{{url}} must have a protocol (eg http://) declared", url=doc_url)

    url = URL(doc_url)
    url.query = set_default(url.query, params)
    phase1 = _replace_ref(doc, url)  # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    phase2 = _replace_locals(phase1, [phase1])
    return wrap(phase2)
def get_json(url, **kwargs):
    """
    GET THE GIVEN url AND DECODE THE RESPONSE BODY AS JSON;
    .zip AND .gz PAYLOADS ARE DECOMPRESSED FIRST
    """
    response = get(url, **kwargs)
    try:
        content = response.all_content
        path = URL(url).path
        if path.endswith(".zip"):
            # BODY IS A ZIP ARCHIVE; READ ITS FIRST MEMBER
            # NOTE(review): StringIO wrapping bytes assumes the project's
            # py2/py3 shim maps StringIO to a bytes buffer — confirm
            archive = zipfile.ZipFile(StringIO(content), mode='r')
            first_member = archive.namelist()[0]
            content = archive.read(first_member)
        elif path.endswith(".gz"):
            content = zip2bytes(content)
        return json2value(content.decode('utf8'))
    except Exception as e:
        # DISTINGUISH HTTP-LEVEL FAILURE (4xx/5xx) FROM A BAD JSON BODY
        if mo_math.round(response.status_code, decimal=-2) in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
def new_instance(
    host,
    index,
    type=None,
    name=None,
    port=9200,
    read_only=True,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    wait_for_active_shards=1,  # ES WRITE CONSISTENCY (https://www.elastic.co/guide/en/elasticsearch/reference/1.7/docs-index_.html#index-consistency)
    typed=None,
    kwargs=None
):
    """
    RETURN A JSON-QUERY CONTAINER FOR THE GIVEN ELASTICSEARCH CLUSTER

    PICKS THE CONTAINER CLASS BY PROBING THE CLUSTER VERSION ONCE, AND
    CACHES THE CHOICE IN known_hosts FOR SUBSEQUENT CALLS.
    """
    try:
        # REUSE THE CONTAINER CLASS DISCOVERED ON A PREVIOUS CALL
        cached_type = known_hosts.get((host, port))
        if cached_type:
            return cached_type(kwargs=kwargs)

        # ASK THE CLUSTER WHAT VERSION IT IS RUNNING
        probe = URL(host)
        probe.port = port
        about = http.get_json(probe, stream=False)
        es_version = about.version.number

        # IMPORTS ARE DELAYED UNTIL THE VERSION IS KNOWN
        if es_version.startswith("1."):
            from jx_elasticsearch.es14 import ES14 as container_class
        elif es_version.startswith(("5.", "6.")):
            from jx_elasticsearch.es52 import ES52 as container_class
        else:
            Log.error("No jx interpreter for Elasticsearch {{version}}", version=es_version)

        type2container.setdefault("elasticsearch", container_class)
        known_hosts[(host, port)] = container_class
        return container_class(kwargs=kwargs)
    except Exception as e:
        Log.error("Can not make an interpreter for Elasticsearch", cause=e)
def _run_sql_query(self, sql):
    """
    SEND sql TO THE TESTING SQL ENDPOINT AND RETURN THE PARSED JSON RESPONSE
    """
    if not self.utils.testing.sql:
        Log.error("This test requires a `testing.sql` parameter in the config file")

    # LOAD A KNOWN DATASET, THEN POINT THE QUERY AT ITS CONTAINER
    container = Data(data=simple_test_data)
    self.utils.fill_container(container)
    effective_sql = sql.replace(TEST_TABLE, container.query['from'])

    endpoint = URL(self.utils.testing.sql)
    response = self.utils.post_till_response(
        str(endpoint),
        json={"meta": {"testing": True}, "sql": effective_sql}
    )
    self.assertEqual(response.status_code, 200)
    return json2value(response.content.decode('utf8'))
def setUpClass(self):
    """
    BLOCK UNTIL THE BACKEND ELASTICSEARCH ANSWERS, THEN DROP LEFTOVER
    testing_* INDEXES
    """
    while True:
        try:
            backend = test_jx.global_settings.backend_es
            http.get_json(URL(backend.host, port=backend.port))
            break
        except Exception as cause:
            cause = Except.wrap(cause)
            connection_refused = (
                "No connection could be made because the target machine actively refused it" in cause
                or "Connection refused" in cause
            )
            if connection_refused:
                # SERVER NOT UP YET; WAIT AND PROBE AGAIN
                Log.alert("Problem connecting")
                Till(seconds=WAIT_AFTER_PROBLEM).wait()
            else:
                Log.error("Server raised exception", cause)

    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
    for alias in cluster.get_aliases():
        try:
            if alias.index.startswith("testing_"):
                cluster.delete_index(alias.index)
        except Exception as cause:
            Log.warning("Problem removing {{index|quote}}", index=alias.index, cause=cause)
def _replace_ref(node, url):
    """
    RECURSIVELY EXPAND "$ref" PROPERTIES FOUND IN node

    THIS VERSION ACCEPTS A LIST OF REFERENCES IN ONE $ref PROPERTY; A REF IS
    CONSIDERED SATISFIED IF AT LEAST ONE LOADER SUCCEEDS. LOCAL (FRAGMENT-ONLY)
    REFS ARE KEPT FOR A LATER PASS.

    :param node: DATA STRUCTURE TO SCAN (data, list, OR SCALAR)
    :param url: URL OF THE DOCUMENT node CAME FROM
    :return: COPY OF node WITH REFERENCED CONTENT MERGED IN
    """
    if url.path.endswith("/"):
        # NORMALIZE: DROP TRAILING SLASH SO PATH JOINS BEHAVE
        url.path = url.path[:-1]

    if is_data(node):
        refs = None
        output = {}
        # EXPAND CHILDREN FIRST; CAPTURE ANY $ref FOR HANDLING BELOW
        for k, v in node.items():
            if k == "$ref":
                refs = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not refs:
            return output

        ref_found = False   # AT LEAST ONE REF RESOLVED (OR IS LOCAL)
        ref_error = None    # LAST LOADER FAILURE, RE-RAISED IF NONE RESOLVED
        ref_remain = []     # LOCAL REFS, DEFERRED TO A LATER PASS
        for ref in listwrap(refs):
            if not ref.scheme and not ref.path:
                # DO NOT TOUCH LOCAL REF YET
                ref_remain.append(ref)
                ref_found = True
                continue

            if not ref.scheme:
                # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
                # REQUIRES THE CURRENT DOCUMENT'S SCHEME
                ref.scheme = url.scheme

            # FIND THE SCHEME AND LOAD IT
            if ref.scheme not in scheme_loaders:
                raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)
            try:
                new_value = scheme_loaders[ref.scheme](ref, url)
                ref_found = True
            except Exception as e:
                # REMEMBER FAILURE; ANOTHER REF IN THE LIST MAY STILL SUCCEED
                ref_error = e
                continue

            if ref.fragment:
                # A FRAGMENT SELECTS A SUB-PROPERTY OF THE LOADED DOCUMENT
                new_value = get_attr(new_value, ref.fragment)

            DEBUG and Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

            if not output:
                # NO SIBLING PROPERTIES: THE LOADED VALUE REPLACES THE NODE
                output = new_value
            elif is_text(output):
                pass  # WE HAVE A VALUE
            else:
                # SIBLING PROPERTIES WIN OVER LOADED ONES (set_default SEMANTICS)
                set_default(output, new_value)

        if not ref_found:
            raise ref_error
        if ref_remain:
            output["$ref"] = unwraplist(ref_remain)

        DEBUG and Log.note("Return {{output}}", output=output)
        return output
    elif is_list(node):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
def request(method, url, headers=None, data=None, json=None, zip=None, retry=None, timeout=None, session=None, kwargs=None):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    :param method: GET, POST, etc
    :param url: URL
    :param headers: dict OF HTTP REQUEST HEADERS
    :param data: BYTES (OR GENERATOR OF BYTES)
    :param json: JSON-SERIALIZABLE STRUCTURE
    :param zip: ZIP THE REQUEST BODY, IF BIG ENOUGH
    :param retry: {"times": x, "sleep": y} STRUCTURE
    :param timeout: SECONDS TO WAIT FOR RESPONSE
    :param session: Session OBJECT, IF YOU HAVE ONE
    :param kwargs: ALL PARAMETERS (DO NOT USE)
    :return: RESPONSE FROM THE UNDERLYING SESSION
    """
    global _warning_sent
    global request_count

    # ONE-TIME NAG: THIS MODULE EXPECTS default_headers TO BE CONFIGURED
    if not _warning_sent and not default_headers:
        Log.warning(
            text(
                "The mo_http.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' " +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
                "function to set `mo_http.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in countdown(url):
            try:
                response = request(url=u, kwargs=kwargs)
                # ANY NON-4xx/5xx RESPONSE IS GOOD ENOUGH; LAST URL RETURNS REGARDLESS
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    # A CALLER-SUPPLIED SESSION IS NOT CLOSED HERE; A LOCAL ONE IS
    if session:
        close_after_response = Null
    else:
        close_after_response = session = sessions.Session()

    with closing(close_after_response):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, DEFAULTS)

            # HEADERS: CALLER'S, THEN SESSION'S, THEN MODULE DEFAULTS
            headers = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)

            # RETRY: NORMALIZE None / NUMBER / STRUCTURE INTO {"times", "sleep"}
            retry = wrap(retry)
            if retry == None:
                retry = set_default({}, DEFAULTS['retry'])
            elif isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            elif isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds

            # JSON: SERIALIZE INTO THE REQUEST BODY
            if json != None:
                data = value2json(json).encode('utf8')

            # ZIP: COMPRESS LARGE BODIES (STREAMS ALWAYS, BLOBS OVER 1000 BYTES)
            zip = coalesce(zip, DEFAULTS['zip'])
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip:
                if is_sequence(data):
                    compressed = ibytes2icompressed(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
                elif len(coalesce(data)) > 1000:
                    compressed = bytes2zip(data)
                    headers['content-encoding'] = 'gzip'
                    data = compressed
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # SLEEP BETWEEN ATTEMPTS, NOT BEFORE THE FIRST
                Till(seconds=retry.sleep).wait()

            try:
                request_count += 1
                with Timer("http {{method|upper}} to {{url}}", param={"method": method, "url": text(url)}, verbose=DEBUG):
                    return _session_request(session, url=str(url), headers=headers, data=data, json=None, kwargs=kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    # OPTIONAL FALLBACK: RETRY OVER PLAIN HTTP ON SSL EOF FAILURE
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
    * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
    * json - JSON-SERIALIZABLE STRUCTURE
    * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    # ONE-TIME NAG: THIS MODULE EXPECTS default_headers TO BE CONFIGURED
    if not _warning_sent and not default_headers:
        Log.warning(
            text(
                "The pyLibrary.env.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' " +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
                "function to set `pyLibrary.env.http.default_headers`"))
    _warning_sent = True

    if is_list(url):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, retry=retry, **kwargs)
                # ANY NON-4xx/5xx RESPONSE IS GOOD ENOUGH; LAST URL RETURNS REGARDLESS
                if mo_math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    # A CALLER-SUPPLIED SESSION IS NOT CLOSED HERE; A LOCAL ONE IS
    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and is_text(url):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            # NORMALIZE kwargs (LATER PASSED STRAIGHT TO session.request)
            set_default(kwargs, {"zip": zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS: CALLER'S, THEN SESSION'S, THEN MODULE DEFAULTS
            headers = kwargs['headers'] = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY: NORMALIZE NUMBER / STRUCTURE INTO {"times", "sleep"}
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON: SERIALIZE INTO THE REQUEST BODY
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP: COMPRESS BODIES OVER 1000 BYTES
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # SLEEP BETWEEN ATTEMPTS, NOT BEFORE THE FIRST
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    # OPTIONAL FALLBACK: RETRY OVER PLAIN HTTP ON SSL EOF FAILURE
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
# License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals from collections import namedtuple from jx_python import jx from mo_files.url import URL from mo_hg.apply import Line, SourceFile from mo_logs import Log from pyLibrary.sql import quote_set, sql_list from pyLibrary.sql.sqlite import quote_value HG_URL = URL('https://hg.mozilla.org/') class TuidLine(Line, object): def __init__(self, tuidmap, **kwargs): super(TuidLine, self).__init__(tuidmap.line, **kwargs) self.tuid = tuidmap.tuid def __str__(self): return "TuidLine{tuid=" + str(self.tuid) + ": line=" + str( self.line) + "}" class AnnotateFile(SourceFile, object): def __init__(self, filename, lines, tuid_service=None): super(AnnotateFile, self).__init__(filename, lines)