def initialCommands(self):
    """Evaluate the condition and splice the selected branch into the chain.

    A fresh JavaScript context is created; when ``self.value_node`` is set,
    its ``value`` is exposed in that context as a global named after the
    node.  ``self.condition`` is then executed: a truthy result selects
    ``self.true_nodes``, anything else ``self.false_nodes``.  The selected
    nodes are linked to each other through ``next_node``, the last one is
    linked to this node's current ``next_node`` and ``self.next_node`` is
    repointed at the first selected node.

    Returns:
        A one-element list holding a new ``NextCommand``.
    """
    context = Runtime().new_context()

    value_node = self.value_node
    # Identity test on purpose: a node class overloading ``!=`` must not be
    # able to change whether it gets exported to the script.
    if value_node is not None:
        context.add_global(value_node.name, value_node.value)

    branch = self.false_nodes
    if context.execute(self.condition):
        branch = self.true_nodes

    first_node = None
    last_node = None
    for node in branch:
        if first_node is None:
            first_node = node
        if last_node is not None and node is not None:
            last_node.next_node = node
        last_node = node

    # Re-attach whatever followed this node after the end of the branch.
    if last_node is not None and self.next_node is not None:
        last_node.next_node = self.next_node

    # NOTE(review): with an empty branch ``first_node`` is still None here,
    # so the previous continuation is dropped -- confirm this is intended.
    self.next_node = first_node
    return [NextCommand()]
def process(self, url, value):
    """Submit ``value`` to the form at ``url`` after solving its cookie challenge.

    The page is fetched once to obtain the base64-encoded JavaScript that
    computes the "Sucuri" cookie.  That script is decoded, rewritten so it
    assigns to ``res`` instead of ``document.cookie`` and no longer reloads
    the page, and executed in a JavaScript context.  The resulting cookie is
    sent along with a POST carrying ``value`` as ``domainName``.

    Args:
        url: Address of the page to query.
        value: Value posted in the ``domainName`` form field.

    Returns:
        ``filter(None, ...)`` over the ``domainResolved`` input's value split
        on ``', '``, or None when that input is absent, has no ``value``
        attribute, or the value is empty.

    Raises:
        IndexError: if the first response contains no ``S='...'`` challenge,
            or the evaluated cookie string contains no ``=``.
    """
    session = requests.Session()
    response = session.get(url)
    self.display_message("Server answered: %s status code" % response.status_code)

    pattern = r'S=\'([a-zA-Z0-9\=]+)\''
    challenge_js = base64.b64decode(re.findall(pattern, response.content)[0])
    challenge_js = challenge_js.replace('document.cookie', 'res')
    challenge_js = challenge_js.replace('location.reload();', '')

    # executing the javascript
    # NOTE(review): this runs server-supplied script in the embedded
    # JavaScript runtime; only use against hosts you are willing to trust.
    result = Runtime().new_context().execute(challenge_js)
    self.display_message("Sucuri cookie: %s" % result)
    cookie_parts = result.split('=')
    cookies = {cookie_parts[0]: cookie_parts[1]}

    data = {'domainName': value, 'domainResolved': '', 'resolveDomain': ''}
    response = session.post(url, cookies=cookies, data=data)
    self.display_message("Server answered: %s status code" % response.status_code)

    soup = BeautifulSoup(response.content, 'html.parser')
    field = soup.find('input', attrs={'name': 'domainResolved'})
    # ``find`` yields None when the page has no such input (for example an
    # error page); treat that like an empty answer instead of crashing.
    if field is None:
        return None
    resolved = field.get('value')
    if not resolved:
        return None
    return filter(None, resolved.split(', '))
def process(self, url, value):
    """Submit ``value`` to the form at ``url`` after solving its cookie challenge.

    The page is fetched once to obtain the base64-encoded JavaScript that
    computes the "Sucuri" cookie.  That script is decoded, rewritten so it
    assigns to ``res`` instead of ``document.cookie`` and no longer reloads
    the page, and executed in a JavaScript context.  The resulting cookie is
    sent along with a POST carrying ``value`` as ``domainName``.

    Args:
        url: Address of the page to query.
        value: Value posted in the ``domainName`` form field.

    Returns:
        ``filter(None, ...)`` over the ``domainResolved`` input's value split
        on ``", "``, or None when that input is absent, has no ``value``
        attribute, or the value is empty.

    Raises:
        IndexError: if the first response contains no ``S='...'`` challenge,
            or the evaluated cookie string contains no ``=``.
    """
    session = requests.Session()
    response = session.get(url)
    self.display_message("Server answered: %s status code" % response.status_code)

    pattern = r"S=\'([a-zA-Z0-9\=]+)\'"
    challenge_js = base64.b64decode(re.findall(pattern, response.content)[0])
    challenge_js = challenge_js.replace("document.cookie", "res")
    challenge_js = challenge_js.replace("location.reload();", "")

    # executing the javascript
    # NOTE(review): this runs server-supplied script in the embedded
    # JavaScript runtime; only use against hosts you are willing to trust.
    result = Runtime().new_context().execute(challenge_js)
    self.display_message("Sucuri cookie: %s" % result)
    cookie_parts = result.split("=")
    cookies = {cookie_parts[0]: cookie_parts[1]}

    data = {"domainName": value, "domainResolved": "", "resolveDomain": ""}
    response = session.post(url, cookies=cookies, data=data)
    self.display_message("Server answered: %s status code" % response.status_code)

    soup = BeautifulSoup(response.content, "html.parser")
    field = soup.find("input", attrs={"name": "domainResolved"})
    # ``find`` yields None when the page has no such input (for example an
    # error page); treat that like an empty answer instead of crashing.
    if field is None:
        return None
    resolved = field.get("value")
    if not resolved:
        return None
    return filter(None, resolved.split(", "))
def __init__(self, connection, name):
    """Store the owning connection and the database name.

    The collection registry starts out empty.  A JavaScript runtime is
    instantiated only when ``Runtime`` is not None; otherwise
    ``_jsruntime`` is set to None.
    """
    self._name = name
    self._connection = connection
    self._collections = {}
    self._jsruntime = None if Runtime is None else Runtime()
def setUp(self):
    """Create a JS context exposing ``echo``, which records and returns its argument."""
    self.cx = Runtime().new_context()
    self.x = []

    def echo(value):
        # Every call is logged in ``self.x`` before the value is handed back.
        self.x.append(value)
        return value

    self.cx.bind_callable("echo", echo)
def __init__(self, client, name, **__):
    """Initialise the base class, then the in-memory bookkeeping.

    Extra keyword arguments are accepted and ignored.  A JavaScript runtime
    is instantiated only when ``Runtime`` is not None; otherwise
    ``_jsruntime`` is set to None.
    """
    super(Database, self).__init__(client, name)
    self._name = name
    self._client = client
    self._collections = {}
    self._jsruntime = None if Runtime is None else Runtime()
def activate(self):
    """Activate the plugin, create the JS runtime and load the stored scripts."""
    super(Hubot, self).activate()
    self.process = HubotProcess(self)
    self.rt = Runtime()

    if not self.get('scripts', None):
        # Nothing stored (or an empty mapping): start with an empty registry.
        self['scripts'] = {}
        return

    for name, snippet in self['scripts'].iteritems():
        logging.debug("Inserting %s... " % name)
        self.add_snippet(name, snippet)
def translate_passwd(uin, pw, verify):
    """Return the encrypted password computed by ``encrypt.js``.

    ``encrypt.js`` is read from the current working directory and loaded
    into a fresh JavaScript context, after which
    ``Encryption.getEncryption(password, qq_number, verify_code)`` is
    evaluated there.

    Args:
        uin: Passed to the script as ``qq_number``.
        pw: Passed to the script as ``password``.
        verify: Passed to the script as ``verify_code``.

    Returns:
        Whatever the JavaScript call evaluates to.
    """
    # Earlier pure-Python scheme, kept for reference:
    #   pw1 = (hashlib.md5(pw).hexdigest().upper()).decode("hex")
    #   pw2 = hashlib.md5(pw1 + uin).hexdigest().upper()
    #   return hashlib.md5(pw2 + verify).hexdigest().upper()
    cx = Runtime().new_context()
    # Close the script file deterministically instead of leaking the handle.
    with open("encrypt.js") as script:
        cx.execute(script.read())

    # NOTE(review): the arguments are interpolated into JavaScript source
    # without escaping, so a double quote or backslash in any of them breaks
    # (or alters) the generated call.
    call = 'Encryption.getEncryption("{password}","{qq_number}", "{verify_code}")'.format(
        password=pw, qq_number=uin, verify_code=verify)
    return cx.execute(call)
def setUp(self):
    """Create a JS context whose global object is a Python ``Window`` instance."""

    class Nonce:
        pass

    class Window:
        def __init__(self):
            self.arg = Nonce()
            # Self-reference stored under the attribute name ``window``.
            self.window = self
            self.name = "foobar"
            self.val = 42

        def foo(self, arg):
            self.arg = arg

    global_object = Window()
    self.window = global_object
    self.cx = Runtime().new_context(global_object)
    self.cx.bind_class(Nonce)
def setUp(self):
    """Create a JS context with the ``spam`` class bound and an instance exposed as ``bs``."""
    self.cx = Runtime().new_context()

    class spam:
        def __init__(self):
            self.args = []
            self.val = 42
            self._private = "no peeking"

        def foo(self, *args):
            # Record the positional arguments of every call as one tuple.
            self.args.append(args)

        def _private_method(self):
            assert False

        def __getitem__(self, key):
            assert type(key) is IntType
            self.args.append(key)
            return self.val

        def __setitem__(self, key, value):
            assert type(key) is IntType
            self.args.append((key, value))
            self.val = value

    self.cx.bind_class(spam)
    instance = spam()
    self.spam = instance
    self.cx.bind_object("bs", instance)
def __init__(self, timeout=120):
    """Create the JS context, load the formatter scripts and patch ``loadAPI``.

    Args:
        timeout: Stored on the instance as ``self.timeout``.
    """
    self.rt = Runtime()
    self.ctx = self.rt.new_context()
    self.timeout = timeout
    self.api_spec_file = join(THIS_DIR, "iedoc-core.xml")

    # The scripts are concatenated in this order and executed as one program.
    script_stems = ("python", "remoteControl", "testCase", "formatCommandOnlyAdapter")
    sources = []
    for stem in script_stems:
        with open(join(THIS_DIR, stem + ".js")) as handle:
            sources.append(handle.read())
    self.ctx.execute("".join(sources))

    self.replace_loadAPI()
class Deobfuscator:
    """Run JavaScript in a context populated with stub browser objects.

    ``eval`` calls made by the script are not executed; their argument is
    collected in ``evals`` so it can be inspected afterwards.
    """

    # Prelude prepended to every script passed to evaluate_script().
    initString = "document = new Document(); window = new Window(); navigator = new Navigator(); "
    # NOTE(review): class-level list, shared by every instance; captured
    # strings accumulate across instances unless the caller clears it.
    evals = []

    def __init__(self):
        """Create the runtime and the prepared context."""
        self.rt = Runtime()
        self.cx = self.get_cx()

    def get_cx(self):
        """Return a new context with the callables, classes and ``window`` bound."""
        cx = self.rt.new_context()
        cx.bind_callable("CollectGarbage", self.CollectGarbage)
        cx.bind_callable("eval", self.eval)
        # bind_attribute
        # bind_object
        window = Window()
        cx.bind_class(Navigator, bind_constructor=True)
        cx.bind_class(Document, bind_constructor=True)
        cx.bind_class(Window, bind_constructor=True)
        cx.bind_class(Element, bind_constructor=True)
        cx.bind_object("window", window)
        cx.bind_class(Body, bind_constructor=True)
        cx.bind_class(ActiveXObject, bind_constructor=True)
        return cx

    def eval(self, arg):
        """Record ``arg`` (the script handed to JS ``eval``) and return 1."""
        # print "Evaluating script (eval): %s" % arg
        print("eval(...) delayed")
        # The original called arg.replace("function", "function ") and threw
        # the result away, so the text has always been stored unmodified;
        # the dead statement is removed rather than "fixed", because applying
        # it would also rewrite identifiers that merely contain "function".
        self.evals.append(arg)
        return 1

    def CollectGarbage(self):
        """Stand-in for the script-visible ``CollectGarbage``; only prints a message."""
        print("Collected garbage")

    def evaluate_script(self, arg):
        """Execute ``initString + arg``; return 1 on success, 0 if it raised."""
        s = self.initString + arg
        try:
            self.cx.eval_script(s)
        except Exception:
            # print "This execution has failed"
            return 0
        return 1
def analyseJS(code):
    '''
        Search for obfuscated functions in the Javascript code

        @param code: The Javascript code (string)
        @return: List with analysis information of the Javascript code: [JSCode,unescapedBytes,urlsFound], where JSCode is a list with the several stages Javascript code, unescapedBytes is a list with the parameters of unescape functions, and urlsFound is a list with the URLs found in the unescaped bytes.
    '''
    # NOTE(review): the nesting below was reconstructed from a flattened copy
    # of this function -- confirm it against the original file.
    # NOTE(review): no statement in this block restores sys.stderr or returns
    # the lists promised by the docstring; the tail of the function may be
    # missing from this copy.
    errors = []
    JSCode = []
    unescapedBytes = []
    urlsFound = []
    # Redirect stderr into an in-memory buffer for the duration of the analysis.
    oldStdErr = sys.stderr
    errorFile = StringIO()
    sys.stderr = errorFile
    try:
        # Stage 1: collect the script sources to analyse.
        scriptCode = re.findall(reJSscript, code, re.DOTALL | re.IGNORECASE)
        if scriptCode != []:
            for c in scriptCode:
                # NOTE(review): both results are assigned to ``code`` but the
                # raw match ``c`` is what gets beautified and appended, so the
                # entity-unescaped / beautified text is discarded -- confirm.
                code = unescapeHTMLEntities(c)
                code = jsbeautifier.beautify(c)
                JSCode.append(c)
        else:
            # No script match: split the input on "/JS" markers, trim each
            # piece, and drop pieces that start with two whitespace-separated
            # numbers.
            code_items = filter(
                lambda x: re.match('^\s*\d+\s+\d+', x) == None, [
                    re.sub('^\s*\(', '', re.sub('\)[^\)]+$', '', a.split('JavaScript')[0]))
                    for a in re.split('/\s*JS', code)[1:]
                ])
            if code_items != []:
                for ci in code_items:
                    # Undo backslash escaping before unescaping entities and beautifying.
                    ci = ci.replace("\\\\", "\\").replace("\(", "(").replace(
                        "\)", ")").replace("\ ", " ").replace("\\r", "\r").replace("\\n", "\n")
                    ci = unescapeHTMLEntities(ci)
                    ci = jsbeautifier.beautify(ci)
                    JSCode.append(ci)
            else:
                code = unescapeHTMLEntities(code)
                code = jsbeautifier.beautify(code)
                JSCode.append(code)
        # Stage 2: evaluate each source. JSCode is appended to inside this
        # loop, so newly revealed stages are visited by the same iteration.
        for code in JSCode:
            if code != None and JS_MODULE:
                r = Runtime()
                context = r.new_context()
                while True:
                    evalFunctionsData = searchObfuscatedFunctions(code, 'eval')
                    originalElement = code
                    for evalFunctionData in evalFunctionsData:
                        # First attempt: one of two rewrites of the matched
                        # text, chosen by the flag in evalFunctionData[2].
                        if not evalFunctionData[2]:
                            modifiedCode = evalFunctionData[1][0].replace(
                                evalFunctionData[0], 'return')
                            code = originalElement.replace(
                                evalFunctionData[1][0], modifiedCode)
                        else:
                            code = originalElement.replace(
                                evalFunctionData[1][0], evalFunctionData[1][1] + ';')
                        try:
                            executedJS = context.eval_script(code)
                            # A None result is treated like a failed evaluation.
                            if executedJS == None:
                                raise Exception
                            break
                        except:
                            # Second attempt: the opposite rewrite.
                            if evalFunctionData[2]:
                                modifiedCode = evalFunctionData[1][0].replace(
                                    evalFunctionData[0], 'return')
                                code = originalElement.replace(
                                    evalFunctionData[1][0], modifiedCode)
                            else:
                                code = originalElement.replace(
                                    evalFunctionData[1][0], evalFunctionData[1][1] + ';')
                            try:
                                executedJS = context.eval_script(code)
                                if executedJS == None:
                                    raise Exception
                            except:
                                # Both rewrites failed: restore and try the next match.
                                code = originalElement
                                continue
                    else:
                        # for/else: the loop ended without ``break``, i.e. the
                        # first attempt never succeeded; stop unwrapping.
                        break
                    # A new, non-empty result becomes the next stage.
                    if executedJS != originalElement and executedJS != None and executedJS != '':
                        code = executedJS
                        JSCode.append(code)
                    else:
                        break
            if code != None:
                # Stage 3: inspect ``<name> = unescape(...)`` assignments.
                escapedVars = re.findall(
                    '(\w*?)\s*?=\s*?(unescape\((.*?)\))', code, re.DOTALL)
                for var in escapedVars:
                    bytes = var[2]
                    if bytes.find('+') != -1:
                        # Argument contains '+': resolve it through getVarContent.
                        varContent = getVarContent(code, bytes)
                        if len(varContent) > 150:
                            ret = unescape(varContent)
                            if ret[0] != -1:
                                bytes = ret[1]
                                urls = re.findall('https?://.*$', bytes, re.DOTALL)
                                if bytes not in unescapedBytes:
                                    unescapedBytes.append(bytes)
                                for url in urls:
                                    if url not in urlsFound:
                                        urlsFound.append(url)
                    else:
                        # Drop the first and last character of the argument
                        # (presumably the surrounding quotes -- confirm).
                        bytes = bytes[1:-1]
                        if len(bytes) > 150:
                            ret = unescape(bytes)
                            if ret[0] != -1:
                                bytes = ret[1]
                                urls = re.findall('https?://.*$', bytes, re.DOTALL)
                                if bytes not in unescapedBytes:
                                    unescapedBytes.append(bytes)
                                for url in urls:
                                    if url not in urlsFound:
                                        urlsFound.append(url)
    except Exception, e:
        errors.append('Unknown error!! [%s]' % e)
class Hubot(BotPlugin):
    """Bot plugin that runs Hubot CoffeeScript snippets in a JavaScript runtime.

    Snippets are compiled to JavaScript, executed in their own context and
    given this plugin through ``module.exports`` so they can register
    listeners with ``hear``/``respond``.
    """

    # Store here the patterns to listen to
    # NOTE(review): both dicts are class attributes, shared by every instance.
    hear_matchers = {}
    js_cache = {}

    def activate(self):
        """Activate the plugin, create the JS runtime and load the stored scripts."""
        super(Hubot, self).activate()
        self.process = HubotProcess(self)
        self.rt = Runtime()
        if not self.get('scripts', None):
            self['scripts'] = {}
        else:
            for name, snippet in self['scripts'].iteritems():
                logging.debug("Inserting %s... " % name)
                self.add_snippet(name, snippet)

    def callback_message(self, conn, mess):
        """Run every registered listener whose pattern matches the message body.

        A ``JSError`` raised by a listener stops the dispatch; the error and
        the guessed JavaScript source lines are sent back to the sender.
        """
        logging.debug("Hubot is hearing [%s]" % mess.getBody())
        try:
            for pattern in self.hear_matchers:
                match = re.match(pattern, mess.getBody())
                if match:
                    self.hear_matchers[pattern](HubotMessage(self, mess, match))
        except JSError as jse:
            logging.exception("Error interpreting Javascript")
            tb_next = sys.exc_info()[2]
            js_error = '\n\n Guessed stacktrack from JS:'
            while tb_next:
                code = tb_next.tb_frame.f_code
                if code.co_name == 'JavaScript code':
                    js_error += self._js_context(code.co_filename, code.co_firstlineno)
                tb_next = tb_next.tb_next
            self.send(mess.getFrom(), str(jse) + js_error, mess)

    def _js_context(self, script_name, ln):
        """Return the cached JS lines around 1-based line ``ln`` of ``script_name``.

        Unknown scripts give an empty string and out-of-range lines are
        rendered empty, so building the error report can never raise itself
        (negative indexes used to wrap around to the end of the script).
        """
        js = self.js_cache.get(script_name)
        if js is None:
            return ''
        lines = js.split('\n')

        def line_at(index):
            return lines[index] if 0 <= index < len(lines) else ''

        return ('\n\n ' + line_at(ln - 2) + '\n-->' + line_at(ln - 1) +
                '\n ' + line_at(ln))

    def _register_matcher(self, pattern, function):
        """Extract the text between the first and last '/' of ``repr(pattern)`` and map it to ``function``."""
        pattern = repr(pattern)
        first_slash = pattern.index('/')
        last_slash = pattern.rindex('/')
        regexp = pattern[first_slash + 1:last_slash]
        # Whatever follows the last slash (the regexp modifiers) is ignored.
        logging.debug("Registering a hubot snippet %s -> %s" % (regexp, repr(function)))
        self.hear_matchers[regexp] = function

    def hear(self, pattern, function):
        """The hubot callback to register a listening function """
        self._register_matcher(pattern, function)

    def respond(self, pattern, function):
        """The hubot callback to register a listening function to himself only
        TODO dissociate from hear
        """
        self._register_matcher(pattern, function)

    def add_snippet(self, name, coffee):
        """Compile the CoffeeScript source ``coffee`` and execute it as script ``name``."""
        #logging.debug("Trying to insert this gloubiboulga [%s]" % coffee)
        logging.debug("Creating a face Hubot context...")

        def require(module_name):
            # Stub: the request is only logged; nothing is loaded or returned.
            logging.debug("Trying to load " + module_name)

        module = HubotModule()
        cx = self.rt.new_context()
        cx.add_global("module", module)
        cx.add_global("process", self.process)
        cx.add_global("require", require)
        cx.add_global("JSON", JSONStub())
        logging.debug("Compiling coffeescript...")
        js = coffeescript.compile(coffee, bare=True)
        nummed_js = numerotatedJS(js)
        # Cached under the script name so callback_message can quote lines.
        self.js_cache[name] = nummed_js
        logging.debug("Translated JS:\n" + nummed_js)
        logging.debug("Executing Hubot script...")
        cx.execute(code=js, filename=name)
        module.exports(self)  # triggers the listening callbacks

    @botcmd
    def hubot_add(self, mess, args):
        """Adds a hubot script in the bot
        takes an url has parameter directly from the row github file for example :
        !hubot add https://raw.github.com/github/hubot-scripts/master/src/scripts/botsnack.coffee
        """
        script_name = args.split('/')[-1].replace('.coffee', '')
        res = urllib2.urlopen(args)
        try:
            script = res.read()
        finally:
            # Release the connection even when reading fails.
            res.close()
        logging.debug("Adding script %s -> %s" % (script_name, script))
        # Reassign the whole mapping so the store sees the modification.
        copy = self['scripts']
        copy[script_name] = script
        self['scripts'] = copy
        self.add_snippet(script_name, script)
        return 'Script %s added.' % script_name

    @botcmd
    def hubot_del(self, mess, args):
        """remove a hubot script in from the bot.
        You need to restart the hubot plugin to make it effective.
        takes the name of the script for example :
        !hubot del botsnack
        """
        copy = self['scripts']
        copy.pop(args)
        self['scripts'] = copy
        return 'Done'

    @botcmd
    def hubot_list(self, mess, args):
        return '\n'.join(self['scripts'].keys())
def __init__(self):
    """Create the JavaScript runtime, then the context built by ``get_cx``."""
    runtime = Runtime()
    # ``rt`` is assigned before ``get_cx`` is called.
    self.rt = runtime
    self.cx = self.get_cx()
def analyseJS(code):
    '''
        Search for obfuscated functions in the Javascript code

        While the analysis runs, sys.stderr is redirected to the file
        'jserror.log'; it is restored (and the file closed) even when the
        analysis raises.

        @param code: The Javascript code (string)
        @return: List with analysis information of the Javascript code: [JSCode,unescapedBytes,urlsFound,errors], where JSCode is a list with the several stages Javascript code, unescapedBytes is a list with the parameters of unescape functions, urlsFound is a list with the URLs found in the unescaped bytes, and errors is a list with the 'JavaScript error' lines written to stderr during the analysis.
    '''
    errors = []
    JSCode = []
    unescapedBytes = []
    urlsFound = []
    oldStdErr = sys.stderr
    errorFile = open('jserror.log', 'w')
    sys.stderr = errorFile
    try:
        if code != None and JS_MODULE:
            r = Runtime()
            context = r.new_context()
            while True:
                evalFunctionsData = searchObfuscatedFunctions(code, 'eval')
                originalElement = code
                for evalFunctionData in evalFunctionsData:
                    # First attempt: one of two rewrites of the matched text,
                    # chosen by the flag in evalFunctionData[2].
                    if not evalFunctionData[2]:
                        modifiedCode = evalFunctionData[1][0].replace(evalFunctionData[0], 'return')
                        code = originalElement.replace(evalFunctionData[1][0], modifiedCode)
                    else:
                        code = originalElement.replace(evalFunctionData[1][0], evalFunctionData[1][1] + ';')
                    try:
                        executedJS = context.eval_script(code)
                        if executedJS == None:
                            # A None result counts as a failed evaluation.
                            # (Previously signalled through the undefined name
                            # ``exception``, i.e. an accidental NameError.)
                            raise ValueError('script evaluated to None')
                        break
                    except Exception:
                        # Second attempt: the opposite rewrite.
                        if evalFunctionData[2]:
                            modifiedCode = evalFunctionData[1][0].replace(evalFunctionData[0], 'return')
                            code = originalElement.replace(evalFunctionData[1][0], modifiedCode)
                        else:
                            code = originalElement.replace(evalFunctionData[1][0], evalFunctionData[1][1] + ';')
                        try:
                            executedJS = context.eval_script(code)
                            if executedJS == None:
                                raise ValueError('script evaluated to None')
                        except Exception:
                            # Both rewrites failed: restore and try the next match.
                            code = originalElement
                            continue
                else:
                    # for/else: the loop ended without ``break``, i.e. the
                    # first attempt never succeeded; stop unwrapping.
                    break
                # A new, non-empty result becomes the next stage.
                if executedJS != originalElement and executedJS != None and executedJS != '':
                    code = executedJS
                    JSCode.append(code)
                else:
                    break
        if code != None:
            # Inspect ``<name> = unescape(...)`` assignments.
            escapedVars = re.findall('(\w*?)\s*?=\s*?(unescape\((.*?)\))', code, re.DOTALL)
            for var in escapedVars:
                escapedBytes = var[2]
                if escapedBytes.find('+') != -1:
                    # Argument contains '+': resolve it through getVarContent.
                    candidate = getVarContent(code, escapedBytes)
                else:
                    # Drop the first and last character of the argument
                    # (presumably the surrounding quotes -- confirm).
                    escapedBytes = escapedBytes[1:-1]
                    candidate = escapedBytes
                # Only contents longer than 150 characters are unescaped.
                if len(candidate) > 150:
                    ret = unescape(candidate)
                    if ret[0] != -1:
                        escapedBytes = ret[1]
                        urls = re.findall('https?://.*$', escapedBytes, re.DOTALL)
                        if escapedBytes not in unescapedBytes:
                            unescapedBytes.append(escapedBytes)
                        for url in urls:
                            if url not in urlsFound:
                                urlsFound.append(url)
    finally:
        # Always undo the redirection, otherwise later output of the whole
        # process would keep going to the (possibly closed) log file.
        errorFile.close()
        sys.stderr = oldStdErr
    with open('jserror.log', 'r') as logFile:
        errorFileContent = logFile.read()
    if errorFileContent != '' and errorFileContent.find('JavaScript error') != -1:
        lines = errorFileContent.split(newLine)
        for line in lines:
            if line.find('JavaScript error') != -1 and line not in errors:
                errors.append(line)
    return [JSCode, unescapedBytes, urlsFound, errors]
class SeleniumToPython(object):
    """Convert Selenium HTML test cases to Python by driving the JS formatters.

    The formatter scripts are loaded into a JavaScript context; the commands
    parsed from a test case are handed to them and the generated code is
    returned.
    """

    def __init__(self, timeout=120):
        """Load the formatter scripts and install the ``Command.loadAPI`` replacement.

        Args:
            timeout: Budget, in seconds, shared by all pause/wait commands of
                one test case (see ``_parse_commands``).
        """
        self.rt = Runtime()
        self.ctx = self.rt.new_context()
        self.timeout = timeout
        self.api_spec_file = join(THIS_DIR, "iedoc-core.xml")

        js_files = ["python", "remoteControl", "testCase", "formatCommandOnlyAdapter"]
        js = ""
        for jsf in js_files:
            with open(join(THIS_DIR, jsf + ".js")) as h:
                js += h.read()
        self.ctx.execute(js)
        self.replace_loadAPI()

    # this function is necessary because the DomParser is missing from
    # spidermonkey. long story. basically there's a file...the iedoc-core.xml
    # that is read from the javascript via the DomParser. the file is parsed
    # and put into a datastructure which is used for future processing. since
    # we don't have access to the DomParser, we have to simulate what it did
    # with python, and create/replace the function that was calling DomParser
    # in it, such that the return value is the same
    def replace_loadAPI(self):
        """Generate and execute a JavaScript ``Command.loadAPI`` from the API spec file."""
        api_functions = {}
        with open(self.api_spec_file, "r") as h:
            api_spec = h.read()
        xml = lxml.etree.fromstring(api_spec)
        for fn in xml.xpath("function"):
            name = fn.get("name")
            rt = fn.xpath("return") or None
            if rt:
                rt = rt[0].get("type").capitalize()
            params = []
            for param in fn.xpath("param"):
                params.append(param.get("name"))
            api_functions[name] = (params, rt)

        # Added by hand on top of what the spec file declares.
        api_functions['assertFailureOnNext'] = ([], None)
        api_functions['verifyFailureOnNext'] = ([], None)
        api_functions['assertErrorOnNext'] = ([], None)
        api_functions['verifyErrorOnNext'] = ([], None)

        load_api = """
Command.loadAPI = function() {
    if (!this.functions) {
        var functions = {};
        %s
        this.functions = functions;
    }
    return this.functions;
}
""".strip()

        # Python reprs (%r) double as JavaScript string literals here.
        lines = []
        for fn, (params, rt) in api_functions.iteritems():
            lines.append("var fn = new CommandDefinition(%r);" % fn)
            if rt:
                lines.append("fn.returnType = %r" % rt)
            for param in params:
                lines.append("var param = {};")
                lines.append("param.name = %r;" % param)
                lines.append("fn.params.push(param);")
            lines.append("functions[fn.name] = fn;")
            if re.match("^(is|get)", fn):
                lines.append("fn.isAccessor = true;")
                lines.append("functions['!'+fn.name] = fn.negativeAccessor();")
            if fn.startswith("assert"):
                # NOTE(review): verify_fn is created but ``fn`` is what gets
                # registered, under 'verify' + full name + name suffix; this
                # looks unintended but is kept as-is -- confirm against the
                # JS formatter before changing it.
                lines.append("var verify_fn = new CommandDefinition(%r);" % fn)
                lines.append("verify_fn.params = fn.params;")
                lines.append("functions['verify'+fn.name+fn.name.substring(6)] = fn;")

        js = load_api % "\n".join(lines)
        self.ctx.execute(js)

    def _parse_commands(self, url, commands):
        """Feed the whitelisted ``commands`` to the JS formatter and return its output.

        Args:
            url: Base URL assigned to the JavaScript test case.
            commands: Iterable of ``(name, target, value)`` string triples.

        Raises:
            TooManyCommands: if there are more than ``maximum_commands`` commands.
        """
        if len(commands) > maximum_commands:
            raise TooManyCommands(
                "selenium file can have a maximum of %d commands" % maximum_commands)

        lines = [
            "sc = [];",
            "tc = new TestCase();",
            "tc.baseURL = %r;" % url,
        ]
        timeout = self.timeout

        def get_sleep(amt, timeout):
            # Take ``amt`` seconds out of the remaining budget, clamping the
            # sleep so the budget never goes below zero.
            timeout -= amt
            if timeout < 0:
                amt += timeout
                timeout = 0
            return amt, timeout

        for name, target, value in commands:
            # many commands can end with AndWait, and they aren't all listed
            # in the command reference, so we test if the base command is
            # whitelisted, and allow all AndWait commands
            tmp_name = name
            if name.endswith("AndWait"):
                # Strip the suffix only; replace() would also remove an
                # "AndWait" occurring elsewhere in the name.
                tmp_name = name[:-len("AndWait")]
            if tmp_name not in command_whitelist:
                continue

            if tmp_name in ("pause", "waitForFrameToLoad", "waitForPageToLoad", "waitForPopUp"):
                value, timeout = get_sleep(int(target or value) / 1000.0, timeout)
                value = "%d" % (value * 1000)
                # i'm pretty sure this is a bug with selenium. in the selenium
                # file, the sleep value is in the "value" placeholder, but
                # it will only work if it's set as the "target". the regular
                # exporter doesn't account for this actually, and exporting
                # to python from the IDE will yield "time.sleep(NaN)" if you
                # try to use "pause"
                target = value

            line = "sc.push(new Command(%r, %r, %r));" % (name, target, value)
            lines.append(line)

        # now that we know how much timeout we have left over, hand this off
        # to javascript as the first parameter, so the rest of the evaluation
        # can pull off of this
        lines.insert(0, "timeout = %d;" % timeout)
        lines.append("tc.setCommands(sc);")
        lines.append("format(tc, 'test');")

        js = "\n".join(lines)
        return self.ctx.execute(js)

    def parse_source(self, source):
        """Parse a Selenium HTML test case and convert its commands.

        Returns:
            A dict with ``fqdn`` (network location of the base URL),
            ``source`` (the UTF-8 encoded input) and ``code`` (the formatter
            output).

        Raises:
            IndexError: if the document has no ``link[rel='selenium.base']``.
        """
        source = source.encode("utf8")
        doc = lxml.html.fromstring(source)
        url = doc.cssselect("link[rel='selenium.base']")[0].get("href")
        fqdn = urlparse.urlsplit(url).netloc

        commands = []
        for row in doc.cssselect("tbody tr"):
            td = row.cssselect("td")
            if len(td) < 3:
                # Not a command row (a command needs name, target and value).
                continue
            # An empty cell has ``text`` None; normalise all three to '' so a
            # nameless row is skipped by the whitelist instead of crashing.
            name = td[0].text or ''
            target = td[1].text or ''
            value = td[2].text or ''
            commands.append((name, target, value))

        return {"fqdn": fqdn, "source": source, "code": self._parse_commands(url, commands)}

    def parse_from_file(self, sf):
        """Read the file ``sf`` and return ``parse_source`` of its contents."""
        with open(sf, "r") as h:
            source = h.read()
        return self.parse_source(source)
class Hubot(BotPlugin):
    """Bot plugin that runs Hubot CoffeeScript snippets in a JavaScript runtime.

    Snippets are compiled to JavaScript, executed in their own context and
    given this plugin through ``module.exports`` so they can register
    listeners with ``hear``/``respond``.
    """

    # Store here the patterns to listen to
    # NOTE(review): both dicts are class attributes, shared by every instance.
    hear_matchers = {}
    js_cache = {}

    def activate(self):
        """Activate the plugin, create the JS runtime and load the stored scripts."""
        super(Hubot, self).activate()
        self.process = HubotProcess(self)
        self.rt = Runtime()
        if not self.get('scripts', None):
            self['scripts'] = {}
        else:
            for name, snippet in self['scripts'].iteritems():
                logging.debug("Inserting %s... " % name)
                self.add_snippet(name, snippet)

    def callback_message(self, conn, mess):
        """Run every registered listener whose pattern matches the message body.

        A ``JSError`` raised by a listener stops the dispatch; the error and
        the guessed JavaScript source lines are sent back to the sender.
        """
        logging.debug("Hubot is hearing [%s]" % mess.getBody())
        try:
            for pattern in self.hear_matchers:
                match = re.match(pattern, mess.getBody())
                if match:
                    self.hear_matchers[pattern](HubotMessage(
                        self, mess, match))
        except JSError as jse:
            logging.exception("Error interpreting Javascript")
            tb_next = sys.exc_info()[2]
            js_error = '\n\n Guessed stacktrack from JS:'
            while tb_next:
                code = tb_next.tb_frame.f_code
                if code.co_name == 'JavaScript code':
                    js_error += self._js_context(code.co_filename,
                                                 code.co_firstlineno)
                tb_next = tb_next.tb_next
            self.send(mess.getFrom(), str(jse) + js_error, mess)

    def _js_context(self, script_name, ln):
        """Return the cached JS lines around 1-based line ``ln`` of ``script_name``.

        Unknown scripts give an empty string and out-of-range lines are
        rendered empty, so building the error report can never raise itself
        (negative indexes used to wrap around to the end of the script).
        """
        js = self.js_cache.get(script_name)
        if js is None:
            return ''
        lines = js.split('\n')

        def line_at(index):
            return lines[index] if 0 <= index < len(lines) else ''

        return ('\n\n ' + line_at(ln - 2) + '\n-->' + line_at(ln - 1) +
                '\n ' + line_at(ln))

    def _register_matcher(self, pattern, function):
        """Extract the text between the first and last '/' of ``repr(pattern)`` and map it to ``function``."""
        pattern = repr(pattern)
        first_slash = pattern.index('/')
        last_slash = pattern.rindex('/')
        regexp = pattern[first_slash + 1:last_slash]
        # Whatever follows the last slash (the regexp modifiers) is ignored.
        logging.debug("Registering a hubot snippet %s -> %s" %
                      (regexp, repr(function)))
        self.hear_matchers[regexp] = function

    def hear(self, pattern, function):
        """The hubot callback to register a listening function """
        self._register_matcher(pattern, function)

    def respond(self, pattern, function):
        """The hubot callback to register a listening function to himself only
        TODO dissociate from hear
        """
        self._register_matcher(pattern, function)

    def add_snippet(self, name, coffee):
        """Compile the CoffeeScript source ``coffee`` and execute it as script ``name``."""
        #logging.debug("Trying to insert this gloubiboulga [%s]" % coffee)
        logging.debug("Creating a face Hubot context...")

        def require(module_name):
            # Stub: the request is only logged; nothing is loaded or returned.
            logging.debug("Trying to load " + module_name)

        module = HubotModule()
        cx = self.rt.new_context()
        cx.add_global("module", module)
        cx.add_global("process", self.process)
        cx.add_global("require", require)
        cx.add_global("JSON", JSONStub())
        logging.debug("Compiling coffeescript...")
        js = coffeescript.compile(coffee, bare=True)
        nummed_js = numerotatedJS(js)
        # Cached under the script name so callback_message can quote lines.
        self.js_cache[name] = nummed_js
        logging.debug("Translated JS:\n" + nummed_js)
        logging.debug("Executing Hubot script...")
        cx.execute(code=js, filename=name)
        module.exports(self)  # triggers the listening callbacks

    @botcmd
    def hubot_add(self, mess, args):
        """Adds a hubot script in the bot
        takes an url has parameter directly from the row github file for example :
        !hubot add https://raw.github.com/github/hubot-scripts/master/src/scripts/botsnack.coffee
        """
        script_name = args.split('/')[-1].replace('.coffee', '')
        res = urllib2.urlopen(args)
        try:
            script = res.read()
        finally:
            # Release the connection even when reading fails.
            res.close()
        logging.debug("Adding script %s -> %s" % (script_name, script))
        # Reassign the whole mapping so the store sees the modification.
        copy = self['scripts']
        copy[script_name] = script
        self['scripts'] = copy
        self.add_snippet(script_name, script)
        return 'Script %s added.' % script_name

    @botcmd
    def hubot_del(self, mess, args):
        """remove a hubot script in from the bot.
        You need to restart the hubot plugin to make it effective.
        takes the name of the script for example :
        !hubot del botsnack
        """
        copy = self['scripts']
        copy.pop(args)
        self['scripts'] = copy
        return 'Done'

    @botcmd
    def hubot_list(self, mess, args):
        return '\n'.join(self['scripts'].keys())
class Database(database.Database):
    """In-memory stand-in for a MongoDB database.

    Collections are created lazily on first access.  A small set of database
    commands is emulated; map/reduce runs the supplied JavaScript through
    spidermonkey when it is available.
    """

    def __init__(self, connection, name):
        """Store the connection and name; create a JS runtime when ``Runtime`` is available."""
        self._name = name
        self._connection = connection
        self._collections = {}
        if Runtime is not None:
            self._jsruntime = Runtime()
        else:
            self._jsruntime = None

    @property
    def name(self):
        return self._name

    @property
    def connection(self):
        return self._connection

    def _make_collection(self):
        return Collection(self)

    def command(self, command, value=1, check=True, allowable_errors=None, **kwargs):
        """Emulate a database command.

        Supported: ``filemd5``, ``findandmodify``, ``mapreduce``,
        ``distinct`` and ``getlasterror``.  ``check`` and
        ``allowable_errors`` are accepted but not used.

        Args:
            command: Command name (combined with ``value`` and ``kwargs``
                into a dict) or a ready-made command dict.

        Raises:
            OperationFailure: ``findandmodify`` matched nothing and
                ``upsert`` was not requested.
            NotImplementedError: for any other command.
        """
        if isinstance(command, basestring):
            command = {command: value}
            command.update(**kwargs)
        if 'filemd5' in command:
            checksum = md5()
            # NOTE(review): always hashes the ``chef.file.chunks`` collection,
            # whatever the command names -- confirm this is intended.
            for chunk in self.chef.file.chunks.find().sort('n'):
                checksum.update(chunk['data'])
            return dict(md5=checksum.hexdigest())
        elif 'findandmodify' in command:
            coll = self._collections[command['findandmodify']]
            before = coll.find_one(command['query'], sort=command.get('sort'))
            upsert = False
            if before is None:
                upsert = True
                if command.get('upsert'):
                    before = dict(command['query'])
                    coll.insert(before)
                else:
                    raise OperationFailure('No matching object found')
            coll.update(command['query'], command['update'])
            # An upserted document is always returned in its updated form.
            if command.get('new', False) or upsert:
                return dict(value=coll.find_one(dict(_id=before['_id'])))
            return dict(value=before)
        elif 'mapreduce' in command:
            collection = command.pop('mapreduce')
            return self._handle_mapreduce(collection, **command)
        elif 'distinct' in command:
            collection = self._collections[command['distinct']]
            key = command['key']
            return list(set(_lookup(d, key) for d in collection.find()))
        elif 'getlasterror' in command:
            return dict(connectionId=None, err=None, n=0, ok=1.0)
        else:
            raise NotImplementedError(repr(command))

    def _handle_mapreduce(self, collection, query=None, map=None, reduce=None,
                          out=None, finalize=None):
        """Run a map/reduce job over ``collection`` using the JS runtime.

        Args:
            collection: Name of the source collection.
            query: Filter for the source documents (all when None).
            map, reduce, finalize: JavaScript function sources.
            out: Single-key mapping selecting the output mode: ``reduce``,
                ``merge``, ``replace`` (value is the target collection name)
                or ``inline``.

        Returns:
            ``{'result': <target name>}`` for the collection modes,
            ``{'results': [...]}`` for ``inline``.

        Raises:
            ImportError: spidermonkey is not available.
            TypeError: unsupported ``out`` key.
        """
        if self._jsruntime is None:
            raise ImportError('Cannot import spidermonkey, required for MIM mapreduce')
        j = self._jsruntime.new_context()
        # Separate scratch context used by topy() to inspect JS objects.
        tmp_j = self._jsruntime.new_context()
        temp_coll = collections.defaultdict(list)

        def emit(k, v):
            k = topy(k)
            # dict keys are not hashable: group them by their BSON encoding.
            if isinstance(k, dict):
                k = bson.BSON.encode(k)
            temp_coll[k].append(v)

        def emit_reduced(k, v):
            print('%s %s' % (k, v))

        # Add some special MongoDB functions
        j.execute('var NumberInt = Number;')
        j.add_global('emit', emit)
        j.add_global('emit_reduced', emit_reduced)
        j.execute('var map=%s;' % map)
        j.execute('var reduce=%s;' % reduce)
        if finalize:
            j.execute('var finalize=%s;' % finalize)
        if query is None:
            query = {}

        # Run the map phase
        def topy(obj):
            """Convert a value coming back from JavaScript into Python data."""
            if isinstance(obj, spidermonkey.Array):
                return [topy(x) for x in obj]
            if isinstance(obj, spidermonkey.Object):
                tmp_j.add_global('x', obj)
                js_source = tmp_j.execute('x.toSource()')
                if js_source.startswith('(new Date'):
                    # Date object by itself
                    obj = datetime.fromtimestamp(
                        tmp_j.execute('x.valueOf()') / 1000.)
                elif js_source.startswith('({'):
                    # Handle recursive conversion in case we got back a
                    # mapping with multiple values.
                    obj = dict((a, topy(obj[a])) for a in obj)
                else:
                    assert False, 'Cannot convert %s to Python' % (js_source)
            elif isinstance(obj, collections.Mapping):
                return dict((k, topy(v)) for k, v in obj.iteritems())
            elif isinstance(obj, basestring):
                return obj
            elif isinstance(obj, collections.Sequence):
                return [topy(x) for x in obj]
            return obj

        def tojs(obj):
            """Convert Python data into values usable from JavaScript."""
            if isinstance(obj, basestring):
                return obj
            elif isinstance(obj, datetime):
                # JS dates are built from milliseconds since the epoch.
                ts = 1000. * time.mktime(obj.timetuple())
                ts += (obj.microsecond / 1000.)
                return j.execute('new Date(%f)' % (ts))
            elif isinstance(obj, collections.Mapping):
                return dict((k, tojs(v)) for k, v in obj.iteritems())
            elif isinstance(obj, collections.Sequence):
                result = j.execute('new Array()')
                for v in obj:
                    result.push(tojs(v))
                return result
            else:
                return obj

        for obj in self._collections[collection].find(query):
            obj = tojs(obj)
            j.execute('map').apply(obj)

        # Run the reduce phase
        reduced = topy(
            dict((k, j.execute('reduce')(k, tojs(values)))
                 for k, values in temp_coll.iteritems()))

        # Run the finalize phase
        if finalize:
            reduced = topy(
                dict((k, j.execute('finalize')(k, tojs(value)))
                     for k, value in reduced.iteritems()))

        # Handle the output phase
        result = dict()
        assert len(out) == 1
        # Exactly one entry: its key is the mode, its value the target name.
        out_type = list(out)[0]
        out_target = out[out_type]
        if out_type == 'reduce':
            result['result'] = out_target
            out_coll = self[out_target]
            for k, v in reduced.iteritems():
                doc = out_coll.find_one(dict(_id=k))
                if doc is None:
                    out_coll.insert(dict(_id=k, value=v))
                else:
                    # Fold the new value into the one already stored.
                    doc['value'] = topy(
                        j.execute('reduce')(k, tojs([v, doc['value']])))
                    out_coll.save(doc)
        elif out_type == 'merge':
            result['result'] = out_target
            out_coll = self[out_target]
            for k, v in reduced.iteritems():
                out_coll.save(dict(_id=k, value=v))
        elif out_type == 'replace':
            result['result'] = out_target
            # Forget the old collection so a fresh one is created below.
            self._collections.pop(out_target, None)
            out_coll = self[out_target]
            for k, v in reduced.iteritems():
                out_coll.save(dict(_id=k, value=v))
        elif out_type == 'inline':
            result['results'] = [
                dict(_id=k, value=v) for k, v in reduced.iteritems()
            ]
        else:
            raise TypeError('Unsupported out type: %s' % out.keys())
        return result

    def __getattr__(self, name):
        # Only called for attributes not found normally: treat the name as a
        # collection name.
        return self[name]

    def __getitem__(self, name):
        return self._get(name)

    def _get(self, name):
        """Return the collection ``name``, creating it on first access."""
        try:
            return self._collections[name]
        except KeyError:
            db = self._collections[name] = Collection(self, name)
            return db

    def __repr__(self):
        return 'mim.Database(%s)' % self.name

    def collection_names(self):
        return self._collections.keys()

    def drop_collection(self, name):
        """Forget the collection ``name``; unknown names are ignored.

        Dropping a missing collection is a no-op, as it is with a real
        PyMongo database, instead of raising KeyError.
        """
        self._collections.pop(name, None)

    def clear(self):
        """Clear every collection of this database."""
        for coll in self._collections.values():
            coll.clear()
def _get_runtime(self):
    """Return a newly constructed ``Runtime``; a fresh one is built on every call."""
    runtime = Runtime()
    return runtime
class Database(database.Database): def __init__(self, connection, name): self._name = name self._connection = connection self._collections = {} if Runtime is not None: self._jsruntime = Runtime() else: self._jsruntime = None @property def name(self): return self._name @property def connection(self): return self._connection def _make_collection(self): return Collection(self) def command(self, command, value=1, check=True, allowable_errors=None, **kwargs): if isinstance(command, basestring): command = {command:value} command.update(**kwargs) if 'filemd5' in command: checksum = md5() for chunk in self.chef.file.chunks.find().sort('n'): checksum.update(chunk['data']) return dict(md5=checksum.hexdigest()) elif 'findandmodify' in command: coll = self._collections[command['findandmodify']] before = coll.find_one(command['query'], sort=command.get('sort')) upsert = False if before is None: upsert = True if command.get('upsert'): before = dict(command['query']) coll.insert(before) else: raise OperationFailure, 'No matching object found' coll.update(command['query'], command['update']) if command.get('new', False) or upsert: return dict(value=coll.find_one(dict(_id=before['_id']))) return dict(value=before) elif 'mapreduce' in command: collection = command.pop('mapreduce') return self._handle_mapreduce(collection, **command) elif 'distinct' in command: collection = self._collections[command['distinct']] key = command['key'] return list(set(_lookup(d, key) for d in collection.find())) elif 'getlasterror' in command: return dict(connectionId=None, err=None, n=0, ok=1.0) else: raise NotImplementedError, repr(command) def _handle_mapreduce(self, collection, query=None, map=None, reduce=None, out=None, finalize=None): if self._jsruntime is None: raise ImportError, 'Cannot import spidermonkey, required for MIM mapreduce' j = self._jsruntime.new_context() tmp_j = self._jsruntime.new_context() temp_coll = collections.defaultdict(list) def emit(k, v): k = topy(k) if isinstance(k, 
dict): k = bson.BSON.encode(k) temp_coll[k].append(v) def emit_reduced(k, v): print k,v # Add some special MongoDB functions j.execute('var NumberInt = Number;') j.add_global('emit', emit) j.add_global('emit_reduced', emit_reduced) j.execute('var map=%s;' % map) j.execute('var reduce=%s;' % reduce) if finalize: j.execute('var finalize=%s;' % finalize) if query is None: query = {} # Run the map phase def topy(obj): if isinstance(obj, spidermonkey.Array): return [topy(x) for x in obj] if isinstance(obj, spidermonkey.Object): tmp_j.add_global('x', obj) js_source = tmp_j.execute('x.toSource()') if js_source.startswith('(new Date'): # Date object by itself obj = datetime.fromtimestamp(tmp_j.execute('x.valueOf()')/1000.) elif js_source.startswith('({'): # Handle recursive conversion in case we got back a # mapping with multiple values. # spidermonkey changes all js number strings to int/float # changing back to string here for key protion, since bson requires it obj = dict((str(a), topy(obj[a])) for a in obj) else: assert False, 'Cannot convert %s to Python' % (js_source) elif isinstance(obj, collections.Mapping): return dict((k, topy(v)) for k,v in obj.iteritems()) elif isinstance(obj, basestring): return obj elif isinstance(obj, collections.Sequence): return [topy(x) for x in obj] return obj def tojs(obj): if isinstance(obj, basestring): return obj elif isinstance(obj, datetime): ts = 1000. * time.mktime(obj.timetuple()) ts += (obj.microsecond / 1000.) 
return j.execute('new Date(%f)' % (ts)) elif isinstance(obj, collections.Mapping): return dict((k,tojs(v)) for k,v in obj.iteritems()) elif isinstance(obj, collections.Sequence): result = j.execute('new Array()') for v in obj: result.push(tojs(v)) return result else: return obj for obj in self._collections[collection].find(query): obj = tojs(obj) j.execute('map').apply(obj) # Run the reduce phase reduced = topy(dict( (k, j.execute('reduce')(k, tojs(values))) for k, values in temp_coll.iteritems())) # Run the finalize phase if finalize: reduced = topy(dict( (k, j.execute('finalize')(k, tojs(value))) for k, value in reduced.iteritems())) # Handle the output phase result = dict() assert len(out) == 1 if out.keys() == ['reduce']: result['result'] = out.values()[0] out_coll = self[out.values()[0]] for k, v in reduced.iteritems(): doc = out_coll.find_one(dict(_id=k)) if doc is None: out_coll.insert(dict(_id=k, value=v)) else: doc['value'] = topy(j.execute('reduce')(k, tojs([v, doc['value']]))) out_coll.save(doc) elif out.keys() == ['merge']: result['result'] = out.values()[0] out_coll = self[out.values()[0]] for k, v in reduced.iteritems(): out_coll.save(dict(_id=k, value=v)) elif out.keys() == ['replace']: result['result'] = out.values()[0] self._collections.pop(out.values()[0], None) out_coll = self[out.values()[0]] for k, v in reduced.iteritems(): out_coll.save(dict(_id=k, value=v)) elif out.keys() == ['inline']: result['results'] = [ dict(_id=k, value=v) for k,v in reduced.iteritems() ] else: raise TypeError, 'Unsupported out type: %s' % out.keys() return result def __getattr__(self, name): return self[name] def __getitem__(self, name): return self._get(name) def _get(self, name): try: return self._collections[name] except KeyError: db = self._collections[name] = Collection(self, name) return db def __repr__(self): return 'mim.Database(%s)' % self.name def collection_names(self): return self._collections.keys() def drop_collection(self, name): del 
self._collections[name] def clear(self): for coll in self._collections.values(): coll.clear()
class Database(database.Database):
    """In-memory database whose collections live in a plain dict.

    Collections are created on first access (``db.name`` or ``db['name']``)
    and stored in ``self._collections``.  A JavaScript runtime is created in
    the constructor when ``Runtime`` is available; it is used only by the
    ``mapreduce`` command.
    """

    def __init__(self, connection, name):
        """Remember the owning connection and name; start with no collections."""
        self._name = name
        self._connection = connection
        self._collections = {}
        # Runtime is presumably None when spidermonkey failed to import
        # (see the ImportError raised by _handle_mapreduce) -- TODO confirm.
        if Runtime is not None:
            self._jsruntime = Runtime()
        else:
            self._jsruntime = None

    @property
    def name(self):
        """Name given to this database at construction."""
        return self._name

    @property
    def connection(self):
        """Connection object given to this database at construction."""
        return self._connection

    def _make_collection(self):
        """Build a ``Collection`` bound to this database."""
        return Collection(self)

    def command(self, command, value=1, check=True, allowable_errors=None,
                **kwargs):
        """Emulate a subset of database commands.

        ``command`` is either a command name (combined with ``value`` and
        ``kwargs`` into a command dict) or an already-built command dict.
        Supported keys: ``filemd5``, ``findandmodify`` and ``mapreduce``.
        ``check`` and ``allowable_errors`` are accepted but never read here.

        Raises ``OperationFailure`` when ``findandmodify`` matches nothing
        and ``upsert`` is not set, and ``NotImplementedError`` for any other
        command.
        """
        if isinstance(command, str):
            command = {command: value}
            command.update(**kwargs)
        if 'filemd5' in command:
            return dict(md5='42')  # completely bogus value; will it work?
        elif 'findandmodify' in command:
            coll = self._collections[command['findandmodify']]
            before = coll.find_one(command['query'], sort=command.get('sort'))
            upsert = False
            if before is None:
                upsert = True
                if command.get('upsert'):
                    # Seed the collection with a copy of the query so that
                    # the update below has a document to modify.
                    before = dict(command['query'])
                    coll.insert(before)
                else:
                    raise OperationFailure('No matching object found')
            coll.update(command['query'], command['update'])
            if command.get('new', False) or upsert:
                # Re-read so the caller sees the post-update document.
                return dict(value=coll.find_one(dict(_id=before['_id'])))
            return dict(value=before)
        elif 'mapreduce' in command:
            # The remaining command entries become keyword arguments.
            collection = command.pop('mapreduce')
            return self._handle_mapreduce(collection, **command)
        else:
            raise NotImplementedError(repr(command))

    def _handle_mapreduce(self, collection, query=None, map=None, reduce=None,
                          out=None):
        """Run a JavaScript map/reduce over ``collection``.

        ``map`` and ``reduce`` are JavaScript function sources.  ``out`` must
        be a one-entry mapping whose key is ``reduce``, ``merge``,
        ``replace`` or ``inline``.  Returns a dict with ``result`` (the
        output collection name) or, for ``inline``, ``results`` (a list of
        ``{_id, value}`` dicts).

        Raises ``ImportError`` when no JavaScript runtime is available and
        ``TypeError`` for an unrecognised ``out`` key.
        """
        # The ABC aliases on the top-level ``collections`` module were
        # removed in Python 3.10; import them from ``collections.abc``.
        from collections.abc import Mapping, Sequence

        if self._jsruntime is None:
            raise ImportError(
                'Cannot import spidermonkey, required for MIM mapreduce')
        j = self._jsruntime.new_context()
        temp_coll = collections.defaultdict(list)

        def emit(k, v):
            if isinstance(k, dict):
                # dicts are unhashable; use their BSON encoding as the key.
                k = bson.BSON.encode(k)
            temp_coll[k].append(v)

        def emit_reduced(k, v):
            print(k, v)

        j.add_global('emit', emit)
        j.add_global('emit_reduced', emit_reduced)
        j.execute('var map=%s;' % map)
        j.execute('var reduce=%s;' % reduce)
        if query is None:
            query = {}

        def tojs(obj):
            """Convert Python data into values to hand to JavaScript."""
            if isinstance(obj, str):
                return obj
            elif isinstance(obj, datetime):
                return j.execute('new Date("%s")' % obj.ctime())
            elif isinstance(obj, Mapping):
                return dict((k, tojs(v)) for k, v in obj.items())
            elif isinstance(obj, Sequence):
                result = j.execute('new Array()')
                for v in obj:
                    result.push(tojs(v))
                return result
            else:
                return obj

        # Run the map phase
        for obj in self._collections[collection].find(query):
            obj = tojs(obj)
            j.execute('map').apply(obj)

        # Run the reduce phase
        reduced = dict(
            (k, j.execute('reduce')(k, tojs(values)))
            for k, values in temp_coll.items())

        # Handle the output phase
        result = dict()
        assert len(out) == 1
        out_keys = list(out.keys())
        if out_keys == ['reduce']:
            target_name = list(out.values())[0]
            result['result'] = target_name
            out_coll = self[target_name]
            for k, v in reduced.items():
                doc = out_coll.find_one(dict(_id=k))
                if doc is None:
                    out_coll.insert(dict(_id=k, value=v))
                else:
                    # Re-reduce the new value with the stored one.
                    doc['value'] = j.execute('reduce')(
                        k, tojs([v, doc['value']]))
                    out_coll.save(doc)
        elif out_keys == ['merge']:
            target_name = list(out.values())[0]
            result['result'] = target_name
            out_coll = self[target_name]
            for k, v in reduced.items():
                out_coll.save(dict(_id=k, value=v))
        elif out_keys == ['replace']:
            target_name = list(out.values())[0]
            result['result'] = target_name
            # Forget any existing collection so a fresh one is created.
            self._collections.pop(target_name, None)
            out_coll = self[target_name]
            for k, v in reduced.items():
                out_coll.save(dict(_id=k, value=v))
        elif out_keys == ['inline']:
            result['results'] = [
                dict(_id=k, value=v)
                for k, v in reduced.items()]
        else:
            raise TypeError('Unsupported out type: %s' % out_keys)
        return result

    def __getattr__(self, name):
        """Treat unknown attributes as collection names."""
        return self[name]

    def __getitem__(self, name):
        """Return the collection called ``name``, creating it if needed."""
        return self._get(name)

    def _get(self, name):
        """Look up ``name`` in the collection dict, creating it on a miss."""
        try:
            return self._collections[name]
        except KeyError:
            db = self._collections[name] = Collection(self, name)
            return db

    def __repr__(self):
        return 'mim.Database(%s)' % self.name

    def collection_names(self):
        """Return a list of the names of the collections created so far."""
        return list(self._collections.keys())

    def drop_collection(self, name):
        """Remove collection ``name``; raises ``KeyError`` if it is unknown."""
        del self._collections[name]

    def clear(self):
        """Call ``clear()`` on every collection held by this database."""
        # Iterate over a snapshot of the values, as the original did.
        for coll in list(self._collections.values()):
            coll.clear()
#!/usr/bin/env python2.6
"""Evaluate a JSONPath expression against a JSON document read from stdin.

Usage: <script> JSONPATH_EXPRESSION < document.json

The document and the expression are handed to ``jsonpath.js`` (loaded from
the current working directory) inside a spidermonkey context; every match
is printed on its own line.
"""
import sys
import cjson
from spidermonkey import Runtime
import re

if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError further down.
    sys.exit('usage: %s JSONPATH_EXPRESSION < document.json' % sys.argv[0])

rt = Runtime()
cx = rt.new_context()
cx.eval_script('r=[];')

# Load the jsonpath implementation into the JS context.
with open('jsonpath.js') as f:
    cx.eval_script(f.read())

# NOTE(security): stdin is spliced verbatim into JavaScript source and
# evaluated, so only feed this script documents you trust.
cx.eval_script("i=%s;" % sys.stdin.read())

# Single quotes are swapped for double quotes so the expression fits inside
# the single-quoted JS literal.  NOTE(review): backslashes and newlines in
# the expression are not escaped.
r = cx.eval_script("jsonPath(i, '%s');" % sys.argv[1].replace('\'', '"'))

# The reference jsonpath.js returns false (not an empty array) when nothing
# matches; iterating that would raise TypeError, so print nothing instead.
if r:
    for line in r:
        print(line)
def setUp(self):
    """Create a new runtime and store one of its contexts on ``self.cx``."""
    runtime = Runtime()
    context = runtime.new_context()
    self.cx = context