class Qdb(Bdb, object):
    """
    The Quantopian Remote Debugger.

    A singleton ``Bdb`` subclass that traces user code and ships stack,
    watchlist, and output information to a remote command manager.
    """
    _instance = None

    def __new__(cls, *args, **kwargs):
        """
        Qdb objects are singletons that persist until their disable method is
        called.
        """
        if not cls._instance:
            cls._instance = super(Qdb, cls).__new__(cls)
            cls._instance._init(*args, **kwargs)
        return cls._instance

    def __init__(self, *args, **kwargs):
        # Intentionally a no-op: all construction happens once in _init via
        # __new__, so repeated Qdb(...) calls don't re-initialize the singleton.
        pass

    def _init(self, config=None, merge=False, **kwargs):
        """
        See qdb.config for more information about the configuration of qdb.

        merge denotes how config and kwargs should be merged.
        QdbConfig.kwargs_first says config will trample kwargs,
        QdbConfig.config_first says kwargs will trample config.
        Otherwise, kwargs and config cannot both be passed.
        """
        self.super_ = super(Qdb, self)
        self.super_.__init__()
        self.reset()
        if config and kwargs:
            if merge == QdbConfig.kwargs_first:
                first = kwargs
                second = config
            elif merge == QdbConfig.config_first:
                first = config
                second = kwargs
            else:
                raise TypeError('Cannot pass config and kwargs')
            config = first.merge(second)
        else:
            config = QdbConfig.get_config(config or kwargs)

        self.address = config.host, config.port
        self.set_default_file(config.default_file)
        self.default_namespace = config.default_namespace or {}
        self.exception_serializer = config.exception_serializer or \
            default_exception_serializer
        self.eval_fn = config.eval_fn or default_eval_fn
        self.green = config.green
        self._file_cache = {}
        self.redirect_output = config.redirect_output
        # NOTE(review): attribute name mirrors the (misspelled) config field
        # `retry_attepts`; renaming would break the config interface.
        self.retry_attepts = config.retry_attepts
        self.repr_fn = config.repr_fn
        self._skip_fn = config.skip_fn or (lambda _: False)
        self.pause_signal = config.pause_signal \
            if config.pause_signal else signal.SIGUSR2
        self.uuid = str(config.uuid or uuid4())
        self.watchlist = {}
        self.execution_timeout = config.execution_timeout
        self.reset()
        self.log_handler = None
        if config.log_file:
            self.log_handler = FileHandler(config.log_file)
            self.log_handler.push_application()
        # The timing between these lines might matter depending on the
        # cmd_manager. Don't separate them.
        self.cmd_manager = (config.cmd_manager or RemoteCommandManager)(self)
        self.cmd_manager.start(config.auth_msg)
        # We need to be able to send stdout back to the user debugging the
        # program. We hold a handle to this in case the program resets stdout.
        if self.redirect_output:
            self._old_stdout = sys.stdout
            self._old_stderr = sys.stderr
            sys.stdout = OutputTee(
                sys.stdout,
                RemoteOutput(self.cmd_manager, '<stdout>'),
            )
            sys.stderr = OutputTee(
                sys.stderr,
                RemoteOutput(self.cmd_manager, '<stderr>'),
            )

    def skip_fn(self, path):
        # Normalize the path first so user skip predicates see canonic names.
        return self._skip_fn(self.canonic(path))

    def restore_output_streams(self):
        """
        Restores the original output streams.
        """
        if self.redirect_output:
            sys.stdout = self._old_stdout
            sys.stderr = self._old_stderr

    def _new_execution_timeout(self, src):
        """
        Return a new execution timeout context manager.
        If no execution timeout is in place, returns ExitStack().
        """
        # We use green=False because this could be cpu bound. This will
        # still throw to the proper greenlet if this is gevented.
        return (
            Timeout(
                self.execution_timeout,
                QdbExecutionTimeout(src, self.execution_timeout),
                green=False
            ) if self.execution_timeout else ExitStack()
        )

    def set_default_file(self, filename):
        """
        Safely sets the new default file.
        """
        self.default_file = self.canonic(filename) if filename else None

    def get_line(self, filename, line):
        """
        Checks for any user cached files before deferring to the linecache.
        """
        # The line - 1 is so that querying line 1 gives us the first line in
        # the file.
        try:
            return self._get_file_lines(filename)[line - 1]
        except IndexError:
            return 'No source available for this line.'

    def get_file(self, filename):
        """
        Retrieves a file out of cache or opens and caches it.
        """
        return '\n'.join(self._get_file_lines(filename))

    def _get_file_lines(self, filename):
        """
        Retrieves the file from the file cache as a list of lines.
        If the file does not exist in the cache, it is cached from
        disk.
        """
        canonic_name = self.canonic(filename)
        try:
            return self._file_cache[canonic_name]
        except KeyError:
            if not self.cache_file(canonic_name):
                return []
            return self._file_cache.get(canonic_name)

    def cache_file(self, filename, contents=None):
        """
        Caches filename from disk into memory.
        This overrides whatever was cached for filename previously.
        If contents is provided, it allows the user to cache a filename to a
        string.
        Returns True if the file caching succeeded, otherwise returns false.
        """
        canonic_name = self.canonic(filename)
        if contents:
            self._file_cache[canonic_name] = contents.splitlines()
            return True
        try:
            with open(canonic_name, 'r') as f:
                self._file_cache[canonic_name] = map(
                    lambda l: l[:-1] if l.endswith('\n') else l,
                    f.readlines()
                )
            return True
        except IOError:
            # The caching operation failed.
            return False

    def set_break(self, filename, lineno, temporary=False, cond=None,
                  funcname=None, **kwargs):
        """
        Sets a breakpoint. This is overridden to account for the filecache
        and for unreachable lines.
        **kwargs are ignored. This is to work with payloads that pass extra
        fields to the set_break payload.
        """
        filename = self.canonic(filename) if filename else self.default_file
        try:
            self.get_line(filename, lineno)
        except IndexError:
            # NOTE(review): get_line already swallows IndexError and returns a
            # placeholder string, so this branch appears unreachable — confirm
            # whether unreachable-breakpoint detection still works as intended.
            raise QdbUnreachableBreakpoint({
                'file': filename,
                'line': lineno,
                'temp': temporary,
                'cond': cond,
                'func': funcname,
            })

        blist = self.breaks.setdefault(filename, [])
        if lineno not in blist:
            blist.append(lineno)
        Breakpoint(filename, lineno, temporary, cond, funcname)

    def clear_break(self, filename, lineno, *args, **kwargs):
        """
        Wrapper to make the breakpoint json standardized for setting
        and removing of breakpoints.
        This means that the same json data that was used to set a break point
        may be fed into this function with the extra values ignored.
        """
        self.super_.clear_break(filename, lineno)

    def canonic(self, filename):
        # Map '.pyc' bytecode paths back to their '.py' source file.
        canonic_filename = self.super_.canonic(filename)
        if canonic_filename.endswith('pyc'):
            return canonic_filename[:-1]
        return canonic_filename

    def reset(self):
        # Clear all per-session tracing state.
        self.botframe = None
        self._set_stopinfo(None, None)
        self.forget()

    def forget(self):
        # Drop the cached view of the current stack.
        self.lineno = None
        self.stack = []
        self.curindex = 0
        self.curframe = None

    def setup_stack(self, stackframe, traceback):
        """
        Sets up the state of the debugger object for this frame.
        """
        self.forget()
        self.stack, self.curindex = self.get_stack(stackframe, traceback)
        self.curframe = self.stack[self.curindex][0]
        self.curframe_locals = self.curframe.f_locals
        self.update_watchlist()

    def extend_watchlist(self, *args):
        """
        Adds every arg to the watchlist and updates.
        """
        for expr in args:
            self.watchlist[expr] = (False, '')
        self.update_watchlist()

    def update_watchlist(self):
        """
        Updates the watchlist by evaluating all the watched expressions in
        our current frame.
        """
        id_ = lambda n: n  # Why is this NOT a builtin?
        for expr in self.watchlist:
            try:
                with self._new_execution_timeout(expr), \
                        self.inject_default_namespace() as stackframe:
                    self.watchlist[expr] = (
                        None,
                        (self.repr_fn or id_)(
                            self.eval_fn(expr, stackframe)
                        )
                    )
            except Exception as e:
                self.watchlist[expr] = (
                    type(e).__name__,
                    self.exception_serializer(e)
                )

    def effective(self, file, line, stackframe):
        """
        Finds the effective breakpoint for this line; called only
        when we know that there is a breakpoint here.

        returns the breakpoint paired with a flag denoting if we should
        remove this breakpoint or not.
        """
        for breakpoint in Breakpoint.bplist[file, line]:
            if breakpoint.enabled == 0:
                continue
            if not checkfuncname(breakpoint, stackframe):
                continue
            # Count every hit when breakpoint is enabled
            breakpoint.hits = breakpoint.hits + 1
            if not breakpoint.cond:
                # If unconditional, and ignoring go on to next, else break
                if breakpoint.ignore > 0:
                    breakpoint.ignore = breakpoint.ignore - 1
                    continue
                else:
                    return breakpoint, True
            else:
                # Conditional breakpoint
                # Ignore count applies only to those bpt hits where the
                # condition evaluates to true.
                try:
                    with self._new_execution_timeout(breakpoint.cond), \
                            self.inject_default_namespace(stackframe) as frame:
                        val = self.eval_fn(
                            breakpoint.cond,
                            frame,
                            'eval'
                        )
                except Exception as e:
                    # Send back a message to let the user know there was an
                    # issue with their breakpoint.
                    self.cmd_manager.send_error(
                        'condition', {
                            'cond': breakpoint.cond,
                            'line': line,
                            'exc': type(e).__name__,
                            'output': self.exception_serializer(e),
                        }
                    )
                    # Return this breakpoint to be safe. The user will be
                    # stopped here so that they can fix the breakpoint.
                    return breakpoint, False

                if val:
                    if breakpoint.ignore > 0:
                        breakpoint.ignore = breakpoint.ignore - 1
                    else:
                        return breakpoint, True
        return None, False

    def break_here(self, stackframe):
        """
        Checks if we should break execution in this stackframe.
        This function handles the cleanup and ignore counts for breakpoints.
        Returns True iff we should stop in the stackframe, False otherwise.
        """
        filename = self.canonic(stackframe.f_code.co_filename)
        if filename not in self.breaks:
            return False
        lineno = stackframe.f_lineno
        if lineno not in self.breaks[filename]:
            # The line itself has no breakpoint, but maybe the line is the
            # first line of a function with breakpoint set by function name.
            lineno = stackframe.f_code.co_firstlineno
            if lineno not in self.breaks[filename]:
                return False

        # flag says ok to delete temporary breakpoints.
        breakpoint, flag = self.effective(filename, lineno, stackframe)
        if breakpoint:
            self.currentbp = breakpoint.number
            if flag and breakpoint.temporary:
                self.do_clear(breakpoint.number)
            return True
        else:
            return False

    def trace_dispatch(self, stackframe, event, arg):
        """
        Trace function that does some preliminary checks and then defers to
        the event handler for each type of event.
        """
        if self.quitting:
            # We were told to quit by the user, bubble this up to their code.
            return

        if self.skip_fn(stackframe.f_code.co_filename):
            # We want to skip this, don't stop but keep tracing.
            return self.trace_dispatch

        try:
            return self.super_.trace_dispatch(stackframe, event, arg)
        except BdbQuit:
            raise QdbQuit()  # Rewrap as a QdbError object.

    def user_call(self, stackframe, arg):
        # Only stop on calls when an effective breakpoint matches this frame.
        if self.break_here(stackframe):
            self.user_line(stackframe)

    def user_line(self, stackframe):
        # Sync state with the client and block waiting for the next command.
        self.setup_stack(stackframe, None)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_stack()
        self.cmd_manager.next_command()

    def user_return(self, stackframe, return_value):
        stackframe.f_locals['__return__'] = return_value
        self.setup_stack(stackframe, None)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_stack()
        msg = fmt_msg('return', str(return_value), serial=pickle.dumps)
        self.cmd_manager.next_command(msg)

    def user_exception(self, stackframe, exc_info):
        exc_type, exc_value, exc_traceback = exc_info
        stackframe.f_locals['__exception__'] = exc_type, exc_value
        self.setup_stack(stackframe, exc_traceback)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_stack()
        msg = fmt_msg(
            'exception', {
                'type': exc_type.__name__,
                'value': str(exc_value),
                'traceback': traceback.format_tb(exc_traceback)
            },
            serial=pickle.dumps,
        )
        self.cmd_manager.next_command(msg)

    def do_clear(self, bpnum):
        """
        Handles deletion of temporary breakpoints.
        """
        if not (0 <= bpnum < len(Breakpoint.bpbynumber)):
            return
        self.clear_bpbynumber(bpnum)

    def set_quit(self):
        """
        Sets the quitting state and restores the program state.
        """
        self.quitting = True

    def disable(self, mode='soft'):
        """
        Stops tracing.
        """
        try:
            if mode == 'soft':
                self.clear_all_breaks()
                self.set_continue()
                # Remove this instance so that new ones may be created.
                self.__class__._instance = None
            elif mode == 'hard':
                sys.exit(1)
            else:
                raise ValueError("mode must be 'hard' or 'soft'")
        finally:
            # Always tear down output redirection, logging, the command
            # manager, and the trace hook, even if disabling raised.
            self.restore_output_streams()
            if self.log_handler:
                self.log_handler.pop_application()
            self.cmd_manager.stop()
            if sys.gettrace() is self.trace_dispatch:
                sys.settrace(None)

    def __enter__(self):
        self.set_trace(sys._getframe().f_back, stop=False)
        return self

    def __exit__(self, type, value, traceback):
        self.disable('soft')

    def set_trace(self, stackframe=None, stop=True):
        """
        Starts debugging in stackframe or in the callers frame.
        If stop is True, begin stepping from here, otherwise, wait for
        the first breakpoint or exception.
        """
        # We need to look back 1 frame to get our caller.
        stackframe = stackframe or sys._getframe().f_back
        self.reset()
        while stackframe:
            stackframe.f_trace = self.trace_dispatch
            self.botframe = stackframe
            stackframe = stackframe.f_back
        if stop:
            self.set_step()
        else:
            self.set_continue()
        sys.settrace(self.trace_dispatch)

    @contextmanager
    def inject_default_namespace(self, stackframe=None):
        """
        Adds the default namespace to the frame, or if no frame is provided,
        self.curframe is used.
        """
        stackframe = stackframe or self.curframe
        to_remove = set()
        for k, v in self.default_namespace.iteritems():
            if k not in stackframe.f_globals:
                # Only add the default things if the name is unbound.
                stackframe.f_globals[k] = v
                to_remove.add(k)

        try:
            yield stackframe
        finally:
            for k in to_remove:
                try:
                    del stackframe.f_globals[k]
                except KeyError:
                    # BUG FIX: deleting from f_globals raises KeyError, not
                    # IndexError. The body of this manager might have del'd
                    # this.
                    pass

            # Prevent exceptions from generating ref cycles.
            del stackframe
) try: mail_handler.deliver(mimetext, "*****@*****.**") except: pass except Exception, e: import traceback error_msg = "".join( ("Error while harvesting: type-> ", str(type(e)), " TRACE:\n" + str(traceback.format_exc())) ) logger.error(error_msg) harvester.update_ingest_doc("error", error_msg=error_msg, items=num_recs) raise e if my_log_handler: my_log_handler.pop_application() if my_mail_handler: my_mail_handler.pop_application() return ingest_doc_id, num_recs, harvester.dir_save, harvester __all__ = (Fetcher, NoRecordsFetchedException, HARVEST_TYPES) if __name__ == "__main__": args = parse_args() main(args.user_email, args.url_api_collection) # Copyright © 2016, Regents of the University of California # All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met:
class Qdb(Bdb, object):
    """
    The Quantopian Remote Debugger.

    A singleton ``Bdb`` subclass that traces user code and relays stack,
    watchlist, and output information through a (possibly terminal-based)
    command manager.
    """
    _instance = None

    def __new__(cls, *args, **kwargs):
        """
        Qdb objects are singletons that persist until their disable method is
        called.
        """
        if not cls._instance:
            cls._instance = super(Qdb, cls).__new__(cls)
            cls._instance._init(*args, **kwargs)
        return cls._instance

    def __init__(self, *args, **kwargs):
        # Intentionally a no-op: all construction happens once in _init via
        # __new__, so repeated Qdb(...) calls don't re-initialize the singleton.
        pass

    def _init(self, config=None, merge=False, **kwargs):
        """
        See qdb.config for more information about the configuration of qdb.

        merge denotes how config and kwargs should be merged.
        QdbConfig.kwargs_first says config will trample kwargs,
        QdbConfig.config_first says kwargs will trample config.
        Otherwise, kwargs and config cannot both be passed.
        """
        self.super_ = super(Qdb, self)
        self.super_.__init__()
        self.reset()
        if config and kwargs:
            if merge == QdbConfig.kwargs_first:
                first = kwargs
                second = config
            elif merge == QdbConfig.config_first:
                first = config
                second = kwargs
            else:
                raise TypeError('Cannot pass config and kwargs')
            config = first.merge(second)
        else:
            config = QdbConfig.get_config(config or kwargs)

        self.address = config.host, config.port
        self.set_default_file(config.default_file)
        self.default_namespace = config.default_namespace or {}
        self.exception_serializer = config.exception_serializer or \
            default_exception_serializer
        self.eval_fn = config.eval_fn or default_eval_fn
        self._file_cache = {}
        # NOTE(review): attribute name mirrors the (misspelled) config field
        # `retry_attepts`; renaming would break the config interface.
        self.retry_attepts = config.retry_attepts
        self.repr_fn = config.repr_fn
        self._skip_fn = config.skip_fn or (lambda _: False)
        self.pause_signal = config.pause_signal \
            if config.pause_signal else signal.SIGUSR2
        self.uuid = str(config.uuid or uuid4())
        self.watchlist = {}
        self.execution_timeout = config.execution_timeout
        self.reset()
        self.log_handler = None
        if config.log_file:
            self.log_handler = FileHandler(config.log_file)
            self.log_handler.push_application()
        self.bound_cmd_manager = config.cmd_manager or TerminalCommandManager()
        self.bound_cmd_manager.start(config.auth_msg)
        # We need to be able to send stdout back to the user debugging the
        # program. We hold a handle to this in case the program resets stdout.
        self._old_stdout = sys.stdout
        self._old_stderr = sys.stderr
        # Never tee output through a terminal command manager; it already
        # owns the terminal.
        self.redirect_output = (
            config.redirect_output and
            not isinstance(self.cmd_manager, TerminalCommandManager)
        )
        if self.redirect_output:
            sys.stdout = OutputTee(
                sys.stdout,
                RemoteOutput(self.cmd_manager, '<stdout>'),
            )
            sys.stderr = OutputTee(
                sys.stderr,
                RemoteOutput(self.cmd_manager, '<stderr>'),
            )

    @property
    def bound_cmd_manager(self):
        # The command manager pre-bound to this tracer instance.
        return self.__cmd_manager

    @bound_cmd_manager.setter
    def bound_cmd_manager(self, value):
        # Keep the raw manager available as cmd_manager; wrap it for bound use.
        self.cmd_manager = value
        self.__cmd_manager = BoundCmdManager(self, value)

    def skip_fn(self, path):
        # Normalize the path first so user skip predicates see canonic names.
        return self._skip_fn(self.canonic(path))

    def restore_output_streams(self):
        """
        Restores the original output streams.
        """
        if self.redirect_output:
            sys.stdout = self._old_stdout
            sys.stderr = self._old_stderr

    def _new_execution_timeout(self, src):
        """
        Return a new execution timeout context manager.
        If no execution timeout is in place, returns ExitStack().
        """
        # We use no_gevent=True because this could be cpu bound. This will
        # still throw to the proper greenlet if this is gevented.
        return (
            Timeout(
                self.execution_timeout,
                QdbExecutionTimeout(src, self.execution_timeout),
                no_gevent=True,
            ) if self.execution_timeout else ExitStack()
        )

    def set_default_file(self, filename):
        """
        Safely sets the new default file.
        """
        self.default_file = self.canonic(filename) if filename else None

    def get_line(self, filename, line):
        """
        Checks for any user cached files before deferring to the linecache.
        """
        # The line - 1 is so that querying line 1 gives us the first line in
        # the file.
        try:
            return self.get_file_lines(filename)[line - 1]
        except IndexError:
            return 'No source available for this line.'

    def get_file(self, filename):
        """
        Retrieves a file out of cache or opens and caches it.
        """
        return '\n'.join(self.get_file_lines(filename))

    def get_file_lines(self, filename):
        """
        Retrieves the file from the file cache as a list of lines.
        If the file does not exist in the cache, it is cached from
        disk.
        """
        canonic_name = self.canonic(filename)
        try:
            return self._file_cache[canonic_name]
        except KeyError:
            if not self.cache_file(canonic_name):
                return []
            return self._file_cache.get(canonic_name)

    def cache_file(self, filename, contents=None):
        """
        Caches filename from disk into memory.
        This overrides whatever was cached for filename previously.
        If contents is provided, it allows the user to cache a filename to a
        string.
        Returns True if the file caching succeeded, otherwise returns false.
        """
        canonic_name = self.canonic(filename)
        if contents:
            self._file_cache[canonic_name] = contents.splitlines()
            return True
        try:
            with open(canonic_name, 'r') as f:
                self._file_cache[canonic_name] = f.read().splitlines()
            return True
        except IOError:
            # The caching operation failed.
            return False

    def set_break(self, filename, lineno, temporary=False, cond=None,
                  funcname=None, **kwargs):
        """
        Sets a breakpoint. This is overridden to account for the filecache
        and for unreachable lines.
        **kwargs are ignored. This is to work with payloads that pass extra
        fields to the set_break payload.
        """
        filename = self.canonic(filename) if filename else self.default_file
        try:
            self.get_line(filename, lineno)
        except IndexError:
            # NOTE(review): get_line already swallows IndexError and returns a
            # placeholder string, so this branch appears unreachable — confirm
            # whether unreachable-breakpoint detection still works as intended.
            raise QdbUnreachableBreakpoint({
                'file': filename,
                'line': lineno,
                'temp': temporary,
                'cond': cond,
                'func': funcname,
            })

        blist = self.breaks.setdefault(filename, [])
        if lineno not in blist:
            blist.append(lineno)
        Breakpoint(filename, lineno, temporary, cond, funcname)

    def clear_break(self, filename, lineno, *args, **kwargs):
        """
        Wrapper to make the breakpoint json standardized for setting
        and removing of breakpoints.
        This means that the same json data that was used to set a break point
        may be fed into this function with the extra values ignored.
        """
        self.super_.clear_break(filename, lineno)

    def canonic(self, filename):
        # Map '.pyc' bytecode paths back to their '.py' source file.
        canonic_filename = self.super_.canonic(filename)
        if canonic_filename.endswith('pyc'):
            return canonic_filename[:-1]
        return canonic_filename

    def reset(self):
        # Clear all per-session tracing state.
        self.botframe = None
        self._set_stopinfo(None, None)
        self.forget()

    def forget(self):
        # Drop the cached view of the current stack.
        self.lineno = None
        self.stack = []
        self.curindex = 0
        self.curframe = None

    def setup_stack(self, stackframe, traceback):
        """
        Sets up the state of the debugger object for this frame.
        """
        self.forget()
        self.stack, self.curindex = self.get_stack(stackframe, traceback)
        self.curframe = self.stack[self.curindex][0]
        self.curframe_locals = self.curframe.f_locals
        self.update_watchlist()

    def extend_watchlist(self, *args):
        """
        Adds every arg to the watchlist and updates.
        """
        for expr in args:
            self.watchlist[expr] = (False, '')
        self.update_watchlist()

    def update_watchlist(self):
        """
        Updates the watchlist by evaluating all the watched expressions in
        our current frame.
        """
        id_ = lambda n: n  # Why is this NOT a builtin?
        for expr in self.watchlist:
            try:
                with self._new_execution_timeout(expr), \
                        self.inject_default_namespace() as stackframe:
                    self.watchlist[expr] = (
                        None,
                        (self.repr_fn or id_)(
                            self.eval_fn(expr, stackframe)
                        )
                    )
            except Exception as e:
                self.watchlist[expr] = (
                    type(e).__name__,
                    self.exception_serializer(e)
                )

    def effective(self, file, line, stackframe):
        """
        Finds the effective breakpoint for this line; called only
        when we know that there is a breakpoint here.

        returns the breakpoint paired with a flag denoting if we should
        remove this breakpoint or not.
        """
        for breakpoint in Breakpoint.bplist[file, line]:
            if breakpoint.enabled == 0:
                continue
            if not checkfuncname(breakpoint, stackframe):
                continue
            # Count every hit when breakpoint is enabled
            breakpoint.hits = breakpoint.hits + 1
            if not breakpoint.cond:
                # If unconditional, and ignoring go on to next, else break
                if breakpoint.ignore > 0:
                    breakpoint.ignore = breakpoint.ignore - 1
                    continue
                else:
                    return breakpoint, True
            else:
                # Conditional breakpoint
                # Ignore count applies only to those bpt hits where the
                # condition evaluates to true.
                try:
                    with self._new_execution_timeout(breakpoint.cond), \
                            self.inject_default_namespace(stackframe) as frame:
                        val = self.eval_fn(
                            breakpoint.cond,
                            frame,
                            'eval'
                        )
                except Exception as e:
                    # Send back a message to let the user know there was an
                    # issue with their breakpoint.
                    self.cmd_manager.send_error(
                        'condition', {
                            'cond': breakpoint.cond,
                            'line': line,
                            'exc': type(e).__name__,
                            'output': self.exception_serializer(e),
                        }
                    )
                    # Return this breakpoint to be safe. The user will be
                    # stopped here so that they can fix the breakpoint.
                    return breakpoint, False

                if val:
                    if breakpoint.ignore > 0:
                        breakpoint.ignore = breakpoint.ignore - 1
                    else:
                        return breakpoint, True
        return None, False

    def break_here(self, stackframe):
        """
        Checks if we should break execution in this stackframe.
        This function handles the cleanup and ignore counts for breakpoints.
        Returns True iff we should stop in the stackframe, False otherwise.
        """
        filename = self.canonic(stackframe.f_code.co_filename)
        if filename not in self.breaks:
            return False
        lineno = stackframe.f_lineno
        if lineno not in self.breaks[filename]:
            # The line itself has no breakpoint, but maybe the line is the
            # first line of a function with breakpoint set by function name.
            lineno = stackframe.f_code.co_firstlineno
            if lineno not in self.breaks[filename]:
                return False

        # flag says ok to delete temporary breakpoints.
        breakpoint, flag = self.effective(filename, lineno, stackframe)
        if breakpoint:
            self.currentbp = breakpoint.number
            if flag and breakpoint.temporary:
                self.do_clear(breakpoint.number)
            return True
        else:
            return False

    def trace_dispatch(self, stackframe, event, arg):
        """
        Trace function that does some preliminary checks and then defers to
        the event handler for each type of event.
        """
        if self.quitting:
            # We were told to quit by the user, bubble this up to their code.
            return

        if self.skip_fn(stackframe.f_code.co_filename):
            # We want to skip this, don't stop but keep tracing.
            return self.trace_dispatch

        try:
            return self.super_.trace_dispatch(stackframe, event, arg)
        except BdbQuit:
            raise QdbQuit()  # Rewrap as a QdbError object.

    def user_call(self, stackframe, arg):
        # Only stop on calls when an effective breakpoint matches this frame.
        if self.break_here(stackframe):
            self.user_line(stackframe)

    def user_line(self, stackframe):
        # Sync state with the client and block waiting for the next command.
        self.setup_stack(stackframe, None)
        bound_cmd_manager = self.bound_cmd_manager
        bound_cmd_manager.send_watchlist()
        bound_cmd_manager.send_stack()
        bound_cmd_manager.next_command()

    def user_return(self, stackframe, return_value):
        stackframe.f_locals['__return__'] = return_value
        self.setup_stack(stackframe, None)
        bound_cmd_manager = self.bound_cmd_manager
        bound_cmd_manager.send_watchlist()
        bound_cmd_manager.send_stack()
        bound_cmd_manager.next_command(
            fmt_msg('return', str(return_value), serial=json.dumps),
        )

    def user_exception(self, stackframe, exc_info):
        exc_type, exc_value, exc_traceback = exc_info
        stackframe.f_locals['__exception__'] = exc_type, exc_value
        self.setup_stack(stackframe, exc_traceback)
        bound_cmd_manager = self.bound_cmd_manager
        bound_cmd_manager.send_watchlist()
        bound_cmd_manager.send_stack()
        msg = fmt_msg(
            'exception', {
                'type': exc_type.__name__,
                'value': str(exc_value),
                'traceback': traceback.format_tb(exc_traceback)
            },
            serial=json.dumps,
        )
        return self.bound_cmd_manager.next_command(msg)

    def do_clear(self, bpnum):
        """
        Handles deletion of temporary breakpoints.
        """
        if not (0 <= bpnum < len(Breakpoint.bpbynumber)):
            return
        self.clear_bpbynumber(bpnum)

    def set_quit(self):
        """
        Sets the quitting state and restores the program state.
        """
        self.quitting = True

    def eval_(self, code, pprint=False):
        """
        Evaluate code in the current frame, capturing output and routing the
        result (or a serialized exception) back through the command manager.
        """
        repr_fn = self.repr_fn
        outexc = None
        outmsg = None
        with capture_output() as (out, err), \
                self._new_execution_timeout(code), \
                self.inject_default_namespace() as stackframe:
            try:
                if not repr_fn and not pprint:
                    self.eval_fn(
                        code,
                        stackframe,
                        'single',
                    )
                else:
                    try:
                        # Do some some custom single mode magic that lets us
                        # call the repr function on the last expr.
                        value = progn(
                            code,
                            self.eval_fn,
                            stackframe,
                        )
                    except QdbPrognEndsInStatement:
                        # Statements have no value to print.
                        pass
                    else:
                        if pprint:
                            value = pformat(value)
                        if repr_fn:
                            value = repr_fn(value)
                        print(value)
            except Exception as e:
                outexc = type(e).__name__
                outmsg = self.exception_serializer(e)
            else:
                outmsg = out.getvalue().rstrip('\n')

        if outexc is not None or outmsg is not None:
            self.cmd_manager.send_print(code, outexc, outmsg)

        self.update_watchlist()

    def _stack_jump_to(self, index):
        """
        Jumps the stack to a specific index.
        Raises an IndexError if the desired index does not exist.
        """
        # Try to jump here first. This could raise an IndexError which will
        # prevent the tracer's state from being corrupted.
        self.curframe = self.stack[index][0]
        self.curindex = index
        self.curframe_locals = self.curframe.f_locals
        self.update_watchlist()

    def stack_shift_direction(self, direction):
        """
        Shifts the stack up or down depending on direction.
        If direction is positive, travel up, if direction is negative,
        travel down. If direction is 0, do nothing.
        If you cannot shift in the desired direction, an IndexError
        will be raised.
        """
        if direction == 0:
            return  # nop

        stride = -1 if direction > 0 else 1
        stack = self.stack
        stacksize = len(stack)
        curindex = self.curindex
        skip_fn = self.skip_fn
        target = None

        def pred_up(idx):
            return idx > 0

        def pred_down(idx):
            return idx < stacksize - 1

        pred = pred_up if direction > 0 else pred_down
        # Walk past skipped frames until a visible one is found.
        while pred(curindex):
            curindex += stride
            if not skip_fn(stack[curindex][0].f_code.co_filename):
                target = curindex
                break
        if target is None:
            raise IndexError('Shifted off the stack')
        self._stack_jump_to(target)

    def disable(self, mode='soft'):
        """
        Stops tracing.
        """
        try:
            if mode == 'soft':
                self.clear_all_breaks()
                self.set_continue()
                # Remove this instance so that new ones may be created.
                self.__class__._instance = None
            elif mode == 'hard':
                sys.exit(1)
            else:
                raise ValueError("mode must be 'hard' or 'soft'")
        finally:
            # Always tear down output redirection, logging, the command
            # manager, and the trace hook, even if disabling raised.
            self.restore_output_streams()
            if self.log_handler:
                self.log_handler.pop_application()
            self.cmd_manager.stop()
            if sys.gettrace() is self.trace_dispatch:
                sys.settrace(None)

    def __enter__(self):
        self.set_trace(sys._getframe().f_back, stop=False)
        return self

    def __exit__(self, type, value, traceback):
        self.disable('soft')

    def set_trace(self, stackframe=None, stop=True):
        """
        Starts debugging in stackframe or in the callers frame.
        If stop is True, begin stepping from here, otherwise, wait for
        the first breakpoint or exception.
        """
        # We need to look back 1 frame to get our caller.
        stackframe = stackframe or sys._getframe().f_back
        self.reset()
        while stackframe:
            stackframe.f_trace = self.trace_dispatch
            self.botframe = stackframe
            stackframe = stackframe.f_back
        if stop:
            self.set_step()
        else:
            self.set_continue()
        sys.settrace(self.trace_dispatch)

    @contextmanager
    def inject_default_namespace(self, stackframe=None):
        """
        Adds the default namespace to the frame, or if no frame is provided,
        self.curframe is used.
        """
        stackframe = stackframe or self.curframe
        to_remove = set()
        for k, v in items(self.default_namespace):
            if k not in stackframe.f_globals:
                # Only add the default things if the name is unbound.
                stackframe.f_globals[k] = v
                to_remove.add(k)

        try:
            yield stackframe
        finally:
            for k in to_remove:
                try:
                    del stackframe.f_globals[k]
                except KeyError:
                    # BUG FIX: deleting from f_globals raises KeyError, not
                    # IndexError. The body of this manager might have del'd
                    # this.
                    pass

            # Prevent exceptions from generating ref cycles.
            del stackframe
def main(user_email, url_api_collection, log_handler=None, mail_handler=None,
         dir_profile='profiles', profile_path=None, config_file=None,
         **kwargs):
    '''Executes a harvest with given parameters.
    Returns the ingest_doc_id, directory harvest saved to and number of
    records.

    If log_handler/mail_handler are not supplied, temporary logbook handlers
    are pushed for the duration of the run and popped on success.
    '''
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    num_recs = -1
    my_mail_handler = None
    if not mail_handler:
        my_mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, user_email, level='ERROR', bubble=True)
        my_mail_handler.push_application()
        mail_handler = my_mail_handler
    try:
        collection = Collection(url_api_collection)
    except Exception as e:
        msg = 'Exception in Collection {}, init {}'.format(url_api_collection,
                                                           str(e))
        logbook.error(msg)
        raise e
    if not (collection['harvest_type'] in HARVEST_TYPES):
        msg = 'Collection {} wrong type {} for harvesting. Harvest type {} \
is not in {}'.format(url_api_collection,
                     collection['harvest_type'],
                     collection['harvest_type'],
                     HARVEST_TYPES.keys())
        logbook.error(msg)
        raise ValueError(msg)
    mail_handler.subject = "Error during harvest of " + collection.url
    my_log_handler = None
    if not log_handler:  # can't init until have collection
        my_log_handler = FileHandler(get_log_file_path(collection.slug))
        my_log_handler.push_application()
    logger = logbook.Logger('HarvestMain')
    msg = 'Init harvester next. Collection:{}'.format(collection.url)
    logger.info(msg)
    # email directly
    mimetext = create_mimetext_msg(EMAIL_RETURN_ADDRESS, user_email, ' '.join(
        ('Starting harvest for ', collection.slug)), msg)
    try:  # TODO: request more emails from AWS
        mail_handler.deliver(mimetext, '*****@*****.**')
    except Exception:
        # Best-effort notification only; never let mail failure kill the
        # harvest. (Narrowed from a bare except so ^C still propagates.)
        pass
    logger.info('Create DPLA profile document')
    if not profile_path:
        profile_path = os.path.abspath(
            os.path.join(dir_profile, collection.id + '.pjs'))
    with codecs.open(profile_path, 'w', 'utf8') as pfoo:
        pfoo.write(collection.dpla_profile)
    logger.info('DPLA profile document : ' + profile_path)
    harvester = None
    try:
        harvester = HarvestController(
            user_email,
            collection,
            profile_path=profile_path,
            config_file=config_file,
            **kwargs)
    except Exception as e:
        import traceback
        msg = 'Exception in harvester init: type: {} TRACE:\n{}'.format(
            type(e), traceback.format_exc())
        logger.error(msg)
        raise e
    logger.info('Create ingest doc in couch')
    ingest_doc_id = harvester.create_ingest_doc()
    logger.info('Ingest DOC ID: ' + ingest_doc_id)
    logger.info('Start harvesting next')
    try:
        num_recs = harvester.harvest()
        msg = ''.join(('Finished harvest of ', collection.slug, '. ',
                       str(num_recs), ' records harvested.'))
        harvester.update_ingest_doc('complete', items=num_recs, num_coll=1)
        logger.info(msg)
        # email directly
        mimetext = create_mimetext_msg(
            EMAIL_RETURN_ADDRESS, user_email, ' '.join(
                ('Finished harvest of raw records '
                 'for ', collection.slug, ' enriching next')), msg)
        try:
            mail_handler.deliver(mimetext, '*****@*****.**')
        except Exception:
            # Best-effort notification only (see above).
            pass
    except Exception as e:
        import traceback
        error_msg = ''.join(("Error while harvesting: type-> ",
                             str(type(e)), " TRACE:\n" +
                             str(traceback.format_exc())))
        logger.error(error_msg)
        harvester.update_ingest_doc(
            'error', error_msg=error_msg, items=num_recs)
        raise e
    if my_log_handler:
        my_log_handler.pop_application()
    if my_mail_handler:
        my_mail_handler.pop_application()
    return ingest_doc_id, num_recs, harvester.dir_save, harvester
def main(user_email, url_api_collection, log_handler=None, mail_handler=None,
         dir_profile='profiles', profile_path=None, config_file=None,
         **kwargs):
    '''Executes a harvest with given parameters.

    This variant re-creates the HarvestController (and its couch connection)
    after the harvest completes, then marks the ingest document complete.

    Returns the ingest_doc_id, number of records, directory harvest saved to
    and the harvester itself.
    '''
    if not config_file:
        config_file = os.environ.get('DPLA_CONFIG_FILE', 'akara.ini')
    num_recs = -1
    # Handlers we push ourselves must be popped again; this is done in the
    # finally block below so error paths no longer leak pushed handlers.
    my_mail_handler = None
    if not mail_handler:
        my_mail_handler = logbook.MailHandler(
            EMAIL_RETURN_ADDRESS, user_email, level='ERROR', bubble=True)
        my_mail_handler.push_application()
        mail_handler = my_mail_handler
    my_log_handler = None
    try:
        try:
            collection = Collection(url_api_collection)
        except Exception as e:
            msg = 'Exception in Collection {}, init {}'.format(
                url_api_collection, str(e))
            logbook.error(msg)
            # Bare raise keeps the original traceback ("raise e" resets it).
            raise
        if collection['harvest_type'] not in HARVEST_TYPES:
            msg = 'Collection {} wrong type {} for harvesting. Harvest type {} \
is not in {}'.format(url_api_collection,
                     collection['harvest_type'],
                     collection['harvest_type'],
                     HARVEST_TYPES.keys())
            logbook.error(msg)
            raise ValueError(msg)
        mail_handler.subject = "Error during harvest of " + collection.url
        if not log_handler:
            # can't init until have collection
            my_log_handler = FileHandler(get_log_file_path(collection.slug))
            my_log_handler.push_application()
        logger = logbook.Logger('HarvestMain')
        msg = 'Init harvester next. Collection:{}'.format(collection.url)
        logger.info(msg)
        # email directly
        mimetext = create_mimetext_msg(
            EMAIL_RETURN_ADDRESS, user_email,
            ' '.join(('Starting harvest for ', collection.slug)), msg)
        try:  # TODO: request more emails from AWS
            mail_handler.deliver(mimetext, '*****@*****.**')
        except Exception:
            # Notification is best effort; never let it stop the harvest.
            pass
        logger.info('Create DPLA profile document')
        if not profile_path:
            profile_path = os.path.abspath(
                os.path.join(dir_profile, collection.id + '.pjs'))
        with codecs.open(profile_path, 'w', 'utf8') as pfoo:
            pfoo.write(collection.dpla_profile)
        logger.info('DPLA profile document : ' + profile_path)
        harvester = None
        try:
            harvester = HarvestController(
                user_email, collection, profile_path=profile_path,
                config_file=config_file, **kwargs)
        except Exception as e:
            import traceback
            msg = 'Exception in harvester init: type: {} TRACE:\n{}'.format(
                type(e), traceback.format_exc())
            logger.error(msg)
            raise
        logger.info('Create ingest doc in couch')
        ingest_doc_id = harvester.create_ingest_doc()
        logger.info('Ingest DOC ID: ' + ingest_doc_id)
        logger.info('Start harvesting next')
        num_recs = harvester.harvest()
        msg = ''.join(('Finished harvest of ', collection.slug, '. ',
                       str(num_recs), ' records harvested.'))
        logger.info(msg)
        # The harvest may have taken long enough that the original couch
        # session is stale; rebuild the controller and reattach it to the
        # existing ingest document before updating it.
        logger.debug('-- get a new harvester --')
        harvester = HarvestController(
            user_email, collection, profile_path=profile_path,
            config_file=config_file, **kwargs)
        harvester.ingest_doc_id = ingest_doc_id
        harvester.couch = dplaingestion.couch.Couch(
            config_file=harvester.config_file,
            dpla_db_name=harvester.couch_db_name,
            dashboard_db_name=harvester.couch_dashboard_name)
        harvester.ingestion_doc = harvester.couch.dashboard_db[ingest_doc_id]
        try:
            harvester.update_ingest_doc('complete', items=num_recs, num_coll=1)
            logger.debug('updated ingest doc!')
        except Exception as e:
            import traceback
            error_msg = ''.join(("Error while harvesting: type-> ",
                                 str(type(e)), " TRACE:\n" +
                                 str(traceback.format_exc())))
            logger.error(error_msg)
            harvester.update_ingest_doc(
                'error', error_msg=error_msg, items=num_recs)
            raise
    finally:
        # Always undo the handlers we pushed, even when the harvest failed;
        # previously these were only popped on the success path.
        if my_log_handler:
            my_log_handler.pop_application()
        if my_mail_handler:
            my_mail_handler.pop_application()
    return ingest_doc_id, num_recs, harvester.dir_save, harvester
class Qdb(Bdb, object):
    """
    The Quantopian Remote Debugger.
    """
    _instance = None

    def __new__(cls, *args, **kwargs):
        """
        Qdb objects are singletons that persist until their disable method is
        called.
        """
        if not cls._instance:
            # object.__new__ accepts no extra arguments; forwarding *args and
            # **kwargs to it is deprecated (an error on Python 3). The
            # constructor arguments are consumed by __init__ instead.
            cls._instance = super(Qdb, cls).__new__(cls)
        return cls._instance

    def __init__(self,
                 host='localhost',
                 port=8001,
                 auth_msg='',
                 default_file=None,
                 default_namespace=None,
                 eval_fn=None,
                 exception_serializer=None,
                 skip_fn=None,
                 pause_signal=None,
                 redirect_output=True,
                 retry_attepts=10,
                 uuid=None,
                 cmd_manager=None,
                 green=False,
                 repr_fn=None,
                 log_file=None,
                 execution_timeout=None):
        """
        Host and port define the address to connect to.
        The auth_msg is a message that will be sent with the start event to
        the server. This can be used to do server/tracer authentication.
        The default_file is a file to use if the file field is omitted from
        payloads.
        eval_fn is the function to eval code where the user may provide it,
        for example in a conditional breakpoint, or in the repl.
        skip_fn is similar to the skip list feature of Bdb, except that
        it should be a function that takes a filename and returns True iff
        the debugger should skip this file. These files will be suppressed
        from stack traces.
        The pause_signal is the signal to raise in this program to trigger a
        pause command. If this is None, this will default to SIGUSR2.
        retry_attepts (historical misspelling kept for API compatibility) is
        the number of times to attempt to connect to the server before
        raising a QdbFailedToConnect error.
        The repr_fn is a function to use to convert objects to strings to
        send them back to the server. By default, this wraps repr by
        catching exceptions and reporting them to the user.
        The uuid is the identifier on the server for this session. If none is
        provided, it will generate a uuid4.
        cmd_manager should be a callable that takes a Qdb instance and manages
        commands by implementing a next_command method. If none, a new, default
        manager will be created that reads commands from the server at
        (host, port).
        If green is True, this will use gevent safe timeouts, otherwise this
        will use signal based timeouts.
        execution_timeout is the amount of time user code has to execute
        before being cut short. This is applied to the repl, watchlist and
        conditional breakpoints. If None, no timeout is applied.
        """
        super(Qdb, self).__init__()
        self.address = host, port
        self.set_default_file(default_file)
        self.default_namespace = default_namespace or {}
        self.exception_serializer = exception_serializer or \
            default_exception_serializer
        self.eval_fn = eval_fn or default_eval_fn
        self.green = green
        self._file_cache = {}
        self.redirect_output = redirect_output
        self.retry_attepts = retry_attepts
        self.repr_fn = repr_fn
        self.skip_fn = skip_fn or (lambda _: False)
        self.pause_signal = pause_signal if pause_signal else signal.SIGUSR2
        self.uuid = str(uuid or uuid4())
        self.watchlist = {}
        self.execution_timeout = execution_timeout
        # We need to be able to send stdout back to the user debugging the
        # program. We hold a handle to this in case the program resets stdout.
        if self.redirect_output:
            self._old_stdout = sys.stdout
            self._old_stderr = sys.stderr
            self.stdout = StringIO()
            self.stderr = StringIO()
            sys.stdout = self.stdout
            sys.stderr = self.stderr
        self.forget()
        self.log_handler = None
        if log_file:
            self.log_handler = FileHandler(log_file)
            self.log_handler.push_application()
        if not cmd_manager:
            cmd_manager = RemoteCommandManager
        self.cmd_manager = cmd_manager(self)
        self.cmd_manager.start(auth_msg)

    def clear_output_buffers(self):
        """
        Clears the output buffers.
        """
        self.stdout.close()
        self.stderr.close()
        self.stdout = StringIO()
        self.stderr = StringIO()
        sys.stdout = self.stdout
        sys.stderr = self.stderr

    def restore_output_streams(self):
        """
        Restores the original output streams.
        """
        if self.redirect_output:
            sys.stdout = self._old_stdout
            sys.stderr = self._old_stderr

    def _new_execution_timeout(self, src):
        """
        Return a new execution timeout context manager.
        If no execution timeout is in place, returns ExitStack().
        """
        # We use green=False because this could be cpu bound. This will
        # still throw to the proper greenlet if this is gevented.
        return (Timeout(self.execution_timeout,
                        QdbExecutionTimeout(src, self.execution_timeout),
                        green=False)
                if self.execution_timeout else ExitStack())

    def set_default_file(self, filename):
        """
        Safely sets the new default file.
        """
        self.default_file = self.canonic(filename) if filename else None

    def get_line(self, filename, line):
        """
        Checks for any user cached files before deferring to the linecache.
        """
        # The line - 1 is so that querying line 1 gives us the first line in
        # the file.
        try:
            return self._get_file_lines(filename)[line - 1]
        except IndexError:
            return 'No source available for this line.'

    def get_file(self, filename):
        """
        Retrieves a file out of cache or opens and caches it.
        """
        return '\n'.join(self._get_file_lines(filename))

    def _get_file_lines(self, filename):
        """
        Retrieves the file from the file cache as a list of lines.
        If the file does not exist in the cache, it is cached from disk.
        """
        canonic_name = self.canonic(filename)
        try:
            return self._file_cache[canonic_name]
        except KeyError:
            if not self.cache_file(canonic_name):
                return []
            return self._file_cache.get(canonic_name)

    def cache_file(self, filename, contents=None):
        """
        Caches filename from disk into memory.
        This overrides whatever was cached for filename previously.
        If contents is provided, it allows the user to cache a filename to a
        string.
        Returns True if the file caching succeeded, otherwise returns False.
        """
        canonic_name = self.canonic(filename)
        if contents:
            self._file_cache[canonic_name] = contents.splitlines()
            return True
        try:
            with open(canonic_name, 'r') as f:
                # Strip trailing newlines so the cache holds bare lines.
                self._file_cache[canonic_name] = [
                    l[:-1] if l.endswith('\n') else l
                    for l in f.readlines()
                ]
            return True
        except IOError:
            # The caching operation failed.
            return False

    def set_break(self, filename, lineno, temporary=False, cond=None,
                  funcname=None, **kwargs):
        """
        Sets a breakpoint. This is overridden to account for the filecache
        and for unreachable lines.
        **kwargs are ignored. This is to work with payloads that pass extra
        fields to the set_break payload.
        """
        filename = self.canonic(filename) if filename else self.default_file
        try:
            self.get_line(filename, lineno)
        except IndexError:
            # NOTE(review): get_line catches IndexError internally and
            # returns a placeholder string, so this branch appears
            # unreachable — confirm whether unreachable lines should raise.
            raise QdbUnreachableBreakpoint({
                'file': filename,
                'line': lineno,
                'temp': temporary,
                'cond': cond,
                'func': funcname,
            })
        blist = self.breaks.setdefault(filename, [])
        if lineno not in blist:
            blist.append(lineno)
        Breakpoint(filename, lineno, temporary, cond, funcname)

    def clear_break(self, filename, lineno, *args, **kwargs):
        """
        Wrapper to make the breakpoint json standardized for setting
        and removing of breakpoints.
        This means that the same json data that was used to set a break point
        may be fed into this function with the extra values ignored.
        """
        super(Qdb, self).clear_break(filename, lineno)

    def canonic(self, filename):
        # Map compiled files back to their source so breakpoints and the
        # file cache always operate on the .py file.
        canonic_filename = super(Qdb, self).canonic(filename)
        if canonic_filename.endswith('pyc'):
            return canonic_filename[:-1]
        return canonic_filename

    def reset(self):
        self.botframe = None
        self._set_stopinfo(None, None)
        self.forget()

    def forget(self):
        self.lineno = None
        self.stack = []
        self.curindex = 0
        self.curframe = None

    def setup_stack(self, stackframe, traceback):
        """
        Sets up the state of the debugger object for this frame.
        """
        self.forget()
        self.stack, self.curindex = self.get_stack(stackframe, traceback)
        self.curframe = self.stack[self.curindex][0]
        self.curframe_locals = self.curframe.f_locals
        self.update_watchlist()

    def extend_watchlist(self, *args):
        """
        Adds every arg to the watchlist and updates.
        """
        for expr in args:
            self.watchlist[expr] = (False, '')
        self.update_watchlist()

    def update_watchlist(self):
        """
        Updates the watchlist by evaluating all the watched expressions in
        our current frame.
        """
        id_ = lambda n: n  # Why is this NOT a builtin?
        for expr in self.watchlist:
            try:
                with self._new_execution_timeout(expr), \
                        self.inject_default_namespace() as stackframe:
                    self.watchlist[expr] = (
                        None,
                        (self.repr_fn or id_)(self.eval_fn(expr, stackframe)))
            except Exception as e:
                self.watchlist[expr] = (type(e).__name__,
                                        self.exception_serializer(e))

    def effective(self, file, line, stackframe):
        """
        Finds the effective breakpoint for this line; called only
        when we know that there is a breakpoint here.

        returns the breakpoint paired with a flag denoting if we should
        remove this breakpoint or not.
        """
        for breakpoint in Breakpoint.bplist[file, line]:
            if breakpoint.enabled == 0:
                continue
            if not checkfuncname(breakpoint, stackframe):
                continue
            # Count every hit when breakpoint is enabled
            breakpoint.hits = breakpoint.hits + 1
            if not breakpoint.cond:
                # If unconditional, and ignoring go on to next, else break
                if breakpoint.ignore > 0:
                    breakpoint.ignore = breakpoint.ignore - 1
                    continue
                else:
                    return breakpoint, True
            else:
                # Conditional breakpoint
                # Ignore count applies only to those bpt hits where the
                # condition evaluates to true.
                try:
                    with self._new_execution_timeout(breakpoint.cond), \
                            self.inject_default_namespace(stackframe) as frame:
                        val = self.eval_fn(breakpoint.cond, frame, 'eval')
                except Exception as e:
                    # Send back a message to let the user know there was an
                    # issue with their breakpoint.
                    self.cmd_manager.send_error(
                        'condition', {
                            'cond': breakpoint.cond,
                            'line': line,
                            'exc': type(e).__name__,
                            'output': self.exception_serializer(e),
                        })
                    # Return this breakpoint to be safe. The user will be
                    # stopped here so that they can fix the breakpoint.
                    return breakpoint, False
                if val:
                    if breakpoint.ignore > 0:
                        breakpoint.ignore = breakpoint.ignore - 1
                    else:
                        return breakpoint, True
        return None, False

    def break_here(self, stackframe):
        """
        Checks if we should break execution in this stackframe.
        This function handles the cleanup and ignore counts for breakpoints.
        Returns True iff we should stop in the stackframe, False otherwise.
        """
        filename = self.canonic(stackframe.f_code.co_filename)
        if filename not in self.breaks:
            return False
        lineno = stackframe.f_lineno
        if lineno not in self.breaks[filename]:
            # The line itself has no breakpoint, but maybe the line is the
            # first line of a function with breakpoint set by function name.
            lineno = stackframe.f_code.co_firstlineno
            if lineno not in self.breaks[filename]:
                return False
        # flag says ok to delete temporary breakpoints.
        breakpoint, flag = self.effective(filename, lineno, stackframe)
        if breakpoint:
            self.currentbp = breakpoint.number
            if flag and breakpoint.temporary:
                self.do_clear(breakpoint.number)
            return True
        else:
            return False

    def trace_dispatch(self, stackframe, event, arg):
        """
        Trace function that does some preliminary checks and then defers to
        the event handler for each type of event.
        """
        if self.quitting:
            # We were told to quit by the user, bubble this up to their code.
            return
        if self.skip_fn(self.canonic(stackframe.f_code.co_filename)):
            # We want to skip this, don't stop but keep tracing.
            return self.trace_dispatch
        try:
            return super(Qdb, self).trace_dispatch(stackframe, event, arg)
        except BdbQuit:
            raise QdbQuit()  # Rewrap as a QdbError object.

    def user_call(self, stackframe, arg):
        if self.break_here(stackframe):
            self.user_line(stackframe)

    def user_line(self, stackframe):
        self.setup_stack(stackframe, None)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_output()
        self.cmd_manager.send_stack()
        self.cmd_manager.next_command()

    def user_return(self, stackframe, return_value):
        stackframe.f_locals['__return__'] = return_value
        self.setup_stack(stackframe, None)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_output()
        self.cmd_manager.send_stack()
        msg = fmt_msg('return', str(return_value), serial=pickle.dumps)
        self.cmd_manager.next_command(msg)

    def user_exception(self, stackframe, exc_info):
        exc_type, exc_value, exc_traceback = exc_info
        stackframe.f_locals['__exception__'] = exc_type, exc_value
        self.setup_stack(stackframe, exc_traceback)
        self.cmd_manager.send_watchlist()
        self.cmd_manager.send_output()
        self.cmd_manager.send_stack()
        msg = fmt_msg(
            'exception', {
                'type': str(exc_type),
                'value': str(exc_value),
                'traceback': traceback.format_tb(exc_traceback)
            },
            serial=pickle.dumps,
        )
        self.cmd_manager.next_command(msg)

    def do_clear(self, bpnum):
        """
        Handles deletion of temporary breakpoints.
        """
        if not (0 <= bpnum < len(Breakpoint.bpbynumber)):
            return
        self.clear_bpbynumber(bpnum)

    def set_quit(self):
        """
        Sets the quitting state and restores the program state.
        """
        self.quitting = True

    def disable(self, mode='soft'):
        """
        Stops tracing.
        """
        try:
            if mode == 'soft':
                self.clear_all_breaks()
                self.set_continue()
                # Remove this instance so that new ones may be created.
                self.__class__._instance = None
            elif mode == 'hard':
                sys.exit(1)
            else:
                raise ValueError("mode must be 'hard' or 'soft'")
        finally:
            self.restore_output_streams()
            if self.log_handler:
                self.log_handler.pop_application()
            self.cmd_manager.stop()

    def __enter__(self):
        self.set_trace(sys._getframe().f_back, stop=False)
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # __exit__ is always invoked positionally, so the parameter renames
        # (from type/value/traceback) cannot affect callers; they avoid
        # shadowing the builtin type() and the traceback module.
        if isinstance(exc_value, QdbQuit) or exc_value is None:
            self.disable('soft')

    def set_trace(self, stackframe=None, stop=True):
        """
        Starts debugging in stackframe or in the callers frame.
        If stop is True, begin stepping from here, otherwise, wait for
        the first breakpoint or exception.
        """
        # We need to look back 1 frame to get our caller.
        stackframe = stackframe or sys._getframe().f_back
        self.reset()
        while stackframe:
            stackframe.f_trace = self.trace_dispatch
            self.botframe = stackframe
            stackframe = stackframe.f_back
        if stop:
            self.set_step()
        else:
            self.set_continue()
        sys.settrace(self.trace_dispatch)

    @contextmanager
    def inject_default_namespace(self, stackframe=None):
        """
        Adds the default namespace to the frame, or if no frame is provided,
        self.curframe is used.
        """
        stackframe = stackframe or self.curframe
        to_remove = set()
        for k, v in self.default_namespace.iteritems():
            if k not in stackframe.f_globals:
                # Only add the default things if the name is unbound.
                stackframe.f_globals[k] = v
                to_remove.add(k)
        try:
            yield stackframe
        finally:
            for k in to_remove:
                try:
                    del stackframe.f_globals[k]
                except KeyError:
                    # Deleting a missing dict key raises KeyError (the old
                    # `except IndexError` never matched, so a user del'ing
                    # one of these names crashed the cleanup).
                    # The body of this manager might have del'd this.
                    pass
            # Prevent exceptions from generating ref cycles.
            del stackframe