def tracking_predicate(value):
    """Classify *value* with the wrapped predicate while recording stats.

    Times the predicate call (a timeout maps to Classification.TIMEOUT),
    records runtime/size statistics per classification name, and keeps the
    shortest INTERESTING value seen so far as ``result.best_example``.
    Returns the classification.
    """
    started = monotonic_time()
    try:
        outcome = predicate(value)
    except TimeoutExpired:
        outcome = Classification.TIMEOUT
    elapsed = monotonic_time() - started

    result.statistics[outcome.name].record(runtime=elapsed, size=len(value))

    if outcome == Classification.INTERESTING:
        best = result.best_example
        # Keep the shortest interesting example (ties replace the old one).
        if best is None or len(value) <= len(best):
            result.best_example = value
    return outcome
def wait_child(config, pid, sigkill):
    """Wait for child process *pid* to exit and return its raw wait status.

    If ``config.timeout`` is set, poll the child with ``os.WNOHANG``; when
    the deadline passes, send it *sigkill*, reap it, and raise ``Timeout``.
    Otherwise block in ``os.waitpid`` until the child exits.

    Raises:
        Timeout: the child did not exit before ``config.timeout`` seconds
            (it is killed and reaped first).
        Exception: ``os.waitpid`` reported a status for a different pid.
    """
    if config.timeout:
        deadline = monotonic_time() + config.timeout
        # Removed dead "kill = False" flag: it was assigned and never read.
        status = os.waitpid(pid, os.WNOHANG)
        while status[0] == 0:
            dt = deadline - monotonic_time()
            if dt < 0:
                # Deadline passed: kill the child, reap it, report timeout.
                os.kill(pid, sigkill)
                status = os.waitpid(pid, 0)
                raise Timeout()
            # Poll faster as the deadline approaches.
            pause = 0.100 if dt > 1.0 else 0.010
            # TODO: handle SIGCHLD to avoid wasting time in polling
            time.sleep(pause)
            status = os.waitpid(pid, os.WNOHANG)
    else:
        status = os.waitpid(pid, 0)
    if status[0] != pid:
        raise Exception("got the status of the wrong process!")
    return status[1]
def finish(self):
    """Record the end of the measured interval (via monotonic_time())."""
    self.end_time = monotonic_time()
def start(self):
    """Record the start of the measured interval (via monotonic_time())."""
    self.start_time = monotonic_time()
def execute_subprocess(sandbox, code, globals, locals):
    """Execute *code* in a sandboxed child Python process.

    Serializes (code, config, globals, locals) to a temp file, runs
    ``sandbox.subprocess_child`` in a fresh interpreter, enforces the
    configured timeout and input/output size limits, then loads the
    child's pickled output, propagates any child error, and mirrors the
    resulting globals/locals back into the caller's dicts.

    Returns the child's ``result`` value.

    Raises:
        SandboxError: input/output too big, or the child failed/was killed.
        Timeout: the child exceeded ``config.timeout`` (it is terminated).
    """
    config = sandbox.config
    # NOTE(review): tempfile.mktemp() is race-prone (the name can be claimed
    # by another process before it is opened); mkstemp() would be safer.
    input_filename = tempfile.mktemp()
    output_filename = tempfile.mktemp()
    args = (
        sys.executable,
        # FIXME: use '-S'
        '-E',
        '-m', 'sandbox.subprocess_child',
        input_filename, output_filename,
    )
    input_data = {
        'code': code,
        'config': config,
        'locals': locals,
        'globals': globals,
    }
    try:
        # serialize input data
        with open(input_filename, 'wb') as input_file:
            pickle.dump(input_data, input_file)
            if config.max_input_size:
                size = input_file.tell()
                if size > config.max_input_size:
                    raise SandboxError(
                        "Input data are too big: %s bytes (max=%s)"
                        % (size, config.max_input_size))

        # create the subprocess
        process = subprocess.Popen(args, close_fds=True, shell=False)

        # wait for process exit, polling with an adaptive pause
        # (removed dead "kill = False" flag: assigned but never read)
        if config.timeout:
            timeout = monotonic_time() + config.timeout
            exitcode = process.poll()
            while exitcode is None:
                dt = timeout - monotonic_time()
                if dt < 0:
                    process.terminate()
                    exitcode = process.wait()
                    raise Timeout()
                pause = 0.5 if dt > 1.0 else 0.1
                # TODO: handle SIGCHLD to avoid wasting time in polling
                time.sleep(pause)
                exitcode = process.poll()
        else:
            exitcode = process.wait()
        os.unlink(input_filename)
        input_filename = None  # mark as cleaned up for the finally block

        # handle child process error
        if exitcode:
            if os.name != "nt" and exitcode < 0:
                signum = -exitcode
                text = "subprocess killed by signal %s" % signum
            else:
                text = "subprocess failed with exit code %s" % exitcode
            raise SandboxError(text)

        with open(output_filename, 'rb') as output_file:
            if config.max_output_size:
                # seek to the end to measure the file size, then rewind
                output_file.seek(0, 2)
                size = output_file.tell()
                output_file.seek(0)
                if size > config.max_output_size:
                    raise SandboxError(
                        "Output data are too big: %s bytes (max=%s)"
                        % (size, config.max_output_size))
            # SECURITY: unpickling data written by the sandboxed child -- a
            # compromised child could craft a pickle that executes arbitrary
            # code in this (parent) process.
            output_data = pickle.load(output_file)
        os.unlink(output_filename)
        output_filename = None  # mark as cleaned up for the finally block
    finally:
        # best-effort removal of any temp file not already unlinked above
        for filename in (input_filename, output_filename):
            if filename is not None:
                try:
                    os.unlink(filename)
                except OSError:
                    pass
    if 'error' in output_data:
        raise output_data['error']
    if locals is not None:
        locals.clear()
        locals.update(output_data['locals'])
    if globals is not None:
        globals.clear()
        globals.update(output_data['globals'])
    return output_data['result']
def execute_subprocess(sandbox, code, globals, locals):
    """Execute *code* in a sandboxed child Python process.

    Serializes (code, config, globals, locals) to a temp file, runs
    ``sandbox.subprocess_child`` in a fresh interpreter, enforces the
    configured timeout and input/output size limits, then loads the
    child's pickled output, propagates any child error, and mirrors the
    resulting globals/locals back into the caller's dicts.

    Returns the child's ``result`` value.

    Raises:
        SandboxError: input/output too big, or the child failed/was killed.
        Timeout: the child exceeded ``config.timeout`` (it is terminated).
    """
    config = sandbox.config
    # NOTE(review): tempfile.mktemp() is race-prone (the name can be claimed
    # by another process before it is opened); mkstemp() would be safer.
    input_filename = tempfile.mktemp()
    output_filename = tempfile.mktemp()
    args = (
        sys.executable,
        # FIXME: use '-S'
        '-E',
        '-m', 'sandbox.subprocess_child',
        input_filename, output_filename,
    )
    input_data = {
        'code': code,
        'config': config,
        'locals': locals,
        'globals': globals,
    }
    try:
        # serialize input data
        with open(input_filename, 'wb') as input_file:
            pickle.dump(input_data, input_file)
            if config.max_input_size:
                size = input_file.tell()
                if size > config.max_input_size:
                    raise SandboxError(
                        "Input data are too big: %s bytes (max=%s)"
                        % (size, config.max_input_size))

        # create the subprocess
        process = subprocess.Popen(args, close_fds=True, shell=False)

        # wait for process exit, polling with an adaptive pause
        # (removed dead "kill = False" flag: assigned but never read)
        if config.timeout:
            timeout = monotonic_time() + config.timeout
            exitcode = process.poll()
            while exitcode is None:
                dt = timeout - monotonic_time()
                if dt < 0:
                    process.terminate()
                    exitcode = process.wait()
                    raise Timeout()
                pause = 0.5 if dt > 1.0 else 0.1
                # TODO: handle SIGCHLD to avoid wasting time in polling
                time.sleep(pause)
                exitcode = process.poll()
        else:
            exitcode = process.wait()
        os.unlink(input_filename)
        input_filename = None  # mark as cleaned up for the finally block

        # handle child process error
        if exitcode:
            if os.name != "nt" and exitcode < 0:
                signum = -exitcode
                text = "subprocess killed by signal %s" % signum
            else:
                text = "subprocess failed with exit code %s" % exitcode
            raise SandboxError(text)

        with open(output_filename, 'rb') as output_file:
            if config.max_output_size:
                # seek to the end to measure the file size, then rewind
                output_file.seek(0, 2)
                size = output_file.tell()
                output_file.seek(0)
                if size > config.max_output_size:
                    raise SandboxError(
                        "Output data are too big: %s bytes (max=%s)"
                        % (size, config.max_output_size))
            # SECURITY: unpickling data written by the sandboxed child -- a
            # compromised child could craft a pickle that executes arbitrary
            # code in this (parent) process.
            output_data = pickle.load(output_file)
        os.unlink(output_filename)
        output_filename = None  # mark as cleaned up for the finally block
    finally:
        # best-effort removal of any temp file not already unlinked above
        for filename in (input_filename, output_filename):
            if filename is not None:
                try:
                    os.unlink(filename)
                except OSError:
                    pass
    if 'error' in output_data:
        raise output_data['error']
    if locals is not None:
        locals.clear()
        locals.update(output_data['locals'])
    if globals is not None:
        globals.clear()
        globals.update(output_data['globals'])
    return output_data['result']
def collect(keywords, max_seconds, max_tweets):
    """
    Collect tweets from the Twitter firehose and output as TSV.

    Streams the filter API for the given keywords until either `max_tweets`
    tweets have been collected or `max_seconds` have elapsed, then writes
    the sorted rows as tab-separated values to stdout.
    """
    if not credentials.exists():
        raise ClickException("No credentials found, please authenticate first")

    # Take an iterator over the streaming results of the API request
    json_lines_iterator = requests.post(
        f"https://stream.twitter.com/1.1/statuses/filter.json?track={','.join(keywords)}",
        auth=credentials.load(),
        stream=True,
    ).iter_lines()

    # Read the streaming results on a separate thread because consuming the
    # iterator is a blocking operation and because we must be able to exit the
    # program when `max_seconds` have elapsed. Results are put on the passed
    # queue, on which we can specify timeouts on read operations.
    tweets_queue = Queue()
    Thread(
        target=parse_json_tweets,
        args=(json_lines_iterator, tweets_queue),
        daemon=True,  # Kill thread automatically when main thread exits
    ).start()

    # To prevent users from having to look at a blank screen for `max_seconds`
    # seconds, we show them a progressbar during the collection of the tweets.
    progress_bar = click.progressbar(
        length=max_tweets,
        label=f"Collecting max. {max_tweets} tweets for {max_seconds} seconds",
        show_pos=True,
        show_eta=False,
        # Write to stderr so that users can redirect/pipe the TSV results on
        # stdout to somewhere else without including the progressbar.
        file=sys.stderr,
    )

    tweet_rows = []
    started_at = monotonic_time()
    # Fix: enter the progressbar as a context manager, as click requires --
    # otherwise the bar is never properly started nor finished (no trailing
    # newline is printed when collection ends).
    with progress_bar:
        while len(tweet_rows) < max_tweets:
            try:
                # Block on reading from the queue until `max_seconds` have
                # elapsed and we can stop aggregating entirely, or until we
                # have a new tweet to process.
                seconds_till_end = max(
                    max_seconds - (monotonic_time() - started_at), 0)
                tweet = tweets_queue.get(timeout=seconds_till_end)

                # There are three types of system messages, as defined here:
                # https://developer.twitter.com/en/docs/tutorials/consuming-streaming-data.html#consuming-the-stream
                # In theory only error messages can occur while streaming from
                # the filter API, but for the sake of completeness we'll
                # handle them all, and as if they are errors.
                for system_message_type in ["error", "warn", "info"]:
                    if system_message_type in tweet:
                        raise ClickException(
                            tweet[system_message_type]["message"])

                tweet_rows.append(tweet_to_row(tweet))
                progress_bar.update(1)
            except QueueEmptyError:
                break  # `max_seconds` have elapsed, stop collecting

    if not tweet_rows:
        print("No tweets collected", file=sys.stderr)
        return

    tweet_rows = sorted(  # Ascending by default
        tweet_rows,
        key=lambda tweet_row: (
            tweet_row["author_created_at"],  # Users sorted chronologically
            tweet_row["author_id"],  # Grouped by user
            tweet_row["tweet_created_at"],  # Tweets sorted chronologically
        ),
    )

    writer = csv.DictWriter(sys.stdout, fieldnames=tweet_rows[0].keys(),
                            dialect="excel-tab")
    writer.writeheader()
    writer.writerows(tweet_rows)
for file in opts.files: if file == '-': file = sys.stdin else: if not hasattr(file, 'read'): try: file = open(file) except IOError as e: sys.stderr.write("Could not open file '%s': %s\n" % (file, e.args[1])) continue locs['file'] = file if opts.relative: start = monotonic_time() while True: try: line = file.readline() except (BrokenPipeError, KeyboardInterrupt): # we're reading from stdin, and the proc at the other end of the pipe died # bail out break # py2 support if line == '': break line = chomp(line)