Ejemplo n.º 1
0
 def tracking_predicate(value):
     """Wrap ``predicate``: time each call, fold the outcome into
     ``result.statistics`` and remember the smallest interesting value
     seen so far as ``result.best_example``.
     """
     t0 = monotonic_time()
     try:
         classification = predicate(value)
     except TimeoutExpired:
         # A timed-out call is recorded under its own classification.
         classification = Classification.TIMEOUT
     elapsed = monotonic_time() - t0
     result.statistics[classification.name].record(runtime=elapsed,
                                                   size=len(value))
     if classification == Classification.INTERESTING:
         best = result.best_example
         # Ties go to the newer value (<=), matching the original logic.
         if best is None or len(value) <= len(best):
             result.best_example = value
     return classification
Ejemplo n.º 2
0
def wait_child(config, pid, sigkill):
    """Wait for child process *pid* to exit and return its raw wait status.

    If ``config.timeout`` is set, poll with ``os.WNOHANG`` plus short
    sleeps; when the deadline passes, send *sigkill* to the child, reap
    it, and raise ``Timeout``.  Without a timeout, block in ``waitpid``.

    Raises:
        Timeout: the child did not exit within ``config.timeout`` seconds.
        Exception: ``waitpid`` reported a different pid than expected.
    """
    if config.timeout:
        deadline = monotonic_time() + config.timeout
        status = os.waitpid(pid, os.WNOHANG)
        while status[0] == 0:
            dt = deadline - monotonic_time()
            if dt < 0:
                os.kill(pid, sigkill)
                # Reap the child so it does not linger as a zombie.
                status = os.waitpid(pid, 0)
                raise Timeout()

            # Poll coarsely while far from the deadline, finely near it.
            pause = 0.100 if dt > 1.0 else 0.010
            # TODO: handle SIGCHLD to avoid wasting time in polling
            time.sleep(pause)
            status = os.waitpid(pid, os.WNOHANG)
    else:
        status = os.waitpid(pid, 0)
    if status[0] != pid:
        raise Exception("got the status of the wrong process!")
    return status[1]
Ejemplo n.º 3
0
def wait_child(config, pid, sigkill):
    """Wait for child process *pid* to exit and return its raw wait status.

    If ``config.timeout`` is set, poll with ``os.WNOHANG`` plus short
    sleeps; when the deadline passes, send *sigkill* to the child, reap
    it, and raise ``Timeout``.  Without a timeout, block in ``waitpid``.

    Raises:
        Timeout: the child did not exit within ``config.timeout`` seconds.
        Exception: ``waitpid`` reported a different pid than expected.
    """
    if config.timeout:
        deadline = monotonic_time() + config.timeout
        status = os.waitpid(pid, os.WNOHANG)
        while status[0] == 0:
            dt = deadline - monotonic_time()
            if dt < 0:
                os.kill(pid, sigkill)
                # Reap the child so it does not linger as a zombie.
                status = os.waitpid(pid, 0)
                raise Timeout()

            # Poll coarsely while far from the deadline, finely near it.
            pause = 0.100 if dt > 1.0 else 0.010
            # TODO: handle SIGCHLD to avoid wasting time in polling
            time.sleep(pause)
            status = os.waitpid(pid, os.WNOHANG)
    else:
        status = os.waitpid(pid, 0)
    if status[0] != pid:
        raise Exception("got the status of the wrong process!")
    return status[1]
Ejemplo n.º 4
0
 def finish(self):
     """Record the monotonic-clock timestamp at which this run ended."""
     self.end_time = monotonic_time()
Ejemplo n.º 5
0
 def start(self):
     """Record the monotonic-clock timestamp at which this run started."""
     self.start_time = monotonic_time()
Ejemplo n.º 6
0
def execute_subprocess(sandbox, code, globals, locals):
    """Execute *code* in a freshly spawned, sandboxed Python interpreter.

    The code, sandbox config, and the given *globals*/*locals* mappings
    are pickled to a temporary file; ``sandbox.subprocess_child`` runs in
    a child process and pickles its result to a second temporary file.
    On success, *locals* and *globals* (when not None) are updated in
    place with the child's view of them and the child's result value is
    returned.

    Raises:
        SandboxError: input/output data exceed the configured size limits,
            or the child process failed.
        Timeout: the child did not finish within ``config.timeout``.
    """
    config = sandbox.config
    # mkstemp() creates the files atomically, avoiding the symlink race
    # inherent to the deprecated mktemp(); only the names are needed, so
    # close the descriptors right away.
    fd, input_filename = tempfile.mkstemp()
    os.close(fd)
    fd, output_filename = tempfile.mkstemp()
    os.close(fd)
    args = (
        sys.executable,
        # FIXME: use '-S'
        '-E',
        '-m', 'sandbox.subprocess_child',
        input_filename, output_filename,
    )

    input_data = {
        'code': code,
        'config': config,
        'locals': locals,
        'globals': globals,
    }

    try:
        # serialize input data
        with open(input_filename, 'wb') as input_file:
            pickle.dump(input_data, input_file)
            if config.max_input_size:
                size = input_file.tell()
                if size > config.max_input_size:
                    raise SandboxError("Input data are too big: %s bytes (max=%s)"
                                       % (size, config.max_input_size))

        # create the subprocess
        process = subprocess.Popen(args, close_fds=True, shell=False)

        # wait for the child to exit, polling if a timeout is configured
        if config.timeout:
            deadline = monotonic_time() + config.timeout
            exitcode = process.poll()
            while exitcode is None:
                dt = deadline - monotonic_time()
                if dt < 0:
                    process.terminate()
                    # Reap the child before reporting the timeout.
                    exitcode = process.wait()
                    raise Timeout()

                # Poll coarsely far from the deadline, finely near it.
                pause = 0.5 if dt > 1.0 else 0.1
                # TODO: handle SIGCHLD to avoid wasting time in polling
                time.sleep(pause)
                exitcode = process.poll()
        else:
            exitcode = process.wait()
        os.unlink(input_filename)
        input_filename = None  # mark as already cleaned up

        # handle child process error
        if exitcode:
            if os.name != "nt" and exitcode < 0:
                # On POSIX a negative exit code means death by signal.
                signum = -exitcode
                text = "subprocess killed by signal %s" % signum
            else:
                text = "subprocess failed with exit code %s" % exitcode
            raise SandboxError(text)

        with open(output_filename, 'rb') as output_file:
            if config.max_output_size:
                # Seek to the end to measure the size, then rewind.
                output_file.seek(0, 2)
                size = output_file.tell()
                output_file.seek(0)
                if size > config.max_output_size:
                    raise SandboxError("Output data are too big: %s bytes (max=%s)"
                                       % (size, config.max_output_size))
            # NOTE: unpickling is only acceptable here because the data
            # comes from our own child process; never use pickle.load()
            # on untrusted input.
            output_data = pickle.load(output_file)
        os.unlink(output_filename)
        output_filename = None  # mark as already cleaned up
    finally:
        # Best-effort removal of any temp file not already unlinked.
        for filename in (input_filename, output_filename):
            if filename is not None:
                try:
                    os.unlink(filename)
                except OSError:
                    pass

    if 'error' in output_data:
        raise output_data['error']
    if locals is not None:
        locals.clear()
        locals.update(output_data['locals'])
    if globals is not None:
        globals.clear()
        globals.update(output_data['globals'])
    return output_data['result']
Ejemplo n.º 7
0
def execute_subprocess(sandbox, code, globals, locals):
    """Execute *code* in a freshly spawned, sandboxed Python interpreter.

    The code, sandbox config, and the given *globals*/*locals* mappings
    are pickled to a temporary file; ``sandbox.subprocess_child`` runs in
    a child process and pickles its result to a second temporary file.
    On success, *locals* and *globals* (when not None) are updated in
    place with the child's view of them and the child's result value is
    returned.

    Raises:
        SandboxError: input/output data exceed the configured size limits,
            or the child process failed.
        Timeout: the child did not finish within ``config.timeout``.
    """
    config = sandbox.config
    # mkstemp() creates the files atomically, avoiding the symlink race
    # inherent to the deprecated mktemp(); only the names are needed, so
    # close the descriptors right away.
    fd, input_filename = tempfile.mkstemp()
    os.close(fd)
    fd, output_filename = tempfile.mkstemp()
    os.close(fd)
    args = (
        sys.executable,
        # FIXME: use '-S'
        '-E',
        '-m',
        'sandbox.subprocess_child',
        input_filename,
        output_filename,
    )

    input_data = {
        'code': code,
        'config': config,
        'locals': locals,
        'globals': globals,
    }

    try:
        # serialize input data
        with open(input_filename, 'wb') as input_file:
            pickle.dump(input_data, input_file)
            if config.max_input_size:
                size = input_file.tell()
                if size > config.max_input_size:
                    raise SandboxError(
                        "Input data are too big: %s bytes (max=%s)" %
                        (size, config.max_input_size))

        # create the subprocess
        process = subprocess.Popen(args, close_fds=True, shell=False)

        # wait for the child to exit, polling if a timeout is configured
        if config.timeout:
            deadline = monotonic_time() + config.timeout
            exitcode = process.poll()
            while exitcode is None:
                dt = deadline - monotonic_time()
                if dt < 0:
                    process.terminate()
                    # Reap the child before reporting the timeout.
                    exitcode = process.wait()
                    raise Timeout()

                # Poll coarsely far from the deadline, finely near it.
                pause = 0.5 if dt > 1.0 else 0.1
                # TODO: handle SIGCHLD to avoid wasting time in polling
                time.sleep(pause)
                exitcode = process.poll()
        else:
            exitcode = process.wait()
        os.unlink(input_filename)
        input_filename = None  # mark as already cleaned up

        # handle child process error
        if exitcode:
            if os.name != "nt" and exitcode < 0:
                # On POSIX a negative exit code means death by signal.
                signum = -exitcode
                text = "subprocess killed by signal %s" % signum
            else:
                text = "subprocess failed with exit code %s" % exitcode
            raise SandboxError(text)

        with open(output_filename, 'rb') as output_file:
            if config.max_output_size:
                # Seek to the end to measure the size, then rewind.
                output_file.seek(0, 2)
                size = output_file.tell()
                output_file.seek(0)
                if size > config.max_output_size:
                    raise SandboxError(
                        "Output data are too big: %s bytes (max=%s)" %
                        (size, config.max_output_size))
            # NOTE: unpickling is only acceptable here because the data
            # comes from our own child process; never use pickle.load()
            # on untrusted input.
            output_data = pickle.load(output_file)
        os.unlink(output_filename)
        output_filename = None  # mark as already cleaned up
    finally:
        # Best-effort removal of any temp file not already unlinked.
        for filename in (input_filename, output_filename):
            if filename is not None:
                try:
                    os.unlink(filename)
                except OSError:
                    pass

    if 'error' in output_data:
        raise output_data['error']
    if locals is not None:
        locals.clear()
        locals.update(output_data['locals'])
    if globals is not None:
        globals.clear()
        globals.update(output_data['globals'])
    return output_data['result']
Ejemplo n.º 8
0
def collect(keywords, max_seconds, max_tweets):
    """
    Collect tweets from the Twitter firehose and output as TSV.

    Streams the filter API for at most *max_seconds* seconds or until
    *max_tweets* tweets have been collected, whichever comes first, then
    writes the sorted rows as tab-separated values to stdout.
    """
    if not credentials.exists():
        raise ClickException("No credentials found, please authenticate first")

    # Take an iterator over the streaming results of the API request
    json_lines_iterator = requests.post(
        f"https://stream.twitter.com/1.1/statuses/filter.json?track={','.join(keywords)}",
        auth=credentials.load(),
        stream=True,
    ).iter_lines()

    # Read the streaming results on a separate thread because consuming the
    # iterator is a blocking operation and because we must be able to exit the
    # program when `max_seconds` have elapsed. Results are put on the passed
    # queue, on which we can specify timeouts on read operations.
    tweets_queue = Queue()
    Thread(
        target=parse_json_tweets,
        args=(json_lines_iterator, tweets_queue),
        daemon=True,  # Kill thread automatically when main thread exits
    ).start()

    # To prevent users from having to look at a blank screen for `max_seconds`
    # seconds, we show them a progressbar during the collection of the tweets.
    progress_bar = click.progressbar(
        length=max_tweets,
        label=f"Collecting max. {max_tweets} tweets for {max_seconds} seconds",
        show_pos=True,
        show_eta=False,
        # Write to stderr so that users can redirect/pipe the TSV results on
        # stdout to somewhere else without including the progressbar.
        file=sys.stderr,
    )

    tweet_rows = []
    started_at = monotonic_time()
    while len(tweet_rows) < max_tweets:
        try:
            # Block on reading from the queue until `max_seconds` have elapsed
            # and we can stop aggregating entirely, or until we have a new tweet
            # to process. The max(..., 0) guard keeps the timeout non-negative.
            seconds_till_end = max(
                max_seconds - (monotonic_time() - started_at), 0)
            tweet = tweets_queue.get(timeout=seconds_till_end)

            # There are three types of system messages, as defined here:
            # https://developer.twitter.com/en/docs/tutorials/consuming-streaming-data.html#consuming-the-stream
            # In theory only error messages can occur while streaming from the
            # filter API, but for the sake of completeness we'll handle them
            # all, and as if they are errors.
            for system_message_type in ["error", "warn", "info"]:
                if system_message_type in tweet:
                    raise ClickException(tweet[system_message_type]["message"])

            tweet_rows.append(tweet_to_row(tweet))
            progress_bar.update(1)
        except QueueEmptyError:
            break  # `max_seconds` have elapsed, stop collecting

    if not tweet_rows:
        print("No tweets collected", file=sys.stderr)
        return

    tweet_rows = sorted(  # Ascending by default
        tweet_rows,
        key=lambda tweet_row: (
            tweet_row["author_created_at"],  # Users sorted chronologically
            tweet_row["author_id"],  # Grouped by user
            tweet_row["tweet_created_at"],  # Tweets sorted chronologically
        ),
    )
    writer = csv.DictWriter(sys.stdout,
                            fieldnames=tweet_rows[0].keys(),
                            dialect="excel-tab")
    writer.writeheader()
    writer.writerows(tweet_rows)
Ejemplo n.º 9
0
    for file in opts.files:
        if file == '-':
            file = sys.stdin
        else:
            if not hasattr(file, 'read'):
                try:
                    file = open(file)
                except IOError as e:
                    sys.stderr.write("Could not open file '%s': %s\n" %
                                     (file, e.args[1]))
                    continue

        locs['file'] = file

        if opts.relative:
            start = monotonic_time()

        while True:
            try:
                line = file.readline()
            except (BrokenPipeError, KeyboardInterrupt):
                # we're reading from stdin, and the proc at the other end of the pipe died
                # bail out
                break

            # py2 support
            if line == '':
                break

            line = chomp(line)