def create_fixtures(min_user_id, max_user_id):
    """Write a reduced copy of the processed Pulse logs for a range of users.

    The processed reads and clickthroughs are loaded for users whose IDs lie
    in [min_user_id, max_user_id], together with the stories referenced by
    those events.  Story IDs are reassigned to account for the omitted
    stories, and the selected user IDs are reassigned to 0, 1, 2, etc.  The
    reads, clickthroughs and stories files are written to a directory named
    "Fixtures for Users min_user_id-max_user_id" under
    PROCESSED_DATA_DIRECTORY; the directory is created when it does not
    already exist.  The output directory and the elapsed time are reported
    once everything has been written.

    min_user_id, an int, is the smallest user ID to include, in processed
    form (i.e., 0, 1, 2) rather than the original 38-character hexadecimal
    format.
    max_user_id, an int, is the largest user ID to include, in the same
    processed form.

    Raise TypeError if either argument is not an int.  Raise ValueError if
    min_user_id exceeds max_user_id or is negative.  Raise LookupError if no
    user in the range appears in the processed data, or if the largest user
    ID found is smaller than max_user_id.
    """
    started_at = time.time()

    # Argument checks run in a fixed order: types, then ordering, then sign.
    if not (isinstance(min_user_id, int) and isinstance(max_user_id, int)):
        raise TypeError("min_user_id and max_user_id must both be of type int.")
    if min_user_id > max_user_id:
        ordering_message = "min_user_id is %d but must be less than or equal to max_user_id, which is %d."
        raise ValueError(ordering_message % (min_user_id, max_user_id))
    if min_user_id < 0:
        raise ValueError("min_user_id is %d, but user IDs must be non-negative." % min_user_id)

    reads = _read_events(min_user_id, max_user_id, PROCESSED_READS_FILE_PATH)
    clickthroughs = _read_events(min_user_id, max_user_id, PROCESSED_CLICKTHROUGHS_FILE_PATH)

    # Refuse to build fixtures for a range the processed data cannot cover.
    largest_found = _get_largest_user_id(reads, clickthroughs)
    if largest_found is None:
        missing_message = "No User IDs in the range [%d, %d] were found in the processed data."
        raise LookupError(missing_message % (min_user_id, max_user_id))
    if largest_found < max_user_id:
        short_message = "max_user_id is %d, but the largest user ID in the processed data is %d."
        raise LookupError(short_message % (max_user_id, largest_found))

    # Only stories referenced by a selected event are kept.
    referenced_story_ids = frozenset(event[EVENTS_STORY_ID_INDEX] for event in reads + clickthroughs)
    stories, story_id_dict = _read_stories(referenced_story_ids)

    output_directory = "%sFixtures for Users %d-%d/" % (PROCESSED_DATA_DIRECTORY, min_user_id, max_user_id)
    if not os.path.exists(output_directory):
        os.mkdir(output_directory)

    # Reads first, then clickthroughs, then the stories themselves.
    for events, filename in ((reads, READS_FILENAME), (clickthroughs, CLICKTHROUGHS_FILENAME)):
        _write_events(events, output_directory + filename, story_id_dict, min_user_id)
    write_iterable(stories, output_directory + STORIES_FILENAME, "")

    print("Output fixtures in directory: %s" % output_directory)
    report_time_elapsed(started_at)
def fetch_story_contents(input_directory):
    """Write processed Pulse log files with full story contents.

    Output files by the same name in a sub-directory of the given directory
    named SUB_DIRECTORY_NAME, creating that sub-directory if it does not
    already exist.  Append a space followed by the full story contents to the
    story titles.  Remove stories for which content could not be fetched and
    events involving these stories.  Reassign story and user IDs to 0, 1, 2,
    etc. to fill the resulting gaps in the ID sequences.  Output a user IDs
    log file in which row numbers correspond to the new user IDs, and row
    values correspond to the old user IDs.

    input_directory, a str, is the file path to a directory containing
    processed Pulse log files lacking full story contents.

    Raise TypeError if input_directory is not a str.  Raise ValueError if
    input_directory is not an existing directory.
    """
    if not isinstance(input_directory, str):
        raise TypeError("Expected input_directory to be of type str.")

    if not path.isdir(input_directory):
        raise ValueError("Could not find given directory: %s" % input_directory)

    # Stories are read first: the resulting story_id_dict is what the event
    # readers are given alongside each events file.
    input_stories_path = path.join(input_directory, STORIES_FILENAME)
    stories_list, story_id_dict = _read_stories(input_stories_path)
    input_reads_path = path.join(input_directory, READS_FILENAME)
    reads_list = _read_events(story_id_dict, input_reads_path, READS_DESCRIPTOR)
    input_clickthroughs_path = path.join(input_directory,
                                         CLICKTHROUGHS_FILENAME)
    clickthroughs_list = _read_events(story_id_dict, input_clickthroughs_path,
                                      CLICKTHROUGHS_DESCRIPTOR)
    user_ids_list = get_user_ids(reads_list, clickthroughs_list)

    # Stringify every field with list comprehensions rather than map(): under
    # Python 3 map() returns a lazy, one-shot iterator, so the writers would
    # no longer receive real lists.  Under Python 2 the result is unchanged.
    reads_list = [[str(field) for field in read] for read in reads_list]
    clickthroughs_list = [[str(field) for field in clickthrough]
                          for clickthrough in clickthroughs_list]
    user_ids_list = [str(user_id) for user_id in user_ids_list]

    output_directory = path.join(input_directory, SUB_DIRECTORY_NAME)
    if not path.exists(output_directory):
        os.mkdir(output_directory)

    output_stories_path = path.join(output_directory, STORIES_FILENAME)
    write_2d_iterable(stories_list, output_stories_path, "")
    output_reads_path = path.join(output_directory, READS_FILENAME)
    write_2d_iterable(reads_list, output_reads_path)
    output_clickthroughs_path = path.join(output_directory,
                                          CLICKTHROUGHS_FILENAME)
    write_2d_iterable(clickthroughs_list, output_clickthroughs_path)
    output_users_path = path.join(output_directory, USER_IDS_FILENAME)
    write_iterable(user_ids_list, output_users_path)