Python write_2d_iterable Examples

Programming Language: Python

Namespace/Package Name: utilities

Method/Function: write_2d_iterable

Examples at hotexamples.com: 2

Python write_2d_iterable - 2 examples found. These are the top-rated real-world Python examples of utilities.write_2d_iterable extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: stem_processed_stories.py Project: AlexLerman/Story-Reads-Prediction

def stem_processed_stories(input_file_path):
    """Write a copy of a stories file in which every story title is stemmed.

    Read the file at input_file_path line by line, lower-case each line and
    split it on DELIMITER.  Tokenize the field at NEW_STORIES_TITLE_INDEX with
    WordPunctTokenizer, discard tokens that start with a non-word character,
    stem the remaining tokens with PorterStemmer and store them back in that
    field joined by single spaces.  Pass the resulting rows to
    write_2d_iterable with the output path
    input_file_path + STEMMED_STORIES_EXTENSION, print that path and call
    report_time_elapsed with the start time.

    input_file_path, a str, is the file path to the stories file to stem.

    Raise TypeError if input_file_path is not a str.
    """
    start_time = time.time()
    if not isinstance(input_file_path, str):
        raise TypeError("Expected input_file_path to be of type str.")

    stemmer = PorterStemmer()
    # One tokenizer serves every story; no need to rebuild it per line.
    tokenizer = WordPunctTokenizer()
    # Raw string: \W must reach the regex engine, not be read as a str escape.
    non_word = re.compile(r'\W+')
    stories_list = []
    story_stream = open_safely(input_file_path)
    try:
        for story_as_str in story_stream:
            # Strip only an actual line terminator, so a final line without
            # one does not lose its last character.
            if story_as_str.endswith("\n"):
                story_as_str = story_as_str[:-1]
            story_as_list = story_as_str.lower().split(DELIMITER)
            story_title = story_as_list[NEW_STORIES_TITLE_INDEX]
            tok_contents = tokenizer.tokenize(story_title)
            # match() anchors at the start, so this drops tokens that begin
            # with a non-word character (e.g. punctuation tokens).
            stem_contents = [stemmer.stem(word) for word in tok_contents
                             if non_word.match(word) is None]
            story_as_list[NEW_STORIES_TITLE_INDEX] = " ".join(stem_contents)
            stories_list.append(story_as_list)
    finally:
        # Release the input stream even if a line fails to process.
        story_stream.close()

    output_file_path = input_file_path + STEMMED_STORIES_EXTENSION
    write_2d_iterable(stories_list, output_file_path)
    print("Output stemmed stories to %s" % output_file_path)
    report_time_elapsed(start_time)

Example #2

Show file

File: fetch_story_contents.py Project: AlexLerman/Story-Reads-Prediction

def fetch_story_contents(input_directory):
    """Write processed Pulse log files with full story contents.
    
    Output files by the same name in a sub-directory of the given directory
    named SUB_DIRECTORY_NAME.  Append a space followed by the full story
    contents to the story titles.  Remove stories for which no content could
    be fetched and events involving these stories.  Reassign story and user IDs
    to 0, 1, 2, etc. to fill the resulting gaps in the ID sequences.  Output a
    user IDs log file in which row numbers correspond to the new user IDs, and
    row values correspond to the old user IDs.
    
    input_directory, a str, is the file path to a directory containing processed
    Pulse log files lacking full story contents.

    Raise TypeError if input_directory is not a str, and ValueError if it is
    not an existing directory.
    """
    if not isinstance(input_directory, str):
        raise TypeError("Expected input_directory to be of type str.")
    
    if not path.isdir(input_directory):
        raise ValueError("Could not find given directory: %s" % input_directory)
    
    input_stories_path = path.join(input_directory, STORIES_FILENAME)
    stories_list, story_id_dict = _read_stories(input_stories_path)
    input_reads_path = path.join(input_directory, READS_FILENAME)
    reads_list = _read_events(story_id_dict, input_reads_path, READS_DESCRIPTOR)
    input_clickthroughs_path = path.join(input_directory,
                                         CLICKTHROUGHS_FILENAME)
    clickthroughs_list = _read_events(story_id_dict, input_clickthroughs_path,
                                      CLICKTHROUGHS_DESCRIPTOR)
    user_ids_list = get_user_ids(reads_list, clickthroughs_list)

    # Convert every value to str for writing.  Build real lists rather than
    # map() results: on Python 3 map() yields a one-shot iterator, which
    # would not match the *_list names or survive being traversed twice.
    reads_list = [[str(value) for value in read] for read in reads_list]
    clickthroughs_list = [[str(value) for value in clickthrough]
                          for clickthrough in clickthroughs_list]
    user_ids_list = [str(user_id) for user_id in user_ids_list]
    
    output_directory = path.join(input_directory, SUB_DIRECTORY_NAME)
    if not path.exists(output_directory):
        os.mkdir(output_directory)
    
    output_stories_path = path.join(output_directory, STORIES_FILENAME)
    # NOTE(review): the meaning of the third argument ("") is defined by
    # write_2d_iterable, which is not visible here -- confirm before changing.
    write_2d_iterable(stories_list, output_stories_path, "")
    output_reads_path = path.join(output_directory, READS_FILENAME)
    write_2d_iterable(reads_list, output_reads_path)
    output_clickthroughs_path = path.join(output_directory,
                                          CLICKTHROUGHS_FILENAME)
    write_2d_iterable(clickthroughs_list, output_clickthroughs_path)
    output_users_path = path.join(output_directory, USER_IDS_FILENAME)
    write_iterable(user_ids_list, output_users_path)