예제 #1
0
def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Every event that survives ``Utilities.story_filter`` becomes one
    tab-separated line made of, in order: the fields of the event tuple
    (date, source, target, code and any further fields), the joined issues
    (empty when the event has none), the joined sentence IDs, the story
    URL (only when the story metadata has a non-empty one) and the story
    source. Issues are joined as ``KEY,VALUE;KEY,VALUE`` and IDs as
    ``ID;ID;ID``. Each event is also echoed to stdout.

    Stories whose ``'sents'`` entry is empty are skipped, and stories that
    produce no events contribute no lines to the file.

    Side effect: the module-level ``StorySource`` is set to the source of
    the story being processed (``'NULL'`` when the metadata has none).

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH. Each
                value must provide the ``'sents'`` and ``'meta'`` keys.


    output_file: String.
                    Filepath to which events should be written.
    """
    global StorySource

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # type check below does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue    # skip cases eliminated by story-level discard
        story_output = []
        filtered_events = Utilities.story_filter(story_dict, key)
        meta = story_dict['meta']
        StorySource = meta['source'] if 'source' in meta else 'NULL'
        url = meta['url'] if 'url' in meta else ''

        for event in filtered_events:
            event_info = filtered_events[event]
            ids = ';'.join(event_info['ids'])

            if 'issues' in event_info:
                issues = ['{},{}'.format(k, v)
                          for k, v in event_info['issues'].items()]
                joined_issues = ';'.join(issues)
            else:
                joined_issues = ''

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(event[0], event[1],
                                                        event[2], event[3],
                                                        ids, StorySource))

            if isinstance(event[3], string_types):
                event_str = '\t'.join(event)
            else:
                # 15.04.30: a very crude hack around an error involving
                # multi-word verbs: a non-string code is replaced by '010'.
                # Joining the field list (rather than concatenating around
                # a fixed '\t010\t') avoids a stray empty column when the
                # event has no fields after the code.
                event_str = '\t'.join(
                    list(event[:3]) + ['010'] + list(event[4:]))

            # The issues column is always present, even when it is empty.
            event_str += '\t{}'.format(joined_issues)

            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)
            story_output.append(event_str)

        event_output.append('\n'.join(story_output))

    # Filter out blank lines left by stories that produced no events
    event_output = [story for story in event_output if story]
    final_event_str = '\n'.join(event_output)
    with open(output_file, 'w') as f:
        f.write(final_event_str)
예제 #2
0
def pipe_output(event_dict):
    """
    Format the coded event data for use in the processing pipeline.

    Stories whose ``'sents'`` entry is empty, and stories for which
    ``Utilities.story_filter`` yields no events, are left out of the
    result entirely.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH. Each
                value must provide the ``'sents'`` and ``'meta'`` keys.


    Returns
    -------

    final_out: Dictionary.
                StoryIDs as the keys and a list of coded event tuples as the
                values, i.e., {StoryID: [(full_record), (full_record)]}. The
                ``full_record`` portion is structured as
                (story_date, source, target, code, joined_issues, ids, url,
                StorySource) with the ``joined_issues`` field being present
                only when the event carries issues. ``url`` is the empty
                string and ``StorySource`` is ``'NULL'`` when the story
                metadata lacks them.
                The issues are joined in the format of ISSUE,COUNT;ISSUE,COUNT.
                The IDs are joined as ID;ID;ID.

    """
    records_by_story = {}
    for story_id in event_dict:
        story = event_dict[story_id]
        if not story['sents']:
            # Story was eliminated by the story-level discard.
            continue

        events = Utilities.story_filter(story, story_id)

        meta = story['meta']
        story_source = meta['source'] if 'source' in meta else 'NULL'
        link = meta['url'] if 'url' in meta else ''

        if not events:
            # Nothing coded for this story: no entry in the result.
            continue

        records = []
        for event in events:
            details = events[event]
            # Leading fields: date, source, target, code.
            head = (event[0], event[1], event[2], event[3])
            tail = (';'.join(details['ids']), link, story_source)

            if 'issues' in details:
                issue_field = ';'.join(
                    '{},{}'.format(name, count)
                    for name, count in details['issues'].items())
                record = head + (issue_field,) + tail
            else:
                record = head + tail

            records.append(record)

        records_by_story[story_id] = records

    return records_by_story