def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Every event returned by ``Utilities.story_filter`` becomes one
    tab-delimited line made of, in order: the elements of the event tuple
    (read here as date, source, target, code), the joined issues (an empty
    field when the event has none), the joined IDs, the story URL (only when
    the story metadata holds a non-empty one) and the story source. Issues
    are joined as ISSUE,COUNT;ISSUE,COUNT and IDs as ID;ID;ID.

    Side effects: each event is echoed to stdout, and the module-level
    global ``StorySource`` is reassigned for every story that is processed
    ('NULL' when the story metadata has no 'source' entry).

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.

    output_file: String.
                 Filepath to which events should be written.
    """
    global StorySource

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # multi-word-verb check below does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue    # skip cases eliminated by story-level discard

        filtered_events = Utilities.story_filter(story_dict, key)
        meta = story_dict['meta']
        StorySource = meta['source'] if 'source' in meta else 'NULL'
        url = meta['url'] if 'url' in meta else ''

        story_output = []
        for event in filtered_events:
            event_info = filtered_events[event]
            story_date = event[0]
            source = event[1]
            target = event[2]
            code = event[3]
            ids = ';'.join(event_info['ids'])
            if 'issues' in event_info:
                joined_issues = ';'.join(
                    '{},{}'.format(k, v)
                    for k, v in event_info['issues'].items())
            else:
                joined_issues = ''

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(
                story_date, source, target, code, ids, StorySource))

            # 15.04.30: a very crude hack around an error involving
            # multi-word verbs: a non-string code is replaced by '010'.
            # The fields are assembled before joining so that a 4-element
            # event does not pick up a stray trailing tab (which would
            # shift every later column by one).
            if isinstance(code, string_types):
                fields = tuple(event)
            else:
                fields = tuple(event[:3]) + ('010',) + tuple(event[4:])
            event_str = '\t'.join(fields)

            # The issues column is always present, even when empty.
            event_str += '\t{}'.format(joined_issues)
            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)
            story_output.append(event_str)

        event_output.append('\n'.join(story_output))

    # Stories that produced no events contribute an empty string; drop them
    # so the output file contains no blank lines.
    final_event_str = '\n'.join(story for story in event_output if story)
    with open(output_file, 'w') as f:
        f.write(final_event_str)
def pipe_output(event_dict):
    """
    Format the coded event data for use in the processing pipeline.

    Stories whose 'sents' entry is empty are skipped, and stories for which
    ``Utilities.story_filter`` returns no events are left out of the result.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.

    Returns
    -------

    final_out: Dictionary.
               StoryIDs as the keys and a list of coded event tuples as the
               values, i.e., {StoryID: [(full_record), (full_record)]}. The
               ``full_record`` portion is structured as
               (story_date, source, target, code, joined_issues, ids, url,
               StorySource)
               with the ``joined_issues`` field being optional (it is only
               present when the event carries an 'issues' entry). ``url`` is
               the empty string and ``StorySource`` is 'NULL' when the story
               metadata lacks the corresponding key. The issues are joined
               in the format of ISSUE,COUNT;ISSUE,COUNT. The IDs are joined
               as ID;ID;ID.
    """
    final_out = {}
    for story_id in event_dict:
        story = event_dict[story_id]
        if not story['sents']:
            continue    # skip cases eliminated by story-level discard

        events = Utilities.story_filter(story, story_id)
        meta = story['meta']
        story_source = meta['source'] if 'source' in meta else 'NULL'
        story_url = meta['url'] if 'url' in meta else ''

        if not events:
            continue    # nothing survived filtering for this story

        records = []
        for event in events:
            details = events[event]
            head = (event[0], event[1], event[2], event[3])
            tail = (';'.join(details['ids']), story_url, story_source)
            if 'issues' in details:
                pairs = ['{},{}'.format(name, count)
                         for name, count in details['issues'].items()]
                record = head + (';'.join(pairs),) + tail
            else:
                record = head + tail
            records.append(record)
        final_out[story_id] = records

    return final_out