Esempi in Python per story_filter, esempi in Python per utilities.story_filter

Esempio n. 1

0

Mostra file

File: PETRwriter.py Progetto: GuardingDog/petrarch_chinese

def pipe_output(event_dict):
    """
    Build the per-story event records handed to the processing pipeline.

    Parameters
    ----------
    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH, keyed by
                StoryID. Every story must provide a 'sents' entry and a
                'meta' dictionary.

    Returns
    -------
    final_out: Dictionary.
                StoryIDs as the keys and a list of coded event tuples as the
                values, i.e., {StoryID: [(full_record), (full_record)]}.
                Each ``full_record`` is
                (story_date, source, target, code, joined_issues, ids, url,
                StorySource); the ``joined_issues`` element is left out
                entirely when the event has no 'issues' entry.
                The issues are joined in the format of ISSUE,COUNT;ISSUE,COUNT.
                The IDs are joined as ID;ID;ID.
                ``url`` is '' and ``StorySource`` is 'NULL' when the story
                metadata lacks them. Stories with no sentences or no
                filtered events do not appear in the result.
    """
    final_out = {}
    for story_id in event_dict:
        story = event_dict[story_id]
        if not story['sents']:
            # Eliminated by a story-level discard.
            continue

        coded = utilities.story_filter(story, story_id)

        meta = story['meta']
        StorySource = meta['source'] if 'source' in meta else 'NULL'
        url = meta['url'] if 'url' in meta else ''

        if not coded:
            continue

        records = []
        for event in coded:
            head = (event[0], event[1], event[2], event[3])
            details = coded[event]
            ids = ';'.join(details['ids'])

            if 'issues' in details:
                pairs = details['issues'].items()
                joined_issues = ';'.join(
                    ['{},{}'.format(name, count) for name, count in pairs])
                tail = (joined_issues, ids, url, StorySource)
            else:
                tail = (ids, url, StorySource)

            records.append(head + tail)

        final_out[story_id] = records

    return final_out

Esempio n. 2

0

Mostra file

File: PETRwriter.py Progetto: JingL1014/petrarch2

def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Every filtered event becomes one tab-separated line made of: the fields
    of the event tuple, the joined issues (an empty column when there are
    none), the joined IDs, the story URL (only when the story has one) and
    the story source. When the ``PETRglobals.WriteActorText``,
    ``PETRglobals.WriteEventText`` and ``PETRglobals.WriteActorRoot``
    switches are set, actor-text, event-text and actor-root columns are
    appended, with ``---`` standing in for missing values. A summary of
    each event is also printed to stdout.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.


    output_file: String.
                    Filepath to which events should be written. Nothing is
                    written when this is empty or None.
    """
    # StorySource is assigned below and therefore rebinds the module-level
    # name; the other two declarations are kept from the original.
    global StorySource
    global NEvents
    global StoryIssues

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue    # skip cases eliminated by story-level discard
        story_output = []
        filtered_events = utilities.story_filter(story_dict, key)
        if 'source' in story_dict['meta']:
            StorySource = story_dict['meta']['source']
        else:
            StorySource = 'NULL'
        if 'url' in story_dict['meta']:
            url = story_dict['meta']['url']
        else:
            url = ''
        for event in filtered_events:
            story_date = event[0]
            source = event[1]
            target = event[2]
            # Drops newline elements from the code; only used for the
            # console summary below.
            code = filter(lambda a: not a == '\n', event[3])

            details = filtered_events[event]
            ids = ';'.join(details['ids'])

            if 'issues' in details:
                iss = details['issues']
                issues = ['{},{}'.format(k, v) for k, v in iss.items()]
                joined_issues = ';'.join(issues)
            else:
                joined_issues = []

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(story_date, source,
                                                         target, code, ids,
                                                         StorySource))
            # 15.04.30: a very crude hack around an error involving multi-word
            # verbs: a non-string code is replaced by the literal '010'.
            if not isinstance(event[3], basestring):
                event_str = '\t'.join(
                    event[:3]) + '\t010\t' + '\t'.join(event[4:])
            else:
                event_str = '\t'.join(event)

            # The issues column is always present, even when empty.
            if joined_issues:
                event_str += '\t{}'.format(joined_issues)
            else:
                event_str += '\t'

            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)

            if PETRglobals.WriteActorText:
                if 'actortext' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actortext'][0],
                        details['actortext'][1])
                else:
                    event_str += '\t---\t---'
            if PETRglobals.WriteEventText:
                if 'eventtext' in details:
                    event_str += '\t{}'.format(details['eventtext'])
                else:
                    event_str += '\t---'
            if PETRglobals.WriteActorRoot:
                if 'actorroot' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actorroot'][0],
                        details['actorroot'][1])
                else:
                    event_str += '\t---\t---'

            story_output.append(event_str)

        story_events = '\n'.join(story_output)
        event_output.append(story_events)

    # Filter out blank lines (stories that produced no events)
    event_output = [chunk for chunk in event_output if chunk]
    if output_file:
        # The context manager closes the file even if a write fails.
        with codecs.open(output_file, encoding='utf-8', mode='w') as f:
            for chunk in event_output:
                f.write(chunk + '\n')

Esempio n. 3

0

Mostra file

File: PETRwriter.py Progetto: JingL1014/petrarch2

def pipe_output(event_dict):
    """
    Collect the coded events of every story for the processing pipeline.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH, keyed by
                StoryID. Each story is expected to carry a 'sents' entry and
                a 'meta' dictionary.


    Returns
    -------

    final_out: Dictionary.
                StoryIDs as the keys and a list of coded event tuples as the
                values, i.e., {StoryID: [(full_record), (full_record)]}. The
                ``full_record`` portion is structured as
                (story_date, source, target, code, joined_issues, ids, url,
                StorySource) with the ``joined_issues`` field being optional
                (omitted when the event has no 'issues' entry).
                The issues are joined in the format of ISSUE,COUNT;ISSUE,COUNT.
                The IDs are joined as ID;ID;ID.
                ``StorySource`` defaults to 'NULL' and ``url`` to '' when
                absent from the story metadata. Stories without sentences
                or without filtered events are not included.

    """
    final_out = {}
    for story_key in event_dict:
        entry = event_dict[story_key]
        if not entry['sents']:
            continue    # skip cases eliminated by story-level discard

        events = utilities.story_filter(entry, story_key)

        story_meta = entry['meta']
        if 'source' in story_meta:
            StorySource = story_meta['source']
        else:
            StorySource = 'NULL'
        url = story_meta['url'] if 'url' in story_meta else ''

        if events:
            rows = []
            for event in events:
                story_date, source = event[0], event[1]
                target, code = event[2], event[3]
                info = events[event]

                ids = ';'.join(info['ids'])

                if 'issues' not in info:
                    row = (story_date, source, target, code, ids,
                           url, StorySource)
                else:
                    joined_issues = ';'.join(
                        ['{},{}'.format(issue, count)
                         for issue, count in info['issues'].items()])
                    row = (story_date, source, target, code,
                           joined_issues, ids, url, StorySource)

                rows.append(row)

            final_out[story_key] = rows

    return final_out

Esempio n. 4

0

Mostra file

File: PETRwriter.py Progetto: JonathanBowker/petrarch

def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    One tab-separated line is produced per filtered event: the fields of
    the event tuple, an issues column (empty when the event has no issues),
    the joined IDs, the story URL when the story has one, and the story
    source. Each event is also echoed to stdout twice: once as a summary
    and once as the raw tab-joined event.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.


    output_file: String.
                    Filepath to which events should be written.
    """
    global StorySource
    global NEvents
    global StoryIssues

    event_output = []
    for story_id in event_dict:
        story = event_dict[story_id]
        if not story['sents']:
            # Eliminated by a story-level discard.
            continue

        coded = utilities.story_filter(story, story_id)
        meta = story['meta']
        StorySource = meta['source'] if 'source' in meta else 'NULL'
        url = meta['url'] if 'url' in meta else ''

        lines = []
        for event in coded:
            story_date, source = event[0], event[1]
            target, code = event[2], event[3]
            info = coded[event]

            ids = ';'.join(info['ids'])

            joined_issues = []
            if 'issues' in info:
                joined_issues = ';'.join(
                    ['{},{}'.format(name, count)
                     for name, count in info['issues'].items()])

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(story_date, source,
                                                         target, code, ids,
                                                         StorySource))
            # 15.04.30: a very crude hack around an error involving multi-word
            # verbs: a non-string code is written as the literal '010'.
            if isinstance(event[3], basestring):
                line = '\t'.join(event)
            else:
                line = '\t'.join(
                    event[:3]) + '\t010\t' + '\t'.join(event[4:])
            print(line)

            # Issues column is always emitted, possibly empty.
            line += '\t{}'.format(joined_issues) if joined_issues else '\t'

            if url:
                line += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                line += '\t{}\t{}'.format(ids, StorySource)
            lines.append(line)

        event_output.append('\n'.join(lines))

    # Drop stories that produced no output, then write everything at once.
    non_blank = [chunk for chunk in event_output if chunk]
    final_event_str = '\n'.join(non_blank)
    with open(output_file, 'w') as f:
        f.write(final_event_str)

Esempio n. 5

0

Mostra file

File: PETRwriter.py Progetto: erickparolin/UniversalPetrarch

def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Every filtered event whose code is a string becomes one tab-separated
    line made of: the fields of the event tuple, the joined issues (an
    empty column when there are none), the joined IDs, the story URL (only
    when the story has one) and the story source. When the
    ``PETRglobals.WriteActorText``, ``PETRglobals.WriteEventText`` and
    ``PETRglobals.WriteActorRoot`` switches are set, actor-text, event-text
    and actor-root columns are appended, with ``---`` standing in for
    missing values. Within a story the lines are ordered by their joined
    IDs (ties broken by the line text). A summary of each event is also
    printed to stdout.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.


    output_file: String.
                    Filepath to which events should be written. Nothing is
                    written when this is empty or None.
    """
    # StorySource is assigned below and therefore rebinds the module-level
    # name; the other two declarations are kept from the original.
    global StorySource
    global NEvents
    global StoryIssues

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue  # skip cases eliminated by story-level discard

        story_output = []
        filtered_events = utilities.story_filter(story_dict, key)
        if 'source' in story_dict['meta']:
            StorySource = story_dict['meta']['source']
        else:
            StorySource = 'NULL'
        if 'url' in story_dict['meta']:
            url = story_dict['meta']['url']
        else:
            url = ''
        for event in filtered_events:
            # occasional issue in PETR-2 due to mishandling of multi-word
            # verb PAS 15.04.03, modified 18.06.01: such events are dropped
            if not isinstance(event[3], basestring):
                continue
            story_date = event[0]
            source = event[1]
            target = event[2]
            code = event[3]

            details = filtered_events[event]
            ids = ';'.join(details['ids'])

            if 'issues' in details:
                iss = details['issues']
                issues = ['{},{}'.format(k, v) for k, v in iss.items()]
                joined_issues = ';'.join(issues)
            else:
                joined_issues = []

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(
                story_date, source, target, code, ids, StorySource))
            event_str = '\t'.join(event)

            # The issues column is always present, even when empty.
            if joined_issues:
                event_str += '\t{}'.format(joined_issues)
            else:
                event_str += '\t'

            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)

            if PETRglobals.WriteActorText:
                if 'actortext' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actortext'][0],
                        details['actortext'][1])
                else:
                    event_str += '\t---\t---'
            if PETRglobals.WriteEventText:
                if 'eventtext' in details:
                    event_str += '\t{}'.format(details['eventtext'])
                else:
                    event_str += '\t---'
            if PETRglobals.WriteActorRoot:
                if 'actorroot' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actorroot'][0],
                        details['actorroot'][1])
                else:
                    event_str += '\t---\t---'

            # Pair each line with its sort key (ids, then the line itself)
            # so the key never has to be stripped back off by searching for
            # a tab, which would break if the ids ever contained one.
            story_output.append((ids + "\t" + event_str, event_str))

        # sort output by story ids
        story_events = '\n'.join(
            [text for _, text in sorted(story_output)])
        event_output.append(story_events)

    # Filter out blank lines (stories that produced no events)
    event_output = [chunk for chunk in event_output if chunk]
    if output_file:
        # The context manager closes the file even if a write fails.
        with codecs.open(output_file, encoding='utf-8', mode='w') as f:
            for line in event_output:
                f.write(line + '\n')

Esempio n. 6

0

Mostra file

def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Every filtered event becomes one tab-separated line made of: the fields
    of the event tuple, the joined issues (an empty column when there are
    none), the joined IDs, the story URL (only when the story has one) and
    the story source. When the ``PETRglobals.WriteActorText``,
    ``PETRglobals.WriteEventText`` and ``PETRglobals.WriteActorRoot``
    switches are set, actor-text, event-text and actor-root columns are
    appended, with ``---`` standing in for missing values. The last column
    is the 'reffilename' taken from the first sentence key of the story.
    A summary of each event is also printed to stdout.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.


    output_file: String.
                    Filepath to which events should be written. Nothing is
                    written when this is empty or None.
    """
    # StorySource is assigned below and therefore rebinds the module-level
    # name; the other two declarations are kept from the original.
    global StorySource
    global NEvents
    global StoryIssues

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue  # skip cases eliminated by story-level discard

        # The reference file name is read from whichever sentence key comes
        # first when the story's sentences are listed.
        sents = list(story_dict['sents'].keys())
        reffilename = story_dict['sents'][sents[0]]['reffilename']
        story_output = []
        filtered_events = utilities.story_filter(story_dict, key)
        if 'source' in story_dict['meta']:
            StorySource = story_dict['meta']['source']
        else:
            StorySource = 'NULL'
        if 'url' in story_dict['meta']:
            url = story_dict['meta']['url']
        else:
            url = ''
        for event in filtered_events:
            story_date = event[0]
            source = event[1]
            target = event[2]
            # Drops newline elements from the code; only used for the
            # console summary below.
            code = filter(lambda a: not a == '\n', event[3])

            details = filtered_events[event]
            ids = ';'.join(details['ids'])

            if 'issues' in details:
                iss = details['issues']
                issues = ['{},{}'.format(k, v) for k, v in iss.items()]
                joined_issues = ';'.join(issues)
            else:
                joined_issues = []

            print('Event: {}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
                story_date, source, target, code, ids, StorySource,
                reffilename))
            # 15.04.30: a very crude hack around an error involving multi-word
            # verbs: a non-string code is replaced by the literal '010'.
            if not isinstance(event[3], basestring):
                event_str = '\t'.join(event[:3]) + '\t010\t' + '\t'.join(
                    event[4:])
            else:
                event_str = '\t'.join(event)

            # The issues column is always present, even when empty.
            if joined_issues:
                event_str += '\t{}'.format(joined_issues)
            else:
                event_str += '\t'

            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)

            if PETRglobals.WriteActorText:  # default true
                if 'actortext' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actortext'][0],
                        details['actortext'][1])
                else:
                    event_str += '\t---\t---'
            if PETRglobals.WriteEventText:  # default true
                if 'eventtext' in details:
                    event_str += '\t{}'.format(details['eventtext'])
                else:
                    event_str += '\t---'
            if PETRglobals.WriteActorRoot:
                if 'actorroot' in details:
                    event_str += '\t{}\t{}'.format(
                        details['actorroot'][0],
                        details['actorroot'][1])
                else:
                    event_str += '\t---\t---'
            event_str += '\t{}'.format(reffilename)
            story_output.append(event_str)

        story_events = '\n'.join(story_output)
        event_output.append(story_events)

    # Filter out blank lines (stories that produced no events)
    event_output = [chunk for chunk in event_output if chunk]
    if output_file:
        # The context manager closes the file even if a write fails.
        with codecs.open(output_file, encoding='utf-8', mode='w') as f:
            for chunk in event_output:
                f.write(chunk + '\n')

Esempio n. 7

0

Mostra file

File: PETRwriter.py Progetto: GuardingDog/petrarch_chinese

def write_events(event_dict, output_file, flag=True):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format, optionally merging related events of a story first.

    For every story (visited in sorted key order) the filtered events are
    collected into the module-level ``event_temp`` list as dictionaries
    holding the original event tuple, its ids and whatever optional fields
    the event carries. When merging is enabled, an event that matches an
    earlier event of the same story is folded into it via ``modify_event``
    instead of being collected. The collected events are rendered by
    ``get_event_str`` and the result is appended to files.

    Parameters
    ----------
    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.
    output_file: String.
                    Filepath to which events should be written. Nothing is
                    written when this is empty or None.
    flag: Boolean.
                    NOTE(review): the value passed by the caller is never
                    used. It is reset to False and then replaced by
                    ``globalConfigPara.merge_event`` unless that equals "".
                    A truthy value enables event merging and appends the
                    output to ``output_file``; a falsy value appends to
                    "evets.result_before_merge.txt" and to per-story
                    "<id>evets.result_before_merge.txt" files instead.
    """
    # All of these module-level names are rebound below, except NEvents and
    # StoryIssues, which this function never touches.
    global StorySource
    global NEvents
    global StoryIssues
    global StoryNer
    global StoryNer2
    global filtered_events
    global event_temp

    event_output = []
    # for testing
    # NOTE(review): this unconditionally discards the caller's `flag`.
    flag = False
    import globalConfigPara as gcp
    if not gcp.merge_event == "":
        flag = gcp.merge_event

    for key in sorted(event_dict):
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue  # skip cases eliminated by story-level discard


#        print('WE1',story_dict)
        # NOTE(review): story_output is never appended to in this function.
        story_output = []  # event_str in one story
        event_temp = []  # event_origin in one story

        filtered_events = utilities.story_filter(story_dict, key)
        #        print('WE2',filtered_events)
        if 'source' in story_dict['meta']:
            StorySource = story_dict['meta']['source']
        else:
            StorySource = 'NULL'

        if 'url' in story_dict['meta']:
            url = story_dict['meta']['url']
        else:
            url = ''
        # Unlike 'source' and 'url', 'ner' is required in the story metadata
        # (a missing key raises KeyError here).
        StoryNer = ner_to_string(story_dict['meta']['ner'])
        # extract_location
        StoryNer2 = get_loction(story_dict)
        # event is tuple
        for event in filtered_events:
            temp_event_dict = {}
            # Set to True once this event has been folded into an earlier
            # one, in which case it is not collected on its own.
            skip_flag = False
            story_date = event[0]
            source = event[1]
            target = event[2]
            code = filter(lambda a: not a == '\n', event[3])
            ids = filtered_events[event]["ids"]

            temp_event_dict.update({"origin": event})
            temp_event_dict.update({"ids": ids})

            if flag:
                # Compare against every event already collected for this
                # story.
                for i, pre_dict in enumerate(event_temp):
                    pre_event = pre_dict["origin"]
                    pre_code = filter(lambda a: not a == '\n', pre_event[3])
                    pre_ids = pre_dict["ids"]
                    same_code = check_same_event(code, pre_code)
                    # 0: not the same event; 4: the same event but not the
                    # same detailed event
                    if same_code == 0 or same_code == 4:
                        continue

                    # Fill in missing components
                    if check_successive_sent(ids, pre_ids):
                        # Per the comments below, check_miss_component
                        # appears to return 0 = source and target both
                        # missing, 1 = source missing, 2 = target missing,
                        # 3 = complete — TODO confirm against the helper.
                        miss1 = check_miss_component(event)
                        miss2 = check_miss_component(pre_event)
                        # 4*4 = 16 cases in total
                        # Both lack source / both lack target / both lack
                        # source and target / both complete: no component
                        # filling, go on to the event-merging step below.
                        # (4 cases, pass)
                        if miss1 == miss2:
                            pass
                        # event lacks all components: skip this event and
                        # only merge the ids. (3 cases)
                        elif miss1 == 0:
                            skip_flag = True
                            modify_event(event, i, same_code, 0)
                        # If pre_event lacks all components, copy all of
                        # event's components into pre_event. (3 cases)
                        elif miss2 == 0:
                            skip_flag = True
                            modify_event(event, i, same_code, 3)
                        # If pre_event lacks its source, event has a source
                        # and both have the same target, merge. (2 cases)
                        elif miss2 == 1:
                            if miss1 == 2 or (miss1 == 3
                                              and target == pre_event[2]):
                                modify_event(event, i, same_code, 1)
                                if miss1 == 2:
                                    skip_flag = True
                        # If pre_event lacks its target, event has a target
                        # and both have the same source, merge. (2 cases)
                        elif miss2 == 2:
                            if miss1 == 1 or (miss1 == 3
                                              and source == pre_event[1]):
                                modify_event(event, i, same_code, 2)
                                if miss1 == 1:
                                    skip_flag = True
                        # If pre_event is complete while event lacks a
                        # component, merge the ids. (2 cases)
                        elif miss2 == 3:
                            if (miss1 == 1 and target == pre_event[2]) or (
                                    miss1 == 2 and source == pre_event[1]):
                                skip_flag = True
                                modify_event(event, i, same_code, 0)

                    # Parent/child event replacement
                    if not skip_flag:
                        # pre_event is modified
                        # (re-read because modify_event presumably updates
                        # event_temp[i] — TODO confirm)
                        pre_event = event_temp[i]["origin"]
                        if story_date == pre_event[0] and source == pre_event[
                                1] and target == pre_event[2]:
                            skip_flag = True
                            modify_event(event, i, same_code, 0)

            if skip_flag:
                continue

            # Copy every optional field the event carries into the record.
            if 'issues' in filtered_events[event]:
                iss = filtered_events[event]['issues']
                issues = ['{},{}'.format(k, v) for k, v in iss.items()]
                joined_issues = ';'.join(issues)
                temp_event_dict.update({"joined_issues": joined_issues})

            if url:
                temp_event_dict.update({"url": url})

            if 'content' in filtered_events[event]:
                temp_event_dict.update(
                    {"content": filtered_events[event]['content']})

            if 'Source' in filtered_events[event]:
                temp_event_dict.update(
                    {"Source": filtered_events[event]['Source']})

            if 'Target' in filtered_events[event]:
                temp_event_dict.update(
                    {"Target": filtered_events[event]['Target']})

            if 'actortext' in filtered_events[event]:
                temp_event_dict.update(
                    {"actortext": filtered_events[event]["actortext"]})

            if 'eventtext' in filtered_events[event]:
                temp_event_dict.update(
                    {"eventtext": filtered_events[event]['eventtext']})
                # if True:

            if 'actorroot' in filtered_events[event]:
                temp_event_dict.update(
                    {"actorroot": filtered_events[event]['actorroot']})

            if 'eventroot' in filtered_events[event]:
                temp_event_dict.update(
                    {"eventroot": filtered_events[event]['eventroot']})

            if 'sentenceTime' in filtered_events[event]:
                temp_event_dict.update(
                    {"sentenceTime": filtered_events[event]['sentenceTime']})
            if 'timeText' in filtered_events[event]:
                temp_event_dict.update(
                    {"timeText": filtered_events[event]['timeText']})
            if 'locationText' in filtered_events[event]:
                temp_event_dict.update(
                    {"locationText": filtered_events[event]['locationText']})

            event_temp.append(temp_event_dict)

        # Despite its name, event_str is extended element by element into
        # event_output; presumably a list of strings — TODO confirm.
        event_str = get_event_str(event_temp, event_dict)
        if event_str is not None:
            event_output += event_str
        # NOTE(review): story_output is always empty here, so story_events
        # is '' and is removed again by the blank-line filter below.
        story_events = '\n'.join(story_output)
        event_output.append(story_events)

    # Filter out blank lines
    event_output = [event for event in event_output if event]

    if output_file:
        if flag:
            f = codecs.open(output_file, encoding='utf-8', mode='a')
            for strw in event_output:
                #             field = str.split('\t')  # debugging
                #            f.write(field[5] + '\n')
                f.write(strw + '\n')
            f.close()
        else:
            # File name is relative to the current working directory.
            with open("evets.result_before_merge.txt", 'a') as f:
                for strw in event_output:
                    f.write(strw + '\n')
    # NOTE(review): when flag is truthy this second block appends the same
    # event_output to output_file a second time.
    if output_file:
        if flag:
            f = codecs.open(output_file, encoding='utf-8', mode='a')
            for strq in event_output:
                #             field = str.split('\t')  # debugging
                #            f.write(field[5] + '\n')
                f.write(strq + '\n')
            f.close()
        else:
            # Collect one id per output entry: characters 5..10 of the
            # entry's third line (presumably the story/article id — TODO
            # confirm against get_event_str's layout).
            story_list = []
            for strp in event_output:
                f_list = strp.splitlines()
                for index in range(len(f_list)):
                    if (index == 2):
                        story_list.append(f_list[index][5:11])
            # De-duplicate; the resulting order is that of set iteration.
            story_list2 = list(set(story_list))

            for i in range(len(story_list2)):
                str_name = story_list2[i] + "evets.result_before_merge.txt"
                with open(str_name, 'a') as f:
                    TEXT_ROOT = os.path.abspath(
                        os.path.dirname(os.path.dirname(__file__)))  # get the project root directory
                    # The separator is a literal backslash.
                    path = os.path.join(TEXT_ROOT, "input\\test.txt")  # file path
                    # Copy every input line whose first tab-separated field
                    # equals this id.
                    with open(path, 'r') as file_to_read:
                        while True:
                            line = file_to_read.readline()
                            if not line:
                                break
                            # if(line[0:6]==story_list2[i]):
                            #     f.write(line)
                            if (line.split("\t")[0] == story_list2[i]):
                                f.write(line)
                    event_num = 1
                    for strss in event_output:
                        listk = strss.splitlines()
                        for index in range(len(listk)):
                            # The id compared here is the part before the
                            # first '-' of the last tab field on the
                            # entry's third line (recomputed per index).
                            temp = listk[2].split('\t')
                            article_id = temp[len(temp) - 1].split('-')[0]
                            if (article_id == story_list2[i]):
                                if (index == 0):
                                    # Start a numbered "#e<N>" section for
                                    # each matching entry.
                                    f.write('\n')
                                    ss = "#e" + str(event_num)
                                    f.write(ss + '\n')
                                    event_num = event_num + 1
                                # Only these line positions of an entry are
                                # written out.
                                if (index == 0 or index == 2 or index == 4
                                        or index == 5 or index == 6
                                        or index == 8 or index == 10):
                                    f.write(listk[index] + '\n')

                    # Redundant: the enclosing with-block closes f anyway.
                    f.close()

Esempio n. 8

0

Mostra file

File: PETRwriter.py Progetto: fsaglam2002/petrarch2

def write_events(event_dict, output_file):
    """
    Formats and writes the coded event data to a file in a standard
    event-data format.

    Parameters
    ----------

    event_dict: Dictionary.
                The main event-holding dictionary within PETRARCH.


    output_file: String.
                    Filepath to which events should be written.
    """
    global StorySource
    global NEvents
    global StoryIssues

    event_output = []
    for key in event_dict:
        story_dict = event_dict[key]
        if not story_dict['sents']:
            continue    # skip cases eliminated by story-level discard
        story_output = []
        filtered_events = utilities.story_filter(story_dict, key)
        if 'source' in story_dict['meta']:
            StorySource = story_dict['meta']['source']
        else:
            StorySource = 'NULL'
        if 'url' in story_dict['meta']:
            url = story_dict['meta']['url']
        else:
            url = ''
        for event in filtered_events:
            story_date = event[0]
            source = event[1]
            target = event[2]
            code = filter(lambda a: not a == '\n', event[3])

            ids = ';'.join(filtered_events[event]['ids'])

            if 'issues' in filtered_events[event]:
                iss = filtered_events[event]['issues']
                issues = ['{},{}'.format(k, v) for k, v in iss.items()]
                joined_issues = ';'.join(issues)
            else:
                joined_issues = []

            print('Event: {}\t{}\t{}\t{}\t{}\t{}'.format(story_date, source,
                                                         target, code, ids,
                                                         StorySource))
#            event_str = '{}\t{}\t{}\t{}'.format(story_date,source,target,code)
            # 15.04.30: a very crude hack around an error involving multi-word
            # verbs
            if not isinstance(event[3], basestring):
                event_str = '\t'.join(
                    event[:3]) + '\t010\t' + '\t'.join(event[4:])
            else:
                event_str = '\t'.join(event)
            #print(event_str)
            if joined_issues:
                event_str += '\t{}'.format(joined_issues)
            else:
                event_str += '\t'

            if url:
                event_str += '\t{}\t{}\t{}'.format(ids, url, StorySource)
                story_output.append(event_str)
            else:
                event_str += '\t{}\t{}'.format(ids, StorySource)
                story_output.append(event_str)

        story_events = '\n'.join(story_output)
        event_output.append(story_events)

    # Filter out blank lines
    event_output = [event for event in event_output if event]
    final_event_str = '\n'.join(event_output)