Python DefaultOrderedDictの例、src.util.util.DefaultOrderedDict Pythonの例

コード例 #1

0

ファイルを表示

ファイル: batchprocessor.py プロジェクト: kurtespinosa/eventmodels

def apply_actions(structures, buffer_ts, arg_ts):
    '''
    Build, per sentence and per trigger, one candidate structure for every action.

    For each trigger of a sentence, the trigger's argument id is paired with
    every action in const.ACTION_LIST; each (arg_id, action) pair is placed in
    front of the items of the trigger's buffer.

    :param structures: sentence keys to process; when empty, every position of
        arg_ts is processed instead
    :param buffer_ts: per sentence, a mapping from trigger to its buffer
    :param arg_ts: per sentence, a mapping from trigger to a pair whose first
        item is the argument id
    :return: a list with one DefaultOrderedDict(list) per processed sentence,
        mapping each trigger to its list of candidate structures
    '''
    # NOTE(review): when structures is non-empty its elements are used directly
    # as keys into arg_ts / buffer_ts -- confirm that is what callers pass.
    if len(structures) == 0:
        sentence_keys = range(len(arg_ts))
    else:
        sentence_keys = structures

    new_structures = []
    for s in sentence_keys:
        # Every sentence dict needs the list factory so that the first append
        # for a trigger can create its list.
        trig_sen = DefaultOrderedDict(list)
        for trig, arg in arg_ts[s].items():
            buffer = buffer_ts[s][trig]
            arg_id, _ = arg
            for a in const.ACTION_LIST:
                # One structure per action: the (arg_id, action) pair followed
                # by the buffered items.
                structure = [(arg_id, a)]
                structure.extend(buffer)
                trig_sen[trig].append(structure)
        new_structures.append(trig_sen)
    return new_structures

コード例 #2

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

    def _remove_duplicates(structures):
        '''
        Collapse structures that are equal as multisets of "role__type" labels.

        :param structures: indexable whose entries 0..2 (one per level) map a
            trigger to a list; only item 0 of that list is read
        :return: one DefaultOrderedDict per level, mapping each trigger to a
            one-element list holding a mapping from structure length (as a
            string) to a one-element list of the unique label-count dicts
        '''
        deduped_levels = []
        for level in range(3):  # 3 levels
            per_trigger = DefaultOrderedDict(list)
            for trigger, candidates in structures[level].items():
                # flatten every structure into its list of "role__type" labels
                labelled = []
                for structure in candidates[0]:
                    labels = []  # permits repeated arguments
                    for relation in structure:
                        if len(relation) == 0:
                            # an empty relation ends the structure; the labels
                            # gathered so far are recorded here and again below
                            labelled.append(labels)
                            break
                        labels.append(relation[0] + "__" + relation[1])
                    labelled.append(labels)

                # bucket the label lists by their length
                by_length = DefaultOrderedDict(list)
                for labels in labelled:
                    by_length[len(labels)].append(labels)

                # within each bucket keep a single representative per multiset:
                # a structure is dropped when its label counts match one that
                # was already kept
                unique_by_length = DefaultOrderedDict(list)
                for length, bucket in by_length.items():
                    kept = []
                    for labels in bucket:
                        counts = Counter(labels)
                        if not any(Counter(seen) == counts for seen in kept):
                            kept.append(dict(counts))
                    unique_by_length[str(length)].append(kept)
                per_trigger[trigger].append(unique_by_length)
            deduped_levels.append(per_trigger)
        return deduped_levels

コード例 #3

0

ファイルを表示

def get_max_rel_and_dict_in_pred(instances, USE_FILTER):
    '''
    Find the largest relation count of any predicted structure and tally the counts per type.

    NONE is counted as one argument (to represent no-argument events), so the
    returned maximum is one more than the largest count observed.

    :param instances: contains the events from all instances
    :param USE_FILTER: when true the filtered structures are read, otherwise the candidates
    :return: the maximum num of relations and, per event type, a dictionary
        from relation count to the number of structures having that count
    '''
    largest = 0
    tallies = DefaultOrderedDict(DefaultOrderedDict)

    if USE_FILTER:
        structures_idx = const.PRED_FILTERED_STRUCTURES_IDX
    else:
        structures_idx = const.PRED_CAND_STRUCTURES_IDX

    for instance in instances:
        for level in instance[structures_idx]:
            for trigger_id, candidates in level.items():
                for candidate in candidates:
                    n_rel = len(candidate[0]) - 1

                    # tally this relation count under the trigger's type
                    event_type = instance[const.IDS_TRIGGERS_MERGED_IDX][trigger_id][0]
                    per_type = tallies[event_type]
                    if n_rel in per_type:
                        per_type[n_rel] += 1
                    else:
                        per_type[n_rel] = 1

                    if n_rel > largest:
                        largest = n_rel
    # + 1 to count NONE as one argument
    return largest + 1, tallies

コード例 #4

0

ファイルを表示

def get_max_rel_and_dict_in_gold(instances):
    '''
    Find the largest relation count of any gold event and tally the counts per type.

    The event type is the text before ":" in the first item of a structure and
    the relation count is the number of remaining items. NONE is counted as one
    argument, so the returned maximum is one more than the largest count.

    :param instances: the instances with the event structures
    :return: the max num of relations and, per event type, a dictionary from
        relation count to the number of events having that count
    '''
    largest = 0
    tallies = DefaultOrderedDict(DefaultOrderedDict)

    for instance in instances:
        for structure in instance[const.GOLD_EVENTS_IDX].values():
            event_type = structure[0].split(":")[0]
            n_rel = len(structure) - 1

            # tally this relation count under the event's type
            per_type = tallies[event_type]
            if n_rel in per_type:
                per_type[n_rel] += 1
            else:
                per_type[n_rel] = 1

            if n_rel > largest:
                largest = n_rel

    # + 1 to count NONE as one argument
    return largest + 1, tallies

コード例 #5

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

        def getLevel(structures, level):
            '''
            Rewrite the argument of every relation to its category at the given level.

            Loops thru all the structures and, using the hierarchy of types
            (const.TYPE_GENERALISATION), changes the argument based on the level.

            :param structures: mapping from a key to a list of structures; each
                structure is a list of relations that are either empty or hold
                the role at position 0 and the argument at position 1
            :param level: level handed to util.extract_category
            :return: DefaultOrderedDict(list) mapping each key to a one-element
                list that holds the rewritten structures
            '''
            def _generalise(relation):
                # empty relations are kept as an empty tuple
                if len(relation) == 0:
                    return ()
                category = util.extract_category(relation[1], level, const.TYPE_GENERALISATION)
                return (relation[0], category)

            generalised = DefaultOrderedDict(list)
            for key, group in structures.items():
                rewritten = [[_generalise(relation) for relation in structure]
                             for structure in group]
                generalised[key].append(rewritten)
            return generalised

コード例 #6

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

    def _index_using_triggers(gold_structures):
        '''
        Regroup gold event structures by trigger id, with relations as tuples.

        The trigger id is the text after ":" in the first item of each event
        structure. Arguments that are themselves event ids are replaced by the
        trigger id of that event.

        :param gold_structures: mapping from event id to a list whose first item
            is "type:trigger_id" and whose other items are "role:arg" strings
        :return: DefaultOrderedDict(list) from trigger id to its structures,
            each a list of (role, arg) tuples or [()] for an empty structure
        '''
        def _transform_to_tuples(gold_structures):
            # "role:arg" strings become (role, arg) tuples; [] becomes [()]
            as_tuples = DefaultOrderedDict(list)
            for trig_id, structures in gold_structures.items():
                for s in structures:
                    if s == []:
                        converted = [()]
                    else:
                        converted = [(role, arg)
                                     for role, arg in (rel.split(":") for rel in s)]
                    as_tuples[trig_id].append(converted)
            return as_tuples

        # first pass: group the argument lists by trigger and remember which
        # trigger each event id belongs to
        by_trigger = DefaultOrderedDict(list)
        event_to_trigger = OrderedDict()
        for event_id, structure in gold_structures.items():
            trigger_id = structure[0].split(":")[1]
            by_trigger[trigger_id].append(structure[1:])
            event_to_trigger[event_id] = trigger_id

        # second pass: swap event-id arguments for the trigger id of that event
        new_gold = DefaultOrderedDict(list)
        for trig_id, structures in _transform_to_tuples(by_trigger).items():
            for structure in structures:
                if structure == const.EMPTY_STRUCTURE:
                    resolved = [()]
                else:
                    resolved = [(rel[0], event_to_trigger.get(rel[1], rel[1]))
                                for rel in structure]
                new_gold[trig_id].append(resolved)
        return new_gold

コード例 #7

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

        def map_dct2position(d, map):
            '''
            Translate the words of every definition into indices taken from map.

            :param d: mapping from an id to its definition; item 1 is the start
                offset and the items from position 3 onwards are the words
            :param map: mapping from "<from>_<to>" offset keys to an index
            :return: DefaultOrderedDict(list) from id to one index per word
            '''
            def get_best_idx(fr, to, map):
                '''
                Pick the entry of map whose span has the most overlap with [fr, to].

                :param fr: start offset of the span
                :param to: end offset of the span
                :param map: mapping keyed by "<from>_<to>" offsets
                :return: position of the best entry in iteration order, or -1
                    when no considered entry overlaps by more than 0
                '''
                best_pos, best_overlap = -1, 0
                for pos, (span_key, _) in enumerate(map.items()):
                    lo_text, hi_text = span_key.split("_")
                    lo, hi = int(lo_text), int(hi_text)
                    # entries lying strictly inside the span are not considered
                    if lo > fr and hi < to:
                        continue
                    overlap = min(to, hi) - max(fr, lo)
                    if overlap > best_overlap:
                        best_overlap, best_pos = overlap, pos
                return best_pos

            positions = DefaultOrderedDict(list)
            for key, defn in d.items():
                words = defn[3:]
                offset = int(defn[1])
                for word in words:
                    end = offset + len(word)
                    span_key = str(offset) + "_" + str(end)
                    if span_key in map:
                        idx = map[span_key]
                    else:
                        # no exact span: fall back to the best overlapping entry
                        idx = get_best_idx(offset, end, map)
                    positions[key].append(idx)

                    # the next word starts one character after this one ends
                    offset = end + 1
            return positions

コード例 #8

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

    def get_event_indices(events, ent_indices, trig_indices):
        '''
        File every event under the index shared by its trigger and all its arguments.

        Each item of an event definition is a "role:id" string. Ids starting with
        "E" are sub-events and are replaced by that event's trigger id (None when
        the event is unknown). Every id is looked up with get_arg_index, first in
        trig_indices and, when that yields -1, in ent_indices. Events whose items
        do not all resolve to the same index are excluded.

        :param events: mapping from event id to its definition
        :param ent_indices: entity indices passed to get_arg_index
        :param trig_indices: trigger indices passed to get_arg_index
        :return: (DefaultOrderedDict from index to {event id: definition},
            number of excluded events, list of excluded event ids)
        '''
        def _same_indices(indices):
            # True when every entry equals the first one
            first = indices[0]
            return not any(other != first for other in indices[1:])

        def _get_event_trig_map(events):
            # event id -> trigger id, read from the first item of the definition
            trig_of = OrderedDict()
            for ev_id, ev_defn in events.items():
                trig_of[ev_id] = ev_defn[0].split(":")[1]
            return trig_of

        event_index = DefaultOrderedDict(dict)
        trigger_of_event = _get_event_trig_map(events)
        excluded_events = []
        for e_id, defn in events.items():
            indices = []
            for item in defn:
                ref = item.split(":")[1]
                if ref.startswith("E"):  # a sub-event so get the trigger id
                    ref = trigger_of_event.get(ref)
                found = get_arg_index(ref, trig_indices)
                if found == -1:
                    found = get_arg_index(ref, ent_indices)
                indices.append(found)
            if _same_indices(indices):
                event_index[indices[0]][e_id] = defn
            else:
                # the id logged here is the last one resolved for this event
                log.debug("Not supported: intersentence event: %s, %s, %s", ref, defn, indices)
                excluded_events.append(e_id)
        return event_index, len(excluded_events), excluded_events

コード例 #9

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

 def _transform_to_tuples(gold_structures):
     '''
     Convert "role:arg" relation strings into (role, arg) tuples.

     :param gold_structures: mapping from trigger id to a list of structures,
         each a list of "role:arg" strings
     :return: DefaultOrderedDict(list) with the same keys; every structure is a
         list of (role, arg) tuples, and an empty list becomes [()]
     '''
     converted = DefaultOrderedDict(list)
     for trig_id, structures in gold_structures.items():
         for s in structures:
             if s == []:
                 pairs = [()]
             else:
                 pairs = [(role, arg)
                          for role, arg in (rel.split(":") for rel in s)]
             converted[trig_id].append(pairs)
     return converted

コード例 #10

0

ファイルを表示

def sort_per_file_and_sen_id(data, is_gold):
    '''
    Group sentence-level items by file id and then by sentence id.

    :param data: sentence-based event information
    :param is_gold: selects the GOLD_* constants (true) or the PRED_* constants
        (false) used to locate the file id and sentence position in each item
    :return: a dictionary of files of dictionary sentences and their event info
    '''
    grouped = DefaultOrderedDict(DefaultOrderedDict)
    for item in data:
        if is_gold:
            info = item[const.GOLD_SENTENCE_INFO_IDX]
            file_id = info[const.GOLD_SENTENCE_FILE_ID_IDX]
            sen_id = info[const.GOLD_SENTENCE_POS_IDX]
        else:
            info = item[const.PRED_SENTENCE_INFO_IDX]
            file_id = info[const.PRED_SENTENCE_FILE_ID_IDX]
            sen_id = info[const.PRED_SENTENCE_POS_IDX]
        grouped[file_id][sen_id] = item
    return grouped

コード例 #11

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

    def _generate(instance, group, gold_structures=None):
        '''
        Attach action labels to every candidate structure of one group of an instance.

        :param instance: instance whose entry at const.PRED_CAND_STRUCTURES_IDX is
            indexed with ``group``
        :param group: key of the candidate group; the name is rebound below to
            that group's trigger -> candidates mapping
        :param gold_structures: optional mapping from trigger to its gold
            structures; triggers absent from it are labelled without gold
        :return: DefaultOrderedDict(list) from trigger to
            (structure, id, gold_actions) tuples, one per candidate
        '''

        def _loop_thru_relations_and_assign_actions(structure, gold_structure=None):
            '''
            Pair every relation of ``structure`` with an action vector.

            Without gold the result is a flat list of (relation, action) pairs.
            With gold the result holds one such list per gold alternative.
            '''
            def _check_action_against_gold(gold_structure, rel):
                '''
                Decide the action for ``rel`` and consume it from ``gold_structure``.

                :return: (action vector, gold_structure with ``rel`` removed if it was present)
                '''
                action = [0,0,0]

                if rel in gold_structure:
                    # the last remaining gold relation gets ADDFIX, any other match gets ADD
                    if len(gold_structure) == 1:
                        action[const.ACTION_ADDFIX] = 1
                    else:
                        action[const.ACTION_ADD] = 1
                    gold_structure.remove(rel)
                else:
                    action[const.ACTION_IGNORE] = 1
                return action, gold_structure

            structure_actions = []
            # indices_addfix = dict()
            if gold_structure is None:
                # no gold available: every relation gets the same fixed vector
                for r in range(len(structure)):
                    rel = structure[r]
                    action = [1,0,0] #ignore
                    structure_actions.append((rel, action))

            else:
                for g in gold_structure:
                    structure_action = []
                    for r in range(len(structure)):
                        rel = structure[r]
                        # a copy of g is passed in and the reduced copy is kept, so the
                        # caller's gold lists are never modified
                        action, g = _check_action_against_gold(g[:], rel)
                        structure_action.append((rel, action))
                    structure_actions.append(structure_action)

            return structure_actions

        # from here on ``group`` is the trigger -> candidates mapping
        group = instance[const.PRED_CAND_STRUCTURES_IDX][group]
        new_group = DefaultOrderedDict(list)

        for trig, data in group.items():
            for d in range(len(data)):
                # each candidate holds the structure at position 0 and its id form at position 1
                structure = data[d][0]
                id = data[d][1]

                assert len(id) == len(structure), "Error: length of structure != id"
                #gather the mapping for role and args since the comparison has to be made with the gold
                #because of the numbered roles
                arg_roleid_mapping = OrderedDict()
                role_arg_mapping = OrderedDict()
                roleid_arg_mapping = OrderedDict()
                # arg -> role, from the structure
                for s in structure:
                    if s != ():
                        role = s[0]
                        arg = s[1]
                        role_arg_mapping[arg] = role
                # arg -> role id, from the id form
                for s in id:
                    if s != ():
                        roleid = s[0]
                        arg = s[1]
                        roleid_arg_mapping[arg] = roleid
                # keep the role id of every arg that occurs in both
                for k,v in role_arg_mapping.items():
                    if k in roleid_arg_mapping:
                        arg_roleid_mapping[k] = roleid_arg_mapping[k]

                # an empty candidate is replaced by fresh lists; otherwise the inserts
                # below modify the lists taken from ``data`` in place
                if structure == [()]:
                    structure = []
                    id = []
                # every candidate starts with the NONE relation
                structure.insert(0, (const.NONE_ROLE_TYPE, const.NONE_ARG_TYPE))
                id.insert(0, (const.ID_NONE_ROLE_TYPE, const.NONE_ARG_TYPE))

                gold = None
                if gold_structures:
                    if trig in gold_structures:
                        gold = gold_structures[trig]
                        # an empty gold structure is expressed as the NONE relation
                        if gold == [[()]]:
                            gold = [[(const.NONE_ROLE_TYPE, const.NONE_ARG_TYPE)]]
                        actions = _loop_thru_relations_and_assign_actions(structure, gold)
                    else:
                        actions = _loop_thru_relations_and_assign_actions(structure, None)
                else:
                    actions = _loop_thru_relations_and_assign_actions(structure, None)

                #replace role word with ids
                gold_actions = []
                if gold_structures:
                    if gold is not None:
                        # actions holds one list per gold alternative here
                        for g in range(len(gold)):
                            new_actions = []
                            for a in actions[g]:
                                edge = a[0]
                                action = a[1]
                                # if type(edge) is tuple:
                                edge_arg = edge[1]
                                # NOTE(review): the argument slot is compared with
                                # const.NONE_ROLE_TYPE although the inserted NONE relation
                                # stores const.NONE_ARG_TYPE there -- confirm the two are equal
                                if edge_arg == const.NONE_ROLE_TYPE:
                                    edge_roleid = const.ID_NONE_ROLE_TYPE
                                else:
                                    edge_roleid = arg_roleid_mapping[edge_arg]
                                # else:
                                #     edge_roleid = const.ID_NONE_ROLE_TYPE
                                #     edge_arg = None #just a
                                new_edge = (edge_roleid, edge_arg)
                                new_actions.append((new_edge, action))
                            gold_actions.append(new_actions)
                        new_group[trig].append((structure, id, gold_actions))
                    else:
                        # no gold for this trigger: the flat action list is wrapped as is
                        gold_actions.append(actions)
                        new_group[trig].append((structure, id, gold_actions))
                else:
                    gold_actions.append(actions)
                    new_group[trig].append((structure, id, gold_actions))

        return new_group

コード例 #12

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

        def extractEventStructures(file_id_events):
            '''
            Collect, per trigger type, the argument structures of every valid event.

            :param file_id_events: mapping from file id to a list whose first item
                holds the file's entities, triggers, relations and events at
                positions 0 to 3
            :return: DefaultOrderedDict(list) from trigger type to its structures
            '''
            def extractFullString(fullstring, mentionLevel=False):
                '''
                Given a trigger/entity definition, extract the string.

                :param fullstring: definition; item 0 is the type, item 3 the word
                :param mentionLevel: when true the word is returned after the type
                :return: [type] or [type, word]
                '''
                if mentionLevel:
                    return [fullstring[0], fullstring[3]]
                return [fullstring[0]]

            def extractEventStructure(v):
                '''
                Turn one event definition into (role, type, is_subevent) tuples.

                :param v: event definition; item 0 is the trigger info and the
                    other items are "role:arg" strings
                :return: (structure, is_valid); is_valid is False when an argument
                    could not be looked up, and structure then holds only the
                    relations gathered before it
                '''
                def _remove_number_in_role(role):
                    # a role ending in a digit loses that last character
                    return role[0:-1] if role[-1].isdigit() else role

                if len(v) == 1:  # event does not have arguments/participants
                    return [()], True

                structure = []
                for item in v[1:]:  # skips trigger info
                    fields = item.split(":")
                    role = _remove_number_in_role(fields[0])
                    arg = fields[1]
                    is_subevent = False
                    try:
                        if arg.startswith("T"):
                            arg = extractFullString(entities[arg], False)
                        elif arg.startswith("E"):
                            # a sub-event is described by the type of its trigger
                            sub_event = events[arg]
                            sub_trigger = triggers[sub_event[0].split(":")[1]]
                            arg = extractFullString(sub_trigger, False)
                            is_subevent = True
                    except KeyError:
                        log.debug("Discarding structure %s as it contains intersentence argument: %s", v, arg)
                        return structure, False
                    structure.append((role, arg[0], is_subevent))
                return structure, True

            structures = DefaultOrderedDict(list)

            for quad in file_id_events.values():
                # entities, triggers and events are read by extractEventStructure
                annotations = quad[0]
                entities = annotations[0]
                triggers = annotations[1]
                events = annotations[3]

                for v in events.values():
                    trigger = extractFullString(triggers[v[0].split(":")[1]], True)
                    structure, is_valid = extractEventStructure(v)
                    if is_valid:
                        structures[trigger[0]].append(structure)
                    else:  # assumes that invalid events have intersentence arguments
                        log.debug("Event has intersentence arguments")
            return structures

コード例 #13

0

ファイルを表示

ファイル: goldprocessor.py プロジェクト: kurtespinosa/eventmodels

def load(DIR, IS_EMO):
    def get_arg_index(trig, indices):
        '''
        Find the first key of indices whose value contains trig.

        :param trig: the id to look for
        :param indices: mapping from a key to a container of ids
        :return: the first matching key in iteration order, or -1 when none matches
        '''
        for key, members in indices.items():
            if trig in members:
                return key
        return -1

    def get_event_indices(events, ent_indices, trig_indices):
        '''
        File every event under the index shared by its trigger and all its arguments.

        Each item of an event definition is a "role:id" string. Ids starting with
        "E" are sub-events and are replaced by that event's trigger id (None when
        the event is unknown). Every id is looked up with get_arg_index, first in
        trig_indices and, when that yields -1, in ent_indices. Events whose items
        do not all resolve to the same index are excluded.

        :param events: mapping from event id to its definition
        :param ent_indices: entity indices passed to get_arg_index
        :param trig_indices: trigger indices passed to get_arg_index
        :return: (DefaultOrderedDict from index to {event id: definition},
            number of excluded events, list of excluded event ids)
        '''
        def _same_indices(indices):
            # True when every entry equals the first one
            first = indices[0]
            return not any(other != first for other in indices[1:])

        def _get_event_trig_map(events):
            # event id -> trigger id, read from the first item of the definition
            trig_of = OrderedDict()
            for ev_id, ev_defn in events.items():
                trig_of[ev_id] = ev_defn[0].split(":")[1]
            return trig_of

        event_index = DefaultOrderedDict(dict)
        trigger_of_event = _get_event_trig_map(events)
        excluded_events = []
        for e_id, defn in events.items():
            indices = []
            for item in defn:
                ref = item.split(":")[1]
                if ref.startswith("E"):  # a sub-event so get the trigger id
                    ref = trigger_of_event.get(ref)
                found = get_arg_index(ref, trig_indices)
                if found == -1:
                    found = get_arg_index(ref, ent_indices)
                indices.append(found)
            if _same_indices(indices):
                event_index[indices[0]][e_id] = defn
            else:
                # the id logged here is the last one resolved for this event
                log.debug("Not supported: intersentence event: %s, %s, %s", ref, defn, indices)
                excluded_events.append(e_id)
        return event_index, len(excluded_events), excluded_events

    def _load_to_list_ann(file):
        '''

        :param file:
        :return:
        '''
        entities = OrderedDict()
        triggers = OrderedDict()
        rels = OrderedDict()
        events = OrderedDict()

        with open(file, 'r') as file_read:
            lines = file_read.readlines()
            for line in lines:
                tokens = line.split()
                id = tokens[0]
                defn = tokens[1:]
                if line.startswith("R"):
                    rels[id] = defn
                elif line.startswith("TR"):
                    triggers[id] = defn
                elif line.startswith("T"):
                    entities[id] = defn
                elif line.startswith("E"):
                    events[id] = defn
        return [entities, triggers, rels, events]

    def _associate_ann_to_sentences(sentences, boundaries, quad, file):
        '''
        Build one instance per sentence from the file's annotations.

        Events reported as excluded by get_event_indices are dropped and the
        remaining events are indexed again, until a pass excludes nothing.

        :param sentences: the sentences of the file
        :param boundaries: sentence boundaries, passed to util.get_indices
        :param quad: annotations; items 0, 1 and 3 are the entities, triggers
            and events
        :param file: file identifier stored in every instance
        :return: (instances, total number of excluded events); each instance is
            [[sentence, file, position], entities, triggers, events] for that
            position
        '''
        def _remove_events(events, excluded_events):
            # keep, in order, only the events whose id was not excluded
            return OrderedDict((ev_id, ev_defn) for ev_id, ev_defn in events.items()
                               if ev_id not in excluded_events)

        quad_ent_indices = util.get_indices(quad[0], boundaries)
        quad_trig_indices = util.get_indices(quad[1], boundaries)

        inter_count = 0
        events = quad[3]
        # loop to remove events cascade
        while True:
            event_indices, count, excluded_events = get_event_indices(events,
                                                                      quad_ent_indices,
                                                                      quad_trig_indices)
            inter_count += count
            if len(excluded_events) == 0:
                break
            events = _remove_events(events, excluded_events)

        instances = [
            [[sentences[i], file, i], quad_ent_indices[i], quad_trig_indices[i], event_indices[i]]
            for i in range(len(sentences))
        ]
        return instances, inter_count

    def check_for_invalid_event_structures(quad, result_file, file):
        '''
        Log an error for every gold event whose roles form an invalid combination.

        For now this only handles one rule: every structure that has a CSite
        role must have a Cause role as well.

        :param quad: annotations; item 3 maps event id to its structure, whose
            items after the first are "role:arg" strings
        :param result_file: not used
        :param file: file identifier included in the log message
        :return: None

        TODO: generalise this to read a list of invalid structures
        '''
        def is_valid_roles_combination(roles):
            # CSite is only acceptable together with Cause
            return 'CSite' not in roles or 'Cause' in roles

        for event_id, structure in quad[3].items():
            roles = [role for role, _ in (rel.split(":") for rel in structure[1:])]
            if is_valid_roles_combination(roles):
                continue
            log.error("ERROR: invalid structure found in gold:%s, %s, %s", file,
                      event_id, structure)

    def load_txt_to_list_with_boundaries(file, IS_EMO, main_txt_file=None):
        '''
        Read the sentences of a text file together with their offsets.

        With IS_EMO the file alternates a "start:end" line and the sentence
        line; the sentence is kept exactly as read. Otherwise every line is one
        sentence, stripped of trailing whitespace, and the offsets are
        accumulated from the stripped lengths plus one per line.

        :param file: path of the text file
        :param IS_EMO: selects the alternating-line layout when true
        :param main_txt_file: not used
        :return: (sentences, boundaries) where boundaries holds one
            (start, end) tuple per sentence
        '''
        with open(file, 'r') as file_read:
            lines = file_read.readlines()

        sentences = []
        boundaries = []

        if IS_EMO:
            # lines come in pairs: the offsets first, then the sentence itself
            for pos in range(0, len(lines), 2):
                start, end = lines[pos].split(":")
                boundaries.append((int(start), int(end)))
                sentences.append(lines[pos + 1])
            return sentences, boundaries

        offset = 0
        for raw in lines:
            text = raw.rstrip()
            sentences.append(text)
            boundaries.append((offset, offset + len(text)))
            offset += len(text) + 1
        return sentences, boundaries

    def add_position_information(file_instances, boundaries):
        '''
        Append to every instance a mapping from entity/trigger id to token indices.

        The mapping is appended to item 0 of each instance, so file_instances is
        modified in place.

        :param file_instances: instances of one file; item 0 of an instance
            starts with the sentence, items 1 and 2 are its entities and triggers
        :param boundaries: one (start, end) pair per instance; only start is used
        :return: None
        '''
        def map_words2pos(sen, start):
            '''
            Map the offsets of every space-separated token to its position.

            :param sen: the sentence
            :param start: offset of the first character of the sentence
            :return: OrderedDict from "<from>_<to>" to the token position
            '''
            spans = OrderedDict()
            cursor = start
            for position, token in enumerate(sen.split(" ")):
                stop = cursor + len(token)
                spans[str(cursor) + "_" + str(stop)] = position
                cursor = stop + 1  # 1 for the space between tokens
            return spans

        def map_dct2position(d, map):
            '''
            Translate the words of every definition into indices taken from map.

            :param d: mapping from an id to its definition; item 1 is the start
                offset and the items from position 3 onwards are the words
            :param map: mapping from "<from>_<to>" offset keys to an index
            :return: DefaultOrderedDict(list) from id to one index per word
            '''
            def get_best_idx(fr, to, map):
                '''
                Pick the entry of map whose span has the most overlap with [fr, to].

                :param fr: start offset of the span
                :param to: end offset of the span
                :param map: mapping keyed by "<from>_<to>" offsets
                :return: position of the best entry in iteration order, or -1
                    when no considered entry overlaps by more than 0
                '''
                best_pos, best_overlap = -1, 0
                for pos, (span_key, _) in enumerate(map.items()):
                    lo_text, hi_text = span_key.split("_")
                    lo, hi = int(lo_text), int(hi_text)
                    # entries lying strictly inside the span are not considered
                    if lo > fr and hi < to:
                        continue
                    overlap = min(to, hi) - max(fr, lo)
                    if overlap > best_overlap:
                        best_overlap, best_pos = overlap, pos
                return best_pos

            positions = DefaultOrderedDict(list)
            for key, defn in d.items():
                words = defn[3:]
                offset = int(defn[1])
                for word in words:
                    end = offset + len(word)
                    span_key = str(offset) + "_" + str(end)
                    if span_key in map:
                        idx = map[span_key]
                    else:
                        # no exact span: fall back to the best overlapping entry
                        idx = get_best_idx(offset, end, map)
                    positions[key].append(idx)

                    # the next word starts one character after this one ends
                    offset = end + 1
            return positions

        for i, instance in enumerate(file_instances):
            start, _ = boundaries[i]
            sen_dct = map_words2pos(instance[0][0], start)
            ent_dct = map_dct2position(instance[1], sen_dct)
            trig_dct = map_dct2position(instance[2], sen_dct)
            # entities first, then triggers (a trigger id wins on a clash)
            id2position_mapping = dict(ent_dct)
            id2position_mapping.update(trig_dct)
            instance[0].append(id2position_mapping)

    instances = []
    files = os.listdir(DIR)
    file_id_events = DefaultOrderedDict(list)

    intersentence_event_count = 0
    num_events = 0
    main_txt_file = None
    for file in files:
        if file.endswith(const.GOLD_TXT_OUTPUT_EXT):
            file = file.split(const.GOLD_TXT_OUTPUT_EXT)[0]
            if IS_EMO:
                main_txt_file = DIR + file + const.GOLD_MAIN_TXT_EXT
            txt_file = DIR + file + const.GOLD_TXT_OUTPUT_EXT
            sentences, boundaries = load_txt_to_list_with_boundaries(txt_file,
                                                                          IS_EMO,
                                                                          main_txt_file)
            ann_file = DIR + file + const.GOLD_ANN_OUTPUT_EXT
            quad = _load_to_list_ann(ann_file)
            num_events += len(quad[3])

            # check for invalid event structures
            # check_for_invalid_event_structures(quad, result_file, file)

            file_id_events[file].append(quad)
            file_instances, count = _associate_ann_to_sentences(sentences, boundaries,
                                                                quad, file)

            # add position info to sentence and entities/triggers
            add_position_information(file_instances, boundaries)

            intersentence_event_count += count
            instances.extend(file_instances)
    log.info("%s: %s gold events, %s (%.2f%%) intersentence events", DIR,
             str(num_events), str(intersentence_event_count),
             intersentence_event_count / num_events * 100)
    return instances, file_id_events