def get_pair_input_list(self, pair_type_list, left_sample_type, **kwargs): indexed_event_list = self.get_indexed_events() if len(indexed_event_list) <= 1: return [] assert 'tf_arg' in pair_type_list assert left_sample_type in ['one', 'all'] left_input_list = [ rich_event.get_pos_input(include_all_pobj=False) for rich_event in indexed_event_list ] pair_input_dict = defaultdict(list) for event_idx, rich_event in enumerate(indexed_event_list): left_input_idx_list = \ range(0, event_idx) + range(event_idx, len(indexed_event_list)) for pair_type in pair_type_list: pair_input_list = \ rich_event.get_pair_input_list(pair_type, **kwargs) for pair_input in pair_input_list: if left_sample_type == 'one': left_input = left_input_list[random.choice( left_input_idx_list)] pair_input_dict[pair_type].append( IndexedEventTriple(left_input, *pair_input)) else: for left_input_idx in left_input_idx_list: left_input = left_input_list[left_input_idx] pair_input_dict[pair_type].append( IndexedEventTriple(left_input, *pair_input)) results = [] tf_arg_list = pair_input_dict['tf_arg'] results.extend(tf_arg_list) num_tf_arg = len(tf_arg_list) if 'wo_arg' in pair_input_dict: wo_arg_list = pair_input_dict['wo_arg'] if len(wo_arg_list) > num_tf_arg: results.extend( random.sample(wo_arg_list, int(0.8 * num_tf_arg))) else: results.extend(wo_arg_list) if 'two_args' in pair_input_dict: two_args_list = pair_input_dict['two_args'] if len(two_args_list) > num_tf_arg: results.extend( random.sample(two_args_list, int(0.8 * num_tf_arg))) else: results.extend(two_args_list) random.shuffle(results) return results
def get_pair_tuning_input_list_two_args(self, sample_type): # TODO: remove old function # return empty list when number of events with indexed predicate is # less than of equal to 1, since there exists no left inputs indexed_event_list = self.get_indexed_events() if len(indexed_event_list) <= 1: return [] assert sample_type in ['one', 'all'], \ 'sample_type can only be ' \ 'one (one random left event for every negative sample), or' \ 'all (every left event for every negative sample)' results = [] pos_input_list = [ rich_event.get_pos_input(include_all_pobj=False) for rich_event in indexed_event_list ] for pos_idx, pos_event in enumerate(indexed_event_list): if pos_input_list[pos_idx] is None: continue left_input_idx_list = \ range(0, pos_idx) + range(pos_idx, len(indexed_event_list)) arg_idx_with_entity = [ idx for idx in [1, 2, 3] if pos_event.has_neg(idx) ] if len(arg_idx_with_entity) < 2: continue for pos_arg_idx, neg_arg_idx in permutations( arg_idx_with_entity, 2): pos_input = deepcopy(pos_input_list[pos_idx]) pos_input.set_argument(neg_arg_idx, -1) neg_input = deepcopy(pos_input_list[pos_idx]) neg_input.set_argument(neg_arg_idx, neg_input.get_argument(pos_arg_idx)) pos_salience = \ pos_event.get_argument(pos_arg_idx).get_pos_salience() neg_salience = pos_salience neg_input.set_argument(pos_arg_idx, -1) if sample_type == 'one': left_input = pos_input_list[random.choice( left_input_idx_list)] results.append( IndexedEventTriple(left_input, pos_input, neg_input, pos_arg_idx, neg_arg_idx, pos_salience, neg_salience)) else: for left_input_idx in left_input_idx_list: left_input = pos_input_list[left_input_idx] results.append( IndexedEventTriple(left_input, pos_input, neg_input, pos_arg_idx, neg_arg_idx, pos_salience, neg_salience)) return results
def get_pair_tuning_input_list_wo_arg(self, sample_type, model, include_type=True, use_unk=True): # TODO: remove old function # return empty list when number of events with indexed predicate is # less than of equal to 1, since there exists no left inputs indexed_event_list = self.get_indexed_events() if len(indexed_event_list) <= 1: return [] assert sample_type in ['one', 'all'], \ 'sample_type can only be ' \ 'one (one random left event for every negative sample), or' \ 'all (every left event for every negative sample)' results = [] pos_input_list = [ rich_event.get_pos_input(include_all_pobj=False) for rich_event in indexed_event_list ] arg_type_map = {1: 'SUBJ', 2: 'OBJ', 3: 'PREP'} for pos_idx, pos_event in enumerate(indexed_event_list): pos_input = pos_input_list[pos_idx] if pos_input is None: continue left_input_idx_list = \ range(0, pos_idx) + range(pos_idx, len(indexed_event_list)) for arg_idx in [1, 2, 3]: if pos_event.has_neg(arg_idx): pos_salience = \ pos_event.get_argument(arg_idx).get_pos_salience() neg_input = deepcopy(pos_input) neg_input.set_argument(arg_idx, -1) neg_salience = EntitySalience(**{}) elif pos_event.get_argument(arg_idx) is None: # do not add pair when there is no entity in the script if len(self.rich_entities) == 0: continue pos_salience = EntitySalience(**{}) neg_input = deepcopy(pos_input) random_entity = random.choice(self.rich_entities) arg_type = arg_type_map[arg_idx] if include_type else '' neg_input.set_argument( arg_idx, random_entity.get_index(model, arg_type=arg_type, use_unk=use_unk)) neg_salience = random_entity.get_salience() else: continue if sample_type == 'one': left_input = pos_input_list[random.choice( left_input_idx_list)] results.append( IndexedEventTriple(left_input, pos_input, neg_input, arg_idx, arg_idx, pos_salience, neg_salience)) else: for left_input_idx in left_input_idx_list: left_input = pos_input_list[left_input_idx] results.append( IndexedEventTriple(left_input, pos_input, neg_input, arg_idx, arg_idx, pos_salience, neg_salience)) return results
def get_pair_tuning_input_list(self, neg_sample_type): # TODO: remove old function # return empty list when number of entities is less than or equal to 1, # since there exists no negative inputs if self.num_entities <= 1: return [] # return empty list when number of events with indexed predicate is # less than of equal to 1, since there exists no left inputs indexed_event_list = self.get_indexed_events() if len(indexed_event_list) <= 1: return [] assert neg_sample_type in ['one', 'neg', 'all'], \ 'neg_sample_type can only be ' \ 'one (one random negative event and one random left event), ' \ 'neg (one random left event for every negative event), or ' \ 'all (every left event for every negative event)' results = [] pos_input_list = [ rich_event.get_pos_input(include_all_pobj=False) for rich_event in indexed_event_list ] for pos_idx, pos_event in enumerate(indexed_event_list): pos_input = pos_input_list[pos_idx] if pos_input is None: continue left_input_idx_list = \ range(0, pos_idx) + range(pos_idx, len(indexed_event_list)) for arg_idx in [1, 2, 3]: if pos_event.has_neg(arg_idx): pos_salience = \ pos_event.get_argument(arg_idx).get_pos_salience() if neg_sample_type == 'one': neg_input, neg_salience = random.choice( pos_event.get_neg_input_list( arg_idx, include_salience=True)) left_input = pos_input_list[random.choice( left_input_idx_list)] results.append( IndexedEventTriple(left_input, pos_input, neg_input, arg_idx, arg_idx, pos_salience, neg_salience)) else: neg_input_list = pos_event.get_neg_input_list( arg_idx, include_salience=True) for neg_input, neg_salience in neg_input_list: if neg_sample_type == 'neg': left_input = pos_input_list[random.choice( left_input_idx_list)] results.append( IndexedEventTriple(left_input, pos_input, neg_input, arg_idx, arg_idx, pos_salience, neg_salience)) else: for left_input_idx in left_input_idx_list: left_input = pos_input_list[left_input_idx] results.append( IndexedEventTriple( left_input, pos_input, neg_input, arg_idx, arg_idx, pos_salience, neg_salience)) return results