def what_baseline(question, story, return_type, sch_flag=False):
    """Pick the story sentence that best answers a 'what' question.

    Every sentence is scored by (lemma overlap with the question keywords)
    plus an external similarity score; the highest-scoring original
    sentence and its index are returned.
    """
    # Work on pronoun-resolved text but report the untouched sentence.
    source_key = 'sch' if sch_flag else 'text'
    original_sents = get_sents(story[source_key])
    resolved_text = utils.resolve_pronouns(story[source_key])
    sents = get_sents(resolved_text)

    keywords, _ = get_keywords_pattern_tuple(question['text'], question['par'])

    # Answer-type specific keyword additions.
    extra_keywords = []
    if return_type == 'quotation':
        extra_keywords = ['said']
    if return_type == 'verb':
        extra_keywords = []
    if return_type == 'noun':
        if 'day' in question['text']:
            extra_keywords += ['Saturday', 'Sunday', 'Monday', 'Tuesday',
                               'Wednesday', 'Thursday', 'Friday']
    keywords.extend(extra_keywords)

    # Keep a few pronouns/prepositions that are normally stop words,
    # by blanking them out of the custom stop list.
    keep_out = ['in', 'he', 'she', 'him', 'her', 'before', 'after']
    custom_stops = ['' if w in keep_out else w for w in stop_words]
    banned = custom_stops + [':', '’', ',', '.', '!', "'", '"', '?']

    ranked = []
    for idx, sentence in enumerate(sents):
        tokens = nltk.word_tokenize(sentence)
        tagged = nltk.pos_tag(tokens)
        content = [t for t in tokens if t not in banned]
        sim_score = utils.get_similarity(list(set(content)),
                                         list(set(keywords)),
                                         story['sid'], question['qid'])
        # Lemmatize ALL tokens (not just the filtered ones) for the overlap.
        lemmas = [lmtzr.lemmatize(tok, pos=penn2wn(tag)) for tok, tag in tagged]
        score = len(set(lemmas) & set(keywords))
        if sim_score:
            score += sim_score
        ranked.append((score, original_sents[idx], idx))

    ranked.sort(key=operator.itemgetter(0), reverse=True)
    return ranked[0][1], ranked[0][2]
def get_ids_from_property_value(data: 'EntitiesData', property_name: str, property_value: str, fix_data_delegate: Callable[[str], str] = None, match_exact: bool = False) -> List[str]:
    """Return the entity ids whose `property_name` value matches `property_value`.

    Values are normalized through `fix_data_delegate` (default:
    `__fix_property_value`) before comparison. With `match_exact=True`,
    only exact (normalized) matches are returned; otherwise candidates are
    prefix/substring matches, grouped by similarity score and returned in
    descending score order (ties sorted by property text).

    Args:
        data: {entity_id: entity_content} mapping.
        property_name: key to read from each entity's content.
        property_value: value to look for.
        fix_data_delegate: normalizer applied to both sides of the comparison.
        match_exact: require equality instead of fuzzy matching.

    Returns:
        List of matching entity ids (possibly empty).
    """
    # data structure: {id: content}
    # fixed_data structure: {id: normalized property value}
    if not data or not property_name or not property_value:
        print(f'- get_ids_from_property_value: invalid data or property info. Return empty list.')
        return []

    if not fix_data_delegate:
        fix_data_delegate = __fix_property_value

    fixed_value = fix_data_delegate(property_value)
    # Skip entities whose property is empty/falsy.
    fixed_data = {entry_id: fix_data_delegate(entry_data[property_name])
                  for entry_id, entry_data in data.items()
                  if entry_data[property_name]}

    if match_exact:
        return [key for key, value in fixed_data.items() if value == fixed_value]

    # Fuzzy path: group prefix/substring candidates by similarity score.
    similarity_map = {}
    for entry_id, entry_property in fixed_data.items():
        if entry_property.startswith(fixed_value) or fixed_value in entry_property:
            similarity_value = utils.get_similarity(entry_property, fixed_value)
            similarity_map.setdefault(similarity_value, []).append((entry_id, entry_property))

    # Best scores first; within a score, order by the property text.
    # (The original re-checked `match_exact` here, but this branch only runs
    # when match_exact is False, so that condition was dead code.)
    results = []
    for similarity_value in sorted(similarity_map.keys(), reverse=True):
        entries = sorted(similarity_map[similarity_value], key=lambda entry: entry[1])
        results.extend(entry_id for entry_id, _ in entries)
    return results
def __init__(self, model_to_explain, original_molecule, discount_factor,
             fp_len, fp_rad, similarity_set=None, weight_sim=0.5,
             similarity_measure="tanimoto", **kwargs):
    """Counterfactual-search environment for an ESOL regression model.

    Args:
        model_to_explain: model being explained; called as
            model_to_explain(x, edge_index) and unpacked as a 2-tuple whose
            first element is the prediction.
        original_molecule: graph with .x, .edge_index and regression target .y.
        discount_factor: reward discount factor.
        fp_len, fp_rad: fingerprint length/radius forwarded to get_similarity.
        similarity_set: not used in this constructor (kept for API parity
            with sibling environments — TODO confirm).
        weight_sim: weight of the similarity term in the reward.
        similarity_measure: similarity name forwarded to get_similarity.
        **kwargs: forwarded to the base environment class.
    """
    super(CF_Esol, self).__init__(**kwargs)
    self.discount_factor = discount_factor
    self.model_to_explain = model_to_explain
    self.weight_sim = weight_sim
    self.target = original_molecule.y
    # Prediction on the unmodified molecule; reference point for `gain`.
    self.orig_pred, _ = model_to_explain(original_molecule.x,
                                         original_molecule.edge_index)
    # L1 distance between two predictions, detached from the autograd graph.
    self.distance = lambda x, y: F.l1_loss(x, y).detach()
    self.base_loss = self.distance(self.orig_pred, self.target).item()
    # Sign of the shift of prediction p away from the original prediction.
    self.gain = lambda p: torch.sign(self.distance(p, self.orig_pred)
                                     ).item()
    # get_similarity returns (similarity fn, encoder fn, original encoding).
    self.similarity, self.make_encoding, \
        self.original_encoding = get_similarity(similarity_measure,
                                                model_to_explain,
                                                original_molecule,
                                                fp_len, fp_rad)
def when_baseline(question, story, kw_adds, sch_flag=False):
    """Pick the story sentence that best answers a 'when' question.

    Sentences are scored by lemma overlap with the question keywords
    (augmented with `kw_adds`) plus an external similarity score; the
    best-scoring original sentence and its index are returned.
    """
    if sch_flag:
        working_text = story['sch']
        original_sents = get_sents(story['sch'])
    else:
        working_text = utils.resolve_pronouns(story['text'])
        original_sents = get_sents(story['text'])
    sents = get_sents(working_text)

    # Debug aid: report any sentence-count mismatch after pronoun resolution.
    if len(sents) != len(original_sents):
        print(len(sents), len(original_sents))
        print(sents)
        print(original_sents)

    keywords, _ = get_keywords_pattern_tuple(question['text'], question['par'])
    keywords.extend(kw_adds)

    banned = stop_words + [':', '`', '’', ',', '.', '!', "'", '"', '?']

    ranked = []
    for idx, sentence in enumerate(sents):
        tokens = nltk.word_tokenize(sentence)
        tagged = nltk.pos_tag(tokens)
        content = [t for t in tokens if t not in banned]
        sim_score = utils.get_similarity(list(set(content)),
                                         list(set(keywords)),
                                         story['sid'], question['qid'])
        # Lemmatize ALL tokens (not just the filtered ones) for the overlap.
        lemmas = [lmtzr.lemmatize(tok, pos=penn2wn(tag)) for tok, tag in tagged]
        score = len(set(lemmas) & set(keywords))
        if sim_score:
            score += sim_score
        ranked.append((score, original_sents[idx], idx))

    ranked.sort(key=operator.itemgetter(0), reverse=True)
    return ranked[0][1], ranked[0][2]
def _compare_mention_with_node(self, mention_candidate, node, threshold,
                               groundings, modes=None,
                               initial_mode_weight=1.0, verbose=0):
    """Ground `mention_candidate` against one ontology `node` with a
    back-off sequence of similarity modes.

    Each mode in `modes` is tried in order; each fallback step multiplies
    the mode weight by 0.9 so later (weaker) modes count less. The best
    (weighted similarity, mode) pair is recorded in `groundings` under the
    node's grounding key, and the back-off stops at the first mode whose
    weighted similarity exceeds `threshold`.

    Args:
        mention_candidate: mention to ground.
        node: ontology node to compare against.
        threshold: weighted-similarity level that ends the back-off.
        groundings: dict mutated in place; key -> (similarity, mode).
        modes: ordered utils.SimilarityMode values; defaults to the
            BERT/exemplar/string back-off chain below.
        initial_mode_weight: weight applied to the first mode.
        verbose: >0 prints per-mode diagnostics.

    Returns:
        None; results are written into `groundings`.
    """
    if modes is None:
        modes = [
            utils.SimilarityMode.
            COMPARE_MENTION_KEYWORDS_TO_EXAMPLES_AVG_USING_BERT,
            utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXEMPLARS_AVG,
            utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
            utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
        ]
    # true backoff approach to ground this node
    groundings_key = Grounder.get_grounding_key(node)
    mode_weight = initial_mode_weight
    for mode in modes:
        similarity = utils.get_similarity(mention_candidate, node, mode,
                                          self._ontology.embeddings)
        similarity *= mode_weight
        if verbose > 0:
            print(
                "Ontology node: {}\nExemplars:{}\nProcesses:{}\nParticipant"
                "s:{}\nProperties:{}\nGrounding mode: {}\nSimilarity: {}"
                "\nMode weight: {}".format(
                    node.get_path(), node.get_exemplars_with_weights(),
                    node.get_processes(), node.get_participants(),
                    node.get_properties(), mode.name, similarity,
                    mode_weight))
        # Keep only the best (similarity, mode) seen so far for this node.
        previous = groundings.get(groundings_key)
        if previous is None or similarity > previous[0]:
            if previous is not None and verbose > 0:
                print("Updating previous score {}".format(previous[0]))
            groundings[groundings_key] = (similarity, mode)
        # use threshold only to determine backoff strategy
        if similarity > threshold:
            break
        if verbose > 0:
            # Bug fix: this message previously called .format(mode.name)
            # with no placeholder, so the mode name was never displayed.
            print("-- No grounding with this mode ({}). Backing off...".format(
                mode.name))
        mode_weight *= 0.9
def __init__(self, model_to_explain, original_graph, discount_factor,
             weight_sim=0.5, similarity_measure="neural_encoding", **kwargs):
    """Counterfactual-search environment for a CYCLIQ graph classifier.

    Stores the model, reward weights and the similarity machinery, and
    targets the class opposite to the model's label for `original_graph`.
    """
    super(CF_Cycliq, self).__init__(**kwargs)
    # Counterfactual goal: flip the (binary) predicted class.
    self.class_to_optimise = 1 - original_graph.y.item()
    self.discount_factor = discount_factor
    self.model_to_explain = model_to_explain
    self.weight_sim = weight_sim
    # get_similarity yields (similarity fn, encoder fn, original encoding).
    (self.similarity,
     self.make_encoding,
     self.original_encoding) = get_similarity(similarity_measure,
                                              model_to_explain,
                                              original_graph)
def __init__(self, model_to_explain, original_molecule, discount_factor,
             fp_len, fp_rad, similarity_set=None, weight_sim=0.5,
             similarity_measure="tanimoto", **kwargs):
    """Counterfactual-search environment for a Tox21 molecule classifier.

    Args:
        model_to_explain: classifier being explained.
        original_molecule: graph object with (binary) label .y.
        discount_factor: reward discount factor.
        fp_len, fp_rad: fingerprint length/radius forwarded to get_similarity.
        similarity_set: not used in this constructor (kept for API parity
            with sibling environments — TODO confirm).
        weight_sim: weight of the similarity term in the reward.
        similarity_measure: similarity name forwarded to get_similarity.
        **kwargs: forwarded to the base environment class.
    """
    super(CF_Tox21, self).__init__(**kwargs)
    # Counterfactual goal: flip the (binary) predicted class.
    self.class_to_optimise = 1 - original_molecule.y.item()
    self.discount_factor = discount_factor
    self.model_to_explain = model_to_explain
    self.weight_sim = weight_sim
    # get_similarity returns (similarity fn, encoder fn, original encoding).
    self.similarity, self.make_encoding, \
        self.original_encoding = get_similarity(similarity_measure,
                                                model_to_explain,
                                                original_molecule,
                                                fp_len, fp_rad)
def main():
    """Compute Duality Diagram Similarity (DDS) between each Taskonomy
    task's features and Pascal VOC segmentation features over bootstrap
    image samples, and save per-kernel and per-distance affinity matrices.
    """
    parser = argparse.ArgumentParser(
        description='Computing Duality Diagram Similarity between Taskonomy Tasks')
    parser.add_argument('-d', '--dataset',
                        help='image dataset to use for computing DDS: options are [pascal_5000, taskonomy_5000, nyuv2]',
                        default="pascal_5000", type=str)
    parser.add_argument('-fd', '--feature_dir',
                        help='path to saved features root directory',
                        default="./features/", type=str)
    parser.add_argument('-fdt', '--feature_dir_taskonomy',
                        help='path to saved features from taskonomy models',
                        default="./features/taskonomy_activations/", type=str)
    parser.add_argument('-fdp', '--feature_dir_pascal',
                        help='path to saved features from pascal models',
                        default="./features/pascal_activations/", type=str)
    parser.add_argument('-sd', '--save_dir',
                        help='path to save the DDS results',
                        default="./results/DDScomparison_pascal", type=str)
    parser.add_argument('-n', '--num_images',
                        help='number of images to compute DDS',
                        default=200, type=int)
    parser.add_argument('-i', '--num_iters',
                        help='number of iterations for bootstrap',
                        default=100, type=int)
    args = vars(parser.parse_args())
    num_images = args['num_images']
    dataset = args['dataset']
    taskonomy_feats_path = os.path.join(args['feature_dir_taskonomy'], dataset)
    pascal_feats_path = os.path.join(args['feature_dir_pascal'], dataset)
    num_repetitions = args['num_iters']
    # NOTE(review): hard-codes "./features" instead of args['feature_dir'],
    # so the -fd option is effectively unused here — confirm intended.
    features_filename = os.path.join(
        "./features", "taskonomy_pascal_feats_" + args['dataset'] + ".npy")
    # nyuv2 has 1449 images; the other supported datasets have 5000.
    num_total_images = 5000
    if dataset == 'nyuv2':
        num_total_images = 1449
    save_dir = os.path.join(args['save_dir'], dataset)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    task_list = list_of_tasks.split(' ')
    print(task_list)
    taskonomy_data = get_features(taskonomy_feats_path, pascal_feats_path,
                                  features_filename)

    # setting up DDS using Q,D,f,g for kernels
    kernel_type = ['rbf', 'lap', 'linear']  # possible kernels (f in DDS)
    feature_norm_type = ['znorm']  # possible normalizations (Q,D in DDS)
    save_path = os.path.join(save_dir, 'kernels.npy')
    affinity_ablation = {}
    for kernel in (kernel_type):
        affinity_ablation[kernel] = {}
        for feature_norm in feature_norm_type:
            # Fixed seed so every kernel/norm pair sees the same bootstrap samples.
            np.random.seed(1993)
            indices = []
            for i in range(num_repetitions):
                indices.append(
                    np.random.choice(range(num_total_images), num_images,
                                     replace=False))
            print(kernel, feature_norm)
            # One row per bootstrap repetition, one column per task.
            affinity_matrix = np.zeros((num_repetitions, len(task_list)), float)
            for i in tqdm(range(num_repetitions)):
                method = kernel + "__" + feature_norm
                start = time.time()
                # DDS of each task's features vs. Pascal VOC segmentation features.
                for index1, task1 in (enumerate(task_list)):
                    affinity_matrix[i, index1] = get_similarity(taskonomy_data[task1][indices[i], :],
                                                                taskonomy_data['pascal_voc_segmentation'][indices[i], :],
                                                                kernel, feature_norm)
                end = time.time()
                print("Method is ", method)
                print("Time taken is ", end - start)
            affinity_ablation[kernel][feature_norm] = affinity_matrix
    np.save(save_path, affinity_ablation)

    # setting up DDS using Q,D,f,g for distance functions
    save_path = os.path.join(save_dir, 'rdms.npy')
    dist_type = ['pearson', 'euclidean', 'cosine']
    affinity_ablation = {}
    for dist in (dist_type):
        affinity_ablation[dist] = {}
        for feature_norm in feature_norm_type:
            # Same fixed seed: the distance runs reuse the same bootstrap samples.
            np.random.seed(1993)
            indices = []
            for i in range(num_repetitions):
                indices.append(
                    np.random.choice(range(num_total_images), num_images,
                                     replace=False))
            print(dist, feature_norm)
            affinity_matrix = np.zeros((num_repetitions, len(task_list)), float)
            for i in tqdm(range(num_repetitions)):
                method = dist + "__" + feature_norm
                start = time.time()
                for index1, task1 in (enumerate(task_list)):
                    affinity_matrix[i, index1] = get_similarity_from_rdms(taskonomy_data[task1][indices[i], :],
                                                                          taskonomy_data['pascal_voc_segmentation'][indices[i], :],
                                                                          dist, feature_norm)
                end = time.time()
                print("Method is ", method)
                print("Time taken is ", end - start)
            affinity_ablation[dist][feature_norm] = affinity_matrix
    np.save(save_path, affinity_ablation)
def is_looping(self, threshold=0.9):
    """Detect whether the generation is repeating itself.

    Returns True when at least two results exist and the similarity of the
    last two exceeds `threshold`; otherwise False.
    """
    if len(self.results) < 2:
        return False
    latest, previous = self.results[-1], self.results[-2]
    return bool(get_similarity(latest, previous) > threshold)
import utils

# Interactive entry point: classify a yoga pose either from a fresh webcam
# capture or from an existing image on disk.
print('\n\nWelcome to the Yoga pose estimator')
print('----------------------------------\n')
print('Please enter 1 or 2: ')
print('1. Yoga pose from web camera')
try:
    choice = int(input('2. Yoga picture from computer\n'))
except ValueError:
    # Bug fix: non-numeric input previously crashed with an unhandled
    # ValueError; exit with the same "invalid" message instead.
    print('Choice is invalid')
    raise SystemExit(1)

if choice == 1:
    print('Press q to start the count down timer and esc to exit once the picture is taken')
    utils.create_img_from_cam()
    path = 'images/camera.jpg'
elif choice == 2:
    path = input('Please enter the path of your image: ')
else:
    # Bug fix: previously fell through with `path` undefined and crashed
    # later with a NameError; exit cleanly instead.
    print('Choice is invalid')
    raise SystemExit(1)

pose = utils.classify_pose(path)
print("Yoga pose: ", pose)
print("Cosine similarity: ", "{:.2f}".format(utils.get_similarity(path, pose)))
utils.show_vectors(path)
def main():
    """Compute Duality Diagram Similarity (DDS) between every pair of
    Taskonomy tasks, for several kernels/distances and feature
    normalizations, and save the symmetric affinity matrices.
    """
    parser = argparse.ArgumentParser(description='Computing Duality Diagram Similarity between Taskonomy Tasks')
    parser.add_argument('-d', '--dataset',
                        help='image dataset to use for computing DDS: options are [pascal_5000, taskonomy_5000, nyuv2]',
                        default="taskonomy_5000", type=str)
    parser.add_argument('-fd', '--feature_dir',
                        help='path to saved features from taskonomy models',
                        default="./features/", type=str)
    parser.add_argument('-sd', '--save_dir',
                        help='path to save the DDS results',
                        default="./results/DDScomparison_taskonomy", type=str)
    parser.add_argument('-n', '--num_images',
                        help='number of images to compute DDS',
                        default=200, type=int)
    args = vars(parser.parse_args())
    num_images = args['num_images']
    dataset = args['dataset']
    features_filename = os.path.join(args['feature_dir'], "taskonomy_pascal_feats_" + args['dataset'] + ".npy")
    save_dir = os.path.join(args['save_dir'], dataset)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    task_list = list_of_tasks.split(' ')
    taskonomy_data = get_features(features_filename, num_images)  # function that returns features from taskonomy models for first #num_images

    # setting up DDS using Q,D,f,g for kernels
    kernel_type = ['rbf', 'lap', 'linear']  # possible kernels (f in DDS)
    feature_norm_type = ['None', 'centering', 'znorm', 'group_norm', 'instance_norm', 'layer_norm', 'batch_norm']  # possible normalizations (Q,D in DDS)
    save_path = os.path.join(save_dir, 'kernels.npy')
    affinity_ablation = {}
    for kernel in (kernel_type):
        affinity_ablation[kernel] = {}
        for feature_norm in feature_norm_type:
            affinity_matrix = np.zeros((len(task_list), len(task_list)), float)
            method = kernel + "__" + feature_norm
            start = time.time()
            # Fill the upper triangle only and mirror it: the affinity is
            # computed symmetrically for each task pair.
            for index1, task1 in tqdm(enumerate(task_list)):
                for index2, task2 in (enumerate(task_list)):
                    if index1 > index2:
                        continue
                    affinity_matrix[index1, index2] = get_similarity(taskonomy_data[task1],
                                                                     taskonomy_data[task2],
                                                                     kernel, feature_norm)
                    affinity_matrix[index2, index1] = affinity_matrix[index1, index2]
            end = time.time()
            print("Method is ", method)
            print("Time taken is ", end - start)
            affinity_ablation[kernel][feature_norm] = affinity_matrix
    np.save(save_path, affinity_ablation)

    # setting up DDS using Q,D,f,g for distance functions
    save_path = os.path.join(save_dir, 'rdms.npy')
    dist_type = ['pearson', 'euclidean', 'cosine']
    affinity_ablation = {}
    for dist in (dist_type):
        affinity_ablation[dist] = {}
        for feature_norm in (feature_norm_type):
            affinity_matrix = np.zeros((len(task_list), len(task_list)), float)
            method = dist + "__" + feature_norm
            start = time.time()
            # Same upper-triangle + mirror pattern as the kernel section.
            for index1, task1 in tqdm(enumerate(task_list)):
                for index2, task2 in enumerate(task_list):
                    if index1 > index2:
                        continue
                    affinity_matrix[index1, index2] = get_similarity_from_rdms(taskonomy_data[task1],
                                                                               taskonomy_data[task2],
                                                                               dist, feature_norm)
                    affinity_matrix[index2, index1] = affinity_matrix[index1, index2]
            end = time.time()
            print("Method is ", method)
            print("Time taken is ", end - start)
            affinity_ablation[dist][feature_norm] = affinity_matrix
    np.save(save_path, affinity_ablation)