    def create_reference_file(self, zero_less_than_2):
        # load the data
        dd = DatasetDexter()
        document_list = dd.get_dexter_dataset(
            path=FileLocations.get_dropbox_dexter_path())
        results = ''

        # process the data
        result_count = 0
        doc_count = 0
        for document in document_list:
            data = json.loads(document)
            saliency_by_ent_id_golden = self.extract_saliency_by_ent_id_golden(data)
            docid = data['docId']
            sorted_list = self.get_ordered_list_from_dictionary(saliency_by_ent_id_golden)
            for item in sorted_list:
                entity_id = item[0]
                salience = item[1]
                if zero_less_than_2:
                    if salience < 2.0:
                        salience = 0.0
                results = results + str(docid) + ' 0 ' + str(entity_id) + ' ' + str(salience) + '\n'
                result_count += 1
            self.logger.info('Documents Processed %d Entities Processed %d ', doc_count, result_count)
            doc_count += 1

        fn = FileLocations.get_dropbox_intermediate_path() + "trec_ground_truth.txt"
        self.logger.info('writing to %s ', fn)
        file = open(fn, "w")
        file.write(results)
        file.close()
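# Hedged sketch (not part of the project code): the method above emits one line per
# (docid, entity_id) pair in a TREC qrels-style layout, "<docid> 0 <entity_id> <salience>".
# The helper below reproduces only that formatting step on hypothetical data, so the
# expected file contents can be checked without the Dexter dataset or FileLocations.
def _sketch_write_qrels(rows, path):
    # rows: iterable of (docid, entity_id, salience) tuples -- hypothetical input
    with open(path, "w") as fh:
        for docid, entity_id, salience in rows:
            fh.write('%s 0 %s %s\n' % (docid, entity_id, salience))

# Example usage (hypothetical values):
# _sketch_write_qrels([(1, 12345, 3.0), (1, 67890, 0.0)], '/tmp/trec_ground_truth_sample.txt')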
def train_model():
    X, y, docid_array, entity_id_array = load_feature_matrix(
        feature_filename=INTERMEDIATE_PATH + 'dexter_all_heavy_catted_8_7_2018.txt',
        feature_names=feature_names,
        entity_id_index=1,
        y_feature_index=2,
        first_feature_index=4,
        number_features_per_line=40,
        tmp_filename='/tmp/temp_conversion_file.txt')

    # train only on records we have a golden salience for
    fg = FilterGolden()
    logger.info('X Shape = %s', X.shape)
    logger.info('y Shape = %s', y.shape)

    dexter_dataset = DatasetDexter()
    wikipedia_dataset = WikipediaDataset()

    X2, y2, docid2, entityid2 = fg.get_only_golden_rows(
        X, y, docid_array, entity_id_array, dexter_dataset, wikipedia_dataset)

    logger.info('X2 Shape = %s', X2.shape)
    logger.info('y2 Shape = %s', y2.shape)

    wrapper = GBRTWrapper()
    gbrt = wrapper.train_model_no_split(X2, y2, n_estimators=40)
    logger.info('trained')
    # gbrt.save_model()

    # from https://shankarmsy.github.io/stories/gbrt-sklearn.html
    # One of the benefits of growing trees is that we can understand how important each of the features is.
    print("Feature Importances")
    print(gbrt.feature_importances_)
    print()

    # Print the R-squared value for the training set. This explains how much of the variance
    # in the data the model is able to explain.
    print("R-squared for Train: %.2f" % gbrt.score(X2, y2))
    # print("R-squared for Test: %.2f" % gbrt.score(X_test, y_test))
    # - See more at: https://shankarmsy.github.io/stories/gbrt-sklearn.html#sthash.JNZQbnph.dpuf
    return gbrt, X2, y2, docid2, entityid2
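# Hedged sketch: GBRTWrapper is project code, but the feature_importances_ and score()
# calls above suggest it wraps sklearn's GradientBoostingRegressor. The standalone
# snippet below shows the same train / importances / R-squared steps on synthetic data;
# it is an illustration of the technique, not the project's wrapper.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def _sketch_gbrt_demo():
    rng = np.random.RandomState(0)
    X_demo = rng.rand(200, 5)                    # 200 rows, 5 synthetic features
    y_demo = X_demo[:, 0] * 2.0 + rng.rand(200)  # target depends mostly on feature 0
    gbrt = GradientBoostingRegressor(n_estimators=40)
    gbrt.fit(X_demo, y_demo)
    print("Feature Importances", gbrt.feature_importances_)
    print("R-squared for Train: %.2f" % gbrt.score(X_demo, y_demo))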
    def go(self, filename, feature_names, filter_only_golden):
        X, y, docid_array, entity_id_array = load_feature_matrix(
            feature_filename=filename,
            feature_names=feature_names,
            entity_id_index=1,
            y_feature_index=2,
            first_feature_index=4,
            number_features_per_line=len(feature_names) + 4,
            tmp_filename='/tmp/temp_conversion_file.txt')

        # train only on records we have a golden salience for
        self.logger.info('__________________________')
        self.logger.info('File %s', filename)
        self.logger.info('X Shape = %s', X.shape)
        self.logger.info('y Shape = %s', y.shape)

        if filter_only_golden:
            dexterDataset = DatasetDexter()
            wikipediaDataset = WikipediaDataset()
            fg = sellibrary.filter_only_golden.FilterGolden()
            X, y, docid_array, entity_id_array = fg.get_only_golden_rows(
                X, y, docid_array, entity_id_array, dexterDataset, wikipediaDataset)
            self.logger.info('After filtering only golden rows:')
            self.logger.info('X Shape = %s', X.shape)
            self.logger.info('y Shape = %s', y.shape)

        self.logger.info('y[1:10] %s', y[1:10])
        self.logger.info('y > 0 %s', y[y > 0.0])

        # binarise the salience labels at the 2.0 threshold
        y[y < 2.0] = 0
        y[y >= 2.0] = 1

        ig = self.information_gain_v2(X, y)
        self.logger.info('ig %s', ig)
        self.logger.info('ig shape %s', ig.shape)

        d = {}
        for i in range(len(feature_names)):
            d[feature_names[i]] = ig[i]
        self.sort_and_print(d)
        return d
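# Hedged sketch: information_gain_v2 above is project code. A commonly used stand-in
# for per-feature information gain against binary labels is
# sklearn.feature_selection.mutual_info_classif; the helper below is a substitute
# estimator for illustration, not necessarily what information_gain_v2 computes.
import numpy as np
from sklearn.feature_selection import mutual_info_classif

def _sketch_information_gain(X, y, feature_names):
    y_bin = (y >= 2.0).astype(int)  # same binarisation as the method above
    ig = mutual_info_classif(X, y_bin, random_state=0)
    return dict(zip(feature_names, ig))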
filename_B = dropbox_intermediate_path + 'wp_sentiment_simple.txt'  # 'base_tf_simple_v2.txt'
output_filename = dropbox_intermediate_path + 'wp_joined.txt'  # 'joined_sel_sent_and_tf.txt'

# Load file A
X1, y1, docid_array1, entity_id_array1 = load_feature_matrix(
    feature_filename=filename_A,
    feature_names=file_A_feature_names,
    entity_id_index=1,
    y_feature_index=2,
    first_feature_index=4,
    number_features_per_line=len(file_A_feature_names) + 4,
    tmp_filename='/tmp/temp_conversion_file.txt')

print(y1.shape)

dexter_dataset = DatasetDexter()
wikipedia_dataset = WikipediaDataset()

# fg = FilterGolden()
# X1, y1, docid_array1, entity_id_array1 = fg.get_only_golden_rows(X1, y1, docid_array1, entity_id_array1,
#                                                                  dexter_dataset, wikipedia_dataset)

document_list = dexter_dataset.get_dexter_dataset(
    path=FileLocations.get_dropbox_dexter_path())
golden_saliency_by_entid_by_docid = dexter_dataset.get_golden_saliency_by_entid_by_docid(
    document_list, wikipedia_dataset)

print(y1.shape)

# Load file B
X2, y2, docid_array2, entity_id_array2 = load_feature_matrix(
    feature_filename=filename_B,
class DexterFeeder:
    # set up logging
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.propagate = False
    logger.setLevel(logging.INFO)

    def __init__(self):
        self.dd = DatasetDexter()

    @staticmethod
    def extract_body(data):
        body = ''
        for d in data['document']:
            if d['name'].startswith('body_par_'):
                body = body + d['value']
        return body

    def dexter_dataset_sentiment(self, sentiment_processor, spotter, output_filename):
        dexter_json_doc_list = self.dd.get_dexter_dataset(
            FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')

        self.logger.info('building list of n-grams')
        ngram_list = []
        sent_by_entity_id_by_docid = {}
        file_contents = ''
        for json_doc in dexter_json_doc_list:
            data = json.loads(json_doc)
            body = self.extract_body(data)
            title = data['title']
            docid = data['docId']
            sent_by_entity_id_by_docid[docid] = {}

            for n_gram_length in range(2, 10):
                title_entities = spotter.get_entity_candidates(title, 0.5)
                for e in title_entities:
                    n_gram = sentiment_processor.get_ngram(
                        title, n_gram_length, e.start_char, e.end_char)
                    sent = sentiment_processor.get_doc_sentiment(n_gram)
                    if e.entity_id not in sent_by_entity_id_by_docid[docid]:
                        sent_by_entity_id_by_docid[docid][e.entity_id] = 0
                    sent_by_entity_id_by_docid[docid][e.entity_id] = \
                        sent_by_entity_id_by_docid[docid][e.entity_id] + sent
                    ngram_list.append(n_gram)

                body_entities = spotter.get_entity_candidates(body, 0.5)
                for e in body_entities:
                    n_gram = sentiment_processor.get_ngram(
                        body, n_gram_length, e.start_char, e.end_char)
                    sent = sentiment_processor.get_doc_sentiment(n_gram)
                    if e.entity_id not in sent_by_entity_id_by_docid[docid]:
                        sent_by_entity_id_by_docid[docid][e.entity_id] = 0
                    sent_by_entity_id_by_docid[docid][e.entity_id] = \
                        sent_by_entity_id_by_docid[docid][e.entity_id] + sent
                    ngram_list.append(n_gram)

            # log progress
            for entity_id in sent_by_entity_id_by_docid[docid].keys():
                sent = sent_by_entity_id_by_docid[docid][entity_id]
                s = '%d %d 0 0 [ %f ]' % (docid, entity_id, sent)
                self.logger.info(s)
                file_contents = file_contents + s + '\n'

        file = open(output_filename, "w")
        file.write(file_contents)
        file.close()
        self.logger.info('processing complete')
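# Hedged usage sketch: one way the DexterFeeder above might be driven end-to-end.
# The GoldenSpotter and SentimentProcessor constructions mirror the __main__ blocks
# elsewhere in this repository; treat the exact constructor arguments, and the
# availability of the imports, as assumptions rather than the project's entry point.
def _sketch_run_dexter_feeder(output_filename='/tmp/dexter_sentiment.txt'):
    sentiment_processor = SentimentProcessor()
    dd = DatasetDexter()
    document_list = dd.get_dexter_dataset(
        FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
    spotter = GoldenSpotter(document_list, WikipediaDataset())
    DexterFeeder().dexter_dataset_sentiment(sentiment_processor, spotter, output_filename)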
    def __init__(self):
        self.dd = DatasetDexter()
        self.logger.debug('Appending heavy parameters to %s', self.heavy_feature_filename)
        for entity_id in all_heavy_features_by_entity_id.keys():
            output = '{0},{1},{2},{3},{4}\n'.format(
                str(optional_docid), str(entity_id), str('?'), str('?'),
                str(all_heavy_features_by_entity_id[entity_id]))
            file.write(output)
        file.close()
        return features_by_entity_id


if __name__ == "__main__":
    # build the golden spotter
    dd = DatasetDexter()
    document_list = dd.get_dexter_dataset(
        FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
    wikipedia_dataset = WikipediaDataset()
    spotter = GoldenSpotter(document_list, wikipedia_dataset)

    body = "Iranian representatives say negotiations with Europe on its nuclear program are in the final stages. Iran's foreign minister, Kamal Kharazi, told state television Saturday Iranian negotiators have given their final response to a European Union proposal to suspend Iran's uranium enrichment program. He said it is now up to the Europeans to decide whether or not to accept their decision. Iran and the European Union's big three powers; Britain, Germany, and France; have been negotiating a deal under which Tehran would agree to freeze sensitive nuclear work to avoid possible U.N. Security Council sanctions. U.S. Secretary of State Colin Powell, says that Iran's nuclear program is intended to make nuclear weapons. Iran authorities have insisted that their nuclear ambitions are limited to generating electricity from atomic energy plants, not making bombs. Critics of the position of the United States point to Israel's nuclear program. Israel maintains a policy of nuclear ambiguity, but is widely believed to possess at least 82 nuclear weapons. The program has not been condemned by the United States."
    title = ""

    sfe = SelFeatureExtractor(spotter, binary_classifier_threshold=0.5, min_candidates_to_pass_through=5,
                              binary_classifier=None, light_feature_filename=None, heavy_feature_filename=None,
                              num_light_features=23,
class SelModelBuilder:
    # set up logging
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.propagate = False
    logger.setLevel(logging.INFO)

    def __init__(self):
        self.dd = DatasetDexter()

    @staticmethod
    def extract_body(data):
        body = ''
        for d in data['document']:
            if d['name'].startswith('body_par_'):
                body = body + d['value']
        return body

    def get_dexter_datset(self):
        return self.dd

    # noinspection PyShadowingNames
    def train_model_using_dexter_dataset(self, sentiment_processor, spotter, afinn_filename):
        dexter_json_doc_list = self.dd.get_dexter_dataset(FileLocations.get_dropbox_dexter_path(),
                                                          'saliency-dataset.json')
        self.logger.info('building list of n-grams')
        ngram_list = []
        for n_gram_length in range(2, 10):
            for json_doc in dexter_json_doc_list:
                data = json.loads(json_doc)
                # pprint.pprint(data)
                body = self.extract_body(data)
                title = data['title']
                title_entities = spotter.get_entity_candidates(title, 0.5)
                for e in title_entities:
                    n_gram = sentiment_processor.get_ngram(title, n_gram_length, e.start_char, e.end_char)
                    ngram_list.append(n_gram)
                body_entities = spotter.get_entity_candidates(body, 0.5)
                for e in body_entities:
                    n_gram = sentiment_processor.get_ngram(body, n_gram_length, e.start_char, e.end_char)
                    ngram_list.append(n_gram)
        self.logger.info('processing list of n-grams')
        sentiment_processor.cal_term_weight_on_full_corpus(afinn_filename, ngram_list, debug_mode=1)
        self.logger.info('processing complete')

    def train_and_save_model(self, filename, spotter):
        afinn_filename = '../sellibrary/resources/AFINN-111.txt'
        sentiment_processor = SentimentProcessor()
        self.train_model_using_dexter_dataset(sentiment_processor, spotter, afinn_filename)
        sentiment_processor.save_model(filename)
        return sentiment_processor

    def get_feature_list(self, sentiment_processor, phrase):
        sent = sentiment_processor.get_doc_simple_sentiment(phrase)
        feature_list = [sent]
        feature_list.extend(sentiment_processor.get_doc_prop_pos_prob_neg(phrase))
        return feature_list

    def build_output_using_dexter_dataset(self, spotter, golden_saliency_by_entid_by_docid, output_filename,
                                          document_to_feature_converter, tosent_converter, dexter_json_doc_list,
                                          min_docid=1, max_docid=700):
        self.logger.info('building features')
        if output_filename is not None:
            file = open(output_filename, "w")
        else:
            file = None

        salience_by_entity_by_doc_id = {}
        doc_number = -1
        for json_doc in dexter_json_doc_list:
            doc_number += 1
            data = json.loads(json_doc)
            # pprint.pprint(data)
            docid = data['docId']
            s = str(docid)
            # process by docid, unless it is not numeric, in which case process by document number
            if (not s.isnumeric() and doc_number >= min_docid and doc_number <= max_docid) \
                    or (s.isnumeric() and docid >= min_docid and docid <= max_docid):
                t1 = time.time()
                salience_by_entity_by_doc_id[docid] = {}
                body = self.extract_body(data)
                title = data['title']
                title_entities = spotter.get_entity_candidates(title, docid)
                body_entities = spotter.get_entity_candidates(body, docid)
                features_by_entity_id = document_to_feature_converter.get_features(body, body_entities,
                                                                                   title, title_entities, docid)
                for entity_id in features_by_entity_id.keys():
                    golden = 0
                    if docid in golden_saliency_by_entid_by_docid:
                        if entity_id in golden_saliency_by_entid_by_docid[docid]:
                            golden = golden_saliency_by_entid_by_docid[docid][entity_id]
                    line = str(docid) + ',' + str(entity_id) + ',' + str(golden) + ',0,' + \
                        str(features_by_entity_id[entity_id])
                    if file is not None:
                        file.write(line)
                        file.write('\n')
                        self.logger.info('writing to %s', output_filename)
                        self.logger.info(line)
                    if tosent_converter is not None:
                        sentiment = tosent_converter.get_salient(features_by_entity_id[entity_id])
                    else:
                        sentiment = 0.0
                    salience_by_entity_by_doc_id[docid][entity_id] = sentiment
                    self.logger.debug('sent %f', sentiment)
                t2 = time.time()
                self.logger.debug('Time taken to process docid %d = %f sec', docid, (t2 - t1))
        if file is not None:
            file.close()
            self.logger.info('written to %s', output_filename)
        self.logger.info('processing complete')
        return salience_by_entity_by_doc_id
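# Hedged sketch: the writer above emits lines of the form
#   "<docid>,<entity_id>,<golden>,0,[f1, f2, ...]"
# where the feature vector is the str() of a Python list. The reader below parses
# that layout back, assuming the feature field really is a bracketed list on every
# line; docid is kept as a string because the dataset allows non-numeric ids.
import ast

def _sketch_read_feature_lines(path):
    rows = []
    with open(path) as fh:
        for raw in fh:
            docid, entity_id, golden, _zero, features = raw.rstrip('\n').split(',', 4)
            rows.append((docid, int(entity_id), float(golden), ast.literal_eval(features)))
    return rows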
from sellibrary.sel.dexter_dataset import DatasetDexter
from sellibrary.wiki.wikipedia_datasets import WikipediaDataset
from sellibrary.util.first_model_value import FirstValueModel

if __name__ == "__main__":
    const = Const()
    x_sel_feature_names = const.get_sel_feature_names()
    print(len(x_sel_feature_names))

    INTERMEDIATE_PATH = FileLocations.get_dropbox_intermediate_path()
    per_document_ndcg = True
    docid_set = set(Const.TESTSET_DOCID_LIST)

    dd = DatasetDexter()
    wikipediaDataset = WikipediaDataset()

    # SEL GBRT
    # feature_filename = INTERMEDIATE_PATH + 'aws/all.txt'
    # feature_names = const.get_sel_feature_names()
    # model_filename = INTERMEDIATE_PATH + 'sel_golden_spotter_GradientBoostingRegressor.pickle'

    # SEL RFR
    # feature_filename = INTERMEDIATE_PATH + 'aws/all.txt'
    # feature_names = const.get_sel_feature_names()  # this was different
    # model_filename = INTERMEDIATE_PATH + 'sel_golden_spotter_RF.pickle'
    #
    # per_document_ndcg = True

    # Sent RFR
    # feature_filename = INTERMEDIATE_PATH + 'sentiment_simple.txt'  # OK
        self.logger.info('processing complete')

    # def train_and_save_model(self, filename):
    #     spotter = SpotlightCachingSpotter(False)
    #     afinn_filename = '../sellibrary/resources/AFINN-111.txt'
    #     sentiment_processor = SentimentProcessor()
    #     self.train_model_using_dexter_dataset(sentiment_processor, spotter, afinn_filename)
    #     sentiment_processor.save_model(filename)
    #     return sentiment_processor


if __name__ == "__main__":
    fg = FilterGolden()
    dd = DatasetDexter()
    wd = WikipediaDataset()

    dexter_json_doc_list = dd.get_dexter_dataset(
        FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
    golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid(
        dexter_json_doc_list, wd)

    # check which are still valid
    wikititle_by_id = wd.get_wikititle_by_id()

    not_found_count = 0
    count = 0
    multiple_wid_count = 0
    for docid in golden_saliency_by_entid_by_docid.keys():
class PWModelBuilder:
    # set up logging
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.propagate = False
    logger.setLevel(logging.INFO)

    def __init__(self):
        self.dd = DatasetDexter()

    @staticmethod
    def extract_body(data):
        body = ''
        for d in data['document']:
            if d['name'].startswith('body_par_'):
                body = body + d['value']
        return body

    def get_dexter_datset(self):
        return self.dd

    def get_feature_list(self, sentiment_processor, phrase):
        sent = sentiment_processor.get_doc_simple_sentiment(phrase)
        feature_list = [sent]
        feature_list.extend(
            sentiment_processor.get_doc_prop_pos_prob_neg(phrase))
        return feature_list

    def build_output_using_dexter_dataset(self, spotter, golden_saliency_by_entid_by_docid, output_filename,
                                          docid_set, use_rand_values):
        dexter_json_doc_list = self.dd.get_dexter_dataset(
            FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
        self.logger.info('building features')
        if output_filename is not None:
            file = open(output_filename, "w")
        else:
            file = None

        salience_by_entity_by_doc_id = {}
        for json_doc in dexter_json_doc_list:
            data = json.loads(json_doc)
            # pprint.pprint(data)
            docid = data['docId']
            if docid_set is None or docid in docid_set:
                salience_by_entity_by_doc_id[docid] = {}
                body = self.extract_body(data)
                title = data['title']
                title_entities = spotter.get_entity_candidates(title, docid)
                body_entities = spotter.get_entity_candidates(body, docid)

                features_by_entity_id = {}
                for e in title_entities:
                    if docid in golden_saliency_by_entid_by_docid:
                        if e.entity_id in golden_saliency_by_entid_by_docid[docid]:
                            golden = golden_saliency_by_entid_by_docid[docid][e.entity_id]
                            if use_rand_values:
                                features_by_entity_id[e.entity_id] = [random.random()]
                            else:
                                features_by_entity_id[e.entity_id] = [golden]
                for e in body_entities:
                    if docid in golden_saliency_by_entid_by_docid:
                        if e.entity_id in golden_saliency_by_entid_by_docid[docid]:
                            golden = golden_saliency_by_entid_by_docid[docid][e.entity_id]
                            if use_rand_values:
                                features_by_entity_id[e.entity_id] = [random.random()]
                            else:
                                features_by_entity_id[e.entity_id] = [golden]

                for entity_id in features_by_entity_id.keys():
                    golden = 0
                    if docid in golden_saliency_by_entid_by_docid:
                        if entity_id in golden_saliency_by_entid_by_docid[docid]:
                            golden = golden_saliency_by_entid_by_docid[docid][entity_id]
                    line = str(docid) + ',' + str(entity_id) + ',' + str(golden) + ',0,' + \
                        str(features_by_entity_id[entity_id])
                    if file is not None:
                        file.write(line)
                        file.write('\n')
                    sentiment = features_by_entity_id[entity_id][0]
                    salience_by_entity_by_doc_id[docid][entity_id] = sentiment
                    self.logger.debug('sent %f', sentiment)

        if file is not None:
            file.close()
            self.logger.info('written to %s', output_filename)
        self.logger.info('processing complete')
        return salience_by_entity_by_doc_id
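# Hedged usage sketch: with use_rand_values=False the builder above copies the golden
# salience straight into the single feature column (an upper-bound run); with
# use_rand_values=True it substitutes uniform random scores (a noise baseline).
# The caller is expected to supply the spotter and golden dictionary as built in the
# other __main__ blocks in this section; the output paths here are hypothetical.
def _sketch_pw_upper_bound_and_baseline(spotter, golden_saliency_by_entid_by_docid, docid_set):
    builder = PWModelBuilder()
    upper = builder.build_output_using_dexter_dataset(
        spotter, golden_saliency_by_entid_by_docid, '/tmp/pw_golden.txt', docid_set, use_rand_values=False)
    baseline = builder.build_output_using_dexter_dataset(
        spotter, golden_saliency_by_entid_by_docid, '/tmp/pw_random.txt', docid_set, use_rand_values=True)
    return upper, baseline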
    def __init__(self):
        self.dd = DatasetDexter()
        self._model_runner = ModelRunner()
class SentimentModelBuilder:
    # set up logging
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.propagate = False
    logger.setLevel(logging.INFO)

    def __init__(self):
        self.dd = DatasetDexter()

    @staticmethod
    def extract_body(data):
        body = ''
        for d in data['document']:
            if d['name'].startswith('body_par_'):
                body = body + d['value']
        return body

    def get_dexter_datset(self):
        return self.dd

    # noinspection PyShadowingNames
    def train_model_using_dexter_dataset(self, sentiment_processor, spotter, afinn_filename):
        dexter_json_doc_list = self.dd.get_dexter_dataset(
            FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
        self.logger.info('building list of n-grams')
        ngram_list = []
        for n_gram_length in range(2, 10):
            for json_doc in dexter_json_doc_list:
                data = json.loads(json_doc)
                # pprint.pprint(data)
                body = self.extract_body(data)
                title = data['title']
                title_entities = spotter.get_entity_candidates(title, 0.5)
                for e in title_entities:
                    n_gram = sentiment_processor.get_ngram(
                        title, n_gram_length, e.start_char, e.end_char)
                    ngram_list.append(n_gram)
                body_entities = spotter.get_entity_candidates(body, 0.5)
                for e in body_entities:
                    n_gram = sentiment_processor.get_ngram(
                        body, n_gram_length, e.start_char, e.end_char)
                    ngram_list.append(n_gram)
        self.logger.info('processing list of n-grams')
        sentiment_processor.cal_term_weight_on_full_corpus(afinn_filename, ngram_list, debug_mode=1)
        self.logger.info('processing complete')

    def train_and_save_model(self, filename, spotter):
        afinn_filename = '../sellibrary/resources/AFINN-111.txt'
        sentiment_processor = SentimentProcessor()
        self.train_model_using_dexter_dataset(sentiment_processor, spotter, afinn_filename)
        sentiment_processor.save_model(filename)
        return sentiment_processor

    def get_feature_list(self, sentiment_processor, phrase):
        sent = sentiment_processor.get_doc_simple_sentiment(phrase)
        feature_list = [sent]
        feature_list.extend(
            sentiment_processor.get_doc_prop_pos_prob_neg(phrase))
        return feature_list

    def build_output_using_dexter_dataset(self, spotter, golden_saliency_by_entid_by_docid, output_filename,
                                          document_to_feature_converter, tosent_converter,
                                          test_docid_set, train_docid_set):
        dexter_json_doc_list = self.dd.get_dexter_dataset(
            FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
        self.logger.info('building features')
        if output_filename is not None:
            file = open(output_filename, "w")
        else:
            file = None

        line_num = 0
        salience_by_entity_by_doc_id = {}
        for json_doc in dexter_json_doc_list:
            line_num += 1
            if line_num % 100 == 0:
                self.logger.info('Processed %d lines.', line_num)
            data = json.loads(json_doc)
            # pprint.pprint(data)
            docid = data['docId']
            # if docid in test_docid_set or docid in train_docid_set:
            salience_by_entity_by_doc_id[docid] = {}
            body = self.extract_body(data)
            title = data['title']
            title_entities = spotter.get_entity_candidates(title, docid)
            body_entities = spotter.get_entity_candidates(body, docid)
            # self.logger.info('Location:A')
            features_by_entity_id = document_to_feature_converter.get_features(
                body, body_entities, title, title_entities)
            # self.logger.info('Location:B.1')
            data_matrix = None
            for entity_id in features_by_entity_id.keys():
                if data_matrix is None:
                    data_matrix = np.array(
                        features_by_entity_id[entity_id]).reshape(1, -1)
                else:
                    row = np.array(features_by_entity_id[entity_id]).reshape(1, -1)
                    data_matrix = np.concatenate((data_matrix, row), axis=0)
            # self.logger.info('Location:B.2')
            sentiment_array = tosent_converter.get_salient_from_numpy_matrix(data_matrix)
            # self.logger.info('Location:B.3')
            i = 0
            for entity_id in features_by_entity_id.keys():
                sentiment = sentiment_array[i]
                i += 1
                golden = 0
                if docid in golden_saliency_by_entid_by_docid:
                    if entity_id in golden_saliency_by_entid_by_docid[docid]:
                        golden = golden_saliency_by_entid_by_docid[docid][entity_id]
                line = str(docid) + ',' + str(entity_id) + ',' + str(golden) + ',0,' + \
                    str(features_by_entity_id[entity_id])
                if file is not None:
                    file.write(line)
                    file.write('\n')
                if docid in test_docid_set:
                    salience_by_entity_by_doc_id[docid][entity_id] = sentiment
            # self.logger.info('Location:C')

        if file is not None:
            file.close()
            self.logger.info('written to %s', output_filename)
        self.logger.info('processing complete')
        return salience_by_entity_by_doc_id
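# Hedged sketch: the row-by-row np.concatenate in the method above can be written as a
# single np.vstack over the feature lists, assuming every entity's feature list has the
# same length and at least one entity is present. Shown here as a separate helper for
# illustration rather than as a change to the method itself.
import numpy as np

def _sketch_stack_feature_matrix(features_by_entity_id):
    entity_ids = list(features_by_entity_id.keys())
    data_matrix = np.vstack([features_by_entity_id[eid] for eid in entity_ids])
    return entity_ids, data_matrix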
    return worst_feature_number, reference_value, _contribution_by_feature_number_ordered, oob_test_score_by_feature_number


if __name__ == "__main__":
    trec_eval_feature_name = 'P_5'
    const = Const()

    base_set_to_supress = set()
    # 30,64,63,37,28,39,62,10,29,32,33,25,24,60,36,21,27,34,61,23,19,26,38,11,44,6,
    # 59,45,46,35,54,42,53,55,48,41,50,49,47,43,51,56,52,57,58,40,13,31,17,14}

    list_of_feature_deltas = []
    list_of_feature_oob_scores = []
    list_of_everything_a = []
    list_of_everything_oob = []

    _wikipedia_dataset = WikipediaDataset()
    _dexter_dataset = DatasetDexter()

    for i in range(len(const.get_joined_feature_names()) - 1):
        worst_feature_num, ref_value, contribution_by_feature_number_ordered, oob_test_score_by_feature_number = \
            find_worst_feature(base_set_to_supress, _wikipedia_dataset, _dexter_dataset, trec_eval_feature_name)
        if worst_feature_num != -1:
            list_of_feature_deltas.append([worst_feature_num, ref_value])
            list_of_feature_oob_scores.append([worst_feature_num,
                                               oob_test_score_by_feature_number[worst_feature_num]])
            list_of_everything_a.append(contribution_by_feature_number_ordered)
            list_of_everything_oob.append(oob_test_score_by_feature_number)

        logger.info('__________________________________________________________________________________________')
        logger.info('Results after round %d', i)
        logger.info('__________________________________________________________________________________________')
        logger.info('base_set_to_supress: %s', base_set_to_supress)
        logger.info('contribution_by_feature_number_ordered: %s', contribution_by_feature_number_ordered)
        logger.info('list of features removed and ' + trec_eval_feature_name + ' %s', list_of_feature_deltas)