    def get_data(self):
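        """Prepare the WMT data set, load human ranking judgements, and extract feature values."""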

        process_wmt = PrepareWmt()
        data_structure1 = process_wmt.get_data_structure(self.config)
        data_structure2 = process_wmt.get_data_structure2(self.config)
        process_wmt.print_data_set(self.config, data_structure1)

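        # Optionally prepare a parsed copy of the data set when 'Parse'
        # appears in the configured processor list.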
        if 'Parse' in loads(self.config.get("Resources", "processors")):
            process_wmt_parse = PrepareWmt(data_type='parse')
            data_structure_parse = process_wmt_parse.get_data_structure(self.config)
            process_wmt_parse.print_data_set(self.config, data_structure_parse)

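        # Load the WMT human ranking judgements, capped at the configured
        # maximum number of pairwise comparisons.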
        f_judgements = self.config.get('WMT', 'human_ranking')
        maximum_comparisons = int(self.config.get('WMT', 'maximum_comparisons'))
        human_rankings = HumanRanking()
        human_rankings.add_human_data(f_judgements, self.config, max_comparisons=maximum_comparisons)

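        # Run the configured preprocessors over the target and reference sentences.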
        process = Process(self.config)
        sents_tgt, sents_ref = process.run_processors()

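        # Extract the configured feature set for every target/reference sentence pair.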
        extractor = FeatureExtractor(self.config)
        features_to_extract = FeatureExtractor.read_feature_names(self.config)

        extractor.extract_features(features_to_extract, sents_tgt, sents_ref)

        return data_structure2, human_rankings, extractor.vals

    def get_data(self):
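        """Return extracted feature values together with the corresponding human scores."""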

        human_scores = read_reference_file(os.path.expanduser(self.config.get('Data', 'human_scores')), '\t')
        process = Process(self.config)
        sents_tgt, sents_ref = process.run_processors()

        extractor = FeatureExtractor(self.config)
        features_to_extract = FeatureExtractor.read_feature_names(self.config)
        extractor.extract_features(features_to_extract, sents_tgt, sents_ref)

        return extractor.vals, human_scores

    def prepare_feature_files(self):
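        """Extract features for the WMT data and write combined and per-language-pair feature files."""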

        process_wmt = PrepareWmt()
        data_structure1 = process_wmt.get_data_structure(self.config)
        data_structure2 = process_wmt.get_data_structure2(self.config)
        process_wmt.print_data_set(self.config, data_structure1)

        if 'Parse' in loads(self.config.get("Resources", "processors")):
            process_wmt_parse = PrepareWmt(data_type='parse')
            data_structure_parse = process_wmt_parse.get_data_structure(self.config)
            process_wmt_parse.print_data_set(self.config, data_structure_parse)

        process = Process(self.config)
        sents_tgt, sents_ref = process.run_processors()

        extractor = FeatureExtractor(self.config)
        features_to_extract = FeatureExtractor.read_feature_names(self.config)
        extractor.extract_features(features_to_extract, sents_tgt, sents_ref)
        feature_values = extractor.vals

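        # Collect the unique (dataset, language pair) combinations present in the data.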
        datasets_language_pairs = set((x[0], x[1]) for x in data_structure2)

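        # Open combined feature and metadata files covering all datasets and language pairs.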
        dataset_for_all = self.config.get('WMT', 'dataset')
        feature_set_name = os.path.basename(self.config.get('Features', 'feature_set')).replace(".txt", "")
        output_dir = os.path.expanduser(self.config.get('WMT', 'output_dir'))
        f_features_all = open(output_dir + '/x_' + dataset_for_all + '.' + feature_set_name + '.all.tsv', 'w')
        f_meta_data_all = open(output_dir + '/meta_' + dataset_for_all + '.' + feature_set_name + '.all.tsv', 'w')

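        # Write a per-pair feature file for every (dataset, language pair);
        # matching rows are also appended to the combined files.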
        for dataset, lp in sorted(datasets_language_pairs):

            f_features = open(output_dir + '/x_' + dataset + '.' + feature_set_name + '.' + lp + '.tsv', 'w')

            for i, sentence_data in enumerate(data_structure2):

                if dataset in sentence_data and lp in sentence_data:
                    f_features_all.write('\t'.join([str(x) for x in feature_values[i]]) + "\n")
                    f_meta_data_all.write('\t'.join([str(x) for x in sentence_data]) + "\n")
                    f_features.write('\t'.join([str(x) for x in feature_values[i]]) + "\n")

            f_features.close()

        f_features_all.close()
        f_meta_data_all.close()

def extract_features(txt_io, feat_io, cword_io, train=False, factor_files={}):
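    # Build the source/target confusion set pair and the feature extractor
    # from the global configuration.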
    csets = CSetPair(config['source-cset'], config['target-cset'])
    extractor = FeatureExtractor(csets, config['features'], config['costs'])

    check_factor_requirements(extractor.required_factors(), factor_files)

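    # Set up the confusion-word finder; when null n-grams are configured,
    # an extra finder for them is attached.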
    finder = CWordFinder(csets, train)
    if config['nulls-ngrams']:
        null_finder = NullFinder(csets.src, config['nulls-ngrams'])
        finder.add_extra_finder(null_finder)
    reader = CWordReader(cword_io)

    log.info("Extract features from {}".format(txt_io.name))

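    # For each factorized input sentence, extract a feature string for every
    # confusion word found and record the word through the CWord reader.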
    count = 0
    for sid, line, fact_sent in each_factorized_input(txt_io, factor_files):
        for cword in finder.find_confusion_words(line, fact_sent):
            feat_str = extractor.extract_features(cword, fact_sent)
            feat_io.write(feat_str)

            reader.format(sid, cword)
            count += 1

    log.info("Found {} confusion words".format(count))