def get_postag_data(config, train_path, dev_path, vocab_path=None, label_path=None):
    """Load and tokenize POS-tagging data.

    Reads train/dev sentences, builds word and label dictionaries (optionally
    pre-loaded from fixed files and frozen), converts every sentence into
    (word-id sequence, label-id sequence) pairs, and gathers the pretrained
    word-embedding matrix for the final vocabulary.

    Args:
        config: experiment configuration; reads ``use_se_marker`` and
            ``word_embedding``.
        train_path: path to the raw training sentences.
        dev_path: path to the raw development sentences.
        vocab_path: optional path to a fixed vocabulary (one word per line);
            when given, the word dictionary is frozen after loading it.
        label_path: optional path to a fixed label set (one label per line);
            when given, the label dictionary is frozen after loading it.

    Returns:
        (train_sents, dev_sents, word_dict, label_dict,
         [word_embedding], [word_embedding_shape])
    """
    use_se_marker = config.use_se_marker
    raw_train_sents = get_sentences(train_path, use_se_marker)
    raw_dev_sents = get_sentences(dev_path, use_se_marker)
    word_to_embeddings = get_pretrained_embeddings(
        WORD_EMBEDDINGS[config.word_embedding])

    # Prepare word dictionary.
    word_dict = Dictionary(unknown_token=UNKNOWN_TOKEN)
    if use_se_marker:
        word_dict.add_all([START_MARKER, END_MARKER])
    if vocab_path is not None:
        # The `with` statement closes the file; no explicit close() needed.
        with open(vocab_path, 'r') as f_vocab:
            for line in f_vocab:
                word_dict.add(line.strip())
        word_dict.accept_new = False
        print('Load {} words. Dictionary freezed.'.format(word_dict.size()))

    # Prepare label dictionary.
    label_dict = Dictionary()
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False
        print('Load {} labels. Dictionary freezed.'.format(label_dict.size()))

    # Each raw sentence is (words, labels); map both to id sequences.
    train_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                    string_sequence_to_ids(sent[1], label_dict))
                   for sent in raw_train_sents]
    dev_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                  string_sequence_to_ids(sent[1], label_dict))
                 for sent in raw_dev_sents]

    print("Extracted {} words and {} tags".format(word_dict.size(),
                                                  label_dict.size()))
    print("Max training sentence length: {}".format(
        max(len(s[0]) for s in train_sents)))
    print("Max development sentence length: {}".format(
        max(len(s[0]) for s in dev_sents)))

    # Embedding matrix row order follows the dictionary's id order.
    word_embedding = [word_to_embeddings[w] for w in word_dict.idx2str]
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    return (train_sents, dev_sents, word_dict, label_dict,
            [word_embedding], [word_embedding_shape])
def get_srl_data(config, train_data_path, dev_data_path, vocab_path=None, label_path=None):
    """Load and tokenize SRL data.

    Reads train/dev SRL sentences of the form
    [sentence_id, words, predicate, labels], builds word and label
    dictionaries (optionally pre-loaded from fixed files and frozen),
    extracts the configured SRL features, and packs everything into
    per-sentence tuples:
    (sentence_id, token_ids, feature_1, ..., feature_k, label_ids).

    Args:
        config: experiment configuration; reads ``use_se_marker``,
            ``word_embedding`` and ``features``.
        train_data_path: path to the raw SRL training data.
        dev_data_path: path to the raw SRL development data.
        vocab_path: optional fixed vocabulary file (one word per line);
            when given, the word dictionary is frozen after loading it.
        label_path: optional fixed label file (one label per line);
            when given, the label dictionary is frozen after loading it.

    Returns:
        (train_sents, dev_sents, word_dict, label_dict,
         [word_embedding, None, None],
         [word_embedding_shape] + feature_shapes,
         [word_dict] + feature_dicts)
    """
    use_se_marker = config.use_se_marker
    raw_train_sents = get_srl_sentences(train_data_path, use_se_marker)
    raw_dev_sents = get_srl_sentences(dev_data_path, use_se_marker)
    # Get pre-trained embeddings.
    word_to_embeddings = get_pretrained_embeddings(
        WORD_EMBEDDINGS[config.word_embedding])

    # Prepare word dictionary.
    word_dict = Dictionary(padding_token=PADDING_TOKEN,
                           unknown_token=UNKNOWN_TOKEN)
    if use_se_marker:
        word_dict.add_all([START_MARKER, END_MARKER])
    if vocab_path is not None:
        # The `with` statement closes the file; no explicit close() needed.
        with open(vocab_path, 'r') as f_vocab:
            for line in f_vocab:
                word_dict.add(line.strip())
        word_dict.accept_new = False
        print('Load {} words. Dictionary freezed.'.format(word_dict.size()))

    # Prepare label dictionary.
    label_dict = Dictionary()
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False
        print('Load {} labels. Dictionary freezed.'.format(label_dict.size()))

    # Get tokens and labels: each raw sentence is
    # [sentence_id, words, predicate, labels].
    train_sentences_ids = [sent[0] for sent in raw_train_sents]
    train_tokens = [
        string_sequence_to_ids(sent[1], word_dict, True, word_to_embeddings)
        for sent in raw_train_sents
    ]
    train_labels = [
        string_sequence_to_ids(sent[3], label_dict) for sent in raw_train_sents
    ]
    if label_dict.accept_new:
        # No fixed label file was given: freeze the dictionary built from the
        # training data. NOTE(review): assumes the train corpus contains every
        # label (including 'O') -- confirm.
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False

    dev_sentences_ids = [sent[0] for sent in raw_dev_sents]
    dev_tokens = [
        string_sequence_to_ids(sent[1], word_dict, True, word_to_embeddings)
        for sent in raw_dev_sents
    ]
    dev_labels = [
        string_sequence_to_ids(sent[3], label_dict) for sent in raw_dev_sents
    ]
    print('Total tokens in Dev dataset {}'.format(
        sum(len(sent[1]) for sent in raw_dev_sents)))

    # Get features.
    print('Extracting features')
    train_features, feature_shapes = features.get_srl_features(
        raw_train_sents, config)
    dev_features, feature_shapes2 = features.get_srl_features(
        raw_dev_sents, config)
    # Train and dev must agree on feature shapes.
    for f1, f2 in zip(feature_shapes, feature_shapes2):
        assert f1 == f2

    # For additional features. Unused now (one placeholder per feature).
    feature_dicts = [None for _ in config.features]

    # Pack each sentence as
    # (sentence_id, token_ids, feature_1, ..., feature_k, label_ids).
    train_sents = [
        (sid,) + (tok,) + tuple(feat) + (lab,)
        for sid, tok, feat, lab in zip(train_sentences_ids, train_tokens,
                                       train_features, train_labels)
    ]
    dev_sents = [
        (sid,) + (tok,) + tuple(feat) + (lab,)
        for sid, tok, feat, lab in zip(dev_sentences_ids, dev_tokens,
                                       dev_features, dev_labels)
    ]

    print("Extracted {} words and {} tags".format(word_dict.size(),
                                                  label_dict.size()))
    print("Max training sentence length: {}".format(
        max(len(s[1]) for s in train_sents)))
    print("Max development sentence length: {}".format(
        max(len(s[1]) for s in dev_sents)))

    # Embedding matrix row order follows the dictionary's id order.
    word_embedding = [word_to_embeddings[w] for w in word_dict.idx2str]
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    return (train_sents, dev_sents, word_dict, label_dict,
            [word_embedding, None, None],
            [word_embedding_shape] + feature_shapes,
            [word_dict] + feature_dicts)
def get_srl_data(config, train_data_path, dep_path, dev_data_path, vocab_path=None, char_path=None, label_path=None):
    """Load and tokenize SRL data together with dependency trees.

    Builds word/head/char/label dictionaries, tokenizes the train/dev SRL
    sentences via ``tokenize_data``, reads and tokenizes dependency trees
    from ``dep_path``, and assembles the pretrained word and head embedding
    matrices.

    NOTE(review): this redefines ``get_srl_data`` with a different signature;
    if the earlier definition in this module is still needed, one of the two
    should be renamed.

    Args:
        config: experiment configuration; reads ``word_embedding``,
            ``head_embedding`` and ``dep_prune_ratio``.
        train_data_path: path to the raw SRL training data.
        dep_path: path to the dependency-tree (CoNLL) file.
        dev_data_path: path to the raw SRL development data; also used to
            build ``eval_data``.
        vocab_path: unused here despite being accepted -- kept for signature
            compatibility.
        char_path: path to the character vocabulary (one char per line).
            NOTE(review): defaults to None but is effectively required;
            ``open(None)`` raises TypeError -- confirm callers always pass it.
        label_path: optional fixed SRL label file; when given, the label
            dictionary is frozen after loading it.

    Returns:
        (train_samples, dev_samples, dep_trees.sample_dep_data, eval_data,
         word_dict, head_dict, char_dict, label_dict, dep_label_dict,
         [word_embedding, head_embedding],
         [word_embedding_shape, head_embedding_shape])
    """
    # Load sentences (documents) from data paths respectively.
    raw_train_sents = get_srl_sentences(train_data_path)
    raw_dev_sents = get_srl_sentences(dev_data_path)
    # Load dev data used for evaluation.
    eval_data = load_eval_data(dev_data_path)

    # Load pretrained word and head embeddings.
    word_embeddings = get_pretrained_embeddings(config.word_embedding)
    head_embeddings = get_pretrained_embeddings(config.head_embedding)

    # Prepare word embedding dictionary.
    word_dict = Dictionary(padding_token=PADDING_TOKEN,
                           unknown_token=UNKNOWN_TOKEN)
    # Prepare head embedding dictionary.
    head_dict = Dictionary(padding_token=PADDING_TOKEN,
                           unknown_token=UNKNOWN_TOKEN)
    # Prepare char dictionary; frozen immediately after loading the file.
    char_dict = Dictionary(padding_token=PADDING_TOKEN,
                           unknown_token=UNKNOWN_TOKEN)
    # The `with` statement closes the file; no explicit close() needed.
    with open(char_path, 'r') as f_char:
        for line in f_char:
            char_dict.add(line.strip())
    char_dict.accept_new = False
    print('Load {} chars, Dictionary freezed.'.format(char_dict.size()))

    # Prepare SRL label dictionary.
    label_dict = Dictionary()
    label_dict.set_unknown_token(
        NULL_LABEL)  # train corpus contains the label 'O' ?
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(NULL_LABEL)
        label_dict.accept_new = False
        print('Load {} labels. Dictionary freezed.'.format(label_dict.size()))

    # Prepare dependency label dictionary.
    dep_label_dict = Dictionary()

    # Training data: get tokens and labels:
    # [sentence_id, word, predicate, label].
    train_samples = tokenize_data(raw_train_sents, word_dict, head_dict,
                                  char_dict, label_dict, False,
                                  word_embeddings, head_embeddings)

    # Data for dependency trees.
    with Timer("Loading Dependency Trees"):
        dep_trees = SyntacticCONLL()
        dep_trees.read_from_file(dep_path, prune_ratio=config.dep_prune_ratio)
        dep_trees.tokenize_dep_trees(word_dict, char_dict, dep_label_dict,
                                     word_embeddings)

    # Freeze the dictionaries that must not grow past this point.
    char_dict.accept_new, label_dict.accept_new, dep_label_dict.accept_new = \
        False, False, False

    # Development data.
    dev_samples = tokenize_data(raw_dev_sents, word_dict, head_dict,
                                char_dict, label_dict, False,
                                word_embeddings, head_embeddings)
    # Freeze the word and head dictionaries.
    word_dict.accept_new, head_dict.accept_new = False, False

    print("Extract {} words and {} tags".format(word_dict.size(),
                                                label_dict.size()))
    # NOTE(review): unlike the sibling loaders these take sample[1] directly
    # (no len()); presumably tokenize_data stores the sentence length at
    # index 1 -- confirm against its implementation.
    print("Max training sentence length: {}".format(
        max(s[1] for s in train_samples)))
    print("Max development sentence length: {}".format(
        max(s[1] for s in dev_samples)))

    # Embedding matrix row order follows each dictionary's id order.
    word_embedding = np.asarray(
        [word_embeddings[w] for w in word_dict.idx2str])
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    head_embedding = np.asarray(
        [head_embeddings[w] for w in head_dict.idx2str])
    head_embedding_shape = [len(head_embedding), len(head_embedding[0])]
    print("word embedding shape {}, head embedding shape {}".format(
        word_embedding_shape, head_embedding_shape))
    return (train_samples, dev_samples, dep_trees.sample_dep_data, eval_data,
            word_dict, head_dict, char_dict, label_dict, dep_label_dict,
            [word_embedding, head_embedding],
            [word_embedding_shape, head_embedding_shape])