Python load_stop_words Exemples

Langage de programmation: Python

Espace de noms/Paquet : Utils.file_utils

Méthode/Fonction: load_stop_words

Exemples sur hotexamples.com : 8

Python load_stop_words - 8 exemples trouvés. Ce sont les exemples réels les mieux notés de Utils.file_utils.load_stop_words extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Associées

chinese_remainder_theorem

get_DEBUG

get_coords_sydney

Source

beachball

mock_ipython

gid_to_qid

parse_worker_args

User

model_path

Related in langs

TexyFactory (PHP)

liberasettimane (PHP)

OMSIngestionApi (C#)

PlanTaskDetail (C#)

GetPhoneInfo (C++)

OpenFileSocketForReuse (C++)

NewCreateServiceBroker (Go)

Walk (Go)

TikzFormatter (Java)

Ticket (Java)

Exemple #1

0

Afficher le fichier

def fact_stop_word_filter(stop_words_file):
    """Build a case-insensitive stop-word filter.

    The stop words are loaded once, via ``load_stop_words(stop_words_file)``,
    when the factory is called; the returned function reuses that collection
    on every invocation.

    Args:
        stop_words_file: Passed straight through to ``load_stop_words``.

    Returns:
        A function taking an iterable of string tokens and returning a list
        of the tokens whose lower-cased form is not among the loaded stop
        words. Tokens are returned unmodified (original casing), in their
        original order.
    """
    stop_words = load_stop_words(stop_words_file)

    def stp_flter(tokens):
        kept = []
        for token in tokens:
            # Only the membership test is lower-cased; the token itself is
            # kept as-is.
            if token.lower() in stop_words:
                continue
            kept.append(token)
        return kept

    return stp_flter

Exemple #2

0

Afficher le fichier

def fact_case_sensitive_stop_word_filter(stop_words_file):
    """Build a case-sensitive stop-word filter.

    The stop words are loaded once, via ``load_stop_words(stop_words_file)``,
    when the factory is called; the returned function reuses that collection
    on every invocation.

    Args:
        stop_words_file: Passed straight through to ``load_stop_words``.

    Returns:
        A function taking an iterable of tokens and returning a list of the
        tokens that are not among the loaded stop words. Tokens are compared
        exactly as given (no case folding) and returned in their original
        order.
    """
    stop_words = load_stop_words(stop_words_file)

    def cs_stop_filter(tokens):
        kept = []
        for token in tokens:
            if token in stop_words:
                continue
            kept.append(token)
        return kept

    return cs_stop_filter

Exemple #3

0

Afficher le fichier

lbl, sim = kwd2cluster_sims[kword][0] f.write("%s=>%s\n" % (kword, cluster_label(lbl))) """ Extract Clustered Synonyms """ if len(sys.argv) != 2: raise Exception("Incorrect number of arguments passed - one expected, the config file name") config = GenerateClusterSynonymsConfig(sys.argv[1]) model = Word2Vec.load(config.model_file) print("Word2Vec model loaded") keywords = set() for file in config.keywords_files: keywords.update(load_stop_words(file)) print("%i keywords loaded" % (len(keywords))) id2kwd = dict() kwd2id = dict() vectors = [] for term in keywords: id2kwd[len(vectors)] = term kwd2id[term] = len(vectors) vec = get_norm_vector(term, model) if vec is not None: vectors.append(vec) start = time.time() # don't parallelize (n_jobs = -1), doesn't seem to work

Exemple #4

0

Afficher le fichier

Fichier : analysis_pipeline.py Projet : SemanticBeeng/ConceptualSearch

def fact_stop_word_filter(stop_words_file):
    """Build a case-insensitive stop-word filter.

    The stop words are loaded once, via ``load_stop_words(stop_words_file)``,
    when the factory is called; the returned function reuses that collection
    on every invocation.

    Args:
        stop_words_file: Passed straight through to ``load_stop_words``.

    Returns:
        A function taking an iterable of string tokens and returning a list
        of the tokens whose lower-cased form is not among the loaded stop
        words. Tokens are returned unmodified (original casing), in their
        original order.
    """
    stop_words = load_stop_words(stop_words_file)

    def stp_flter(tokens):
        kept = []
        for token in tokens:
            # Only the membership test is lower-cased; the token itself is
            # kept as-is.
            if token.lower() in stop_words:
                continue
            kept.append(token)
        return kept

    return stp_flter

Exemple #5

0

Afficher le fichier

Fichier : analysis_pipeline.py Projet : SemanticBeeng/ConceptualSearch

def fact_case_sensitive_stop_word_filter(stop_words_file):
    """Build a case-sensitive stop-word filter.

    The stop words are loaded once, via ``load_stop_words(stop_words_file)``,
    when the factory is called; the returned function reuses that collection
    on every invocation.

    Args:
        stop_words_file: Passed straight through to ``load_stop_words``.

    Returns:
        A function taking an iterable of tokens and returning a list of the
        tokens that are not among the loaded stop words. Tokens are compared
        exactly as given (no case folding) and returned in their original
        order.
    """
    stop_words = load_stop_words(stop_words_file)

    def cs_stop_filter(tokens):
        kept = []
        for token in tokens:
            if token in stop_words:
                continue
            kept.append(token)
        return kept

    return cs_stop_filter

Exemple #6

0

Afficher le fichier

Fichier : generate_topn_synonyms_file.py Projet : SemanticBeeng/ConceptualSearch

pyld_f.write("%s|%f " %(kw,val)) pyld_f.write("\n") else: no_sim.add(word) #print("No matching similar terms in word2vec model for term: %s" % word) with open(synonym_file, "w+") as f: for syn in sorted(processed_syns): f.write("%s=>%s\n" % (syn, map_keyword(syn))) #Returned for analysis - do something with this if you need to investigate return missing, no_sim, processed_syns """ Generate Synonym Files """ if len(sys.argv) != 2: raise Exception("Incorrect number of arguments passed - one expected, the config file name") config = GenerateTopNSynonymsConfig(sys.argv[1]) start = time.time() model = Word2Vec.load(config.model_file) print("Word2Vec model loaded") keywords = set() for file in config.keywords_files: keywords.update(load_stop_words(file)) print("%i keywords loaded" % (len(keywords))) missing, no_sim, processed_syns = write_most_similar_synonyms(config.top_n, keywords, model, config.payload_synonyms_file, config.synonyms_file) print "%s synonyms processed" % (len(processed_syns)) end = time.time() print "Took %s seconds" % (end - start)

Exemple #7

0

Afficher le fichier

""" Extract Phrases """ import sys from Config.extract_keywords_config import ExtractKeywordsConfig if len(sys.argv) != 2: raise Exception( "Incorrect number of arguments passed - one expected, the config file name" ) #sys.argv[0] is this script file, sys.argv[1] should be the config file config = ExtractKeywordsConfig(sys.argv[1]) script_start = time.time() if config.stop_words_file: stop_words = load_stop_words(config.stop_words_file) print("%i stop words loaded" % len(stop_words)) else: stop_words = set() """ Load Documents """ start = time.time() files = find_files(config.processed_documents_folder, config.file_mask, True) print("%s files found in %s" % (len(files), config.processed_documents_folder)) documents = [] for i, fname in enumerate(files): with open(fname) as f: contents = f.read() documents.append(contents.split("\n")) end = time.time() print("Loading %i documents took %s seconds" % (len(files), str(end - start))) """ Extract Common Terms and Phrases """

Exemple #8

0

Afficher le fichier

Fichier : extract_keywords.py Projet : ross-intelligence/ConceptualSearch

find_sub_phrases_to_remove(tpl_ngram, valid_phrases, doc_freq, to_rem) """ Extract Phrases """ import sys from Config.extract_keywords_config import ExtractKeywordsConfig if len(sys.argv) != 2: raise Exception("Incorrect number of arguments passed - one expected, the config file name") # sys.argv[0] is this script file, sys.argv[1] should be the config file config = ExtractKeywordsConfig(sys.argv[1]) script_start = time.time() if config.stop_words_file: stop_words = load_stop_words(config.stop_words_file) print ("%i stop words loaded" % len(stop_words)) else: stop_words = set() """ Load Documents """ start = time.time() files = find_files(config.processed_documents_folder, config.file_mask, True) print ("%s files found in %s" % (len(files), config.processed_documents_folder)) documents = [] for i, fname in enumerate(files): with open(fname) as f: contents = f.read() documents.append(contents.split("\n")) end = time.time() print ("Loading %i documents took %s seconds" % (len(files), str(end - start)))