def find_mutation_minpairs_all_words(corpus_context, num_cores = -1, stop_check = None, call_back = None): function = partial(find_mutation_minpairs, corpus_context) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0,len(corpus_context)) cur = 0 if num_cores == -1: for w in corpus_context: if stop_check is not None and stop_check(): return cur += 1 call_back(cur) res = function(w) setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w,) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size= 1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr(corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0])
def find_mutation_minpairs_all_words(corpus_context, num_cores=-1, stop_check=None, call_back=None): function = partial(find_mutation_minpairs, corpus_context) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0, len(corpus_context)) cur = 0 if num_cores == -1: for w in corpus_context: if stop_check is not None and stop_check(): return cur += 1 call_back(cur) res = function(w) setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w, ) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size=1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr( corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0])
def neighborhood_density_all_words(corpus_context, algorithm='edit_distance', max_distance=1, num_cores=-1, stop_check=None, call_back=None): """Calculate the neighborhood density of all words in the corpus and adds them as attributes of the words. Parameters ---------- corpus_context : CorpusContext Context manager for a corpus algorithm : str The algorithm used to determine distance max_distance : float, optional Maximum edit distance from the queried word to consider a word a neighbor. stop_check : callable, optional Optional function to check whether to gracefully terminate early call_back : callable, optional Optional function to supply progress information during the function """ function = partial(neighborhood_density, corpus_context, algorithm=algorithm, max_distance=max_distance) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0, len(corpus_context)) cur = 0 if num_cores == -1: for w in corpus_context: if stop_check is not None and stop_check(): return cur += 1 call_back(cur) res = function(w) setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w, ) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size=1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr( corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0])
def neighborhood_density_all_words(corpus_context, algorithm = 'edit_distance', max_distance = 1, num_cores = -1, stop_check = None, call_back = None): """Calculate the neighborhood density of all words in the corpus and adds them as attributes of the words. Parameters ---------- corpus_context : CorpusContext Context manager for a corpus algorithm : str The algorithm used to determine distance max_distance : float, optional Maximum edit distance from the queried word to consider a word a neighbor. stop_check : callable, optional Optional function to check whether to gracefully terminate early call_back : callable, optional Optional function to supply progress information during the function """ function = partial(neighborhood_density, corpus_context, algorithm = algorithm, max_distance = max_distance) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0,len(corpus_context)) cur = 0 if num_cores == -1: for w in corpus_context: if stop_check is not None and stop_check(): return cur += 1 call_back(cur) res = function(w) setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w,) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size = 1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr(corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0])
def find_mutation_minpairs_all_words(corpus_context, tierdict, tier_type = None, num_cores = -1, collapse_homophones = False, stop_check = None, call_back = None): function = partial(find_mutation_minpairs, corpus_context, tier_type=tier_type, collapse_homophones = collapse_homophones) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0,len(corpus_context)) cur = 0 results = dict() last_value_removed = None last_key_removed = None if num_cores == -1 or num_cores == 1: for w in corpus_context: if stop_check is not None and stop_check(): return if last_value_removed: tierdict[last_key_removed].append(last_value_removed) w_sequence = getattr(w, corpus_context.sequence_type) last_key_removed = str(w_sequence) for i, item in enumerate(tierdict[last_key_removed]): if str(item) == str(w): last_value_removed = tierdict[last_key_removed].pop(i) break res = find_mutation_minpairs(corpus_context, w, tier_type=tier_type, collapse_homophones = collapse_homophones) results[str(w)] = res[1] setattr(w.original, corpus_context.attribute.name, res[0]) # for w in corpus_context: # if stop_check is not None and stop_check(): # return # cur += 1 # call_back(cur) # res = function(w) # results[str(w)] = res[1]#[str(r) for r in res[1]] # setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w,) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size= 1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr(corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0]) return results
def neighborhood_density_all_words(corpus_context, tierdict, tier_type = None, sequence_type = None, algorithm = 'edit_distance', max_distance = 1, output_format = 'spelling', num_cores = -1, settable_attr = None, collapse_homophones = False, stop_check = None, call_back = None): """Calculate the neighborhood density of all words in the corpus and adds them as attributes of the words. Parameters ---------- corpus_context : CorpusContext Context manager for a corpus algorithm : str The algorithm used to determine distance max_distance : float, optional Maximum edit distance from the queried word to consider a word a neighbor. stop_check : callable, optional Optional function to check whether to gracefully terminate early call_back : callable, optional Optional function to supply progress information during the function settable_attr: string Name of attribute that neighbourhood density results will be assigned to """ function = partial(neighborhood_density, corpus_context, tierdict = tierdict, tier_type = tier_type, sequence_type = sequence_type, algorithm = algorithm, max_distance = max_distance, collapse_homophones = collapse_homophones) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0,len(corpus_context)) cur = 0 results = dict() last_value_removed = None last_key_removed = None if num_cores == -1 or num_cores == 1: for w in corpus_context: if stop_check is not None and stop_check(): return if last_value_removed: tierdict[last_key_removed].append(last_value_removed) w_sequence = getattr(w, corpus_context.sequence_type) last_key_removed = str(w_sequence) for i, item in enumerate(tierdict[last_key_removed]): if str(item) == str(w): last_value_removed = tierdict[last_key_removed].pop(i) break res = neighborhood_density(corpus_context, w, tierdict, tier_type = tier_type, sequence_type = sequence_type, algorithm = algorithm, max_distance = max_distance, collapse_homophones = collapse_homophones) results[str(w)] = [getattr(r, output_format) for r in res[1]] setattr(w.original, settable_attr.name, res[0]) # for w in corpus_context: # if stop_check is not None and stop_check(): # return # cur += 1 # call_back(cur) # res = function(w) # results[str(w)] = [getattr(r, output_format) for r in res[1]] # setattr(w.original, settable_attr.name, res[0]-1) # #the -1 is to account for the fact that words are counted as their own neighbour, and this is incorrect # #subtracting 1 here is easier than fixing the neighbourhood density algorithm else: iterable = ((w,) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size = 1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr(corpus_context.corpus.find(corpus_context.corpus.key(n[0])), #corpus_context.attribute.name, n[1][0]) settable_attr.name, n[1][0]) return results
def neighborhood_density_all_words(corpus_context, tierdict, tier_type=None, sequence_type=None, algorithm='edit_distance', max_distance=1, output_format='spelling', num_cores=-1, settable_attr=None, collapse_homophones=False, stop_check=None, call_back=None): """Calculate the neighborhood density of all words in the corpus and adds them as attributes of the words. Parameters ---------- corpus_context : CorpusContext Context manager for a corpus algorithm : str The algorithm used to determine distance max_distance : float, optional Maximum edit distance from the queried word to consider a word a neighbor. stop_check : callable, optional Optional function to check whether to gracefully terminate early call_back : callable, optional Optional function to supply progress information during the function settable_attr: string Name of attribute that neighbourhood density results will be assigned to """ function = partial(neighborhood_density, corpus_context, tierdict=tierdict, tier_type=tier_type, sequence_type=sequence_type, algorithm=algorithm, max_distance=max_distance, collapse_homophones=collapse_homophones) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0, len(corpus_context)) cur = 0 results = dict() last_value_removed = None last_key_removed = None if num_cores == -1 or num_cores == 1: for w in corpus_context: if stop_check is not None and stop_check(): return if last_value_removed: tierdict[last_key_removed].append(last_value_removed) w_sequence = getattr(w, corpus_context.sequence_type) last_key_removed = str(w_sequence) for i, item in enumerate(tierdict[last_key_removed]): if str(item) == str(w): last_value_removed = tierdict[last_key_removed].pop(i) break res = neighborhood_density(corpus_context, w, tierdict, tier_type=tier_type, sequence_type=sequence_type, algorithm=algorithm, max_distance=max_distance, collapse_homophones=collapse_homophones) results[str(w)] = [getattr(r, output_format) for r in res[1]] setattr(w.original, settable_attr.name, res[0]) # for w in corpus_context: # if stop_check is not None and stop_check(): # return # cur += 1 # call_back(cur) # res = function(w) # results[str(w)] = [getattr(r, output_format) for r in res[1]] # setattr(w.original, settable_attr.name, res[0]-1) # #the -1 is to account for the fact that words are counted as their own neighbour, and this is incorrect # #subtracting 1 here is easier than fixing the neighbourhood density algorithm else: iterable = ((w, ) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size=1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr( corpus_context.corpus.find(corpus_context.corpus.key(n[0])), #corpus_context.attribute.name, n[1][0]) settable_attr.name, n[1][0]) return results
def find_mutation_minpairs_all_words(corpus_context, tierdict, tier_type=None, num_cores=-1, collapse_homophones=False, stop_check=None, call_back=None): function = partial(find_mutation_minpairs, corpus_context, tier_type=tier_type, collapse_homophones=collapse_homophones) if call_back is not None: call_back('Calculating neighborhood densities...') call_back(0, len(corpus_context)) cur = 0 results = dict() last_value_removed = None last_key_removed = None if num_cores == -1 or num_cores == 1: for w in corpus_context: if stop_check is not None and stop_check(): return if last_value_removed: tierdict[last_key_removed].append(last_value_removed) w_sequence = getattr(w, corpus_context.sequence_type) last_key_removed = str(w_sequence) for i, item in enumerate(tierdict[last_key_removed]): if str(item) == str(w): last_value_removed = tierdict[last_key_removed].pop(i) break res = find_mutation_minpairs( corpus_context, w, tier_type=tier_type, collapse_homophones=collapse_homophones) results[str(w)] = res[1] setattr(w.original, corpus_context.attribute.name, res[0]) # for w in corpus_context: # if stop_check is not None and stop_check(): # return # cur += 1 # call_back(cur) # res = function(w) # results[str(w)] = res[1]#[str(r) for r in res[1]] # setattr(w.original, corpus_context.attribute.name, res[0]) else: iterable = ((w, ) for w in corpus_context) neighbors = score_mp(iterable, function, num_cores, call_back, stop_check, chunk_size=1) for n in neighbors: #Have to look up the key, then look up the object due to how #multiprocessing pickles objects setattr( corpus_context.corpus.find(corpus_context.corpus.key(n[0])), corpus_context.attribute.name, n[1][0]) return results