import itertools
import numpy as np
# Relies on the project's `parallel` module for parallel.parmap (not shown here).

def hill_climb(fn, optimization_pars, ml_start=None):
    # fn must return [input, fn(input)] with input as a tuple
    step = np.copy(optimization_pars['initial step size'])
    eval_record = {}  # maps evaluated input tuples to their objective values
    new_inputs = [np.copy(optimization_pars['start'])]
    if ml_start is not None:
        new_inputs.append(ml_start)
    all_out = parallel.parmap(fn, [np.around(inp, decimals=10) for inp in new_inputs])
    for out in all_out:
        eval_record[out[0]] = out[1]
    # dicts preserve insertion order, so keys and values stay aligned
    center = list(eval_record)[np.argmax(list(eval_record.values()))]
    while len(eval_record) < optimization_pars['maximum evaluations'] and np.max(step) > 1e-6:
        # evaluate the grid of points one step away from the current center in each dimension
        new_inputs = []
        for i in range(len(step)):
            if optimization_pars['only positive']:
                new_inputs.append([max(center[i] - step[i], 1e-6),
                                   max(center[i], 1e-6),
                                   max(center[i] + step[i], 1e-6)])
            else:
                new_inputs.append([center[i] - step[i], center[i], center[i] + step[i]])
        new_inputs = list(itertools.product(*new_inputs))
        all_out = parallel.parmap(fn, [np.around(inp, decimals=10)
                                       for inp in new_inputs if inp not in eval_record])
        for out in all_out:
            eval_record[out[0]] = out[1]
        new_center = list(eval_record)[np.argmax(list(eval_record.values()))]
        if np.all(new_center == center):
            # no improvement in the neighborhood: shrink the step size
            step /= 2.0
        else:
            center = np.copy(new_center)
    return np.array(list(eval_record)[np.argmax(list(eval_record.values()))])
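# The snippets collected here all call a project-specific parmap helper
# (parallel.parmap or a bare parmap) whose implementation is not included.
# As a rough, assumed sketch only -- not any of the projects' actual code --
# a minimal stand-in built on multiprocessing.Pool is shown below. Note that
# the real helpers are called with varying keyword names (n_CPU, nprocs,
# n_jobs) and sometimes with extra positional arguments, and that Pool.map
# cannot pickle the lambdas and nested functions some snippets pass, which is
# presumably why the projects ship their own implementations.
from multiprocessing import Pool

def parmap_sketch(fn, items, n_jobs=4):
    """Map fn over items with a process pool, preserving input order."""
    with Pool(processes=n_jobs) as pool:
        return pool.map(fn, list(items))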
def score_graph(self):
    # Score every node, serially or via parallel.parmap when CPU > 1,
    # then group nodes by score in descending order.
    self.C_nodeInfo = {}
    self.C_nodeByScore = []
    S_node = self.nodes()
    rows = []
    if self.CPU <= 1:
        for node in S_node:
            nodeInfo = self.calc_node_info(node, self.params['degreeCutoff'])
            self.C_nodeInfo[node] = nodeInfo
            rows.append({'Node': node, 'Score': nodeInfo.score, 'Density': nodeInfo.density})
    else:
        def f(X):
            return self.calc_node_info(X[0], X[1])

        L = [(x, self.params['degreeCutoff']) for x in S_node]
        out = parallel.parmap(f, L, n_CPU=self.CPU)
        for i, node in enumerate(S_node):
            self.C_nodeInfo[node] = out[i]
            rows.append({'Node': node, 'Score': out[i].score, 'Density': out[i].density})
    t = pd.DataFrame(rows)
    t = t.sort_values(['Score', 'Density', 'Node'], ascending=[False, False, True])
    grps = t.groupby(by='Score')
    # list of (score, [nodes]) tuples, highest score first
    self.C_nodeByScore = [(score, list(grp['Node'])) for score, grp in grps]
    self.C_nodeByScore.sort(key=lambda x: x[0], reverse=True)
def fit(self, X, y):
    assert isinstance(X, pd.DataFrame)
    assert self.metafeature in X
    indices = self.indices(X)
    mapped = parmap(get_fitted_clf, indices, (self.clf, X, y), n_jobs=self.n_jobs)
    self.clfs = dict(mapped)
import sys
from ast import literal_eval
import numpy as np
# Also relies on the project's LayeredScorer, get_relevant_ngrams, evaluate_ngrams,
# has_error, and parmap helpers (not shown here).

def main():
    if len(sys.argv) != 4:
        raise ValueError("Expected dict, finalembedding, W, b")
    dictionary_path = sys.argv[1]
    embedding_path = sys.argv[2]
    weights_path = sys.argv[3]
    with open(dictionary_path) as dictionary_file:
        dictionary = literal_eval(dictionary_file.read())
    embedding = np.loadtxt(embedding_path)
    subjects = ["als", "also", "da", "das", "dass", "de", "den", "denn", "die",
                "durch", "zur", "ihm", "im", "um", "nach", "noch", "war", "was"]
    # subjects = ["and", "end", "as", "at", "is", "do", "for", "four", "form", "from",
    #             "he", "if", "is", "its", "it", "no", "now", "on", "one", "same", "some",
    #             "than", "that", "then", "their", "there", "them", "the", "they", "to",
    #             "was", "way", "were", "where"]
    print(subjects)
    # eval_subjects = subjects
    eval_subjects = ["das", "dass"]
    with open("/tmp/tokens-de") as file:
        sentences = file.read()
    scorer = LayeredScorer(weights_path)
    ngrams = get_relevant_ngrams(sentences, eval_subjects,
                                 n=scorer.context_size(embedding.shape[1]) + 1)
    eval_results = parmap(
        lambda t: evaluate_ngrams(
            ngrams, eval_subjects,
            lambda ngram: has_error(dictionary, embedding, scorer, ngram, subjects,
                                    error_threshold=t, suggestion_threshold=t)),
        np.arange(0, 1, .025))  # [.3])
    print(eval_results)
def predict_multi(self, X):
    mapped = parmap(predict_with_clf, self.clfs.items(), [X], n_jobs=self.n_jobs)
    return pd.DataFrame(dict(mapped))
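# Hypothetical shape of the predict_with_clf worker (an assumption inferred only from
# how the result is consumed above: dict(mapped) must yield {column_name: predictions}).
# The real helper, and the way this parmap variant forwards the extra [X] argument,
# are not shown in this collection.
def predict_with_clf_sketch(item, X):
    key, clf = item             # one (key, fitted classifier) pair from self.clfs.items()
    return key, clf.predict(X)  # becomes one column of the returned DataFrame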
def calc(self):
    '''Run a full calculation over a grid of frequencies.'''
    print("Computing for pulsar: %s" % self.psrnoise.name)
    # Second axis is bandwidths, or fractional bandwidths when frac_bw is set.
    if not self.frac_bw:
        self.sigmas = np.zeros((len(self.Cs), len(self.Bs)))

        def loop_func(ic):
            C = self.Cs[ic]
            sigmas = np.zeros(len(self.Bs))
            if self.verbose:
                print("Computing center freq %0.3f GHz (%i/%i)" % (C, ic, len(self.Cs)))
            for ib, B in enumerate(self.Bs):
                if B > 1.9 * C:
                    sigmas[ib] = np.nan
                else:
                    nulow = C - B / 2.0
                    nuhigh = C + B / 2.0
                    if not self.log:
                        nus = np.linspace(nulow, nuhigh, self.nchan + 1)[:-1]  # more uniform sampling?
                    else:
                        nus = np.logspace(np.log10(nulow), np.log10(nuhigh), self.nchan + 1)[:-1]  # more uniform sampling?
                    sigmas[ib] = self.calc_single(nus)
            return sigmas
    else:
        self.sigmas = np.zeros((len(self.Cs), len(self.Fs)))

        def loop_func(ic):
            C = self.Cs[ic]
            sigmas = np.zeros(len(self.Fs))
            if self.verbose:
                print(ic, len(self.Cs), C)
            for indf, F in enumerate(self.Fs):
                B = C * F
                if B > 1.9 * C or B <= 0:
                    sigmas[indf] = np.nan
                else:
                    nulow = C - B / 2.0
                    nuhigh = C + B / 2.0
                    if not self.log:
                        nus = np.linspace(nulow, nuhigh, self.nchan + 1)[:-1]  # more uniform sampling?
                    else:
                        nus = np.logspace(np.log10(nulow), np.log10(nuhigh), self.nchan + 1)[:-1]  # more uniform sampling?
                    sigmas[indf] = self.calc_single(nus)
            return sigmas

    if self.ncpu == 1:
        for ic, C in enumerate(self.Cs):
            self.sigmas[ic, :] = loop_func(ic)
    else:
        # should set export OPENBLAS_NUM_THREADS=1
        if self.verbose:
            print("Attempting multiprocessing, nprocs=%s" % str(self.ncpu))
        self.sigmas[:, :] = parallel.parmap(loop_func, range(len(self.Cs)), nprocs=self.ncpu)
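# When numpy is linked against OpenBLAS, each worker process spawned by parmap can in
# turn start its own BLAS threads and oversubscribe the machine. A common workaround
# (an illustration, not part of the original code) is to pin BLAS to one thread before
# numpy is first imported, mirroring the "export OPENBLAS_NUM_THREADS=1" comment above:
import os
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
import numpy as np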
def find_clusters(self, l_decompose=True, l_optimized=True):
    if self.is_empty():
        util.error_msg("In find_clusters(): input network is empty!")
    if not len(self.C_nodeInfo) or not len(self.C_nodeByScore):
        util.error_msg("In find_clusters(): C_nodeInfo or C_nodeByScore is None.")
    C_results = []
    cnt = 0
    # initialization
    c_nodeSeen = {}  # key is node index, value is True/False
    c_nodeSeenSnapshot = {}
    findingTotal = len(self.C_nodeInfo)
    rows = []
    for score, alNodesWithSameScore in self.C_nodeByScore:
        if not l_optimized or len(alNodesWithSameScore) <= 1:
            for currentNode in alNodesWithSameScore:
                if currentNode in c_nodeSeen:
                    continue
                alCluster = self.get_cluster_core(currentNode, c_nodeSeen,
                                                  self.params['nodeScoreCutoff'],
                                                  self.params['maxDepthFromStart'])
                if alCluster is not None and not alCluster.is_empty():
                    # make sure seed node is part of cluster, if not already in there
                    if not self.filter_cluster(alCluster):
                        if self.params['haircut']:
                            alCluster = MCODE.haircut_cluster(alCluster)
                        if self.params['fluff']:
                            alCluster = self.fluff_Cluster_boundary(alCluster, c_nodeSeen)
                        if l_decompose:
                            c_components = alCluster.decompose()
                        else:
                            c_components = [alCluster]
                        for comp in c_components:
                            cnt += 1
                            score = self.score_network(comp)
                            C_results.append(MCODECluster(comp, currentNode, score))
                            rows.append({'ID': cnt, 'Score': score, 'NofNode': comp.nof_nodes(),
                                         'SeedScore': self.C_nodeInfo[currentNode].score})
        else:
            # multiple seeds share the same score: grow a candidate cluster from each seed
            # in parallel, then greedily keep the best one and repeat
            def f(X):
                tmp_rows = []
                c_stack = {}
                currentNode = X[0]
                c_nodeSeenCopy = X[1].copy()
                alCluster = self.get_cluster_core(currentNode, c_nodeSeenCopy,
                                                  self.params['nodeScoreCutoff'],
                                                  self.params['maxDepthFromStart'])
                if alCluster is not None and not alCluster.is_empty():
                    # make sure seed node is part of cluster, if not already in there
                    if not self.filter_cluster(alCluster):
                        if self.params['haircut']:
                            alCluster = MCODE.haircut_cluster(alCluster)
                        if self.params['fluff']:
                            alCluster = self.fluff_Cluster_boundary(alCluster, c_nodeSeenCopy)
                        if l_decompose:
                            c_components = alCluster.decompose()
                        else:
                            c_components = [alCluster]
                        for k, comp in enumerate(c_components):
                            score = self.score_network(comp)
                            tmp_rows.append({'ID': currentNode, 'Score': score,
                                             'NofNode': comp.nof_nodes(),
                                             'SeedScore': self.C_nodeInfo[currentNode].score,
                                             'ComponentIndex': k})
                        c_stack[currentNode] = {'nodeSeen': c_nodeSeenCopy, 'components': c_components}
                return (tmp_rows, c_stack)

            while len(alNodesWithSameScore):
                tmp_rows = []
                c_stack = {}
                L = [(x, c_nodeSeen) for x in alNodesWithSameScore if x not in c_nodeSeen]
                out = parallel.parmap(f, L, n_CPU=self.CPU)
                for X in out:
                    tmp_rows.extend(X[0])
                    c_stack.update(X[1])
                tmp = pd.DataFrame(tmp_rows)
                if len(tmp):
                    tmp = tmp.sort_values(['Score', 'NofNode', 'SeedScore', 'ID'],
                                          ascending=[False, False, False, True])
                    bestNode = tmp['ID'].iloc[0]
                    c_nodeSeen = c_stack[bestNode]['nodeSeen']
                    for comp in tmp_rows:
                        if comp['ID'] != bestNode:
                            continue
                        compIdx = comp['ComponentIndex']
                        cnt += 1
                        C_results.append(MCODECluster(c_stack[bestNode]['components'][compIdx],
                                                      bestNode, comp['Score']))
                        rows.append({'ID': cnt, 'Score': comp['Score'], 'NofNode': comp['NofNode'],
                                     'SeedScore': self.C_nodeInfo[bestNode].score})
                    alNodesWithSameScore = [x for x in alNodesWithSameScore if x != bestNode]
                else:
                    # no candidate produced a cluster; mark everything examined as seen and stop
                    for x in c_stack:
                        for s in c_stack[x]['nodeSeen']:
                            c_nodeSeen[s] = True
                    alNodesWithSameScore = []
    C_sorted = []
    t = pd.DataFrame(rows)
    if len(t):
        t = t.sort_values(['Score', 'NofNode', 'SeedScore'], ascending=[False, False, False])
        for i in range(len(t)):
            C_sorted.append(C_results[t['ID'].iloc[i] - 1])
    return C_sorted