import itertools
import numpy as np
import parallel  # local helper module providing parmap


def hill_climb(fn, optimization_pars, ml_start=None):
    # fn must return [input, fn(input)] with input as a tuple
    step = np.copy(optimization_pars['initial step size'])
    eval_record = {}
    new_inputs = [np.copy(optimization_pars['start'])]
    if ml_start is not None:
        new_inputs.append(ml_start)
    all_out = parallel.parmap(fn, [np.around(inp, decimals=10) for inp in new_inputs])
    for out in all_out:
        eval_record[out[0]] = out[1]
    # The best-scoring input evaluated so far becomes the search center.
    center = max(eval_record, key=eval_record.get)
    while len(eval_record) < optimization_pars['maximum evaluations'] and np.max(step) > 1e-6:
        # Build the 3^d grid of candidates one step away from the center.
        new_inputs = []
        for i in range(len(step)):
            if optimization_pars['only positive']:
                new_inputs.append([max(center[i] - step[i], 1e-6),
                                   max(center[i], 1e-6),
                                   max(center[i] + step[i], 1e-6)])
            else:
                new_inputs.append([center[i] - step[i], center[i], center[i] + step[i]])
        new_inputs = list(itertools.product(*new_inputs))
        all_out = parallel.parmap(fn, [np.around(inp, decimals=10)
                                       for inp in new_inputs if inp not in eval_record])
        for out in all_out:
            eval_record[out[0]] = out[1]
        new_center = max(eval_record, key=eval_record.get)
        if np.all(new_center == center):
            step /= 2.0  # no improvement in the neighborhood: refine the grid
        else:
            center = np.copy(new_center)
    return np.array(max(eval_record, key=eval_record.get))
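For context, here is a minimal, hypothetical usage sketch. It assumes `parallel.parmap(fn, inputs)` behaves like the built-in `map`, and that `fn` returns `(input_tuple, value)` as the comment above requires; the objective function and parameter values are made up for illustration.

def objective(x):
    # Hypothetical objective; returns (hashable input, score) as hill_climb expects.
    x = tuple(x)
    value = -((x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2)  # peak at (1, -2)
    return [x, value]

optimization_pars = {
    'start': np.array([0.0, 0.0]),
    'initial step size': np.array([1.0, 1.0]),
    'maximum evaluations': 500,
    'only positive': False,
}

best = hill_climb(objective, optimization_pars)
print(best)  # should approach [1, -2]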
Example #2
    def score_graph(self):
        """Score every node, then bucket node IDs by score, highest score first."""
        self.C_nodeInfo = {}
        self.C_nodeByScore = []
        S_node = self.nodes()
        rows = []
        if self.CPU <= 1:
            for node in S_node:
                nodeInfo = self.calc_node_info(node, self.params['degreeCutoff'])
                self.C_nodeInfo[node] = nodeInfo
                rows.append({'Node': node, 'Score': nodeInfo.score, 'Density': nodeInfo.density})
        else:
            def f(X):
                return self.calc_node_info(X[0], X[1])
            L = [(x, self.params['degreeCutoff']) for x in S_node]
            out = parallel.parmap(f, L, n_CPU=self.CPU)
            for i, node in enumerate(S_node):
                self.C_nodeInfo[node] = out[i]
                rows.append({'Node': node, 'Score': out[i].score, 'Density': out[i].density})
        t = pd.DataFrame(rows)
        t = t.sort_values(['Score', 'Density', 'Node'], ascending=[False, False, True])
        grps = t.groupby(by='Score')
        self.C_nodeByScore = [(score, list(grp['Node'])) for score, grp in grps]
        # groupby returns scores ascending; rank the highest scores first.
        self.C_nodeByScore.sort(key=lambda x: x[0], reverse=True)
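All of these examples funnel work through a `parmap`-style helper. The local `parallel` module is not shown, but because several callers hand it closures (like `f` above) that `multiprocessing.Pool` cannot pickle, it is presumably built on forked `Process` workers. A minimal sketch under that assumption (fork start method, one worker per item, `n_CPU` ignored for brevity):

# Minimal parmap sketch, assuming the fork start method (Linux) so that
# closures reach the children without pickling. The real parallel.parmap
# likely also batches work to respect n_CPU; this version does not.
import multiprocessing

def parmap(f, X, n_CPU=None):
    def worker(conn, x):
        conn.send(f(x))
        conn.close()

    pipes = [multiprocessing.Pipe() for _ in X]
    procs = [multiprocessing.Process(target=worker, args=(child, x))
             for x, (parent, child) in zip(X, pipes)]
    for p in procs:
        p.start()
    results = [parent.recv() for parent, child in pipes]  # recv before join
    for p in procs:
        p.join()
    return results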
Example #4
    def fit(self, X, y):
        assert isinstance(X, pd.DataFrame)
        assert self.metafeature in X

        # Fit one clone of the base classifier per metafeature group, in parallel.
        indices = self.indices(X)
        mapped = parmap(get_fitted_clf,
                        indices, (self.clf, X, y),
                        n_jobs=self.n_jobs)
        self.clfs = dict(mapped)
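The helper `get_fitted_clf` is defined elsewhere (at module level, so worker processes can pickle it). A plausible sketch, assuming `self.indices(X)` yields `(metafeature_value, row_labels)` pairs, that `parmap` forwards the extra `(self.clf, X, y)` tuple as positional arguments, and that `clf` is a scikit-learn estimator; every name here is an assumption, not the project's actual code:

# Hypothetical sketch of get_fitted_clf; the item layout and the way parmap
# forwards (clf, X, y) are assumptions.
from sklearn.base import clone

def get_fitted_clf(item, clf, X, y):
    value, rows = item                      # one metafeature value + its rows
    fitted = clone(clf).fit(X.loc[rows], y.loc[rows])
    return value, fitted                    # (key, model) pairs feed dict()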
def main():
    if len(sys.argv) != 4:
        raise ValueError("Expected dict, finalembedding, W, b")

    dictionary_path = sys.argv[1]
    embedding_path = sys.argv[2]
    weights_path = sys.argv[3]

    with open(dictionary_path) as dictionary_file:
        dictionary = literal_eval(dictionary_file.read())
    embedding = np.loadtxt(embedding_path)

    subjects = [
        "als", "also", "da", "das", "dass", "de", "den", "denn", "die",
        "durch", "zur", "ihm", "im", "um", "nach", "noch", "war", "was"
    ]
    # subjects = ["and", "end", "as", "at", "is", "do", "for", "four", "form", "from", "he", "if", "is", "its", "it", "no", "now", "on", "one", "same", "some", "than", "that", "then", "their", "there", "them", "the", "they", "to", "was", "way", "were", "where"]
    print(subjects)

    # eval_subjects = subjects
    eval_subjects = ["das", "dass"]

    with open("/tmp/tokens-de") as token_file:
        sentences = token_file.read()

    scorer = LayeredScorer(weights_path)
    ngrams = get_relevant_ngrams(sentences,
                                 eval_subjects,
                                 n=scorer.context_size(embedding.shape[1]) + 1)

    # Sweep the decision threshold and evaluate error detection at each value.
    eval_results = parmap(
        lambda t: evaluate_ngrams(
            ngrams, eval_subjects,
            lambda ngram: has_error(dictionary, embedding, scorer, ngram,
                                    subjects,
                                    error_threshold=t,
                                    suggestion_threshold=t)),
        np.arange(0, 1, .025))  # or a single threshold, e.g. [.3]
    print(eval_results)
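One detail worth flagging: the sweep maps a lambda (closing over `ngrams`, `dictionary`, and the rest) across `np.arange(0, 1, .025)`, so the `parmap` in use must tolerate unpicklable callables. A hypothetical, picklable restructuring with a top-level function and `functools.partial`, assuming the argument order of `has_error` shown above, would be:

# Hypothetical picklable version of the threshold sweep; works with plain
# multiprocessing.Pool provided the bound arguments themselves pickle.
from functools import partial

def evaluate_at_threshold(t, ngrams, eval_subjects, dictionary, embedding,
                          scorer, subjects):
    checker = partial(has_error, dictionary, embedding, scorer,
                      subjects=subjects, error_threshold=t,
                      suggestion_threshold=t)
    return evaluate_ngrams(ngrams, eval_subjects, checker)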
Example #6
    def predict_multi(self, X):
        # Run every fitted per-group classifier on X in parallel and collect
        # the predictions into one DataFrame, one column per group.
        mapped = parmap(predict_with_clf,
                        self.clfs.items(), [X],
                        n_jobs=self.n_jobs)
        return pd.DataFrame(dict(mapped))
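Read together with `fit` above, the two methods form a small partition-and-ensemble pattern: `fit` stores one classifier per metafeature value in `self.clfs`, and `predict_multi` runs each of them over the same `X`; `dict(mapped)` keys the prediction vectors by group, and `pd.DataFrame` lines them up as one column per group.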
Example #7
    def calc(self):
        '''
        Run a full calculation over a grid of frequencies
        '''
        print("Computing for pulsar: %s" % self.psrnoise.name)
        self.sigmas = np.zeros((len(self.Cs), len(self.Bs)))
        if not self.frac_bw:

            def loop_func(ic):
                C = self.Cs[ic]
                sigmas = np.zeros(len(self.Bs))
                if self.verbose:
                    print("Computing center freq %0.3f GHz (%i/%i)" %
                          (C, ic, len(self.Cs)))
                for ib, B in enumerate(self.Bs):
                    if B > 1.9 * C:
                        # Write NaN into the local array so it survives the
                        # assignment back into self.sigmas by the caller.
                        sigmas[ib] = np.nan
                    else:
                        nulow = C - B / 2.0
                        nuhigh = C + B / 2.0
                        if not self.log:
                            nus = np.linspace(nulow, nuhigh,
                                              self.nchan + 1)[:-1]  # more uniform sampling?
                        else:
                            nus = np.logspace(np.log10(nulow), np.log10(nuhigh),
                                              self.nchan + 1)[:-1]  # more uniform sampling?
                        sigmas[ib] = self.calc_single(nus)
                return sigmas

        else:

            def loop_func(ic):
                C = self.Cs[ic]
                sigmas = np.zeros(len(self.Fs))
                if self.verbose:
                    print(ic, len(self.Cs), C)
                for indf, F in enumerate(self.Fs):
                    B = C * F
                    if B > 1.9 * C or B <= 0:
                        sigmas[indf] = np.nan
                    else:
                        nulow = C - B / 2.0
                        nuhigh = C + B / 2.0
                        if not self.log:
                            nus = np.linspace(nulow, nuhigh,
                                              self.nchan + 1)[:-1]  # more uniform sampling?
                        else:
                            nus = np.logspace(np.log10(nulow), np.log10(nuhigh),
                                              self.nchan + 1)[:-1]  # more uniform sampling?
                        sigmas[indf] = self.calc_single(nus)
                return sigmas

        if self.ncpu == 1:
            for ic, C in enumerate(self.Cs):
                self.sigmas[ic, :] = loop_func(ic)
        else:  # should set export OPENBLAS_NUM_THREADS=1
            if self.verbose:
                print("Attempting multiprocessing, nprocs=%s" % str(self.ncpu))
            self.sigmas[:, :] = parallel.parmap(loop_func,
                                                range(len(self.Cs)),
                                                nprocs=self.ncpu)
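The comment on the multiprocessing branch is worth taking seriously: if NumPy links against a threaded BLAS such as OpenBLAS, each forked worker brings its own thread pool and the run oversubscribes the CPU. Beyond `export`-ing the variable in the shell, a minimal sketch of pinning it from Python (it must happen before NumPy is first imported):

# Pin BLAS to one thread per process so parmap workers do not oversubscribe
# cores; these variables are read when the BLAS library loads, so they must
# be set before the first numpy import.
import os
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"  # covers OpenMP-threaded BLAS builds

import numpy as np  # safe to import now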
Example #8
    def find_clusters(self, l_decompose=True, l_optimized=True):
        if self.is_empty():
            util.error_msg("In find_clusters(): input network is empty!")
        if not len(self.C_nodeInfo) or not len(self.C_nodeByScore):
            util.error_msg("In find_clusters(): C_nodeInfo or C_nodeByScore is empty; run score_graph() first.")
        C_results = []
        cnt = 0
        # initialization
        c_nodeSeen = {}  # key is nodeIndex, value is True/False
        c_nodeSeenSnapshot = {}
        findingTotal = len(self.C_nodeInfo)
        rows = []
        for score,alNodesWithSameScore in self.C_nodeByScore:
            if not l_optimized or len(alNodesWithSameScore)<=1:
                for currentNode in alNodesWithSameScore:
                    if currentNode in c_nodeSeen: continue
                    alCluster = self.get_cluster_core(currentNode, c_nodeSeen, self.params['nodeScoreCutoff'], self.params['maxDepthFromStart'])

                    if (alCluster is not None and not alCluster.is_empty()):
                        #make sure seed node is part of cluster, if not already in there
                        if (not self.filter_cluster(alCluster)):
                            if (self.params['haircut']): alCluster=MCODE.haircut_cluster(alCluster)
                            if (self.params['fluff']): alCluster=self.fluff_Cluster_boundary(alCluster, c_nodeSeen)
                            if l_decompose:
                                c_components=alCluster.decompose()
                            else:
                                c_components=[alCluster]
                            for comp in c_components:
                                cnt+=1
                                score=self.score_network(comp)
                                C_results.append(MCODECluster(comp, currentNode, score))
                                rows.append({'ID':cnt, 'Score':score, 'NofNode':comp.nof_nodes(), 'SeedScore':self.C_nodeInfo[currentNode].score})
            else:
                def f(X):
                    tmp_rows=[]
                    c_stack={}
                    currentNode=X[0]
                    c_nodeSeenCopy=X[1].copy()
                    #if currentNode in c_nodeSeen: continue
                    alCluster = self.get_cluster_core(currentNode, c_nodeSeenCopy, self.params['nodeScoreCutoff'], self.params['maxDepthFromStart'])
                    if (alCluster is not None and not alCluster.is_empty()):
                        #make sure seed node is part of cluster, if not already in there
                        if (not self.filter_cluster(alCluster)):
                            if (self.params['haircut']): alCluster=MCODE.haircut_cluster(alCluster)
                            if (self.params['fluff']): alCluster=self.fluff_Cluster_boundary(alCluster, c_nodeSeenCopy)
                            if l_decompose:
                                c_components=alCluster.decompose()
                            else:
                                c_components=[alCluster]
                            for k,comp in enumerate(c_components):
                                score=self.score_network(comp)
                                tmp_rows.append({'ID':currentNode, 'Score':score, 'NofNode':comp.nof_nodes(), 'SeedScore':self.C_nodeInfo[currentNode].score, 'ComponentIndex':k})
                                c_stack[currentNode]={'nodeSeen':c_nodeSeenCopy, 'components':c_components}
                    return (tmp_rows, c_stack)

                while (len(alNodesWithSameScore)):
                    tmp_rows=[]
                    c_stack={}
                    L=[ (x, c_nodeSeen) for x in alNodesWithSameScore if x not in c_nodeSeen ]
                    #if self.CPU<=1:
                    #    out=[f(x) for x in L]
                    #else:
                    #    mp=parallel.MP()
                    #    mp.start(f, n_CPU=self.CPU)
                    #    out=mp.map(L)
                    out=parallel.parmap(f, L, n_CPU=self.CPU)
                    for X in out:
                        tmp_rows.extend(X[0])
                        c_stack.update(X[1])
                    tmp=pd.DataFrame(tmp_rows)
                    if len(tmp):
                        tmp=tmp.sort_values(['Score','NofNode','SeedScore', 'ID'], ascending=[False, False, False, True])
                        bestNode=tmp['ID'].iloc[0]
                        c_nodeSeen=c_stack[bestNode]['nodeSeen']
                        for comp in tmp_rows:
                            if comp['ID']!=bestNode: continue
                            compIdx=comp['ComponentIndex']
                            cnt+=1
                            C_results.append(MCODECluster(c_stack[bestNode]['components'][compIdx], bestNode, comp['Score']))
                            rows.append({'ID':cnt, 'Score':comp['Score'], 'NofNode':comp['NofNode'], 'SeedScore':self.C_nodeInfo[bestNode].score})

                        alNodesWithSameScore=[ x for x in alNodesWithSameScore if x !=bestNode]
                    else:
                        for x in c_stack.values():
                            for s in x['nodeSeen']:
                                c_nodeSeen[s] = True
                        alNodesWithSameScore=[]
        C_sorted=[]
        t=pd.DataFrame(rows)
        if len(t):
            t=t.sort_values(['Score','NofNode','SeedScore'], ascending=[False, False, False])
            for i in range(len(t)):
                C_sorted.append(C_results[t['ID'].iloc[i]-1])
        return C_sorted
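Note the required call order: `score_graph` must run first, since the second guard in `find_clusters` rejects an unpopulated `C_nodeInfo`/`C_nodeByScore`. A hypothetical driver follows; the constructor name and arguments are illustrative, and only the `params` keys and the `CPU` attribute actually appear in the code above.

# Hypothetical driver; the MCODE constructor signature is an assumption,
# but the params keys and CPU attribute match the methods above.
net = MCODE(graph, params={'degreeCutoff': 2, 'nodeScoreCutoff': 0.2,
                           'maxDepthFromStart': 100,
                           'haircut': True, 'fluff': False}, CPU=4)
net.score_graph()               # populates C_nodeInfo and C_nodeByScore
clusters = net.find_clusters()  # list of MCODECluster, best score first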