import re

def cluster_message(stop_words, user_dict, msg_fname, cluster_file, summary_file):
    """Tokenize the messages in msg_fname, compute minhash/simhash features,
    cluster the messages, and write the clusters and summaries to disk.
    Relies on the project's JiebaTokenizer, WordDictBuilder, FeatureContainer,
    Cluster, token_message, get_user_keywords and gl helpers."""
    # Init tokenizer
    jt = JiebaTokenizer(stop_words, user_dict, 'c')
    # Build and persist the word dictionary from the tokenized corpus
    token_lines = token_message(jt, msg_fname)
    wdb = WordDictBuilder()
    wdb.add_tokens_list(token_lines)
    wdb.save('../data/word_dict.txt')
    keyword_dict = get_user_keywords(user_dict)
    cluster = Cluster(gl.gl_FUNCNUM)
    # Init feature_builder and simhash_builder
    fc = FeatureContainer(wdb.word_dict, keyword_dict)
    with open(msg_fname, 'r') as ins:
        for lineidx, line in enumerate(ins):
            if lineidx % 100 == 0:
                print lineidx  # progress indicator
            msg_time, number, sender, message = line.strip().split('|')[0:4]
            if number == '10658368':  # skip messages from this service number
                continue
            # Keep only the first sentence (split on the Chinese full stop),
            # then strip digits, Latin letters, whitespace and punctuation
            short_msg = re.split(u'。'.encode('utf8'), message)[0]
            new_msg = re.sub(r'[0-9a-zA-Z+=\./:\"<>|_&#\s\*\-]', '', short_msg)
            # Tokenize and compute the feature vector plus both hashes
            tokens = jt.tokens(new_msg.strip().decode('utf8'))
            feature_vec, sim_hash, min_hash = fc.compute_feature(tokens)
            cluster.add_one(min_hash, sim_hash, short_msg)
    cluster.save_cluster(cluster_file, summary_file)
    print "cluster finished"
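
# A minimal usage sketch for cluster_message. All file paths below are
# hypothetical placeholders (assumptions, not taken from the original code);
# substitute the real stop-word list, user dictionary and message dump. The
# message file is expected to hold '|'-delimited lines of the form
# time|number|sender|message, as parsed in the loop above.
if __name__ == '__main__':
    cluster_message(stop_words='../data/stop_words.txt',       # hypothetical path
                    user_dict='../data/user_dict.txt',         # hypothetical path
                    msg_fname='../data/messages.txt',          # hypothetical path
                    cluster_file='../data/cluster_result.txt',
                    summary_file='../data/cluster_summary.txt')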