Python Cluster.run_till_end Exemples

Langage de programmation: Python

Espace de noms/Paquet : clustering

Class/Type: Cluster

Méthode/Fonction: run_till_end

Exemples sur hotexamples.com : 2

Python Cluster.run_till_end - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de clustering.Cluster.run_till_end extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Cluster(21)

__init__(2)

run_till_end(2)

train(2)

__cluster_subject__(1)

_neighbors(1)

cluster(1)

cluster_data(1)

clusters(1)

find_similar(1)

init_from_di_list(1)

print_board(1)

setupResults(1)

Méthodes fréquemment utilisées

Cluster (21)

__init__ (2)

run_till_end (2)

train (2)

__cluster_subject__ (1)

_neighbors (1)

cluster (1)

cluster_data (1)

clusters (1)

find_similar (1)

Méthodes fréquemment utilisées

init_from_di_list (1)

print_board (1)

setupResults (1)

Exemple #1

0

Afficher le fichier

Fichier : temp_DI_for_14individual.py Projet : Magdoll/DigitalFingerprint

def jackknifing_tree(file_pattern, di_method):
    """
    Cluster subsampled DI files and compare every tree with the 'real' tree.

    Each file matching ``file_pattern`` holds one record per line in the
    format ``<sample>,<size>,<comma-separated DI>``. One of the sizes must be
    the string 'real': it is the full pool that all other trees are compared
    against. Blank lines are skipped.

    The body avoids Python 2-only constructs, so it runs on Python 2 and 3.

    Args:
        file_pattern: glob pattern selecting the subsampled DI files.
        di_method: forwarded as ``method`` to ``Cluster.init_from_di_list``.

    Returns:
        A tuple ``(trees, sym_diff, rob_diff)``:
          trees    -- size --> list of trees (one per file providing that size)
          sym_diff -- size --> list of symmetric differences to the real tree
                      (edge weight ignored)
          rob_diff -- size --> list of Robinson-Foulds distances to the real
                      tree (edge weight considered)
        'real' is never a key of ``sym_diff`` / ``rob_diff``. All three dicts
        are empty when no file produced any data.

    Raises:
        KeyError: if data was read but no file provided the 'real' size.
        ValueError: if a non-blank line does not have the expected format.
    """
    from clustering import Cluster
    import dendropy

    trees = {}
    for path in glob.iglob(file_pattern):
        sys.stderr.write("reading subsampled DI file {0}....\n".format(path))
        by_size = {}  # size --> sample --> DI vector
        with open(path) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    continue
                # maxsplit=2 keeps the comma-separated DI values in one field
                sample, size, di = line.split(',', 2)
                # Build a real list: on Python 3 ``map`` is lazy and
                # np.array(map(...)) would yield a 0-d object array.
                by_size.setdefault(size, {})[sample] = np.array(
                    [float(value) for value in di.split(',')])

        for size, di_dict in by_size.items():
            c = Cluster(None)
            c.init_from_di_list(di_dict, method=di_method, threshold=0)
            c.run_till_end()
            newick = str(c.trees[0])
            trees.setdefault(size, []).append(
                dendropy.Tree.get_from_string(newick, "newick"))

    # tally (1) symmetric differences (edge weight ignored)
    #       (2) robinson_foulds_distance (edge weight considered)
    sym_diff = {}
    rob_diff = {}
    if not trees:
        # Nothing was read: return empty results instead of failing.
        return trees, sym_diff, rob_diff

    # 'real' is the full pool that every other size is compared to.
    t_real = trees['real'][0]
    # Iterate over every size actually collected, not just those that
    # happened to appear in the first file.
    for size, size_trees in trees.items():
        if size == 'real':
            continue
        sym_diff[size] = [t_real.symmetric_difference(t) for t in size_trees]
        rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in size_trees]
    return trees, sym_diff, rob_diff

Exemple #2

0

Afficher le fichier

Fichier : temp_DI_for_14individual.py Projet : Magdoll/DigitalFingerprint

def jackknifing_tree_DF(file_pattern, di_method, samples_to_exclude=('1412-1', '1412-4')):
    """
    Cluster subsampled DF files and compare every tree with the 'real' tree.

    Similar to jackknifing_tree, but reads DF records through
    ``DF.DFReader`` instead of parsing DI lines. The original author noted
    that this relies on "(probably improved) clustering in clustering.py"
    which has to be turned on manually.

    The body avoids Python 2-only constructs, so it runs on Python 2 and 3.

    Args:
        file_pattern: glob pattern selecting the subsampled DF files.
        di_method: forwarded as ``method`` to ``Cluster``.
        samples_to_exclude: collection of sample names (``df.name``) to skip.
            The default is an immutable tuple so it cannot be altered
            between calls; any container supporting ``in`` is accepted.

    Returns:
        A tuple ``(trees, sym_diff, rob_diff)``:
          trees    -- size --> list of trees (one per file providing that size)
          sym_diff -- size --> list of symmetric differences to the real tree
                      (edge weight ignored)
          rob_diff -- size --> list of Robinson-Foulds distances to the real
                      tree (edge weight considered)
        'real' is never a key of ``sym_diff`` / ``rob_diff``. All three dicts
        are empty when no file produced any data.

    Raises:
        KeyError: if data was read but no file provided the 'real' size.
    """
    from clustering import Cluster
    import dendropy

    trees = {}
    for path in glob.iglob(file_pattern):
        sys.stderr.write("reading subsampled DF file {0}....\n".format(path))
        by_size = {}  # size --> list of dfs
        with open(path) as handle:
            for df in DF.DFReader(handle):
                sample = df.name
                if sample in samples_to_exclude:
                    sys.stderr.write("EXCLUDING SAMPLE {0}!\n".format(sample))
                    continue
                size = df.annotations['size']
                # The mask must be changed for each df. This was not needed
                # for the DI files because they were already masked.
                df.change_vec_mask(valid_DI_pos)
                by_size.setdefault(size, []).append(df)

        for size, df_list in by_size.items():
            c = Cluster(df_list, method=di_method, threshold=0)
            c.run_till_end()
            newick = str(c.trees[0])
            trees.setdefault(size, []).append(
                dendropy.Tree.get_from_string(newick, "newick"))
            # Single-argument print() behaves the same on Python 2 and 3.
            print("size {0} file {1}".format(size, path))
            print(c.trees[0])

    # tally (1) symmetric differences (edge weight ignored)
    #       (2) robinson_foulds_distance (edge weight considered)
    sym_diff = {}
    rob_diff = {}
    if not trees:
        return trees, sym_diff, rob_diff

    # 'real' is the full pool that every other size is compared to;
    # look it up once rather than on every iteration.
    t_real = trees['real'][0]
    for size, size_trees in trees.items():
        if size == 'real':
            continue
        sym_diff[size] = [t_real.symmetric_difference(t) for t in size_trees]
        rob_diff[size] = [t_real.robinson_foulds_distance(t) for t in size_trees]
    return trees, sym_diff, rob_diff