Esempio n. 1
0
    def createSampleData(self):
        """Store the pairwise leaf distances of a fixed sample tree in ``self.X``.

        The tree is hard-coded for now. ``self.X`` is set to a symmetric
        square NumPy array whose rows and columns follow the order
        A, B, C, D; the diagonal is left at zero.
        """
        # Hardcoded data for now.
        tree = ClusterTree('(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);')
        leaves = tree.get_leaf_names()

        # Fixed row/column position of every leaf in the distance matrix.
        idx_dict = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

        # Size the matrix from the index map so the two cannot drift apart.
        size = len(idx_dict)
        dmat = np.zeros((size, size))

        # Each unordered pair is visited once; mirror the value so the
        # matrix is symmetric.
        for l1, l2 in combinations(leaves, 2):
            row, col = idx_dict[l1], idx_dict[l2]
            dmat[row, col] = dmat[col, row] = tree.get_distance(l1, l2)

        self.X = dmat
Esempio n. 2
0
    def newick(self, user_input_file):
        """Convert a Newick tree into a distance matrix and a SciPy linkage.

        Args:
            user_input_file: Newick input, handed unchanged to ``ClusterTree``.

        Returns:
            tuple: ``(dmat, schlink)``. ``dmat`` is the symmetric square
            array of pairwise leaf distances, with rows and columns in the
            order returned by ``tree.get_leaf_names()``. ``schlink`` is the
            result of ``sch.linkage`` with ``method='average'`` applied to
            the condensed form of ``dmat``.
        """
        tree = ClusterTree(user_input_file)
        leaves = tree.get_leaf_names()
        n_leaves = len(leaves)

        # Row/column position of every leaf in the distance matrix.
        leaf_dict = {name: position for position, name in enumerate(leaves)}

        # One row and one column per taxon; the diagonal stays at zero.
        dmat = np.zeros((n_leaves, n_leaves))

        print('Converting input tree:')

        # combinations() has no length, so give tqdm the pair count
        # explicitly; otherwise the bar cannot report progress.
        n_pairs = n_leaves * (n_leaves - 1) // 2

        # Compute distance matrix from newick tree (this is not yet a
        # linkage matrix). Each unordered pair is visited once and mirrored.
        for l1, l2 in tqdm(combinations(leaves, 2), total=n_pairs):
            row, col = leaf_dict[l1], leaf_dict[l2]
            dmat[row, col] = dmat[col, row] = tree.get_distance(l1, l2)

        # Condense dmat and build the linkage matrix for scipy. SciPy
        # documents `metric` as ignored for condensed input; it is kept
        # only so the call stays identical to the previous one.
        schlink = sch.linkage(scipy.spatial.distance.squareform(dmat),
                              method='average',
                              metric='euclidean')

        return dmat, schlink
Esempio n. 3
0
def newick_to_linkage(filePath):
    """Build a SciPy linkage matrix from the tree loaded from ``filePath``.

    ``filePath`` is passed unchanged to ``ClusterTree``. The distance between
    every pair of leaves is written into a symmetric square matrix, which is
    then passed through ``squareform`` and clustered with ``sch.linkage``
    using ``method='average'``.
    """
    tree                   = ClusterTree(filePath)
    leaves                 = tree.get_leaf_names()
    # NOTE(review): `ts` is configured here but not used by any line below.
    ts                     = TreeStyle()
    ts.show_leaf_name      = True
    ts.show_branch_length  = True
    ts.show_branch_support = True

    # Map each leaf name to its row/column position in the distance matrix.
    idx_dict = {}
    idx = 0
    for leaf in leaves:
        idx_dict[leaf] = idx
        idx += 1

    # Leaf names ordered by matrix position (the inverse of idx_dict).
    # NOTE(review): this relies on keys()/values() returning lists, which is
    # only true on Python 2. On Python 3 dict views support neither indexing
    # nor .index(), so this line fails for any non-empty tree.
    idx_labels = [idx_dict.keys()[idx_dict.values().index(i)] for i in range(len(idx_dict))]

    # Square matrix with one row and one column per leaf; the diagonal is
    # never written and stays at zero.
    dmat = np.zeros((len(leaves), len(leaves))) # FIXME need to understand

    # Visit each unordered leaf pair once and mirror the distance so that
    # dmat[i, j] == dmat[j, i].
    for leaf1, leaf2 in combinations(leaves, 2):
        d = tree.get_distance(leaf1, leaf2)
        dmat[idx_dict[leaf1], idx_dict[leaf2]] = dmat[idx_dict[leaf2], idx_dict[leaf1]] = d

    # NOTE(review): SciPy documents `metric` as ignored when linkage() is
    # given a condensed distance matrix, as it is here — confirm and drop it.
    schlink = sch.linkage(scipy.spatial.distance.squareform(dmat),method='average',metric='euclidean')
    # NOTE(review): neither `schlink` nor `idx_labels` is returned at this
    # point — confirm whether a return statement is missing.