Exemple #1
0
def show_GUI_TREE(ETETree):
    """Display ``ETETree`` through its ``show()`` method with a minimal style.

    The tree style passed to ``show()`` has leaf names, branch lengths and
    branch supports switched off and the border switched on.
    """
    style = TreeStyle()
    style.show_border = True
    # Everything that would annotate the tree is turned off.
    for option in ("show_leaf_name",
                   "show_branch_length",
                   "show_branch_support"):
        setattr(style, option, False)

    ETETree.show(tree_style=style)
Exemple #2
0
def plot_species_tree(tree_newick, tree_type, gene_name, tree_file_name,
                      name_list, tree_image_folder):
    """Render a newick tree to ``<tree_image_folder>/<tree_file_name>.png``.

    The tree is parsed with ``Tree(tree_newick, format=2)`` and drawn in
    rectangular mode with the title ``"<tree_type> (<gene_name>)"``.  Leaf
    names are drawn as text faces to the right of the branch; leaves whose
    name is in ``name_list`` are labelled in red, all other leaves in black.
    Internal node names are drawn in a smaller font on top of the branch.
    """
    tree = Tree(tree_newick, format=2)

    # ---- overall tree style -------------------------------------------
    style = TreeStyle()
    style.mode = "r"  # 'r' = rectangular, 'c' = circular
    style.show_leaf_name = False  # names are added below as explicit faces
    style.show_scale = False
    style.show_border = False
    style.rotation = 0  # from 0 to 360
    style.branch_vertical_margin = 3  # pixels between adjacent branches
    # 10 px margin on every side of the image
    style.margin_top = 10
    style.margin_bottom = 10
    style.margin_left = 10
    style.margin_right = 10

    title_face = TextFace(tree_type + ' (' + gene_name + ')',
                          fsize=8,
                          fgcolor='black',
                          ftype='Arial',
                          tight_text=False)
    style.title.add_face(title_face, column=0)

    # ---- per-node style -----------------------------------------------
    # Settings shared by every leaf: invisible dot, thin solid branch lines
    # (line type 0 = solid, 1 = dashed, 2 = dotted).
    leaf_settings = (
        ('shape', 'circle'),
        ('size', 0),
        ('hz_line_width', 0.5),
        ('vt_line_width', 0.5),
        ('hz_line_type', 0),
        ('vt_line_type', 0),
    )

    for node in tree.traverse():
        node_style = NodeStyle()

        if node.is_leaf():
            for key, value in leaf_settings:
                node_style[key] = value

            # Leaves listed in name_list are highlighted.
            highlighted = node.name in name_list
            node_style['fgcolor'] = 'red' if highlighted else 'blue'
            label = TextFace(node.name,
                             fsize=8,
                             fgcolor='red' if highlighted else 'black',
                             tight_text=False,
                             bold=False)
            label_column = 0
            label_position = 'branch-right'
        else:
            node_style['size'] = 0  # no dot on internal nodes
            label = TextFace(node.name,
                             fsize=4,
                             fgcolor='black',
                             tight_text=False,
                             bold=False)
            label_column = 5
            label_position = 'branch-top'

        node.add_face(label, column=label_column, position=label_position)
        node.set_style(node_style)

    # 900 px wide image
    image_path = '%s/%s.png' % (tree_image_folder, tree_file_name)
    tree.render(image_path, w=900, units='px', tree_style=style)
Exemple #3
0
def plot_tree(tree, tree_title, tree_output):
    """Render ``tree`` to the image file ``tree_output``.

    The tree is drawn in rectangular mode, 900 px wide, with ``tree_title``
    as its title.  Every node name is added as an explicit text face: leaf
    names to the right of the branch, internal node names (smaller font) on
    top of the branch.  The faces and styles are attached to the nodes of
    ``tree`` itself.
    """
    # ---- overall tree style -------------------------------------------
    style = TreeStyle()
    style.mode = "r"  # 'r' = rectangular, 'c' = circular
    style.show_leaf_name = 0  # names are added below as explicit faces
    style.show_scale = False
    style.show_border = False
    style.rotation = 0  # from 0 to 360
    style.branch_vertical_margin = 3  # pixels between adjacent branches
    # 10 px margin on every side of the image
    style.margin_top = 10
    style.margin_bottom = 10
    style.margin_left = 10
    style.margin_right = 10

    title_face = TextFace(tree_title,
                          fsize=8,
                          fgcolor='black',
                          ftype='Arial',
                          tight_text=False)
    style.title.add_face(title_face, column=0)

    # ---- per-node style -----------------------------------------------
    # Leaf look: invisible blue circle, thin solid branch lines
    # (line type 0 = solid, 1 = dashed, 2 = dotted).
    leaf_settings = (
        ("shape", "circle"),
        ("size", 0),
        ('hz_line_width', 0.5),
        ('vt_line_width', 0.5),
        ('hz_line_type', 0),
        ('vt_line_type', 0),
        ("fgcolor", "blue"),
    )

    for node in tree.traverse():
        node_style = NodeStyle()

        if node.is_leaf():
            for key, value in leaf_settings:
                node_style[key] = value
            font_size = 5
            label_column = 0
            label_position = 'branch-right'
        else:
            node_style["size"] = 0  # no dot on internal nodes
            font_size = 3
            label_column = 5
            label_position = 'branch-top'

        label = TextFace(node.name,
                         fsize=font_size,
                         fgcolor='black',
                         tight_text=False,
                         bold=False)
        node.add_face(label, column=label_column, position=label_position)
        node.set_style(node_style)

    tree.render(tree_output, w=900, units="px", tree_style=style)
# Dataset file paths, keyed by dataset name.
DATA_PATH = {
    "northeuralex": "data/northeuralex-cldf-20210111-fix-pd.tsv",
    "ielex": "data/ielex-4-26-2016.csv",
}

# Dataset URLs, keyed by the same dataset names as DATA_PATH.
# NOTE(review): the "ielex" entry looks like a placeholder, not a URL — confirm.
DATA_URL = {
    "northeuralex": "http://www.sfs.uni-tuebingen.de/~jdellert/northeuralex/0.9/northeuralex-0.9-forms.tsv",
    "ielex": "TEST",
}

# CLTS release archive: file name and URL.
CLTS_PATH = "v1.4.1.tar.gz"
CLTS_URL = "https://github.com/cldf-clts/clts/archive/v1.4.1.tar.gz"

# Shared ete tree style: no scale bar, no automatic leaf names, no forced
# topology, no border, and a margin of 5 on every side.
ETE_TREE_STYLE = TreeStyle()
ETE_TREE_STYLE.show_scale = False
ETE_TREE_STYLE.show_leaf_name = False
ETE_TREE_STYLE.force_topology = False
ETE_TREE_STYLE.show_border = False
ETE_TREE_STYLE.margin_top = 5
ETE_TREE_STYLE.margin_bottom = 5
ETE_TREE_STYLE.margin_right = 5
ETE_TREE_STYLE.margin_left = 5

# Shared ete node style: size 0 removes the balls drawn on the nodes.
ETE_NODE_STYLE = NodeStyle()
ETE_NODE_STYLE["size"] = 0

config = {
    "n_hidden": N_HIDDEN,
    "n_layers_encoder": N_LAYERS_ENCODER,
    "n_layers_decoder": N_LAYERS_DECODER,
    "dropout": DROPOUT,
    "bidirectional_encoder": BIDIRECTIONAL_ENCODER,
    "bidirectional_decoder": BIDIRECTIONAL_DECODER,
    "encoder_all_steps": ENCODER_ALL_STEPS,
    "batch_size": BATCH_SIZE,
    "learning_rate": LEARNING_RATE,
Exemple #5
0
def _collect_best_hmm_hits(tbl_path):
    """Return the best-scoring hit of every HMM block in a domtblout table.

    Rows are grouped by consecutive runs of the same value in column 4
    (the query accession in HMMER's ``--domtblout`` layout).  Within each run
    the row with the highest value in column 13 (domain score) wins; on a tie
    the first row is kept.

    Returns a list of ``(hmm_id, protein_id, start, end)`` tuples where
    ``protein_id`` is column 0 and ``start``/``end`` are columns 17/18
    (alignment from/to) converted to a 0-based, end-exclusive slice.
    An empty table yields an empty list.
    """
    from itertools import groupby

    rows = []
    with open(tbl_path, 'r') as tbl:
        for line in tbl:
            # Skip comment lines and blank lines.
            if line.startswith('#') or not line.strip():
                continue
            rows.append(line.split())

    best_hits = []
    for hmm_id, hits in groupby(rows, key=lambda fields: fields[4]):
        # max() returns the first maximal element, i.e. ties keep the
        # earliest row.
        best = max(hits, key=lambda fields: float(fields[13]))
        best_hits.append((hmm_id, best[0], int(best[17]) - 1, int(best[18])))
    return best_hits


def _render_species_tree(newick_path, image_path):
    """Draw the newick tree at ``newick_path`` into ``image_path`` (900 px wide).

    Leaf names are drawn to the right of the branch, internal node names in a
    smaller font on top of the branch; the title is ``Species_Tree``.
    """
    tree = Tree(newick_path, format=1)

    # set tree parameters
    ts = TreeStyle()
    ts.mode = "r"  # tree model: 'r' for rectangular, 'c' for circular
    ts.show_leaf_name = 0  # names are added below as explicit faces
    ts.title.add_face(TextFace('Species_Tree',
                               fsize=8,
                               fgcolor='black',
                               ftype='Arial',
                               tight_text=False),
                      column=0)
    # set layout parameters
    ts.rotation = 0  # from 0 to 360
    ts.show_scale = False
    ts.margin_top = 10
    ts.margin_bottom = 10
    ts.margin_left = 10
    ts.margin_right = 10
    ts.show_border = False
    ts.branch_vertical_margin = 3  # 3 pixels between adjacent branches

    for each_node in tree.traverse():
        node_style = NodeStyle()
        if each_node.is_leaf():
            node_style["shape"] = "circle"  # circle, square or sphere
            node_style["size"] = 0  # dot size
            node_style['hz_line_width'] = 0.5
            node_style['vt_line_width'] = 0.5
            node_style['hz_line_type'] = 0  # 0 solid, 1 dashed, 2 dotted
            node_style['vt_line_type'] = 0
            node_style["fgcolor"] = "blue"
            font_size, column, position = 5, 0, 'branch-right'
        else:
            node_style["size"] = 0  # dot size
            font_size, column, position = 3, 5, 'branch-top'

        each_node.add_face(TextFace(each_node.name,
                                    fsize=font_size,
                                    fgcolor='black',
                                    tight_text=False,
                                    bold=False),
                           column=column,
                           position=position)
        each_node.set_style(node_style)

    tree.render(image_path, w=900, units="px", tree_style=ts)


def treeMaker(path_to_prokka, path_to_hmm, pwd_hmmsearch_exe, pwd_mafft_exe,
              pwd_fasttree_exe, plot_tree):
    """Build a species tree from the protein files of a set of prokka folders.

    Steps:

    1. Every sub-directory ``<g>`` of ``path_to_prokka`` is treated as one
       genome whose proteins are in ``<g>/<g>.faa``.
    2. hmmsearch is run with ``path_to_hmm`` against each protein file; for
       every HMM the best-scoring hit region is cut out of the protein and
       appended to ``get_species_tree_wd/<hmm_id>.fasta``.
    3. Each per-HMM fasta file is aligned with mafft.
    4. The alignments are concatenated per genome (genomes missing from an
       alignment are padded with ``-``) and written to ``species_tree.aln``.
    5. FastTree is run on the concatenated alignment and its output stored
       in ``species_tree.newick``.
    6. If ``plot_tree == 1`` the tree is rendered to ``species_tree.png``.

    Args:
        path_to_prokka: folder containing one sub-folder per genome.
        path_to_hmm: HMM profile file passed to hmmsearch.
        pwd_hmmsearch_exe: hmmsearch command (split with shell-like rules).
        pwd_mafft_exe: mafft command (split with shell-like rules).
        pwd_fasttree_exe: FastTree command (split with shell-like rules).
        plot_tree: ``1`` to also render the tree as PNG, anything else to
            skip plotting.

    The working folder ``get_species_tree_wd`` is deleted and recreated on
    every call; the output files are written to the current directory.
    Exit codes of the external tools are not checked.
    """
    # Local imports: the file's import block is not guaranteed to provide
    # these modules.
    import shlex
    import shutil
    import subprocess

    # Start from an empty working folder.
    tmp_folder = 'get_species_tree_wd'
    if os.path.isdir(tmp_folder) and not os.path.islink(tmp_folder):
        shutil.rmtree(tmp_folder)
    elif os.path.lexists(tmp_folder):
        os.remove(tmp_folder)
    os.mkdir(tmp_folder)

    # List all prokka dirs in the target folder
    prokka_files = [
        i for i in os.listdir(path_to_prokka)
        if os.path.isdir(os.path.join(path_to_prokka, i))
    ]
    print('Detected %i input genomes' % len(prokka_files))

    # Running hmmsearch on each file
    print('Running hmmsearch...')
    hmmsearch_cmd = shlex.split(pwd_hmmsearch_exe)
    for genome in prokka_files:
        faa_path = os.path.join(path_to_prokka, genome, genome + '.faa')
        tbl_path = os.path.join(tmp_folder, genome + '_hmmout.tbl')

        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through untouched.
        subprocess.call(hmmsearch_cmd + [
            '-o', os.devnull, '--domtblout', tbl_path, path_to_hmm, faa_path
        ])

        # Protein id -> sequence for this genome.
        proteins = {}
        for seq_record in SeqIO.parse(faa_path, 'fasta'):
            proteins[seq_record.id] = str(seq_record.seq)

        # Append the matched region of the best hit of each HMM to that
        # HMM's fasta file.  A genome without any hit contributes nothing
        # (previously this wrote an undefined or stale sequence to
        # '<tmp_folder>/.fasta').
        for hmm_id, protein_id, start, end in _collect_best_hmm_hits(tbl_path):
            hmm_fasta = os.path.join(tmp_folder, hmm_id + '.fasta')
            with open(hmm_fasta, 'a') as file_out:
                file_out.write('>' + genome + '\n')
                file_out.write(proteins[protein_id][start:end] + '\n')

    # Call mafft to align all single fasta files with hmms
    print('Running mafft...')
    mafft_cmd = shlex.split(pwd_mafft_exe) + [
        '--quiet', '--maxiterate', '1000', '--globalpair'
    ]
    suffix = '.fasta'
    for fasta_name in [i for i in os.listdir(tmp_folder)
                       if i.endswith(suffix)]:
        unaligned = os.path.join(tmp_folder, fasta_name)
        aligned = os.path.join(
            tmp_folder, fasta_name[:-len(suffix)] + '_aligned.fasta')
        with open(aligned, 'wb') as aligned_out:
            subprocess.call(mafft_cmd + [unaligned], stdout=aligned_out)
        # The unaligned input is removed whether or not mafft succeeded,
        # as before.
        os.remove(unaligned)

    # Concatenate the single alignments, one growing list of pieces per genome.
    print('Concatenating alignments...')
    concat_parts = {genome: [] for genome in prokka_files}
    for fasta_name in [i for i in os.listdir(tmp_folder)
                       if i.endswith(suffix)]:
        aligned_seqs = {}
        alignment_length = 0
        for seq_record in SeqIO.parse(os.path.join(tmp_folder, fasta_name),
                                      'fasta'):
            aligned_seqs[seq_record.id] = str(seq_record.seq)
            alignment_length = len(aligned_seqs[seq_record.id])

        # Genomes without a sequence in this alignment get a gap-only block.
        gap_block = '-' * alignment_length
        for genome in prokka_files:
            concat_parts[genome].append(aligned_seqs.get(genome, gap_block))

    # writing alignment to file
    with open('./species_tree.aln', 'w') as file_out:
        for genome in prokka_files:
            file_out.write('>' + genome + '\n' +
                           ''.join(concat_parts[genome]) + '\n')

    # calling fasttree for tree calculation
    print('Running fasttree...')
    with open('species_tree.newick', 'wb') as newick_out:
        subprocess.call(
            shlex.split(pwd_fasttree_exe) + ['-quiet', 'species_tree.aln'],
            stdout=newick_out)

    # plot species tree
    plotted = plot_tree == 1
    if plotted:
        print('Plot species tree')
        _render_species_tree('species_tree.newick', 'species_tree' + '.png')

    # Only claim the PNG when it was actually rendered.
    if plotted:
        print(
            'The built species tree was exported to species_tree.newick and species_tree.png'
        )
    else:
        print('The built species tree was exported to species_tree.newick')