예제 #1
0
def reconstruct(tree_file, align_file, csv_file="temp-align.csv"):
    """Run PastML ancestral character reconstruction for a tree + annotations.

    :param tree_file: path to the tree in newick format
    :param align_file: path to the tip annotations; either a fasta alignment
        (converted to csv first) or a csv table indexed by tip name
    :param csv_file: path where the csv conversion of a fasta input is written

    NOTE(review): relies on module-level ``fasta2csv``, ``pd`` (pandas) and
    ``pastml_pipeline`` being in scope.
    """
    if align_file.endswith('.fasta'):
        # Convert the fasta alignment to a csv table PastML can read.
        traits = fasta2csv(align_file, csv_file)
        data = csv_file  # path to the tip/node annotation table (csv)
    else:
        # Already tabular: treat every column as a trait to reconstruct.
        df = pd.read_csv(align_file, index_col=0)
        traits = list(df.columns)
        data = align_file

    tree = tree_file  # path to the tree in newick format

    # Columns present in the annotation table,
    # for which we want to reconstruct ancestral states
    columns = traits

    # Path to the output compressed map visualisation
    html_compressed = "tree-000_map.html"

    # (Optional) path to the output tree visualisation
    html = "tree-000_tree.html"

    # FIX(review): the original prepared html/html_compressed above but never
    # forwarded them to the pipeline, so no visualisations were written.
    pastml_pipeline(data=data,
                    data_sep=',',
                    columns=columns,
                    tree=tree,
                    html_compressed=html_compressed,
                    html=html,
                    verbose=True)
예제 #2
0
파일: acr.py 프로젝트: saishikawa/pastml-1
    parser.add_argument(
        '--verbose',
        action='store_true',
        help="print information on the progress of the analysis")
    params = parser.parse_args()

    for tree, html in zip(params.trees, params.htmls):
        work_dir = tempfile.mkdtemp(
        ) if not params.work_dir else params.work_dir
        pastml_pipeline(data=params.data,
                        tree=tree,
                        html_compressed=html,
                        prediction_method=params.prediction_method,
                        model=params.model,
                        columns=params.columns,
                        name_column=params.name_column,
                        date_column=params.date_column,
                        tip_size_threshold=params.threshold,
                        parameters=params.parameters,
                        out_data=params.out_data,
                        work_dir=work_dir,
                        verbose=params.verbose)
        if params.out_parameters:
            for column, out_parameters in zip(params.columns,
                                              params.out_parameters):
                pastml_out_pars = \
                    get_pastml_parameter_file(method=params.prediction_method, model=params.model, column=column)
                if pastml_out_pars:
                    copyfile(os.path.join(work_dir, pastml_out_pars),
                             out_parameters)
        if not params.work_dir:
예제 #3
0
파일: main.py 프로젝트: saishikawa/pastml-1
from pastml.acr import pastml_pipeline, COPY
from pastml.ml import MPPA, MAP, JOINT, ALL, ML
from pastml.models.f81_like import EFT, F81, JC
from pastml.parsimony import ACCTRAN, DELTRAN, DOWNPASS, MP

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'Albanian.tree.152tax.tre')
STATES_INPUT = os.path.join(DATA_DIR, 'data.txt')
STATES_COPY = os.path.join(DATA_DIR, 'copy_states.csv')

if '__main__' == __name__:
    # Step 1: draw the input tree as-is, without any ancestral reconstruction.
    pastml_pipeline(
        data=STATES_INPUT,
        tree=TREE_NWK,
        data_sep=',',
        prediction_method=COPY,
        html=os.path.join(DATA_DIR, 'trees', 'Albanian_tree_initial.html'),
        work_dir=os.path.join(DATA_DIR, 'pastml', 'initial'),
        verbose=True,
    )
    # Step 2: copy the observed tip states onto the tree ('Country' column)
    # and produce both the full-tree and the compressed-map visualisations.
    copy_tree_html = os.path.join(DATA_DIR, 'trees',
                                  'Albanian_tree_{}.html'.format(COPY))
    copy_map_html = os.path.join(DATA_DIR, 'maps',
                                 'Albanian_map_{}.html'.format(COPY))
    pastml_pipeline(
        data=STATES_COPY,
        tree=TREE_NWK,
        data_sep=',',
        prediction_method=COPY,
        columns='Country',
        html=copy_tree_html,
        html_compressed=copy_map_html,
        work_dir=os.path.join(DATA_DIR, 'pastml', COPY),
        verbose=True,
    )
예제 #4
0
import os

from pastml.ml import ML
from pastml.acr import pastml_pipeline
from pastml.visualisation.cytoscape_manager import TIMELINE_LTT

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'real', 'raxml_tree.dated.nwk')
STATES_INPUT = os.path.join(DATA_DIR, 'real', 'metadata.tab')

if '__main__' == __name__:
    # Reconstruct the 'Location' character with maximum likelihood, drawing
    # a compressed map and a full tree, and uploading the tree to iTOL.
    character = 'Location'
    map_html = os.path.join(DATA_DIR, 'maps', 'map_{}.html'.format(character))
    tree_html = os.path.join(DATA_DIR, 'maps',
                             'tree_{}.html'.format(character))
    pastml_pipeline(
        data=STATES_INPUT,
        tree=TREE_NWK,
        columns=[character],
        prediction_method=ML,
        html_compressed=map_html,
        html=tree_html,
        timeline_type=TIMELINE_LTT,
        tip_size_threshold=25,
        upload_to_itol=True,
        verbose=True,
    )
예제 #5
0
from pastml.acr import pastml_pipeline

# Input annotation table and tree for the reconstruction.
dados = "dados.csv"
arvore = "arvore.tre"

# Column names (and their count) must match the data file exactly.
colunas = ['Coluna1', 'Coluna2', 'Coluna3', 'Coluna4', 'Coluna5', 'Coluna6']

# Produce one pair of visualisations (full tree + compressed map) per column.
for nome_coluna in colunas:
    arv_compl = "arv_compl_" + nome_coluna + ".html"
    arv_compr = "arv_compr_" + nome_coluna + ".html"

    pastml_pipeline(data=dados,
                    tree=arvore,
                    data_sep=",",
                    columns=colunas,
                    name_column=nome_coluna,
                    html=arv_compl,
                    html_compressed=arv_compr,
                    verbose=True)
    print("coluna " + nome_coluna + " pronta!")

print("Uhuuul! Terminou :D")
예제 #6
0
#To run: python3 pastMLGeneReconstruction.py -g $GenePresenceAbsence.csv -t $Tree

import argparse
from pastml.acr import pastml_pipeline

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-g",
        help="Gene presence absence file produced by convertPanarooPastML.py")
    parser.add_argument(
        "-t",
        help=
        "Rooted tree containing all and only samples used in the pangenome reconstruction, with exactly the same sample names"
    )
    args = parser.parse_args()

    # FIX(review): only the header row is needed to list the trait columns,
    # so read one line inside a context manager instead of readlines() on an
    # unclosed handle (resource leak + whole file loaded for one line).
    with open(args.g) as presence_absence_file:
        header = presence_absence_file.readline()

    # NOTE(review): output paths are hard-coded and start with '~', which
    # open()/PastML will not expand -- consider os.path.expanduser or making
    # these CLI options.
    html_compressed = "~/Documents/abscessus/manchester.samples/squeaky/abscessus/cluster3/compressed.html"
    html = "~/Documents/abscessus/manchester.samples/squeaky/abscessus/cluster3/full.html"

    # Every header field except the first (the sample id) is reconstructed.
    columns = header.strip().split(",")[1:]

    pastml_pipeline(data=args.g,
                    data_sep=",",
                    columns=columns,
                    tree=args.t,
                    html_compressed=html_compressed,
                    html=html,
                    verbose=True)
예제 #7
0
import os

from pastml.acr import pastml_pipeline
from pastml.ml import ML

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'best', 'pastml_phyml_tree.nwk')
STATES_INPUT = os.path.join(DATA_DIR, 'metadata.tab')


if '__main__' == __name__:
    # Reconstruct the listed character column(s) with maximum likelihood,
    # write a compressed map and upload the tree to iTOL.
    mutations = ['RT:M184V']
    map_html = os.path.join(DATA_DIR, 'maps',
                            'map_{}.html'.format('_'.join(mutations)))
    pastml_pipeline(data=STATES_INPUT,
                    tree=TREE_NWK,
                    columns=mutations,
                    prediction_method=ML,
                    html_compressed=map_html,
                    upload_to_itol=True,
                    verbose=True)
예제 #8
0
def apply_pastml(id, data, tree, data_sep, id_index, columns, root_date, model,
                 prediction_method, name_column, html_compressed, html, email,
                 title, url, work_dir, no_trimming, timeline_type):
    """Run one PastML analysis job and package/deliver its results.

    Runs ``pastml_pipeline`` with the given inputs, collects the produced
    visualisations into ``work_dir``, zips the work dir next to itself, and
    notifies the user by e-mail.  On any failure an error page is written to
    ``html_compressed``, an error e-mail is sent, and the exception re-raised.

    NOTE(review): ``np``, ``REASONABLE_NUMBER_OF_TIPS``, ``copyfile`` and
    ``send_analysis_email`` are expected to come from the enclosing module.
    """
    try:
        # Imported lazily inside the task body rather than at module level.
        from pastml.acr import pastml_pipeline
        import os
        import shutil

        pastml_pipeline(tree=tree,
                        data=data,
                        data_sep=data_sep,
                        id_index=id_index,
                        columns=columns,
                        root_date=root_date,
                        model=model,
                        prediction_method=prediction_method,
                        name_column=name_column,
                        html_compressed=html_compressed,
                        html=html,
                        verbose=True,
                        work_dir=work_dir,
                        upload_to_itol=True,
                        itol_id='ZxuhG2okfKLQnsgd5xAEGQ',
                        itol_project='pastmlweb',
                        itol_tree_name=id,
                        # keep every tip when trimming is disabled
                        tip_size_threshold=np.inf
                        if no_trimming else REASONABLE_NUMBER_OF_TIPS,
                        timeline_type=timeline_type)
        # If PastML uploaded the tree to iTOL, remember the tree id for the
        # notification e-mail and keep a copy of the id file beside work_dir.
        itol_id = None
        itol_id_file = os.path.join(work_dir, 'iTOL_tree_id.txt')
        if os.path.exists(itol_id_file):
            with open(itol_id_file, 'r') as f:
                itol_id = f.readline().strip('\n')
            copyfile(
                itol_id_file,
                os.path.join(work_dir, '..', 'pastml_{}_itol.txt'.format(id)))
        # Copy the visualisations into work_dir so they end up in the archive.
        if os.path.exists(html_compressed):
            copyfile(
                html_compressed,
                os.path.join(work_dir, 'pastml_compressed_visualisation.html'))
        if html and os.path.exists(html):
            copyfile(
                html,
                os.path.join(work_dir, 'pastml_full_tree_visualisation.html'))
        shutil.make_archive(
            os.path.join(work_dir, '..', 'pastml_{}'.format(id)), 'zip',
            work_dir)
        # Best-effort cleanup: the archive already contains everything needed.
        try:
            shutil.rmtree(work_dir)
        except Exception:  # FIX(review): was a bare except (hid SystemExit &c.)
            pass
        if email:
            send_analysis_email.delay(email, url, id, title, columns, model,
                                      prediction_method, itol_id, None)
    except Exception as e:
        # Leave an error page where the caller expects the visualisation.
        e_str = str(e)
        with open(html_compressed, 'w+') as f:
            f.write('<p>Could not reconstruct the states...<br/>{}</p>'.format(
                e_str))
        if email:
            send_analysis_email.delay(email, url, id, title, columns, model,
                                      prediction_method, None, e_str)
        else:
            send_analysis_email.delay('*****@*****.**', url, id,
                                      title, columns, model, prediction_method,
                                      None, e_str)
        # FIX(review): bare raise is the idiomatic re-raise (was ``raise e``).
        raise