import pandas as pd

from pastml.acr import pastml_pipeline


def reconstruct(tree_file, align_file, csv_file="temp-align.csv"):
    if align_file.endswith('.fasta'):
        # Convert the fasta alignment into a csv annotation table
        # (fasta2csv is a helper defined elsewhere in this project)
        traits = fasta2csv(align_file, csv_file)
        # Path to the table containing tip/node annotations, in csv or tab format
        data = csv_file
    else:
        df = pd.read_csv(align_file, index_col=0)
        traits = [column for column in df.columns]
        data = align_file

    # Path to the tree in newick format
    tree = tree_file

    # Columns present in the annotation table,
    # for which we want to reconstruct ancestral states
    columns = traits  # e.g. ['Country']

    # Path to the output compressed map visualisation
    html_compressed = "tree-000_map.html"
    # (Optional) path to the output tree visualisation
    html = "tree-000_tree.html"

    pastml_pipeline(data=data, data_sep=',', columns=columns, tree=tree,
                    html_compressed=html_compressed, html=html, verbose=True)
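A minimal sketch of how this helper might be called; the file names below are hypothetical and assume the tips of the tree match the alignment identifiers.

# Hypothetical input files: a newick tree and a fasta alignment with matching tip names.
reconstruct("my_tree.nwk", "my_alignment.fasta")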
parser.add_argument('--verbose', action='store_true',
                    help="print information on the progress of the analysis")
params = parser.parse_args()

for tree, html in zip(params.trees, params.htmls):
    work_dir = tempfile.mkdtemp() if not params.work_dir else params.work_dir
    pastml_pipeline(data=params.data, tree=tree, html_compressed=html,
                    prediction_method=params.prediction_method, model=params.model,
                    columns=params.columns, name_column=params.name_column,
                    date_column=params.date_column, tip_size_threshold=params.threshold,
                    parameters=params.parameters, out_data=params.out_data,
                    work_dir=work_dir, verbose=params.verbose)
    if params.out_parameters:
        for column, out_parameters in zip(params.columns, params.out_parameters):
            pastml_out_pars = \
                get_pastml_parameter_file(method=params.prediction_method,
                                          model=params.model, column=column)
            if pastml_out_pars:
                copyfile(os.path.join(work_dir, pastml_out_pars), out_parameters)
    if not params.work_dir:
import os

from pastml.acr import pastml_pipeline, COPY
from pastml.ml import MPPA, MAP, JOINT, ALL, ML
from pastml.models.f81_like import EFT, F81, JC
from pastml.parsimony import ACCTRAN, DELTRAN, DOWNPASS, MP

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'Albanian.tree.152tax.tre')
STATES_INPUT = os.path.join(DATA_DIR, 'data.txt')
STATES_COPY = os.path.join(DATA_DIR, 'copy_states.csv')

if '__main__' == __name__:
    # The initial tree without ACR
    pastml_pipeline(data=STATES_INPUT, tree=TREE_NWK,
                    html=os.path.join(DATA_DIR, 'trees', 'Albanian_tree_initial.html'),
                    data_sep=',', verbose=True, prediction_method=COPY,
                    work_dir=os.path.join(DATA_DIR, 'pastml', 'initial'))

    # Copy states
    pastml_pipeline(data=STATES_COPY, tree=TREE_NWK,
                    html=os.path.join(DATA_DIR, 'trees', 'Albanian_tree_{}.html'.format(COPY)),
                    html_compressed=os.path.join(DATA_DIR, 'maps', 'Albanian_map_{}.html'.format(COPY)),
                    data_sep=',', verbose=True, prediction_method=COPY, columns='Country',
                    work_dir=os.path.join(DATA_DIR, 'pastml', COPY))
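The example above only exercises the COPY method, although it imports the maximum-likelihood and parsimony constants as well. As a rough sketch of how those could be used on the same inputs, a marginal-posterior (MPPA) run with the F81 model might look like this, assuming the annotation table has a Country column; the output paths are made up.

    # Sketch only: MPPA/F81 maximum-likelihood ACR on the same dataset
    # (output file names below are hypothetical).
    pastml_pipeline(data=STATES_INPUT, tree=TREE_NWK, data_sep=',',
                    prediction_method=MPPA, model=F81, columns='Country',
                    html_compressed=os.path.join(DATA_DIR, 'maps', 'Albanian_map_MPPA_F81.html'),
                    verbose=True,
                    work_dir=os.path.join(DATA_DIR, 'pastml', 'mppa_f81'))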
import os

from pastml.ml import ML
from pastml.acr import pastml_pipeline
from pastml.visualisation.cytoscape_manager import TIMELINE_LTT

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'real', 'raxml_tree.dated.nwk')
STATES_INPUT = os.path.join(DATA_DIR, 'real', 'metadata.tab')

if '__main__' == __name__:
    character = 'Location'
    pastml_pipeline(data=STATES_INPUT, tree=TREE_NWK, prediction_method=ML,
                    html_compressed=os.path.join(DATA_DIR, 'maps', 'map_{}.html'.format(character)),
                    html=os.path.join(DATA_DIR, 'maps', 'tree_{}.html'.format(character)),
                    timeline_type=TIMELINE_LTT, verbose=True, columns=[character],
                    upload_to_itol=True, tip_size_threshold=25)
from pastml.acr import pastml_pipeline

dados = "dados.csv"
arvore = "arvore.tre"

# The column names (and their number) must match those in the data file
colunas = ['Coluna1', 'Coluna2', 'Coluna3', 'Coluna4', 'Coluna5', 'Coluna6']

for nome_coluna in colunas:
    arv_compl = "arv_compl_" + nome_coluna + ".html"
    arv_compr = "arv_compr_" + nome_coluna + ".html"
    pastml_pipeline(data=dados, data_sep=",", columns=colunas, name_column=nome_coluna,
                    tree=arvore, html_compressed=arv_compr, html=arv_compl, verbose=True)
    print("Column " + nome_coluna + " done!")

print("Woohoo! Finished :D")
# To run: python3 pastMLGeneReconstruction.py -g $GenePresenceAbsence.csv -t $Tree
import argparse

from pastml.acr import pastml_pipeline

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-g", help="Gene presence absence file produced by convertPanarooPastML.py")
    parser.add_argument(
        "-t",
        help="Rooted tree containing all and only samples used in the pangenome "
             "reconstruction, with exactly the same sample names")
    args = parser.parse_args()

    genePresenceAbsence = open(args.g).readlines()
    html_compressed = "~/Documents/abscessus/manchester.samples/squeaky/abscessus/cluster3/compressed.html"
    html = "~/Documents/abscessus/manchester.samples/squeaky/abscessus/cluster3/full.html"
    # The columns to be reconstructed
    columns = genePresenceAbsence[0].strip().split(",")[1:]

    pastml_pipeline(data=args.g, data_sep=",", columns=columns, tree=args.t,
                    html_compressed=html_compressed, html=html, verbose=True)
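For illustration only, here is the kind of table the script above appears to expect: tip names in the first column, one presence/absence column per gene. The sample and gene names below are invented; the real file comes from convertPanarooPastML.py.

    import pandas as pd

    # Hypothetical gene presence/absence table for illustration:
    # first column = tip names matching the tree, remaining columns = genes.
    example = pd.DataFrame({'sample': ['tipA', 'tipB', 'tipC'],
                            'geneX': [1, 0, 1],
                            'geneY': [0, 0, 1]})
    example.to_csv('example_presence_absence.csv', index=False)
    # With this header, the script above would reconstruct columns ['geneX', 'geneY'].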
import os

from pastml.acr import pastml_pipeline
from pastml.ml import ML

DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
TREE_NWK = os.path.join(DATA_DIR, 'best', 'pastml_phyml_tree.nwk')
STATES_INPUT = os.path.join(DATA_DIR, 'metadata.tab')

if '__main__' == __name__:
    mutations = ['RT:M184V']
    pastml_pipeline(data=STATES_INPUT, tree=TREE_NWK, prediction_method=ML,
                    html_compressed=os.path.join(DATA_DIR, 'maps',
                                                 'map_{}.html'.format('_'.join(mutations))),
                    verbose=True, columns=mutations, upload_to_itol=True)
def apply_pastml(id, data, tree, data_sep, id_index, columns, root_date, model, prediction_method,
                 name_column, html_compressed, html, email, title, url, work_dir, no_trimming,
                 timeline_type):
    try:
        from pastml.acr import pastml_pipeline
        import numpy as np
        import os
        import shutil
        from shutil import copyfile
        # REASONABLE_NUMBER_OF_TIPS and send_analysis_email are defined elsewhere in this module.

        pastml_pipeline(tree=tree, data=data, data_sep=data_sep, id_index=id_index,
                        columns=columns, root_date=root_date, model=model,
                        prediction_method=prediction_method, name_column=name_column,
                        html_compressed=html_compressed, html=html, verbose=True,
                        work_dir=work_dir, upload_to_itol=True,
                        itol_id='ZxuhG2okfKLQnsgd5xAEGQ', itol_project='pastmlweb',
                        itol_tree_name=id,
                        tip_size_threshold=np.inf if no_trimming else REASONABLE_NUMBER_OF_TIPS,
                        timeline_type=timeline_type)

        # Keep track of the iTOL tree id produced by the upload, if any
        itol_id = None
        itol_id_file = os.path.join(work_dir, 'iTOL_tree_id.txt')
        if os.path.exists(itol_id_file):
            with open(itol_id_file, 'r') as f:
                itol_id = f.readline().strip('\n')
            copyfile(itol_id_file,
                     os.path.join(work_dir, '..', 'pastml_{}_itol.txt'.format(id)))

        # Collect the visualisations into the work dir and archive everything
        if os.path.exists(html_compressed):
            copyfile(html_compressed,
                     os.path.join(work_dir, 'pastml_compressed_visualisation.html'))
        if html and os.path.exists(html):
            copyfile(html, os.path.join(work_dir, 'pastml_full_tree_visualisation.html'))
        shutil.make_archive(os.path.join(work_dir, '..', 'pastml_{}'.format(id)), 'zip', work_dir)
        try:
            shutil.rmtree(work_dir)
        except:
            pass

        if email:
            send_analysis_email.delay(email, url, id, title, columns, model, prediction_method,
                                      itol_id, None)
    except Exception as e:
        e_str = str(e)
        # Report the failure in place of the compressed visualisation
        with open(html_compressed, 'w+') as f:
            f.write('<p>Could not reconstruct the states...<br/>{}</p>'.format(e_str))
        if email:
            send_analysis_email.delay(email, url, id, title, columns, model, prediction_method,
                                      None, e_str)
        else:
            send_analysis_email.delay('*****@*****.**', url, id, title, columns, model,
                                      prediction_method, None, e_str)
        raise e