コード例 #1
0
def export_sequence_json(T, path, prefix):
    """Write per-clade sequences, encoded relative to the root, to a JSON file.

    For every (gene, alignment file) pair yielded by
    ``get_genes_and_alignments(path, tree=True)`` the root sequence is stored
    in full under ``elems['root'][gene]``.  Every other node is stored under
    its ``clade`` attribute, either as a ``{position: state}`` dict holding
    the 0-based sites that differ from the root or, when more than
    ``plain_export`` of all sites differ, as the node's full sequence string.

    Parameters
    ----------
    T : tree object providing ``find_clades()`` and ``root``; its nodes need
        ``name`` and ``clade`` attributes (presumably a Bio.Phylo tree --
        confirm against the caller).
    path : forwarded to ``get_genes_and_alignments`` and ``sequence_json``.
    prefix : forwarded to ``sequence_json`` to build the output file name.

    Raises
    ------
    KeyError
        If a node name (including the root's) is absent from an alignment.
    """
    from Bio import SeqIO
    # Fraction of differing sites above which the full sequence is exported
    # instead of the difference dict.
    plain_export = 0.99
    indent = None

    elems = {'root': {}}
    for node in T.find_clades():
        elems[node.clade] = {}

    for gene, aln_fname in get_genes_and_alignments(path, tree=True):
        # Later records with a repeated name overwrite earlier ones.
        seqs = {}
        for record in SeqIO.parse(aln_fname, 'fasta'):
            seqs[record.name] = record

        root_str = "".join(seqs[T.root.name])
        elems['root'][gene] = root_str
        for node in T.find_clades():
            nseq = seqs[node.name]
            # The loop above already read ``node.clade`` for every node, so
            # this guard is purely defensive.
            if not hasattr(node, "clade"):
                continue
            differences = {
                pos: state
                for pos, (state, ancstate) in enumerate(zip(nseq, root_str))
                if state != ancstate
            }
            # Compare against, and fall back to, *this node's* sequence.  The
            # previous code used the leftover parsing-loop variable here, i.e.
            # whichever record happened to be last in the alignment file.
            if len(differences) <= plain_export * len(nseq):
                elems[node.clade][gene] = differences
            else:
                elems[node.clade][gene] = "".join(nseq)

    fname = sequence_json(path, prefix)
    write_json(elems, fname, indent=indent)
コード例 #2
0
def export_diversity(path, prefix, reference, indent=None):
    '''
    Write the alignment entropy of each alignment (nucleotide and
    translations) to file.

    For every (feature, alignment file) pair yielded by
    ``get_genes_and_alignments(path, tree=False)`` the values returned by
    ``diversity_statistics`` are clipped at 0 and rounded to 4 decimals.
    They are stored together with position and codon indices:

    * ``'nuc'``: ``pos`` is ``0..n-1`` and ``codon`` is ``pos // 3``.
    * a feature found in ``load_features(reference)``: ``pos`` is every
      third element obtained by iterating that feature and ``codon`` is
      ``0..n-1``.

    Features that are neither ``'nuc'`` nor known to the reference are
    skipped.  The result is written with ``write_json`` to
    ``diversity_json(path, prefix)``.

    Parameters
    ----------
    path : forwarded to ``get_genes_and_alignments`` and ``diversity_json``.
    prefix : forwarded to ``diversity_json``.
    reference : forwarded to ``load_features``.
    indent : forwarded to ``write_json``.
    '''
    genes = load_features(reference)
    entropy_json = {}
    for feat, aln_fname in get_genes_and_alignments(path, tree=False):
        entropy = diversity_statistics(aln_fname, nuc=feat == 'nuc')
        S = [max(0, round(x, 4)) for x in entropy]
        n = len(S)
        # Index sequences are materialised as lists: on Python 3 a bare
        # ``range`` object is rejected by the standard json encoder.
        if feat == 'nuc':
            entropy_json[feat] = {
                'pos': list(range(n)),
                'codon': [x // 3 for x in range(n)],
                'val': S
            }
        elif feat in genes:
            entropy_json[feat] = {
                'pos': list(genes[feat])[::3],
                'codon': list(range(n)),
                'val': S
            }
    write_json(entropy_json, diversity_json(path, prefix), indent=indent)
コード例 #3
0
        seqs[seq.name] = seq

    muts = {}
    muts[T.root.name]=''
    for node in T.get_nonterminals():
        pseq = seqs[node.name]
        for c in node:
            cseq = seqs[c.name]
            muts[c.name]=','.join([anc+str(pos+1)+der
                        for pos, (anc, der) in enumerate(zip(pseq, cseq))
                        if anc!=der])

    return muts


if __name__ == '__main__':
    # Script entry point: annotate every node in the tree meta data with the
    # amino acid mutations found in each translated alignment.
    parser = generic_argparse("Assign amino acid mutations to the tree")
    args = parser.parse_args()
    path = args.path

    tree_meta = read_tree_meta_data(path)
    T = Phylo.read(tree_newick(path), 'newick')

    for gene, aln_fname in get_genes_and_alignments(path, tree=True):
        # Only translated genes carry amino acid mutations.  Previously the
        # assignment below also ran for 'nuc', which either raised NameError
        # (when 'nuc' came first) or stored the preceding gene's mutations
        # under 'nuc_mutations'.
        if gene == 'nuc':
            continue

        muts = get_amino_acid_mutations(T, aln_fname)
        for node_name in tree_meta:
            tree_meta[node_name][gene+'_mutations'] = muts[node_name]
    write_tree_meta_data(path, tree_meta)
コード例 #4
0
def _read_color_map(fname, cmaps):
    """Append the (name, color) pairs found in *fname* to *cmaps*, by trait.

    A usable line holds exactly three tab-separated fields
    (trait, name, color); every other line is skipped.
    """
    with open(fname, 'r') as cfile:
        for line in cfile:
            try:
                trait, name, color = line.strip().split('\t')
            except ValueError:
                # Wrong number of fields (blank line, comment, ...): ignore.
                continue
            cmaps[trait].append((name, color))


def export_metadata_json(T, path, prefix, reference, isvcf=False, indent=1):
    """Assemble the metadata dictionary and write it with ``write_json``.

    The dictionary contains the terminal count of the tree, today's date,
    placeholder author information, color options (genotype, sampling date
    and one discrete option per trait found in the color map files), panel
    and title settings, latitude/longitude definitions for the places used in
    the tree, the filter list and gene annotations.

    Parameters
    ----------
    T : tree object providing ``count_terminals()`` and ``find_clades()``;
        each node needs an ``attr`` mapping.
    path : forwarded to the file-name helpers (``color_maps``,
        ``drm_color_maps``, ``ref_fasta``, ``meta_json``) and to
        ``get_genes_and_alignments``.
    prefix : forwarded to ``meta_json`` to build the output file name.
    reference : forwarded to ``load_features`` to obtain gene features.
    isvcf : when true, a ``'nuc'`` annotation covering the first record of
        ``ref_fasta(path)`` is added explicitly.
    indent : forwarded to ``write_json``.

    Raises
    ------
    IOError / OSError
        If the file named by ``color_maps(path)`` cannot be opened.  The
        file named by ``drm_color_maps(path)`` is optional.
    """
    print("Writing out metaprocess")
    mjson = {}

    mjson["virus_count"] = T.count_terminals()
    from datetime import date
    mjson["updated"] = date.today().strftime('%Y-%m-%d')
    mjson["author_info"] = {
        "?": {
            "paper_url": "?",
            "journal": "?",
            "title": "?",
            "n": 1
        }
    }
    mjson["seq_author_map"] = {}

    from collections import defaultdict
    cmaps = defaultdict(list)
    _read_color_map(color_maps(path), cmaps)

    # If drug-resistance colours have been auto-generated, get these too.
    import os.path
    if os.path.isfile(drm_color_maps(path)):
        _read_color_map(drm_color_maps(path), cmaps)

    mjson["color_options"] = {
        "gt": {
            "menuItem": "genotype",
            "type": "discrete",
            "legendTitle": "Genotype",
            "key": "genotype"
        },
        "num_date": {
            "menuItem": "date",
            "type": "continuous",
            "legendTitle": "Sampling date",
            "key": "num_date"
        }
    }
    for trait in cmaps:
        mjson["color_options"][trait] = {
            "menuItem": trait,
            "type": "discrete",
            "color_map": cmaps[trait],
            "legendTitle": trait,
            "key": trait
        }

    mjson["panels"] = ["tree", "map", "entropy"]
    mjson["title"] = "NextTB"
    mjson["maintainer"] = "Emma Hodcroft"
    mjson["geo"] = {}
    lat_long_defs = load_lat_long_defs()
    for geo_trait in ['region', "country", 'division']:
        mjson["geo"][geo_trait] = {}
        for n in T.find_clades():
            if geo_trait in n.attr:
                place = n.attr[geo_trait]
                # Only places with a known latitude/longitude are exported.
                if (place not in mjson["geo"][geo_trait]
                        and place in lat_long_defs):
                    mjson["geo"][geo_trait][place] = lat_long_defs[place]

    mjson["commit"] = "unknown"
    mjson["filters"] = ["country", "region", "division"]

    genes = load_features(reference)
    anno = {}
    for feat, aln_fname in get_genes_and_alignments(path, tree=False):
        if feat in genes:
            anno[feat] = {
                "start": int(genes[feat].location.start),
                "end": int(genes[feat].location.end),
                "strand": genes[feat].location.strand
            }

    if isvcf:
        # If vcf, there is no 'gene' called 'nuc' that will be read in
        # above, so manually include it here.
        from filenames import ref_fasta
        from Bio import SeqIO
        # Built-in next() works on Python 2.6+ and Python 3, unlike the
        # Python-2-only ``.next()`` method.
        refSeq = next(SeqIO.parse(ref_fasta(path), format='fasta'))
        anno['nuc'] = {"start": 1, "end": len(refSeq.seq), "strand": 1}

    mjson["annotations"] = anno
    write_json(mjson, meta_json(path, prefix), indent=indent)