def rolling_tree_apply(tup):
    """Build the tree for one window and cache it on disk as a Newick file.

    Parameters
    ----------
    tup : tuple
        ``(group_series, seq_series, kwargs)`` packed into a single argument.
        ``kwargs`` must provide the ``sub``, ``Prot``, ``Start`` and
        ``WinSize`` keys used to build the output file name.  ``seq_series``
        is filtered, aligned with ``group_series`` on the index and reduced
        row-wise with ``append_seq`` (presumably a DataFrame of sequence
        columns -- TODO confirm against the caller).

    Returns
    -------
    bool
        Always ``True``: either the tree file already existed or it was
        written by this call.  Errors raised while building or writing the
        tree propagate to the caller.
    """
    group_series, seq_series, kwargs = tup

    fname = '/home/will/SubCData/Trees/Tree-%(sub)s-%(Prot)s-%(Start)i-%(WinSize)i.newick' % kwargs
    # Trees are cached on disk: a window whose file already exists is skipped.
    # NOTE(review): a write that fails part-way leaves a file behind, which
    # this check will then treat as a finished tree -- confirm acceptable.
    if os.path.exists(fname):
        return True

    # LTR windows are nucleotide sequences; every other 'Prot' is protein.
    alpha = generic_dna if kwargs['Prot'] == 'LTR' else generic_protein

    # Discard entries that do not reach pandas' thresh=5 non-missing count.
    seq_series = seq_series.dropna(thresh=5)

    # Inner join on the index: only sequences that also have a non-missing
    # group label are kept.  The aligned group labels are not needed here.
    vseq, _ = seq_series.align(group_series.dropna(), join='inner', axis=0)

    nseq_ser = vseq.apply(append_seq, axis=1)
    # Sorted (label, sequence) pairs give a deterministic input order.
    nseqs = sorted(nseq_ser.to_dict().items())

    tree = TreeingTools.run_FastTree(nseqs, alphabet=alpha, uniq_seqs=True)

    with open(fname, 'w') as handle:
        tree.write(handle, schema='newick')
    return True
def test_fast_tree():
    """Generator test for ``TreeingTools.run_FastTree``.

    Builds a tree from the sequences returned by ``tree_seqs()`` using the
    DNA alphabet and yields, one by one, every item that ``check_tree``
    produces for that tree.
    """
    input_seqs = tree_seqs()
    fast_tree = TreeingTools.run_FastTree(input_seqs, alphabet=generic_dna)
    for check in check_tree(fast_tree):
        yield check
# Rows of pat_data that have a value for every column named in ``cols``.
wanted_pat = pat_data[cols.keys()].dropna()

# Rows of ltr_df_cp with complete score and sequence columns.  The sequence
# columns stay in the frame; 'TFJoin' is their row-wise string concatenation.
wanted_scores = ltr_df_cp[score_cols + seq_cols].dropna()
wanted_scores['TFJoin'] = wanted_scores[seq_cols].apply(
    lambda row: ''.join(row), axis=1)

# Keep only index labels present in both tables, put the two tables side by
# side and relabel the columns through the ``cols`` mapping.
check_data = pd.concat(
    wanted_pat.align(wanted_scores, join='inner', axis=0),
    axis=1).rename(columns=cols)
# Any missing score is replaced by the minimum of its own score column.
check_data = check_data.fillna(check_data[score_cols].min())

# Replace '-' and '/' in the column names with '_'.
ncols = {col: col.replace('-', '_').replace('/', '_')
         for col in check_data.columns}
check_data = check_data.rename(columns=ncols)

# <codecell>

import TreeingTools

# Build a tree from the (index label, joined sequence) pairs of 'TFJoin',
# passing the DNA alphabet to run_FastTree.
tree = TreeingTools.run_FastTree(
    check_data['TFJoin'].to_dict().items(),
    alphabet=TreeingTools.generic_dna)

# <codecell>

import networkx as nx
from itertools import combinations
import csv
# Serialise the tree as Newick so the content matches the '.nwk' extension.
# 'phylip' is a character-matrix schema and is not suited to storing a tree.
# NOTE(review): confirm nothing downstream expected a PHYLIP-formatted file.
with open('ltr_tree.nwk', 'w') as handle:
    tree.write(handle, schema='newick')

# <codecell>



# <codecell>