def calculate_region(arg):
    """Build (or reload) the tree for one sequence window and score it.

    Parameters
    ----------
    arg : tuple
        ``(gname, sub, prot, start, win, nseqs, trop_dict)``.  The first five
        items name the cache files under ``quicktrees/``; ``nseqs`` is passed
        to ``TreeingTools.phylip_tree_collapse_unique`` and ``trop_dict`` to
        the two scoring helpers.

    Returns
    -------
    tuple
        ``(gname, sub, prot, win, start, benj_res)`` -- note that ``win``
        comes before ``start``, unlike in ``arg``.  ``benj_res`` is a short
        message string when a step failed, otherwise the object returned by
        ``TreeingTools.check_distance_pvals`` with the keys ``'AI'``,
        ``'AI-pval'`` and ``'AI-null'`` added (each set to ``'error'`` when
        the association-index step fails).
    """
    gname, sub, prot, start, win, nseqs, trop_dict = arg

    stem = 'quicktrees/%s-%s-%s-%i-%i' % (gname, sub, prot, start, win)
    treename = stem + '.tree'
    matfname = stem + '.pkl'

    # The reload path reads BOTH files, so both must be present; if either is
    # missing the results are recomputed and both files are rewritten.
    if os.path.exists(treename) and os.path.exists(matfname):
        # NOTE: unpickling is only safe because this file is written by the
        # else-branch below; do not point it at untrusted data.
        with open(matfname, 'rb') as handle:
            dmat = pickle.load(handle)

        with open(treename) as handle:
            tree = dendropy.Tree.get_from_stream(handle, 'newick')
    else:
        # 'LTR' is the only region treated as nucleotide sequence.
        alphabet = generic_dna if prot == 'LTR' else generic_protein

        try:
            tree, dmat = TreeingTools.phylip_tree_collapse_unique(nseqs, alphabet=alphabet)
        except ValueError:
            return gname, sub, prot, win, start, 'Too few unique sequences to process'
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still stop the run instead of being reported as a failed window.
            return gname, sub, prot, win, start, 'uncaught exception in dist-mat'

        print('writing')
        # Binary mode is what pickle expects; for the default protocol on
        # POSIX the bytes written are the same as in text mode.
        with open(matfname, 'wb') as handle:
            pickle.dump(dmat, handle)
        with open(treename, 'w') as handle:
            tree.write_to_stream(handle, 'newick')

    try:
        benj_res = TreeingTools.check_distance_pvals(dmat, trop_dict, nreps=50)
    except AssertionError:
        return gname, sub, prot, win, start, 'too few groups'
    except Exception:
        return gname, sub, prot, win, start, 'uncaught exception'

    # The association index is best-effort: a failure here is recorded in the
    # result rather than discarding the distance p-values computed above.
    try:
        out = TreeingTools.evaluate_association_index(tree, trop_dict)
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = out
    except Exception:
        benj_res['AI'], benj_res['AI-pval'], benj_res['AI-null'] = ('error', 'error', 'error')

    return gname, sub, prot, win, start, benj_res
def rolling_tree_apply(tup):
    """Build and save a FastTree newick tree for one window, if not yet on disk.

    Parameters
    ----------
    tup : tuple
        ``(group_series, seq_series, kwargs)``.  ``kwargs`` must provide
        ``sub``, ``Prot``, ``Start`` and ``WinSize``; they form the output
        file name.  ``seq_series`` is aligned against the non-null entries of
        ``group_series`` so only rows present in both are used.
        (``seq_series`` is presumably a DataFrame with one column per
        position, given the row-wise ``apply`` -- confirm against the caller.)

    Returns
    -------
    bool
        Always ``True``: either the tree file already existed, or it has just
        been written.
    """
    group_series, seq_series, kwargs = tup

    fname = '/home/will/SubCData/Trees/Tree-%(sub)s-%(Prot)s-%(Start)i-%(WinSize)i.newick' % kwargs
    if os.path.exists(fname):
        # Already computed on an earlier run; nothing to do.
        return True

    # 'LTR' is the only region treated as nucleotide sequence.
    alpha = generic_dna if kwargs['Prot'] == 'LTR' else generic_protein

    # Keep only rows with at least 5 non-null values.
    seq_series = seq_series.dropna(thresh=5)

    # Inner join on the index: keep rows that also have a group label.
    vseq, _ = seq_series.align(group_series.dropna(), join='inner', axis=0)

    nseq_ser = vseq.apply(append_seq, axis=1)
    nseqs = sorted(nseq_ser.to_dict().items())

    tree = TreeingTools.run_FastTree(nseqs, alphabet=alpha, uniq_seqs=True)

    with open(fname, 'w') as handle:
        tree.write(handle, schema='newick')

    # The distance-matrix / association-index scoring that used to follow this
    # return could never run and has been removed.
    return True
# <codecell>


# <codecell>

# Collect one (label, sequence) pair for every row of wanted_data that has
# both a 'gp120-seq-align' value and a 'Tropism' value.  The label is the
# row index joined to the tropism with '-'.
tres = []
gp120_known = wanted_data[['gp120-seq-align', 'Tropism']].dropna()
for key, row in gp120_known.iterrows():
    oname = key + '-' + row['Tropism']
    gp120_seq = ''.join(row['gp120-seq-align'])
    tres.append((oname, gp120_seq))
    
    

# <codecell>

# Run the labelled gp120 sequences through the PHYLIP helper using the protein
# alphabet.  It returns a (tree, dmat) pair -- presumably the tree and its
# distance matrix; confirm against TreeingTools.
tree, dmat = TreeingTools.phylip_tree_collapse_unique(tres, alphabet=generic_protein)

# <codecell>

# Save the tree to 'gp120tree.nexus' in nexus format.
with open('gp120tree.nexus', 'w') as handle:
    tree.write_to_stream(handle, 'nexus')

# <codecell>

import networkx
# Load the tree as a networkx graph from a Graphviz dot file.
# NOTE(review): nothing visible here writes 'gp120tree.dot' (the earlier cell
# writes 'gp120tree.nexus'); presumably it is produced by a conversion step
# outside this file -- confirm.
with open('gp120tree.dot') as handle:
    new_tree = networkx.read_dot(handle)

# <codecell>

# Compute spring-layout positions for the graph nodes.
# NOTE(review): dim=100 asks for 100-dimensional coordinates -- confirm this
# is intended rather than a 2-D layout.
pos = networkx.spring_layout(new_tree, dim=100)