import matplotlib.pyplot as plt
import sys

# `os` is needed for chdir below and is not imported earlier in this script.
import os

# Make the locally checked-out helper packages importable.
sys.path.append("/home/will/PySeqUtils/")
sys.path.append("/home/will/PatientPicker/")
# Work from the analysis directory so the relative data paths used by later
# cells (e.g. "../NewCytokineAnalysis/...") resolve.
os.chdir("/home/will/HIVSystemsBio/MoreCytokineAnalysis/")

# <codecell>

# pandas is used here but is not imported earlier in this script.
import pandas as pd

# Read the raw data table; the file is tab-delimited despite its .csv name.
# The path is relative, so it depends on the current working directory.
raw_cyto_data = pd.read_csv("../NewCytokineAnalysis/CytoRawData.csv", sep="\t")

# <codecell>

import LoadingTools

# Load the table returned by LoadingTools.load_redcap_data() and collapse it
# to one row per (Patient ID, VisitNum) by taking first() of each group.
redcap_data = (
    LoadingTools.load_redcap_data()
    .groupby(["Patient ID", "VisitNum"])
    .first()
)

# <codecell>


def count_with_skips(inser, nskips):
    """Check that falsy entries never occur in runs longer than ``nskips``.

    Walks ``inser.values`` in order, tracking the length of the current run
    of consecutive falsy entries; any truthy entry resets the run to zero.

    Parameters
    ----------
    inser : object exposing an iterable ``.values`` attribute
        The entries to scan (truthiness of each entry is what matters).
    nskips : int
        Longest run of consecutive falsy entries that is tolerated.

    Returns
    -------
    bool
        ``False`` as soon as the current run exceeds ``nskips``;
        ``True`` if the whole sequence is scanned without that happening
        (including when the sequence is empty).
    """
    current_run = 0
    for entry in inser.values:
        # A truthy entry breaks the run; a falsy one extends it.
        current_run = 0 if entry else current_run + 1
        if current_run > nskips:
            return False
    return True
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

import pandas as pd
import sys
import os, os.path

sys.path.append('/home/will/PatientPicker/')
import LoadingTools

# <codecell>

# Load the table returned by LoadingTools.load_redcap_data() and index it by
# (Patient ID, VisitNum) so later cells can group on the 'Patient ID' level.
redcap = LoadingTools.load_redcap_data()
redcap = redcap.set_index(['Patient ID', 'VisitNum'])

# <codecell>

# Named boolean Series derived from the drug-use questionnaire columns.
# NOTE(review): the 'Drug-Use-And-HIV-Status-BEFORE' entry compares against
# the value 'Used after HIV+' -- confirm label and value are meant to pair up.
admit_no_drugs = [
    ('Current-Drug-Use-NO', redcap['Current Drug use'] == 'No'),
    ('Current-Drug-Use-NEVER', redcap['Current Drug use'] == 'Never'),
    ('Date-Stopped-Drug-Use',
     redcap['Date Stopped Drug Use'] < redcap['Date Of Visit']),
    ('Drug-Use-And-HIV-Status-BEFORE',
     redcap['Drug Use And HIV Status'] == 'Used after HIV+'),
]

# Partition the columns by name prefix: every 'Test-' column, and every
# 'Admit-' column whose name does not contain 'None'.
test_cols = []
admit_cols = []
for col in redcap.columns:
    if col.startswith('Test-'):
        test_cols.append(col)
    elif col.startswith('Admit-') and 'None' not in col:
        admit_cols.append(col)

# Per patient: True in a 'Test-' column if any of that patient's rows is truthy.
ever_test = redcap[test_cols].groupby(level='Patient ID').agg('any')

# Put the 'Admit-' columns and the derived indicators side by side.
no_drug_flags = pd.DataFrame(dict(admit_no_drugs))
admit_no_drug_df = pd.concat([redcap[admit_cols], no_drug_flags], axis=1)

# <codecell>

# Align the per-visit admission table with the per-patient test table on the
# 'Patient ID' index level, then join the two aligned frames column-wise.
# `axis` is passed by keyword (as in the concat above): recent pandas no
# longer accepts it positionally.
tmp = pd.concat(admit_no_drug_df.align(ever_test, axis=0, level='Patient ID'), axis=1)
from scipy.stats import linregress
import matplotlib.pyplot as plt
from matplotlib import dates
import pandas as pd
import gspread
from StringIO import StringIO
import csv
import sys
sys.path.append('/home/will/PatientPicker/')
sys.path.append('/home/will/PySeqUtils/')
import LoadingTools
from GeneralSeqTools import fasta_reader, fasta_writer, seq_align_to_ref

# <codecell>

# Load the table returned by LoadingTools.load_redcap_data() and reduce it to
# one row per (Patient ID, VisitNum) by taking first() of each group.
pat_data = LoadingTools.load_redcap_data()
pat_data = pat_data.groupby(['Patient ID', 'VisitNum']).first()

# <codecell>

# `partial` is used below and is not imported earlier in this script.
from functools import partial

# Rows where current tobacco use is recorded as 'No': a missing packs/year
# value on those rows is filled with 0.
mask = pat_data['Current Tobacco Use'] == 'No'
# Assign through .loc so the fill is written into pat_data itself; the chained
# form pat_data[col][mask] = ... can modify a temporary copy and be lost.
pat_data.loc[mask, 'Tobacco Use (packs/year)'] = (
    pat_data.loc[mask, 'Tobacco Use (packs/year)'].fillna(0)
)

# Exponentially weighted moving average (span=2), applied per patient.
# NOTE(review): pd.ewma was removed in pandas 0.23; on newer pandas the
# equivalent is Series.ewm(span=2).mean() -- confirm the target version.
ewms = partial(pd.ewma, span = 2)
pat_data['Smoothed-Tobacco-Use'] = pat_data['Tobacco Use (packs/year)'].groupby(level = 'Patient ID').transform(ewms)

# <codecell>

# Columns required by the plotting loop that follows.
cols = [
    'Tobacco Use (packs/year)',
    'Smoothed-Tobacco-Use',
    'Date Of Visit',
]
# Keep only rows where all of those columns are present, grouped per patient.
complete_rows = pat_data[cols].dropna()
grouper = complete_rows.groupby(level='Patient ID')
# Two stacked axes on a 10x10 figure.
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
for pat, group in grouper: 
    if len(group) > 3:
# <codecell>

import pandas as pd
import sys
import os, os.path

sys.path.append("/home/will/PatientPicker/")

# <codecell>

import LoadingTools

# <codecell>

# Table returned by LoadingTools.load_redcap_data(), indexed by
# (Patient ID, VisitNum).
redcap_data = LoadingTools.load_redcap_data()
redcap_data = redcap_data.set_index(["Patient ID", "VisitNum"])

# Raw data table; tab-delimited despite the .csv extension.
cyto_data = pd.read_csv(
    "/home/will/HIVSystemsBio/NewCytokineAnalysis/CytoRawData.csv",
    sep="\t",
)
# Constant marker column: every row of this table gets HasCyto = True.
cyto_data["HasCyto"] = True
# One-column frame with a row per (Patient ID, VisitNum) present in cyto_data.
cyto_visits = cyto_data.groupby(["Patient ID", "VisitNum"])
has_cyto = cyto_visits[["HasCyto"]].all()

# <codecell>

# Score columns of interest.
cols = [
    "Psychomotor Speed Score",
    "Memory Recall Score",
    "Constructional Score",
    "TMHDS",
]
# Distinct values of the first score column ("Psychomotor Speed Score").
# This is a bare expression: the result is only shown in an interactive
# session and is otherwise discarded.
redcap_data[cols[0]].unique()

# <codecell>

import glob

# Paths of every .fasta file in the PatientFasta directory (empty list if the
# directory is missing or holds no matches).
fasta_glob = "/home/will/HIVReportGen/Data/PatientFasta/*.fasta"
files = glob.glob(fasta_glob)
# Starts empty; presumably filled from `files` by code that follows -- not
# visible in this part of the file.
seqs = list()