import matplotlib.pyplot as plt
import os
import sys

import pandas as pd

# `os` and `pd` are used below but are not imported in the visible part of
# this file; a repeated import is harmless if they are also imported earlier.

# Make the project-local helper packages importable.
sys.path.append("/home/will/PySeqUtils/")
sys.path.append("/home/will/PatientPicker/")
# Relative paths below are resolved from this directory.
os.chdir("/home/will/HIVSystemsBio/MoreCytokineAnalysis/")

# <codecell>

# Tab-separated cytokine table.
raw_cyto_data = pd.read_csv("../NewCytokineAnalysis/CytoRawData.csv", sep="\t")

# <codecell>

# Imported only after the sys.path additions above.
# NOTE(review): presumably LoadingTools lives in one of those directories.
import LoadingTools

redcap_data = LoadingTools.load_redcap_data()
# Collapse to one row per (Patient ID, VisitNum) via groupby(...).first().
redcap_data = redcap_data.groupby(["Patient ID", "VisitNum"]).first()

# <codecell>


def count_with_skips(inser, nskips):
    """Return False once more than `nskips` consecutive falsy values occur.

    Walks `inser.values` in order, counting the current run of falsy
    entries; any truthy entry resets the run to zero.

    Parameters:
        inser: object exposing an iterable `.values` attribute
            (presumably a pandas Series -- confirm against callers).
        nskips: largest run of consecutive falsy entries that is tolerated.

    Returns:
        True if no run of falsy entries is longer than `nskips`,
        False otherwise.
    """
    skips = 0
    for row in inser.values:
        if row:
            skips = 0
        else:
            skips += 1
        if skips > nskips:
            return False
    return True
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

import pandas as pd
import sys
import os
import os.path

# Make the project-local PatientPicker package importable before importing it.
sys.path.append('/home/will/PatientPicker/')
import LoadingTools

# <codecell>

# REDCap table indexed by (Patient ID, VisitNum).
redcap = LoadingTools.load_redcap_data().set_index(['Patient ID', 'VisitNum'])

# <codecell>

# (label, boolean Series) pairs; each becomes one column of admit_no_drug_df.
# NOTE(review): the last label says "BEFORE" but the comparison value is
# 'Used after HIV+' -- confirm which one is intended.
admit_no_drugs = [
    ('Current-Drug-Use-NO', redcap['Current Drug use'] == 'No'),
    ('Current-Drug-Use-NEVER', redcap['Current Drug use'] == 'Never'),
    ('Date-Stopped-Drug-Use',
     redcap['Date Stopped Drug Use'] < redcap['Date Of Visit']),
    ('Drug-Use-And-HIV-Status-BEFORE',
     redcap['Drug Use And HIV Status'] == 'Used after HIV+'),
]

# Columns selected purely by name prefix.
test_cols = [col for col in redcap.columns if col.startswith('Test-')]
admit_cols = [col for col in redcap.columns
              if (col.startswith('Admit-') and ('None' not in col))]

# Per patient: True in a Test-* column if any of that patient's rows is truthy.
ever_test = redcap[test_cols].groupby(level='Patient ID').agg('any')

# Admit-* columns side by side with the derived boolean columns above.
admit_no_drug_df = pd.concat(
    [redcap[admit_cols], pd.DataFrame(dict(admit_no_drugs))],
    axis=1,
)

# <codecell>

# Align the per-visit frame with the per-patient frame on 'Patient ID', then
# join the aligned pair column-wise. `axis` is passed by keyword because
# newer pandas no longer accepts it positionally in concat().
tmp = pd.concat(
    admit_no_drug_df.align(ever_test, axis=0, level='Patient ID'),
    axis=1,
)
from scipy.stats import linregress
import matplotlib.pyplot as plt
from matplotlib import dates
import pandas as pd
import gspread
# NOTE(review): `StringIO` as a top-level module exists only on Python 2.
from StringIO import StringIO
import csv
import sys
# `partial` is used below but is not imported in the visible part of this
# file; a repeated import is harmless if it is also imported earlier.
from functools import partial

# Make the project-local helper packages importable before importing them.
sys.path.append('/home/will/PatientPicker/')
sys.path.append('/home/will/PySeqUtils/')
import LoadingTools
from GeneralSeqTools import fasta_reader, fasta_writer, seq_align_to_ref

# <codecell>

# Collapse to one row per (Patient ID, VisitNum) via groupby(...).first().
pat_data = LoadingTools.load_redcap_data().groupby(['Patient ID', 'VisitNum']).first()

# <codecell>

# Where current tobacco use is 'No', treat a missing packs/year value as 0.
# A single .loc assignment is used instead of chained indexing
# (df[col][mask] = ...), which may write to a temporary copy.
mask = pat_data['Current Tobacco Use'] == 'No'
pat_data.loc[mask, 'Tobacco Use (packs/year)'] = (
    pat_data.loc[mask, 'Tobacco Use (packs/year)'].fillna(0)
)

# Exponentially weighted moving average (span=2), applied per patient.
# NOTE(review): `pd.ewma` was removed from later pandas releases; kept
# because this file targets the older API.
ewms = partial(pd.ewma, span=2)
pat_data['Smoothed-Tobacco-Use'] = (
    pat_data['Tobacco Use (packs/year)']
    .groupby(level='Patient ID')
    .transform(ewms)
)

# <codecell>

cols = ['Tobacco Use (packs/year)', 'Smoothed-Tobacco-Use', 'Date Of Visit']
# Only rows with all three columns present, grouped per patient.
grouper = pat_data[cols].dropna().groupby(level='Patient ID')
fig, axs = plt.subplots(2, 1, figsize=(10, 10))
for pat, group in grouper:
    if len(group) > 3:
        # NOTE(review): the body of this branch is not present in this file;
        # `pass` is only a placeholder so the module parses. Restore the
        # original body here.
        pass
# <codecell>

import pandas as pd
import sys
import os
import os.path

# Make the project-local PatientPicker package importable.
sys.path.append("/home/will/PatientPicker/")

# <codecell>

# Imported only after the sys.path addition above.
import LoadingTools

# <codecell>

# REDCap table indexed by (Patient ID, VisitNum).
redcap_data = LoadingTools.load_redcap_data().set_index(["Patient ID", "VisitNum"])

# Tab-separated cytokine table; every row is flagged so that the grouped
# frame below has one HasCyto entry per (Patient ID, VisitNum) present in it.
cyto_data = pd.read_csv(
    "/home/will/HIVSystemsBio/NewCytokineAnalysis/CytoRawData.csv",
    sep="\t",
)
cyto_data["HasCyto"] = True
has_cyto = cyto_data.groupby(["Patient ID", "VisitNum"])[["HasCyto"]].all()

# <codecell>

cols = [
    "Psychomotor Speed Score",
    "Memory Recall Score",
    "Constructional Score",
    "TMHDS",
]
# Bare expression: its value is only shown when run as a notebook cell.
redcap_data["Psychomotor Speed Score"].unique()

# <codecell>

import glob

# Paths of all per-patient FASTA files in the report-generator data folder.
files = glob.glob("/home/will/HIVReportGen/Data/PatientFasta/*.fasta")
seqs = []