import glob
import os
import re
from collections import OrderedDict

import numpy as np
import pandas as pd

import misc  # project-local helper module providing str2date, etc.


def load():

    df = pd.read_csv(os.path.join(basedir, 'Businesses_Registered_in_San_Francisco_-_Active.csv'))
    df.rename(columns = lambda x: re.sub(' ', '_', x), inplace = True)
    df['Class_Code'] = np.int32(df.Class_Code)
    df['PBC_Code'] = np.int32(df.PBC_Code)

    # convert lat, long from string to float
    p = re.compile(r'\d*\.\d*')
    LocX = []; LocY = []; ix = []
    for i, j in df.iterrows():
        if pd.notnull(j['Location']):
            x, y = np.float32(p.findall(j['Location']))
            LocX.append(x); LocY.append(y)
            ix.append(i)
    df['LocX'] = pd.Series(LocX, index = ix)
    df['LocY'] = pd.Series(LocY, index = ix)

    # add description field from PBC code descriptions
    pbc2descr = dict(np.loadtxt('pbc_codes.csv', 'S', delimiter = ','))
    Descr = []
    for i in df.PBC_Code:
        try:
            Descr.append(pbc2descr[str(i)])
        except KeyError:
            Descr.append('UNKNOWN')
    df['Descript'] = Descr

    # convert founded date from str to datetime
    tmp = [(i, misc.str2date(str(j['DBA_Start_Date']), delimiter = '', format = 'YYYYMMDD'))
        for i, j in df.iterrows()]
    ix, dates = zip(*tmp)
    df['Founded'] = pd.Series(dates, index = ix)

    return df
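# The function below is a minimal, hypothetical usage sketch (not part of the
# original module): it assumes load() above succeeds and only uses columns that
# load() itself creates ('Descript', 'LocX').
def _example_load_usage():
    df = load()
    # tally active businesses per PBC description
    counts = df.groupby('Descript').size()
    # keep only rows whose 'Location' string parsed into coordinates
    located = df[pd.notnull(df['LocX'])]
    return counts, located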
def fileconvert_all(studydir):
    '''
    Converts all of the pre-pulse inhibition (PPI) text file outputs in
    <studydir>/data/PPI and stores one pandas-formatted CSV per session in
    <studydir>/pairpulse.
    '''

    pairpulsedir = os.path.join(studydir, 'pairpulse')
    if not os.path.exists(pairpulsedir):
        os.mkdir(pairpulsedir)

    animalpaths = glob.glob(os.path.join(studydir, 'data', 'PPI', '[0-9]*'))
    for animalpath in animalpaths:

        fpaths = glob.glob(os.path.join(animalpath, '[A-Za-z]*.txt'))
        for fpath in fpaths:

            absol, relat = os.path.split(fpath)
            animalID, gen, condition, mo, da, yr, _ = relat.split('_')

            # load the animal DOB (and optional date1) from the animal info table
            animalinfo = get_animalinfo(animalID, studydir)
            dob_str = animalinfo.DOB.values[0]
            dob = misc.str2date(dob_str, delimiter = '/', format = 'MMDDYYYY')
            if hasattr(animalinfo, 'date1'):
                date1_str = animalinfo.date1.values[0]
                date1 = misc.str2date(date1_str, delimiter = '/', format = 'MMDDYYYY')

            # zero-pad the session date and compute the animal's age at this session
            mo = '%2.2u' % int(mo)
            da = '%2.2u' % int(da)
            yr = '%4.4u' % int(yr)
            sess_str = '_'.join((yr, mo, da))
            sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')
            age = (sess_date - dob).days
            if hasattr(animalinfo, 'date1'):
                postdate1 = (sess_date - date1).days

            outpath = os.path.join(pairpulsedir, '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da)))
            # skip sessions that have already been converted
            if not os.path.exists(outpath):
                gapratio = fileconvert(fpath)
                if hasattr(animalinfo, 'date1'):
                    df = pd.DataFrame(dict(gapratio = gapratio, animalID = animalID, gen = gen,
                        condition = condition, sess = sess_str, age = age, postdate1 = postdate1))
                else:
                    df = pd.DataFrame(dict(gapratio = gapratio, animalID = animalID, gen = gen,
                        condition = condition, sess = sess_str, age = age))
                df.to_csv(outpath)
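# Hypothetical sketch of the file naming convention the PPI fileconvert_all above
# relies on; the example filename is made up, but the seven-field underscore layout
# matches the split performed in the loop.
def _example_ppi_filename_split():
    relat = 'Blue1_ko_prenicotine_03_07_2013_PPI.txt'  # hypothetical file name
    animalID, gen, condition, mo, da, yr, _ = relat.split('_')
    # the session date is rebuilt as YYYY_MM_DD and compared against the DOB to get age
    sess_str = '_'.join(('%4.4u' % int(yr), '%2.2u' % int(mo), '%2.2u' % int(da)))
    return animalID, gen, condition, sess_str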
def fileconvert_processed(studydir):

    freqs = [5000, 7071, 10000, 14142, 20000, 28284]

    gapdetectiondir = os.path.join(studydir, 'gapdetection')
    if not os.path.exists(gapdetectiondir):
        os.mkdir(gapdetectiondir)

    cagepaths = glob.glob(os.path.join(studydir, 'data', 'Gap', '[0-9]*'))
    dobs = pd.read_csv(os.path.join(studydir, 'dobs.csv'))

    for cagepath in cagepaths:

        absol, cageID = os.path.split(cagepath)
        animalIDs = [i for i in dobs.animalID if cageID in i]

        fpaths = glob.glob(os.path.join(cagepath, '*.txt'))
        for fpath in fpaths:

            # one row per animal in the cage, six tab-separated gap ratios per row
            df = pd.read_csv(fpath, usecols=range(6), sep='\t', header=None, nrows=len(animalIDs))
            df.index = animalIDs

            absol, relat = os.path.split(fpath)
            if relat.startswith('_'):
                _, gen, condition, mo, da, yr, _ = relat[1:].split('_')
            else:
                _, gen, condition, mo, da, yr, _ = relat.split('_')

            for animalID in animalIDs:

                newrelat = '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da))
                if relat.startswith('_'):
                    newrelat = '_' + newrelat
                outpath = os.path.join(studydir, 'gapdetection', newrelat)

                # skip if the file already exists
                if os.path.exists(outpath): continue

                gapratio = df.ix[animalID].values

                # start building the dataframe dict
                d = OrderedDict()

                # add required fields (gapratio, animalID, genotype, condition, session date, age)
                d.update(dict(freq=freqs, gapratio=gapratio, animalID=animalID, gen=gen, condition=condition))

                # load the animal DOB, group, etc. from the dobs.csv file
                animalinfo = get_animalinfo(animalID, studydir)

                # get the date of birth
                dob_str = animalinfo.DOB.values[0]
                dob = misc.str2date(dob_str, delimiter = '/', format = 'MMDDYYYY')
                animalinfo['DOB'] = dob

                # how old was the animal when this session was run?
                sess_str = '_'.join((yr, mo, da))
                sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')
                age = (sess_date - dob).days

                d.update(dict(sess = sess_date, age = age))

                # how many days was this session from each "date" column in the animalinfo?
                dateinfo = animalinfo.filter(regex='date*')
                d_postdate = OrderedDict()
                for key, value in dateinfo.iteritems():
                    date = misc.str2date(value.values[0], delimiter='/', format='MMDDYYYY')
                    d_postdate.update({'post'+key: (sess_date-date).days})
                d.update(d_postdate)

                # add all supplementary animalinfo fields
                for key, value in animalinfo.iteritems():
                    d.update({key: value.values[0]})

                pd.DataFrame(d).to_csv(outpath)
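# Hypothetical sketch of the per-cage text file layout fileconvert_processed above
# expects: tab-separated, one row per animal housed in the cage, six gap-ratio
# columns in the same order as `freqs`. The numbers and animal IDs are invented.
def _example_cage_file_layout():
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    txt = u'0.91\t0.85\t0.74\t0.66\t0.52\t0.41\n0.88\t0.79\t0.70\t0.61\t0.49\t0.38\n'
    df = pd.read_csv(StringIO(txt), usecols=range(6), sep='\t', header=None, nrows=2)
    df.index = ['Blue1', 'Blue2']  # hypothetical animal IDs for this cage
    return df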
def fileconvert_all(studydir):
    '''
    Converts all of the text file outputs in <studydir>/data/Gap and stores one
    pandas-formatted CSV per animal and session in <studydir>/gapdetection.
    '''

    gapdetectiondir = os.path.join(studydir, 'gapdetection')
    if not os.path.exists(gapdetectiondir):
        os.mkdir(gapdetectiondir)

    # loop through all animals
    animalpaths = glob.glob(os.path.join(studydir, 'data', 'Gap', '[0-9]*'))
    for animalpath in animalpaths:

        fpaths = glob.glob(os.path.join(animalpath, '*.txt'))
        for fpath in fpaths:

            absol, relat = os.path.split(fpath)
            if relat.startswith('_'):
                animalID, gen, condition, mo, da, yr, _ = relat[1:].split('_')
            else:
                animalID, gen, condition, mo, da, yr, _ = relat.split('_')

            newrelat = '%s.csv' % '_'.join((animalID, gen, condition, yr, mo, da))
            if relat.startswith('_'):
                newrelat = '_' + newrelat
            outpath = os.path.join(studydir, 'gapdetection', newrelat)

            # skip if the file already exists
            if os.path.exists(outpath): continue

            # calculate gapratio
            df = fileconvert(fpath)

            df['animalID'] = animalID
            df['gen'] = gen
            df['condition'] = condition

            # load the animal DOB, group, etc. from the dobs.csv file
            animalinfo = get_animalinfo(animalID, studydir)

            # get the date of birth
            dob_str = animalinfo.DOB.values[0]
            dob = misc.str2date(dob_str, delimiter = '/', format = 'MMDDYYYY')
            animalinfo['DOB'] = dob

            # how old was the animal when this session was run?
            sess_str = '_'.join((yr, mo, da))
            sess_date = misc.str2date(sess_str, delimiter = '_', format = 'YYYYMMDD')
            age = (sess_date - dob).days

            df['sess'] = sess_date
            df['age'] = age

            # how many days was this session from each "date" column in the animalinfo?
            dateinfo = animalinfo.filter(regex='date*')
            for key, value in dateinfo.iteritems():
                date = misc.str2date(value.values[0], delimiter='/', format='MMDDYYYY')
                df['post'+key] = (sess_date-date).days

            # add all supplementary animalinfo fields
            for key, value in animalinfo.iteritems():
                df[key] = value.values[0]

            df.to_csv(outpath)
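# Minimal, hypothetical usage sketch (the study path is made up): run the Gap
# conversion for one study directory. fileconvert_all above expects text files named
# <animalID>_<gen>_<condition>_<MM>_<DD>_<YYYY>_*.txt under <studydir>/data/Gap/<animal>/
# and an animal info table readable by get_animalinfo; converted CSVs land in
# <studydir>/gapdetection/.
def _example_run_gap_conversion():
    studydir = '/path/to/gap_study'  # hypothetical study directory
    fileconvert_all(studydir)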