Exemple #1
0
def read_unformatted():
    home_id_dict = util.get_homeid_dict()
    files = glob.glob(
        util.get_path('daily_reading', 'raw_data', 'all') +
        'xlsx_unformatted/*.xlsx')
    for f in files[:1]:
        filename = f[f.rfind('/') + 1:]
        tokens = util.split_string([' ', '_', '.', '-'], filename)
        home_id = 'UNKNOWN'
        for x in tokens:
            if x in home_id_dict:
                home_id = home_id_dict[x]
        df = pd.read_excel(f, sheetname=0)
        if filename == 'TC_D2_Romer_Manual readings -logs_ - Sail boat_11-1_11-18.xlsx':
            df.rename(columns={
                'Date': 'date',
                'Time': 'time',
                'Action: observations and behaviour': 'activity'
            },
                      inplace=True)
            df.dropna(axis=0, how='all', inplace=True)
            pattern = re.compile('[0-9]{1,2}')
            df['date'] = df['date'].ffill()
            df['time'] = df['time'].ffill()
            df.dropna(subset=['date', 'time'], axis=0, how='any', inplace=True)
            df['time'] = df['time'].map(remove_ampm)
            df['date'] = df['date'].map(
                lambda x: 'Nov' if x == 'Nov' else '2016-11-{0}'.format(
                    re.match(pattern, x).group()))
            df = df[['date', 'time', 'activity']]
        if filename == 'DHP Log - DHP-HP-Daily Readings week of Jan 11, 2016.xlsx':
            df = df.ix[19:23, [0, 1, 8]]
            df.rename(columns={
                'Date': 'date',
                'Unnamed: 1': 'time',
                datetime.datetime(2016, 1, 14, 0, 0): 'activity'
            },
                      inplace=True)
            df['time'] = df['time'].map(remove_ampm)
            df.info()
            print df.head()
        lastdate = df['date'].tolist()[-1]
        print type(lastdate)
        if type(lastdate) == datetime.datetime:
            timestr = lastdate.strftime('%m-%d-%Y')
        elif type(lastdate) == pd.tslib.Timestamp:
            timestr = '{0}-{1}-{2}'.format(lastdate.month, lastdate.day,
                                           lastdate.year)
        else:
            timestr = lastdate
        outfile = 'activity_{0}_{1}.csv'.format(home_id, timestr)
        print 'write to {0}'.format(outfile)
        df.to_csv(util.get_path('daily_reading', 'activity_stamp', 'all') +
                  '{0}'.format(outfile),
                  index=False)
Exemple #2
0
def read_formatted():
    # df_lookup = pd.read_csv(os.getcwd() + '/input/log_rename.csv')
    # df_lookup.set_index('oldname', inplace=True)
    home_id_dict = util.get_homeid_dict()
    files = glob.glob(
        util.get_path('daily_reading', 'raw_data', 'all') +
        'xlsx_formatted/*.xlsx')
    lastline_dict = \
        {'LCMP Log_Observation-Incident Report_V6.JJN.xlsx': 32,
         'Copy of D4-CMU-Daily Readings_RTto24jan2016.xlsx': 16,
         'D4-Hartkopf_Loftness-Daily Readings.xlsx': 14}
    sheets_dict = {
        'D4-Hartkopf_Loftness-Daily Readings.xlsx': [0, 1],
        'Copy of D4-CMU-Daily Readings_RTto24jan2016.xlsx': [0]
    }
    for f in files:
        sheetlist = [1]
        filename = f[f.rfind('/') + 1:]
        tokens = util.split_string([' ', '_', '.', '-'], filename)
        if filename in sheets_dict:
            sheetlist = sheets_dict[filename]
        home_id = 'UNKNOWN'
        for x in tokens:
            if x in home_id_dict:
                home_id = home_id_dict[x]
        for s in sheetlist:
            idx_lastline = 33
            df = pd.read_excel(f, sheetname=s)
            if filename in lastline_dict:
                idx_lastline = lastline_dict[filename]
            df2 = df.transpose().iloc[:, [1, 2, idx_lastline]]
            df2.dropna(subset=[idx_lastline], inplace=True)
            df2.rename(columns={
                1: 'date',
                2: 'time',
                33: 'activity'
            },
                       inplace=True)
            df2.drop(df2.index[0], axis=0, inplace=True)
            timestr = df2.ix[-1, 'date'].strftime('%m-%d-%Y')
            outfile = 'activity_{0}_{1}.csv'.format(home_id, timestr)
            print 'write to {0}'.format(outfile)
            df2.to_csv(
                util.get_path('daily_reading', 'activity_stamp', 'all') +
                '{0}'.format(outfile),
                index=False)
    return
Exemple #3
0
def read_unformatted():
    home_id_dict = util.get_homeid_dict()
    files = glob.glob(util.get_path('daily_reading', 'raw_data', 'all') + 'xlsx_unformatted/*.xlsx')
    for f in files[:1]:
        filename = f[f.rfind('/') + 1:]
        tokens = util.split_string([' ', '_', '.', '-'], filename)
        home_id = 'UNKNOWN'
        for x in tokens:
            if x in home_id_dict:
                home_id = home_id_dict[x]
        df = pd.read_excel(f, sheetname=0)
        if filename == 'TC_D2_Romer_Manual readings -logs_ - Sail boat_11-1_11-18.xlsx':
            df.rename(columns={'Date': 'date', 'Time': 'time',
                               'Action: observations and behaviour':
                               'activity'}, inplace=True)
            df.dropna(axis=0, how='all', inplace=True)
            pattern = re.compile('[0-9]{1,2}')
            df['date'] = df['date'].ffill()
            df['time'] = df['time'].ffill()
            df.dropna(subset=['date', 'time'], axis=0, how='any',
                      inplace=True)
            df['time'] = df['time'].map(remove_ampm)
            df['date'] = df['date'].map(lambda x: 'Nov' if x == 'Nov' else '2016-11-{0}'.format(re.match(pattern, x).group()))
            df = df[['date', 'time', 'activity']]
        if filename == 'DHP Log - DHP-HP-Daily Readings week of Jan 11, 2016.xlsx':
            df = df.ix[19:23, [0, 1, 8]]
            df.rename(columns={'Date': 'date', 'Unnamed: 1': 'time',
                               datetime.datetime(2016, 1, 14, 0, 0):
                               'activity'}, inplace=True)
            df['time'] = df['time'].map(remove_ampm)
            df.info()
            print df.head()
        lastdate = df['date'].tolist()[-1]
        print type(lastdate)
        if type(lastdate) == datetime.datetime:
            timestr = lastdate.strftime('%m-%d-%Y')
        elif type(lastdate) == pd.tslib.Timestamp:
            timestr = '{0}-{1}-{2}'.format(lastdate.month, lastdate.day, lastdate.year)
        else:
            timestr = lastdate
        outfile = 'activity_{0}_{1}.csv'.format(home_id, timestr)
        print 'write to {0}'.format(outfile)
        df.to_csv(util.get_path('daily_reading', 'activity_stamp',
                                'all') + '{0}'.format(outfile),
                  index=False)
Exemple #4
0
def read_formatted():
    # df_lookup = pd.read_csv(os.getcwd() + '/input/log_rename.csv')
    # df_lookup.set_index('oldname', inplace=True)
    home_id_dict = util.get_homeid_dict()
    files = glob.glob(util.get_path('daily_reading', 'raw_data', 'all') + 'xlsx_formatted/*.xlsx')
    lastline_dict = \
        {'LCMP Log_Observation-Incident Report_V6.JJN.xlsx': 32,
         'Copy of D4-CMU-Daily Readings_RTto24jan2016.xlsx': 16,
         'D4-Hartkopf_Loftness-Daily Readings.xlsx': 14}
    sheets_dict = {'D4-Hartkopf_Loftness-Daily Readings.xlsx': [0, 1],
        'Copy of D4-CMU-Daily Readings_RTto24jan2016.xlsx': [0]}
    for f in files:
        sheetlist = [1]
        filename = f[f.rfind('/') + 1:]
        tokens = util.split_string([' ', '_', '.', '-'], filename)
        if filename in sheets_dict:
            sheetlist = sheets_dict[filename]
        home_id = 'UNKNOWN'
        for x in tokens:
            if x in home_id_dict:
                home_id = home_id_dict[x]
        for s in sheetlist:
            idx_lastline = 33
            df = pd.read_excel(f, sheetname=s)
            if filename in lastline_dict:
                idx_lastline = lastline_dict[filename]
            df2 = df.transpose().iloc[:, [1, 2, idx_lastline]]
            df2.dropna(subset=[idx_lastline], inplace=True)
            df2.rename(columns={1: 'date', 2: 'time', 33: 'activity'}, inplace=True)
            df2.drop(df2.index[0], axis=0, inplace=True)
            timestr = df2.ix[-1, 'date'].strftime('%m-%d-%Y')
            outfile = 'activity_{0}_{1}.csv'.format(home_id, timestr)
            print 'write to {0}'.format(outfile)
            df2.to_csv(util.get_path('daily_reading', 'activity_stamp',
                                    'all') + '{0}'.format(outfile),
                    index=False)
    return