Example #1
import pandas as pd

# info and loader are project-level modules (modality metadata and database
# read helpers) assumed to be importable in this codebase.
import info
import loader


def insert_co_exp_ids(profile_ids, modalities, db_con_1, db_con_2):
    """
    Scan the high-frequency modalities of each profile, extract the experiment
    ids that exist in every one of them, and insert the ids that are not yet
    stored into the co_exp_ids table.
    """
    print('\twriting co_exp_ids for sensor data')
    for profile_id in profile_ids:
        high_interval_mods = [m for m in modalities if info.MOD_FREQ_TYPE[m] == info.FREQ_HIGH]
        co_exp_ids = []
        for mod in high_interval_mods:
            exp_ids = loader.load_exp_ids(profile_id, mod, filtered=False, server_index=1,
                                          db_con=db_con_1, close=False)
            if len(exp_ids) > 0:
                co_exp_ids.append(pd.DataFrame([0] * len(exp_ids), index=exp_ids, columns=[mod]))
        # Keep only the exp ids that appear in every high-frequency modality.
        co_exp_ids = pd.concat(co_exp_ids, axis=1)
        co_exp_ids = co_exp_ids.dropna()
        co_exp_ids = sorted(co_exp_ids.index)

        done_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)
        co_exp_ids = [x for x in co_exp_ids if x not in done_ids]
        if len(co_exp_ids) == 0:
            print('%s: all co_exp_ids are already inserted!' % profile_id)
            continue

        df = pd.DataFrame(co_exp_ids, columns=['expId'])
        df['profile_id'] = profile_id
        # The deprecated flavor='mysql' argument is gone from current pandas;
        # db_con_2 must be a connectable that DataFrame.to_sql accepts.
        df.to_sql("co_exp_ids", db_con_2, if_exists='append', index=False)
        print('\t\t%s exp ids of user %s were successfully inserted!' % (len(df), profile_id))
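For context, a minimal driver sketch showing how insert_co_exp_ids might be called. The engine URLs, profile ids, and modality names below are placeholders, and it assumes the loader helpers and DataFrame.to_sql both accept SQLAlchemy connectables; it is not part of the original project.

# Hypothetical driver; every concrete value below is a placeholder.
from sqlalchemy import create_engine

source_engine = create_engine('mysql+pymysql://user:password@source-host/sensor_db')
target_engine = create_engine('mysql+pymysql://user:password@target-host/feature_db')

profile_ids = [1001, 1002, 1003]             # placeholder profile ids
modalities = ['accelerometer', 'gyroscope',  # placeholder modality names; the real
              'location']                    # ones are the keys of info.MOD_FREQ_TYPE

insert_co_exp_ids(profile_ids, modalities, source_engine, target_engine)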
Example #2
import datetime

import pandas as pd

# info, loader, and param are project-level modules (modality metadata, database
# read helpers, and processing constants such as param.UNIT_TIME).
import info
import loader
import param


def insert_time_lines(profile_ids, modalities, db_con_1, db_con_2):
    """
    Scan the time lines of the high-frequency, sensitive modalities and check for
    time jumps. If a time jump is detected on at least one modality, the time
    series is split into partitions, and partitions shorter than the configured
    minimum length are dropped. The surviving sub time lines are inserted into
    the time_lines table.
    """
    print('\tidentifying overlapped time line for sensor data')
    for profile_id in profile_ids:
        done_df = loader.load_time_lines(profile_id, db_con=db_con_2, close=False)
        exp_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)
        exp_ids = [x for x in exp_ids if x not in done_df['expId'].values]

        for exp_id in exp_ids:
            sensitive_mods = [m for m in modalities if m in info.SENSITIVE_MOD]
            print('%s' % exp_id)
            mods_logs = loader.load_normally_collected_mods_logs(
                profile_id, exp_id, sensitive_mods, db_con=db_con_1, close=False)
            time_line_df = get_overlap_time_line(mods_logs)
            if len(time_line_df) == 0:
                continue

            # Convert start/end timestamps to seconds since the Unix epoch.
            epoch = datetime.datetime(1970, 1, 1)
            time_line_df['expId'] = exp_id
            time_line_df.loc[:, 'start_time'] = [td.total_seconds()
                                                 for td in (time_line_df['start_time'] - epoch)]
            time_line_df.loc[:, 'end_time'] = [td.total_seconds()
                                               for td in (time_line_df['end_time'] - epoch)]
            time_line_df['duration'] = time_line_df['end_time'] - time_line_df['start_time']
            time_line_df['count'] = (time_line_df['duration'] / param.UNIT_TIME).round()
            time_line_df = time_line_df.reindex(columns=['expId', 'start_time', 'end_time', 'duration', 'count'])

            # Append the new chunks and renumber chunkId over the combined frame.
            done_df = pd.concat([done_df, time_line_df], axis=0)
            done_df = done_df.reset_index(drop=True)
            done_df = done_df.reset_index()
            done_df.columns = ['chunkId', 'expId', 'start_time', 'end_time', 'duration', 'count']
            done_df['profile_id'] = profile_id

            # Write only the rows belonging to the current exp_id.
            df_to_insert = done_df.query('expId == %s' % exp_id)
            df_to_insert.to_sql("time_lines", db_con_2, if_exists='append', index=False)
            print('\t\t%s time lines of user %s, expId %s were successfully inserted!'
                  % (len(df_to_insert), profile_id, exp_id))
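get_overlap_time_line is not shown in this example. As a rough illustration of the partitioning described in the docstring, the sketch below assumes mods_logs maps each modality to a DataFrame with a datetime 'timestamp' column; the gap threshold, minimum duration, and column layout are assumptions, not the project's actual implementation.

# Hypothetical sketch only: the log layout, MAX_GAP, and MIN_DURATION below are
# placeholders, not values from the original project.
import datetime
import pandas as pd

MAX_GAP = datetime.timedelta(seconds=30)        # assumed time-jump threshold
MIN_DURATION = datetime.timedelta(minutes=5)    # assumed minimum partition length


def sketch_overlap_time_line(mods_logs):
    # Overlapping window shared by every modality.
    start = max(df['timestamp'].min() for df in mods_logs.values())
    end = min(df['timestamp'].max() for df in mods_logs.values())
    if start >= end:
        return pd.DataFrame(columns=['start_time', 'end_time'])

    # A gap larger than MAX_GAP in any single modality counts as a time jump.
    holes = []
    for df in mods_logs.values():
        stamps = sorted(t for t in df['timestamp'] if start <= t <= end)
        for prev, cur in zip(stamps, stamps[1:]):
            if cur - prev > MAX_GAP:
                holes.append((prev, cur))

    # Walk the shared window and cut it at every hole.
    partitions, cursor = [], start
    for hole_start, hole_end in sorted(holes):
        if hole_start > cursor:
            partitions.append((cursor, hole_start))
        cursor = max(cursor, hole_end)
    if cursor < end:
        partitions.append((cursor, end))

    # Drop partitions shorter than the assumed minimum duration.
    partitions = [(s, e) for s, e in partitions if e - s >= MIN_DURATION]
    return pd.DataFrame(partitions, columns=['start_time', 'end_time'])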