Example #1
0
def insert_time_lines(profile_ids, modalities, db_con_1, db_con_2):
    """
    Compute the overlapped time lines of the sensitive modalities and insert them into the database.

    For every profile, the experiments that do not appear yet in the stored time lines are processed:
    the logs of the sensitive modalities are loaded, ``get_overlap_time_line`` turns them into
    a frame of (start_time, end_time) chunks, and the chunks are appended to the ``time_lines`` table.
    Experiments for which no overlapped time line is found are skipped.

    :param profile_ids: iterable of profile ids to process.
    :param modalities: iterable of modalities; only the ones contained in ``info.SENSITIVE_MOD`` are used.
    :param db_con_1: connection used to load the modality logs.
    :param db_con_2: connection used to load the experiment ids and the stored time lines,
                     and to insert the new time lines.
    :return: None
    """
    base_columns = ['expId', 'start_time', 'end_time', 'duration', 'count']
    epoch = datetime.datetime(1970, 1, 1)

    print('\tidentifying overlapped time line for sensor data')
    for profile_id in profile_ids:
        done_df = loader.load_time_lines(profile_id, db_con=db_con_2, close=False)
        exp_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)

        # Real lists (not lazy ``filter`` objects) so the code behaves the same on Python 2 and 3.
        done_exp_ids = set(done_df['expId'].tolist())
        exp_ids = [exp_id for exp_id in exp_ids if exp_id not in done_exp_ids]
        # Does not depend on the experiment, so it is built once per profile.
        sensitive_mods = [mod for mod in modalities if mod in info.SENSITIVE_MOD]

        # The accumulator only ever holds the base columns; 'chunkId' and 'profile_id' are added to a
        # separate copy below. Otherwise the column count grows after the first experiment and the
        # column renaming fails on the second one.
        done_df = done_df.reindex(columns=base_columns)

        for exp_id in exp_ids:
            print('%s' % exp_id)
            mods_logs = loader.load_normally_collected_mods_logs(
                profile_id, exp_id, sensitive_mods, db_con=db_con_1, close=False)
            time_line_df = get_overlap_time_line(mods_logs)
            if len(time_line_df) == 0:
                continue

            time_line_df['expId'] = exp_id
            # Convert both boundaries to seconds elapsed since 1970-01-01.
            for column in ('start_time', 'end_time'):
                time_line_df[column] = [delta.total_seconds() for delta in time_line_df[column] - epoch]
            time_line_df['duration'] = time_line_df['end_time'] - time_line_df['start_time']
            time_line_df['count'] = (time_line_df['duration'] / param.UNIT_TIME).round()
            time_line_df = time_line_df.reindex(columns=base_columns)

            done_df = pd.concat([done_df, time_line_df], axis=0).reset_index(drop=True)

            # chunkId is the position of the row among all time lines gathered so far for this profile.
            numbered_df = done_df.reset_index()
            numbered_df.columns = ['chunkId'] + base_columns
            numbered_df['profile_id'] = profile_id

            # Boolean mask instead of ``query('expId = ...')``: '=' is an assignment in query
            # expressions, and building the expression from the id breaks for non-numeric ids.
            df_to_insert = numbered_df[numbered_df['expId'] == exp_id]
            # NOTE(review): ``flavor`` only exists in old pandas releases; kept as-is because db_con_2
            # is presumably a raw MySQL connection - confirm before upgrading pandas.
            df_to_insert.to_sql("time_lines", db_con_2, flavor='mysql', if_exists='append', index=False)
            print('\t\t%s number of time lines of user %s, expId %s are successfully inserted!'
                  % (len(df_to_insert), profile_id, exp_id))
Example #2
0
def get_targets_under_constraint(profile_id, const_index=0, policy=None):
    tl_df = loader.load_time_lines(profile_id, const_index=const_index, policy=policy, db_con=None, close=False)