Exemple #1
0
def write_mod_chunk_feats(profile_id, modalities, exp_chunk_pairs,
                          permission_free=True, unit_time=param.UNIT_TIME, filtered=True, server_index=2,
                          max_exps=100):
    """
    Extract one user's per-chunk features for the given modalities and cache them as CSV.

    One file is kept per (modality, experiment) at
    ``<FEATURE_PATH>/id_<profile_id>/<modality>/exp_<exp_id>.csv`` with one column per chunk.
    Chunks whose column is already present in that file are skipped; every newly computed
    chunk is added as a column and the whole file is written again immediately.

    Only modalities whose ``info.MOD_FREQ_TYPE`` is ``FREQ_HIGH`` or ``FREQ_EVENT_DRIVEN``
    are processed; any other modality only gets its output directory created.

    :param profile_id: user identifier, used in the output path and passed to the extractors.
    :param modalities: iterable of modality names.
    :param exp_chunk_pairs: iterable of ``(exp_id, chunk_id)`` pairs to compute.
    :param permission_free: forwarded unchanged to the ``engineer`` helpers.
    :param unit_time: forwarded unchanged to the ``engineer`` helpers.
    :param filtered: forwarded unchanged to the ``engineer`` helpers.
    :param server_index: forwarded unchanged to the ``engineer`` helpers.
    :param max_exps: process at most this many distinct experiments, in order of first
        appearance in ``exp_chunk_pairs``. Defaults to 100 (the previously hard-coded limit);
        pass ``None`` to process all of them.
    """
    # Distinct experiment ids in order of first appearance. A list (not a lazy ``map``)
    # is handed to pandas so this behaves the same regardless of Python/pandas version.
    exp_ids = list(pd.Series([pair[0] for pair in exp_chunk_pairs]).unique())

    for mod in modalities:
        mod_dir = '%s/id_%s/%s/' % (param.FEATURE_PATH, profile_id, mod)
        if not os.path.exists(mod_dir):
            os.makedirs(mod_dir)

    # Group the requested chunk ids by experiment, preserving the given order.
    exp_chunk_map = {}
    for pair in exp_chunk_pairs:
        exp_chunk_map.setdefault(pair[0], []).append(pair[1])

    for exp_id in exp_ids[:max_exps]:
        mod_features = {}  # modality -> cached DataFrame, or None when no file exists yet
        csv_paths = {}     # modality -> CSV path for this experiment
        for mod in modalities:
            if info.MOD_FREQ_TYPE[mod] not in (info.FREQ_HIGH, info.FREQ_EVENT_DRIVEN):
                continue
            csv_path = '%s/id_%s/%s/exp_%s.csv' % (param.FEATURE_PATH, profile_id, mod, exp_id)
            feats = loader.read_csv_chunk_features(profile_id, mod, exp_id) \
                if os.path.isfile(csv_path) else None

            # Repair a cache that contains the same chunk column more than once.
            # Selecting with ``.loc[:, labels]`` would return *every* column matching each
            # label and so keep the duplicates; a boolean mask keeps the first occurrence only.
            if feats is not None and not feats.columns.is_unique:
                feats = feats.loc[:, ~feats.columns.duplicated()]
                feats = feats.loc[:, sorted(feats.columns)].copy()
                feats.to_csv(csv_path)

            mod_features[mod] = feats
            csv_paths[mod] = csv_path

        for chunk_id in exp_chunk_map[exp_id]:
            conditioning_info = engineer.get_ready_for_conditioning(profile_id, (exp_id, chunk_id),
                                                                    permission_free, unit_time, filtered=filtered,
                                                                    server_index=server_index)
            for mod in mod_features:
                feats = mod_features[mod]
                # NOTE(review): assumes the loader returns column labels that compare equal
                # to the chunk ids passed in (same type) -- confirm against the loader.
                if feats is not None and chunk_id in feats.columns:
                    print("\t\t%s, %s, %s already done." % (exp_id, chunk_id, mod))
                    continue

                print("\t\t%s, %s, %s" % (exp_id, chunk_id, mod))
                feat_series = engineer.extract_feats(profile_id, mod, exp_ids=[exp_id], chunk_ids=[chunk_id],
                                                     conditioning_info=conditioning_info,
                                                     permission_free=permission_free, unit_time=unit_time,
                                                     filtered=filtered, server_index=server_index)
                if feats is None:
                    # First chunk for this (modality, experiment): start an empty frame
                    # indexed like the extracted feature series.
                    feats = DataFrame(index=feat_series.index, columns=[])
                    feats.columns.name = 'chunk'
                    mod_features[mod] = feats
                feats[chunk_id] = feat_series
                # Persist after every chunk so an interrupted run loses at most one chunk.
                feats.to_csv(csv_paths[mod])
Exemple #2
0
def write_feats(profile_id, modality, field=None, exp_ids=None, chunk_ids=None, filtered=True, server_index=2):
    """
    Extract one user's aggregated features for a single modality and save them as CSV.

    The optional (field, exp_ids, chunk_ids) conditions are forwarded to the extractor;
    a condition left as None is ignored.
    This method is used mostly for Event-driven or One-time type data.

    The output goes under FEATURE_PATH, in a per-user, per-modality directory. The file name
    is built from the field (if given), then the number of experiments, or else the number
    of chunks, or else 'all-chunks', plus a 'New' suffix for the extension-app feature set.
    If that CSV already exists nothing is computed and a message is printed instead.
    """
    out_dir = '%s/%s%s/%s/' % (param.FEATURE_PATH, param.FEATURE_INNER_PREFIX, profile_id, modality)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Assemble the file name piece by piece.
    name_parts = []
    if field is not None:
        name_parts.append('%s_' % field)

    # exp_ids takes precedence over chunk_ids when both are supplied.
    if exp_ids is not None:
        name_parts.append('%s-exps' % len(exp_ids))
    elif chunk_ids is not None:
        name_parts.append('%s-chunks' % len(chunk_ids))
    else:
        name_parts.append('all-chunks')

    if param.FEATURE_SET == param.FEATURE_SET_EXTENSION_APP:
        name_parts.append('New')

    stem = ''.join(name_parts)
    csv_path = out_dir + stem + ".csv"
    # Same name without the ".csv" extension; such a file is removed after a fresh write.
    extensionless_path = out_dir + stem

    if os.path.isfile(csv_path):
        print('%s already exist!' % csv_path)
        return

    feat_df = engineer.extract_feats(profile_id, modality, field_name=field, exp_ids=exp_ids,
                                     chunk_ids=chunk_ids, filtered=filtered, server_index=server_index)
    feat_df.to_csv(csv_path)
    if os.path.isfile(extensionless_path):
        os.remove(extensionless_path)