def write_mod_chunk_feats(profile_id, modalities, exp_chunk_pairs, permission_free=True, unit_time=param.UNIT_TIME,
                          filtered=True, server_index=2, max_exps=100):
    """
    Extract one user's per-chunk features for the given modalities and persist them as CSV files.

    Already-computed (exp_id, chunk_id, modality) entries found in the on-disk CSVs are skipped;
    for anything not yet computed, features are extracted and the per-experiment CSV is rewritten.

    :param profile_id: user/profile identifier used to build the feature directory path.
    :param modalities: iterable of modality names to process.
    :param exp_chunk_pairs: list of (exp_id, chunk_id) pairs to cover.
    :param permission_free: forwarded to the feature-extraction engine.
    :param unit_time: time-window length forwarded to the engine.
    :param filtered: forwarded to the engine.
    :param server_index: forwarded to the engine.
    :param max_exps: upper bound on the number of experiments processed per call
        (default 100, preserving the previous hard-coded limit).
    """
    # Unique experiment ids, in first-appearance order.
    exp_ids = list(pd.Series(map(lambda x: x[0], exp_chunk_pairs)).unique())

    # Make sure every modality's output directory exists (exist_ok avoids a TOCTOU race).
    for mod in modalities:
        os.makedirs('%s/id_%s/%s/' % (param.FEATURE_PATH, profile_id, mod), exist_ok=True)

    # Group chunk ids by experiment id.
    exp_chunk_map = {}
    for exp_id, chunk_id in exp_chunk_pairs:
        exp_chunk_map.setdefault(exp_id, []).append(chunk_id)

    for exp_id in exp_ids[:max_exps]:
        # Load any previously computed features for this experiment, per modality.
        # None means "nothing on disk yet" for that modality.
        mod_features = {}
        for mod in modalities:
            if info.MOD_FREQ_TYPE[mod] == info.FREQ_HIGH or info.MOD_FREQ_TYPE[mod] == info.FREQ_EVENT_DRIVEN:
                exp_features = loader.read_csv_chunk_features(profile_id, mod, exp_id) if os.path.isfile(
                    '%s/id_%s/%s/exp_%s.csv' % (param.FEATURE_PATH, profile_id, mod, exp_id)) else None
                mod_features[mod] = exp_features

        for chunk_id in exp_chunk_map[exp_id]:
            conditioning_info = engineer.get_ready_for_conditioning(profile_id, (exp_id, chunk_id), permission_free,
                                                                    unit_time, filtered=filtered,
                                                                    server_index=server_index)
            for mod in mod_features:
                if mod_features[mod] is not None:
                    # Repair CSVs that accumulated duplicate chunk columns: keep unique,
                    # sorted columns and rewrite the file before continuing.
                    unique_chunk_ids = pd.Series(mod_features[mod].columns).unique()
                    if len(unique_chunk_ids) < len(mod_features[mod].columns):
                        unique_chunk_ids = list(unique_chunk_ids)
                        unique_chunk_ids.sort()
                        mod_features[mod] = mod_features[mod].loc[:, unique_chunk_ids]
                        mod_features[mod].to_csv(
                            "%s/id_%s/%s/exp_%s.csv" % (param.FEATURE_PATH, profile_id, mod, exp_id))
                    if chunk_id in mod_features[mod].columns:
                        print("\t\t%s, %s, %s already done." % (exp_id, chunk_id, mod))
                        continue

                print("\t\t%s, %s, %s" % (exp_id, chunk_id, mod))
                feat_series = engineer.extract_feats(profile_id, mod, exp_ids=[exp_id], chunk_ids=[chunk_id],
                                                     conditioning_info=conditioning_info,
                                                     permission_free=permission_free, unit_time=unit_time,
                                                     filtered=filtered, server_index=server_index)
                if mod_features[mod] is None:
                    # First chunk for this modality/experiment: start an empty frame
                    # indexed by the feature names.
                    mod_features[mod] = DataFrame(index=feat_series.index, columns=[])
                    mod_features[mod].columns.name = 'chunk'
                mod_features[mod][chunk_id] = feat_series
                # Persist after every chunk so progress survives interruption.
                mod_features[mod].to_csv("%s/id_%s/%s/exp_%s.csv" % (param.FEATURE_PATH, profile_id, mod, exp_id))
def write_feats(profile_id, modality, field=None, exp_ids=None, chunk_ids=None, filtered=True, server_index=2):
    """
    Compute and save one user's aggregated features for a single modality.

    The optional (field, exp_ids, chunk_ids) arguments narrow what is extracted; any
    that are None are ignored. Mostly used for event-driven or one-time data. The
    resulting CSV lands under FEATURE_PATH; if it already exists nothing is recomputed.
    A leftover legacy file with the same name but no ".csv" suffix is removed after a
    successful write.
    """
    out_dir = f'{param.FEATURE_PATH}/{param.FEATURE_INNER_PREFIX}{profile_id}/{modality}/'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Build the output file name from whichever conditions were supplied.
    stem = f'{field}_' if field is not None else ''
    if exp_ids is not None:
        stem += f'{len(exp_ids)}-exps'
    elif chunk_ids is not None:
        stem += f'{len(chunk_ids)}-chunks'
    else:
        stem += 'all-chunks'
    if param.FEATURE_SET == param.FEATURE_SET_EXTENSION_APP:
        stem += 'New'

    legacy_name = stem            # old naming scheme: same stem, no extension
    csv_name = stem + ".csv"

    if os.path.isfile(out_dir + csv_name):
        print(f'{out_dir + csv_name} already exist!')
        return

    feat_df = engineer.extract_feats(profile_id, modality, field_name=field, exp_ids=exp_ids, chunk_ids=chunk_ids,
                                     filtered=filtered, server_index=server_index)
    feat_df.to_csv(out_dir + csv_name)
    # Clean up the extension-less file produced by the old naming scheme, if present.
    if os.path.isfile(out_dir + legacy_name):
        os.remove(out_dir + legacy_name)