def merge_features(arr, pattern, func, do_remove):
    """Merge the fields of a structured array that match ``pattern``.

    Every row of the matched fields is converted to a tuple and passed to
    ``func``; the results are stored in a new field named
    ``pattern + '_' + func.__name__``.

    Parameters
    ----------
    arr : numpy structured array
        Input array; its ``dtype.names`` are filtered by ``pattern``.
    pattern : str
        Pattern handed to ``ut.regex_filter`` (presumably a regular
        expression -- the exact matching semantics live in that helper).
    func : callable
        Called once per row with a tuple of the matched field values.
        Must have a ``__name__`` attribute (used for the new field name).
    do_remove : bool
        If true, the matched source fields are left out of the result.

    Returns
    -------
    numpy structured array
        A newly allocated array with the kept fields plus the merged field,
        or ``arr`` itself (unchanged) when fewer than two fields match.

    Notes
    -----
    The merged field is given the dtype of the first matched field, so
    values returned by ``func`` are cast to that dtype on assignment.
    """
    feats = ut.regex_filter(arr.dtype.names, pattern)
    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("features matched: %s" % (feats,))
    if len(feats) <= 1:
        print("Not enough features to merge.")
        return arr

    merged = [func(tuple(row)) for row in arr[feats]]
    name = pattern + '_' + func.__name__
    removed = set(feats) if do_remove else set()
    # Build the kept-field list from names/field dtypes rather than
    # ``dtype.descr``: ``descr`` inserts unnamed padding entries for dtypes
    # with gaps (e.g. multi-field views), which cannot be copied by name.
    kept = [(field, arr.dtype[field])
            for field in arr.dtype.names
            if field not in removed]
    newdtype = np.dtype(kept + [(name, arr.dtype[feats[0]])])
    newarr = np.empty(arr.shape, dtype=newdtype)
    for field, _ in kept:
        newarr[field] = arr[field]
    newarr[name] = merged
    return newarr
def regex_cols(arr, pattern):
    """Select the identifier columns plus the fields matching ``pattern``.

    The result always starts with the ``'id1'``, ``'id2'`` and ``'hit'``
    fields, followed by whatever field names ``ut.regex_filter`` returns for
    ``arr.dtype.names`` and ``pattern`` (assumed to be a list, since it is
    concatenated onto one).
    """
    matched = ut.regex_filter(arr.dtype.names, pattern)
    columns = ['id1', 'id2', 'hit'] + matched
    return arr[columns]