def get_feature_vector(self, feature_id, cohort_id_array):
    """Fetch a single feature's data and merge it through VectorMergeSupport.

    Args:
        feature_id: Identifier of the feature to fetch. It is also the key
            used to read the fetch result and the row id given to the merger.
        cohort_id_array: Cohort identifiers, forwarded unchanged to
            ``get_feature_vectors_with_user_data``.

    Returns:
        A ``(feature_type, merged)`` tuple, where ``feature_type`` is the
        ``"type"`` entry of the fetch result for ``feature_id`` and ``merged``
        is the value returned by ``VectorMergeSupport.get_merged_dict()``
        after the ``"data"`` entry has been added to it.
    """
    started_at = time.time()

    # The fetch helper takes a list of (feature, cohorts) pairs and returns a
    # mapping keyed by feature id; only one pair is requested here.
    fetched = get_feature_vectors_with_user_data([(feature_id, cohort_id_array)])
    feature_type = fetched[feature_id]["type"]
    feature_vec = fetched[feature_id]["data"]

    # Only the fetch is timed; the merge below is not included.
    logging.info("Time elapsed: " + str(time.time() - started_at))

    merger = VectorMergeSupport("NA", "sample_id", [feature_id])
    merger.add_dict_array(feature_vec, feature_id, "value")
    return feature_type, merger.get_merged_dict()
def prepare_feature_vector(self, input_vectors):
    """Turn input vectors into tab-separated rows, one per distinct feature.

    Each element of ``input_vectors`` must expose ``feature_id``,
    ``value_type`` and ``data`` attributes. The value type is reduced to a
    one-letter code: ``"N"`` for ``ValueType.INTEGER`` / ``ValueType.FLOAT``,
    ``"C"`` for ``ValueType.STRING`` and ``"B"`` for anything else.

    Args:
        input_vectors: Iterable of vector objects as described above. It is
            consumed in a single pass, so a one-shot iterable works too.

    Returns:
        A list of strings. Each string starts with ``"<code>:<feature_id>"``
        followed by that feature's value from every entry of the merged
        result, all joined with tab characters. When the same ``feature_id``
        occurs more than once, the last occurrence wins.

    Raises:
        TypeError: Raised by ``str.join`` if a merged value is not a string.
    """
    coded_vectors = {}
    feature_ids = []
    for entry in input_vectors:
        value_type = entry.value_type
        if value_type == ValueType.INTEGER or value_type == ValueType.FLOAT:
            type_code = "N"
        elif value_type == ValueType.STRING:
            type_code = "C"
        else:
            type_code = "B"
        coded_vectors[entry.feature_id] = (type_code, entry.data)
        # Keep every id (duplicates included) for the merger's row_ids.
        feature_ids.append(entry.feature_id)

    # Create merged feature vectors
    merger = VectorMergeSupport('NA', 'sample_id', 'case_id', row_ids=feature_ids)
    for feature_id, (_, vector) in coded_vectors.items():
        merger.add_dict_array(vector, feature_id, 'value')
    merged = merger.get_merged_dict()

    rows = []
    for feature_id, (type_code, _) in coded_vectors.items():
        cells = [type_code + ":" + feature_id]
        cells.extend(merged_entry[feature_id] for merged_entry in merged)
        rows.append("\t".join(cells))
    return rows
def prepare_features(self, cohort_id, features):
    """Fetch each feature for a cohort and format them as tab-separated rows.

    The value type returned for each feature is reduced to a one-letter code:
    ``"N"`` for ``ValueType.INTEGER`` / ``ValueType.FLOAT``, ``"C"`` for
    ``ValueType.STRING`` and ``"B"`` for anything else.

    Args:
        cohort_id: Cohort identifier passed to ``get_feature_vector`` for
            every feature.
        features: Feature identifiers. The same object is iterated here and
            also handed to ``VectorMergeSupport`` as ``row_ids``.

    Returns:
        A list of strings. Each string starts with ``"<code>:<feature>"``
        followed by that feature's value from every entry of the merged
        result, all joined with tab characters. A feature listed more than
        once yields a single row.

    Raises:
        TypeError: Raised by ``str.join`` if a merged value is not a string.
    """
    # Get the feature data
    coded_vectors = {}
    for feature in features:
        # NOTE(review): this is a bare-name call, so it resolves to a
        # module-level ``get_feature_vector`` and not to a method on
        # ``self`` -- confirm that is the intended callee.
        value_type, vector = get_feature_vector(feature, cohort_id)
        if value_type == ValueType.INTEGER or value_type == ValueType.FLOAT:
            type_code = "N"
        elif value_type == ValueType.STRING:
            type_code = "C"
        else:
            type_code = "B"
        coded_vectors[feature] = (type_code, vector)

    # Create merged feature vectors
    merger = VectorMergeSupport('NA', 'sample_id', row_ids=features)
    for feature, (_, vector) in coded_vectors.items():
        merger.add_dict_array(vector, feature, 'value')
    merged = merger.get_merged_dict()

    rows = []
    for feature, (type_code, _) in coded_vectors.items():
        cells = [type_code + ":" + feature]
        cells.extend(merged_entry[feature] for merged_entry in merged)
        rows.append("\t".join(cells))
    return rows