def get_feature_vector(self, feature_id, cohort_id_array):
        """Look up one feature's vector and merge it into per-sample records.

        ``feature_id`` and ``cohort_id_array`` are handed to
        ``get_feature_vectors_with_user_data`` as a single request pair; the
        result is indexed by ``feature_id`` and its "type" and "data" entries
        are read.

        Returns a ``(feature_type, merged)`` tuple where ``merged`` is the
        output of ``VectorMergeSupport.get_merged_dict()`` after the "data"
        entry has been added under ``feature_id``.
        """
        started_at = time.time()

        # The lookup helper accepts a list of request pairs; only one is sent.
        lookup = get_feature_vectors_with_user_data([(feature_id, cohort_id_array)])
        entry = lookup[feature_id]
        feature_type = entry["type"]
        feature_vec = entry["data"]

        # Only the lookup and result unpacking are timed, not the merge below.
        logging.info("Time elapsed: " + str(time.time() - started_at))

        merger = VectorMergeSupport("NA", "sample_id", [feature_id])
        merger.add_dict_array(feature_vec, feature_id, "value")

        return feature_type, merger.get_merged_dict()
Beispiel #2
0
    def prepare_feature_vector(self, input_vectors):
        """Build one tab-separated text row per feature from ``input_vectors``.

        Args:
            input_vectors: iterable of objects exposing ``feature_id``,
                ``value_type`` and ``data`` attributes. It is consumed in a
                single pass, so a one-shot iterable is acceptable.

        Returns:
            A list of strings. Each row starts with ``"<code>:<feature_id>"``
            and is followed by that feature's value from every record of the
            merged dict, joined with tabs. ``<code>`` is "N" for
            ``ValueType.INTEGER``/``ValueType.FLOAT``, "C" for
            ``ValueType.STRING`` and "B" for anything else.
        """
        feature_vector_mapping = {}
        feature_ids = []
        for item in input_vectors:
            feature_id, value_type, vector = item.feature_id, item.value_type, item.data
            if value_type in (ValueType.INTEGER, ValueType.FLOAT):
                type_code = "N"
            elif value_type == ValueType.STRING:
                type_code = "C"
            else:
                type_code = "B"

            # A repeated feature_id overwrites the earlier mapping entry but is
            # still listed again in feature_ids, as in the two-pass original.
            feature_vector_mapping[feature_id] = (type_code, vector)
            feature_ids.append(feature_id)

        # Create merged feature vectors
        vms = VectorMergeSupport('NA', 'sample_id', 'case_id', row_ids=feature_ids)
        for feature_id, (_, vector) in feature_vector_mapping.items():
            vms.add_dict_array(vector, feature_id, 'value')

        merged = vms.get_merged_dict()

        rows = []
        for feature_id, (type_code, _) in feature_vector_mapping.items():
            cells = [type_code + ":" + feature_id]
            # NOTE(review): str.join requires every cell to be a string, so the
            # merged values are assumed to be strings already - TODO confirm.
            cells.extend(record[feature_id] for record in merged)
            rows.append("\t".join(cells))

        return rows
Beispiel #3
0
    def prepare_features(self, cohort_id, features):
        """Fetch each feature's vector and build one tab-separated row per feature.

        Args:
            cohort_id: passed unchanged as the second argument of
                ``get_feature_vector`` for every feature.
            features: iterable of feature identifiers. The same object is also
                handed to ``VectorMergeSupport`` as ``row_ids``.

        Returns:
            A list of strings. Each row starts with ``"<code>:<feature>"`` and
            is followed by that feature's value from every record of the
            merged dict, joined with tabs. ``<code>`` is "N" for
            ``ValueType.INTEGER``/``ValueType.FLOAT``, "C" for
            ``ValueType.STRING`` and "B" for anything else.
        """
        # Get the feature data
        feature_vector_mapping = {}
        for feature in features:
            # Called as a bare name, i.e. resolved from the enclosing scope
            # rather than as a method on ``self``.
            value_type, vector = get_feature_vector(feature, cohort_id)

            if value_type in (ValueType.INTEGER, ValueType.FLOAT):
                type_code = "N"
            elif value_type == ValueType.STRING:
                type_code = "C"
            else:
                type_code = "B"

            feature_vector_mapping[feature] = (type_code, vector)

        # Create merged feature vectors
        vms = VectorMergeSupport('NA', 'sample_id', row_ids=features)
        for feature, (_, vector) in feature_vector_mapping.items():
            vms.add_dict_array(vector, feature, 'value')

        merged = vms.get_merged_dict()

        rows = []
        for feature, (type_code, _) in feature_vector_mapping.items():
            cells = [type_code + ":" + feature]
            # NOTE(review): str.join requires every cell to be a string, so the
            # merged values are assumed to be strings already - TODO confirm.
            cells.extend(record[feature] for record in merged)
            rows.append("\t".join(cells))

        return rows