Example #1
0
    def preprocess_cnv_patient_data(self, patients, is_pos=True):
        """Translate every patient's CNV gene-id lists in place.

        For each patient record in ``patients`` and each ``cnv_type`` key of
        that record, the stored ids are replaced by the flattened values found
        for them in the second mapping returned by
        ``uniprot_mapper.json_to_dict()``. Ids missing from that mapping are
        dropped.

        Args:
            patients: mapping of patient id -> mapping of cnv type ->
                iterable of gene ids. The inner mappings are mutated in place.
            is_pos: not read by this method.

        Returns:
            A list that is always empty, because nothing is ever appended
            to it. NOTE(review): callers presumably rely on the in-place
            mutation of ``patients``; confirm whether a result was meant to be
            returned here.
        """
        # only the second mapper is needed (presumably entrez -> uniprot ids)
        _, id_map = uniprot_mapper.json_to_dict()

        res = []
        for _pat_id, cnv_record in patients.items():
            for cnv_type, gene_ids in cnv_record.items():
                translated = []
                for gene_id in gene_ids:
                    if gene_id in id_map:
                        # one source id may map to several target ids
                        translated.extend(id_map[gene_id])
                # re-assigning an existing key is safe while iterating items()
                cnv_record[cnv_type] = translated

        return res
    def preprocess_seq_patient_data(self, GE, all_ent_ids):
        """Keep only the entries of ``GE`` whose gene id has a mapping.

        The second mapping returned by ``uniprot_mapper.json_to_dict()`` is
        used both to decide which ids survive and to look up their mapped
        values.

        Args:
            GE: object indexed with a list of booleans, one per element of
                ``all_ent_ids`` (assumed to be a NumPy array whose first axis
                lines up with ``all_ent_ids`` -- TODO confirm).
            all_ent_ids: sized iterable of gene ids.

        Returns:
            Tuple ``(GE, uni_ids)``: ``GE`` indexed by the found/not-found
            mask, and an object-dtype NumPy array holding the mapped value of
            each id that was found, in input order.
        """
        # only the second mapper is needed (presumably entrez -> uniprot ids)
        _, id_map = uniprot_mapper.json_to_dict()

        # single pass: build the boolean mask and the list of surviving ids
        keep_mask = []
        kept_ids = []
        for gene_id in all_ent_ids:
            is_known = gene_id in id_map
            keep_mask.append(is_known)
            if is_known:
                kept_ids.append(gene_id)

        # look the ids up after the NumPy round-trip, as the original did
        kept_arr = np.array(kept_ids)
        uni_ids = np.array([id_map[gene_id] for gene_id in kept_arr], dtype=object)

        log('uni_ids:', len(uni_ids))
        log('miss_ent_ids:', len(all_ent_ids) - len(kept_ids))

        # prune genes whose mapped id is not found
        return GE[keep_mask], uni_ids
    def preprocess_som_patient_data(self, patients):
        """Build one record per patient with their mapped gene ids.

        Each patient's ids are looked up in the second mapping returned by
        ``uniprot_mapper.json_to_dict()``; ids missing from it are skipped and
        the mapped values are flattened into a single list.

        Args:
            patients: mapping of patient id -> iterable of gene ids.

        Returns:
            List of dicts with keys ``'pat_id'`` and ``'mutated_nodes'``, one
            per patient in iteration order. Patients whose mapped list is
            empty are still included.
        """
        # only the second mapper is needed (presumably entrez -> uniprot ids)
        _, id_map = uniprot_mapper.json_to_dict()

        res = [
            {
                'pat_id': pat_id,
                'mutated_nodes': [
                    mapped_id
                    for gene_id in gene_ids
                    if gene_id in id_map
                    for mapped_id in id_map[gene_id]
                ],
            }
            for pat_id, gene_ids in patients.items()
        ]

        # no patient is ever dropped here, so the reported count is always 0
        removed_count = 0
        log('removed patients:', removed_count)

        return res
Example #4
0
def preprocess_patient_data(patients):
    """Build records for patients that have at least one mapped gene id.

    Each patient's ids are looked up in the second mapping returned by
    ``uniprot_mapper.json_to_dict()``; ids missing from it are skipped and the
    mapped values are flattened into a single list.

    Args:
        patients: mapping of patient id -> iterable of gene ids.

    Returns:
        List of dicts with keys ``'pat_id'`` and ``'mutated_nodes'``, in
        iteration order. Patients with no mapped id at all are left out.
    """
    # only the second mapper is needed (presumably entrez -> uniprot ids)
    _, id_map = uniprot_mapper.json_to_dict()

    res = []
    for pat_id, gene_ids in patients.items():
        mapped_ids = []
        for gene_id in gene_ids:
            if gene_id in id_map:
                # one source id may map to several target ids
                mapped_ids.extend(id_map[gene_id])

        # skip patients without any match
        if not mapped_ids:
            continue

        res.append({
            'pat_id': pat_id,
            'mutated_nodes': mapped_ids,
        })

    return res