예제 #1
0
    def preprocess_som_patient_data(self, patients):
        """Translate each patient's Entrez gene ids into UniProt ids.

        Args:
            patients: mapping of patient id -> iterable of Entrez gene ids.

        Returns:
            A list holding one dict per patient, in the iteration order of
            ``patients``, shaped ``{'pat_id': ..., 'mutated_nodes': [...]}``.
            ``mutated_nodes`` concatenates the UniProt ids of every gene that
            has a mapping; patients without any mapped gene are kept with an
            empty list.
        """
        # only the Entrez -> UniProt direction is needed here
        _, ent2uni = uniprot_mapper.json_to_dict()

        # Dropping patients that end up with no mapped id is currently
        # disabled, so nothing is removed and the reported count stays zero.
        num_empty = 0

        res = []
        for pat_id, ent_ids in patients.items():
            mutated = []
            for eid in ent_ids:
                # ids without a mapping are silently skipped
                if eid in ent2uni:
                    mutated.extend(ent2uni[eid])
            res.append({'pat_id': pat_id, 'mutated_nodes': mutated})

        log('removed patients:', num_empty)
        return res
예제 #2
0
    def preprocess_seq_patient_data(self, GE, all_ent_ids):
        """Keep only the entries whose Entrez id has a UniProt mapping.

        Args:
            GE: object indexed with a boolean mask that has one flag per
                entry of ``all_ent_ids`` (presumably a gene-expression
                array whose first axis follows ``all_ent_ids`` -- confirm
                against the caller).
            all_ent_ids: sized, re-iterable collection of Entrez gene ids.

        Returns:
            Tuple ``(GE, uni_ids)``: ``GE`` restricted by the mask, and a
            numpy array built from the mapped values of the retained ids,
            in their original order.
        """
        # only the Entrez -> UniProt direction is needed here
        _, ent2uni = uniprot_mapper.json_to_dict()

        # one flag per input id: True when a UniProt mapping exists
        has_mapping = [eid in ent2uni for eid in all_ent_ids]

        kept_ent_ids = np.array(
            [eid for eid, hit in zip(all_ent_ids, has_mapping) if hit]
        )
        uni_ids = np.array([ent2uni[eid] for eid in kept_ent_ids])

        log('uni_ids:', len(uni_ids))
        # every False flag is an id that could not be mapped
        log('miss_ent_ids:', has_mapping.count(False))

        # prune genes whose uniprot id is not found
        pruned = GE[has_mapping]
        return pruned, uni_ids
예제 #3
0
def patient_entrez_to_uniprot():
    """Map the genes listed per patient in ``KIRC_PATH`` to UniProt ids.

    Each data row of ``KIRC_PATH`` is read with column 1 as the Entrez gene
    id and column 2 as the patient id; rows whose Entrez id is ``0`` are
    skipped. The Entrez id is resolved through the dictionary returned by
    ``um.json_to_dict()`` first, then through the tab-separated file at
    ``UNIPROT_ENTREZ_MAP_FPATH`` (column 0 = UniProt, column 1 = Entrez).

    Returns:
        A list of ``[patient, [mapped]]`` pairs, one per row that could be
        resolved. ``mapped`` is the dictionary value when the primary lookup
        hits, or the single UniProt string from the mapping file otherwise.
        Rows that cannot be resolved print ``"none"`` and are left out.

    Raises:
        ValueError: if column 1 of a ``KIRC_PATH`` row is not an integer.
        IndexError: if a row has fewer columns than are read.
    """
    gene_patient_rows = []
    # newline='' lets the csv module do its own newline handling
    with open(KIRC_PATH, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(csv_reader, None)  # skip header; tolerate an empty file
        for row in csv_reader:
            if int(row[1]) != 0:
                gene_patient_rows.append(row)

    # Entrez id -> UniProt id from the flat mapping file. setdefault keeps
    # the first UniProt id listed for an Entrez id, matching a first-match
    # scan of the file.
    fallback_entrez_to_uni = {}
    with open(UNIPROT_ENTREZ_MAP_FPATH, 'r', newline='') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
        next(csv_reader, None)  # skip header; tolerate an empty file
        for row in csv_reader:
            fallback_entrez_to_uni.setdefault(row[1], row[0])

    _, entrez_to_uni = um.json_to_dict()

    patient_uniprot_list = []
    for row in gene_patient_rows:
        entrez_id = row[1]
        patient = row[2]

        try:
            uni_prot = [entrez_to_uni[entrez_id]]
        except KeyError:
            # The fallback must be keyed by the Entrez id as well; looking
            # up the patient id in the Entrez column could never match.
            if entrez_id not in fallback_entrez_to_uni:
                print("none")
                continue
            uni_prot = [fallback_entrez_to_uni[entrez_id]]

        patient_uniprot_list.append([patient, uni_prot])
    return patient_uniprot_list