Beispiel #1
0
    # The QC value is converted to a string before it is stored.
    QC = str(case_SeqInfo['QC'])
    
    # Write one OriginalSeqs row for each of the two PS keys ('PS1' and 'PS2');
    # both are handled by the same loop.
    PS_set = ['PS1', 'PS2']
    for PS in PS_set:
        HLATyping = case_SeqInfo[PS]['HLATyping']
        Donor = case_SeqInfo[PS]['Donor']
        Recipient = case_SeqInfo[PS]['Recipient']
        # Positional insert: the nine values are bound to the nine '?'
        # placeholders in this order.
        # NOTE(review): the order must match the column order of OriginalSeqs;
        # the table schema is not visible here -- confirm.
        record = (BMT_caseID, Audit, Active, Comment, QC, HLATyping, PS, Donor, Recipient, )
        cursor.execute('INSERT INTO OriginalSeqs VALUES (?,?,?,?,?,?,?,?,?)', record)
        # Committed after every single row.
        conn.commit()

# Finished with the database connection used above.
conn.close()

# Pickle the collected records. The target name is built without an extension.
# NOTE(review): presumably save_dict2pickle appends '.pkl' itself -- confirm.
fname = output + 'SG41_52_HLA_' + locus + '_paired'
IMGTdbIO.save_dict2pickle(available_records, fname)

#aa = IMGTdbIO.load_pickle2dict(fname, output)


################# 
# Class II
#################
# Pick one of the per-locus database files by its position in the glob result.
# NOTE(review): glob.glob() returns matches in arbitrary, platform-dependent
# order, so a hard-coded index may select a different locus on another machine.
# The index map below does not say which locus index 4 is -- confirm.
all_DB_files = glob.glob("../Output/SG41_52/2018/IMGTv3310/AvailDB/*.db")
db_file = all_DB_files[4] ## 0: DPB1 2:DRB1 5:DQB1

# The split runs over the whole path, not just the file name. The directory
# part contains one underscore ('SG41_52'), so field 4 is the fourth
# '_'-separated token of the file name.
locus = db_file.split('_')[4]

# NOTE(review): `sql` is presumably the sqlite3 module imported under an alias
# -- confirm against the file's imports.
conn = sql.connect(db_file) # automatically creates a file if doesn't exist
conn.row_factory = sql.Row  # Each row is a dictionary: {colNames: Value}
cursor = conn.cursor()
Beispiel #2
0
        PS2_HLATyping,
        PS2_GLstringM,
        PS2_SeqM,
        Active,
        Audit,
        Comment,
    )
    cursor.execute(
        'INSERT INTO DR_pair_comparison VALUES (?,?,?,?,?,?,?,?,?,?,?)',
        record)

# Commit the pending inserts once, then release the connection.
conn.commit()
conn.close()

# Pickle the per-locus comparison results. The name is built without an
# extension, while the matching reload further down uses the same name with
# '.pkl'.
# NOTE(review): presumably save_dict2pickle appends '.pkl' itself -- confirm.
fname = '../Output/SG39/2018/SG39_DRpairs/SG39_HLA_' + locus + '_wComparison'
IMGTdbIO.save_dict2pickle(DRpair_seqInfo, fname)

########### check GL-string match, sequence matching
# Reload the comparison pickle of every locus and gather the case IDs that
# occur in at least one of them.
Loci = ['A', 'B', 'C', 'DRB1', 'DQB1']  # 'DPB1' is currently excluded
DRpair_seqInfo = {}
All_caseIDs = []

for locus in Loci:
    fname = '../Output/SG39/2018/SG39_DRpairs/SG39_HLA_' + locus + '_wComparison.pkl'
    locus_table = IMGTdbIO.load_pickle2dict(fname)
    DRpair_seqInfo[locus] = locus_table
    # Case IDs are the top-level keys of each per-locus table.
    All_caseIDs.extend(locus_table.keys())

# Drop the IDs that were seen under more than one locus.
All_caseIDs = list(set(All_caseIDs))  # 3412 total

Matching_cases_stats = {
    "ClassI_paired": [],
    "fiveLoci_paired": [],
                    # File the case under every region whose PS1 count is
                    # positive; the three regions are checked independently.
                    ps1_counts = CaseStats[caseID][locus]['PS1']
                    for region in ('ARS', 'Non_ARS_exon', 'Intron'):
                        if ps1_counts[region] > 0:
                            LocusStats[typing][region].append(caseID)
               
                # File the case under every mismatch annotation label. Only
                # entries of 'MMannotation' whose key is a numeric string are
                # considered; all other keys are skipped.
                for key, item in mm_locus_stats['MMannotation'].items():
                    if key.isdigit():
                        # A label seen for the first time gets a new list next
                        # to the existing entries. The previous code assigned a
                        # fresh dict to LocusStats[typing] here, which dropped
                        # the 'ARS' / 'Non_ARS_exon' / 'Intron' lists and every
                        # label collected so far for this typing.
                        LocusStats[typing].setdefault(item, []).append(caseID)

# Bundle the per-case and per-locus statistics and pickle them together; the
# output name is suffixed with the current groupType.
ClassI_stats = {'CaseStats': CaseStats, 'LocusStats': LocusStats}
IMGTdbIO.save_dict2pickle(ClassI_stats, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/ClassI_Stats_0125_'+groupType) #1220_'+groupType)

# Class II
#Group_fname = '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/ClassI_Stats_0125_' + groupType + '.pkl'
#Stats_Dict = IMGTdbIO.load_pickle2dict(Group_fname)

#CaseStats = Stats_Dict['CaseStats']
#LocusStats = Stats_Dict['LocusStats']
for caseID in group_caseIDs:
    # 
    for locus in ClassII_loci:
        bothMM_output = "../Output/SG41_52/2018/IMGTv3310/SG41_52_bothMisMatched_locus_" + locus + "_0125_TargetedAlignment/"
        
        singleMM_output = "../Output/SG41_52/2018/IMGTv3310/SG41_52_singleMisMatched_" + locus + "_0125_TargetedAlignment/"
            
        ### Cases where both sequences don't match
Beispiel #4
0
                                        CaseMatchTable[key][locus][ps][
                                            'Intron'].append(annReads)
                                    elif 'UTR' in annItem.split('.')[0]:
                                        CaseMatchTable[key][locus][ps][
                                            'UTR'].append(annReads)
                                    elif 'Exon' in annItem.split('.')[0]:
                                        if annItem.split('.')[0] in ARS_exon:
                                            CaseMatchTable[key][locus][ps][
                                                'ARS'].append(annReads)
                                        else:
                                            CaseMatchTable[key][locus][ps][
                                                'non_ARS_exon'].append(
                                                    annReads)

    IMGTdbIO.save_dict2pickle(
        CaseMatchTable,
        '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord_Locus_'
        + locus)

# Pickle the complete match table once more, under a name without the
# per-locus suffix.
IMGTdbIO.save_dict2pickle(
    CaseMatchTable,
    '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord')

################
# Count the cases
################
# Reload the table that was just written (same name plus '.pkl').
# NOTE(review): presumably this lets the counting section be run on its own
# without rebuilding the table -- confirm.
CaseMatchTable = IMGTdbIO.load_pickle2dict(
    '../Output/SG39/2018/SG39_Stats/FiveLoci_paired_case_MatchRecord.pkl')
## : paired cases HLA typing stats
fname = '../Output/SG39/2018/SG39_DRpairs/SG39_pairedCases_Stats.pkl'
Matching_cases_stats = IMGTdbIO.load_pickle2dict(fname)
## Matching_cases_stats['fiveLoci_paired']
Beispiel #5
0
                    # File the case under every region whose PS1 count is
                    # positive; the three regions are checked independently.
                    ps1_counts = CaseStats[caseID][locus]['PS1']
                    for region in ('ARS', 'Non_ARS_exon', 'Intron'):
                        if ps1_counts[region] > 0:
                            LocusStats[typing][region].append(caseID)
               
                # File the case under every mismatch annotation label. Only
                # entries of 'MMannotation' whose key is a numeric string are
                # considered; all other keys are skipped.
                for key, item in mm_locus_stats['MMannotation'].items():
                    if key.isdigit():
                        # A label seen for the first time gets a new list next
                        # to the existing entries. The previous code assigned a
                        # fresh dict to LocusStats[typing] here, which dropped
                        # the 'ARS' / 'Non_ARS_exon' / 'Intron' lists and every
                        # label collected so far for this typing.
                        LocusStats[typing].setdefault(item, []).append(caseID)

# Bundle the per-case and per-locus statistics and pickle them together; the
# output name is suffixed with the current groupType.
ClassI_stats = {'CaseStats': CaseStats, 'LocusStats': LocusStats}
IMGTdbIO.save_dict2pickle(ClassI_stats, '../Output/SG39/2018/SG39_Stats/ClassI_Stats_0125_'+groupType)

# Class II
for caseID in group_caseIDs:
    # 
    for locus in ClassII_loci:
        bothMM_output = "../Output/SG39/2018/SG39_bothMisMatched_locus_" + locus + "_0125_TargetedAlignment/"
        
        singleMM_output = "../Output/SG39/2018/SG39_singleMisMatched_" + locus + "_0125_TargetedAlignment/"
            
        ### Cases where both sequences don't match
        if caseID in Matching_cases_stats[locus+'_both_Seqmm']:
            mm_file_PS1 = glob.glob(bothMM_output+ 'CaseID_'+ caseID + '_Locus_' + locus + '_annotation_PS1*.pkl')

            for file_id in mm_file_PS1:
                mm_locus_stats_PS1 = IMGTdbIO.load_pickle2dict(file_id)
Beispiel #6
0
                                
                                #CaseStats[key][locus]
                                # Sort every numerically keyed mismatch annotation
                                # into one of the per-region lists of this case,
                                # locus and PS.
                                for annKey, annItem in seqAlgn_stats['MMannotation'].items():
                                    if not annKey.isdigit():
                                        continue

                                    annReads = IMGTdbIO.annotationFormat(annKey, annItem, seqAlgn_stats['alignment'])

                                    # The text before the first '.' names the region.
                                    region = annItem.split('.')[0]
                                    if 'Intron' in region:
                                        bucket = 'Intron'
                                    elif 'UTR' in region:
                                        bucket = 'UTR'
                                    elif 'Exon' in region:
                                        # Exons listed in ARS_exon are kept apart
                                        # from all other exons.
                                        bucket = 'ARS' if region in ARS_exon else 'non_ARS_exon'
                                    else:
                                        # Anything else is not recorded.
                                        continue

                                    CaseMatchTable[key][locus][ps][bucket].append(annReads)
                           
    IMGTdbIO.save_dict2pickle(CaseMatchTable, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord_Locus_'+locus)
        
# Pickle the complete match table once more, under a name without the
# per-locus suffix.
IMGTdbIO.save_dict2pickle(CaseMatchTable, '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord')


################
# Count the cases
################
# Reload the table that was just written (same name plus '.pkl').
# NOTE(review): presumably this lets the counting section be run on its own
# without rebuilding the table -- confirm.
CaseMatchTable = IMGTdbIO.load_pickle2dict('../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/'+groupType+'_case_MatchRecord.pkl')

#caseID = '44107'
#CaseMatchTable[caseID]

## : paired cases HLA typing stats
fname = '../Output/SG41_52/2018/IMGTv3310/SG41_52_DRpair_Stats/SG41_52_pairedCases_Stats.pkl'
Matching_cases_stats = IMGTdbIO.load_pickle2dict(fname)