Exemple #1
0
    def start(self):
        '''Split the input table into frequency-pass and frequency-fail files.

        Reads ``self.infile`` line by line. The header line (the one starting
        with ``Scale``) is copied to both ``<suffix>.filter_freq_pass`` and
        ``<suffix>.filter_freq_fail``. For every other line, each column named
        by ``self.get_freq_item()`` is tagged ``'pass'`` when it holds a number
        below ``self.freq`` and ``'fail'`` otherwise (the ``'-'`` placeholder
        included). ``self.judge_freq_tag`` then decides which output file
        receives the unchanged line.

        Raises:
            ValueError: if a frequency column holds something that is neither
                ``'-'`` nor parseable by ``float``.
        '''
        freq_item = self.get_freq_item()

        with open(self.infile, 'r') as fr, \
                open(self.suffix + '.filter_freq_pass', 'w') as fw_pass, \
                open(self.suffix + '.filter_freq_fail', 'w') as fw_fail:

            for line in fr:
                if line.startswith('Scale'):
                    fw_pass.write(line)
                    fw_fail.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                # strip('\n') rather than strip(''): the latter removes
                # nothing, which left the newline glued to the last column and
                # broke the '-' comparison / float() parse for that column.
                linelist = line.strip('\n').split('\t')

                # A fresh list per line; item is e.g. gnomAD_SAS_AF and is
                # looked up lower-cased in the header index.
                freq_status_list = []
                for item in freq_item:
                    value = linelist[head_index[item.lower()]]
                    if value != '-' and float(value) < self.freq:
                        freq_status_list.append('pass')
                    else:
                        freq_status_list.append('fail')

                if self.judge_freq_tag(freq_status_list):
                    fw_pass.write(line)
                else:
                    fw_fail.write(line)
    def start(self):
        '''Filter variants against the local-control table.

        The header line (starting with ``Scale``) is copied to both
        ``<suffix>_pass`` and ``<suffix>_fail``. Each data line is keyed by
        ``chromosome_start_reference_allele`` and looked up in the mapping
        returned by ``self.get_localcontrol_info()``:

        * key absent: the line goes to the pass file unchanged;
        * case frequency above entry ``[0]``: the line goes to the pass file,
          prefixed with ``NOControl<entry[1]>;``;
        * case frequency at or below entry ``[0]``: the line goes to the fail
          file with the same prefix.
        '''
        localcontrol_info = self.get_localcontrol_info()
        key_columns = ('chromosome', 'start_position', 'reference_allele',
                       'allele')

        with open(self.infile, 'r') as fr, \
                open(self.suffix + '_pass', 'w') as fw_pass, \
                open(self.suffix + '_fail', 'w') as fw_fail:
            for line in fr:
                if line.startswith('Scale'):
                    for handle in (fw_pass, fw_fail):
                        handle.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                case_freq = float(fields[head_index['case_var_freq']])
                # The start position is the one used as the variant position.
                key = '_'.join(
                    [fields[head_index[column]] for column in key_columns])

                if key not in localcontrol_info:
                    fw_pass.write(line)
                    continue

                control = localcontrol_info[key]
                if case_freq > control[0]:
                    fw_pass.write('NOControl{0};{1}'.format(control[1], line))
                elif case_freq <= control[0]:
                    fw_fail.write('NOControl{0};{1}'.format(control[1], line))
                else:
                    # Reached only when neither comparison holds
                    # (e.g. a NaN frequency).
                    fw_pass.write(line)
Exemple #3
0
    def start(self):
        '''Split the input table by functional annotation.

        The header line (starting with ``Scale``) is copied to both
        ``<suffix>_pass`` and ``<suffix>_fail``. A data line passes when either

        * its ``bgi_function`` is in ``self.get_save_function_list()`` and
          ``self.save_span(chgvs, bgi_func, vep_func)`` is truthy, or
        * the special ``<hugo_symbol>:<hgvsc>`` key (used for the TERT
          promoter entries) is itself in that list.

        Every other line is written to the fail file. Lines are written
        unchanged.
        '''
        func_list = self.get_save_function_list()

        with utils.safe_open(self.infile, 'r') as fr, \
                open(self.suffix + '_pass', 'w') as fw_pass, \
                open(self.suffix + '_fail', 'w') as fw_fail:

            for line in fr:
                if line.startswith('Scale'):
                    fw_pass.write(line)
                    fw_fail.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                # strip('\n') rather than strip(''): the latter removes
                # nothing, so a value in the last column kept its newline and
                # could never match an entry of func_list.
                linelist = line.strip('\n').split('\t')
                bgi_func = linelist[head_index['bgi_function']]  # e.g. nonsense
                vep_func = linelist[head_index['consequence']]  # raw VEP annotation
                gene = linelist[head_index['hugo_symbol']]  # e.g. TERT
                chgvs = linelist[head_index['hgvsc']]  # e.g. c.-146C>T

                # Special key built for the TERT promoter entries.
                tert_promter = '{0}:{1}'.format(gene, chgvs)

                # Parentheses spell out the precedence the original relied on
                # implicitly ("and" binds tighter than "or").
                keep = (
                    (bgi_func in func_list
                     and self.save_span(chgvs, bgi_func, vep_func))
                    or tert_promter in func_list
                )
                if keep:
                    fw_pass.write(line)
                else:
                    fw_fail.write(line)
Exemple #4
0
    def start(self):
        '''Split the input table by supporting-read counts.

        The header line (starting with ``Scale``) is copied unchanged to
        ``<suffix>_pass`` and, with an extra ``filter_reason`` column, to
        ``<suffix>_fail``. A data line is written unchanged to the pass file
        when ``self.pass_read_threshold`` accepts its total, positive-strand
        and negative-strand variant read counts (passed as the raw column
        strings); otherwise it is written to the fail file with ``ReadNum``
        appended as the reason.
        '''
        read_columns = ('case_var_readsnum', 'case_var_positive_readsnum',
                        'case_var_negative_readsnum')

        with open(self.infile, 'r') as fr, \
                open(self.suffix + '_pass', 'w') as fw_pass, \
                open(self.suffix + '_fail', 'w') as fw_fail:

            for line in fr:
                bare = line.strip('\n')

                if line.startswith('Scale'):
                    fw_pass.write(line)
                    fw_fail.write(bare + '\tfilter_reason\n')
                    head_index = utils.get_head_index(line)
                    continue

                fields = bare.split('\t')
                read_counts = [fields[head_index[column]]
                               for column in read_columns]

                if not self.pass_read_threshold(*read_counts):
                    fw_fail.write(bare + '\tReadNum\n')
                else:
                    fw_pass.write(line)
Exemple #5
0
    def start(self):
        '''Fill the uniport columns of every data line.

        ``start`` is the common entry point of all these step classes.

        The header line (starting with ``Scale``) is copied to the output
        unchanged. For each data line the ``bgi_uniport_position(s)`` column
        is first set to the per-gene value from ``uniport_gene_length``
        (``'*'`` when the gene is unknown). If ``uniport_info`` has a truthy
        entry for ``<gene>_<first number found in hgvsp_short>``, the
        position, feature-key and description columns are overwritten from
        that entry, with ``'*'`` standing in for empty values.
        '''
        uniport_info, uniport_gene_length = self.get_uniport_info()

        # (output column, key inside the uniport_info entry)
        detail_columns = (
            ('bgi_uniport_position(s)', 'length'),
            ('bgi_uniport_feature_key', 'feature_key'),
            ('bgi_uniport_description', 'description'),
        )

        with open(self.infile, 'r') as fr, open(self.resullt, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                gene = fields[head_index['hugo_symbol']]
                phgvs = fields[head_index['hgvsp_short']]

                # Position of the mutated amino acid: first run of digits.
                digits = re.search(r'(\d+)', phgvs)
                phgvs_pos = digits.group(1) if digits else '*'
                key = '{0}_{1}'.format(gene, phgvs_pos)

                fields[head_index['bgi_uniport_position(s)']] = \
                    uniport_gene_length.get(gene, '*')

                if uniport_info.get(key):
                    for column, entry_key in detail_columns:
                        fields[head_index[column]] = \
                            uniport_info[key][entry_key] or '*'

                fw.write('\t'.join(fields) + '\n')
Exemple #6
0
    def start(self):
        '''Update the ``funcregion`` and ``exon`` columns of every data line.

        The header line (starting with ``Scale``) is copied to ``self.result``
        unchanged. For each data line, ``funcregion`` is set from
        ``self.get_func_relation_info()`` using the key
        ``<transcript_id>_<exon>`` (``'Nan'`` when absent), and ``exon`` is
        replaced by ``self.final_exon(final_exon_info, tran, exon)``, which is
        given the original exon value.
        '''
        final_exon_info = self.get_final_exon_info()
        func_relation_info = self.get_func_relation_info()

        with open(self.infile, 'r') as fr, open(self.result, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                # Looked up as in the original; the value is not used below.
                gene = fields[head_index['hugo_symbol']]
                tran = fields[head_index['transcript_id']]
                exon = fields[head_index['exon']]

                # Refresh the funcregion column.
                region = func_relation_info.get(
                    '{0}_{1}'.format(tran, exon), 'Nan')
                fields[head_index['funcregion']] = region

                # Rewrite the exon column (last-exon notation).
                rewritten_exon = self.final_exon(final_exon_info, tran, exon)
                fields[head_index['exon']] = rewritten_exon

                fw.write('\t'.join(fields) + '\n')
Exemple #7
0
    def start(self):
        '''Fill the ``tmb_type`` column of every data line.

        The header line (starting with ``Scale``) is copied to ``self.result``
        unchanged. For each data line the first matching rule wins:

        1. ``case_var_freq`` below ``self.tmb_freq`` -> ``noTMB``
        2. function ``span`` with an exon ending in ``-EX1`` -> ``noTMB``
        3. ``self.is_driver_gene(...)`` truthy -> ``Driver``
        4. anything else -> ``TMB``
        '''
        driver_info = self.get_driver_info()
        special_driver_info = self.get_special_driver_info()

        with open(self.infile, 'r') as fr, open(self.result, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                gene = fields[head_index['hugo_symbol']]
                chgvs = fields[head_index['hgvsc']]
                phgvs = fields[head_index['hgvsp_short']]
                func = fields[head_index['bgi_function']]
                exon = fields[head_index['exon']]
                case_freq = fields[head_index['case_var_freq']]

                # Decide the TMB tag.
                if float(case_freq) < self.tmb_freq:
                    tmb_type = 'noTMB'
                elif func == 'span' and re.search(r'-EX1$', exon):
                    tmb_type = 'noTMB'
                elif self.is_driver_gene(
                        gene=gene,
                        chgvs=chgvs,
                        phgvs=phgvs,
                        func=func,
                        exon=exon,
                        driver_info=driver_info,
                        special_driver_info=special_driver_info):
                    tmb_type = 'Driver'
                else:
                    tmb_type = 'TMB'

                fields[head_index['tmb_type']] = tmb_type
                fw.write('\t'.join(fields) + '\n')
Exemple #8
0
    def start(self):
        '''Fill the ``target_gene`` column of every data line.

        The header line (starting with ``Scale``) is copied to ``self.result``
        unchanged. For each data line the lookups are tried in this order and
        the first truthy hit is used:

        1. ``func_info['<gene>_<bgi_function>']``
        2. ``phgvs_info['<gene>_<hgvsp_short>']``
        3. ``chgvs_info['<gene>_<hgvsc>']``
        4. ``exon_info[<gene>]``, only when the exon value starts with ``EX``

        Hits 1-3 are mappings rendered as ``key:value;`` pairs; hit 4 is used
        as is. A non-empty result becomes ``YES(<result>)`` with every ``;;``
        collapsed to ``;``; otherwise the column is set to ``NO``.
        '''
        func_info = self.get_gene_func_info()
        chgvs_info, phgvs_info = self.get_gene_pos_info()
        exon_info = self.get_gene_exon_info()

        def render(mapping):
            # Concatenate every entry of the mapping as "key:value;".
            return ''.join('{0}:{1};'.format(name, value)
                           for name, value in mapping.items())

        with open(self.infile, 'r') as fr, open(self.result, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                gene = fields[head_index['hugo_symbol']]
                chgvs = fields[head_index['hgvsc']]
                phgvs = fields[head_index['hgvsp_short']]
                func = fields[head_index['bgi_function']]
                exon = fields[head_index['exon']]

                key_func = gene + '_' + func
                key_chgvs = gene + '_' + chgvs
                key_phgvs = gene + '_' + phgvs

                # Work out the text for the target_gene column.
                if func_info.get(key_func):
                    hit = render(func_info[key_func])
                elif phgvs_info.get(key_phgvs):
                    hit = render(phgvs_info[key_phgvs])
                elif chgvs_info.get(key_chgvs):
                    hit = render(chgvs_info[key_chgvs])
                elif exon_info.get(gene) and exon.startswith('EX'):
                    hit = exon_info[gene]
                else:
                    hit = ''

                if hit:
                    target = 'YES({0})'.format(hit).replace(';;', ';')
                else:
                    target = 'NO'
                fields[head_index['target_gene']] = target

                fw.write('\t'.join(map(str, fields)) + '\n')
Exemple #9
0
 def get_tran_relation(self):
     '''Collect ``gene=transcript`` pairs from the transcript database.

     Reads the file at ``self.transript_database`` via ``utils.safe_open``.
     A line starting with ``#`` is treated as the header and used to build
     the column index; every other line contributes one entry built from
     its ``#gene`` and ``transcript`` columns.

     Returns:
         list: strings of the form ``gene=transcript``, in file order.
     '''
     tran_relation = []
     with utils.safe_open(self.transript_database, 'r') as fr:
         for line in fr:
             if line.startswith('#'):
                 head_index = utils.get_head_index(line)
                 continue
             # strip('\n') rather than strip(''): the latter removes
             # nothing, so a transcript in the last column kept its newline
             # and ended up inside the returned "gene=tran" string.
             linelist = line.strip('\n').split('\t')
             gene = linelist[head_index['#gene']]
             tran = linelist[head_index['transcript']]
             tran_relation.append('{0}={1}'.format(gene, tran))

     return tran_relation
Exemple #10
0
    def start(self):
        '''Fill the ``maploc`` column of every data line.

        The header line (starting with ``Scale``) is copied to ``self.result``
        unchanged. For each data line the chromosome plus the integer start
        and end positions are handed to ``self.get_variant_maploc`` together
        with the table from ``self.get_maploc_info()``; the returned value is
        stored in the ``maploc`` column.
        '''
        maploc_info = self.get_maploc_info()

        with open(self.infile, 'r') as fr, open(self.result, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')
                chrom = fields[head_index['chromosome']]
                first = int(fields[head_index['start_position']])
                last = int(fields[head_index['end_position']])

                fields[head_index['maploc']] = self.get_variant_maploc(
                    chrom, first, last, maploc_info)

                fw.write('\t'.join(fields) + '\n')
Exemple #11
0
    def start(self):
        '''Move synonymous non-TMB variants to the fail file.

        The header line (starting with ``Scale``) is copied unchanged to
        ``<suffix>_pass`` and, with an extra ``filter_reason`` column, to
        ``<suffix>_fail``. A data line whose ``bgi_function`` is
        ``coding-synon`` and whose ``tmb_type`` is ``noTMB`` is written to the
        fail file with the reason ``synonymy``; every other line is written to
        the pass file unchanged.
        '''
        with open(self.infile, 'r') as fr, \
                open(self.suffix + '_pass', 'w') as fw_pass, \
                open(self.suffix + '_fail', 'w') as fw_fail:
            for line in fr:
                bare = line.strip('\n')

                if line.startswith('Scale'):
                    fw_pass.write(line)
                    fw_fail.write(bare + '\tfilter_reason\n')
                    head_index = utils.get_head_index(line)
                    continue

                fields = bare.split('\t')
                tmb_tag = fields[head_index['tmb_type']]
                func = fields[head_index['bgi_function']]

                if func != 'coding-synon' or tmb_tag != 'noTMB':
                    fw_pass.write(line)
                else:
                    # Splitting and re-joining on tabs reproduces the stripped
                    # line, so it is written directly.
                    fw_fail.write(bare + '\tsynonymy\n')
 def get_tran_relation(self, **args):
     '''Collect the transcript ids listed in the transcript data file.

     Reads the file at ``args['transcript_data']`` via ``utils.safe_open``.
     A line starting with ``#`` is treated as the header and used to build
     the column index; every other line contributes the value of its
     ``transcript`` column.

     Returns:
         list: transcript ids, in file order.
     '''
     transcripts = []
     with utils.safe_open(args['transcript_data'], 'r') as fr:
         for raw in fr:
             record = raw.strip('\n')
             if record.startswith('#'):
                 head_index = utils.get_head_index(record)
                 continue
             columns = record.split('\t')
             transcripts.append(
                 '{0}'.format(columns[head_index['transcript']]))

     return transcripts
Exemple #13
0
    def start(self, **args):
        '''Run the end-exon, gene-extend, splice-affect and new-function checks.

        Reads ``args['infile']`` and writes ``args['result']``. The header
        line (starting with ``Scale``) is copied unchanged. For each data line
        the three checker classes are run first, their tags are fed to
        ``NewFunction``, and the four results are stored in the
        ``bgi_end_exon_check``, ``bgi_gene_extend_check``,
        ``bgi_splice_affect_check`` and ``bgi_newfunction`` columns.

        Args:
            **args: must contain ``infile`` and ``result``; the whole dict is
                also passed positionally to the three checker classes.
        '''
        with open(args['infile'], 'r') as fr, open(args['result'], 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                fields = line.strip('\n').split('\t')

                def column(name):
                    # Value of the named column on the current line.
                    return fields[head_index[name]]

                func = column('bgi_function')
                tran = column('transcript_id')
                start = column('start_position')
                end = column('end_position')
                phgvs = column('hgvsp_short')
                chgvs = column('hgvsc')
                flank = column('flank')
                strand = column('strand')
                gene = column('hugo_symbol')
                exon_id = column('exon')

                end_exon_tag = EndExonCheck(
                    args, tran, start, end, func).start()
                gene_extend_tag = GeneExtendCheck(
                    args, tran, func, phgvs).start()
                splice_affect_tag = SpliceAffectCheck(
                    args, func, chgvs, flank, strand).start()
                newfun_tag = NewFunction(
                    func, gene, exon_id, end_exon_tag, gene_extend_tag,
                    splice_affect_tag).start()

                results = (
                    ('bgi_end_exon_check', end_exon_tag),
                    ('bgi_gene_extend_check', gene_extend_tag),
                    ('bgi_splice_affect_check', splice_affect_tag),
                    ('bgi_newfunction', newfun_tag),
                )
                for name, tag in results:
                    fields[head_index[name]] = tag

                fw.write('\t'.join(fields) + '\n')
Exemple #14
0
    def start(self):
        '''Move variants that are listed in the database to the fail file.

        The header line (starting with ``Scale``) is copied unchanged to
        ``<suffix>_pass`` and, with an extra ``fail_reason`` column, to
        ``<suffix>_fail``. Each data line is keyed by
        ``chromosome_start_reference_allele``. When the key is present in
        ``self.get_database_info()`` the line goes to the fail file with the
        stored value appended as the reason; otherwise it goes to the pass
        file unchanged.
        '''
        database_info = self.get_database_info()
        key_columns = ('chromosome', 'start_position', 'reference_allele',
                       'allele')

        with open(self.infile, 'r') as fr, \
                open(self.suffix + '_pass', 'w') as fw_pass, \
                open(self.suffix + '_fail', 'w') as fw_fail:
            for line in fr:
                bare = line.strip('\n')

                if line.startswith('Scale'):
                    fw_pass.write(line)
                    fw_fail.write(bare + '\tfail_reason\n')
                    head_index = utils.get_head_index(line)
                    continue

                fields = bare.split('\t')
                key = '_'.join(
                    [fields[head_index[column]] for column in key_columns])

                if key not in database_info:
                    fw_pass.write(line)
                else:
                    fw_fail.write(
                        '{0}\t{1}\n'.format(bare, database_info[key]))
Exemple #15
0
    def start(self):
        '''Fill the ``cosmic`` column of every data line.

        The header line (starting with ``Scale``) is copied to ``self.result``
        unchanged. Each data line is looked up in ``self.get_cosmic_info()``
        under two keys:

        * ``key1``: ``gene_chromosome_start_reference_allele``
        * ``key2``: ``gene_hgvsc``

        Each hit is a tab-separated record; fields 0, 1, 2, 4-7, 10 and 11 are
        used below, and field 11 equal to ``'-'`` selects the
        "excluded from the website" wording.

        * both keys hit the same record: plain summary;
        * only ``key1`` hits: summary flagged as a gene/cHGVS difference;
        * only ``key2`` hits: summary flagged as a position/allele difference;
        * otherwise (no hit, or two different records): ``'*'``.

        Two of the messages were written with a backslash continuation
        *inside* the string literal, which embedded the source indentation
        (dozens of spaces) in the output column. They are now built from
        adjacent literals so the text is separated by a single space; the
        wording itself is unchanged.
        '''
        cosmic_info = self.get_cosmic_info()

        with open(self.infile, 'r') as fr, open(self.result, 'w') as fw:
            for line in fr:
                if line.startswith('Scale'):
                    fw.write(line)
                    head_index = utils.get_head_index(line)
                    continue

                linelist = line.strip('\n').split('\t')
                gene = linelist[head_index['hugo_symbol']]
                _chr = linelist[head_index['chromosome']]
                # The start position is the one used as the variant position.
                pos = linelist[head_index['start_position']]
                ref = linelist[head_index['reference_allele']]
                alt = linelist[head_index['allele']]
                chgvs = linelist[head_index['hgvsc']]

                key1 = '{0}_{1}_{2}_{3}_{4}'.format(gene, _chr, pos, ref, alt)
                key2 = '{0}_{1}'.format(gene, chgvs)

                tmp1 = cosmic_info.get(key1, '')
                tmp2 = cosmic_info.get(key2, '')

                if tmp1 and tmp1 == tmp2:
                    # Both keys agree on the same record.
                    rec = tmp1.split('\t')
                    if rec[11] == '-':
                        cosmic_tmp = (
                            'the mutation {0} has been exclude from the '
                            'website.:{1}'
                        ).format(rec[1], rec[10])
                    else:
                        cosmic_tmp = '{0}:{1};{2}'.format(
                            rec[1], rec[10], rec[11])

                elif tmp1 and not tmp2:
                    # Position/allele matched, gene + cHGVS did not.
                    rec = tmp1.split('\t')
                    if rec[11] == '-':
                        cosmic_tmp = (
                            'the mutation {0} has beed excluded from '
                            'the website and cosmic gene or chgvs diff '
                            '{1}_{2}:{3}'
                        ).format(rec[1], rec[0], rec[2], rec[10])
                    else:
                        cosmic_tmp = (
                            'Cosmic gene or cHGVS diff {0}_{1}: {2}:{3};{4}'
                        ).format(rec[0], rec[2], rec[1], rec[10], rec[11])

                elif tmp2 and not tmp1:
                    # Gene + cHGVS matched, position/allele did not.
                    rec = tmp2.split('\t')
                    if rec[11] == '-':
                        cosmic_tmp = (
                            'The mutation {0} has beed exclude from the '
                            'website and cosmic pos or alt diff '
                            '{1}:{2} {3}/{4}: {5}'
                        ).format(rec[1], rec[4], rec[5], rec[6], rec[7],
                                 rec[10])
                    else:
                        cosmic_tmp = (
                            'Cosmic Pos or alt diff {0}:{1} {2}/{3}:{4}:{5};{6}'
                        ).format(rec[4], rec[5], rec[6], rec[7], rec[1],
                                 rec[10], rec[11])

                else:
                    cosmic_tmp = '*'

                # Refresh the cosmic column.
                linelist[head_index['cosmic']] = cosmic_tmp
                fw.write('{0}\n'.format('\t'.join(linelist)))
    def start(self, **args):
        '''Main entry point: convert a VEP annotation table to the result table.

        Writes the column names from ``headers.HEAD().update_head()`` as the
        first line of ``args['result']``. Then, for every line of
        ``args['vep_annotation']`` whose ``feature`` (transcript) is among the
        transcripts returned by ``self.get_tran_relation(**args)``, fills a
        fresh ``headers.HEAD`` row and writes its values.

        Lines starting with ``##`` are skipped; the line starting with
        ``#Uploaded_variation`` is used as the header.

        Changes relative to the previous implementation:

        * lines are stripped with ``strip('\\n')`` (``strip('')`` removed
          nothing, so the last column kept its newline);
        * ``ea_af`` is read from the ``ea_af`` column, not from ``eas_af``;
        * the VCF-derived extra fields are rebuilt for each row, so values
          from an earlier variant can no longer leak into a later row;
        * the transcript whitelist is a set, for constant-time lookups.

        Args:
            **args: the keys read here are ``vcf``, ``vcftype`` (only when
                ``vcf`` is truthy), ``vep_annotation``, ``result`` and
                ``hg19``. The full dict is also forwarded to
                ``get_tran_relation`` and ``TransverFunction``.
        '''
        if args['vcf']:
            vcf_info = get_vcf_info.HandleVcf(args['vcf'], args['vcftype']).start()

        wanted_transcripts = set(self.get_tran_relation(**args))

        # Allele-frequency columns copied verbatim; the row attribute has the
        # same name as the (lower-cased) VEP column.
        af_columns = (
            'af', 'afr_af', 'amr_af', 'eas_af', 'eur_af', 'sas_af',
            'aa_af', 'ea_af',
            'gnomad_af', 'gnomad_afr_af', 'gnomad_amr_af', 'gnomad_asj_af',
            'gnomad_eas_af', 'gnomad_fin_af', 'gnomad_nfe_af',
            'gnomad_oth_af', 'gnomad_sas_af',
        )

        with open(args['vep_annotation'], 'r') as fr, open(args['result'], 'w') as fw:
            fw.write('{}\n'.format('\t'.join(headers.HEAD().update_head().keys())))
            for line in fr:
                if line.startswith('##'):
                    continue
                elif line.startswith('#Uploaded_variation'):
                    head_index = utils.get_head_index(line)
                    continue

                linelist = line.strip('\n').split('\t')
                gene = linelist[head_index['symbol']]  # e.g. TERT
                transcript = linelist[head_index['feature']]  # e.g. NM_198253.3

                # Keep only the annotation for the designated transcripts.
                if transcript not in wanted_transcripts:
                    continue

                row = headers.HEAD()

                # Values that can be taken from the line directly.
                upload_variation = linelist[head_index['#Uploaded_variation'.lower()]]  # chr5_1295229_-/A
                location = linelist[head_index['location']]  # chr5:1295187-1295188
                vep_function = linelist[head_index['consequence']]  # missense_variant
                strand = '+' if linelist[head_index['strand']] == '1' else '-'
                protein = linelist[head_index['ensp']]  # NP_937983.2
                sift = linelist[head_index['sift']]  # tolerated(0.05)
                polyphen = linelist[head_index['polyphen']]
                exon_info = linelist[head_index['exon']]  # 2/19 or -
                intro_info = linelist[head_index['intron']]
                chgvs = linelist[head_index['hgvsc']]  # NM_198253.3:c.77C>T
                phgvs = linelist[head_index['hgvsp']]  # NP_937983.2:p.Thr26Met
                tert = linelist[head_index['tert']]  # only set for the TERT promoter region
                clinvar = linelist[head_index['clinvar_clnsig']]
                rs = linelist[head_index['existing_variation']]
                bl_muttype = linelist[head_index['variant_class']]

                # Values that need processing.
                hgvsc = utils.simplify_hgvsc(gene, chgvs, tert)
                hgvsp = utils.simplify_hgvsp(phgvs)  # p.Lys872_Thr874delinsAsnTer
                hgvsp_short = utils.get_oneletter_hgvsp(hgvsp)  # p.K872_T874delinsN*
                exon_id = utils.get_exon_id(exon_info, intro_info)
                _chr, start, end = utils.get_chr_start_end_from_location(location)
                ref, alt = utils.get_ref_alt_from_upload_variation(upload_variation)
                muttype = utils.get_muttype(ref, alt)
                genotype = utils.get_genotype(ref, alt, strand)
                flank = utils.get_flank_according_upload_variation(upload_variation, args['hg19'])
                vep_simple_function = TransverFunction(**args).simplify_function(vep_function, tert, gene)
                vep2bgicg_function = TransverFunction(**args).vep2bgi(vep_simple_function, hgvsc, hgvsp, ref, alt, exon_id)

                # A "span" variant crosses a whole intron; a protein-level
                # annotation on it is wrong, so blank it out.
                if vep2bgicg_function == 'span' and hgvsp != '-':
                    hgvsp = '-'
                    hgvsp_short = '-'

                # Fill the row.
                row.gene = gene
                row.chgvs = hgvsc
                row.phgvs = hgvsp
                # NOTE(review): attribute is spelled "phgvs_shoft" here;
                # confirm it matches the name headers.HEAD expects.
                row.phgvs_shoft = hgvsp_short
                row.exon_id = exon_id
                row.vep_function = vep_function
                row.vep_simple_function = vep_simple_function
                row.vep2bgicg_function = vep2bgicg_function
                row.sift = sift
                row.polyphen2 = polyphen
                row.chr = _chr
                row.start = start
                row.end = end
                row.ref = ref
                row.alt = alt
                row.muttype = muttype
                row.genotype = genotype
                row.transcript = transcript
                row.protein = protein
                row.strand = strand
                row.flank = flank
                row.rs = rs
                row.bl_muttype = bl_muttype
                row.clinvar = clinvar
                for column in af_columns:
                    setattr(row, column, linelist[head_index[column]])

                # Extra fields taken from the VCF, rebuilt for each row.
                other_info = {}
                if args['vcf']:
                    other_info.update(vcf_info[upload_variation])
                info = row.update_head(**other_info)
                fw.write('\t'.join(map(str, info.values())) + '\n')
Exemple #17
0
    def start(self):
        '''Main entry point: convert the VEP table ``self.vep`` to ``test``.

        Lines starting with ``##`` are skipped; the line starting with
        ``#Uploaded_variation`` is used as the header. Every other line fills
        a fresh ``headers.HEAD`` row, which is written to the file named
        ``test`` in the working directory.

        Changes relative to the previous implementation:

        * lines are stripped with ``strip('\\n')`` (``strip('')`` removed
          nothing, so the last column kept its newline);
        * without ``self.vcf`` the extra VCF fields default to an empty dict
          instead of raising ``NameError`` on the first data line;
        * the column-name line is written once, before the first data row,
          rather than before every row.
        '''
        if self.vcf:
            vcf_info = get_vcf_info.HandleVcf(self.vcf, self.vcftype).start()

        with open(self.vep, 'r') as fr, open('test', 'w') as fw:
            header_written = False
            for line in fr:
                if line.startswith('##'):
                    continue
                elif line.startswith('#Uploaded_variation'):
                    head_index = utils.get_head_index(line)
                    continue

                linelist = line.strip('\n').split('\t')
                row = headers.HEAD()

                # Values that can be taken from the line directly.
                upload_variation = linelist[head_index['#Uploaded_variation'.lower()]]  # chr5_1295229_-/A
                location = linelist[head_index['location']]  # chr5:1295187-1295188
                transcript = linelist[head_index['feature']]  # NM_198253.3
                function = linelist[head_index['consequence']]  # missense_variant
                strand = linelist[head_index['strand']]  # -1
                gene = linelist[head_index['symbol']]  # TERT
                protein = linelist[head_index['ensp']]  # NP_937983.2
                sift = linelist[head_index['sift']]  # tolerated(0.05)
                polyphen = linelist[head_index['polyphen']]
                exon_id = linelist[head_index['exon']]  # 2/19 or -
                chgvs = linelist[head_index['hgvsc']]  # NM_198253.3:c.77C>T
                phgvs = linelist[head_index['hgvsp']]  # NP_937983.2:p.Thr26Met
                tert = linelist[head_index['tert']]  # only set for the TERT promoter region
                clinvar = linelist[head_index['clinvar_clnsig']]
                rs = linelist[head_index['existing_variation']]

                # Values that need processing.
                hgvsc = utils.simplify_hgvsc(chgvs)
                hgvsp2 = utils.simplify_hgvsp(phgvs)
                hgvsp = utils.get_oneletter_hgvsp(hgvsp2)
                exon_id = utils.get_exon_id(exon_id)
                _chr, start, end = utils.get_chr_start_end_from_location(location)
                ref, alt = utils.get_ref_alt_from_upload_variation(upload_variation)
                muttype = utils.get_muttype(ref, alt)
                genotype = utils.get_genotype(ref, alt, strand)
                flank = utils.get_flank_according_upload_variation(upload_variation, self.hg19)
                bl_muttype = utils.get_bl_muttype()

                # Fill the row.
                row.gene = gene
                row.chgvs = hgvsc
                row.phgvs = hgvsp
                row.phgvs2 = hgvsp2
                row.exon_id = exon_id
                row.vep_function = function
                row.sift = sift
                row.polyphen2 = polyphen
                row.chr = _chr
                row.start = start
                row.end = end
                row.ref = ref
                row.alt = alt
                row.muttype = muttype
                row.genotype = genotype
                row.transcript = transcript
                row.protein = protein
                row.strand = strand
                row.flank = flank
                row.rs = rs
                row.bl_muttype = bl_muttype
                row.clinvar = clinvar

                # Extra fields from the VCF; nothing extra when no VCF is given.
                freq_tag = vcf_info[upload_variation] if self.vcf else {}
                info = row.update_head(**freq_tag)

                if not header_written:
                    fw.write('\t'.join(info.keys()) + '\n')
                    header_written = True
                fw.write('\t'.join(map(str, info.values())) + '\n')