Beispiel #1
0
    def _iterate_vcf(self, vcf_ittr, distin_dict, reg):
        """Write the variants that distinguish two groups to a text file.

        The header text is written first. Every record of ``vcf_ittr`` is
        then filtered by the two skip helpers, the surviving records are
        handed to ``AlleleSelect.select_diff_allele``, and each resulting
        line whose variant type is accepted by ``utl.is_my_pick_mode`` for
        the requested pick mode is appended to the output file.

        Args:
            vcf_ittr: Iterable yielding the VCF records to examine.
            distin_dict: Settings for this comparison. The keys read here
                are ``0`` and ``1`` (the two group names, used to index
                ``glv.conf.g_members_dict``), ``'pick_mode'`` and
                ``'variant'`` (a mapping with ``'out_path'``,
                ``'hdr_text'`` and ``'base_nam'``).
            reg: Not used by this method.

        Returns:
            None. The result is the file at
            ``distin_dict['variant']['out_path']``.
        """

        pick_mode = distin_dict['pick_mode']
        # Keys 0 and 1 of distin_dict hold the names of the two groups.
        gr_list = [distin_dict[0], distin_dict[1]]
        log.info("gr_list {}.".format(gr_list))

        # At first, we check the difference of genotype between the two
        # samples that are listed first in each group.
        top_smpl_list = [
            glv.conf.g_members_dict[gr_list[0]][0],
            glv.conf.g_members_dict[gr_list[1]][0],
        ]
        log.info("top_smpl_list {}.".format(top_smpl_list))

        # ================================================================
        start = time.time()
        variant_info = distin_dict['variant']

        # File the variants are written to.
        out_txt_file = variant_info['out_path']
        # NOTE(review): presumably moves an already existing output file
        # out of the way before we append to it -- confirm in utl.
        utl.save_to_tmpfile(out_txt_file)

        # TODO: can this part be parallelized?
        # TODO: is a flush needed after the last f.write?
        with open(out_txt_file, mode='a') as f:

            # write header
            f.write("{}\n".format(variant_info['hdr_text']))

            # access to vcf using iterator
            for record in vcf_ittr:

                # 1. Skip same GT between top two sample
                if self._skip_same_GT_between_top2sample(
                        record, top_smpl_list) > 0:
                    continue

                # 2. Check GT in your own group
                if self._skip_different_GT_in_own_group(
                        record, top_smpl_list, gr_list) > 0:
                    continue

                # 3. Select different allele combination among 2x2 allele
                asel = AlleleSelect()
                asel.select_diff_allele(record, top_smpl_list, gr_list)

                # 4. Save variant information as text file, keeping only
                #    the variant types that match the pick mode
                for var_type, line in zip(asel.var_types, asel.lines):
                    if utl.is_my_pick_mode(var_type, pick_mode):
                        f.write("{}\n".format(line))

        log.info("variant {} {}".format(utl.elapsed_time(time.time(), start),
                                        variant_info['base_nam']))
Beispiel #2
0
    def _iterate_vcf(self, vcf_ittr, distin_dict, proc_cnt):
        """Write the variants that distinguish two groups to a text file.

        The header text, extended by one tab-separated column per sample
        nickname, is written first. Every record of ``vcf_ittr`` is then
        filtered by the two skip helpers, the surviving records are handed
        to ``AlleleSelect.select_diff_allele``, and each resulting line
        whose variant type is accepted by ``utl.is_my_pick_mode`` is
        appended to the output file, followed by a tab and the text
        returned by ``self._get_allele_line`` for that record.

        Args:
            vcf_ittr: Iterable yielding the VCF records to examine.
            distin_dict: Settings for this comparison. The keys read here
                are ``0`` and ``1`` (the two group names, used to index
                ``glv.conf.group_members_dict``), ``'pick_mode'``,
                ``'indel_size'`` (a ``"<min>-<max>"`` string) and
                ``'variant'`` (a mapping with ``'out_path'``,
                ``'hdr_text'`` and ``'base_nam'``).
            proc_cnt: Passed through to ``utl.print_distin_info``.

        Returns:
            None. The result is the file at
            ``distin_dict['variant']['out_path']``.

        Raises:
            ValueError: If ``distin_dict['indel_size']`` does not consist
                of exactly two integers separated by ``'-'``.
        """

        # basic information
        gr_list = [distin_dict[0], distin_dict[1]]
        pick_mode = distin_dict['pick_mode']
        min_indel_len, max_indel_len = (
            int(bound) for bound in distin_dict['indel_size'].split('-'))

        # At first, we check the difference of genotype between the two
        # samples that are listed first in each group.
        top_smpl_list = [
            glv.conf.group_members_dict[gr_list[0]][0],
            glv.conf.group_members_dict[gr_list[1]][0],
        ]

        # logging current target
        utl.print_distin_info("variant", distin_dict, proc_cnt)

        start = time.time()
        variant_info = distin_dict['variant']

        # File name to export variant
        out_txt_file = variant_info['out_path']
        # NOTE(review): presumably moves an already existing output file
        # out of the way before we append to it -- confirm in utl.
        utl.save_to_tmpfile(out_txt_file)

        # ------------------------------------------------------
        # To add an allele_int column for all samples.
        # Members of the specified groups come first:
        # gr0:s1 g0:s2 g0:s3 g1:s4 g1:s5 g1:s6 s7 s8 s9 s10
        sample_nickname_ordered_list, sample_fullname_ordered_list = \
            utl.get_ordered_sample_list(gr_list)

        sample_added_header = "{}\t{}".format(
            variant_info['hdr_text'],
            "\t".join(sample_nickname_ordered_list))

        # TODO: can this part be parallelized?
        with open(out_txt_file, mode='a') as f:

            # write sample added header
            f.write("{}\n".format(sample_added_header))

            # access to vcf using iterator
            for record in vcf_ittr:

                # 1. Skip same GT between top two sample
                if self._skip_same_GT_between_top2sample(
                        record, top_smpl_list) > 0:
                    continue

                # 2. Check GT in your own group
                if self._skip_different_GT_in_own_group(
                        record, top_smpl_list, gr_list) > 0:
                    continue

                # 3. Select different allele combination among 2x2 allele
                asel = AlleleSelect(min_indel_len, max_indel_len)
                asel.select_diff_allele(record, top_smpl_list, gr_list)

                # The allele line depends only on the record, so it is
                # built lazily on the first accepted line and then reused
                # for every further line of the same record.
                allele_int_line = None

                # 4. Save variant information as text file, keeping only
                #    the variant types that match the pick mode
                for var_type, line in zip(asel.var_types, asel.lines):
                    if not utl.is_my_pick_mode(var_type, pick_mode):
                        continue

                    if allele_int_line is None:
                        allele_int_line = self._get_allele_line(
                            record, sample_fullname_ordered_list)

                    # add allele line
                    f.write("{}\t{}\n".format(line, allele_int_line))

        log.info("variant {} > {}.txt\n".format(
            utl.elapsed_time(time.time(), start),
            variant_info['base_nam']))