Example #1
0
               mapping on first input
        '''
        if len(set([r[-1]
                    for r in id1])) == 1 and len(set([r[-1]
                                                      for r in id2])) == 1:
            # if this case is true you also have to edit all pos,etc,desc indices such that the plotting works correctly
            # again .. maybe it is also necessary to test for the last two characters
            cut_last_char = lambda x: x[:-1]
            binary1.index = list(map(cut_last_char, binary1.index))
            binary2.index = list(map(cut_last_char, binary2.index))
            pos.index = list(map(cut_last_char, pos.index))
            pos2.index = list(map(cut_last_char, pos2.index))
            read_details.index = list(map(cut_last_char, read_details.index))
            read_details2.index = list(map(cut_last_char, read_details2.index))

        binary_p, binary_mis, binary_un = ht.create_paired_matrix(
            binary1, binary2)

        if binary_p.shape[0] < len(id1) * 0.1:
            print((
                "\nWARNING: Less than 10%% of reads could be paired. Consider an appropriate unpaired_weight setting "
                "in your config file (currently %.3f), because you may need to resort to using unpaired reads."
            ) % unpaired_weight)

        if unpaired_weight > 0:
            if use_discordant:
                binary = pd.concat([binary_p, binary_un, binary_mis])
            else:
                binary = pd.concat([binary_p, binary_un])
        else:
            binary = binary_p
Example #2
0
               paired-end
            2) if not, test whether the intersection of ID-binary1 and ID-binary2 has at least 10% of the former read
               number -> do paired-end
            3) if nothing worked (perhaps the paired-end ID was in the middle or something) raise flag and do single-end
               mapping on first input
        '''
        if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1:
            # if this case is true you also have to edit all pos,etc,desc indices such that the plotting works correctly
            # again .. maybe it is also necessary to test for the last two characters
            binary1.index = map(lambda x: x[:-1], binary1.index)
            binary2.index = map(lambda x: x[:-1], binary2.index)
            pos.index = map(lambda x: x[:-1], pos.index)
            pos2.index = map(lambda x: x[:-1], pos2.index)
            etc.index = map(lambda x: x[:-1], etc.index)
            etc2.index = map(lambda x: x[:-1], etc2.index)
            binary = ht.create_paired_matrix(binary1, binary2)
        else:
            nof_reads1 = float(len(id1))*0.1
            if float(len(id1.intersection(id2))) >= nof_reads1:
                binary =  ht.create_paired_matrix(binary1, binary2)
            else:
                print "\nCould not match paired-end pairs. Switching to single-end pipeline."
                binary = binary1
                is_paired = False
    else:
        pos, etc, desc = ht.sam_to_hdf(out_dir+"/"+date+"_0.sam", verbosity=args.verbose)
        binary = pos.applymap(bool).applymap(int)

    #dimensionality reduction and typing

    alleles_to_keep = filter(is_frequent, binary.columns)
Example #3
0
               number -> do paired-end
            3) if nothing worked (perhaps the paired-end ID was in the middle or something) raise flag and do single-end
               mapping on first input
        '''
        if len(set([r[-1]
                    for r in id1])) == 1 and len(set([r[-1]
                                                      for r in id2])) == 1:
            # if this case is true you also have to edit all pos,etc,desc indices such that the plotting works correctly
            # again .. maybe it is also necessary to test for the last two characters
            binary1.index = map(lambda x: x[:-1], binary1.index)
            binary2.index = map(lambda x: x[:-1], binary2.index)
            pos.index = map(lambda x: x[:-1], pos.index)
            pos2.index = map(lambda x: x[:-1], pos2.index)
            etc.index = map(lambda x: x[:-1], etc.index)
            etc2.index = map(lambda x: x[:-1], etc2.index)
            binary = ht.create_paired_matrix(binary1, binary2)
        else:
            nof_reads1 = float(len(id1)) * 0.1
            if float(len(id1.intersection(id2))) >= nof_reads1:
                binary = ht.create_paired_matrix(binary1, binary2)
            else:
                print "\nCould not match paired-end pairs. Switching to single-end pipeline."
                binary = binary1
                is_paired = False
    else:
        pos, etc, desc = ht.sam_to_hdf(out_dir + "/" + date + "_0.sam",
                                       verbosity=args.verbose)
        binary = pos.applymap(bool).applymap(int)

    #dimensionality reduction and typing
Example #4
0
               number -> do paired-end
            3) if nothing worked (perhaps the paired-end ID was in the middle or something) raise flag and do single-end
               mapping on first input
        '''
        if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1:
            # if this case is true you also have to edit all pos,etc,desc indices such that the plotting works correctly
            # again .. maybe it is also necessary to test for the last two characters
            cut_last_char = lambda x: x[:-1]
            binary1.index = list(map(cut_last_char, binary1.index))
            binary2.index = list(map(cut_last_char, binary2.index))
            pos.index = list(map(cut_last_char, pos.index))
            pos2.index = list(map(cut_last_char, pos2.index))
            read_details.index = list(map(cut_last_char, read_details.index))
            read_details2.index = list(map(cut_last_char, read_details2.index))
            
        binary_p, binary_mis, binary_un =  ht.create_paired_matrix(binary1, binary2)

        if binary_p.shape[0] < len(id1) * 0.1:
            print(("\nWARNING: Less than 10%% of reads could be paired. Consider an appropriate unpaired_weight setting "
             "in your config file (currently %.3f), because you may need to resort to using unpaired reads.") % unpaired_weight)

        if unpaired_weight > 0:
            if use_discordant:
                binary = pd.concat([binary_p, binary_un, binary_mis])
            else:
                binary = pd.concat([binary_p, binary_un])
        else:
            binary = binary_p

    else:
        pos, read_details = ht.pysam_to_hdf(bam_paths[0])