mapping on first input ''' if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1: # if this case is true you have to edit also all pos,etc,desc indices such that the plotting works correctly # again .. maybe it is also necessary to test for the last two characters cut_last_char = lambda x: x[:-1] binary1.index = list(map(cut_last_char, binary1.index)) binary2.index = list(map(cut_last_char, binary2.index)) pos.index = list(map(cut_last_char, pos.index)) pos2.index = list(map(cut_last_char, pos2.index)) read_details.index = list(map(cut_last_char, read_details.index)) read_details2.index = list(map(cut_last_char, read_details2.index)) binary_p, binary_mis, binary_un = ht.create_paired_matrix( binary1, binary2) if binary_p.shape[0] < len(id1) * 0.1: print(( "\nWARNING: Less than 10%% of reads could be paired. Consider an appropriate unpaired_weight setting " "in your config file (currently %.3f), because you may need to resort to using unpaired reads." ) % unpaired_weight) if unpaired_weight > 0: if use_discordant: binary = pd.concat([binary_p, binary_un, binary_mis]) else: binary = pd.concat([binary_p, binary_un]) else: binary = binary_p
paired-end 2) if not test if the intersection of ID-binary1 and ID-binary2 has at least 10% of the former read number -> do paired-end 3) if nothing worked ( perhaps pair-end ID was in the middle or something) raise flag and do single-end mapping on first input ''' if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1: #if this case is true you have to edit also all pos,etc,desc indices such that the plotting works correctly # again .. maybe it is also necessary to test for the last two characters binary1.index = map(lambda x: x[:-1], binary1.index) binary2.index = map(lambda x: x[:-1], binary2.index) pos.index = map(lambda x: x[:-1], pos.index) pos2.index = map(lambda x: x[:-1], pos2.index) etc.index = map(lambda x: x[:-1], etc.index) etc2.index = map(lambda x: x[:-1], etc2.index) binary = ht.create_paired_matrix(binary1, binary2) else: nof_reads1 = float(len(id1))*0.1 if float(len(id1.intersection(id2))) >= nof_reads1: binary = ht.create_paired_matrix(binary1, binary2) else: print "\nCould not match paired-end pairs. Switching to single-end pipeline." binary = binary1 is_paired = False else: pos, etc, desc = ht.sam_to_hdf(out_dir+"/"+date+"_0.sam", verbosity=args.verbose) binary = pos.applymap(bool).applymap(int) #dimensionality reduction and typing alleles_to_keep = filter(is_frequent, binary.columns)  # NOTE(review): on Python 3 filter() yields a lazy iterator rather than a list - confirm how alleles_to_keep is consumed before porting
number -> do paired-end 3) if nothing worked ( perhaps pair-end ID was in the middle or something) raise flag and do single-end mapping on first input ''' if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1: #if this case is true you have to edit also all pos,etc,desc indices such that the plotting works correctly # again .. maybe it is also necessary to test for the last two characters binary1.index = map(lambda x: x[:-1], binary1.index) binary2.index = map(lambda x: x[:-1], binary2.index) pos.index = map(lambda x: x[:-1], pos.index) pos2.index = map(lambda x: x[:-1], pos2.index) etc.index = map(lambda x: x[:-1], etc.index) etc2.index = map(lambda x: x[:-1], etc2.index) binary = ht.create_paired_matrix(binary1, binary2) else: nof_reads1 = float(len(id1)) * 0.1 if float(len(id1.intersection(id2))) >= nof_reads1: binary = ht.create_paired_matrix(binary1, binary2) else: print "\nCould not match paired-end pairs. Switching to single-end pipeline." binary = binary1 is_paired = False else: pos, etc, desc = ht.sam_to_hdf(out_dir + "/" + date + "_0.sam", verbosity=args.verbose) binary = pos.applymap(bool).applymap(int) #dimensionality reduction and typing
number -> do paired-end 3) if nothing worked ( perhaps pair-end ID was in the middle or something) raise flag and do single-end mapping on first input ''' if len(set([r[-1] for r in id1])) == 1 and len(set([r[-1] for r in id2])) == 1: # if this case is true you have to edit also all pos,etc,desc indices such that the plotting works correctly # again .. maybe it is also necessary to test for the last two characters cut_last_char = lambda x: x[:-1] binary1.index = list(map(cut_last_char, binary1.index)) binary2.index = list(map(cut_last_char, binary2.index)) pos.index = list(map(cut_last_char, pos.index)) pos2.index = list(map(cut_last_char, pos2.index)) read_details.index = list(map(cut_last_char, read_details.index)) read_details2.index = list(map(cut_last_char, read_details2.index)) binary_p, binary_mis, binary_un = ht.create_paired_matrix(binary1, binary2) if binary_p.shape[0] < len(id1) * 0.1: print(("\nWARNING: Less than 10%% of reads could be paired. Consider an appropriate unpaired_weight setting " "in your config file (currently %.3f), because you may need to resort to using unpaired reads.") % unpaired_weight) if unpaired_weight > 0: if use_discordant: binary = pd.concat([binary_p, binary_un, binary_mis]) else: binary = pd.concat([binary_p, binary_un]) else: binary = binary_p else: pos, read_details = ht.pysam_to_hdf(bam_paths[0])