def checkRefOverlap(sample_copy, ref_copy, sample_frame):
    """Drop rows of ``sample_frame`` whose ID overlaps the reference set.

    Args:
        sample_copy: DataFrame of sample intervals; wrapped in ``PyRanges``
            and later merged on its ``ID`` column.
        ref_copy: DataFrame of reference intervals; wrapped in ``PyRanges``.
        sample_frame: DataFrame with an ``ID`` column to be filtered.

    Returns:
        ``sample_frame`` itself (the same object) when ``reciprocal_overlap``
        yields an empty frame; otherwise a boolean-mask selection of
        ``sample_frame`` without the rows whose ``ID`` appears in the merge of
        ``sample_copy`` with the overlap result.
    """
    sample_ranges = PyRanges(sample_copy)
    ref_ranges = PyRanges(ref_copy)

    # Attach interval lengths as columns before joining. The reference side
    # is stored as ``Length_b``, presumably so ``reciprocal_overlap`` can tell
    # the two lengths apart after the join -- TODO confirm against that helper.
    sample_lengths = sample_ranges.lengths()
    ref_lengths = ref_ranges.lengths()
    sample_ranges.Length = sample_lengths
    ref_ranges.Length_b = ref_lengths

    reciprocal_hits = reciprocal_overlap(sample_ranges.join(ref_ranges))
    if reciprocal_hits.empty:
        # Nothing overlaps the reference: hand back the input untouched.
        return sample_frame

    # IDs present in both the sample intervals and the overlap result.
    matched = sample_copy.merge(reciprocal_hits, on=['ID'])
    keep_mask = ~sample_frame.ID.isin(matched.ID)
    return sample_frame[keep_mask]
def checkParentsOverlap(sample_copy, parent_copy, filtered_sample_frame, args,
                        inheritance):
    """Annotate ``filtered_sample_frame`` with a parent-overlap column.

    A column named by ``inheritance`` is written with string values:

    * ``'None'``  -- the parent comparison is skipped (see below);
    * ``'False'`` -- the row has no match in the parent overlap result;
    * ``'True'``  -- the row matches a row of the parent overlap result.

    Args:
        sample_copy: DataFrame of sample intervals; wrapped in ``PyRanges``.
        parent_copy: DataFrame of parent intervals; wrapped in ``PyRanges``.
            Not touched when the comparison is skipped.
        filtered_sample_frame: DataFrame to annotate.
        args: Object exposing ``type`` and, for ``'duo'`` runs,
            ``mother_duo`` / ``father_duo``.
        inheritance: Name of the column to write; ``'Found_in_Father'`` and
            ``'Found_in_Mother'`` are the values the skip logic checks for.

    Returns:
        The annotated frame. When the comparison is skipped or the overlap
        result is empty, the column is assigned in place and the very same
        ``filtered_sample_frame`` object is returned; otherwise a new,
        de-duplicated frame with a fresh index is returned.
    """
    # Decide whether this parent can be evaluated at all. The attribute
    # lookups stay lazy so ``mother_duo`` / ``father_duo`` are only read for
    # 'duo' runs with the matching ``inheritance`` label.
    if args.type == 'singleton':
        skip_parent = True
    elif args.type == 'duo':
        # NOTE(review): presumably ``mother_duo`` means the duo's only parent
        # is the mother (so the father column cannot be computed), and vice
        # versa -- confirm against the argument parser.
        skip_parent = (
            (inheritance == 'Found_in_Father' and args.mother_duo)
            or (inheritance == 'Found_in_Mother' and args.father_duo)
        )
    else:
        skip_parent = False

    if skip_parent:
        # No usable parent data for this column: mark every row as 'None'.
        filtered_sample_frame[inheritance] = 'None'
        return filtered_sample_frame

    overlap_columns = [
        'CHROM', 'POS', 'ID', 'REF', 'ALT_1', 'ALT_2', 'ALT_3', 'QUAL',
        'FILTER_PASS', 'END', 'SVLEN',
    ]

    sample_ranges = PyRanges(sample_copy)
    parent_ranges = PyRanges(parent_copy)

    # Attach interval lengths as columns before joining; the parent side is
    # stored as ``Length_b``, presumably for use inside ``reciprocal_overlap``
    # -- TODO confirm against that helper.
    sample_lengths = sample_ranges.lengths()
    parent_lengths = parent_ranges.lengths()
    sample_ranges.Length = sample_lengths
    parent_ranges.Length_b = parent_lengths

    joined = sample_ranges.join(parent_ranges)
    parent_hits = reciprocal_overlap(joined)[overlap_columns]

    if parent_hits.empty:
        filtered_sample_frame[inheritance] = 'False'
        return filtered_sample_frame

    # Left-merge on the shared columns; the indicator column (named after
    # ``inheritance``) reports 'both' for rows that also occur in the parent
    # overlap result.
    annotated = filtered_sample_frame.merge(
        parent_hits, on=None, how='left', indicator=inheritance)
    found_in_parent = annotated[inheritance] == 'both'
    annotated[inheritance] = np.where(found_in_parent, 'True', 'False')

    deduplicated = annotated.drop_duplicates()
    return deduplicated.reset_index(drop=True)