def genomic_upset(options, label_names):
    '''
    Count the regions belonging to every exclusive combination of the input
    BED files (the weights of an UpSet plot).

    Arguments:
        options - object providing the attributes read here:
            input            - list of BED files
            output           - output path
            bedtools_options - handed to hlp.map_bedtools_options(); the
                               result is forwarded as keyword arguments to
                               every intersect call
            saveoverlaps     - when true, the regions of a combination are
                               written to <output>/sets/
            overlapthresh    - minimum count a combination needs in order
                               to be written
        label_names - names of the input files, indexed like options.input

    Returns a dict keyed by a string of '0'/'1' flags, one flag per input
    file (e.g. '101'). The value is the number of entries of the first
    flagged file that overlap every other flagged file and none of the
    unflagged files.
    '''
    input_files = options.input
    output = options.output
    kwargs = hlp.map_bedtools_options(options.bedtools_options)

    # Wrap each input once instead of once per combination.
    beds = [BedTool(f) for f in input_files]
    N = len(beds)

    # Generate a truth table of intersections to calculate
    # (the first row, all zeros, selects no file and is dropped).
    truth_table = list(itertools.product("01", repeat=N))[1:]

    weights = {}
    try:
        for t in truth_table:
            ones = [beds[i] for i in range(N) if t[i] == '1']
            zeros = [beds[i] for i in range(N) if t[i] == '0']

            # keep the entries of the first flagged set which overlap
            # every other flagged set
            x = ones[0]
            for bed in ones[1:]:
                x = x.intersect(bed, u=True, **kwargs)

            # drop the entries which overlap any unflagged set
            for bed in zeros:
                x = x.intersect(bed, v=True, **kwargs)

            X = x.count()
            key = ''.join(t)
            weights[key] = X

            # save the intersected results
            if options.saveoverlaps and X >= options.overlapthresh:
                labels = ''.join(
                    '_' + label_names[i] for i in range(N) if t[i] == '1')
                hlp.create_dir(output + '/sets')
                target = output + '/sets/' + key + labels + '.bed'
                if x is ones[0]:
                    # No intersect ran (single input file), so x still
                    # points at the caller's input: copy it, because
                    # moveto() would move the input file itself.
                    x.saveas(target)
                else:
                    x.moveto(target)
    finally:
        # delete all temp files, also when an intersection fails
        helpers.cleanup()

    return weights
def list_upset(options, label_names):
    '''
    Count the elements belonging to every exclusive combination of the
    input list files (the weights of an UpSet plot).

    Arguments:
        options - object providing the attributes read here:
            input         - list of text files; every line is one element
                            and repeated lines within a file count once
            output        - output path
            saveoverlaps  - when true, the elements of a combination are
                            written to <output>/sets/
            overlapthresh - minimum size a combination needs in order to
                            be written
        label_names - names of the input files, indexed like options.input

    Returns a dict keyed by a string of '0'/'1' flags, one flag per input
    file (e.g. '101'). The value is the number of elements present in all
    flagged files and in none of the unflagged files.
    '''
    output = options.output

    # One set of unique lines per input file.
    S = []
    for f in options.input:
        with open(f) as f_open:
            S.append(set(f_open.read().splitlines()))
    N = len(S)

    # Generate a truth table of intersections to calculate
    # (the first row, all zeros, selects no file and is dropped).
    truth_table = list(itertools.product("01", repeat=N))[1:]

    weights = {}
    for t in truth_table:
        ones = [S[i] for i in range(N) if t[i] == '1']
        zeros = [S[i] for i in range(N) if t[i] == '0']

        # set.intersection returns a new set, so the in-place difference
        # below never modifies the input sets.
        X = set.intersection(*ones)
        X.difference_update(*zeros)

        key = ''.join(t)
        weights[key] = len(X)

        # save the intersected results
        if options.saveoverlaps and len(X) >= options.overlapthresh:
            labels = ''.join(
                '_' + label_names[i] for i in range(N) if t[i] == '1')
            hlp.create_dir(output + '/sets')
            target = output + '/sets/' + key + labels + '.txt'
            # The context manager closes the file even if the write fails;
            # sorting makes the saved file reproducible between runs.
            with open(target, 'w') as inter_file:
                inter_file.write('\n'.join(sorted(X)))

    return weights