Beispiel #1
0
def genomic_upset(options, label_names):
    '''
    Compute the size of every exclusive intersection between a list of BED files.

    Arguments:
        options - object providing the attributes read here:
            input            - list of BED file paths
            output           - output directory path
            bedtools_options - passed to hlp.map_bedtools_options() to build
                               the extra keyword arguments for intersect()
            saveoverlaps     - when true, write qualifying intersections to disk
            overlapthresh    - minimum region count for an intersection to be saved
        label_names - names of the input files, in the same order as options.input;
                      used to build the names of the saved files

    Returns a dict mapping each membership pattern (a string of '0'/'1', one
    character per input file, e.g. '101') to the number of regions counted for
    that pattern. The first (all-zeros) pattern is skipped.
    '''
    bed_paths = options.input
    out_dir = options.output
    bedtools_kwargs = hlp.map_bedtools_options(options.bedtools_options)

    # All 0/1 membership patterns; the first product (all zeros) is dropped.
    patterns = list(itertools.product("01", repeat=len(bed_paths)))[1:]

    weights = {}
    for pattern in patterns:
        included = [BedTool(path) for path, bit in zip(bed_paths, pattern) if bit == '1']
        excluded = [BedTool(path) for path, bit in zip(bed_paths, pattern) if bit == '0']

        # Start from the first included set and keep only the entries that
        # overlap every other included set (u=True) ...
        current = included[0]
        for other in included[1:]:
            current = current.intersect(other, u=True, **bedtools_kwargs)
        # ... then drop the entries that overlap any excluded set (v=True).
        for other in excluded:
            current = current.intersect(other, v=True, **bedtools_kwargs)

        n_regions = current.count()
        key = ''.join(pattern)
        weights[key] = n_regions

        # Optionally keep the intersected regions as <pattern>_<labels...>.bed
        if options.saveoverlaps and n_regions >= options.overlapthresh:
            label_suffix = ''.join(
                '_' + label_names[pos]
                for pos, bit in enumerate(pattern)
                if bit == '1'
            )
            hlp.create_dir(out_dir + '/sets')
            current.moveto(out_dir + '/sets/' + key + label_suffix + '.bed')

        # Delete all temp files before moving on to the next pattern.
        helpers.cleanup()

    return weights
Beispiel #2
0
def list_upset(options, label_names):
    '''
    Compute the size of every exclusive intersection between a list of list files.

    Each input file is read as a set of its lines (duplicate lines within one
    file collapse to a single element).

    Arguments:
        options - object providing the attributes read here:
            input         - list of paths to text files, one element per line
            output        - output directory path
            saveoverlaps  - when true, write qualifying intersections to disk
            overlapthresh - minimum element count for an intersection to be saved
        label_names - names of the input files, in the same order as options.input;
                      used to build the names of the saved files

    Returns a dict mapping each membership pattern (a string of '0'/'1', one
    character per input file, e.g. '101') to the number of elements that are in
    every set marked '1' and in none of the sets marked '0'. The all-zeros
    pattern is skipped.
    '''
    input_files = options.input
    output = options.output

    S = []
    for f in input_files:
        with open(f) as f_open:
            S.append(set(f_open.read().splitlines()))
    N = len(S)

    # Generate a truth table of intersections to calculate
    # (the first product is the all-zeros row, which selects no set at all).
    truth_table = list(itertools.product("01", repeat=N))[1:]

    weights = {}
    for t in truth_table:
        key = ''.join(t)
        ones = [s for s, bit in zip(S, t) if bit == '1']
        zeros = [s for s, bit in zip(S, t) if bit == '0']

        # set.intersection returns a new set, so the in-place difference below
        # never mutates the input sets held in S.
        X = set.intersection(*ones)
        X.difference_update(*zeros)
        weights[key] = len(X)

        # Save the intersected results as <pattern>_<labels...>.txt
        if options.saveoverlaps and len(X) >= options.overlapthresh:
            label_suffix = ''.join(
                '_' + label_names[pos]
                for pos, bit in enumerate(t)
                if bit == '1'
            )
            file_name = key + label_suffix
            hlp.create_dir(output + '/sets')
            # The context manager closes the file even if the write fails.
            # Members are sorted so the saved file does not depend on set
            # iteration order.
            with open(output + '/sets/' + file_name + '.txt', 'w') as inter_file:
                inter_file.write('\n'.join(sorted(X)))

    return weights