Beispiel #1
0
    def test_merge(self):

        for merge_type in ("min", "max", "flat"):
            ordered_bed_list = [
                gqltools.load_file("../data/a.bed", "auto"),
                gqltools.load_file("../data/b.bed", "auto"),
                gqltools.load_file("../data/d.bed", "auto"),
            ]

            mods = {}

            R = gqltools.merge_beds(merge_type, ordered_bed_list, mods)

            self.assertEqual(type(R), gqltypes.BED3)

            was_error = False
            try:
                mods = {"distance": 10}
                R = gqltools.merge_beds("flat", ordered_bed_list, mods)
            except Exception:
                was_error = True
            self.assertTrue(was_error)

            was_error = False
            try:
                mods = {"name": "OTHER"}
                R = gqltools.merge_beds("flat", ordered_bed_list, mods)
            except Exception:
                was_error = True
            self.assertTrue(was_error)

            for func in ["MIN", "MAX", "SUM", "MEAN", "MEDIAN", "MODE", "ANITMODE", "COLLAPSE", "COUNT"]:
                mods = {"score": func}
                R = gqltools.merge_beds(merge_type, ordered_bed_list, mods)
                self.assertEqual(type(R), gqltypes.BED6)

            ordered_bed_list = [gqltools.load_file("../data/tiny*.bed", "auto")]

            for func in ["MIN", "MAX", "SUM", "MEAN", "MEDIAN", "MODE", "ANITMODE", "COLLAPSE", "COUNT"]:
                mods = {"score": func}
                R = gqltools.merge_beds(merge_type, ordered_bed_list, mods)
                self.assertEqual(type(R), gqltypes.BED6)
Beispiel #2
0
def eval_exp(exp, env):
    # print exp # DEBUG
    etype = exp[0]

    # {{{ if etype == "identifier":
    if etype == "identifier":
        # print exp #Debug
        # ('identifier', 'b')
        vname = exp[1]
        value = env_lookup(vname, env)
        if value == None:
            # print "ERROR: unbound variable " + vname
            raise InterpException("Unbound variable " + vname, "identifier")
        else:
            return value
            # }}}

            # {{{ if etype == 'load':
    if etype == "load":
        # ('load',
        # 	('file', ('string', 'file0')),
        # 	('filetype', 'GENOME'))

        file_path = eval_exp(exp[1], env)

        # filetype_name = eval_exp(exp[2], env)
        filetype_name = "auto"

        # make sure the type is correct
        return gqltools.load_file(file_path, filetype_name)
        # }}}

        # {{{ if etype == 'cast':
    if etype == "cast":
        # print exp
        # ('cast', ('identifier', 'a'), ('filetype', 'BED6'), [])
        ident = exp[1]
        bedx = eval_exp(ident, env)
        new_type = eval_exp(exp[2], env)

        return gqltools.cast(bedx, new_type)
        # }}}

        # {{{ if etype == 'complement':
    if etype == "complement":
        # print exp
        # ('complement', ('identifier', 'a'), ('identifier', 'g'))
        # ('complement', ('identifier', 'a'), 'hg19')
        target_bed = eval_exp(exp[1], env)
        genome = exp[2]
        if not type(genome) is str:
            genome = eval_exp(genome, env)

        return gqltools.complement_bedx(target_bed, genome)

        # return gqltools.cast(bedx,  new_type)
        # }}}

        # {{{ elif etype == 'binary-intersect':
    elif etype == "binary-intersect":
        # print exp
        # ('binary-intersect',
        # 	[('identifier', 'a')],
        # 	[('identifier', 'b'), ('identifier', 'c'), ('identifier', 'd')])
        idents = exp[1]
        n_bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            n_bedfiles = n_bedfiles + [bedx]

        n_labels = []
        for ident in idents:
            n_labels = n_labels + [ident[1]]

        idents = exp[2]
        m_bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            m_bedfiles = m_bedfiles + [bedx]

        m_labels = []
        for ident in idents:
            m_labels = m_labels + [ident[1]]

        return gqltools.binary_intersect_beds(n_bedfiles, n_labels, m_bedfiles, m_labels)

        # }}}

        # {{{ elif etype == 'binary-jaccard':
    elif etype == "binary-jaccard":
        # print exp
        # ('binary-intersect',
        # 	[('identifier', 'a')],
        # 	[('identifier', 'b'), ('identifier', 'c'), ('identifier', 'd')])
        idents = exp[1]
        n_bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            n_bedfiles = n_bedfiles + [bedx]

        n_labels = []
        for ident in idents:
            n_labels = n_labels + [ident[1]]

        idents = exp[2]
        m_bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            m_bedfiles = m_bedfiles + [bedx]

        m_labels = []
        for ident in idents:
            m_labels = m_labels + [ident[1]]

        return gqltools.binary_jaccard_beds(n_bedfiles, n_labels, m_bedfiles, m_labels)

        # }}}

        # {{{ elif etype == 'unary-intersect':
    elif etype == "unary-intersect":
        # ('intersect',
        # 	[('identifier', 'a'), ('identifier', 'b')])
        idents = exp[1]
        bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            bedfiles = bedfiles + [bedx]

        labels = []
        for ident in idents:
            labels = labels + [ident[1]]

        return gqltools.unary_intersect_beds(bedfiles, labels)
        # }}}

        # {{{ elif etype == 'subtract':
    elif etype == "subtract":
        # ('subtract',
        # 	[('identifier', 'a'), ('identifier', 'b')])
        ident = exp[1]
        bedfile = eval_exp(ident, env)
        idents = exp[2]
        bedfiles = []
        for ident in idents:
            bedfiles = bedfiles + [eval_exp(ident, env)]

        return gqltools.subtract_beds(bedfile, bedfiles)
        # }}}

        # {{{ elif etype == 'mergemin':
    elif etype == "mergemin":
        # print exp
        ident_list = exp[1]
        if len(ident_list) == 1:
            bednfile = eval_exp(ident, env)
            result_file = gqltools.mergemin_bedn(bednfile)

        return result_file

        # }}}

        # {{{ elif etype == 'merge':
    elif etype == "merge":
        # print exp # Debug
        # ('merge',
        # 	[('identifier', 'a'), ('identifier', 'b'), ('identifier', 'c')],
        # 	('score', ('function', 'MIN'), None))
        # or
        # ('merge',
        # 	[('identifier', 'a'), ('identifier', 'b'), ('identifier', 'c')],
        # 	None)
        merge_type = exp[1]
        idents = exp[2]
        modifiers = exp[3]
        bedfiles = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            bedfiles = bedfiles + [bedx]

        mods = {}
        for modifier in modifiers:
            modifier_type = modifier[0]

            if modifier_type in mods:
                raise InterpException("Multiple definitions of " + modifier_type + ".")

            mods[modifier_type] = eval_exp(modifier[1], env)

        return gqltools.merge_beds(merge_type, bedfiles, mods)
        # }}}

        # {{{ elif etype == 'filter':
    elif etype == "filter":
        # print exp # Debug
        # ('filter',
        # 	[	('identifier', 'a'),
        # 		('identifier', 'b'),
        # 		('identifier', 'c'),
        # 		('identifier', 'd')],
        # 	[   ('score', ('function', 'BOOL',
        # 			[   ('compare', ('<', ('number', 100.0))),
        # 				('conj', '&'),
        # 				('compare', ('>', ('number', 50.0))),
        # 				('conj', '&'),
        # 				('compare', ('!=', ('number', 75.0)))
        # 			])
        # 		),
        # 		('start', ('function', 'BOOL',
        # 			[   ('compare', ('>', ('number', 1000.0)))
        # 			])
        # 		),
        # 		('end', ('function', 'BOOL',
        # 			[   ('compare', ('<', ('number', 10000.0)))
        # 			])
        # 		),
        # 		('chrom', ('function', 'BOOL',
        # 			[ ('compare', ('==', ('string', 'chr1')))
        # 			])
        # 		)
        # 	]
        # )

        idents = exp[1]
        modifiers = exp[2]
        bedxs = []
        for ident in idents:
            bedx = eval_exp(ident, env)
            bedxs = bedxs + [bedx]

        mods = {}
        for modifier in modifiers:
            modifier_type = modifier[0]

            if modifier_type in mods:
                raise InterpException("Multiple definitions of " + modifier_type + ".")

            function = eval_exp(modifier[1], env)
            if function[0] != "BOOL":
                raise InterpException("Unsupported function type in FOREACH: " + function)
            bool_funcs = []
            for element in function[1]:
                bool_func = []
                if element[0] == "compare":
                    op = element[1][0]
                    val = element[1][1]
                    if val[0] == "number":
                        bool_func = [op, val[1]]
                    elif val[0] == "string":
                        bool_func = [op, '"' + val[1] + '"']
                    else:
                        raise InterpException("Unsupported value type in boolean " + "function in FOREACH.")
                elif element[0] == "conj":
                    conj = element[1][0]
                    bool_func = [conj]
                else:
                    raise InterpException("Error in boolean function in FOREACH.")
                bool_funcs.append(bool_func)

            mods[modifier_type] = bool_funcs

        return gqltools.filter_bedx(bedxs, mods)
        # }}}

        # {{{elif etype == 'count':
    elif etype == "count":

        ident = eval_exp(exp[1], env)

        return gqltools.count(ident)

        # }}}

        # {{{ simple rules
    elif etype == "file":
        #   ('file', ('string', 'file0'))
        return eval_exp(exp[1], env)

    elif etype == "filetype":
        # 	('filetype', 'GENOME'))
        return gqltypes.source_type_map[exp[1]]

    elif etype == "string":
        #   ('string', 'file0')
        return exp[1]

    elif etype == "number":
        return float(exp[1])

    elif etype == "function":
        if len(exp) == 2:
            return exp[1]
        else:
            return exp[1:]

    else:
        print "ERROR: unknown expression type ",
        print etype
    return None