Beispiel #1
0
def test_selection_from_bedfile_and_subselection(conn):
    """Test the creation of a selection based on BED data

    .. note:: Please note that the bedreader **is not** tested here!
    """
    larger_string = """
        chr1 1    10   feature1  0 +
        chr1 50   60   feature2  0 -
        chr1 51 59 another_feature 0 +
    """
    # According to VARIANTS global variable with 3 variants (pos 10, 50 and 45)
    # 1: chr1, pos 1 to 10 => 1 variant concerned (pos 10)
    # 2: chr1, pos 50 to 60 => 1 variant concerned (pos 50)
    # 3: chr1, pos 51 to 59 => 0 variants
    bedtool = BedReader(larger_string)

    # Create now a sub selection => 2 variants (pos 10, 45)
    query = "SELECT variants.id,chr,pos,ref,alt FROM variants WHERE ref='G'"
    set_A_id = sql.create_selection_from_sql(conn, query, "setA", count=None)
    # 2nd selection (1st is the default "variants")
    assert set_A_id == 2
    assert "setA" in list(s["name"] for s in sql.get_selections(conn))

    # 1: chr1, pos 1 to 10 => 1 remaining variant
    # 2: chr1, pos 50 to 60 => 0 variant
    # 3: chr1, pos 51 to 59 => 0 variant
    ret = sql.create_selection_from_bed(conn, "setA", "sub_bedname", bedtool)
    # id of selection
    assert ret == 3

    data = conn.execute(
        "SELECT * FROM selection_has_variant WHERE selection_id = ?", (ret, ))
    expected = ((1, ret), )
    record = tuple([tuple(i) for i in data])
    assert record == expected
Beispiel #2
0
    def _fill_completer(self):
        """Create Completer with his model

        Fill the model with the SQL keywords and database fields
        """
        # preload samples , selection and wordset
        samples = [i["name"] for i in sql.get_samples(self.conn)]
        selections = [i["name"] for i in sql.get_selections(self.conn)]
        wordsets = [i["name"] for i in sql.get_wordsets(self.conn)]

        # keywords = []
        self.text_edit.completer.model.clear()
        self.text_edit.completer.model.beginResetModel()

        # register keywords
        for keyword in self.text_edit.syntax.sql_keywords:
            self.text_edit.completer.model.add_item(keyword, "VQL keywords",
                                                    FIcon(0xF0169), "#f6ecf0")

        for selection in selections:
            self.text_edit.completer.model.add_item(selection, "Source table",
                                                    FIcon(0xF04EB), "#f6ecf0")

        for wordset in wordsets:
            self.text_edit.completer.model.add_item(f"WORDSET['{wordset}']",
                                                    "WORDSET", FIcon(0xF04EB),
                                                    "#f6ecf0")

        for field in sql.get_fields(self.conn):
            name = field["name"]
            description = "<b>{}</b> ({}) from {} <br/><br/> {}".format(
                field["name"], field["type"], field["category"],
                field["description"])
            color = style.FIELD_TYPE.get(field["type"], "str")["color"]
            icon = FIcon(
                style.FIELD_TYPE.get(field["type"], "str")["icon"], "white")

            if field["category"] == "variants" or field[
                    "category"] == "annotations":
                self.text_edit.completer.model.add_item(
                    name, description, icon, color)

            if field["category"] == "samples":
                # Overwrite name
                for sample in samples:
                    name = "sample['{}'].{}".format(sample, field["name"])
                    description = "<b>{}</b> ({}) from {} {} <br/><br/> {}".format(
                        field["name"],
                        field["type"],
                        field["category"],
                        sample,
                        field["description"],
                    )
                    self.text_edit.completer.model.add_item(
                        name, description, icon, color)

        self.text_edit.completer.model.endResetModel()
Beispiel #3
0
    def load(self):
        """Load all selections into the model"""

        if self.conn is None:
            return

        self.beginResetModel()
        # Add all selections from the database
        # Dictionnary of all attributes of the table.
        #    :Example: {"name": ..., "count": ..., "query": ...}
        self.records = list(sql.get_selections(self.conn))
        self.endResetModel()
Beispiel #4
0
def test_selection_from_bedfile(conn):
    """Test the creation of a selection based on BED data

    .. note:: Please note that the bedreader **is not** tested here!
    """

    larger_string = """
        chr1 1    10   feature1  0 +
        chr1 50   60   feature2  0 -
        chr1 51 59 another_feature 0 +
    """
    # According to VARIANTS global variable with 3 variants (pos 10, 50 and 45)
    # 1: chr1, pos 1 to 10 => 1 variant concerned (pos 10)
    # 2: chr1, pos 50 to 60 => 1 variant concerned (pos 50)
    # 3: chr1, pos 51 to 59 => 0 variants

    bedtool = BedReader(larger_string)

    # Create a new selection (a second one, since there is a default one during DB creation)
    selection_name = "bedname"
    ret = sql.create_selection_from_bed(conn, "variants", selection_name,
                                        bedtool)

    # Test last id of the selection
    assert ret == 2

    # Query the association table (variant_id, selection_id)
    data = conn.execute(
        "SELECT * FROM selection_has_variant WHERE selection_id = ?", (ret, ))
    # 2 variants (see above)
    # format: [(id variant, id selection),]
    expected = ((1, ret), (2, ret))
    record = tuple([tuple(i) for i in data])

    # Is the association table 'selection_has_variant' ok ?
    print("record:", record)
    assert record == expected

    bed_selection = [
        s for s in sql.get_selections(conn) if s["name"] == selection_name
    ][0]
    print("selection content", bed_selection)
    assert bed_selection["name"] == selection_name
    assert bed_selection["count"] == 2  # 2 variants retrieved
Beispiel #5
0
    def _create_completer(self):
        """Create Completer with his model

        Fill the model with the SQL keywords and database fields
        """
        model = QStringListModel()
        completer = QCompleter()

        keywords = []
        samples = [i["name"] for i in sql.get_samples(self.conn)]
        selections = [i["name"] for i in sql.get_selections(self.conn)]
        for field in sql.get_fields(self.conn):
            if field["category"] == "samples":
                for sample in samples:
                    keywords.append("sample['{}'].{}".format(sample, field["name"]))
            else:
                keywords.append(field["name"])

        keywords.extend(VqlSyntaxHighlighter.sql_keywords)
        keywords.extend(selections)
        model.setStringList(keywords)
        completer.setModel(model)
        return completer
Beispiel #6
0
def test_sql_selection_operation(conn):
    """Test set operations on selections using SQL API

    .. Todo:: Only union is tested here test intersect and expect
        (intersect is tested in test_command)
    """
    cursor = conn.cursor()

    # Query the first default selection
    all_selection = cursor.execute("SELECT * FROM selections").fetchone()

    # {'id': 1, 'name': 'variants', 'count': 3, 'query': ''}
    print("all", dict(all_selection))
    # index 0: id in db
    assert all_selection[1] == "variants"
    assert all_selection[2] == len(VARIANTS)

    # Create a selection from sql
    query = "SELECT id, chr, pos FROM variants where alt = 'A' "
    sql.create_selection_from_sql(conn, query, "test")

    # check if selection has been created
    assert "test" in [record["name"] for record in sql.get_selections(conn)]

    # Check if selection of variants returns same data than selection query
    selection_id = 2
    insert_data = cursor.execute(query).fetchall()

    read_data = cursor.execute(f"""
        SELECT variants.id, variants.chr, variants.pos FROM variants
        INNER JOIN selection_has_variant sv ON variants.rowid = sv.variant_id AND sv.selection_id = {selection_id}
        """).fetchall()

    # set because, it can contains duplicate variants
    assert set(read_data) == set(insert_data)

    # TEST Unions
    query1 = "SELECT id, chr, pos FROM variants where alt = 'A' "  # 2 variants
    query2 = "SELECT id, chr, pos FROM variants where alt = 'C' "  # 1 variant

    union_query = sql.union_variants(query1, query2)
    print(union_query)
    selection_id = sql.create_selection_from_sql(conn, union_query, "union_GT")
    print("union_GT selection id: ", selection_id)
    assert selection_id is not None
    record = cursor.execute(
        f"SELECT id, name FROM selections WHERE name = 'union_GT'").fetchone()
    print("Found record:", dict(record))
    selection_id = record[0]
    selection_name = record[1]
    assert selection_id == 3  # test if selection id equal 2 ( the first is "variants")
    assert selection_name == "union_GT"

    # Select statement from union_GT selection must contains only variant.alt G or T
    records = cursor.execute(f"""
        SELECT variants.chr, variants.pos, variants.ref, variants.alt FROM variants
        INNER JOIN selection_has_variant sv ON variants.rowid = sv.variant_id AND sv.selection_id = {selection_id}
        """).fetchall()

    # {'chr': 'chr1', 'pos': 10, 'ref': 'G', 'alt': 'A'}
    # {'chr': 'chr1', 'pos': 50, 'ref': 'C', 'alt': 'C'}
    # {'chr': 'chr1', 'pos': 45, 'ref': 'G', 'alt': 'A'}
    for found_variants, record in enumerate(records, 1):
        print(dict(record))
        assert record["alt"] in ("A", "C")

    assert found_variants == 3
Beispiel #7
0
def main():
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(prog),
        description="""
Cutevariant cli mode helps to run actions directly from command-line.\n
The env variable $CUTEVARIANT_DB can be used to define a database instead of
the arguments.""",
        epilog="""Examples:

    $ cutevariant-cli show --db my_database.db samples
    or
    $ export CUTEVARIANT_DB=my_database.db
    $ cutevariant-cli show samples""",
    )
    # Default log level: critical
    parser.add_argument(
        "-vv",
        "--verbose",
        nargs="?",
        default="error",
        choices=["debug", "info", "critical", "error", "warning"],
    )

    sub_parser = parser.add_subparsers(dest="subparser")

    # Common parser: Database file requirement #################################
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--db", help="SQLite database. By default, $CUTEVARIANT_DB is used."
    )

    # Create DB parser #########################################################
    createdb_parser = sub_parser.add_parser(
        "createdb",
        help="Build a SQLite database from a vcf file",
        parents=[parent_parser],
        epilog="""Examples:

        $ cutevariant-cli createdb -i "examples/test.snpeff.vcf"
        """,
    )
    createdb_parser.add_argument("-i", "--input", help="VCF file path", required=True)

    # Show parser ##############################################################
    show_parser = sub_parser.add_parser(
        "show", help="Display table content", parents=[parent_parser]
    )
    show_parser.add_argument(
        "table",
        choices=["fields", "selections", "samples", "wordsets"],
        help="Possible names of tables.",
    )

    # Remove parser ############################################################
    remove_parser = sub_parser.add_parser(
        "remove", help="remove selection", parents=[parent_parser]
    )
    remove_parser.add_argument("names", nargs="+", help="Name(s) of selection(s).")

    # VQL parser ###############################################################
    select_parser = sub_parser.add_parser(
        "exec",
        help="Execute a VQL statement.",
        parents=[parent_parser],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:

    $ cutevariant-cli exec "SELECT favorite,chr,pos,ref,alt FROM variants"
    or
    $ cutevariant-cli exec "SELECT chr,ref,alt FROM variants" -s myselection
    or
    $ cutevariant-cli exec "IMPORT WORDSETs 'examples/gene.txt' AS mygenes"
    or
    $ cutevariant-cli exec "DROP WORDSETS mygenes"
    or
    $ cutevariant-cli exec "CREATE myselection1 FROM variants WHERE gene = 'CHID1'"
    $ cutevariant-cli exec "CREATE myselection2 FROM variants WHERE gene = 'CICP23'"
    $ cutevariant-cli exec "CREATE myselection3 = myselection2 | myselection2"
    or
    $ cutevariant-cli exec "CREATE boby FROM variants INTERSECT 'examples/test.bed'"
    """,
    )
    select_parser.add_argument("vql", help="A VQL statement.")
    select_parser.add_argument(
        "-l",
        "--limit",
        help="Limit the number of lines in output.",
        type=int,
        default=100,
    )
    # select_parser.add_argument(
    #     "-g",
    #     "--group",
    #     action="store_true",
    #     help="Group SELECT query by...(chr,pos,ref,alt).",
    # )
    select_parser.add_argument(
        "-s", "--to-selection", help="Save SELECT query into a selection name."
    )

    # Set parser ###############################################################
    # set_parser = sub_parser.add_parser("set", help="Set variable", parents=[parent_parser])

    # Workaround for sphinx-argparse module that require the object parser
    # before the call of parse_args()
    if "html" in sys.argv:
        return parser

    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Set log level
    # _logger.setLevel(logging.DEBUG)
    log_level(args.verbose)

    # Create DB parser #########################################################
    if args.subparser == "createdb":
        if not args.db:
            # Database file is not set:
            # The output file will be based on the name of the VCF one
            args.db = args.input + ".db"

        if os.path.exists(args.db):
            # Remove existing file
            os.remove(args.db)

        conn = sql.get_sql_connection(args.db)
        if conn:
            # TODO: bug ... max is not 100...
            for i, message in progressbar.progressbar(
                async_import_file(conn, args.input), redirect_stdout=True
            ):
                print(message)

        print("The database is successfully created!")
        exit()

    # Prepare SQL connection on DB file
    if "CUTEVARIANT_DB" in os.environ and args.subparser != "createdb":
        args.db = os.environ["CUTEVARIANT_DB"]
    elif not args.db:
        print("You must specify a database file via $CUTEVARIANT_DB or --db argument")
        exit()

    # Init SQL connection
    conn = sql.get_sql_connection(args.db)

    # Show parser ##############################################################
    if args.subparser == "show":
        if args.table == "fields":
            display_sql_results(
                (i.values() for i in sql.get_fields(conn)),
                ["id", "name", "table", "type", "description"],
            )

        if args.table == "samples":
            display_sql_results(
                (i.values() for i in sql.get_samples(conn)), ["id", "name"]
            )

        if args.table == "selections":
            display_sql_results(
                (i.values() for i in sql.get_selections(conn)),
                ["id", "name", "variant_count"],
            )

        if args.table == "wordsets":
            display_sql_results(
                (i.values() for i in sql.get_wordsets(conn)), ["id", "word_count"]
            )

    # Remove parser ############################################################
    if args.subparser == "remove":
        for name in args.names:
            sql.delete_selection_by_name(conn, name)

    # VQL parser ###############################################################
    if args.subparser == "exec":
        query = "".join(args.vql)
        vql_command = None

        # Test the VQL query
        try:
            cmd = vql.parse_one_vql(query)
        except (vql.textx.TextXSyntaxError, vql.VQLSyntaxError) as e:
            # Available attributes: e.message, e.line, e.col
            print("%s: %s, col: %d" % (e.__class__.__name__, e.message, e.col))
            print("For query:", query)
            exit(0)

        # Select command with redirection to selection
        if cmd["cmd"] == "select_cmd" and args.to_selection:
            vql_command = partial(
                command.create_cmd,
                conn,
                args.to_selection,
                source=cmd["source"],
                filters=cmd["filters"],
            )

        try:
            # Is it redundant with check_vql ?
            # No because we also execute SQL statement here
            if vql_command:
                ret = vql_command()
            else:
                ret = command.create_command_from_obj(conn, cmd)()
            if not isinstance(ret, dict):
                # For drop_cmd, import_cmd,
                ret = list(ret)
        except (sqlite3.DatabaseError, vql.VQLSyntaxError) as e:
            LOGGER.exception(e)
            exit(0)

        LOGGER.debug("SQL result: %s", ret)
        LOGGER.debug("VQL command: %s", cmd["cmd"])
        # Note: show_cmd is supported in a separated command option

        # Select command
        if cmd["cmd"] in ("select_cmd",) and not args.to_selection:
            display_sql_results((i.values() for i in ret), ["id"] + cmd["fields"])
            exit(1)

        if (
            cmd["cmd"] in ("drop_cmd", "import_cmd", "create_cmd", "set_cmd", "bed_cmd")
            or args.to_selection
        ):
            # PS: to_selection is used to detect select_cmd with selection creation
            display_query_status(ret)