Ejemplo n.º 1
0
def test_get_samples(conn):
    """Test default values of samples"""
    assert [sample["name"] for sample in sql.get_samples(conn)] == SAMPLES
    first_sample = list(sql.get_samples(conn))[0]

    # test default value
    assert first_sample["name"] == "sacha"
    assert first_sample["family_id"] == "fam"
    assert first_sample["father_id"] == 0
    assert first_sample["mother_id"] == 0
    assert first_sample["sex"] == 0
    assert first_sample["phenotype"] == 0
Ejemplo n.º 2
0
    def load(self):
        """Load all fields from the model
        """
        self.clear()
        self._checkable_items.clear()
        self.setColumnCount(2)
        self.setHorizontalHeaderLabels(["name", "description"])

        #  Load fields from variant categories
        self.appendRow(self._load_fields("variants"))
        #  Load fields from annotations categories
        self.appendRow(self._load_fields("annotations"))
        # Create and load fields from samples categories
        samples_items = QStandardItem("samples")
        samples_items.setIcon(FIcon(0xF0B9C))
        font = QFont()

        samples_items.setFont(font)
        for sample in sql.get_samples(self.conn):
            sample_item = self._load_fields("samples",
                                            parent_name=sample["name"])
            sample_item.setText(sample["name"])
            sample_item.setIcon(FIcon(0xF0B9C))
            samples_items.appendRow(sample_item)

        self.appendRow(samples_items)
Ejemplo n.º 3
0
    def populate(self):
        """Show the current variant attributes on the TreeWidget

        .. note:: RichText in Markdown is supported starting PySide 5.14
        """
        if not self.current_variant or "id" not in self.current_variant:
            return

        variant_id = self.current_variant["id"]

        # Populate variant
        data = dict(
            [
                (k, v)
                for k, v in sql.get_one_variant(self.conn, variant_id).items()
                if k not in ("variant_id", "sample_id", "annotations", "samples")
            ]
        )
        self.variant_view.set_dict(data)

        self.edit_panel.set_data(data)

        title = "{chr}:{pos} {ref}>{alt}".format(**data)
        # self.parent().setWindowTitle(title)

        # Populate annotations
        self.transcript_combo.blockSignals(True)
        self.transcript_combo.clear()

        for annotation in sql.get_annotations(self.conn, variant_id):
            if "transcript" in annotation:
                self.transcript_combo.addItem(annotation["transcript"], annotation)
        self.on_transcript_changed()
        self.transcript_combo.blockSignals(False)

        # Populate samples
        self.sample_combo.blockSignals(True)
        self.sample_combo.clear()
        for sample in sql.get_samples(self.conn):
            self.sample_combo.addItem(sample["name"], sample["id"])
        self.on_sample_changed()
        self.sample_combo.blockSignals(False)

        # Populate genotypes for samples
        self.genotype_view.clear()
        query = f"""SELECT samples.name, sv.gt FROM samples 
                 LEFT JOIN sample_has_variant sv ON samples.id = sv.sample_id 
                 AND sv.variant_id = {variant_id}"""

        for sample_name, genotype in self.conn.execute(query):
            item = QListWidgetItem()
            icon = self.genotype_icons.get(genotype, self.genotype_icons[-1])

            item.setText(sample_name)
            item.setIcon(icon)
            item.setToolTip(cm.GENOTYPE_DESC.get(genotype, -1))

            self.genotype_view.addItem(item)
Ejemplo n.º 4
0
    def _fill_completer(self):
        """Create Completer with his model

        Fill the model with the SQL keywords and database fields
        """
        # preload samples , selection and wordset
        samples = [i["name"] for i in sql.get_samples(self.conn)]
        selections = [i["name"] for i in sql.get_selections(self.conn)]
        wordsets = [i["name"] for i in sql.get_wordsets(self.conn)]

        # keywords = []
        self.text_edit.completer.model.clear()
        self.text_edit.completer.model.beginResetModel()

        # register keywords
        for keyword in self.text_edit.syntax.sql_keywords:
            self.text_edit.completer.model.add_item(keyword, "VQL keywords",
                                                    FIcon(0xF0169), "#f6ecf0")

        for selection in selections:
            self.text_edit.completer.model.add_item(selection, "Source table",
                                                    FIcon(0xF04EB), "#f6ecf0")

        for wordset in wordsets:
            self.text_edit.completer.model.add_item(f"WORDSET['{wordset}']",
                                                    "WORDSET", FIcon(0xF04EB),
                                                    "#f6ecf0")

        for field in sql.get_fields(self.conn):
            name = field["name"]
            description = "<b>{}</b> ({}) from {} <br/><br/> {}".format(
                field["name"], field["type"], field["category"],
                field["description"])
            color = style.FIELD_TYPE.get(field["type"], "str")["color"]
            icon = FIcon(
                style.FIELD_TYPE.get(field["type"], "str")["icon"], "white")

            if field["category"] == "variants" or field[
                    "category"] == "annotations":
                self.text_edit.completer.model.add_item(
                    name, description, icon, color)

            if field["category"] == "samples":
                # Overwrite name
                for sample in samples:
                    name = "sample['{}'].{}".format(sample, field["name"])
                    description = "<b>{}</b> ({}) from {} {} <br/><br/> {}".format(
                        field["name"],
                        field["type"],
                        field["category"],
                        sample,
                        field["description"],
                    )
                    self.text_edit.completer.model.add_item(
                        name, description, icon, color)

        self.text_edit.completer.model.endResetModel()
Ejemplo n.º 5
0
def test_update_samples(conn):
    """Test update procedure of a sample in DB (modify some of its field values)"""
    previous_sample = list(sql.get_samples(conn))[0]

    assert previous_sample["name"] == "sacha"
    assert previous_sample["id"] == 1
    # Update with info
    previous_sample["name"] = "maco"
    previous_sample["family_id"] = "fam2"
    previous_sample["father_id"] = 1
    previous_sample["mother_id"] = 1
    previous_sample["sex"] = 2
    previous_sample["phenotype"] = 2

    # Update the sample
    sql.update_sample(conn, previous_sample)

    # Get the updated sample
    edit_sample = list(sql.get_samples(conn))[0]
    # The sample in DB must be the same than the sample we modified above
    assert previous_sample == edit_sample
Ejemplo n.º 6
0
    def populate(self):
        """Fill combobox with samples from databases 
        """

        self.mother_combo.clear()
        self.father_combo.clear()
        self.child_combo.clear()

        samples = [i["name"] for i in sql.get_samples(self.conn)]

        for sample in samples:
            self.mother_combo.addItem(FIcon(0xF1077), sample)
            self.father_combo.addItem(FIcon(0xF0643), sample)
            self.child_combo.addItem(FIcon(0xF0E7D), sample)
Ejemplo n.º 7
0
def get_default_tables_and_sample_ids(conn):
    """Handy function to cache default_tables and sample_ids from database

    This function is used for every queries built in :meth:`build_full_sql_query`

    Warnings:
        Do not forget to clear this cache when samples are added in DB via
        a PED file for example.
    """
    # Get {'favorite': 'variants', 'comment': 'variants', impact': 'annotations', ...}
    default_tables = {i["name"]: i["category"] for i in sql.get_fields(conn)}
    # Get {'NORMAL': 1, 'TUMOR': 2}
    sample_ids = {i["name"]: i["id"] for i in sql.get_samples(conn)}

    return default_tables, sample_ids
Ejemplo n.º 8
0
    def populate(self):
        """Show the current variant attributes on the TreeWidget

        .. note:: RichText in Markdown is supported starting PySide 5.14
        """
        if not self.current_variant or "id" not in self.current_variant:
            return

        variant_id = self.current_variant["id"]

        # Populate variant
        self.populate_tree_widget(self.variant_view,
                                  sql.get_one_variant(self.conn, variant_id))

        # Populate annotations
        self.transcript_combo.blockSignals(True)
        self.transcript_combo.clear()
        for annotation in sql.get_annotations(self.conn, variant_id):
            if "transcript" in annotation:
                self.transcript_combo.addItem(annotation["transcript"],
                                              annotation)
        self.on_transcript_changed()
        self.transcript_combo.blockSignals(False)

        # Populate samples
        self.sample_combo.blockSignals(True)
        self.sample_combo.clear()
        for sample in sql.get_samples(self.conn):
            self.sample_combo.addItem(sample["name"], sample["id"])
        self.on_sample_changed()
        self.sample_combo.blockSignals(False)

        # Populate genotypes for samples
        self.genotype_view.clear()
        query = f"""SELECT samples.name, sv.gt FROM samples 
                 LEFT JOIN sample_has_variant sv ON samples.id = sv.sample_id 
                 AND sv.variant_id = {variant_id}"""

        for sample_name, genotype in self.conn.execute(query):
            item = QListWidgetItem()
            icon = self.genotype_icons.get(genotype, self.genotype_icons[-1])

            item.setText(sample_name)
            item.setIcon(icon)
            item.setToolTip(cm.GENOTYPE_DESC.get(genotype, -1))

            self.genotype_view.addItem(item)
Ejemplo n.º 9
0
    def load(self):
        """Load all columns avaible into the model"""
        self.clear()
        self.checkable_items.clear()

        self.appendRow(self.load_fields("variants"))
        self.appendRow(self.load_fields("annotations"))

        samples_items = QStandardItem("samples")
        samples_items.setIcon(FIcon(0xF0B9C))
        font = QFont()

        samples_items.setFont(font)

        for sample in sql.get_samples(self.conn):
            sample_item = self.load_fields("samples", parent_name=sample["name"])
            sample_item.setText(sample["name"])
            sample_item.setIcon(FIcon(0xF0B9C))
            samples_items.appendRow(sample_item)

        self.appendRow(samples_items)
Ejemplo n.º 10
0
    def save(self, conn, delimiter="\t", **kwargs):
        r"""Dump samples into a tabular file

        Notes:
            File is written without header.

        Example of line::

            `family_id\tindividual_id\tfather_id\tmother_id\tsex\tphenotype`

        Args:
            conn (sqlite.connection): sqlite connection
            delimiter (str, optional): Delimiter char used in exported file;
                (default: ``\t``).
            **kwargs (dict, optional): Arguments can be given to override
                individual formatting parameters in the current dialect.
        """
        writer = csv.DictWriter(self.device,
                                delimiter=delimiter,
                                lineterminator="\n",
                                fieldnames=[
                                    "family_id",
                                    "name",
                                    "father_id",
                                    "mother_id",
                                    "sex",
                                    "phenotype",
                                ],
                                extrasaction="ignore",
                                **kwargs)
        g = list(get_samples(conn))
        # Map DB ids with individual_ids
        individual_ids_mapping = {sample["id"]: sample["name"] for sample in g}
        # Add default value
        individual_ids_mapping[0] = 0
        # Replace DB ids
        for sample in g:
            sample["father_id"] = individual_ids_mapping[sample["father_id"]]
            sample["mother_id"] = individual_ids_mapping[sample["mother_id"]]
        writer.writerows(g)
Ejemplo n.º 11
0
def test_import_and_create_counting():
    reader = VcfReader(open("examples/test.snpeff.vcf"), "snpeff")
    pedfile = "examples/test.snpeff.pedigree.tfam"

    conn = sqlite3.connect(":memory:")

    for i, msg in async_import_reader(conn, reader, pedfile):
        print(msg)

    samples = list(sql.get_samples(conn))

    assert samples[0]["phenotype"] == 1
    assert samples[1]["phenotype"] == 2

    for record in conn.execute(
            """SELECT count_hom, count_het, count_ref, control_count_hom,control_count_het, control_count_ref,
        case_count_hom,case_count_het, case_count_ref  FROM variants"""):
        print(dict(record))
        assert record["control_count_ref"] == 1
        assert record["case_count_het"] == 1
        assert record["count_hom"] == 0
        assert record["count_het"] == 1
Ejemplo n.º 12
0
def test_create_db(reader):

    conn = sqlite3.connect(":memory:")

    sql.create_table_fields(conn)
    sql.insert_many_fields(conn, reader.get_fields())
    assert len(list(sql.get_fields(conn))) == len(list(reader.get_fields()))

    sql.create_table_samples(conn, reader.get_fields_by_category("samples"))
    sql.insert_many_samples(conn, reader.get_samples())
    assert len(list(sql.get_samples(conn))) == len(list(reader.get_samples()))

    sql.create_table_annotations(conn,
                                 reader.get_fields_by_category("annotations"))
    sql.create_table_variants(conn, reader.get_fields_by_category("variants"))

    sql.create_table_selections(conn)

    sql.insert_many_variants(conn, reader.get_variants())

    # count variant with annotation
    variant_count = len(list(reader.get_variants()))
    assert sql.get_variants_count(conn) == variant_count
Ejemplo n.º 13
0
    def _create_completer(self):
        """Create Completer with his model

        Fill the model with the SQL keywords and database fields
        """
        model = QStringListModel()
        completer = QCompleter()

        keywords = []
        samples = [i["name"] for i in sql.get_samples(self.conn)]
        selections = [i["name"] for i in sql.get_selections(self.conn)]
        for field in sql.get_fields(self.conn):
            if field["category"] == "samples":
                for sample in samples:
                    keywords.append("sample['{}'].{}".format(sample, field["name"]))
            else:
                keywords.append(field["name"])

        keywords.extend(VqlSyntaxHighlighter.sql_keywords)
        keywords.extend(selections)
        model.setStringList(keywords)
        completer.setModel(model)
        return completer
Ejemplo n.º 14
0
def main():
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(prog),
        description="""
Cutevariant cli mode helps to run actions directly from command-line.\n
The env variable $CUTEVARIANT_DB can be used to define a database instead of
the arguments.""",
        epilog="""Examples:

    $ cutevariant-cli show --db my_database.db samples
    or
    $ export CUTEVARIANT_DB=my_database.db
    $ cutevariant-cli show samples""",
    )
    # Default log level: critical
    parser.add_argument(
        "-vv",
        "--verbose",
        nargs="?",
        default="error",
        choices=["debug", "info", "critical", "error", "warning"],
    )

    sub_parser = parser.add_subparsers(dest="subparser")

    # Common parser: Database file requirement #################################
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--db", help="SQLite database. By default, $CUTEVARIANT_DB is used."
    )

    # Create DB parser #########################################################
    createdb_parser = sub_parser.add_parser(
        "createdb",
        help="Build a SQLite database from a vcf file",
        parents=[parent_parser],
        epilog="""Examples:

        $ cutevariant-cli createdb -i "examples/test.snpeff.vcf"
        """,
    )
    createdb_parser.add_argument("-i", "--input", help="VCF file path", required=True)

    # Show parser ##############################################################
    show_parser = sub_parser.add_parser(
        "show", help="Display table content", parents=[parent_parser]
    )
    show_parser.add_argument(
        "table",
        choices=["fields", "selections", "samples", "wordsets"],
        help="Possible names of tables.",
    )

    # Remove parser ############################################################
    remove_parser = sub_parser.add_parser(
        "remove", help="remove selection", parents=[parent_parser]
    )
    remove_parser.add_argument("names", nargs="+", help="Name(s) of selection(s).")

    # VQL parser ###############################################################
    select_parser = sub_parser.add_parser(
        "exec",
        help="Execute a VQL statement.",
        parents=[parent_parser],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:

    $ cutevariant-cli exec "SELECT favorite,chr,pos,ref,alt FROM variants"
    or
    $ cutevariant-cli exec "SELECT chr,ref,alt FROM variants" -s myselection
    or
    $ cutevariant-cli exec "IMPORT WORDSETs 'examples/gene.txt' AS mygenes"
    or
    $ cutevariant-cli exec "DROP WORDSETS mygenes"
    or
    $ cutevariant-cli exec "CREATE myselection1 FROM variants WHERE gene = 'CHID1'"
    $ cutevariant-cli exec "CREATE myselection2 FROM variants WHERE gene = 'CICP23'"
    $ cutevariant-cli exec "CREATE myselection3 = myselection2 | myselection2"
    or
    $ cutevariant-cli exec "CREATE boby FROM variants INTERSECT 'examples/test.bed'"
    """,
    )
    select_parser.add_argument("vql", help="A VQL statement.")
    select_parser.add_argument(
        "-l",
        "--limit",
        help="Limit the number of lines in output.",
        type=int,
        default=100,
    )
    # select_parser.add_argument(
    #     "-g",
    #     "--group",
    #     action="store_true",
    #     help="Group SELECT query by...(chr,pos,ref,alt).",
    # )
    select_parser.add_argument(
        "-s", "--to-selection", help="Save SELECT query into a selection name."
    )

    # Set parser ###############################################################
    # set_parser = sub_parser.add_parser("set", help="Set variable", parents=[parent_parser])

    # Workaround for sphinx-argparse module that require the object parser
    # before the call of parse_args()
    if "html" in sys.argv:
        return parser

    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Set log level
    # _logger.setLevel(logging.DEBUG)
    log_level(args.verbose)

    # Create DB parser #########################################################
    if args.subparser == "createdb":
        if not args.db:
            # Database file is not set:
            # The output file will be based on the name of the VCF one
            args.db = args.input + ".db"

        if os.path.exists(args.db):
            # Remove existing file
            os.remove(args.db)

        conn = sql.get_sql_connection(args.db)
        if conn:
            # TODO: bug ... max is not 100...
            for i, message in progressbar.progressbar(
                async_import_file(conn, args.input), redirect_stdout=True
            ):
                print(message)

        print("The database is successfully created!")
        exit()

    # Prepare SQL connection on DB file
    if "CUTEVARIANT_DB" in os.environ and args.subparser != "createdb":
        args.db = os.environ["CUTEVARIANT_DB"]
    elif not args.db:
        print("You must specify a database file via $CUTEVARIANT_DB or --db argument")
        exit()

    # Init SQL connection
    conn = sql.get_sql_connection(args.db)

    # Show parser ##############################################################
    if args.subparser == "show":
        if args.table == "fields":
            display_sql_results(
                (i.values() for i in sql.get_fields(conn)),
                ["id", "name", "table", "type", "description"],
            )

        if args.table == "samples":
            display_sql_results(
                (i.values() for i in sql.get_samples(conn)), ["id", "name"]
            )

        if args.table == "selections":
            display_sql_results(
                (i.values() for i in sql.get_selections(conn)),
                ["id", "name", "variant_count"],
            )

        if args.table == "wordsets":
            display_sql_results(
                (i.values() for i in sql.get_wordsets(conn)), ["id", "word_count"]
            )

    # Remove parser ############################################################
    if args.subparser == "remove":
        for name in args.names:
            sql.delete_selection_by_name(conn, name)

    # VQL parser ###############################################################
    if args.subparser == "exec":
        query = "".join(args.vql)
        vql_command = None

        # Test the VQL query
        try:
            cmd = vql.parse_one_vql(query)
        except (vql.textx.TextXSyntaxError, vql.VQLSyntaxError) as e:
            # Available attributes: e.message, e.line, e.col
            print("%s: %s, col: %d" % (e.__class__.__name__, e.message, e.col))
            print("For query:", query)
            exit(0)

        # Select command with redirection to selection
        if cmd["cmd"] == "select_cmd" and args.to_selection:
            vql_command = partial(
                command.create_cmd,
                conn,
                args.to_selection,
                source=cmd["source"],
                filters=cmd["filters"],
            )

        try:
            # Is it redundant with check_vql ?
            # No because we also execute SQL statement here
            if vql_command:
                ret = vql_command()
            else:
                ret = command.create_command_from_obj(conn, cmd)()
            if not isinstance(ret, dict):
                # For drop_cmd, import_cmd,
                ret = list(ret)
        except (sqlite3.DatabaseError, vql.VQLSyntaxError) as e:
            LOGGER.exception(e)
            exit(0)

        LOGGER.debug("SQL result: %s", ret)
        LOGGER.debug("VQL command: %s", cmd["cmd"])
        # Note: show_cmd is supported in a separated command option

        # Select command
        if cmd["cmd"] in ("select_cmd",) and not args.to_selection:
            display_sql_results((i.values() for i in ret), ["id"] + cmd["fields"])
            exit(1)

        if (
            cmd["cmd"] in ("drop_cmd", "import_cmd", "create_cmd", "set_cmd", "bed_cmd")
            or args.to_selection
        ):
            # PS: to_selection is used to detect select_cmd with selection creation
            display_query_status(ret)