Example #1
def cross_test(sqlite_cmdline, codec):
	(query_bytes, expected_bytes) = test_query()
	(official_output, official_error) = call_program(sqlite_cmdline, query_bytes)
	# We can't use os.linesep here since binaries may belong to different platforms (Win32/MinGW vs. MSYS/Cygwin vs. WSL...)
	official_output = official_output.replace(b"\r\n", b"\n")
	official_error = official_error.replace(b"\r\n", b"\n")
	if official_output != expected_bytes:
		raise sqlite3.ProgrammingError("expected bytes are wrong: official %s != expected %s" % (repr(official_output), repr(expected_bytes)))
	if official_error:
		raise sqlite3.ProgrammingError("did not expect errors from official binary")
Example #2
    def from_database_id(cls, record_id, db_cursor):
        """
        Create a SongChunk instance from data already stored in the database. Does NOT load the spectrogram into
        memory; only the archipelagos and the metadata describing where the spectrogram image is stored are loaded
        into the instance.
        :param record_id: The RecordID of the SongChunk in the database that should be turned into an instance
        :param db_cursor: A cursor for the SQLite3 database
        :return: A SongChunk instance which has its specname, archipelagos, and spec_in_memory fields populated
        using the data in the database pointed to by db_cursor
        """
        chunk = SongChunk()
        # Get the specpath

        db_cursor.execute(
            "SELECT SpecPath FROM chunks where RecordID={}".format(record_id))
        path = db_cursor.fetchone()
        # Couldn't find chunk with RecordID record_id in the database
        if path is None:
            raise sqlite3.ProgrammingError(
                "The RecordID ({}) of the chunk you tried to load wasn't"
                " found in the database".format(record_id))
        chunk.specname = path[0]

        # Fetch whether the spectrogram has actually been written to the SpecPath
        db_cursor.execute(
            "SELECT SpecWritten FROM chunks where RecordID={}".format(
                record_id))
        chunk.spec_in_memory = bool(db_cursor.fetchone()[0])

        # Check if there are archipelagos for the chunk
        db_cursor.execute(
            "SELECT NumACP FROM chunks where RecordID={}".format(record_id))
        num_acp = db_cursor.fetchone()[0]
        if num_acp:
            # Fetch the archipelagos if there are supposed to be archipelagos
            db_cursor.execute(
                "SELECT ArchID FROM archs where ParentChunk={}".format(
                    record_id))
            archs = db_cursor.fetchall()
            if len(archs) != num_acp:
                raise sqlite3.ProgrammingError(
                    "Did not find the expected number of archipelagos in the database. "
                    "Expected: {}, Found: {}".format(num_acp, len(archs)))
            for arch in archs:
                chunk.archipelagos.append(
                    DenseArchipelago.from_database_id(arch[0], db_cursor))
        chunk._archipelagos_initialized = True

        db_cursor.execute(
            "SELECT Height, Width FROM chunks where RecordID={}".format(
                record_id))
        chunk.height, chunk.width = db_cursor.fetchone()

        return chunk
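A hedged usage sketch for the method above; the database path and RecordID are illustrative, and a chunks table with the columns queried above is assumed to exist:

import sqlite3

conn = sqlite3.connect("songs.db")          # illustrative path
cursor = conn.cursor()
try:
    chunk = SongChunk.from_database_id(42, cursor)   # 42: an arbitrary RecordID
    print(chunk.specname, chunk.height, chunk.width, len(chunk.archipelagos))
except sqlite3.Error as err:
    # Catches the ProgrammingError raised above as well as schema problems.
    print("Chunk could not be loaded:", err)
finally:
    conn.close()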
Example #3
def self_test(codec):
    (query_bytes, expected_bytes) = test_query()
    if not (lambda stdin, stdout, stderr: not main(
            sys.argv[0], stdin=stdin, stdout=stdout, stderr=stderr) and
            stdout.getvalue() == expected_bytes)(io.BytesIO(query_bytes),
                                                 io.BytesIO(), io.BytesIO()):
        raise sqlite3.ProgrammingError("byte I/O is broken")
    if not (lambda stdin, stdout, stderr: not main(
            sys.argv[0], stdin=stdin, stdout=stdout, stderr=stderr) and stdout.
            getvalue() == codec.decode(expected_bytes, 'surrogateescape'))(
                io.StringIO(query_bytes.decode('ascii')), io.StringIO(),
                io.StringIO()):
        raise sqlite3.ProgrammingError("string I/O is broken")
Example #4
 def test_sqlite_close(self, mock_graph):
     # make sure this weird but harmless sqlite3 exception is
     # caught
     mock_graph.return_value.close.side_effect = sqlite3.ProgrammingError(
         "You made a wrong")
     store = TripleStore.connect("SQLITE", "", "")
     store.close()
Example #5
    def __init__(self):
        self.__logger = Logger()

        self._request_exceptions = [type(item) for item in [requests.ConnectionError(), requests.HTTPError(),
                                                            requests.TooManyRedirects(), requests.Timeout(),
                                                            requests.TooManyRedirects(),
                                                            requests.RequestException(), requests.ConnectTimeout(),
                                                            requests.ReadTimeout()]]

        self._system_errors = [type(item) for item in [KeyError(), AttributeError(), IndexError(),
                                                       ZeroDivisionError(), SystemError(), ValueError(),
                                                       AssertionError()]]

        self._file_errors = [type(item) for item in [FileExistsError(), FileNotFoundError()]]

        self._database_errors = [type(item) for item in [sqlite3.Error(), sqlite3.DataError(),
                                                         sqlite3.ProgrammingError(), sqlite3.DatabaseError(),
                                                         sqlite3.NotSupportedError(), sqlite3.IntegrityError(),
                                                         sqlite3.InterfaceError(), sqlite3.InternalError(),
                                                         sqlite3.OperationalError()]]

        self._speech_recognizer_errors = [type(item) for item in
                                          [sr.RequestError(), sr.UnknownValueError(), sr.WaitTimeoutError(),
                                           sr.RequestError()]]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)
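A hedged sketch of how type lists like these are typically consumed: the exact type of a caught exception is looked up in one of the groups (subclasses deliberately do not match). The grouping below is a small illustrative subset, not the handler's real dispatch code:

import sqlite3

database_errors = [type(item) for item in [sqlite3.Error(), sqlite3.DataError(),
                                           sqlite3.ProgrammingError()]]

def classify(err):
    # Exact-type membership test, mirroring the lists built in __init__ above.
    if type(err) in database_errors:
        return "database error"
    return "unhandled error"

print(classify(sqlite3.ProgrammingError("Cannot operate on a closed database.")))
print(classify(ValueError("not a database problem")))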
Example #6
def get_columns(cursor, table):
    """Returns list of column names used in table."""
    cursor.execute('PRAGMA table_info({0})'.format(table))
    columns = [x[1] for x in cursor]
    if not columns:
        raise sqlite3.ProgrammingError('no such table: {0}'.format(table))
    return columns
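A hedged usage sketch with a throwaway in-memory table (the table name and columns are illustrative):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT)")
cursor = conn.cursor()

print(get_columns(cursor, "people"))        # ['id', 'name']
try:
    get_columns(cursor, "missing_table")
except sqlite3.ProgrammingError as err:
    print(err)                              # no such table: missing_table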
Example #7
File: s3m.py Project: ivknv/s3m
    def in_transaction(self):
        """Analogous to :any:`sqlite3.Connection.in_transaction`"""

        if self.connection is not None:
            return self.connection.in_transaction

        raise sqlite3.ProgrammingError("Cannot operate on a closed database.")
Example #8
 def _execute(self, cmd, *args):
     """执行sql命令,返回结果"""
     try:
         cursor = self._conn.cursor()
         return cursor.execute(cmd, *args)
     except AttributeError:
         raise sqlite3.ProgrammingError('Cannot operate on '
                                        'a closed database.')
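The snippet above is a method of a wrapper class that keeps its connection in self._conn. A minimal self-contained sketch (an assumption about the surrounding class, not the original) showing why AttributeError maps to the "closed database" message: once close() sets _conn to None, calling cursor() on it raises AttributeError:

import sqlite3

class Database:
    def __init__(self, path=":memory:"):
        self._conn = sqlite3.connect(path)

    def close(self):
        self._conn.close()
        self._conn = None   # later calls hit None.cursor() -> AttributeError

    def _execute(self, cmd, *args):
        try:
            cursor = self._conn.cursor()
            return cursor.execute(cmd, *args)
        except AttributeError:
            raise sqlite3.ProgrammingError('Cannot operate on '
                                           'a closed database.')

db = Database()
db._execute("CREATE TABLE t (x)")
db.close()
try:
    db._execute("SELECT * FROM t")
except sqlite3.ProgrammingError as err:
    print(err)              # Cannot operate on a closed database.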
Example #9
 def _cursor(self):
     try:
         return self._conn.cursor()
     except sqlite3.ProgrammingError as e:
         if not self._cursor_fail:
             self._cursor_fail = True
             self._conn = sqlite3.connect(self.db_path)
             return self._conn.cursor()
         else:
             raise sqlite3.ProgrammingError(e)
Example #10
File: s3m.py Project: ivknv/s3m
    def cursor(self):
        """Analogous to :any:`sqlite3.Connection.cursor`"""

        if self.single_cursor_mode:
            if self._cursor is None:
                raise sqlite3.ProgrammingError(
                    "Cannot operate on a closed database.")

            return self._cursor

        return Cursor(self)
Example #11
    def patched_execute(self, sql, *args, **kwargs):
        nonlocal count, sql_seen

        if sql == fail_on_sql:
            sql_seen = True

            if count == fail_on_count:
                raise sqlite3.ProgrammingError("Uh oh")

            count += 1

        return execute(self, sql, *args, **kwargs)
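patched_execute is a test closure: it wraps the real execute (captured in the enclosing scope along with count, sql_seen, fail_on_sql and fail_on_count) and raises on the Nth occurrence of one specific statement. The original fixture is not shown; a self-contained sketch of the same failure-injection idea, using a hypothetical delegating cursor wrapper instead of whatever patching machinery the original test uses:

import sqlite3

class FailingCursor:
    """Delegate to a real cursor, but fail on the Nth occurrence of one statement."""

    def __init__(self, cursor, fail_on_sql, fail_on_count=0):
        self._cursor = cursor
        self._fail_on_sql = fail_on_sql
        self._fail_on_count = fail_on_count
        self._count = 0

    def execute(self, sql, *args, **kwargs):
        if sql == self._fail_on_sql:
            if self._count == self._fail_on_count:
                raise sqlite3.ProgrammingError("Uh oh")
            self._count += 1
        return self._cursor.execute(sql, *args, **kwargs)

    def __getattr__(self, name):
        return getattr(self._cursor, name)      # everything else passes through

conn = sqlite3.connect(":memory:")
cursor = FailingCursor(conn.cursor(), fail_on_sql="SELECT 1")
try:
    cursor.execute("SELECT 1")
except sqlite3.ProgrammingError as err:
    print(err)                                  # Uh oh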
Example #12
    def executemany(self, cmd: str, arg_iter):
        """迭代执行sql命令, 返回结果

        cmd: sql语句
        arg_iter: 由完整执行一次sql命令所需参数所组成的迭代器
        """
        try:
            cursor = self._conn.cursor()
            return cursor.executemany(cmd, arg_iter)
        except AttributeError:
            raise sqlite3.ProgrammingError('Cannot operate on '
                                           'a closed database.')
Example #13
    def from_database_id(cls, arch_id, db_connection):
        """
        Load in a DenseArchipelago from the database
        :param arch_id: The ArchID in the database which corresponds to the DenseArchipelago we will be loading
        :param db_connection: A cursor for the SQLite3 database (fetchone()/fetchall() are called on it)
        :return: A DenseArchipelago instance which corresponds exactly to the archipelago with ArchID=arch_id in the
        database pointed to by db_connection
        """
        load_arch = DenseArchipelago()
        # Get the bounding box
        db_connection.execute(
            "SELECT LeftBd FROM archs WHERE ArchID={}".format(arch_id))
        left_bd = db_connection.fetchone()
        if left_bd is None:
            raise sqlite3.ProgrammingError(
                "The archipelago with ArchId={} isn't in the database".format(
                    arch_id))
        load_arch.left_bd = left_bd[0]
        db_connection.execute(
            "SELECT RightBd FROM archs WHERE ArchID={}".format(arch_id))
        load_arch.right_bd = db_connection.fetchone()[0]
        db_connection.execute(
            "SELECT UpBd FROM archs WHERE ArchID={}".format(arch_id))
        load_arch.upper_bd = db_connection.fetchone()[0]
        db_connection.execute(
            "SELECT LowBd FROM archs WHERE ArchID={}".format(arch_id))
        load_arch.lower_bd = db_connection.fetchone()[0]

        # Load all of the land into the archipelago
        db_connection.execute(
            "SELECT X, Y FROM land WHERE ParentArchipelago={}".format(arch_id))
        lands = db_connection.fetchall()
        if not lands:
            raise sqlite3.ProgrammingError(
                "Unable to find land associated with ArchId={} in database".
                format(arch_id))
        for land_piece in lands:
            load_arch.land.append(land_piece)

        return load_arch
Example #14
def insert_records(cursor, table, columns, records):
    table = normalize_names(table)
    columns = normalize_names(columns)
    sql = 'INSERT INTO {0} ({1}) VALUES ({2})'.format(
        table,
        ', '.join(columns),
        ', '.join(['?'] * len(columns)),
    )
    try:
        cursor.executemany(sql, records)
    except sqlite3.ProgrammingError as error:
        if 'incorrect number of bindings' in str(error).lower():
            msg = (
                '{0}\n\nThe records {1!r} contains some rows with too '
                'few or too many values. Before loading this data, it '
                'must be normalized so each row contains a number of '
                'values equal to the number of columns being loaded.').format(
                    error, records)
            error = sqlite3.ProgrammingError(msg)
            error.__cause__ = None
        raise error
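normalize_names is a helper that is not shown above; the sketch below uses a pass-through stand-in (an assumption) just to demonstrate how a bindings mismatch gets re-wrapped with the explanatory message:

import sqlite3

def normalize_names(obj):
    # Hypothetical stand-in for the real helper: pass names through unchanged.
    return obj

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE demo (a, b)")
cursor = conn.cursor()
try:
    # The second record has three values for two columns, so sqlite3 raises
    # an "incorrect number of bindings" error, which insert_records re-wraps.
    insert_records(cursor, "demo", ["a", "b"], [(1, 2), (1, 2, 3)])
except sqlite3.ProgrammingError as err:
    print(err)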
Example #15
def getValue(cursor,
             tableName,
             desiredColumn,
             searchColumn,
             searchValue,
             getMultiple=False,
             sortedResults=False,
             sortBy="",
             descending=False):
    selectDataString = "SELECT " + desiredColumn + " FROM " + tableName
    selectDataString += " WHERE " + searchColumn + "=?"
    if sortedResults == True:
        # Sort the results by the given column if the call requests it
        # The order is ascending by default
        selectDataString += f" ORDER BY {sortBy}"
        if descending == True:
            # Sort the results in descending order
            selectDataString += " DESC"
    cursor.execute(selectDataString, (searchValue, ))
    # Assigns the list of tuples given by fetchall() [or None] to
    # a variable so it can be accessed multiple times
    returnList = cursor.fetchall()
    # If there was no match, raise an error explaining the problem
    # instead of trying to index an empty result list
    if (len(returnList) == 0):
        raise sqlite3.ProgrammingError(
            f"{desiredColumn} for {searchColumn} {searchValue} was not found")
    else:
        # We need to convert the list of tuples into a list of values
        for index in range(0, len(returnList)):
            returnList[index] = returnList[index][0]
    if getMultiple == True:
        # Return the whole list if the function call asks for multiple values
        return returnList
    else:
        # This returns the first value of the list (the desired value)
        return returnList[0]
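A hedged usage sketch with an illustrative in-memory table, showing both the single-value and the sorted multi-value paths:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT, age INTEGER)")
conn.executemany("INSERT INTO users VALUES (?, ?)",
                 [("alice", 30), ("bob", 25), ("alice", 41)])
cursor = conn.cursor()

print(getValue(cursor, "users", "age", "name", "bob"))               # 25
print(getValue(cursor, "users", "age", "name", "alice",
               getMultiple=True, sortedResults=True, sortBy="age"))  # [30, 41]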
Example #16
def perform_check(keys, shelve_stacks, args, logger):

    """
    This is the most important method. After preparing the data structure,
    this function creates the real transcript instances and checks that
    they are correct when looking at the underlying genome sequence.
    This is also the point at which we start using multithreading, if
    so requested.
    :param keys: sorted list of [tid, sequence]
    :param shelve_stacks: dictionary containing the name and the handles of the shelf DBs
    :param args: the namespace
    :param logger: logger
    :return:
    """

    counter = 0

    # FASTA extraction *has* to be done at the main process level, it's too slow
    # to create an index in each process.

    if args.json_conf["prepare"]["single"] is True or args.json_conf["threads"] == 1:

        # Use functools to pre-configure the function
        # with all necessary arguments aside for the lines
        partial_checker = functools.partial(
            create_transcript,
            canonical_splices=args.json_conf["prepare"]["canonical"],
            logger=logger,
            force_keep_cds=not args.json_conf["prepare"]["strip_cds"])

        for tid, chrom, key in keys:
            tid, shelf_name = tid
            try:
                tobj = json.loads(next(shelve_stacks[shelf_name]["cursor"].execute(
                    "SELECT features FROM dump WHERE tid = ?", (tid,)))[0])
            except sqlite3.ProgrammingError as exc:
                raise sqlite3.ProgrammingError("{}. Tids: {}".format(exc, tid))

            if chrom not in args.json_conf["reference"]["genome"].references:
                raise KeyError("Invalid chromosome name! {}, {}, {}, {}".format(tid, shelf_name, chrom, key))

            transcript_object = partial_checker(
                tobj,
                str(args.json_conf["reference"]["genome"].fetch(chrom, key[0]-1, key[1])),
                key[0], key[1],
                lenient=args.json_conf["prepare"]["lenient"],
                is_reference=tobj["is_reference"],
                strand_specific=tobj["strand_specific"])
            if transcript_object is None:
                continue
            counter += 1
            if counter >= 10**4 and counter % (10**4) == 0:
                logger.info("Retrieved %d transcript positions", counter)
            elif counter >= 10**3 and counter % (10**3) == 0:
                logger.debug("Retrieved %d transcript positions", counter)
            print(transcript_object.format("gtf"),
                  file=args.json_conf["prepare"]["files"]["out"])
            print(transcript_object.fasta,
                  file=args.json_conf["prepare"]["files"]["out_fasta"])
    else:
        # pylint: disable=no-member

        # submission_queue = multiprocessing.JoinableQueue(-1)

        batches = list(enumerate(keys, 1))
        # np.random.shuffle(batches)
        random.shuffle(batches)
        kwargs = {
            "fasta_out": os.path.basename(args.json_conf["prepare"]["files"]["out_fasta"].name),
            "gtf_out": os.path.basename(args.json_conf["prepare"]["files"]["out"].name),
            "tmpdir": args.tempdir.name,
            "seed": args.json_conf["seed"],
            "lenient": args.json_conf["prepare"]["lenient"],
            "canonical_splices": args.json_conf["prepare"]["canonical"],
            "force_keep_cds": not args.json_conf["prepare"]["strip_cds"],
            "log_level": args.level
        }

        working_processes = []
        for idx, batch in enumerate(np.array_split(batches, args.json_conf["threads"]), 1):
            batch_file = tempfile.NamedTemporaryFile(delete=False, mode="wb")
            msgpack.dump(batch.tolist(), batch_file)
            batch_file.flush()
            batch_file.close()

            proc = CheckingProcess(
                batch_file.name,
                args.logging_queue,
                args.json_conf["reference"]["genome"].filename,
                idx,
                shelve_stacks.keys(),
                **kwargs)
            proc.start()
            working_processes.append(proc)

        [_.join() for _ in working_processes]

        partial_gtf = [os.path.join(args.tempdir.name,
                                    "{0}-{1}".format(
                                        os.path.basename(args.json_conf["prepare"]["files"]["out"].name),
                                        _ + 1)) for _ in range(args.json_conf["threads"])]
        merge_partial(partial_gtf, args.json_conf["prepare"]["files"]["out"])

        partial_fasta = [os.path.join(
            args.tempdir.name,
            "{0}-{1}".format(os.path.basename(args.json_conf["prepare"]["files"]["out_fasta"].name), _ + 1))
                         for _ in range(args.json_conf["threads"])]
        merge_partial(partial_fasta, args.json_conf["prepare"]["files"]["out_fasta"])

    args.json_conf["prepare"]["files"]["out_fasta"].close()
    args.json_conf["prepare"]["files"]["out"].close()

    logger.setLevel(logging.INFO)
    # logger.info("Finished to analyse %d transcripts (%d retained)",
    #             len(exon_lines), counter)
    logger.setLevel(args.level)
    return
Example #17
 def execute(*args):
     raise sqlite3.ProgrammingError('unexpected error')
Example #18
 def exception_handling(e):
     raise sqlite3.ProgrammingError(
         str(e) +
         '\nreq = “{}”\nparams = “{}”'.format(req, params))
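exception_handling refers to req and params from its enclosing scope. A hedged sketch (an assumption, not the original code) of the kind of query wrapper such a closure is typically nested in:

import sqlite3

def run_query(conn, req, params=()):
    def exception_handling(e):
        raise sqlite3.ProgrammingError(
            str(e) +
            '\nreq = “{}”\nparams = “{}”'.format(req, params))

    try:
        return conn.execute(req, params).fetchall()
    except sqlite3.Error as e:
        exception_handling(e)

conn = sqlite3.connect(":memory:")
try:
    run_query(conn, "SELECT * FROM missing_table")
except sqlite3.ProgrammingError as err:
    print(err)        # original error plus the offending req and params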
Example #19
def perform_check(keys, shelve_stacks, args, logger):
    """
    This is the most important method. After preparing the data structure,
    this function creates the real transcript instances and checks that
    they are correct when looking at the underlying genome sequence.
    This is also the point at which we start using multithreading, if
    so requested.
    :param keys: sorted list of [tid, sequence]
    :param shelve_stacks: dictionary containing the name and the handles of the shelf DBs
    :param args: the namespace
    :param logger: logger
    :return:
    """

    counter = 0

    # FASTA extraction *has* to be done at the main process level, it's too slow
    # to create an index in each process.

    if args.json_conf["prepare"]["single"] is True or args.json_conf[
            "prepare"]["procs"] == 1:

        # Use functools to pre-configure the function
        # with all necessary arguments aside for the lines
        partial_checker = functools.partial(
            create_transcript,
            canonical_splices=args.json_conf["prepare"]["canonical"],
            logger=logger,
            force_keep_cds=not args.json_conf["prepare"]["strip_cds"])

        for tid, chrom, key in keys:
            tid, shelf_name = tid
            try:
                tobj = json.loads(
                    next(shelve_stacks[shelf_name]["cursor"].execute(
                        "SELECT features FROM dump WHERE tid = ?",
                        (tid, )))[0])
            except sqlite3.ProgrammingError as exc:
                raise sqlite3.ProgrammingError("{}. Tids: {}".format(exc, tid))

            transcript_object = partial_checker(
                tobj,
                str(args.json_conf["reference"]["genome"][chrom][key[0] -
                                                                 1:key[1]]),
                key[0],
                key[1],
                lenient=args.json_conf["prepare"]["lenient"],
                is_reference=tobj["is_reference"],
                strand_specific=tobj["strand_specific"])
            if transcript_object is None:
                continue
            counter += 1
            if counter >= 10**4 and counter % (10**4) == 0:
                logger.info("Retrieved %d transcript positions", counter)
            elif counter >= 10**3 and counter % (10**3) == 0:
                logger.debug("Retrieved %d transcript positions", counter)
            print(transcript_object.format("gtf"),
                  file=args.json_conf["prepare"]["files"]["out"])
            print(transcript_object.fasta,
                  file=args.json_conf["prepare"]["files"]["out_fasta"])
    else:
        # pylint: disable=no-member

        submission_queue = multiprocessing.Queue(-1)

        working_processes = [
            CheckingProcess(
                submission_queue,
                args.logging_queue,
                args.json_conf["reference"]["genome"].filename,
                _ + 1,
                os.path.basename(
                    args.json_conf["prepare"]["files"]["out_fasta"].name),
                os.path.basename(
                    args.json_conf["prepare"]["files"]["out"].name),
                args.tempdir.name,
                lenient=args.json_conf["prepare"]["lenient"],
                canonical_splices=args.json_conf["prepare"]["canonical"],
                log_level=args.level)
            for _ in range(args.json_conf["prepare"]["procs"])
        ]

        [_.start() for _ in working_processes]

        for counter, keys in enumerate(keys):
            tid, chrom, (pos) = keys
            tid, shelf_name = tid
            tobj = json.loads(
                next(shelve_stacks[shelf_name]["cursor"].execute(
                    "SELECT features FROM dump WHERE tid = ?", (tid, )))[0])
            submission_queue.put((tobj, pos[0], pos[1], counter + 1))

        submission_queue.put(tuple(["EXIT"] * 4))

        [_.join() for _ in working_processes]

        partial_gtf = [
            os.path.join(
                args.tempdir.name, "{0}-{1}".format(
                    os.path.basename(
                        args.json_conf["prepare"]["files"]["out"].name),
                    _ + 1)) for _ in range(args.json_conf["prepare"]["procs"])
        ]
        merge_partial(partial_gtf, args.json_conf["prepare"]["files"]["out"])

        partial_fasta = [
            os.path.join(
                args.tempdir.name, "{0}-{1}".format(
                    os.path.basename(
                        args.json_conf["prepare"]["files"]["out_fasta"].name),
                    _ + 1)) for _ in range(args.json_conf["prepare"]["procs"])
        ]
        merge_partial(partial_fasta,
                      args.json_conf["prepare"]["files"]["out_fasta"])

    args.json_conf["prepare"]["files"]["out_fasta"].close()
    args.json_conf["prepare"]["files"]["out"].close()

    logger.setLevel(logging.INFO)
    # logger.info("Finished to analyse %d transcripts (%d retained)",
    #             len(exon_lines), counter)
    logger.setLevel(args.level)
    return
Example #20
    def run(self):
        """Start polling the queue, analyse the loci, and send them to the printer process."""
        self.logger.debug("Starting to parse data for {0}".format(self.name))
        current_chrom = None

        # Read-only connection

        conn = sqlite3.connect("file:{}?mode=ro".format(
            os.path.join(self._tempdir, "temp_store.db")),
                               uri=True,
                               isolation_level="DEFERRED",
                               timeout=60,
                               check_same_thread=False)
        cursor = conn.cursor()

        while True:
            counter = self.locus_queue.get()[0]
            if counter == "EXIT":
                self.logger.debug("EXIT received for %s", self.name)
                self.locus_queue.task_done()
                self.locus_queue.put((counter, ))
                self.__close_handles()
                break
                # self.join()
            else:
                assert isinstance(counter, int), type(counter)
                try:
                    transcripts = cursor.execute(
                        "SELECT json FROM transcripts WHERE counter=?",
                        (str(counter), )).fetchone()
                except sqlite3.ProgrammingError as exc:
                    self.logger.exception(
                        sqlite3.ProgrammingError(
                            (exc, counter, str(counter), (str(counter), ))))
                    self.__close_handles()
                    break

                if transcripts is None:
                    raise KeyError("Nothing found in the database for %s",
                                   counter)

                transcripts = json.loads(transcripts[0])
                if len(transcripts) == 0:
                    stranded_loci = []
                else:
                    tobjects = []
                    for tjson in transcripts:
                        transcript = Transcript(logger=self.logger)
                        transcript.load_dict(tjson)
                        tobjects.append(transcript)

                    slocus = Superlocus(tobjects.pop(),
                                        stranded=False,
                                        json_conf=self.json_conf,
                                        source=self.json_conf["pick"]
                                        ["output_format"]["source"])
                    while len(tobjects) > 0:
                        slocus.add_transcript_to_locus(tobjects.pop(),
                                                       check_in_locus=False)

                    if current_chrom != slocus.chrom:
                        self.__gene_counter = 0
                        current_chrom = slocus.chrom
                    if self.regressor is not None:
                        slocus.regressor = self.regressor
                    stranded_loci = self.analyse_locus(slocus, counter)

                for stranded_locus in stranded_loci:
                    self.__gene_counter = print_locus(stranded_locus,
                                                      self.__gene_counter,
                                                      self._handles,
                                                      counter=counter,
                                                      logger=self.logger,
                                                      json_conf=self.json_conf)
                self.locus_queue.task_done()

        return
Example #21
def database_retrieval():
    '''
    Retrieves information from the database.
    Fetches data from the test database. Columns: 'name_of_method_to_test',
    'data_input', 'expected_output', 'perform_test' (boolean), 'comment'.
    If the database doesn't exist, it creates it, adds the table, and shows
    the user a message explaining that it must be populated with data.
    Output:
        A list of tuples (rows) with the values of 'name_of_method_to_test',
        'data_input', 'expected_output', and 'comment'.
    '''
    table_attributes = {
        'database_filename': name_of_module_to_test + '.sqlite3',
        'table_name': 'Test',
        'method_column': 'name_of_method_to_test',
        'input_column': 'data_input',
        'output_column': 'expected_output',
        'is_test_performed_column': 'perform_test',
        'comment_column': 'comment'
    }
    connection = sqlite3.connect(table_attributes['database_filename'])
    try:
        cursor = connection.cursor()

        # Check if the table exists
        cursor.execute(
            '''
            SELECT COUNT(*) 
            FROM sqlite_master
            WHERE type='table' 
              AND name = ? ''', (table_attributes['table_name'], ))
        if int(cursor.fetchone()[0]) != 1:
            cursor.execute('''
                CREATE TABLE {table_name} 
                    ({method_column} TEXT, 
                    {input_column} TEXT, 
                    {output_column} TEXT, 
                    {is_test_performed_column} INTEGER DEFAULT 1 /* Pity there's no BOOLEAN in SQLite */,
                    {comment_column}) '''.format(**table_attributes))

            raise sqlite3.ProgrammingError('''
    The database didn't exist. Creating a new database...
    Database file '{database_filename}' has been created.
    Fill the table '{table_name}' with data and execute this program again to 
    run the tests:
        In the '{method_column}' column you should type the name of the method 
        to test. This method must accept one single parameter of type string. 
        E.G:
            frequent_words
        In '{input_column}' the string that will be passed to the method when 
        called. It can have multiple lines, E.G:
            ACGTTGCATGTCGCATGATGCATGAGAGCT
            4
        In '{output_column}' the string that the method should return if it is 
        correct. E.G:
            CATG GCAT  '''.format(**table_attributes))

        else:
            cursor.execute('''
                SELECT {method_column}, {input_column}, {output_column}, {comment_column}
                FROM {table_name}
                WHERE {is_test_performed_column} = 1  '''.format(
                **table_attributes))
            result = cursor.fetchall()
    finally:
        connection.close()

    return result
Example #22
    def run(self):
        """Start polling the queue, analyse the loci, and send them to the printer process."""
        self.logger.debug("Starting to parse data for {0}".format(self.name))
        # Read-only connection

        conn = sqlite3.connect("file:{}?mode=ro".format(os.path.join(self._tempdir, "temp_store.db")),
                               uri=True,  # Necessary to use the Read-only mode from file string
                               isolation_level="DEFERRED",
                               timeout=60,
                               check_same_thread=False  # Necessary for SQLite3 to function in multiprocessing
                               )
        cursor = conn.cursor()

        print_cds = (not self.json_conf["pick"]["run_options"]["exclude_cds"])
        print_monoloci = (self.json_conf["pick"]["files"]["monoloci_out"] != "")
        print_subloci = (self.json_conf["pick"]["files"]["subloci_out"] != "")

        while True:
            counter = self.locus_queue.get()[0]
            if counter == "EXIT":
                self.logger.debug("EXIT received for %s", self.name)
                self.locus_queue.task_done()
                self.locus_queue.put((counter, ))
                break
            else:
                try:
                    transcripts = cursor.execute(
                        "SELECT json FROM transcripts WHERE counter=?", (str(counter),)).fetchone()
                except sqlite3.ProgrammingError as exc:
                    self.logger.exception(sqlite3.ProgrammingError((exc, counter, str(counter), (str(counter),))))
                    # self.__close_handles()
                    break
                    
                if transcripts is None:
                    raise KeyError("Nothing found in the database for %s", counter)

                transcripts = msgpack.loads(transcripts[0], raw=False)
                if len(transcripts) == 0:
                    stranded_loci = []
                    self.logger.warning("No transcript found for index %d", counter)
                else:
                    tobjects = []
                    chroms = set()
                    for tjson in transcripts:
                        definition = GtfLine(tjson["definition"]).as_dict()
                        is_reference = definition["source"] in self.json_conf["prepare"]["files"]["reference"]
                        transcript = Transcript(logger=self.logger,
                                                source=definition["source"],
                                                intron_range=self.json_conf["pick"]["run_options"]["intron_range"],
                                                is_reference=is_reference)
                        transcript.chrom, transcript.start, transcript.end = (definition["chrom"],
                                                                              definition["start"], definition["end"])
                        chroms.add(transcript.chrom)
                        assert len(chroms) == 1, chroms
                        try:
                            transcript.id = definition["transcript"]
                        except KeyError:
                            raise KeyError(definition)
                        transcript.strand, transcript.feature = definition["strand"], definition["feature"]
                        transcript.attributes = definition["attributes"]
                        try:
                            for exon in tjson["exon_lines"]:
                                start, end, feature, phase = exon
                                transcript.add_exon((start, end), feature=feature, phase=phase)
                            transcript.finalize()
                            tobjects.append(transcript)
                        except InvalidTranscript as exc:
                            self.logger.exception("Transcript %s is invalid. Ignoring. Error: %s",
                                                  transcript.id, exc)

                    slocus = Superlocus(tobjects.pop(),
                                        stranded=False,
                                        json_conf=self.json_conf,
                                        source=self.json_conf["pick"]["output_format"]["source"])
                    while len(tobjects) > 0:
                        slocus.add_transcript_to_locus(tobjects.pop(),
                                                       check_in_locus=False)

                    if self.regressor is not None:
                        slocus.regressor = self.regressor
                    stranded_loci = self.analyse_locus(slocus, counter)

                serialise_locus(stranded_loci, self.dump_conn, counter,
                                print_cds=print_cds,
                                print_monosubloci=print_monoloci,
                                print_subloci=print_subloci)
                if len(stranded_loci) == 0:
                    self.logger.warning("No loci left for index %d", counter)
                self.status_queue.put(counter)
                self.locus_queue.task_done()

        return
Example #23
def perform_check(keys, shelve_names, mikado_config: MikadoConfiguration,
                  logger):
    """
    This is the most important method. After preparing the data structure,
    this function creates the real transcript instances and checks that
    they are correct when looking at the underlying genome sequence.
    This is also the point at which we start using multithreading, if
    so requested.
    :param keys: sorted list of [tid, sequence]
    :param shelve_names: list of the temporary files.
    :param mikado_config: MikadoConfiguration
    :param logger: logger
    :return:
    """

    counter = 0

    # FASTA extraction *has* to be done at the main process level, it's too slow
    # to create an index in each process.

    if mikado_config.prepare.single is True or mikado_config.threads == 1:

        shelve_stacks = dict(
            (shelf, open(shelf, "rb")) for shelf in shelve_names)
        # Use functools to pre-configure the function
        # with all necessary arguments aside for the lines
        partial_checker = functools.partial(
            create_transcript,
            canonical_splices=mikado_config.prepare.canonical,
            codon_table=mikado_config.serialise.codon_table,
            logger=logger,
            strip_faulty_cds=mikado_config.prepare.strip_faulty_cds)

        for tid, chrom, key in keys:
            tid, shelf_name, write_start, write_length = tid
            try:
                shelf = shelve_stacks[shelf_name]
                shelf.seek(write_start)
                tobj = msgpack.loads(zlib.decompress(
                    (shelf.read(write_length))),
                                     raw=False)
            except sqlite3.ProgrammingError as exc:
                raise sqlite3.ProgrammingError("{}. Tids: {}".format(exc, tid))

            if chrom not in mikado_config.reference.genome.references:
                raise KeyError(
                    "Invalid chromosome name! {}, {}, {}, {}".format(
                        tid, shelf_name, chrom, key))

            try:
                seq = str(
                    mikado_config.reference.genome.fetch(
                        chrom, key[0] - 1, key[1]))
            except ValueError:
                raise ValueError(tobj)

            transcript_object = partial_checker(
                tobj,
                seq,
                key[0],
                key[1],
                lenient=mikado_config.prepare.lenient,
                is_reference=tobj["is_reference"],
                strand_specific=tobj["strand_specific"])
            if transcript_object is None:
                continue
            counter += 1
            if counter >= 10**4 and counter % (10**4) == 0:
                logger.info("Retrieved %d transcript positions", counter)
            elif counter >= 10**3 and counter % (10**3) == 0:
                logger.debug("Retrieved %d transcript positions", counter)
            print(transcript_object.format("gtf"),
                  file=mikado_config.prepare.files.out)
            print(transcript_object.fasta,
                  file=mikado_config.prepare.files.out_fasta)
    else:
        # pylint: disable=no-member

        # submission_queue = multiprocessing.JoinableQueue(-1)

        batches = list(enumerate(keys, 1))
        # np.random.shuffle(batches)
        random.shuffle(batches)
        kwargs = {
            "fasta_out":
            os.path.basename(mikado_config.prepare.files.out_fasta.name),
            "gtf_out":
            os.path.basename(mikado_config.prepare.files.out.name),
            "tmpdir":
            mikado_config.tempdir.name,
            "seed":
            mikado_config.seed,
            "lenient":
            mikado_config.prepare.lenient,
            "canonical_splices":
            mikado_config.prepare.canonical,
            "strip_faulty_cds":
            mikado_config.prepare.strip_faulty_cds,
            "codon_table":
            mikado_config.serialise.codon_table,
            "log_level":
            mikado_config.log_settings.log_level
        }

        working_processes = []
        batch_files = []
        for idx, batch in enumerate(
                np.array_split(np.array(batches, dtype=object),
                               mikado_config.threads), 1):
            batch_file = tempfile.NamedTemporaryFile(delete=True, mode="wb")
            msgpack.dump(batch.tolist(), batch_file)
            batch_file.flush()
            batch_files.append(batch_file)

            proc = CheckingProcess(batch_file.name,
                                   mikado_config.logging_queue,
                                   mikado_config.reference.genome.filename,
                                   idx, shelve_names, **kwargs)
            try:
                proc.start()
            except TypeError as exc:
                logger.critical("Failed arguments: %s",
                                (batch_file.name, mikado_config.logging_queue,
                                 mikado_config.reference.genome.filename, idx,
                                 shelve_names))
                logger.critical("Failed kwargs: %s", kwargs)
                logger.critical(exc)
                raise
            working_processes.append(proc)

        [_.join() for _ in working_processes]

        partial_gtf = [
            os.path.join(
                mikado_config.tempdir.name, "{0}-{1}".format(
                    os.path.basename(mikado_config.prepare.files.out.name),
                    _ + 1)) for _ in range(mikado_config.threads)
        ]
        merge_partial(partial_gtf, mikado_config.prepare.files.out)

        partial_fasta = [
            os.path.join(
                mikado_config.tempdir.name, "{0}-{1}".format(
                    os.path.basename(
                        mikado_config.prepare.files.out_fasta.name), _ + 1))
            for _ in range(mikado_config.threads)
        ]
        merge_partial(partial_fasta, mikado_config.prepare.files.out_fasta)
        [batch_file.close() for batch_file in batch_files]

    mikado_config.prepare.files.out_fasta.close()
    mikado_config.prepare.files.out.close()

    logger.setLevel(logging.INFO)
    # logger.info("Finished to analyse %d transcripts (%d retained)",
    #             len(exon_lines), counter)
    logger.setLevel(mikado_config.log_settings.log_level)
    return