Beispiel #1
0
def request(method, url, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    global request_count

    if not default_headers and not _warning_sent:
        _warning_sent = True
        Log.warning(
            text_type(
                "The pyLibrary.env.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' "
                +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
                + "function to set `pyLibrary.env.http.default_headers`"))

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code,
                              decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()
    session.headers.update(default_headers)

    with closing(sess):
        if zip is None:
            zip = ZIP_REQUEST

        if PY2 and isinstance(url, unicode):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        _to_ascii_dict(kwargs)
        timeout = kwargs['timeout'] = coalesce(kwargs.get('timeout'),
                                               default_timeout)

        if retry == None:
            retry = Data(times=1, sleep=0)
        elif isinstance(retry, Number):
            retry = Data(times=retry, sleep=1)
        else:
            retry = wrap(retry)
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            set_default(retry, {"times": 1, "sleep": 0})

        if 'json' in kwargs:
            kwargs['data'] = value2json(kwargs['json']).encode('utf8')
            del kwargs['json']

        try:
            headers = kwargs['headers'] = unwrap(
                coalesce(kwargs.get('headers'), {}))
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed

                _to_ascii_dict(headers)
            else:
                _to_ascii_dict(headers)
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                Till(seconds=retry.sleep).wait()

            try:
                if DEBUG:
                    Log.note(u"http {{method|upper}} to {{url}}",
                             method=method,
                             url=text_type(url))
                request_count += 1
                return session.request(method=method, url=url, **kwargs)
            except Exception as e:
                errors.append(Except.wrap(e))

        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=timeout,
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])
Beispiel #2
0
    def _process_command_item(self, command_item):
        query, result, signal, trace, transaction = command_item

        with Timer("SQL Timing", debug=DEBUG):
            if transaction is None:
                # THIS IS A TRANSACTIONLESS QUERY, DELAY IT IF THERE IS A CURRENT TRANSACTION
                if self.transaction_stack:
                    with self.locker:
                        if self.too_long is None:
                            self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
                            self.too_long.on_go(self.show_transactions_blocked_warning)
                        self.delayed_queries.append(command_item)
                    return
            elif self.transaction_stack and self.transaction_stack[-1] not in [transaction, transaction.parent]:
                # THIS TRANSACTION IS NOT THE CURRENT TRANSACTION, DELAY IT
                with self.locker:
                    if self.too_long is None:
                        self.too_long = Till(seconds=TOO_LONG_TO_HOLD_TRANSACTION)
                        self.too_long.on_go(self.show_transactions_blocked_warning)
                    self.delayed_transactions.append(command_item)
                return
            else:
                # ENSURE THE CURRENT TRANSACTION IS UP TO DATE FOR THIS query
                if not self.transaction_stack:
                    # sqlite3 ALLOWS ONLY ONE TRANSACTION AT A TIME
                    DEBUG and Log.note(FORMAT_COMMAND, command=BEGIN)
                    self.db.execute(BEGIN)
                    self.transaction_stack.append(transaction)
                elif transaction != self.transaction_stack[-1]:
                    self.transaction_stack.append(transaction)
                elif transaction.exception:
                    result.exception = Except(
                        type=ERROR,
                        template="Not allowed to continue using a transaction that failed",
                        cause=transaction.exception,
                        trace=trace
                    )
                    signal.release()
                    return

                try:
                    transaction.do_all()
                except Exception as e:
                    # DEAL WITH ERRORS IN QUEUED COMMANDS
                    # WE WILL UNWRAP THE OUTER EXCEPTION TO GET THE CAUSE
                    e = Except.wrap(e)
                    err = Except(
                        type=ERROR,
                        template="Bad call to Sqlite3 while "+FORMAT_COMMAND,
                        params={"command": e.params.current.command},
                        cause=e.cause,
                        trace=e.params.current.trace
                    )
                    transaction.exception = result.exception = err

                    if query in [COMMIT, ROLLBACK]:
                        self._close_transaction(CommandItem(ROLLBACK, result, signal, trace, transaction))

                    signal.release()
                    return

            try:
                # DEAL WITH END-OF-TRANSACTION MESSAGES
                if query in [COMMIT, ROLLBACK]:
                    self._close_transaction(command_item)
                    return

                # EXECUTE QUERY
                self.last_command_item = command_item
                DEBUG and Log.note(FORMAT_COMMAND, command=query)
                curr = self.db.execute(query)
                result.meta.format = "table"
                result.header = [d[0] for d in curr.description] if curr.description else None
                result.data = curr.fetchall()
                if DEBUG and result.data:
                    text = convert.table2csv(list(result.data))
                    Log.note("Result:\n{{data|limit(100)|indent}}", data=text)
            except Exception as e:
                e = Except.wrap(e)
                err = Except(
                    type=ERROR,
                    template="Bad call to Sqlite while " + FORMAT_COMMAND,
                    params={"command": query},
                    trace=trace,
                    cause=e
                )
                result.exception = err
                if transaction:
                    transaction.exception = err
            finally:
                signal.release()
Beispiel #3
0
    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries and timestamping
        revisions once they pass the length for permanent
        storage for deletion later.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    with self.conn.transaction() as t:
                        all_data = sorted(t.get(
                            "SELECT revnum, revision, timestamp FROM csetLog"),
                                          key=lambda x: int(x[0]))

                    # Restore maximum permanents (if overflowing)
                    new_data = []
                    modified = False
                    for count, (revnum, revision,
                                timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append(
                                (revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision,
                                timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del)
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(annrevs_to_del))
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del))

                    # Delete any overflowing entries
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision, ))
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME
                                         ).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision, ))
                                    if old_files is None or len(
                                            old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            t.execute(
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                                + sql_list(
                                    quote_set(cset_entry)
                                    for cset_entry in new_data2))
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion",
                             num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while maintaining csetLog, continuing to try: ",
                    cause=e)
        return
Beispiel #4
0
    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop:
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    Till(till=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?",
                            (revnum, ))
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN "
                            + quote_set(csets_to_del)).data

                        existing_frontiers = [
                            existing_frontiers[i][0]
                            for i, _ in enumerate(existing_frontiers)
                        ]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del)

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers)
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(existing_frontiers))

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del))

                        Log.note("Deleting {{num_entries}} csetLog entries...",
                                 num_entries=len(csets_to_del))
                        t.execute("DELETE FROM csetLog WHERE revision IN " +
                                  quote_set(csets_to_del))

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while deleting from csetLog:",
                    cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return
    def _db_worker(self, please_stop):
        while not please_stop:
            try:
                with self._db_transaction():
                    result = self._query(
                        SQL_SELECT + all_columns + SQL_FROM + db_table_name +
                        SQL_WHERE + "last_updated>" +
                        quote_value(self.last_load) + SQL_ORDERBY + sql_list(
                            map(quote_column,
                                ["es_index", "name", "es_column"])))

                with self.locker:
                    for r in result.data:
                        c = row_to_column(result.header, r)
                        self._add(c)
                        if c.last_updated > self.last_load:
                            self.last_load = c.last_updated

                updates = self.todo.pop_all()
                DEBUG and updates and Log.note("{{num}} columns to push to db",
                                               num=len(updates))
                for action, column in updates:
                    while not please_stop:
                        try:
                            with self._db_transaction():
                                DEBUG and Log.note(
                                    "{{action}} db for {{table}}.{{column}}",
                                    action=action,
                                    table=column.es_index,
                                    column=column.es_column)
                                if action is EXECUTE:
                                    self.db.execute(column)
                                elif action is UPDATE:
                                    self.db.execute(
                                        "UPDATE" + db_table_name + "SET" +
                                        sql_list([
                                            "count=" + quote_value(
                                                column.count), "cardinality=" +
                                            quote_value(column.cardinality),
                                            "multi=" +
                                            quote_value(column.multi),
                                            "partitions=" + quote_value(
                                                value2json(column.partitions)),
                                            "last_updated=" +
                                            quote_value(column.last_updated)
                                        ]) + SQL_WHERE + SQL_AND.join([
                                            "es_index = " +
                                            quote_value(column.es_index),
                                            "es_column = " +
                                            quote_value(column.es_column),
                                            "last_updated < " +
                                            quote_value(column.last_updated)
                                        ]))
                                elif action is DELETE:
                                    self.db.execute(
                                        "DELETE FROM" + db_table_name +
                                        SQL_WHERE + SQL_AND.join([
                                            "es_index = " +
                                            quote_value(column.es_index),
                                            "es_column = " +
                                            quote_value(column.es_column)
                                        ]))
                                else:
                                    self._db_insert_column(column)
                            break
                        except Exception as e:
                            e = Except.wrap(e)
                            if "database is locked" in e:
                                Log.note("metadata database is locked")
                                Till(seconds=1).wait()
                                break
                            else:
                                Log.warning("problem updataing database",
                                            cause=e)

            except Exception as e:
                Log.warning("problem updataing database", cause=e)

            (Till(seconds=10) | please_stop).wait()
Beispiel #6
0
    def _normalize_revision(self, r, found_revision, push, get_diff,
                            get_moves):
        new_names = set(r.keys()) - KNOWN_TAGS
        if new_names and not r.tags:
            Log.warning(
                "hg is returning new property names {{names|quote}} for {{changeset}} from {{url}}",
                names=new_names,
                changeset=r.node,
                url=found_revision.branch.url)

        changeset = Changeset(
            id=r.node,
            id12=r.node[0:12],
            author=r.user,
            description=strings.limit(coalesce(r.description, r.desc), 2000),
            date=parse_hg_date(r.date),
            files=r.files,
            backedoutby=r.backedoutby if r.backedoutby else None,
            bug=self._extract_bug_id(r.description))
        rev = Revision(branch=found_revision.branch,
                       index=r.rev,
                       changeset=changeset,
                       parents=unwraplist(list(set(r.parents))),
                       children=unwraplist(list(set(r.children))),
                       push=push,
                       phase=r.phase,
                       bookmarks=unwraplist(r.bookmarks),
                       landingsystem=r.landingsystem,
                       etl={
                           "timestamp": Date.now().unix,
                           "machine": machine_metadata
                       })

        r.pushuser = None
        r.pushdate = None
        r.pushid = None
        r.node = None
        r.user = None
        r.desc = None
        r.description = None
        r.date = None
        r.files = None
        r.backedoutby = None
        r.parents = None
        r.children = None
        r.bookmarks = None
        r.landingsystem = None

        set_default(rev, r)

        # ADD THE DIFF
        if get_diff:
            rev.changeset.diff = self._get_json_diff_from_hg(rev)
        if get_moves:
            rev.changeset.moves = self._get_moves_from_hg(rev)

        try:
            _id = coalesce(rev.changeset.id12,
                           "") + "-" + rev.branch.name + "-" + coalesce(
                               rev.branch.locale, DEFAULT_LOCALE)
            with self.es_locker:
                self.es.add({"id": _id, "value": rev})
        except Exception as e:
            e = Except.wrap(e)
            Log.warning("Did not save to ES, waiting {{duration}} seconds",
                        duration=WAIT_AFTER_NODE_FAILURE,
                        cause=e)
            Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
            if "FORBIDDEN/12/index read-only" in e:
                pass  # KNOWN FAILURE MODE

        return rev
Beispiel #7
0
    def _get_from_elasticsearch(self,
                                revision,
                                locale=None,
                                get_diff=False,
                                get_moves=True):
        rev = revision.changeset.id
        if self.es.cluster.version.startswith("1.7."):
            query = {
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": {
                            "and": [{
                                "term": {
                                    "changeset.id12": rev[0:12]
                                }
                            }, {
                                "term": {
                                    "branch.name": revision.branch.name
                                }
                            }, {
                                "term": {
                                    "branch.locale":
                                    coalesce(locale, revision.branch.locale,
                                             DEFAULT_LOCALE)
                                }
                            }, {
                                "range": {
                                    "etl.timestamp": {
                                        "gt": MIN_ETL_AGE
                                    }
                                }
                            }]
                        }
                    }
                },
                "size": 20
            }
        else:
            query = {
                "query": {
                    "bool": {
                        "must": [{
                            "term": {
                                "changeset.id12": rev[0:12]
                            }
                        }, {
                            "term": {
                                "branch.name": revision.branch.name
                            }
                        }, {
                            "term": {
                                "branch.locale":
                                coalesce(locale, revision.branch.locale,
                                         DEFAULT_LOCALE)
                            }
                        }, {
                            "range": {
                                "etl.timestamp": {
                                    "gt": MIN_ETL_AGE
                                }
                            }
                        }]
                    }
                },
                "size": 20
            }

        for attempt in range(3):
            try:
                with self.es_locker:
                    docs = self.es.search(query).hits.hits
                if len(docs) == 0:
                    return None
                best = docs[0]._source
                if len(docs) > 1:
                    for d in docs:
                        if d._id.endswith(d._source.branch.locale):
                            best = d._source
                    Log.warning("expecting no more than one document")
                return best
            except Exception as e:
                e = Except.wrap(e)
                if "EsRejectedExecutionException[rejected execution (queue capacity" in e:
                    (Till(seconds=Random.int(30))).wait()
                    continue
                else:
                    Log.warning("Bad ES call, waiting for {{num}} seconds",
                                num=WAIT_AFTER_NODE_FAILURE,
                                cause=e)
                    Till(seconds=WAIT_AFTER_NODE_FAILURE).wait()
                    continue

        Log.warning("ES did not deliver, fall back to HG")
        return None
Beispiel #8
0
    def get_revision(self,
                     revision,
                     locale=None,
                     get_diff=False,
                     get_moves=True):
        """
        EXPECTING INCOMPLETE revision OBJECT
        RETURNS revision
        """
        rev = revision.changeset.id
        if not rev:
            return Null
        elif rev == "None":
            return Null
        elif revision.branch.name == None:
            return Null
        locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
        output = self._get_from_elasticsearch(revision,
                                              locale=locale,
                                              get_diff=get_diff)
        if output:
            if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
                output.changeset.diff = None
            if not get_moves:
                output.changeset.moves = None
            DEBUG and Log.note(
                "Got hg ({{branch}}, {{locale}}, {{revision}}) from ES",
                branch=output.branch.name,
                locale=locale,
                revision=output.changeset.id)
            if output.push.date >= Date.now() - MAX_TODO_AGE:
                self.todo.add((output.branch, listwrap(output.parents)))
                self.todo.add((output.branch, listwrap(output.children)))
            if output.push.date:
                return output

        # RATE LIMIT CALLS TO HG (CACHE MISSES)
        next_cache_miss = self.last_cache_miss + (
            Random.float(WAIT_AFTER_CACHE_MISS * 2) * SECOND)
        self.last_cache_miss = Date.now()
        if next_cache_miss > self.last_cache_miss:
            Log.note("delaying next hg call for {{seconds|round(decimal=1)}}",
                     seconds=next_cache_miss - self.last_cache_miss)
            Till(till=next_cache_miss.unix).wait()

        found_revision = copy(revision)
        if isinstance(found_revision.branch, (text_type, binary_type)):
            lower_name = found_revision.branch.lower()
        else:
            lower_name = found_revision.branch.name.lower()

        if not lower_name:
            Log.error("Defective revision? {{rev|json}}",
                      rev=found_revision.branch)

        b = found_revision.branch = self.branches[(lower_name, locale)]
        if not b:
            b = found_revision.branch = self.branches[(lower_name,
                                                       DEFAULT_LOCALE)]
            if not b:
                Log.warning("can not find branch ({{branch}}, {{locale}})",
                            branch=lower_name,
                            locale=locale)
                return Null

        if Date.now() - Date(b.etl.timestamp) > _OLD_BRANCH:
            self.branches = _hg_branches.get_branches(kwargs=self.settings)

        push = self._get_push(found_revision.branch,
                              found_revision.changeset.id)

        url1 = found_revision.branch.url.rstrip(
            "/") + "/json-info?node=" + found_revision.changeset.id[0:12]
        url2 = found_revision.branch.url.rstrip(
            "/") + "/json-rev/" + found_revision.changeset.id[0:12]
        with Explanation("get revision from {{url}}", url=url1, debug=DEBUG):
            raw_rev2 = Null
            try:
                raw_rev1 = self._get_raw_json_info(url1, found_revision.branch)
                raw_rev2 = self._get_raw_json_rev(url2, found_revision.branch)
            except Exception as e:
                if "Hg denies it exists" in e:
                    raw_rev1 = Data(node=revision.changeset.id)
                else:
                    raise e
            output = self._normalize_revision(set_default(raw_rev1, raw_rev2),
                                              found_revision, push, get_diff,
                                              get_moves)
            if output.push.date >= Date.now() - MAX_TODO_AGE:
                self.todo.add((output.branch, listwrap(output.parents)))
                self.todo.add((output.branch, listwrap(output.children)))

            if not get_diff:  # DIFF IS BIG, DO NOT KEEP IT IF NOT NEEDED
                output.changeset.diff = None
            if not get_moves:
                output.changeset.moves = None
            return output
Beispiel #9
0
class Sqlite(DB):
    """
    Allows multi-threaded access
    Loads extension functions (like SQRT)
    """
    @override
    def __init__(self,
                 filename=None,
                 db=None,
                 get_trace=None,
                 upgrade=True,
                 load_functions=False,
                 kwargs=None):
        """
        :param filename:  FILE TO USE FOR DATABASE
        :param db: AN EXISTING sqlite3 DB YOU WOULD LIKE TO USE (INSTEAD OF USING filename)
        :param get_trace: GET THE STACK TRACE AND THREAD FOR EVERY DB COMMAND (GOOD FOR DEBUGGING)
        :param upgrade: REPLACE PYTHON sqlite3 DLL WITH MORE RECENT ONE, WITH MORE FUNCTIONS (NOT WORKING)
        :param load_functions: LOAD EXTENDED MATH FUNCTIONS (MAY REQUIRE upgrade)
        :param kwargs:
        """
        global _upgraded
        global _sqlite3

        self.settings = kwargs
        if not _upgraded:
            if upgrade:
                _upgrade()
            _upgraded = True
            import sqlite3 as _sqlite3
            _ = _sqlite3

        self.filename = File(filename).abspath if filename else None
        if known_databases.get(self.filename):
            Log.error(
                "Not allowed to create more than one Sqlite instance for {{file}}",
                file=self.filename)

        # SETUP DATABASE
        DEBUG and Log.note("Sqlite version {{version}}",
                           version=_sqlite3.sqlite_version)
        try:
            if db == None:
                self.db = _sqlite3.connect(database=coalesce(
                    self.filename, ":memory:"),
                                           check_same_thread=False,
                                           isolation_level=None)
            else:
                self.db = db
        except Exception as e:
            Log.error("could not open file {{filename}}",
                      filename=self.filename,
                      cause=e)
        load_functions and self._load_functions()

        self.locker = Lock()
        self.available_transactions = [
        ]  # LIST OF ALL THE TRANSACTIONS BEING MANAGED
        self.queue = Queue(
            "sql commands"
        )  # HOLD (command, result, signal, stacktrace) TUPLES

        self.get_trace = coalesce(get_trace, TRACE)
        self.upgrade = upgrade
        self.closed = False

        # WORKER VARIABLES
        self.transaction_stack = [
        ]  # THE TRANSACTION OBJECT WE HAVE PARTIALLY RUN
        self.last_command_item = None  # USE THIS TO HELP BLAME current_transaction FOR HANGING ON TOO LONG
        self.too_long = None
        self.delayed_queries = []
        self.delayed_transactions = []
        self.worker = Thread.run("sqlite db thread", self._worker)

        DEBUG and Log.note(
            "Sqlite version {{version}}",
            version=self.query("select sqlite_version()").data[0][0])

    def _enhancements(self):
        def regex(pattern, value):
            return 1 if re.match(pattern + "$", value) else 0

        con = self.db.create_function("regex", 2, regex)

        class Percentile(object):
            def __init__(self, percentile):
                self.percentile = percentile
                self.acc = []

            def step(self, value):
                self.acc.append(value)

            def finalize(self):
                return percentile(self.acc, self.percentile)

        con.create_aggregate("percentile", 2, Percentile)

    def transaction(self):
        thread = Thread.current()
        parent = None
        with self.locker:
            for t in self.available_transactions:
                if t.thread is thread:
                    parent = t

        output = Transaction(self, parent=parent)
        self.available_transactions.append(output)
        return output

    def query(self, command):
        """
        WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
        :param command: COMMAND FOR SQLITE
        :return: list OF RESULTS
        """
        if self.closed:
            Log.error("database is closed")

        signal = _allocate_lock()
        signal.acquire()
        result = Data()
        trace = extract_stack(1) if self.get_trace else None

        if self.get_trace:
            current_thread = Thread.current()
            with self.locker:
                for t in self.available_transactions:
                    if t.thread is current_thread:
                        Log.error(DOUBLE_TRANSACTION_ERROR)

        self.queue.add(CommandItem(command, result, signal, trace, None))
        signal.acquire()

        if result.exception:
            Log.error("Problem with Sqlite call", cause=result.exception)
        return result

    def close(self):
        """
        OPTIONAL COMMIT-AND-CLOSE
        IF THIS IS NOT DONE, THEN THE THREAD THAT SPAWNED THIS INSTANCE
        :return:
        """
        self.closed = True
        signal = _allocate_lock()
        signal.acquire()
        self.queue.add(CommandItem(COMMIT, None, signal, None, None))
        signal.acquire()
        self.worker.please_stop.go()
        return

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _load_functions(self):
        global _load_extension_warning_sent
        library_loc = File.new_instance(sys.modules[__name__].__file__,
                                        "../..")
        full_path = File.new_instance(
            library_loc, "vendor/sqlite/libsqlitefunctions.so").abspath
        try:
            trace = extract_stack(0)[0]
            if self.upgrade:
                if os.name == 'nt':
                    file = File.new_instance(
                        trace["file"],
                        "../../vendor/sqlite/libsqlitefunctions.so")
                else:
                    file = File.new_instance(
                        trace["file"],
                        "../../vendor/sqlite/libsqlitefunctions")

                full_path = file.abspath
                self.db.enable_load_extension(True)
                self.db.execute(SQL_SELECT + "load_extension" +
                                sql_iso(quote_value(full_path)))
        except Exception as e:
            if not _load_extension_warning_sent:
                _load_extension_warning_sent = True
                Log.warning(
                    "Could not load {{file}}, doing without. (no SQRT for you!)",
                    file=full_path,
                    cause=e)

    def create_new_functions(self):
        def regexp(pattern, item):
            reg = re.compile(pattern)
            return reg.search(item) is not None

        self.db.create_function("REGEXP", 2, regexp)

    def show_transactions_blocked_warning(self):
        blocker = self.last_command_item
        blocked = (self.delayed_queries + self.delayed_transactions)[0]

        Log.warning(
            "Query on thread {{blocked_thread|json}} at\n"
            "{{blocked_trace|indent}}"
            "is blocked by {{blocker_thread|json}} at\n"
            "{{blocker_trace|indent}}"
            "this message brought to you by....",
            blocker_trace=format_trace(blocker.trace),
            blocked_trace=format_trace(blocked.trace),
            blocker_thread=blocker.transaction.thread.name
            if blocker.transaction is not None else None,
            blocked_thread=blocked.transaction.thread.name
            if blocked.transaction is not None else None)

    def _close_transaction(self, command_item):
        query, result, signal, trace, transaction = command_item

        transaction.end_of_life = True
        with self.locker:
            self.available_transactions.remove(transaction)
            assert transaction not in self.available_transactions

            old_length = len(self.transaction_stack)
            old_trans = self.transaction_stack[-1]
            del self.transaction_stack[-1]

            assert old_length - 1 == len(self.transaction_stack)
            assert old_trans
            assert old_trans not in self.transaction_stack
        if not self.transaction_stack:
            # NESTED TRANSACTIONS NOT ALLOWED IN sqlite3
            DEBUG and Log.note(FORMAT_COMMAND, command=query)
            self.db.execute(query)

        has_been_too_long = False
        with self.locker:
            if self.too_long is not None:
                self.too_long, too_long = None, self.too_long
                # WE ARE CHEATING HERE: WE REACH INTO THE Signal MEMBERS AND REMOVE WHAT WE ADDED TO THE INTERNAL job_queue
                with too_long.lock:
                    has_been_too_long = bool(too_long)
                    too_long.job_queue = None

            # PUT delayed BACK ON THE QUEUE, IN THE ORDER FOUND, BUT WITH QUERIES FIRST
            if self.delayed_transactions:
                for c in reversed(self.delayed_transactions):
                    self.queue.push(c)
                del self.delayed_transactions[:]
            if self.delayed_queries:
                for c in reversed(self.delayed_queries):
                    self.queue.push(c)
                del self.delayed_queries[:]
        if has_been_too_long:
            Log.note("Transaction blockage cleared")

    def _worker(self, please_stop):
        try:
            # MAIN EXECUTION LOOP
            while not please_stop:
                command_item = self.queue.pop(till=please_stop)
                if command_item is None:
                    break
                try:
                    self._process_command_item(command_item)
                except Exception as e:
                    Log.warning("worker can not execute command", cause=e)
        except Exception as e:
            e = Except.wrap(e)
            if not please_stop:
                Log.warning("Problem with sql", cause=e)
        finally:
            self.closed = True
            DEBUG and Log.note("Database is closed")
            self.db.close()

    def _process_command_item(self, command_item):
        query, result, signal, trace, transaction = command_item

        with Timer("SQL Timing", silent=not DEBUG):
            if transaction is None:
                # THIS IS A TRANSACTIONLESS QUERY, DELAY IT IF THERE IS A CURRENT TRANSACTION
                if self.transaction_stack:
                    with self.locker:
                        if self.too_long is None:
                            self.too_long = Till(
                                seconds=TOO_LONG_TO_HOLD_TRANSACTION)
                            self.too_long.then(
                                self.show_transactions_blocked_warning)
                        self.delayed_queries.append(command_item)
                    return
            elif self.transaction_stack and self.transaction_stack[-1] not in [
                    transaction, transaction.parent
            ]:
                # THIS TRANSACTION IS NOT THE CURRENT TRANSACTION, DELAY IT
                with self.locker:
                    if self.too_long is None:
                        self.too_long = Till(
                            seconds=TOO_LONG_TO_HOLD_TRANSACTION)
                        self.too_long.then(
                            self.show_transactions_blocked_warning)
                    self.delayed_transactions.append(command_item)
                return
            else:
                # ENSURE THE CURRENT TRANSACTION IS UP TO DATE FOR THIS query
                if not self.transaction_stack:
                    # sqlite3 ALLOWS ONLY ONE TRANSACTION AT A TIME
                    DEBUG and Log.note(FORMAT_COMMAND, command=BEGIN)
                    self.db.execute(BEGIN)
                    self.transaction_stack.append(transaction)
                elif transaction is not self.transaction_stack[-1]:
                    self.transaction_stack.append(transaction)
                elif transaction.exception and query is not ROLLBACK:
                    result.exception = Except(
                        context=ERROR,
                        template=
                        "Not allowed to continue using a transaction that failed",
                        cause=transaction.exception,
                        trace=trace)
                    signal.release()
                    return

                try:
                    transaction.do_all()
                except Exception as e:
                    # DEAL WITH ERRORS IN QUEUED COMMANDS
                    # WE WILL UNWRAP THE OUTER EXCEPTION TO GET THE CAUSE
                    err = Except(context=ERROR,
                                 template="Bad call to Sqlite3 while " +
                                 FORMAT_COMMAND,
                                 params={"command": e.params.current.command},
                                 cause=e.cause,
                                 trace=e.params.current.trace)
                    transaction.exception = result.exception = err

                    if query in [COMMIT, ROLLBACK]:
                        self._close_transaction(
                            CommandItem(ROLLBACK, result, signal, trace,
                                        transaction))

                    signal.release()
                    return

            try:
                # DEAL WITH END-OF-TRANSACTION MESSAGES
                if query in [COMMIT, ROLLBACK]:
                    self._close_transaction(command_item)
                    return

                # EXECUTE QUERY
                self.last_command_item = command_item
                DEBUG and Log.note(FORMAT_COMMAND, command=query)
                curr = self.db.execute(query)
                result.meta.format = "table"
                result.header = [d[0] for d in curr.description
                                 ] if curr.description else None
                result.data = curr.fetchall()
                if DEBUG and result.data:
                    text = convert.table2csv(list(result.data))
                    Log.note("Result:\n{{data|limit(100)|indent}}", data=text)
            except Exception as e:
                e = Except.wrap(e)
                err = Except(context=ERROR,
                             template="Bad call to Sqlite while " +
                             FORMAT_COMMAND,
                             params={"command": query},
                             trace=trace,
                             cause=e)
                result.exception = err
                if transaction:
                    transaction.exception = err
            finally:
                signal.release()