Ejemplo n.º 1
0
 def __init__(self, K, M, Ks=256):
     """Remember the index settings and build the underlying quantizer.

     K, M and Ks are stored on the instance unchanged; M and Ks are also
     passed to ``PQ(M, Ks)``.  ``vecs`` and ``codebooks`` start as None.
     """
     self.K, self.M, self.Ks = K, M, Ks
     # Nothing has been fitted or encoded yet.
     self.codebooks = None
     self.vecs = None
     self.pq = PQ(M, Ks)
Ejemplo n.º 2
0
def cli(query, infile, css, to_text, to_text_all, first, compact, to_json):
    """Command line xml and json processor for xpath and css selectors.

    The document is read from ``infile`` when one is given, otherwise from
    stdin.  ``query`` is run as a css selector when ``css`` is truthy and as
    an xpath expression otherwise; a truthy result is echoed via ``click``.
    """
    piped_data = infile.read() if infile else ''.join(sys.stdin)
    pq = PQ(piped_data, to_text=to_text, to_text_all=to_text_all)

    select = pq.css if css else pq.xpath
    result = select(query)
    if first:
        # Keep only the first match; an empty result collapses to None.
        result = result[0] if result else None

    if not result:
        return
    click.echo(pq.output(result, compact=compact, to_json=to_json))
    def dijkstras_version_2(self, s):
        """Dijkstra's Algorithm using a binary heap as the PQ.

        Keyword Arguments:
        s - The source vertex.

        Returns a list of 3-tuples, one for each vertex, in the order the
        vertices left the queue: (vertex id, distance from s, parent).
        E.g., (2, 10, 5) means the shortest path from s to 2 has weight 10
        and vertex 2's parent is vertex 5.  Vertices that were never relaxed
        keep distance math.inf and parent None.
        """
        distance = [math.inf for x in self._adj]
        parent = [None for x in self._adj]
        distance[s] = 0

        # Every vertex starts in the queue, keyed by its tentative distance.
        Q = PQ()
        Q.add(s, 0)
        for u in range(len(self._adj)):
            if u != s:
                Q.add(u, math.inf)

        S = []
        while not Q.is_empty():
            u = Q.extract_min()
            S.append(u)
            for v, w in self._adj[u].__iter__(True):
                candidate = distance[u] + w
                if candidate < distance[v]:
                    parent[v] = u
                    distance[v] = candidate
                    # Keep the heap key in step with the relaxed distance.
                    # Without this every non-source key stays at infinity
                    # and vertices are extracted in an arbitrary order.
                    Q.change_priority(v, candidate)

        return [(v, distance[v], parent[v]) for v in S]
Ejemplo n.º 4
0
    def dijkstra(self,s):
        """Dijkstra's Algorithm using a binary heap as the PQ.

        Keyword Arguments:
        s - The source vertex.

        Returns one (vertex, cost, predecessor) tuple per vertex, in vertex
        order.  The source is recorded as its own predecessor.
        """
        n = len(self._adj)
        costs = [math.inf] * n
        pointers = [None] * n
        costs[s] = 0
        pointers[s] = s

        S = set()
        Q = PQ()

        # Q = G.V: every vertex is queued under its initial cost.
        for vertex, cost in enumerate(costs):
            Q.add(vertex, cost)

        while not Q.is_empty():
            u = Q.extract_min()
            S.add(u)
            for v, w in self._adj[u].__iter__(True):
                # Relax edge (u, v).
                candidate = w + costs[u]
                if candidate < costs[v]:
                    costs[v] = candidate
                    pointers[v] = u
                    Q.change_priority(v, candidate)

        return [(i, costs[i], pointers[i]) for i in range(n)]
Ejemplo n.º 5
0
def recreate(DATABASE_URL):
    '''Rebuild the database schema and the work-queue table.

    Executes schema.pgsql (located next to this module) against
    DATABASE_URL, drops any existing `queue` table, then asks PQ to
    create it.
    '''
    schema_filename = join(dirname(__file__), 'schema.pgsql')

    with connect(DATABASE_URL) as conn:
        with conn.cursor() as db:
            with open(schema_filename) as file:
                schema_sql = file.read()
            db.execute(schema_sql)
            db.execute('DROP TABLE IF EXISTS queue')

        # The cursor is closed; PQ works on the connection itself.
        PQ(conn, table='queue').create()
Ejemplo n.º 6
0
def recreate(DATABASE_URL):
    '''Reset the database: apply schema.pgsql and recreate the queue table.

    The schema file is looked up in this module's directory.  Any existing
    `queue` table is dropped before PQ creates it on the same connection.
    '''
    module_dir = dirname(__file__)
    schema_filename = join(module_dir, 'schema.pgsql')

    with connect(DATABASE_URL) as conn:
        with conn.cursor() as db:
            with open(schema_filename) as file:
                db.execute(file.read())

            db.execute('DROP TABLE IF EXISTS queue')

        queue_store = PQ(conn, table='queue')
        queue_store.create()
Ejemplo n.º 7
0
def djikstra(start, neighbor_func, distance_func, goal_pred):
    """Search outward from ``start`` until ``goal_pred`` accepts a node.

    ``neighbor_func(node)`` yields adjacent nodes and
    ``distance_func(a, b)`` gives the cost of stepping from a to b.

    Returns a dict with keys "distance", "path" and "parents".  When no
    node satisfies ``goal_pred`` the distance is infinity and the path is
    None.
    """
    distances = {start: 0}
    parents = {start: None}
    visited = set()

    queue = PQ()
    queue.add_task(start, distances[start])

    while not queue.empty():
        current = queue.pop_task()
        for neighbor in neighbor_func(current):
            if neighbor in visited:
                continue
            tentative_distance = distances[current] + distance_func(
                current, neighbor)
            if neighbor in distances:
                # Already queued: only a strictly shorter route matters.
                if not tentative_distance < distances[neighbor]:
                    continue
                queue.update_task(neighbor, tentative_distance)
            else:
                queue.add_task(neighbor, tentative_distance)
            distances[neighbor] = tentative_distance
            parents[neighbor] = current
        visited.add(current)
        if goal_pred(current):
            return {
                "distance": distances[current],
                "path": reconstruct_path(current, parents),
                "parents": parents
            }

    return {"distance": float("inf"), "path": None, "parents": parents}
Ejemplo n.º 8
0
    def setUpClass(cls):
        """Open the shared connection pool and make sure the queue table exists.

        Connection settings come from the PQ_TEST_* environment variables,
        each with a default.  A ProgrammingError with pgcode '42P07' from
        ``create()`` is ignored; any other is re-raised.
        """
        c = cls.base_concurrency * 4
        env = os.environ.get
        dsn = "dbname=%s user=%s port=%s host=%s password=%s" % (
            env('PQ_TEST_DB', 'pq_test'),
            env('PQ_TEST_USER', 'postgres'),
            env('PQ_TEST_DB_PORT', '5432'),
            env('PQ_TEST_DB_HOST', 'localhost'),
            env('PQ_TEST_DB_PASS', ''),
        )
        pool = ThreadedConnectionPool(
            c, c, dsn, cursor_factory=cls.CURSOR_FACTORY)
        cls.pool = pool
        cls.pq = PQ(pool=pool, table="queue", queue_class=cls.queue_class)

        def setup_filter(record):
            # Rejects every record, silencing the logger during setup.
            return False

        logger.addFilter(setup_filter)
        try:
            cls.pq.create()
        except ProgrammingError as exc:
            if exc.pgcode != '42P07':
                raise
        finally:
            logger.removeFilter(setup_filter)
Ejemplo n.º 9
0
    def fit(self, vecs, iter):
        """Alternate between training the quantizer and updating ``self.R``.

        ``vecs`` must be a 2-D float32 array.  ``iter`` is used both as the
        number of rotation rounds and as the training iterations of the
        final quantizer.  Returns ``self``.
        """
        assert vecs.dtype == np.float32
        assert vecs.ndim == 2
        _, D = vecs.shape
        self.R = np.eye(D, dtype=np.float32)

        rotation_iter = pq_iter = iter

        from tqdm import tqdm
        rounds = range(rotation_iter)
        iterator = tqdm(rounds) if self.verbose else rounds
        for i in iterator:
            final_round = i == rotation_iter - 1
            X = vecs @ self.R

            # (a) Train codewords
            if final_round:
                # Stop the outer progress bar so the quantizer can show its
                # own, then run the full training.
                if type(iterator) is tqdm:
                    iterator.close()
                stage_verbose, fit_iter = self.verbose, pq_iter
            else:
                # Intermediate rounds only need a one-pass (iter=1) training.
                stage_verbose, fit_iter = False, 1
            pq_tmp = ResidualPQ(
                [PQ(self.M, self.Ks, stage_verbose)
                 for _ in range(self.layer)],
                verbose=stage_verbose)
            pq_tmp.fit(X, iter=fit_iter)

            # (b) Update a rotation matrix R
            X_ = pq_tmp.compress(X)
            U, _, V = np.linalg.svd(vecs.T @ X_)

            if final_round:
                self.pq = pq_tmp
                break
            self.R = U @ V

        return self
Ejemplo n.º 10
0
def handle_link(text):
    """Replace ``<e ...>`` tags in ``text`` with Markdown-style text.

    Each tag is parsed with PQ and rewritten according to its ``type``
    attribute:

    * ``web``     -> ``[title](href)``
    * ``hashtag`` -> `` `title` `` surrounded by single spaces
    * ``mention`` -> the bare title

    Titles and hrefs are URL-unquoted.  Tags of any other type are left in
    place.  Returns the rewritten text.
    """
    for tag in re.findall(r'<e\ [^>]*>', text):
        html = PQ(tag)
        kind = html.attr('type')
        # The text is stripped once per matched tag, as before.
        text = text.strip()
        if kind == 'web':
            template = '[%s](%s)' % (urllib.parse.unquote(
                html.attr('title')), urllib.parse.unquote(html.attr('href')))
        elif kind == 'hashtag':
            template = ' `%s` ' % urllib.parse.unquote(html.attr('title'))
        elif kind == 'mention':
            template = urllib.parse.unquote(html.attr('title'))
        else:
            # Unknown type: there is no replacement to apply.  Falling
            # through here used to reuse the previous tag's replacement, or
            # raise UnboundLocalError when this was the first tag.
            continue
        text = text.replace(tag, template)
    return text
Ejemplo n.º 11
0
 def __init__(self, Ks=256, depth=2):
     """Build the list of single-subspace quantizers and pick the code dtype.

     ``(1 + Ks) * (depth // 2) + depth % 2`` instances of ``PQ(1, Ks)`` are
     created.  ``code_dtype`` is the narrowest of uint8/uint16/uint32 whose
     range covers ``Ks`` codes.
     """
     self.Ks = Ks
     self.depth = depth

     n_quantizers = (1 + self.Ks) * (depth // 2) + depth % 2
     self.pqs = [PQ(1, Ks) for _ in range(n_quantizers)]

     if Ks <= 2**8:
         self.code_dtype = np.uint8
     elif Ks <= 2**16:
         self.code_dtype = np.uint16
     else:
         self.code_dtype = np.uint32
Ejemplo n.º 12
0
    def __init__(self, M, Ks, verbose=True, layer=1):
        """Create an untrained instance wrapping a ``ResidualPQ``.

        ``layer`` instances of ``PQ(M, Ks, verbose)`` are stacked inside the
        ``ResidualPQ``; its ``code_dtype`` is mirrored on this object.
        ``R`` stays None until it is set elsewhere.
        """
        self.M = M
        self.Ks = Ks
        self.layer = layer
        self.verbose = verbose

        stages = [PQ(M, Ks, verbose) for _ in range(layer)]
        self.pq = ResidualPQ(stages)
        self.code_dtype = self.pq.code_dtype

        self.R = None
Ejemplo n.º 13
0
def recreate(DATABASE_URL):
    '''Rebuild both schemas and the work-queue table.

    Executes schema.pgsql and coverage/schema.pgsql (both relative to this
    module) against DATABASE_URL, in that order, drops any existing
    `queue` table, then asks PQ to create it.
    '''
    here = dirname(__file__)
    ci_schema_filename = join(here, 'schema.pgsql')
    cov_schema_filename = join(here, 'coverage', 'schema.pgsql')

    with connect(DATABASE_URL) as conn:
        with conn.cursor() as db:
            db.execute('SET client_min_messages TO WARNING')

            for schema_filename in (ci_schema_filename, cov_schema_filename):
                with open(schema_filename) as file:
                    db.execute(file.read())

            db.execute('DROP TABLE IF EXISTS queue')

        PQ(conn, table='queue').create()
Ejemplo n.º 14
0
 def setup_method(self):
     """Prepare the JSON/XML fixtures and three PQ instances over the XML.

     All newlines and spaces are removed from the XML text before it is
     stored and parsed.
     """
     self.data_json = """
     {
       "root": [
         {
           "foo": "bar",
           "deep": {"foo2": "bar_inside"}
         },
         {
           "gar": "fir"
         }
       ]
     }
     """
     raw_xml = """
     <root>
       <foo>bar
         <foo2>bar_inside</foo2>
       </foo>
       <gar>fir</gar>
     </root>
     """
     xml = raw_xml.replace(' ', '').replace('\n', '')
     self.data_xml = xml
     # One default instance plus one per text-extraction mode.
     self.pq = PQ(xml)
     self.pq_text = PQ(xml, to_text=True)
     self.pq_text_all = PQ(xml, to_text_all=True)
Ejemplo n.º 15
0
    def DijkstrasVersion2(self, sourceVertex):
        """Dijkstra's algorithm from ``sourceVertex`` using a binary-heap PQ.

        Each vertex's ``distance`` and ``pred`` attributes are updated in
        place.  Returns a list of (value, distance, pred) tuples, one per
        vertex in ``self.vertices``.

        NOTE(review): assumes every other vertex's ``distance`` has been
        initialised (e.g. to infinity) before this call; confirm against
        the vertex class.
        """
        sourceVertex.distance = 0
        binaryHeap = PQ()
        # The heap must be ordered by tentative distance, not by vertex value.
        binaryHeap.add(sourceVertex, sourceVertex.distance)

        while not binaryHeap.is_empty():
            # Remove the closest vertex before relaxing its edges, so a
            # neighbour queued with an equal key cannot be popped instead.
            tempVertex = binaryHeap.extract_min()

            for e in tempVertex.edges:
                v = e.neighbor
                newDistance = tempVertex.distance + e.weight

                if newDistance < v.distance:
                    v.distance = newDistance
                    v.pred = tempVertex
                    # Re-key a vertex that is already queued; otherwise
                    # queue it for the first time.
                    if binaryHeap.contains(v):
                        binaryHeap.change_priority(v, newDistance)
                    else:
                        binaryHeap.add(v, newDistance)

        return [(v.value, v.distance, v.pred) for v in self.vertices]
Ejemplo n.º 16
0
    def setUpClass(cls):
        """Open the shared connection pool and make sure the queue table exists."""
        c = cls.base_concurrency * 4
        pool = ThreadedConnectionPool(c, c, "dbname=pq_test user=postgres")
        cls.pool = pool
        cls.pq = PQ(pool=pool, table="queue", queue_class=cls.queue_class)

        try:
            cls.pq.create()
        except ProgrammingError as exc:
            # A duplicate table error (42P07) is ignored; anything else is
            # re-raised.
            duplicate_table = exc.pgcode == '42P07'
            if not duplicate_table:
                raise
Ejemplo n.º 17
0
    def dijkstra(self,s) :
        """Dijkstra's Algorithm using a binary heap as the PQ.

        Keyword Arguments:
        s - The source vertex.

        Returns a list of (vertex, distance, predecessor) tuples in the
        order the vertices left the queue.  Vertices that were never
        relaxed keep distance math.inf and predecessor None.
        """

        class VertexData:
            __slots__ = ['d', 'pred']

            def __init__(self):
                self.d = math.inf
                self.pred = None

        vertices = [VertexData() for i in range(len(self._adj))]
        vertices[s].d = 0

        # Every vertex starts in the queue, keyed by its tentative distance.
        Q = PQ()
        Q.add(s, 0)
        for u in range(len(self._adj)):
            if u != s:
                Q.add(u, math.inf)

        S = []
        while not Q.is_empty():
            u = Q.extract_min()
            S.append(u)
            for v, w in self._adj[u].__iter__(True):
                candidate = vertices[u].d + w
                if candidate < vertices[v].d:
                    vertices[v].pred = u
                    vertices[v].d = candidate
                    # Keep the heap key in step with the relaxed distance.
                    # Without this every non-source key stays at infinity
                    # and the extraction order is arbitrary.
                    Q.change_priority(v, candidate)

        return [(v, vertices[v].d, vertices[v].pred) for v in S]
Ejemplo n.º 18
0
    def dijkstra(self, s):
        """Dijkstra's Algorithm using a binary heap as the PQ.

        Keyword Arguments:
        s - The source vertex.

        Returns a list of (vertex, distance, predecessor index) tuples in
        the order the vertices left the queue.  The predecessor is None for
        the source and for vertices that were never relaxed.
        """
        class VertexData:
            __slots__ = ['d', 'pred']

            def __init__(self):
                self.d = math.inf
                self.pred = None

        vertices = [VertexData() for i in range(len(self._adj))]

        # INIT SINGLE-SOURCE
        vertices[s].d = 0
        S = []
        Q = PQ()

        for u in range(len(self._adj)):
            # Compare ids by value; identity (`is`) is only reliable for
            # small cached integers.
            Q.add(u, 0 if u == s else math.inf)

        while not Q.is_empty():
            u = Q.extract_min()
            S.append(u)
            for v, w in self._adj[u].__iter__(True):
                # RELAX
                candidate = vertices[u].d + w
                if vertices[v].d > candidate:
                    vertices[v].d = candidate
                    # Store the predecessor's index directly, so no list
                    # search is needed when building the result.
                    vertices[v].pred = u
                    # Keep the heap key in step with the relaxed distance.
                    # Without this the extraction order is arbitrary.
                    Q.change_priority(v, candidate)

        return [(u, vertices[u].d, vertices[u].pred) for u in S]
Ejemplo n.º 19
0
    def setUpClass(cls):
        """Open the shared connection pool and make sure the queue table exists.

        Database name and user come from PQ_TEST_DB / PQ_TEST_USER, with
        defaults 'pq_test' and 'postgres'.
        """
        c = cls.base_concurrency * 4
        dsn = "dbname=%s user=%s" % (
            os.environ.get('PQ_TEST_DB', 'pq_test'),
            os.environ.get('PQ_TEST_USER', 'postgres'),
        )
        pool = ThreadedConnectionPool(
            c, c, dsn, cursor_factory=cls.CURSOR_FACTORY)
        cls.pool = pool
        cls.pq = PQ(pool=pool, table="queue", queue_class=cls.queue_class)

        try:
            cls.pq.create()
        except ProgrammingError as exc:
            # A duplicate table error (42P07) is ignored; anything else is
            # re-raised.
            duplicate_table = exc.pgcode == '42P07'
            if not duplicate_table:
                raise
Ejemplo n.º 20
0
 def DijkstrasVersion2(self,s) :
     """Dijkstra's algorithm from vertex ``s`` using a PQ.

     Returns a list of (vertex, distance, previous vertex) tuples in the
     order the vertices are extracted from the queue.  Edge weights are
     read from ``self._w[(u, v)]``.
     """
     class VertexData:
         pass

     vList = [VertexData() for _ in range(len(self._adj))]
     vList[s].dist = 0
     vList[s].prev = None

     Q = PQ()
     for idx, vertex in enumerate(vList):
         if idx != s:
             vertex.dist = float('inf')
             vertex.prev = None
         Q.add(idx, vertex.dist)

     S = []
     while not Q.is_empty():
         u = Q.extract_min()
         here = vList[u]
         S.append((u, here.dist, here.prev))
         for v in self._adj[u]:
             temp = here.dist + self._w[(u,v)]
             there = vList[v]
             if temp < there.dist:
                 there.dist = temp
                 there.prev = u
                 Q.change_priority(v, temp)
     return S
Ejemplo n.º 21
0
 def DijkstrasVersion2(self,s) :
     """Dijkstra's algorithm from vertex ``s`` using a PQ.

     Returns a list of (vertex, distance, predecessor) tuples in the order
     the vertices are extracted from the queue.  The predecessor is -1 for
     the source and for vertices that were never relaxed.  Edge weights
     are read from ``self._w[(u, i)]``.
     """
     class VertexData :
         pass

     vertices = [VertexData() for i in range(len(self._adj))]
     for vertex in vertices :
         vertex.d = inf
         vertex.pred = -1
     # The source distance must be set before the vertices are queued.
     # Previously the source was queued with key inf and never re-keyed,
     # so it was not guaranteed to be extracted first.
     vertices[s].d = 0

     S = PQ()
     for i, vertex in enumerate(vertices) :
         S.add(i, vertex.d)

     TL = []
     while not S.is_empty() :
         u = S.extract_min()
         TL.append((u, vertices[u].d, vertices[u].pred))
         for i in self._adj[u]:
             distance = vertices[u].d + self._w[(u,i)]
             if distance < vertices[i].d:
                 vertices[i].d = distance
                 vertices[i].pred = u
                 S.change_priority(i, distance)
     return TL
Ejemplo n.º 22
0
import logging
import os

from flask import Flask, render_template, redirect, url_for, flash
from pq import PQ

from api import APIGrabber
from db import PonyDB

logging.basicConfig()

# Config
# ---------------
# App config
app = Flask(__name__)
# The settings object is named by the APP_SETTINGS environment variable;
# `os` must be imported for this lookup to work.
app.config.from_object(os.environ.get('APP_SETTINGS', None))
db = PonyDB(app)
pq = PQ(db.get_connection())  # Postgres work queue
# Create the queue table only when the database reports it as missing.
if db.table_exists('queue') is False:
    pq.create()
queue = pq['themes']


# Routes
# ---------------
@app.route('/')
def show_entries():
    """
    List out all the themes.

    NOTE(review): only the data-fetching part of this view is visible
    here; nothing is rendered or returned in the lines shown.
    """
    # Two theme collections and a sha value are read from the db wrapper.
    # Presumably themes with and without images - confirm against PonyDB.
    image_themes = db.get_image_themes()
    no_image_themes = db.get_no_image_themes()
    sha = db.get_sha()
Ejemplo n.º 23
0
# NOTE: The server will crash on the first run, probably because of a
# side effect of create_database. The disabled setup code is kept below
# for reference.
# from sqlalchemy_utils import database_exists, create_database
# if not database_exists(DB_URL):
#     create_database(DB_URL)
# Create the queues (the PQ object must exist before create() is called).
# from psycopg2 import connect
# from pq import PQ
# conn = connect('dbname=test108 user=postgres')
# pq = PQ(conn)
# pq.create()

# Wrap the app with Sentry (presumably for error reporting - confirm).
sentry = Sentry(app)

# Module-level database connection and the PQ handle built on top of it.
conn = connect(DB_URL)
pq = PQ(conn)


class Task(db.Model):
    """A task to be completed by a judicious participant."""

    __tablename__ = "task"

    # Primary key (UUID).
    id = db.Column(UUID, primary_key=True, nullable=False)
    # Defaults to datetime.now at insert time.
    created_at = db.Column(db.DateTime, nullable=False, default=datetime.now)
    # Nullable timestamp; no default is set here.
    last_queued_at = db.Column(db.DateTime)
    # Required string of at most 64 characters.
    type = db.Column(db.String(64), nullable=False)
    # Optional JSON payload.
    parameters = db.Column(db.JSON)
    # Optional reference to person.id.
    person_id = db.Column(UUID, db.ForeignKey('person.id'))
    # Required reference to context.id.
    context_id = db.Column(UUID, db.ForeignKey('context.id'), nullable=False)
    # Nullable timestamp; no default is set here.
    last_started_at = db.Column(db.DateTime)
Ejemplo n.º 24
0
 def MST_Prim(self, r=0) :
     """Compute a minimum spanning tree with Prim's algorithm.

     r is the vertex id used as the root (default 0).  Returns a set of
     (vertex, parent) pairs, one for every vertex that was given a parent.
     """
     parent = [None for _ in range(self.numVertices())]

     # The root is queued with key 0 and every other vertex with infinity.
     Q = PQ()
     Q.add(r,0)
     for u in range(self.numVertices()) :
         if u == r :
             continue
         Q.add(u,inf)

     while not Q.isEmpty() :
         u = Q.extractMin()
         for v in self._adj[u] :
             if not Q.contains(v) :
                 continue
             weight = self._w[(u,v)]
             if weight < Q.getPriorityValue(v) :
                 parent[v] = u
                 Q.changePriorityValue(v, weight)

     return {(child, par) for child, par in enumerate(parent)
             if par is not None}
    def mst_prim(self, r=0):
        """Returns the set of edges in some
        minimum spanning tree (MST) of the graph,
        computed using Prim's algorithm.

        Keyword arguments:
        r - vertex id to designate as the root (default is 0).

        Each edge is returned as a (parent, child) pair.
        """
        n = len(self._adj)
        parent = [None] * n

        # The root is queued with key 0 and every other vertex with infinity.
        Q = PQ()
        Q.add(r, 0)
        for u in range(n):
            if u == r:
                continue
            Q.add(u, math.inf)

        while not Q.is_empty():
            u = Q.extract_min()
            for v, w in self._adj[u].__iter__(True):
                if not Q.contains(v):
                    continue
                if w < Q.get_priority(v):
                    parent[v] = u
                    Q.change_priority(v, w)

        return {(u, v) for v, u in enumerate(parent) if u is not None}
Ejemplo n.º 26
0
class IVFPQ:
    """Inverted-file index whose residuals are encoded by a ``PQ``.

    ``fit`` clusters the training vectors into ``K`` coarse centroids and
    trains ``PQ(M, Ks)`` on the residuals, ``encode`` stores the compressed
    database, and ``find_topK`` searches the nearest partitions.
    """

    def __init__(self, K, M, Ks=256):
        self.K = K
        self.M = M
        self.Ks = Ks
        self.vecs = None
        self.codebooks = None
        self.pq = PQ(M, Ks)

    def ivf(self, vecs, iterations):
        """Cluster ``vecs`` into ``self.K`` groups with ``kmeans2``.

        Returns the (centroids, labels) pair produced by ``kmeans2``.
        """
        print('vecs: ss', len(vecs))
        centroids, labels = kmeans2(data=vecs,
                                    k=self.K,
                                    iter=iterations,
                                    minit='points')
        return centroids, labels

    def residual(self, vecs, centroids, labels):
        """Return each vector minus the centroid selected by its label."""
        return vecs - centroids[labels]

    def fit(self, vecs, iterations=20, seed=123):
        """Train the coarse centroids and the residual quantizer.

        Per the original note, this obtains the codebooks with shape
        (M, Ks, Ds) - presumably inside ``self.pq``; confirm against PQ.
        """
        # Clustering.
        self.centroids, self.labels = self.ivf(vecs, iterations)
        # Residuals relative to each vector's assigned centroid.
        vecs = self.residual(vecs, self.centroids, self.labels)
        self.pq.fit(vecs=vecs, iterations=iterations, seed=seed)

    def encode(self, vecs):
        """Compress ``vecs`` and store the result on the instance.

        Sets ``self.vecs`` to the encoded residuals (documented by the
        original author as shape (N, M)) and ``self.centroid2vec`` to a
        mapping ``{centroid id: [index of each assigned vector, ...]}``.
        """
        centroid_ids, _ = vq(vecs, self.centroids)
        residuals = vecs - self.centroids[centroid_ids]
        centroid2vec = {}
        for i, c_id in enumerate(centroid_ids):
            centroid2vec.setdefault(c_id, []).append(i)
        self.vecs = self.pq.encode(residuals)
        self.centroid2vec = centroid2vec

    def get_topH_partitions(self, query, H):
        """Return the ids of the ``H`` non-empty centroids nearest ``query``.

        ``query`` is the original (non-residual) query.  Centroids with no
        encoded vectors are skipped, so fewer than ``H`` ids may be
        returned.
        """
        sq_dists = np.linalg.norm(self.centroids - query, axis=1)**2
        nearest = [
            iid for iid in np.argsort(sq_dists) if iid in self.centroid2vec
        ]
        return np.array(nearest[:H])

    def dtable(self, query, topH_centroids):
        """Build one distance table per partition for ``query``.

        ``query`` is the original query; each table is built from the
        residual of the query relative to that partition's centroid.
        """
        dtables = []
        for pid in topH_centroids:
            # Subtract from the original query every time.  Rebinding
            # `query` here would carry earlier centroid offsets into every
            # later table.
            residual_query = query - self.centroids[pid]
            dtables.append(self.pq.dtable(residual_query))

        return dtables

    def find_topK(self, query, K, H):
        """Return up to ``K`` (vector index, distance) pairs for ``query``.

        The ``H`` nearest non-empty partitions are searched and the pairs
        are sorted by ascending distance.
        """
        topH_centroids = self.get_topH_partitions(query, H)
        dtables = self.dtable(query, topH_centroids)
        ret = []
        total_dists = []
        # Iterate over the partitions actually found; there may be fewer
        # than H when some centroids hold no vectors.
        for pid, table in zip(topH_centroids, dtables):
            sub_vecs_ids = np.array(self.centroid2vec[pid])
            sub_vecs = self.vecs[sub_vecs_ids]
            dists = table.adist(sub_vecs)
            topNum_ids = np.argsort(dists)[:K]
            total_dists += list(dists[topNum_ids])
            ret += list(sub_vecs_ids[topNum_ids])
        return sorted(zip(ret, total_dists), key=lambda x: x[1])[:K]
Ejemplo n.º 27
0
def db_queue(conn, name):
    """Return the entry called ``name`` from a PQ over the 'queue' table."""
    queues = PQ(conn, table='queue')
    return queues[name]
Ejemplo n.º 28
0
from psycopg2 import connect, ProgrammingError
from pq import PQ

from wx_explore.common.config import Config

# Work queue stored in the `work_queue` table of the configured database.
pq = PQ(
    connect(
        user=Config.POSTGRES_USER,
        password=Config.POSTGRES_PASS,
        host=Config.POSTGRES_HOST,
        port=Config.POSTGRES_PORT,
        dbname=Config.POSTGRES_DB,
    ),
    table='work_queue',
)

try:
    pq.create()
except ProgrammingError as exc:
    # 42P07 is PostgreSQL's duplicate_table error; any other error is
    # re-raised.
    duplicate_table = exc.pgcode == '42P07'
    if not duplicate_table:
        raise
Ejemplo n.º 29
0
class TestClass:
    """Selector and path-processing tests for PQ on JSON and XML input."""

    # noinspection PyAttributeOutsideInit
    def setup_method(self):
        """Prepare the JSON/XML fixtures and three PQ instances over the XML."""
        self.data_json = """
        {
          "root": [
            {
              "foo": "bar",
              "deep": {"foo2": "bar_inside"}
            },
            {
              "gar": "fir"
            }
          ]
        }
        """
        raw_xml = """
        <root>
          <foo>bar
            <foo2>bar_inside</foo2>
          </foo>
          <gar>fir</gar>
        </root>
        """
        # All newlines and spaces are removed before the XML is parsed.
        xml = raw_xml.replace(' ', '').replace('\n', '')
        self.data_xml = xml
        self.pq = PQ(xml)
        self.pq_text = PQ(xml, to_text=True)
        self.pq_text_all = PQ(xml, to_text_all=True)

    def test_selector_from_json(self):
        self._test_selector(PQ(self.data_json))

    def test_selector_from_xml(self):
        self._test_selector(PQ(self.data_xml))

    def _test_selector(self, pq):
        """Run the shared xpath/css expectations against ``pq``."""
        first = {'first': True}
        first_text = {'first': True, 'to_text': True}
        expectations = [
            (pq.xpath, '//foo/text()', {}, ['bar']),
            (pq.css, 'foo::text', {}, ['bar']),
            (pq.xpath, '//foo/text()', first, 'bar'),
            (pq.css, 'foo::text', first, 'bar'),
            (pq.xpath, '//foo', first_text, 'bar'),
            (pq.css, 'foo', first_text, 'bar'),
            (pq.xpath, '//foo2/text()', {}, ['bar_inside']),
            (pq.css, 'foo2::text', {}, ['bar_inside']),
        ]
        for select, path, options, expected in expectations:
            assert select(path, **options) == expected

    def test_process_xpath(self):
        plain = self.pq.process_path
        assert 'text()' not in plain('//foo', func_name='xpath')
        assert 'text()' in plain('//foo', func_name='xpath', to_text=True)
        assert 'text()' in self.pq_text.process_path('//foo', func_name='xpath')
        assert '//text()' in plain('//foo', func_name='xpath', to_text_all=True)
        assert '//text()' in self.pq_text_all.process_path('//foo', func_name='xpath')

    def test_process_css(self):
        plain = self.pq.process_path
        assert '::text' not in plain('//foo', func_name='css')
        assert '::text' in plain('//foo', func_name='css', to_text=True)
        assert '::text' in self.pq_text.process_path('//foo', func_name='css')
        assert ' ::text' in plain('//foo', func_name='css', to_text_all=True)
        assert ' ::text' in self.pq_text_all.process_path('//foo', func_name='css')
Ejemplo n.º 30
0
 def test_selector_from_json(self):
     """Run the shared selector checks on a PQ built from the JSON fixture."""
     self._test_selector(PQ(self.data_json))
Ejemplo n.º 31
0
 def test_selector_from_xml(self):
     """Run the shared selector checks on a PQ built from the XML fixture."""
     self._test_selector(PQ(self.data_xml))