def __init__(self, K, M, Ks=256):
    """Store the hyper-parameters and create the underlying ``PQ`` object.

    Args:
        K: kept on the instance as ``self.K``.
        M: kept as ``self.M`` and forwarded as the first ``PQ`` argument.
        Ks: kept as ``self.Ks`` and forwarded as the second ``PQ``
            argument (default 256).
    """
    self.K, self.M, self.Ks = K, M, Ks
    # Nothing has been fitted or encoded yet.
    self.vecs = self.codebooks = None
    self.pq = PQ(M, Ks)
def cli(query, infile, css, to_text, to_text_all, first, compact, to_json):
    """Command line xml and json processor for xpath and css selectors.
    """
    # Input comes from the given file object when there is one, otherwise
    # from whatever was piped on stdin.
    piped_data = infile.read() if infile else ''.join(sys.stdin)

    pq = PQ(piped_data, to_text=to_text, to_text_all=to_text_all)

    # Pick the selector engine once, then run the query through it.
    select = pq.css if css else pq.xpath
    result = select(query)

    if first:
        result = result[0] if result else None

    # Falsy results (nothing matched, empty value) print nothing.
    if not result:
        return
    click.echo(pq.output(result, compact=compact, to_json=to_json))
def dijkstras_version_2(self, s):
    """Dijkstra's Algorithm using a binary heap as the PQ.

    Programming Assignment 3, part 3: uses the binary-heap priority queue
    from https://github.com/cicirello/PythonDataStructuresLibrary (pq.py).

    Keyword Arguments:
    s - The source vertex.

    Returns a list of 3-tuples, one per vertex, in the order the vertices
    were extracted from the queue: (vertex id, distance from s, parent).
    E.g., (2, 10, 5) means the shortest path from s to 2 has weight 10 and
    vertex 2's parent is vertex 5.  Vertices that were never relaxed keep
    distance math.inf and parent None.
    """
    n = len(self._adj)
    distance = [math.inf] * n
    parent = [None] * n
    distance[s] = 0

    Q = PQ()
    Q.add(s, 0)
    for u in range(n):
        if u != s:
            Q.add(u, math.inf)

    S = []
    while not Q.is_empty():
        u = Q.extract_min()
        S.append(u)
        for v, w in self._adj[u].__iter__(True):
            candidate = distance[u] + w
            if candidate < distance[v]:
                distance[v] = candidate
                parent[v] = u
                # The queue must learn about the shorter distance; without
                # this every non-source vertex stays at priority inf and
                # extract_min no longer yields the closest vertex.
                Q.change_priority(v, candidate)

    return [(v, distance[v], parent[v]) for v in S]
def dijkstra(self,s):
    """Dijkstra's Algorithm using a binary heap as the PQ.

    Keyword Arguments:
    s - The source vertex.

    Returns a list with one (vertex, cost, parent) tuple per vertex, in
    vertex-id order.  The source is recorded as its own parent; vertices
    that are never relaxed keep cost math.inf and parent None.
    """
    # Programming Assignment 3:
    # 2) Implement Dijkstra's Algorithm using a binary heap implementation of a PQ as the PQ.
    n = len(self._adj)
    parent = [None] * n
    dist = [math.inf] * n
    parent[s] = s
    dist[s] = 0

    settled = set()
    heap = PQ()
    # Every vertex starts in the queue, keyed by its current cost.
    for vertex, cost in enumerate(dist):
        heap.add(vertex, cost)

    while not heap.is_empty():
        u = heap.extract_min()
        settled.add(u)
        for edge in self._adj[u].__iter__(True):
            v, w = edge[0], edge[1]
            relaxed = w + dist[u]
            if relaxed < dist[v]:
                dist[v] = relaxed
                parent[v] = u
                heap.change_priority(v, dist[v])

    return list(zip(range(n), dist, parent))
def recreate(DATABASE_URL):
    """Run ``schema.pgsql`` against the database and rebuild the queue table.

    The schema file is looked up next to this module.  After it has been
    executed the ``queue`` table is dropped if present and created again
    through ``PQ``.
    """
    schema_path = join(dirname(__file__), 'schema.pgsql')

    with connect(DATABASE_URL) as conn, conn.cursor() as db:
        with open(schema_path) as schema_file:
            schema_sql = schema_file.read()
        db.execute(schema_sql)

        # Start from a clean queue table.
        db.execute('DROP TABLE IF EXISTS queue')
        PQ(conn, table='queue').create()
def djikstra(start, neighbor_func, distance_func, goal_pred):
    """Search outward from ``start`` until ``goal_pred`` accepts a node.

    Args:
        start: node the search begins at.
        neighbor_func: called as ``neighbor_func(node)``; its result is
            iterated to get the neighbours of ``node``.
        distance_func: called as ``distance_func(node, neighbor)``; the
            result is added to the distance of ``node``.
        goal_pred: called as ``goal_pred(node)`` after ``node`` has been
            expanded; a truthy result ends the search.

    Returns:
        A dict with keys ``"distance"``, ``"path"`` and ``"parents"``.
        When no popped node satisfies ``goal_pred`` the distance is
        ``float("inf")`` and the path is ``None``.
    """
    best = {start: 0}
    came_from = {start: None}
    done = set()

    frontier = PQ()
    frontier.add_task(start, best[start])

    while not frontier.empty():
        node = frontier.pop_task()

        for nxt in neighbor_func(node):
            if nxt in done:
                continue
            candidate = best[node] + distance_func(node, nxt)
            if nxt not in best:
                # First time this neighbour is seen.
                frontier.add_task(nxt, candidate)
            elif candidate < best[nxt]:
                # Found a shorter way to an already queued neighbour.
                frontier.update_task(nxt, candidate)
            else:
                continue
            best[nxt] = candidate
            came_from[nxt] = node

        done.add(node)

        if goal_pred(node):
            return {
                "distance": best[node],
                "path": reconstruct_path(node, came_from),
                "parents": came_from,
            }

    return {"distance": float("inf"), "path": None, "parents": came_from}
def setUpClass(cls):
    """Create the shared connection pool and make sure the queue table exists.

    Connection settings are read from the ``PQ_TEST_*`` environment
    variables, with defaults for each.  Log records are filtered out while
    the table is being created, and the filter is removed afterwards.
    """
    size = cls.base_concurrency * 4
    env = os.environ.get
    dsn = "dbname=%s user=%s port=%s host=%s password=%s" % (
        env('PQ_TEST_DB', 'pq_test'),
        env('PQ_TEST_USER', 'postgres'),
        env('PQ_TEST_DB_PORT', '5432'),
        env('PQ_TEST_DB_HOST', 'localhost'),
        env('PQ_TEST_DB_PASS', ''),
    )
    cls.pool = ThreadedConnectionPool(
        size, size, dsn, cursor_factory=cls.CURSOR_FACTORY
    )
    cls.pq = PQ(pool=cls.pool, table="queue", queue_class=cls.queue_class)

    def reject_everything(record):
        return False

    logger.addFilter(reject_everything)
    try:
        cls.pq.create()
    except ProgrammingError as exc:
        # Only pgcode 42P07 is tolerated; anything else propagates.
        if exc.pgcode != '42P07':
            raise
    finally:
        logger.removeFilter(reject_everything)
def fit(self, vecs, iter):
    """Alternately train the quantizer and the rotation matrix ``self.R``.

    ``self.R`` starts as the identity.  Each of the first ``iter - 1``
    rounds trains a throw-away ``ResidualPQ`` for one pass on the rotated
    data and rebuilds ``self.R`` from an SVD.  The last round trains the
    quantizer for the full ``iter`` passes and keeps it as ``self.pq``.

    Args:
        vecs: 2-D ``np.float32`` array (both properties are asserted).
        iter: number of rotation rounds, also used as the number of
            training passes in the final round.

    Returns:
        ``self``.
    """
    assert vecs.dtype == np.float32
    assert vecs.ndim == 2
    _, D = vecs.shape
    self.R = np.eye(D, dtype=np.float32)

    rotation_iter = iter
    pq_iter = iter

    from tqdm import tqdm
    iterator = tqdm(
        range(rotation_iter)) if self.verbose else range(rotation_iter)

    for i in iterator:
        X = vecs @ self.R

        if i == rotation_iter - 1:
            # Stop the outer progress bar so the quantizer's own output
            # is shown for the final, full training run.
            if isinstance(iterator, tqdm):
                iterator.close()
            pq_tmp = ResidualPQ(
                [PQ(self.M, self.Ks, self.verbose)
                 for _ in range(self.layer)],
                verbose=self.verbose)
            pq_tmp.fit(X, iter=pq_iter)
            # The rotation is not updated after the last round, so the
            # compress + SVD step is skipped instead of being discarded.
            # NOTE(review): assumes compress() has no side effects — confirm.
            self.pq = pq_tmp
            break

        # (a) During the OPQ rounds a single training pass is enough.
        pq_tmp = ResidualPQ(
            [PQ(self.M, self.Ks, False) for _ in range(self.layer)],
            verbose=False)
        pq_tmp.fit(X, iter=1)

        # (b) Update the rotation matrix R.
        X_ = pq_tmp.compress(X)
        U, _s, V = np.linalg.svd(vecs.T @ X_)
        self.R = U @ V

    return self
def handle_link(text):
    """Replace ``<e ...>`` tags in *text* with plain-text equivalents.

    Each tag is rewritten according to its ``type`` attribute:

    * ``web``      -> ``[title](href)``
    * ``hashtag``  -> `` `title` `` (padded with one space on each side)
    * ``mention``  -> ``title``

    Attribute values are URL-unquoted.  Tags with any other ``type`` are
    left untouched.  Whenever a tag is replaced the surrounding text is
    stripped first.

    Args:
        text: the string to process.

    Returns:
        The processed string; unchanged when it holds no replaceable tag.
    """
    def attr(node, name):
        # A missing attribute yields None, which unquote() cannot handle.
        return urllib.parse.unquote(node.attr(name) or '')

    for tag in re.findall(r'<e\ [^>]*>', text):
        html = PQ(tag)
        kind = html.attr('type')
        if kind == 'web':
            template = '[%s](%s)' % (attr(html, 'title'), attr(html, 'href'))
        elif kind == 'hashtag':
            template = ' `%s` ' % attr(html, 'title')
        elif kind == 'mention':
            template = attr(html, 'title')
        else:
            # Unknown type: previously this hit an unbound (or stale)
            # `template`; leave the tag as it is instead.
            continue
        text = text.strip().replace(tag, template)
    return text
def __init__(self, Ks=256, depth=2):
    """Create the list of single-subspace ``PQ`` objects.

    Args:
        Ks: second argument of every ``PQ``; also decides ``code_dtype``.
        depth: determines how many ``PQ`` objects are created:
            ``(1 + Ks) * (depth // 2) + depth % 2``.
    """
    self.Ks = Ks
    self.depth = depth

    n_quantizers = (1 + self.Ks) * (depth // 2) + depth % 2
    self.pqs = [PQ(1, Ks) for _ in range(n_quantizers)]

    # Smallest unsigned integer type whose range covers Ks.
    if Ks <= 2**8:
        self.code_dtype = np.uint8
    elif Ks <= 2**16:
        self.code_dtype = np.uint16
    else:
        self.code_dtype = np.uint32
def __init__(self, M, Ks, verbose=True, layer=1):
    """Build a ``ResidualPQ`` of ``layer`` stages and remember the settings.

    Args:
        M: first argument of every ``PQ`` stage.
        Ks: second argument of every ``PQ`` stage.
        verbose: third argument of every ``PQ`` stage; also stored.
        layer: number of ``PQ`` stages wrapped by the ``ResidualPQ``.
    """
    stages = [PQ(M, Ks, verbose) for _ in range(layer)]
    self.pq = ResidualPQ(stages)
    self.layer = layer
    self.M, self.Ks = M, Ks
    # The code dtype is taken from the wrapped quantizer.
    self.code_dtype = self.pq.code_dtype
    self.verbose = verbose
    self.R = None
def recreate(DATABASE_URL):
    """Run both schema files against the database and rebuild the queue table.

    ``schema.pgsql`` and ``coverage/schema.pgsql`` are looked up next to
    this module and executed in that order.  Afterwards the ``queue`` table
    is dropped if present and created again through ``PQ``.
    """
    here = dirname(__file__)
    ci_schema_filename = join(here, 'schema.pgsql')
    cov_schema_filename = join(here, 'coverage', 'schema.pgsql')

    with connect(DATABASE_URL) as conn, conn.cursor() as db:
        db.execute('SET client_min_messages TO WARNING')

        for schema_path in (ci_schema_filename, cov_schema_filename):
            with open(schema_path) as schema_file:
                db.execute(schema_file.read())

        # Start from a clean queue table.
        db.execute('DROP TABLE IF EXISTS queue')
        PQ(conn, table='queue').create()
def setup_method(self):
    """Prepare the JSON/XML fixtures and three ``PQ`` instances over the XML."""
    self.data_json = """
    {
        "root": [
            {
                "foo": "bar",
                "deep": {"foo2": "bar_inside"}
            },
            {
                "gar": "fir"
            }
        ]
    }
    """
    raw_xml = """
    <root>
        <foo>bar
            <foo2>bar_inside</foo2>
        </foo>
        <gar>fir</gar>
    </root>
    """
    # Strip every newline and space so the document is one compact string.
    self.data_xml = raw_xml.replace('\n', '').replace(' ', '')

    xml = self.data_xml
    self.pq = PQ(xml)
    self.pq_text = PQ(xml, to_text=True)
    self.pq_text_all = PQ(xml, to_text_all=True)
def DijkstrasVersion2(self, sourceVertex):
    """Dijkstra's algorithm over vertex objects, using a binary-heap PQ.

    Each vertex is expected to expose ``distance``, ``pred``, ``value`` and
    ``edges``; each edge exposes ``neighbor`` and ``weight``.  The vertices'
    ``distance`` and ``pred`` attributes are updated in place.

    Args:
        sourceVertex: the vertex object to start from.

    Returns:
        A list of ``(v.value, v.distance, v.pred)`` tuples, one per vertex
        in ``self.vertices``.
    """
    sourceVertex.distance = 0
    binaryHeap = PQ()
    # The heap is keyed on distance (the original keyed it on v.value).
    binaryHeap.add(sourceVertex, 0)

    while not binaryHeap.is_empty():
        # Remove the closest vertex *before* relaxing its edges; peeking
        # first and extracting afterwards could pop a different vertex once
        # new entries have been pushed.
        tempVertex = binaryHeap.extract_min()
        for e in tempVertex.edges:
            v = e.neighbor
            newDistance = tempVertex.distance + e.weight
            if newDistance < v.distance:
                v.distance = newDistance
                v.pred = tempVertex
                # A vertex already queued needs its key lowered; adding it
                # a second time would not update the priority.
                if binaryHeap.contains(v):
                    binaryHeap.change_priority(v, newDistance)
                else:
                    binaryHeap.add(v, newDistance)

    return [(v.value, v.distance, v.pred) for v in self.vertices]
def setUpClass(cls):
    """Create the shared connection pool and make sure the queue table exists."""
    size = cls.base_concurrency * 4
    cls.pool = ThreadedConnectionPool(
        size, size, "dbname=pq_test user=postgres",
    )
    cls.pq = PQ(pool=cls.pool, table="queue", queue_class=cls.queue_class)

    try:
        cls.pq.create()
    except ProgrammingError as exc:
        # We ignore a duplicate table error.
        if exc.pgcode == '42P07':
            return
        raise
def dijkstra(self,s) :
    """Dijkstra's Algorithm using a binary heap as the PQ.

    Keyword Arguments:
    s - The source vertex.

    Returns a list of (vertex, distance, predecessor) tuples in the order
    the vertices were extracted from the queue.  Vertices that were never
    relaxed keep distance math.inf and predecessor None.
    """
    n = len(self._adj)
    dist = [math.inf] * n
    pred = [None] * n
    dist[s] = 0

    Q = PQ()
    Q.add(s, 0)
    for u in range(n):
        if u != s:
            Q.add(u, math.inf)

    order = []
    while not Q.is_empty():
        u = Q.extract_min()
        order.append(u)
        for v, w in self._adj[u].__iter__(True):
            candidate = dist[u] + w
            if candidate < dist[v]:
                dist[v] = candidate
                pred[v] = u
                # Keep the heap in step with the new distance; without this
                # every non-source vertex stays at priority inf.
                Q.change_priority(v, candidate)

    return [(v, dist[v], pred[v]) for v in order]
def dijkstra(self, s):
    """Dijkstra's Algorithm using a binary heap as the PQ.

    Keyword Arguments:
    s - The source vertex.

    Returns a list of (vertex, distance, predecessor index) tuples in the
    order the vertices were extracted from the queue.  The predecessor is
    None for the source and for vertices that were never relaxed.
    """
    n = len(self._adj)

    # INIT SINGLE-SOURCE
    dist = [math.inf] * n
    pred = [None] * n  # predecessor stored by index, no lookup needed later
    dist[s] = 0

    Q = PQ()
    for u in range(n):
        # Compare by value: `is` on integers only works by accident.
        Q.add(u, 0 if u == s else math.inf)

    order = []
    while not Q.is_empty():
        u = Q.extract_min()
        order.append(u)
        for v, w in self._adj[u].__iter__(True):
            # RELAX
            candidate = dist[u] + w
            if dist[v] > candidate:
                dist[v] = candidate
                pred[v] = u
                # Keep the heap in step with the new distance; without this
                # every non-source vertex stays at priority inf.
                Q.change_priority(v, candidate)

    return [(u, dist[u], pred[u]) for u in order]
def setUpClass(cls):
    """Create the shared connection pool and make sure the queue table exists.

    Database name and user come from ``PQ_TEST_DB`` / ``PQ_TEST_USER``,
    defaulting to ``pq_test`` / ``postgres``.
    """
    size = cls.base_concurrency * 4
    db_name = os.environ.get('PQ_TEST_DB', 'pq_test')
    db_user = os.environ.get('PQ_TEST_USER', 'postgres')
    dsn = "dbname=%s user=%s" % (db_name, db_user)

    cls.pool = ThreadedConnectionPool(
        size, size, dsn, cursor_factory=cls.CURSOR_FACTORY)
    cls.pq = PQ(pool=cls.pool, table="queue", queue_class=cls.queue_class)

    try:
        cls.pq.create()
    except ProgrammingError as exc:
        # We ignore a duplicate table error.
        duplicate_table = exc.pgcode == '42P07'
        if not duplicate_table:
            raise
def DijkstrasVersion2(self,s) :
    """Dijkstra's algorithm from source ``s`` using ``PQ`` as the queue.

    Returns a list of (vertex, distance, previous vertex) tuples in the
    order the vertices were extracted from the queue.
    """
    class VertexData:
        pass

    records = [VertexData() for _ in range(len(self._adj))]
    for idx, rec in enumerate(records):
        rec.dist = 0 if idx == s else float('inf')
        rec.prev = None

    # Every vertex, the source included, goes into the queue.
    heap = PQ()
    for idx, rec in enumerate(records):
        heap.add(idx, rec.dist)

    settled = list()
    while not heap.is_empty():
        u = heap.extract_min()
        here = records[u]
        settled.append((u, here.dist, here.prev))
        for v in self._adj[u]:
            through_u = here.dist + self._w[(u,v)]
            if through_u < records[v].dist:
                records[v].dist = through_u
                records[v].prev = u
                heap.change_priority(v, through_u)
    return settled
def DijkstrasVersion2(self,s) :
    """Dijkstra's algorithm from source ``s`` using ``PQ`` as the queue.

    Returns a list of (vertex, distance, predecessor) tuples in the order
    the vertices were extracted from the queue.  The predecessor is -1 for
    the source and for vertices that were never relaxed.
    """
    class VertexData :
        pass

    vertices = [VertexData() for _ in range(len(self._adj))]
    queue = PQ()
    for i, data in enumerate(vertices) :
        # The source must enter the queue with priority 0.  Previously all
        # vertices were queued with inf and the source's key was never
        # lowered, so the first extraction was not guaranteed to be s.
        data.d = 0 if i == s else inf
        data.pred = -1
        queue.add(i, data.d)

    result = []
    while not queue.is_empty() :
        u = queue.extract_min()
        result.append((u, vertices[u].d, vertices[u].pred))
        for i in self._adj[u]:
            distance = vertices[u].d + self._w[(u,i)]
            if distance < vertices[i].d:
                vertices[i].d = distance
                vertices[i].pred = u
                queue.change_priority(i, distance)
    return result
import logging
from flask import Flask, render_template, redirect, url_for, flash
from pq import PQ
from api import APIGrabber
from db import PonyDB

logging.basicConfig()

# Config
# ---------------

# App config
# NOTE(review): `os` is used below but no `import os` is visible in this
# excerpt — confirm it is imported elsewhere in the file.
app = Flask(__name__)
# Settings object is named by the APP_SETTINGS environment variable.
app.config.from_object(os.environ.get('APP_SETTINGS', None))
db = PonyDB(app)
pq = PQ(db.get_connection())  # Postgres work queue
# Create the queue table only when it does not exist yet.
if db.table_exists('queue') is False:
    pq.create()
queue = pq['themes']

# Routes
# ---------------

@app.route('/')
def show_entries():
    """ List out all the themes. """
    image_themes = db.get_image_themes()
    no_image_themes = db.get_no_image_themes()
    sha = db.get_sha()
    # NOTE(review): no return/render call is visible after this point —
    # the function appears truncated in this excerpt.
# The server will crash on the first run, probably because # of a side effect of create_database # from sqlalchemy_utils import database_exists, create_database # if not database_exists(DB_URL): # create_database(DB_URL) # Create the queues. # from psycopg2 import connect # from pq import PQ # conn = connect('dbname=test108 user=postgres') # pq.create() # pq = PQ(conn) sentry = Sentry(app) conn = connect(DB_URL) pq = PQ(conn) class Task(db.Model): """A task to be completed by a judicious participant.""" __tablename__ = "task" id = db.Column(UUID, primary_key=True, nullable=False) created_at = db.Column(db.DateTime, nullable=False, default=datetime.now) last_queued_at = db.Column(db.DateTime) type = db.Column(db.String(64), nullable=False) parameters = db.Column(db.JSON) person_id = db.Column(UUID, db.ForeignKey('person.id')) context_id = db.Column(UUID, db.ForeignKey('context.id'), nullable=False) last_started_at = db.Column(db.DateTime)
def MST_Prim(self, r=0) :
    """Prim's algorithm rooted at vertex ``r`` (default 0).

    Returns a set of (vertex, parent) pairs, one for every vertex that was
    assigned a parent.
    """
    n = self.numVertices()
    parent = [None] * n

    Q = PQ()
    Q.add(r, 0)
    for u in range(n) :
        if u != r :
            Q.add(u, inf)

    while not Q.isEmpty() :
        u = Q.extractMin()
        for v in self._adj[u] :
            # Only vertices still waiting in the queue can be improved.
            if not Q.contains(v) :
                continue
            weight = self._w[(u,v)]
            if weight < Q.getPriorityValue(v) :
                parent[v] = u
                Q.changePriorityValue(v, weight)

    return {(child, par) for child, par in enumerate(parent) if par is not None}
def mst_prim(self, r=0):
    """Returns the set of edges in some minimum spanning tree (MST) of
    the graph, computed using Prim's algorithm.

    Keyword arguments:
    r - vertex id to designate as the root (default is 0).
    """
    n = len(self._adj)
    parent = [None] * n

    Q = PQ()
    Q.add(r, 0)
    for u in range(n):
        if u != r:
            Q.add(u, math.inf)

    while not Q.is_empty():
        u = Q.extract_min()
        for v, w in self._adj[u].__iter__(True):
            # Only vertices still waiting in the queue can be improved.
            if not Q.contains(v):
                continue
            if w < Q.get_priority(v):
                parent[v] = u
                Q.change_priority(v, w)

    # One (parent, child) edge per vertex that received a parent.
    return {(par, child) for child, par in enumerate(parent) if par is not None}
class IVFPQ:
    """Coarse k-means partitioning combined with a ``PQ`` on the residuals.

    Vectors are assigned to one of ``K`` centroids; the difference between a
    vector and its centroid is what gets passed to the ``PQ`` object.
    """

    def __init__(self, K, M, Ks=256):
        """Store the hyper-parameters and create the underlying ``PQ``.

        Args:
            K: number of k-means centroids (passed as ``k`` to ``kmeans2``).
            M: first argument of ``PQ``.
            Ks: second argument of ``PQ`` (default 256).
        """
        self.K = K
        self.M = M
        self.Ks = Ks
        self.vecs = None
        self.codebooks = None
        self.pq = PQ(M, Ks)

    def ivf(self, vecs, iterations):
        """Cluster ``vecs`` with ``kmeans2`` and return (centroids, labels)."""
        print('vecs: ss', len(vecs))
        centroids, labels = kmeans2(data=vecs, k=self.K, iter=iterations,
                                    minit='points')
        return centroids, labels

    def residual(self, vecs, centroids, labels):
        """Return each vector minus the centroid selected by its label."""
        return vecs - centroids[labels]

    def fit(self, vecs, iterations=20, seed=123):
        """Learn the centroids, then fit the ``PQ`` on the residuals.

        Original note: obtains codebooks of shape (M, Ks, Ds).

        Args:
            vecs: training vectors.
            iterations: passed to both ``kmeans2`` and ``PQ.fit``.
            seed: passed to ``PQ.fit``.
        """
        # Clustering.
        self.centroids, self.labels = self.ivf(vecs, iterations)
        # Residuals.
        residuals = self.residual(vecs, self.centroids, self.labels)
        self.pq.fit(vecs=residuals, iterations=iterations, seed=seed)

    def encode(self, vecs):
        """Compress ``vecs`` and store the result on the instance.

        Sets ``self.vecs`` to the encoded residuals (original note: shape
        (N, M)) and ``self.centroid2vec`` to a mapping
        ``{centroid id: [index of v1, index of v2, ...]}``.
        """
        centroid_ids, _ = vq(vecs, self.centroids)
        residuals = vecs - self.centroids[centroid_ids]

        centroid2vec = {}
        for i, c_id in enumerate(centroid_ids):
            centroid2vec.setdefault(c_id, []).append(i)

        self.vecs = self.pq.encode(residuals)
        self.centroid2vec = centroid2vec

    def get_topH_partitions(self, query, H):
        """Return the ids of up to ``H`` nearest centroids that hold vectors.

        ``query`` is the original (un-shifted) query.  Centroids with no
        entry in ``self.centroid2vec`` are skipped, so fewer than ``H`` ids
        may be returned.
        """
        sq_dists = np.linalg.norm(self.centroids - query, axis=1)**2
        populated = [iid for iid in np.argsort(sq_dists)
                     if iid in self.centroid2vec]
        return np.array(populated[:H])

    def dtable(self, query, topH_centroids):
        """Build one ``PQ`` distance table per selected centroid.

        ``query`` is the original query; for each centroid the table is
        built from ``query - centroid``.
        """
        # Each residual is computed from the original query.  Re-binding
        # `query` inside the loop (as before) subtracted every earlier
        # centroid as well.
        return [self.pq.dtable(query - self.centroids[pid])
                for pid in topH_centroids]

    def find_topK(self, query, K, H):
        """Return up to ``K`` (vector index, distance) pairs, nearest first.

        The ``H`` nearest populated partitions are searched; the best ``K``
        candidates of each are merged and the overall best ``K`` returned.
        """
        topH_centroids = self.get_topH_partitions(query, H)
        dtables = self.dtable(query, topH_centroids)

        ret = []
        total_dists = []
        # Iterate over the partitions actually found: there can be fewer
        # than H of them, in which case range(H) indexed out of bounds.
        for pid, table in zip(topH_centroids, dtables):
            sub_vecs_ids = np.array(self.centroid2vec[pid])
            dists = table.adist(self.vecs[sub_vecs_ids])
            topNum_ids = np.argsort(dists)[:K]
            total_dists += list(dists[topNum_ids])
            ret += list(sub_vecs_ids[topNum_ids])

        return sorted(zip(ret, total_dists), key=lambda x: x[1])[:K]
def db_queue(conn, name):
    """Return the queue called ``name`` from the ``queue`` table on ``conn``."""
    queues = PQ(conn, table='queue')
    return queues[name]
from psycopg2 import connect, ProgrammingError
from pq import PQ
from wx_explore.common.config import Config

# Module-level work queue backed by the 'work_queue' table; the connection
# is opened at import time using the POSTGRES_* settings from Config.
pq = PQ(connect(
    user=Config.POSTGRES_USER,
    password=Config.POSTGRES_PASS,
    host=Config.POSTGRES_HOST,
    port=Config.POSTGRES_PORT,
    dbname=Config.POSTGRES_DB,
), table='work_queue')

# Create the table on import; a ProgrammingError with pgcode 42P07 is
# swallowed, any other error propagates.
try:
    pq.create()
except ProgrammingError as exc:
    if exc.pgcode != '42P07':
        raise
class TestClass:
    """Selector and path-processing tests for ``PQ``."""

    # noinspection PyAttributeOutsideInit
    def setup_method(self):
        """Prepare the JSON/XML fixtures and three ``PQ`` instances over the XML."""
        self.data_json = """
        {
            "root": [
                {
                    "foo": "bar",
                    "deep": {"foo2": "bar_inside"}
                },
                {
                    "gar": "fir"
                }
            ]
        }
        """
        raw_xml = """
        <root>
            <foo>bar
                <foo2>bar_inside</foo2>
            </foo>
            <gar>fir</gar>
        </root>
        """
        # Strip every newline and space so the document is one compact string.
        self.data_xml = raw_xml.replace('\n', '').replace(' ', '')

        xml = self.data_xml
        self.pq = PQ(xml)
        self.pq_text = PQ(xml, to_text=True)
        self.pq_text_all = PQ(xml, to_text_all=True)

    def test_selector_from_json(self):
        """The shared selector checks pass for JSON input."""
        self._test_selector(PQ(self.data_json))

    def test_selector_from_xml(self):
        """The shared selector checks pass for XML input."""
        self._test_selector(PQ(self.data_xml))

    def _test_selector(self, pq):
        """Checks shared by the JSON and XML selector tests."""
        xpath, css = pq.xpath, pq.css

        # Top-level <foo>: list results.
        assert xpath('//foo/text()') == ['bar']
        assert css('foo::text') == ['bar']

        # Top-level <foo>: first match only.
        assert xpath('//foo/text()', first=True) == 'bar'
        assert css('foo::text', first=True) == 'bar'

        # Top-level <foo>: text extraction requested through to_text.
        assert xpath('//foo', first=True, to_text=True) == 'bar'
        assert css('foo', first=True, to_text=True) == 'bar'

        # Nested <foo2>.
        assert xpath('//foo2/text()') == ['bar_inside']
        assert css('foo2::text') == ['bar_inside']

    def test_process_xpath(self):
        """``process_path`` adds the xpath text suffix only when asked to."""
        plain = self.pq.process_path
        assert 'text()' not in plain('//foo', func_name='xpath')
        assert 'text()' in plain('//foo', func_name='xpath', to_text=True)
        assert 'text()' in self.pq_text.process_path('//foo',
                                                     func_name='xpath')
        assert '//text()' in plain('//foo', func_name='xpath',
                                   to_text_all=True)
        assert '//text()' in self.pq_text_all.process_path('//foo',
                                                           func_name='xpath')

    def test_process_css(self):
        """``process_path`` adds the css text suffix only when asked to."""
        plain = self.pq.process_path
        assert '::text' not in plain('//foo', func_name='css')
        assert '::text' in plain('//foo', func_name='css', to_text=True)
        assert '::text' in self.pq_text.process_path('//foo',
                                                     func_name='css')
        assert ' ::text' in plain('//foo', func_name='css',
                                  to_text_all=True)
        assert ' ::text' in self.pq_text_all.process_path('//foo',
                                                          func_name='css')
def test_selector_from_json(self):
    """The shared selector checks pass for a ``PQ`` built from the JSON fixture."""
    self._test_selector(PQ(self.data_json))
def test_selector_from_xml(self):
    """The shared selector checks pass for a ``PQ`` built from the XML fixture."""
    self._test_selector(PQ(self.data_xml))