def max_distinct(table, maxer, col, constraint={}):
    # Note that this will return None for the max of an empty set
    cols = SQL(", ").join(map(IdentifierWrapper, table.search_cols))
    tbl = IdentifierWrapper(table.search_table)
    qstr, values = table._build_query(constraint, sort=[])
    maxer = maxer.format(IdentifierWrapper(col), cols, tbl, qstr)
    cur = table._execute(maxer, values)
    return cur.fetchone()[0]

def max_distinct(table, maxer, col, constraint={}, include_deleted=False):
    # Note that this will return None for the max of an empty set
    constraint = dict(constraint)
    if not include_deleted:
        constraint["deleted"] = {"$or": [False, {"$exists": False}]}
    cols = SQL(", ").join(map(IdentifierWrapper, table.search_cols))
    tbl = IdentifierWrapper(table.search_table)
    qstr, values = table._build_query(constraint, sort=[])
    maxer = maxer.format(IdentifierWrapper(col), cols, tbl, qstr)
    cur = table._execute(maxer, values)
    return cur.fetchone()[0]

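# A minimal sketch of the kind of "maxer" template these helpers expect:
# versioned rows are collapsed with DISTINCT ON before taking the max.  The
# dedup column ("shortname") and the newest-row-has-largest-id convention are
# assumptions for illustration; substitute the table's actual version key.
# Slots: {0}=column to maximize, {1}=all columns, {2}=table, {3}=query tail.
_maxer = SQL(
    "SELECT MAX({0}) FROM "
    "(SELECT DISTINCT ON (shortname) {1} FROM {2} ORDER BY shortname, id DESC) tmp{3}"
)

# Usage sketch (the column and constraint are hypothetical):
# latest = max_distinct(db.seminars, _maxer, "edited_at", {"owner": "user@example.org"})
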
def count_distinct(table, counter, query={}):
    cols = SQL(", ").join(map(IdentifierWrapper, table.search_cols))
    tbl = IdentifierWrapper(table.search_table)
    qstr, values = table._build_query(query, sort=[])
    counter = counter.format(cols, tbl, qstr)
    cur = table._execute(counter, values)
    return int(cur.fetchone()[0])

def count_distinct(table, counter, query={}, include_deleted=False):
    query = dict(query)
    if not include_deleted:
        query["deleted"] = {"$or": [False, {"$exists": False}]}
    cols = SQL(", ").join(map(IdentifierWrapper, table.search_cols))
    tbl = IdentifierWrapper(table.search_table)
    qstr, values = table._build_query(query, sort=[])
    counter = counter.format(cols, tbl, qstr)
    cur = table._execute(counter, values)
    return int(cur.fetchone()[0])

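# A matching "counter" template, sketched under the same assumptions as _maxer
# above (slots: {0}=all columns, {1}=table, {2}=query tail), plus a thin
# wrapper in the style the call sites below use:
_counter = SQL(
    "SELECT COUNT(*) FROM "
    "(SELECT DISTINCT ON (shortname) {0} FROM {1} ORDER BY shortname, id DESC) tmp{2}"
)

def seminars_count(query={}, include_deleted=False):
    # Replacement for db.seminars.count that respects versioning and soft deletes.
    return count_distinct(db.seminars, _counter, query, include_deleted)
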
def search_distinct(
    table,
    selecter,
    counter,
    iterator,
    query={},
    projection=1,
    limit=None,
    offset=0,
    sort=None,
    info=None,
    include_deleted=False,
):
    """
    Replacement for db.*.search to account for versioning and to return Web* objects.

    Doesn't support split_ors, raw or extra tables.  Always computes count.

    INPUT:

    - ``table`` -- a search table, such as db.seminars or db.talks
    - ``counter`` -- an SQL object counting distinct entries
    - ``selecter`` -- an SQL object selecting distinct entries
    - ``iterator`` -- an iterator taking the same arguments as ``_search_iterator``
    """
    if offset < 0:
        raise ValueError("Offset cannot be negative")
    query = dict(query)
    if not include_deleted:
        query["deleted"] = {"$or": [False, {"$exists": False}]}
    all_cols = SQL(", ").join(map(IdentifierWrapper, ["id"] + table.search_cols))
    search_cols, extra_cols = table._parse_projection(projection)
    cols = SQL(", ").join(map(IdentifierWrapper, search_cols + extra_cols))
    tbl = IdentifierWrapper(table.search_table)
    nres = count_distinct(table, counter, query)
    if limit is None:
        qstr, values = table._build_query(query, sort=sort)
    else:
        qstr, values = table._build_query(query, limit, offset, sort)
    fselecter = selecter.format(cols, all_cols, tbl, qstr)
    cur = table._execute(
        fselecter,
        values,
        buffered=(limit is None),
        slow_note=(table.search_table, "analyze", query, repr(projection), limit, offset),
    )
    results = iterator(cur, search_cols, extra_cols, projection)
    if limit is None:
        if info is not None:
            # caller is requesting count data
            info["number"] = nres
        return results
    if info is not None:
        if offset >= nres > 0:
            # We're passing in an info dictionary, so this is a front end query,
            # and the user has requested a start location larger than the number
            # of results.  We adjust the results to be the last page instead.
            # Integer division keeps offset an int for the SQL OFFSET clause.
            offset -= (1 + (offset - nres) // limit) * limit
            if offset < 0:
                offset = 0
            return search_distinct(table, selecter, counter, iterator, query, projection, limit, offset, sort, info)
        info["query"] = dict(query)
        info["number"] = nres
        info["count"] = limit
        info["start"] = offset
        info["exact_count"] = True
    return list(results)

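# A matching "selecter" template and wrapper, sketched under the same
# assumptions as the templates above: {0} is the projected columns, {1} the
# full column list used when picking the newest version of each row, {2} the
# table, {3} the query tail.
_selecter = SQL(
    "SELECT {0} FROM "
    "(SELECT DISTINCT ON (shortname) {1} FROM {2} ORDER BY shortname, id DESC) tmp{3}"
)

def seminars_search(*args, **kwds):
    # Replacement for db.seminars.search; here _search_iterator is assumed to
    # wrap each returned row in a WebSeminar object.
    return search_distinct(db.seminars, _selecter, _counter, _search_iterator, *args, **kwds)
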
def write_content_table(data_folder, table, query, selecter, approve_row, users, sep):
    now_overall = time.time()
    print("Exporting %s..." % (table.search_table))
    # The SQL queries for talks and seminars are different
    tablename = table.search_table
    if table in [db.talks, db.seminars]:
        cols = SQL(", ").join(map(IdentifierWrapper, ["id"] + table.search_cols))
        query = SQL(query)
        selecter = selecter.format(cols, cols, IdentifierWrapper(tablename), query)
    searchfile = os.path.join(data_folder, tablename + ".txt")
    header = (
        sep.join(["id"] + table.search_cols)
        + "\n"
        + sep.join(["bigint"] + [table.col_type[col] for col in table.search_cols])
        + "\n\n"
    )
    table._copy_to_select(selecter, searchfile, header, sep=sep)
    safe_cols = ["id"] + [col for col in table.search_cols if col in whitelisted_cols]
    clear_private_data(searchfile, safe_cols, approve_row, users, sep)

    # do the other files
    from lmfdb.backend.table import _counts_cols, _stats_cols
    from lmfdb.backend.base import _meta_indexes_cols, _meta_constraints_cols, _meta_tables_cols

    statsfile = os.path.join(data_folder, tablename + "_stats.txt")
    countsfile = os.path.join(data_folder, tablename + "_counts.txt")
    indexesfile = os.path.join(data_folder, tablename + "_indexes.txt")
    constraintsfile = os.path.join(data_folder, tablename + "_constraints.txt")
    metafile = os.path.join(data_folder, tablename + "_meta.txt")
    tabledata = [
        # tablename, cols, addid, write_header, filename
        (table.stats.counts, _counts_cols, False, False, countsfile),
        (table.stats.stats, _stats_cols, False, False, statsfile),
    ]
    metadata = [
        ("meta_indexes", "table_name", _meta_indexes_cols, indexesfile),
        ("meta_constraints", "table_name", _meta_constraints_cols, constraintsfile),
        ("meta_tables", "name", _meta_tables_cols, metafile),
    ]
    with DelayCommit(table):
        for tbl, cols, addid, write_header, filename in tabledata:
            if filename is None:
                continue
            now = time.time()
            if addid:
                cols = ["id"] + cols
            cols_wquotes = ['"' + col + '"' for col in cols]
            cur = table._db.cursor()
            with open(filename, "w") as F:
                try:
                    if write_header:
                        table._write_header_lines(F, cols, sep=sep)
                    cur.copy_to(F, tbl, columns=cols_wquotes, sep=sep)
                except Exception:
                    table.conn.rollback()
                    raise
            print("\tExported %s in %.3f secs to %s" % (tbl, time.time() - now, filename))
        for tbl, wherecol, cols, filename in metadata:
            if filename is None:
                continue
            now = time.time()
            cols = SQL(", ").join(map(Identifier, cols))
            select = SQL("SELECT {0} FROM {1} WHERE {2} = {3}").format(
                cols,
                Identifier(tbl),
                Identifier(wherecol),
                Literal(table.search_table),
            )
            table._copy_to_select(select, filename, silent=True, sep=sep)
            print("\tExported data from %s in %.3f secs to %s" % (tbl, time.time() - now, filename))
    print("Exported %s in %.3f secs" % (table.search_table, time.time() - now_overall))

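# A hypothetical export call; the folder, WHERE fragment, and approve_row
# callback are assumptions for illustration.  For db.seminars/db.talks the raw
# `query` string is spliced into the selecter template as its final slot, so it
# filters the deduplicated (current-version) rows:
#
# write_content_table(
#     data_folder="/tmp/export",
#     table=db.seminars,
#     query=" WHERE deleted = false",
#     selecter=_selecter,
#     approve_row=lambda row: True,  # hypothetical: publish every row
#     users=False,
#     sep="|",
# )
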
def basic_selecter(table, query=SQL("")):
    return SQL("SELECT {0} FROM {1}{2}").format(
        SQL(", ").join(map(IdentifierWrapper, ["id"] + table.search_cols)),
        IdentifierWrapper(table.search_table),
        query,
    )

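# Usage sketch for basic_selecter: unlike the DISTINCT ON templates above, this
# selects raw rows (all versions).  The lookup fragment below is a hypothetical
# example; note that the query fragment must begin with a space.
#
# sel = basic_selecter(
#     db.seminars,
#     SQL(" WHERE {0} = %s").format(IdentifierWrapper("shortname")),
# )
# cur = db.seminars._execute(sel, ["hypothetical_semid"])
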
def index():
    # TODO: use a join for the following query
    seminars = {}
    conferences = {}
    deleted_seminars = []
    deleted_talks = []

    def key(elt):
        role_key = {"organizer": 0, "curator": 1, "creator": 3}
        return (role_key[elt[1]], elt[0].name)

    for rec in db.seminar_organizers.search(
        {"email": ilike_query(current_user.email)},
        ["seminar_id", "curator"],
    ):
        semid = rec["seminar_id"]
        role = "curator" if rec["curator"] else "organizer"
        seminar = WebSeminar(semid)
        pair = (seminar, role)
        if seminar.is_conference:
            conferences[semid] = pair
        else:
            seminars[semid] = pair
    role = "creator"
    for semid in seminars_search({"owner": ilike_query(current_user.email)}, "shortname", include_deleted=True):
        if semid not in seminars and semid not in conferences:
            seminar = WebSeminar(semid, deleted=True)  # allow deleted
            pair = (seminar, role)
            if seminar.deleted:
                deleted_seminars.append(seminar)
            elif seminar.is_conference:
                conferences[semid] = pair
            else:
                seminars[semid] = pair
    seminars = sorted(seminars.values(), key=key)
    conferences = sorted(conferences.values(), key=key)
    deleted_seminars.sort(key=lambda sem: sem.name)
    for semid, semctr in db._execute(
        # ~~* is a case-insensitive match
        SQL(
            """
            SELECT DISTINCT ON ({Ttalks}.{Csemid}, {Ttalks}.{Csemctr})
                   {Ttalks}.{Csemid}, {Ttalks}.{Csemctr}
            FROM {Ttalks}
            INNER JOIN {Tsems} ON {Ttalks}.{Csemid} = {Tsems}.{Csname}
            WHERE {Tsems}.{Cowner} ~~* %s AND {Ttalks}.{Cdel} = %s AND {Tsems}.{Cdel} = %s
            """
        ).format(
            Ttalks=IdentifierWrapper("talks"),
            Tsems=IdentifierWrapper("seminars"),
            Csemid=IdentifierWrapper("seminar_id"),
            Csemctr=IdentifierWrapper("seminar_ctr"),
            Csname=IdentifierWrapper("shortname"),
            Cowner=IdentifierWrapper("owner"),
            Cdel=IdentifierWrapper("deleted"),
        ),
        [ilike_escape(current_user.email), True, False],
    ):
        talk = WebTalk(semid, semctr, deleted=True)
        deleted_talks.append(talk)
    deleted_talks.sort(key=lambda talk: (talk.seminar.name, talk.start_time))

    manage = "Manage" if current_user.is_organizer else "Create"
    return render_template(
        "create_index.html",
        seminars=seminars,
        conferences=conferences,
        deleted_seminars=deleted_seminars,
        deleted_talks=deleted_talks,
        institution_known=institution_known,
        institutions=institutions(),
        section=manage,
        subsection="home",
        title=manage,
        user_is_creator=current_user.is_creator,
    )

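# For reference, the join above renders (roughly) to the following SQL, which
# finds talks that were soft-deleted out of the user's still-live seminars
# (talks.deleted = true while seminars.deleted = false):
#
#   SELECT DISTINCT ON (talks.seminar_id, talks.seminar_ctr)
#          talks.seminar_id, talks.seminar_ctr
#   FROM talks
#   INNER JOIN seminars ON talks.seminar_id = seminars.shortname
#   WHERE seminars.owner ~~* %s AND talks.deleted = %s AND seminars.deleted = %s
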
def search_distinct(
    table,
    selecter,
    counter,
    iterator,
    query={},
    projection=1,
    limit=None,
    offset=0,
    sort=None,
    info=None,
    include_deleted=False,
    include_pending=False,
    more=False,
):
    """
    Replacement for db.*.search to account for versioning and to return Web* objects.

    Doesn't support split_ors, raw or extra tables.  Always computes count.

    INPUT:

    - ``table`` -- a search table, such as db.seminars or db.talks
    - ``counter`` -- an SQL object counting distinct entries
    - ``selecter`` -- an SQL object selecting distinct entries
    - ``iterator`` -- an iterator taking the same arguments as ``_search_iterator``
    """
    if offset < 0:
        raise ValueError("Offset cannot be negative")
    query = dict(query)
    if not include_deleted:
        query["deleted"] = {"$or": [False, {"$exists": False}]}
    all_cols = SQL(", ").join(map(IdentifierWrapper, ["id"] + table.search_cols))
    search_cols, extra_cols = table._parse_projection(projection)
    tbl = IdentifierWrapper(table.search_table)
    if limit is None:
        qstr, values = table._build_query(query, sort=sort)
    else:
        qstr, values = table._build_query(query, limit, offset, sort)
    prequery = {} if include_pending else {"$or": [{"display": True}, {"by_api": False}]}
    if prequery:
        # We filter the records before finding the most recent (normal queries
        # filter after finding the most recent).  This is mainly used for
        # setting display=False or display=True.  We take advantage of the fact
        # that the WHERE clause occurs just after the table name in all of our
        # query constructions.
        pqstr, pqvalues = table._parse_dict(prequery)
        if pqstr is not None:
            tbl = tbl + SQL(" WHERE {0}").format(pqstr)
            values = pqvalues + values
    if more is not False:  # might be an empty dictionary
        more, moreval = table._parse_dict(more)
        if more is None:
            more = Placeholder()
            moreval = [True]
        cols = SQL(", ").join(list(map(IdentifierWrapper, search_cols + extra_cols)) + [more])
        extra_cols = extra_cols + ("more",)
        values = moreval + values
    else:
        cols = SQL(", ").join(map(IdentifierWrapper, search_cols + extra_cols))
    fselecter = selecter.format(cols, all_cols, tbl, qstr)
    cur = table._execute(
        fselecter,
        values,
        buffered=False,
        slow_note=(
            table.search_table,
            "analyze",
            query,
            repr(projection),
            limit,
            offset,
        ),
    )
    results = iterator(cur, search_cols, extra_cols, projection)
    if info is not None:
        # caller is requesting count data
        nres = count_distinct(table, counter, query)
        if limit is None:
            info["number"] = nres
            return results
        if offset >= nres > 0:
            # We're passing in an info dictionary, so this is a front end query,
            # and the user has requested a start location larger than the number
            # of results.  We adjust the results to be the last page instead.
            # Integer division keeps offset an int for the SQL OFFSET clause.
            offset -= (1 + (offset - nres) // limit) * limit
            if offset < 0:
                offset = 0
            return search_distinct(
                table,
                selecter,
                counter,
                iterator,
                query,
                projection,
                limit,
                offset,
                sort,
                info,
            )
        info["query"] = dict(query)
        info["number"] = nres
        info["count"] = limit
        info["start"] = offset
        info["exact_count"] = True
    res = list(results)
    return res

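# Hedged usage sketch for the "more" hook: pass a sub-query whose truth value
# is evaluated per row and surfaced as an extra boolean column named "more",
# without filtering the result set.  talks_search and the .more attribute are
# assumptions for illustration, mirroring the seminars wrapper above.
#
# upcoming = {"start_time": {"$gte": datetime.datetime.now(tz=pytz.UTC)}}
# talks = talks_search({"seminar_id": "hypothetical_semid"}, limit=10, more=upcoming)
# past = [t for t in talks if not t.more]
# future = [t for t in talks if t.more]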