def cache_keywords():
    core.configure_logging('debug')
    from backend.db import Keyword

    engine = core.get_database_engine_string()
    logging.info("Using connection string '%s'" % (engine,))
    engine = create_engine(engine, encoding='utf-8', isolation_level="READ UNCOMMITTED")
    session = Session(bind=engine, autocommit=False)

    # Estimate the number of keywords
    logging.debug("Estimating number of keywords...")
    for count, in session.execute("SELECT COUNT(*) FROM keywords"):
        total = count

    logging.debug("Establishing connection to redis...")
    r = get_redis_instance(1)

    logging.info("Caching %d keywords...", total)
    cached = 0
    for _id, word in session.execute("SELECT id, word FROM keywords"):
        assert r.set(word, _id)
        cached += 1
        if cached % 1000 == 0:
            logging.info("Cached %d keywords (%.2f%% done)", cached, 100.0 * cached / total)

    logging.info("Cached %d keywords (%.2f%% done)", cached, 100.0 * cached / total)
def upgrade():
    context = op.get_context()
    session = Session()
    session.bind = context.bind

    for tp in session.query(TimeEntry).filter_by(customer_request_id=None):
        for trac in tp.project.tracs:
            cr = session.execute(
                'select value from "trac_%s".ticket_custom where name=\'customerrequest\' and ticket=%s'
                % (trac.trac_name, tp.ticket)).fetchone()
            sql_cr = session.execute(
                "select id from customer_requests where id='%s'" % cr.value).fetchone()
            tp.customer_request_id = sql_cr.id
            print(sql_cr.id)

    session.commit()
def upgrade():
    context = op.get_context()
    session = Session()
    session.bind = context.bind
    session.execute(
        """ALTER TABLE applications
               DROP CONSTRAINT "applications_project_id_fkey",
               ADD CONSTRAINT "applications_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;
           ALTER TABLE customer_requests
               DROP CONSTRAINT "customer_requests_project_id_fkey",
               ADD CONSTRAINT "customer_requests_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;
           ALTER TABLE groups
               DROP CONSTRAINT "groups_project_id_fkey",
               ADD CONSTRAINT "groups_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;
           ALTER TABLE contracts
               ADD CONSTRAINT "contracts_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;
           ALTER TABLE kanban_projects
               DROP CONSTRAINT "kanban_projects_project_id_fkey",
               ADD CONSTRAINT "kanban_projects_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade on delete cascade;
           ALTER TABLE favorite_projects
               DROP CONSTRAINT "favorite_projects_project_id_fkey",
               ADD CONSTRAINT "favorite_projects_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;
           ALTER TABLE time_entries
               DROP CONSTRAINT "time_entries_project_id_fkey",
               ADD CONSTRAINT "time_entries_project_id_fkey"
                   foreign key (project_id) references projects(id) on update cascade;""")
    session.commit()
def upgrade():
    from sqlalchemy.orm.session import Session
    session = Session(bind=op.get_bind())

    # Find duplicates
    for vcs in session.query(Vcs).group_by(Vcs.repository_id, Vcs.revision).having(
            func.count(Vcs.id) > 1).all():
        print(vcs)
        # Find all vcs entries with this duplication
        dupes = session.query(Vcs).filter(Vcs.repository_id == vcs.repository_id).filter(
            Vcs.revision == vcs.revision).all()
        # Keep the first and remove the others - so references to the others
        # need to be updated to point at the first
        for update in dupes[1:]:
            for af in session.query(Artifakt).filter(Artifakt.vcs_id == update.id).all():
                print("Updating artifakt {} to point to vcs {}".format(af.sha1, dupes[0].id))
                af.vcs_id = dupes[0].id
            print("Deleting vcs {}".format(update.id))
            session.delete(update)
    session.commit()

    if session.bind.dialect.name == "sqlite":
        session.execute("PRAGMA foreign_keys = OFF")
    elif session.bind.dialect.name == "mysql":
        session.execute("SET foreign_key_checks = 0")
    else:
        raise NotImplementedError

    with op.batch_alter_table('vcs', schema=None) as batch_op:
        batch_op.create_unique_constraint('rr', ['repository_id', 'revision'])

    if session.bind.dialect.name == "sqlite":
        session.execute("PRAGMA foreign_keys = ON")
    elif session.bind.dialect.name == "mysql":
        session.execute("SET foreign_key_checks = 1")
def get_document_rows(self, keywords, domains=set([]), dmset=set([])):
    # Create a new session
    session = Session(bind=engine)

    # Look up the article keywords
    kres = KeywordIDResolutionService()
    _keywords = {k: kres.resolve(k) for k in keywords}
    resolved = 0
    for k in _keywords:
        if _keywords[k] is None:
            yield QueryMessage("No matching keyword: %s", k)
        else:
            resolved += 1
    if resolved == 0:
        raise QueryException("No matching keywords.")

    # Find the sites which talk about a particular keyword
    sql = """SELECT domains.`key`, COUNT(*) AS c
             FROM domains
                 JOIN articles ON articles.domain_id = domains.id
                 JOIN documents ON documents.article_id = articles.id
                 JOIN keyword_adjacencies ON keyword_adjacencies.doc_id = documents.id
             WHERE keyword_adjacencies.key1_id IN (:keys)
                OR keyword_adjacencies.key2_id IN (:keys)
             GROUP BY domains.id
             ORDER BY c DESC
             LIMIT 0,5"""
    # Only pass the keywords that actually resolved to ids
    keys = ','.join(str(i) for i in _keywords.values() if i is not None)
    for key, count in session.execute(sql, {'keys': keys}):
        logging.info((key, count))
        domains.add(key)

    return self._kd_proc.get_document_rows(keywords, domains, dmset)
def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    op.add_column('time_entries', sa.Column('customer_request_id', sa.String(), nullable=True))
    op.drop_column('time_entries', u'contract_id')
    op.drop_column('customer_requests', u'placement')

    context = op.get_context()
    session = Session()
    session.bind = context.bind
    for tp in session.query(TimeEntry):
        for trac in tp.project.tracs:
            cr = session.execute(
                'select value from "trac_%s".ticket_custom where name=\'customerrequest\' and ticket=%s'
                % (trac.trac_name, tp.ticket)).fetchone()
            sql_cr = session.execute(
                "select id from customer_requests where id='%s'" % cr.value).fetchone()
            tp.customer_request_id = sql_cr.id
    session.commit()
def _select_short_term_statistics_to_purge(
    session: Session, purge_before: datetime
) -> list[int]:
    """Return a list of short term statistics to purge."""
    statistics = session.execute(find_short_term_statistics_to_purge(purge_before)).all()
    _LOGGER.debug("Selected %s short term statistics to remove", len(statistics))
    return [statistic.id for statistic in statistics]
def import_corpora(db_session: Session, dir_corpus: Path) -> None:
    mappings = []
    for dest_corpus in dir_corpus.glob('**/*.txt'):
        with dest_corpus.open(mode='r') as f:
            mapping = {
                'name': str(dest_corpus),
                'content': ' '.join(
                    chain.from_iterable(
                        line.split(' ') for line in f.read().splitlines())),
            }
            mappings.append(mapping)
    db_session.execute(Document.__table__.insert(), mappings)
    db_session.commit()
def _purge_short_term_statistics(
    session: Session, short_term_statistics: list[int]
) -> None:
    """Delete by id."""
    deleted_rows = session.execute(
        delete_statistics_short_term_rows(short_term_statistics))
    _LOGGER.debug("Deleted %s short term statistics", deleted_rows)
def _purge_old_recorder_runs(
    instance: Recorder, session: Session, purge_before: datetime
) -> None:
    """Purge all old recorder runs."""
    # The recorder_runs table is small, so there is no need to batch this
    deleted_rows = session.execute(
        delete_recorder_runs_rows(purge_before, instance.run_history.current.run_id))
    _LOGGER.debug("Deleted %s recorder_runs", deleted_rows)
def _purge_state_ids(instance: Recorder, session: Session, state_ids: set[int]) -> None:
    """Disconnect states and delete by state id."""
    # Update old_state_id to NULL before deleting to ensure the delete does not
    # fail due to a foreign key constraint, since some databases (MSSQL) cannot
    # do the ON DELETE SET NULL for us.
    disconnected_rows = session.execute(disconnect_states_rows(state_ids))
    _LOGGER.debug("Updated %s states to remove old_state_id", disconnected_rows)

    deleted_rows = session.execute(delete_states_rows(state_ids))
    _LOGGER.debug("Deleted %s states", deleted_rows)

    # Evict any entries in the old_states cache referring to a purged state
    _evict_purged_states_from_old_states_cache(instance, state_ids)
def patch_registry(session: Session, prefix: str) -> None:
    """Create an entry similar to a registry defined by identifiers.org."""
    entry = {"namespace": prefix, "is_prefixed": False}
    if prefix == "envipath":
        entry["name"] = "enviPath"
        entry["pattern"] = r"^.+$"
        entry["access_url"] = "https://envipath.org/package/{$id}"
    elif prefix == "synonyms":
        entry["name"] = "Synonyms"
        entry["pattern"] = r"^.+$"
    elif prefix == "coco":
        entry["name"] = "Component-Contribution Metabolite"
        entry["pattern"] = r"^COCOM\d+$"
    else:
        raise ValueError(f"Unknown registry prefix '{prefix}'.")
    # We use low-level insertion in order to circumvent the validation.
    session.execute(Registry.__table__.insert(), [entry])
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint('datasets_user_id_users_fkey', 'datasets',
                       schema='sampledb', type_='foreignkey')

    session = Session(bind=op.get_bind())
    session.execute(f"DROP VIEW IF EXISTS {DatasetView.__table__}")
    session.commit()

    result = session.execute(
        """SELECT con.conname, information_schema.tables.table_name
           FROM pg_catalog.pg_constraint con
               INNER JOIN pg_catalog.pg_class rel ON rel.oid = con.conrelid
               INNER JOIN pg_catalog.pg_namespace nsp ON nsp.oid = connamespace
               INNER JOIN information_schema.tables
                   ON information_schema.tables.table_schema = nsp.nspname
                   AND nsp.nspname = 'sampledb'
                   AND rel.relname = information_schema.tables.table_name
           WHERE information_schema.tables.table_name ~ '^dataset_'
             AND con.conname ~ 'user_id_fkey$';""")
    for r in result:
        op.drop_constraint(f'{r[0]}', f'{r[1]}', schema='sampledb', type_='foreignkey')
        session.execute(f"DROP VIEW IF EXISTS sampledb.v_{r[1]};")

    session.execute("DROP TABLE sampledb.users;")
    session.commit()
def _purge_attributes_ids(
    instance: Recorder, session: Session, attributes_ids: set[int]
) -> None:
    """Delete old attributes ids."""
    deleted_rows = session.execute(delete_states_attributes_rows(attributes_ids))
    _LOGGER.debug("Deleted %s attribute states", deleted_rows)

    # Evict any entries in the state_attributes_ids cache referring to a purged state
    _evict_purged_attributes_from_attributes_cache(instance, attributes_ids)
def insert_recipe(session: Session, title: str, description: Optional[str] = None) -> int:
    result = session.execute(
        """INSERT INTO 'shlist.recipe' (title, description)
           VALUES (:title, :description)""",
        dict(title=title, description=description))
    session.commit()
    return result.lastrowid
def db_size_bytes(session: Session, database_name: str) -> float: """Get the mysql database size.""" return float( session.execute( text("select pg_database_size(:database_name);"), { "database_name": database_name }, ).first()[0])
def _purge_batch_data_ids(instance: Recorder, session: Session, data_ids: set[int]) -> None:
    """Delete old event data ids in batches of MAX_ROWS_TO_PURGE."""
    for data_ids_chunk in chunked(data_ids, MAX_ROWS_TO_PURGE):
        deleted_rows = session.execute(delete_event_data_rows(data_ids_chunk))
        _LOGGER.debug("Deleted %s data events", deleted_rows)

    # Evict any entries in the event_data_ids cache referring to a purged state
    _evict_purged_data_from_data_cache(instance, data_ids)
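# The `chunked` helper used above is not defined in this snippet. A minimal
# sketch, assuming it behaves like the common itertools recipe that yields
# successive fixed-size batches of an iterable:
from itertools import islice

def chunked(iterable, chunk_size):
    """Yield successive lists of at most chunk_size items from iterable."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, chunk_size))
        if not chunk:
            return
        yield chunk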
def update_view(session: Session, mat_view_name: str):
    start_time = time.time()
    metric = PrometheusMetric(
        "update_trending_view_runtime_seconds",
        "Runtimes for src.task.index_trending:update_view()",
        ("mat_view_name",),
    )
    session.execute(f"REFRESH MATERIALIZED VIEW {mat_view_name}")
    update_time = time.time() - start_time
    metric.save_time({"mat_view_name": mat_view_name})
    logger.info(
        f"index_trending.py | Finished updating {mat_view_name} in: {update_time} sec",
        extra={
            "job": "index_trending",
            "update_time": update_time,
            "mat_view_name": mat_view_name,
        },
    )
def clear_tables(session: Session) -> None:
    def _set_default_values_in_settings_table() -> None:
        for key, value in Settings.default_values.items():
            session.query(Settings).filter(
                Settings.shop_id == shop_id, Settings.key == key
            ).update({'value': value})

    not_clear_this_tables = [
        'message',
    ]
    shop_id = session.query(Shop).first().id
    shop_tables = _metadata.sorted_tables
    tables_for_cleaning = [table for table in shop_tables
                           if table.name not in not_clear_this_tables]
    for table in reversed(tables_for_cleaning):
        session.execute(table.delete())
    _set_default_values_in_settings_table()
def _purge_event_data_ids(
    instance: Recorder, session: Session, data_ids: set[int]
) -> None:
    """Delete old event data ids."""
    deleted_rows = session.execute(delete_event_data_rows(data_ids))
    _LOGGER.debug("Deleted %s data events", deleted_rows)

    # Evict any entries in the event_data_ids cache referring to a purged state
    _evict_purged_data_from_data_cache(instance, data_ids)
def db_size_bytes(session: Session, database_name: str) -> float: """Get the mysql database size.""" return float( session.execute( text( "SELECT page_count * page_size as size " "FROM pragma_page_count(), pragma_page_size();" ) ).first()[0] )
def _purge_batch_attributes_ids(
    instance: Recorder, session: Session, attributes_ids: set[int]
) -> None:
    """Delete old attributes ids in batches of MAX_ROWS_TO_PURGE."""
    for attributes_ids_chunk in chunked(attributes_ids, MAX_ROWS_TO_PURGE):
        deleted_rows = session.execute(
            delete_states_attributes_rows(attributes_ids_chunk))
        _LOGGER.debug("Deleted %s attribute states", deleted_rows)

    # Evict any entries in the state_attributes_ids cache referring to a purged state
    _evict_purged_attributes_from_attributes_cache(instance, attributes_ids)
def insert_ingredient(session: Session, ing_name: str, category: str,
                      unit: Optional[str] = None) -> int:
    result = session.execute(
        """INSERT INTO 'shlist.ingredient' (ing_name, category, unit)
           VALUES (:ing_name, :category, :unit)""",
        dict(ing_name=ing_name, category=category, unit=unit))
    session.commit()
    return result.lastrowid
def check_run_id_null(session: Session) -> Iterable[str]:
    import sqlalchemy.schema

    metadata = sqlalchemy.schema.MetaData(session.bind)
    try:
        metadata.reflect(only=[DagRun.__tablename__], extend_existing=True, resolve_fks=False)
    except exc.InvalidRequestError:
        # Table doesn't exist -- empty db
        return

    # We can't use the model here since it may differ from the db state because
    # this function is run prior to migration. Use the reflected table instead.
    dagrun_table = metadata.tables[DagRun.__tablename__]

    invalid_dagrun_filter = or_(
        dagrun_table.c.dag_id.is_(None),
        dagrun_table.c.run_id.is_(None),
        dagrun_table.c.execution_date.is_(None),
    )
    invalid_dagrun_count = session.query(dagrun_table.c.id).filter(invalid_dagrun_filter).count()
    if invalid_dagrun_count > 0:
        dagrun_dangling_table_name = _format_airflow_moved_table_name(dagrun_table.name, "2.2")
        if dagrun_dangling_table_name in inspect(session.get_bind()).get_table_names():
            yield _format_dangling_error(
                source_table=dagrun_table.name,
                target_table=dagrun_dangling_table_name,
                invalid_count=invalid_dagrun_count,
                reason="with a NULL dag_id, run_id, or execution_date",
            )
            return

        bind = session.get_bind()
        dialect_name = bind.dialect.name
        _create_table_as(
            dialect_name=dialect_name,
            source_query=dagrun_table.select(invalid_dagrun_filter),
            target_table_name=dagrun_dangling_table_name,
            source_table_name=dagrun_table.name,
            session=session,
        )
        delete = dagrun_table.delete().where(invalid_dagrun_filter)
        session.execute(delete)
def db_size_bytes(session: Session, database_name: str) -> float: """Get the mysql database size.""" return float( session.execute( text("SELECT ROUND(SUM(DATA_LENGTH + INDEX_LENGTH), 2) " "FROM information_schema.TABLES WHERE " "TABLE_SCHEMA=:database_name"), { "database_name": database_name }, ).first()[0])
def upgrade(): # ### commands auto generated by Alembic - please adjust! ### session = Session(bind=op.get_bind()) session.execute("CREATE OR REPLACE VIEW {} AS " \ "SELECT datasets.created_at, datasets.updated_at, datasets.id, datasets.name, " \ "datasets.start_date, datasets.end_date, datasets.observation_table_name, " \ "datasets.midias_table_name, datasets.metadata_json, datasets.version, " \ "datasets.description, class_systems.name AS classification_system_name, " \ "users.full_name AS user_name, collect_method.name AS collect_method " \ "FROM {} AS datasets, {} AS class_systems, {} AS users, {} AS collect_method " \ "WHERE users.id = datasets.user_id " \ "AND class_systems.id = datasets.classification_system_id " \ "AND collect_method.id = datasets.collect_method_id;" .format(DatasetView.__table__, Datasets.__table__, 'lccs.class_systems', 'sampledb.users', CollectMethod.__table__) ) session.commit()
def setup_databases(self, **kwargs):
    # Import all models to populate orm.metadata
    for app in settings.INSTALLED_APPS:
        import_any_module(['%s.models' % app], raise_error=False)

    # Determine which schemas we need
    default_schema = orm.engine.url.database
    schemas = set(t.schema or default_schema
                  for t in Base.metadata.tables.values())

    url = deepcopy(orm.engine.url)
    url.database = None
    self.engine = create_engine(url)
    insp = inspect(self.engine)

    # Get a list of already-existing schemas
    existing_schemas = set(insp.get_schema_names())

    # If any of the needed schemas exist, do not proceed
    conflicts = schemas.intersection(existing_schemas)
    if conflicts:
        for c in conflicts:
            print('drop schema %s;' % c)
        sys.exit('The following schemas are already present: %s. '
                 'TestRunner cannot proceed' % ','.join(conflicts))

    # Create schemas
    session = Session(bind=self.engine)
    for schema in schemas:
        session.execute(CreateSchema(schema))
    session.commit()
    session.bind.dispose()

    # Create tables
    if len(orm.Base.metadata.tables) > 0:
        orm.Base.metadata.create_all(checkfirst=False)

    # Generate permissions
    call_command('createpermissions')

    return schemas
def _select_event_data_ids_to_purge(
    session: Session, purge_before: datetime
) -> tuple[set[int], set[int]]:
    """Return sets of event and data ids to purge."""
    event_ids = set()
    data_ids = set()
    for event in session.execute(find_events_to_purge(purge_before)).all():
        event_ids.add(event.event_id)
        if event.data_id:
            data_ids.add(event.data_id)
    _LOGGER.debug("Selected %s event ids and %s data_ids to remove",
                  len(event_ids), len(data_ids))
    return event_ids, data_ids
def __init__(self):
    self.tree = UnambiguousTrieNode()

    engine = get_database_engine_string()
    engine = create_engine(engine, encoding='utf-8',
                           isolation_level='READ UNCOMMITTED',
                           poolclass=SingletonThreadPool, echo=False,
                           connect_args={'cursorclass': MySQLdb.cursors.SSCursor})
    meta = MetaData(engine, reflect=True)
    conn = engine.connect()
    session = Session(bind=conn)

    # Query for keywords
    sql = ("SELECT id, word FROM keywords WHERE word collate latin1_general_cs "
           "REGEXP ('^([A-Z][a-z]+ ){1,2}([A-Z][a-z]+)$')")
    for _id, word in session.execute(sql):
        self.tree.build(word, _id)
def _select_event_state_attributes_ids_data_ids_to_purge(
    session: Session, purge_before: datetime
) -> tuple[set[int], set[int], set[int], set[int]]:
    """Return sets of event, state, attributes, and data ids to purge."""
    events = session.execute(find_events_to_purge(purge_before)).all()
    _LOGGER.debug("Selected %s event ids to remove", len(events))
    states = session.execute(find_states_to_purge(purge_before)).all()
    _LOGGER.debug("Selected %s state ids to remove", len(states))

    event_ids = set()
    state_ids = set()
    attributes_ids = set()
    data_ids = set()
    for event in events:
        event_ids.add(event.event_id)
        if event.data_id:
            data_ids.add(event.data_id)
    for state in states:
        state_ids.add(state.state_id)
        if state.attributes_id:
            attributes_ids.add(state.attributes_id)
    return event_ids, state_ids, attributes_ids, data_ids
def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    op.add_column('time_entries', sa.Column('tickettitle', sa.Unicode(), nullable=True))

    context = op.get_context()
    session = Session()
    session.bind = context.bind
    for tp in session.query(TimeEntry):
        for trac in tp.project.tracs:
            ticket = session.execute(
                'select summary from "trac_%s".ticket where id=%s'
                % (trac.trac_name, tp.ticket)).fetchone()
            tp.tickettitle = ticket.summary
    session.commit()
def fetchall(query: str, queryargs: dict, db: Session):
    textual_query = text(query)
    resultproxy = db.execute(textual_query, queryargs)
    # Equivalent to building each dict by hand:
    #   d, a = {}, []
    #   for rowproxy in resultproxy:
    #       # rowproxy.items() returns an array like [(key0, value0), (key1, value1)]
    #       for column, value in rowproxy.items():
    #           d = {**d, **{column: value}}
    #       a.append(d)
    #   return a
    return [dict(row) for row in resultproxy]
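# A hypothetical usage of fetchall() above (the query, parameter name, and
# table are illustrative assumptions, not taken from the original code):
#
#     rows = fetchall(
#         "SELECT id, name FROM users WHERE id = :user_id",
#         {"user_id": 42},
#         db=session,
#     )
#     # rows -> [{"id": 42, "name": "Alice"}]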
def gen_db(tables: List[Table], engine, dialect: str, decimal_as_real: bool = False):
    """Generate the db file and import the data; the SQLite CREATE TABLE
    statements are generated first.

    Args:
        tables: tables obtained via SQLAlchemy reflection
        engine: database connection
        dialect: dialect of the source database
        decimal_as_real: whether columns that were originally DECIMAL should be
            declared as REAL in the db file; the default is TEXT
    """
    gen_sqlite_sql(tables, dialect, decimal_as_real)
    with open("sqlite_table.sql", encoding="utf-8") as f:
        sqls = f.read()

    sqlite_engine = create_engine("sqlite:///data.db")
    sqlite_session = Session(bind=sqlite_engine)
    for sql in sqls.split(";"):
        sqlite_session.execute(sql + ";")

    session = Session(bind=engine)
    for i, table in enumerate(tables):
        # Map an ad-hoc class onto the reflected table so rows can be copied
        # as ORM objects.
        class_ = type(f"table_{i}", (), {})
        mapper(class_, table)
        objs = session.query(class_).all()
        for obj in objs:
            new_obj = class_()
            for k, v in obj.__dict__.items():
                if k == "_sa_instance_state":
                    continue
                setattr(new_obj, k, v)
            sqlite_session.add(new_obj)
    session.close()
    sqlite_session.commit()
    sqlite_session.close()
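# A hypothetical invocation of gen_db(), assuming a MySQL source database
# (the connection URL is an illustrative placeholder):
#
#     from sqlalchemy import create_engine, MetaData
#     engine = create_engine("mysql+pymysql://user:pass@localhost/mydb")
#     meta = MetaData()
#     meta.reflect(bind=engine)
#     gen_db(list(meta.tables.values()), engine, dialect="mysql")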
def _get_ranks(
    session: Session, prev_slot: int, current_slot: int
) -> List[AggregateTipRank]:
    return session.execute(
        text(
            GET_AGGREGATE_USER_TIPS_RANKS_QUERY,
        ),
        {
            "prev_slot": prev_slot,
            "current_slot": current_slot,
            "leaderboard_size": LEADERBOARD_SIZE,
        },
    ).fetchall()
def clear_download_errors(session: Session) -> None:
    """Clears all File, Message, or Reply download errors."""
    session.execute("""UPDATE files SET download_error_id = null;""")
    session.execute("""UPDATE messages SET download_error_id = null;""")
    session.execute("""UPDATE replies SET download_error_id = null;""")
    session.commit()
def session_all_row(self, session: Session, sql, params=None) -> CDataSet:
    """Execute the sql and return all matching records.

    :param session:
    :param sql:
    :param params:
    :return:
    """
    cursor = session.execute(
        sql, self._prepare_params_of_execute_sql(session.get_bind(), sql, params))
    data = cursor.fetchall()
    return CDataSet(data)
def _select_state_attributes_ids_to_purge(
    session: Session, purge_before: datetime
) -> tuple[set[int], set[int]]:
    """Return sets of state and attribute ids to purge."""
    state_ids = set()
    attributes_ids = set()
    for state in session.execute(find_states_to_purge(purge_before)).all():
        state_ids.add(state.state_id)
        if state.attributes_id:
            attributes_ids.add(state.attributes_id)
    _LOGGER.debug(
        "Selected %s state ids and %s attributes_ids to remove",
        len(state_ids),
        len(attributes_ids),
    )
    return state_ids, attributes_ids
def get(self, attr_name: str, db: Session) -> Union[Optional[Value], List[Value]]:
    attr_def: AttributeDefinition = db.execute(
        select(AttributeDefinition)
        .where(AttributeDefinition.schema_id == self.schema_id)
        .where(Attribute.name == attr_name)
        .join(Attribute, AttributeDefinition.attribute_id == Attribute.id)
    ).scalar()
    if attr_def is None:
        raise KeyError(
            f'There is no attribute named `{attr_name}` defined for schema id {self.schema_id}'
        )

    attr: Attribute = attr_def.attribute
    val_model = attr.type.value.model
    q = select(val_model)\
        .where(val_model.attribute_id == attr.id)\
        .where(val_model.entity_id == self.id)\
        .order_by(val_model.value.asc())
    if attr_def.list:
        return db.execute(q).scalars().all()
    else:
        return db.execute(q).scalar()
def __init__(self):
    self.tree = WhitespaceExpansionTrieNode()

    # Database connection
    engine = get_database_engine_string()
    logging.info("Using connection string '%s'" % (engine,))
    engine = create_engine(engine, encoding='utf-8',
                           isolation_level='READ UNCOMMITTED',
                           poolclass=SingletonThreadPool, echo=False,
                           connect_args={'cursorclass': MySQLdb.cursors.SSCursor})
    meta = MetaData(engine, reflect=True)
    conn = engine.connect()
    session = Session(bind=conn)

    # Query for keywords
    sql = ("SELECT word FROM keywords WHERE word collate latin1_general_cs "
           "REGEXP ('^([A-Z](c[A-Z])?[a-z]+ ){1,2}([A-Z](c[A-Z])?[a-z]+)$')")
    for word, in session.execute(sql):
        logging.debug(word)
        self.tree.build(word)
def upgrade():
    from sqlalchemy.orm.session import Session
    session = Session(bind=op.get_bind())

    # Add a dummy name where there is none
    for repo in session.query(Repository).all():
        if repo.name == "":
            repo.name = "NoName"
    session.commit()

    if session.bind.dialect.name == "sqlite":
        session.execute("PRAGMA foreign_keys = OFF")
    elif session.bind.dialect.name == "mysql":
        session.execute("SET foreign_key_checks = 0")
    else:
        raise NotImplementedError

    with op.batch_alter_table('repository', schema=None) as batch_op:
        batch_op.create_check_constraint('non_empty_name', 'name != ""')

    if session.bind.dialect.name == "sqlite":
        session.execute("PRAGMA foreign_keys = ON")
    elif session.bind.dialect.name == "mysql":
        session.execute("SET foreign_key_checks = 1")
from backend.db import RawArticle, RawArticleResultLink, RawArticleResult

if __name__ == "__main__":
    core.configure_logging()
    engine = core.get_database_engine_string()
    logging.info("Using connection string '%s'" % (engine,))
    engine = create_engine(engine, encoding='utf-8', isolation_level="READ COMMITTED")
    logging.info("Binding session...")
    session = Session(bind=engine, autocommit=False)

    # Select the old raw_results
    sql = ("SELECT crawl_id, date_crawled, url, content_type, "
           "raw_article_results.status, raw_article_conversions.inserted_id "
           "FROM raw_articles "
           "JOIN raw_article_results ON raw_article_results.raw_article_id = raw_articles.id "
           "JOIN raw_article_conversions ON raw_article_conversions.raw_article_id = raw_articles.id")
    it = session.execute(sql)
    for crawl_id, date_crawled, url, content_type, status, inserted_id in it:
        # Decide if any of these have been committed
        sub = session.query(RawArticle).filter_by(crawl_id=crawl_id, url=url,
                                                  content_type=content_type,
                                                  date_crawled=date_crawled)
        try:
            i = sub.one()
            logging.info("RawArticle %s has already been processed.", i)
            continue
        except NoResultFound:
            pass
        rbase = RawArticle((crawl_id, (None, None, url, date_crawled, content_type)))
        rstat = RawArticleResult(None, status)
def handle_noargs(self, **options):
    verbosity = 1  # int(options.get('verbosity'))
    interactive = options.get('interactive')
    show_traceback = options.get('traceback')
    self.style = no_style()

    # Import the 'management' module within each installed app, to register
    # dispatcher events.
    for app_name in settings.INSTALLED_APPS:
        try:
            import_module('.management', app_name)
        except ImportError as exc:
            # This is slightly hackish. We want to ignore ImportErrors
            # if the "management" module itself is missing -- but we don't
            # want to ignore the exception if the management module exists
            # but raises an ImportError for some reason. The only way we
            # can do this is to check the text of the exception. Note that
            # we're a bit broad in how we check the text, because different
            # Python implementations may not use the same text.
            # CPython uses the text "No module named management"
            # PyPy uses "No module named myproject.myapp.management"
            msg = exc.args[0]
            if not msg.startswith('No module named') or 'management' not in msg:
                raise

    db = options.get('database')
    orm = ORM.get(db)
    db_info = orm.settings_dict
    is_test_db = db_info.get('TEST', False)
    if not is_test_db:
        print('Database "%s" cannot be purged because it is not a test '
              'database.\nTo flag this as a test database, set TEST to '
              'True in the database settings.' % db)
        sys.exit()

    if interactive:
        confirm = raw_input('\nYou have requested a purge of database '
                            '"%s" (%s). This will IRREVERSIBLY DESTROY all data '
                            'currently in the database, and DELETE ALL TABLES AND '
                            'SCHEMAS. Are you sure you want to do this?\n\n'
                            'Type "yes" to continue, or "no" to cancel: '
                            % (db, orm.engine.url))
    else:
        confirm = 'yes'

    if confirm == 'yes':
        # Get a list of all schemas used by the app
        default_schema = orm.engine.url.database
        app_schemas = set(orm.Base.metadata._schemas)
        app_schemas.add(default_schema)

        url = deepcopy(orm.engine.url)
        url.database = None
        engine = create_engine(url)
        inspector = inspect(engine)

        # Get a list of existing schemas
        db_schemas = set(inspector.get_schema_names())
        schemas = app_schemas.intersection(db_schemas)

        app_tables = set()
        for table in orm.Base.metadata.tables.values():
            schema = table.schema or default_schema
            app_tables.add('%s.%s' % (schema, table.name))

        metadata = MetaData()
        db_tables = []
        all_fks = []
        for schema in schemas:
            for table_name in inspector.get_table_names(schema):
                fullname = '%s.%s' % (schema, table_name)
                if fullname not in app_tables:
                    continue
                fks = []
                for fk in inspector.get_foreign_keys(table_name, schema=schema):
                    if not fk['name']:
                        continue
                    fks.append(ForeignKeyConstraint((), (), name=fk['name']))
                t = Table(table_name, metadata, *fks, schema=schema)
                db_tables.append(t)
                all_fks.extend(fks)

        # Drop foreign keys first so tables can be dropped in any order,
        # then drop the tables, then the schemas themselves.
        session = Session(bind=engine)
        for fkc in all_fks:
            session.execute(DropConstraint(fkc))
        for table in db_tables:
            session.execute(DropTable(table))
        for schema in schemas:
            session.execute(DropSchema(schema))
        session.commit()
        session.bind.dispose()
    else:
        self.stdout.write("Purge cancelled.\n")
# 5) Use the document identifiers to resolve date crawled - our most basic date resolution [DONE]
# 6) Use the document identifiers to identify the most likely certain date - our highest date resolution [DONE]
# 7) For those documents which don't have this, compute an average uncertain date. [DONE]
# 8) Create another temporary table which can hold phrases, and copy all the phrases
#    in all of the documents into this table.
# 9) Filter keyword incidences by the keywords we have and join with this temporary
#    table to count how many phrases in each document are relevant.

# Temporary table creation
sql = """CREATE TEMPORARY TABLE query_%d_articles (
             id INTEGER PRIMARY KEY,
             doc_id INTEGER NOT NULL,
             domain_id INTEGER,
             keywords TINYINT(1) NOT NULL,
             domains TINYINT(1) NOT NULL
         ) ENGINE=MEMORY;""" % (q.id,)
logging.debug(sql)
session.execute(sql)

sql = """CREATE TEMPORARY TABLE query_%d_keywords (
             id INTEGER PRIMARY KEY
         ) ENGINE=MEMORY""" % (q.id,)
logging.debug(sql)
session.execute(sql)

# Keyword housekeeping
for keyword in keywords:
    _id = keyword.id
    sql = "INSERT INTO query_%d_keywords VALUES (%d)" % (q.id, _id)
    session.execute(sql)

if using_keywords and not using_domains: