def scrape_historical(self, since: datetime.datetime): """Scrapes cherry-pick issues since a given date. Args: since: datetime to scrape backwards in commit history until """ self.cursor = None page_count = 1 try: while True: logging.info('Fetching page %d of issues from GitHub', page_count) cp_issues = self.scrape_page(since=since, after=self.cursor) cp_issue_dicts = list({ 'number': issue.number, 'created_at': issue.created_at, } for issue in cp_issues) logging.info('Scraped %d cherry-pick issues', len(cp_issue_dicts)) db.get_engine().execute( models.CherrypickIssue.__table__.insert().prefix_with( 'IGNORE'), cp_issue_dicts) page_count += 1 time.sleep(SCRAPE_INTERVAL_SECONDS) except IndexError: logging.info('Completed scraping %d pages of cherry-pick issues', page_count)
def run_manage(parsed_args, app=None):
    if not hasattr(parsed_args, 'func'):
        print('Scripts loaded successfully, no tasks specified.')
        return

    if not app:
        try:
            app = create_app(config_override=CONFIG)
        except OperationalError as e:
            if e.orig.args[0] == 1071:
                print('Please run: ')
                for bind in ['bio', 'cms']:
                    engine = db.get_engine(app, bind)
                    print(
                        f'ALTER DATABASE `{engine.url.database}` CHARACTER SET utf8;'
                    )
                print('to be able to continue.')
                print(e)
                return
            else:
                raise

    with app.app_context():
        parsed_args.func(parsed_args)

    print('Done, all tasks completed.')
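# run_manage expects `parsed_args.func` to have been attached by the CLI
# parser; with argparse that is the usual sub-command idiom of registering a
# handler via `set_defaults(func=...)`. A minimal sketch, assuming an argparse
# front end; the 'migrate' command and its handler are hypothetical, not the
# project's actual CLI.
import argparse


def _build_demo_parser():
    parser = argparse.ArgumentParser(description='Management tasks')
    subparsers = parser.add_subparsers()
    migrate = subparsers.add_parser('migrate', help='run auto-migration')
    # Whatever is stored here surfaces later as parsed_args.func.
    migrate.set_defaults(func=lambda args: print('migrating...'))
    return parser


# Usage sketch: run_manage(_build_demo_parser().parse_args()). When no
# sub-command is given, `func` is absent, which is exactly the case handled at
# the top of run_manage.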
def scrape_historical(self, since: datetime.datetime): """Scrapes historical commits going back as far as is specified. Args: since: datetime to scrape backwards in commit history until """ self.cursor = None oldest_timestamp = self._get_oldest_commit_timestamp() page_count = 1 try: while True: logging.info( 'Fetching page %d of historical commits from GitHub', page_count) commits = self.scrape_page(since=since, until=oldest_timestamp, after=self.cursor) commit_dicts = [{ 'hash': commit.hash, 'committed_at': commit.committed_at, 'pull_request': commit.pull_request, 'pull_request_status': commit.pull_request_status, } for commit in commits] logging.info('Scraped %d commits', len(commit_dicts)) db.get_engine().execute( models.Commit.__table__.insert().prefix_with('IGNORE'), commit_dicts) page_count += 1 time.sleep(SCRAPE_INTERVAL_SECONDS) except IndexError: logging.info('Completed scraping %d pages of historical commits', page_count)
def scrape_since_latest(self):
    """Scrapes latest commits from GitHub and saves them to the DB.

    When the database is empty, it will scrape all commits from the last
    90 days. Otherwise, it will scrape commits since the latest commit
    currently in the DB.
    """
    self.cursor = None
    latest_timestamp = self._get_latest_commit_timestamp()
    page_count = 1
    try:
        while True:
            logging.info('Fetching page %d of commits from GitHub', page_count)
            commits = self.scrape_page(since=latest_timestamp,
                                       after=self.cursor)
            commit_dicts = [{
                'hash': commit.hash,
                'committed_at': commit.committed_at,
                'pull_request': commit.pull_request,
                'pull_request_status': commit.pull_request_status,
            } for commit in commits]
            logging.info('Scraped %d commits', len(commit_dicts))
            db.get_engine().execute(
                models.Commit.__table__.insert().prefix_with('IGNORE'),
                commit_dicts)
            page_count += 1
            time.sleep(SCRAPE_INTERVAL_SECONDS)
    except IndexError:
        logging.info('Completed scraping %d pages of commits', page_count)
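# All three scraping loops exit by catching IndexError rather than testing an
# explicit "no more pages" flag. The assumption (scrape_page is defined
# elsewhere in this class) is that it indexes into the fetched results, so an
# empty page from GitHub raises IndexError and ends the while-True loop. The
# stand-ins below are purely hypothetical and only illustrate that contract.
def _fake_scrape_page(items, cursor, page_size=100):
    """Return one page of `items`; raises IndexError once the data runs out."""
    page = items[cursor:cursor + page_size]
    page[-1]  # empty page -> IndexError, mirroring the assumed behaviour
    return page, cursor + page_size


def _fake_scrape_all(items):
    cursor, collected = 0, []
    try:
        while True:
            page, cursor = _fake_scrape_page(items, cursor)
            collected.extend(page)
    except IndexError:
        pass  # pagination exhausted
    return collected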
def basic_auto_migrate_relational_db(app, bind):
    """Inspired by http://stackoverflow.com/questions/2103274/"""
    from sqlalchemy import Table
    from sqlalchemy import MetaData

    print('Performing auto-migration in', bind, 'database...')
    db.session.commit()
    db.reflect()
    db.session.commit()
    db.create_all(bind=bind)

    with app.app_context():
        engine = db.get_engine(app, bind)
        tables = db.get_tables_for_bind(bind=bind)
        metadata = MetaData()
        metadata.engine = engine

        ddl = engine.dialect.ddl_compiler(engine.dialect, None)

        for table in tables:
            db_table = Table(
                table.name, metadata, autoload=True, autoload_with=engine)
            db_columns = get_column_names(db_table)
            columns = get_column_names(table)
            new_columns = columns - db_columns
            unused_columns = db_columns - columns
            existing_columns = columns.intersection(db_columns)

            for column_name in new_columns:
                column = getattr(table.c, column_name)
                if column.constraints:
                    print(
                        f'Column {column_name} skipped due to existing constraints.'
                    )
                    continue
                print(f'Creating column: {column_name}')
                definition = ddl.get_column_specification(column)
                add_column(engine, table.name, definition)

            if engine.dialect.name == 'mysql':
                sql = f'SHOW CREATE TABLE `{table.name}`'
                table_definition = engine.execute(sql)
                columns_definitions = {}

                to_replace = {
                    'TINYINT(1)': 'BOOL',   # synonymous for MySQL and SQLAlchemy
                    'INT(11)': 'INTEGER',
                    'DOUBLE': 'FLOAT(53)',
                    ' DEFAULT NULL': ''
                }
                for definition in table_definition.first()[1].split('\n'):
                    match = re.match(
                        r'\s*`(?P<name>.*?)` (?P<definition>[^,]*),?', definition)
                    if match:
                        name = match.group('name')
                        definition_string = match.group('definition').upper()

                        for mysql_explicit_definition, implicit_sqlalchemy in to_replace.items():
                            definition_string = definition_string.replace(
                                mysql_explicit_definition, implicit_sqlalchemy)

                        columns_definitions[name] = name + ' ' + definition_string

                columns_to_update = []

                for column_name in existing_columns:
                    column = getattr(table.c, column_name)
                    old_definition = columns_definitions[column_name]
                    new_definition = ddl.get_column_specification(column)

                    if old_definition != new_definition:
                        columns_to_update.append(
                            [column_name, old_definition, new_definition])

                if columns_to_update:
                    print(
                        '\nThe following columns in `%s` table differ in definitions '
                        'from those specified in models:' % table.name)
                for column, old_definition, new_definition in columns_to_update:
                    agreed = got_permission(
                        'Column: `%s`\n'
                        'Old definition: %s\n'
                        'New definition: %s\n'
                        'Update column definition?'
                        % (column, old_definition, new_definition))
                    if agreed:
                        update_column(engine, table.name, new_definition)
                        print(f'Updated {column} column definition')
                    else:
                        print(f'Skipped {column} column')

            if unused_columns:
                print(
                    '\nThe following columns in `%s` table are no longer used '
                    'and can be safely removed:' % table.name)
                for column in unused_columns:
                    if got_permission(f'Column: `{column}` - remove?'):
                        drop_column(engine, table.name, column)
                        print(f'Removed column {column}.')
                    else:
                        print(f'Keeping column {column}.')

    print('Auto-migration of', bind, 'database completed.')
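# The migration above compares model columns to live database columns with
# plain set arithmetic (new, unused, existing), so `get_column_names` (defined
# elsewhere in this module) is assumed to return a set of column names for
# either a model table or a reflected Table. A one-line sketch of that assumed
# shape, named differently to avoid clashing with the real helper:
def get_column_names_sketch(table):
    """Return the column names of a SQLAlchemy Table as a set (assumed shape)."""
    return {column.name for column in table.columns}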
def main(argv):
    del argv  # Unused.
    init_db(db.get_engine())