Esempio n. 1
0
    def scrape_historical(self, since: datetime.datetime):
        """Scrapes cherry-pick issues since a given date.

        Pages backwards through GitHub issues, persisting each page before
        fetching the next, until `scrape_page` runs out of pages.

        Args:
          since: datetime to scrape backwards in commit history until
        """
        self.cursor = None  # pagination cursor; presumably advanced by scrape_page
        page_count = 1

        try:
            while True:
                logging.info('Fetching page %d of issues from GitHub',
                             page_count)

                cp_issues = self.scrape_page(since=since, after=self.cursor)
                # List comprehension instead of list(<generator>) — idiomatic
                # and consistent with the commit-scraper methods in this file.
                cp_issue_dicts = [{
                    'number': issue.number,
                    'created_at': issue.created_at,
                } for issue in cp_issues]
                logging.info('Scraped %d cherry-pick issues',
                             len(cp_issue_dicts))

                # INSERT IGNORE so re-scraped issues do not raise
                # duplicate-key errors (MySQL-specific prefix).
                db.get_engine().execute(
                    models.CherrypickIssue.__table__.insert().prefix_with(
                        'IGNORE'), cp_issue_dicts)

                page_count += 1
                time.sleep(SCRAPE_INTERVAL_SECONDS)
        except IndexError:
            # NOTE(review): IndexError appears to be how scrape_page signals
            # "no more pages" — confirm before refactoring this loop.
            logging.info('Completed scraping %d pages of cherry-pick issues',
                         page_count)
Esempio n. 2
0
def run_manage(parsed_args, app=None):
    """Runs the task selected on the command line inside an app context.

    Args:
        parsed_args: argparse namespace; a sub-command sets its `func` attr.
        app: optional pre-built Flask app; created on demand when falsy.
    """
    # Guard clause: argparse only sets `func` when a task was chosen.
    if not hasattr(parsed_args, 'func'):
        print('Scripts loaded successfully, no tasks specified.')
        return

    if not app:
        try:
            app = create_app(config_override=CONFIG)
        except OperationalError as e:
            # Anything other than MySQL error 1071 ("key too long") is
            # unexpected — propagate it.
            if e.orig.args[0] != 1071:
                raise
            # Suggest the charset fix for each bind, then bail out.
            # NOTE(review): `app` is still falsy here (create_app raised) —
            # confirm db.get_engine tolerates that.
            print('Please run: ')
            for bind in ['bio', 'cms']:
                engine = db.get_engine(app, bind)
                print(
                    f'ALTER DATABASE `{engine.url.database}` CHARACTER SET utf8;'
                )
            print('to be able to continue.')
            print(e)
            return

    with app.app_context():
        parsed_args.func(parsed_args)

    print('Done, all tasks completed.')
Esempio n. 3
0
    def scrape_historical(self, since: datetime.datetime):
        """Scrapes historical commits going back as far as is specified.

        Pages backwards from the oldest commit already stored, persisting
        each page before fetching the next.

        Args:
          since: datetime to scrape backwards in commit history until
        """
        self.cursor = None
        oldest_timestamp = self._get_oldest_commit_timestamp()
        page = 1

        try:
            while True:
                logging.info(
                    'Fetching page %d of historical commits from GitHub',
                    page)

                batch = self.scrape_page(since=since,
                                         until=oldest_timestamp,
                                         after=self.cursor)
                rows = [
                    {
                        'hash': c.hash,
                        'committed_at': c.committed_at,
                        'pull_request': c.pull_request,
                        'pull_request_status': c.pull_request_status,
                    }
                    for c in batch
                ]
                logging.info('Scraped %d commits', len(rows))

                # INSERT IGNORE skips commits whose hash is already stored.
                insert_stmt = models.Commit.__table__.insert().prefix_with(
                    'IGNORE')
                db.get_engine().execute(insert_stmt, rows)

                page += 1
                time.sleep(SCRAPE_INTERVAL_SECONDS)
        except IndexError:
            # NOTE(review): IndexError appears to mark the end of pagination
            # in scrape_page — confirm before refactoring.
            logging.info('Completed scraping %d pages of historical commits',
                         page)
Esempio n. 4
0
    def scrape_since_latest(self):
        """Scrapes latest commits from GitHub and saves them to the DB.

        When the database is empty, it will scrape all commits from the
        last 90 days. Otherwise, it will scrape commits since the latest
        commit currently in the DB.
        """
        self.cursor = None
        latest_timestamp = self._get_latest_commit_timestamp()
        page = 1

        try:
            while True:
                logging.info('Fetching page %d of commits from GitHub', page)

                batch = self.scrape_page(since=latest_timestamp,
                                         after=self.cursor)
                rows = [
                    {
                        'hash': c.hash,
                        'committed_at': c.committed_at,
                        'pull_request': c.pull_request,
                        'pull_request_status': c.pull_request_status,
                    }
                    for c in batch
                ]
                logging.info('Scraped %d commits', len(rows))

                # INSERT IGNORE skips commits whose hash is already stored.
                insert_stmt = models.Commit.__table__.insert().prefix_with(
                    'IGNORE')
                db.get_engine().execute(insert_stmt, rows)

                page += 1
                time.sleep(SCRAPE_INTERVAL_SECONDS)
        except IndexError:
            # NOTE(review): IndexError appears to mark the end of pagination
            # in scrape_page — confirm before refactoring.
            logging.info('Completed scraping %d pages of commits', page)
Esempio n. 5
0
def basic_auto_migrate_relational_db(app, bind):
    """Interactively migrates the schema of the `bind` database.

    Adds columns present in the models but missing from the database, offers
    (on MySQL) to update columns whose definitions differ from the models,
    and offers to drop columns no longer present in the models.
    Inspired with http://stackoverflow.com/questions/2103274/

    Args:
        app: Flask application that owns the database engines.
        bind: name of the SQLAlchemy bind to migrate.
    """

    from sqlalchemy import Table
    from sqlalchemy import MetaData

    print('Performing auto-migration in', bind, 'database...')
    db.session.commit()
    db.reflect()
    db.session.commit()
    db.create_all(bind=bind)

    # Fix: raw string — '\s' in a plain literal is an invalid escape sequence
    # (SyntaxWarning since Python 3.12). Compiled once here instead of
    # re-matching an uncompiled pattern for every column line of every table.
    column_line_re = re.compile(r'\s*`(?P<name>.*?)` (?P<definition>[^,]*),?')

    with app.app_context():
        engine = db.get_engine(app, bind)
        tables = db.get_tables_for_bind(bind=bind)
        metadata = MetaData()
        metadata.engine = engine

        ddl = engine.dialect.ddl_compiler(engine.dialect, None)

        for table in tables:

            # Reflect the live table so it can be diffed against the model.
            db_table = Table(table.name,
                             metadata,
                             autoload=True,
                             autoload_with=engine)
            db_columns = get_column_names(db_table)

            columns = get_column_names(table)
            new_columns = columns - db_columns
            unused_columns = db_columns - columns
            existing_columns = columns.intersection(db_columns)

            for column_name in new_columns:
                column = getattr(table.c, column_name)
                # Constrained columns cannot be added with a plain
                # ALTER TABLE ... ADD COLUMN, so they are skipped.
                if column.constraints:
                    print(
                        f'Column {column_name} skipped due to existing constraints.'
                    )
                    continue
                print(f'Creating column: {column_name}')

                definition = ddl.get_column_specification(column)
                add_column(engine, table.name, definition)

            if engine.dialect.name == 'mysql':
                # Parse SHOW CREATE TABLE output to recover each column's
                # definition as MySQL currently reports it.
                sql = f'SHOW CREATE TABLE `{table.name}`'
                table_definition = engine.execute(sql)
                columns_definitions = {}

                # Normalize MySQL spellings to the SQLAlchemy-generated ones
                # so the textual comparison below avoids false diffs.
                to_replace = {
                    'TINYINT(1)':
                    'BOOL',  # synonymous for MySQL and SQLAlchemy
                    'INT(11)': 'INTEGER',
                    'DOUBLE': 'FLOAT(53)',
                    ' DEFAULT NULL': ''
                }
                for definition in table_definition.first()[1].split('\n'):
                    match = column_line_re.match(definition)
                    if match:
                        name = match.group('name')
                        definition_string = match.group('definition').upper()

                        for mysql_explicit_definition, implicit_sqlalchemy in to_replace.items(
                        ):
                            definition_string = definition_string.replace(
                                mysql_explicit_definition, implicit_sqlalchemy)

                        columns_definitions[
                            name] = name + ' ' + definition_string

                columns_to_update = []
                for column_name in existing_columns:

                    column = getattr(table.c, column_name)
                    old_definition = columns_definitions[column_name]
                    new_definition = ddl.get_column_specification(column)

                    if old_definition != new_definition:
                        columns_to_update.append(
                            [column_name, old_definition, new_definition])

                if columns_to_update:
                    print(
                        '\nFollowing columns in `%s` table differ in definitions '
                        'from those in specified in models:' % table.name)
                # Each differing column is updated only with user consent.
                for column, old_definition, new_definition in columns_to_update:
                    agreed = got_permission(
                        'Column: `%s`\n'
                        'Old definition: %s\n'
                        'New definition: %s\n'
                        'Update column definition?' %
                        (column, old_definition, new_definition))
                    if agreed:
                        update_column(engine, table.name, new_definition)
                        print(f'Updated {column} column definition')
                    else:
                        print(f'Skipped {column} column')

            if unused_columns:
                print('\nFollowing columns in `%s` table are no longer used '
                      'and can be safely removed:' % table.name)
                # Drops are destructive, so each one also requires consent.
                for column in unused_columns:
                    if got_permission(f'Column: `{column}` - remove?'):
                        drop_column(engine, table.name, column)
                        print(f'Removed column {column}.')
                    else:
                        print(f'Keeping column {column}.')

    print('Auto-migration of', bind, 'database completed.')
Esempio n. 6
0
def main(argv):
    """Entry point: initializes the database schema."""
    del argv  # Unused; the runner's entry-point signature requires it.
    engine = db.get_engine()
    init_db(engine)