def main():
    utils.init_config_and_logging(config.CONNECTION_OPTS +
                                  config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return not 0

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.driverlog_data_uri)

    process_project_list(runtime_storage_inst)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

def _normalize_user(user):
    for c in user['companies']:
        c['end_date'] = utils.date_to_timestamp(c['end_date'])

    # sort companies by end_date
    def end_date_comparator(x, y):
        if x["end_date"] == 0:
            return 1
        elif y["end_date"] == 0:
            return -1
        else:
            return x["end_date"] - y["end_date"]

    user['companies'].sort(key=utils.cmp_to_key(end_date_comparator))
    if user['companies']:
        if user['companies'][-1]['end_date'] != 0:
            user['companies'].append(
                dict(company_name='*independent', end_date=0))

    user['user_id'] = user_processor.make_user_id(
        launchpad_id=user.get('launchpad_id'),
        emails=user.get('emails'),
        gerrit_id=user.get('gerrit_id'),
        github_id=user.get('github_id'),
        zanata_id=user.get('zanata_id'),
        ldap_id=user.get('ldap_id')) or user.get('user_id')

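
# The comparator above treats end_date == 0 as "still employed" and forces
# such entries to sort last. A minimal standalone sketch of that behaviour,
# assuming utils.cmp_to_key mirrors functools.cmp_to_key from the standard
# library:
import functools

def _end_date_cmp(x, y):
    if x['end_date'] == 0:
        return 1
    elif y['end_date'] == 0:
        return -1
    else:
        return x['end_date'] - y['end_date']

companies = [{'company_name': 'B', 'end_date': 0},
             {'company_name': 'A', 'end_date': 1234567890}]
companies.sort(key=functools.cmp_to_key(_end_date_cmp))
# -> 'A' (dated) first, 'B' (end_date == 0, i.e. current employer) last
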
def _process_repo_reviews(repo, runtime_storage_inst, record_processor_inst,
                          rcs_inst):
    for branch in _get_repo_branches(repo):
        LOG.info('Processing reviews for repo: %s, branch: %s',
                 repo['uri'], branch)

        quoted_uri = six.moves.urllib.parse.quote_plus(repo['uri'])
        rcs_key = 'rcs:%s:%s' % (quoted_uri, branch)
        last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
        current_retrieval_time = utils.date_to_timestamp('now')

        review_iterator = itertools.chain(
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='open'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='merged'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='abandoned', grab_comments=True),
        )
        review_iterator_typed = _record_typer(review_iterator, 'review')

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)
        runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

def normalize_user(user):
    user['emails'] = [email.lower() for email in user['emails']]
    if user['launchpad_id']:
        user['launchpad_id'] = user['launchpad_id'].lower()

    for c in user['companies']:
        end_date_numeric = 0
        if c['end_date']:
            end_date_numeric = utils.date_to_timestamp(c['end_date'])
        c['end_date'] = end_date_numeric

    # sort companies by end_date
    def end_date_comparator(x, y):
        if x["end_date"] == 0:
            return 1
        elif y["end_date"] == 0:
            return -1
        else:
            return cmp(x["end_date"], y["end_date"])

    user['companies'].sort(cmp=end_date_comparator)
    if user['emails']:
        user['user_id'] = get_user_id(user['launchpad_id'],
                                      user['emails'][0])
    else:
        user['user_id'] = user['launchpad_id']

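
# Note: this variant requires Python 2; both the built-in cmp() and the
# cmp= keyword of list.sort() were removed in Python 3. A hedged Python 3
# equivalent that keeps the same ordering (dated entries ascending, the
# end_date == 0 sentinel last) without any comparator:
companies = [{'company_name': 'B', 'end_date': 0},
             {'company_name': 'A', 'end_date': 1234567890}]
companies.sort(key=lambda c: (c['end_date'] == 0, c['end_date']))
# False sorts before True, so all dated entries precede end_date == 0
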
def _process_repo_reviews(repo, runtime_storage_inst, record_processor_inst):
    rcs_inst = rcs.get_rcs(repo['gerrit_uri'])
    rcs_inst.setup(key_filename=repo['key_filename'],
                   username=repo['ssh_username'],
                   gerrit_retry=CONF.gerrit_retry)

    for branch in _get_repo_branches(repo):
        LOG.info('Processing reviews for repo: %s, branch: %s',
                 repo['uri'], branch)

        quoted_uri = six.moves.urllib.parse.quote_plus(repo['uri'])
        rcs_key = 'rcs:%s:%s' % (quoted_uri, branch)
        last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
        current_retrieval_time = utils.date_to_timestamp('now')

        review_iterator = itertools.chain(
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='open'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='merged'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='abandoned', grab_comments=True),
        )
        review_iterator_typed = _record_typer(review_iterator, 'review')

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)
        runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

    rcs_inst.close()

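
# The 'rcs:<quoted-uri>:<branch>' key doubles as a per-branch checkpoint:
# the timestamp stored under it bounds the next incremental fetch. A small
# sketch of the key construction (hypothetical URI), using only the
# standard library:
from urllib.parse import quote_plus

uri = 'git://git.example.org/openstack/nova'  # hypothetical repo URI
rcs_key = 'rcs:%s:%s' % (quote_plus(uri), 'master')
# -> 'rcs:git%3A%2F%2Fgit.example.org%2Fopenstack%2Fnova:master'
# quote_plus escapes ':' and '/' so the flat key namespace stays
# unambiguous even though the URI itself contains both characters.
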
def process(runtime_storage_inst, record_processor_inst):
    repos = utils.load_repos(runtime_storage_inst)

    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key('bug_modified_since')

    rcs_inst = rcs.get_rcs(cfg.CONF.review_uri)
    rcs_inst.setup(key_filename=cfg.CONF.ssh_key_filename,
                   username=cfg.CONF.ssh_username)

    for repo in repos:
        _process_repo(repo, runtime_storage_inst, record_processor_inst,
                      rcs_inst, bug_modified_since)

    rcs_inst.close()

    runtime_storage_inst.set_by_key('bug_modified_since', current_date)

    LOG.info('Processing mail lists')
    mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
    for mail_list in mail_lists:
        _process_mail_list(mail_list, runtime_storage_inst,
                           record_processor_inst)

    _post_process_records(record_processor_inst, repos)

def main():
    utils.init_config_and_logging(config.CONNECTION_OPTS +
                                  config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return not 0

    try:
        jsonschema.validate(default_data, schema.default_data)
    except jsonschema.ValidationError as e:
        LOG.critical('The default data is invalid: %s' % e)
        return not 0

    default_data_processor.process(runtime_storage_inst, default_data)

    process_project_list(runtime_storage_inst)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

    LOG.info('stackalytics-processor succeeded.')

def main():
    utils.init_config_and_logging(config.CONNECTION_OPTS +
                                  config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical("Unable to load default data")
        return not 0

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.driverlog_data_uri)

    process_project_list(runtime_storage_inst, cfg.CONF.project_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key("runtime_storage_update_time",
                                    utils.date_to_timestamp("now"))

def _process_repo(repo, runtime_storage_inst, record_processor_inst,
                  rcs_inst):
    uri = repo["uri"]
    LOG.info("Processing repo uri: %s", uri)

    LOG.debug("Processing blueprints for repo uri: %s", uri)
    bp_iterator = lp.log(repo)
    bp_iterator_typed = _record_typer(bp_iterator, "bp")
    processed_bp_iterator = record_processor_inst.process(bp_iterator_typed)
    runtime_storage_inst.set_records(processed_bp_iterator,
                                     utils.merge_records)

    LOG.debug("Processing bugs for repo uri: %s", uri)
    current_date = utils.date_to_timestamp("now")
    bug_modified_since = runtime_storage_inst.get_by_key(
        "bug_modified_since-%s" % repo["module"])
    bug_iterator = bps.log(repo, bug_modified_since)
    bug_iterator_typed = _record_typer(bug_iterator, "bug")
    processed_bug_iterator = record_processor_inst.process(bug_iterator_typed)
    runtime_storage_inst.set_records(processed_bug_iterator,
                                     utils.merge_records)
    runtime_storage_inst.set_by_key("bug_modified_since-%s" % repo["module"],
                                    current_date)

    vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
    vcs_inst.fetch()

    branches = {repo.get("default_branch", "master")}
    for release in repo.get("releases"):
        if "branch" in release:
            branches.add(release["branch"])

    for branch in branches:
        LOG.debug("Processing commits in repo: %s, branch: %s", uri, branch)

        vcs_key = "vcs:" + str(parse.quote_plus(uri) + ":" + branch)
        last_id = runtime_storage_inst.get_by_key(vcs_key)

        commit_iterator = vcs_inst.log(branch, last_id)
        commit_iterator_typed = _record_typer(commit_iterator, "commit")
        processed_commit_iterator = record_processor_inst.process(
            commit_iterator_typed)
        runtime_storage_inst.set_records(processed_commit_iterator,
                                         _merge_commits)

        last_id = vcs_inst.get_last_id(branch)
        runtime_storage_inst.set_by_key(vcs_key, last_id)

        LOG.debug("Processing reviews for repo: %s, branch: %s", uri, branch)

        rcs_key = "rcs:" + str(parse.quote_plus(uri) + ":" + branch)
        last_id = runtime_storage_inst.get_by_key(rcs_key)

        review_iterator = rcs_inst.log(repo, branch, last_id,
                                       grab_comments=("ci" in repo))
        review_iterator_typed = _record_typer(review_iterator, "review")

        if "ci" in repo:  # add external CI data
            review_iterator_typed = _process_reviews(
                review_iterator_typed, repo["ci"], repo["module"], branch)

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)

        last_id = rcs_inst.get_last_id(repo, branch)
        runtime_storage_inst.set_by_key(rcs_key, last_id)

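
# The branch set above always contains the default branch plus every
# release branch; building it as a set deduplicates releases that share a
# branch. A tiny illustration with made-up release data:
repo = {
    'default_branch': 'master',
    'releases': [{'release_name': 'icehouse', 'branch': 'stable/icehouse'},
                 {'release_name': 'juno'}],  # no 'branch' key
}
branches = {repo.get('default_branch', 'master')}
for release in repo.get('releases'):
    if 'branch' in release:
        branches.add(release['branch'])
# branches == {'master', 'stable/icehouse'}
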
def _process_repo_bugs(repo, runtime_storage_inst, record_processor_inst):
    LOG.info('Processing bugs for repo: %s', repo['uri'])

    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key(
        'bug_modified_since-%s' % repo['module'])

    bug_iterator = bps.log(repo, bug_modified_since)
    bug_iterator_typed = _record_typer(bug_iterator, 'bug')
    processed_bug_iterator = record_processor_inst.process(bug_iterator_typed)
    runtime_storage_inst.set_records(processed_bug_iterator,
                                     utils.merge_records)

    runtime_storage_inst.set_by_key('bug_modified_since-%s' % repo['module'],
                                    current_date)

def _normalize_user(user):
    for c in user['companies']:
        c['end_date'] = utils.date_to_timestamp(c['end_date'])

    # sort companies by end_date
    def end_date_comparator(x, y):
        if x["end_date"] == 0:
            return 1
        elif y["end_date"] == 0:
            return -1
        else:
            return x["end_date"] - y["end_date"]

    user['companies'].sort(key=utils.cmp_to_key(end_date_comparator))
    user['user_id'] = user['launchpad_id']

def process(runtime_storage_inst, record_processor_inst):
    repos = utils.load_repos(runtime_storage_inst)

    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key('bug_modified_since')

    for repo in repos:
        _process_repo(repo, runtime_storage_inst, record_processor_inst,
                      bug_modified_since)

    runtime_storage_inst.set_by_key('bug_modified_since', current_date)

    LOG.info('Processing mail lists')
    mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
    for mail_list in mail_lists:
        _process_mail_list(mail_list, runtime_storage_inst,
                           record_processor_inst)

    _post_process_records(record_processor_inst, repos)

def _normalize_user(user):
    for c in user['companies']:
        c['end_date'] = utils.date_to_timestamp(c['end_date'])

    # sort companies by end_date
    def end_date_comparator(x, y):
        if x["end_date"] == 0:
            return 1
        elif y["end_date"] == 0:
            return -1
        else:
            return x["end_date"] - y["end_date"]

    user['companies'].sort(key=utils.cmp_to_key(end_date_comparator))

    user['user_id'] = user_processor.make_user_id(
        launchpad_id=user.get('launchpad_id'),
        emails=user.get('emails'),
        gerrit_id=user.get('gerrit_id'))

def main():
    # init conf and logging
    conf = cfg.CONF
    conf.register_cli_opts(config.OPTS)
    conf.register_opts(config.OPTS)
    conf(project='stackalytics')

    logging.setup('stackalytics')
    LOG.info('Logging enabled')

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return not 0

    gerrit = rcs.get_rcs(None, cfg.CONF.review_uri)
    gerrit.setup(key_filename=cfg.CONF.ssh_key_filename,
                 username=cfg.CONF.ssh_username)

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.git_base_uri,
                                   gerrit,
                                   cfg.CONF.driverlog_data_uri)

    process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

def get_vault():
    vault = getattr(flask.current_app, 'stackalytics_vault', None)
    if not vault:
        try:
            vault = {}
            runtime_storage_inst = runtime_storage.get_runtime_storage(
                cfg.CONF.runtime_storage_uri)
            vault['runtime_storage'] = runtime_storage_inst
            vault['memory_storage'] = memory_storage.get_memory_storage(
                memory_storage.MEMORY_STORAGE_CACHED)

            flask.current_app.stackalytics_vault = vault
        except Exception as e:
            LOG.critical('Failed to initialize application: %s', e,
                         exc_info=True)
            flask.abort(500)

    if not getattr(flask.request, 'stackalytics_updated', None):
        time_now = utils.date_to_timestamp('now')
        may_update_by_time = time_now > vault.get('vault_next_update_time', 0)
        if may_update_by_time:
            flask.request.stackalytics_updated = True
            vault['vault_update_time'] = time_now
            vault['vault_next_update_time'] = (
                time_now + cfg.CONF.dashboard_update_interval)
            memory_storage_inst = vault['memory_storage']
            have_updates = memory_storage_inst.update(
                compact_records(vault['runtime_storage'].get_update(
                    os.getpid())))
            vault['runtime_storage_update_time'] = (
                vault['runtime_storage'].get_by_key(
                    'runtime_storage_update_time'))
            if have_updates:
                vault['cache'] = {}
                vault['cache_size'] = 0
                _init_releases(vault)
                _init_module_groups(vault)
                _init_project_types(vault)
                _init_repos(vault)
                _init_user_index(vault)

    return vault

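
# get_vault() refreshes at most once per dashboard_update_interval; the
# stored next-update timestamp acts as a crude cross-request rate limiter.
# A worked timeline with an assumed interval of 300 seconds:
#   t=1000  first request  -> update runs, vault_next_update_time = 1300
#   t=1100  request        -> 1100 <= 1300, cached vault is reused
#   t=1350  request        -> 1350 >  1300, update runs again
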
def _process_repo_reviews(repo, runtime_storage_inst, record_processor_inst):
    LOG.info('Processing reviews for repo: %s', repo['uri'])

    rcs_inst = rcs.get_rcs(repo['gerrit_uri'])
    rcs_inst.setup(key_filename=repo['key_filename'],
                   username=repo['ssh_username'],
                   gerrit_retry=CONF.gerrit_retry)

    gerrit_hostname = rcs.get_socket_tuple_from_uri(repo['gerrit_uri'])[0]

    for branch in _get_repo_branches(repo):
        LOG.info('Processing reviews for repo: %s, branch: %s',
                 repo['uri'], branch)

        quoted_uri = six.moves.urllib.parse.quote_plus(repo['uri'])
        rcs_key = 'rcs:%s:%s' % (quoted_uri, branch)
        last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
        current_retrieval_time = utils.date_to_timestamp('now')

        review_iterator = itertools.chain(
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='open'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='merged', grab_comments=True),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='abandoned', grab_comments=True),
        )
        review_iterator_with_gerrit = _param_adder(review_iterator,
                                                   'gerrit_hostname',
                                                   gerrit_hostname)
        review_iterator_typed = _record_typer(review_iterator_with_gerrit,
                                              'review')

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)
        runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

    rcs_inst.close()

def main():
    # init conf and logging
    conf = cfg.CONF
    conf.register_cli_opts(config.OPTS)
    conf.register_opts(config.OPTS)
    logging.register_options(conf)
    logging.set_defaults()
    conf(project='stackalytics')

    logging.setup(conf, 'stackalytics')
    LOG.info('Logging enabled')
    conf.log_opt_values(LOG, std_logging.DEBUG)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return not 0

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.driverlog_data_uri)

    process_project_list(runtime_storage_inst, cfg.CONF.project_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

def main():
    utils.init_config_and_logging(config.CONNECTION_OPTS +
                                  config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return not 0

    try:
        jsonschema.validate(default_data, schema.default_data)
    except jsonschema.ValidationError as e:
        LOG.critical('The default data is invalid: %s' % e)
        return not 0

    default_data_processor.process(runtime_storage_inst, default_data)

    process_project_list(runtime_storage_inst)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

    LOG.info('stackalytics-processor succeeded.')

def response_decorated_function(*args, **kwargs):
    callback = flask.app.request.args.get('callback', False)
    data = func(*args, **kwargs)

    if callback:
        data = str(callback) + '(' + data + ')'
        mimetype = 'application/javascript'
    else:
        mimetype = 'application/json'

    resp = flask.current_app.response_class(data, mimetype=mimetype)

    update_time = vault.get_vault()['vault_next_update_time']
    now = utils.date_to_timestamp('now')
    if now < update_time:
        max_age = update_time - now
    else:
        max_age = 0
    resp.headers['cache-control'] = 'public, max-age=%d' % (max_age,)
    resp.headers['expires'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S GMT',
        time.gmtime(vault.get_vault()['vault_next_update_time']))
    resp.headers['access-control-allow-origin'] = '*'
    return resp

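
# max-age is derived so the response expires exactly when the vault next
# refreshes, and the Expires header repeats the same instant in RFC 1123
# form. A quick check with assumed numbers:
import time

update_time = 1300  # assumed vault_next_update_time
now = 1000          # assumed current timestamp
max_age = update_time - now if now < update_time else 0  # -> 300
expires = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                        time.gmtime(update_time))
# -> 'Thu, 01 Jan 1970 00:21:40 GMT'
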
def setUp(self):
    super(TestRecordProcessor, self).setUp()

    companies = [
        {
            'company_name': 'SuperCompany',
            'domains': ['super.com', 'super.no']
        },
        {
            "domains": ["nec.com", "nec.co.jp"],
            "company_name": "NEC"
        },
        {
            'company_name': '*independent',
            'domains': ['']
        },
    ]

    self.user = {
        'user_id': 'john_doe',
        'launchpad_id': 'john_doe',
        'user_name': 'John Doe',
        'emails': ['*****@*****.**', '*****@*****.**'],
        'companies': [
            {
                'company_name': '*independent',
                'end_date': 1234567890
            },
            {
                'company_name': 'SuperCompany',
                'end_date': 0
            },
        ]
    }
    self.get_users = mock.Mock(return_value=[
        self.user,
    ])

    releases = [
        {
            'release_name': 'prehistory',
            'end_date': utils.date_to_timestamp('2011-Apr-21')
        },
        {
            'release_name': 'Diablo',
            'end_date': utils.date_to_timestamp('2011-Sep-08')
        },
        {
            'release_name': 'Zoo',
            'end_date': utils.date_to_timestamp('2035-Sep-08')
        },
    ]

    def get_by_key(table):
        if table == 'companies':
            return _make_companies(companies)
        elif table == 'users':
            return _make_users(self.get_users())
        elif table == 'releases':
            return releases
        else:
            raise Exception('Wrong table %s' % table)

    p_storage = mock.Mock(runtime_storage.RuntimeStorage)
    p_storage.get_by_key = mock.Mock(side_effect=get_by_key)

    self.runtime_storage = p_storage
    self.commit_processor = record_processor.RecordProcessor(p_storage)
    self.read_json_from_uri_patch = mock.patch(
        'stackalytics.processor.utils.read_json_from_uri')
    self.read_json = self.read_json_from_uri_patch.start()

def setUp(self):
    super(TestRecordProcessor, self).setUp()

    companies = [
        {
            'company_name': 'SuperCompany',
            'domains': ['super.com', 'super.no']
        },
        {
            "domains": ["nec.com", "nec.co.jp"],
            "company_name": "NEC"
        },
        {
            'company_name': '*independent',
            'domains': ['']
        },
    ]

    self.user = {
        'user_id': 'john_doe',
        'launchpad_id': 'john_doe',
        'user_name': 'John Doe',
        'emails': ['*****@*****.**', '*****@*****.**'],
        'companies': [
            {'company_name': '*independent', 'end_date': 1234567890},
            {'company_name': 'SuperCompany', 'end_date': 0},
        ]
    }
    self.get_users = mock.Mock(return_value=[
        self.user,
    ])

    releases = [
        {
            'release_name': 'prehistory',
            'end_date': utils.date_to_timestamp('2011-Apr-21')
        },
        {
            'release_name': 'Diablo',
            'end_date': utils.date_to_timestamp('2011-Sep-08')
        },
        {
            'release_name': 'Zoo',
            'end_date': utils.date_to_timestamp('2035-Sep-08')
        },
    ]

    def get_by_key(table):
        if table == 'companies':
            return default_data_processor._process_companies(companies)
        elif table == 'users':
            return default_data_processor._process_users(self.get_users())
        elif table == 'releases':
            return releases
        else:
            raise Exception('Wrong table %s' % table)

    p_storage = mock.Mock(runtime_storage.RuntimeStorage)
    p_storage.get_by_key = mock.Mock(side_effect=get_by_key)

    self.runtime_storage = p_storage
    self.commit_processor = record_processor.RecordProcessor(p_storage)
    self.read_json_from_uri_patch = mock.patch(
        'stackalytics.processor.utils.read_json_from_uri')
    self.read_json = self.read_json_from_uri_patch.start()

def _process_repo(repo, runtime_storage_inst, record_processor_inst,
                  rcs_inst):
    uri = repo['uri']
    quoted_uri = six.moves.urllib.parse.quote_plus(uri)
    LOG.info('Processing repo uri: %s', uri)

    LOG.info('Processing blueprints for repo uri: %s', uri)
    bp_iterator = lp.log(repo)
    bp_iterator_typed = _record_typer(bp_iterator, 'bp')
    processed_bp_iterator = record_processor_inst.process(
        bp_iterator_typed)
    runtime_storage_inst.set_records(processed_bp_iterator,
                                     utils.merge_records)

    LOG.info('Processing bugs for repo uri: %s', uri)
    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key(
        'bug_modified_since-%s' % repo['module'])
    bug_iterator = bps.log(repo, bug_modified_since)
    bug_iterator_typed = _record_typer(bug_iterator, 'bug')
    processed_bug_iterator = record_processor_inst.process(
        bug_iterator_typed)
    runtime_storage_inst.set_records(processed_bug_iterator,
                                     utils.merge_records)
    runtime_storage_inst.set_by_key(
        'bug_modified_since-%s' % repo['module'], current_date)

    vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
    vcs_inst.fetch()

    branches = {repo.get('default_branch', 'master')}
    for release in repo.get('releases'):
        if 'branch' in release:
            branches.add(release['branch'])

    for branch in branches:
        LOG.info('Processing commits in repo: %s, branch: %s', uri, branch)

        vcs_key = 'vcs:%s:%s' % (quoted_uri, branch)
        last_id = runtime_storage_inst.get_by_key(vcs_key)

        commit_iterator = vcs_inst.log(branch, last_id)
        commit_iterator_typed = _record_typer(commit_iterator, 'commit')
        processed_commit_iterator = record_processor_inst.process(
            commit_iterator_typed)
        runtime_storage_inst.set_records(
            processed_commit_iterator, _merge_commits)

        last_id = vcs_inst.get_last_id(branch)
        runtime_storage_inst.set_by_key(vcs_key, last_id)

        if 'has_gerrit' not in repo:
            continue  # do not poll reviews for those that do not have them

        LOG.info('Processing reviews for repo: %s, branch: %s', uri, branch)

        rcs_key = 'rcs:%s:%s' % (quoted_uri, branch)
        last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
        current_retrieval_time = int(time.time())

        review_iterator = itertools.chain(
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='open'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='merged'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='abandoned', grab_comments=True),
        )
        review_iterator_typed = _record_typer(review_iterator, 'review')

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)
        runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

        if 'drivers' in repo:
            LOG.info('Processing CI votes for repo: %s, branch: %s',
                     uri, branch)

            rcs_key = 'ci:%s:%s' % (quoted_uri, branch)
            last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
            current_retrieval_time = int(time.time())

            review_iterator = rcs_inst.log(repo, branch, last_retrieval_time,
                                           status='merged',
                                           grab_comments=True)
            review_iterator = driverlog.log(review_iterator, repo['drivers'])
            review_iterator_typed = _record_typer(review_iterator, 'ci')

            processed_review_iterator = record_processor_inst.process(
                review_iterator_typed)
            runtime_storage_inst.set_records(processed_review_iterator,
                                             utils.merge_records)
            runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

def _process_repo(repo, runtime_storage_inst, record_processor_inst,
                  rcs_inst):
    uri = repo['uri']
    LOG.info('Processing repo uri: %s', uri)

    LOG.debug('Processing blueprints for repo uri: %s', uri)
    bp_iterator = lp.log(repo)
    bp_iterator_typed = _record_typer(bp_iterator, 'bp')
    processed_bp_iterator = record_processor_inst.process(bp_iterator_typed)
    runtime_storage_inst.set_records(processed_bp_iterator,
                                     utils.merge_records)

    LOG.debug('Processing bugs for repo uri: %s', uri)
    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key(
        'bug_modified_since-%s' % repo['module'])
    bug_iterator = bps.log(repo, bug_modified_since)
    bug_iterator_typed = _record_typer(bug_iterator, 'bug')
    processed_bug_iterator = record_processor_inst.process(bug_iterator_typed)
    runtime_storage_inst.set_records(processed_bug_iterator,
                                     utils.merge_records)
    runtime_storage_inst.set_by_key('bug_modified_since-%s' % repo['module'],
                                    current_date)

    vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
    vcs_inst.fetch()

    branches = {repo.get('default_branch', 'master')}
    for release in repo.get('releases'):
        if 'branch' in release:
            branches.add(release['branch'])

    for branch in branches:
        LOG.debug('Processing commits in repo: %s, branch: %s', uri, branch)

        vcs_key = 'vcs:' + str(parse.quote_plus(uri) + ':' + branch)
        last_id = runtime_storage_inst.get_by_key(vcs_key)

        commit_iterator = vcs_inst.log(branch, last_id)
        commit_iterator_typed = _record_typer(commit_iterator, 'commit')
        processed_commit_iterator = record_processor_inst.process(
            commit_iterator_typed)
        runtime_storage_inst.set_records(processed_commit_iterator,
                                         _merge_commits)

        last_id = vcs_inst.get_last_id(branch)
        runtime_storage_inst.set_by_key(vcs_key, last_id)

        LOG.debug('Processing reviews for repo: %s, branch: %s', uri, branch)

        rcs_key = 'rcs:' + str(parse.quote_plus(uri) + ':' + branch)
        last_id = runtime_storage_inst.get_by_key(rcs_key)

        review_iterator = rcs_inst.log(repo, branch, last_id,
                                       grab_comments=('ci' in repo))
        review_iterator_typed = _record_typer(review_iterator, 'review')

        if 'ci' in repo:  # add external CI data
            review_iterator_typed = _process_reviews(review_iterator_typed,
                                                     repo['ci'],
                                                     repo['module'], branch)

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)

        last_id = rcs_inst.get_last_id(repo, branch)
        runtime_storage_inst.set_by_key(rcs_key, last_id)

        'companies': [
            {
                'company_name': '*independent',
                'end_date': 1234567890
            },
            {
                'company_name': 'SuperCompany',
                'end_date': 0
            },
        ]
    }
]

RELEASES = [
    {
        'release_name': 'prehistory',
        'end_date': utils.date_to_timestamp('2011-Apr-21')
    },
    {
        'release_name': 'Diablo',
        'end_date': utils.date_to_timestamp('2011-Sep-08')
    },
    {
        'release_name': 'Zoo',
        'end_date': utils.date_to_timestamp('2035-Sep-08')
    },
]


class TestRecordProcessor(testtools.TestCase):
    def setUp(self):
        super(TestRecordProcessor, self).setUp()

        'launchpad_id': 'john_doe',
        'user_name': 'John Doe',
        'emails': ['*****@*****.**', '*****@*****.**'],
        'companies': [
            {'company_name': '*independent', 'end_date': 1234567890},
            {'company_name': 'SuperCompany', 'end_date': 0},
        ]
    }
]

RELEASES = [
    {
        'release_name': 'prehistory',
        'end_date': utils.date_to_timestamp('2011-Apr-21')
    },
    {
        'release_name': 'Diablo',
        'end_date': utils.date_to_timestamp('2011-Sep-08')
    },
    {
        'release_name': 'Zoo',
        'end_date': utils.date_to_timestamp('2035-Sep-08')
    },
]


class TestRecordProcessor(testtools.TestCase):
    def setUp(self):
        super(TestRecordProcessor, self).setUp()

def _normalize_releases(releases):
    for release in releases:
        release['release_name'] = release['release_name'].lower()
        release['end_date'] = utils.date_to_timestamp(release['end_date'])
    releases.sort(key=lambda x: x['end_date'])

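
# Every snippet here leans on utils.date_to_timestamp(), which accepts the
# literal 'now' as well as dates such as '2011-Apr-21' and returns an
# integer Unix timestamp. A hypothetical reimplementation for illustration
# only (the real helper may differ):
import calendar
import time

def date_to_timestamp(d):
    if not d:
        return 0  # assumed: empty dates normalize to the 0 sentinel
    if d == 'now':
        return int(time.time())
    return calendar.timegm(time.strptime(d, '%Y-%b-%d'))
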
def _process_repo(repo, runtime_storage_inst, record_processor_inst,
                  rcs_inst):
    uri = repo['uri']
    quoted_uri = six.moves.urllib.parse.quote_plus(uri)
    LOG.info('Processing repo uri: %s', uri)

    LOG.info('Processing blueprints for repo uri: %s', uri)
    bp_iterator = lp.log(repo)
    bp_iterator_typed = _record_typer(bp_iterator, 'bp')
    processed_bp_iterator = record_processor_inst.process(bp_iterator_typed)
    runtime_storage_inst.set_records(processed_bp_iterator,
                                     utils.merge_records)

    LOG.info('Processing bugs for repo uri: %s', uri)
    current_date = utils.date_to_timestamp('now')
    bug_modified_since = runtime_storage_inst.get_by_key(
        'bug_modified_since-%s' % repo['module'])
    bug_iterator = bps.log(repo, bug_modified_since)
    bug_iterator_typed = _record_typer(bug_iterator, 'bug')
    processed_bug_iterator = record_processor_inst.process(bug_iterator_typed)
    runtime_storage_inst.set_records(processed_bug_iterator,
                                     utils.merge_records)
    runtime_storage_inst.set_by_key('bug_modified_since-%s' % repo['module'],
                                    current_date)

    vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
    vcs_inst.fetch()

    branches = {repo.get('default_branch', 'master')}
    for release in repo.get('releases'):
        if 'branch' in release:
            branches.add(release['branch'])

    for branch in branches:
        LOG.info('Processing commits in repo: %s, branch: %s', uri, branch)

        vcs_key = 'vcs:%s:%s' % (quoted_uri, branch)
        last_id = runtime_storage_inst.get_by_key(vcs_key)

        commit_iterator = vcs_inst.log(branch, last_id)
        commit_iterator_typed = _record_typer(commit_iterator, 'commit')
        processed_commit_iterator = record_processor_inst.process(
            commit_iterator_typed)
        runtime_storage_inst.set_records(processed_commit_iterator,
                                         _merge_commits)

        last_id = vcs_inst.get_last_id(branch)
        runtime_storage_inst.set_by_key(vcs_key, last_id)

        LOG.info('Processing reviews for repo: %s, branch: %s', uri, branch)

        rcs_key = 'rcs:%s:%s' % (quoted_uri, branch)
        last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
        current_retrieval_time = int(time.time())

        review_iterator = itertools.chain(
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='open'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='merged'),
            rcs_inst.log(repo, branch, last_retrieval_time,
                         status='abandoned', grab_comments=True),
        )
        review_iterator_typed = _record_typer(review_iterator, 'review')

        processed_review_iterator = record_processor_inst.process(
            review_iterator_typed)
        runtime_storage_inst.set_records(processed_review_iterator,
                                         utils.merge_records)
        runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)

        if 'drivers' in repo:
            LOG.info('Processing CI votes for repo: %s, branch: %s',
                     uri, branch)

            rcs_key = 'ci:%s:%s' % (quoted_uri, branch)
            last_retrieval_time = runtime_storage_inst.get_by_key(rcs_key)
            current_retrieval_time = int(time.time())

            review_iterator = rcs_inst.log(repo, branch, last_retrieval_time,
                                           status='merged',
                                           grab_comments=True)
            review_iterator = driverlog.log(review_iterator, repo['drivers'])
            review_iterator_typed = _record_typer(review_iterator, 'ci')

            processed_review_iterator = record_processor_inst.process(
                review_iterator_typed)
            runtime_storage_inst.set_records(processed_review_iterator,
                                             utils.merge_records)
            runtime_storage_inst.set_by_key(rcs_key, current_retrieval_time)
