def main():
    """Entry point: load default data and run the full processing pipeline.

    Returns a non-zero value when the default data cannot be loaded so the
    console-script wrapper exits with a failure status.
    """
    utils.init_config_and_logging(
        config.CONNECTION_OPTS + config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return 1  # explicit non-zero exit status (was the obscure `not 0`)

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.driverlog_data_uri)

    process_project_list(runtime_storage_inst)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))
def main():
    """Entry point: set up config/logging, then run the processing pipeline.

    Returns a non-zero value on startup failure (missing default data).
    """
    # init conf and logging
    conf = cfg.CONF
    conf.register_cli_opts(config.OPTS)
    conf.register_opts(config.OPTS)
    conf()

    logging.setup('stackalytics')
    LOG.info('Logging enabled')

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return 1  # explicit non-zero exit status (was the obscure `not 0`)

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.sources_root,
                                   cfg.CONF.force_update)

    process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    update_records(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)
def process(runtime_storage_inst, default_data, sources_root, force_update):
    """Normalize default data, refresh storage, and rebuild user profiles.

    When the default data changed (or force_update is set) the stored copy
    is replaced; afterwards every record is re-run through the record
    processor using a freshly gathered release index.
    """
    LOG.debug('Process default data')

    normalizer.normalize_default_data(default_data)

    data_changed = _check_default_data_change(runtime_storage_inst,
                                              default_data)
    if data_changed or force_update:
        _update_default_data(runtime_storage_inst, default_data)

    LOG.debug('Gather release index for all repos')
    release_index = {}
    for repo in runtime_storage_inst.get_by_key('repos'):
        vcs_backend = vcs.get_vcs(repo, sources_root)
        release_index.update(vcs_backend.get_release_index())

    processor = record_processor.RecordProcessor(runtime_storage_inst)

    # First pass: walk every record so that complete, valid user
    # profiles exist before records are rewritten.
    LOG.debug('Iterate all records to create valid users profiles')
    for record in runtime_storage_inst.get_all_records():
        processor.update_user(record)

    # Second pass: rewrite records against the profiles built above.
    LOG.debug('Update all records according to users profiles')
    refreshed = processor.update(runtime_storage_inst.get_all_records(),
                                 release_index)
    runtime_storage_inst.set_records(refreshed)

    if 'project_sources' in default_data:
        _retrieve_project_list(runtime_storage_inst,
                               default_data['project_sources'])
def update_repos(runtime_storage_inst):
    """Run repository processing for every repo known to runtime storage."""
    all_repos = runtime_storage_inst.get_by_key('repos')
    processor = record_processor.RecordProcessor(runtime_storage_inst)

    for repo in all_repos:
        process_repo(repo, runtime_storage_inst, processor)
def _update_members_company_name(runtime_storage_inst):
    """Propagate changed company names from member records to user profiles."""
    LOG.debug('Update company names for members')

    processor = record_processor.RecordProcessor(runtime_storage_inst)
    changed_members = _get_changed_member_records(runtime_storage_inst,
                                                 processor)

    for member_record in changed_members:
        company_name = member_record['company_name']
        user = utils.load_user(runtime_storage_inst,
                               member_record['user_id'])

        # The new affiliation becomes the user's only (open-ended) company.
        user['companies'] = [{
            'company_name': company_name,
            'end_date': 0,
        }]
        user['company_name'] = company_name
        utils.store_user(runtime_storage_inst, user)

        LOG.debug('Company name changed for user %s', user)

        record_id = member_record['record_id']
        # NOTE(review): writes through private members of the runtime
        # storage; a public record-update API would be preferable.
        runtime_storage_inst.memcached.set(
            runtime_storage_inst._get_record_name(record_id), member_record)
        runtime_storage_inst._commit_update(record_id)
def main():
    """Entry point: validate default data against the schema and process it.

    Returns a non-zero value when the default data is missing or fails
    schema validation, so the console-script wrapper exits with failure.
    """
    utils.init_config_and_logging(
        config.CONNECTION_OPTS + config.PROCESSOR_OPTS)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return 1  # explicit non-zero exit status (was the obscure `not 0`)

    try:
        jsonschema.validate(default_data, schema.default_data)
    except jsonschema.ValidationError as e:
        # lazy %-style args: message is rendered only if actually emitted
        LOG.critical('The default data is invalid: %s', e)
        return 1

    default_data_processor.process(runtime_storage_inst, default_data)

    process_project_list(runtime_storage_inst)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))

    LOG.info('stackalytics-processor succeeded.')
def _update_records(runtime_storage_inst, sources_root):
    """Refresh stored records using the release index gathered from repos."""
    LOG.debug('Update existing records')

    release_index = {}
    for repo in utils.load_repos(runtime_storage_inst):
        release_index.update(
            vcs.get_vcs(repo, sources_root).get_release_index())

    processor = record_processor.RecordProcessor(runtime_storage_inst)
    processor.update(release_index)
def make_record_processor(self, users=None, companies=None, releases=None,
                          repos=None):
    """Build a RecordProcessor backed by a stub runtime storage."""
    storage = make_runtime_storage(users=users, companies=companies,
                                   releases=releases, repos=repos)
    return record_processor.RecordProcessor(storage)
def update_records(runtime_storage_inst):
    """Process all repos and mailing lists, then finalize record updates."""
    repos = utils.load_repos(runtime_storage_inst)
    processor = record_processor.RecordProcessor(runtime_storage_inst)

    for repo in repos:
        process_repo(repo, runtime_storage_inst, processor)

    # mail_lists may be absent from storage; treat that as an empty list
    for mail_list in (runtime_storage_inst.get_by_key('mail_lists') or []):
        process_mail_list(mail_list, runtime_storage_inst, processor)

    processor.update()
def update_repos(runtime_storage_inst):
    """Process each repo, passing along its currently-open reviews."""
    repos = runtime_storage_inst.get_by_key('repos')
    processor = record_processor.RecordProcessor(runtime_storage_inst)
    open_reviews = _open_reviews(runtime_storage_inst)

    for repo in repos:
        module = repo['module']
        # modules without open reviews get an empty set
        reviews_for_repo = (open_reviews[module]
                            if module in open_reviews else set())
        process_repo(repo, runtime_storage_inst, processor,
                     reviews_for_repo)
def main():
    """Entry point: set up config, logging and gerrit, then run the pipeline.

    Returns a non-zero value when the default data cannot be loaded.
    """
    # init conf and logging
    conf = cfg.CONF
    conf.register_cli_opts(config.OPTS)
    conf.register_opts(config.OPTS)
    conf(project='stackalytics')

    logging.setup('stackalytics')
    LOG.info('Logging enabled')

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return 1  # explicit non-zero exit status (was the obscure `not 0`)

    gerrit = rcs.get_rcs(None, cfg.CONF.review_uri)
    gerrit.setup(key_filename=cfg.CONF.ssh_key_filename,
                 username=cfg.CONF.ssh_username)

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.git_base_uri,
                                   gerrit,
                                   cfg.CONF.driverlog_data_uri)

    process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))
def make_record_processor(self, users=None, companies=None, releases=None,
                          repos=None, lp_info=None, lp_user_name=None):
    """Build a RecordProcessor over stub storage, wiring optional LP mocks."""
    storage = make_runtime_storage(users=users, companies=companies,
                                   releases=releases, repos=repos)
    processor = record_processor.RecordProcessor(storage)

    # Launchpad lookup mocks are wired only after construction, matching
    # the fixture's expectation that the ctor runs without them.
    if lp_info is not None:
        self.lp_profile_by_email.side_effect = lambda x: lp_info.get(x)
    if lp_user_name is not None:
        self.lp_profile_by_launchpad_id.side_effect = (
            lambda x: lp_user_name.get(x))

    return processor
def main():
    """Entry point: register options, set up oslo logging, run the pipeline.

    Returns a non-zero value when the default data cannot be loaded.
    """
    # init conf and logging
    conf = cfg.CONF
    conf.register_cli_opts(config.OPTS)
    conf.register_opts(config.OPTS)
    logging.register_options(conf)
    logging.set_defaults()
    conf(project='stackalytics')

    logging.setup(conf, 'stackalytics')
    LOG.info('Logging enabled')
    conf.log_opt_values(LOG, std_logging.DEBUG)

    runtime_storage_inst = runtime_storage.get_runtime_storage(
        cfg.CONF.runtime_storage_uri)

    default_data = utils.read_json_from_uri(cfg.CONF.default_data_uri)
    if not default_data:
        LOG.critical('Unable to load default data')
        return 1  # explicit non-zero exit status (was the obscure `not 0`)

    default_data_processor.process(runtime_storage_inst,
                                   default_data,
                                   cfg.CONF.driverlog_data_uri)

    process_project_list(runtime_storage_inst, cfg.CONF.project_list_uri)

    update_pids(runtime_storage_inst)

    record_processor_inst = record_processor.RecordProcessor(
        runtime_storage_inst)

    process(runtime_storage_inst, record_processor_inst)

    apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

    # long operation should be the last
    update_members(runtime_storage_inst, record_processor_inst)

    runtime_storage_inst.set_by_key('runtime_storage_update_time',
                                    utils.date_to_timestamp('now'))
def process(runtime_storage_inst, default_data, sources_root):
    """Apply normalized default data and refresh all stored records."""
    normalizer.normalize_default_data(default_data)

    if _check_default_data_change(runtime_storage_inst, default_data):
        _update_default_data(runtime_storage_inst, default_data)

    # Collect the commit-to-release mapping across every known repo.
    release_index = {}
    for repo in runtime_storage_inst.get_by_key('repos'):
        release_index.update(
            vcs.get_vcs(repo, sources_root).get_release_index())

    processor = record_processor.RecordProcessor(runtime_storage_inst)
    refreshed = processor.update(runtime_storage_inst.get_all_records(),
                                 release_index)
    runtime_storage_inst.set_records(refreshed)

    if 'project_sources' in default_data:
        _retrieve_project_list(runtime_storage_inst,
                               default_data['project_sources'])
def setUp(self):
    """Build a mocked runtime storage and a RecordProcessor under test.

    Fixture data: company domain mappings, a single pre-seeded user with
    an affiliation history, and a release timeline; storage lookups are
    routed through a mock `get_by_key` side effect.
    """
    super(TestRecordProcessor, self).setUp()

    # Company domain -> name fixtures, including the '*independent' marker.
    companies = [
        {
            'company_name': 'SuperCompany',
            'domains': ['super.com', 'super.no']
        },
        {
            "domains": ["nec.com", "nec.co.jp"],
            "company_name": "NEC"
        },
        {
            'company_name': '*independent',
            'domains': ['']
        },
    ]

    # Single seeded user profile with a two-entry affiliation history
    # (independent until 1234567890, SuperCompany afterwards).
    self.user = {
        'user_id': 'john_doe',
        'launchpad_id': 'john_doe',
        'user_name': 'John Doe',
        'emails': ['*****@*****.**', '*****@*****.**'],
        'companies': [
            {
                'company_name': '*independent',
                'end_date': 1234567890
            },
            {
                'company_name': 'SuperCompany',
                'end_date': 0
            },
        ]
    }

    # Mock so tests can swap the user list per-case.
    self.get_users = mock.Mock(return_value=[
        self.user,
    ])

    # Release timeline; 'Zoo' has a far-future end date so current
    # records always fall into a release.
    releases = [
        {
            'release_name': 'prehistory',
            'end_date': utils.date_to_timestamp('2011-Apr-21')
        },
        {
            'release_name': 'Diablo',
            'end_date': utils.date_to_timestamp('2011-Sep-08')
        },
        {
            'release_name': 'Zoo',
            'end_date': utils.date_to_timestamp('2035-Sep-08')
        },
    ]

    def get_by_key(table):
        # Dispatch storage lookups to the fixtures above; anything else
        # is a test bug and raises immediately.
        if table == 'companies':
            return _make_companies(companies)
        elif table == 'users':
            return _make_users(self.get_users())
        elif table == 'releases':
            return releases
        else:
            raise Exception('Wrong table %s' % table)

    p_storage = mock.Mock(runtime_storage.RuntimeStorage)
    p_storage.get_by_key = mock.Mock(side_effect=get_by_key)

    self.runtime_storage = p_storage
    # System under test, wired to the mocked storage.
    self.commit_processor = record_processor.RecordProcessor(p_storage)
    # Patch remote JSON fetches so no network access happens in tests.
    self.read_json_from_uri_patch = mock.patch(
        'stackalytics.processor.utils.read_json_from_uri')
    self.read_json = self.read_json_from_uri_patch.start()
def make_record_processor(runtime_storage_inst=None):
    """Return a RecordProcessor, defaulting to a freshly made stub storage."""
    storage = runtime_storage_inst or make_runtime_storage()
    return record_processor.RecordProcessor(storage)