def make_childmap(self):
    visited = set([])
    q = Queue()
    q.put((self.git.head.commit, None))  # (commit, child_hash)

    progress = Progress(u'[%s] making git childmap' % self.__name__, 500, 10000, False)
    progress.set_point(0)
    progress.start()
    while q.empty() is False:
        progress.check()
        commit, child_hash = q.get()
        commit_hash = str(commit)[:7]

        # create child map
        if commit_hash not in self.childmap:
            self.childmap[commit_hash] = set([])
        if child_hash is not None:
            self.childmap[commit_hash].add(child_hash)

        if commit_hash in visited:
            continue
        visited.add(commit_hash)

        # pass itself to parent
        for parent in commit.parents:
            q.put((parent, commit_hash))

        # add ancestors if this commit has no parents
        if len(commit.parents) == 0:
            self.ancestors.add(commit_hash)
    progress.done()
    pass
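# Minimal standalone sketch of the same child-map traversal, using a plain
# {commit: [parents]} dict instead of GitPython commit objects. The names
# build_childmap and toy_history are illustrative and not part of the class above.
from collections import deque

def build_childmap(parents_of, head):
    childmap, ancestors, visited = {}, set(), set()
    q = deque([(head, None)])  # (commit, child)
    while q:
        commit, child = q.popleft()
        childmap.setdefault(commit, set())
        if child is not None:
            childmap[commit].add(child)
        if commit in visited:
            continue
        visited.add(commit)
        for parent in parents_of.get(commit, []):
            q.append((parent, commit))
        if not parents_of.get(commit):
            ancestors.add(commit)  # root commit: no parents
    return childmap, ancestors

# For a linear history aaaaaaa <- bbbbbbb <- ccccccc (HEAD):
toy_history = {'ccccccc': ['bbbbbbb'], 'bbbbbbb': ['aaaaaaa'], 'aaaaaaa': []}
# build_childmap(toy_history, 'ccccccc')
# -> ({'ccccccc': set(), 'bbbbbbb': {'ccccccc'}, 'aaaaaaa': {'bbbbbbb'}}, {'aaaaaaa'})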
def _encode(self):
    logger.info('encoding lua tables')
    Progress.start('Encoding MIZ file', length=3)

    Progress.set_label('Encoding map resource')
    logger.debug('encoding map resource')
    with open(self.map_res_file, mode='w', encoding=ENCODING) as f:
        f.write(SLTP().encode(self._map_res, self._map_res_qual))
    Progress.set_value(1)

    Progress.set_label('Encoding l10n dictionary')
    logger.debug('encoding l10n dictionary')
    with open(self.dictionary_file, mode='w', encoding=ENCODING) as f:
        f.write(SLTP().encode(self.l10n, self._l10n_qual))
    Progress.set_value(2)

    Progress.set_label('Encoding mission')
    logger.debug('encoding mission dictionary')
    with open(self.mission_file, mode='w', encoding=ENCODING) as f:
        f.write(SLTP().encode(self.mission.d, self._mission_qual))
    Progress.set_value(3)

    logger.info('encoding done')
def loads(self):
    '''
    Load raw bug report files.
    :return: list of bug report items
    '''
    fileCount = self.getFileCounts(self.SourceBugPath)
    bugitems = []

    # show progress
    progress = Progress(u'[%s] Loading bug reports' % self.__name__, 2, 10, True)
    progress.set_upperbound(fileCount)
    progress.start()
    for root, dirs, files in os.walk(self.SourceBugPath):
        for f in files:
            if f[:f.find(u'-')].strip().lower() != self.ProjectName.lower():
                continue
            #shutil.copy(os.path.join(root, f), os.path.join(_dest, f))
            bugitem = self.get_bugitem(os.path.join(root, f))
            if bugitem is not None:
                bugitems.append(bugitem)
            progress.check()
    progress.done()
    return bugitems
def load(self, _force=False):
    '''
    Load commit info from GitLogPath
    :return: {bugID: [{'hash':u'', 'author':u'', 'commit_date':u'', 'message':u'', 'fixedFiles':{}}, {}, ...], ...}
    '''
    if os.path.exists(self.GitLogPath) is False or _force is True:
        self.make()

    logfile = codecs.open(self.GitLogPath, 'r', 'utf-8')
    progress = Progress(u'[%s] loading git log data' % self.__name__, 1000, 20000, False)
    progress.set_point(0)
    progress.start()
    for logitem in self.file_loader(logfile, _with_filter=False):
        # filter unuseful logs
        if len(logitem['fixedFiles']) == 0:
            continue

        # We only use bug report IDs in the log message
        # mapping bug report ID
        logitem['linked_bug'] = re.findall(r'%s-[0-9]+' % self.ProjectName.upper(), logitem['message'])
        logitem['linked_bug'] = set(logitem['linked_bug'])

        for linked_id in logitem['linked_bug']:
            if linked_id not in self.logs:
                self.logs[linked_id] = [logitem]
            else:
                self.logs[linked_id].append(logitem)
        progress.check()
    progress.done()
    logfile.close()
    return self.logs
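# Standalone sketch of the bug-ID mapping used in load() above: commit messages
# are matched against "<PROJECT>-<number>" keys with the same regular expression.
# The project name and message below are made-up examples, not project data.
import re

message = u'HBASE-1234 fix NPE in scanner; follow-up to HBASE-999'
linked_bug = set(re.findall(r'%s-[0-9]+' % u'HBASE', message))
# linked_bug == set([u'HBASE-1234', u'HBASE-999'])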
def _decode(self):
    logger.info('decoding lua tables')
    if not self.zip_content:
        self.unzip(overwrite=False)
    Progress.start('Decoding MIZ file', length=3)

    Progress.set_label('Decoding map resource')
    logger.debug('reading map resource file')
    with open(self.map_res_file, encoding=ENCODING) as f:
        self._map_res, self._map_res_qual = SLTP().decode(f.read())
    Progress.set_value(1)

    Progress.set_label('Decoding dictionary file')
    logger.debug('reading l10n file')
    with open(self.dictionary_file, encoding=ENCODING) as f:
        self._l10n, self._l10n_qual = SLTP().decode(f.read())
    Progress.set_value(2)

    Progress.set_label('Decoding mission file')
    logger.debug('reading mission file')
    with open(self.mission_file, encoding=ENCODING) as f:
        mission_data, self._mission_qual = SLTP().decode(f.read())
        self._mission = Mission(mission_data, self._l10n)
    Progress.set_value(3)

    logger.info('decoding done')
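# Round-trip implied by _decode/_encode above (a sketch of the usage visible
# here, not an added API): SLTP().decode() returns a (table, qualifier) pair and
# SLTP().encode() takes that same pair back, so decoding and re-encoding an
# unmodified file should reproduce its lua table content.
#
#   data, qualifier = SLTP().decode(raw_text)
#   raw_text_again = SLTP().encode(data, qualifier)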
def fill_SubjectSheet(self, _sheet, _group, _srcCounts, _bugCounts, _dupCounts):
    projects = _bugCounts.keys()
    projects.sort()
    size = sum([len(_bugCounts[project]) for project in projects])

    progress = Progress(u'[%s] fill subject' % self.__name__, 2, 10, True)
    progress.set_point(0).set_upperbound(size)
    progress.start()

    styles = [self.base_format, self.base_format, self.base_format,
              self.number_format, self.number_format]
    for project in projects:
        for version in _bugCounts[project].keys():
            if version == 'all':
                continue
            values = [_group, project, version.upper(),
                      _bugCounts[project][version], _srcCounts[project][version]]
            self.input_row(_sheet, self.subj_data_row, 6, values, styles)
            self.subj_data_row += 1
            progress.check()
    progress.done()

    # summary
    styles = [self.subtitle_format, self.subtitle_format,
              self.number_format, self.number_format, self.number_format]
    for project in projects:
        values = [_group, project.upper(), _bugCounts[project]['all'],
                  _dupCounts[project], _srcCounts[project]['all']]
        self.input_row(_sheet, self.subj_summary_row, 0, values, styles)
        self.subj_summary_row += 1
    pass
def source_counting(self, _group, _project):
    statistics = {}

    progress = Progress('source counting', 2, 10, True)
    progress.set_upperbound(len(self.S.versions[_project].keys()))
    progress.start()
    for version in self.S.versions[_project].keys():
        vname = VersionUtil.get_versionName(version, _project)
        repo = os.path.join(self.S.getPath_source(_group, _project, vname))
        result = self.getCodeCount(repo)
        if result is None:
            continue
        statistics[vname] = result
        progress.check()
    progress.done()

    maxValue = 0
    for vname in statistics:
        if maxValue < statistics[vname]:
            maxValue = statistics[vname]
    statistics['max'] = maxValue

    pretty = PrettyStringBuilder(_indent_depth=2)
    text = pretty.get_dicttext({_project: statistics})

    f = open(os.path.join(self.S.getPath_base(_group, _project), u'sources.txt'), 'w')
    f.write(text)
    f.close()
def get_hump_error_per_type():
    query = """SELECT DISTINCT run.aggregate_id, aggregate_type.type_id
               FROM `run`
               LEFT OUTER JOIN aggregate_type ON run.aggregate_id = aggregate_type.aggregate_id
               WHERE dissipation < 0.01"""

    session = get_session()
    aggregate_ids = session.execute(query).fetchall()
    session.close()

    errors = {1: [], 2: [], 3: [], 4: [], 5: []}

    p = Progress(len(aggregate_ids))
    p.start()
    cnt = 0
    for aggregate_id, type_id in aggregate_ids:
        err, alpha = get_hump_info(aggregate_id)
        errors[type_id].append(err)
        cnt += 1
        p.update(cnt)
    p.finish()

    x = errors.keys()
    y = [np.mean(yi) for yi in errors.values()]
    yerr = [np.std(yi) for yi in errors.values()]
    ymax = [np.max(yi) for yi in errors.values()]
    ymin = [np.min(yi) for yi in errors.values()]

    with file('./simulation_data/type_hump_error_ranges.bin', 'wb') as fp:
        pickle.dump(errors, fp)

    fig = pplot.figure()

    ax = fig.add_subplot(311)
    ax.bar(x, y, yerr=yerr, color='b')
    ax.set_ylabel("Average power law error")
    ax.set_xlabel("Type")

    ax = fig.add_subplot(312)
    ax.set_ylabel("Maximum power law error")
    ax.set_xlabel("Type")
    ax.bar(x, ymax, color='r')

    ax = fig.add_subplot(313)
    ax.set_ylabel("Minimum power law error")
    ax.set_xlabel("Type")
    ax.bar(x, ymin, color='g')

    pplot.show()
def unhash_folder(_src, _dest):
    '''
    hashed folder ==> unhashed folder
    example) path/aa/00/filename ==> path/filename
    :param _src: source (hashed) folder
    :param _dest: destination (flat) folder
    :return:
    '''
    if os.path.exists(_dest) is False:
        os.makedirs(_dest)

    progress = Progress(u'Merging bug reports', 20, 1000, False)
    progress.start()
    for root, dirs, files in os.walk(_src):
        for f in files:
            shutil.copy(os.path.join(root, f), os.path.join(_dest, f))
            progress.check()
    progress.done()
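# Usage sketch for unhash_folder; both paths are placeholders, not project data:
# flattens a hashed layout such as bugs/aa/00/report.xml into bugs_flat/report.xml.
#
#   unhash_folder(u'./bugs', u'./bugs_flat')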
def make_tagmap(self):
    q = Queue()
    visited = set([])

    # root node find (queue init)
    for item in list(self.ancestors):
        q.put((item, None))  # (commit_hash, tagname)

    # For each item in queue
    progress = Progress(u'[%s] making git tagmaps' % self.__name__, 500, 10000, False)
    progress.set_point(0)
    progress.start()
    while q.empty() is False:
        commit_hash, parent_tag = q.get()

        # If this commit is in tags, map commit_hash to its own tag
        if commit_hash in self.tags:
            self.tagmap[commit_hash] = self.tags[commit_hash]
        # if this commit is not in tags, map it to the tag inherited from its parent
        else:
            if commit_hash not in self.tagmap:
                self.tagmap[commit_hash] = parent_tag
            else:
                # compare times of previous_tag and parent_tag, keep the newer one
                previous_tag = self.tagmap[commit_hash]
                pre_time = self.tagtimes[previous_tag]
                par_time = self.tagtimes[parent_tag]
                if par_time > pre_time:
                    self.tagmap[commit_hash] = parent_tag
        # the tag propagated to the children is whatever is mapped now
        commit_tag = self.tagmap[commit_hash]

        if commit_hash not in visited:
            visited.add(commit_hash)
            for child_hash in self.childmap[commit_hash]:
                q.put((child_hash, commit_tag))
        progress.check()
    progress.done()
    pass
def download(url, local_file, progress_title: str, progress_text: str = '', file_size: int = None):
    logger.info('downloading {} -> {}'.format(url, local_file))
    Progress.start(progress_title)
    Progress.set_label(progress_text)

    def hook(data):
        Progress.set_value(float(data['percent_complete']))

    dl = Downloader(
        url=url,
        filename=local_file,
        progress_hooks=[hook],
        content_length=file_size,
    )
    return dl.download()
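# Usage sketch for download(); the URL and target path are placeholders, and the
# hook above assumes the downloader reports a 'percent_complete' field as shown.
#
#   download('https://example.com/files/archive.zip', '/tmp/archive.zip',
#            progress_title='Downloading archive', progress_text='archive.zip')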
def load_raw(self, _force=False):
    '''
    Load raw commit info from GitLogPath
    :return: [{'hash':u'', 'author':u'', 'commit_date':u'', 'message':u'', 'fixedFiles':{}}, ...]
    '''
    if os.path.exists(self.GitLogPath) is False or _force is True:
        self.make()

    self.logs = []
    logfile = codecs.open(self.GitLogPath, 'r', 'utf-8')
    progress = Progress(u'[%s] loading git log data' % self.__name__, 1000, 20000, False)
    progress.set_point(0)
    progress.start()
    for logitem in self.file_loader(logfile):
        # keep every log item that has a commit hash
        #if len(logitem['fixedFiles']) == 0: continue
        if logitem['hash'] == '':
            continue
        self.logs.insert(0, logitem)
        progress.check()
    progress.done()
    logfile.close()
    return self.logs
def run(self):
    agstart = time.time()

    for i in xrange(self.no_sims):
        logging.info("Going for simulation %d" % (i + 1))
        gc.collect()
        run_id = str(uuid4())

        with DataContainer(self.config, run_id, self.aggregate_id) as dc:
            p = Progress(self.config['model']['no_steps'])

            model_class = None
            if self.market_type == 1:
                logging.info("Using default Market")
                model_class = Market
            elif self.market_type == 2:
                logging.info("Using ShuffleIRSMarket")
                model_class = ShuffleIRSMarket
            elif self.market_type == 3:
                logging.info("Using SortedIRSMarket")
                model_class = SortedIRSMarket
            elif self.market_type == 4:
                logging.info("Using RandomSortedIRSMarket")
                model_class = SortedRandomIRSMarket
            elif self.market_type == 5:
                logging.info("Using RandomShuffleIRSMarket")
                model_class = ShuffleRandomIRSMarket
            elif self.market_type == 6:
                logging.info("Using ConstantRandomShuffleIRSMarket")
                model_class = ConstShuffleIRSMarket
            elif self.market_type == 7:
                logging.info("Using quick CRS-IRS-Mkt")
                model_class = sim
            else:
                raise ValueError("No such market type")

            p.start()
            start = time.time()
            with model_class(self.config['model'], dc, p.update) as m:
                m.run()
            t = time.time() - start
            p.finish()
            print ""
            logging.info("Run took %f seconds" % t)

            if self.config['analysis']['do_analysis']:
                start = time.time()
                self.do_analysis(dc, run_id)
                t = time.time() - start
                logging.info("Analysis took %f seconds" % t)

            if self.save_data:
                start = time.time()
                dc.save_data()
                t = time.time() - start
                logging.info("Saving data took %f seconds" % t)

            gc.collect()

        print ""
        print ""
        gc.collect()

    dt = (time.time() - agstart) / 60
    logging.info("Experiment took %f minutes" % dt)

    if self.config['aggregate']['do_aggregate'] and self.save_data:
        start = time.time()
        self.do_aggregate(dc, run_id)
        logging.info('Aggregation took %f seconds' % (time.time() - start))
if (s.save_density_for_avalanche_size):
    s.density_per_avalanche_size = defaultdict(list)

s.save_gross_risk_for_avalanche_size = save_gross_risk_for_avalanche_size
if (s.save_gross_risk_for_avalanche_size):
    s.gross_risk_per_avalanche_size = defaultdict(list)

if (s.save_avalanche_tree):
    os.makedirs(s.avalanche_tree_file_path)
if (save_giant_component):
    s.giant_components = np.zeros(s.no_steps)

start = time.time()
p.start()
s.run()
p.finish()

print
print "Run took %d seconds" % (time.time() - start)

if (save):
    print "Saving data"
    dc.save_defaults()
    dc.save_run()

if s.save_avalanche_progression:
    print "Saving avalanche progression"
    file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
    with file(file_path, 'wb') as fp:
def do_run(steps, no_banks, threshold, max_tenure, max_irs_value,
           avalanche_fraction=0.9):
    #steps = 10000
    save = False
    save_risk = False
    save_risk_avalanche_time_series = False
    save_dist = False
    save_giant_component = False
    save_avalanche_progression = False
    save_critical_info = False
    save_avalanche_tree = False
    save_degree_distribution = False

    no_connection_scatter_moments = 0
    connection_scatter_moments = np.random.randint(0, steps, no_connection_scatter_moments)

    seed = np.random.randint(0, 1000)

    dcconfig = {
        'model': {
            'no_banks': no_banks,
            'no_steps': steps,
            'threshold': threshold,
            'sigma': 1,
            'max_irs_value': max_irs_value,
            'irs_threshold': -1,
            'dissipation': 0.0,
            'max_tenure': max_tenure
        },
        'analysis': {
            'data_to_save': ['defaults']
        },
        'file_root': './simulation_data/',
        'market_type': 7,
        'seed': seed
    }
    measure_no_steps = 2 * dcconfig['model']['max_tenure']

    ###########################################################################
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
            connection_scatter_moments, seed,
            avalanche_fraction=avalanche_fraction)

    s.save_degree_distribution = save_degree_distribution
    if (s.save_degree_distribution):
        s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
        s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))

    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)

    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component):
        s.giant_components = np.zeros(s.no_steps)

    ###########################################################################
    start = time.time()
    p.start()
    tme, size = s.run()
    print
    p.finish()

    defaulting_bank = s.defaulting_bank_no
    start_at = tme - measure_no_steps + 1

    print "Large enough avalanche found at %d of size %d" % (tme, size)
    print
    print "Run took %d seconds" % (time.time() - start)
    print
    print "Going for the analysis"

    ###########################################################################
    ## Actual stuff thats needed
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
            connection_scatter_moments, seed, start_at, defaulting_bank,
            avalanche_fraction=avalanche_fraction)

    nb = dcconfig['model']['no_banks']
    s.measured_balances = np.zeros((measure_no_steps, nb))
    s.measured_gross_balances = np.zeros((measure_no_steps, nb))
    s.degrees = np.zeros((measure_no_steps, nb))
    s.no_irs = np.zeros((measure_no_steps, nb))
    #s.giant_component = []
    s.defaulted_nodes = []
    s.irs_pb = []
    s.network = np.zeros((nb, nb))
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)
    #################
    s.save_degree_distribution = save_degree_distribution
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id

    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component):
        s.giant_components = np.zeros(s.no_steps)

    ###########################################################################
    start = time.time()
    p.start()
    tme, size = s.run()
    p.finish()

    print
    print "Large enough avalanche found at %d of size %d" % (tme, size)

    if s.save_avalanche_progression:
        print "Saving avalanche progression"
        file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.avalanche_progressions, fp)
            pickle.dump(dcconfig, fp)

    if s.collect_critical_info:
        print "Critical info"
        file_path = './simulation_data/critical/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.critical_info, fp)
            pickle.dump(s.max_default_size_t.tolist(), fp)
            if (s.save_giant_component):
                pickle.dump(s.giant_components.tolist(), fp)
            pickle.dump(dcconfig, fp)

    if len(connection_scatter_moments) > 0:
        print "Connection Scatters"
        file_path = './simulation_data/connection_scatters/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.connection_scatters, fp)

    if save_dist:
        file_path = './simulation_data/dists/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.trials, fp)
            pickle.dump(dcconfig['model']['no_banks'], fp)

    if (True):
        os.makedirs("./simulation_data/large_avalanche_data/%s" % dc.aggregate_id)
        print "Saving stuff"

        file_path = './simulation_data/large_avalanche_data/%s/degrees.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.degrees.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/no_irs.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.no_irs.tolist(), fp)
            pickle.dump(s.irs_pb, fp)

        file_path = './simulation_data/large_avalanche_data/%s/balances.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.measured_balances.tolist(), fp)
            pickle.dump(s.measured_gross_balances.tolist(), fp)

        #file_path = './simulation_data/large_avalanche_data/%s/gc.bin' % dc.aggregate_id
        #with file(file_path, 'wb') as fp:
        #    pickle.dump(s.giant_component, fp)

        file_path = './simulation_data/large_avalanche_data/%s/network.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.network.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/defaulted.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.defaulted_nodes, fp)

        file_path = './simulation_data/large_avalanche_data/%s/irs_data.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.irs_creations.tolist(), fp)
            pickle.dump(s.irs_removals.tolist(), fp)

        dcconfig['failed_bank'] = s.defaulting_bank_no
        file_path = './simulation_data/large_avalanche_data/%s/config.json' % dc.aggregate_id
        with open(file_path, 'w') as fp:
            json.dump(dcconfig, fp, indent=4)

    print dc.aggregate_id