def muster_basis_options(self):
    """Assemble the basis-set options for this calculation.

    The orbital basis is always stored under
    ``options["BASIS"]["ORBITAL"]["value"]``.  Methods that need fitting
    basis sets additionally get ``JKFIT``/``MP2FIT``-style entries.

    Returns:
        tuple: ``(text, options)``.  ``text`` is always the empty string;
        ``options`` is a nested ``defaultdict`` addressed as
        ``options[section][keyword]["value"]``.

    Raises:
        ValidationError: a "df-"/"f12"/SAPT-type method was requested but
            ``self.unaugbasis`` or ``self.auxbasis`` is falsy.
    """
    text = ""
    options = defaultdict(lambda: defaultdict(dict))
    basis_opts = options["BASIS"]
    basis_opts["ORBITAL"]["value"] = self.basis

    # NOTE(review): matching is done on self.method exactly as given (no
    # lower-casing is applied) -- confirm callers always pass lower-case names.
    method = self.method
    if method in ["ccsd(t)-f12-optri"]:
        # Fitting sets are only hard-wired for cc-pvdz-f12; any other
        # orbital basis gets no auxiliary entries on this branch.
        if self.basis == "cc-pvdz-f12":
            basis_opts["JKFIT"]["value"] = "aug-cc-pvtz/jkfit"
            basis_opts["JKFITC"]["value"] = self.basis + "/optri"
            basis_opts["MP2FIT"]["value"] = "aug-cc-pvtz/mp2fit"
    elif (
        ("df-" in method)
        or ("f12" in method)
        or (method in ["mp2c", "dft-sapt", "dft-sapt-pbe0acalda"])
    ):
        if self.unaugbasis and self.auxbasis:
            basis_opts["JKFIT"]["value"] = self.auxbasis + "/jkfit"
            basis_opts["JKFITB"]["value"] = self.unaugbasis + "/jkfit"
            basis_opts["MP2FIT"]["value"] = self.auxbasis + "/mp2fit"
            basis_opts["DFLHF"]["value"] = self.auxbasis + "/jkfit"
        else:
            raise ValidationError("""Auxiliary basis not predictable from orbital basis '%s'""" % (self.basis))
    return text, options
def default_scheduling_algorithm(self):
    """
    Decide whether we need to schedule our own triggers (if at all) in
    order to progress to the next mode.

    When termination has been requested, every pending trigger is unprimed,
    the trigger/coroutine maps are reset and ``begin_test`` is scheduled on
    ``_timer1``.  Otherwise, if writes are pending, the trigger matching the
    current mode is primed with ``react`` unless it already is.

    This algorithm has been tested against the following simulators:
        Icarus Verilog
    """
    if self._terminate:
        if _debug:
            self.log.debug("Test terminating, scheduling Timer")

        # Drop everything coroutines are currently waiting on ...
        for pending in self._trigger2coros:
            pending.unprime()

        # ... and any of the scheduler's own triggers that are still armed.
        own_triggers = [self._readwrite, self._readonly,
                        self._next_timestep, self._timer1, self._timer0]
        for trigger in own_triggers:
            if trigger.primed:
                trigger.unprime()

        self._timer1.prime(self.begin_test)
        self._trigger2coros = collections.defaultdict(list)
        self._coro2triggers = collections.defaultdict(list)
        self._terminate = False
        self._mode = Scheduler._MODE_TERM
        return

    if not self._writes:
        # Nothing pending: no trigger of our own is needed.
        return

    if self._mode == Scheduler._MODE_NORMAL:
        if not self._readwrite.primed:
            self._readwrite.prime(self.react)
    elif not self._next_timestep.primed:
        self._next_timestep.prime(self.react)
def getTrainingContextData():
    """Parse the training XML into a nested mapping of word instances.

    Returns an ``OrderedDict`` keyed by word type (the ``item`` attribute of
    each child of the XML root).  Each value is a nested ``defaultdict``
    where ``[...]['training'][instance_id]`` holds a dict with the keys
    ``"Sense"``, ``"Pre-Context"`` and ``"Post-Context"``.
    """
    training_data = OrderedDict()

    # Initialising the xml parser for the training and test set
    training_root = initializeXMLParser(dir_path + training_file)

    # Grabbing one word type at a time
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        per_type = defaultdict(lambda: defaultdict(dict))
        training_data[word_type] = per_type

        # Grabbing the instance id and its list of senses
        for word_instance in word_type_xml:
            instance_id = word_instance.attrib['id']
            senses = [answer.attrib['senseid']
                      for answer in word_instance.findall('answer')]

            # Text before the <head> element, and the text trailing it.
            context = word_instance.find('context')
            pre_tokens = context.text.split()
            post_tokens = context.find('head').tail.split()

            # Pre-processing the pre-context and post context
            # TODO: Check why this is reducing the accuracy of the model by 1%
            pre_tokens = preProcessContextData(pre_tokens)
            post_tokens = preProcessContextData(post_tokens)

            per_type['training'][instance_id] = {
                "Sense": senses,
                "Pre-Context": pre_tokens,
                "Post-Context": post_tokens,
            }
        # break  # TODO: Remove this breakpoint. Only testing for one word type right now
    return training_data
def __init__(self, max_n):
    """Create empty count tables for every n-gram order from 1 to max_n.

    max_n must be greater than or equal to 2.
    """
    self._max_n = max_n
    orders = range(1, max_n + 1)

    # Maps {n: {ngram_prefix: word_counts}}
    # ngram_prefix is a tuple of words.
    # word_counts is a Counter of word to count.
    self._ngram_word_counts_map = {n: defaultdict(Counter) for n in orders}

    # Used to calculate the continuation counts.
    # For each n, maps a word to a set of ngram_prefix that precede it.
    # Maps {n: {word: set(ngram_prefix)}}
    self._continuations_map = {n: defaultdict(set) for n in orders}

    # Used to normalize continuation counts into a probability.
    # Maps {n: set(ngram)}
    self._ngrams_map = {n: set() for n in orders}

    # Maps {order: discount}
    # TODO(dounanshi): calculate discount http://www.riacs.edu/research/technical_reports/TR_pdf/TR_00.07.pdf
    self._discount_map = {1: .75, 2: .75, 3: .75}

    # Maps {ngram_prefix: count}
    self._prefix_count_cache = {}

    # Maps {ngram_prefix: (n1, n2, n3)}
    self._nvals_cache = {}
def APMTracker(replay):
    """
    Builds ``player.aps`` and ``player.apm`` dictionaries where an action is
    any event named ``SelectionEvent`` or whose name contains
    ``AbilityEvent`` or ``ControlGroup``.

    Also provides ``player.avg_apm`` which is defined as the sum of all the
    above actions divided by the number of seconds played by the player (not
    necessarily the whole game) multiplied by 60.  ``avg_apm`` is 0 when the
    player performed no actions or played for zero seconds.

    Returns the same ``replay`` object, with its players annotated in place.
    """
    for player in replay.players:
        player.aps = defaultdict(int)
        player.apm = defaultdict(int)
        # Default to the full replay length; shortened below if the player left.
        player.seconds_played = replay.length.seconds

        for event in player.events:
            name = event.name
            if name == 'SelectionEvent' or 'AbilityEvent' in name or 'ControlGroup' in name:
                player.aps[event.second] += 1
                player.apm[int(event.second / 60)] += 1
            elif name == 'PlayerLeaveEvent':
                player.seconds_played = event.second

        # Guard the division: a player can leave (or a replay can end) at
        # second 0 while still having recorded actions.
        if player.apm and player.seconds_played > 0:
            player.avg_apm = sum(player.aps.values()) / float(player.seconds_played) * 60
        else:
            player.avg_apm = 0

    return replay
def worker_list(self, include_running=True, **kwargs):
    """Return one summary dict per active worker, most recently started first.

    Each dict carries ``name``, ``last_active``, ``started`` (``None`` when
    the worker has no such attribute) plus everything in ``worker.info``.
    With ``include_running`` the dicts additionally get ``num_running``,
    ``num_pending``, ``num_uniques`` and ``running`` (task id -> serialized
    task).  Extra keyword arguments are accepted and ignored.
    """
    self.prune()

    workers = []
    for active in self._state.get_active_workers():
        workers.append(dict(
            name=active.id,
            last_active=active.last_active,
            started=getattr(active, 'started', None),
            **active.info
        ))
    workers.sort(key=lambda entry: entry['started'], reverse=True)

    if not include_running:
        return workers

    running = collections.defaultdict(dict)
    num_pending = collections.defaultdict(int)
    num_uniques = collections.defaultdict(int)
    for task in self._state.get_pending_tasks():
        if task.status == RUNNING and task.worker_running:
            running[task.worker_running][task.id] = self._serialize_task(task.id, False)
        elif task.status == PENDING:
            for worker_id in task.workers:
                num_pending[worker_id] += 1
            # A task only one worker can run counts as "unique" for it.
            if len(task.workers) == 1:
                num_uniques[next(iter(task.workers))] += 1

    for entry in workers:
        worker_name = entry['name']
        tasks = running[worker_name]
        entry['num_running'] = len(tasks)
        entry['num_pending'] = num_pending[worker_name]
        entry['num_uniques'] = num_uniques[worker_name]
        entry['running'] = tasks
    return workers
def prune_features(self, clser, min_pos_feature_count, min_neg_feature_count, verbose=False):
    """Drop rarely seen features from the feature sets of classifier ``clser``.

    For every feature the number of negative examples (output < 0.5) and
    positive examples containing it is counted.  A feature is kept when its
    positive count reaches ``min_pos_feature_count + len(feature)`` or its
    negative count reaches ``min_neg_feature_count + len(feature)``; all
    other features are removed via each feature set's ``prune`` method.

    Afterwards ``self.classifiers_features_list[clser]`` (list of surviving
    features) and ``self.classifiers_features_mapping[clser]`` (feature ->
    index into that list) are rebuilt.

    Args:
        clser: key into the ``self.classifiers_*`` dictionaries.
        min_pos_feature_count: base threshold for positive occurrences.
        min_neg_feature_count: base threshold for negative occurrences.
        verbose: print progress statistics when true.
    """
    feature_sets = self.classifiers_features[clser]

    if verbose:
        print('Pruning the features')
        print('')

        # This pass exists only for the report, so it is skipped otherwise.
        distinct = set()
        for feat in feature_sets:
            distinct.update(feat)
        print(" Number of features:  %d" % len(distinct))

    # feature -> [negative occurrences, positive occurrences]
    occurrences = defaultdict(lambda: [0, 0])
    for feat, output in zip(feature_sets, self.classifiers_outputs[clser]):
        label = 0 if output < 0.5 else 1
        for f in feat:
            occurrences[f][label] += 1

    remove_features = set()
    for f, (negative, positive) in occurrences.items():
        # The thresholds grow with the length of the feature itself.
        if positive >= min_pos_feature_count + len(f):
            continue
        if negative >= min_neg_feature_count + len(f):
            continue
        # remove the feature since it does not meet the criteria
        remove_features.add(f)

    if verbose:
        print(" Number of features occurring less then %d positive times and %d negative times: %d" %
              (min_pos_feature_count, min_neg_feature_count, len(remove_features)))

    for feat in feature_sets:
        feat.prune(remove_features)

    # count the features again and report the result
    features_counts = defaultdict(int)
    for feat in feature_sets:
        for f in feat:
            features_counts[f] += 1

    # A real list (not a keys view) so it can be indexed on Python 3 as well.
    features_list = list(features_counts)
    self.classifiers_features_list[clser] = features_list
    self.classifiers_features_mapping[clser] = {f: i for i, f in enumerate(features_list)}

    if verbose:
        print(" Number of features after pruning:  %d" % len(features_counts))
def _Symbolize(input):
    """Print the lines of ``input``, replacing parsable ASan frames by symbols.

    Every stripped line is run through ``_ParseAsanLogLine``.  Lines that
    parse are grouped per library and their relative addresses resolved in
    one ``symbol.SymbolInformationForSet`` call per library.  Lines are then
    printed in input order: resolved frames as the three fields of the first
    symbol entry for their address, everything else verbatim.

    Args:
        input: iterable of log lines (the name shadows the builtin but is
            kept because it is part of the signature).
    """
    asan_libs = _FindASanLibraries()
    libraries = collections.defaultdict(list)
    asan_lines = []
    for log_line in [raw.strip() for raw in input]:
        parsed = _ParseAsanLogLine(log_line)
        if parsed:
            libraries[parsed['library']].append(parsed)
        asan_lines.append({'raw_log': log_line, 'parsed': parsed})

    # library -> {'symbols': lookup result}; only libraries with a non-empty
    # result get an entry.
    all_symbols = collections.defaultdict(dict)
    for library, items in libraries.items():
        libname = _TranslateLibPath(library, asan_libs)
        lib_relative_addrs = set(item['rel_address'] for item in items)
        info_dict = symbol.SymbolInformationForSet(libname, lib_relative_addrs, True)
        if info_dict:
            all_symbols[library]['symbols'] = info_dict

    for entry in asan_lines:
        m = entry['parsed']
        if (m and m['library'] in all_symbols and
                m['rel_address'] in all_symbols[m['library']]['symbols']):
            s = all_symbols[m['library']]['symbols'][m['rel_address']][0]
            print('%s %s %s' % (s[0], s[1], s[2]))
        else:
            # Unparsable line, or no symbol information for this frame.
            print(entry['raw_log'])
def edit_quantiles(self,q=.01,quantile_range=False,v=False,write=True):
    """Compute quantiles and the mean of each revert column per namespace.

    The CSV at ``self.db_path`` is loaded, de-duplicated with
    ``self.drop_dups`` and, when ``self.drop1`` is set, restricted to rows
    with ``len > 1``.  For every namespace in ``self.namespace`` and column
    in ``self.revert`` the quantiles at ``q, 2q, ...`` are computed; the
    namespace ``'at'`` uses all rows, any other namespace only the rows
    whose ``namespace`` column equals its position in ``self.namespace``.

    Args:
        q: quantile step size.
        quantile_range: unused.
        v: unused.
        write: also write one CSV per (namespace, column) into the
            ``results/quantiles`` directory, with two extra rows
            ``mean_quantile`` and ``mean_value``.

    Returns:
        dict: ``results[namespace][column] = {'quantiles': DataFrame,
        'mean': value}``.
    """
    basic.log('creating edit quantiles %s' % self.lang)
    f_out = basic.create_dir('results/quantiles')
    df = pd.read_csv(self.db_path)
    df = self.drop_dups(df)
    df.page_id = df.page_id.astype(int)
    if self.drop1:
        df = df.loc[(df['len'] > 1)]
    q = np.arange(q,1+q,q)
    results = defaultdict(dict)
    for n in self.namespace:
        results[n] = defaultdict(dict)
        for r in self.revert:
            basic.log('%s %s %s' % (self.lang,n,r))
            # Select the rows once; quantiles and mean share the selection.
            if n == 'at':
                series = df[r]
            else:
                series = df.loc[(df['namespace'] == self.namespace.index(n)),r]
            result = series.quantile(q=q).to_frame()
            mean = series.mean()
            column = '%s_%s_%s' % (self.lang,n,r)
            result.columns = [column]
            results[n][r] = {'quantiles':result,'mean':mean}
            if write:
                # Last quantile whose value lies below the (rounded-up) mean.
                below_mean = result.loc[(result[column] < int(mean+1))]
                # DataFrame.append no longer exists in pandas 2.x; concat
                # produces the same stacked frame.
                output = pd.concat([
                    result,
                    DataFrame({column:below_mean.tail(1).index.values},index=['mean_quantile']),
                    DataFrame({column:mean},index=['mean_value']),
                ])
                output.to_csv('%s/%s_%s_%s.csv' % (f_out,self.lang,n,r),encoding='utf-8',index_label='qauntiles')
    return results
def to_dict(self, default=None):
    """
    Converts sequence of (Key, Value) pairs to a dictionary.

    >>> type(seq([('a', 1)]).to_dict())
    dict

    >>> seq([('a', 1), ('b', 2)]).to_dict()
    {'a': 1, 'b': 2}

    :param default: Can be a callable zero argument function. When not None, the returned
        dictionary is a collections.defaultdict with default as value for missing keys. If the
        value is not callable, then a zero argument lambda function is created returning the
        value and used for collections.defaultdict
    :return: dictionary from sequence of (Key, Value) elements
    """
    # Later pairs win when a key occurs more than once.
    pairs = {pair[0]: pair[1] for pair in self.sequence}
    if default is None:
        return pairs
    if hasattr(default, '__call__'):
        factory = default
    else:
        factory = lambda: default
    return collections.defaultdict(factory, pairs)
def __init__(self, ldg=None):
    """Create the graph with a TOP node at address 0.

    ``ldg`` may be an ``LgGraph`` (stored on the TOP node under ``'ldg'``)
    or a ``GraphNet`` (which then replaces ``self.nodes`` entirely and
    supplies ``self.git_list``).  Any other value leaves the defaults.
    """
    DependencyGraph.__init__(self)

    def blank_node():
        # Template for nodes created on first access.
        return {
            'address': None,
            'ldg': 0,
            'gid': 1,  # has the same value of the gid of nodes in ldg.
            'lemma': None,
            'head': None,
            'deps': defaultdict(int),
            'remaining_ops': defaultdict(list),  # list(LgGraph.operator_dic.keys()),
            'ctag': None,
            'tag': None,
            'feats': None,
        }

    self.nodes = defaultdict(blank_node)
    self.git_list = [1]

    top = self.nodes[0]
    top.update({
        'address': 0,
        'head': -1,
        'ldg': 'TOP',
        'gid': 1,  # has the same value of the gid of nodes in ldg.
        'remaining_ops': defaultdict(list),
    })

    if isinstance(ldg, LgGraph):
        self.nodes[0]['ldg'] = ldg

    if isinstance(ldg, GraphNet):
        self.nodes = ldg
        self.git_list = ldg.get_git_list()
def __init__(self, k, messages):
    """Store ``k`` and feed every message to ``read_message``."""
    self.k = k

    # user_id -> (phrase -> [next words])
    def phrase_table():
        return defaultdict(list)

    self.m = defaultdict(phrase_table)

    for msg in messages:
        self.read_message(msg)
def __init__(self, names, messages):
    """Set up the empty statistics tables, then process every message.

    Each message in ``messages`` is passed to ``self.read_message`` in order.
    """
    self.names = names
    self.messages = messages

    def count_table():
        # outer key -> (inner key -> count)
        return defaultdict(lambda: defaultdict(int))

    # do some preanalysis
    # MBU: user_id -> [message]
    self.messages_by_user = defaultdict(list)

    # who has liked {{user}}'s messages?
    # user_id -> (liker -> count)
    self.likes_per_user = count_table()

    # who has {{user}} liked?
    # user_id -> (liked -> count)
    self.user_likes = count_table()

    # which words are used most often?
    # word -> (user_id -> count)
    self.most_common_words = count_table()

    # per user, which words are used most often?
    # user_id -> (word -> count)
    self.mcw_per_user = count_table()

    # which users have liked their own posts?
    # user_id -> count
    self.self_likes = defaultdict(int)

    for msg in messages:
        self.read_message(msg)
def show_connections():
    """Render the LinkedIn connections page for the logged-in user.

    Fetches the user's connections, dumps the raw response to ``data.json``,
    groups the contacts by industry and passes the results to
    ``connections.html``:

    * ``all_conn`` -- list of ``(name, industry, headline)`` tuples,
    * ``cat_conn`` -- the same tuples grouped by industry,
    * ``cat_count`` -- size of every industry group holding more than 1%
      of all connections.

    Connections missing any of the name, industry or headline fields are
    skipped but still count towards the total used for the 1% threshold.

    NOTE(review): nothing is returned when the session has no
    ``linkedin_token``; the original indentation was lost, so confirm which
    statements belong under the token check and whether a redirect to the
    login flow is intended.
    """
    user = User.query.filter_by(id=session['user_id']).first()
    if 'linkedin_token' in session:
        conns = linkedin.get('people/~/connections:(headline,id,first-name,last-name,location,industry,picture-url)')

        # Serialise once and reuse the text for both the dump and the
        # plain-JSON copy that is processed below.
        payload = json.dumps(conns.data, indent=1)
        with open('data.json', 'w') as f:
            f.write(payload)
        connections = json.loads(payload)

        print(len(connections['values']))
        index = 0
        all_contacts = []
        categorized = defaultdict(list)
        countdata = {}
        for conn in connections['values']:
            try:
                name = conn['firstName'].encode("utf-8")+' '+conn['lastName'].encode("utf-8")
                industry = conn['industry'].encode("utf-8")
                headline = conn['headline'].encode("utf-8")
            except KeyError:
                # Incomplete profile: leave it out of the listing.
                pass
            else:
                contact = (name, industry, headline)
                all_contacts.append(contact)
                categorized[industry].append(contact)
            index = index+1

        for key in categorized:
            # Per-mille share, floor-divided so the cut-off does not depend
            # on the interpreter's division semantics.
            if len(categorized[key])*1000//index > 10:
                countdata[key] = len(categorized[key])
                print('%s %s' % (key, countdata[key]))
        return render_template("connections.html", title = 'Connections', all_conn=all_contacts, cat_conn=categorized, cat_count = countdata, user=user)
def __init__(self, analysis_files):
    """Create empty dependency maps and parse every given analysis file.

    Each entry of ``analysis_files`` is handed to ``self.parse`` in order.
    """
    # The analysis files we gather information from.
    self.analysis_files = analysis_files

    # All maps below are keyed by scala source file and hold sets.

    # source -> the class files generated from that source
    self.products = defaultdict(set)

    # source -> jar files it depends on. (And, rarely, class files.)
    self.binary_deps = defaultdict(set)

    # source -> the source files providing the classes that it depends on.
    # The set of source files here does *not* appear to include inheritance!
    # eg, in src/jvm/com/foursquare/api/util/BUILD:util,
    # in the source file ClientMetrics, class ClientView extends PrettyEnumeration, but
    # the file declaring PrettyEnumeration is *not* in the source deps.
    # But PrettyEnumeration *is* included in the list of classes in external_deps.
    self.source_deps = defaultdict(set)

    # source -> the classes that it depends on. (Not class files or source
    # files, just classes.)
    self.external_deps = defaultdict(set)

    # source -> the classes that it provides. (Again, not class files, but
    # fully-qualified class names.)
    self.class_names = defaultdict(set)

    for analysis_file in self.analysis_files:
        self.parse(analysis_file)
def search_all(self, text):
    """Score known names against ``text`` and return the best hit per key.

    Every n-gram of ``text`` found in ``self.ngrams`` adds
    ``weight / total`` to each name registered for it.  The best scoring
    name is then kept per key, the key being the name's ``iso2`` code with
    ``">"`` appended when the ``self.by_name`` row has a truthy ``sub``.

    Returns:
        list of dicts with keys ``iso2``, ``value``, ``name`` and ``sub``,
        sorted by ``value`` in descending order.
    """
    candidates = defaultdict(float)
    for ngram in text_to_ngrams(text, self.size):
        matches = self.ngrams.get(ngram, None)
        if matches:
            total = matches["total"]
            for name, weight in matches["name"].items():
                candidates[name] += float(weight) / total

    # Unseen keys start from a zero score so that any positive score wins.
    best = defaultdict(lambda: {"value": 0.0})
    for name, value in candidates.items():
        row = self.by_name.get(name, None)
        key = row["iso2"]
        if row["sub"]:
            key = key + ">"
        if value > best[key]["value"]:
            best[key] = {
                "iso2": row["iso2"],
                "value": value,
                "name": name,
                "sub": row["sub"],
            }

    return sorted(best.values(), key=lambda entry: entry["value"], reverse=True)
def invoke(self, dirname, filenames=None, linter_configs=None):
    """
    Main entrypoint for all plugins.

    Runs ``find <dirname> -name "*<ext>" ... | xargs <command>`` through
    ``self.executor`` and feeds every output line to ``self.process_line``.

    :param dirname: directory to lint; a leading ``<dirname>/`` is removed
        from reported file names.
    :param filenames: unused; defaults to an empty set.
    :param linter_configs: passed on to ``self.get_command``; defaults to
        an empty set.

    Returns results in the format of:

    {'filename': {
      'line_number': [
        'error1',
        'error2'
        ]
      }
    }

    """
    # Fresh sets per call instead of shared mutable default arguments.
    if filenames is None:
        filenames = set()
    if linter_configs is None:
        linter_configs = set()

    retval = defaultdict(lambda: defaultdict(list))
    extensions = ' -o '.join(['-name "*%s"' % ext for ext in
                              self.get_file_extensions()])

    # NOTE(review): dirname is interpolated into a shell pipeline unquoted;
    # confirm callers never pass untrusted or space-containing paths.
    cmd = 'find %s %s | xargs %s' % (
        dirname, extensions, self.get_command(
            dirname,
            linter_configs=linter_configs))
    result = self.executor(cmd)

    # Strip "<dirname>/" exactly once, whether or not dirname already ends
    # with a slash, and never from siblings that merely share the prefix.
    prefix = dirname.rstrip('/') + '/'
    for line in result.split('\n'):
        output = self.process_line(dirname, line)
        if output is not None:
            filename, lineno, messages = output
            if filename.startswith(prefix):
                filename = filename[len(prefix):]
            retval[filename][lineno].append(messages)
    return retval
def generate_te_doping(self, d):
    """Derive per-doping power-factor and zT arrays from ``d``.

    ``d`` is indexed as ``d[<quantity>_doping][type][t][didx][tidx]`` for
    the quantities ``seebeck``, ``cond`` and ``kappa`` and the types ``'p'``
    and ``'n'``.  For every entry the function computes

    * ``pf = seebeck * seebeck * cond``
    * ``zt = seebeck * seebeck * cond * t / kappa``

    Returns:
        tuple: ``(pf_dict, zt_dict)``, each indexed ``[type][t]`` and holding
        nested lists with the same ``[didx][tidx]`` layout as the input.
    """
    # root key for getting all temps, etc
    seebeck_root = d['seebeck_doping']
    pf_dict = defaultdict(lambda: defaultdict(int))
    zt_dict = defaultdict(lambda: defaultdict(int))

    for carrier in ('p', 'n'):
        for temp in seebeck_root[carrier]:  # temperatures
            pf_rows = []
            zt_rows = []
            for didx, tensor in enumerate(seebeck_root[carrier][temp]):  # doping idx
                pf_row = []
                zt_row = []
                for tidx, seebeck in enumerate(tensor):
                    cond = d['cond_doping'][carrier][temp][didx][tidx]
                    kappa = d['kappa_doping'][carrier][temp][didx][tidx]
                    power_factor = seebeck * seebeck * cond
                    pf_row.append(power_factor)
                    zt_row.append(power_factor * temp / kappa)
                pf_rows.append(pf_row)
                zt_rows.append(zt_row)
            pf_dict[carrier][temp] = pf_rows
            zt_dict[carrier][temp] = zt_rows

    return pf_dict, zt_dict
def extract_classifiers(self, das, utterances, verbose=False):
    """Normalise the training utterances and count abstracted dialogue act items.

    For every utterance the method stores the normalised utterance, its
    feature vector candidates (``get_fvc``) and the abstracted dialogue act
    with its category labels (``get_abstract_da``).  ``self.classifiers``
    then maps the unicode form of every abstracted dialogue act item to the
    number of times it occurs.

    Args:
        das: mapping utterance id -> dialogue act.
        utterances: mapping utterance id -> utterance; its values are
            replaced by their normalised form in place.
        verbose: print every item that has a value not containing ``CL_``.
    """
    # process the training data
    self.utterances = utterances
    self.das = das

    # Snapshot of the ids, so the list stays indexable and stable.
    self.utterances_list = list(self.utterances.keys())

    self.utterance_fvc = {}
    self.das_abstracted = {}
    self.das_category_labels = {}
    for utt_idx in self.utterances_list:
        self.utterances[utt_idx] = self.preprocessing.normalise(self.utterances[utt_idx])
        self.utterance_fvc[utt_idx] = self.get_fvc(self.utterances[utt_idx])
        self.das_abstracted[utt_idx], self.das_category_labels[utt_idx] = \
            self.get_abstract_da(self.das[utt_idx], self.utterance_fvc[utt_idx])

    # get the classifiers
    self.classifiers = defaultdict(int)

    for k in self.utterances_list:
        for dai in self.das_abstracted[k].dais:
            self.classifiers[unicode(dai)] += 1

            if verbose:
                if dai.value and 'CL_' not in dai.value:
                    print('=' * 120)
                    print('Un-abstracted category label value')
                    print('-' * 120)
                    print(unicode(self.utterances[k]))
                    print(unicode(self.utterance_fvc[k]))
                    print(unicode(self.das[k]))
                    print(unicode(self.das_abstracted[k]))
def metadata_catalog(fits_filenames):
    """Histogram the metadata values in list of fits files.

    Prints, for the primary header of every file, how many files use each
    field reported by ``metadata_field_use``, followed by the distinct
    values seen per field.  Fields whose number of distinct values exceeds
    80% of the number of files are left out of the value listing.  Values
    are printed in sorted order so the output is reproducible.
    """
    common, optional = metadata_field_use(fits_filenames)
    allfields = optional.union(common)

    histo = collections.defaultdict(int)   # field -> number of files using it
    values = collections.defaultdict(set)  # field -> distinct values (as str)
    for fname in fits_filenames:
        hdulist = pyfits.open(fname)
        try:
            hdr = hdulist[0].header
            for field in allfields:
                if field in hdr:
                    histo[field] += 1
                    values[field].add(str(hdr[field]))
        finally:
            # Release the file even if reading the header fails.
            hdulist.close()

    print('\n', '~'*78)
    print('Histogram of field use:')
    pprint(histo)

    print('\n', '~'*78)
    max_unique = 0.80
    print('Values used (max %s unique values):'%(max_unique))
    #! pprint(values)
    file_count = len(fits_filenames)
    for k, v in values.items():
        # Skip fields that are (nearly) unique per file.
        if float(len(v))/file_count > max_unique:
            continue
        print('%8s: %s'%(k,', '.join(sorted(v))))
def _getavailablepackfiles(self):
    """For each pack file (a index/data file combo), yields:
      (full path without extension, mtime, size)

    mtime will be the mtime of the index/data file (whichever is newer)
    size is the combined size of index/data file

    A directory that does not exist yields nothing; any other ``OSError``
    from listing it is re-raised.
    """
    indexsuffixlen = len(self.INDEXSUFFIX)
    packsuffixlen = len(self.PACKSUFFIX)

    seen_once = set()
    combined_sizes = defaultdict(int)
    all_mtimes = defaultdict(list)

    try:
        for filename, _kind, filestat in osutil.listdir(self.path, stat=True):
            if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                packid = filename[:-indexsuffixlen]
            elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                packid = filename[:-packsuffixlen]
            else:
                continue
            if not packid:
                continue

            # Sum both files' sizes together
            combined_sizes[packid] += filestat.st_size
            all_mtimes[packid].append(filestat.st_mtime)

            # Since we expect to have two files corresponding to each ID
            # (the index file and the pack file), we can yield once we see
            # it twice.
            if packid in seen_once:
                yield (os.path.join(self.path, packid),
                       max(all_mtimes[packid]),
                       combined_sizes[packid])
            else:
                seen_once.add(packid)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
def _find_or_append_node(children, node_id, text):
    """Return the child whose 'id' is node_id, appending a new empty one if absent."""
    for child in children:
        if child['id'] == node_id:
            return child
    child = defaultdict()
    child['text'] = text
    child['id'] = node_id
    child['item'] = []
    children.append(child)
    return child


def add2Tree(tree,id,name,floor,type):
    """Insert a leaf into the three-level type -> floor -> leaf tree.

    The ``Type-<type>`` node under ``tree['item']`` and the ``Floor-<floor>``
    node below it are reused when present and created otherwise.  The leaf
    gets the id ``"<id>-<name>"`` and, as text, the dot-separated parts of
    ``name`` after the first three.  The tree is modified in place.
    """
    type_label = 'Type-' + type
    type_node = _find_or_append_node(tree['item'], type_label, type_label)

    floor_label = 'Floor-' + floor
    # Floor ids are prefixed with the type label.
    floor_node = _find_or_append_node(type_node['item'], type_label + floor_label, floor_label)

    leaf = defaultdict()
    leaf['id'] = str(id) + '-' + name
    leaf['text'] = '.'.join(name.split('.')[3:])
    floor_node['item'].append(leaf)
def run_merge(filenames):
    """Merges all Skype databases to a new database.

    The last entry of ``filenames`` is used as the base: its file is copied
    to a new, uniquely named "<name>.merged.<date><ext>" file, and every
    other database is merged into that copy via a ``workers.MergeThread``.
    Progress and a per-database summary are printed.  If no differences were
    found at all, the new file is closed and deleted again.
    """
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    # The last database becomes the base; the remaining ones are merged in.
    db_base = dbs.pop()
    # db -> {"chats": count, "msgs": count}; an entry exists only for
    # databases that produced at least one "diff" result.
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    # Results handed to postfunc are collected on this queue and consumed
    # in the loop below.
    postbacks = Queue.Queue()
    postfunc = lambda r: postbacks.put(r)
    worker = workers.MergeThread(postfunc)

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    filename_final = util.unique_path("%s.merged.%s%s" % (name, now, ext))
    print("Creating %s, using %s as base." % (filename_final, db_base))
    shutil.copyfile(db_base.filename, filename_final)
    db2 = skypedata.SkypeDatabase(filename_final)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    for db1 in dbs:
        chats = db1.get_conversations()
        db1.get_conversations_stats(chats)
        bar_total = sum(c["message_count"] for c in chats)
        bar_text = " Processing %.*s.." % (30, db1)
        bar = ProgressBar(max=bar_total, afterword=bar_text)
        bar.start()
        args = {"db1": db1, "db2": db2, "chats": chats,
                "type": "diff_merge_left"}
        worker.work(args)
        # Consume results until the worker reports "done" or "error".
        while True:
            result = postbacks.get()
            if "error" in result:
                print("Error merging %s:\n\n%s" % (db1, result["error"]))
                worker = None # Signal for global break
                break # break while True
            if "done" in result:
                break # break while True
            # NOTE(review): the original indentation was lost; the progress
            # update is assumed to belong to the "diff" branch -- confirm.
            if "diff" in result:
                counts[db1]["chats"] += 1
                counts[db1]["msgs"] += len(result["diff"]["messages"])
                msgcounts = sum(c["message_count"] for c in result["chats"])
                bar.update(bar.value + msgcounts)
            if result["output"]:
                log(result["output"])
        if not worker:
            break # break for db1 in dbs
        bar.stop()
        bar.afterword = " Processed %s." % db1
        bar.update(bar_total)
        print

    if not counts:
        # Nothing differed in any database: discard the copy made above.
        print("Nothing new to merge.")
        db2.close()
        os.unlink(filename_final)
    else:
        for db1 in dbs:
            print("Merged %s in %s from %s." %
                  (util.plural("message", counts[db1]["msgs"]),
                   util.plural("chat", counts[db1]["chats"]), db1))
        print("Merge into %s complete." % db2)
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets
    of dimension values at a time to determine whether two
    consecutive dimensions have a one-to-many relationship.
    If they do a mapping between the first and second dimension
    values is returned. Returns a list of n-1 mappings, between
    consecutive dimensions; a pair of dimensions without such a
    relationship contributes an empty dict.

    Keys with at most one dimension return True instead of a list.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True

    columns = list(zip(*keys))
    hierarchies = []
    for dim in range(ndims - 1):
        children = defaultdict(list)  # first-dimension value -> second-dimension values
        parent_of = {}                # second-dimension value -> first value seen with it
        one_to_many = True
        for v1, v2 in zip(columns[dim], columns[dim + 1]):
            if v2 not in children[v1]:
                children[v1].append(v2)
            if v2 in parent_of:
                # A second parent for the same child breaks the hierarchy.
                if parent_of[v2] != v1:
                    one_to_many = False
                    break
            else:
                parent_of[v2] = v1
        hierarchies.append(children if one_to_many else {})
    return hierarchies
def as_coefficients_dict(a):
    """Return a dictionary mapping terms to their Rational coefficient.
    Since the dictionary is a defaultdict, inquiries about terms which
    were not present will return a coefficient of 0. If an expression is
    not an Add it is considered to have a single term.

    Every argument of ``a`` is split with ``as_coeff_Mul``; coefficients of
    arguments sharing the same term are combined with ``Add``.

    Examples
    ========

    >>> from sympy.abc import a, x
    >>> (3*x + a*x + 4).as_coefficients_dict()
    {1: 4, x: 3, a*x: 1}
    >>> _[a]
    0
    >>> (3*a*x).as_coefficients_dict()
    {a*x: 3}
    """
    # term -> all coefficients seen for it
    grouped = defaultdict(list)
    for arg in a.args:
        coeff, term = arg.as_coeff_Mul()
        grouped[term].append(coeff)

    # Build the result directly rather than rewriting ``grouped`` while
    # iterating over it; ``items`` works on every Python version.
    result = defaultdict(int)
    for term, coeffs in grouped.items():
        if len(coeffs) == 1:
            result[term] = coeffs[0]
        else:
            result[term] = Add(*coeffs)
    return result
def findSubstring(self, s, words):
    """
    Find every start index in ``s`` of a concatenation of all ``words``
    (each used exactly as often as it occurs in the list, in any order).
    All words are expected to have the same length.  An empty ``words``
    list yields no matches.

    :type s: str
    :type words: List[str]
    :rtype: List[int]
    """
    result = []
    if not words:
        # No word length can be determined, hence nothing to match.
        return result

    m, n, k = len(s), len(words), len(words[0])
    if m < n*k:
        return result

    lookup = collections.Counter(words)      # Space: O(n * k)
    for i in range(m+1-k*n):                 # Time: O(m)
        cur, j = collections.defaultdict(int), 0
        while j < n:                         # Time: O(n)
            word = s[i+j*k:i+j*k+k]          # Time: O(k)
            if word not in lookup:
                break
            cur[word] += 1
            if cur[word] > lookup[word]:
                # Word used more often than it is available.
                break
            j += 1
        if j == n:
            result.append(i)

    return result
def bench_b(power_list):
    """Benchmark the reconstruction error against the number of power iterations.

    For low rank matrices of effective rank 10, 50 and 100 and for
    ``n_comp`` in (rank/2, rank, 2*rank), ``svd_timing`` is run once per
    entry of ``power_list``.  The Frobenius norm discrepancy (and, when
    ``enable_spectral_norm`` is set, the spectral norm discrepancy), each
    relative to the corresponding norm of the input matrix, is collected
    per configuration and plotted against ``power_list``.

    Args:
        power_list: iterable of power iteration counts to evaluate.
    """
    n_samples, n_features = 1000, 10000
    data_params = {'n_samples': n_samples, 'n_features': n_features,
                   'tail_strength': .7, 'random_state': random_state}
    dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
    ranks = [10, 50, 100]

    if enable_spectral_norm:
        all_spectral = defaultdict(list)
    all_frobenius = defaultdict(list)
    for rank in ranks:
        X = make_low_rank_matrix(effective_rank=rank, **data_params)
        if enable_spectral_norm:
            X_spectral_norm = norm_diff(X, norm=2, msg=False)
        X_fro_norm = norm_diff(X, norm='fro', msg=False)

        # The builtin int replaces the np.int alias, which newer NumPy
        # releases no longer provide.
        for n_comp in [int(rank/2), rank, rank*2]:
            label = "rank=%d, n_comp=%d" % (rank, n_comp)
            print(label)
            for pi in power_list:
                U, s, V, _ = svd_timing(X, n_comp, n_iter=pi, n_oversamples=2,
                                        power_iteration_normalizer='LU')
                if enable_spectral_norm:
                    A = U.dot(np.diag(s).dot(V))
                    all_spectral[label].append(norm_diff(X - A, norm=2) /
                                               X_spectral_norm)
                f = scalable_frobenius_norm_discrepancy(X, U, s, V)
                all_frobenius[label].append(f / X_fro_norm)

    # Plot against the values that were actually benchmarked (the
    # parameter), not a same-named variable from an enclosing scope.
    if enable_spectral_norm:
        title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
        plot_power_iter_vs_s(power_list, all_spectral, title)
    title = "%s: frobenius norm diff vs n power iteration" % (dataset_name)
    plot_power_iter_vs_s(power_list, all_frobenius, title)
def reload(self, data):
    """Rebuild the name lookup and the n-gram index from ``data``.

    Only rows whose ``lang`` equals ``self.lang`` are kept in ``self.data``.
    ``self.by_name`` maps each name to its ``iso2``/``sub`` fields, and
    ``self.ngrams`` maps each n-gram of a name to ``{"name": {name:
    weight}, "total": weight sum}``.  Every name contributes
    ``1 / (number of names sharing its iso2)`` per n-gram occurrence.
    """
    self.data = [row for row in data if row["lang"] == self.lang]

    self.by_name = {}
    names_by_iso2 = defaultdict(list)
    for row in self.data:
        self.by_name[row["name"]] = {
            "iso2": row["iso2"],
            "sub": row["sub"],
        }
        names_by_iso2[row["iso2"]].append(row["name"])

    self.ngrams = defaultdict(
        lambda: {"name": defaultdict(float), "total": 0.0})
    for name_list in names_by_iso2.values():
        # All names of one iso2 code share a total weight of 1.
        weight = 1.0 / len(name_list)
        for name in name_list:
            for ngram in text_to_ngrams(name, self.size):
                entry = self.ngrams[ngram]
                entry["name"][name] += weight
                entry["total"] += weight
def get_context_data(self, **kwargs):
    """Add the JSON encoded bugmail statistics to the template context.

    The statistics cover the days from two weeks ago onwards and are cached
    under ``self.cache_key`` for 30 minutes.  The JSON object holds the
    lists ``total`` and ``used`` (``[date, count]`` pairs) and ``x_axis``
    (the dates), with dates converted by ``date_to_js``.
    """
    context = super(BugmailStatsView, self).get_context_data(**kwargs)

    json_stats = cache.get(self.cache_key)
    if not json_stats:
        wks_ago = (now() - timedelta(days=14)).date()

        # stat type -> (js date -> summed count)
        per_type = {
            BugmailStat.TOTAL: defaultdict(int),
            BugmailStat.USED: defaultdict(int),
        }
        for stat in BugmailStat.objects.stats_for_range(wks_ago):
            per_type[stat.stat_type][date_to_js(stat.date)] += stat.count
        stats_total = per_type[BugmailStat.TOTAL]
        stats_used = per_type[BugmailStat.USED]

        totals, used, x_axis = [], [], []
        for day in date_range(wks_ago):
            js_day = date_to_js(day)
            x_axis.append(js_day)
            # Days without any recorded stat show up with a count of 0.
            totals.append([js_day, stats_total[js_day]])
            used.append([js_day, stats_used[js_day]])

        all_stats = {
            'total': totals,
            'used': used,
            'x_axis': x_axis,
        }
        json_stats = json.dumps(all_stats)
        cache.set(self.cache_key, json_stats, 1800)  # 30 minutes

    context['stats'] = json_stats
    return context
def __init__(self):
    """Initialise an idle scheduler in normal mode with nothing pending."""
    self.log = SimLog("cocotb.scheduler")
    if _debug:
        self.log.setLevel(logging.DEBUG)

    # Pending coroutines for each trigger, indexed by trigger
    self._trigger2coros = collections.defaultdict(list)

    # Pending triggers for each coroutine, indexed by coroutine
    self._coro2triggers = collections.defaultdict(list)

    # Our main state
    self._mode = Scheduler._MODE_NORMAL

    # A dictionary of pending writes
    self._writes = {}

    # Work queued up for later processing
    self._pending_coros = []
    self._pending_callbacks = []
    self._pending_triggers = []
    self._pending_threads = []
    # Events we need to call set on once we've unwound
    self._pending_events = []

    self._terminate = False
    self._test_result = None
    self._entrypoint = None
    self._main_thread = threading.current_thread()

    # Select the appropriate scheduling algorithm for this simulator
    self.advance = self.default_scheduling_algorithm
    self._is_reacting = False