def discover_from_www(settings):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by reading the HTML files
    rather than listing the cache folder, because the latter is likely to contain old data
    """
    # delayed import since this function should not be used in the "normal" case
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        try:
            from BeautifulSoup import BeautifulSoup
        except ImportError:
            print "Please install BeautifulSoup to use this program"
            print " pip install beautifulsoup4 or easy_install beautifulsoup4"
            sys.exit(1)

    folder = settings.paths['www']
    print "Reading Munin www cache: ({0})".format(folder)
    with open(os.path.join(folder, "index.html")) as f:
        root = BeautifulSoup(f.read())

    domains = root.findAll("span", {"class": "domain"})

    # hosts and domains are at the same level in the tree so let's open the file
    for domain in domains:
        with open(os.path.join(folder, domain.text, "index.html")) as f:
            domain_root = BeautifulSoup(f.read())

        links = domain_root.find(id="content").findAll("a")
        progress_bar = ProgressBar(len(links), title=domain.text)

        for link in links:
            progress_bar.update()

            elements = link.get("href").split("/")
            if len(elements) < 2 \
                    or elements[0].startswith("..") \
                    or elements[-1].startswith("index"):
                continue

            if len(elements) == 2:
                host, plugin = elements[0], elements[1]
            elif len(elements) == 3:
                # probably a multigraph, we'll be missing the plugin part
                # we won't bother reading the html file for now and guess it from the RRD database later
                host, plugin = elements[0], ".".join(elements[1:3])
            else:
                print "Unknown structure"
                continue

            plugin = plugin.replace(".html", "")
            settings.domains[domain.text].hosts[host].plugins[plugin].is_multigraph = (len(elements) == 3)
            settings.domains[domain.text].hosts[host].plugins[plugin].settings = {
                'graph_title': link.text,
            }
            settings.nb_plugins += 1

    return settings

def make_pdb(self, bar_msg=''):
    """
    Returns a pdb-like formatted string.
    bar_msg is a string with message to show at ProgressBar initialization.
    bar_msg = '' disables the bar.
    :param bar_msg: str
    :return: str
    """
    models = self.models()
    if bar_msg:
        bar = ProgressBar(len(models), bar_msg)
    else:
        bar = None
    if len(models) == 1:
        s = self.__repr__()
    else:
        s = ''
        for m in models:
            s += 'MODEL%9i\n' % m[0].model
            s += m.__repr__()
            s += '\nENDMDL\n'
            if bar:
                bar.update()
    if bar:
        bar.done(False)
    return s

def _preprocess(self):
    self.lang = Lang()
    for text in self._texts:
        self.lang.index_text(text)

    for text in self._texts:
        indexes = indexes_from_text(self.lang, text)
        indexes.append(EOT_token)
        padded_indexes = pad_indexes(indexes, self._max_text_length, PAD_token)
        self._indexed_texts.append(padded_indexes)

    self._indexed_texts = np.stack(self._indexed_texts, axis=0)

    bar = ProgressBar(len(self._audio_files) - 1, unit='')
    for (audio_files_read, audio_file) in enumerate(self._audio_files):
        # (n_mels, T), (1 + n_fft/2, T)
        mel, mag = compute_spectrograms(audio_file)
        padded_mel = pad_time_dim(mel, self._max_audio_length, 0)
        padded_mag = pad_time_dim(mag, self._max_audio_length, 0)
        self._mels.append(padded_mel.transpose())
        self._mags.append(padded_mag.transpose())
        bar.update(audio_files_read)

    self._mels = np.stack(self._mels, axis=0)
    self._mags = np.stack(self._mags, axis=0)

def fit(self, train_df, regressors=None):
    print("Fitting...")
    progress_bar = ProgressBar(len(train_df.columns))
    for item in train_df.columns:
        self.models[item] = Prophet(
            yearly_seasonality=self.yearly_seasonality,
            weekly_seasonality=self.weekly_seasonality,
            daily_seasonality=self.daily_seasonality,
            **self.prophet_config)
        target = train_df[item].dropna()
        if self.use_boxcox:
            idx = target.index
            target, self.lmbda_boxcox[item] = boxcox(target)
            target = pd.Series(target, index=idx)
        target.index.name = "ds"
        target.name = "y"
        if self.country_holidays is not None:
            self.models[item].add_country_holidays(country_name=self.country_holidays)
        if regressors is not None:
            target = pd.merge(target, regressors, left_index=True, right_index=True, how="left")
            for reg in regressors.columns:
                self.models[item].add_regressor(reg)
        target = target.reset_index()
        self.models[item].fit(target)
        progress_bar.update()
    progress_bar.finish()
    return self.models

def generate(self):
    progress_bar = ProgressBar(self.settings.nb_rrd_files)
    self.add_header(self.settings)
    for domain in self.settings.domains:
        for host in self.settings.domains[domain].hosts:
            row = self.add_row("{0} / {1}".format(domain, host))
            for plugin in self.settings.domains[domain].hosts[host].plugins:
                _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
                panel = row.add_panel(_plugin.settings["graph_title"] or plugin, plugin)

                for field in _plugin.fields:
                    query = panel.add_query(field)
                    if "label" in _plugin.fields[field].settings:
                        query.alias = _plugin.fields[field].settings["label"]
                    progress_bar.update()

                panel.width = 12 // self.settings.grafana['graph_per_row']
                panel.process_graph_settings(_plugin.settings)
                panel.process_graph_thresholds(_plugin.fields)
                panel.process_graph_types(_plugin.fields)

def train(self, ts, cm, batchsz=1):
    self.model.train()
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    pg = ProgressBar(steps)
    cm.reset()
    total_loss = 0
    for i in range(steps):
        self.optimizer.zero_grad()
        si = shuffle[i]
        x, y = self._batch(ts, si, batchsz)
        pred = self.model(x)
        loss = self.crit(pred, y)
        total_loss += loss.data[0]
        loss.backward()
        self._add_to_cm(cm, y, pred)
        self.optimizer.step()
        pg.update()
    pg.done()
    duration = time.time() - start_time
    total_corr = cm.get_correct()
    total = cm.get_total()
    print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (float(total_loss) / total, total_corr, total,
           float(total_corr) / total, duration))
    print(cm)

def predict(self, steps):
    print("Forecasting...")
    progress_bar = ProgressBar(len(self.models.items()))
    self.fcst_ds = pd.date_range(
        start=self.train_ds.min(), freq="D",
        periods=len(self.train_ds) + steps)[-365:]
    for item, model in self.models.items():
        pred = model.predict(
            exogenous=fourier(
                steps, seasonality=self.seasonality,
                n_terms=self.n_fourier_terms),
            n_periods=steps,
            return_conf_int=True,
            alpha=(1.0 - self.confidence_interval))
        self.fcst[item] = pd.DataFrame(
            {"yhat": pred[0],
             "yhat_lower": pred[1][:, 0],
             "yhat_upper": pred[1][:, 1]},
            index=self.fcst_ds)
        if self.use_boxcox:
            self.fcst[item] = inv_boxcox(
                self.fcst[item], self.lmbda_boxcox[item])
        progress_bar.update()
    progress_bar.finish()
    return pd.concat(self.fcst, axis=1)

def parallel_test(model_cls,
                  model_kwargs,
                  checkpoint,
                  dataset,
                  data_func,
                  gpus,
                  worker_per_gpu=1):
    ctx = multiprocessing.get_context('spawn')
    idx_queue = ctx.Queue()
    result_queue = ctx.Queue()
    num_workers = len(gpus) * worker_per_gpu
    workers = [
        ctx.Process(
            target=worker_func,
            args=(model_cls, model_kwargs, checkpoint, dataset, data_func,
                  gpus[i % len(gpus)], idx_queue, result_queue))
        for i in range(num_workers)
    ]
    for w in workers:
        w.daemon = True
        w.start()

    for i in range(len(dataset)):
        idx_queue.put(i)

    results = {}
    prog_bar = ProgressBar(task_num=len(dataset))
    for _ in range(len(dataset)):
        img_id, res = result_queue.get()
        results[img_id] = format_ret(res)
        prog_bar.update()
    print('\n')
    for worker in workers:
        worker.terminate()

    return results

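# worker_func is referenced in parallel_test() above but not shown here. A hypothetical
# sketch of what each worker could do under the same queue protocol (build the model,
# pull indices, push (index, result) pairs); the checkpoint-loading details and the
# data_func contract are assumptions, not the real project's API:
import torch

def worker_func(model_cls, model_kwargs, checkpoint, dataset, data_func,
                gpu_id, idx_queue, result_queue):
    model = model_cls(**model_kwargs)
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))  # assumed checkpoint format
    model.cuda(gpu_id).eval()
    with torch.no_grad():
        while True:
            idx = idx_queue.get()                  # next sample index to evaluate
            data = data_func(dataset[idx], gpu_id)  # assumed to return model kwargs
            result_queue.put((idx, model(**data)))
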
def train(self, ts, batchsz):
    self.model.train()
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    total_loss = total = 0
    pg = ProgressBar(steps)
    for i in range(steps):
        self.optimizer.zero_grad()
        si = shuffle[i]
        ts_i = data.batch(ts, si, batchsz, long_tensor_alloc, tensor_shape, tensor_max)
        src, dst, tgt = self._wrap(ts_i)
        pred = self.model((src, dst))
        loss = self.crit(pred, tgt)
        total_loss += loss.data[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm(self.model.parameters(), self.clip)
        total += self._total(tgt)
        self.optimizer.step()
        pg.update()
    pg.done()
    duration = time.time() - start_time
    avg_loss = float(total_loss) / total
    print('Train (Loss %.4f) (Perplexity %.4f) (%.3f sec)' %
          (avg_loss, np.exp(avg_loss), duration))

def plot_traj(trajs, fig_size=(6, 6), color="mediumpurple", size=5, title='',
              is_plot_line=False, od_only=False, offset=None):
    """Plot the trajectories."""
    if offset is None:
        offset = [0, 0]
    p = ProgressBar(len(trajs), 'plotting trajectories')
    plt.figure(figsize=fig_size)
    for i in range(len(trajs)):
        p.update(i)
        traj = np.array(trajs[i])
        if od_only:
            traj = [traj[0], traj[-1]]
        x = [x[0] + np.random.uniform(-offset[0], offset[0]) for x in traj]
        y = [y[1] + np.random.uniform(-offset[1], offset[1]) for y in traj]
        if od_only:
            if is_plot_line:
                plt.plot(x[0], y[0], c=color)
                plt.plot(x[1], y[1], c="yellowgreen")
            plt.scatter(x[0], y[0], c=color, s=size)
            plt.scatter(x[1], y[1], c="yellowgreen", s=size)
        else:
            if is_plot_line:
                plt.plot(x, y, c=color)
            plt.scatter(x, y, c=color, s=size)
    plt.title(title)
    plt.show()

def rmsd_matrix(self, msg=''):
    """
    Calculates rmsd matrix with no fitting for all pairs of models in trajectory.
    :return: np.array
    """
    def rmsd(m1, m2, ml):
        return np.sqrt(np.sum((m1 - m2) ** 2) / ml)

    model_length = len(self.template)
    models = self.coordinates.reshape(-1, model_length, 3)
    dim = len(models)
    result = np.zeros((dim, dim))
    if msg:
        bar = ProgressBar((dim * dim - dim) / 2, msg=msg)
    else:
        bar = None
    for i in range(dim):
        for j in range(i + 1, dim):
            if bar:
                bar.update()
            result[i, j] = result[j, i] = rmsd(models[i], models[j], model_length)
    if bar:
        bar.done(True)
    return result

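# A standalone sanity check of the rmsd formula used in rmsd_matrix() above
# (a minimal sketch, assuming only numpy; model coordinates are random (n_atoms, 3) arrays):
import numpy as np

def _rmsd_no_fit(m1, m2):
    # root-mean-square deviation without superposition, as in rmsd_matrix()
    return np.sqrt(np.sum((m1 - m2) ** 2) / len(m1))

if __name__ == '__main__':
    a = np.random.rand(10, 3)
    assert np.isclose(_rmsd_no_fit(a, a), 0.0)           # identical models -> 0
    b = a + 1.0                                          # rigid shift by (1, 1, 1)
    assert np.isclose(_rmsd_no_fit(a, b), np.sqrt(3.0))  # per-atom distance is sqrt(3)
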
def load_images(path, preprocessor=None, limit=None):
    images = []
    images_id = next(os.walk(path))[2]
    size = limit if limit is not None else len(images_id)
    print(f"Loading {size} images")
    prog = ProgressBar(100, size)
    for id in range(size):
        name = images_id[id]
        filename = path + "/" + name
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        if preprocessor is not None:
            image = preprocessor.preprocess_input(image)
        image_id = name.split('.')[0]
        images.append([image_id, image])
        prog.update(id)
    print("Loading complete")
    return images

def export_to_xml_in_folder(source, destination=Defaults.MUNIN_XML_FOLDER):
    """
    Calls "rrdtool dump" to convert RRD database files in "source" folder to XML representation
    Converts all *.rrd files in source folder
    """
    assert os.path.exists(source)
    try:
        os.makedirs(destination)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filelist = [("", file) for file in os.listdir(source) if file.endswith(".rrd")]
    nb_files = len(filelist)
    progress_bar = ProgressBar(nb_files)

    print("Exporting {0} RRD databases:".format(nb_files))

    for domain, file in filelist:
        src = os.path.join(source, domain, file)
        dst = os.path.join(
            destination,
            "{0}-{1}".format(domain, file).replace(".rrd", ".xml"))
        progress_bar.update()

        code = subprocess.check_call(['rrdtool', 'dump', src, dst])

    return nb_files

def export_to_xml_in_folder(source, destination=Defaults.MUNIN_XML_FOLDER):
    """
    Calls "rrdtool dump" to convert RRD database files in "source" folder to XML representation
    Converts all *.rrd files in source folder
    """
    assert os.path.exists(source)
    try:
        os.makedirs(destination)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filelist = [("", file) for file in os.listdir(source) if file.endswith(".rrd")]
    nb_files = len(filelist)
    progress_bar = ProgressBar(nb_files)

    print "Exporting {0} RRD databases:".format(nb_files)

    for domain, file in filelist:
        src = os.path.join(source, domain, file)
        dst = os.path.join(destination, "{0}-{1}".format(domain, file).replace(".rrd", ".xml"))
        progress_bar.update()

        code = subprocess.check_call(['rrdtool', 'dump', src, dst])

    return nb_files

def train(self, ts, cm, dropout, batchsz=1):
    total_loss = 0
    start_time = time.time()
    steps = int(math.floor(len(ts) / float(batchsz)))
    shuffle = np.random.permutation(np.arange(steps))
    pg = ProgressBar(steps)
    cm.reset()
    for i in range(steps):
        si = shuffle[i]
        ts_i = data.batch(ts, si, batchsz)
        feed_dict = self.model.ex2dict(ts_i, 1.0 - dropout)
        _, step, summary_str, lossv, guess = self.sess.run(
            [self.train_op, self.global_step, self.summary_op, self.loss, self.model.best],
            feed_dict=feed_dict)
        self.train_writer.add_summary(summary_str, step)
        total_loss += lossv
        cm.add_batch(ts_i.y, guess)
        pg.update()
    pg.done()
    total = cm.get_total()
    total_corr = cm.get_correct()
    duration = time.time() - start_time
    print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (float(total_loss) / total, total_corr, total,
           float(total_corr) / total, duration))
    print(cm)

def train(self, ts):
    self.model.train()
    start_time = time.time()
    steps = int(len(ts))
    shuffle = np.random.permutation(np.arange(steps))
    total_loss = total_corr = total = 0
    pg = ProgressBar(steps)
    for i in range(steps):
        self.optimizer.zero_grad()
        si = shuffle[i]
        src, dst, tgt = self._wrap(ts[si])
        pred = self.model((src, dst))
        loss = self.crit(pred, tgt)
        total_loss += loss.data[0]
        loss.backward()
        total_corr += self._right(pred, tgt)
        total += self._total(tgt)
        self.optimizer.step()
        pg.update()
    pg.done()
    duration = time.time() - start_time
    avg_loss = float(total_loss) / total
    print('Train (Loss %.4f) (Perplexity %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
          (avg_loss, np.exp(avg_loss), total_corr, total,
           float(total_corr) / total, duration))

def stability_derivatives(self):
    prog = ProgressBar('Instantiating Stability Object')
    derivatives = StabilityDerivatives(
        u=self.initial_trim_case.u,
        w=self.initial_trim_case.w,
        q=0,
        theta_f=self.initial_trim_case.fuselage_tilt,
        collective_pitch=self.initial_trim_case.collective_pitch,
        longitudinal_cyclic=self.initial_trim_case.longitudinal_cyclic)
    prog.update(100)
    return derivatives

def specific_entropy(self):
    """
    Allows look-up of reference Specific Entropy `s` of the ambient flow depending on the ambient
    static temperature :py:attr:`engine_in.ambient.t_static` and ambient static pressure
    :py:attr:`engine_in.ambient.p_static`. If a cached value is already present in the look-up
    table then the online calculator won't be accessed. Otherwise, this process takes some time
    as a request to the server has to be made.

    :return: Specific Entropy in SI Joule per kilogram Kelvin [J/kg K]
    :rtype: float
    """
    prog = ProgressBar('Fetching Entropy Value')
    temp = self.engine_in.ambient.t_static - 273.15
    pres = self.engine_in.ambient.p_static / 1000.
    tol = {'temperature': 10., 'pressure': 1.}  # Tolerances for cache search procedure

    # Opening cache as read-only file to look-up stored values
    with open(DIRS['ENTROPY_TABLE_DIR']) as cache:
        entries = cache.readlines()[1:]
        valid_entries = []
        for entry in entries:
            try:
                cached_temp, cached_pres, cached_s0 = entry.split('\t')
                cached_temp, cached_pres, cached_s0 = float(cached_temp), float(cached_pres), float(cached_s0)
                temp_error, pres_error = temp - cached_temp, pres - cached_pres
                if temp_error < tol['temperature'] or pres_error < tol['pressure']:
                    # [0] Temperature [1] Pressure [2] Entropy [3] Squared Error
                    valid_entries += [(cached_temp, cached_pres, cached_s0, (temp_error + pres_error) ** 2)]
            except Exception as e:
                raise e

    if len(valid_entries) == 0:  # No valid entries found, the online database must be accessed
        with requests.Session() as session:
            prog.update(25, 'Loading Session')
            calculator_url = 'http://www.irc.wisc.edu/properties/'
            data = {'units': 'International',
                    'fluid': 'Dry Air',
                    'parameter1': 'T',  # State 1 = Temperature
                    'parameter2': 'P',  # State 2 = Pressure
                    'state1': '{}'.format(temp),  # Degrees Celsius
                    'state2': '{}'.format(pres),  # Absolute Pressure in kPa
                    'calculate': 'Calculate Properties'}
            prog.update(35, 'Posting Request to Database')
            response = session.post(calculator_url, data=data)

            prog.update(90, 'Parsing Response')
            result_hdr = ('<!-- Calculation Results -->', '</table')  # Tuple of start [0] and end [1] strings
            raw = response.text.split(result_hdr[0])[-1].split(result_hdr[1])[0]
            filtered = raw.replace('\t', '').replace('\n', '').split('Entropy: ')[-1].split(' J')[0]
            s0 = float(filtered)  # Entropy in J/kg K

            # Writing value to cache and returning
            with open(DIRS['ENTROPY_TABLE_DIR'], 'a') as cache:
                cache.write('\n{:.10f}\t{:.10f}\t{:.10f}'.format(temp, pres, s0))
            prog.update(100, 'Complete')
            return s0
    else:
        closest_match = sorted(valid_entries, key=lambda x: x[-1])[0]  # Returns entry with minimum sq. error
        prog.update(100, 'Complete')
        return closest_match[2]

def main(argv=None):
    with tf.Session() as sess:
        data_dir = FLAGS.data_dir
        files = [os.path.join(data_dir, item) for item in os.listdir(data_dir)]
        # files = random.sample(files, 800)

        images = tf.placeholder(tf.float32, [None, RESIZE_FINAL, RESIZE_FINAL, 3])
        logits = inference(
            images,
            False,
            num_classes=2,
            num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
            use_bias=False,  # defaults to using batch norm
            bottleneck=True)

        init = tf.global_variables_initializer()
        resnet_variables = tf.global_variables()
        saver = tf.train.Saver(resnet_variables)
        saver.restore(sess, os.path.join(FLAGS.model_dir, FLAGS.ckpt_file))
        softmax_output = tf.nn.softmax(logits)

        writer = None
        if FLAGS.target:
            print('Creating output file %s' % FLAGS.target)
            output = open(os.path.join(FLAGS.data_dir, FLAGS.target), 'w')
            writer = csv.writer(output)
            writer.writerow(('file', 'label', 'score'))

        num_batches = int(math.ceil(len(files) / float(MAX_BATCH_SZ)))
        pg = ProgressBar(num_batches)
        # try:
        for j in range(num_batches):
            start_offset = j * MAX_BATCH_SZ
            end_offset = min((j + 1) * MAX_BATCH_SZ, len(files))

            batch_image_files = files[start_offset:end_offset]
            images_ = []
            for file in batch_image_files:
                print file
                image_buffer = tf.read_file(file)
                bbox = []
                image = image_preprocessing(image_buffer, [], False)
                images_.append(image)
            image_batch = tf.stack(images_)
            batch_results = sess.run(softmax_output,
                                     feed_dict={images: image_batch.eval()})
            batch_sz = batch_results.shape[0]

            for i in range(batch_sz):
                output_i = batch_results[i]
                best_i = np.argmax(output_i)
                best_choice = (label_list[best_i], output_i[best_i])
                if writer is not None:
                    f = batch_image_files[i]
                    writer.writerow((f, best_choice[0], '%.2f' % best_choice[1]))
            pg.update()
        pg.done()

def import_from_xml_folder(self, folder):
    raise DeprecationWarning

    # build file list and grouping if necessary
    file_list = os.listdir(folder)
    grouped_files = defaultdict(list)
    errors = []
    progress_bar = ProgressBar(len(file_list))

    for file in file_list:
        fullname = os.path.join(folder, file)
        parts = file.replace(".xml", "").split("-")
        series_name = ".".join(parts[0:-2])
        if self.settings.influxdb['group_fields']:
            grouped_files[series_name].append((parts[-2], fullname))
        else:
            grouped_files[".".join([series_name, parts[-2]])].append(('value', fullname))

    if self.settings.interactive:
        show = raw_input("Would you like to see the prospective series and columns? y/[n]: ") or "n"
        if show in ("y", "Y"):
            for series_name in sorted(grouped_files):
                print(" - {2}{0}{3}: {1}".format(series_name,
                                                 [name for name, _ in grouped_files[series_name]],
                                                 Color.GREEN, Color.CLEAR))

    print("Importing {0} XML files".format(len(file_list)))
    for series_name in grouped_files:
        data = []
        keys_name = ['time']
        values = defaultdict(list)
        for field, file in grouped_files[series_name]:
            progress_bar.update()
            keys_name.append(field)
            content = read_xml_file(file)
            [values[key].append(value) for key, value in content.items()]

        # join data with time as first column
        data.extend([[k] + v for k, v in values.items()])

        try:
            pass
            # self.upload_values(series_name, keys_name, data)
        except Exception as e:
            errors.append(str(e))
            continue

        try:
            self.validate_record(series_name, keys_name)
        except Exception as e:
            errors.append("Validation error in {0}: {1}".format(series_name, e))

    if errors:
        print("The following errors were detected while importing:")
        for error in errors:
            print(" {0} {1}".format(Symbol.NOK_RED, error))

def write2files(init_path, file_list, D, write_file_num=14650):
    """Write trajectories to files."""
    # randomly sample trajectories
    rand_ind = random.sample([i for i in range(len(file_list))], write_file_num)
    p2 = ProgressBar(write_file_num, 'writing files')
    for i in range(write_file_num):
        p2.update(i)
        with open(init_path + file_list[rand_ind[i]], 'w') as f2:
            for step in D[rand_ind[i]]:
                f2.writelines(str(step[0]) + ',' + str(step[1]) + '\n')

def extract_features(images, model):
    features = dict()
    count = 0
    prog = ProgressBar(100, len(images))
    for id, image in images:
        feature = model.predict(image, verbose=0)
        features[id] = feature
        count += 1
        prog.update(count)
    return features

def markov_model(trajectory, N, epsilon):
    """Build the intermediate-point (Markov) transition probability matrix with Laplace noise.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        O_: intermediate-point transition probability matrix
    """
    O_ = np.zeros([N, N])  # build an N*N transition probability matrix
    for t in trajectory:
        O_0 = np.zeros([N, N])
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_0[curr_point][next_point] += 1
        O_0 = O_0 / (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_0

    line_all = []
    p = ProgressBar(N, 'building intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        score = 0
        for j in range(N):
            # add Laplace noise
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))
            noise = np.random.laplace(0, 1 / epsilon)
            # noise = 0.00000000000000000000000001
            O_[i][j] += noise
            if O_[i][j] < 0:
                O_[i][j] = 0
            score += O_[i][j]
        line_all.append(score)

    # normalize each row
    for i in range(N):
        O_[i] /= line_all[i]

    sns.heatmap(data=O_, square=True)
    plt.show()
    return O_

def one_run(projects_train, projects_test, K, outlier_threshold, granularity):
    rmse_failed_run = []
    rmse_success_run = []
    rmse_run = []
    accuracy_run = []

    relative_time = np.linspace(0.025, 1, 20)
    bar = ProgressBar(end_value=len(relative_time), text="Time steps", count=True)
    bar.start()

    for i, rel_t in enumerate(relative_time):
        # Data
        t = int(rel_t * 999)
        samples = subsample(t, granularity)
        t = len(samples)
        T = 999

        # Remove outliers
        projects_train_filtered = [p for p in projects_train
                                   if np.all((p.money[T] - outlier_threshold) <= 0)
                                   and np.all((p.money[samples] - outlier_threshold) <= 0)]
        projects_test_filtered = [p for p in projects_test
                                  if np.all((p.money[T] - outlier_threshold) <= 0)
                                  and np.all((p.money[samples] - outlier_threshold) <= 0)]

        X_train = np.ndarray(shape=(len(projects_train_filtered), t),
                             buffer=np.array([p.money[samples] for p in projects_train_filtered]),
                             dtype=float)
        y_train = np.expand_dims(np.array([p.money[T] for p in projects_train_filtered]), axis=1)
        X_test = np.ndarray(shape=(len(projects_test_filtered), t),
                            buffer=np.array([p.money[samples] for p in projects_test_filtered]),
                            dtype=float)
        y_test = np.expand_dims(np.array([p.money[T] for p in projects_test_filtered]), axis=1)

        # X_max = np.max(X_train, axis=0)
        # X_train = X_train / X_max[np.newaxis, :]
        # X_test = X_test / X_max[np.newaxis, :]

        # Hyperparameters
        beta = 0.0001
        epsilon = 1e0
        lam = 0
        iterations = 50
        random_restarts = None

        mls = LeastSquaresMixture(X_train, y_train,
                                  K=K, beta=beta, lam=lam,
                                  iterations=iterations, epsilon=epsilon,
                                  random_restarts=random_restarts)
        mls.train(verbose=False)
        # print(mls)

        rmse_failed, rmse_success, rmse, accuracy = mls.evaluate(X_test, y_test, verbose=False)
        rmse_failed_run.append(rmse_failed)
        rmse_success_run.append(rmse_success)
        rmse_run.append(rmse)
        accuracy_run.append(accuracy)

        bar.update(i)

    print(accuracy_run)

    return rmse_failed_run, rmse_success_run, rmse_run, accuracy_run

def run(self, mode, X, Y, batch_size, optimizer=None, clip=None):
    self.reset_states(batch_size)
    if optimizer:
        self.train(True)
    else:
        self.eval()
    nbatches = X.size(0) // batch_size
    pb = ProgressBar(mode, self.epoch, nbatches)
    _total_time = 0
    _total_loss = 0
    _total_word = 0
    L = nn.CrossEntropyLoss(size_average=False)
    for index in range(nbatches):
        begin = index * batch_size
        end = begin + batch_size
        # Start
        if optimizer:
            start = time.time()
            x = Variable(X[begin:end], requires_grad=False)
            t = Variable(Y[begin:end], requires_grad=False)
        else:
            start = time.time()
            x = Variable(X[begin:end], requires_grad=False, volatile=True)
            t = Variable(Y[begin:end], requires_grad=False, volatile=True)
        y = self(x)
        loss = L(y, t.view(-1))
        if optimizer:
            self.zero_grad()
            loss.backward()
            if clip:
                torch.nn.utils.clip_grad_norm(self.parameters(), clip)
            optimizer.step()
        # End
        time_per_batch = time.time() - start
        _total_time += time_per_batch
        _total_loss += loss.cpu().data.numpy()[0]
        _total_word += float(numpy.prod(t.size()))
        pb.update([('ppl', numpy.exp(_total_loss / _total_word), lambda x: x),
                   ('wps', _total_word / _total_time, lambda x: x)])
    print
    return numpy.exp(_total_loss / _total_word), _total_word / _total_time

def get_all_ranking(save_to):
    from utils import ProgressBar
    fout = open(save_to, 'w')
    images = Image.objects.all()
    progress = ProgressBar(len(images) * len(images), 20)
    for target in images:
        searcher = Searcher(target)
        searcher.run()
        results = []
        for _, image in searcher.results:
            results.append((image.origin_id, len(results)))
            progress.update()
        results.sort()
        print >> fout, ' '.join(str(x) for _, x in results)
    print('Finished. Written to file "{}"'.format(save_to))

def trip_distribution(trajectory, N, epsilon):
    """Estimate the trip distribution.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        R: transition probability matrix
    """
    R = np.zeros((N, N))  # build a transition probability matrix over grid cells
    for t in trajectory:
        if len(t) > 1:
            sta = t[0]
            end = t[-1]
            R[sta][end] += 1
        else:
            print(t)

    count = np.sum(R)
    print(count)

    p = ProgressBar(N, 'building transition probability matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            # add Laplace noise
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))
            noise = np.random.laplace(0, 1 / epsilon)
            R[i][j] += noise
            if R[i][j] < 0:
                R[i][j] = 0
            # whether |D| should be recomputed after adding noise is left open
            # count += R[i][j]
    R /= count
    return R

def markov_model(trajectory, N, epsilon):
    """Build the Markov mobility model with Laplace noise.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        O_: intermediate-point transition probability matrix
    """
    O_ = np.zeros((N, N))  # build an N*N transition probability matrix
    for t in trajectory:
        O_sub = np.zeros((N, N))
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_sub[curr_point][next_point] += 1
        O_sub /= (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_sub

    p = ProgressBar(N, 'generating intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            noise = np.random.laplace(0, 1 / epsilon)  # add Laplace noise
            O_[i][j] += noise
            if O_[i][j] < 0:
                O_[i][j] = 0

    # normalize each row
    row_sum = [sum(O_[i]) for i in range(N)]
    for j in range(N):
        O_[j] /= row_sum[j]

    # plot a heatmap of the matrix
    sns.heatmap(data=O_, square=True)
    plt.title('mobility model construction matrix (epsilon=%s)' % str(used_pair[0]))
    plt.show()
    return O_

def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    bar = ProgressBar(hp.n_iter, unit='')
    for i in range(hp.n_iter):
        bar.update(i)
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)
    return np.real(X_t)

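# The loop in spectrogram2wav() above is a manual Griffin-Lim phase-recovery iteration.
# Recent librosa versions ship a built-in equivalent; a minimal sketch (assumes a magnitude
# spectrogram shaped [t, f] like the input above and librosa >= 0.7; the parameter values
# here are placeholders, not the hp.* settings of the original project):
import librosa

def spectrogram2wav_builtin(spectrogram, n_iter=50, hop_length=256, win_length=1024):
    mag = spectrogram.T  # [f, t], matching the manual loop above
    return librosa.griffinlim(mag, n_iter=n_iter,
                              hop_length=hop_length, win_length=win_length)
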
def predict(self, steps, freq="D", regressors=None):
    print("Forecasting...")
    progress_bar = ProgressBar(len(self.models.items()))
    for item, model in self.models.items():
        future = model.make_future_dataframe(steps, freq=freq).set_index("ds")
        if regressors is not None:
            future = pd.merge(future, regressors, left_index=True, right_index=True, how="left")
        pred = model.predict(future.reset_index()).set_index("ds")
        pred = pred[["yhat", "yhat_lower", "yhat_upper"]]
        self.fcst[item] = pred
        if self.use_boxcox:
            self.fcst[item] = inv_boxcox(
                self.fcst[item], self.lmbda_boxcox[item])
        progress_bar.update()
    progress_bar.finish()
    fcst_df = pd.concat(self.fcst, axis=1).sort_index(axis=1)
    return fcst_df

def route_length_estimate(trajectory, A, lo, hi, epsilon, sensitivity):
    """Estimate the median trajectory length for each (start, end) cell pair.

    Args:
        trajectory : trajectory data (2-D array)
        A          : grid dimension (the grid has A*A cells)
        lo, hi     : accepted trajectory length bounds
        epsilon    : privacy budget
        sensitivity: sensitivity of the score function

    Returns:
        L_array: estimated median length per (start, end) cell pair
    """
    C = A * A
    L_matrix = [[] for _ in range(C)]  # L matrix
    L_array = []
    for t in trajectory:
        lenT = len(t)
        if lenT > hi:
            continue
        if lenT < 2 or lo > lenT:
            continue
        row = t[0]
        col = t[-1]
        l_index = row * A + col  # convert to a 1-D index
        L_matrix[l_index].append(lenT)

    p = ProgressBar(C, 'computing median trajectory length matrix')
    for i in range(C):
        p.update(i)
        score_arr = []
        K = L_matrix[i].copy()  # all trajectory lengths for this (start, end) pair
        K.sort()  # ascending order
        if len(K) < 1:
            L_array.append(0)
            continue
        m_index = len(K) / 2  # index of the median
        for j in range(len(K)):
            score_arr.append(-abs(j - m_index))  # score function
        r_index = exp_mechanism(score_arr, len(K), epsilon, sensitivity)
        # print(K, '--->', K[r_index])
        L_array.append(K[r_index])
    return L_array

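# exp_mechanism() is called above but not defined in this snippet. A minimal sketch of a
# standard exponential-mechanism sampler with the same call shape
# exp_mechanism(score_arr, len(K), epsilon, sensitivity) might look like this (hypothetical,
# not the original project's implementation):
import numpy as np

def exp_mechanism(score_arr, n, epsilon, sensitivity):
    scores = np.asarray(score_arr[:n], dtype=float)
    # P(i) proportional to exp(epsilon * score_i / (2 * sensitivity)); shift by max for stability
    weights = np.exp(epsilon * (scores - scores.max()) / (2.0 * sensitivity))
    probs = weights / weights.sum()
    return int(np.random.choice(n, p=probs))
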
def trip_distribution(trajectory, N, epsilon):
    """Estimate the trip distribution.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        R: transition probability matrix
    """
    R = np.zeros((N, N))  # build a transition probability matrix over grid cells
    for t in trajectory:
        if len(t) > 1:
            sta = t[0]
            end = t[-1]
            R[sta][end] += 1

    count = 0
    p = ProgressBar(N, 'building transition probability matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            # add Laplace noise (inverse-CDF sampling)
            sensitivity = 1
            randomDouble = random.random() - 0.5
            noise = -(sensitivity / epsilon) * signum(randomDouble) * math.log(
                1 - 2 * abs(randomDouble))
            R[i][j] += noise
            if R[i][j] < 0:
                R[i][j] = 0
            count += R[i][j]
    R /= count
    return R

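# The inverse-CDF expression used in trip_distribution() above draws Laplace(0, sensitivity/epsilon)
# noise by hand; a minimal sketch showing that it is interchangeable with numpy's sampler
# (assumes only the standard library and numpy):
import math
import random
import numpy as np

def laplace_noise_manual(sensitivity, epsilon):
    u = random.random() - 0.5  # uniform on (-0.5, 0.5)
    # inverse CDF of Laplace(0, b) with b = sensitivity / epsilon
    return -(sensitivity / epsilon) * np.sign(u) * math.log(1 - 2 * abs(u))

def laplace_noise_numpy(sensitivity, epsilon):
    return np.random.laplace(0, sensitivity / epsilon)
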
def export_to_xml(settings):
    progress_bar = ProgressBar(settings.nb_rrd_files)
    try:
        os.makedirs(settings.paths['xml'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    for domain, host, plugin, field in settings.iter_fields():
        _field = settings.domains[domain].hosts[host].plugins[plugin].fields[field]
        if _field.rrd_found:
            progress_bar.update()
            code = subprocess.check_call(['rrdtool', 'dump', _field.rrd_filename, _field.xml_filename])
            if code == 0:
                _field.rrd_exported = True

    return progress_bar.current

def generate(self):
    progress_bar = ProgressBar(self.settings.nb_rrd_files)
    self.add_header(self.settings)
    for domain in self.settings.domains:
        for host in self.settings.domains[domain].hosts:
            row = self.add_row("{0} / {1}".format(domain, host))
            for plugin in self.settings.domains[domain].hosts[host].plugins:
                _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
                panel = row.add_panel(_plugin.settings["graph_title"] or plugin, plugin)

                for field in _plugin.fields:
                    query = panel.add_query(field)
                    if "label" in _plugin.fields[field].settings:
                        query.alias = _plugin.fields[field].settings["label"]
                    progress_bar.update()

                panel.width = 12 // self.settings.grafana['graph_per_row']
                panel.process_graph_settings(_plugin.settings)
                panel.process_graph_thresholds(_plugin.fields)
                panel.process_graph_types(_plugin.fields)

def export_to_xml(settings):
    progress_bar = ProgressBar(settings.nb_rrd_files)
    try:
        os.makedirs(settings.paths['xml'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    for domain, host, plugin, field in settings.iter_fields():
        _field = settings.domains[domain].hosts[host].plugins[plugin].fields[field]
        if _field.rrd_found:
            progress_bar.update()
            code = subprocess.check_call(
                ['rrdtool', 'dump', _field.rrd_filename, _field.xml_filename])
            if code == 0:
                _field.rrd_exported = True

    return progress_bar.current

def one_run(projects_train, projects_test, outlier_threshold):
    rmse_run = []
    accuracy_run = []

    relative_time = np.linspace(0.025, 1, 20)
    bar = ProgressBar(end_value=len(relative_time), text="Time steps", count=True)
    bar.start()

    for i, rel_t in enumerate(relative_time):
        # n_samples = 1
        # t0 = 1
        # t1 = 500
        # samples = subsample(t0, t1, n_samples)
        samples = rel_t * 1000 - 1
        t = 1
        T = 999
        ARD = False

        projects_train = [p for p in projects_train
                          if p.money[T] * p.goal < outlier_threshold
                          and p.money[samples] * p.goal < outlier_threshold]
        projects_test = [p for p in projects_test
                         if p.money[T] * p.goal < outlier_threshold
                         and p.money[samples] * p.goal < outlier_threshold]

        X_train = np.ndarray(shape=(len(projects_train), t),
                             buffer=np.array([p.money[samples] * p.goal for p in projects_train]),
                             dtype=float)
        y_train = np.expand_dims(np.array([p.money[T] * p.goal for p in projects_train]), axis=1)
        X_test = np.ndarray(shape=(len(projects_test), t),
                            buffer=np.array([p.money[samples] * p.goal for p in projects_test]),
                            dtype=float)
        y_test = np.expand_dims(np.array([p.money[T] * p.goal for p in projects_test]), axis=1)

        kernel = GPy.kern.RBF(input_dim=t, ARD=ARD)
        m = GPy.models.GPRegression(X_train, y_train, kernel)
        m.optimize()

        rmse, accuracy = evaluate(X_test, y_test, projects_test, m)
        rmse_run.append(rmse)
        accuracy_run.append(accuracy)

        bar.update(i)

    return rmse_run, accuracy_run

def discover_from_rrd(settings, insert_missing=True, print_missing=False):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by listing the files in the RRD folder

    http://munin-monitoring.org/wiki/MuninFileNames:
    /var/lib/munin/SomeGroup/foo.example.com-cpu-irq-d.rrd
                   --------- --------------- --- --- -
                       |            |         |   |  `-- Data type (a = absolute, c = counter, d = derive, g = gauge)
                       |            |         |   `----- Field name / data source: 'irq'
                       |            |         `--------- Plugin name: 'cpu'
                       |            `------------------- Node name: 'foo.example.com'
                       `-------------------------------- Group name: 'SomeGroup'
    """

    folder = settings.paths['munin']
    print "Reading Munin RRD cache: ({0})".format(folder)

    not_inserted = defaultdict(dict)

    for domain in os.listdir(folder):
        if not os.path.isdir(os.path.join(folder, domain)):
            # domains are represented as folders
            continue

        if not insert_missing and not domain in settings.domains:
            # skip unknown domains (probably no longer wanted)
            continue

        files = os.listdir(os.path.join(folder, domain))
        progress_bar = ProgressBar(len(files), title=domain)
        for filename in files:
            progress_bar.update()

            path = os.path.join(folder, domain, filename)
            if os.path.isdir(path) or not path.endswith(".rrd"):
                # not a RRD database
                continue

            parts = os.path.splitext(filename)[0].split('-')
            length = len(parts)
            if length < 4:
                print "Error:", filename, parts, length
                continue

            host, plugin, field, datatype = parts[0], ".".join(parts[1:-2]), parts[-2], parts[-1]

            if not insert_missing and (not host in settings.domains[domain].hosts
                                       or not plugin in settings.domains[domain].hosts[host].plugins):
                if not host in not_inserted[domain]:
                    not_inserted[domain][host] = set()
                not_inserted[domain][host].add(plugin)
                continue

            plugin_data = settings.domains[domain].hosts[host].plugins[plugin]
            try:
                assert os.path.exists(os.path.join(folder, domain,
                                                   "{0}-{1}-{2}-{3}.rrd".format(host,
                                                                                plugin.replace(".", "-"),
                                                                                field,
                                                                                datatype[0])))
            except AssertionError:
                print "{0} != {1}-{2}-{3}-{4}.rrd".format(filename, host, plugin, field, datatype[0])
                plugin_data.fields[field].rrd_found = False
            else:
                plugin_data.fields[field].rrd_found = True
                plugin_data.fields[field].rrd_filename = os.path.join(settings.paths['munin'], domain, filename)
                plugin_data.fields[field].xml_filename = os.path.join(settings.paths['xml'], domain,
                                                                      filename.replace(".rrd", ".xml"))
                plugin_data.fields[field].settings = {
                    "type": DATA_TYPES[datatype]
                }
                settings.nb_fields += 1

    if print_missing and len(not_inserted):
        print "The following plugin databases were ignored"
        for domain, hosts in not_inserted.items():
            print " - Domain {0}:".format(domain)
            for host, plugins in hosts.items():
                print " {0} Host {1}: {2}".format(Symbol.NOK_RED, host, ", ".join(plugins))

    return settings

def import_from_xml(self):
    print("\nUploading data to InfluxDB:")
    progress_bar = ProgressBar(self.settings.nb_rrd_files * 3)  # nb_files * (read + upload + validate)
    errors = []

    def _upload_and_validate(measurement, tags, fields, packed_values):
        try:
            self.write_series(measurement, tags, fields, packed_values)
        except Exception as e:
            errors.append((Symbol.NOK_RED, "Error writing {0} to InfluxDB: {1}".format(measurement, e)))
            return
        finally:
            progress_bar.update(len(fields) - 1)  # 'time' column ignored

        try:
            self.validate_record(measurement, fields)
        except Exception as e:
            errors.append((Symbol.WARN_YELLOW, "Validation error in {0}: {1}".format(measurement, e)))
        finally:
            progress_bar.update(len(fields) - 1)  # 'time' column ignored

    try:
        assert self.client and self.valid
    except:
        raise Exception("Not connected to an InfluxDB server")
    else:
        print(" {0} Connection to database \"{1}\" OK".format(Symbol.OK_GREEN,
                                                              self.settings.influxdb['database']))

    if self.settings.influxdb['group_fields']:
        """
        In "group_fields" mode, all fields of the same plugin (ex: system, user, nice, idle... of CPU usage)
        will be represented as columns of the same time series in InfluxDB.

        Schema will be:
            +----------------------+-------+----------+----------+-----------+
            | time_series_name     | col_0 | col_1    | col_2    | col_3 ... |
            +----------------------+-------+----------+----------+-----------+
            | domain.host.plugin   | time  | metric_1 | metric_2 | metric_3  |
            | acadis.org.tesla.cpu | time  | system   | user     | nice      |
            | ...                  |       |          |          |           |
            +----------------------+-------+----------+----------+-----------+
        """
        for domain, host, plugin in self.settings.iter_plugins():
            _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
            measurement = plugin
            tags = {
                "domain": domain,
                "host": host,
                "plugin": plugin
            }
            if _plugin.is_multigraph:
                tags["is_multigraph"] = True
                print(host, plugin)

            field_names = ['time']
            values = defaultdict(list)
            values_with_time = []

            for field in _plugin.fields:
                _field = _plugin.fields[field]
                if _field.rrd_exported:
                    field_names.append(field)
                    try:
                        content = read_xml_file(_field.xml_filename)
                    except Exception as e:
                        errors.append((Symbol.WARN_YELLOW, "Could not read file for {0}: {1}".format(field, e)))
                    else:
                        [values[key].append(value) for key, value in content.items()]
                        # keep track of influxdb storage info to allow 'fetch'
                        _field.influxdb_measurement = measurement
                        _field.influxdb_field = field
                        _field.xml_imported = True

                # update progress bar [######      ] 42 %
                progress_bar.update()

            # join data with time as first column
            values_with_time.extend([[k] + v for k, v in values.items()])

            _upload_and_validate(measurement, tags, field_names, values_with_time)

    else:  # non grouping
        """
        In "non grouped" mode, all fields of the same plugin will have a dedicated time series and the
        values will be written to a 'value' column

        Schema will be:
            +-----------------------------+-------+-------+
            | time_series_name            | col_0 | col_1 |
            +-----------------------------+-------+-------+
            | domain.host.plugin.metric_1 | time  | value |
            | domain.host.plugin.metric_2 | time  | value |
            | acadis.org.tesla.cpu.system | time  | value |
            | ...                         |       |       |
            +-----------------------------+-------+-------+
        """
        for domain, host, plugin, field in self.settings.iter_fields():
            _field = self.settings.domains[domain].hosts[host].plugins[plugin].fields[field]
            if not _field.rrd_exported:
                continue

            measurement = field
            tags = {
                "domain": domain,
                "host": host,
                "plugin": plugin
            }
            field_names = ['time', 'value']
            values = defaultdict(list)
            values_with_time = []

            _field.influxdb_measurement = measurement
            _field.influxdb_field = 'value'

            content = read_xml_file(_field.xml_filename)
            [values[key].append(value) for key, value in content.items()]
            _field.xml_imported = True
            progress_bar.update()

            # join data with time as first column
            values_with_time.extend([[k] + v for k, v in values.items()])

            _upload_and_validate(measurement, tags, field_names, values_with_time)

    for error in errors:
        print(" {} {}".format(error[0], error[1]))