def karate_test_scenario(deepwalk_path):
    y_path = '../../local_resources/zachary_karate/y.p'
    x_path = '../../local_resources/zachary_karate/X.p'
    target = utils.read_target(y_path)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    names = [['deepwalk'], ['logistic']]
    x_deepwalk = pd.read_csv(deepwalk_path, index_col=0)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    X = [x_deepwalk.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/karate/deepwalk_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/karate/deepwalk_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/deepwalk_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/karate/deepwalk_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def karate_scenario():
    deepwalk_path = 'local_resources/zachary_karate/size8_walks1_len10.emd'
    y_path = 'local_resources/zachary_karate/y.p'
    x_path = 'local_resources/zachary_karate/X.p'
    target = utils.read_target(y_path)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    names = [['logistic'], ['deepwalk']]
    x_deepwalk = utils.read_embedding(deepwalk_path, target)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    X = [x_deepwalk, normalize(x, axis=0)]
    n_folds = 2
    results = run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('results/karate/deepwalk_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('results/karate/deepwalk_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = 'results/karate/deepwalk_macro' + utils.get_timestamp() + '.csv'
    micro_path = 'results/karate/deepwalk_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def generate_political_blogs_embedding():
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/political_blogs/y.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/polblogs/'
    walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    params = Params(walk_path, batch_size=4, embedding_size=size, neg_samples=5, skip_window=5,
                    num_pairs=1500, statistics_interval=10.0, initial_learning_rate=1.0,
                    save_path=log_path, epochs=5, concurrent_steps=4)
    path = '../../local_resources/political_blogs/embeddings/Win' + '_' + utils.get_timestamp() + '.csv'
    embedding_in, embedding_out = HCE.main(params)
    visualisation.plot_poincare_embedding(embedding_in, y,
                                          '../../results/political_blogs/figs/poincare_polar_Win' + '_'
                                          + utils.get_timestamp() + '.pdf')
    visualisation.plot_poincare_embedding(embedding_out, y,
                                          '../../results/political_blogs/figs/poincare_polar_Wout' + '_'
                                          + utils.get_timestamp() + '.pdf')
    df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
    df_out.to_csv('../../local_resources/political_blogs/embeddings/Wout' + '_'
                  + utils.get_timestamp() + '.csv', sep=',')
    print('political blogs sample generated in: ', datetime.datetime.now() - s)
    political_blogs_scenario(path)
    return path

def blogcatalog_121_scenario(embedding_path):
    target_path = '../../local_resources/blogcatalog_121_sample/y.p'
    feature_path = '../../local_resources/blogcatalog_121_sample/X.p'
    hyperbolic = pd.read_csv(embedding_path, index_col=0).values
    paths = ['../../local_resources/blogcatalog_121_sample/blogcatalog2.emd']
    sizes = [128]
    [deepwalk], y = read_embeddings(paths, target_path, sizes)
    names = [['logistic'], ['deepwalk'], ['hyp embedding']]
    x = utils.read_pickle(feature_path)
    # y = utils.read_pickle(target_path)
    X = [x, deepwalk, hyperbolic]
    n_folds = 2
    results = run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/blogcatalog_121_sample/macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/blogcatalog_121_sample/micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def generate_blogcatalog_cartesian_embedding():
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/blogcatalog/y.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/blogcatalog_cartesian/final_throw1'
    walk_path = '../../local_resources/blogcatalog/p025_q025_d128_walks.csv'
    size = 128  # dimensionality of the embedding
    params = Params(walk_path, batch_size=4, embedding_size=size, neg_samples=5, skip_window=5,
                    num_pairs=1500, statistics_interval=10, initial_learning_rate=0.2,
                    save_path=log_path, epochs=5, concurrent_steps=12)
    path = '../../local_resources/blogcatalog/embeddings/Win_cartesian' + '_' + utils.get_timestamp() + '.csv'
    embedding_in, embedding_out = HCE.main(params)
    visualisation.plot_poincare_embedding(embedding_in, y,
                                          '../../results/blogcatalog/figs/poincare_Win_cartesian' + '_'
                                          + utils.get_timestamp() + '.pdf')
    visualisation.plot_poincare_embedding(embedding_out, y,
                                          '../../results/blogcatalog/figs/poincare_Wout_cartesian' + '_'
                                          + utils.get_timestamp() + '.pdf')
    df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
    df_out.to_csv('../../local_resources/blogcatalog/embeddings/Wout_cartesian' + '_'
                  + utils.get_timestamp() + '.csv', sep=',')
    print('blogcatalog cartesian embedding generated in: ', datetime.datetime.now() - s)
    return path

def tf_train100000_emd_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_train100000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    del rf_features.index.name
    emd = pd.read_csv('../../local_results/tf_train_100000.emd', header=None, index_col=0,
                      skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(feature_path, emd)
    all_feat = features.join(emd)
    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    names = np.array([['L2 without emd', 'L1 without emd', 'RF without emd'],
                      ['L2 with emd', 'L1 with emd', 'RF with emd'],
                      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    n_folds = 10
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/neural/tf_macro_train100000' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/neural/tf_micro_train100000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def run_scenario(folder, embedding_path):
    y_path = '../../local_resources/{}/y.p'.format(folder)
    x_path = '../../local_resources/{}/X.p'.format(folder)
    sizes = [2, 4, 8, 16, 32, 64, 128]
    deepwalk_embeddings = []
    deepwalk_names = []
    dwpath = '../../local_resources/{0}/{1}'.format(folder, folder)
    for size in sizes:
        path = dwpath + str(size) + '.emd'
        de = pd.read_csv(path, header=None, index_col=0, skiprows=1, sep=" ")
        de.sort_index(inplace=True)
        deepwalk_embeddings.append(de.values)
        deepwalk_names.append(['deepwalk' + str(size)])
    x, y = utils.read_data(x_path, y_path, threshold=0)
    names = [['hyperbolic'], ['logistic']]
    names = deepwalk_names + names
    embedding = pd.read_csv(embedding_path, index_col=0)
    X = deepwalk_embeddings + [embedding.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # write macro and micro p-values to separate files so one does not overwrite the other
    tests[0].to_csv('../../results/{0}/macro_pvalues{1}.csv'.format(folder, utils.get_timestamp()))
    tests[1].to_csv('../../results/{0}/micro_pvalues{1}.csv'.format(folder, utils.get_timestamp()))
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/{0}/macro{1}.csv'.format(folder, utils.get_timestamp())
    micro_path = '../../results/{0}/micro{1}.csv'.format(folder, utils.get_timestamp())
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def compare_embeddings():
    emd_reps = 10  # number of times to generate the embeddings
    det_reps = 10  # number of times to repeat the classification
    train_size = 4  # number of training examples
    size = 2  # the number of dimensions to embed
    walks = pd.read_csv('local_resources/zachary_karate/walks1_len10_p1_q1.csv', header=None).values
    p1 = Params(batch_size=4, embedding_size=size, neg_samples=5, skip_window=3, num_pairs=1500,
                logging_interval=100, initial_learning_rate=0.2)
    p2 = Params(batch_size=4, embedding_size=size, neg_samples=8, skip_window=3, num_pairs=1500,
                logging_interval=100, initial_learning_rate=0.2)
    param_arr = [p1, p2]
    elems, unigrams = np.unique(walks, return_counts=True)
    names = ['neg5', 'neg8']
    results = []
    for name, params in zip(names, param_arr):
        result = generate_embeddings(name, emd_reps, det_reps, params, walks, unigrams, train_size)
        results.append(result)
    means, tests = utils.array_stats_test(results)
    tests[0].to_csv('results/karate/tf_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('results/karate/tf_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('results', means)
    means_path = 'results/karate/tf_means' + utils.get_timestamp() + '.csv'
    means.to_csv(means_path, index=True)
    all_results = utils.merge_results(results)
    macro_path = 'results/karate/tf_macro' + utils.get_timestamp() + '.csv'
    micro_path = 'results/karate/tf_micro' + utils.get_timestamp() + '.csv'
    all_results[0].to_csv(macro_path, index=True)
    all_results[1].to_csv(micro_path, index=True)

def generate_plot(self, filter_label):
    timestamps = [get_timestamp(t['created_at']) for t in self.source.iter_tweets()]
    bins = self.get_time_bins(timestamps)
    feature = self.source.get_stats(include_tweets=True)
    filter_stats = feature.get(filter_label, None)
    if not filter_stats:
        raise Exception("Could not get statistics for filter %s. Wrong filter label?" % filter_label)
    for k, feat in enumerate(filter_stats):
        # gather the timestamps for this feature's tweets
        ttweets = filter_stats[feat]
        tally = [0] * self.steps
        timestamps = [float(get_timestamp(t['created_at'])) for t in ttweets]
        # build a histogram by dropping each timestamp into its bin
        for ts in timestamps:
            j = self.get_bin(ts, bins)  # index = int(ts/step)
            tally[j] = tally[j] + 1
        # tally, bins = np.histogram(timestamps, bins=steps)
        # bins = .5 * (bins[1:] + bins[:-1]) + starttime
        plt.plot(bins, tally, color=self.colors[k])
    # label every (steps // 20)th bin with an HH:MM tick (integer division keeps the index an int)
    dates = ["%s:%s" % (get_date(a).hour, get_date(a).minute)
             for j, a in enumerate(bins) if j % (self.steps // 20) == 0]
    for i, date in enumerate(dates):
        split = date.split(':')
        if len(split[1]) == 1:
            dates[i] = split[0] + ':0' + split[1]
    plt.ylabel('Number of Tweets')
    plt.xticks([b for j, b in enumerate(bins) if j % (self.steps // 20) == 0], dates, rotation=90)
    plt.legend(list(filter_stats.keys()), loc=1)

def blogcatalog_deepwalk_node2vec():
    paths = ['local_resources/blogcatalog/blogcatalog128.emd',
             'local_resources/blogcatalog/blogcatalog_p025_q025_d128.emd']
    names = [['logistic_p1_q1'], ['logistic_p025_q025']]
    y_path = 'local_resources/blogcatalog/y.p'
    detectors = [classifiers_embedded_128, classifiers_embedded_128]
    sizes = [128, 128]
    X, y = read_embeddings(paths, y_path, sizes)
    n_folds = 5
    results = run_all_datasets(X, y, names, detectors, n_folds)
    all_results = utils.merge_results(results)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = 'results/blogcatalog/macro_deepwalk_node2vec' + utils.get_timestamp() + '.csv'
    micro_path = 'results/blogcatalog/micro_deepwalk_node2vec' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def generate_karate_embedding():
    import visualisation
    y_path = '../../local_resources/karate/y.p'
    targets = utils.read_pickle(y_path)
    y = np.array(targets['cat'])
    log_path = '../../local_resources/tf_logs/run4/'
    walk_path = '../../local_resources/karate/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    params = Params(walk_path, batch_size=4, embedding_size=size, neg_samples=5, skip_window=5,
                    num_pairs=1500, statistics_interval=0.1, initial_learning_rate=1.0,
                    save_path=log_path, epochs=10, concurrent_steps=1)
    path = '../../local_resources/karate/embeddings/tf_Win_polar' + '_' + utils.get_timestamp() + '.csv'
    embedding_in, embedding_out = HE.main(params)
    visualisation.plot_poincare_embedding(embedding_in, y,
                                          '../../results/karate/figs/poincare_polar_Win' + '_'
                                          + utils.get_timestamp() + '.pdf')
    visualisation.plot_poincare_embedding(embedding_out, y,
                                          '../../results/karate/figs/poincare_polar_Wout' + '_'
                                          + utils.get_timestamp() + '.pdf')
    df_in = pd.DataFrame(data=embedding_in, index=range(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    df_out = pd.DataFrame(data=embedding_out, index=range(embedding_out.shape[0]))
    df_out.to_csv('../../local_resources/karate/embeddings/tf_Wout_polar' + '_'
                  + utils.get_timestamp() + '.csv', sep=',')
    return path

def test_embeddings():
    feature_path = '../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    emd = pd.read_csv('../local_resources/hyperbolic_embeddings/tf_test1.csv', header=None,
                      index_col=0, skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(rf_features)
    features = features.loc[emd.index, :]
    y = y.loc[emd.index].values
    names = np.array([['RF just emd']])
    n_folds = 10
    classifiers = [
        RandomForestClassifier(max_depth=2, n_estimators=50, bootstrap=True, criterion='entropy',
                               max_features=0.1, n_jobs=1)
    ]
    results = run_all_datasets([emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = 'tf_testing_1in10000' + utils.get_timestamp() + '.csv'
    micro_path = 'tf_micro_1in10000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
    assert (results[0]['mean'].values > 0.6).all()

def add_item(self, dirname, parentName=None, top=True):
    '''
    Recursive function to add items to the File Tree Viewer.

    @dirname: current directory being parsed
    @parentName: Name of the parent item in tree
    @top: Specifies if a top level directory
    '''
    dirs = os.listdir(dirname)
    if top is True:
        for name in dirs:
            path = os.path.join(dirname, name)
            # get metadata information
            timestamp = u.get_timestamp(path)
            fs = u.get_fileSize(path)
            self.fileTree.insert("", 0, path, text=name, values=(timestamp, fs))
            if os.path.isdir(path) is True:
                self.add_item(path, path, False)
    else:
        for name in dirs:
            path = os.path.join(dirname, name)
            # get metadata information
            timestamp = u.get_timestamp(path)
            fs = u.get_fileSize(path)
            self.fileTree.insert(parentName, 0, path, text=name, values=(timestamp, fs))
            if os.path.isdir(path) is True:
                self.add_item(path, path, False)

def nips_experiment_runner(module, folder, learning_rate):
    """
    runs the experiments on small graphs submitted to NIPS and MLG
    :param module: The module for the relevant type of embeddings eg. HE for Hyperbolic Embedding
    :return: None
    """
    from visualisation import plot_lines_from_df
    names = ['football', 'adjnoun', 'polbooks', 'political_blogs', 'karate']
    # names = ['karate']
    for name in names:
        embedding_path = run_embedding(name, learning_rate, run_scenario=False, module=module)
        mean_path = '../../results/all/{}_means_{}.csv'.format(name, utils.get_timestamp())
        error_path = '../../results/all/{}_errors_{}.csv'.format(name, utils.get_timestamp())
        means, errors = MLD.run_test_train_split_scenario(name, embedding_path)
        means.to_csv(mean_path)
        errors.to_csv(error_path)
        outpath = '../../results/all/lineplots/{}/{}_{}.pdf'.format(folder, name, utils.get_timestamp())
        plot_lines_from_df(name, mean_path, error_path, outpath)

def simulated_tree_scenario(branching_factor, levels):
    import visualisation
    folder = '../../local_resources/simulated_trees'
    deepwalk_path = '../../local_resources/simulated_trees/deepwalk_z{}_l{}.emd'.format(
        branching_factor, levels)
    walk_path = '../../local_resources/simulated_trees/walks_long_z{}_l{}.emd'.format(
        branching_factor, levels)
    emb_path = create_adj_mat(folder, branching_factor, levels)
    generate_simulated_tree(emb_path, walk_path, deepwalk_path)
    deepwalk_emd = pd.read_csv(deepwalk_path, header=None, index_col=0, skiprows=1, sep=" ")
    s = datetime.datetime.now()
    # y_path = '../../local_resources/blogcatalog_121_sample/y.p'
    # y = utils.read_pickle(y_path)
    y = generate_y(branching_factor, levels)
    log_path = '../../local_resources/tf_logs/sim_tree/'
    # walk_path = '../../local_resources/simulated_trees/walks.csv'
    size = 2  # dimensionality of the embedding
    params = Params(walk_path, batch_size=4, embedding_size=size, neg_samples=5, skip_window=5,
                    num_pairs=1500, statistics_interval=0.1, initial_learning_rate=1.0,
                    save_path=log_path, epochs=20, concurrent_steps=4)
    path = '../../local_resources/simulated_trees/embeddings/Win' + '_' + utils.get_timestamp() + '.csv'
    embedding_in, embedding_out = HE.main(params)
    visualisation.plot_deepwalk_embedding(
        deepwalk_emd.values, y,
        '../../results/simulated_trees/figs/deepwalk_z{}_l{}_{}.pdf'.format(
            branching_factor, levels, utils.get_timestamp()))
    visualisation.plot_poincare_embedding(
        embedding_in, y,
        '../../results/simulated_trees/figs/hyp_z{}_l{}_{}.pdf'.format(
            branching_factor, levels, utils.get_timestamp()))
    df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    return path

def run_embedding(folder, learning_rate, run_scenario=True, module=HE):
    """
    Generate an embedding for a given graph
    :param folder: the name of the folder and also the graph
    :param run_scenario: True if cv results are required
    :param module: An alias for the module containing the specific embedding
    :return: the path to the embedding
    """
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/{}/y.p'.format(folder)
    targets = utils.read_pickle(y_path)
    y = np.array(targets['cat'])
    log_path = '../../local_resources/tf_logs/run1/'
    walk_path = '../../local_resources/{}/walks_n1_l10.csv'.format(folder)
    size = 4  # dimensionality of the embedding
    params = Params(walk_path, batch_size=4, embedding_size=size, neg_samples=5, skip_window=5,
                    num_pairs=1500, statistics_interval=10.0, initial_learning_rate=learning_rate,
                    save_path=log_path, epochs=5, concurrent_steps=4)
    path = '../../local_resources/{0}/embeddings/Win_{1}.csv'.format(folder, utils.get_timestamp())
    embedding_in, embedding_out = module.main(params)
    visualisation.plot_poincare_embedding(
        embedding_in, y,
        '../../results/all/embedding_figs/{}_Win_{}.pdf'.format(folder, utils.get_timestamp()))
    visualisation.plot_poincare_embedding(
        embedding_out, y,
        '../../results/all/embedding_figs/{}_Wout_{}.pdf'.format(folder, utils.get_timestamp()))
    df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
    df_out.to_csv('../../local_resources/{0}/embeddings/Wout_{1}.csv'.format(
        folder, utils.get_timestamp()), sep=',')
    print('{} embedding generated in: '.format(folder), datetime.datetime.now() - s)
    if run_scenario:
        MLD.run_scenario(folder, path)
    return path

def _check_data_validity(self):
    if self.data is None:
        raise HDError(domain='HDORMObject', errno=HDORMObject.ERR_NO_DATA,
                      msg=u'Set data first to check data.')
    if '_id' not in self.data:
        self.data['_id'] = self._generate_internal_id()
    if '_ts' not in self.data:
        self.data['_ts'] = utils.get_timestamp()
    if '_cts' not in self.data:
        self.data['_cts'] = utils.get_timestamp()

def fill_db(self):
    """ Add some data to the database. """
    ts_start = get_timestamp(DAY_STRING1)
    ts_end = get_timestamp(DAY_STRING2)
    for timestamp in range(ts_start, ts_end, 600):
        sender = random.randint(0, MAX_USERS)
        receiver = random.randint(0, MAX_USERS)
        sent_sum = random.randint(0, MAX_SUM)
        manager.add_transaction(sender, receiver, sent_sum, timestamp)

def start(name=None):
    global TIMERS
    if name is None:
        name = 'timer-%s' % get_timestamp()
    TIMERS[name] = {
        'start': get_timestamp(),
        'active': True,
    }
    return name

def follow(follower, followee, pipe=None):
    """Create a connection between follower and followee."""
    if pipe is None:
        pipe = conn.pipeline()
        execute = True
    else:
        execute = False
    pipe.zadd(utils.get_followers_key(followee), follower, utils.get_timestamp())
    pipe.zadd(utils.get_followees_key(follower), followee, utils.get_timestamp())
    if execute:
        pipe.execute()

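# Compatibility note (an assumption about the client, not stated in the source):
# the positional zadd call above matches redis-py < 3.0. On redis-py >= 3.0,
# zadd takes a mapping of member -> score instead, so the same write would be:
#   pipe.zadd(utils.get_followers_key(followee), {follower: utils.get_timestamp()})
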
def gensim_1in10000_emd_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    emd = pd.read_csv('../../local_results/customer.emd', header=None, index_col=0, skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(rf_features)
    # select only the data points that we have embeddings for
    features = features.loc[emd.index, :]
    y = y.loc[emd.index].values
    all_feat = features.join(emd, how='inner')
    print('input features shape', all_feat.shape)
    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    names = np.array([['L2 without emd'], ['L2 with emd'], ['L2 just emd']])
    n_folds = 5
    clf = LogisticRegression(multi_class='ovr', penalty='l2', solver='liblinear', n_jobs=1,
                             max_iter=1000, C=0.005)
    df = run_repetitions([X1, X2, emd.values], y, clf, names, reps=10)
    print(df)
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    # distinct macro/micro filenames so the second write does not clobber the first
    macro_path = '../../results/neural/gensim_macro_1in10000' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/neural/gensim_micro_1in10000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

def update_backend(self, hostname, backend_name, state, status_code, status_text):
    backend = self.get_backend(hostname, backend_name)
    if backend == {}:
        server = self._get_server(hostname)
        server[BACKEND_KEY].append({'name': backend_name,
                                    'state': state,
                                    'status_code': status_code,
                                    'status_text': status_text,
                                    'timestamp': utils.get_timestamp()})
    else:
        backend['state'] = state
        backend['status_code'] = status_code
        backend['status_text'] = status_text
        backend['timestamp'] = utils.get_timestamp()

def batch_size_scenario():
    """
    Generate embeddings using different batch sizes for the ~1000 vertex polblogs network
    :return:
    """
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/polblogs/'
    walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
    embeddings = []
    for batch_size in batch_sizes:
        params = Params(walk_path, batch_size=batch_size, embedding_size=size, neg_samples=5,
                        skip_window=5, num_pairs=1500, statistics_interval=10.0,
                        initial_learning_rate=0.1, save_path=log_path, epochs=5, concurrent_steps=4)
        path = '../../local_resources/political_blogs/embeddings/Win_batch_{}_{}.csv'.format(
            batch_size, utils.get_timestamp())
        embedding_in, embedding_out = HCE.main(params)
        visualisation.plot_poincare_embedding(
            embedding_in, y,
            '../../results/political_blogs/figs/poincare_polar_Win_batch_{}_{}.pdf'.format(
                batch_size, utils.get_timestamp()))
        visualisation.plot_poincare_embedding(
            embedding_out, y,
            '../../results/political_blogs/figs/poincare_polar_Wout_batch_{}_{}.pdf'.format(
                batch_size, utils.get_timestamp()))
        df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
        df_in.to_csv(path, sep=',')
        df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
        df_out.to_csv('../../local_resources/political_blogs/embeddings/Wout_batch_{}_{}.csv'.format(
            batch_size, utils.get_timestamp()), sep=',')
        print('political blogs embedding generated in: ', datetime.datetime.now() - s)
        embeddings.append(embedding_in)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    names = [[str(batch_size)] for batch_size in batch_sizes]
    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # write macro and micro p-values to separate files so one does not overwrite the other
    tests[0].to_csv('../../results/political_blogs/batch_size_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/batch_size_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/political_blogs/batch_size_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/political_blogs/batch_size_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
    return path

def get_balance(user, since, until):
    """
    Computes a user's balance over a given time interval.

    @param user - integer representing user id
    @param since - string representing the start date
    @param until - string representing the end date
    """
    start_ts = get_timestamp(since)
    end_ts = get_timestamp(until)
    balance = -Transaction.objects(
        Q(sender=user) & Q(timestamp__gte=start_ts) & Q(timestamp__lt=end_ts)).sum('amount')
    return balance

def snowflake(datacenterId, workerId, epoch):
    global lastTimestamp, sequence, sequenceMask
    datacenterId = datacenterId & datacenterIdBits
    workerId = workerId & workerIdBits
    timestamp = get_timestamp()
    if timestamp < lastTimestamp:
        raise RuntimeError("Clock moved backwards")
    if timestamp == lastTimestamp:
        sequence = (sequence + 1) & sequenceMask
        if sequence == 0:
            timestamp = til_next_millis(lastTimestamp)
    else:
        sequence = 0
    lastTimestamp = timestamp
    timestamp = timestamp - int(epoch * 1000)
    guoidValue = (timestamp << timestampLeftShift) | \
                 (datacenterId << datacenterIdShift) | \
                 (workerId << workerIdShift) | \
                 sequence
    return guoidValue

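# Worked example of the bit layout above (illustrative only: the shift constants
# are module globals not shown here, so the classic Snowflake layout is assumed,
# i.e. timestampLeftShift=22, datacenterIdShift=17, workerIdShift=12):
#   (1000 << 22) | (3 << 17) | (7 << 12) | 42 == 4194725930
# and the fields decode back out with masks, e.g. sequence = id & 0xFFF,
# workerId = (id >> 12) & 0x1F, datacenterId = (id >> 17) & 0x1F.
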
def _setup_logger():
    """Configure logger.

    Returns:
        logging instance.
    """
    log_format = '%(levelname)-8s %(message)s'
    if not os.path.isdir(_Logger._log_dir):
        os.makedirs(_Logger._log_dir)
    filename = '%s/pyser_%s.log' % (_Logger._log_dir, utils.get_timestamp())
    logging.basicConfig(level=logging.DEBUG, format=log_format, filename=filename, filemode='w')
    if _Logger.verbose:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter(log_format))
        logging.getLogger('').addHandler(console)
    ret = logging.getLogger('pyser')
    return ret

def login_sso(request, data, db: Session):
    session_id = data["session_id"]
    user_id = int(str(session_id).split("@")[0])
    user_info = user_manager.get_user_info_by_user_id(user_id, db)
    if not user_info:
        return api_response_data("error_user_not_existed")
    client_ids = application_manager.get_list_enable_sso_client_ids(db)
    if data["client_id"] not in client_ids:
        return api_response_data("error_application_not_enable_sso")
    sso_session_obj = session_manager.get_session_by_user_id(user_id, db)
    if not sso_session_obj:
        return api_response_data("error_sso_session_not_found")
    if sso_session_obj.expired_time < get_timestamp():
        return api_response_data("error_sso_session_expired")
    token_obj = login_manager.get_token_obj(
        data["app_id"], data["client_id"], data["client_secret"],
        data["redirect_url"], user_info, data["algorithm"], db
    )
    return api_response_data("success", {"token_obj": token_obj})

def update_stats_db(cursor):
    params = {
        'time': get_timestamp('%Y/%m/%d'),
        'wa_nation_num': data['wa_nation_num'],
        'perc_wa_regional_nation_num': data['perc_wa_to_region'],
        'perc_wa_ns_wa': data['perc_wa_to_ns_wa'],
        'regional_nation_num': data['regional_nation_num'],
        'perc_regional_nation_num_ns': data['perc_regional_nation_num_ns'],
        'ns_nation_num': data['ns_nation_num'],
        'perc_max_in_endo_num_wa': data['perc_max_in_endo_num_wa'],
        'crs_avg_in_endo_num': data['crs_avg_in_endo_num'],
        'SPCG_avg_in_endo_num': data['SPCG_avg_in_endo_num'],
        'crsdel_num': data['crsdel_num'],
        'perc_crsdel_num_wa': data['perc_crsdel_num_wa'],
        'endo_num': data['endo_num'],
        'density_num': data['density_num'],
        'wa_nations': ",".join(data.nx_graph.nodes)
    }
    query_str = """INSERT INTO stats
                   VALUES (:time, :wa_nation_num, :perc_wa_regional_nation_num, :perc_wa_ns_wa,
                           :regional_nation_num, :perc_regional_nation_num_ns, :ns_nation_num,
                           :perc_max_in_endo_num_wa, :crs_avg_in_endo_num, :SPCG_avg_in_endo_num,
                           :crsdel_num, :perc_crsdel_num_wa, :endo_num, :density_num, :wa_nations)"""
    cursor.execute(query_str, params)
    logger.info('Updated stats table')

def ping_send(self):
    """Send the initial ping message to the client at a certain interval.

    Ping mechanism (S for server, C for client, t-i for i-th timestamp):
        packet 0: S->C, t-0
        packet 1: C->S, t-0 + t-1
        packet 2: S->C, t-1
    In this way, both server and client get the round-trip latency.

    Packet format (before encryption):
        "1"       (1 byte)          (type flag for ping)
        seq       (1 byte)          (0, 1 or 2)
        timestamp (11 or 22 bytes)  (time in milliseconds, in hexadecimal)
    """
    raw_packet = "1" + "0" + get_timestamp()
    to_write = self.cipher.encrypt(raw_packet) + self.split_char
    if self.authenticated:
        # logging.debug("send ping0")
        self.transport.write(to_write)
    interval = random.randint(500, 1500) / 100
    if self.initiator.obfs_level == 3:
        RESET_INTERVAL = 5
    else:
        RESET_INTERVAL = 2
    self.cronjob = reactor.callLater(interval, self.ping_send)
    self.cancel_job = reactor.callLater(RESET_INTERVAL, self.close)

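# A minimal parse sketch (not from the source) for the packet format documented
# in the docstring above, assuming fixed-width ASCII fields as described:
def parse_ping_packet(raw_packet):
    """Split a decrypted ping packet into (type_flag, seq, timestamp)."""
    type_flag = raw_packet[0]    # "1" marks a ping packet
    seq = int(raw_packet[1])     # 0, 1 or 2
    timestamp = raw_packet[2:]   # 11 or 22 hex-millisecond characters
    return type_flag, seq, timestamp
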
def home(solicitud):
    # if the value does not yet exist in the database
    try:
        es_vivo = Setting.objects.get(key='en_vivo').value
    except Setting.DoesNotExist:
        es_vivo = False
    # check whether we are broadcasting live
    # and return the "live" view if so
    if es_vivo:
        return render_to_response('./live.html')
    # if there are no videos yet
    try:
        ultimo_video = Video.objects.filter(activado=True).latest('fecha')
    except Video.DoesNotExist:
        ultimo_video = None
    proximos = Video.objects.filter(proximo=True)
    ultimos_4_videos = Video.objects.filter(activado=True).order_by('-fecha')[1:5]
    pais = get_pais(solicitud.META)
    # template
    return render_to_response('./home.html', {
        'ultimo_video': ultimo_video,  # the latest video
        'videos': ultimos_4_videos,  # the last 4 videos
        'pais': pais,  # the localized show schedule
        'timestamp': get_timestamp(),  # timestamp of the next scheduled broadcast
        'cursos': Curso.objects.filter(activado=True,
                                       fecha__gte=datetime.datetime.now()).order_by('fecha'),
        'cursos_geo': Curso.objects.filter(activado=True, fecha__gte=datetime.datetime.now(),
                                           pais=pais).order_by('fecha'),
        'proximo': proximos[0] if proximos.exists() else None
    })

def update_last_item():
    # TODO: Check if last working day has been already included into the system
    ec = EuriborCrawl()
    db = BigQuery()
    last_item = ec.get_last_euribor_rate_dict()
    last_item_pd = {k: [v] for k, v in last_item.items()}
    now = get_timestamp()
    last_item_pd['date_insertion'] = now
    last_item_pd['date_insertion'] = pd.to_datetime(last_item_pd['date_insertion'])
    df_last_item = pd.DataFrame.from_dict(last_item_pd)
    df_last_item['eur_date'] = pd.to_datetime(df_last_item['eur_date'])
    schema = [
        db.bigquery.SchemaField("eur_date", db.bigquery.enums.SqlTypeNames.TIMESTAMP),
        db.bigquery.SchemaField("eur_1w", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_1m", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_3m", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_6m", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_12m", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_year", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("eur_month", db.bigquery.enums.SqlTypeNames.NUMERIC),
        db.bigquery.SchemaField("date_insertion", db.bigquery.enums.SqlTypeNames.TIMESTAMP),
    ]
    db.insert_dataframe(TABLE_NAME, df_last_item, schema)

def plot_most_polarity_user(self, df, polarity):
    """
    Plots the 20 users with the most positive/negative tweets

    :param df: input dataframe
    :type df: pandas.DataFrame
    :param polarity: value of polarity to look at (Positive, Negative, Neutral)
    :type polarity: str
    """
    neg_polarity_df = df.loc[df['t_polarity'] == polarity][['u_screen_name', 't_polarity']]
    sorted_users = neg_polarity_df.groupby('u_screen_name').count().sort_values(
        by="t_polarity", ascending=False)
    sorted_users.reset_index(level=0, inplace=True)
    if len(sorted_users['t_polarity'].unique()) == 1:
        palette = ['silver' for x in sorted_users['t_polarity']]
    else:
        palette = ['silver' if (x < max(sorted_users['t_polarity'])) else 'red'
                   for x in sorted_users['t_polarity']]
    self.horiz_plot(
        data=sorted_users,
        x="t_polarity",
        y="u_screen_name",
        palette=palette,
        xlim_upper=max(sorted_users['t_polarity']) + 1,
        max_values=20,
        figsize=(15, 25),
        title="Users with the most {} tweets (top 20 users)".format(polarity.lower()),
        output_name="20_users_most_{}_tweets_{}.png".format(polarity.lower(), get_timestamp()))

def __walk_on_the_backtrace(self):
    """All is in the function name

    Returns:
        dict: file info
    """
    btinfo = self.info['btinfo']
    bt_common_part = btinfo['common_part']
    backtraces = btinfo['backtraces']
    # get the uuid where the bt has the better stats
    uuid = max(backtraces.items(), key=lambda x: x[1][1])[1][0]
    fileinfo = None
    bt = backtrace.get_files(uuid, common=bt_common_part)
    if len(bt) >= 2:
        ts = utils.get_timestamp(self.first_date)
        # remove the first (already done)
        for i in range(1, len(bt)):
            m = self.hg_pattern.match(bt[i])
            if m:
                filename = m.group(1)
                node = m.group(2)
                fs = FileStats(path=filename, channel=self.channel, node=node, utc_ts=ts)
                fileinfo = fs.get_info(guilty_only=True)
                if fileinfo:
                    # hurrah \o/ we found a pertinent file !
                    break
    return fileinfo

def karate_scenario():
    walks = pd.read_csv('local_resources/zachary_karate/walks1_len10_p1_q1.csv', header=None).values
    x_path = 'local_resources/zachary_karate/X.p'
    y_path = 'local_resources/zachary_karate/y.p'
    targets = utils.read_pickle(y_path)
    y = np.array(targets['cat'])
    vocab_size = get_vocab_size(x_path, bipartite=False)
    print('vocab of size: ', vocab_size)
    # define the noise distribution
    elems, unigrams = np.unique(walks, return_counts=True)
    print('unigram distribution', list(zip(elems, unigrams)))
    embeddings = []
    names = []
    n_reps = 10
    train_size = 4  # the number of labelled points to use
    size = 2
    for window in range(1, 10):
        params = Params(batch_size=4, embedding_size=size, neg_samples=5,
                        skip_window=window,  # vary the context window to match the names below
                        num_pairs=1500, logging_interval=100, initial_learning_rate=0.2)
        embedding = main('local_resources/zachary_karate/tf.emd', walks, unigrams, params)
        embeddings.append(embedding)
        names.append(['window' + str(window)])
        if size == 2:
            visualisation.plot_embedding(embedding, y,
                                         'results/karate/figs/window_' + str(window) + '_'
                                         + utils.get_timestamp() + '.pdf')
    karate_results(embeddings, names, n_reps, train_size)

def main():
    args = parser.parse_args()
    args.timestamp = tools.get_timestamp()
    tools.mkdir_or_exist(args.workdir)
    tools.setup(args.benchmark, args.deterministic, args.seed)
    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])
    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)

def __init__(self, user_id, post_id, text):
    self.id = random.randint(100000, 10000000)
    self.created = get_timestamp()
    self.post_id = post_id
    self.user_id = user_id
    self.text = text

def lipa_na_mpesa():
    formatted_time = get_timestamp()
    decoded_password = generate_password(formatted_time)
    access_token = generate_access_token()
    api_url = "https://sandbox.safaricom.co.ke/mpesa/stkpush/v1/processrequest"
    headers = {"Authorization": "Bearer %s" % access_token}
    request = {
        "BusinessShortCode": keys.business_shortCode,
        "Password": decoded_password,
        "Timestamp": formatted_time,
        "TransactionType": "CustomerPayBillOnline",
        "Amount": "5",
        "PartyA": keys.phone_number,
        "PartyB": keys.business_shortCode,
        "PhoneNumber": keys.phone_number,
        "CallBackURL": "https://fullstackdjango.com/lipanampesa/",
        "AccountReference": "12345678",
        "TransactionDesc": "Pay School Fees"
    }
    response = requests.post(api_url, json=request, headers=headers)
    print(response.text)

def prepare_payload(self):
    """Prepare payload for Gremlin."""
    query_str = cve_node_replace_script_template
    timestamp = get_timestamp()
    bindings = {
        'cve_id': self._cve_dict.get('cve_id'),
        'description': self._cve_dict.get('description'),
        'cvss_v2': self._cve_dict.get('cvss_v2'),
        'ecosystem': self._cve_dict.get('ecosystem'),
        'modified_date': timestamp
    }
    if self._cve_dict.get('nvd_status'):
        query_str += cve_node_replace_script_template_nvd_status
        bindings['nvd_status'] = self._cve_dict.get('nvd_status')
    if self._cve_dict.get('fixed_in'):
        for ver in self._cve_dict.get('fixed_in'):
            query_str += "cve_v.property('fixed_in', '" + ver + "');"
    for epv_dict in self._cve_dict.get('affected'):
        edge_str = add_affected_edge_script_template.format(
            ecosystem=self._cve_dict.get('ecosystem'),
            name=epv_dict.get('name'),
            version=epv_dict.get('version'))
        query_str += edge_str
    payload = {
        'gremlin': query_str,
        'bindings': bindings
    }
    return payload

def pickle_onions(self):
    ts = get_timestamp("log")
    pickle_jar = join(_log_dir, "class-data_{}.pickle".format(ts))
    self.logger.info("Pickling class data to "
                     "{pickle_jar}...".format(**locals()))
    with open(pickle_jar, "wb") as pj:
        pickle.dump(self.class_data, pj)
    symlink_cur_to_latest(join(_log_dir, "class-data"), ts, "pickle")

def open(self):
    csv_file = None
    file_writer = None
    timestamp = utils.get_timestamp()
    new_file = self.filename + timestamp
    try:
        csv_file = open('logs/' + new_file, 'w+')
    except Exception:
        pass

def test_rejects_invalid_tokens(self):
    """Tests that an invalid token is rejected."""
    config.set(xsrf_token_key='abcdef')
    tool = utils.XsrfTool()
    self.assertFalse(tool.verify_token(
        'ThisTokenDoesNotEvenHaveASlash', 12345, 'test_action'))
    timestamp = utils.get_timestamp(XsrfToolTests.TEST_NOW)
    self.assertFalse(
        tool.verify_token('NotTheRightDigest/%f' % timestamp, 12345, 'test_action'))

def make_ts_dir(self, parent_dir=_log_dir, raw_dir_name="batch"):
    """Creates a timestamped folder to hold a group of traces."""
    raw_dirpath = join(parent_dir, raw_dir_name)
    ts = get_timestamp("log")
    ts_dir = timestamp_file(raw_dirpath, ts, is_dir=True)
    symlink_cur_to_latest(raw_dirpath, ts)
    with open(join(ts_dir, "control.pickle"), "wb") as fh:
        pickle.dump(self.control_data, fh)
    return ts_dir

def generate_plot(self):
    timestamps = [get_timestamp(t['created_at']) for t in self.source.iter_tweets()]
    bins = self.get_time_bins(timestamps)
    tally = [0] * self.steps
    for tstamp in timestamps:
        j = self.get_bin(tstamp, bins)
        tally[j] = tally[j] + 1
    plt.plot(bins, tally)
    dates = ["%s:%s" % (get_date(a).hour, get_date(a).minute)
             for i, a in enumerate(bins) if i % 3 == 0]
    plt.xticks([b for i, b in enumerate(bins) if i % 3 == 0], dates, rotation=0)

def update(self, document, do_refresh=False, do_update_timestamp=True):
    '''
    Update object from DB synchronously.

    @param document: dictionary specifying the document to use for the update or insert.
                     (ex: {'$set': {'un': 'Sunhong Kim'}})
    @param do_refresh: update current object data or not.
    @param do_update_timestamp: update timestamp or not.
    '''
    hdapp = HDApp.shared_app
    col = hdapp.get_collection_conn(col_name=self._collection_name())
    if do_update_timestamp:
        if '$set' in document:
            document['$set']['_ts'] = utils.get_timestamp()
        else:
            document['$set'] = {'_ts': utils.get_timestamp()}
    col.update({'_id': self.iid}, document)
    if do_refresh:
        self.read()

def server_stats(name):
    backends = api.VARNISH_STATE.get_backends(name)
    if backends == {}:
        return {}
    varnishstats = api.VARNISH_STATE.get_varnishstats(name)
    stats = {'process': api.VARNISH_STATE.get_process(name),
             'varnishstats': varnishstats['varnishstats'],
             'backends': backends['backends'],
             'timestamp': utils.get_timestamp()}
    return stats

def add_me(bot, update):
    bot.sendChatAction(chat_id=update.message.chat.id, action=telegram.ChatAction.TYPING)
    if utils.is_private(update.message):
        conn = sqlite3.connect(configuration.db_users_path)
        c = conn.cursor()
        selectData = str(update.message.chat.id)
        c.execute("SELECT COUNT(*) FROM USERS WHERE chat_id = ?", [selectData])
        result = int(c.fetchone()[0])
        print(result)
        if result == 0:
            insertData = (str(update.message.chat.id),
                          str(update.message.chat.first_name),
                          str(update.message.chat.last_name),
                          str(utils.get_timestamp()),
                          str(utils.get_timestamp()),
                          0, 0, 0)
            c.execute("INSERT INTO USERS VALUES (null,?,?,?,?,?,?,?,?)", insertData)
            conn.commit()
            bot.sendMessage(chat_id=update.message.chat.id, text='Your request has been submitted.')
        else:
            bot.sendMessage(chat_id=update.message.chat.id, text='Request already submitted.')
        conn.close()

def test_repo_feed_all_launched_repos(self):
    config.set_for_repo('haiti', deactivated=True, launched=True, test_mode=False)
    config.set_for_repo('japan', deactivated=False, launched=True, test_mode=True,
                        updated_date=utils.get_timestamp(datetime.datetime(2012, 3, 11)))
    config.set_for_repo('pakistan', deactivated=False, launched=False, test_mode=False)
    # 'haiti', 'japan', and 'pakistan' exist in the datastore. Only those
    # which are 'launched' and not 'deactivated' i.e., only 'japan' should
    # appear in the feed.
    doc = self.go('/global/feeds/repo')
    expected_content = u'''\
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom"
      xmlns:gpf="http://schemas.google.com/personfinder/2012"
      xmlns:georss="http://www.georss.org/georss">
  <id>http://%s/personfinder/global/feeds/repo</id>
  <title>Person Finder Repository Feed</title>
  <updated>2012-03-11T00:00:00Z</updated>
  <entry>
    <id>%s/japan</id>
    <published>2011-03-11T00:00:00Z</published>
    <updated>2012-03-11T00:00:00Z</updated>
    <title xml:lang="ja">2011 日本地震</title>
    <content type="text/xml">
      <gpf:repo>
        <gpf:title xml:lang="ja">2011 日本地震</gpf:title>
        <gpf:title xml:lang="en">2011 Japan Earthquake</gpf:title>
        <gpf:title xml:lang="ko"></gpf:title>
        <gpf:title xml:lang="zh-CN">2011 日本地震</gpf:title>
        <gpf:title xml:lang="zh-TW">2011 日本地震</gpf:title>
        <gpf:title xml:lang="pt-BR">2011 Terremoto no Japão</gpf:title>
        <gpf:title xml:lang="es">2011 Terremoto en Japón</gpf:title>
        <gpf:read_auth_key_required>true</gpf:read_auth_key_required>
        <gpf:search_auth_key_required>true</gpf:search_auth_key_required>
        <gpf:test_mode>true</gpf:test_mode>
        <gpf:location>
          <georss:point>38 140.7</georss:point>
        </gpf:location>
      </gpf:repo>
    </content>
  </entry>
</feed>
''' % (self.hostport, ROOT_URL)
    assert expected_content == doc.content, \
        text_diff(expected_content, doc.content)
    # verify we logged the repo read.
    self.verify_api_log(ApiActionLog.REPO, api_key='')

def wrapper(self, *args):
    clean_args = []
    for arg in args:
        if type(arg) is not datetime:
            try:
                parsed_date = parser.parse(arg.replace('"', ''))
                zoned_date = get_timestamp(parsed_date)
                clean_args.append(zoned_date)
            except ValueError:
                pass
        else:
            clean_args.append(arg)
    return func(self, *clean_args)

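# Hypothetical usage sketch (names below are illustrative, not from the source):
# `wrapper` looks like the inner function of a decorator that parses quoted date
# strings into zoned timestamps before delegating to `func`, so a decorated
# method could accept either datetimes or strings, e.g.:
#   store.transactions_between('"2021-01-01"', '"2021-02-01"')
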
def main():
    print('Running speedtest')
    cmd = 'speedtest --simple --share'
    print('Checking to see if %s exists' % speedtest_file_loc)
    if not os.path.exists(speedtest_file_loc):
        print('Creating directory structure at %s' % speedtest_file_loc)
        make_directories()
    filename = get_speedtest_file_name()
    print('Running speedtest and pushing results to %s' % filename)
    logfile = open(filename, 'w')
    p = subprocess.Popen(cmd, shell=True, universal_newlines=True, stdout=logfile)
    ret_code = p.wait()
    logfile.flush()
    logfile.close()
    print(ret_code)
    print('Log published at %s.' % filename)
    print('Processing logfile output')
    (timestamp, ping, upload, download, share_results) = get_output(filename)
    print('Generating JSON data for insertion into Mongo.')
    json_data = generate_json_data(timestamp, ping, upload, download, share_results)
    print('Generating CSV data.')
    csv_data = generate_csv_data(timestamp, ping, upload, download, share_results)
    print('Writing out CSV data.')
    csv_file = get_file_from_dropbox(dropbox_file_name)
    write_mode = WriteMode.add
    if csv_file is None:
        csv_file = workbook_name
        print("Creating new CSV file.")
        f = open(csv_file, 'w')
        f.write(csv_data[0])
        f.write(csv_data[1])
        f.close()
    else:
        print("Appending existing file.")
        f = open(csv_file, 'a')
        f.write(csv_data[1])
        f.close()
        write_mode = WriteMode.overwrite
    print('Uploading workbook to Dropbox.')
    upload_to_dropbox(csv_file, dropbox_file_name, write_mode=write_mode)
    print('Inserting data into Mongo.')
    save_to_mongo([json.loads(json_data)])
    print('Pinging service in order to send email.')
    if ping_service():
        print('Sending email')
        email_timestamp = get_timestamp(dt_format=email_date_time_format)
        request_mapping = construct_request_body(email_timestamp, ping, upload, download,
                                                 share_results, type_='speedtest')
        send_email_request(request_mapping)
    print('Cleaning up.')
    clean_dir(speedtest_file_loc)
    print('Done.')

def testCsvData(self):
    timestamp = get_timestamp()
    ping = 99.0
    upload = 9
    download = 10
    share_results = 'blah'
    csv_string = generate_csv_data(timestamp, ping, upload, download, share_results)
    self.assertIsNotNone(csv_string)
    self.assertTrue(2 == len(csv_string))
    self.assertIn(str(ping), csv_string[1])
    self.assertIn(str(download), csv_string[1])
    self.assertIn(str(upload), csv_string[1])
    self.assertIn(str(timestamp), csv_string[1])
    self.assertIn(str(share_results), csv_string[1])

def test_bad_with_no_prior_key(self):
    """Tests a bad token when a token key has to be autogenerated.

    If the config doesn't already have an XSRF token key set, the XSRF tool
    will generate one automatically.
    """
    # config seems to be shared across tests, so we have to specifically set
    # it to None.
    config.set(xsrf_token_key=None)
    tool = utils.XsrfTool()
    timestamp = utils.get_timestamp(XsrfToolTests.TEST_NOW)
    self.assertFalse(
        tool.verify_token('NotTheRightDigest/%f' % timestamp, 12345, 'test_action'))

def testJsonData(self):
    timestamp = get_timestamp()
    ping = 99.0
    upload = 9
    download = 10
    share_results = 'blah'
    json_string = generate_json_data(timestamp, ping, upload, download, share_results)
    self.assertIsNotNone(json_string)
    mapping = json.loads(json_string)
    self.assertEqual('speedtest', mapping['name'])
    attributes = mapping['attributes']
    self.assertEqual(ping, attributes['ping'])
    self.assertEqual(upload, attributes['upload'])
    self.assertEqual(download, attributes['download'])
    self.assertEqual(share_results, attributes['share_results'])

def to_msg(self):
    """ """
    msg = "# Automatically Generated in " + str(get_timestamp()) + "\n"
    msg += "# MESSAGE: " + self.name + "\n"
    if self.description is not None:
        msg += "# Description:" + self.description.replace("\n", "\n#") + "\n"
    msg += "uint8 ID = " + self.id + "\n"
    msg += "uint8 sysid" + "\n"
    msg += "uint8 compid" + "\n"
    for field in self.msg_fields:
        msg += field.to_string()
    return msg

def add_onions(self, class_data):
    """Add sorted onions into the HS history table"""
    onions = []
    ts = get_timestamp("db")
    for class_name, class_urls in class_data.items():
        onions += [self.Onion(
            hs_url='{}{}'.format(hs_url.split('onion')[0], 'onion'),
            is_sd=True if 'sd' in class_name else False,
            sd_version=class_name.split('_')[1] if 'sd' in class_name else 'N/A',
            is_current=True,
            sorted_class=class_name,
            t_sort=ts) for hs_url in class_urls]
    with self.safe_session() as session:
        session.bulk_save_objects(onions)

def load(self):
    backup_file = self.sample["crab"]["taskdir"] + "/backup.pkl"
    if os.path.isfile(backup_file):
        with open(backup_file, "rb") as fhin:  # pickle files must be opened in binary mode
            d_tot = pickle.load(fhin)
        for key in d_tot["sample"].keys():
            self.sample[key] = d_tot["sample"][key]
        for key in d_tot["misc"].keys():
            self.misc[key] = d_tot["misc"][key]
        last_saved = self.misc["last_saved"]
        if last_saved:
            min_ago = round((u.get_timestamp() - last_saved) / 60.0)
            # self.do_log("successfully loaded %s which was last saved %i minutes ago" % (backup_file, min_ago))
            self.do_log("successfully loaded backup (last saved %i minutes ago)" % min_ago)
        else:
            self.do_log("successfully loaded %s" % (backup_file))
    else:
        self.do_log("can't load. probably a new sample.")

def __gen_header_file(self, header_name):
    """ """
    header_file = "// Automatically Generated in " + str(get_timestamp()) + os.linesep
    include_guard = self.mav_generator.package_name.upper() + "_" + header_name.upper() + "_H"
    header_file += "#ifndef " + include_guard + os.linesep
    header_file += "#define " + include_guard + os.linesep
    for include in self.mav_generator.parser.includes:
        for message in (include.messages + include.enums):
            header_file += "#include <" + include.package_name + os.sep + message.name + ".h>" + os.linesep
    for message in (self.mav_generator.parser.messages + self.mav_generator.parser.enums):
        header_file += "#include <" + self.mav_generator.package_name + os.sep + message.name + ".h>" + os.linesep
    header_file += "#endif // " + include_guard + os.linesep
    return header_file

def next(self, logical_shard_id):
    timestamp = get_timestamp()
    if timestamp < self.last_timestamp:
        raise RuntimeError("Clock moved backwards")
    if timestamp == self.last_timestamp:
        self.sequence = (self.sequence + 1) & Config.SEQUENCE_MASK
        if self.sequence == 0:
            timestamp = til_next_millis(self.last_timestamp)
    else:
        self.sequence = 0
    self.last_timestamp = timestamp
    timestamp = timestamp - (int(self.epoch) * 1000)
    guoidValue = (timestamp << Config.TIMESTAMP_LEFT_SHIFT) | \
                 (logical_shard_id << Config.LOGICAL_SHARD_ID_SHIFT) | \
                 self.sequence
    return guoidValue

def next(self):
    timestamp = get_timestamp()
    if timestamp < self.last_timestamp:
        raise RuntimeError("Clock moved backwards")
    if timestamp == self.last_timestamp:
        self.sequence = (self.sequence + 1) & Config.SEQUENCE_MASK
        if self.sequence == 0:
            timestamp = til_next_millis(self.last_timestamp)
    else:
        self.sequence = 0
    self.last_timestamp = timestamp
    timestamp = timestamp - int(self.epoch * 1000)
    guoidValue = (timestamp << Config.TIMESTAMP_LEFT_SHIFT) | \
                 (self.datacenter_id | self.worker_id | self.sequence)
    return guoidValue

def __handler(self, json):
    """Handler for Socorro supersearch

    Args:
        json (dict): json data
    """
    total = json['total']
    info = {'total': total, 'hits': json['hits']}
    info.update(json['facets'])
    # we get the first date of appearance using the different build ids
    self.first_date = Track.__get_min_buildid(info['build_id'])
    filename, node = self.__get_topmost_filename(info)
    self.info['filename'] = filename
    self.info['node'] = node
    self.info['total'] = total
    self.info['platforms'] = Track.__get_stats(info, 'platform_pretty_version')
    self.info['buildids'] = Track.__get_stats(info, 'build_id')
    self.info['versions'] = Track.__get_stats(info, 'version')
    self.info['cpu_name'] = Track.__get_stats(info, 'cpu_name')
    self.info['cpu_info'] = Track.__get_stats(info, 'cpu_info')
    self.info['reason'] = Track.__get_stats(info, 'reason')
    self.info['system_memory_use'] = Track.__get_mean_stddev(
        info, 'system_memory_use_percentage',
        prettyfy=lambda x: utils.simple_percent(round(x, 0)))
    self.info['uptime'] = Track.__get_mean_stddev(info, 'uptime',
                                                  prettyfy=lambda x: str(x) + 's')
    self.info['btinfo'] = Track.__get_bt_stats(info)
    Track.__get_url_stats(info)
    ts = utils.get_timestamp(self.first_date)
    fs = FileStats(path=filename, channel=self.channel, node=node, utc_ts=ts)
    # don't dig further here: if this file is non-pertinent we'll try the next one in the backtrace
    fileinfo = fs.get_info(guilty_only=True)
    if fileinfo and fileinfo['guilty']:
        self.info['fileinfo'] = fileinfo
    else:
        fileinfo = self.__walk_on_the_backtrace()
        if fileinfo:
            self.info['fileinfo'] = fileinfo
        else:
            # didn't find out any guilty patches... :(
            self.info['fileinfo'] = fs.get_info(guilty_only=False)

def get_output(path_to_file):
    """
    Example file output:
        Ping: 5.402 ms
        Download: 39.47 Mbit/s
        Upload: 11.47 Mbit/s
        Share results: http://www.speedtest.net/result/5327551818.png
    """
    print('Processing output from %s' % path_to_file)

    def get_and_replace(line_of_text, term, *replacements):
        result = None
        if term in line_of_text:
            result = line_of_text.replace(term, '')
            for replacement in replacements:
                result = result.replace(replacement, '')
            result = result.strip()
        return result

    timestamp = get_timestamp(dt_format=spreadsheet_date_time_format)
    ping = None
    download = None
    upload = None
    share_results = None
    f = open(path_to_file, 'r')
    lines = f.readlines()
    f.close()
    for line in lines:
        if ping is None:
            ping = get_and_replace(line, 'Ping', 'ms', ':')
        if download is None:
            download = get_and_replace(line, 'Download', 'Mbit/s', ':')
        if upload is None:
            upload = get_and_replace(line, 'Upload', 'Mbit/s', ':')
        if share_results is None:
            share_results = get_and_replace(line, 'Share results:')
    message = 'Ping=%s, Upload=%s, Download=%s, Share_Results=%s' % (ping, upload, download, share_results)
    print('Output processed. Returning values %s' % message)
    return timestamp, float(ping), float(upload), float(download), share_results

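# Illustrative call, using the sample values from the docstring above:
#   ts, ping, upload, download, url = get_output('/tmp/speedtest.log')
#   # -> (ts, 5.402, 11.47, 39.47, 'http://www.speedtest.net/result/5327551818.png')
# note the return order is (timestamp, ping, upload, download, share_results).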