class Jarvis:
    def __init__(self):
        self.website = Websites()
        self.data = Data()
        self.cmd = ''

    def begin(self, passed_cmd, choice='website'):
        # TODO: for any media command passed, sign in, then wait for other
        # commands which refer to other functions
        self.cmd = passed_cmd
        url = ctl(ctc(self.cmd))  # desired link from the prompt
        # choosing between signing in or getting the data
        if choice is None or choice.lower() == 'website':
            self.website.sign_in(url)  # open AND sign in
        elif choice.lower() == 'data':
            self.data.get_from(ctc(passed_cmd))
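# Hypothetical usage sketch (not part of the original file): it assumes Websites,
# Data, ctc, and ctl are importable from the surrounding project, and the command
# string below is only illustrative.
jarvis = Jarvis()
jarvis.begin('open youtube', choice='website')  # resolve the URL, open it, and sign in
jarvis.begin('open youtube', choice='data')     # route the same command to Data.get_from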
def __init__(self):
    T1O8 = Trial("T1O8", Data.T1O8())
    T2O1 = Trial("T2O1", Data.T2O1())
    T3O6 = Trial("T3O6", Data.T3O6())
    T4O3 = Trial("T4O3", Data.T4O3())
    T5O5 = Trial("T5O5", Data.T5O5())
    T6O2 = Trial("T6O2", Data.T6O2())
    T7O4 = Trial("T7O4", Data.T7O4())
    T8O9 = Trial("T8O9", Data.T8O9())
    T9O7 = Trial("T9O7", Data.T9O7())
    T10O10 = Trial("T10O10", Data.T10O10())
    self._data = [T1O8.data(), T2O1.data(), T3O6.data(), T4O3.data(),
                  T5O5.data(), T6O2.data(), T7O4.data(), T8O9.data(),
                  T9O7.data(), T10O10.data()]
    self._trial_list = [T1O8, T2O1, T3O6, T4O3, T5O5,
                        T6O2, T7O4, T8O9, T9O7, T10O10]
    self._nmb_odors = 10
    self._nmb_data_per_trial = 1000
    self._nmb_trials = 10
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status


@api_view(['GET'])  # required so DRF can render the Response in a function-based view
def get_data_gps(request):
    try:
        obj = Data()
        result = obj.get_gps_data()
        # Expected shape of `result`, e.g. a GeoJSON Feature:
        # result = {
        #     "geometry": {
        #         "type": "Point",
        #         "coordinates": [53.428361, -1.37398]
        #     },
        #     "type": "Feature",
        #     "properties": {}
        # }
        # result = {"sunny": 123}
        return Response(result, status=status.HTTP_200_OK)
    except Exception as e:
        # `print e.message` is Python 2 syntax and the handler returned nothing;
        # log the error and return a 500 response instead.
        print(e)
        return Response({'error': str(e)},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR)
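# Minimal URLconf wiring sketch (an assumption, not taken from this project):
# expose the view above so DRF serves it at /gps/. Module and route names are
# illustrative only.
#
# urls.py
# from django.urls import path
# from . import views
#
# urlpatterns = [
#     path('gps/', views.get_data_gps, name='gps-data'),
# ]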
def fit_predict_categorical_encoding(
    datasets,
    str_preprocess,
    encoders,
    classifiers,
    reduction_methods,
    n_components,
    test_size,
    n_splits,
    n_jobs,
    results_path,
    model_path=None,
    custom_cv=None,
):
    """
    Learning with dirty categorical variables.
    """
    path = get_data_path()
    results_path = os.path.join(path, results_path)
    model_path = os.path.join(path, model_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            print("Dataset: %s" % dataset)
            data = Data(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            special_col = [
                col for col in data.col_action
                if data.col_action[col] == "Special"
            ][0]
            if type(encoder) is list:
                # special_col = [col for col in data.col_action
                #                if data.col_action[col] == 'Special'][0]
                for i, enc in enumerate(encoder):
                    print(enc)
                    if i == 0:
                        data.col_action[special_col] = "Special"
                    else:
                        new_col = "%s_%d" % (special_col, i)
                        data.df[new_col] = data.df[special_col].copy()
                        data.col_action[new_col] = enc
                        data.xcols.append(new_col)
            for reduction_method in reduction_methods:
                print("Data shape: %d, %d" % data.df.shape)
                cv = select_cross_val(data.clf_type, n_splits, test_size,
                                      custom_cv=custom_cv,
                                      col_name=special_col)
                scaler = select_scaler()
                # Define classifiers
                clfs = instanciate_estimators(
                    data.clf_type,
                    classifiers,
                    clf_seed,
                    y=data.df.loc[:, data.ycol].values,
                    model_path=model_path,
                )
                for i, clf in enumerate(clfs):
                    print(
                        "{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {},{}".format(
                            "Prediction column", data.ycol,
                            "Task type", str(data.clf_type),
                            "Classifier", clf,
                            "Encoder", encoder,
                            "Dimension reduction", reduction_method,
                            n_components,
                        ))
                    try:
                        clf_name = clf.estimator.__class__.__name__
                        results_dict = {
                            "dataset": data.name,
                            "n_splits": n_splits,
                            "test_size": test_size,
                            "n_rows": n_rows,
                            "encoder": encoder,
                            "str_preprocess": str_preprocess,
                            "clf": [classifiers[i], clf_name,
                                    clf.estimator.get_params()],
                            "ShuffleSplit": [cv.__class__.__name__],
                            "scaler": [scaler.__class__.__name__,
                                       scaler.get_params()],
                            "sample_seed": sample_seed,
                            "shuffleseed": shuffle_seed,
                            "col_action": data.col_action,
                            "clf_type": data.clf_type,
                            "dimension_reduction": [reduction_method,
                                                    n_components],
                        }
                    except AttributeError:
                        clf_name = clf.__class__.__name__
                        results_dict = {
                            "dataset": data.name,
                            "n_splits": n_splits,
                            "test_size": test_size,
                            "n_rows": n_rows,
                            "encoder": encoder,
                            "str_preprocess": str_preprocess,
                            "clf": [classifiers[i], clf_name,
                                    clf.get_params()],
                            "ShuffleSplit": [cv.__class__.__name__],
                            "scaler": [scaler.__class__.__name__,
                                       scaler.get_params()],
                            "sample_seed": sample_seed,
                            "shuffleseed": shuffle_seed,
                            "col_action": data.col_action,
                            "clf_type": data.clf_type,
                            "dimension_reduction": [reduction_method,
                                                    n_components],
                        }
                    if verify_if_exists(results_path, results_dict):
                        print("Prediction already exists.\n")
                        continue
                    start = time.time()
                    if type(encoder) is str:
                        column_action = get_column_action(
                            data.col_action, data.xcols, encoder,
                            reduction_method, n_components, data.clf_type,
                        )
                    if type(encoder) is list:
                        column_action = get_column_action(
                            data.col_action, data.xcols, encoder[0],
                            reduction_method, n_components, data.clf_type,
                        )
                    pred = Parallel(n_jobs=n_jobs)(delayed(fit_predict_fold)(
                        data, scaler, column_action, clf, encoder,
                        reduction_method, n_components, fold, cv.n_splits,
                        train_index, test_index,
                    ) for fold, (train_index, test_index) in enumerate(
                        cv.split(data.df, data.df[data.ycol].values)))
                    pred = np.array(pred)
                    results = {
                        "fold": list(pred[:, 0]),
                        "n_train_samples": list(pred[:, 1]),
                        "n_train_features": list(pred[:, 2]),
                        "score": list(pred[:, 3]),
                        "encoding_time": list(pred[:, 4]),
                        "training_time": list(pred[:, 5]),
                    }
                    results_dict["results"] = results
                    # Saving results
                    pc_name = socket.gethostname()
                    now = "".join([
                        c for c in str(datetime.datetime.now()) if c.isdigit()
                    ])
                    filename = "%s_%s_%s_%s_%s.json" % (
                        pc_name, data.name, classifiers[i], encoder, now,
                    )
                    results_file = os.path.join(results_path, filename)
                    results_dict = array2list(results_dict)
                    # patch for nystrom + ridge
                    if clf.__class__.__name__ == "GridSearchCV":
                        if clf.estimator.__class__.__name__ == "Pipeline":
                            results_dict["clf"] = method2str(
                                results_dict["clf"])
                    write_json(results_dict, results_file)
                    print("prediction time: %.1f s." % (time.time() - start))
                    print("Saving results to: %s\n" % results_file)
    1,
), (
    0,
    2,
), (
    1,
    2,
)]
train_num_set = [5, 10, 15, 20, 25, 30, 35, 40, 45, 52]
print("Equation = a * x + b * y + c")
print("train_size\tinput_dim\ttrain_rmse\ttest_rmse\ttest_mean_error_rate\t\tmodel_params")
for input_dim in input_set:
    for train_num in train_num_set:
        data = Data("../.././data/data.txt", input_dim, train_num)
        # fit the exponential model
        xdata = data.train.X
        ydata = data.train.Y[:, 0]  # training data
        regr = linear_model.LinearRegression()
        regr.fit(xdata, ydata)
        # print('coefficients(b1,b2...):', regr.coef_)
        # print('intercept(b0):', regr.intercept_)

        def func(x, a, b, c):
def __init__(self):
    self.website = Websites()
    self.data = Data()
    self.cmd = ''
    # AUC
    count = 0.0
    for num in AUCs:
        count = count + num
    AUC = count / len(AUCs)
    return Recall, AUC


if __name__ == '__main__':
    embedding_size = Const.embedding_size
    drop_ratio = Const.drop_ratio
    epoch = Const.epoch
    batch_size = Const.batch_size

    data = Data()
    h = Helper()
    num_users = data.get_user_size()
    num_items = data.get_item_size()
    shan = SHAN(num_users, num_items, embedding_size, drop_ratio)
    # shan.load_state_dict(torch.load('SHAN2_dict.pkl'))
    # print(shan)
    if torch.cuda.is_available():
        print("using cuda")
        shan.cuda()
    lr_flag = True
    pre_mean_loss = 999
    lr = Const.lr
    for i in range(0, epoch):
    def sorption_level(self):
        if self._odor == 7 or self._odor == 8 or self._odor == 9:
            return "high"
        if self._odor == 2 or self._odor == 1 or self._odor == 5:
            return "medium"
        if self._odor == 6 or self._odor == 3 or self._odor == 4 or self._odor == 10:
            return "low"

    def set_total_spikes(self, sum):
        self._total_spikes = sum

    def get_total_spikes(self):
        return self._total_spikes


T1O8 = Trial("T1O8", Data.T1O8())
T2O1 = Trial("T2O1", Data.T2O1())
T3O6 = Trial("T3O6", Data.T3O6())
T4O3 = Trial("T4O3", Data.T4O3())
T5O5 = Trial("T5O5", Data.T5O5())
T6O2 = Trial("T6O2", Data.T6O2())
T7O4 = Trial("T7O4", Data.T7O4())
T8O9 = Trial("T8O9", Data.T8O9())
T9O7 = Trial("T9O7", Data.T9O7())
T10O10 = Trial("T10O10", Data.T10O10())

data = [T1O8.data(), T2O1.data(), T3O6.data(), T4O3.data(), T5O5.data(),
        T6O2.data(), T7O4.data(), T8O9.data(), T9O7.data(), T10O10.data()]
trial_list = [T1O8, T2O1, T3O6, T4O3, T5O5, T6O2, T7O4, T8O9, T9O7, T10O10]


def trial_matrix(trial_list):
        if cross:
            cross_point = get_cross_point(pw1, pw2, x, xs[i + 1])
            if y1[i] > y2[i]:
                adv1 += (y1[i] - y2[i]) * (cross_point - x) / 2
                adv2 += (y2[i + 1] - y1[i + 1]) * (xs[i + 1] - cross_point) / 2
            else:
                adv2 += (y2[i] - y1[i]) * (cross_point - x) / 2
                adv1 += (y1[i + 1] - y2[i + 1]) * (xs[i + 1] - cross_point) / 2
    return adv1, adv2


fig, axes = plt.subplots(2, 1)
for ds in datasets:
    print(ds)
    ref_data = Data(model, ds, 'two_stage', testset=testset)
    base_acc = ref_data.layer_acc[-1]
    x1 = [
        x[1]['mean_exit']
        for x in Data(model, ds, rt1, testset=testset).etp_data
    ]
    x2 = [
        x[1]['mean_exit']
        for x in Data(model, ds, rt2, testset=testset).etp_data
    ]
    y1 = [x[1]['acc'] for x in Data(model, ds, rt1, testset=testset).etp_data]
    y2 = [x[1]['acc'] for x in Data(model, ds, rt2, testset=testset).etp_data]
    axes[0].plot(x1, y1, label='JOINT')
    axes[0].plot(x2, y2, label='ALT')
    embeddings = {}
    for i in range(pred.shape[0]):
        embeddings[i] = pred[i]
    return embeddings, weights, time_callback, history


if __name__ == '__main__':
    for name in ['cora', 'citeseer', 'pubmed']:
        edge_path = '%s/%s-edgelist.txt' % (name, name)
        label_path = '%s/%s-label.txt' % (name, name)
        feat_path = '%s/%s-feature.txt' % (name, name)

        data = Data(edge_path, name)
        adj = data.create_adj_from_edgelist(edge_path)
        y_class = data.get_label(label_path)
        y_feat = data.get_feat(feat_path)
        print(adj.shape, y_class.shape, y_feat.shape)

        vis = False
        t1 = time.time()
        for epoch in [10]:
            embeddings, weights, time_callback, history = model(
                adj, y_class, y_feat, 128, epoch)
#         b += 1
#     return a / b

# def calc_auc(y_true, y_pred):
#     return metrics.roc_auc_score(y_true, y_pred)


if __name__ == '__main__':
    args = parse_args()
    if args.dataset == 'dblp':
        path = "../../dataset/DBLP4057_GAT_with_idx_tra200_val_800.mat"
        save_path = "../HACUD/dblp"
        data_generator = Data(path=path, save_path=save_path)
        X_train = data_generator.X_train
        X_test = data_generator.X_test
        y_train = data_generator.y_train
        y_test = data_generator.y_test

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    config = dict()
    config['n_nodes'] = data_generator.n_nodes
    config['n_metapath'] = data_generator.n_metapath
    config['n_class'] = y_train.shape[1]
    plain_adj, norm_adj, mean_adj = data_generator.get_adj_mat()
import json
import operator

import matplotlib.pyplot as plt
import numpy  # missing in the original; needed for numpy.random below

from get_data import Data

words = [
    'tech', 'apple', 'google', 'baidu', 'phone', 'internet', 'router', 'wifi',
    'artificial', 'intelligence', 'machine', 'laptop', 'desktop', 'mac',
    'automation', 'roomba', 'robot', 'microcontroller', 'software', 'hardware',
    'engineering'
]
train_cities = [
    'Toronto', 'San Francisco', 'Boston', 'New York', 'Mexico City'
]
train_scores = [800, 1000, 900, 950, 400]
test, train = Data.get_data(words, train_cities, train_scores)

rng = numpy.random

# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50

train_X = []
train_Y = []
test_X = []
test_Y = []

# The first line of cities.txt is expected to be a bracketed, quoted,
# comma-separated list of city names.
f = open('cities.txt', 'r')
text = f.readlines()[0]
cities = text[1:-1].replace('\'', '').split(',')
import sys

from get_data import Data, DistilbertData, RawBertData

model, routine = sys.argv[1:3]
testset = False
if len(sys.argv) > 3 and sys.argv[3] == 'testset':
    testset = True

datasets = ["RTE", "MRPC", "SST-2", "QNLI", "QQP", "MNLI"]
dbdata = DistilbertData()
rbdata = RawBertData(size=model.split('-')[1])

for ds in datasets:
    ref_data = Data(model, ds, 'two_stage', testset=testset)
    data = Data(model, ds, routine, testset=testset)
    col = data.etp_data

    # show data
    print(ds)
    print('etp\tlayer\tRlayer\tacc\tRacc\tdrop')
    # base_acc = rbdata.acc[ds]
    base_acc = ref_data.layer_acc[-1]
    base_layer = rbdata.layers
    if model.startswith('distil'):
        base_layer = 6
    shrink = dbdata.saving
    print('{}\t{:.1f}\t{:.3f}\t{:.2f}\t{:.3f}\t{:.2f}'.format(
        'dev', 6, shrink, dbdata.acc[ds],