class Jarvis:
    def __init__(self):
        self.website = Websites()
        self.data = Data()
        self.cmd = ''

    def begin(self, passed_cmd, choice='website'):
        # TODO: for any media command passed, sign in, then wait for other
        # commands which refer to other functions
        self.cmd = passed_cmd
        url = ctl(ctc(self.cmd))  # desired link from the prompt
        # choosing between signing in or getting the data
        if choice is None or choice.lower() == 'website':
            self.website.sign_in(url)  # open AND sign in
        elif choice.lower() == 'data':
            self.data.get_from(ctc(passed_cmd))
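# Hypothetical usage sketch (not part of the original file): it assumes Websites,
# Data, ctc, and ctl are importable from the surrounding project, and the command
# string below is only illustrative.
jarvis = Jarvis()
jarvis.begin('open youtube', choice='website')  # resolve the URL, open it, and sign in
jarvis.begin('open youtube', choice='data')     # route the same command to Data.get_from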
def __init__(self):
    T1O8 = Trial("T1O8", Data.T1O8())
    T2O1 = Trial("T2O1", Data.T2O1())
    T3O6 = Trial("T3O6", Data.T3O6())
    T4O3 = Trial("T4O3", Data.T4O3())
    T5O5 = Trial("T5O5", Data.T5O5())
    T6O2 = Trial("T6O2", Data.T6O2())
    T7O4 = Trial("T7O4", Data.T7O4())
    T8O9 = Trial("T8O9", Data.T8O9())
    T9O7 = Trial("T9O7", Data.T9O7())
    T10O10 = Trial("T10O10", Data.T10O10())
    self._data = [T1O8.data(), T2O1.data(), T3O6.data(), T4O3.data(),
                  T5O5.data(), T6O2.data(), T7O4.data(), T8O9.data(),
                  T9O7.data(), T10O10.data()]
    self._trial_list = [T1O8, T2O1, T3O6, T4O3, T5O5,
                        T6O2, T7O4, T8O9, T9O7, T10O10]
    self._nmb_odors = 10
    self._nmb_data_per_trial = 1000
    self._nmb_trials = 10
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status


@api_view(['GET'])  # required so DRF can render the Response in a function-based view
def get_data_gps(request):
    try:
        obj = Data()
        result = obj.get_gps_data()
        # Expected shape of `result`, e.g. a GeoJSON Feature:
        # result = {
        #     "geometry": {
        #         "type": "Point",
        #         "coordinates": [53.428361, -1.37398]
        #     },
        #     "type": "Feature",
        #     "properties": {}
        # }
        # result = {"sunny": 123}
        return Response(result, status=status.HTTP_200_OK)
    except Exception as e:
        # `print e.message` is Python 2 syntax and the handler returned nothing;
        # log the error and return a 500 response instead.
        print(e)
        return Response({'error': str(e)},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR)
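# Minimal URLconf wiring sketch (an assumption, not taken from this project):
# expose the view above so DRF serves it at /gps/. Module and route names are
# illustrative only.
#
# urls.py
# from django.urls import path
# from . import views
#
# urlpatterns = [
#     path('gps/', views.get_data_gps, name='gps-data'),
# ]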
def fit_predict_categorical_encoding(
    datasets,
    str_preprocess,
    encoders,
    classifiers,
    reduction_methods,
    n_components,
    test_size,
    n_splits,
    n_jobs,
    results_path,
    model_path=None,
    custom_cv=None,
):
    """
    Learning with dirty categorical variables.
    """
    path = get_data_path()
    results_path = os.path.join(path, results_path)
    model_path = os.path.join(path, model_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            print("Dataset: %s" % dataset)
            data = Data(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            special_col = [
                col for col in data.col_action
                if data.col_action[col] == "Special"
            ][0]
            if type(encoder) is list:
                # special_col = [col for col in data.col_action
                #                if data.col_action[col] == 'Special'][0]
                for i, enc in enumerate(encoder):
                    print(enc)
                    if i == 0:
                        data.col_action[special_col] = "Special"
                    else:
                        new_col = "%s_%d" % (special_col, i)
                        data.df[new_col] = data.df[special_col].copy()
                        data.col_action[new_col] = enc
                        data.xcols.append(new_col)
            for reduction_method in reduction_methods:
                print("Data shape: %d, %d" % data.df.shape)
                cv = select_cross_val(data.clf_type, n_splits, test_size,
                                      custom_cv=custom_cv,
                                      col_name=special_col)
                scaler = select_scaler()
                # Define classifiers
                clfs = instanciate_estimators(
                    data.clf_type,
                    classifiers,
                    clf_seed,
                    y=data.df.loc[:, data.ycol].values,
                    model_path=model_path,
                )
                for i, clf in enumerate(clfs):
                    print(
                        "{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {},{}".format(
                            "Prediction column", data.ycol,
                            "Task type", str(data.clf_type),
                            "Classifier", clf,
                            "Encoder", encoder,
                            "Dimension reduction", reduction_method,
                            n_components,
                        ))
                    try:
                        clf_name = clf.estimator.__class__.__name__
                        results_dict = {
                            "dataset": data.name,
                            "n_splits": n_splits,
                            "test_size": test_size,
                            "n_rows": n_rows,
                            "encoder": encoder,
                            "str_preprocess": str_preprocess,
                            "clf": [classifiers[i], clf_name,
                                    clf.estimator.get_params()],
                            "ShuffleSplit": [cv.__class__.__name__],
                            "scaler": [scaler.__class__.__name__,
                                       scaler.get_params()],
                            "sample_seed": sample_seed,
                            "shuffleseed": shuffle_seed,
                            "col_action": data.col_action,
                            "clf_type": data.clf_type,
                            "dimension_reduction": [reduction_method,
                                                    n_components],
                        }
                    except AttributeError:
                        clf_name = clf.__class__.__name__
                        results_dict = {
                            "dataset": data.name,
                            "n_splits": n_splits,
                            "test_size": test_size,
                            "n_rows": n_rows,
                            "encoder": encoder,
                            "str_preprocess": str_preprocess,
                            "clf": [classifiers[i], clf_name,
                                    clf.get_params()],
                            "ShuffleSplit": [cv.__class__.__name__],
                            "scaler": [scaler.__class__.__name__,
                                       scaler.get_params()],
                            "sample_seed": sample_seed,
                            "shuffleseed": shuffle_seed,
                            "col_action": data.col_action,
                            "clf_type": data.clf_type,
                            "dimension_reduction": [reduction_method,
                                                    n_components],
                        }
                    if verify_if_exists(results_path, results_dict):
                        print("Prediction already exists.\n")
                        continue
                    start = time.time()
                    if type(encoder) is str:
                        column_action = get_column_action(
                            data.col_action, data.xcols, encoder,
                            reduction_method, n_components, data.clf_type,
                        )
                    if type(encoder) is list:
                        column_action = get_column_action(
                            data.col_action, data.xcols, encoder[0],
                            reduction_method, n_components, data.clf_type,
                        )
                    pred = Parallel(n_jobs=n_jobs)(delayed(fit_predict_fold)(
                        data, scaler, column_action, clf, encoder,
                        reduction_method, n_components, fold, cv.n_splits,
                        train_index, test_index,
                    ) for fold, (train_index, test_index) in enumerate(
                        cv.split(data.df, data.df[data.ycol].values)))
                    pred = np.array(pred)
                    results = {
                        "fold": list(pred[:, 0]),
                        "n_train_samples": list(pred[:, 1]),
                        "n_train_features": list(pred[:, 2]),
                        "score": list(pred[:, 3]),
                        "encoding_time": list(pred[:, 4]),
                        "training_time": list(pred[:, 5]),
                    }
                    results_dict["results"] = results
                    # Saving results
                    pc_name = socket.gethostname()
                    now = "".join([
                        c for c in str(datetime.datetime.now()) if c.isdigit()
                    ])
                    filename = "%s_%s_%s_%s_%s.json" % (
                        pc_name, data.name, classifiers[i], encoder, now,
                    )
                    results_file = os.path.join(results_path, filename)
                    results_dict = array2list(results_dict)
                    # patch for nystrom + ridge
                    if clf.__class__.__name__ == "GridSearchCV":
                        if clf.estimator.__class__.__name__ == "Pipeline":
                            results_dict["clf"] = method2str(
                                results_dict["clf"])
                    write_json(results_dict, results_file)
                    print("prediction time: %.1f s." % (time.time() - start))
                    print("Saving results to: %s\n" % results_file)
    1,
), (
    0,
    2,
), (
    1,
    2,
)]
train_num_set = [5, 10, 15, 20, 25, 30, 35, 40, 45, 52]
print("Equation = a * x + b * y + c")
print("train_size\tinput_dim\ttrain_rmse\ttest_rmse\ttest_mean_error_rate\t\tmodel_params")
for input_dim in input_set:
    for train_num in train_num_set:
        data = Data("../.././data/data.txt", input_dim, train_num)
        # fit the exponential model
        xdata = data.train.X
        ydata = data.train.Y[:, 0]  # training data
        regr = linear_model.LinearRegression()
        regr.fit(xdata, ydata)
        # print('coefficients(b1,b2...):', regr.coef_)
        # print('intercept(b0):', regr.intercept_)

        def func(x, a, b, c):
def __init__(self):
    self.website = Websites()
    self.data = Data()
    self.cmd = ''
    # AUC
    count = 0.0
    for num in AUCs:
        count = count + num
    AUC = count / len(AUCs)
    return Recall, AUC


if __name__ == '__main__':
    embedding_size = Const.embedding_size
    drop_ratio = Const.drop_ratio
    epoch = Const.epoch
    batch_size = Const.batch_size

    data = Data()
    h = Helper()
    num_users = data.get_user_size()
    num_items = data.get_item_size()
    shan = SHAN(num_users, num_items, embedding_size, drop_ratio)
    # shan.load_state_dict(torch.load('SHAN2_dict.pkl'))
    # print(shan)
    if torch.cuda.is_available():
        print("using cuda")
        shan.cuda()
    lr_flag = True
    pre_mean_loss = 999
    lr = Const.lr
    for i in range(0, epoch):
    def sorption_level(self):
        if self._odor == 7 or self._odor == 8 or self._odor == 9:
            return "high"
        if self._odor == 2 or self._odor == 1 or self._odor == 5:
            return "medium"
        if self._odor == 6 or self._odor == 3 or self._odor == 4 or self._odor == 10:
            return "low"

    def set_total_spikes(self, sum):
        self._total_spikes = sum

    def get_total_spikes(self):
        return self._total_spikes


T1O8 = Trial("T1O8", Data.T1O8())
T2O1 = Trial("T2O1", Data.T2O1())
T3O6 = Trial("T3O6", Data.T3O6())
T4O3 = Trial("T4O3", Data.T4O3())
T5O5 = Trial("T5O5", Data.T5O5())
T6O2 = Trial("T6O2", Data.T6O2())
T7O4 = Trial("T7O4", Data.T7O4())
T8O9 = Trial("T8O9", Data.T8O9())
T9O7 = Trial("T9O7", Data.T9O7())
T10O10 = Trial("T10O10", Data.T10O10())

data = [T1O8.data(), T2O1.data(), T3O6.data(), T4O3.data(), T5O5.data(),
        T6O2.data(), T7O4.data(), T8O9.data(), T9O7.data(), T10O10.data()]
trial_list = [T1O8, T2O1, T3O6, T4O3, T5O5, T6O2, T7O4, T8O9, T9O7, T10O10]


def trial_matrix(trial_list):
        if cross:
            cross_point = get_cross_point(pw1, pw2, x, xs[i + 1])
            if y1[i] > y2[i]:
                adv1 += (y1[i] - y2[i]) * (cross_point - x) / 2
                adv2 += (y2[i + 1] - y1[i + 1]) * (xs[i + 1] - cross_point) / 2
            else:
                adv2 += (y2[i] - y1[i]) * (cross_point - x) / 2
                adv1 += (y1[i + 1] - y2[i + 1]) * (xs[i + 1] - cross_point) / 2
    return adv1, adv2


fig, axes = plt.subplots(2, 1)
for ds in datasets:
    print(ds)
    ref_data = Data(model, ds, 'two_stage', testset=testset)
    base_acc = ref_data.layer_acc[-1]
    x1 = [
        x[1]['mean_exit']
        for x in Data(model, ds, rt1, testset=testset).etp_data
    ]
    x2 = [
        x[1]['mean_exit']
        for x in Data(model, ds, rt2, testset=testset).etp_data
    ]
    y1 = [x[1]['acc'] for x in Data(model, ds, rt1, testset=testset).etp_data]
    y2 = [x[1]['acc'] for x in Data(model, ds, rt2, testset=testset).etp_data]
    axes[0].plot(x1, y1, label='JOINT')
    axes[0].plot(x2, y2, label='ALT')
    embeddings = {}
    for i in range(pred.shape[0]):
        embeddings[i] = pred[i]
    return embeddings, weights, time_callback, history


if __name__ == '__main__':
    for name in ['cora', 'citeseer', 'pubmed']:
        edge_path = '%s/%s-edgelist.txt' % (name, name)
        label_path = '%s/%s-label.txt' % (name, name)
        feat_path = '%s/%s-feature.txt' % (name, name)

        data = Data(edge_path, name)
        adj = data.create_adj_from_edgelist(edge_path)
        y_class = data.get_label(label_path)
        y_feat = data.get_feat(feat_path)
        print(adj.shape, y_class.shape, y_feat.shape)

        vis = False
        t1 = time.time()
        for epoch in [10]:
            embeddings, weights, time_callback, history = model(
                adj, y_class, y_feat, 128, epoch)
#         b += 1
#     return a / b

# def calc_auc(y_true, y_pred):
#     return metrics.roc_auc_score(y_true, y_pred)


if __name__ == '__main__':
    args = parse_args()
    if args.dataset == 'dblp':
        path = "../../dataset/DBLP4057_GAT_with_idx_tra200_val_800.mat"
        save_path = "../HACUD/dblp"
        data_generator = Data(path=path, save_path=save_path)
        X_train = data_generator.X_train
        X_test = data_generator.X_test
        y_train = data_generator.y_train
        y_test = data_generator.y_test

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    config = dict()
    config['n_nodes'] = data_generator.n_nodes
    config['n_metapath'] = data_generator.n_metapath
    config['n_class'] = y_train.shape[1]
    plain_adj, norm_adj, mean_adj = data_generator.get_adj_mat()
import json
import operator

import matplotlib.pyplot as plt
import numpy  # missing in the original; needed for numpy.random below

from get_data import Data

words = [
    'tech', 'apple', 'google', 'baidu', 'phone', 'internet', 'router', 'wifi',
    'artificial', 'intelligence', 'machine', 'laptop', 'desktop', 'mac',
    'automation', 'roomba', 'robot', 'microcontroller', 'software', 'hardware',
    'engineering'
]
train_cities = [
    'Toronto', 'San Francisco', 'Boston', 'New York', 'Mexico City'
]
train_scores = [800, 1000, 900, 950, 400]
test, train = Data.get_data(words, train_cities, train_scores)

rng = numpy.random

# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50

train_X = []
train_Y = []
test_X = []
test_Y = []

# The first line of cities.txt is expected to be a bracketed, quoted,
# comma-separated list of city names.
f = open('cities.txt', 'r')
text = f.readlines()[0]
cities = text[1:-1].replace('\'', '').split(',')
import sys

from get_data import Data, DistilbertData, RawBertData

model, routine = sys.argv[1:3]
testset = False
if len(sys.argv) > 3 and sys.argv[3] == 'testset':
    testset = True

datasets = ["RTE", "MRPC", "SST-2", "QNLI", "QQP", "MNLI"]
dbdata = DistilbertData()
rbdata = RawBertData(size=model.split('-')[1])

for ds in datasets:
    ref_data = Data(model, ds, 'two_stage', testset=testset)
    data = Data(model, ds, routine, testset=testset)
    col = data.etp_data

    # show data
    print(ds)
    print('etp\tlayer\tRlayer\tacc\tRacc\tdrop')
    # base_acc = rbdata.acc[ds]
    base_acc = ref_data.layer_acc[-1]
    base_layer = rbdata.layers
    if model.startswith('distil'):
        base_layer = 6
    shrink = dbdata.saving
    print('{}\t{:.1f}\t{:.3f}\t{:.2f}\t{:.3f}\t{:.2f}'.format(
        'dev', 6, shrink, dbdata.acc[ds],