def pass_data(file_num, alg_choice=None):
    """
    This function takes the data obtained from the selected menu choices
    above, and passes them to their respective algorithms.

    *** Should be phased out after implementing GUI. ***

    Args:
        file_num (String): The option chosen for the number of files selected:
            either 1, multiple, or a test sinusoid.
        alg_choice (String, optional): Used in the case of creating a test
            sinusoid, where the algorithm is chosen prior to calling this
            function. Defaults to None.
    """
    if file_num == "1":
        time, detrended_flux, background = data_process.get_data()

        # Change values in columns to float values for later processing.
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]

        while True:
            alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Morlet Wavelet \n5 - GPS\n6 - All\n0 - Exit Program\n")
            if alg_choice == "0":
                sys.exit()
            alg.selection(time, detrended_flux, alg_choice)
    else:
        time, detrended_flux, background = data_process.get_data()

        # Change values in columns to float values for later processing.
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]

        alg.selection(time, detrended_flux, alg_choice)
def __init__(self, data_path, epochs=210):
    self.epochs = epochs
    self.gen_optimizer = tf.keras.optimizers.Adam(0.0002, 0.5)
    self.disc_optimizer = tf.keras.optimizers.Adam(0.0002, 0.5)
    self.generator = Generator()
    self.discriminator = Discriminator()
    self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    # One-hot condition vectors for the conditional GAN, repeated across the batch.
    self.condition_weekend = np.array([[1, 0]]).repeat(batch_size, axis=0)  # weekend
    self.condition_workday = np.array([[0, 1]]).repeat(batch_size, axis=0)  # workday
    self.monthly_parking_rate = get_data(data_path)
    self.seed = sample_noise(batch_size)  # tf.random.normal([batch_size, 1], 0.5, 0.2)
    self.avg_weekend, self.avg_workday = self.get_average(self.monthly_parking_rate)
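# sample_noise is imported from elsewhere and not shown here. A minimal sketch of
# what it might look like, assuming -- per the commented-out call above -- it draws
# Gaussian noise with mean 0.5 and stddev 0.2; the latent width of 1 is likewise
# an assumption taken from that comment, not a confirmed signature:
import tensorflow as tf

def sample_noise(batch_size, latent_dim=1):
    # One latent vector per sample in the batch.
    return tf.random.normal([batch_size, latent_dim], mean=0.5, stddev=0.2)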
def main():
    data = get_data(max_len=FLAGS.max_len)
    cls_name = FLAGS.classifier
    module_name = ".".join(cls_name.split('.')[:-1])
    cls_name = cls_name.split('.')[-1]
    _module = importlib.import_module(module_name)
    cls = _module.__dict__.get(cls_name)
    model = cls(data=data,
                nb_epoch=FLAGS.nb_epoch,
                max_len=FLAGS.max_len,
                embed_size=FLAGS.embed_size,
                batch_size=FLAGS.batch_size,
                optimizer=FLAGS.optimizer,
                use_pretrained=FLAGS.use_pretrained,
                trainable=FLAGS.trainable,
                is_kfold=True,
                kfold=10,
                is_retrain=True)
    model.model_predict_with_weights(FLAGS.kfold_model_path)
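# Usage note: FLAGS.classifier is expected to be a fully qualified dotted path
# such as "models.cnn.TextCNN" (a hypothetical name for illustration). Everything
# before the last dot is imported as a module, and the final component is looked
# up as an attribute of that module. The same lookup, sketched in isolation:
import importlib

def load_class(dotted_path):
    module_name, _, cls_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, cls_name)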
import csv

import pandas as pd
import xgboost as xgb

from data_process import get_data

# Load the data.
x_train, y_train, x_val = get_data()

# Train the model; eval_set monitors a held-out slice of the training data.
reg = xgb.XGBRegressor()
reg.fit(x_train, y_train, eval_set=[(x_train[8000:14007], y_train[8000:14007])])

# Prediction.
y_pred = reg.predict(x_val)
test_result = []
for i in range(len(x_val)):
    test_result.append(y_pred[i])
print(test_result)

# Write the result: append a 'speed' column to test.csv and save as result.csv.
result = csv.reader(open('test.csv', 'r'))
result = [i for i in result]
result[0].append('speed')
for i in range(1, len(result)):
    result[i].append(test_result[i - 1])
with open('result.csv', 'w', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerows(result)
SEED = 7
split_ratio = 0.8
SEQ_LENGTH = 256
BATCH_SIZE = 64
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')
VOCAB_SIZE = 10000
EMBED_DIM = 100
HIDDEN_DIM = 128
OUTPUT_DIM = 2
learning_rate = 1e-4
NUM_EPOCHS = 8
MODEL_PATH = './models/bi_rnn_model.pth'

vocab, train_iterator, valid_iterator, test_iterator = get_data(
    SEQ_LENGTH, SEED, split_ratio, VOCAB_SIZE - 2, BATCH_SIZE, device)


class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim,
                 bidirectional=False):
        super(RNNModel, self).__init__()
        self.bidirectional = bidirectional
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_dim,
                          bidirectional=bidirectional)
dev_file = options.val_data
test_file = options.test_data
domain_file = options.domain_file
domain_test_file = options.domain_test_file

MAX_SEQUENCE_LENGTH = 20
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 300
batch_size = 256
nb_classes = 2

modelFile = options.w2v_model_file  # "../w2v_models/crisis_word_vector.txt"
emb_model = KeyedVectors.load_word2vec_format(modelFile, binary=False)
# emb_model=""

delim = "\t"
data, _, _ = data_process.get_data(train_file, delim)
ul_data, _, _ = data_process.get_data(domain_file, delim)
data.extend(ul_data)
print("Number of inst for vocab: " + str(len(data)))

word_index, tokenizer = data_process.get_tokenizer(data, MAX_NB_WORDS,
                                                   MAX_SEQUENCE_LENGTH)
train_x, train_y, train_le, train_labels, _, _ = data_process.get_dev_data_with_id(
    train_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
dev_x, dev_y, dev_le, dev_labels, _, _ = data_process.get_dev_data_with_id(
    dev_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
test_x, test_y, test_le, test_labels, _, _ = data_process.get_dev_data_with_id(
    test_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
domain_x, domain_y, _, _, _, _ = data_process.get_dev_data_with_id(
    domain_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
all_start = datetime.datetime.now()  # Program start time.

# Hyperparameter settings.
sample_size = 96    # Number of samples learned at once.
hidden = 100        # Number of neurons in the LSTM layer.
batch_size = 1      # Batch size for the LSTM layer.
time_step = 96      # Time step for the LSTM layer.
learn_set = 1700    # Number of training iterations.
cond_dim = 2        # Dimension of the condition value.
latent_dim = 1      # Dimension of the latent space.
num_run = 1         # Number of runs.
num_gen_once = 1    # Number of sequences generated per run.
LR = 0.001          # Learning rate.

# Data processing.
# Read the dataset from the data folder: one month of parking data, sampled
# every fifteen minutes, i.e. 96 points per day.
data = '../data'
sample_set, index, num_seq = get_data(data, sample_size)
index_list = [i for i in range(num_seq)]

# Data generation.
g_data_ = []
for loop in range(num_run):
    begin = datetime.datetime.now()
    g_data = train(sample_set, index, sample_size, learn_set, batch_size,
                   hidden, time_step, num_seq, num_gen_once, LR, latent_dim,
                   cond_dim)
    g_data_.append(g_data)
    end = datetime.datetime.now()
    print('Elapsed: ', end - begin)

end_end = datetime.datetime.now()
print('Total elapsed:', end_end - all_start)
g_data_ = np.array(g_data_)
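# A minimal sketch of persisting the generated sequences for later evaluation --
# the file name is a hypothetical choice for illustration, not taken from the source:
np.save('generated_parking_rates.npy', g_data_)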
menu = True
while menu:
    menu_selec = input("Select file option: \n1 - Single file \n2 - Multiple Files \n3 - Test Sinusoid \n0 - Exit Program\n\n")

    if menu_selec == "1":
        file_path = input("Choose file for period analysis: ")
        File_Management.read_input_file(file_path)
        pass_data(menu_selec)
        menu = False
    elif menu_selec == "2":
        files = File_Management.open_dir()
        alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Wavelets \n5 - All\n0 - Exit Program\n")
        for path in files:
            File_Management.read_input_file(path)
            pass_data(menu_selec, alg_choice)
    elif menu_selec == "3":
        data_process.create_sin()
        alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Wavelets \n5 - All\n0 - Exit Program\n")
        time, detrended_flux, background = data_process.get_data()
        alg.selection(time, detrended_flux, alg_choice)
    elif menu_selec == "0":
        sys.exit()
    else:
        print("This is not a valid selection.")
def resolution(self, url):
    c = super(GetProduct, self).parse(url)
    html = c[0]
    status = c[1]
    ProductCheck = tool.tools.ProductCheck(url)
    if ProductCheck == 0:
        if status == 200:
            tree = etree.HTML(html)
            pid = tool.tools.get_id(url)[0]
            cid = tool.tools.get_id(url)[1]
            title = tree.xpath("//div[@class='goodsd-right col-sm-5']//h4/text()")
            title = "".join(title).strip()
            price = tool.tools.get_price(pid)[0]
            orig_price = tool.tools.get_price(pid)[1]
            description = tree.xpath(
                "//div[@class='goodsd-right col-sm-5']//div[@class='kv']/div//text()")
            description = [x.strip() for x in description if x != '\n ']
            description = [x for x in description if x != '']
            description = "\n".join(description)

            # Strip class/valign/colspan attributes from the size & fit table.
            size_fits = tree.xpath("//table[@class='kv']")[0]
            del size_fits.attrib['class']
            for size_fit in size_fits:
                del size_fit.attrib['class']
                for i in size_fit:  # keys(1)
                    del i.attrib['class']
                    # Set before deleting so the delete never raises KeyError.
                    i.set('valign', '321')
                    del i.attrib['valign']
                    i.set('colspan', '321')
                    del i.attrib['colspan']
                # print(size_fit)
                # list_size.append(size_fit)
            size_fits = etree.tostring(size_fits)
            size_fits = size_fits.decode().replace('\n', '')

            sku = tree.xpath("//div[@class='summary']/span[@class='sku']/text()")
            sku = "".join(sku).replace(' ', '').replace('\n', '').replace('SKU:', '')
            review = tree.xpath("//div[@class='comments']/a/span/text()")
            review = "".join(review).strip('\n').replace(' ', '')
            sizes = tool.tools.get_size(pid)
            if review == '':
                review = 0
            img_urls = tree.xpath("//div[@class='swiper-wrapper']/div/img/@data-src")
            # img_counts = tree.xpath("//div[@class='vertical-wrap']/img/@data-src")
            color_urls = tree.xpath("//div[@class='opt-color']/a/@href")
            color_imgs = tree.xpath("//div[@class='opt-color']/a/@style")

            # Check whether a product with the same id already exists.
            ProductCheckSku = tool.tools.ProductCheck(sku)
            if ProductCheckSku == 0:
                # Check whether the product has multiple colors.
                this_url = url
                if color_urls:
                    color_urls = color_urls[1:]
                    for color_url in color_urls:
                        color_url = color_url + ''
                        # color_url = str(color_url.encode('UTF-8'))
                        self.son_resolution(color_url, pid)
                data_process.get_data(pid, cid, this_url, title, price,
                                      orig_price, description, size_fits, sku,
                                      review, img_urls, color_urls, color_imgs,
                                      sizes)
    else:
        print("-----Parent-url-repetition-----")
    X = MaxPooling3D((2, 2, 2), strides=(2, 2, 2))(X)
    X = Conv3D(2, (1, 1, 1), strides=(2, 2, 2), name='conv2',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D((1, 2, 2), strides=(2, 2, 2))(X)

    # Output layer.
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    # Create model.
    model = Model(inputs=X_input, outputs=X, name='3Dlipreader')
    return model


if __name__ == '__main__':
    setup()
    print('Gathering data...')
    x_train, y_train, x_test, y_test = get_data(DATASET_PATH, TRAIN_SPLIT,
                                                NUM_FRAMES_PER_TENSOR, 'rgb')
    model = get_model_from_architecture(input_shape=INPUT_DIM, classes=2)
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SZ)
    predictions = model.evaluate(x_test, y_test)
    print("Loss = " + str(predictions[0]))
    print("Test accuracy = " + str(predictions[1]))
    # '体检日期' is the checkup-date column; '血糖' (blood glucose) is the target.
    data['体检日期'] = (pd.to_datetime(data['体检日期']) - parse('2017-10-09')).dt.days
    # data.fillna(data.median(axis=0), inplace=True)
    data.dropna(inplace=True)

    # Min-max scale every feature column except the id and the target.
    scaler_columns = [i for i in data.columns if i != 'id' and i != '血糖']
    scaler = MinMaxScaler()
    data[scaler_columns] = scaler.fit_transform(data[scaler_columns])

    train_feat = data[data.id.isin(train_id)]
    test_feat = data[data.id.isin(test_id)]
    train_feat = train_feat.drop(['id'], axis=1)
    test_feat = test_feat.drop(['id'], axis=1)
    return train_feat, test_feat


# train_feat, test_feat = make_feat(train, test)
train_feat, test_feat = get_data(data_path)
# train_feat['血糖'] = np.log(train_feat['血糖'])
predictors = [f for f in test_feat.columns if f not in ['血糖']]


def evalerror(pred, df):
    label = df.get_label().values.copy()
    score = mean_squared_error(label, pred) * 0.5
    return ('0.5mse', score, False)


print('Starting training...')
params = {
    'learning_rate': 0.01,
    'boosting_type': 'gbdt',
}
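# evalerror above follows LightGBM's custom-metric (feval) signature: it takes the
# raw predictions and the Dataset, and returns (name, value, is_higher_better).
# A minimal sketch of wiring it into training -- the Dataset construction and the
# round count are assumptions for illustration, and a real run would hold out a
# proper validation set instead of evaluating on the training data:
import lightgbm as lgb

lgb_train = lgb.Dataset(train_feat[predictors], label=train_feat['血糖'])
gbm = lgb.train(params, lgb_train, num_boost_round=1000,
                valid_sets=[lgb_train], feval=evalerror)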
def data_op(file_num=None, alg_choice=None):
    """
    This function takes in a file/s and an algorithm, and passes the given
    file data to the chosen algorithm.

    Note: each of these parameters is optional in the event that the user does
    not select a choice from either of their respective ComboBoxes; however,
    if either is left as None this function will exit itself.

    Args:
        file_num (String): the number of files chosen by the user. Either
            single or multiple files, or a test sinusoid.
        alg_choice (String): the user's chosen algorithm.
    """
    # Maps the algorithm choices to numbers for compatibility with algorithms.py.
    alg_dict = {
        'Time Series': '1',
        'Lomb-Scargle': '2',
        'Autocorrelation': '3',
        'Wavelets': '4',
        'GPS': '5',
        'All': '6'
    }

    # Prevents the program from crashing in the event that the user doesn't select properly.
    if file_num is None or file_num == "Select" or alg_choice is None or alg_choice == "Select":
        tk.messagebox.showinfo("Error", "Please select both a file/folder and an algorithm")
    elif file_num == "Single File":
        # Prevents the program from crashing in the event that the user closes
        # the file selection window.
        if not files:
            tk.messagebox.showinfo("Error", "Error: No Files Selected")
            return
        # Also prevents a crash when the selection window returns an empty path.
        elif files[0] == "" or files[0] is None:
            tk.messagebox.showinfo("Error", "Error: No Files Selected")
            return
        else:
            print(files[0])
            File_Management.read_input_file(files[0])
            time, detrended_flux, background = data_process.get_data()
            time = [float(data) for data in time]
            detrended_flux = [float(data) for data in detrended_flux]
            noise = [float(data) for data in background]
            alg_choice = alg_dict[alg_choice]
            alg.selection(time, detrended_flux, alg_choice)
    elif file_num == "Multiple Files":
        # Iterates through the files in the selected folder and passes each one
        # through the chosen algorithm. One potential issue with this is if the
        # user intends to pass files through different algorithms.
        for path in files:
            # Skips bad file types so a folder with mixed contents doesn't crash the program.
            if not (path.endswith('.csv') or path.endswith('.fits')):
                continue
            File_Management.read_input_file(path)
            time, detrended_flux, background = data_process.get_data()
            time = [float(data) for data in time]
            detrended_flux = [float(data) for data in detrended_flux]
            noise = [float(data) for data in background]
            alg_new = alg_dict[alg_choice]
            alg.selection(time, detrended_flux, alg_new)
    # This option is not currently functional when used in sequence with a .csv file.
    elif file_num == "Test Sinusoid":
        data_process.create_sin()
        time, detrended_flux, background = data_process.get_data()
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]
        alg_choice = alg_dict[alg_choice]
        alg.selection(time, detrended_flux, alg_choice)
#!/usr/bin/python
import sys

import numpy as np
import sklearn  # Importing the basic libraries required.
from sklearn.preprocessing import MinMaxScaler

sys.path.append("../tools/")
from data_process import get_data

user_data1, user_id1, problem_data1, train_submission1 = get_data()
'''
We now have the following things:

user_data1: the performance of a particular user, with features such as
    level and problems solved. Columns: submission_count, problem_solved,
    contribution, follower_count, max_rating, rating, rank.

user_id1: only the user ids, corresponding row-for-row to user_data1.

problem_data1: the description of a particular problem, i.e. its id (int)
    and its difficulty.

train_submission1: 4 columns (all ints):
    1) user_id
    2) problem_id
    3) attempts_range
    4) difficulty, on a scale of 1 to 14 (both included)
'''
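# A minimal sketch of a first look at the structures described above, assuming
# get_data returns pandas DataFrames (the source does not confirm the container type):
print(user_data1.shape, problem_data1.shape, train_submission1.shape)
print(train_submission1.columns.tolist())  # expect user_id, problem_id, attempts_range, difficulty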