def getInputData(batchsize):
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batchsize)
    example_batch = tf.reshape(features, [-1])
    item = tf.string_split(example_batch, delimiter="").values.eval()
    return [dict1[alp.decode().lower()] for alp in list(item)]
def write2DB(self):
    from ReadData import ReadData
    # rd = ReadData(self.morning)
    ymd = str(self.date_time.year) + "-" + str(self.date_time.month) + "-" \
        + str(self.date_time.day) + "-" + str(self.date_time.weekday())
    if self.morning == True:
        ymd = "m" + ymd
    file_bak = "../daily/" + ymd + ".txt"
    rd = ReadData(self.morning)
    rd.gbk2utf8("深沪A股.TXT", file_bak)
    f = open(file_bak)
    try:
        con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
        cur = con.cursor()
        Regex1 = re.compile(r"\d")
        while True:
            line = f.readline()
            if not line:
                break
            if not re.search(Regex1, line):
                print line
                continue
            data = rd.splitItem(line)
            for w in data:
                print w,
            print ""
            create_time = self.date_time.strftime('%Y-%m-%d %H:%M:%S')
            sql = "INSERT INTO " + self.table_name + \
                "(idx, name, rise_rate, cur_price, rise_price, buy,\
                sale, total_stock, cur_stock, rise_v, exchange, day_begin,\
                up, down, yesterday, profit, total_price, quantity_ratio,\
                industry, area, amplitude, ave_price, inner_market,\
                outer_market, inner_outer_ratio, buy_volume, sale_volume,\
                currency_capital, market_cap, create_time)\
                VALUES( \'" + data[0] + "\',\'" + data[1] + "\'," + data[2] + "," \
                + data[3] + "," + data[4] + "," + data[5] + "," + data[6] + "," \
                + data[7] + "," + data[8] + "," + data[9] + "," + data[10] + "," \
                + data[11] + "," + data[12] + "," + data[13] + "," + data[14] + "," \
                + data[15] + "," + data[16] + "," + data[17] + ",\'" + data[18] + "\',\'" \
                + data[19] + "\'," + data[20] + "," + data[21] + "," + data[22] + "," \
                + data[23] + "," + data[24] + "," + data[25] + "," + data[26] + "," \
                + data[27] + "," + data[28] + ",\'" + create_time + "\')"
            print sql
            cur.execute(sql)
    except mdb.Error, e:
        print "Error %d: %s" % (e.args[0], e.args[1])
        sys.exit(1)
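# Note added for clarity (not part of the original module): MySQLdb cursors also
# accept parameterized queries via cur.execute(sql, params) with %s placeholders,
# which avoids the hand-built quoting above and the associated SQL-injection risk.
# A minimal sketch, assuming the same 29 data fields plus create_time;
# insert_row_parameterized is a hypothetical helper name:
def insert_row_parameterized(cur, table_name, data, create_time):
    columns = ("idx, name, rise_rate, cur_price, rise_price, buy, sale,"
               " total_stock, cur_stock, rise_v, exchange, day_begin, up, down,"
               " yesterday, profit, total_price, quantity_ratio, industry, area,"
               " amplitude, ave_price, inner_market, outer_market,"
               " inner_outer_ratio, buy_volume, sale_volume, currency_capital,"
               " market_cap, create_time")
    placeholders = ", ".join(["%s"] * 30)  # 29 data fields + create_time
    sql = "INSERT INTO " + table_name + " (" + columns + ") VALUES (" + placeholders + ")"
    cur.execute(sql, list(data[:29]) + [create_time])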
def DenoiseByEMD():
    """
    Denoise the original signal by removing the first n levels of noise.
    """
    globalData = ReadData()
    removeLevel = Settings.denoiseLevel
    filename = './Cashe/globalData_EMD_{}.pickle'.format(removeLevel)

    # If denoising is not used (removeLevel = 0), cache and return the raw data
    if removeLevel < 1:
        globalDataEMD = globalData
        with open(filename, 'wb') as f:
            pickle.dump(globalDataEMD, f)
        return globalDataEMD

    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            globalDataEMD = pickle.load(f)
        return globalDataEMD

    # IMFs
    globalDataImfs = GetImfs(globalData)
    globalDataEMD = {}
    for key in globalData:
        df = globalData[key]
        dfImfs = globalDataImfs[key]
        NLevels = dfImfs.shape[1]
        tsReons = dfImfs[range(Settings.denoiseLevel, NLevels)].sum(axis=1)
        tsReons.name = df.columns[0]
        globalDataEMD[key] = pd.DataFrame(tsReons)

    # Save the data to the filename as a pickle file
    with open(filename, 'wb') as f:
        pickle.dump(globalDataEMD, f)
    return globalDataEMD
def main(args):
    # ---set up paths for training and test data (NUAA face liveness dataset)---
    model_name = args.model
    learning_rate = args.lr
    epoch = args.epoch

    with open(path) as file:
        print("Reading from json ... ")
        data = json.load(file)[model_name]
        accuracy = data['accuracy']
        model_file = data['file']

    print("Reading input from the NUAA dataset ... ")
    readd = ReadData()
    clientdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ClientNormalized/'
    imposterdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ImposterNormalized/'
    client_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_train_normalized.txt'
    imposter_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_train_normalized.txt'
    client_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_test_normalized.txt'
    imposter_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_test_normalized.txt'

    # ---------------read training and test data----------------
    train_images, train_labels = readd.read_data(clientdir, imposterdir,
                                                 client_train_normaized_file,
                                                 imposter_train_normaized_file)
    test_images, test_labels = readd.read_data(clientdir, imposterdir,
                                               client_test_normaized_file,
                                               imposter_test_normaized_file)

    for i in range(0, 1):
        # --pick one of the following models for face liveness detection---
        if model_name == 'CNN':
            print("Selected CNN")
            cnn = CNNModel()  # simple CNN model for face liveness detection
        else:
            print("Selected Inception")
            cnn = InceptionV4Model()  # Inception model for liveness detection

        if args.resume:
            print("Resuming from the best model")
            model = cnn.load_model(model_file)  # to use the pretrained model
        else:
            print("Starting from scratch by creating a new model")
            model = cnn.create_model(learning_rate)  # create and train a new model

        print("Starting training ...")
        model = cnn.train_model(model, train_images, train_labels, test_images, test_labels,
                                epoch, accuracy, model_file, model_name)
        test_loss, test_acc = cnn.evaluate(model, test_images, test_labels)
        print('iteration = ' + str(i) + ' ---------------------------------------------========')

    print("**************************************Done***************************************")
def __init__(self):
    self.data = ReadData().please_read_data()
    # Cluster lists, so we can assign elements to them
    self.cluster1 = []
    self.cluster2 = []
    self.cluster3 = []
    # Centroid lists, so we can update them
    self.centroid1 = []
    self.centroid2 = []
    self.centroid3 = []
        '''Runs training until cost values converge to within some interval'''
        val = self.linreg(learning_rate, ind, dep)
        old_val = 0
        # Change the threshold below to decide how much convergence is wanted
        while np.absolute(val - old_val) > 1:
            old_val = val
            val = self.linreg(learning_rate, ind, dep)
        self.getTheta()

    def getTheta(self):
        '''Prints out values for the current weight and bias variables'''
        print "Weight Bias"
        print self.weight, self.bias


if __name__ == '__main__':
    # Command line to run this properly:
    #   python NiceLinReg.py data.csv [2,3] 1
    np.random.seed(42)
    loader = ReadData()
    loader.load(sys.argv[1], sys.argv[2], int(sys.argv[3]))

    print "Temp Only"
    tempOnly = NiceLinReg()
    dailyTemp = loader.getInd(0)
    DOJIA = loader.getDep()
    tempOnly.train(.000005, dailyTemp, DOJIA)

    print "\nDiff in Temp and avg highest recorded temp"
    diff = NiceLinReg()
    diffList = loader.diff(0, 1)
    diff.train(0.000000000049, diffList, DOJIA)
def get_HvM(self):
    # Read meta data
    Meta = ReadMeta(neuralfeaturesdir)
    DF_img = Meta.get_DF_img()
    DF_neu = Meta.get_DF_neu()
    times = Meta.get_times()

    # Read neural data
    Data = ReadData(datadir, DF_neu)
    IT, V4 = Data.get_data()

    D = Mapping.get_Neu_trial_V36(IT[1:], [70, 170], times)
    image_indices = np.random.randint(low=0, high=D.shape[1], size=ni)
    D = D[:, image_indices, :]
    D = np.swapaxes(D, 0, 1)

    nf = D.shape[1]
    nt = D.shape[2]

    mu = np.zeros((self.nf, self.ni))
    sd = np.zeros((self.nf, self.ni))
    for f in range(self.nf):
        for i in range(self.ni):
            mu[f, i] = D[i, f, :].mean()
            sd[f, i] = D[i, f, :].std()

    hf = h5py.File(resultdir + 'HvM_stats.h5', 'w')
    hf.create_dataset('mu', data=mu)
    hf.create_dataset('sd', data=sd)
    hf.close()

    # # test synthetic data as HvM
    # nf = 168
    # nt = 46
    # noise_dist = 'poisson'
    # sds = np.logspace(-1, 1, num=int(nf))
    # D = np.zeros((ni, nf, nt))
    # D_mean = np.random.rand(ni, nf)
    # for tr in range(nt):
    #     D[:, :, tr] = D_mean
    #
    # noise1 = np.zeros((ni, nf, int(nt * splitfract)))
    # noise2 = np.zeros((ni, nf, int(nt * splitfract)))
    # for i in range(ni):
    #     if noise_dist == 'normal':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #     elif noise_dist == 'poisson':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #
    # D[:, :, :int(nt * splitfract)] = D[:, :, :int(nt * splitfract)] + noise1
    # D[:, :, int(nt * splitfract):] = D[:, :, int(nt * splitfract):] + noise2

    # to test HvM as synthetic:
    # hf = h5py.File(resultdir + 'D.h5', 'w')
    # hf.create_dataset('D', data=D)
    # hf.close()

    sds = []
    Collinearity = 'HvM'
    noise_dist = 'HvM'

    return D
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May  5 01:20:32 2018

@author: computer
"""
from ReadData import ReadData
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
import gc

data = ReadData()
data.X_train.drop(data.X_train.index[:int(1e+8)], inplace=True)
data.y_train.drop(data.y_train.index[:int(1e+8)], inplace=True)

X_train, X_valid, y_train, y_valid = train_test_split(data.X_train, data.y_train,
                                                      test_size=0.1)
cat_features = X_train.columns.get_indexer_for(data.CATEGORICAL_FEATURES)

del data; gc.collect()

train_pool = Pool(X_train, y_train, cat_features=cat_features)
del X_train; del y_train; gc.collect()
            5, 1, stride=2, activation_fn=tf.nn.sigmoid)).tensor


def get_generator_loss(D2):
    '''Loss for the generator.

    Maximize the probability of generating images that the discriminator
    cannot differentiate.

    Returns:
        see the paper
    '''
    return tf.reduce_mean(
        tf.nn.relu(D2) - D2 + tf.log(1.0 + tf.exp(-tf.abs(D2))))


if __name__ == "__main__":
    rd = ReadData()
    input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 32 * 32])

    with pt.defaults_scope(activation_fn=tf.nn.elu,
                           batch_normalize=True,
                           learned_moments_update_rate=0.0003,
                           variance_epsilon=0.001,
                           scale_after_normalization=True):
        with tf.variable_scope("model"):
            D1 = discriminator(input_tensor)  # positive examples
            D_params_num = len(tf.trainable_variables())
            G = generator()

        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
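# Note added for clarity (not from the original file): the expression inside
# get_generator_loss is the numerically stable form of softplus(-D2), i.e.
# -log(sigmoid(D2)), the non-saturating generator loss. A minimal NumPy sketch
# checking the identity relu(x) - x + log(1 + exp(-|x|)) == log(1 + exp(-x)):
import numpy as np

def stable_loss(d2):
    return np.mean(np.maximum(d2, 0.0) - d2 + np.log1p(np.exp(-np.abs(d2))))

def naive_loss(d2):
    return np.mean(np.log1p(np.exp(-d2)))  # softplus(-d2); overflows for very negative d2

d2 = np.array([-3.0, 0.0, 2.5])
assert np.allclose(stable_loss(d2), naive_loss(d2))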
from ReadData import ReadData
import tensorflow as tf
import os.path
import numpy as np

rd = ReadData()

input_dim = 1024  # 32 * 32
hidden_encoder_dim = 400
hidden_decoder_dim = 400
latent_dim = 20
lam = 0


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.001)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0., shape=shape)
    return tf.Variable(initial)


# input
x = tf.placeholder("float", shape=[None, input_dim])  # input_dim = 32 * 32
l2_loss = tf.constant(0.0)  # l2_loss is a number

# hidden W, b
W_encoder_input_hidden = weight_variable([input_dim, hidden_encoder_dim])  # [1024, 400]
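# A minimal sketch (assumed continuation, not from the original file) of how the
# helpers above are typically used to build the encoder's first layer in this
# TF1-style VAE; b_encoder_input_hidden and hidden_encoder are illustrative names:
b_encoder_input_hidden = bias_variable([hidden_encoder_dim])
l2_loss += tf.nn.l2_loss(W_encoder_input_hidden)

# Encoder hidden layer: [batch, 1024] -> [batch, 400]
hidden_encoder = tf.nn.relu(tf.matmul(x, W_encoder_input_hidden) + b_encoder_input_hidden)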
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = RecurrentCNN(no_filters=hidden_size, no_classes=args.no_classes)
elif args.model == 'bilstm_rcnn':
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = BiLSTMRecurrentCNN(hidden_size, no_classes=args.no_classes)

model = model.build(inputs)
model.load_weights(args.weights)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

reader = ReadData(dataset=args.dataset,
                  text_embedding_path=args.text_embedding,
                  video_feature_path=args.video_features,
                  data_shape=inputs,
                  train_val_split=1.)

results = []
labels = []

prog_bar = tqdm(total=int(reader.val_size / args.batch_size))
num_batches = int(reader.val_size / args.batch_size)
i = 0
for x, y in reader.generator_val(batch_size=args.batch_size):
    label = list(y)
    result = list(model.predict(x))
from GA import GA
from ReadData import ReadData
from utils import *

data = ReadData("easy_01_tsp.txt")
params = {'popSize': 100, 'noGen': 100}

ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

bestFitness = 0
bestDist = 0
bestChromoOverallRepres = None

for g in range(ga.getParam()['noGen']):
    ga.oneGenerationElitism()
    # ga.oneGeneration()
    # ga.oneGenerationSteadyState()
    bestChromo = ga.bestChromosome()
    if bestChromo.fitness > bestFitness:
        bestChromoOverallRepres = bestChromo.repres
        bestFitness = bestChromo.fitness
        bestDist = str(dist(bestChromo.repres, ga.getProblParam()))
    print('Best solution in generation ' + str(g) + ' is: ' + str(bestChromo.repres) +
          ' fitness = ' + str(bestChromo.fitness) +
          ' dist: ' + str(dist(bestChromo.repres, ga.getProblParam())))

print("\n")
print('Best solution overall is: ' + str(bestChromoOverallRepres) +
      ' fitness = ' + str(bestFitness) + ' dist: ' + str(bestDist))
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"
"""

# ****************************************************
# This is the entry point of the entire project.
# To understand the code flow, please start reading from here.
# ****************************************************

rd = ReadData()  # Initialize the file reader class

featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, 2))
featureMapProj = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, param.PARTICLE_COUNT, 3))
featureStore = np.zeros((param.FEATURE_SIZE))

world = np.zeros((param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE
                   + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE
                   + param.ANGULAR_VELOCITY_VECTOR_SIZE)

cameraState = np.zeros((stateVectorSize))

# Variables for archiving the position vector, quaternion and features
_x = []
_y = []
_z = []
'''
Created on 5 apr. 2020

@author: Alexandraah
'''
from GA import GA
from ReadData import ReadData
from utils import *
import matplotlib.pyplot as plt

data = ReadData("C:\\@Alexandra\\anul2\\semestrul2\\ai\\lab\\laborator4\\berlin.txt")
params = {'popSize': 500, 'noGen': 1000}

ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

res = []
res1 = []
for i in range(params['noGen']):
    # ga.oneGeneration()
    ga.oneGenerationElitism()
    # ga.oneGenerationSteadyState()
    best = ga.bestChromosome()
    fitnesses = [c.fitness for c in ga.population]
    avgFitness = sum(fitnesses) / len(fitnesses)
    res.append(avgFitness)
    for c in ga.population:
        res1.append(c.fitness)
        print("Fitness: " + str(c.fitness) + "\n")
    print('Generation: ' + str(i) + '\nBest chromosome: ' + str(best.repres) +
          '\nLocal best fitness: ' + str(best.fitness)
__credits__ = []
__license__ = "GNU GPL"
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"
"""

# ****************************************************
# This is the entry point of the entire project.
# To understand the code flow, please start reading from here.
# ****************************************************

__name__ = '__main__'

rd = ReadData()  # Initialize the file reader class

featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, 2))
featureMapProj = np.zeros(
    (param.MAX_OBSERVATION, param.MAX_FEATURES, param.PARTICLE_COUNT, 3))
featureStore = np.zeros((param.FEATURE_SIZE))

world = np.zeros(
    (param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE
                   + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE
                   + param.ANGULAR_VELOCITY_VECTOR_SIZE)

cameraState = np.zeros((stateVectorSize))

# Variables for archiving the position vector, quaternion and features
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
############
from ReadData import ReadData
from SHE import SHE

##### Parameters of the experimental device #########
global pixel_width          # pixel width, unit: nm
pixel_width = 320e3
lam_x = 0.124               # X-ray wavelength, unit: nm
dis_s2d = 5300e6            # sample-to-detector distance, unit: nm

############ constant definition ################
pi = math.pi
###############################################

init_data = ReadData("data/1.4/50k-132-1.401286.asc")
px_min = 122
px_max = 362
pz_min = 166
pz_max = 326
cen_px = (px_max + px_min) / 2
cen_pz = (pz_max + pz_min) / 2

I = init_data[pz_min:(pz_max + 1), px_min:(px_max + 1)]
plt.imshow(I[:, 40:200])
plt.axis('off')
plt.colorbar()

# EI = np.sqrt(I)
# I_max = 0.05*np.max(I)
#########################
# x = pixel_width*np.arange(px_min-cen_px, px_max-cen_px+1)
# z = pixel_width*np.arange(pz_min-cen_pz, pz_max-cen_pz+1)
    H = np.dot(np.linalg.pinv(R), P).T
    x_est = np.dot(H, y)
    err = x - x_est
    g = lambda x: np.dot(H, x)
    R = np.dot(err, err.T) / l
    return g, R


if os.name == 'posix':
    trainPath = os.path.abspath('.') + '/Data/train.txt'
    testPath = os.path.abspath('.') + '/Data/test.txt'
elif os.name == 'nt':
    trainPath = os.path.abspath('.') + '\\Data\\train.txt'
    testPath = os.path.abspath('.') + '\\Data\\test.txt'

TrainX, TrainY = ReadData(trainPath, 'train')
# TestX, TestY = ReadData(testPath, 'test')  # two types of test data set

# Hold out the last 500 columns as the test split; take the test slice before
# truncating the training arrays so the two splits do not overlap.
TestX = TrainX[:, -500:]
TestY = TrainY[:, -500:]
TrainX = TrainX[:, 0:-500]
TrainY = TrainY[:, 0:-500]

xTrainDim, TrainLen = np.shape(TrainX)
print(str(xTrainDim) + " " + str(TrainLen))
yTrainDim, TrainLen = np.shape(TrainY)
xTestDim, TestLen = np.shape(TestX)
print(str(yTrainDim) + " " + str(TestLen))

# Kalman filter
    default='nmt_logs')
parser.add_argument(
    '--inference',
    action="store_true",
    help='Whether to run inference or simply train the network')
parser.add_argument('--pretrained_path', help='Path to Pre-trained Weights')

args = parser.parse_args()

assert args.dataset.endswith('csv'), "Dataset file needs to be in CSV format"
assert 0. <= args.train_val_split < 1., "Train-vs-validation split needs to be in [0, 1)"

latent_dim = args.latent_dim

# Reading and preparing the training/validation dataset
reader = ReadData(args.dataset, args.train_val_split, args.language_1, args.language_2)
(X_train, y_train), (X_val, y_val) = reader.prep_data()
train_samples = len(X_train)
val_samples = len(X_val)
num_encoder_tokens = reader.num_encoder_tokens
num_decoder_tokens = reader.num_decoder_tokens

# Loading the embedding matrix
lang1_embedding = Word2Vec.load(args.lang1_embedding)
lang1_tok = Tokenizer()
lang1_tok.fit_on_texts(reader.language_1_text)
encoder_embedding_matrix = np.zeros((num_encoder_tokens, latent_dim))
for word, i in lang1_tok.word_index.items():
    try:
        embedding_vector = lang1_embedding[word]
# ----------
# import original data
# ----------
Datapath = '../../methylation_imputation/data/'
DataSample_full = Datapath + 'intersected_final_chr1_cutoff_20_sample_full.bed'
DataSample_partial = Datapath + 'intersected_final_chr1_cutoff_20_sample_partial.bed'
DataTrain = Datapath + 'intersected_final_chr1_cutoff_20_train.bed'

# Training data: use either one of the following three
# DataTrain = ReadData(DataTrain)
# DataTrain = pd.read_csv('../result/Train_NaN_Meaned', sep='\t')
DataTrain = pd.read_csv('../result/Train_NaN_Meaned_without_2627', sep='\t')

# Sample data
DataSample_full = ReadData(DataSample_full)
DataSample_partial = ReadData(DataSample_partial)

# ----------
# Format the data so it is indexed by start position
# ----------
DataTrain.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_full.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_partial.set_index('start', drop=False, inplace=True, verify_integrity=True)

# ----------
# Read the imputation result
# ----------
Resultpath = '../result/raw/'
from ReadData import ReadData
import codecs

today = datetime.now()
ym = str(today.year) + str(today.month)
ymd = str(today.year) + "-" + str(today.month) + "-" + str(today.day)
table_name = "stock" + ym

try:
    con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
    cur = con.cursor()
    createTable(cur, ym)

    rd = ReadData()
    rd.gbktoutf8("深沪A股.TXT")
    file_name = ymd + "." + "txt"
    f = open(file_name)
    title = f.readline()  # read the title
    print title
    while True:
        line = f.readline()
        print line
        if not line:  # EOF
            break
        if not re.search(r"\d", line):
            continue
        data = rd.splitItem(line)
        for w in data:
if args.model.endswith('lstm'):
    model = ConvLSTMModel1(hidden_states, classes,
                           attention_size=attention_size,
                           use_attention=args.use_attention)
elif args.model.endswith('deep'):
    model = ConvLSTMModel2(hidden_states, classes,
                           attention_size=attention_size,
                           use_attention=args.use_attention)
else:
    model = ConvModel(classes)

reader = ReadData(args.training_csv, args.embedding, args.classes,
                  batch_size=args.batch_size,
                  no_samples=args.no_samples,
                  train_val_split=args.train_val_split)

print('Reading Validation data.')
val_x, val_y = reader.read_all_val()
if args.model.startswith('cnn'):
    val_x = np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1))

with tf.name_scope('Model'):
    prediction = model.model(x)

with tf.name_scope('Loss'):
    crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y)
    cost_func = (tf.reduce_mean(crossent)) / args.batch_size
                  optimizer=optimizer,
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

model.summary()

if args.check_build:
    exit()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}

if args.model == 'sentence_pair':
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=False)

print('Reading Validation Data ..')
val_x, val_y = reader.read_val()
train_generator = reader.generator()

log_dir = args.model
def mainFunc():
    filepath = 'D:/Code/readfile/data.csv'
    d = ReadData(filepath)
    print(d.data.keys())
    ax1.spines['bottom'].set_color("#5998ff")
    ax1.spines['top'].set_color("#5998ff")
    ax1.spines['left'].set_color("#5998ff")
    ax1.spines['right'].set_color("#5998ff")
    # set the colour of the y-axis tick labels
    ax1.tick_params(axis='y', colors='w')
    plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))
    ax1.tick_params(axis='x', colors='w')
    plt.ylabel('Stock price and Volume')
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.suptitle("股票代码:{}".format(ts_code), color='w', fontsize=40)
    plt.show()
    print(stock_data)


if __name__ == "__main__":
    # stock code
    ts_code = "000001.SZ"
    # comparison result
    result = {
        'start_time': '20160412',
        'end_time': '20160707',
        'pearson_index': 0.945269566803306
    }
    read_data = ReadData()
    source_data = read_data.mysql_read_date(ts_code, result["start_time"], result["end_time"])
    k_plot = KLinePlot()
    k_plot.plot_k_line(source_data, ts_code)
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from math import sqrt
from ReadData import ReadData
from ExtractSamples import ExtractSamples
from ColorModels import ColorModels
from CrossValidation import CrossValidation
from RunKMeans import RunKMeans
from RunEM_GMM import RunEM_GMM
from RunCommands import RunFCM, RunPCM
from sklearn.metrics import confusion_matrix, roc_curve
from scipy.stats import multivariate_normal

data_train, labels, locations = ReadData()  # load all data
Data, ObjLabels = ExtractSamples(data_train, labels, locations)  # extract objects

plt.close("all")  # close all image plots

Data = Data / 255  # normalize pixel values to be between 0 and 1
Data_HSV, Data_YIQ, Data_HLS = ColorModels(Data, ObjLabels)  # transform RGB to different color spaces

# 80% of the data for training and 20% for testing, per color space
DTrain, DVal, labelsTrain, labelsVal = CrossValidation(Data, ObjLabels, 0.8, 'RGB')
DTrain_HSV, DVal_HSV, labelsTrain_HSV, labelsVal_HSV = CrossValidation(Data_HSV, ObjLabels, 0.8, 'HSV')
DTrain_YIQ, DVal_YIQ, labelsTrain_YIQ, labelsVal_YIQ = CrossValidation(Data_YIQ, ObjLabels, 0.8, 'YIQ')
# logits = drnn(X, W, b)
# predict = tf.nn.softmax(logits)
# Y = predict

# # Cost & Optimizer
# loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learningrate)
# train_op = optimizer.minimize(loss_op)

# # build accuracy
# correct_pred = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
# accuracy = tf.reduce_mean(correct_pred, tf.float32)

# Initiate global variables
init = tf.global_variables_initializer()

readdata = ReadData()
trainingFiles, testingFiles = readdata.filePathConstructor()
features = readdata.input_pipeline(trainingFiles, batch_size)

# Start training
with tf.Session() as sess:
    # init session
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # loop training steps
    for step in range(training_steps):
        # read input data
        example_batch = tf.reshape(features, [-1])
                similar_value = 0.5 * self.calc_pearson(mul_open, atom_open) \
                    + 0.5 * self.calc_pearson(mul_close, atom_close)
                if result["pearson_index"] < similar_value:
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == "__main__":
    read_data = ReadData()
    # Length of the comparison window, in trading days
    num = 60
    # Take the last num days of the stock we want to match
    source_data = read_data.mysql_read_data("002936.SZ").iloc[-num:]
    if len(source_data) < num:
        num = len(source_data)
    # Pick one stock to compare against and drop its last num days of data
    compare_data = read_data.mysql_read_data("000001.SZ").iloc[:-num]
    # Instantiate the comparison class; the inputs are DataFrames
    compare = CompareSimilarKDynamic(source_data, compare_data, num)
    # Return the window with the highest Pearson correlation
    result = compare.compare_dynamic()
    print(result)
def main():
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batch_size)

    with tf.Session() as sess:
        # Create the graph, etc.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Character vocabulary: 'a'..'z' -> 1..26, space -> 0, ';' and '-' -> -1
        dict1 = {
            value: (int(key) + 1)
            for key, value in enumerate(list(string.ascii_lowercase))
        }
        dict1[' '] = 0
        dict1[';'] = -1
        dict1['-'] = -1
        vocab_size = len(dict1)

        for i in range(1):
            example_batch = tf.reshape(features, [-1])
            item = tf.string_split(example_batch, delimiter="").values.eval()
            chars = [dict1[alp.decode().lower()] for alp in list(item)]
            data_size = len(chars)
            print('Data has %d characters, %d unique.' % (data_size, vocab_size))

        # # Hyper-parameters
        # hidden_size = 100  # hidden layer's size
        # seq_length = 25    # number of steps to unroll
        # learning_rate = 1e-1

        # inputs = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="inputs")
        # targets = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
        # init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")

        # initializer = tf.random_normal_initializer(stddev=1.0)

        # with tf.variable_scope("RNN") as scope:
        #     hs_t = init_state
        #     ys = []
        #     for t, xs_t in enumerate(tf.split(inputs, seq_length, axis=0)):
        #         if t > 0: scope.reuse_variables()
        #         Wxh = tf.get_variable("Wxh", shape=[vocab_size, hidden_size], dtype=tf.float32, initializer=initializer)
        #         Whh = tf.get_variable("Whh", shape=[hidden_size, hidden_size], dtype=tf.float32, initializer=initializer)
        #         Why = tf.get_variable("Why", shape=[hidden_size, vocab_size], dtype=tf.float32, initializer=initializer)
        #         bh = tf.get_variable("bh", shape=[hidden_size], initializer=initializer)
        #         by = tf.get_variable("by", shape=[vocab_size], initializer=initializer)
        #         hs_t = tf.tanh(tf.matmul(xs_t, Wxh) + tf.matmul(hs_t, Whh) + bh)
        #         ys_t = tf.matmul(hs_t, Why) + by
        #         ys.append(ys_t)
        #     h_prev = hs_t

        # output_softmax = tf.nn.softmax(ys[-1])
        # outputs = tf.concat(ys, axis=0)
        # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))

        # # Optimizer
        # minimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # grad_and_vars = minimizer.compute_gradients(loss)

        # pred = RNN(chars, weights, biases)
        # # Loss and optimizer
        # # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        # # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
        # # Model evaluation
        # # correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        # # print(example_batch)

        coord.request_stop()
        coord.join(threads)
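# Quick illustration (not part of the original file) of the character mapping
# built above: lowercase letters map to 1..26, space to 0, ';' and '-' to -1.
import string
char_to_id = {ch: i + 1 for i, ch in enumerate(string.ascii_lowercase)}
char_to_id.update({' ': 0, ';': -1, '-': -1})
assert [char_to_id[c] for c in "ab c;"] == [1, 2, 0, 3, -1]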
                self.assignAspect(sentence)
        self.populateAspectWordMat()
        changed = self.calcChiSq()
        self.corpus.aspectSentences.clear()
        for review in self.corpus.allReviews:
            for sentence in review.sentences:
                self.assignAspect(sentence)
        print(self.corpus.aspectKeywords)

    # Saves the object into the given file
    def saveToFile(self, fileName, obj):
        with open(modelDataDir + fileName, 'w') as fp:
            json.dump(obj, fp)


rd = ReadData()
rd.readAspectSeedWords()
rd.readStopWords()
rd.readReviewsFromJson()
rd.removeLessFreqWords()

bootstrapObj = BootStrap(rd)
bootstrapObj.bootStrap()
bootstrapObj.populateLists()
bootstrapObj.saveToFile("wList.json", bootstrapObj.wList)
bootstrapObj.saveToFile("ratingsList.json", bootstrapObj.ratingsList)
bootstrapObj.saveToFile("reviewIdList.json", bootstrapObj.reviewIdList)
bootstrapObj.saveToFile("vocab.json", list(bootstrapObj.corpus.wordFreq.keys()))
bootstrapObj.saveToFile("aspectKeywords.json", bootstrapObj.corpus.aspectKeywords)
                atom_close = self.dynamic(atom_close, "close", index)
                similar_value = 0.5 * self.calc_pearson(mul_open, atom_open) \
                    + 0.5 * self.calc_pearson(mul_close, atom_close)
                if result["pearson_index"] < similar_value:
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == '__main__':
    read_data = ReadData()
    ts_code_list = read_data.mysql_read_ts_code()[:]
    print("----------------Completed ts_code reading--------------------")
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print("--------Start calculation----------: {0}".format(start_time))
    results_dict = {}
    source_data = read_data.mysql_read_data("000009.SZ").iloc[-60:]
    for ts_code in ts_code_list:
        # print(ts_code)
        compare_data = read_data.mysql_read_data(ts_code).iloc[:-60]
        if len(compare_data) < 60:
            print("Not enough data for this stock")
        else:
            compare = CompareSimilarKDynamic(source_data, compare_data, 60)
            result = compare.compare_dynamic()
            results_dict[ts_code] = result
    # ,test_size=test_size)

    # --------DROP ID column from train and test
    # if ISTRAIN == 1:
    tmpModel, df = trainingAlgo(X_train, y_train, X_test, y_test)
    model = tmpModel
    """
    _, acc, rocScore = models.evaluateModel(X_test, y_test, tmpModel)
    if roc < rocScore:
        roc = rocScore
        model = tmpModel
    print("Accuracy %f, ROC Score %f" % (acc, roc))
    """
    return model, df


# ------Get feature set and create classes
readData = ReadData(".", "HomeCredit", "sa", "Pass@123")
models = Models()
featureSet = readData.getData("dbo.FeatureSet")
featureSet = models.convertCategoricalVaribalesToOneHotEncoding(featureSet)
featureSet = models.addFeatures(featureSet)

train = featureSet[featureSet["TARGET"] != -1]
test = featureSet[featureSet["TARGET"] == -1]

test_ids = test["SK_ID_CURR"]
test.drop(["TARGET", "SK_ID_CURR"], axis=1, inplace=True)
train.drop(["SK_ID_CURR"], axis=1, inplace=True)
train["TARGET"] = train["TARGET"].astype("category")
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

model.load_weights(args.weights)
print('Model Loaded from {}.'.format(args.weights))
model.summary()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}

if args.model == 'sentence_pair':
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=False)

test_data = pd.read_excel(args.dataset, sheet_name=None)['Sheet1']
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))
print(test_data.columns)

assert len(test_data.columns) > 1, "Labels of Test set not available."
def main(args):
    # check the number of input arguments
    if len(args) != 6:
        print('Should Have Six Input Arguments')
        exit(0)

    # input parameters
    L = int(args[0])
    K = int(args[1])
    training_set_file_name = args[2]
    validation_set_file_name = args[3]
    test_set_file_name = args[4]
    to_print = True if args[5].lower() == 'yes' else False

    path = './' + DATA_DIRECTORY + '/'

    # read data from the training set, validation set, and test set
    rd = ReadData()
    labels, training_set = rd.createDataSet(path + training_set_file_name)
    labels, validation_set = rd.createDataSet(path + validation_set_file_name)
    labels, test_set = rd.createDataSet(path + test_set_file_name)

    # build trees
    dt = DecisionTree()
    info_gain_tree_root = dt.buildDT(training_set, labels.copy(), 'information_gain')
    pruned_info_gain_tree_root = dt.pruneTree(info_gain_tree_root, L, K, validation_set, labels)
    variance_impurity_tree_root = dt.buildDT(training_set, labels.copy(), 'variance_impurity')
    pruned_variance_impurity_tree_root = dt.pruneTree(
        variance_impurity_tree_root, L, K, validation_set, labels)

    print()
    info_accuracy = dt.calAccuracy(test_set, info_gain_tree_root, labels)
    print('Accuracy of decision tree constructed using information gain: %s'
          % info_accuracy)
    variance_accuracy = dt.calAccuracy(test_set, variance_impurity_tree_root, labels)
    print('Accuracy of decision tree constructed using variance impurity: %s'
          % variance_accuracy)
    prune_info_accuracy = dt.calAccuracy(test_set, pruned_info_gain_tree_root, labels)
    print('Accuracy of pruned decision tree constructed using information gain: %s'
          % prune_info_accuracy)
    pruned_variance_accuracy = dt.calAccuracy(
        test_set, pruned_variance_impurity_tree_root, labels)
    print('Accuracy of pruned decision tree constructed using variance impurity: %s'
          % pruned_variance_accuracy)

    if to_print:
        print()
        print('Build Decision Tree By Using Information Gain')
        info_gain_tree_root.printTree()
        print()
        print()
        print('Build Decision Tree By Using Variance Impurity')
        variance_impurity_tree_root.printTree()
        print()
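# Example invocation (hypothetical file names), assuming main() receives
# sys.argv[1:] in the order <L> <K> <training> <validation> <test> <to_print>:
#   python main.py 10 5 training_set.csv validation_set.csv test_set.csv yes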