Example #1
def getInputData(batchsize):
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batchsize)
    example_batch = tf.reshape(features, [-1])
    item = tf.string_split(example_batch, delimiter="").values.eval()
    return [dict1[alp.decode().lower()] for alp in list(item)]
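The snippet above relies on names that live elsewhere in the project: a TensorFlow 1.x session (so that .eval() works) and a module-level dict1 mapping characters to integer ids, like the one built in Example #19 below. A minimal sketch of that assumed context, with illustrative values only:

# Illustrative context for getInputData (an assumption, not part of the original project).
import string
import tensorflow as tf  # TF1-style API, matching the .eval() call above

dict1 = {ch: i + 1 for i, ch in enumerate(string.ascii_lowercase)}  # same idea as Example #19
dict1[' '] = 0

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # readdata.input_pipeline may also need queue runners started here,
    # exactly as Example #19 does with tf.train.start_queue_runners().
    encoded = getInputData(batchsize=32)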
def DenoiseByEMD():
    """
    This function denoises the original signal by removing n levels of noises.
    """
    globalData = ReadData()
    removeLevel = Settings.denoiseLevel
    filename = './Cashe/globalData_EMD_{}.pickle'.format(removeLevel)
    # if the denoising is not used (removeLevel = 0)
    if removeLevel < 1:
        globalDataEMD = globalData
        with open(filename, 'wb') as f:
            pickle.dump(globalDataEMD, f)
        return globalDataEMD
    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            globalDataEMD = pickle.load(f)
        return globalDataEMD
    # Imfs
    globalDataImfs = GetImfs(globalData)
    globalDataEMD = {}
    for key in globalData:
        df = globalData[key]
        dfImfs = globalDataImfs[key]
        NLevels = dfImfs.shape[1]
        tsReons = dfImfs[range(Settings.denoiseLevel, NLevels)].sum(axis=1)
        tsReons.name = df.columns[0]
        globalDataEMD[key] =  pd.DataFrame(tsReons)
    # Save the data to the filename as a pickle file
    with open(filename, 'wb') as f:
        pickle.dump(globalDataEMD, f)
    return globalDataEMD
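GetImfs is not shown in this excerpt. For orientation only, the same idea applied to a plain NumPy signal, assuming the PyEMD package (EMD-signal) rather than the project's own helper, looks roughly like this:

# Hedged sketch of EMD denoising: drop the first remove_level IMFs, keep the rest.
import numpy as np
from PyEMD import EMD  # assumption: the EMD-signal package is available

def denoise_emd(signal, remove_level):
    signal = np.asarray(signal, dtype=float)
    imfs = EMD().emd(signal)              # shape: (n_imfs, len(signal))
    if remove_level < 1 or remove_level >= imfs.shape[0]:
        return signal
    # The low-index IMFs carry the highest-frequency content (noise);
    # summing the remaining IMFs reconstructs a smoothed signal.
    return imfs[remove_level:].sum(axis=0)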
Example #3
    def __init__(self):
        self.data = ReadData().please_read_data()

        self.cluster1 = []  # define cluster arrays so we can assign elements to them
        self.cluster2 = []
        self.cluster3 = []
        self.centroid1 = []  #define centroid array so we can update them
        self.centroid2 = []
        self.centroid3 = []
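For context, an assignment step consistent with the attributes above could look like the sketch below. It is purely illustrative and assumes please_read_data returns an iterable of coordinate tuples and that the centroids have already been seeded.

# Illustrative k-means assignment step for the three clusters above (not from the original class).
import math

def assign_to_clusters(self):
    self.cluster1, self.cluster2, self.cluster3 = [], [], []
    for point in self.data:
        distances = [
            math.dist(point, self.centroid1),
            math.dist(point, self.centroid2),
            math.dist(point, self.centroid3),
        ]
        nearest = distances.index(min(distances))
        (self.cluster1, self.cluster2, self.cluster3)[nearest].append(point)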
def main(args):
    #---set up path for training and test data (NUAA face liveness dataset)--------------
    model_name = args.model
    learning_rate = args.lr
    epoch = args.epoch
    with open(path) as file:
        print("Reading from json ... ")
        data = json.load(file)[model_name]
        accuracy = data['accuracy']
        model_file = data['file']
    print("Reading input from the NUAA dataset ... ")
    readd = ReadData()
    clientdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ClientNormalized/'
    imposterdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ImposterNormalized/'
    client_train_normalized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_train_normalized.txt'
    imposter_train_normalized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_train_normalized.txt'

    client_test_normalized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_test_normalized.txt'
    imposter_test_normalized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_test_normalized.txt'

    #---------------read training, test data----------------
    train_images, train_labels = readd.read_data(clientdir, imposterdir, client_train_normalized_file, imposter_train_normalized_file)
    test_images, test_labels = readd.read_data(clientdir, imposterdir, client_test_normalized_file, imposter_test_normalized_file)


    for i in range(0,1):

        #--pick one of the following models for face liveness detection---
        if model_name =='CNN':
            print("Selected CNN")
            cnn = CNNModel()  # simple CNN model for face liveness detection---
        else:
            print("Selected Inception")
            cnn = InceptionV4Model()  #Inception model for liveness detection

        if args.resume:
            print("Resuming from the best model")
            model = cnn.load_model(model_file)#to use pretrained model
        else:
            print("Starting from scratch by creating a new model")
            model = cnn.create_model(learning_rate)  # create and train a new model   
        print("Starting training ...")
        model = cnn.train_model(model, train_images,train_labels,test_images,test_labels, epoch, accuracy, model_file, model_name)
      
        test_loss, test_acc = cnn.evaluate(model, test_images,  test_labels)
        print('iteration = ' + str(i) + ' ---------------------------------------------========')
    print("**************************************Done***************************************")
                  optimizer=optimizer,
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
model.summary()

if args.check_build:
    exit()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}
if args.model == 'sentence_pair':
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=False)

print('Reading Validation Data ..')
val_x, val_y = reader.read_val()

train_generator = reader.generator()

log_dir = args.model
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = RecurrentCNN(no_filters=hidden_size, no_classes=args.no_classes)
elif args.model == 'bilstm_rcnn':
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = BiLSTMRecurrentCNN(hidden_size, no_classes=args.no_classes)

model = model.build(inputs)
model.load_weights(args.weights)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

reader = ReadData(dataset=args.dataset,
                  text_embedding_path=args.text_embedding,
                  video_feature_path=args.video_features,
                  data_shape=inputs,
                  train_val_split=1.)

results = []
labels = []

prog_bar = tqdm(total=int(reader.val_size / args.batch_size))

num_batches = int(reader.val_size / args.batch_size)

i = 0

for x, y in reader.generator_val(batch_size=args.batch_size):
    label = list(y)
    result = list(model.predict(x))
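    # Hedged completion of the truncated loop: accumulate predictions and labels,
    # then report a simple accuracy. The 0.5 threshold and the per-sample handling
    # are assumptions (binary sigmoid outputs), not taken from the original script.
    results.extend(result)
    labels.extend(label)
    prog_bar.update(1)
    i += 1
    if i >= num_batches:
        break

predicted = [1 if r[0] >= 0.5 else 0 for r in results]
correct = sum(int(p == t) for p, t in zip(predicted, labels))
print('Validation accuracy: {:.4f}'.format(correct / max(len(labels), 1)))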
Example #7
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Protoyping"

"""



# ****************************************************
# This is the entry point of the entire project.
# If you want to understand the code flow, start here.
# ****************************************************


rd = ReadData()  # Initializing File Reader Class



featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,2))
featureMapProj = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES,param.PARTICLE_COUNT,3))
featureStore = np.zeros((param.FEATURE_SIZE))
world = np.zeros((param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE + param.ANGULAR_VELOCITY_VECTOR_SIZE)
cameraState = np.zeros((stateVectorSize))

# Variables for archiving position vector, quaternion and features
_x = []
_y = []
_z = []
    def train(self, learning_rate, ind, dep):
        '''Runs training until cost values converge to within some interval'''
        val = self.linreg(learning_rate, ind, dep)
        old_val = 0
        #Can change this variable to decide how much convergence is wanted
        while np.absolute(val-old_val) > 1:
            old_val = val
            val = self.linreg(learning_rate, ind, dep)
        self.getTheta()

    def getTheta(self):
        '''Prints out Value for current weight and bias variables'''
        print "Weight     Bias"
        print self.weight, self.bias

if __name__ == '__main__':
    #command line to run this properly
    #python NiceLinReg.py data.csv [2,3] 1
    np.random.seed(42)
    loader = ReadData()
    loader.load(sys.argv[1], sys.argv[2], int(sys.argv[3]))
    print "Temp Only"
    tempOnly = NiceLinReg()
    dailyTemp = loader.getInd(0)
    DOJIA = loader.getDep()
    tempOnly.train(.000005, dailyTemp, DOJIA)

    print "\nDiff in Temp and avg highest recorded temp"
    diff = NiceLinReg()
    diffList = loader.diff(0,1)
    diff.train(0.000000000049, diffList, DOJIA)
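The linreg method called inside train is not part of the snippet. A hypothetical single-feature gradient-descent step, consistent with self.weight, self.bias and the convergence loop above, might look like this (a sketch under those assumptions, not the original implementation):

    # Hypothetical NiceLinReg.linreg: one mean-squared-error gradient step, returns the cost.
    def linreg(self, learning_rate, ind, dep):
        ind = np.asarray(ind, dtype=float)
        dep = np.asarray(dep, dtype=float)
        error = self.weight * ind + self.bias - dep
        self.weight -= learning_rate * 2 * np.mean(error * ind)
        self.bias -= learning_rate * 2 * np.mean(error)
        return np.mean(error ** 2)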
Example #9
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May  5 01:20:32 2018

@author: computer
"""

from ReadData import ReadData
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
import gc 

data = ReadData()

data.X_train.drop(data.X_train.index[:int( 1e+8 )], inplace=True)
data.y_train.drop(data.y_train.index[:int( 1e+8 )], inplace=True)



X_train, X_valid, y_train, y_valid = train_test_split(data.X_train, data.y_train, test_size=0.1)

cat_features = X_train.columns.get_indexer_for(data.CATEGORICAL_FEATURES)

del data; gc.collect()

train_pool = Pool(X_train, y_train, cat_features=cat_features)

del X_train; del y_train; gc.collect()
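CatBoostRegressor is imported above but never used in the excerpt; a hedged continuation that fits the pools just built would be (hyperparameters are placeholders):

# Hedged continuation: train a CatBoostRegressor on the pool built above.
valid_pool = Pool(X_valid, y_valid, cat_features=cat_features)

model = CatBoostRegressor(iterations=1000,
                          learning_rate=0.05,
                          depth=8,
                          eval_metric='RMSE',
                          verbose=100)
model.fit(train_pool, eval_set=valid_pool, use_best_model=True)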
Example #10
def mainFunc():
    filepath = 'D:/Code/readfile/data.csv'
    d = ReadData(filepath)
    print(d.data.keys())
Example #11
    if args.model.endswith('lstm'):
        model = ConvLSTMModel1(hidden_states,
                               classes,
                               attention_size=attention_size,
                               use_attention=args.use_attention)
    elif args.model.endswith('deep'):
        model = ConvLSTMModel2(hidden_states,
                               classes,
                               attention_size=attention_size,
                               use_attention=args.use_attention)
    else:
        model = ConvModel(classes)

reader = ReadData(args.training_csv,
                  args.embedding,
                  args.classes,
                  batch_size=args.batch_size,
                  no_samples=args.no_samples,
                  train_val_split=args.train_val_split)

print('Reading Validation data.')
val_x, val_y = reader.read_all_val()
if args.model.startswith('cnn'):
    val_x = np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1))

with tf.name_scope('Model'):
    prediction = model.model(x)

with tf.name_scope('Loss'):
    crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,
                                                          labels=y)
    cost_func = (tf.reduce_mean(crossent)) / args.batch_size
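The excerpt stops at the loss; in this TF1 style the next step is usually an optimizer op and an accuracy metric. A minimal hedged sketch (the optimizer choice and learning rate are assumptions):

# Hedged sketch of the training and evaluation ops for the cost above (TF1 API).
with tf.name_scope('Optimizer'):
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost_func)

with tf.name_scope('Accuracy'):
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))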
Example #12
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
############
from ReadData import ReadData
from SHE import SHE
#####Parameters of experimental device#########
global pixel_width  #pixel width,unit:nm
pixel_width = 320e3
lam_x = 0.124  # X-ray wavelength, unit:nm
dis_s2d = 5300e6  #sample-to-detector distance, unit:nm
############constant definition################
pi = math.pi
###############################################
init_data = ReadData("data/1.4/50k-132-1.401286.asc")
px_min = 122
px_max = 362
pz_min = 166
pz_max = 326
cen_px = (px_max + px_min) / 2
cen_pz = (pz_max + pz_min) / 2
I = init_data[pz_min:(pz_max + 1), px_min:(px_max + 1)]
plt.imshow(I[:, 40:200])
plt.axis('off')
plt.colorbar()
#EI = np.sqrt(I)
#I_max = 0.05*np.max(I)
#########################
#x = pixel_width*np.arange(px_min-cen_px,px_max-cen_px+1)
#z = pixel_width*np.arange(pz_min-cen_pz,pz_max-cen_pz+1)
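The commented-out lines sketch detector coordinates; converting pixels to scattering vectors with the wavelength and sample-to-detector distance defined above uses the standard small-angle relation q = (4π/λ)·sin(θ). A hedged sketch, not taken from the project:

# Hedged sketch: detector pixel coordinates -> scattering vector components (unit: nm^-1).
x = pixel_width * np.arange(px_min - cen_px, px_max - cen_px + 1)
z = pixel_width * np.arange(pz_min - cen_pz, pz_max - cen_pz + 1)
theta_x = 0.5 * np.arctan(x / dis_s2d)   # half of the scattering angle
theta_z = 0.5 * np.arctan(z / dis_s2d)
qx = 4 * pi / lam_x * np.sin(theta_x)
qz = 4 * pi / lam_x * np.sin(theta_z)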
Example #13
    H = np.dot(np.linalg.pinv(R), P).T
    x_est = np.dot(H, y)
    err = x - x_est
    g = lambda x: np.dot(H, x)
    R = np.dot(err, err.T) / l
    return g, R


if os.name == 'posix':
    trainPath = os.path.abspath('.') + '/Data/train.txt'
    testPath = os.path.abspath('.') + '/Data/test.txt'
elif os.name == 'nt':
    trainPath = os.path.abspath('.') + '\\Data\\train.txt'
    testPath = os.path.abspath('.') + '\\Data\\test.txt'

TrainX, TrainY = ReadData(trainPath, 'train')
# TestX, TestY = ReadData(testPath, 'test')

# two types of Test DataSet: hold out the last 500 columns as a test split
# (taken before truncating the training set, so train and test do not overlap)
TestX = TrainX[:, -500:]
TestY = TrainY[:, -500:]
TrainX = TrainX[:, 0:-500]
TrainY = TrainY[:, 0:-500]

xTrainDim, TrainLen = np.shape(TrainX)
print(str(xTrainDim) + " " + str(TrainLen))
yTrainDim, TrainLen = np.shape(TrainY)
xTestDim, TestLen = np.shape(TestX)
print(str(yTrainDim) + " " + str(TestLen))

# kalman filter
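The Kalman-filter code itself is cut off at this point. For reference, a generic linear predict/update step in NumPy (illustrative only; A, C, Q, R are assumed model matrices, not the project's):

# Generic linear Kalman filter step (sketch, not the original implementation).
def kalman_step(x_est, P_est, y_obs, A, C, Q, R):
    # predict
    x_pred = A @ x_est
    P_pred = A @ P_est @ A.T + Q
    # update with the new observation y_obs
    S = C @ P_pred @ C.T + R
    K = P_pred @ C.T @ np.linalg.inv(S)
    x_new = x_pred + K @ (y_obs - C @ x_pred)
    P_new = (np.eye(P_pred.shape[0]) - K @ C) @ P_pred
    return x_new, P_new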
Example #14
    def get_HvM(self):

        # Read Meta
        Meta = ReadMeta(neuralfeaturesdir)
        DF_img = Meta.get_DF_img()
        DF_neu = Meta.get_DF_neu()
        times = Meta.get_times()

        # Read Neural data
        Data = ReadData(datadir, DF_neu)
        IT, V4 = Data.get_data()

        D = Mapping.get_Neu_trial_V36(IT[1:], [70, 170], times)
        image_indices = np.random.randint(low=0, high=D.shape[1], size=ni)
        D = D[:, image_indices, :]
        D = np.swapaxes(D, 0, 1)
        nf = D.shape[1]
        nt = D.shape[2]

        mu = np.zeros((self.nf, self.ni))
        sd = np.zeros((self.nf, self.ni))
        for f in range(self.nf):
            for i in range(self.ni):
                mu[f, i] = D[i, f, :].mean()
                sd[f, i] = D[i, f, :].std()
        hf = h5py.File(resultdir + 'HvM_stats.h5', 'w')
        hf.create_dataset('mu', data=mu)
        hf.create_dataset('sd', data=sd)
        hf.close()

        # #test synthetic as HvM
        # nf = 168
        # nt = 46
        # noise_dist = 'poisson'
        # sds = np.logspace(-1, 1, num=int(nf))
        # D = np.zeros((ni, nf, nt))
        # D_mean = np.random.rand(ni, nf)
        # for tr in range(nt):
        #     D[:, :, tr] = D_mean
        #
        # noise1 = np.zeros((ni, nf, int(nt * splitfract)))
        # noise2 = np.zeros((ni, nf, int(nt * splitfract)))
        # for i in range(ni):
        #     if noise_dist == 'normal':
        #         n = np.random.rand()
        #         noise1[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
        #         noise2[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
        #     elif noise_dist == 'poisson':
        #         n = np.random.rand()
        #         noise1[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
        #         noise2[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
        #
        #     D[:, :, :int(nt * splitfract)] = D[:, :, :int(nt * splitfract)] + noise1
        #     D[:, :, int(nt * splitfract):] = D[:, :, int(nt * splitfract):] + noise2

        # to test  HvM as syntheic
        # hf = h5py.File(resultdir+'D.h5', 'w')
        # hf.create_dataset('D', data=D)
        # hf.close()

        sds = []
        Collinearity = 'HvM'
        noise_dist = 'HvM'

        return D
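A small usage note: the statistics written to HvM_stats.h5 above can be read back with h5py, e.g.:

# Reading the saved statistics back (assumes the same resultdir as above).
with h5py.File(resultdir + 'HvM_stats.h5', 'r') as hf:
    mu = hf['mu'][:]
    sd = hf['sd'][:]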
Example #15
from GA import GA
from ReadData import ReadData
from utils import *

data = ReadData("easy_01_tsp.txt")
params = {'popSize': 100, 'noGen': 100}
ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()
bestFitness = 0
bestDist = 0
bestChromoOverallRepres = None
for g in range(ga.getParam()['noGen']):
    ga.oneGenerationElitism()
    # ga.oneGeneration()
    # ga.oneGenerationSteadyState()
    bestChromo = ga.bestChromosome()
    if bestChromo.fitness > bestFitness:
        bestChromoOverallRepres = bestChromo.repres
        bestFitness = bestChromo.fitness
        bestDist = str(dist(bestChromo.repres, ga.getProblParam()))
    print('Best solution in generation ' + str(g) + ' is: ' +str(bestChromo.repres) + ' fitness = ' + str(bestChromo.fitness) + ' dist: ' + str(dist(bestChromo.repres,ga.getProblParam())))
print("\n")
print('Best solution overall is: ' +  str(bestChromoOverallRepres) + ' fitness = ' + str(bestFitness) + ' dist: ' + str(bestDist))

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from math import sqrt

from ReadData import ReadData
from ExtractSamples import ExtractSamples
from ColorModels import ColorModels
from CrossValidation import CrossValidation
from RunKMeans import RunKMeans
from RunEM_GMM import RunEM_GMM
from RunCommands import RunFCM, RunPCM
from sklearn.metrics import confusion_matrix, roc_curve
from scipy.stats import multivariate_normal

data_train, labels, locations = ReadData()  # Load all data
Data, ObjLabels = ExtractSamples(data_train, labels,
                                 locations)  # extract objects
plt.close("all")  # close all image plots
Data = Data / 255  #Normalize pixel values to be between 0 and 1
Data_HSV, Data_YIQ, Data_HLS = ColorModels(
    Data, ObjLabels)  # Transform RGB to different color spaces

DTrain, DVal, labelsTrain, labelsVal = CrossValidation(
    Data, ObjLabels, 0.8, 'RGB')  #80% of data for training and 20% for testing
DTrain_HSV, DVal_HSV, labelsTrain_HSV, labelsVal_HSV = CrossValidation(
    Data_HSV, ObjLabels, 0.8,
    'HSV')  #80% of data for training and 20% for testing
DTrain_YIQ, DVal_YIQ, labelsTrain_YIQ, labelsVal_YIQ = CrossValidation(
    Data_YIQ, ObjLabels, 0.8,
    'YIQ')  #80% of data for training and 20% for testing
Example #17
            5, 1, stride=2, activation_fn=tf.nn.sigmoid)).tensor


def get_generator_loss(D2):
    '''Loss for the generator. Maximize the probability of generating images that
    the discriminator cannot differentiate.

    Returns:
        see the paper
    '''
    return tf.reduce_mean(
        tf.nn.relu(D2) - D2 + tf.log(1.0 + tf.exp(-tf.abs(D2))))
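For readers puzzling over the return expression: relu(D2) - D2 + log(1 + exp(-|D2|)) is the numerically stable form of softplus(-D2), i.e. -log(sigmoid(D2)), so the generator is trained to make the discriminator score its samples highly. An equivalent TF1 one-liner:

# Equivalent, also numerically stable, form of the generator loss above (TF1 API).
def get_generator_loss_v2(D2):
    return tf.reduce_mean(tf.nn.softplus(-D2))  # softplus(-x) == -log(sigmoid(x))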


if __name__ == "__main__":
    rd = ReadData()

    input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 32 * 32])

    with pt.defaults_scope(activation_fn=tf.nn.elu,
                           batch_normalize=True,
                           learned_moments_update_rate=0.0003,
                           variance_epsilon=0.001,
                           scale_after_normalization=True):
        with tf.variable_scope("model"):
            D1 = discriminator(input_tensor)  # positive examples
            D_params_num = len(tf.trainable_variables())
            G = generator()

        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
             #                                              ,test_size=test_size)
            #--------DROP ID column from train and test
            #if ISTRAIN == 1:
            tmpModel,df = trainingAlgo(X_train,y_train,X_test,y_test)
            model = tmpModel
            """
            _,acc,rocScore = models.evaluateModel(X_test,y_test,tmpModel)
            if roc < rocScore:
                roc = rocScore
                model = tmpModel
                print("Accurachy %f, ROC Score %f" % (acc,roc))
            """
        return model,df

#------Get feature set and create classes   
readData = ReadData(".","HomeCredit","sa","Pass@123")        
models = Models()

featureSet = readData.getData("dbo.FeatureSet")

featureSet = models.convertCategoricalVaribalesToOneHotEncoding(featureSet)
featureSet = models.addFeatures(featureSet)

train = featureSet[featureSet["TARGET"] != -1]
test = featureSet[featureSet["TARGET"] == -1]

test_ids = test["SK_ID_CURR"]

test.drop(["TARGET","SK_ID_CURR"],axis = 1,inplace = True)
train.drop(["SK_ID_CURR"],axis = 1,inplace = True)
train["TARGET"] = train["TARGET"].astype("category")
Example #19
def main():
    readdata = ReadData()

    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batch_size)

    with tf.Session() as sess:
        # Create the graph, etc.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        dict1 = {
            value: (int(key) + 1)
            for key, value in enumerate(list(string.ascii_lowercase))
        }
        dict1[' '] = 0
        dict1[';'] = -1
        dict1['-'] = -1
        vocab_size = len(dict1)
        for i in range(1):
            example_batch = tf.reshape(features, [-1])
            item = tf.string_split(example_batch, delimiter="").values.eval()
            chars = [dict1[alp.decode().lower()] for alp in list(item)]
            data_size = len(chars)
            print('Data has %d characters, %d unique.' %
                  (data_size, vocab_size))

            # # Hyper-parameters
            # hidden_size   = 100  # hidden layer's size
            # seq_length    = 25   # number of steps to unroll
            # learning_rate = 1e-1

            # inputs     = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="inputs")
            # targets    = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
            # init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")

            # intializer = tf.random_normal_initializer(stddev=1.0)

            # with tf.variable_scope("RNN") as scope:
            #     hs_t = init_state
            #     ys = []
            #     for t,xs_t in enumerate(tf.split(inputs,seq_length,axis=0)):
            #         if t > 0:scope.reuse_variables()
            #         Wxh = tf.get_variable("Wxh",shape=[vocab_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Whh = tf.get_variable("Whh",shape=[hidden_size,hidden_size],dtype=tf.float32,intializer=intializer)
            #         Why = tf.get_variable("Why",shape=[hidden_size,vocab_size],dtype=tf.float32,intializer=initializer)
            #         bh = tf.get_variable("bh",shape=[hidden_size],intializer=intializer)
            #         by = tf.get_variable("by",shape=[vocab_size],initializer=intializer)

            #         hs_t = tf.tanh(tf.matmul(xs_t,Wxh) + tf.matmul(hs_t,Whh) + bh)
            #         ys_t = tf.matmul(hs_t,Why) + by
            #         ys.append(ys_t)

            # h_prev = hs_t

            # output_softmax = tf.nn.softmax(ys[-1])

            # outputs = tf.concat(ys,axis=0)
            # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets,logits=outputs))

            # #optimizer
            # minimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            # grad_and_vars = minimizer.compute_gradients(loss)

            # pred = RNN(chars,weights,biases)
            # # Loss and optimizer
            # # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
            # # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

            # # # Model evaluation
            # # correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
            # # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            # # print(example_batch)

        coord.request_stop()
        coord.join(threads)
Example #20
'''
Created on 5 apr. 2020

@author: Alexandraah
'''
from GA import GA
from ReadData import ReadData
from utils import *
import matplotlib.pyplot as plt


data = ReadData("C:\\@Alexandra\\anul2\\semestrul2\\ai\\lab\\laborator4\\berlin.txt")
params = {'popSize': 500, 'noGen': 1000}
ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

res=[]
res1=[]
for i in range(params['noGen']):
    #ga.oneGeneration()
    ga.oneGenerationElitism()
    #ga.oneGenerationSteadyState()
    best = ga.bestChromosome()
    fitnesses = [c.fitness for c in ga.population]
    avgFitness = sum(fitnesses) / len(fitnesses)
    res.append(avgFitness)
    for c in ga.population:
        res1.append(c.fitness)
        print("Fiteness:"+str(c.fitness)+"\n")
    print('Generation: ' + str(i) + '\nBest chromosome: ' + str(best.repres) + '\nLocal best fitness: ' + str(best.fitness)
Example #21
                atom_close = self.dynamic(atom_close,"close",index)
                similar_value = 0.5 * self.calc_pearson(mul_open,atom_open) + 0.5 * self.calc_pearson(mul_close,atom_close)
                if (result["pearson_index"] < similar_value):
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == '__main__':
    read_data = ReadData()
    ts_code_list = read_data.mysql_read_ts_code()[:]
    print("----------------Complete ts_code reading--------------------:{}")
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print("--------Start cal----------:{0}".format(start_time))
    results_dict = {}
    source_data = read_data.mysql_read_data("000009.SZ").iloc[-60:]
    for ts_code in ts_code_list:
        #print(ts_code)
        compare_data = read_data.mysql_read_data(ts_code).iloc[:-60]
        if len(compare_data) < 60:
            print("该股票数据不足")
        else:
            compare = CompareSimilarKDynamic(source_data, compare_data, 60)
            result = compare.compare_dynamic()
            results_dict[ts_code] = result
Example #22
    default='nmt_logs')
parser.add_argument(
    '--inference',
    action="store_true",
    help='Whether to run inference or simply train the network')
parser.add_argument('--pretrained_path', help='Path to Pre-trained Weights')

args = parser.parse_args()

assert args.dataset.endswith('csv'), "Dataset File needs to be in CSV format"
assert 0. <= args.train_val_split < 1., "Train-vs-Validation Split needs to be between [0, 1)"

latent_dim = args.latent_dim

# Reading and Preparing Training/Validation Dataset
reader = ReadData(args.dataset, args.train_val_split, args.language_1,
                  args.language_2)
(X_train, y_train), (X_val, y_val) = reader.prep_data()
train_samples = len(X_train)
val_samples = len(X_val)
num_encoder_tokens = reader.num_encoder_tokens
num_decoder_tokens = reader.num_decoder_tokens

# Loading Embedding Matrix
lang1_embedding = Word2Vec.load(args.lang1_embedding)
lang1_tok = Tokenizer()
lang1_tok.fit_on_texts(reader.language_1_text)

encoder_embedding_matrix = np.zeros((num_encoder_tokens, latent_dim))
for word, i in lang1_tok.word_index.items():
    try:
        embedding_vector = lang1_embedding[word]
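        # Hedged completion of the truncated loop: the bounds check, assignment and
        # except clause follow the usual embedding-matrix pattern and are assumptions,
        # not the original code.
        if i < num_encoder_tokens:
            encoder_embedding_matrix[i] = embedding_vector
    except KeyError:
        # word missing from the Word2Vec vocabulary; its row stays zero
        continue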
Example #23
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

model.load_weights(args.weights)

print('Model Loaded from {}.'.format(args.weights))

model.summary()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}
if args.model == 'sentence_pair':
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=False)

test_data = pd.read_excel(args.dataset, sheet_name=None)['Sheet1']
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))

print(test_data.columns)

assert len(test_data.columns) > 1, "Labels of Test set not available."
Example #24
def main(args):
    # judge input arguments length
    if len(args) != 6:
        print('Should Have Six Input Arguments')
        exit(0)

    # input parameters
    L = int(args[0])
    K = int(args[1])
    training_set_file_name = args[2]
    validation_set_file_name = args[3]
    test_set_file_name = args[4]
    to_print = True if args[5].lower() == 'yes' else False

    path = './' + DATA_DIRECTORY + '/'

    # read data from training set, test set, and validation set
    rd = ReadData()
    labels, training_set = rd.createDataSet(path + training_set_file_name)
    labels, validation_set = rd.createDataSet(path + validation_set_file_name)
    labels, test_set = rd.createDataSet(path + test_set_file_name)

    # build tree
    dt = DecisionTree()

    info_gain_tree_root = dt.buildDT(training_set, labels.copy(),
                                     'information_gain')
    pruned_info_gain_tree_root = dt.pruneTree(info_gain_tree_root, L, K,
                                              validation_set, labels)

    variance_impurity_tree_root = dt.buildDT(training_set, labels.copy(),
                                             'variance_impurity')
    pruned_variance_impurity_tree_root = dt.pruneTree(
        variance_impurity_tree_root, L, K, validation_set, labels)

    print()
    info_accuracy = dt.calAccuracy(test_set, info_gain_tree_root, labels)
    print('Accuracy of decision tree constructed using information gain: %s' %
          info_accuracy)
    variance_accuracy = dt.calAccuracy(test_set, variance_impurity_tree_root,
                                       labels)
    print('Accuracy of decision tree constructed using variance impurity: %s' %
          variance_accuracy)

    prune_info_accuracy = dt.calAccuracy(test_set, pruned_info_gain_tree_root,
                                         labels)
    print(
        'Accuracy of pruned decision tree constructed using information gain: %s'
        % prune_info_accuracy)

    pruned_variance_accuracy = dt.calAccuracy(
        test_set, pruned_variance_impurity_tree_root, labels)
    print(
        'Accuracy of pruned decision tree constructed using variance impurity: %s'
        % pruned_variance_accuracy)

    if (to_print):
        print()
        print('Build Decision Tree By Using Information Gain')
        info_gain_tree_root.printTree()

        print()

        print()
        print('Build Decision Tree By Using Variance Impurity')
        variance_impurity_tree_root.printTree()
        print()
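A hedged usage note: main expects exactly six positional values (L, K, the three data set files, and a yes/no print flag), so a typical driver would pass sys.argv without the script name (file names below are placeholders):

# Hypothetical entry point for the decision-tree example above.
import sys

if __name__ == '__main__':
    # e.g. python decision_tree.py 10 5 training_set.csv validation_set.csv test_set.csv yes
    main(sys.argv[1:])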