Code Example #1
    def __init__(self,
                 probability,
                 file_name='./resource/data.csv',
                 feature_name=None):
        if probability <= 0.05:
            self.scale = 50
        elif 0.05 < probability <= 0.5:
            self.scale = 35
        else:
            self.scale = 10
        self.flag = []
        if feature_name is None:
            feature_name = [
                'POLAR', 'ELEVATION', 'FREQUENCY', 'R_RAINHEIGHT', 'RR_002',
                'A_002'
            ]
        data = reader.data_formatting(
            reader.data_extracter(feature_num=91,
                                  file_name=file_name,
                                  data_size=614))
        data = dp.shuffle(data)
        self.feature, self.label = reader.feature_extracter(data,
                                                            feature_name,
                                                            has_name=False)

        self.feature = reader.data_formatting(self.feature)
        self.label = reader.data_formatting(self.label)
        self.feature, self.label = dp.kill_missing(self.feature, self.label)
        self.feature = dp.fill_missing(self.feature, method='mean')
        for i in range(4):
            self.flag.append(self.feature.pop(-2))
        #print(self.flag)
        self.vaild_list = []
Code Example #2
def predict(sen, sess):

    sen = Data_preprocessing.clean(sen)
    sen_vec = Data_preprocessing.convert_sen_to_vector(sen)
    X_batch = Data_preprocessing.create_batch(sen_vec, hyperparams.BATCH_SIZE)
    feed_dict = {
        model.inputs: X_batch,
        model.keep_probs: 1,
        model.decoder_sequence_length: [20] * hyperparams.BATCH_SIZE,
        model.encoder_sequence_length: [len(X_batch[0])] * hyperparams.BATCH_SIZE,
    }

    output_batch = sess.run([model.predictions], feed_dict=feed_dict)
    output = output_batch[0]

    return output
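A possible driver for predict is sketched below. It is only an illustration: the saver construction and the checkpoint path are assumptions (Code Example #4 below saves checkpoints under checkpoint/), not part of this snippet, and the model graph is assumed to be built at import time as in training.

import tensorflow as tf

# Hypothetical usage sketch: restore a trained checkpoint and answer one query.
# The checkpoint path "checkpoint/chatbot_9.ckpt" is illustrative only.
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, "checkpoint/chatbot_9.ckpt")
    answer_ids = predict("how are you", sess)
    print(answer_ids)  # integer token ids; map back to words with the vocabulary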
Code Example #3
    def regression(self):
        stock_data = Parser.parse_open(self.path_open + self.name)

        normalised_time = Dp.time_conversion(stock_data,
                                             time_interval='week',
                                             number=1)
        index_start = len(normalised_time)
        scaled_time = Dp.stock_regime_data_scale(normalised_time)
        stock_trade = Dp.stock_trade_normalisation(stock_data[1,
                                                              -index_start:])
        stock_previous_close = Dp.stock_trade_normalisation(
            stock_data[3, -index_start:])
        stock_open = Dp.stock_trade_normalisation(stock_data[4, -index_start:])
        stock_day_high, stock_day_low = Dp.get_hig_and_low(
            stock_trade, normalised_time)
        #stock_day_high = Dp.stock_trade_normalisation(stock_day_high)
        #stock_day_low = Dp.stock_trade_normalisation(stock_day_low)
        returns = Dp.stock_trade_normalisation(stock_trade -
                                               np.roll(stock_trade, 2))
        df = np.array([stock_open, stock_day_high])
        regimes = Regime_constructor.regime_construct_1(
            np.reshape(df, (-1, df.shape[0])))

        Data_plotter.plot_regimes(
            np.asarray([normalised_time, returns, regimes]))

        length_of_normalised_time = len(normalised_time)
        start_index_stock_trade = len(stock_trade) - index_start
        reg = linear_model.LinearRegression()
        print(len(stock_trade[start_index_stock_trade:]), index_start)
        # LinearRegression expects X with shape (n_samples, n_features),
        # so the time axis is reshaped into a single-feature column vector.
        reg.fit(np.reshape(normalised_time, (-1, 1)),
                stock_trade[start_index_stock_trade:])
        linear_model_coefficient = reg.coef_
        print(linear_model_coefficient)
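For reference, a self-contained minimal sketch of scikit-learn's LinearRegression on synthetic data (not the stock data above); it illustrates that fit expects X as a 2-D array of shape (n_samples, n_features), which is why the time axis is reshaped into a column vector:

import numpy as np
from sklearn import linear_model

X = np.arange(10, dtype=float).reshape(-1, 1)  # 10 samples, 1 feature
y = 2.0 * X.ravel() + 1.0                      # y = 2x + 1

reg = linear_model.LinearRegression()
reg.fit(X, y)
print(reg.coef_, reg.intercept_)               # approximately [2.] and 1.0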
Code Example #4
def train(session):
    for i in range(hyperparams.EPOCHS):
        epoch_accuracy = []
        epoch_loss = []
        for b in range(len(hyperparams.BUCKETS)):

            bucket = Data_preprocessing.get_bucket(b)
            bucket_accuracy = []
            bucket_loss = []

            questions = []
            answers = []

            for k in range(len(bucket)):
                questions.append(np.array(bucket[k][0]))
                answers.append(np.array(bucket[k][1]))

            for j in tqdm(range(len(bucket) // hyperparams.BATCH_SIZE)):

                # Slice out the j-th mini-batch of this bucket.
                X_batch = questions[j * hyperparams.BATCH_SIZE:(j + 1) *
                                    hyperparams.BATCH_SIZE]
                Y_batch = answers[j * hyperparams.BATCH_SIZE:(j + 1) *
                                  hyperparams.BATCH_SIZE]

                feed_dict = {
                    model.inputs: X_batch,
                    model.targets: Y_batch,
                    model.keep_probs: hyperparams.KEEP_PROBS,
                    model.decoder_sequence_length:
                        [len(Y_batch[0])] * hyperparams.BATCH_SIZE,
                    model.encoder_sequence_length:
                        [len(X_batch[0])] * hyperparams.BATCH_SIZE,
                }

                cost, _, preds = session.run(
                    [model.loss, model.opt, model.predictions],
                    feed_dict=feed_dict)

                epoch_accuracy.append(
                    accuracy(np.array(Y_batch), np.array(preds)))
                bucket_accuracy.append(
                    accuracy(np.array(Y_batch), np.array(preds)))

                bucket_loss.append(cost)
                epoch_loss.append(cost)

        print("EPOCH: {}/{}".format(i + 1, hyperparams.EPOCHS),
              " | Epoch loss: {}".format(np.mean(epoch_loss)),
              " | Epoch accuracy: {}".format(np.mean(epoch_accuracy)))

        saver.save(session, "checkpoint/chatbot_{}.ckpt".format(i))
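The loop above relies on an accuracy() helper that is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming targets and predictions are integer id arrays padded to the same shape:

import numpy as np

def accuracy(targets, predictions):
    # Fraction of positions where the predicted token id equals the target id.
    # Assumes both arrays have already been padded/truncated to the same shape.
    return np.mean(np.equal(targets, predictions))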
Code Example #5
import tensorflow as tf
import pickle
import Data_preprocessing


# Loading the saved parameters
def load_params():
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)


# Getting the source and target vocabularies
_, (source_vocab_to_int, target_vocab_to_int), (
    source_int_to_vocab,
    target_int_to_vocab) = Data_preprocessing.load_preprocess()

load_path = load_params()

batch_size = 30


# Converting a word to a sequence of integer ids (one id per character)
def word_to_seq(word, vocab_to_int):
    results = []
    for ch in list(word):
        if ch in vocab_to_int:
            results.append(vocab_to_int[ch])
        else:
            results.append(vocab_to_int['<UNK>'])
    return results
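A small usage sketch for word_to_seq with a toy character vocabulary (the vocabulary below is illustrative only; the real mappings come from load_preprocess()), assuming the function returns the results list as above:

toy_vocab_to_int = {'<UNK>': 0, 'c': 1, 'a': 2, 't': 3}
print(word_to_seq('cat', toy_vocab_to_int))  # [1, 2, 3]
print(word_to_seq('cab', toy_vocab_to_int))  # [1, 2, 0] -> 'b' falls back to <UNK>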
Code Example #6
File: main.py  Project: nhatanh174/Auto-detecting-bug
columns = "id bug_id summary description report_time report_timestamp status commit commit_timestamp files Unnamed:10".split()
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
df = pd.DataFrame(X_train, columns=columns)
df.to_csv(r"Training_Test\AspectJ_csv_trainingset.csv")
#-----------------------------------------------------------------------------------------------

# Read file csv and Extract summary and description
inp = pd.read_csv(r'Training_Test\AspectJ_csv_trainingset.csv')
x = inp[['summary', 'description']]

df = pd.DataFrame(x)

df.to_csv(r'Training_Test\AspectJ_process.csv')

#Data pre-processing
pre_bug.Start()

#Word_Embedding
brp.word_embedding()

#Extract feature vector
vecto_detect_bug = efv.Start()
print(vecto_detect_bug)

#---------------for source file--------------


path= r"E:\Pycharm projects\Auto_detect_bug\Training_Test\sourceFile_aspectj"

files=[]        # list name file.java
file.openFolder(path,files,'*.java')
Code Example #7
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Nov  9 16:03:17 2017

@author: diego
"""

import Classifier as c
import Data_preprocessing as d
import runtime_parser as rp
import file_writer as fw

rp.process_runtime_arguments()
x, y, tags = d.load_data(rp.data, rp.n_clases)

num = x.shape[0] if rp.cv[1] == "n" else float(rp.cv[1])
path = fw.create_dir(rp.cv[0], num, rp.classifier)

if rp.classifier == 1:
    fw.add_file_header(path)

if rp.cv[0] == "kf":
    split = d.kfold(x, int(num))
    c.kf_metrics(x, y, split, path, int(num), tags)

else:
    x_train, x_test, y_train, y_test = d.holdout(x, y, num)
    c.h_metrics(x_train, x_test, y_train, y_test, path, tags)
Code Example #8
dataset = pd.read_csv("transliteration.txt",delimiter = "\t",header=None,encoding='utf-8',na_filter = False)

#Splitting English words in X and Hindi words in y
X = dataset.iloc[:,0]
y = dataset.iloc[:,-1]

#importing the preprocessed data 
#The preprocessing is done in a file named Data_preprocessing
import Data_preprocessing

#source_int_text is the English words' processed vector. (Word is Converted to integer vector)
#target_int_text is the Hindi words' processed vector.
#source_vocab_to_int and  source_int_to_vocab are the English dictionaries
#target_vocab_to_int and  target_int_to_vocab are the Hindi dictionaries

(source_int_text, target_int_text,
 source_vocab_to_int, target_vocab_to_int,
 source_int_to_vocab, target_int_to_vocab) = Data_preprocessing.preprocess(X, y)

#encoder and decoder layers are defined in Layers 
#placeholders are defined in Model_Inputs
import Layers
import Model_Inputs


def seq2seq_model(input_data, target_data, keep_prob, batch_size,
                  target_sequence_length,
                  max_target_word_length,
                  source_vocab_size, target_vocab_size,
                  enc_embedding_size, dec_embedding_size,
                  rnn_size, num_layers, target_vocab_to_int):

    """Build the sequence-to-sequence model."""
Code Example #9
    def normalize(self):
        self.feature = dp.normalize(self.feature, type='normal')
        self.label = dp.normalize(self.label,
                                  type='constant',
                                  scale=self.scale)
Code Example #10
    with open(file_with_missing_values, "w") as my_csv:
        csv_writer = csv.writer(my_csv, dialect="excel")
        csv_writer.writerow(header)
        with open(path + "combined_metereological_data.txt", "r") as met_data:
            count_line = 0
            for line in met_data:
                if count_line != 0:
                    line = line.split(",")
                    i = 0
                    row = []
                    while i < len(line):
                        if i < 2:
                            row.append(line[i])
                        elif 2 <= i < (len(line) - 1):
                            if line[i] != "-":
                                row.append(float(line[i]))
                            else:
                                row.append("NaN")
                        i += 1
                    csv_writer.writerow(row)
                count_line += 1


if __name__ == '__main__':
    header = Variables.header_for_metereological
    path = Variables.path_for_metereological_data
    file_with_missing_values = path + "met_data.csv"
    file_without_missing_values = path + "met_data_without_missing_values.csv"
    main(file_with_missing_values, path)
    Data_preprocessing.creating_csv_without_missing_data(
        file_with_missing_values, file_without_missing_values, header)
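For context, a minimal sketch of loading the generated met_data.csv so the "NaN" placeholders written above are parsed as missing values; this assumes pandas is available and that the header written above matches the data columns:

import pandas as pd

# "NaN" strings are recognised as missing values by pandas' default na_values.
met = pd.read_csv(path + "met_data.csv")
print(met.isna().sum())    # number of missing entries per column
met_clean = met.dropna()   # one simple way to drop rows with missing values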