Example #1
 def update_file(self, filename):
     print('<updating from file....>:\n%s' % filename)
     data = ReadData(filename, True, None)
     TotalLines = data.countLines()
     print('[Total Lines] = ', TotalLines)
     # integer division: report progress from 0.1% onward, then every 10%
     initLineNo = TotalLines // 1000
     stepLength = TotalLines // 10
     nextLineNo = initLineNo
     iLine = 0
     starttime = time.time()
     # question lines in the training data start with "21"; compile once
     R21 = re.compile(r'^21 (.+)\t+(\S+)\t+(\S+)$')
     for line in data:
         iLine += 1
         # show progress
         if iLine >= nextLineNo:
             timesofar = (time.time() - starttime) / 60
             totaltime = (timesofar * TotalLines / iLine)
             timeleft = (timesofar * (TotalLines - iLine) / iLine)
             print(
                 '[Progress]: %3.2f%% (%d/%d)  %.2f/%.2fmins %.2fmins left'
                 % (iLine / TotalLines * 100, iLine, TotalLines, timesofar,
                    totaltime, timeleft))
             if nextLineNo == initLineNo:
                 nextLineNo = stepLength
             else:
                 nextLineNo += stepLength
         Qline = R21.search(line)
         if Qline:
             Question = Qline.group(1)
             CorrectAnswer = Qline.group(2)
             # fill the blank with the correct answer to turn the question
             # into a normal sentence
             line = Question.replace('XXXXX', CorrectAnswer)
         self.update_line(line)
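The progress report extrapolates linearly from the elapsed time: after processing i of N lines in t minutes, the estimated total run time is t * N / i and the time left is t * (N - i) / i, which is exactly what `totaltime` and `timeleft` compute above.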
Example #2
 def __init__(self, file_name):
     print(file_name)
     a = ReadData(file_name)
     self.fl = a.fl
     self.is_freq = False
     self.remove_files = True
     a.total_1_std(0.5)
 def __init__(self, file_name, type_to_run, run_removed):
     print(file_name)
     self.type_to_run = type_to_run
     a = ReadData(file_name)
     self.fl = a.fl
     self.is_freq = False
     self.run_removed = run_removed
     self.clf_dir = "clfs"
     if self.run_removed:
         a.total_1_std(0.5)
         self.clf_dir = "removed_clfs"
     self.logs = a.run_code()
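Python does not support constructor overloading, so if both `__init__` definitions above sat in the same class, the second would silently replace the first. A minimal merged sketch using default arguments; the `ReadData`, `total_1_std`, and `run_code` calls are taken from the example, while the merged semantics are an assumption:

 def __init__(self, file_name, type_to_run=None, run_removed=False):
     print(file_name)
     self.type_to_run = type_to_run
     a = ReadData(file_name)
     self.fl = a.fl
     self.is_freq = False
     self.run_removed = run_removed
     # assumed merge: only the "removed" variant trims to one standard deviation
     self.clf_dir = "removed_clfs" if run_removed else "clfs"
     if run_removed:
         a.total_1_std(0.5)
     self.logs = a.run_code()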
Example #4
    def load_inputs(self, load_path, ext_len=None, resample_rate=0.2):
        label_set = None
        print("Reading data...")
        read_data = ReadData(normalize=self.normalize)
        if self.predict:
            data_set = read_data.load_data(path=load_path, ext_len=ext_len)
        else:
            data_set, label_set = read_data.get_data(path=load_path, ext_len=ext_len)
        print("Data loading complete...")
        print("Number of samples in the dataset: {}".format(len(data_set)))

        if self.is_resample:
            print("Downsampling data...")
            data_set = [random_resample(sample.T, resample_rate=resample_rate) for sample in data_set]

        if self.spectrogram:
            data_set = [ecg_spectrogram(sample.T) for sample in data_set]

        if self.doFFT:
            print("Applying FFT...")
            data_set = [doFFT(sample.T) for sample in data_set]

        if self.bandpass_filter:
            print("Applying bandpass filter...")
            data_set = [bandpass_filter(sample, 20, 400, 1000, 6) for sample in data_set]

        if self.wavelet:
            print("Computing wavelet coefficients...")
            data_set = [pywt_swt(sample) for sample in data_set]

        if self.remove_spike:
            print("Removing spikes...")
            data_set = [schmidt_spike_removal(sample.T) for sample in data_set]

        if self.MFCC:
            print("Computing MFCC...")
            data_set = [MFCC(sample) for sample in data_set]

        if self.data_split and label_set is not None:
            print("Splitting dataset...")
            X_tr, y_tr, X_valid, y_valid = data_split(data_set, label_set, 5, 4)  # split into training and validation sets
            print("Dataset split complete...")
            return (X_tr, y_tr, X_valid, y_valid)
        else:
            if self.predict:
                return data_set
            else:
                return (data_set, label_set)
Example #5
def main_copybuf(data_file):
    write = Write("output")
    s = Sequence(
        ReadData(),
        Split([
            (
                Variable("x", lambda vec: vec[0]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("y", lambda vec: vec[1]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("z", lambda vec: vec[2]),
                Histogram(mesh((-10, 10), 10)),
            ),
        ]),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
        # write,
        # RenderLaTeX("histogram_1d.tex", "templates"),
        # write,
        # LaTeXToPDF(),
        # PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
Example #6
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    s = Sequence(
        ReadData(),
        Split([
            (
                lambda vec: vec[0],
                Histogram(mesh((-10, 10), 10)),
                ToCSV(),
                Print(),
                Write("output", "x"),
            ),
            (
                lambda vec: vec[1],
                Histogram(mesh((-10, 10), 10)),
                ToCSV(),
                Write("output", "y"),
            ),
            # (
            #     lambda vec: vec[2],
            #     Histogram(mesh((-10, 10), 10)),
            #     ToCSV(),
            #     Write("output", ("z", "csv")),
            # ),
        ]),
        RenderLaTeX("histogram_1d.tex", "templates"),
        Write("output"),
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
Example #7
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    write = Write("output")
    s = Sequence(
        ReadData(),
        Split([
            (
                Variable("x", lambda vec: vec[0]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("y", lambda vec: vec[1]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("z", lambda vec: vec[2]),
                Histogram(mesh((-10, 10), 10)),
            ),
        ]),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
        write,
        RenderLaTeX("histogram_1d.tex", "templates"),
        write,
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
Example #8
def main():
    TrainPipeLine(
        'data/bitstampUSD_1-min_data_2012-01-01_to_2019-03-13.csv', ReadData(),
        PreprocessingProcedure1D(),
        TrainProcedureKeras(
            KerasLinear1D(
                saved_model_path="models/saved_model.h5"))).execute()
Example #9
def main_no_copybuf(data_file):
    s = Sequence(
        ReadData(),
        Split(
            [
                (
                    lambda vec: vec[0],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("x"),
                ),
                (
                    lambda vec: vec[1],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("y"),
                ),
                (
                    lambda vec: vec[2],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("z"),
                ),
            ],
            copy_buf=False,
        ),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
Example #10
 def __init__(self, padding):
     self.quantizes_valid = QuantizesValid()
     self.read_data = ReadData()
     self.padding = padding
     if "back" in self.padding:
         self.stride = 2
     else:
         self.stride = 1
     self.folder_parameter = os.path.join('Parameter',
                                          'Padding_' + self.padding)
Example #11
 def update_file(self, filename):
     filename = filename.replace('.txt', '_WP.txt')
     regex_lineNum = re.compile(r'(\d+)\t(.*)')
     regex_blank = re.compile(r'XXXXX:[\w$]+')
     print('<updating from file....>:\n%s' % filename)
     data = ReadData(filename, True, None)
     TotalLines = data.countLines()
     print('[Total Lines] = ', TotalLines)
     # integer division: report progress from 0.1% onward, then every 10%
     initLineNo = TotalLines // 1000
     stepLength = TotalLines // 10
     nextLineNo = initLineNo
     iLine = 0
     starttime = time.time()
     for line in data:
         iLine += 1
         # show progress
         if iLine >= nextLineNo:
             timesofar = (time.time() - starttime) / 60
             totaltime = (timesofar * TotalLines / iLine)
             timeleft = (timesofar * (TotalLines - iLine) / iLine)
             print(
                 '[Progress]: %3.2f%% (%d/%d)  %.2f/%.2fmins %.2fmins left'
                 % (iLine / TotalLines * 100, iLine, TotalLines, timesofar,
                    totaltime, timeleft))
             if nextLineNo == initLineNo:
                 nextLineNo = stepLength
             else:
                 nextLineNo += stepLength
         mLineNum = regex_lineNum.search(line)
         if mLineNum:
             # question lines in the training data carry line number 21
             if int(mLineNum.group(1)) == 21:
                 Question = mLineNum.group(2).split('\t')[0]
                 CorrectAnswer = mLineNum.group(2).split('\t')[1]
                 # replace the blank marker (assumed present in every question)
                 # with the correct answer to form a normal sentence
                 line = Question.replace(
                     regex_blank.search(Question).group(0), CorrectAnswer)
             else:
                 line = mLineNum.group(2)
             self.update_line(line)
Example #12
def main_worker(args):
    device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    beginning = time.time()

    read_data = ReadData("/ibex/scratch/mag0a/Github/data/aminer.txt")
    print("read file cost time: ", time.time() - beginning)
    # dataset = Copus(read_data)

    dataset = Copus("./data")
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)
    print("dataloader cost time: ", time.time() - beginning)

    idx2word = read_data.idx2word
    wc = read_data.word_count
    wf = np.array([wc[word] for word in idx2word])
    wf = wf / wf.sum()
    weights = t.tensor(wf) if args.weights else None
    if weights is not None:
        wf = t.pow(weights, 0.75)
        weights = (wf / wf.sum()).float()
        weights = weights.to(device)

    model = SGNS(100000, 128, n_negs=20)
    model = nn.DataParallel(model)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=0.025)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, len(dataloader))

    print("training preparation cost time: ", time.time() - beginning)
    model.train()
    for epoch in range(4):
        for i, (u, v) in enumerate(tqdm(dataloader)):
            # weights may be None, so only the batch tensors move per step
            u, v = u.to(device), v.to(device)
            optimizer.zero_grad()
            loss = model(u, v, weights)
            loss = loss.mean()
            loss.backward()
            optimizer.step()
            scheduler.step()

            running_loss = loss.item()
            if i > 0 and i % 1000 == 0:
                print(" Loss: " + str(running_loss))

        t.save(model.state_dict(), 'model_%s.pkl' % epoch)
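Raising the unigram frequencies to the 0.75 power before renormalizing is the smoothed negative-sampling distribution from the original word2vec paper; it shifts probability mass toward rare words. A self-contained sketch of just that step, with made-up counts:

import numpy as np

counts = np.array([50.0, 30.0, 20.0])  # hypothetical word counts
p = counts / counts.sum()              # raw unigram distribution
p_neg = p ** 0.75
p_neg /= p_neg.sum()                   # smoothed: rare words gain probability
print(p_neg)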
Example #13
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    s = Sequence(
        ReadData(),
        lambda dt: (dt[0][0], dt[1]),
        Histogram(mesh((-10, 10), 10)),
        ToCSV(),
        MakeFilename("x"),
        Write("output"),
        RenderLaTeX("histogram_1d.tex"),
        Write("output"),
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    print(list(results))
Example #14
 def __init__(self, mode, padding, stride):
     self.quantizes_valid = QuantizesValid()
     self.self_padding = SelfPadding(padding)
     self.self_padding.stride = stride
     self.read_data = ReadData()
     self.mode = mode
     if "Convolution_1v1" in self.mode:
         self.padding = "same"
         self.stride = 1
     elif "back" == padding:
         self.padding = padding
         self.stride = 2
     else:
         self.padding = padding
         self.stride = stride
     self.folder_parameter = os.path.join(
         'Parameter',
         self.mode + "_" + self.padding + "_stride" + str(self.stride))
Example #15
 def update_file(self, filename):
     print('<update_file....>:\n%s' % filename)
     data = ReadData(filename, True, None)
     for line in data:
         one = None
         two = None
         for word in self.__pre.getToken(line):
             if two:
                 if two not in self:
                     self[two] = WordDict(1)
                 # create the bigram entry only once, then count it
                 # (mirrors the trigram handling below)
                 if word not in self[two]:
                     self[two][word] = WordDict(2)
                 self[two][word].add()
             if one:
                 if word not in self[one][two]:
                     self[one][two][word] = Three()
                 self[one][two][word].add()
             one = two
             two = word
Example #16
 def __init__(self):
     self.quantizes_valid = QuantizesValid()
     self.read_data = ReadData()
     self.folder_parameter = os.path.join('Parameter', 'Dense_channel512')
Example #17
class SelfDense:
    def __init__(self):
        self.quantizes_valid = QuantizesValid()
        self.read_data = ReadData()
        self.folder_parameter = os.path.join('Parameter', 'Dense_channel512')

    def data(self):
        train_x = list(np.arange(4, -4, -0.03125)[0:256])
        train_x.extend(np.arange(-4, 4, 0.03125)[0:256])

        train_y = list(np.arange(8, -8, -0.125)[0:5])
        train_y.extend(np.arange(-8, 8, 0.125)[0:5])

        train_x_binary = self.quantizes_valid.values_to_binary(train_x)
        file_name = 'input_image'
        self.write_output(file_name, train_x_binary)

        train_x = self.quantizes_valid.quantizes(train_x)
        train_x = np.array(train_x).reshape(1, 1, 1, 512)

        train_y = self.quantizes_valid.quantizes(train_y)
        train_y = np.array(train_y).reshape(1, 1, 1, len(train_y))

        file_name = 'input_image.txt'
        self.write_output(file_name, train_x)

        np.save(os.path.join(self.folder_parameter, 'train_x'), train_x)
        np.save(os.path.join(self.folder_parameter, 'train_y'), train_y)

        return train_x, train_y

    def valid(self):
        train_x = np.load(os.path.join(self.folder_parameter, 'train_x.npy'))
        train_y = np.load(os.path.join(self.folder_parameter, 'train_y.npy'))
        output_shape = train_y.shape

        parameter = {'weight': list(), 'bias': list()}

        weights = list()
        for weight in parameter.keys():
            self.read_data.file = os.path.join(self.folder_parameter,
                                               weight + '.txt')
            parameter[weight] = self.read_data.read_values()
            a = self.quantizes_valid.quantizes(parameter[weight])
            weights.append(a)
        print('quantizes weight:', weights)
        output = list()
        for x in range(output_shape[3]):
            output.append(0.0)

        # 1x1 convolution (equivalent to the Dense layer on a 1x1 spatial input)
        data_test = ""
        x_index = 0
        y_index = 0
        for index, input_data in enumerate(train_x.reshape(-1, 1)):
            for filt in range(output_shape[3]):
                if filt == 0 and y_index == 0 and (not index == 0):
                    x_index += 1
                print(x_index, filt, y_index,
                      y_index * output_shape[3] + filt)
                print(weights[0][y_index * output_shape[3] + filt],
                      input_data)
                data_test += ",".join(map(str, (
                    index, filt, y_index,
                    x_index * output_shape[3] + filt,
                    input_data,
                    weights[0][y_index * output_shape[3] + filt],
                ))) + '\n'
                output[x_index * output_shape[3] +
                       filt] += weights[0][y_index * output_shape[3] +
                                           filt] * input_data

                if y_index == train_x.shape[3] - 1 and filt == output_shape[3] - 1:
                    y_index = 0
                elif filt == output_shape[3] - 1:
                    y_index += 1

        # dense formulation of the same computation, kept for reference:
        # for index, x in enumerate(train_x.reshape(-1, 1)):
        #     for filt in range(output_shape[3]):
        #         print(weights[0][index * output_shape[3] + filt], x)
        #         output[filt] += weights[0][index * output_shape[3] + filt] * x
        file_name = 'dense_test.txt'
        self.write_output(file_name, data_test)
        print(output)
        data_out = ''
        for index, x in enumerate(output):
            x = weights[1][index] + x
            data_out += str(x) + '\n'
            print('output data:', x)

        file_name = 'dense_valid.txt'
        self.write_output(file_name, data_out)

    def model(self):
        shape = (1, 1, 512)
        input = Input(shape=shape)
        x = Dense(units=10)(input)
        #        x = Dense(units = 10, activation='softmax')(input)
        model = Model(inputs=input, outputs=x)
        return model

    def training(self, model):
        files_name = ['weight', 'bias']
        parameter = {'weight': list(), 'bias': list()}
        for index, key in enumerate(parameter.keys()):
            parameter_output = ""
            for x in model.layers[1].get_weights()[index].reshape(-1, 1):
                parameter_output += str(x) + '\n'
                parameter[key].append(float(x))

            file_name = files_name[index] + '.txt'
            self.write_output(file_name, parameter_output)

            weights = self.quantizes_valid.quantizes(parameter[key])
            weights = self.quantizes_valid.values_to_binary(weights)
            self.write_output(files_name[index], weights)

        return model

    def output(self, model, data_out):
        file_name = 'dense.txt'
        self.write_output(file_name, data_out.reshape(-1, 1))

        data_out_binary = self.quantizes_valid.values_to_binary(
            data_out.reshape(-1, 1))
        print(data_out_binary)

        file_name = 'dense'
        self.write_output(file_name, data_out_binary)

        print('output data', data_out)

    def write_output(self, file_name, values):
        if not os.path.isdir(self.folder_parameter):
            os.makedirs(self.folder_parameter)

        path_file = os.path.join(self.folder_parameter, file_name)

        with open(path_file, 'w') as f:
            f.writelines(str(values))
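The validation loop above unrolls by hand the 1x1 convolution that a Dense layer reduces to on a 1x1 spatial input: with C input channels and F output channels, output[f] = sum over c of weight[c * F + f] * input[c] + bias[f]. The nested loop performs the multiply-accumulate, and the bias is added afterwards, just before dense_valid.txt is written.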
Example #18
    parser.add_argument('--project', default=None, type=str2None)  # project onto the 5 most frequent documents
    parser.add_argument('--balance', default=None, type=str2None)  # upsample classes for learning curves and possible model improvement

    args = parser.parse_args()

    corr = args.corr
    project = args.project
    balance = args.balance

    # read documents from the train and test JSONL files and convert them to pandas DataFrames
    read_train = ReadData('train.jsonl')
    read_test = ReadData('test.jsonl')

    df_train = read_train.read_data()

    # compute the minimal and maximal number of raw words per document
    df_train['doc_size'] = df_train['text'].apply(calculate_docs_size)

    print('Max len of raw document in train set: {}'.format(df_train['doc_size'].max()))
    print('Min len of raw document in train set: {}'.format(df_train['doc_size'].min()))

    df_test = read_test.read_data()
Example #19
# options: batchnorm, dense, convolution, globalaverage, mobilenetv2cifar10, padding
file_name = "mobilenetv2cifar10"
# batchnorm1 / batchnorm2 / batchnorm3
mode = "batchnorm3"

#folder = get_folder()
#Mobilenetv2Cifar10 Conv1 DwConv bottleneck0_layer bottleneck1_layer
#bottleneck2_layer Conv_3v3
folder_parameter = os.path.join('Parameter', "Mobilenetv2Cifar10")
#folder_parameter = os.path.join('Parameter', folder)
# selector: software or hardware
selector = "hardware"

if __name__ == '__main__':
    valid = ''
    read_data = ReadData()
    quantize_valid = QuantizesValid()
    if selector == "hardware":
        read_data.type = 'binaries'
        read_data.file = os.path.join(folder_parameter, 'output_verilog.txt')
        binaries = read_data.read_values()
        print(binaries)
#        binaries = quantize_valid.binary_to_values(binaries)
        binaries = quantize_valid.binary_to_values(binaries[0:10])
        valid = binaries
        print(binaries)
    elif selector == "software":
        read_data.file = os.path.join(folder_parameter, file_name + '_valid.txt')
        values = read_data.read_values()
        valid = values
        print(values)
Example #20
from config import *
from read_data import ReadData
from text_processing import TextProcessing

st.set_page_config(layout="wide")

st.markdown("<h1 style='text-align: center; color: black;'>Multipurpose Natural Language Processing App</h1>", 
            unsafe_allow_html=True)
st.markdown(Config.hide_streamlit_style, unsafe_allow_html=True)

data_choice = st.radio("Select your preferred way of data input", ('Upload a file', 'Direct text input'))

if data_choice == 'Upload a file':
    uploaded_file = st.sidebar.file_uploader("Upload your file:", type=['txt'])
    read_obj = ReadData(uploaded_file)
    data = read_obj.read_file_txt()
    input_type = True

else:
    data = st.text_input('Input your text here:')
    input_type = False

if data is not None:
    model_option = st.selectbox("Please choose your intended model:", ["Text Summarization"])
    process_obj = TextProcessing(data)
    cleaned_data = process_obj.text_cleaning(input_type)
Example #21
def test_direct_call():
    from read_data import ReadData
    obj = ReadData(as_int=True)
Example #22
    "~/Jottacloud/data_for_bdt/MSSM_log_MASS_allsquarks.dat"
]

# Define list with features for MASS dataset
feature_list = [
    "3.mGluino", "4.mdL", "5.mdR", "6.muL", "7.muR", "8.msL", "9.msR",
    "10.mcL", "11.mcR"
]
target_list = ["2.gg_NLO"]
# The *_MASS.txt data files contain a column of NaNs, which must be removed
drop_col = 'Unnamed: 15'

features, target, features_test, target_test = ReadData(files,
                                                        feature_list,
                                                        target_list,
                                                        drop_col,
                                                        eps=1E-9,
                                                        squark_mean=True,
                                                        train_test=True)

# Set file suffix:
suffix = "LS_loss"
# Where to save plots
directory = "plots/"

####################################################################

# Load the saved model if it exists
# reg          = joblib.load('BDT_LS_loss.pkl')

####################################################################
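The snippet stops before the model is trained; the "LS_loss" suffix and the commented joblib.load suggest a gradient-boosted decision tree with least-squares loss. A hypothetical continuation under that assumption (the estimator choice and hyperparameters are placeholders, not the original author's code):

from sklearn.ensemble import GradientBoostingRegressor
import joblib
import numpy as np

# hypothetical: fit a least-squares-loss BDT and cache it under the name
# expected by the commented joblib.load above
reg = GradientBoostingRegressor(loss="squared_error", n_estimators=200)
reg.fit(features, np.ravel(target))
joblib.dump(reg, "BDT_%s.pkl" % suffix)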
Example #23
class SelfBatchNormalization:
    def __init__(self, mode):
        self.quantizes_valid = QuantizesValid()
        self.read_data = ReadData()
        self.mode = mode
        self.folder_parameter = os.path.join('Parameter', self.mode)

    def data(self):
        train_x = list(np.arange(4, -4, -0.125)[0:64])
        train_x.extend(np.arange(-4, 4, 0.125)[0:64])

        train_y = list(np.arange(8, -8, -0.125)[0:64])
        train_y.extend(np.arange(-8, 8, 0.125)[0:64])

        train_x_binary = self.quantizes_valid.values_to_binary(train_x)
        file_name = 'input_image'
        self.write_output(file_name, train_x_binary)

        train_x = self.quantizes_valid.quantizes(train_x)
        train_x = np.array(train_x).reshape(1, 4, 4, 8)

        train_y = self.quantizes_valid.quantizes(train_y)
        train_y = np.array(train_y).reshape(1, 4, 4, 8)

        file_name = 'input_image.txt'
        self.write_output(file_name, train_x)

        np.save(os.path.join(self.folder_parameter, 'train_x'), train_x)

        return train_x, train_y

    def valid2(self, train_x, output_shape, parameter):
        weights = list()
        # one (gamma, beta, mean, variance) tuple per channel
        for index in range(len(parameter['gamma'])):
            weight = list()
            weight.append(parameter['gamma'][index])
            weight.append(parameter['beta'][index])
            weight.append(parameter['mean'][index])
            weight.append(parameter['variance'][index])

            a = self.quantizes(weight)
            weights.append(a)

        print('quantizes weight:', weights)

        # note: the folded value x is computed but not stored; valid() below
        # is the full version that also writes the results out
        for index, x in enumerate(train_x.reshape(-1, 1)):
            a = weights[index % train_x.shape[3]]
            if self.mode == "batchnorm1" or self.mode == "batchnorm2":
                x = (x - a[2]) * a[3] * a[0] + a[1]
            elif self.mode == "batchnorm3":
                x = x * a[3] + a[2]

    def valid(self):
        train_x = np.load(os.path.join(self.folder_parameter, 'train_x.npy'))

        parameter = {
            'gamma': list(),
            'beta': list(),
            'mean': list(),
            'variance': list()
        }

        for weight in parameter.keys():
            self.read_data.file = os.path.join(self.folder_parameter,
                                               weight + '.txt')
            parameter[weight] = self.read_data.read_values()

        weights = list()
        # one (gamma, beta, mean, variance) tuple per channel
        for index in range(len(parameter['gamma'])):
            weight = list()
            weight.append(parameter['gamma'][index])
            weight.append(parameter['beta'][index])
            weight.append(parameter['mean'][index])
            weight.append(parameter['variance'][index])

            a = self.quantizes(weight)
            weights.append(a)

        print('quantizes weight:', weights)

        data_out = ''
        for index, x in enumerate(train_x.reshape(-1, 1)):
            # channels vary fastest in the flattened array, so index % channels
            # selects the matching per-channel weights
            a = weights[index % train_x.shape[3]]
            if "batchnorm1" in self.mode or "batchnorm2" in self.mode:
                x = (x - a[2]) * a[3] * a[0] + a[1]
            elif "batchnorm3" in self.mode:
                x = x * a[3] + a[2]

            data_out += str(x) + '\n'
            print('output data:', x)
        file_name = 'batchnorm_valid.txt'
        self.write_output(file_name, data_out)

    def model(self):
        shape = (4, 4, 8)
        input = Input(shape=shape)
        x = BatchNormalization()(input)
        model = Model(inputs=input, outputs=x)
        return model

    def quantizes(self, weights):
        if "batchnorm1" in self.mode:
            weights[3] = 1 / np.sqrt(weights[3])
            weights = self.quantizes_valid.quantizes(weights)

        elif "batchnorm2" in self.mode:
            weights[3] = np.sqrt(weights[3])
            for index, x in enumerate(self.quantizes_valid.quantizes(weights)):
                weights[index] = x

            weights[3] = 1 / weights[3]
            weights[3] = self.quantizes_valid.quantizes([weights[3]])
        elif "batchnorm3" in self.mode:
            weights[2] = -(1 / np.sqrt(weights[3]) * weights[2] *
                           weights[0]) + weights[1]
            weights[3] = 1 / np.sqrt(weights[3]) * weights[0]
            weights = self.quantizes_valid.quantizes(weights)

        return weights

    def training(self, model):
        files_name = ['gamma', 'beta', 'mean', 'variance']
        variance = list()
        mean = list()
        gamma = list()
        beta = list()

        for index in range(len(files_name)):
            weight_output = ""
            for x in model.layers[1].get_weights()[index]:
                weight_output += str(x) + '\n'

            file_name = files_name[index] + '.txt'
            self.write_output(file_name, weight_output)
            print('original weight :', files_name[index])

        for index in range(len(model.layers[1].get_weights()[0])):
            weights = list()
            weights.append(model.layers[1].get_weights()[0][index])
            weights.append(model.layers[1].get_weights()[1][index])
            weights.append(model.layers[1].get_weights()[2][index])
            weights.append(model.layers[1].get_weights()[3][index])
            print('original weight :', weights)

            weights = self.quantizes(weights)
            variance.append(weights[3])
            mean.append(weights[2])
            gamma.append(weights[0])
            beta.append(weights[1])

        variance = self.quantizes_valid.values_to_binary(variance)
        file_name = 'variance'
        self.write_output(file_name, variance)

        mean = self.quantizes_valid.values_to_binary(mean)
        file_name = 'mean'
        self.write_output(file_name, mean)

        gamma = self.quantizes_valid.values_to_binary(gamma)
        file_name = 'gamma'
        self.write_output(file_name, gamma)

        beta = self.quantizes_valid.values_to_binary(beta)
        file_name = 'beta'
        self.write_output(file_name, beta)

        #    weights = quantizes(weights)

        #    weights_quantize = list()
        #    for x in weights:
        #        weights_quantize.append(np.array([x], dtype = np.float32))
        #
        #    print('quantizes weight :', weights_quantize)
        #    model.layers[1].set_weights(weights_quantize)
        print(model.layers[1].get_weights())
        #
        json_string = model.to_json()
        with open(os.path.join(self.folder_parameter, "batchnorm model.json"),
                  "w") as text_file:
            text_file.write(json_string)
        model.save(os.path.join(self.folder_parameter, "batchnorm model.hdf5"))
        return model

    def output(self, model, data_out):
        file_name = 'batchnorm.txt'
        self.write_output(file_name, data_out.reshape(-1, 1))

        data_out_binary = self.quantizes_valid.values_to_binary(
            data_out.reshape(-1, 1))
        print(data_out_binary)
        file_name = 'batchnorm'
        self.write_output(file_name, data_out_binary)

        print('output data', data_out)
        print('gamma:', K.eval(model.layers[1].gamma))
        print('beta:', K.eval(model.layers[1].beta))
        print('moving_mean:', K.eval(model.layers[1].moving_mean))
        print('moving_variance:', K.eval(model.layers[1].moving_variance))
#        print('epsilon :', model.layers[1].epsilon)
#        print('data_in :', data_in)
#        print('data_out:', data_out)

    def write_output(self, file_name, values):
        if not os.path.isdir(self.folder_parameter):
            os.makedirs(self.folder_parameter)

        path_file = os.path.join(self.folder_parameter, file_name)

        with open(path_file, 'w') as f:
            f.writelines(str(values))
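For reference, the folding in `quantizes` comes from rewriting batch normalization, y = gamma * (x - mean) / sqrt(variance) + beta (epsilon omitted, as in the code), into the affine form y = s * x + b used by the "batchnorm3" path, with s = gamma / sqrt(variance) and b = beta - gamma * mean / sqrt(variance). The "batchnorm1" and "batchnorm2" modes keep all four parameters and only precompute 1 / sqrt(variance), differing in where the quantization is applied.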
Example #24
 def __init__(self, mode):
     self.quantizes_valid = QuantizesValid()
     self.read_data = ReadData()
     self.mode = mode
     self.folder_parameter = os.path.join('Parameter', self.mode)
Example #25
    def __init__(self, Parameters):

        CITY = Parameters.city
        DAYS = Parameters.days
        BUDGET = Parameters.budget
        VISITED = Parameters.visited
        BOUNDRYCONDITIONS = Parameters.boundryConditions
        INTEREST = Parameters.interest

        dataFile = CITY + ".xlsx"
        durationFile = CITY + "_duration.xls"

        cityData = ReadData(dataFile)
        cityDuration = ReadDurations(durationFile)

        DESTINATIONS = np.setdiff1d(range(len(cityData)), VISITED)
        self.n = len(DESTINATIONS)

        self.TRAVELTIME = np.zeros((self.n + 2 * DAYS, self.n + 2 * DAYS))
        data = []

        p = 0
        for i in DESTINATIONS:
            data.append(cityData[i])
            q = 0
            for j in DESTINATIONS:
                self.TRAVELTIME[p, q] = cityDuration[i][j]
                q = q + 1
            p = p + 1

        data = np.asarray(data)
        self.ID = np.asarray(data[:, 0], dtype=int)
        self.COORDINATES = data[:, 1:3]
        self.HAPPINESS = data[:, 3] + (data[:, 4:9] * INTEREST).sum(axis=1)
        self.COST = data[:, 9]
        self.OPENTIME = np.append(data[:, 10], np.zeros((2 * DAYS, 1)))
        self.CLOSETIME = data[:, 11]
        self.SERVICETIME = np.append(data[:, 12], np.zeros((2 * DAYS, 1)))
        self.DAYS = DAYS
        self.TMAX = BOUNDRYCONDITIONS[:, 5]
        self.BUDGET = BUDGET
        self.TMIN = BOUNDRYCONDITIONS[:, 4]

        keyId = 0
        GoogleResp = FindDurations(
            [BOUNDRYCONDITIONS[0][0], BOUNDRYCONDITIONS[0][1]],
            self.COORDINATES, keyId)
        startDuration = GoogleResp[0]
        if ((BOUNDRYCONDITIONS[0][0] == BOUNDRYCONDITIONS[0][2])
                and (BOUNDRYCONDITIONS[0][1] == BOUNDRYCONDITIONS[0][3])):
            stayDuration = GoogleResp[0]
        else:
            keyId = GoogleResp[1]
            GoogleResp = FindDurations(
                [BOUNDRYCONDITIONS[0][2], BOUNDRYCONDITIONS[0][3]],
                self.COORDINATES, keyId)
            stayDuration = GoogleResp[0]

        self.TRAVELTIME[self.n, 0:self.n] = startDuration
        self.TRAVELTIME[0:self.n, self.n] = startDuration[:]
        self.TRAVELTIME[self.n + 1, 0:self.n] = stayDuration
        self.TRAVELTIME[0:self.n, self.n + 1] = stayDuration[:]

        if (DAYS > 1):
            for i in range(1, DAYS):
                for j in range(2):
                    self.TRAVELTIME[self.n + 2 * i + j,
                                    0:self.n] = stayDuration
                    self.TRAVELTIME[0:self.n,
                                    self.n + 2 * i + j] = stayDuration[:]
        self.TRAVELTIME = self.TRAVELTIME * 1.2

        for i in range(DAYS):
            for j in range(2):
                self.ID = np.append(self.ID, cityData.shape[0] + j)
                newCoordinates = [
                    BOUNDRYCONDITIONS[i][2 * j],
                    BOUNDRYCONDITIONS[i][2 * j + 1]
                ]
                self.COORDINATES = np.vstack(
                    [self.COORDINATES, newCoordinates])
Example #26
from read_data import ReadData
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from transformer.feature_transformer import FeatureTransformer
from sklearn.metrics import accuracy_score

from sklearn import svm

training_data = ReadData.readData()
df_train = pd.DataFrame(training_data)

#  test data
test_data = []
test_data.append({"feature": u"hiện tại công nghệ đang phát triển nhanh, công nghệ Var trong sân", "target": ""})
df_test = pd.DataFrame(test_data)

pipe_line = Pipeline([
    ("transformer", FeatureTransformer()),
    ("vect", CountVectorizer()),  # bag-of-words features
    ("clf", svm.SVC(C=1.0, kernel='linear', gamma='auto', probability=True))
])

clf = pipe_line.fit(df_train["feature"], df_train["target"])

predicted = clf.predict(df_test["feature"])

print(clf.predict_proba(df_test["feature"]))
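Because the SVC is constructed with probability=True, the fitted pipeline exposes predict_proba. A short follow-up sketch (assuming the fit above succeeded) pairs each probability with its class label:

# pair class labels with predicted probabilities for the single test document
for label, prob in zip(clf.classes_, clf.predict_proba(df_test["feature"])[0]):
    print(label, prob)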