def update_file(self, filename):
    print('<updating from file....>:\n%s' % filename)
    data = ReadData(filename, True, None)
    TotalLines = data.countLines()
    print('[Total Lines] = ', TotalLines)
    initLineNo = TotalLines // 1000
    stepLength = TotalLines // 10
    nextLineNo = initLineNo
    iLine = 0
    starttime = time.time()
    # question lines in the training data
    R21 = re.compile(r'^21 (.+)\t+([\S]+)\t+([\S]+)$')
    for line in data:
        iLine += 1
        # show progress
        if iLine >= nextLineNo:
            timesofar = (time.time() - starttime) / 60
            totaltime = (timesofar * TotalLines / iLine)
            timeleft = (timesofar * (TotalLines - iLine) / iLine)
            print(
                '[Progress]: %3.2f%% (%d/%d) %.2f/%.2fmins %.2fmins left' %
                (iLine / TotalLines * 100, iLine, TotalLines, timesofar,
                 totaltime, timeleft))
            if nextLineNo == initLineNo:
                nextLineNo = stepLength
            else:
                nextLineNo += stepLength
        Qline = R21.search(line)
        if Qline:
            Question = Qline.group(1)
            CorrectAnswer = Qline.group(2)
            # turn questions in the training data into normal sentences
            line = Question.replace('XXXXX', CorrectAnswer)
        self.update_line(line)
def __init__(self, file_name):
    print(file_name)
    a = ReadData(file_name)
    self.fl = a.fl
    self.is_freq = False
    self.remove_files = True
    a.total_1_std(0.5)
def __init__(self, file_name, type_to_run, run_removed):
    print(file_name)
    self.type_to_run = type_to_run
    a = ReadData(file_name)
    self.fl = a.fl
    self.is_freq = False
    self.run_removed = run_removed
    self.clf_dir = "clfs"
    if self.run_removed:
        a.total_1_std(0.5)
        self.clf_dir = "removed_clfs"
    self.logs = a.run_code()
def load_inputs(self, load_path, ext_len=None, resample_rate=0.2):
    label_set = None
    print("Reading data...")
    read_data = ReadData(normalize=self.normalize)
    if self.predict:
        data_set = read_data.load_data(path=load_path, ext_len=ext_len)
    else:
        data_set, label_set = read_data.get_data(path=load_path, ext_len=ext_len)
    print("Finished reading data...")
    print("Number of samples in the data set: {}".format(len(data_set)))
    if self.is_resample:
        print("Downsampling data...")
        data_set = [random_resample(sample.T, resample_rate=resample_rate)
                    for sample in data_set]
    if self.spectrogram:
        data_set = [ecg_spectrogram(sample.T) for sample in data_set]
    if self.doFFT:
        print("Applying FFT...")
        data_set = [doFFT(sample.T) for sample in data_set]
    if self.bandpass_filter:
        print("bandpass_filter...")
        data_set = [bandpass_filter(sample, 20, 400, 1000, 6) for sample in data_set]
    if self.wavelet:
        print("Computing wavelet coefficients...")
        data_set = [pywt_swt(sample) for sample in data_set]
    if self.remove_spike:
        print("Removing spikes...")
        data_set = [schmidt_spike_removal(sample.T) for sample in data_set]
    if self.MFCC:
        print("Computing MFCC...")
        data_set = [MFCC(sample) for sample in data_set]
    if self.data_split and label_set is not None:
        print("Splitting the data set...")
        # split into training and validation sets
        X_tr, y_tr, X_valid, y_valid = data_split(data_set, label_set, 5, 4)
        print("Data set split finished...")
        return (X_tr, y_tr, X_valid, y_valid)
    else:
        if self.predict:
            return data_set
        return (data_set, label_set)
def main_copybuf(data_file):
    write = Write("output")
    s = Sequence(
        ReadData(),
        Split([
            (
                Variable("x", lambda vec: vec[0]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("y", lambda vec: vec[1]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("z", lambda vec: vec[2]),
                Histogram(mesh((-10, 10), 10)),
            ),
        ]),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
        # write,
        # RenderLaTeX("histogram_1d.tex", "templates"),
        # write,
        # LaTeXToPDF(),
        # PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    s = Sequence(
        ReadData(),
        Split([
            (
                lambda vec: vec[0],
                Histogram(mesh((-10, 10), 10)),
                ToCSV(),
                Print(),
                Write("output", "x"),
            ),
            (
                lambda vec: vec[1],
                Histogram(mesh((-10, 10), 10)),
                ToCSV(),
                Write("output", "y"),
            ),
            # (
            #     lambda vec: vec[2],
            #     Histogram(mesh((-10, 10), 10)),
            #     ToCSV(),
            #     Write("output", ("z", "csv")),
            # ),
        ]),
        RenderLaTeX("histogram_1d.tex", "templates"),
        Write("output"),
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    write = Write("output")
    s = Sequence(
        ReadData(),
        Split([
            (
                Variable("x", lambda vec: vec[0]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("y", lambda vec: vec[1]),
                Histogram(mesh((-10, 10), 10)),
            ),
            (
                Variable("z", lambda vec: vec[2]),
                Histogram(mesh((-10, 10), 10)),
            ),
        ]),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
        write,
        RenderLaTeX("histogram_1d.tex", "templates"),
        write,
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
def main():
    TrainPipeLine(
        'data/bitstampUSD_1-min_data_2012-01-01_to_2019-03-13.csv',
        ReadData(),
        PreprocessingProcedure1D(),
        TrainProcedureKeras(
            KerasLinear1D(saved_model_path="models/saved_model.h5"))).execute()
def main_no_copybuf(data_file):
    s = Sequence(
        ReadData(),
        Split(
            [
                (
                    lambda vec: vec[0],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("x"),
                ),
                (
                    lambda vec: vec[1],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("y"),
                ),
                (
                    lambda vec: vec[2],
                    Histogram(mesh((-10, 10), 10)),
                    MakeFilename("z"),
                ),
            ],
            copy_buf=False,
        ),
        MakeFilename("{{variable.name}}"),
        ToCSV(),
    )
    results = s.run([data_file])
    for res in results:
        print(res)
def __init__(self, padding):
    self.quantizes_valid = QuantizesValid()
    self.read_data = ReadData()
    self.padding = padding
    if "back" in self.padding:
        self.stride = 2
    else:
        self.stride = 1
    self.folder_parameter = os.path.join('Parameter', 'Padding_' + self.padding)
def update_file(self, filename):
    filename = filename.replace('.txt', '_WP.txt')
    regrex_lineNum = re.compile(r'(\d+)\t(.*)')
    regrex_blank = re.compile(r'XXXXX:[\w$]+')
    print('<updating from file....>:\n%s' % filename)
    data = ReadData(filename, True, None)
    TotalLines = data.countLines()
    print('[Total Lines] = ', TotalLines)
    initLineNo = TotalLines // 1000
    stepLength = TotalLines // 10
    nextLineNo = initLineNo
    iLine = 0
    starttime = time.time()
    for line in data:
        iLine += 1
        # show progress
        if iLine >= nextLineNo:
            timesofar = (time.time() - starttime) / 60
            totaltime = (timesofar * TotalLines / iLine)
            timeleft = (timesofar * (TotalLines - iLine) / iLine)
            print(
                '[Progress]: %3.2f%% (%d/%d) %.2f/%.2fmins %.2fmins left' %
                (iLine / TotalLines * 100, iLine, TotalLines, timesofar,
                 totaltime, timeleft))
            if nextLineNo == initLineNo:
                nextLineNo = stepLength
            else:
                nextLineNo += stepLength
        mLineNum = regrex_lineNum.search(line)
        if mLineNum:
            # question lines in the training data
            if int(mLineNum.group(1)) == 21:
                Question = mLineNum.group(2).split('\t')[0]
                CorrectAnswer = mLineNum.group(2).split('\t')[1]
                # turn questions in the training data into normal sentences
                line = Question.replace(
                    regrex_blank.search(Question).group(0), CorrectAnswer)
            else:
                line = mLineNum.group(2)
        self.update_line(line)
def main_worker(args):
    device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    beginning = time.time()
    read_data = ReadData("/ibex/scratch/mag0a/Github/data/aminer.txt")
    print("read file cost time: ", time.time() - beginning)
    # dataset = Copus(read_data)
    dataset = Copus("./data")
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)
    print("dataloader cost time: ", time.time() - beginning)
    idx2word = read_data.idx2word
    wc = read_data.word_count
    wf = np.array([wc[word] for word in idx2word])
    wf = wf / wf.sum()
    weights = t.tensor(wf) if args.weights else None
    if weights is not None:
        wf = t.pow(weights, 0.75)
        weights = (wf / wf.sum()).float()
        # move the negative-sampling weights to the device once, up front
        weights = weights.to(device)
    model = SGNS(100000, 128, n_negs=20)
    model = nn.DataParallel(model)
    model.to(device)
    optimizer = Adam(model.parameters(), lr=0.025)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, len(dataloader))
    print("training preparation cost time: ", time.time() - beginning)
    model.train()
    for epoch in range(4):
        for i, (u, v) in enumerate(tqdm(dataloader)):
            u, v = u.to(device), v.to(device)
            optimizer.zero_grad()
            loss = model(u, v, weights)
            loss = loss.mean()
            loss.backward()
            optimizer.step()
            scheduler.step()
            running_loss = loss.item()
            if i > 0 and i % 1000 == 0:
                print(" Loss: " + str(running_loss))
        t.save(model.state_dict(), 'model_%s.pkl' % epoch)
def main():
    data_file = os.path.join("..", "data", "normal_3d.csv")
    s = Sequence(
        ReadData(),
        lambda dt: (dt[0][0], dt[1]),
        Histogram(mesh((-10, 10), 10)),
        ToCSV(),
        MakeFilename("x"),
        Write("output"),
        RenderLaTeX("histogram_1d.tex"),
        Write("output"),
        LaTeXToPDF(),
        PDFToPNG(),
    )
    results = s.run([data_file])
    print(list(results))
def __init__(self, mode, padding, stride):
    self.quantizes_valid = QuantizesValid()
    self.self_padding = SelfPadding(padding)
    self.self_padding.stride = stride
    self.read_data = ReadData()
    self.mode = mode
    if "Convolution_1v1" in self.mode:
        self.padding = "same"
        self.stride = 1
    elif "back" == padding:
        self.padding = padding
        self.stride = 2
    else:
        self.padding = padding
        self.stride = stride
    self.folder_parameter = os.path.join(
        'Parameter',
        self.mode + "_" + self.padding + "_stride" + str(self.stride))
def update_file(self, filename):
    print('<update_file....>:\n%s' % filename)
    data = ReadData(filename, True, None)
    for line in data:
        one = None
        two = None
        for word in self.__pre.getToken(line):
            if two:
                if two not in self:
                    self[two] = WordDict(1)
                self[two][word] = WordDict(2)
                self[two][word].add()
                if one:
                    if word not in self[one][two]:
                        self[one][two][word] = Three()
                    self[one][two][word].add()
            one = two
            two = word
def __init__(self):
    self.quantizes_valid = QuantizesValid()
    self.read_data = ReadData()
    self.folder_parameter = os.path.join('Parameter', 'Dense_channel512')
class SelfDense:
    def __init__(self):
        self.quantizes_valid = QuantizesValid()
        self.read_data = ReadData()
        self.folder_parameter = os.path.join('Parameter', 'Dense_channel512')

    def data(self):
        train_x = list(np.arange(4, -4, -0.03125)[0:256])
        train_x.extend(np.arange(-4, 4, 0.03125)[0:256])
        train_y = list(np.arange(8, -8, -0.125)[0:5])
        train_y.extend(np.arange(-8, 8, 0.125)[0:5])
        train_x_binary = self.quantizes_valid.values_to_binary(train_x)
        file_name = 'input_image'
        self.write_output(file_name, train_x_binary)
        train_x = self.quantizes_valid.quantizes(train_x)
        train_x = np.array(train_x).reshape(1, 1, 1, 512)
        train_y = self.quantizes_valid.quantizes(train_y)
        train_y = np.array(train_y).reshape(1, 1, 1, len(train_y))
        file_name = 'input_image.txt'
        self.write_output(file_name, train_x)
        np.save(os.path.join(self.folder_parameter, 'train_x'), train_x)
        np.save(os.path.join(self.folder_parameter, 'train_y'), train_y)
        return train_x, train_y

    def valid(self):
        train_x = np.load(os.path.join(self.folder_parameter, 'train_x.npy'))
        train_y = np.load(os.path.join(self.folder_parameter, 'train_y.npy'))
        output_shape = train_y.shape
        parameter = {'weight': list(), 'bias': list()}
        weights = list()
        for weight in parameter.keys():
            self.read_data.file = os.path.join(self.folder_parameter,
                                               weight + '.txt')
            parameter[weight] = self.read_data.read_values()
            a = self.quantizes_valid.quantizes(parameter[weight])
            weights.append(a)
        print('quantizes weight:', weights)
        output = list()
        for x in range(output_shape[3]):
            output.append(0.0)
        # convolution 1*1
        data_test = ""
        x_index = 0
        y_index = 0
        for index, input_data in enumerate(train_x.reshape(-1, 1)):
            for fliter in range(output_shape[3]):
                if fliter == 0 and y_index == 0 and (not index == 0):
                    x_index += 1
                # if (y_index * output_shape[3] + fliter) == 40:
                #     print(output)
                #     aaaaa
                print(x_index, fliter, y_index,
                      y_index * output_shape[3] + fliter)
                print(weights[0][y_index * output_shape[3] + fliter],
                      input_data)
                data_test += str(index) + "," + str(fliter) + "," + str(
                    y_index) + "," + str(
                        x_index * output_shape[3] + fliter) + "," + str(
                            input_data) + "," + str(
                                weights[0][y_index * output_shape[3] +
                                           fliter]) + '\n'
                output[x_index * output_shape[3] + fliter] += weights[0][
                    y_index * output_shape[3] + fliter] * input_data
                if y_index == train_x.shape[3] - 1 and fliter == output_shape[3] - 1:
                    y_index = 0
                elif fliter == output_shape[3] - 1:
                    y_index += 1
        # dense
        # for index, x in enumerate(train_x.reshape(-1, 1)):
        #     for fliter in range(output_shape[3]):
        #         print(weights[0][index * output_shape[3] + fliter], x)
        #         output[fliter] += weights[0][index * output_shape[3] + fliter] * x
        file_name = 'dense_test.txt'
        self.write_output(file_name, data_test)
        print(output)
        data_out = ''
        for index, x in enumerate(output):
            x = weights[1][index] + x
            data_out += str(x) + '\n'
            print('output data:', x)
        file_name = 'dense_valid.txt'
        self.write_output(file_name, data_out)

    def model(self):
        shape = (1, 1, 512)
        input = Input(shape=shape)
        x = Dense(units=10)(input)
        # x = Dense(units=10, activation='softmax')(input)
        model = Model(inputs=input, outputs=x)
        return model

    def training(self, model):
        files_name = ['weight', 'bias']
        parameter = {'weight': list(), 'bias': list()}
        for index, key in enumerate(parameter.keys()):
            parameter_output = ""
            for x in model.layers[1].get_weights()[index].reshape(-1, 1):
                parameter_output += str(x) + '\n'
                parameter[key].append(float(x))
            file_name = files_name[index] + '.txt'
            self.write_output(file_name, parameter_output)
            weights = self.quantizes_valid.quantizes(parameter[key])
            weights = self.quantizes_valid.values_to_binary(weights)
            self.write_output(files_name[index], weights)
        return model

    def output(self, model, data_out):
        file_name = 'dense.txt'
        self.write_output(file_name, data_out.reshape(-1, 1))
        data_out_binary = self.quantizes_valid.values_to_binary(
            data_out.reshape(-1, 1))
        print(data_out_binary)
        file_name = 'dense'
        self.write_output(file_name, data_out_binary)
        print('output data', data_out)

    def write_output(self, file_name, values):
        if not os.path.isdir(self.folder_parameter):
            os.makedirs(r'%s/%s' % ('Parameter', 'Dense_channel512'))
        path_file = os.path.join(self.folder_parameter, file_name)
        with open(path_file, 'w') as f:
            f.writelines(str(values))
parser.add_argument('--project', default=None, type=str2None)  # project onto the 5 most frequent documents
parser.add_argument('--balance', default=None, type=str2None)  # upsample classes for learning curves and possible model improvement
args = parser.parse_args()
corr = args.corr
project = args.project
balance = args.balance

# read documents from the train and test jsonl files and convert them to pandas DataFrames
read_train = ReadData('train.jsonl')
read_test = ReadData('test.jsonl')
df_train = read_train.read_data()

# calculate the minimal and maximal number of raw words in documents
df_train['doc_size'] = df_train['text'].apply(lambda x: calculate_docs_size(x))
print('Max len of raw document in train set: {}'.format(df_train['doc_size'].max()))
print('Min len of raw document in train set: {}'.format(df_train['doc_size'].min()))

df_test = read_test.read_data()
# batchnorm dense convolution globalaverage mobilenetv2cifar10 padding
file_name = "mobilenetv2cifar10"
# batchnorm1 2 3
mode = "batchnorm3"
# folder = get_folder()
# Mobilenetv2Cifar10 Conv1 DwConv bottleneck0_layer bottleneck1_layer
# bottleneck2_layer Conv_3v3
folder_parameter = os.path.join('Parameter', "Mobilenetv2Cifar10")
# folder_parameter = os.path.join('Parameter', folder)
# software hardware
selector = "hardware"

if __name__ == '__main__':
    valid = ''
    read_data = ReadData()
    quantize_valid = QuantizesValid()
    if selector == "hardware":
        read_data.type = 'binaries'
        read_data.file = os.path.join(folder_parameter, 'output_verilog.txt')
        binaries = read_data.read_values()
        print(binaries)
        # binaries = quantize_valid.binary_to_values(binaries)
        binaries = quantize_valid.binary_to_values(binaries[0:10])
        valid = binaries
        print(binaries)
    elif selector == "software":
        read_data.file = os.path.join(folder_parameter, file_name + '_valid.txt')
        values = read_data.read_values()
        valid = values
        print(values)
import streamlit as st

from config import *
from read_data import ReadData
from text_processing import TextProcessing

st.set_page_config(layout="wide")
st.markdown(
    "<h1 style='text-align: center; color: black;'>Multipurpose Natural Language Processing App</h1>",
    unsafe_allow_html=True)
st.markdown(Config.hide_streamlit_style, unsafe_allow_html=True)

data_choice = st.radio("Select your preferred way of data input",
                       ('Upload a file', 'Direct text input'))
if data_choice == 'Upload a file':
    uploaded_file = st.sidebar.file_uploader("Upload your file:", type=['txt'])
    read_obj = ReadData(uploaded_file)
    data = read_obj.read_file_txt()
    input_type = True
else:
    data = st.text_input('Input your text here:')
    input_type = False

if data is not None:
    model_option = st.selectbox("Please choose your intended model:",
                                ["Text Summarization"])
    process_obj = TextProcessing(data)
    cleaned_data = process_obj.text_cleaning(input_type)
def test_direct_call():
    from read_data import ReadData
    obj = ReadData(as_int=True)
"~/Jottacloud/data_for_bdt/MSSM_log_MASS_allsquarks.dat" ] # Define list with features for MASS dataset feature_list = [ "3.mGluino", "4.mdL", "5.mdR", "6.muL", "7.muR", "8.msL", "9.msR", "10.mcL", "11.mcR" ] target_list = ["2.gg_NLO"] # The data files *_MASS.txt contains a column with NaNs, this must be removed drop_col = 'Unnamed: 15' features, target, features_test, target_test = ReadData(files, feature_list, target_list, drop_col, eps=1E-9, squark_mean=True, train_test=True) # Set file suffix: suffix = "LS_loss" # Where to save plots directory = "plots/" #################################################################### # Load saved model if it exist # reg = joblib.load('BDT_LS_loss.pkl') ####################################################################
class SelfBatchNormalization:
    def __init__(self, mode):
        self.quantizes_valid = QuantizesValid()
        self.read_data = ReadData()
        self.mode = mode
        self.folder_parameter = os.path.join('Parameter', self.mode)

    def data(self):
        train_x = list(np.arange(4, -4, -0.125)[0:64])
        train_x.extend(np.arange(-4, 4, 0.125)[0:64])
        train_y = list(np.arange(8, -8, -0.125)[0:64])
        train_y.extend(np.arange(-8, 8, 0.125)[0:64])
        train_x_binary = self.quantizes_valid.values_to_binary(train_x)
        file_name = 'input_image'
        self.write_output(file_name, train_x_binary)
        train_x = self.quantizes_valid.quantizes(train_x)
        train_x = np.array(train_x).reshape(1, 4, 4, 8)
        train_y = self.quantizes_valid.quantizes(train_y)
        train_y = np.array(train_y).reshape(1, 4, 4, 8)
        file_name = 'input_image.txt'
        self.write_output(file_name, train_x)
        np.save(os.path.join(self.folder_parameter, 'train_x'), train_x)
        return train_x, train_y

    def valid2(self, train_x, output_shape, parameter):
        weights = list()
        for index in range(len(parameter.values())):
            weight = list()
            weight.append(parameter['gamma'][index])
            weight.append(parameter['beta'][index])
            weight.append(parameter['mean'][index])
            weight.append(parameter['variance'][index])
            a = self.quantizes(weight)
            weights.append(a)
        print('quantizes weight:', weights)
        for index, x in enumerate(train_x.reshape(-1, 1)):
            a = weights[index % train_x.shape[3]]
            if self.mode == "batchnorm1" or self.mode == "batchnorm2":
                x = (x - a[2]) * a[3] * a[0] + a[1]
            elif self.mode == "batchnorm3":
                x = x * a[3] + a[2]

    def valid(self):
        train_x = np.load(os.path.join(self.folder_parameter, 'train_x.npy'))
        parameter = {
            'gamma': list(),
            'beta': list(),
            'mean': list(),
            'variance': list()
        }
        for weight in parameter.keys():
            self.read_data.file = os.path.join(self.folder_parameter,
                                               weight + '.txt')
            parameter[weight] = self.read_data.read_values()
        weights = list()
        for index in range(len(parameter.values())):
            weight = list()
            weight.append(parameter['gamma'][index])
            weight.append(parameter['beta'][index])
            weight.append(parameter['mean'][index])
            weight.append(parameter['variance'][index])
            a = self.quantizes(weight)
            weights.append(a)
        print('quantizes weight:', weights)
        data_out = ''
        for index, x in enumerate(train_x.reshape(-1, 1)):
            a = weights[index % train_x.shape[3]]
            if "batchnorm1" in self.mode or "batchnorm2" in self.mode:
                # print(x - a[2])
                # print((x - a[2]) * a[3])
                # print((x - a[2]) * a[3] * a[0])
                # print((x - a[2]) * a[3] * a[0] + a[1])
                x = (x - a[2]) * a[3] * a[0] + a[1]
            elif "batchnorm3" in self.mode:
                # print(x)
                # print(x * a[3])
                # print(x * a[3] + a[2])
                x = x * a[3] + a[2]
            data_out += str(x) + '\n'
            print('output data:', x)
        file_name = 'batchnorm_valid.txt'
        self.write_output(file_name, data_out)

    def model(self):
        shape = (4, 4, 8)
        input = Input(shape=shape)
        x = BatchNormalization()(input)
        model = Model(inputs=input, outputs=x)
        return model

    def quantizes(self, weights):
        if "batchnorm1" in self.mode:
            weights[3] = 1 / np.sqrt(weights[3])
            weights = self.quantizes_valid.quantizes(weights)
        elif "batchnorm2" in self.mode:
            weights[3] = np.sqrt(weights[3])
            for index, x in enumerate(self.quantizes_valid.quantizes(weights)):
                weights[index] = x
            weights[3] = 1 / weights[3]
            weights[3] = self.quantizes_valid.quantizes([weights[3]])
        elif "batchnorm3" in self.mode:
            weights[2] = -(1 / np.sqrt(weights[3]) * weights[2] *
                           weights[0]) + weights[1]
            weights[3] = 1 / np.sqrt(weights[3]) * weights[0]
            weights = self.quantizes_valid.quantizes(weights)
        return weights

    def training(self, model):
        files_name = ['gamma', 'beta', 'mean', 'variance']
        variance = list()
        mean = list()
        gamma = list()
        beta = list()
        for index in range(len(files_name)):
            weight_output = ""
            for x in model.layers[1].get_weights()[index]:
                weight_output += str(x) + '\n'
            file_name = files_name[index] + '.txt'
            self.write_output(file_name, weight_output)
            print('original weight :', files_name[index])
        for index in range(len(model.layers[1].get_weights()[0])):
            weights = list()
            weights.append(model.layers[1].get_weights()[0][index])
            weights.append(model.layers[1].get_weights()[1][index])
            weights.append(model.layers[1].get_weights()[2][index])
            weights.append(model.layers[1].get_weights()[3][index])
            print('original weight :', weights)
            weights = self.quantizes(weights)
            variance.append(weights[3])
            mean.append(weights[2])
            gamma.append(weights[0])
            beta.append(weights[1])
        variance = self.quantizes_valid.values_to_binary(variance)
        file_name = 'variance'
        self.write_output(file_name, variance)
        mean = self.quantizes_valid.values_to_binary(mean)
        file_name = 'mean'
        self.write_output(file_name, mean)
        gamma = self.quantizes_valid.values_to_binary(gamma)
        file_name = 'gamma'
        self.write_output(file_name, gamma)
        beta = self.quantizes_valid.values_to_binary(beta)
        file_name = 'beta'
        self.write_output(file_name, beta)
        # weights = quantizes(weights)
        # weights_quantize = list()
        # for x in weights:
        #     weights_quantize.append(np.array([x], dtype=np.float32))
        # # print('quantizes weight :', weights_quantize)
        # model.layers[1].set_weights(weights_quantize)
        print(model.layers[1].get_weights())
        json_string = model.to_json()
        with open(os.path.join(self.folder_parameter, "batchnorm model.json"),
                  "w") as text_file:
            text_file.write(json_string)
        model.save(os.path.join(self.folder_parameter, "batchnorm model.hdf5"))
        return model

    def output(self, model, data_out):
        file_name = 'batchnorm.txt'
        self.write_output(file_name, data_out.reshape(-1, 1))
        data_out_binary = self.quantizes_valid.values_to_binary(
            data_out.reshape(-1, 1))
        print(data_out_binary)
        file_name = 'batchnorm'
        self.write_output(file_name, data_out_binary)
        print('output data', data_out)
        print('gamma:', K.eval(model.layers[1].gamma))
        print('beta:', K.eval(model.layers[1].beta))
        print('moving_mean:', K.eval(model.layers[1].moving_mean))
        print('moving_variance:', K.eval(model.layers[1].moving_variance))
        # print('epsilon :', model.layers[1].epsilon)
        # print('data_in :', data_in)
        # print('data_out:', data_out)

    def write_output(self, file_name, values):
        if not os.path.isdir(self.folder_parameter):
            os.makedirs(r'%s/%s' % ('Parameter', self.mode))
        path_file = os.path.join(self.folder_parameter, file_name)
        with open(path_file, 'w') as f:
            f.writelines(str(values))
def __init__(self, mode):
    self.quantizes_valid = QuantizesValid()
    self.read_data = ReadData()
    self.mode = mode
    self.folder_parameter = os.path.join('Parameter', self.mode)
def __init__(self, Parameters):
    CITY = Parameters.city
    DAYS = Parameters.days
    BUDGET = Parameters.budget
    VISITED = Parameters.visited
    BOUNDRYCONDITIONS = Parameters.boundryConditions
    INTEREST = Parameters.interest
    dataFile = CITY + ".xlsx"
    durationFile = CITY + "_duration.xls"
    cityData = ReadData(dataFile)
    cityDuration = ReadDurations(durationFile)
    DESTINATIONS = np.setdiff1d(range(len(cityData)), VISITED)
    self.n = len(DESTINATIONS)
    self.TRAVELTIME = np.zeros((self.n + 2 * DAYS, self.n + 2 * DAYS))
    data = []
    p = 0
    for i in DESTINATIONS:
        data.append(cityData[i])
        q = 0
        for j in DESTINATIONS:
            self.TRAVELTIME[p, q] = cityDuration[i][j]
            q = q + 1
        p = p + 1
    data = np.asarray(data)
    self.ID = np.asarray(data[:, 0], dtype=int)
    self.COORDINATES = data[:, 1:3]
    self.HAPPINESS = data[:, 3] + (data[:, 4:9] * INTEREST).sum(axis=1)
    self.COST = data[:, 9]
    self.OPENTIME = np.append(data[:, 10], np.zeros((2 * DAYS, 1)))
    self.CLOSETIME = data[:, 11]
    self.SERVICETIME = np.append(data[:, 12], np.zeros((2 * DAYS, 1)))
    self.DAYS = DAYS
    self.TMAX = BOUNDRYCONDITIONS[:, 5]
    self.BUDGET = BUDGET
    self.TMIN = BOUNDRYCONDITIONS[:, 4]
    keyId = 0
    GoogleResp = FindDurations(
        [BOUNDRYCONDITIONS[0][0], BOUNDRYCONDITIONS[0][1]],
        self.COORDINATES, keyId)
    startDuration = GoogleResp[0]
    if ((BOUNDRYCONDITIONS[0][0] == BOUNDRYCONDITIONS[0][2])
            and (BOUNDRYCONDITIONS[0][1] == BOUNDRYCONDITIONS[0][3])):
        stayDuration = GoogleResp[0]
    else:
        keyId = GoogleResp[1]
        GoogleResp = FindDurations(
            [BOUNDRYCONDITIONS[0][2], BOUNDRYCONDITIONS[0][3]],
            self.COORDINATES, keyId)
        stayDuration = GoogleResp[0]
    self.TRAVELTIME[self.n, 0:self.n] = startDuration
    self.TRAVELTIME[0:self.n, self.n] = startDuration[:]
    self.TRAVELTIME[self.n + 1, 0:self.n] = stayDuration
    self.TRAVELTIME[0:self.n, self.n + 1] = stayDuration[:]
    if (DAYS > 1):
        for i in range(1, DAYS):
            for j in range(2):
                self.TRAVELTIME[self.n + 2 * i + j, 0:self.n] = stayDuration
                self.TRAVELTIME[0:self.n, self.n + 2 * i + j] = stayDuration[:]
    self.TRAVELTIME = self.TRAVELTIME * 1.2
    for i in range(DAYS):
        for j in range(2):
            self.ID = np.append(self.ID, cityData.shape[0] + j)
            newCoordinates = [
                BOUNDRYCONDITIONS[i][2 * j], BOUNDRYCONDITIONS[i][2 * j + 1]
            ]
            self.COORDINATES = np.vstack([self.COORDINATES, newCoordinates])
from read_data import ReadData
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from transformer.feature_transformer import FeatureTransformer
from sklearn.metrics import accuracy_score
from sklearn import svm

training_data = ReadData.readData()
df_train = pd.DataFrame(training_data)

# test data
test_data = []
test_data.append({
    "feature": u"hiện tại công nghệ đang phát triển nhanh, công nghệ Var trong sân",
    "target": ""
})
df_test = pd.DataFrame(test_data)

pipe_line = Pipeline([
    ("transformer", FeatureTransformer()),
    ("vect", CountVectorizer()),  # bag-of-words
    ("clf", svm.SVC(C=1.0, kernel='linear', gamma='auto', probability=True))
])

clf = pipe_line.fit(df_train["feature"], df_train.target)
predicted = clf.predict(df_test["feature"])
print(clf.predict_proba(df_test["feature"]))