from DataReader import DataReader


class ObjectReader():
    # These are the file extensions whose modifications carry extra data:
    # for them an optional level and data pointer are read per modification.
    OptionalInts = ["w3d", "w3a", "w3q"]
    variableTypes = []

    def __init__(self, filename):
        self.filename = filename
        self.read = DataReader(filename)
        self.variableTypes = [
            self.read.int,
            self.read.float,
            self.read.float,
            self.read.string
        ]
        self.fileVersion = self.read.int()
        self.originalInfo = self.readTable()
        self.customInfo = self.readTable()

    def readMod(self):
        modInfo = {}
        modInfo["ID"] = self.read.charArray(4)
        varType = self.read.int()
        if self.filename.split(".")[1] in self.OptionalInts:
            modInfo["level"] = self.read.int()
            modInfo["pointer"] = self.read.int()
        # print(varType)
        modInfo["value"] = self.variableTypes[varType]()
        self.read.int()  # verify / end marker
        return modInfo

    def readObject(self):
        objectData = {}
        objectData["oldID"] = self.read.charArray(4)
        objectData["newID"] = self.read.charArray(4)
        modCount = self.read.int()
        objectData["mods"] = []
        for i in xrange(modCount):
            objectData["mods"].append(self.readMod())
        return objectData

    def readTable(self):
        tmpLen = self.read.int()
        tmpInfo = []
        if tmpLen > 0:
            for i in xrange(tmpLen):
                tmpInfo.append(self.readObject())
        return tmpInfo
def get_answer_part2():
    result = 0
    char_step = 0
    for paren in DataReader.read_data(1)[0]:
        char_step += 1
        result += 1 if paren == '(' else -1
        if result < 0:
            return char_step
def get_answer_part1(self):  # 2572
    position = Position(0, 0)
    houses = {self.get_dict_key_from_position(position): True}
    for move in DataReader.read_data(day=3)[0]:
        position = AdventDayThree.adjust_pos_for_move(position, move)
        houses[self.get_dict_key_from_position(position)] = True
    return len(houses.keys())
def answer_part_one():
    total_size = 0
    for wrapping in DataReader.read_data(day=2):
        dimensions = wrapping.replace('\n', '').split('x')
        int_dimensions = [int(dimensions[0]), int(dimensions[1]), int(dimensions[2])]
        int_dimensions = sorted(int_dimensions)
        total_size += 3 * (int_dimensions[0] * int_dimensions[1])
        total_size += 2 * (int_dimensions[1] * int_dimensions[2])
        total_size += 2 * (int_dimensions[2] * int_dimensions[0])
    return total_size
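A small, hedged sanity check of the folded-slack formula above (the helper name and test values are illustrative, not part of the original solution): with sorted dimensions a <= b <= c, the term 3*a*b equals the usual 2*a*b of surface area plus the a*b of slack, so the total matches 2*(ab+bc+ca) plus the smallest side.

# Hypothetical helper, for illustration only.
def _paper_for(dims_str):
    a, b, c = sorted(int(d) for d in dims_str.split('x'))
    return 3 * a * b + 2 * b * c + 2 * c * a

assert _paper_for('2x3x4') == 58    # 52 surface area + 6 slack
assert _paper_for('1x1x10') == 43   # 42 surface area + 1 slack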
def get_answer_part2(self):  # 2631
    santa = Position(0, 0)
    robot = Position(0, 0)
    houses = {self.get_dict_key_from_position(santa): True}
    move_count = 0
    for move in DataReader.read_data(day=3)[0]:
        move_count += 1
        if move_count % 2:
            santa = self.adjust_pos_for_move(santa, move)
            houses[self.get_dict_key_from_position(santa)] = True
        else:
            robot = self.adjust_pos_for_move(robot, move)
            houses[self.get_dict_key_from_position(robot)] = True
    return len(houses.keys())
class PerceptronModel:
    class TrainSplit:
        def __init__(self):
            self.train = []
            self.test = []

    def __init__(self):
        self.reader = DataReader()
        self.list_of_entities = []
        self.list_of_klasses = []
        self.num_of_folds = 10

    def readTitles(self):
        self.list_of_entities = self.reader.get_list_of_entities()

    def crossValidationSplits(self):
        splits = []
        for fold in range(0, self.num_of_folds):
            split = self.TrainSplit()
            length = len(self.list_of_entities)
            for i in range(0, length):
                if i % self.num_of_folds == fold:
                    split.train.append(self.list_of_entities[i])
                else:
                    split.test.append(self.list_of_entities[i])
            splits.append(split)
        return splits

    def classify(self, entity):
        pass

    def train(self, split):
        pass

    def test(self, split):
        pass
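A minimal, hedged sketch of how crossValidationSplits behaves (the data is made up, and __new__ is used only so the demo does not need a DataReader). Note that, as written, the fold indices land in split.train and the remainder in split.test.

model = PerceptronModel.__new__(PerceptronModel)  # skip __init__ / DataReader for the demo
model.num_of_folds = 10
model.list_of_entities = list(range(25))
splits = model.crossValidationSplits()
print(len(splits))            # 10 splits, one per fold
print(len(splits[0].train))   # 3  -> indices 0, 10, 20 (i % 10 == 0)
print(len(splits[0].test))    # 22 -> all remaining indices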
use_cuda = torch.cuda.is_available()
# use_cuda = False

data_path = "../ivd_data/preprocessed.h5"
indicies_path = "../ivd_data/indices.json"
images_features_path = "../ivd_data/image_features.h5"
crop_features_path = "../ivd_data/image_features_crops.h5"

ts = str(datetime.datetime.fromtimestamp(time()).strftime('%Y_%m_%d_%H_%M'))
output_file = "logs/deciderguesser_output" + ts + ".log"
loss_file = "logs/decider_guesser_loss" + ts + ".log"
hyperparameters_file = "logs/deciderguesser_hyperparameters" + ts + ".log"

dr = DataReader(data_path=data_path, indicies_path=indicies_path,
                images_features_path=images_features_path,
                crop_features_path=crop_features_path)

### Hyperparameters
my_sys = getpass.getuser() == 'nabi'
length = 11
logging = True if my_sys else False
save_models = True if my_sys else False

# OpenNMT Parameters
opt = argparse.Namespace()
opt.batch_size = 1
opt.beam_size = 5
opt.gpu = 0
opt.max_sent_length = 100
opt.replace_unk = True
opt.tgt = None
class ReadW3E():
    def __init__(self, filename):
        self.read = DataReader(filename)
        self.mapInfo = self.ReadMap()

    def ReadMap(self):
        mapInfo = self.ReadHeader()
        mapInfo["info"] = []
        for i in xrange((mapInfo["width"]) * (mapInfo["height"])):
            mapInfo["info"].append(self.ReadTile())
        return mapInfo

    def ReadHeader(self):
        data = {}
        data["fileID"] = self.read.charArray(4)
        data["formatVersion"] = self.read.int()
        data["mainTileSet"] = self.read.char()
        data["customTileSet"] = self.read.int()  # actually is a boolean
        data["groundTileSets"] = self.ReadTileset()
        data["cliffTileSets"] = self.ReadTileset()
        data["width"] = self.read.int()
        data["height"] = self.read.int()
        data["offsetX"] = self.read.float()
        data["offsetY"] = self.read.float()
        return data

    def ReadTileset(self):
        length = self.read.int()
        info = []
        for i in range(0, length):
            info.append(self.read.charArray(4))
        return info

    def ReadTile(self):
        tmpData = {}
        tmpData["groundHeight"] = self.read.short()
        tmpData["waterLevel"] = self.read.short()  # bit 15 is used for boundary flag 1
        tmpData["nibble1"] = self.read.byte()
        tmpData["textureDetails"] = self.read.byte()
        tmpData["nibble2"] = self.read.byte()
        return tmpData
test_set_num = 25
train_set_num = 660
valid_set_num = 7
path = [r'.\data\A', r'.\data\B', r'.\data\C']

x_train = np.zeros((1, sample_size, 2))
x_test = np.zeros((1, sample_size, 2))
x_valid = np.zeros((1, sample_size, 2))
y_train = np.zeros((1))
y_test = np.zeros((1))
y_valid = np.zeros((1))
data = [x_train, x_test, x_valid, y_train, y_test, y_valid]

data_reader = DataReader(data_augment_stride=data_augment_stride,
                         train_set_num=train_set_num)
for i in range(len(path)):
    data_sub = data_reader.read_data(path[i])
    for j in range(len(data)):
        data[j] = np.append(data[j], data_sub[j], axis=0)
for i in range(len(data)):
    data[i] = data[i][1:]  # drop the all-zero placeholder row

# train set A, test set B
x_train, x_test, x_valid, y_train, y_test, y_valid = different_load_set(data, 'B', 'A')

# for t-SNE
y_label = y_test
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
def get_answer_part1():
    result = 0
    for paren in DataReader.read_data(1)[0]:
        result += 1 if paren == '(' else -1
    return result
@author: haotianteng
"""
from DataReader import DataReader

############################# super parameters
TRANNING_READS = 30000    # Total reads used
TRAIN_WEIGHT = 0.4        # Proportion of reads used to train
TEST_WEIGHT = 0.4         # Proportion of reads used to test
VALID_WEIGHT = 0.2        # Proportion of reads used to validate

# Structure
EVENT_LENGTH = 20         # Length of each sentence
HIDDEN_UNIT_NUM = 24      # Length of the hidden state of each hidden layer
CellLayer = 3             # Number of the hidden layers

# Training
STEP_RATE = 0.5
BATCH_SIZE = 20
EPOCH = 5000
#############################

############################### Read the data
data = DataReader(
    TRAIN_WEIGHT,
    TEST_WEIGHT,
    VALID_WEIGHT,
    EVENT_LENGTH,
    TRANNING_READS,
    event_total=10000,
    file_list='/home/haotianteng/UQ/BINF7000/Nanopore/GN_003/event_pass.dat')
Created on Oct 25, 2016

@author: Iegor
'''
from GraphAnimator import GraphAnimator
from DataReader import DataReader
from GreedySolver import solve_tsp
import numpy as np


def make_dist_matrix(points):
    """Creates distance matrix for the given coordinate vectors"""
    x = []
    y = []
    for point in points:
        x.append(point[0])
        y.append(point[1])
    N = len(x)
    xx = np.vstack((x, ) * N)
    yy = np.vstack((y, ) * N)
    return np.sqrt((xx - xx.T)**2 + (yy - yy.T)**2)


if __name__ == "__main__":
    d = DataReader('../../../capitals.txt')
    points = d.readPoints()
    matrix = make_dist_matrix(points)
    path = solve_tsp(matrix)
    g = GraphAnimator(points=points, pathes=path)
    g.beginAnimation()
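A quick, hedged check of make_dist_matrix (the points are made up): the vstack/transpose trick builds an N x N matrix whose (i, j) entry is the Euclidean distance between point i and point j, so the result is symmetric with a zero diagonal.

pts = [(0, 0), (3, 4), (6, 8)]
m = make_dist_matrix(pts)
print(m.shape)   # (3, 3)
print(m[0][1])   # 5.0  -> distance from (0, 0) to (3, 4)
print(m[1][2])   # 5.0  -> distance from (3, 4) to (6, 8)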
import os

import numpy as np
from sklearn.linear_model import LogisticRegression

from DataReader import DataReader

data_reader = DataReader("./data/us_trial.text", "./data/us_trial.labels")
X, Y = data_reader.get_features()

len_X = len(X)
len_X = int(len_X / 10)   # use only a tenth of the data
len_train = int(0.8 * len_X)
print(len_train)

trainX, trainY = X[0:len_train], Y[0:len_train]
trainX, trainY = np.asarray(trainX), np.asarray(trainY)
testX, testY = X[len_train:len_X], Y[len_train:len_X]
testX, testY = np.asarray(testX), np.asarray(testY)
print(len(testX))

if __name__ == '__main__':
    clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
    clf.fit(trainX, trainY)

    counter, len_testX = 0, len(testX)
    for i in range(len_testX):
        output = clf.predict(np.asarray([testX[i]]))  # predict returns a length-1 array
        if output[0] == testY[i]:
            counter += 1

    accuracy = counter / len_testX
    accuracy = accuracy * 100
def gen_dataset(seed):
    random_seed = seed
    data_reader = DataReader()
    return DataObject(data_reader, 1, random_seed=random_seed)
#             csvwriter.writerow(row)
#             left[int(row[0])] -= 1
# input("Done")

connection_type = Connection_JIT(0, 0, 0)
kv_ty = (numba.types.containers.UniTuple(numba.int64, 2), numba.typeof(connection_type))
master_connection_dict = numba.typed.Dict.empty(*kv_ty)
innov_list = numba.typed.List()
innov_list.append(1)

print("Begin test...")
pr = Predictor()
p = Population(784, 47, 1, master_connection_dict, innov_list, 0)
dr = DataReader()
mapping = dr.get_mapping()
images = dr.get_images(112800, 28, 28)  # 112800 images in data set, each image 28x28 pixels
mp = p.networks[0]
for q in range(200):
    print("Adding connection " + str(q + 1), end='\r')
    mp.add_connection()
# mp.randomize_all_bias()
print("\nStarting predictions...")
engines = [[mp, 0, 0]]
pr.make_predictions(engines, mapping, images, 1)
base = engines[0][2]
from DataReader import DataReader
from Preprocessor import Preprocessor
from Vectorizer import Vectorizer
from Classifier import Classifier
from DeepLearning import DeepLearner
from sklearn.model_selection import train_test_split as split
import numpy as np

sub_b = ['UNT', 'TIN']

dr_tr = DataReader('./datasets/training-v1/offenseval-training-v1.tsv', 'B')
tr_data, tr_labels = dr_tr.get_labelled_data()
tr_data, tr_labels = dr_tr.upsample(tr_data, tr_labels, label=0)
tr_data, tr_labels = dr_tr.shuffle(tr_data, tr_labels, 'random')

dr_tst = DataReader('./datasets/test-B/testset-taskb.tsv')
tst_data, tst_ids = dr_tst.get_test_data()

tr_data = tr_data[:500]
tr_labels = tr_labels[:500]

##### Naive Bayes - Lemmatize - tfidf
prp = Preprocessor('remove_stopwords')
tr_data_clean = prp.clean(tr_data)
tst_data_clean = prp.clean(tst_data)

vct = Vectorizer('tfidf')
tr_vectors = vct.vectorize(tr_data_clean)
tst_vectors = vct.vectorize(tst_data_clean)

clf = Classifier('M-NaiveBayes')
def extractor(self):
    # array in which to store people list
    people_list = []
    # initialization of prev_person
    prev_person = ""
    # for each person
    for i in range(0, len(self.data_list)):
        # identification of current person
        curr_person = self.data_list[i].split("_")[0]
        # if a new person is found
        if curr_person != prev_person:
            # if a previous person exists
            if prev_person != "":
                # function to calculate features about a person
                fc = FeaturesCalculator(self.strides_info)
                # identification of gender: 0 for male, 1 for female
                if prev_person[0] == 'M':
                    features = fc.calculator(0)
                elif prev_person[0] == 'F':
                    features = fc.calculator(1)
                else:
                    features = fc.calculator(-1)
                # function to store features about a person
                fs = FeaturesStorage(prev_person, features)
                fs.storage()
                # adding prev_person to list
                people_list.append(prev_person)
                # to free array with strides data
                self.strides_info = []
            # updating prev_person
            prev_person = curr_person
            # list of strides about current person
            strides_list = os.listdir(self.strides_dir + self.data_list[i])
            for j in range(0, len(strides_list)):
                # reading data of the selected stride
                dr = DataReader(self.strides_dir + self.data_list[i] + '/' +
                                strides_list[j] + '/kalman/txt/')
                data, time = dr.reader()
                # adding stride data to strides_info array
                self.strides_info.append([time, data])
        else:
            # list of strides about current person
            strides_list = os.listdir(self.strides_dir + self.data_list[i])
            for j in range(0, len(strides_list)):
                # reading data of the selected stride
                dr = DataReader(self.strides_dir + self.data_list[i] + '/' +
                                strides_list[j] + '/kalman/txt/')
                data, time = dr.reader()
                # adding stride data to strides_info array
                self.strides_info.append([time, data])
        # if the last folder is reached
        if i == len(self.data_list) - 1:
            # function to calculate features about a person
            fc = FeaturesCalculator(self.strides_info)
            # identification of gender: 0 for male, 1 for female
            if prev_person[0] == 'M':
                features = fc.calculator(0)
            elif prev_person[0] == 'F':
                features = fc.calculator(1)
            else:
                features = fc.calculator(-1)
            # function to store features about a person
            fs = FeaturesStorage(prev_person, features)
            fs.storage()
            # adding prev_person to list
            people_list.append(prev_person)
    return people_list
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from HyperParameters import HyperParameters
from DataReader import DataReader
from NeuralNet import NeuralNet


def draw(reader, net):
    plt.plot(reader.XTrain, reader.YTrain)
    plt.show()


if __name__ == "__main__":
    reader = DataReader()
    reader.ReadData()
    reader.NormalizeX()
    reader.NormalizeY()

    hp = HyperParameters(13, 1, eta=0.001, max_epoch=2000, batch_size=50, eps=1e-5)
    net = NeuralNet(hp)
    net.train(reader, checkpoint=0.2)

    print("W=", net.weight)
    print("B=", net.bias)
    # lazy way for the cache decorator
    @memoize
    def _wordBreak(s):
        # print(s)
        results = []
        for word in dict:
            if s == word:
                results.append(word)
            elif s.startswith(word):
                # print('got', word)
                for result in _wordBreak(s[len(word):]):
                    results.append(word + ' ' + result)
        return results

    return _wordBreak(s)


def weight(s):
    # each segmentation is a space-joined string, so weight its words
    weight = 0
    for word in s.split():
        weight += 10**len(word)
    return weight


if __name__ == "__main__":
    d = DataReader('../../../dict_en.txt')
    tokens = d.readTokens()
    wraps = Solution().wordBreak("thisisatest", tokens)
    print(sorted(wraps, key=weight))
def trainModel(experiment, testRun, setTarg):
    print("Training model ...")
    datasetTrain = DataReader(experiment.data["path"])
    datasetTrain.setDatasetClassic("train", experiment.data["feature"], experiment.data["annotation"])
    if setTarg == "MeanStd":
        datasetTrain.setTargetMeanStd()
    if testRun:
        datasetTrain = keepOne(datasetTrain)

    datasetDev = DataReader(experiment.data["path"])
    datasetDev.setDatasetClassic("dev", experiment.data["feature"], experiment.data["annotation"])
    if setTarg == "MeanStd":
        datasetDev.setTargetMeanStd()
    if testRun:
        datasetDev = keepOne(datasetDev)

    if testRun:
        experiment.maxEpoch = 1

    inp, tar = datasetDev[0]
    experiment.inputDim = inp.shape[1]
    experiment.outputDim = tar.shape[1]
    # print("experiment.outputDim", tar.shape)

    wrapper = getWrapper(experiment)
    wrapper.trainModel(datasetTrain,
                       datasetDev,
                       batchSize=experiment.batchSize,
                       maxEpoch=experiment.maxEpoch,
                       loadBefore=True,
                       tolerance=experiment.tolerance,
                       minForTolerance=experiment.minForTolerance)
    wrapper.saveLogToCSV()
def __init__(self, file_path):
    self.model = models.load_model(file_path)
    self.data_reader = DataReader()
    self.nn_helper_funcs = NNHelperFunctions()
    self.history = []
    for item_count in column_counts:
        p = item_count / total
        probabilities.append(p)
    # print("probs", probabilities)
    column_entropy = 0
    for p_i in probabilities:
        column_entropy += entropy(p_i)
    # print("column_entropy", column_entropy)
    return column_entropy


# accessing training data and setting up data frame
train_data_url = 'https://raw.githubusercontent.com/jeniyat/CSE-5521-SP21/master/HW/HW1/Data/train.csv'
dr = DataReader(train_data_url)
dr.read()
df = pd.DataFrame(dr.data)

# CALCULATING ENTROPY OF PREDICTION COLUMN
# getting edible column's counts of values
edible_col = df[0].value_counts().to_list()
probabilities = []

# get total count for current column
total = 0
for column in edible_col:
    total += column
# tf.compat.v1.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K

# session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# K.set_session(sess)
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                        inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

from DataReader import DataReader
from NN_Manager import NN_Manager

# Get the dataSets
# tf.random.set_seed(13)
dataReader = DataReader()
(trainingDataSet, testingDataSet) = dataReader.GetDataSets()
(trainingLabels, testingLabels) = dataReader.GetLabels()

# Create chosen networks
networkManager = NN_Manager(trainingDataSet, testingDataSet, trainingLabels, testingLabels)
# networkManager.addNetwork("CNN")
# networkManager.addNetwork("LargerFilterCNN")
# networkManager.addNetwork("LargeCNN")
networkManager.addNetwork("StackedLSTM")
# networkManager.addNetwork("LSTM")
# networkManager.addNetwork("FFNN")
# networkManager.addNetwork("GRU")
# networkManager.addNetwork("StackedLSTM")
from DataReader import DataReader
import numpy as np
import pickle
import os
from time import time

data_path = '../ivd_data/preprocessed.h5'
indicies_path = '../ivd_data/indices.json'

dr = DataReader(data_path=data_path, indicies_path=indicies_path)

# source and target files
src_train = 'data/gw_src_train'
src_valid = 'data/gw_src_valid'
tgt_train = 'data/gw_tgt_train'
tgt_valid = 'data/gw_tgt_valid'

# parameters
length = 15
train_val_ratio = 0.1
n_games_to_train = '_ALL'
sos_token_stirng = '-SOS-'

# get all games
game_ids = dr.get_game_ids()


# prune games
def get_game_ids_with_max_length(length):
    """ return all game ids where all questions are smaller than the given length """
# initial_population_size: number of chromosomes in the initial population
# nb_generations: number of times we will produce a generation (includes tournament, crossover and mutation)
# ratio_cross: ratio of the total population to be crossed over and mutated
# prob_mutate: mutation probability
# k: number of participants in the selection tournaments
initial_population_size, nb_generations, ratio_cross, prob_mutate, k = 200, 100, 0.8, 0.05, 2

# How many times to run the whole algorithm: for mini and small, 10 is enough; for big, 100
problem_instances = 10

DATA_PATH = "data/data_transportationPb_mini.xlsx"

if __name__ == "__main__":
    # reading the data from the Excel file
    dataReader = DataReader(DATA_PATH)
    data = dataReader.read_data()
    data["nb_vehicles"] = 9

    # creating the list of vehicle names
    vehicles = ['vehicle' + str(i) for i in range(0, int(data["nb_vehicles"]))]
    stations = list(data["all_stations"])
    distances = data["distances"]
    mandatory_trips = data["trips"]

    best_results = []
    print("EXECUTING ", problem_instances, " INSTANCES ")

    genetic_problem = GeneticProblem(vehicles, stations, distances, mandatory_trips)
    generation_factory = GenerationFactory(genetic_problem)
    t0 = time()
    return model


def save_model(model_save_dir, model_name, model):
    save_path = '{}/{}'.format(model_save_dir, model_name)
    model.save(save_path)


if __name__ == '__main__':
    source_path = 'data'
    model_save_dir = 'models/'
    img_width, img_height = 200, 200

    model = create_vgg16_model(img_width, img_height)

    data_reader = DataReader(source_path)
    train_images, train_labels = data_reader.get_train_data()
    val_images, val_labels = data_reader.get_val_data()
    test_images, test_labels = data_reader.get_test_data()

    np.set_printoptions(suppress=True)

    history = model.fit(train_images, train_labels,
                        validation_data=(val_images, val_labels),
                        batch_size=16, epochs=10, verbose=1)

    predictions = model.predict(test_images)
    for i in range(len(predictions)):
from DataReader import DataReader

x = DataReader('sign-ins.csv')
x.createPlot(x.createDatesList(), x.createCountList())

y = DataReader('sign-ins2.csv')
y.createPlot(y.createDatesList(), y.createCountList())
def read_header(self):
    """Read the header of a MPQ archive."""

    def read_mpq_header(offset=None):
        if offset:
            self.file.seek(offset)
        data = self.file.read(32)
        header = MPQFileHeader._make(
            struct.unpack(MPQFileHeader.struct_format, data))
        header = header._asdict()
        if header['format_version'] == 1:
            data = self.file.read(12)
            extended_header = MPQFileHeaderExt._make(
                struct.unpack(MPQFileHeaderExt.struct_format, data))
            header.update(extended_header._asdict())
        return header

    def read_mpq_user_data_header():
        data = self.file.read(16)
        header = MPQUserDataHeader._make(
            struct.unpack(MPQUserDataHeader.struct_format, data))
        header = header._asdict()
        header['content'] = self.file.read(header['user_data_header_size'])
        return header

    magic = self.file.read(4)
    self.file.seek(0)
    print(magic)

    header = {}
    if magic == b"HM3W":
        datReader = DataReader(self.file)
        ## should be HM3W
        header["wc3map_magic"] = datReader.charArray(4)
        ## unknown
        datReader.int()
        header["wc3map_mapName"] = datReader.string()
        """
        0x0001: 1=hide minimap in preview screens
        0x0002: 1=modify ally priorities
        0x0004: 1=melee map
        0x0008: 1=playable map size was large and has never been reduced to medium
        0x0010: 1=masked area are partially visible
        0x0020: 1=fixed player setting for custom forces
        0x0040: 1=use custom forces
        0x0080: 1=use custom techtree
        0x0100: 1=use custom abilities
        0x0200: 1=use custom upgrades
        0x0400: 1=map properties menu opened at least once since map creation
        0x0800: 1=show water waves on cliff shores
        0x1000: 1=show water waves on rolling shores
        """
        header["wc3map_mapFlags"] = datReader.flags()
        header["wc3map_maxPlayers"] = datReader.int()
        self.file.read(512 - datReader.index)
        print("Now position:", self.file.tell())
    else:
        ## If the magic isn't HM3W, we will skip the first 512 bytes of the
        ## file anyway
        self.file.seek(512)
        print(self.file.tell())

    magic = self.file.read(4)
    self.file.seek(512)
    print(len(magic))
    print(magic, hex(ord(magic[3])))

    if magic == b'MPQ\x1a':
        header.update(read_mpq_header())
        header['offset'] = 512
    elif magic == b'MPQ\x1b':
        user_data_header = read_mpq_user_data_header()
        header.update(read_mpq_header(user_data_header['mpq_header_offset']))
        header['offset'] = user_data_header['mpq_header_offset']
        header['user_data_header'] = user_data_header
    else:
        raise ValueError("Invalid file header.")

    return header
pred_directory = os.path.join(directory, 'Pred')

# make directory for storing predictions if it does not exist
if not os.path.isdir(pred_directory):
    os.makedirs(pred_directory)

# choose network, can be either DRN18 or DRN26
network = 'DRN26'

# set parameters
batch_size = 8
num_epochs = 100
use_weights = 1
num_classes = 5
image_dims = [500, 500, 3]

data = DataReader(directory, batch_size, num_epochs, use_weights=0)
dataset = data.test_batch(data_file)
num_images = data.num_images

# get image filenames
image_list = data.image_list

# determine number of iterations based on number of images
num_iterations = int(np.floor(num_images / batch_size))

# create iterator allowing us to switch between datasets
data_iterator = dataset.make_one_shot_iterator()
next_element = data_iterator.get_next()

# create placeholder for train or test
train_network = tf.placeholder(tf.bool, [])
class Printer:
    def __init__(self):
        self.DR = DataReader()
        self.Features = RuleBasedFeatures()
        self.feats = ["cc", "ck", "bk", "pk", "hk", "oe", "3", "5", "6", "x",
                      'nword', 'hood', 'bCaret', 'cCaret', 'pCaret', 'hCaret']
        self.gang = {}

    def loadData(self, postsFile):
        self.DR.loadData(postsFile)

    def calculateFeatures(self, posts):
        Hits = dd(int)
        Scopes = dd(int)
        numWordsScope = dd(int)
        for post in posts:
            numWords = len(self.DR.posts[post][4].split())
            postHits, postScopes = self.Features.scorePostWordIndexing(self.DR.posts[post][4])
            #print postHits, postScopes
            for feat in self.feats:
                Scopes[feat] += len(postScopes[feat])
                Hits[feat + 'Count'] += len(postHits[feat + 'Count'])
                numWordsScope[feat] += numWords
        #print Hits, Scopes
        #simpleGlobal = self.globalScoreSimple(Hits, Scopes)
        #complexityGloabal = self.globalScoreComplexity(Hits, Scopes)
        return Hits, Scopes, numWordsScope

    def globalScoreComplexity(self, counts, scopeDict):
        scopeIndices = set()
        for feat in scopeDict.iterkeys():
            for index in scopeDict[feat]:
                scopeIndices.add(index)
        count = 0
        for feat in counts.iterkeys():
            count += len(counts[feat])
        if len(scopeIndices) > 0:
            return str(round(count * 100.0 / len(scopeIndices), 2))
        return ""

    def globalScoreSimple(self, counts, scopeDict):
        scope = 0
        for feat in scopeDict.iterkeys():
            scope += len(scopeDict[feat])
        count = 0
        for feat in counts.iterkeys():
            count += len(counts[feat])
        if scope > 0:
            return str(round(count * 100.0 / scope, 2))
        return ""

    def printFeats(self, users, outFile):
        outFile = open(outFile, 'w', 1)
        for user in users:
            Hits, Scopes, numWordsScope = self.calculateFeatures(self.DR.userwisePosts[user])
            feats = []
            for feat in self.feats:
                try:
                    feats.append(str(round(Hits[feat + 'Count'] * 100.0 / Scopes[feat], 2)))
                except ZeroDivisionError:
                    feats.append('-1')
            outFile.write(user + ',' + ','.join(feats) + ',' + self.gang[user] + '\n')
        outFile.close()

    def loadGangAnnotation(self, gangAnnotation):
        for line in open(gangAnnotation):
            line = line.strip().split('\t')
            self.gang[line[0]] = line[1]
def _load_data(self):
    self.raw = DataReader(day=7).as_raw()
def Train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.train.get_or_create_global_step()
        inc_step = tf.assign_add(global_step, 1)
        reader = DataReader(FLAGS.input_training_data, FLAGS.buffer_size,
                            FLAGS.batch_size, FLAGS.traing_epochs, is_shuffle=True)
        model = LRModel()
        trainer = Trainer(model, inc_step, reader)
        summary_op = tf.summary.merge_all()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        saver = tf.train.Saver(max_to_keep=FLAGS.max_model_to_keep, name='model_saver')

        with tf.Session(config=config) as session:
            summ_writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph)

            # Load Pretrain
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            session.run(reader.iterator.initializer)
            # zero = tf.constant(0, dtype=tf.float32)
            ckpt = tf.train.get_checkpoint_state(FLAGS.output_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load model from ", ckpt.model_checkpoint_path)
            else:
                print("No initial model found.")

            trainer.start_time = time.time()
            while True:
                try:
                    _, avg_loss, total_weight, step, summary = session.run(
                        trainer.train_ops() + [summary_op])
                    # where = tf.not_equal(model.W, zero)
                    # indices = tf.where(where)
                    # print(session.run(indices))
                    # print(session.run(tf.gather(model.W, indices)))
                    # print(session.run(model.b))
                    # print(step)
                    if step % FLAGS.log_frequency == 1:
                        summ_writer.add_summary(summary, step)
                        trainer.print_log(total_weight, step, avg_loss)
                    if step % FLAGS.checkpoint_frequency == 1:
                        saver.save(session, FLAGS.output_model_path + "/model",
                                   global_step=step)
                except tf.errors.OutOfRangeError:
                    print("End of training.")
                    break
                               format(network_type, num_hidden_units, num_layers, num_epochs,
                                      embed_vector_size, window_size, min_count))

#mode = 'train'
mode = 'evaluate'
#mode = 'score'

K.clear_session()
with K.get_session() as sess:
    K.set_session(sess)
    graphr = K.get_session().graph
    with graphr.as_default():
        if mode == 'train':
            print("Training the model... num_epochs = {}, num_layers = {}".
                  format(num_epochs, num_layers))
            reader = DataReader(num_classes, vector_size=embed_vector_size)
            entityExtractor = EntityExtractor(reader, embedding_pickle_file)
            entityExtractor.train(local_train_file_path,
                                  network_type=network_type,
                                  num_epochs=num_epochs,
                                  num_hidden_units=num_hidden_units,
                                  num_layers=num_layers)
            entityExtractor.save_tag_map(tag_to_idx_map_file)
            # Save the model
            entityExtractor.save(model_file_path)
        elif mode == 'evaluate':
            # Evaluate the model
            print("Evaluating the model...")
import scipy.sparse as sps

from Base.NonPersonalizedRecommender import TopPop, Random, GlobalEffects
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from GraphBased.RP3betaRecommender import RP3betaRecommender
from GraphBased.P3alphaRecommender import P3alphaRecommender
from Data_manager.RecSys2019.RecSys2019Reader import RecSys2019Reader
from Data_manager.DataSplitter_leave_k_out import DataSplitter_leave_k_out

if __name__ == '__main__':
    for seed in [0, 1, 2, 3, 4]:
        data_reader = DataReader()
        data = DataObject(data_reader, 1, random_seed=seed)
        recommender = RP3betaRecommender(data.urm_train)
        recommender.fit(topK=10, alpha=0.27, beta=0.11)
        LogToFileEvaluator.evaluate(data, seed, recommender, "RP3", "",
                                    filename="algo_eval.csv")

    # for seed in [0, 1, 2, 3, 4]:
    #     data_reader = DataReader()
    #     data = DataObject(data_reader, 1, random_seed=seed)
    #     recommender = ItemKNNCFRecommender(data.urm_train)
    #     recommender.fit(topK=22, shrink=850, similarity="jaccard", feature_weighting="BM25")
class MeinDialog(QtGui.QDialog, Dlg):
    def __init__(self):
        QtGui.QDialog.__init__(self)
        self.setupUi(self)
        self.datreader = DataReader()
        self.Plotter = Plotter()
        self.directory = os.getcwd()
        self.WorkingD_label.setText(self.directory)
        self.ShowFile_PB.clicked.connect(self.show_file_start)  # shows first lines in the textbrowser
        self.ReadSets_PB.clicked.connect(self.read_set)  # reads all files that start with lineEdit and creates a dict in the Sets_Dict[set][file][column]
        self.PlotFile_PB.clicked.connect(self.plotfile)
        self.MAV_slider.valueChanged.connect(self.mav_valuechanged)
        self.MAV_slider.sliderReleased.connect(self.mav_released)
        self.LP_slider.sliderReleased.connect(self.lp)
        self.LP_slider.valueChanged.connect(self.lp_valuechanged)
        self.HP_slider.sliderReleased.connect(self.hp)
        self.HP_slider.valueChanged.connect(self.hp_valuechanged)
        #self.CutZeros.clicked.connect(self.cut_zeros_filedict)
        self.PlotColumn_PB.clicked.connect(self.plotcolumn)
        self.Clear_PB.clicked.connect(self.clear)
        self.Export_PB.clicked.connect(self.export)
        self.FFT_PB.clicked.connect(self.fft)
        self.ReadLabBook.clicked.connect(self.readlabbook)
        self.MAVEdit.returnPressed.connect(self.mav_released)
        self.MVAREdit.returnPressed.connect(self.mvar)
        self.MMMINEdit.returnPressed.connect(self.mmmin)
        self.Corr_PB.clicked.connect(self.correlate)
        self.Select_PB.clicked.connect(self.open_filedialog)
        self.Pyro_PB.clicked.connect(self.read_pyro)
        self.Log_PB.clicked.connect(self.log_scale)
        self.Sets_Dict = dict()     # contains [set1][file1][column1] - the data
        self.Files_Dict = dict()    # contains [filename 1]: 'set-filename'
        self.Columns_Dict = dict()  # contains [set-filename-column]: same

    def log_scale(self):
        print 'log_scale'
        f = self.Plotter.figure_list[int(self.CurrentFigureEdit.text())]
        ax = f.axes[int(self.ax_scale_edit.text())]
        if ax.get_yscale() == 'linear':
            ax.set_yscale('log')
            print 'log'
        else:
            ax.set_yscale('linear')
        plt.title('')

    def read_pyro(self):
        print 'read_pyro'
        filelist = [f for f in os.listdir(self.directory) if f.startswith(self.lineEdit.text())]
        print filelist
        filelist = [os.path.join(self.directory, f) for f in filelist]
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_pyro(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        self.update_SetScroll()
        self.update_Files_Dict()
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()

    def open_filedialog(self):
        files = str(QtGui.QFileDialog.getOpenFileName(None, QtCore.QString('Select File'),
                                                      QtCore.QString(os.getcwd()),
                                                      QtCore.QString('*.txt')))
        print files
        self.lineEdit.setText(os.path.basename(files))
        self.WorkingD_label.setText(os.path.dirname(files))
        self.directory = os.path.dirname(files)

    def correlate(self):
        fnum = self.Plotter.plot_eval(self.Plotter.correlate, 0,
                                      int(self.CurrentFigureEdit.text()),
                                      self.InActiveFigure.isChecked(),
                                      self.SelectedRange.isChecked(),
                                      self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))

    def readlabbook(self):
        #self.HistoryEdit.insertPlainText(self.HistoryEdit.text())
        print 'read labbook'
        lab_dict = self.datreader.get_labdict(str(self.lineEdit.text()))
        filelist = lab_dict.keys()
        print filelist
        path = self.directory
        filelist = [os.path.join(path, f) for f in filelist if f in os.listdir(path)]
        print filelist
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_files(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        lab_dict = self.datreader.get_labdict(str(self.lineEdit.text()))
        for f in self.Sets_Dict[str(self.lineEdit.text())].keys():
            for info in lab_dict[f].keys():
                self.Sets_Dict[str(self.lineEdit.text())][f][info] = lab_dict[f][info]
        self.update_SetScroll()
        self.update_Files_Dict()
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()

    def mvar(self):
        fnum = self.Plotter.plot_eval(self.Plotter.mvar, int(self.MVAREdit.text()),
                                      int(self.CurrentFigureEdit.text()),
                                      self.InActiveFigure.isChecked(),
                                      self.SelectedRange.isChecked(),
                                      self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))

    def mmmin(self):
        fnum = self.Plotter.plot_eval(self.Plotter.mmmin, int(self.MMMINEdit.text()),
                                      int(self.CurrentFigureEdit.text()),
                                      self.InActiveFigure.isChecked(),
                                      self.SelectedRange.isChecked(),
                                      self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))

    def mav_released(self):
        #if not self.InActiveFigure.isChecked():
        #    self.MAVEdit.setText(str(self.MAV_slider.value()))
        fnum = self.Plotter.plot_eval(self.Plotter.mav, int(self.MAVEdit.text()),
                                      int(self.CurrentFigureEdit.text()),
                                      self.InActiveFigure.isChecked(),
                                      self.SelectedRange.isChecked(),
                                      self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))

    def fft(self):
        print 'fft'
        fnum = self.Plotter.plot_eval(self.Plotter.fft, 0,
                                      int(self.CurrentFigureEdit.text()),
                                      self.InActiveFigure.isChecked(),
                                      self.SelectedRange.isChecked(),
                                      self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))

    def export(self):
        self.Plotter.export(int(self.CurrentFigureEdit.text()))

    def clear(self):
        self.Sets_Dict = dict()
        self.update_SetScroll()
        self.Files_Dict = dict()
        self.update_FileScroll()
        self.Columns_Dict = dict()
        self.update_ColumnScroll()

    def lp_valuechanged(self):
        self.LPEdit.setText(str(self.LP_slider.value()))

    def lp(self):
        print 'mav'
        self.MAVEdit.setText(str(self.LP_slider.value()))

    def hp_valuechanged(self):
        self.HPEdit.setText(str(self.HP_slider.value()))

    def hp(self):
        print 'mav'
        self.MAVEdit.setText(str(self.HP_slider.value()))

    def mav_valuechanged(self):
        self.MAVEdit.setText(str(self.MAV_slider.value()))
        if self.InActiveFigure.isChecked():
            fnum = self.Plotter.plot_eval(self.Plotter.mav, int(self.MAVEdit.text()),
                                          int(self.CurrentFigureEdit.text()),
                                          self.InActiveFigure.isChecked(),
                                          self.SelectedRange.isChecked(),
                                          self.SubtractMean_PB.isChecked())
            self.CurrentFigureEdit.setText(str(fnum))

    def plotcolumn(self):
        for col in self.ColumnScroll.selectedItems():
            key = str(col.text()).split('::')
            col_data = self.Sets_Dict[key[0]][key[1]][key[2]]
            x_axis = self.Sets_Dict[key[0]][key[1]]['Zeit']
            label = str(col.text() + '')
            self.Plotter.plot_column(x_axis, col_data, int(self.CurrentFigureEdit.text()), label)

    # def cut_zeros_filedict(self):
    #     print 'cut_zeros_filedict'
    #     if self.CutZeros.isChecked() == True:
    #         print 'checked'
    #         for fd in self.Files_Dict.keys():
    #             self.Files_Dict[fd] = self.datreader.cutzeros_file_dict(self.Files_Dict[fd])

    def plotfile(self):
        for f in self.FileScroll.selectedItems():
            key = str(f.text()).split('::')
            print key
            title = str(f.text())
            self.Plotter.plot_file(self.Sets_Dict[key[0]][key[1]],
                                   [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')],
                                   int(self.CurrentFigureEdit.text()), title)

    def read_set(self):
        print 'read_set'
        filelist = [f for f in os.listdir(self.directory) if f.startswith(self.lineEdit.text())]
        print filelist
        filelist = [os.path.join(self.directory, f) for f in filelist]
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_files(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        self.update_SetScroll()
        self.update_Files_Dict()
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()

    def update_ColumnScroll(self):
        print 'update_ColumnScroll'
        self.ColumnScroll.clear()
        for col in self.Columns_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(col)
            self.ColumnScroll.addItem(item)

    def update_Columns_Dict(self):
        print 'update_Columns_Dict'
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        for s in self.Sets_Dict.keys():         # sets
            for f in self.Sets_Dict[s].keys():  # files
                for c in self.Sets_Dict[s][f].keys():
                    if c in cols_of_interest:
                        self.Columns_Dict[s + '::' + f + '::' + c] = s + '::' + f + '::' + c

    def update_Files_Dict(self):
        print 'update_Files_Dict'
        for s in self.Sets_Dict.keys():         # sets
            print s
            for f in self.Sets_Dict[s].keys():  # files
                print f
                self.Files_Dict[f] = str(s) + '::' + str(f)
        #self.cut_zeros_filedict()

    def update_SetScroll(self):
        print 'update_SetScroll'
        self.SetScroll.clear()
        for key in self.Sets_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(str(key))
            self.SetScroll.addItem(item)

    def update_FileScroll(self):
        print 'update_FileScroll'
        self.FileScroll.clear()
        for key in self.Files_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(str(self.Files_Dict[key]))
            self.FileScroll.addItem(item)

    def show_file_start(self):
        try:
            f = open(self.lineEdit.text())
            s = ''
            for i in range(12):
                s = s + f.readline()
            self.textBrowser.setText(s)
        except:
            print 'Error in file read'
def get_tweets_labels(tweet_file, labels_file):
    # Simply read in data
    data_reader = DataReader(tweet_file, labels_file)
    tweets = data_reader.read_tweets()
    labels = data_reader.read_labels()
    return tweets, labels
def main(): print("Running on BIO-NLP data\n\n") from sys import platform if platform == "win32": home_dir = "C:\\dl4nlp" else: home_dir = os.path.join(os.path.expanduser('~'), "dl4nlp") print("home_dir = {}".format(home_dir)) # The hyper-parameters of the word embedding trained model window_size = 5 embed_vector_size = 50 min_count = 400 data_folder = os.path.join("sample_data", "drugs_and_diseases") test_file_path = os.path.join(data_folder, "Drug_and_Disease_test.txt") resources_pickle_file = os.path.join(home_dir, "models", "resources.pkl") # The hyper-parameters of the LSTM trained model #network_type= 'unidirectional' network_type = 'bidirectional' #embed_vector_size = 50 num_classes = 7 + 1 max_seq_length = 613 num_layers = 2 num_hidden_units = 150 num_epochs = 10 batch_size = 50 dropout = 0.2 reg_alpha = 0.0 print("Initializing data...") model_file_path = os.path.join(home_dir,'models','lstm_{}_model_units_{}_lyrs_{}_epchs_{}_vs_{}_ws_{}_mc_{}.h5'.\ format(network_type, num_hidden_units, num_layers, num_epochs, embed_vector_size, window_size, min_count)) K.clear_session() with K.get_session() as sess: K.set_session(sess) graphr = K.get_session().graph with graphr.as_default(): # Evaluate the model print("Evaluating the model...") reader = DataReader( input_resources_pickle_file=resources_pickle_file) entityExtractor = EntityExtractor(reader) #load the model print("Loading the model from file {} ...".format(model_file_path)) entityExtractor.load(model_file_path) entityExtractor.print_summary() if not os.path.exists(os.path.join(home_dir, "output")): os.makedirs(os.path.join(home_dir, "output")) # make sure that the input test data file is in IOB format output_prediction_file = os.path.join(home_dir, "output", "prediction_output.tsv") evaluation_report, confusion_matrix = entityExtractor.evaluate_model( test_file_path, output_prediction_file) print(evaluation_report) print(confusion_matrix) ######################################################### # from the commmand line interface, # (1) change directory to \code\02_modeling\03_model_evaluation # (2) run the following perl evaluation script # "C:\Program Files\Git\usr\bin\perl.exe" Drug_and_Disease_eval.pl ..\..\..\sample_data\drugs_and_diseases\Drug_and_Disease_test.txt C:\dl4nlp\output\prediction_output.tsv ######################################################### K.clear_session() K.set_session(None) print("Done.")
def populate_features(tweet_file, labels_file):
    data_reader = DataReader(tweet_file, labels_file)
    return data_reader.get_features()
from DataReader import DataReader
from Preprocessor import Preprocessor
from Vectorizer import Vectorizer
from Classifier import Classifier
from DeepLearning import DeepLearner
from sklearn.model_selection import train_test_split as split
import numpy as np

dr = DataReader('./datasets/training-v1/offenseval-training-v1.tsv', 'A')
data, labels = dr.get_labelled_data()
data, labels = dr.shuffle(data, labels, 'random')

data = data[:]
labels = labels[:]

prp = Preprocessor('remove_stopwords', 'lemmatize')
data = prp.clean(data)

tr_data, tst_data, tr_labels, tst_labels = split(np.array(data), labels,
                                                 test_size=0.2, stratify=labels)
tr_data, tr_labels = dr.upsample(tr_data, tr_labels, label=1)
tr_data, tr_labels = dr.shuffle(tr_data, tr_labels, 'random')

vct = Vectorizer('count')
vct.vectorize(tr_data)

model = DeepLearner(tr_data, tr_labels, vocab_length=vct.vocab_length,
class DataDriver:
    def __init__(self, oscars):
        self.Data = DataReader("tmdb-movies.csv")
        self.Data.formatData()
        self.OscarFile = pd.read_csv(oscars)
        self.ActorsDictionary = {}
        self.MovieDF = self.Data.getMovieDF()
        self.Categories = [
            "ACTOR", "ACTRESS", "ACTOR IN A SUPPORTING ROLE",
            "ACTRESS IN A SUPPORTING ROLE", "ACTOR IN A LEADING ROLE",
            "ACTRESS IN A LEADING ROLE"
        ]
        self.OutputData = self.Data.getOutput()
        self.cleanOscarData()

    def scoreGenres(self):
        genreList = [
            'Action', 'Adventure', 'Science Fiction', 'Thriller', 'Fantasy',
            'Crime', 'Western', 'Drama', 'Family', 'Animation', 'Comedy',
            'Mystery', 'Romance', 'War', 'History', 'Music', 'Horror',
            'Documentary', 'Foreign', 'TV Movie'
        ]
        GenreScore = {
            k: v
            for (k, v) in zip(genreList, list(reversed(range(len(genreList) + 1))))
        }
        for ind, row in self.MovieDF.iterrows():
            score = 1
            for genre in row["genres"]:
                score *= GenreScore[genre]
            self.Data.setNewAttribute(ind, "genres", score)

    def setActorsDict(self):
        for ind, row in self.MovieDF.iterrows():
            for actor in row["cast"]:
                self.ActorsDictionary[actor] = 0
        self.scoreOscars()

    def cleanOscarData(self):
        self.OscarFile.drop(["year"], axis=1, inplace=True)
        for ind, row in self.OscarFile.iterrows():
            if row["category"] not in self.Categories:
                self.OscarFile.drop([ind], inplace=True)
        self.setActorsDict()

    def scoreOscars(self):
        for ind, row in self.OscarFile.iterrows():
            if row["winner"]:
                if row["entity"] in self.ActorsDictionary.keys():
                    val = self.ActorsDictionary[row["entity"]]
                    self.ActorsDictionary[row["entity"]] = val + 1
            elif row["entity"] in self.ActorsDictionary.keys():
                val = self.ActorsDictionary[row["entity"]]
                self.ActorsDictionary[row["entity"]] = val
        self.AddScores()

    def SearchDict(self, dic, name):
        for key, val in dic.items():
            if key == name:
                return val
        return 0

    def IterateScore(self, dic, arr):
        Score = 0
        for person in arr:
            Score += self.SearchDict(dic, person)
        return Score

    def AddScores(self):
        for ind, row in self.MovieDF.iterrows():
            self.MovieDF = self.Data.setNewAttribute(
                ind, "cast",
                self.IterateScore(self.ActorsDictionary, row["cast"]))

    def setRevOutput(self):
        df1 = pd.get_dummies(self.MovieDF["revenue"])
        self.MovieDF = pd.concat([self.MovieDF, df1], axis=1)
        self.MovieDF.drop(["revenue"], axis=1, inplace=True)

    def SaveData(self):
        self.MovieDF.to_csv("DataRevExploration.csv", index=False)
import joblib
import numpy
from sklearn.preprocessing import MinMaxScaler

from DataReader import DataReader

if __name__ == '__main__':
    # test_cases, test_result = DataReader.read_testdata()
    test_cases, test_result = DataReader.read_testdata()
    nn = joblib.load('./BPnn.pkl')

    # 2021.01.07: normalization and denormalization are handled inside the BP model
    # predict the test-set results with the trained model
    result_predict = nn.userPredict(test_cases)

    i = 0
    miss = 0
    for res in result_predict:
        print('No. ', i + 1, res, test_result[i][0],
              '%.2f%%' % (abs(res[0] - test_result[i][0]) / test_result[i][0] * 100))
        if round(res[0]) != test_result[i][0]:
            miss += 1
        i = i + 1
    print('correct rate: ', (i - miss) / i * 100, '%')