def generate_model(author, steps):
    """Given an author name, processes the data/<author>.txt input for
    steps number of iterations into the model input to be used by the
    lambda_handler function.
    """
    predictor = Predictor(128)

    # Filenames.
    author_models_dir = get_dir_for_author(author)
    if not os.path.exists(author_models_dir):
        os.mkdir(author_models_dir)
    model_file = author_models_dir + author + ".model"
    vocab_file = author_models_dir + author + ".vocab"
    commons_file = author_models_dir + author + ".commons"
    raw_text_file = "../data/" + author + ".txt"

    # Read in the 'frequently used words' as common vocab.
    frequent = read_common_vocab("../data/20k_most_common.txt")

    # Clean the content.
    with open(raw_text_file, 'r') as raw:
        raw_words = raw.read().split(' ')
    data, _ = clean_input_data(raw_words, frequent)

    # Write out the words that occur in the clean data to the commons file.
    record_common_vocab(data, commons_file)

    # Train the model. This step takes the longest.
    predictor.train(data, steps)

    # Save the model that we have trained to disk.
    predictor.save(model_file, vocab_file)
    return predictor
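# Hypothetical usage sketch for generate_model. The author name and step
# count are illustrative; it assumes ../data/hemingway.txt and
# ../data/20k_most_common.txt exist relative to this script:
#
#   predictor = generate_model("hemingway", steps=10000)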
def build_predictor(data, settings):
    sys.stdout.write('Building model\n')
    data_input = Input(shape=(settings['max_len'], ))
    bucket_size = Input(shape=(1, ), dtype="int8")
    embedding = Embedding(input_dim=settings['max_features'] + 3,
                          output_dim=settings['word_embedding_size'],
                          mask_zero=True,
                          name="emb")(data_input)
    encoder = Predictor(input_dim=settings['word_embedding_size'],
                        hidden_dim=settings['sentence_embedding_size'],
                        RL_dim=settings['RL_dim'],
                        max_len=settings['max_len'],
                        batch_size=settings['batch_size'],
                        random_action_prob=settings['random_action_prob'],
                        name='encoder')([embedding, bucket_size])
    layer = encoder[0]
    for idx, hidden_dim in enumerate(settings['hidden_dims']):
        layer = Dense(hidden_dim, name="dense_{}".format(idx))(layer)
        layer = Activation('tanh')(layer)
        layer = Dropout(settings['dense_dropout'])(layer)
    output = Dense(settings['num_of_classes'],
                   activation='softmax',
                   name='output')(layer)
    model = Model(inputs=[data_input, bucket_size],
                  outputs=[
                      output, encoder[1], encoder[2], encoder[3],
                      encoder[4], encoder[5]
                  ])
    return model
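# Illustrative settings dict for build_predictor, covering every key the
# function reads. All values are assumptions, not the original configuration;
# the unused `data` argument is passed as None:
#
#   settings = {
#       'max_len': 128, 'max_features': 20000, 'word_embedding_size': 256,
#       'sentence_embedding_size': 128, 'RL_dim': 64, 'batch_size': 32,
#       'random_action_prob': 0.1, 'hidden_dims': [128, 64],
#       'dense_dropout': 0.5, 'num_of_classes': 5,
#   }
#   model = build_predictor(None, settings)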
def predict(modelPath, labelPath, wavPath):
    lf = open(labelPath, 'r')
    labelLn = lf.readlines()

    # Remove trailing \n from each label.
    label = []
    for item in labelLn:
        label.append(item.strip())

    pred = Predictor()
    pred.load(modelPath)

    # We need enough frequency resolution to tell 70 Hz from 74 Hz apart.
    FR = 44100
    T = 1.0 / FR
    N = 16384

    rate, dataAll = wavfile.read(wavPath, True)
    if rate != FR:
        print('Error: sample rate is not 44.1kHz!')
        return

    # Get channel 0 (left channel in stereo).
    if dataAll.ndim != 1:
        dataAll = dataAll[:, 0]

    print('Time Chord Confidence')
    nPred = int(len(dataAll) / FR)
    for t in range(0, nPred):
        data = dataAll[t * FR:t * FR + N]

        # 16bit int format -> float64
        x = data.astype(np.float64)

        # Normalize input time domain data to [-1, +1].
        xs = normalizeArray(x)

        w = scipy.signal.blackman(N)
        xf = scipy.fftpack.fft(xs * w)
        xfa = np.abs(xf)

        # Look only at the 70 Hz to 4 kHz band.
        startIdx = int(70 * N / FR)
        endIdx = int(4000 * N / FR)
        xfs = xfa[startIdx:endIdx]

        # Normalize frequency domain data to [0, +1].
        xIn = normalizePositiveArray(xfs)
        xInR = np.reshape(xIn, (1, -1))

        (ypred, conf) = pred.predict(xInR)
        ypredI = int(ypred)
        if conf.max() >= 0:
            print('%3d:%02d %10s %6.2f' %
                  (int(t / 60), t % 60, label[ypredI], conf.max()))
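# Hypothetical invocation of predict (file names are illustrative; the wav
# file must be sampled at 44.1 kHz, and the label file holds one chord name
# per line):
#
#   predict('chord.model', 'chord_labels.txt', 'recording.wav')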
def runAlg(self, dataType):
    '''
    Run the algorithm and plot the results.
    :param dataType: gsm / lte
    :return:
    '''
    if dataType != "gsm" and dataType != "lte":
        raise Exception("Unknown data type")

    # Results of the classifier and the regressor.
    regRes = []
    claRes = []
    predorCla = Predictor("cla", dataType)
    predorReg = Predictor("reg", dataType)
    for time in range(10):
        print str(time) + " -- begin"
        # Run the algorithm.
        claRes.append(predorCla.fit())
        regRes.append(predorReg.fit())
        print str(time) + " -- change data"
        # Regenerate a new split of the data.
        predorReg.changeData()
        predorCla.changeData()

    # Sort the results.
    regRes.sort()
    claRes.sort()

    folder = "gsmResult/" if "gsm" == dataType else "lteResult/"

    # Plot.
    self.draw(regRes, folder + "regImg")
    self.draw(claRes, folder + "claImg")

    # Write out the full results and the median over the ten runs.
    resFile = open(folder + "res", 'w')
    resFile.write("reg result:\n")
    resFile.write(str(regRes))
    resFile.write("\nreg mid:\n")
    resFile.write(str((regRes[4] + regRes[5]) / 2))
    resFile.write("\ncla result:\n")
    resFile.write(str(claRes))
    resFile.write("\ncla mid:\n")
    resFile.write(str((claRes[4] + claRes[5]) / 2))
def __init__(self):
    self.FORMAT = pyaudio.paFloat32
    self.CHANNELS = 1
    self.RATE = 44100
    self.CHUNK = 1024 * 2
    self.p = None
    self.stream = None
    self.chunksRead = None
    self.predictor = Predictor()
def __init__(self):
    if exists('database.dat'):
        # Deserializing the database is much faster.
        print('deserialize the QA database...')
        self.search_engine = SearchEngine('cppjieba/dict', 'database.dat')
    else:
        # Loading the QA database from txt is slower.
        print('load the QA database from txt format...')
        self.search_engine = SearchEngine('cppjieba/dict')
        self.search_engine.loadFromTxt('question_answer.txt')
        self.search_engine.save('database.dat')
    self.predictor = Predictor()
def get_data(self):
    # Run the SVM model over the file and collect its outputs.
    p = Predictor(self.filePath)
    p.assess_func()
    self.result = p.result_data  # store result
    self.data = p.raw_data  # store raw data
    self.channelName = p.header  # store electrode information for the GUI
    self.eegLength = len(p.raw_data) / 256  # length of the EEG in seconds
    self.bad = p.bad  # bad channels in the EEG
    self.powers = p.powers  # power of the different wave bands in the EEG
def DoWork(source_file, m, k, fltr, trade_cost, testing_start_date,
           testing_end_date):
    # Set some defaults
    p = Predictor(source_file, m, k, fltr, testing_start_date,
                  testing_end_date)
    p.read_file_to_daily_data_by_weeks(0)
    p.calc_historical_weekly_return(p.daily_data_by_weeks)

    # Initialize variables
    strategy_trade_count = 0
    in_market_count = 0
    current_state = 0
    strategy_capital = 100
    bh_capital = 100
    strategy_weekly_return = []
    bh_weekly_return = []

    for idx in range(p.start_split, p.end_split):
        # Generate weekly signals
        p.weekly_return_data = p.historical_weekly_return_data[:idx - 1]
        alist = p.find_k_closest_histories()
        r = p.calc_next_week_return(alist)
        cur_signal = p.signal(current_state, r)

        # Signal handling
        cur_index_price = p.daily_data_by_weeks[idx][-1][2]
        prev_index_price = p.daily_data_by_weeks[idx - 1][-1][2]
        actual_return = math.log(cur_index_price) - math.log(prev_index_price)
        if current_state != cur_signal:
            current_state = cur_signal
            strategy_trade_count += 1
            strategy_capital = strategy_capital * (1 - trade_cost)

        # K Nearest Neighbor
        strategy_capital = strategy_capital * (1 + actual_return * current_state)
        strategy_weekly_return.append(actual_return)

        # Buy-and-Hold
        if current_state == 1:
            # in_market_count += 1
            bh_capital = bh_capital * (1 + actual_return)
        bh_weekly_return.append(actual_return)

        print('{0}, {1}, {2}'.format(p.daily_data_by_weeks[idx - 1][-1][0],
                                     strategy_capital, bh_capital))
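# Hypothetical run of the KNN trading backtest. The file name, window
# parameters, and dates below are illustrative assumptions, not values from
# the original project:
#
#   DoWork('sp500_daily.csv', m=4, k=10, fltr=0.0, trade_cost=0.001,
#          testing_start_date='2010-01-01', testing_end_date='2015-12-31')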
def load(positive_class, negative_class):
    """
    Helper function that loads in the model and creates a DirectoryManipulator.
    :return: The model and a DirectoryManipulator.
    """
    print("#" * 15)
    print("loading model...")
    print("#" * 15)
    p = Predictor(
        r"models\inceptionResNetV2_optimized_h5\inceptionResNetV2_optimized.h5",
        (positive_class, negative_class))
    print("#" * 15)
    print("model loaded.")
    print("#" * 15)
    d = DirectoryManipulator()
    return p, d
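# Example call (the class names are illustrative placeholders for whatever
# binary labels the InceptionResNetV2 model was trained on):
#
#   predictor, dirs = load('positive', 'negative')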
class MLP_Runner:
    trainer = Trainer()
    predictor = Predictor()

    def data_load(self):
        dataset, datatarget, T_len = self.db.MLP_fetch_data()
        return dataset, datatarget, T_len

    def train(self, dataset, datatarget, T_len):
        print("check......................................! \n")
        data_set, data_target, test_set, test_target = (
            self.trainer.one_hot_encode_normalize(dataset, datatarget, T_len))
        print("done.............")
        self.trainer.Train(data_set, data_target)
        print(test_set[:1000])
        self.predictor.predict(test_set, test_target)
def lambda_handler(event, context):
    """Use a model for an existing author to generate length words,
    interleaved with user text input."""
    author = event["author"]
    user_text = event["userText"]
    length = event["length"]

    # Load in the predictor.
    model_file = get_dir_for_author(author) + author + ".model"
    vocab_file = get_dir_for_author(author) + author + ".vocab"
    predictor = Predictor(128, model=model_file, vocab=vocab_file)

    # Clean the user data and separate out unknown words.
    common_vocab = read_common_vocab(
        get_dir_for_author(author) + author + ".commons")
    data, unique_user_words = clean_input_data(user_text, common_vocab)

    generated_sample = predictor.sample(length)
    return clean_generated_data(' '.join(generated_sample), unique_user_words)
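# Illustrative Lambda test event for lambda_handler. The field values are
# assumptions; the author must already have a trained model on disk
# (see generate_model above):
#
#   event = {"author": "hemingway", "userText": "the old man", "length": 50}
#   lambda_handler(event, context=None)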
def update_bert(session):
    assert type(session) is str
    # Download the QA pairs from the database.
    try:
        logger.info('get the latest knowledge from wd_qa_knowledge...')
        db = MySQLdb.connect(host=db_host,
                             user=db_usr,
                             passwd=db_psw,
                             db=db_name,
                             charset='utf8')
        sql = "select question,answer from wd_qa_knowledge"
        cur = db.cursor()
        cur.execute(sql.encode('utf-8'))
        qa = str()
        for row in cur.fetchall():
            qa += row[0] + "\t" + row[1] + "\n"
        with open("question_answer.txt", "wb") as f:
            f.write(qa.encode('utf-8'))
        db.commit()
        db.close()
    except Exception as e:
        logger.error(e)
        response = jsonify({'status': 'failure'})
        socketio.emit('msg', namespace='/socket', room=session, data=response)
        return
    # Generate the dataset.
    try:
        logger.info('generating training set...')
        from subprocess import call
        call(["./create_dataset", "-i", "question_answer.txt", "-o", "dataset"])
    except Exception as e:
        logger.error(e)
        response = jsonify({'status': 'failure'})
        socketio.emit('msg', namespace='/socket', room=session, data=response)
        return
    # Finetune the model.
    logger.info('training...')
    from Predictor import Predictor
    predictor = Predictor()
    predictor.finetune('dataset')
    response = jsonify({"status": "success"})
    socketio.emit('msg', namespace='/socket', room=session, data=response)
def searchProperties(self, BHK, FloorArea, Bathrooms, Locality,
                     FurnishingStatus, TypeofSale):
    configParser = configparser.ConfigParser(allow_no_value=True)
    keys = []
    places = []
    configFilePath = 'config.ini'
    if len(configParser.read(configFilePath)) > 0:
        details = configParser.sections()[0]
        for key in configParser[details]:
            keys.append(key)
        places += configParser[details][keys[0]].split(',')

    places_key = []
    for place in places:
        places_key += ['Locality_' + place]

    # Build the one-hot encoded feature row the regressor expects.
    inp = {
        'BHKs': [BHK],
        'FloorArea': [FloorArea],
        'Bathrooms': [Bathrooms]
    }
    for place in places_key:
        inp[place] = [0]
    inp.update({
        'Type_of_Sale_New': [0],
        'Type_of_Sale_Resale': [0],
        'Furnishing_Status_Furnished': [0],
        'Furnishing_Status_Semi-Furnished': [0],
        'Furnishing_Status_Unfurnished': [0]
    })
    inp['Locality_' + Locality] = [1]
    inp['Furnishing_Status_' + FurnishingStatus] = [1]
    inp['Type_of_Sale_' + TypeofSale] = [1]

    predictor = Predictor()
    price = predictor.getPredictedPrice(inp)
    if price:
        return price
    else:
        # No trained regressor yet: build one, then predict again.
        predictor.managePredictor()
        return predictor.getPredictedPrice(inp)
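# Hypothetical call to searchProperties. All argument values are illustrative;
# the Locality must be one of the place names listed in config.ini:
#
#   price = finder.searchProperties(BHK=2, FloorArea=1100, Bathrooms=2,
#                                   Locality='SomePlace',
#                                   FurnishingStatus='Semi-Furnished',
#                                   TypeofSale='Resale')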
def __init__(self, location, inputFile, outputDir=None, cns=False,
             reject=None, angleOnly=False, ppm=False, progressBar=None,
             writePgm=True):
    self.input = inputFile
    self.progressBar = progressBar

    print 'DANGLE (version 1.1)'
    print DANGLE_CITE

    # 1. read config file for location of reference information
    self.reference = Reference(os.path.dirname(location))
    self.reference.outDir = outputDir or OUTDIR
    if not os.path.isdir(self.reference.outDir):
        os.makedirs(self.reference.outDir)
    self.reference.cns = cns
    self.reference.ppm = ppm
    self.reference.angleOnly = angleOnly
    if reject is not None:
        self.reference.rejectThresh = reject

    # 2. read shifts of query protein (input) and calculate secondary shifts
    self.query = Protein(self.reference)
    self.query.readShiftsFromXml(inputFile)

    # 3. compare with DB
    print 'STEP1: Shift search'
    self.topMatches = self.compareWithShiftDB()

    # 4. make predictions from scorograms
    print 'STEP2: GLE generation'
    self.predictor = Predictor(self.query, self.topMatches, self.reference,
                               writePgm)
    self.predictions = self.predictor.predictPhiPsiFromDatabaseMatches(
        progressBar=self.progressBar)
def load_model():
    global predictor
    current_model_dir = model_dir + 'current_model/'
    if not os.path.isdir(current_model_dir):
        os.mkdir(current_model_dir)
    model_path = get_model_file(current_model_dir)
    if model_path is None:
        # No extracted model yet: unpack the bundled zip first.
        with zipfile.ZipFile(model_dir + 'current_model.zip', 'r') as zip_ref:
            zip_ref.extractall(current_model_dir)
        model_path = get_model_file(current_model_dir)
    predictor = Predictor(current_model_dir + model_path)
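# Sketch of the expected on-disk layout (model_dir and get_model_file are
# defined elsewhere in this module; the layout is an assumption inferred from
# the code above). The first call unpacks the zip and then binds the
# module-level `predictor`:
#
#   model_dir/current_model.zip      -> extracted on first run
#   model_dir/current_model/<model>  -> located via get_model_file()
#
#   load_model()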
def simulate_generations(self, num_generations, print_best):
    file_path = "csv/ESN_Results.csv"
    dw = DataWriter()
    dr = DataReader()
    dw.init_table(file_path)
    p = Predictor()
    mapping = dr.get_mapping()
    images = dr.get_images(112800, 28, 28)  # 112800 images in data set
    scale_factor = 10
    for i in range(num_generations):
        best_score = -100
        best_accuracy = -100
        best_net = []
        engines = []
        for net in self.networks:
            engine = [net, 0, 0]
            engines.append(engine)
        p.make_predictions(engines, mapping, images, scale_factor)
        for j in range(len(engines)):
            self.networks[j].fitness = engines[j][1]
            if engines[j][2] > best_score:
                best_score = engines[j][2]
                best_net = self.networks[j]
            if engines[j][1] > best_accuracy:
                best_accuracy = engines[j][1]
        avg_accuracy = self.avg_fitness(self.networks)  # avg accuracy
        for j in range(len(engines)):
            self.networks[j].fitness = engines[j][2]  # change fitness to score
        avg_score = self.avg_fitness(self.networks)  # avg score
        avg_size = self.avg_network_size()
        if print_best:
            best_net.show_net()
        print("-----------------------------------\n Generation " +
              str(i + 1) + " results\n-----------------------------------\n")
        print("Highest accuracy: " + str(best_accuracy * 100) + "%" +
              "\nHighest score: " + str(best_score ** (1.0 / scale_factor)) +
              "\nAverage accuracy: " + str(avg_accuracy * 100) + "%" +
              "\nAverage score: " + str(avg_score ** (1.0 / scale_factor)) +
              "\nNum species: " + str(len(self.species)) +
              "\nInnovs tried: " + str(self.networks[0].master_innov[0]) +
              "\nAverage connections per network: " + str(avg_size) + "\n")
        non_jit = self.construct_non_jit(best_net)
        pickle.dump(non_jit, open("neural_net.txt", "wb"))
        dw.write_row(file_path,
                     [i + 1, best_accuracy * 100, avg_accuracy * 100,
                      best_score ** (1.0 / scale_factor),
                      avg_score ** (1.0 / scale_factor), avg_size])
        if i != num_generations - 1:
            self.prepare_next_gen(math.ceil(self.pop_size / 10))
            print("\nStarting Generation " + str(i + 2) + ": Species = " +
                  str(len(self.species)) + ", Innovs = " +
                  str(self.networks[0].master_innov[0]))
    print("Finished simulation!")
def train(datasetPath, modelSavePath):
    print('reading file...')
    frame = read_data(datasetPath)

    # Process data into feature and label arrays
    print("Processing {} samples with {} attributes".format(
        len(frame.index), len(frame.columns)))
    X_train, X_test, y_train, y_test = get_features_and_labels(frame)

    print('training...')
    pred = Predictor()
    pred.learn(X_train, y_train)

    # Evaluate on the held-out split before saving.
    y_pred = pred.predict1(X_test)
    score = f1_score(y_test, y_pred, average='weighted')
    print('score = ', score)
    pred.save(modelSavePath)
    return score
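# Example run of train (both paths are illustrative assumptions):
#
#   score = train('data/train.csv', 'models/predictor.model')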
def main(): print "-- Welcome to movie-recommend! --" # for output readability np.set_printoptions(formatter={'float_kind': '{:25f}'.format}) # baseline predictor by default mode = BASELINE # read command-line argument, if provided if len(sys.argv) > 1: if sys.argv[1] == IMPROVED or sys.argv[1] == BASELINE: mode = sys.argv[1] print "\tYou chose", mode, "predictor!" else: print "\t", sys.argv[ 1], "is not a valid argument. Default:", mode, "predictor!" else: print "\tYou did not provide any arguments. Default:", mode, "predictor!" # read and parse text files parser = Parser(mode) print "\tParser initialized:" print "\t\t", len(parser.test_set), "test points and", np.count_nonzero( parser.training_matrix), "training points" # initialize predictor and calculate rmse predictor = Predictor(mode, parser.training_matrix, parser.test_set) print "\trmse on test data (baseline):", predictor.rmse_test if predictor.mode == BASELINE: print "\trmse on training data (baseline):", predictor.rmse_training else: print "\trmse on test data (improved):", predictor.rmse_test_improved # execute histogram plotting and get error distribution error_dist = predictor.calculate_absolute_errors( parser.test_set, predictor.improved_matrix ) if predictor.mode == IMPROVED else predictor.calculate_absolute_errors( parser.test_set, predictor.baseline_matrix) print "\tHistogram saved to file. Error distribution:", error_dist
def main():
    toy = True
    market, news = read_data()
    train_idx, val_idx, test_idx = split_data(market, toy)

    # Create preprocessors
    market_prepro = MarketPrepro()
    market_prepro.fit(train_idx, market)
    news_prepro = NewsPrepro()
    news_prepro.fit(train_idx, news)
    prepro = JoinedPreprocessor(market_prepro, news_prepro)

    # Training and validation data generators
    join_generator = JoinedGenerator(prepro, train_idx, market, news)
    val_generator = JoinedGenerator(prepro, val_idx, market, news)
    print('Generators created')

    # Create and train model
    model = ModelFactory.lstm_128(
        len(market_prepro.feature_cols) + len(news_prepro.feature_cols))
    model.load_weights("best_weights.h5")
    print(model.summary())
    ModelFactory.train(model, toy, join_generator, val_generator)

    # Predict
    predictor = Predictor(prepro, market_prepro, news_prepro, model,
                          ModelFactory.look_back, ModelFactory.look_back_step)
    y_pred, y_test = predictor.predict_idx(test_idx, market, news)
    y_pred = predictor.predict(market, news)
    plt.plot(y_pred)
    plt.plot(y_test)
    plt.legend(["pred", "test"])
    plt.show()
    print('The end')
def main(argv):
    yolov3 = tf.keras.models.load_model(FLAGS.model, compile=False)
    predictor = Predictor(yolov3=yolov3)
    anno = COCO(join(FLAGS.annotation_dir, 'instances_val2017.json'))
    count = 0
    # Accumulate detections across all images before evaluation.
    detections = list()
    for imgid in anno.getImgIds():
        print("processing (%d/%d)" % (count, len(anno.getImgIds())))
        # predict
        img_info = anno.loadImgs([imgid])[0]
        img = cv2.imread(join(FLAGS.coco_eval_dir, img_info['file_name']))
        boundings = predictor.predict(img).numpy()
        # collect results in COCO format: (x, y, w, h, score, category)
        for bounding in boundings:
            detections.append([
                imgid, bounding[0], bounding[1],
                bounding[2] - bounding[0], bounding[3] - bounding[1],
                bounding[4],
                label_map.index(int(bounding[5]) + 1)
            ])
        count += 1
    cocoDt = anno.loadRes(np.array(detections))
    cocoEval = COCOeval(anno, cocoDt, iouType='bbox')
    cocoEval.params.imgIds = anno.getImgIds()
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
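# Hypothetical invocation of the COCO evaluation script (the script name and
# flag values are illustrative; FLAGS.model, FLAGS.annotation_dir, and
# FLAGS.coco_eval_dir are defined by the module's flag declarations):
#
#   python eval_coco.py --model yolov3.h5 \
#       --annotation_dir annotations --coco_eval_dir val2017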
def main():
    training_set = TrainingSetHandler()
    training_set.load_training_set()
    gram_list = []
    for size in Constants.SIZE_OF_GRAMS:
        for language in training_set.language_list:
            gram = NGram(size, string.ascii_lowercase, 0.5)
            gram.train(training_set.training_set[language], language)
            gram_list.append(gram)

    # Dump copies of the trained grams to file.
    dump_grams(gram_list)

    predic = Predictor(gram_list)
    test_set_handler = TestSetHandler()
    test_set_handler.load_test_sentence()
    for idx, sentence in enumerate(test_set_handler.test_set):
        clean_sentence = "".join(
            [c for c in sentence[1] if c.isalpha()]).lower()
        prediction = predic.predict_this_sentence(clean_sentence)
        with open(os.path.join(Constants.OUTPUT_PATH,
                               "out{}.txt".format(idx)), 'w') as f:
            output = OutputHelper(prediction, sentence, f)
            output.print_and_save_output()