def __findBestModel(self, model: str) -> str:
    """Return the key whose node-averaged accuracy score is the smallest.

    Data for ``model`` is fetched through ``PrometheusDataHandler``; the
    values of every node (keyed by ip) are scored with
    ``Predictor.accuracy``.  Each returned entry contributes ``entry[0]`` to
    the running total stored under ``entry[1]``.  Totals are divided by the
    number of nodes and the key with the lowest average is returned.

    Raises:
        IndexError: if no score was collected at all.
    """
    scorer = Predictor()
    node_data = PrometheusDataHandler(model).get_data()
    logging.debug("Running findBestModel in SimpleLoadBalance")

    totals = {}
    node_count = 0.0
    # node_data maps ip -> values for that node
    for ip in node_data:
        for entry in scorer.accuracy(node_data[ip]):
            score, key = entry[0], entry[1]
            if key in totals:
                totals[key] += score
            else:
                totals[key] = score
        node_count += 1.0

    # Average the accumulated scores over all nodes.
    averages = {key: total / node_count for key, total in totals.items()}

    ranked = sorted(averages.items(), key=operator.itemgetter(1))
    return ranked[0][0]
def on_batch_end(self, batch, logs=None):
    """Track the training loss and periodically log an annotated preview.

    On every call ``logs['loss']`` is added to ``self.train_loss``.  When
    ``batch`` is a multiple of ``self.eval_freq`` the next sample from
    ``self.iter`` is run through a ``Predictor``, every returned box is
    drawn on the image together with its class name, and the loss and the
    image are written to the ``self.log`` summary writer.  The loss metric
    is reset afterwards.
    """
    self.train_loss.update_state(logs['loss'])
    if batch % self.eval_freq != 0:
        return

    image, bbox, labels = next(self.iter)
    image = image.numpy().astype('uint8')
    predictor = Predictor(yolov5l=yolov5l)
    boundings = predictor.predict(image)

    # One random colour per class id, chosen the first time the class shows up.
    palette = dict()
    for box in boundings:
        class_id = box[5].numpy().astype('int32')
        if class_id not in palette:
            palette[class_id] = tuple(
                np.random.randint(low=0, high=256, size=(3, )).tolist())
        colour = palette[class_id]
        # presumably box[0:2] / box[2:4] are the two opposite corners -- confirm
        corner_a = tuple(box[0:2].numpy().astype('int32'))
        corner_b = tuple(box[2:4].numpy().astype('int32'))
        cv2.rectangle(image, corner_a, corner_b, colour, 1)
        cv2.putText(image, predictor.getClsName(class_id), corner_a,
                    cv2.FONT_HERSHEY_PLAIN, 1, colour, 2)

    image = tf.expand_dims(image, axis=0)
    with self.log.as_default():
        tf.summary.scalar('train loss',
                          self.train_loss.result(),
                          step=optimizer.iterations)
        # The last axis is reversed before logging.
        tf.summary.image('detect',
                         image[..., ::-1],
                         step=optimizer.iterations)
    self.train_loss.reset_states()
def predict(modelPath, labelPath, wavPath):
    """Print one chord prediction per second of a 44.1 kHz WAV file.

    Args:
        modelPath: path handed to ``Predictor.load``.
        labelPath: text file with one label per line; the predicted class
            index is used to look up the label.
        wavPath: WAV file to analyse.  Only channel 0 is used when the file
            has more than one channel.

    Returns:
        None.  Results are printed; if the sample rate is not 44100 an error
        message is printed and the function returns early.
    """
    # One label per line, trailing newline removed.
    with open(labelPath, 'r') as lf:
        label = [item.strip() for item in lf]

    pred = Predictor()
    pred.load(modelPath)

    # The resolution must be fine enough to tell 70 Hz from 74 Hz.
    FR = 44100
    N = 16384

    rate, dataAll = wavfile.read(wavPath, True)
    if rate != FR:
        print('Error: sample rate is not 44.1kHz!')
        return

    # get channel 0 (left channel in stereo)
    if dataAll.ndim != 1:
        dataAll = dataAll[:, 0]

    # Loop invariants: the window and the inspected band (70 Hz to 4 kHz).
    # scipy.signal.blackman was only an alias of scipy.signal.windows.blackman
    # and has been removed from recent SciPy releases.
    w = scipy.signal.windows.blackman(N)
    startIdx = 70 * N // FR
    endIdx = 4000 * N // FR

    print('Time Chord Confidence')

    nPred = len(dataAll) // FR
    for t in range(0, nPred):
        # N samples starting at second t.
        data = dataAll[t * FR:t * FR + N]

        # 16bit int format -> float64 (np.float was an alias of the builtin
        # float and no longer exists in NumPy).
        x = data.astype(np.float64)

        # normalize input time domain data to [-1 +1]
        xs = normalizeArray(x)

        xfa = np.abs(scipy.fftpack.fft(xs * w))
        xfs = xfa[startIdx:endIdx]

        # normalize frequency domain data to [0, +1]
        xIn = normalizePositiveArray(xfs)
        xInR = np.reshape(xIn, (1, -1))

        (ypred, conf) = pred.predict(xInR)
        ypredI = int(ypred)

        if conf.max() >= 0:
            print('%3d:%02d %10s %6.2f' %
                  (t // 60, t % 60, label[ypredI], conf.max()))
def main():
    """Parse the arguments, predict the input's language and print its name."""
    parse_args()
    language_files = get_language_files()
    predictor = Predictor(language_files)
    text = get_input_string()
    best_guess = predictor.predict(text)
    message = "Language is: {}".format(best_guess.language_name)
    print(message)
def GetExpectedReturns(self,Xtot,Ytot,k,stock):
    """Return ``[stock, predicted_return / self.horizon]`` for rebalance step ``k``.

    The price dates of ``stock`` are read from ``self.db.Prices`` ('Adj Close'
    when present, 'Close' otherwise).  ``self.startDate`` and
    ``self.endDate`` are replaced by the values returned from
    ``DataHandler.HandleIncorrectDate``.  The prediction date is taken
    ``k * self.rebalanceFreq + self.window`` entries into the selected date
    range, and ``Predictor.PredictKNN`` is called with every feature/return
    row before that date as training data.
    """
    # Fetch the price series; fall back to 'Close' if 'Adj Close' is missing.
    price_doc = self.db.Prices.find_one({'BBGTicker' : stock},{'Adj Close' : 1})
    if 'Adj Close' in list(price_doc.keys()):
        data = price_doc['Adj Close']
    else:
        price_doc = self.db.Prices.find_one({'BBGTicker' : stock},{'Close' : 1})
        data = price_doc['Close']
    dates = sorted(data.keys())

    # Let DataHandler correct start and end dates entered by the user.
    self.startDate = DataHandler.HandleIncorrectDate(self.startDate,'',dates)
    self.endDate = DataHandler.HandleIncorrectDate('',self.endDate,dates)
    first = dates.index(self.startDate)
    last = dates.index(self.endDate)
    dates = dates[first:last]

    # The prediction date is offset by the covariance-matrix window.
    startDate = dates[k*self.rebalanceFreq + self.window]
    pr = Predictor(stock,startDate,self.horizon,self.db)

    # Split features and returns at the prediction date.
    TIsDates = Xtot[stock]["DATES"]
    features = Xtot[stock]["ANALYTICS"]
    cut = TIsDates.index(startDate)
    Xtrain = features[:cut]
    Ytrain = Ytot[stock]["RETURNS"][:cut]
    Xpred = features[cut]

    expected = pr.PredictKNN(Xtrain,Ytrain,Xpred)/self.horizon
    return [stock, expected]
def generate_model(author, steps):
    """Train and save a model for ``author`` from ``../data/<author>.txt``.

    The raw text is split on spaces and cleaned against the common vocabulary
    read from ``../data/20k_most_common.txt``.  The words of the cleaned data
    are recorded in the author's ``.commons`` file, the predictor is trained
    for ``steps`` iterations and then saved to the author's ``.model`` and
    ``.vocab`` files.  The trained predictor is returned.
    """
    predictor = Predictor(128)

    # All per-author artefacts share the same directory and name prefix.
    author_models_dir = get_dir_for_author(author)
    if not os.path.exists(author_models_dir):
        os.mkdir(author_models_dir)
    prefix = author_models_dir + author
    model_file = prefix + ".model"
    vocab_file = prefix + ".vocab"
    commons_file = prefix + ".commons"
    raw_text_file = "../data/" + author + ".txt"

    # Frequently used words serve as the common vocabulary.
    frequent = read_common_vocab("../data/20k_most_common.txt")

    with open(raw_text_file, 'r') as raw:
        raw_words = raw.read().split(' ')
    data, _ = clean_input_data(raw_words, frequent)

    # Remember which words actually occur in the cleaned data.
    record_common_vocab(data, commons_file)

    # Training is the slow part.
    predictor.train(data, steps)
    predictor.save(model_file, vocab_file)
    return predictor
def derivative(self, x: np.ndarray, y: np.array, est: "Predictor"):
    '''Return the learning-rate-scaled mean error gradient as a vector.

    For every component ``j`` the result is
    ``est.lr() * sum((est.predict(x[i]) - y[i]) * x[i][j]) / len(y)``.

    The prediction errors and the learning rate do not depend on ``j``, so
    they are computed once up front instead of once per component.

    Args:
        x: samples; ``x[i][j]`` is component ``j`` of sample ``i``.
        y: targets, one per sample.
        est: object providing ``lr()`` and ``predict(sample)``.

    Returns:
        ``numpy`` array of length ``self.dim``; all zeros when ``y`` is empty.
    '''
    m = len(y)
    updates = np.zeros(self.dim, dtype=float)
    if m == 0:
        # No samples: nothing to average (avoids a division by zero).
        return updates

    rate = est.lr()
    errors = [est.predict(x[i]) - y[i] for i in range(m)]
    for j in range(len(updates)):
        updates[j] = rate * sum([errors[i] * x[i][j] for i in range(m)]) / m
    return updates
def __init__(self):
    """Set the audio capture constants and create the predictor.

    The PyAudio handle, the stream and the chunk storage start out as
    ``None``.
    """
    # Capture settings.
    self.FORMAT = pyaudio.paFloat32
    self.CHANNELS = 1
    self.RATE = 44100
    self.CHUNK = 2 * 1024

    # Runtime state, not initialised here.
    self.p = self.stream = self.chunksRead = None

    self.predictor = Predictor()
def process(self, request):
    """Store the accuracy result of every node for ``request['model']``.

    The data is fetched through ``PrometheusDataHandler``; for each ip in
    it, ``Predictor.accuracy`` is applied to that node's values and the
    result is stored in ``self.data_[ip]``.
    """
    handler = PrometheusDataHandler(request['model'])
    node_data = handler.get_data()
    scorer = Predictor()
    for ip in node_data:
        self.data_[ip] = scorer.accuracy(node_data[ip])
def testAll(self):
    """Check ``Predictor.getPredictions`` on a fixed 3x3 matrix.

    The expected values are the index of the largest entry in each row.
    """
    logProbabilities = numpy.asarray([[0.4, 0.80, 0.50],
                                      [0.45, 0.4, 0.41],
                                      [0.4, 0.41, 0.45]])
    expected = [1, 0, 2]
    target = Predictor()
    # assertEquals is a deprecated alias that newer unittest versions dropped.
    self.assertEqual(expected, target.getPredictions(logProbabilities))
def __init__(self) -> None:
    """Create four randomly initialised predictors and the house-name table."""
    self.dim = 3

    # Second and third constructor argument of each predictor, in order.
    # presumably iteration count and learning rate -- confirm against Predictor
    settings = ((100, 0.001), (100, 0.001), (80, 0.01), (50, 0.01))
    self.est = [
        Predictor(np.random.normal(0, 0.5, self.dim), second, third)
        for second, third in settings
    ]

    # House names are numbered from 1.
    names = ('Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff')
    self.houses_dict = dict(enumerate(names, start=1))
def __init__(self):
    """Load the QA search engine and create the predictor.

    If ``database.dat`` exists the engine is built from it.  Otherwise the
    engine is filled from ``question_answer.txt`` and saved to
    ``database.dat``.
    """
    if not exists('database.dat'):
        # load database from txt is slower.
        print('load from QA database from txt format...')
        engine = SearchEngine('cppjieba/dict')
        engine.loadFromTxt('question_answer.txt')
        engine.save('database.dat')
    else:
        # deserialize database is much faster.
        print('deserialize the QA database...')
        engine = SearchEngine('cppjieba/dict', 'database.dat')
    self.search_engine = engine
    self.predictor = Predictor()
def get_data(self):
    """Run the predictor on ``self.filePath`` and copy its outputs onto ``self``."""
    assessor = Predictor(self.filePath)
    assessor.assess_func()

    # Assessment result, raw data and header (the electrode information
    # that the GUI shows).
    self.result, self.data, self.channelName = (
        assessor.result_data, assessor.raw_data, assessor.header)

    # Length of the EEG in seconds: number of raw entries divided by 256.
    self.eegLength = len(assessor.raw_data) / 256

    self.bad = assessor.bad  # bad channels in the EEG
    self.powers = assessor.powers  # power of the different waves in the EEG
def process(self, request):
    """Store the last predicted value of every node for ``request['model']``.

    The data is fetched through ``PrometheusDataHandler``.  For each ip a
    fresh ``Predictor`` produces a prediction from that node's values and the
    final element of the prediction is stored in ``self.data_[ip]``.
    """
    model = request['model']
    node_data = PrometheusDataHandler(model).get_data()

    for ip in node_data:
        # each node's values are a (time, workload) tuple
        values = node_data[ip]
        forecast = Predictor().get_prediction(values, model)
        self.data_[ip] = forecast[-1]
def DoWork(source_file, m, k, fltr, trade_cost, testing_start_date, testing_end_date):
    """Simulate the predictor's weekly signal against a reference capital.

    A ``Predictor`` is built from the arguments and, for every week index in
    ``range(p.start_split, p.end_split)``, a signal is derived from the
    weekly returns before that week.  Both capitals start at 100.  The
    strategy capital is reduced by ``trade_cost`` whenever the signal changes
    and is then scaled by the week's log return times the current signal.
    One line per week is printed with the strategy and reference capital.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting of the statements under ``if current_state ==
    1`` below is the most plausible reading and should be confirmed.
    """
    # Set some defaults
    p = Predictor(source_file, m, k, fltr, testing_start_date, testing_end_date)
    p.read_file_to_daily_data_by_weeks(0)
    p.calc_historical_weekly_return(p.daily_data_by_weeks)
    # Initialize variables
    strategy_trade_count = 0
    in_market_count = 0
    current_state = 0
    strategy_capital = 100
    bh_capital = 100
    strategy_weekly_return = []
    bh_weekly_return = []
    for idx in range(p.start_split, p.end_split):
        # Generate weekly signals
        # Only the history before week idx is visible to the predictor.
        p.weekly_return_data = p.historical_weekly_return_data[:idx - 1]
        alist = p.find_k_closest_histories()
        r = p.calc_next_week_return(alist)
        cur_signal = p.signal(current_state, r)
        # Signal handling
        # Field 2 of the last daily entry of a week is used as the price.
        cur_index_price = p.daily_data_by_weeks[idx][-1][2]
        prev_index_price = p.daily_data_by_weeks[idx - 1][-1][2]
        # Log return between the two week-end prices.
        actual_return = math.log(cur_index_price) - math.log(prev_index_price)
        if current_state != cur_signal:
            # A change of signal counts as a trade and costs trade_cost.
            current_state = cur_signal
            strategy_trade_count += 1
            strategy_capital = strategy_capital * (1 - trade_cost)
        # K Nearest Neighbor
        strategy_capital = strategy_capital * (1 + actual_return * current_state)
        strategy_weekly_return.append(actual_return)
        # Buy-and-Hold
        if current_state == 1:
            #in_market_count += 1
            bh_capital = bh_capital * (1 + actual_return)
        bh_weekly_return.append(actual_return)
        # Field 0 of the previous week's last entry labels the row
        # (presumably a date -- confirm).
        print('{0}, {1}, {2}'.format(p.daily_data_by_weeks[idx - 1][-1][0], strategy_capital, bh_capital))
def build_predictor(data, settings):
    """Assemble the classification model around a ``Predictor`` encoder layer.

    The model takes a token-id input of length ``settings['max_len']`` and a
    one-element ``int8`` bucket-size input.  Tokens are embedded, passed
    through the encoder, and the encoder's first output is fed through one
    Dense/tanh/Dropout stack per entry of ``settings['hidden_dims']`` into a
    softmax layer.  The returned model outputs the softmax plus encoder
    outputs 1 to 5.  ``data`` is not used.
    """
    sys.stdout.write('Building model\n')

    tokens_in = Input(shape=(settings['max_len'], ))
    bucket_in = Input(shape=(1, ), dtype="int8")

    embedded = Embedding(input_dim=settings['max_features'] + 3,
                         output_dim=settings['word_embedding_size'],
                         mask_zero=True,
                         name="emb")(tokens_in)

    encoder_layer = Predictor(input_dim=settings['word_embedding_size'],
                              hidden_dim=settings['sentence_embedding_size'],
                              RL_dim=settings['RL_dim'],
                              max_len=settings['max_len'],
                              batch_size=settings['batch_size'],
                              random_action_prob=settings['random_action_prob'],
                              name='encoder')
    encoded = encoder_layer([embedded, bucket_in])

    # Classification head on top of the first encoder output.
    hidden = encoded[0]
    for idx, hidden_dim in enumerate(settings['hidden_dims']):
        hidden = Dense(hidden_dim, name="dense_{}".format(idx))(hidden)
        hidden = Activation('tanh')(hidden)
        hidden = Dropout(settings['dense_dropout'])(hidden)
    class_probs = Dense(settings['num_of_classes'],
                        activation='softmax',
                        name='output')(hidden)

    model_outputs = [class_probs] + [encoded[i] for i in range(1, 6)]
    return Model(inputs=[tokens_in, bucket_in], outputs=model_outputs)
class QASystem(object):
    """Question answering front end: a search engine plus a relevance predictor."""

    def __init__(self):
        """Load the search engine (from ``database.dat`` if present) and the predictor."""
        if not exists('database.dat'):
            # load database from txt is slower.
            print('load from QA database from txt format...')
            engine = SearchEngine('cc/cppjieba/dict')
            engine.loadFromTxt('question_answer.txt')
            engine.save('database.dat')
        else:
            # deserialize database is much faster.
            print('deserialize the QA database...')
            engine = SearchEngine('cc/cppjieba/dict', 'database.dat')
        self.search_engine = engine
        self.predictor = Predictor()

    def query(self, question, count=3):
        """Return ``{answer: (score, match[1])}`` for the engine's matches.

        The score is ``log(match[0]) * relevance`` where ``match[0]`` is
        clamped from below to the smallest positive float so the logarithm
        is always defined.
        """
        totals = dict()
        matches = self.search_engine.query(question, count)
        for answer, match in matches.items():
            relevance = self.predictor.predict(question, answer)[1]
            clamped = max(match[0], sys.float_info.min)
            totals[answer] = (log(clamped) * relevance, match[1])
        return totals

    def updateDB(self, file):
        """Load the text file ``file`` into the engine and save ``database.dat``."""
        assert type(file) is str
        engine = self.search_engine
        engine.loadFromTxt(file)
        engine.save('database.dat')
def main():
    """Serve language predictions over TCP, one request per connection.

    The server binds to ``(host, port)``, then loops forever: accept a
    client, read up to ``size`` bytes, and if anything was received send
    back the predicted language name.  Every client connection is closed
    after it has been handled, even when handling raises, and the listening
    socket is closed when the loop is left through an exception.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind((host, port))
        s.listen(backlog)
        predictor = Predictor(get_language_files())
        while True:
            client, address = s.accept()
            try:
                data = client.recv(size)
                print("Recieved {}".format(data))
                if data:
                    ngram = predictor.predict(data)
                    print("Sending {}".format(ngram.language_name))
                    # sendall keeps writing until the whole payload is out;
                    # plain send may stop after a partial write.
                    # NOTE(review): under Python 3 the payload must be bytes
                    # -- confirm the type of language_name.
                    client.sendall(ngram.language_name)
            finally:
                client.close()
    finally:
        s.close()
class QASystem(object):
    """Question answering front end: a search engine plus a relevance predictor."""

    def __init__(self):
        """Load the search engine (from ``database.dat`` if present) and the predictor."""
        if exists('database.dat'):
            # deserialize database is much faster.
            print('deserialize the QA database...')
            self.search_engine = SearchEngine('cppjieba/dict', 'database.dat')
        else:
            # load database from txt is slower.
            print('load from QA database from txt format...')
            self.search_engine = SearchEngine('cppjieba/dict')
            self.search_engine.loadFromTxt('question_answer.txt')
            self.search_engine.save('database.dat')
        self.predictor = Predictor()

    def query(self, question, count=3):
        """Return ``{answer: exp(match) + exp(relevance)}``, best score first.

        The result is an ordered mapping whose iteration order is descending
        total score.  Previously the sort result was thrown away, and the
        sort itself keyed on the second character of each answer string,
        which raised ``IndexError`` for one-character answers.
        """
        from collections import OrderedDict

        answer_scores = self.search_engine.query(question, count)
        answer_totalscores = dict()
        for answer, match in answer_scores.items():
            _, relevance = self.predictor.predict(question, answer)
            answer_totalscores[answer] = exp(match) + exp(relevance)
        # sort in descending order of total score
        ranked = sorted(answer_totalscores.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
        return OrderedDict(ranked)

    def updateDB(self, file):
        """Load the text file ``file`` into the engine and save ``database.dat``."""
        assert type(file) is str
        self.search_engine.loadFromTxt(file)
        self.search_engine.save('database.dat')
def runAlg(self, dataType):
    '''
    Run the classifier and the regressor ten times each, plot the sorted
    results and write them (with their median) to a result file.

    The result file is opened with a ``with`` block so it is always closed.

    :param dataType: gsm / lte
    :return: None
    :raises Exception: if dataType is neither "gsm" nor "lte"
    '''
    if dataType != "gsm" and dataType != "lte":
        raise Exception("未知数据类型")
    # Results of the regressor and the classifier
    regRes = []
    claRes = []
    predorCla = Predictor("cla", dataType)
    predorReg = Predictor("reg", dataType)
    for run in range(10):
        print(str(run) + " -- begin")
        # Run the algorithms
        claRes.append(predorCla.fit())
        regRes.append(predorReg.fit())
        print(str(run) + " -- change data")
        # Generate a new split of the data
        predorReg.changeData()
        predorCla.changeData()
    # Sort
    regRes.sort()
    claRes.sort()
    folder = "gsmResult/" if "gsm" == dataType else "lteResult/"
    # Plot
    self.draw(regRes, folder + "regImg")
    self.draw(claRes, folder + "claImg")
    # Write out the results and the median (mean of the two middle values
    # of the ten sorted results)
    with open(folder + "res", 'w') as resFile:
        resFile.write("reg result:\n")
        resFile.write(str(regRes))
        resFile.write("\nreg mid:\n")
        resFile.write(str((regRes[4] + regRes[5]) / 2))
        resFile.write("\ncla result:\n")
        resFile.write(str(claRes))
        resFile.write("\ncla mid:\n")
        resFile.write(str((claRes[4] + claRes[5]) / 2))
def __init__(self,df,host,measurement,look_back,nb_layers,loss,metric,nb_features,optimizer,nb_epochs,nb_batch,form,freq_period) :
    """Prepare the data, then build and train the trend/seasonal/residual models.

    ``prepare_data`` returns an (x, y) pair for each of the three
    components.  One model per component is created with ``make_models``
    and trained in turn with ``train_model``; the trained models are stored
    as ``self.model_trend``, ``self.model_seasonal`` and
    ``self.model_residual``.
    """
    Predictor.__init__(self)
    self.df, self.host, self.measurement = df, host, measurement
    self.form, self.freq_period = form, freq_period

    (trend_x, trend_y,
     seasonal_x, seasonal_y,
     residual_x, residual_y) = self.prepare_data(df, look_back, self.freq_period)

    def build(flag):
        # Only the trend model is built with the last argument set to True.
        return self.make_models(nb_layers, loss, metric, nb_features, optimizer, flag)

    untrained = {
        "trend": build(True),
        "seasonal": build(False),
        "residual": build(False),
    }

    plan = (("trend", trend_x, trend_y),
            ("seasonal", seasonal_x, seasonal_y),
            ("residual", residual_x, residual_y))
    trained = {}
    for name, xs, ys in plan:
        trained[name] = self.train_model(untrained[name], xs, ys, nb_epochs, nb_batch, name)

    self.model_trend = trained["trend"]
    self.model_seasonal = trained["seasonal"]
    self.model_residual = trained["residual"]
def run():
    """Load, pre-process, plot, model and export predictions for 'SalePrice'.

    Writes ``test_evaluation_results.csv`` and ``test_submission.csv``.
    """
    # Load the raw CSV data.
    loader = DataLoader(data_dir_root, data_train_file, data_test_file)
    raw_train_df, raw_test_df = loader.load_csv_data()
    loader.print_statistics()

    # Clean the columns of interest.
    pre_processor = PreProcessor(raw_train_df,
                                 raw_test_df,
                                 cols_to_consider=cols_to_consider,
                                 target_feature='SalePrice')
    pre_processor.pre_process_data()

    # Diagnostics and plots.
    print_features_info(pre_processor.raw_train_df,
                        pre_processor.clean_train_df)
    plot_target_feature(pre_processor.raw_train_df,
                        pre_processor.target_feature)
    plot_features_hist(pre_processor.raw_train_df)
    plot_correlation_numeric_features(pre_processor.clean_train_df)

    # Train and test frames are encoded with identical settings.
    def encode(frame):
        return prepare_data(
            frame,
            class_col=pre_processor.target_feature,
            reg_encoding_features=[],
            one_hot_encoding_features=one_hot_encod_features,
            ordinal_encoding_features=features_ordinal_mappings,
            no_enc_features=no_enc_features)

    train_X, train_y = encode(pre_processor.clean_train_df)
    test_X, test_y = encode(pre_processor.clean_test_df)

    # Fit the models (no grid search) and collect the predictions.
    evaluator = Predictor(train_X, train_y, test_X, test_y, eval_classifiers,
                          eval_classifiers_params_grid)
    all_predictions, final_prediction = evaluator.build_models(
        grid_search=False)
    evaluation_df = evaluator.save_predictions_to_df(all_predictions,
                                                     final_prediction)
    submission_df = evaluator.save_predictions_for_submission(
        evaluation_df, id_col=pre_processor.raw_test_df['Id'])

    evaluation_df.to_csv("test_evaluation_results.csv", index=False)
    submission_df.to_csv("test_submission.csv", index=False)
def lambda_handler(event, context):
    """Use a model for an existing author to generate length words,
    interleaved with user text input.

    Args:
        event: mapping with the keys ``"author"``, ``"userText"`` and
            ``"length"``.
        context: not used.

    Returns:
        The result of ``clean_generated_data`` applied to the space-joined
        generated sample and the user's unique words.
    """
    author = event["author"]
    user_text = event["userText"]
    length = event["length"]

    # All files of an author share one directory and name prefix.
    prefix = get_dir_for_author(author) + author

    # Load in the predictor
    predictor = Predictor(128, model=prefix + ".model", vocab=prefix + ".vocab")

    # Clean the user data and separate out unknown words.
    # The helper is spelled read_common_vocab (as used when the model is
    # generated); the earlier spelling "read_common_cocab" was a typo.
    common_vocab = read_common_vocab(prefix + ".commons")
    _, unique_user_words = clean_input_data(user_text, common_vocab)

    generated_sample = predictor.sample(length)
    return clean_generated_data(' '.join(generated_sample), unique_user_words)
def __init__(self,
             location,
             inputFile,
             outputDir=None,
             cns=False,
             reject=None,
             angleOnly=False,
             ppm=False,
             progressBar=None,
             writePgm=True):
    """Run the whole pipeline: reference setup, shift search, prediction.

    The reference is loaded from the directory of ``location``; its output
    directory (``outputDir`` or the ``OUTDIR`` default) is created if
    missing.  The query protein's shifts are read from ``inputFile``,
    compared with the shift database, and the predictions made from the
    matches are stored in ``self.predictions``.
    """
    self.input = inputFile
    self.progressBar = progressBar

    print('DANGLE (version 1.1)')
    print(DANGLE_CITE)

    # 1. read config file for location of reference information
    reference = Reference(os.path.dirname(location))
    self.reference = reference
    reference.outDir = outputDir or OUTDIR
    if not os.path.isdir(reference.outDir):
        os.makedirs(reference.outDir)
    reference.cns = cns
    reference.ppm = ppm
    reference.angleOnly = angleOnly
    if reject is not None:
        reference.rejectThresh = reject

    # 2. read shifts of query protein (input) and calculate secondary shifts
    self.query = Protein(self.reference)
    self.query.readShiftsFromXml(inputFile)

    # 3. compare with DB
    print('STEP1: Shift search')
    self.topMatches = self.compareWithShiftDB()

    # 4. make predictions from scorograms
    print('STEP2: GLE generation')
    self.predictor = Predictor(self.query, self.topMatches, self.reference,
                               writePgm)
    self.predictions = self.predictor.predictPhiPsiFromDatabaseMatches(
        progressBar=self.progressBar)
def update_bert(session):
    """Refresh the QA text file, rebuild the dataset and finetune the model.

    Steps:
      1. Dump every (question, answer) row of ``wd_qa_knowledge`` to
         ``question_answer.txt`` as tab-separated lines.
      2. Run ``./create_dataset`` on that file to produce ``dataset``.
      3. Finetune a ``Predictor`` on ``dataset``.

    A ``{'status': ...}`` message is emitted to the socket room ``session``:
    ``failure`` if step 1 or 2 raises, ``success`` after step 3.

    Args:
        session: socket room name; must be a ``str``.
    """
    assert type(session) is str

    def notify(status):
        # Send the outcome to the client that requested the update.
        response = jsonify({'status': status})
        socketio.emit('msg', namespace='/socket', room=session, data=response)

    # download qa database from database.
    try:
        logger.info('get the latest knowledge from wd_qa_knowledge...')
        db = MySQLdb.connect(host=db_host,
                             user=db_usr,
                             passwd=db_psw,
                             db=db_name,
                             charset='utf8')
        try:
            sql = "select question,answer from wd_qa_knowledge"
            cur = db.cursor()
            cur.execute(sql.encode('utf-8'))
            qa = "".join(row[0] + "\t" + row[1] + "\n"
                         for row in cur.fetchall())
            with open("question_answer.txt", "wb") as f:
                f.write(qa.encode('utf-8'))
            db.commit()
        finally:
            # Close the connection even if the query or the write failed.
            db.close()
    except Exception as e:
        logger.error(e)
        notify('failure')
        return

    # generate dataset.
    try:
        logger.info('generating training set...')
        from subprocess import check_call
        # check_call raises on a non-zero exit status, so a failing
        # create_dataset run is reported instead of being ignored.
        check_call(
            ["./create_dataset", "-i", "question_answer.txt", "-o", "dataset"])
    except Exception as e:
        logger.error(e)
        notify('failure')
        return

    # finetune model
    logger.info('training...')
    from Predictor import Predictor
    predictor = Predictor()
    predictor.finetune('dataset')
    notify('success')
def simulate_generations(self, num_generations, print_best):
    """Evaluate and evolve the population for ``num_generations`` generations.

    Every generation all networks are scored by
    ``Predictor.make_predictions``, which fills slot 1 (used as accuracy)
    and slot 2 (used as score) of each ``[net, 0, 0]`` engine.  The
    generation's statistics are printed and appended to
    ``csv/ESN_Results.csv``, the best-scoring network is pickled to
    ``neural_net.txt``, and, except after the last generation, the next
    generation is prepared.

    Args:
        num_generations: number of generations to run.
        print_best: when true, ``show_net()`` is called on the best network
            of each generation.
    """
    file_path = "csv/ESN_Results.csv"
    dw = DataWriter()
    dr = DataReader()
    dw.init_table(file_path)
    p = Predictor()
    mapping = dr.get_mapping()
    images = dr.get_images(112800, 28, 28)  # 112800 images in data set
    scale_factor = 10
    for i in range(num_generations):
        best_score = -100
        best_accuracy = -100
        best_net = []
        engines = [[net, 0, 0] for net in self.networks]
        p.make_predictions(engines, mapping, images, scale_factor)

        for j, engine in enumerate(engines):
            self.networks[j].fitness = engine[1]
            if engine[2] > best_score:
                best_score = engine[2]
                best_net = self.networks[j]
            if engine[1] > best_accuracy:
                best_accuracy = engine[1]
        avg_accuracy = self.avg_fitness(self.networks)  # avg accuracy

        for j, engine in enumerate(engines):
            self.networks[j].fitness = engine[2]  # change fitness to score
        avg_score = self.avg_fitness(self.networks)  # avg score
        avg_size = self.avg_network_size()

        if print_best:
            best_net.show_net()
        print("-----------------------------------\t\t\t\t\t\t\n Generation " + str(i+1) + " results\n-----------------------------------\n", end='\n')
        print("Highest accuracy: " + str(best_accuracy*100) + "%\nHighest score: " + str(best_score**(1.0/scale_factor)) +
              "\nAverage accuracy: " + str(avg_accuracy*100) + "%\nAverage score: " + str(avg_score**(1.0/scale_factor)) +
              "\nNum species: " + str(len(self.species)) + "\nInnovs tried: " + str(self.networks[0].master_innov[0]) +
              "\nAverage connections per network: " + str(avg_size) + "\n")

        non_jit = self.construct_non_jit(best_net)
        # Close the file deterministically instead of leaking the handle.
        with open("neural_net.txt", "wb") as net_file:
            pickle.dump(non_jit, net_file)
        dw.write_row(file_path, [i+1, best_accuracy*100, avg_accuracy*100,
                                 best_score**(1.0/scale_factor),
                                 avg_score**(1.0/scale_factor), avg_size])

        if i != num_generations-1:
            self.prepare_next_gen(math.ceil(self.pop_size/10))
            print("\nStarting Generation " + str(i+2) + ": Species = " + str(len(self.species)) + ", Innovs = " +
                  str(self.networks[0].master_innov[0]), end='\n')
    print("Finished simulation!")
def __init__(self, df, host, measurement, look_back, nb_layers, loss, metric,
             nb_features, optimizer, nb_epochs, nb_batch, form, freq_period):
    """Prepare the data and train the three component models in threads.

    One ``Thread_train_model`` per component (trend, seasonal, residual) is
    started; all share one queue.  After every thread has been joined, three
    items are taken from the queue and stored and saved as the trend,
    seasonal and residual model, in that order.

    NOTE(review): the items are assigned purely by queue position; if the
    threads put their results in completion order, the models could be
    swapped -- confirm against Thread_train_model.
    """
    Predictor.__init__(self)
    self.df, self.host, self.measurement = df, host, measurement
    self.form, self.freq_period = form, freq_period

    (trend_x, trend_y,
     seasonal_x, seasonal_y,
     residual_x, residual_y) = self.prepare_data(df, look_back,
                                                 self.freq_period)

    def build(flag):
        # Only the trend model is built with the last argument set to True.
        return self.make_models(nb_layers, loss, metric, nb_features,
                                optimizer, flag)

    model_trend = build(True)
    model_seasonal = build(False)
    model_residual = build(False)

    que = queue.Queue()
    jobs = (
        (model_trend, trend_x, trend_y, "trend", "Trend Thread"),
        (model_seasonal, seasonal_x, seasonal_y, "seasonal", "Seasonal Thread"),
        (model_residual, residual_x, residual_y, "residual", "Residual Thread"),
    )
    threads_list = list()
    for model, xs, ys, label, thread_name in jobs:
        worker = Thread_train_model(model, que, xs, ys, nb_epochs, nb_batch,
                                    label, thread_name)
        worker.start()
        threads_list.append(worker)

    for worker in threads_list:
        worker.join()

    # Collect the results without blocking; every thread has finished.
    self.model_trend = que.get(block=False)
    self.model_save(self.model_trend, "trend")
    self.model_seasonal = que.get(block=False)
    self.model_save(self.model_seasonal, "seasonal")
    self.model_residual = que.get(block=False)
    self.model_save(self.model_residual, "residual")
def GetPrediction():
    """Predict from the JSON request body and return a small JSON string.

    A one-row DataFrame is built from the request fields (plus a constant
    ``"Name"`` column of 1) and passed to ``Predictor.predict``; element
    ``[0][0]`` of the prediction is returned under the key ``Adhered``.
    """
    feature_names = (
        "PatientAge",
        "TimesPerDay",
        "DiagnosticCode",
        "CitySize",
        "PillCost",
        "NumberOfProducts",
        "KnownDoctorsVisits",
        "Income",
        "DaysSinceLastViolation",
        "Adhered",
    )
    with graph.as_default():
        request_data = request.get_json()

        # Every request value becomes a single-element column.
        input_data = {"Name": 1}
        for feature in feature_names:
            input_data[feature] = [request_data[feature]]
        input_data_df = pd.DataFrame(data=input_data)

        prediction = Predictor().predict(input_data_df)
        return '{ "Adhered": ' + str(prediction[0][0]) + '}'
def DoWork(source_file, m, k, fltr, trade_cost, testing_start_date, testing_end_date):
    """Simulate the predictor's weekly signal against a reference capital.

    A ``Predictor`` is built from the arguments and, for every week index in
    ``range(p.start_split, p.end_split)``, a signal is derived from the
    weekly returns before that week.  Both capitals start at 100.  The
    strategy capital is reduced by ``trade_cost`` whenever the signal changes
    and is then scaled by the week's log return times the current signal.
    One line per week is printed with the strategy and reference capital.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting of the statements under ``if current_state ==
    1`` below is the most plausible reading and should be confirmed.
    """
    # Set some defaults
    p = Predictor(source_file, m, k, fltr, testing_start_date, testing_end_date)
    p.read_file_to_daily_data_by_weeks(0)
    p.calc_historical_weekly_return(p.daily_data_by_weeks)
    # Initialize variables
    strategy_trade_count = 0
    in_market_count = 0
    current_state = 0
    strategy_capital = 100
    bh_capital = 100
    strategy_weekly_return = []
    bh_weekly_return = []
    for idx in range(p.start_split, p.end_split):
        # Generate weekly signals
        # Only the history before week idx is visible to the predictor.
        p.weekly_return_data = p.historical_weekly_return_data[:idx-1]
        alist = p.find_k_closest_histories()
        r = p.calc_next_week_return(alist)
        cur_signal = p.signal(current_state, r)
        # Signal handling
        # Field 2 of the last daily entry of a week is used as the price.
        cur_index_price = p.daily_data_by_weeks[idx][-1][2]
        prev_index_price = p.daily_data_by_weeks[idx-1][-1][2]
        # Log return between the two week-end prices.
        actual_return = math.log(cur_index_price) - math.log(prev_index_price)
        if current_state != cur_signal:
            # A change of signal counts as a trade and costs trade_cost.
            current_state = cur_signal
            strategy_trade_count += 1
            strategy_capital = strategy_capital * (1 - trade_cost)
        # K Nearest Neighbor
        strategy_capital = strategy_capital * (1 + actual_return * current_state)
        strategy_weekly_return.append(actual_return)
        # Buy-and-Hold
        if current_state == 1:
            #in_market_count += 1
            bh_capital = bh_capital * (1 + actual_return)
        bh_weekly_return.append(actual_return)
        # Field 0 of the previous week's last entry labels the row
        # (presumably a date -- confirm).
        print('{0}, {1}, {2}'.format(p.daily_data_by_weeks[idx-1][-1][0], strategy_capital, bh_capital))
def main(): print "-- Welcome to movie-recommend! --" # for output readability np.set_printoptions(formatter={'float_kind': '{:25f}'.format}) # baseline predictor by default mode = BASELINE # read command-line argument, if provided if len(sys.argv) > 1: if sys.argv[1] == IMPROVED or sys.argv[1] == BASELINE: mode = sys.argv[1] print "\tYou chose", mode, "predictor!" else: print "\t", sys.argv[ 1], "is not a valid argument. Default:", mode, "predictor!" else: print "\tYou did not provide any arguments. Default:", mode, "predictor!" # read and parse text files parser = Parser(mode) print "\tParser initialized:" print "\t\t", len(parser.test_set), "test points and", np.count_nonzero( parser.training_matrix), "training points" # initialize predictor and calculate rmse predictor = Predictor(mode, parser.training_matrix, parser.test_set) print "\trmse on test data (baseline):", predictor.rmse_test if predictor.mode == BASELINE: print "\trmse on training data (baseline):", predictor.rmse_training else: print "\trmse on test data (improved):", predictor.rmse_test_improved # execute histogram plotting and get error distribution error_dist = predictor.calculate_absolute_errors( parser.test_set, predictor.improved_matrix ) if predictor.mode == IMPROVED else predictor.calculate_absolute_errors( parser.test_set, predictor.baseline_matrix) print "\tHistogram saved to file. Error distribution:", error_dist
def main():
    """Fit the preprocessors, train the LSTM and plot predictions vs. test data."""
    toy = True
    market, news = read_data()
    train_idx, val_idx, test_idx = split_data(market, toy)

    # Both preprocessors are fitted on the training indices only.
    market_prepro = MarketPrepro()
    market_prepro.fit(train_idx, market)
    news_prepro = NewsPrepro()
    news_prepro.fit(train_idx, news)
    prepro = JoinedPreprocessor(market_prepro, news_prepro)

    # Generators for training and validation data.
    join_generator = JoinedGenerator(prepro, train_idx, market, news)
    val_generator = JoinedGenerator(prepro, val_idx, market, news)
    print('Generators created')

    # The model input size is the total number of feature columns.
    n_features = (len(market_prepro.feature_cols) +
                  len(news_prepro.feature_cols))
    model = ModelFactory.lstm_128(n_features)
    model.load_weights("best_weights.h5")
    print(model.summary())
    ModelFactory.train(model, toy, join_generator, val_generator)

    # Predict; the second call replaces y_pred from the first one.
    predictor = Predictor(prepro, market_prepro, news_prepro, model,
                          ModelFactory.look_back,
                          ModelFactory.look_back_step)
    y_pred, y_test = predictor.predict_idx(test_idx, market, news)
    y_pred = predictor.predict(market, news)

    for series in (y_pred, y_test):
        plt.plot(series)
    plt.legend(["pred", "test"])
    plt.show()

    print('The end')
def main():
    """Train one n-gram model per (size, language) and label the test sentences.

    The trained grams are dumped to file, then every test sentence is reduced
    to its lowercase alphabetic characters, classified, and the result is
    printed and saved to ``out<idx>.txt`` in ``Constants.OUTPUT_PATH``.
    """
    training_set = TrainingSetHandler()
    training_set.load_training_set()

    gram_list = []
    for size in Constants.SIZE_OF_GRAMS:
        for language in training_set.language_list:
            model = NGram(size, string.ascii_lowercase, 0.5)
            model.train(training_set.training_set[language], language)
            gram_list.append(model)

    # dump copies of grams to file
    dump_grams(gram_list)

    predic = Predictor(gram_list)
    test_set_handler = TestSetHandler()
    test_set_handler.load_test_sentence()

    for idx, sentence in enumerate(test_set_handler.test_set):
        # Keep letters only, lower-cased.
        letters = "".join(ch for ch in sentence[1] if ch.isalpha())
        prediction = predic.predict_this_sentence(letters.lower())
        out_path = os.path.join(Constants.OUTPUT_PATH,
                                "out{}.txt".format(idx))
        with open(out_path, 'w') as f:
            OutputHelper(prediction, sentence, f).print_and_save_output()
def main(argv):
    """Run the detector over the COCO val2017 images and print the bbox evaluation.

    Every predicted box is stored as ``[image id, box[0], box[1],
    box[2] - box[0], box[3] - box[1], box[4], category]`` where the category
    is the position of ``int(box[5]) + 1`` in ``label_map``.
    """
    yolov3 = tf.keras.models.load_model(FLAGS.model, compile=False)
    predictor = Predictor(yolov3=yolov3)
    anno = COCO(join(FLAGS.annotation_dir, 'instances_val2017.json'))

    detections = list()
    for count, imgid in enumerate(anno.getImgIds()):
        print("processing (%d/%d)" % (count, len(anno.getImgIds())))
        # predict
        img_info = anno.loadImgs([imgid])[0]
        img = cv2.imread(join(FLAGS.coco_eval_dir, img_info['file_name']))
        boundings = predictor.predict(img).numpy()
        # collect results
        for box in boundings:
            category = label_map.index(int(box[5]) + 1)
            detections.append([
                imgid, box[0], box[1], box[2] - box[0], box[3] - box[1],
                box[4], category
            ])

    cocoDt = anno.loadRes(np.array(detections))
    cocoEval = COCOeval(anno, cocoDt, iouType='bbox')
    cocoEval.params.imgIds = anno.getImgIds()
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
def load(positive_class, negative_class):
    """
    Load the model and create a DirectoryManipulator, printing progress banners.

    :param positive_class: first element of the class pair given to the Predictor.
    :param negative_class: second element of the class pair given to the Predictor.
    :return: The model and a DirectoryManipulator.
    """
    banner = "#" * 15

    for text in (banner, "loading model...", banner):
        print(text)

    model_path = r"models\inceptionResNetV2_optimized_h5\inceptionResNetV2_optimized.h5"
    p = Predictor(model_path, (positive_class, negative_class))

    for text in (banner, "model loaded.", banner):
        print(text)

    d = DirectoryManipulator()
    return p, d
import sys import os import pickle from Predictor import Predictor if __name__ == '__main__': if len(sys.argv) < 3: print 'Usage:', sys.argv[0], 'spamFolder, hamFolder' else: if os.path.isdir(sys.argv[1]) and os.path.isdir(sys.argv[2]): print 'training...' predictor = Predictor(sys.argv[1], sys.argv[2]) print predictor.predict('hw6-spamham-data/dev/dev1') # save to pickle print 'saving predictor to pickle' pickle.dump(predictor, open('predictor.pickle', 'w')) else: print 'training folders illegal'
import sys import os import pickle from Predictor import Predictor if __name__ == '__main__': if len(sys.argv) < 3: print 'Usage:', sys.argv[0], 'spamFolder, hamFolder' else: if os.path.isdir(sys.argv[1]) and os.path.isdir(sys.argv[2]): print 'training...' predictor = Predictor(sys.argv[1], sys.argv[2]) print predictor.predict('bla') # save to pickle print 'saving predictor to pickle' pickle.dump(predictor, open('predictor.pickle', 'w')) else: print 'training folders illegal'
class RLDecisionMaker:
    """Decides whether nodes should be added to or removed from a cluster.

    Measurements are remembered per state (the state is the cluster size, kept
    as a string key in ``self.memory``). ``take_decision`` scores every state
    reachable from the current one with the ``env_vars['gain']`` expression
    and returns an ADD / REMOVE / PASS decision dictionary.
    """

    # A log-related flag: True once the "last action hasn't finished yet"
    # message has been logged for the current pending action.
    pending_action_logged = False

    def __init__(self, cluster):
        """Set up logging, state and memory, then load the training set.

        :param cluster: object providing ``node_count()``; ``handle_metrics``
            also calls its ``get_hosts()``.
        """
        # Create logger
        LOG_FILENAME = 'files/logs/Coordinator.log'
        self.log = get_logger('RLDecisionMaker', 'INFO', logfile=LOG_FILENAME)
        self.log.info("Using 'gain' : " + env_vars['gain'] + " with threshold of " + str(
            env_vars["decision_threshold"] * 100) + "% and interval: " + str(env_vars['decision_interval']))
        self.log.info("Cluster Size from %d to %d nodes" %
                      (env_vars['min_cluster_size'], env_vars['max_cluster_size']))

        self.debug = False
        if self.debug:
            self.currentState = 8
        else:
            self.currentState = cluster.node_count()
        self.cluster = cluster
        self.nextState = self.currentState
        # Number of metric fetches to wait between two decisions.
        self.waitForIt = env_vars['decision_interval'] / env_vars['metric_fetch_interval']
        self.pending_action = None
        self.decision = {"action": "PASS", "count": 0}

        # The policy for getting throughput and latency when computing the
        # reward func: 'average' or 'centroid'.
        self.measurementsPolicy = 'centroid'
        self.prediction = env_vars['use_prediction']
        self.predictor = Predictor()

        # used only in simulation!!
        self.countdown = 0

        # A dictionary that will remember rewards and metrics in states
        # previously visited.
        self.memory = {}
        for i in range(env_vars["min_cluster_size"], env_vars["max_cluster_size"] + 1):
            self.memory[str(i)] = {'r': None, 'arrayMeas': None}

        # Load any previous statistics.
        self.measurementsFile = env_vars["measurements_file"]
        self.trainingFile = env_vars["training_file"]
        self.sumMetrics = {}

        # Initialize the measurements file; 'a+' creates it when missing.
        with open(self.measurementsFile, 'a+') as meas:
            if os.stat(self.measurementsFile).st_size == 0:
                # The file is empty, set the headers for each column.
                meas.write('State\t\tLambda\t\tThroughput\t\tLatency\t\tCPU\t\tTime\n')

        # Load the training set.
        with open(self.trainingFile, 'r+') as meas:
            if os.stat(self.trainingFile).st_size != 0:
                next(meas)  # Skip the first line with the headers of the columns
                for line in meas:
                    # Skip comments (used in training sets)
                    if not line.startswith('###'):
                        self.add_measurement(line.split('\t\t'))

    def add_measurement(self, metrics, write_file=False, write_mem=True):
        """
        Adds the measurement to either memory or file or both.

        @param metrics: array
            [state, lambda, throughput, latency, cpu, time]; the state is
            used as the memory key after conversion to ``str``.
        @param write_file: boolean
            If set, append the measurement to the measurements file.
        @param write_mem: boolean
            If set, append the measurement to ``self.memory``.
        :return: None
        """
        # Normalise the key once. The lookup used the raw value while the
        # store used str(value), so a non-string state was overwritten on
        # every call instead of being appended to.
        state = str(metrics[0])

        if self.measurementsPolicy.startswith('average'):
            totals = self.sumMetrics.setdefault(
                state, {'inlambda': 0.0, 'throughput': 0.0, 'latency': 0.0, 'divide_by': 0})
            totals['inlambda'] += float(metrics[1])
            totals['throughput'] += float(metrics[2])
            totals['latency'] += float(metrics[3])
            totals['divide_by'] += 1

        if self.debug and write_file:
            self.log.debug("add_measurements: won't load measurement to memory")
        elif write_mem:
            # metrics-> 1: lambda, 2: throughput, 3: latency, 4: cpu
            row = [float(metrics[1]), float(metrics[2]), float(metrics[3]), float(metrics[4])]
            entry = self.memory.get(state)
            if entry is None:
                self.memory[state] = {'r': None, 'arrayMeas': np.array(row, ndmin=2)}
            elif entry['arrayMeas'] is None:
                entry['arrayMeas'] = np.array(row, ndmin=2)
            else:
                entry['arrayMeas'] = np.append(entry['arrayMeas'], [row], axis=0)

        if write_file:
            used = "Yes" if write_mem else "No"
            # metrics[5] is the time value supplied by the caller; it is
            # written as-is and is handy for debugging and plotting.
            fields = [str(metrics[i]) for i in range(6)] + [used]
            with open(self.measurementsFile, 'a') as ms:
                ms.write('\t\t'.join(fields) + '\n')

    def get_averages(self, state):
        """Return the average throughput and latency recorded for a state.

        :param state: string key of the state.
        :return: dict with 'throughput' and 'latency', or an empty dict when
            nothing was accumulated for this state.
        """
        averages = {}
        if state in self.sumMetrics:
            totals = self.sumMetrics[state]
            averages['throughput'] = float(totals['throughput'] / totals['divide_by'])
            averages['latency'] = float(totals['latency'] / totals['divide_by'])
            self.log.debug("GETAVERAGES Average metrics for state: " + state + " num of measurements: " + str(
                totals['divide_by']) + " av. throughput: " + str(averages['throughput']) +
                " av. latency: " + str(averages['latency']))
        return averages

    def doKmeans(self, state, from_inlambda, to_inlambda):
        """Return the centroid of a state's measurements inside a lambda range.

        :param state: string key into ``self.memory``.
        :param from_inlambda: lower bound (inclusive) on the lambda column.
        :param to_inlambda: upper bound (inclusive) on the lambda column.
        :return: dict with 'inlambda', 'throughput', 'latency' and 'cpu'; all
            zeros when no measurement falls in the range, and an empty dict
            when the state has no measurements at all.
        """
        ctd = {}
        measurements = self.memory[state]['arrayMeas']
        # Identity test: comparing a numpy array with None using != is
        # evaluated element-wise and cannot be used as a condition.
        if measurements is None:
            self.log.debug("DOKMEANS self.memory[state]['arrayMeas'] is None :|")
            return ctd

        # Keep only the rows whose lambda (column 0) lies in the slice.
        lambdas = measurements[:, 0]
        sliced_data = measurements[(lambdas >= from_inlambda) & (lambdas <= to_inlambda)]

        k = 1  # number of clusters
        label = []
        centroids = {}
        if len(sliced_data) == 0:
            self.log.debug("No known lamdba values close to current lambda measurement. Returning zeros!")
        else:
            centroids, label = kmeans2(sliced_data, k, minit='points')

        if len(label) > 0:
            # Count the members of each cluster and use the most populated one.
            num_of_meas = dict((str(j), 0) for j in range(0, k))
            for i in label:
                num_of_meas[str(i)] += 1
            max_meas_cluster = max(num_of_meas.items(), key=operator.itemgetter(1))[0]
            centroid = centroids[int(max_meas_cluster)]
            ctd['inlambda'] = centroid[0]
            ctd['throughput'] = centroid[1]
            ctd['latency'] = centroid[2]
            ctd['cpu'] = centroid[3]
        else:
            ctd['inlambda'] = 0.0
            ctd['throughput'] = 0.0
            ctd['latency'] = 0.0
            ctd['cpu'] = 0.0
        return ctd

    def moving_average(self, iterable, n=3):
        """Yield the running average of ``iterable`` over a window of ``n``.

        moving_average([40, 30, 50, 46, 39, 44]) --> 40.0 42.0 45.0 43.0
        http://en.wikipedia.org/wiki/Moving_average
        """
        it = iter(iterable)
        d = deque(itertools.islice(it, n - 1))
        # The leading 0 is the value dropped when the first window completes.
        d.appendleft(0)
        s = sum(d)
        for elem in it:
            s += elem - d.popleft()
            d.append(elem)
            yield s / float(n)

    def predict_load(self):
        """Extrapolate lambda from the last 20 lines of the measurements file.

        Fits a line through a 5-point running average of the lambda column
        and evaluates it 10 time units after the last sample.

        :return: the predicted lambda as a float.
        """
        # Linear regression to see the slope. The last 20 lines are read
        # directly instead of spawning `tail` through os.popen2.
        with open(self.measurementsFile) as ms:
            lines = deque(ms, maxlen=20)

        ten_min_l = []  # store past 10 mins lambda's
        ten_min = []  # store past 10 mins ticks
        for line in lines:
            m = line.split('\t\t')  # state, lambda, throughput, latency, cpu, time tick
            ten_min_l.append(float(m[1]))
            ten_min.append(float(m[5]))

        # run running average on the 10 mins lambda measurements
        n = 5
        run_avg = [float(r) for r in self.moving_average(ten_min_l, n)]
        # 20 samples give 16 averages; pair them with the ticks at 2..17.
        ten_min_ra = ten_min[2:18]

        # linear regression on the running average
        (slope, intercept, r_value, p_value, stderr) = linregress(ten_min_ra, run_avg)
        # fit the raw measurements in a polynomial (logged only)
        coeff = np.polyfit(ten_min, ten_min_l, deg=2)
        self.log.debug("Slope (a): " + str(slope) + " Intercept(b): " + str(intercept))
        self.log.debug("Polynom coefficients: " + str(coeff))

        predicted_l = float(slope * (ten_min[19] + 10) + intercept + stderr)  # lambda in 10 mins from now

        # `dif` is only reported in the log below.
        if slope > 0:
            dif = 6000 + 10 * int(slope)
            self.log.debug("Positive slope: " + str(slope) + " dif: " + str(dif)
                           + ", the load is increasing. Moving the lambda slice considered 3K up")
        else:
            dif = -6000 + 10 * int(slope)
            self.log.debug("Negative slope " + str(slope) + " dif: " + str(dif)
                           + ", the load is decreasing. Moving the lambda slice considered 3K down")
        return predicted_l

    def publish_to_local_ganglia(self, allmetrics):
        """
        Publishes monitoring data to local ganglia agent (best effort).

        :param allmetrics: dict with 'inlambda', 'throughput' and 'latency'.
        :return: None
        """
        self.log.debug("TAKEDECISION allmetrics: " + str(allmetrics))
        # NOTE(review): self.monitoring_endpoint is not assigned in __init__;
        # confirm that something else sets it before this is called.
        gauges = (('inlambda', "'reqs/sec'"), ('throughput', "'reqs/sec'"), ('latency', 'ms'))
        try:
            for metric, unit in gauges:
                os.system("gmetric -n ycsb_" + metric + " -v " + str(allmetrics[metric]) +
                          " -d 15 -t float -u " + unit + " -S " +
                          str(self.monitoring_endpoint) + ":[DEBUG] hostname")
        except Exception as exc:
            # Publishing must never break the decision loop; just record it.
            self.log.debug("publish_to_local_ganglia failed: " + str(exc))

    def handle_metrics(self, client_metrics, server_metrics):
        """Aggregate the per-host client and server metrics.

        :param client_metrics: dict host -> dict of YCSB client metrics.
        :param server_metrics: dict host -> dict of server metrics.
        :return: dict with 'inlambda', 'throughput', 'latency' and 'cpu';
            all zeros in debug mode.
        """
        allmetrics = {'inlambda': 0, 'throughput': 0, 'latency': 0, 'cpu': 0}
        if self.debug:
            self.log.info("Running in DEBUG mode, no metrics retrieved!")
            return allmetrics

        # Aggregation of YCSB client metrics
        clients = 0
        servers = 0
        latency_prefixes = ('ycsb_READ', 'ycsb_UPDATE', 'ycsb_RMW', 'ycsb_INSERT')
        for host in client_metrics.keys():
            metric = client_metrics[host]
            if not isinstance(metric, dict):
                continue
            for key in metric.keys():
                if key.startswith('ycsb_TARGET'):
                    allmetrics['inlambda'] += float(metric[key])
                elif key.startswith('ycsb_THROUGHPUT'):
                    allmetrics['throughput'] += float(metric[key])
                elif key.startswith(latency_prefixes):
                    allmetrics['latency'] += float(metric[key])
                    # Number of latency readings the sum is divided by below.
                    clients += 1

        for host in server_metrics.keys():
            metric = server_metrics[host]
            if not isinstance(metric, dict):
                continue
            # check if host in active cluster hosts
            if host not in self.cluster.get_hosts().keys():
                continue
            servers += 1
            for key in metric.keys():
                if key.startswith('cpu_idle'):
                    allmetrics['cpu'] += float(metric[key])

        # Averages; zero when nothing was collected.
        allmetrics['latency'] = allmetrics['latency'] / clients if clients else 0
        allmetrics['cpu'] = allmetrics['cpu'] / servers if servers else 0
        return allmetrics

    def take_decision(self, client_metrics, server_metrics):
        '''
        Reads the metrics collected by the monitoring side and decides whether
        a change of the number of participating virtual nodes is due.

        :param client_metrics: dict host -> dict of YCSB client metrics.
        :param server_metrics: dict host -> dict of server metrics.
        :return: ``self.decision`` ({"action": "ADD"|"REMOVE"|"PASS",
            "count": int}), or None when either argument is None.
        '''
        # update prediction current minute counter
        self.predictor.tick_tock()

        if client_metrics is None or server_metrics is None:
            return

        # first parse all metrics
        allmetrics = self.handle_metrics(client_metrics, server_metrics)

        pending_action = self.pending_action is not None  # True while an action is pending

        # 1. Always save the current metrics to file; keep them in memory
        # only if there is no pending action.
        self.add_measurement([str(self.currentState), allmetrics['inlambda'], allmetrics['throughput'],
                              allmetrics['latency'], allmetrics['cpu'],
                              datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
                             write_file=True,
                             write_mem=((not pending_action) and bool(env_vars['update_metrics'])))

        # if there is a pending action, don't take a decision
        if pending_action:
            # The flag lives on the object: the former `global` statement
            # referred to a module-level name, not to the class attribute.
            if not self.pending_action_logged:
                self.log.debug("Last action " + self.pending_action + " hasn't finished yet, see you later!")
                self.pending_action_logged = True
            if self.debug:
                if self.countdown == 0:
                    self.log.debug("Running a simulation, set state from " + str(self.currentState) +
                                   " to " + str(self.nextState))
                    self.currentState = self.nextState
                    self.pending_action = None
                else:
                    self.countdown -= 1
                    self.log.debug("Reducing countdown to " + str(self.countdown))
            # skip decision
            self.decision["action"] = "PASS"
            self.decision["count"] = 0
            return self.decision

        self.pending_action_logged = False

        # manage the interval counter (waitForIt)
        if self.waitForIt == 0:
            self.waitForIt = env_vars['decision_interval'] / env_vars['metric_fetch_interval']
        else:
            if self.waitForIt == env_vars['decision_interval'] / env_vars['metric_fetch_interval']:
                self.log.debug("New decision in " +
                               str(float(self.waitForIt * env_vars['metric_fetch_interval']) / 60) +
                               " mins, see you later!")
            self.waitForIt -= 1
            self.decision["action"] = "PASS"
            self.decision["count"] = 0
            return self.decision

        # Select values close to the current lambda: the range of interest
        # is the (possibly predicted) lambda +- slice_range.
        slice_range = 75
        from_inlambda = allmetrics['inlambda'] - slice_range
        to_inlambda = allmetrics['inlambda'] + slice_range

        if self.prediction:
            predicted_l = self.predictor.poly_regression()
            if predicted_l > 0:
                # there are enough data to make a prediction, if not use the actual lambda
                self.log.debug("Predicted: " + str(predicted_l) + " lambda :" + str(allmetrics['inlambda']))
                from_inlambda = predicted_l - slice_range
                to_inlambda = predicted_l + slice_range

        self.log.debug("TAKEDECISION state %d lambda range: %d - %d" %
                       (self.currentState, from_inlambda, to_inlambda))

        # too low to care, the initial num of nodes can answer 1000 req/sec,
        # so consider it as 0
        if 0.0 < to_inlambda < 1000:
            from_inlambda = 0.0
            self.log.debug("TAKEDECISION state %d current lambda %d changed lambda range to: %d - %d"
                           % (self.currentState, allmetrics['inlambda'], from_inlambda, to_inlambda))

        # The subgraph we are interested in. It contains only the allowed
        # transitions from the current state.
        from_node = max(int(env_vars["min_cluster_size"]), (self.currentState - env_vars["rem_nodes"]))
        to_node = min(self.currentState + int(env_vars["add_nodes"]), int(env_vars["max_cluster_size"]))

        # inject the current number of nodes
        allmetrics['current_nodes'] = self.currentState

        states = fset.FuzzySet()
        # Calculate rewards using the values in memory if any, or defaults
        for i in range(from_node, to_node + 1):
            allmetrics['num_nodes'] = i

            met = {}
            if self.measurementsPolicy.startswith('average'):
                met = self.get_averages(str(i))
            elif self.measurementsPolicy.startswith('centroid'):
                met = self.doKmeans(str(i), from_inlambda, to_inlambda)
            # format met output
            out_met = dict((name, int(value)) for name, value in met.items())
            self.log.debug("TAKEDECISION state: " + str(i) + " met: " + str(out_met))

            if met is not None and len(met) > 0:
                # Been in this state before, use the measurements. Only the
                # keys actually provided are copied: the 'average' policy
                # returns just throughput and latency.
                for key in ('inlambda', 'throughput', 'latency', 'cpu'):
                    if key in met:
                        allmetrics[key] = met[key]
            # NOTE(review): for a state without measurements allmetrics keeps
            # whatever the previous iteration stored, not necessarily the
            # current measurements - confirm this is intended.

            # NOTE(review): env_vars["gain"] is evaluated with eval(); it must
            # come from trusted configuration only.
            self.memory[str(i)]['r'] = eval(env_vars["gain"], allmetrics)
            states.add(fset.FuzzyElement(str(i), self.memory[str(i)]['r']))

        # Create the transition graph
        v = set(states.keys())
        stategraph = fgraph.FuzzyGraph(viter=v, directed=True)
        for j in range(from_node, to_node + 1):
            if j != self.currentState:
                # Connect nodes with allowed transitions from the current
                # node. connect(tail, head, mu): head--mu-->tail
                stategraph.connect(str(j), str(self.currentState), eval(env_vars["trans_cost"], allmetrics))

        # Calculate the V matrix for available transitions. Unlike the
        # original algorithm, no discounted previous maximum is added: V is
        # simply the reward of each state.
        V = {}
        for s in range(from_node, to_node + 1):
            if self.memory[str(s)]['r'] is not None:
                V[s] = self.memory[str(s)]['r']

        self.log.debug("TAKEDECISION Vs=" + str(V))

        # Find the max V (the min state with the max value)
        max_gain = max(V.values())
        max_set = [key for key in V if V[key] == max_gain]
        self.log.debug("max set: " + str(max_set))
        self.nextState = min(max_set)
        self.log.debug("max(V): %d (GAIN=%d)" % (self.nextState, V[self.nextState]))

        if self.nextState != self.currentState:
            self.log.debug("Decided to change state to_next: " + str(self.nextState) +
                           " from_curr: " + str(self.currentState))
            # nextState was chosen because its reward is at least as high,
            # so d is expected to be >= 0.
            current_reward = self.memory[str(self.currentState)]['r']
            d = self.memory[str(self.nextState)]['r'] - current_reward
            self.log.debug("Difference is " + str(d) + " abs thres=" + str(env_vars['decision_abs_threshold']) +
                           " gte:" + str(float(d) < env_vars['decision_abs_threshold']))
            if (current_reward != 0 and (abs(float(d) / current_reward) < env_vars['decision_threshold'])) \
                    or float(d) < env_vars['decision_abs_threshold']:
                # false alarm, stay where you are
                self.nextState = self.currentState
                # skip decision
                self.decision["action"] = "PASS"
                self.decision["count"] = 0
                self.log.debug("ups changed my mind...staying at state: " + str(self.currentState) +
                               " cause the gain difference is: " + str(abs(d)) +
                               " which is less than %d%% of the current reward, it's actually %f%%" %
                               (int(100 * env_vars['decision_threshold']),
                                abs(float(d) * 100) / (float(current_reward) + 0.001)))
            else:
                self.log.debug("Difference " + str(d) + " is greater than threshold (" +
                               str(env_vars['decision_threshold']) + "). Keeping decision")

        if self.nextState > self.currentState:
            self.decision["action"] = "ADD"
        elif self.nextState < self.currentState:
            self.decision["action"] = "REMOVE"
        else:
            # No change: never leave an action from an earlier call in place.
            self.decision["action"] = "PASS"
        self.decision["count"] = abs(int(self.currentState) - int(self.nextState))

        # Don't perform the action if we're debugging/simulating!!!
        if self.debug:
            if self.pending_action is None and not self.decision["action"].startswith("PASS"):
                self.pending_action = self.decision['action']
                self.countdown = 2 * self.decision['count'] * 60 / env_vars['metric_fetch_interval']
                self.log.debug("TAKEDECISION simulation, action will finish in: " + str(self.countdown) + " mins")
            else:
                self.log.debug("TAKEDECISION Waiting for action to finish: " + str(self.pending_action))

        return self.decision

    def simulate(self):
        """Feed a simulated sinusoidal load to ``take_decision`` (900 steps)."""
        self.log.debug("START SIMULATION!!")
        for i in range(0, 900, 1):
            cpu = max(5, 60 * abs(math.sin(0.05 * math.radians(i))) - int(self.currentState))
            # lambda is the query arrival rate, throughput is the processed queries
            l = 60000 * math.sin(0.04 * i) + 75000 + random.uniform(-6000, 6000)
            maxThroughput = (float(self.currentState) * float(env_vars["serv_throughput"]))
            throughput = l
            latency = 0.0000004 * l ** 2 + 200  # msec
            if l > maxThroughput:
                # Overloaded cluster: throughput saturates and latency grows
                # with the square of the excess load.
                throughput = maxThroughput - 0.01 * l
                latency = 0.00001 * (l - maxThroughput) ** 2 + (0.0000004 * maxThroughput ** 2 + 200)  # msec
            values = {'latency': latency, 'cpu': cpu, 'inlambda': l, 'throughput': throughput,
                      'num_nodes': self.currentState}
            self.log.debug("SIMULATE i: " + str(i) + " state: " + str(self.currentState) +
                           " values:" + str(values) + " maxThroughput: " + str(maxThroughput))
            # NOTE(review): take_decision() takes (client_metrics,
            # server_metrics); this one-argument call raises TypeError and
            # needs to be adapted to the current signature.
            self.take_decision(values)
            time.sleep(1)
        return

    def simulate_training_set(self):
        """Replay the lambdas stored for states 9..18 through ``take_decision``."""
        self.log.debug("START SIMULATION!!")
        self.debug = True
        load = []
        for k in range(9, 19):
            for j in self.memory[str(k)]['arrayMeas']:
                load.append(j[0])

        for i in range(0, 240 * 12, 1):
            l = load[i]
            values = {'inlambda': l, 'num_nodes': self.currentState}
            self.log.debug("SIMULATE i: " + str(i) + " state: " + str(self.currentState) +
                           " values:" + str(values))
            # NOTE(review): same signature mismatch as in simulate().
            self.take_decision(values)
def runWithoutWndchrm(self):
    """Run the trainer, then the predictor, through their runWithoutWndchrm().

    Both objects are built with ``load=False, loadWndchrm=False``; nothing is
    returned.
    """
    Trainer(load=False, loadWndchrm=False).runWithoutWndchrm()
    Predictor(load=False, loadWndchrm=False).runWithoutWndchrm()
def run(self):
    """Run a fresh Trainer, then return the result of a fresh Predictor's run().

    Both objects are built with ``load=False, loadWndchrm=False``.
    """
    Trainer(load=False, loadWndchrm=False).run()
    return Predictor(load=False, loadWndchrm=False).run()
def run(self, k=3, useOnlyRF=True):
    """Cross-validate on k splits and score the best model on a held-out split.

    The merged data is shuffled and cut into ``k + 1`` splits: the last one is
    kept for the final test, each of the first ``k`` serves once as the
    validation split.

    :param k: number of cross-validation splits.
    :param useOnlyRF: when True the random forest itself is the model; when
        False it is only used to select features for a LinearSVC.
    """
    featureGetter = FeatureGetter()
    fileNameTrain = data_io.get_savez_name()
    fileNameTest = data_io.get_savez_name_test()
    print("Merging files...")
    (namesObservations, coordinates, dataset) = self.mergeFiles(fileNameTrain, fileNameTest)
    dataset = dataset[:, self.filterIndexes(len(dataset[0]))]

    print("Shuffling and splitting the data")
    indexesChanged = np.arange(len(dataset))
    np.random.shuffle(indexesChanged)
    splittedNamesObs = self.getShuffledSplits(namesObservations, indexesChanged, k + 1)
    splittedCoords = self.getShuffledSplits(coordinates, indexesChanged, k + 1)
    splittedData = self.getShuffledSplits(dataset, indexesChanged, k + 1)

    # Leave the last split for testing
    testNamesObs = splittedNamesObs[k]
    testCoords = splittedCoords[k]
    testDataset = splittedData[k]
    splittedNamesObs = splittedNamesObs[:k]
    splittedCoords = splittedCoords[:k]
    splittedData = splittedData[:k]

    # Release the full copies before the memory-hungry training starts.
    del dataset
    del coordinates
    del namesObservations
    del indexesChanged

    bestModel = None
    # Column filter that belongs to bestModel (None = all columns).
    bestFilter = None
    bestFmeasure = 0
    for i in range(k - 1, -1, -1):  # i is the index of the validation split
        print("Doing cross-validation for i=%d" % i)
        namesObservationsValid = splittedNamesObs[i]
        coordinatesValid = splittedCoords[i]
        datasetValid = splittedData[i]
        namesObservationsValid = np.reshape(namesObservationsValid, namesObservationsValid.shape[0])

        namesObservationsTrain = self.getTrainData(splittedNamesObs, i)
        coordinatesTrain = self.getTrainData(splittedCoords, i)
        datasetTrain = self.getTrainData(splittedData, i)
        namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0])

        print("Getting target vector")
        (indexes, target, obs) = featureGetter.getTargetVector(
            coordinatesTrain, namesObservationsTrain, datasetTrain)

        print("Selecting features")
        classifier = RandomForestClassifier(n_estimators=100, verbose=2, n_jobs=1, min_samples_split=1,
                                            random_state=1, compute_importances=True)
        model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
        model.fit(obs[indexes], target[indexes])

        filterImportances = None
        if not useOnlyRF:
            # Keep only the features the forest found useful and train the
            # final classifier on them.
            importances = classifier.feature_importances_
            filterImportances = np.where(importances > 0.0001)[0]
            print(len(filterImportances))
            print("Training model")
            classifier = LinearSVC(verbose=1)
            model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
            model.fit(obs[indexes][:, filterImportances], target[indexes])

        print("Making predictions")
        if filterImportances is not None:
            predictions = model.predict(datasetValid[:, filterImportances])
        else:
            predictions = model.predict(datasetValid)
        predictions = predictions.reshape(len(predictions), 1)

        print("Calculating validation results")
        [_, _, _, _, _, fmeasure, _] = Predictor.finalResults(namesObservationsValid, predictions, coordinatesValid)

        # The first fold is always accepted, so a model exists even when
        # every fold scores 0.
        if bestModel is None or fmeasure > bestFmeasure:
            bestFmeasure = fmeasure
            bestModel = model
            bestFilter = filterImportances

        del datasetTrain
        del datasetValid
        del coordinatesTrain
        del coordinatesValid
        del namesObservationsTrain
        del namesObservationsValid

    print("Calculating final results")
    # The test split must go through the same column filter the best model
    # was trained with, and the predictions get the same column shape that
    # is passed to finalResults during validation.
    if bestFilter is not None:
        predictions = bestModel.predict(testDataset[:, bestFilter])
    else:
        predictions = bestModel.predict(testDataset)
    predictions = predictions.reshape(len(predictions), 1)

    print("The final score is: ")
    testNamesObs = np.reshape(testNamesObs, testNamesObs.shape[0])
    Predictor.finalResults(testNamesObs, predictions, testCoords)
def DoWork(source_file, m, k, fltr, trade_cost, testing_start_date, testing_end_date):
    """Back-test the predictor's weekly signals and print the summary line.

    Walks the weeks from ``start_split`` to ``end_split``, follows the signal
    returned by the predictor (1 = long, -1 = short, anything else = out) and
    compares the accumulated log returns with a buy-and-hold position scaled
    by the share of weeks spent long. ``trade_cost`` is charged per trade.
    """
    predictor = Predictor(source_file, m, k, fltr, testing_start_date, testing_end_date)
    predictor.read_file_to_daily_data_by_weeks(0)
    predictor.calc_historical_weekly_return(predictor.daily_data_by_weeks)

    trades = 0
    weeks_long = 0
    position = 0
    long_total = 0
    short_total = 0
    bh_return = 0
    strategy_weekly_return = []
    bh_weekly_return = []

    for week in range(predictor.start_split, predictor.end_split):
        # Weekly signal from the history available before this week
        predictor.weekly_return_data = predictor.historical_weekly_return_data[:week - 1]
        neighbours = predictor.find_k_closest_histories()
        expected = predictor.calc_next_week_return(neighbours)
        signal = predictor.signal(position, expected)

        # Realised log return between the last entries of two consecutive weeks
        price_now = predictor.daily_data_by_weeks[week][-1][2]
        price_before = predictor.daily_data_by_weeks[week - 1][-1][2]
        actual_return = math.log(price_now) - math.log(price_before)

        # A change of signal counts as one trade
        if signal != position:
            position = signal
            trades += 1

        # Strategy book-keeping
        if position == 1:
            long_total += actual_return
            strategy_weekly_return.append(actual_return)
            weeks_long += 1
        elif position == -1:
            signed_return = actual_return * position
            short_total += signed_return
            strategy_weekly_return.append(signed_return)

        # Buy-and-hold book-keeping
        bh_return += actual_return
        bh_weekly_return.append(actual_return)

    # Excess return
    trade_cost_coef = math.log((1 - trade_cost) / (1 + trade_cost))
    strategy_return = long_total + short_total + trades * trade_cost_coef
    weeks_total = float(predictor.end_split - predictor.start_split)
    bh_return = (weeks_long / weeks_total) * bh_return + 2 * trade_cost_coef
    sharpe = strategy_return / Stdev(strategy_weekly_return)

    print('Strategy return = {0} (long: {1} and short: {2}), bh return = {3}, excess return = {4}, sharpe = {5}'.format(
        strategy_return, long_total, short_total, bh_return, strategy_return - bh_return, sharpe))
def defaultPredData(comicId):
    """Copy the default predictor data file into the folder of one comic.

    A fresh ``Predictor`` is asked to generate its data template first
    (presumably this writes ``predictorInfo/predictorData.txt`` under
    ``cacheLoc`` -- confirm in Predictor).  That file is then copied, with
    metadata, into ``cacheLoc + "predictorInfo/<comicId>/"``.
    """
    Predictor().generatePredictorDataTemplate()

    template_path = cacheLoc + "predictorInfo/predictorData.txt"
    comic_dir = cacheLoc + "predictorInfo/" + str(comicId) + "/"
    shutil.copy2(template_path, comic_dir)
def __init__(self, cluster):
    """Initialise logging, state tracking, reward memory and measurement files.

    Args:
        cluster: object exposing ``node_count()``; its value becomes the
            initial state unless ``self.debug`` is switched on.

    Side effects:
        * Appends the column header to ``env_vars["measurements_file"]``
          when that file is empty (the file is created if missing).
        * Reads ``env_vars["training_file"]`` and feeds every non-comment
          row to ``self.add_measurement``.

    Raises:
        IOError / OSError: if the measurements file cannot be opened for
            appending or the training file cannot be opened in ``'r+'``
            mode (it must already exist).
    """
    # Create logger
    LOG_FILENAME = 'files/logs/Coordinator.log'
    self.log = get_logger('RLDecisionMaker', 'INFO', logfile=LOG_FILENAME)
    self.log.info("Using 'gain' : " + env_vars['gain'] + " with threshold of " + str(env_vars["decision_threshold"]*100) + "% and interval: " + str(env_vars['decision_interval']))
    self.log.info("Cluster Size from %d to %d nodes" % (env_vars['min_cluster_size'], env_vars['max_cluster_size']))

    self.debug = False
    if self.debug:
        self.currentState = 8
    else:
        self.currentState = cluster.node_count()
    self.cluster = cluster
    self.nextState = self.currentState
    # Number of metric fetches that make up one decision interval.
    self.waitForIt = env_vars['decision_interval'] / env_vars['metric_fetch_interval']
    self.pending_action = None
    self.decision = {"action": "PASS", "count": 0}

    # The policy for getting throughput and latency when computing the reward func.
    # average, centroid
    self.measurementsPolicy = 'centroid'
    self.prediction = env_vars['use_prediction']
    self.predictor = Predictor()

    # used only in simulation!!
    self.countdown = 0

    # A dictionary that will remember rewards and metrics in states previously visited
    self.memory = {}
    for i in range(env_vars["min_cluster_size"], env_vars["max_cluster_size"] + 1):
        # placeholder for rewards and metrics
        self.memory[str(i)] = {'r': None, 'arrayMeas': None}

    # Load any previous statistics.
    self.measurementsFile = env_vars["measurements_file"]
    self.trainingFile = env_vars["training_file"]
    self.sumMetrics = {}

    # Initialize the measurements file.  The context manager guarantees the
    # handle is released even if the write fails.
    with open(self.measurementsFile, 'a+') as meas:
        if os.stat(self.measurementsFile).st_size == 0:
            # The file is empty, set the headers for each column.
            meas.write('State\t\tLambda\t\tThroughput\t\tLatency\t\tCPU\t\tTime\n')

    # Load the training set; the file is closed even if add_measurement raises.
    with open(self.trainingFile, 'r+') as meas:
        if os.stat(self.trainingFile).st_size != 0:
            # Skip the first line with the headers of the columns.  The
            # built-in next() works on both Python 2.6+ and Python 3, unlike
            # the Python-2-only file.next() method.
            next(meas)
            for line in meas:
                # Skip comments (used in training sets)
                if not line.startswith('###'):
                    self.add_measurement(line.split('\t\t'))
def run(self, k=3, patientSplit=True, useOnlyRF=True, breakin2=True):
    """Cross-validate the detection pipeline and print aggregate scores.

    The train and test archives are merged, split into folds, and each fold
    is used once for validation.  Per fold a random forest is fitted; when
    ``useOnlyRF`` is false its feature importances select the columns on
    which a LinearSVC is trained instead.  True/false positives and false
    negatives come from ``Predictor.finalResults`` and are accumulated both
    overall and per entry of the 12-slot arrays it returns.

    Args:
        k: number of folds for the shuffled split.  Ignored when
            ``patientSplit`` is true, where it is overridden (12, or 2 with
            ``breakin2``).
        patientSplit: split with ``self.getSplits`` instead of a random
            shuffle.
        useOnlyRF: predict with the random-forest pipeline directly instead
            of the importance-filtered LinearSVC.
        breakin2: with ``patientSplit``, regroup the splits into two folds
            via ``self.getNewSplits``.

    Returns:
        None.  All results are printed.
    """
    def scores(tp, fp, fn):
        # (precision, recall, f-measure); a ratio with an empty denominator is 0.
        precision = 0 if tp + fp == 0 else (tp + 0.0) / (tp + fp + 0.0)
        recall = 0 if tp + fn == 0 else (tp + 0.0) / (tp + fn + 0.0)
        fmeasure = 0 if recall + precision == 0 else 2 * (precision * recall) / (recall + precision)
        return precision, recall, fmeasure

    featureGetter = FeatureGetter()
    overallTP = 0
    overallFP = 0
    overallFN = 0
    fileNameTrain = data_io.get_savez_name()
    fileNameTest = data_io.get_savez_name_test()

    # Single-argument print(...) yields the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Merging files...")
    (namesObservations, coordinates, dataset) = self.mergeFiles(fileNameTrain, fileNameTest)
    dataset = dataset[:, self.filterIndexes(len(dataset[0]))]

    print("Shuffling and splitting the data")
    # Drawn unconditionally so the random state advances the same way on
    # every code path.
    indexesChanged = np.arange(len(dataset))
    np.random.shuffle(indexesChanged)
    if patientSplit:
        k = 12
        (splittedNamesObs, splittedCoords, splittedData) = self.getSplits(namesObservations, coordinates, dataset)
        if breakin2:
            k = 2
            (splittedNamesObs, splittedCoords, splittedData) = self.getNewSplits(splittedNamesObs, splittedCoords, splittedData)
    else:
        splittedNamesObs = self.getShuffledSplits(namesObservations, indexesChanged, k)
        splittedCoords = self.getShuffledSplits(coordinates, indexesChanged, k)
        splittedData = self.getShuffledSplits(dataset, indexesChanged, k)

    # Drop the merged arrays; only the splits are needed from here on.
    del dataset
    del coordinates
    del namesObservations
    del indexesChanged

    # NOTE(review): fixed at 12 slots, which matches k = 12 for the patient
    # split -- presumably one slot per patient; confirm in finalResults.
    overallArrayTP = np.zeros(12)
    overallArrayFP = np.zeros(12)
    overallArrayFN = np.zeros(12)

    for i in range(k-1, -1, -1):  # i is the index of the validation
        print("Doing cross-validation for i=%d" % i)
        namesObservationsTest = splittedNamesObs[i]
        coordinatesTest = splittedCoords[i]
        datasetTest = splittedData[i]
        namesObservationsTest = np.reshape(namesObservationsTest, namesObservationsTest.shape[0])

        namesObservationsTrain = self.getTrainData(splittedNamesObs, i)
        coordinatesTrain = self.getTrainData(splittedCoords, i)
        datasetTrain = self.getTrainData(splittedData, i)
        namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0])

        print("Getting target vector")
        (indexes, target, obs) = featureGetter.getTargetVector(coordinatesTrain, namesObservationsTrain, datasetTrain)

        print("Selecting features")
        classifier = RandomForestClassifier(n_estimators=100, verbose=2, n_jobs=1, min_samples_split=1, random_state=1, compute_importances=True)
        model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
        model.fit(obs[indexes], target[indexes])

        if not useOnlyRF:
            # Keep only the columns the forest found informative, then
            # replace the model with a LinearSVC trained on them.
            importances = classifier.feature_importances_
            filterImportances = np.where(importances > 0.0001)[0]
            print(len(filterImportances))

            print("Training model")
            # Alternatives tried here previously: a 500-tree
            # RandomForestClassifier, KNeighborsClassifier, MLPClassifier.
            classifier = LinearSVC(verbose=1)
            model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
            model.fit(obs[indexes][:, filterImportances], target[indexes])

        print("Making predictions")
        if not useOnlyRF:
            predictions = model.predict(datasetTest[:, filterImportances])
        else:
            predictions = model.predict(datasetTest)
        predictions = predictions.reshape(len(predictions), 1)

        print("Calculating final results")
        [truePositives, falsePositives, falseNegatives, _, _, _, (arrayTP, arrayFP, arrayFN)] = Predictor.finalResults(namesObservationsTest, predictions, coordinatesTest)
        print(arrayTP)
        print(arrayFP)
        print(arrayFN)
        overallArrayTP += arrayTP
        overallArrayFP += arrayFP
        overallArrayFN += arrayFN
        overallTP += truePositives
        overallFP += falsePositives
        overallFN += falseNegatives

        # Release this fold's data before the next fold is materialised.
        del datasetTrain
        del datasetTest
        del coordinatesTrain
        del coordinatesTest
        del namesObservationsTrain
        del namesObservationsTest

    precision, recall, fmeasure = scores(overallTP, overallFP, overallFN)
    print("Overall results for k=%d" % k)
    print(overallTP)
    print(overallFP)
    print(overallFN)
    print(precision)
    print(recall)
    print(fmeasure)

    for i, (tp, fp, fn) in enumerate(zip(overallArrayTP, overallArrayFP, overallArrayFN)):
        # The original line was a bare string expression, so this heading
        # was silently dropped; it is now actually printed.
        print("Results for patient number %d:" % (i + 1))
        precision, recall, fmeasure = scores(tp, fp, fn)
        print(precision)
        print(recall)
        print(fmeasure)
def GetPredictions(logProbabilities):
    """Return what a newly built ``Predictor`` yields for *logProbabilities*.

    A new ``Predictor`` is constructed on every call and its
    ``getPredictions`` result is passed back unchanged.
    """
    return Predictor().getPredictions(logProbabilities)
sys.exit() if sys.argv[1] == "load": usePickle = True elif sys.argv[1] == "train": usePickle = False else: print "Usage:", usage sys.exit() #create classifier if usePickle: print "Importing Classifier" p = pickle.load(open('predictor.pickle', 'r')) else: print "Training Classifier" p = Predictor("spam", "ham") print "Saving Pickle" pickle.dump(p, open('predictor.pickle', 'w')) if len(sys.argv) > 2: if sys.argv[2] == "test": testDev() testExternal() elif os.path.isdir(sys.argv[2]): # predict all files in folder for f in sorted_nicely(glob.glob(sys.argv[2]+'/*')): print f, ':', p.predict(f) elif os.path.isfile(sys.argv[2]): # predict this file print sys.argv[2], ':', p.predict(sys.argv[2])
elif input == 5: # Display machine learning results # Get query from user query = getQuery(True, True, True) print "\n" # Get results from GradCafe gradResults = GradCafe.getResults(query, False) QueryUtil.refineQuery(query) # Get results from GoHackers goResults = GoHackers.getResults(query, False) # Predict outcome doExperiment = False predictor = Predictor(gradResults, goResults) if doExperiment: predictor.runExperiment() else: predictor.predict() if not continueQuery(): break else: break