def main(config_filename):
    config = Config()
    config.load(config_filename)
    train_data, test_data = get_data(config.data_config, config.competition)
    vocabulary = train_data.get_vocabulary(config.lower_vocabulary).merge(
        test_data.get_vocabulary(config.lower_vocabulary))

    # Shrink the full pretrained vectors down to this vocabulary the first time
    # the embeddings file is missing. Each (language, embedding type) pair maps
    # to a source file and a vocabulary limit; behavior matches the original
    # chain of if-blocks, including the per-step existence check.
    embedding_sources = [
        ("ru", "fasttext", "/media/yallen/My Passport/Models/Vectors/FastText/wiki.ru.vec", 100000),
        ("ru", "w2v", "/media/yallen/My Passport/Models/Vectors/RDT/russian-big-w2v.txt", 100000),
        ("en", "w2v", "/media/yallen/My Passport/Models/Vectors/W2V/GoogleNews-vectors-negative300.vec", 150000),
        ("en", "fasttext", "/media/yallen/My Passport/Models/Vectors/FastText/wiki.en.vec", 150000),
    ]
    for language, embedding_type, source_path, limit in embedding_sources:
        if not os.path.exists(config.embeddings_filename) and \
                config.data_config.language == language and \
                embedding_type in config.embeddings_filename:
            shrink_w2v(source_path, vocabulary, limit, config.embeddings_filename)

    char_set = train_data.get_char_set()
    print(vocabulary.size())
    print(char_set)
    targets, additionals, rev_categories, output_sizes = get_targets_additionals(train_data)
    train_model(config_filename, train_data, vocabulary, char_set,
                targets, additionals, output_sizes)
    predict(config_filename, test_data, vocabulary, char_set,
            targets, additionals, rev_categories)
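# A minimal sketch of how this pipeline might be launched from the command
# line. The positional argument name and the entry-point layout are
# assumptions, not part of the original module.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("config_filename",
                        help="path to the training/prediction config")
    args = parser.parse_args()
    main(args.config_filename)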
def _run_prediction(args):
    predict(
        raw_data_file=args.raw_data_file,
        processed_data_folderpath=args.processed_data_folderpath,
        model_folderpath=args.model_folderpath,
        output_filepath=args.output_filepath,
        model_name=args.model_name
    )
def test_predict(test_data: str, test_config_yaml: str, test_model_path: str,
                 transformer_path: str, prediction_path: str, logger):
    logger.info("test_predict")
    prediction = predict(test_data, test_config_yaml, test_model_path,
                         transformer_path, prediction_path)
    logger.info(f"shape of prediction: {prediction.shape}")
    assert prediction.shape == (100, 14)
def add_entry():
    """View that processes a POST with new flight input.

    :return: rendered index page with the prediction, or the error page
    """
    try:
        flight1 = Flight(month=request.form['month'],
                         day_of_week=request.form['day_of_week'],
                         day_of_month=request.form['day_of_month'],
                         airline=request.form['airline'],
                         origin_city=request.form['origin_city'],
                         dest_city=request.form['dest_city'],
                         dep_time=request.form['dept_time'],
                         air_time=request.form['air_time'])
    except Exception:
        logger.info("Unable to create flight")
        # flight1 would be unbound past this point, so bail out here
        return render_template('error.html')
    label = ""
    probs = 0
    try:
        label, probs = predict('data/tree.pkl', flight1)
    except Exception:
        logger.info("Unable to predict")
    logger.info("You are connecting to: " + app.config['SQLALCHEMY_DATABASE_URI'])
    try:
        db.session.add(flight1)
        db.session.commit()
    except Exception:
        logger.info("Cannot add row of flight data to the database!")
        return render_template('error.html')
    logger.info("User input added to %s: %s from %s",
                app.config["SQLALCHEMY_DATABASE_URI"],
                request.form['airline'], request.form['origin_city'])
    return render_template('index.html',
                           prediction_text='{}'.format(label),
                           prob_text='{}'.format(probs))
def translate(input_texts, stanza_processor, translator, src_word_to_idx,
              idx_to_trg_word, bpe, checkpoint_path='', device='cuda:0', bsz=32):
    # note: the first parameter was renamed from `input`, which shadows the builtin
    print("tokenizing, multiword-token-expanding, pos-tagging...")
    doc = stanza_processor('\n\n'.join(input_texts))  # returns a Stanza Document
    sentences = doc.sentences  # list of Stanza Sentence objects
    truecased_sentences = []
    print("truecasing...")
    for sent in sentences:
        # should_lower_de is expected to be defined at module level
        truecase_sentence(sent, should_lower_de)
        truecased_sentences.append(' '.join([word.text for word in sent.words]))
    print("subword segmenting...")
    subword_segmented_sentences = [
        bpe.process_line(sent).split() for sent in truecased_sentences
    ]
    print("converting to batches of indices...")
    # modifies subword_segmented_sentences in place
    to_indices("user-input", subword_segmented_sentences, src_word_to_idx)
    test_batches = get_test_batches(subword_segmented_sentences, bsz, device)
    print("making predictions...")
    translations, _, _ = predict(translator, test_batches, idx_to_trg_word,
                                 checkpoint_path)
    return translations
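# A minimal sketch of how the preprocessing dependencies of translate() might
# be constructed, assuming a German source language and a BPE codes file named
# 'de_bpe.codes' (both assumptions). translator, src_word_to_idx, and
# idx_to_trg_word come from whatever checkpoint-loading code the project uses.
import codecs

import stanza
from subword_nmt.apply_bpe import BPE

stanza_processor = stanza.Pipeline(lang='de', processors='tokenize,mwt,pos')
bpe = BPE(codecs.open('de_bpe.codes', encoding='utf-8'))
# translations = translate(["Das ist ein Test."], stanza_processor, translator,
#                          src_word_to_idx, idx_to_trg_word, bpe)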
def build_detections(segmenter, files, target_h, target_w, test_sample_size=None):
    results = []
    test_sample_size = len(files) if test_sample_size is None else test_sample_size
    for idx, imfile in enumerate(files):
        if idx >= test_sample_size:
            break
        print('Processing {} out of {} files...'.format(idx + 1, test_sample_size),
              end='\r')
        sys.stdout.flush()
        img = None  # so the except block can tell whether loading itself failed
        try:
            img = utils.load_image(imfile).astype(np.uint8)
            pred, scores = predict(segmenter, img, h=target_h, w=target_w)
            img_id = int(imfile[-10:-4])
            results += [
                ann for ann in ann_dict_generator(pred, scores, img_id, imfile)
            ]
            # bname = os.path.basename(imfile)
            # pred = pred[:, :, 0]
            # pred[pred > 0.5] = 255
            # PILImage.fromarray(pred, mode='L').save('/tmp/inference_results/{}.png'.format(bname))
        except Exception:
            if img is None:
                print('Skipping corrupted image')
                continue
            else:
                raise
    return results
def display_scores():
    """Reads API data, runs it through the model, and displays the scores.

    Returns:
        Rendered 'scores.html' template (a table built from the score rows).
    """
    # output can be k:v, list, nested, etc.; jinja can do indexing and deal with nesting
    # Read in data
    api_df = pd.read_json('data/api_data.json')
    # Set model location as a variable
    model_path = 'models/rf_1.pickle'
    # Store model predictions in 'scores'
    scores = predict(api_df, pipeline, model_path)
    # Select and order the columns to display
    scores = scores[[
        'object_id', 'org_name', 'country', 'name', 'payee_name',
        'payout_type', 'probability'
    ]]
    # Add a computed column ('risk') with categorical values
    scores['risk'] = scores['probability'].apply(
        lambda p: 'HIGH' if p > .85 else 'MEDIUM' if p > .8 else 'LOW')
    # Round values in the 'probability' column
    scores['probability'] = np.round(scores['probability'], 2)
    scores = scores.values
    return render_template('scores.html', scores=scores)
def predict_image():
    if request.method == 'POST':
        file = request.files['file']
        image_file = file.read()
        image = cv2.imdecode(np.frombuffer(image_file, dtype=np.uint8), -1)
        height, width, channels = image.shape
        print("shape image: ", (width, height))
        list_boxes, list_scores, list_classes = predict(image, PREDICTOR, CLASSES)
        print('list_boxes', list_boxes)
        print('list_classes', list_classes)
        # draw
        # image = draw_bbox(image, list_boxes, list_scores, list_classes)
        # cv2.imwrite("image.jpg", image)
        point_tl = None
        point_tr = None
        point_bl = None
        point_br = None
        receipt = None
        # Map each detected corner class to the center of its bounding box;
        # keep the receipt's bounding box itself.
        for bbox, cls in zip(list_boxes, list_classes):
            x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
            w = x2 - x1
            h = y2 - y1
            center = (x1 + w // 2, y1 + h // 2)
            if cls == 'top_right':
                point_tr = center
            elif cls == 'bottom_left':
                point_bl = center
            elif cls == 'bottom_right':
                point_br = center
            elif cls == 'top_left':
                point_tl = center
            elif cls == 'receipt':
                receipt = bbox
        result = {
            'point_tl': point_tl,
            'point_tr': point_tr,
            'point_bl': point_bl,
            'point_br': point_br,
            'receipt': receipt
        }
        return result
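# The four corner centers returned above are exactly what a perspective
# rectification needs. A minimal sketch, assuming all four corners were
# detected and that the output size is chosen by the caller; the helper name
# warp_receipt and its defaults are assumptions, not part of the original
# endpoint.
import cv2
import numpy as np

def warp_receipt(image, point_tl, point_tr, point_br, point_bl,
                 target_w=600, target_h=800):
    # Corner order: top-left, top-right, bottom-right, bottom-left.
    src = np.float32([point_tl, point_tr, point_br, point_bl])
    dst = np.float32([[0, 0], [target_w, 0],
                      [target_w, target_h], [0, target_h]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(image, matrix, (target_w, target_h))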
def test_default_subword_model(
        checkpoint_path='/content/gdrive/My Drive/NMT/unittests/checkpoints/',
        config_path='/content/gdrive/My Drive/NMT/configs/',
        corpus_path='/content/gdrive/My Drive/NMT/unittests/first_ten_sentences/'):
    hyperparams = import_configs(config_path=config_path, unittesting=True)
    # use subword-level vocab
    hyperparams["vocab_type"] = "subword_joint"
    # hyperparams["learning_rate"] = .01  # increase learning rate
    print(f"vocab_type: {hyperparams['vocab_type']}")
    print(f"tie_weights: {hyperparams['tie_weights']}")
    construct_model_data("train.de", "train.en", hyperparams=hyperparams,
                         corpus_path=corpus_path, checkpoint_path=checkpoint_path,
                         overfit=True)
    # A model of sufficient capacity should be able to bring the loss down to ~zero.
    model, loss = train(total_epochs=100, early_stopping=False,
                        checkpoint_path=checkpoint_path, save=False, write=True)
    assert loss < .01
    model_data = retrieve_model_data(checkpoint_path=checkpoint_path)
    dev_batches = model_data["dev_batches"]  # holds the training data, because overfit=True
    dev_references = model_data["references"]  # holds the training data, because overfit=True
    idx_to_trg_word = model_data["idx_to_trg_word"]
    # Greedy search should be able to perfectly predict the training data.
    dev_translations, _, _ = predict(model, dev_batches, idx_to_trg_word,
                                     checkpoint_path)
    bleu = evaluate(dev_translations, dev_references)
    assert bleu >= 100
    # Beam search should be able to perfectly predict the training data.
    model.decoder.set_inference_alg("beam_search")
    dev_translations, _, _ = predict(model, dev_batches, idx_to_trg_word,
                                     checkpoint_path)
    bleu = evaluate(dev_translations, dev_references)
    assert bleu >= 100
def get_image():
    data = get_imagenet()
    allowed_species = get_allowed_species()
    # request.files holds the multipart form data sent from the frontend;
    # get the uploaded photo from it
    image = request.files.get("photo", "")
    try:
        filename = os.path.join(UPLOADER_FOLDER,
                                f"{datetime.now().isoformat()}.jpg")
        image.save(filename)
        print("save completed")
        image_batch = image_preprocessing(filename)
        print("start predicting")
        labels = predict(image_batch)
        for label in labels:
            print(label)
            if label[1] not in allowed_species:
                continue
            result = {"category_name": label[1], "id": label[0],
                      "probability": float(label[2])}
            row = data[data.category_name == label[1]].iloc[0]
            print(row)
            result["rus_name"] = fix_na(row.rus_name)
            result["photo_link"] = fix_na(row.photo_link)
            result["food"] = fix_na(row.food)
            result["description"] = fix_na(row.description)
            result["brief"] = fix_na(row.brief)
            return jsonify(result)
        # Fallback when no allowed species was recognized.
        return {
            "category_name": "undefined_bird",
            "id": "no001",
            "probability": 0,
            "rus_name": "курлык",
            "photo_link": "https://drive.google.com/uc?export=download&confirm=no_antivirus&"
                          "id=1HUkCNwHbFihtUVD09jrCXimRmaLJhMD8",
            "food": "подсушенный белый хлеб, зерна, крупы, семечки, овсяные хлопья",
            "description": "Человек приручил дикого сизого голубя более 5000 лет тому назад. С тех пор голубеводы "
                           "вывели более 800 пород домашних голубей, различных по цвету, форме тела и назначению.",
            "brief": "Мы не уверены, но возможно это Голубь!",
        }
    except Exception as ex:
        print("prediction failed")
        print(repr(ex))
        abort(400)
def run_example():
    # Load the data from the tsv file into a list, where each element is an
    # instance to be classified.
    data = pd.read_csv("./data/example_news.tsv", sep="\t")
    instances_to_be_classified = data['data']
    # Call the predict function with the data; it returns a prediction for
    # each instance.
    predictions = predict(instances_to_be_classified)
    print(predictions)
def detect_face(frameCount):
    # grab global references to the video stream, output frame, and
    # lock variables
    global vs, outputFrame, outputFrame2, outputAttributes, lock, presence
    # loop over frames from the video stream
    while True:
        # read the next frame from the video stream and resize it
        frame = vs.read()
        center_w, center_h = (const.FRAME_WIDTH // 2, const.FRAME_HEIGHT // 2)
        frame = imutils.resize(frame, width=const.FRAME_WIDTH,
                               height=const.FRAME_HEIGHT)
        frame_cut = frame.copy()[center_h - 109:center_h + 109,
                                 center_w - 89:center_w + 89]
        # grab the current timestamp and draw it on the frame
        # timestamp = datetime.datetime.now()
        # cv2.putText(frame, timestamp.strftime(
        #     "%A %d %B %Y %I:%M:%S%p"), (10, frame.shape[0] - 10),
        #     cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
        faces = fdetect(frame_cut)
        # draw an ellipse to help the user center their face
        axesLength = (80, 100)
        angle = 0
        startAngle = 0
        endAngle = 360
        # if there is a face, the ellipse is green; otherwise red
        if faces[0]:
            color = (0, 255, 0)
        else:
            color = (0, 0, 255)
        # line thickness of 5 px
        thickness = 5
        cv2.ellipse(frame, (center_w, center_h), axesLength, angle,
                    startAngle, endAngle, color, thickness)
        frame = cv2.flip(frame, +1)
        attributes = predict.predict(faces[0], frame_cut)
        # acquire the lock, set the output frames, and release the lock
        with lock:
            outputFrame = frame.copy()
            outputFrame2 = frame_cut.copy()
            presence = faces[0]
            outputAttributes = attributes.copy()
def batch_test(excel, save_path):
    mydata = pd.read_excel(excel)
    names = mydata.iloc[:, 0]
    print(names)
    lyrics = mydata.iloc[:, 1]
    print(lyrics)
    print(mydata.shape[0])
    # iterate over the rows (shape[0]), not the columns; the Python 2
    # print statements and .encode('utf-8') calls were also updated for Python 3
    with open(save_path, 'w', encoding='utf-8') as f:
        for idx in range(mydata.shape[0]):
            f.write(names[idx] + '\t' + predict(lyrics[idx]) + '\n')
    logger.info('success')
def sentiment_classification():
    """Run multi-label boardgame classification on a boardgame description.
    ---
    parameters:
      - name: body
        in: body
        schema:
          id: description
          required:
            - description
          properties:
            description:
              type: string
              description: the boardgame description for the POST method
        required: true
    definitions:
      SentimentResponse:
        Project:
          properties:
            status:
              type: string
            ml-result:
              type: object
    responses:
      40x:
        description: Client error
      200:
        description: Multi-label boardgame categorization
        examples:
          [
            {"status": "success", "sentiment": "1"},
            {"status": "error", "message": "Exception caught"},
          ]
    """
    json_request = request.get_json()  # whatever JSON body was sent to the service
    if not json_request:
        return Response("No json provided.", status=400)
    description = json_request.get('description')  # .get avoids a KeyError
    if description is None:
        return Response("No text provided.", status=400)
    preprocessed_description = preprocess(description)
    predicted_categories = predict(preprocessed_description)
    # jsonify returns the answer to the POST request with HTTP status 200
    return flask.jsonify({"status": "success",
                          "predicted_categories": predicted_categories.tolist()})
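# A minimal sketch of calling this endpoint, assuming it is mounted at
# /classify on a local development server; the route and port are assumptions,
# not part of the original view.
import requests

response = requests.post(
    "http://localhost:5000/classify",
    json={"description": "Players build rail networks across a shared map."},
)
print(response.json())  # e.g. {"status": "success", "predicted_categories": [...]}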
def p_mult(target_directory, weights_dir, modeltype):
    '''Take all .npy arrays from the directory and run inference on each z-plane.

    :param target_directory: directory to write per-patient predictions into
    :param weights_dir: directory holding the model weights
    :param modeltype: which model architecture to load
    '''
    end_patient = 5
    for patient_indx in range(1, end_patient + 1):
        patient_dir = target_directory + '/{}'.format(patient_indx)
        if not os.path.exists(patient_dir):
            os.makedirs(patient_dir)
        predict.predict(patient_indx,
                        img_directory='./',
                        target_directory=patient_dir,
                        weights_directory=weights_dir,
                        modeltype=modeltype)
        print("finished with patient {}".format(patient_indx))
    return None
def get_masks():
    if request.method == 'POST':
        data = json.loads(request.get_data().decode())
        if 'img_url' in data:
            res = predict.predict(data['img_url'], model, config)
            return jsonify(res)
    abort(404)
def hello_world():
    text = request.form['q']
    if not text:
        return 'Empty string identified.'
    label = predict(text)
    if label == 0:
        return "invalid question type"
    sens = json.loads(request.form['sens'])
    print(sens[0])
    code = similarity(sens, text)
    print(code)
    label = getClassifiedCategories(getCategoryCount(text))
    if len(label):
        return label[0]
    else:
        return "Waiting for full text.."
def do_upload():
    upload = request.files['upload']
    name, ext = os.path.splitext(upload.filename)
    if ext not in ('.png', '.jpg', '.jpeg'):
        return 'File extension not allowed.'
    save_path = "./new_test"
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    file_path = "{path}/{file}".format(path=save_path, file=upload.filename)
    upload.save(file_path)
    imgname = str(upload.filename)
    prediction = p.predict(imgname)
    os.remove("./new_test/{}".format(imgname))
    return str(prediction)
def upload():
    if request.method == 'POST':
        image_byte = request.data
        if not image_byte:  # request.data is bytes, so test truthiness rather than == ''
            return 'Format not supported'
        image = base64.b64decode(image_byte)
        image = io.BytesIO(image)
        image = Image.open(image)
        image.thumbnail((299, 299), Image.ANTIALIAS)
        caption = predict(image)
        response = app.response_class(
            response=json.dumps({'caption': caption}),
            status=200,
            mimetype='application/json'
        )
        return response
def ner_extractor():
    if 'file' not in request.files:
        return jsonify("no file was provided")
    file = request.files["file"]
    if file.filename == '':
        return jsonify("no file was provided")
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file.save(TMP_DIR + filename)
        res = predict(TMP_DIR + filename)
        # clean up the temp directory after prediction
        files = glob.glob(TMP_DIR + '*')
        for f in files:
            os.remove(f)
        return jsonify(res)
    return jsonify("-1")
def predict_phc(scale=2.0, cpu=True):
    os.makedirs("output_phc", exist_ok=True)
    for seq in ("Sequence 1", "Sequence 2", "Sequence 3", "Sequence 4"):
        os.makedirs("output_phc/" + seq, exist_ok=True)
        predict(model_path="models_phc/",
                input_path="RawData/PhC-C2DL-PSC/" + seq,
                out_path="output_phc/" + seq,
                prep_func=preprocess_phc,
                scale=scale,
                cpu=cpu)
def predict_fluo(scale=1.0, cpu=True):
    os.makedirs("output_fluo", exist_ok=True)
    for seq in ("Sequence 1", "Sequence 2", "Sequence 3", "Sequence 4"):
        os.makedirs("output_fluo/" + seq, exist_ok=True)
        predict(model_path="models_fluo/",
                input_path="RawData/Fluo-N2DL-HeLa/" + seq,
                out_path="output_fluo/" + seq,
                prep_func=preprocess_fluo,
                scale=scale,
                cpu=cpu)
def predict_dic(scale=1.0, cpu=True):
    os.makedirs("output_dic", exist_ok=True)
    for seq in ("Sequence 1", "Sequence 2", "Sequence 3", "Sequence 4"):
        os.makedirs("output_dic/" + seq, exist_ok=True)
        predict(model_path="models_dic/",
                input_path="RawData/DIC-C2DH-HeLa/" + seq,
                out_path="output_dic/" + seq,
                prep_func=preprocess_dic,
                scale=scale,
                cpu=cpu)
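# The three functions above differ only in dataset name, model directory,
# preprocessing function, and default scale, so they could share one helper.
# A minimal sketch; the consolidated name run_dataset_prediction is an
# assumption, not part of the original code.
def run_dataset_prediction(tag, dataset, prep_func, scale, cpu=True):
    out_root = "output_" + tag
    os.makedirs(out_root, exist_ok=True)
    for seq in ("Sequence 1", "Sequence 2", "Sequence 3", "Sequence 4"):
        os.makedirs(out_root + "/" + seq, exist_ok=True)
        predict(model_path="models_" + tag + "/",
                input_path="RawData/" + dataset + "/" + seq,
                out_path=out_root + "/" + seq,
                prep_func=prep_func,
                scale=scale,
                cpu=cpu)

# predict_phc(scale, cpu) then becomes:
# run_dataset_prediction("phc", "PhC-C2DL-PSC", preprocess_phc, scale, cpu)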
def main():
    # First load our data into X and Y
    [X, Y] = load_file("./data/data.txt")
    # Visualize the data using matplotlib
    plt.plot_data(X, Y)
    # Get the number of training examples and features
    m = len(X)
    n = 1
    # Add a column of ones to X for the intercept term
    X = np.c_[np.ones(m), X]
    Y = np.array(Y)
    # Initialize the fitting parameters
    theta = np.zeros(n + 1).transpose()
    # Set initial values for alpha and the number of iterations
    alpha = 0.01
    iterations = 500
    # Get the initial cost J
    J = cost_function(X, Y, theta, alpha)
    print(f"Initial cost function is: {J}")
    # Record past J costs
    J_hist = []
    # Perform gradient descent
    for i in range(iterations):
        J_hist.append(cost_function(X, Y, theta, alpha))
        theta = descent(X, Y, theta, alpha)
    # Predict profits for a city with a population of 35,000
    prof = predict(35000, theta)
    print(f'Profit for a city with a population of 35,000: ${prof * 10000}')
    # Draw the linear fit
    plt.plot_line(X[:, 1], X.dot(theta))
    plt.plot_show()
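# predict() above presumably evaluates the linear hypothesis
# h(x) = theta_0 + theta_1 * x by prepending the intercept term, mirroring the
# np.c_[np.ones(m), X] step in main(). A minimal sketch of that computation;
# the name linear_predict is hypothetical, chosen to avoid shadowing the
# project's own predict.
import numpy as np

def linear_predict(x, theta):
    # [1, x] dot [theta_0, theta_1] == theta_0 + theta_1 * x
    return np.array([1, x]).dot(theta)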
def upload_predict():
    if request.method == 'POST':
        image_file = request.files["image"]
        if image_file:
            image_location = os.path.join(UPLOAD_FOLDER, image_file.filename)
            image_file.save(image_location)
            img = prepare_images(image_location, 2)
            ref, degraded, output, scores = predict(img, image_file.filename)
            output_image = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
            output_image = Image.fromarray(output_image)
            output_location = os.path.join(OUTPUT_FOLDER, image_file.filename)
            output_image.save(output_location)
            return render_template('index.html',
                                   image_name=image_file.filename,
                                   psnr=scores[1][0],
                                   mse=scores[1][1],
                                   ssim=scores[1][2])
    return render_template('index.html', image_name=None, psnr=0, mse=0, ssim=0)
if __name__ == "__main__":
    from argparse import ArgumentParser

    from src.predict import predict

    parser = ArgumentParser()
    parser.add_argument('--input', default=None, required=True, type=str)
    args = parser.parse_args()
    result = predict(args.input)
    print(result)
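# Example invocation, assuming this entry point lives in a file named main.py
# at the project root (the file name and input path are assumptions):
#
#     python main.py --input path/to/sample.txt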
def test_outdrop(
        checkpoint_path='/content/gdrive/My Drive/NMT/unittests/checkpoints/',
        config_path='/content/gdrive/My Drive/NMT/configs/',
        corpus_path='/content/gdrive/My Drive/NMT/unittests/first_ten_sentences/'):
    hyperparams = import_configs(config_path=config_path, unittesting=True)
    # use word-level vocab
    hyperparams["vocab_type"] = "word"
    hyperparams["trim_type"] = "top_k"
    hyperparams["enc_dropout"] = .5
    hyperparams["dec_dropout"] = .5
    print(f"hidden size: {hyperparams['dec_hidden_size']}")
    construct_model_data("train.de", "train.en", hyperparams=hyperparams,
                         corpus_path=corpus_path, checkpoint_path=checkpoint_path,
                         overfit=True)
    # A model of sufficient capacity should be able to bring the loss down to ~zero.
    model, loss = train(total_epochs=100, early_stopping=False,
                        checkpoint_path=checkpoint_path, save=False, write=False)
    assert loss < .01
    model_data = retrieve_model_data(checkpoint_path=checkpoint_path)
    dev_batches = model_data["dev_batches"]  # holds the training data, because overfit=True
    dev_references = model_data["references"]  # holds the training data, because overfit=True
    idx_to_trg_word = model_data["idx_to_trg_word"]
    # Greedy search should be able to perfectly predict the training data.
    dev_translations, _, _ = predict(model, dev_batches, idx_to_trg_word,
                                     checkpoint_path)
    bleu = evaluate(dev_translations, dev_references)
    assert bleu >= 100
    # Beam search should be able to perfectly predict the training data.
    model.decoder.set_inference_alg("beam_search")
    dev_translations, _, _ = predict(model, dev_batches, idx_to_trg_word,
                                     checkpoint_path)
    bleu = evaluate(dev_translations, dev_references)
    assert bleu >= 100


# def test_default_subword_model():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["vocab_type"] = "subword_joint"
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# default word model, except do not divide the scores by the scaling factor
# inside the attention fn.
# def test_attn():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["vocab_type"] = "word"
#     hyperparams["trim_type"] = "top_k"
#     hyperparams["attention_fn"] = "dot_product"
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# no weight tying, no additional attention layer
# def test_no_tying():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["vocab_type"] = "word"
#     hyperparams["trim_type"] = "top_k"
#     hyperparams["attention_layer"] = False
#     hyperparams["tie_weights"] = False
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# no weight tying and no attention mechanism.
# def test_no_attn_no_tying():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["vocab_type"] = "word"
#     hyperparams["trim_type"] = "top_k"
#     hyperparams["attention_fn"] = "none"
#     hyperparams["attention_layer"] = False
#     hyperparams["tie_weights"] = False
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# default model, except dropout after the lstm is turned on.
# def test_dropout():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["enc_dropout"] = 0.2
#     hyperparams["dec_dropout"] = 0.2
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# ensure it still works on cpu.
# must change the runtime type to cpu before performing this test.
# def test_default_word_model_cpu():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["vocab_type"] = "word"
#     hyperparams["trim_type"] = "top_k"
#     hyperparams["device"] = "cpu"
#     train_batches, dev_batches, test_batches, vocabs, ref_corpuses, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# simplest possible model:
# - unidirectional encoder.
# - no attention mechanism.
# def test_uni_no_attn():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["attention_fn"] = "none"
#     constrain_configs(hyperparams)  # ensure it passes the constraint-check
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# two-layer vanilla network with the layer_to_layer decoder_init_scheme
# def test_layer_to_layer_uni_no_attn():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["enc_num_layers"] = 2
#     hyperparams["dec_num_layers"] = 2
#     hyperparams["decoder_init_scheme"] = "layer_to_layer"
#     hyperparams["attention_fn"] = "none"
#     hyperparams["bidirectional"] = False
#     constrain_configs(hyperparams)  # ensure it passes the constraint-check
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# two-layer vanilla network with the final_to_first decoder_init_scheme
# def test_final_to_first_uni_no_attn():
#     hyperparams = import_configs(config_path=config_path, unittesting=True)
#     hyperparams["enc_num_layers"] = 2
#     hyperparams["dec_num_layers"] = 2
#     hyperparams["decoder_init_scheme"] = "final_to_first"
#     hyperparams["attention_fn"] = "none"
#     hyperparams["bidirectional"] = False
#     constrain_configs(hyperparams)  # ensure it passes the constraint-check
#     train_batches, dev_batches, vocabs, hyperparams = construct_model_data(
#         "train.de", "train.en", hyperparams=hyperparams,
#         corpus_path=corpus_path, overfit=True, write=False)
#     predict_train_data(hyperparams, train_batches, dev_batches,
#                        ref_corpuses["train.en"], vocabs["idx_to_trg_word"],
#                        checkpoint_path)

# associate some epoch number with the saved model, so we can verify that the
# correct model was stored.
# def test_early_stopping():
#     # set random seed
#     pass
sess = train.start_tf_sess()
if is_train:
    train.train(sess=sess,
                data=data,
                labels=labels,
                learning_rate=.000001,
                run_name=run_name,
                steps=5000,
                batch_size=30,
                accumulate=0,
                print_each=25,
                use_class_entropy=False)
else:
    predictions = predict.predict(sess,
                                  data=data,
                                  run_name=run_name,
                                  batch_size=750,
                                  num_categories=len(labels[0]),
                                  category_names=cat_names)
    predict.prediction_accuracy(predictions, labels, True)

test_labels = np.load('data/{}_lab.npy'.format('mitTest'))
train_labels = np.load('data/{}_lab.npy'.format('mittrain'))


def plot_dataset_hist(test_labels, train_labels):
    # stack the argmax class indices of both label sets side by side
    x = np.asarray(
        [np.argmax(test_labels, axis=1),
         np.argmax(train_labels, axis=1)]).transpose()
    print(x)
    # the original snippet was truncated mid-call; the call is closed here with
    # only the arguments that were present
    plt.hist(x, bins=range(6), histtype='bar', stacked=True)
def click_disease(self):
    '''Applies the predictor to the specified image.'''
    res = predict(self.imagepath)
    self.result.configure(text=res)
    self.root.update_idletasks()
def test_train(config):
    predict(config)