def test_save(self):
    """Exercise three model save/load round-trips (state_dict, whole pickle).

    NOTE(review): currently short-circuited — the image-size probe below
    calls exit(1), so everything after it is dead code (presumably leftover
    debugging). Original newlines were lost; indentation is a reconstruction.
    """
    # --- debugging probe: report mean height/width of the clean images ----
    imgs = self.database.paths._clean_full.glob('*')
    ws = []
    hs = []
    for img in tqdm(list(imgs)):
        # IMREAD_ANYDEPTH keeps >8-bit images at native depth.
        img = cv2.imread(str(img), cv2.IMREAD_ANYDEPTH)
        ws.append(img.shape[0])
        hs.append(img.shape[1])
    print(mean(ws))
    print(mean(hs))
    exit(1)  # NOTE(review): aborts the test here — code below never runs.
    # --- dead code below (kept verbatim): save/load round-trips -----------
    imgs, masks, abnormalities = self.database.patches()
    img_path = imgs[0]
    img = img_path.load()
    # Wrap as a 1x1 batch, normalize, convert to a CPU float tensor.
    img_t = np.array([[img]])
    img_t = DataLoader.normalize(img_t)
    img_t = DataLoader.fix_dimensions(img_t)
    img_t = numpy_to_tensor(img_t, as_type=torch.FloatTensor, to_gpu=False)
    img_t = img_t  # NOTE(review): self-assignment — a no-op, leftover edit?
    path1 = Path('../tests/model1.pt')
    path2 = Path('../tests/model2.pt')
    # Baseline model and its outputs.
    model1 = get_model(n_classes=len(self.dataset.classes), config=self.config)
    y1 = model1(img_t)
    y1_infer = model1.infer(img)
    print(y1)
    print(y1_infer)
    print()
    # Round-trip 1: state_dict saved, loaded into a fresh model instance.
    model2 = get_model(n_classes=len(self.dataset.classes), config=self.config)
    torch.save(model1.state_dict(), path1)
    model2.load_state_dict(torch.load(path1))
    y2 = model2(img_t)
    y2_infer = model2.infer(img)
    print(y2)
    print(y2_infer)
    print()
    # Round-trip 2: pickle the whole model object.
    torch.save(model1, path2)
    model3 = torch.load(path2)
    y3 = model3(img_t)
    y3_infer = model3.infer(img)
    print(y3)
    print(y3_infer)
    print()
    # NOTE(review): outputs are only printed, never asserted equal — this
    # "test" cannot fail on a mismatch. Unused `a` looks like a breakpoint
    # anchor.
    a = 2
# NOTE(review): the span above `__main__` is the tail of a function whose
# `def` lies outside this chunk (it splits `ref` into sentences and writes
# ROUGE reference/decoded files). Newlines were lost; the flat layout below
# is a reconstruction — confirm indentation against the full file.
period_idx = len(ref)  # presumably the no-'.'-found case: take the rest
sent = ref[:period_idx + 1]
ref = ref[period_idx + 1:]
res_ref.append(" ".join(sent))
# Write to file
if not os.path.isdir(config.log_save_dir + '/rouge_ref'):
    os.mkdir(config.log_save_dir + '/rouge_ref')
if not os.path.isdir(config.log_save_dir + '/rouge_dec'):
    os.mkdir(config.log_save_dir + '/rouge_dec')
ref_file = os.path.join(config.log_save_dir, "rouge_ref", "%s_ref.txt" % id)
dec_file = os.path.join(config.log_save_dir, "rouge_dec", "%s_dec.txt" % id)
# Binary mode with explicit utf-8 encoding; no newline after the last line.
with open(ref_file, "wb") as f:
    for idx, sent in enumerate(res_ref):
        f.write(
            sent.encode('utf-8')) if idx == len(res_ref) - 1 else f.write(
            (sent + "\n").encode('utf-8'))
with open(dec_file, "wb") as f:
    for idx, sent in enumerate(res):
        f.write(sent.encode('utf-8')) if idx == len(res) - 1 else f.write(
            (sent + "\n").encode('utf-8'))


if __name__ == '__main__':
    # Load the checkpoint named in config.train_from (eval mode) and run
    # beam-search decoding over the configured dataset.
    model, _, _, _ = get_model(config.train_from, eval=True)
    data = DataLoader(config)
    decoder = BeamSearchDecoder(model, data)
    decoder.decode()
    # NOTE(review): fragment — the enclosing generator's `def` (iterating
    # `region` candidates, yielding bounding boxes) is outside this chunk;
    # newlines were lost and indentation is a reconstruction.
    minr, minc, maxr, maxc = region.bbox
    # Keep only candidate boxes larger than 30x30 px.
    if maxr - minr > 30 and maxc - minc > 30:
        yield minr, minc, maxr, maxc


image_dir = '/Users/thelacker/PycharmProjects/logos/test_photos/'
image_name = 'test2.jpeg'
image_path = image_dir + image_name
l = LogoFinder()
bgr, rgb, gray = l.read_image(image_path)
t = time.time()  # NOTE(review): start time recorded but never reported
croped_images = l.make_heatmap(image_path)
model = get_model()
for n, crop in enumerate(croped_images):
    minr, minc, maxr, maxc = crop
    crop_img = bgr[minr:maxr, minc:maxc]
    # Write the crop to disk, then re-load it resized for the classifier.
    tmp_name = 'res/{0}-{1}.jpg'.format(image_name[:-4], n)
    cv2.imwrite(tmp_name, crop_img)
    img = load_img(tmp_name, False, target_size=(300, 300))
    x = img_to_array(img)
    x = x / 255  # scale pixels to [0, 1]
    x = np.expand_dims(x, axis=0)  # add batch dimension
    # NOTE(review): `preds` is never used; predict_classes/predict_proba are
    # removed in modern Keras — use model.predict and threshold instead.
    preds = model.predict_classes(x)
    prob = model.predict_proba(x)
    # prob[0][0] < 0.8 presumably means "not confidently class 0" — confirm
    # which class index corresponds to "logo".
    if prob[0][0] < 0.8:
        print(tmp_name, prob)
        cv2.imwrite('result/{0}-{1}.jpg'.format(image_name[:-4], n), crop_img)
"""Smoke-test beam decoding on the first test batch of a saved Solver."""
import os
import sys

# Make the project root importable before pulling in project modules.
sys.path.append(os.getcwd())
from main import get_model

# Restore the checkpointed solver together with its data parser.
parser, model = get_model('20190717_1039_29_397_745000', 'Solver')
parser.config.batch_size = 32
train, dev, test = parser.build_iters()

# Grab just the first batch from the test iterator.
for first_batch in test:
    break

model.init_beam_decoder(num_processes=16, beam_width=32)
model_output = model.beam_decode(first_batch)
    :param out_value: network output array; the first two time-steps are
        skipped below (NOTE(review): the opening of this docstring and the
        enclosing `def` are outside the visible chunk)
    :return: output_str
    """
    # Greedy best-path CTC decode: argmax per time-step, collapse repeats,
    # drop indices beyond the alphabet (the CTC blank).
    out_best = list(np.argmax(out_value[0, 2:], axis=1))
    out_best = [k for k, g in itertools.groupby(out_best)]  # collapse repeats
    output_str = ''
    for i in out_best:
        if i < len(letters):  # indices >= len(letters) are the blank symbol
            output_str += letters[i]
    return output_str


test_dir = "../data/test/"
test_imgs = os.listdir(test_dir)
model = get_model(training=False)
model.load_weights("../model/model_5_itr--20.hd5")
# load test labels
text_data = pd.read_csv("../data/test_label.csv", header=None)


def predict(img):
    # NOTE(review): truncated — the loop body and the rest of this function
    # lie outside the visible chunk; the accumulators below presumably feed
    # word/letter accuracy tallies. Also note the `img` parameter is
    # immediately shadowed by the loop variable.
    total = 0
    acc = 0
    letter_total = 0
    letter_acc = 0
    start = time.time()
    # Iterate test images in numeric filename order (e.g. "12.png" -> 12).
    for j, img in enumerate(sorted(test_imgs, key=lambda s: int(s[:-4]))):
import main as mn
import traceback as tb

"""
Basic test cases are written customly
"""

if __name__ == '__main__':
    try:
        k = 4
        n = 5  # for get first n resp
        # Load (presumably unpickle) the EM model for k clusters.
        emobj = mn.get_model(k)
        if emobj is None:
            raise Exception('no pickle object found')
        print(emobj.em_parameters)
        print('cluster name')
        print(emobj.cluster_name)
        print('image supported extension')
        print(emobj.IMAGE_EXT_SUPPORTED)
        print('get forst n responsibility')
        print(emobj.get_first_n_data_responsibility(n, to_json=True))
        print('first n heteroginity')
        print(emobj.get_first_n_heterogeneity(n, seed=mn.CONSTANT.SEED))
        print('change k')
        emobj2 = mn.get_model(3)
        print(emobj2.get_em_params)
    except Exception as e:
        # BUG FIX: the original called tb.format_tb() on the *string*
        # produced by ''.join(tb.format_tb(...)); format_tb expects a
        # traceback object, so the handler itself crashed with
        # AttributeError. Print the joined frame list once instead.
        print(''.join(tb.format_tb(e.__traceback__)))
        mn.LOGGER.LOG(e)
def main():
    """Validate a checkpointed image classifier on the `val` folder.

    Side effects: writes CACHE_DIR/id_to_name_map.json (class id -> name)
    and CACHE_DIR/valid_preds.csv (per-image truth/pred/probabilities).
    NOTE(review): original newlines were lost; layout is a reconstruction.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--batch-size', type=int, default=32)
    arg('--workers', type=int, default=4)
    arg('--arch', type=str, default='seresnext50')
    arg('--amp', type=str, default='')  # apex opt level, e.g. 'O1'; '' = off
    arg('--size', type=int, default=192)
    arg('--debug', action='store_true')
    arg('--model-path', type=str, default='')
    args = parser.parse_args()
    train_dir = DATA_ROOT / 'train'
    valid_dir = DATA_ROOT / 'val'
    use_cuda = cuda.is_available()
    # Load weights onto CPU first; move to GPU only if one is available.
    model = get_model(args.arch)
    model.load_state_dict(torch.load(args.model_path, map_location="cpu"))
    if use_cuda:
        model = model.cuda()
    if args.amp:
        if not APEX_AVAILABLE:
            raise ValueError("Apex is not installed!")
        model = amp.initialize(model, opt_level=args.amp)
    # The first line is to make sure we have the same class_map as in training
    _, class_map = build_dataframe_from_folder(train_dir)
    # NOTE(review): with class_map supplied this call apparently returns the
    # dataframe directly (vs. the tuple above) — confirm the helper's API.
    df_valid = build_dataframe_from_folder(valid_dir, class_map)
    idx_to_name = get_class_idx_to_class_name_mapping(class_map)
    # Export the mapping for later use
    with open(CACHE_DIR / "id_to_name_map.json", "w") as fout:
        json.dump(idx_to_name, fout)
    # Resize 25% larger than the final crop size before the test transform.
    test_transform = get_test_transform(int(args.size * 1.25), args.size)
    valid_loader = make_loader(args, TrainDataset, df_valid, test_transform,
                               shuffle=False)
    print(f'{len(valid_loader.dataset):,} in valid')
    # The bot is used purely for its batched predict() helper; all training
    # collaborators are stubbed out with None/empty values.
    bot = ImageClassificationBot(model=model, train_loader=None,
                                 valid_loader=None, clip_grad=0,
                                 optimizer=None, echo=True, criterion=None,
                                 callbacks=[], pbar=True,
                                 use_tensorboard=False,
                                 use_amp=(args.amp != ''))
    logits, truths = bot.predict(valid_loader, return_y=True)
    probs = torch.softmax(logits, dim=-1)
    preds = torch.argmax(probs, dim=1)
    print(
        f"Validation accuracy: {np.mean(preds.numpy() == truths.numpy()) * 100:.2f}%"
    )
    df_out = pd.DataFrame({
        "truth": truths.numpy(),
        "max_prob": np.max(probs.numpy(), axis=1),
        # probability the model assigned to the true class
        "truth_prob": torch.gather(probs, 1, truths[:, None]).numpy()[:, 0],
        "pred": preds,
        "path": [
            valid_loader.dataset._df.iloc[i].image_path
            for i in range(len(valid_loader.dataset))
        ]
    })
    df_out.to_csv(CACHE_DIR / "valid_preds.csv", index=False)
def doEverything():
    """Build per-topic ranked source lists from word2vec keyword matching.

    Python 2 code (print statements, list-returning filter, bare reduce).
    For each topic tag: derive candidate keywords from its core segments
    (word2vec distance to the tag's Hebrew titles x tf-idf), score every
    linked segment by keyword hits, sheet counts and quote links, then keep
    the top 20. Writes the result to my_topics.json.
    NOTE(review): original newlines were lost; indentation is a
    reconstruction — confirm against the full file.
    """
    model = sefariaWord2Vec.get_model("word2vec.bin")
    with codecs.open("idf.json", 'rb', encoding='utf8') as fin:
        idf = json.load(fin)
    topics = TopicsManager()
    # Relative weights for the three scoring signals.
    SCORE_QUOTE = 0.2
    SCORE_WORD = 1
    SCORE_SHEET = 2
    my_topics = {}
    topic_list = topics.list()
    for itag, tag_dict in enumerate(topic_list):
        tag = tag_dict["tag"]
        print u"TAG {} {}/{}".format(tag, itag, len(topic_list))
        t = topics.get(tag)
        core_segs = t.contents()['sources']
        # ref -> source-sheet count, used later as a scoring signal.
        source_sheet_count_dict = {
            ref: count for ref, count in core_segs
        }
        keywords = {}  # NOTE(review): never used below
        seg_sheet_count = {}  # NOTE(review): never used below
        term = Term().load({'name': tag})
        if getattr(term, 'titles', False):
            # Hebrew titles that exist in the word2vec vocabulary.
            hetitles = filter(lambda x: x['lang'] == 'he', term.titles)
            hetitleVecs = [model[title['text']] for title in hetitles
                           if title['text'] in model]
            if len(hetitleVecs) == 0:
                print u"No titles in model for {}".format(tag)
                continue
        else:
            print u"No term for {}".format(tag)
            continue
        # Pass 1: collect candidate keywords from the topic's core segments.
        potential_keywords = {}
        for seg, count in core_segs:
            r = Ref(seg)
            tc = TextChunk(r, 'he')
            text = flatten(tc.text)
            try:
                words = tokenizer(text)
            except TypeError as e:
                continue
            term_freqs = defaultdict(int)
            for w in words:
                term_freqs[w] += 1
            # Distance of each in-vocabulary word to its nearest title vector.
            cosDists = [min([spatial.distance.cosine(model[w], titleVec)
                             for titleVec in hetitleVecs])
                        for w in words if w in model]
            tfidf_list = [tfidf(term_freqs[w], idf[w], len(words))
                          for w in words]
            # NOTE(review): zip truncates to the shortest list — cosDists
            # skips out-of-vocabulary words while the others don't, so the
            # triples can become misaligned; verify intent.
            for w, d, tf in zip(words, cosDists, tfidf_list):
                if w not in potential_keywords:
                    potential_keywords[w] = {"cosDist": d, "count": 1,
                                             "tfidf": tf}
                else:
                    potential_keywords[w]["count"] += 1
        # Lower score = better keyword (distance^count scaled by -tfidf).
        for w, v in potential_keywords.items():
            v["score"] = (v["cosDist"]**v["count"])*(-v["tfidf"])
        potential_keywords = filter(
            lambda x: x[1]["score"] < 3.5 and len(x[0]) > 2,
            potential_keywords.items())
        potential_kw_dict = {
            x[0]: x[1]["score"] for x in potential_keywords
        }
        # Pass 2: gather every segment linked to a core segment.
        segs_to_search = set()
        for seg, count in core_segs:
            r = Ref(seg)
            for l in r.linkset():
                segs_to_search.add(l.refs[0])
                segs_to_search.add(l.refs[1])
        segs_to_search_dicts = {}
        for seg in segs_to_search:
            temp_seg_dict = {"score": 0.0}
            try:
                r = Ref(seg)
                tc = TextChunk(r, "he")
                words = tokenizer(flatten(tc.text))
                matched_words = set()
                for w in words:
                    temp_word_score = potential_kw_dict.get(w, 0.0) * SCORE_WORD
                    # Keyword scores are negative-is-better; only hits in
                    # (0, 0.5) count as matches here.
                    if 0 < temp_word_score < 0.5:
                        matched_words.add(w)
                        temp_seg_dict["score"] -= temp_word_score
                temp_seg_dict["score"] = temp_seg_dict["score"] / len(words) if temp_seg_dict["score"] != 0.0 else 0.0  # normalize word scores
                temp_seg_dict["score"] += source_sheet_count_dict.get(seg, 0.0) * SCORE_SHEET
                temp_seg_dict["base_score"] = temp_seg_dict["score"]
                temp_seg_dict["category"] = r.primary_category
                temp_seg_dict["matched_words"] = list(matched_words)
                temp_seg_dict["heRef"] = r.he_normal()
                try:
                    tp = r.index.best_time_period()
                    if not tp is None:
                        comp_start_date = int(tp.start)
                    else:
                        comp_start_date = 3000  # far in the future
                except UnicodeEncodeError as e:
                    comp_start_date = 3000
                temp_seg_dict["timeperiod"] = comp_start_date
                segs_to_search_dicts[seg] = temp_seg_dict
            except PartialRefInputError as e:
                continue
            except NoVersionFoundError as e:
                continue
            except TypeError as e:
                continue
            except InputError as e:
                continue
        # Pass 3: propagate base scores across quote links.
        for seg, temp_seg_dict in segs_to_search_dicts.items():
            try:
                r = Ref(seg)
                links = reduce(lambda a, b: a | set(b.refs), r.linkset(), set())
                try:
                    links.remove(seg)  # no self-links
                except KeyError as e:
                    pass
                for l in links:
                    temp_seg_dict["score"] += segs_to_search_dicts.get(l, {}).get("base_score", 0.0) * SCORE_QUOTE
            except PartialRefInputError as e:
                pass
        # Keep the 20 best-scoring segments for this topic.
        segs_to_search_dict_items = filter(
            lambda x: x[1]["score"] > 2, segs_to_search_dicts.items())
        segs_to_search_dict_items.sort(key=lambda x: -x[1]["score"])
        segs_to_search_dict_items = segs_to_search_dict_items[:20]
        my_topics[hetitles[0]['text']] = [
            {"ref": temp_seg_dict["heRef"],
             "score": temp_seg_dict["score"],
             "timeperiod": temp_seg_dict["timeperiod"],
             "category": temp_seg_dict["category"],
             "matched": temp_seg_dict["matched_words"]}
            for ref, temp_seg_dict in segs_to_search_dict_items
        ]
    with codecs.open("my_topics.json", 'wb', encoding='utf8') as fout:
        json.dump(my_topics, fout, ensure_ascii=False, indent=2,
                  encoding='utf8')
import sys
import os
sys.path.append(os.getcwd())
from flask import Flask, request
from main import get_model
import torch as t
import json
from flask_cors import CORS, cross_origin

# Load the speech-recognition checkpoint once at startup.
print("Loading model...")
parser, model = get_model('20190724_1431_49_360_675000', 'Solver')
model.eval()
model.init_beam_decoder()
print("Model loaded")
app = Flask(__name__)


@app.route("/recognize", methods=["POST"])
@cross_origin(origin='http://172.18.34.25', headers=['Content-Type'])
def recognize():
    # Accept an uploaded WAV file and beam-decode it with the loaded model.
    # NOTE(review): the fixed 'test.wav' name means concurrent requests
    # clobber each other's uploads — use a per-request temp file.
    f = request.files["file"]
    print(f)
    f.save("test.wav")
    with t.no_grad():
        feature, length = parser.parser_wav_inference('test.wav')
        # .cuda() requires a GPU at serving time.
        output = model.beam_decode_feature(feature.float().cuda(),
                                           length.cuda())
    # NOTE(review): no return statement is visible — either the chunk is
    # truncated here or the route returns None (a Flask error); confirm.
"""Evaluate beam-search decoding CER over the test split of a WSolver."""
from main import get_model
import torch as t
from ctcdecode import CTCBeamDecoder

# Restore the word-level solver checkpoint and its data parser.
parser, model = get_model('20190717_1039_29_411_770000', 'WSolver')
parser.config.batch_size = 32
train, dev, test = parser.build_iters()

from tqdm import tqdm
from src.models.utils.score import calculate_cer_ctc

# Decode on CPU in eval mode.
model.cpu()
model.eval()
print('-')
model.init_beam_decoder(num_processes=16, beam_width=32)

import numpy as np

scores = []
for batch in tqdm(test):
    # Reference transcripts for this batch, then the decoded hypotheses.
    references = [model.vocab.convert_id2str(ids) for ids in batch['tgt']]
    hypotheses = model.beam_decode(batch)
    # Per-utterance CER, averaged into one score per batch.
    per_utt = [calculate_cer_ctc(hyp, ref)
               for hyp, ref in zip(hypotheses, references)]
    batch_cer = np.mean(per_utt)
    print(batch_cer)
    scores.append(batch_cer)
    print('current mean', np.mean(scores))

import torch as t
# Persist the per-batch scores, then report the overall mean.
t.save(scores, 'test32.t')
print(np.mean(scores))