Example #1
    def test_save(self):
        imgs = self.database.paths._clean_full.glob('*')
        ws = []
        hs = []
        for img in tqdm(list(imgs)):
            img = cv2.imread(str(img), cv2.IMREAD_ANYDEPTH)
            hs.append(img.shape[0])  # cv2 images are (height, width)
            ws.append(img.shape[1])
        print(mean(ws))
        print(mean(hs))

        imgs, masks, abnormalities = self.database.patches()
        img_path = imgs[0]
        img = img_path.load()

        img_t = np.array([[img]])
        img_t = DataLoader.normalize(img_t)
        img_t = DataLoader.fix_dimensions(img_t)
        img_t = numpy_to_tensor(img_t, as_type=torch.FloatTensor, to_gpu=False)

        path1 = Path('../tests/model1.pt')
        path2 = Path('../tests/model2.pt')
        model1 = get_model(n_classes=len(self.dataset.classes),
                           config=self.config)
        y1 = model1(img_t)
        y1_infer = model1.infer(img)
        print(y1)
        print(y1_infer)
        print()

        model2 = get_model(n_classes=len(self.dataset.classes),
                           config=self.config)
        torch.save(model1.state_dict(), path1)
        model2.load_state_dict(torch.load(path1))
        y2 = model2(img_t)
        y2_infer = model2.infer(img)
        print(y2)
        print(y2_infer)
        print()

        torch.save(model1, path2)
        model3 = torch.load(path2)
        y3 = model3(img_t)
        y3_infer = model3.infer(img)
        print(y3)
        print(y3_infer)
        print()

Example #2
    while len(ref) > 0:
        try:
            period_idx = ref.index(".")
        except ValueError:
            period_idx = len(ref)
        sent = ref[:period_idx + 1]
        ref = ref[period_idx + 1:]
        res_ref.append(" ".join(sent))

    # Write to file
    if not os.path.isdir(config.log_save_dir + '/rouge_ref'):
        os.mkdir(config.log_save_dir + '/rouge_ref')
    if not os.path.isdir(config.log_save_dir + '/rouge_dec'):
        os.mkdir(config.log_save_dir + '/rouge_dec')
    ref_file = os.path.join(config.log_save_dir, "rouge_ref",
                            "%s_ref.txt" % id)
    dec_file = os.path.join(config.log_save_dir, "rouge_dec",
                            "%s_dec.txt" % id)

    with open(ref_file, "wb") as f:
        for idx, sent in enumerate(res_ref):
            suffix = "" if idx == len(res_ref) - 1 else "\n"
            f.write((sent + suffix).encode('utf-8'))
    with open(dec_file, "wb") as f:
        for idx, sent in enumerate(res):
            suffix = "" if idx == len(res) - 1 else "\n"
            f.write((sent + suffix).encode('utf-8'))


if __name__ == '__main__':
    model, _, _, _ = get_model(config.train_from, eval=True)
    data = DataLoader(config)
    decoder = BeamSearchDecoder(model, data)
    decoder.decode()
Example #3
            minr, minc, maxr, maxc = region.bbox
            if maxr - minr > 30 and maxc - minc > 30:
                yield minr, minc, maxr, maxc


image_dir = '/Users/thelacker/PycharmProjects/logos/test_photos/'
image_name = 'test2.jpeg'
image_path = image_dir + image_name

l = LogoFinder()
bgr, rgb, gray = l.read_image(image_path)

t = time.time()
cropped_images = l.make_heatmap(image_path)

model = get_model()

for n, crop in enumerate(cropped_images):
    minr, minc, maxr, maxc = crop
    crop_img = bgr[minr:maxr, minc:maxc]
    tmp_name = 'res/{0}-{1}.jpg'.format(image_name.rsplit('.', 1)[0], n)
    cv2.imwrite(tmp_name, crop_img)
    img = load_img(tmp_name, False, target_size=(300, 300))
    x = img_to_array(img)
    x = x / 255
    x = np.expand_dims(x, axis=0)
    preds = model.predict_classes(x)
    prob = model.predict_proba(x)
    if prob[0][0] < 0.8:
        print(tmp_name, prob)
        cv2.imwrite('result/{0}-{1}.jpg'.format(image_name.rsplit('.', 1)[0], n), crop_img)
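# predict_classes / predict_proba were removed from Sequential in newer Keras releases.
# A minimal equivalent, assuming a softmax output layer; the helper name below is
# illustrative only (for a single sigmoid unit, threshold model.predict(x) instead).
def predict_with_probs(model, x):
    probs = model.predict(x)           # class probabilities
    preds = np.argmax(probs, axis=-1)  # predicted class indices
    return preds, probs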
Example #4
import sys
import os
sys.path.append(os.getcwd())
from main import get_model

parser, model = get_model('20190717_1039_29_397_745000', 'Solver')
parser.config.batch_size = 32
train, dev, test = parser.build_iters()
# take the first batch from the test iterator
for i in test:
    break
model.init_beam_decoder(num_processes=16, beam_width=32)
model_output = model.beam_decode(i)
Example #5
    :param out_value:
    :return: output_str
    """
    out_best = list(np.argmax(out_value[0, 2:], axis=1))
    out_best = [k for k, g in itertools.groupby(out_best)]
    output_str = ''
    for i in out_best:
        if i < len(letters):
            output_str += letters[i]
    return output_str
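# A worked check of the greedy collapse above, using a hypothetical two-letter
# alphabet: groupby() first merges repeated indices, then anything >= len(letters)
# (the CTC blank) is dropped. Names below are illustrative only.
def _demo_ctc_collapse():
    demo_letters = "AB"
    merged = [k for k, g in itertools.groupby([0, 0, 2, 1, 1, 2])]  # -> [0, 2, 1, 2]
    decoded = ''.join(demo_letters[i] for i in merged if i < len(demo_letters))
    assert decoded == "AB"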


test_dir = "../data/test/"
test_imgs = os.listdir(test_dir)

model = get_model(training=False)
model.load_weights("../model/model_5_itr--20.hd5")

# load test labels
text_data = pd.read_csv("../data/test_label.csv", header=None)


def predict(img):

    total = 0
    acc = 0
    letter_total = 0
    letter_acc = 0
    start = time.time()

    for j, img in enumerate(sorted(test_imgs, key=lambda s: int(s[:-4]))):
Example #6
import main as mn
import traceback as tb
"""
	Basic test cases are written manually
"""

if __name__ == '__main__' :
	try:
		k = 4
		n = 5 # for get first n resp
		emobj = mn.get_model(k)
		if emobj is None :
			raise Exception('no pickle object found')
		print(emobj.em_parameters)
		print('cluster name')
		print(emobj.cluster_name)
		print('image supported extension')
		print(emobj.IMAGE_EXT_SUPPORTED)
		print('get first n responsibility')
		print(emobj.get_first_n_data_responsibility(n, to_json=True))
		print('first n heterogeneity')
		print(emobj.get_first_n_heterogeneity(n, seed=mn.CONSTANT.SEED))
		print('change k')
		emobj2 = mn.get_model(3)
		print(emobj2.get_em_params)
	except Exception as e:
		print(''.join(tb.format_tb(e.__traceback__)))
		mn.LOGGER.LOG(e)
    
Example #7
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--batch-size', type=int, default=32)
    arg('--workers', type=int, default=4)
    arg('--arch', type=str, default='seresnext50')
    arg('--amp', type=str, default='')
    arg('--size', type=int, default=192)
    arg('--debug', action='store_true')
    arg('--model-path', type=str, default='')
    args = parser.parse_args()

    train_dir = DATA_ROOT / 'train'
    valid_dir = DATA_ROOT / 'val'

    use_cuda = cuda.is_available()
    model = get_model(args.arch)
    model.load_state_dict(torch.load(args.model_path, map_location="cpu"))
    if use_cuda:
        model = model.cuda()
    if args.amp:
        if not APEX_AVAILABLE:
            raise ValueError("Apex is not installed!")
        model = amp.initialize(model, opt_level=args.amp)

    # The first line is to make sure we have the same class_map as in training
    _, class_map = build_dataframe_from_folder(train_dir)
    df_valid = build_dataframe_from_folder(valid_dir, class_map)
    idx_to_name = get_class_idx_to_class_name_mapping(class_map)
    # Export the mapping for later use
    with open(CACHE_DIR / "id_to_name_map.json", "w") as fout:
        json.dump(idx_to_name, fout)

    test_transform = get_test_transform(int(args.size * 1.25), args.size)

    valid_loader = make_loader(args,
                               TrainDataset,
                               df_valid,
                               test_transform,
                               shuffle=False)

    print(f'{len(valid_loader.dataset):,} in valid')

    bot = ImageClassificationBot(model=model,
                                 train_loader=None,
                                 valid_loader=None,
                                 clip_grad=0,
                                 optimizer=None,
                                 echo=True,
                                 criterion=None,
                                 callbacks=[],
                                 pbar=True,
                                 use_tensorboard=False,
                                 use_amp=(args.amp != ''))
    logits, truths = bot.predict(valid_loader, return_y=True)
    probs = torch.softmax(logits, dim=-1)
    preds = torch.argmax(probs, dim=1)
    print(
        f"Validation accuracy: {np.mean(preds.numpy() == truths.numpy()) * 100:.2f}%"
    )
    df_out = pd.DataFrame({
        "truth": truths.numpy(),
        "max_prob": np.max(probs.numpy(), axis=1),
        "truth_prob": torch.gather(probs, 1, truths[:, None]).numpy()[:, 0],
        "pred": preds,
        "path": [
            valid_loader.dataset._df.iloc[i].image_path
            for i in range(len(valid_loader.dataset))
        ],
    })
    df_out.to_csv(CACHE_DIR / "valid_preds.csv", index=False)
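# A minimal sketch of reading the exported mapping back at inference time, assuming
# idx_to_name is keyed by the integer class index (json stores those keys as strings);
# the helper name is illustrative only.
def load_idx_to_name():
    with open(CACHE_DIR / "id_to_name_map.json") as fin:
        return {int(k): v for k, v in json.load(fin).items()}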
Example #8
def doEverything():
    model = sefariaWord2Vec.get_model("word2vec.bin")

    with codecs.open("idf.json", 'rb', encoding='utf8') as fin:
        idf = json.load(fin)

    topics = TopicsManager()
    SCORE_QUOTE = 0.2
    SCORE_WORD = 1
    SCORE_SHEET = 2

    my_topics = {}

    topic_list = topics.list()
    for itag, tag_dict in enumerate(topic_list):
        tag = tag_dict["tag"]
        print u"TAG {} {}/{}".format(tag, itag, len(topic_list))

        t = topics.get(tag)
        core_segs = t.contents()['sources']
        source_sheet_count_dict = {
            ref: count for ref, count in core_segs
        }
        keywords = {}
        seg_sheet_count = {}
        term = Term().load({'name': tag})
        if getattr(term, 'titles', False):
            hetitles = filter(lambda x: x['lang'] == 'he', term.titles)
            hetitleVecs = [model[title['text']] for title in hetitles if title['text'] in model]
            if len(hetitleVecs) == 0:
                print u"No titles in model for {}".format(tag)
                continue
        else:
            print u"No term for {}".format(tag)
            continue

        potential_keywords = {}
        for seg, count in core_segs:
            r = Ref(seg)
            tc = TextChunk(r, 'he')
            text = flatten(tc.text)
            try:
                words = tokenizer(text)
            except TypeError as e:
                continue
            term_freqs = defaultdict(int)
            for w in words:
                term_freqs[w] += 1
            # Keep only words present in the model so the three lists stay aligned
            model_words = [w for w in words if w in model]
            cosDists = [min(spatial.distance.cosine(model[w], titleVec) for titleVec in hetitleVecs)
                        for w in model_words]
            tfidf_list = [tfidf(term_freqs[w], idf[w], len(words)) for w in model_words]
            for w, d, tf in zip(model_words, cosDists, tfidf_list):
                if w not in potential_keywords:
                    potential_keywords[w] = {"cosDist": d, "count": 1, "tfidf": tf}
                else:
                    potential_keywords[w]["count"] += 1

        for w, v in potential_keywords.items():
            v["score"] = (v["cosDist"]**v["count"])*(-v["tfidf"])

        potential_keywords = filter(lambda x: x[1]["score"] < 3.5 and len(x[0]) > 2, potential_keywords.items())
        potential_kw_dict = {
            x[0]: x[1]["score"] for x in potential_keywords
        }

        segs_to_search = set()
        for seg, count in core_segs:
            r = Ref(seg)
            for l in r.linkset():
                segs_to_search.add(l.refs[0])
                segs_to_search.add(l.refs[1])

        segs_to_search_dicts = {}
        for seg in segs_to_search:
            temp_seg_dict = {"score": 0.0}
            try:
                r = Ref(seg)
                tc = TextChunk(r, "he")
                words = tokenizer(flatten(tc.text))
                matched_words = set()
                for w in words:
                    temp_word_score = potential_kw_dict.get(w, 0.0) * SCORE_WORD
                    if 0 < temp_word_score < 0.5:
                        matched_words.add(w)
                    temp_seg_dict["score"] -= temp_word_score
                temp_seg_dict["score"] = temp_seg_dict["score"] / len(words) if temp_seg_dict["score"] != 0.0 else 0.0  # normalize word scores
                temp_seg_dict["score"] += source_sheet_count_dict.get(seg, 0.0) * SCORE_SHEET
                temp_seg_dict["base_score"] = temp_seg_dict["score"]
                temp_seg_dict["category"] = r.primary_category
                temp_seg_dict["matched_words"] = list(matched_words)
                temp_seg_dict["heRef"] = r.he_normal()
                try:
                    tp = r.index.best_time_period()
                    if tp is not None:
                        comp_start_date = int(tp.start)
                    else:
                        comp_start_date = 3000  # far in the future
                except UnicodeEncodeError as e:
                    comp_start_date = 3000
                temp_seg_dict["timeperiod"] = comp_start_date
                segs_to_search_dicts[seg] = temp_seg_dict
            except PartialRefInputError as e:
                continue
            except NoVersionFoundError as e:
                continue
            except TypeError as e:
                continue
            except InputError as e:
                continue
        for seg, temp_seg_dict in segs_to_search_dicts.items():
            try:
                r = Ref(seg)
                links = reduce(lambda a, b: a | set(b.refs), r.linkset(), set())
                try:
                    links.remove(seg)  # no self-links
                except KeyError as e:
                    pass
                for l in links:
                    temp_seg_dict["score"] += segs_to_search_dicts.get(l, {}).get("base_score", 0.0) * SCORE_QUOTE
            except PartialRefInputError as e:
                pass

        segs_to_search_dict_items = filter(lambda x: x[1]["score"] > 2, segs_to_search_dicts.items())
        segs_to_search_dict_items.sort(key=lambda x: -x[1]["score"])
        segs_to_search_dict_items = segs_to_search_dict_items[:20]
        my_topics[hetitles[0]['text']] = [
            {"ref": temp_seg_dict["heRef"], "score": temp_seg_dict["score"],
             "timeperiod": temp_seg_dict["timeperiod"], "category": temp_seg_dict["category"],
             "matched": temp_seg_dict["matched_words"]}
            for ref, temp_seg_dict in segs_to_search_dict_items
        ]

    with codecs.open("my_topics.json", 'wb', encoding='utf8') as fout:
        json.dump(my_topics, fout, ensure_ascii=False, indent=2, encoding='utf8')
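# The tfidf() helper called above is defined elsewhere in the source project. A plausible
# sketch matching the call tfidf(term_freqs[w], idf[w], len(words)): normalized term
# frequency times inverse document frequency; the name tfidf_sketch is illustrative only.
def tfidf_sketch(term_freq, idf_value, doc_len):
    return (float(term_freq) / doc_len) * idf_value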
Example #9
import sys
import os
sys.path.append(os.getcwd())

from flask import Flask, request
from main import get_model
import torch as t
import json
from flask_cors import CORS, cross_origin

print("Loading model...")

parser, model = get_model('20190724_1431_49_360_675000', 'Solver')
model.eval()
model.init_beam_decoder()
print("Model loaded")

app = Flask(__name__)


@app.route("/recognize", methods=["POST"])
@cross_origin(origin='http://172.18.34.25', headers=['Content-Type'])
def recognize():

    f = request.files["file"]
    print(f)
    f.save("test.wav")
    with t.no_grad():
        feature, length = parser.parser_wav_inference('test.wav')
        output = model.beam_decode_feature(feature.float().cuda(),
                                           length.cuda())
Example #10
import numpy as np
import torch as t
from ctcdecode import CTCBeamDecoder
from tqdm import tqdm

from main import get_model
from src.models.utils.score import calculate_cer_ctc

parser, model = get_model('20190717_1039_29_411_770000', 'WSolver')
parser.config.batch_size = 32
train, dev, test = parser.build_iters()

model.cpu()
model.eval()
print('-')
model.init_beam_decoder(num_processes=16, beam_width=32)
scores = []
for batch in tqdm(test):
    tgt = [model.vocab.convert_id2str(ids) for ids in batch['tgt']]
    model_output = model.beam_decode(batch)
    ss = [calculate_cer_ctc(hyp, ref) for hyp, ref in zip(model_output, tgt)]
    score = np.mean(ss)
    print(score)
    scores.append(score)
    print('current mean', np.mean(scores))
t.save(scores, 'test32.t')
print(np.mean(scores))