Beispiel #1
0
def home():
    """Render the upload form (GET) or run the model on an upload (POST).

    On POST the request is passed to ``check_image_file`` and the returned
    file is stored with ``save_image``. The stored path is written to
    ``args.img_path`` and ``evaluate(model, args)`` is called. The result is
    returned through ``jsonify`` when the query string has ``format=json``;
    otherwise it is rendered with ``predict.html``.

    Returns:
        A rendered template, a JSON response, or a redirect to the ``error``
        endpoint when no usable upload was found or saving it raised.
    """
    if request.method == 'GET':
        return render_template('home.html')

    if request.method == 'POST':
        image_file = check_image_file(request)
        if not image_file:
            # The original fell through here and returned None, which Flask
            # rejects as an invalid view response; send the user to the
            # error page instead.
            return redirect(url_for('error'))

        try:
            filename = save_image(image_file)
        except Exception:
            # Best-effort boundary kept from the original: any failure while
            # saving ends on the error page.
            return redirect(url_for('error'))

        img_url = url_for('images', filename=filename)
        # NOTE(review): ``args`` is module-level state mutated per request;
        # confirm this view is never served concurrently.
        args.img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        result = evaluate(model, args)

        if request.args.get('format') == 'json':
            return jsonify(str(result))
        return render_template('predict.html',
                               result=result,
                               img_url=img_url)
def plot_confusion_matrix():
    """Tally guesses over random training pairs and show them as a matrix.

    Draws 10000 samples with ``randomTrainingPair``, runs ``evaluate`` on
    each, and counts (true category, guessed category) pairs. Every row is
    then divided by its sum and the matrix is displayed with matplotlib.
    """
    sample_count = 10000
    # Rows index the true category, columns the guessed one.
    counts = torch.zeros(n_categories, n_categories)

    # Record the guess made for each sampled example.
    for _ in range(sample_count):
        category, _line, _category_tensor, line_tensor = randomTrainingPair()
        _guess, guess_idx = categoryFromOutput(evaluate(line_tensor))
        true_idx = all_categories.index(category)
        counts[true_idx][guess_idx] += 1

    # Normalise each row by its own total.
    for row in range(n_categories):
        row_total = counts[row].sum()
        counts[row] = counts[row] / row_total

    # Draw the matrix with a colour bar.
    figure = plt.figure()
    axes = figure.add_subplot(111)
    heatmap = axes.matshow(counts.numpy())
    figure.colorbar(heatmap)

    # Label both axes with the category names (leading '' pads tick 0).
    axes.set_xticklabels([''] + all_categories, rotation=90)
    axes.set_yticklabels([''] + all_categories)

    # Force a label at every tick.
    axes.xaxis.set_major_locator(ticker.MultipleLocator(1))
    axes.yaxis.set_major_locator(ticker.MultipleLocator(1))

    # sphinx_gallery_thumbnail_number = 2
    plt.show()
Beispiel #3
0
def predict():
    """Run ``evaluate`` on an uploaded CSV and render it with the prediction.

    The file name is read from the ``fId`` form field and resolved inside
    ``app.config['UPLOAD_FOLDER']``. The CSV is loaded into a DataFrame,
    passed to ``evaluate``, and shown through ``table.html`` together with
    the prediction text.
    """
    filename = request.form.get('fId')
    # ``fId`` is client-supplied. Keep only the final path component so that
    # values containing "../" or an absolute path cannot point outside the
    # upload folder.
    # NOTE(review): a request without ``fId`` still fails here with a
    # TypeError, as it did before; decide on the desired error response.
    safe_name = os.path.basename(filename)
    df = pd.read_csv(os.path.join(app.config['UPLOAD_FOLDER'], safe_name))
    res = evaluate(df)

    table_html = df.to_html(classes="table table-striped table-bordered",
                            max_rows=30,
                            index=False,
                            header=True,
                            border=False)
    return render_template('table.html',
                           tables=[table_html],
                           prediction='Sato prediction: ' + str(res),
                           showButton=False)
Beispiel #4
0
def main():
    """Predict a label per column for a table read as JSON or CSV.

    Column groups are produced by ``sample(df, 6, 5)``; each group is passed
    to ``predict.evaluate`` and, for every column, the most frequent
    prediction across the groups it appeared in is printed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-f',
                            '--format',
                            type=str,
                            choices=('json', 'csv'),
                            default='json')
    args = arg_parser.parse_args()

    # argparse restricts --format to 'json' or 'csv'.
    if args.format == 'csv':
        df = pd.read_csv(sys.stdin, nrows=100)
    else:
        df = load_from_json()

    sample_columns = sample(df, 6, 5)

    # The predict module is imported only after the progress message.
    sys.stderr.write('Loading model...\n')
    import predict

    sys.stderr.write(f'Extracting features from {len(df.columns)}..\n')
    feature_dict, sherlock_features = predict.extract(df)

    sys.stderr.write(f'Running {len(sample_columns)} predictions...\n')
    predictions = [
        predict.evaluate(df, list(cols), feature_dict, sherlock_features)
        for cols in tqdm.tqdm(sample_columns)
    ]

    # Collect, per column, every prediction it received.
    col_predictions = collections.defaultdict(list)
    for cols, group_predictions in zip(sample_columns, predictions):
        for position, col in enumerate(cols):
            col_predictions[col].append(group_predictions[position])

    # Report the majority vote for each column.
    for column, votes in col_predictions.items():
        winner = collections.Counter(votes).most_common(1)[0][0]
        print(column, winner)
Beispiel #5
0
def train_without_classifier():
    """Train the encoder/decoder for ``args.epochs`` epochs.

    Every third epoch the model is evaluated on ``valid_loader``, the metrics
    are logged, and both modules are saved; a second "best" copy is written
    whenever the validation F1 improves.

    Returns:
        The best validation F1 seen (0 if it never improved).
    """
    batch_per_epoch = train_loader.batch_num
    print_every = 3
    save_every = 3
    patience = 10
    # NOTE(review): this flag is set below but never read; the loop always
    # runs for the full number of epochs.
    early_stopping = None
    best_f1_batch = 0
    best_f1 = 0

    start_time = time.time()
    for epoch in range(1, args.epochs + 1):
        pr_train_loss = 0
        for batch in train_loader.next_batch():
            # batch layout, as documented by the original author:
            #   seq (batch_size, seq_len), seq_len (batch,), sub (batch,),
            #   rel (batch, rel_len), rel_len (batch,),
            #   crel (batch, crel_len), crel_label (batch, crel_len),
            #   crel_len (batch,)
            seq, seq_len, sub, rel, rel_len, crel, crel_label, crel_len = batch
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss = train_epoch(seq, seq_len, sub, rel, rel_len, crel,
                               crel_label, crel_len, encoder, decoder)
            loss.backward()
            pr_train_loss += loss.data.item()
            encoder_optimizer.step()
            decoder_optimizer.step()

        if epoch % print_every == 0:
            acc, recall, precision, f1 = evaluate(
                valid_loader,
                encoder,
                decoder,
                device,
                start=rel_vocab.lookup("<_start>"),
                biLSTM=args.atten_mode == "BiLSTM")

            logging.info("-----------------------------")
            report = (
                ("epoch %d", epoch),
                ("valid accuracy : %f", acc),
                ("valid recall : %f", recall),
                ("valid precision : %f", precision),
                ("valid f1 : %f", f1),
                ("train loss : %f", pr_train_loss),
            )
            for template, value in report:
                logging.info(template % value)

            if f1 > best_f1:
                best_f1 = f1
                best_f1_batch = epoch
                best_encoder_path = (args.save_path +
                                     "/best_encoder.%s.pth" % name)
                torch.save(encoder, best_encoder_path)
                best_decoder_path = (args.save_path +
                                     "/best_decoder.%s.pth" % name)
                torch.save(decoder, best_decoder_path)
            elif f1 < best_f1 and epoch - best_f1_batch >= patience:
                early_stopping = True

        if epoch % save_every == 0:
            encoder_path = args.save_path + "/encoder.%s.pth" % name
            torch.save(encoder, encoder_path)
            decoder_path = args.save_path + "/decoder.%s.pth" % name
            torch.save(decoder, decoder_path)
    return best_f1
Beispiel #6
0
from config import config
import tensorflow as tf
import predict
from PIL import Image
from tkinter import *
from tkinter import filedialog

# Ask the user for an image file, caption it, and plot the attention map.
root = Tk()
root.filename = filedialog.askopenfilename(initialdir="E:/Images",
                                           title="이미지 파일을 선택하세요!")
print(root.filename)
root.withdraw()

image_path = root.filename
if not image_path:
    # The dialog yields an empty value when it is cancelled; stop here rather
    # than hand an empty path to the model.
    raise SystemExit("No image file selected.")

result, attention_plot = predict.evaluate(image_path)
print('Prediction Caption:', ' '.join(result))
predict.plot_attention(image_path, result, attention_plot)
Beispiel #7
0
def take():
    """Classify a submitted complaint by department and store it.

    Reads ``subject`` and ``message`` from the query string, runs the keyword
    pipeline (``preprocess`` -> ``dataframes`` -> ``tokenise`` ->
    ``frequency`` -> ``topwords``), and passes the combined text through
    ``testdata.test`` and ``predict.evaluate``. Every department whose flag
    equals 1 is flashed to the user and stored as a ``Complaints`` row for
    the user in ``session['id']``; a ``Notifications`` row is then added for
    the most recent complaint.

    Returns:
        ``Success.html`` rendered with the department names and the flag
        list, or a redirect to ``log`` when subject or message is missing.
    """
    print("Take starting")

    subject = request.args.get('subject')
    mess = request.args.get('message')
    # Validate before doing any work. The original checked only at the very
    # end (after the database writes), crashed on a missing parameter, and
    # could never see an empty message because the combined text always
    # contains the separating space.
    if not subject or not mess:
        return redirect(url_for('log'))

    category, dataset = c.department_class()

    dataset = preprocess.data_clean(dataset)

    dfwater, dfpwd, dfksrtc, dfkseb, dfenv = dataframes.dataframing(dataset)

    water_lemm, pwd_lemm, ksrtc_lemm, kseb_lemm, env_lemm = tokenise.tokenisation(
        dfwater, dfpwd, dfksrtc, dfkseb, dfenv)

    water_freq, pwd_freq, ksrtc_freq, kseb_freq, env_freq = frequency.word_frequency(
        water_lemm, pwd_lemm, ksrtc_lemm, kseb_lemm, env_lemm)

    water_lis, pwd_lis, ksrtc_lis, kseb_lis, env_lis = topwords.most_repeated_keywords(
        dfwater, dfpwd, dfksrtc, dfkseb, dfenv, water_freq, pwd_freq,
        ksrtc_freq, kseb_freq, env_freq, "manual")

    message = subject + " " + mess

    keywords, item = testdata.test(message)

    # The *_dept values are returned by predict.evaluate but not used here.
    (water_flag, pwd_flag, kseb_flag, ksrtc_flag, env_flag, water_dept,
     pwd_dept, kseb_dept, ksrtc_dept, env_dept, flag_env, flag_kseb,
     flag_ksrtc, flag_pwd, flag_water) = predict.evaluate(
         keywords, item, water_lis, env_lis, pwd_lis, ksrtc_lis, kseb_lis,
         category, nlp)

    name = [
        'Water Authority', 'PWD', 'KSEB', 'KSRTC',
        'Environment and climate change'
    ]
    flags = [
        water_flag, pwd_flag, kseb_flag, ksrtc_flag, env_flag,
        flag_env, flag_kseb, flag_ksrtc, flag_pwd, flag_water,
    ]
    # Index into ``name`` for each position of ``flags``; the second group of
    # five flags maps to the same departments in reverse order.
    # NOTE(review): a department can be appended twice when both of its flags
    # are 1, which stores two complaints for it; confirm this is intended.
    flag_departments = (0, 1, 2, 3, 4, 4, 2, 3, 1, 0)

    # Prediction list
    predicted_class = [
        name[dept_index]
        for flag_value, dept_index in zip(flags, flag_departments)
        if flag_value == 1
    ]

    print("Predicted class")
    print(predicted_class)
    for department in predicted_class:
        flash(department)

    print("Working >>>")

    # Store one complaint per predicted department.
    flag = 0
    predicted_class_length = len(predicted_class)
    print("Predicted class length", predicted_class_length)
    for department in predicted_class:
        user_id = session['id']
        new_complaint = Complaints(subject=subject,
                                   content=mess,
                                   department=department,
                                   status="Submitted",
                                   user_id=user_id)
        db.session.add(new_complaint)
        flag = flag + 1
        db.session.commit()

        print('New Complaint submitted ')
    print("Flag Complaints length", flag)

    # Only notify when this request actually stored a complaint; otherwise
    # the latest row belongs to an earlier request (or does not exist).
    if predicted_class:
        obj = db.session.query(Complaints).order_by(
            Complaints.comp_id.desc()).first()
        notification = Notifications(comp_id=obj.comp_id,
                                     subject=obj.subject,
                                     complaint=obj.content)
        db.session.add(notification)
        db.session.commit()
        print("new notification added")

    return render_template(
        'Success.html',
        name=name,
        flags=flags,
    )
Beispiel #8
0
def train(args):
    """Train a ``SentenceNetwork`` on gSCAN data with periodic validation.

    Loads the "train" and "dev" splits, builds the model from one example
    batch, optionally resumes from a checkpoint, and then runs
    ``args.n_epochs`` epochs. Every ``args.checkpoint_range`` updates a
    checkpoint is written; every ``args.validate_every`` updates the
    validation loss is computed, ``evaluate`` is run, and the model with the
    highest exact match so far is saved as ``model_best.pkl``.

    Args:
        args: Namespace holding the data/model/optimiser settings read below,
            plus ``logger`` and ``writer`` objects used for reporting.

    Raises:
        ValueError: If ``args.parse_type`` is not 'default', 'constituency'
            or 'dependency'.
        AssertionError: If ``args.resume_from_file`` is set but the file does
            not exist.
    """
    if args.max_seq_length <= 0:
        args.max_seq_length = np.inf
    # load training data
    training_set = GroundedScanDataset(
        args.data_path,
        args.data_directory + args.split + '/',
        split="train",
        target_vocabulary_file=args.target_vocabulary_file,
        k=args.k,
        max_seq_length=args.max_seq_length)
    training_set.read_dataset(
        max_examples=None,  # use all dataset
        simple_situation_representation=args.simple_situation_representation)
    training_set.shuffle_data()
    # load validation data
    validation_set = GroundedScanDataset(
        args.data_path,
        args.data_directory + args.split + '/',
        split="dev",
        target_vocabulary_file=args.target_vocabulary_file,
        k=args.k,
        max_seq_length=args.max_seq_length)
    validation_set.read_dataset(
        max_examples=None,  # use all dataset
        simple_situation_representation=args.simple_situation_representation)
    validation_set.shuffle_data()
    # 'default' uses the grammar; the other modes use an external parser.
    parser = None
    if args.parse_type == 'default':
        grammar = Grammar()
        word2narg = WORD2NARG
    else:
        if args.parse_type == 'constituency':
            parser = ConstituencyParser()
        elif args.parse_type == 'dependency':
            parser = StanfordDependencyParser()
        else:
            # Previously fell through with parser=None and failed on the
            # attribute access below with an opaque AttributeError.
            raise ValueError("Unsupported parse_type: {!r}".format(
                args.parse_type))
        word2narg = parser.word2narg
    if args.compare_attention:
        compare_list = COMPARE_LIST
    else:
        compare_list = None
    # Pull one batch only to obtain an example feature for model construction.
    data_iter = training_set.get_data_iterator(
        batch_size=args.training_batch_size)
    input_text_batch, _, situation_batch, situation_representation_batch, \
            target_batch, target_lengths, agent_positions, target_positions = next(data_iter)
    example_feature = situation_batch[0][0]  # first seq, first observation
    model = SentenceNetwork(words=word2narg,
                            cnn_kernel_size=args.cnn_kernel_size,
                            n_channels=args.cnn_num_channels,
                            example_feature=example_feature,
                            rnn_dim=args.rnn_dim,
                            rnn_depth=args.rnn_depth,
                            attention_dim=args.att_dim,
                            output_dim=args.output_dim,
                            device=args.device,
                            compare_list=compare_list,
                            compare_weight=args.compare_weight,
                            normalize_size=args.normalize_size,
                            no_attention=args.no_attention,
                            parse_type=args.parse_type,
                            pass_state=args.pass_state)
    n_update = 0
    n_validate = 0
    n_checkpoint = 0
    best_match = 0
    if args.resume_from_file != '':
        resume_file = args.model_prefix + args.resume_from_file
        assert os.path.isfile(resume_file), "No checkpoint found at {}".format(
            resume_file)
        args.logger.info(
            "Loading checkpoint from file at '{}'".format(resume_file))
        model.load_state_dict(torch.load(resume_file)[0])
        n_checkpoint = args.resume_n_update
        n_update = args.checkpoint_range * n_checkpoint
        # Integer division: n_validate is a counter that is later used as the
        # summary-writer step, so it must stay an int.
        n_validate = n_update // args.validate_every
    else:
        torch.save([model.state_dict()], args.model_prefix + '/model_0.pkl')
    model.to(args.device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=(args.adam_beta_1, args.adam_beta_2))
    # training
    training_set.shuffle_data()
    for i in range(args.n_epochs):
        for j, data in enumerate(
                training_set.get_data_iterator(
                    batch_size=args.training_batch_size)):
            model.train()
            input_text_batch, _, situation_batch, situation_representation_batch, \
                   target_batch, target_lengths, agent_positions, target_positions = data
            # The argument tree is built from the first command of the batch.
            if args.parse_type == 'default':
                arg_tree = grammar.arg_tree(split_str(input_text_batch[0]))
            else:
                arg_tree = parser.parse(input_text_batch[0])
            args.logger.info('train {}, arg tree: {}'.format(
                input_text_batch[0], arg_tree))
            model.update_words(arg_tree)
            target_lengths = torch.tensor(target_lengths,
                                          dtype=torch.long,
                                          device=args.device)
            # Passing the optimizer distinguishes this call from the
            # validation call below, which omits it.
            success, total_loss, word_losses = model.loss(
                situation_batch, target_batch, target_lengths, optimizer)
            if not success:
                # Skipped batches do not advance n_update.
                continue
            args.logger.info('epoch {}, iter {}, train loss: {}'.format(
                i, j, float(total_loss)))
            # save checkpoints
            if n_update % args.checkpoint_range == 0:
                log_model_params(model, args.writer, 'comp_gscan', n_update)
                # log numbers, TODO: log loss per word
                args.writer.add_scalar('loss/train_total', float(total_loss),
                                       n_checkpoint)
                model_path = args.model_prefix + '/model_' + str(
                    n_checkpoint) + '.pkl'
                torch.save([model.state_dict()], model_path)
                n_checkpoint += 1
            # validation
            if n_update % args.validate_every == 0:
                validation_set.shuffle_data()
                model.eval()
                # compute loss
                loss = 0
                n_batch = 0
                for k, data in enumerate(
                        validation_set.get_data_iterator(
                            batch_size=args.training_batch_size)):
                    input_text_batch, _, situation_batch, situation_representation_batch, \
                           target_batch, target_lengths, agent_positions, target_positions = data
                    if args.parse_type == 'default':
                        arg_tree = grammar.arg_tree(
                            split_str(input_text_batch[0]))
                    else:
                        arg_tree = parser.parse(input_text_batch[0])
                    model.update_words(arg_tree)
                    with torch.no_grad():
                        target_lengths = torch.tensor(target_lengths,
                                                      dtype=torch.long,
                                                      device=args.device)
                        success, total_loss, word_losses = model.loss(
                            situation_batch, target_batch, target_lengths)
                        loss += float(total_loss)
                    n_batch += 1
                # Mean validation loss over the batches seen.
                loss = loss / n_batch
                args.logger.info('epoch {}, iter {}, val loss: {}'.format(
                    i, j, float(loss)))
                args.writer.add_scalar('loss/val_total', float(loss),
                                       n_validate)
                # run evaluation
                accuracy, exact_match = evaluate(
                    training_set,
                    validation_set.get_data_iterator(batch_size=1),
                    model=model,
                    world=validation_set.dataset._world,
                    max_steps=args.max_steps,
                    vocab=validation_set.target_vocabulary,
                    max_examples_to_evaluate=args.max_testing_examples,
                    device=args.device,
                    parser=parser)
                args.logger.info(
                    "  Evaluation Accuracy: %5.2f Exact Match: %5.2f" %
                    (accuracy, exact_match))
                args.writer.add_scalar('accuracy/val_total', accuracy,
                                       n_validate)
                args.writer.add_scalar('exact_match/val_total', exact_match,
                                       n_validate)
                # save the best model
                if exact_match > best_match:
                    model_path = args.model_prefix + '/model_best.pkl'
                    torch.save([model.state_dict(), n_update, exact_match],
                               model_path)
                    best_match = exact_match
                    args.logger.info(
                        'save best model at n_update {}'.format(n_update))
                n_validate += 1
            n_update += 1
Beispiel #9
0
    nargs=1,
    action="store",
    default=0,
    dest="limit",
    help=
    "Limit genes in all datasets, it speeds up on data pre-processing development."
)
parser.add_argument("-t",
                    "--train",
                    action="store_true",
                    dest="train",
                    help="When the flag is activated, it performs training.")
parser.add_argument(
    "-e",
    "--evaluate",
    action="store_true",
    dest="evaluate",
    help="When the flag is activated, it predicts test classes.")

args = parser.parse_args()

# Run whichever stages were requested on the command line.
if args.data_proc:
    # The ``limit`` option is declared with nargs=1 and default=0: argparse
    # yields a one-element list when it is given but the bare default when it
    # is omitted, so indexing unconditionally raised a TypeError.
    limit = args.limit[0] if isinstance(args.limit,
                                        (list, tuple)) else args.limit
    dp = DataPreprocess("data", [2, 4, 6, 8, 10, 12, 15, 20, 25, 30],
                        int(limit))

if args.train:
    tr = Training([2, 4, 6, 8, 10, 12, 15, 20, 25, 30])

if args.evaluate:
    evaluate()
        'causal': args.causal,
        'mask_nonlinear': args.mask_nonlinear
    }

    train_args = {
        'lr': args.lr,
        'batch_size': args.batch_size,
        'epochs': args.epochs
    }

    model = ConvTasNet(**model_args)

    if args.evaluate == 0 and args.separate == 0:
        dataset = AudioDataset(args.data_dir, sr=args.sr, mode='train', seq_len=args.seq_len, verbose=0, voice_only=args.voice_only)

        print('DataLoading Done')

        train(model, dataset, **train_args)
    elif args.evaluate == 1:
        model.load_state_dict(torch.load(args.model, map_location='cpu'))

        dataset = AudioDataset(args.data_dir, sr=args.sr, mode='test', seq_len=args.seq_len, verbose=0, voice_only=args.voice_only)

        evaluate(model, dataset, args.batch_size, 0, args.cal_sdr)
    else:
        model.load_state_dict(torch.load(args.model, map_location='cpu'))

        dataset = AudioDataset(args.data_dir, sr=args.sr, mode='test', seq_len=args.seq_len, verbose=0, voice_only=args.voice_only)

        separate(model, dataset, args.output_dir, sr=8000)
Beispiel #11
0
            ckpt_manager.save()

        print('Epoch {} Loss {:.6f}'.format(epoch + 1, total_loss / num_steps))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
    return loss_plot


# checkout: obtain the starting epoch and the checkpoint manager
start_epoch, ckpt_manager = checkout()

# run training; the returned values are plotted right after
loss_plot = training(start_epoch, dataset, num_steps)

# show the loss plot
preprocess.pltshow(loss_plot)

# Caption one image from the validation set.
# Pick one image at random.
rid = np.random.randint(0, len(img_name_val))
image = img_name_val[rid]
# Token id 0 is skipped when rebuilding the reference caption.
real_caption = ' '.join(tokenizer.index_word[token]
                        for token in cap_val[rid]
                        if token != 0)
result, attention_plot = predict.evaluate(
    image,
    tokenizer,
    max_length,
    attention_features_shape,
    image_features_extract_model,
    encoder,
    decoder,
)

print('Real Caption:', real_caption)
print('Prediction Caption:', ' '.join(result))
predict.plot_attention(
    image,
    result,
    attention_plot,
    real_caption,
    ' '.join(result),
)