Example #1
def index():
    photo = request.files["file"]
    in_memory_file = BytesIO()
    photo.save(in_memory_file)
    data = np.frombuffer(in_memory_file.getvalue(), dtype=np.uint8)
    label = classify(cv2.imdecode(data, 1))
    return {"label": str(label)}
Example #2
def main():
    accountId = sys.argv[1]
    # account_list = scrap.get_following(accountId)
    # scrap.twint_loop(accountId, account_list, 20)

    # print(accountId)

    DATA_DIR = Path(f"./data/{scrap.name_cleaning(accountId)}")
    json_files = [
        pos_json for pos_json in os.listdir(DATA_DIR)
        if pos_json.endswith('.json')
    ]

    df_list = []

    for file_name in json_files:
        temp_df = pd.read_json(DATA_DIR / file_name, lines=True)
        df_list.append(temp_df)

    df = pd.concat(df_list, sort=False)
    new_df = df.sort_values(by='created_at', ascending=False)
    # cut_df = new_df.head(50)
    unclassified = new_df[["link", "created_at", "username", "tweet"]]
    # unclassified = new_df[["link", "created_at", "username", "tweet"]].to_json(orient="records")
    classified = []

    count = 0

    test_set, train_set = model.make_sets()
    NBclassifier = model.train(test_set, train_set)

    for index, row in unclassified.iterrows():
        if count == 20:
            break
        if model.classify(row['tweet'], NBclassifier):
            data = {}
            data['tweet'] = row['tweet']
            data['created_at'] = row['created_at'].strftime(
                '%Y-%m-%d %H:%M:%S')
            data['link'] = row['link']
            data['username'] = row['username']
            # json_data = json.dumps(data)
            # 	# print(row)
            # 	break
            classified.append(data)
            count += 1

    # print("{test: 'test'}")
    print(classified)
    # parsed = json.loads(result)
    # data = json.dumps(parsed, indent=2)
    # print (data)
    pass
Example #3
def classify_type():
    try:
        sepal_len = request.args.get('slen')  # Get parameters for sepal length
        sepal_wid = request.args.get('swid')  # Get parameters for sepal width
        petal_len = request.args.get('plen')  # Get parameters for petal length
        petal_wid = request.args.get('pwid')  # Get parameters for petal width

        # Get the output from the classification model
        variety = model.classify(sepal_len, sepal_wid, petal_len, petal_wid)

        # Render the output in new HTML page
        return render_template('output.html', variety=variety)
    except Exception:
        return 'Error'
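Note that request.args.get() returns strings (or None), so model.classify must accept string inputs or convert them. A small sketch of explicit conversion using the same parameter names as above (parse_iris_args is a hypothetical helper):

def parse_iris_args(args):
    # Convert the four query-string values to floats; raises if any is missing.
    return tuple(float(args[key]) for key in ('slen', 'swid', 'plen', 'pwid'))

# usage inside the view:
# sepal_len, sepal_wid, petal_len, petal_wid = parse_iris_args(request.args)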
Example #4
def main():
    """
    Loads an existing model, opens audio input stream, classifies input
    """
    args = parse_args()

    print('Audio stream classifier')

    print("Restoring model: ", args.model)
    mdl = model.restore(args.model)

    if mdl is None:
        print("Can't classify data without an existing model.")
        return

    print("Opening audio input..")

    audio = pyaudio.PyAudio()
    stream = audio.open(format=pyaudio.paFloat32,
                        channels=1,
                        rate=args.sample_rate,
                        input=True,
                        frames_per_buffer=args.frame_size)

    label_a = label_b = ""

    if args.labels is not None:
        label_a = args.labels[0]
        label_b = args.labels[1]

    while True:
        # Peel off [frame_size] bytes from the audio stream
        stream_data = stream.read(args.frame_size)

        # Unpack the binary stream and expand
        data = struct.unpack("%df" % args.frame_size, stream_data)
        data = np.expand_dims([data], axis=2)

        avg = model.classify(mdl, data)

        steps = 20
        a = int(math.ceil(avg * steps))
        b = steps - a

        print(label_a + " [" + ("." * a) + "|" + ("." * b) + "] " + label_b +
              " - " + str(avg),
              end='\r')
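The console bar maps the classification average onto a fixed-width gauge. A self-contained sketch of the same rendering logic (the labels here are hypothetical):

import math

def render_bar(avg, steps=20, label_a="", label_b=""):
    # avg is assumed to be a score in [0, 1]; 'a' dots fall on label_a's side.
    a = int(math.ceil(avg * steps))
    b = steps - a
    return label_a + " [" + ("." * a) + "|" + ("." * b) + "] " + label_b + " - " + str(avg)

print(render_bar(0.35, label_a="speech", label_b="music"))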
Example #5
def test(dataset, config):
    print('test() called')
    weights = config.weights
    V = config.num_views
    batch_size = config.batch_size
    ckptfile = os.path.join(config.log_dir,
                            config.snapshot_prefix + str(weights))
    data_size = dataset.size()
    print('dataset size:', data_size)

    with tf.Graph().as_default():

        global_step = tf.Variable(0, trainable=False)

        view_ = tf.placeholder('float32',
                               shape=(None, V, 227, 227, 3),
                               name='im0')
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')

        fc8 = model.inference_multiview(view_, config.num_classes, keep_prob_)
        loss = model.loss(fc8, y_)
        #train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)
        placeholders = [view_, y_, keep_prob_, prediction, loss]
        saver = tf.train.Saver(tf.all_variables())

        init_op = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        saver.restore(sess, ckptfile)
        print('restore variables done')
        print("Start testing")
        print("Size:", data_size)
        print("It'll take", int(math.ceil(data_size / batch_size)),
              "iterations.")

        acc, _, predictions, labels = _test(dataset, config, sess,
                                            placeholders)
        print('acc:', acc * 100)

    import Evaluation_tools as et
    eval_file = os.path.join(config.log_dir, '{}.txt'.format(config.name))
    et.write_eval_file(config.data, eval_file, predictions, labels,
                       config.name)
    et.make_matrix(config.data, eval_file, config.log_dir)
Example #6
def classsification(file_dir):

    X = util.preprocessing(file_dir)
    y_pred = classify(X)

    class0 = 0
    class1 = 0
    class2 = 0

    for k in y_pred:
        if k == 0:
            class0 += 1
        elif k == 1:
            class1 += 1
        elif k == 2:
            class2 += 1

    return class0, class1, class2
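The per-class counting loop can also be expressed with numpy.bincount; a sketch assuming y_pred holds the integer labels 0, 1 and 2:

import numpy as np

def classification_counts(y_pred):
    # Count occurrences of the labels 0, 1 and 2 in a single call.
    counts = np.bincount(np.asarray(y_pred, dtype=int), minlength=3)
    return counts[0], counts[1], counts[2]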
Example #7
def test(dataset_test, model_path):
    batch_size = FLAGS.batch_size
    # Forward graph
    with tf.Graph().as_default():
        images  = tf.placeholder('float32', shape=(None, 227, 227, 3))
        labels = tf.placeholder('int64', shape=(None))

        logits = model.inference(images, 1., CLASS_NUMBER)
        loss = model.loss(logits, labels)
        prediction = model.classify(logits)


        saver = tf.train.Saver()

        #session for testing
        with tf.Session(config=config) as sess:
       
            if not model_path:
                saver.restore(sess, osp.join(model_dir ,'model_best'))
            else:
                saver.restore(sess, model_path)
            
            accuracy_sum = []
            isnextepoch = False
            step = 0
            loss_value_sum = []
            while not isnextepoch:
                batch_x, batch_y, isnextepoch = dataset_test.sample(batch_size)
                step += len(batch_x)
                feed_dict = {images: batch_x, labels: batch_y} 
                l, pred, loss_value= sess.run([logits, prediction, loss], feed_dict=feed_dict)
                loss_value_sum.append(loss_value)
                #IPython.embed()
                accuracy = 0.
                for index in xrange(len(pred)):
                    if pred[index] == batch_y[index]:
                        accuracy += 1
                accuracy /= len(pred)
                accuracy_sum.append(accuracy)
                sys.stdout.write("\r{:7d}/{}".format(step, len(dataset_test)))
                sys.stdout.flush()
            print('\nTest loss: {}, Accuracy: {}'.format(np.mean(loss_value_sum), np.mean(accuracy_sum)))
Example #8
def inference(input_image, m_encoded_test_cand_keys, m_encoded_test_cand_values,
              m_label_test_cand):
  """Constructs inference graph."""
  processed_input_image = input_data.parse_function_test(input_image)
  _, encoded_query, _ = model.cnn_encoder(
      processed_input_image, reuse=False, is_training=False)
  weighted_encoded_test, weight_coefs_test = model.relational_attention(
      encoded_query,
      tf.constant(m_encoded_test_cand_keys),
      tf.constant(m_encoded_test_cand_values),
      reuse=False)
  _, prediction_weighted_test = model.classify(
      weighted_encoded_test, reuse=False)
  predicted_class = tf.argmax(prediction_weighted_test, axis=1)
  expl_per_class = tf.py_func(
      utils.class_explainability,
      (tf.constant(m_label_test_cand), weight_coefs_test), tf.float32)
  confidence = tf.reduce_max(expl_per_class, axis=1)

  return predicted_class, confidence, weight_coefs_test
Example #9
def evaluateModel(model, sample_set, filename=None, class_list=[]):
	N = 0
	N_wrong = 0
	least_certain = []
	header = "\t".join(class_list)

	if not filename == None:
		f = open(filename, "w")
		print("%s\tklasa" % header, file=f)

	for sample in sample_set:
		N += 1
		sample_class = sample[-1]
		sample = numpy.array(sample[:-1])
		out_class, probability, probs = model.classify(sample)

		if not filename == None:
			buff = ""
			for clas in class_list:
				buff += "%.2lf\t" % probs[clas]
			buff += out_class

			print("%s" % buff, file=f)

			least_certain.append((probability, buff))

		if not sample_class == out_class:
			N_wrong += 1

	if filename == "../output/opceniti.dat":
		least_certain = sorted(least_certain, key=lambda tup:tup[0])
		least_certain = least_certain[:5]

		f = open("../output/nejednoznacne.dat", "w")
		print("%s\tklasa" % header, file=f)

		for (x, y) in least_certain:
			print("%s" % y, file=f)

	return N_wrong / float(N)
Example #10
def process_data(data, frame_size, mdl, stream):
    """ Processes a chunk of data from the stream, returns the average classification
        Optionally outputs to the debug stream if provided
    :param data: the data to classify
    :param frame_size: the size of the data (in audio samples)
    :param mdl: the previously trained model
    :param stream: optional output stream for debugging
    :return: the classification
    """
    unpacked = list(struct.unpack("%dh" % frame_size,
                                  data))  # Unpack frame_size signed 16-bit samples

    inverse_short_max = 3.0517578125e-5
    data = [x * inverse_short_max for x in unpacked]

    if stream is not None:
        packed = struct.pack("%df" % frame_size, *data)
        stream.write(packed)

    data = np.expand_dims([data], axis=2)

    average = model.classify(mdl, data)

    return average
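The scaling constant is the reciprocal of the signed 16-bit range, so the raw PCM samples end up roughly in [-1, 1):

# 1 / 32768 is exactly the inverse_short_max value used above.
assert 1.0 / 32768 == 3.0517578125e-5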
Example #11
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print('train() called')
    is_finetune = bool(ckptfile)
    V = g_.NUM_VIEWS
    batch_size = FLAGS.batch_size

    dataset_train.shuffle()
    dataset_val.shuffle()
    data_size = dataset_train.size()
    print('training size:', data_size)

    with tf.Graph().as_default():
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        # placeholders for graph input
        view_ = tf.placeholder('float32',
                               shape=(None, V, 227, 227, 3),
                               name='im0')
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')

        # graph outputs
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
        loss = model.loss(fc8, y_)
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.summary.merge_all()

        # must be after merge_all_summaries
        validation_loss = tf.placeholder('float32',
                                         shape=(),
                                         name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss',
                                               validation_loss)
        validation_acc = tf.placeholder('float32',
                                        shape=(),
                                        name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy',
                                                   validation_acc)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

        init_op = tf.global_variables_initializer()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options))
        if is_finetune:
            # load checkpoint file
            saver.restore(sess, ckptfile)
            print('restore variables done')
        elif caffemodel:
            # load caffemodel generated with caffe-tensorflow
            sess.run(init_op)
            model.load_alexnet_to_mvcnn(sess, caffemodel)
            print('loaded pretrained caffemodel:', caffemodel)
        else:
            # from scratch
            sess.run(init_op)
            print('init_op done')

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        step = startstep
        for epoch in range(100):
            print('epoch:', epoch)

            for batch_x, batch_y in dataset_train.batches(batch_size):
                step += 1

                start_time = time.time()
                feed_dict = {view_: batch_x, y_: batch_y, keep_prob_: 0.5}

                _, pred, loss_value = sess.run([train_op, prediction, loss],
                                               feed_dict=feed_dict)

                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                # print training information
                if step % 10 == 0 or step - startstep <= 30:
                    sec_per_batch = float(duration)
                    print(
                        '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                        % (datetime.now(), step, loss_value,
                           FLAGS.batch_size / duration, sec_per_batch))

                # validation
                if step % g_.VAL_PERIOD == 0:  # and step > 0:
                    val_losses = []
                    predictions = np.array([])

                    val_y = []
                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(batch_size, g_.VAL_SAMPLE_SIZE)):
                        val_feed_dict = {
                            view_: val_batch_x,
                            y_: val_batch_y,
                            keep_prob_: 1.0
                        }
                        val_loss, pred = sess.run([loss, prediction],
                                                  feed_dict=val_feed_dict)
                        val_losses.append(val_loss)
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_loss = np.mean(val_losses)

                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print('%s: step %d, validation loss=%.4f, acc=%f' %
                          (datetime.now(), step, val_loss, acc * 100.))

                    # validation summary
                    val_loss_summ = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(validation_acc_summary,
                                            feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                if step % 100 == 0:
                    # print ('running summary')
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                if step % g_.SAVE_PERIOD == 0 and step > startstep:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
Example #12
def test_classify():
    filename = "./dog.png"
    image = tf.io.encode_base64(tf.io.read_file(filename))
    assert classify(image, isb64=True)[1] == "vizsla"
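The test feeds classify() a web-safe base64 string tensor. The same preprocessing in isolation (assuming TensorFlow 2.x eager execution and that ./dog.png exists) looks like:

import tensorflow as tf

raw = tf.io.read_file("./dog.png")    # scalar string tensor holding the PNG bytes
image_b64 = tf.io.encode_base64(raw)  # web-safe base64 encoding of those bytes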
Example #13
def run_classifier():
    value = request.args.get('value', default='*', type=str)
    return ml.classify(value)
Example #14
def main(unused_argv):
    """Main function."""

    # Load training and eval data - this portion can be modified if the data is
    # imported from other sources.
    (m_train_data, m_train_labels), (m_eval_data, m_eval_labels) = \
      tf.keras.datasets.fashion_mnist.load_data()
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (m_train_data, m_train_labels))
    eval_dataset = tf.data.Dataset.from_tensor_slices(
        (m_eval_data, m_eval_labels))

    train_dataset = train_dataset.map(input_data.parse_function_train)
    eval_dataset = eval_dataset.map(input_data.parse_function_eval)
    eval_batch_size = int(
        math.floor(len(m_eval_data) / FLAGS.batch_size) * FLAGS.batch_size)

    train_batch = train_dataset.repeat().batch(FLAGS.batch_size)
    train_cand = train_dataset.repeat().batch(FLAGS.example_cand_size)
    eval_cand = train_dataset.repeat().batch(FLAGS.eval_cand_size)
    eval_batch = eval_dataset.repeat().batch(eval_batch_size)

    iter_train = train_batch.make_initializable_iterator()
    iter_train_cand = train_cand.make_initializable_iterator()
    iter_eval_cand = eval_cand.make_initializable_iterator()
    iter_eval = eval_batch.make_initializable_iterator()

    image_batch, _, label_batch = iter_train.get_next()
    image_train_cand, _, _ = iter_train_cand.get_next()
    image_eval_cand, orig_image_eval_cand, label_eval_cand = iter_eval_cand.get_next(
    )
    eval_batch, orig_eval_batch, eval_labels = iter_eval.get_next()

    # Model and loss definitions
    _, encoded_batch_queries, encoded_batch_values = model.cnn_encoder(
        image_batch, reuse=False, is_training=True)
    encoded_cand_keys, _, encoded_cand_values = model.cnn_encoder(
        image_train_cand, reuse=True, is_training=True)

    weighted_encoded_batch, weight_coefs_batch = model.relational_attention(
        encoded_batch_queries,
        encoded_cand_keys,
        encoded_cand_values,
        normalization=FLAGS.normalization)

    tf.summary.scalar(
        "Average max. coef. train",
        tf.reduce_mean(tf.reduce_max(weight_coefs_batch, axis=1)))

    # Sparsity regularization
    entropy_weights = tf.reduce_sum(
        -weight_coefs_batch *
        tf.log(FLAGS.epsilon_sparsity + weight_coefs_batch),
        axis=1)
    sparsity_loss = tf.reduce_mean(entropy_weights) - tf.log(
        FLAGS.epsilon_sparsity +
        tf.constant(FLAGS.example_cand_size, dtype=tf.float32))
    tf.summary.scalar("Sparsity entropy loss", sparsity_loss)

    # Intermediate loss
    joint_encoded_batch = (1 - FLAGS.alpha_intermediate) * encoded_batch_values \
      + FLAGS.alpha_intermediate * weighted_encoded_batch

    logits_joint_batch, _ = model.classify(joint_encoded_batch, reuse=False)
    softmax_joint_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_joint_batch, labels=label_batch))

    # Self loss
    logits_orig_batch, _ = model.classify(encoded_batch_values, reuse=True)
    softmax_orig_key_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_orig_batch, labels=label_batch))

    # Prototype combination loss
    logits_weighted_batch, _ = model.classify(weighted_encoded_batch,
                                              reuse=True)
    softmax_weighted_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_weighted_batch, labels=label_batch))

    train_loss_op = softmax_orig_key_op + softmax_weighted_op + \
      softmax_joint_op + FLAGS.sparsity_weight * sparsity_loss
    tf.summary.scalar("Total loss", train_loss_op)

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(FLAGS.init_learning_rate,
                                               global_step=global_step,
                                               decay_steps=FLAGS.decay_every,
                                               decay_rate=FLAGS.decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    tf.summary.scalar("Learning rate", learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(train_loss_op)
        capped_gvs = [(tf.clip_by_value(grad, -FLAGS.gradient_thresh,
                                        FLAGS.gradient_thresh), var)
                      for grad, var in gvs]
        train_op = optimizer.apply_gradients(capped_gvs,
                                             global_step=global_step)

    # Evaluate model

    # Process sequentially to avoid out-of-memory.
    i = tf.constant(0)
    encoded_cand_keys_val = tf.zeros([0, FLAGS.attention_dim])
    encoded_cand_queries_val = tf.zeros([0, FLAGS.attention_dim])
    encoded_cand_values_val = tf.zeros([0, FLAGS.val_dim])

    def cond(i, unused_l1, unused_l2, unused_l3):
        return i < int(
            math.ceil(FLAGS.eval_cand_size / FLAGS.example_cand_size))

    def body(i, encoded_cand_keys_val, encoded_cand_queries_val,
             encoded_cand_values_val):
        """Loop body."""
        temp = image_eval_cand[i * FLAGS.example_cand_size:(i + 1) *
                               FLAGS.example_cand_size, :, :, :]
        temp_keys, temp_queries, temp_values = model.cnn_encoder(
            temp, reuse=True, is_training=False)
        encoded_cand_keys_val = tf.concat([encoded_cand_keys_val, temp_keys],
                                          0)
        encoded_cand_queries_val = tf.concat(
            [encoded_cand_queries_val, temp_queries], 0)
        encoded_cand_values_val = tf.concat(
            [encoded_cand_values_val, temp_values], 0)
        return i+1, encoded_cand_keys_val, encoded_cand_queries_val, \
            encoded_cand_values_val

    _, encoded_cand_keys_val, encoded_cand_queries_val, \
        encoded_cand_values_val, = tf.while_loop(
            cond, body, [i, encoded_cand_keys_val, encoded_cand_queries_val,
                         encoded_cand_values_val],
            shape_invariants=[
                i.get_shape(), tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.val_dim])])

    j = tf.constant(0)
    encoded_val_keys = tf.zeros([0, FLAGS.attention_dim])
    encoded_val_queries = tf.zeros([0, FLAGS.attention_dim])
    encoded_val_values = tf.zeros([0, FLAGS.val_dim])

    def cond2(j, unused_j1, unused_j2, unused_j3):
        return j < int(math.ceil(eval_batch_size / FLAGS.batch_size))

    def body2(j, encoded_val_keys, encoded_val_queries, encoded_val_values):
        """Loop body."""
        temp = eval_batch[j * FLAGS.batch_size:(j + 1) *
                          FLAGS.batch_size, :, :, :]
        temp_keys, temp_queries, temp_values = model.cnn_encoder(
            temp, reuse=True, is_training=False)
        encoded_val_keys = tf.concat([encoded_val_keys, temp_keys], 0)
        encoded_val_queries = tf.concat([encoded_val_queries, temp_queries], 0)
        encoded_val_values = tf.concat([encoded_val_values, temp_values], 0)
        return j + 1, encoded_val_keys, encoded_val_queries, encoded_val_values

    _, encoded_val_keys, encoded_val_queries, \
        encoded_val_values = tf.while_loop(
            cond2, body2, [
                j, encoded_val_keys, encoded_val_queries, encoded_val_values],
            shape_invariants=[
                j.get_shape(), tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.val_dim])])

    weighted_encoded_val, weight_coefs_val = model.relational_attention(
        encoded_val_queries,
        encoded_cand_keys_val,
        encoded_cand_values_val,
        normalization=FLAGS.normalization)

    # Coefficient distribution
    tf.summary.scalar("Average max. coefficient val",
                      tf.reduce_mean(tf.reduce_max(weight_coefs_val, axis=1)))

    # Analysis of median number of prototypes above a certain
    # confidence threshold.
    sorted_weights = tf.contrib.framework.sort(weight_coefs_val,
                                               direction="DESCENDING")
    cum_sorted_weights = tf.cumsum(sorted_weights, axis=1)
    for threshold in [0.5, 0.9, 0.95]:
        num_examples_thresh = tf.shape(sorted_weights)[1] + 1 - tf.reduce_sum(
            tf.cast(cum_sorted_weights > threshold, tf.int32), axis=1)
        tf.summary.histogram(
            "Number of samples for explainability above " + str(threshold),
            num_examples_thresh)
        tf.summary.scalar(
            "Median number of samples for explainability above " +
            str(threshold),
            tf.contrib.distributions.percentile(num_examples_thresh, q=50))

    expl_per_class = tf.py_func(utils.class_explainability,
                                (label_eval_cand, weight_coefs_val),
                                tf.float32)
    max_expl = tf.reduce_max(expl_per_class, axis=1)
    tf.summary.histogram("Maximum per-class explainability", max_expl)

    _, prediction_val = model.classify(encoded_val_values, reuse=True)
    _, prediction_weighted_val = model.classify(weighted_encoded_val,
                                                reuse=True)

    val_eq_op = tf.equal(tf.cast(tf.argmax(prediction_val, 1), dtype=tf.int32),
                         eval_labels)
    val_acc_op = tf.reduce_mean(tf.cast(val_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy input query", val_acc_op)

    val_weighted_eq_op = tf.equal(
        tf.cast(tf.argmax(prediction_weighted_val, 1), dtype=tf.int32),
        eval_labels)
    val_weighted_acc_op = tf.reduce_mean(
        tf.cast(val_weighted_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy weighted prototypes", val_weighted_acc_op)

    conf_wrong = tf.reduce_mean(
        (1 - tf.cast(val_weighted_eq_op, tf.float32)) * max_expl)
    tf.summary.scalar("Val average confidence of wrong decisions", conf_wrong)

    conf_right = tf.reduce_mean(
        tf.cast(val_weighted_eq_op, tf.float32) * max_expl)
    tf.summary.scalar("Val average confidence of right decisions", conf_right)

    # Confidence-controlled prediction
    for ti in [0.5, 0.8, 0.9, 0.95, 0.99, 0.999]:
        mask = tf.cast(tf.greater(max_expl, ti), tf.float32)
        acc_tot = tf.reduce_sum(tf.cast(val_weighted_eq_op, tf.float32) * mask)
        conf_tot = tf.reduce_sum(mask)

        tf.summary.scalar("Val accurate ratio for confidence above " + str(ti),
                          acc_tot / conf_tot)
        tf.summary.scalar("Val total ratio for confidence above " + str(ti),
                          conf_tot / eval_batch_size)

    # Visualization of example images and corresponding prototypes
    for image_ind in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        tf.summary.image(
            "Input image " + str(image_ind),
            tf.expand_dims(orig_eval_batch[image_ind, :, :, :], 0))
        mask = tf.greater(weight_coefs_val[image_ind, :], 0.05)
        mask = tf.squeeze(mask)
        mask.set_shape([None])
        relational_attention_images = tf.boolean_mask(orig_image_eval_cand,
                                                      mask,
                                                      axis=0)
        relational_attention_weight_coefs = tf.boolean_mask(tf.squeeze(
            weight_coefs_val[image_ind, :]),
                                                            mask,
                                                            axis=0)
        annotated_images = utils.tf_put_text(
            relational_attention_images, relational_attention_weight_coefs)
        tf.summary.image("Prototype images for image " + str(image_ind),
                         annotated_images)

    # Training setup
    init = (tf.global_variables_initializer(),
            tf.local_variables_initializer())
    saver_all = tf.train.Saver()
    summaries = tf.summary.merge_all()

    with tf.Session() as sess:

        summary_writer = tf.summary.FileWriter("./tflog/" + model_name,
                                               sess.graph)

        sess.run(init)
        sess.run(iter_train.initializer)
        sess.run(iter_train_cand.initializer)
        sess.run(iter_eval_cand.initializer)
        sess.run(iter_eval.initializer)

        for step in range(1, FLAGS.num_steps):
            if step % FLAGS.display_step == 0:
                _, train_loss = sess.run([train_op, train_loss_op])
                print("Step " + str(step) + " , Training loss = " +
                      "{:.4f}".format(train_loss))
            else:
                sess.run(train_op)

            if step % FLAGS.val_step == 0:
                val_acc, merged_summary = sess.run(
                    [val_weighted_acc_op, summaries])
                print("Step " + str(step) + " , Val Accuracy = " +
                      "{:.4f}".format(val_acc))
                summary_writer.add_summary(merged_summary, step)

            if step % FLAGS.save_step == 0:
                saver_all.save(sess, checkpoint_name)
Example #15
import json

import model

training_data_path = 'data/train.json'
test_data_path = 'data/validation.json'

with open(training_data_path) as training_data_file:
    training_data = json.loads(training_data_file.read())

with open(test_data_path) as test_data_file:
    test_data = json.loads(test_data_file.read())

model.train(training_data)

corrects = 0
total = len(test_data)

for doc in test_data:
    category = doc.pop('category')
    predicted_category = model.classify(doc)
    if category == predicted_category:
        corrects += 1

accuracy = corrects / total

print('Accuracy: {:.3f}'.format(accuracy))
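Because doc.pop('category') mutates each document, test_data cannot be scored a second time. A non-destructive variant of the same accuracy loop (a sketch, using the same model.classify API) would be:

corrects = sum(
    model.classify({k: v for k, v in doc.items() if k != 'category'}) == doc['category']
    for doc in test_data
)
accuracy = corrects / len(test_data)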
Example #16
                                       preprocessor=preprocessor)
    cv_data = np.array_split(np.hstack((train_data, train_labels)), 5)

    cv_acc = 0
    if max_param is None:
        cv_acc, max_param = cross_validate(cv_data, weights, update_weights,
                                           params, update_params)

    max_weights = train(train_data,
                        train_labels,
                        weights,
                        update_weights,
                        max_param,
                        update_params,
                        epochs=10)
    train_acc = classify(train_data, train_labels, max_weights)

    test_acc = classify(test_data, test_labels, max_weights)

    def predictor(row):
        label = np.sign(np.dot(row, max_weights))
        if label == -1:
            label = 0
        return label

    write_output('Logistic regression', max_param, cv_acc, train_acc, test_acc)
    write_predictions('logreg',
                      predictor,
                      n_features=n_features,
                      neg_labels=True,
                      bias=True,
Example #17
import boto3
import model

access_key = "AKIAIZHM4QZ2PYAPVE6Q"
access_secret = "u0NKlxQu+RVV42vEQwBE23kOu7UFo5wKMlCzO2JG"
region = "us-east-1"
return_queue = "https://sqs.us-east-1.amazonaws.com/083630338242/DeepSightReturns"

sqs = boto3.client('sqs',
                   aws_access_key_id=access_key,
                   aws_secret_access_key=access_secret,
                   region_name=region)

classification = model.classify()
response = sqs.send_message(QueueUrl=return_queue, MessageBody=classification)
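Hardcoding the access key and secret in source code is risky. boto3 can resolve credentials from environment variables, shared config files, or an IAM role, so an equivalent sketch without embedded secrets is:

import boto3
import model

return_queue = "https://sqs.us-east-1.amazonaws.com/083630338242/DeepSightReturns"

# boto3 falls back to its default credential chain (env vars, ~/.aws/credentials, IAM role).
sqs = boto3.client("sqs", region_name="us-east-1")
sqs.send_message(QueueUrl=return_queue, MessageBody=model.classify())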
Example #18
def analyze():
    img_data = request.files.get('imfile', '')
    img = img_data.read()
    prediction = ml.classify(img)
    disp_im = b64encode(img).decode("utf-8")
    return render_template("predictor.html", result=prediction, file=disp_im)
Example #19
def train(dataset_train, dataset_test, caffemodel=''):
    print('train() called')
    V = config.num_views
    batch_size = config.batch_size

    dataset_train.shuffle()
    data_size = dataset_train.size()

    print('training size:', data_size)

    with tf.Graph().as_default():
        with tf.device('/gpu:0'):

            tf_config = tf.ConfigProto(log_device_placement=False)
            tf_config.gpu_options.allow_growth = True
            tf_config.allow_soft_placement = True

            global_step = tf.Variable(0, trainable=False)

            # placeholders for graph input
            view_ = tf.placeholder('float32',
                                   shape=(None, V, 227, 227, 3),
                                   name='im0')
            y_ = tf.placeholder('int64', shape=(None), name='y')
            keep_prob_ = tf.placeholder('float32')

            # graph outputs
            fc8 = model.inference_multiview(view_, config.num_classes,
                                            keep_prob_)
            loss = model.loss(fc8, y_)
            train_op = model.train(loss, global_step, data_size)
            prediction = model.classify(fc8)
            placeholders = [view_, y_, keep_prob_, prediction, loss]
            validation_loss = tf.placeholder('float32',
                                             shape=(),
                                             name='validation_loss')
            validation_acc = tf.placeholder('float32',
                                            shape=(),
                                            name='validation_accuracy')

            saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

            init_op = tf.global_variables_initializer()
            sess = tf.Session(config=tf_config)
            weights = config.weights
            if weights == -1:
                startepoch = 0
                if caffemodel:
                    sess.run(init_op)
                    model.load_alexnet_to_mvcnn(sess, caffemodel)
                    print('loaded pretrained caffemodel:', caffemodel)
                else:
                    sess.run(init_op)
                    print('init_op done')
            else:
                ld = config.log_dir
                startepoch = weights + 1
                ckptfile = os.path.join(ld,
                                        config.snapshot_prefix + str(weights))

                saver.restore(sess, ckptfile)
                print('restore variables done')

            total_seen = 0
            total_correct = 0
            total_loss = 0

            step = 0
            begin = startepoch
            end = config.max_epoch + startepoch
            for epoch in xrange(begin, end + 1):
                acc, eval_loss, predictions, labels = _test(
                    dataset_test, config, sess, placeholders)
                print('epoch %d: step %d, validation loss=%.4f, acc=%f' %
                      (epoch, step, eval_loss, acc * 100.))

                LOSS_LOGGER.log(eval_loss, epoch, "eval_loss")
                ACC_LOGGER.log(acc, epoch, "eval_accuracy")
                ACC_LOGGER.save(config.log_dir)
                LOSS_LOGGER.save(config.log_dir)
                ACC_LOGGER.plot(dest=config.log_dir)
                LOSS_LOGGER.plot(dest=config.log_dir)

                for batch_x, batch_y in dataset_train.batches(batch_size):
                    step += 1

                    feed_dict = {view_: batch_x, y_: batch_y, keep_prob_: 0.5}

                    _, pred, loss_value = sess.run([
                        train_op,
                        prediction,
                        loss,
                    ],
                                                   feed_dict=feed_dict)

                    total_loss += loss_value
                    correct = np.sum(pred == batch_y)
                    total_correct += correct
                    total_seen += batch_size

                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'

                    if step % max(config.train_log_frq / config.batch_size,
                                  1) == 0:
                        acc_ = total_correct / float(total_seen)
                        ACC_LOGGER.log(acc_, epoch, "train_accuracy")
                        loss_ = total_loss / float(total_seen / batch_size)
                        LOSS_LOGGER.log(loss_, epoch, "train_loss")
                        print('epoch %d step %d, loss=%.2f, acc=%.2f' %
                              (epoch, step, loss_, acc_))
                        total_seen = 0
                        total_correct = 0
                        total_loss = 0

                if epoch % config.save_period == 0 or epoch == end:
                    checkpoint_path = os.path.join(
                        config.log_dir, config.snapshot_prefix + str(epoch))
                    saver.save(sess, checkpoint_path)
Example #20
    synapse = json.load(data_file)
    synapse_0 = np.asarray(synapse['synapse0'])
    synapse_1 = np.asarray(synapse['synapse1'])

print sys.argv

if len(sys.argv) <= 1:
    print(
        "Error : Argument missing. Run like...\npython process.py train\npython process.py test\n"
    )
    sys.exit()

if sys.argv[1] == 'train':
    train(X,
          y,
          classes,
          words,
          hidden_neurons=10,
          alpha=0.021,
          epochs=50000,
          dropout=True,
          dropout_percent=0.2)
if sys.argv[1] == 'test':
    with open('test-data.txt') as f:
        content = f.readlines()
        for item in content:
            sents = item.strip()
            predicted_cls = classify(sents, stemmer, classes, words, synapse_0,
                                     synapse_1, ERROR_THRESHOLD)
            print predicted_cls, sents
Example #21
def train(cfg, dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print ('train() called')
    is_finetune = bool(ckptfile)
    V = g_.NUM_VIEWS
    batch_size = FLAGS.batch_size

    # dataset_train.shuffle()
    # dataset_val.shuffle()
    data_size, num_batch = dataset_train.get_len()
    # data_size = len(dataset_train)
    # print ('train size:', data_size)

    data_size_test, num_batch_test = dataset_val.get_len()
    print ('train size:', data_size)
    print ('test size:', data_size_test)

    best_eval_acc = 0




    with tf.Graph().as_default():
        # startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)
         
        # placeholders for graph input
        view_ = tf.placeholder('float32', shape=(None, V, 224, 224, 3), name='im0')
        y_ = tf.placeholder('int64', shape=(None), name='y')
        is_training_pl = tf.placeholder(tf.bool, shape=())
        bn_decay = get_bn_decay(startstep)

        # graph outputs
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, is_training_pl, bn_decay=bn_decay)
        loss = model.loss(fc8, y_)
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.summary.merge_all()


        # must be after merge_all_summaries
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)

        # tvars = tf.trainable_variables()
        # print (tvars)
        # print (tf.get_default_graph().as_graph_def())

        saver = tf.train.Saver()

        init_op = tf.global_variables_initializer()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)
        
        if is_finetune:
            # load checkpoint file
            sess.run(init_op)
            optimistic_restore(sess, ckptfile)
            # saver.restore(sess, ckptfile)
            print ('restore variables done')
        elif caffemodel:
            # load caffemodel generated with caffe-tensorflow
            sess.run(init_op)
            model.load_alexnet_to_mvcnn(sess, caffemodel)
            print ('loaded pretrained caffemodel:', caffemodel)
        else:
            # from scratch
            sess.run(init_op)
            print ('init_op done')

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph) 

        step = startstep


        for epoch in range(100):
            total_correct_mv = 0
            loss_sum_mv = 0
            total_seen = 0

            val_correct_sum = 0
            val_seen = 0
            loss_val_sum = 0
            print ('epoch:', epoch)

            for i in range(num_batch):
                # st = time.time()
                batch_x, batch_y = dataset_train.get_batch(i)
                # print (time.time()-st)
                step += 1

                start_time = time.time()
                feed_dict = {view_: batch_x,
                             y_ : batch_y,
                             is_training_pl: True }

                _, pred, loss_value = sess.run(
                        [train_op, prediction,  loss,],
                        feed_dict=feed_dict)

                duration = time.time() - start_time

                correct_mv = np.sum(pred == batch_y)
                total_correct_mv += correct_mv
                total_seen += g_.BATCH_SIZE
                loss_sum_mv += (loss_value * g_.BATCH_SIZE)

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                # print training information
                if step % 500 == 0 :
                    # print (pred)
                    # print (batch_y)
                    sec_per_batch = float(duration)
                    print ('%s: step %d, loss=%.2f, acc=%.4f (%.1f examples/sec; %.3f sec/batch)' \
                         % (datetime.now(), step, loss_sum_mv / float(total_seen), total_correct_mv / float(total_seen),
                                    FLAGS.batch_size/duration, sec_per_batch))

                    # for i in range(num_batch_test):
                    #     val_batch_x, val_batch_y = dataset_val.get_batch(i)
                    #     val_feed_dict = {view_: val_batch_x,
                    #                      y_: val_batch_y,
                    #                      is_training_pl: False}
                    #     val_loss, pred = sess.run([loss, prediction], feed_dict=val_feed_dict)
                    #
                    #     correct_mv_val = np.sum(pred == val_batch_y)
                    #     val_correct_sum += correct_mv_val
                    #     val_seen += g_.BATCH_SIZE
                    #     loss_val_sum += (val_loss * g_.BATCH_SIZE)
                    #
                    #     if i == 10:
                    #         print (pred)
                    #         print (val_batch_y)
                    #         print ('val loss=%.4f, acc=%.4f' % ((loss_val_sum / float(val_seen)), (val_correct_sum / float(val_seen))))


                if step % 1000 == 0:
                    # print 'running summary'
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                        
            # validation
            # val_losses = []
            # predictions = np.array([])
            # val_y = []

            for i in range(num_batch_test):
                val_batch_x, val_batch_y = dataset_val.get_batch(i)
                val_feed_dict = {view_: val_batch_x,
                                 y_  : val_batch_y,
                                 is_training_pl: False }
                val_loss, pred = sess.run([loss, prediction], feed_dict=val_feed_dict)

                correct_mv_val = np.sum(pred == val_batch_y)
                val_correct_sum += correct_mv_val
                val_seen += g_.BATCH_SIZE
                loss_val_sum += (val_loss * g_.BATCH_SIZE)

            val_mean_loss = (loss_val_sum / float(val_seen))
            acc = (val_correct_sum / float(val_seen))
            if acc > best_eval_acc:
                best_eval_acc = acc
                checkpoint_path = os.path.join(cfg.ckpt_folder, 'best_model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            print ('%s: epoch %d, validation loss=%.4f, acc=%f, best_acc=%f' %\
                    (datetime.now(), epoch, val_mean_loss, acc, best_eval_acc))
            # validation summary
            val_loss_summ = sess.run(validation_summary,
                    feed_dict={validation_loss: val_mean_loss})
            val_acc_summ = sess.run(validation_acc_summary,
                    feed_dict={validation_acc: acc})
            summary_writer.add_summary(val_loss_summ, step)
            summary_writer.add_summary(val_acc_summ, step)
            summary_writer.flush()
Example #22
def test(dataset, ckptfile):
    print 'test() called'
    batch_size = FLAGS.batch_size

    data_size = dataset.size()
    print 'dataset size:', data_size

    with tf.Graph().as_default():
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.merge_all_summaries()

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if FLAGS.caffemodel:
            caffemodel = FLAGS.caffemodel
            # sess.run(init_op)
            model.load_model(sess, caffemodel, fc8=True)
            print 'loaded pretrained caffemodel:', caffemodel
        else:
            saver.restore(sess, ckptfile)
            print 'restore variables done'

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        step = startstep

        predictions = []
        labels = []

        for batch_x, batch_y in dataset.batches(batch_size):
            if step >= FLAGS.max_steps:
                break
            step += 1

            if step == 1:
                img = batch_x[0, ...]
                cv2.imwrite('img0.jpg', img)

            start_time = time.time()
            feed_dict = {image_: batch_x, y_: batch_y, keep_prob_: 1.0}

            pred, loss_value = sess.run([
                prediction,
                loss,
            ],
                                        feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                sec_per_batch = float(duration)
                print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                     % (datetime.now(), step, loss_value,
                                FLAGS.batch_size/duration, sec_per_batch)

            predictions.extend(pred.tolist())
            labels.extend(batch_y.tolist())
            # print pred
            # print batch_y

        print labels
        print predictions
        acc = metrics.accuracy_score(labels, predictions)
        print 'acc:', acc * 100
Example #23
def test(dataset, ckptfile):
    print 'test() called'
    V = g_.NUM_VIEWS
    batch_size = FLAGS.batch_size

    data_size = dataset.size()
    print 'dataset size:', data_size

    with tf.Graph().as_default():
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)

        view_ = tf.placeholder('float32',
                               shape=(None, V, 227, 227, 3),
                               name='im0')
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')

        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
        loss = model.loss(fc8, y_)
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.merge_all_summaries()

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        saver.restore(sess, ckptfile)
        print 'restore variables done'

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph=sess.graph)

        step = startstep

        predictions = []
        labels = []

        print "Start testing"
        print "Size:", data_size
        print "It'll take", int(math.ceil(data_size /
                                          batch_size)), "iterations."

        for batch_x, batch_y in dataset.batches(batch_size):
            step += 1

            start_time = time.time()
            feed_dict = {view_: batch_x, y_: batch_y, keep_prob_: 1.0}

            pred, loss_value = sess.run([
                prediction,
                loss,
            ],
                                        feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                sec_per_batch = float(duration)
                print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                     % (datetime.now(), step, loss_value,
                                FLAGS.batch_size/duration, sec_per_batch)

            predictions.extend(pred.tolist())
            labels.extend(batch_y.tolist())

        # print labels
        # print predictions
        acc = metrics.accuracy_score(labels, predictions)
        print 'acc:', acc * 100
Example #24
import global_var as gvr
import global_fe as gfe
import model as mdl
import utly

print('\nStarting program...')
gvr.init()

val = utly.check()
if (val):
    print('\nDataset not found.')
    gfe.createDataset()
else:
    print('\nDataset found.')

mdl.classify()
print('\nProgram executed successfully.')
Example #25
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print 'train() called'
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size

    data_size = dataset_train.size()
    print 'training size:', data_size

    with tf.Graph().as_default():
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.summary.merge_all()

        # must be after merge_all_summaries
        validation_loss = tf.placeholder('float32',
                                         shape=(),
                                         name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss',
                                               validation_loss)
        validation_acc = tf.placeholder('float32',
                                        shape=(),
                                        name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy',
                                                   validation_acc)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()

        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        # sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement,
        # gpu_options=gpu_options))

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if is_finetune:
            saver.restore(sess, ckptfile)
            print 'restore variables done'
        elif caffemodel:
            sess.run(init_op)
            model.load_alexnet(sess, caffemodel)
            print 'loaded pretrained caffemodel:', caffemodel
        else:
            # from scratch
            sess.run(init_op)
            print 'init_op done'

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        step = startstep
        for epoch in xrange(100):
            print 'epoch:', epoch

            dataset_train.shuffle()
            # dataset_val.shuffle()

            for batch_x, batch_y in dataset_train.batches(batch_size):
                # print batch_x_v[0,0,:]
                # print batch_y

                if step >= FLAGS.max_steps:
                    break
                step += 1

                start_time = time.time()
                feed_dict = {
                    image_: batch_x,
                    y_: batch_y,
                    keep_prob_: 0.5,
                    phase_train_: True
                }

                _, loss_value, logitsyo, _ = sess.run(
                    [train_op, loss, logits, print_op], feed_dict=feed_dict)

                # print batch_y
                # print logitsyo.max(), logitsyo.min()

                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0 or step < 30:
                    sec_per_batch = float(duration)
                    print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                         % (datetime.now(), step, loss_value,
                                    FLAGS.batch_size/duration, sec_per_batch)

                # val
                if step % 100 == 0:  # and step > 0:
                    val_losses = []

                    val_logits = []
                    predictions = np.array([])
                    val_y = []
                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(batch_size, g_.VAL_SAMPLE_SIZE)):
                        # enumerate(dataset_val.batches(batch_size)):
                        val_feed_dict = {
                            image_: val_batch_x,
                            y_: val_batch_y,
                            keep_prob_: 1.0,
                            phase_train_: False
                        }
                        val_loss, pred, val_logit, _ = sess.run(
                            [loss, prediction, logits, print_op],
                            feed_dict=val_feed_dict)

                        val_losses.append(val_loss)
                        val_logits.extend(val_logit.tolist())
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_logits = np.array(val_logits)
                    # print val_logits
                    # print val_y
                    # print predictions
                    # print val_logits[0].tolist()

                    # val_logits.dump('val_logits.npy')
                    # predictions.dump('predictions.npy')
                    # np.array(val_y).dump('val_y.npy')

                    val_loss = np.mean(val_losses)
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print '%s: step %d, validation loss=%.4f, acc=%f' %\
                            (datetime.now(), step, val_loss, acc*100.)

                    # validation summary
                    val_loss_summ = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(validation_acc_summary,
                                            feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                if step % 100 == 0:
                    # print 'running f*****g summary'
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                if step % 200  == 0 or (step+1) == FLAGS.max_steps \
                        and step > startstep:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
Example #26
import json

import model

training_data_path = 'train.json'
test_data_path = 'test.json'

with open(training_data_path) as training_data_file:
    training_data = json.loads(training_data_file.read())

with open(test_data_path) as test_data_file:
    test_data = json.loads(test_data_file.read())

prior, condprob, vocab = model.train(training_data)

corrects = 0
total = len(test_data)

for doc in test_data:
    category = doc.pop('category')
    predicted_category = model.classify(prior, condprob, vocab, doc)
    if category == predicted_category:
        corrects += 1

accuracy = corrects / total

print('Accuracy: {:.3f}'.format(accuracy))
Example #27
    Y = [data[k]['labels'] for k in data.keys()]
    Y = encode_label_hierarchical(Y, all_labels)

    train = indices[:int(N * 0.5)]  # 70% data used for training
    test = indices[int(N * 0.5):int(N * 0.8)]  # 20% data used for testing
    val = indices[int(N * 0.8):]  # 10% data used for validation

    train = data_set(X[train, :], Y[train, :], A)
    test = data_set(X[test, :], Y[test, :], A)
    val = data_set(X[val, :], Y[val, :], A)

    return train, test, val


labeled_patent_data, unlabeled_patent_data = load_data_small(10000)
label_list = [
    labeled_patent_data[k]['labels'] for k in labeled_patent_data.keys()
]
sections, classes, subclasses = get_all_labels(
    label_list)  # returns A, B, .. | A01, A02, ..| A01B, A01C, ..
data = extract_features(labeled_patent_data, extractor="tfidf+glove", K=300)

A = construct_adjacency_matrix(sections, classes, subclasses)

all_labels = sections + classes + subclasses
train, test, val = construct_train_test_val_datasets(data, all_labels, A)

results = classify(
    train, test, val, 300,
    [len(sections), len(classes), len(subclasses)])  # epoch num = 500