def index():
    """Classify the image uploaded under the ``"file"`` form field.

    The upload is copied into memory, decoded with OpenCV and handed to
    ``classify``.

    Returns:
        dict: ``{"label": <str>}`` with the stringified classifier output.

    Raises:
        KeyError-style lookup errors from ``request.files`` when no ``"file"``
        part was sent.
    """
    photo = request.files["file"]
    in_memory_file = BytesIO()
    photo.save(in_memory_file)
    # np.fromstring is deprecated for binary data; np.frombuffer yields the
    # same uint8 view of the bytes without the deprecation warning.
    data = np.frombuffer(in_memory_file.getvalue(), dtype=np.uint8)
    # Flag 1 asks imdecode for a 3-channel colour image.
    # NOTE(review): imdecode returns None for undecodable data; classify()
    # receives that None unchanged, exactly as before.
    label = classify(cv2.imdecode(data, 1))
    return {"label": str(label)}
def main():
    """Print up to 20 of an account's newest tweets accepted by the classifier.

    The account id is read from ``sys.argv[1]``. Every ``*.json`` file in
    ``./data/<cleaned account id>`` is loaded as line-delimited JSON, the rows
    are sorted newest first by ``created_at``, and each tweet is passed to
    ``model.classify`` with a freshly trained classifier. The accepted tweets
    are printed as a list of dicts (keys ``tweet``, ``created_at``, ``link``,
    ``username``).
    """
    max_results = 20
    accountId = sys.argv[1]
    DATA_DIR = Path(f"./data/{scrap.name_cleaning(accountId)}")
    json_files = [
        file_name for file_name in os.listdir(DATA_DIR)
        if file_name.endswith('.json')
    ]
    if not json_files:
        # pd.concat raises ValueError on an empty list; with no data the
        # result is simply an empty selection.
        print([])
        return

    frames = [pd.read_json(DATA_DIR / file_name, lines=True)
              for file_name in json_files]
    newest_first = pd.concat(frames, sort=False).sort_values(
        by='created_at', ascending=False)
    unclassified = newest_first[["link", "created_at", "username", "tweet"]]

    test_set, train_set = model.make_sets()
    NBclassifier = model.train(test_set, train_set)

    classified = []
    for _, row in unclassified.iterrows():
        # Stop as soon as enough accepted tweets have been collected.
        if len(classified) == max_results:
            break
        if model.classify(row['tweet'], NBclassifier):
            classified.append({
                'tweet': row['tweet'],
                'created_at': row['created_at'].strftime('%Y-%m-%d %H:%M:%S'),
                'link': row['link'],
                'username': row['username'],
            })
    print(classified)
def classify_type():
    """Classify a flower from query-string measurements and render the result.

    Reads ``slen``, ``swid``, ``plen`` and ``pwid`` from ``request.args``
    (sepal length/width, petal length/width), passes them to
    ``model.classify`` and renders ``output.html`` with the result as
    ``variety``.

    Returns:
        The rendered template, or the string ``'Error'`` when anything in the
        lookup, classification or rendering fails.
    """
    try:
        # Values are forwarded exactly as request.args returns them (None when
        # a parameter is absent).
        sepal_len = request.args.get('slen')
        sepal_wid = request.args.get('swid')
        petal_len = request.args.get('plen')
        petal_wid = request.args.get('pwid')
        variety = model.classify(sepal_len, sepal_wid, petal_len, petal_wid)
        return render_template('output.html', variety=variety)
    except Exception:
        # A bare ``except:`` would also swallow SystemExit / KeyboardInterrupt;
        # only ordinary errors are turned into the 'Error' response.
        return 'Error'
def main():
    """Restore a model and classify live audio input until interrupted.

    Parses the command line with ``parse_args``, restores the model named by
    ``args.model`` and opens a mono float32 input stream. Each buffer of
    ``args.frame_size`` frames is classified with ``model.classify`` and the
    result is drawn as a 20-step text gauge between the two optional labels.

    The loop has no exit condition; it ends when an exception (for example
    ``KeyboardInterrupt``) propagates. The audio stream and the PyAudio
    instance are released on the way out.
    """
    args = parse_args()
    print('Audio stream classifier')
    print("Restoring model: ", args.model)
    mdl = model.restore(args.model)
    if mdl is None:
        print("Can't classify data without an existing model.")
        return

    print("Opening audio input..")
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=pyaudio.paFloat32,
                            channels=1,
                            rate=args.sample_rate,
                            input=True,
                            frames_per_buffer=args.frame_size)
        try:
            label_a = label_b = ""
            if args.labels is not None:
                label_a = args.labels[0]
                label_b = args.labels[1]

            steps = 20
            unpack_format = "%df" % args.frame_size
            while True:
                # Read frame_size frames (one float32 sample each, mono).
                stream_data = stream.read(args.frame_size)
                # Unpack the binary stream and expand
                data = struct.unpack(unpack_format, stream_data)
                data = np.expand_dims([data], axis=2)
                avg = model.classify(mdl, data)
                a = int(math.ceil(avg * steps))
                b = steps - a
                print(label_a + " [" + ("." * a) + "|" + ("." * b) + "] "
                      + label_b + " - " + str(avg), end='\r')
        finally:
            # Previously the stream was never stopped or closed.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()
def test(dataset, config):
    """Restore a multi-view checkpoint, evaluate it and write evaluation files.

    Reads ``weights``, ``num_views``, ``batch_size``, ``log_dir``,
    ``snapshot_prefix``, ``num_classes``, ``name`` and ``data`` from
    ``config``. The checkpoint path is ``log_dir/snapshot_prefix + weights``.
    Evaluation itself is delegated to ``_test``; its accuracy is printed and
    its predictions/labels are passed to ``Evaluation_tools``.

    Args:
        dataset: object exposing ``size()``; forwarded unchanged to ``_test``.
        config: settings object with the attributes listed above.
    """
    print('test() called')
    weights = config.weights
    V = config.num_views
    batch_size = config.batch_size
    ckptfile = os.path.join(config.log_dir, config.snapshot_prefix + str(weights))
    data_size = dataset.size()
    print('dataset size:', data_size)
    with tf.Graph().as_default():
        # NOTE(review): global_step is created but not used in this function
        # (the train_op that consumed it is commented out below).
        global_step = tf.Variable(0, trainable=False)
        view_ = tf.placeholder('float32', shape=(None, V, 227, 227, 3), name='im0')
        # ``(None)`` is plain None, not a 1-tuple, so the label shape is unspecified.
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')
        fc8 = model.inference_multiview(view_, config.num_classes, keep_prob_)
        loss = model.loss(fc8, y_)
        #train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)
        # Bundle of tensors handed to _test; the order is fixed here.
        placeholders = [view_, y_, keep_prob_, prediction, loss]
        saver = tf.train.Saver(tf.all_variables())
        # init_op is built but never run here; variable values come from the restore.
        init_op = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        saver.restore(sess, ckptfile)
        print('restore variables done')
        print("Start testing")
        print("Size:", data_size)
        print("It'll take", int(math.ceil(data_size / batch_size)), "iterations.")
        acc, _, predictions, labels = _test(dataset, config, sess, placeholders)
        print('acc:', acc * 100)
        # Imported lazily, only when an evaluation actually ran.
        import Evaluation_tools as et
        eval_file = os.path.join(config.log_dir, '{}.txt'.format(config.name))
        et.write_eval_file(config.data, eval_file, predictions, labels, config.name)
        et.make_matrix(config.data, eval_file, config.log_dir)
def classsification(file_dir):
    """Count how many predictions fall into classes 0, 1 and 2.

    ``file_dir`` is preprocessed with ``util.preprocessing`` and the result is
    passed to ``classify``. Predictions equal to none of 0, 1, 2 are ignored.

    Returns:
        tuple: ``(class0, class1, class2)`` occurrence counts.
    """
    predictions = classify(util.preprocessing(file_dir))
    tallies = [0, 0, 0]
    for predicted in predictions:
        # Same precedence as an if/elif chain: first matching label wins.
        for label in (0, 1, 2):
            if predicted == label:
                tallies[label] += 1
                break
    return tallies[0], tallies[1], tallies[2]
def test(dataset_test, model_path):
    """Evaluate a restored model on ``dataset_test`` and print loss/accuracy.

    Batches are drawn with ``dataset_test.sample(batch_size)`` until it
    reports that the next epoch has started. The printed figures are the mean
    of the per-batch losses and the mean of the per-batch accuracies.

    Args:
        dataset_test: dataset exposing ``sample(batch_size)`` and ``len()``.
        model_path: checkpoint to restore; when falsy, ``model_best`` inside
            ``model_dir`` is restored instead.
    """
    batch_size = FLAGS.batch_size

    # Forward graph
    with tf.Graph().as_default():
        images = tf.placeholder('float32', shape=(None, 227, 227, 3))
        labels = tf.placeholder('int64', shape=(None))
        logits = model.inference(images, 1., CLASS_NUMBER)
        loss = model.loss(logits, labels)
        prediction = model.classify(logits)
        saver = tf.train.Saver()

        # Session for testing
        with tf.Session(config=config) as sess:
            checkpoint = model_path if model_path else osp.join(model_dir, 'model_best')
            saver.restore(sess, checkpoint)

            batch_accuracies = []
            batch_losses = []
            seen = 0
            epoch_finished = False
            while not epoch_finished:
                batch_x, batch_y, epoch_finished = dataset_test.sample(batch_size)
                seen += len(batch_x)
                _, pred, loss_value = sess.run(
                    [logits, prediction, loss],
                    feed_dict={images: batch_x, labels: batch_y})
                batch_losses.append(loss_value)

                hits = 0.
                for position, guess in enumerate(pred):
                    if guess == batch_y[position]:
                        hits += 1
                batch_accuracies.append(hits / len(pred))

                sys.stdout.write("\r{:7d}/{}".format(seen, len(dataset_test)))
                sys.stdout.flush()

            print('\nTest loss: {},Accuarcy: {}'.format(np.mean(batch_losses),
                                                        np.mean(batch_accuracies)))
def inference(input_image, m_encoded_test_cand_keys,
              m_encoded_test_cand_values, m_label_test_cand):
    """Build the inference graph for a single query image.

    The image is preprocessed with ``input_data.parse_function_test``, encoded
    with ``model.cnn_encoder`` and compared against the candidate keys/values
    through ``model.relational_attention``. The attention-weighted encoding is
    classified, and ``utils.class_explainability`` turns the candidate labels
    and attention coefficients into a per-class score.

    Returns:
        tuple: ``(predicted_class, confidence, weight_coefs_test)`` where
        ``predicted_class`` is the argmax over axis 1 of the classifier output
        and ``confidence`` is the maximum per-class score over axis 1.
    """
    query_image = input_data.parse_function_test(input_image)
    _, query_encoding, _ = model.cnn_encoder(
        query_image, reuse=False, is_training=False)

    candidate_keys = tf.constant(m_encoded_test_cand_keys)
    candidate_values = tf.constant(m_encoded_test_cand_values)
    attended_encoding, weight_coefs_test = model.relational_attention(
        query_encoding, candidate_keys, candidate_values, reuse=False)

    _, class_scores = model.classify(attended_encoding, reuse=False)
    predicted_class = tf.argmax(class_scores, axis=1)

    per_class_explanation = tf.py_func(
        utils.class_explainability,
        (tf.constant(m_label_test_cand), weight_coefs_test),
        tf.float32)
    confidence = tf.reduce_max(per_class_explanation, axis=1)
    return predicted_class, confidence, weight_coefs_test
def evaluateModel(model, sample_set, filename=None, class_list=None):
    """Return the misclassification rate of ``model`` on ``sample_set``.

    Each sample is a sequence whose last element is the expected class and
    whose preceding elements are the features; the features are passed to
    ``model.classify`` as a numpy array. ``model.classify`` must return
    ``(predicted_class, probability, per_class_probabilities)``.

    Args:
        model: object exposing ``classify(sample)``.
        sample_set: iterable of samples as described above.
        filename: optional path of a tab-separated report. One row per sample
            holds the probability of every class in ``class_list`` followed by
            the predicted class.
        class_list: class names used for the report header and columns;
            defaults to no classes.

    Returns:
        float: wrong predictions divided by the number of samples, or ``0.0``
        when ``sample_set`` is empty.

    Side effects:
        When ``filename`` is exactly ``"../output/opceniti.dat"`` the five
        report rows with the lowest ``probability`` are additionally written
        to ``"../output/nejednoznacne.dat"``.
    """
    class_list = [] if class_list is None else class_list
    header = "\t".join(class_list)
    total = 0
    wrong = 0
    least_certain = []

    report = open(filename, "w") if filename is not None else None
    try:
        if report is not None:
            print("%s\tklasa" % header, file=report)
        for sample in sample_set:
            total += 1
            sample_class = sample[-1]
            features = numpy.array(sample[:-1])
            out_class, probability, probs = model.classify(features)
            if report is not None:
                row = "".join("%.2lf\t" % probs[clas] for clas in class_list)
                row += out_class
                print("%s" % row, file=report)
                least_certain.append((probability, row))
            if sample_class != out_class:
                wrong += 1
    finally:
        # The report used to be left open (and its handle overwritten below).
        if report is not None:
            report.close()

    if filename == "../output/opceniti.dat":
        # Stable sort on probability only, so ties keep their input order.
        least_certain.sort(key=lambda tup: tup[0])
        with open("../output/nejednoznacne.dat", "w") as ambiguous:
            print("%s\tklasa" % header, file=ambiguous)
            for _, row in least_certain[:5]:
                print("%s" % row, file=ambiguous)

    if total == 0:
        # No samples: report no errors instead of dividing by zero.
        return 0.0
    return wrong / float(total)
def process_data(data, frame_size, mdl, stream):
    """Classify one chunk of 16-bit audio and return the model's output.

    The chunk is unpacked as ``frame_size`` signed 16-bit samples, each scaled
    by 3.0517578125e-5 (1/32768). When ``stream`` is given, the scaled samples
    are re-packed as 32-bit floats and written to it before classification.

    :param data: packed binary chunk holding ``frame_size`` 16-bit samples
    :param frame_size: number of audio samples in ``data``
    :param mdl: the previously trained model, passed to ``model.classify``
    :param stream: optional output stream for debugging, or ``None``
    :return: the value returned by ``model.classify``
    """
    scale = 3.0517578125e-5
    samples = [value * scale
               for value in struct.unpack("%dh" % frame_size, data)]

    if stream is not None:
        stream.write(struct.pack("%df" % frame_size, *samples))

    # Wrap in a leading axis and add a trailing one before classifying.
    batch = np.expand_dims([samples], axis=2)
    return model.classify(mdl, batch)
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    """Train the multi-view model for 100 epochs with periodic validation.

    Weights are initialised from ``ckptfile`` when given (fine-tuning), else
    from ``caffemodel`` when given, else from scratch. Validation runs every
    ``g_.VAL_PERIOD`` steps, summaries are written every 100 steps and a
    checkpoint is saved every ``g_.SAVE_PERIOD`` steps under
    ``FLAGS.train_dir``.

    Args:
        dataset_train: training set exposing ``shuffle()``, ``size()`` and
            ``batches(batch_size)``.
        dataset_val: validation set exposing ``shuffle()`` and
            ``sample_batches(batch_size, n)``.
        ckptfile: checkpoint path; the text after its last ``-`` is parsed as
            the starting step.
        caffemodel: pretrained weights passed to
            ``model.load_alexnet_to_mvcnn``.

    Raises:
        AssertionError: when the training loss becomes NaN.
    """
    print('train() called')
    is_finetune = bool(ckptfile)
    V = g_.NUM_VIEWS
    batch_size = FLAGS.batch_size
    dataset_train.shuffle()
    dataset_val.shuffle()
    data_size = dataset_train.size()
    print('training size:', data_size)
    with tf.Graph().as_default():
        # When fine-tuning, resume the step counter from the checkpoint name.
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)
        # placeholders for graph input
        view_ = tf.placeholder('float32', shape=(None, V, 227, 227, 3), name='im0')
        # ``(None)`` is plain None, so the label shape is left unspecified.
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')
        # graph outputs
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
        loss = model.loss(fc8, y_)
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)
        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.summary.merge_all()
        # The validation summaries are created after merge_all so that they
        # are not part of summary_op; they are run separately with fed values.
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)
        init_op = tf.global_variables_initializer()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options))
        if is_finetune:
            # load checkpoint file
            saver.restore(sess, ckptfile)
            print('restore variables done')
        elif caffemodel:
            # load caffemodel generated with caffe-tensorflow
            sess.run(init_op)
            model.load_alexnet_to_mvcnn(sess, caffemodel)
            print('loaded pretrained caffemodel:', caffemodel)
        else:
            # from scratch
            sess.run(init_op)
            print('init_op done')
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)
        step = startstep
        for epoch in range(100):
            print('epoch:', epoch)
            for batch_x, batch_y in dataset_train.batches(batch_size):
                step += 1
                start_time = time.time()
                # Training feeds keep_prob 0.5; validation below feeds 1.0.
                feed_dict = {view_: batch_x, y_: batch_y, keep_prob_: 0.5}
                _, pred, loss_value = sess.run([train_op, prediction, loss],
                                               feed_dict=feed_dict)
                duration = time.time() - start_time
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
                # print training information: every 10th step, plus each of
                # the first 30 steps after the starting step
                if step % 10 == 0 or step - startstep <= 30:
                    sec_per_batch = float(duration)
                    print(
                        '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                        % (datetime.now(), step, loss_value,
                           FLAGS.batch_size / duration, sec_per_batch))
                # validation
                if step % g_.VAL_PERIOD == 0:  # and step > 0:
                    val_losses = []
                    predictions = np.array([])
                    val_y = []
                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(batch_size, g_.VAL_SAMPLE_SIZE)):
                        val_feed_dict = {
                            view_: val_batch_x,
                            y_: val_batch_y,
                            keep_prob_: 1.0
                        }
                        val_loss, pred = sess.run([loss, prediction],
                                                  feed_dict=val_feed_dict)
                        val_losses.append(val_loss)
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)
                    val_loss = np.mean(val_losses)
                    # Labels are truncated to the number of predictions
                    # collected before scoring.
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print('%s: step %d, validation loss=%.4f, acc=%f' %
                          (datetime.now(), step, val_loss, acc * 100.))
                    # validation summary
                    val_loss_summ = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(validation_acc_summary,
                                            feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()
                if step % 100 == 0:
                    # Re-runs the merged summaries on the current training batch.
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()
                if step % g_.SAVE_PERIOD == 0 and step > startstep:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
def test_classify():
    """Check that the base64-encoded ``./dog.png`` is labelled ``"vizsla"``.

    The label is read from index 1 of the value returned by ``classify``.
    """
    encoded_image = tf.io.encode_base64(tf.io.read_file("./dog.png"))
    result = classify(encoded_image, isb64=True)
    assert result[1] == "vizsla"
def run_classifier():
    """Classify the ``value`` query parameter and return the result.

    ``value`` is read from ``request.args`` as a string and defaults to
    ``'*'`` when absent; whatever ``ml.classify`` returns is returned as is.
    """
    query_value = request.args.get('value', default='*', type=str)
    prediction = ml.classify(query_value)
    return prediction
def main(unused_argv):
    """Build the training/evaluation graph on Fashion-MNIST and run training.

    The function loads the dataset, creates four input pipelines (training
    batch, training candidates, evaluation candidates, evaluation batch),
    defines the combined training loss, builds the evaluation summaries and
    then runs the training loop, printing the loss every
    ``FLAGS.display_step`` steps, validating every ``FLAGS.val_step`` steps
    and saving every ``FLAGS.save_step`` steps.

    Args:
        unused_argv: ignored.
    """
    # Load training and eval data - this portion can be modified if the data is
    # imported from other sources.
    (m_train_data, m_train_labels), (m_eval_data, m_eval_labels) = \
        tf.keras.datasets.fashion_mnist.load_data()
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (m_train_data, m_train_labels))
    eval_dataset = tf.data.Dataset.from_tensor_slices(
        (m_eval_data, m_eval_labels))
    train_dataset = train_dataset.map(input_data.parse_function_train)
    eval_dataset = eval_dataset.map(input_data.parse_function_eval)
    # Largest multiple of batch_size that fits in the evaluation data.
    eval_batch_size = int(
        math.floor(len(m_eval_data) / FLAGS.batch_size) * FLAGS.batch_size)
    train_batch = train_dataset.repeat().batch(FLAGS.batch_size)
    train_cand = train_dataset.repeat().batch(FLAGS.example_cand_size)
    # The evaluation candidates are drawn from the *training* dataset.
    eval_cand = train_dataset.repeat().batch(FLAGS.eval_cand_size)
    eval_batch = eval_dataset.repeat().batch(eval_batch_size)
    iter_train = train_batch.make_initializable_iterator()
    iter_train_cand = train_cand.make_initializable_iterator()
    iter_eval_cand = eval_cand.make_initializable_iterator()
    iter_eval = eval_batch.make_initializable_iterator()
    image_batch, _, label_batch = iter_train.get_next()
    image_train_cand, _, _ = iter_train_cand.get_next()
    image_eval_cand, orig_image_eval_cand, label_eval_cand = iter_eval_cand.get_next(
    )
    # From here on ``eval_batch`` names the image tensor, not the dataset.
    eval_batch, orig_eval_batch, eval_labels = iter_eval.get_next()
    # Model and loss definitions
    _, encoded_batch_queries, encoded_batch_values = model.cnn_encoder(
        image_batch, reuse=False, is_training=True)
    encoded_cand_keys, _, encoded_cand_values = model.cnn_encoder(
        image_train_cand, reuse=True, is_training=True)
    weighted_encoded_batch, weight_coefs_batch = model.relational_attention(
        encoded_batch_queries,
        encoded_cand_keys,
        encoded_cand_values,
        normalization=FLAGS.normalization)
    tf.summary.scalar(
        "Average max. coef. train",
        tf.reduce_mean(tf.reduce_max(weight_coefs_batch, axis=1)))
    # Sparsity regularization: mean entropy of the attention coefficients,
    # offset by log(epsilon + number of candidates).
    entropy_weights = tf.reduce_sum(
        -weight_coefs_batch *
        tf.log(FLAGS.epsilon_sparsity + weight_coefs_batch),
        axis=1)
    sparsity_loss = tf.reduce_mean(entropy_weights) - tf.log(
        FLAGS.epsilon_sparsity +
        tf.constant(FLAGS.example_cand_size, dtype=tf.float32))
    tf.summary.scalar("Sparsity entropy loss", sparsity_loss)
    # Intermediate loss: classify a blend of the sample's own encoding and
    # its attention-weighted candidate encoding.
    joint_encoded_batch = (1 - FLAGS.alpha_intermediate) * encoded_batch_values \
        + FLAGS.alpha_intermediate * weighted_encoded_batch
    logits_joint_batch, _ = model.classify(joint_encoded_batch, reuse=False)
    softmax_joint_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_joint_batch, labels=label_batch))
    # Self loss
    logits_orig_batch, _ = model.classify(encoded_batch_values, reuse=True)
    softmax_orig_key_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_orig_batch, labels=label_batch))
    # Prototype combination loss
    logits_weighted_batch, _ = model.classify(weighted_encoded_batch,
                                              reuse=True)
    softmax_weighted_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_weighted_batch, labels=label_batch))
    train_loss_op = softmax_orig_key_op + softmax_weighted_op + \
        softmax_joint_op + FLAGS.sparsity_weight * sparsity_loss
    tf.summary.scalar("Total loss", train_loss_op)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(FLAGS.init_learning_rate,
                                               global_step=global_step,
                                               decay_steps=FLAGS.decay_every,
                                               decay_rate=FLAGS.decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    tf.summary.scalar("Learning rate", learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Run the collected update ops before each optimisation step.
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(train_loss_op)
        # Clip every gradient element to [-gradient_thresh, gradient_thresh].
        capped_gvs = [(tf.clip_by_value(grad, -FLAGS.gradient_thresh,
                                        FLAGS.gradient_thresh), var)
                      for grad, var in gvs]
        train_op = optimizer.apply_gradients(capped_gvs,
                                             global_step=global_step)
    # Evaluate model
    # Process sequentially to avoid out-of-memory.
    i = tf.constant(0)
    encoded_cand_keys_val = tf.zeros([0, FLAGS.attention_dim])
    encoded_cand_queries_val = tf.zeros([0, FLAGS.attention_dim])
    encoded_cand_values_val = tf.zeros([0, FLAGS.val_dim])

    def cond(i, unused_l1, unused_l2, unused_l3):
        return i < int(
            math.ceil(FLAGS.eval_cand_size / FLAGS.example_cand_size))

    def body(i, encoded_cand_keys_val, encoded_cand_queries_val,
             encoded_cand_values_val):
        """Encode one slice of the evaluation candidates and append it."""
        temp = image_eval_cand[i * FLAGS.example_cand_size:(i + 1) *
                               FLAGS.example_cand_size, :, :, :]
        temp_keys, temp_queries, temp_values = model.cnn_encoder(
            temp, reuse=True, is_training=False)
        encoded_cand_keys_val = tf.concat([encoded_cand_keys_val, temp_keys], 0)
        encoded_cand_queries_val = tf.concat(
            [encoded_cand_queries_val, temp_queries], 0)
        encoded_cand_values_val = tf.concat(
            [encoded_cand_values_val, temp_values], 0)
        return i+1, encoded_cand_keys_val, encoded_cand_queries_val, \
            encoded_cand_values_val

    # The first dimension grows on every iteration, hence the None invariants.
    _, encoded_cand_keys_val, encoded_cand_queries_val, \
        encoded_cand_values_val, = tf.while_loop(
            cond, body, [i, encoded_cand_keys_val, encoded_cand_queries_val,
                         encoded_cand_values_val],
            shape_invariants=[
                i.get_shape(), tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.val_dim])])
    j = tf.constant(0)
    encoded_val_keys = tf.zeros([0, FLAGS.attention_dim])
    encoded_val_queries = tf.zeros([0, FLAGS.attention_dim])
    encoded_val_values = tf.zeros([0, FLAGS.val_dim])

    def cond2(j, unused_j1, unused_j2, unused_j3):
        return j < int(math.ceil(eval_batch_size / FLAGS.batch_size))

    def body2(j, encoded_val_keys, encoded_val_queries, encoded_val_values):
        """Encode one slice of the evaluation batch and append it."""
        temp = eval_batch[j * FLAGS.batch_size:(j + 1) *
                          FLAGS.batch_size, :, :, :]
        temp_keys, temp_queries, temp_values = model.cnn_encoder(
            temp, reuse=True, is_training=False)
        encoded_val_keys = tf.concat([encoded_val_keys, temp_keys], 0)
        encoded_val_queries = tf.concat([encoded_val_queries, temp_queries], 0)
        encoded_val_values = tf.concat([encoded_val_values, temp_values], 0)
        return j + 1, encoded_val_keys, encoded_val_queries, encoded_val_values

    _, encoded_val_keys, encoded_val_queries, \
        encoded_val_values = tf.while_loop(
            cond2, body2, [
                j, encoded_val_keys, encoded_val_queries, encoded_val_values],
            shape_invariants=[
                j.get_shape(), tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.attention_dim]),
                tf.TensorShape([None, FLAGS.val_dim])])
    weighted_encoded_val, weight_coefs_val = model.relational_attention(
        encoded_val_queries,
        encoded_cand_keys_val,
        encoded_cand_values_val,
        normalization=FLAGS.normalization)
    # Coefficient distribution
    tf.summary.scalar("Average max. coefficient val",
                      tf.reduce_mean(tf.reduce_max(weight_coefs_val, axis=1)))
    # Analysis of median number of prototypes above a certain
    # confidence threshold.
    sorted_weights = tf.contrib.framework.sort(weight_coefs_val,
                                               direction="DESCENDING")
    cum_sorted_weights = tf.cumsum(sorted_weights, axis=1)
    for threshold in [0.5, 0.9, 0.95]:
        # Row length + 1 minus the number of cumulative sums above the threshold.
        num_examples_thresh = tf.shape(sorted_weights)[1] + 1 - tf.reduce_sum(
            tf.cast(cum_sorted_weights > threshold, tf.int32), axis=1)
        tf.summary.histogram(
            "Number of samples for explainability above " + str(threshold),
            num_examples_thresh)
        tf.summary.scalar(
            "Median number of samples for explainability above " +
            str(threshold),
            tf.contrib.distributions.percentile(num_examples_thresh, q=50))
    expl_per_class = tf.py_func(utils.class_explainability,
                                (label_eval_cand, weight_coefs_val), tf.float32)
    max_expl = tf.reduce_max(expl_per_class, axis=1)
    tf.summary.histogram("Maximum per-class explainability", max_expl)
    _, prediction_val = model.classify(encoded_val_values, reuse=True)
    _, prediction_weighted_val = model.classify(weighted_encoded_val,
                                                reuse=True)
    val_eq_op = tf.equal(tf.cast(tf.argmax(prediction_val, 1), dtype=tf.int32),
                         eval_labels)
    val_acc_op = tf.reduce_mean(tf.cast(val_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy input query", val_acc_op)
    val_weighted_eq_op = tf.equal(
        tf.cast(tf.argmax(prediction_weighted_val, 1), dtype=tf.int32),
        eval_labels)
    val_weighted_acc_op = tf.reduce_mean(
        tf.cast(val_weighted_eq_op, dtype=tf.float32))
    tf.summary.scalar("Val accuracy weighted prototypes", val_weighted_acc_op)
    # Both means below are taken over all evaluation samples, not only over
    # the wrong (respectively right) ones.
    conf_wrong = tf.reduce_mean(
        (1 - tf.cast(val_weighted_eq_op, tf.float32)) * max_expl)
    tf.summary.scalar("Val average confidence of wrong decisions", conf_wrong)
    conf_right = tf.reduce_mean(
        tf.cast(val_weighted_eq_op, tf.float32) * max_expl)
    tf.summary.scalar("Val average confidence of right decisions", conf_right)
    # Confidence-controlled prediction
    for ti in [0.5, 0.8, 0.9, 0.95, 0.99, 0.999]:
        mask = tf.cast(tf.greater(max_expl, ti), tf.float32)
        acc_tot = tf.reduce_sum(tf.cast(val_weighted_eq_op, tf.float32) * mask)
        conf_tot = tf.reduce_sum(mask)
        # NOTE(review): conf_tot is zero when no sample exceeds ``ti``, which
        # makes this ratio 0/0 - confirm that is acceptable in the summaries.
        tf.summary.scalar("Val accurate ratio for confidence above " + str(ti),
                          acc_tot / conf_tot)
        tf.summary.scalar("Val total ratio for confidence above " + str(ti),
                          conf_tot / eval_batch_size)
    # Visualization of example images and corresponding prototypes
    for image_ind in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        tf.summary.image(
            "Input image " + str(image_ind),
            tf.expand_dims(orig_eval_batch[image_ind, :, :, :], 0))
        # Keep only the candidates whose coefficient exceeds 0.05.
        mask = tf.greater(weight_coefs_val[image_ind, :], 0.05)
        mask = tf.squeeze(mask)
        mask.set_shape([None])
        relational_attention_images = tf.boolean_mask(orig_image_eval_cand,
                                                      mask,
                                                      axis=0)
        relational_attention_weight_coefs = tf.boolean_mask(tf.squeeze(
            weight_coefs_val[image_ind, :]),
                                                            mask,
                                                            axis=0)
        annotated_images = utils.tf_put_text(
            relational_attention_images, relational_attention_weight_coefs)
        tf.summary.image("Prototype images for image " + str(image_ind),
                         annotated_images)
    # Training setup
    init = (tf.global_variables_initializer(),
            tf.local_variables_initializer())
    saver_all = tf.train.Saver()
    summaries = tf.summary.merge_all()
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter("./tflog/" + model_name,
                                               sess.graph)
        sess.run(init)
        sess.run(iter_train.initializer)
        sess.run(iter_train_cand.initializer)
        sess.run(iter_eval_cand.initializer)
        sess.run(iter_eval.initializer)
        # range(1, num_steps) performs num_steps - 1 training steps.
        for step in range(1, FLAGS.num_steps):
            if step % FLAGS.display_step == 0:
                _, train_loss = sess.run([train_op, train_loss_op])
                print("Step " + str(step) + " , Training loss = " +
                      "{:.4f}".format(train_loss))
            else:
                sess.run(train_op)
            if step % FLAGS.val_step == 0:
                val_acc, merged_summary = sess.run(
                    [val_weighted_acc_op, summaries])
                print("Step " + str(step) + " , Val Accuracy = " +
                      "{:.4f}".format(val_acc))
                summary_writer.add_summary(merged_summary, step)
            if step % FLAGS.save_step == 0:
                saver_all.save(sess, checkpoint_name)
import json
import model

# Train on data/train.json, then report accuracy on data/validation.json.
training_data_path = 'data/train.json'
test_data_path = 'data/validation.json'

with open(training_data_path) as training_data_file:
    training_data = json.load(training_data_file)

with open(test_data_path) as test_data_file:
    test_data = json.load(test_data_file)

model.train(training_data)

corrects = 0
total = len(test_data)
for doc in test_data:
    # Remove the expected label so the classifier never sees it.
    category = doc.pop('category')
    predicted_category = model.classify(doc)
    if category == predicted_category:
        corrects += 1

# An empty validation file used to end in ZeroDivisionError; report 0 instead.
accuracy = corrects / total if total else 0.0
print('Accuracy: {:.3f}'.format(accuracy))
preprocessor=preprocessor) cv_data = np.array_split(np.hstack((train_data, train_labels)), 5) cv_acc = 0 if max_param is None: cv_acc, max_param = cross_validate(cv_data, weights, update_weights, params, update_params) max_weights = train(train_data, train_labels, weights, update_weights, max_param, update_params, epochs=10) train_acc = classify(train_data, train_labels, max_weights) test_acc = classify(test_data, test_labels, max_weights) def predictor(row): label = np.sign(np.dot(row, max_weights)) if label == -1: label = 0 return label write_output('Logistic regression', max_param, cv_acc, train_acc, test_acc) write_predictions('logreg', predictor, n_features=n_features, neg_labels=True, bias=True,
import os

import boto3
import model

# SECURITY: the AWS key pair used to be hard-coded in this file. Credentials
# are now read from the standard environment variables; when they are unset,
# None is passed and boto3 falls back to its configured credential sources.
# The key pair that was committed here must be revoked and rotated.
access_key = os.environ.get("AWS_ACCESS_KEY_ID")
access_secret = os.environ.get("AWS_SECRET_ACCESS_KEY")
region = "us-east-1"
return_queue = "https://sqs.us-east-1.amazonaws.com/083630338242/DeepSightReturns"

sqs = boto3.client('sqs',
                   aws_access_key_id=access_key,
                   aws_secret_access_key=access_secret,
                   region_name=region)

# Run the classifier and post its result to the return queue.
classification = model.classify()
response = sqs.send_message(QueueUrl=return_queue,
                            MessageBody=classification)
def analyze():
    """Classify the uploaded ``imfile`` image and render the prediction page.

    The raw upload bytes are passed to ``ml.classify`` and also embedded,
    base64-encoded, in ``predictor.html`` (template variables ``result`` and
    ``file``).

    Returns:
        The rendered template, or a ``(message, 400)`` response when the
        request carries no ``imfile`` upload.
    """
    img_data = request.files.get('imfile')
    if img_data is None:
        # The old '' default had no .read() and failed with AttributeError.
        return 'No image uploaded in form field "imfile".', 400
    img = img_data.read()
    prediction = ml.classify(img)
    disp_im = b64encode(img).decode("utf-8")
    return render_template("predictor.html", result=prediction, file=disp_im)
def train(dataset_train, dataset_test, caffemodel=''):
    """Train the multi-view model, evaluating at the start of every epoch.

    Settings come from the module-level ``config`` object. When
    ``config.weights`` is -1 training starts at epoch 0 (optionally from
    ``caffemodel``); otherwise the snapshot with that number is restored and
    training resumes at the following epoch. Each epoch first evaluates on
    ``dataset_test`` through ``_test``, logs and plots the metrics, then
    iterates over the training batches. Snapshots are written every
    ``config.save_period`` epochs and after the last epoch.

    Args:
        dataset_train: training set exposing ``shuffle()``, ``size()`` and
            ``batches(batch_size)``.
        dataset_test: evaluation set forwarded to ``_test``.
        caffemodel: pretrained weights passed to
            ``model.load_alexnet_to_mvcnn``; only used for a fresh run.

    Raises:
        AssertionError: when the training loss becomes NaN.
    """
    print('train() called')
    V = config.num_views
    batch_size = config.batch_size
    dataset_train.shuffle()
    data_size = dataset_train.size()
    print('training size:', data_size)
    with tf.Graph().as_default():
        with tf.device('/gpu:0'):
            tf_config = tf.ConfigProto(log_device_placement=False)
            tf_config.gpu_options.allow_growth = True
            tf_config.allow_soft_placement = True
            global_step = tf.Variable(0, trainable=False)
            # placeholders for graph input
            view_ = tf.placeholder('float32', shape=(None, V, 227, 227, 3), name='im0')
            # ``(None)`` is plain None, so the label shape is left unspecified.
            y_ = tf.placeholder('int64', shape=(None), name='y')
            keep_prob_ = tf.placeholder('float32')
            # graph outputs
            fc8 = model.inference_multiview(view_, config.num_classes, keep_prob_)
            loss = model.loss(fc8, y_)
            train_op = model.train(loss, global_step, data_size)
            prediction = model.classify(fc8)
            # Bundle of tensors handed to _test; the order is fixed here.
            placeholders = [view_, y_, keep_prob_, prediction, loss]
            # NOTE(review): these two placeholders are not referenced again
            # in this function.
            validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
            validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
            saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)
            init_op = tf.global_variables_initializer()
            sess = tf.Session(config=tf_config)
            weights = config.weights
            if weights == -1:
                # Fresh run: initialise, optionally loading pretrained weights.
                startepoch = 0
                if caffemodel:
                    sess.run(init_op)
                    model.load_alexnet_to_mvcnn(sess, caffemodel)
                    print('loaded pretrained caffemodel:', caffemodel)
                else:
                    sess.run(init_op)
                    print('init_op done')
            else:
                # Resume from snapshot number ``weights``.
                ld = config.log_dir
                startepoch = weights + 1
                ckptfile = os.path.join(ld, config.snapshot_prefix + str(weights))
                saver.restore(sess, ckptfile)
                print('restore variables done')
            # Running totals since the last training log line.
            total_seen = 0
            total_correct = 0
            total_loss = 0
            step = 0
            begin = startepoch
            end = config.max_epoch + startepoch
            # The range is inclusive of ``end``.
            for epoch in xrange(begin, end + 1):
                acc, eval_loss, predictions, labels = _test(
                    dataset_test, config, sess, placeholders)
                print('epoch %d: step %d, validation loss=%.4f, acc=%f' %
                      (epoch, step, eval_loss, acc * 100.))
                LOSS_LOGGER.log(eval_loss, epoch, "eval_loss")
                ACC_LOGGER.log(acc, epoch, "eval_accuracy")
                ACC_LOGGER.save(config.log_dir)
                LOSS_LOGGER.save(config.log_dir)
                ACC_LOGGER.plot(dest=config.log_dir)
                LOSS_LOGGER.plot(dest=config.log_dir)
                for batch_x, batch_y in dataset_train.batches(batch_size):
                    step += 1
                    feed_dict = {view_: batch_x, y_: batch_y, keep_prob_: 0.5}
                    _, pred, loss_value = sess.run([
                        train_op,
                        prediction,
                        loss,
                    ], feed_dict=feed_dict)
                    total_loss += loss_value
                    correct = np.sum(pred == batch_y)
                    total_correct += correct
                    # Counts a full batch_size for every batch.
                    # NOTE(review): a shorter final batch would be
                    # over-counted - confirm batches() always yields full ones.
                    total_seen += batch_size
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    if step % max(config.train_log_frq / config.batch_size, 1) == 0:
                        acc_ = total_correct / float(total_seen)
                        ACC_LOGGER.log(acc_, epoch, "train_accuracy")
                        # Mean loss per batch since the last log line.
                        loss_ = total_loss / float(total_seen / batch_size)
                        LOSS_LOGGER.log(loss_, epoch, "train_loss")
                        print('epoch %d step %d, loss=%.2f, acc=%.2f' %
                              (epoch, step, loss_, acc_))
                        total_seen = 0
                        total_correct = 0
                        total_loss = 0
                if epoch % config.save_period == 0 or epoch == end:
                    checkpoint_path = os.path.join(
                        config.log_dir, config.snapshot_prefix + str(epoch))
                    saver.save(sess, checkpoint_path)
synapse = json.load(data_file) synapse_0 = np.asarray(synapse['synapse0']) synapse_1 = np.asarray(synapse['synapse1']) print sys.argv if len(sys.argv) <= 1: print( "Error : Argument missing. Run like...\npython process.py train\npython process.py test\n" ) sys.exit() if sys.argv[1] == 'train': train(X, y, classes, words, hidden_neurons=10, alpha=0.021, epochs=50000, dropout=True, dropout_percent=0.2) if sys.argv[1] == 'test': with open('test-data.txt') as f: content = f.readlines() for item in content: sents = item.strip() predicted_cls = classify(sents, stemmer, classes, words, synapse_0, synapse_1, ERROR_THRESHOLD) print predicted_cls, sents
def train(cfg, dataset_train, dataset_val, ckptfile='', caffemodel=''):
    """Train the multi-view model for 100 epochs, keeping the best checkpoint.

    After every epoch the whole validation set is evaluated; whenever the
    accuracy improves on the best seen so far, a checkpoint is saved as
    ``best_model.ckpt`` under ``cfg.ckpt_folder``. Summaries go to
    ``FLAGS.train_dir``.

    Args:
        cfg: settings object; only ``cfg.ckpt_folder`` is read here.
        dataset_train: training set exposing ``get_len()`` (returning size and
            batch count) and ``get_batch(i)``.
        dataset_val: validation set with the same interface.
        ckptfile: checkpoint loaded through ``optimistic_restore`` when given.
        caffemodel: pretrained weights passed to
            ``model.load_alexnet_to_mvcnn`` when no checkpoint is given.

    Raises:
        AssertionError: when the training loss becomes NaN.
    """
    print ('train() called')
    is_finetune = bool(ckptfile)
    V = g_.NUM_VIEWS
    # NOTE(review): this local is not referenced again; the code below uses
    # FLAGS.batch_size and g_.BATCH_SIZE directly.
    batch_size = FLAGS.batch_size
    # dataset_train.shuffle()
    # dataset_val.shuffle()
    data_size, num_batch = dataset_train.get_len()
    # data_size = len(dataset_train)
    # print ('train size:', data_size)
    data_size_test, num_batch_test = dataset_val.get_len()
    print ('train size:', data_size)
    print ('test size:', data_size_test)
    best_eval_acc = 0
    with tf.Graph().as_default():
        # The step counter always starts at 0, even when fine-tuning.
        # startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)
        # placeholders for graph input
        view_ = tf.placeholder('float32', shape=(None, V, 224, 224, 3), name='im0')
        # ``(None)`` is plain None, so the label shape is left unspecified.
        y_ = tf.placeholder('int64', shape=(None), name='y')
        is_training_pl = tf.placeholder(tf.bool, shape=())
        bn_decay = get_bn_decay(startstep)
        # graph outputs
        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, is_training_pl, bn_decay=bn_decay)
        loss = model.loss(fc8, y_)
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)
        # build the summary operation based on the TF collection of Summaries
        summary_op = tf.summary.merge_all()
        # The validation summaries are created after merge_all so that they
        # are not part of summary_op; they are run separately with fed values.
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)
        # tvars = tf.trainable_variables()
        # print (tvars)
        # print (tf.get_default_graph().as_graph_def())
        saver = tf.train.Saver()
        init_op = tf.global_variables_initializer()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)
        if is_finetune:
            # load checkpoint file: initialise everything first, then let
            # optimistic_restore load from the checkpoint
            sess.run(init_op)
            optimistic_restore(sess, ckptfile)
            # saver.restore(sess, ckptfile)
            print ('restore variables done')
        elif caffemodel:
            # load caffemodel generated with caffe-tensorflow
            sess.run(init_op)
            model.load_alexnet_to_mvcnn(sess, caffemodel)
            print ('loaded pretrained caffemodel:', caffemodel)
        else:
            # from scratch
            sess.run(init_op)
            print ('init_op done')
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)
        step = startstep
        for epoch in range(100):
            # Per-epoch accumulators for the training and validation statistics.
            total_correct_mv = 0
            loss_sum_mv = 0
            total_seen = 0
            val_correct_sum = 0
            val_seen = 0
            loss_val_sum = 0
            print ('epoch:', epoch)
            for i in range(num_batch):
                # st = time.time()
                batch_x, batch_y = dataset_train.get_batch(i)
                # print (time.time()-st)
                step += 1
                start_time = time.time()
                feed_dict = {view_: batch_x,
                             y_ : batch_y,
                             is_training_pl: True }
                _, pred, loss_value = sess.run(
                    [train_op, prediction, loss,],
                    feed_dict=feed_dict)
                duration = time.time() - start_time
                correct_mv = np.sum(pred == batch_y)
                total_correct_mv += correct_mv
                # Every batch is counted as g_.BATCH_SIZE samples.
                total_seen += g_.BATCH_SIZE
                loss_sum_mv += (loss_value * g_.BATCH_SIZE)
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                # print training information (running averages for this epoch)
                if step % 500 == 0 :
                    # print (pred)
                    # print (batch_y)
                    sec_per_batch = float(duration)
                    print ('%s: step %d, loss=%.2f, acc=%.4f (%.1f examples/sec; %.3f sec/batch)' \
                        % (datetime.now(), step, loss_sum_mv / float(total_seen),
                           total_correct_mv / float(total_seen),
                           FLAGS.batch_size/duration, sec_per_batch))
                    # for i in range(num_batch_test):
                    #     val_batch_x, val_batch_y = dataset_val.get_batch(i)
                    #     val_feed_dict = {view_: val_batch_x,
                    #                      y_: val_batch_y,
                    #                      is_training_pl: False}
                    #     val_loss, pred = sess.run([loss, prediction], feed_dict=val_feed_dict)
                    #
                    #     correct_mv_val = np.sum(pred == val_batch_y)
                    #     val_correct_sum += correct_mv_val
                    #     val_seen += g_.BATCH_SIZE
                    #     loss_val_sum += (val_loss * g_.BATCH_SIZE)
                    #
                    #     if i == 10:
                    #         print (pred)
                    #         print (val_batch_y)
                    # print ('val loss=%.4f, acc=%.4f' % ((loss_val_sum / float(val_seen)), (val_correct_sum / float(val_seen))))
                if step % 1000 == 0:
                    # Re-runs the merged summaries on the current training batch.
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()
            # validation over every validation batch
            # val_losses = []
            # predictions = np.array([])
            # val_y = []
            for i in range(num_batch_test):
                val_batch_x, val_batch_y = dataset_val.get_batch(i)
                val_feed_dict = {view_: val_batch_x,
                                 y_ : val_batch_y,
                                 is_training_pl: False }
                val_loss, pred = sess.run([loss, prediction], feed_dict=val_feed_dict)
                correct_mv_val = np.sum(pred == val_batch_y)
                val_correct_sum += correct_mv_val
                val_seen += g_.BATCH_SIZE
                loss_val_sum += (val_loss * g_.BATCH_SIZE)
            val_mean_loss = (loss_val_sum / float(val_seen))
            acc = (val_correct_sum / float(val_seen))
            # Save only when validation accuracy improves on the best so far.
            if acc > best_eval_acc:
                best_eval_acc = acc
                checkpoint_path = os.path.join(cfg.ckpt_folder, 'best_model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
            print ('%s: epoch %d, validation loss=%.4f, acc=%f, best_acc=%f' %\
                (datetime.now(), epoch, val_mean_loss, acc, best_eval_acc))
            # validation summary
            val_loss_summ = sess.run(validation_summary,
                                     feed_dict={validation_loss: val_mean_loss})
            val_acc_summ = sess.run(validation_acc_summary,
                                    feed_dict={validation_acc: acc})
            summary_writer.add_summary(val_loss_summ, step)
            summary_writer.add_summary(val_acc_summ, step)
            summary_writer.flush()
def test(dataset, ckptfile):
    """Evaluate a trained single-image model on ``dataset`` and print accuracy.

    The graph is rebuilt from ``model``; weights come from
    ``FLAGS.caffemodel`` when that flag is set, otherwise they are restored
    from ``ckptfile``. Batches from ``dataset.batches`` are classified until
    the data runs out or ``FLAGS.max_steps`` batches have been processed.

    Args:
        dataset: object exposing ``size()`` and ``batches(batch_size)``; the
            latter yields ``(batch_x, batch_y)`` pairs.
        ckptfile: checkpoint path handed to ``saver.restore``; ignored when
            ``FLAGS.caffemodel`` is set.

    Raises:
        AssertionError: if a batch produces a NaN loss.
    """
    print('test() called')
    batch_size = FLAGS.batch_size

    data_size = dataset.size()
    print('dataset size: {}'.format(data_size))

    with tf.Graph().as_default():
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        # NOTE(review): train_op is never run here; presumably it is built so
        # the variables seen by the Saver match the training graph -- confirm.
        train_op = model.train(loss, global_step, data_size)

        # Same summary / variable APIs as train() so both entry points work
        # on the same TensorFlow version.
        summary_op = tf.summary.merge_all()

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if FLAGS.caffemodel:
            caffemodel = FLAGS.caffemodel
            # No init_op run here: load_model(fc8=True) is relied upon to
            # assign the weights (assumption -- verify against model.py).
            model.load_model(sess, caffemodel, fc8=True)
            print('loaded pretrained caffemodel: {}'.format(caffemodel))
        else:
            saver.restore(sess, ckptfile)
            print('restore variables done')

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        step = startstep

        predictions = []
        labels = []

        for batch_x, batch_y in dataset.batches(batch_size):
            if step >= FLAGS.max_steps:
                break
            step += 1

            if step == 1:
                # Dump the very first input image for a visual sanity check.
                img = batch_x[0, ...]
                cv2.imwrite('img0.jpg', img)

            start_time = time.time()
            feed_dict = {
                image_: batch_x,
                y_: batch_y,
                keep_prob_: 1.0,       # no dropout at evaluation time
                phase_train_: False,   # placeholder has no default; feed it
            }

            pred, loss_value = sess.run([prediction, loss],
                                        feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                sec_per_batch = float(duration)
                print('%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                      % (datetime.now(), step, loss_value,
                         FLAGS.batch_size / duration, sec_per_batch))

            predictions.extend(pred.tolist())
            labels.extend(batch_y.tolist())

        print(labels)
        print(predictions)
        acc = metrics.accuracy_score(labels, predictions)
        print('acc: {}'.format(acc * 100))
def test(dataset, ckptfile):
    """Evaluate a trained multi-view model on ``dataset`` and print accuracy.

    The multi-view graph is rebuilt from ``model``, its variables are
    restored from ``ckptfile``, and every batch yielded by
    ``dataset.batches`` is classified. The accuracy over all collected
    predictions is printed as a percentage.

    Args:
        dataset: object exposing ``size()`` and ``batches(batch_size)``; the
            latter yields ``(batch_x, batch_y)`` pairs.
        ckptfile: checkpoint path handed to ``saver.restore``.

    Raises:
        AssertionError: if a batch produces a NaN loss.
    """
    print('test() called')
    V = g_.NUM_VIEWS
    batch_size = FLAGS.batch_size

    data_size = dataset.size()
    print('dataset size: {}'.format(data_size))

    with tf.Graph().as_default():
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)

        view_ = tf.placeholder('float32', shape=(None, V, 227, 227, 3),
                               name='im0')
        y_ = tf.placeholder('int64', shape=(None), name='y')
        keep_prob_ = tf.placeholder('float32')

        fc8 = model.inference_multiview(view_, g_.NUM_CLASSES, keep_prob_)
        loss = model.loss(fc8, y_)
        # NOTE(review): train_op is never run here; presumably it is built so
        # the variables seen by the Saver match the training graph -- confirm.
        train_op = model.train(loss, global_step, data_size)
        prediction = model.classify(fc8)

        # NOTE(review): merge_all_summaries / all_variables /
        # initialize_all_variables / train.SummaryWriter are pre-1.0
        # TensorFlow names; left untouched because the TensorFlow version this
        # model targets cannot be determined from this function alone.
        summary_op = tf.merge_all_summaries()

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1000)

        init_op = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        saver.restore(sess, ckptfile)
        print('restore variables done')

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph=sess.graph)

        step = startstep

        predictions = []
        labels = []

        # float() forces true division: with two ints Python 2 would floor
        # first and the ceil would under-report a trailing partial batch.
        num_iterations = int(math.ceil(data_size / float(batch_size)))

        print("Start testing")
        print("Size: {}".format(data_size))
        print("It'll take {} iterations.".format(num_iterations))

        for batch_x, batch_y in dataset.batches(batch_size):
            step += 1

            start_time = time.time()
            feed_dict = {
                view_: batch_x,
                y_: batch_y,
                keep_prob_: 1.0,   # no dropout at evaluation time
            }

            pred, loss_value = sess.run([prediction, loss],
                                        feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                sec_per_batch = float(duration)
                print('%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                      % (datetime.now(), step, loss_value,
                         FLAGS.batch_size / duration, sec_per_batch))

            predictions.extend(pred.tolist())
            labels.extend(batch_y.tolist())

        acc = metrics.accuracy_score(labels, predictions)
        print('acc: {}'.format(acc * 100))
import global_var as gvr
import global_fe as gfe
import model as mdl
import utly

# Driver script: initialise globals, make sure a dataset exists, classify.
print('\nStarting program...')

gvr.init()

# A truthy result from utly.check() is treated as "dataset missing".
val = utly.check()
print('\nDataset not found.' if val else '\nDataset found.')
if val:
    gfe.createDataset()

mdl.classify()

print('\nProgram executed successfully.')
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    """Train the model, validating, summarising and checkpointing as it goes.

    Runs at most 100 epochs over ``dataset_train`` and stops as soon as the
    step counter reaches ``FLAGS.max_steps``. Every 100 steps a sample of
    ``dataset_val`` is evaluated and training summaries are written; every
    200 steps, and after the final step, a checkpoint is saved under
    ``FLAGS.train_dir``.

    Args:
        dataset_train: object exposing ``size()``, ``shuffle()`` and
            ``batches(batch_size)`` yielding ``(batch_x, batch_y)`` pairs.
        dataset_val: object exposing ``sample_batches(batch_size, n)``
            yielding ``(batch_x, batch_y)`` pairs.
        ckptfile: if non-empty, variables are restored from this checkpoint
            and the step counter resumes from the integer after the last
            ``-`` in the path.
        caffemodel: if non-empty (and ``ckptfile`` is empty), variables are
            initialised and then ``model.load_alexnet`` is applied.

    Raises:
        ValueError: if ``ckptfile`` is given but does not end in ``-<int>``.
        AssertionError: if a training batch produces a NaN loss.
    """
    print('train() called')
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size

    data_size = dataset_train.size()
    print('training size: {}'.format(data_size))

    with tf.Graph().as_default():
        # Resume the step counter from the "-<step>" suffix of the checkpoint.
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # Merge the summaries registered so far. The validation summaries
        # are deliberately created AFTER this call so they stay out of
        # summary_op and can be run on their own with fed scalar values.
        summary_op = tf.summary.merge_all()

        validation_loss = tf.placeholder('float32', shape=(),
                                         name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss',
                                               validation_loss)
        validation_acc = tf.placeholder('float32', shape=(),
                                        name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy',
                                                   validation_acc)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

        init_op = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if is_finetune:
            saver.restore(sess, ckptfile)
            print('restore variables done')
        elif caffemodel:
            sess.run(init_op)
            model.load_alexnet(sess, caffemodel)
            print('loaded pretrained caffemodel: {}'.format(caffemodel))
        else:
            # from scratch
            sess.run(init_op)
            print('init_op done')

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        step = startstep
        for epoch in range(100):
            # Stop the epoch loop too once the step budget is used up, so no
            # further shuffling or batch loading happens.
            if step >= FLAGS.max_steps:
                break

            print('epoch: {}'.format(epoch))
            dataset_train.shuffle()

            for batch_x, batch_y in dataset_train.batches(batch_size):
                if step >= FLAGS.max_steps:
                    break
                step += 1

                start_time = time.time()
                feed_dict = {
                    image_: batch_x,
                    y_: batch_y,
                    keep_prob_: 0.5,
                    phase_train_: True,
                }

                _, loss_value, _ = sess.run([train_op, loss, print_op],
                                            feed_dict=feed_dict)

                duration = time.time() - start_time

                assert not np.isnan(loss_value), \
                    'Model diverged with loss = NaN'

                # Log every step early on, then every 10th step.
                if step % 10 == 0 or step < 30:
                    sec_per_batch = float(duration)
                    print('%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)'
                          % (datetime.now(), step, loss_value,
                             FLAGS.batch_size / duration, sec_per_batch))

                # Periodic validation on a sample of the validation set.
                if step % 100 == 0:
                    val_losses = []
                    predictions = np.array([])
                    val_y = []

                    val_batches = dataset_val.sample_batches(
                        batch_size, g_.VAL_SAMPLE_SIZE)
                    for val_batch_x, val_batch_y in val_batches:
                        val_feed_dict = {
                            image_: val_batch_x,
                            y_: val_batch_y,
                            keep_prob_: 1.0,
                            phase_train_: False,
                        }
                        val_loss, pred, _ = sess.run(
                            [loss, prediction, print_op],
                            feed_dict=val_feed_dict)

                        val_losses.append(val_loss)
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_loss = np.mean(val_losses)
                    # Labels are trimmed to the number of predictions made.
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print('%s: step %d, validation loss=%.4f, acc=%f'
                          % (datetime.now(), step, val_loss, acc * 100.))

                    # validation summary
                    val_loss_summ = sess.run(
                        validation_summary,
                        feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(
                        validation_acc_summary,
                        feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                # Training summaries, evaluated on the batch just trained on.
                if step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # step was already incremented for this batch, so the last
                # batch of the run has step == max_steps (not max_steps - 1);
                # checkpoint it so the final weights are always saved.
                if step % 200 == 0 or step == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
import json

import model

training_data_path = 'train.json'
test_data_path = 'test.json'


def _read_json(path):
    """Return the parsed JSON document stored at ``path``."""
    with open(path) as handle:
        return json.load(handle)


# Load both splits, then fit the classifier on the training documents.
training_data = _read_json(training_data_path)
test_data = _read_json(test_data_path)

prior, condprob, vocab = model.train(training_data)

total = len(test_data)
corrects = 0
for doc in test_data:
    # Remove the gold label so the classifier only sees the other fields.
    category = doc.pop('category')
    predicted_category = model.classify(prior, condprob, vocab, doc)
    corrects += 1 if category == predicted_category else 0

accuracy = corrects / total
print('Accuracy: {:.3f}'.format(accuracy))
Y = [data[k]['labels'] for k in data.keys()] Y = encode_label_hierarchical(Y, all_labels) train = indices[:int(N * 0.5)] # 70% data used for training test = indices[int(N * 0.5):int(N * 0.8)] # 20% data used for testing val = indices[int(N * 0.8):] # 10% data used for validation train = data_set(X[train, :], Y[train, :], A) test = data_set(X[test, :], Y[test, :], A) val = data_set(X[val, :], Y[val, :], A) return train, test, val labeled_patent_data, unlabeled_patent_data = load_data_small(10000) label_list = [ labeled_patent_data[k]['labels'] for k in labeled_patent_data.keys() ] sections, classes, subclasses = get_all_labels( label_list) # returns A, B, .. | A01, A02, ..| A01B, A01C, .. data = extract_features(labeled_patent_data, extractor="tfidf+glove", K=300) A = construct_adjacency_matrix(sections, classes, subclasses) all_labels = sections + classes + subclasses train, test, val = construct_train_test_val_datasets(data, all_labels, A) results = classify( train, test, val, 300, [len(sections), len(classes), len(subclasses)]) # epoch num = 500