def prepare_data_for_modelling(train, sub):
    """Create a list of unique image ids and the count of masks per image.
    This allows a stratified train/validation split based on that count."""
    # Count how many non-empty masks each image has.
    id_mask_count = train.loc[train['EncodedPixels'].notnull(), 'Image_Label'] \
        .apply(lambda x: x.split('_')[0]).value_counts() \
        .reset_index().rename(columns={'index': 'img_id', 'Image_Label': 'count'})
    # Stratify on the mask count so train and validation have a similar mask density.
    train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values,
                                            random_state=42,
                                            stratify=id_mask_count['count'],
                                            test_size=0.1)
    print("train_ids = ", train_ids)
    print(len(train_ids))
    test_ids = sub['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    print("len(test_ids) = ", len(test_ids))
    # Sanity-check the split by visualizing the first few training images.
    for i in range(10):
        image_name = train_ids[i]
        image = helpers.get_img(image_name, input_path=input_path)
        mask = helpers.make_mask(train, image_name)
        helpers.visualize(image, mask)
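# For reference, a minimal sketch of the run-length decoding that
# helpers.make_mask presumably performs. This is an assumption: the decoder
# below implements the standard Kaggle RLE format ("start length" pairs,
# 1-indexed, column-major order), and the (1400, 2100) shape is a placeholder.
import numpy as np

def rle_decode(encoded, shape=(1400, 2100)):
    """Decode a run-length-encoded string into a binary mask."""
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    tokens = encoded.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # RLE positions are 1-indexed
    lengths = np.asarray(tokens[1::2], dtype=int)
    for start, length in zip(starts, lengths):
        mask[start:start + length] = 1
    return mask.reshape(shape, order='F')  # runs go down columns first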
def visualize_strong(self):
    image_hsv = cv2.cvtColor(self.image.copy(), cv2.COLOR_BGR2HSV)
    # "Strong" pixels: saturation exceeds a weighted combination of hue and value.
    indices = np.where(1.03 * image_hsv[:, :, 0] + 0.47 * image_hsv[:, :, 2]
                       < image_hsv[:, :, 1])
    t = self.image.copy()
    t[indices] = 0  # black out the detected pixels
    strong_with_closing = h.closing(t, channel=True)
    h.visualize(original_image=self.image, strong=t,
                strong_with_closing=strong_with_closing)
    # Count zeroed entries after closing; the denominator is the pixel count.
    area = np.count_nonzero(strong_with_closing == 0)
    return area, area / (np.size(self.image) // 3), indices
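# h.visualize is called with keyword arguments whose names double as plot
# titles. One plausible implementation, assuming a matplotlib side-by-side
# layout (a sketch, not the actual helper):
import matplotlib.pyplot as plt

def visualize(**images):
    """Plot all given images in one row; keyword names become titles."""
    n = len(images)
    plt.figure(figsize=(6 * n, 6))
    for i, (name, img) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.title(name.replace('_', ' '))
        plt.axis('off')
        plt.imshow(img)
    plt.show()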
def visualize_weak(self):
    image_hsv = cv2.cvtColor(self.image.copy(), cv2.COLOR_BGR2HSV)
    # "Weak" pixels: moderately saturated, hue clearly exceeds saturation,
    # and value stays below a cap.
    indices = np.where(np.logical_and(
        image_hsv[:, :, 1] > 15,
        np.logical_and(image_hsv[:, :, 0] > 1.15 * image_hsv[:, :, 1],
                       image_hsv[:, :, 2] <= 215)))
    t = self.image.copy()
    t[indices] = 0  # black out the detected pixels
    weak_with_closing = h.closing(t, channel=True)
    h.visualize(original_image=self.image, weak=t,
                weak_with_closing=weak_with_closing)
    area = np.count_nonzero(weak_with_closing == 0)
    return area, area / (np.size(self.image) // 3), indices
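# Both methods rely on h.closing to clean up the thresholded image. A minimal
# sketch of such a helper, assuming morphological closing (dilation followed
# by erosion) with a square kernel; the kernel size and the meaning of the
# channel flag are assumptions:
import cv2
import numpy as np

def closing(image, channel=False, kernel_size=5):
    """Apply morphological closing to fill small holes in the mask."""
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    if channel:
        # Close each colour channel independently and restack.
        return np.stack([cv2.morphologyEx(image[:, :, c], cv2.MORPH_CLOSE, kernel)
                         for c in range(image.shape[2])], axis=-1)
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)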
def object_detect(model_name, url_file, extension=EXTENSION,
                  downloaded=DOWNLOADED, json_output_file=JSON_OUTPUT_FILE,
                  n_threads=N_THREADS, visualize=VISUALIZE,
                  path_to_labels=PATH_TO_LABELS):
    model_file = model_name + '.tar.gz'
    path_to_ckpt = model_name + '/frozen_inference_graph.pb'
    json_url_file = url_file + '.json'
    csv_url_file = url_file + '.csv'
    helpers.download_extract_model(model_file=model_file, downloaded=downloaded)
    if extension == '.json':
        helpers.json2csv(json_name=json_url_file, csv_name=csv_url_file)
    li = pd.read_csv(csv_url_file)
    url_li = li['img_url'].tolist()
    id_li = li['img_id'].tolist()

    # Load the frozen detection graph.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_ckpt, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        # Feed downloaded images through a padding queue so that several
        # reader threads can run ahead of inference.
        im = helpers.url_image_reader(url_li)
        queue = tf.PaddingFIFOQueue(capacity=500, dtypes=tf.uint8,
                                    shapes=[(None, None, None)])
        enq_op = queue.enqueue(im)
        inputs = queue.dequeue_many(BATCH_SIZE)
        qr = tf.train.QueueRunner(queue, [enq_op] * n_threads)
        with tf.Session(graph=detection_graph) as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            enqueue_threads = qr.create_threads(sess=sess, coord=coord, start=True)
            conversion_time = []
            ix = 1
            res = {}
            category_index = helpers.load_category_index(path_to_labels)
            t = time.time()
            try:
                while not coord.should_stop():
                    # Tensor of dimension (1, None, None, 3)
                    image = sess.run(inputs)
                    print('Processing Image', ix)
                    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                    scores = detection_graph.get_tensor_by_name('detection_scores:0')
                    classes = detection_graph.get_tensor_by_name('detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                    t2 = time.time()
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image})
                    conversion_time.append(time.time() - t2)
                    print('Image', ix, 'Processing Time:',
                          conversion_time[ix - 1], 'sec')
                    res[id_li[ix - 1]] = {
                        'boxes': np.squeeze(boxes),
                        'scores': np.squeeze(scores),
                        'classes': np.squeeze(classes).astype(np.int32),
                        'num_detections': num_detections
                    }
                    ix += 1
            except tf.errors.OutOfRangeError:
                print('Total Image Processing Time:', sum(conversion_time), 'sec')
                print('Total Time Consumed:', time.time() - t, 'sec')
            finally:
                coord.request_stop()
            helpers.dict2json(res, json_output_file)
            if visualize:
                helpers.visualize(csv_url_file, res, category_index)
            coord.join(enqueue_threads)
    return res
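# A hypothetical invocation of object_detect. The model name follows the
# TF1 detection model zoo naming scheme and 'urls' stands in for a real
# urls.json/urls.csv file; both are assumptions for illustration:
if __name__ == '__main__':
    results = object_detect(model_name='ssd_mobilenet_v1_coco_2017_11_17',
                            url_file='urls',
                            extension='.json',
                            visualize=True)
    print('Detections collected for', len(results), 'images')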
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--input', help='Path to input file with contexts', required=True)
    arg('--model', help='Path to word2vec model', required=True)
    arg('--weights', dest='weights', action='store_true',
        help='Use word weights?')
    arg('--2stage', dest='twostage', action='store_true',
        help='2-stage clustering?')
    arg('--test', dest='testing', action='store_true',
        help='Make predictions for test file with no gold labels?')
    parser.set_defaults(testing=False)
    parser.set_defaults(twostage=False)
    parser.set_defaults(weights=False)
    args = parser.parse_args()

    modelfile = args.model
    if modelfile.endswith('.bin.gz'):  # Word2vec binary format
        model = gensim.models.KeyedVectors.load_word2vec_format(modelfile, binary=True)
    elif modelfile.endswith('.vec.gz'):  # Word2vec text format
        model = gensim.models.KeyedVectors.load_word2vec_format(modelfile, binary=False)
    elif 'fasttext' in modelfile and modelfile.endswith('model'):
        # fastText in Gensim native format
        model = gensim.models.fasttext.FastText.load(modelfile)
    else:  # word2vec in Gensim native format
        model = gensim.models.KeyedVectors.load(modelfile)
    model.init_sims(replace=True)

    dataset = args.input

    # This combination of the Affinity Propagation parameters was best in our experiments.
    # But in your task they can be different!
    damping = 0.7
    preference = -0.7

    df = read_csv(dataset, sep="\t", encoding="utf-8")

    predicted = []
    goldsenses = []
    for query in df.word.unique():
        print('Now analyzing', query, '...', file=sys.stderr)
        subset = df[df.word == query]
        if not args.testing:
            goldsenses.append(len(subset.gold_sense_id.unique()))
        contexts = []
        matrix = np.empty((subset.shape[0], model.vector_size))
        counter = 0
        lengths = []
        for line in subset.iterrows():
            con = line[1].context
            identifier = line[1].context_id
            label = query + str(identifier)
            contexts.append(label)
            if type(con) == float:
                print('Empty context at', label, file=sys.stderr)
                fp = np.zeros(model.vector_size)
            else:
                bow = con.split()
                bow = [b for b in bow if b != query]
                fp = fingerprint(bow, model, weights=args.weights)
                lengths.append(len(bow))
            matrix[counter, :] = fp
            counter += 1
        clustering = AffinityPropagation(preference=preference, damping=damping,
                                         random_state=None).fit(matrix)
        # Two-stage clustering
        if args.twostage:
            nclusters = len(clustering.cluster_centers_indices_)
            if nclusters < 1:
                print('Fallback to 1 cluster!', file=sys.stderr)
                nclusters = 1
            elif nclusters == len(contexts):
                print('Fallback to 4 clusters!', file=sys.stderr)
                nclusters = 4
            clustering = SpectralClustering(n_clusters=nclusters, n_init=20,
                                            assign_labels='discretize',
                                            n_jobs=2).fit(matrix)
        # End two-stage clustering
        cur_predicted = clustering.labels_.tolist()
        predicted += cur_predicted
        if not args.testing:
            gold = subset.gold_sense_id
            print('Gold clusters:', len(set(gold)), file=sys.stderr)
        print('Predicted clusters:', len(set(cur_predicted)), file=sys.stderr)
        if args.testing:
            if len(set(cur_predicted)) < 12:
                visualize(contexts, matrix, cur_predicted, query)
        else:
            if len(set(gold)) < 6 and len(set(cur_predicted)) < 12:
                visualize(contexts, matrix, cur_predicted, query, gold)
            else:
                print('Too many clusters, not visualizing', file=sys.stderr)

    df.predict_sense_id = predicted
    fname = path.splitext(path.basename(args.input))[0]
    if args.testing:
        save(df, fname)
    else:
        res = evaluate(save(df, fname))
        print('ARI:', res)
        print('Average number of senses:', np.average(goldsenses))
        print('Variation of the number of senses:', np.std(goldsenses))
        print('Minimum number of senses:', np.min(goldsenses))
        print('Maximum number of senses:', np.max(goldsenses))
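# fingerprint() turns a bag of context words into a single vector. A minimal
# sketch, assuming it averages the vectors of in-vocabulary tokens and
# L2-normalizes the result; the weighted variant is omitted here:
import numpy as np

def fingerprint(bow, model, weights=False):
    """Average in-vocabulary word vectors into one context embedding."""
    vectors = [model[word] for word in bow if word in model]
    if not vectors:
        return np.zeros(model.vector_size)
    fp = np.mean(vectors, axis=0)
    norm = np.linalg.norm(fp)
    return fp / norm if norm > 0 else fp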
model.load_weights(model_path)
if args.timer:
    # time.clock() was removed in Python 3.8; perf_counter() is the replacement.
    time_elapsed0 = time.perf_counter() - time_start
    print("****** Loading the model takes", "%.2f" % time_elapsed0, 's *******')

# Load the image.
image = cv2.imread(args.image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # imread returns BGR; convert to RGB
image = cv2.resize(image, (320, 480))
image = np.expand_dims(image, axis=0)  # add the batch dimension
preprocess_input = sm.get_preprocessing(args.backbone)  # backbone-specific normalization
image = preprocess_input(image)

# Infer the segmentation.
print('Inferring the avalanche...')
time_start = time.perf_counter()
prediction = model.predict(image)
if args.timer:
    time_elapsed = time.perf_counter() - time_start
    print("****** Inferring the avalanche takes", "%.2f" % time_elapsed, 's *******')
    print('****** Total time', "%.2f" % (time_elapsed0 + time_elapsed), 's ******')

# Visualize the results.
pr_mask = prediction.round()
visualize(image=denormalize(image.squeeze()),
          Prediction_mask=pr_mask[..., 0].squeeze())
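# denormalize() has to undo the backbone-specific preprocessing before the
# image can be displayed. A plausible sketch, assuming a percentile-based
# rescale to [0, 1] (a common choice in segmentation_models examples):
import numpy as np

def denormalize(x):
    """Rescale an image array to the displayable [0, 1] range."""
    x_max = np.percentile(x, 98)
    x_min = np.percentile(x, 2)
    x = (x - x_min) / (x_max - x_min + 1e-8)
    return x.clip(0, 1)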