def k_means(dataset, k, features, center_range, scale):
    """Run Lloyd's k-means clustering on `dataset`.

    :param dataset: pandas DataFrame whose columns 0..features-1 hold coordinates.
    :param k: number of clusters.
    :param features: number of feature columns; also the name of the label column
                     that `label()` appends (rows are labeled with their cluster id).
    :param center_range: passed through to gen_rand_centroids (centroid spread).
    :param scale: passed through to gen_rand_centroids.
    :return: (labeled_dataset, centroids) — the dataset with a cluster column and
             the (k, features) array of converged centroids.
    """
    # K-means initialization: k random centroids, then an initial assignment.
    centroids = gen_rand_centroids(k=k, features=features, center_range=center_range, scale=scale)
    labeled_dataset = label(dataset, centroids, k, features)
    # K-means main loop: alternate centroid update / point re-assignment.
    while True:
        # Snapshot must be a copy: centroids is updated in place below, so a
        # plain alias would make the convergence test always true and the loop
        # would stop after a single iteration.
        prev_centroids = centroids.copy()
        # Accumulators are reset every iteration; otherwise counts and partial
        # sums from earlier passes would leak into the new centroid estimates.
        points_in_cluster = np.zeros(k)          # points assigned to each cluster
        features_sum = np.zeros((k, features))   # per-cluster partial sum of each feature
        for i in range(len(labeled_dataset)):
            cluster = int(labeled_dataset.loc[i, features])  # column `features` holds the cluster id
            points_in_cluster[cluster] += 1
            for j in range(features):
                features_sum[cluster, j] += labeled_dataset.loc[i, j]
        # For each non-empty cluster, move its centroid to the mean of its points.
        for i in range(k):
            if points_in_cluster[i] == 0:
                continue  # empty cluster: keep previous centroid (avoids division by zero)
            for j in range(features):
                centroids[i, j] = features_sum[i, j] / points_in_cluster[i]
        # Re-assign points to the updated centroids (drop the old label column first).
        labeled_dataset = label(labeled_dataset.drop(columns=[features]), centroids, k, features)
        if (centroids == prev_centroids).all():  # converged: centroids stopped moving
            break
    return labeled_dataset, centroids
def main_knife(args): '''Main function for jackknife with randoms.''' # make jackknife regions if args.bdf == '' and args.rand_lbed == '': print('====== Making jackknife regions ======') rand = miscfuncs.load_data_pd(args.rand, tp='knife') jk_map, jk_bounds = knife( rand, args.njr, args.nra, args.nside, args.rra) if args.fmap != '': miscfuncs.save_jk_map(jk_map, args.fmap) if args.plotmap: print('-- note: not labeled yet, just demo of regions') miscfuncs.plot_jk_map(jk_map, shuffle=args.sf, njr=args.njr) if args.fbounds != '': miscfuncs.save_jk_bounds(jk_bounds, args.fbounds) if args.tp == 'bounds': jkr = jk_bounds elif args.tp == 'map': jkr = jk_map else: print('>> Error: wrong tp option!') sys.exit() # load bounds file if provided if args.bdf != '' and args.rand_lbed == '': print('>> Loading bounds file: {}'.format(args.bdf)) jkr = np.loadtxt(args.bdf) # label data and random points if (args.lb == 1 or args.bdf != '') and args.rand_lbed == '': print('====== Labeling data points ======') data = miscfuncs.load_data_pd(args.data) label.label(data, jkr, tp=args.tp, f_data=args.fodata, jk0=args.jk0) print('====== Labeling random points ======') data = miscfuncs.load_data_pd(args.rand) label.label(data, jkr, tp=args.tp, f_data=args.forand, jk0=args.jk0) # analyze labeled random points if args.rand_lbed != '': rand = miscfuncs.load_data_pd(args.rand_lbed) miscfuncs.analyze_rand(rand, args.sf)
def run():
    """Parse the source data, keep only valid entries, and emit the SQLite DB."""
    parsed = parse()
    group_list = label(parsed)
    # Filter each collection down to the entries that pass their own validity check.
    valid_blocks = [b for b in parsed.blocks if b.is_valid()]
    valid_chars = [c for c in parsed.chars if c.is_valid()]
    valid_emojis = [e for e in parsed.emojis if e.is_valid()]
    generate_sqlite(valid_chars, valid_emojis, valid_blocks, group_list)
def print_result(G, methode):
    """Solve graph G with the method selected by `methode` (1, 2 or 3),
    then print the graph, the result and the elapsed time.

    Unknown method values print a short usage reminder instead.
    Always returns 0.
    """
    t_start = time()
    if methode == 1:
        result, title = exhaustif(G), "methode exhaustive"
    elif methode == 2:
        result, title = parcours(G), "methode par parcours"
    elif methode == 3:
        result, title = label(G), "methode par label"
    else:
        # Unknown method: show the available choices and bail out.
        print("methode =")
        print("1 : methode exhaustive")
        print("2 : methode par parcours")
        print("3 : methode par label")
        return 0
    t_end = time()
    print("graphe : " + str(G))
    print(title)
    print(result)
    print("calcul en " + str(t_end - t_start) + " secondes")
    print("")
    return 0
def __init__(self, cnn_model_path, source_path, target_path, vocab_path, sent_len, labeled_save_dir):
    """
    :param cnn_model_path: Path to a trained cnn model.
    :param source_path: Path to instance data, the latter part of which will be labeled during active learning.
    :param target_path: Path to labels for already labeled part of the data.
    :param vocab_path: Path to vocab file.
    :param sent_len: Maximum sentence length used when reading the data.
    :param labeled_save_dir: Directory to which the labeled files will be stored.
    """
    # Load the still-unlabeled rows; each row is token indices followed by two
    # key-phrase index columns (see the [:, :-2] / [:, -2:] splits below).
    unlabeled_data = util.read_data_unlabeled_part(source_path, target_path, sent_len, shuffle=False)
    self.unlabeled_data = np.array(unlabeled_data)
    self.data_size = self.unlabeled_data.shape[0]
    self.labeled_data, self.labeled_result = util.read_data_labeled_part(
        source_path, target_path, sent_len, shuffle=False)
    # All columns except the last two are sentence token indices.
    sentence_indices_input = self.unlabeled_data[:, :-2]
    self.vocab_path = vocab_path
    _, rev_vocab = preprocessing_util.initialize_vocabulary(vocab_path)
    self.sentence_input = preprocessing_util.indices_to_sentences(
        sentence_indices_input, rev_vocab)
    # Last two columns are the positions of the two key phrases in the sentence.
    self.kp_indices_input = self.unlabeled_data[:, -2:]
    for i, sentence in enumerate(self.sentence_input):
        # Label the key phrases of interest in the current sentence with *.
        sentence[self.kp_indices_input[i, 0]] += '*'
        sentence[self.kp_indices_input[i, 1]] += '*'
    self.update_labeled_save_dir(labeled_save_dir)
    # self.labeled_save_dir = labeled_save_dir
    # self.source_save_dir = os.path.join(labeled_save_dir, 'test_cs_unlabeled_data_combined.txt')
    # self.target_save_dir = os.path.join(labeled_save_dir, 'test_cs_labels_combined.txt')
    # self.vocab_save_dir = os.path.join(labeled_save_dir, 'test_cs_vocab_combined')
    # Restore the trained model's flags and point it at the checkpoint dir,
    # then score every unlabeled instance with it.
    label_config = util.load_from_dump(
        os.path.join(cnn_model_path, 'flags.cPickle'))
    label_config['train_dir'] = cnn_model_path
    _, predicted_label = label(self.unlabeled_data, config=label_config)
    assert predicted_label.shape[0] == self.data_size
    # Softmax over the raw logits, row-wise.
    predicted_label_exp = np.exp(predicted_label)
    predicted_label_softmax = predicted_label_exp / np.sum(
        predicted_label_exp, axis=1, keepdims=True)
    # Entropy = -sum(p * log p) so this is actually the negative of entropy. For sorting purpose I took out the neg.
    predicted_label_entropy = np.sum(np.multiply(
        predicted_label_softmax, np.log(predicted_label_softmax)), axis=1)
    # The following are ways to rank what question should be asked first.
    # The first one uses entropy, but there might be some implementation errors.
    # Ascending sort of negative entropy = most uncertain instances first.
    self.predicted_label_entropy_argsort = np.argsort(
        predicted_label_entropy, axis=0).tolist()
    pass
def loadSettingMenu(self):
    """Show the player-color selection menu and block until a color button is
    clicked (or the window is closed). Returns the chosen color (0 if the
    window was closed without choosing).
    """
    playerColor = 0
    clock = pygame.time.Clock()
    fenetre = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
    # Background image scaled to the window, wrapped in a sprite.
    back = pygame.image.load("background.jpg").convert()
    back2 = pygame.transform.scale(back, (int(Game.WIDTH), int(Game.HEIGHT)))
    back3 = sprite(0, 0, back2)
    # NOTE(review): `list` here takes (items, name) — presumably a project class
    # shadowing the builtin `list`; confirm against its definition.
    menuSprites = list([back3], "sprites")
    Game.toDisplay.append(menuSprites)
    strColors = ["GREEN", "BLUE", "PURPLE", "RED", "YELLOW"]
    rgbColors = [Game.GREEN, Game.BLUE, Game.PURPLE, Game.RED, Game.YELLOW]
    # One button per selectable color, spread evenly across the window width.
    buttons = list([], "buttons")
    for i in range(0, 5):
        b = button(Game.WIDTH / 6 * (i + 1) - Game.WIDTH_BUTTON / 2,
                   Game.HEIGHT / 4, Game.WIDTH_BUTTON, Game.HEIGHT_BUTTON,
                   rgbColors[i], strColors[i], 14)
        buttons.l.append(b)
    Game.toDisplay.append(buttons)
    # Title label centered above the buttons.
    menuLabel = list([], "labels")
    text = "Select a player!"
    lab = label((Game.WIDTH - len(text) * 26) / 2, Game.HEIGHT / 5 - 26, text, 52)
    menuLabel.l.append(lab)
    Game.toDisplay.append(menuLabel)
    # Event loop: redraw, then react to quit / button clicks.
    loop = True
    while loop:
        self.updateDisplay(Game.toDisplay, fenetre)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                loop = False
            if event.type == pygame.MOUSEBUTTONDOWN:
                for li in Game.toDisplay:
                    if li.name == "buttons":
                        for b in li.l:
                            if b.intersect(event.pos):
                                # settings[4] is the button's color value.
                                playerColor = b.settings[4]
                                loop = False
    return playerColor
def main():
    """Traffic-scheduling entry point.

    Reads car/road/cross input files given on the command line, runs the
    label/schedule/start simulation loop over a (balance, limit) parameter
    search, keeps the best non-deadlocked answer, and writes it to the
    answer file as "(car_id,route...)" lines.
    """
    if len(sys.argv) != 5:
        logging.info('please input args: car_path, road_path, cross_path, answerPath')
        exit(1)
    car_path = sys.argv[1]
    road_path = sys.argv[2]
    cross_path = sys.argv[3]
    answer_path = sys.argv[4]
    logging.info("car_path is %s" % (car_path))
    logging.info("road_path is %s" % (road_path))
    logging.info("cross_path is %s" % (cross_path))
    logging.info("answer_path is %s" % (answer_path))
    # to read input file
    car_dict, road_dict, cross_dict, car_ascending, cross_ascending = read(car_path, road_path, cross_path)
    P1, P2, total_lane_num = construct(car_dict, road_dict, cross_dict)
    # process
    ans, total_car_num, road_num = defaultdict(list), len(car_ascending), len(road_dict)
    best_balance, best_limit = 0, 0
    # min_time starts at a sentinel "infinity"; dead_lock is 0/1-ish flag.
    min_time, dead_lock = 99999, 0
    for balance in [0.5, ]:
        limit = 2000
        for _ in range(1):
            # Deep copies: each simulation run mutates its own car/road state.
            car_dict_copy = copy.deepcopy(car_dict)
            road_dict_copy = copy.deepcopy(road_dict)
            car_ascending_copy = copy.deepcopy(car_ascending)
            # NOTE: `time` here shadows any imported time module within this scope.
            time, arrival_num, temp_ans = 0, 0, defaultdict(list)
            while arrival_num < total_car_num:
                # Mark waiting cars, then advance one scheduling tick.
                waiting_car_num = label(road_dict_copy, car_dict_copy, cross_dict, P1, P2, balance)
                dead_lock, schedule_arrival_num = \
                    schedule(cross_ascending, cross_dict, road_dict_copy, car_dict_copy,
                             temp_ans, waiting_car_num, P1, P2, balance)
                arrival_num += schedule_arrival_num
                if dead_lock:
                    print("dead_lock at time:", time)
                    break
                remaining_car_num = total_car_num - arrival_num
                # Release new cars onto the roads, capped by `limit`.
                start(time, car_ascending_copy, car_dict_copy, cross_dict, road_dict_copy,
                      temp_ans, remaining_car_num, P1, limit)
                time += 1
                if time > min_time:
                    # Already worse than the best run found so far; abandon.
                    break
            print("balance:", balance, "limit:", limit, "time:", time, "arrival_num:", arrival_num)
            # Keep the best (fastest, non-deadlocked) answer seen so far.
            if not dead_lock and time < min_time:
                best_limit = limit
                best_balance = balance
                min_time = time
                ans = copy.deepcopy(temp_ans)
            # Adapt the release limit: shrink after deadlock, grow otherwise.
            if dead_lock:
                limit -= 500
            else:
                limit += 800
    print("best_balance:", best_balance, "best_limit:", best_limit,
          "total_car_num:", total_car_num, "road_num:", road_num,
          "total_lane_num:", total_lane_num, "min_time:", min_time)
    # to write output file
    with open(answer_path, 'w') as ans_file:
        for car_id in ans:
            ans_file.write("(" + str(car_id) + ',' + ','.join(list(map(str, ans[car_id]))) + ")\n")
def display(self, fenetre):
    """Draw the button rectangle on `fenetre`, then render its caption label
    vertically centered inside the button.
    """
    x, y, width, height, colour, caption, font_size = self.settings[:7]
    pygame.draw.rect(fenetre, colour, [x, y, width, height])
    # Rebuild the caption label each draw so it tracks the button geometry.
    self.label = label(x + 5, int(y + height / 2 - font_size / 2), caption, font_size)
    self.label.display(fenetre)
def loadInventorsKnowledges(self, firstNamePos, inventor, labelList):
    """Append one label per knowledge entry to labelList: the inventor's
    current knowledge in the first column, target knowledge in the second.
    """
    column_spacing = 15
    columns = (inventor.currentKnowledge, inventor.targetKnowledge)
    for column_index, knowledges in enumerate(columns):
        for row_index, knowledge in enumerate(knowledges):
            labelList.l.append(label(
                firstNamePos[0] + column_index * column_spacing,
                firstNamePos[1] + (row_index + 4) * Game.ONELINE,
                str(knowledge), 9))
def main():
    """Label every KITTI drive's segmentation data against the templates.

    Walks the KITTI root for 2011* date dirs and their 2011* drive dirs,
    labels each drive's `seg/data/*.npy` samples against the template set,
    and saves the labels to ./out/<date>/<drive>/labels.npy.
    """
    arguments = docopt.docopt(__doc__)
    template_path = arguments['--templates']
    path_kitti = arguments['--kitti']
    # Templates are drive-invariant: load them once, lazily, so nothing is
    # loaded at all when no drive directory exists (matches old behavior).
    templates = None
    for date in os.listdir(path_kitti):
        if not date.startswith('2011'):
            continue
        path_date = os.path.join(path_kitti, date)
        for drive in os.listdir(path_date):
            if not drive.startswith('2011'):
                continue
            drive_path = os.path.join(path_date, drive, 'seg', 'data')
            if not os.path.exists(drive_path):
                print(' * Skipping (path does not exist)', drive_path)
                continue
            print(' * Labeling', drive_path)
            samples_path = os.path.join(drive_path, '*.npy')
            if templates is None:
                templates = load_data(template_path)
            samples = load_data(samples_path)
            labels = label(templates, samples)
            out_path = os.path.join('./out', date, drive, 'labels.npy')
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            np.save(out_path, labels)
#entrainement de la carte neurons = map(map_shape, data_shape, sigma_max_value, sigma_min_value, eta_max_value, eta_min_value, decay_start_iter, decay_stop_iter, training_samples, initial) #sauvegarde de la carte np.save(neurons_path, neurons) #=============================================================================== # Labellisation de la carte #=============================================================================== print("labelling") #labellisation de la carte neuron_labels = label(map_shape, data_shape, labelling_samples, labelling_labels, neurons) #sauvegarde das labels np.save(neuron_labels_path, neuron_labels) #=============================================================================== # Test des performances #=============================================================================== print("testing") #calcul des performances global_performance, own_performances = test(neurons, neuron_labels, testing_samples, testing_labels) #affichage des performances dans la console
def placeInventorsNames(self, position, inventorID, nameOffset, line, name):
    """Build and return the label showing `name` in the inventor grid at
    column `inventorID` (spaced by nameOffset) and row `line`.
    """
    x = position[0] + inventorID * nameOffset + 15
    y = position[1] + 30 + line * Game.ONELINE
    return label(x, y, name, 9)
#!/usr/bin/env python # coding=utf-8 import label # # label.label('pool1') # label.label('conv2') # label.label('pool2') # label.label('conv3') # label.label('conv4') label.label('conv5') # label.label('pool5') # label.label('fc6') # label.label('fc7') # label.label('fc8')
from mne.time_frequency import tfr_array_stockwell
import os  # was missing: os.listdir is used in the main loop below (NameError otherwise)
import label
import load
import featureExtraction

# Root of the Goodwin accelerometer dataset and the default study name
# (overwritten per directory in the main loop).
path = '/Users/ishitachordia/Documents/Thomas_Agata_Research/GoodwinData/test/'
study = 'Study0'

##README: there are four parts of this: loadData, labelData, featureExtraction, classification.
#Look at stereotypyMain.m to see how Goodwin did it- we need to follow it exactly
#preprocessedDataAndLabels is exactly the same as Goodwin. When I start back up, start with featureExtraction
#Steps To Do:
#1. You load the Hd.mat in python and use it to filter the preprocessedData
#2. Figure out how to do Stockwell transform
#3. Classify then using what they did + Neural nets
##When I start back up, you can run matlab code by going on https://mycloud.gatech.edu/Citrix/GTMyCloudWeb/
##Documentation: https://docs.google.com/document/d/12cjQ6QPVeTjPgOZZtoWGJ0Wqh9KEk20LOLi3qEW17D4/edit#
##How accelerometer data works: http://stackoverflow.com/questions/5871429/accelerometer-data-how-to-interpret

if __name__ == '__main__':
    # Walk every study directory, then every session within it, skipping
    # macOS .DS_Store entries; load, label, and extract features per session.
    for studyType in os.listdir(path):
        study = studyType
        if (studyType != '.DS_Store'):
            for session in os.listdir(path + study):
                if (session != '.DS_Store'):
                    rawData, rawAnnotation = load.load(session, study, path)
                    preprocessedDataAndLabels = label.label(
                        rawData, rawAnnotation, session, study)
                    featureExtraction.featureExtraction(
                        preprocessedDataAndLabels, path, study, session)
# Build the demo GUI: widgets are constructed from positions/options stored in
# values_json, keyed by widget name.
entry1.render(values_json["entry1"][1])
buttonCanvas = button(newWindow, entry1.getTextVal, values_json["buttonCanvas"][0], values_json["buttonCanvas"][1])
content = frame(root, values_json["content"][0], values_json["content"][1])
tree = treeview(content.getObject())
tree.addFile("a1.h5")
tree.addFile("a1.h5")
# NOTE(review): this rebinds the name `frame` from the widget class to an
# instance — no further frame(...) construction is possible after this line.
frame = frame(content.getObject(), values_json["frame"][0], values_json["frame"][1])
frame1 = frame.getObject()
button1 = button(frame1, frame1.quit, values_json["button"][0], values_json["button"][1])
# NOTE(review): likewise rebinds the `label` class name to an instance.
label = label(content.getObject(), values_json["label"][0], values_json["label"][1])
# entry = entry(content.getObject(), values_json["entry"][0], values_json["entry"][1])
# entry.render()
textBig = text(content.getObject(), values_json["textBig"][0], values_json["textBig"][1])
# NOTE(review): `entry.getTextVal` is referenced here although the
# `entry = entry(...)` creation above is commented out — this resolves to the
# class attribute (or fails) rather than an instance method; verify intent.
buttonPrintEntry = button(frame1, entry.getTextVal, values_json["buttonEntry"][0], values_json["buttonEntry"][1])
buttonPrintText = button(frame1, textBig.getTextVal, values_json["buttonText"][0], values_json["buttonText"][1])
progBar = progressBar(content.getObject(), values_json["progressBar"][0], values_json["progressBar"][1])
buttonProgBar = button(frame1, progBar.step, values_json["progButton"][0], values_json["progButton"][1])
menubar = menuBar(frame1, position=values_json["menubar"][0])
#alternatively you can supply a different url for the browser to open #NOTE: webbrowser always, opens a new window, in my case konqueror opt={} opt['colors']={} c=caller_color=opt['colors']['caller_color']=opt['colors']['func_me_color']="white_on_blue" a=opt['colors']['colors_active'] = 1 #display stdout output with colors b=opt['colors']['output_caller'] = 0 #display function caller and called/current function d=opt['colors']['show_lineno_write'] = 0 e=opt['colors']['show_lineno_caller'] = 1 f=opt['colors']['break_all']=1 soc=stdout_colors(colors_active=a,output_caller=b,caller_color=c,show_lineno_write=d,show_lineno_caller=e,break_all=f) soc.me(['ENTER:',__name__],caller_color) opt['source'] = sys.argv[0] print (label.label(text=opt['source'],timeout=.5)) print ("\n%s BEST VIEWED IN FULLSCREEN"%opt['source']) try: time.sleep(2) except (KeyboardInterrupt,EOFError,e): pass opts, args = getopt.getopt(sys.argv[1:], 'j:f:s:v:o:k:',['jars=','files=', 'sites=','verbose=','options=','flavors=']) _help_ =format_help_message(opt['source']) if len(args) < 1: print format_help_message(opt['source']),sys.exit() opt['site_name'] = None # store site domain dir eg. http://www.example.com/site/ opt['file_name']= None
def main(argv=None):
    """Co-training driver: for each cross-validation fold, alternately train a
    sentence classifier and a key-phrase-pair classifier, let each label fresh
    unlabeled instances for the other, merge/infer new labels each round, and
    finally score both (and their conjunction) on the held-out validation set
    with precision-recall curves.

    NOTE(review): the nesting below was reconstructed from data flow
    (the source arrived with collapsed whitespace) — verify against the
    original file before relying on exact loop boundaries.
    """
    # Flags are defined in train.py
    if FLAGS.hide_key_phrases:
        raise AssertionError(
            "Please turn the hide_key_phrases off for co-training.")
    # First generate cross validation data if it does not exist.
    if not os.path.exists(FLAGS.cross_validation_dir):
        print("Cross validation data folder does not exist. Creating one.")
        os.mkdir(FLAGS.cross_validation_dir)
        source_path = os.path.join(
            FLAGS.data_dir, 'test_cs_unlabeled_data_combined_inferred.txt')
        target_path = os.path.join(FLAGS.data_dir,
                                   'test_cs_labels_combined_inferred.txt')
        cross_validation_split(source_path, target_path,
                               FLAGS.cross_validation_dir,
                               fold_number=FLAGS.cross_validation_fold)
    for cross_val_round_i in range(FLAGS.cross_validation_fold):
        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        latest_sentence_checkpoint_dir = None
        latest_pair_checkpoint_dir = None
        latest_checkpoint_dir = None
        used_unlabeled_kp_pair_set = set()
        # The validation set is separate from the test and training set from the very beginning.
        val_source_path = os.path.join(
            FLAGS.cross_validation_dir,
            "cross_validation_val_%d_data.txt" % (cross_val_round_i))
        val_target_path = os.path.join(
            FLAGS.cross_validation_dir,
            "cross_validation_val_%d_labels.txt" % (cross_val_round_i))
        val_labeled_data, val_labeled_result = util.read_data_labeled_part(
            val_source_path, val_target_path, FLAGS.sent_len, shuffle=False)
        # For legacy code reasons, I have to add a None column to the training data...
        # NOTE(review): np.array(zip(...)) implies Python 2 (zip returns a
        # list); val_data appears otherwise unused in this function — confirm.
        val_data = np.array(
            zip(val_labeled_data, val_labeled_result,
                [None] * val_labeled_result.shape[0]))
        val_precision = []
        val_recall = []
        val_pr_auc = []  # Precision recall area under the curve.
        for round_i in range(FLAGS.max_co_training_rounds):
            # load dataset: fold data at round 0, otherwise the data saved at
            # the end of the previous round.
            if round_i == 0:
                source_path = os.path.join(
                    FLAGS.cross_validation_dir,
                    "cross_validation_train_%d_data.txt" % (cross_val_round_i))
                target_path = os.path.join(
                    FLAGS.cross_validation_dir,
                    "cross_validation_train_%d_labels.txt" % (cross_val_round_i))
                # source_path = os.path.join(FLAGS.data_dir, 'test_cs_unlabeled_data_combined_inferred_train.txt')
                # target_path = os.path.join(FLAGS.data_dir, 'test_cs_labels_combined_inferred_train.txt')
            else:
                source_path = os.path.join(
                    latest_checkpoint_dir,
                    'test_cs_unlabeled_data_combined_round_%d.txt' % (round_i))
                target_path = os.path.join(
                    latest_checkpoint_dir,
                    'test_cs_labels_combined_round_%d.txt' % (round_i))
            train_data, test_data = util.read_data(source_path, target_path,
                                                   FLAGS.sent_len,
                                                   attention_path=None,
                                                   train_size=FLAGS.train_size,
                                                   hide_key_phrases=False)
            # I probably need to implement getting all the sentences with the same kp here as well?
            train_data_hide_kp, test_data_hide_kp = util.read_data(
                source_path, target_path, FLAGS.sent_len, attention_path=None,
                train_size=FLAGS.train_size, hide_key_phrases=True)
            print("Round %d. Reading labeled data from previous round." % (round_i))
            labeled_data, labeled_result = util.read_data_labeled_part(
                source_path, target_path, FLAGS.sent_len, shuffle=False)
            unlabeled_data = util.read_data_unlabeled_part(
                source_path, target_path, FLAGS.sent_len, shuffle=False,
                hide_key_phrases=False)
            unlabeled_data_hide_kp = util.read_data_unlabeled_part(
                source_path, target_path, FLAGS.sent_len, shuffle=False,
                hide_key_phrases=True)
            # For each round, we draw a fresh set of unlabeled data and label them using the trained classifier.
            current_unlabeled_data, used_unlabeled_kp_pair_set, current_drawn_indices = draw_from_unused_unlabeled(
                unlabeled_data, used_unlabeled_kp_pair_set,
                FLAGS.test_size_per_round)
            current_unlabeled_data_hide_kp = [
                unlabeled_data_hide_kp[i] for i in current_drawn_indices
            ]
            # Currently this one works, but we need a version that throws away used ones. So we need to keep track of which
            # ones we've used.
            # current_unlabeled_data, current_drawn_indices = draw_from_unlabeled(unlabeled_data,
            #                                                                     FLAGS.test_size_per_round)
            # current_unlabeled_data_hide_kp = [unlabeled_data_hide_kp[i] for i in current_drawn_indices]
            additional_label_index = []
            additional_label_result = []
            # classifier 0: sentence classifier; classifier 1: kp-pair classifier.
            for classifier_i in range(2):
                additional_label_index.append([])
                additional_label_result.append([])
                if _is_sentence_train(classifier_i):
                    train.train(train_data_hide_kp, test_data_hide_kp)
                    latest_sentence_checkpoint_dir = util.get_latest_checkpoint_dir(
                        FLAGS.train_dir)
                else:
                    train_kp_pair_classifier.train(train_data, test_data)
                    latest_pair_checkpoint_dir = util.get_latest_checkpoint_dir(
                        FLAGS.train_dir)
                # Refresh the latest checkpoint.
                latest_checkpoint_dir = util.get_latest_checkpoint_dir(
                    FLAGS.train_dir)
                restore_param = util.load_from_dump(
                    os.path.join(latest_checkpoint_dir, 'flags.cPickle'))
                restore_param['train_dir'] = latest_checkpoint_dir
                if _is_sentence_train(classifier_i):
                    x_input, actual_output = label.label(
                        current_unlabeled_data_hide_kp, restore_param)
                else:
                    x_input, actual_output = train_kp_pair_classifier.label(
                        current_unlabeled_data, restore_param)
                # Row-wise softmax over the logits, then the argmax class.
                actual_output_exp = np.exp(actual_output)
                actual_output_softmax = actual_output_exp / np.sum(
                    actual_output_exp, axis=1, keepdims=True)
                actual_output_argmax = np.argmax(actual_output_softmax, axis=1)
                # If we do not want "Neither" relation, then calculate max on only the first 2 dimensions.
                # sentence_i_list = np.argsort(-np.max(actual_output_softmax[..., :2], axis=1)).tolist()
                if FLAGS.use_product_method:
                    sentence_i_list = range(actual_output_softmax.shape[0])
                else:
                    # Most-confident instances first.
                    sentence_i_list = np.argsort(
                        -np.max(actual_output_softmax, axis=1)).tolist()
                # We need the version with key phrases not replaced in order to print things correctly.
                sentence_indices_input = current_unlabeled_data[:, :-2]
                vocab_path = os.path.join(restore_param['data_dir'],
                                          'test_cs_vocab_combined')
                _, rev_vocab = preprocessing_util.initialize_vocabulary(
                    vocab_path)
                sentence_input = preprocessing_util.indices_to_sentences(
                    sentence_indices_input, rev_vocab, ignore_pad=True)
                kp_indices_input = current_unlabeled_data[:, -2:]
                with open(
                        os.path.join(latest_checkpoint_dir,
                                     'added_instances.tsv'),
                        "w") as inferred_instances_f:
                    inferred_instances_f.write(
                        'Type\tSentence\t\tProbability [A is-a B, B is-a A, Neither]\n'
                    )
                    additional_label_num_positive = 0
                    additional_label_num_negative = 0
                    for sentence_i in sentence_i_list:
                        # # This is the current max probability
                        # current_softmax = actual_output_softmax[sentence_i,actual_output_argmax[sentence_i]]
                        sentence = sentence_input[sentence_i]
                        # Label the key phrases of interest in the current sentence with *.
                        sentence[kp_indices_input[sentence_i, 1]] += '*'
                        sentence[kp_indices_input[sentence_i, 0]] += '*'
                        # Cap how many negatives/positives are added per round
                        # (unless the product method is used, which keeps all).
                        if actual_output_argmax[sentence_i] == 2:
                            current_type = 'Neither'
                            if not FLAGS.use_product_method and additional_label_num_negative >= FLAGS.co_training_has_relation_num_label_negative:
                                continue
                            else:
                                additional_label_num_negative += 1
                        if actual_output_argmax[sentence_i] == 0:
                            current_type = 'A is-a B'
                            if not FLAGS.use_product_method and additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive:
                                continue
                            else:
                                additional_label_num_positive += 1
                        elif actual_output_argmax[sentence_i] == 1:
                            current_type = 'B is-a A'
                            if not FLAGS.use_product_method and additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive:
                                continue
                            else:
                                additional_label_num_positive += 1
                        inferred_instances_f.write(
                            '%s\t%s\t\t%s\n' %
                            (current_type, ' '.join(sentence),
                             str(actual_output_softmax[sentence_i])))
                        if not FLAGS.use_product_method:
                            additional_label_index[classifier_i].append(
                                sentence_i)
                            # If use_product_method is off, then the result is the label.
                            current_additional_label_result = np.zeros((3, ))
                            current_additional_label_result[
                                actual_output_argmax[sentence_i]] = 1
                            additional_label_result[classifier_i].append(
                                current_additional_label_result)
                            if additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive and \
                                    additional_label_num_negative >= FLAGS.co_training_has_relation_num_label_negative:
                                break
                        else:
                            # If use_product_method is on, then the result is the output softmax, i.e. probability.
                            current_additional_label_result = actual_output_softmax[
                                sentence_i]
                            additional_label_result[classifier_i].append(
                                current_additional_label_result)
                print(
                    "Number of additional data points added through co-training classifier %d"
                    ": %d positives and %d negatives out of %d unlabeled instances."
                    % (classifier_i, additional_label_num_positive,
                       additional_label_num_negative, len(sentence_i_list)))
            # Check if there are any conflicts and merge the additional labels labeled by the two classifier.
            if not FLAGS.use_product_method:
                merged_additional_label_index, merged_additional_label_result = check_conflict_and_merge(
                    additional_label_index, additional_label_result)
            else:
                merged_additional_label_index, merged_additional_label_result = compute_product_and_save(
                    additional_label_result, latest_checkpoint_dir,
                    sentence_input, kp_indices_input)
            latest_checkpoint_dir = util.get_latest_checkpoint_dir(
                FLAGS.train_dir)
            save_source_path = os.path.join(
                latest_checkpoint_dir,
                'test_cs_unlabeled_data_combined_round_%d.txt' % (round_i + 1))
            save_target_path = os.path.join(
                latest_checkpoint_dir,
                'test_cs_labels_combined_round_%d.txt' % (round_i + 1))
            # Now recover the original index in the unlabeled data.
            merged_additional_label_index = [
                current_drawn_indices[i] for i in merged_additional_label_index
            ]
            # Save the additionally labeled 2p+2n examples.
            save_additional_label(unlabeled_data,
                                  merged_additional_label_index,
                                  merged_additional_label_result, labeled_data,
                                  labeled_result, save_source_path,
                                  save_target_path)
            # I also need to get rid of those inferred instances from the whole bag of unlabeled dataset that we're drawing
            # from at each round.
            before_inference_unlabeled_data = util.read_data_unlabeled_part(
                save_source_path, save_target_path, FLAGS.sent_len,
                shuffle=False)
            inferred_additional_label_index, inferred_additional_label_result = infer_from_labeled(
                save_source_path, save_target_path, FLAGS.sent_len, vocab_path,
                do_save=True, save_source_path=save_source_path,
                save_target_path=save_target_path)
            inferred_additional_data = before_inference_unlabeled_data[
                inferred_additional_label_index]
            inferred_additional_sentence_index = inferred_additional_data[:, :-2]
            inferred_additional_kp_index = inferred_additional_data[:, -2:]
            inferred_additional_sentence_input = preprocessing_util.indices_to_sentences(
                inferred_additional_sentence_index, rev_vocab, ignore_pad=True)
            inferred_additional_label_result_argmax = np.argmax(
                inferred_additional_label_result, axis=1)
            # Dump a human-readable record of the inferred instances.
            with open(
                    os.path.join(latest_checkpoint_dir,
                                 'inferred_instances.tsv'),
                    "w") as inferred_instances_f:
                inferred_instances_f.write('Type\tSentence\n')
                for sentence_i in range(inferred_additional_kp_index.shape[0]):
                    # # This is the current max probability
                    # current_softmax = actual_output_softmax[sentence_i,actual_output_argmax[sentence_i]]
                    sentence = inferred_additional_sentence_input[sentence_i]
                    # Label the key phrases of interest in the current sentence with *.
                    sentence[inferred_additional_kp_index[sentence_i, 1]] += '*'
                    sentence[inferred_additional_kp_index[sentence_i, 0]] += '*'
                    if inferred_additional_label_result_argmax[
                            sentence_i] == 2:
                        current_type = 'Neither'
                    if inferred_additional_label_result_argmax[
                            sentence_i] == 0:
                        current_type = 'A is-a B'
                    elif inferred_additional_label_result_argmax[
                            sentence_i] == 1:
                        current_type = 'B is-a A'
                    inferred_instances_f.write(
                        '%s\t%s\n' % (current_type, ' '.join(sentence)))
            # Now all is left is to use the validation dataset to calculate the area under precision recall curve.
            val_precision.append([[[] for _ in range(3)] for _ in range(3)])
            val_recall.append([[[] for _ in range(3)] for _ in range(3)])
            val_pr_auc.append([[0.0, 0.0, 0.0] for _ in range(3)])
            # Each time we calculate the precision recall for classifier 1, 2, and combined.
            for classifier_j in range(3):
                if classifier_j == 0:
                    # Use classifier 1.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_sentence_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_sentence_checkpoint_dir
                    _, val_actual_output = label.label(val_labeled_data,
                                                       restore_param)
                elif classifier_j == 1:
                    # Use classifier 2.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_pair_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_pair_checkpoint_dir
                    _, val_actual_output = train_kp_pair_classifier.label(
                        val_labeled_data, restore_param)
                else:
                    # Use both classifier and, due to design choice of caring more about precision than recall, label
                    # an instance as having a subcategory relation only when both classifier agrees, otherwise output
                    # no relation, aka `Neither`.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_sentence_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_sentence_checkpoint_dir
                    _, val_actual_output_sentence = label.label(
                        val_labeled_data, restore_param)
                    restore_param = util.load_from_dump(
                        os.path.join(latest_pair_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_pair_checkpoint_dir
                    _, val_actual_output_pair = train_kp_pair_classifier.label(
                        val_labeled_data, restore_param)
                    val_actual_output_sentence_argmax = np.argmax(
                        val_actual_output_sentence, axis=1)
                    val_actual_output_pair_argmax = np.argmax(
                        val_actual_output_pair, axis=1)
                    # Label the actual output as [1,0,0] if both classify as A is B, [0,1,0] if both classify as B is A,
                    # and [0,0,1] in all other situations.
                    val_actual_output = np.array([[
                        1 if k == val_actual_output_sentence_argmax[j] else 0
                        for k in range(3)
                    ] if np.all(
                        val_actual_output_sentence_argmax[j] ==
                        val_actual_output_pair_argmax[j]) else [
                            0, 0, 1
                        ] for j in range(val_actual_output_sentence.shape[0])])
                val_actual_output_exp = np.exp(val_actual_output)
                val_actual_output_softmax = val_actual_output_exp / np.sum(
                    val_actual_output_exp, axis=1, keepdims=True)
                # Per-category precision-recall curve and average precision.
                for i in range(3):
                    val_precision[round_i][classifier_j][i], val_recall[
                        round_i][classifier_j][i], _ = precision_recall_curve(
                            val_labeled_result[:, i],
                            val_actual_output_softmax[:, i])
                    val_pr_auc[round_i][classifier_j][
                        i] = average_precision_score(
                            val_labeled_result[:, i],
                            val_actual_output_softmax[:, i], )
        # Lastly output the precision recall file for each classifier and each category.
        with open(os.path.join(latest_checkpoint_dir, 'pr_auc.tsv'), "w") as f:
            for classifier_j in range(3):
                for i in range(3):
                    f.write(
                        "Classifier%d_%s\t%s\n" %
                        (classifier_j, CATEGORY_NAME[i], "\t".join([
                            str(val_pr_auc[round_i][classifier_j][i])
                            for round_i in range(FLAGS.max_co_training_rounds)
                        ])))
        np.save(os.path.join(latest_checkpoint_dir, 'precision_recall_data'),
                np.array([val_precision, val_recall, val_pr_auc]))
# Group file names by their cluster id: output maps cluster -> list of file
# base names (extension stripped).
for i in range(len(result)):
    if result[i] not in output:
        output[result[i]] = []
    output[result[i]].append(files[i].split('.')[0])
# Sort clusters by id for stable iteration order.
output = OrderedDict(sorted(output.items(), key=lambda t: t[0]))
# for k, v in output.items():
#     print(len(v))
# with open('output.csv', 'w') as f:
#     writer = csv.writer(f)
#     for i in output:
#         writer.writerow(output[i])
print(">>>>> Labeling")
labels_list = label(output, files, terms, tfidf_path)
# Write one CSV section per cluster, separated by a '\n' row.
with open('label.csv', 'w') as f:
    writer = csv.writer(f)
    for labels in labels_list:
        # NOTE(review): the loop variable below rebinds the module-level
        # `label` callable used above; harmless here because label() is not
        # called again, but fragile — consider renaming.
        for label in labels:
            writer.writerow(label)
        writer.writerow(['\n'])
#unique, counts = np.unique(result, return_counts=True)
#d = dict(zip(unique, counts))
#for i in d:
#    print(str(i) + ' : ' + str(d[i]))
#print(result)
visualization(result, cluster_count, doc_vecs)
def run(): # initialize VGG Model and PCA iset = init.Init() # initialize neural network model model = networks.Network() model.init_model() # initialize global instance uset = users.Users() # store special features in memory # dset_special = dataset.Dataset(set.PATH_TO_SPECIAL) dset_special = None print "Dataset Loaded." # set normal features in memory to false is_normal_loaded = True tset_name = None is_reloaded = False m_checkpoints = 0 while True: queue = db.lrange(set.REQUEST_QUEUE, set.REQUEST_START, set.REQUEST_END) q_uid = None # initialize local instance select = selectonly.Select() finalize = save.Save() viewer = view.View() retrain_v = retrainView.retrainView() retrain_h = retrainHeatmap.retrainHeatmap() heat = heatmap.Heatmap() t_train = train.Train() report_label = label.label() report_count = count.count() report_map = mapping.map() for q in queue: q = json.loads(q.decode("utf-8")) q_uid = q["uid"] target = q["target"] session_uid = q["uid"] dataSetPath = set.DATASET_DIR + q["dataset"] pcaPath = set.DATASET_DIR + q["pca"] # if specific features then set m_loaded to true is_normal_loaded = False if dataSetPath == set.PATH_TO_SPECIAL else True if target == "label": report_label.setData(q) if target == "count": report_count.setData(q) if target == "map": report_map.setData(q) if target == 'selectonly': select.setData(q) if target == 'save': finalize.setData(q) if target == 'view': viewer.setData(q) if target == 'retrainView': retrain_v.setData(q) if target == 'retrainHeatmap': retrain_h.setData(q) if target == 'heatmapAll': heatmaps = q["viewJSONs"] if target == 'heatmap': heat.setData(q) if target == 'train': t_train.setData(q) if target == 'reload': t_path = set.TRAININGSET_DIR + q["trainingSetName"] is_reloaded = True if target == 'reviewSave': q_samples = json.loads(q["samples"]) if q_uid is not None: print target, " Session Start ....." 
no_uid = True uidx = 0 # find current user Index for i in range(len(uset.users)): if uset.users[i]['uid'] == session_uid: uidx = i no_uid = False if no_uid: # set users data uset.addUser(session_uid) if is_normal_loaded: dset = dataset.Dataset(dataSetPath) else: dset = dset_special PCA = joblib.load(pcaPath) if target == 'selectonly': uset.setIter(uidx, select.iter) print "Predict Start ... " t0 = time() scores = model.predict_prob(dset.features) t1 = time() print "Predict took ", t1 - t0 # Find uncertain samples data = select.getData(scores, dset.slideIdx, dset.slides, dset.x_centroid, dset.y_centroid) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'view': slide_idx = dset.getSlideIdx(viewer.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) print "Predict Start ... " t0 = time() predictions = model.predict(feature_set) t1 = time() print "Predict took ", t1 - t0 object_idx = load( viewer.left, viewer.right, viewer.top, viewer.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float) ) data = {} for i in object_idx: data[str(x_centroid_set[i][0])+'_'+str(y_centroid_set[i][0])] = str(predictions[i]) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'heatmap': slide_idx = dset.getSlideIdx(heat.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) print "Predict Start ... 
" t0 = time() if set.IS_HEATMAP == False: scores = model.predict_prob(feature_set) t1 = time() print "Predict took ", t1 - t0 # set x and y maps heat.setXandYmap() # write heatmaps heat.setHeatMap(x_centroid_set, y_centroid_set, scores) # get heatmap data data = heat.getData(0) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'heatmapAll': data = [] index = 0 t0 = time() scores = model.predict_prob(dset.features) t1 = time() print "Predict took ", t1 - t0 for h in heatmaps: h['uid'] = session_uid heat.setData(h) slide_idx = dset.getSlideIdx(heat.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) # feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) score_set = scores[data_idx: data_idx+object_num] # set x and y maps heat.setXandYmap() # write heatmaps heat.setHeatMap(x_centroid_set, y_centroid_set, score_set) # get heatmap data data_k = heat.getData(index) data.append(data_k) index += 1 # print data db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'reload': # initialize augment agen = augments.Augments() # set user train samples # uset.setReloadedData(uidx, t_path, dset.slides) uset.setReloadedData(uidx, t_path) sample_size = len(uset.users[uidx]['samples']) m_checkpoints = uset.users[uidx]['samples'][sample_size-1]['checkpoints'] sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] 
train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] tset_path = t_path.split('/')[-1] tset_name = tset_path.split('.')[0] print "Training ... ", len(train_labels) t0 = time() model.train_model(train_features, train_labels, tset_name) t1 = time() print "Training took ", t1 - t0 data = {"success": 'pass'} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'label': # initialize augment agen = augments.Augments() # set user train samples uset.setReloadedData(uidx, report_label.trainSet) sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] print "Training ... ", len(train_labels) t0 = time() model.train_model(train_features, train_labels, report_label.classifier) t1 = time() print "Training took ", t1 - t0 slide_idx = dset.getSlideIdx(report_label.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) test_features = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) print "Testing Start ... 
" t0 = time() predicts = model.predict(test_features) t1 = time() print "Predict took ", t1 - t0 inputImageFile = '/datasets/tif/'+ report_label.slide + '.svs.dzi.tif' bold = 512 bold_left = report_label.left - bold bold_top = report_label.top - bold bold_bottom = report_label.bottom + bold bold_right = report_label.right + bold bold_width = report_label.width + 2*bold bold_height = report_label.height + 2*bold ts = large_image.getTileSource(inputImageFile) region = dict( left=report_label.left, top=report_label.top, width=report_label.width, height=report_label.height, ) im_region = ts.getRegion( region=region, format=large_image.tilesource.TILE_FORMAT_NUMPY )[0] mydb = mysql.connector.connect( host=set.MYSQL_HOST, user="******", passwd="guest", database="nuclei", charset='utf8', use_unicode=True ) boundaryTablename = 'sregionboundaries' runcursor = mydb.cursor() query = 'SELECT centroid_x, centroid_y, boundary from ' + boundaryTablename + ' where slide="' + report_label.slide + \ '" AND centroid_x BETWEEN ' + str(report_label.left) + ' AND ' + str(report_label.right) + \ ' AND centroid_y BETWEEN ' + str(report_label.top) + ' AND ' + str(report_label.bottom) runcursor.execute(query) boundarySet = runcursor.fetchall() # find region index from hdf5 object_idx = load( report_label.left, report_label.right, report_label.top, report_label.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float) ) # set an array for boundary points in a region to zero im_bold = np.zeros((bold_height, bold_width), dtype=np.uint8) for i in object_idx: for j in range(len(boundarySet)): x = int(boundarySet[j][0]) y = int(boundarySet[j][1]) boundaryPoints = [] if x == int(x_centroid_set[i, 0]) and y == int(y_centroid_set[i, 0]): object = boundarySet[j][2].encode('utf-8').split(' ') object_points = [] for p in range(len(object)-1): intP = map(int, object[p].split(',')) intP[0] = intP[0] - report_label.left + bold intP[1] = intP[1] - report_label.top + bold 
object_points.append(intP) boundaryPoints.append(np.asarray(object_points)) cv2.fillPoly(im_bold, boundaryPoints, 255 if predicts[i] > 0 else 128) im_out = im_bold[bold:bold+report_label.height, bold:bold+report_label.width] imsave(report_label.inFile, im_out) runcursor.close() mydb.close() print ("label success ", report_label.inFile) data = {"success": report_label.outFile} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) uset.users = [] uset.u_size = 0 model = networks.Network() model.init_model() print ("label done") if target == 'count': # initialize augment agen = augments.Augments() # set user train samples uset.setReloadedData(uidx, report_count.trainSet) sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] print "Training ... ", len(train_labels) t0 = time() model.train_model(train_features, train_labels, report_count.classifier) t1 = time() print "Training took ", t1 - t0 print "Testing Start ... 
" t0 = time() predicts = model.predict(dset.features) t1 = time() print "Predict took ", t1 - t0 # find positive and negative numbers for each slide pos_num = [] neg_num = [] for i in range(dset.n_slides): if i == len(dset.dataIdx) - 1: predict = predicts[dset.dataIdx[i, 0]:] else: predict = predicts[dset.dataIdx[i, 0]: dset.dataIdx[i+1, 0]] pos = len(predict[predict>0]) neg = len(predict) - pos pos_num.append(pos) neg_num.append(neg) print('>> Writing count file') out_file = open(report_count.inFile, 'w') out_file.write("Slide\t") out_file.write("Predicted positive (superpixels)\t") out_file.write("Predicted negative (superpixels)\t") out_file.write("\n") for i in range(len(dset.slides)): out_file.write("%s\t" % dset.slides[i]) out_file.write("%d\t" % pos_num[i]) out_file.write("%d\t" % neg_num[i]) out_file.write("\n") out_file.close() print ("count success ", report_count.inFile) data = {"success": report_count.outFile} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) uset.users = [] uset.u_size = 0 model = networks.Network() model.init_model() print ("count done") if target == 'map': # initialize augment agen = augments.Augments() # set user train samples uset.setReloadedData(uidx, report_map.trainSet) sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] print "Training ... 
", len(train_labels) t0 = time() model.train_model(train_features, train_labels, report_map.classifier) t1 = time() print "Training took ", t1 - t0 slide_idx = dset.getSlideIdx(report_map.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) test_features = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) print "Testing Start ... " t0 = time() predicts = model.predict(test_features) t1 = time() print "Predict took ", t1 - t0 output = h5py.File(report_map.inFile, 'w') output.create_dataset('features', data=test_features) output.create_dataset('predicts', data=predicts) output.create_dataset('x_centroid', data=x_centroid_set) output.create_dataset('y_centroid', data=y_centroid_set) output.create_dataset('slides', data=[report_map.slide]) output.close() print ("map success ", report_map.inFile) data = {"success": report_map.outFile} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) uset.users = [] uset.u_size = 0 model = networks.Network() model.init_model() print ("map done") if target == 'save': data = finalize.getData(uset.users[uidx]) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'review': data = {} data['review'] = [] for sample in uset.users[uidx]['samples']: sample_data = {} sample_data['id'] = str(sample['id']) sample_data['label'] = 1 if sample['label'] == 1 else -1 sample_data['iteration'] = int(sample['iteration']) sample_data['slide'] = str(sample['slide']) sample_data['centX'] = str(sample['centX']) sample_data['centY'] = str(sample['centY']) sample_data['boundary'] = "" sample_data['maxX'] = 0 sample_data['maxY'] = 0 data['review'].append(sample_data) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'train': # increase checkpoint by 1 m_checkpoints += 1 # initialize augment agen = augments.Augments() 
uset.setIter(uidx, t_train.iter) for sample in t_train.samples: # init sample and augment init_sample = dict( id=0, f_idx=0, checkpoints=0, aurl=None, feature=None, label=0, iteration=0, centX=0, centY=0, slideIdx=0, slide=None ) init_augment = dict( id=[], checkpoints=[], feature=[], label=[] ) # check db_id in users samples remove_idx = [] for u in range(len(uset.users[uidx]['samples'])): if uset.users[uidx]['samples'][u]['id'] == sample['id']: remove_idx.append(u) for r in remove_idx: uset.users[uidx]['samples'].pop(r) uset.users[uidx]['augments'].pop(r) # add feature init_sample['id'] = sample['id'] init_sample['aurl'] = str(sample['aurl']) init_sample['slide'] = str(sample['slide']) slide_idx = dset.getSlideIdx(init_sample['slide']) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) slideIdx_set = dset.getSlideIdxSet(data_idx, object_num) c_idx = getIdx( x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx ) f_idx = data_idx + c_idx init_sample['f_idx'] = f_idx init_sample['feature'] = feature_set[c_idx] init_sample['label'] = 1 if sample['label'] == 1 else 0 init_sample['iteration'] = t_train.iter init_sample['centX'] = sample['centX'] init_sample['centY'] = sample['centY'] init_sample['checkpoints'] = m_checkpoints # add augment features slide_idx = dset.getSlideIdx(init_sample['slide']) slide_mean = dset.getWSI_Mean(slide_idx) slide_std = dset.getWSI_Std(slide_idx) a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std) a_featureSet = iset.FC1_MODEL.predict(a_imgs) a_featureSet = PCA.transform(a_featureSet) a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8) a_idSet = [] a_checkpointSet = [] for i in 
range(agen.AUG_BATCH_SIZE): a_idSet.append(init_sample['id']) a_checkpointSet.append(init_sample['checkpoints']) if init_sample['label'] > 0: a_labelSet.fill(1) init_augment['id'] = a_idSet init_augment['feature'] = a_featureSet init_augment['label'] = a_labelSet init_augment['checkpoints'] = a_checkpointSet uset.setAugmentData(uidx, init_augment) uset.setTrainSampleData(uidx, init_sample) sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] # train_labels = to_categorical(train_labels, num_classes=2) if tset_name is None: tset_name = t_train.classifier print "Training ... ", len(train_labels) t0 = time() model.train_model(train_features, train_labels, tset_name) t1 = time() print "Training took ", t1 - t0 data = {"success": 'pass'} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'retrainView': m_checkpoints += 1 # initialize augment agen = augments.Augments() uset.setIter(uidx, retrain_v.iter) print "Augment ... 
", len(retrain_v.samples) t0 = time() for sample in retrain_v.samples: # init sample and augment init_sample = dict( id=0, f_idx=0, checkpoints=0, aurl=None, feature=None, label=0, iteration=0, centX=0, centY=0, slideIdx=0, slide=None ) init_augment = dict( id=[], checkpoints=[], feature=[], label=[] ) # remove samples stored if it already exists remove_idx = [] for u in range(len(uset.users[uidx]['samples'])): if uset.users[uidx]['samples'][u]['id'] == sample['id']: remove_idx.append(u) for r in remove_idx: uset.users[uidx]['samples'].pop(r) uset.users[uidx]['augments'].pop(r) # add feature init_sample['id'] = sample['id'] init_sample['aurl'] = str(sample['aurl']) init_sample['slide'] = str(sample['slide']) slide_idx = dset.getSlideIdx(init_sample['slide']) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) slideIdx_set = dset.getSlideIdxSet(data_idx, object_num) c_idx = getIdx( x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx ) f_idx = data_idx + c_idx init_sample['f_idx'] = f_idx init_sample['feature'] = feature_set[c_idx] init_sample['label'] = 1 if sample['label'] == 1 else 0 init_sample['iteration'] = retrain_v.iter init_sample['centX'] = sample['centX'] init_sample['centY'] = sample['centY'] init_sample['checkpoints'] = m_checkpoints # add augment features slide_idx = dset.getSlideIdx(init_sample['slide']) slide_mean = dset.getWSI_Mean(slide_idx) slide_std = dset.getWSI_Std(slide_idx) a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std) a_featureSet = iset.FC1_MODEL.predict(a_imgs) a_featureSet = PCA.transform(a_featureSet) a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8) a_idSet = [] a_checkpointSet = [] for i in 
range(agen.AUG_BATCH_SIZE): a_idSet.append(init_sample['id']) a_checkpointSet.append(init_sample['checkpoints']) if init_sample['label'] > 0: a_labelSet.fill(1) init_augment['id'] = a_idSet init_augment['feature'] = a_featureSet init_augment['label'] = a_labelSet init_augment['checkpoints'] = a_checkpointSet uset.setAugmentData(uidx, init_augment) uset.setTrainSampleData(uidx, init_sample) t1 = time() print "Augmentation took ", t1 - t0 sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] # train_labels = to_categorical(train_labels, num_classes=2) if tset_name is None: tset_name = retrain_v.classifier t0 = time() model.train_model(train_features, train_labels, tset_name) t1 = time() print "Training took ", t1 - t0, " ", len(train_labels), "Samples" slide_idx = dset.getSlideIdx(retrain_v.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) print "Testing Start ... 
" t0 = time() predictions = model.predict(feature_set) t1 = time() print "Predict took ", t1 - t0 object_idx = load( retrain_v.left, retrain_v.right, retrain_v.top, retrain_v.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float) ) data = {} for i in object_idx: data[str(x_centroid_set[i][0])+'_'+str(y_centroid_set[i][0])] = str(predictions[i]) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'retrainHeatmap': m_checkpoints += 1 # initialize augment agen = augments.Augments() uset.setIter(uidx, retrain_h.iter) for sample in retrain_h.samples: # init sample and augment init_sample = dict( id=0, f_idx=0, checkpoints=0, aurl=None, feature=None, label=0, iteration=0, centX=0, centY=0, slideIdx=0, slide=None ) init_augment = dict( id=[], checkpoints=[], feature=[], label=[] ) # remove samples stored if it already exists remove_idx = [] for u in range(len(uset.users[uidx]['samples'])): if uset.users[uidx]['samples'][u]['id'] == sample['id']: remove_idx.append(u) for r in remove_idx: uset.users[uidx]['samples'].pop(r) uset.users[uidx]['augments'].pop(r) # add feature init_sample['id'] = sample['id'] init_sample['aurl'] = str(sample['aurl']) init_sample['slide'] = str(sample['slide']) slide_idx = dset.getSlideIdx(init_sample['slide']) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set = dset.getYcentroidSet(data_idx, object_num) slideIdx_set = dset.getSlideIdxSet(data_idx, object_num) c_idx = getIdx( x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx ) f_idx = data_idx + c_idx init_sample['f_idx'] = f_idx init_sample['feature'] = feature_set[c_idx] init_sample['label'] = 1 if sample['label'] == 1 else 0 init_sample['iteration'] = retrain_h.iter 
init_sample['centX'] = sample['centX'] init_sample['centY'] = sample['centY'] init_sample['checkpoints'] = m_checkpoints # add augment features slide_idx = dset.getSlideIdx(init_sample['slide']) slide_mean = dset.getWSI_Mean(slide_idx) slide_std = dset.getWSI_Std(slide_idx) a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std) a_featureSet = iset.FC1_MODEL.predict(a_imgs) a_featureSet = PCA.transform(a_featureSet) a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8) a_idSet = [] a_checkpointSet = [] for i in range(agen.AUG_BATCH_SIZE): a_idSet.append(init_sample['id']) a_checkpointSet.append(init_sample['checkpoints']) if init_sample['label'] > 0: a_labelSet.fill(1) init_augment['id'] = a_idSet init_augment['feature'] = a_featureSet init_augment['label'] = a_labelSet init_augment['checkpoints'] = a_checkpointSet uset.setAugmentData(uidx, init_augment) uset.setTrainSampleData(uidx, init_sample) sample_size = len(uset.users[uidx]['samples']) sample_batch_size = agen.AUG_BATCH_SIZE * sample_size train_size = sample_size + sample_batch_size train_features = np.zeros((train_size, set.FEATURE_DIM)) train_labels = np.zeros((train_size, )) for i in range(sample_size): train_features[i] = uset.users[uidx]['samples'][i]['feature'] train_labels[i] = uset.users[uidx]['samples'][i]['label'] train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature'] train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label'] if tset_name is None: tset_name = retrain_h.classifier t0 = time() model.train_model(train_features, train_labels, tset_name) t1 = time() print "Training took ", t1 - t0, " ", len(train_labels), "Samples" slide_idx = dset.getSlideIdx(retrain_h.slide) object_num = dset.getObjNum(slide_idx) data_idx = dset.getDataIdx(slide_idx) feature_set = dset.getFeatureSet(data_idx, object_num) x_centroid_set = dset.getXcentroidSet(data_idx, object_num) y_centroid_set 
= dset.getYcentroidSet(data_idx, object_num) print "Testing Start ... " t0 = time() if set.IS_HEATMAP == False: scores = model.predict_prob(feature_set) t1 = time() print "Predict took ", t1 - t0 # set x and y maps retrain_h.setXandYmap() # write heatmaps retrain_h.setHeatMap(x_centroid_set, y_centroid_set, scores) # get heatmap data data = retrain_h.getData(0) db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'cancel': uset.users = [] uset.u_size = 0 is_normal_loaded = True tset_name = None is_reloaded = False m_checkpoints = 0 del select del finalize del viewer del retrain_v del retrain_h del heat del t_train del report_label model = networks.Network() model.init_model() # dset = dataset.Dataset(set.PATH_TO_SPECIAL) data = {"success": 'pass'} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1) if target == 'reviewSave': # modify labels if they are changed on review tab for q_sample in q_samples: for sample in uset.users[uidx]['samples']: if sample['id'] == q_sample['id']: sample['label'] = 1 if q_sample['label'] == 1 else 0 for sample in uset.users[uidx]['augments']: if sample['id'][0] == q_sample['id']: sample['label'][:] = 1 if q_sample['label'] == 1 else 0 data = {"success": 'pass'} db.set(q_uid, json.dumps(data)) db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)
ind = pick[ind - 1] #print(ind) # original image resizing I = np.array(Image.open(image_path)) I = imutils.resize(I, width=min(600, I.shape[1])) plt.figure(1) plt.imshow(I) # auto-refined mask x1 = ind[1] y1 = ind[0] x2 = ind[3] y2 = ind[2] mask, h, w = Domask(I, x1, x2, y1, y2) I_label = label(I, mask, x1, x2, y1, y2) plt.figure(2) plt.imshow(I_label) # object removal with weighted seam carving nr = 0 nc = ind[3] - ind[1] [Ic, T] = carv(I, nr, nc, I_label) fig2 = plt.figure(3) plt.imshow(Ic) # seam insertion Ic_n = Ic summ = int(nc / 2) I_label_insert = np.ones(np.shape(Ic_n[:, :, 0])) a = genEngMap(Ic_n, I_label_insert)
"""
dcc_10_layout = dcc_10.Layout() dcc_15 = my_dc(gap_inc_vec=[378.0, 388.0, 398.0, 408.0, 418.0], length_inc_vec=[87, 97, 107], name="ring5", width=15.0) dcc_15_layout = dcc_15.Layout() dcc_20 = my_dc(gap_inc_vec=[378.0, 388.0, 398.0, 408.0, 418.0], length_inc_vec=[87, 97, 107], name="ring6", width=20.0) dcc_20_layout = dcc_20.Layout() marker = dicingMarker() marker_layout = marker.Layout() marker2 = label() marker2_layout = marker2.Layout() pr = PlaceComponents( child_cells={ "comp1": dc_10, "comp2": dc_15, "comp3": dc_20, "comp4": dcc_10, "comp5": dcc_15, "comp6": dcc_20, "marker1": marker, "marker2": marker2, }) pr_layout = pr.Layout( child_transformations={
def main(argv=None):
    """Interactive active-learning loop for labeling key-phrase pairs.

    Restores the flags of a previously trained model, scores every
    unlabeled sentence, ranks sentences by prediction uncertainty, then
    repeatedly shows the most uncertain sentence to the user and records
    the class the user picks (A is-a B / B is-a A / Neither).  The newly
    labeled examples are merged back into the data files via
    save_additional_label().

    Note: uses raw_input, so this is Python 2 code.
    """
    # Restore the training-time FLAGS, but point train_dir at the current run.
    restore_param = util.load_from_dump(os.path.join(FLAGS.train_dir, 'flags.cPickle'))
    restore_param['train_dir'] = FLAGS.train_dir
    source_path = os.path.join(restore_param['data_dir'], 'test_cs_unlabeled_data_combined.txt')
    target_path = os.path.join(restore_param['data_dir'], 'test_cs_labels_combined.txt')
    vocab_path = os.path.join(restore_param['data_dir'], 'test_cs_vocab_combined')

    unlabeled_data = util.read_data_unlabeled_part(source_path, target_path, restore_param['sent_len'])
    data_size = unlabeled_data.shape[0]
    # # Now hard code to take the first 1000
    # data_first_1000 = unlabeled_data

    # Run the model over the unlabeled data.  label() presumably restores the
    # network and returns (model input, raw logits) -- TODO confirm against
    # its definition elsewhere in this file.
    x_input, actual_output = label(unlabeled_data, restore_param)
    # Softmax over the three classes so rows sum to 1.
    actual_output_exp = np.exp(actual_output)
    actual_output_softmax = actual_output_exp / np.sum(actual_output_exp, axis=1, keepdims=True)
    actual_output_argmax = np.argmax(actual_output_softmax,axis=1)
    # Entropy = -sum(p * log p) so this is actually the negative of entropy. For sorting purpose I took out the neg.
    actual_output_entropy = np.sum(np.multiply(actual_output_softmax, np.log(actual_output_softmax)), axis=1)
    # The following are ways to rank what question should be asked first.
    # The first one uses entropy, but there might be some implementation errors.
    actual_output_entropy_argsort = np.argsort(actual_output_entropy, axis=0)  # This doesn:t seem to give me the most uncertain ones??? in theory it does. or maybe it's just the model is too sure of everything.
    # The second one uses the softmax probability and only ask the one with highest probability in the first two
    # classes.
    # actual_output_entropy_argsort = np.argsort(-np.max(actual_output_softmax[...,:2], axis=1))

    # Model input layout: all but the last two columns are sentence token
    # indices; the final two columns are the key-phrase positions.
    sentence_indices_input = x_input[:,:-2]
    _,rev_vocab = preprocessing_util.initialize_vocabulary(vocab_path)
    sentence_input = preprocessing_util.indices_to_sentences(sentence_indices_input,rev_vocab)
    kp_indices_input = x_input[:,-2:]
    #
    # print('Sentence\t\tPredicted Score (A is-a B, B is-a A, Neither)\t')
    # for sentence_i, sentence in enumerate(sentence_input):
    #     # Label the key phrases of interest in the current sentence with *.
    #     sentence[kp_indices_input[sentence_i,1]] += '*'
    #     sentence[kp_indices_input[sentence_i,0]] += '*'
    #     if actual_output_argmax[sentence_i] == 2:
    #         # current_type = 'Neither'
    #         continue
    #     if actual_output_argmax[sentence_i] == 0:
    #         current_type = 'A is-a B'
    #     elif actual_output_argmax[sentence_i] == 1:
    #         current_type = 'B is-a A'
    #
    #     print('%s\t%s\t\t%s\t'
    #           % (current_type, ' '.join(sentence), str(actual_output_softmax[sentence_i])))

    user_input = -1
    num_user_labeled = 0
    user_label_results = []
    # Keep asking until the user answers 4 ("stop") or every example is labeled.
    while user_input != 4 and num_user_labeled < data_size:
        # Present the next most-uncertain sentence per the ranking above.
        sentence_i = actual_output_entropy_argsort[num_user_labeled]
        sentence = sentence_input[sentence_i]
        print('Key phrase pair\tSentence\t\tPredicted Score (A is-a B, B is-a A, Neither)\t')
        current_key_phrase_pair = sentence[kp_indices_input[sentence_i,0]] + ' ' + sentence[kp_indices_input[sentence_i,1]]
        # Label the key phrases of interest in the current sentence with *.
        sentence[kp_indices_input[sentence_i,1]] += '*'
        sentence[kp_indices_input[sentence_i,0]] += '*'
        print('%s\n%s\t\t%s\t'
              % (current_key_phrase_pair,' '.join(sentence), str(actual_output_softmax[sentence_i])))
        user_input = raw_input('In your opinion, what should be the category of the key phrase pair? '
                               'Please enter 1, 2, or 3. Enter 4 to stop answering.\n'
                               '1. A is-a B\n2. B is-a A\n3. Neither.')
        user_input = util.get_valid_user_input(user_input, 1, 4)
        if user_input != 4:
            # One-hot encode the chosen class.
            user_label_result = np.array([0,0,0])
            user_label_result[user_input-1] = 1
            user_label_results.append(user_label_result)
        num_user_labeled += 1

    # Indices (most-uncertain-first) of the examples the user just labeled.
    actual_output_entropy_indices = actual_output_entropy_argsort[:num_user_labeled]
    if len(user_label_results) > 0:
        # Merge the fresh user labels with the already-labeled portion and save.
        labeled_data, labeled_result = util.read_data_labeled_part(source_path, target_path, restore_param['sent_len'], shuffle=False)
        user_label_results = np.array(user_label_results)
        save_additional_label(unlabeled_data, actual_output_entropy_indices, user_label_results,labeled_data,labeled_result, source_path, target_path)
kG = kp.process_k_skip(G,K, priority) t1 = time.time() print '%4.4f sec -- kG process finished' %(t1-s1) f.write('%.4f ' %(t1-s1)) s2 = time.time() ov_G1 = CH_construct.CH_construct(G,priority) t2 = time.time() print '%4.4f sec -- ovG finished' %(t2-s2) f.write('%.4f ' %(t2-s2)) s3 = time.time() ov_G2 = CH_construct.CH_construct(kG,priority) t3 = time.time() print '%4.4f sec -- ov_kG finished' %(t3-s3) f.write('%.4f ' %(t3-s3)) s4 = time.time() labels1 = label.label(ov_G1, priority) t4 = time.time() print '%4.4f sec -- ov_G_label finished' %(t4-s4) f.write('%.4f ' %(t4-s4)) s5 = time.time() labels2 = label.label(ov_G2, priority) t5 = time.time() print '%4.4f sec -- ov_kG_label finished' %(t5-s5) f.write('%.4f\n' %(t5-s5)) f.close() f = open(filename+'_size.txt','a') G_node = G.number_of_nodes() G_edge = G.number_of_edges() kG_node = kG.number_of_nodes() kG_edge = kG.number_of_edges()
def __init__(self, posX, posY, sizeX, sizeY, color, text, fontSize):
    """Build the widget: remember its raw settings and create its text label.

    The label is inset 5 px from the widget's left edge and vertically
    centred within its height.  `label` is a factory defined elsewhere
    in this file.
    """
    # Keep the constructor arguments around for later reference.
    self.settings = [posX, posY, sizeX, sizeY, color, text, fontSize]
    # Vertical midpoint of the box, shifted up by half the font height.
    centered_y = int(posY + sizeY / 2 - fontSize / 2)
    self.label = label(posX + 5, centered_y, text, fontSize)