コード例 #1
0
def k_means(dataset, k, features, center_range, scale):
    """Run Lloyd's k-means clustering on *dataset*.

    :param dataset: DataFrame of points; columns 0..features-1 are coordinates.
    :param k: Number of clusters.
    :param features: Number of feature columns (also used as the label column key).
    :param center_range: Range in which random initial centroids are generated.
    :param scale: Scale passed to the random-centroid generator.
    :return: (labeled_dataset, centroids) — the dataset with a cluster-label
             column and the final (k, features) centroid array.
    """

    # K-means initialization
    centroids = gen_rand_centroids(k=k, features=features, center_range=center_range, scale=scale)  # Generates k random centroids

    labeled_dataset = label(dataset, centroids, k, features)

    # K-means main loop
    while True:

        # BUG FIX: must snapshot with .copy() — `centroids` is mutated in place
        # below, so a plain alias would make the convergence test always true
        # and terminate the loop after a single iteration.
        prev_centroids = centroids.copy()

        # BUG FIX: accumulators must be re-zeroed every iteration; previously
        # they were created once outside the loop, so counts and sums leaked
        # from earlier iterations into each new centroid estimate.
        points_in_cluster = np.zeros(k)  # Total number of points in each cluster (after classification)
        features_sum = np.zeros((k, features))  # Partial sum of each feature, calculated for each cluster

        for i in range(0, len(labeled_dataset)):  # Computes the centroid for each cluster
            points_in_cluster[int(labeled_dataset.loc[i, features])] += 1  # Counts the number of points in each cluster
            for j in range(0, features):  # Sums the features of each point, for each cluster
                features_sum[int(labeled_dataset.loc[i, features]), j] += labeled_dataset.loc[i, j]

        for i in range(0, k):  # For each cluster, calculates the new centroid
            for j in range(0, features):
                # Skip empty clusters to avoid division-by-zero warnings;
                # their centroid simply stays where it was.
                if points_in_cluster[i] != 0:
                    centroids[i, j] = features_sum[i, j] / points_in_cluster[i]

        labeled_dataset = label(labeled_dataset.drop(columns=[features]), centroids, k, features)  # Re-assigns the points in function of the new centroids

        if (centroids == prev_centroids).all():  # Breaks the loop if centroids are not moving anymore
            break

    return labeled_dataset, centroids
コード例 #2
0
ファイル: kniferand.py プロジェクト: Yucheng-Zhang/cosmoknife
def main_knife(args):
    '''Main function for jackknife with randoms: build (or load) jackknife
    regions, label the data and random catalogs, or analyze an already
    labeled random catalog.'''
    have_bounds_file = args.bdf != ''
    have_labeled_rand = args.rand_lbed != ''

    # make jackknife regions
    if not have_bounds_file and not have_labeled_rand:
        print('====== Making jackknife regions ======')
        randoms = miscfuncs.load_data_pd(args.rand, tp='knife')
        region_map, region_bounds = knife(
            randoms, args.njr, args.nra, args.nside, args.rra)

        if args.fmap != '':
            miscfuncs.save_jk_map(region_map, args.fmap)

        if args.plotmap:
            print('-- note: not labeled yet, just demo of regions')
            miscfuncs.plot_jk_map(region_map, shuffle=args.sf, njr=args.njr)

        if args.fbounds != '':
            miscfuncs.save_jk_bounds(region_bounds, args.fbounds)

        # Choose which representation of the regions is used for labeling.
        if args.tp == 'bounds':
            jkr = region_bounds
        elif args.tp == 'map':
            jkr = region_map
        else:
            print('>> Error: wrong tp option!')
            sys.exit()

    # load bounds file if provided
    if have_bounds_file and not have_labeled_rand:
        print('>> Loading bounds file: {}'.format(args.bdf))
        jkr = np.loadtxt(args.bdf)

    # label data and random points
    if (args.lb == 1 or have_bounds_file) and not have_labeled_rand:
        print('====== Labeling data points ======')
        points = miscfuncs.load_data_pd(args.data)
        label.label(points, jkr, tp=args.tp,
                    f_data=args.fodata, jk0=args.jk0)

        print('====== Labeling random points ======')
        points = miscfuncs.load_data_pd(args.rand)
        label.label(points, jkr, tp=args.tp,
                    f_data=args.forand, jk0=args.jk0)

    # analyze labeled random points
    if have_labeled_rand:
        randoms = miscfuncs.load_data_pd(args.rand_lbed)
        miscfuncs.analyze_rand(randoms, args.sf)
コード例 #3
0
def run():
  """Parse the source data, label it, and store all valid entries in SQLite."""
  handler = parse()
  groups = label(handler)

  def _valid(items):
    # Keep only the entries that pass their own validity check.
    return [item for item in items if item.is_valid()]

  # Same filtering order as before: blocks, then chars, then emojis.
  valid_blocks = _valid(handler.blocks)
  valid_chars = _valid(handler.chars)
  valid_emojis = _valid(handler.emojis)
  generate_sqlite(valid_chars, valid_emojis, valid_blocks, groups)
コード例 #4
0
def _report(G, method_name, result, elapsed):
    # Shared formatter for the three solving methods: graph, method name,
    # result, and elapsed time, followed by a blank separator line.
    print("graphe : " + str(G))
    print(method_name)
    print(result)
    print("calcul en " + str(elapsed) + " secondes")
    print("")


def print_result(G, methode):
    """Solve graph *G* with the chosen method and print the timed result.

    :param G: The graph to solve.
    :param methode: 1 = exhaustive search, 2 = traversal, 3 = labelling.
                    Any other value prints a usage reminder instead.
    :return: Always 0.
    """
    t0 = time()
    # The result is computed first, then the clock is read, so the reported
    # duration covers only the solver call — same ordering as before.
    if methode == 1:
        _report(G, "methode exhaustive", exhaustif(G), time() - t0)
    elif methode == 2:
        _report(G, "methode par parcours", parcours(G), time() - t0)
    elif methode == 3:
        _report(G, "methode par label", label(G), time() - t0)
    else:
        print("methode =")
        print("1 : methode exhaustive")
        print("2 : methode par parcours")
        print("3 : methode par label")
    return 0
コード例 #5
0
    def __init__(self, cnn_model_path, source_path, target_path, vocab_path,
                 sent_len, labeled_save_dir):
        """Load the data, run the trained CNN on the unlabeled part, and rank
        the unlabeled instances by prediction uncertainty for active learning.

        :param cnn_model_path: Path to a trained cnn model.
        :param source_path: Path to instance data, the latter part of which will be labeled during active learning.
        :param target_path: Path to labels for already labeled part of the data.
        :param vocab_path: Path to vocab file.
        :param sent_len: Sentence length passed to the data readers.
        :param labeled_save_dir: Directory to which the labeled files will be stored.
        """
        unlabeled_data = util.read_data_unlabeled_part(source_path,
                                                       target_path,
                                                       sent_len,
                                                       shuffle=False)
        self.unlabeled_data = np.array(unlabeled_data)
        self.data_size = self.unlabeled_data.shape[0]

        self.labeled_data, self.labeled_result = util.read_data_labeled_part(
            source_path, target_path, sent_len, shuffle=False)

        # Row layout: all columns except the last two are word indices; the
        # final two columns are the key-phrase positions within the sentence.
        sentence_indices_input = self.unlabeled_data[:, :-2]
        self.vocab_path = vocab_path
        _, rev_vocab = preprocessing_util.initialize_vocabulary(vocab_path)
        self.sentence_input = preprocessing_util.indices_to_sentences(
            sentence_indices_input, rev_vocab)
        self.kp_indices_input = self.unlabeled_data[:, -2:]

        for i, sentence in enumerate(self.sentence_input):
            # Label the key phrases of interest in the current sentence with *.
            sentence[self.kp_indices_input[i, 0]] += '*'
            sentence[self.kp_indices_input[i, 1]] += '*'

        self.update_labeled_save_dir(labeled_save_dir)
        # self.labeled_save_dir = labeled_save_dir
        # self.source_save_dir = os.path.join(labeled_save_dir, 'test_cs_unlabeled_data_combined.txt')
        # self.target_save_dir = os.path.join(labeled_save_dir, 'test_cs_labels_combined.txt')
        # self.vocab_save_dir = os.path.join(labeled_save_dir, 'test_cs_vocab_combined')

        # Run the trained CNN (restored from cnn_model_path) on the unlabeled
        # data to obtain raw per-class scores.
        label_config = util.load_from_dump(
            os.path.join(cnn_model_path, 'flags.cPickle'))
        label_config['train_dir'] = cnn_model_path
        _, predicted_label = label(self.unlabeled_data, config=label_config)

        assert predicted_label.shape[0] == self.data_size

        # Softmax over the raw scores to get per-class probabilities.
        predicted_label_exp = np.exp(predicted_label)
        predicted_label_softmax = predicted_label_exp / np.sum(
            predicted_label_exp, axis=1, keepdims=True)
        # Entropy = -sum(p * log p) so this is actually the negative of entropy. For sorting purpose I took out the neg.
        predicted_label_entropy = np.sum(np.multiply(
            predicted_label_softmax, np.log(predicted_label_softmax)),
                                         axis=1)

        # The following are ways to rank what question should be asked first.
        # The first one uses entropy, but there might be some implementation errors.
        # Ascending order of the negated entropy == descending order of actual
        # entropy, so the most uncertain instances come first in this list.
        self.predicted_label_entropy_argsort = np.argsort(
            predicted_label_entropy, axis=0).tolist()

        pass
コード例 #6
0
    def loadSettingMenu(self):
        """Show the player-colour selection menu and block until a colour
        button is clicked (or the window is closed), returning the chosen
        colour (0 if the window was closed without a choice)."""

        playerColor = 0

        clock = pygame.time.Clock()

        fenetre = pygame.display.set_mode((self.WIDTH, self.HEIGHT))

        # Background image scaled to the full window size.
        back = pygame.image.load("background.jpg").convert()
        back2 = pygame.transform.scale(back,
                                       (int(Game.WIDTH), int(Game.HEIGHT)))
        back3 = sprite(0, 0, back2)

        # NOTE(review): `list` here is a project class taking (items, name) —
        # it shadows the builtin list within this module.
        menuSprites = list([back3], "sprites")

        Game.toDisplay.append(menuSprites)

        strColors = ["GREEN", "BLUE", "PURPLE", "RED", "YELLOW"]
        rgbColors = [Game.GREEN, Game.BLUE, Game.PURPLE, Game.RED, Game.YELLOW]

        buttons = list([], "buttons")

        # One button per colour, spread evenly across the window width.
        for i in range(0, 5):
            b = button(Game.WIDTH / 6 * (i + 1) - Game.WIDTH_BUTTON / 2,
                       Game.HEIGHT / 4, Game.WIDTH_BUTTON, Game.HEIGHT_BUTTON,
                       rgbColors[i], strColors[i], 14)
            buttons.l.append(b)

        Game.toDisplay.append(buttons)

        menuLabel = list([], "labels")

        text = "Select a player!"

        # Roughly centre the title horizontally (26 px per character).
        lab = label((Game.WIDTH - len(text) * 26) / 2, Game.HEIGHT / 5 - 26,
                    text, 52)

        menuLabel.l.append(lab)

        Game.toDisplay.append(menuLabel)

        loop = True

        # Event loop: redraw, then wait for QUIT or a click on a colour button.
        while loop:

            self.updateDisplay(Game.toDisplay, fenetre)

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    loop = False

                if event.type == pygame.MOUSEBUTTONDOWN:
                    for li in Game.toDisplay:
                        if li.name == "buttons":
                            for b in li.l:
                                if b.intersect(event.pos):
                                    # settings[4] is the button's colour (see
                                    # button.display, which reads the same slot).
                                    playerColor = b.settings[4]
                                    loop = False
        return playerColor
コード例 #7
0
def main():
    # Entry point: read the car/road/cross description files, simulate the
    # traffic schedule, and write each car's route to the answer file.
    if len(sys.argv) != 5:
        logging.info('please input args: car_path, road_path, cross_path, answerPath')
        exit(1)
    car_path = sys.argv[1]
    road_path = sys.argv[2]
    cross_path = sys.argv[3]
    answer_path = sys.argv[4]
    logging.info("car_path is %s" % (car_path))
    logging.info("road_path is %s" % (road_path))
    logging.info("cross_path is %s" % (cross_path))
    logging.info("answer_path is %s" % (answer_path))
    # to read input file
    car_dict, road_dict, cross_dict, car_ascending, cross_ascending = read(car_path, road_path, cross_path)
    P1, P2, total_lane_num = construct(car_dict, road_dict, cross_dict)
    # process
    ans, total_car_num, road_num = defaultdict(list), len(car_ascending), len(road_dict)
    best_balance, best_limit = 0, 0
    min_time, dead_lock = 99999, 0
    # Parameter search over (balance, limit); deep copies keep each trial's
    # mutable state independent of the originals.
    for balance in [0.5, ]:
        limit = 2000
        for _ in range(1):
            car_dict_copy = copy.deepcopy(car_dict)
            road_dict_copy = copy.deepcopy(road_dict)
            car_ascending_copy = copy.deepcopy(car_ascending)
            time, arrival_num, temp_ans = 0, 0, defaultdict(list)
            # One loop iteration = one simulated time step, until all cars arrive.
            while arrival_num < total_car_num:
                waiting_car_num = label(road_dict_copy, car_dict_copy, cross_dict, P1, P2, balance)
                dead_lock, schedule_arrival_num = \
                schedule(cross_ascending, cross_dict, road_dict_copy, car_dict_copy, temp_ans, waiting_car_num, P1, P2, balance)
                arrival_num += schedule_arrival_num
                if dead_lock:
                    print("dead_lock at time:", time)
                    break
                remaining_car_num = total_car_num - arrival_num
                # `limit` presumably caps how many cars may start this step — TODO confirm in start().
                start(time, car_ascending_copy, car_dict_copy, cross_dict, road_dict_copy, temp_ans, remaining_car_num, P1, limit)
                time += 1
                if time > min_time: break  # already slower than the best trial; abandon early
            print("balance:", balance, "limit:", limit, "time:", time, "arrival_num:", arrival_num)
            # Keep the fastest deadlock-free schedule seen so far.
            if not dead_lock and time < min_time:
                best_limit = limit
                best_balance = balance
                min_time = time
                ans = copy.deepcopy(temp_ans)
            if dead_lock: limit -= 500  # deadlock: lower the limit for the next trial
            else: limit += 800  # success: raise the limit for the next trial
    print("best_balance:",best_balance, "best_limit:",best_limit, "total_car_num:",total_car_num, "road_num:",road_num, "total_lane_num:",total_lane_num, "min_time:",min_time)
    # to write output file
    with open(answer_path, 'w') as ans_file:
        for car_id in ans:
            ans_file.write("(" + str(car_id) + ',' + ','.join(list(map(str, ans[car_id]))) + ")\n")
コード例 #8
0
    def display(self, fenetre):
        """Draw the button rectangle on *fenetre* and render its text label."""
        # settings layout: [x, y, width, height, color, text, font size]
        posX, posY = self.settings[0], self.settings[1]
        sizeX, sizeY = self.settings[2], self.settings[3]
        color, text = self.settings[4], self.settings[5]
        fontSize = self.settings[6]

        pygame.draw.rect(fenetre, color, [posX, posY, sizeX, sizeY])

        # Vertically centre the label in the button, 5 px from the left edge.
        self.label = label(posX + 5, int(posY + sizeY / 2 - fontSize / 2),
                           text, fontSize)
        self.label.display(fenetre)
コード例 #9
0
    def loadInventorsKnowledges(self, firstNamePos, inventor, labelList):
        """Append one label per knowledge item to *labelList*: the current
        knowledges in the first column, the target knowledges in the second."""
        widthOffset = 15

        # Column 0 = current knowledge, column 1 = target knowledge.
        knowledge_columns = (inventor.currentKnowledge, inventor.targetKnowledge)

        for col, knowledges in enumerate(knowledge_columns):
            for row, knowledge in enumerate(knowledges):
                lab = label(
                    firstNamePos[0] + col * widthOffset,
                    firstNamePos[1] + (row + 4) * Game.ONELINE,
                    str(knowledge), 9)
                labelList.l.append(lab)
コード例 #10
0
ファイル: kitti.py プロジェクト: siayou/pcmatch
def main():
    """Label the segmented point clouds of every KITTI drive against the
    templates and save the per-drive label arrays under ./out/."""
    arguments = docopt.docopt(__doc__)
    template_path = arguments['--templates']
    path_kitti = arguments['--kitti']

    # PERF FIX: the templates do not depend on the drive being processed, so
    # load them once up front instead of re-reading them inside the inner
    # loop for every single drive. (The unused `drives` list was removed.)
    templates = load_data(template_path)

    for date in os.listdir(path_kitti):
        if not date.startswith('2011'):
            continue
        path_date = os.path.join(path_kitti, date)
        for drive in os.listdir(path_date):
            if not drive.startswith('2011'):
                continue
            drive_path = os.path.join(path_date, drive, 'seg', 'data')
            if not os.path.exists(drive_path):
                print(' * Skipping (path does not exist)', drive_path)
                continue
            print(' * Labeling', drive_path)
            samples_path = os.path.join(drive_path, '*.npy')
            samples = load_data(samples_path)
            labels = label(templates, samples)
            out_path = os.path.join('./out', date, drive, 'labels.npy')
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            np.save(out_path, labels)
コード例 #11
0
# Train the map
# NOTE(review): `map` here shadows the builtin — presumably a project-defined
# self-organizing-map trainer imported elsewhere in the file.
neurons = map(map_shape, data_shape, sigma_max_value, sigma_min_value,
              eta_max_value, eta_min_value, decay_start_iter, decay_stop_iter,
              training_samples, initial)

# Save the trained map
np.save(neurons_path, neurons)

#===============================================================================
# Map labelling
#===============================================================================

print("labelling")

# Label the map's neurons from the labelling samples
neuron_labels = label(map_shape, data_shape, labelling_samples,
                      labelling_labels, neurons)

# Save the labels
np.save(neuron_labels_path, neuron_labels)

#===============================================================================
# Performance testing
#===============================================================================

print("testing")

# Compute the performance scores
global_performance, own_performances = test(neurons, neuron_labels,
                                            testing_samples, testing_labels)

# Print the performance scores to the console
コード例 #12
0
 def placeInventorsNames(self, position, inventorID, nameOffset, line,
                         name):
     """Build and return a small name label positioned in the inventor grid."""
     # Horizontal slot by inventor, vertical slot by line number.
     x = position[0] + inventorID * nameOffset + 15
     y = position[1] + 30 + line * Game.ONELINE
     return label(x, y, name, 9)
コード例 #13
0
#!/usr/bin/env python
# coding=utf-8
import label

# Run the labelling for one network layer at a time; uncomment the layer
# you want to process.
# label.label('pool1')
# label.label('conv2')
# label.label('pool2')
# label.label('conv3')
# label.label('conv4')
label.label('conv5')
# label.label('pool5')
# label.label('fc6')
# label.label('fc7')
# label.label('fc8')
コード例 #14
0
import os

from mne.time_frequency import tfr_array_stockwell

import featureExtraction
import label
import load

# Root directory of the Goodwin study data and the default study name.
path = '/Users/ishitachordia/Documents/Thomas_Agata_Research/GoodwinData/test/'
study = 'Study0'

##README: there are four parts of this: loadData, labelData, featureExtraction, classification.
#Look at stereotypyMain.m to see how Goodwin did it- we need to follow it exactly
#preprocessedDataAndLabels is exactly the same as Goodwin. When I start back up, start with featureExtraction
#Steps To Do:
#1. You load the Hd.mat in python and use it to filter the preprocessedData
#2. Figure out how to do Stockwell transform
#3. Classify then using what they did + Neural nets
##When I start back up, you can run matlab code by going on https://mycloud.gatech.edu/Citrix/GTMyCloudWeb/
##Documentation: https://docs.google.com/document/d/12cjQ6QPVeTjPgOZZtoWGJ0Wqh9KEk20LOLi3qEW17D4/edit#
##How accelerometer data works: http://stackoverflow.com/questions/5871429/accelerometer-data-how-to-interpret

if __name__ == '__main__':
    # Walk every study directory and, within each, every session directory,
    # skipping macOS '.DS_Store' artefacts at both levels. For each session:
    # load raw data, label it, then extract features.
    for studyType in os.listdir(path):
        study = studyType
        if studyType == '.DS_Store':
            continue
        for session in os.listdir(path + study):
            if session == '.DS_Store':
                continue
            rawData, rawAnnotation = load.load(session, study, path)
            preprocessedDataAndLabels = label.label(
                rawData, rawAnnotation, session, study)
            featureExtraction.featureExtraction(
                preprocessedDataAndLabels, path, study, session)
コード例 #15
0
ファイル: show.py プロジェクト: lloydas/mdp_gui
    entry1.render(values_json["entry1"][1])
    buttonCanvas = button(newWindow, entry1.getTextVal,
                          values_json["buttonCanvas"][0],
                          values_json["buttonCanvas"][1])


# Build the main window's widgets from the positions/sizes in values_json.
# NOTE(review): several assignments below rebind the widget constructors
# themselves (`frame`, `label`) to instances, making those constructors
# unusable afterwards in this module — consider renaming the instances.
content = frame(root, values_json["content"][0], values_json["content"][1])
tree = treeview(content.getObject())
tree.addFile("a1.h5")
tree.addFile("a1.h5")
frame = frame(content.getObject(), values_json["frame"][0],
              values_json["frame"][1])
frame1 = frame.getObject()
button1 = button(frame1, frame1.quit, values_json["button"][0],
                 values_json["button"][1])
label = label(content.getObject(), values_json["label"][0],
              values_json["label"][1])
# entry = entry(content.getObject(), values_json["entry"][0], values_json["entry"][1])
# entry.render()
textBig = text(content.getObject(), values_json["textBig"][0],
               values_json["textBig"][1])
# NOTE(review): `entry` is only created in the commented-out line above, so
# this reference raises NameError unless `entry` is defined elsewhere — verify.
buttonPrintEntry = button(frame1, entry.getTextVal,
                          values_json["buttonEntry"][0],
                          values_json["buttonEntry"][1])
buttonPrintText = button(frame1, textBig.getTextVal,
                         values_json["buttonText"][0],
                         values_json["buttonText"][1])
progBar = progressBar(content.getObject(), values_json["progressBar"][0],
                      values_json["progressBar"][1])
buttonProgBar = button(frame1, progBar.step, values_json["progButton"][0],
                       values_json["progButton"][1])
menubar = menuBar(frame1, position=values_json["menubar"][0])
コード例 #16
0
ファイル: pynutbutter.py プロジェクト: gxela/pynutbutter
    #alternatively you can supply a different url for the browser to open
    #NOTE: webbrowser always, opens a new window, in my case konqueror
    opt={}
    opt['colors']={}
    c=caller_color=opt['colors']['caller_color']=opt['colors']['func_me_color']="white_on_blue"
    a=opt['colors']['colors_active'] = 1 #display stdout output with colors
    b=opt['colors']['output_caller'] = 0 #display function caller and called/current function
    d=opt['colors']['show_lineno_write'] = 0
    e=opt['colors']['show_lineno_caller'] = 1
    f=opt['colors']['break_all']=1

    soc=stdout_colors(colors_active=a,output_caller=b,caller_color=c,show_lineno_write=d,show_lineno_caller=e,break_all=f)
    soc.me(['ENTER:',__name__],caller_color)

    opt['source'] = sys.argv[0]
    print (label.label(text=opt['source'],timeout=.5))
    print ("\n%s BEST VIEWED IN FULLSCREEN"%opt['source'])
    try:
        time.sleep(2)
    except (KeyboardInterrupt,EOFError,e):
        pass
    opts, args = getopt.getopt(sys.argv[1:], 'j:f:s:v:o:k:',['jars=','files=', 'sites=','verbose=','options=','flavors='])

    _help_ =format_help_message(opt['source'])
    if len(args) < 1:
        print format_help_message(opt['source']),sys.exit()



    opt['site_name'] = None # store site domain dir eg. http://www.example.com/site/
    opt['file_name']= None
コード例 #17
0
def main(argv=None):
    # Flags are defined in train.py
    if FLAGS.hide_key_phrases:
        raise AssertionError(
            "Please turn the hide_key_phrases off for co-training.")

    # First generate cross validation data if it does not exist.

    if not os.path.exists(FLAGS.cross_validation_dir):
        print("Cross validation data folder does not exist. Creating one.")
        os.mkdir(FLAGS.cross_validation_dir)
        source_path = os.path.join(
            FLAGS.data_dir, 'test_cs_unlabeled_data_combined_inferred.txt')
        target_path = os.path.join(FLAGS.data_dir,
                                   'test_cs_labels_combined_inferred.txt')
        cross_validation_split(source_path,
                               target_path,
                               FLAGS.cross_validation_dir,
                               fold_number=FLAGS.cross_validation_fold)

    for cross_val_round_i in range(FLAGS.cross_validation_fold):

        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        latest_sentence_checkpoint_dir = None
        latest_pair_checkpoint_dir = None
        latest_checkpoint_dir = None
        used_unlabeled_kp_pair_set = set()
        # The validation set is separate from the test and training set from the very beginning.
        val_source_path = os.path.join(
            FLAGS.cross_validation_dir,
            "cross_validation_val_%d_data.txt" % (cross_val_round_i))
        val_target_path = os.path.join(
            FLAGS.cross_validation_dir,
            "cross_validation_val_%d_labels.txt" % (cross_val_round_i))
        val_labeled_data, val_labeled_result = util.read_data_labeled_part(
            val_source_path, val_target_path, FLAGS.sent_len, shuffle=False)
        # For legacy code reasons, I have to add a None column to the training data...
        val_data = np.array(
            zip(val_labeled_data, val_labeled_result,
                [None] * val_labeled_result.shape[0]))
        val_precision = []
        val_recall = []
        val_pr_auc = []  # Precision recall area under the curve.

        for round_i in range(FLAGS.max_co_training_rounds):
            # load dataset

            if round_i == 0:

                source_path = os.path.join(
                    FLAGS.cross_validation_dir,
                    "cross_validation_train_%d_data.txt" % (cross_val_round_i))
                target_path = os.path.join(
                    FLAGS.cross_validation_dir,
                    "cross_validation_train_%d_labels.txt" %
                    (cross_val_round_i))
                # source_path = os.path.join(FLAGS.data_dir, 'test_cs_unlabeled_data_combined_inferred_train.txt')
                # target_path = os.path.join(FLAGS.data_dir, 'test_cs_labels_combined_inferred_train.txt')
            else:
                source_path = os.path.join(
                    latest_checkpoint_dir,
                    'test_cs_unlabeled_data_combined_round_%d.txt' % (round_i))
                target_path = os.path.join(
                    latest_checkpoint_dir,
                    'test_cs_labels_combined_round_%d.txt' % (round_i))
            train_data, test_data = util.read_data(source_path,
                                                   target_path,
                                                   FLAGS.sent_len,
                                                   attention_path=None,
                                                   train_size=FLAGS.train_size,
                                                   hide_key_phrases=False)
            # I probably need to implement getting all the sentences with the same kp here as well?
            train_data_hide_kp, test_data_hide_kp = util.read_data(
                source_path,
                target_path,
                FLAGS.sent_len,
                attention_path=None,
                train_size=FLAGS.train_size,
                hide_key_phrases=True)

            print("Round %d. Reading labeled data from previous round." %
                  (round_i))
            labeled_data, labeled_result = util.read_data_labeled_part(
                source_path, target_path, FLAGS.sent_len, shuffle=False)
            unlabeled_data = util.read_data_unlabeled_part(
                source_path,
                target_path,
                FLAGS.sent_len,
                shuffle=False,
                hide_key_phrases=False)
            unlabeled_data_hide_kp = util.read_data_unlabeled_part(
                source_path,
                target_path,
                FLAGS.sent_len,
                shuffle=False,
                hide_key_phrases=True)

            # For each round, we draw a fresh set of unlabeled data and label them using the trained classifier.
            current_unlabeled_data, used_unlabeled_kp_pair_set, current_drawn_indices = draw_from_unused_unlabeled(
                unlabeled_data, used_unlabeled_kp_pair_set,
                FLAGS.test_size_per_round)
            current_unlabeled_data_hide_kp = [
                unlabeled_data_hide_kp[i] for i in current_drawn_indices
            ]
            # Currently this one works, but we need a version that throws away used ones. So we need to keep track of which
            # ones we've used.
            # current_unlabeled_data, current_drawn_indices = draw_from_unlabeled(unlabeled_data,
            #                                                                                  FLAGS.test_size_per_round)
            # current_unlabeled_data_hide_kp = [unlabeled_data_hide_kp[i] for i in current_drawn_indices]

            additional_label_index = []
            additional_label_result = []

            for classifier_i in range(2):
                additional_label_index.append([])
                additional_label_result.append([])
                if _is_sentence_train(classifier_i):
                    train.train(train_data_hide_kp, test_data_hide_kp)
                    latest_sentence_checkpoint_dir = util.get_latest_checkpoint_dir(
                        FLAGS.train_dir)
                else:
                    train_kp_pair_classifier.train(train_data, test_data)
                    latest_pair_checkpoint_dir = util.get_latest_checkpoint_dir(
                        FLAGS.train_dir)

                # Refresh the latest checkpoint.
                latest_checkpoint_dir = util.get_latest_checkpoint_dir(
                    FLAGS.train_dir)
                restore_param = util.load_from_dump(
                    os.path.join(latest_checkpoint_dir, 'flags.cPickle'))
                restore_param['train_dir'] = latest_checkpoint_dir
                if _is_sentence_train(classifier_i):
                    x_input, actual_output = label.label(
                        current_unlabeled_data_hide_kp, restore_param)
                else:
                    x_input, actual_output = train_kp_pair_classifier.label(
                        current_unlabeled_data, restore_param)

                actual_output_exp = np.exp(actual_output)
                actual_output_softmax = actual_output_exp / np.sum(
                    actual_output_exp, axis=1, keepdims=True)
                actual_output_argmax = np.argmax(actual_output_softmax, axis=1)
                # If we do not want "Neither" relation, then calculate max on only the first 2 dimensions.
                # sentence_i_list = np.argsort(-np.max(actual_output_softmax[..., :2], axis=1)).tolist()
                if FLAGS.use_product_method:
                    sentence_i_list = range(actual_output_softmax.shape[0])
                else:
                    sentence_i_list = np.argsort(
                        -np.max(actual_output_softmax, axis=1)).tolist()

                # We need the version with key phrases not replaced in order to print things correctly.
                sentence_indices_input = current_unlabeled_data[:, :-2]
                vocab_path = os.path.join(restore_param['data_dir'],
                                          'test_cs_vocab_combined')
                _, rev_vocab = preprocessing_util.initialize_vocabulary(
                    vocab_path)
                sentence_input = preprocessing_util.indices_to_sentences(
                    sentence_indices_input, rev_vocab, ignore_pad=True)

                kp_indices_input = current_unlabeled_data[:, -2:]

                with open(
                        os.path.join(latest_checkpoint_dir,
                                     'added_instances.tsv'),
                        "w") as inferred_instances_f:

                    inferred_instances_f.write(
                        'Type\tSentence\t\tProbability [A is-a B, B is-a A, Neither]\n'
                    )
                    additional_label_num_positive = 0
                    additional_label_num_negative = 0
                    for sentence_i in sentence_i_list:
                        # # This is the current max probability
                        # current_softmax = actual_output_softmax[sentence_i,actual_output_argmax[sentence_i]]
                        sentence = sentence_input[sentence_i]
                        # Label the key phrases of interest in the current sentence with *.
                        sentence[kp_indices_input[sentence_i, 1]] += '*'
                        sentence[kp_indices_input[sentence_i, 0]] += '*'
                        if actual_output_argmax[sentence_i] == 2:
                            current_type = 'Neither'
                            if not FLAGS.use_product_method and additional_label_num_negative >= FLAGS.co_training_has_relation_num_label_negative:
                                continue
                            else:
                                additional_label_num_negative += 1
                        if actual_output_argmax[sentence_i] == 0:
                            current_type = 'A is-a B'
                            if not FLAGS.use_product_method and additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive:
                                continue
                            else:
                                additional_label_num_positive += 1
                        elif actual_output_argmax[sentence_i] == 1:
                            current_type = 'B is-a A'
                            if not FLAGS.use_product_method and additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive:
                                continue
                            else:
                                additional_label_num_positive += 1

                        inferred_instances_f.write(
                            '%s\t%s\t\t%s\n' %
                            (current_type, ' '.join(sentence),
                             str(actual_output_softmax[sentence_i])))

                        if not FLAGS.use_product_method:
                            additional_label_index[classifier_i].append(
                                sentence_i)
                            # If use_product_method is off, then the result is the label.
                            current_additional_label_result = np.zeros((3, ))
                            current_additional_label_result[
                                actual_output_argmax[sentence_i]] = 1
                            additional_label_result[classifier_i].append(
                                current_additional_label_result)
                            if additional_label_num_positive >= FLAGS.co_training_has_relation_num_label_positive and \
                                additional_label_num_negative >= FLAGS.co_training_has_relation_num_label_negative:
                                break
                        else:
                            # If use_product_method is on, then the result is the output softmax, i.e. probability.
                            current_additional_label_result = actual_output_softmax[
                                sentence_i]
                            additional_label_result[classifier_i].append(
                                current_additional_label_result)

                print(
                    "Number of additional data points added through co-training classifier %d"
                    ": %d positives and %d negatives out of %d unlabeled instances."
                    % (classifier_i, additional_label_num_positive,
                       additional_label_num_negative, len(sentence_i_list)))

            # Check if there are any conflicts and merge the additional labels labeled by the two classifier.
            if not FLAGS.use_product_method:
                merged_additional_label_index, merged_additional_label_result = check_conflict_and_merge(
                    additional_label_index, additional_label_result)
            else:
                merged_additional_label_index, merged_additional_label_result = compute_product_and_save(
                    additional_label_result, latest_checkpoint_dir,
                    sentence_input, kp_indices_input)

            latest_checkpoint_dir = util.get_latest_checkpoint_dir(
                FLAGS.train_dir)
            save_source_path = os.path.join(
                latest_checkpoint_dir,
                'test_cs_unlabeled_data_combined_round_%d.txt' % (round_i + 1))
            save_target_path = os.path.join(
                latest_checkpoint_dir,
                'test_cs_labels_combined_round_%d.txt' % (round_i + 1))
            # Now recover the original index in the unlabeled data.
            merged_additional_label_index = [
                current_drawn_indices[i] for i in merged_additional_label_index
            ]
            # Save the additionally labeled 2p+2n examples.
            save_additional_label(unlabeled_data,
                                  merged_additional_label_index,
                                  merged_additional_label_result, labeled_data,
                                  labeled_result, save_source_path,
                                  save_target_path)

            # I also need to get rid of those inferred instances from the whole bag of unlabeled dataset that we're drawing
            # from at each round.
            before_inference_unlabeled_data = util.read_data_unlabeled_part(
                save_source_path,
                save_target_path,
                FLAGS.sent_len,
                shuffle=False)
            inferred_additional_label_index, inferred_additional_label_result = infer_from_labeled(
                save_source_path,
                save_target_path,
                FLAGS.sent_len,
                vocab_path,
                do_save=True,
                save_source_path=save_source_path,
                save_target_path=save_target_path)
            inferred_additional_data = before_inference_unlabeled_data[
                inferred_additional_label_index]
            inferred_additional_sentence_index = inferred_additional_data[:, :
                                                                          -2]
            inferred_additional_kp_index = inferred_additional_data[:, -2:]
            inferred_additional_sentence_input = preprocessing_util.indices_to_sentences(
                inferred_additional_sentence_index, rev_vocab, ignore_pad=True)

            inferred_additional_label_result_argmax = np.argmax(
                inferred_additional_label_result, axis=1)
            with open(
                    os.path.join(latest_checkpoint_dir,
                                 'inferred_instances.tsv'),
                    "w") as inferred_instances_f:
                inferred_instances_f.write('Type\tSentence\n')

                for sentence_i in range(inferred_additional_kp_index.shape[0]):
                    # # This is the current max probability
                    # current_softmax = actual_output_softmax[sentence_i,actual_output_argmax[sentence_i]]
                    sentence = inferred_additional_sentence_input[sentence_i]
                    # Label the key phrases of interest in the current sentence with *.
                    sentence[inferred_additional_kp_index[sentence_i,
                                                          1]] += '*'
                    sentence[inferred_additional_kp_index[sentence_i,
                                                          0]] += '*'
                    if inferred_additional_label_result_argmax[
                            sentence_i] == 2:
                        current_type = 'Neither'
                    if inferred_additional_label_result_argmax[
                            sentence_i] == 0:
                        current_type = 'A is-a B'
                    elif inferred_additional_label_result_argmax[
                            sentence_i] == 1:
                        current_type = 'B is-a A'
                    inferred_instances_f.write(
                        '%s\t%s\n' % (current_type, ' '.join(sentence)))

            # Now all is left is to use the validation dataset to calculate the area under precision recall curve.
            val_precision.append([[[] for _ in range(3)] for _ in range(3)])
            val_recall.append([[[] for _ in range(3)] for _ in range(3)])
            val_pr_auc.append([[0.0, 0.0, 0.0] for _ in range(3)])
            # Each time we calculate the precision recall for classifier 1, 2, and combined.
            for classifier_j in range(3):
                if classifier_j == 0:
                    # Use classifier 1.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_sentence_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_sentence_checkpoint_dir
                    _, val_actual_output = label.label(val_labeled_data,
                                                       restore_param)
                elif classifier_j == 1:
                    # Use classifier 2.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_pair_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_pair_checkpoint_dir
                    _, val_actual_output = train_kp_pair_classifier.label(
                        val_labeled_data, restore_param)
                else:
                    # Use both classifier and, due to design choice of caring more about precision than recall, label
                    # an instance as having a subcategory relation only when both classifier agrees, otherwise output
                    # no relation, aka `Neither`.
                    restore_param = util.load_from_dump(
                        os.path.join(latest_sentence_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_sentence_checkpoint_dir
                    _, val_actual_output_sentence = label.label(
                        val_labeled_data, restore_param)
                    restore_param = util.load_from_dump(
                        os.path.join(latest_pair_checkpoint_dir,
                                     'flags.cPickle'))
                    restore_param['train_dir'] = latest_pair_checkpoint_dir
                    _, val_actual_output_pair = train_kp_pair_classifier.label(
                        val_labeled_data, restore_param)
                    val_actual_output_sentence_argmax = np.argmax(
                        val_actual_output_sentence, axis=1)
                    val_actual_output_pair_argmax = np.argmax(
                        val_actual_output_pair, axis=1)

                    # Label the actual output as [1,0,0] if both classify as A is B, [0,1,0] if both classify as B is A,
                    # and [0,0,1] in all other situations.
                    val_actual_output = np.array([[
                        1 if k == val_actual_output_sentence_argmax[j] else 0
                        for k in range(3)
                    ] if np.all(
                        val_actual_output_sentence_argmax[j] ==
                        val_actual_output_pair_argmax[j]) else [
                            0, 0, 1
                        ] for j in range(val_actual_output_sentence.shape[0])])

                val_actual_output_exp = np.exp(val_actual_output)
                val_actual_output_softmax = val_actual_output_exp / np.sum(
                    val_actual_output_exp, axis=1, keepdims=True)
                for i in range(3):
                    val_precision[round_i][classifier_j][i], val_recall[
                        round_i][classifier_j][i], _ = precision_recall_curve(
                            val_labeled_result[:, i],
                            val_actual_output_softmax[:, i])
                    val_pr_auc[round_i][classifier_j][
                        i] = average_precision_score(
                            val_labeled_result[:, i],
                            val_actual_output_softmax[:, i],
                        )

        # Lastly output the precision recall file for each classifier and each category.
        with open(os.path.join(latest_checkpoint_dir, 'pr_auc.tsv'), "w") as f:
            for classifier_j in range(3):
                for i in range(3):
                    f.write(
                        "Classifier%d_%s\t%s\n" %
                        (classifier_j, CATEGORY_NAME[i], "\t".join([
                            str(val_pr_auc[round_i][classifier_j][i])
                            for round_i in range(FLAGS.max_co_training_rounds)
                        ])))

        np.save(os.path.join(latest_checkpoint_dir, 'precision_recall_data'),
                np.array([val_precision, val_recall, val_pr_auc]))
コード例 #18
0
# Group file base names (extension stripped) by their cluster id.
# `result[i]` is the cluster assignment for `files[i]`.
for cluster_id, fname in zip(result, files):
    output.setdefault(cluster_id, []).append(fname.split('.')[0])
# Keep clusters in ascending cluster-id order for stable downstream output.
output = OrderedDict(sorted(output.items(), key=lambda t: t[0]))

# for k, v in output.items():
#     print(len(v))

# with open('output.csv', 'w') as f:
#     writer = csv.writer(f)
#     for i in output:
#         writer.writerow(output[i])

print(">>>>> Labeling")
labels_list = label(output, files, terms, tfidf_path)
# newline='' is required by the csv module to avoid doubled line endings
# on platforms where the default text-mode translation adds '\r'.
with open('label.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for labels in labels_list:
        # NOTE: loop variable renamed from `label` — the original shadowed
        # the label() function called above.
        for label_row in labels:
            writer.writerow(label_row)
        writer.writerow(['\n'])

#unique, counts = np.unique(result, return_counts=True)
#d = dict(zip(unique, counts))
#for i in d:
#    print(str(i) + ' : ' + str(d[i]))

#print(result)

visualization(result, cluster_count, doc_vecs)
コード例 #19
0
def run():
    # initialize VGG Model and PCA
    iset = init.Init()
    # initialize neural network model
    model = networks.Network()
    model.init_model()
    # initialize global instance
    uset = users.Users()

    # store special features in memory
    # dset_special = dataset.Dataset(set.PATH_TO_SPECIAL)
    dset_special = None
    print "Dataset Loaded."
    # set normal features in memory to false
    is_normal_loaded = True
    tset_name = None
    is_reloaded = False
    m_checkpoints = 0

    while True:

        queue = db.lrange(set.REQUEST_QUEUE, set.REQUEST_START, set.REQUEST_END)
        q_uid = None
        # initialize local instance
        select = selectonly.Select()
        finalize = save.Save()
        viewer = view.View()
        retrain_v = retrainView.retrainView()
        retrain_h = retrainHeatmap.retrainHeatmap()
        heat = heatmap.Heatmap()
        t_train = train.Train()
        report_label = label.label()
        report_count = count.count()
        report_map = mapping.map()

        for q in queue:

            q = json.loads(q.decode("utf-8"))
            q_uid = q["uid"]
            target = q["target"]
            session_uid = q["uid"]
            dataSetPath = set.DATASET_DIR + q["dataset"]
            pcaPath = set.DATASET_DIR + q["pca"]
            # if specific features then set m_loaded to true
            is_normal_loaded = False if dataSetPath == set.PATH_TO_SPECIAL else True

            if target == "label":
                report_label.setData(q)

            if target == "count":
                report_count.setData(q)

            if target == "map":
                report_map.setData(q)

            if target == 'selectonly':
                select.setData(q)

            if target == 'save':
                finalize.setData(q)

            if target == 'view':
                viewer.setData(q)

            if target == 'retrainView':
                retrain_v.setData(q)

            if target == 'retrainHeatmap':
                retrain_h.setData(q)

            if target == 'heatmapAll':
                heatmaps = q["viewJSONs"]

            if target == 'heatmap':
                heat.setData(q)

            if target == 'train':
                t_train.setData(q)

            if target == 'reload':
                t_path = set.TRAININGSET_DIR + q["trainingSetName"]
                is_reloaded = True

            if target == 'reviewSave':
                q_samples = json.loads(q["samples"])

        if q_uid is not None:

            print target, " Session Start ....."

            no_uid = True
            uidx = 0

            # find current user Index
            for i in range(len(uset.users)):
                if uset.users[i]['uid'] == session_uid:
                    uidx = i
                    no_uid = False

            if no_uid:
                # set users data
                uset.addUser(session_uid)

            if is_normal_loaded:
                dset = dataset.Dataset(dataSetPath)
            else:
                dset = dset_special

            PCA = joblib.load(pcaPath)

            if target == 'selectonly':
                uset.setIter(uidx, select.iter)
                print "Predict Start ... "
                t0 = time()
                scores = model.predict_prob(dset.features)
                t1 = time()
                print "Predict took ", t1 - t0
                # Find uncertain samples
                data = select.getData(scores, dset.slideIdx, dset.slides, dset.x_centroid, dset.y_centroid)
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'view':
                slide_idx = dset.getSlideIdx(viewer.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                feature_set = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)

                print "Predict Start ... "
                t0 = time()
                predictions = model.predict(feature_set)
                t1 = time()
                print "Predict took ", t1 - t0
                object_idx = load(
                    viewer.left, viewer.right, viewer.top, viewer.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float)
                )
                data = {}

                for i in object_idx:
                    data[str(x_centroid_set[i][0])+'_'+str(y_centroid_set[i][0])] = str(predictions[i])

                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'heatmap':
                slide_idx = dset.getSlideIdx(heat.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                feature_set = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)

                print "Predict Start ... "
                t0 = time()
                if set.IS_HEATMAP == False:
                    scores = model.predict_prob(feature_set)
                t1 = time()
                print "Predict took ", t1 - t0
                # set x and y maps
                heat.setXandYmap()
                # write heatmaps
                heat.setHeatMap(x_centroid_set, y_centroid_set, scores)
                # get heatmap data
                data = heat.getData(0)

                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'heatmapAll':
                data = []
                index = 0

                t0 = time()
                scores = model.predict_prob(dset.features)
                t1 = time()
                print "Predict took ", t1 - t0

                for h in heatmaps:

                    h['uid'] = session_uid
                    heat.setData(h)

                    slide_idx = dset.getSlideIdx(heat.slide)
                    object_num = dset.getObjNum(slide_idx)
                    data_idx = dset.getDataIdx(slide_idx)
                    # feature_set = dset.getFeatureSet(data_idx, object_num)
                    x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                    y_centroid_set = dset.getYcentroidSet(data_idx, object_num)
                    score_set = scores[data_idx: data_idx+object_num]
                    # set x and y maps
                    heat.setXandYmap()
                    # write heatmaps
                    heat.setHeatMap(x_centroid_set, y_centroid_set, score_set)
                    # get heatmap data
                    data_k = heat.getData(index)
                    data.append(data_k)
                    index += 1

                # print data
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'reload':
                # initialize augment
                agen = augments.Augments()
                # set user train samples
                # uset.setReloadedData(uidx, t_path, dset.slides)
                uset.setReloadedData(uidx, t_path)

                sample_size = len(uset.users[uidx]['samples'])

                m_checkpoints = uset.users[uidx]['samples'][sample_size-1]['checkpoints']

                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label']

                tset_path = t_path.split('/')[-1]
                tset_name = tset_path.split('.')[0]

                print "Training ... ", len(train_labels)
                t0 = time()
                model.train_model(train_features, train_labels, tset_name)
                t1 = time()
                print "Training took ", t1 - t0

                data = {"success": 'pass'}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'label':
                # initialize augment
                agen = augments.Augments()
                # set user train samples
                uset.setReloadedData(uidx, report_label.trainSet)

                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label']

                print "Training ... ", len(train_labels)
                t0 = time()
                model.train_model(train_features, train_labels, report_label.classifier)
                t1 = time()
                print "Training took ", t1 - t0

                slide_idx = dset.getSlideIdx(report_label.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                test_features = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)
                print "Testing Start ... "
                t0 = time()
                predicts = model.predict(test_features)
                t1 = time()
                print "Predict took ", t1 - t0

                inputImageFile = '/datasets/tif/'+ report_label.slide + '.svs.dzi.tif'

                bold = 512
                bold_left = report_label.left - bold
                bold_top = report_label.top - bold
                bold_bottom = report_label.bottom + bold
                bold_right = report_label.right + bold
                bold_width = report_label.width + 2*bold
                bold_height = report_label.height + 2*bold

                ts = large_image.getTileSource(inputImageFile)

                region = dict(
                    left=report_label.left, top=report_label.top,
                    width=report_label.width, height=report_label.height,
                )

                im_region = ts.getRegion(
                    region=region, format=large_image.tilesource.TILE_FORMAT_NUMPY
                )[0]

                mydb = mysql.connector.connect(
                  host=set.MYSQL_HOST,
                  user="******",
                  passwd="guest",
                  database="nuclei",
                  charset='utf8',
                  use_unicode=True
                )

                boundaryTablename = 'sregionboundaries'

                runcursor = mydb.cursor()

                query = 'SELECT centroid_x, centroid_y, boundary from ' + boundaryTablename + ' where slide="' +  report_label.slide + \
                '" AND centroid_x BETWEEN ' + str(report_label.left) + ' AND ' + str(report_label.right) + \
                ' AND centroid_y BETWEEN ' + str(report_label.top) + ' AND ' + str(report_label.bottom)

                runcursor.execute(query)

                boundarySet = runcursor.fetchall()

                # find region index from hdf5
                object_idx = load(
                    report_label.left, report_label.right, report_label.top, report_label.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float)
                )

                # set an array for boundary points in a region to zero
                im_bold = np.zeros((bold_height, bold_width), dtype=np.uint8)

                for i in object_idx:
                    for j in range(len(boundarySet)):
                      x = int(boundarySet[j][0])
                      y = int(boundarySet[j][1])
                      boundaryPoints = []
                      if x == int(x_centroid_set[i, 0]) and y == int(y_centroid_set[i, 0]):
                          object = boundarySet[j][2].encode('utf-8').split(' ')
                          object_points = []
                          for p in range(len(object)-1):
                              intP = map(int, object[p].split(','))
                              intP[0] = intP[0] - report_label.left + bold
                              intP[1] = intP[1] - report_label.top + bold
                              object_points.append(intP)
                          boundaryPoints.append(np.asarray(object_points))
                          cv2.fillPoly(im_bold, boundaryPoints, 255 if predicts[i] > 0 else 128)

                im_out = im_bold[bold:bold+report_label.height, bold:bold+report_label.width]

                imsave(report_label.inFile, im_out)

                runcursor.close()
                mydb.close()

                print ("label success ", report_label.inFile)
                data = {"success": report_label.outFile}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

                uset.users = []
                uset.u_size = 0

                model = networks.Network()
                model.init_model()
                print ("label done")

            if target == 'count':
                # initialize augment
                agen = augments.Augments()
                # set user train samples
                uset.setReloadedData(uidx, report_count.trainSet)

                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label']

                print "Training ... ", len(train_labels)
                t0 = time()
                model.train_model(train_features, train_labels, report_count.classifier)
                t1 = time()
                print "Training took ", t1 - t0

                print "Testing Start ... "
                t0 = time()
                predicts = model.predict(dset.features)
                t1 = time()
                print "Predict took ", t1 - t0

                # find positive and negative numbers for each slide
                pos_num = []
                neg_num = []

                for i in range(dset.n_slides):
                    if i == len(dset.dataIdx) - 1:
                        predict = predicts[dset.dataIdx[i, 0]:]
                    else:
                        predict = predicts[dset.dataIdx[i, 0]: dset.dataIdx[i+1, 0]]
                    pos = len(predict[predict>0])
                    neg = len(predict) - pos
                    pos_num.append(pos)
                    neg_num.append(neg)

                print('>> Writing count file')
                out_file = open(report_count.inFile, 'w')

                out_file.write("Slide\t")
                out_file.write("Predicted positive (superpixels)\t")
                out_file.write("Predicted negative (superpixels)\t")                
                out_file.write("\n")

                for i in range(len(dset.slides)):
                    out_file.write("%s\t" % dset.slides[i])
                    out_file.write("%d\t" % pos_num[i])
                    out_file.write("%d\t" % neg_num[i])
                    out_file.write("\n")

                out_file.close()
                print ("count success ", report_count.inFile)
                data = {"success": report_count.outFile}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

                uset.users = []
                uset.u_size = 0

                model = networks.Network()
                model.init_model()
                print ("count done")

            if target == 'map':
                # initialize augment
                agen = augments.Augments()
                # set user train samples
                uset.setReloadedData(uidx, report_map.trainSet)

                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    train_features[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[i+sample_size:i+sample_size+agen.AUG_BATCH_SIZE] = uset.users[uidx]['augments'][i]['label']

                print "Training ... ", len(train_labels)
                t0 = time()
                model.train_model(train_features, train_labels, report_map.classifier)
                t1 = time()
                print "Training took ", t1 - t0

                slide_idx = dset.getSlideIdx(report_map.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                test_features = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)

                print "Testing Start ... "
                t0 = time()
                predicts = model.predict(test_features)
                t1 = time()
                print "Predict took ", t1 - t0

                output = h5py.File(report_map.inFile, 'w')
                output.create_dataset('features', data=test_features)
                output.create_dataset('predicts', data=predicts)
                output.create_dataset('x_centroid', data=x_centroid_set)
                output.create_dataset('y_centroid', data=y_centroid_set)
                output.create_dataset('slides', data=[report_map.slide])
                output.close()

                print ("map success ", report_map.inFile)
                data = {"success": report_map.outFile}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

                uset.users = []
                uset.u_size = 0

                model = networks.Network()
                model.init_model()
                print ("map done")

            if target == 'save':
                data = finalize.getData(uset.users[uidx])
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'review':
                # Build a review payload describing every stored training
                # sample for this user (labels are reported as +1/-1).
                entries = []
                for stored in uset.users[uidx]['samples']:
                    entries.append({
                        'id': str(stored['id']),
                        'label': 1 if stored['label'] == 1 else -1,
                        'iteration': int(stored['iteration']),
                        'slide': str(stored['slide']),
                        'centX': str(stored['centX']),
                        'centY': str(stored['centY']),
                        'boundary': "",
                        'maxX': 0,
                        'maxY': 0,
                    })
                data = {'review': entries}

                # Answer the request and pop it off the Redis queue.
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'train':
                # increase checkpoint by 1
                m_checkpoints += 1
                # initialize augment generator for this training round
                agen = augments.Augments()
                uset.setIter(uidx, t_train.iter)

                # Ingest every sample in the request: look up its feature,
                # generate an augmentation batch, and store both per user.
                for sample in t_train.samples:
                    # init sample and augment record templates
                    init_sample = dict(
                        id=0, f_idx=0, checkpoints=0,
                        aurl=None, feature=None, label=0,
                        iteration=0, centX=0, centY=0,
                        slideIdx=0, slide=None
                    )
                    init_augment = dict(
                        id=[], checkpoints=[], feature=[], label=[]
                    )

                    # Drop any previously stored copy of this sample (matched
                    # by db id) together with its augmentation batch.
                    remove_idx = []
                    for u in range(len(uset.users[uidx]['samples'])):
                        if uset.users[uidx]['samples'][u]['id'] == sample['id']:
                            remove_idx.append(u)

                    # BUG FIX: pop from the highest index down — popping in
                    # ascending order shifts the remaining elements, so later
                    # stored indices would remove the wrong entries when more
                    # than one matches.
                    for r in reversed(remove_idx):
                        uset.users[uidx]['samples'].pop(r)
                        uset.users[uidx]['augments'].pop(r)

                    # add feature
                    init_sample['id'] = sample['id']
                    init_sample['aurl'] = str(sample['aurl'])
                    init_sample['slide'] = str(sample['slide'])

                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    object_num = dset.getObjNum(slide_idx)
                    data_idx = dset.getDataIdx(slide_idx)
                    feature_set = dset.getFeatureSet(data_idx, object_num)
                    x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                    y_centroid_set = dset.getYcentroidSet(data_idx, object_num)
                    slideIdx_set = dset.getSlideIdxSet(data_idx, object_num)

                    c_idx = getIdx(
                        x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx
                    )

                    f_idx = data_idx + c_idx

                    init_sample['f_idx'] =  f_idx
                    init_sample['feature'] = feature_set[c_idx]
                    init_sample['label'] = 1 if sample['label'] == 1 else 0
                    init_sample['iteration'] = t_train.iter
                    init_sample['centX'] = sample['centX']
                    init_sample['centY'] = sample['centY']
                    init_sample['checkpoints'] = m_checkpoints

                    # add augment features
                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    slide_mean = dset.getWSI_Mean(slide_idx)
                    slide_std = dset.getWSI_Std(slide_idx)

                    a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std)
                    a_featureSet = iset.FC1_MODEL.predict(a_imgs)
                    a_featureSet = PCA.transform(a_featureSet)
                    a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8)
                    a_idSet = []
                    a_checkpointSet = []
                    for i in range(agen.AUG_BATCH_SIZE):
                        a_idSet.append(init_sample['id'])
                        a_checkpointSet.append(init_sample['checkpoints'])
                    if init_sample['label'] > 0:
                        a_labelSet.fill(1)

                    init_augment['id'] = a_idSet
                    init_augment['feature'] = a_featureSet
                    init_augment['label'] = a_labelSet
                    init_augment['checkpoints'] = a_checkpointSet

                    uset.setAugmentData(uidx, init_augment)
                    uset.setTrainSampleData(uidx, init_sample)

                # Assemble the training matrix. Layout: rows [0, sample_size)
                # hold the raw samples; rows [sample_size, train_size) hold
                # each sample's augmentation batch, one AUG_BATCH_SIZE slab
                # per sample.
                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    # BUG FIX: augment rows for sample i live at
                    # sample_size + i*AUG_BATCH_SIZE. The previous
                    # "i + sample_size" offset advanced by 1 per sample, so
                    # consecutive batches overlapped/overwrote each other and
                    # the tail of the arrays stayed zero-filled.
                    a_start = sample_size + i * agen.AUG_BATCH_SIZE
                    a_end = a_start + agen.AUG_BATCH_SIZE
                    train_features[a_start:a_end] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[a_start:a_end] = uset.users[uidx]['augments'][i]['label']

                # train_labels = to_categorical(train_labels, num_classes=2)
                # Remember the classifier name from the first training request.
                if tset_name is None:
                    tset_name = t_train.classifier

                print "Training ... ", len(train_labels)
                t0 = time()
                model.train_model(train_features, train_labels, tset_name)
                t1 = time()
                print "Training took ", t1 - t0

                # Acknowledge the request and pop it off the Redis queue.
                data = {"success": 'pass'}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'retrainView':
                # Same ingest-and-retrain flow as 'train', but afterwards the
                # current viewport is re-scored (see below).
                m_checkpoints += 1
                # initialize augment generator for this round
                agen = augments.Augments()

                uset.setIter(uidx, retrain_v.iter)

                print "Augment ... ", len(retrain_v.samples)
                t0 = time()
                for sample in retrain_v.samples:
                    # init sample and augment record templates
                    init_sample = dict(
                        id=0, f_idx=0, checkpoints=0,
                        aurl=None, feature=None, label=0,
                        iteration=0, centX=0, centY=0,
                        slideIdx=0, slide=None
                    )
                    init_augment = dict(
                        id=[], checkpoints=[], feature=[], label=[]
                    )

                    # Remove any previously stored copy of this sample
                    # (matched by db id) and its augmentation batch.
                    remove_idx = []
                    for u in range(len(uset.users[uidx]['samples'])):
                        if uset.users[uidx]['samples'][u]['id'] == sample['id']:
                            remove_idx.append(u)

                    # BUG FIX: pop from the highest index down — ascending
                    # pops shift the list and make later stored indices remove
                    # the wrong entries when more than one matches.
                    for r in reversed(remove_idx):
                        uset.users[uidx]['samples'].pop(r)
                        uset.users[uidx]['augments'].pop(r)

                    # add feature
                    init_sample['id'] = sample['id']
                    init_sample['aurl'] = str(sample['aurl'])
                    init_sample['slide'] = str(sample['slide'])

                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    object_num = dset.getObjNum(slide_idx)
                    data_idx = dset.getDataIdx(slide_idx)
                    feature_set = dset.getFeatureSet(data_idx, object_num)
                    x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                    y_centroid_set = dset.getYcentroidSet(data_idx, object_num)
                    slideIdx_set = dset.getSlideIdxSet(data_idx, object_num)

                    c_idx = getIdx(
                        x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx
                    )

                    f_idx = data_idx + c_idx

                    init_sample['f_idx'] =  f_idx
                    init_sample['feature'] = feature_set[c_idx]
                    init_sample['label'] = 1 if sample['label'] == 1 else 0
                    init_sample['iteration'] = retrain_v.iter
                    init_sample['centX'] = sample['centX']
                    init_sample['centY'] = sample['centY']
                    init_sample['checkpoints'] = m_checkpoints

                    # add augment features
                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    slide_mean = dset.getWSI_Mean(slide_idx)
                    slide_std = dset.getWSI_Std(slide_idx)

                    a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std)
                    a_featureSet = iset.FC1_MODEL.predict(a_imgs)
                    a_featureSet = PCA.transform(a_featureSet)
                    a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8)
                    a_idSet = []
                    a_checkpointSet = []
                    for i in range(agen.AUG_BATCH_SIZE):
                        a_idSet.append(init_sample['id'])
                        a_checkpointSet.append(init_sample['checkpoints'])
                    if init_sample['label'] > 0:
                        a_labelSet.fill(1)

                    init_augment['id'] = a_idSet
                    init_augment['feature'] = a_featureSet
                    init_augment['label'] = a_labelSet
                    init_augment['checkpoints'] = a_checkpointSet

                    uset.setAugmentData(uidx, init_augment)
                    uset.setTrainSampleData(uidx, init_sample)

                t1 = time()
                print "Augmentation took ", t1 - t0
                # Assemble the training matrix. Layout: rows [0, sample_size)
                # hold the raw samples; rows [sample_size, train_size) hold
                # each sample's augmentation batch, one AUG_BATCH_SIZE slab
                # per sample.
                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    # BUG FIX: augment rows for sample i start at
                    # sample_size + i*AUG_BATCH_SIZE; the previous
                    # "i + sample_size" offset made consecutive batches
                    # overlap and left the tail of the arrays zero-filled.
                    a_start = sample_size + i * agen.AUG_BATCH_SIZE
                    a_end = a_start + agen.AUG_BATCH_SIZE
                    train_features[a_start:a_end] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[a_start:a_end] = uset.users[uidx]['augments'][i]['label']

                # train_labels = to_categorical(train_labels, num_classes=2)
                # Remember the classifier name from the first training request.
                if tset_name is None:
                    tset_name = retrain_v.classifier

                t0 = time()
                model.train_model(train_features, train_labels, tset_name)
                t1 = time()
                print "Training took ", t1 - t0, " ", len(train_labels), "Samples"

                # Re-score every object on the viewed slide with the updated
                # model.
                slide_idx = dset.getSlideIdx(retrain_v.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                feature_set = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)

                print "Testing Start ... "
                t0 = time()
                predictions = model.predict(feature_set)
                t1 = time()
                print "Predict took ", t1 - t0

                # Keep only the objects whose centroids fall inside the
                # current viewport bounds, keyed "x_y" for the front end.
                object_idx = load(
                    retrain_v.left, retrain_v.right, retrain_v.top, retrain_v.bottom, x_centroid_set.astype(np.float), y_centroid_set.astype(np.float)
                )
                data = {}
                for i in object_idx:
                    data[str(x_centroid_set[i][0])+'_'+str(y_centroid_set[i][0])] = str(predictions[i])

                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'retrainHeatmap':
                # Same ingest-and-retrain flow as 'train', but afterwards a
                # heatmap of scores is produced for the requested slide.
                m_checkpoints += 1
                # initialize augment generator for this round
                agen = augments.Augments()

                uset.setIter(uidx, retrain_h.iter)

                for sample in retrain_h.samples:
                    # init sample and augment record templates
                    init_sample = dict(
                        id=0, f_idx=0, checkpoints=0,
                        aurl=None, feature=None, label=0,
                        iteration=0, centX=0, centY=0,
                        slideIdx=0, slide=None
                    )
                    init_augment = dict(
                        id=[], checkpoints=[], feature=[], label=[]
                    )

                    # Remove any previously stored copy of this sample
                    # (matched by db id) and its augmentation batch.
                    remove_idx = []
                    for u in range(len(uset.users[uidx]['samples'])):
                        if uset.users[uidx]['samples'][u]['id'] == sample['id']:
                            remove_idx.append(u)

                    # BUG FIX: pop from the highest index down — ascending
                    # pops shift the list and make later stored indices remove
                    # the wrong entries when more than one matches.
                    for r in reversed(remove_idx):
                        uset.users[uidx]['samples'].pop(r)
                        uset.users[uidx]['augments'].pop(r)

                    # add feature
                    init_sample['id'] = sample['id']
                    init_sample['aurl'] = str(sample['aurl'])
                    init_sample['slide'] = str(sample['slide'])

                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    object_num = dset.getObjNum(slide_idx)
                    data_idx = dset.getDataIdx(slide_idx)
                    feature_set = dset.getFeatureSet(data_idx, object_num)
                    x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                    y_centroid_set = dset.getYcentroidSet(data_idx, object_num)
                    slideIdx_set = dset.getSlideIdxSet(data_idx, object_num)

                    c_idx = getIdx(
                        x_centroid_set.astype(np.float), y_centroid_set.astype(np.float), slideIdx_set.astype(np.int), np.float32(sample['centX']), np.float32(sample['centY']), slide_idx
                    )

                    f_idx = data_idx + c_idx

                    init_sample['f_idx'] =  f_idx
                    init_sample['feature'] = feature_set[c_idx]
                    init_sample['label'] = 1 if sample['label'] == 1 else 0
                    init_sample['iteration'] = retrain_h.iter
                    init_sample['centX'] = sample['centX']
                    init_sample['centY'] = sample['centY']
                    init_sample['checkpoints'] = m_checkpoints

                    # add augment features
                    slide_idx = dset.getSlideIdx(init_sample['slide'])
                    slide_mean = dset.getWSI_Mean(slide_idx)
                    slide_std = dset.getWSI_Std(slide_idx)

                    a_imgs = agen.prepare_image(init_sample['aurl'], slide_mean, slide_std)
                    a_featureSet = iset.FC1_MODEL.predict(a_imgs)
                    a_featureSet = PCA.transform(a_featureSet)
                    a_labelSet = np.zeros((agen.AUG_BATCH_SIZE, )).astype(np.uint8)
                    a_idSet = []
                    a_checkpointSet = []
                    for i in range(agen.AUG_BATCH_SIZE):
                        a_idSet.append(init_sample['id'])
                        a_checkpointSet.append(init_sample['checkpoints'])
                    if init_sample['label'] > 0:
                        a_labelSet.fill(1)

                    init_augment['id'] = a_idSet
                    init_augment['feature'] = a_featureSet
                    init_augment['label'] = a_labelSet
                    init_augment['checkpoints'] = a_checkpointSet

                    uset.setAugmentData(uidx, init_augment)
                    uset.setTrainSampleData(uidx, init_sample)

                # Assemble the training matrix. Layout: rows [0, sample_size)
                # hold the raw samples; rows [sample_size, train_size) hold
                # each sample's augmentation batch, one AUG_BATCH_SIZE slab
                # per sample.
                sample_size = len(uset.users[uidx]['samples'])
                sample_batch_size = agen.AUG_BATCH_SIZE * sample_size
                train_size = sample_size + sample_batch_size

                train_features = np.zeros((train_size, set.FEATURE_DIM))
                train_labels = np.zeros((train_size, ))

                for i in range(sample_size):
                    train_features[i] = uset.users[uidx]['samples'][i]['feature']
                    train_labels[i] = uset.users[uidx]['samples'][i]['label']
                    # BUG FIX: augment rows for sample i start at
                    # sample_size + i*AUG_BATCH_SIZE; the previous
                    # "i + sample_size" offset made consecutive batches
                    # overlap and left the tail of the arrays zero-filled.
                    a_start = sample_size + i * agen.AUG_BATCH_SIZE
                    a_end = a_start + agen.AUG_BATCH_SIZE
                    train_features[a_start:a_end] = uset.users[uidx]['augments'][i]['feature']
                    train_labels[a_start:a_end] = uset.users[uidx]['augments'][i]['label']

                # Remember the classifier name from the first training request.
                if tset_name is None:
                    tset_name = retrain_h.classifier

                t0 = time()
                model.train_model(train_features, train_labels, tset_name)
                t1 = time()
                print "Training took ", t1 - t0, " ", len(train_labels), "Samples"

                # Score every object on the requested slide.
                slide_idx = dset.getSlideIdx(retrain_h.slide)
                object_num = dset.getObjNum(slide_idx)
                data_idx = dset.getDataIdx(slide_idx)
                feature_set = dset.getFeatureSet(data_idx, object_num)
                x_centroid_set = dset.getXcentroidSet(data_idx, object_num)
                y_centroid_set = dset.getYcentroidSet(data_idx, object_num)

                print "Testing Start ... "
                t0 = time()
                # NOTE(review): `scores` is only assigned on this branch; if
                # set.IS_HEATMAP is True, the setHeatMap() call below raises
                # NameError. Confirm IS_HEATMAP is always False here, or add
                # the missing branch.
                if set.IS_HEATMAP == False:
                    scores = model.predict_prob(feature_set)
                t1 = time()
                print "Predict took ", t1 - t0
                # set x and y maps
                retrain_h.setXandYmap()
                # write heatmaps from the per-object scores
                retrain_h.setHeatMap(x_centroid_set, y_centroid_set, scores)
                # get heatmap data for the response
                data = retrain_h.getData(0)

                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'cancel':
                # Reset all per-session state and rebuild a fresh model.

                uset.users = []
                uset.u_size = 0
                is_normal_loaded = True
                tset_name = None
                is_reloaded = False
                m_checkpoints = 0

                # NOTE(review): `del` raises NameError if any of these names
                # was never bound (e.g. cancel arrives before the matching
                # request type was ever handled) — confirm all are always
                # assigned before a cancel can occur.
                del select
                del finalize
                del viewer
                del retrain_v
                del retrain_h
                del heat
                del t_train
                del report_label

                model = networks.Network()
                model.init_model()
                # dset = dataset.Dataset(set.PATH_TO_SPECIAL)

                # Acknowledge the request and pop it off the Redis queue.
                data = {"success": 'pass'}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)

            if target == 'reviewSave':
                # Apply label edits made on the review tab to the stored
                # samples and to their whole augmentation batches.
                for q_sample in q_samples:
                    edited_label = 1 if q_sample['label'] == 1 else 0
                    edited_id = q_sample['id']

                    for stored in uset.users[uidx]['samples']:
                        if stored['id'] == edited_id:
                            stored['label'] = edited_label

                    for aug in uset.users[uidx]['augments']:
                        if aug['id'][0] == edited_id:
                            aug['label'][:] = edited_label

                # Acknowledge the request and pop it off the Redis queue.
                data = {"success": 'pass'}
                db.set(q_uid, json.dumps(data))
                db.ltrim(set.REQUEST_QUEUE, len(q_uid), -1)
コード例 #20
0
# Pick the selected detection; the index order below shows the box is
# stored as (y1, x1, y2, x2).
ind = pick[ind - 1]
#print(ind)

# original image resizing (cap width at 600 px)
I = np.array(Image.open(image_path))
I = imutils.resize(I, width=min(600, I.shape[1]))
plt.figure(1)
plt.imshow(I)

# auto-refined mask from the bounding box
x1 = ind[1]
y1 = ind[0]
x2 = ind[3]
y2 = ind[2]
# Domask/label are project helpers — presumably GrabCut-style mask
# refinement and mask overlay; confirm in their module.
mask, h, w = Domask(I, x1, x2, y1, y2)
I_label = label(I, mask, x1, x2, y1, y2)
plt.figure(2)
plt.imshow(I_label)

# object removal with weighted seam carving: remove as many vertical
# seams as the box is wide, no horizontal seams
nr = 0
nc = ind[3] - ind[1]
[Ic, T] = carv(I, nr, nc, I_label)
fig2 = plt.figure(3)
plt.imshow(Ic)

# seam insertion (preparation; energy map over the carved image)
Ic_n = Ic
summ = int(nc / 2)
I_label_insert = np.ones(np.shape(Ic_n[:, :, 0]))
a = genEngMap(Ic_n, I_label_insert)
コード例 #21
0
ファイル: button.py プロジェクト: Lithumist/Reflection
"""
コード例 #22
0
ファイル: MMI22_v1.py プロジェクト: xiaobo3332/textdongbo
# Photonic layout fragment: three more directional couplers at widths
# 10/15/20, plus dicing markers, all placed together below.
# (dcc_10, dc_10, dc_15, dc_20 and my_dc are defined earlier, outside
# this excerpt.)
dcc_10_layout = dcc_10.Layout()
dcc_15 = my_dc(gap_inc_vec=[378.0, 388.0, 398.0, 408.0, 418.0],
               length_inc_vec=[87, 97, 107],
               name="ring5",
               width=15.0)
dcc_15_layout = dcc_15.Layout()
dcc_20 = my_dc(gap_inc_vec=[378.0, 388.0, 398.0, 408.0, 418.0],
               length_inc_vec=[87, 97, 107],
               name="ring6",
               width=20.0)
dcc_20_layout = dcc_20.Layout()

# Dicing marker cells.
marker = dicingMarker()
marker_layout = marker.Layout()

# `label` here is a cell factory from the layout framework, not a text
# labeller — presumably a text/label cell; confirm in its module.
marker2 = label()
marker2_layout = marker2.Layout()

# Aggregate all components into one placed cell.
pr = PlaceComponents(
    child_cells={
        "comp1": dc_10,
        "comp2": dc_15,
        "comp3": dc_20,
        "comp4": dcc_10,
        "comp5": dcc_15,
        "comp6": dcc_20,
        "marker1": marker,
        "marker2": marker2,
    })
pr_layout = pr.Layout(
    child_transformations={
コード例 #23
0
def main(argv=None):
    """Interactively collect labels for the most uncertain predictions.

    Restores a trained model's flags from ``train_dir``, scores the
    unlabeled part of the test data, ranks sentences by prediction
    uncertainty, then repeatedly asks the user to label the top-ranked
    key-phrase pair until the user enters 4 (stop) or the data runs out.
    Collected labels are merged back into the labeled data files.
    """
    restore_param = util.load_from_dump(os.path.join(FLAGS.train_dir, 'flags.cPickle'))
    restore_param['train_dir'] = FLAGS.train_dir

    source_path = os.path.join(restore_param['data_dir'], 'test_cs_unlabeled_data_combined.txt')
    target_path = os.path.join(restore_param['data_dir'], 'test_cs_labels_combined.txt')
    vocab_path = os.path.join(restore_param['data_dir'], 'test_cs_vocab_combined')
    unlabeled_data = util.read_data_unlabeled_part(source_path, target_path, restore_param['sent_len'])
    data_size = unlabeled_data.shape[0]

    # # Now hard code to take the first 1000
    # data_first_1000 = unlabeled_data

    # `label` runs the restored model over the data; returns inputs and
    # raw (pre-softmax) outputs.
    x_input, actual_output = label(unlabeled_data, restore_param)

    # Softmax over the three classes, plus the argmax prediction.
    actual_output_exp = np.exp(actual_output)
    actual_output_softmax = actual_output_exp / np.sum(actual_output_exp, axis=1, keepdims=True)
    actual_output_argmax = np.argmax(actual_output_softmax,axis=1)
    # Entropy = -sum(p * log p); the sign is dropped so that ascending
    # argsort puts the most uncertain items first.
    actual_output_entropy = np.sum(np.multiply(actual_output_softmax, np.log(actual_output_softmax)), axis=1)

    # The following are ways to rank which question should be asked first.
    # The first uses entropy, but there might be implementation errors:
    # in practice it does not seem to surface the most uncertain items —
    # possibly the model is simply confident about everything.
    actual_output_entropy_argsort = np.argsort(actual_output_entropy, axis=0)
    # The second (disabled) alternative ranks by the highest softmax
    # probability within the first two classes.
    # actual_output_entropy_argsort = np.argsort(-np.max(actual_output_softmax[...,:2], axis=1))

    # Last two columns of the input are the key-phrase indices; the rest
    # are word indices to be converted back to tokens.
    sentence_indices_input = x_input[:,:-2]
    _,rev_vocab = preprocessing_util.initialize_vocabulary(vocab_path)
    sentence_input = preprocessing_util.indices_to_sentences(sentence_indices_input,rev_vocab)

    kp_indices_input = x_input[:,-2:]
    #
    # print('Sentence\t\tPredicted Score (A is-a B, B is-a A, Neither)\t')
    # for sentence_i, sentence in enumerate(sentence_input):
    #     # Label the key phrases of interest in the current sentence with *.
    #     sentence[kp_indices_input[sentence_i,1]] += '*'
    #     sentence[kp_indices_input[sentence_i,0]] += '*'
    #     if actual_output_argmax[sentence_i] == 2:
    #         # current_type = 'Neither'
    #         continue
    #     if actual_output_argmax[sentence_i] == 0:
    #         current_type = 'A is-a B'
    #     elif actual_output_argmax[sentence_i] == 1:
    #         current_type = 'B is-a A'
    #
    #     print('%s\t%s\t\t%s\t'
    #           % (current_type, ' '.join(sentence), str(actual_output_softmax[sentence_i])))
    user_input = -1
    num_user_labeled = 0
    user_label_results = []
    # Ask about sentences in order of uncertainty; 4 means "stop".
    while user_input != 4 and num_user_labeled < data_size:
        sentence_i = actual_output_entropy_argsort[num_user_labeled]
        sentence = sentence_input[sentence_i]
        print('Key phrase pair\tSentence\t\tPredicted Score (A is-a B, B is-a A, Neither)\t')

        current_key_phrase_pair = sentence[kp_indices_input[sentence_i,0]] + ' ' + sentence[kp_indices_input[sentence_i,1]]
        # Label the key phrases of interest in the current sentence with *.
        sentence[kp_indices_input[sentence_i,1]] += '*'
        sentence[kp_indices_input[sentence_i,0]] += '*'
        print('%s\n%s\t\t%s\t'
              % (current_key_phrase_pair,' '.join(sentence), str(actual_output_softmax[sentence_i])))
        # NOTE: raw_input makes this Python 2 despite the print() calls.
        user_input = raw_input('In your opinion, what should be the category of the key phrase pair? '
                                   'Please enter 1, 2, or 3. Enter 4 to stop answering.\n'
                                   '1. A is-a B\n2. B is-a A\n3. Neither.')
        user_input = util.get_valid_user_input(user_input, 1, 4)

        if user_input != 4:
            # One-hot encode the user's answer (1..3 -> index 0..2).
            user_label_result = np.array([0,0,0])
            user_label_result[user_input-1] = 1
            user_label_results.append(user_label_result)
            num_user_labeled += 1

    # Indices (in uncertainty order) of the items the user answered.
    actual_output_entropy_indices = actual_output_entropy_argsort[:num_user_labeled]

    if len(user_label_results) > 0:
        labeled_data, labeled_result = util.read_data_labeled_part(source_path, target_path, restore_param['sent_len'], shuffle=False)
        user_label_results = np.array(user_label_results)
        save_additional_label(unlabeled_data, actual_output_entropy_indices, user_label_results,labeled_data,labeled_result, source_path, target_path)
コード例 #24
0
ファイル: main.py プロジェクト: sk413025/experiment
# Benchmark fragment: time k-skip graph construction, contraction
# hierarchy (CH) construction on both graphs, and labelling; append each
# timing to `f`. (G, K, priority, s1, f, filename are defined earlier,
# outside this excerpt.)
kG = kp.process_k_skip(G,K, priority)
t1 = time.time()
print '%4.4f sec -- kG process finished' %(t1-s1)
f.write('%.4f ' %(t1-s1))
s2 = time.time()
ov_G1 = CH_construct.CH_construct(G,priority)
t2 = time.time()
print '%4.4f sec -- ovG finished' %(t2-s2)
f.write('%.4f ' %(t2-s2))
s3 = time.time()
ov_G2 = CH_construct.CH_construct(kG,priority)
t3 = time.time()
print '%4.4f sec -- ov_kG finished' %(t3-s3)
f.write('%.4f ' %(t3-s3))
s4 = time.time()
labels1 = label.label(ov_G1, priority)
t4 = time.time()
print  '%4.4f sec -- ov_G_label finished' %(t4-s4)
f.write('%.4f ' %(t4-s4))
s5 = time.time()
labels2 = label.label(ov_G2, priority)
t5 = time.time()
print  '%4.4f sec -- ov_kG_label finished' %(t5-s5)
f.write('%.4f\n' %(t5-s5))
f.close()

# Record graph sizes before vs. after k-skip reduction.
f = open(filename+'_size.txt','a')
G_node = G.number_of_nodes()
G_edge = G.number_of_edges()
kG_node = kG.number_of_nodes()
kG_edge = kG.number_of_edges()
コード例 #25
0
 def __init__(self, posX, posY, sizeX, sizeY, color, text, fontSize):
     """Remember the widget settings and build its vertically centred label."""
     self.settings = [posX, posY, sizeX, sizeY, color, text, fontSize]
     label_y = int(posY + sizeY / 2 - fontSize / 2)
     self.label = label(posX + 5, label_y, text, fontSize)