# Load the trained autoencoder and put encoder/decoder into inference mode.
models, optimizers, _, _, opt = model_utils.load_model(opt.model_name, opt)

enc = models['enc']
dec = models['dec']

# train(False) is equivalent to eval(): freezes dropout / batch-norm updates.
enc.train(False)
dec.train(False)

# Drop the model/optimizer containers so their state can be garbage
# collected; enc/dec keep the networks we actually need alive.
models = None
optimizers = None


print('Done loading model.')

# NOTE(review): both the "ref" and "struct" embedding paths point at
# model_struct_dir; the reference embeddings probably belong under a
# reference-model directory — confirm against the training script.
embeddings_ref_path = model_struct_dir + os.sep + 'embeddings.pkl'
embeddings_ref = model_utils.load_embeddings(embeddings_ref_path)

embeddings_struct_path = model_struct_dir + os.sep + 'embeddings.pkl'
embeddings_struct = model_utils.load_embeddings(embeddings_struct_path)

opt.batch_size = args.batch_size
gpu_id = opt.gpu_ids[0]

# Dataset size and number of structure classes from the data provider.
ndat = dp.get_n_dat('train')
nlabels = dp.get_n_classes()

# Accumulators filled by the main loop further down.
img_paths_all = list()
err_save_paths = list()
label_names_all = list()

class_list = np.arange(0, nlabels)

# BUG FIX: the original re-read enc/dec from models['enc'] / models['dec']
# here, after models had already been set to None above, which raises
# TypeError. enc and dec are already bound; the duplicated paste is removed.

print('Done loading model.')

# Get the embeddings for the structure localization
opt.batch_size = 100
embeddings_path = opt.save_dir + os.sep + 'embeddings_struct.pyt'
embeddings = model_utils.load_embeddings(embeddings_path, enc, dp, opt)

print('Done loading embeddings.')

#######
### Main Loop
#######

import pdb
from aicsimage.io import omeTifWriter
from imgToProjection import imgtoprojection
from IPython.core.display import display
import PIL.Image
import matplotlib.pyplot as plt
import scipy.misc
# ---- コード例 #3 (code example #3) ----
# 0
    logger.add((epoch, this_iter) + errors + (deltaT, ))

    if model_utils.maybe_save(epoch, epoch_next, models, optimizers, logger,
                              zAll, dp, opt):
        zAll = list()

#######
### DONE TRAINING REFERENCE MODEL
#######

#######
### TRAIN STRUCTURE MODEL
#######

# Load (or compute, via the trained reference encoder) latent-space
# embeddings for the whole dataset; these condition the structure model.
embeddings_path = opt.save_dir + os.sep + 'embeddings.pkl'
embeddings = model_utils.load_embeddings(embeddings_path, models['enc'], dp,
                                         opt)

# Release the reference model and optimizer state so they can be garbage
# collected before the structure model is constructed.
models = None
optimizers = None

def get_ref(self, inds, train_or_test='train'):
    """Return the cached reference embeddings at the given indices.

    Converts ``inds`` to a LongTensor and uses it to fancy-index the
    tensor stored under ``self.embeddings[train_or_test]``.
    """
    index_tensor = torch.LongTensor(inds)
    split_embeddings = self.embeddings[train_or_test]
    return split_embeddings[index_tensor]


# Attach the precomputed embeddings to the data provider so get_ref can
# serve them per-index during structure-model training.
dp.embeddings = embeddings

# do this thing to bind the get_ref method to the dataprovider object
# (monkey-patches dp with an instance method; the import would normally
# live at the top of the file).
import types
dp.get_ref = types.MethodType(get_ref, dp)
# ---- コード例 #4 (code example #4) ----
# 0
# Merge the per-cell mitosis annotations (df_mito) into the data provider's
# csv table, keyed on the image/cell identity columns.
df_data = dp.csv_data

df_data = df_data.merge(
    df_mito,
    on=['inputFolder', 'inputFilename', 'outputThisCellIndex'],
    how='left')
# merge() suffixes the overlapping column; keep the annotation side ('_y').
df_data = df_data.rename(columns={'MitosisLabel_y': 'MitosisLabel'})

# Rows with no annotation come out of the left join as NaN; keep only
# labeled cells.
df_data_labeled = df_data[~np.isnan(df_data['MitosisLabel'])]
labels = df_data_labeled['MitosisLabel']

# labels[labels > 2] = 3

ulabels = np.unique(labels)

# Latent-space embeddings produced by the reference model.
embeddings_shape = model_utils.load_embeddings(ref_dir + os.sep +
                                               'embeddings.pkl')

use_train_or_test = 'test'

# Align dataframe rows with the data provider's split ordering so that
# row i corresponds to embedding i of the chosen split.
df_train = df_data.iloc[dp.data[use_train_or_test]['inds']]
# df_train['MitosisLabel'][df_train['MitosisLabel']>2] = 3

# For each label, pick the medoid embedding — the point minimizing the
# summed cityblock distance to all other points of that label — as the
# label's representative position in latent space.
positions = list()
for label in ulabels:
    label_inds = np.where(label == df_train['MitosisLabel'])

    embeddings = embeddings_shape[use_train_or_test][label_inds].numpy()
    D = squareform(pdist(embeddings, metric='cityblock'))
    positions.append(embeddings[np.argmin(np.sum(D, axis=0))])
#     positions.append(np.mean(embeddings,axis=0))
positions = np.vstack(positions)
# ---- コード例 #5 (code example #5) ----
# 0
def main(args=None):
    """Train and evaluate a comment/code association model.

    Parses command-line args, loads embeddings, ontology files and the
    dataset splits, optionally augments training data with deleted
    associations, builds the requested model, and prints classification
    results on train/valid/test. With ``--v``, writes per-example
    predictions to OUTPUT_FILE.

    Raises ValueError for an unknown model type, a missing model name,
    or a missing/oversized --delete_size for the more_data_* variants.
    """
    args = parser.parse_args(args)

    if args.model not in MODEL_TYPES:
        raise ValueError('Invalid model type')

    if not args.model_name:
        raise ValueError('Model name must be provided')

    embeddings = load_embeddings('../embeddings/')

    with open('../ontology/java_utils_main.json') as json_file:
        java_utils = json.load(json_file)

    with open('../ontology/java_utils_methods.json') as json_file:
        java_utils_methods = json.load(json_file)

    with open(FULL_DATASET) as json_file:
        data = json.load(json_file)

    train_associations = load_data_from_object(data['train'])
    test_associations = load_data_from_object(data['test'])
    valid_associations = load_data_from_object(data['valid'])
    deleted_associations = load_data_from_object(data['deleted'])

    # The more_data_* variants extend the training set with the first
    # --delete_size deleted associations.
    if args.model == 'more_data_feedforward' or args.model == 'more_data_crf':
        if not args.delete_size or int(
                args.delete_size) > len(deleted_associations):
            raise ValueError('Delete size must be provided. Max size is ' +
                             str(len(deleted_associations)))

        train_associations = train_associations + deleted_associations[
            0:int(args.delete_size)]

    with open(FULL_ANNOTATIONS) as json_file:
        annotations = json.load(json_file)

    if not args.oracle:
        # Use annotated data for evaluation: overwrite is_associated on
        # test candidates with the human annotations, keyed by
        # token-line-position.
        for association in test_associations:
            annotation_id = association.annotation_id
            for candidate in association.full_code_representation:
                token = candidate['token']
                line_number = candidate['line_idx']
                position = candidate['pos_idx']
                key = token + '-' + str(line_number) + '-' + str(position)
                if key in annotations[str(annotation_id)]:
                    if annotations[str(annotation_id)][key] == 'True':
                        candidate['is_associated'] = True
                    else:
                        candidate['is_associated'] = False

    # Feature extraction: CRF models and the neural model use different
    # dataset preprocessing; the baselines need none.
    if 'crf' in args.model:
        process_crf_dataset(train_associations, embeddings, java_utils,
                            java_utils_methods)
        process_crf_dataset(test_associations, embeddings, java_utils,
                            java_utils_methods)
        process_crf_dataset(valid_associations, embeddings, java_utils,
                            java_utils_methods)

    elif 'baseline' not in args.model:
        process_dataset(train_associations, embeddings, java_utils,
                        java_utils_methods)
        process_dataset(test_associations, embeddings, java_utils,
                        java_utils_methods)
        process_dataset(valid_associations, embeddings, java_utils,
                        java_utils_methods)

    # FIX: the original used Python-2 print statements here while the rest
    # of the function uses print() calls; the function form produces the
    # same output and is also valid Python 3.
    print("Train: " + str(len(train_associations)))
    print("Test: " + str(len(test_associations)))
    print("Valid: " + str(len(valid_associations)))

    if args.model == 'crf' or args.model == 'more_data_crf':
        model = CRFModel(train_associations, test_associations,
                         valid_associations, args)
    elif args.model == 'feedforward' or args.model == 'more_data_feedforward':
        model = FeedForwardNN(train_associations, test_associations,
                              valid_associations, args)
    elif args.model == 'subtoken_matching_baseline':
        model = SubtokenMatchingBaseline(train_associations, test_associations,
                                         valid_associations)
    elif args.model == 'return_line_baseline':
        model = ReturnLineBaseline(train_associations, test_associations,
                                   valid_associations)
    elif args.model == 'random_baseline':
        model = RandomBaseline(train_associations, test_associations,
                               valid_associations)
    elif args.model == 'majority_class_random_baseline':
        model = MajorityClassRandomBaseline(train_associations,
                                            test_associations,
                                            valid_associations)
    else:
        raise ValueError('Unable to identify model type')

    print("Evaluation:")
    print("------------------")
    print("Train:")
    model.classify(train_associations)
    print("------------------")
    print("Valid:")
    model.classify(valid_associations)
    print("------------------")
    print("Test:")
    model.classify(test_associations)
    print("------------------")
    sys.stdout.flush()

    if args.v:
        # Verbose mode: dump per-example NP chunk, comment line, code,
        # predicted vs. gold tokens, and the candidate token list.
        with open(OUTPUT_FILE, 'w+') as f:
            for association in test_associations:
                f.write("NP: " + association.np_chunks[0] + '\n')
                f.write("Comment line: " + association.comment_line + '\n\n')
                f.write(association.full_code.encode('utf-8') + '\n\n')

                predicted = [
                    str(c['token'])
                    for c in association.full_code_representation
                    if c['prediction'] == 1
                ]
                gold = [
                    str(c['token'])
                    for c in association.full_code_representation
                    if c['is_associated']
                ]

                f.write("Predicted: " + str(predicted) + '\n\n')
                f.write("Gold: " + str(gold) + '\n\n')
                f.write(
                    "Candidates: " +
                    str([str(c)
                         for c in association.candidate_code_tokens]) + '\n')
                f.write('***************************\n\n')