def main():
    """Fuse the per-scale predictions of one model run into a single mask per image.

    Four prediction tables are loaded from ``weight_dir`` (tags 'quarter',
    'half', untagged, and the sharded 'two' set).  For every index of the
    cached ``stage2_df`` frame, the instances of all four tables are ordered
    by ascending 'con' value, those with a value of 0.2 or more are dropped,
    and the survivors are painted one after another into an empty int16
    mask.  An instance is painted only if less than half of its pixels are
    already taken, and it only fills pixels that are still zero.  The merged
    masks are cached as 'preds_df_scale_01' and handed to ``make_submission``.
    """
    ################################################################################
    weight_dir = '/media/work/Data/dsb/cache/UnetRCNN_180410-221747'
    ################################################################################
    df_name = 'stage2_df'
    df = load_from_cache(df_name)

    tags = ['quarter', 'half', None, 'two']
    preds = []
    for tag in tags:
        if tag == 'two':
            # The 'two' predictions are split over several numbered shard
            # files in their own sub-directory; count them via both globs.
            stem = '{}_{}'.format(df_name, tag)
            shard_dir = os.path.join(weight_dir, stem)
            first_glob = glob.glob(
                os.path.join(shard_dir, '{}_{}_[0-9+].dat'.format(df_name, tag)))
            second_glob = glob.glob(
                os.path.join(shard_dir, '{}_{}_[0-9][0-9].dat'.format(df_name, tag)))
            pred = load_from_cache_multi(
                os.path.join(shard_dir, stem),
                nb=len(first_glob) + len(second_glob))
        else:
            if tag is None:
                file_name = '{}.dat'.format(df_name)
            else:
                file_name = '{}_{}.dat'.format(df_name, tag)
            pred = load_file(os.path.join(weight_dir, file_name))
        preds.append(pred)

    results = []
    for ind in df.index:
        masks = [table.loc[ind, 'pred'] for table in preds]
        scores = [table.loc[ind, 'con'] for table in preds]

        # Key every instance as '<table position>_<instance position>' so the
        # ranking below can be traced back to the mask it came from.
        candidates = {
            '{}_{}'.format(table_pos, inst_pos): table_scores[inst_pos]
            for table_pos, table_scores in enumerate(scores)
            for inst_pos in range(len(table_scores))
        }
        ranked = pd.Series(candidates).sort_values()
        ranked = ranked[ranked < 0.2]

        merged = np.zeros_like(masks[0], dtype='int16')
        next_label = 1
        for name in ranked.index:
            table_pos, inst_pos = (int(part) for part in name.split('_'))
            # Instance k of a table is stored under label k + 1 in its mask.
            region = masks[table_pos] == inst_pos + 1
            taken_fraction = np.sum(merged[region] > 0) / np.sum(region)
            if taken_fraction < 0.5:
                merged[region & (merged == 0)] = next_label
                next_label += 1
        results.append(merged)

    preds_df = pd.DataFrame(index=df.index)
    preds_df['pred'] = results
    save_to_cache(preds_df, os.path.join(weight_dir, 'preds_df_scale_01'))
    make_submission(preds_df)
def all_triplets(path):
    """
    takes in captions and creates (caption, good image, bad image) triplets

    For each of the first 30000 caption ids, up to 10 triplets are built.
    Every triplet pairs the unit-normalised caption embedding with the
    descriptor of the caption's own image and the descriptor of a "bad"
    image picked by ``choose_bad_img`` out of 25 randomly drawn image ids.
    Captions whose image has no descriptor are skipped, and so is any draw
    whose chosen bad image has no descriptor.

    Side effects: prints the index of each processed caption and pickles
    the resulting list to the file "triplets" in the working directory.

    :params:
        path : path to resnet18 file, passed on to ``load_resnet``

    :return:
        List[Tuple(caption, good image, bad image)]
            at most 10 entries per caption (300000 for 30000 captions)
            caption : shape (50,) word embedding of the caption, scaled to unit norm
            good image : shape (512,) descriptor vector of image that matches caption
            bad image : shape (512,) descriptor vector of image that doesn't match caption
    """
    caption_ids = utils.get_caption_ids()[:30000]
    caption_id_to_img_id = utils.cap_id_to_im_id()  # maps caption id to image id
    img_id_to_descriptor = load_resnet(path)  # maps image id to descriptor
    img_id_to_caption_id = utils.im_id_to_cap_ids()
    # Wrapped in an extra list so the array is 2-D and choice(..., axis=1)
    # below samples image ids along the second axis.
    all_images = np.array([utils.get_img_ids()])
    # NOTE(review): data\capid2cap is presumably a pickled copy of
    # utils.cap_id_to_vec(), kept so the embeddings are not recomputed.
    caption_id_to_caption = load_file(r"data\capid2cap")

    # One generator for the whole run; building a fresh one for every draw
    # is loop-invariant work.
    rng = np.random.default_rng()

    triplets = []
    for i, indiv_caption_id in enumerate(caption_ids):
        print(i)
        caption = caption_id_to_caption[indiv_caption_id]
        caption = caption / np.linalg.norm(caption)
        img_id = caption_id_to_img_id[indiv_caption_id]
        if img_id not in img_id_to_descriptor:
            continue
        good_img = img_id_to_descriptor[img_id]

        for _ in range(10):
            # Redraw until the candidate set does not contain the matching image.
            while True:
                images = rng.choice(all_images, size=25, axis=1)
                if img_id not in images:
                    break
            img_key = choose_bad_img(
                caption, images, caption_id_to_caption, img_id_to_caption_id)
            if img_key in img_id_to_descriptor:
                bad_img = img_id_to_descriptor[img_key]
                triplets.append((caption, good_img, bad_img))

    with open("triplets", mode="wb") as opened_file:
        pickle.dump(triplets, opened_file)
    return triplets
def display_topk(ids):
    """
    looks up the url of each given id, downloads the image and shows it

    Parameters
    ----------
    ids: List[string] - shape: (k,)
        List of the top k ids; each one must be a key of the loaded
        id-to-url mapping
    """
    id_to_url = load_file(r"data\idstourls")
    # Resolve every id before downloading anything, so an unknown id fails
    # before the first image is fetched.
    urls = [id_to_url[image_id] for image_id in ids]
    for url in urls:
        download_image(url).show()
def initialize_params(self):
    """
    trains a fresh model on the stored triplets, then stores the model's
    output for every resnet image feature vector in self.database

    self.vector_id ends up referring to the same object as self.database.
    """
    embedder = Model.Model(512, 50)  # update this
    triplets = load_file(r"data\triplets")
    Model.train(embedder, 5, 0.1, triplets, learning_rate=0.1, batch_size=32)

    features = image_features.load_resnet(r"data\resnet18_features.pkl")
    for image_key, feature in features.items():
        embedded = embedder(feature)
        self.database[image_key] = embedded.data
    self.vector_id = self.database
from Model import Model, train, test, accuracy
from load import load_file
from extract_triplets import all_triplets
from noggin import create_plot

# Live loss/accuracy plot (only for jupyter notebook).
plotter, fig, ax = create_plot(metrics=["loss", "accuracy"])

# Hyperparameters for the run.
learning_rate = 0.1
num_epochs = 1
batch_size = 32
margin = 0.1

# Model and training data.
model = Model(512, 50)
path = r'data\resnet18_features.pkl'
triplets = load_file(r'data\triplets')

# Train the model.
train(
    model,
    num_epochs,
    margin,
    triplets,
    learning_rate=learning_rate,
    batch_size=batch_size,
)
# No checkpoint was given: fall back to the most recently modified one
# found in options.model_dir.
if model_file is None:
    #model_file = tf.train.latest_checkpoint(options.model_dir)
    # The inner check is a leftover of the disabled lookup above; with that
    # line commented out it always succeeds.
    if model_file is None:
        models = glob.glob(options.model_dir + '/*.index')
        # Newest file first.
        models.sort(key=lambda x: os.path.getmtime(x), reverse=True)
        #print(models)
        # Drop the 6-character '.index' suffix to get the name passed to restore().
        # NOTE(review): raises IndexError when the directory holds no '*.index' file.
        model_file = models[0][:-6]
ts.init_all_variables()
print('Loading model from %s...' % model_file)
tf.train.Saver().restore(ts.sess, model_file)

# loading problem: read it from options.problem_file when options.random is 0,
# otherwise generate one with PyRandSAT from the value of options.random
if options.random == 0:
    print('Loading problem from %s...' % options.problem_file)
    # The second returned value is not used here.
    LC_mat, _ = load.load_file(options.problem_file)
else:
    print('Generating random problem...')
    import PyRandSAT as rs
    LC_mat = rs.getProblem(options.random)
# nvars is half of the first dense dimension -- presumably one row per
# literal (a variable and its negation); confirm against load.load_file.
nvars = int(LC_mat.dense_shape[0] / 2)
nclauses = LC_mat.dense_shape[1]
# "membership" is the number of entries listed in LC_mat.indices.
print('%d variables, %d clauses, %d membership' % (nvars, nclauses, LC_mat.indices.shape[0]))
maxrounds = options.rounds
roundsbatch = 1
# Number of batches of `roundsbatch` rounds needed to cover `maxrounds`.
nruns = math.ceil(maxrounds / roundsbatch)
# initialize data before loops
def run_file(filename):
    """Load `filename`, spawn the 'main' label on the machine and run it.

    The file is passed to ``load_file``; ``machine.spawn_label`` is then
    called with ``base_env`` and the label 'main', followed by
    ``machine.run()``.  Always returns 0.
    """
    entry_label = 'main'
    load_file(filename)
    machine.spawn_label(base_env, entry_label)
    machine.run()
    return 0