def load_whole(mdir):
    obj_path = os.path.join(mdir, 'args.pkl')
    assert os.path.exists(obj_path)
    args = utils.load_obj(obj_path)
    cdir = args.cdir
    ft_names = ['1-gram', '2-gram', '3-gram', '4-gram', 'unicode-block', 'word']
    ft_extractors = {name: None for name in ft_names}
    for name in ft_extractors:
        cache_path = os.path.join(cdir, f'{name}.pkl')
        assert os.path.exists(cache_path)
        ft_extractors[name] = utils.load_obj(cache_path)
    cache_path = os.path.join(cdir, 'lang.pkl')
    assert os.path.exists(cache_path)
    LANG = utils.load_obj(cache_path)
    mdl = FeedforwardNetwork(args, ft_extractors, LANG)
    fmdl = os.path.join(mdir, 'mdl.pkl')
    mdl.load_state_dict(torch.load(fmdl, map_location=torch.device('cpu')))
    mdl.eval()
    utils.log(f'Loaded model from {fmdl}')
    iso_639_4 = pd.read_csv('ISO-639-4.csv', sep='\t')
    lang2label = {row.iso: row.label for idx, row in iso_639_4.iterrows()}
    utils.log('Loaded ISO-639-4')
    return ft_extractors, LANG, cdir, mdl, lang2label
def Q1_3():
    hashtags = ['#gohawks', '#nfl', '#sb49', '#gopatriots', '#patriots', '#superbowl']
    for tag in hashtags:
        X = load_obj(tag + '_Q13')[:-1, :]
        y = load_obj(tag + '_numTweetsInHour')[1:]
        model = stats_api.OLS(y, X)
        res = model.fit()
        y_pred = res.predict(X)
        y_resid = y - y_pred
        sum_err = pow(y_resid, 2)
        sum_err = np.sum(sum_err)
        print(res.summary())
        # print(sum_err)
        rmse = sqrt(sum_err / len(y_resid))
        print('%s has RMSE of %.3f' % (tag, rmse))
        features = ['mentionCount', 'rankScore', 'passitivity',
                    'co-occurrence_of_tags', 'unique_author']
        for i in [0, 2, 3]:
            x_plt = X[:, i]
            ys = [[y, 'Predictant']]
            x_label = features[i]
            y_label = 'number of tweets for next hour'
            title = tag + ', ' + x_label
            make_plot(x_plt, ys, scatter=True, xlabel=x_label, ylabel=y_label, title=title)
        print('=============================')
def configure_optimizers(self):
    if 'decoder_lr' in self.cfg.optimizer.params.keys():
        params = [
            {'params': self.model.decoder.parameters(), 'lr': self.cfg.optimizer.params.lr},
            {'params': self.model.encoder.parameters(), 'lr': self.cfg.optimizer.params.decoder_lr},
        ]
        optimizer = load_obj(self.cfg.optimizer.class_name)(params)
    else:
        optimizer = load_obj(self.cfg.optimizer.class_name)(
            self.model.parameters(), **self.cfg.optimizer.params)
    scheduler = load_obj(self.cfg.scheduler.class_name)(
        optimizer, **self.cfg.scheduler.params)
    return [optimizer], [{
        'scheduler': scheduler,
        'interval': self.cfg.scheduler.step,
        'monitor': self.cfg.scheduler.monitor,
    }]
def get_datasets(period_params, symbols_list_name, thresholds_lst, target_shift,
                 mode='all', datasets=None):
    print("Initializing datasets for periods: %s" % period_params)
    if not datasets:
        datasets = {}
    for thresholds in thresholds_lst:
        for resample_period, magic_number in period_params:
            normal_name, z_name = get_datasets_name(resample_period, symbols_list_name,
                                                    thresholds, target_shift)
            normal_file = os.path.join(DATA_PATH, normal_name)
            z_file = os.path.join(DATA_PATH, z_name)
            if exists_obj(normal_file) and exists_obj(z_file):
                print("Loading from cache:\n * %s\n * %s" % (normal_file, z_file))
                dfn = load_obj(normal_file)
                dfz = load_obj(z_file)
            else:
                dfn, dfz = get_data(resample_period=resample_period,
                                    symbols_list_name=symbols_list_name,
                                    thresholds=thresholds,
                                    target_shift=target_shift)
            if mode == 'all' or mode == 'normal':
                datasets[normal_name] = (dfn, magic_number, thresholds)
            if mode == 'all' or mode == 'z-score':
                datasets[z_name] = (dfz, magic_number, thresholds)
    return datasets
def configure_optimizers(self): """TODO Add missing docstring.""" if "decoder_lr" in self.cfg.optimizer.params.keys(): params = [ { "params": self.model.decoder.parameters(), "lr": self.cfg.optimizer.params.lr, }, { "params": self.model.encoder.parameters(), "lr": self.cfg.optimizer.params.decoder_lr, }, ] optimizer = load_obj(self.cfg.optimizer.class_name)(params) else: optimizer = load_obj(self.cfg.optimizer.class_name)( self.model.parameters(), **self.cfg.optimizer.params) scheduler = load_obj(self.cfg.scheduler.class_name)( optimizer, **self.cfg.scheduler.params) return ( [optimizer], [{ "scheduler": scheduler, "interval": self.cfg.scheduler.step, "monitor": self.cfg.scheduler.monitor, }], )
def configure_optimizers(self, *args, **kwargs):
    opt = self.conf.optimizer.class_name
    self.optimizer = load_obj(opt)(self.net.parameters(), **self.conf.optimizer.params)
    if self.conf.scheduler.class_name is None:
        return [self.optimizer]
    else:
        schedps = self.conf.scheduler
        __scheduler = load_obj(schedps.class_name)(self.optimizer, **schedps.params)
        if not self.conf.scheduler.monitor:
            self.scheduler = {
                "scheduler": __scheduler,
                "interval": schedps.interval,
                "frequency": schedps.frequency,
            }
        else:
            self.scheduler = {
                "scheduler": __scheduler,
                "interval": schedps.interval,
                "frequency": schedps.frequency,
                "monitor": schedps.monitor,
            }
        return [self.optimizer], [self.scheduler]
def load_index(self, fn):
    """
    Loads a pre-computed index (or indices) so we can answer queries.
    Input:
        fn - file name of the pickled index, read from disk.
    """
    utils.load_obj(fn)
def merge_index(config, files_num):
    """
    Loads all the temporary index files written by the parse_and_index function and
    merges them into a single unified index.
    The function applies the capital-letters rule: if every occurrence of a term starts
    with a capital letter, the term is saved in all caps; otherwise it is saved in its
    lowercase form. It also merges entities into the inverted index when they appear in
    the corpus more than once, and saves the merged index to disk for future use.
    :param config: config object that says where to find the saved files
    :param files_num: how many temporary files to merge in each category
    :return: total number of terms in the index
    """
    merged_index = {}
    # Merge all the terms from the temporary indices into one index
    file_prefix = config.get_save_files_dir() + "/tmp/inverted_idx_"
    for i in range(files_num):
        current_index = utils.load_obj(file_prefix + str(i))
        for term, apperances in current_index.items():
            if term not in merged_index.keys():
                merged_index[term] = apperances
            else:
                merged_index[term] += apperances

    # Handle the capital-letters restriction
    merged_index_after_cap = {}
    for term, value in merged_index.items():
        if term[0].islower():
            if term not in merged_index_after_cap.keys():
                merged_index_after_cap[term] = value
            else:
                merged_index_after_cap[term] += value
        else:  # the term starts with an uppercase letter
            if term.lower() in merged_index.keys():  # the same term appears in lowercase somewhere in the corpus
                if term.lower() not in merged_index_after_cap.keys():
                    merged_index_after_cap[term.lower()] = value
                else:
                    merged_index_after_cap[term.lower()] += value
            else:  # the term only ever appears capitalized
                merged_index_after_cap[term.upper()] = value

    # An entity that appears more than once in the corpus is added to the index
    entities_idxs_prefix = config.get_save_files_dir() + "/tmp/entities_idx_"
    for i in range(files_num):
        current_entities = utils.load_obj(entities_idxs_prefix + str(i))
        for term, apperances in current_entities.items():
            if apperances > 1:
                merged_index_after_cap[term] = apperances

    total_terms = len(merged_index)
    # print("Total num of terms: {}".format(total_terms))

    # Save the merged index to disk
    saving_dir = config.get_save_files_dir()
    utils.save_obj(merged_index_after_cap, saving_dir + "/inverted_index")
    return total_terms
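# Illustration (not part of the original module): a minimal, self-contained sketch of
# the capital-letters rule applied in merge_index above. A term that appears anywhere
# in lowercase is merged under its lowercase key; a term that only ever appears
# capitalized is stored in all caps. The term names and counts below are invented.
def _demo_capital_rule():
    merged_index = {'apple': 3, 'Apple': 2, 'NASA': 4}
    resolved = {}
    for term, count in merged_index.items():
        if term[0].islower():
            resolved[term] = resolved.get(term, 0) + count
        elif term.lower() in merged_index:  # lowercase form exists somewhere in the corpus
            resolved[term.lower()] = resolved.get(term.lower(), 0) + count
        else:  # only ever seen capitalized
            resolved[term.upper()] = count
    assert resolved == {'apple': 5, 'NASA': 4}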
def get_training_dataset(cfg: DictConfig = None) -> Dict[str, Dataset]:
    """
    Get training and validation datasets.

    Parameters
    ----------
    cfg : DictConfig, optional
        Project configuration, by default None

    Returns
    -------
    Dict[str, Dataset]
        {"train": train_dataset, "valid": valid_dataset}
    """
    images_dir = to_absolute_path(cfg.data.images_folder_path)
    data = pd.read_csv(to_absolute_path(cfg.data.dataset_path))
    data["x1"] = data["x"] + data["w"]
    data["y1"] = data["y"] + data["h"]
    data["area"] = data["w"] * data["h"]

    train_ids, valid_ids = train_test_split(
        data["image_id"].unique(),
        test_size=cfg.data.validation_split,
        random_state=cfg.training.seed,
    )

    # for fast training
    if cfg.training.debug:
        train_ids = train_ids[:10]
        valid_ids = valid_ids[:10]

    train_df = data.loc[data["image_id"].isin(train_ids)]
    valid_df = data.loc[data["image_id"].isin(valid_ids)]

    train_augs_list = [
        load_obj(i["class_name"])(**i["params"])
        for i in cfg["augmentation"]["train"]["augs"]
    ]
    train_bbox_params = OmegaConf.to_container(cfg["augmentation"]["train"]["bbox_params"])
    train_augs = Compose(train_augs_list, bbox_params=train_bbox_params)

    valid_augs_list = [
        load_obj(i["class_name"])(**i["params"])
        for i in cfg["augmentation"]["valid"]["augs"]
    ]
    valid_bbox_params = OmegaConf.to_container(cfg["augmentation"]["valid"]["bbox_params"])
    valid_augs = Compose(valid_augs_list, bbox_params=valid_bbox_params)

    train_dataset = XrayDataset(train_df, "train", images_dir, cfg, train_augs)
    valid_dataset = XrayDataset(valid_df, "valid", images_dir, cfg, valid_augs)

    return {"train": train_dataset, "valid": valid_dataset}
def __init__(self, args):
    device = torch.device(args.gpu if args.gpu != -1 else 'cpu')
    self.device = device
    ftrain = args.ftrain
    fvalid = args.fvalid
    ftest = args.ftest
    futable = args.futable
    bsz = args.bsz
    cdir = args.cdir

    train, valid, test = self.load_data(ftrain), \
                         self.load_data(fvalid), \
                         self.load_data(ftest)

    ft_extractors = {f'{n}-gram': NgramFeature(n, vsize)
                     for n, vsize in zip([1, 2, 3, 4],
                                         [args.vsizes[VSIZE_1GRAM],
                                          args.vsizes[VSIZE_2GRAM],
                                          args.vsizes[VSIZE_3GRAM],
                                          args.vsizes[VSIZE_4GRAM]])}
    ft_extractors['unicode-block'] = UnicodeBlockFeature()
    ft_extractors['word'] = WordFeature(args.vsizes[VSIZE_WORD])

    for name in ft_extractors:
        cache_path = os.path.join(cdir, f'{name}.pkl')
        if os.path.exists(cache_path):
            ft_extractors[name] = utils.load_obj(cache_path)
        else:
            utils.log(f'Building feature {name}')
            if 'gram' in name:
                ft_extractors[name].build(train.txt)
            elif name == 'unicode-block':
                ft_extractors[name].build(futable)
            elif name == 'word':
                ft_extractors[name].build(train.txt)
            else:
                raise NotImplementedError
            utils.save_obj(ft_extractors[name], cache_path)

    cache_path = os.path.join(cdir, 'lang.pkl')
    LANG = Lang()
    if os.path.exists(cache_path):
        LANG = utils.load_obj(cache_path)
    else:
        utils.log('Building LANG')
        LANG.build(train.lang)
        utils.save_obj(LANG, cache_path)

    utils.log('Building batches')
    self.train_iter, _ = self.build_batches(train, cdir, 'train', ft_extractors,
                                            bsz, LANG, True, device)
    self.valid_iter, _ = self.build_batches(valid, cdir, 'valid', ft_extractors,
                                            bsz, LANG, False, device)
    self.test_iter, _ = self.build_batches(test, cdir, 'test', ft_extractors,
                                           bsz, LANG, False, device)
    self.ft_extractors = ft_extractors
    self.LANG = LANG
def expand_query(self, query_as_list):
    new_query_list = []
    embedding_dict = utils.load_obj("embedding_dict")
    new_embedding_dict = utils.load_obj("new_embedding_dict")
    for term in query_as_list:
        if term in embedding_dict.keys():
            new_query_list.extend(
                find_closest_embeddings(embedding_dict[term], 4, new_embedding_dict))
    return new_query_list
def init_embeddings(self):
    if self.one_hot_embed:
        embed_arr = utils.load_obj('datasets/context/embeddings/one_hot_33_dim')
    else:
        embed_arr = utils.load_obj('datasets/context/embeddings/norm_embed_arr_' + str(self.embed_dim))
    num_classes = embed_arr.shape[0]
    self.embeddings = torch.nn.Embedding(num_classes, self.embed_dim)
    self.embeddings.weight.requires_grad = False
    self.embeddings.weight.data.copy_(torch.from_numpy(embed_arr))
def set_precomputed_ct(self, base_obj_path, ancestor_dict_path, sample_idx_vec_path, point_num):
    self.ct = load_obj(base_obj_path)
    self.ct.__init__()
    self.point_num = 1000
    self.ct.point_num = self.point_num
    self.ct.ancestor_dict = load_obj(ancestor_dict_path)
    self.ct.sample_idx_vec_dict = load_obj(sample_idx_vec_path)
    self.ct.fidx_vec = np.array([fidx for fidx in self.ct.sample_idx_vec_dict.keys()])
def Init_model(self):
    # init dataloader
    self.data_loader = DataLoader_test(self.save_dir)
    # init model
    self.ort_session = onnxruntime.InferenceSession(self.save_dir + self.model_nm)
    # init dict
    self.idx2lbl = load_obj(self.save_dir + "idx2lbl.json")
    self.idx2cls = load_obj(self.save_dir + "idx2cls.json")
    # get valid slot for a specific intent
    self.idx_mask = load_obj(self.save_dir + "idx_mask_onnx.json")
def initialize(self, services):
    self.services = services
    self.valid_actions_getter = MyValidActionsGetter(self.services.parser,
                                                     self.services.perception)
    self.uncompleted_goals = self.services.goal_tracking.uncompleted_goals
    if os.path.exists(self.env_name + "_transitions"):
        self.transitions = load_obj(self.env_name + "_transitions")
    if os.path.exists(self.env_name + "_state_action_transition_count"):
        self.state_action_transition_count = load_obj(
            self.env_name + "_state_action_transition_count")
def get_val_slides(self, resample_round):
    patients_train = load_obj('train_img_paths_DX_round_{}'.format(resample_round),
                              self.DATA_SPLIT_DIR + 'train/')
    patients_train = list(set([p.split('/')[-1].split('.')[0][:15] for p in patients_train]))
    patients_val = load_obj('val_img_paths_DX_round_{}'.format(resample_round),
                            self.DATA_SPLIT_DIR + 'val/')
    patients_val = list(set([p.split('/')[-1].split('.')[0][:15] for p in patients_val]))
    return patients_val, patients_train
def train(cfg: DictConfig) -> None:
    """
    Run model training.

    Parameters
    ----------
    cfg : DictConfig
        Project configuration object
    """
    model = load_obj(cfg.model.backbone.class_name)
    model = model(**cfg.model.backbone.params)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    head = load_obj(cfg.model.head.class_name)
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = head(in_features, cfg.model.head.params.num_classes)

    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    xray_detection = XrayDetection(hparams=hparams, cfg=cfg, model=model)
    callbacks = xray_detection.get_callbacks()
    loggers = xray_detection.get_loggers()

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=callbacks["early_stopping"],
        checkpoint_callback=callbacks["model_checkpoint"],
        **cfg.trainer,
    )
    trainer.fit(xray_detection)

    # Load the best checkpoint
    get_logger().info("Saving model from the best checkpoint...")
    checkpoints = [
        ckpt for ckpt in os.listdir("./")
        if ckpt.endswith(".ckpt") and ckpt != "last.ckpt"
    ]
    best_checkpoint_path = checkpoints[0]
    model = XrayDetection.load_from_checkpoint(best_checkpoint_path,
                                               hparams=hparams, cfg=cfg, model=model)
    save_best(model, cfg)
def merge_files(self, out, letter, file_name_letter_idx):  # temp_letter_dict):
    permanent_file_name = out + letter
    file_name_letter_idx = utils.load_obj(out + file_name_letter_idx)
    permanent_dict_file = utils.load_obj(permanent_file_name)
    for key in file_name_letter_idx:
        if key in permanent_dict_file:
            permanent_dict_file[key].extend(file_name_letter_idx[key])
        else:
            permanent_dict_file[key] = file_name_letter_idx[key]
    utils.save_obj(permanent_dict_file, permanent_file_name)
def __init__(self, save_dir):
    self.save_dir = save_dir
    self.word2idx = load_obj(self.save_dir + "dict.json")
    self.config = load_obj(self.save_dir + "Config.json")
    self.max_len = self.config["max_len"]
    self.WORD = {int(k): v for k, v in self.config["WORD"].items()}
    self.BOS = self.config["BOS"]
    self.UNK = self.config["UNK"]
    self.PAD = self.config["PAD"]
    assert self.BOS == self.word2idx[self.WORD[self.BOS]]
    assert self.UNK == self.word2idx[self.WORD[self.UNK]]
    assert self.PAD == self.word2idx[self.WORD[self.PAD]]
def __init__(self, args_dict, set, w2i_tit, w2i, transform=None):
    """
    Args:
        set: 'train', 'val', or 'test'
        w2i_tit: word to index for titles
        w2i: word to index for comments
        transform: data transform
    """
    self.args_dict = args_dict
    self.set = set

    # Load Data
    if self.set == 'train':
        textfile = args_dict.csvtrain
        self.mismtch = 0.8
    elif self.set == 'val':
        textfile = args_dict.csvval
        self.mismtch = 0
    elif self.set == 'test':
        textfile = args_dict.csvtest
        self.mismtch = 0
    df = pd.read_csv(textfile, delimiter='\t')
    self.imageurls = list(df['IMAGE_FILE'])
    self.comment_map = get_mapped_text(df, w2i, field='DESCRIPTION')
    self.titles_map = get_mapped_text(df, w2i_tit, field='TITLE')

    # Parameters
    self.numpairs = len(df) / (1 - self.mismtch)
    self.comw2i = w2i
    self.titw2i = w2i_tit
    # self.titw2i = dict([(w, i) for i, w in enumerate(titvocab)])
    self.imagefolder = args_dict.dir_images
    self.transform = transform

    # tf-idf weights and vectors
    if os.path.exists(args_dict.dir_data + args_dict.tfidf_coms_file):
        self.tfidf_coms = load_obj(args_dict.dir_data + args_dict.tfidf_coms_file)
    else:
        self.tfidf_coms = self.get_tfidf(self.comment_map, self.comw2i)
        save_obj(self.tfidf_coms, args_dict.dir_data + args_dict.tfidf_coms_file)
    if os.path.exists(args_dict.dir_data + args_dict.tfidf_tits_file):
        self.tfidf_tits = load_obj(args_dict.dir_data + args_dict.tfidf_tits_file)
    else:
        self.tfidf_tits = self.get_tfidf(self.titles_map, self.titw2i)
        save_obj(self.tfidf_tits, args_dict.dir_data + args_dict.tfidf_tits_file)
def merge_posting_letter(saving_dir, prefix, files_num, inverted_idx):
    """
    Merges one posting file, identified by its prefix. (This task is dispatched to
    several processes so it runs in parallel.)
    It reads all the posting dicts and the entity candidate dicts with the relevant
    prefix and merges them into one, making sure that entities and capital letters
    are aligned with the way they were handled in the inverted index.
    :param saving_dir: where to save the output and find the temp files
    :param prefix: which posting prefix this task is applied to
    :param files_num: how many temp files to read
    :param inverted_idx: the inverted index of the corpus, containing all the final keys
    :return: the prefix this task worked on
    """
    # print("merging posting of prefix {}, files_num: {}".format(prefix, files_num))
    loading_dir = saving_dir + '/tmp'
    file_prefix = loading_dir + "/postingDict_" + prefix + "_"
    entities_prefix = loading_dir + "/entitiesDict_" + prefix + "_"
    merged_letter_posting = {}

    # Merge all the posting entries
    for i in range(files_num):
        try:
            current_letter_posting = utils.load_obj(file_prefix + str(i))
            for term, apperances in current_letter_posting.items():
                if term in merged_letter_posting.keys():  # already found term
                    merged_letter_posting[term] += apperances
                else:
                    if term in inverted_idx.keys():  # a valid capitalized term, or a lowercase one
                        merged_letter_posting[term] = apperances
                    else:  # a capitalized candidate that did not make it, so it is lowered
                        merged_letter_posting[term.lower()] = apperances
            # load the entities posting and merge it
            curent_entity_posting = utils.load_obj(entities_prefix + str(i))
            for term, apperances in curent_entity_posting.items():
                if term in inverted_idx.keys():  # valid entity
                    merged_letter_posting[term] = apperances
        except:
            pass

    # Sort every posting entry by its doc_id
    for postings_entry in merged_letter_posting.values():
        postings_entry.sort(key=lambda x: x[0])

    # Save the relevant posting dict
    utils.save_obj(merged_letter_posting, saving_dir + "/postingDict_" + prefix)
    # print("saved {} posting dict".format(prefix))
    return prefix
def load_tweet_dict():
    """
    Read the tweet vector files and insert the vectors into the tweet dictionary.
    :return: tweet dictionary including the GloVe vector data
    """
    tweet_dict = utils.load_obj("docDictionary")
    buckets = []
    for i in range(tweet_dict["metadata"]["tweet_vector_buckets"]):
        buckets.append(utils.load_obj("avgVector" + str(i)))
    for tweet_id in tweet_dict.keys():
        if tweet_id == "metadata":
            continue
        address = tweet_dict[tweet_id][5]
        tweet_dict[tweet_id][5] = buckets[address[0]][address[1]]
    return tweet_dict
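# Illustration (not part of the original module): a small sketch of the addressing
# scheme resolved by load_tweet_dict above. Each tweet entry stores a (bucket, offset)
# pair at position 5, which is replaced by the vector found at that position in the
# corresponding bucket file. The vectors below are invented.
def _demo_bucket_lookup():
    buckets = [
        [[0.1, 0.2], [0.3, 0.4]],  # contents of "avgVector0"
        [[0.5, 0.6]],              # contents of "avgVector1"
    ]
    tweet_entry = [None, None, None, None, None, (1, 0)]  # fields 0-4 omitted here
    address = tweet_entry[5]
    tweet_entry[5] = buckets[address[0]][address[1]]
    assert tweet_entry[5] == [0.5, 0.6]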
def load_blacklist(self):
    filename = self.blacklist_filename()
    if not os.path.exists(filename):
        blacklist = set()
    else:
        blacklist = utils.load_obj(filename)
    return blacklist
def load_index(self, fn):
    """
    Loads a pre-computed index (or indices) so we can answer queries.
    Input:
        fn - file name of the pickled index.
    """
    self.inverted_idx, self.documents = utils.load_obj(fn)
def reconstruct_from_postings(output_path, stemming):
    postings = glob(output_path + "\\{}\\*.pkl".format("WithStem" if stemming else "WithoutStem"),
                    recursive=True)
    reconstructed = set()
    corpus_size = 0
    total_length = 0
    for posting in postings:
        if "inverted_idx" not in posting:
            splited_path = os_path_splitext(posting)
            print(splited_path)
            file = utils.load_obj(splited_path[0])
            for doc_list in file.values():
                for doc in doc_list:
                    doc_id = doc[0]
                    doc_length = doc[4]
                    if doc_id not in reconstructed:
                        reconstructed.add(doc_id)
                        total_length += doc_length
                        corpus_size += 1
    return corpus_size, float(total_length) / corpus_size
def train(df, attrs, clf_class, clf_name, model_params, mode, magic_number, dates,
          dataset_name, trading_params):
    trade_freq = trading_params['trade_frequency']
    name = '%s-%s-attr%s-%s-%s-%s-%s-%s_' % (
        clf_name, dataset_name, len(attrs),
        dict_to_str(model_params).replace(' ', '_').replace(':', ''),
        mode, magic_number,
        pd.to_datetime(dates[0], format=DATE_FORMAT).date(),
        pd.to_datetime(dates[1], format=DATE_FORMAT).date())
    cached_file = os.path.join(CACHE_PATH + '/models/', name)

    start_date, final_date = dates
    idx = 0
    indices = sorted([
        day for day in list(set(df.index.values))
        if start_date <= day <= final_date
    ])

    print("Model and params: %s %s " % (clf_name, model_params))

    # magic number is by default 53: 52 weeks for training, 1 for prediction
    while idx + magic_number < len(indices) and indices[idx + magic_number] <= indices[-1]:
        if mode == CLASSIFICATION:
            train_x, train_y, test_x, test_y = \
                get_classification_data(clf_name, df, attrs, indices, idx, magic_number)
        elif mode == REGRESSION:
            # get regression datasets (target is float y -> ratio of increase)
            train_x, train_y, test_x, test_y = \
                get_regression_data(clf_name, df, attrs, indices, idx, magic_number)

        print("Training %s/%s with %s instances." %
              (idx // trade_freq, len(indices) // trade_freq, train_x.shape[0]))
        sys.stdout.flush()

        clf_cached_file = cached_file + str(indices[idx])[:10]
        if not CHECKPOINTING:
            clf = clf_class(**model_params).fit(train_x, train_y)
        else:
            try:
                clf = load_obj(clf_cached_file)
            except:
                clf = clf_class(**model_params).fit(train_x, train_y)
                save_obj(clf, clf_cached_file)

        pred = clf.predict(test_x)
        # import ipdb
        # ipdb.set_trace()
        df.loc[indices[idx + magic_number], clf_name] = pred
        idx += trade_freq

    df_trade = df.dropna(axis=0)
    print("Finished training for %s" % clf_name)
    return df_trade
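# Illustration (not part of the original module): a compact sketch of the walk-forward
# loop driven by magic_number in train above. Each iteration uses a window of
# magic_number consecutive dates and writes a prediction at indices[idx + magic_number],
# then the window slides forward by trade_freq. The numbers below are invented.
def _demo_walk_forward(indices, magic_number=3, trade_freq=1):
    windows = []
    idx = 0
    while idx + magic_number < len(indices):
        window = indices[idx:idx + magic_number]
        prediction_date = indices[idx + magic_number]
        windows.append((window, prediction_date))
        idx += trade_freq
    return windows

# _demo_walk_forward(list(range(6))) ->
# [([0, 1, 2], 3), ([1, 2, 3], 4), ([2, 3, 4], 5)]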
def set_default_ct(self):
    self.ct = load_obj("cell_tracker_with_lineage")
    self.ct.__init__()
    self.point_num = 1000
    self.ct.point_num = self.point_num
    self.ct.fidx_vec = np.array([fidx for fidx in self.ct.sample_idx_vec_dict.keys()])
def classify(k, text):
    target_vec = lda_all.get_document_topics(dictionary_all.doc2bow(utils.tokenize(text)),
                                             per_word_topics=True)[0]
    closest_points = []
    with open('./data/corpus-labels.csv') as labels:
        labelreader = csv.reader(labels)
        if not os.path.exists('./data/ldaspace-titles-abstracts.pkl'):
            print "data/ldaspace-titles-abstracts.pkl not found. Generating file (this may take a while)"
            save_pointcloud('./data/ldaspace-titles-abstracts')
        ldaspace = utils.load_obj('./data/ldaspace-titles-abstracts')
        for l, current_vec in zip(labelreader, ldaspace):
            dist = get_distance(current_vec, target_vec)
            if len(closest_points) >= k:
                if dist < closest_points[k - 1][1]:
                    closest_points.pop(k - 1)
                    closest_points.append((l, dist))
            else:
                closest_points.append((l, dist))
            closest_points.sort(key=lambda point: point[1])
    category_counter = Counter()
    for x in closest_points:
        category_counter.update(x[0])
    return category_counter
def test(args):
    # see if we already ran this experiment
    code_root = os.path.dirname(os.path.realpath(__file__))
    exp_dir = utils.get_path_from_args(args) if not args.output_dir else args.output_dir
    path = "{}/results/{}".format(code_root, exp_dir)
    assert os.path.isdir(path)

    task_family_test = tasks_sine.RegressionTasksSinusoidal("test", args.skew_task_distribution)
    best_valid_model = utils.load_obj(os.path.join(path, "logs")).best_valid_model

    k_shots = [5, 10, 20, 40]
    df = []
    for k_shot in k_shots:
        losses = np.array(
            eval(
                args,
                copy.copy(best_valid_model),
                task_family=task_family_test,
                num_updates=10,
                lr_inner=0.01,
                n_tasks=1000,
                k_shot=k_shot,
            ))
        for grad_step, task_losses in enumerate(losses.T, 1):
            new_rows = [[k_shot, grad_step, tl] for tl in task_losses]
            df.extend(new_rows)

    df = pd.DataFrame(df, columns=["k_shot", "grad_steps", "loss"])
    df.to_pickle(os.path.join(path, "res.pkl"))
    utils.plot_df(df, path)
def __init__(self, model_variables, variables, database):
    self.MV = model_variables
    self.Vars = variables
    self.DB = database
    self.model = None
    self.weight_save_path = "saved_weights"
    self.outputs_path = "outputs"
    self.model_name = self.Vars["name"]
    if self.Vars["class"] == "age":
        self.model_class = "age"
        self.class_count = self.DB.age_class_count
        self.class_labels = self.DB.age_labels
        self.db_train_path = self.DB.db_age_train_folder_path
        self.db_test_path = self.DB.db_age_test_folder_path
        self.mean_image = myutils.load_image(self.DB.age_mean_image_path)
    elif self.Vars["class"] == "sex":
        self.model_class = "sex"
        self.class_count = self.DB.sex_class_count
        self.class_labels = self.DB.sex_labels
        self.db_train_path = self.DB.db_sex_train_folder_path
        self.db_test_path = self.DB.db_sex_test_folder_path
        self.mean_image = myutils.load_image(self.DB.sex_mean_image_path)
    self.class_weights = myutils.load_obj(self.DB.db_new_path + "/" + self.model_class)
def show_examples(args):
    # Load reason instances and embeddings
    fileembds = os.path.join(args.data_dir, args.embsfile)
    embeddings = utils.load_obj(fileembds)
    allreasons = read_data(args)
    allinds = list(range(len(allreasons)))
    assert len(allreasons) == embeddings.shape[0]
    numReasons = len(allreasons)

    # Get a random instance, compute scores and show the most similar ones
    thisidx = random.sample(allinds, 1)[0]
    thisreason = allreasons[thisidx]
    print('-' * 25)
    print("REASON: {}".format(thisidx))
    print(thisreason)
    print('.')

    # Compute scores and sort
    allscores = sklearn.metrics.pairwise.cosine_similarity(embeddings)
    thisscores = allscores[thisidx, :]
    ranking = np.argsort(thisscores)[::-1].tolist()
    sortedscores = np.sort(thisscores)[::-1].tolist()

    # show the top matches
    numshow = 10
    print("MATCHES")
    for k in list(range(numshow)):
        kidx = ranking[k]
        score = sortedscores[k]
        reason = allreasons[kidx]
        print("sample %d, score %.03f: %s" % (kidx, score, reason))
    return allscores
def load_dicts(self, variant):
    filename = self.cache_filename(variant)
    if not os.path.exists(filename):
        cache = self.default_cache()
    else:
        cache = utils.load_obj(filename)
    return cache
def main(args):
    if len(args) != 2:
        print "Usage: mds.py clustering.pkl"
        sys.exit(0)
    path = args[1]

    print "Loading"
    clusters = clustering = utils.load_obj(path)
    # map(lambda c: c.set_label(), clustering)
    for i in [5]:
        clusters = reclusterWithOPTICS(clusters, i)

    _docs = reduce(lambda x, y: x + y, map(lambda c: c.members, clusters))
    confirm = BaseCONFIRM(_docs)
    confirm.clusters = clusters

    print "Original Number of Clusters:", len(clustering)
    print "Final Number of Clusters:", len(clusters)

    '''print reps
    imgs = []
    for idx in reps:
        if idx == 0:
            imgs.append(clustering[i].center)
        else:
            idx = idx - 1
            imgs.append(clustering[i].members[idx])
    display(imgs)'''

    #print len(selectWithHac(clustering))
    #print streamSelector(clustering)
    #print entropy(clustering)
    #print "Analyzing"
    analyzer = metric.KnownClusterAnalyzer(confirm)
    analyzer.print_all()
    print "User Queries:", QueryCount
def get(self, filename):
    with self._disk_lock:
        if type(filename) == types.UnicodeType:
            filename = filename.encode('utf-8')
        data = super(LifoCache, self).get(filename)
        if not data:
            self.disk_read_count += 1
            if filename in self.disk_cache:
                if os.path.exists(self.disk_cache_dir + filename):
                    self.disk_read_hit += 1
                    data = utils.load_obj(self.disk_cache_dir + filename)
                del self.disk_cache[filename]
                if data:
                    self.disk_cache[filename] = True
                    super(LifoCache, self).set(filename, data)
        return data
def main(args):
    if len(args) != 3:
        print "Usage: clusterFrame.py C clustering.pkl"
        print "       C is the cluster in clustering.pkl to display"
        sys.exit(0)
    C = int(args[1])
    path = args[2]
    clustering = utils.load_obj(path)

    root = Tk()
    frame = ClusterFrame(root, clustering[C])
    frame.grid()
    root.mainloop()
def __load_imagenet_weights(self):
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    try:
        print("Loading ImageNet pretrained weights...")
        dict = load_obj(self.args.pretrained_path)
        run_list = []
        for variable in variables:
            for key, value in dict.items():
                # Adding ':' means that we are interested in the variable itself and not
                # the variable parameters that are used in adaptive optimizers
                if key + ":" in variable.name:
                    run_list.append(tf.assign(variable, value))
        self.sess.run(run_list)
        print("Weights loaded\n\n")
    except KeyboardInterrupt:
        print("No pretrained ImageNet weights exist. Skipping...\n\n")
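# Illustration (not part of the original module): the name-matching rule used in
# __load_imagenet_weights above, shown without TensorFlow. A checkpoint key matches a
# graph variable when "key:" is a substring of the variable name, which skips optimizer
# slot variables such as "conv1/weights/Adam:0". The names below are invented.
def _demo_match(var_names, ckpt_keys):
    return {v: k for v in var_names for k in ckpt_keys if k + ":" in v}

assert _demo_match(["conv1/weights:0", "conv1/weights/Adam:0"],
                   ["conv1/weights"]) == {"conv1/weights:0": "conv1/weights"}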
def main(args):
    if len(args) != 3:
        print "Usage: clusterFrame.py C clustering.pkl"
        print "       C is the cluster in clustering.pkl to display"
        sys.exit(0)
    C = int(args[1])
    path = args[2]

    print "Loading"
    clustering = utils.load_obj(path)
    #clustering = doc.get_docs_nested(driver.get_data_dir("very_small"))
    hierarchy = Hierarchy.createHierarchy(clustering)

    print "Starting GUI"
    root = Tk()
    frame = GraphFrame(root, hierarchy)
    frame.pack(fill=BOTH, expand=1)
    root.mainloop()
def _load(self, filename):
    # no need of a lock here
    items = utils.load_obj(filename)
    for d in items:
        self.put(*d)
def default_jobs():
    return {
        "match_queue": job_queue.JobQueue(),
        "split_queue": job_queue.JobQueue(),
        "number_of_match_job": 0,
        "number_of_split_job": 0,
    }


if __name__ == "__main__":
    try:
        cache_dir = "match_and_split_text_layer"
        if not os.path.exists(os.path.expanduser("~/cache/" + cache_dir)):
            os.mkdir(os.path.expanduser("~/cache/" + cache_dir))
        # qdel send a SIGUSR2 if -notify is used when starting the job.
        # signal.signal(signal.SIGUSR2, on_exit)
        try:
            jobs = utils.load_obj("wsdaemon.jobs")
        except:
            jobs = default_jobs()
        thread.start_new_thread(job_thread, (jobs["match_queue"], do_match))
        thread.start_new_thread(job_thread, (jobs["split_queue"], do_split))
        bot_listening()
    except KeyboardInterrupt:
        pywikibot.stopme()
        os._exit(1)
    finally:
        pywikibot.stopme()
def __init__(self, filename):
    self.base_path = "/".join(filename.split('/')[:-1])
    self.index = utils.load_obj(filename + '.index')
    self.fd_data = open(filename)
def load_thrift_app(self):
    return utils.load_obj(self.app_uri)
def load(self):
    self.chdir()
    self.tfactory = utils.load_obj(self.cfg.thrift_transport_factory)()
    self.pfactory = utils.load_obj(self.cfg.thrift_protocol_factory)()
    self.thrift_app = self.load_thrift_app()
    return lambda: 1