# Python 2 module-level helper (note the Python 2 except/print syntax below).
import sys

import configs


def get_jenni(config_path):
    config_files = []
    config = configs.Configs([config_path])
    config.load_modules(config_files)
    try:
        Watcher()
    except Exception, e:
        print >> sys.stderr, 'Warning:', e, '(in __init__.py)'
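# `Watcher` is referenced above but not defined in this excerpt. A minimal
# sketch of the classic fork-based watcher recipe used by phenny-style bots is
# given below, assuming a POSIX system; the actual class in jenni's
# __init__.py may differ.
import os
import signal
import sys

class Watcher(object):
    def __init__(self):
        # Fork: the child returns and runs the bot, the parent only watches.
        self.child = os.fork()
        if self.child == 0:
            return
        self.watch()

    def watch(self):
        try:
            os.wait()
        except KeyboardInterrupt:
            # Ctrl-C in the parent tears down the child cleanly.
            self.kill()
        sys.exit()

    def kill(self):
        try:
            os.kill(self.child, signal.SIGKILL)
        except OSError:
            pass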
import glob
import operator

import tqdm

import configs


def generate_vocab(data_path):
    train_files = glob.glob(data_path + 'train/*')
    valid_files = glob.glob(data_path + 'valid/*')
    cfgs = configs.Configs()

    # Count character frequencies over the training and validation sets.
    all_dic = {}
    files = train_files + valid_files
    i = 0
    for file in tqdm.tqdm(files):
        with open(file, 'r', encoding='utf-8') as f:
            new = f.read()
        for w in new:
            all_dic[w] = all_dic.get(w, 0) + 1
        i += 1
        # if i == 2000:
        #     break

    # Drop rare characters (frequency below 100).
    all_dic = {i: j for i, j in all_dic.items() if j >= 100}

    dic = {}
    w2i = {}
    i2w = {}
    w2w = {}
    # Special tokens come first and get an artificially high frequency.
    for w in [cfgs.W_PAD, cfgs.W_UNK, cfgs.W_EOS]:
        w2i[w] = len(dic)
        i2w[w2i[w]] = w
        dic[w] = 10000
        w2w[w] = w
    for w, tf in all_dic.items():
        if w in dic:
            continue
        w2i[w] = len(dic)
        i2w[w2i[w]] = w
        dic[w] = tf
        w2w[w] = w

    # hfw lists all entries sorted by descending frequency.
    hfw = []
    sorted_x = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
    # print(sorted_x)
    for w in sorted_x:
        hfw.append(w[0])

    assert len(hfw) == len(dic)
    assert len(w2i) == len(dic)
    print("dump dict...")
    print(len(w2i))
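# The function above prints "dump dict..." but the excerpt ends before the
# actual serialization. A minimal sketch, assuming a pickle dump to a
# hypothetical dict_path; the real code may use a different format or location.
import pickle

def dump_vocab(dict_path, hfw, w2i, i2w, w2w):
    # Persist the vocabulary structures built by generate_vocab in one file.
    with open(dict_path, 'wb') as f:
        pickle.dump([hfw, w2i, i2w, w2w], f)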
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Handle connections to WMS API"""

import logging
import ssl
import urllib2
import urlparse

import configs

CONFIG_FILE = "/opt/google/gehttpd/wsgi-bin/wms/ogc/wms.cfg"
CONFIGS = configs.Configs(CONFIG_FILE)

logger = logging.getLogger("wms_maps")


def HandleConnection(url):
    logger.debug("Opening url: [%s]", url)
    if CONFIGS.GetStr("DATABASE_HOST") != "":
        # Route the request to the configured DATABASE_HOST, keeping the
        # path, query, and fragment of the original url.
        url = CONFIGS.GetStr("DATABASE_HOST") + urlparse.urlunsplit(
            ("", "") + urlparse.urlsplit(url)[2:])
    fp = None
    try:
        # Set the context based on cert requirements
        if CONFIGS.GetBool("VALIDATE_CERTIFICATE"):
            cert_file = CONFIGS.GetStr("CERTIFICATE_CHAIN_PATH")
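# HandleConnection is cut off above inside its try block. A sketch of how the
# connection might be completed on Python 2.7.9+, assuming cert_file holds the
# CA chain path read from CERTIFICATE_CHAIN_PATH; the shipped wms module may
# structure its error handling differently.
def _open_url(url, validate, cert_file=None):
    if validate:
        # Verify the server certificate against the configured chain.
        context = ssl.create_default_context(cafile=cert_file)
    else:
        # Validation disabled: accept any certificate.
        context = ssl._create_unverified_context()
    return urllib2.urlopen(url, context=context)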
        # (closing arguments of a subprocess call whose opening precedes this excerpt)
        self.pp_dir_sa, npz_dir), shell=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--number', type=int, default=None,
                        help='How many patients to maximally process.')
    args = parser.parse_args()
    total_stime = time.time()

    import configs
    cf = configs.Configs()

    # analysis finding: the following patients have unclear annotations: some raters gave
    # more than one judgement on the same roi.
    patients_to_exclude = ["0137a", "0404a", "0204a", "0252a", "0366a", "0863a", "0815a",
                           "0060a", "0249a", "0436a", "0865a"]
    # further finding: the following patients contain nodules with segmentation-label
    # inconsistencies. running Preprocessor.verify_seg_label_pairings() produces a data
    # frame with detailed findings.
    patients_to_exclude += ["0305a", "0447a"]
    exclude_paths = [os.path.join(cf.raw_data_dir, pid) for pid in patients_to_exclude]
    # These pids are automatically found and excluded when setting exclude_inconsistents=True
    # at Preprocessor initialization instead of passing the pre-compiled list.
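# A sketch of how the exclusion list and the -n/--number limit might be
# consumed, assuming a hypothetical helper that filters raw-data patient
# directories; the real script hands exclude_paths to its Preprocessor instead.
import os

def list_patient_dirs(raw_data_dir, exclude_paths, limit=None):
    excluded = set(exclude_paths)
    paths = [os.path.join(raw_data_dir, p) for p in sorted(os.listdir(raw_data_dir))]
    paths = [p for p in paths if p not in excluded]
    return paths if limit is None else paths[:limit]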
        rel_dir = os.path.relpath(out_dir, self.cf.pp_rootdir).split(os.sep)
        npz_out_dir = os.path.join(self.cf.pp_npz_dir, str(os.sep).join(rel_dir))
        print("npz out dir: ", npz_out_dir)
        os.makedirs(npz_out_dir, exist_ok=True)
        group_df.to_pickle(os.path.join(npz_out_dir, 'info_df.pickle'))
        dmanager.pack_dataset(out_dir, npz_out_dir, recursive=True, verbose=False)
    else:
        print("Did not convert .npy-files to .npz because npz directory not set in configs.")


if __name__ == '__main__':
    import configs as cf
    cf = cf.Configs()
    total_stime = time.time()

    toy_gen = ToyGenerator(cf)
    toy_gen.create_sets()
    toy_gen.convert_copy_npz()

    mins, secs = divmod((time.time() - total_stime), 60)
    h, mins = divmod(mins, 60)
    t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    print("{} total runtime: {}".format(os.path.split(__file__)[1], t))
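# pack_dataset comes from a project-specific data-manager module that is not
# shown here. A minimal sketch of the .npy-to-.npz compression it is assumed
# to perform (names and behavior of the real dmanager API may differ):
import glob
import os

import numpy as np

def pack_npys_to_npz(src_dir, dst_dir):
    for npy_path in glob.glob(os.path.join(src_dir, '*.npy')):
        arr = np.load(npy_path)
        out_name = os.path.splitext(os.path.basename(npy_path))[0] + '.npz'
        # savez_compressed writes a zipped archive holding the array.
        np.savez_compressed(os.path.join(dst_dir, out_name), data=arr)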