def word_distribution(base_pth):
    words = {}
    for dialect in sorted(listdir(base_pth)):
        for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):
            data = sorted(
                os.listdir(os.path.join(base_pth, dialect, speaker_id)))
            # all the .wav files
            wav_files = [x for x in data if x.split('.')[-1] == 'wav']
            for wav_file in wav_files:
                wav_path = os.path.join(base_pth, dialect, speaker_id, wav_file)
                wrd_path = wav_path[:-3] + 'WRD'
                with open(wrd_path, 'r') as fw:
                    wrd_list = list(fw.readlines())
                for line in wrd_list:
                    # extract word from "start sample, end sample, word" format
                    word_start, word_end, word = line.rstrip().split(' ')
                    # add entry in dictionary
                    if word not in words:
                        words[word] = 0
                    words[word] += 1
    print(sorted(words.items(), key=lambda x: x[1], reverse=True))
def mean_condition_latents(model, data_dir):
    print('Extracting stimuli features')
    conditions = utils.listdir(data_dir, path=False)
    condition_features = {}
    for c in tqdm(conditions):
        stimuli = utils.listdir(os.path.join(data_dir, c))
        stimuli = [utils.load_image(s) for s in stimuli]
        stimuli = torch.stack(stimuli).to(device)
        with torch.no_grad():
            feats = model.encode(stimuli).mean(dim=0).cpu().numpy()
        condition_features[c] = feats
    return condition_features
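# A minimal usage sketch (hypothetical names): `model` can be any module
# exposing an `encode()` method, and `data_dir` holds one sub-folder of
# images per condition. `StimulusEncoder` is an assumed class, not from
# the source.
#
#   model = StimulusEncoder().to(device).eval()
#   features = mean_condition_latents(model, 'data/image/stimuli')
#   np.save('condition_features.npy', features)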
def add_scribble(image):
    """
    Add a scribble.
    """
    files = list(utils.listdir('resources', 'scribble_[0-9]+.png'))
    fname = np.random.choice(files)
    scribble = Image.open(fname)

    # Adjust opacity.
    scribble2 = max_opacity(scribble, 0.85)

    # Shrink a bit. (Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
    # the equivalent filter.)
    scribble2.thumbnail((256, 256), Image.LANCZOS)

    # Rotate the scribble by a small random amount.
    angle = (np.random.random() - 0.5) * 20
    scribble2 = scribble2.rotate(angle, resample=Image.BICUBIC, expand=True)

    # Find a random place near the upper corner.
    x = np.random.randint(int(image.size[0] - (1.5 * scribble2.size[0])),
                          int(image.size[0] - (0.8 * scribble2.size[0])))
    y = np.random.randint(0, scribble2.size[1])

    # Do it.
    image.paste(scribble2, (x, y), scribble2)
    return
def _load_guitars(self, opts):
    """Load data from Thomann files."""
    logging.debug('Loading Guitars dataset')
    data_dir = os.path.join('./', 'thomann')
    files = utils.listdir(data_dir)
    pics = []
    for f in sorted(files):
        if '.jpg' in f and f[0] != '.':
            im = Image.open(utils.o_gfile((data_dir, f), 'rb'))
            res = np.array(im.getdata()).reshape(128, 128, 3)
            pics.append(res)
    X = np.array(pics)
    seed = 123
    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed()
    self.data_shape = (128, 128, 3)
    self.data = Data(opts, X / 255.)
    self.num_points = len(X)
    logging.debug('Loading Done.')
def input_fn(self, mode, data_dir):

    def decode_record(record):
        """Decodes a record to a TensorFlow example."""
        example = tf.io.parse_single_example(record, self.name_to_features)
        # tf.Example doesn't support tf.int32.
        for name in list(example.keys()):
            t = example[name]
            if t.dtype == tf.int64:
                t = tf.cast(t, tf.int32)
            example[name] = t
        return {'input': example['input']}, example['target'][0]

    files = utils.listdir(data_dir)
    print('in mode {}, files: {}'.format(mode, files))
    with tf.name_scope('dataset'):
        d = tf.data.TFRecordDataset(files)
        if mode == 'train':
            # d = d.repeat()
            d = d.shuffle(buffer_size=self.shuffle_size)
        d = d.map(decode_record, num_parallel_calls=self.num_parallel_calls)
        if mode == 'train':
            d = d.batch(batch_size=self.batch_size,
                        drop_remainder=self.train_drop_remainder)
        else:
            d = d.batch(batch_size=self.batch_size, drop_remainder=False)
        d = d.prefetch(buffer_size=self.prefetch_size)
    return d
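# A minimal example (assumed, not from the source) of the `name_to_features`
# spec that `decode_record` above relies on; the feature lengths are made up:
#
#   self.name_to_features = {
#       'input': tf.io.FixedLenFeature([128], tf.int64),
#       'target': tf.io.FixedLenFeature([1], tf.int64),
#   }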
def __init__(self):
    self.home = run_command_w_output('echo $HOME')[:-1]
    self.db = Base(self.home + '/driver')
    home_files = listdir(self.home)
    if 'netapps' not in home_files:
        cd(self.home)
        mkdir('netapps')
def __init__(self, hor_dir, velocity, params):
    # First generate the parent object.
    super(HorizonContainer, self).__init__(params)
    self.velocity = velocity
    self.data = {}
    self.coords = {}
    self.lookup = {}

    for fname in utils.listdir(hor_dir):
        with open(fname) as f:
            samples = f.readlines()
        name = samples.pop(0).strip().strip('#')
        points = []
        for s in samples:
            line, cdp, x, y, t, surv = s.split()
            x, y = int(float(x)), int(float(y))
            t = float(t)
            points.append(Point(x, y, t))
        if self.lookup.get(name):
            self.lookup[name] += points
        else:
            self.lookup[name] = points
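# A sketch of the horizon-file layout the parser above implies; the values
# and column names are invented from the unpacking order, not from the source:
#
#   #Top_Horizon
#   L100  1201  605832.0  6073556.0  1843.2  SURVEY_A
#   ...
#
# i.e. a '#'-prefixed horizon name on the first line, then whitespace-separated
# line, cdp, x, y, t, survey columns.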
def initialize_data_file(self):
    # log_time_interval = 5  # seconds
    # seconds_process = 0
    # def log_time():
    #     global seconds_process
    #     seconds_process += 5
    #     print("It's still running, be patient, please. Running time: "
    #           "%s" % seconds_process)
    # t = Timer(log_time_interval, log_time)
    # t.start()
    if not path.exists(self.ppiscript):
        self.func_log.critical(('get ppi script cannot be found. dir '
                                'contains %s')
                               % utils.listdir(utils.dirname(self.ppiscript)))
        raise EnvironmentError('get ppi script cannot be found')
    print("Calling the get ppi script with ruby")
    process = Popen(self.ppiscript, stdout=PIPE)
    output = list(process.communicate())
    # t.cancel()
    # Hard-code the parse rule, since the output is awkward to parse.
    # The data looks like this:
    # (b"D, [2016-07-06T14:40:22.172340 #1676] DEBUG -- : Successfully logged into COINS.\r\n{{'M53799763':{'gender': 'F'}}\r\n{{'M53799718':{'gender': 'M'}}\r\n", None)
    string = output[0].decode()
    data = string.split('\r\n')[1:]
    print("Initializing the file")
    with open(self.temp_file_path, 'w') as tempfile:
        for subject in data:
            tempfile.write(subject + '\n')
def reset_qlearner_folders():
    for sub_dir in [
            "hunter",
            "smalllake",
            "largelake",
            "shaped_smalllake",
            "shaped_largelake",
    ]:
        whole_sub_dir = os.path.join(output_dir, 'qlearning', sub_dir)
        if not os.path.exists(whole_sub_dir):
            os.mkdir(whole_sub_dir)
        for explore_dir in [
                "random",
                "equal",
                "greedy",
                "epsilongreedy",
                "epsilondecay",
                "epsilonstatedecay",
        ]:
            clear_dir = os.path.join(output_dir, "qlearning", sub_dir, explore_dir)
            if not os.path.exists(clear_dir):
                os.mkdir(clear_dir)
                continue
            for filename in listdir(clear_dir):
                os.unlink(os.path.join(clear_dir, filename))
def pics_to_pdf(pics_directory: str, result_filename: str):
    with open('pdf/' + result_filename + '.pdf', 'wb') as f:
        f.write(
            img2pdf.convert([
                ut.get_files_folder() + pics_directory + '/' + i
                for i in ut.listdir(ut.get_files_folder() + pics_directory)
            ]))
def load_stimuli(stimuli_folder):
    conditions = os.listdir(stimuli_folder)
    conditions = [c for c in conditions if c != '.DS_Store']
    stimuli = {
        c: utils.listdir(os.path.join(stimuli_folder, c), path=False)
        for c in conditions
    }
    return stimuli
def create_image_generator(path):
    filenames = utils.listdir(path, extensions=('.jpg', '.png'))
    while True:
        random.shuffle(filenames)
        for filename in filenames:
            filepath = os.path.join(path, filename)
            yield image_utils.load_image(filepath)
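# Usage sketch: the generator above is infinite (it reshuffles and restarts
# after each pass), so consume it with next(); the batch size is illustrative:
#
#   gen = create_image_generator('data/images')
#   batch = [next(gen) for _ in range(32)]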
def _given_labels(self):
    bounding_labels = list(itertools.chain(*self.SPLITTING_INFO.values()))
    prefixes = {label.split('_')[0] + '_' for label in bounding_labels}
    raw_entries = utils.listdir(self.RAW_PATH)
    # Keep every raw entry that starts with one of the bounding-label prefixes.
    given_labels = [
        entry for prefix in prefixes
        for entry in raw_entries if entry.startswith(prefix)
    ]
    return given_labels
def bulk_insert2ES(org, bulk_size=100, json_file_folder='./txt2ES_json'):
    json_file_paths = []
    listdir(json_file_folder, json_file_paths, ['.json'])
    for path in json_file_paths:
        if path.split('\\')[-1].split('_')[0] != org:
            continue
        with open(path, 'r') as f1:
            bulk_content_lines = f1.readlines()
        url = f'http://localhost:9200/{org.lower()}/_bulk?pretty&refresh'
        headers = {'Content-Type': 'application/json'}
        # Each document occupies two lines (action + source), so step by bulk_size * 2.
        for _index in range(0, len(bulk_content_lines), bulk_size * 2):
            data = ''.join(
                bulk_content_lines[_index:_index + bulk_size * 2]) + '\n'
            requests.post(url=url, headers=headers, data=data)
    print(f'{org} DONE!')
    return True
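# For reference, the Elasticsearch bulk body consumed above pairs an action
# line with a source line per document (the ID and fields here are invented):
#
#   {"index":{"_id":"3f2a9c..."}}
#   {"Organization": "ACL", "Name": "some_paper", "Year": "2019", "Text": "..."}
#
# which is why the loop steps through bulk_content_lines in chunks of bulk_size * 2.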
def __init__(self, bedrock_dir, params):
    super(BedrockContainer, self).__init__(params)
    self.reset_all()

    # Read in all shape files.
    for f in utils.listdir(bedrock_dir, '\\.shp$'):
        for line in fiona.open(f):
            self.lookup[shape(line['geometry'])] = line['properties']
def add_from_txt(txt_folder_name=r'..\..\papers_data'):
    file_paths = []
    listdir(txt_folder_name, file_paths, ['.txt'])
    start_time = time.time()
    res_json_text = []
    org_years = collections.defaultdict(set)  # for the json file name
    for txt_file_path in tqdm(file_paths, ncols=100, desc='process txts'):
        folder_name, org, org_year, file_name = split_path(txt_file_path)
        name = file_name.split('.txt')[0]
        year = org_year.split('_')[1]
        json_body = {}
        json_body['Organization'] = org
        json_body['Name'] = name
        json_body['Year'] = year
        with open(txt_file_path, 'r', encoding='utf8') as f:
            json_body['Text'] = clean_text(f.read())
        json_start = '{"index":{"_id":"' + curlmd5(json_body['Name']) + '"}}'
        json_body = json_start + '\n' + json.dumps(json_body) + '\n'
        res_json_text.append([org, json_body])
        org_years[org].add(year)

    out_folder = 'txt2ES_json'
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)
    for _org in org_years:
        org_years[_org] = '_'.join(sorted(org_years[_org]))
        with open(f'./{out_folder}/{_org}_{org_years[_org]}.json', 'w',
                  encoding='utf8') as f:
            for __org, _json_body in res_json_text:
                if __org == _org:
                    f.write(_json_body)

    end_time = time.time()
    print(f'time cost: {end_time - start_time:.2f}s.')
def simulate_all_hunterschoice_policies():
    policies = [
        os.path.join(hunter_dir, x) for x in listdir(hunter_dir)
        if "evaluation" not in x
    ]
    for policy in policies:
        success_rate, episode_lengths = simulate_hunterschoice_policy(policy)
        result_dict = {
            "success_rate": success_rate,
            "episode_lengths": episode_lengths
        }
        out_path = policy + "_evaluation"
        with open(out_path, "wb") as f:
            pickle.dump(result_dict, f)
def get_condition_latents(model, data_dir):
    print('Extracting features')
    stimuli = utils.listdir(data_dir, path=False)
    condition_features = {}
    batch_size = 256
    for i in tqdm(range(0, len(stimuli), batch_size)):
        batch_names = stimuli[i:i + batch_size]
        batch = [
            utils.load_image(os.path.join(data_dir, n)) for n in batch_names
        ]
        batch = torch.stack(batch).to(device)
        with torch.no_grad():
            batch_feats = model.encode(batch).cpu().numpy()
        for name, feats in zip(batch_names, batch_feats):
            condition_features[name] = feats
    return condition_features
def __init__(self, segy_dir, params):
    super(SegyContainer, self).__init__(params)
    try:
        # This creates a (hidden) shapefile for each seismic line,
        # then steps over them to read their positions and meta.
        # TODO Simplify this... maybe don't even write the files.
        sgy2shp(segy_dir, segy_dir)
    except ShapeFileExists:
        pass
    self.reset_all()
    for f in utils.listdir(segy_dir, '\\..+\\.shp$'):
        with fiona.open(f, "r") as traces:
            for trace in traces:
                self.lookup[shape(trace["geometry"])] = trace["properties"]
def search_jdk():
    if jdk_zip_exists():
        locales.adv_print("JDK_ZIP_ALREADY_EXISTS",
                          variables={"zipfile": settings["jdk_zip_name"]})
        utils.extract_file(settings["jdk_zip_name"])
        os.remove(settings["jdk_zip_name"])
    for file in os.listdir():
        jdk_path = os.path.join(os.getcwd(), file)
        if "jdk" in file and not os.path.isfile(jdk_path) and utils.verify_path(Path(jdk_path)):
            extend_path(jdk_path)
            return True
    for file in utils.listdir(settings["jdk_installation_path"]):
        jdk_path = os.path.join(settings["jdk_installation_path"], file)
        if "jdk" in file and os.path.isdir(jdk_path) and utils.verify_path(Path(jdk_path)):
            extend_path(jdk_path)
            utils.set_java_home(jdk_path)
            return True
    # Guard against a JAVA_HOME that points somewhere invalid (e.g. the recycle bin).
    jdk = os.environ.get("JAVA_HOME")
    return settings["skip_jdk_checks"] or (jdk is not None and utils.verify_path(Path(jdk)))
def evaluate(self, label, selected_energy_levels, test_batch_size=50):
    prefixed = label + '_'
    given_labels = list(
        filter(lambda x: x.startswith(prefixed),
               utils.listdir(self.CONFIG_CLASS().RAW_PATH)))
    evaluation = dict()
    for tmp_label in given_labels:
        if tmp_label == label + '_0':
            self.CONFIG_CLASS.SPLITTING_INFO = {'regular': [tmp_label]}
        else:
            self.CONFIG_CLASS.SPLITTING_INFO = {'chaotic': [tmp_label]}
        val_exper = experiment.Experiment(self.EXPN, self.EXP_PATH,
                                          self.CONFIG_CLASS(), self.LOGLEVEL)
        val_exper.prepare_validation()
        val_loader = val_exper.get_validation_loader(test_batch_size,
                                                     selected_energy_levels)
        evaluation[tmp_label] = learning.test(self.MODEL, val_loader)
        val_exper.remove()
    return evaluation
def view_history(path=None):
    tree = listdir(g.repo, g.commit, g.path)
    try:
        page = int(request.args.get('page'))
    except (TypeError, ValueError):
        page = 0

    if page:
        history_length = 30
        skip = (page - 1) * 30 + 10
        if page > 7:
            # Python 3: range doesn't concatenate with lists, so materialize it.
            previous_pages = [0, 1, 2, None] + list(range(page))[-3:]
        else:
            previous_pages = range(page)
    else:
        history_length = 10
        skip = 0
        previous_pages = None

    return render_template('history.html', page=page,
                           history_length=history_length, skip=skip,
                           tree=tree, previous_pages=previous_pages)
def __init__(self, expn=None, exp_path=None, config=None, loglevel=log.NO):
    self.LOGLEVEL = loglevel
    myconfig = MyConfig()
    if expn is not None and exp_path is not None:
        self.EXPN = expn
        self.FOLDER = os.path.join(exp_path, str(expn))
        self.config = myconfig  # load_config(self.FOLDER)
        log.info(self.LOGLEVEL, "Load experiment #{}".format(self.EXPN))
    elif config is not None:
        self.config = config
        exp_path = os.path.join(self.config.ROOT_PATH, self.config.EXP_FOLDER)
        os.makedirs(exp_path, exist_ok=True)
        self.EXPN = 1 + len(utils.listdir(exp_path))
        self.FOLDER = os.path.join(self.config.ROOT_PATH,
                                   self.config.EXP_FOLDER, str(self.EXPN))
        log.info(self.LOGLEVEL, self.__str__())
    else:
        raise RuntimeError()
def view_blob(path):
    tree = listdir(g.repo, g.commit, g.path)
    blob = get_blob(g.repo, g.commit, g.path)
    raw_url = url_for('view_raw_blob', path=g.path)
    too_large = sum(map(len, blob.chunked)) > 100 * 1024
    return render_template('view_blob.html', blob=blob, raw_url=raw_url,
                           too_large=too_large, tree=tree)
    if label is not None:
        for i in range(len(label)):
            train_writer.write(
                build_data(sample=np.array(sample[i], dtype=np.uint8).tobytes(),
                           label=label[i],
                           legal=np.array(legal[i], dtype=np.uint8).tobytes()
                           ).SerializeToString())
            train_line_cnt += 1
            if train_line_cnt >= LINE_NUM:  # file is full; roll over to a new one
                train_writer.close()
                train_line_cnt = 0
                train_file_no += 1
                train_writer = tf.python_io.TFRecordWriter(
                    train_file_pattern % train_file_no)
    train_writer.close()


if __name__ == '__main__':
    file_src = 'F:/go_data/records3'
    file_name = []
    listdir(file_src, file_name)
    multi = 8
    # Create the worker processes.
    process_list = []
    lock = multiprocessing.Lock()
    for p in range(multi):
        t = MyProcess(p, "Process-" + str(p), lock,
                      file_name[int(p / 4)::int(multi / 4)])
        t.start()
        process_list.append(t)
    for process in process_list:
        process.join()
def choose_keywords(base_pth, chosen_keywords, num_templates, gen_template,
                    template_save_loc=None, blank_id=40):
    """
    Choose keywords from TIMIT TEST according to the minimum number of
    templates required
    :param blank_id: blank_id index
    :param gen_template: if True, the RNN is used to generate templates;
                         otherwise templates are extracted from TIMIT
    :param template_save_loc: directory where keyword templates are stored
    :param base_pth: path to root directory TIMIT/TEST
    :param chosen_keywords: list of keywords to be tested on
    :param num_templates: the top-n templates chosen for every keyword
    """
    if gen_template:
        print("Generating templates using RNN")
        if not os.path.exists(template_save_loc):
            os.mkdir(template_save_loc)

        words = {}
        for dialect in sorted(listdir(base_pth)):
            for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):
                data = sorted(
                    os.listdir(os.path.join(base_pth, dialect, speaker_id)))
                # all the .wav files
                wav_files = [x for x in data if x.split('.')[-1] == 'wav']
                for wav_file in wav_files:
                    wav_path = os.path.join(base_pth, dialect, speaker_id,
                                            wav_file)
                    wrd_path = wav_path[:-3] + 'WRD'
                    with open(wrd_path, 'r') as fw:
                        wrd_list = list(fw.readlines())
                    for line in wrd_list:
                        # extract word from "start sample, end sample, word" format
                        word_start, word_end, word = line.rstrip().split(' ')
                        word_start, word_end = int(word_start), int(word_end)
                        # add entry in dictionary
                        if word not in words and word in chosen_keywords:
                            words[word] = []
                        if word in chosen_keywords:
                            words[word].append((wav_path, word_start, word_end))

        clip_paths = []
        for word, paths in words.items():
            np.random.shuffle(paths)
            for i, (path, start, end) in enumerate(paths[:num_templates]):
                (rate, sig) = wav.read(path)
                assert rate == 16000
                sig = sig[start:end]
                write_name = template_save_loc + word + '_' + str(i) + '.wav'
                wav.write(write_name, rate, sig)
                clip_paths.append(write_name)

        templates = {}
        rnn = dl_model('test_one')
        outputs, phone_to_id, id_to_phone = rnn.test_one(clip_paths)
        for out, path in outputs:
            word = path.split('/')[-1].split('_')[0]
            if word not in templates:
                templates[word] = []
            out = np.argmax(out[0], axis=1)
            final_seq = utils.ctc_collapse(out, blank_id)
            final_seq = [id_to_phone[x] for x in final_seq]
            if final_seq[0] == 'pau':
                final_seq = final_seq[1:]
            templates[word].append(final_seq)
        print("Templates from RNN:", templates)
        return templates
    else:
        print("Extracting templates from TIMIT")
        keywords = {}
        for dialect in sorted(listdir(base_pth)):
            for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):
                data = sorted(
                    os.listdir(os.path.join(base_pth, dialect, speaker_id)))
                # all the .wav files
                wav_files = [x for x in data if x.split('.')[-1] == 'wav']
                for wav_file in wav_files:
                    wav_path = os.path.join(base_pth, dialect, speaker_id,
                                            wav_file)
                    wrd_path = wav_path[:-3] + 'WRD'
                    ph_path = wav_path[:-3] + 'PHN'
                    with open(wrd_path, 'r') as fw:
                        wrd_list = list(fw.readlines())
                    with open(ph_path, 'r') as fp:
                        ph_list = list(fp.readlines())
                    for line in wrd_list:
                        phones_in_word = []
                        # extract word from "start sample, end sample, word" format
                        word_start, word_end, word = line.rstrip().split(' ')
                        word_start, word_end = int(word_start), int(word_end)
                        # add entry in dictionary
                        if word not in keywords:
                            keywords[word] = {}
                        # iterate over list of phones
                        for ph_line in ph_list:
                            # extract phone from "start sample, end sample, phone" format
                            ph_start, ph_end, ph = ph_line.rstrip().split(' ')
                            ph_start, ph_end = int(ph_start), int(ph_end)
                            if ph_start == word_end:
                                break
                            # if phone corresponds to current word, add to list
                            if ph_start >= word_start and ph_end <= word_end:
                                # collapse
                                for father, list_of_sons in replacement.items():
                                    if ph in list_of_sons:
                                        ph = father
                                        break
                                phones_in_word.append(ph)
                        phones_in_word = tuple(phones_in_word)
                        # increment count in dictionary
                        if phones_in_word not in keywords[word]:
                            keywords[word][phones_in_word] = 0
                        keywords[word][phones_in_word] += 1

        # choose the most frequently occurring templates from the dictionary
        final_templates = {}
        for keyword in chosen_keywords:
            temps = keywords[keyword]
            temps = sorted(temps.items(), key=lambda kv: kv[1], reverse=True)
            chosen = [x[0] for x in temps[:num_templates]]
            final_templates[keyword] = chosen
        print("Templates from TIMIT:", final_templates)
        return final_templates
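# For reference, a TIMIT .WRD file lists "<start sample> <end sample> <word>"
# per line, and .PHN files use the same layout with phones instead of words,
# e.g. (illustrative values):
#
#   2260 5700 she
#   5700 10121 had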
def gen_cases(base_pth_template, base_pth_totest, pkl_name, num_templates,
              num_clips, num_none, keywords, gen_template):
    """
    Generates test cases on which the model is to be tested
    :param gen_template: whether to generate templates using the RNN or extract them from TIMIT
    :param base_pth_template: root directory of TIMIT/TEST from which templates are picked
    :param base_pth_totest: root directory of TIMIT/TEST from which test examples are picked
    :param pkl_name: path to pickle dump which stores the list of paths
    :param num_clips: number of clips containing the keyword on which we want to test
    :param keywords: list of keywords to be tested
    :param num_templates: top-n templates to be returned for each keyword
    :param num_none: number of clips which do not contain any keyword
    :return: {kw1: {'templates': [[phone_list 1], [phone_list 2], ...],
                    'test_wav_paths': [path1, path2, ...]},
              kw2: ...}
    """
    if os.path.exists(pkl_name):
        with open(pkl_name, 'rb') as f:
            return pickle.load(f)

    kws_chosen = choose_keywords(base_pth_template, keywords, num_templates,
                                 gen_template)
    final_paths = {}
    paths = []

    for kw in keywords:
        final_paths[kw] = {'templates': kws_chosen[kw], 'test_wav_paths': []}
    final_paths['NONE'] = {'templates': [], 'test_wav_paths': []}

    for dialect in sorted(listdir(base_pth_totest)):
        for speaker_id in sorted(
                listdir(os.path.join(base_pth_totest, dialect))):
            data = sorted(
                os.listdir(os.path.join(base_pth_totest, dialect, speaker_id)))
            # all the .wav files
            wav_files = [x for x in data if x.split('.')[-1] == 'wav']
            for wav_file in wav_files:
                wav_path = os.path.join(base_pth_totest, dialect, speaker_id,
                                        wav_file)
                wrd_path = wav_path[:-3] + 'WRD'
                paths.append((wav_path, wrd_path))

    # shuffle paths
    np.random.shuffle(paths)

    for wav_path, wrd_path in paths:
        with open(wrd_path, 'r') as f:
            wrd_list = f.readlines()
        for line in wrd_list:
            # extract word from "start frame, end frame, word" format
            word_start, word_end, word = line.rstrip().split(' ')
            if word in keywords:
                # use wav file to compare with keyword
                if len(final_paths[word]['test_wav_paths']) < num_clips:
                    final_paths[word]['test_wav_paths'].append(wav_path)
                break
            elif len(final_paths['NONE']['test_wav_paths']) < num_none:
                final_paths['NONE']['test_wav_paths'].append(wav_path)
                break

    with open(pkl_name, 'wb') as f:
        pickle.dump(final_paths, f)

    print('Number of templates:',
          {word: len(dat['templates']) for word, dat in final_paths.items()})
    print('Number of clips:',
          {word: len(dat['test_wav_paths']) for word, dat in final_paths.items()})

    return final_paths
enc_name = args.name.split('_')
model = enc_name[0][0:-1]
layer = int(enc_name[0][-1])
roi = enc_name[1]
maxpool = enc_name[2]

roi_r = []
random_r = []
sub_voxel_regressor = {}
for subj in args.subject:
    path = os.path.join('processed/predicted/', args.predicted_data, subj,
                        args.name + '_' + args.random)
    conditions = sorted(embed.keys())
    condition_voxels = {}
    for condition in conditions:
        file_name = listdir(os.path.join(path, condition))[0]
        features = np.load(file_name)
        condition_voxels[condition] = np.mean(features, axis=0)
    voxel_regressor = np.stack([
        condition_voxel for condition, condition_voxel in
        OrderedDict(condition_voxels).items()
    ])
    sub_voxel_regressor[subj] = voxel_regressor

all_voxel_regressor = np.hstack([sub_voxel_regressor[s] for s in args.subject])
for pc in tqdm(range(10, 80, 10), total=7, position=0, leave=True):
    _, voxel_mean_r = cv_regression_w(all_voxel_regressor, embed, fit=None,
                                      k=9, l2=0.0, pc_fmri=pc,
                                      pc_embedding=args.pc_embedding)
    roi_r.append(voxel_mean_r)

roi_dim = []
roi_max = []
roi_mean = []
roi_med = []
def _find_modules(modulepath, terse, tolerant):
    """Find all available modules on modulepath"""
    loaded_modules = _loaded_modules()
    available_modules = OrderedDict()
    starting_dir = os.getcwd()
    for directory in modulepath:
        # Go through each directory in the MODULEPATH and collect modules
        if not directory.strip():
            # Skip empty entries
            continue
        if not os.path.isdir(directory):
            if tolerant:
                # Skip nonexistent directories
                continue
            raise Exception('Nonexistent directory in '
                            'MODULEPATH: {0!r}'.format(directory))

        # Collect modules in this directory
        this_dir_modules = []

        # Change to the directory and get its modules.
        # Files in the first level don't have name/version format, just name
        os.chdir(directory)
        files = listdir(directory, key=os.path.isfile)
        for filename in files:
            moduletype = is_module(filename)
            name = os.path.splitext(filename)[0]
            if moduletype is None:
                continue
            dikt = {
                'name': name,
                'fullname': name,
                'path': os.path.join(directory, filename),
                'realpath': os.path.join(directory, filename),
            }
            if moduletype == TCL_MODULEFILE:
                # TCL module
                dikt['type'] = 'tcl'
            elif moduletype == PY_MODULEFILE:
                dikt['type'] = 'python'
            else:
                raise Exception('Unknown module type')
            dikt['loaded'] = dikt['fullname'] in loaded_modules
            this_dir_modules.append(Module(**dikt))

        # Look for modules 1 directory in.
        # Modules 1 level in have name/version format
        dirs = listdir(directory, key=os.path.isdir)
        for dirname in dirs:
            default_module = None
            os.chdir(os.path.join(directory, dirname))
            for item in os.listdir('.'):
                if os.path.isdir(item):
                    io.log_warning(
                        'The following directory, nested more than 1 '
                        'deep from a MODULEPATH directory, will not be '
                        'searched: {0}'.format(item))
                    continue
                elif os.path.islink(item):
                    if item == 'default':
                        default_module = os.path.realpath(item)
                        if not os.path.isfile(default_module):
                            raise Exception('Default module symlink points to '
                                            'nonexistent file '
                                            '{0!r}'.format(default_module))
                        d = os.path.basename(os.path.dirname(default_module))
                        if d != dirname:
                            raise Exception('Default module symlink points to '
                                            'a file in a different directory')
                        continue
                    if not os.path.isfile(os.path.realpath(item)):
                        raise Exception('Symlink {0!r} points to '
                                        'nonexistent file'.format(item))
                moduletype = is_module(item)
                if moduletype is None:
                    continue
                name = dirname
                version = os.path.splitext(item)[0]
                fullname = os.path.join(name, version)
                path = os.path.join(directory, dirname, item)
                realpath = os.path.realpath(path)
                dikt = {
                    'name': name,
                    'fullname': fullname,
                    'path': path,
                    'realpath': realpath,
                }
                if moduletype == TCL_MODULEFILE:
                    # TCL module
                    dikt['type'] = 'tcl'
                elif moduletype == PY_MODULEFILE:
                    dikt['type'] = 'python'
                    if re.search('^[0-9]', version):
                        # versioned
                        dikt['version'] = version
                else:
                    raise Exception('Unknown module type')
                dikt['loaded'] = dikt['fullname'] in loaded_modules
                this_dir_modules.append(Module(**dikt))

            # Set the default
            if default_module is not None:
                for module in this_dir_modules:
                    if module.realpath == default_module:
                        module.default = True
                        break
            else:
                # Determine if modules are versioned
                if any([module.version for module in this_dir_modules]):
                    this_dir_modules[-1].default = True

        available_modules[directory] = sorted(this_dir_modules,
                                              key=lambda x: x.fullname)

    os.chdir(starting_dir)
    if terse:
        return [x.fullname for (k, v) in available_modules.items() for x in v]
    return available_modules
args = parser.parse_args()

# load features
layers = {
    1: 'conv1',
    2: 'conv2',
    3: 'conv3',
    4: 'conv4',
    5: 'conv5',
    6: 'fc6',
    7: 'fc7'
}
layer = layers[args.layer]
feature_path = os.path.join("./processed/feature", args.train_data, args.model)
conditions = listdir(feature_path)
condition_features = {}
for c in conditions:
    features = listdir(c)
    c_name = c.split('/')[-1]
    if args.maxpool == 'maxpool':
        if layer not in ('fc6', 'fc7'):
            features = torch.stack([torch.load(f)[layer] for f in features])
            # global max pooling over the spatial dimensions
            features, _ = torch.max(
                features.view(features.size(0), features.size(1),
                              features.size(2),
                              features.size(3) * features.size(4)).squeeze(),
                -1)
            # features = torch.stack([torch.tensor(torch.load(f)[layer].numpy().max(axis=-1).max(axis=-1)).flatten() for f in features])
if args.model == 'alexnet':
    feature_extractor = AlexNet()
elif args.model == 'vgg':
    feature_extractor = VGG16()
else:
    raise ValueError('Unimplemented feature extractor: {}'.format(args.model))

feature_extractor.conv1.register_forward_hook(get_activation('conv1'))
feature_extractor.conv2.register_forward_hook(get_activation('conv2'))
feature_extractor.conv3.register_forward_hook(get_activation('conv3'))
feature_extractor.conv4.register_forward_hook(get_activation('conv4'))
feature_extractor.conv5.register_forward_hook(get_activation('conv5'))
feature_extractor.fc6.register_forward_hook(get_activation('fc6'))
feature_extractor.fc7.register_forward_hook(get_activation('fc7'))

conditions = listdir('data/image/' + args.dataset)
for c in tqdm(conditions):
    stimuli = listdir(c)
    c_name = c.split('/')[-1]
    os.mkdir('processed/feature/' + args.dataset + '/' + args.model + '/' + c_name)
    stimuli_tensor = [
        image_to_tensor(s, resolution=args.resolution) for s in stimuli
    ]
    for name, tensor in zip(stimuli, stimuli_tensor):
        activation = {}
        output = feature_extractor(tensor.unsqueeze(0))
        file = name.split('/')[-1] + '.pth'
        torch.save(
            activation,
            os.path.join('processed/feature', args.dataset, args.model,
                         c_name, file))
    'coffee_rings': 0,
    'distort': False,
    'scribble': False,
}
for k, v in defaults.items():
    if cfg.get(k) is None:
        cfg[k] = v
cfg['outfile'] = args.out

# Gather files to work on, then go and do them.
if os.path.isfile(target):
    Notice.hr_header("Processing file: {}".format(target))
    main(target, cfg)
    Notice.hr_header("Done")
elif os.path.isdir(target):
    if args.recursive:
        Notice.info("Looking for SEGY files in {} and its subdirectories".format(target))
        for target in utils.walk(target, "\\.se?gy$"):
            Notice.hr_header("Processing file: {}".format(target))
            main(target, cfg)
    else:
        Notice.info("Finding SEGY files in {}".format(target))
        for target in utils.listdir(target, "\\.se?gy$"):
            Notice.hr_header("Processing file: {}".format(target))
            main(target, cfg)
    Notice.hr_header("Done")
else:
    Notice.fail("Not a file or directory.")
def gen_pickle(self):
    """
    Iterates over the dataset and picks up recordings such that each phone
    is covered at least x number of times
    :return: huge list of feature vectors of audio recordings and phones as a tuple for each frame
             Each item in the returned list is a list corresponding to a single recording
             Each recording is in turn a list of (ph, feature_vector) tuples for each frame
    """
    # Return if already exists
    if os.path.exists(self.pkl_name):
        print("Found pickle dump for recordings to be tested")
        with open(self.pkl_name, 'rb') as f:
            return pickle.load(f)

    print("Generating Q value pickle dump for a minimum of",
          self.min_phones, 'utterances of each phone')

    # final list to be returned
    to_return = []
    base_pth = self.config['dir']['dataset'] + 'TRAIN/'

    # keeps track of the number of phones; terminate only when all phones are above a threshold
    ph_count_dict = {}
    for phone, ph_id in self.phone_to_id.items():
        if ph_id < self.config['num_phones']:
            ph_count_dict[phone] = 0

    # keywords chosen
    keywords_chosen = set()
    paths = []

    # Iterate over the entire dataset and store paths of wav files
    for dialect in sorted(utils.listdir(base_pth)):
        for speaker_id in sorted(utils.listdir(os.path.join(base_pth, dialect))):
            data = sorted(os.listdir(os.path.join(base_pth, dialect, speaker_id)))
            # all the .wav files
            wav_files = [x for x in data if x.split('.')[-1] == 'wav']
            for wav_file in wav_files:
                wav_path = os.path.join(base_pth, dialect, speaker_id, wav_file)
                wrd_path = wav_path[:-3] + 'WRD'
                phone_path = wrd_path[:-3] + 'PHN'
                paths.append((wav_path, wrd_path, phone_path))

    # Shuffle the recordings so that we pick up recordings from various dialects and speakers
    np.random.shuffle(paths)

    for wav_path, wrd_path, phone_path in paths:
        # break once we have found the required number of phones
        if all(x > self.min_phones for x in ph_count_dict.values()):
            print("Found enough utterances to cover all phones")
            break

        cur_phones = []

        with open(wrd_path, 'r') as f:
            wrd_list = f.readlines()
        for line in wrd_list:
            # extract word from "start frame, end frame, word" format
            word_start, word_end, word = line.split(' ')
            word = word[:-1]
            keywords_chosen.add(word)

        with open(phone_path, 'r') as f:
            phones_read = f.readlines()
        for phone in phones_read:
            s_e_i = phone[:-1].split(' ')
            # start, end, phone_name, e.g. 0 5432 'aa'
            start, end, ph = int(s_e_i[0]), int(s_e_i[1]), s_e_i[2]
            # collapse into father phone
            for father, list_of_sons in replacement.items():
                if ph in list_of_sons:
                    ph = father
                    break
            cur_phones.append(ph)
            # increment count of phone
            ph_count_dict[ph] += 1

        final_vec = utils.read_wav(wav_path,
                                   winlen=self.config['window_size'],
                                   winstep=self.config['window_step'],
                                   fbank_filt=self.config['n_fbank'],
                                   mfcc_filt=self.config['n_mfcc'])
        to_return.append((final_vec, cur_phones))

    print("Final phone count dict:", ph_count_dict)

    with open(self.pkl_name, 'wb') as f:
        pickle.dump(to_return, f)
    print("Dumped pickle for recordings to be tested")
    # print("Final chosen words:", keywords_chosen)
    return to_return