Example #1
def word_distribution(base_pth):

    words = {}

    for dialect in sorted(listdir(base_pth)):

        for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):

            data = sorted(
                os.listdir(os.path.join(base_pth, dialect, speaker_id)))
            wav_files = [x for x in data
                         if x.split('.')[-1] == 'wav']  # all the .wav files

            for wav_file in wav_files:
                wav_path = os.path.join(base_pth, dialect, speaker_id,
                                        wav_file)
                wrd_path = wav_path[:-3] + 'WRD'

                with open(wrd_path, 'r') as fw:
                    wrd_list = list(fw.readlines())

                for line in wrd_list:
                    # extract word from start sample, end sample, word format
                    word_start, word_end, word = line.rstrip().split(' ')
                    # add entry in dictionary
                    if word not in words.keys():
                        words[word] = 0
                    words[word] += 1
    print(sorted(words.items(), key=lambda x: x[1], reverse=True))
Example #2
def mean_condition_latents(model, data_dir):
    print('Extracting stimuli features')
    conditions = utils.listdir(data_dir, path=False)
    condition_features = {}
    for c in tqdm(conditions):
        stimuli = utils.listdir(os.path.join(data_dir, c))
        stimuli = [utils.load_image(s) for s in stimuli]
        stimuli = torch.stack(stimuli).to(device)
        with torch.no_grad():
            feats = model.encode(stimuli).mean(dim=0).cpu().numpy()
        condition_features[c] = feats
    return condition_features
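Several of these examples (e.g. #2, #12, #20) call a project-specific utils.listdir that returns full paths by default and bare entry names when called with path=False. That helper is not shown on this page; a minimal sketch under that assumption (not the original implementation) could be:

import os

def listdir(directory, path=True):
    # Hypothetical stand-in for the utils.listdir assumed above:
    # sorted entries, returned as full paths unless path=False.
    names = sorted(os.listdir(directory))
    if path:
        return [os.path.join(directory, name) for name in names]
    return names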
Example #3
def add_scribble(image):
    """
    Add a scribble.
    """
    files = list(utils.listdir('resources', 'scribble_[0-9]+.png'))
    fname = np.random.choice(files)
    scribble = Image.open(fname)

    # Adjust opacity.
    scribble2 = max_opacity(scribble, 0.85)

    # Shrink a bit. (LANCZOS replaces the ANTIALIAS alias removed in Pillow 10.)
    scribble2.thumbnail((256, 256), Image.LANCZOS)

    # Rotate the scribble by a small random amount.
    angle = (np.random.random() - 0.5) * 20
    scribble2 = scribble2.rotate(angle, resample=Image.BICUBIC, expand=True)

    # Find a random place near the upper corner.
    x = np.random.randint(image.size[0] - (1.5 * scribble2.size[0]),
                          image.size[0] - (0.8 * scribble2.size[0]))
    y = np.random.randint(0, scribble2.size[1])

    # Do it.
    image.paste(scribble2, (x, y), scribble2)
    return
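Other examples (e.g. #3, #16, #21) pass a regular expression as the second argument to utils.listdir and iterate over full paths of the matching files. A sketch of that style of helper, inferred from the call sites rather than taken from the original utils module:

import os
import re

def listdir(directory, pattern=None):
    # Hypothetical helper: yield full paths of entries whose name matches
    # the given regular expression (every entry when pattern is None).
    for name in sorted(os.listdir(directory)):
        if pattern is None or re.search(pattern, name):
            yield os.path.join(directory, name)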
Example #4
    def _load_guitars(self, opts):
        """Load data from Thomann files.

        """
        logging.debug('Loading Guitars dataset')
        data_dir = os.path.join('./', 'thomann')
        X = None
        files = utils.listdir(data_dir)
        pics = []
        for f in sorted(files):
            if '.jpg' in f and f[0] != '.':
                im = Image.open(utils.o_gfile((data_dir, f), 'rb'))
                res = np.array(im.getdata()).reshape(128, 128, 3)
                pics.append(res)
        X = np.array(pics)

        seed = 123
        np.random.seed(seed)
        np.random.shuffle(X)
        np.random.seed()

        self.data_shape = (128, 128, 3)
        self.data = Data(opts, X / 255.)
        self.num_points = len(X)

        logging.debug('Loading Done.')
Example #5
    def input_fn(self, mode, data_dir):
        def decode_record(record):
            """Decodes a record to a TensorFlow example."""
            example = tf.io.parse_single_example(record, self.name_to_features)

            # tf.Example doesn't support tf.int32, so cast int64 features to int32.
            for name in list(example.keys()):
                t = example[name]
                if t.dtype == tf.int64:
                    t = tf.cast(t, tf.int32)
                example[name] = t
            return {'input': example['input']}, example['target'][0]

        files = utils.listdir(data_dir)
        print('in mode {}, files: {}'.format(mode, files))

        with tf.name_scope('dataset'):
            d = tf.data.TFRecordDataset(files)
            if mode == 'train':
                # d = d.repeat()
                d = d.shuffle(buffer_size=self.shuffle_size)

            d = d.map(lambda record: decode_record(record),
                      num_parallel_calls=self.num_parallel_calls)
            if mode == 'train':
                d = d.batch(batch_size=self.batch_size,
                            drop_remainder=self.train_drop_remainder)
            else:
                d = d.batch(batch_size=self.batch_size, drop_remainder=False)

            d = d.prefetch(buffer_size=self.prefetch_size)

        return d
Example #6
 def __init__(self):
     self.home = run_command_w_output('echo $HOME')[:-1]
     self.db = Base(self.home + '/driver')
     home_files = listdir(self.home)
     if not 'netapps' in home_files:
         cd(self.home)
         mkdir('netapps')
Example #7
    def __init__(self, hor_dir, velocity, params):

        # First generate the parent object.
        super(HorizonContainer, self).__init__(params)

        self.velocity = velocity

        self.data = {}
        self.coords = {}
        self.lookup = {}

        for fname in utils.listdir(hor_dir):
            with open(fname) as f:
                samples = f.readlines()
            name = samples.pop(0).strip().strip('#')
            points = []
            for s in samples:
                line, cdp, x, y, t, surv = s.split()
                x, y = int(float(x)), int(float(y))
                t = float(t)
                points.append(Point(x, y, t))
            if self.lookup.get(name):
                self.lookup[name] += points
            else:
                self.lookup[name] = points
Example #8
    def initialize_data_file(self):

        # log_time_interval = 5;
        # seconds_process = 0;

        # def log_time():
        #	global seconds_process
        #	seconds_process += 5
        #	print("It's still running, be patient, please. Running time: "
        #		"%s"%(seconds_process))

        # t = Timer(log_time_interval,log_time)
        # t.start()

        if not path.exists(self.ppiscript):
            self.func_log.critical(('get ppi script cannot be found. dir '
                               'contains %s') % utils.listdir(utils.dirname(self.ppiscript)))
            raise EnvironmentError('get ppi script cannot be found')

        print("Calling the get ppi script with ruby")
        process = Popen(self.ppiscript, stdout=PIPE)
        output = list(process.communicate())
        # t.cancel()
        # hard-code the parse rule for the script's raw output,
        # which looks like this:
        # (b"D, [2016-07-06T14:40:22.172340 #1676] DEBUG -- : Successfully logged into COINS.\r\n{{'M53799763':{'gender': 'F'}}\r\n{{'M53799718':{'gender': 'M'}}\r\n", None)
        string = output[0].decode()
        data = string.split('\r\n')[1:]

        print("Initializing the file")
        with open(self.temp_file_path, 'w') as tempfile:
            for subject in data:
                # tempfile.writelines()
                tempfile.write(subject + '\n')
Example #9
def reset_qlearner_folders():
    for sub_dir in [
            "hunter",
            "smalllake",
            "largelake",
            "shaped_smalllake",
            "shaped_largelake",
    ]:
        whole_sub_dir = os.path.join(output_dir, 'qlearning', sub_dir)
        if not os.path.exists(whole_sub_dir):
            os.mkdir(whole_sub_dir)

        for explore_dir in [
                "random",
                "equal",
                "greedy",
                "epsilongreedy",
                "epsilondecay",
                "epsilonstatedecay",
        ]:

            clear_dir = os.path.join(output_dir, "qlearning", sub_dir,
                                     explore_dir)
            if not os.path.exists(clear_dir):
                os.mkdir(clear_dir)
                continue

            for filename in listdir(clear_dir):
                os.unlink(os.path.join(clear_dir, filename))
Example #11
def pics_to_pdf(pics_directory: str, result_filename: str):
    with open('pdf/' + result_filename + '.pdf', 'wb') as f:
        f.write(
            img2pdf.convert([
                ut.get_files_folder() + pics_directory + '/' + i
                for i in ut.listdir(  # noqa
                    ut.get_files_folder() + pics_directory)
            ]))
Example #12
def load_stimuli(stimuli_folder):
    conditions = os.listdir(stimuli_folder)
    conditions = [c for c in conditions if c != '.DS_Store']
    stimuli = {
        c: utils.listdir(os.path.join(stimuli_folder, c), path=False)
        for c in conditions
    }
    return stimuli
Example #13
def create_image_generator(path):
    filenames = utils.listdir(path, extensions=('.jpg', '.png'))

    while True:
        random.shuffle(filenames)

        for filename in filenames:
            filepath = os.path.join(path, filename)

            yield image_utils.load_image(filepath)
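Example #13 calls utils.listdir with an extensions keyword and then joins the returned names back onto the directory, so that variant presumably returns bare filenames filtered by suffix. A minimal sketch under that assumption:

import os

def listdir(directory, extensions=None):
    # Hypothetical helper: bare filenames, optionally restricted to the
    # given suffixes, e.g. extensions=('.jpg', '.png').
    names = sorted(os.listdir(directory))
    if extensions is not None:
        names = [n for n in names if n.lower().endswith(tuple(extensions))]
    return names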
Example #14
 def _given_labels(self):
     bounding_labels = list(itertools.chain(*self.SPLITTING_INFO.values()))
     prefixes = set(map(lambda x: x.split('_')[0] + '_', bounding_labels))
     given_labels = list(
         map(
             lambda prefix: list(
                 filter(lambda x: x.startswith(prefix),
                        utils.listdir(self.RAW_PATH))), prefixes))
     given_labels = list(itertools.chain(*given_labels))
     return given_labels
Example #15
def bulk_insert2ES(org, bulk_size=100, json_file_folder='./txt2ES_json'):

    json_file_paths = []
    listdir(json_file_folder, json_file_paths, ['.json'])

    for path in json_file_paths:
        if path.split('\\')[-1].split('_')[0] != org: continue

        with open(path, 'r') as f1:
            bulk_content_lines = f1.readlines()

        url = f'http://localhost:9200/{org.lower()}/_bulk?pretty&refresh'
        headers = {'Content-Type': 'application/json'}
        for _index in range(0, len(bulk_content_lines), bulk_size * 2):
            data = ''.join(
                bulk_content_lines[_index:_index + bulk_size * 2]) + '\n'
            r = requests.post(url=url, headers=headers, data=data)
    print(f'{org} DONE!')
    return True
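bulk_insert2ES above (and add_from_txt and the TFRecord driver later on) use yet another convention: listdir(folder, out_list, extensions) walks the folder recursively and appends matching file paths to a caller-supplied list. A sketch of that style of helper, inferred from how it is called here:

import os

def listdir(root, out_paths, extensions=None):
    # Hypothetical recursive variant: append every file under `root`
    # whose suffix is in `extensions` to the caller's list.
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if extensions is None or os.path.splitext(name)[1] in extensions:
                out_paths.append(os.path.join(dirpath, name))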
Example #16
    def __init__(self, bedrock_dir, params):

        super(BedrockContainer, self).__init__(params)

        self.reset_all()

        # Read in all shape files
        for f in utils.listdir(bedrock_dir, '\\.shp$'):
            for line in fiona.open(f):
                self.lookup[(shape(line['geometry']))] = line['properties']
Example #18
def add_from_txt(txt_folder_name=r'..\..\papers_data'):

    file_paths = []
    listdir(txt_folder_name, file_paths, ['.txt'])
    start_time = time.time()
    res_json_text = []
    org_years = collections.defaultdict(set)  # for json name.
    for txt_file_path in tqdm(file_paths, ncols=100, desc=f'process txts'):
        folder_name, org, org_year, file_name = split_path(txt_file_path)

        name = file_name.split('.txt')[0]
        year = org_year.split('_')[1]

        json_body = {}
        json_body['Organization'] = org
        json_body['Name'] = name
        json_body['Year'] = year

        with open(txt_file_path, 'r', encoding='utf8') as f:
            json_body['Text'] = clean_text(f.read())

        json_start = '{"index":{"_id":\"' + curlmd5(json_body['Name']) + '\"}}'
        json_body = json_start + '\n' + json.dumps(json_body) + '\n'
        res_json_text.append([org, json_body])
        org_years[org].add(year)

    out_folder = 'txt2ES_json'
    if not os.path.exists(out_folder): os.mkdir(out_folder)
    for _org in org_years.keys():
        org_years[_org] = '_'.join(sorted(org_years[_org]))
        with open(f'./{out_folder}/{_org}_{org_years[_org]}.json',
                  'w',
                  encoding='utf8') as f:
            for __org, _json_body in res_json_text:
                if __org == _org: f.write(_json_body)

    end_time = time.time()
    print(f'time cost: {end_time - start_time:.2f}s.')
    return None
Example #19
def simulate_all_hunterschoice_policies():
    policies = [
        os.path.join(hunter_dir, x) for x in listdir(hunter_dir)
        if "evaluation" not in x
    ]
    for policy in policies:
        success_rate, episode_lengths = simulate_hunterschoice_policy(policy)
        result_dict = {
            "success_rate": success_rate,
            "episode_lengths": episode_lengths
        }
        out_dir = policy + "_evaluation"
        with open(out_dir, "wb") as f:
            pickle.dump(result_dict, f)
Example #20
def get_condition_latents(model, data_dir):
    print('Extracting features')
    stimuli = utils.listdir(data_dir, path=False)
    condition_features = {}
    batch_size = 256
    for i in tqdm(range(0, len(stimuli), batch_size)):
        batch_names = stimuli[i:i + batch_size]
        batch = [
            utils.load_image(os.path.join(data_dir, n)) for n in batch_names
        ]
        batch = torch.stack(batch).to(device)
        with torch.no_grad():
            batch_feats = model.encode(batch).cpu().numpy()
        for name, feats in zip(batch_names, batch_feats):
            condition_features[name] = feats
    return condition_features
Example #21
    def __init__(self, segy_dir, params):

        super(SegyContainer, self).__init__(params)

        try:
            # This creates a (hidden) shapefile for each seismic line,
            # then steps over them to read their positions and meta.
            # TODO Simplify this... maybe don't even write the files.
            sgy2shp(segy_dir, segy_dir)
        except ShapeFileExists:
            pass

        self.reset_all()

        for f in utils.listdir(segy_dir, '\\..+\\.shp$'):
            with fiona.open(f, "r") as traces:
                for trace in traces:
                    self.lookup[shape(trace["geometry"])] = trace["properties"]
Example #23
def search_jdk():
    if jdk_zip_exists():
        locales.adv_print(f"JDK_ZIP_ALREADY_EXISTS", variables={"zipfile": settings["jdk_zip_name"]})
        utils.extract_file(settings["jdk_zip_name"])
        os.remove(settings["jdk_zip_name"])
    for file in os.listdir():
        jdk_path = os.path.join(os.getcwd(), file)
        if "jdk" in file and not os.path.isfile(jdk_path) and utils.verify_path(Path(jdk_path)):
            extend_path(jdk_path)
            return True
    for file in utils.listdir(settings["jdk_installation_path"]):
        jdk_path = os.path.join(settings["jdk_installation_path"], file)
        if "jdk" in file and os.path.isdir(jdk_path) and utils.verify_path(Path(jdk_path)):
            extend_path(os.path.join(settings["jdk_installation_path"], file))
            utils.set_java_home(os.path.join(settings["jdk_installation_path"], file))
            return True
    jdk = os.environ.get("JAVA_HOME")
    # fall back to JAVA_HOME, but make sure it actually points at a valid JDK location
    return settings["skip_jdk_checks"] or (jdk is not None and utils.verify_path(Path(jdk)))
Example #24
    def evaluate(self, label, selected_energy_levels, test_batch_size=50):
        prefixed = label + '_'
        given_labels = list(filter(lambda x: x.startswith(prefixed), utils.listdir(self.CONFIG_CLASS().RAW_PATH)))
        evaluation = dict()

        for tmp_label in given_labels:
            if tmp_label == label + '_0':
                self.CONFIG_CLASS.SPLITTING_INFO = {
                    'regular': [tmp_label]
                }
            else:
                self.CONFIG_CLASS.SPLITTING_INFO = {
                    'chaotic': [tmp_label]
                }
            val_exper = experiment.Experiment(self.EXPN, self.EXP_PATH,
                                              self.CONFIG_CLASS(), self.LOGLEVEL)
            val_exper.prepare_validation()
            val_loader = val_exper.get_validation_loader(test_batch_size, selected_energy_levels)
            evaluation[tmp_label] = learning.test(self.MODEL, val_loader)
            val_exper.remove()
        return evaluation
Example #25
def view_history(path=None):
    tree=listdir(g.repo, g.commit, g.path)
    
    try:
        page = int(request.args.get('page'))
    except (TypeError, ValueError):
        page = 0
    
    if page:
        history_length = 30
        skip = (page-1) * 30 + 10
        if page > 7:
            previous_pages = [0, 1, 2, None] + list(range(page))[-3:]
        else:
            previous_pages = range(page)
    else:
        history_length = 10
        skip = 0
        previous_pages=None
    return render_template('history.html', page=page, history_length=history_length, skip=skip, tree=tree, previous_pages=previous_pages)
Example #26
    def __init__(self, expn=None, exp_path=None, config=None, loglevel=log.NO):
        self.LOGLEVEL = loglevel

        myconfig = MyConfig()
        if expn is not None and exp_path is not None:
            self.EXPN = expn
            self.FOLDER = os.path.join(exp_path, str(expn))
            self.config = myconfig  #load_config(self.FOLDER)
            log.info(self.LOGLEVEL, "Load experiment #{}".format(self.EXPN))
        elif config is not None:
            self.config = config
            exp_path = os.path.join(self.config.ROOT_PATH,
                                    self.config.EXP_FOLDER)
            os.makedirs(exp_path, exist_ok=True)
            self.EXPN = 1 + len(utils.listdir(exp_path))
            self.FOLDER = os.path.join(self.config.ROOT_PATH,
                                       self.config.EXP_FOLDER, str(self.EXPN))
            log.info(self.LOGLEVEL, self.__str__())
        else:
            raise RuntimeError('Either (expn, exp_path) or a config must be provided')
Example #27
def view_blob(path):
    tree=listdir(g.repo, g.commit, g.path)
    blob=get_blob(g.repo, g.commit, g.path)
    raw_url = url_for('view_raw_blob', path=g.path)
    too_large = sum(map(len, blob.chunked)) > 100*1024
    return render_template('view_blob.html', blob=blob, raw_url=raw_url, too_large=too_large, tree=tree)
Example #28
                    if label is not None:
                        for i in range(len(label)):
                            train_writer.write(build_data(sample=np.array(sample[i], dtype=np.uint8).tobytes(),
                                                          label=label[i],
                                                          legal=np.array(legal[i], dtype=np.uint8).tobytes()).SerializeToString())
                            train_line_cnt += 1
                            if train_line_cnt >= LINE_NUM:  # condition to finish the current file
                                train_writer.close()
                                train_line_cnt = 0
                                train_file_no += 1
                                train_writer = tf.python_io.TFRecordWriter(train_file_pattern % train_file_no)
        train_writer.close()


if __name__ == '__main__':
    file_src = 'F:/go_data/records3'
    file_name = []
    listdir(file_src, file_name)

    multi = 8
    # create worker processes
    process_list = []
    lock = multiprocessing.Lock()
    for p in range(multi):
        t = MyProcess(p, "Process-" + str(p), lock, file_name[int(p / 4)::int(multi / 4)])
        t.start()
        process_list.append(t)

    for process in process_list:
        process.join()
Example #29
def choose_keywords(base_pth,
                    chosen_keywords,
                    num_templates,
                    gen_template,
                    template_save_loc=None,
                    blank_id=40):
    """
    Choose keywords from TIMIT TEST according to the minimum number of templates required
    :param blank_id: blank_id index
    :param gen_template: If True, the RNN is also used to generate templates; otherwise templates are extracted from TIMIT
    :param template_save_loc: directory where the extracted keyword templates are stored
    :param base_pth: path to root directory TIMIT/TEST
    :param chosen_keywords: list of keywords to be tested on
    :param num_templates: the top-n templates which are chosen for every keyword
    """

    if gen_template:
        print("Generating templates using RNN")
        if not os.path.exists(template_save_loc):
            os.mkdir(template_save_loc)
        words = {}

        for dialect in sorted(listdir(base_pth)):

            for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):

                data = sorted(
                    os.listdir(os.path.join(base_pth, dialect, speaker_id)))
                wav_files = [x for x in data
                             if x.split('.')[-1] == 'wav']  # all the .wav files

                for wav_file in wav_files:
                    wav_path = os.path.join(base_pth, dialect, speaker_id,
                                            wav_file)
                    wrd_path = wav_path[:-3] + 'WRD'

                    with open(wrd_path, 'r') as fw:
                        wrd_list = list(fw.readlines())

                    for line in wrd_list:
                        # extract word from start sample, end sample, word format
                        word_start, word_end, word = line.rstrip().split(' ')
                        word_start, word_end = int(word_start), int(word_end)
                        # add entry in dictionary
                        if word not in words and word in chosen_keywords:
                            words[word] = []
                        if word in chosen_keywords:
                            words[word].append(
                                (wav_path, word_start, word_end))

        clip_paths = []
        for word, paths in words.items():
            np.random.shuffle(paths)
            i = 0
            for path, start, end in paths[:num_templates]:
                (rate, sig) = wav.read(path)
                assert rate == 16000
                sig = sig[start:end]
                write_name = template_save_loc + word + '_' + str(i) + '.wav'
                wav.write(write_name, rate, sig)
                clip_paths.append(write_name)
                i += 1

        templates = {}
        rnn = dl_model('test_one')
        outputs, phone_to_id, id_to_phone = rnn.test_one(clip_paths)

        for out, path in outputs:
            word = path.split('/')[-1].split('_')[0]
            if word not in templates.keys():
                templates[word] = []

            out = np.argmax(out[0], axis=1)
            final_seq = utils.ctc_collapse(out, blank_id)
            final_seq = [id_to_phone[x] for x in final_seq]
            if final_seq[0] == 'pau':
                final_seq = final_seq[1:]
            templates[word].append(final_seq)

        print("Templates from RNN:", templates)
        return templates

    else:
        print("Extracting templates from TIMIT")
        keywords = {}

        for dialect in sorted(listdir(base_pth)):

            for speaker_id in sorted(listdir(os.path.join(base_pth, dialect))):

                data = sorted(
                    os.listdir(os.path.join(base_pth, dialect, speaker_id)))
                wav_files = [x for x in data
                             if x.split('.')[-1] == 'wav']  # all the .wav files

                for wav_file in wav_files:
                    wav_path = os.path.join(base_pth, dialect, speaker_id,
                                            wav_file)
                    wrd_path = wav_path[:-3] + 'WRD'
                    ph_path = wav_path[:-3] + 'PHN'

                    with open(wrd_path, 'r') as fw:
                        wrd_list = list(fw.readlines())
                    with open(ph_path, 'r') as fp:
                        ph_list = list(fp.readlines())

                    for line in wrd_list:
                        phones_in_word = []
                        # extract word from start sample, end sample, word format
                        word_start, word_end, word = line.rstrip().split(' ')
                        word_start, word_end = int(word_start), int(word_end)
                        # add entry in dictionary
                        if word not in keywords.keys():
                            keywords[word] = {}
                        # iterate over list of phones
                        for ph_line in ph_list:
                            # extract phones from start sample, end sample, phone format
                            ph_start, ph_end, ph = ph_line.rstrip().split(' ')
                            ph_start, ph_end = int(ph_start), int(ph_end)
                            if ph_start == word_end:
                                break
                            # if phone corresponds to current word, add to list
                            if ph_start >= word_start and ph_end <= word_end:
                                # collapse
                                for father, list_of_sons in replacement.items():
                                    if ph in list_of_sons:
                                        ph = father
                                        break
                                phones_in_word.append(ph)

                        phones_in_word = tuple(phones_in_word)
                        # increment count in dictionary
                        if phones_in_word not in keywords[word].keys():
                            keywords[word][phones_in_word] = 0
                        keywords[word][phones_in_word] += 1

        # choose most frequently occurring templates from dictionary
        final_templates = {}
        for keyword in chosen_keywords:
            temps = keywords[keyword]
            temps = sorted(temps.items(), key=lambda kv: kv[1], reverse=True)
            chosen = [x[0] for x in temps[:num_templates]]
            final_templates[keyword] = chosen

        print("Templates from TIMIT:", final_templates)
        return final_templates
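For reference, a hypothetical invocation of choose_keywords (the keyword list and directory below are placeholders, not values from the original project):

# Placeholder example: pull up to 3 templates per keyword straight from TIMIT.
keywords = ['water', 'carry', 'year']
templates = choose_keywords(base_pth='TIMIT/TEST/',
                            chosen_keywords=keywords,
                            num_templates=3,
                            gen_template=False)
# templates maps each keyword to its most frequent phone-sequence templates.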
Example #30
def gen_cases(base_pth_template, base_pth_totest, pkl_name, num_templates,
              num_clips, num_none, keywords, gen_template):
    """
    Generates test cases on which model is to be tested
    :param gen_template: Whether to generate templates using the RNN or extract them from TIMIT
    :param base_pth_template: root directory (TIMIT/TEST) from which keyword templates are picked
    :param base_pth_totest: root directory (TIMIT/TEST) from which test clips are picked
    :param pkl_name: path to pickle dump which stores list of paths
    :param num_clips: number of clips containing the keyword on which we want to test
    :param keywords: list of keywords to be tested
    :param num_templates: top-n templates to be returned for each keyword
    :param num_none: number of clips which do not contain any keyword
    :return: {kw1: {'templates':[[phone_list 1], [phone_list 2],..], 'test_wav_paths':[parth1,path2,...]}, kw2:...}
    """
    if os.path.exists(pkl_name):
        with open(pkl_name, 'rb') as f:
            return pickle.load(f)

    kws_chosen = choose_keywords(base_pth_template, keywords, num_templates,
                                 gen_template)
    final_paths = {}

    paths = []

    for kw in keywords:
        final_paths[kw] = {'templates': kws_chosen[kw], 'test_wav_paths': []}

    final_paths['NONE'] = {'templates': [], 'test_wav_paths': []}

    for dialect in sorted(listdir(base_pth_totest)):

        for speaker_id in sorted(
                listdir(os.path.join(base_pth_totest, dialect))):

            data = sorted(
                os.listdir(os.path.join(base_pth_totest, dialect, speaker_id)))
            wav_files = [x for x in data
                         if x.split('.')[-1] == 'wav']  # all the .wav files

            for wav_file in wav_files:
                wav_path = os.path.join(base_pth_totest, dialect, speaker_id,
                                        wav_file)
                wrd_path = wav_path[:-3] + 'WRD'

                paths.append((wav_path, wrd_path))

    # shuffle paths
    np.random.shuffle(paths)

    for wav_path, wrd_path in paths:

        with open(wrd_path, 'r') as f:
            wrd_list = f.readlines()

        for line in wrd_list:
            # extract word from start frame, end frame, word format
            word_start, word_end, word = line.rstrip().split(' ')

            if word in keywords:
                # use wav file to compare with keyword
                if len(final_paths[word]['test_wav_paths']) < num_clips:
                    final_paths[word]['test_wav_paths'].append(wav_path)
                break

            elif len(final_paths['NONE']['test_wav_paths']) < num_none:
                final_paths['NONE']['test_wav_paths'].append(wav_path)
                break

    with open(pkl_name, 'wb') as f:
        pickle.dump(final_paths, f)

    print('Number of templates:',
          {word: len(dat['templates'])
           for word, dat in final_paths.items()})
    print('Number of clips:', {
        word: len(dat['test_wav_paths'])
        for word, dat in final_paths.items()
    })
    return final_paths
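A hypothetical call to gen_cases illustrating the return structure described in its docstring (all argument values below are placeholders):

cases = gen_cases(base_pth_template='TIMIT/TEST/', base_pth_totest='TIMIT/TEST/',
                  pkl_name='kws_test_cases.pkl', num_templates=3,
                  num_clips=10, num_none=20,
                  keywords=['water', 'carry'], gen_template=False)
print(cases['water']['templates'])            # top templates for 'water'
print(len(cases['NONE']['test_wav_paths']))   # clips containing no keyword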
Example #31
    enc_name = args.name.split('_')
    model = enc_name[0][0:-1] 
    layer = int(enc_name[0][-1])
    roi = enc_name[1]
    maxpool = enc_name[2]

    roi_r = []
    random_r = []
    sub_voxel_regressor = {}

    for subj in args.subject:
        path = os.path.join('processed/predicted/', args.predicted_data, subj, args.name + '_' + args.random)
        conditions = sorted(embed.keys())
        condition_voxels = {}
        for condition in conditions:
            file_name = listdir((os.path.join(path, condition)))[0]
            features = np.load(file_name)
            condition_voxels[condition] = np.mean(features, axis=0)
        voxel_regressor = np.stack([condition_voxel for condition, condition_voxel in OrderedDict(condition_voxels).items()])
        sub_voxel_regressor[subj] = voxel_regressor
    
    all_voxel_regressor = np.hstack([sub_voxel_regressor[s] for s in args.subject])

    for pc in tqdm(range(10, 80, 10), total=7, position=0, leave=True):
        _, voxel_mean_r = cv_regression_w(all_voxel_regressor, embed, fit=None, k=9, l2=0.0, pc_fmri=pc, pc_embedding=args.pc_embedding)
        roi_r.append(voxel_mean_r)
    
    roi_dim = []
    roi_max = []
    roi_mean = []
    roi_med = []       
Example #32
def _find_modules(modulepath, terse, tolerant):
    """Find all available modules on modulepath"""
    loaded_modules = _loaded_modules()

    splitext = os.path.splitext
    available_modules = OrderedDict()
    starting_dir = os.getcwd()

    for directory in modulepath:
        # Go through each module in the MODULEPATH and collect modules

        if not directory.split():
            # Skip empty entries
            continue

        if not os.path.isdir(directory):
            if tolerant:
                # Skip nonexistent directories
                continue
            raise Exception('Nonexistent directory in '
                            'MODULEPATH: {0!r}'.format(directory))

        # Collect modules in this directory
        this_dir_modules = []

        # change to directory and get modules
        # Files in the first level don't have name/version format, just name
        os.chdir(directory)
        files = listdir(directory, key=os.path.isfile)

        for filename in files:
            moduletype = is_module(filename)
            name = os.path.splitext(filename)[0]
            if moduletype is None:
                continue
            dikt = {
                'name': name,
                'fullname': name,
                'path': os.path.join(directory, filename),
                'realpath': os.path.join(directory, filename),
            }

            if moduletype == TCL_MODULEFILE:
                # TCL module
                dikt['type'] = 'tcl'
            elif moduletype == PY_MODULEFILE:
                dikt['type'] = 'python'
            else:
                raise Exception('Unknown module type')
            dikt['loaded'] = dikt['fullname'] in loaded_modules
            this_dir_modules.append(Module(**dikt))

        # Look for modules 1 directory in
        # Modules 1 level in have name/version format
        dirs = listdir(directory, os.path.isdir)
        for dirname in dirs:
            default_module = None
            os.chdir(os.path.join(directory, dirname))
            for item in os.listdir('.'):
                if os.path.isdir(item):
                    io.log_warning(
                        'The following directory, nested more than 1 '
                        'deep from a MODULEPATH directory, will not be '
                        'searched: {0}'.format(item))
                    continue
                elif os.path.islink(item):
                    if item == 'default':
                        default_module = os.path.realpath(item)
                        if not os.path.isfile(default_module):
                            raise Exception('Default module symlink points to '
                                            'nonexistent file '
                                            '{0!r}'.format(default_module))
                        d = os.path.basename(os.path.dirname(default_module))
                        if d != dirname:
                            raise Exception('Default module symlink points to '
                                            'a file in a different directory')
                        continue
                    if not os.path.isfile(os.path.realpath(item)):
                        raise Exception('Symlink {0!r} points to '
                                        'nonexistent file'.format(item))

                moduletype = is_module(item)
                if moduletype is None:
                    continue

                name = dirname
                version = os.path.splitext(item)[0]
                fullname = os.path.join(name, version)
                path = os.path.join(directory, dirname, item)
                realpath = os.path.realpath(path)
                dikt = {
                    'name': name,
                    'fullname': fullname,
                    'path': path,
                    'realpath': realpath,
                }

                if moduletype == TCL_MODULEFILE:
                    # TCL module
                    dikt['type'] = 'tcl'
                elif moduletype == PY_MODULEFILE:
                    dikt['type'] = 'python'
                    if re.search('^[0-9]', version):
                        # versioned
                        dikt['version'] = version
                else:
                    raise Exception('Unknown module type')
                dikt['loaded'] = dikt['fullname'] in loaded_modules
                this_dir_modules.append(Module(**dikt))

            # Set the default
            if default_module is not None:
                for module in this_dir_modules:
                    if module.realpath == default_module:
                        module.default = True
                        break
            else:
                # Determine if modules are versioned
                if any([module.version for module in this_dir_modules]):
                    this_dir_modules[-1].default = True

        fun = lambda x: x.fullname
        available_modules[directory] = sorted(this_dir_modules, key=fun)

    os.chdir(starting_dir)

    if terse:
        return [x.fullname for (k, v) in available_modules.items() for x in v]

    return available_modules
Example #33
    args = parser.parse_args()

    # load features
    layers = {
        1: 'conv1',
        2: 'conv2',
        3: 'conv3',
        4: 'conv4',
        5: 'conv5',
        6: 'fc6',
        7: 'fc7'
    }
    layer = layers[args.layer]
    feature_path = os.path.join("./processed/feature", args.train_data,
                                args.model)
    conditions = listdir(feature_path)
    condition_features = {}
    for c in conditions:
        features = listdir(c)
        c_name = c.split('/')[-1]
        if args.maxpool == 'maxpool':
            if (layer != 'fc6') and (layer != 'fc7'):
                features = torch.stack(
                    [torch.load(f)[layer] for f in features])
                features, _ = torch.max(
                    features.view(features.size(0), features.size(1),
                                  features.size(2),
                                  features.size(3) *
                                  features.size(4)).squeeze(),
                    -1)  # global maxpooling
                #features = torch.stack([torch.tensor(torch.load(f)[layer].numpy().max(axis=-1).max(axis=-1)).flatten() for f in features])
Example #34
        feature_extractor = AlexNet()
    elif args.model == 'vgg':
        feature_extractor = VGG16()
    else:
        raise ValueError('Unimplemented feature extractor: {}'.format(
            args.model))

    feature_extractor.conv1.register_forward_hook(get_activation('conv1'))
    feature_extractor.conv2.register_forward_hook(get_activation('conv2'))
    feature_extractor.conv3.register_forward_hook(get_activation('conv3'))
    feature_extractor.conv4.register_forward_hook(get_activation('conv4'))
    feature_extractor.conv5.register_forward_hook(get_activation('conv5'))
    feature_extractor.fc6.register_forward_hook(get_activation('fc6'))
    feature_extractor.fc7.register_forward_hook(get_activation('fc7'))

    conditions = listdir('data/image/' + args.dataset)
    for c in tqdm(conditions):
        stimuli = listdir(c)
        c_name = c.split('/')[-1]
        os.mkdir('processed/feature/' + args.dataset + '/' + args.model + '/' +
                 c_name)
        stimuli_tensor = [
            image_to_tensor(s, resolution=args.resolution) for s in stimuli
        ]
        for name, tensor in zip(stimuli, stimuli_tensor):
            activation = {}
            output = feature_extractor(tensor.unsqueeze(0))
            file = name.split('/')[-1] + '.pth'
            torch.save(
                activation,
                os.path.join('processed/feature', args.dataset, args.model,
Example #35
                'coffee_rings': 0,
                'distort': False,
                'scribble': False,
                }

    for k, v in defaults.items():
        if cfg.get(k) is None:
            cfg[k] = v

    cfg['outfile'] = args.out

    # Gather files to work on, then go and do them.
    if os.path.isfile(target):
        Notice.hr_header("Processing file: {}".format(target))
        main(target, cfg)
        Notice.hr_header("Done")
    elif os.path.isdir(target):
        if args.recursive:
            Notice.info("Looking for SEGY files in {} and its subdirectories".format(target))
            for target in utils.walk(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        else:
            Notice.info("Finding SEGY files in {}".format(target))
            for target in utils.listdir(target, "\\.se?gy$"):
                Notice.hr_header("Processing file: {}".format(target))
                main(target, cfg)
        Notice.hr_header("Done")
    else:
        Notice.fail("Not a file or directory.")
Example #36
    def gen_pickle(self):
        """
        # Iterates over the dataset and picks up recordings such that each phone is covered at least a minimum number of times
        :return: Huge list of feature vectors of audio recordings and phones as a tuple for each frame
                 Each item in returned list is a list corresponding to a single recording
                 Each recording is in turn a list of tuples of (ph, feature_vector) for each frame
        """

        # Return if already exists
        if os.path.exists(self.pkl_name):
            print("Found pickle dump for recordings to be tested")
            with open(self.pkl_name, 'rb') as f:
                return pickle.load(f)

        print("Generating Q value pickle dump for a minimum of", self.min_phones, 'utterances of each phone')

        # final list to be returned
        to_return = []

        base_pth = self.config['dir']['dataset'] + 'TRAIN/'

        # keeps track of number of phones. Terminate only when all phones are above a threshold
        ph_count_dict = {}
        for phone, ph_id in self.phone_to_id.items():
            if ph_id < self.config['num_phones']:
                ph_count_dict[phone] = 0

        # keywords chosen
        keywords_chosen = set()

        paths = []

        # Iterate over entire dataset and store paths of wav files
        for dialect in sorted(utils.listdir(base_pth)):

            for speaker_id in sorted(utils.listdir(os.path.join(base_pth, dialect))):

                data = sorted(os.listdir(os.path.join(base_pth, dialect, speaker_id)))
                wav_files = [x for x in data if x.split('.')[-1] == 'wav']  # all the .wav files

                for wav_file in wav_files:
                    wav_path = os.path.join(base_pth, dialect, speaker_id, wav_file)
                    wrd_path = wav_path[:-3] + 'WRD'
                    phone_path = wrd_path[:-3] + 'PHN'

                    paths.append((wav_path, wrd_path, phone_path))

        # Shuffle the recordings so that we pick up recordings from various dialects and speakers
        np.random.shuffle(paths)

        for wav_path, wrd_path, phone_path in paths:

            # break if found required number of phones
            if all(x > self.min_phones for x in ph_count_dict.values()):
                print("Found enough utterances to cover all phones")
                break

            cur_phones = []

            with open(wrd_path, 'r') as f:
                wrd_list = f.readlines()
                for line in wrd_list:
                    # extract word from sframe, eframe, word format
                    word_start, word_end, word = line.split(' ')
                    word = word[:-1]
                    keywords_chosen.add(word)

            with open(phone_path, 'r') as f:
                phones_read = f.readlines()

            for phone in phones_read:
                s_e_i = phone[:-1].split(' ')  # start, end, phone_name e.g. 0 5432 'aa'
                start, end, ph = int(s_e_i[0]), int(s_e_i[1]), s_e_i[2]

                # collapse into father phone
                for father, list_of_sons in replacement.items():
                    if ph in list_of_sons:
                        ph = father
                        break
                cur_phones.append(ph)
                # increment count of phone
                ph_count_dict[ph] += 1

            final_vec = utils.read_wav(wav_path, winlen=self.config['window_size'], winstep=self.config['window_step'],
                                       fbank_filt=self.config['n_fbank'], mfcc_filt=self.config['n_mfcc'])
            to_return.append((final_vec, cur_phones))

        print("Final phone count dict:", ph_count_dict)
        with open(self.pkl_name, 'wb') as f:
            pickle.dump(to_return, f)
            print("Dumped pickle for recordings to be tested")

        # print("Final chosen words:", keywords_chosen)

        return to_return