Beispiel #1
0
 def collect(self):
     """Run ``self.check`` for every ``(remark, url)`` pair, one thread each.

     A ``MyThread`` is created and started for each entry of ``self.urls``
     before any of them is waited on.

     Yields:
         The value of each worker's ``get_result()``, in the order the
         entries appear in ``self.urls``; each worker is joined before its
         result is read.
     """
     def launch(remark, url):
         # Create the worker and start it immediately.
         worker = MyThread(self.check, args=(remark, url))
         worker.start()
         return worker

     workers = [launch(remark, url) for remark, url in self.urls]

     # Wait for the workers one by one, in launch order.
     for worker in workers:
         worker.join()
         yield worker.get_result()
Beispiel #2
0
def main(cross_num=5,
         exp_path='',
         sav_dir='',
         conf='',
         cfg_sec='',
         bool_vad=False):
    """Extract per-session data in parallel and write cross-validation sets.

    Steps:
      1. Ensure ``exp_path``, its ``Session1`` .. ``Session5``
         sub-directories and ``sav_dir`` exist.
      2. Run ``get_emo_data`` for each of the five sessions in its own
         ``MyThread`` and merge the returned dicts into one.
      3. Scan every matrix for the global minimum value and the set of
         observed shapes; ``bias`` is that minimum minus one.
      4. Call ``store`` once per fold.

    Args:
        cross_num: ``5`` selects the five-fold layout (one session held out
            per fold, written under ``sav_dir``); any other value selects
            the ten-fold layout (one key of the merged dict held out per
            fold).
        exp_path: Directory that holds one sub-directory per session.
        sav_dir: Output directory for the five-fold sets.
        conf: Forwarded unchanged to ``get_emo_data``.
        cfg_sec: Forwarded unchanged to ``get_emo_data``.
        bool_vad: Forwarded to ``get_emo_data`` and, in the five-fold
            layout, to ``store``.
    """
    session_names = ['Session' + str(i) for i in range(1, 6)]

    # makedirs(exist_ok=True) also creates missing parents and tolerates a
    # directory that appears between an existence check and the creation.
    os.makedirs(exp_path, exist_ok=True)
    session_dirs = {}
    for name in session_names:
        session_dirs[name] = os.path.join(exp_path, name)
        os.makedirs(session_dirs[name], exist_ok=True)
    os.makedirs(sav_dir, exist_ok=True)

    # One worker per session. The directory handed to get_emo_data is the
    # same one created above; previously it was built without a separator
    # between exp_path and the session name. The trailing separator of the
    # original argument is preserved.
    thread_list = [
        MyThread(
            get_emo_data,
            args=(name, True, bool_vad, 'spectrogram', 3,
                  os.path.join(session_dirs[name], ''), conf, cfg_sec))
        for name in session_names
    ]
    for t in thread_list:
        t.start()

    # Join in launch order and merge every per-session result dict.
    lab_dict = dict()
    for t in thread_list:
        t.join()
        lab_dict.update(t.get_result())

    # np.inf is the supported spelling; the np.Inf alias was removed in
    # NumPy 2.0.
    min_val = np.inf
    freq_bag = set()
    time_bag = set()

    # Each value of lab_dict maps a wav file name to a list of 5-item
    # records whose first item is a matrix (it is used via .min() and
    # .shape[0] / .shape[1]).
    for spk_key, wav_file_info_dict in lab_dict.items():
        print(spk_key, len(wav_file_info_dict))
        for info_list in wav_file_info_dict.values():
            for mat, _label, _valence, _arouse, _domain in info_list:
                min_val = min(min_val, mat.min())
                time_bag.add(mat.shape[1])
                freq_bag.add(mat.shape[0])

    print('Time bag:\n\t', time_bag)
    print('Freq bag:\n\t', freq_bag)
    print(max(time_bag), min(time_bag))
    bias = min_val - 1
    print(bias)

    sess_spk = set(lab_dict.keys())
    if cross_num == 5:
        for i in range(1, 6):
            # construct the dataset for five-fold cross validation
            cross_val = 'leave_' + str(i)
            file_path = os.path.join(sav_dir, cross_val)
            # NOTE(review): dev_key is removed from the training keys but is
            # not passed to store() - confirm store() does not need it.
            dev_key = {'Session' + str(i) + '_F'}
            test_key = {'Session' + str(i) + '_M'}
            train_key = sess_spk - test_key - dev_key
            os.makedirs(file_path, exist_ok=True)
            print(test_key)
            store(file_path=file_path,
                  train_key=train_key,
                  test_key=test_key,
                  data_dict=lab_dict,
                  bias=bias,
                  bool_vad=bool_vad)
            print()
    else:
        # Sorted so that the fold number -> held-out key mapping is the same
        # on every run; iterating a set of strings has no stable order.
        sess_spk_list = sorted(sess_spk)
        # construct the dataset for ten-fold cross validation
        for i in range(1, 11):
            idx = sess_spk_list[i - 1]
            cross_val = 'leave_' + str(i)
            # NOTE(review): data_path_prefix is not defined in this function;
            # presumably a module-level global - confirm.
            file_path = data_path_prefix + '10cross_set/' + cross_val
            # Everything below must run once per fold. It used to sit after
            # the loop, so only the last fold was ever written.
            test_key = {idx}
            train_key = sess_spk - test_key
            os.makedirs(file_path, exist_ok=True)
            # NOTE(review): unlike the five-fold branch, bias and bool_vad
            # are not forwarded here - confirm store()'s defaults are
            # intended.
            store(file_path=file_path,
                  train_key=train_key,
                  test_key=test_key,
                  data_dict=lab_dict)
Beispiel #3
0
        file_dict=config_dic.get(section, None)
        if(file_dict==None):
            raise Exception("section[{}] not found".format(section))  

        t=MyThread(deal_csv,args=(section, file_dict, glob_config, relation_ds))
        threads.append(t)
        t.start()
        threads_num+=1
        # print("\n\nNow begin deal section[{}]".format(section))
        # ret_code, ret_msg, relation_ds, out_ds =deal_csv(file_dict, glob_config, relation_ds, out_ds)
        # if(ret_code!=0):
        #     raise Exception("section[{}] func[{}] error[{}]".format(section, 'deal_csv', ret_msg))  

    for t in threads:
        t.join()  # 一定要join,不然主线程比子线程跑的快,会拿不到结果
        ret_code, ret_msg, relation_ds, out_ds_list[t.args[0]] = t.get_result()
        if(ret_code!=0):
            raise Exception("thread[{}] func[{}] error[{}]".format(t, 'deal_csv', ret_msg))  

    # out_ds=pd.DataFrame({'openday':[], 'detail_type':[], 'detail_cnt':[], 'detail_amt':[]})
    i=0
    out_ds=pd.DataFrame({'openday':[], 'detail_type':[], 'detail_cnt':[], 'detail_amt':[]})
    for key, ds in out_ds_list.items():
        if(ds.shape[0]>0):
            if(i==0):
                out_ds=ds.copy()
            else:
                out_ds=out_ds.append(ds)
            i+=1
    if(out_ds.shape[0]>0):
        out_ds.insert(0, 'openday', yyyymmdd)