def collect(self):
    """Run ``self.check`` concurrently for every (remark, url) pair.

    One MyThread per pair; each thread is started as soon as it is
    created, then joined in creation order so results are yielded in
    the same order as ``self.urls``.
    """
    workers = []
    for remark, url in self.urls:
        worker = MyThread(self.check, args=(remark, url))
        workers.append(worker)
        worker.start()
    for worker in workers:
        # Block until this worker finishes before reading its result.
        worker.join()
        yield worker.get_result()
def main(cross_num=5, exp_path='', sav_dir='', conf='', cfg_sec='', bool_vad=False):
    """Build cross-validation datasets from the five IEMOCAP sessions.

    Spawns one worker thread per session to extract spectrogram features
    (via ``get_emo_data``), merges the per-session label dicts, computes a
    global bias from the minimum feature value, then writes either a
    session-wise 5-fold (F speaker = dev, M speaker = test) or a
    speaker-wise 10-fold dataset with ``store``.

    Parameters
    ----------
    cross_num : int
        5 selects session-wise folds; any other value selects the
        speaker-wise 10-fold layout.
    exp_path : str
        Directory that receives per-session intermediate output.
    sav_dir : str
        Directory that receives the fold datasets.
    conf, cfg_sec : str
        Config file path and section forwarded to ``get_emo_data``.
    bool_vad : bool
        Voice-activity-detection flag forwarded to ``get_emo_data``/``store``.
    """
    # makedirs(exist_ok=True) is idempotent and creates missing parents,
    # unlike the racy exists()+mkdir pair it replaces.
    os.makedirs(exp_path, exist_ok=True)
    for i in range(1, 6):
        os.makedirs(exp_path + '/Session' + str(i), exist_ok=True)
    os.makedirs(sav_dir, exist_ok=True)

    # One extraction thread per session; each returns a label dict.
    thread_list = []
    lab_list = []
    for i in range(5):
        session_name = 'Session' + str(i + 1)
        # BUG FIX: the original passed exp_path + 'SessionN/' (no '/'),
        # which did not match the exp_path + '/SessionN' directory created
        # above whenever exp_path lacked a trailing slash.
        t = MyThread(
            get_emo_data,
            args=(session_name, True, bool_vad, 'spectrogram', 3,
                  exp_path + '/' + session_name + '/', conf, cfg_sec))
        thread_list.append(t)
    for t in thread_list:
        t.start()
    for t in thread_list:
        t.join()
        lab_list.append(t.get_result())

    lab_dict = dict()
    for item in lab_list:
        lab_dict.update(item)

    # Scan all feature matrices for the global minimum value and collect
    # the observed time/frequency dimensions (diagnostics + bias).
    min_val = np.inf  # np.Inf was removed in NumPy 2.0; np.inf is canonical
    freq_bag = set()
    time_bag = set()
    for sess_spk in lab_dict.keys():
        # sess_spk identifies one speaker of a session (e.g. 'Session1_F');
        # it maps to {wav_file_name: [[mat, lab, valence, arouse, domain],
        # ...], ...}
        wav_file_info_dict = lab_dict.get(sess_spk)
        print(sess_spk, len(list(wav_file_info_dict.keys())))
        for wav_file in wav_file_info_dict:
            info_list = wav_file_info_dict.get(wav_file)
            for mat, label, valence, arouse, domain in info_list:
                min_val = min(min_val, mat.min())
                time_bag.add(mat.shape[1])
                freq_bag.add(mat.shape[0])
    print('Freq bag:\n\t', freq_bag)
    print(max(time_bag), min(time_bag))
    # Shift applied downstream so every feature value becomes >= 1.
    bias = min_val - 1
    print(bias)

    sess_spk = set(lab_dict.keys())
    if cross_num == 5:
        # Session-wise 5-fold: fold i tests on Session i's male speaker,
        # holds out the female speaker for dev, trains on the rest.
        for i in range(1, 6):
            cross_val = 'leave_' + str(i)
            file_path = sav_dir + '/' + cross_val
            dev_key = {'Session' + str(i) + '_F'}
            test_key = {'Session' + str(i) + '_M'}
            train_key = sess_spk - test_key - dev_key
            os.makedirs(file_path, exist_ok=True)
            print(test_key)
            store(file_path=file_path, train_key=train_key, test_key=test_key,
                  data_dict=lab_dict, bias=bias, bool_vad=bool_vad)
            print()
    else:
        # Speaker-wise 10-fold: each fold leaves one speaker out.
        sess_spk_list = list(sess_spk)
        for i in range(1, 11):
            idx = sess_spk_list[i - 1]
            cross_val = 'leave_' + str(i)
            # BUG FIX: the original referenced the undefined name
            # 'data_path_prefix' (NameError); write under sav_dir like the
            # 5-fold branch. makedirs also creates the '10cross_set' parent.
            file_path = sav_dir + '/10cross_set/' + cross_val
            test_key = {idx}
            train_key = sess_spk - test_key
            os.makedirs(file_path, exist_ok=True)
            # bias/bool_vad forwarded for consistency with the 5-fold branch
            # (this branch could never run before the NameError fix).
            store(file_path=file_path, train_key=train_key, test_key=test_key,
                  data_dict=lab_dict, bias=bias, bool_vad=bool_vad)
# NOTE(review): this fragment appears to be the body of an enclosing
# "for section in ..." loop plus the post-loop aggregation; the loop
# header is outside this chunk, so `section`, `threads`, `threads_num`,
# `config_dic`, `glob_config`, `relation_ds`, `out_ds_list` and
# `yyyymmdd` are defined elsewhere — verify against the full file.

# Look up the per-section CSV configuration; a missing section is fatal.
file_dict=config_dic.get(section, None)
if(file_dict==None):
    raise Exception("section[{}] not found".format(section))
# Process each section's CSV on its own worker thread.
t=MyThread(deal_csv,args=(section, file_dict, glob_config, relation_ds))
threads.append(t)
t.start()
threads_num+=1
# print("\n\nNow begin deal section[{}]".format(section))
# ret_code, ret_msg, relation_ds, out_ds =deal_csv(file_dict, glob_config, relation_ds, out_ds)
# if(ret_code!=0):
#     raise Exception("section[{}] func[{}] error[{}]".format(section, 'deal_csv', ret_msg))

for t in threads:
    # Must join here — otherwise the main thread outruns the workers and
    # the results would not be available yet.
    t.join()
    # Each worker returns (ret_code, ret_msg, relation_ds, out_ds); the
    # per-section output frame is stored under its section name (t.args[0]).
    ret_code, ret_msg, relation_ds, out_ds_list[t.args[0]] = t.get_result()
    if(ret_code!=0):
        raise Exception("thread[{}] func[{}] error[{}]".format(t, 'deal_csv', ret_msg))

# out_ds=pd.DataFrame({'openday':[], 'detail_type':[], 'detail_cnt':[], 'detail_amt':[]})
# Concatenate every non-empty per-section frame into one output frame;
# the empty frame above is the fallback when no section produced rows.
i=0
out_ds=pd.DataFrame({'openday':[], 'detail_type':[], 'detail_cnt':[], 'detail_amt':[]})
for key, ds in out_ds_list.items():
    if(ds.shape[0]>0):
        if(i==0):
            out_ds=ds.copy()
        else:
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # pd.concat([out_ds, ds]) is the replacement — confirm the
            # pinned pandas version before upgrading.
            out_ds=out_ds.append(ds)
        i+=1
# Stamp the business date onto every row of the combined frame.
if(out_ds.shape[0]>0):
    out_ds.insert(0, 'openday', yyyymmdd)