def extract_feature_by_kmeans(class_num, subsample_size, window_size, cluster_num, max_iter, rnd_number):
    """Learn a k-means feature vocabulary from training data and persist it.

    Generates a train/test split via FetchFile.gen_data, clusters the
    training samples into ``cluster_num`` centroids (at most ``max_iter``
    iterations), normalizes the result with mynormalize_multi, and writes
    it to the file named by gen_feature_fname.
    """
    t0 = time.time()
    train_X, test_X, train_y, test_y = FetchFile.gen_data(
        class_num, subsample_size, window_size, rnd_number)
    print("Generate Data Time: ", time.time() - t0)

    # Cluster, normalize, and persist the learned vocabulary.
    vocab = mynormalize_multi(learnvocabulary(train_X, cluster_num, max_iter))
    out_fname = gen_feature_fname(class_num, subsample_size, window_size, cluster_num)
    write_features(out_fname, vocab)
    print("=== Feature Extraction Finish ===")
def grid_search_for_neighbor_multiprocess(class_num, subsample_size, window_size, cluster_num, max_iter, rnd_number, neighbor_num_seq):
    """Evaluate Classification.classifiy for each neighbour count, one process each.

    The train/test split is generated once and passed (pickled) into every
    worker process. Workers are started but intentionally not joined here;
    the spawned Process objects are printed for inspection.
    """
    train_X, test_X, train_y, test_y = FetchFile.gen_data(
        class_num, subsample_size, window_size, rnd_number)

    workers = []
    for k in neighbor_num_seq:
        proc = multiprocessing.Process(
            target=Classification.classifiy,
            args=(class_num, subsample_size, window_size, cluster_num,
                  max_iter, rnd_number, k,
                  train_X, train_y, test_X, test_y))
        proc.start()
        workers.append(proc)

    print(workers)
# NOTE(review): this chunk begins mid-function — the enclosing def (presumably
# the got_feature referenced below) starts before the visible region, so the
# loop nesting of the next two statements is assumed; confirm against the
# full file.
    centroid.append(get_feature.read_features(centroid_file))
    return centroid


if __name__ == "__main__":
    rnd_number = 8131985
    class_num = __CLASS_NUM__        # template placeholder, substituted at generation time
    subsample_size = 92
    window_size = __WINDOW_SIZE__    # template placeholder
    cluster_num = [__CLUSTER_NUM__]  # template placeholder (list of cluster counts)
    ###################################################
    # permutation data and split into train and test  #
    # split our data into half of train data and half of text data randomly.
    train_X, test_X, train_y, test_y = FetchFile.gen_file(class_num, subsample_size, window_size, rnd_number)
    ## USAGE : gen_file(class_num, subsample_size, window_size, rnd_number) ###
    print(train_y)
    print(test_y)
    ###################################################
    # Start to get the features learned from feed in data
    centroid = got_feature(class_num, subsample_size, window_size, cluster_num)
    print(centroid[0])
    print(len(centroid[0]))
    ###################################################
def grid_search_for_neighbor(class_num, subsample_size, window_size, cluster_num, max_iter, rnd_number, neighbor_num_seq):
    """Sequential grid search over neighbour counts.

    Generates one train/test split, then runs Classification.classifiy
    once for every value in ``neighbor_num_seq``.
    """
    split = FetchFile.gen_data(class_num, subsample_size, window_size, rnd_number)
    train_X, test_X, train_y, test_y = split

    for k in neighbor_num_seq:
        Classification.classifiy(class_num, subsample_size, window_size,
                                 cluster_num, max_iter, rnd_number, k,
                                 train_X, train_y, test_X, test_y)
# NOTE(review): Python 2 script fragment that begins mid-file — cur_date, mode,
# download_path, the *_suff / *_url constants, FF, AD, no_contest and
# debug_file_name are all defined outside the visible region.
# Builds the target date string as MMDD (zero-padded).
date_str = str(cur_date.month).zfill(2) + str(cur_date.day).zfill(2)
# Allow overriding the date from the command line: --debugdate:MMDD
# ('--debugdate:' is 12 characters, hence the [12:] slice).
if (len(sys.argv) == 2) and (sys.argv[1].startswith('--debugdate:')):
    date_str = sys.argv[1][12:]
print 'Analysis target date = ', date_str
# Download the config file for the current date.
config_file_name = download_path[mode] + date_str + conf_file_suff
config_file_url = fixed_conf_url + date_str + conf_file_suff
check_file_name = download_path[mode] + date_str + check_file_suff
# Check for the marker file first to avoid downloading twice.
if os.path.isfile(check_file_name):
    print 'No contest today.'
    exit()
FF.download_url_to_file(config_file_url, config_file_name)
# Validate the config file; invalid means there is no contest today, so exit.
# An invalid config file needs no transcoding — it is already utf-8.
if AD.legal_config_file(config_file_name) == no_contest:
    # Record "No contest" in both the debug file and the marker file so the
    # next run short-circuits above.
    debug_info_file = open(debug_file_name[mode], 'w')
    debug_info_file.writelines('No contest')
    debug_info_file.close()
    chk_file = open(check_file_name, 'w')
    chk_file.write('No contest')
    chk_file.close()
    print 'No contest'
    exit()
# Re-encode the config file in place (source_pagecode -> dest_pagecode),
# then load it.
FF.file_convert_pagecode(config_file_name, source_pagecode, config_file_name, dest_pagecode)
AD.load_config_file(config_file_name)