def birch(X, y, n):
    """
    Cluster the data with Birch and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. Indexed as ``y[0]``; its length fixes the
        width of the reshaped prediction row.
    n : int
        Number of clusters. Used for the Birch model and forwarded to
        ``make_graph``.

    Returns
    -------
    acc_br : float
        Accuracy (first of the three values returned by ``acc``).
    time_br : float
        Wall-clock time of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # The cluster count follows the ``n`` argument (it used to be fixed at 2,
    # which silently ignored the caller's request).
    br = Birch(n_clusters=n)

    # Only the fit/predict call is timed.
    start_br = time.time()
    y_br = br.fit_predict(X)
    end_br = time.time()

    # Reshape the predictions to a single row, matching how ``y`` is indexed.
    y_br = np.reshape(y_br, (1, len(y[0])))
    acc_br, _, _ = acc(y_br, y)
    time_br = round(end_br - start_br, 2)

    make_graph(X, y_br, n, "Birch")
    return acc_br, time_br
def dbscan(X, y, eps, n):
    """
    Cluster the data with DBSCAN and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. Indexed as ``y[0]``; its length fixes the
        width of the reshaped prediction row.
    eps : float
        Maximum distance between two samples, passed to DBSCAN as ``eps``.
    n : int
        Number of clusters. Only forwarded to ``make_graph``; DBSCAN itself
        is not given a cluster count.

    Returns
    -------
    acc_dbs : float
        Accuracy (first of the three values returned by ``acc``).
    time_dbs : float
        Wall-clock time of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    model = DBSCAN(eps=eps, min_samples=2)

    # Time only the fit/predict call.
    started = time.time()
    labels = model.fit_predict(X)
    finished = time.time()
    elapsed = round(finished - started, 2)

    # Bring the predictions into a single row before scoring.
    row_shape = (1, len(y[0]))
    labels = np.reshape(labels, row_shape)
    accuracy, _second, _third = acc(labels, y)

    make_graph(X, labels, n, "DBSCAN")
    return accuracy, elapsed
def agglomerativeclustering(X, y, n):
    """
    Cluster the data with Agglomerative Clustering and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. Indexed as ``y[0]``; its length fixes the
        width of the reshaped prediction row.
    n : int
        Number of clusters. Used for the model and forwarded to
        ``make_graph``.

    Returns
    -------
    acc_ac : float
        Accuracy (first of the three values returned by ``acc``).
    time_ac : float
        Wall-clock time of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    model = AgglomerativeClustering(n_clusters=n)

    # Time only the fit/predict call.
    started = time.time()
    labels = model.fit_predict(X)
    finished = time.time()
    elapsed = round(finished - started, 2)

    # Bring the predictions into a single row before scoring.
    row_shape = (1, len(y[0]))
    labels = np.reshape(labels, row_shape)
    accuracy, _second, _third = acc(labels, y)

    make_graph(X, labels, n, "AgglomerativeClustering")
    return accuracy, elapsed
def kmeans(X, y, n):
    """
    Cluster the data with K-Means and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. Indexed as ``y[0]``; its length fixes the
        width of the reshaped prediction row.
    n : int
        Number of clusters. Used for the model and forwarded to
        ``make_graph``.

    Returns
    -------
    acc_km : float
        Accuracy (first of the three values returned by ``acc``).
    time_km : float
        Wall-clock time of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # Fixed seed and random initialisation, exactly as configured before.
    settings = {
        "n_clusters": n,
        "init": "random",
        "n_init": 10,
        "max_iter": 300,
        "random_state": 0,
    }
    model = KMeans(**settings)

    # Time only the fit/predict call.
    started = time.time()
    labels = model.fit_predict(X)
    finished = time.time()
    elapsed = round(finished - started, 2)

    # Bring the predictions into a single row before scoring.
    row_shape = (1, len(y[0]))
    labels = np.reshape(labels, row_shape)
    accuracy, _second, _third = acc(labels, y)

    make_graph(X, labels, n, "KMeans")
    return accuracy, elapsed
def spectralclustering(X, y, n):
    """
    Cluster the data with Spectral Clustering and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. Indexed as ``y[0]``; its length fixes the
        width of the reshaped prediction row.
    n : int
        Number of clusters. Used for the model and forwarded to
        ``make_graph``.

    Returns
    -------
    acc_sc : float
        Accuracy (first of the three values returned by ``acc``).
    time_sc : float
        Wall-clock time of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # The cluster count follows the ``n`` argument (it used to be fixed at 2,
    # which silently ignored the caller's request).
    sc = SpectralClustering(n_clusters=n, affinity="nearest_neighbors")

    # Only the fit/predict call is timed.
    start_sc = time.time()
    y_sc = sc.fit_predict(X)
    end_sc = time.time()

    # Reshape the predictions to a single row, matching how ``y`` is indexed.
    y_sc = np.reshape(y_sc, (1, len(y[0])))
    acc_sc, _, _ = acc(y_sc, y)
    time_sc = round(end_sc - start_sc, 2)

    make_graph(X, y_sc, n, "SpectralClustering")
    return acc_sc, time_sc
def predict_folder(image_folder, ref_folder, output_prefix,
                   net_config_location, net_weights_location,
                   alpha_channel, use_gpu, win_size, crop_size):
    """
    Classify every image in a folder, save the predictions and optionally
    score them against reference images.

    Each file in ``image_folder`` is passed to ``classify``; the prediction
    is multiplied by 40 and saved as an image at
    ``output_prefix + <image file name>``. When ``ref_folder`` is not the
    empty string, the i-th reference (in sorted file-name order) is loaded,
    divided by 40, and compared with the i-th prediction.

    Parameters
    ----------
    image_folder : str
        Folder containing the input images.
    ref_folder : str
        Folder containing the reference images, or ``''`` to skip scoring.
    output_prefix : str
        String prepended to each image file name to build the output path.
    net_config_location, net_weights_location, alpha_channel, use_gpu,
    win_size, crop_size
        Forwarded unchanged to ``classify``.

    Returns
    -------
    The value returned by ``acc`` on the concatenated flattened predictions
    and references, or ``0`` when ``ref_folder`` is ``''``.
    """
    has_refs = ref_folder != ''

    # listdir gives entries in arbitrary order; sort both listings so that
    # image i is always paired with reference i.
    images = sorted(listdir(image_folder))
    if has_refs:
        refs = sorted(listdir(ref_folder))
        # Seed with an empty float array so the final concatenation has the
        # same dtype and also works when the image folder is empty.
        pred_chunks = [np.array([])]
        ref_chunks = [np.array([])]

    # start counting time
    start_time = time.time()

    for i, image_name in enumerate(images):
        image_location = path.join(image_folder, image_name)
        pred = classify(net_config_location, net_weights_location,
                        image_location, alpha_channel, use_gpu,
                        win_size, crop_size)

        # NOTE(review): the factor 40 presumably spreads class indices over
        # gray levels; references are divided by the same factor below.
        out = Image.fromarray(pred * 40)
        out.save(output_prefix + image_name)

        if has_refs:
            ref_location = path.join(ref_folder, refs[i])
            ref = np.array(Image.open(ref_location)) / 40
            # Collect the pieces and concatenate once after the loop instead
            # of re-copying the growing arrays on every iteration.
            pred_chunks.append(pred.flatten())
            ref_chunks.append(ref.flatten())

    # print elapsed time
    print("--- %s seconds ---" % (time.time() - start_time))

    if has_refs:
        pred_concat = np.concatenate(pred_chunks)
        ref_concat = np.concatenate(ref_chunks)
        accuracies = acc(pred_concat, ref_concat)
        return accuracies
    return 0
def predict_folder(image_folder, ref_folder, output_prefix,
                   net_config_location, net_weights_location,
                   alpha_channel, use_gpu, win_size, crop_size):
    """
    Classify every image in a folder, save the predictions and optionally
    score them against reference images.

    Each file in ``image_folder`` is passed to ``classify``; the prediction
    is multiplied by 40 and saved as an image at
    ``output_prefix + <image file name>``. When ``ref_folder`` is not the
    empty string, the i-th reference (in sorted file-name order) is loaded,
    divided by 40, and compared with the i-th prediction.

    Parameters
    ----------
    image_folder : str
        Folder containing the input images.
    ref_folder : str
        Folder containing the reference images, or ``''`` to skip scoring.
    output_prefix : str
        String prepended to each image file name to build the output path.
    net_config_location, net_weights_location, alpha_channel, use_gpu,
    win_size, crop_size
        Forwarded unchanged to ``classify``.

    Returns
    -------
    The value returned by ``acc`` on the concatenated flattened predictions
    and references, or ``0`` when ``ref_folder`` is ``''``.
    """
    has_refs = ref_folder != ''

    # listdir gives entries in arbitrary order; sort both listings so that
    # image i is always paired with reference i.
    images = sorted(listdir(image_folder))
    if has_refs:
        refs = sorted(listdir(ref_folder))
        # Seed with an empty float array so the final concatenation has the
        # same dtype and also works when the image folder is empty.
        pred_chunks = [np.array([])]
        ref_chunks = [np.array([])]

    # start counting time
    start_time = time.time()

    for i, image_name in enumerate(images):
        image_location = path.join(image_folder, image_name)
        pred = classify(net_config_location, net_weights_location,
                        image_location, alpha_channel, use_gpu,
                        win_size, crop_size)

        # NOTE(review): the factor 40 presumably spreads class indices over
        # gray levels; references are divided by the same factor below.
        out = Image.fromarray(pred * 40)
        out.save(output_prefix + image_name)

        if has_refs:
            ref_location = path.join(ref_folder, refs[i])
            ref = np.array(Image.open(ref_location)) / 40
            # Collect the pieces and concatenate once after the loop instead
            # of re-copying the growing arrays on every iteration.
            pred_chunks.append(pred.flatten())
            ref_chunks.append(ref.flatten())

    # print elapsed time
    print("--- %s seconds ---" % (time.time() - start_time))

    if has_refs:
        pred_concat = np.concatenate(pred_chunks)
        ref_concat = np.concatenate(ref_chunks)
        accuracies = acc(pred_concat, ref_concat)
        return accuracies
    return 0
def main(args):
    """
    Predict a DHS / non-DHS label for every sequence in ``args.inputfile``.

    Steps, in order:

    1. Read the sequence names from the FASTA input.
    2. Build one feature row per sequence by concatenating the
       reverse-complement 2-mer vector with the DAC vector.
    3. Write the rows in libsvm format and run the external ``svm-predict``
       program on them with the selected model.
    4. Turn the predictor's output into a tab-separated report in
       ``args.outputfile``.
    5. Delete temporary files from the current working directory.

    Parameters
    ----------
    args : object
        Must provide ``inputfile`` (FASTA path), ``outputfile`` (report
        path) and ``s`` (``0`` selects the TAIR model with lag 3, any other
        value the TIGR model with lag 8).
    """
    import subprocess

    from util import write_libsvm

    # Sequence names become the ID column of the report.
    with open(args.inputfile) as af:
        names = [e.name for e in read_fasta(af)]

    # kmer.py -f tab -l +1 -r 1 -k <k> TAIR10_DHSs.fas ... DNA
    res_kmer = make_kmer_vector(k=2, alphabet=index_list.DNA,
                                filename=args.inputfile, revcomp=True)

    # acc.py -e user_indices.txt -f svm -l +1 -lag <lag> TAIR10_DHSs.fas ... DNA DAC
    if args.s == 0:
        model_file = 'pDHSdata_TAIR_model.txt'
        lag = 3
    else:
        model_file = 'pDHSdata_TIGR_model.txt'
        lag = 8

    with open(args.inputfile) as f:
        k = read_k('DNA', 'DAC', 0)
        ind_list = []
        res_acc = acc(f, k, lag, ind_list, index_list.DNA,
                      extra_index_file='user_indices.txt',
                      all_prop=False, theta_type=1)

    # features = revckmer + dac, joined row by row
    res = [kmer_row + res_acc[i] for i, kmer_row in enumerate(res_kmer)]

    # Write the feature rows; every row gets the placeholder label '+1'.
    featuresfile = args.inputfile + '_tmp_features.txt'
    write_libsvm(res, ['+1'] * len(res), featuresfile)

    # Run the external predictor. An argument list (no shell) keeps paths
    # with spaces or shell metacharacters from being split or interpreted.
    tmp_predict_result_file = args.inputfile + '_tmp_result.txt'
    if sys.platform == 'win32':
        predictor = 'svm-predict'
    else:
        predictor = './svm-predict'
    subprocess.call([predictor, '-b', '1', '-q',
                     featuresfile, model_file, tmp_predict_result_file])

    # Both files are closed even if parsing fails part-way through.
    with open(args.outputfile, 'w') as pf:
        with open(tmp_predict_result_file) as nf:
            for count, raw_line in enumerate(nf):
                line = raw_line.strip()
                # Stop at the first blank line or once every name is used.
                if not line or count > len(names):
                    break
                if count == 0:
                    # The predictor's first line is skipped; the report's
                    # own header is written in its place.
                    pf.write('ID\t\tLabel\t\tProb\n')
                    continue
                fields = line.split()
                label = int(fields[0])
                TrueProb = fields[1]
                FalseProb = fields[2]
                if label == -1:
                    pf.write(names[count - 1] + '\t\t' + 'Non DHS' + '\t\t'
                             + str(FalseProb) + '\n')
                else:
                    pf.write(names[count - 1] + '\t\t' + 'DHS' + '\t\t'
                             + str(TrueProb) + '\n')

    # NOTE(review): this removes EVERY regular file in the working directory
    # whose name contains 'tmp', not only the two files created above, and
    # it misses them when the input file lives in another directory.
    # Behaviour kept as-is; consider deleting only featuresfile and
    # tmp_predict_result_file.
    cwd = os.getcwd()
    for entry in os.listdir(cwd):
        if os.path.isfile(os.path.join(cwd, entry)) and 'tmp' in entry:
            os.remove(entry)