Beispiel #1
0
def birch(X, y, n):
    """
    Cluster the data with Birch and score the result against the labels.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. ``len(y[0])`` is used as the sample count, so
        ``y`` is presumably shaped (1, n_samples) -- confirm with the caller.
    n : int
        Number of clusters.

    Returns
    -------
    acc_br : float
        Accuracy (first of the three values returned by ``acc``).
    time_br : float
        Wall-clock duration of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # Honour the requested cluster count so the model agrees with the value
    # handed to make_graph below (and with the other clustering helpers).
    br = Birch(n_clusters=n)

    # Only the fit/predict call is timed; model construction is excluded.
    start_br = time.time()
    y_br = br.fit_predict(X)
    end_br = time.time()

    # Reshape the flat prediction vector to a single row with as many
    # columns as y[0] before scoring.
    y_br = np.reshape(y_br, (1, len(y[0])))
    acc_br, _, _ = acc(y_br, y)
    time_br = round(end_br - start_br, 2)

    make_graph(X, y_br, n, "Birch")

    return acc_br, time_br
Beispiel #2
0
def dbscan(X, y, eps, n):
    """
    Cluster the data with DBSCAN and score the result against the labels.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. ``len(y[0])`` is used as the sample count, so
        ``y`` is presumably shaped (1, n_samples) -- confirm with the caller.
    eps : float
        Maximum distance between two samples (forwarded to ``DBSCAN``).
    n : int
        Number of clusters; only forwarded to ``make_graph`` because DBSCAN
        itself takes no cluster count.

    Returns
    -------
    acc_dbs : float
        Accuracy (first of the three values returned by ``acc``).
    time_dbs : float
        Wall-clock duration of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    model = DBSCAN(eps=eps, min_samples=2)

    # Time only the fit/predict call.
    tic = time.time()
    labels = model.fit_predict(X)
    toc = time.time()

    # Score on a single-row view with as many columns as y[0].
    labels = np.reshape(labels, (1, len(y[0])))
    accuracy, _, _ = acc(labels, y)
    elapsed = round(toc - tic, 2)

    make_graph(X, labels, n, "DBSCAN")

    return accuracy, elapsed
def agglomerativeclustering(X, y, n):
    """
    Cluster the data with Agglomerative Clustering and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. ``len(y[0])`` is used as the sample count, so
        ``y`` is presumably shaped (1, n_samples) -- confirm with the caller.
    n : int
        Number of clusters.

    Returns
    -------
    acc_ac : float
        Accuracy (first of the three values returned by ``acc``).
    time_ac : float
        Wall-clock duration of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    clusterer = AgglomerativeClustering(n_clusters=n)

    # Time only the fit/predict call.
    began = time.time()
    assigned = clusterer.fit_predict(X)
    finished = time.time()

    # Score on a single-row view with as many columns as y[0].
    n_samples = len(y[0])
    assigned = np.reshape(assigned, (1, n_samples))
    score, _, _ = acc(assigned, y)
    duration = round(finished - began, 2)

    make_graph(X, assigned, n, "AgglomerativeClustering")

    return score, duration
Beispiel #4
0
def kmeans(X, y, n):
    """
    Cluster the data with K-Means and score the result against the labels.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. ``len(y[0])`` is used as the sample count, so
        ``y`` is presumably shaped (1, n_samples) -- confirm with the caller.
    n : int
        Number of clusters.

    Returns
    -------
    acc_km : float
        Accuracy (first of the three values returned by ``acc``).
    time_km : float
        Wall-clock duration of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # Fixed settings: random initialisation, 10 restarts, at most 300
    # iterations, and a fixed seed.
    settings = {
        "n_clusters": n,
        "init": "random",
        "n_init": 10,
        "max_iter": 300,
        "random_state": 0,
    }
    estimator = KMeans(**settings)

    # Time only the fit/predict call.
    t_start = time.time()
    clusters = estimator.fit_predict(X)
    t_end = time.time()

    # Score on a single-row view with as many columns as y[0].
    clusters = np.reshape(clusters, (1, len(y[0])))
    accuracy, _, _ = acc(clusters, y)
    runtime = round(t_end - t_start, 2)

    make_graph(X, clusters, n, "KMeans")

    return accuracy, runtime
def spectralclustering(X, y, n):
    """
    Cluster the data with Spectral Clustering and score the result.

    Parameters
    ----------
    X : numpy array
        Data to cluster.
    y : numpy array
        Ground-truth labels. ``len(y[0])`` is used as the sample count, so
        ``y`` is presumably shaped (1, n_samples) -- confirm with the caller.
    n : int
        Number of clusters.

    Returns
    -------
    acc_sc : float
        Accuracy (first of the three values returned by ``acc``).
    time_sc : float
        Wall-clock duration of ``fit_predict`` in seconds, rounded to two
        decimals.
    """
    # Honour the requested cluster count so the model agrees with the value
    # handed to make_graph below (and with the other clustering helpers).
    sc = SpectralClustering(n_clusters=n,
                            affinity="nearest_neighbors")

    # Only the fit/predict call is timed; model construction is excluded.
    start_sc = time.time()
    y_sc = sc.fit_predict(X)
    end_sc = time.time()

    # Reshape the flat prediction vector to a single row with as many
    # columns as y[0] before scoring.
    y_sc = np.reshape(y_sc, (1, len(y[0])))
    acc_sc, _, _ = acc(y_sc, y)
    time_sc = round(end_sc - start_sc, 2)

    make_graph(X, y_sc, n, "SpectralClustering")

    return acc_sc, time_sc
def predict_folder(image_folder, ref_folder, output_prefix,
                   net_config_location, net_weights_location, alpha_channel,
                   use_gpu, win_size, crop_size):
    """
    Classify every image in a folder, save the predictions, optionally score.

    Each file in ``image_folder`` is passed to ``classify``; the prediction,
    multiplied by 40, is saved as an image at ``output_prefix + file_name``.
    When ``ref_folder`` is not the empty string, the i-th reference image
    (in sorted file-name order) is divided by 40 and compared against the
    i-th prediction.

    Parameters
    ----------
    image_folder : str
        Folder containing the input images.
    ref_folder : str
        Folder containing reference images, or ``''`` to skip scoring.
    output_prefix : str
        String prepended to each image file name to form the output path.
    net_config_location, net_weights_location, alpha_channel, use_gpu,
    win_size, crop_size
        Forwarded unchanged to ``classify``.

    Returns
    -------
    The result of ``acc(predictions, references)`` over all flattened
    pixels when ``ref_folder`` is given, otherwise ``0``.
    """
    # listdir() returns entries in arbitrary order; sort both listings so
    # the i-th image is paired with the i-th reference deterministically.
    images = sorted(listdir(image_folder))

    has_refs = ref_folder != ''
    if has_refs:
        refs = sorted(listdir(ref_folder))
        # Collect per-image arrays and concatenate once at the end instead
        # of re-copying the growing array on every iteration. The empty
        # float seed keeps the result's float dtype and makes the
        # concatenation valid for an empty folder.
        pred_parts = [np.array([])]
        ref_parts = [np.array([])]

    # start counting time
    start_time = time.time()

    for i, image_name in enumerate(images):
        image_location = path.join(image_folder, image_name)
        pred = classify(net_config_location, net_weights_location,
                        image_location, alpha_channel, use_gpu, win_size,
                        crop_size)

        # Predictions are scaled by 40 for the saved image; references are
        # divided by the same factor below so both share one scale.
        out = Image.fromarray(pred * 40)
        out.save(output_prefix + image_name)

        if has_refs:
            ref_location = path.join(ref_folder, refs[i])
            ref = np.array(Image.open(ref_location)) / 40

            pred_parts.append(pred.flatten())
            ref_parts.append(ref.flatten())

    # print elapsed time
    print("--- %s seconds ---" % (time.time() - start_time))

    if has_refs:
        return acc(np.concatenate(pred_parts), np.concatenate(ref_parts))
    return 0
def predict_folder(image_folder, ref_folder, output_prefix,
                   net_config_location, net_weights_location,
                   alpha_channel, use_gpu, win_size, crop_size):
    """
    Classify every image in a folder, save the predictions, optionally score.

    Each file in ``image_folder`` is passed to ``classify``; the prediction,
    multiplied by 40, is saved as an image at ``output_prefix + file_name``.
    When ``ref_folder`` is not the empty string, the i-th reference image
    (in sorted file-name order) is divided by 40 and compared against the
    i-th prediction.

    Parameters
    ----------
    image_folder : str
        Folder containing the input images.
    ref_folder : str
        Folder containing reference images, or ``''`` to skip scoring.
    output_prefix : str
        String prepended to each image file name to form the output path.
    net_config_location, net_weights_location, alpha_channel, use_gpu,
    win_size, crop_size
        Forwarded unchanged to ``classify``.

    Returns
    -------
    The result of ``acc(predictions, references)`` over all flattened
    pixels when ``ref_folder`` is given, otherwise ``0``.
    """
    # listdir() returns entries in arbitrary order; sort both listings so
    # the i-th image is paired with the i-th reference deterministically.
    images = sorted(listdir(image_folder))

    scoring = ref_folder != ''
    if scoring:
        refs = sorted(listdir(ref_folder))
        # Gather per-image arrays and join them once after the loop rather
        # than re-copying an ever-growing array each iteration. The empty
        # float seed keeps the result's float dtype and makes the
        # concatenation valid for an empty folder.
        pred_chunks = [np.array([])]
        ref_chunks = [np.array([])]

    # start counting time
    start_time = time.time()

    for i, image_name in enumerate(images):
        image_location = path.join(image_folder, image_name)
        pred = classify(net_config_location, net_weights_location,
                        image_location, alpha_channel, use_gpu, win_size,
                        crop_size)

        # Predictions are scaled by 40 for the saved image; references are
        # divided by the same factor below so both share one scale.
        out = Image.fromarray(pred * 40)
        out.save(output_prefix + image_name)

        if scoring:
            ref_location = path.join(ref_folder, refs[i])
            ref = np.array(Image.open(ref_location)) / 40

            pred_chunks.append(pred.flatten())
            ref_chunks.append(ref.flatten())

    # print elapsed time
    print("--- %s seconds ---" % (time.time() - start_time))

    if scoring:
        return acc(np.concatenate(pred_chunks), np.concatenate(ref_chunks))
    return 0
Beispiel #8
0
def main(args):
    """
    Predict DHS / non-DHS labels for the sequences of a FASTA file.

    Pipeline: read the sequence names, build reverse-complement 2-mer
    features and ``acc`` features, write their row-wise concatenation in
    libsvm format, run the external ``svm-predict`` program on it, and
    turn its output into a tab-separated report.

    Parameters
    ----------
    args : object
        Must expose ``inputfile`` (path of the FASTA input), ``outputfile``
        (path of the report to write) and ``s`` (``0`` selects the TAIR
        model with lag 3; any other value selects the TIGR model with
        lag 8). Presumably an argparse namespace -- confirm with the caller.

    Notes
    -----
    Two temporary files, ``<inputfile>_tmp_features.txt`` and
    ``<inputfile>_tmp_result.txt``, are created and removed again.
    """
    import subprocess
    from util import write_libsvm

    with open(args.inputfile) as af:
        names = [e.name for e in read_fasta(af)]

    # Equivalent command line:
    # kmer.py -f tab -l +1 -r 1 -k <k> TAIR10_DHSs.fas <out> DNA
    res_kmer = make_kmer_vector(k=2, alphabet=index_list.DNA,
                                filename=args.inputfile, revcomp=True)

    # Equivalent command line:
    # acc.py -e user_indices.txt -f svm -l +1 -lag <lag> TAIR10_DHSs.fas <out> DNA DAC
    if args.s == 0:
        model_file = 'pDHSdata_TAIR_model.txt'
        lag = 3
    else:
        model_file = 'pDHSdata_TIGR_model.txt'
        lag = 8
    with open(args.inputfile) as f:
        k = read_k('DNA', 'DAC', 0)
        ind_list = []
        res_acc = acc(f, k, lag, ind_list, index_list.DNA,
                      extra_index_file='user_indices.txt',
                      all_prop=False, theta_type=1)

    # Final features = k-mer vector followed by the acc vector, row by row.
    res = [kmer_row + res_acc[i] for i, kmer_row in enumerate(res_kmer)]

    featuresfile = args.inputfile + '_tmp_features.txt'
    tmp_predict_result_file = args.inputfile + '_tmp_result.txt'

    try:
        # Every row gets the placeholder label '+1'.
        write_libsvm(res, ['+1'] * len(res), featuresfile)

        # Run the predictor with an argument list (no shell) so paths that
        # contain spaces or shell metacharacters are passed through intact.
        predictor = 'svm-predict' if sys.platform == 'win32' else './svm-predict'
        subprocess.call([predictor, '-b', '1', '-q',
                         featuresfile, model_file, tmp_predict_result_file])

        with open(tmp_predict_result_file) as nf, \
                open(args.outputfile, 'w') as pf:
            for count, raw_line in enumerate(nf):
                line = raw_line.strip()
                # Stop at the first blank line or once every name is used.
                if not line or count > len(names):
                    break
                if count == 0:
                    # The first line of the result file is treated as a
                    # header and replaced by the report's own header.
                    pf.write('ID\t\tLabel\t\tProb\n')
                    continue
                # Columns are read as: label, second field, third field.
                # NOTE(review): assumes the second field is the positive
                # (+1) probability and the third the negative (-1) one --
                # confirm against the model's label order.
                fields = line.split()
                label = int(fields[0])
                true_prob = fields[1]
                false_prob = fields[2]
                if label == -1:
                    pf.write(names[count - 1] + '\t\t' + 'Non DHS'
                             + '\t\t' + str(false_prob) + '\n')
                else:
                    pf.write(names[count - 1] + '\t\t' + 'DHS'
                             + '\t\t' + str(true_prob) + '\n')
    finally:
        # Remove only the temporary files created here; sweeping the working
        # directory for any name containing 'tmp' could delete unrelated
        # files and would miss temp files located next to an input that
        # lives in another directory.
        for tmp_path in (featuresfile, tmp_predict_result_file):
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)