Example #1
def high_zero_crossing_rate(frames):
    """
    Return the high zero-crossing rate of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the high
    zero-crossing rate (HZCR) per segment. The output values are stored in a
    1D numpy array (elements hold the HZCR of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        HZCR:   (numpy array) high zero-crossing rate vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute and segment zero crossing rate
    LEN = 100
    ZCR = seg.segment(zero_crossing_rate(frames), LEN, 0)
    
    # Compute high zero-crossing rate
    HZCR = np.sum(np.sign(ZCR - 1.5*np.mean(ZCR, axis=0)) + 1, axis=0)/(2*LEN)
    
    # Return high zero-crossing rate vector
    return np.array(HZCR)
Example #2
def low_squared_energy_operator(frames):
    """
    Return the low squared energy operator of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the low
    squared energy operator (LSEO) per segment. The output values are stored
    in a 1D numpy array (elements hold the LSEO of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        LSEO:   (numpy array) low squared energy operator vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute and segment squared energy operator
    LEN = 100
    SEO = seg.segment(squared_energy_operator(frames), LEN, 0)
    
    # Compute low squared energy operator
    LSEO = np.sum(np.sign(0.5*np.mean(SEO, axis=0) - SEO) + 1, axis=0)/(2*LEN)
    
    # Return low squared energy operator vector
    return np.array(LSEO)
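HZCR and LSEO are commonly used together as frame-level features for speech/music discrimination. A minimal usage sketch, assuming seg.segment applies its default frame length and overlap, and that np is NumPy:

import numpy as np

signal = np.random.randn(16000)             # hypothetical 2 s of noise at 8 kHz
hzcr = high_zero_crossing_rate(signal)      # one value per super-segment
lseo = low_squared_energy_operator(signal)  # one value per super-segment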
Example #3
def test_im_shape_segment():
    im = np.array([[[1, 2], [1, 2]], [[1, 2], [1, 2]]])
    with pytest.raises(RuntimeError) as excinfo:
        segmentation.segment(im)
    excinfo.match(
        r"Need to provide an array with shape \(n, m\). Provided array has shape \(2, 2, 2\)"
    )
Example #4
def test_provided_function_returns_correct_types():
    im = np.array([[1, 2, 3], [1, 2, 3]])

    def bad_func(im):
        return ('Hello, world!')

    with pytest.raises(RuntimeError) as excinfo:
        segmentation.segment(im, thresh_func=bad_func)
    excinfo.match("The provided function must output a numeric or array \
                           provided function returns type <class 'str'>")
Example #5
def test_provided_function_returns_correct_shape():
    im = np.array([[1, 2, 3], [1, 2, 3]])

    def bad_func(im):
        return (np.array([[1, 2], [1, 2]]))

    with pytest.raises(RuntimeError) as excinfo:
        segmentation.segment(im, thresh_func=bad_func)
    excinfo.match(
        "Array output of the function must have same shape as the image \
                           the output array has shape \(2, 2\), image has shape \(2, 3\)"
    )
Example #6
    def run(self, options):
        """
        Define the code to be run by this plugin app.
        """

        if options.b_version:
            print(Neuproseg.VERSION)
            sys.exit(0)

        print('Multistream = ' + str(options.b_multistream))
        segmentation.segment(inputDirectory=options.inputdir,
                             outputDirectory=options.outputdir,
                             multistream=options.b_multistream)
Example #7
def thumbnail(audio, fs, length, include_self=True, seg_method='regular'):
    # Segment the audio
    segments_in_seconds = seg.segment(audio, fs, length=length, method=seg_method)
    segments = segments_in_seconds * fs

    # Calculate the self-similarity matrix
    num_segments = segments_in_seconds.shape[0]
    similarity = np.zeros((num_segments, num_segments))

    for i in range(0, num_segments):
        cur_start = int(segments[i])
        cur_end = int(segments[i] + (length * fs))
        cur_sound = audio[cur_start: cur_end]

        # Calculate similarity with matched filter
        cur_matches = np.abs(matched_filter(cur_sound, audio, segments))

        if include_self is False:
            cur_matches[i] = 0

        similarity[:, i] = cur_matches

    # Row normalization (after similarity calculation)
    similarity = 0.5 * (similarity.T + similarity)
    similarity = seg.row_normalize(segments, similarity)

    # Identify the thumbnail
    sim_curve = np.sum(similarity, axis=1) / np.sum(similarity)
    thumb_idx = np.argmax(sim_curve)
    thumb_start = int(segments[thumb_idx])
    thumb_end = int(segments[thumb_idx] + (length * fs))
    thumb = audio[thumb_start: thumb_end]

    return thumb, sim_curve, segments_in_seconds, similarity
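The matched_filter helper is not shown in this example. A minimal sketch of what it presumably computes, assuming it cross-correlates the segment against the full audio and samples the response at each segment start (the name and signature come from the call above; the body is hypothetical):

import numpy as np

def matched_filter(template, audio, segments):
    # Hypothetical body: normalized cross-correlation of the template
    # against the audio, sampled at each segment start.
    corr = np.correlate(audio, template, mode='valid')
    corr = corr / (np.linalg.norm(template) ** 2 + 1e-12)
    idx = np.clip(segments.astype(int), 0, len(corr) - 1)
    return corr[idx]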
Example #8
def squared_energy_operator(frames):
    """
    Return the squared energy operator of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    squared energy operator (SEO) per segment. The output values are stored
    in a 1D numpy array (elements hold the SEO of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        SEO:    (numpy array) squared energy operator vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute squared energy operator (per segment)
    SEO = []
    for i in range(0, frames.shape[1]):
        frame  = frames[:,i].copy()
        energy = np.sum(frame**2)/np.float64(len(frame))
        SEO.append(energy)
        
    # Return squared energy operator vector
    return np.array(SEO)
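Since each iteration just takes the mean squared amplitude of one column, the loop above has a one-line vectorized equivalent (a sketch, assuming frames is the segmented 2D array):

import numpy as np

# Same result as the loop: mean squared amplitude per column (frame).
SEO = np.mean(frames.astype(np.float64) ** 2, axis=0)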
Example #9
def zero_crossing_rate(frames):
    """
    Return the zero-crossing rate of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    zero-crossing rate (ZCR) per segment. The output values are stored in a
    1D numpy array (elements hold the ZCR of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        ZCR:    (numpy array) zero-crossing rate vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute zero-crossing rate (per segment)
    ZCR = []
    for i in range(0, frames.shape[1]):
        frame = frames[:,i].copy()
        count = np.sum(np.abs(np.diff(np.sign(frame))))/2
        ZCR.append(np.float64(count)/np.float64(len(frame)-1.0))
        
    # Return zero-crossing rate vector
    return np.array(ZCR)
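For intuition, a pure tone gives a predictable value: a sine of frequency f sampled at rate fs crosses zero about 2*f times per second, so the per-sample rate is roughly 2*f/fs. A quick sanity check, assuming the default segmentation:

import numpy as np

fs, f = 8000, 100                       # hypothetical: 100 Hz tone at 8 kHz
tone = np.sin(2 * np.pi * f * np.arange(fs) / fs)
print(zero_crossing_rate(tone).mean())  # expect roughly 2*f/fs = 0.025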
Example #10
def evaluate(model, image):
    segments = segmentation.segment(image)
    predicted = []
    for i in range(len(segments)):
        digit = predict(model, segments[i])
        predicted.append(str(digit))
    return ''.join(predicted)
Example #11
def preprocess(s):
    array = seg.segment(s)
    if (len(array) < 1):
        keras.backend.clear_session()
        return "Not segmentable!"
    image = array[0]
    image = i.img_to_array(image)
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    image = np.expand_dims(image, axis=0)
    return image
Example #12
def segmentData(input_directory):

    symbols = load_images_from_folder(input_directory)

    for i in range(len(symbols)):
        images = segmentation.segment(symbols[i])

        for thing in range(len(CLASSES)):
            cv2.imwrite(CLASSES[thing] + '/' + CLASSES[thing] + '_' + str(i) +
                        '.png', images[thing])
Example #13
def segmentData(input_directory, classes):

    symbols = load_images_from_folder(input_directory)

    for i in range(len(symbols)):
        images = segmentation.segment(symbols[i])

        for class_ in range(len(classes)):
            cv2.imwrite(
                classes[class_] + '/' + classes[class_] + '_' + str(i) +
                '.png', images[class_])
Example #14
File: merger.py  Project: johancc/Styler
def main(video_path,
         model_path,
         background,
         foreground,
         output_path,
         frame_rate,
         keep_temp=False,
         gpus=1):
    video_name = get_base_name(video_path)
    style_name = get_base_name(model_path)
    audio_file = create_audio_file(video_path)
    # Get all the frames_path
    print("Extracting frames...")
    frame_dir = extract_video_frames(video_path)
    print("Styling frames...")
    style_dir = style_frames(model_path,
                             frame_dir,
                             style_dir="{}_styled/".format(video_name))
    if background or foreground:
        print(
            "Applying segmentation... (will take a while, ~10 min per 500 frames)"
        )
        segmentation_dir = segment(video_name, frame_dir,
                                   "{}_segmented/".format(video_name), gpus)
        print("Merging styled and segmented frames...")
        segmented_styled_frames = apply_style_over_segmentation(
            original_folder=frame_dir,
            style_folder=style_dir,
            segmentation_folder=segmentation_dir,
            output_folder="{}_final".format(video_name),
            mode=0 if foreground else 1)
        styled_video = frames_to_video(frames_path=segmented_styled_frames,
                                       video_name=video_name,
                                       output_path=output_path,
                                       frame_rate=frame_rate)
    else:
        styled_video = frames_to_video(frames_path=style_dir,
                                       video_name=video_name,
                                       output_path=output_path,
                                       frame_rate=frame_rate)
    final_video_name = os.path.join(
        output_path, video_name + "_{}_style.mp4".format(style_name))
    add_audio_to_video(styled_video, audio_file, final_video_name)
    assert (os.path.isfile(final_video_name))
    if not keep_temp:
        cleanup_files = {
            "directories": [frame_dir, style_dir],
            "files": [styled_video, audio_file]
        }
        cleanup_temp_files(cleanup_files)

    return final_video_name
Example #15
 def open_xml(self, file_data):
     sents = []
     dghs = []
     with open(file_data[0]) as fobj:
         if file_data[2] > str(self.date_hsd.value()):
             return [file_data[3], [{}]]
         xml = fobj.read()
         root = etree.fromstring(xml.encode('utf-8'))
         descs = extract_desc(xml)
         for desc in descs:
             if desc != []:
                 sents = segment(desc).split("\n")
                 # for sent in sents:
                 dghs.append(extract_func(sents[0]))
     return [file_data[3], dghs, file_data[2], file_data[0]]
Example #16
def count_cars(image, **kwargs):
    segments = segment(image)
    padded, segment_val = padded_segments(image,
                                          segments,
                                          list(range(segments.max() + 1)),
                                          mask=kwargs.get("mask", None))
    predictions = model.predict(padded)

    count = 0
    result = image.copy()
    for val, pred in zip(segment_val, predictions):
        result[segments == val] = [255 * pred[1], 255 * pred[0], 0]
        count += pred[0] > threshold

    return (count, result)
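A hypothetical invocation; count_cars relies on module-level model, threshold, segment, and padded_segments objects that are assumed to be defined elsewhere:

import cv2

image = cv2.imread('aerial.jpg')   # hypothetical input image (BGR)
count, overlay = count_cars(image)
print('cars detected:', count)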
Example #17
def spectral_distance(frames):
    """
    Return the spectral distance of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    spectral distance (SD) per segment. The output values are stored in 1D
    numpy arrays (elements hold the SD of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
    Tuple composed of
        SD_mag: (numpy array) spectral distance based on module vector
        SD_phs: (numpy array) spectral distance based on phase vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute spectral distance (per segment)
    SD_mag = []
    SD_phs = []
    for i in range(1, frames.shape[1]):
        p_frame = frames[:,i-1].copy()
        ps_mag  = np.abs(fft(p_frame)[0:frames.shape[0]])
        ps_phs  = np.angle(fft(p_frame)[0:frames.shape[0]])
        ps_mag /= ps_mag.max()
        ps_phs /= ps_phs.max()
        
        a_frame = frames[:,i].copy()
        as_mag  = np.abs(fft(a_frame)[0:frames.shape[0]])
        as_phs  = np.angle(fft(a_frame)[0:frames.shape[0]])
        as_mag /= as_mag.max()
        as_phs /= as_phs.max()
        
        SD_mag.append(np.sum(abs(ps_mag-as_mag)))
        SD_phs.append(np.sum(abs(ps_phs-as_phs)))
        
    # Return tuple of spectral distance vectors
    return (np.array(SD_mag), np.array(SD_phs))
Example #18
def thumbnail(audio, fs, length, include_self=True, seg_method='regular'):
    # Segment the audio
    segments_in_seconds = seg.segment(audio,
                                      fs,
                                      length=length,
                                      method=seg_method)
    segments = segments_in_seconds * fs

    # Calculate the self-similarity matrix
    num_segments = segments_in_seconds.shape[0]
    similarity = np.zeros((num_segments, num_segments))
    segment_fp = []

    # Pre-compute Shazam fingerprints
    for i in range(0, num_segments):
        cur_start = int(segments[i])
        cur_end = int(segments[i] + (length * fs))
        cur_sound = audio[cur_start:cur_end]
        cur_fp, _, _ = shazam.fingerprint(cur_sound)
        segment_fp.append(cur_fp)

    for i in range(0, num_segments):
        # Calculate similarity (forwards only, for a symmetric measure)
        cur_matches = np.zeros(num_segments)
        for j in range(i, num_segments):

            if include_self is False and i == j:
                continue

            cur_matches[j] = shazam.hash_search(segment_fp[i], segment_fp[j])

        similarity[:, i] = cur_matches

    # Row normalization (after similarity calculation)
    similarity = 0.5 * (similarity.T + similarity)
    similarity = seg.row_normalize(segments, similarity)

    # Identify the thumbnail
    sim_curve = np.sum(similarity, axis=1) / np.sum(similarity)
    thumb_idx = np.argmax(sim_curve)
    thumb_start = int(segments[thumb_idx])
    thumb_end = int(segments[thumb_idx] + (length * fs))
    thumb = audio[thumb_start:thumb_end]

    return thumb, sim_curve, segments_in_seconds, similarity
Example #19
def spectral_flatness(frames):
    """
    Return the spectral flatness of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    spectral flatness (SF) per segment. The output values are stored in a 1D
    numpy array (elements hold the SF of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        SF:     (numpy array) spectral flatness vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute spectral flatness (per segment)
    SF = []
    for i in range(0, frames.shape[1]):
        frame  = frames[:,i].copy()
        spect  = abs(fft(frame)[0:frames.shape[0]])**2
        spect /= spect.max()
        
        gMean = np.float64(0)
        aMean = 0 
        for j in range(len(spect)):  # j, not i: avoid clobbering the outer frame index
            sample = np.float64(spect[j])
            gMean += np.float64(log(sample))
            aMean += sample
        
        gMean /= np.float64(len(spect))
        gMean  = exp(gMean)
        aMean /= float(len(spect))
        SF.append(gMean/aMean)
        
    # Return spectral flatness vector
    return np.array(SF)
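As a sanity check, white noise has a much flatter spectrum than a pure tone, so the returned values should be clearly larger for noise (a hypothetical check, assuming the default segmentation):

import numpy as np

noise = np.random.randn(8000)
tone = np.sin(2 * np.pi * 100 * np.arange(8000) / 8000.0)
print(spectral_flatness(noise).mean())  # expect a value well above...
print(spectral_flatness(tone).mean())   # ...the near-zero tone flatness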
Example #20
def resize(im_dir, out_dir, csv_file, unet=None):

    csv_data = pd.read_csv(csv_file)

    if unet:
        unet = SegmentUnet(None, unet)

    for image, row in csv_data.iterrows():

        im_path = join(im_dir, row['image']) + '.jpeg'
        level = 'Healthy' if row['level'] < 2 else 'Diseased'

        im_arr = cv2.imread(im_path)[:, :, ::-1]
        resized_im = imresize(im_arr, (480, 640), interp='bicubic')

        if unet:
            resized_im = segment(resized_im, unet)

        level_dir = make_sub_dir(out_dir, level)
        visualize(resized_im, join(level_dir, row['image']))
Example #21
def spectral_flux(frames):
    """
    Return the spectral flux of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    spectral flux (SF) per segment. The output values are stored in a 1D
    numpy array (elements hold the SF of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        
    Returns:
        SF:     (numpy array) spectral flux vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute spectral flux (per segment)
    SF = []
    for i in range(1, frames.shape[1]):
        p_frame  = frames[:,i-1].copy()
        p_spect  = abs(fft(p_frame)[0:frames.shape[0]])
        p_spect /= p_spect.max()
        
        a_frame  = frames[:,i].copy()
        a_spect  = abs(fft(a_frame)[0:frames.shape[0]])
        a_spect /= a_spect.max()
        
        a_overall = a_spect/np.sum(a_spect)
        p_overall = p_spect/np.sum(p_spect)
        SF.append(np.sum((a_overall - p_overall)**2))
        
    # Return spectral flux vector
    return np.array(SF)
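Successive frames of a stationary tone have nearly identical normalized spectra, so their flux should be near zero, whereas white noise varies frame to frame (a hypothetical check, assuming the default segmentation):

import numpy as np

tone = np.sin(2 * np.pi * 100 * np.arange(16000) / 8000.0)
noise = np.random.randn(16000)
print(spectral_flux(tone).mean())    # expect a value close to zero
print(spectral_flux(noise).mean())   # expect a clearly larger value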
Example #22
def spectral_rolloff(frames, c=0.8):
    """
    Return the spectral roll-off of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    spectral roll-off (SR) per segment. The output values are stored in a 1D
    numpy array (elements hold the SR of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        c:      (float) roll-off factor (default=0.8)
        
    Returns:
        SR:     (numpy array) spectral roll-off vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute spectral roll-off (per segment)
    SR = []
    for i in range(0, frames.shape[1]):
        frame  = frames[:,i].copy()
        spect  = abs(fft(frame)[0:frames.shape[0]])
        spect /= spect.max()
        
        [pos,] = np.nonzero(np.cumsum(spect**2) > c*np.sum(spect**2))
        if len(pos) > 0:
            SR.append(np.float64(pos[0])/(float(len(spect))))
        else:
            SR.append(0.0)
            
    # Return spectral roll-off vector
    return np.array(SR)
Example #23
def spectral_spread(frames, Fs):
    """
    Return the spectral spread of an input signal
    
    The input signal can be a 1D numpy array (waveform) or a 2D numpy array.
    If the input signal is one-dimensional, the function performs a
    segmentation to automatically segment the signal into frames of
    predefined length and overlap. Otherwise, the function computes the
    spectral spread (SS) per segment. The output values are stored in a 1D
    numpy array (elements hold the SS of all segments).

    Args:
        frames: (numpy array) input data vector (one/two-dimensional)
        Fs:     (int) sampling rate (frequency)
        
    Returns:
        SS:     (numpy array) spectral spread vector
        
    """
    
    # Perform segmentation if necessary
    frames = seg.segment(frames)
    
    # Compute spectral spread (per segment)
    SS = []
    ID = (np.arange(1,frames.shape[0]+1))*(Fs/(2.0*frames.shape[0]))
    for i in range(0, frames.shape[1]):
        frame  = frames[:,i].copy()
        spect  = abs(fft(frame)[0:frames.shape[0]])
        spect /= spect.max()
    
        NUM = np.sum(ID*spect)
        DEN = np.sum(spect)+0.010
        SC  = ((NUM/DEN)/(Fs/2.0))
        SS.append(np.sqrt(np.sum(((ID-SC)**2)*spect)/DEN))

    # Return spectral spread vector
    return np.array(SS)
Example #24
def confusion_matrix(image, model, **kwargs):
	threshold = kwargs.get("threshold", default_threshold)

	segments = segment(image)
	segment_values = range(segments.max() + 1)

	selection = default_selection

	ground_truth = [True if i in selection else False for i in segment_values]

	padded, padded_segment_values = padded_segments(image, segments, segment_values)
	partial_predictions = model.predict(padded)[:,0] > threshold
	predictions = []

	index = 0
	for i in segment_values:
		if i in padded_segment_values:
			predictions.append(partial_predictions[index])
			index += 1
		else:
			predictions.append(False)

	return [[sum(array_and(ground_truth, predictions)), sum(array_and(ground_truth, array_not(predictions)))],
			[sum(array_and(array_not(ground_truth), predictions)), sum(array_not(array_or(ground_truth, predictions)))]]
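The array_and, array_or, and array_not helpers are not shown here; a minimal sketch of what they presumably do (elementwise logical operations over boolean sequences; the names come from the calls above, the bodies are hypothetical):

def array_and(a, b):
    # elementwise logical AND of two boolean sequences
    return [x and y for x, y in zip(a, b)]

def array_or(a, b):
    # elementwise logical OR of two boolean sequences
    return [x or y for x, y in zip(a, b)]

def array_not(a):
    # elementwise logical NOT of a boolean sequence
    return [not x for x in a]

With these, the returned nested list reads as [[TP, FN], [FP, TN]].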
Example #25
import segmentation
import cv2
import os

# Classes 
CLASSES = ['one','two','three','four','five','six','seven', 'eight','nine','plus','minus','multiplication', 'division1', 'division2', 'period']

def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = os.path.join(folder, filename)
        if img is not None:
            images.append(img)
    print(images)
    return images

def segmentData(input_directory):
    print("5")
    symbols = load_images_from_folder(input_directory)

    for i in range(len(symbols)):
        images = segmentation.segment(symbols[i])

        for thing in range(len(CLASSES)):
            cv2.imwrite(CLASSES[thing] + '/' + CLASSES[thing] + '_' + str(i) + '.png', images[thing])

segmentData('images')
Example #26
from segment import segment
msg('starting segmentation')
#sys('chmod 777 blackandwhite.jpg')
imageAddress = str(sys.argv[1])
#img = cv.imread(imageAddress)
#img = preprocess(img)
#msg(str(img.shape))
#cv.imwrite('/home/rathod/saving.jpg',img)
msg('removing mitotic')
system('rm -rf ./segmented_data/mitotic/*')
msg('removing nonmitotic')
system('rm -rf ./segmented_data/non_mitotic/*')
msg('removing pickles')
system('rm -rf ./processed_dataset/*')

msg('reading image ' + imageAddress)
n = len(imageAddress)
csvAddress = imageAddress[:n - 3] + "csv"
msg('reading csv')
destination = "/var/www/html/Models-Comparison-Cls-Imb/pp/segmented_data/"
misCount = 0
#msg("misCount is "+(str(misCount)))
msg('starting segmentation')
misCount = segment(imageAddress, csvAddress, destination)
msg('end')
msg('creating pickle')
create_pickle('./segmented_data/', './processed_dataset/')
f = open('status.xml', 'w')
f.write("<msg>1</msg>")
f.close()
Example #27
sys.path.insert(0, caffe_root + 'python')
import caffe
caffe.set_mode_cpu()

from classify import prediction
import segmentation

if not os.path.isdir("segmentation"):
    os.makedirs("segmentation")

input_image = sys.argv[1]
print "image is " + input_image

showname([input_image], 1, "Original Image")

segments = segmentation.segment(input_image)
showname(segments, 3, "Segmentation Results")

#weight = "weights/lenet_iter_3000.caffemodel"

categories = prediction(segments, model, weight)
print(categories)
"""
result = {}
for i in xrange(len(categories)):
   print i
   print categories[i]
   result[categories[i]] = segments[i]
showwithtitle(result,5)
"""
Example #28
def seg():
    return segmentation.segment(request.json.get('data'), './dic.json',
                                cls_rnn, segmodel, cwsInfo)
Example #29
 # LDA analysis for each user's blog set
 user_blogs = sorted(os.listdir('./data/user_blogs/'))
 with open('./data/user_feature_mean.txt', 'w') as fuser_mean:
     for user in user_blogs:
         # for each user
         blogs = pd.read_csv('./data/user_blogs/' + user,
                             header=None,
                             sep='\001',
                             names=['id', 'title', 'text'])
         # Increase the weight of the title and remove the ellipsis
         texts = [
             re.sub(r'\.\.+', '.',
                    str(row[1]['title'] * 6 + row[1]['text']).lower())
             for row in blogs.iterrows()
         ]
         corpus = [' '.join(line) for line in segment(texts)]
         # get document-term matrix
         vec = CountVectorizer(stop_words=stop_words)
         vec_tf = vec.fit_transform(corpus)
         # LDA analysis
         n_components = 4
         lda = LatentDirichletAllocation(n_components=n_components,
                                         learning_method='batch',
                                         max_iter=50,
                                         n_jobs=4)
         lda.fit(vec_tf)
         # select 40 words
         words = []
         len_feature = lda.components_.shape[1]
         vec_feature_names = vec.get_feature_names()
         for k in range(n_components):
Example #30
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from segmentation import segment, stop_words
# make_Tfidfvectorizer
def make_idf(corpus):
    vectorizer = TfidfVectorizer(stop_words=stop_words)
    vectorizer.fit_transform(corpus)
    return vectorizer

if __name__ == '__main__':
    with open("./data/blog_article_original.txt", "r", encoding='utf-8') as fblog:
        text = fblog.readlines()
    # segmentation
    corpus = [' '.join(line) for line in segment(text)]
    # make idf.txt
    vec = make_idf(corpus)
    pd.DataFrame({'col1': vec.get_feature_names(), 'col2': vec.idf_}).to_csv(
        "./model/idf.txt", encoding='utf-8', sep=' ', index=None, header=None)
Example #31
def main(dataset, results_path, method='color', k=400):

    segmentation_path = os.path.join(results_path, 'our_segmentation')
    figures_path = os.path.join(results_path, 'figures')
    all_mse = []
    all_jaccard = []
    all_acc = []

    print("{:10} {:20} {:20}".format('Imagen', 'MSE', 'JACCARD'))
    for image_idx in range(dataset.num_images):

        image = dataset.image_names[image_idx]

        # reads the image information from the dataset
        original_image = dataset.get_image_data(image_idx)

        # Gets the mask to avoid dark areas in segmentation
        mask = get_mask(original_image.shape[0:2])
        I = gray2rgb(mask) * original_image
        GT = (rgb2gray(
            dataset.get_ground_truth_data(image_idx).astype(float) * 255) *
              mask) > 0

        # Segment each mole
        print('Segmenting image {0} ({1} / {2})'.format(
            dataset.image_names[image_idx], image_idx + 1, dataset.num_images))
        Isegmented, LMerged, Islic2, IOtsu, Superpixels = segment(
            I, mask, method=method, k=k)

        auxmse = compare_mse(GT, Isegmented)
        all_mse.append(auxmse)
        aux_jaccard = compare_jaccard(GT, Isegmented)
        aux_acc = 1.0 - np.sum(np.logical_xor(GT, Isegmented)) / float(GT.size)
        all_jaccard.append(aux_jaccard)
        all_acc.append(aux_acc)

        print("Image name, MSE, JACCARD_IDX, ACC")
        print("{:10} {:0.25f} {:0.25f} {:0.25f}".format(
            image, auxmse, aux_jaccard, aux_acc))

        if not os.path.exists(segmentation_path):
            os.makedirs(segmentation_path)
        if not os.path.exists(figures_path):
            os.makedirs(figures_path)

        subplot(2, 3, 1)
        title('Original + Superpixels')
        imshow(Superpixels)
        subplot(2, 3, 2)
        title('Ground Truth')
        imshow(GT, cmap='gray')
        subplot(2, 3, 3)
        title('Our Segmentation')
        imshow(Isegmented, cmap='gray')
        subplot(2, 3, 4)
        title('Labels')
        imshow(LMerged)
        subplot(2, 3, 5)
        title('Merged Superpixels')
        imshow(Islic2)
        subplot(2, 3, 6)
        title('Otsu')
        imshow(IOtsu, cmap='gray')
        savefig(figures_path + '/' + image + '_all.png')

        imsave(segmentation_path + '/' + image + '_our.png',
               255 * Isegmented.astype(int),
               cmap='gray')

        C = np.zeros_like(Isegmented).astype(int)
        a = np.where(np.logical_and(GT, Isegmented))  # TP
        b = np.where(np.logical_and(GT, np.logical_not(Isegmented)))  #FN
        d = np.where(np.logical_and(Isegmented, np.logical_not(GT)))  #FP
        C[a] = 1
        C[b] = 2
        C[d] = 3

        figure()
        title('Seg. comparison')
        imshow(C)
        savefig(figures_path + '/' + image + '_k_{}_seg_comp.png'.format(k))

        figure()
        title('SLIC Segmentation, k = {}'.format(k))
        imshow(Superpixels)
        savefig(figures_path + '/' + image + '_k_{}_seg.png'.format(k))

        figure()
        title('Merged superpixels')
        imshow(Islic2)
        savefig(figures_path + '/' + image + '_k_{}_merged.png'.format(k))

        figure()
        title('Otsu')
        imshow(IOtsu, cmap='gray')
        savefig(figures_path + '/' + image + '_k_{}_otsu.png'.format(k))

    print('jaccard overall: {}'.format(np.mean(np.array(all_jaccard))))
    print('acc. overall: {}'.format(np.mean(np.array(all_acc))))