Example #1
def process_image(img):
    # Loader.print_image(img)
    # print("[DEBUG] Showing VISUAL denoising algorithm comparison")
    # denoising_comparison(img)
    # print("[DEBUG] Showing HISTOGRAM denoising algorithm comparison")
    # denoising_comparison(img, True)

    # DENOISING IMAGE
    denoised_img = smooth.median_filter(img, 9)
    denoised_img = smooth.median_filter(denoised_img, 7)
    # PRINT DENOISED IMAGE AND HISTOGRAM
    #Loader.print_image(denoised_img)
    # Loader.hist_and_cumsum(denoised_img)

    # thresholding_comparison(denoised_img)
    th_img = thresh.apply_thresholding_algorithm(denoised_img,
                                                 thresh.THRESH_TRIANGLE)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
    stretched = cv2.morphologyEx(th_img, cv2.MORPH_ERODE, kernel)
    back, front = thresh.get_regions(denoised_img, stretched)
    # Loader.hist_compare([back, front], ["Back", "Front"])
    #Loader.print_image(front)
    eq = Loader.equalization(front.astype("uint8"))
    eq = bright_and_contrast(eq, 2.8, 80)
    eq = smooth.gaussian(eq, 2.5)
    Loader.print_image(eq)
    # Loader.hist_and_cumsum(eq)

    # EDGE DETECTION
    #edgesFunctions(eq) # Comparison of different edge detection method
    edges = edg.laplacian_of_gaussian(eq, 2)
    Loader.print_image(edges)
    # Fill the cornea area with white pixels
    dilated = fill_cornea(edges)
    #Loader.print_image(dilated)
    #Calculate distances in the cornea-lens region
    #lineas = dw.find_vertical_lines(dilated)
    #diferencias,posiciones = dw.calculate_differences(lineas)
    #dw.draw_graph_distance(diferencias, posiciones)
    #output_image = dw.lines_image(lineas, img)
    # Surround the cornea area and lens edges with a visible, thin line
    # OpenCV 3.x findContours returns (image, contours, hierarchy);
    # OpenCV 4.x returns only (contours, hierarchy)
    (i, contornos, jerarquia) = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_SIMPLE)
    # Keep only contours large enough to belong to the cornea/lens region
    cnts = [c for c in contornos if cv2.contourArea(c) >= 1000]
    cv2.drawContours(img, cnts, -1, (0, 0, 255), 3)
    #Loader.print_image(img)
    return img
Example #2
def PowerSpectrum(data):
	# power spectrum
	smooth_data = SmoothFunction(data)
	freqs = numpy.array(numpy.fft.fftfreq(n=len(data), d=dt))/c
	fft = numpy.array(numpy.fft.fft(smooth_data))
	fft = fft * freqs
	spectrum = abs(fft) * abs(fft)
	smooth_spectrum = Smoothing.window_smooth(spectrum, window='hamming', window_len=15)
	return (freqs, spectrum, smooth_spectrum)
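# The routine above computes a frequency-weighted power spectrum: smooth the signal,
# take its FFT, scale by frequency, square the magnitude, then window-smooth the result.
# A self-contained sketch of that same recipe follows; dt and the Hamming smoothing below
# are illustrative stand-ins for the script's own SmoothFunction/Smoothing helpers, not its API.
import numpy

def power_spectrum_sketch(signal, dt):
    freqs = numpy.fft.fftfreq(n=len(signal), d=dt)
    weighted_fft = numpy.fft.fft(signal) * freqs       # frequency-weighted transform
    spectrum = numpy.abs(weighted_fft) ** 2            # power spectrum
    window = numpy.hamming(15)
    smooth_spectrum = numpy.convolve(spectrum, window / window.sum(), mode='same')
    return freqs, spectrum, smooth_spectrum

# Example: spectrum of a 50 Hz sine sampled at 1 kHz
# t = numpy.arange(0.0, 1.0, 0.001)
# freqs, spec, smooth_spec = power_spectrum_sketch(numpy.sin(2 * numpy.pi * 50 * t), dt=0.001)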
def crossValidationLinearInterpolation(tweetList, k, maxNgram):
    for i in xrange(k):
        trainSet, testSet = divideDataset(tweetList, k, i)
        trainDist, arrayLanguages, languagesAll = utils.obtainNgrams(trainSet, maxNgram)
        linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, maxNgram, trainDist)
        print linearCoefficients
        count = 0
        tot = 0

        for tweet in testSet:
            predictedLanguage, probability = linear.getPredictedLanguageForTweet(linearCoefficients, tweet.text, maxNgram, trainDist)
            utils.printResultTXT(predictedLanguage, tweet)

            if predictedLanguage == tweet.language:
                count = count + 1
            tot = tot + 1
            # print str(count)+'/'+str(tot)
        print 'correct tweets fold '+str(i)+' = '+str(count)+'/'+str(tot)
def fill_cornea(edge_image):

    k1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 10))
    res1 = cv2.morphologyEx(edge_image, cv2.MORPH_DILATE, k1)
    print("DILATED")
    #Loader.print_image(res1)
    enhance_black = smooth.min_filter(res1, 7)
    print("BLACk")
    #Loader.print_image(enhance_black)
    k2 = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 5))
    res2 = cv2.morphologyEx(enhance_black, cv2.MORPH_ERODE, k2)
    print("eroded")
    #Loader.print_image(res2)
    res3 = smooth.max_filter(res2, 3)
    res3 = smooth.gaussian(res3, 1.5)
    print("dilated")
    #Loader.print_image(res3)

    return res3
def PlotSpans(files, axs, clr):
	cdfs = [CDF(f) for f in files]
	data = [c[0] for c in cdfs]
	data = reduce(operator.add, data)

	hist, edges = numpy.histogram(data, range=(0.0, 400.0), bins=200)

	hist = Smoothing.window_smooth(hist)
	# 'width' is assumed to be defined at module level; the histogram bin edges
	# stand in for the otherwise undefined 'new_bins'
	if clr == 'b':
		plt.bar([i - width/2 for i in range(len(edges[:-1]))], hist, width, color=clr, align='center')
	else:
		plt.bar([i + width/2 for i in range(len(edges[:-1]))], hist, width, color=clr, align='center')
Example #6
def getLifespan(file,axs=None):

	cdf = CDF(file)
	# raw data
	data = cdf[0]

	# smooth out the data and digitize above a certain threshold
	smooth = Smoothing.window_smooth(numpy.array(data), window_len=40, window='blackman')
	smooth = smooth - 0.18
	new_data = numpy.ceil(smooth)
	smooth = smooth + 0.18

	if axs:
		axs.plot(data[1200:2800], linewidth=2.5, color='k', linestyle=':', alpha=0.7, label=r'$C(t)$')
		axs.plot(smooth[1200:2800], linewidth=2.0, color='r', linestyle='-', alpha=0.7, label=r'$C_s(t)$')
		axs.plot(new_data[1200:2800], linewidth=3.5, color='g', linestyle='-', alpha=0.8, label=r'$f(t)$')

	return [c for c in CutUpCycleBits(new_data)]
def denoising_comparison(input_image, hist: bool = False):
    # NON LOCAL MEAN DENOISING
    nl_means_denoised_img = smooth.denoising_NlMeans(input_image)
    # MEDIAN FILTER DENOISING (second pass chained on the first result)
    mean_denoised_img = smooth.median_filter(input_image, 9)
    mean_denoised_img = smooth.median_filter(mean_denoised_img, 9)
    # GAUSSIAN DENOISING
    gaussian_denoised = smooth.gaussian(input_image, 1.5)
    # MINIMUM FILTER
    minimum_denoised = smooth.min_filter(input_image, (5, 5))
    # MAXIMUM FILTER
    maximum_denoised = smooth.max_filter(input_image, (5, 5))

    denoised_imgs = [
        input_image, gaussian_denoised, mean_denoised_img, nl_means_denoised_img,
        minimum_denoised, maximum_denoised
    ]
    denoised_titles = [
        "Original", "Denoised Gaussian", "Median Filtered", "NL Means Filter",
        "Minimums Filter", "Maximums Filter"
    ]
    Loader.hist_compare(denoised_imgs, denoised_titles, hist)

    return denoised_imgs, denoised_titles
def plot(x,y,wanted_list,groups):
    #print(x[0:5])
    #one graph for each group
    font = {'size': 13}
    plt.rc('font', **font)

    stevec = 0  # group counter ("stevec" is Slovenian for "counter")
    for g in groups:
        #if len(g) == 1: plt.plot(x, y[a])
        #else:
        for a in g:
            #plt.plot(x,y[a], label=wanted_list[a])
            yy = [1000000 * m for m in Smoothing.smoothTriangle(y[a],3)]
            el = yy[0]
            yy =  [el,el,el] + yy[:-3]
            #yy =   yy[3:-3]
            #word  = wanted_list[a][0].title() + ' ' + wanted_list[a][1].title()
            word = wanted_list[a].title()
            #if word == 'Dicaprio': word ='DiCaprio'
            #if wanted_list[a][1]=='hiv': word = 'Virus HIV'
            #plt.plot(x,yy,label=wanted_list[a].title())
            yy = yy[6:-10]
            xx = x[6:-10]
            print('lenx',len(x),'len y',len(yy))
            plt.plot(xx,yy,label = word)

        #plt.legend(bbox_to_anchor=(1, 1), loc=0, borderaxespad=1.)
        plt.legend(loc=1)
        if stevec==0:
            plt.text(datetime.datetime(2004, 8, 13, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2004, 8, 13, 0, 0), -110,  'OI Atene',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2000, 9, 15, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2000, 9, 15, 0, 0), -110,  'OI Sydney',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(1998, 2, 7, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(1998, 2, 7, 0, 0), -110,  'OI Nagano',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2006, 2, 10, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2006, 2, 10, 0, 0), -110,  'OI Torino',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2002, 2, 8, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2002, 2, 8, 0, 0), -110,  'OI Salt Lake City',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.xlabel('Leto',labelpad=70)
        elif stevec==1:
            plt.text(datetime.datetime(2001, 1, 20, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2001, 1, 20, 0, 0), -80,  'Izvolitev Busha',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.xlabel('Leto',labelpad=80)
        elif stevec==3:
            plt.text(datetime.datetime(2003, 3, 20, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2003, 3, 20, 0, 0), -310,  'Invazija na Irak',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2001, 10, 7, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2001, 10, 7, 0, 0), -310,  'Invazija na Afganistan',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.xlabel('Leto',labelpad=100)

        elif stevec==20:
            plt.text(datetime.datetime(2001, 11, 23, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2001, 11, 23, 0, 0), -75,  'Primer BSE v SLO',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2006, 2, 12, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2006, 2, 12, 0, 0), -75,  'Primer ptičje gripe v SLO ',horizontalalignment='right', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.xlabel('Leto',labelpad=75)  
        elif stevec==6:
            plt.text(datetime.datetime(2001, 9, 11, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2001, 9, 11, 0, 0), -110,  '11. september',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2004, 9, 3, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2004, 9, 3, 0, 0), -145,  'Beslan',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2004, 3, 11, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2004, 3, 11, 0, 0), -110,  'Madrid',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2005, 7, 7, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2005, 7, 7, 0, 0), -110,  'London',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2002, 10, 23, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2002, 10, 23, 0, 0), -145,  'Moskva',horizontalalignment='left', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(2002, 10, 12, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(2002, 10, 12, 0, 0), -110,  'Bali',horizontalalignment='right', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.text(datetime.datetime(1998, 8, 7, 0, 0), 0, ' |', fontsize=15,weight='heavy')
            plt.text(datetime.datetime(1998, 8, 7, 0, 0), -110,  'Tanzanija',horizontalalignment='center', bbox=dict(facecolor='blue', alpha=0.1),size=23,style='italic',weight='light')
            plt.xlabel('Leto',labelpad=130)
        else:
            plt.xlabel('Leto',labelpad=0)
        #if stevec==4: plt.legend(loc=2)
        #else: plt.legend(loc=1)
        stevec+=1

        plt.gcf().autofmt_xdate()
        plt.ylabel('Število pojavitev na milijon besed',labelpad=50)
        #plt.xlabel('Leto',labelpad=20)

        plt.show()
tweetListPreProcessed_test = preprocess.main(tweetList_test)
# shuffle(tweetListPreProcessed)

# 3-. Algorithms

# 3.1-. Algorithms: Bayesian Networks
#   3.2.1-. Linear interpolation
#       Generate linear coefficients: input (n-grams and language)
#       Smooth data

# cv.crossValidationLinearInterpolation(tweetListPreProcessed_train, 3, maxNgram)
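# The comments above describe linear-interpolation smoothing: the smoothed probability of an
# n-gram is a weighted sum of the maximum-likelihood estimates of all lower-order models, with
# the per-language weights (the "linear coefficients") learned from the training split.
# A minimal, self-contained sketch of the idea; the helper below and its hand-set lambdas are
# illustrative only, not the project's linear/utils API.
def interpolated_trigram_prob(trigram, unigram_counts, bigram_counts, trigram_counts, total, lambdas):
    w1, w2, w3 = trigram
    p_uni = unigram_counts.get(w3, 0) / float(total)
    p_bi = bigram_counts.get((w2, w3), 0) / float(max(unigram_counts.get(w2, 0), 1))
    p_tri = trigram_counts.get(trigram, 0) / float(max(bigram_counts.get((w1, w2), 0), 1))
    l1, l2, l3 = lambdas  # non-negative weights that sum to 1
    return l1 * p_uni + l2 * p_bi + l3 * p_tri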
linearCoefficientsAll = list()

trainDist, arrayLanguages, languagesAll = utils.obtainNgrams(tweetListPreProcessed_train, maxNgram)
for gram in xrange(1, maxNgram+1):
    linearCoefficientsAll.append(linear.getlinearcoefficientsForLanguageArray(arrayLanguages, gram, trainDist))

print linearCoefficientsAll

# linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients)


count = 4  # n-gram order to start from

for i in xrange(count, maxNgram):
    count = count + 1
    t0 = time.time()

    for tweet in tweetListPreProcessed_test:
        # t0 = time.time()
        # Remaining arguments follow the same pattern as crossValidationLinearInterpolation above
        predictedLanguage, probability = linear.getPredictedLanguageForTweet(
            linearCoefficientsAll[i], tweet.text, count, trainDist)


if __name__ == '__main__':

    print("[DEBUG] Load image from local sources")
    # img = Loader.load_image("test/th.jpeg")
    # Loader.print_image(stretched)
    # Loader.print_image(smoothed_thresholded)
    # Loader.print_image(front)

    for i in range(1, 13):
        img = Loader.load_image("im" + str(i) + ".jpeg")
        print("Loaded image " + "im" + str(i) + ".jpeg")
        # DENOISING IMAGE
        denoised_img = smooth.median_filter(img, 9)
        denoised_img = smooth.median_filter(denoised_img, 7)

        # thresholding_comparison(denoised_img)
        th_img = thresh.apply_thresholding_algorithm(denoised_img,
                                                     thresh.THRESH_TRIANGLE)
        # Smooth the edges produced by the segmentation
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
        stretched = cv2.morphologyEx(th_img, cv2.MORPH_ERODE, kernel)
        smoothed_thresholded = smooth.max_filter(stretched, 5)
        # Extract the original-image regions that correspond to the segmented areas
        back, front = thresh.get_regions(denoised_img, stretched)

        IMAGE_TYPE_CLASSIFICATION = define_image_properties(
            smoothed_thresholded)
        if IMAGE_TYPE_CLASSIFICATION == "A":
Example #11
c = '"En Cada Lucha Aquel Que Va A Muerte Es El Que Gana" Goazen @PasaiaRegional!! #aupaekipo #aupapasaia pic.twitter.com/BQ1ikdE2Qt'


text = preprocess.preprocessText(tweetEU)

# linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, maxNgram, corpusNgrams)
linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients)

linearCoefficients = linearCoefficientsALL[maxNgram-1]
import time
t1 = time.time()
for tweet in tweetListPreProcessedtest:
    t0 = time.time()

    predictedLanguage, probability = linear.getPredictedLanguageForTweet(linearCoefficients, tweet.text, maxNgram, corpusNgrams)
    utils.printResultTXT(predictedLanguage, tweet, 5)

    print "time for tweet= "+str(time.time()-t0)
print "time total= "+str(time.time()-t1)

# sys.stdout.write("\n    Tweet:  "+str(text.encode("utf-8")))
# sys.stdout.write("\n    Tweet language:   "+str(predictedLanguage)+"\n    Probability of:  "+str(probability)+"\n")


# 3.3-. Algorithms: Ranking Methods

# cv.nestedCrossValidation(tweetListPreProcessed,5,5,[0,0,0],arrayLanguagesFull)
# cv.crossValidation(tweetListPreProcessed, 3, [0,0,0], arrayLanguagesFull, maxNgram+1)

# 3.4-. Out-of-place Measure
# _____________________________________________________________________________


# 1-. Read dataset and create tweetList fullfilled of Tweet object*

dataset = sys.argv[1]
maxNgram = int(sys.argv[2])

filename = os.path.basename(dataset).split('.')

tweetList = read.read_tweets_dataset(dataset)

# 2-. Pre-process state
    # Raw data -> tweetList
    # Clean data -> tweetListPreProcessed
tweetListPreProcessed = preprocess.main(tweetList)

# 3-. OBTAIN N-GRAMS and Linear Coefficients

for i in xrange(5, maxNgram+1):
    corpusNgrams, arrayLanguages, arrayLanguagesFull = utils.obtainNgrams(tweetListPreProcessed, i+1)
    linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, i, corpusNgrams)
    # print linearCoefficients
    # Append this order's coefficients to the shared output file
    with open('../Dataset/LICoefficients_'+str(maxNgram)+'gram_for-'+str(filename[0])+'.txt', 'a+') as coeff_file:
        for li in linearCoefficients:
            coeff_file.write(str(i)+"\t"+str(li[0]))
            for co in xrange(1, i+1):
                coeff_file.write("\t"+str(li[co]))
            coeff_file.write("\n")
fig = plt.figure(num=None, facecolor='w', edgecolor='w', frameon=True)
axs = fig.add_subplot(1,1,1)
axs.set_ylabel(r'Spectrum', size='xx-large')
axs.set_xlabel(r'Frequency / cm$^{-1}$', size='xx-large')

# use the same axis for all the spectra
freq_axis = FreqAxis(len(tcfs[0])/2,dt)

fft_tcfs_x = [FFT(t) for t in tcfs_x]
fft_tcfs_y = [FFT(t) for t in tcfs_y]
fft_tcfs_z = [FFT(t) for t in tcfs_z]

fft_tcfs = [(x+y+z)/3.0 for x,y,z in zip(fft_tcfs_x,fft_tcfs_y,fft_tcfs_z)]
#avg_fft = reduce(operator.add, fft_tcfs_z)/len(fft_tcfs_z)
avg_fft = FFT(avg_tcf)
smooth = Smoothing.window_smooth(avg_fft)


#for f in fft_tcfs:
	#axs.plot(freq_axis, f, linewidth=2.0)
axs.plot(freq_axis, smooth, linewidth=4.0, linestyle='-', color='k')

vector_tcf_ffts = [FFT(t) for t in vector_tcfs]
avg_vector_fft = numpy.array(reduce(operator.add,vector_tcf_ffts))/len(vector_tcf_ffts)

smooth_vector_fft = Smoothing.window_smooth(avg_vector_fft)
vector_freq_axis = FreqAxis(correlation_tau,dt)
print len(vector_freq_axis)
print len(smooth_vector_fft)
axs.plot(vector_freq_axis, smooth_vector_fft, linewidth=4.0, linestyle='-', color='r')
b = 'Hau ez dakit zer den estamos hablando en un idioma edo beste batean'

c = '"En Cada Lucha Aquel Que Va A Muerte Es El Que Gana" Goazen @PasaiaRegional!! #aupaekipo #aupapasaia pic.twitter.com/BQ1ikdE2Qt'

text = preprocess.preprocessText(tweetEU)

# linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, maxNgram, corpusNgrams)
linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients)

linearCoefficients = linearCoefficientsALL[maxNgram - 1]
import time
t1 = time.time()
for tweet in tweetListPreProcessedtest:
    t0 = time.time()

    predictedLanguage, probability = linear.getPredictedLanguageForTweet(
        linearCoefficients, tweet.text, maxNgram, corpusNgrams)
    utils.printResultTXT(predictedLanguage, tweet, 5)

    print "time for tweet= " + str(time.time() - t0)
print "time total= " + str(time.time() - t1)

# sys.stdout.write("\n    Tweet:  "+str(text.encode("utf-8")))
# sys.stdout.write("\n    Tweet language:   "+str(predictedLanguage)+"\n    Probability of:  "+str(probability)+"\n")

# 3.3-. Algorithms: Ranking Methods

# cv.nestedCrossValidation(tweetListPreProcessed,5,5,[0,0,0],arrayLanguagesFull)
# cv.crossValidation(tweetListPreProcessed, 3, [0,0,0], arrayLanguagesFull, maxNgram+1)

# 3.4-. Out-of-place Measure
Example #15
def fetch_output(training_file, dev_set):

    # Defines

    tags_file = r'POSTagList.txt'
    delimiter = '\t'
    word_column = 1
    tag_column = 2

    # Initialization

    answers = []

    # Parsing sentences from training data and generating word-tag bigrams

    tag_frequency_count = CountFrequency.give_freq_counts(
        training_file, delimiter, tag_column)
    word_frequency_count = CountFrequency.give_freq_counts(
        training_file, delimiter, word_column)
    sentence_seq_word_list = TPM.construct_sentence_sequence(
        training_file, delimiter, word_column, 0)
    sentence_seq_tag_list = TPM.construct_sentence_sequence(
        training_file, delimiter, tag_column, 0)
    unked_sequence_word_list = Input_Generation.define_training_unk_words(
        word_frequency_count, sentence_seq_word_list)
    word_tag_pairs = EPM.get_epm_bigrams(sentence_seq_tag_list,
                                         unked_sequence_word_list)
    tag_tag_pairs = TPM.get_bigrams(sentence_seq_tag_list)
    vocabulary = set(unked_sequence_word_list)

    # Creating the master parameter list

    # master_a = Smoothing.get_backoff_smoothed_tpm(tags_file, tag_tag_pairs, tag_frequency_count)
    master_a = Smoothing.get_add_k_smoothed_tpm(tags_file, tag_tag_pairs,
                                                tag_frequency_count)
    # master_a = TPM.get_transition_probability_matrix(tags_file, tag_tag_pairs, tag_frequency_count)
    master_b = EPM.get_emission_probability_matrix(tags_file, vocabulary,
                                                   word_tag_pairs,
                                                   tag_frequency_count)
    master_pie_1 = TPM.get_initial_pi_matrix(tags_file, tag_tag_pairs,
                                             unked_sequence_word_list)
    master_pie_2 = TPM.get_end_pi_matrix(tags_file, tag_tag_pairs,
                                         tag_frequency_count)

    # Apply smoothing to Transition probability matrix
    # Generating the list of sentences to be fed

    all_inputs = TPM.construct_sentence_sequence(dev_set, delimiter, 1, 0)

    # Find out the state_sequence_list and observation_sequence_list

    extracted_inputs = Input_Generation.extract_input_sentences_list(
        all_inputs)
    unked_extracted_inputs = Input_Generation.define_extracted_unk_words(
        unked_sequence_word_list, extracted_inputs)
    #extracted_tags = Input_Generation.extract_possible_tags_list(extracted_inputs, word_tag_pairs)

    # loop this in for all sentences
    for index, observation_sequence in enumerate(unked_extracted_inputs):

        if len(observation_sequence):

            state_sequence = ['I', 'O', 'B']

            # construct matrix A
            a = LocalParamters.construct_local_transition(
                state_sequence, master_a)

            # construct matrix B
            b = LocalParamters.construct_local_emission(
                state_sequence, observation_sequence, vocabulary, master_b)

            # construct matrix pie_1
            pie_1 = LocalParamters.construct_local_pie_start(
                state_sequence, master_pie_1)

            # construct matrix pie_2
            pie_2 = LocalParamters.construct_local_pie_end(
                state_sequence, master_pie_2)

            # input it to viterbi
            answer_string = ViterbiDecoding.viterbi_decode(
                observation_sequence, state_sequence, a, b, pie_1, pie_2)

            # fetch the answer strings
            answers.extend(answer_string)
            answers.extend(" ")

    return answers
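# fetch_output assembles the standard HMM ingredients (transition matrix A, emission matrix B,
# and start/end priors) and hands them to a Viterbi decoder. For reference, a small generic
# Viterbi sketch follows; it is not the project's ViterbiDecoding module, and the dict-based
# layout of A, B and pi is an assumption made for illustration.
import math

def viterbi_sketch(observations, states, A, B, pi):
    """A[s][t] = P(t | s), B[s][o] = P(o | s), pi[s] = P(s at t=0); unseen events get a floor."""
    floor = 1e-12
    score = {s: math.log(pi.get(s, floor)) + math.log(B[s].get(observations[0], floor))
             for s in states}
    backptr = []
    for obs in observations[1:]:
        new_score, pointers = {}, {}
        for s in states:
            best_prev = max(states, key=lambda p: score[p] + math.log(A[p].get(s, floor)))
            new_score[s] = (score[best_prev] + math.log(A[best_prev].get(s, floor))
                            + math.log(B[s].get(obs, floor)))
            pointers[s] = best_prev
        score, backptr = new_score, backptr + [pointers]
    # backtrack from the best final state
    state = max(states, key=lambda s: score[s])
    path = [state]
    for pointers in reversed(backptr):
        state = pointers[state]
        path.append(state)
    return list(reversed(path))

# Toy usage with the same I/O/B tag set used above
# states = ['I', 'O', 'B']
# A = {s: {t: 1.0 / 3 for t in states} for s in states}
# B = {s: {'the': 0.5, 'dog': 0.5} for s in states}
# pi = {s: 1.0 / 3 for s in states}
# print(viterbi_sketch(['the', 'dog'], states, A, B, pi))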
Example #16
#import data
#specify sim run
dataDM = np.loadtxt(dmType+'_dm.txt',skiprows=1)


# In[5]:

#cut function call
cutDataMin,radMin,rMinSearch,cutDataMax,radMax,rMaxSearch = Cuts(dataDM)


# In[6]:

#smoothing function call
hMin,hMax = Smoothing(cutDataMin,cutDataMax,rMinSearch,rMaxSearch)


# In[7]:

#density function call
rhoMin,rhoMax = Densities(kernel, cutDataMin, hMin, cutDataMax, hMax)
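# The calls above sketch an SPH-style pipeline: cut the particle data, pick smoothing lengths h,
# then evaluate kernel-weighted densities and their gradients. A generic sketch of what a call
# like Densities(kernel, data, h) typically computes follows; the actual Densities/kernel
# implementations are not shown in this snippet, so the details below are assumptions.
def density_sketch(positions, masses, h, kernel):
    # positions: (N, 3) array, masses: (N,) array, h: smoothing length, kernel: W(r, h)
    rho = np.zeros(len(positions))
    for i in range(len(positions)):
        r = np.linalg.norm(positions - positions[i], axis=1)  # distances to every particle
        rho[i] = np.sum(masses * kernel(r, h))                # kernel-weighted mass sum
    return rho

# Example with a simple Gaussian kernel
# gaussian_kernel = lambda r, h: np.exp(-(r / h) ** 2) / (np.pi ** 1.5 * h ** 3)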


# In[8]:

#density gradient and laplacian function call
del_rhoMin, delsq_rhoMin, del_rhoMax, delsq_rhoMax = Gradients(del_kernel, delsq_kernel, cutDataMin, rhoMin,
                                                               cutDataMax, rhoMax, hMin, hMax)


# In[9]: