def loading_data_by_category(data_path, eng=True, num=True, punc=False):
    # data example : 'title', 'content', 'category', 'nouns'
    # data format : xlsx
    corpus = pd.read_excel(data_path)
    corpus = np.array(corpus)
    title = []
    contents = []
    nouns = set()
    for doc in corpus:
        if type(doc[3]) is not str or type(doc[4]) is not str:
            continue
        if len(doc[3]) > 0 and len(doc[4]) > 0:
            tmptitle = normalize(doc[3],
                                 english=eng,
                                 number=num,
                                 punctuation=punc)
            title.append(tmptitle)

            tmpcontents = normalize(doc[4],
                                    english=eng,
                                    number=num,
                                    punctuation=punc)
            contents.append(tmpcontents)

            for noun in doc[5].split():
                nouns.add(noun)
    return title, contents, nouns
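A minimal usage sketch (not from the source): the file name is a placeholder, and pandas, numpy and the project's normalize() helper are assumed to be importable as in the snippet above.

# Hypothetical call; only the keyword flags come from the function signature above.
titles, contents, nouns = loading_data_by_category('labeled_articles.xlsx',
                                                   eng=True, num=True, punc=False)
print(len(titles), 'documents,', len(nouns), 'distinct nouns')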
Example #2
def main():
    mitStopWords = createMitStopWordList()  # "New" is in the MIT stopword list! NEW York!
    #cleanUp_1(extractTxt,sublStopWords)
    cleanUp_2(extractTxt)
    normalize(processedListOfTexts, mitStopWords, nounList)
    processTexts(processedListOfTexts)
Example #3
def trainModel(data):
    # Normalize data
    print('Please wait while I normalize the data...', end=' ')
    normalize(data)
    print('Done!\n')
    # Introduce Algorithms
    features = list(data.columns[1:])
    accuracy = validator(data, features)
    print(
        f'Running "Forward Selection" & "Nearest Neighbor" with all {len(features)} features, using "Leaving-One-Out" evaluation, I get an accuracy of',
        '{:.1%}\n'.format(accuracy))
    # Find best features
    searchFeatures(data)
Example #4
def highpass(sr, data):
    # b = firwin(101, cutoff = 1000, nyq = sr/2, pass_zero = False)
    # data = lfilter(b, [1.0], data)
    data = data.high_pass_filter(1000)
    # wavfile.write("./musicFiles/testHigh.wav", sr, data.astype(np.int16))
    normalized_sound = normalize(data, -20.0)
    normalized_sound.export("./musicFiles/testHighNorm.wav", format="wav")
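The high_pass_filter/export calls and the normalize(segment, headroom) signature look like pydub's AudioSegment API, although the snippet's own imports are not shown. A minimal sketch under that assumption:

# Sketch only: assumes pydub supplies normalize() and high_pass_filter() here.
from pydub import AudioSegment
from pydub.effects import normalize

seg = AudioSegment.from_wav("./musicFiles/furTest.wav")
highpass(seg.frame_rate, seg)  # writes ./musicFiles/testHighNorm.wav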
Example #5
def show_normalized():
    if 'imgFile' not in request.files:
        return 'No selected file'
    file = request.files['imgFile']
    # if the user does not select a file, the browser
    # also submits an empty part without a filename
    if file.filename == '':
        return 'No selected file'
    image = request.files['imgFile']
    method = request.form.get('method')
    img_path = 'static/images/image.png'
    image.save(img_path)
    if method == 'k':
        # cumulative
        title = 'Cumulative'
        imagee = Image.open(img_path)
        new_image = normalize(imagee)
        norm_img_path = 'static/images/normalized_image.png'
        new_image.save(norm_img_path)
    else:
        # scaling
        title = 'Scaling'
        base_image = Image.open(img_path)
        width, height = base_image.size
        normalized_img = scaling(base_image, width, height)
        norm_img_path = 'static/images/normalized_image.png'
        normalized_img.save(norm_img_path)

    return render_template('result.html',
                           title='Normalized Picture (' + title + ')',
                           url_before=img_path + '?' + str(time.time()),
                           url_after=norm_img_path + '?' + str(time.time()))
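One way to exercise this view is through Flask's test client; the '/normalize' URL and sample.png are assumptions (the route decorator is not part of the excerpt, and app is the project's Flask application). Only the imgFile and method field names come from the code above.

with app.test_client() as client:
    with open('sample.png', 'rb') as f:
        resp = client.post('/normalize', data={'imgFile': f, 'method': 'k'})
    print(resp.status_code)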
Example #6
def main():
	global scoreOfStates
	initializeScoreDictionary()
	# sentiDictionary=sentiReadWord('SentiWordNet_3.0.0_20100705.txt')
	# bingNegativeWords=bingReadWords('negative-words.txt')
	# bingPositiveWords=bingReadWords('positive-words.txt')
	# print bingPositiveWords
	affinDictionary = affinReadwords('AFINN-111.txt')
	i = 0
	initializeGeoCode()
	print scoreOfStates
	with open("twits.txt") as f:
		for line in f:
			if i == 20:
				break
			# print line
			try:
				TweetInfo = json.loads(line)
				if checkLiesInIndia(TweetInfo) & TweetInfo.has_key('text'):
					i+=1
					listOfTokens= normalize(TweetInfo['text'])
					print listOfTokens
					print ""
					tweetScore = affinCalculateSentiment(listOfTokens,affinDictionary)
					print tweetScore 
					print type(tweetScore)
					print ""
					addTweetScore(TweetInfo,tweetScore)
					print scoreOfStates
					print "done"	
			except:
				print "Exception found"
				pass

	print scoreOfStates
Example #7
def show_normalized():
    if 'imgFile' not in request.files:
        return json.dumps({'status': 'Error1'})
    file = request.files['imgFile']
    if file.filename == '':
        return json.dumps({'status': 'Error2'})
    image = request.files['imgFile']
    method = request.form.get('method')
    img_path = 'static/images/image.png'
    image.save(app.root_path + '/' + img_path)
    if method == 'k':
        # cumulative
        title = 'Cumulative'
        imagee = Image.open(app.root_path + '/' + img_path)
        new_image = normalize(imagee, app.root_path)
        norm_img_path = 'static/images/normalized_image.png'
        new_image.save(app.root_path + '/' + norm_img_path)
    else:
        # scaling
        title = 'Scaling'
        base_image = Image.open(app.root_path + '/' + img_path)
        width, height = base_image.size
        normalized_img = scaling(base_image, width, height, app.root_path)
        norm_img_path = 'static/images/normalized_image.png'
        normalized_img.save(app.root_path + '/' + norm_img_path)

    return json.dumps({'url_after': norm_img_path + '?' + str(time.time())})
Example #8
def lowpass(sr, data):
    b = firwin(5, cutoff=400, nyq=sr / 2, pass_zero=True)
    data = normalize("./musicFiles/furTest.wav", -20.0)
    data = lfilter(b, [1.0], data.get_array_of_samples())
    data = np.array(data)
    # data = data.low_pass_filter(400)
    # data.export("./musicFiles/testLow.wav", format = "wav")
    wavfile.write("./musicFiles/testLow.wav", sr, data.astype(np.int16))
Example #9
def decompose(fds, u):
    new_fds = normalize(fds)
    r = set({u})

    continuer, table_faux, fd_faux = check_condition(fds, r)
    while (continuer):
        r.remove(table_faux)
        closure = improved(fds, fd_faux.prerequis)
        r.add(SetAttr(table_faux.intersection(closure)))
        r.add(SetAttr(table_faux.difference(closure).union(fd_faux.prerequis)))
        continuer, table_faux, fd_faux = check_condition(fds, r)

    # check_condition_debug(fds,r)
    return r
Example #10
def get_quantized_features(features, quantization_factor=30):
    normalized_features = normalize(features, axis=1)
    offset = np.abs(np.min(normalized_features))
    offset_features = normalized_features + offset # Making all feature values positive

    # Let's proceed to quantize these positive feature values
    min_val = np.min(offset_features)
    max_val = np.max(offset_features)

    bins = np.linspace(start=min_val, stop=max_val, num=quantization_factor)
    median_values = get_median_values_for_bins(bins)
    original_quantized_features = np.digitize(offset_features, bins)

    quantized_features = np.apply_along_axis(lambda row: [median_values[x] for x in row], 1, original_quantized_features)
    quantized_features = np.floor(quantization_factor*quantized_features)
    return quantized_features
Example #11
def tester(listOfTweets,annotation):
	bingNegativeWords=bingReadWords('negative-words.txt')
	bingPositiveWords=bingReadWords('positive-words.txt')
	affinDictionary=affinReadwords('AFINN-111.txt')
	sentiDictionary=sentiReadWord('SentiWordNet_3.0.0_20100705.txt')
	correctCount1=0
	correctCount2=0
	correctCount3=0
	totalNumberOfTweets= len(listOfTweets)
	for i in range(len(listOfTweets)):
		listOfTokens=normalize(listOfTweets[i])
		classified1=bingCalculateSentiment(listOfTokens,bingPositiveWords,bingNegativeWords)
		classified2=affinCalculateSentiment(listOfTokens,affinDictionary)
		sentiback = sentiCalculateSentiment(listOfTokens,sentiDictionary)
		classified3= sentiback[1]
		if classified1==annotation[i]:
			correctCount1+=1
		if classified2==annotation[i]:
			correctCount2+=1
		if classified3==annotation[i]:
			correctCount3+=1

	print "accuracyBing : "+ str(float(correctCount1)/totalNumberOfTweets)+" accuracyAffin : "+ str(float(correctCount2)/totalNumberOfTweets)+" accuracySenti : "+ str(float(correctCount3)/totalNumberOfTweets)
Example #12
from addAbsoluteEfficiency import addAbsoluteEfficiency
from addVisibilityGraph import addVisibilityGraph
from fillClearTweet import *
from normalize import *

if __name__ == "__main__":
    print("Start: {0}".format(datetime.datetime.now()))

    connectMongoDB = Connect2MongoDB('localhost', 27017)
    connectMongoDB.setDB('Huelga')
    db = MongoDB(connectMongoDB)
    graph = Neo4jDB(
        Connect2Neo4J(CONST_NEO4J_URI, CONST_NEO4J_USER, CONST_NEO4J_PASSWORD))
    """tweets = db.find(MongoDB.TWEETS_COLLECTION)
    #Inside of MongoDB.find() --> 0 = MongoDB.TWEETS_COLLECTION
    #tweet = db.find(0, getTweet("id_str",'917946195410128897'))



    process = ProcessTweet (db, graph)
    for t in tweets:
        process.process(t)"""

    #fillClearTweet(db, graph)
    addAbsoluteEfficiency(db)
    addVisibilityGraph(db, graph)
    normalize(db)

    graph.connect2Neo4J.closeDB()

    print("\nStop: {0}".format(datetime.datetime.now()))
Example #13
    ## Main loop ##

    while (t < NGer):

        ##        if(t == 150):
        ##            weight = array([0.0,1.0,3.0,10.0,5.0]) # Array of weights used for TOPSIS

        Rt = Pt.addPopulation(Qt)  # Combined population
        Rt.fastNonDominatedSort()  # Nondominated sorting
        ##    Rt.topsisPop(rank=rankType) # Rank within each front
        ##    Rt.globalRankEval()

        if (sampleAll):

            RtObj = Rt.obj  # Save original objective values of St
            Zr, a = normalize(Rt, objRec, minim, Z, p)
            ZRef = Zr * a + objRec.objIdeal
            associate(Rt, Zr, 0)
            ##            distPareto(Rt,ZRef,objRec.objIdeal,a)
            hvContribution(Rt, objRec.objIdeal, a)
            indRemove = niching(NPop, Rt, array([0, 2 * NPop]), weight,
                                multiple)
            Pt = Rt.removeMembers(indRemove, RtObj)

        else:

            indList = zeros(
                NPop,
                dtype=int)  # List of indexes of members to the next population
            i = 0  # Counter of fronts
            sizeEvol = array([
Example #14
def calculate(review, sentiDictionary):
    listOfTokens = normalize(review)
    return sentiCalculateSentiment(listOfTokens, sentiDictionary)
Example #15
def calculate(review,sentiDictionary):
	listOfTokens=normalize(review)
	return sentiCalculateSentiment(listOfTokens,sentiDictionary)
Example #16
if (algo == "-generate"):
    print("generate", file=log)
    if (len(sys.argv) != 3):
        print("Too much arguments for generate", file=sys.stderr)
    generate(int(sys.argv[2]))

else:
    if (sys.argv[2][0] != '-'):
        fds = parseFD(False, sys.argv[2])
    else:
        fds = parseFD(True)

    if (algo == "-normalize"):
        print("normalize", file=log)
        print("Normalize:", normalize(fds))

    elif (algo == "-decompose"):
        print("decompose", file=log)
        print("Decompose:", decompose(fds, schema(fds)))

    else:
        if (len(sys.argv) != 4):
            print("Wrong number of arguments, 4 expected", file=sys.stderr)

        setAttr = SetAttr("".join(sys.argv[3:]))

        if (algo == "-improved"):
            print("improved", file=log)
            print("Improved:", improved(fds, setAttr))
Example #17
def runMOMCEDA(NPop,
               NEval,
               function,
               Nref,
               nReps,
               RTPlot,
               refPoint,
               weight,
               seed=None):

    print 'Running MOMCEDA\n'

    NGer = NEval / NPop - 1  # Number of generations
    NObj = 2  # Number of objectives to optimize
    minim = 1  # minim = 1 if minimizing objectives, minim = 0 otherwise
    p = Nref - 1  # Number of objective axes divisions to generate structured ref. points

    random.seed(seed)

    if (function == 'ZDT4'):
        Nvar = 10
        Vmin = append(0, -5 * ones(Nvar - 1))  # Limits of chromosome values
        Vmax = append(1, 5 * ones(Nvar - 1))
        sigma = append(1.0, 0.1 * ones(Nvar - 1))  # Mutation parameter
    else:
        Nvar = 30
        if (function == 'ZDT6'):
            Nvar = 10
        Vmin = 0.0 * ones(Nvar)  # Limits of chromosome values
        Vmax = 1.0 * ones(Nvar)
        sigma = 1.0 * ones(Nvar) / 2.0  # Mutation parameter

    minObj = array([0.0, 0.0])  # Limits of objective values
    maxObj = array([1.0, 1.0])

    objRec = objectiveRecords(NObj, minim)  # Records for objective values

    Z = generateRefPoints(NObj, p)  # Generate structured reference points
    rankType = 'hv'  # Type of rank used for TOPSIS

    multiple = True  #    multiple: use multiple criteria to select solutions from the last front
    sampleAll = True  #sampleAll: if true, all members from parent population are sampled with the TOPSIS rank

    hvValues = zeros((nReps, NGer))
    conv = []
    finalPop = []
    extime = []

    # Plot parameters
    color = plt.cm.get_cmap('Reds')
    deltac = 0.3
    ObjNames = ['f1', 'f2', 'f3']
    scale = 1.0 / (maxObj - minObj)
    center = array([0.0, 0.0, 0.0])
    countFig = 0  # counter of figures

    ## Offspring parameters

    pMut = 1.0 / Nvar  # Mutation probability
    pSwitch = 0.5  # Probability to switch variables between members
    spread = 0.5  # Parameter to control the spread of generated members
    nc = 30
    nm = 20

    ##sigma = append(1.0,0.1*ones(Nvar-1))
    ##sigma0 = r_[array([1.0]),1.0/10*ones(Nvar-1)]
    ##sigmaf = r_[array([1.0/10]),1.0/500*ones(Nvar-1)]
    ###deltaSigma = (sigma0 - sigmaf) / NGer
    ##qSigma = (sigmaf/sigma)**(1.0/NGer)
    ##sigma = sigma0

    spreadf = 0.05
    qSpread = (spreadf / spread)**(1.0 / NGer)
    deltaSpread = (spread - spreadf) / NGer

    dist = zeros((NGer, Nvar))

    ## Distribution variables

    # Coefficients of the Gaussians of the mixture

    ##choiceOK = False
    ##while(not choiceOK):
    ##    dec = input('Choose decay type:\n(1) Linear\n(2) Exponential\n(3) Logarithmic\n')
    ##    if(dec in [1,2,3]):
    ##        choiceOK = True

    dec = 1  # decay type:1 - Linear, 2 - Exponential, 3 - Logarithmic
    coefGau = calc_coefGau(NPop, dec)

    with open(''.join(['../dev/pareto_front/zdt', function[3],
                       '_front.json'])) as optimal_front_data:
        optimal_front = json.load(optimal_front_data)

    for nExec in arange(nReps, dtype=int):

        start = time.time()

        print 'Starting execution %d ...' % (nExec + 1)

        ## Initialization ##

        t = 0  # Counter of generations
        Pt = Population(NPop, Nvar, Vmin, Vmax, NObj, minim,
                        function)  # Initial Population
        Pt.fastNonDominatedSort()  # Nondominated sorting
        Pt.topsisPop()  # Rank within each front
        Qt = Population(NPop, Nvar, Vmin, Vmax, NObj, minim,
                        function)  # Offspring population

        if (RTPlot):
            #plt.ion()
            plt.figure(figsize=(12, 12))
            plt.title('Population at execution %d' % (nExec + 1), fontsize=18)
            f = array(optimal_front)
            plt.plot(f[:, 0], f[:, 1], color='b', label='Pareto front')
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=18)
            #plt.draw()

        ## Main loop ##

        while (t < NGer):

            ##        if(t == 150):
            ##            weight = array([0.0,1.0,3.0,10.0,5.0]) # Array of weights used for TOPSIS

            Rt = Pt.addPopulation(Qt)  # Combined population
            Rt.fastNonDominatedSort()  # Nondominated sorting
            ##    Rt.topsisPop(rank=rankType) # Rank within each front
            ##    Rt.globalRankEval()

            if (sampleAll):

                RtObj = Rt.obj  # Save original objective values of St
                Zr, a = normalize(Rt, objRec, minim, Z, p)
                ZRef = Zr * a + objRec.objIdeal
                associate(Rt, Zr, 0)
                ##            distPareto(Rt,ZRef,objRec.objIdeal,a)
                hvContribution(Rt, objRec.objIdeal, a)
                indRemove = niching(NPop, Rt, array([0, 2 * NPop]), weight,
                                    multiple)
                Pt = Rt.removeMembers(indRemove, RtObj)

            else:

                indList = zeros(
                    NPop, dtype=int
                )  # List of indexes of members to the next population
                i = 0  # Counter of fronts
                sizeEvol = array([
                    0, len(Rt.fronts[i])
                ])  # Evolution of population's size by adding the fronts

                # Fill population with the first fronts
                while (sizeEvol[i + 1] <= NPop):
                    #         Rt.crowd(Rt.fronts[i]) = Rt.crowdingDistanceAssignment(Rt.fronts[i],minObj,maxObj)
                    indList[sizeEvol[i]:sizeEvol[i + 1]] = Rt.fronts[
                        i]  # Add members to the list
                    i = i + 1
                    sizeEvol = append(sizeEvol,
                                      sizeEvol[i] + len(Rt.fronts[i]))

                # Sort members of the last front according to
                # crowding distance
            #     Rt.crowd[Rt.fronts[i]] = Rt.crowdingDistanceAssignment(Rt.fronts[i],minObj,maxObj)
            #     ind = argsort(Rt.crowd[Rt.fronts[i]])[::-1]

            # Sort members of the last front according to the rank
            #     ind = argsort(Rt.rank[Rt.fronts[i],1])

                listSt = r_[indList[:sizeEvol[i]], Rt.fronts[i]]
                St = Rt.addMembers(listSt)

                K = NPop - sizeEvol[i]
                StObj = St.obj  # Save original objective values of St
                Zr, a = normalize(St, objRec, minim, Z, p)
                ZRef = Zr * a + objRec.objIdeal
                associate(St, Zr, sizeEvol[i])
                ZRef = Zr * a + objRec.objIdeal
                ##    distPareto(St,ZRef,objRec.objIdeal,a)
                hvContribution(St, objRec.objIdeal, a)
                indRemove = niching(K, St, sizeEvol, weight, multiple)
                Pt = St.removeMembers(indRemove,
                                      StObj)  # Next generation's population

            if (RTPlot):
                Pt.plot(color((1 - deltac) * float(t) / NGer + deltac), scale,
                        center, ObjNames, countFig)
                #axes = plt.gca()
                #axes.set_ylim([0,1])
            Pt.topsisPop(weight, rank=rankType)  # Rank within each front
            Qt = Pt.offspringPop(
                coefGau, sigma, pMut, spread,
                pSwitch)  # Selection, recombination and mutation

            ##    dist[t] = maxDist(Pt.members)/(1*(Vmax - Vmin))
            ##    sigma = dist[t]
            ##        sigma = random.rand()*append(10.0,1.0*ones(Nvar-1))

            ##    MEv = sqrt(sum(Pt.members[:,1:]**2)/(NPop*(Nvar-1)))
            ##    print 'Generation = ', t, 'Mean Square Value =', MEv

            hv = HyperVolume(refPoint)
            hvValues[nExec, t] = hv.compute(Pt.obj)

            t = t + 1
            ##    sigma = sigma - deltaSigma
            ##        sigma = sigma*qSigma

            ##        spread = spread*qSpread
            spread = spread - deltaSpread

        end = time.time()
        extime.append(end - start)

        conv.append(convergence(Pt.obj.tolist(), optimal_front))

        print 'Hypervolume = ', hvValues[nExec, -1]
        print 'Convergence metric = ', conv[nExec]
        print 'Execution ', nExec + 1, ' completed in ', extime[
            -1], ' seconds \n'

        ##    normIgd,Zint = normIGDmetric(ZRef,objRec.objIdeal,a,Pt.obj,function)
        ##    igd = IGDmetric(ZRef,objRec.objIdeal,Pt.obj,function)[0]

        ##    normIgd2,Zint = normIGDmetric2(ZRef,objRec.objIdeal,a,Pt.obj,function)
        ##    igd2 = IGDmetric2(ZRef,objRec.objIdeal,Pt.obj,function)[0]

        ##    print 'NormIGD = {0:e}'.format(normIgd)
        ##    print 'IGD = {0:e}'.format(igd)
        ##    print 'NormIGD2 = {0:e}'.format(normIgd2)
        ##    print 'IGD2 = {0:e}'.format(igd2)

        ##    with open(''.join(['Pareto/Prt_',function,'.pk1']), 'r') as filename:
        ##        f = pickle.load(filename)
        ##step = len(f)/50
        ##plt.scatter(f[::step,0],f[::step,1],s=1,color='b')

        if (RTPlot):
            Pt.plot(color((1 - deltac) * float(t) / NGer + deltac), scale,
                    center, ObjNames, countFig)
            #axes = plt.gca()
            #axes.set_ylim([0,1])
            #plt.draw()
            #plt.ioff()
            plt.show()
            #plt.savefig(''.join(['../figures/',function,'.png']), bbox_inches='tight')

        countFig = countFig + NObj * (NObj - 1) / 2

        finalPop.append(Pt)

##    if(nReps == 1):
##        MEv = sqrt(sum(Pt.members[:,1:]**2)/(NPop*(Nvar-1)))
##        print 'Generation = ', t, 'Mean Square Value =', MEv
##
##        print 'rho =', Pt.rho
##        print 'minObj=', objRec.objIdeal

##refPoint = objRec.extPoints.max(axis=0)*1.1
##print 'refPoint: ',refPoint
##        hv = HyperVolume(refPoint)
##        print 'hypervolume=', hv.compute(Pt.obj)

##ind = where((St.obj[:,0] != 0) & (St.obj[:,1]!=0))[0]
##j = random.choice(ind)
##a = (StObj[j] - objRec.objIdeal) / St.obj[j]
##ZPlot = Z*a + objRec.objIdeal
##
##for i in arange(0,len(Z),len(Z)/10):
##    plt.plot(vstack((objRec.objIdeal[0],ZPlot[i,0])),vstack((objRec.objIdeal[1],ZPlot[i,1])),'-',color='k')
##plt.plot(vstack((objRec.objIdeal[0],ZPlot[-1,0])),vstack((objRec.objIdeal[1],ZPlot[-1,1])),'-',color='k')
##inter = diag(a) + objRec.objIdeal
##plt.plot(inter[:,0],inter[:,1],'-',color='b')

##inter = diag(a) + objRec.objIdeal
##plt.plot(inter[:,0],inter[:,1],'-',color='b')

##plt.show()
##        axes = plt.gca()
##        axes.set_ylim([0,1])
##
##        plt.savefig(''.join(['../figures/',function,'.png']), bbox_inches='tight')

##        plt.figure(2)
##        plt.plot(hvValues)
##        plt.savefig(''.join(['../figures/HV_',function,'.png']), bbox_inches='tight')

##        # Save Population
##        with open(''.join(['../dev/files/Pop_',function,'_MOMCEDA','.pk1']), 'wb') as output:
##            pickle.dump(finalPop, output, pickle.HIGHEST_PROTOCOL)
##
##        # Save Hypervolume
##        with open(''.join(['../dev/files/HV_',function,'_MOMCEDA','.pk1']), 'wb') as output:
##            pickle.dump(hvValues, output, pickle.HIGHEST_PROTOCOL)
##
##        # Save Elapsed time
##        with open(''.join(['../dev/files/time_',function,'_MOMCEDA','.pk1']), 'wb') as output:
##            pickle.dump(extime, output, pickle.HIGHEST_PROTOCOL)

    print 'Average hypervolume=', mean(hvValues[:, -1])
    print 'Best hypervolume=', max(hvValues[:, -1])

    ##        plt.figure(countFig+1,figsize=(12, 12))
    ##        plt.title('Average hypervolume evolution for %s problem' %(function), fontsize=18)
    ##        plt.xlabel('Generations', fontsize=18)
    ##        plt.ylabel('Average hypervolume', fontsize=18)
    ##        plt.plot(arange(1,NGer+1),hvValues.mean(axis=0))
    ##        plt.savefig(''.join(['../figures/meanHV_',function,'.png']), bbox_inches='tight')

    # Save Population
    with open(''.join(['../dev/files/Pop_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(finalPop, output, pickle.HIGHEST_PROTOCOL)

    # Save Hypervolume
    with open(''.join(['../dev/files/HV_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(hvValues, output, pickle.HIGHEST_PROTOCOL)

    # Save Elapsed time
    with open(''.join(['../dev/files/time_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(extime, output, pickle.HIGHEST_PROTOCOL)

    # Save Convergence
    with open(''.join(['../dev/files/conv_', function, '_MOMCEDA.json']),
              'w') as outfile:
        json.dump(conv, outfile)

    print '\nMOMCEDA finished all experiments\n'
Example #18
    g_init = g1 + g2
    fit_g = fitting.LevMarLSQFitter()
    g = fit_g(g_init, x, y)
    print(g)
    xvals = np.linspace(min(x), max(x), 1000)
    return xvals, g(xvals)+1


if __name__=="__main__":
    min_wl = 5390
    max_wl = 5540
    #filename = "lrisShri_galA.spec" # (07:53:50, +42:42:22)
    filename = "lrisAnna_gal_08.0.spec"
    wl, flux, err = load_data(filename) 
    ivar = 1/err**2
    n_flux, n_ivar = normalize(wl, flux, ivar, L=40)
    choose = np.logical_and(wl > min_wl, wl < max_wl)
    xvals, gaus = fit_gaussian(wl[choose], n_flux[choose]-1, 5460)

    #plot_spectrum(wl, flux, ivar, min_wl, max_wl)
    plt.step(wl, n_flux, where='mid', c='k', lw=0.5)
    plt.step(xvals, gaus, where='mid', c='r', lw=1)
    plt.xlabel("Wavelength", fontsize=16)
    plt.ylabel("Flux", fontsize=16)
    plt.tick_params(axis='x', labelsize=20)
    plt.tick_params(axis='y', labelsize=20)
    plt.xlim(min_wl, max_wl)
    plt.ylim(-1, 7)
    plt.tight_layout()
    #plt.show()
    plt.savefig("gal2_fit_oii.png")
Example #19
def main():
    mitStopWords = createMitStopWordList() # New is in mit stopword list! NEW York!
    #cleanUp_1(extractTxt,sublStopWords)
    cleanUp_2(extractTxt)
    normalize(processedListOfTexts,mitStopWords,nounList)
    processTexts(processedListOfTexts)
Example #20
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import VotingClassifier
from scipy import stats
from numpy import sqrt

from read_data import *
from normalize import *

X, y = read_data("../abalone.data")

X = normalize(X, 7)
X = normalize(X, 3)

net = MLPClassifier(verbose=False,
                    activation='logistic',
                    validation_fraction=0.33,
                    hidden_layer_sizes=(5, ),
                    early_stopping=True,
                    learning_rate='constant',
                    learning_rate_init=0.2,
                    max_iter=500,
                    momentum=0.9)

SVM = svm.SVC()
GNB = GaussianNB()
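The imports above pull in cross_val_score, StratifiedKFold and VotingClassifier, but the excerpt stops before they are used. A plausible continuation, offered only as a sketch and not as the original code:

# Sketch of one way the unused imports could be wired together.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
for name, clf in [('MLP', net), ('SVM', SVM), ('GNB', GNB)]:
    scores = cross_val_score(clf, X, y, cv=skf)
    print(name, scores.mean(), sqrt(scores.var()))

ensemble = VotingClassifier(estimators=[('mlp', net), ('svm', SVM), ('gnb', GNB)], voting='hard')
print('ensemble', cross_val_score(ensemble, X, y, cv=skf).mean())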
Example #21
# do for each file in the collection

for title,doc in docs.iteritems():
    ## tokenize SGM doc to a list

    print "Title:" ,title
    
    #terms = re.split('\s|(?<!\d)[,.](?!\d)', doc) # same as nltk.word_tokenize()
    terms = nltk.word_tokenize (doc)
    
    # print(terms)
    terms = [t for t in terms if (not t in empty_words)]

    ## normalize words
    normalized_words = normalize(terms)
    print("NORMALIZE---------------------------")
    pprint.pprint(normalized_words)

    ## stemming
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in normalized_words]

    ## build inverted index, docID would stand for each Reuters doc

    print("STEMMER---------------------------")
    pprint.pprint(tokens)

    # put in index
    print("INVERTED INDEX---------------------------")
    for token in tokens:
Example #22
    with open(''.join(['results/Pop_ZDT', i, '_', algo, '.pk1']),
              'r') as filename:
        Pop = pickle.load(filename)

    nreps = len(Pop)

    igdValues = np.zeros(nreps)
    normIgdValues = np.zeros(nreps)
    function = Pop[0].function

    for run in np.arange(nreps):

        Pt = Pop[run]
        objRec.objIdeal = Pt.obj.min(axis=0)
        Zr, a = normalize(Pt, objRec, minim, Z, p)
        ZRef = Zr * a + objRec.objIdeal

        ##    print ZRef

        normIgd, Zint = normIGDmetric(ZRef, objRec.objIdeal, a, Pt.obj,
                                      function)
        igd = IGDmetric(ZRef, objRec.objIdeal, Pt.obj, function)[0]

        normIgdValues[run] = normIgd
        igdValues[run] = igd
##
##        print 'Execution',run+1
##        print 'NormIGD = {0:e}'.format(normIgd)
##        print 'IGD = {0:e}'.format(igd)
Example #23
import pprint

from normalize import *

words= ['U.S.',
 'warships',
 '.',
 '``',
 'It',
 "'s",
 'removal',
 'will',
 'contribute',
 'significantly']

normalized_words = normalize(words)
pprint.pprint(normalized_words)
Example #24
'''Draw plot for voltage showing the fluctuation'''
figure(1)
[plt,fig]=create_plot_options('Voltage vs Time','Time','Voltage','V')
plt.plot(time,data["VLN"][:minimum-1])
#plt.show()

'''Draw real and normalized power and add the capability to zoom the two together'''
figure(2)
'''ax2 = figure(2).add_subplot(211)
ax3 = figure(2).add_subplot(212, sharex=ax2)'''


  # share ax1's xaxis

data["N_W3"]=normalize(data[active_power],data["VLN"])
data["N_VAR3"]=normalize(data[reactive_power],data["VLN"])
[plt,fig]=create_plot_options('Normalized Power vs Time','Time','Power','W')
plt.plot(time,data["N_W3"])
plt.plot(time,data["N_VAR3"])
plt.legend(('Active Power','Reactive Power'), loc='upper right')
'''ax2.plot(time,data["N_W3"])
ax3.plot(time,data["N_VAR3"])'''

#plt.show()

'''Removing transients'''
'''
i=1
while i<len(data["N_W3"]):
    if (math.fabs(data["N_W3"][i]-data["N_W3"][i-1])>100 and math.fabs((data["N_W3"][i+1]+data["N_W3"][i+2])/2-data["N_W3"][i])>100):
Example #25
    3: [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
}

#obtaining necessary inputs and casting to int type
# number_of_input_neurons = int(input("Enter the number of neurons in the i/p layer: "))
# number_of_hidden_neurons = int(input("Enter the number of hidden neurons: "))
# number_of_output_neurons = int(input("Enter the number of neurons in the o/p layer: "))

number_of_input_neurons = 784
number_of_hidden_neurons = 784
number_of_output_neurons = 10
learning_rate = float(input("Enter the learning rate: "))  # learning rate is typically fractional

#obtain specific network input
pixels, labels = read_input()
pixels_normalized = normalize(pixels)
#print(pixels_normalized)

#initialize weights in the network
input_to_hidden_layer_wts, hidden_to_output_layer_wts = initialize_weights(
    number_of_input_neurons, number_of_hidden_neurons,
    number_of_output_neurons)

error_over_time = []

for epoch in range(10):  # training epochs; the inner loop below uses its own sample index

    collective_forward_pass_output = []
    for i in range(len(labels)):
        #obtain output of the forward pass
        hidden_layer_input, hidden_layer_output, forward_pass_output = forward_propagate(
Example #26
        if Y_train[i]==1:
            mu1 += X_train[i]
            cnt1 += 1
        else:
            mu2 += X_train[i]
            cnt2 += 1
    mu1 /= cnt1
    mu2 /= cnt2

    # variances of Gaussian Models
    sigma1 = np.zeros((106, 106))
    sigma2 = np.zeros((106, 106))
    for i in range(train_data_size):
        if Y_train[i]==1:
            sigma1 += np.dot(np.transpose([X_train[i]-mu1]),[X_train[i]-mu1])
        else:
            sigma2 += np.dot(np.transpose([X_train[i]-mu2]),[X_train[i]-mu2])
    sigma1 /= cnt1
    sigma2 /= cnt2
    shared_sigma = (float(cnt1)/train_data_size)*sigma1+(float(cnt2)/train_data_size)*sigma2
    N1 = cnt1
    N2 = cnt2

    valid(X_valid,Y_valid,mu1,mu2,shared_sigma,N1,N2)

if __name__=='__main__':
    train_data_path = r'./data/X_train.csv'
    train_label_path = r'./data/Y_train.csv'
    X_all, Y_all = load_data(train_data_path, train_label_path)
    X_all = normalize(X_all)
    train(X_all,Y_all)
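The valid() helper is not shown in the excerpt. For a two-class generative model with a shared covariance like the one fitted above, the posterior is the standard sigmoid of a linear function of x; the sketch below states that closed form and is only an assumption about what valid() computes, not the project's own code.

import numpy as np

def posterior_class1(X, mu1, mu2, shared_sigma, N1, N2):
    # P(class 1 | x) for Gaussian class-conditionals with shared covariance
    inv = np.linalg.inv(shared_sigma)
    w = np.dot(mu1 - mu2, inv)
    b = (-0.5 * np.dot(np.dot(mu1, inv), mu1)
         + 0.5 * np.dot(np.dot(mu2, inv), mu2)
         + np.log(float(N1) / N2))
    z = np.dot(X, w) + b
    return 1.0 / (1.0 + np.exp(-z))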
Example #27

#open the test list file  
testlist = open("testList","r")

#delete the testlist from the dictionary 
for test in testlist:
    if test[0] == '#': 
       del testfunction[test[1:].strip()]


if sys.argv[1] == "-n":
    dicList = []
    for filename in glob.glob(os.path.join(path_normalize, '*.yaml')):
        dicList.append(yaml.load(open(filename)))
    normalize(testfunction,dicList)
    save(dicList)

if sys.argv[1] == "-r":
    dic_ideal = {}
    for key,value in testfunction.iteritems():
        value(dic_ideal)
    filelist=[]
    heading_list = ["SL NO", "TOOLS", "TopDimention", "Category", "Scenario", "Testcase"]
    generateReport("Consolidated.xlsx", "Consolidate", testfunction, heading_list)
   
    pos = len(heading_list) + 1
    size = len(heading_list) + 1
    for filename in glob.glob(os.path.join(path, '*.yaml')):
        filelist.append(filename)
    filelist.sort()
Example #28
    def main(self):

        # Read data
        print('Reading data...\n')
        filename1 = '_data.csv'
        filename2 = 'ratings.dat'
        if not exists(filename1):
            if not exists(filename2):
                print('Error: Please add file', filename2, 'into the path!')
                exit(1)
            else:
                create_file(filename2, filename1)
        A = loadtxt(filename1, delimiter=',')

        # Initialize variables
        no_user = num_user(A)
        no_movie = num_movie(A)
        B = shuffle(A)

        # Set parameters
        k_set = [1, 3]
        fold_set = [3, 4]
        rmse = zeros((len(fold_set), len(k_set)))
        ratings_round = False

        # Main algorithm
        for ff in range(len(fold_set)):
            num_fold = fold_set[ff]
            print(str(num_fold) + '-fold Cross Validation begins.\n')
            num_test = int(floor(100000/num_fold))
            num_train = 100000 - num_test

            for kk in range(len(k_set)):
                k = k_set[kk]
                print('Reducing dimensions to', k, '.')
                error_each_fold = zeros((num_fold,1))

                for i in range(num_fold):
                    print('Fold ' + str(i+1) + '. Splitting train/test...')
                    tr, tt = train_test(B, i, num_test)
                    u, v = id_map(B)

                    # Build matrix R in the paper
                    print('Building matrix R...')
                    R_raw = build_matrix(tr, u, v, num_test, no_user, no_movie)
                    R_filled = fill_matrix(R_raw)
                    m = column_mean(R_filled)
                    R = normalize(R_filled, m)

                    # Dimensionality Reduction
                    print('Dimensionality Reduction...')
                    U, S, V = svd(R, full_matrices=False)
                    Ss = copy(S[0:k])
                    Sk = S_to_matrix(Ss)
                    Uk = copy(U[:, 0:k])
                    Vk = copy(V[0:k, :])
                    sqrt_Sk = sqrt(Sk)
                    US = dot(Uk,transpose(sqrt_Sk))
                    SV = dot(sqrt_Sk, Vk)

                    # Predict the ratings
                    print('Predicting ratings...')
                    pr = predict_ratings(US, SV, m, u, v, tt, num_test)
                    if ratings_round == True:
                        pr = round_array(pr)
                    pr_trim = trim_rating(pr)

                    # Find error
                    print('Calculating error...')
                    real = copy(tt[:, 2])
                    error = pr_trim - real
                    error_each_fold[i] = sqrt(sum(error**2)/num_test)
                    print('End one fold.\n')

                rmse[ff, kk] = mean(error_each_fold)

        savetxt("_rmse.csv", rmse, fmt='%.4f', delimiter=",")
        print(rmse)