import numpy as np
import pandas as pd


def loading_data_by_category(data_path, eng=True, num=True, punc=False):
    # data example : 'title', 'content', 'category', 'nouns'
    # data format  : xlsx
    # NOTE: rows are indexed positionally below (doc[3] = title,
    # doc[4] = content, doc[5] = nouns), so the sheet is assumed to have
    # leading columns before those fields.
    corpus = pd.read_excel(data_path)
    corpus = np.array(corpus)
    title = []
    contents = []
    nouns = set()
    for doc in corpus:
        if type(doc[3]) is not str or type(doc[4]) is not str:
            continue
        if len(doc[3]) > 0 and len(doc[4]) > 0:
            tmptitle = normalize(doc[3], english=eng, number=num, punctuation=punc)
            title.append(tmptitle)
            tmpcontents = normalize(doc[4], english=eng, number=num, punctuation=punc)
            contents.append(tmpcontents)
            for noun in doc[5].split():
                nouns.add(noun)
    return title, contents, nouns
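# A minimal usage sketch of the loader above; 'corpus.xlsx' and the flag
# values are assumptions for illustration only.
titles, contents, nouns = loading_data_by_category('corpus.xlsx',
                                                   eng=True, num=True,
                                                   punc=False)
print(len(titles), 'titles,', len(nouns), 'distinct nouns')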
def main():
    mitStopWords = createMitStopWordList()
    # NOTE: "new" is in the MIT stopword list, which breaks "New York"!
    #cleanUp_1(extractTxt, sublStopWords)
    cleanUp_2(extractTxt)
    normalize(processedListOfTexts, mitStopWords, nounList)
    processTexts(processedListOfTexts)
def trainModel(data):
    # Normalize data
    print('Please wait while I normalize the data...', end=' ')
    normalize(data)
    print('Done!\n')

    # Introduce algorithms
    features = list(data.columns[1:])
    accuracy = validator(data, features)
    print(f'Running "Forward Selection" & "Nearest Neighbor" with all '
          f'{len(features)} features, using "Leaving-One-Out" evaluation, '
          f'I get an accuracy of {accuracy:.1%}\n')

    # Find best features
    searchFeatures(data)
def highpass(sr, data):
    # b = firwin(101, cutoff=1000, nyq=sr/2, pass_zero=False)
    # data = lfilter(b, [1.0], data)
    data = data.high_pass_filter(1000)
    # wavfile.write("./musicFiles/testHigh.wav", sr, data.astype(np.int16))
    normalized_sound = normalize(data, -20.0)
    normalized_sound.export("./musicFiles/testHighNorm.wav", format="wav")
def show_normalized():
    if 'imgFile' not in request.files:
        return 'No selected file'
    file = request.files['imgFile']
    # If the user does not select a file, the browser may also
    # submit an empty part without a filename.
    if file.filename == '':
        return 'No selected file'
    image = request.files['imgFile']
    method = request.form.get('method')
    img_path = 'static/images/image.png'
    image.save(img_path)
    if method == 'k':  # kumulatif (cumulative)
        title = 'Cumulative'
        imagee = Image.open(img_path)
        new_image = normalize(imagee)
        norm_img_path = 'static/images/normalized_image.png'
        new_image.save(norm_img_path)
    else:  # scaling
        title = 'Scaling'
        base_image = Image.open(img_path)
        width, height = base_image.size
        normalized_img = scaling(base_image, width, height)
        norm_img_path = 'static/images/normalized_image.png'
        normalized_img.save(norm_img_path)
    return render_template('result.html',
                           title='Normalized Picture (' + title + ')',
                           url_before=img_path + '?' + str(time.time()),
                           url_after=norm_img_path + '?' + str(time.time()))
def main():
    global scoreOfStates
    initializeScoreDictionary()
    # sentiDictionary = sentiReadWord('SentiWordNet_3.0.0_20100705.txt')
    # bingNegativeWords = bingReadWords('negative-words.txt')
    # bingPositiveWords = bingReadWords('positive-words.txt')
    # print bingPositiveWords
    affinDictionary = affinReadwords('AFINN-111.txt')
    i = 0
    initializeGeoCode()
    print scoreOfStates
    with open("twits.txt") as f:
        for line in f:
            if i == 20:
                break
            # print line
            try:
                TweetInfo = json.loads(line)
                # 'and' (not bitwise '&') so the check short-circuits
                if checkLiesInIndia(TweetInfo) and TweetInfo.has_key('text'):
                    i += 1
                    listOfTokens = normalize(TweetInfo['text'])
                    print listOfTokens
                    print ""
                    tweetScore = affinCalculateSentiment(listOfTokens, affinDictionary)
                    print tweetScore
                    print type(tweetScore)
                    print ""
                    addTweetScore(TweetInfo, tweetScore)
                    print scoreOfStates
                    print "done"
            except:
                print "Exception found"
    print scoreOfStates
def show_normalized():
    if 'imgFile' not in request.files:
        return json.dumps({'status': 'Error1'})
    file = request.files['imgFile']
    if file.filename == '':
        return json.dumps({'status': 'Error2'})
    image = request.files['imgFile']
    method = request.form.get('method')
    img_path = 'static/images/image.png'
    image.save(app.root_path + '/' + img_path)
    if method == 'k':  # kumulatif (cumulative)
        title = 'Cumulative'
        imagee = Image.open(app.root_path + '/' + img_path)
        new_image = normalize(imagee, app.root_path)
        norm_img_path = 'static/images/normalized_image.png'
        new_image.save(app.root_path + '/' + norm_img_path)
    else:  # scaling
        title = 'Scaling'
        base_image = Image.open(app.root_path + '/' + img_path)
        width, height = base_image.size
        normalized_img = scaling(base_image, width, height, app.root_path)
        norm_img_path = 'static/images/normalized_image.png'
        normalized_img.save(app.root_path + '/' + norm_img_path)
    return json.dumps({'url_after': norm_img_path + '?' + str(time.time())})
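# A hedged client-side check of the JSON endpoint above. The route path,
# host, port, and sample file are assumptions, since the snippet does not
# show its @app.route decorator.
import requests

with open('sample.png', 'rb') as fh:
    resp = requests.post('http://localhost:5000/normalize',
                         files={'imgFile': fh},
                         data={'method': 'k'})
print(resp.json())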
def lowpass(sr, data):
    b = firwin(5, cutoff=400, nyq=sr / 2, pass_zero=True)
    data = normalize("./musicFiles/furTest.wav", -20.0)
    data = lfilter(b, [1.0], data.get_array_of_samples())
    data = np.array(data)
    # data = data.low_pass_filter(400)
    # data.export("./musicFiles/testLow.wav", format="wav")
    wavfile.write("./musicFiles/testLow.wav", sr, data.astype(np.int16))
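# Both filter helpers above mix scipy and pydub. This is a self-contained,
# pydub-only sketch of the same filter/normalize/export flow; the input
# path is an assumption, and headroom=1.0 dB is an arbitrary choice.
from pydub import AudioSegment
from pydub.effects import normalize as pydub_normalize

seg = AudioSegment.from_wav("./musicFiles/furTest.wav")
seg = seg.low_pass_filter(400)  # pydub registers this effect on AudioSegment
pydub_normalize(seg, headroom=1.0).export("./musicFiles/testLowNorm.wav",
                                          format="wav")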
def decompose(fds, u):
    new_fds = normalize(fds)
    r = set({u})
    continuer, table_faux, fd_faux = check_condition(fds, r)
    while (continuer):
        r.remove(table_faux)
        closure = improved(fds, fd_faux.prerequis)
        r.add(SetAttr(table_faux.intersection(closure)))
        r.add(SetAttr(table_faux.difference(closure).union(fd_faux.prerequis)))
        continuer, table_faux, fd_faux = check_condition(fds, r)
        # check_condition_debug(fds, r)
    return r
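# A hedged usage sketch for decompose(). parseFD and schema are the
# project's own helpers (they appear in the CLI block later in this
# section); 'dependencies.txt' is a made-up input path.
fds = parseFD(False, 'dependencies.txt')
print("Decompose:", decompose(fds, schema(fds)))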
def get_quantized_features(features, quantization_factor=30):
    normalized_features = normalize(features, axis=1)
    offset = np.abs(np.min(normalized_features))
    offset_features = normalized_features + offset  # Making all feature values positive

    # Let's proceed to quantize these positive feature values
    min_val = np.min(offset_features)
    max_val = np.max(offset_features)
    bins = np.linspace(start=min_val, stop=max_val, num=quantization_factor)
    median_values = get_median_values_for_bins(bins)
    original_quantized_features = np.digitize(offset_features, bins)
    # A list comprehension rather than a bare map(), so this also works on
    # Python 3, where map() returns a lazy iterator.
    quantized_features = np.apply_along_axis(
        lambda row: [median_values[x] for x in row], 1,
        original_quantized_features)
    quantized_features = np.floor(quantization_factor * quantized_features)
    return quantized_features
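# A toy, self-contained illustration of the bin-then-replace-with-a-
# representative idea used above. Bin midpoints stand in for whatever
# get_median_values_for_bins() returns in the project.
import numpy as np

values = np.array([0.05, 0.40, 0.60, 0.99])
bins = np.linspace(0.0, 1.0, num=5)        # bin edges
midpoints = (bins[:-1] + bins[1:]) / 2.0   # one representative per bin
idx = np.digitize(values, bins) - 1        # 0-based bin index per value
idx = np.clip(idx, 0, len(midpoints) - 1)  # keep the top edge in range
print(midpoints[idx])                      # [0.125 0.375 0.625 0.875]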
def tester(listOfTweets, annotation):
    bingNegativeWords = bingReadWords('negative-words.txt')
    bingPositiveWords = bingReadWords('positive-words.txt')
    affinDictionary = affinReadwords('AFINN-111.txt')
    sentiDictionary = sentiReadWord('SentiWordNet_3.0.0_20100705.txt')
    correctCount1 = 0
    correctCount2 = 0
    correctCount3 = 0
    totalNumberOfTweets = len(listOfTweets)
    for i in range(len(listOfTweets)):
        listOfTokens = normalize(listOfTweets[i])
        classified1 = bingCalculateSentiment(listOfTokens, bingPositiveWords, bingNegativeWords)
        classified2 = affinCalculateSentiment(listOfTokens, affinDictionary)
        sentiback = sentiCalculateSentiment(listOfTokens, sentiDictionary)
        classified3 = sentiback[1]
        # Count matches ('=='); the original counted mismatches with '!=',
        # which made the reported "accuracy" actually an error rate.
        if classified1 == annotation[i]:
            correctCount1 += 1
        if classified2 == annotation[i]:
            correctCount2 += 1
        if classified3 == annotation[i]:
            correctCount3 += 1
    print "accuracyBing : " + str(float(correctCount1) / totalNumberOfTweets) + \
          " accuracyAffin : " + str(float(correctCount2) / totalNumberOfTweets) + \
          " accuracySenti : " + str(float(correctCount3) / totalNumberOfTweets)
import datetime

from addAbsoluteEfficiency import addAbsoluteEfficiency
from addVisibilityGraph import addVisibilityGraph
from fillClearTweet import *
from normalize import *

if __name__ == "__main__":
    print("Start: {0}".format(datetime.datetime.now()))
    connectMongoDB = Connect2MongoDB('localhost', 27017)
    connectMongoDB.setDB('Huelga')
    db = MongoDB(connectMongoDB)
    graph = Neo4jDB(
        Connect2Neo4J(CONST_NEO4J_URI, CONST_NEO4J_USER, CONST_NEO4J_PASSWORD))
    """tweets = db.find(MongoDB.TWEETS_COLLECTION)
    # Inside of MongoDB.find() --> 0 = MongoDB.TWEETS_COLLECTION
    #tweet = db.find(0, getTweet("id_str", '917946195410128897'))
    process = ProcessTweet(db, graph)
    for t in tweets:
        process.process(t)"""
    #fillClearTweet(db, graph)
    addAbsoluteEfficiency(db)
    addVisibilityGraph(db, graph)
    normalize(db)
    graph.connect2Neo4J.closeDB()
    print("\nStop: {0}".format(datetime.datetime.now()))
def calculate(review, sentiDictionary):
    listOfTokens = normalize(review)
    return sentiCalculateSentiment(listOfTokens, sentiDictionary)
if (algo == "-generate"): print("generate", file=log) if (len(sys.argv) != 3): print("Too much arguments for generate", file=sys.stderr) generate(int(sys.argv[2])) else: if (sys.argv[2][0] != '-'): fds = parseFD(False, sys.argv[2]) else: fds = parseFD(True) if (algo == "-normalize"): print("normalize", file=log) print("Normalize:", normalize(fds)) elif (algo == "-decompose"): print("decompose", file=log) print("Decompose:", decompose(fds, schema(fds))) else: if (len(sys.argv) != 4): print("Wrong number of arguments, 4 expected", file=sys.stderr) setAttr = SetAttr("".join(sys.argv[3:])) if (algo == "-improved"): print("improved", file=log) print("Improved:", improved(fds, setAttr))
def runMOMCEDA(NPop, NEval, function, Nref, nReps, RTPlot, refPoint, weight,
               seed=None):
    print 'Running MOMCEDA\n'

    NGer = NEval / NPop - 1  # Number of generations
    NObj = 2   # Number of objectives to optimize
    minim = 1  # minim = 1 if minimizing objectives, minim = 0 otherwise
    p = Nref - 1  # Number of objective axes divisions to generate structured ref. points
    random.seed(seed)

    if (function == 'ZDT4'):
        Nvar = 10
        Vmin = append(0, -5 * ones(Nvar - 1))  # Limits of chromosome values
        Vmax = append(1, 5 * ones(Nvar - 1))
        sigma = append(1.0, 0.1 * ones(Nvar - 1))  # Mutation parameter
    else:
        Nvar = 30
        if (function == 'ZDT6'):
            Nvar = 10
        Vmin = 0.0 * ones(Nvar)  # Limits of chromosome values
        Vmax = 1.0 * ones(Nvar)
        sigma = 1.0 * ones(Nvar) / 2.0  # Mutation parameter

    minObj = array([0.0, 0.0])  # Limits of objective values
    maxObj = array([1.0, 1.0])
    objRec = objectiveRecords(NObj, minim)  # Records for objective values
    Z = generateRefPoints(NObj, p)  # Generate structured reference points
    rankType = 'hv'   # Type of rank used for TOPSIS
    multiple = True   # multiple: use multiple criteria to select solutions from the last front
    sampleAll = True  # sampleAll: if true, all members from parent population are sampled with the TOPSIS rank

    hvValues = zeros((nReps, NGer))
    conv = []
    finalPop = []
    extime = []

    # Plot parameters
    color = plt.cm.get_cmap('Reds')
    deltac = 0.3
    ObjNames = ['f1', 'f2', 'f3']
    scale = 1.0 / (maxObj - minObj)
    center = array([0.0, 0.0, 0.0])
    countFig = 0  # counter of figures

    ## Offspring parameters
    pMut = 1.0 / Nvar  # Mutation probability
    pSwitch = 0.5  # Probability to switch variables between members
    spread = 0.5   # Parameter to control the spread of generated members
    nc = 30
    nm = 20
    ##sigma = append(1.0,0.1*ones(Nvar-1))
    ##sigma0 = r_[array([1.0]),1.0/10*ones(Nvar-1)]
    ##sigmaf = r_[array([1.0/10]),1.0/500*ones(Nvar-1)]
    ###deltaSigma = (sigma0 - sigmaf) / NGer
    ##qSigma = (sigmaf/sigma)**(1.0/NGer)
    ##sigma = sigma0
    spreadf = 0.05
    qSpread = (spreadf / spread)**(1.0 / NGer)
    deltaSpread = (spread - spreadf) / NGer
    dist = zeros((NGer, Nvar))  ## Distribution variables

    # Coefficients of the gaussians of the mixture
    ##choiceOK = False
    ##while(not choiceOK):
    ##    dec = input('Choose decay type:\n(1) Linear\n(2) Exponential\n(3) Logarithmic\n')
    ##    if(dec in [1,2,3]):
    ##        choiceOK = True
    dec = 1  # decay type: 1 - Linear, 2 - Exponential, 3 - Logarithmic
    coefGau = calc_coefGau(NPop, dec)

    with open(''.join(['../dev/pareto_front/zdt', function[3],
                       '_front.json'])) as optimal_front_data:
        optimal_front = json.load(optimal_front_data)

    for nExec in arange(nReps, dtype=int):
        start = time.time()
        print 'Starting execution %d ...' % (nExec + 1)

        ## Initialization ##
        t = 0  # Counter of generations
        Pt = Population(NPop, Nvar, Vmin, Vmax, NObj, minim, function)  # Initial Population
        Pt.fastNonDominatedSort()  # Nondominated sorting
        Pt.topsisPop()  # Rank within each front
        Qt = Population(NPop, Nvar, Vmin, Vmax, NObj, minim, function)  # Offspring population

        if (RTPlot):
            #plt.ion()
            plt.figure(figsize=(12, 12))
            plt.title('Population at execution %d' % (nExec + 1), fontsize=18)
            f = array(optimal_front)
            plt.plot(f[:, 0], f[:, 1], color='b', label='Pareto front')
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=18)
            #plt.draw()

        ## Main loop ##
        while (t < NGer):
            ##if(t == 150):
            ##    weight = array([0.0,1.0,3.0,10.0,5.0])  # Array of weights used for TOPSIS
            Rt = Pt.addPopulation(Qt)  # Combined population
            Rt.fastNonDominatedSort()  # Nondominated sorting
            ##Rt.topsisPop(rank=rankType)  # Rank within each front
            ##Rt.globalRankEval()

            if (sampleAll):
                RtObj = Rt.obj  # Save original objective values of Rt
                Zr, a = normalize(Rt, objRec, minim, Z, p)
                ZRef = Zr * a + objRec.objIdeal
                associate(Rt, Zr, 0)
                ##distPareto(Rt,ZRef,objRec.objIdeal,a)
                hvContribution(Rt, objRec.objIdeal, a)
                indRemove = niching(NPop, Rt, array([0, 2 * NPop]), weight, multiple)
                Pt = Rt.removeMembers(indRemove, RtObj)
            else:
                indList = zeros(NPop, dtype=int)  # List of indexes of members to the next population
                i = 0  # Counter of fronts
                sizeEvol = array([0, len(Rt.fronts[i])])  # Evolution of population's size by adding the fronts

                # Fill population with the first fronts
                while (sizeEvol[i + 1] <= NPop):
                    # Rt.crowd(Rt.fronts[i]) = Rt.crowdingDistanceAssignment(Rt.fronts[i],minObj,maxObj)
                    indList[sizeEvol[i]:sizeEvol[i + 1]] = Rt.fronts[i]  # Add members to the list
                    i = i + 1
                    sizeEvol = append(sizeEvol, sizeEvol[i] + len(Rt.fronts[i]))

                # Sort members of the last front according to crowding distance
                # Rt.crowd[Rt.fronts[i]] = Rt.crowdingDistanceAssignment(Rt.fronts[i],minObj,maxObj)
                # ind = argsort(Rt.crowd[Rt.fronts[i]])[::-1]
                # Sort members of the last front according to the rank
                # ind = argsort(Rt.rank[Rt.fronts[i],1])
                listSt = r_[indList[:sizeEvol[i]], Rt.fronts[i]]
                St = Rt.addMembers(listSt)
                K = NPop - sizeEvol[i]
                StObj = St.obj  # Save original objective values of St
                Zr, a = normalize(St, objRec, minim, Z, p)
                ZRef = Zr * a + objRec.objIdeal
                associate(St, Zr, sizeEvol[i])
                ZRef = Zr * a + objRec.objIdeal
                ##distPareto(St,ZRef,objRec.objIdeal,a)
                hvContribution(St, objRec.objIdeal, a)
                indRemove = niching(K, St, sizeEvol, weight, multiple)
                Pt = St.removeMembers(indRemove, StObj)  # Next generation's population

            if (RTPlot):
                Pt.plot(color((1 - deltac) * float(t) / NGer + deltac), scale,
                        center, ObjNames, countFig)
                #axes = plt.gca()
                #axes.set_ylim([0,1])

            Pt.topsisPop(weight, rank=rankType)  # Rank within each front
            Qt = Pt.offspringPop(coefGau, sigma, pMut, spread, pSwitch)  # Selection, recombination and mutation
            ##dist[t] = maxDist(Pt.members)/(1*(Vmax - Vmin))
            ##sigma = dist[t]
            ##sigma = random.rand()*append(10.0,1.0*ones(Nvar-1))
            ##MEv = sqrt(sum(Pt.members[:,1:]**2)/(NPop*(Nvar-1)))
            ##print 'Generation = ', t, 'Mean Square Value =', MEv
            hv = HyperVolume(refPoint)
            hvValues[nExec, t] = hv.compute(Pt.obj)
            t = t + 1
            ##sigma = sigma - deltaSigma
            ##sigma = sigma*qSigma
            ##spread = spread*qSpread
            spread = spread - deltaSpread

        end = time.time()
        extime.append(end - start)
        conv.append(convergence(Pt.obj.tolist(), optimal_front))
        print 'Hypervolume = ', hvValues[nExec, -1]
        print 'Convergence metric = ', conv[nExec]
        print 'Execution ', nExec + 1, ' completed in ', extime[-1], ' seconds \n'

        ##normIgd,Zint = normIGDmetric(ZRef,objRec.objIdeal,a,Pt.obj,function)
        ##igd = IGDmetric(ZRef,objRec.objIdeal,Pt.obj,function)[0]
        ##normIgd2,Zint = normIGDmetric2(ZRef,objRec.objIdeal,a,Pt.obj,function)
        ##igd2 = IGDmetric2(ZRef,objRec.objIdeal,Pt.obj,function)[0]
        ##print 'NormIGD = {0:e}'.format(normIgd)
        ##print 'IGD = {0:e}'.format(igd)
        ##print 'NormIGD2 = {0:e}'.format(normIgd2)
        ##print 'IGD2 = {0:e}'.format(igd2)

        ##with open(''.join(['Pareto/Prt_',function,'.pk1']), 'r') as filename:
        ##    f = pickle.load(filename)
        ##step = len(f)/50
        ##plt.scatter(f[::step,0],f[::step,1],s=1,color='b')

        if (RTPlot):
            Pt.plot(color((1 - deltac) * float(t) / NGer + deltac), scale,
                    center, ObjNames, countFig)
            #axes = plt.gca()
            #axes.set_ylim([0,1])
            #plt.draw()
            #plt.ioff()
            plt.show()
            #plt.savefig(''.join(['../figures/',function,'.png']), bbox_inches='tight')
            countFig = countFig + NObj * (NObj - 1) / 2

        finalPop.append(Pt)

        ##if(nReps == 1):
        ##    MEv = sqrt(sum(Pt.members[:,1:]**2)/(NPop*(Nvar-1)))
        ##    print 'Generation = ', t, 'Mean Square Value =', MEv
        ##
        ##    print 'rho =', Pt.rho
        ##    print 'minObj=', objRec.objIdeal
        ##refPoint = objRec.extPoints.max(axis=0)*1.1
        ##print 'refPoint: ',refPoint
        ##hv = HyperVolume(refPoint)
        ##print 'hypervolume=', hv.compute(Pt.obj)
        ##ind = where((St.obj[:,0] != 0) & (St.obj[:,1]!=0))[0]
        ##j = random.choice(ind)
        ##a = (StObj[j] - objRec.objIdeal) / St.obj[j]
        ##ZPlot = Z*a + objRec.objIdeal
        ##
        ##for i in arange(0,len(Z),len(Z)/10):
        ##    plt.plot(vstack((objRec.objIdeal[0],ZPlot[i,0])),vstack((objRec.objIdeal[1],ZPlot[i,1])),'-',color='k')
        ##plt.plot(vstack((objRec.objIdeal[0],ZPlot[-1,0])),vstack((objRec.objIdeal[1],ZPlot[-1,1])),'-',color='k')
        ##inter = diag(a) + objRec.objIdeal
        ##plt.plot(inter[:,0],inter[:,1],'-',color='b')
        ##inter = diag(a) + objRec.objIdeal
        ##plt.plot(inter[:,0],inter[:,1],'-',color='b')
        ##plt.show()
        ##axes = plt.gca()
        ##axes.set_ylim([0,1])
        ##
        ##plt.savefig(''.join(['../figures/',function,'.png']), bbox_inches='tight')
        ##plt.figure(2)
        ##plt.plot(hvValues)
        ##plt.savefig(''.join(['../figures/HV_',function,'.png']), bbox_inches='tight')
        ### Save Population
        ##with open(''.join(['../dev/files/Pop_',function,'_MOMCEDA','.pk1']), 'wb') as output:
        ##    pickle.dump(finalPop, output, pickle.HIGHEST_PROTOCOL)
        ##
        ### Save Hypervolume
        ##with open(''.join(['../dev/files/HV_',function,'_MOMCEDA','.pk1']), 'wb') as output:
        ##    pickle.dump(hvValues, output, pickle.HIGHEST_PROTOCOL)
        ##
        ### Save Elapsed time
        ##with open(''.join(['../dev/files/time_',function,'_MOMCEDA','.pk1']), 'wb') as output:
        ##    pickle.dump(extime, output, pickle.HIGHEST_PROTOCOL)

    print 'Average hypervolume=', mean(hvValues[:, -1])
    print 'Best hypervolume=', max(hvValues[:, -1])
    ##plt.figure(countFig+1,figsize=(12, 12))
    ##plt.title('Average hypervolume evolution for %s problem' %(function), fontsize=18)
    ##plt.xlabel('Generations', fontsize=18)
    ##plt.ylabel('Average hypervolume', fontsize=18)
    ##plt.plot(arange(1,NGer+1),hvValues.mean(axis=0))
    ##plt.savefig(''.join(['../figures/meanHV_',function,'.png']), bbox_inches='tight')

    # Save Population
    with open(''.join(['../dev/files/Pop_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(finalPop, output, pickle.HIGHEST_PROTOCOL)

    # Save Hypervolume
    with open(''.join(['../dev/files/HV_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(hvValues, output, pickle.HIGHEST_PROTOCOL)

    # Save Elapsed time
    with open(''.join(['../dev/files/time_', function, '_MOMCEDA', '.pk1']),
              'wb') as output:
        pickle.dump(extime, output, pickle.HIGHEST_PROTOCOL)

    # Save Convergence
    with open(''.join(['../dev/files/conv_', function, '_MOMCEDA.json']),
              'w') as outfile:
        json.dump(conv, outfile)

    print '\nMOMCEDA finished all experiments\n'
    # (fragment: likely the tail of fit_gaussian(), given the two-value
    #  return consumed in __main__ below)
    g_init = g1 + g2
    fit_g = fitting.LevMarLSQFitter()
    g = fit_g(g_init, x, y)
    print(g)
    xvals = np.linspace(min(x), max(x), 1000)
    return xvals, g(xvals) + 1


if __name__ == "__main__":
    min_wl = 5390
    max_wl = 5540
    #filename = "lrisShri_galA.spec"  # (07:53:50, +42:42:22)
    filename = "lrisAnna_gal_08.0.spec"
    wl, flux, err = load_data(filename)
    ivar = 1 / err**2
    n_flux, n_ivar = normalize(wl, flux, ivar, L=40)
    choose = np.logical_and(wl > min_wl, wl < max_wl)
    xvals, gaus = fit_gaussian(wl[choose], n_flux[choose] - 1, 5460)
    #plot_spectrum(wl, flux, ivar, min_wl, max_wl)
    plt.step(wl, n_flux, where='mid', c='k', lw=0.5)
    plt.step(xvals, gaus, where='mid', c='r', lw=1)
    plt.xlabel("Wavelength", fontsize=16)
    plt.ylabel("Flux", fontsize=16)
    plt.tick_params(axis='x', labelsize=20)
    plt.tick_params(axis='y', labelsize=20)
    plt.xlim(min_wl, max_wl)
    plt.ylim(-1, 7)
    plt.tight_layout()
    #plt.show()
    plt.savefig("gal2_fit_oii.png")
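# The fragment above begins after g1 and g2 already exist; this is a
# minimal sketch of how a two-component model is typically built with
# astropy.modeling. The amplitudes, means, and widths are placeholders,
# not the script's actual initial guesses.
from astropy.modeling import models

g1 = models.Gaussian1D(amplitude=1.0, mean=5460.0, stddev=2.0)
g2 = models.Gaussian1D(amplitude=0.5, mean=5465.0, stddev=2.0)
g_init = g1 + g2  # compound model, fit with fitting.LevMarLSQFitter() above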
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import VotingClassifier
from scipy import stats
from numpy import sqrt
from read_data import *
from normalize import *

X, y = read_data("../abalone.data")
X = normalize(X, 7)
X = normalize(X, 3)

net = MLPClassifier(verbose=False,
                    activation='logistic',
                    validation_fraction=0.33,
                    hidden_layer_sizes=(5, ),
                    early_stopping=True,
                    learning_rate='constant',
                    learning_rate_init=0.2,
                    max_iter=500,
                    momentum=0.9)
SVM = svm.SVC()
GNB = GaussianNB()
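# The imports above already pull in VotingClassifier, cross_val_score and
# StratifiedKFold, so here is a hedged sketch of how the three models might
# be combined; the fold count and hard voting are assumptions.
ensemble = VotingClassifier(estimators=[('mlp', net), ('svm', SVM),
                                        ('gnb', GNB)], voting='hard')
scores = cross_val_score(ensemble, X, y, cv=StratifiedKFold(n_splits=5))
print(scores.mean(), scores.std())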
# do for each file in the collection
for title, doc in docs.iteritems():
    ## tokenize SGM doc to a list
    print "Title:", title
    #terms = re.split('\s|(?<!\d)[,.](?!\d)', doc)  # same as nltk.word_tokenize()
    terms = nltk.word_tokenize(doc)
    # print(terms)
    terms = [t for t in terms if (not t in empty_words)]

    ## normalize words
    normalized_words = normalize(terms)
    print("NORMALIZE---------------------------")
    pprint.pprint(normalized_words)

    ## stemming
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in normalized_words]

    ## build inverted index, docID would stand for each Reuters doc
    print("STEMMER---------------------------")
    pprint.pprint(tokens)

    # put in index
    print("INVERTED INDEX---------------------------")
    for token in tokens:
with open(''.join(['results/Pop_ZDT', i, '_', algo, '.pk1']), 'r') as filename:
    Pop = pickle.load(filename)

nreps = len(Pop)
igdValues = np.zeros(nreps)
normIgdValues = np.zeros(nreps)
function = Pop[0].function

for run in np.arange(nreps):
    Pt = Pop[run]
    objRec.objIdeal = Pt.obj.min(axis=0)
    Zr, a = normalize(Pt, objRec, minim, Z, p)
    ZRef = Zr * a + objRec.objIdeal
    ## print ZRef
    normIgd, Zint = normIGDmetric(ZRef, objRec.objIdeal, a, Pt.obj, function)
    igd = IGDmetric(ZRef, objRec.objIdeal, Pt.obj, function)[0]
    normIgdValues[run] = normIgd
    igdValues[run] = igd
    ## print 'Execution', run + 1
    ## print 'NormIGD = {0:e}'.format(normIgd)
    ## print 'IGD = {0:e}'.format(igd)
import pprint

from normalize import *

words = ['U.S.', 'warships', '.', '``', 'It', "'s", 'removal', 'will',
         'contribute', 'significantly']
normalized_words = normalize(words)
pprint.pprint(normalized_words)
'''Draw plot for voltage showing the fluctuation'''
figure(1)
[plt, fig] = create_plot_options('Voltage vs Time', 'Time', 'Voltage', 'V')
plt.plot(time, data["VLN"][:minimum - 1])
#plt.show()

'''Draw real and normalized power and add the capability to zoom the two together'''
figure(2)
'''ax2 = figure(2).add_subplot(211)
ax3 = figure(2).add_subplot(212, sharex=ax2)'''  # share ax1's x axis
data["N_W3"] = normalize(data[active_power], data["VLN"])
data["N_VAR3"] = normalize(data[reactive_power], data["VLN"])
[plt, fig] = create_plot_options('Normalized Power vs Time', 'Time', 'Power', 'W')
plt.plot(time, data["N_W3"])
plt.plot(time, data["N_VAR3"])
plt.legend(('Active Power', 'Reactive Power'), loc='upper right')
'''ax2.plot(time, data["N_W3"])
ax3.plot(time, data["N_VAR3"])'''
#plt.show()

'''Removing transients'''
'''
i = 1
while i < len(data["N_W3"]):
    if (math.fabs(data["N_W3"][i] - data["N_W3"][i-1]) > 100 and
            math.fabs((data["N_W3"][i+1] + data["N_W3"][i+2]) / 2 - data["N_W3"][i]) > 100):
    3: [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
}

# obtaining necessary inputs and casting to numeric types
# number_of_input_neurons = int(input("Enter the number of neurons in the i/p layer: "))
# number_of_hidden_neurons = int(input("Enter the number of hidden neurons: "))
# number_of_output_neurons = int(input("Enter the number of neurons in the o/p layer: "))
number_of_input_neurons = 784
number_of_hidden_neurons = 784
number_of_output_neurons = 10
# float, not int: a learning rate such as 0.2 would be truncated by int()
learning_rate = float(input("Enter the learning rate: "))

# obtain specific network input
pixels, labels = read_input()
pixels_normalized = normalize(pixels)
#print(pixels_normalized)

# initialize weights in the network
input_to_hidden_layer_wts, hidden_to_output_layer_wts = initialize_weights(
    number_of_input_neurons, number_of_hidden_neurons, number_of_output_neurons)

error_over_time = []
for epoch in range(10):  # renamed from 'i', which the inner loop shadowed
    collective_forward_pass_output = []
    for i in range(len(labels)):
        # obtain output of the forward pass
        hidden_layer_input, hidden_layer_output, forward_pass_output = forward_propagate(
# (fragment: likely the tail of train(); the enclosing loop that
#  accumulates the class means is cut off above)
        if Y_train[i] == 1:
            mu1 += X_train[i]
            cnt1 += 1
        else:
            mu2 += X_train[i]
            cnt2 += 1
    mu1 /= cnt1
    mu2 /= cnt2

    # variances of Gaussian Models
    sigma1 = np.zeros((106, 106))
    sigma2 = np.zeros((106, 106))
    for i in range(train_data_size):
        if Y_train[i] == 1:
            sigma1 += np.dot(np.transpose([X_train[i] - mu1]), [X_train[i] - mu1])
        else:
            # center on mu2; the original reused mu1 here, skewing sigma2
            sigma2 += np.dot(np.transpose([X_train[i] - mu2]), [X_train[i] - mu2])
    sigma1 /= cnt1
    sigma2 /= cnt2
    shared_sigma = (float(cnt1) / train_data_size) * sigma1 + \
                   (float(cnt2) / train_data_size) * sigma2
    N1 = cnt1
    N2 = cnt2
    valid(X_valid, Y_valid, mu1, mu2, shared_sigma, N1, N2)


if __name__ == '__main__':
    train_data_path = r'./data/X_train.csv'
    train_label_path = r'./data/Y_train.csv'
    X_all, Y_all = load_data(train_data_path, train_label_path)
    X_all = normalize(X_all)
    train(X_all, Y_all)
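# For reference: with a shared covariance, the generative model above
# reduces to a linear decision rule. This is a minimal sketch of the
# posterior that a helper like valid() would typically evaluate; the
# function below is hypothetical, not the project's code.
import numpy as np

def posterior_class1(X, mu1, mu2, shared_sigma, N1, N2):
    sigma_inv = np.linalg.pinv(shared_sigma)  # pinv in case sigma is singular
    w = np.dot(sigma_inv, mu1 - mu2)
    b = (-0.5 * np.dot(np.dot(mu1, sigma_inv), mu1)
         + 0.5 * np.dot(np.dot(mu2, sigma_inv), mu2)
         + np.log(float(N1) / N2))
    z = np.dot(X, w) + b
    return np.clip(1.0 / (1.0 + np.exp(-z)), 1e-8, 1.0 - 1e-8)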
# open the test list file
testlist = open("testList", "r")

# delete commented-out tests (lines starting with '#') from the dictionary
for test in testlist:
    if test[0] == '#':
        del testfunction[test[1:].strip()]

if sys.argv[1] == "-n":
    dicList = []
    for filename in glob.glob(os.path.join(path_normalize, '*.yaml')):
        dicList.append(yaml.load(open(filename)))
    normalize(testfunction, dicList)
    save(dicList)

if sys.argv[1] == "-r":
    dic_ideal = {}
    for key, value in testfunction.iteritems():
        value(dic_ideal)
    filelist = []
    heading_list = ["SL NO", "TOOLS", "TopDimention", "Category", "Scenario", "Testcase"]
    generateReport("Consolidated.xlsx", "Consolidate", testfunction, heading_list)
    pos = len(heading_list) + 1
    size = len(heading_list) + 1
    for filename in glob.glob(os.path.join(path, '*.yaml')):
        filelist.append(filename)
    filelist.sort()
def main(self):
    # Read data
    print('Reading data...\n')
    filename1 = '_data.csv'
    filename2 = 'ratings.dat'
    if not exists(filename1):
        if not exists(filename2):
            print('Error: Please add file', filename2, 'into the path!')
            exit(1)
        else:
            create_file(filename2, filename1)
    A = loadtxt(filename1, delimiter=',')

    # Initialize variables
    no_user = num_user(A)
    no_movie = num_movie(A)
    B = shuffle(A)

    # Set parameters
    k_set = [1, 3]
    fold_set = [3, 4]
    rmse = zeros((len(fold_set), len(k_set)))
    ratings_round = False

    # Main algorithm
    for ff in range(len(fold_set)):
        num_fold = fold_set[ff]
        print(str(num_fold) + '-fold Cross Validation begins.\n')
        num_test = int(floor(100000 / num_fold))
        num_train = 100000 - num_test
        for kk in range(len(k_set)):
            k = k_set[kk]
            print('Reducing dimensions to', k, '.')
            error_each_fold = zeros((num_fold, 1))
            for i in range(num_fold):
                print('Fold ' + str(i + 1) + '. Splitting train/test...')
                tr, tt = train_test(B, i, num_test)
                u, v = id_map(B)

                # Build matrix R in the paper
                print('Building matrix R...')
                R_raw = build_matrix(tr, u, v, num_test, no_user, no_movie)
                R_filled = fill_matrix(R_raw)
                m = column_mean(R_filled)
                R = normalize(R_filled, m)

                # Dimensionality reduction
                print('Dimensionality Reduction...')
                U, S, V = svd(R, full_matrices=False)
                Ss = copy(S[0:k])
                Sk = S_to_matrix(Ss)
                Uk = copy(U[:, 0:k])
                Vk = copy(V[0:k, :])
                sqrt_Sk = sqrt(Sk)
                US = dot(Uk, transpose(sqrt_Sk))
                SV = dot(sqrt_Sk, Vk)

                # Predict the ratings
                print('Predicting ratings...')
                pr = predict_ratings(US, SV, m, u, v, tt, num_test)
                if ratings_round:
                    pr = round_array(pr)
                pr_trim = trim_rating(pr)

                # Find error
                print('Calculating error...')
                real = copy(tt[:, 2])
                error = pr_trim - real
                error_each_fold[i] = sqrt(sum(error**2) / num_test)
                print('End one fold.\n')
            rmse[ff, kk] = mean(error_each_fold)
    savetxt("_rmse.csv", rmse, fmt='%.4f', delimiter=",")
    print(rmse)
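# A tiny self-contained check of the rank-k factorization step used in
# main() above, on a toy matrix (not the project's R). transpose() on the
# diagonal sqrt_Sk is a no-op, so plain dot products suffice here.
import numpy as np

R = np.array([[5., 3., 0.], [4., 0., 0.], [1., 1., 5.]])
U, S, V = np.linalg.svd(R, full_matrices=False)
k = 2
sqrt_Sk = np.diag(np.sqrt(S[:k]))
US = np.dot(U[:, :k], sqrt_Sk)  # "user" factors
SV = np.dot(sqrt_Sk, V[:k, :])  # "item" factors
print(np.dot(US, SV))           # rank-k approximation of R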