def main(numberOfSteps, numberOfInvids, numberOfMutations):
    """Run the genetic-algorithm SSVEP classification pipeline.

    numberOfSteps     -- GA generations to run per stimulus
    numberOfInvids    -- population size
    numberOfMutations -- mutations applied per generation

    NOTE(review): `numberOfGenes` and `numberOfLSig` are neither parameters
    nor locals -- they must be module-level globals defined elsewhere in
    this file; confirm before reusing this function.
    """
    rData = prepareData(numberOfGenes)
    dataAll = rData.readDataFromFiles()
    perfectSin = rData.createPerfectSin()
    # Stimulation frequencies (Hz) used throughout this project.
    listOfStim = [14, 28, 8]
    # range(1): only the first subject is processed.  The slicing below
    # suggests the data layout is 15 signals per subject, 5 per stimulus
    # -- TODO confirm against prepareData.
    for sub in range(1):
        bestIndvids = np.empty((3, numberOfGenes))
        bestIndvidsFitness = np.empty((3, numberOfSteps))
        for stim in range(len(listOfStim)):
            # Seed the population from this stimulus' learning signals.
            indvids = createPopulation(
                dataAll[stim * 5 + sub * 15:numberOfLSig + stim * 5 + sub * 15],
                numberOfInvids)
            # Heavy initial mutation (4000) -- presumably to diversify the
            # seeded population before evolution starts.
            indvids = mutate(4000, numberOfInvids, indvids)
            for step in range(numberOfSteps):
                indvids = generation(indvids, dataAll, listOfStim[stim], sub,
                                     numberOfInvids, numberOfMutations)
                # Record fitness of the individual at index 0 -- assumes the
                # population is kept sorted best-first; verify in generation().
                bestIndvidsFitness[stim][step] = get_fitness(
                    indvids[0], listOfStim[stim],
                    dataAll[0 + sub * 15:15 + sub * 15])
            bestIndvids[stim] = indvids[0]
        classifyPrint(bestIndvids, perfectSin,
                      dataAll[0 + sub * 15:15 + sub * 15])
        plt.figure()
        # One fitness curve per stimulus; transpose puts steps on the x-axis.
        plt.plot(bestIndvidsFitness.T)
        plt.show()
def __init__(self, mode="None", use="TweetData", combine_embeddings=False,
             BATCHSIZE=64.0):
    """Set up NER training state: corpus, tag indexing, and embeddings.

    mode               -- tagging scheme: "withIntermediate" enables the full
                          B-/I- BIO tag set; anything else uses I- tags only
    use                -- embedding corpus selector ("10000Kalimat",
                          "TweetData", "All").  NOTE(review): the original
                          comment says "1000Kalimat" but the code compares
                          against "10000Kalimat" -- confirm which is intended.
    combine_embeddings -- when True, also load tag embeddings and the POS-tag
                          feed (50-dim vectors); otherwise a single 100-dim
                          word embedding is loaded
    BATCHSIZE          -- batch size.  NOTE(review): the default is a float
                          (64.0) -- confirm downstream code tolerates that.
    """
    # use = "1000Kalimat", "TweetData"
    self.mode = mode
    self.use = use
    self.combine_embeddings = combine_embeddings
    self.BATCHSIZE = BATCHSIZE
    self.EMBEDDING_DIM = 100
    self.HIDDEN_DIM = 50
    self.nerData = prepareData()
    self.evaluation = evaluate()
    self.text = findEntity()
    # Convert the corpus to BIO-tagged sequences.
    self.data, self.tags = self.text.corpus2BIO(mode=self.mode)
    START_TAG = "<START>"
    STOP_TAG = "<STOP>"
    if self.mode == "withIntermediate":
        # Version 2: full BIO scheme with Begin/Inside tags per entity type.
        self.tag_to_ix = {"None": 0, "B-PER": 1, "I-PER": 2, "B-LOC": 3,
                          "I-LOC": 4, "B-ORG": 5, "I-ORG": 6,
                          START_TAG: 7, STOP_TAG: 8}
    else:
        # Version 1: inside-only tags.
        self.tag_to_ix = {"None": 0, "I-PER": 1, "I-LOC": 2, "I-ORG": 3,
                          START_TAG: 4, STOP_TAG: 5}
    # Pickled embedding files per corpus option.
    if self.use == "10000Kalimat":
        if self.combine_embeddings == True:
            print("loading combinded embeddings, option: 1000Kalimat")
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_with10000Kalimat_50Dimension.pic")
            self.tag2vec = self.nerData.restore_model("./Embeddings/tag2vec_with10000Kalimat_50Dimension.pic")
            self.dataPOSTag = self.nerData.restore_model("./Embeddings/tagFeed_with10000Kalimat.pic")
        else:
            print("loading word embeddings, option: 1000Kalimat")
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_with10000Kalimat_100Dimension.pic")
    elif self.use == "TweetData":
        if self.combine_embeddings == True:
            print("loading combinded embeddings, option: TweetData")
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_withTweetData_50Dimension.pic")
            self.tag2vec = self.nerData.restore_model("./Embeddings/tag2vec_withTweetData_50Dimension.pic")
            self.dataPOSTag = self.nerData.restore_model("./Embeddings/tagFeed_withTweetData.pic")
        else:
            print("loading word embeddings2, option: TweetData")
            # NOTE(review): this filename says "TweetData2", unlike its
            # siblings -- confirm it is the intended file, not a typo.
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_withTweetData2_100Dimension.pic")
    elif self.use == "All":
        if self.combine_embeddings == True:
            print("loading combinded embeddings, option: All")
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_All_50Dimension.pic")
            self.tag2vec = self.nerData.restore_model("./Embeddings/tag2vec_All_50Dimension.pic")
            self.dataPOSTag = self.nerData.restore_model("./Embeddings/tagFeed_All.pic")
        else:
            print("loading word embeddings, option: All")
            self.word2vec = self.nerData.restore_model("./Embeddings/word2vec_All_100Dimension.pic")
from GA import GA
from prepareData import prepareData
from classification import classification
import numpy as np
from sklearn.cross_decomposition import CCA


def computeCorr(signal, signal_set):
    """Return the absolute canonical correlation between two signal sets.

    Fits a one-component CCA on (signal, signal_set) and returns the
    absolute Pearson correlation of the projected pair -- the standard
    CCA score used for SSVEP detection.
    """
    n_components = 1
    cca = CCA(n_components)
    cca.fit(signal, signal_set)
    U, V = cca.transform(signal, signal_set)
    return abs(np.corrcoef(U.T, V.T)[0, 1])


fs = 256
raw_data = prepareData(['Subj4'], fs)

# Reference signals: sine/cosine at the stimulation frequency and at its
# second harmonic, one second long at fs samples.
t = np.linspace(0, 1, 256)
ref14 = np.array([np.sin(2 * np.pi * 14 * t), np.cos(2 * np.pi * 14 * t),
                  np.sin(4 * np.pi * 14 * t), np.cos(4 * np.pi * 14 * t)]).T
ref8 = np.array([np.sin(2 * np.pi * 8 * t), np.cos(2 * np.pi * 8 * t),
                 np.sin(4 * np.pi * 8 * t), np.cos(4 * np.pi * 8 * t)]).T

wynik = 0  # number of correctly classified trials
for pierwszy in range(5):
    signal14 = raw_data.prepared_data(1)[:, pierwszy, :]
    signal8 = raw_data.prepared_data(1)[:, 10 + pierwszy, :]
    # PERF: compute each CCA score exactly once.  The original recomputed
    # computeCorr (a full CCA fit) both in the condition and in the print,
    # up to four redundant fits per trial.  Also removed the unused `licz`.
    corr14_on14 = computeCorr(ref14, signal14)
    corr8_on14 = computeCorr(ref8, signal14)
    if corr14_on14 > corr8_on14:
        print(corr14_on14, corr8_on14)
        wynik += 1
    corr14_on8 = computeCorr(ref14, signal8)
    corr8_on8 = computeCorr(ref8, signal8)
    if corr14_on8 < corr8_on8:
        print(corr14_on8, corr8_on8)
        wynik += 1
#subject test subte = open('C:\Users\Brad\\activity\\activitylearn\data\\test\subject_test.txt','r') #body xacc test #y_test yte = open('C:\Users\Brad\\activity\\activitylearn\data\\test\y_test.txt','r') ''' import os bxtr = open(os.getcwd()+'/data/train/Inertial Signals/total_acc_x_train.txt','r') bytr = open(os.getcwd()+'/data/train/Inertial Signals/total_acc_y_train.txt','r') bztr = open(os.getcwd()+'/data/train/Inertial Signals/total_acc_z_train.txt','r') bxte = open(os.getcwd()+'/data/test/Inertial Signals/total_acc_x_test.txt','r') byte = open(os.getcwd()+'/data/test/Inertial Signals/total_acc_y_test.txt','r') bzte = open(os.getcwd()+'/data/test/Inertial Signals/total_acc_z_test.txt','r') subtr = open(os.getcwd()+'/data/train/subject_train.txt','r') ytr = open(os.getcwd()+'/data/train/y_train.txt','r') subte = open(os.getcwd()+'/data/test/subject_test.txt','r') yte = open(os.getcwd()+'/data/test/y_test.txt','r') #prepare the above data files prd.prepareData(bxtr, bytr, bztr,bxte,byte,bzte,subtr,ytr,subte,yte) print 'data prepared successfully' ## Ok works up until here so far #Extract the featured from the buffered data eaf.extractAllFeatures() print 'features extracted successfully'
from prepareData import prepareData
from models import *
from train import *
from test import *

# Set parameters
char_based = True   # character-level training data vs word-level CSV data
attn = False        # use the attention decoder
n_iter = 60
hidden_size = 256

# Prepare data: 'NoDiac' is the undiacritized source side, 'Diac' the
# diacritized target side.
if (char_based):
    input_lang, output_lang, train_pairs, test_pairs = prepareData(
        'NoDiac', 'Diac',
        '../../dataset_for_train_test/char_basedTrain.data',
        '../../dataset_for_train_test/char_basedTest.data', char_based)
else:
    # NOTE(review): "trainig" in the path below looks like a typo but must
    # match the actual file name on disk -- verify before "fixing".
    input_lang, output_lang, train_pairs, test_pairs = prepareData(
        'NoDiac', 'Diac',
        '../../dataset_for_train_test/trainig_data_3000.csv',
        '../../dataset_for_train_test/test_data_3000_425.csv', char_based)

# Encoder
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)

# Decoder
if (attn):
    decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                              dropout_p=0.1).to(device)
else:
    # NOTE(review): the non-attention decoder construction continues beyond
    # this chunk of the file.
########### IMPORTANTE ###################### # Support Vector Machine #vease http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html #from sklearn.svm import LinearSVC # kNN algorithm #vease http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier #from sklearn.neighbors import KNeighborsClassifier # Logistic Regression #vease http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html #from sklearn.linear_model import LogisticRegression clf.fit(X_train, y_train) y_pred = clf.predict(X_test) from sklearn import metrics from sklearn.metrics import confusion_matrix print('Accuracy of prediction is', clf.score(X_test, y_test)) print('Confusion matrix:\n', confusion_matrix(y_test, y_pred)) print(metrics.classification_report(y_test, y_pred)) if __name__ == '__main__': from prepareData import prepareData sampleTexts, y = prepareData( 'C:\\Users\\navi_\\Dropbox\\NLP\\Corpus\\SMS_Spam_Corpus.txt') build_and_evaluate_sklearn(sampleTexts, y)
sw = set(cw) # precomputes the "length" of the word vector lw = sqrt(sum(c * c for c in cw.values())) # return a tuple return cw, sw, lw def cosdis(v1, v2): # which characters are common to the two words? common = v1[1].intersection(v2[1]) # by definition of cosine distance we have return sum(v1[0][ch] * v2[0][ch] for ch in common) / v1[2] / v2[2] DATA = prepareData(dataFile) words, tags, n_words, n_tags = get_words_and_tags(DATA) sentences = getSentence(DATA) #random.shuffle(sentences) #plot_sentences_long_graph(sentences) word2idx, tag2idx = word_tag_dictionaries(words, tags, MAX_LEN) X, y = map_sentences_and_numbers(sentences, word2idx, tag2idx, n_words, MAX_LEN) y = change_labels_to_categorial(y, n_tags) X_train, X_test, y_train, y_test = split_dataset(X, y)
import torch.autograd as autograd
from entityTagger import entityTagger
from evaluate import evaluate
from lookupBase import lookUp

# --- experiment configuration ---------------------------------------------
mode = "None"
use = "TweetData"  #"All" #"1000Kalimat",
# combine_embeddings = False
code_name = "TEST"
use10000Kalimat = True
experiment = []
# NOTE(review): float batch size (64.0) -- confirm consumers accept a
# non-int here.
BATCHSIZE = 64.0
nerData = prepareData()

# Dead embedding-loading code kept commented out for reference; the same
# logic appears live in the tagger class' __init__ elsewhere in this project.
# if use=="10000Kalimat":
#     if combine_embeddings==True:
#         print("loading combinded embeddings, option: 1000Kalimat")
#         word2vec = nerData.restore_model("./Embeddings/word2vec_with10000Kalimat_50Dimension.pic")
#         tag2vec = nerData.restore_model("./Embeddings/tag2vec_with10000Kalimat_50Dimension.pic")
#         dataPOSTag = nerData.restore_model("./Embeddings/tagFeed_with10000Kalimat.pic")
#     else:
#         print("loading word embeddings, option: 1000Kalimat")
#         word2vec = nerData.restore_model("./Embeddings/word2vec_with10000Kalimat_100Dimension.pic")
# elif use=="TweetData":
#     if combine_embeddings==True:
#         print("loading combinded embeddings, option: TweetData")
#         word2vec = nerData.restore_model("./Embeddings/word2vec_withTweetData_50Dimension.pic")
#         tag2vec = nerData.restore_model("./Embeddings/tag2vec_withTweetData_50Dimension.pic")
# Hyper-parameter CLI flags (the ArgumentParser itself is created above).
parser.add_argument('--step_size', default=10, type=int, help='Step size')
parser.add_argument('--batch_size', default=100, type=int, help='Batch size')
parser.add_argument('--embedding_size', default=100, type=int,
                    help='Embedding size')
parser.add_argument('--rnn_layers', default=2, type=int, help='RNN layer size')
parser.add_argument('--rnn_units', default=50, type=int, help='RNN layer size')
args = parser.parse_args()

print("================== Custom model in LSTM ===========================")
# BUGFIX: parser.print_help() prints the help text itself and returns None;
# the original print(parser.print_help()) additionally emitted a stray
# "None" line after the help.
parser.print_help()
print("===================================================================")

############################### Load and Preprocessing data ###########################################
# Load data
data = prepareData()
# Preprocessing data
#data.preProcessing()
vocab_size = data.vocab_size
embedding_size = args.embedding_size
sentence_size = data.sentence_size

# Prepare data: splits come pre-computed from prepareData.
x_train = data.x_train
x_test = data.x_test
y_train = data.y_train
y_test = data.y_test
from prepareData import prepareData
from geneticAlgorithm import createOutputSignal
import numpy as np

# --- experiment configuration ---------------------------------------------
listOfSubjects = ['SUBJ1']
seconds = 1
fs = 256                      # sampling frequency [Hz]
numberOfGenes = seconds * fs  # one gene per EEG sample
stim = [14, 28, 8]            # stimulation frequencies [Hz]
populationSizeList = [10]     #[150,200,300,500] #10,20,50,10
numberOfMutationsList = [10]  #[10,20,50,70,100,150,200,300]

# --- load EEG data ---------------------------------------------------------
data = prepareData(listOfSubjects, seconds, fs)
dataEEG = data.readDataFromFiles()
#dataAfterGA = np.empty((fs, len(stim),4))

# Run the genetic algorithm once per stimulation frequency and dump the
# resulting signal to a comma-separated text file per frequency.
for stimFreq in stim:
    dataAfterGA = createOutputSignal(stimFreq, populationSizeList,
                                     numberOfMutationsList, dataEEG,
                                     numberOfGenes, fs)
    np.savetxt('stim' + str(stimFreq) + '.txt', np.squeeze(dataAfterGA),
               delimiter=',')
from GA import GA
from prepareData import prepareData
from classification import classification

fs = 256
seconds = 1

# Load one subject's filtered EEG; indices 0, 5 and 10 appear to select the
# first trial of the 14 Hz, 28 Hz and 8 Hz stimulation blocks respectively.
raw_data = prepareData(['Subj1'], fs, seconds)
signal14 = raw_data.filtered_data()[:, 0, :]
signal28 = raw_data.filtered_data()[:, 5, :]
signal8 = raw_data.filtered_data()[:, 10, :]

# Evolve a reference signal for each target frequency, using the opposite
# class as the contrasting example.
ga_runner = GA(fs, seconds, signal14, signal8)
ref14 = ga_runner.run()
#dupa=GA(fs, seconds, signal28,signal8, signal14)
#ref28=dupa.run()
ga_runner = GA(fs, seconds, signal8, signal14)
ref8 = ga_runner.run()

# Score the remaining trials of each class against both references and
# report the total alongside the fraction of the 8 test trials.
score14 = classification(ref14, ref8, signal14, signal8,
                         raw_data.filtered_data()[:, 1:5, :], 0)
score8 = classification(ref14, ref8, signal14, signal8,
                        raw_data.filtered_data()[:, 11:, :], 1)
wynik = score14 + score8  #+classification(ref14,ref28,ref8,raw_data.filtered_data()[:,11:,:],2)
print(wynik, wynik / 8)
from prepareData import prepareData
from classificationGA import processing
import matplotlib.pyplot as plt

# --- GA / cross-validation parameters -------------------------------------
numberOfGenes = 256
learningSetSize = 3
listOfStim = (14, 28, 8)    # stimulation frequencies [Hz]
electrodeSet = (9, 22, 38)  # electrode indices to load
numberOfSteps = 50
numberOfInvids = 120
numberOfMutations = 500
listOfSubjects = ['SUBJ4'] #,'SUBJ4']#,'SUBJ3','SUBJ4']

# One data loader per electrode, then read each channel's signals.
loaders = [prepareData(numberOfGenes, learningSetSize, electrode,
                       listOfSubjects)
           for electrode in electrodeSet]
dataElectrodeA, dataElectrodeB, dataElectrodeC = (
    loader.readDataFromFiles() for loader in loaders)

# Single pass (the loop shape is kept for easy extension to repeats).
for _ in range(1):
    oProcess = processing(numberOfGenes, learningSetSize, listOfStim,
                          dataElectrodeA, dataElectrodeB, dataElectrodeC,
                          numberOfSteps, numberOfInvids, numberOfMutations,
                          len(listOfSubjects))
    oProcess.crossValidation()