Example no. 1
0
def build():
    if not entry1.get() == "":
        checkBinNum()
        # get structure file
        pathToStructure = entry1.get() + "\\Structure.txt"
        pathToStructure = pathToStructure.replace('/', '\\')
        try:
            structure = pd.read_csv(pathToStructure, index_col=False, sep='\t')
        except:
            popErrorMessage("Error- Empty Files!")

    #  df_structure=pd.DataFrame(structure)
    # print df_structure
    #get train set
        pathToTrainSet = entry1.get() + "\\train.csv"
        pathToTrainSet = pathToTrainSet.replace('/', '\\')
        try:
            _trainSet = pd.read_csv(pathToTrainSet)
        except:
            popErrorMessage("Error- Empty Files!")

        df_trainSet = pd.DataFrame(_trainSet)
        classifier = Classifier(structure, entry2)
        updateTrainSet, numericFeaturesArr = classifier.cleanData(
            pathToTrainSet, df_trainSet)
        globals()['trainSet'] = updateTrainSet
        # release the Classify button
        classifyBut.config(state="normal")
        popErrorMessage("Building classifier using train-set is done!")
Example no. 2
0
def run_fbk(seq_record):
    repeat_regions = []
    for feat in seq_record.features:
        if feat.type == "CDS":
            scan_seqfeature_translation(seq_record, feat)
            Classifier.check_known_classes(feat)
            find_top_kmer(feat)
            if len(feat.qualifiers["top_kmer_hits"]) > 0:
                build_table(feat)

                expand_table(feat)
                seq_record.annotations["repetitive_sequence_number"] = 0
                if len(
                        feat.qualifiers["table"][0]
                ) > 3:  #TODO replace this with assessment module as bool check
                    seq_record.annotations["repetitive_sequence_number"] += 1
                    if "gene" in feat.qualifiers:
                        repeat_regions.append(feat.qualifiers["gene"])
                    else:
                        repeat_regions.append("unknown region")
                    make_pattern(feat)
                    feat.qualifiers["has_repeat"] = True
                    seq_record.annotations["repeat_regions"] = repeat_regions
                    should_delete = False
                    for t in feat.qualifiers["table"]:
                        if len(set(list(t))) <= 2:
                            should_delete = True
                    if should_delete:
                        feat.qualifiers["table"] = None
                        feat.qualifiers["has_repeat"] = False
                        del feat.qualifiers["pattern"]
Example no. 3
0
 def buildClicked(self):
     attrs = Data.getAttributesDictionary(self.path + "\\Structure.txt")
     trainData = pandas.DataFrame.from_csv(self.path + "\\train.csv", index_col=None)
     processedData = Data(trainData=trainData, attributes=attrs, numOfBins=self.numOfBins)
     self.classifier = Classifier(data=processedData)
     self.classifyButton['state'] = 'normal'
     tkMessageBox.showinfo("Naive Bayes Classifier", "Building classifier using train-set is done!")
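
Note on the snippet above: pandas.DataFrame.from_csv was deprecated and later removed from pandas; a minimal equivalent using pandas.read_csv with the same path layout (loadTrainData is just an illustrative name):

import os
import pandas

def loadTrainData(path):
    # read_csv is the supported replacement for the removed DataFrame.from_csv
    return pandas.read_csv(os.path.join(path, "train.csv"))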
Example no. 4
0
def saveClf():
    clf = Classifier()
    mass = 125
    clf.loadData("heavyTrainSet_DS_mass{}.npy".format(mass))

    # Extracting features.
    nComps = 50
    print "Extracting features from training data.."
    startExtractTime = time.time()
    percentVarCovered = clf.extractFeatsPCA(nComps)
    print "Original Image Size:", clf.imSet[0].shape
    print "Number of selected principal components:", nComps
    print "Percentage of variance covered:", percentVarCovered
    endExtractTime = time.time()
    extractTime = endExtractTime - startExtractTime
    print "Training data feature extraction time:", extractTime, "sec"
    print

    # Obtain classifier model and print the classification results on
    # training data.
    clf.model.fit(clf.featSet, clf.labelSet)
    predicts = clf.model.predict(clf.featSet)
    print "Classification results on training data (mass = {}):".format(mass)
    getScores(predicts, clf.labelSet, ["Double Chirp", "Not Double Chirp"])

    # Save model
    joblib.dump(clf, "svm_mass{}.joblib".format(mass))
def eval_classifier(G, subs_coo, word_vec):
    # Sometimes the model doesn't predict anything at all for some inputs. It's either the model's
    # fault or the user simply has no subscriptions; in the latter case the model is behaving
    # correctly, but an all-zero prediction would still raise exceptions in sklearn's
    # F1 score function.
    # Currently evaluating performance with OVR Logistic Regression.
    print("\t**Evaluating classifier performance with the embeddings**")

    results = Classifier.evaluate(G, subs_coo, word_vec)
    
    print("\n Evaluation completed using the following:")
    for i in results.keys():
        print("--> ",i)
    
    print("\nPrinting evaluation results : ")
    trainsize = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    for (name,res) in results.items():
        print("\n\nClassifier : ",name)
        for (tr_size,res_) in zip(trainsize,res):
            print("\tTraining size : ",tr_size)
            print("\t\tMicro F1: ",res_[0])
            print("\t\tMacro F1: ",res_[1])
        
        avg = np.average(res,axis=0)
        print("\t---------------------------------------")
        print("\t Average Micro F1 : ",avg[0])
        print("\t Average Macro F1 : ",avg[1])
        Classifier.plot_graph(trainsize, res)
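
The comment above mentions that an all-zero prediction can make sklearn's F1 computation fail; a small sketch of a guard, assuming scikit-learn >= 0.22 where f1_score accepts a zero_division argument:

from sklearn.metrics import f1_score

def safe_f1(y_true, y_pred):
    # zero_division=0 returns 0 instead of raising/warning when a label
    # receives no predictions at all
    micro = f1_score(y_true, y_pred, average='micro', zero_division=0)
    macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    return micro, macro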
Example no. 6
0
   def _predictAnnotated(self, input):
      scores = defaultdict(lambda: -sys.maxint)
      actual = input.words[input.index][0]
      best_distance = sys.maxint
      prev_tokens = True
      i = 0
      for word, loc in input.words:
         if word == actual and i != input.index:
            dist = input.index - i
            if abs(dist) < best_distance:
               best_distance = abs(dist)
            prev_tokens = dist >= 0

         if i < input.index:
            scores[word] = max(i, scores[word])
         elif i > input.index:
            scores[word] = max(i - len(input.words), scores[word])
         i += 1 
      
      results = {word:score for word, score in scores.items()
                 if Classifier._matchesBase(input.input.base, word)}
      for w in self.words:
         if len(results) > 50:
            break
         elif Classifier._matchesBase(input.input.base, w) and w not in results.keys():
            results[w] = -sys.maxint

      if prev_tokens:
         r = sorted(results.items(), key=lambda (word,score): score, reverse=True)
         return r
      else:
         r = sorted(results.items(), key=lambda (word,score): score, reverse=False)
         return r
Example no. 7
0
def Train(data, epochs, batch_size):

    if use_rnn:
        model = C.GRUClassifier(embedding_dim=data.embedding.shape[1],
                                hidden_dim=hidden_dim,
                                label_size=nlabel,
                                batch_size=batch_size,
                                embedding_weights=data.embedding,
                                bidirectional=BiDirection)
    else:
        model = C.CNNClassifier(DIM_EMB=data.embedding.shape[1],
                                NUM_CLASSES=nlabel,
                                NUM_FILTERS=nfilter,
                                embedding_weights=data.embedding)

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_function = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        total_loss = 0.0
        for i, batch in enumerate(data.train_iter):
            # feature, label = batch.sentence, batch.label
            (feature, batch_length), label = batch.sentence, batch.label
            optimizer.zero_grad()
            output = model(feature, batch_length)
            # print(output)
            loss = loss_function(output, label)
            total_loss += loss.item()  # .item() keeps a plain float so the autograd graph isn't retained
            loss.backward()
            optimizer.step()
        print(f"loss on epoch {epoch} = {total_loss}")
    return model
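
A hedged companion sketch for evaluating the returned model. It assumes torch is imported alongside the nn/optim imports the snippet already relies on, and that a data.test_iter exists yielding batches shaped like data.train_iter above:

def Evaluate(model, data_iter):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():  # no gradients needed at inference time
        for batch in data_iter:
            (feature, batch_length), label = batch.sentence, batch.label
            output = model(feature, batch_length)
            preds = output.argmax(dim=1)
            correct += (preds == label).sum().item()
            total += label.size(0)
    return correct / total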
Example no. 8
0
 def runTest(self):
     Classifier.classify_edits(self.edits)
     predictions = [edit.lexical_entailment for edit in self.edits]
     for prediction, edit in zip(predictions, self.edits):
         if prediction != 3:
             print prediction, edit
     self.assertEqual(predictions, self.target)
 def __getClassifier(self):
     if self.__isClassifierExists():
         cls = self.__loadModel()
     else:
         cls = Classifier()
     cls.SetLogger(self)
     return cls
Example no. 10
0
def show(request, filter_id=0):
    """based on the current user's filer, rank the items in source, and show out.
		current one only show the default classifier(last one). Could be extended 
		to show different classifier.
	"""
    errors = []
    c = {}
    c['username'] = request.user.username
    c['filter_id'] = filter_id
    if filter_id == 0:
        user = auth.get_user(request)
        temp = user.profile.default_filter
        if user.profile.default_filter is None:
            errors.append('please train a filter first!')
            return render_to_response('show.html', {
                'username': request.user.username,
                'errors': errors
            })
        else:
            classifier = Classifier(user=auth.get_user(request),
                                    id=user.profile.default_filter.id)
            classifier.load()
            items = Item.objects.all()
            p_label, p_acc, p_val = classifier.predict(items)
            tempList = map(lambda x, y: [x, y], items, p_val)
            tempList = sorted(tempList, key=itemgetter(1), reverse=True)
            c['items'] = map(lambda x: x[0], tempList[:20])

    return render_to_response('show.html', c)
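
The docstring says the view could be extended to show a classifier other than the default; a hypothetical helper for that case, reusing only names already present in the snippet (Classifier, Item, auth, itemgetter):

def rank_items_for_filter(request, filter_id, top_n=20):
    # Load the explicitly requested filter instead of the profile default
    classifier = Classifier(user=auth.get_user(request), id=filter_id)
    classifier.load()
    items = Item.objects.all()
    p_label, p_acc, p_val = classifier.predict(items)
    # Sort items by decision value, highest first, and keep the top_n
    ranked = sorted(zip(items, p_val), key=itemgetter(1), reverse=True)
    return [item for item, _ in ranked[:top_n]]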
Example no. 11
0
def predict(data, samples, classifier='SVM',
            classification='combined', selectFeatures=('CUK', 10)):
    """
    Learns the data-set with the given classifier and
    gives a prediction for each of the samples.
    """
    if (classification == "trained"):
        classifyTrained = True
        classifySurface = False
    elif (classification == 'surface'):
        classifyTrained = False
        classifySurface = True
    else:
        classifyTrained = True
        classifySurface = True
    if (classifier == "SVM"):
        clf = cl.classifyDataSVM(data, classifyTrained,
                                 classifySurface, selectFeatures,scaling=False)
    elif (classifier == "DT"):
        clf = cl.classifyDataDT(data, classifyTrained,
                                classifySurface, selectFeatures,scaling=False)
    elif (classifier == "KNN"):
        clf = cl.classifyDataKNN(data, classifyTrained,
                                 classifySurface, selectFeatures,scaling=False)
    elif (classifier == "LogReg"):
        clf = cl.classifyDataLR(data, classifyTrained,
                                classifySurface, selectFeatures,scaling=False)
    else:
        raise ValueError(str(classifier) + " is not a valid option")
        
    [samples, _,_,_] = clf.extractData(samples,scaling=False)
    
    predictions = [clf.predict(s) for s in samples]
    return predictions
Example no. 12
0
 def runTest(self):
     Classifier.classify_edits(self.edits)
     predictions = [edit.lexical_entailment for edit in self.edits]
     for prediction, edit in zip(predictions, self.edits):
         if prediction != 3:
             print prediction, edit
     self.assertEqual(predictions, self.target)
Example no. 13
0
def main():
    """Main handler for running the experiments.

    Command line args:
        1: The organism to run the experiments on.

    """
    positiveSamples, negativeSamples = SampleParser.parseTrainingSamples()
    print "Number of positive training samples: " + str(len(positiveSamples))
    print "Number of negative training samples: " + str(len(negativeSamples))
    if len(sys.argv) != 2:
        sys.exit("Wrong number of arguments")
    organism = sys.argv[1]
    methods = ('SVM', 'NB')
    pos = []
    neg = []

    for sign in ['pos', 'neg']:
        for i in range(0, len(methods)):
            classifier, count = Classifier.train(positiveSamples,
                                                 negativeSamples, methods[i])
            seqs = SampleParser.readFile(Constants.getTestData(organism, sign))
            if sign == 'pos':
                print "Number of positive testing samples: " + " " + str(
                    len(seqs))
            else:
                print "Number of negative testing samples: " + " " + str(
                    len(seqs))

            predicted = Classifier.predict(classifier, seqs, count)
            incorrect = 0
            correct = 0
            for p in predicted:
                if sign == "pos":
                    if p == 1:
                        correct = correct + 1
                    elif p == 0:
                        incorrect = incorrect + 1
                elif sign == "neg":
                    if p == 0:
                        correct = correct + 1
                    elif p == 1:
                        incorrect = incorrect + 1
            if sign == "pos":
                pos.append(Decimal(correct) / Decimal(correct + incorrect))
            elif sign == "neg":
                neg.append(Decimal(correct) / Decimal(correct + incorrect))

    tot = []
    for i in range(0, len(methods)):
        tot.append((pos[i] + neg[i]) / Decimal(2))

    print "--- SUMMARY ---"
    for i in range(0, len(methods)):
        print "--- RESULTS for " + str(methods[i] + " ---")
        print "Positive: " + str(pos[i])
        print "Negative: " + str(neg[i])
        print "Total: " + str(tot[i])
    ResultPrinter.plot(pos, neg, tot, methods, organism)
    def ShowResultsTab4(self):
        if (not self.listWidget_2.currentItem()):
            return

        fileName = self.listWidget_2.currentItem().text()
        fileURL = self.Tab4DirPath + '/' + fileName
        if (os.path.splitext(fileName)[1] == ".mat"):
            if (len(self.lineEdit_4.text()) == 0):
                kflod = 5
            else:
                kflod = int(self.lineEdit_4.text())

            if (len(self.lineEdit_5.text()) == 0):
                numWave = 128
            else:
                numWave = int(self.lineEdit_5.text())

            if (len(self.lineEdit_6.text()) == 0):
                numLearn = 30
            else:
                numLearn = int(self.lineEdit_6.text())

            if (len(self.lineEdit_3.text()) == 0):
                iterNum = 5
            else:
                iterNum = int(self.lineEdit_3.text())

            self.textEdit.clear()
            accLDA, self.matLDA = Classifier.LDA(fileURL, iterNum, kflod)

            text = 'LDA, {0}-fold cross-validation, {1} iterations, accuracy: {2}'.format(
                kflod, iterNum, accLDA)
            self.textEdit.append(text)

            accSVM, self.matSVM = Classifier.SVM(fileURL, iterNum, kflod)
            text = 'SVM, {0}-fold cross-validation, {1} iterations, accuracy: {2}'.format(
                kflod, iterNum, accSVM)
            self.textEdit.append(text)

            accKNN, self.matKNN = Classifier.KNN(fileURL, iterNum, kflod)
            text = 'KNN, {0}-fold cross-validation, {1} iterations, accuracy: {2}'.format(
                kflod, iterNum, accKNN)
            self.textEdit.append(text)

            accDT, self.matDT = Classifier.DT(fileURL, iterNum, kflod)
            text = 'DecisionTree, {0}-fold cross-validation, {1} iterations, accuracy: {2}'.format(
                kflod, iterNum, accDT)
            self.textEdit.append(text)

            accSDE, self.matSDE = Classifier.SDE(fileURL, kflod, numLearn,
                                                 numWave)
            text = 'SDE, {0}-fold cross-validation, {1} weak learners, subspace dimension {2}, accuracy: {3}'.format(
                kflod, numLearn, numWave, accSDE)
            self.textEdit.append(text)

        if (os.path.splitext(fileName)[1] == ".xls"):
            pass
        if (os.path.splitext(fileName)[1] == ".csv"):
            pass
Example no. 15
0
def PCA_func():
	X, y = load_svmlight_file('./resource/向量化后_带上下文信息_everyline.svmdata')
	print Classifier.trainTestReportRF(X.toarray(),y,0.70)
	for ite in range(1,31):
		print 'ite:'+str(ite)+'\t',
		pca = PCA(n_components=ite)
		newX = pca.fit_transform(X.toarray())
		Classifier.trainTestReportRF(newX,y,0.70)
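
Rather than scanning n_components from 1 to 30 as above, the component count could be picked from the cumulative explained variance; a minimal sketch using scikit-learn's PCA (the 0.95 target is an arbitrary assumption):

from sklearn.decomposition import PCA
import numpy as np

def components_for_variance(X, target=0.95):
    # fit a full PCA once, then take the smallest number of components
    # whose cumulative explained variance reaches the target
    pca = PCA().fit(X)
    cumulative = np.cumsum(pca.explained_variance_ratio_)
    return int(np.searchsorted(cumulative, target) + 1)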
Example no. 16
0
def runOneTrial(books):
    trainingList, testingList = splitBooksIntoTrainingAndTestingSet(books)
    Classifier.train(trainingList)
    
    accuracy = Classifier.test(testingList)
    
    print("\nclassifier accuracy: " + str(accuracy))
    print("\nnumber of books used: " + str( len(books)))
    
    return accuracy
def get_tags():
    Title = request.args.get('title')
    Body = request.args.get('body')
    t = str(Title) + str(Body)
    txt = cl.stemm_stop(cl.clean(t))
    x = cl.TFIDF.transform([txt]).toarray()
    res = cl.lr.predict(x)[0]
    Tags = list(fb.get_ferq_with_txt(txt, [res]))
    data = {'title': Title, 'body': Body, 'tags': Tags}
    return jsonify(data)
Example no. 18
0
 def train(self):
     global Models
     name = self.eval_data.name
     if name in Models:
         self.trainer = Models[name]
     else:
         Models[name] = self.trainer
         self.trainer.train(self.train_data, persist=False)
         self.trainer.train_gist(self.train_data, persist=False)
     self.classifier = Classifier(self.trainer)
     return self
Example no. 19
0
def Main():
    dataReader = DataReader()
    allUserData = dataReader.loadData(
        "DSL-StrongPasswordData")  #loads all users data

    classifier = Classifier()
    scalar = 1.0
    scalarCap = 1.6

    dimDeviation = 1
    dimCap = 21

    while (dimDeviation < dimCap):
        print "testing dims: " + str(dimDeviation)
        for k in range(0, 50):
            correct_person_accuracy = []
            wrong_person_accuracy = []
            owner_index = k  # index for the user that is to be tested
            first_time = True  # temp variable for checking if first time creating test_data_wrong
            #print "testing for person "+str(k)+" created!"
            for i in range(0, 50):
                userDataRaw = allUserData[i]  #data from 1 user
                userData = dataReader.formatData(
                    userDataRaw
                )  #formats data (strips user and session ids etc), returns Matrix.
                if i == owner_index:
                    np.random.shuffle(
                        userData
                    )  # Shuffle to get data from different sessions
                    person1 = DataCluster(
                        userData[0:300],
                        scalar)  # creates the person to be tested
                    test_data_right = userData[300:]
                    # print test_data_right
                else:
                    if first_time:
                        test_data_wrong = userData
                        first_time = False
                    else:
                        test_data_wrong = np.concatenate(
                            (test_data_wrong, userData), axis=0)
            correct_person_accuracy.append(
                classifier.compare_all(person1, test_data_right, True,
                                       dimDeviation))
            wrong_person_accuracy.append(
                classifier.compare_all(person1, test_data_wrong, False,
                                       dimDeviation))

        print "False recognition rate: " + str(
            1 - np.mean(correct_person_accuracy))
        print "False acceptance rate: " + str(1 -
                                              np.mean(wrong_person_accuracy))
        #       scalar += 0.1
        dimDeviation += 1
Example no. 20
0
class CarbonaraBros():
    def __init__(self, relevant_threshold=0.8):
        self.fe = FeaturesExtractor()
        self.relevant_threshold = relevant_threshold
        self.tableClassifier = Classifier('models/table_classifier.h5')
        self.listClassifier = Classifier('models/list_classifier.h5')

    def processDom(self, dom):
        analysis = {
            'table': {
                'relevant': [],
                'not_relevant': [],
            },
            'list': {
                'relevant': [],
                'not_relevant': []
            }
        }

        # table
        for table in dom.xpath("//table"):
            features = self.fe.extract(
                table,
                selected=DefaultFeatures.table_selected,
                features_descriptor=DefaultFeatures.table)
            features_array = self.fe.toArray(features)
            probabilities = self.tableClassifier.classify(features_array)

            score = probabilities[1]
            if score >= self.relevant_threshold:
                analysis['table']['relevant'].append((score, table))
            else:
                analysis['table']['not_relevant'].append((score, table))

        lists = dom.xpath("//ul")
        lists = lists + dom.xpath("//ol")
        lists = lists + dom.xpath("//dl")

        for lst in lists:
            features = self.fe.extract(
                lst,
                selected=DefaultFeatures.list_selected,
                features_descriptor=DefaultFeatures.list)
            features_array = self.fe.toArray(features)
            probabilities = self.listClassifier.classify(features_array)
            score = probabilities[1]

            if score >= self.relevant_threshold:
                analysis['list']['relevant'].append((score, lst))
            else:
                analysis['list']['not_relevant'].append((score, lst))

        return analysis
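
A hedged usage sketch for the class above. It assumes the DOM comes from lxml (the dom.xpath calls are consistent with lxml.html) and that the two .h5 model files exist at the paths given in __init__; page.html is a placeholder input:

import lxml.html

if __name__ == "__main__":
    dom = lxml.html.parse("page.html").getroot()  # hypothetical input page
    bros = CarbonaraBros(relevant_threshold=0.8)
    analysis = bros.processDom(dom)
    for score, table in analysis['table']['relevant']:
        print(score, table.tag)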
Example no. 21
0
def testPCAFit():
    dat = np.load("heavyTrainSet_noDS.npy")
    # dat = dat[: 600]

    # clf = Classifier(svm.SVR(kernel="linear", gamma="auto"))
    clf = Classifier(svm.SVC(kernel="linear", gamma="auto", probability=True))

    # Loading data
    print "Loading training data.."
    clf.imSet, clf.labelSet = DataFactory.getTrainableArrays(dat)

    # Extracting features
    ncomps = 30
    print "Extracting features from training data.."
    startExtractTime = time.time()
    percentVarCovered = clf.extractFeatsPCA(ncomps)
    endExtractTime = time.time()
    extractTime = endExtractTime - startExtractTime
    print "Original Image Size:", clf.imSet[0].shape
    print "Number of selected principal components:", ncomps
    print "Percentage of variance covered:", percentVarCovered
    print "Training data feature extraction time:", extractTime, "sec"
    print

    numIns = len(clf.featSet)
    shuffIndices = range(numIns)
    # np.random.shuffle(shuffIndices)
    shuffFeats = clf.featSet[shuffIndices]
    shuffLabels = clf.labelSet[shuffIndices]
    confMat = np.array([[0, 0], [0, 0]])

    print "Start training.."
    clf.model.fit(shuffFeats, shuffLabels)
    print "Start predicting.."
    probs = clf.model.predict_proba(shuffFeats)
    assert probs.shape == (numIns, 2)

    for i, prob in enumerate(probs):
        if dat[shuffIndices[i]].hasDoubleChirp:
            if prob[0] > 0.5:
                confMat[0, 0] += 1
            else:
                confMat[0, 1] += 1
        else:
            if prob[0] <= 0.5:
                confMat[1, 1] += 1
            else:
                confMat[1, 0] += 1

    print "Training accuracy:", 1.0 * (confMat[0, 0] + confMat[1, 1]) / numIns
    print "Total number of fails:", confMat[0, 1] + confMat[1, 0]
    print "Confusion Matrix"
    printConfMat(confMat, ["DoubleChirp", "NotDoubleChirp"])
Example no. 22
0
 def __init__(self, config):
     self.config = self._Parameters(config)
     if self.config.classifier_method == 'svc':
         self.classifier = Classifier.SVC(config)
     elif self.config.classifier_method == 'sgd':
         self.classifier = Classifier.SGD(config)
     elif self.config.classifier_method == 'random forest':
         self.classifier = Classifier.RandomForest(config)
     elif self.config.classifier_method == 'bagging svc':
         self.classifier = Classifier.BaggingSVC(config)
     elif self.config.classifier_method == 'logistic regression':
         self.classifier = Classifier.LogisticRegression(config)
Example no. 23
0
def build_handler():

    try:
        #setNumOfBins()
        global numOfIntervals
        toCheck = e2.get()
        if toCheck == "":
            showinfo(
                "Naive Bayes Classifier",
                "Please insert an integer for the Discretization bins attribute"
            )
            return
        numOfIntervals = int(toCheck)
    except ValueError:
        showinfo("Naive Bayes Classifier",
                 "Discretization bins must be an integer")
        return
    if numOfIntervals < 2:
        showinfo("Naive Bayes Classifier",
                 "Discretization bins must be at least 2")
        return

    if os.stat(pathToStructure).st_size == 0:
        showinfo("Naive Bayes Classifier",
                 "The file Structure.txt is empty. Please load valid files")
        return
    structure_file = open(pathToStructure, "r")
    try:
        dfTrain = pd.read_csv(pathToTrain)
    except Exception as e:
        if str(e) == "No columns to parse from file":
            showinfo("Naive Bayes Classifier",
                     "The file train.csv is empty. Please load valid files")
        else:
            showinfo("Naive Bayes Classifier",
                     "The file train.csv has errors. Please load valid files")
        return  # without this, the code below would use an undefined dfTrain
    totalNumOfRecords_train = dfTrain.shape[0]  # num of records
    if numOfIntervals > totalNumOfRecords_train:
        showinfo(
            "Naive Bayes Classifier",
            "Discretization bins must not be grater than the number of train set records"
        )
        return
    global dfTrainFinal
    dfTrainFinal = pp.preProcess(structure_file, dfTrain, numOfIntervals)
    structure_file = open(pathToStructure, "r")
    attribute_values_dict = pp.set_attribute_values_dict(structure_file)

    cl.prepareModel(dfTrainFinal, pathToStructure, numOfIntervals,
                    attribute_values_dict)
    classify_Button.config(state='normal')
    showinfo("Naive Bayes Classifier",
             "Building classifier using train-set is done!")
Example no. 24
0
 def runTest(self):
     Classifier.classify_edits(self.edits)
     predictions = [edit.lexical_entailment for edit in self.edits]
     num_incorrect = 0
     for prediction, edit in zip(predictions, self.edits):
         if prediction != 2:
             num_incorrect += 1
             print 'Predicted: %s, target: 2' % prediction
             print edit
             print num_incorrect, len(predictions)
     print '%s percent correct' % (
         100.0 * (len(predictions) - num_incorrect) / len(predictions))
     self.assertEqual(predictions, self.target)
Example no. 25
0
 def runTest(self):
     Classifier.classify_edits(self.edits)
     predictions = [edit.lexical_entailment for edit in self.edits]
     num_incorrect = 0
     for prediction, edit in zip(predictions, self.edits):
         if prediction != 1:
             num_incorrect += 1
             print 'Predicted: %s, target: 1' % prediction
             print edit
             print num_incorrect, len(predictions)
     print '%s percent correct' % (
         100.0 * (len(predictions) - num_incorrect) / len(predictions))
     self.assertEqual(predictions, self.target)
Esempio n. 26
0
def run(pdfpath, toPath):
	if not os.path.exists(r'./RandomForestScikitModel'):
		Classifier.outputModel()
	title, authorInfo, header , predictLabel = PdfProcessor.run(pdfpath)
	dicSet = []
	output = ''
	output += '[Title]:' + title + '\n\n'
	for author in authorInfo:
		output += author.toString() + '\n'
		dicSet.append(author.toDic())
	print output
	open(toPath+'/'+os.path.split(pdfpath)[1].replace('.pdf','.txt'), 'w').writelines(output)
	return title, dicSet, header, predictLabel
Example no. 27
0
 def __init__(self):
     super(VehicleDetector, self).__init__()
     # Sliding windows
     self.yStart = 400
     self.yStop = 650
     self.x_overlap = 0.65
     self.y_overlap = 0.75
     # Filter
     self.filterThreshold = 2
     self.filter = F.Filter(self.filterThreshold)
     # Print summary to check correct parameters
     self.Summary()
     # Sub-components
     self.renderer = R.Renderer()
     self.database = D.Database()
     cars, notcars = self.database.GetListOfImages()
     self.classifier = C.Classifier(cars,
                                    notcars,
                                    loadFromFile=True,
                                    database=self.database)
     # Output video parameters
     self.outputToImages = 0
     self.outputVideoName = self.database.GetOutputVideoPath()
     # Train classifier ?
     self.trainClassifier = 1
     # TODO: implement the loading
     # Bounding boxes
     self.bboxes = self.LoadSlidingWindows()
def test():
    FilePathReadStr = 'Data/Classified/'
    FilePathWriteStr = 'Data/DataProcessing/'
    TextList = Filter(FilePathReadStr + 'Boston.txt')  # filter
    CreateDir(FilePathWriteStr + 'Boston/')
    Classifier.WriteFileLine(FilePathWriteStr + 'Boston/' + 'FilterData.txt',
                             TextList, 'w')  # rebuild the file structure and rewrite it
Example no. 29
0
def main():
    print('--- Reading data and reducing dimensionality ---')
    data = np.empty([110, 10000], np.float32)
    for idx in range(110):
        image = Image.open('Data/s' + str(idx + 1) + '.bmp')
        data[idx] = np.reshape(image, [10000])
    file = open('Data/labels.txt')
    label = np.array(file.readline().strip('\n').split(','), np.int32)
    # Invocation of the dimensionality-reduction algorithm
    data_reduced = mds_func(data)
    # data_reduced = isomap_func(data)
    # data_reduced = le_func(data)
    # data_reduced = lle_func(data)

    classifier = Classifier.Classifier(20)
    for repeat in range(500):
        for idx in range(110):
            if idx % 11 != 0:
                classifier.fit(data_reduced[idx], label[idx])
        sys.stdout.write('\rTraining, %.1f%% complete' % (repeat * 100 / 500))
    sys.stdout.write('\rTraining finished, starting testing\n')
    correct_times = 0
    for idx in range(10):
        val = classifier.classify(data_reduced[idx * 11])
        print('Prediction %2d: %d, ground truth: %d' % (idx + 1, val, label[idx * 11]))
        if val == label[idx * 11]:
            correct_times += 1
    print('Testing finished, accuracy: %.2f%%' % (correct_times * 100 / 10))
Example no. 30
0
 def __init__(self):
     super(Test, self).__init__()
     self.database = D.Database()
     cars, notcars = self.database.GetListOfImages()
     self.classifier = C.Classifier(cars, notcars, loadFromFile=True, database=self.database)
     self.renderer = R.Renderer()
     self.vehicleDetector = V.VehicleDetector()
Example no. 31
0
def check_success(userid):
    data = {}
    try:
        print("check" + str(userid))
        ticketId = request.form['id']
        details = request.form['details']
        timestamp = request.form['timestamp']
        print(ticketId)
        print(details)
        print(timestamp)
        # initialize list of lists
        newdata = [[details]]

        # Create the pandas DataFrame
        df = pd.DataFrame(newdata, columns=['sentence'])

        modelFilePath = uploadDir + '//' + str(userid) + '//' + str(
            userid) + ".joblib"
        featuresFilePath = uploadDir + '//' + str(userid) + '/features.pkl'
        dataFilePath = uploadDir + '//' + str(userid) + '//' + str(
            userid) + ".data"
        df.to_csv(dataFilePath, index=False)
        predictedPriority = classifier.check(featuresFilePath, modelFilePath,
                                             dataFilePath)
        print(predictedPriority)
        data['message'] = {}
        data['message']['id'] = ticketId
        data['message']['priority'] = int(predictedPriority)
    except Exception as ex:
        print(ex)
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        data['error'] = "Exception occurred"
    #return render_template("success.html", name = "123")
    return jsonify(data)
Example no. 32
0
def classifier():
    if request.method == 'POST':
        data = request.data
        dataDict = json.loads(data)
        text = dataDict.get('text')
    elif request.method == 'GET':
        text = request.args.get('text')

    textToSend = []
    textToSend.append(text)
    """CREO IL BANCH"""
    dataToTest = Bunch(data=textToSend, filenames="", target="")
    text_embedded = doc2vec.infer_vector(textToSend[0].split())

    categoriesSimple = Classifier.start(neuralNetworks, dictionaries,
                                        normalizers, dataToTest, 10, "root",
                                        path, text_embedded)

    json_simple = json.dumps(categoriesSimple)
    body = "{\"simple\":" + json_simple + "}"
    response = Response(body, mimetype='application/json')
    response.headers.add('content-length', str(len(body)))
    response.headers.add('Access-Control-Allow-Origin', '*')

    return response
Example no. 33
0
 def build(self):
     #try:
         self.train = pd.read_csv(self.entryPath.get() + "/train.csv")
         if self.validate(self.entryDiscBins.get()):
             # load train file, test file and structure file
             if (os.path.getsize(self.entryPath.get() + "/Structure.txt") == 0):
                 raise Exception("The structure file is empty")
             self.structureFile = open(self.entryPath.get() + "/Structure.txt")
             self.fileHandler = FilesHandler()
             self.structureDic = self.fileHandler.createStstructureDic(self.structureFile)
             self.dataCleaner = DataCleaner(self.structureDic, self.numOfBins)
             self.toLowerCase("train")
             self.train = self.dataCleaner.trainCleaning(self.train)
             self.classifier = Classifier(self.train, self.entryPath.get(), self.structureDic, self.numOfBins)
             self.wasBuilt = True
             tkMessageBox.showinfo("Build Message", "Building classifier using train-set is done!")
Example no. 34
0
def SVMDraw(data, clf, histogram):
    fig = plt.figure()
    ax = Axes3D(fig)
    labels = Classifier.getLabels(clf, data)
    Xs, Ys, Zs, sizes, colors = Utils.getDrawInfo(data, histogram, labels)
    scatter = ax.scatter(Xs, Ys, Zs, s=sizes, c=colors)
    plt.show()
Example no. 35
0
def train(config):
    c = Classifier.classifier(config)
    dataroot = config.get("train", "dataroot")
    dataset = config.get("train", "dataset")
    dw = sutils.dataset_walker(dataset=dataset, dataroot=dataroot, labels=True)
    c.train(dw)
    c.save(config.get("train", "output"))
Example no. 36
0
def classify():
    global data
    classifier = Classifier(test=data[3],
                            structure=data[0],
                            train=data[1],
                            meta_data=data[2],
                            bins=bins_num)
    output = open(filename + "/output.txt", "a")
    i = 1
    for classification in classifier.classify():
        output.write(str(i) + " " + str(classification) + "\n")
        i += 1
    output.close()
    messagebox.showinfo("Naive Bayes Classifier", "Classification is done!")
    root.destroy()
    sys.exit(0)
def main():
    CityFileNameStr = 'Data/City/City.txt'
    FilePathReadStr = 'Data/Classified/'
    FilePathWriteStr = 'Data/DataProcessing/'

    CityList = (Classifier.CreateCityDict(CityFileNameStr)).keys()  # create the list of cities

    for i in CityList:  # filter and reorganize the files
        try:
            TextList = Filter(FilePathReadStr + i + '.txt')  # filter
            CreateDir(FilePathWriteStr + i + '/')
            Classifier.WriteFileLine(FilePathWriteStr + i + '/' +
                                     'FilterData.txt', TextList,
                                     'w')  # rebuild the file structure and rewrite it
        except Exception as e:
            print str(e) + '\t' + i
Example no. 38
0
def predict (scalers, classifiers, scores, info, output) :
    global buf

    info ("start predict")

    shcmd = "arecord -t raw -c 2 -r 2000 -f S16_LE - 2>/dev/null"
    proc = subprocess.Popen (shcmd, stdout = subprocess.PIPE, shell = True)
    read_thread = readdataThread (proc.stdout)
    read_thread.daemon = True
    read_thread.start ()

    count = 0
    p = [0] * Classifier.NUM_OF_LABELS
    while True :
        # wait until a full window is buffered (4 bytes per frame: 2 channels x 16-bit samples)
        if len (buf) >= Classifier.WINDOW_SIZE * 4 :
            data1, data2 = getdata (buf[-Classifier.WINDOW_SIZE * 4:])
            # keep the most recent WINDOW_SIZE - 50 frames so the window slides by 50 frames
            buf = buf[-(Classifier.WINDOW_SIZE - 50) * 4:]

            X = extract_feature(data1, data2)
            tp = Classifier.multi_classification([X], scalers, classifiers, scores)[0]

            p[tp] += 1
            count += 1
            # every 5 windows, emit the majority-vote label and reset the tallies
            if count >= 5 :
                maj = p.index (max (p))
                count = 0
                p = [0] * Classifier.NUM_OF_LABELS
                output (maj)
                sys.stdout.flush ()

    read_thread.join ()
Example no. 39
0
def evalKNN(data, classifyTrained, classifySurface, selectFeatures):
    classifier = cl.classifyDataKNN(data, classifyTrained,
                                    classifySurface, selectFeatures)
    classifier.crossValidation()
#     classifier.showKNeighborsGraph()
    if (plotSurfaces):
        classifier.plotDecisionSurface()
def calc(datasetIndex, multiplierInt):
    csv = pd.DataFrame(columns=['dataset', 'bins', 'f1', 'zero-one'])
    exp = ((multiplierInt + 1) / 2)
    bins = math.ceil(2**exp)
    results = []
    for k in range(trials):
        dp = DataProcessor.DataProcessor(bin_count=bins)
        binnedDataset = dp.StartProcess(datasets[datasetIndex])
        N, Q, F, testData = train(binnedDataset)

        model = Classifier.Classifier(N, Q, F)
        classifiedData = model.classify(testData)

        stats = Results.Results()
        zeroOne = stats.ZeroOneLoss(classifiedData)
        macroF1Average = stats.statsSummary(classifiedData)
        datapoint = {
            'dataset': dataset_names[datasetIndex],
            'bins': bins,
            'f1': macroF1Average,
            'zero-one': zeroOne / 100
        }
        print(datapoint)
        csv = csv.append(datapoint, ignore_index=True)
        # trial = {"zeroOne": zeroOne, "F1": macroF1Average}
        # results.append(trial)
        # print(trial)
    data.append(csv)
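
For reference, the bin count above grows geometrically: exp = (multiplierInt + 1) / 2 and bins = ceil(2**exp), so with Python 3 true division multiplierInt = 1, 3, 5, 7 gives bins = 2, 4, 8, 16, and even values fall in between (e.g. multiplierInt = 2 gives ceil(2**1.5) = 3).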
Example no. 41
0
def getNbestTreeFeaturesPos(data, n,  Klassifizierer="Forest"):
    end = len(data[1,:])
    xlf, X_train, X_test, y_train, y_test = Classifier.classify(data,range(2,end),classifier=Klassifizierer)
    z = np.array(xlf.feature_importances_)
    k = z.argsort()[-n:][::-1]
    newlist = [x+2 for x in k]
    return newlist
Example no. 42
0
def test(testText, dictionary):
    classCntTxt = FileIO.readFile('corpus\\classCount.txt')
    classCnt = {}
    for line in classCntTxt.split('\n'):
        statistics = line.split('\t')
        if len(statistics) > 1:
            classCnt[statistics[0]] = statistics[1]
    return Classifier.naiveBayes(dictionary, classCnt, testText)
Example no. 43
0
def evalDT(data, classifyTrained, classifySurface, selectFeatures):
    classifier = cl.classifyDataDT(data, classifyTrained,
                                   classifySurface, selectFeatures)
    classifier.crossValidation()
#     classifier.showFeatureImportances()
    classifier.createTreePdf()
    if (plotSurfaces):
        classifier.plotDecisionSurface()
Example no. 44
0
def evalSVM(data, classifyTrained, classifySurface, selectFeatures):
    classifier = cl.classifyDataSVM(data, classifyTrained,
                                    classifySurface, selectFeatures)
    classifier.crossValidation()
#     classifier.showProperties()
#     classifier.showSupportVectors()
#     classifier.showSelectedFeatures()
    if (plotSurfaces):
        classifier.plotDecisionSurface()
Example no. 45
0
	def predict(self):
		files = os.listdir(DIR_TO_PREDICT)
		result = []
		for f in files:
			it = clock()
			c_class = Classifier._predict(DIR_TO_PREDICT+f, 1, PROTOTYPE, MODEL, FUNCTIONS)
			result.append(report(f, c_class, clock()-it))
			
		return result
Example no. 46
0
 def train(self):
     global Models
     name = self.eval_data.name
     if name in Models:
         self.trainer = Models[name]
     else:
         Models[name] = self.trainer.train(self.train_data, persist=False)
     self.classifier = Classifier(self.trainer)
     return self
Example no. 47
0
	def predict(self):
		files = os.listdir(DIR_TO_PREDICT)
		result = []
		for f in files:
			it = clock()
			c_class = Classifier._predict(DIR_TO_PREDICT+f, 1, "prototypes.trained", "model_w2v.mm", "ffunctions.mm")
			result.append(report(f, c_class, clock()-it))
			
		return result
def trainCascadeClassifier(maxFPR=Util.DEFAULT_MAX_FPR,
                           minDR=Util.DEFAULT_MIN_DR,
                           targetFPR=Util.DEFAULT_TARGET_FPR,
                           posSampleNum=200, posSamplePath=Util.DEFAULT_FACE_IMAGE_PATH_PREFIX,
                           negSampleNum=400, negSamplePath=Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX,
                           partitionPercentage=Util.DEFAULT_PARTITION_PERCENTAGE,
                           jsonFile=Util.DEFAULT_JSON_FILE):

    trainPosSetSize = int(posSampleNum * partitionPercentage)
    trainNegSetSize = int(negSampleNum * partitionPercentage)
    validPosSetSize = posSampleNum - trainPosSetSize
    validNegSetSize = negSampleNum - trainNegSetSize

    if not (posSamplePath == Util.DEFAULT_FACE_IMAGE_PATH_PREFIX):
        Util.DEFAULT_FACE_IMAGE_PATH_PREFIX = posSamplePath

    if not (negSamplePath == Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX):
        Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX = negSamplePath

    sampleImageBundle = Util.getSampleImageSet(posSampleNum,
                                               posSamplePath,
                                               negSampleNum,
                                               negSamplePath,
                                               partitionPercentage)
    # sampleImageBundle = tuple([tuple([posTrainSampleSet, negTrainSampleSet]),
    #                            tuple([posValidSampleSet, negValidSampleSet])])
    if not sampleImageBundle:
        print 'Reduce the number of sample images; you don\'t have that much data.'
        return None

    sampleSizeBundle = tuple([tuple([trainPosSetSize, trainNegSetSize]),
                              tuple([validPosSetSize, validNegSetSize])])

    print '===== start constructing the cascade classifier and json file ====='

    cascadeClassifier = Classifier.getCascadeClassifier(maxFPR,
                                                        minDR,
                                                        targetFPR,
                                                        sampleImageBundle,
                                                        sampleSizeBundle)

    jsonCascadeClassifier = cascadeClassifier.jsonEncode()
    with open(jsonFile, 'w') as outputJsonFile:
        json.dump(jsonCascadeClassifier, outputJsonFile)

    print '===== complete the training of cascade classifier ====='
    print '===== please check the output file json file ', jsonFile, ' ====='

    return jsonCascadeClassifier
Example no. 49
0
def train(rawdata1, rawdata2, y, info):
    info ("start training")
    X = []
    X1 = rawdata1
    X2 = rawdata2
    y_2 = []
    for yi, x1, x2 in zip(y, X1, X2):
        # slide a fixed-size window over samples 500..1500 of both channels and
        # give every window the label of the whole recording
        for i in range(500, 1500, Classifier.WINDOW_SHIFT_TRAIN):
            X.append( extract_feature(
                        x1[i: i+Classifier.WINDOW_SIZE],
                        x2[i: i+Classifier.WINDOW_SIZE]) )
            y_2.append( yi )
    y = y_2
    scalers, classifiers, scores = Classifier.gen_model(X, y, verbose=False)
    info ("finish training")
    return scalers, classifiers, scores
Example no. 50
0
	def __init__(self,X,y,ratio):
		n_train = int(ratio*X.shape[0])
		self.X = X[:n_train]
		self.y = y[:n_train]
		self.X_testT = X[n_train:]
		self.y_testT = y[n_train:]

		self.X_test = X[n_train:]
		self.y_test = y[n_train:]
		
		#self.X_test = X[:len]
		#self.y_test = y[:len]
		
		self.sums=np.zeros(self.y_test.shape)
		self.W=np.ones((self.X_test.shape[0],1)).flatten()/self.X_test.shape[0]
		
		self.M = 20
		self.G={}
		for i in range(0,self.M):
			self.G[i] = Classifier.randomSampleRandomAlgorithmForWeakClf(self.X, self.y, 0.75)
Example no. 51
0
	def train(self):
		return Classifier._fit("./toTrain/Malw", "./toTrain/NoMalw", "prototypes.trained", "model_w2v.mm", "ffunctions.mm")
Example no. 52
0
 def runTest(self):
     Classifier.classify_edits(self.edits)
     predictions = [edit.lexical_entailment for edit in self.edits]
     print 'Edit:\n%s\nTarget: %s\nPrediction: %s' % (
         self.edits[0], self.target[0], predictions[0])
     self.assertEqual(predictions[0], self.target[0])
Example no. 53
0
	def train(self):
		return Classifier._fit(MALW_PATH, NOMALW_PATH, PROTOTYPE, MODEL, FUNCTIONS)
Time:  %d
""" % (fname,cl,t)

if __name__ == "__main__":
	logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
	system('cls' if name == 'nt' else 'clear')
	header()
	if len(argv)<2: usage(); exit()
	if argv[1]=="--train":
		if len(argv)!=7: usage(); exit()
		train_malware_path     = argv[2]
		train_non_malware_path = argv[3]
		fprototypes = argv[4]
		f2wvmodel 	= argv[5]
		ffmodel		= argv[6]
		Classifier._fit(train_malware_path,train_non_malware_path,True,fprototypes,f2wvmodel,ffmodel)
	
	elif argv[1]=="--predict":
		if len(argv)!=7: usage(); exit()
		exe_file,k,path_prototypes,path_w2v_model,path_f_model = argv[2],int(argv[3]),argv[4],argv[5],argv[6]
		it = clock()
		c_class = Classifier._predict(exe_file,k,path_prototypes,path_w2v_model,path_f_model)
		report(exe_file,c_class,clock()-it)
			
	elif argv[1]=="--statistics":
		if len(argv)!=6: usage(); exit()
		type_statistics        = argv[2]
		k					   = int(argv[3])
		train_malware_path     = argv[4]
		train_non_malware_path = argv[5]
		if type_statistics=="-lou": Statistics._leaving_one_out(train_malware_path,train_non_malware_path,k)
Example no. 55
0
        i = i + 1
        
list_models = []

print "Loading models"
#Load the models
for j in range(len(name_models)):
     #For the moment, don't pass True if there are more than 2 models on Ubuntu
     gm = loadModel(name_models[j],th[j],False)
     list_models.append(gm)


print "Calculating weigths"

#Used to calculate the weights
v0 = Classifier()

for j in range(len(name_models)):
    print "\nFor model " + name_models[j] + ":"
    w_g, w_b = v0.calculateW(list_files[j],list_models[j])
    list_models[j].addWeight("gravity",w_g)
    list_models[j].addWeight("body",w_b)
    

print "\n Init classifers"

l_class = []

for j in range(len(name_models)):
     l_class.append(Classifier())
Example no. 56
0
import re
drname="handshape"
from Classifier import *
import sys
sys.path.insert(0,'/home/lzz/project/project/lstm/')

import lstm.RNN_with_gating

#import whole_network,whole_level_network

if __name__ == '__main__':
    #caffedl=caffeDL('/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/proto/lenet_test.prototxt','/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/model/lenet_iter_5000.caffemodel')
    caffedl=caffeDL('/home/lzz/caffe/caffe-master/examples/imagenet/train_val_16_py.prototxt','/home/lzz/caffe/caffe-master/examples/imagenet/model/4096_iter_10000.caffemodel')
    #caffedlInter=caffeDL('/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/proto_inter/lenet_test.prototxt','/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/model/lenet__iter_400.caffemodel')
    caffedlInter=caffeDL('/home/lzz/caffe/caffe-master/examples/imagenet/intermodel/train_val_inter.prototxt','/home/lzz/caffe/caffe-master/examples/imagenet/intermodel/24inter_iter_300.caffemodel')
    classifier = Classifier()
    #pathTotal='/media/lzz/HD1/1Michael/split/301-610new/'
    #pathTotal='/media/lzz/HD1/1Michael/split/791-1000/'
    #pathTotal='/media/lzz/HD1/1Michael/split/1-23/'
    #pathTotal='/media/lzz/HD1/1Michael/split/new/'
    #pathTotal='/media/lzz/HD1/1Michael/split/new/301-610new/'
    #pathTotal='/media/lzz/HD1/1Michael/split/new/1-250/'
    #pathTotal='/media/lzz/HD1/1Michael/split/new/1-250/Aaron 1-180/'
    #pathTotal='/media/lzz/Data1/michael/301-400/'
    #pathTotal="/home/lzz/hand/"
    #pathTotal='/media/lzz/HD1/real/'
    #pathTotal='/home/lzz/sign/data/'
    #pathTotal='/media/lzz/Data1/kinect/'
    #pathTotal='/home/lzz//sign/data/'

Example no. 57
0
                        dist2+=math.pow(handdicboth[p][i]-self.dic[path].handhog[i],2)
                    if dist1+dist2*lamda<dist:
                        dist=dist1+dist2*lamda
                        prediction=self.dic[p].wordName
                if self.dic[path].wordName==prediction:
                    correct+=1
                else:
                    wrong+=1
        accuracy=float(correct)/(float(correct+wrong))
        print accuracy
    def dtwfeature(self):
        for path in self.filelist:
            print path
            self.dic[path].dtwfeature()
if __name__ == '__main__':
    classifier = Classifier()
    dataset='our'

    trainname={}
    testname={}

    if dataset=='devisign':
        pathTotal='/media/lzz/Data1/devisign/'
        #pathTotal='/media/lzz/Data1/own/'
        #pathTotal='/home/lzz/sign/data1/'
        trainname['P08']=0
        trainname['P02']=0
        trainname['P01']=0
        trainname['P07']=0
        #trainname['P01']=0
        testname['P03']=0
def runClassifier(params, settings, fold):    
    classifier.printParameters('Model Parameters',params)
    classifier.printParameters('Classifier Settings',settings)
    
    populationsInput = list()
    populationsNoiseSource = list()
    populationsRN = list()
    populationsPN = list()
    populationsAN = list()
    projectionsPNAN = list() #keep handle to these for saving learnt weights
    
    if settings['LEARNING']:
        totalSimulationTime = float(settings['OBSERVATION_EXPOSURE_TIME_MS'] * 
                                settings['NUM_OBSERVATIONS'])
    else:
        totalSimulationTime = float(settings['OBSERVATION_EXPOSURE_TIME_MS'] *
                                settings['NUM_OBSERVATIONS_TEST'])
        
    print 'Total Simulation Time will be', totalSimulationTime
    
    DT = 1.0 #ms Integration timestep for simulation
    
    classifier.setupModel(params, settings, DT, totalSimulationTime, 
                          populationsInput, populationsNoiseSource,
                          populationsRN,populationsPN,populationsAN,projectionsPNAN)
    
    utils.recordPopulations(populationsInput,settings['RECORD_POP_INPUT'])
    utils.recordPopulations(populationsNoiseSource,settings['RECORD_POP_NOISE_SOURCE'])
    utils.recordPopulations(populationsRN,settings['RECORD_POP_RN'])
    utils.recordPopulations(populationsPN,settings['RECORD_POP_PN'])
    utils.recordPopulations(populationsAN,settings['RECORD_POP_AN'])
    
    #run the model for the whole learning or the whole testing period
    classifier.run(totalSimulationTime)
    
    fig1 = plt.figure(figsize=(20,20))
    plt.xlabel('Time[ms]', fontsize = 16)
    plt.ylabel('Neurons', fontsize = 16)
    title = 'Testing'
    if settings['LEARNING']:
        title = 'Training'
    title = title + ' - Odour Classification - ' + str(params['NUM_VR']) + \
                                                    ' Virtual Receptors'
    fig1.suptitle(title, fontsize = 18)
    
    indexOffset = 0
    indexOffset = 1 + utils.plotAllSpikes(populationsInput,
                            totalSimulationTime, indexOffset,
                            settings['RECORD_POP_INPUT'])
                            
    indexOffset = 1 + utils.plotAllSpikes(populationsNoiseSource,
                            totalSimulationTime, indexOffset,
                            settings['RECORD_POP_NOISE_SOURCE'])
                            
    indexOffset = 1 + utils.plotAllSpikes(populationsRN,
                                          totalSimulationTime,
                                          indexOffset,settings['RECORD_POP_RN'])
                                          
    indexOffset = 1 + utils.plotAllSpikes(populationsPN,
                                          totalSimulationTime,
                                          indexOffset,settings['RECORD_POP_PN'])
                                          
    indexOffset = 1 + utils.plotAllSpikes(populationsAN,
                                          totalSimulationTime,
                                          indexOffset,settings['RECORD_POP_AN'])
                                          
    
        
    filename = 'RasterPlot-Testing-fold' + str(fold)+'.pdf'
    if settings['LEARNING']:
        filename = 'RasterPlot-Training-fold' + str(fold)+'.pdf'
    plt.savefig(filename)
    plt.close()
    
    
    (fig2, (ax1, ax2, ax3)) = plt.subplots(3, 1, figsize=(20,20), sharex=True)
    plt.axes(ax1)
    utils.plotAllSpikes(populationsRN,totalSimulationTime,0, settings['RECORD_POP_RN'])
    plt.axes(ax2)
    utils.plotAllSpikes(populationsPN,totalSimulationTime,0, settings['RECORD_POP_PN'])
    plt.axes(ax3)
    utils.plotAllSpikes(populationsAN,totalSimulationTime,0, settings['RECORD_POP_AN'])
    ax1.set_title('RN layer spikes', fontsize = 30)
    ax2.set_title('PN layer spikes', fontsize = 30)
    ax3.set_title('AN layer spikes', fontsize = 30)
    ax3.set_xlabel('Simulation time[ms]', fontsize = 30)
    ax3.set_ylabel('Neuron indices', fontsize = 30)
    ax3.tick_params(labelsize=20)
    ax2.tick_params(labelsize=20)
    ax1.tick_params(labelsize=20)



    
    filename = 'Separated_RasterPlot-Testing-fold' + str(fold)+'.pdf'
    if settings['LEARNING']:
        filename = 'Separated_RasterPlot-Training-fold' + str(fold)+'.pdf'
    plt.savefig(filename)
    plt.close()
                                              
#        fig.add_subplot(2,1,2)
#        utils.plotAllSpikes(populationsAN,totalSimulationTime, 0, settings['RECORD_POP_AN'])
    
    #if in the learning stage
    if settings['LEARNING']:
        #store the weight values learnt via plasticity, these will be reloaded as 
        #static weights for test stage
        classLabels = utils.loadListFromCsvFile(settings['CLASS_LABELS_TRAIN'],True)
        classifier.saveLearntWeightsPNAN(settings, params, projectionsPNAN,
                                         len(populationsPN),len(populationsAN))
        winningClassesByObservation, winningSpikeCounts = classifier.calculateWinnersAN(settings,populationsAN, classLabels)
        scorePercent = classifier.calculateScore(winningClassesByObservation,classLabels)
    
                           
    else:
        #save the AN layer spike data from the testing run.
        #This data will be interrogated to find the winning class (most active AN pop)
        #during the presentation of each test observation
        #classifier.saveSpikesAN(settings,populationsAN)
        classLabels = utils.loadListFromCsvFile(settings['CLASS_LABELS_TEST'],True)
        winningClassesByObservation, winningSpikeCounts = classifier.calculateWinnersAN(settings,populationsAN, classLabels)
        scorePercent = classifier.calculateScore(winningClassesByObservation, classLabels)
        utils.saveListAsCsvFile(winningClassesByObservation,settings['CLASSIFICATION_RESULTS_PATH'])
        utils.saveListAsCsvFile(winningSpikeCounts,settings['SPIKE_COUNT_RESULTS_PATH'])

    classifier.end()
    
    #write a marker file to allow invoking programs to know that the Python/Pynn run completed
    utils.saveListToFile(['Pynn Run complete'],settings['RUN_COMPLETE_FILE'])
    
    print 'PyNN run completed.'
    return scorePercent  
            else:
                wrong+=1
        print correct,wrong
        accuracy=float(correct)/float(correct+wrong)
        print accuracy

    def constructTrajectoryMicrosoft(self):
        for path in self.filelist:
            frames=self.dic[path].framelist
            self.dic[path].consTrajectoryYin(frames,'microsoft')
    def combineFeatureMicrosoft(self):
        for path in self.filelist:
            self.dic[path].combineFeatureMicrosoft()
#Microsoft
if __name__ == '__main__':
    classifier = Classifier()
    dataset='our'

    trainname={}
    testname={}

    if dataset=='devisign':
        #pathTotal='/media/lzz/Data1/devisign/'
        #pathTotal='/media/lzz/Data1/own/'
        pathTotal='/home/lzz/sign/data0/'
        trainname['P08']=0
        trainname['P02']=0
        trainname['P01']=0
        trainname['P07']=0
        #trainname['P01']=0
        testname['P03']=0
Example no. 60
0
def evalLR(data, classifyTrained, classifySurface, selectFeatures):
    classifier = cl.classifyDataLR(data, classifyTrained,
                                   classifySurface, selectFeatures)
    classifier.crossValidation()
    if (plotSurfaces):
        classifier.plotDecisionSurface()