Example #1
def kFoldCrossValidation(k, dataset):
    x = dataset.getx()
    y = dataset.gety()
    dim = dataset.getDimension()
    subDataDim = dim // k
    # shuffle the dataset (shuffle into copies so the source rows are not overwritten while they are still being read)
    randomx = np.copy(x)
    randomy = np.copy(y)
    randomList = random.sample(range(0, dim), dim)
    for i in range(0, dim):
        randomx[i] = x[randomList[i]]
        randomy[i] = y[randomList[i]]
    x = randomx
    y = randomy
    for i in range(0, k):
        if i == k - 1:
            # the last fold also takes any remainder rows
            xtest = x[i * subDataDim:]
            ytest = y[i * subDataDim:]
            xtrain = x[0:i * subDataDim]
            ytrain = y[0:i * subDataDim]
        else:
            xtest = x[i * subDataDim:i * subDataDim + subDataDim]
            ytest = y[i * subDataDim:i * subDataDim + subDataDim]
            if i == 0:
                xtrain = x[i * subDataDim + subDataDim:]
                ytrain = y[i * subDataDim + subDataDim:]
            else:
                xtrain = x[:i * subDataDim]
                xtrain = np.concatenate((xtrain, x[i * subDataDim + subDataDim:]))
                ytrain = y[:i * subDataDim]
                ytrain = np.append(ytrain, y[i * subDataDim + subDataDim:])

        trainingSet = d.DataSet(xtrain, ytrain, len(ytrain), dataset.getFeaturesNumber())
        testSet = d.DataSet(xtest, ytest, len(ytest), dataset.getFeaturesNumber())
        test(trainingSet, testSet)
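A minimal driver for the routine above, assuming `d` is the snippet's own DataSet module, whose objects expose `getx`, `gety`, `getDimension` and `getFeaturesNumber` and are built as `DataSet(x, y, size, featuresNumber)`; the synthetic data and the module alias are placeholders:

import random
import numpy as np
import DataSet as d  # assumed: the module aliased as `d` in the snippet above

# Hypothetical smoke test: 100 random examples with 4 features, 5 folds.
features = np.random.rand(100, 4)
labels = np.random.randint(0, 2, size=100)
wrapped = d.DataSet(features, labels, 100, 4)
kFoldCrossValidation(5, wrapped)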
Example #2
def main():
    fold = 10
    data_set = dataFile('wine/wine.data', 0).Open()
    cv_data = CV.CrossValidation(data_set, fold)
    acc = []
    res = 0
    for _ in range(5):
        for i in range(0, fold):
            training_data, test_data = cv_data.findTest(i)

            training_data = DataSet(training_data)
            test_data = DataSet(test_data)

            size_of_test_data = test_data.getNumberOfRow()

            X_train = training_data.getData()
            X_test = test_data.getData()
            y_train = training_data.getLabels()
            y_test = test_data.getLabels()
            classifier = RandomForestClassifier(n_estimators=10,
                                                criterion='entropy')
            classifier.fit(X_train, y_train)
            y_predict = classifier.predict(X_test)
            accuracy = accuracy_score(y_test, y_predict,
                                      normalize=False) / size_of_test_data
            acc.append(accuracy)

        err = 1 - cv_data.Accuracy(acc)
        print(err)
        res += err
    print(res / 5)
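The same 10-fold error estimate can be written more compactly with scikit-learn's own cross-validation helper; a sketch, assuming `X` and `y` hold the wine features and labels already loaded as arrays:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

clf = RandomForestClassifier(n_estimators=10, criterion='entropy')
scores = cross_val_score(clf, X, y, cv=10)  # accuracy of each of the 10 folds
print(1 - scores.mean())                    # average error rate over the folds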
Example #3
def holdoutCrossValidation(dataset, scale=True):
    x = dataset.getx()
    y = dataset.gety()
    dim = dataset.getDimension()
    # 70% train - 30% test
    traindim = dim * 70 // 100
    # pick a random set of indices for the examples that will form the training set
    randomList = random.sample(range(0, dim), traindim)
    # sort the list in reverse so rows can be deleted from the original dataset without shifting the remaining indices
    randomList.sort(reverse=True)
    xtrain = np.empty(shape=(traindim, dataset.getFeaturesNumber()))
    ytrain = np.empty(traindim)

    # move the selected random rows from x into xtrain, and likewise for y
    for i in range(0, traindim):
        xtrain[i] = x[randomList[i]]
        x = np.delete(x, randomList[i], 0)
        ytrain[i] = y[randomList[i]]
        y = np.delete(y, randomList[i])

    xtest = x
    ytest = y

    trainingSet = d.DataSet(xtrain, ytrain, traindim, dataset.getFeaturesNumber())
    testSet = d.DataSet(xtest, ytest, dim - traindim, dataset.getFeaturesNumber())
    print("NOT MINMAXSCALED")
    test(trainingSet, testSet)
    if scale:
        print("MINMAXSCALED")
        trainingSet.minmaxScale()
        testSet.minmaxScale()
        test(trainingSet, testSet)
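For comparison, the same 70/30 hold-out split and min-max scaling can be obtained with scikit-learn; a sketch, assuming `x` and `y` are plain NumPy feature and label arrays:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

xtrain, xtest, ytrain, ytest = train_test_split(x, y, train_size=0.7)
scaler = MinMaxScaler().fit(xtrain)   # fit the scaler on the training part only
xtrain_scaled = scaler.transform(xtrain)
xtest_scaled = scaler.transform(xtest)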
Example #4
def test_log_reg(type='batch'):
    trn = ds.DataSet("./Data/usps_train.csv",
                     set_type='log_reg',
                     delim=',',
                     y_col=256)
    tst = ds.DataSet("./Data/usps_test.csv",
                     set_type='log_reg',
                     delim=',',
                     y_col=256)

    if type == 'batch':
        w = mla.get_data_mwlogb(trn.data_mx, trn.data_my)
    elif type == 'online':
        w = mla.get_data_mwlogo(trn.data_mx, trn.data_my)

    trn_g = mla.get_data_mglog(w, trn.data_mx)
    trn_sse = ds.get_sse(trn_g, trn.data_my)
    trn_ase = ds.get_ase(trn_g, trn.data_my)

    tst_g = mla.get_data_mglog(w, tst.data_mx)
    tst_sse = ds.get_sse(tst_g, tst.data_my)
    tst_ase = ds.get_ase(tst_g, tst.data_my)

    #print "W: ", w
    #print "Training G: ", trn_g
    #print "Testing G: ", tst_g

    print "Logistic Regression: "
    print "Training SSE: ", trn_sse
    print "Testing SSE: ", tst_sse
    print "Training ASE: ", trn_ase
    print "Testing ASE: ", tst_ase, "\n"
Example #5
 def testDaily2Weekly(self):
     date = []
     open = []
     high = []
     low = []
     close = []
     for i in xrange(20021202, 20021205):
         date.append(i)
         open.append(10.0)
         high.append(10.2)
         low.append(9.8)
         close.append(9.9)
     self.assertEquals(open[0], 10.0)
     dailyDataSet = DataSet.DataSet()
     dailyDataSet.register('date', date)
     dailyDataSet.register('open', open)
     self.assertEquals(dailyDataSet.get('open')[0], 10.0)
     dailyDataSet.register('high', high)
     dailyDataSet.register('low', low)
     dailyDataSet.register('close', close)
     newds = DataSet.DataSet()
     dates_from_daily_to_weekly(dailyDataSet, newds)
     self.assertEquals('date' in newds.get_headers(), 1)
     translatedDates = newds.get('date')
     self.assertEquals(len(translatedDates), 1)
     self.assertEquals(translatedDates[0], 20021202)
     self.assertEquals(dailyDataSet.get('open')[0], 10.0)
     self.assertEquals(dailyDataSet.get('close')[0], 9.9)
Example #6
def test_per(type='batch'):
    trn = ds.DataSet("./Data/usps_train.csv",
                     set_type='per',
                     delim=',',
                     y_col=256)
    tst = ds.DataSet("./Data/usps_test.csv",
                     set_type='per',
                     delim=',',
                     y_col=256)

    if type == 'batch':
        w = mla.get_data_mwpb(trn.data_mx, trn.data_my)
    elif type == 'online':
        w = mla.get_data_mwpo(trn.data_mx, trn.data_my)
    elif type == 'voted':
        w = mla.get_data_mwpv(trn.data_mx, trn.data_my)

    trn_g = mla.get_data_mgp(w, trn.data_mx)
    trn_sse = ds.get_sse(trn_g, trn.data_my)
    trn_ase = ds.get_ase(trn_g, trn.data_my)

    tst_g = mla.get_data_mgp(w, tst.data_mx)
    tst_sse = ds.get_sse(tst_g, tst.data_my)
    tst_ase = ds.get_ase(tst_g, tst.data_my)

    # print "WS: ", np.shape(w)
    # print "XS: ", np.shape(trn.data_mx)
    # print "YS: ", np.shape(trn.data_my)

    print type.upper(), " Perceptron: "
    print "Training Mistakes: ", trn_sse
    print "Testing Mistakes:: ", tst_sse
    print "Training Mistake Percent: ", trn_ase
    print "Testing Mistake Percent: ", tst_ase, "\n"
Example #7
def read_img_sets(image_dir, image_size, validation_size=0):
    class DataSets:
        pass

    data_sets = DataSets()

    images, labels, ids, cls, cls_map = load_data(image_dir, image_size)

    if isinstance(validation_size, float):
        validation_size = int(validation_size * images.shape[0])

    test_images = images[:validation_size]
    test_labels = labels[:validation_size]
    test_ids = ids[:validation_size]
    test_cls = cls[:validation_size]

    train_images = images[validation_size:]
    train_labels = labels[validation_size:]
    train_ids = ids[validation_size:]
    train_cls = cls[validation_size:]

    data_sets.train = DataSet.DataSet(train_images, train_labels, train_ids,
                                      train_cls)
    data_sets.test = DataSet.DataSet(test_images, test_labels, test_ids,
                                     test_cls)

    return data_sets, cls_map
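A possible call of the helper above; the directory, image size and validation fraction are illustrative values, not taken from the original project:

# Hypothetical usage: 20% of the loaded images become the test split.
data_sets, cls_map = read_img_sets('train_images/', 64, validation_size=0.2)
train_set, test_set = data_sets.train, data_sets.test  # DataSet objects built above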
Example #8
 def make_dates_and_dataset(self, dates):
     date = []
     for x in dates:
         date.append(x)
     dailyDataSet = DataSet.DataSet()
     dailyDataSet.register('date', date)
     newds = DataSet.DataSet()
     dates_from_daily_to_weekly(dailyDataSet, newds)
     return dates, dailyDataSet, newds
Example #9
def test_knn():
    trn = ds.DataSet("./Data/knn_train.csv",
                     set_type='knn',
                     delim=',',
                     y_col=0)
    tst = ds.DataSet("./Data/knn_test.csv", set_type='knn', delim=',', y_col=0)

    trn_ks = mla.get_best_k_cv(trn.data_mx, trn.data_my, 51)
    tst_ks = mla.get_best_k_cv(tst.data_mx, tst.data_my, 51)

    print trn_ks
Example #10
def divideDataSet(file, attrnames, target, values):
    #the dataset is divided into train (90%) and test (10%) and the two dataset objects are created
    trainsize = (file_len(file) * 90) // 100  # 90% train, 10% test
    allExamples = list()
    for line in open(file).readlines():
        content = line.split(',')
        if file in datasetWithID:  # only these datasets have an id attribute
            content.pop(0)  # drop the first element, otherwise tests would be run on the id attribute
        content = [c.rstrip() for c in content]
        allExamples.append(content)
    train = random.sample(allExamples, trainsize)  # take 90% of the examples for the training set
    dsetC = list(allExamples)  #copy the list to work without modifying it
    trainC = list(train)  #copy the list to work without modifying it
    inCommon = [val for val in dsetC if val in trainC]  #find common elements
    for i in range(len(inCommon)):  #remove elements in common from both
        dsetC.remove(inCommon[i])
        if inCommon[i] in trainC:
            trainC.remove(inCommon[i])
    test = dsetC + trainC  #join to get exactly the remaining 10% of the examples for the test
    #creates dataset structure for the Train
    examples = []
    for i in range(len(train)):
        example = []
        for j in range(len(train[0])):
            example.append(train[i][j])
        examples.append(example)
    attributes = []
    for i in range(0, len(example)):
        attributes.append(i)
    inputs = removeTarget(attributes, target)
    DataTrain = DataSet.DataSet(examples, inputs, attributes, target,
                                attrnames, values)  # creates dataset object for use
    #creates dataset structure for the Test
    examples = []
    for i in range(len(test)):
        example = []
        for j in range(len(test[0])):
            example.append(test[i][j])
        examples.append(example)
    attributes = []
    for i in range(0, len(example)):
        attributes.append(i)
    inputs = removeTarget(attributes, target)
    DataTest = DataSet.DataSet(examples, inputs, attributes, target, attrnames,
                               values)  # creates dataset object for use

    return DataTest, DataTrain
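The common-element bookkeeping above can be avoided by shuffling a copy of the example list and slicing it; a sketch using only names already defined in the function (`allExamples`, `trainsize`):

import random

# Equivalent split: shuffle once, then slice into train (90%) and test (10%).
shuffled = list(allExamples)
random.shuffle(shuffled)
train = shuffled[:trainsize]
test = shuffled[trainsize:]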
Example #11
    def __init__(self,
                 name,
                 start_bar=-1,
                 numbars=-1,
                 dataset=None,
                 filename=None):
        self.filename = filename
        self.subchart = {}  # key is the title of the subchart. Value is the
        # subchart object.
        self.subchart_order = []  # list of subchart titles in order of display
        self.subchart_coords = [{}]  # store y and height values
        self.start_bar = start_bar
        self.numbars = numbars
        self.name = name
        self.drawtrendline = 0
        # assume daily data
        if dataset is None:
            self.daily_dataset = DataSet.DataSet()
        else:
            self.daily_dataset = dataset
        self.weekly_dataset = DataSet.DataSet()
        self.monthly_dataset = DataSet.DataSet()

        self.vertical_border = 15
        self.horizontal_border = 5
        self.yscale_space = 40  # used in labeling the y axis
        self.xscale_space = 30  # used in labeling the x axis
        self.xscale = -1  # xscale is calculated once at the chart level
        self.prev_values = {}  # used by the draw method to only draw
        # when needed
        self.prev_values['x'] = -1
        self.prev_values['y'] = -1
        self.prev_values['width'] = -1
        self.prev_values['height'] = -1
        self.prev_values['numbars'] = -1
        self.prev_values['start_bar'] = -1

        self.scale = SCALE_DAILY  # default

        self.able_to_draw = 0  # disable drawing.
        # Used when chart is first created.
        self.x = -1
        self.y = -1
        self.width = -1
        self.height = -1
        self.calc_next_draw = 1
        # ensure that all datasets have a date series.
        TranslateDate.dates_from_daily_to_weekly(self.get_daily_dataset(),
                                                 self.get_weekly_dataset())
        TranslateDate.dates_from_daily_to_monthly(self.get_daily_dataset(),
                                                  self.get_monthly_dataset())
Example #12
    def getImageNumber(self, targetLabel):
        if self.data_set == 'mnist':
            datasetTest = DataSet('mnist', 'test')
        if self.data_set == 'cifar10':
            datasetTest = DataSet('cifar10', 'test')
        test_x, test_y = datasetTest.get_dataset()

        myList = []
        for i in range(0, 10000):
            label = numpy.where(test_y[i] > 0)[0][0]
            strLabel = self.get_label(int(label))
            if str(targetLabel) == str(strLabel):
                myList.append(i)
        print(myList)
        raw_input()
Example #13
def select_tuple(tuples, k):
    r"""Select the top-k confidence tuples into dataset_list

    Args:
        tuples (list): A list of all instances of Tuple Class, which contains all tuples in whole csv file.
        k (int): A argument for selecting top-k confidence tuples.

    Return:
        dataset_list (DataSet): A list of DataSet instances, which be used as training set.
    """
    for i in range(k):
        print(tuples[i].cid, tuples[i].value_dict)

    # true_str = input("Which tuples violate the CFDs you want to express? "
    #                    "(Please input the cid of tuples, e.g. 2,3,4,5) >>> ")
    # false_str = input("Which tuple don't violate the CFDs you want to express? "
    #                    "(Please input the cid of tuples, e.g. 2,3,4,5) >>> ")
    true_str = '0,1,7'
    false_str = '2,3,4,5,6'

    mark_label(true_str, false_str, tuples)

    dataset_list = list()
    for i in range(k):
        dataset = DataSet()
        dataset.cid = tuples[i].cid
        dataset.feature_vec = tuples[i].feature_vec
        dataset.label = tuples[i].label
        dataset_list.append(dataset)
    return dataset_list
Example #14
 def add_source(self):
     filename = tkFileDialog.askopenfilename()
     if filename:
         dataset = DataSet()
         dataset.readFromFile(filename)
         self.datasets.append(dataset)
         self.sourcelist.insert(END, str(dataset))
Example #15
 def __init__(self, DataFileName, IndependentVariablesList, DependentVariablesList):
     self.Data = DataSet(DataFileName)
     self.TrainingData = self.Data.TrainingData()
     self.TestingData = self.Data.TestingData()
     self.DependentVariablesList = DependentVariablesList
     self.IndependentVariablesList = IndependentVariablesList
     self.CalculateAICs()
Example #16
class OLSUnitTests(unittest.TestCase):
    Data = DataSet('TestData.csv')
    OLSTest = OLSRegression('TestData.csv', 'y', ['x', 'x2'])

    def test_FitSize(self):
        NewOLS = OLSRegression('TestData.csv', 'x', ['x'])
        Actual = len(NewOLS.ModelFit())
        Expected = len(NewOLS.Data.TestingData()['x'])
        self.assertEqual(Actual, Expected)

    def test_DataSetWithParameters(self):
        TrainingDataLength = len(self.Data.TrainingData())
        Expected = numpy.round(len(self.Data.AllData().index) * 0.8)
        self.assertEqual(TrainingDataLength, Expected)

    def test_TestingSet(self):
        Actual = len(self.Data.TestingData())
        Expected = numpy.round(len(self.Data.AllData().index) * 0.2)
        self.assertEqual(Actual, Expected)

    def test_SetDependentVariableList(self):
        Actual = self.OLSTest.DependentVariablesList
        Expected = ['x', 'x2']
        self.assertEqual(Actual, Expected)

    def test_RegressionCoefficient(self):
        NewOLS = OLSRegression('TestData.csv', 'y', ['x'])
        Actual = numpy.round(NewOLS.Regression.coef_)
        self.assertEqual(Actual, 2)

    def test_AIC(self):
        NewOLS = OLSRegression('TestData.csv', 'x', ['x'])
        Actual = numpy.round(NewOLS.AIC())
        self.assertEqual(Actual, -94)
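To run the suite above directly, a minimal entry point (assuming the module already imports unittest as shown):

if __name__ == '__main__':
    unittest.main()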
Example #17
def shuffleDataTrain(dataset):
    # given the training set, randomize the order of the examples and take a subset
    l = len(dataset.examples)
    sds = random.sample(
        dataset.examples, l
    )  # sample takes n random examples from the dataset; with n = number of examples this is a full shuffle
    # from half of the examples, keep only a random subset, to try to build trees that differ a little from one another
    for i in range(0, l // 2):
        a = random.randint(0, 4)
        if a == 1:
            sds.pop(i)
    #create examples, attributes and inputs as before...
    examples = []
    for i in range(len(sds)):
        example = []
        for j in range(len(sds[0])):
            example.append(sds[i][j])
        examples.append(example)
    attributes = []
    for i in range(0, len(example)):
        attributes.append(i)
    inputs = removeTarget(attributes, dataset.target)
    return DataSet.DataSet(examples, inputs, attributes, dataset.target,
                           dataset.attrnames,
                           dataset.values)  # creates dataset object for use
Example #18
def __InterpretTxtFile(self, filePath, hasFeatureLine, seperator, deleteBadData):
    try:
        fptr = open(filePath, 'r')
    except Exception as e:
        Errors.ShowWarningMsgBox(self, str(e))
        return None

    lines = fptr.readlines()

    lineCount = len(lines)

    newLines = list()

    for i in range(lineCount):
        newLineArray = lines[i].split(" ")
        if not (seperator == ""):
            newLineArray = lines[i].split(seperator)
        if deleteBadData:
            if newLineArray.count(" ") is not 0 or newLineArray.count("") is not 0:
                continue
        newLine = ",".join(newLineArray)
        newLines.append(newLine)

    for tmpLine in newLines:

        if tmpLine.count("\n") > 0:
            tmp = tmpLine.replace("\n","")
            index = newLines.index(tmpLine)
            newLines[index] = tmp

    ourDataSet = DataSet.DataSet(newLines, hasFeatureLine)

    fptr.close()

    return ourDataSet
Example #19
def openDataSet(filename, v):
    data = DS.DataSet("./Data/" + filename)
    #Number of input units, one for each element in input entry
    n = data.getNumInputElem()
    inputs = data.getInputs(test=v)
    targets = data.getTargets()
    return n, inputs, targets
Example #20
    def _readOrLoadDataset(self, ds_type, reference_dataset=None):
        fname = "cache/%s_data.pickle" % ds_type
        try:
            if not params.USE_DATA_CACHE:
                raise ("Do not use cache")
            f = open(fname, 'rb')
            ds = pickle.load(f)
            if params.DEBUG:
                print "Using cached %s..." % fname
        except:
            if params.DEBUG:
                print "Reading and dumping %s..." % fname

            data_fname = "data/%s.csv" % ds_type
            ds = DataSet.DataSet(ds_type == 'train')
            ds.importData(data_fname)

            if reference_dataset is not None:
                ds.dropUselessFeatures(reference_dataset.getUselessFeatures())
                ds.addNanFeatures(reference_dataset.getNanColumns())

            if params.LOG_TRANSFORM:
                ds.logTransformQuantitativeFeatures()
            if params.STANDARDIZE_DATA:
                ds.standardizeQuantitativeFeatures(
                    means=(reference_dataset.getQuantitativeFeatureMeans()
                           if reference_dataset is not None else None),
                    variances=(
                        reference_dataset.getQuantitativeFeatureVariances()
                        if reference_dataset is not None else None))

            pickle.dump(ds, open(fname, 'wb'))

        return ds
Example #21
def kFoldCrossValidation(k, dataset):
    perceptronAccuracies = []
    votedPerceptronAccuracies = []
    x = dataset.x
    y = dataset.y
    size = dataset.size
    subDataSize = size / k
    # shuffle dataset
    randomx = copy.deepcopy(x)
    randomy = copy.deepcopy(y)
    randomList = random.sample(xrange(size), size)
    for i in xrange(0, size):
        randomx[i] = x[randomList[i]]
        randomy[i] = y[randomList[i]]
    x = randomx
    y = randomy
    for i in xrange(0, k):
        if i == k - 1:
            xtest = x[i * subDataSize:]
            ytest = y[i * subDataSize:]
            xtrain = x[0:i * subDataSize]
            ytrain = y[0:i * subDataSize]
        else:
            xtest = x[i * subDataSize:i * subDataSize + subDataSize]
            ytest = y[i * subDataSize:i * subDataSize + subDataSize]
            if i == 0:
                xtrain=x[i * subDataSize + subDataSize:]
                ytrain=y[i * subDataSize + subDataSize:]
            else:
                xtrain = x[:i * subDataSize]
                xtrain = np.concatenate((xtrain, x[i * subDataSize + subDataSize:]))
                ytrain = y[:i * subDataSize]
                ytrain = np.append(ytrain, y[i * subDataSize + subDataSize:])

        trainingSet = ds.DataSet(xtrain, ytrain, len(ytrain), dataset.numAttributes)
        testSet = ds.DataSet(xtest, ytest, len(ytest), dataset.numAttributes)
        perceptronAccuracy, votedPerceptronAccuracy = test(trainingSet, testSet)
        perceptronAccuracies.append(perceptronAccuracy)
        votedPerceptronAccuracies.append(votedPerceptronAccuracy)

    avgPerceptronAccuracy = round(sum(perceptronAccuracies) / k, 2)
    avgVotedPerceptronAccuracy = round(sum(votedPerceptronAccuracies) / k, 2)
    print("")
    print("Perceptron average accuracy: {}%.".format(avgPerceptronAccuracy))
    print("Voted perceptron average accuracy: {}%.".format(avgVotedPerceptronAccuracy))
    print("")
    print("")
Example #22
def load_patterns():
    # load pattern data
    dataSet = ds.DataSet(
        '/home/adriano/Projects/ANNDispersionRelation/ann_training/2d/square/te/tests_new_db/16_interpolated_points/'
    )
    dataSet.read_csv_file('dr_te_pc_dataset.csv')
    #print(len(dataSet.all_patterns[192:,:]))
    return dataSet
Example #23
def load_patterns():
    # load pattern data
    dataSet = ds.DataSet(
        '/home/adriano/Projects/ANNDispersionRelation/ann_training/3d/fcc/diamond2/no_material/16_interpolated_points/'
    )
    dataSet.read_csv_file('dr_diamond_pc_dataset.csv')
    #print(len(dataSet.all_patterns[192:,:]))
    return dataSet
Example #24
    def trainHMM(self, filename):
        print "Reading training data from %s" % (filename)

        # Read in the training data from the file
        dataset = DataSet(filename)
        states, obs = dataset.read_file()

        # Instantiate and train the HMM
        self.hmm, ll = train_model(dataset, 1e-5)
Example #25
def load_chart(file_name):
    d = load_file(file_name)
    if d is not None:
        ds = DataSet()
        for x in d.keys():
            ds.register(x, d[x])
        name = os.path.basename(file_name)
        chart = create_standard_chart(name, ds)
        chart.set_file_name(file_name)
        chart.can_draw(1)
        current_chart.add(name, chart)
Example #26
def setup_loaded_chart(d, file_name):
    ds = DataSet()
    for x in d.keys():
        ds.register(x, d[x])
    name = os.path.basename(file_name)
    chart = create_default_chart(name, ds)
    chart.set_file_name(file_name)
    current_chart.add(name, chart)
    current_chart.set_current(name)
    chart.can_draw(1)
    schedule_redraw()
Example #27
def testing(fileDataset, number):
    length = len(fileDataset.examples) // 10
    k = number * length
    validation = []
    i = 0
    while i < length:
        validation.append(fileDataset.examples[i + k])
        i = i + 1
    return DataSet.DataSet(validation, fileDataset.inputs,
                           fileDataset.attributes, fileDataset.target,
                           fileDataset.attrnames, fileDataset.values)
Example #28
    def estMaxSequence(self, filename):

        print("Reading testing data from %s" % (filename))

        # Read in the testing data from the file
        self.dataset = DataSet(filename)
        self.dataset.readFile(200, "test")

        # Run Viterbi to estimate most likely sequence
        viterbi = Viterbi(self.hmm)
        self.maxSequence = viterbi.mostLikelySequence(self.dataset.testOutput)
Example #29
    def train_network_QNN(self):
        # Train an mnist model.

        if self.data_set == 'mnist':
            datasetTrain = DataSet('mnist', 'training')
            datasetTest = DataSet('mnist', 'test')
        if self.data_set == 'cifar10':
            datasetTrain = DataSet('cifar10', 'training')
            datasetTest = DataSet('cifar10', 'test')
        if self.data_set == 'fashion':
            datasetTrain = DataSet('fashion', 'training')
            datasetTest = DataSet('fashion', 'test')

        train_x, train_y = datasetTrain.get_dataset()
        test_x, test_y = datasetTest.get_dataset()

        needToTrain, myModel = func.getModelFromQNN(self.cf, train_x, train_y,
                                                    test_x, test_y)

        # myModel=func.getModelFromDeepGame(cf, train_x,train_y,test_x,test_y,epochs,batch_size)

        self.model = myModel

        score = (self.model).evaluate(test_x, test_y, verbose=0)
        print(
            "Precision " + str(self.abits) + " " + str(self.wbits) +
            " Test loss:", score[0])
        print(
            "Precision " + str(self.abits) + " " + str(self.wbits) +
            " Test accuracy:", score[1])
Example #30
    def estimate(self):
        if self.twords > 0 :
            da = DataSet()
            da.read_wordmap2(self.dir + self.wordmapfile,self.id2word)

        print("Sampling ",self.niters," iterations!\n")

        # set up the TPTM
        self.tp = TPTM(10, self.K, 100, 10000, self.nw, self.ut)  # default parameters are set here; wrap them in an outer layer later
        self.tp.preprocessing()
        self.alpha_c = self.tp.Get_alpha_c()
        self.liter = self.tp.iteration

        last_iter = self.liter
        for self.liter in range(last_iter+1,self.niters+last_iter) :
            print("Iteration ",self.liter," ...\n")

            if self.liter != 0 and (self.liter % 2) != 0:  # odd iterations update the lambda values
                self.tp.update_lambda_s(self.liter)
                self.alpha_c = self.tp.Get_alpha_c()
            elif self.liter != 0 and (self.liter % 2) == 0:  # even iterations update the x_u_c_t values
                self.tp.update_x_u_c_t(self.liter)
                self.alpha_c = self.tp.Get_alpha_c()
            elif self.liter != 0 and self.liter%50 == 0:
                self.tp.update_Mpre_c(self.liter)
                self.tp.update_Mpre_s(self.liter)
                self.alpha_c = self.tp.Get_alpha_c()

            # for all z_i
            for m in range(self.M) :
                for n in range(self.ptrndata.docs[m].length) :
                    # (z_i) = z[m][n]
                    # sample from p(z_i|z_-i,w)
                    topic = self.sampling(m,n)
                    self.z[m][n] = topic

            if self.savestep > 0 :
                if self.liter % self.savestep == 0 :
                    # saving the model
                    print("Saving the model at iteration ",self.liter," ...\n")
                    self.compute_theta()
                    self.compute_phi()
                    u = Utils()
                    self.save_model(u.generate_model_name(self.liter))

        print("Gibbs sampling completed!\n")
        print("Saving the final model!\n")
        self.compute_theta()
        self.compute_phi()
        self.liter -= 1
        u = Utils()
        self.save_model(u.generate_model_name(-1))