def train(self, dataFile):
     '''Trains the Naive Bayes Sentiment Classifier.'''
     reader = DataReader(dataFile)
     # Grab the first label from the first document ...
     for label, tokens, company, date, price, risklength in reader:
         self.label2 = label
         break
     # ... then iterate until we hit a document carrying the other label
     for label, tokens, company, date, price, risklength in reader:
         if self.label2 != label:
             self.label1 = label
             break
     reader = DataReader(dataFile)
     #Iterate through all of the documents in the training set
     for label, tokens, company, date, price, risklength in reader:
         #Check if the document is positive or negative, so that we can modify the according dictionary
         if label == self.label1:
             # For each token: increment its count, bump the running total of
             # words seen in positive documents ('total*'), and register the
             # token in the shared vocabulary "allwords"
             for token in tokens:
                 self.positivedict[token] += 1
                 self.positivedict['total*'] += 1
                 self.allwords[token] = 0
         # Repeat for negative
         if label == self.label2:
             for token in tokens:
                 self.negativedict[token] += 1
                 self.negativedict['total*'] += 1
                 self.allwords[token] = 0
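
 # A possible classify() to pair with the training pass above, using add-one
 # (Laplace) smoothed log-probabilities over the accumulated counts. Not part of
 # the original snippet: the method name is an assumption, and the class prior
 # is omitted for brevity.
def classify(self, sText):
     import math
     tokens = sText.split()
     vocab_size = len(self.allwords)
     pos_total = self.positivedict['total*']
     neg_total = self.negativedict['total*']
     log_pos, log_neg = 0.0, 0.0
     for token in tokens:
         log_pos += math.log((self.positivedict.get(token, 0) + 1.0) / (pos_total + vocab_size))
         log_neg += math.log((self.negativedict.get(token, 0) + 1.0) / (neg_total + vocab_size))
     # label1 was counted into positivedict during training, label2 into negativedict
     return self.label1 if log_pos > log_neg else self.label2
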
def WalkThroughAllOptimizers(option):

    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.NormalizeY()

    n_input, n_output = dataReader.num_feature, 1
    n_hidden = option[2]
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps, InitialMethod.Xavier, option[0])

    loss_history = CLossHistory()
    net = TwoLayerNet(NetType.Fitting)

    wbs = net.train(dataReader, params, loss_history)

    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)

    print("wait for 10 seconds...")

    ShowResult(net, X, Y, title, trace.wb1, trace.wb2)
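
# A hypothetical driver for the walkthrough above: each option is unpacked as
# (optimizer, eta, n_hidden). OptimizerName.SGD appears elsewhere in these
# examples; the Momentum and Adam members are assumptions about that enum.
if __name__ == '__main__':
    options = [
        (OptimizerName.SGD, 0.1, 4),
        (OptimizerName.Momentum, 0.1, 4),
        (OptimizerName.Adam, 0.005, 4),
    ]
    for option in options:
        WalkThroughAllOptimizers(option)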
Example #3
 def update_answer(self):
     dataset = db.DataReader()
     self.data2 = self.text2.get()
     if self.data2.lower() == "positive":
         value = "Pos"
     elif self.data2.lower() == "negative":
         value = "Neg"
     else:
         return  # anything else would otherwise leave `value` undefined
     dataset.Store_data(value, self.data)
Example #4
	def train(self, dataFile):
		'''Trains the Naive Bayes Sentiment Classifier.'''
		
		reader = DataReader(dataFile)
		
		# go through all the docs in our corpus
		for doc in reader:
			(label, data) = doc
			self.total_docs += 1
			
			# if we haven't seen this label at all yet
			if label not in self.label_dictionary:
				
				self.label_dictionary[label] = 1				# note that we have seen one document of that label
				
				self.master_unigram_dictionary[label] = {}	 			# add a new dictionary to the master dictionary under that label 
				self.master_bigram_dictionary[label] = {}
				
				self.unigram_dictionary[label] = 0				# note that we have seen 0 words of that label
				self.bigram_dictionary[label] = 0
			else:
				self.label_dictionary[label] += 1				# increment our count of the documents of this label 
				
			# we now have to count the actual words in this doc 
			unigram_dict = self.master_unigram_dictionary[label]
			bigram_dict = self.master_bigram_dictionary[label]		
			
			# train on the unigrams
			for unigram in data:

				self.unigram_vocab.add( unigram )
				
				# if we have not seen this unigram under this label
				if unigram not in unigram_dict:
					unigram_dict[unigram] = 1					# note that we have seen the unigram once under this label
				else:
					unigram_dict[unigram] += 1					# increment the count of this unigram under this label
					
				self.unigram_dictionary[label] += 1			# increment our count of the total non-unique unigrams under this label
			
			# train on the bigrams
			for i in range(len(data)-1):
				bigram = (data[i], data[i+1])

				self.bigram_vocab.add( bigram )
				
				# if we have not seen this bigram under this label
				if bigram not in bigram_dict:
					bigram_dict[bigram] = 1					# note that we have seen the bigram once under this label
				else:
					bigram_dict[bigram] += 1					# increment the count of this bigram under this label
					
				self.bigram_dictionary[label] += 1			# increment our count of the total non-unique bigrams under this label
				
		self.save(dataFile + ".best.pickled")
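
	# A companion sketch, not in the original class: score a tokenized document
	# against one label with add-one smoothing over the unigram counts gathered
	# in train(). The method name and the decision to use unigrams only are
	# assumptions.
	def score_label(self, label, data):
		import math
		unigram_dict = self.master_unigram_dictionary[label]
		total = self.unigram_dictionary[label]		# non-unique unigrams seen under this label
		vocab = len(self.unigram_vocab)
		score = math.log(self.label_dictionary[label] / float(self.total_docs))	# log prior
		for unigram in data:
			count = unigram_dict.get(unigram, 0)
			score += math.log((count + 1.0) / (total + vocab))
		return score
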
Example #5
def Main():
    dataReader = DataReader()
    allUserData = dataReader.loadData(
        "DSL-StrongPasswordData")  #loads all users data

    classifier = Classifier()
    scalar = 1.0
    scalarCap = 1.6

    dimDeviation = 1
    dimCap = 21

    while (dimDeviation < dimCap):
        print "testing dims: " + str(dimDeviation)
        # accumulate accuracies across all 50 users so the rates printed after
        # this loop reflect every user, not just the last one
        correct_person_accuracy = []
        wrong_person_accuracy = []
        for k in range(0, 50):
            owner_index = k  # index for the user that is to be tested
            first_time = True  # temp variable for checking if first time creating test_data_wrong
            #print "testing for person "+str(k)+" created!"
            for i in range(0, 50):
                userDataRaw = allUserData[i]  #data from 1 user
                userData = dataReader.formatData(
                    userDataRaw
                )  #formats data (strips user and session ids etc), returns Matrix.
                if i == owner_index:
                    np.random.shuffle(
                        userData
                    )  # Shuffle to get data from different sessions
                    person1 = DataCluster(
                        userData[0:300],
                        scalar)  # creates the person to be tested
                    test_data_right = userData[300:]
                    # print test_data_right
                else:
                    if first_time:
                        test_data_wrong = userData
                        first_time = False
                    else:
                        test_data_wrong = np.concatenate(
                            (test_data_wrong, userData), axis=0)
            correct_person_accuracy.append(
                classifier.compare_all(person1, test_data_right, True,
                                       dimDeviation))
            wrong_person_accuracy.append(
                classifier.compare_all(person1, test_data_wrong, False,
                                       dimDeviation))

        print "False recognition rate: " + str(
            1 - np.mean(correct_person_accuracy))
        print "False acceptance rate: " + str(1 -
                                              np.mean(wrong_person_accuracy))
        #       scalar += 0.1
        dimDeviation += 1
Example #6
 def __init__(self, file_tree, search_box, setup):
     # initialize the global values
     self.file_tree = file_tree
     self.search_box = search_box
     self.setup = setup
     self.directory = ""
     self.selected_file_path = ""
     self.file_list = None
     self.search_list = None
     self.index = None
     self.DataReader = DataReader.DataReader()
Example #7
    def __init__(self, sess, img_h, img_w, img_c, op):
        #---input setting---#
        self.sess = sess
        self.op = op
        self.output_height, self.output_width = img_h, img_w
        self.c_dim = img_c
        self.orig_embed_size = 4800
       
        #---training data---#
        if op == "train":
            self.batch_size = 64
            print "loading training data......"
            #
            path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
            train_path = os.path.join(path,'train')
            self.data = DataReader.DataReader(batch_size=self.batch_size)
            self.data_objs = self.data.acgen_data()

            
            #
           
            
           
        #---testing data---#
        if op == "test":
            self.batch_size = 1
            self.test_sent = tf.placeholder(
                tf.float32, shape=[1, self.orig_embed_size])
        #---model network setting---#
        self.gf_dim = 64
        self.df_dim = 64
        self.z_dim = 100
        self.embed_size = 128
        self.keep_prob = tf.placeholder(tf.float32)
        self.cat_dim = 1
        self.con_dim = 1
        self.rand_dim = self.z_dim

        #---batch_norm of discriminator---#
        self.d_bn0 = batch_norm(name="d_bn0")
        self.d_bn1 = batch_norm(name="d_bn1")
        self.d_bn2 = batch_norm(name="d_bn2")
        self.d_bn3 = batch_norm(name="d_bn3")
        self.d_bn4 = batch_norm(name="d_bn4")
        #---batch_norm of generator---#
        self.g_bn0 = batch_norm(name="g_bn0")
        self.g_bn1 = batch_norm(name="g_bn1")
        self.g_bn2 = batch_norm(name="g_bn2")
        self.g_bn3 = batch_norm(name="g_bn3")
        #---build model---#
        print "building model......"
        self.build_model()
Example #8
def eval(sText):
    totalaccuracy_numer = 0
    totalaccuracy_denom = 0
    for test in range(0, 10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0
        split(sText, "output")
        for doc in range(0, 5):
            print "i is: ", doc
            totaldic = defaultdict(lambda: 0)
            totalcorrectdic = defaultdict(lambda: 0)
            bc = BayesClassifier()
            bc.train("output.train{0}".format(doc % 5))
            bc.train("output.train{0}".format((doc + 1) % 5))
            bc.train("output.train{0}".format((doc + 2) % 5))
            bc.train("output.train{0}".format((doc + 3) % 5))
            reader = DataReader("output.train{0}".format((doc + 4) % 5))
            correct = 0
            total = 0
            hold = 0
            for label, tokens, company, date, price, risklength in reader:
                print label
                tokenstring = " "
                tokenstring = tokenstring.join(tokens)
                print date
                if risklength == 1:
                    print "invalid document; ignore"
                elif bc.classify(tokenstring, risklength, date) == "HOLD":
                    #elif bc.classify(tokenstring, risklength) == "HOLD":
                    hold += 1
                else:
                    totaldic[label] += 1
                    total += 1

                    if bc.classify(tokenstring, risklength, date) == label:
                        #if bc.classify(tokenstring, risklength) == label:
                        correct += 1
                        totalcorrectdic[label] += 1

            print "Holds: ", hold
            print "Accuracy:", correct / float(total)
            thisaccuracy_numer += correct / float(total)
            thisaccuracy_denom += 1
            for key in totaldic:
                print totalcorrectdic[key], totaldic[key]
                print key, " precision: ", totalcorrectdic[key] / float(
                    totaldic[key])

        print "This Round Accuracy: ", thisaccuracy_numer / thisaccuracy_denom
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom
    print "Total Accuracy: ", totalaccuracy_numer / totalaccuracy_denom
Example #9
    def store_data(self):
        self.popup.destroy()
        dataset = db.DataReader()
        sta = Statistics.Stats(dataset.unique_pos, dataset.unique_neg)
        sa = Senti.SentimentalAnalysis()

        # run each classifier once and only store when the two agree; previously
        # `value` was left undefined whenever they disagreed
        naive_result = sa.NaiveBayes(self.data, sta)
        bayesian_result = sa.BayesianBayesResult(self.data, sta)
        if naive_result == "Positive" and bayesian_result == "Positive":
            value = "Pos"
        elif naive_result == "Negative" and bayesian_result == "Negative":
            value = "Neg"
        else:
            return
        dataset.Store_data(value, self.data)
Example #10
def prepareData(inputFilePath):
    total_row_list = list()
    reader = DataReader()
    df = reader.mergeData(inputFilePath)
    print(df.columns.values)
    cols = df.columns.values
    print(df.head())
    for eachColName in cols:
        feature_data = df[eachColName]
        feature_data = feature_data[feature_data.notnull()]
        for colData in feature_data:
            each_row = generate_feature_list(colData, eachColName)
            total_row_list.append(each_row)
    features_df = pd.DataFrame(total_row_list, columns=output_field)
    return (cols, features_df)
Example #11
def main():
    # let's look at how profit (units * prices) depends on datetime for one product
    UPC = input("enter UPC for your product: ")
    reader = dataReader.DataReader()
    DisplayProfitAmongPeriod(reader, UPC, '2009-01-14', '2011-12-28',
                             '2009-2011')
    # let's look at how profit depends on dates within a single year, for three years (2011, 2010, 2009)
    DisplayProfitAmongPeriod(reader, UPC, '2011-01-05', '2011-12-28', '2011')
    DisplayProfitAmongPeriod(reader, UPC, '2010-01-06', '2010-12-29', '2010')
    DisplayProfitAmongPeriod(reader, UPC, '2009-01-14', '2009-12-30', '2009')
    # let's look at how units depend on prices for one week (2011-12-07)
    DisplayUnitsVsPrices(reader, UPC, '2011-12-07')
    # As I can see no obvious dependency between these features,
    # I decided to calculate the best price as follows.
    price = GetBestPriceForNextTwoWeeks(reader, UPC)
    print(price)
Example #12
    def train(self, batch_size=64, num_batches=1000000):

        self.sess.run(tf.global_variables_initializer())
        #
        path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
        train_path = os.path.join(path, 'train')
        data = DataReader.DataReader(batch_size=batch_size)
        data.get_data(train_path)
        #
        start_time = time.time()
        for t in range(0, num_batches):
            d_iters = 5
            data_batch = data.minibatch()
            #if t % 500 == 0 or t < 25:
            #     d_iters = 100

            for _ in range(0, d_iters):
                bx = data_batch[_]
                bz = batch_z = np.random.uniform(
                    -1, 1, [batch_size, self.z_dim]).astype(np.float32)
                self.sess.run(self.d_adam, feed_dict={self.x: bx, self.z: bz})

            bz = self.z_sampler(batch_size, self.z_dim)
            self.sess.run(self.g_adam, feed_dict={self.z: bz, self.x: bx})

            if t % 100 == 0:
                bx = data_batch[0]
                bz = batch_z = np.random.uniform(
                    -1, 1, [batch_size, self.z_dim]).astype(np.float32)

                d_loss = self.sess.run(self.d_loss,
                                       feed_dict={
                                           self.x: bx,
                                           self.z: bz
                                       })
                g_loss = self.sess.run(self.g_loss, feed_dict={self.z: bz})
                print('Iter [%8d] Time [%5.4f] d_loss [%.4f] g_loss [%.4f]' %
                      (t, time.time() - start_time, d_loss, g_loss))

            if t % 100 == 0:
                bz = self.z_sampler(batch_size, self.z_dim)
                bx = self.sess.run(self.x_, feed_dict={self.z: bz})
                bx = xs.data2img(bx)
                #fig = plt.figure(self.data + '.' + self.model)
                #grid_show(fig, bx, xs.shape)
                bx = self.grid_transform(bx, xs.shape)
                imsave('logs/{}/{}.png'.format(self.data, t / 100), bx)
Example #13
def train(ne, batch, eta):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX(passthrough=True)
    Y = dataReader.NormalizeY()

    n_input, n_hidden, n_output = 1, ne, 1
    eta, batch_size, max_epoch = eta, batch, 10000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps, LossFunctionName.MSE,
                         InitialMethod.Xavier)

    loss_history = CLossHistory(params)
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
    return loss_history
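
# A hypothetical sweep using the function above: train() returns the recorded
# CLossHistory, so different hidden-layer sizes can be compared by their minimal
# loss. The sizes tried here are arbitrary.
if __name__ == '__main__':
    for ne in (2, 4, 8):
        history = train(ne, 10, 0.5)
        print(ne, history.GetMinimalLossData().toString())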
Example #14
 def Btn_Submit(self):
     StopWords = set(stopwords.words('english'))
     dataset = db.DataReader()
     sta = Statistics.Stats(dataset.unique_pos, dataset.unique_neg)
     self.data = self.text.get()
     words = self.data.split()
     # keep every non-stopword, non-punctuation token; previously only the last
     # such word survived in `temp`, which was also undefined for empty input
     filtered = []
     for word in words:
         if word in StopWords or word in string.punctuation:
             continue
         filtered.append(word)
         print(word)
     filtered_text = " ".join(filtered)
     sa = Senti.SentimentalAnalysis()
     self.textbox.insert(tk.END, self.data)
     self.textbox.insert(tk.END, "\nNaiveBayes Result :")
     self.textbox.insert(tk.END, sa.NaiveBayes(filtered_text, sta))
     self.textbox.insert(tk.END, "\nBayesianBayes Result :")
     self.textbox.insert(tk.END, sa.BayesianBayesResult(filtered_text, sta))
Example #15
def train(init_method):
    dataReader = DataReader(x_data_name, y_data_name)
    XData, YData = dataReader.ReadData()
    X = dataReader.NormalizeX(passthrough=True)
    Y = dataReader.NormalizeY()

    n_input, n_hidden, n_output = 1, 4, 1
    eta, batch_size, max_epoch = 0.5, 10, 30000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps, init_method, OptimizerName.SGD)

    loss_history = CLossHistory()
    net = TwoLayerNet(NetType.Fitting)
    net.train(dataReader, params, loss_history)

    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)
    ShowResult(net, X, YData, title, trace.wb1, trace.wb2)
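
# A hypothetical comparison of weight-initialization schemes using the function
# above. InitialMethod.Xavier appears in these examples; the Zero and Normal
# members are assumptions about that enum.
if __name__ == '__main__':
    for method in (InitialMethod.Zero, InitialMethod.Normal, InitialMethod.Xavier):
        train(method)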
Example #16
    def run(self):
        """
        run in new thread
        """
        import Graph
        import DataReader

        dataReader = DataReader.DataReader(countriesData, worldData,
                                           amountOfTopCountries)
        yearsDict = init_years(dataReader)
        graph = Graph.Graph(dataReader, yearsDict, imageOutputFolder,
                            videoOutputFolder)

        global FigureList
        FigureList = graph.getFigureList()

        global NumberOfFramesWouldBeRendered
        NumberOfFramesWouldBeRendered = graph.getNumberofFramesWouldBeRendered()

        graph.render()
def testClassifier(outputLabel):
    bc = BayesClassifier()
    bc.train(outputLabel + ".train")
    reader = DataReader(outputLabel + ".test")
    correctLabel = {}
    numberGuess = {}
    correct = 0.0
    total = 0.0
    for label, tokens in reader:
        if not label in correctLabel:
            correctLabel[label] = 0.0
        guess = bc.classify(" ".join(tokens))
        if not guess in numberGuess:
            numberGuess[guess] = 0.0
        if guess == label:
            correctLabel[guess] += 1
            correct += 1
        numberGuess[guess] += 1
        total += 1
    for label in correctLabel:
        print "Correct " + label, "-", correctLabel[label] / numberGuess[label]
    print "Total accuracy -", correct / total
def WalkThroughAllOptimizers(option):

    dataReader = DataReader(x_data_name, y_data_name)
    XData,YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.ToOneHot()
    
    n_input, n_output = dataReader.num_feature,  dataReader.num_category
    n_hidden = 8
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.06

    params = HyperParameters41(n_input, n_output, n_hidden,
                         eta, max_epoch, batch_size, eps, 
                         LossFunctionName.CrossEntropy3, 
                         InitialMethod.Xavier,
                         option[0])

    loss_history = CLossHistory()
    net = TwoLayerClassificationNet()

    #ShowData(XData, YData)

    net.train(dataReader, params, loss_history)

    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)

    print("wait for 10 seconds...")

    wbs_min = WeightsBias30(params)
    wbs_min.W1 = trace.dict_weights["W1"]
    wbs_min.W2 = trace.dict_weights["W2"]
    wbs_min.B1 = trace.dict_weights["B1"]
    wbs_min.B2 = trace.dict_weights["B2"]
    ShowAreaResult(X, wbs_min, net, title)
    ShowData(X, YData)
Example #19
def WalkThroughAllOptimizers(option):

    dataReader = DataReader(x_data_name, y_data_name)
    XData,YData = dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.NormalizeY()
    
    n_input, n_output = dataReader.num_feature, 1
    n_hidden = 4
    eta, batch_size, max_epoch = option[1], 10, 10000
    eps = 0.001

    params = CParameters(n_input, n_output, n_hidden,
                         eta, max_epoch, batch_size, eps, 
                         LossFunctionName.MSE, 
                         InitialMethod.Xavier,
                         option[0])

    loss_history = CLossHistory()
    net = TwoLayerFittingNet()

    #ShowData(XData, YData)

    wbs = net.train(dataReader, params, loss_history)

    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)

    print("wait for 10 seconds...")

    wbs_min = WeightsBias(params)
    wbs_min.W1 = trace.dict_weights["W1"]
    wbs_min.W2 = trace.dict_weights["W2"]
    wbs_min.B1 = trace.dict_weights["B1"]
    wbs_min.B2 = trace.dict_weights["B2"]
    ShowResult(X, Y, net, wbs_min, title)
def Train():
    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    dataReader.NormalizeX()
    dataReader.NormalizeY()

    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 50000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch, batch_size, eps)

    # SGD, MiniBatch, FullBatch
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)

    trace = loss_history.GetMinimalLossData()
    print(trace.toString())
    title = loss_history.ShowLossHistory(params)

    ShowResult(net, dataReader.X, dataReader.Y, title, trace.wb1, trace.wb2)
    trace.wb1.Save("wb1")
    trace.wb2.Save("wb2")
Example #21
import subprocess
# a rough and dirty check whether the raw data files are available,
# call the request script otherwise
dustfiles = subprocess.check_output(("ls dust_concentrations2018-04-1*grib"), shell=True).split()
tempfiles = subprocess.check_output(("ls temp_v_gh2018-04-1*grib"), shell=True).split()
if len(dustfiles) < 4 or len(tempfiles) < 4:
    subprocess.call(("python", "request.py"))

if len(sys.argv) == 3:
    dates = eval(sys.argv[1])
    hours = eval(sys.argv[2])
else:
    dates, hours = (range(1, 22), (0, 6, 12, 18))
    
# instantiate a data reader object to read in and preprocess
# the GRIB files for the requested dates and hours
dr = DataReader((dates, hours), area=(80,15, -80,100))

#instantiate a Euromap instance
em = Euromap(width=5000)

# create a series of maps showing the dust transport rate
# along north-south axis
#for idx,time in enumerate(dr.timeslots):
#    em.densitymap(savefilename="North_South_Dust_Transport_{}_{}.png".format(*time),
#                  formatsample=dr.dataformatsample[1:],
#                  title = "North-South Dust Transport, 2018-04-{}, {}:00".format(*time),
#                  array3d=dr.aggregatedrate.data[idx],
#                  timeslot=time,
#                  scale=np.arange(-100,110,10) ** 3 * 0.00000005,
#                  name=dr.aggregatedrate.name)
        while iteration * self.hparams.batch_size < self.hparams.training_size:
            train_cost, train_accuracy = self.sess.run(
                [self.train_loss, self.accuracy])
            iteration += 1  # advance the counter so the loop can terminate

        print("iterations: [%2d] time: %4.4f, loss: %.8f, accuracy: %.8f" %
              (iteration, time.time() - start_time, np.mean(train_cost),
               train_accuracy))

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    dataset_name = "cnn"
    dataset_dir = "../data_2"
    dr = DataReader()

    hparams = tf.flags
    hparams.DEFINE_integer("training_size", 381000,
                           "total number of training samples")  #381000
    hparams.DEFINE_integer("number_of_epochs", 200, "Epoch to train [25]")
    hparams.DEFINE_integer("vocab_size", 10000,
                           "The size of vocabulary [10000]")
    hparams.DEFINE_integer("batch_size", 32, "The size of batch images [32]")
    hparams.DEFINE_integer("depth", 1, "Depth [1]")
    hparams.DEFINE_integer("max_nsteps", 1000, "Max number of steps [1000]")
    hparams.DEFINE_integer("number_of_hidden_units", 512,
                           "The size of hidden layers")
    hparams.DEFINE_float("learning_rate", 5e-5, "Learning rate [0.00005]")
    hparams.DEFINE_float("momentum", 0.9, "Momentum of RMSProp [0.9]")
    hparams.DEFINE_float("keep_prob", 0.7, "keep_prob [0.5]")
Example #23
logs = os.path.join(directory, 'logs')
trainloss = os.path.join(logs, 'train_loss.txt')

if not os.path.isdir(logs):
    os.makedirs(logs)

# choose network, can be either DRN18 or DRN26
network = 'DRN26'
# set parameters
batch_size = 8
num_epochs = 100
use_weights = 1
num_classes = 5
image_dims = [500, 500, 3]

data = DataReader(directory, batch_size, num_epochs, use_weights=use_weights)
train_data = data.train_batch(train_file)
num_train_images = data.num_images

test_data = data.test_batch(test_file)
num_val_images = data.num_images

# determine number of iterations based on number of images
training_iterations = int(np.floor(num_train_images / batch_size))
validation_iterations = int(np.floor(num_val_images / batch_size))

handle = tf.placeholder(tf.string, shape=[])
# create iterator allowing us to switch between datasets
iterator = tf.data.Iterator.from_string_handle(handle, train_data.output_types,
                                               train_data.output_shapes)
next_element = iterator.get_next()
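
# A minimal usage sketch (TensorFlow 1.x) for the string-handle iterator built
# above. The session loop below is not part of the original listing; it assumes
# train_data and test_data are tf.data.Dataset objects.
train_iterator = train_data.make_one_shot_iterator()
test_iterator = test_data.make_initializable_iterator()

with tf.Session() as sess:
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # pull training batches through the shared handle placeholder
    for _ in range(training_iterations):
        batch = sess.run(next_element, feed_dict={handle: train_handle})
    # switch the same next_element op over to the validation set
    sess.run(test_iterator.initializer)
    for _ in range(validation_iterations):
        batch = sess.run(next_element, feed_dict={handle: test_handle})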
Example #24
 def _get_data_loader(self, data_conf):
     loader = DataReader(data_conf, self.logger, self.n_fold)
     return loader
Example #25
from DataReader import *
from AGDSStructure import *
from AGDSKNearest import *
import numpy as np


def classify(data_holder, model, X):
    predicted_label = model.find_similarity(np.array(X))
    win_class = data_holder.get_real_label(predicted_label)
    print(win_class)


if __name__ == '__main__':
    data_reader = DataReader("IrisData.xls")
    agds_structure = AGDSStructure(data_reader.data_frame, data_reader.label)
    k_nearest = AGDSKNearest(agds_structure, 3)

    classify(data_reader, k_nearest, [4.5, 3.0, 1.1, 0.1])
    classify(data_reader, k_nearest, [7.0, 3.2, 4.7, 1.4])
    classify(data_reader, k_nearest, [5.0, 2.0, 4.0, 1.0])
    classify(data_reader, k_nearest, [5.7, 2.5, 4.8, 1.6])
Example #26
def train(args):

    learning_rate = args.learning_rate
    batch_size = args.batch_size
    training_epochs = args.training_epochs
    display_step = args.display_step
    checkpoint_step = args.checkpoint_step  # save training results every check point step
    z_dim = args.z_dim  # number of latent variables.
    path = args.indir  #input_file

    if args.train:
        dirname = 'save_train'
    else:
        dirname = 'save'
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    vae = ConvVAE(learning_rate=learning_rate,
                  batch_size=batch_size,
                  z_dim=z_dim,
                  train=args.train)

    #mnist = read_data_sets()
    #n_samples = mnist.num_examples

    celabdata = DataReader.DataReader(batch_size=batch_size)
    #path = '/home/master/05/john81923/data/VLDS2018/hw4_dataset/hw4_data'
    train_path = os.path.join(path, 'train')
    test_path = os.path.join(path, 'test')
    print train_path
    print test_path
    celabdata.get_data(train_path)
    test_data = celabdata.testdata(test_path)
    n_samples = celabdata.datanumb

    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
        vae.load_model(dirname)

    # Training cycle
    step = 0
    steps = []
    KLD_fig = []
    MSE_fig = []

    print 'plotting fig1_2... please wait...'
    for epoch in range(training_epochs):
        avg_cost = 0.
        #mnist.shuffle_data()
        train_batch = celabdata.minibatch()
        total_batch = int(n_samples / batch_size)
        print total_batch
        # Loop over all batches

        for i in range(total_batch):
            #batch_xs = mnist.next_batch(batch_size)
            batch_xs = train_batch[i]

            # Fit training using batch data
            cost, mse, kl_loss, new_image, z_log_sigma_sq = vae.partial_fit(
                batch_xs)

            # Display logs per epoch step
            if i % display_step == 0:
                scipy.misc.imsave('hat.jpg', new_image[0].reshape((64, 64, 3)))
                steps.append(step)
                KLD_fig.append(kl_loss)
                MSE_fig.append(mse)
            step += 1
            # Compute average loss
            avg_cost += cost / n_samples * batch_size

        # save model
        if epoch >= 0 and epoch % checkpoint_step == 0:
            checkpoint_path = os.path.join('save', 'model.ckpt')
            vae.save_model(checkpoint_path, epoch)
            print "model saved to {}".format(checkpoint_path)

    save_path = 'repro/'
    fig = plt.figure()
    plt.title('KLD')
    plt.plot(steps, KLD_fig)
    plt.savefig(save_path + 'tmpfig1_2.jpg', format='png')

    fig = plt.figure()
    plt.title('MSE')
    plt.plot(steps, MSE_fig)
    plt.savefig(save_path + 'tmpfig1_2_.jpg', format='png')

    pillist = [save_path + 'tmpfig1_2.jpg', save_path + 'tmpfig1_2_.jpg']

    pilimages = []  # images in each folder
    for file in pillist:
        pilimages.append(Image.open(file))
        w, h = Image.open(file).size
    fig_1_2(pilimages, os.path.join(args.outdir, 'fig1_2.jpg'), w, h)

    # save model one last time, under zero label to denote finish.
    #vae.save_model(checkpoint_path, 0)

    return vae
Example #27
def test(args):

    learning_rate = args.learning_rate
    batch_size = 1  #args.batch_size
    training_epochs = args.training_epochs
    display_step = args.display_step
    checkpoint_step = args.checkpoint_step  # save training results every check point step
    z_dim = args.z_dim  # number of latent variables.

    dirname = 'save'
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    vae = ConvVAE(learning_rate=learning_rate,
                  batch_size=batch_size,
                  z_dim=z_dim)

    #mnist = read_data_sets()
    #n_samples = mnist.num_examples

    celabdata = DataReader.DataReader(batch_size=batch_size)
    path = args.indir  #input_file
    #train_path = os.path.join(path,'train')
    test_path = os.path.join(path, 'test')
    #print train_path
    print test_path
    #celabdata.get_data(train_path)
    test_data = celabdata.testdata(test_path)
    n_samples = celabdata.datanumb

    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
        vae.load_model(dirname)

    UNIT_SIZE = 64
    target = Image.new('RGB', (UNIT_SIZE * 10, UNIT_SIZE * 2), 255)
    leftone = 0
    lefttwo = 0
    rightone = UNIT_SIZE
    righttwo = UNIT_SIZE

    avg_cost = 0.
    #mnist.shuffle_data()
    #train_batch = celabdata.minibatch()
    #total_batch = int(n_samples / batch_size)
    #print total_batch
    # Loop over all batches
    steps = []
    KLD_fig = []
    MSE_fig = []
    pillist = []

    for i in range(10):
        #batch_xs = mnist.next_batch(batch_size)
        #batch_xs = train_batch[i]

        # Fit training using batch data
        new_image, z = vae.testing_fit(test_data[i].reshape((1, 64, 64, 3)))

        scipy.misc.imsave('repro/1_3out{}.jpg'.format(i), new_image[0].reshape(
            (64, 64, 3)))
        scipy.misc.imsave('repro/1_3in{}.jpg'.format(i), test_data[i].reshape(
            (64, 64, 3)))

        pillist.append('repro/1_3out{}.jpg'.format(i))
        pillist.append('repro/1_3in{}.jpg'.format(i))

    pilimages = []  # images in each folder
    for file in pillist:
        pilimages.append(Image.open(file))
    pinjie(pilimages, os.path.join(args.outdir, 'fig1_3.jpg'))

    pillist_14 = []
    for i in range(32):
        #eps = tf.random_normal((10, 512), 0.0, 1.0, dtype=tf.float32)
        z = np.random.uniform(-1, 1, [1, 512]).astype(np.float32)
        new_image = vae.testing_1_4(z)
        scipy.misc.imsave('repro/1_4out{}.jpg'.format(i), new_image[0].reshape(
            (64, 64, 3)))
        pillist_14.append('repro/1_4out{}.jpg'.format(i))

    pilimages2 = []  # images in each folder
    for file in pillist_14:
        pilimages2.append(Image.open(file))
    fig_1_4(pilimages2, os.path.join(args.outdir, 'fig1_4.jpg'))

    imgdata = celabdata.testdata_tsne(path)
    with open('repro/tsne_z.txt', 'wb') as f:
        mse_sum = 0
        for i in range(len(imgdata)):
            _, mse, z = vae.mse_fit(imgdata[i].reshape((1, 64, 64, 3)))
            mse_sum += mse
            zin = ''
            for t in range(512):
                zin += '%.4f ' % (z[0][t])  #"%.2f" % x
            f.write(zin + '\n')
    print 'test set mse : ', mse_sum
    fdata = "repro/tsne_z.txt"
    ftarget = "repro/tsne_lb.txt"
    iris = chj_load_file(fdata, ftarget)

    X_tsne = TSNE(n_components=2, learning_rate=100).fit_transform(iris.data)
    #X_pca = PCA().fit_transform(iris.data)
    print("finishe!")
    plt.figure()
    #plt.subplot(121)
    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=iris.target)
    plt.savefig(os.path.join(args.outdir, 'fig1_5.jpg'), format='png')

    return vae
Example #28
import tensorflow as tf
from CellSeg_CNN import *
import numpy as np
import DataReader

#Reading the images
data_reader = DataReader.DataReader()
input_reader = data_reader.input_reader

training_images = data_reader.training_images
if (input_reader.use_data_rotation):
    rotated_images = data_reader.pi_half_rotated_images
number_of_training_images = np.size(training_images, axis=0)
image_height = np.size(training_images, axis=1)
image_width = np.size(training_images, axis=2)

test_images = data_reader.test_images
number_of_test_images = np.size(test_images, axis=0)
#Reading the ground truth classes
[training_classes, training_defined_samples] = data_reader.training_classes
if (input_reader.use_data_rotation):
    [rotated_classes,
     rotated_defined_mask] = data_reader.pi_half_rotated_classes_and_masks
[test_classes, test_defined_samples] = data_reader.test_classes

#Reading parameters
learning_rate = input_reader.learning_rate
regularisation_param = tf.constant(input_reader.regularisation_parameter)
n_epochs = input_reader.number_of_epochs
tensorboard_file_location = input_reader.tensorboard_location
input_patch_width = input_reader.input_patch_width
Example #29
def ShowResult(net, X, Y, title, wb1, wb2):
    # draw train data
    plt.plot(X[0, :], Y[0, :], '.', c='b')
    # create and draw visualized validation data
    TX = np.linspace(0, 1, 100).reshape(1, 100)
    dict_cache = net.ForwardCalculationBatch(TX, wb1, wb2)
    TY = dict_cache["Output"]
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()


#end def

if __name__ == '__main__':
    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    dataReader.NormalizeX()
    dataReader.NormalizeY()

    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 50000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps)

    # SGD, MiniBatch, FullBatch
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
Example #30
#TrendsScraper
#YahooFinanceScraper

from pandas.io.data import DataReader
from datetime import datetime
goog = DataReader("GOOG", "yahoo", datetime(2000, 1, 1), datetime(2012, 1, 1))
goog["Adj Close"]