def train(self, dataFile):   
      '''Trains the Naive Bayes Sentiment Classifier.'''
      dr = DataReader(dataFile)

      label, data = dr.next()
      while(label):
         try:
            if label not in self.word_counts:
               self.word_counts[label] = {}

            if label in self.docs:
               self.docs[label] += 1
            else:     
               self.docs[label] = 1
            self.total_docs += 1   

            for i in range(len(data)):
               if data[i] in self.word_counts[label]:
                  self.word_counts[label][data[i]] += 1
               else:
                  self.word_counts[label][data[i]] = 1

            label, data = dr.next()

         except StopIteration:
            # Calculate the total number of words / label
            for label, label_words in self.word_counts.items():
               self.word_sums[label] = 0
               for word, word_count in label_words.items():
                  self.word_sums[label] += word_count                     

            self.save(dataFile+".pickle")
            return
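A minimal usage sketch of the train() method above; the class name (NaiveBayesClassifier) and the data file name are assumptions, not part of the original snippet:

   # Hypothetical driver; class name and file name are placeholders.
   nb = NaiveBayesClassifier()
   nb.train("reviews.txt")   # builds self.word_counts / self.docs and pickles them to "reviews.txt.pickle"
   print nb.word_sums        # per-label word totals, filled in when the reader raises StopIteration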
def constructPredictionWithOutput(classifier,classifierIndex,xTest, testBatchIndex):

    print "Predicting with classifier {}".format(classifierIndex)

    yPred = classifier.predict_proba(xTest)

    print "Writing to csv..."
    outputFileName="data\\ensembleTraining\\out"+str(classifierIndex)+".csv"
    dataReader.writePredToCsv(yPred,testBatchIndex,outputFileName=outputFileName)
Example #3
def constructTrainingData(trainDataSize):

    #training data
    trainData = dataReader.getTrainData(trainDataSize)
    trainData = trainData.append(dataReader.getSuffixDataFrame())

    # feature engineering
    trainData =  regularFeatExtr.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = regularFeatExtr.getRegularFeatures(trainData, True)


    return xTrain,yTrain
   def train(self, dataFile):   
      '''Trains the Naive Bayes Sentiment Classifier.'''
      dr = DataReader(dataFile)

      label, data = dr.next()
      while(label):
         try:
            if label not in self.word_counts:
               self.word_counts[label] = {}

            if label in self.docs:
               self.docs[label] += 1
            else:     
               self.docs[label] = 1
            self.total_docs += 1   

            # Add counts for individual words
            for i in range(len(data)):
               if data[i] in self.word_counts[label]:
                  self.word_counts[label][data[i]] += 1
               else:
                  self.word_counts[label][data[i]] = 1

            # Add counts for bigrams to the same dictionary
            for i in range(len(data)/2): # non-overlapping bigrams, counted in addition to the unigrams above
               j = 2*i
               bigram = data[j] + " " + data[j+1]
               if bigram in self.word_counts[label]:
                  self.word_counts[label][bigram] += 1
                  if data[j].isupper():
                     self.word_counts[label][bigram] += .5 # counts a word an extra half time if it is all caps
                  if data[j+1].isupper():
                     self.word_counts[label][bigram] += .5 # counts a word an extra half time if it is all caps
               else:
                  self.word_counts[label][bigram] = 1

            label, data = dr.next()


         except StopIteration:
            # Calculate the total number of words / label
            for label, label_words in self.word_counts.items():
               self.word_sums[label] = 0
               for word, word_count in label_words.items():
                  self.word_sums[label] += word_count                     

            self.save(dataFile+".pickle")
            return
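For intuition, a standalone sketch of the non-overlapping bigram keys the loop above produces; the token list is a made-up example:

   # Toy illustration of the bigram keys built above; the token list is hypothetical.
   data = ["GREAT", "movie", "would", "watch", "again"]
   for i in range(len(data)/2):          # pairs tokens 0-1, 2-3, ... (a trailing odd token is skipped)
      j = 2*i
      print data[j] + " " + data[j+1]    # -> "GREAT movie", "would watch"
   # "GREAT".isupper() is True, so its bigram gets the extra 0.5 only on repeat
   # occurrences; the first occurrence just initializes the count to 1.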
def make_data_from_file(feature_type,input_folder,start,end):
    """
    Read data from original feature files directly
    feature_type should be a number:
    0 - chi1, 1 - chi2, 2 - hbonds, 3 - rmsd
    """
        
    data = []
#   data = numpy.array([])
    feature_len = 0
    all_data = dr.preprocess(start, end, input_folder)
    
    for f in all_data[feature_type]:
        f = np.asarray(f)
        data.append((f,f))
        if feature_len == 0:
            feature_len = len(f)
    
    all_data = [] # release the memory
    X = np.asarray(data)
    if feature_type == 2:
        # special treatment for hbonds [0,3]
        X_std = X/3.1
    else:
        X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    data = [(np.reshape(x, (len(x), 1)),  np.reshape(y, (len(y), 1))) for x, y in X_std]
#   pickle.dump(data,open("/output/"+features[feature_type]+"array","wb"))
    return data, feature_len
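A hedged example call; the folder path and frame range are placeholders, and feature_type=2 selects the hbonds channel described in the docstring:

    # Hypothetical call; "features/" and the 0-100 range are placeholders.
    hbond_pairs, hbond_len = make_data_from_file(2, "features/", 0, 100)
    print(len(hbond_pairs), hbond_len)   # number of (input, target) pairs (identical here) and the feature length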
def getRegularFeatures(data, isTrainData):

    data = performRegularFeatureEngineering(data, isTrainData)

    # splitting data into X and Y
    if 'Category' in data.columns.values:
        yData =  data.Category
        data = data.drop(['Category'], 1)
    else:
        yData = []

    xData = data.values


    dataReader.serializeObject(data.columns.values,"data\\misc\\columns.csv")

    print "Features used {}".format(data.columns.values)
    return xData,yData
Example #7
def splitTestDataIntoChunks():

    testData = dataReader.getWholeTestData()

    miniDataFrames = np.array_split(testData, numberOfPartitions)

    for i in range(numberOfPartitions):
        outputFileName = 'data\\miniTestData\\miniDataFrame'+str(i)+'.csv'
        miniDataFrames[i].to_csv(outputFileName,index=False)
Example #8
def calculateLearningCurve():
    classifier = classifierSelector.constructGradientBoostingClassifier()
    trainData = dataReader.getTrainData()

    # feature engineering
    trainData =  featureExtractor.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = featureExtractor.getRegularFeatures(trainData, True)

    trainSizes =  np.linspace(100000,500000,5,dtype=int)

    plot_learning_curve(classifier,xTrain,yTrain,trainSizes)
   def test(self, dataName, logFilename):
      ''' Tests against dataName and logs to logFilename. '''
      dr = DataReader(dataName)
      correct = 0
      total = 0
      found_counts = {}
      actual_counts = {}
      label, data = dr.next()
      log = open(logFilename, 'w')
      while label:
         try:
            if label in actual_counts:
               actual_counts[label] += 1
            else:
               actual_counts[label] = 1

            total += 1
            string = ""
            for i in data:
               string += i + " "
            bayes_label, bayes_prob = self.classify(string)

            # print "Result:" + bayes_label + " Correct Label: " + label
            # log.write("Result:" + bayes_label + " Correct Label: " + label+ "\n")
           
            if bayes_label == label:
               if bayes_label in found_counts:
                  found_counts[bayes_label] += 1
               else:
                  found_counts[bayes_label] = 1
               correct += 1
            label, data = dr.next()
         except StopIteration:
            for k, v in actual_counts.items():
               if k in found_counts:
                  log.write(k + " " + str(float(found_counts[k])/actual_counts[k]) + "\n")
               else:
                  log.write(k + " 0\n")

            log.close()
            return float(correct)/total        
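A minimal end-to-end sketch using train() and test() together; the class name and the file names are assumptions:

      # Hypothetical end-to-end run; class name and file names are placeholders.
      nb = NaiveBayesClassifier()
      nb.train("train_reviews.txt")
      accuracy = nb.test("test_reviews.txt", "per_label_recall.log")
      print "Overall accuracy:", accuracy   # test() also writes per-label recall to the log file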
Example #10
def createEnsembleBasedODifferentTrainingSets():


    # constructing the limits
    margins = np.linspace(0,878000,5,dtype=int)

    marginTuples=[]
    for i in range(len(margins)-1):
        marginTuples.append((margins[i],margins[i+1]))


    # training classifiers
    allClassifiers = Parallel(n_jobs=-1)(delayed(mainScript.trainClassifierOnTrainingData)(margins=marginTuple) for marginTuple in marginTuples)

    # Predicting on batch test data
    partitionNumber = utils.numberOfPartitions
    for batchIndex in range(partitionNumber):

        print "Predicting batch {}".format(batchIndex)
        miniTestData = dataReader.getSerializedMiniTestData(batchIndex)

        xTest,yTest = mainScript.constructTestData(miniTestData)

        for classifierIndex,currentClassifier in enumerate(allClassifiers):
            constructPredictionWithOutput(currentClassifier,classifierIndex,xTest,batchIndex)


    # post process
    print "Post processing everything..."
    outputFileNames = ["data\\ensembleTraining\\out"+str(index)+".csv" for index in range(len(allClassifiers))]

    for outputFileName in outputFileNames:
        dataReader.postProcessCsv(outputFileName=outputFileName)



    #Merging everything together
    print "Merging all solutions...."
    fileRegex = "data\\ensembleTraining\\*.csv"
    createEnsembleBasedOnExitingPredictions(fileRegex=fileRegex)
Example #11
def trainClassifierOnTrainingDataReturnAll(numberOfTrainingExamples = -1):

    trainData = dataReader.getTrainData(numberOfTrainingExamples)

    # feature engineering
    trainData =  featureExtractor.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = featureExtractor.getRegularFeatures(trainData, True)


     # classifier training
    classifier = classifierSelector.trainClassifier(xTrain, yTrain)

    return classifier, xTrain, yTrain
Example #12
def calculateValidationCurve():
    classifier = classifierSelector.constructGradientBoostingClassifier()

    numberOfTrainData = 50000

    trainData = dataReader.getTrainData(numberOfTrainData)

    # feature engineering
    trainData =  featureExtractor.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = featureExtractor.getRegularFeatures(trainData, True)

    paramRange = [0.1,0.13,0.16]

    plot_validation_curve(classifier,xTrain,yTrain,"learning_rate",paramRange)
Example #13
 def load_image(self,img_num,load_step):
     dir_start       = self.init_dirs[load_step]
     dir_num         = str(dir_start  + img_num)
     dir_num_dark    = str(self.dark_dirs[load_step])
     
     im_dir          = os.path.join(self.data_dir,dir_num,'ff')
     im_file         = os.listdir(im_dir)        
     assert len(im_file) == 1
     im_path = os.path.join(im_dir,im_file[0])
     
     dark_dir        = os.path.join(self.data_dir,dir_num_dark,'ff')
     dark_file       = os.listdir(dark_dir)
     assert len(dark_file) == 1
     dark_path       = os.path.join(dark_dir,dark_file[0])
 
     dark_image      = DataReader.ge2_reader_image(dark_path,0)
     if len(dark_image.shape) > 1:
         dark_image        = np.mean(dark_image, axis=0)        
     
     ring_image      = DataReader.ge2_reader_image(im_path,0) 
     
     img             = ring_image - dark_image
     return img        
Example #14
def trainClassifierOnTrainingData(trainData=None, numberOfTrainingExamples = -1, margins=None):

    if trainData is None:
        trainData = dataReader.getTrainData(numberOfTrainingExamples,margins)

    # feature engineering
    trainData =  regularFeatExtr.convertTargetFeatureToNumeric(trainData)
    xTrain, yTrain = regularFeatExtr.getRegularFeatures(trainData, True)


     # classifier training
    classifier = classifierSelector.trainClassifier(xTrain, yTrain)

    return classifier
Example #15
def predictForSubmission():
    startTime = time.time()
    allAlgorithmStartTime = startTime

    numberOfTrainingExamples = -1
    classifier = trainClassifierOnTrainingData(numberOfTrainingExamples)

    print "Beginning to load test data..."

    partitionNumber = utils.numberOfPartitions
    for index in range(partitionNumber):

        miniTestData = dataReader.getSerializedMiniTestData(index)

        xTest,yTest = constructTestData(miniTestData)

        print "Predicting..."
        yPred = classifier.predict_proba(xTest)

        dataReader.writePredToCsv(yPred,index)

    print "Post processing..."
    dataReader.postProcessCsv()
    print("Total run time:{}".format(time.time() - allAlgorithmStartTime))
Example #16
def plotFeatureImportance(classifier):

    featureNames = dataReader.deserializeObject("data\\misc\\columns.csv")

    feature_importance = classifier.feature_importances_
    # make importances relative to max importance
    feature_importance = 100.0 * (feature_importance / feature_importance.max())
    sorted_idx = np.argsort(feature_importance)
    pos = np.arange(sorted_idx.shape[0]) + .5
    plt.subplot(1, 2, 2)
    plt.barh(pos, feature_importance[sorted_idx], align='center')
    plt.yticks(pos,featureNames[sorted_idx])
    plt.xlabel('Relative Importance')
    plt.title('Variable Importance')
    plt.show()
Example #17
def getDifferentTrainAndTestData(trainDataSize, testDataSize):

    data = dataReader.getWholeTrainingData()

    if trainDataSize+testDataSize > data.shape[0]: # more rows requested than the DF has
        print "Getting different train & test data with possible duplicates"
        trainData = data.sample(trainDataSize)
        testData = data.sample(testDataSize)
    else:
        print "Getting totally different train & test data"
        indexes = np.arange(data.shape[0]) #0->873k
        random.shuffle(indexes) # works in-place

        trainData = data.ix[indexes[0:trainDataSize]]
        testData = data.ix[indexes[trainDataSize+1:trainDataSize+1+testDataSize]]


    return trainData,testData
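A hedged example of the call; the sizes are placeholders:

    # Hypothetical call; sizes are placeholders.  When both sizes fit within the
    # data frame, the returned train and test rows are disjoint.
    trainData, testData = getDifferentTrainAndTestData(10000, 2000)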
Example #18
def testParameterPerformance():
    startTime = time.time()
    allAlgorithmStartTime = startTime

    # define sizes
    trainDataSize = 10000
    testDataSize = 100000

    trainData,testData = utils.getDifferentTrainAndTestData(trainDataSize,testDataSize)

    # in order to ensure that we have members from each class present
    testData = testData.append(dataReader.getSuffixDataFrame())

    classifier = trainClassifierOnTrainingData(trainData=trainData)

    xTest,yTest = constructTestData(testData)

    yPred = classifier.predict(xTest)

    validator.performValidation(yPred, yTest)


    print("Total run time:{} s".format((time.time() - allAlgorithmStartTime)))
        while iteration * self.hparams.batch_size < self.hparams.training_size:
            train_cost, train_accuracy = self.sess.run(
                [self.train_loss, self.accuracy])

        print("iterations: [%2d] time: %4.4f, loss: %.8f, accuracy: %.8f" %
              (iteration, time.time() - start_time, np.mean(train_cost),
               train_accuracy))

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    dataset_name = "cnn"
    dataset_dir = "../data_2"
    dr = DataReader()

    hparams = tf.flags
    hparams.DEFINE_integer("training_size", 381000,
                           "total number of training samples")  #381000
    hparams.DEFINE_integer("number_of_epochs", 200, "Epoch to train [25]")
    hparams.DEFINE_integer("vocab_size", 10000,
                           "The size of vocabulary [10000]")
    hparams.DEFINE_integer("batch_size", 32, "The size of batch images [32]")
    hparams.DEFINE_integer("depth", 1, "Depth [1]")
    hparams.DEFINE_integer("max_nsteps", 1000, "Max number of steps [1000]")
    hparams.DEFINE_integer("number_of_hidden_units", 512,
                           "The size of hidden layers")
    hparams.DEFINE_float("learning_rate", 5e-5, "Learning rate [0.00005]")
    hparams.DEFINE_float("momentum", 0.9, "Momentum of RMSProp [0.9]")
    hparams.DEFINE_float("keep_prob", 0.7, "keep_prob [0.5]")
Example #20
    metrics = {}
    metrics["muc"] = (mr, mp, mf)
    metrics["b3"] = (br, bp, bf)
    metrics["ceaf"] = (cr, cp, cf)
    return metrics


def print_performance(m):
    # metrics are stored as (recall, precision, f1) tuples above
    mr, mp, mf = m["muc"]
    print "MUC: recall: %f precision: %f  f1: %f" % (mr, mp, mf)
    br, bp, bf = m["b3"]
    print "BCUBED: recall: %f precision: %f  f1: %f" % (br, bp, bf)
    cr, cp, cf = m["ceaf"]
    print "CEAF: recall: %f precision: %f  f1: %f" % (cr, cp, cf)


if __name__ == "__main__":

    #network_file = "./model/pretrain/network_model_pretrain.best"
    network_file = "./model/pretrain/network_model_pretrain.top.best"
    #network_file = "./model/model.pkl"
    print >> sys.stderr, "Read model from", network_file
    network_model = torch.load(network_file)

    #dev_docs = DataReader.DataGnerater("dev")
    dev_docs = DataReader.DataGnerater("test")

    best_thres = 0.4

    best_thres = evaluate(network_model, dev_docs, best_thres)
Example #21
 def _get_data_loader(self, data_conf):
     loader = DataReader(data_conf, self.logger, self.n_fold)
     return loader
Example #22
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import nltk
import DataReader
import Wordsmith
import json

entries = nltk.corpus.cmudict.entries()
dictionary = dict(entries)

bucket = Wordsmith.bucket

wordlist = dictionary.keys()
wordlist.extend(DataReader.collocationEntries())

def seed():
	count = 0
	for w in wordlist:
		bucket.add(w)

		count+=1

		if count % 20000 == 0:
			print str((count/20000)*10) + " percent done loading"

	f = open('./nltk_data/bucketstore', 'a')
	f.write(json.dumps(bucket.buckets))
	f.close()
Example #23
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    #dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    metric = performance(test_docs, worker, manager)
    print "Ave", metric["average"]

    #network_file = "./model/network_model_pretrain.top.best"
    #network_model = torch.load(network_file)

    #ana_network_file = "./model/network_model_pretrain.top.best"
    #ana_network_model = torch.load(ana_network_file)

    #reduced=""
    #if args.reduced == 1:
    #    reduced="_reduced"

    #metric = performance(test_docs,network_model,ana_network_model)
def main():
    solar_data = []
    solar_data += DataReader.get_daily_totals_for_file('Data/2005.csv')
    solar_data += DataReader.get_daily_totals_for_file('Data/2006.csv')
    solar_data += DataReader.get_daily_totals_for_file('Data/2007.csv')
    solar_data += DataReader.get_daily_totals_for_file('Data/2008.csv')
    solar_data += DataReader.get_daily_totals_for_file('Data/2009.csv')
    print("Imported Data")
    print("Running Simulation:")

    num_of_sims = 0
    with open('Output/rs.csv', 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['Smart Agents Wealth', 'Smart Agents Trades', 'Smart Agents Wealth (all)', 'Smart Agents Trades (all)','Controlled Agents Wealth', 'Controlled Agents Trades', 'Controlled Agents Wealth (all)', 'Controlled Agents Trades (all)', 'All Agents Wealth', 'All Agents Trades', 'All Agents Wealth (all)', 'All Agents Trades (all)', 'Smart Agents Price Correlation', 'Controlled Agents Price Correlation', 'All Agents Price Correlation'])
        for i in range(num_of_sims):
            print("Simulation:" + str(i + 1))
            market = Market(100, 10)
            for i in range(len(solar_data)):
                weather = solar_data[i]
                market.update(float(weather)/1000.0)
                price = market.price_history[-1]
                supply = market.asks[-1]
                demand = market.bids[-1]

            smart_agents = [agent for agent in market.agents if agent.use_brain == True]
            controlled_agents = market.agents[:10]
            all_agents = market.agents

            smart_wealth = sum([agent.wealth for agent in smart_agents if agent.wealth > 0])/len(smart_agents)
            smart_no_trades = sum([agent.no_trades for agent in smart_agents if agent.wealth > 0])/len(smart_agents)
            smart_wealth_a = sum([agent.wealth for agent in smart_agents])/len(smart_agents)
            smart_no_trades_a = sum([agent.no_trades for agent in smart_agents])/len(smart_agents)
                       
            controlled_wealth = sum([agent.wealth for agent in controlled_agents if agent.wealth > 0])/len(controlled_agents)
            controlled_no_trades = sum([agent.no_trades for agent in controlled_agents if agent.wealth > 0])/len(controlled_agents)
            controlled_wealth_a = sum([agent.wealth for agent in controlled_agents])/len(controlled_agents)
            controlled_no_trades_a = sum([agent.no_trades for agent in controlled_agents])/len(controlled_agents)

            all_wealth = sum([agent.wealth for agent in all_agents if agent.wealth > 0])/len(all_agents)
            all_no_trades = sum([agent.no_trades for agent in all_agents if agent.wealth > 0])/len(all_agents)
            all_wealth_a = sum([agent.wealth for agent in all_agents])/len(all_agents)
            all_no_trades_a = sum([agent.no_trades for agent in all_agents])/len(all_agents)

            smart_price_history = [agent.price_history for agent in smart_agents]
            smart_price_history = [sum(col) / float(len(col)) for col in zip(*smart_price_history)]
            smart_prediction_accuracy = str(numpy.corrcoef(smart_price_history,market.price_history)[0][1])

            controlled_price_history = [agent.price_history for agent in controlled_agents]
            controlled_price_history = [sum(col) / float(len(col)) for col in zip(*controlled_price_history)]
            controlled_prediction_accuracy = str(numpy.corrcoef(controlled_price_history,market.price_history)[0][1])

            all_price_history = [agent.price_history for agent in all_agents]
            all_price_history = [sum(col) / float(len(col)) for col in zip(*all_price_history)]
            all_prediction_accuracy = str(numpy.corrcoef(all_price_history,market.price_history)[0][1])

            writer.writerow([smart_wealth, smart_no_trades, smart_wealth_a, smart_no_trades_a, controlled_wealth, controlled_no_trades, controlled_wealth_a, controlled_no_trades_a,all_wealth, all_no_trades, all_wealth_a, all_no_trades_a,smart_prediction_accuracy,controlled_prediction_accuracy,all_prediction_accuracy])
        
    print("Creating Graphs:")
    market = Market(100, 10)
    for i in range(len(solar_data)):
        if int(i % (len(solar_data) / 100)) == 0:
            print(str(int(i / len(solar_data) * 100)) + "%")
        weather = solar_data[i]
        market.update(float(weather)/1000.0)
        price = market.price_history[-1]
        supply = market.asks[-1]
        demand = market.bids[-1]    
    
    solar_plot = pyplot.figure()
    a = solar_plot.add_subplot(111)
    a.plot(range(len(solar_data)), solar_data, label='Solar Radiance')
    a.legend()
    a.set_ylabel('kWh per m^2')
    a.set_xlabel('Day')
    solar_plot.savefig('Output/solar_radiance.png')

    aggregate_supply_demand_plot = pyplot.figure()
    b = aggregate_supply_demand_plot.add_subplot(111)
    b.plot(range(len(solar_data)), market.bids, label='Aggregate Demand')
    b.plot(range(len(solar_data)), market.asks, label='Aggregate Supply')
    b.legend()
    b.set_ylabel('Quantity')
    b.set_xlabel('Day')
    aggregate_supply_demand_plot.savefig('Output/supply_demand_history.png')

    price_history_plot = pyplot.figure()
    c = price_history_plot.add_subplot(111)
    c.plot(range(len(solar_data)), market.price_history, label='Average Trading Price')
    c.legend()
    c.set_ylabel('Price')
    c.set_xlabel('Day')
    c.set_ylim(12,22)
    price_history_plot.savefig('Output/price_history.png')

    #Plot average price expectation
    smart_agents = [agent for agent in market.agents if agent.use_brain == True]
    smart_price_history = [agent.price_history for agent in smart_agents]
    smart_price_history = [sum(col) / float(len(col)) for col in zip(*smart_price_history)]
    smart_price_prediction = pyplot.figure()
    d = smart_price_prediction.add_subplot(111)
    d.plot(range(len(solar_data)), market.price_history, label='Average Trading Price')
    d.plot(range(len(solar_data)), smart_price_history, label='Smart Agent Predicted Price')
    d.legend()
    d.set_ylabel('Price')
    d.set_xlabel('Day')
    d.set_ylim(12,22)
    smart_price_prediction.savefig('Output/smart_price_history.png')

    controlled_agents = market.agents[:10]
    controlled_price_history = [agent.price_history for agent in controlled_agents]
    controlled_price_history = [sum(col) / float(len(col)) for col in zip(*controlled_price_history)]
    controlled_price_prediction = pyplot.figure()
    e = controlled_price_prediction.add_subplot(111)
    e.axis('equal')
    e.scatter(market.price_history, controlled_price_history, label='Control Group Predicted Price', color = 'blue', alpha = 0.5, s=10)
    e.scatter(market.price_history, smart_price_history, label='Machine Learning Predicted Price', color = 'green', alpha = 0.5, s=10)
    
    e.legend()
    e.set_ylabel('Predicted Price')
    e.set_xlabel('Actual Price')
    e.set_ylim(12,20)
    e.set_xlim(12,20)
    controlled_price_prediction.savefig('Output/controlled_price_history.png')

    all_agents = market.agents
    all_price_history = [agent.price_history for agent in all_agents]
    all_price_history = [sum(col) / float(len(col)) for col in zip(*all_price_history)]
    all_price_prediction = pyplot.figure()
    f = all_price_prediction.add_subplot(111)
    f.plot(range(len(solar_data)), market.price_history, label='Average Trading Price')
    f.plot(range(len(solar_data)), all_price_history, label='All Group Predicted Price')
    f.legend()
    f.set_ylabel('Price')
    f.set_xlabel('Day')
    f.set_ylim(12,22)
    all_price_prediction.savefig('Output/all_price_history.png')
    
    #Sample Supply Demand Curve
    for agent in market.agents:
        agent.day_begin(5.0, market)

    buyers = [agent.price for agent in [agent for agent in market.agents if agent.demand > 0]]
    sellers = [agent.price for agent in [agent for agent in market.agents if agent.supply > 0]]
    buyers.sort(reverse=True)
    sellers.sort()
    while len(sellers) < len(buyers):
        buyers.pop()
    while len(buyers) < len(sellers):
        sellers.pop()

    supply_demand = pyplot.figure()
    g = supply_demand.add_subplot(111)
    g.plot(range(len(sellers)), sellers, label='Supply')
    g.plot(range(len(buyers)), buyers, label='Demand')
    g.legend()
    g.set_ylabel('Price')
    g.set_xlabel('Quantity')
    g.set_ylim(12,22)
    supply_demand.savefig('Output/supply_demand.png')

    print("Done")
Example #25
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/pretrain/network_model_pretrain.best"
    print >> sys.stderr, "Read model from", best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(pair_feature_dimention,
                                    mention_feature_dimention,
                                    word_embedding_dimention, span_dimention,
                                    1000, embedding_size, embedding_dimention,
                                    embedding_matrix).cuda()
    print >> sys.stderr, "save model ..."
    #torch.save(network_model,network_file)

    net_copy(network_model, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    lr = 0.0002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/pretrain/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        #if echo == 100:
        #    lr = lr/2.0
        #if echo == 150:
        #    lr = lr/2.0

        #optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, network_model.parameters()), lr=lr, weight_decay=l2_lambda)
        #optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)
        optimizer = optim.RMSprop(network_model.parameters(),
                                  lr=lr,
                                  eps=1e-5,
                                  weight_decay=l2_lambda)

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0

        for data in train_docs.train_generater(shuffle=shuffle, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target,top_x = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))

            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            top_gold = autograd.Variable(
                torch.from_numpy(top_x["top_gold"]).type(
                    torch.cuda.FloatTensor))

            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, dropout_rate)
            loss = F.binary_cross_entropy(
                output, top_gold,
                size_average=False) / train_docs.scale_factor_top

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(
                ana_output, ana_lable,
                size_average=False) / train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss

            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain", echo, "Pair total cost:", pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        print >> sys.stderr, "save model ..."
        torch.save(network_model,
                   model_save_dir + "network_model_pretrain.%d.top" % echo)

        #if cost_this_turn > last_cost:
        #    lr = lr*0.7
        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False, top=True):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target, top_x = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            reindex = autograd.Variable(
                torch.from_numpy(top_x["score_index"]).type(
                    torch.cuda.LongTensor))
            start_index = autograd.Variable(
                torch.from_numpy(top_x["starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(
                torch.from_numpy(top_x["ends"]).type(torch.cuda.LongTensor))

            gold += top_x["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()

            output, output_reindex = network_model.forward_top_pair(
                word_embedding_dimention, mention_index, mention_span,
                candi_index, candi_spans, pair_feature, anaphors, antecedents,
                reindex, start_index, end_index, 0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                word_embedding_dimention, anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] > all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.top.best")

        ana_gold = numpy.array(ana_gold, dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if (echo + 1) % 10 == 0:
            best_network_model = torch.load(model_save_dir +
                                            "network_model_pretrain.top.best")
            print "DEV:"
            performance.performance(dev_docs, best_network_model)
            print "TEST:"
            performance.performance(test_docs, best_network_model)
Example #26
def ShowResult(net, X, Y, title, wb1, wb2):
    # draw train data
    plt.plot(X[0, :], Y[0, :], '.', c='b')
    # create and draw visualized validation data
    TX = np.linspace(0, 1, 100).reshape(1, 100)
    dict_cache = net.ForwardCalculationBatch(TX, wb1, wb2)
    TY = dict_cache["Output"]
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()


#end def

if __name__ == '__main__':
    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    dataReader.NormalizeX()
    dataReader.NormalizeY()

    n_input, n_hidden, n_output = 1, 4, 1
    eta, batch_size, max_epoch = 0.5, 10, 50000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps)

    # SGD, MiniBatch, FullBatch
    loss_history = CLossHistory()
    net = TwoLayerFittingNet()
    wb1, wb2 = net.train(dataReader, params, loss_history)
    temp = []
    for a in anchors[1]:
        temp.append(linalg.norm(X - a, axis=1, ord=1))
    temp = np.array(temp)
    mins2 = np.min(temp, axis=0)
    mins2 = mins2 / np.std(mins2)
    nz = [mins1 != 0, mins2 != 0]
    d = np.all(nz, axis=0)
    mins1[mins1 == 0] = np.mean(mins1[d])
    mins2[mins2 == 0] = np.mean(mins2[d])
    # end: argmin
    X1 = np.hstack((X, mins1.reshape((len(X), 1))))
    return np.hstack((X1, mins2.reshape((len(X), 1))))


X, y = DR.fourclass()
y_unique = np.unique(y)
random_state = 42
cv_outer = StratifiedKFold(n_splits=10,
                           shuffle=True,
                           random_state=random_state)
cv_acc_training, cv_acc_test, cv_training_time = [], [], []

XD_k = []
y_k = []
# 10-fold stratified cross validation
for o_train_index, o_test_index in cv_outer.split(X, y):
    X_train_k, y_train_k, X_test_k, y_test_k = X[o_train_index], y[
        o_train_index], X[o_test_index], y[o_test_index]
    sets_of_anchors = []
    sets_of_anchors.append(find_anchors_from_class_0(X_train_k, y_train_k))
Example #28
    xa                    = np.linspace(x_range[1], x_range[0], num=x_num, endpoint=True)
    za                    = np.linspace(z_range[1], z_range[0], num=z_num, endpoint=True)
    x2d, z2d              = np.meshgrid(xa, za)
if specimen_name == 'al7075_mlf':
    xa                    = np.linspace(x_range[0], x_range[1], num=x_num, endpoint=True)
    za                    = np.linspace(z_range[0], z_range[1], num=z_num, endpoint=True)
    x2d, z2d              = np.meshgrid(xa, za)

x1d, z1d              = x2d.flatten(), z2d.flatten()
#%%    read peak diameters if they have been fit, if not fit here

orient                = 'h'
try:
    x, z, diams = [], [], []
    for i_step in range(sample.n_load_step):
        txt_data          = DataReader.read_data_from_text(ring.out_dir+sample.step_names[i_step]+'_diams_'+orient+'.txt')
        x.append(txt_data[:, 0]), z.append(txt_data[:, 1]), diams.append(txt_data[:, 2])
except:
    l_centers, l_errs     = np.zeros((sample.n_load_step, sample.n_data_pt)), np.zeros((sample.n_load_step, sample.n_data_pt))
    u_centers, u_errs     = np.zeros((sample.n_load_step, sample.n_data_pt)), np.zeros((sample.n_load_step, sample.n_data_pt))
    diams                 = np.zeros((sample.n_load_step, sample.n_data_pt))
    for i_step in range(sample.n_load_step):         
        l_centers[i_step,:], l_errs[i_step,:], u_centers[i_step,:], u_errs[i_step,:], diams[i_step,:] = DataAnalysis.write_scan_diameters(sample, ring, x1d, z1d, i_step, orient)

#%% 

#     total variation filtering 

fits, coords          = [], []
for i_step in range(sample.n_load_step):
    s_fits, s_coords      = [], []
Example #29
def main():
    path = "/Users/u15672269/stat"
    data_path = "/Users/u15672269/Desktop/For_Kseniya/однородность.xls"
    title = "Отчет о показателях качества тестовых заданий по курсу Информатика 2018-2019 учебного года 1 семестра"
    KO_I = True
    KO_II = True
    correlation = True

    report = Document()
    report.add_heading(title, 0)

    if (KO_I or KO_II or correlation):
        dictionary = DataReader.read_dictionary_from_excel(data_path)
        data = DataReader.read_raw_data_from_excel(data_path, dictionary)

        data_KO = []
        keys = []
        # composition of questions in the test
        test = {}
        for i in data:
            if i[2] != '':
                question = dictionary[i[0]][0]
                val = test.get(question[0])
                if val is None:
                    test[question[0]] = list()
                    test[question[0]].append(question[1])
                else:
                    if question[1] not in test[question[0]]:
                        test[question[0]].append(question[1])

                key = (question, i[1], i[2])
                if key not in keys:
                    count = sum(elem[0] == i[0] and elem[1] == key[1]
                                and elem[2] == key[2] for elem in data)
                    data_KO.append([i[0], i[1], i[2], count, i[4], i[5]])
                    keys.append(key)
        print("ok")

    if KO_I:
        print("KO_I processing started")
        formulation_stat = Stat.get_question_formulation_stat(
            Stat.count_formulation_stat(data_KO, dictionary))
        formulation_homogeneity = {}
        for key, question_stat in formulation_stat.items():
            formulation_homogeneity[key] = Stat.test_formulation_homogeneity(
                question_stat)
        DataPrinter.create_report_KO_I(report, formulation_homogeneity, path)
        print("KO_I processing finished")

    if KO_II:
        print("KO_II processing started")
        distractor_frequency_stat = Stat.get_distractor_frequency_stat(
            data_KO, dictionary)
        distractor_homogeneity = Stat.test_distractor_homogeneity(
            distractor_frequency_stat, 0.05, 0, 100)
        DataPrinter.create_report_KO_II(report, distractor_frequency_stat,
                                        distractor_homogeneity, path)
        print("KO_II processing finished")

    if correlation:
        print("correlation processing started")
        correlation_stat = Stat.get_correlation_matrix(
            test, Stat.group_stat_by_student(data, dictionary))
        DataPrinter.create_report_correlation(report, correlation_stat, path)
        print("correlation processing finished")

    report.save(os.path.join(path, '{}.docx'.format(title)))

    return
Example #30
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    network_model = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs = DataReader.DataGnerater("train" + reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev" + reduced)
    test_docs = DataReader.DataGnerater("test" + reduced)

    l2_lambda = 1e-6
    #lr = 0.00009
    lr = 0.0001
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/"

    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
    }

    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)

    for echo in range(100):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        inside_time = 0.0

        for data in train_docs.train_generater(shuffle=shuffle):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative,anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data
            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold = target.tolist()
            anaphoricity_gold = anaphoricity_target.tolist()

            pair_nums += len(gold)
            ana_nums += len(anaphoricity_gold)

            lable = autograd.Variable(torch.cuda.FloatTensor([gold]))
            ana_lable = autograd.Variable(
                torch.cuda.FloatTensor([anaphoricity_gold]))

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, dropout_rate)
            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, dropout_rate)

            optimizer.zero_grad()

            #loss = get_pair_loss(output,positive,negative,train_docs.scale_factor)
            loss = F.binary_cross_entropy(
                output, lable, size_average=False) / train_docs.scale_factor
            #ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor

            pair_cost_this_turn += loss.data[0] * train_docs.scale_factor

            loss_all = loss
            loss_all.backward()
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTRAINING Use %.3f seconds" % (end_time -
                                                               start_time)
        print >> sys.stderr, "Learning Rate", lr

        #print >> sys.stderr,"save model ..."
        #torch.save(network_model, model_save_dir+"network_model_pretrain.%d"%echo)

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_word_index, mention_span, candi_word_index,candi_span,feature_pair,pair_antecedents,pair_anaphors,\
            target,positive,negative, anaphoricity_word_indexs, anaphoricity_spans, anaphoricity_features, anaphoricity_target = data

            mention_index = autograd.Variable(
                torch.from_numpy(mention_word_index).type(
                    torch.cuda.LongTensor))
            mention_span = autograd.Variable(
                torch.from_numpy(mention_span).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(
                torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(
                torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(
                torch.from_numpy(feature_pair).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(
                torch.from_numpy(pair_anaphors).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(
                torch.from_numpy(pair_antecedents).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(
                torch.from_numpy(anaphoricity_word_indexs).type(
                    torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(
                torch.from_numpy(anaphoricity_spans).type(
                    torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(
                torch.from_numpy(anaphoricity_features).type(
                    torch.cuda.FloatTensor))

            gold += target.tolist()
            ana_gold += anaphoricity_target.tolist()

            output, _ = network_model.forward_all_pair(
                nnargs["word_embedding_dimention"], mention_index,
                mention_span, candi_index, candi_spans, pair_feature, anaphors,
                antecedents, 0.0)
            predict += output.data.cpu().numpy()[0].tolist()

            ana_output, _ = network_model.forward_anaphoricity(
                nnargs["word_embedding_dimention"], anaphoricity_index,
                anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()

        gold = numpy.array(gold, dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results

        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush()

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model,
                       model_save_dir + "network_model_pretrain.best.pair")

        sys.stdout.flush()

    ## output best
    print "In sum, pair accuracy: %f and Fscore: %f with thresh: %f"\
        %(all_best_results["accuracy"],all_best_results["f1"],all_best_results["thresh"])
    sys.stdout.flush()
Example #31
def setup():
    bucket.setBucket(DataReader.loadBucket())
    wordlist.extend(DataReader.collocationEntries())

    for w in wordlist:
        addStress(tokenize(w))
Example #32
        if Y[0, i] == 1:
            plt.plot(X[0, i], X[1, i], '^', c='g')
        elif Y[0, i] == 2:
            plt.plot(X[0, i], X[1, i], 'x', c='r')
        elif Y[0, i] == 3:
            plt.plot(X[0, i], X[1, i], '.', c='b')
        # end if
    # end for
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.show()


if __name__ == '__main__':

    dataReader = DataReader(x_data_name, y_data_name)
    dataReader.ReadData()
    X = dataReader.NormalizeX()
    Y = dataReader.ToOneHot()

    n_input, n_output = dataReader.num_feature, dataReader.num_category
    n_hidden = 8
    eta, batch_size, max_epoch = 0.1, 10, 5000
    eps = 0.06

    params = CParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps, LossFunctionName.CrossEntropy3)

    loss_history = CLossHistory()
    net = TwoLayerClassificationNet()
Example #33
import DataReader as DR
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression as LR
from sklearn.neighbors import KNeighborsClassifier

#Read training data
Ang_Songs = DR.readData("Data-Set/Angry/Train/", "angry")
Hap_Songs = DR.readData("Data-Set/Happy/Train/", "happy")
Sad_Songs = DR.readData("Data-Set/Sad/Train/", "sad")
#Rel_Songs=DR.readData("Data-Set/Relaxed/Train/","relaxed")
SongsTrain = [Ang_Songs, Hap_Songs, Sad_Songs]

#ReadTestingData
AngT_Songs = DR.readData("Data-Set/Angry/Test/", "angry")
HapT_Songs = DR.readData("Data-Set/Happy/Test/", "happy")
SadT_Songs = DR.readData("Data-Set/Sad/Test/", "sad")
#RelT_Songs=DR.readData("Data-Set/Relaxed/Test/","relaxed")
SongsTTrain = [AngT_Songs, HapT_Songs, SadT_Songs]

SongsWordsTrain = [[], []]
for i in range(3):
    for song in SongsTrain[i]:
        # print("\nsongs train[i]=")
        # print(SongsTrain[i])
        # print("\n\nnow song\n")
        # print(song)
        # print("\n\nPrinting s for train\n");
        s = song[4]  #lyrics text only
        # print(s)
    def __init__(self, learning_rate, training_iteration, batch_size,
                 hidden_layer_n, hidden_layer_n2):

        self.dataset = DataReader.data_set(
        )  #Acquiring data from the DataReader class
        self.learning_rate = learning_rate  #Learning rate at which weights and biases get adjusted
        self.training_iteration = training_iteration  #Number of iterations the NN trains for
        self.batch_size = batch_size  #Size of the batch of the data that gets fed to the neural network
        self.display_step = 4  #Display every 4th iteration

        input_layer_n = self.dataset.input_dim  #Input layer matrix
        output_layer_n = self.dataset.output_dim  #Output layer matrix

        self.x = tf.placeholder(
            "float", [None, input_layer_n],
            name="x")  #Creating place holder for the input matrix
        self.y = tf.placeholder(
            "float", [None, output_layer_n],
            name="y")  #Creating place holder for the output matrix

        with tf.name_scope(
                "weights"
        ) as scope:  #Creating weight matrices and populating them with random numbers
            W1 = tf.Variable(
                tf.random_normal([input_layer_n, hidden_layer_n], stddev=0.1))
            W2 = tf.Variable(
                tf.random_normal([hidden_layer_n, hidden_layer_n2],
                                 stddev=0.1))
            W3 = tf.Variable(
                tf.random_normal([hidden_layer_n2, output_layer_n],
                                 stddev=0.1))

        with tf.name_scope(
                "biases"
        ) as scope:  #Creating bias matrices and populating them with random numbers
            b1 = tf.Variable(tf.random_normal([hidden_layer_n], stddev=0.1))
            b2 = tf.Variable(tf.random_normal([hidden_layer_n2], stddev=0.1))
            b3 = tf.Variable(tf.random_normal([output_layer_n], stddev=0.1))

        with tf.name_scope(
                "model") as scope:  #Creating the three output layers
            layer_1 = tf.nn.sigmoid(
                tf.matmul(self.x, W1) + b1
            )  #sigmoid(W[0,0]*i[0] + W[0,1]*i[1] + ... + W[0,n]*i[n] + b[0]) for each hidden unit
            layer_2 = tf.nn.sigmoid(
                tf.matmul(layer_1, W2) + b2
            )  #sigmoid of the second hidden layer's weighted sums
            layer_3 = tf.nn.softmax(
                tf.matmul(layer_2, W3) + b3
            )  #softmax over the output layer's weighted sums
            self.model = layer_3

        with tf.name_scope(
                "objective_function"
        ) as scope:  #Objective (loss) function: root of the summed squared error between prediction and target
            self.objective_function = tf.sqrt(
                tf.reduce_sum(tf.square(tf.subtract(self.model, self.y))))

        with tf.name_scope(
                "train"
        ) as scope:  #Using Gradient Descent to minimize the cost of the function with respect to weights and biases
            self.optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(self.objective_function)

        self.init = tf.global_variables_initializer()
        self.merged_summary_op = tf.summary.merge_all()
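
    # The snippet above ends before any training loop. Below is a minimal sketch of a
    # train() method that could drive this graph; it assumes the dataset object exposes a
    # hypothetical next_batch(batch_size) helper, which is not shown in the original code.
    def train(self):
        with tf.Session() as sess:
            sess.run(self.init)
            for iteration in range(self.training_iteration):
                batch_x, batch_y = self.dataset.next_batch(self.batch_size)  # hypothetical helper
                _, cost = sess.run([self.optimizer, self.objective_function],
                                   feed_dict={self.x: batch_x, self.y: batch_y})
                if iteration % self.display_step == 0:
                    print("Iteration {}, cost {:.4f}".format(iteration, cost))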
Example #35
0

def ShowResult(net, X, Y, title, wb1, wb2):
    # draw train data
    plt.plot(X[0,:], Y[0,:], '.', c='b')
    # create and draw visualized validation data
    TX = np.linspace(0,1,100).reshape(1,100)
    dict_cache = net.ForwardCalculationBatch(TX, wb1, wb2)
    TY = dict_cache["Output"]
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    dataReader = DataReader(x_data_name, y_data_name)
    XData,YData = dataReader.ReadData()
    X = dataReader.NormalizeX(passthrough=True)
    Y = dataReader.NormalizeY()
    # To illustrate the point, a small number of hidden units and a small batch size are used in this experiment
    n_input, n_hidden, n_output = 1, 4, 1
    eta, batch_size, max_epoch = 0.1, 1, 10000
    eps = 0.001

    params = CParameters(n_input, n_hidden, n_output,
                         eta, max_epoch, batch_size, eps, 
                         LossFunctionName.MSE,
                         InitialMethod.Xavier)

    loss_history = CLossHistory(params)
Example #36
0
import PIL
from PIL import Image
import numpy as np
import DataReader

w, h = 28, 28
m = DataReader.get_mapping()
data = DataReader.get_images(10, h, w)  # 112800 images in data set
for image in data:
    print("\nCharacter being shown: " + chr(m[image[0]]))
    img = Image.fromarray(np.array(image[1], dtype=np.uint8))
    img.show()
    input()
def convertTargetFeatureToNumeric(data):
    categoryDictionary = dataReader.getCategoryDictionaries()
    data = data.replace(categoryDictionary.keys(), range(len(categoryDictionary.keys())))

    return data
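
# For reference, a minimal standalone sketch of the same key-to-index replacement on a toy
# frame; the category names below are made up for illustration, and a plain dict stands in
# for dataReader.getCategoryDictionaries().
import pandas as pd

categories = {"air": None, "rail": None, "road": None}  # stand-in for getCategoryDictionaries()
toyData = pd.DataFrame({"target": ["road", "air", "rail"]})
toyData = toyData.replace(list(categories.keys()), list(range(len(categories))))
print(toyData)  # the target column becomes 2, 0, 1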
# bert-serving-start -model_dir D:/model/multi_cased_L-12_H-768_A-12/ -max_seq_len 128 -pooling_strategy NONE -show_tokens_to_client -cased_tokenization
import tensorflow as tf
import data_reader  # local helper module providing read_squad_examples / convert_examples_to_features
from bert_serving.client import BertClient
bc = BertClient(ip='localhost')
train_file = 'D:/data/cmrc_squad/cmrc2018_trial.json'
test_file = 'D:/data/cmrc_squad/cmrc2018_trial.json'
max_query_length = 64
max_seq_length = 128
doc_stride = 128
batch_size = 16
hidden_size = 768
num_epoch = 2
# 0.0001
init_lr = 3e-2
tf.logging.set_verbosity(tf.logging.INFO)

train_data = data_reader.read_squad_examples(train_file, True)
test_data = data_reader.read_squad_examples(test_file, True)
test_data = test_data[0:4]
train_data_collector = []
test_data_collector = []
data_reader.convert_examples_to_features(train_data,
                                         max_query_length=max_query_length,
                                         max_seq_length=max_seq_length,
                                         doc_stride=doc_stride,
                                         is_training=True,
                                         data_collector=train_data_collector,
                                         bert_client=bc)
data_reader.convert_examples_to_features(test_data,
                                         max_query_length=max_query_length,
                                         max_seq_length=max_seq_length,
                                         doc_stride=doc_stride,
                                         is_training=True,
                                         data_collector=test_data_collector,
                                         bert_client=bc)
Example #39
0
"""
Tester file: This file will run all of the important parts of the project.
Team: Belinda Adam, Jacquelyn Haughey
Machine Learning 2015 Final Project
Phonological Learning of English Pronunciation
"""

import DataReader as dataReader
import Network

# read data from the nettalk data set
f = "nettalk.data.txt"

examples, words, prons = dataReader.readDataFile("random_100_train.txt")
examples2, words2, prons2 = dataReader.readDataFile("random_100_test.txt")

train = examples
test = examples2

network = Network.Network(train, test, 1, 120, 27, 53, 0.5, 1)
network.train_network()
network.test_network()
Example #40
0
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best"
    print >> sys.stderr,"Read model from",best_network_file
    best_network_model = torch.load(best_network_file)
        
    embedding_matrix = numpy.load(embedding_file)

    "Building torch model"
    network_model = network.Network(nnargs["pair_feature_dimention"],nnargs["mention_feature_dimention"],nnargs["word_embedding_dimention"],nnargs["span_dimention"],1000,nnargs["embedding_size"],nnargs["embedding_dimention"],embedding_matrix).cuda()
    print >> sys.stderr,"save model ..."

    net_copy(network_model,best_network_model)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    print >> sys.stderr,"prepare data for train ..."
    train_docs = DataReader.DataGnerater("train"+reduced)
    print >> sys.stderr,"prepare data for dev and test ..."
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)


    l2_lambda = 1e-6
    lr = nnargs["lr"]
    dropout_rate = nnargs["dropout_rate"]
    epoch = nnargs["epoch"]

    model_save_dir = "./model/bp/"
   
    last_cost = 0.0
    all_best_results = {
        'thresh': 0.0,
        'accuracy': 0.0,
        'precision': 0.0,
        'recall': 0.0,
        'f1': 0.0
        }
  
    optimizer = optim.RMSprop(network_model.parameters(), lr=lr, eps=1e-5)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)

    for echo in range(epoch):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo
        
        scheduler.step()

        pair_cost_this_turn = 0.0
        ana_cost_this_turn = 0.0

        pair_nums = 0
        ana_nums = 0

        for data in train_docs.train_generater(shuffle=True):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))
            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))

            optimizer.zero_grad()

            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,dropout_rate)
            loss = F.binary_cross_entropy(output,top_gold,size_average=False)/train_docs.scale_factor_top

            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, dropout_rate)
            ana_loss = F.binary_cross_entropy(ana_output,ana_lable,size_average=False)/train_docs.anaphoricity_scale_factor_top

            loss_all = loss + ana_loss    
            
            loss_all.backward()
            pair_cost_this_turn += loss.data[0]
            optimizer.step()

        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Pair total cost:",pair_cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "Learning Rate",lr

        gold = []
        predict = []

        ana_gold = []
        ana_predict = []

        for data in dev_docs.train_generater(shuffle=False):

            mention_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(data["candi_word_index"]).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(data["candi_span"]).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(data["pair_features"]).type(torch.cuda.FloatTensor))
            anaphors = autograd.Variable(torch.from_numpy(data["pair_anaphors"]).type(torch.cuda.LongTensor))
            antecedents = autograd.Variable(torch.from_numpy(data["pair_antecedents"]).type(torch.cuda.LongTensor))

            anaphoricity_index = autograd.Variable(torch.from_numpy(data["mention_word_index"]).type(torch.cuda.LongTensor))
            anaphoricity_span = autograd.Variable(torch.from_numpy(data["mention_span"]).type(torch.cuda.FloatTensor))
            anaphoricity_feature = autograd.Variable(torch.from_numpy(data["anaphoricity_feature"]).type(torch.cuda.FloatTensor))

            
            reindex = autograd.Variable(torch.from_numpy(data["top_score_index"]).type(torch.cuda.LongTensor))
            start_index = autograd.Variable(torch.from_numpy(data["top_starts"]).type(torch.cuda.LongTensor))
            end_index = autograd.Variable(torch.from_numpy(data["top_ends"]).type(torch.cuda.LongTensor))
            top_gold = autograd.Variable(torch.from_numpy(data["top_gold"]).type(torch.cuda.FloatTensor))

            anaphoricity_target = data["anaphoricity_target"]
            anaphoricity_gold = anaphoricity_target.tolist()
            ana_lable = autograd.Variable(torch.cuda.FloatTensor([anaphoricity_gold]))
            
            gold += data["top_gold"].tolist()
            ana_gold += anaphoricity_target.tolist()
        
            output,output_reindex = network_model.forward_top_pair(nnargs["word_embedding_dimention"],mention_index,mention_span,candi_index,candi_spans,pair_feature,anaphors,antecedents,reindex,start_index,end_index,0.0)

            predict += output.data.cpu().numpy().tolist()

            ana_output,_,_ = network_model.forward_anaphoricity(nnargs["word_embedding_dimention"], anaphoricity_index, anaphoricity_span, anaphoricity_feature, 0.0)
            ana_predict += ana_output.data.cpu().numpy()[0].tolist()
        
        gold = numpy.array(gold,dtype=numpy.int32)
        predict = numpy.array(predict)

        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

        thresh_list = [0.3,0.35,0.4,0.45,0.5,0.55,0.6]
        for thresh in thresh_list:
            evaluation_results = get_metrics(gold, predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
 
        print "Pair accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if best_results["f1"] >= all_best_results["f1"]:
            all_best_results = best_results
            print >> sys.stderr, "New High Result, Save Model"
            torch.save(network_model, model_save_dir+"network_model_pretrain.best.top")

        ana_gold = numpy.array(ana_gold,dtype=numpy.int32)
        ana_predict = numpy.array(ana_predict)
        best_results = {
            'thresh': 0.0,
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }
        for thresh in thresh_list:
            evaluation_results = get_metrics(ana_gold, ana_predict, thresh)
            if evaluation_results["f1"] >= best_results["f1"]:
                best_results = evaluation_results
        print "Anaphoricity accuracy: %f and Fscore: %f with thresh: %f"\
                %(best_results["accuracy"],best_results["f1"],best_results["thresh"])
        sys.stdout.flush() 

        if (echo+1)%10 == 0:
            best_network_model = torch.load(model_save_dir+"network_model_pretrain.best.top") 
            print "DEV:"
            performance.performance(dev_docs,best_network_model)
            print "TEST:"
            performance.performance(test_docs,best_network_model)
Example #41
0
def performValidation(yPred, yTest):

    dictionary = dataReader.getCategoryDictionaries()
    # classification_report expects the ground truth first, then the predictions
    print(metrics.classification_report(yTest, yPred, target_names=list(dictionary.keys())))
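
# Argument-order check: sklearn's classification_report takes (y_true, y_pred).
# A tiny self-contained sketch with made-up labels:
from sklearn import metrics

y_true = ["happy", "sad", "sad", "angry"]
y_pred = ["happy", "sad", "angry", "angry"]
print(metrics.classification_report(y_true, y_pred))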
Example #42
0
from DataReader import *
from AGDSStructure import *
from AGDSKNearest import *
import numpy as np


def classify(data_holder, model, X):
    predicted_label = model.find_similarity(np.array(X))
    win_class = data_holder.get_real_label(predicted_label)
    print(win_class)


if __name__ == '__main__':
    data_reader = DataReader("IrisData.xls")
    agds_structure = AGDSStructure(data_reader.data_frame, data_reader.label)
    k_nearest = AGDSKNearest(agds_structure, 3)

    classify(data_reader, k_nearest, [4.5, 3.0, 1.1, 0.1])
    classify(data_reader, k_nearest, [7.0, 3.2, 4.7, 1.4])
    classify(data_reader, k_nearest, [5.0, 2.0, 4.0, 1.0])
    classify(data_reader, k_nearest, [5.7, 2.5, 4.8, 1.6])
Example #43
0
 def setup(self):
     prov_niscode_shapes = {}
     for record in dr.read_shp('/home/techpriest/Desktop/becode/SpaceEYE/ADM/Apn_AdPr.shp').to_records():
         prov_niscode_shapes[record[3]] = record[-1]
     return prov_niscode_shapes
Example #44
0
        print("Solving it with Tabulation")
        t0 = time.time()
        value = tabulation(s1, s2)
        t1 = time.time()
    elif args.check:
        mem = memoization(s1, s2)
        tab = tabulation(s1, s2)
        print("Check:", (mem == tab))
        print("Memoization =", mem)
        print("Tabulation =", tab)
    else:
        print("Introduzca un metodo, por favor")
        exit(-1)

    if args.showValue and not args.check:
        print("Value =", value)
    if args.timer:
        t = (t1 - t0)
        print("Time =", round(t, 3), "s")


if __name__ == '__main__':
    args = args_creator()
    if args.directory != "":
        files = dr.readAllFiles(args.directory)
        withDirectory(files)
    elif args.file != "":
        withFile(args.file)
    else:
        print("Introduzca un fichero, por favor")
        exit(-1)
Example #45
0
#TrendsScraper
#YahooFinanceScraper

from pandas.io.data import DataReader
from datetime import datetime
goog = DataReader("GOOG",  "yahoo", datetime(2000,1,1), datetime(2012,1,1))
goog["Adj Close"]

Example #46
0
logs = os.path.join(directory, 'logs')
trainloss = os.path.join(logs, 'train_loss.txt')

if not os.path.isdir(logs):
    os.makedirs(logs)

# choose network, can be either DRN18 or DRN26
network = 'DRN26'
# set parameters
batch_size = 8
num_epochs = 100
use_weights = 1
num_classes = 5
image_dims = [500, 500, 3]

data = DataReader(directory, batch_size, num_epochs, use_weights=1)
train_data = data.train_batch(train_file)
num_train_images = data.num_images

test_data = data.test_batch(test_file)
num_val_images = data.num_images

# determine number of iterations based on number of images
training_iterations = int(np.floor(num_train_images / batch_size))
validation_iterations = int(np.floor(num_val_images / batch_size))

handle = tf.placeholder(tf.string, shape=[])
# create iterator allowing us to switch between datasets
iterator = tf.data.Iterator.from_string_handle(handle, train_data.output_types,
                                               train_data.output_shapes)
next_element = iterator.get_next()
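
# A sketch of how the string-handle iterator above is typically driven in a TF 1.x session,
# assuming train_data and test_data are tf.data.Dataset objects as constructed earlier
# (the make_*_iterator calls below are standard TF 1.x API, not part of the original snippet).
train_iterator = train_data.make_one_shot_iterator()
test_iterator = test_data.make_initializable_iterator()

with tf.Session() as sess:
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # pull one training batch
    batch = sess.run(next_element, feed_dict={handle: train_handle})
    # switch to the test set without rebuilding the graph
    sess.run(test_iterator.initializer)
    batch = sess.run(next_element, feed_dict={handle: test_handle})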
Example #47
0
        self.index = np.arange(self.maxlen)
        np.random.shuffle(self.index)
        self.cnt = 0
        return self

    def __next__(self):
        if self.cnt == self.maxlen: raise StopIteration
        self.cnt += self.batch
        return self.data[self.index[self.cnt - self.batch: self.cnt], :], \
               self.label[self.index[self.cnt - self.batch: self.cnt], :]

    def next(self):
        return self.__next__()


if __name__ == "__main__":
    print tf.__version__
    sys.path.append("../") 
    import DataReader
    tdata = DataReader.ImageReader("../dataset/train-images-idx3-ubyte.gz").to_tensor()
    ldata = DataReader.LabelReader("../dataset/train-labels-idx1-ubyte.gz").to_tensor()
    print tdata.shape
    print ldata.shape
    tf_mlp = TFMLP(tdata, ldata)
    tf_mlp.train()

    ttest = DataReader.ImageReader("../dataset/t10k-images-idx3-ubyte.gz").to_tensor()
    ltest = DataReader.LabelReader("../dataset/t10k-labels-idx1-ubyte.gz").to_tensor()

    tf_mlp.test(ttest, ltest)
import DataReader as DR
from sklearn import svm
from nltk.tokenize import word_tokenize
#from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
#import random
import numpy as np
import pandas as pd
import re
from sklearn.naive_bayes import MultinomialNB

#Read training data
Ang_Songs=DR.readData("Data-Set/Angry/Train/","angry")
Hap_Songs=DR.readData("Data-Set/Happy/Train/","happy")
Sad_Songs=DR.readData("Data-Set/Sad/Train/","sad")
Rel_Songs=DR.readData("Data-Set/Relaxed/Train/","relaxed")
SongsTrain=[Ang_Songs,Hap_Songs,Sad_Songs,Rel_Songs]

#    PROCESSING TRAINING DATA

#tokenizing training data
sw = list(stopwords.words("english"))
lemmatizer=WordNetLemmatizer()

def my_tokenizer(s):
    s = s.lower() # downcase
    tokens = word_tokenize(s) # split string into words (tokens)
    tokens = [t for t in tokens if len(t) > 2] # remove short words, they're probably not useful
    tokens = [lemmatizer.lemmatize(t) for t in tokens] # put words into base form
    tokens = [t for t in tokens if t not in sw] # remove stopwords
    return tokens
Example #49
0
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    embedding_matrix = numpy.load(embedding_file)
    "Building torch model"
    worker = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(worker, best_network_model)

    best_network_file = "./model/network_model_pretrain.best.top"
    print >> sys.stderr, "Read model from ", best_network_file
    best_network_model = torch.load(best_network_file)

    manager = network.Network(
        nnargs["pair_feature_dimention"], nnargs["mention_feature_dimention"],
        nnargs["word_embedding_dimention"], nnargs["span_dimention"], 1000,
        nnargs["embedding_size"], nnargs["embedding_dimention"],
        embedding_matrix).cuda()
    net_copy(manager, best_network_model)

    reduced = ""
    if args.reduced == 1:
        reduced = "_reduced"

    print >> sys.stderr, "prepare data for train ..."
    train_docs_iter = DataReader.DataGnerater("train" + reduced)
    #train_docs_iter = DataReader.DataGnerater("dev"+reduced)
    print >> sys.stderr, "prepare data for dev and test ..."
    dev_docs_iter = DataReader.DataGnerater("dev" + reduced)
    test_docs_iter = DataReader.DataGnerater("test" + reduced)
    '''
    print "Performance after pretraining..."
    print "DEV"
    metric = performance.performance(dev_docs_iter,worker,manager) 
    print "Average:",metric["average"]
    print "TEST"
    metric = performance.performance(test_docs_iter,worker,manager) 
    print "Average:",metric["average"]
    print "***"
    print
    sys.stdout.flush()
    '''

    lr = nnargs["lr"]
    top_k = nnargs["top_k"]

    model_save_dir = "./model/reinforce/"
    utils.mkdir(model_save_dir)

    score_softmax = nn.Softmax()

    optimizer_manager = optim.RMSprop(manager.parameters(), lr=lr, eps=1e-6)
    optimizer_worker = optim.RMSprop(worker.parameters(), lr=lr, eps=1e-6)

    MAX_AVE = 2048

    for echo in range(nnargs["epoch"]):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:", echo

        reward_log = Logger(Tensorboard + args.tb +
                            "/acl2018/%d/reward/" % echo,
                            flush_secs=3)
        entropy_log_manager = Logger(Tensorboard + args.tb +
                                     "/acl2018/%d/entropy/manager" % echo,
                                     flush_secs=3)
        entropy_log_worker = Logger(Tensorboard + args.tb +
                                    "/acl2018/%d/entropy/worker" % echo,
                                    flush_secs=3)

        train_docs = utils.load_pickle(args.DOCUMENT + 'train_docs.pkl')
        #train_docs = utils.load_pickle(args.DOCUMENT + 'dev_docs.pkl')
        docs_by_id = {doc.did: doc for doc in train_docs}

        ave_reward = []
        ave_manager_entropy = []
        ave_worker_entropy = []

        print >> sys.stderr, "Link docs ..."
        tmp_data = []
        cluster_info = {0: [0]}
        cluster_list = [0]
        current_new_cluster = 1
        predict_action_embedding = []
        choose_action = []
        mid = 1

        step = 0

        statistic = {
            "worker_hits": 0,
            "manager_hits": 0,
            "total": 0,
            "manager_predict_last": 0,
            "worker_predict_last": 0
        }

        for data in train_docs_iter.rl_case_generater(shuffle=True):

            rl = data["rl"]

            scores_manager, representations_manager = get_score_representations(
                manager, data)
            scores_worker, representations_worker = get_score_representations(
                worker, data)

            for s, e in zip(rl["starts"], rl["ends"]):
                #action_embeddings = representations_manager[s:e]
                #probs = F.softmax(torch.squeeze(scores_manager[s:e]))
                action_embeddings = representations_worker[s:e]
                probs = F.softmax(torch.squeeze(
                    scores_worker[s:e])).data.cpu().numpy()

                #m = Categorical(F.softmax(torch.squeeze(scores_worker[s:e]))[:-1])
                #a = m.sample()
                #this_action = m.sample()
                #index = this_action.data.cpu().numpy()[0]

                index = utils.choose_action(probs)

                if index == (e - s - 1):
                    should_cluster = current_new_cluster
                    cluster_info[should_cluster] = []
                    current_new_cluster += 1
                else:
                    should_cluster = cluster_list[index]

                choose_action.append(index)
                cluster_info[should_cluster].append(mid)
                cluster_list.append(should_cluster)
                mid += 1

                cluster_indexs = torch.cuda.LongTensor(
                    cluster_info[should_cluster])
                action_embedding_predict_ave = torch.mean(
                    action_embeddings[cluster_indexs], 0, keepdim=True)
                action_embedding_predict_max, max_index = torch.max(
                    action_embeddings[cluster_indexs], dim=0, keepdim=True)

                action_embedding_predict = torch.cat(
                    (action_embedding_predict_ave,
                     action_embedding_predict_max), 1)
                predict_action_embedding.append(action_embedding_predict)

            tmp_data.append(data)

            if rl["end"] == True:

                inside_index = 0
                manager_path = []
                worker_path = []

                doc = docs_by_id[rl["did"]]

                for data in tmp_data:

                    rl = data["rl"]
                    pair_target = data["pair_target"]
                    anaphoricity_target = 1 - data["anaphoricity_target"]
                    target = numpy.concatenate(
                        (pair_target, anaphoricity_target))[rl["reindex"]]
                    scores_worker, representations_worker = get_score_representations(
                        worker, data)

                    for s, e in zip(rl["starts"], rl["ends"]):
                        action_embeddings = representations_worker[s:e]
                        probs = F.softmax(
                            torch.squeeze(scores_worker[s:e])
                        ).data.cpu().numpy(
                        )  #print probs.data.cpu().numpy() -> [  3.51381488e-04   9.99648571e-01]
                        action_embedding_predicted = predict_action_embedding[
                            inside_index]
                        combine_embedding = torch.cat(
                            (action_embeddings, action_embeddings), 1)
                        similarities = torch.sum(
                            torch.abs(combine_embedding -
                                      action_embedding_predicted), 1)
                        similarities = similarities.data.cpu().numpy()

                        action_probabilities = []
                        action_list = []
                        similarity_candidates = heapq.nlargest(
                            top_k, -similarities)
                        for similarity in similarity_candidates:
                            action_index = numpy.argwhere(
                                similarities == -similarity)[0][0]
                            action_probabilities.append(probs[action_index])
                            action_list.append(action_index)

                        manager_action = choose_action[inside_index]

                        if not manager_action in action_list:
                            action_list.append(manager_action)
                            action_probabilities.append(probs[manager_action])
                        sample_action = utils.sample_action(
                            numpy.array(action_probabilities))
                        worker_action = action_list[sample_action]

                        this_target = target[s:e]

                        if this_target[worker_action] == 1:
                            statistic["worker_hits"] += 1
                        if this_target[manager_action] == 1:
                            statistic["manager_hits"] += 1
                        if worker_action == (e - s - 1):
                            statistic["worker_predict_last"] += 1
                        if manager_action == (e - s - 1):
                            statistic["manager_predict_last"] += 1
                        statistic["total"] += 1

                        inside_index += 1

                        #link = manager_action
                        link = worker_action
                        m1, m2 = rl['ids'][s + link]
                        doc.link(m1, m2)

                        manager_path.append(manager_action)
                        worker_path.append(worker_action)

                reward = doc.get_f1()
                for data in tmp_data:
                    rl = data["rl"]
                    for s, e in zip(rl["starts"], rl["ends"]):
                        ids = rl['ids'][s:e]
                        ana = ids[0, 1]
                        old_ant = doc.ana_to_ant[ana]
                        doc.unlink(ana)
                        costs = rl['costs'][s:e]
                        for ant_ind in range(e - s):
                            costs[ant_ind] = doc.link(ids[ant_ind, 0],
                                                      ana,
                                                      hypothetical=True,
                                                      beta=1)
                        doc.link(old_ant, ana)
                        #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))

                inside_index = 0
                worker_entropy = 0.0
                for data in tmp_data:
                    new_step = step
                    rl = data["rl"]
                    # worker
                    scores_worker, representations_worker = get_score_representations(
                        worker, data, dropout=nnargs["dropout_rate"])
                    optimizer_worker.zero_grad()
                    worker_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        costs = rl['costs'][s:e]
                        costs = autograd.Variable(
                            torch.from_numpy(costs).type(
                                torch.cuda.FloatTensor))
                        action = worker_path[inside_index]
                        score = F.softmax(torch.squeeze(scores_worker[s:e]))
                        if not score.size() == costs.size():
                            continue

                        baseline = torch.sum(costs * score)
                        this_cost = torch.log(
                            score[action]) * -1.0 * (reward - baseline)

                        if worker_loss is None:
                            worker_loss = this_cost
                        else:
                            worker_loss += this_cost
                        worker_entropy += torch.sum(
                            score * torch.log(score + 1e-7)
                        ).data.cpu().numpy()[
                            0]  #+ 0.001*torch.sum(score*torch.log(score+1e-7))
                        inside_index += 1

                    worker_loss.backward()
                    torch.nn.utils.clip_grad_norm(worker.parameters(),
                                                  nnargs["clip"])
                    optimizer_worker.step()

                    ave_worker_entropy.append(worker_entropy)
                    if len(ave_worker_entropy) >= MAX_AVE:
                        ave_worker_entropy = ave_worker_entropy[1:]
                    entropy_log_worker.log_value(
                        'entropy',
                        float(sum(ave_worker_entropy)) /
                        float(len(ave_worker_entropy)), new_step)
                    new_step += 1

                inside_index = 0
                manager_entropy = 0.0
                for data in tmp_data:
                    new_step = step
                    rl = data["rl"]

                    ave_reward.append(reward)
                    if len(ave_reward) >= MAX_AVE:
                        ave_reward = ave_reward[1:]
                    reward_log.log_value(
                        'reward',
                        float(sum(ave_reward)) / float(len(ave_reward)),
                        new_step)

                    scores_manager, representations_manager = get_score_representations(
                        manager, data, dropout=nnargs["dropout_rate"])

                    #optimizer_manager.zero_grad
                    #manager_loss = None
                    for s, e in zip(rl["starts"], rl["ends"]):
                        #costs = rl['costs'][s:e]
                        #costs = autograd.Variable(torch.from_numpy(costs).type(torch.cuda.FloatTensor))
                        score = F.softmax(torch.squeeze(scores_manager[s:e]))
                        action = manager_path[inside_index]

                        if not score.size() == costs.size():
                            continue

                        #baseline = torch.sum(costs*score)

                        #this_cost = torch.log(score[action])*-1.0*(reward-baseline)# + 0.001*torch.sum(score*torch.log(score+1e-7))
                        #if manager_loss is None:
                        #    manager_loss = this_cost
                        #else:
                        #    manager_loss += this_cost

                        manager_entropy += torch.sum(
                            score *
                            torch.log(score + 1e-7)).data.cpu().numpy()[0]
                        inside_index += 1

                    #manager_loss.backward()
                    #torch.nn.utils.clip_grad_norm(manager.parameters(), nnargs["clip"])
                    #optimizer_manager.step()

                    ave_manager_entropy.append(manager_entropy)
                    if len(ave_manager_entropy) >= MAX_AVE:
                        ave_manager_entropy = ave_manager_entropy[1:]
                    entropy_log_manager.log_value(
                        'entropy',
                        float(sum(ave_manager_entropy)) /
                        float(len(ave_manager_entropy)), new_step)
                    new_step += 1

                step = new_step
                tmp_data = []
                cluster_info = {0: [0]}
                cluster_list = [0]
                current_new_cluster = 1
                mid = 1
                predict_action_embedding = []
                choose_action = []

        end_time = timeit.default_timer()
        print >> sys.stderr, "TRAINING Use %.3f seconds" % (end_time -
                                                            start_time)
        print >> sys.stderr, "save model ..."
        #print "Top k",top_k
        print "Worker Hits", statistic[
            "worker_hits"], "Manager Hits", statistic[
                "manager_hits"], "Total", statistic["total"]
        print "Worker predict last", statistic[
            "worker_predict_last"], "Manager predict last", statistic[
                "manager_predict_last"]
        #torch.save(network_model, model_save_dir+"network_model_rl_worker.%d"%echo)
        #torch.save(ana_network, model_save_dir+"network_model_rl_manager.%d"%echo)

        print "DEV"
        metric = performance.performance(dev_docs_iter, worker, manager)
        print "Average:", metric["average"]
        #print "DEV manager"
        #metric = performance_manager.performance(dev_docs_iter,worker,manager)
        #print "Average:",metric["average"]
        print "TEST"
        metric = performance.performance(test_docs_iter, worker, manager)
        print "Average:", metric["average"]
        print
        sys.stdout.flush()
Example #50
0
def write_2thetas(sample, ring, num_vecs, dgamma, x1d, y1d, step_num, fwhm0=10, amp0=500, plot_flag=False):

    # read in dark image
    dark_path             = sample.data_dir+str(sample.dark_dirs[step_num])+'\\ff\\'
    dark_file             = os.listdir(dark_path)
    assert len(dark_file) == 1
    dark_image            = DataReader.ge2_reader(dark_path+dark_file[0])
    if len(dark_image.shape) > 1:
        dark_image        = np.mean(dark_image, axis=0)

    # initialize storage arrays
    vec_gamma             = np.linspace(-np.pi+(dgamma/2), np.pi-(dgamma/2), num=num_vecs)
    two_theta             = np.zeros((sample.n_data_pt, num_vecs))
    peak_amps             = np.zeros((sample.n_data_pt, num_vecs))
    peak_errs             = np.zeros((sample.n_data_pt, num_vecs))
    
    # loop through each grid point on sample
    for i_data_pt in range(sample.n_data_pt):
       
       # read image
       dir_num               = sample.init_dirs[step_num] + i_data_pt
       path                  = sample.data_dir+str(dir_num)+'\\ff\\'
       file                  = os.listdir(path)
       assert len(file) == 1
       print('reading image ' + str(dir_num), 'x = '+str(x1d[i_data_pt]), 'y = '+str(y1d[i_data_pt])) 
       image                 = DataReader.ge2_reader(path+file[0])[0]  # only using first image because of shutter timing error
       image                -= dark_image                              # subtract dark image
      
       # generate coordinates of each pixel and calculate radius and vector angle
       x, y                  = np.meshgrid(np.arange(image.shape[1], dtype=float), np.arange(image.shape[0], dtype=float))
       x                    -= sample.true_center[1]
       y                    -= sample.true_center[0]
       radius                = np.sqrt( x**2 + y**2 )     # convert x,y coordinates into r,gamma coordinates
       gamma                 = np.arctan2(y, x)           # convert x,y coordinates into r,gamma coordinates
       
       # loop through each diffraction vector
       for i_vec in range(num_vecs):
            
            # grab slice of detector pixels that are within dgamma of the desired gamma
            img_slice                     =  image[np.abs(gamma-vec_gamma[i_vec]) < dgamma]
            r_slice                       = radius[np.abs(gamma-vec_gamma[i_vec]) < dgamma]
            
            # grab section of slice that is within dr of ring radius
            img_slice                     =  img_slice[np.abs(r_slice-ring.radius) < ring.dr]
            r_slice                       =    r_slice[np.abs(r_slice-ring.radius) < ring.dr]
            
            # sort the selected pixel values by radial coordinate
            sorted_indices                = np.argsort(r_slice)
            sorted_r                      =   r_slice[sorted_indices]
            sorted_peak                   = img_slice[sorted_indices]
    
            # fit peak to sorted selected pixel values
            ctr_ind, lo_ind, hi_ind       = PeakFitting.get_peak_fit_indices(sorted_peak)
            peak_bg_rm, _                 = PeakFitting.RemoveBackground(sorted_r, sorted_peak, sorted_r[lo_ind], sorted_r[hi_ind])
            peak_fit, p_opt, err          = PeakFitting.fitPeak(sorted_r, peak_bg_rm, sorted_r[ctr_ind], fwhm0, amp0)
            
            # calculate 2 theta 
            opp                           = p_opt[0]
            adj                           = sample.detector_dist
            two_theta[i_data_pt, i_vec]   = np.arctan(opp/adj)
            
            # store peak amplitude and relative error
            peak_amps[i_data_pt, i_vec]   = p_opt[3]
            peak_errs[i_data_pt, i_vec]   = err
            
            if plot_flag:
                plt.close('all')
                fig = plt.figure()
                ax  = fig.add_subplot(111)
                ax.plot(sorted_r, sorted_peak, 'ok')
                ax.plot(sorted_r, peak_bg_rm,  'or')
                ax.plot(sorted_r, peak_fit,    '-r')
                ax.text(0.01, 0.92, 'ctr = '+str(opp), transform=ax.transAxes, color='k', fontsize=14)
                if err < 0.5:
                    ax.text(0.01, 0.85, 'err = '+str(err), transform=ax.transAxes, color='k', fontsize=14)
                else:
                    ax.text(0.01, 0.85, 'err = '+str(err), transform=ax.transAxes, color='r', fontsize=14)
                plt.savefig(ring.peak_dir+str(i_data_pt)+'_'+str(vec_gamma[i_vec])+'.png')
                plt.close('all')
                
       if plot_flag:
           plt.close('all')
           plt.imshow(image, vmin=0, vmax=200)
           plt.savefig(ring.peak_dir+str(i_data_pt)+'_image.png') 
           plt.close('all')

    # write data to a text file
    out_path = ring.peak_dir+sample.step_names[step_num]+'_peakfit_results.txt'
    out_file = open(out_path, 'w') 
    for i_data_pt in range(sample.n_data_pt):
        out_file.write('%24.16f'%x1d[i_data_pt]                + '\t')
        out_file.write('%24.16f'%y1d[i_data_pt]                + '\t')
        for i_vec in range(num_vecs):
            out_file.write('%24.16f'%vec_gamma[i_vec]             + '\t')
            out_file.write('%24.16f'%two_theta[i_data_pt, i_vec]  + '\t')
            out_file.write('%24.16f'%peak_amps[i_data_pt, i_vec]   + '\t')
            out_file.write('%24.16f'%peak_errs[i_data_pt, i_vec]   + '\t')
        out_file.write('\n')
    out_file.close()   
Example #51
0
def write_2thetas(sample,
                  ring,
                  num_vecs,
                  dgamma,
                  x1d,
                  y1d,
                  step_num,
                  fwhm0=10,
                  amp0=500,
                  plot_flag=False):

    # read in dark image
    dark_path = sample.data_dir + str(sample.dark_dirs[step_num]) + '\\ff\\'
    dark_file = os.listdir(dark_path)
    assert len(dark_file) == 1
    dark_image = DataReader.ge2_reader(dark_path + dark_file[0])
    if len(dark_image.shape) > 1:
        dark_image = np.mean(dark_image, axis=0)

    # initialize storage arrays
    vec_gamma = np.linspace(-np.pi + (dgamma / 2),
                            np.pi - (dgamma / 2),
                            num=num_vecs)
    two_theta = np.zeros((sample.n_data_pt, num_vecs))
    peak_amps = np.zeros((sample.n_data_pt, num_vecs))
    peak_errs = np.zeros((sample.n_data_pt, num_vecs))

    # loop through each grid point on sample
    for i_data_pt in range(sample.n_data_pt):

        # read image
        dir_num = sample.init_dirs[step_num] + i_data_pt
        path = sample.data_dir + str(dir_num) + '\\ff\\'
        file = os.listdir(path)
        assert len(file) == 1
        print('reading image ' + str(dir_num), 'x = ' + str(x1d[i_data_pt]),
              'y = ' + str(y1d[i_data_pt]))
        image = DataReader.ge2_reader(path + file[0])[
            0]  # only using first image because of shutter timing error
        image -= dark_image  # subtract dark image

        # generate coordinates of each pixel and calculate radius and vector angle
        x, y = np.meshgrid(np.arange(image.shape[1], dtype=float),
                           np.arange(image.shape[0], dtype=float))
        x -= sample.true_center[1]
        y -= sample.true_center[0]
        radius = np.sqrt(
            x**2 + y**2)  # convert x,y coordinates into r,gamma coordinates
        gamma = np.arctan2(
            y, x)  # convert x,y coordinates into r,gamma coordinates

        # loop through each diffraction vector
        for i_vec in range(num_vecs):

            # grab slice of detector pixels that are within dgamma of the desired gamma
            img_slice = image[np.abs(gamma - vec_gamma[i_vec]) < dgamma]
            r_slice = radius[np.abs(gamma - vec_gamma[i_vec]) < dgamma]

            # grab section of slice that is within dr of ring radius
            img_slice = img_slice[np.abs(r_slice - ring.radius) < ring.dr]
            r_slice = r_slice[np.abs(r_slice - ring.radius) < ring.dr]

            # sort the selected pixel values by radial coordinate
            sorted_indices = np.argsort(r_slice)
            sorted_r = r_slice[sorted_indices]
            sorted_peak = img_slice[sorted_indices]

            # fit peak to sorted selected pixel values
            ctr_ind, lo_ind, hi_ind = PeakFitting.get_peak_fit_indices(
                sorted_peak)
            peak_bg_rm, _ = PeakFitting.RemoveBackground(
                sorted_r, sorted_peak, sorted_r[lo_ind], sorted_r[hi_ind])
            peak_fit, p_opt, err = PeakFitting.fitPeak(sorted_r, peak_bg_rm,
                                                       sorted_r[ctr_ind],
                                                       fwhm0, amp0)

            # calculate 2 theta
            opp = p_opt[0]
            adj = sample.detector_dist
            two_theta[i_data_pt, i_vec] = np.arctan(opp / adj)

            # store peak amplitude and relative error
            peak_amps[i_data_pt, i_vec] = p_opt[3]
            peak_errs[i_data_pt, i_vec] = err

            if plot_flag:
                plt.close('all')
                fig = plt.figure()
                ax = fig.add_subplot(111)
                ax.plot(sorted_r, sorted_peak, 'ok')
                ax.plot(sorted_r, peak_bg_rm, 'or')
                ax.plot(sorted_r, peak_fit, '-r')
                ax.text(0.01,
                        0.92,
                        'ctr = ' + str(opp),
                        transform=ax.transAxes,
                        color='k',
                        fontsize=14)
                if err < 0.5:
                    ax.text(0.01,
                            0.85,
                            'err = ' + str(err),
                            transform=ax.transAxes,
                            color='k',
                            fontsize=14)
                else:
                    ax.text(0.01,
                            0.85,
                            'err = ' + str(err),
                            transform=ax.transAxes,
                            color='r',
                            fontsize=14)
                plt.savefig(ring.peak_dir + str(i_data_pt) + '_' +
                            str(vec_gamma[i_vec]) + '.png')
                plt.close('all')

        if plot_flag:
            plt.close('all')
            plt.imshow(image, vmin=0, vmax=200)
            plt.savefig(ring.peak_dir + str(i_data_pt) + '_image.png')
            plt.close('all')

    # write data to a text file
    out_path = ring.peak_dir + sample.step_names[
        step_num] + '_peakfit_results.txt'
    out_file = open(out_path, 'w')
    for i_data_pt in range(sample.n_data_pt):
        out_file.write('%24.16f' % x1d[i_data_pt] + '\t')
        out_file.write('%24.16f' % y1d[i_data_pt] + '\t')
        for i_vec in range(num_vecs):
            out_file.write('%24.16f' % vec_gamma[i_vec] + '\t')
            out_file.write('%24.16f' % two_theta[i_data_pt, i_vec] + '\t')
            out_file.write('%24.16f' % peak_amps[i_data_pt, i_vec] + '\t')
            out_file.write('%24.16f' % peak_errs[i_data_pt, i_vec] + '\t')
        out_file.write('\n')
    out_file.close()
Example #52
0
    za = np.linspace(z_range[1], z_range[0], num=z_num, endpoint=True)
    x2d, z2d = np.meshgrid(xa, za)
if specimen_name == 'al7075_mlf':
    xa = np.linspace(x_range[0], x_range[1], num=x_num, endpoint=True)
    za = np.linspace(z_range[0], z_range[1], num=z_num, endpoint=True)
    x2d, z2d = np.meshgrid(xa, za)

x1d, z1d = x2d.flatten(), z2d.flatten()
#%%    read peak diameters if they have been fit, if not fit here

orient = 'h'
try:
    x, z, diams = [], [], []
    for i_step in range(sample.n_load_step):
        txt_data = DataReader.read_data_from_text(ring.out_dir +
                                                  sample.step_names[i_step] +
                                                  '_diams_' + orient + '.txt')
        x.append(txt_data[:,
                          0]), z.append(txt_data[:,
                                                 1]), diams.append(txt_data[:,
                                                                            2])
except Exception:
    l_centers, l_errs = np.zeros(
        (sample.n_load_step, sample.n_data_pt)), np.zeros(
            (sample.n_load_step, sample.n_data_pt))
    u_centers, u_errs = np.zeros(
        (sample.n_load_step, sample.n_data_pt)), np.zeros(
            (sample.n_load_step, sample.n_data_pt))
    diams = np.zeros((sample.n_load_step, sample.n_data_pt))
    for i_step in range(sample.n_load_step):
        l_centers[i_step, :], l_errs[i_step, :], u_centers[i_step, :], u_errs[
Example #53
0
#Nina Renken
import numpy as np
import matplotlib.pyplot as plt
import DataReader as reader
import datetime as dt
import pandas as pd

data_weather = reader.get_weather_data()
data_weather_avg = data_weather.groupby('Zeitstempel').mean()
data_covid = reader.get_covid_data()
data_covid_nds = data_covid[data_covid.Bundesland.eq('Niedersachsen')]
data_covid_avg = data_covid_nds.groupby('Meldedatum').sum()
data_covid_avg = data_covid_avg.reset_index()

# Prepare the weather data
df_list = []
for row in data_weather_avg.itertuples():
    datum = row.Index
    zeitstempel = dt.datetime.strptime(str(datum),
                                       '%Y%m%d').strftime('%Y/%m/%d')
    df_list.append([zeitstempel, row.Wert])

df_weather = pd.DataFrame(df_list, columns=['Datum', 'Temperatur'])
df_weather.plot(x='Datum', y='Temperatur', rot=90)

data_covid_avg.plot(x='Meldedatum', y='AnzahlFall', rot=90)

data = pd.merge(data_covid_avg,
                df_weather,
                left_on='Meldedatum',
                right_on='Datum')
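
# A possible follow-up (a sketch, not in the original): plot daily cases and temperature from
# the merged frame on a shared date axis, putting temperature on a secondary y-axis.
ax = data.plot(x='Meldedatum', y='AnzahlFall', rot=90)
data.plot(x='Meldedatum', y='Temperatur', secondary_y=True, ax=ax, rot=90)
plt.show()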
Example #54
0
File: test.py Project: yqy/torch
def main():

    DIR = args.DIR
    embedding_file = args.embedding_dir

    network_file = "./model/model.pkl"
    if os.path.isfile(network_file):
        print >> sys.stderr,"Read model from ./model/model.pkl"
        network_model = torch.load(network_file)
    else:
        embedding_matrix = numpy.load(embedding_file)

        "Building torch model"
        network_model = network.Network(pair_feature_dimention,mention_feature_dimention,word_embedding_dimention,span_dimention,1000,embedding_size,embedding_dimention,embedding_matrix).cuda()
        print >> sys.stderr,"save model ..."
        torch.save(network_model,network_file)

    reduced=""
    if args.reduced == 1:
        reduced="_reduced"

    train_docs = DataReader.DataGnerater("train"+reduced)
    dev_docs = DataReader.DataGnerater("dev"+reduced)
    test_docs = DataReader.DataGnerater("test"+reduced)


    l2_lambda = 1e-5
    lr = 0.002
    dropout_rate = 0.5
    shuffle = True
    times = 0
    best_thres = 0.5

    model_save_dir = "./model/pretrain/"
   
    last_cost = 0.0
     
    for echo in range(30):

        start_time = timeit.default_timer()
        print "Pretrain Epoch:",echo

        optimizer = optim.RMSprop(network_model.parameters(), lr=lr, weight_decay=l2_lambda)

        cost_this_turn = 0.0

        pos_num = 0
        neg_num = 0
        inside_time = 0.0
    
        loss = None

        for data,doc_end in train_docs.generater(shuffle):
            ana_word_index,ana_span,ana_feature,candi_word_index,candi_span,pair_feature_array,target,mention_ids = data


            if len(pair_feature_array) >= 500:
                continue
            if len(target) == 0:
                continue
                

            mention_index = autograd.Variable(torch.from_numpy(ana_word_index).type(torch.cuda.LongTensor))
            mention_span = autograd.Variable(torch.from_numpy(ana_span).type(torch.cuda.FloatTensor))
            mention_feature = autograd.Variable(torch.from_numpy(ana_feature).type(torch.cuda.FloatTensor))
            candi_index = autograd.Variable(torch.from_numpy(candi_word_index).type(torch.cuda.LongTensor))
            candi_spans = autograd.Variable(torch.from_numpy(candi_span).type(torch.cuda.FloatTensor))
            pair_feature = autograd.Variable(torch.from_numpy(pair_feature_array).type(torch.cuda.FloatTensor))

            gold = [0] + target.tolist()
            if sum(target) == 0:
                neg_num += 1
                gold[0] = 1
            else:
                pos_num += 1

            inside_time_start = timeit.default_timer()

            lable = autograd.Variable(torch.cuda.FloatTensor([gold]))
            output,scores = network_model.forward(word_embedding_dimention,mention_index,mention_span,mention_feature,mention_index,mention_span,candi_index,candi_spans,pair_feature,dropout_rate)
            optimizer.zero_grad()
            loss = F.binary_cross_entropy(output,lable)
            loss.backward()
            optimizer.step()
            inside_time += (timeit.default_timer()-inside_time_start)
            cost_this_turn += loss.data[0]


        end_time = timeit.default_timer()
        print >> sys.stderr, "PreTrain",echo,"Total cost:",cost_this_turn
        print >> sys.stderr, "PreTRAINING Use %.3f seconds"%(end_time-start_time)
        print >> sys.stderr, "Inside Use %.3f seconds"%(inside_time)
        print >> sys.stderr, "Neg:Pos",neg_num,pos_num
        print >> sys.stderr, "Learning Rate",lr

        if cost_this_turn > last_cost:
            lr = lr*0.7 
        last_cost = cost_this_turn

        print >> sys.stderr,"save model ..."

        best_thres = Evaluate.evaluate(network_model,dev_docs,best_thres)
Example #55
0
import tensorflow as tf
from CellSeg_CNN import *
import numpy as np
import DataReader

#Reading the images
data_reader = DataReader.DataReader()
input_reader = data_reader.input_reader

training_images = data_reader.training_images
if (input_reader.use_data_rotation):
    rotated_images = data_reader.pi_half_rotated_images
number_of_training_images = np.size(training_images, axis=0)
image_height = np.size(training_images, axis=1)
image_width = np.size(training_images, axis=2)

test_images = data_reader.test_images
number_of_test_images = np.size(test_images, axis=0)
#Reading the ground truth classes
[training_classes, training_defined_samples] = data_reader.training_classes
if (input_reader.use_data_rotation):
    [rotated_classes,
     rotated_defined_mask] = data_reader.pi_half_rotated_classes_and_masks
[test_classes, test_defined_samples] = data_reader.test_classes

#Reading parameters
learning_rate = input_reader.learning_rate
regularisation_param = tf.constant(input_reader.regularisation_parameter)
n_epochs = input_reader.number_of_epochs
tensorboard_file_location = input_reader.tensorboard_location
input_patch_width = input_reader.input_patch_width
Example #56
0
def setup():
	bucket.setBucket(DataReader.loadBucket())
	wordlist.extend(DataReader.collocationEntries())

	for w in wordlist:
		addStress(tokenize(w))
def print_dict_for_line_chart(d):
    # sort dict
    sorted_keys = sorted(d)

    print "--- period ---"
    for key in sorted_keys:
        print "'%s'," % key

    print "--- amounts ---"
    for key in sorted_keys:
        amount_for_key = d[key] / MILLION
        print "'%s'," % amount_for_key


if __name__ == "__main__":
    fundings = DataReader.read_funding_data(funding_data)

    startups_per_state = {}
    total_funding_per_state = {}
    states_funding_per_company = {}
    funding_by_year = {}
    funding_by_month = {}

    get_fundings_per_year()
    get_fundings_per_month()
    get_total_funding_per_state()
    get_fundings_per_startup_per_state()
    print_dict_to_geochart(startups_per_state)
    print_dict_to_geochart(total_funding_per_state)
    print_dict_for_line_chart(funding_by_year)
    print_dict_for_line_chart(funding_by_month)