def train(self, fname, dataset, sess_info, epochs):
        (sess, saver) = sess_info
        f = open_file(fname)
        iterep = 500  # training iterations per epoch
        for i in range(iterep * epochs):
            batch = dataset.train.next_batch(100)
            sess.run(self.train_step, feed_dict={'x:0': batch})

            progbar(i, iterep)
            if (i + 1) % iterep == 0:
                a, b = sess.run(
                    [self.nent, self.loss],
                    feed_dict={
                        'x:0':
                        dataset.train.data[np.random.choice(
                            len(dataset.train.data), 200)]
                    })
                c, d = sess.run([self.nent, self.loss],
                                feed_dict={'x:0': dataset.test.data})
                a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
                e = test_acc(dataset, sess, self.qy_logit)
                string = (
                    '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                        'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc',
                        'epoch'))
                stream_print(f, string, i <= iterep)
                string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                          .format(a, b, c, d, e, int((i + 1) / iterep)))
                stream_print(f, string)
            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
        if f is not None: f.close()
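These snippets lean on small helpers (progbar, open_file, stream_print, save_params) that are defined elsewhere in their repositories. A minimal sketch of plausible implementations, with names and signatures inferred from the calls above rather than taken from the original code, might look like the following (note that the later PyTorch examples appear to use a Keras-style progress-bar class with an update() method instead):

import sys


def progbar(i, total, width=40):
    # Draw an in-place text progress bar for step i of a cycle of `total` steps.
    done = int(width * ((i % total) + 1) / total)
    sys.stdout.write('\r[' + '=' * done + ' ' * (width - done) +
                     '] {}/{}'.format((i % total) + 1, total))
    sys.stdout.flush()


def open_file(fname):
    # Return an open log file, or None if no filename was given.
    return open(fname, 'w') if fname else None


def stream_print(f, string, should_print=True):
    # Print `string` and mirror it to the log file; the flag is used above to
    # emit the CSV header only on the first reporting epoch.
    if should_print:
        print(string)
        if f is not None:
            f.write(string + '\n')
            f.flush()


def save_params(saver, sess, epoch):
    # Checkpoint the TensorFlow session, tagging the file with the epoch number.
    saver.save(sess, 'checkpoints/model', global_step=epoch)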
Example #2
    def execute_query(self, query):
        start = time.time()
        self.logger.info(" Executing Query: '" + str(query) + "'")
        self.logger.debug(" Query tokens: " +
                          str(process_and_tokenize_string(query)))

        # create question doc from query string
        query_tokens = process_and_tokenize_string(query)
        #ngrams = generate_ngrams(query_tokens,3)
        query_tokens_tfidf = self.tf_idf.get_tokens_value(query_tokens)
        avg = mean(query_tokens_tfidf.values())
        query_tokens_tfidf = {
            k: v
            for k, v in query_tokens_tfidf.items() if v >= avg
        }

        relevant_doc_ids = self.posting_list.get_relevant_docs_ids(
            query_tokens_tfidf.keys())

        relevant_docs = [self.docs[i] for i in relevant_doc_ids]
        top_docs = [TopDoc(self.docs[i]) for i in relevant_doc_ids]

        self.logger.debug("filtered: " + str(len(top_docs)) +
                          " docs ( pool: " + str(len(self.docs)) +
                          ") with tokens " + str(query_tokens_tfidf))
        tf_idf_scores = self.tf_idf.query(query_tokens, relevant_docs)

        for i in range(len(top_docs)):
            progbar(i, len(top_docs), 20)

            top_docs[i].update_score(ScoreType.tf_idf, tf_idf_scores[i])

            top_docs[i].update_score(
                ScoreType.proximity,
                self.posting_list.get_proximity_score(
                    query_tokens, top_docs[i].doc, 6) * 0.5 +
                self.posting_list.get_proximity_score(
                    query_tokens, top_docs[i].doc, 10) * 0.4 +
                self.posting_list.get_proximity_score(
                    query_tokens, top_docs[i].doc, 40) * 0.1)

            top_docs[i].calculate_score()
        print(' ')
        top_docs.sort(key=lambda x: x.score, reverse=True)

        end = time.time()
        self.logger.info("execute_query complete. elapsed time: " +
                         str(end - start) + " secs")
        return top_docs
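One detail worth calling out: the dict comprehension above drops every query token whose TF-IDF value falls below the query's own mean, so only the more informative tokens reach the posting list. A tiny standalone illustration of that filter (the token values are made up):

from statistics import mean

query_tokens_tfidf = {'printing': 0.9, 'press': 0.7, 'the': 0.1}
avg = mean(query_tokens_tfidf.values())          # ~0.567
kept = {k: v for k, v in query_tokens_tfidf.items() if v >= avg}
print(kept)                                      # {'printing': 0.9, 'press': 0.7}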
Example #3
    def create_from_docs(self, docs_json):
        # time and log
        start = time.time()
        self.logger.info("Creating documents...")

        # init variables
        self.docs = [None] * len(docs_json)

        # load documents and tokenize
        for i, key in enumerate(docs_json.keys()):
            progbar(i, len(self.docs), 20)
            doc = Document(int(key), docs_json[key])
            self.docs[int(key)] = doc

        end = time.time()
        self.logger.info("Creating document complete. elapsed time: " +
                         str(end - start) + " secs")
Example #4
    def execute_query(self, query):
        start = time.time()
        query_tokens = process_and_tokenize_string(query)
        unprocessed_query_tokens = split_strings(query)
        self.logger.info(" Executing Query: '" + str(query) +
                         "'  ---- tokens:" + str(query_tokens))
        top_docs = [TopPassage(doc) for doc in self.docs]
        question_class = -1
        for i, wh in enumerate(wh_questions):
            if wh in unprocessed_query_tokens:
                question_class = i
                break

        #pos_list = nltk.pos_tag(unprocessed_query_tokens)
        tokens_synonyms = []
        for token in remove_stop_words(unprocessed_query_tokens):
            tokens_synonyms += get_processed_synonyms(token)

        #print(tokens_synonyms)

        ngrams_vector = self.ngrams.query(query_tokens, self.docs)
        expanded_ngram_vector = self.ngrams.query(tokens_synonyms, self.docs)

        for i in range(len(top_docs)):
            progbar(i, len(top_docs))
            top_docs[i].update_score(ScoreType.ngram, ngrams_vector[i])
            top_docs[i].update_score(ScoreType.expanded_ngram,
                                     expanded_ngram_vector[i])

            top_docs[i].calculate_score()
        print(' ')
        top_docs.sort(key=lambda x: x.score, reverse=True)

        end = time.time()
        self.logger.info("execute_query complete. elapsed time: " +
                         str(end - start) + " secs")
        return top_docs
Example #5
    def train(self,
              fname,
              dataset,
              sess_info,
              epochs,
              save_parameters=True,
              is_labeled=False):
        history = initialize_history()
        (sess, saver) = sess_info
        f = open_file(fname)
        iterep = 500  # training iterations per epoch
        for i in range(iterep * epochs):
            batch = dataset.train.next_batch(100)
            sess.run(self.train_step,
                     feed_dict={
                         'x:0': batch,
                         'phase:0': True
                     })
            progbar(i, iterep)
            if (i + 1) % iterep == 0:
                a, b = sess.run(
                    [self.nent, self.loss],
                    feed_dict={
                        'x:0':
                        dataset.train.data[np.random.choice(
                            len(dataset.train.data), 200)],
                        'phase:0':
                        False
                    })
                c, d = sess.run([self.nent, self.loss],
                                feed_dict={
                                    'x:0': dataset.test.data,
                                    'phase:0': False
                                })
                a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
                e = test_acc(dataset, sess, self.qy_logit) if is_labeled else 0
                string = (
                    '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                        'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc',
                        'epoch'))
                stream_print(f, string, i <= iterep)
                string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                          .format(a, b, c, d, e, int((i + 1) / iterep)))
                stream_print(f, string)
                qy = sess.run(self.qy,
                              feed_dict={
                                  'x:0': dataset.test.data,
                                  'phase:0': False
                              })
                print('Sample of qy')
                print(qy[:5])

                history['iters'].append(int((i + 1) / iterep))
                history['ent'].append(a)
                history['val_ent'].append(c)
                history['loss'].append(b)
                history['val_loss'].append(d)
                history['val_acc'].append(e)

            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0 and save_parameters:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
        if f is not None: f.close()

        return history
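initialize_history is not shown here, but the keys appended to above imply it simply returns a dict of empty lists; a plausible sketch:

def initialize_history():
    # One entry is appended to each list per reporting epoch in train().
    return {
        'iters': [],
        'ent': [],
        'val_ent': [],
        'loss': [],
        'val_loss': [],
        'val_acc': [],
    }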
Example #6
def Chip_Classify(ImageLocation,SaveLocation,ImageFile,NumberOfClusters,InitialCluster):
	ticOverall = time.time()
	#sleep(random.beta(1,1)*30)
	# Reshape InitialCluster
	InitialCluster = array(InitialCluster).reshape((NumberOfClusters,-1))
	ImageIn = imread(ImageFile)
	with rio.open(ImageFile) as gtf_img:
		Info = gtf_img.profile
		Info.update(dtype=rio.int8)
	#print(time.time()-tic)
	ImageRow, ImageColumn, NumberOfBands = ImageIn.shape
	if NumberOfBands > 8:
		NumberOfBands = NumberOfBands - 1

	# prealocate
	Cluster = zeros((ImageRow, ImageColumn, NumberOfClusters))
	CountClusterPixels = zeros((NumberOfClusters, ImageRow))
	MeanCluster = zeros((NumberOfClusters, NumberOfBands, ImageRow))
	EuclideanDistanceResultant = zeros((ImageRow, ImageColumn, NumberOfClusters))

	TaskIDs = list()
	tic = time.time()
	#ImageRow = 100
	for j in range(0,ImageRow):
		#display(num2str(100*j/ImageRow))
		if(j % 10 == 0):
			progbar(j, ImageRow)

		#TaskID = testFun.remote(j,j,j)
		TaskID = EuclideanDistance.remote(j, ImageColumn, ImageIn[j,:,:], ImageRow, InitialCluster, NumberOfBands, NumberOfClusters)
		TaskIDs.append(TaskID)
		if(len(TaskIDs) % TASKS_LIMIT == 0):
			ticTasks = time.time()
			Ready,Pending = ray.wait(TaskIDs)
			results = ray.get(Ready)
			for output in results:
				jPrime = output[4]
				Cluster[jPrime,:,:] = output[0]						# Cluster
				CountClusterPixels[:,jPrime] = output[1][:,0]		# CountClusterPixels
				EuclideanDistanceResultant[jPrime,:,:] = output[2]	# EuclideanDistanceResultant
				MeanCluster[:,:,jPrime] = output[3]					# MeanCluster
			TaskIDs = Pending
	results = ray.get(TaskIDs)
	for output in results:
		jPrime = output[4]
		Cluster[jPrime,:,:] = output[0]						# Cluster
		CountClusterPixels[:,jPrime] = output[1][:,0]		# CountClusterPixels
		EuclideanDistanceResultant[jPrime,:,:] = output[2]	# EuclideanDistanceResultant
		MeanCluster[:,:,jPrime] = output[3]					# MeanCluster
	progbar(ImageRow, ImageRow)

	print('\nfinished big loop')
	ImageDisplay = npsum(Cluster, axis = 2)
	print("Execution time: " + str(time.time() - tic))

	# savez("big.loop.parallel",Cluster=Cluster,
	# 				 CountClusterPixels=CountClusterPixels,
	# 				 EuclideanDistanceResultant=EuclideanDistanceResultant,
	# 				 MeanCluster=MeanCluster)

	ClusterPixelCount = count_nonzero(Cluster, axis = 2)
	#print("Non-zero cluster pixels: " + str(ClusterPixelCount))

	#Calculate TSSE within clusters
	TsseCluster = zeros((1, NumberOfClusters))
	CountTemporalUnstablePixel = 0

	# TSSECluster Parallel
	print("Starting TSSE Cluster computation\n")
	tic = time.time()
	TaskIDs = list()
	for j in range(0, ImageRow):
		if(j % 10 == 0):
			progbar(j, ImageRow)
		TaskID = TSSECluster.remote(j, Cluster[j,:,:], ImageColumn, ImageIn[j,:,:], InitialCluster, NumberOfBands, NumberOfClusters)
		TaskIDs.append(TaskID)
		if(len(TaskIDs) % TASKS_LIMIT == 0):
			Ready,Pending = ray.wait(TaskIDs)
			results = ray.get(Ready)
			for output in results:
				jPrime = output[2]
				CountTemporalUnstablePixel = CountTemporalUnstablePixel + output[0]
				TsseCluster = TsseCluster + output[1]
				#TsseCluster = npsum((TsseCluster,output[1]), axis=1)
			TaskIDs = Pending
	results = ray.get(TaskIDs)
	for output in results:
		jPrime = output[2]
		CountTemporalUnstablePixel = CountTemporalUnstablePixel + output[0]
		#TsseCluster = npsum((TsseCluster,output[1]), axis=1)
		TsseCluster = TsseCluster + output[1]
	progbar(ImageRow, ImageRow)
	print('\n')
	Totalsse = npsum(TsseCluster)
	print("Execution time: " + str(time.time() - tic))
	savez("small.loop.parallel",CountTemporalUnstablePixel=CountTemporalUnstablePixel,TsseCluster=TsseCluster)

	#get data for final stats....
	#calculate the spatial mean and standard deviation of each cluster
	ClusterMeanAllBands = zeros((NumberOfClusters, NumberOfBands))
	ClusterSdAllBands = zeros((NumberOfClusters, NumberOfBands))

	# Cluster Summary Parallel
	tic = time.time()
	print("Starting Cluster Summary computation\n")
	TaskIDs = list()
	kValues = linspace(0, ImageColumn, 2, dtype=int)  # column split boundaries for slicing each band
	for i in range(0, NumberOfClusters):
		Temp = Cluster[:, :, i]
		Temp[Temp == i] = 1
		MaskedClusterAllBands = Temp[:,:,None]*ImageIn[:, :, 0:NumberOfBands]

		if(i % 10 == 0):
			progbar(i, NumberOfClusters)
		for j in range(0, NumberOfBands):
			#for k in range(0, ImageColumn):
			for k in range(1,len(kValues)):
				TaskID = ClusterSummary.remote(i, j, MaskedClusterAllBands[:,kValues[k-1]:kValues[k],j])
				TaskIDs.append(TaskID)
				#if(len(TaskIDs) % TASKS_LIMIT == 0):
				if(len(TaskIDs) >= TASKS_LIMIT):
					Ready,Pending = ray.wait(TaskIDs)
					results = ray.get(Ready)
					for output in results:
						iPrime = output[2]
						jPrime = output[3]
						ClusterMeanAllBands[iPrime, jPrime] += output[0]
						ClusterSdAllBands[iPrime, jPrime] += output[1]
					TaskIDs = Pending
	results = ray.get(TaskIDs)
	for output in results:
		iPrime = output[2]
		jPrime = output[3]
		ClusterMeanAllBands[iPrime, jPrime] += output[0]
		ClusterSdAllBands[iPrime, jPrime] += output[1]
	progbar(NumberOfClusters, NumberOfClusters)
	print('\n')

	print("Execution time: " + str(time.time() - tic))
	savez("cluster.summary.parallel",ClusterMeanAllBands=ClusterMeanAllBands,ClusterSdAllBands=ClusterSdAllBands)

	filename = str(SaveLocation) + 'ImageDisplay_' + ImageFile[len(ImageFile)-32:len(ImageFile)-3] + 'mat'
	print('Got filename. Now save the data')
	print(filename)
	save(filename, ImageDisplay)

	filename = str(SaveLocation) + 'ClusterCount' + str(NumberOfClusters) + '_' + ImageFile[len(ImageFile)-32:len(ImageFile)-4] + '.tif'

	#geotiffwrite(filename, int8(ImageDisplay), Info.RefMatrix);

	with rio.open(filename, 'w', **Info) as dst:
		dst.write(int8(ImageDisplay), 1)

	filename = str(SaveLocation) + 'Stats_' + ImageFile[len(ImageFile)-32:len(ImageFile)-3] + 'mat'
	savez(filename, [MeanCluster, CountClusterPixels, ClusterPixelCount, ClusterMeanAllBands, ClusterSdAllBands, Totalsse])
	print('done!')
	print("Overall execution: " + str(time.time()-ticOverall))
def valid(epoch, dataloader, model, optimizer, scheduler, loss_weight, writer):
    model.eval()
    str_train = 'val'
    bar = progbar(len(dataloader), width=10)
    with torch.no_grad():
        avgLoss = [0., 0., 0., 0., 0., 0.]
        for i, (images, labels) in enumerate(dataloader):
            images, labels = images.to(device), labels.to(device)

            preds_list = model(images)
            batch_loss = [
                (Dice_Loss(preds, labels) +
                 loss_weight * normal_cross_entropy_loss(preds, labels)) /
                (preds_list[0].shape[0]) for preds in preds_list
            ]
            #batch_loss = [weighted_cross_entropy_loss(preds, labels)/ (preds_list[0].shape[0]) for preds in preds_list]

            bar.update(i,
                       [('1_loss', batch_loss[0]), ('2_loss', batch_loss[1]),
                        ('3_loss', batch_loss[2]), ('4_loss', batch_loss[3]),
                        ('5_loss', batch_loss[4]),
                        ('fuse_loss', batch_loss[5])])
            avgLoss = [(avgLoss[k] * i + batch_loss[k].item()) / (i + 1)
                       for k in range(len(avgLoss))]
            if i % 10 == 0:
                pic1 = torchvision.utils.make_grid(preds_list[0][:8],
                                                   nrow=8,
                                                   padding=2)
                pic2 = torchvision.utils.make_grid(preds_list[1][:8],
                                                   nrow=8,
                                                   padding=2)
                pic3 = torchvision.utils.make_grid(preds_list[2][:8],
                                                   nrow=8,
                                                   padding=2)
                pic4 = torchvision.utils.make_grid(preds_list[3][:8],
                                                   nrow=8,
                                                   padding=2)
                pic5 = torchvision.utils.make_grid(preds_list[4][:8],
                                                   nrow=8,
                                                   padding=2)
                writer.add_image(str_train + '/pred_1', pic1)
                writer.add_image(str_train + '/pred_2', pic2)
                writer.add_image(str_train + '/pred_3', pic3)
                writer.add_image(str_train + '/pred_4', pic4)
                writer.add_image(str_train + '/pred_5', pic5)
                pic = torchvision.utils.make_grid(images[:8],
                                                  nrow=8,
                                                  padding=2)
                writer.add_image('img', pic)
                la = torchvision.utils.make_grid(labels[:8], nrow=8, padding=2)
                writer.add_image('lab', la)

        log = '\n * Finished epoch # %d   ' \
              'Loss_1: %1.4f, Loss_2: %1.4f, Loss_3: %1.4f, Loss_4: %1.4f, Loss_5: %1.4f, fuse_loss: %1.4f\n' % (
              epoch, avgLoss[0], avgLoss[1], avgLoss[2], avgLoss[3], avgLoss[4], avgLoss[5])
        print(log)
        writer.add_scalar(str_train + '/loss_1', avgLoss[0])
        writer.add_scalar(str_train + '/loss_2', avgLoss[1])
        writer.add_scalar(str_train + '/loss_3', avgLoss[2])
        writer.add_scalar(str_train + '/loss_4', avgLoss[3])
        writer.add_scalar(str_train + '/loss_5', avgLoss[4])
        writer.add_scalar(str_train + '/fuse_loss', avgLoss[5])

    return sum(avgLoss)
def train(epoch, dataloader, model, optimizer, scheduler, loss_weight, writer):
    model.train()
    scheduler.step(epoch=epoch)
    print('\n Training AT epoch = {}'.format(epoch))
    print('current learning rate = {}\n'.format(scheduler.get_lr()))
    str_train = 'train'
    bar = progbar(len(dataloader), width=10)

    avgLoss = [0., 0., 0., 0., 0., 0.]
    for i, (images, labels) in enumerate(dataloader):
        #images, labels = images.to(device), labels.to(device)
        images, labels = images.cuda(), labels.cuda()

        preds_list = model(images)

        #preds_list = preds_lis[0]
        #batch_loss = [(Dice_Loss(preds, labels) + loss_weight * normal_cross_entropy_loss(preds, labels))/ (preds_list[0].shape[0]) for preds in preds_list]
        #batch_loss = [weighted_cross_entropy_loss(preds, labels)/ (preds_list[0].shape[0]) for preds in preds_list]
        batch_loss = []
        #batch_loss = torch.zeros((6, images.shape[0]))
        for preds in preds_list:
            #print(preds.max(), preds.min())
            loss1 = Dice_Loss(preds, labels)  #.to(torch.device('cuda:1'))
            loss2 = normal_cross_entropy_loss(
                preds, labels)  #.to(torch.device('cuda:1'))
            batch_loss.append(loss1 + loss_weight * loss2)  #/ preds.shape[0])
        #loss = batch_loss[0].mean() + batch_loss[1].mean() + batch_loss[2].mean() +batch_loss[3].mean() +batch_loss[4].mean() +batch_loss[5].mean()#torch.sum(batch_loss)
        loss = (batch_loss[0] + batch_loss[1] + batch_loss[2] + batch_loss[3] +
                batch_loss[4] + batch_loss[5])  #.to(torch.device('cuda:1'))
        #batch_loss = normal_cross_entropy_loss(preds_list, labels)/ preds_list.shape[0]
        #loss = batch_losss
        optimizer.zero_grad()
        #loss1.backward(retain_graph=True)
        loss.backward()
        optimizer.step()
        bar.update(i, [('1_loss', batch_loss[0]), ('2_loss', batch_loss[1]),
                       ('3_loss', batch_loss[2]), ('4_loss', batch_loss[3]),
                       ('5_loss', batch_loss[4]),
                       ('fuse_loss', batch_loss[5])])
        avgLoss = [(avgLoss[k] * i + batch_loss[k].item()) / (i + 1)
                   for k in range(len(avgLoss))]
        if i % 20 == 0:
            pic = torchvision.utils.make_grid(images[:8], nrow=8, padding=2)
            writer.add_image('img', pic)
            pic1 = torchvision.utils.make_grid(preds_list[0][:8],
                                               nrow=8,
                                               padding=2)
            pic2 = torchvision.utils.make_grid(preds_list[1][:8],
                                               nrow=8,
                                               padding=2)
            pic3 = torchvision.utils.make_grid(preds_list[2][:8],
                                               nrow=8,
                                               padding=2)
            pic4 = torchvision.utils.make_grid(preds_list[3][:8],
                                               nrow=8,
                                               padding=2)
            pic5 = torchvision.utils.make_grid(preds_list[4][:8],
                                               nrow=8,
                                               padding=2)
            pic6 = torchvision.utils.make_grid(preds_list[5][:8],
                                               nrow=8,
                                               padding=2)
            writer.add_image(str_train + '/pred_1', pic1)
            writer.add_image(str_train + '/pred_2', pic2)
            writer.add_image(str_train + '/pred_3', pic3)
            writer.add_image(str_train + '/pred_4', pic4)
            writer.add_image(str_train + '/pred_5', pic5)
            writer.add_image(str_train + '/fuse', pic6)
            la = torchvision.utils.make_grid(labels[:8], nrow=8, padding=2)
            writer.add_image('lab', la)

    log = '\n * Finished epoch # %d   ' \
              'Loss_1: %1.4f, Loss_2: %1.4f, Loss_3: %1.4f, Loss_4: %1.4f, Loss_5: %1.4f, fuse_loss: %1.4f\n' % (
              epoch, avgLoss[0], avgLoss[1], avgLoss[2], avgLoss[3], avgLoss[4], avgLoss[5])
    print(log)
    writer.add_scalar(str_train + '/loss_1', avgLoss[0])
    writer.add_scalar(str_train + '/loss_2', avgLoss[1])
    writer.add_scalar(str_train + '/loss_3', avgLoss[2])
    writer.add_scalar(str_train + '/loss_4', avgLoss[3])
    writer.add_scalar(str_train + '/loss_5', avgLoss[4])
    writer.add_scalar(str_train + '/fuse_loss', avgLoss[5])

    return sum(avgLoss)
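Both loops above maintain avgLoss as a running mean via (avgLoss[k] * i + batch_loss[k].item()) / (i + 1). A tiny standalone check of that update rule (the loss values are made up):

losses = [0.8, 0.6, 0.7, 0.5]
avg = 0.0
for i, x in enumerate(losses):
    avg = (avg * i + x) / (i + 1)   # incremental mean over the first i + 1 batches
print(avg)                          # 0.65, identical to sum(losses) / len(losses)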