def train(self, fname, dataset, sess_info, epochs):
    (sess, saver) = sess_info
    f = open_file(fname)
    iterep = 500
    for i in range(iterep * epochs):
        batch = dataset.train.next_batch(100)
        sess.run(self.train_step, feed_dict={'x:0': batch})
        progbar(i, iterep)
        if (i + 1) % iterep == 0:
            a, b = sess.run(
                [self.nent, self.loss],
                feed_dict={
                    'x:0': dataset.train.data[np.random.choice(
                        len(dataset.train.data), 200)]
                })
            c, d = sess.run([self.nent, self.loss],
                            feed_dict={'x:0': dataset.test.data})
            a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
            e = test_acc(dataset, sess, self.qy_logit)
            string = (
                '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                    'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc', 'epoch'))
            stream_print(f, string, i <= iterep)
            string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                      .format(a, b, c, d, e, int((i + 1) / iterep)))
            stream_print(f, string)
            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
    if f is not None:
        f.close()
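# The training loops above and below call a few small I/O helpers (open_file, stream_print,
# progbar, save_params) that are defined elsewhere in the repo. The block below is a minimal,
# self-contained sketch of what they are assumed to do, inferred only from how they are
# called here; the checkpoint path and the meaning of stream_print's third argument are
# assumptions, not confirmed details of the real implementations.

def open_file(fname):
    # Open the log file for writing, or return None when no filename is given.
    return None if fname is None else open(fname, 'w')


def stream_print(f, string, pipe_to_file=True):
    # Always echo to stdout; optionally append the line to the open log file (assumed semantics).
    print(string)
    if pipe_to_file and f is not None:
        f.write(string + '\n')
        f.flush()


def progbar(i, total, width=40):
    # Minimal in-place text progress bar for step i of total.
    done = int(width * ((i % total) + 1) / total)
    print('\r[' + '=' * done + ' ' * (width - done) +
          '] {}/{}'.format((i % total) + 1, total), end='', flush=True)


def save_params(saver, sess, epoch):
    # Checkpoint the TensorFlow session; the path used here is a placeholder assumption.
    saver.save(sess, 'checkpoints/model.ckpt', global_step=epoch)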
def execute_query(self, query):
    start = time.time()
    self.logger.info(" Executing Query: '" + str(query) + "'")
    self.logger.debug(" Query tokens: " +
                      str(process_and_tokenize_string(query)))

    # create question doc from query string
    query_tokens = process_and_tokenize_string(query)
    #ngrams = generate_ngrams(query_tokens, 3)

    # keep only the query tokens whose tf-idf value is at least the query average
    query_tokens_tfidf = self.tf_idf.get_tokens_value(query_tokens)
    avg = mean(query_tokens_tfidf.values())
    query_tokens_tfidf = {
        k: v for k, v in query_tokens_tfidf.items() if v >= avg
    }

    # restrict scoring to documents that contain at least one of the remaining tokens
    relevant_doc_ids = self.posting_list.get_relevant_docs_ids(
        query_tokens_tfidf.keys())
    relevant_docs = [self.docs[i] for i in relevant_doc_ids]
    top_docs = [TopDoc(self.docs[i]) for i in relevant_doc_ids]
    self.logger.debug("filtered: " + str(len(top_docs)) + " docs (pool: " +
                      str(len(self.docs)) + ") with tokens " +
                      str(query_tokens_tfidf))

    # score each candidate: tf-idf plus a weighted mix of proximity windows (6, 10, 40 tokens)
    tf_idf_scores = self.tf_idf.query(query_tokens, relevant_docs)
    for i in range(len(top_docs)):
        progbar(i, len(top_docs), 20)
        top_docs[i].update_score(ScoreType.tf_idf, tf_idf_scores[i])
        top_docs[i].update_score(
            ScoreType.proximity,
            self.posting_list.get_proximity_score(
                query_tokens, top_docs[i].doc, 6) * 0.5 +
            self.posting_list.get_proximity_score(
                query_tokens, top_docs[i].doc, 10) * 0.4 +
            self.posting_list.get_proximity_score(
                query_tokens, top_docs[i].doc, 40) * 0.1)
        top_docs[i].calculate_score()
    print(' ')

    top_docs.sort(key=lambda x: x.score, reverse=True)
    end = time.time()
    self.logger.info("execute_query complete. elapsed time: " +
                     str(end - start) + " secs")
    return top_docs
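# ScoreType and TopDoc are defined elsewhere in the repo. The sketch below only illustrates
# the interface assumed by the execute_query variants in this listing: the enum members are
# the ones actually referenced (tf_idf, proximity, ngram, expanded_ngram), while the
# plain-sum aggregation in calculate_score is a guess, not the repo's implementation.
from enum import Enum


class ScoreType(Enum):
    tf_idf = 0
    proximity = 1
    ngram = 2
    expanded_ngram = 3


class TopDoc:
    def __init__(self, doc):
        self.doc = doc
        self.scores = {}
        self.score = 0.0

    def update_score(self, score_type, value):
        # record one partial score per ScoreType
        self.scores[score_type] = value

    def calculate_score(self):
        # assumed aggregation: sum the recorded partial scores into the final ranking score
        self.score = sum(self.scores.values())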
def create_from_docs(self, docs_json):
    # time and log
    start = time.time()
    self.logger.info("Creating documents...")

    # init variables
    self.docs = [None] * len(docs_json)

    # load documents and tokenize
    for i, key in enumerate(docs_json.keys()):
        progbar(i, len(self.docs), 20)
        doc = Document(int(key), docs_json[key])
        self.docs[int(key)] = doc

    end = time.time()
    self.logger.info("Creating documents complete. elapsed time: " +
                     str(end - start) + " secs")
def execute_query(self, query):
    start = time.time()
    query_tokens = process_and_tokenize_string(query)
    unprocessed_query_tokens = split_strings(query)
    self.logger.info(" Executing Query: '" + str(query) + "' ---- tokens:" +
                     str(query_tokens))
    top_docs = [TopPassage(doc) for doc in self.docs]

    # classify the question by its wh-word (who/what/when/...), if one is present
    question_class = -1
    for i, wh in enumerate(wh_questions):
        if wh in unprocessed_query_tokens:
            question_class = i
            break
    #pos_list = nltk.pos_tag(unprocessed_query_tokens)

    # expand the query with processed synonyms of every non-stop-word token
    tokens_synonyms = []
    for token in remove_stop_words(unprocessed_query_tokens):
        tokens_synonyms += get_processed_synonyms(token)
    #print(tokens_synonyms)

    # score every passage with the original and the synonym-expanded n-gram queries
    ngrams_vector = self.ngrams.query(query_tokens, self.docs)
    expanded_ngram_vector = self.ngrams.query(tokens_synonyms, self.docs)
    for i in range(len(top_docs)):
        progbar(i, len(top_docs))
        top_docs[i].update_score(ScoreType.ngram, ngrams_vector[i])
        top_docs[i].update_score(ScoreType.expanded_ngram,
                                 expanded_ngram_vector[i])
        top_docs[i].calculate_score()
    print(' ')

    top_docs.sort(key=lambda x: x.score, reverse=True)
    end = time.time()
    self.logger.info("execute_query complete. elapsed time: " +
                     str(end - start) + " secs")
    return top_docs
def train(self, fname, dataset, sess_info, epochs, save_parameters=True,
          is_labeled=False):
    history = initialize_history()
    (sess, saver) = sess_info
    f = open_file(fname)
    iterep = 500
    for i in range(iterep * epochs):
        batch = dataset.train.next_batch(100)
        sess.run(self.train_step, feed_dict={
            'x:0': batch,
            'phase:0': True
        })
        progbar(i, iterep)
        if (i + 1) % iterep == 0:
            a, b = sess.run(
                [self.nent, self.loss],
                feed_dict={
                    'x:0': dataset.train.data[np.random.choice(
                        len(dataset.train.data), 200)],
                    'phase:0': False
                })
            c, d = sess.run([self.nent, self.loss],
                            feed_dict={
                                'x:0': dataset.test.data,
                                'phase:0': False
                            })
            a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
            # test accuracy is only meaningful (and computable) when labels are available
            e = test_acc(dataset, sess, self.qy_logit) if is_labeled else 0
            string = (
                '{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                    'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc', 'epoch'))
            stream_print(f, string, i <= iterep)
            string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                      .format(a, b, c, d, e, int((i + 1) / iterep)))
            stream_print(f, string)
            qy = sess.run(self.qy, feed_dict={
                'x:0': dataset.test.data,
                'phase:0': False
            })
            print('Sample of qy')
            print(qy[:5])
            history['iters'].append(int((i + 1) / iterep))
            history['ent'].append(a)
            history['val_ent'].append(c)
            history['loss'].append(b)
            history['val_loss'].append(d)
            history['val_acc'].append(e)
            # Saves parameters every 10 epochs
            if (i + 1) % (10 * iterep) == 0 and save_parameters:
                print('saving')
                save_params(saver, sess, (i + 1) // iterep)
    if f is not None:
        f.close()
    return history
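# initialize_history is assumed to return the dict of empty metric lists that the loop above
# appends to each epoch; this minimal sketch just mirrors the keys used there.
def initialize_history():
    return {
        'iters': [],     # epoch index
        'ent': [],       # training entropy (negated nent)
        'val_ent': [],   # test entropy
        'loss': [],      # training loss
        'val_loss': [],  # test loss
        'val_acc': [],   # test accuracy (0 when the dataset is unlabeled)
    }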
def Chip_Classify(ImageLocation, SaveLocation, ImageFile, NumberOfClusters, InitialCluster):
    ticOverall = time.time()
    #sleep(random.beta(1,1)*30)

    # Reshape InitialCluster into one row of band values per cluster
    InitialCluster = array(InitialCluster).reshape((NumberOfClusters, -1))
    ImageIn = imread(ImageFile)
    with rio.open(ImageFile) as gtf_img:
        Info = gtf_img.profile
        Info.update(dtype=rio.int8)
    #print(time.time()-tic)
    ImageRow, ImageColumn, NumberOfBands = ImageIn.shape
    if NumberOfBands > 8:
        NumberOfBands = NumberOfBands - 1

    # preallocate
    Cluster = zeros((ImageRow, ImageColumn, NumberOfClusters))
    CountClusterPixels = zeros((NumberOfClusters, ImageRow))
    MeanCluster = zeros((NumberOfClusters, NumberOfBands, ImageRow))
    EuclideanDistanceResultant = zeros((ImageRow, ImageColumn, NumberOfClusters))

    # Distance/assignment pass: one ray task per image row, drained in batches of TASKS_LIMIT
    TaskIDs = list()
    tic = time.time()
    #ImageRow = 100
    for j in range(0, ImageRow):
        if j % 10 == 0:
            progbar(j, ImageRow)
        #TaskID = testFun.remote(j, j, j)
        TaskID = EuclideanDistance.remote(j, ImageColumn, ImageIn[j, :, :], ImageRow,
                                          InitialCluster, NumberOfBands, NumberOfClusters)
        TaskIDs.append(TaskID)
        if len(TaskIDs) % TASKS_LIMIT == 0:
            ticTasks = time.time()
            Ready, Pending = ray.wait(TaskIDs)
            results = ray.get(Ready)
            for output in results:
                jPrime = output[4]
                Cluster[jPrime, :, :] = output[0]                     # Cluster
                CountClusterPixels[:, jPrime] = output[1][:, 0]       # CountClusterPixels
                EuclideanDistanceResultant[jPrime, :, :] = output[2]  # EuclideanDistanceResultant
                MeanCluster[:, :, jPrime] = output[3]                 # MeanCluster
            TaskIDs = Pending

    # drain the tasks that are still pending after the loop
    results = ray.get(TaskIDs)
    for output in results:
        jPrime = output[4]
        Cluster[jPrime, :, :] = output[0]                     # Cluster
        CountClusterPixels[:, jPrime] = output[1][:, 0]       # CountClusterPixels
        EuclideanDistanceResultant[jPrime, :, :] = output[2]  # EuclideanDistanceResultant
        MeanCluster[:, :, jPrime] = output[3]                 # MeanCluster
    progbar(ImageRow, ImageRow)
    print('\nfinished big loop')
    ImageDisplay = npsum(Cluster, axis=2)
    print("Execution time: " + str(time.time() - tic))
    # savez("big.loop.parallel", Cluster=Cluster,
    #       CountClusterPixels=CountClusterPixels,
    #       EuclideanDistanceResultant=EuclideanDistanceResultant,
    #       MeanCluster=MeanCluster)
    ClusterPixelCount = count_nonzero(Cluster, axis=2)
    #print("Non-zero cluster pixels: " + str(ClusterPixelCount))

    # Calculate TSSE (total sum of squared errors) within clusters, again one ray task per row
    TsseCluster = zeros((1, NumberOfClusters))
    CountTemporalUnstablePixel = 0
    print("Starting TSSE Cluster computation\n")
    tic = time.time()
    TaskIDs = list()
    for j in range(0, ImageRow):
        if j % 10 == 0:
            progbar(j, ImageRow)
        TaskID = TSSECluster.remote(j, Cluster[j, :, :], ImageColumn, ImageIn[j, :, :],
                                    InitialCluster, NumberOfBands, NumberOfClusters)
        TaskIDs.append(TaskID)
        if len(TaskIDs) % TASKS_LIMIT == 0:
            Ready, Pending = ray.wait(TaskIDs)
            results = ray.get(Ready)
            for output in results:
                jPrime = output[2]
                CountTemporalUnstablePixel = CountTemporalUnstablePixel + output[0]
                TsseCluster = TsseCluster + output[1]
                #TsseCluster = npsum((TsseCluster, output[1]), axis=1)
            TaskIDs = Pending
    results = ray.get(TaskIDs)
    for output in results:
        jPrime = output[2]
        CountTemporalUnstablePixel = CountTemporalUnstablePixel + output[0]
        #TsseCluster = npsum((TsseCluster, output[1]), axis=1)
        TsseCluster = TsseCluster + output[1]
    progbar(ImageRow, ImageRow)
    print('\n')
    Totalsse = npsum(TsseCluster)
    print("Execution time: " + str(time.time() - tic))
    savez("small.loop.parallel",
          CountTemporalUnstablePixel=CountTemporalUnstablePixel,
          TsseCluster=TsseCluster)

    # get data for final stats....
    # calculate the spatial mean and standard deviation of each cluster
    ClusterMeanAllBands = zeros((NumberOfClusters, NumberOfBands))
    ClusterSdAllBands = zeros((NumberOfClusters, NumberOfBands))

    # Cluster Summary Parallel
    tic = time.time()
    print("Starting Cluster Summary computation\n")
    TaskIDs = list()
    # column split points; a full-width integer dtype is used because int8 would overflow
    # for images wider than 127 columns
    kValues = linspace(0, ImageColumn, 2, dtype=int)
    for i in range(0, NumberOfClusters):
        Temp = Cluster[:, :, i]
        Temp[Temp == i] = 1
        MaskedClusterAllBands = Temp[:, :, None] * ImageIn[:, :, 0:NumberOfBands]
        if i % 10 == 0:
            progbar(i, NumberOfClusters)
        for j in range(0, NumberOfBands):
            #for k in range(0, ImageColumn):
            for k in range(1, len(kValues)):
                # NOTE: the active call passes an all-zero array; the commented-out call
                # above it passes the real masked band data.
                #TaskID = ClusterSummary.remote(i, j, MaskedClusterAllBands[:, kValues[k-1]:kValues[k], j])
                TaskID = ClusterSummary.remote(
                    i, j, zeros(MaskedClusterAllBands[:, kValues[k - 1]:kValues[k], j].shape))
                TaskIDs.append(TaskID)
                #if(len(TaskIDs) % TASKS_LIMIT == 0):
                if len(TaskIDs) >= TASKS_LIMIT:
                    Ready, Pending = ray.wait(TaskIDs)
                    results = ray.get(Ready)
                    for output in results:
                        iPrime = output[2]
                        jPrime = output[3]
                        ClusterMeanAllBands[iPrime, jPrime] += output[0]
                        ClusterSdAllBands[iPrime, jPrime] += output[1]
                    TaskIDs = Pending
    results = ray.get(TaskIDs)
    for output in results:
        iPrime = output[2]
        jPrime = output[3]
        FinalClusterMean = output[0]
        FinalClusterSd = output[1]
        ClusterMeanAllBands[iPrime, jPrime] += output[0]
        ClusterSdAllBands[iPrime, jPrime] += output[1]
    progbar(NumberOfClusters, NumberOfClusters)
    print('\n')
    print("Execution time: " + str(time.time() - tic))
    savez("cluster.summary.parallel",
          ClusterMeanAllBands=ClusterMeanAllBands,
          ClusterSdAllBands=ClusterSdAllBands)

    # write the classified image and the per-cluster statistics to SaveLocation
    filename = str(SaveLocation) + 'ImageDisplay_' + ImageFile[len(ImageFile) - 32:len(ImageFile) - 3] + 'mat'
    print('Got filename. Now save the data')
    print(filename)
    save(filename, ImageDisplay)
    filename = (str(SaveLocation) + 'ClusterCount' + str(NumberOfClusters) + '_' +
                ImageFile[len(ImageFile) - 32:len(ImageFile) - 4] + '.tif')
    #geotiffwrite(filename, int8(ImageDisplay), Info.RefMatrix);
    with rio.open(filename, 'w', **Info) as dst:
        dst.write(int8(ImageDisplay), 1)
    filename = str(SaveLocation) + 'Stats_' + ImageFile[len(ImageFile) - 32:len(ImageFile) - 3] + 'mat'
    savez(filename, [MeanCluster, CountClusterPixels, ClusterPixelCount,
                     ClusterMeanAllBands, ClusterSdAllBands, Totalsse])
    print('done!')
    print("Overall execution: " + str(time.time() - ticOverall))
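# The EuclideanDistance ray worker used in the first loop of Chip_Classify is defined
# elsewhere in the repo. The sketch below is inferred purely from the shapes consumed when
# its outputs are unpacked (one image row in; per-pixel distances, a nearest-cluster
# assignment, per-cluster pixel counts and band sums out). It illustrates the per-row
# distance/assignment step only and is not the repo's implementation.
import numpy as np
import ray


@ray.remote
def EuclideanDistanceSketch(j, ImageColumn, RowPixels, ImageRow, InitialCluster,
                            NumberOfBands, NumberOfClusters):
    ClusterRow = np.zeros((ImageColumn, NumberOfClusters))
    CountClusterPixels = np.zeros((NumberOfClusters, 1))
    EuclideanDistanceResultant = np.zeros((ImageColumn, NumberOfClusters))
    MeanCluster = np.zeros((NumberOfClusters, NumberOfBands))
    for k in range(ImageColumn):
        pixel = RowPixels[k, :NumberOfBands]
        # Euclidean distance from this pixel to every initial cluster centre
        distances = np.sqrt(((InitialCluster[:, :NumberOfBands] - pixel) ** 2).sum(axis=1))
        EuclideanDistanceResultant[k, :] = distances
        nearest = int(np.argmin(distances))
        ClusterRow[k, nearest] = nearest   # downstream masking tests Cluster[..., i] == i
        CountClusterPixels[nearest, 0] += 1
        MeanCluster[nearest, :] += pixel
    return ClusterRow, CountClusterPixels, EuclideanDistanceResultant, MeanCluster, j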
def valid(epoch, dataloader, model, optimizer, scheduler, loss_weight, writer):
    model.eval()
    str_train = 'val'
    bar = progbar(len(dataloader), width=10)
    with torch.no_grad():
        avgLoss = [0., 0., 0., 0., 0., 0.]
        for i, (images, labels) in enumerate(dataloader):
            images, labels = images.to(device), labels.to(device)
            preds_list = model(images)
            # per-output loss: Dice + weighted cross-entropy, normalised by batch size
            batch_loss = [
                (Dice_Loss(preds, labels) +
                 loss_weight * normal_cross_entropy_loss(preds, labels)) /
                (preds_list[0].shape[0]) for preds in preds_list
            ]
            #batch_loss = [weighted_cross_entropy_loss(preds, labels) / (preds_list[0].shape[0]) for preds in preds_list]
            bar.update(i, [('1_loss', batch_loss[0]), ('2_loss', batch_loss[1]),
                           ('3_loss', batch_loss[2]), ('4_loss', batch_loss[3]),
                           ('5_loss', batch_loss[4]), ('fuse_loss', batch_loss[5])])
            # running mean of each loss over the batches seen so far
            avgLoss = [(avgLoss[k] * i + batch_loss[k].item()) / (i + 1)
                       for k in range(len(avgLoss))]
            if i % 10 == 0:
                # log the first 8 predictions of each side output, plus inputs and labels
                pic1 = torchvision.utils.make_grid(preds_list[0][:8], nrow=8, padding=2)
                pic2 = torchvision.utils.make_grid(preds_list[1][:8], nrow=8, padding=2)
                pic3 = torchvision.utils.make_grid(preds_list[2][:8], nrow=8, padding=2)
                pic4 = torchvision.utils.make_grid(preds_list[3][:8], nrow=8, padding=2)
                pic5 = torchvision.utils.make_grid(preds_list[4][:8], nrow=8, padding=2)
                writer.add_image(str_train + '/pred_1', pic1)
                writer.add_image(str_train + '/pred_2', pic2)
                writer.add_image(str_train + '/pred_3', pic3)
                writer.add_image(str_train + '/pred_4', pic4)
                writer.add_image(str_train + '/pred_5', pic5)
                pic = torchvision.utils.make_grid(images[:8], nrow=8, padding=2)
                writer.add_image('img', pic)
                la = torchvision.utils.make_grid(labels[:8], nrow=8, padding=2)
                writer.add_image('lab', la)

    log = '\n * Finished epoch # %d ' \
          'Loss_1: %1.4f, Loss_2: %1.4f, Loss_3: %1.4f, Loss_4: %1.4f, Loss_5: %1.4f, fuse_loss: %1.4f\n' % (
              epoch, avgLoss[0], avgLoss[1], avgLoss[2], avgLoss[3], avgLoss[4], avgLoss[5])
    print(log)
    writer.add_scalar(str_train + '/loss_1', avgLoss[0])
    writer.add_scalar(str_train + '/loss_2', avgLoss[1])
    writer.add_scalar(str_train + '/loss_3', avgLoss[2])
    writer.add_scalar(str_train + '/loss_4', avgLoss[3])
    writer.add_scalar(str_train + '/loss_5', avgLoss[4])
    writer.add_scalar(str_train + '/fuse_loss', avgLoss[5])
    return sum(avgLoss)
def train(epoch, dataloader, model, optimizer, scheduler, loss_weight, writer):
    model.train()
    scheduler.step(epoch=epoch)
    print('\n Training AT epoch = {}'.format(epoch))
    print('current learning rate = {}\n'.format(scheduler.get_lr()))
    str_train = 'train'
    bar = progbar(len(dataloader), width=10)
    avgLoss = [0., 0., 0., 0., 0., 0.]
    for i, (images, labels) in enumerate(dataloader):
        #images, labels = images.to(device), labels.to(device)
        images, labels = images.cuda(), labels.cuda()
        preds_list = model(images)

        # one loss term per output: Dice + weighted cross-entropy
        #batch_loss = [weighted_cross_entropy_loss(preds, labels) / (preds_list[0].shape[0]) for preds in preds_list]
        batch_loss = []
        for preds in preds_list:
            loss1 = Dice_Loss(preds, labels)
            loss2 = normal_cross_entropy_loss(preds, labels)
            batch_loss.append(loss1 + loss_weight * loss2)

        # total loss: sum over the five side outputs and the fused output
        loss = (batch_loss[0] + batch_loss[1] + batch_loss[2] +
                batch_loss[3] + batch_loss[4] + batch_loss[5])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        bar.update(i, [('1_loss', batch_loss[0]), ('2_loss', batch_loss[1]),
                       ('3_loss', batch_loss[2]), ('4_loss', batch_loss[3]),
                       ('5_loss', batch_loss[4]), ('fuse_loss', batch_loss[5])])
        # running mean of each loss over the batches seen so far
        avgLoss = [(avgLoss[k] * i + batch_loss[k].item()) / (i + 1)
                   for k in range(len(avgLoss))]
        if i % 20 == 0:
            # log inputs, every side-output prediction, the fused prediction, and labels
            pic = torchvision.utils.make_grid(images[:8], nrow=8, padding=2)
            writer.add_image('img', pic)
            pic1 = torchvision.utils.make_grid(preds_list[0][:8], nrow=8, padding=2)
            pic2 = torchvision.utils.make_grid(preds_list[1][:8], nrow=8, padding=2)
            pic3 = torchvision.utils.make_grid(preds_list[2][:8], nrow=8, padding=2)
            pic4 = torchvision.utils.make_grid(preds_list[3][:8], nrow=8, padding=2)
            pic5 = torchvision.utils.make_grid(preds_list[4][:8], nrow=8, padding=2)
            pic6 = torchvision.utils.make_grid(preds_list[5][:8], nrow=8, padding=2)
            writer.add_image(str_train + '/pred_1', pic1)
            writer.add_image(str_train + '/pred_2', pic2)
            writer.add_image(str_train + '/pred_3', pic3)
            writer.add_image(str_train + '/pred_4', pic4)
            writer.add_image(str_train + '/pred_5', pic5)
            writer.add_image(str_train + '/fuse', pic6)
            la = torchvision.utils.make_grid(labels[:8], nrow=8, padding=2)
            writer.add_image('lab', la)

    log = '\n * Finished epoch # %d ' \
          'Loss_1: %1.4f, Loss_2: %1.4f, Loss_3: %1.4f, Loss_4: %1.4f, Loss_5: %1.4f, fuse_loss: %1.4f\n' % (
              epoch, avgLoss[0], avgLoss[1], avgLoss[2], avgLoss[3], avgLoss[4], avgLoss[5])
    print(log)
    writer.add_scalar(str_train + '/loss_1', avgLoss[0])
    writer.add_scalar(str_train + '/loss_2', avgLoss[1])
    writer.add_scalar(str_train + '/loss_3', avgLoss[2])
    writer.add_scalar(str_train + '/loss_4', avgLoss[3])
    writer.add_scalar(str_train + '/loss_5', avgLoss[4])
    writer.add_scalar(str_train + '/fuse_loss', avgLoss[5])
    return sum(avgLoss)
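# Dice_Loss and normal_cross_entropy_loss are imported from elsewhere in the repo. For
# reference, the block below is a standard soft Dice loss with the same call signature;
# summing over the batch (so that valid() can divide by the batch size) is an assumption
# about the repo's convention, not a confirmed detail.
import torch


def dice_loss_sketch(preds, labels, eps=1.0):
    # preds: per-pixel probabilities, labels: binary masks, both (N, C, H, W)
    preds = preds.contiguous().view(preds.shape[0], -1)
    labels = labels.contiguous().view(labels.shape[0], -1)
    intersection = (preds * labels).sum(dim=1)
    union = preds.sum(dim=1) + labels.sum(dim=1)
    dice = (2.0 * intersection + eps) / (union + eps)
    return (1.0 - dice).sum()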