def test_string_cut_empty_arguments(self):
    """ Tests that a TypeError is raised when no arguments are passed. """
    with self.assertRaises(TypeError):
        tm.string_cut()
def chk_organism(pdb_list):
    global ORGANISM_SEQUENCE, FOLDER
    org_test = "The following PDBs will not be analyzed because the source organism sequence\nis not present in the sequence alignment file\n"
    org_test += "%10s %60s\n" % ("PDB ID", "ORGANISM")
    remove_list = []
    ''' Identify bad pdb's '''
    for i in range(len(pdb_list)):
        pdb_file = "%s/pdbs/%s.pdb" % (FOLDER, pdb_list[i])
        gen_util.check_file(pdb_file)
        pdb_organism = pdb_data.get_source(pdb_file)
        if pdb_organism not in ORGANISM_SEQUENCE:
            org_test += "%10s %60s\n" % (pdb_list[i], pdb_organism)
            remove_list.append(pdb_list[i])
    ''' Remove bad pdb's '''
    for i in range(len(remove_list)):
        pdb_list.remove(remove_list[i])
    return pdb_list, org_test
def test_string_cut_return_correct_length_multiple(self):
    """ Tests that string_cut_multi truncates each column to its own specified length. """
    ### Setting test parameters
    test_df = tm.open_file("test_datasets/test_1_full.csv")
    test_string_cut_value = [1, 3]
    test_col_list = ["Column_1", "Column_3"]
    test_df = test_df[test_col_list]
    processed_df = tm.string_cut_multi(test_df, test_string_cut_value, test_col_list)
    testing_list = []
    for column in test_col_list:
        value_test = processed_df[column].map(len).max()
        testing_list.append(value_test)
    self.assertEqual(testing_list, test_string_cut_value)
def chk_quality(pdb_list):
    global max_resolution, LOG_FOLDER
    res_test = ""
    good_pdbs = ""
    remove_list = []
    ''' Identify bad pdb's '''
    for i in range(len(pdb_list)):
        pdb_file = "%s/pdbs/%s.pdb" % (FOLDER, pdb_list[i])
        gen_util.check_file(pdb_file)
        # Extract the four-letter PDB ID from the file name
        p = len(pdb_file) - 8
        pdb = pdb_file[p:len(pdb_file) - 4]
        resolution = pdb_data.get_resolution(pdb_file)
        if resolution <= max_resolution:
            good_pdbs += "%6s %8.3f\n" % (pdb, resolution)
        else:
            res_test += "%6s%s%6.3f\n" % (pdb, "\t did not pass resolution test; will not be used for analysis. \t", resolution)
            remove_list.append(pdb_list[i])
    fName = "%s/XPAT-PdbResolution.dat" % (LOG_FOLDER)
    w1d.writeData(fName, good_pdbs)
    ''' Remove bad pdb's '''
    for i in range(len(remove_list)):
        pdb_list.remove(remove_list[i])
    return pdb_list, res_test
def test_string_cut_return_correct_length_single(self):
    """ Tests that string_cut_multi with a single cut value truncates every column to that length. """
    ### Setting test parameters
    test_df = tm.open_file("test_datasets/test_1_full.csv")
    test_string_cut_value = 3
    test_col_list = ["Column_1", "Column_3"]
    # Sanity check: each column should contain at least one string long enough to be truncated
    for col in test_col_list:
        self.assertTrue((test_df[col].str.len() >= test_string_cut_value).any())
    test_df = test_df[test_col_list]
    processed_df = tm.string_cut_multi(test_df, test_string_cut_value, test_col_list)
    measurer = np.vectorize(len)
    result = measurer(processed_df.values.astype(str)).max(axis=0)
    result = all(elem == test_string_cut_value for elem in result)
    self.assertTrue(result)
def test_open_non_csv(self):
    """ Tests that open_file raises a TypeError when the input is not a CSV file. """
    ### Test and call in one
    with self.assertRaises(TypeError):
        tm.open_file("sdgsdgdgdsg")
def test_string_cut_wrong_column_names(self):
    """ Tests that a TypeError is raised when nonexistent column names are passed. """
    test_df = tm.open_file("test_datasets/test_1_full.csv")
    test_string_cut_value = 3
    test_col_list = ["Column_", "Column_"]
    with self.assertRaises(TypeError):
        tm.string_cut(test_df, test_string_cut_value, test_col_list)
def download_forms():
    # Download each year/quarter master.idx and save a record for requested forms
    f_log = open(PARM_LOGFILE, 'a')
    f_log.write('BEGIN LOOPS: {0}\n'.format(time.strftime('%c')))
    n_tot = 0
    n_errs = 0
    for year in range(PARM_BGNYEAR, PARM_ENDYEAR + 1):
        for qtr in range(PARM_BGNQTR, PARM_ENDQTR + 1):
            startloop = time.time()
            n_qtr = 0
            file_count = {}
            # Setup output path
            path = '{0}{1}\\QTR{2}\\'.format(PARM_PATH, str(year), str(qtr))
            if not os.path.exists(path):
                os.makedirs(path)
                print('Path: {0} created'.format(path))
            masterindex = EDGAR_Pac.download_masterindex(year, qtr, True)
            # masterindex = list(filter(lambda x: x.name.startswith('BANK'), masterindex))
            if masterindex:
                # Only the first 100 index entries are processed here
                for item in masterindex[:100]:
                    # while EDGAR_Pac.edgar_server_not_available(True):  # kill time when server not available
                    #     pass
                    if item.form in PARM_FORMS:
                        n_qtr += 1
                        # Keep track of filings and identify duplicates
                        fid = str(item.cik) + str(item.filingdate) + item.form
                        if fid in file_count:
                            file_count[fid] += 1
                        else:
                            file_count[fid] = 1
                        # Setup EDGAR URL and output file name
                        # https://www.sec.gov/Archives/edgar/data/70858/000007085818000009/Financial_Report.xlsx
                        url = PARM_EDGARPREFIX + item.path
                        fname = (path + str(item.filingdate) + '_' + item.form.replace('/', '-') +
                                 '_' + item.path.replace('/', '_'))
                        fname = fname.replace('.txt', '_' + str(file_count[fid]) + '.txt')
                        print(url)
                        return_url = General_Utilities.download_to_file(url, fname, f_log)
                        if return_url:
                            n_errs += 1
                        n_tot += 1
                        # time.sleep(1)  # Space out requests
            print(str(year) + ':' + str(qtr) + ' -> {0:,}'.format(n_qtr) +
                  ' downloads completed. Time = ' +
                  time.strftime('%H:%M:%S', time.gmtime(time.time() - startloop)) +
                  ' | ' + time.strftime('%c'))
            f_log.write('{0} | {1} | n_qtr = {2:>8,} | n_tot = {3:>8,} | n_err = {4:>6,} | {5}\n'
                        .format(year, qtr, n_qtr, n_tot, n_errs, time.strftime('%c')))
            f_log.flush()
    print('{0:,} total forms downloaded.'.format(n_tot))
    f_log.write('\n{0:,} total forms downloaded.'.format(n_tot))
    f_log.close()
def test_string_cut_return_wrong_string_values(self):
    """ Tests that a TypeError is raised when a cut value is given as a string instead of an int. """
    ### Setting test parameters
    test_df = tm.open_file("test_datasets/test_1_full.csv")
    test_string_cut_value = [1, "3"]
    test_col_list = ["Column_1", "Column_3"]
    test_df = test_df[test_col_list]
    with self.assertRaises(TypeError):
        tm.string_cut_multi(test_df, test_string_cut_value, test_col_list)
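# The tm module under test is not included in this section. As a rough,
# non-authoritative sketch of the behavior the tests above assume,
# string_cut_multi might look something like the following; the signature
# matches the calls in the tests, but the body is inferred from the
# assertions, not taken from the actual implementation.
def string_cut_multi(df, cut_values, columns):
    """Truncate the strings in `columns` of `df` to the given length(s).

    `cut_values` may be a single int applied to every column, or a list with
    one int per column; non-integer values raise TypeError, as the tests expect.
    """
    if isinstance(cut_values, int):
        cut_values = [cut_values] * len(columns)
    if not all(isinstance(v, int) and not isinstance(v, bool) for v in cut_values):
        raise TypeError("cut values must be integers")
    missing = [c for c in columns if c not in df.columns]
    if missing:
        raise TypeError("unknown columns: {}".format(missing))
    out = df.copy()
    for col, length in zip(columns, cut_values):
        out[col] = out[col].astype(str).str[:length]
    return out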
def get_pdb_list(pdb_files_list):
    global FOLDER, LOG_FOLDER, ORGANISM_SEQUENCE
    global pdb_list, organism
    fOpen = "%s/%s" % (FOLDER, pdb_files_list)
    print fOpen
    gen_util.check_file(fOpen)
    (pdb_list, res_test, org_test) = pdblist.read_pdb_list(fOpen, ORGANISM_SEQUENCE)
    print res_test
    print org_test
    fName = "%s/XPAT-Log.dat" % (LOG_FOLDER)
    data = "%s\n%s" % (res_test, org_test)
    w1d.writeData(fName, data)
    exit()
def test_string_cut_return_correct(self):
    """ Tests that string_cut returns a pandas DataFrame. """
    ### Call the function
    test_df = tm.open_file("test_datasets/test_1_full.csv")
    test_string_cut_value = 3
    test_col_list = ["Column_1", "Column_3"]
    result = tm.string_cut(test_df, test_string_cut_value, test_col_list)
    ### Test it
    self.assertIsInstance(result, pd.DataFrame)
def test_open_csv(self):
    """ Tests that open_file successfully opens a CSV and returns a pandas DataFrame. """
    ### Call the function
    result = tm.open_file("test_datasets/test_1_full.csv")
    ### Test it
    self.assertIsInstance(result, pd.DataFrame)
def run_infer(image_dir):
    opt = Namespace(base_setup='Baseline_Parameters.txt',
                    search_setup='Small_UNet_Liver.txt')
    opt.base_setup = ROOT_DIR + '/Training_Setup_Files/' + opt.base_setup
    opt.search_setup = ROOT_DIR + '/Training_Setup_Files/' + opt.search_setup
    training_setups = gu.extract_setup_info(opt)
    for training_setup in tqdm(training_setups, desc='Setup Iteration... ', position=0):
        infer(training_setup, image_dir)
import numpy as np, os, matplotlib.pyplot as plt, sys

os.chdir('/media/karsten_dl/QS2/standard_liverlesion_segmentation/Misc')
sys.path.insert(0, os.getcwd() + '/../Utilities')
sys.path.insert(0, os.getcwd() + '/../Network_Zoo')
import network_zoo as netlib
import General_Utilities as gu
import nibabel as nib

data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Test_Segmentations/Test_Submissions'
old_data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Test_Segmentations/Old_Test_Submissions'
dp = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/LOADDATA/Test_Data_2D/Volumes'

%gui qt  # IPython magic: start the Qt event loop for the pyqtgraph viewers below
import pyqtgraph as pg

vol_n = 30
or_vol = gu.normalize(np.stack([np.load(dp + '/test-volume-{}/'.format(vol_n) + x)
                                for x in sorted(os.listdir(dp + '/test-volume-{}'.format(vol_n)),
                                                key=lambda x: int(x.split('-')[-1].split('.')[0]))]))
vol_info = nib.load(data_path + '/test-segmentation-{}.nii'.format(vol_n))
vol = np.array(vol_info.dataobj)
old_vol_info = nib.load(old_data_path + '/test-segmentation-{}.nii'.format(vol_n))
old_vol = np.array(old_vol_info.dataobj)

print('Shape:', vol.shape)
print('Shape:', or_vol.shape)
print('Shape:', old_vol.shape)

pg.image(or_vol + vol.transpose(2, 0, 1).astype(float))
pg.image(or_vol + old_vol.transpose(2, 0, 1).astype(float))

import pickle as pkl
network_setup = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Standard_Liver_Networks/vUnet2D_liver_full_equipment_prime'
import numpy as np, os, matplotlib.pyplot as plt, sys

os.chdir('/media/karsten_dl/QS2/standard_liverlesion_segmentation/Misc')
sys.path.insert(0, os.getcwd() + '/../Utilities')
import General_Utilities as gu

data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/LOADDATA/Training_Data_2D'

### Image Weightmaps
for i in range(445, 473):
    volume, slicev = 'volume-10', 'slice-{}.npy'.format(i)
    vol = np.load(data_path + '/Volumes' + '/' + volume + '/' + slicev)
    liv = np.load(data_path + '/LiverMasks' + '/' + volume + '/' + slicev)
    les = np.load(data_path + '/LesionMasks' + '/' + volume + '/' + slicev)
    b_liv = np.load(data_path + '/BoundaryMasksLiver' + '/' + volume + '/' + slicev)
    b_les = np.load(data_path + '/BoundaryMasksLesion' + '/' + volume + '/' + slicev)

    f, ax = plt.subplots(1, 5)
    ax[0].imshow(gu.normalize(vol))
    ax[1].imshow(liv, cmap='Greys')
    ax[2].imshow(b_liv.astype(float), cmap='Greys')
    ax[3].imshow(les, cmap='Reds')
    ax[4].imshow(b_les.astype(float), cmap='Reds')
    for a in ax:
        a.set_xticks([])
        a.set_yticks([])
    f.set_size_inches(15, 3)
def get_sequence():
    # global organism, sequence, dash_count, FOLDER
    global FOLDER, ORGANISM_SEQUENCE
    cwfname = "%s/aln/ClustalW2-TIM.aln" % (FOLDER)
    gen_util.check_file(cwfname)
    ORGANISM_SEQUENCE = readcw.read_clustalW(cwfname)
def __getitem__(self, idx):
    # Choose a positive example with 50% chance if training.
    # During validation, 'Pos' will contain all validation samples.
    # Note that, again, volumes without lesions/positive target masks need to be taken into account.
    type_choice = not idx % self.pars.Training['pos_sample_chance'] or self.is_validation
    modes = list(self.input_samples.keys())
    type_key = modes[type_choice] if len(self.input_samples[modes[type_choice]]) else modes[not type_choice]
    type_len = len(self.input_samples[type_key])

    next_vol, _ = self.input_samples[type_key][(idx + 1) % type_len]
    vol, idx = self.input_samples[type_key][idx % type_len]
    vol_change = next_vol != vol
    self.curr_vol = vol

    intvol = self.volume_details[vol]["Input_Image_Paths"][idx]
    intvol = intvol[len(intvol) // 2]

    input_image = np.concatenate([
        np.expand_dims(np.load(sub_vol), 0)
        for sub_vol in self.volume_details[vol]["Input_Image_Paths"][idx]
    ], axis=0)

    # Perform data standardization
    if self.pars.Training['no_standardize']:
        input_image = gu.normalize(input_image, zero_center=False, unit_variance=False, supply_mode="orig")
    else:
        input_image = gu.normalize(input_image)

    # Lesion/Liver mask to output
    target_mask = np.load(self.volume_details[vol]["TargetMask_Paths"][idx])
    target_mask = np.expand_dims(target_mask, 0)

    # Liver mask used to define the training region of interest
    crop_mask = np.expand_dims(np.load(self.volume_details[vol]["RefMask_Paths"][idx]), 0) \
        if self.pars.Training['data'] == 'lesion' else None

    # Weightmap to output
    weightmap = np.expand_dims(np.load(self.volume_details[vol]["Wmap_Paths"][idx]), 0).astype(float) \
        if self.pars.Training['use_weightmaps'] else None

    # Generate the list of all files that would need to be cropped, if cropping is required.
    files_to_crop = [input_image, target_mask]
    is_mask = [0, 1]
    if weightmap is not None:
        files_to_crop.append(weightmap)
        is_mask.append(0)
    if crop_mask is not None:
        files_to_crop.append(crop_mask)
        is_mask.append(1)

    # First, however, augmentation, if required, is performed (i.e. on full-size images
    # to remove border artefacts in crops).
    if len(self.pars.Training['augment']) and not self.is_validation:
        # Old: copyFiles needs to be True.
        files_to_crop = list(gu.augment_2D(files_to_crop,
                                           mode_dict=self.pars.Training['augment'],
                                           seed=self.rng.randint(0, 1e8),
                                           is_mask=is_mask))

    # If cropping is required, we crop now.
    if len(self.pars.Training['crop_size']) and not self.is_validation:
        # An imaginary batch axis is added in gu.get_crops_per_batch
        crops_for_picked_batch = gu.get_crops_per_batch(files_to_crop, crop_mask,
                                                        crop_size=self.pars.Training['crop_size'],
                                                        seed=self.rng.randint(0, 1e8))
        input_image = crops_for_picked_batch[0]
        target_mask = crops_for_picked_batch[1]
        weightmap = crops_for_picked_batch[2] if weightmap is not None else None
        crop_mask = crops_for_picked_batch[-1] if crop_mask is not None else None

    # If a one-hot encoded target mask is required:
    one_hot_target = gu.numpy_generate_onehot_matrix(target_mask, self.pars.Training['num_classes']) \
        if self.pars.Training['require_one_hot'] else None

    # If auxiliary inputs are used to feed additional information into the network,
    # the respective outputs are computed here.
    auxiliary_targets, auxiliary_wmaps, one_hot_auxiliary_targets = None, None, None
    if not self.is_validation and self.pars.Network['use_auxiliary_inputs']:
        auxiliary_targets, auxiliary_wmaps, one_hot_auxiliary_targets = [], [], []
        for val in range(len(self.pars.Network['structure']) - 1):
            aux_level = 2 ** (val + 1)
            aux_img = np.round(st.resize(target_mask,
                                         (target_mask.shape[0],
                                          target_mask.shape[1] // aux_level,
                                          target_mask.shape[2] // aux_level),
                                         order=0, mode="reflect", preserve_range=True))
            auxiliary_targets.append(aux_img)
            if self.pars.Training['require_one_hot']:
                one_hot_auxiliary_targets.append(
                    gu.numpy_generate_onehot_matrix(aux_img, self.pars.Training['num_classes']))
            if weightmap is not None:
                aux_img = st.resize(weightmap,
                                    (weightmap.shape[0],
                                     weightmap.shape[1] // aux_level,
                                     weightmap.shape[2] // aux_level),
                                    order=0, mode="reflect", preserve_range=True)
                auxiliary_wmaps.append(aux_img)

    # Final output dictionary
    return_dict = {
        "input_images": input_image.astype(float),
        "targets": target_mask.astype(float),
        "crop_option": crop_mask.astype(float) if crop_mask is not None else None,
        "weightmaps": weightmap.astype(float) if weightmap is not None else None,
        "one_hot_targets": one_hot_target,
        "aux_targets": auxiliary_targets,
        "one_hot_aux_targets": one_hot_auxiliary_targets,
        "aux_weightmaps": auxiliary_wmaps,
        'internal_slice_name': intvol,
        'vol_change': vol_change
    }
    return_dict = {key: item for key, item in return_dict.items() if item is not None}
    return return_dict
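# For orientation, a dataset exposing this __getitem__ would normally be consumed
# through a torch.utils.data.DataLoader during training. A minimal sketch follows;
# the dataset class name 'Slice_Dataset', its constructor arguments, and the
# Training['batch_size'] key are assumptions for illustration, since only
# __getitem__ is shown in this excerpt.
from torch.utils.data import DataLoader

train_dataset = Slice_Dataset(pars=opt, is_validation=False)  # hypothetical constructor
train_loader = DataLoader(train_dataset,
                          batch_size=opt.Training['batch_size'],
                          shuffle=True, num_workers=4)

for batch in train_loader:
    inputs = batch['input_images'].float()   # (B, C, H, W)
    targets = batch['targets'].float()       # (B, 1, H, W)
    # Optional keys such as 'weightmaps', 'one_hot_targets' or 'aux_targets'
    # only exist when the corresponding setup options are enabled, because
    # None entries are stripped from the returned dictionary.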
)
parse_in.add_argument(
    '--search_setup', type=str, default='LiverNetwork_Parameters.txt',
    help='Path to search setup-txt, which contains (multiple) variations to the baseline proposed above.')
opt = parse_in.parse_args()
# opt = parse_in.parse_args(["--search_setup","Specific_Setup_Parameters_3D_LesionSegmentation_PC1.txt"])

opt.base_setup = os.getcwd() + '/../Train_Networks/Training_Setup_Files/' + opt.base_setup
opt.search_setup = os.getcwd() + '/../Train_Networks/Training_Setup_Files/' + opt.search_setup
training_setups = gu.extract_setup_info(opt)
opt = training_setups[0]

"""================================================="""
### LOAD NETWORK
opt.Training['num_out_classes'] = 2
network = netlib.NetworkSelect(opt)
network.n_params = nu.gimme_params(network)
opt.Network['Network_name'] = network.name
device = torch.device('cuda')
_ = network.to(device)

### INPUT DATA
input_data = torch.randn((1, opt.Network['channels'], 256, 256)).type(torch.FloatTensor).to(device)
network_pred = network(input_data)[0]
"""================================================="""
def download_forms():
    # Download each year/quarter master.idx and save a record for requested forms
    f_log = open(PARM_LOGFILE, 'a')
    f_log.write('BEGIN LOOPS: {0}\n'.format(time.strftime('%c')))
    n_tot = 0
    n_errs = 0
    if not os.path.exists(PARM_PATH):
        os.makedirs(PARM_PATH)
        print('Path: {0} created'.format(PARM_PATH))
    # Collect already-downloaded files so they can be skipped below
    file_list = os.listdir(PARM_PATH)
    for i in range(len(file_list)):
        file_list[i] = os.path.join(PARM_PATH, file_list[i])
    for year in range(PARM_BGNYEAR, PARM_ENDYEAR + 1):
        for qtr in range(PARM_BGNQTR, PARM_ENDQTR + 1):
            startloop = time.time()
            n_qtr = 0
            file_count = {}
            # Setup output path
            # path = PARM_PATH
            path = '{0}{1}\\QTR{2}\\'.format(PARM_PATH, str(year), str(qtr))
            '''
            if not os.path.exists(PARM_PATH):
                os.makedirs(PARM_PATH)
                print('Path: {0} created'.format(PARM_PATH))
            '''
            masterindex = EDGAR_Pac.download_masterindex(year, qtr, True)
            if masterindex:
                for item in masterindex:
                    while EDGAR_Pac.edgar_server_not_available(True):  # kill time when server not available
                        pass
                    if item.form in PARM_FORMS:
                        n_qtr += 1
                        # Keep track of filings and identify duplicate filings
                        fid = str(item.cik) + str(item.filingdate) + item.form
                        if fid in file_count:
                            file_count[fid] += 1
                        else:
                            file_count[fid] = 1
                        # Setup EDGAR URL and output file name
                        url = PARM_EDGARPREFIX + item.path
                        fname = (PARM_PATH + str(item.filingdate) + '_' + item.form.replace('/', '-') +
                                 '_' + item.path.replace('/', '_'))
                        fname = fname.replace('.txt', '_' + str(file_count[fid]) + '.txt')
                        if fname not in file_list:
                            return_url = General_Utilities.download_to_file(url, fname, f_log)
                            if return_url:
                                n_errs += 1
                            n_tot += 1
                            # time.sleep(1)  # Space out requests
            print(str(year) + ':' + str(qtr) + ' -> {0:,}'.format(n_qtr) +
                  ' downloads completed. Time = ' +
                  time.strftime('%H:%M:%S', time.gmtime(time.time() - startloop)) +
                  ' | ' + time.strftime('%c'))
            f_log.write('{0} | {1} | n_qtr = {2:>8,} | n_tot = {3:>8,} | n_err = {4:>6,} | {5}\n'
                        .format(year, qtr, n_qtr, n_tot, n_errs, time.strftime('%c')))
            f_log.flush()
    print('{0:,} total forms downloaded.'.format(n_tot))
    f_log.write('\n{0:,} total forms downloaded.'.format(n_tot))
    f_log.close()
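# Both variants of download_forms read module-level PARM_* constants that are
# defined near the top of the original script and not reproduced here. A minimal
# sketch of such a configuration block follows; the years, quarters, form list,
# and paths are placeholder assumptions, not the script's actual settings.
PARM_BGNYEAR = 2017                       # first year to download
PARM_ENDYEAR = 2018                       # last year to download
PARM_BGNQTR = 1                           # first quarter of each year
PARM_ENDQTR = 4                           # last quarter of each year
PARM_FORMS = ['10-K', '10-Q']             # EDGAR form types to keep
PARM_PATH = 'C:\\EDGAR\\downloads\\'      # local output root
PARM_LOGFILE = PARM_PATH + 'download_log.txt'
PARM_EDGARPREFIX = 'https://www.sec.gov/Archives/'  # prepended to item.path to build the URL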
    type=str,
    default='Baseline_Parameters.txt',
    help='Path to baseline setup-txt which contains all major parameters that most likely will be kept constant during various grid searches.')
parse_in.add_argument(
    '--search_setup', type=str, default='',
    help='Path to search setup-txt, which contains (multiple) variations to the baseline proposed above.')
parse_in.add_argument('--no_date', action='store_true',
                      help='Do not use date when logging files.')

# opt = parse_in.parse_args(['--search_setup','Small_UNet_Lesion.txt'])
opt = parse_in.parse_args()
assert opt.search_setup != '', 'Please provide a Variation-Parameter Text File!'

opt.base_setup = os.getcwd() + '/Training_Setup_Files/' + opt.base_setup
opt.search_setup = os.getcwd() + '/Training_Setup_Files/' + opt.search_setup

training_setups = gu.extract_setup_info(opt)
for training_setup in tqdm(training_setups, desc='Setup Iteration... ', position=0):
    main(training_setup)
    along with XPAT. If not, see <http://www.gnu.org/licenses/>.
'''
import fileIO.Write_1dData as w1d
import PDB_Data as pdb_data
import Update_Pdb_List as update_pdb_list
import General_Utilities as gen_util

FOLDER = "/home/aklab/Projects/TIM/TIM-Analysis/pdbs"
FOLDER = "/home/aklab/Projects/TIM/TIM-Analysis"
LOG_FOLDER = "%s/log" % (FOLDER)
max_resolution = 2.50
ORGANISM_SEQUENCE = {}

gen_util.check_folder(LOG_FOLDER)

''' HAS A BUG: cannot access the FOLDER variable from the XPAT class '''


def read_pdb_list(fOpen, org_seq):
    global pdb_list, ORGANISM_SEQUENCE
    ORGANISM_SEQUENCE = org_seq
    fileOpen = open(fOpen, "r")
    list = fileOpen.readlines()
    pdb_list = []