    def test_string_cut_empty_arguments(self):
        """
        Tests that a TypeError is raised when string_cut is called with no arguments.
        """

        with self.assertRaises(TypeError):
            tm.string_cut()
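
The test methods on this page appear to come from a unittest.TestCase subclass. A minimal scaffold they could sit in, assuming the module under test is imported as tm (its real name is not shown here, so the import below is hypothetical):

import unittest

import numpy as np
import pandas as pd

import text_module as tm  # hypothetical import name for the module under test


class TestTextModule(unittest.TestCase):
    # the test_* methods shown on this page would be defined here
    pass


if __name__ == "__main__":
    unittest.main()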
Example #2
def chk_organism(pdb_list):
    global ORGANISM_SEQUENCE, FOLDER
    org_test = "The following PDBs will not be analyzed because the source organism sequence\nis not present in the sequence alignment file\n"
    org_test += "%10s %60s\n" % ("PDB ID", "ORGANISM")
    remove_list = []

    # Identify PDBs whose source organism is missing from the alignment
    for pdb_id in pdb_list:
        pdb_file = "%s/pdbs/%s.pdb" % (FOLDER, pdb_id)
        gen_util.check_file(pdb_file)

        pdb_organism = pdb_data.get_source(pdb_file)

        # dict.has_key() was removed in Python 3; use the 'in' operator instead
        if pdb_organism not in ORGANISM_SEQUENCE:
            org_test += "%10s %60s\n" % (pdb_id, pdb_organism)
            remove_list.append(pdb_id)

    # Remove the flagged PDBs from the working list
    for pdb_id in remove_list:
        pdb_list.remove(pdb_id)
    return pdb_list, org_test
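
A minimal usage sketch, assuming FOLDER points at the analysis directory, ORGANISM_SEQUENCE has already been filled (for example by get_sequence() further down), and the listed PDB files exist under FOLDER/pdbs:

pdb_ids = ["1abc", "2xyz"]            # placeholder PDB IDs
pdb_ids, org_report = chk_organism(pdb_ids)
print(org_report)                     # entries whose source organism is missing from the alignment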
    def test_string_cut_return_correct_length_multiple(self):
        """ 
        Tests if file successfully opens csv and returns a pandas
        dataframe. 
        """

        ### Seeting test parameters
        test_df = tm.open_file("test_datasets/test_1_full.csv")

        test_string_cut_value = [1, 3]

        test_col_list = ["Column_1", "Column_3"]

        test_df = test_df[test_col_list]

        processed_df = tm.string_cut_multi(test_df, test_string_cut_value,
                                           test_col_list)

        testing_list = []

        for column in test_col_list:
            value_test = processed_df[column].map(len).max()
            testing_list.append(value_test)

        self.assertEqual(testing_list, test_string_cut_value)
Example #4
def chk_quality(pdb_list):
    global max_resolution, LOG_FOLDER
    res_test = ""
    good_pdbs = ""
    remove_list = []

    # Identify PDBs whose resolution is worse than max_resolution
    for pdb_id in pdb_list:
        pdb_file = "%s/pdbs/%s.pdb" % (FOLDER, pdb_id)
        gen_util.check_file(pdb_file)

        # Strip the directory prefix and the ".pdb" extension to recover the PDB ID
        p = len(pdb_file) - 8
        pdb = pdb_file[p:len(pdb_file) - 4]
        resolution = pdb_data.get_resolution(pdb_file)
        if resolution <= max_resolution:
            good_pdbs += "%6s %8.3f\n" % (pdb, resolution)
        else:
            res_test += "%6s%s%6.3f\n" % (
                pdb,
                "\t did not pass resolution test. will not be used for analysis. \t",
                resolution)
            remove_list.append(pdb_id)

    fName = "%s/XPAT-PdbResolution.dat" % (LOG_FOLDER)
    w1d.writeData(fName, good_pdbs)

    # Remove the flagged PDBs from the working list
    for pdb_id in remove_list:
        pdb_list.remove(pdb_id)
    return pdb_list, res_test
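
A similar hedged sketch for the resolution filter, assuming the same FOLDER layout and that LOG_FOLDER exists so the XPAT-PdbResolution.dat log can be written:

pdb_ids, res_report = chk_quality(["1abc", "2xyz"])   # placeholder PDB IDs
print(res_report)                                     # entries worse than max_resolution are dropped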
    def test_string_cut_return_correct_length_single(self):
        """ 
        Tests if file successfully opens csv and returns a pandas
        dataframe. 
        """

        ### Seeting test parameters
        test_df = tm.open_file("test_datasets/test_1_full.csv")

        test_string_cut_value = 3

        test_col_list = ["Column_1", "Column_3"]

        # Sanity check on the raw data; note that the result of all() is
        # discarded, so this loop currently has no effect on the test outcome.
        for col in test_col_list:
            all(test_df[col].str.len() < test_string_cut_value)

        test_df = test_df[test_col_list]

        processed_df = tm.string_cut_multi(test_df, test_string_cut_value,
                                           test_col_list)

        measurer = np.vectorize(len)

        result = measurer(processed_df.values.astype(str)).max(axis=0)

        result = all(elem == test_string_cut_value for elem in result)

        self.assertTrue(result)
    def test_open_non_csv(self):
        """ Tests if file unsuccessfully opens npy and returns a 
        pandas dataframe. 
        """

        ### Test and call in one
        with self.assertRaises(TypeError):
            tm.open_file("sdgsdgdgdsg")
    def test_string_cut_wrong_column_names(self):
        """
        Tests that a TypeError is raised when nonexistent column names are passed.
        """

        test_df = tm.open_file("test_datasets/test_1_full.csv")

        test_string_cut_value = 3

        test_col_list = ["Column_", "Column_"]

        with self.assertRaises(TypeError):
            tm.string_cut(test_df, test_string_cut_value, test_col_list)
def download_forms():

    # Download each year/quarter master.idx and save record for requested forms
    f_log = open(PARM_LOGFILE, 'a')
    f_log.write('BEGIN LOOPS:  {0}\n'.format(time.strftime('%c')))
    n_tot = 0
    n_errs = 0
    for year in range(PARM_BGNYEAR, PARM_ENDYEAR + 1):
        for qtr in range(PARM_BGNQTR, PARM_ENDQTR + 1):
            startloop = time.perf_counter()  # time.clock() was removed in Python 3.8
            n_qtr = 0
            file_count = {}
            # Setup output path
            path = '{0}{1}\\QTR{2}\\'.format(PARM_PATH, str(year), str(qtr))
            if not os.path.exists(path):
                os.makedirs(path)
                print('Path: {0} created'.format(path))
            masterindex = EDGAR_Pac.download_masterindex(year, qtr, True)
            # masterindex = list(filter(lambda x: x.name.startswith('BANK'), masterindex))
            if masterindex:
                for item in masterindex[:100]:
                    # while EDGAR_Pac.edgar_server_not_available(True):  # kill time when server not available
                    #     pass
                    if item.form in PARM_FORMS:
                        n_qtr += 1
                        # Keep track of filings and identify duplicates
                        fid = str(item.cik) + str(item.filingdate) + item.form
                        if fid in file_count:
                            file_count[fid] += 1
                        else:
                            file_count[fid] = 1
                        # Setup EDGAR URL and output file name
                        #https://www.sec.gov/Archives/edgar/data/70858/000007085818000009/Financial_Report.xlsx
                        url = PARM_EDGARPREFIX + item.path
                        fname = (path + str(item.filingdate) + '_' +
                                 item.form.replace('/', '-') + '_' +
                                 item.path.replace('/', '_'))
                        fname = fname.replace(
                            '.txt', '_' + str(file_count[fid]) + '.txt')
                        print(url)
                        return_url = General_Utilities.download_to_file(
                            url, fname, f_log)
                        if return_url:
                            n_errs += 1
                        n_tot += 1
                        # time.sleep(1)  # Space out requests
            print(
                str(year) + ':' + str(qtr) + ' -> {0:,}'.format(n_qtr) +
                ' downloads completed.  Time = ' +
                time.strftime('%H:%M:%S', time.gmtime(time.perf_counter() -
                                                      startloop)) + ' | ' +
                time.strftime('%c'))
            f_log.write(
                '{0} | {1} | n_qtr = {2:>8,} | n_tot = {3:>8,} | n_err = {4:>6,} | {5}\n'
                .format(year, qtr, n_qtr, n_tot, n_errs, time.strftime('%c')))

            f_log.flush()

    print('{0:,} total forms downloaded.'.format(n_tot))
    f_log.write('\n{0:,} total forms downloaded.'.format(n_tot))
    f_log.close()
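
The function relies on a set of module-level PARM_* constants. A hedged configuration sketch with placeholder values (the real paths and form list are not shown in this snippet; only the names come from the code above):

PARM_BGNYEAR = 2018
PARM_ENDYEAR = 2018
PARM_BGNQTR = 1
PARM_ENDQTR = 4
PARM_PATH = 'D:\\EDGAR\\'                       # placeholder output directory
PARM_LOGFILE = 'D:\\EDGAR\\download_log.txt'    # placeholder log file
PARM_FORMS = ['10-K', '10-Q']                   # placeholder form types
PARM_EDGARPREFIX = 'https://www.sec.gov/Archives/'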
    def test_string_cut_return_wrong_string_values(self):
        """ 
        Tests if file successfully opens csv and returns a pandas
        dataframe. 
        """

        ### Seeting test parameters
        test_df = tm.open_file("test_datasets/test_1_full.csv")

        test_string_cut_value = [1, "3"]

        test_col_list = ["Column_1", "Column_3"]

        test_df = test_df[test_col_list]

        with self.assertRaises(TypeError):
            tm.string_cut_multi(test_df, test_string_cut_value, test_col_list)
Example #10
def get_pdb_list(pdb_files_list):
    global FOLDER, LOG_FOLDER, ORGANISM_SEQUENCE

    global pdb_list, organism
    fOpen = "%s/%s" % (FOLDER, pdb_files_list)
    print(fOpen)
    gen_util.check_file(fOpen)
    (pdb_list, res_test,
     org_test) = pdblist.read_pdb_list(fOpen, ORGANISM_SEQUENCE)

    print(res_test)
    print(org_test)

    fName = "%s/XPAT-Log.dat" % (LOG_FOLDER)
    data = "%s\n%s" % (res_test, org_test)
    w1d.writeData(fName, data)
    exit()
    def test_string_cut_return_correct(self):
        """ 
        Tests if file successfully opens csv and returns a pandas
        dataframe. 
        """

        ### Call the function
        test_df = tm.open_file("test_datasets/test_1_full.csv")

        test_string_cut_value = 3

        test_col_list = ["Column_1", "Column_3"]

        result = tm.string_cut(test_df, test_string_cut_value, test_col_list)

        ### Test it
        self.assertIsInstance(result, pd.DataFrame)
    def test_open_csv(self):
        """ Tests if file successfully opens csv and returns a pandas
            dataframe. 
        """

        ### Call the function
        result = tm.open_file("test_datasets/test_1_full.csv")

        ### Test it
        self.assertIsInstance(result, pd.DataFrame)
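
Taken together, these tests imply a small text-processing API. A usage sketch under the same assumptions (the tm import name and the CSV path mirror the tests above):

df = tm.open_file("test_datasets/test_1_full.csv")                 # returns a pandas DataFrame
cut_df = tm.string_cut(df, 3, ["Column_1", "Column_3"])            # truncate both columns to 3 characters
multi_df = tm.string_cut_multi(df[["Column_1", "Column_3"]],
                               [1, 3], ["Column_1", "Column_3"])   # per-column maximum lengths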
def run_infer(image_dir):
    opt = Namespace(base_setup='Baseline_Parameters.txt',
                    search_setup='Small_UNet_Liver.txt')

    opt.base_setup = ROOT_DIR + '/Training_Setup_Files/' + opt.base_setup
    opt.search_setup = ROOT_DIR + '/Training_Setup_Files/' + opt.search_setup

    training_setups = gu.extract_setup_info(opt)
    for training_setup in tqdm(training_setups,
                               desc='Setup Iteration... ',
                               position=0):
        infer(training_setup, image_dir)
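
A hypothetical call, assuming ROOT_DIR is defined and the two setup text files exist under Training_Setup_Files; extract_setup_info is expected to return one setup per grid-search variation, and infer is run once per setup:

run_infer('/path/to/test_images')   # placeholder directory of input images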
Example #14
import numpy as np, os, matplotlib.pyplot as plt, sys
os.chdir('/media/karsten_dl/QS2/standard_liverlesion_segmentation/Misc')
sys.path.insert(0,os.getcwd()+'/../Utilities')
sys.path.insert(0,os.getcwd()+'/../Network_Zoo')
import network_zoo as netlib
import General_Utilities as gu
import nibabel as nib
data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Test_Segmentations/Test_Submissions'
old_data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Test_Segmentations/Old_Test_Submissions'
dp = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/LOADDATA/Test_Data_2D/Volumes'

%gui qt   # IPython magic: start the Qt event loop so the pyqtgraph windows stay responsive
import pyqtgraph as pg

vol_n = 30
or_vol   = gu.normalize(np.stack([np.load(dp+'/test-volume-{}/'.format(vol_n)+x) for x in sorted(os.listdir(dp+'/test-volume-{}'.format(vol_n)),key=lambda x: int(x.split('-')[-1].split('.')[0]))]))
vol_info = nib.load(data_path+'/test-segmentation-{}.nii'.format(vol_n))
vol      = np.array(vol_info.dataobj)
old_vol_info = nib.load(old_data_path+'/test-segmentation-{}.nii'.format(vol_n))
old_vol      = np.array(old_vol_info.dataobj)
print('Shape (new segmentation):', vol.shape)
print('Shape (input volume):    ', or_vol.shape)
print('Shape (old segmentation):', old_vol.shape)

vol.shape
pg.image(or_vol+vol.transpose(2,0,1).astype(float))
pg.image(or_vol+old_vol.transpose(2,0,1).astype(float))

import pickle as pkl

network_setup = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/SAVEDATA/Standard_Liver_Networks/vUnet2D_liver_full_equipment_prime'
import numpy as np, os, matplotlib.pyplot as plt, sys
os.chdir('/media/karsten_dl/QS2/standard_liverlesion_segmentation/Misc')
sys.path.insert(0,os.getcwd()+'/../Utilities')
import General_Utilities as gu
data_path = '/media/karsten_dl/QS2/standard_liverlesion_segmentation/LOADDATA/Training_Data_2D'
### Image Weightmaps
for i in range(445,473):
    volume, slicev = 'volume-10', 'slice-{}.npy'.format(i)
    vol    = np.load(data_path+'/Volumes'+'/'+volume+'/'+slicev)
    liv    = np.load(data_path+'/LiverMasks'+'/'+volume+'/'+slicev)
    les    = np.load(data_path+'/LesionMasks'+'/'+volume+'/'+slicev)
    b_liv  = np.load(data_path+'/BoundaryMasksLiver'+'/'+volume+'/'+slicev)
    b_les  = np.load(data_path+'/BoundaryMasksLesion'+'/'+volume+'/'+slicev)

    f,ax = plt.subplots(1,5)
    ax[0].imshow(gu.normalize(vol))
    ax[1].imshow(liv, cmap='Greys')
    ax[2].imshow(b_liv.astype(float), cmap='Greys')
    ax[3].imshow(les, cmap='Reds')
    ax[4].imshow(b_les.astype(float), cmap='Reds')
    for a in ax:
        a.set_xticks([])
        a.set_yticks([])
    f.set_size_inches(15,3)
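
Outside an interactive IPython/Jupyter session, the figures created in this loop will not be rendered automatically; a likely needed follow-up:

plt.show()   # render all figures created above (not required inside a notebook)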
Example #16
def get_sequence():
    #global organism,sequence,dash_count,FOLDER
    global FOLDER, ORGANISM_SEQUENCE
    cwfname = "%s/aln/ClustalW2-TIM.aln" % (FOLDER)
    gen_util.check_file(cwfname)
    ORGANISM_SEQUENCE = readcw.read_clustalW(cwfname)
    def __getitem__(self, idx):
        #Choose a positive example with 50% chance if training.
        #During validation, 'Pos' will contain all validation samples.
        #Note that again, volumes without lesions/positive target masks need to be taken into account.
        type_choice = not idx % self.pars.Training[
            'pos_sample_chance'] or self.is_validation
        modes = list(self.input_samples.keys())
        type_key = modes[type_choice] if len(
            self.input_samples[modes[type_choice]]) else modes[not type_choice]

        type_len = len(self.input_samples[type_key])

        next_vol, _ = self.input_samples[type_key][(idx + 1) % type_len]
        vol, idx = self.input_samples[type_key][idx % type_len]

        vol_change = next_vol != vol
        self.curr_vol = vol

        intvol = self.volume_details[vol]["Input_Image_Paths"][idx]
        intvol = intvol[len(intvol) // 2]

        input_image = np.concatenate([
            np.expand_dims(np.load(sub_vol), 0)
            for sub_vol in self.volume_details[vol]["Input_Image_Paths"][idx]
        ],
                                     axis=0)
        #Perform data standardization
        if self.pars.Training['no_standardize']:
            input_image = gu.normalize(input_image,
                                       zero_center=False,
                                       unit_variance=False,
                                       supply_mode="orig")
        else:
            input_image = gu.normalize(input_image)

        #Lesion/Liver Mask to output
        target_mask = np.load(
            self.volume_details[vol]["TargetMask_Paths"][idx])
        target_mask = np.expand_dims(target_mask, 0)

        #Liver Mask to use for defining training region of interest
        crop_mask = np.expand_dims(
            np.load(self.volume_details[vol]["RefMask_Paths"][idx]),
            0) if self.pars.Training['data'] == 'lesion' else None
        #Weightmask to output
        weightmap = np.expand_dims(
            np.load(self.volume_details[vol]["Wmap_Paths"][idx]),
            0).astype(float) if self.pars.Training['use_weightmaps'] else None

        #Generate list of all files that would need to be cropped, if cropping is required.
        files_to_crop = [input_image, target_mask]
        is_mask = [0, 1]
        if weightmap is not None:
            files_to_crop.append(weightmap)
            is_mask.append(0)
        if crop_mask is not None:
            files_to_crop.append(crop_mask)
            is_mask.append(1)

        #First however, augmentation, if required, is performed (i.e. on fullsize images to remove border artefacts in crops).
        if len(self.pars.Training['augment']) and not self.is_validation:
            # Old: copyFiles needs to be True.
            files_to_crop = list(
                gu.augment_2D(files_to_crop,
                              mode_dict=self.pars.Training['augment'],
                              seed=self.rng.randint(0, 1e8),
                              is_mask=is_mask))

        #If Cropping is required, we crop now.
        if len(self.pars.Training['crop_size']) and not self.is_validation:
            #Add imaginary batch axis in gu.get_crops_per_batch
            crops_for_picked_batch = gu.get_crops_per_batch(
                files_to_crop,
                crop_mask,
                crop_size=self.pars.Training['crop_size'],
                seed=self.rng.randint(0, 1e8))
            input_image = crops_for_picked_batch[0]
            target_mask = crops_for_picked_batch[1]
            weightmap = crops_for_picked_batch[
                2] if weightmap is not None else None
            crop_mask = crops_for_picked_batch[
                -1] if crop_mask is not None else None

        #If a one-hot encoded target mask is required:
        one_hot_target = gu.numpy_generate_onehot_matrix(
            target_mask, self.pars.Training['num_classes']
        ) if self.pars.Training['require_one_hot'] else None

        #If we use auxiliary inputs to input additional information into the network, we compute respective outputs here.
        auxiliary_targets, auxiliary_wmaps, one_hot_auxiliary_targets = None, None, None
        if not self.is_validation and self.pars.Network['use_auxiliary_inputs']:
            auxiliary_targets, auxiliary_wmaps, one_hot_auxiliary_targets   = [], [], []
            for val in range(len(self.pars.Network['structure']) - 1):
                aux_level = 2**(val + 1)
                aux_img = np.round(
                    st.resize(target_mask,
                              (target_mask.shape[0], target_mask.shape[1] //
                               aux_level, target_mask.shape[2] // aux_level),
                              order=0,
                              mode="reflect",
                              preserve_range=True))
                auxiliary_targets.append(aux_img)
                if self.pars.Training['require_one_hot']:
                    one_hot_auxiliary_targets.append(
                        gu.numpy_generate_onehot_matrix(
                            aux_img, self.pars.Training['num_classes']))
                if weightmap is not None:
                    aux_img = st.resize(
                        weightmap,
                        (weightmap.shape[0], weightmap.shape[1] // aux_level,
                         weightmap.shape[2] // aux_level),
                        order=0,
                        mode="reflect",
                        preserve_range=True)
                    auxiliary_wmaps.append(aux_img)

        #Final Output Dictionary
        return_dict = {
            "input_images": input_image.astype(float),
            "targets": target_mask.astype(float),
            "crop_option":
            crop_mask.astype(float) if crop_mask is not None else None,
            "weightmaps":
            weightmap.astype(float) if weightmap is not None else None,
            "one_hot_targets": one_hot_target,
            "aux_targets": auxiliary_targets,
            "one_hot_aux_targets": one_hot_auxiliary_targets,
            "aux_weightmaps": auxiliary_wmaps,
            'internal_slice_name': intvol,
            'vol_change': vol_change
        }

        return_dict = {
            key: item
            for key, item in return_dict.items() if item is not None
        }
        return return_dict
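
The dict comprehension at the end drops every None entry, which matters because PyTorch's default collate function cannot batch None values. A minimal consumption sketch, assuming this __getitem__ belongs to a torch.utils.data.Dataset subclass whose constructed instance is called dataset here (an assumed name):

from torch.utils.data import DataLoader

loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)  # 'dataset' is an assumed instance
batch = next(iter(loader))
print(batch["input_images"].shape)   # e.g. (4, channels, H, W) after default collation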
# Assumed preamble (mirrors the __main__ snippet further down); the project
# modules used below -- gu, netlib, nu, torch, os -- are expected to already
# be importable in the original file.
import argparse

parse_in = argparse.ArgumentParser()
parse_in.add_argument(
    '--base_setup',
    type=str,
    default='Baseline_Parameters.txt',
    help=
    'Path to baseline setup-txt which contains all major parameters that most likely will be kept constant during various grid searches.'
)
parse_in.add_argument(
    '--search_setup',
    type=str,
    default='LiverNetwork_Parameters.txt',
    help=
    'Path to search setup-txt, which contains (multiple) variations to the baseline proposed above.'
)
opt = parse_in.parse_args()
# opt = parse_in.parse_args(["--search_setup","Specific_Setup_Parameters_3D_LesionSegmentation_PC1.txt"])
opt.base_setup = os.getcwd(
) + '/../Train_Networks/Training_Setup_Files/' + opt.base_setup
opt.search_setup = os.getcwd(
) + '/../Train_Networks/Training_Setup_Files/' + opt.search_setup

training_setups = gu.extract_setup_info(opt)
opt = training_setups[0]
"""================================================="""
### LOAD NETWORK
opt.Training['num_out_classes'] = 2
network = netlib.NetworkSelect(opt)
network.n_params = nu.gimme_params(network)
opt.Network['Network_name'] = network.name
device = torch.device('cuda')
_ = network.to(device)

### INPUT DATA
input_data = torch.randn(
    (1, opt.Network['channels'], 256, 256)).type(torch.FloatTensor).to(device)
network_pred = network(input_data)[0]
"""================================================="""
Example #19
def download_forms():

    # Download each year/quarter master.idx and save record for requested forms
    f_log = open(PARM_LOGFILE, 'a')
    f_log.write('BEGIN LOOPS:  {0}\n'.format(time.strftime('%c')))
    n_tot = 0
    n_errs = 0
    if not os.path.exists(PARM_PATH):
        os.makedirs(PARM_PATH)
        print('Path: {0} created'.format(PARM_PATH))
    file_list = os.listdir(PARM_PATH)
    for i in range(len(file_list)):
        file_list[i] = os.path.join(PARM_PATH, file_list[i])
    for year in range(PARM_BGNYEAR, PARM_ENDYEAR + 1):
        for qtr in range(PARM_BGNQTR, PARM_ENDQTR + 1):
            startloop = time.perf_counter()
            n_qtr = 0
            file_count = {}
            # Setup output path
            # path = PARM_PATH
            path = '{0}{1}\\QTR{2}\\'.format(PARM_PATH, str(year), str(qtr))
            '''
            if not os.path.exists(PARM_PATH):
                os.makedirs(PARM_PATH)
                print('Path: {0} created'.format(PARM_PATH))
            '''
            masterindex = EDGAR_Pac.download_masterindex(year, qtr, True)
            if masterindex:
                for item in masterindex:
                    while EDGAR_Pac.edgar_server_not_available(
                            True):  # kill time when server not available
                        pass
                    if item.form in PARM_FORMS:
                        n_qtr += 1
                        # Keep track of filings and identify duplicate filings
                        fid = str(item.cik) + str(item.filingdate) + item.form
                        if fid in file_count:
                            file_count[fid] += 1
                        else:
                            file_count[fid] = 1
                        # Setup EDGAR URL and output file name
                        url = PARM_EDGARPREFIX + item.path
                        fname = (PARM_PATH + str(item.filingdate) + '_' +
                                 item.form.replace('/', '-') + '_' +
                                 item.path.replace('/', '_'))
                        fname = fname.replace(
                            '.txt', '_' + str(file_count[fid]) + '.txt')
                        if fname not in file_list:
                            return_url = General_Utilities.download_to_file(
                                url, fname, f_log)
                            if return_url:
                                n_errs += 1
                            n_tot += 1
                        # time.sleep(1)  # Space out requests
            print(
                str(year) + ':' + str(qtr) + ' -> {0:,}'.format(n_qtr) +
                ' downloads completed.  Time = ' +
                time.strftime('%H:%M:%S', time.gmtime(time.perf_counter() -
                                                      startloop)) + ' | ' +
                time.strftime('%c'))
            f_log.write(
                '{0} | {1} | n_qtr = {2:>8,} | n_tot = {3:>8,} | n_err = {4:>6,} | {5}\n'
                .format(year, qtr, n_qtr, n_tot, n_errs, time.strftime('%c')))

            f_log.flush()

    print('{0:,} total forms downloaded.'.format(n_tot))
    f_log.write('\n{0:,} total forms downloaded.'.format(n_tot))
    f_log.close()

if __name__ == "__main__":
    # Assumed preamble (the opening lines are not shown on this page);
    # argparse, os, gu and tqdm are expected to be imported at the top of the original file.
    parse_in = argparse.ArgumentParser()
    parse_in.add_argument(
        '--base_setup',
        type=str,
        default='Baseline_Parameters.txt',
        help=
        'Path to baseline setup-txt which contains all major parameters that most likely will be kept constant during various grid searches.'
    )
    parse_in.add_argument(
        '--search_setup',
        type=str,
        default='',
        help=
        'Path to search setup-txt, which contains (multiple) variations to the baseline proposed above.'
    )
    parse_in.add_argument('--no_date',
                          action='store_true',
                          help='Do not use date when logging files.')
    # opt = parse_in.parse_args(['--search_setup','Small_UNet_Lesion.txt'])
    opt = parse_in.parse_args()

    assert opt.search_setup != '', 'Please provide a Variation-Parameter Text File!'

    opt.base_setup = os.getcwd() + '/Training_Setup_Files/' + opt.base_setup
    opt.search_setup = os.getcwd(
    ) + '/Training_Setup_Files/' + opt.search_setup

    training_setups = gu.extract_setup_info(opt)

    for training_setup in tqdm(training_setups,
                               desc='Setup Iteration... ',
                               position=0):
        main(training_setup)
Example #21
     along with XPAT.  If not, see <http://www.gnu.org/licenses/>.

'''
import fileIO.Write_1dData as w1d
import PDB_Data as pdb_data
import Update_Pdb_List as update_pdb_list
import General_Utilities as gen_util
FOLDER = "/home/aklab/Projects/TIM/TIM-Analysis/pdbs"
FOLDER = "/home/aklab/Projects/TIM/TIM-Analysis"

LOG_FOLDER = "%s/log" % (FOLDER)

max_resolution = 2.50
ORGANISM_SEQUENCE = {}

gen_util.check_folder(LOG_FOLDER)
# Known issue: the FOLDER variable cannot be accessed from the XPAT class.


def read_pdb_list(fOpen, org_seq):
    global pdb_list, ORGANISM_SEQUENCE

    ORGANISM_SEQUENCE = org_seq

    fileOpen = open(fOpen, "r")
    list = fileOpen.readlines()  # note: shadows the built-in 'list' inside this function

    pdb_list = []