def main():
    """Walk every page in ``url_list`` and run ``extract`` on each listed entry.

    For each URL the page is downloaded and parsed, the text of the first
    ``<script language="javascript">`` element is searched with the
    module-level ``regex``, and the first match is decoded as JSON.  Every
    decoded entry contributes one link, ``settings.domain + "/" + nodeRef``,
    which is handed to ``extract`` together with the image directory.

    :raises IndexError: if a page has no matching script element or the
        regex finds nothing in it (both results are indexed with ``[0]``).
    :raises KeyError: if a decoded entry has no ``'nodeRef'`` key.
    """
    # Image directory resolved relative to the folder holding this script.
    root = os.path.abspath(os.path.dirname(__file__))
    imgDir = os.path.join(root, settings.images_directory)

    # make sure the images directory exists
    settings.makeDir(settings.images_directory)

    for url in url_list:
        response_info = requests.get(url)
        tree = html.fromstring(response_info.text)

        # Only the first javascript <script> element is used; per the original
        # author's note it should carry about 100 entries.
        script = tree.xpath('//script[@language="javascript"]/text()')[0]

        # First regex match inside the script text, decoded as JSON.
        json_string = regex.findall(script)[0]
        json_data = json.loads(json_string)

        # Build every link before extracting, so a malformed entry fails the
        # page before any of its links is processed.
        links = [settings.domain + "/" + entry['nodeRef'] for entry in json_data]
        for link in links:
            extract(link, imgDir)
    #os.rename(os.path.join(prefix + "_train.lst"), os.path.join(root, prefix + ".lst"))


def makeRecFile(prefix, imageloc):
    """
    Run mxnet's ``tools/im2rec.py`` (expected under ``~/mxnet``) with
    ``--encoding .png`` on the given list file and image location.

    The command is passed to ``subprocess.call`` as an argument list rather
    than a shell string, so paths containing spaces or shell metacharacters
    reach the tool intact instead of being split or interpreted by a shell.

    :param prefix: name of the list file
    :param imageloc: where images are located, both positive and negative samples
    :return: None (the tool's exit status is not inspected)
    :raises OSError: if the ``python`` executable cannot be started
    """
    # Local import: with no shell involved, "~" must be expanded explicitly.
    import os

    command = [
        "python",
        os.path.expanduser("~/mxnet/tools/im2rec.py"),
        "--encoding", ".png",
        # A shell would have expanded a leading "~" in these too.
        os.path.expanduser(prefix),
        os.path.expanduser(imageloc),
    ]
    subprocess.call(command)


# The RecordIO output directory has to exist before anything is written to it.
settings.makeDir(settings.record_IO_directory)

# All paths below are anchored at the directory that contains this script.
root = os.path.abspath(os.path.dirname(__file__))
recordIODir = os.path.join(root, settings.record_IO_directory)

# Per-split list-file prefixes inside the RecordIO directory.
val, test, train = (
    os.path.join(recordIODir, split_name)
    for split_name in ("Val", "Test", "Train")
)

# Per-split image directories.
valdir, testdir, traindir = (
    os.path.join(root, image_dir)
    for image_dir in (
        settings.mxnet_images_val_dir,
        settings.mxnet_images_test_dir,
        settings.mxnet_images_train_dir,
    )
)

# Build the list file for the validation split.
makeLstFile(val, valdir)
# ---- Example no. 3 ----
# 0
from PySide.QtGui import QApplication, QMessageBox, QFileDialog, \
            QInputDialog, QLineEdit
from PySide.QtCore import QObject, Slot, Signal

from sumokoin.address import Address

from utils.common import print_money, print_money2

from settings import APP_NAME, VERSION, DATA_DIR, COIN, makeDir, seed_languages
from utils.logger import log, LEVEL_ERROR, LEVEL_INFO

# Text of the form "<APP_NAME> v<VERSION[0]>.<VERSION[1]>"
# (presumably shown on the tray icon -- confirm at the use site).
tray_icon_tooltip = "%s v%d.%d" % (APP_NAME, VERSION[0], VERSION[1])

# <DATA_DIR>/wallets, passed to settings.makeDir at import time
# (presumably created if missing -- TODO confirm in settings.makeDir).
wallet_dir_path = os.path.join(DATA_DIR, 'wallets')
makeDir(wallet_dir_path)

# <DATA_DIR>/logs, handled the same way as the wallets directory.
wallet_log_dir_path = os.path.join(DATA_DIR, 'logs')
makeDir(wallet_log_dir_path)

# Anchored pattern: 1 to 256 characters, each an ASCII letter, a digit or
# one of ! @ # $ % ^ & *; the whole run is captured in group 1.
password_regex = re.compile(r"^([a-zA-Z0-9!@#\$%\^&\*]{1,256})$")
# "wallet_" followed by one or more digits; the digits are captured in group 1.
wallet_file_regex = re.compile(r"wallet_(\d+)")

from webui import LogViewer


class Hub(QObject):
    """QObject subclass; only its constructor is visible here.

    NOTE(review): the class body looks truncated in this excerpt -- ``app``
    is accepted by ``__init__`` but not used in the visible code.
    """
    # Class-level attribute, initialised to 0.
    current_block_height = 0

    def __init__(self, app):
        # Initialise the QObject base class.
        super(Hub, self).__init__()
def _split_counts(total):
    """Return ``(train, test, val)`` counts for ``total`` items.

    Each count is ``int(percent * total)`` using the percentages in
    ``settings``; whatever the truncation leaves over is added to train, so
    the three counts always sum to ``total``.
    """
    n_train = int(settings.trainpercent * total)
    n_test = int(settings.testpercent * total)
    n_val = int(settings.valpercent * total)
    n_train += total - n_train - n_test - n_val
    return n_train, n_test, n_val


def _split_list(items, n_train, n_test):
    """Cut ``items`` into consecutive train / test / val slices (val = the rest)."""
    train_end = n_train
    test_end = n_train + n_test
    return items[:train_end], items[train_end:test_end], items[test_end:]


def main():
    """Split the diagnosed images into train/val/test folders for mxnet.

    Steps, in order:

    1. Load ``settings.json_data_file`` and map every record key to its
       diagnosis via ``get_items``.
    2. Print summary statistics about the distinct diagnoses.
    3. Collect the keys whose diagnosis contains "normal" or "nodule"
       (diagnoses containing both are dropped with a warning).
    4. Optionally shrink both lists when ``settings.test_run`` is set.
    5. Create (and optionally clear) the per-split output directories.
    6. Copy the images into the split directories with ``copyfiles``;
       nodule images are copied with ``flip=True``.
    7. Optionally top up the nodule directories with ``duplicatefiles`` when
       ``settings.force_balance`` is set and there are more normal images.
    8. Print the module-level ``invalid_images`` collection.

    Returns nothing; all results are files on disk and printed output.
    """
    from collections import defaultdict

    # ------------------------------------------------------------------
    # Load the records: {record key: diagnosis}
    # ------------------------------------------------------------------
    with open(settings.json_data_file) as data_file:
        alldata = json.load(data_file)
    all_diagnosis = {k: get_items(v) for k, v in alldata.items()}

    total_images = len(all_diagnosis)
    print("number initial records " + str(total_images))

    # ------------------------------------------------------------------
    # Count how often each distinct diagnosis occurs (keys in sorted order)
    # ------------------------------------------------------------------
    unique_combos = dict(sorted(Counter(all_diagnosis.values()).items()))
    print("number unique combos " + str(len(unique_combos)))

    # Ascending by number of cases; show the 40 most frequent, then the top
    # four without "missing".  The slice is already ordered by count, so no
    # second sort is needed.
    by_count = sorted(unique_combos.items(), key=itemgetter(1))
    print(by_count[-40:])
    labels = [entry for entry in by_count[-4:] if entry[0] != "missing"]
    print(labels)

    # ------------------------------------------------------------------
    # Invert the mapping: {diagnosis: [record keys ...]}
    # ------------------------------------------------------------------
    index_list = defaultdict(list)
    for key, value in all_diagnosis.items():
        index_list[value].append(key)

    # ------------------------------------------------------------------
    # Gather the record keys for the two classes we train on
    # ------------------------------------------------------------------
    normal_images_list = []
    nodule_images_list = []

    for diagnosis, image_ids in index_list.items():
        has_normal = "normal" in diagnosis
        has_nodule = "nodule" in diagnosis
        if has_normal and has_nodule:
            # Ambiguous label: cannot be assigned to either class.
            print(" WARNING-Throwing out " + str(len(image_ids)) +
                  " values, has both nodule and normal in " + diagnosis)
        elif has_normal:
            normal_images_list.extend(image_ids)
        elif has_nodule:
            nodule_images_list.extend(image_ids)

    numb_nodule = len(nodule_images_list)
    numb_normal = len(normal_images_list)
    print("Number nodule: " + str(numb_nodule))
    print("Number normal: " + str(numb_normal))
    print("if normal and nodule are not approximately equal then dataset is unbalanced")

    # In a test run keep at most settings.test_images nodule images and twice
    # that many normal ones (nodule images are doubled later by flipping).
    if settings.test_run:
        nodule_images_list = nodule_images_list[:settings.test_images]
        normal_images_list = normal_images_list[:settings.test_images * 2]
        # Count what was actually kept: if fewer images exist than requested,
        # the split sizes below must not exceed the real list lengths.
        numb_nodule = len(nodule_images_list)
        numb_normal = len(normal_images_list)
    print("Running test on " + str(numb_nodule) + " images")

    # ------------------------------------------------------------------
    # Create the per-split / per-class directories used to build the
    # mxnet .rec files, and clear them if requested
    # ------------------------------------------------------------------
    for directory in (
        settings.mxnet_images_train_dir,
        settings.mxnet_images_train_nodule_dir,
        settings.mxnet_images_train_normal_dir,
        settings.mxnet_images_val_dir,
        settings.mxnet_images_val_nodule_dir,
        settings.mxnet_images_val_normal_dir,
        settings.mxnet_images_test_dir,
        settings.mxnet_images_test_nodule_dir,
        settings.mxnet_images_test_normal_dir,
    ):
        settings.makeDir(directory)

    if settings.clear_subdirs:
        for directory in (
            settings.mxnet_images_train_nodule_dir,
            settings.mxnet_images_train_normal_dir,
            settings.mxnet_images_val_nodule_dir,
            settings.mxnet_images_val_normal_dir,
            settings.mxnet_images_test_nodule_dir,
            settings.mxnet_images_test_normal_dir,
        ):
            settings.clear_folder_files(directory)

    # ------------------------------------------------------------------
    # Work out how many images go in each split, then slice the lists
    # ------------------------------------------------------------------
    normal_train, normal_test, _normal_val = _split_counts(numb_normal)
    nodule_train, nodule_test, nodule_val = _split_counts(numb_nodule)

    normal_train_list, normal_test_list, normal_val_list = _split_list(
        normal_images_list, normal_train, normal_test)
    nodule_train_list, nodule_test_list, nodule_val_list = _split_list(
        nodule_images_list, nodule_train, nodule_test)

    # ------------------------------------------------------------------
    # Copy the images into their split directories.
    # Original author's note: the dataset is unbalanced (about 10 to 1),
    # with 211 nodules and 2706 normals.
    # New file numbers start after the existing record count; copyfiles
    # returns the next free number.
    # ------------------------------------------------------------------
    next_image_number = total_images + 1
    copy_plan = (
        # (destination directory, record keys, flip)
        (settings.mxnet_images_test_nodule_dir, nodule_test_list, True),
        (settings.mxnet_images_val_nodule_dir, nodule_val_list, True),
        (settings.mxnet_images_train_nodule_dir, nodule_train_list, True),
        (settings.mxnet_images_test_normal_dir, normal_test_list, False),
        (settings.mxnet_images_val_normal_dir, normal_val_list, False),
        (settings.mxnet_images_train_normal_dir, normal_train_list, False),
    )
    for destination, image_ids, flip in copy_plan:
        next_image_number = copyfiles(
            settings.images_directory, destination, image_ids,
            next_image_number, flip=flip, crop=True, resize=True,
            newsize=(224, 224))

    # ------------------------------------------------------------------
    # Optionally balance the classes by DUPLICATING existing nodule files,
    # spreading the shortfall over the splits with the same percentages
    # ------------------------------------------------------------------
    if settings.force_balance:
        diff = numb_normal - numb_nodule
        if diff > 0:
            diff_train, diff_test, diff_val = _split_counts(diff)

            next_image_number = duplicatefiles(
                settings.mxnet_images_test_nodule_dir,
                nodule_test + diff_test, next_image_number)
            next_image_number = duplicatefiles(
                settings.mxnet_images_val_nodule_dir,
                nodule_val + diff_val, next_image_number)
            next_image_number = duplicatefiles(
                settings.mxnet_images_train_nodule_dir,
                nodule_train + diff_train, next_image_number)

    print("number of bogus images")
    for image in invalid_images:
        print(image)