Code Example #1
def test_luna_patches_3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    # pid = '1.3.6.1.4.1.14519.5.2.1.6279.6001.138080888843357047811238713686'
    # luna_data_paths = [pathfinder.LUNA_DATA_PATH + '/%s.mhd' % pid]
    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        # img = data_transforms.hu2normHU(img)
        id = os.path.basename(p).replace('.mhd', '')
        print id

        annotations = id2zyxd[id]
        print annotations
        for zyxd in annotations:
            img_out, mask = config().data_prep_function_train(img,
                                                              pixel_spacing=pixel_spacing,
                                                              p_transform=config().p_transform,
                                                              p_transform_augment=config().p_transform_augment,
                                                              patch_center=zyxd,
                                                              luna_annotations=annotations,
                                                              luna_origin=origin)
            try:
                plot_slice_3d_2(img_out, mask, 0, id)
                plot_slice_3d_2(img_out, mask, 1, id)
                plot_slice_3d_2(img_out, mask, 2, id)
            except:
                pass
        print '------------------------------------------'
Code Example #2
File: data.py Project: 317070/kaggle-heart
def sample_augmentation_parameters(transformation):
    # TODO: bad thing to mix fixed and random params!!!
    if set(transformation.keys()) == {'patch_size', 'mm_patch_size'} or \
                    set(transformation.keys()) == {'patch_size', 'mm_patch_size', 'mask_roi'}:
        return None

    shift_x = config().rng.uniform(*transformation.get('translation_range_x', [0., 0.]))
    shift_y = config().rng.uniform(*transformation.get('translation_range_y', [0., 0.]))
    translation = (shift_x, shift_y)
    rotation = config().rng.uniform(*transformation.get('rotation_range', [0., 0.]))
    shear = config().rng.uniform(*transformation.get('shear_range', [0., 0.]))
    roi_scale = config().rng.uniform(*transformation.get('roi_scale_range', [1., 1.]))
    z = config().rng.uniform(*transformation.get('zoom_range', [1., 1.]))
    zoom = (z, z)

    if 'do_flip' in transformation:
        if type(transformation['do_flip']) == tuple:
            flip_x = config().rng.randint(2) > 0 if transformation['do_flip'][0] else False
            flip_y = config().rng.randint(2) > 0 if transformation['do_flip'][1] else False
        else:
            flip_x = config().rng.randint(2) > 0 if transformation['do_flip'] else False
            flip_y = False
    else:
        flip_x, flip_y = False, False

    sequence_shift = config().rng.randint(30) if transformation.get('sequence_shift', False) else 0

    return namedtuple('Params', ['translation', 'rotation', 'shear', 'zoom',
                                 'roi_scale',
                                 'flip_x', 'flip_y',
                                 'sequence_shift'])(translation, rotation, shear, zoom,
                                                    roi_scale,
                                                    flip_x, flip_y,
                                                    sequence_shift)
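A minimal usage sketch for the function above, assuming a configuration with an `rng` attribute has already been set (as in the kaggle-heart configs); the transformation dict below is hypothetical and only uses keys the function actually reads.

# Hypothetical transformation dict; keys beyond patch_size/mm_patch_size
# make the function sample random parameters instead of returning None.
transformation = {
    'patch_size': (64, 64),
    'mm_patch_size': (64, 64),
    'translation_range_x': [-5., 5.],
    'translation_range_y': [-5., 5.],
    'rotation_range': [-10., 10.],
    'do_flip': True,
}

params = sample_augmentation_parameters(transformation)
if params is not None:
    print(params.translation, params.rotation, params.flip_x)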
Code Example #3
def import_cash_crops(ccrops_list):

    sql = "INSERT INTO cash_crops(cash_crop) VALUES(%s)"
    conn = None

    try:

        # read database configuration
        params = configuration.config()

        # connect to the PostgreSQL database
        conn = psycopg2.connect(**params)

        # create a new cursor
        cur = conn.cursor()

        # execute the INSERT statement
        cur.executemany(sql, ccrops_list)

        # commit the changes to the database
        conn.commit()

        # close communication with the database
        cur.close()

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

    finally:
        if conn is not None:
            conn.close()
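This example (and Code Example #13 below) assumes a `configuration.config()` helper that returns keyword arguments for `psycopg2.connect()`. A minimal sketch of such a helper, assuming a `database.ini` file parsed with `configparser` (file name and section are assumptions):

from configparser import ConfigParser

def config(filename='database.ini', section='postgresql'):
    # Read the INI file and return the requested section as a dict,
    # suitable for psycopg2.connect(**params).
    parser = ConfigParser()
    parser.read(filename)
    if not parser.has_section(section):
        raise Exception('Section {0} not found in {1}'.format(section, filename))
    return dict(parser.items(section))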
Code Example #4
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    # luna_data_paths = [
    #     pathfinder.LUNA_DATA_PATH + '/1.3.6.1.4.1.14519.5.2.1.6279.6001.287966244644280690737019247886.mhd']

    luna_data_paths = [
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.943403138251347598519939390311.mhd']
    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        id = os.path.basename(p).replace('.mhd', '')
        print id

        annotations = id2zyxd[id]

        img_out, mask, annotations_out = config().data_prep_function(img,
                                                                     pixel_spacing=pixel_spacing,
                                                                     luna_annotations=annotations,
                                                                     luna_origin=origin)

        mask[mask == 0.] = 0.1
        print annotations_out
        for zyxd in annotations_out:
            plot_slice_3d_2(img_out, mask, 0, id, idx=zyxd)
            plot_slice_3d_2(img_out, mask, 1, id, idx=zyxd)
            plot_slice_3d_2(img_out, mask, 2, id, idx=zyxd)
Code Example #5
File: Robot_Main.py Project: twallace27603/piStuff
    def __init__(self):
        self.w = wheels.wheels()
        self.s = Sensors()
        self.config = configuration.config()
        self.w.setConfig(self.config)
        self.s.setConfig(self.config)
        self.s.calibration = True
Code Example #6
File: addons.py Project: kynikos/outspline
def start_interface():
    interface = None

    for i in configuration.config('Interfaces').get_sections():
        if configuration.config('Interfaces')(i).get_bool('enabled'):
            # Exactly one interface must be enabled
            if interface:
                raise exceptions.MultipleInterfacesError()
            else:
                interface = sys.modules['outspline.interfaces.' + i]

    # Exactly one interface must be enabled
    if interface:
        interface.loop()
    else:
        raise exceptions.InterfaceNotFoundError()
Code Example #7
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    # luna_data_paths = [
    #     pathfinder.LUNA_DATA_PATH + '/1.3.6.1.4.1.14519.5.2.1.6279.6001.287966244644280690737019247886.mhd']

    luna_data_paths = [
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.943403138251347598519939390311.mhd']
    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        id = os.path.basename(p).replace('.mhd', '')
        print(id)

        annotations = id2zyxd[id]

        img_out, mask, annotations_out = config().data_prep_function(img,
                                                                     pixel_spacing=pixel_spacing,
                                                                     luna_annotations=annotations,
                                                                     luna_origin=origin)

        mask[mask == 0.] = 0.1
        print(annotations_out)
        for zyxd in annotations_out:
            plot_slice_3d_2(img_out, mask, 0, id, idx=zyxd)
            plot_slice_3d_2(img_out, mask, 1, id, idx=zyxd)
            plot_slice_3d_2(img_out, mask, 2, id, idx=zyxd)
Code Example #8
def count_proportion():
    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(
        pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    n_white = 0
    n_black = 0

    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        img = data_transforms.hu2normHU(img)
        id = os.path.basename(p).replace('.mhd', '')
        print id

        annotations = id2zyxd[id]

        img_out, annotations_out = data_transforms.transform_scan3d(
            img,
            pixel_spacing=pixel_spacing,
            p_transform=config().p_transform,
            p_transform_augment=None,
            # config().p_transform_augment,
            luna_annotations=annotations,
            luna_origin=origin)

        mask = data_transforms.make_3d_mask_from_annotations(img_out.shape,
                                                             annotations_out,
                                                             shape='sphere')
        n_white += np.sum(mask)
        n_black += mask.shape[0] * mask.shape[1] * mask.shape[2] - np.sum(mask)

        print 'white', n_white
        print 'black', n_black
Code Example #9
File: databaseAPI.py Project: rslissa/SerraSmart
    def connect(self):
        """ Connect to the PostgreSQL database server """
        try:
            # read connection parameters
            params = config()

            # connect to the PostgreSQL server
            print('Connecting to the PostgreSQL database...')
            self.conn = psycopg2.connect(**params)

            # create a cursor
            cur = self.conn.cursor()

            # execute a statement
            print('PostgreSQL database version:')
            cur.execute('SELECT version()')

            # display the PostgreSQL database server version
            db_version = cur.fetchone()
            print(db_version)

            # close the communication with the PostgreSQL
            cur.close()
        except (Exception, psycopg2.DatabaseError) as error:
            print(error)
Code Example #10
def get_s3_resource():
    """
    The calls to AWS STS AssumeRole must be signed with the access key ID and secret access key of an existing IAM user.
    The credentials can be in environment variables or in a configuration file and will be discovered automatically by the boto3.client() function.
    For more information, see the Python SDK documentation: http://boto3.readthedocs.io/en/latest/reference/services/sts.html#client

    Output: S3 resource object
    """

    if not hasattr(get_s3_resource, 's3_resource'):

        get_s3_resource.s3_resource = boto3.resource('s3')

        configuration = config()
        if configuration['other'].getboolean('cross_account_access'):
            sts_client = boto3.client('sts')
            response = sts_client.assume_role(
                RoleArn=configuration['other']['cross_account_access_role'],
                RoleSessionName="AssumeRoleSession")
            get_s3_resource.s3_resource = boto3.resource(
                's3',
                aws_access_key_id=response['Credentials']['AccessKeyId'],
                aws_secret_access_key=response['Credentials']
                ['SecretAccessKey'],
                aws_session_token=response['Credentials']['SessionToken'],
            )

    return get_s3_resource.s3_resource
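A hedged usage sketch: the resource is cached on the function object, so repeated calls reuse the same boto3 resource. The bucket name below is hypothetical.

s3 = get_s3_resource()
bucket = s3.Bucket('my-lob-data-bucket')  # hypothetical bucket name
for obj in bucket.objects.limit(10):      # list a few keys to verify access
    print(obj.key)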
Code Example #11
def start_interface():
    interface = None

    for i in configuration.config('Interfaces').get_sections():
        if configuration.config('Interfaces')(i).get_bool('enabled'):
            # Exactly one interface must be enabled
            if interface:
                raise exceptions.MultipleInterfacesError()
            else:
                interface = sys.modules['outspline.interfaces.' + i]

    # Exactly one interface must be enabled
    if interface:
        interface.loop()
    else:
        raise exceptions.InterfaceNotFoundError()
Code Example #12
def count_proportion():
    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    n_white = 0
    n_black = 0

    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        img = data_transforms.hu2normHU(img)
        id = os.path.basename(p).replace('.mhd', '')
        print id

        annotations = id2zyxd[id]

        img_out, annotations_out = data_transforms.transform_scan3d(img,
                                                                    pixel_spacing=pixel_spacing,
                                                                    p_transform=config().p_transform,
                                                                    p_transform_augment=None,
                                                                    # config().p_transform_augment,
                                                                    luna_annotations=annotations,
                                                                    luna_origin=origin)

        mask = data_transforms.make_3d_mask_from_annotations(img_out.shape, annotations_out, shape='sphere')
        n_white += np.sum(mask)
        n_black += mask.shape[0] * mask.shape[1] * mask.shape[2] - np.sum(mask)

        print 'white', n_white
        print 'black', n_black
Code Example #13
def connect_to_postgresql():

    # read database configuration
    params = configuration.config()

    # connect to the PostgreSQL database
    conn = psycopg2.connect(**params)

    return conn
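A short usage sketch of the helper above; the query is only there to illustrate the connect / execute / close pattern.

conn = connect_to_postgresql()
try:
    with conn.cursor() as cur:            # psycopg2 cursors are context managers
        cur.execute('SELECT version()')
        print(cur.fetchone())
finally:
    conn.close()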
Code Example #14
def preprocess_with_augmentation(patient_data,
                                 result,
                                 index,
                                 augment=True,
                                 metadata=None,
                                 testaug=False):
    """
    Load the resulting data, augment it if needed, and put it in result at the correct index
    :param patient_data:
    :param result:
    :param index:
    :return:
    """
    if augment:
        augmentation_parameters = sample_augmentation_parameters()
    else:
        augmentation_parameters = None

    for tag, data in patient_data.iteritems():
        metadata_tag = metadata[tag]
        desired_shape = result[tag][index].shape
        # try to fit data into the desired shape
        if tag.startswith("sliced:data:singleslice"):
            cleaning_processes = getattr(config(), 'cleaning_processes', [])
            data = clean_images([patient_data[tag]],
                                metadata=metadata_tag,
                                cleaning_processes=cleaning_processes)
            patient_4d_tensor, zoom_ratios = resize_and_augment(
                data,
                output_shape=desired_shape[-2:],
                augment=augmentation_parameters)[0]
            if "area_per_pixel:sax" in result:
                result["area_per_pixel:sax"][index] = zoom_ratios[0] * np.prod(
                    metadata_tag["PixelSpacing"])

            put_in_the_middle(result[tag][index], patient_4d_tensor)
        elif tag.startswith("sliced:data"):
            # put time dimension first, then axis dimension
            data = clean_images(patient_data[tag], metadata=metadata_tag)
            patient_4d_tensor, zoom_ratios = resize_and_augment(
                data,
                output_shape=desired_shape[-2:],
                augment=augmentation_parameters)
            if "area_per_pixel:sax" in result:
                result["area_per_pixel:sax"][index] = zoom_ratios[0] * np.prod(
                    metadata_tag[0]["PixelSpacing"])

            if "noswitch" not in tag:
                patient_4d_tensor = np.swapaxes(patient_4d_tensor, 1, 0)

            put_in_the_middle(result[tag][index], patient_4d_tensor)
        if tag.startswith("sliced:data:shape"):
            result[tag][index] = patient_data[tag]
        if tag.startswith("sliced:meta:"):
            # TODO: this probably doesn't work very well yet
            result[tag][index] = patient_data[tag]
    return
Code Example #15
def download_s3_folder(lob_data_bucket, day_folder, keys):
    configuration = config()
    raw_data_folder = configuration['folders']['raw_lob_data']

    with futures.ThreadPoolExecutor(max_workers=100) as executor:
        future_to_key = {
            executor.submit(download_S3_object, lob_data_bucket, key,
                            f'{raw_data_folder}/tmp'): key
            for key in keys
        }
        for future in futures.as_completed(future_to_key):
            future_to_key[future]
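`download_S3_object` is referenced above but not shown; a plausible sketch, assuming it wraps boto3's `Bucket.download_file` (the exact helper in the original project may differ):

import os

def download_S3_object(bucket, key, destination_folder):
    # Hypothetical helper: fetch one object and store it under destination_folder,
    # keeping only the basename of the S3 key as the local file name.
    os.makedirs(destination_folder, exist_ok=True)
    local_path = os.path.join(destination_folder, os.path.basename(key))
    bucket.download_file(key, local_path)
    return local_path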
Code Example #16
def sample_augmentation_parameters(transformation):
    # TODO: bad thing to mix fixed and random params!!!
    if set(transformation.keys()) == {'patch_size', 'mm_patch_size'} or \
                    set(transformation.keys()) == {'patch_size', 'mm_patch_size', 'mask_roi'}:
        return None

    shift_x = config().rng.uniform(
        *transformation.get('translation_range_x', [0., 0.]))
    shift_y = config().rng.uniform(
        *transformation.get('translation_range_y', [0., 0.]))
    translation = (shift_x, shift_y)
    rotation = config().rng.uniform(
        *transformation.get('rotation_range', [0., 0.]))
    shear = config().rng.uniform(*transformation.get('shear_range', [0., 0.]))
    roi_scale = config().rng.uniform(
        *transformation.get('roi_scale_range', [1., 1.]))
    z = config().rng.uniform(*transformation.get('zoom_range', [1., 1.]))
    zoom = (z, z)

    if 'do_flip' in transformation:
        if type(transformation['do_flip']) == tuple:
            flip_x = config().rng.randint(
                2) > 0 if transformation['do_flip'][0] else False
            flip_y = config().rng.randint(
                2) > 0 if transformation['do_flip'][1] else False
        else:
            flip_x = config().rng.randint(
                2) > 0 if transformation['do_flip'] else False
            flip_y = False
    else:
        flip_x, flip_y = False, False

    sequence_shift = config().rng.randint(30) if transformation.get(
        'sequence_shift', False) else 0

    return namedtuple('Params', [
        'translation', 'rotation', 'shear', 'zoom', 'roi_scale', 'flip_x',
        'flip_y', 'sequence_shift'
    ])(translation, rotation, shear, zoom, roi_scale, flip_x, flip_y,
       sequence_shift)
Code Example #17
File: GA.py Project: seasun525/PyPyJITTuner
def evaluate(individual):
    Config = config()
    value_list = Config.decode_list(individual)
    y1 = value_list[0]**2 + value_list[1]**2 + value_list[2]**2 + 10
    y2 = (value_list[3] - 1)**2 + (value_list[4] - 1)**2 + 15
    y3 = (value_list[5] - 1)**2 + 20
    t_y1 = ()
    t_y2 = ()
    t_y3 = ()
    for i in range(20):
        t_y1 = t_y1 + (y1 + random.random() * 5, )
        t_y2 = t_y2 + (y2 + random.random() * 5, )
        t_y3 = t_y3 + (y3 + random.random() * 5, )
    return t_y1, t_y2, t_y3  #, y3, y3, y3, y3, y3, y3, y3, y3, y3, y3, y3,y3,y3
Code Example #18
def test_luna_patches_3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = utils_lung.get_patient_data_paths(
        pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]

    # pid = '1.3.6.1.4.1.14519.5.2.1.6279.6001.138080888843357047811238713686'
    # luna_data_paths = [pathfinder.LUNA_DATA_PATH + '/%s.mhd' % pid]
    for k, p in enumerate(luna_data_paths):
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        # img = data_transforms.hu2normHU(img)
        id = os.path.basename(p).replace('.mhd', '')
        print(id)

        annotations = id2zyxd[id]
        print(annotations)
        for zyxd in annotations:
            img_out, mask = config().data_prep_function_train(
                img,
                pixel_spacing=pixel_spacing,
                p_transform=config().p_transform,
                p_transform_augment=config().p_transform_augment,
                patch_center=zyxd,
                luna_annotations=annotations,
                luna_origin=origin)
            try:
                plot_slice_3d_2(img_out, mask, 0, id)
                plot_slice_3d_2(img_out, mask, 1, id)
                plot_slice_3d_2(img_out, mask, 2, id)
            except:
                pass
        print('------------------------------------------')
Code Example #19
def sample_test_augmentation_parameters():
    global quasi_random_generator

    augm = config().augmentation_params_test if hasattr(
        config(), 'augmentation_params_test') else config().augmentation_params
    if "translation" in augm:
        newdict = dict()
        if "translation" in augm:
            newdict["translate_x"] = augm["translation"]
            newdict["translate_y"] = augm["translation"]
        if "shear" in augm:
            newdict["shear"] = augm["shear"]
        if "flip_vert" in augm:
            newdict["flip_vert"] = augm["flip_vert"]
        if "roll_time" in augm:
            newdict["roll_time"] = augm["roll_time"]
        if "flip_time" in augm:
            newdict["flip_time"] = augm["flip_time"]
        augmentation_params = dict(DEFAULT_AUGMENTATION_PARAMETERS, **newdict)
    else:
        augmentation_params = dict(DEFAULT_AUGMENTATION_PARAMETERS, **augm)

    if quasi_random_generator is None:
        quasi_random_generator = quasi_random.scrambled_halton_sequence_generator(
            dimension=len(augmentation_params), permutation='Braaten-Weller')
    res = dict()
    try:
        sample = quasi_random_generator.next()
    except ValueError:
        quasi_random_generator = quasi_random.scrambled_halton_sequence_generator(
            dimension=len(augmentation_params), permutation='Braaten-Weller')
        sample = quasi_random_generator.next()

    for rand, (key, (a, b)) in izip(sample, augmentation_params.iteritems()):
        #res[key] = config().rng.uniform(a,b)
        res[key] = a + rand * (b - a)
    return res
Code Example #20
File: preprocess.py Project: fdoperezi/kaggle-heart
def sample_test_augmentation_parameters():
    global quasi_random_generator

    augm = config().augmentation_params_test if hasattr(config(), 'augmentation_params_test') else config().augmentation_params
    if "translation" in augm:
        newdict = dict()
        if "translation" in augm:
            newdict["translate_x"] = augm["translation"]
            newdict["translate_y"] = augm["translation"]
        if "shear" in augm:
            newdict["shear"] = augm["shear"]
        if "flip_vert" in augm:
            newdict["flip_vert"] = augm["flip_vert"]
        if "roll_time" in augm:
            newdict["roll_time"] = augm["roll_time"]
        if "flip_time" in augm:
            newdict["flip_time"] = augm["flip_time"]
        augmentation_params = dict(DEFAULT_AUGMENTATION_PARAMETERS, **newdict)
    else:
        augmentation_params = dict(DEFAULT_AUGMENTATION_PARAMETERS, **augm)

    if quasi_random_generator is None:
        quasi_random_generator = quasi_random.scrambled_halton_sequence_generator(dimension=len(augmentation_params),
                                                                                  permutation='Braaten-Weller')
    res = dict()
    try:
        sample = quasi_random_generator.next()
    except ValueError:
        quasi_random_generator = quasi_random.scrambled_halton_sequence_generator(dimension=len(augmentation_params),
                                                                                  permutation='Braaten-Weller')
        sample = quasi_random_generator.next()

    for rand, (key, (a, b)) in izip(sample, augmentation_params.iteritems()):
        #res[key] = config().rng.uniform(a,b)
        res[key] = a + rand*(b-a)
    return res
Code Example #21
File: main.py Project: julianVelandia/Scraper
def _news_scraper(news_site_uid):
    host = config()['news_sites'][news_site_uid]['url']

    #logging.info('Beginning scraper for {}'.format(host))
    homepage = news.HomePage(news_site_uid, host)

    articles = []
    for link in homepage.article_links:
        article = _fetch_article(news_site_uid, host, link)

        if article:
            #logger.info('Article fetched!!')
            articles.append(article)

    _save_articles(news_site_uid, articles)
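The scraper's `config()` is not shown here; a hedged sketch of a typical loader, assuming a `config.yaml` with a `news_sites` mapping (file name and structure are assumptions):

import yaml

__config = None

def config():
    # Hypothetical loader: parse config.yaml once and cache the result.
    global __config
    if not __config:
        with open('config.yaml', mode='r') as f:
            __config = yaml.safe_load(f)
    return __config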
Code Example #22
File: preprocess.py Project: fdoperezi/kaggle-heart
def preprocess_with_augmentation(patient_data, result, index, augment=True, metadata=None, testaug=False):
    """
    Load the resulting data, augment it if needed, and put it in result at the correct index
    :param patient_data:
    :param result:
    :param index:
    :return:
    """
    if augment:
        augmentation_parameters = sample_augmentation_parameters()
    else:
        augmentation_parameters = None

    for tag, data in patient_data.iteritems():
        metadata_tag = metadata[tag]
        desired_shape = result[tag][index].shape
        # try to fit data into the desired shape
        if tag.startswith("sliced:data:singleslice"):
            cleaning_processes = getattr(config(), 'cleaning_processes', [])
            data = clean_images(
                [patient_data[tag]], metadata=metadata_tag,
                cleaning_processes=cleaning_processes)
            patient_4d_tensor, zoom_ratios = resize_and_augment(data, output_shape=desired_shape[-2:], augment=augmentation_parameters)[0]
            if "area_per_pixel:sax" in result:
                result["area_per_pixel:sax"][index] = zoom_ratios[0] * np.prod(metadata_tag["PixelSpacing"])

            put_in_the_middle(result[tag][index], patient_4d_tensor)
        elif tag.startswith("sliced:data"):
            # put time dimension first, then axis dimension
            data = clean_images(patient_data[tag], metadata=metadata_tag)
            patient_4d_tensor, zoom_ratios = resize_and_augment(data, output_shape=desired_shape[-2:], augment=augmentation_parameters)
            if "area_per_pixel:sax" in result:
                result["area_per_pixel:sax"][index] = zoom_ratios[0] * np.prod(metadata_tag[0]["PixelSpacing"])

            if "noswitch" not in tag:
                patient_4d_tensor = np.swapaxes(patient_4d_tensor,1,0)

            put_in_the_middle(result[tag][index], patient_4d_tensor)
        if tag.startswith("sliced:data:shape"):
            result[tag][index] = patient_data[tag]
        if tag.startswith("sliced:meta:"):
            # TODO: this probably doesn't work very well yet
            result[tag][index] = patient_data[tag]
    return
Code Example #23
File: main.py Project: julianVelandia/Scraper
def _fetch_article(news_site_uid, host, link):
    #logger.info('Start fetching article at {}'.format(link))

    article = None
    try:
        article = news.ArticlePage(news_site_uid, _build_link(host, link))
    except (HTTPError, ConnectionError, MaxRetryError) as e:
        # logger.warning('Error while fetching the article', exc_info=False)
        pass

    if article and not article.body:
        #logger.warning('Invalid article. There is no body')
        return None

    return article


def _build_link(host, link):
    if is_well_formed_link.match(link):
        return link
    elif is_root_path.match(link):
        return '{}{}'.format(host, link)
    else:
        return '{host}/{uri}'.format(host=host, uri=link)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    news_site_choices = list(config()['news_sites'].keys())
    parser.add_argument('news_site',
                        help='The news site that you want to scrape',
                        type=str,
                        choices=news_site_choices)

    args = parser.parse_args()
    _news_scraper(args.news_site)
Code Example #24
File: predict.py Project: 317070/kaggle-heart
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path
    print "  %s" % submission_path

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print "  number of parameters: %d" % num_params
    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print "    %s %s %s" % (name,  num_param, layer.output_shape)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    data_loader.filter_patient_folders()

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "classification_correction_function"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


    predictions = [{"patient": i+1,
                    "systole": np.zeros((0,600)),
                    "diastole": np.zeros((0,600))
                    } for i in xrange(NUM_PATIENTS)]


    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"]["classification_correction_function"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(patient_ids[b*config().batch_size:(b+1)*config().batch_size]):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx+1,:]
                    kaggle_diastole = kaggle_diastoles[idx:idx+1,:]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    kaggle_systole = classification_correction[b*config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[b*config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] =  np.concatenate((patient_data["systole"], kaggle_systole ),axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole ),axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size>0 and prediction["diastole"].size>0:
            average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])


    print "Calculating training and validation set scores for reference"

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient-1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient-1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient-1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient-1, 2])
                errors.append(error)
        if len(errors)>0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print "  %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print "  %s kaggle loss: not calculated" % (string.rjust(set_name, 12))


    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'prediction_path': prediction_path,
                        'submission_path': submission_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions': predictions,
                        'validation_errors': validation_dict,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    print "dumping submission file to %s" % submission_path
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d'%i for i in xrange(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] + ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print "submission file dumped"

    return
Code Example #25
# predictions path
predictions_dir = utils.get_dir_path('model-predictions',
                                     pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/' + expid

if valid_tta_feat or test_tta_feat or all_tta_feat or train_tta_feat:
    outputs_path += '/features'

utils.auto_make_dir(outputs_path)

if dump:
    prediction_dump = os.path.join(outputs_path,
                                   expid + "_" + args.eval + "_predictions.p")

print('Build model')
model = config().build_model()
model.l_out.load_state_dict(metadata['param_values'])
model.l_out.cuda()
model.l_out.eval()
criterion = config().build_objective()

if test:
    data_iterator = config().test_data_iterator
elif feat:
    data_iterator = config().feat_data_iterator


def get_preds_targs(data_iterator):
    print('Data')
    print('n', sys.argv[2], ': %d' % data_iterator.nsamples)
Code Example #26
def get_trade_data(pair, date_start, date_end,
                   frequency=timedelta(seconds=10)):
    '''
    Function that returns a dataframe of resampled trade data, ready
    to be concatenated to a quotes dataframe with depth (Level = -1)

    Arguments:
    pair -- string, currency pair to return (e.g. 'USDT_BTC')
    date_start -- string, timeseries start
    date_end -- string, timeseries end
    frequency -- timedelta, the minimum time granularity (e.g. timedelta(seconds=10))
    '''

    print(f'Checking for cached trade data from {date_start} to {date_end}')

    configuration = config()
    raw_data_folder = configuration['folders']['raw_trade_data']
    resampled_data_folder = configuration['folders']['resampled_data']

    date_start = datetime.strptime(date_start, '%Y-%m-%d')
    date_end = datetime.strptime(date_end, '%Y-%m-%d')
    freq = f'{int(frequency.total_seconds())}s'
    os.makedirs(f'{resampled_data_folder}/{pair}/trades/{freq}', exist_ok=True)

    data = []

    # Loop through day folders
    date_to_process = date_start
    while date_to_process <= date_end:
        resampled_file_path = f'{resampled_data_folder}/{pair}/trades/{freq}/{datetime.strftime(date_to_process, "%Y-%m-%d")}.csv.gz'
        if os.path.isfile(resampled_file_path):
            print(f'Found {resampled_file_path}')
        else:
            print(f'Generating {resampled_file_path}')
            raw_file_name = f'{pair}-{datetime.strftime(date_to_process, "%Y%m%d")}.csv.gz'
            raw_file_path = f'{raw_data_folder}/{pair}/{raw_file_name}'

            if not os.path.isfile(raw_file_path):
                s3_resource = get_s3_resource()
                trade_data_bucket = s3_resource.Bucket(
                    configuration['buckets']['trade_data'])
                trade_data_bucket.download_file(f'{pair}/{raw_file_name}',
                                                f'{raw_file_path}')
                print(f'Downloaded {raw_file_name} from S3')

            day_data = pd.read_csv(raw_file_path, parse_dates=['date'])

            df_trades_grp = day_data.groupby(
                [pd.Grouper(key='date', freq=freq), 'type']).agg({
                    'amount': 'sum',
                    'rate': 'mean'
                }).reset_index()
            df_trades_piv = df_trades_grp.pivot(values=['amount', 'rate'],
                                                columns='type',
                                                index='date').reset_index()

            df_trades_piv.columns = list(map(
                "_".join, df_trades_piv.columns))  # "flatten" column names
            df_trades_piv.rename(columns={
                'date_': 'Datetime',
                'amount_buy': 'Ask_Size',
                'amount_sell': 'Bid_Size',
                'rate_buy': 'Ask_Price',
                'rate_sell': 'Bid_Price'
            },
                                 inplace=True)

            # fill gaps with no trades - MAYBE we need something similar for quotes as a data integrity check
            start_dt = datetime(date_to_process.year, date_to_process.month,
                                date_to_process.day, 0, 0, 0)
            end_dt = datetime(date_to_process.year, date_to_process.month,
                              date_to_process.day, 23, 59,
                              59)  # to ensure each timestep is covered
            date_range_reindex = pd.DataFrame(pd.date_range(start_dt,
                                                            end_dt,
                                                            freq=freq),
                                              columns=['Datetime'])
            df_trades_piv = pd.merge(df_trades_piv,
                                     date_range_reindex,
                                     right_on='Datetime',
                                     left_on='Datetime',
                                     how='right').sort_values('Datetime')

            # impute NAs - zero for size and last px for price
            df_trades_piv.loc[:, [
                'Ask_Size', 'Bid_Size'
            ]] = df_trades_piv.loc[:, ['Ask_Size', 'Bid_Size']].fillna(0)
            df_trades_piv.loc[:, [
                'Ask_Price', 'Bid_Price'
            ]] = df_trades_piv.loc[:, ['Ask_Price', 'Bid_Price']].fillna(
                method='ffill')

            # impute NAs for the first rows of the dataframes
            try:
                # check if previous day exists and assign last value of previous day df
                prev_day = date_to_process + timedelta(days=-1)
                prev_day_data = pd.read_csv(
                    f'{resampled_data_folder}/{pair}/trades/{freq}/{datetime.strftime(prev_day, "%Y-%m-%d")}.csv.gz'
                )
                prev_file_ask_px = prev_day_data.iloc[-1]['Ask_Price']
                prev_file_bid_px = prev_day_data.iloc[-1]['Bid_Price']

            except Exception as e:
                # if previous day not in the database, use first available future value - not ideal
                print(e)
                print(
                    f'Non-continuous data being processed; imputing first available values for bid/ask prices at the beginning of {date_to_process}'
                )
                # NOT ideal cause we are leaking information
                prev_file_ask_px = df_trades_piv['Ask_Price'].dropna().iloc[0]
                prev_file_bid_px = df_trades_piv['Bid_Price'].dropna().iloc[0]

            df_trades_piv.loc[:,
                              'Bid_Price'] = df_trades_piv.loc[:,
                                                               'Bid_Price'].fillna(
                                                                   prev_file_bid_px
                                                               )
            df_trades_piv.loc[:,
                              'Ask_Price'] = df_trades_piv.loc[:,
                                                               'Ask_Price'].fillna(
                                                                   prev_file_ask_px
                                                               )

            # level -1 to keep it separate from order book depth
            df_trades_piv['Level'] = -1
            df_trades_piv.to_csv(resampled_file_path, compression='gzip')

        date_to_process += timedelta(
            days=1)  # the most nested folder is a day of the month
        data.append(resampled_file_path)

    return dd.read_csv(data, compression='gzip')
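A usage sketch for the function above; the pair and date range are illustrative, and the call assumes the configured data folders (and, if needed, the S3 bucket from the earlier examples) are reachable.

from datetime import timedelta

# Illustrative call: resample USDT_BTC trades into 10-second bars for one week.
trades = get_trade_data('USDT_BTC', '2019-01-01', '2019-01-07',
                        frequency=timedelta(seconds=10))
print(trades.head())  # dask DataFrame; head() computes the first rows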
Code Example #27
File: train.py Project: JamesZhuh/kaggle-heart
                                     )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    required = parser.add_argument_group('required arguments')
    required.add_argument('-c', '--config',
                          help='configuration to run',
                          required=True)
    args = parser.parse_args()
    set_configuration(args.config)

    expid = utils.generate_expid(args.config)

    log_file = LOGS_PATH + "%s.log" % expid
    with print_to_file(log_file):

        print "Running configuration:", config().__name__
        print "Current git version:", utils.get_git_revision_hash()

        train_model(expid)
        print "log saved to '%s'" % log_file
        predict_model(expid)
        print "log saved to '%s'" % log_file


Code Example #28
File: predict.py Project: 317070/kaggle-heart
assert config_name == metadata['configuration']
if 'subconfiguration' in metadata:
    set_subconfiguration(metadata['subconfiguration'])
set_configuration(config_name)

# predictions paths
prediction_dir = utils.get_dir_path('predictions', pathfinder.METADATA_PATH)
prediction_path = prediction_dir + "/%s-%s-%s-%s.pkl" % (metadata['experiment_id'], set, n_tta_iterations, mean)

# submissions paths
submission_dir = utils.get_dir_path('submissions', pathfinder.METADATA_PATH)
submission_path = submission_dir + "/%s-%s-%s-%s.csv" % (metadata['experiment_id'], set, n_tta_iterations, mean)

print "Build model"
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
num_params = nn.layers.count_params(model.l_top)
print '  number of parameters: %d' % num_params
nn.layers.set_all_param_values(model.l_top, metadata['param_values'])

xs_shared = [nn.utils.shared_empty(dim=len(l.shape)) for l in model.l_ins]
givens_in = {}
for l_in, x in izip(model.l_ins, xs_shared):
    givens_in[l_in.input_var] = x

iter_test_det = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.l_outs],
                                givens=givens_in, on_unused_input='warn')

if set == 'train':
Code Example #29
File: train.py Project: RickBoss/Data-Science-Bowl-1
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('train', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
num_params = nn.layers.count_params(model.l_top)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers[:-1]:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

train_loss = config().build_objective(model)
Code Example #30
File: train.py Project: 317070/kaggle-heart
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('train', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
num_params = nn.layers.count_params(model.l_top)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers[:-1]:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

train_loss = config().build_objective(model)
Code Example #31
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

train_loss = config().build_objective(model, deterministic=False)
train_loss2 = config().build_objective2(model, deterministic=False)
Code Example #32
config_name = sys.argv[1]
set_configuration('configs_luna_size_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared

get_predictions_patch = theano.function([],
                                        nn.layers.get_output(model.l_out, deterministic=True),
                                        givens=givens_valid,
                                        on_unused_input='ignore')

data_iterator = config().data_iterator

#existing_preds = [f.rsplit('.') for f in os.listdir(outputs_path)]
#print existing_preds
Code Example #33
File: train.py Project: JamesZhuh/kaggle-heart
def train_model(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    data_loader.filter_patient_folders()

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)

    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)
    if "cutoff_gradients" in interface_layers:
        submodel_params = [param for value in interface_layers["cutoff_gradients"] for param in lasagne.layers.get_all_params(value)]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings = layers_dict.values()
            )
            lasagne.layers.set_all_param_values(pretrained_top_layer, pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print string.ljust("#data:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name,  num_param, num_size, layer.output_shape)
    print "  number of parameters: %d" % num_params

    obj = config().build_objective(interface_layers)

    train_loss_theano = obj.get_loss()
    kaggle_loss_theano = obj.get_kaggle_loss()
    segmentation_loss_theano = obj.get_segmentation_loss()

    validation_other_losses = collections.OrderedDict()
    validation_train_loss = obj.get_loss(average=False, deterministic=True, validation=True, other_losses=validation_other_losses)
    validation_kaggle_loss = obj.get_kaggle_loss(average=False, deterministic=True, validation=True)
    validation_segmentation_loss = obj.get_segmentation_loss(average=False, deterministic=True, validation=True)


    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }

    # contains target_vars of the objective! Not the output layers desired values!
    # There can be more output layers than are strictly required for the objective
    # e.g. for debugging

    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32') for (key, target_var) in obj.target_vars.iteritems()
    }

    learning_rate_schedule = config().learning_rate_schedule

    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    idx = T.lscalar('idx')

    givens = dict()
    for key in obj.target_vars.keys():
        if key=="segmentation":
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().sunny_batch_size : (idx+1)*config().sunny_batch_size]
        else:
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().batch_size : (idx+1)*config().batch_size]

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    updates = config().build_updates(train_loss_theano, all_params, learning_rate)

    #grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)]))
    #theano_printer.print_me_this("Grad norm", grad_norm)

    iter_train = theano.function([idx], [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore", updates=updates,
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )
    iter_validate = theano.function([idx], [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss] + [v for _, v in validation_other_losses.items()] + theano_printer.get_the_stuff_to_print(),
                                    givens=givens, on_unused_input="ignore")

    num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS / (config().batch_size * config().batches_per_chunk))
    print "Will train for %d chunks" % num_chunks_train
    if config().restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1
        chunks_train_idcs = range(start_chunk_idx, num_chunks_train)

        # set lr to the correct value
        current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses_train = resume_metadata['losses_train']
        losses_eval_valid = resume_metadata['losses_eval_valid']
        losses_eval_train = resume_metadata['losses_eval_train']
        losses_eval_valid_kaggle = [] #resume_metadata['losses_eval_valid_kaggle']
        losses_eval_train_kaggle = [] #resume_metadata['losses_eval_train_kaggle']
    else:
        chunks_train_idcs = range(num_chunks_train)
        losses_train = []
        losses_eval_valid = []
        losses_eval_train = []
        losses_eval_valid_kaggle = []
        losses_eval_train_kaggle = []


    create_train_gen = partial(config().create_train_gen,
                               required_input_keys = xs_shared.keys(),
                               required_output_keys = ys_shared.keys()# + ["patients"],
                               )


    create_eval_valid_gen = partial(config().create_eval_valid_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()# + ["patients"]
                                   )

    create_eval_train_gen = partial(config().create_eval_train_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()
                                   )

    print "Train model"
    start_time = time.time()
    prev_time = start_time

    num_batches_chunk = config().batches_per_chunk


    for e, train_data in izip(chunks_train_idcs, buffering.buffered_gen_threaded(create_train_gen())):
        print "Chunk %d/%d" % (e + 1, num_chunks_train)
        epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e+1) / NUM_TRAIN_PATIENTS)
        print "  Epoch %.1f" % epoch

        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print "  learning rate %.7f" % lr

        if config().dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl"%e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            ys_shared[key].set_value(train_data["output"][key])

        #print "train:", sorted(train_data["output"]["patients"])
        losses = []
        kaggle_losses = []
        segmentation_losses = []
        for b in xrange(num_batches_chunk):
            iter_result = iter_train(b)

            loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3])
            utils.detect_nans(loss, xs_shared, ys_shared, all_params)
 
            losses.append(loss)
            kaggle_losses.append(kaggle_loss)
            segmentation_losses.append(segmentation_loss)

        mean_train_loss = np.mean(losses)
        print "  mean training loss:\t\t%.6f" % mean_train_loss
        losses_train.append(mean_train_loss)

        print "  mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses)
        print "  mean segment loss:\t\t%.6f" % np.mean(segmentation_losses)

        if ((e + 1) % config().validate_every) == 0:
            print
            print "Validating"
            if config().validate_train_set:
                subsets = ["validation", "train"]
                gens = [create_eval_valid_gen, create_eval_train_gen]
                losses_eval = [losses_eval_valid, losses_eval_train]
                losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle]
            else:
                subsets = ["validation"]
                gens = [create_eval_valid_gen]
                losses_eval = [losses_eval_valid]
                losses_kaggle = [losses_eval_valid_kaggle]

            for subset, create_gen, losses_validation, losses_kgl in zip(subsets, gens, losses_eval, losses_kaggle):

                vld_losses = []
                vld_kaggle_losses = []
                vld_segmentation_losses = []
                vld_other_losses = {k:[] for k,_ in validation_other_losses.items()}
                print "  %s set (%d samples)" % (subset, get_number_of_validation_samples(set=subset))

                for validation_data in buffering.buffered_gen_threaded(create_gen()):
                    num_batches_chunk_eval = config().batches_per_chunk

                    if config().dump_network_loaded_data:
                        pickle.dump(validation_data, open("data_loader_dump_valid_%d.pkl"%e, "wb"))

                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    for key in ys_shared:
                        ys_shared[key].set_value(validation_data["output"][key])

                    #print "validate:", validation_data["output"]["patients"]

                    for b in xrange(num_batches_chunk_eval):
                        losses = tuple(iter_validate(b)[:3+len(validation_other_losses)])
                        loss, kaggle_loss, segmentation_loss = losses[:3]
                        other_losses = losses[3:]
                        vld_losses.extend(loss)
                        vld_kaggle_losses.extend(kaggle_loss)
                        vld_segmentation_losses.extend(segmentation_loss)
                        for k, other_loss in zip(validation_other_losses, other_losses):
                            vld_other_losses[k].extend(other_loss)

                vld_losses = np.array(vld_losses)
                vld_kaggle_losses = np.array(vld_kaggle_losses)
                vld_segmentation_losses = np.array(vld_segmentation_losses)
                for k in validation_other_losses:
                    vld_other_losses[k] = np.array(vld_other_losses[k])

                # now select only the relevant section to average
                sunny_len = get_lenght_of_set(name="sunny", set=subset)
                regular_len = get_lenght_of_set(name="regular", set=subset)
                num_valid_samples = get_number_of_validation_samples(set=subset)

                #print losses[:num_valid_samples]
                #print kaggle_losses[:regular_len]
                #print segmentation_losses[:sunny_len]
                loss_to_save = obj.compute_average(vld_losses[:num_valid_samples])
                print "  mean training loss:\t\t%.6f" % loss_to_save
                print "  mean kaggle loss:\t\t%.6f"   % np.mean(vld_kaggle_losses[:regular_len])
                print "  mean segment loss:\t\t%.6f"  % np.mean(vld_segmentation_losses[:sunny_len])
                # print "    acc:\t%.2f%%" % (acc * 100)
                for k, v in vld_other_losses.items():
                    print "  mean %s loss:\t\t%.6f"  % (k, obj.compute_average(v[:num_valid_samples], loss_name=k))
                print

                losses_validation.append(loss_to_save)

                kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len])
                losses_kgl.append(kaggle_to_save)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if ((e + 1) % config().save_every) == 0:
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'w') as f:
                pickle.dump({
                    'metadata_path': metadata_path,
                    'configuration_file': config().__name__,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': e,
                    'losses_train': losses_train,
                    'losses_eval_train': losses_eval_train,
                    'losses_eval_train_kaggle': losses_eval_train_kaggle,
                    'losses_eval_valid': losses_eval_valid,
                    'losses_eval_valid_kaggle': losses_eval_valid_kaggle,
                    'time_since_start': time_since_start,
                    'param_values': lasagne.layers.get_all_param_values(top_layer)
                }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

    # store all known outputs from last batch:
    if config().take_a_dump:
        all_theano_variables = [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print()
        for layer in all_layers[:-1]:
            all_theano_variables.append(lasagne.layers.helper.get_output(layer))

        iter_train = theano.function([idx], all_theano_variables,
                                     givens=givens, on_unused_input="ignore", updates=updates,
                                     # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                     )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return
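The training loop above relies on a shared-variable pattern: a whole chunk of data is copied to the GPU once via `xs_shared`/`ys_shared`, and the compiled function only receives a batch index, slicing the batch out of the shared buffer through `givens`. The toy sketch below isolates that pattern with made-up shapes and a stand-in loss; apart from the Theano API, none of the names come from the snippet above.

import numpy as np
import theano
import theano.tensor as T

batch_size = 4                                    # illustrative value
x_shared = theano.shared(np.zeros((0, 3), dtype='float32'))   # refilled once per chunk
x = T.matrix('x')
idx = T.lscalar('idx')

loss = T.sqr(x).mean()                            # stand-in for the real training loss
iter_train = theano.function(
    [idx], loss,
    givens={x: x_shared[idx * batch_size:(idx + 1) * batch_size]})

chunk = np.random.rand(2 * batch_size, 3).astype('float32')   # one "chunk" = 2 batches
x_shared.set_value(chunk)
print([iter_train(b) for b in range(2)])          # one call per batch in the chunk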
Code example #34
0
File: train_seg_patch.py Project: neouuid/dsb3
expid = utils.generate_expid(config_name)
print()
print("Experiment ID: %s" % expid)
print()

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print('Build model')
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print('  number of parameters: %d' % num_params)
print(string.ljust('  layer output shapes:', 36),)
print(string.ljust('#params:', 10),)
print('output shape:')
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print('    %s %s %s' % (name, num_param, layer.output_shape))

train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)
Code example #35
0
def test_dsb():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_1/'
    utils.auto_make_dir(image_dir)

    patient_data_paths = utils_lung.get_patient_data_paths(
        pathfinder.DATA_PATH)
    print len(patient_data_paths)
    patient_data_paths = [
        pathfinder.DATA_PATH + '/01de8323fa065a8963533c4a86f2f6c1'
    ]

    for k, p in enumerate(patient_data_paths):
        pid = utils_lung.extract_pid_dir(p)
        # sid2data, sid2metadata = utils_lung.get_patient_data(p)
        # sids_sorted = utils_lung.sort_sids_by_position(sid2metadata)
        # sids_sorted_jonas = utils_lung.sort_slices_jonas(sid2metadata)
        # sid2position = utils_lung.slice_location_finder(sid2metadata)
        #
        # jonas_slicethick = []
        # for i in xrange(len(sids_sorted_jonas) - 1):
        #     s = np.abs(sid2position[sids_sorted_jonas[i + 1]] - sid2position[sids_sorted_jonas[i]])
        #     jonas_slicethick.append(s)
        #
        # img = np.stack([data_transforms.ct2HU(sid2data[sid], sid2metadata[sid]) for sid in sids_sorted])
        # xx = (jonas_slicethick[0],
        #       sid2metadata[sids_sorted[0]]['PixelSpacing'][0],
        #       sid2metadata[sids_sorted[0]]['PixelSpacing'][1])
        # pixel_spacing = np.asarray(xx)

        img, pixel_spacing = utils_lung.read_dicom_scan(p)
        mask = lung_segmentation.segment_HU_scan_ira(img)
        print pid
        print pixel_spacing
        print '===================================='

        img_out, transform_matrix, mask_out = data_transforms.transform_scan3d(
            img,
            pixel_spacing=pixel_spacing,
            p_transform=config().p_transform,
            p_transform_augment=None,
            lung_mask=mask)

        for i in xrange(100, img_out.shape[0], 5):
            plot_slice_3d_2(img_out,
                            mask_out,
                            0,
                            str(pid) + str(i),
                            idx=np.array([i, 200, 200]))

        plot_slice_3d_2(img_out,
                        mask_out,
                        0,
                        pid,
                        idx=np.array(img_out.shape) / 2)
        plot_slice_3d_2(mask_out,
                        img_out,
                        0,
                        pid,
                        idx=np.array(img_out.shape) / 4)
        plot_slice_3d_2(mask_out,
                        img_out,
                        0,
                        pid,
                        idx=np.array(img_out.shape) / 8)
Code example #36
0
config_name = sys.argv[1]
set_configuration('configs_seg_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
idx_z = T.lscalar('idx_z')
idx_y = T.lscalar('idx_y')
idx_x = T.lscalar('idx_x')

window_size = config().window_size
stride = config().stride
n_windows = config().n_windows

givens = {}
givens[model.l_in.input_var] = x_shared

get_predictions_patch = theano.function([],
                                        nn.layers.get_output(model.l_out, deterministic=True),
                                        givens=givens,
                                        on_unused_input='ignore')
Code example #37
0
def predict_slice_model(expid, outfile, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    _check_slicemodel(input_layers)

    # Print the architecture
    _print_architecture(top_layer)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "slices"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


    predictions = [{"patient": i+1,
                    "slices": {
                        slice_id: {
                            "systole": np.zeros((0,600)),
                            "diastole": np.zeros((0,600))
                        } for slice_id in data_loader.get_slice_ids_for_patient(i+1)
                    }
                   } for i in xrange(NUM_PATIENTS)]


    # Loop over data and generate predictions
    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        slice_ids = test_data["output"]["slices"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, (patient_id, slice_id) in enumerate(
                    zip(patient_ids[b*config().batch_size:(b+1)*config().batch_size],
                        slice_ids[b*config().batch_size:(b+1)*config().batch_size])):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]
                    patient_slice_data = patient_data["slices"][slice_id]
                    patient_slice_data["systole"] =  np.concatenate((patient_slice_data["systole"],  kaggle_systoles[idx:idx+1,:]),axis=0)
                    patient_slice_data["diastole"] = np.concatenate((patient_slice_data["diastole"], kaggle_diastoles[idx:idx+1,:]),axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # Average predictions
    already_printed = False
    for prediction in predictions:
        for prediction_slice_id in prediction["slices"]:
            prediction_slice = prediction["slices"][prediction_slice_id]
            if prediction_slice["systole"].size>0 and prediction_slice["diastole"].size>0:
                average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
                prediction_slice["systole_average"] = average_method(prediction_slice["systole"])
                prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"])
                try:
                    test_if_valid_distribution(prediction_slice["systole_average"])
                    test_if_valid_distribution(prediction_slice["diastole_average"])
                except:
                    if not already_printed:
                        print "WARNING: These distributions are not distributions"
                        already_printed = True
                    prediction_slice["systole_average"] = make_monotone_distribution(prediction_slice["systole_average"])
                    prediction_slice["diastole_average"] = make_monotone_distribution(prediction_slice["diastole_average"])


    print "Calculating training and validation set scores for reference"
    # Add CRPS scores to the predictions
    # Iterate over train and validation sets
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        # Iterate over patients in the set
        for patient in patient_ids:
            prediction = predictions[patient-1]
            # Iterate over the slices
            for slice_id in prediction["slices"]:
                prediction_slice = prediction["slices"][slice_id]
                if "systole_average" in prediction_slice:
                    assert patient == regular_labels[patient-1, 0]
                    error_sys = CRSP(prediction_slice["systole_average"], regular_labels[patient-1, 1])
                    prediction_slice["systole_CRPS"] = error_sys
                    prediction_slice["target_systole"] = regular_labels[patient-1, 1]
                    error_dia = CRSP(prediction_slice["diastole_average"], regular_labels[patient-1, 2])
                    prediction_slice["diastole_CRPS"] = error_dia
                    prediction_slice["target_diastole"] = regular_labels[patient-1, 2]
                    prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia


    print "dumping prediction file to %s" % outfile
    with open(outfile, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions_per_slice': predictions,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"


    return
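As a hedged sketch of how the dumped file could be consumed afterwards: the field names below ('predictions_per_slice', 'slices', 'patient', 'CRPS') are exactly the ones written by the pickle.dump call above, while the file path is a placeholder.

import pickle

outfile = 'slice_predictions.pkl'  # placeholder: whatever path was passed to predict_slice_model
with open(outfile, 'rb') as f:
    dump = pickle.load(f)

for patient in dump['predictions_per_slice']:
    for slice_id, s in patient['slices'].items():
        if 'CRPS' in s:  # only train/validation slices were given a CRPS score above
            print('patient %d  slice %s  CRPS %.4f' % (patient['patient'], slice_id, s['CRPS']))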
Code example #38
0
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)
Code example #39
0
# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
output_pkl_file = predictions_dir + '/%s-%s.pkl' % (expid, set)

submissions_dir = utils.get_dir_path('submissions', pathfinder.METADATA_PATH)
output_csv_file = submissions_dir + '/%s-%s.csv' % (expid, set)

# if os.path.isfile(output_pkl_file):
#     pid2prediction = utils.load_pkl(output_pkl_file)
#     utils_lung.write_submission(pid2prediction, output_csv_file)
#     print 'saved csv'
#     print output_csv_file
#     sys.exit(0)

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
Code example #40
0
config_name = sys.argv[1]
n_tta_iterations = int(sys.argv[2]) if len(sys.argv) >= 3 else 100
mean = sys.argv[3] if len(sys.argv) >= 4 else "geometric"

print "Make %s tta predictions for %s set using %s mean" % (n_tta_iterations, "valid and test", mean)

metadata_dir = utils.get_dir_path("train", METADATA_PATH)
metadata_path = utils.find_model_metadata(metadata_dir, config_name)
metadata = utils.load_pkl(metadata_path)
assert config_name == metadata["configuration"]
if "subconfiguration" in metadata:
    set_subconfiguration(metadata["subconfiguration"])
set_configuration(config_name)

# predictions paths
jonas_prediction_path = PREDICTIONS_PATH + "/ira_%s.pkl" % config().__name__
prediction_dir = utils.get_dir_path("predictions", METADATA_PATH)
valid_prediction_path = prediction_dir + "/%s-%s-%s-%s.pkl" % (
    metadata["experiment_id"],
    "valid",
    n_tta_iterations,
    mean,
)
test_prediction_path = prediction_dir + "/%s-%s-%s-%s.pkl" % (metadata["experiment_id"], "test", n_tta_iterations, mean)

# submissions paths
submission_dir = utils.get_dir_path("submissions", METADATA_PATH)
submission_path = submission_dir + "/%s-%s-%s-%s.csv" % (metadata["experiment_id"], "test", n_tta_iterations, mean)

# logs
logs_dir = utils.get_dir_path("logs", METADATA_PATH)
Code example #41
0
File: addons.py Project: kynikos/outspline
def load_addon(faddon, reqversion, tablenames):
    '''
    Poss. cases | BASE               | DEPENDENCY         | OPTIONAL
    ------------+--------------------+--------------------+--------------------
    NOT FOUND   | impossible         | critical exception | debug message
    ------------+--------------------+--------------------+--------------------
    DISABLED    | debug message      | critical exception | debug message
    ------------+--------------------+--------------------+--------------------
    VERSION     | impossible         | critical exception | critical exception
    ------------+--------------------+--------------------+--------------------
    TABLES      | critical exception | critical exception | critical exception
    '''
    try:
        folder, addon = faddon.split('.')
    except ValueError:
        # Check core version

        # Get only the major version number
        instversion = int(info.core.version.split(".", 1)[0])

        if reqversion is not False and instversion != reqversion:
            raise exceptions.AddonVersionError(instversion)
    else:
        section, logname = {
            'extensions': ('Extensions', 'extension'),
            'interfaces': ('Interfaces', 'interface'),
            'plugins': ('Plugins', 'plugin'),
        }[folder]

        mfaddon = '.'.join(('outspline', faddon))

        # An addon may list a dependency that is not installed.
        # This check must be done before the other ones: if the addon is not
        # installed, it is impossible to read its info
        if addon not in configuration.config(section).get_sections():
            raise exceptions.AddonNotFoundError()

        # This check must be done before the version and provided-tables checks:
        # if an addon is disabled, those problems should not matter
        if not configuration.config(section)(addon).get_bool('enabled'):
            raise exceptions.AddonDisabledError()

        ainfo = importlib.import_module(".".join(("outspline", "info", folder,
                                                                    addon)))

        # Get only the major version number
        # This version check must be done before the 'mfaddon not in
        # sys.modules' one, otherwise it's not always performed; for example
        # two different addons may require the same addon with different
        # versions, and if the first one required the correct version, when
        # checking the second one no exception would be raised
        instversion = int(ainfo.version.split(".", 1)[0])

        if reqversion is not False and instversion != reqversion:
            raise exceptions.AddonVersionError(instversion)

        # This check must be done after the version one, see the comment there
        # for the reason
        if mfaddon not in sys.modules:
            if section == 'Extensions':
                ptables = {table: faddon for table in ainfo.provides_tables
                                                                    if table}
                test = [table for table in set(tablenames) & set(ptables)
                                        if tablenames[table] != ptables[table]]

                if test:
                    raise exceptions.ExtensionProvidedTablesError(test,
                                        [tablenames[table] for table in test])

                tablenames.update(ptables)

            try:
                ainfo.dependencies
            except AttributeError:
                pass
            else:
                for dep, ver in ainfo.dependencies:
                    try:
                        load_addon(dep, int(ver), tablenames=tablenames)
                    # If I wanted to silently disable an addon in case one of
                    # its dependencies is not satisfied (not found,
                    # disabled...) I should disable the addon in the
                    # configuration to prevent the following bug: an enabled
                    # addon is activated since all its dependencies are
                    # enabled; that addon also has an optional dependency which
                    # is also enabled and activated; this optional dependency,
                    # though, has a dependency which is not enabled, so it is
                    # not imported by this load_addon() function; however,
                    # since in the configuration it is enabled, it's imported
                    # by the main addon anyway with
                    # coreaux_api.import_optional_extension_api(), thus
                    # breaking the application, since the dependency for the
                    # optional dependency is still missing
                    # Note that this change won't be written in the
                    # configuration file, since it's updated with
                    # config.export_add()
                    #except ...:
                    #    configuration.config(section)(addon)['enabled'] = 'off'
                    except exceptions.AddonNotFoundError:
                        log.error('{} depends on {} which however cannot be '
                                                'found'.format(faddon, dep))
                        # Raise a different exception, otherwise it may be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()
                    except exceptions.AddonDisabledError:
                        log.error('{} depends on {} which however is '
                                                'disabled'.format(faddon, dep))
                        # Raise a different exception, otherwise it will be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()
                    except exceptions.AddonVersionError as err:
                        log.error('{} depends on {} {} which however is '
                                            'installed with version {}'.format(
                                            faddon, dep, ver, err.version))
                        # Raise a different exception, otherwise it may be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()
                    except exceptions.ExtensionProvidedTablesError as err:
                        log.error('{} depends on {} which provides tables {} '
                                    'that are already provided by {}'.format(
                                    faddon, dep, ', '.join(err.tables),
                                    ', '.join(err.extensions)))
                        # Raise a different exception, otherwise it will be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()

            try:
                ainfo.optional_dependencies
            except AttributeError:
                pass
            else:
                for opt, ver in ainfo.optional_dependencies:
                    try:
                        load_addon(opt, int(ver), tablenames=tablenames)
                    except exceptions.AddonNotFoundError:
                        log.debug('{} optionally depends on {} which however '
                                        'cannot be found'.format(faddon, opt))
                    except exceptions.AddonDisabledError:
                        log.debug('{} optionally depends on {} which however '
                                            'is disabled'.format(faddon, opt))
                    except exceptions.AddonVersionError as err:
                        log.error('{} optionally depends on {} {} which '
                                'however is installed with version {}'.format(
                                faddon, opt, ver, err.version))
                        # Just crash the application: this case is not easy to
                        # handle, because the same addon may be required by
                        # another addon with the correct version, and yet this
                        # addon should *not* use this dependency
                        # Raise a different exception, otherwise it will be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()
                    except exceptions.ExtensionProvidedTablesError as err:
                        log.error('{} optionally depends on {} which provides '
                                'tables {} that are already provided by '
                                '{}'.format(faddon, opt, ', '.join(err.tables),
                                ', '.join(err.extensions)))
                        # Just crash the application: this case is not easy to
                        # handle, because the same addon may be required by
                        # another addon with the correct version, and yet this
                        # addon should *not* use this dependency
                        # Raise a different exception, otherwise it will be
                        # caught by start_addons()
                        raise exceptions.AddonDependencyError()

            mod = importlib.import_module(mfaddon)

            # Interfaces must have a main() function
            if hasattr(mod, 'main') or folder == 'interfaces':
                mod.main()

            enabled_addons[section].add(addon)

            log.info('Loaded {}: {}'.format(logname, addon))
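For orientation only, a hypothetical top-level call of load_addon might look like the sketch below; the addon name is invented (not from outspline), and the empty tablenames dict mirrors how the provided-tables registry is filled in-place by the function above.

# Hypothetical usage sketch (addon name is an assumption, not from the sources):
# reqversion=False skips the version check, as in the code above.
tablenames = {}
try:
    load_addon('extensions.example_extension', False, tablenames=tablenames)
except exceptions.AddonDisabledError:
    log.debug('base addon is disabled, nothing to load')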
Code example #42
0
File: cliargparse.py Project: kynikos/outspline
def parse_cli_args():
    # Options -h and --help are automatically created
    cliparser = argparse.ArgumentParser(description=_DESCRIPTION)

    cliparser.add_argument('-c',
                           '--config',
                           default=None,
                           metavar='FILE',
                           dest='configfile',
                           help='set the configuration file name: a relative '
                                'or full path can be specified (default: {})'
                                ''.format(_USER_CONFIG_FILE))

    cliparser.add_argument('-l',
                           '--logfile',
                           default=None,
                           metavar='FILE',
                           dest='logfile',
                           help='set the log file name: a relative or full '
                                'path can be specified (default: {}, see also '
                                '--loglevel option)'
                                ''.format(os.path.expanduser(config('Log'
                                                            )['log_file'])))

    cliparser.add_argument('-L',
                           '--loglevel',
                           default=None,
                           metavar='NN',
                           dest='loglevel',
                           help='a 2-digit number (in base 4, from 00 to 33) '
                                'whose digits define the verbosity of, '
                                'respectively, stdout and file log messages; '
                                '0) disabled; 1) essential reports; 2) normal '
                                'verbosity; 3) debug mode; digits different '
                                'from 0,1,2,3 will default to the respective '
                                'value set in the configuration file '
                                '(default: {}{}, see also --logfile option)'
                                ''.format(config('Log')['log_level_stdout'],
                                config('Log')['log_level_file']))

    cliparser.add_argument('-u',
                           '--config-update',
                           action='store_true',
                           dest='updonly',
                           help='only create or update the configuration '
                                'file, then exit')

    cliparser.add_argument('-v',
                           '--version',
                           action=ShowVersion,
                           nargs=0,
                           dest='version',
                           help='show program\'s version number, copyright '
                                'and license information, then exit')

    cliparser.add_argument('--about',
                           action=ShowAbout,
                           nargs=0,
                           dest='about',
                           help='show information on the installed components '
                                                    'and addons, then exit')

    return cliparser.parse_args()
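The --loglevel help above packs two base-4 verbosity digits into a single value. The helper below only illustrates that encoding and is not part of cliargparse.py: it splits the value into stdout and file levels and falls back to the configured defaults for digits outside 0-3.

# Illustrative helper (assumption, not from the outspline sources): decode a
# 2-digit --loglevel value into (stdout_level, file_level) as the help describes.
def split_loglevel(loglevel, default_stdout, default_file):
    stdout_digit, file_digit = loglevel[0], loglevel[1]
    stdout_level = int(stdout_digit) if stdout_digit in '0123' else default_stdout
    file_level = int(file_digit) if file_digit in '0123' else default_file
    return stdout_level, file_level

# split_loglevel('23', 2, 1) -> (2, 3); split_loglevel('93', 2, 1) -> (2, 3)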
Code example #43
0
import numpy as np
import scipy.misc
import tensorflow as tf
import tensorflow.contrib.gan as tfgan
import tensorflow.contrib.slim as slim
from tensorflow.contrib.gan.python import namedtuples

import configuration
import data_provider

tf.reset_default_graph()
conf = configuration.config()
initializer = None
batch_norm_params = {
    'decay': conf.batch_norm_decay,
    'epsilon': conf.epsilon,
    'updates_collections': tf.GraphKeys.UPDATE_OPS,
    'is_training': conf.is_training,
    'zero_debias_moving_mean': True
}

# Training parameters
global_step = tf.train.get_or_create_global_step()
generator_loss_fn = tfgan.losses.modified_generator_loss
discriminator_loss_fn = tfgan.losses.modified_discriminator_loss
weights_initializer = tf.initializers.random_normal(mean=0, stddev=0.02)

gen_lr = tf.train.exponential_decay(conf.gen_lr, global_step, conf.decay_steps,
                                    0.5, "generator_learning_rate")
tf.summary.scalar("gen_learning_rate", gen_lr)
generator_optimizer = tf.train.AdamOptimizer(learning_rate=gen_lr, beta1=0.5)
Code example #44
0
metadata_path = sys.argv[1]
metadata_dir = utils.get_dir_path('train', METADATA_PATH)
metadata = utils.load_pkl(metadata_dir + '/%s' % metadata_path)
config_name = metadata['configuration']
if 'subconfiguration' in metadata:
    set_subconfiguration(metadata['subconfiguration'])

set_configuration(config_name)

# predictions paths
prediction_dir = utils.get_dir_path('predictions', METADATA_PATH)
prediction_path = prediction_dir + "/%s.pkl" % metadata['experiment_id']
prediction_mu_std_path = prediction_dir + "/%s_mu_sigma.pkl" % metadata['experiment_id']

print "Build model"
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
num_params = nn.layers.count_params(model.l_top)
print '  number of parameters: %d' % num_params
nn.layers.set_all_param_values(model.l_top, metadata['param_values'])

xs_shared = [nn.utils.shared_empty(dim=len(l.shape)) for l in model.l_ins]
givens_in = {}
for l_in, x in izip(model.l_ins, xs_shared):
    givens_in[l_in.input_var] = x

iter_test_det = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.l_outs],
                                givens=givens_in, on_unused_input='warn')

iter_mu = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.mu_layers], givens=givens_in,
                          on_unused_input='warn')
Code example #45
0
def build_nesterov_updates(train_loss, all_params, learning_rate):
    updates = lasagne.updates.nesterov_momentum(train_loss, all_params,
                                                learning_rate,
                                                config().momentum)
    return updates
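The updates dict returned here is what gets handed to theano.function, as in the training example earlier in this collection (build_updates(...) followed by theano.function(..., updates=updates)). The sketch below wires a throwaway model to the same Nesterov-momentum call; the momentum value 0.9 stands in for config().momentum and the model itself is purely illustrative.

import numpy as np
import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 3), input_var=x)
l_out = lasagne.layers.DenseLayer(l_in, num_units=1)

train_loss = lasagne.layers.get_output(l_out).mean()       # placeholder objective
all_params = lasagne.layers.get_all_params(l_out, trainable=True)
learning_rate = theano.shared(np.float32(0.01))

# same call as build_nesterov_updates above, with an assumed momentum of 0.9
updates = lasagne.updates.nesterov_momentum(train_loss, all_params,
                                            learning_rate, 0.9)
iter_train = theano.function([x], train_loss, updates=updates)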
Code example #46
0
expid = utils.generate_expid(config_name)
print()
print("Experiment ID: %s" % expid)
print()

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print('Build model')
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print('  number of parameters: %d' % num_params)
print(string.ljust('  layer output shapes:', 36), )
print(string.ljust('#params:', 10), )
print('output shape:')
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print('    %s %s %s' % (name, num_param, layer.output_shape))

train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)
Code example #47
0
def get_lob_data(pair,
                 date_start,
                 date_end,
                 frequency=timedelta(seconds=10),
                 lob_depth=10):
    '''
    Function to get a time series of limit order book snapshots

    Arguments:
    pair -- string, currency pair to return (e.g. 'USDT_BTC')
    date_start -- string, time series start
    date_end -- string, time series end
    frequency -- timedelta, the minimum time granularity (e.g. timedelta(seconds=10))
    lob_depth -- integer, number of order book levels analyzed

    Returns: Dask data frame
    '''

    print(f'Checking for cached LOB data from {date_start} to {date_end}')

    #TODO assert if date_end is yesterday or earlier

    assert frequency >= timedelta(
        seconds=1), 'Frequency must be equal to or greater than 1 second'

    configuration = config()
    raw_data_folder = configuration['folders']['raw_lob_data']
    resampled_data_folder = configuration['folders']['resampled_data']

    date_start = datetime.strptime(date_start, '%Y-%m-%d')
    date_end = datetime.strptime(date_end, '%Y-%m-%d')
    freq = f'{int(frequency.total_seconds())}s'

    os.makedirs(
        f'{resampled_data_folder}/{pair}/{lob_depth}_levels/original_frequency',
        exist_ok=True)
    os.makedirs(f'{resampled_data_folder}/{pair}/{lob_depth}_levels/{freq}',
                exist_ok=True)

    data = []

    # Loop through day folders
    date_to_process = date_start
    while date_to_process <= date_end:
        day_folder = datetime.strftime(date_to_process, '%Y/%m/%d')
        day_cache_file_name = f'{datetime.strftime(date_to_process, "%Y-%m-%d")}.csv.gz'
        resampled_file_path = f'{resampled_data_folder}/{pair}/{lob_depth}_levels/{freq}/{day_cache_file_name}'
        if os.path.isfile(resampled_file_path):
            print(f'Found {resampled_file_path}')
        else:
            print(f'Generating {resampled_file_path}')
            original_file_name = f'{resampled_data_folder}/{pair}/{lob_depth}_levels/original_frequency/{day_cache_file_name}'
            if os.path.isfile(original_file_name):
                day_data = pd.read_csv(original_file_name,
                                       parse_dates=['Datetime'])
            else:
                # reset the raw json dict and the nested list for every new day processed
                raw_data = {}  # empty dict to update with incoming json
                processed_data = []

                if not os.path.isdir(f'{raw_data_folder}/{pair}/{day_folder}'):
                    s3_resource = get_s3_resource()
                    lob_data_bucket = s3_resource.Bucket(
                        configuration['buckets']['lob_data'])
                    os.makedirs(f'{raw_data_folder}/tmp/{pair}/{day_folder}',
                                exist_ok=True)

                    keys = []
                    for obj in lob_data_bucket.objects.filter(
                            Prefix=f'{pair}/{day_folder}'):
                        keys.append(obj.key)

                    download_s3_folder(lob_data_bucket, day_folder, keys)
                    shutil.move(f'{raw_data_folder}/tmp/{pair}/{day_folder}',
                                f'{raw_data_folder}/{pair}/{day_folder}')

                # Load all files into a dictionary
                for file_name in os.listdir(
                        f'{raw_data_folder}/{pair}/{day_folder}'):

                    try:
                        with gzip.open(
                                f'{raw_data_folder}/{pair}/{day_folder}/{file_name}',
                                'r') as f:
                            json_string = f.read().decode('utf-8')
                            frozen = json_string.count('"isFrozen": "1"')
                            if frozen > 0:
                                print(f'Frozen {frozen} snapshots')
                        raw_data_temp = load_lob_json(json_string)

                    except Exception as e:
                        print(e)
                        # skip this file so a stale or undefined raw_data_temp
                        # is not merged below
                        continue

                    raw_data.update(raw_data_temp)

                # number of seconds in a day / frequency in seconds
                snapshot_count_day = int(24 * 60 * 60 /
                                         frequency.total_seconds())
                if len(raw_data) != snapshot_count_day:
                    diff = snapshot_count_day - len(raw_data)
                    if diff > 0:
                        print(f'{diff} gaps in {original_file_name}')
                    else:
                        print(
                            f'{diff * -1} additional data points in {original_file_name}'
                        )

                #del(raw_data['BTC_XRP-20200404_000000'])

                #TODO fix sequence order

                raw_data_frame = pd.DataFrame.from_dict(raw_data,
                                                        orient='index')
                raw_data_frame.reset_index(inplace=True)
                raw_data_frame['index'] = raw_data_frame['index'].str[-15:]
                raw_data_frame['index'] = pd.to_datetime(
                    raw_data_frame['index'], format='%Y%m%d_%H%M%S')
                raw_data_frame.set_index('index', drop=True, inplace=True)
                raw_data_frame.sort_index(inplace=True)
                idx_start = date_to_process
                idx_end = date_to_process + timedelta(days=1) - timedelta(
                    seconds=1)
                idx = pd.date_range(idx_start, idx_end, freq='1s')
                raw_data_frame = raw_data_frame.reindex(idx).ffill().fillna(
                    method='bfill'
                )  # forward fill gaps and back fill first item if missing

                # Convert hierarchical json data in to tabular format
                levels = list(range(lob_depth))
                for row in raw_data_frame.itertuples():

                    ask_price, ask_volume = zip(*row.asks[0:lob_depth])
                    bid_price, bid_volume = zip(*row.bids[0:lob_depth])
                    sequences = [row.seq] * lob_depth
                    datetimes = [row.Index] * lob_depth

                    processed_data.append(
                        list(
                            zip(ask_price, ask_volume, bid_price, bid_volume,
                                levels, sequences, datetimes)))

                # unravel nested structure and force data types
                day_data = pd.DataFrame(
                    [y for x in processed_data
                     for y in x],  #flatten the list of lists structure
                    columns=[
                        'Ask_Price', 'Ask_Size', 'Bid_Price', 'Bid_Size',
                        'Level', 'Sequence', 'Datetime'
                    ])

                day_data['Ask_Price'] = day_data['Ask_Price'].astype('float64')
                day_data['Bid_Price'] = day_data['Bid_Price'].astype('float64')
                day_data['Sequence'] = day_data['Sequence'].astype('int64')

                day_data.to_csv(original_file_name, compression='gzip')

            # resample dataframe to the wanted frequency
            resampled_day_data = day_data.groupby([
                pd.Grouper(key='Datetime', freq=freq),
                pd.Grouper(key='Level')
            ]).last().reset_index()
            resampled_day_data.to_csv(resampled_file_path, compression='gzip')

        date_to_process += timedelta(
            days=1)  # the most nested folder is a day of the month
        data.append(resampled_file_path)

    # computed = df.compute()
    # df = df.repartition(npartitions=1)
    # df.to_csv(f'{root_caching_folder}/{pair}/{output_file_name}', compression='gzip', single_file = True)
    # df.to_parquet(f'/tmp/10-seconds.parquet', compression='gzip', engine='pyarrow', write_index=False)

    return dd.read_csv(data, compression='gzip')
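A hedged usage sketch of get_lob_data: the pair name and date range are illustrative only and assume config() points at folders that already hold (or can receive via S3) the raw LOB data.

from datetime import timedelta

# Hypothetical call: build or reuse the 10-second, 10-level per-day caches for one
# week of data and load them lazily as a single Dask dataframe.
lob_ddf = get_lob_data('USDT_BTC', '2020-04-01', '2020-04-07',
                       frequency=timedelta(seconds=10), lob_depth=10)
print(lob_ddf.head())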
Code example #48
0
metadata = utils.load_pkl(metadata_path)
expid = metadata['experiment_id']

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s-test.log' % expid)
sys.stderr = sys.stdout

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/' + expid
utils.auto_make_dir(outputs_path)

print('Build model')
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print('  number of parameters: %d' % num_params)
print(string.ljust('  layer output shapes:', 36),)
print(string.ljust('#params:', 10),)
print('output shape:')
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print('    %s %s %s' % (name, num_param, layer.output_shape))

nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
Code example #49
0
def import_px_data(frequency, pair, date_start, date_end, lob_depth, norm_type,
                   roll):
    '''
    Loads preprocessed data ready to be shaped/used for model training.
    The experiment folder is the path where data has been cached, and the other
    parameters are part of the unique cached file nomenclature. If the cache file
    does not exist, it is generated from the input data in the "else" block.

    Arguments:
    frequency -- timedelta, the minimum time granularity (e.g. timedelta(seconds=10))
    pair -- string, currency pair to return (e.g. 'USDT_BTC')
    date_start -- string, time series start
    date_end -- string, time series end
    lob_depth -- integer, how many levels of the order book to consider
    norm_type -- string, 'z' or 'dyn' for z-score or dynamic z-score
    roll -- integer, a function of the granularity provided
    '''

    configuration = config()

    resampled_data_folder = configuration['folders']['resampled_data']
    frequency_seconds = int(frequency.total_seconds())

    # Data import - needs to be adjusted importing from several files using Dask
    quotes_file_name = f'{pair}--{lob_depth}lev--{frequency_seconds}sec--{date_start}--{date_end}.csv.gz'

    standardized_train_file = f'{resampled_data_folder}/{pair}/TRAIN--{norm_type}-{roll}--{quotes_file_name}'
    standardized_test_file = f'{resampled_data_folder}/{pair}/TEST--{norm_type}-{roll}--{quotes_file_name}'

    top_ob_train_file = f'{resampled_data_folder}/{pair}/TRAIN_TOP--{quotes_file_name}'
    top_ob_test_file = f'{resampled_data_folder}/{pair}/TEST_TOP--{quotes_file_name}'

    # standardized test file contains both trades and quotes
    if os.path.isfile(
            standardized_test_file
    ):  # testing for one of the cache files, assuming all were saved
        # Import cached standardized data
        print(f'Reading cached {standardized_train_file}')
        train_dyn_df = pd.read_csv(standardized_train_file)  #, index_col=1)
        train_dyn_df.drop('Unnamed: 0', axis=1, inplace=True)

        print(f'Reading cached {standardized_test_file}')
        test_dyn_df = pd.read_csv(standardized_test_file)  #, index_col=1)
        test_dyn_df.drop('Unnamed: 0', axis=1, inplace=True)

        print(f'Reading cached {top_ob_train_file}')
        top_ob_train = pd.read_csv(top_ob_train_file)  #, index_col=[0,1])

        print(f'Reading cached {top_ob_test_file}')
        top_ob_test = pd.read_csv(top_ob_test_file)  #, index_col=[0,1])

    else:  # check separately for quotes and trades input files

        quotes_data_input = get_lob_data(pair, date_start, date_end, frequency,
                                         lob_depth)
        quotes_data_input['Datetime'] = dd.to_datetime(
            quotes_data_input['Datetime'])

        trades_data_input = get_trade_data(pair, date_start, date_end,
                                           frequency)
        trades_data_input['Datetime'] = dd.to_datetime(
            trades_data_input['Datetime'])

        # once input files have been correctly read from the input folder, it's time to create a single standardized cache for trades and quotes

        # TODO - concatenate Dask dataframes
        quotes_data_input_pd = quotes_data_input.compute()
        trades_data_input_pd = trades_data_input.compute()

        data = pd.concat([trades_data_input_pd, quotes_data_input_pd
                          ]).sort_values(by=['Datetime', 'Level'])

        roll = roll  #+ 1 # +1 from extra level trades(level -1)
        stdz_depth = lob_depth + 1
        train_dyn_df, test_dyn_df, top_ob_train, top_ob_test = standardized_data_cache(
            data, roll, stdz_depth, standardized_train_file,
            standardized_test_file, top_ob_train_file, top_ob_test_file)

    # reset indexes, cast datetime type and clean unwanted columns
    print(f'train_dyn_df {train_dyn_df.head(3)}')
    print(f'test_dyn_df {test_dyn_df.head(3)}')
    print(f'top_ob_train {top_ob_train.head(3)}')
    print(f'top_ob_test {top_ob_test.head(3)}')
    #train_dyn_df = train_dyn_df.reset_index()
    train_dyn_df['Datetime'] = pd.to_datetime(train_dyn_df['Datetime'])

    #test_dyn_df = test_dyn_df.reset_index()
    test_dyn_df['Datetime'] = pd.to_datetime(test_dyn_df['Datetime'])
    #test_dyn_df.set_index('index', inplace=True)

    #top_ob_train = top_ob_train.reset_index()
    top_ob_train['Datetime'] = pd.to_datetime(top_ob_train['Datetime'])
    top_ob_train.drop('Unnamed: 0', axis=1, inplace=True)

    #top_ob_test = top_ob_test.reset_index()
    top_ob_test['Datetime'] = pd.to_datetime(top_ob_test['Datetime'])
    top_ob_test.drop('Unnamed: 0', axis=1, inplace=True)

    return train_dyn_df, test_dyn_df, top_ob_train, top_ob_test
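# A minimal usage sketch (assumptions: the enclosing function is named get_standardized_data,
# which is not visible in this excerpt, and config() resolves to a valid folder layout):
#
#   from datetime import timedelta
#   train_dyn_df, test_dyn_df, top_ob_train, top_ob_test = get_standardized_data(
#       frequency=timedelta(seconds=10), pair='USDT_BTC',
#       date_start='2020-01-01', date_end='2020-01-31',
#       lob_depth=10, norm_type='dyn', roll=60)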
コード例 #50
0
config_name = sys.argv[1]
set_configuration('configs_luna_props_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared

get_predictions_patch = theano.function([],
                                        nn.layers.get_output(model.l_out, deterministic=True),
                                        givens=givens_valid,
                                        on_unused_input='ignore')
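# get_predictions_patch runs a deterministic forward pass over whatever batch is currently
# stored in x_shared; the prediction loop (not shown here) presumably fills x_shared via
# x_shared.set_value(...) from data_iterator and calls get_predictions_patch() once per patch.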

data_iterator = config().data_iterator

#existing_preds = [f.rsplit('.') for f in os.listdir(outputs_path)]
#print existing_preds
コード例 #51
0
ファイル: preprocess.py プロジェクト: fdoperezi/kaggle-heart
def preprocess_normscale(patient_data, result, index, augment=True,
                         metadata=None,
                         normscale_resize_and_augment_function=normscale_resize_and_augment,
                         testaug=False):
    """Normalizes scale and augments the data.

    Args:
        patient_data: the data to be preprocessed.
        result: dict to store the result in.
        index: index indicating in which slot of the result dict the data
            should go.
        augment: flag indicating whether augmentation is needed.
        metadata: metadata belonging to the patient data.

    Returns:
        A pair of functions (label correction, classification correction) that
        correct labels and predicted distributions for the zoom applied during
        augmentation.
    """
    if augment:
        if testaug:
            augmentation_params = sample_test_augmentation_parameters()
        else:
            augmentation_params = sample_augmentation_parameters()
    else:
        augmentation_params = None

    zoom_factor = None

    # Iterate over different sorts of data
    for tag, data in patient_data.iteritems():
        if tag in metadata:
            metadata_tag = metadata[tag]
        desired_shape = result[tag][index].shape

        cleaning_processes = getattr(config(), 'cleaning_processes', [])
        cleaning_processes_post = getattr(config(), 'cleaning_processes_post', [])

        if tag.startswith("sliced:data:singleslice"):
            # Cleaning data before extracting a patch
            data = clean_images(
                [patient_data[tag]], metadata=metadata_tag,
                cleaning_processes=cleaning_processes)

            # Augment and extract patch
            # Decide which roi to use.
            shift_center = (None, None)
            if getattr(config(), 'use_hough_roi', False):
                shift_center = metadata_tag["hough_roi"]

            patient_3d_tensor = normscale_resize_and_augment_function(
                data, output_shape=desired_shape[-2:],
                augment=augmentation_params,
                pixel_spacing=metadata_tag["PixelSpacing"],
                shift_center=shift_center[::-1])[0]

            if augmentation_params is not None:
                zoom_factor = augmentation_params["zoom_x"] * augmentation_params["zoom_y"]
            else:
                zoom_factor = 1.0
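            # zoom_factor tracks the total in-plane zoom applied by augmentation; it is returned
            # at the end of this function as label/classification correction factors.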
                
            # Clean data further
            patient_3d_tensor = clean_images(
                patient_3d_tensor, metadata=metadata_tag,
                cleaning_processes=cleaning_processes_post)

            if "area_per_pixel:sax" in result:
                raise NotImplementedError()

            if augmentation_params and augmentation_params.get("change_brightness", 0) != 0:
                patient_3d_tensor = augment_brightness(patient_3d_tensor, augmentation_params["change_brightness"])

            put_in_the_middle(result[tag][index], patient_3d_tensor, True)


        elif tag.startswith("sliced:data:randomslices"):
            # Clean each slice separately
            data = [
                clean_images([slicedata], metadata=metadata, cleaning_processes=cleaning_processes)[0]
                for slicedata, metadata in zip(data, metadata_tag)]

            # Augment and extract patches
            shift_centers = [(None, None)] * len(data)
            if getattr(config(), 'use_hough_roi', False):
                shift_centers = [m["hough_roi"] for m in metadata_tag]

            patient_3d_tensors = [
                normscale_resize_and_augment_function(
                    [slicedata], output_shape=desired_shape[-2:],
                    augment=augmentation_params,
                    pixel_spacing=metadata["PixelSpacing"],
                    shift_center=shift_center[::-1])[0]
                for slicedata, metadata, shift_center in zip(data, metadata_tag, shift_centers)]
            if augmentation_params is not None:
                zoom_factor = augmentation_params["zoom_x"] * augmentation_params["zoom_y"]
            else:
                zoom_factor = 1.0

            # Clean data further
            patient_3d_tensors = [
                clean_images([patient_3d_tensor], metadata=metadata, cleaning_processes=cleaning_processes_post)[0]
                for patient_3d_tensor, metadata in zip(patient_3d_tensors, metadata_tag)]

            patient_4d_tensor = _make_4d_tensor(patient_3d_tensors)

            if augmentation_params and augmentation_params.get("change_brightness", 0) != 0:
                patient_4d_tensor = augment_brightness(patient_4d_tensor, augmentation_params["change_brightness"])

            if "area_per_pixel:sax" in result:
                raise NotImplementedError()

            put_in_the_middle(result[tag][index], patient_4d_tensor, True)

        elif tag.startswith("sliced:data:sax:locations"):
            pass  # will be filled in by the next one
        elif tag.startswith("sliced:data:sax:is_not_padded"):
            pass  # will be filled in by the next one
        elif tag.startswith("sliced:data:sax"):
            # step 1: sort (data, metadata_tag) with slice_location_finder
            slice_locations, sorted_indices, sorted_distances = slice_location_finder({i: metadata for i,metadata in enumerate(metadata_tag)})

            data = [data[idx] for idx in sorted_indices]
            metadata_tag = [metadata_tag[idx] for idx in sorted_indices]

            slice_locations = np.array([slice_locations[idx]["relative_position"] for idx in sorted_indices])
            slice_locations = slice_locations - (slice_locations[-1] + slice_locations[0])/2.0
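            # re-center the relative slice positions around the midpoint of the two outermost
            # slices, so the stack of locations straddles zero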

            data = [
                clean_images([slicedata], metadata=metadata, cleaning_processes=cleaning_processes)[0]
                for slicedata, metadata in zip(data, metadata_tag)]

            # Augment and extract patches
            shift_centers = [(None, None)] * len(data)
            if getattr(config(), 'use_hough_roi', False):
                shift_centers = [m["hough_roi"] for m in metadata_tag]

            patient_3d_tensors = [
                normscale_resize_and_augment_function(
                    [slicedata], output_shape=desired_shape[-2:],
                    augment=augmentation_params,
                    pixel_spacing=metadata["PixelSpacing"],
                    shift_center=shift_center[::-1])[0]
                for slicedata, metadata, shift_center in zip(data, metadata_tag, shift_centers)]

            if augmentation_params is not None:
                zoom_factor = augmentation_params["zoom_x"] * augmentation_params["zoom_y"]
            else:
                zoom_factor = 1.0

            # Clean data further
            patient_3d_tensors = [
                clean_images([patient_3d_tensor], metadata=metadata, cleaning_processes=cleaning_processes_post)[0]
                for patient_3d_tensor, metadata in zip(patient_3d_tensors, metadata_tag)]

            patient_4d_tensor = _make_4d_tensor(patient_3d_tensors)

            if augmentation_params and augmentation_params.get("change_brightness", 0) != 0:
                patient_4d_tensor = augment_brightness(patient_4d_tensor, augmentation_params["change_brightness"])

            # Augment sax order
            if augmentation_params and augmentation_params.get("flip_sax", 0) > 0.5:
                patient_4d_tensor = patient_4d_tensor[::-1]
                slice_locations = slice_locations[::-1]

            # Put data (images and metadata) in right location
            put_in_the_middle(result[tag][index], patient_4d_tensor, True)

            if "sliced:data:sax:locations" in result:
                eps_location = 1e-7
                is_padded = np.array([False]*len(result["sliced:data:sax:locations"][index]))
                put_in_the_middle(result["sliced:data:sax:locations"][index], slice_locations + eps_location, True, is_padded)

            if "sliced:data:sax:distances" in result:
                eps_location = 1e-7
                sorted_distances.append(0.0)  # makes correct padding easier
                is_padded = np.array([False]*len(result["sliced:data:sax:distances"][index]))
                put_in_the_middle(result["sliced:data:sax:distances"][index], np.array(sorted_distances) + eps_location, True, is_padded)

            if "sliced:data:sax:is_not_padded" in result:
                result["sliced:data:sax:is_not_padded"][index] = np.logical_not(is_padded)



        elif tag.startswith("sliced:data:chanzoom:2ch"):
            # step 1: sort (data, metadata_tag) with slice_location_finder
            slice_locations, sorted_indices, sorted_distances = slice_location_finder({i: metadata for i,metadata in enumerate(metadata_tag[2])})

            top_slice_metadata = metadata_tag[2][sorted_indices[0]]
            bottom_slice_metadata = metadata_tag[2][sorted_indices[-1]]

            ch2_metadata = metadata_tag[1]
            ch4_metadata = metadata_tag[0]

            trf_2ch, trf_4ch = get_chan_transformations(
                ch2_metadata=ch2_metadata,
                ch4_metadata=ch4_metadata,
                top_point_metadata = top_slice_metadata,
                bottom_point_metadata = bottom_slice_metadata,
                output_width=desired_shape[-1]
                )

            ch4_3d_patient_tensor, ch2_3d_patient_tensor = [], []
            ch4_data = data[0]
            ch2_data = data[1]
            if ch4_data is None and ch2_data is not None:
                ch4_data = ch2_data
                ch4_metadata = ch2_metadata
            if ch2_data is None and ch4_data is not None:
                ch2_data = ch4_data
                ch2_metadata = ch4_metadata

            for ch, ch_result, transform, metadata in [(ch4_data, ch4_3d_patient_tensor, trf_4ch, ch4_metadata),
                                                        (ch2_data, ch2_3d_patient_tensor, trf_2ch, ch2_metadata)]:
                tform_shift_center, tform_shift_uncenter = build_center_uncenter_transforms(desired_shape[-2:])
                zoom_factor = np.sqrt(np.abs(np.linalg.det(transform.params[:2,:2])) * np.prod(metadata["PixelSpacing"]))
                normalise_zoom_transform = build_augmentation_transform(zoom_x=zoom_factor, zoom_y=zoom_factor)
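                # The determinant of the 2x2 linear part gives the area scaling of the channel
                # transform; multiplied by the physical pixel area and square-rooted it appears to
                # act as an isotropic mm-per-pixel scale, which normalise_zoom_transform compensates for.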
                if augmentation_params:
                    augment_tform = build_augmentation_transform(**augmentation_params)
                    total_tform = tform_shift_uncenter + augment_tform + normalise_zoom_transform + tform_shift_center + transform
                else:
                    total_tform = tform_shift_uncenter + normalise_zoom_transform + tform_shift_center + transform

                ch_result[:] = [fast_warp(c, total_tform, output_shape=desired_shape[-2:]) for c in ch]
                # print "zoom factor:", zoom_factor

            if augmentation_params is not None:
                zoom_factor = augmentation_params["zoom_x"] * augmentation_params["zoom_y"]
            else:
                zoom_factor = 1.0
            # Clean data further
            ch4_3d_patient_tensor = clean_images(np.array([ch4_3d_patient_tensor]), metadata=ch4_metadata, cleaning_processes=cleaning_processes_post)[0]
            ch2_3d_patient_tensor = clean_images(np.array([ch2_3d_patient_tensor]), metadata=ch2_metadata, cleaning_processes=cleaning_processes_post)[0]

            # Put data (images and metadata) in right location
            put_in_the_middle(result["sliced:data:chanzoom:2ch"][index], ch2_3d_patient_tensor, True)
            put_in_the_middle(result["sliced:data:chanzoom:4ch"][index], ch4_3d_patient_tensor, True)

        elif tag.startswith("sliced:data:shape"):
            raise NotImplementedError()

        elif tag.startswith("sliced:data"):
            # put time dimension first, then axis dimension
            data = clean_images(patient_data[tag], metadata=metadata_tag)
            patient_4d_tensor, zoom_ratios = resize_and_augment(data, output_shape=desired_shape[-2:], augment=augmentation_params)
            if "area_per_pixel:sax" in result:
                result["area_per_pixel:sax"][index] = zoom_ratios[0] * np.prod(metadata_tag[0]["PixelSpacing"])

            if "noswitch" not in tag:
                patient_4d_tensor = np.swapaxes(patient_4d_tensor,1,0)

            put_in_the_middle(result[tag][index], patient_4d_tensor)

        elif tag.startswith("sliced:meta:all"):
            # TODO: this probably doesn't work very well yet
            result[tag][index] = patient_data[tag]

        elif tag.startswith("sliced:meta:PatientSex"):
            result[tag][index][0] = -1. if patient_data[tag]=='M' else 1.

        elif tag.startswith("sliced:meta:PatientAge"):
            number, letter = patient_data[tag][:3], patient_data[tag][-1]
            letter_rescale_factors = {'D': 365.25, 'W': 52.1429, 'M': 12., 'Y': 1.}
            result[tag][index][0] = float(number) / letter_rescale_factors[letter]
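            # e.g. '036M' -> 36 / 12.0 = 3.0 years; the factors convert days/weeks/months to years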

    if augmentation_params and zoom_factor:
        label_correction_function = lambda x: x * zoom_factor
        classification_correction_function = lambda x: utils.zoom_array(x, 1./zoom_factor)
        return label_correction_function, classification_correction_function
    else:
        return lambda x: x, lambda x: x
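    # The first returned function rescales labels by the applied zoom; the second presumably
    # re-bins a predicted distribution via utils.zoom_array with the inverse zoom. With no
    # augmentation (or no zoom) both are identity functions.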
コード例 #52
0
ファイル: test_fpred_patch.py プロジェクト: ericsolo/python
metadata = utils.load_pkl(metadata_path)
expid = metadata['experiment_id']

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s-test.log' % expid)
sys.stderr = sys.stdout

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/' + expid
utils.auto_make_dir(outputs_path)

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s' % (name, num_param, layer.output_shape)

nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
コード例 #53
0
expid = utils.generate_expid(config_name)
print()
print("Experiment ID: %s" % expid)
print()

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print('Build model')
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print('  number of parameters: %d' % num_params)
print(string.ljust('  layer output shapes:', 36), end=' ')
print(string.ljust('#params:', 10), end=' ')
print('output shape:')
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print('    %s %s %s %s' %
          (name, num_param, layer.output_shape, layer.name))

train_loss = config().build_objective(model, deterministic=False)
コード例 #54
0
ファイル: test_iterators.py プロジェクト: ericsolo/python
from configuration import set_configuration, config
import utils_plots
import numpy as np

set_configuration('configs_seg_scan', 'luna_s_local')

data_iter = config().valid_data_iterator
for (x, y, lung_mask, annotations, transform_matrices, pid) in data_iter.generate():

    predictions_scan = lung_mask * x

    for nodule_n, zyxd in enumerate(annotations):
        utils_plots.plot_slice_3d_4(input=x[0, 0], lung_mask=lung_mask[0, 0], prediction=predictions_scan[0, 0],
                                    mask=y[0, 0],
                                    axis=0, pid='-'.join([str(nodule_n), str(pid)]), idx=zyxd)
コード例 #55
0
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print '    %s %s %s %s' % (name, num_param, layer.output_shape, layer.name)

train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)
コード例 #56
0
ファイル: test_lung_seg_scan.py プロジェクト: ericsolo/python
    sys.exit("Usage: test_luna_scan.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_seg_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

data_iterator = config().train_data_iterator

print
print 'Data'
print 'n samples: %d' % data_iterator.nsamples

start_time = time.time()
n_pos = 0
tp = 0
for n, (x, y, lung_mask, annotations, tf_matrix, pid) in enumerate(data_iterator.generate()):
    print '-------------------------------------'
    print n, pid
    n_pos += annotations.shape[0]
    n_pid_tp = 0
    annotations = np.int32(annotations)
    for i in xrange(annotations.shape[0]):
コード例 #57
0
    blobs = np.asarray(blobs_original_voxel_coords)
    print blobs.shape
    utils.save_pkl(blobs, outputs_path + '/%s.pkl' % pid)


jobs = []
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 3:
    sys.exit("Usage: test_seg_scan_dsb.py <configuration_name> <data_iterator_part>")

config_name = sys.argv[1]
set_configuration('configs_seg_scan', config_name)

data_iterator_part = int(sys.argv[2])  # start from 0
assert data_iterator_part < len(config().data_iterators)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
utils.auto_make_dir(outputs_path)

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % config_name)
sys.stderr = sys.stdout

# builds model and sets its parameters
model = config().build_model()

x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
コード例 #58
0
ファイル: fabfile.py プロジェクト: xnomagichash/filament
def deploy(module_name=None, resume=False, lock_root=False):
    app = flask.discover(module_name)
    lpath = os.path.realpath(os.path.join(env.flask_dir, app.name))

    # create new server
    server = Provider.load(env.provider)
    server.name = "-".join((app.name, uuid4().hex))
    server.create().wait()

    # prepare system software
    api.run('apt-get install -y openssh-server')

    # set up firewall
    api.run('mkdir /etc/iptables')
    api.put(config('iptables'), '/etc/iptables/rules')
    api.put(config('iptables.sh'), '/etc/network/if-pre-up.d/iptables')
    api.run('chmod +x /etc/network/if-pre-up.d/iptables')

    # install software
    with open(config('packages.txt')) as requirements:
        for requirement in requirements:
            print("installing requirement `{0}`...".format(requirement))
            api.run('apt-get install -y {0}'.format(requirement.strip()))
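    # packages.txt is assumed to be a plain list of apt package names, one per line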

    # install python packages
    api.put(config('requirements.txt'), 'requirements.txt')
    api.run('pip3 install -r requirements.txt')
    api.run('rm requirements.txt')

    # deploy application
    rpath = os.path.join(SRV_ROOT, app.name)
    api.put(lpath, SRV_ROOT, use_sudo=True)
    api.run('chown -R www-data {0}'.format(rpath))
    api.run('chmod -R 500 {0}'.format(rpath))
    api.put(config('uwsgi.ini'), rpath, use_sudo=True)

    # extract socket_name
    with open(config('uwsgi.ini')) as ini:
        conf = ConfigParser.RawConfigParser()
        conf.readfp(ini)
        socket_name = conf.get('uwsgi', 'socket')
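    # uwsgi.ini is assumed to contain a [uwsgi] section with a socket entry, e.g. (illustrative):
    #   [uwsgi]
    #   socket = /tmp/app.sock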


    # configure web server
    render('nginx.conf', '/etc/nginx/nginx.conf',
        socket_name = socket_name,
        static_directories = [
            ( x.static_url_path, os.path.join(
                rpath,
                os.path.realpath(x.static_folder).split(lpath)[1][1:]
            )) for x in chain([app], app.blueprints.values())
               if x.static_url_path ]
    )

    # configure supervisord
    api.run('pip install supervisor')
    render('supervisord.conf', '/etc/supervisord.conf',
        flask_dir = module_name,
        location = SRV_ROOT,
        ini_file = os.path.join(rpath, 'uwsgi.ini'),
    )

    # start web service
    api.run('/usr/local/bin/supervisord')
    api.run('service nginx start')

    if lock_root:
        # prepare admin user
        api.run('addgroup admin')
        api.run('adduser admin --quiet --ingroup admin --gecos ""')
        api.run('sudo -u admin mkdir /home/admin/.ssh')
        api.put('~/.ssh/id_rsa.pub', '/home/admin/.ssh/authorized_keys')
        api.run('chown admin:admin /home/admin/.ssh/authorized_keys')

        # lock down SSH
        api.put('build/sshd_config', '/etc/ssh/sshd_config')
        api.run('service ssh restart')