Example #1
    def dataToSlides(stitch=True):
        t_gen_slides_0 = time.time()
        all_images = get_images()
        all_slides = []
        for imagefile in all_images:
            all_slides.append(
                Slide(imagefile)
            )  # Pixel computations are done here, as the slide is created.
        printl('Total # of non-zero pixels: ' + str(Pixel.total_pixels) +
               ', total number of pixels after filtering: ' +
               str(len(Pixel.all)))
        printl('Total # of blob2ds: ' + str(len(Blob2d.all)))
        printl('Generating ' + str(len(all_slides)) + ' slides took', end='')
        print_elapsed_time(t_gen_slides_0, time.time(), prefix='')
        printl(
            "Pairing all blob2ds with their potential partners in adjacent slides",
            flush=True)
        Slide.set_possible_partners(all_slides)

        if stitch:
            printl('Setting shape contexts for all blob2ds ',
                   flush=True,
                   end="")
            Slide.set_all_shape_contexts(all_slides)
            stitchlist = Pairing.stitchAllBlobs(
                all_slides, debug=False
            )  # TODO change this to work with a list of ids or blob2ds
        else:
            printl(
                '\n-> Skipping stitching the slides; this will result in less accurate blob3ds for the time being'
            )
        blob3dlist = Slide.extract_blob3ds(all_slides, stitched=stitch)
        printl('There are a total of ' + str(len(blob3dlist)) + ' blob3ds')
        return all_slides, blob3dlist  # Returns slides and all their blob3ds in a list
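All of these snippets lean on a shared print_elapsed_time helper whose body is not shown. A minimal sketch consistent with the call sites above (the parameters prefix, endline, and flush are taken from the calls; the duration formatting is an assumption):

    import time

    def print_elapsed_time(t0, t1, prefix='', endline=True, flush=False):
        # Print the elapsed time between two time.time() stamps.
        # Sketch only; the real helper may format durations differently.
        elapsed = t1 - t0
        if elapsed < 60:
            formatted = '%.2f seconds' % elapsed
        else:
            minutes, seconds = divmod(elapsed, 60)
            formatted = '%d min %.2f sec' % (minutes, seconds)
        print((prefix + ' ' + formatted).strip(), end='\n' if endline else '', flush=flush)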
Example #2
    def stitchAllBlobs(slidelist, quiet=True, debug=False):
        t_start_stitching = time.time()
        printl('')
        for slide_num, slide in enumerate(slidelist[:-1]):
            # Skip the last slide: pairings go from a lower slide to the slide above it, so the last slide is already handled together with the second-to-last slide.
            # I.e. blob2ds in the last slide are partners of the previous slide's blob2ds, and have no possible partners of their own.
            t_start_stitching_this_slide = time.time()
            printl('Stitching %s blob2ds from slide #%s/%s to %s blob2ds from slide #%s/%s'
                   % (len(slide.blob2dlist), slide_num + 1, len(slidelist),
                      len(slidelist[slide_num + 1].blob2dlist), slide_num + 2, len(slidelist)),
                   end=' ')

            progress = ProgressBar(max_val=len(slide.blob2dlist), increments=20,
                                   symbol='.')  # Note: progress per blob is more responsive than per pixel, since only a subset of pixels is used for stitching
            for b_num, blob1 in enumerate(slide.blob2dlist):
                blob1 = Blob2d.get(blob1)
                if debug and len(blob1.possible_partners) > 0:
                    printl('  Starting on a new blob from bloblist: ' + str(blob1) + ' which has ' +
                           str(len(blob1.possible_partners)) + ' possible partners')
                for b2_num, blob2 in enumerate(blob1.possible_partners):
                    blob2 = Blob2d.get(blob2)
                    if debug:
                        printl('   Comparing to blob2:' + str(blob2))
                    new_stitch = Pairing(blob1.id, blob2.id, 1.1, 36, quiet=quiet) # TODO use this to assign ids to pairings
                progress.update(b_num, set_val=True)

            if quiet and not debug:
                progress.finish()
                print_elapsed_time(t_start_stitching_this_slide, time.time(), prefix='took')
        print_elapsed_time(t_start_stitching, time.time(), prefix='Stitching all slides took', endline=False)
        printl(' total')
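ProgressBar is likewise not defined in these examples. A minimal sketch matching the constructor arguments and the update(..., set_val=...) / finish() calls above (the internals are an assumption):

    import sys

    class ProgressBar:
        def __init__(self, max_val, increments=20, symbol='.'):
            self.max_val = max(max_val, 1)  # guard against division by zero
            self.increments = increments    # total number of symbols to print
            self.symbol = symbol
            self.current = 0                # internal progress counter
            self.printed = 0                # symbols emitted so far

        def update(self, val, set_val=False):
            # set_val=True treats val as an absolute position;
            # set_val=False adds val to the internal counter.
            self.current = val if set_val else self.current + val
            target = int(self.increments * self.current / self.max_val)
            while self.printed < min(target, self.increments):
                sys.stdout.write(self.symbol)
                sys.stdout.flush()
                self.printed += 1

        def finish(self):
            self.update(self.max_val, set_val=True)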
Example #3
    def set_all_shape_contexts(slidelist):
        # Note: uses the shape-contexts approach from http://www.cs.berkeley.edu/~malik/papers/mori-belongie-malik-pami05.pdf
        # Note: the paper uses 'Representative Shape Contexts' to do initial matching; this is skipped here in favor of checking bounds for possible overlaps
        t0 = time.time()
        pb = ProgressBar(max_val=sum(
            len(Blob2d.get(b2d).edge_pixels) for slide in slidelist
            for b2d in slide.blob2dlist))
        for slide in slidelist:
            for blob in slide.blob2dlist:
                Blob2d.get(blob).set_shape_contexts(36)
                pb.update(len(Blob2d.get(blob).edge_pixels), set_val=False)
        pb.finish()
        print_elapsed_time(t0, time.time(), prefix='took')
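For reference, a shape context is a log-polar histogram of where the other edge points lie relative to a given point. The following is a compact illustration of that descriptor, not the project's set_shape_contexts implementation; the bin layout follows the cited paper's conventions, and the 36 passed above presumably corresponds to the total number of bins:

    import numpy as np

    def shape_context(points, index, n_radial=3, n_angular=12):
        # Log-polar histogram of all other edge points relative to points[index].
        pts = np.asarray(points, dtype=float)
        rel = np.delete(pts, index, axis=0) - pts[index]
        r = np.hypot(rel[:, 0], rel[:, 1])
        theta = np.arctan2(rel[:, 1], rel[:, 0]) % (2 * np.pi)
        r_norm = r / r.mean()  # normalize by mean distance for scale invariance
        # log-spaced radial bin edges, as in the shape-context literature
        r_edges = np.logspace(np.log10(0.125), np.log10(2.0), n_radial + 1)
        r_bin = np.searchsorted(r_edges, r_norm) - 1
        t_bin = (theta / (2 * np.pi) * n_angular).astype(int) % n_angular
        hist = np.zeros((n_radial, n_angular))
        valid = (r_bin >= 0) & (r_bin < n_radial)  # points outside the radial range are dropped
        np.add.at(hist, (r_bin[valid], t_bin[valid]), 1)
        return hist.ravel()  # 3 x 12 = 36 bins, flattened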
Example #4
def save(blob3dlist, filename, directory=Config.PICKLEDIR):
    slash = ''
    if directory != '':
        if directory[-1] not in ['/', '\\']:
            slash = '/'
    filename = directory + slash + filename
    printl('\nSaving to file \'' + filename + '\'')
    done = False
    while not done:
        try:
            printl('Pickling ' + str(len(blob3dlist)) + ' b3ds ', end='')
            t0 = t = time.time()
            with open(filename + '_b3ds', "wb") as f:
                pickle.dump(
                    {
                        'b3ds': Blob3d.all,
                        'possible_merges': Blob3d.possible_merges,
                        'merged_b3ds': Blob3d.lists_of_merged_blob3ds,
                        'merged_parent_b3ds': Blob3d.list_of_merged_blob3d_parents
                    },
                    f,
                    protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')

            printl('Pickling ' + str(len(Blob2d.all)) + ' b2ds ', end='')
            t = time.time()
            with open(filename + '_b2ds', "wb") as f:
                pickle.dump({
                    'b2ds': Blob2d.all,
                    'used_ids': Blob2d.used_ids
                }, f, protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')

            printl('Pickling ' + str(len(Pixel.all)) + ' pixels ', end='')
            t = time.time()
            with open(filename + '_pixels', "wb") as f:
                pickle.dump({
                    'pixels': Pixel.all,
                    'total_pixels': Pixel.total_pixels
                }, f, protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')
            done = True

            printl('Saving took:', end='')
            print_elapsed_time(t0, time.time(), prefix='')
        except RuntimeError:
            printl(
                '\nIf recursion depth has been exceeded, '
                'you may increase the maximal depth with: sys.setrecursionlimit(<newdepth>)'
            )
            printl('The current max recursion depth is: ' +
                   str(sys.getrecursionlimit()))
            printl(
                'Opening up an interactive console, press \'n\' then \'enter\' to load variables before interacting,'
                ' and enter \'exit\' to resume execution')
            debug()
    log.flush()
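The RuntimeError handler above exists because pickling a deeply nested object graph with protocol 0 recurses once per level and can exceed Python's recursion limit. The workaround the message suggests looks like this:

    import sys

    sys.setrecursionlimit(10000)  # default is typically 1000; raise with care,
                                  # since an excessive limit can crash the interpreter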
Example #5
    def assign_alive_pixels_to_blob2dlist(self, quiet=False):
        self.assignPixelsToIds(self.alive_pixels)  # Note: only prints when this is the primary slide
        id_lists = pixels_to_id_lists(self.alive_pixels)
        self.blob2dlist = []  # Note: blobs in this list are ordered by pixel count, not by id, which makes merging faster
        for blobslist in id_lists:
            newb2d = Blob2d(blobslist, self.height)
            self.blob2dlist.append(newb2d.id)
        if not quiet:
            printl('There were ' + str(len(self.alive_pixels)) +
                   ' alive pixels assigned to ' + str(len(self.blob2dlist)) +
                   ' blobs.')
            self.tf = time.time()
            printl('Creating this slide took', end='')
            print_elapsed_time(self.t0, self.tf, prefix='')
            printl('')
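pixels_to_id_lists is not shown in these examples; judging from its use here, it groups the alive pixels by the blob id assigned in assignPixelsToIds and orders the groups by size. A hypothetical sketch (the blob_id attribute name is an assumption):

    from collections import defaultdict

    def pixels_to_id_lists(pixels):
        # Group pixels by their assigned blob id; sorting by descending
        # pixel count matches the comment above about faster merging.
        groups = defaultdict(list)
        for pixel in pixels:
            groups[pixel.blob_id].append(pixel)
        return sorted(groups.values(), key=len, reverse=True)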
Example #6
def main():
    util.start_timetest()
    util.print_elapsed_time()
    util.printlog('abc')
    util.sleep(0.1)
    util.printlog()
    util.printlog('xyz')
    util.sleep(0.2)
    util.printlog('1234567890')
    util.sleep(3)
    util.printlog()
    util.print_elapsed_time()
    util.sleep(1)
    util.print_elapsed_time('Elapsed: ')
    util.sleep(0.5)
    util.print_elapsed_time(suffix=' has passed')
    util.sleep(0.5)
    util.print_elapsed_time('It took ', ' to process')
    util.printlog('Done')
Example #7
def main():
    if len(sys.argv) == 4:
        start_time_program = time.time()

        messages_filepath, categories_filepath, database_filepath = sys.argv[1:]
        print()

        start_time = time.time()
        print('Loading data...\n    MESSAGES: {}\n    CATEGORIES: {}'
              .format(messages_filepath, categories_filepath))
        df = load_data(messages_filepath, categories_filepath)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Cleaning data...')
        df = clean_data(df)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Saving data...\n    DATABASE: {}'.format(database_filepath))
        save_data(df, database_filepath)
        util.print_elapsed_time(start_time, time.time())

        print()
        print('Cleaned data saved to database!')
        util.print_elapsed_time(start_time_program, time.time(), prompt="Total execution time")
    
    else:
        print("""
Invalid number of arguments. See the usage below.
        
Usage: python {} MESSAGES CATEGORIES DATABASE

    MESSAGES:   Message dataset (.csv-file, input)
    CATEGORIES: Categories dataset (.csv-file, input)
    DATABASE:   Database to store the cleaned messages (.sqlite-file, output)
    
Example: python process_data.py disaster_messages.csv disaster_categories.csv DisasterResponse.sqlite
""".format(sys.argv[0]))
Example #8
def load(filename, directory=Config.PICKLEDIR):
    slash = ''
    if directory != '' and directory[-1] not in ['/', '\\']:
        slash = '/'
    filename = directory + slash + filename
    t_start = time.time()
    printl('Loading from file \'' + filename + '\'')

    printl('Loading b3ds ', end='', flush=True)
    t = time.time()

    with open(filename + '_b3ds', "rb") as f:
        buff = pickle.load(f)
    Blob3d.all = buff['b3ds']

    found_merged_b3ds = False
    if "merged_b3ds" in buff:
        Blob3d.lists_of_merged_blob3ds = buff["merged_b3ds"]
        found_merged_b3ds = True
    found_merged_parents = False
    if "merged_parent_b3ds" in buff:
        Blob3d.list_of_merged_blob3d_parents = buff["merged_parent_b3ds"]
        found_merged_parents = True

    Blob3d.next_id = max(b3d.id for b3d in Blob3d.all.values()) + 1
    print_elapsed_time(t,
                       time.time(),
                       prefix='(' + str(len(Blob3d.all)) + ') took',
                       flush=True)
    if not found_merged_b3ds:
        print(
            " No lists of merged b3ds found, likely a legacy pickle file or small dataset"
        )
    if not found_merged_parents:
        print(
            " No lists of merged b3 parents found, likely a legacy pickle file or small dataset"
        )

    printl('Loading b2ds ', end='', flush=True)
    t = time.time()
    with open(filename + '_b2ds', "rb") as f:
        buff = pickle.load(f)
    Blob2d.all = buff['b2ds']
    Blob2d.used_ids = buff['used_ids']
    Blob2d.total_blobs = len(Blob2d.all)
    print_elapsed_time(t,
                       time.time(),
                       prefix='(' + str(len(Blob2d.all)) + ') took',
                       flush=True)

    printl('Loading pixels ', end='', flush=True)
    t = time.time()
    with open(filename + '_pixels', "rb") as f:
        buff = pickle.load(f)
    Pixel.all = buff['pixels']
    Pixel.total_pixels = len(Pixel.all)
    print_elapsed_time(t,
                       time.time(),
                       prefix='(' + str(len(Pixel.all)) + ') took',
                       flush=True)

    printl('Total time to load:', end='')
    print_elapsed_time(t_start, time.time(), prefix='')
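Note that save() and load() operate on the class-level registries (Blob3d.all, Blob2d.all, Pixel.all) rather than passing objects through return values. A minimal round trip based on the two functions above (the file stem 'experiment_01' is illustrative):

    save(blob3dlist, 'experiment_01')       # writes the _b3ds, _b2ds and _pixels files
    load('experiment_01')                   # repopulates Blob3d.all, Blob2d.all, Pixel.all
    blob3dlist = list(Blob3d.all.values())  # recover the b3ds from the registry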
Example #9
def bloom_b3ds(blob3dlist, stitch=False):
    allb2ds = [Blob2d.get(b2d) for b3d in blob3dlist for b2d in b3d.blob2ds]
    printl('\nProcessing internals of ' + str(len(allb2ds)) +
           ' 2d blobs via \'blooming\' ',
           end='')
    t_start_bloom = time.time()
    num_unbloomed = len(allb2ds)
    pb = ProgressBar(max_val=sum(len(b2d.pixels) for b2d in allb2ds),
                     increments=50)
    for blob2d in allb2ds:
        blob2d.gen_internal_blob2ds()  # NOTE: result will have len 0 if no blooming can be done
        pb.update(len(blob2d.pixels), set_val=False)  # set_val=False adds to the bar's internal counter
    pb.finish()

    print_elapsed_time(t_start_bloom, time.time(), prefix='took')
    printl('Before blooming there were: ' + str(num_unbloomed) +
           ' b2ds contained within b3ds, there are now ' +
           str(len(Blob2d.all)))

    # Setting possible_partners
    printl(
        'Pairing all new blob2ds with their potential partners in adjacent slides'
    )
    max_avail_depth = max(b2d.recursive_depth for b2d in Blob2d.all.values())
    for cur_depth in range(1, max_avail_depth):  # Skip those at depth 0
        depth = [
            b2d.id for b2d in Blob2d.all.values()
            if b2d.recursive_depth == cur_depth
        ]
        max_h_d = max(Blob2d.all[b2d].height for b2d in depth)
        min_h_d = min(Blob2d.all[b2d].height for b2d in depth)
        ids_by_height = [[] for _ in range(max_h_d - min_h_d + 1)]
        for b2d in depth:
            ids_by_height[Blob2d.get(b2d).height - min_h_d].append(b2d)
        for height_val, h in enumerate(
                ids_by_height[:-1]):  # All but the last one
            for b2d in h:
                b2d = Blob2d.all[b2d]
                b2d.set_possible_partners(ids_by_height[height_val + 1])

    # Creating b3ds
    printl('Creating 3d blobs from the generated 2d blobs')
    all_new_b3ds = []
    for depth_offset in range(1, max_avail_depth + 1):
        # Skip offset of zero, which refers to the b3ds that have already been stitched
        printd('Depth_offset: ' + str(depth_offset), Config.debug_blooming)
        new_b3ds = []
        for b3d in blob3dlist:
            all_d1_with_pp_in_this_b3d = []
            for b2d in b3d.blob2ds:
                # Note this is the alternative to storing b3dID with b2ds
                b2d = Blob2d.get(b2d)
                d_1 = [
                    blob for blob in b2d.getdescendants()
                    if blob.recursive_depth == b2d.recursive_depth +
                    depth_offset
                ]
                if len(d_1):
                    for desc in d_1:
                        if len(desc.possible_partners):
                            all_d1_with_pp_in_this_b3d.append(desc.id)
            all_d1_with_pp_in_this_b3d = set(all_d1_with_pp_in_this_b3d)
            if len(all_d1_with_pp_in_this_b3d) != 0:
                printd(' Working on b3d: ' + str(b3d), Config.debug_blooming)
                printd(
                    '  Len of all_d1_with_pp: ' +
                    str(len(all_d1_with_pp_in_this_b3d)),
                    Config.debug_blooming)
                printd('  They are: ' + str(all_d1_with_pp_in_this_b3d),
                       Config.debug_blooming)
                printd(
                    '   = ' + str(
                        list(
                            Blob2d.get(b2d)
                            for b2d in all_d1_with_pp_in_this_b3d)),
                    Config.debug_blooming)
            for b2d in all_d1_with_pp_in_this_b3d:
                b2d = Blob2d.get(b2d)
                printd(
                    '    Working on b2d: ' + str(b2d) + ' with pp: ' +
                    str(b2d.possible_partners), Config.debug_blooming)
                if b2d.b3did == -1:  # unset
                    cur_matches = [b2d]  # NOTE: this was changed by removing .getdescendants()  # HACK
                    for pp in b2d.possible_partners:
                        printd(
                            "     *Checking if pp:" + str(pp) +
                            ' is in all_d1: ' +
                            str(all_d1_with_pp_in_this_b3d),
                            Config.debug_blooming)
                        if pp in all_d1_with_pp_in_this_b3d:  # HACK REMOVED
                            printd("     Added partner: " + str(pp),
                                   Config.debug_blooming)
                            cur_matches += [
                                Blob2d.get(b)
                                for b in Blob2d.get(pp).getpartnerschain()
                            ]
                    if len(cur_matches) > 1:
                        printd("**LEN OF CUR_MATCHES MORE THAN 1",
                               Config.debug_blooming)
                        new_b3d_list = [
                            blob.id for blob in set(cur_matches)
                            if blob.recursive_depth == b2d.recursive_depth
                            and blob.b3did == -1
                        ]
                        if len(new_b3d_list):
                            new_b3ds.append(
                                Blob3d(new_b3d_list,
                                       r_depth=b2d.recursive_depth))
        all_new_b3ds += new_b3ds
    printl(' Made a total of ' + str(len(all_new_b3ds)) + ' new b3ds')

    if stitch:
        # Set up shape contexts
        printl('Setting shape contexts for stitching')
        for b2d in [
                Blob2d.all[b2d] for b3d in all_new_b3ds for b2d in b3d.blob2ds
        ]:
            b2d.set_shape_contexts(36)

        # Stitching
        printl('Stitching the newly generated 2d blobs')
        for b3d_num, b3d in enumerate(all_new_b3ds):
            printl(' Working on b3d: ' + str(b3d_num) + ' / ' +
                   str(len(all_new_b3ds)))
            Pairing.stitch_blob2ds(b3d.blob2ds, debug=False)
    return all_new_b3ds
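The pairing pass in bloom_b3ds buckets blob2ds by height so that each blob is only compared against candidates one slice up. The same pattern in isolation, with generic names for illustration:

    def bucket_by_height(items, height_of):
        # Group items into consecutive height buckets and yield
        # (bucket, bucket_above) pairs, mirroring the ids_by_height loop above.
        heights = [height_of(item) for item in items]
        min_h, max_h = min(heights), max(heights)
        buckets = [[] for _ in range(max_h - min_h + 1)]
        for item, h in zip(items, heights):
            buckets[h - min_h].append(item)
        for lower, upper in zip(buckets, buckets[1:]):
            yield lower, upper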
Example #10
def main():
    if len(sys.argv) == 3:
        start_time_program = time.time()
        database_filepath, model_filepath = sys.argv[1:]
        print()

        start_time = time.time()
        print('Loading data...\n    DATABASE: {}'.format(database_filepath))
        X, Y, category_names = load_data(database_filepath)
        X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.2,
                                                            random_state=42)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Building model...')
        model = build_model(grid_search=True)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Training model...')
        model = train_model(model, X_train, Y_train)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Evaluating model...')
        evaluate_model(model, X_test, Y_test, category_names)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Saving model...\n    MODEL: {}'.format(model_filepath))
        save_model(model, model_filepath)
        util.print_elapsed_time(start_time, time.time())

        print()
        print('Trained model saved!')
        util.print_elapsed_time(start_time_program,
                                time.time(),
                                prompt="Total execution time")

    else:
        print("""
Invalid number of arguments. See the usage below.
        
Usage: python {} DATABASE CLASSIFIER

    DATABASE:   Database containing cleaned messages and categories (.sqlite-file, input)
    CLASSIFIER: File to store the trained classifier (.pickle-file, output)
    
Example: python train_classifier.py DisasterResponse.sqlite classifier.pickle
""".format(sys.argv[0]))