def dataToSlides(stitch=True):
    t_gen_slides_0 = time.time()
    all_images = get_images()
    all_slides = []
    for imagefile in all_images:
        all_slides.append(Slide(imagefile))  # Pixel computations are done here, as the slide is created
    printl('Total # of non-zero pixels: ' + str(Pixel.total_pixels)
           + ', total number of pixels after filtering: ' + str(len(Pixel.all)))
    printl('Total # of blob2ds: ' + str(len(Blob2d.all)))
    printl('Generating ' + str(len(all_slides)) + ' slides took', end='')
    print_elapsed_time(t_gen_slides_0, time.time(), prefix='')
    printl('Pairing all blob2ds with their potential partners in adjacent slides', flush=True)
    Slide.set_possible_partners(all_slides)
    if stitch:
        printl('Setting shape contexts for all blob2ds ', flush=True, end='')
        Slide.set_all_shape_contexts(all_slides)
        stitchlist = Pairing.stitchAllBlobs(all_slides, debug=False)  # TODO change this to work with a list of ids or blob2ds
    else:
        printl('\n-> Skipping stitching the slides; this will result in less accurate blob3ds for the time being')
    blob3dlist = Slide.extract_blob3ds(all_slides, stitched=stitch)
    printl('There are a total of ' + str(len(blob3dlist)) + ' blob3ds')
    return all_slides, blob3dlist  # Returns the slides and all their blob3ds in a list
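# A minimal usage sketch for the entry point above. The __main__ guard is an
# assumption for illustration; the project's real driver script may differ.
if __name__ == '__main__':
    slides, b3ds = dataToSlides(stitch=True)
    printl('Generated ' + str(len(b3ds)) + ' blob3ds from ' + str(len(slides)) + ' slides')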
def stitchAllBlobs(slidelist, quiet=True, debug=False):
    t_start_stitching = time.time()
    printl('')
    for slide_num, slide in enumerate(slidelist[:-1]):
        # Skip the last slide: pairings go from a lower slide to the upper slide, so the last slide
        # is already processed together with the second-to-last. I.e. blob2ds in the last slide are
        # partners to the previous slide's blob2ds, and have no direct possible partners of their own.
        t_start_stitching_this_slide = time.time()
        printl('Stitching %s blob2ds from slide #%s/%s to %s blob2ds from slide #%s/%s'
               % (len(slide.blob2dlist), slide_num + 1, len(slidelist),
                  len(slidelist[slide_num + 1].blob2dlist), slide_num + 2, len(slidelist)), end=' ')
        # Note: progressing per blob is actually more responsive than per pixel,
        # because only a subset of the pixels is used to stitch.
        progress = ProgressBar(max_val=len(slide.blob2dlist), increments=20, symbol='.')
        for b_num, blob1 in enumerate(slide.blob2dlist):
            blob1 = Blob2d.get(blob1)
            if len(blob1.possible_partners) > 0:
                if debug:
                    printl(' Starting on a new blob from bloblist: ' + str(blob1) + ' which has: '
                           + str(len(blob1.possible_partners)) + ' possible partners')
                for b2_num, blob2 in enumerate(blob1.possible_partners):
                    blob2 = Blob2d.get(blob2)
                    if debug:
                        printl('  Comparing to blob2: ' + str(blob2))
                    new_stitch = Pairing(blob1.id, blob2.id, 1.1, 36, quiet=quiet)  # TODO use this to assign ids to pairings
            progress.update(b_num, set_val=True)
        if quiet and not debug:
            progress.finish()
            print_elapsed_time(t_start_stitching_this_slide, time.time(), prefix='took')
    print_elapsed_time(t_start_stitching, time.time(), prefix='Stitching all slides took', endline=False)
    printl(' total')
def set_all_shape_contexts(slidelist):
    # Note: uses the shape-contexts approach from
    # http://www.cs.berkeley.edu/~malik/papers/mori-belongie-malik-pami05.pdf
    # Note: the paper uses 'Representative Shape Contexts' to do initial matching;
    # that is dropped here in favor of checking bounds for possible overlaps.
    t0 = time.time()
    pb = ProgressBar(max_val=sum(len(Blob2d.get(b2d).edge_pixels)
                                 for slide in slidelist for b2d in slide.blob2dlist))
    for slide in slidelist:
        for blob in slide.blob2dlist:
            Blob2d.get(blob).set_shape_contexts(36)
            pb.update(len(Blob2d.get(blob).edge_pixels), set_val=False)
    pb.finish()
    print_elapsed_time(t0, time.time(), prefix='took')
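# A generic sketch of the log-polar shape-context histogram from the cited
# paper, for reference only; this is NOT the project's Blob2d.set_shape_contexts.
# The paper bins points into 5 radial x 12 angular cells; the 36 passed above
# presumably encodes the project's own bin count.
import numpy as np

def shape_context(points, index, n_radial=5, n_angular=12):
    """Histogram of all other points' positions relative to points[index]."""
    points = np.asarray(points, dtype=float)
    rel = np.delete(points, index, axis=0) - points[index]
    r = np.hypot(rel[:, 0], rel[:, 1])
    theta = np.arctan2(rel[:, 1], rel[:, 0]) % (2 * np.pi)
    r = r / r.mean()  # normalize by mean distance for scale invariance, as in the paper
    r_edges = np.logspace(np.log10(0.125), np.log10(2.0), n_radial + 1)
    r_bin = np.clip(np.digitize(r, r_edges) - 1, 0, n_radial - 1)
    t_bin = (theta / (2 * np.pi) * n_angular).astype(int) % n_angular
    hist = np.zeros((n_radial, n_angular))
    np.add.at(hist, (r_bin, t_bin), 1)  # count points per (radius, angle) cell
    return hist / hist.sum()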
def save(blob3dlist, filename, directory=Config.PICKLEDIR):
    slash = ''
    if directory != '' and directory[-1] not in ['/', '\\']:
        slash = '/'
    filename = directory + slash + filename
    printl('\nSaving to file \'' + str(filename) + '\'')
    done = False
    while not done:
        try:
            printl('Pickling ' + str(len(blob3dlist)) + ' b3ds ', end='')
            t0 = t = time.time()
            pickle.dump({'b3ds': Blob3d.all,
                         'possible_merges': Blob3d.possible_merges,
                         'merged_b3ds': Blob3d.lists_of_merged_blob3ds,
                         'merged_parent_b3ds': Blob3d.list_of_merged_blob3d_parents},
                        open(filename + '_b3ds', 'wb'), protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')

            printl('Pickling ' + str(len(Blob2d.all)) + ' b2ds ', end='')
            t = time.time()
            pickle.dump({'b2ds': Blob2d.all, 'used_ids': Blob2d.used_ids},
                        open(filename + '_b2ds', 'wb'), protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')

            printl('Pickling ' + str(len(Pixel.all)) + ' pixels ', end='')
            t = time.time()
            pickle.dump({'pixels': Pixel.all, 'total_pixels': Pixel.total_pixels},
                        open(filename + '_pixels', 'wb'), protocol=0)
            print_elapsed_time(t, time.time(), prefix='took')
            done = True

            printl('Saving took:', end='')
            print_elapsed_time(t0, time.time(), prefix='')
        except RuntimeError:  # Includes RecursionError, which pickling a deep object graph can raise
            printl('\nIf recursion depth has been exceeded, '
                   'you may increase the maximal depth with: sys.setrecursionlimit(<newdepth>)')
            printl('The current max recursion depth is: ' + str(sys.getrecursionlimit()))
            printl('Opening up an interactive console, press \'n\' then \'enter\' to load variables before interacting,'
                   ' and enter \'exit\' to resume execution')
            debug()
    log.flush()
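# The workaround suggested by the message above, shown explicitly: pickling a
# deep object graph with protocol 0 can exceed Python's default recursion
# limit (1000). The new limit below is an arbitrary illustrative value.
import sys
sys.setrecursionlimit(10000)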
def assign_alive_pixels_to_blob2dlist(self, quiet=False):
    self.assignPixelsToIds(self.alive_pixels)  # Note: only printing when primary slide
    id_lists = pixels_to_id_lists(self.alive_pixels)
    # Note: blobs in the blob list are ordered by number of pixels, not id; this makes merging faster
    self.blob2dlist = []
    for (blobnum, blobslist) in enumerate(id_lists):
        newb2d = Blob2d(blobslist, self.height)
        self.blob2dlist.append(newb2d.id)
    if not quiet:
        printl('There were ' + str(len(self.alive_pixels)) + ' alive pixels assigned to '
               + str(len(self.blob2dlist)) + ' blobs.')
        self.tf = time.time()
        printl('Creating this slide took', end='')
        print_elapsed_time(self.t0, self.tf, prefix='')
        printl('')
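# An illustration of the kind of grouping pixels_to_id_lists implies: an
# 8-connected flood fill over (x, y) coordinates. This is a generic sketch,
# not the project's implementation, which works on Pixel objects and ids.
def group_coords(coords):
    remaining = set(coords)
    groups = []
    while remaining:
        stack = [remaining.pop()]
        group = []
        while stack:
            x, y = stack.pop()
            group.append((x, y))
            for dx in (-1, 0, 1):
                for dy in (-1, 0, 1):
                    neighbor = (x + dx, y + dy)
                    if neighbor in remaining:
                        remaining.remove(neighbor)
                        stack.append(neighbor)
        groups.append(group)  # one connected blob of coordinates
    return groups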
def main():
    util.start_timetest()
    util.print_elapsed_time()
    util.printlog('abc')
    util.sleep(0.1)
    util.printlog()
    util.printlog('xyz')
    util.sleep(0.2)
    util.printlog('1234567890')
    util.sleep(3)
    util.printlog()
    util.print_elapsed_time()
    util.sleep(1)
    util.print_elapsed_time('Elapsed: ')
    util.sleep(0.5)
    util.print_elapsed_time(suffix=' has passed')
    util.sleep(0.5)
    util.print_elapsed_time('It took ', ' to process')
    util.printlog('Done')
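# A plausible sketch of the util helpers this test exercises, with signatures
# inferred purely from the calls above; the real util module may differ. Note
# that other files in this section call a two-timestamp form,
# print_elapsed_time(start, end, prefix=...), which is not sketched here.
import time as _time

_t0 = None

def start_timetest():
    global _t0
    _t0 = _time.time()

def sleep(seconds):
    _time.sleep(seconds)

def printlog(message=''):
    print('[%9.3fs] %s' % (_time.time() - _t0, message))

def print_elapsed_time(prefix='', suffix=''):
    print('%s%.3fs%s' % (prefix, _time.time() - _t0, suffix))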
def main():
    if len(sys.argv) == 4:
        start_time_program = time.time()
        messages_filepath, categories_filepath, database_filepath = sys.argv[1:]
        print()

        start_time = time.time()
        print('Loading data...\n    MESSAGES: {}\n    CATEGORIES: {}'
              .format(messages_filepath, categories_filepath))
        df = load_data(messages_filepath, categories_filepath)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Cleaning data...')
        df = clean_data(df)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Saving data...\n    DATABASE: {}'.format(database_filepath))
        save_data(df, database_filepath)
        util.print_elapsed_time(start_time, time.time())

        print()
        print('Cleaned data saved to database!')
        util.print_elapsed_time(start_time_program, time.time(), prompt="Total execution time")
    else:
        print("""
Invalid number of arguments. Please respect the usage message.

Usage:   python {} MESSAGES CATEGORIES DATABASE

    MESSAGES:   Message dataset (.csv-file, input)
    CATEGORIES: Categories dataset (.csv-file, input)
    DATABASE:   Database to store the cleaned messages (.sqlite-file, output)

Example: python process_data.py disaster_messages.csv disaster_categories.csv DisasterResponse.sqlite
""".format(sys.argv[0]))
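# A minimal sketch of what save_data might look like, assuming the cleaned
# pandas frame is written to a single SQLite table via SQLAlchemy; the table
# name and if_exists policy are assumptions, not the project's confirmed choices.
from sqlalchemy import create_engine

def save_data(df, database_filepath):
    engine = create_engine('sqlite:///' + database_filepath)
    df.to_sql('messages', engine, index=False, if_exists='replace')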
def load(filename, directory=Config.PICKLEDIR):
    if directory[-1] not in ['/', '\\']:
        slash = '/'
    else:
        slash = ''
    filename = directory + slash + filename
    t_start = time.time()
    printl('Loading from file \'' + str(filename) + '\'')

    printl('Loading b3ds ', end='', flush=True)
    t = time.time()
    buff = pickle.load(open(filename + '_b3ds', 'rb'))
    Blob3d.all = buff['b3ds']
    found_merged_b3ds = False
    if 'merged_b3ds' in buff:
        Blob3d.lists_of_merged_blob3ds = buff['merged_b3ds']
        found_merged_b3ds = True
    found_merged_parents = False
    if 'merged_parent_b3ds' in buff:
        Blob3d.list_of_merged_blob3d_parents = buff['merged_parent_b3ds']
        found_merged_parents = True
    Blob3d.next_id = max(b3d.id for b3d in Blob3d.all.values()) + 1
    print_elapsed_time(t, time.time(), prefix='(' + str(len(Blob3d.all)) + ') took', flush=True)
    if not found_merged_b3ds:
        print(' No lists of merged b3ds found; likely a legacy pickle file or a small dataset')
    if not found_merged_parents:
        print(' No lists of merged b3d parents found; likely a legacy pickle file or a small dataset')

    printl('Loading b2ds ', end='', flush=True)
    t = time.time()
    buff = pickle.load(open(filename + '_b2ds', 'rb'))
    Blob2d.all = buff['b2ds']
    Blob2d.used_ids = buff['used_ids']
    Blob2d.total_blobs = len(Blob2d.all)
    print_elapsed_time(t, time.time(), prefix='(' + str(len(Blob2d.all)) + ') took', flush=True)

    printl('Loading pixels ', end='', flush=True)
    t = time.time()
    buff = pickle.load(open(filename + '_pixels', 'rb'))
    Pixel.all = buff['pixels']
    Pixel.total_pixels = len(Pixel.all)
    print_elapsed_time(t, time.time(), prefix='(' + str(len(Pixel.all)) + ') took', flush=True)

    printl('Total time to load:', end='')
    print_elapsed_time(t_start, time.time(), prefix='')
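# Hypothetical round trip using the save()/load() pair above; the base
# filename 'example_run' is illustrative. Note that load() returns nothing:
# it repopulates Blob3d.all, Blob2d.all and Pixel.all in place.
save(blob3dlist, 'example_run')
load('example_run')  # e.g. later, in a fresh process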
def bloom_b3ds(blob3dlist, stitch=False):
    allb2ds = [Blob2d.get(b2d) for b3d in blob3dlist for b2d in b3d.blob2ds]
    printl('\nProcessing internals of ' + str(len(allb2ds)) + ' 2d blobs via \'blooming\' ', end='')
    t_start_bloom = time.time()
    num_unbloomed = len(allb2ds)
    pb = ProgressBar(max_val=sum(len(b2d.pixels) for b2d in allb2ds), increments=50)
    for bnum, blob2d in enumerate(allb2ds):
        blob2d.gen_internal_blob2ds()  # Note: will have len 0 if no blooming can be done
        pb.update(len(blob2d.pixels), set_val=False)  # set_val is False so that we add to an internal counter
    pb.finish()
    print_elapsed_time(t_start_bloom, time.time(), prefix='took')
    printl('Before blooming there were: ' + str(num_unbloomed)
           + ' b2ds contained within b3ds, there are now ' + str(len(Blob2d.all)))

    # Setting possible_partners
    printl('Pairing all new blob2ds with their potential partners in adjacent slides')
    max_avail_depth = max(b2d.recursive_depth for b2d in Blob2d.all.values())
    for cur_depth in range(1, max_avail_depth):  # Skip those at depth 0
        depth = [b2d.id for b2d in Blob2d.all.values() if b2d.recursive_depth == cur_depth]
        max_h_d = max(Blob2d.all[b2d].height for b2d in depth)
        min_h_d = min(Blob2d.all[b2d].height for b2d in depth)
        ids_by_height = [[] for _ in range(max_h_d - min_h_d + 1)]
        for b2d in depth:
            ids_by_height[Blob2d.get(b2d).height - min_h_d].append(b2d)
        for height_val, h in enumerate(ids_by_height[:-1]):  # All but the last height
            for b2d in h:
                b2d = Blob2d.all[b2d]
                b2d.set_possible_partners(ids_by_height[height_val + 1])

    # Creating b3ds
    printl('Creating 3d blobs from the generated 2d blobs')
    all_new_b3ds = []
    for depth_offset in range(1, max_avail_depth + 1):  # Skip offset of zero, which refers to the b3ds that have already been stitched
        printd('Depth_offset: ' + str(depth_offset), Config.debug_blooming)
        new_b3ds = []
        for b3d in blob3dlist:
            all_d1_with_pp_in_this_b3d = []
            for b2d in b3d.blob2ds:  # Note: this is the alternative to storing b3dID with b2ds
                b2d = Blob2d.get(b2d)
                d_1 = [blob for blob in b2d.getdescendants()
                       if blob.recursive_depth == b2d.recursive_depth + depth_offset]
                if len(d_1):
                    for desc in d_1:
                        if len(desc.possible_partners):
                            all_d1_with_pp_in_this_b3d.append(desc.id)
            all_d1_with_pp_in_this_b3d = set(all_d1_with_pp_in_this_b3d)
            if len(all_d1_with_pp_in_this_b3d) != 0:
                printd(' Working on b3d: ' + str(b3d), Config.debug_blooming)
                printd(' Len of all_d1_with_pp: ' + str(len(all_d1_with_pp_in_this_b3d)), Config.debug_blooming)
                printd(' They are: ' + str(all_d1_with_pp_in_this_b3d), Config.debug_blooming)
                printd('  = ' + str(list(Blob2d.get(b2d) for b2d in all_d1_with_pp_in_this_b3d)),
                       Config.debug_blooming)
                for b2d in all_d1_with_pp_in_this_b3d:
                    b2d = Blob2d.get(b2d)
                    printd('  Working on b2d: ' + str(b2d) + ' with pp: ' + str(b2d.possible_partners),
                           Config.debug_blooming)
                    if b2d.b3did == -1:  # Unset
                        cur_matches = [b2d]  # NOTE: this was changed by removing .getdescendants()  # HACK
                        for pp in b2d.possible_partners:
                            printd('   *Checking if pp: ' + str(pp) + ' is in all_d1: '
                                   + str(all_d1_with_pp_in_this_b3d), Config.debug_blooming)
                            if pp in all_d1_with_pp_in_this_b3d:  # HACK REMOVED
                                printd('    Added partner: ' + str(pp), Config.debug_blooming)
                                cur_matches += [Blob2d.get(b) for b in Blob2d.get(pp).getpartnerschain()]
                        if len(cur_matches) > 1:
                            printd('**LEN OF CUR_MATCHES MORE THAN 1', Config.debug_blooming)
                            new_b3d_list = [blob.id for blob in set(cur_matches)
                                            if blob.recursive_depth == b2d.recursive_depth
                                            and blob.b3did == -1]
                            if len(new_b3d_list):
                                new_b3ds.append(Blob3d(new_b3d_list, r_depth=b2d.recursive_depth))
        all_new_b3ds += new_b3ds
    printl(' Made a total of ' + str(len(all_new_b3ds)) + ' new b3ds')

    if stitch:
        # Set up shape contexts
        printl('Setting shape contexts for stitching')
        for b2d in [Blob2d.all[b2d] for b3d in all_new_b3ds for b2d in b3d.blob2ds]:
            b2d.set_shape_contexts(36)
        # Stitching
        printl('Stitching the newly generated 2d blobs')
        for b3d_num, b3d in enumerate(all_new_b3ds):
            printl(' Working on b3d: ' + str(b3d_num) + ' / ' + str(len(all_new_b3ds)))
            Pairing.stitch_blob2ds(b3d.blob2ds, debug=False)
    return all_new_b3ds
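# A generic sketch of the bucket-by-height pairing pattern used above when
# setting possible_partners: group ids by integer height, then pair each
# bucket with the next height up. Taking (id, height) tuples is an assumption
# for illustration; the real code reads heights from Blob2d.all.
def pairs_by_adjacent_height(items):
    min_h = min(h for _, h in items)
    max_h = max(h for _, h in items)
    buckets = [[] for _ in range(max_h - min_h + 1)]
    for item_id, h in items:
        buckets[h - min_h].append(item_id)
    # Every id in bucket i is a potential partner of every id in bucket i + 1
    return [(a, b) for i in range(len(buckets) - 1)
            for a in buckets[i] for b in buckets[i + 1]]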
def main():
    if len(sys.argv) == 3:
        start_time_program = time.time()
        database_filepath, model_filepath = sys.argv[1:]
        print()

        start_time = time.time()
        print('Loading data...\n    DATABASE: {}'.format(database_filepath))
        X, Y, category_names = load_data(database_filepath)
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Building model...')
        model = build_model(grid_search=True)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Training model...')
        model = train_model(model, X_train, Y_train)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Evaluating model...')
        model = evaluate_model(model, X_test, Y_test, category_names)
        util.print_elapsed_time(start_time, time.time())

        start_time = time.time()
        print('Saving model...\n    MODEL: {}'.format(model_filepath))
        save_model(model, model_filepath)
        util.print_elapsed_time(start_time, time.time())

        print()
        print('Trained model saved!')
        util.print_elapsed_time(start_time_program, time.time(), prompt="Total execution time")
    else:
        print("""
Invalid number of arguments. Please respect the usage message.

Usage:   python {} DATABASE CLASSIFIER

    DATABASE:   Database containing cleaned messages and categories (.sqlite-file, input)
    CLASSIFIER: File to store the trained classifier (.pickle-file, output)

Example: python train_classifier.py DisasterResponse.sqlite classifier.pickle
""".format(sys.argv[0]))
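# A minimal sketch of a build_model(grid_search=True) consistent with the call
# above, assuming the usual TF-IDF + multi-output classifier pipeline; the
# estimator choice and parameter grid are assumptions, not the project's setup.
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

def build_model(grid_search=False):
    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', MultiOutputClassifier(RandomForestClassifier())),
    ])
    if not grid_search:
        return pipeline
    params = {'clf__estimator__n_estimators': [50, 100]}  # illustrative grid
    return GridSearchCV(pipeline, param_grid=params, cv=3)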