def load_data(fn: str):
    """
    Load data from the prepared working set in JSON from the ``fn`` file.

    For each image:

    1. Load from JSON
    2. Convert to grayscale

    :param fn: JSON file with the prepared working set
    :return: dict with three arrays: ``id_array`` with hit IDs, ``bitmap_array`` with arrays of gray bitmaps,
      and ``stored`` with the PNG bucket subdirectory per hit ID
    """
    id_array = []
    bitmap_array = []
    stored = {}

    def load_parser(obj: dict, count: int, ret: List[dict]) -> Optional[bool]:
        progress_load_filter(obj, count, ret)
        load_image(obj, False)

        # store a PNG preview in a per-thousand bucket subdirectory, e.g. images/000, images/001, ...
        st = '%03d' % ((count - 1) // 1000)
        stored[obj[ID]] = st
        store_png(OUTPUT_DIR, ['images', st], str(obj[ID]), obj[FRAME_CONTENT])

        id_array.append(obj[ID])
        bitmap_array.append([obj[GRAY]])
        return False

    load_json(fn, load_parser)
    return {'id_array': id_array, 'bitmap_array': bitmap_array, 'stored': stored}
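# Illustrative usage sketch (not part of the original pipeline): assumes a hypothetical
# working-set file 'working_set.json' and a configured OUTPUT_DIR, because load_data also
# stores PNG previews under OUTPUT_DIR/images/<bucket>/ as a side effect.
def example_load_data():
    data = load_data('working_set.json')
    print('loaded %d hits' % len(data['id_array']))

    first_id = data['id_array'][0]
    first_gray = data['bitmap_array'][0][0]  # gray bitmap of the first hit
    bucket = data['stored'][first_id]        # 'NNN' subdirectory under images/ with its PNG
    print('hit %d stored in images/%s, bitmap type: %s' % (first_id, bucket, type(first_gray).__name__))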
def div_per_parts():
    # pass every output JSON through part_write; flush the last, partially filled part at the end
    files = glob.glob('%s/*.json' % OUTPUT_DIR)
    files = sorted(files)
    for fn in files:
        load_json(fn, part_write)
    if len(part) > 0:
        write_part_and_clean()
def measure_angle(fn: str):
    hits, count, errors = load_json('../../data/%s' % fn, progress_and_process_image)
    for h in hits:
        # mark the hit area and reconstruct the track skeleton, then take the first path angle
        nkg_mark_hit_area(h)
        nkg_make_track(h, scale=1, downscale=False, skeleton_method='zhang')

        angle = 0
        if h.get(NKG_PATH):
            angles = analyse_path(h.get(NKG_PATH), cut_first=1, cut_latest=1)
            if len(angles):
                angle = angles[0]
            if math.isnan(angle):
                angle = 0

        # store the image named by |angle| and hit ID
        store_png('/tmp/credo', [fn], '%03d_%s' % (abs(angle), str(h.get(ID))), h.get(IMAGE))
def run_file(fn):
    log_prefix = '%s: ' % str(threading.get_ident())
    fn_name = fn[len(INPUT_DIR) + 1:]
    print('%sStart file: %s' % (log_prefix, fn_name))
    fn_load = time.time()

    # load and analyse
    detections, count, errors = load_json(fn, load_parser)
    print('%s ... dropped by non-image: %d' % (log_prefix, count - len(detections)))
    if len(errors):
        print('%s ... errors in: %s' % (log_prefix, fn))
        lp = 0
        for error in errors:
            lp += 1
            with open('%s/%s-%06d.txt' % (ERROR_DIR, fn_name, lp), 'w') as f:
                f.write(error)

    start_analyze(detections, log_prefix)

    # keep only detections not classified as artifacts, drop working keys and in-memory images
    leave_good = []
    for d in detections:
        if d.get(CLASSIFIED) != CLASS_ARTIFACT:
            if CLASSIFIED in d.keys():
                del d[CLASSIFIED]
            if ARTIFACT_NEAR_HOT_PIXEL2 in d.keys():
                del d[ARTIFACT_NEAR_HOT_PIXEL2]
            if ARTIFACT_TOO_OFTEN in d.keys():
                del d[ARTIFACT_TOO_OFTEN]
            if IMAGE in d.keys():
                d[CROP_WIDTH] = d[IMAGE].size[0]
                d[CROP_HEIGHT] = d[IMAGE].size[1]
                del d[IMAGE]
            leave_good.append(d)

    # save the kept detections to the output file
    fn_out = '%s/%s' % (OUTPUT_DIR, fn_name)
    write_detections(leave_good, fn_out)

    print('%s file %s done, since start: %03ds, hits with images: %d, dropped: %d, kept: %d' % (
        log_prefix, fn_name, time.time() - fn_load, count, count - len(leave_good), len(leave_good)))
    if not DEBUG:
        os.rename(fn, '%s/%s' % (PASSED_DIR, fn_name))
    return len(leave_good)
def run_file(fn):
    log_prefix = '%s: ' % str(threading.get_ident())
    fn_name = fn[len(INPUT_DIR) + 1:]
    print('%sStart file: %s' % (log_prefix, fn_name))
    fn_load = time.time()

    # load and analyse
    detections, count, errors = load_json(fn, load_parser)
    print('%s ... dropped by non-image: %d' % (log_prefix, count - len(detections)))
    if len(errors):
        print('%s ... errors in: %s' % (log_prefix, fn))
        lp = 0
        for error in errors:
            lp += 1
            with open('%s/%s-%06d.txt' % (ERROR_DIR, fn_name, lp), 'w') as f:
                f.write(error)

    start_analyze(detections, log_prefix)

    # drop in-memory images before saving
    for d in detections:
        if IMAGE in d.keys():
            del d[IMAGE]

    # save the analysed detections to the output file
    fn_out = '%s/%s' % (OUTPUT_DIR, fn_name)
    write_detections(detections, fn_out)

    print('%s file %s done, since start: %03ds, hits with images: %d, dropped: %d, kept: %d' % (
        log_prefix, fn_name, time.time() - fn_load, count, count - len(detections), len(detections)))
    if not DEBUG:
        os.rename(fn, '%s/%s' % (PASSED_DIR, fn_name))
    return len(detections)
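# A minimal sketch of how run_file might be driven over the input directory (an assumption,
# not the original runner): process every JSON file in INPUT_DIR with a small thread pool,
# which matches the thread-ident log prefix used inside run_file.
def example_run_all(workers: int = 4):
    import glob
    from concurrent.futures import ThreadPoolExecutor

    files = sorted(glob.glob('%s/*.json' % INPUT_DIR))
    with ThreadPoolExecutor(max_workers=workers) as pool:
        kept_counts = list(pool.map(run_file, files))
    print('all files done, kept detections: %d' % sum(kept_counts))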
def main():
    def load_parser(obj: dict, count: int, ret: List[dict]) -> Optional[bool]:
        progress_load_filter(obj, count, ret)
        load_image(obj, False)
        return True

    objs, count, errors = load_json(WORKING_SET, load_parser)

    # config data source: please uncomment and use one of the two
    spots = []
    tracks = []
    worms = []
    others = []
    all = []
    multi = []

    for d in objs:
        astropy_measures(d)
        d.pop(GRAY)
        d.pop(FRAME_DECODED)

        # classify single-source detections by ellipticity and solidity; multi-source go to 'multi'
        if d[ASTROPY_FOUND] == 1:
            all.append(d)
            if d[ASTROPY_ELLIPTICITY][0] < 0.1 and d[ASTROPY_SOLIDITY][0] > 0.8:
                spots.append(d)
            elif d[ASTROPY_ELLIPTICITY][0] > 0.8 and d[ASTROPY_SOLIDITY][0] > 0.8:
                tracks.append(d)
            elif d[ASTROPY_SOLIDITY][0] < 0.8:
                worms.append(d)
            else:
                others.append(d)
        elif d[ASTROPY_FOUND] > 1:
            multi.append(d)

    def store_pngs(arr, subdir):
        for a in arr:
            store_png(OUTPUT_DIR, [subdir, 'by_solidity'],
                      '%.3f_%.3f_%d' % (a[ASTROPY_SOLIDITY][0], a[ASTROPY_ELLIPTICITY][0], a[ID]), a[IMAGE])
            store_png(OUTPUT_DIR, [subdir, 'by_ellipticity'],
                      '%.3f_%.3f_%d' % (a[ASTROPY_ELLIPTICITY][0], a[ASTROPY_SOLIDITY][0], a[ID]), a[IMAGE])
            s = int(a[ASTROPY_SOLIDITY][0] * 5) * 2
            e = int(a[ASTROPY_ELLIPTICITY][0] * 5) * 2
            store_png(OUTPUT_DIR, [subdir, 'by_matrix_solidity_per_ellipticity',
                                   '%02d-%02d_%02d-%02d' % (s, s + 2, e, e + 2)],
                      '%d' % a[ID], a[IMAGE])
            # a.pop(IMAGE)
        # with open(os.path.join(OUTPUT_DIR, '%s.json' % subdir), 'w') as json_file:
        #     json.dump({'detections': arr}, json_file)

    store_pngs(spots, 'spots')
    store_pngs(tracks, 'tracks')
    store_pngs(worms, 'worms')
    store_pngs(others, 'others')
    store_pngs(all, 'all')
    store_pngs(multi, 'multi')
def prepare_working_set(input_file: Union[str, List[str]], output_file: str = None, exclude_edge: bool = True) -> Iterable[dict]:
    """
    Prepare a set of detections for working.

    The function saves the output as a JSON file or returns it as an iterable of detections.
    The detections are filtered by the following rules:

    1. Has an image (excludes e.g. CosmicWatch detections).
    2. Image is not corrupted.
    3. Image has size 60px x 60px.
    4. (optional) Image does not have the ``edge`` key.

    Note: the function prints progress and result logs to stderr.

    Note: when ``input_file`` (or an element of ``input_file`` when it is a list) is the ``"-"`` string,
    the input will be read from ``stdin``. Otherwise the file will be opened as an input text stream.

    Note: the return value is a lazily evaluated iterable, computed only when consumed;
    if you do not use it, it will not be computed. If you want a real list, use::

      list(prepare_working_set(...))

    Note: please do not run it on the whole CREDO database. The function loads all detections into RAM before writing.
    TODO: implement stream writing to JSON to solve this problem.

    :param input_file: file or list of files
    :param output_file: output file name, when it is None the output will not be written
    :param exclude_edge: exclude images with the EDGE key, default: True
    :return: iterable with filtered objects
    """
    objs = []
    count = 0

    # load detections from a file or a list of files
    if isinstance(input_file, list):
        for fn in input_file:
            print('Load file: %s' % fn, file=sys.stderr)
            os, c = load_json(fn, progress_load_filter)
            objs.extend(os)
            count += c
    else:
        print('Load file: %s' % input_file, file=sys.stderr)
        objs, count = load_json(input_file, progress_load_filter)

    # counters of excluded detections
    non_image_count = 0
    corrupted_count = 0
    other_size_count = 0
    edge_count = 0

    # filtered detections will be saved from the original JSON objects
    detections = deepcopy(objs)

    # make set of IDs of detections to save
    to_save = set()
    for d in detections:
        if not d.get(FRAME_CONTENT):
            non_image_count += 1
            continue
        try:
            load_image(d)

            # free memory: drop the original base64 frame content and the decoded byte array, keep only the PIL.Image object
            d.pop(FRAME_CONTENT)
            d.pop(FRAME_DECODED)

            if not d.get(CROP_SIZE) == (60, 60):
                other_size_count += 1
                continue
            if exclude_edge and d.get(EDGE):
                edge_count += 1
                continue
            to_save.add(d.get(ID))
        except Exception as e:
            print('Failed to load image in object with ID: %d, error: %s' % (d.get(ID), str(e)), file=sys.stderr)
            corrupted_count += 1
            continue

    # print logs
    print('The results count: %d and the excluded counts:' % len(to_save), file=sys.stderr)
    print('- whole detections: %d' % count, file=sys.stderr)
    print('- non image count: %d' % non_image_count, file=sys.stderr)
    print('- corrupted image count: %d' % corrupted_count, file=sys.stderr)
    print('- other size count: %d' % other_size_count, file=sys.stderr)
    if exclude_edge:
        print('- detections in edge count: %d' % edge_count, file=sys.stderr)

    # filter detections to save from the original JSON objects
    objs_to_save = filter(lambda obj: obj.get(ID) in to_save, objs)

    # (optional) write filtered detections to JSON file
    if output_file is not None:
        out = sys.stdout if output_file == '-' else open(output_file, 'w')
        json.dump({'detections': list(objs_to_save)}, out)
        if output_file != '-':
            out.close()
        print('Saved to: %s' % output_file, file=sys.stderr)

    # return lazy iterable of detections with additional keys
    return filter(lambda obj: obj.get(ID) in to_save, detections)
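# Illustrative call of prepare_working_set (a sketch; the file names are hypothetical):
# merge two export files, drop detections on the frame edge, write the working set to
# 'working_set.json' and also iterate the kept detections lazily.
def example_prepare_working_set():
    kept = prepare_working_set(['export_1.json', 'export_2.json'],
                               output_file='working_set.json',
                               exclude_edge=True)
    for detection in kept:  # lazy: the filter is evaluated while iterating
        pass  # e.g. inspect detection.get(ID) and detection.get(CROP_SIZE)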
import math
from credo_cf import load_json, progress_and_process_image, group_by_id, GRAY, nkg_mark_hit_area, NKG_MASK, nkg_make_track, NKG_PATH, NKG_DIRECTION, \
    NKG_DERIVATIVE, ID
import matplotlib.pyplot as plt
from numpy import unravel_index
import numpy as np
import itertools
from scipy.sparse import csr_matrix
from scipy.sparse.dok import dok_matrix
from scipy.sparse.csgraph import dijkstra

# prepare dataset: hits - JSON's objects, and grays - numpy grayscale images 60x60
objects, count, errors = load_json('../data/manual.json', progress_and_process_image)
by_id = group_by_id(objects)
used_hits = {
    4711435, 6234182, 9152349, 4913621, 5468291,
    7097636, 4976474, 5206452, 4876475, 5951007,
    4714801, 4819239, 4660572, 4705446, 8280225,
    8459656, 8471578, 9124308, 9314789, 4813841
}
hits = []
for u in used_hits:
    hits.append(by_id[u][0])
grays = list(map(lambda x: x['gray'], hits))

# settings
used_kernel = [[-1, 0], [0, -1], [1, 0], [0, 1]]
used_kernel2 = [[-1, 0], [-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1]]
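# Small illustration (not from the original script) of how the kernels above can be used:
# used_kernel lists the 4-connected neighbour offsets and used_kernel2 the 8-connected ones.
# neighbours() yields the in-bounds neighbour coordinates of a pixel in a 60x60 grayscale image.
def neighbours(x: int, y: int, kernel=used_kernel, width: int = 60, height: int = 60):
    for dx, dy in kernel:
        nx, ny = x + dx, y + dy
        if 0 <= nx < width and 0 <= ny < height:
            yield nx, ny

# Example: the brightest pixel of the first gray image and its 8-connected neighbourhood.
brightest = unravel_index(np.argmax(grays[0]), grays[0].shape)  # (row, col)
print('brightest pixel:', brightest,
      'neighbours:', list(neighbours(brightest[1], brightest[0], used_kernel2)))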