def preprocess_visual_features_for_jsons(metajson_files, downloaded_images_dir=None):
    """Extract visual features (colors, edges) for each photo metadata JSON.

    For every metadata file, load the image either from a local directory of
    pre-downloaded .jpg files (when downloaded_images_dir is given) or from
    the small-image URL in the metadata, convert it to HSV, extract color and
    edge features, and write them next to the input as '<name>_visual.json'.

    Files that cannot be parsed, have a non-ok status, or whose image cannot
    be fetched are skipped with a diagnostic message (best-effort batch job).

    :param metajson_files: iterable of paths to per-photo metadata JSON files
    :param downloaded_images_dir: optional directory holding '<id>.jpg' images
        matching the JSON file names; when None, images are fetched by URL
    """
    for metajson_file in metajson_files:
        metadata = parse_json_file(metajson_file)
        if metadata is None:
            print("Could not read json file %s" % metajson_file)
            continue
        print_status("ID: " + metadata["id"] + " File name: " + metajson_file + "\n")
        if metadata["stat"] != "ok":
            print("Status was not ok: %s" % metadata["id"])
            continue
        try:
            if downloaded_images_dir is not None:
                # Local image file shares the JSON's base name, with .jpg suffix.
                image_filename = os.path.basename(metajson_file).replace('.json', '.jpg')
                image_path = os.path.join(downloaded_images_dir, image_filename)
                image = Image(image_path).toHSV()
            else:
                url = get_small_image_url(metadata)
                image = Image(url).toHSV()
        except Exception as e:
            # Best-effort: a missing/corrupt image must not abort the batch,
            # but keep the error detail for diagnosis.
            print("Could not get image: %s (%s)" % (metadata["id"], e))
            continue
        visual_data = {}
        visual_data = extract_colors(image, visual_data, 5)
        visual_data = extract_edges(image, visual_data, 5)
        file_name_for_visual_metadata = metajson_file.replace('.json', '_visual.json')
        write_json_file(visual_data, file_name_for_visual_metadata)
def extract_features(image_cluster, metadata_dir):
    """Build visual-feature records for every image in a cluster.

    For each (metajson_file, _) pair, resolve its metadata JSON under
    metadata_dir, fetch the small image by URL, convert to HSV, and extract
    color and edge features. Images whose metadata is unreadable, whose
    status is not "ok", or whose image cannot be fetched are silently
    skipped (best-effort).

    :param image_cluster: iterable of (metajson_file, _) pairs; the second
        element of each pair is ignored
    :param metadata_dir: prefix joined with the relative JSON path
    :return: list of dicts with keys "image_id", "file_path", "url" plus
        the feature keys added by extract_colors/extract_edges
    """
    images = []
    for metajson_file, _ in image_cluster:
        relative_path_to_json = construct_path_to_json(metajson_file)
        full_path_to_json = metadata_dir + relative_path_to_json
        metadata = parse_json_file(full_path_to_json)
        if metadata is None:
            continue
        if metadata["stat"] != "ok":
            continue
        url = get_small_image_url(metadata)
        try:
            image = Image(url).toHSV()
        except Exception:
            # Deliberate best-effort: an unreachable image just drops
            # this record rather than failing the whole cluster.
            continue
        data = {
            "image_id": metadata["id"],
            "file_path": metajson_file,
            "url": url,
        }
        data = extract_colors(image, data, 5)
        data = extract_edges(image, data, 5)
        images.append(data)
    return images