nrows += 1 work_times.append(work_time) total_work_time += work_time # Save true bitmap GIF and TXT to directory true_txt_url = true_image_url.replace(".gif",".txt") true_image_file = "true_img_{:04d}.gif".format(i) true_txt_file = "true_img_{:04d}.txt".format(i) urllib.urlretrieve (true_image_url, "{}/{}".format(args.output_dir,true_image_file)) urllib.urlretrieve (true_txt_url, "{}/{}".format(args.output_dir,true_txt_file)) # Make GIF from draw events bmpmaker.clear() bmpmaker.process_commands_str(draw_events) bitmap = bmpmaker.bitmap drawn_image_file = "drawn_img_{:04d}.gif".format(i) drawn_image_save_path = "{}/{}".format(args.output_dir,drawn_image_file) imgmaker.save_bitmap(bitmap, drawn_image_save_path) # Make TXT from draw events drawn_txt_file = "drawn_img_{:04d}.txt".format(i) drawn_txt_save_path = "{}/{}".format(args.output_dir,drawn_txt_file) np.savetxt(drawn_txt_save_path, bitmap, fmt='%d') # Save command text and generated events text too commands_txt_file = "written_commands_{:04d}.txt".format(i) events_txt_file = "draw_events_{:04d}.txt".format(i) save_txt_to_file(commands.replace("<br>","\n"), "{}/{}".format(args.output_dir,commands_txt_file))
def _join_alignment_events(alignments):
    """Turn each (command, nested event lists) pair into (command, "e1 e2 ...")."""
    return [
        (command, " ".join(str(event) for event in itertools.chain.from_iterable(groups)))
        for command, groups in alignments
    ]


def _render_bitmap(bmpmaker, event_sequence):
    """Draw a sequence's events on a cleared `bmpmaker`; return a bitmap copy, or None on failure."""
    try:
        bmpmaker.clear()
        bmpmaker.process_commands(event_sequence.events)
        # Copy because the maker's bitmap is reused for the next render.
        return bmpmaker.bitmap.copy()
    except Exception:
        # Best effort: a sequence that cannot be drawn is recorded as "no
        # bitmap".  Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return None


def write_data(rows, key_attrs, image_idx, commands_idx, actions_idx, output_file, include_bitmaps=True, grid_dims=(25, 25)):
    """Build one record per result row and dump all records as JSON to `output_file`.

    Args:
        rows: Iterable of indexable rows (e.g. parsed CSV rows).
        key_attrs: Mapping from image key (as produced by `image_url_to_key`)
            to a dict of extra attributes copied onto the record.  These are
            applied last, so they override same-named generated fields.
        image_idx: Position of the image URL within a row.
        commands_idx: Position of the written commands within a row.
        actions_idx: Position of the MTurk draw-action string within a row.
        output_file: Path of the JSON file to create (overwritten if present).
        include_bitmaps: When true, retrieve the original bitmap for each
            image and add the ``bitmap.*`` and ``actions.absolute.normalized``
            fields to the record.
        grid_dims: Pair of grid dimensions handed to `BitmapMaker` and
            `AbsoluteEventSequence.from_relative`; the first element is also
            passed to `align_strings`.

    Raises:
        KeyError: If a row's image key is missing from `key_attrs`.

    A bitmap that cannot be rendered from the action sequence is stored as
    ``None`` (JSON ``null``) instead of aborting the whole export.
    """
    tokenizer = TreebankWordTokenizer()
    bmpmaker = BitmapMaker(grid_dims[0], grid_dims[1])

    with open(output_file, "w") as fout:
        objs = []
        for i, row in enumerate(rows):
            if i % 100 == 0:
                # Single-argument print(...) behaves identically on Python 2 and 3.
                print(" {} ".format(i))

            image_url = row[image_idx]
            if include_bitmaps:
                bitmap = retrieve_bitmap(image_url_to_text_bitmap_url(image_url))
            else:
                bitmap = None
            image_key = image_url_to_key(image_url)

            commands = tokenize_description(tokenizer, row[commands_idx])
            actions = row[actions_idx]

            # The same drawing expressed in the three event representations.
            abs_seq = AbsoluteEventSequence.from_mturk_string(actions).canonicalize()
            rel_seq = RelativeEventSequence.from_absolute(abs_seq)
            cur_seq = CursorEventSequence.from_absolute(abs_seq)
            abs_str = str(abs_seq)
            rel_str = str(rel_seq)
            cur_str = str(cur_seq)

            rel_alignments, rel_align_info = align_strings(
                commands, rel_str, "relative", "clever", grid_dims[0],
                backup_using_naive=True)
            abs_alignments, abs_align_info = align_strings(
                commands, abs_str, "absolute", "clever", grid_dims[0],
                backup_using_naive=True)

            obj = {
                "image_url": image_url,
                "image_id": image_key,
                "sample_id": i,
                "commands": commands,
                "actions.absolute": abs_str,
                "actions.relative": rel_str,
                "actions.cursor": cur_str,
                "alignments.relative": _join_alignment_events(rel_alignments),
                "alignments.relative.info": rel_align_info,
                "alignments.absolute": _join_alignment_events(abs_alignments),
                "alignments.absolute.info": abs_align_info,
            }
            # Per-image attributes go last so they win on key collisions,
            # as in the original key-by-key copy.
            obj.update(key_attrs[image_key])

            if bitmap is not None:
                abs_norm_seq = AbsoluteEventSequence.from_relative(
                    rel_seq, grid_dims[0], grid_dims[1])
                bitmap_mturk_norm = _render_bitmap(bmpmaker, abs_norm_seq)
                bitmap_mturk = _render_bitmap(bmpmaker, abs_seq)

                obj["actions.absolute.normalized"] = str(abs_norm_seq)
                obj["bitmap.orig_generated"] = bitmap_to_list(bitmap.astype(np.int_))
                # A failed render leaves None here; calling .astype on it
                # used to raise AttributeError and abort the export.
                obj["bitmap.from_mturk"] = (
                    bitmap_to_list(bitmap_mturk.astype(np.int_))
                    if bitmap_mturk is not None else None)
                obj["bitmap.normalized"] = (
                    bitmap_to_list(bitmap_mturk_norm)
                    if bitmap_mturk_norm is not None else None)

            objs.append(obj)

        json.dump(objs, fout, indent=4, sort_keys=True)

    print("Created dataset at %s" % output_file)
default="Answer.WritingTexts", help="Name of CSV field containing draw events") parser.add_argument("-output_dir", type=str, default="output/make_images_from_drawing_task_results", help="output directory for images") args = parser.parse_args() imgmaker = ImageMaker(args.width, args.padding) bmpmaker = BitmapMaker(args.width, args.width) np.random.seed(0) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) with open(args.csv) as fin: reader = csv.reader(fin) header = next(reader) colindex = header.index(args.field) for i, row in enumerate(reader): commands_str = row[colindex] bmpmaker.clear() event_sequence = AbsoluteEventSequence.from_mturk_string( commands_str) event_sequence = event_sequence.canonicalize() canonical_events = event_sequence.events bmpmaker.process_commands(canonical_events) bitmap = bmpmaker.bitmap bitmap_file = "{}/img_{:04d}.gif".format(args.output_dir, i) imgmaker.save_bitmap(bitmap, bitmap_file)