default="Answer.WritingTexts", help="Name of CSV field containing draw events") parser.add_argument("-output_dir", type=str, default="output/make_images_from_drawing_task_results", help="output directory for images") args = parser.parse_args() imgmaker = ImageMaker(args.width, args.padding) bmpmaker = BitmapMaker(args.width, args.width) np.random.seed(0) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) with open(args.csv) as fin: reader = csv.reader(fin) header = next(reader) colindex = header.index(args.field) for i, row in enumerate(reader): commands_str = row[colindex] bmpmaker.clear() event_sequence = AbsoluteEventSequence.from_mturk_string( commands_str) event_sequence = event_sequence.canonicalize() canonical_events = event_sequence.events bmpmaker.process_commands(canonical_events) bitmap = bmpmaker.bitmap bitmap_file = "{}/img_{:04d}.gif".format(args.output_dir, i) imgmaker.save_bitmap(bitmap, bitmap_file)
def write_data(rows, key_attrs, image_idx, commands_idx, actions_idx, output_file, include_bitmaps=True, grid_dims=(25,25)):
    """Build one JSON record per input row and dump them all to ``output_file``.

    For every row the natural-language description is tokenized, the raw
    MTurk action string is parsed into a canonicalized absolute event
    sequence, and relative / cursor variants plus command-to-action
    alignments are derived from it.  The records are written as a single
    JSON list (``indent=4``, ``sort_keys=True``).

    Args:
        rows: Iterable of indexable rows.
        key_attrs: Mapping from image key (as produced by
            ``image_url_to_key``) to a mapping of extra key/value pairs
            that are copied into the record for that image.
        image_idx: Index of the image URL within a row.
        commands_idx: Index of the textual description within a row.
        actions_idx: Index of the MTurk action string within a row.
        output_file: Path of the JSON file to create.
        include_bitmaps: When true, the original bitmap is fetched with
            ``retrieve_bitmap`` and, if one is returned, bitmap fields are
            added to the record.
        grid_dims: ``(dim0, dim1)`` handed to ``BitmapMaker`` and
            ``AbsoluteEventSequence.from_relative``; ``grid_dims[0]`` is
            also passed to ``align_strings``.

    Raises:
        KeyError: If ``key_attrs`` has no entry for a row's image key.
    """
    tokenizer = TreebankWordTokenizer()
    bmpmaker = BitmapMaker(grid_dims[0], grid_dims[1])

    def flatten_to_str(list_of_lists):
        # Space-joined string of every element of every inner list.
        return " ".join(str(item) for item in itertools.chain(*list_of_lists))

    def render(sequence):
        # Best-effort replay of an event sequence onto the shared bitmap
        # maker.  Returns a copy of the resulting bitmap, or None when the
        # replay fails.  Only Exception is caught so that Ctrl-C and
        # SystemExit still propagate.
        try:
            bmpmaker.clear()
            bmpmaker.process_commands(sequence.events)
            return bmpmaker.bitmap.copy()
        except Exception:
            return None

    with open(output_file, "w") as fout:
        objs = []
        for i, row in enumerate(rows):
            # Lightweight progress indicator.  The parenthesized form prints
            # the same text under the Python 2 print statement and the
            # Python 3 print function.
            if i % 100 == 0:
                print(" {} ".format(i))

            image_url = row[image_idx]
            bitmap = None
            if include_bitmaps:
                bitmap = retrieve_bitmap(image_url_to_text_bitmap_url(image_url))
            image_key = image_url_to_key(image_url)

            commands = tokenize_description(tokenizer, row[commands_idx])
            actions = row[actions_idx]

            abs_seq = AbsoluteEventSequence.from_mturk_string(actions).canonicalize()
            rel_seq = RelativeEventSequence.from_absolute(abs_seq)
            cur_seq = CursorEventSequence.from_absolute(abs_seq)
            abs_str = str(abs_seq)
            rel_str = str(rel_seq)
            cur_str = str(cur_seq)

            rel_alignments, rel_align_info = align_strings(
                commands, rel_str, "relative", "clever", grid_dims[0],
                backup_using_naive=True)
            abs_alignments, abs_align_info = align_strings(
                commands, abs_str, "absolute", "clever", grid_dims[0],
                backup_using_naive=True)

            obj = {
                "image_url": image_url,
                "image_id": image_key,
                "sample_id": i,
                "commands": commands,
                "actions.absolute": abs_str,
                "actions.relative": rel_str,
                "actions.cursor": cur_str,
                "alignments.relative": [
                    (c, flatten_to_str(r)) for c, r in rel_alignments],
                "alignments.relative.info": rel_align_info,
                "alignments.absolute": [
                    (c, flatten_to_str(r)) for c, r in abs_alignments],
                "alignments.absolute.info": abs_align_info,
            }

            # Per-image attributes are applied last, so they override any
            # computed field with the same key (as before).  items() works
            # on both Python 2 and Python 3 dicts.
            for key, val in key_attrs[image_key].items():
                obj[key] = val

            if bitmap is not None:
                abs_norm_seq = AbsoluteEventSequence.from_relative(
                    rel_seq, grid_dims[0], grid_dims[1])
                bitmap_mturk_norm = render(abs_norm_seq)
                bitmap_mturk = render(abs_seq)

                obj["actions.absolute.normalized"] = str(abs_norm_seq)
                obj["bitmap.orig_generated"] = bitmap_to_list(bitmap.astype(np.int_))
                # A failed replay yields None; calling .astype on it used to
                # raise AttributeError and abort the whole export.  Store a
                # JSON null for that record instead.
                if bitmap_mturk is None:
                    obj["bitmap.from_mturk"] = None
                else:
                    obj["bitmap.from_mturk"] = bitmap_to_list(
                        bitmap_mturk.astype(np.int_))
                # NOTE(review): this call is unchanged and may still receive
                # None after a failed replay; bitmap_to_list's handling of
                # None is not visible here - confirm.
                obj["bitmap.normalized"] = bitmap_to_list(bitmap_mturk_norm)

            objs.append(obj)

        json.dump(objs, fout, indent=4, sort_keys=True)

    print("Created dataset at %s" % output_file)
# Remaining command-line options.  `parser` is created earlier in the
# script; args.width and args.padding, used below, are presumably registered
# there as well (not visible from here).
parser.add_argument(
    "-dim",
    type=int,
    default=5,
    help="Dimensionality of grid",
)
parser.add_argument(
    "-csv",
    type=str,
    required=True,
    help="input CSV file with drawing task results",
)
parser.add_argument(
    "-field",
    type=str,
    default="Answer.WritingTexts",
    help="Name of CSV field containing draw events",
)
parser.add_argument(
    "-output_dir",
    type=str,
    default="output/make_images_from_drawing_task_results",
    help="output directory for images",
)
args = parser.parse_args()

imgmaker = ImageMaker(args.width, args.padding)
bmpmaker = BitmapMaker(args.width, args.width)
np.random.seed(0)

# Make sure the destination directory exists before writing any image.
if not os.path.exists(args.output_dir):
    os.makedirs(args.output_dir)

with open(args.csv) as fin:
    reader = csv.reader(fin)
    # The first CSV row is the header; find the column that holds the
    # draw events.
    colindex = next(reader).index(args.field)

    # One GIF per data row, numbered by the row's position after the header.
    for i, row in enumerate(reader):
        commands_str = row[colindex]
        bmpmaker.clear()
        drawing = AbsoluteEventSequence.from_mturk_string(commands_str).canonicalize()
        bmpmaker.process_commands(drawing.events)
        imgmaker.save_bitmap(
            bmpmaker.bitmap,
            "{}/img_{:04d}.gif".format(args.output_dir, i),
        )