# Final CLI option: base URL under which the generated report will be served.
parser.add_argument("-url_loc", type=str,
                    default="http://nlp.stanford.edu/mkayser/reports/",
                    help="URL location where report will appear")
args = parser.parse_args()

output_file = "report.html"

# Per-worker aggregation state accumulated while scanning the CSV.
workers = {}
worker_counts = {}
html_lines = []
work_times = []
total_work_time = 0
nrows = 0

imgmaker = ImageMaker(args.block_px, args.padding_px)
bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim)

# Best-effort creation of the output directory; OSError means it already
# exists (or cannot be created, which will surface on first write).
try:
    os.mkdir(args.output_dir)
except OSError:
    pass

with open(args.csv) as fin:
    reader = csv.reader(fin)
    header = next(reader)
    # Resolve column positions once from the header row.
    image_index = header.index(args.image_field)
    commands_index = header.index(args.commands_field)
    draw_events_index = header.index(args.draw_events_field)
    worker_id_index = header.index(args.worker_id_field)
    work_time_index = header.index(args.work_time_field)
def write_data(rows, key_attrs, image_idx, commands_idx, actions_idx, output_file, include_bitmaps=True, grid_dims=(25,25)): tokenizer = TreebankWordTokenizer() bmpmaker = BitmapMaker(grid_dims[0],grid_dims[1]) with open(output_file, "w") as fout: objs = [] for i,row in enumerate(rows): if i % 100 == 0: print " {} ".format(i) image_url = row[image_idx] if include_bitmaps: bitmap = retrieve_bitmap(image_url_to_text_bitmap_url(image_url)) else: bitmap = None image_key = image_url_to_key(image_url) commands = tokenize_description(tokenizer, row[commands_idx]) actions = row[actions_idx] abs_seq = AbsoluteEventSequence.from_mturk_string(actions).canonicalize() rel_seq = RelativeEventSequence.from_absolute(abs_seq) cur_seq = CursorEventSequence.from_absolute(abs_seq) abs_str = str(abs_seq) rel_str = str(rel_seq) cur_str = str(cur_seq) raw_txt = actions.replace("\r", "").replace("\n"," ") rel_alignments,rel_align_info = align_strings(commands, rel_str, "relative", "clever", grid_dims[0], backup_using_naive=True) abs_alignments,abs_align_info = align_strings(commands, abs_str, "absolute", "clever", grid_dims[0], backup_using_naive=True) join_list_of_lists = lambda l: " ".join(str(i) for i in itertools.chain(*l)) str_rel_alignments = [(c,join_list_of_lists(r)) for c,r in rel_alignments] str_abs_alignments = [(c,join_list_of_lists(r)) for c,r in abs_alignments] obj = {} obj["image_url"] = image_url obj["image_id"] = image_key obj["sample_id"] = i obj["commands"] = commands obj["actions.absolute"] = abs_str obj["actions.relative"] = rel_str obj["actions.cursor"] = cur_str obj["alignments.relative"] = str_rel_alignments obj["alignments.relative.info"] = rel_align_info obj["alignments.absolute"] = str_abs_alignments obj["alignments.absolute.info"] = abs_align_info for key,val in key_attrs[image_key].iteritems(): obj[key] = val if bitmap is not None: abs_norm_seq = AbsoluteEventSequence.from_relative(rel_seq, grid_dims[0], grid_dims[1]) try: bmpmaker.clear() 
bmpmaker.process_commands(abs_norm_seq.events) bitmap_mturk_norm = bmpmaker.bitmap.copy() except: bitmap_mturk_norm = None try: bmpmaker.clear() bmpmaker.process_commands(abs_seq.events) bitmap_mturk = bmpmaker.bitmap.copy() except: bitmap_mturk = None obj["actions.absolute.normalized"] = str(abs_norm_seq) obj["bitmap.orig_generated"] = bitmap_to_list(bitmap.astype(np.int_)) obj["bitmap.from_mturk"] = bitmap_to_list(bitmap_mturk.astype(np.int_)) obj["bitmap.normalized"] = bitmap_to_list(bitmap_mturk_norm) objs.append(obj) json.dump(objs, fout, indent=4, sort_keys=True) print "Created dataset at %s" % output_file
parser.add_argument("-csv", type=str, required=True,
                    help="input CSV file with drawing task results")
parser.add_argument("-field", type=str, default="Answer.WritingTexts",
                    help="Name of CSV field containing draw events")
parser.add_argument("-output_dir", type=str,
                    default="output/make_images_from_drawing_task_results",
                    help="output directory for images")
args = parser.parse_args()

imgmaker = ImageMaker(args.width, args.padding)
bmpmaker = BitmapMaker(args.width, args.width)
# Deterministic runs: fix the RNG seed up front.
np.random.seed(0)

if not os.path.exists(args.output_dir):
    os.makedirs(args.output_dir)

with open(args.csv) as fin:
    reader = csv.reader(fin)
    header = next(reader)
    colindex = header.index(args.field)
    for i, row in enumerate(reader):
        commands_str = row[colindex]
        bmpmaker.clear()
        # Parse the raw MTurk string, then canonicalize the event order.
        event_sequence = AbsoluteEventSequence.from_mturk_string(commands_str)
        event_sequence = event_sequence.canonicalize()
from bitmap import BitmapMaker
from events import AbsoluteEventSequence, RelativeEventSequence

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-width", type=int, default=32,
                        help="Width of blocks in pixels")
    parser.add_argument("-padding", type=int, default=2,
                        help="Padding of blocks in pixels")
    parser.add_argument("-dim", type=int, default=5,
                        help="Dimensionality of grid")
    parser.add_argument("-csv", type=str, required=True,
                        help="input CSV file with drawing task results")
    parser.add_argument("-field", type=str, default="Answer.WritingTexts",
                        help="Name of CSV field containing draw events")
    parser.add_argument("-output_dir", type=str,
                        default="output/make_images_from_drawing_task_results",
                        help="output directory for images")
    args = parser.parse_args()

    imgmaker = ImageMaker(args.width, args.padding)
    bmpmaker = BitmapMaker(args.width, args.width)
    # Deterministic runs: fix the RNG seed up front.
    np.random.seed(0)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    with open(args.csv) as fin:
        reader = csv.reader(fin)
        header = next(reader)
        colindex = header.index(args.field)
        for i, row in enumerate(reader):
            commands_str = row[colindex]
            bmpmaker.clear()
            # Parse the raw MTurk string, then canonicalize event order.
            event_sequence = AbsoluteEventSequence.from_mturk_string(commands_str)
            event_sequence = event_sequence.canonicalize()
help="URL location where report will appear") args = parser.parse_args() output_file = "report.html" workers = {} worker_counts = {} html_lines = [] work_times = [] total_work_time = 0 nrows = 0 imgmaker = ImageMaker(args.block_px, args.padding_px) bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim) try: os.mkdir(args.output_dir) except OSError: pass with open(args.csv) as fin: reader = csv.reader(fin) header = next(reader) image_index = header.index(args.image_field) commands_index = header.index(args.commands_field) draw_events_index = header.index(args.draw_events_field) worker_id_index = header.index(args.worker_id_field) work_time_index = header.index(args.work_time_field)
type=int, default=16, help="Block width in pixels") parser.add_argument("-padding_px", type=int, default=1, help="Padding in pixels") parser.add_argument("-bitmap_dim", type=int, default=25, help="Width of bitmap in blocks (assumed square)") # Output location parser.add_argument("-output_dir", type=str, required=True, help="Directory to write output to") args = parser.parse_args() output_file = "report.html" imgmaker = ImageMaker(args.block_px, args.padding_px) bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim) make_dir_unless_exists(args.output_dir) convert_commands_to_image_sequence(bmpmaker, imgmaker, args.commands, "{}/img_".format(args.output_dir), args.mode)
parser.add_argument("-mode", type=str, required=True,
                    help="Format of output sequences: [relative,cursor]")
parser.add_argument(
    "-output", type=str, default="",
    help="Output file to write updated JSON to. If not specified, a suffix "
    "is added to the input file to create the output file.")
parser.add_argument(
    "-width", type=int, default=25,
    help="Width of bitmap (assume square) - use larger number than size"
    " of original bitmap to allow for conversion of relative sequences"
    " to absolute sequences")
args = parser.parse_args()

input_file = args.json
mode = args.mode

# Default output name: strip ".json" from the basename and append the
# suffix. Splitting at rfind("/")+1 also handles plain filenames: rfind
# returns -1, so the directory part is empty. (The original slicing used
# rfind directly and failed to strip ".json" when there was no slash.)
slash = input_file.rfind("/")
dir_part = input_file[:slash + 1]
base_part = input_file[slash + 1:]
output_file = args.output if args.output else "%s-with-hamming.json" % (
    dir_part + base_part.replace(".json", ""))

width = args.width

# Use context managers so both file handles are closed promptly (the
# original passed open() results directly and leaked them).
with open(input_file, 'Ur') as fin:
    input_data = json.load(fin)

bmpmaker = BitmapMaker(width, width)
output_data = update_json(input_data, mode)

with open(output_file, 'w') as fout:
    json.dump(output_data, fout, sort_keys=True, indent=4)