parser.add_argument("-url_loc", type=str, default="http://nlp.stanford.edu/mkayser/reports/", help="URL location where report will appear")
    args = parser.parse_args()

    output_file = "report.html"

    workers = {}
    worker_counts = {}

    html_lines = []

    work_times = []
    total_work_time = 0
    nrows = 0

    imgmaker = ImageMaker(args.block_px, args.padding_px)
    bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim)

    try:
        os.mkdir(args.output_dir)
    except OSError:
        pass

    with open(args.csv) as fin:
        reader = csv.reader(fin)
        header = next(reader)

        image_index = header.index(args.image_field)
        commands_index = header.index(args.commands_field)
        draw_events_index = header.index(args.draw_events_field)
        worker_id_index = header.index(args.worker_id_field)
        work_time_index = header.index(args.work_time_field)
Example #2
0
def write_data(rows, key_attrs, image_idx, commands_idx, actions_idx, output_file, include_bitmaps=True, grid_dims=(25,25)):
    """Convert drawing-task rows into JSON records and write them to a file.

    For every row this tokenizes the command text, parses the draw actions
    into absolute / relative / cursor event sequences, aligns the commands
    against the relative and absolute sequences, and (optionally) attaches
    bitmaps. All records are dumped as one JSON list.

    Args:
        rows: Iterable of indexable rows (e.g. lists read from a CSV).
        key_attrs: Mapping from image key to a dict of extra attributes that
            are copied into the record for that image.
        image_idx: Index of the image URL within each row.
        commands_idx: Index of the command text within each row.
        actions_idx: Index of the raw draw-actions string within each row.
        output_file: Path of the JSON file to create (overwritten).
        include_bitmaps: When true, retrieve the original bitmap for each
            image and add the ``bitmap.*`` and
            ``actions.absolute.normalized`` fields.
        grid_dims: Two-element sequence passed to ``BitmapMaker`` and to
            ``AbsoluteEventSequence.from_relative``; the first element is
            also passed to ``align_strings``.

    Raises:
        KeyError: If ``key_attrs`` has no entry for a row's image key.
    """
    tokenizer = TreebankWordTokenizer()
    bmpmaker = BitmapMaker(grid_dims[0], grid_dims[1])

    def _flatten_to_str(nested):
        # Join every item of every inner list into one space-separated string.
        return " ".join(str(item) for item in itertools.chain(*nested))

    def _render_bitmap(events):
        # Best-effort render: a sequence that cannot be drawn yields None
        # instead of aborting the export. ``Exception`` (not a bare except)
        # so Ctrl-C and SystemExit still propagate.
        try:
            bmpmaker.clear()
            bmpmaker.process_commands(events)
            return bmpmaker.bitmap.copy()
        except Exception:
            return None

    with open(output_file, "w") as fout:
        objs = []
        for i, row in enumerate(rows):

            # Progress indicator every 100 rows.
            if i % 100 == 0:
                print("  {}  ".format(i))

            image_url = row[image_idx]
            if include_bitmaps:
                bitmap = retrieve_bitmap(image_url_to_text_bitmap_url(image_url))
            else:
                bitmap = None

            image_key = image_url_to_key(image_url)
            commands = tokenize_description(tokenizer, row[commands_idx])
            actions = row[actions_idx]

            abs_seq = AbsoluteEventSequence.from_mturk_string(actions).canonicalize()
            rel_seq = RelativeEventSequence.from_absolute(abs_seq)
            cur_seq = CursorEventSequence.from_absolute(abs_seq)

            abs_str = str(abs_seq)
            rel_str = str(rel_seq)
            cur_str = str(cur_seq)

            rel_alignments, rel_align_info = align_strings(commands, rel_str, "relative", "clever", grid_dims[0], backup_using_naive=True)
            abs_alignments, abs_align_info = align_strings(commands, abs_str, "absolute", "clever", grid_dims[0], backup_using_naive=True)

            str_rel_alignments = [(c, _flatten_to_str(r)) for c, r in rel_alignments]
            str_abs_alignments = [(c, _flatten_to_str(r)) for c, r in abs_alignments]

            obj = {}
            obj["image_url"] = image_url
            obj["image_id"] = image_key
            obj["sample_id"] = i
            obj["commands"] = commands
            obj["actions.absolute"] = abs_str
            obj["actions.relative"] = rel_str
            obj["actions.cursor"] = cur_str
            obj["alignments.relative"] = str_rel_alignments
            obj["alignments.relative.info"] = rel_align_info
            obj["alignments.absolute"] = str_abs_alignments
            obj["alignments.absolute.info"] = abs_align_info

            # Per-image attributes are applied last, so they override any
            # same-named field set above.
            obj.update(key_attrs[image_key])

            if bitmap is not None:
                abs_norm_seq = AbsoluteEventSequence.from_relative(rel_seq, grid_dims[0], grid_dims[1])

                bitmap_mturk_norm = _render_bitmap(abs_norm_seq.events)
                bitmap_mturk = _render_bitmap(abs_seq.events)

                obj["actions.absolute.normalized"] = str(abs_norm_seq)
                obj["bitmap.orig_generated"] = bitmap_to_list(bitmap.astype(np.int_))
                # A failed render leaves bitmap_mturk as None; calling
                # .astype on it used to raise AttributeError and abort the
                # whole export. Store None (JSON null) for that row instead.
                if bitmap_mturk is not None:
                    obj["bitmap.from_mturk"] = bitmap_to_list(bitmap_mturk.astype(np.int_))
                else:
                    obj["bitmap.from_mturk"] = None
                # NOTE(review): the normalized bitmap is passed through
                # unchanged (possibly None, and without the int cast used
                # above) exactly as before -- confirm bitmap_to_list
                # accepts None.
                obj["bitmap.normalized"] = bitmap_to_list(bitmap_mturk_norm)
            objs.append(obj)
        json.dump(objs, fout, indent=4, sort_keys=True)

        print("Created dataset at %s" % output_file)
Example #3
0
    parser.add_argument("-csv",
                        type=str,
                        required=True,
                        help="input CSV file with drawing task results")
    parser.add_argument("-field",
                        type=str,
                        default="Answer.WritingTexts",
                        help="Name of CSV field containing draw events")
    parser.add_argument("-output_dir",
                        type=str,
                        default="output/make_images_from_drawing_task_results",
                        help="output directory for images")
    args = parser.parse_args()

    imgmaker = ImageMaker(args.width, args.padding)
    bmpmaker = BitmapMaker(args.width, args.width)
    np.random.seed(0)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    with open(args.csv) as fin:
        reader = csv.reader(fin)
        header = next(reader)
        colindex = header.index(args.field)
        for i, row in enumerate(reader):
            commands_str = row[colindex]
            bmpmaker.clear()
            event_sequence = AbsoluteEventSequence.from_mturk_string(
                commands_str)
            event_sequence = event_sequence.canonicalize()
from bitmap import BitmapMaker
from events import AbsoluteEventSequence, RelativeEventSequence


if __name__ == "__main__":
    # Script entry point: parse the command-line options, make sure the output
    # directory exists, then parse the draw events stored in each CSV row.
    parser = argparse.ArgumentParser()
    parser.add_argument("-width", type=int, default=32, help="Width of blocks in pixels")
    parser.add_argument("-padding", type=int, default=2, help="Padding of blocks in pixels")
    # NOTE(review): -dim is parsed but never read in the visible part of this
    # block -- confirm whether it is used further down.
    parser.add_argument("-dim", type=int, default=5, help="Dimensionality of grid")
    parser.add_argument("-csv", type=str, required=True, help="input CSV file with drawing task results")
    parser.add_argument("-field", type=str, default="Answer.WritingTexts", help="Name of CSV field containing draw events")
    parser.add_argument("-output_dir", type=str, default="output/make_images_from_drawing_task_results", help="output directory for images")
    args = parser.parse_args()

    imgmaker = ImageMaker(args.width, args.padding)
    # NOTE(review): the bitmap maker is sized with the block pixel width on
    # both axes rather than with -dim -- confirm this is intended.
    bmpmaker = BitmapMaker(args.width, args.width)
    # Fixed NumPy seed; what consumes the random numbers is not visible here.
    np.random.seed(0)

    # Create the output directory (including parents) on first use.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)


    with open(args.csv) as fin:
        reader = csv.reader(fin)
        # The first CSV row is the header; use it to locate the events column.
        header = next(reader)
        colindex = header.index(args.field)  # raises ValueError if the field is absent
        for i,row in enumerate(reader):
            commands_str = row[colindex]
            # Reset the bitmap maker before handling this row's events.
            bmpmaker.clear()
            # Parse the raw string into an event sequence, then canonicalize it.
            event_sequence = AbsoluteEventSequence.from_mturk_string(commands_str)
            event_sequence = event_sequence.canonicalize()
                        help="URL location where report will appear")
    args = parser.parse_args()

    output_file = "report.html"

    workers = {}
    worker_counts = {}

    html_lines = []

    work_times = []
    total_work_time = 0
    nrows = 0

    imgmaker = ImageMaker(args.block_px, args.padding_px)
    bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim)

    try:
        os.mkdir(args.output_dir)
    except OSError:
        pass

    with open(args.csv) as fin:
        reader = csv.reader(fin)
        header = next(reader)

        image_index = header.index(args.image_field)
        commands_index = header.index(args.commands_field)
        draw_events_index = header.index(args.draw_events_field)
        worker_id_index = header.index(args.worker_id_field)
        work_time_index = header.index(args.work_time_field)
                        type=int,
                        default=16,
                        help="Block width in pixels")
    parser.add_argument("-padding_px",
                        type=int,
                        default=1,
                        help="Padding in pixels")
    parser.add_argument("-bitmap_dim",
                        type=int,
                        default=25,
                        help="Width of bitmap in blocks (assumed square)")

    # Output location
    parser.add_argument("-output_dir",
                        type=str,
                        required=True,
                        help="Directory to write output to")

    args = parser.parse_args()

    output_file = "report.html"

    imgmaker = ImageMaker(args.block_px, args.padding_px)
    bmpmaker = BitmapMaker(args.bitmap_dim, args.bitmap_dim)

    make_dir_unless_exists(args.output_dir)

    convert_commands_to_image_sequence(bmpmaker, imgmaker, args.commands,
                                       "{}/img_".format(args.output_dir),
                                       args.mode)
Example #7
0
    parser.add_argument("-mode",
                        type=str,
                        required=True,
                        help="Format of output sequences: [relative,cursor]")

    parser.add_argument(
        "-output",
        type=str,
        default="",
        help="Output file to write updated JSON to. If not specified, a suffix "
        "is added to the input file to create the output file.")
    parser.add_argument(
        "-width",
        type=int,
        default=25,
        help="Width of bitmap (assume square) - use larger number than size"
        " of original bitmap to allow for conversion of relative sequences"
        " to absolute sequences")
    args = parser.parse_args()
    input_file = args.json
    mode = args.mode
    output_file = args.output if args.output else "%s-with-hamming.json" % (
        input_file[:input_file.rfind("/")] +
        input_file[input_file.rfind("/"):].replace(".json", ""))
    width = args.width
    input_data = json.load(open(input_file, 'Ur'))
    bmpmaker = BitmapMaker(width, width)

    output_data = update_json(input_data, mode)
    json.dump(output_data, open(output_file, 'w'), sort_keys=True, indent=4)