def main():
    """Pad an annotation file so every sampled image has an entry.

    Command line: ``AL_FILE IMG_DIR [IMG_DIR ...]``.

    Every IMG_STEP-th ``*jpeg`` file (sorted) of each image directory
    (directories visited in sorted order) is walked in lockstep with the
    annotations parsed from AL_FILE.  An image whose name -- its last two
    path components joined by '/' -- matches the next unconsumed annotation
    reuses that annotation; any other image gets a fresh ``al.Annotation``
    carrying only its name.  The result is saved under the AL_FILE path with
    '.al' replaced by '.pal', and AL_FILE is removed when its path contains
    '.al'.
    """
    # NOTE(review): presumably loads settings such as IMG_STEP into the
    # environment -- confirm against source_file's definition.
    source_file('data_utils_init.sh')
    step = int(os.environ['IMG_STEP'])

    al_path = sys.argv[1]
    existing = al.parse(al_path)

    frames = []
    for directory in sorted(sys.argv[2:]):
        frames += sorted(glob.glob(directory + '/*jpeg'))[::step]

    padded = []
    cursor = 0  # index of the next annotation that has not been matched yet
    for frame in frames:
        name = u'/'.join(frame.split('/')[-2:])
        if cursor != len(existing) and name == existing[cursor].imageName:
            entry = existing[cursor]
            cursor += 1
        else:
            entry = al.Annotation()
            entry.imageName = name
        padded.append(entry)

    # Every parsed annotation must have been matched to an image, in order.
    assert cursor == len(existing)

    al.save(al_path.replace('.al', '.pal'), padded)
    if '.al' in al_path:
        os.remove(al_path)
def main():
    """Trim a HITs input file down to the images in a bad worker's al file.

    Options:
        -f / --al-file     al file from the bad worker to resubmit
        -i / --input-file  input file in the new hits directory; it is
                           rewritten in place

    The input file is replaced by a 'urls' header line followed by every
    original line that contains the image name of at least one annotation
    in the al file, in the original order.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when either option is missing.
    """
    parser = OptionParser()
    parser.add_option('-f', '--al-file', dest='al_file', type='string',
                      default=None,
                      help="al file from bad worker to resubmit")
    parser.add_option('-i', '--input-file', dest='input_file', type='string',
                      default=None,
                      help="input file in new hits directory to be resubmitted")
    opts, _ = parser.parse_args()

    # Validate explicitly: an assert would vanish under `python -O` and only
    # produce a bare AssertionError otherwise.
    if opts.al_file is None or opts.input_file is None:
        parser.error('both --al-file and --input-file are required')

    images = [anno.imageName for anno in al.parse(opts.al_file)]

    with open(opts.input_file) as fh:
        lines = fh.readlines()

    # Keep a line as soon as any image name occurs in it as a substring.
    new_lines = ['urls\n']
    new_lines.extend(
        line for line in lines if any(image in line for image in images))

    with open(opts.input_file, 'w') as fh:
        fh.writelines(new_lines)
def fix_paths(al_file):
    """Give bare image names in an al file a '<dir>/<file>' form, in place.

    An annotation is rewritten when its imageName has no '/' separated
    directory part, or when the first '/' separated component is empty.
    The directory is the file name with its last '_' separated field
    dropped; the file part is the file name cut right after the first
    '.jpeg'.  A file name without '.jpeg' is kept whole.

    Args:
        al_file: path of the annotation file; it is parsed and then saved
            back to the same path.
    """
    parse = al.parse(al_file)
    for p in parse:
        parts = p.imageName.split('/')
        if len(parts) >= 2 and parts[0]:
            # Already has a non-empty leading directory component.
            continue
        inp = parts[-1]
        # str.find returns -1 when '.jpeg' is absent; slicing with -1 + 5
        # would silently truncate the name to 4 characters, so only cut
        # when the extension is really there.
        ext_start = inp.find('.jpeg')
        if ext_start == -1:
            file_name = inp
        else:
            file_name = inp[:ext_start + len('.jpeg')]
        p.imageName = '_'.join(inp.split('_')[:-1]) + '/' + file_name
    al.save(al_file, parse)
def main():
    """Fold the old rectangles into the redo-verified annotations.

    Command line: ``HITS_DIR``.

    The old annotation file is located with ``liwb.get_old_anno``; the redo
    file is the same path with '.al' replaced by '-redo-verified.al'.  Each
    redo annotation has the rects of the old annotation with the same
    imageName appended; redo annotations without an old counterpart are
    reported and left untouched.  The merged list is written with
    ``al.saveXML`` to the old path with '.al' replaced by '-verified.al',
    and the redo file is moved into ``env.trash``.
    """
    hits_dir = sys.argv[1]
    old_path = liwb.get_old_anno(hits_dir)
    redo_path = old_path.replace('.al', '-redo-verified.al')

    old_annos = al.parse(old_path)
    redo_annos = al.parse(redo_path)
    old_by_name = dict((anno.imageName, anno) for anno in old_annos)

    for redo in redo_annos:
        if redo.imageName in old_by_name:
            redo.rects.extend(old_by_name[redo.imageName].rects)
        else:
            print("old anno %s doesn't have %s, skipping..." %
                  (os.path.basename(old_path), redo.imageName))

    al.saveXML(old_path.replace('.al', '-verified.al'), redo_annos)

    # The old annotation file is left in place; trashing it is disabled:
    # shutil.move(old_anno_file, env.trash + os.path.basename(old_anno_file))
    shutil.move(redo_path, env.trash + os.path.basename(redo_path))
def main():
    """Fold the old rectangles into the redo-verified annotations.

    Command line: ``HITS_DIR``.

    The old annotation file is located with ``liwb.get_old_anno``; the redo
    file is the same path with '.al' replaced by '-redo-verified.al'.  Each
    redo annotation has the rects of the old annotation with the same
    imageName appended; redo annotations without an old counterpart are
    reported and left untouched.  The merged list is written with
    ``al.saveXML`` to the old path with '.al' replaced by '-verified.al',
    and the redo file is moved into ``env.trash``.
    """
    hits_dir = sys.argv[1]
    old_path = liwb.get_old_anno(hits_dir)
    redo_path = old_path.replace('.al', '-redo-verified.al')

    old_annos = al.parse(old_path)
    redo_annos = al.parse(redo_path)
    old_by_name = dict((anno.imageName, anno) for anno in old_annos)

    for redo in redo_annos:
        if redo.imageName in old_by_name:
            redo.rects.extend(old_by_name[redo.imageName].rects)
        else:
            print("old anno %s doesn't have %s, skipping..." %
                  (os.path.basename(old_path), redo.imageName))

    al.saveXML(old_path.replace('.al', '-verified.al'), redo_annos)

    # The old annotation file is left in place; trashing it is disabled:
    # shutil.move(old_anno_file, env.trash + os.path.basename(old_anno_file))
    shutil.move(redo_path, env.trash + os.path.basename(redo_path))
def main():
    """Overwrite a bad al file, taking annotations from a good one.

    Options:
        -b / --bad   al file to repair; it is overwritten
        -g / --good  al file supplying the replacement annotations

    Each annotation of the bad file is swapped for the good file's
    annotation with the same imageName when one exists and is kept as is
    otherwise.  The order of the bad file is preserved.
    """
    parser = OptionParser()
    parser.add_option('-b', '--bad', dest='bad_file', type='string')
    parser.add_option('-g', '--good', dest='good_file', type='string')
    opts, _ = parser.parse_args()

    bad_annos = al.parse(opts.bad_file)
    good_by_name = dict(
        (anno.imageName, anno) for anno in al.parse(opts.good_file))

    repaired = [good_by_name.get(anno.imageName, anno) for anno in bad_annos]
    al.save(opts.bad_file, repaired)
def main():
    """Summarise worker payments for a HITs directory and rewrite its bonus script.

    Command line: ``HITS_DIR`` (trailing slashes are ignored).

    A ``TurkWorker`` is created per worker al file, using the worker id taken
    from the file name, the number of annotations in the file and the base
    pay.  Bonus payments are read from ``<HITS_DIR>/<vid>-grant-bonus.sh``
    and attached to the matching worker.  Each worker and an overall totals
    line are printed, and the bonus script is then overwritten with every
    worker's ``bonus_str``.
    """
    hits_dir = sys.argv[1]
    while hits_dir[-1] == '/':
        hits_dir = hits_dir[:-1]
    vid = os.path.basename(hits_dir)
    bonus_script = '%s/%s-grant-bonus.sh' % (hits_dir, vid)

    base_pay = get_base_pay(hits_dir)

    # One TurkWorker per al file, keyed by worker id.
    workers = {}
    image_count = 0
    for al_path in get_worker_al_files(hits_dir):
        # The worker id sits between the first '_' and '.al' in the file name.
        worker_id = os.path.basename(al_path).split('_')[1].split('.al')[0]
        n_images = len(al.parse(al_path))
        image_count += n_images
        workers[worker_id] = TurkWorker(worker_id, n_images, base_pay)

    # Only every other line of the bonus script is treated as a payment.
    # NOTE(review): presumably the script alternates payment lines with
    # other lines -- confirm against bonus_str's output.
    with open(bonus_script) as fh:
        payment_lines = fh.readlines()[::2]
    for line in payment_lines:
        workers[get_following_word(line, '--workerid')].add_payment(line)

    # Slots as printed below: base, bonus, total, number of cars.
    totals = np.zeros(4)
    for worker in workers.values():
        print(worker)
        totals += worker.totals()

    summary = ("TOTALS -- base: $%5.2f, bonus: $%5.2f, total: $%5.2f, "
               "num images: %d, num cars: %d, %.2f cars/image, avg price: $%4.3f/image, $%4.3f/car")
    print(summary % (totals[0], totals[1], totals[2],
                     image_count, totals[3],
                     float(totals[3]) / image_count,
                     totals[2] / image_count,
                     totals[2] / totals[3]))

    with open(bonus_script, 'w') as fh:
        for worker in workers.values():
            fh.write(worker.bonus_str(hits_dir))
def main():
    """Trim a HITs input file down to the images in a bad worker's al file.

    Options:
        -f / --al-file     al file from the bad worker to resubmit
        -i / --input-file  input file in the new hits directory; it is
                           rewritten in place

    The input file is replaced by a 'urls' header line followed by every
    original line that contains the image name of at least one annotation
    in the al file, in the original order.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when either option is missing.
    """
    parser = OptionParser()
    parser.add_option('-f', '--al-file', dest='al_file', type='string',
                      default=None,
                      help="al file from bad worker to resubmit")
    parser.add_option('-i', '--input-file', dest='input_file', type='string',
                      default=None,
                      help="input file in new hits directory to be resubmitted")
    opts, _ = parser.parse_args()

    # Validate explicitly: an assert would vanish under `python -O` and only
    # produce a bare AssertionError otherwise.
    if opts.al_file is None or opts.input_file is None:
        parser.error('both --al-file and --input-file are required')

    images = [anno.imageName for anno in al.parse(opts.al_file)]

    with open(opts.input_file) as fh:
        lines = fh.readlines()

    # Keep a line as soon as any image name occurs in it as a substring.
    new_lines = ['urls\n']
    new_lines.extend(
        line for line in lines if any(image in line for image in images))

    with open(opts.input_file, 'w') as fh:
        fh.writelines(new_lines)
import mechturk.annolib as al
import glob
import sys
import os

# Print the total number of annotations over all per-worker al files found
# in <HITS_DIR>/results_by_worker_<basename of HITS_DIR>/.
hits_dir = sys.argv[1]
while hits_dir[-1] == '/':
    # Drop trailing slashes so that basename() yields the directory name.
    hits_dir = hits_dir[:-1]

pattern = '%s/results_by_worker_%s/*al' % (hits_dir, os.path.basename(hits_dir))
files = glob.glob(pattern)
tot_len = sum(len(al.parse(f)) for f in files)
print(tot_len)
def find_previous_annos(hits_dir):
    """Return the old annotations of a HITs directory keyed by image name.

    The old annotation file is located with ``get_old_anno(hits_dir)`` and
    parsed; when several annotations share an imageName the last one wins.
    """
    name2anno = {}
    for anno in al.parse(get_old_anno(hits_dir)):
        name2anno[anno.imageName] = anno
    return name2anno