def archive_pybossa(doc, method=u'archive_pybossa', name='', description=''):
    """
    Publishes recognition results as pybossa tasks for postcorrection.

    A pybossa project is created first. Afterwards every line of every TEI
    document referenced by ``doc`` is uploaded as one task carrying the
    record's image, its dimensions, the line text and the line bounding box.

    Args:
        doc [(unicode, unicode), ...]: The input document tuples.
        method (unicode): The suffix string appended to all output files
                          (not read inside this function).
        name (str): Name used as prefix of the project title.
        description (str): Description handed to pybossa.

    Returns:
        The list of input storage tuples, unchanged.
    """
    logger.debug('Creating pybossa project named {}'.format(name))
    project_title = '{} ({})'.format(name, doc[0][0])
    proj = pbclient.create_project(project_title, doc[0][0], description)
    logger.debug('Creating pybossa tasks for docs {}'.format(doc))
    for storage_tuple in doc:
        record = tei.OCRRecord()
        with storage.StorageFile(*storage_tuple, mode='rb') as tei_file:
            record.load_tei(tei_file)
        for _, line in record.lines.iteritems():
            # Concatenate the graphemes of all segments, segment by segment.
            segment_texts = [
                u''.join(g['grapheme'] for g in seg['content'].itervalues())
                for seg in line['content'].itervalues()
            ]
            line_text = u''.join(segment_texts)
            payload = {
                'image': record.img,
                'dimensions': record.dimensions,
                'line_text': line_text.encode('utf-8'),
                'bbox': [line['bbox'][corner] for corner in range(4)],
            }
            pbclient.create_task(proj.id, payload)
    return doc
def create_video_task(app, signal_name, video_id, question):
    """Create one task for a video in the application ``app``.

    The task info holds the question, the configured number of answers
    (``options.n_answers``), the signal name and the video id.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'signal_name': signal_name,
        'video_id': video_id,
    }
    pbclient.create_task(app.id, payload)
def archive_pybossa(doc, method=u'archive_pybossa'):
    """
    Uploads recognition results to a pybossa service for postcorrection.

    For every entry of ``doc`` the TEI record referenced by the entry's
    second element is loaded and each of its lines is sent as one task.

    Args:
        doc: The input document tuple(s). The code both unpacks ``doc`` into
             the log message and iterates over it -- presumably a sequence
             of two storage tuples; TODO confirm against the caller.
        method (unicode): The suffix string appended to all output files
                          (not read inside this function).

    Returns:
        The input, unchanged.
    """
    logger.debug('Creating pybossa task {} {}'.format(*doc))
    for entry in doc:
        record = tei.OCRRecord()
        record.load_tei(entry[1])
        for _, line in record.lines.iteritems():
            # NOTE(review): ``project`` is not defined in this function;
            # presumably a module-level name -- verify.
            pbclient.create_task(project, {
                'image': record.img,
                'dimensions': record.dimensions,
                'line_text': line,
                'bbox': [str(line['bbox'][corner]) for corner in range(4)],
            })
    return doc
def send_data():
    """Replace presenter and tasks of a PyBossa app from the JSON request.

    Reads ``api_key``, ``server_url``, ``data`` and ``project_name`` from the
    request body, renders the task presenter, deletes the tasks currently
    returned for the app, creates the tasks from ``data['tasks']`` and finally
    updates the app. Responds with an empty body and status 200.
    """
    body = request.json
    api_key = body.get('api_key')
    server_url = body.get('server_url')
    data = body.get('data')
    project_name = body.get('project_name')

    presenter_html = render_template('presenter.html',
                                     steps=data['steps'],
                                     project_name=project_name)

    pbclient.set('endpoint', server_url)
    pbclient.set('api_key', api_key)

    # TODO: check error
    matches = pbclient.find_app(short_name=project_name)
    app = matches[0]
    app.info['task_presenter'] = presenter_html

    # Drop the tasks that currently exist before uploading the new set.
    for old_task in pbclient.find_tasks(app.id):
        pbclient.delete_task(old_task.id)
    for new_task in data['tasks']:
        pbclient.create_task(app.id, new_task)

    pbclient.update_app(app)
    return '', 200
def create_tasks(engine):
    """Synchronise the 'representative' table with the tasks on pyBossa.

    The app's task presenter is refreshed from the bundled template. Each
    representative with a networking text of at least three characters
    (after stripping) gets a SHA-1 signature built from its identification
    code and networking text; a task with that signature is updated if it
    already exists, otherwise a new task is created.
    """
    log.info("Updating tasks on pyBossa...")
    app = setup()
    with flask_app.open_resource('resources/pbnetworks_template.html') as template:
        app.info['task_presenter'] = template.read()
    pbclient.update_app(app)

    # Index the tasks already on the server by their stored signature.
    known = {
        task.data.get('info').get('signature'): task
        for task in pbclient.get_tasks(app.id, limit=30000)
    }

    for rep in sl.all(engine, sl.get_table(engine, 'representative')):
        networking = rep.get('networking')
        if networking is None or len(networking.strip()) < 3:
            continue

        raw_signature = rep.get('identification_code') + networking
        digest = sha1(raw_signature.encode('ascii', 'ignore')).hexdigest()
        rep['signature'] = digest
        print([rep.get('name')])
        log.debug("Task: %s", rep['name'])

        # Dates are replaced by their ISO string form before upload.
        for date_field in ('last_update_date', 'registration_date'):
            rep[date_field] = rep[date_field].isoformat()

        if digest not in known:
            pbclient.create_task(app.id, rep)
            continue
        task = known[digest]
        task.data['info'] = rep
        pbclient.update_task(task)
def create_photo_task(app, photo, question):
    """Create one task in ``app`` for a photo.

    The task info carries the question, ``options.n_answers`` and the
    ``link``, ``url_m`` and ``url_b`` entries of ``photo``.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'link': photo['link'],
        'url_m': photo['url_m'],
        'url_b': photo['url_b'],
    }
    pbclient.create_task(app.id, payload)
def create_phrasal_verb_task(app, phrasal_verb, exemplo_parte_1, verbo,
                             exemplo_parte_2, particula, exemplo_parte_3,
                             question1, question2):
    """Create one phrasal-verb task in ``app``.

    All arguments except ``app`` are copied into the task info under their
    own names; the number of answers is fixed at 5.
    """
    payload = {
        'question1': question1,
        'question2': question2,
        'n_answers': 5,
        'phrasal_verb': phrasal_verb,
        'exemplo_parte_1': exemplo_parte_1,
        'verbo': verbo,
        'exemplo_parte_2': exemplo_parte_2,
        'particula': particula,
        'exemplo_parte_3': exemplo_parte_3,
    }
    pbclient.create_task(app.id, payload)
def POST(self):
    """Upload the rows of a public Google spreadsheet as PyBossa tasks.

    Reads ``endpoint``, ``api_key``, ``appname``, ``spreadsheet``,
    ``worksheet`` and ``n_answers`` from the submitted form and records
    progress as HTML fragments in the module-level ``last_status``.
    """
    global last_status
    data = web.input()
    last_status = '<p>Connecting to PyBossa <i class="loading"></i></p>'

    # check endpoint and api_key
    pbclient.set('endpoint', data.endpoint)
    pbclient.set('api_key', data.api_key)
    app = pbclient.find_app(short_name=data.appname)
    if len(app) == 0:
        last_status += '<p class="error" data-field="appname">PyBossa app not found.</p>'
    else:
        app = app[0]
        # The update result is used as a permission probe for the API key.
        res = pbclient.update_app(app)
        if res == 403:
            last_status += '<p class="error" data-field="api_key">You\'re not allowed to edit that app. Double check your API key.</p>'
        else:
            last_status += '<p>Loading data from Google spreadsheet <i class="loading"></i></p>'
            url = 'http://spreadsheets.google.com/feeds/cells/%s/%s/public/basic?alt=json' % (data.spreadsheet, data.worksheet)
            r = requests.get(url)
            # Floor division keeps the "any 4xx status" meaning even where
            # ``/`` is true division (there only exactly 400 would match).
            if r.status_code // 100 == 4:
                last_status += '<p class="error" data-field="spreadsheet">The spreadsheet could not be found. Make sure that the key is right and that you properly shared the document (click on <i>File > Publish to the web</i>).</p>'
            else:
                last_status += '<p>Parsing spreadsheet data <i class="loading"></i></p>'
                # NOTE(review): ``r.json`` is read as an attribute here;
                # newer requests releases expose it as a method -- confirm
                # the installed version before changing this.
                tasks = parse_spreadsheet(r.json)
                tmp = last_status
                total = len(tasks)
                completed = 0
                for info in tasks:
                    info['n_answers'] = int(data.n_answers)
                    res = pbclient.create_task(app.id, info)
                    completed += 1
                    # Rebuild the status from the saved prefix so only the
                    # latest progress line is shown.
                    last_status = tmp + '<p>Uploading tasks to PyBossa (%d of %d)<i class="loading"></i></p>' % (completed, total)
                last_status += '<p>finished.</p>'
    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; the trailing print is assumed to sit at function level.
    print('')
def form_valid(self, form):
    """Create one PyBossa task per registered initial task type.

    Configures ``pbclient`` from the moonsheep settings, raises
    ``ImproperlyConfigured`` when ``initial_task.registry`` is empty, and
    otherwise creates a single-answer task for each registry entry using
    the submitted ``url``. Then defers to the parent implementation.
    """
    import pbclient
    from moonsheep.settings import PYBOSSA_BASE_URL, PYBOSSA_API_KEY

    pbclient.set('endpoint', PYBOSSA_BASE_URL)
    pbclient.set('api_key', PYBOSSA_API_KEY)

    if len(initial_task.registry) == 0:
        raise ImproperlyConfigured

    for task_type in initial_task.registry:
        task_info = {
            'type': task_type,
            'url': form.cleaned_data.get('url'),
        }
        pbclient.create_task(project_id=PYBOSSA_PROJECT_ID,
                             info=task_info,
                             n_answers=1)

    return super(NewTaskFormView, self).form_valid(form)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` using ``photo`` itself as the task info.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Task info, passed unchanged to ``pbclient.create_task``.
        question: Accepted for interface compatibility; not used here.
        priority: Forwarded as ``priority_0``.

    Any exception raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = photo
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_sound_clip_task(app, sound_clip, priority=0):
    """Create one task in ``app`` using ``sound_clip`` as the task info.

    Args:
        app: Object whose ``id`` identifies the target application.
        sound_clip: Task info, passed unchanged to ``pbclient.create_task``.
        priority: Forwarded as ``priority_0``.

    Any exception raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = sound_clip
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from ``question`` plus all of ``photo``.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping merged into the task info; its keys override
            ``question`` on collision.
        question: Stored under the ``question`` key.
        priority: Forwarded as ``priority_0``.

    Any exception raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(question=question)
    task_info.update(photo)
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def load_tasks(self): with open(self.options.load_tasks) as f: for task in json.load(f): if 'info' in task: task = task['info'] task["question"] = self.app_config['question'] task["n_answers"] = self.options.n_answers self.handle_result(pbclient.create_task(self.app.id, task)) print ".",
def create_sound_task(app, sound, question):
    """Create one task in ``app`` for an embedded sound.

    Args:
        app: Object whose ``id`` identifies the target application.
        sound: Stored under the ``embed`` key of the task info.
        question: Stored under the ``question`` key.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(question=question,
                     n_answers=options.n_answers,
                     embed=sound)
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from a photo's ``url`` and ``viewid``.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping providing ``url`` and ``viewid``.
        question: Stored under the ``question`` key.
        priority: Forwarded as ``priority_0``.

    Any exception raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(question=question,
                     url=photo['url'],
                     viewid=photo['viewid'])
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def setup():
    """Create one PyBossa task per row of the ``images`` table.

    Configures ``pbclient``, looks up the 'Sun4All' app, reads all rows of
    ``images`` and creates a five-answer task for each row from its first
    two columns and their ``getWord`` lookups.

    Returns:
        ``"ok"`` on success. If the database connection fails, a message
        string for access-denied / unknown-database errors, or the error
        object itself for any other connector error.
    """
    # settings
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration and rotating this value.
    pbclient.set('api_key', "74690b3e-e980-4299-b006-9c6a5c50b355")
    pbclient.set('endpoint', 'http://pybossa.socientize.eu/pybossa')

    pyBossaApp = pbclient.find_app(short_name='Sun4All')[0]

    try:
        cnx = get_connection()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            return "Something is wrong your username or password"
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            return "Database does not exists"
        else:
            return err

    # Close cursor and connection even when the query fails; the failure
    # itself still propagates to the caller as before.
    try:
        cursor = cnx.cursor()
        try:
            cursor.execute("SELECT * FROM images")
            words = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        cnx.close()

    for item in words:
        task_info = dict(n_answers=5,
                         start=item[0],
                         end=item[1],
                         startWord=getWord(item[0]),
                         endWord=getWord(item[1]))
        pbclient.create_task(pyBossaApp.id, task_info)
    return "ok"
def setup():
    """Create one PyBossa task per row of the ``images`` table.

    Configures ``pbclient``, looks up the 'Sun4All' app, reads all rows of
    ``images`` and creates a five-answer task for each row from its first
    two columns and their ``getWord`` lookups.

    Returns:
        ``"ok"`` on success. If the database connection fails, a message
        string for access-denied / unknown-database errors, or the error
        object itself for any other connector error.
    """
    # settings
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration and rotating this value.
    pbclient.set('api_key', "74690b3e-e980-4299-b006-9c6a5c50b355")
    pbclient.set('endpoint', 'http://pybossa.socientize.eu/pybossa')

    pyBossaApp = pbclient.find_app(short_name='Sun4All')[0]

    try:
        cnx = get_connection()
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            return "Something is wrong your username or password"
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            return "Database does not exists"
        else:
            return err

    # Close cursor and connection even when the query fails; the failure
    # itself still propagates to the caller as before.
    try:
        cursor = cnx.cursor()
        try:
            cursor.execute("SELECT * FROM images")
            words = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        cnx.close()

    for item in words:
        task_info = dict(n_answers=5,
                         start=item[0],
                         end=item[1],
                         startWord=getWord(item[0]),
                         endWord=getWord(item[1]))
        pbclient.create_task(pyBossaApp.id, task_info)
    return "ok"
def create_msg_task(app, msg, question):
    """Create one task in ``app`` for a message thread.

    ``msgs_text`` and ``msgs_html`` are lists (hence the plural), while
    ``msg_subject`` and ``msg_date`` are simple strings. The subject and the
    number of tasks currently returned for the app are printed before the
    task is created.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'msgs_text': msg['msgs_text'],
        'msgs_html': msg['msgs_html'],
        'msg_subject': msg['msg_subject'],
        'msg_date': msg['msg_date'],
    }
    print(payload['msg_subject'])
    print(len(pbclient.get_tasks(app.id)))
    # NOTE(review): reconstructed from whitespace-collapsed source; the call
    # below is assumed to be active code following commented-out debugging
    # lines -- confirm against the original file.
    pbclient.create_task(app.id, payload)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from a photo's big and small links.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping providing ``link_big`` and ``link_small``.
        question: Accepted for interface compatibility; not used here.
        priority: Forwarded as ``priority_0``.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(n_answers=options.n_answers,
                     link_big=photo['link_big'],
                     link_small=photo['link_small'])
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from a photo's link and image URLs.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping providing ``link``, ``url_m`` and ``url_b``.
        question: Stored under the ``question`` key.
        priority: Forwarded as ``priority_0``.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(question=question,
                     n_answers=options.n_answers,
                     link=photo['link'],
                     url_m=photo['url_m'],
                     url_b=photo['url_b'])
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_graph_task(app, graph, question, priority=0):
    """Create one task in ``app`` using ``graph`` itself as the task info.

    Args:
        app: Object whose ``id`` identifies the target application.
        graph: Task info, passed unchanged to ``pbclient.create_task``.
        question: Accepted for interface compatibility; not used here.
        priority: Forwarded as ``priority_0``.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = graph
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info,
                                        priority_0=priority,
                                        n_answers=options.n_answers)
        # NOTE: a rate-limit back-off (sleep when few requests remain) was
        # previously sketched here but is not active.
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_new_task(self, task, info):
    """
    Helper that creates a new task following the proposed structure.

    The dotted path of ``task`` (module plus name) is written into
    ``info['type']`` before anything else happens.

    :param task: object providing ``__module__`` and ``__name__``
    :param info: task parameters; modified in place
    :return: the created task, or None when TASK_SOURCE is the random
             source and creation is skipped
    """
    # TODO: 'type' is now reserved key in task params
    # TODO: maybe we should reserve '_type' ?
    info['type'] = ".".join((task.__module__, task.__name__))

    if TASK_SOURCE != RANDOM_SOURCE:
        return pbclient.create_task(self.project_id, info, self.N_ANSWERS)

    logger.info("Skipping task creation because TASK_SOURCE is set to random: " + repr(info))
    return None
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from a photo record.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping providing ``idISS``, ``link_big``, ``link_small``,
            ``linkData``, ``citylon``, ``citylat`` and ``focal``.
        question: Accepted for interface compatibility; not used here.
        priority: Forwarded as ``priority_0``.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(n_answers=options.n_answers,
                     idiss=photo['idISS'],
                     link_big=photo['link_big'],
                     link_small=photo['link_small'],
                     linkData=photo['linkData'],
                     citylon=photo['citylon'],
                     citylat=photo['citylat'],
                     focal=photo['focal'])
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_photo_task(app, photo, question, priority=0):
    """Create one task in ``app`` from a photo record.

    Args:
        app: Object whose ``id`` identifies the target application.
        photo: Mapping providing ``idISS``, ``link_big``, ``link_small``,
            ``linkData``, ``citylon``, ``citylat`` and ``focal``.
        question: Accepted for interface compatibility; not used here.
        priority: Forwarded as ``priority_0``.

    The number of answers comes from ``options.n_answers``. Any exception
    raised while creating or checking the task is handed to
    ``format_error`` together with the last response (``None`` if the
    request itself failed).
    """
    task_info = dict(n_answers=options.n_answers,
                     idiss=photo['idISS'],
                     link_big=photo['link_big'],
                     link_small=photo['link_small'],
                     linkData=photo['linkData'],
                     citylon=photo['citylon'],
                     citylat=photo['citylat'],
                     focal=photo['focal'])
    # Bind ``response`` first: if create_task raises, the handler below
    # would otherwise die with a NameError instead of reporting the error.
    response = None
    try:
        response = pbclient.create_task(app.id, task_info, priority_0=priority)
        check_api_error(response)
    except Exception:
        format_error("pbclient.create_task", response)
def create_video_task(app, oembed, question):
    """Create one task in ``app`` for an embedded video.

    The task info holds the question, ``options.n_answers`` and the
    ``oembed`` value.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'oembed': oembed,
    }
    pbclient.create_task(app.id, payload)
def create_task(self, project_id, tweet):
    """Create a task for ``tweet`` in the given project and return it.

    The tweet is wrapped as ``{'tweet': tweet}`` to form the task info.
    """
    return pbclient.create_task(project_id=project_id,
                                info={'tweet': tweet})
def _add_task(self, task_data):
    """Create a task from ``task_data``, retrying while rate limited.

    After each attempt the response is passed to
    ``self._wait_if_reached_rate_limit``; a truthy result triggers another
    attempt with the same data.

    A loop is used instead of self-recursion so that a long run of
    rate-limited responses cannot exhaust the interpreter's recursion
    limit.
    """
    while True:
        res = pbclient.create_task(self.project_id, task_data)
        if not self._wait_if_reached_rate_limit(res):
            break
def create_sound_task(app, sound, question):
    """Create one task in ``app`` for an embedded sound.

    The task info holds the question, ``options.n_answers`` and the sound
    under the ``embed`` key.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'embed': sound,
    }
    pbclient.create_task(app.id, payload)
app.long_description = open('long_description.html').read() app.info['task_presenter'] = open('template.html').read() app.info['thumbnail'] = app_config['thumbnail'] except: format_error("pbclient.create_app", response) try: response = pbclient.update_app(app) check_api_error(response) for page in range(1, options.pdf_pages): # Data for the tasks task_info = dict(question=app_config['question'], page=page, pdf_url=options.pdf_url) response = pbclient.create_task(app.id, task_info, n_answers=options.n_answers) check_api_error(response) except: format_error("pbclient.update_app or pbclient.create_task", response) else: if options.add_more_tasks: try: response = pbclient.find_app( short_name=app_config['short_name']) check_api_error(response) app = response[0] for page in range(1, options.pdf_pages + 1): # Data for the tasks
app = response[0] app.long_description = open('long_description.html').read() app.info['task_presenter'] = open('template.html').read() app.info['thumbnail'] = app_config['thumbnail'] except: format_error("pbclient.create_app", response) try: response = pbclient.update_app(app) check_api_error(response) for page in range(1, options.pdf_pages): # Data for the tasks task_info = dict(question=app_config['question'], page=page, pdf_url=options.pdf_url) response = pbclient.create_task(app.id, task_info, n_answers=options.n_answers) check_api_error(response) except: format_error("pbclient.update_app or pbclient.create_task", response) else: if options.add_more_tasks: try: response = pbclient.find_app(short_name=app_config['short_name']) check_api_error(response) app = response[0] for page in range(1, options.pdf_pages + 1): # Data for the tasks task_info = dict(question="Transcribe the following page", page=page, pdf_url=options.pdf_url)
def create_task(self):
    """Create a single task from the JSON text in ``self.options.create_task``.

    The decoded object is stamped with the configured question and number
    of answers and then uploaded to ``self.app``.
    """
    payload = json.loads(self.options.create_task)
    payload["question"] = self.app_config['question']
    payload["n_answers"] = self.options.n_answers
    pbclient.create_task(self.app.id, payload)
app = response[0] app.long_description = open('long_description.html').read() app.info['task_presenter'] = open('template.html').read() app.info['thumbnail'] = app_config['thumbnail'] except: format_error("pbclient.create_app", response) try: response = pbclient.update_app(app) check_api_error(response) for page in range(1, 15): # Data for the tasks task_info = dict(question=app_config['question'], page=page, pdf_url=options.pdf_url) response = pbclient.create_task(app.id, task_info) check_api_error(response) except: format_error("pbclient.update_app or pbclient.create_task", response) else: if options.add_more_tasks: try: response = pbclient.find_app(short_name=app_config['short_name']) check_api_error(response) app = response[0] for page in range(1, options.pdf_pages + 1): # Data for the tasks task_info = dict(question="Transcribe the following page", page=page, pdf_url=options.pdf_url)
def create_tasks(app_config, submit_tasks):
    """Tile the configured area into grid cells and create a task per cell.

    Reads the task configuration from ``args.task_config``, splits the
    extent of the area polygon into ``task_rows`` x ``task_cols`` cells and,
    for every cell whose bounding box intersects the polygon, optionally
    submits a task and records the cell corners as KML points. The KML file
    ``<batch_name>_tasks.kml`` is saved at the end.

    Args:
        app_config: Mapping providing ``short_name`` and ``question``.
        submit_tasks: When truthy, tasks are created on the server and the
            server's rate limit is honoured; otherwise only the KML file is
            produced.

    NOTE(review): nesting was reconstructed from whitespace-collapsed
    source -- confirm against the original file.
    """
    config = RawConfigParser()
    config.read(args.task_config)
    response = pbclient.find_app(short_name=app_config['short_name'])
    app = response[0]
    app_id = app.id
    # Polygon around the area to be tasked, as list of (lat, long) lists.
    area_polygon = polygon_file_to_path(config.get("area", "polygon_file"))
    extent = area_polygon.get_extents().get_points()
    # The northern, southern, western, and eastern bounds of the area to
    # work on.
    nb = extent[1][0]
    wb = extent[0][1]
    sb = extent[0][0]
    eb = extent[1][1]
    # One style object is shared by every KML point written below.
    shared_style = simplekml.Style()
    shared_style.iconstyle.color = "ff0000ff"
    shared_style.labelstyle.scale = 0.5
    shared_style.iconstyle.scale = 0.5
    kml = simplekml.Kml()
    # Mark the four corners of the overall extent.
    pnt = kml.newpoint(name="NW")
    pnt.coords = [(wb, nb)]
    pnt.style = shared_style
    pnt = kml.newpoint(name="NE")
    pnt.coords = [(eb, nb)]
    pnt.style = shared_style
    pnt = kml.newpoint(name="SW")
    pnt.coords = [(wb, sb)]
    pnt.style = shared_style
    pnt = kml.newpoint(name="SE")
    pnt.coords = [(eb, sb)]
    pnt.style = shared_style
    # Size of the tasks: into how many rows and columns should the area be
    # divided.
    task_cols = int(config.get("tasksize", "task_cols"))
    task_rows = int(config.get("tasksize", "task_rows"))
    boundary = float(config.get("tasksize", "boundary"))
    # Steps start at the northern / western bound; ``boundary`` scales the
    # extra margin added to the map bounds of each task.
    ns_step = (sb - nb) / task_rows
    ns_boundary = ns_step * boundary
    we_step = (eb - wb) / task_cols
    we_boundary = we_step * boundary
    task_counter = 0
    if submit_tasks:
        # Ask the server how many requests are left in the rate-limit window.
        res = requests.get(args.server + '/api/app')
        remaining_requests = int(res.headers['x-ratelimit-remaining'])
        print("Remaining requests: " + str(remaining_requests))
    else:
        # Nothing is submitted, so the rate-limit wait below never triggers.
        remaining_requests = 99999
    for col in range(task_cols):
        wbr = wb + col * we_step
        ebr = wb + (col + 1) * we_step
        for row in range(task_rows):
            # Pause and re-query while fewer than 10 requests remain.
            while remaining_requests < 10:
                time.sleep(60)
                res = requests.get(args.server + '/api/app')
                remaining_requests = int(res.headers['x-ratelimit-remaining'])
                print(remaining_requests)
            nbc = nb + row * ns_step
            sbc = nb + (row + 1) * ns_step
            # Only cells that touch the area polygon become tasks.
            if area_polygon.intersects_bbox(Bbox([[nbc, wbr], [sbc, ebr]])):
                if submit_tasks:
                    task_info = dict(question=app_config['question'],
                                     n_answers=config.get("meta", "n_answers"),
                                     westbound=wbr,
                                     eastbound=ebr,
                                     northbound=nbc,
                                     southbound=sbc,
                                     westmapbound=wbr - we_boundary,
                                     eastmapbound=ebr + we_boundary,
                                     northmapbound=nbc - ns_boundary,
                                     southmapbound=sbc + ns_boundary,
                                     location=str(row) + "_" + str(col),
                                     batch=config.get("meta", "batch_name"))
                    response = pbclient.create_task(app_id, task_info)
                    check_api_error(response)
                    remaining_requests -= 1
                # Record the four corners of the cell, labelled with the
                # running task number.
                pnt = kml.newpoint(name=str(task_counter))
                pnt.coords = [(wbr, nbc)]
                pnt.style = shared_style
                pnt = kml.newpoint(name=str(task_counter))
                pnt.coords = [(ebr, nbc)]
                pnt.style = shared_style
                pnt = kml.newpoint(name=str(task_counter))
                pnt.coords = [(wbr, sbc)]
                pnt.style = shared_style
                pnt = kml.newpoint(name=str(task_counter))
                pnt.coords = [(ebr, sbc)]
                pnt.style = shared_style
                task_counter += 1
                print("Task: " + str(task_counter))
    kml.save(config.get("meta", "batch_name") + "_tasks.kml")
pbclient.update_app(app) # First of all we get the URL photos if options.tags: photos = get_flickr_photos(tags=options.tags) else: photos = get_flickr_photos() # Finally, we have to create a set of tasks for the application # For this, we get first the photo URLs from Flickr for i in xrange(1): for photo in photos: # Data for the tasks task_info = dict(question=app_config['question'], url_m=photo['url_m'], url_b=photo['url_b'], photo_info=photo['photo_info']) pbclient.create_task(app.id, task_info, n_answers=int(options.n_answers)) else: if options.add_more_tasks: app = pbclient.find_app(short_name=app_config['short_name'])[0] if options.tags: photos = get_flickr_photos(tags=options.tags) else: photos = get_flickr_photos(tags=options.tags) for photo in photos: task_info = dict(question=app_config['question'], n_answers=int(options.n_answers), url_m=photo['url_m'], url_b=photo['url_b'], photo_info=photo['photo_info']) pbclient.create_task(app.id, task_info)
app.info['thumbnail'] = app_config['thumbnail'] app.info['tutorial'] = open('tutorial.html').read() pbclient.update_app(app) # First of all we get the URL photos photos = get_flickr_photos() # Finally, we have to create a set of tasks for the application # For this, we get first the photo URLs from Flickr for i in xrange(1): for photo in photos: # Data for the tasks task_info = dict(question=app_config['question'], n_answers=int(options.n_answers), link=photo['link'], url_m=photo['url_m'], url_b=photo['url_b']) pbclient.create_task(app.id, task_info) else: if options.add_more_tasks: app = pbclient.find_app(short_name=app_config['short_name'])[0] photos = get_flickr_photos() for photo in photos: task_info = dict(question="Do you see a human in this photo?", n_answers=int(options.n_answers), link=photo['link'], url_m=photo['url_m'], url_b=photo['url_b']) pbclient.create_task(app.id, task_info) if options.update_template: print "Updating app template" app = pbclient.find_app(short_name=app_config['short_name'])[0]
def create_tweet_task(app, tweet, question):
    """Create one task in ``app`` for a tweet.

    The task info holds the question, ``options.n_answers`` and the tweet.
    """
    payload = {
        'question': question,
        'n_answers': options.n_answers,
        'tweet': tweet,
    }
    pbclient.create_task(app.id, payload)
app = response[0] app.long_description = open('long_description.html').read() app.info['task_presenter'] = open('template.html').read() app.info['thumbnail'] = app_config['thumbnail'] except: format_error("pbclient.create_app", response) try: response = pbclient.update_app(app) check_api_error(response) for page in range(1, 15): # Data for the tasks task_info = dict(question=app_config['question'], page=page, pdf_url=options.pdf_url) response = pbclient.create_task(app.id, task_info) check_api_error(response) except: format_error("pbclient.update_app or pbclient.create_task", response) else: if options.add_more_tasks: try: response = pbclient.find_app( short_name=app_config['short_name']) check_api_error(response) app = response[0] for page in range(1, options.pdf_pages + 1): # Data for the tasks task_info = dict(question="Transcribe the following page",
app.info['tutorial'] = open('tutorial.html').read() pbclient.update_app(app) with open('PabloPh_latest_UN_051212_5_38PM.csv', 'rb') as csvfile: csvreader = csv.reader(csvfile, delimiter=',') # Each row has the following format # tweetid, # text # date # username # userid for row in csvreader: if row[0] != 'tweetid': task_info = task_formatter(app_config, row, options.n_answers) pbclient.create_task(app.id, task_info) else: app = pbclient.find_app(short_name=app_config['short_name'])[0] if options.add_more_tasks: import csv with open('PabloPh_latest_UN_051212_5_38PM.csv', 'rb') as csvfile: csvreader = csv.reader(csvfile, delimiter=',') # Each row has the following format # tweetid, # text # date # username # userid for row in csvreader: if row[0] != 'tweetid': task_info = task_formatter(app_config, row,
# Copy tasks of an existing crowdcrafting.org app (app_id=147) into the app
# named by ``config.APP``.
# NOTE(review): statement nesting was reconstructed from whitespace-collapsed
# source -- confirm against the original file.
import pbclient
pbclient.set('endpoint', config.ENDPOINT)
pbclient.set('api_key', config.API_KEY)
app = pbclient.find_app(short_name=config.APP)[0]
# Reuse the tasks of the original flickrperson app.
print 'loading tasks from flickrperson app'
r = requests.get('http://crowdcrafting.org/api/task?app_id=147&limit=1000')
# NOTE(review): ``r.json`` is read as an attribute; newer requests releases
# expose it as a method -- confirm the installed version.
tasks = r.json
print len(tasks), 'tasks loaded.'
finished = 0
for t in tasks:
    sent = False
    # The retry loop currently runs exactly once per task because the
    # try/except that would reset ``sent`` is disabled.
    while not sent:
        #try:
        pbclient.create_task(app.id, t['info'], n_answers=100)
        # '\r' rewrites the same console line; the trailing comma suppresses
        # the newline.
        print '\rsending tasks (%d of %d)' % (finished, len(tasks)),
        sys.stdout.flush()
        finished += 1
        sent = True
        #except:
        #    sent = False
# Finish the progress line.
print