def get_tasks_geometry(q):
    """Worker: consume (project_id, group_id, task_file) items from the queue
    and append one csv line per task of the group to task_file.

    Runs until the queue is empty; intended to be started as a thread by
    download_tasks().
    """
    # BUGFIX: authenticate once per worker instead of once per queue item;
    # the original re-opened the firebase connection on every iteration
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    while not q.empty():
        project_id, group_id, task_file = q.get()
        completed_count = fb_db.child("groups").child(project_id).child(
            group_id).child("completedCount").shallow().get().val()
        group_tasks = fb_db.child("groups").child(project_id).child(
            group_id).child("tasks").shallow().get().val()
        print(group_tasks)
        # group_tasks can be None when the group has no tasks; the original
        # len(...) > 0 test raised TypeError in that case
        if group_tasks:
            for task_id in group_tasks:
                # get TileX, TileY, TileZ and convert to integer
                tile_z, tile_x, tile_y = map(int, task_id.split('-'))
                task_geom = geometry_from_tile_coords(tile_x, tile_y, tile_z)
                outline = '%s;%i;%i;%i;%s\n' % (
                    task_id, int(project_id), int(group_id),
                    int(completed_count), task_geom)
                task_file.write(outline)
        q.task_done()
def download_tasks_completed_count(project_id):
    """Download the completedCount of all groups of a project in parallel.

    Worker threads (get_completed_count) consume a queue of group ids and
    write the counts to 'completed_count.csv'.

    Returns the name of the csv file that was written.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # enlarge the connection pool so the parallel workers don't exhaust
    # the default urllib3 pool size
    adapter = requests.adapters.HTTPAdapter(max_retries=10,
                                            pool_connections=100,
                                            pool_maxsize=100)
    for scheme in ('http://', 'https://'):
        fb_db.requests.mount(scheme, adapter)
    completed_count_filename = 'completed_count.csv'
    # context manager guarantees the file is closed even if a worker fails
    with open(completed_count_filename, 'w') as completed_count_file:
        # we will use a queue to limit the number of threads running in parallel
        q = Queue(maxsize=0)
        num_threads = 8
        # it is important to use the shallow option, only keys will be
        # loaded and not the complete json
        all_groups = fb_db.child("groups").child(project_id).shallow().get().val()
        for group_id in all_groups:
            q.put([fb_db, completed_count_file, project_id, group_id])
        for i in range(num_threads):
            worker = threading.Thread(target=get_completed_count, args=(q, ))
            # daemon threads don't keep the process alive should one hang
            worker.daemon = True
            worker.start()
        q.join()
    del fb_db
    return completed_count_filename
def download_users():
    """Load all user data from firebase and write it to 'users.csv'.

    Each line holds id;distance;contributions;name.
    Returns the name of the csv file that was written.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    users_filename = 'users.csv'
    users = fb_db.child("users").get()
    # context manager guarantees the file is closed even on error
    with open(users_filename, 'w') as users_file:
        for user_a in users.each():
            user = {}
            key = user_a.key()
            val = user_a.val()
            # we need to check for empty groups
            user["id"] = key
            user["distance"] = val["distance"]
            user["contributions"] = val["contributions"]
            # BUGFIX: the original called .encode('utf-8)') -- the stray
            # parenthesis makes the codec name invalid and raises
            # LookupError; and str() on the bytes would have produced
            # "b'...'" strings anyway. Python 3 str is already unicode.
            user["name"] = val["username"]
            outline = '%s;%i;%i;%s\n' % (user["id"], user["distance"],
                                         user["contributions"],
                                         str(user["name"]))
            users_file.write(outline)
    del fb_db
    return users_filename
def check_imports(new_imports):
    """Validate newly imported projects and drop the corrupt ones.

    Each import is checked for a valid geometry and the correct submission
    key.  Corrupt imports are reported via slack, removed from the dict
    and deleted from the firebase imports table.

    Returns the dict with only the valid imports left.
    """
    corrupt_imports = []
    submission_key = get_submission_key()
    for import_key, project in new_imports.items():
        check_result = check_project_geometry(project)
        if check_result != 'correct':
            corrupt_imports.append(
                [import_key, project['project']['name'], check_result])
            print('some error in geometry')
        elif project['key'] != submission_key:
            check_result = 'no/wrong submission key provided'
            corrupt_imports.append(
                [import_key, project['project']['name'], check_result])
            print('no submission key provided')
    # authenticate once for all deletions; the original re-opened the
    # firebase connection inside the loop for every corrupt import
    if corrupt_imports:
        firebase = firebase_admin_auth()
        fb_db = firebase.database()
    for import_key, project_name, check_result in corrupt_imports:
        # send slack message that project was corrupt, maybe project
        # manager could try to reimport
        msg = '%s \n %s \n %s \n %s' % (import_key, project_name,
                                        check_result,
                                        str(new_imports[import_key]))
        head = 'google-mapswipe-workers: run_import.py: project %s (%s) not imported' % (
            import_key, project_name)
        send_slack_message(head + '\n' + msg)
        # delete project from dict
        del new_imports[import_key]
        print('removed corrupt import %s from new imports' % import_key)
        print('check result: %s' % check_result)
        # delete corrupt import in firebase
        fb_db.child("imports").child(import_key).remove()
    return new_imports
def set_project_contributors_firebase(project_id, contributors):
    """Write the contributors count of a project to firebase and verify it.

    Returns True when the value read back matches the one written,
    False otherwise.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # contributors in firebase is stored as integer
    contributors = int(contributors)
    project_ref = fb_db.child("projects").child(project_id)
    project_ref.update({"contributors": contributors})
    # read the value back to confirm the update actually went through
    new_contributors = project_ref.child(
        "contributors").shallow().get().val()
    if contributors != new_contributors:
        print('update contributors in firebase for project %s FAILED' %
              project_id)
        logging.warning('update contributors for project %s FAILED' %
                        project_id)
        return False
    print('update contributors for project %s successful' % project_id)
    logging.warning(
        'update contributors in firebase for project %s successful' %
        project_id)
    return True
def get_projects():
    """Fetch all projects from firebase, grouped by state.

    Returns a dict with keys:
      'all'          -- every project id, as int
      'active'       -- ids of projects with state == 0
      'not_finished' -- ids of projects with progress < 100
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    project_dict = {'all': [], 'active': [], 'not_finished': []}
    # get the projects from firebase
    all_projects = fb_db.child("projects").get().val()
    for project in all_projects:
        try:
            # some projects miss critical information, they will be skipped
            project_id = all_projects[project]['id']
            project_active = all_projects[project]['state']
            project_progress = all_projects[project]['progress']
        except (KeyError, TypeError):
            # narrowed from a bare except: only missing keys or non-dict
            # entries are expected failure modes here
            continue
        project_dict['all'].append(int(project_id))
        # projects with state=0 are active, state=3 means inactive
        if project_active == 0:
            project_dict['active'].append(project_id)
        if project_progress < 100:
            project_dict['not_finished'].append(project_id)
    return project_dict
def project_exists(project_id):
    """Return True if the project exists in firebase with all required attributes."""
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # shallow() loads only the attribute keys, not the whole project json
    project_data = fb_db.child("projects").child(
        project_id).shallow().get().val()
    if project_data is None:
        print('project is not in firebase projects table: %s' % project_id)
        logging.warning('project is not in firebase projects table: %s' %
                        project_id)
        return False
    # projects need to have at least 12 attributes in firebase, otherwise
    # something went wrong during the import
    if len(project_data) < 12:
        print('project missed critical information: %s' % project_id)
        logging.warning('project missed critical information in firebase: %s' %
                        project_id)
        return False
    print(
        'project is in firebase projects table and has all attributes: %s' %
        project_id)
    logging.warning(
        'project is in firebase projects table and has all attributes: %s' %
        project_id)
    return True
def set_import_complete(project):
    """Mark an import as complete in firebase.

    project must contain 'importKey' and 'id'.
    Returns True on success, False when anything fails.
    """
    try:
        firebase = firebase_admin_auth()
        fb_db = firebase.database()
        fb_db.child("imports").child(
            project['importKey']).child('complete').set(True)
        logging.warning('set import complete for import %s and project %s' %
                        (project['importKey'], project['id']))
        return True
    except Exception:
        # narrowed from a bare except (which would also swallow
        # SystemExit/KeyboardInterrupt); log so the failure isn't silent
        logging.exception('setting import complete failed')
        return False
def get_verification_count(project_id):
    """Return the verificationCount of a project from firebase, as float."""
    fb_db = firebase_admin_auth().database()
    # get the verification count for this project from firebase
    raw_count = fb_db.child("projects").child(project_id).child(
        "verificationCount").shallow().get().val()
    return float(raw_count)
def upload_project_firebase(project):
    """Upload a project dict to firebase under its id.

    Returns True on success, False when the upload fails.
    """
    try:
        firebase = firebase_admin_auth()
        fb_db = firebase.database()
        fb_db.child("projects").child(project['id']).set(project)
        logging.warning('uploaded project in firebase for project %s' %
                        project['id'])
        return True
    except Exception:
        # narrowed from a bare except; log instead of hiding the failure
        logging.exception('uploading project to firebase failed')
        return False
def upload_groups_firebase(project_id, groups):
    """Upload the groups json of a project to firebase.

    Returns True on success, False when the upload fails.
    """
    try:
        firebase = firebase_admin_auth()
        fb_db = firebase.database()
        fb_db.child("groups").child(project_id).set(groups)
        logging.warning('uploaded groups in firebase for project %s' %
                        project_id)
        return True
    except Exception:
        # narrowed from a bare except; log instead of hiding the failure
        logging.exception('uploading groups to firebase failed for project %s'
                          % project_id)
        return False
def get_user_name(user_id):
    """Return the username stored in firebase for the given user id."""
    fb_db = firebase_admin_auth().database()
    name_ref = fb_db.child("users").child(user_id).child('username')
    user_name = name_ref.get().val()
    print('got user name information from firebase.')
    logging.warning('got user name information from firebase.')
    return user_name
def get_all_projects():
    """Return the full projects json from firebase.

    NOTE(review): a second get_all_projects() (returning only a list of
    int ids) is defined later in this file and shadows this definition at
    import time -- confirm which one callers actually expect.
    """
    fb_db = firebase_admin_auth().database()
    all_projects = fb_db.child("projects").get().val()
    print('got project information from firebase.')
    logging.warning('got project information from firebase.')
    return all_projects
def get_project_name(project_id):
    """Return the 'name' attribute of a project from firebase."""
    fb_db = firebase_admin_auth().database()
    name_ref = fb_db.child("projects").child(project_id).child('name')
    project_name = name_ref.get().val()
    print('got project name information from firebase.')
    logging.warning('got project name information from firebase.')
    return project_name
def get_highest_project_id():
    """Return the highest numeric project id in firebase, or 0 if there
    are no projects yet."""
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # shallow(): only the project keys are loaded
    project_keys = fb_db.child('projects').shallow().get().val()
    # max() with a default replaces the original build-list / sort /
    # take-last sequence; `or []` guards against val() returning None
    highest_project_id = max(map(int, project_keys or []), default=0)
    logging.warning('returned highest project id: %s' % highest_project_id)
    return highest_project_id
def get_all_projects():
    """Return the ids of all projects in firebase as a list of ints.

    NOTE(review): this shadows an earlier get_all_projects() in this file
    that returns the full projects json -- confirm which definition
    callers expect.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # shallow(): only keys are loaded, not the complete json
    all_projects = fb_db.child("projects").shallow().get().val()
    # guard against an empty projects table: val() returns None, which the
    # original for-loop would crash on with TypeError
    project_list = [int(project_id) for project_id in (all_projects or [])]
    del fb_db
    return project_list
def download_group_progress(project_id, verification_count):
    """Fetch progress information for every group of a project.

    Fans out over worker threads (get_group_progress) fed by a queue; the
    workers append to a shared list, which is returned.
    """
    # shared output list the worker threads append to
    group_progress_list = []
    # a queue limits how many groups are in flight at once
    task_queue = Queue(maxsize=0)
    num_threads = 24
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # enlarge the connection pool so 24 parallel workers can share it
    adapter = requests.adapters.HTTPAdapter(max_retries=5,
                                            pool_connections=100,
                                            pool_maxsize=100)
    for scheme in ('http://', 'https://'):
        fb_db.requests.mount(scheme, adapter)
    # shallow(): only the group keys are loaded, not the complete json
    all_groups = fb_db.child("groups").child(project_id).shallow().get().val()
    print('downloaded all groups of project %s from firebase' % project_id)
    logging.warning('downloaded all groups of project %s from firebase' %
                    project_id)
    for group_id in all_groups:
        task_queue.put([fb_db, group_progress_list, project_id, group_id,
                        verification_count])
    print('added all groups of project %s to queue' % project_id)
    logging.warning('added all groups of project %s to queue' % project_id)
    for _ in range(num_threads):
        threading.Thread(target=get_group_progress,
                         args=(task_queue, )).start()
    task_queue.join()
    del fb_db
    print('downloaded progress for all groups of project %s from firebase' %
          project_id)
    logging.warning(
        'downloaded progress for all groups of project %s from firebase' %
        project_id)
    return group_progress_list
def check_projects(project_list):
    """Filter a list of project ids down to those valid in firebase.

    A project id is removed when it does not exist in firebase or when
    the project has fewer than 12 attributes.  The list is modified in
    place and also returned.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # BUGFIX: the original iterated project_list while calling .remove()
    # on it, which silently skips the element following every removal;
    # iterate over a snapshot instead
    for project_id in list(project_list):
        project_val = fb_db.child('projects').child(
            project_id).shallow().get().val()
        if project_val is None:
            print('the project id is not in firebase: ', project_id)
            project_list.remove(project_id)
        elif len(project_val) < 12:
            print('the project missed critical information in firebase: ',
                  project_id)
            project_list.remove(project_id)
    del fb_db
    return project_list
def delete_firebase_results(all_results):
    """Delete the given results from firebase with one multi-location update.

    all_results maps task_id -> {child_id: result}.  Setting each
    'results/<task_id>/<child_id>' path to None removes it, so everything
    is deleted in a single request instead of one request per result.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    data = {}
    for task_id, results in all_results.items():
        # only the child keys are needed; the result values are unused
        for child_id in results:
            key = 'results/{task_id}/{child_id}'.format(task_id=task_id,
                                                        child_id=child_id)
            data[key] = None
    fb_db.update(data)
    print('finished deleting results')
    logging.warning('deleted results in firebase')
    del fb_db
def get_new_projects(project_list):
    """Return the latest firebase information for the given project ids.

    Each project entry gets a 'last_check' timestamp added; keys of the
    returned dict are the project ids as strings.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    timestamp = datetime.datetime.now().isoformat()
    project_dict = {}
    for project_id in project_list:
        project_val = fb_db.child("projects").child(project_id).get().val()
        project_val["last_check"] = timestamp
        project_dict[str(project_id)] = project_val
    # round-trip through json to obtain a plain, json-serializable copy
    project_dict = json.loads(json.dumps(project_dict))
    del fb_db
    return project_dict
def get_projects_to_import():
    """Return imports from the firebase imports table that are not complete.

    Output maps import_key -> project dict for every entry that has no
    'complete' flag set yet.
    """
    new_imports = {}
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # iterate over all keys in the imports table; keep the ones that were
    # not yet marked complete
    all_imports = fb_db.child("imports").get().val()
    if all_imports:
        for import_key, project in all_imports.items():
            # the original read project['complete'] inside a bare except
            # and discarded the value; an explicit membership test says
            # the same thing without swallowing unrelated errors
            if 'complete' not in project:
                new_imports[import_key] = project
    return new_imports
def download_tasks(project_id):
    """Download all groups of a project and write their task geometries to csv.

    Worker threads (get_tasks_geometry) consume a queue of group ids and
    write one line per task.  Returns the csv file name ('tasks.csv').
    """
    task_filename = 'tasks.csv'
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    group_ids = fb_db.child("groups").child(project_id).shallow().get().val()
    print('got group ids data from firebase')
    # enlarge the connection pool so the parallel workers can share it
    adapter = requests.adapters.HTTPAdapter(max_retries=5,
                                            pool_connections=100,
                                            pool_maxsize=100)
    for scheme in ('http://', 'https://'):
        fb_db.requests.mount(scheme, adapter)
    # we will use a queue to limit the number of threads running in parallel
    q = Queue(maxsize=0)
    num_threads = 8
    # context manager guarantees the file is closed even if a worker fails
    with open(task_filename, 'w') as task_file:
        for group_id in group_ids:
            q.put([project_id, group_id, task_file])
        for i in range(num_threads):
            worker = threading.Thread(target=get_tasks_geometry, args=(q, ))
            # daemon threads don't keep the process alive should one hang
            worker.daemon = True
            worker.start()
        q.join()
    print('Saved tasks file')
    del fb_db
    return task_filename
def run_transfer_results(): logging.basicConfig(filename='transfer_results.log', level=logging.WARNING, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%m-%d %H:%M:%S', filemode='a') # first check if we have results stored locally, that have not been inserted in MySQL results_filename = 'results.json' if os.path.isfile(results_filename): # start to import the old results first with open(results_filename) as results_file: results = json.load(results_file) results_txt_filename = results_to_txt(results) logging.warning( "there are results in %s that we didnt't insert. do it now!" % results_filename) save_results_mysql(results_txt_filename) delete_firebase_results(results) os.remove(results_filename) print('removed "results.json" file') logging.warning('removed "results.json" file') firebase = firebase_admin_auth() fb_db = firebase.database() print('opened connection to firebase') # this tries to set the max pool connections to 100 adapter = requests.adapters.HTTPAdapter(max_retries=5, pool_connections=100, pool_maxsize=100) for scheme in ('http://', 'https://'): fb_db.requests.mount(scheme, adapter) # download all results and save as in json file to avoid data loss when script fails all_results = fb_db.child("results").get().val() del fb_db print('downloaded all results from firebase') logging.warning('downloaded all results from firebase') # test if there are any results to transfer if all_results: with open(results_filename, 'w') as fp: json.dump(all_results, fp) logging.warning('wrote results data to %s' % results_filename) print('wrote results data to %s' % results_filename) results_txt_filename = results_to_txt(all_results) save_results_mysql(results_txt_filename) delete_firebase_results(all_results) os.remove(results_filename) print('removed "results.json" file') logging.warning('removed "results.json" file') else: logging.warning('there are no results to transfer in firebase') print('there are no results to transfer in firebase')
def delete_project_firebase(project_id):
    """Remove a project entry from the firebase projects table."""
    fb_db = firebase_admin_auth().database()
    fb_db.child("projects").child(project_id).remove()
    logging.warning('deleted project in firebase for project %s' % project_id)
def get_project_geom(project_id, project_import_key):
    """Build the project geometry from the kml stored in the firebase
    imports table.

    Returns:
        None when no kml is stored for the import key; otherwise a tuple
        (project_geom, corrupt) where project_geom is a wkt multipolygon
        string (or None when corrupt) and corrupt is a bool.
        NOTE(review): the None early-return has a different arity than
        the tuple return -- callers must handle both; confirm intended.
    """
    firebase = firebase_admin_auth()
    fb_db = firebase.database()
    # get import-key for the project
    #project_import_key = fb_db.child("projects").child(str(project_id)).child("importKey").get().val()
    # get kml geometry from firebase imports table
    kml_geom = fb_db.child("imports").child(project_import_key).child(
        "kml").get().val()
    # we need to check whether there is any kml in the firebase table
    if kml_geom is None:
        return None
    else:
        # write valid kml string to file so ogr's KML driver can read it
        temp_file = '{}_extent.kml'.format(project_id)
        temp_file_obj = open(temp_file, 'w', encoding='utf-8')
        temp_file_obj.write(kml_geom)
        temp_file_obj.close()
        adresse = temp_file
        driver = ogr.GetDriverByName('KML')
        datasource = driver.Open(adresse)
        layer = datasource.GetLayer()
        nbFeat = layer.GetFeatureCount()
        # create new multipolygon geometry to collect all polygons
        project_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        # we check how many features are in the layer
        # most mapswipe projects will contain only 1 feature, but this may
        # change in the future
        if nbFeat == 1:
            feature = layer.GetFeature(0)
            feat_geom = feature.GetGeometryRef()
            # add this to remove z-coordinates, z-coordinates produced a
            # wrong geometry type
            feat_geom.FlattenTo2D()
            if feat_geom.GetGeometryType() == ogr.wkbPolygon:
                project_geom.AddGeometry(feat_geom)
            elif feat_geom.GetGeometryType() == ogr.wkbMultiPolygon:
                project_geom = feat_geom
        else:
            # get geometry for each feature
            # NOTE(review): unlike the single-feature branch, this branch
            # never calls FlattenTo2D() -- confirm whether z-coordinates
            # can occur here as well
            for i in range(0, nbFeat):
                feature = layer.GetFeature(i)
                feat_geom = feature.GetGeometryRef()
                if feat_geom.GetGeometryType() == ogr.wkbPolygon:
                    project_geom.AddGeometry(feat_geom)
                # if the geometry is a multipolygon, we loop through each
                # individual polygon
                elif feat_geom.GetGeometryType() == ogr.wkbMultiPolygon:
                    for new_feat_geom in feat_geom:
                        project_geom.AddGeometry(new_feat_geom)
        # check the validity of geoms
        # NOTE(review): check_project_geometry is called elsewhere in this
        # file with a project dict, here with an ogr layer -- confirm it
        # accepts both; also, if it returns non-empty strings such as
        # 'correct' (as the dict call site suggests), `not ...` would
        # always be False here
        if not check_project_geometry(layer):
            corrupt = True
            project_geom = None
        else:
            # convert geometry to wkt
            corrupt = False
            project_geom = project_geom.ExportToWkt()
        # close data source and remove kml file
        datasource.Destroy()
        os.remove(temp_file)
        return project_geom, corrupt
def get_results_from_firebase():
    """Return the complete results json from firebase."""
    fb_db = firebase_admin_auth().database()
    return fb_db.child("results").get().val()