def createZooniverseProject(projName, projDesc, primLang, flag_hidden):
    """Create a new Zooniverse project, retrying the save a few times.

    The function first polls http://zooniverse.org/ until a GET request
    succeeds, then tries to save a new project with the given settings.

    Args:
        projName: Value assigned to ``project.display_name``.
        projDesc: Value assigned to ``project.description``.
        primLang: Value assigned to ``project.primary_language``.
        flag_hidden: Value assigned to ``project.private``.

    Returns:
        The saved ``Project``, or ``None`` when every save attempt failed.
    """
    max_attempts = 5
    wait_seconds = 10
    url = 'http://zooniverse.org/'

    print('--- --- --- ---')
    print('Establishing connection to Zooniverse and creating project')

    # Poll the site until it answers; pause briefly between attempts so a
    # network outage does not turn into a busy loop.
    while True:
        print('Attempting connection.')
        try:
            response = requests.get(url, timeout=0.2)
        except (ConnectionError, HTTPError, Timeout) as err:
            print(err)
            time.sleep(1)
        else:
            print(response)
            break

    for attempt in range(1, max_attempts + 1):
        # Build a fresh project object for every attempt.
        project = Project()
        project.display_name = projName
        project.description = projDesc
        project.primary_language = primLang
        project.private = flag_hidden

        try:
            project.save()
        except PanoptesAPIException as e:
            # Kept from the original flow: discard whatever the failed save
            # may have left behind before trying again.
            project.delete()
            if attempt == max_attempts:
                print('!!! {}'.format(e))
                break
            print('!!! {} , Waiting {} seconds...'.format(e, wait_seconds))
            for i in range(wait_seconds):
                print('... Waiting {}...'.format(i))
                time.sleep(1)
        else:
            # Only report success when the save really went through.
            print('Project successfully created.')
            return project

    print('Project could not be created after {} attempts.'.format(
        max_attempts))
    return None
def create(display_name, description, primary_language, public, quiet):
    """
    Creates a new project.

    Prints the project ID and name of the new project.
    """
    # NOTE(review): this looks like a click command, in which case the
    # docstring above is the user-visible help text; it is kept verbatim.
    new_project = Project()
    settings = (
        ('display_name', display_name),
        ('description', description),
        ('primary_language', primary_language),
        # The API stores visibility as "private", the inverse of "public".
        ('private', not public),
    )
    for attribute, value in settings:
        setattr(new_project, attribute, value)
    new_project.save()

    if not quiet:
        echo_project(new_project)
        return
    # Quiet mode: emit only the ID.
    click.echo(new_project.id)
def pushNewSubjectSet(args, customArgs, projID):
    """Create a subject set on an existing project and populate it.

    ``args`` is modified in place: the keys ``'F_livePost'``,
    ``'zooniverseConnection'``, ``'project'`` and ``'subjectSet'`` are set.
    The keys ``'username'``, ``'password'`` and ``'subjectSetTitle'`` are
    read from it.

    Args:
        args: Dictionary of run settings (see above for the keys used).
        customArgs: Passed through unchanged to ``createSubjects``.
        projID: Identifier of the existing Zooniverse project.

    Returns:
        The updated ``args`` dictionary, or ``None`` if no project object
        was obtained.
    """
    args['F_livePost'] = True
    connection = panoptesConnect(args['username'], args['password'])
    args['zooniverseConnection'] = connection

    # Get existing project
    project = Project(projID)
    # Identity test: "== None" could be hijacked by a custom __eq__.
    if project is None:
        print('Could not find this project')
        return None
    print(project.display_name)
    args['project'] = project

    # Create new subject set
    subjectSet = createSubjectSet(args['subjectSetTitle'], args['project'])
    args['subjectSet'] = subjectSet

    # Create new subjects and populate project with filled subject set
    createSubjects(args, customArgs)
    return args
def retrieve_classifications(self,
                             destination: str,
                             project_id: int,
                             zooniverse_login: str = "",
                             zooniverse_pwd: str = "",
                             chunks: List[str] = [],
                             **kwargs):
    """Export the classifications of a Zooniverse project to a CSV file.

    Each annotation is matched with the label of the answer it points to.
    When chunk metadata files are given, the result is additionally joined
    with them, so only classifications with a matching chunk are kept.

    :param destination: path of the CSV file to write
    :type destination: str
    :param project_id: zooniverse project id
    :type project_id: int
    :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
    :type zooniverse_login: str, optional
    :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
    :type zooniverse_pwd: str, optional
    :param chunks: paths of the chunk metadata CSV files to join on
    :type chunks: List[str], optional
    """
    self.get_credentials(zooniverse_login, zooniverse_pwd)

    from panoptes_client import Panoptes, Project, Classification

    Panoptes.connect(username=self.zooniverse_login,
                     password=self.zooniverse_pwd)
    project = Project(project_id)

    # Lookup table: one row per (workflow, task, answer position) holding
    # the human-readable answer label.
    label_rows = []
    for workflow in project.links.workflows:
        workflow_key = str(workflow.id)
        for task_id in workflow.tasks:
            answers = workflow.tasks[task_id]["answers"]
            for position, answer in enumerate(answers):
                label_rows.append({
                    "workflow_id": workflow_key,
                    "task_id": str(task_id),
                    "answer_id": str(position),
                    "answer": answer["label"],
                })
    answer_labels = pd.DataFrame(label_rows)

    raw_records = [
        classification.raw
        for classification in Classification.where(
            scope="project", page_size=1000, project_id=project_id)
    ]
    records = pd.DataFrame(raw_records)

    # Flatten the nested "links" payload into plain columns.
    links = records["links"]
    records["user_id"] = links.apply(lambda link: link["user"])
    records["subject_id"] = links.apply(
        lambda link: link["subjects"][0]).astype(int)
    records["workflow_id"] = links.apply(lambda link: link["workflow"])

    # One (task, value) pair per annotation, then one row per pair.
    records["tasks"] = records["annotations"].apply(
        lambda annotations: [(str(a["task"]), str(a["value"]))
                             for a in annotations])
    records = records.explode("tasks")
    task_pairs = records["tasks"]
    records["task_id"] = task_pairs.str[0]
    records["answer_id"] = task_pairs.str[1]

    records = records[[
        "id", "user_id", "subject_id", "task_id", "answer_id", "workflow_id"
    ]]

    join_keys = ["workflow_id", "task_id", "answer_id"]
    records = records.merge(answer_labels, on=join_keys)

    if chunks:
        chunk_metadata = pd.concat([pd.read_csv(path) for path in chunks])
        records = records.merge(chunk_metadata,
                                left_on="subject_id",
                                right_on="zooniverse_id")

    records.set_index("id").to_csv(destination)
def upload_chunks(self,
                  chunks: str,
                  project_id: int,
                  set_name: str,
                  zooniverse_login="",
                  zooniverse_pwd="",
                  amount: int = 1000,
                  ignore_errors: bool = False,
                  **kwargs):
    """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

    Rows that are uploaded successfully are marked as such and the CSV
    file is rewritten in place. Subjects saved before a halting error are
    still added to the subject set and recorded.

    :param chunks: path to the chunk CSV dataframe
    :type chunks: str
    :param project_id: zooniverse project id
    :type project_id: int
    :param set_name: name of the subject set
    :type set_name: str
    :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
    :type zooniverse_login: str, optional
    :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
    :type zooniverse_pwd: str, optional
    :param amount: amount of chunks to upload, defaults to 1000
    :type amount: int, optional
    :param ignore_errors: keep uploading the remaining chunks when one fails instead of halting, defaults to False
    :type ignore_errors: bool, optional
    :raises Exception: if the chunk metadata file cannot be read
    """
    self.chunks_file = chunks
    self.get_credentials(zooniverse_login, zooniverse_pwd)

    metadata_location = self.chunks_file
    try:
        self.chunks = pd.read_csv(metadata_location, index_col="index")
    except Exception as e:
        # Keep the original cause attached for easier debugging.
        raise Exception("cannot read chunk metadata from {}.".format(
            metadata_location)) from e

    assert_dataframe("chunks", self.chunks)
    assert_columns_presence(
        "chunks",
        self.chunks,
        {"recording_filename", "onset", "offset", "uploaded", "mp3"},
    )

    from panoptes_client import Panoptes, Project, Subject, SubjectSet

    Panoptes.connect(username=self.zooniverse_login,
                     password=self.zooniverse_pwd)
    zooniverse_project = Project(project_id)

    # Reuse the subject set carrying the requested name, if there is one.
    subject_set = None
    for ss in zooniverse_project.links.subject_sets:
        if ss.display_name == set_name:
            subject_set = ss

    if subject_set is None:
        subject_set = SubjectSet()
        subject_set.links.project = zooniverse_project
        subject_set.display_name = set_name
        subject_set.save()

    # Element-wise comparison on a pandas column, hence "== False".
    pending = self.chunks[self.chunks["uploaded"] == False].head(amount)  # noqa: E712
    chunks_to_upload = pending.to_dict(orient="index")

    if len(chunks_to_upload) == 0:
        print("nothing left to upload.")
        return

    subjects = []
    subjects_metadata = []
    chunks_dir = os.path.join(os.path.dirname(self.chunks_file), "chunks")

    for chunk_index, chunk in chunks_to_upload.items():
        print("uploading chunk {} ({},{})".format(
            chunk["recording_filename"], chunk["onset"], chunk["offset"]))

        subject = Subject()
        subject.links.project = zooniverse_project
        subject.add_location(os.path.join(chunks_dir, chunk["mp3"]))
        subject.metadata["date_extracted"] = chunk["date_extracted"]

        try:
            subject.save()
        except Exception as e:
            print("failed to save chunk {}. an exception has occured:\n{}".
                  format(chunk_index, str(e)))
            print(traceback.format_exc())

            # Use the method's own parameter; the previous reference to an
            # undefined ``args`` object raised NameError here.
            if ignore_errors:
                continue
            print("subject upload halting here.")
            break

        subjects.append(subject)

        chunk["index"] = chunk_index
        chunk["zooniverse_id"] = str(subject.id)
        chunk["project_id"] = str(project_id)
        chunk["subject_set"] = str(subject_set.display_name)
        chunk["uploaded"] = True
        subjects_metadata.append(chunk)

    if len(subjects) == 0:
        return

    subject_set.add(subjects)

    self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))
    self.chunks.to_csv(self.chunks_file)
# connect to zooniverse Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME, password=zooniverse_config.Zooniverse_PASS) project = Project.find(zooniverse_config.Project_ID) # connection to mongodb mongoConn = MongoClient(csh_db_config.DB_HOST + ":" + str(csh_db_config.DB_PORT)) cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME] cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER, csh_db_config.TRANSCRIPTION_DB_PASS) cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl] cshSubjectSets = cshTransDB[csh_db_config.TRANS_DB_SubjectSets] classification_export = Project( zooniverse_config.Project_ID).get_export('classifications') classification = classification_export.content.decode('utf-8') #Traverses through each row of classifications in the JSON file created by zooniverse and assigns them to appropriate headers for row in csv.DictReader(io.StringIO(classification)): annotations = json.loads(row['annotations']) subject_data = json.loads(row['subject_data']) transcription_question_1 = '' transcription_text_1 = '' transcription_question_2 = '' transcription_text_2 = '' transcription_filename = '' subject_id = row['subject_ids'] subject_id = str(subject_id)
if save: build_file += build_part with open(project_file, 'r') as p_file: r = csv.DictReader(p_file) project_list = [] for row in r: project_list.append(row['projects']) i = 0 for prjct in project_list: sys.stdout.write('processing..') sys.stdout.flush() try: build_part = "{:<8},{}".format(prjct, Project( int(prjct)).display_name) + '\n' build_part += "{:<12},{:<14},{:<28},{:<28},{:<10},{:12},{}".format( 'Project_id', 'Workflow_id', 'Created date', 'Finished date', 'Subjects', 'Retirement', 'Workflow name') + '\n' for workflow_id, project_id in all_workflows: i += 1 if i % 5 == 0: sys.stdout.write('.') sys.stdout.flush() if prjct == project_id: wrkflw = Workflow(int(workflow_id)) finished_at = wrkflw.finished_at if finished_at is None: finished_at = ' ' build_part += u"{:<12},{:<14},{:<28},{:<28},{:<10},{:<12},{}".format( prjct, wrkflw.id, wrkflw.created_at, finished_at,
# Zooniverse project whose subject sets are processed.
PROJECT_ID = 6767
# Bookkeeping files used by this script.
PROCESSED_SUBJECTS_FILE = 'processed_subjects.txt'
PROCESSED_SETS_FILE = 'processed_sets.txt'

# The parsed YAML mapping is passed as keyword arguments to
# Panoptes.connect() below.
with open('config.yaml') as config_f:
    config = yaml.load(config_f, Loader=yaml.FullLoader)

# Load the entries (one per line) already recorded in PROCESSED_SETS_FILE.
if os.path.isfile(PROCESSED_SETS_FILE):
    with open(PROCESSED_SETS_FILE) as processed_f:
        processed_sets = { s.strip() for s in processed_f.readlines() }
else:
    processed_sets = set()

Panoptes.connect(**config)
project = Project(PROJECT_ID)

with open(PROCESSED_SETS_FILE, 'a') as processed_sets_f:
    for subject_set in project.links.subject_sets:
        # Skip sets whose id is already listed in PROCESSED_SETS_FILE.
        if subject_set.id in processed_sets:
            continue

        # Progress bar sized to the number of subjects in the set.
        with ChargingBar(
            'Updating {}'.format(subject_set.display_name),
            max=subject_set.set_member_subjects_count,
            suffix='%(percent).1f%% %(eta_td)s'
        ) as bar:
            with Subject.async_saves():
                for subject in Subject.where(subject_set_id=subject_set.id, page_size=100):
                    bar.next()
                    # Subjects whose metadata already has a '!CERiT' key
                    # are skipped.
                    if '!CERiT' in subject.metadata:
                        continue
def create_subject_set(project_id: int, name: str):
    """Create, save and return a subject set called ``name``.

    The new set is linked to the project identified by ``project_id``.
    """
    owner = Project(project_id)

    new_set = SubjectSet()
    new_set.display_name = name
    new_set.links.project = owner
    new_set.save()
    return new_set
def main():
    """Copy classification answers from Zooniverse into MongoDB.

    Downloads the project's classification export, reads the answers to
    the two transcription questions from each row, and writes them to the
    meeting-minutes collection with ``find_one_and_update``. Finally
    prints how many answers to the first question were found.
    """
    first_question = 'Is there a word in this image?'
    second_question = 'Please type the word(s) that appears in this image'

    # connect to zooniverse
    Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                     password=zooniverse_config.Zooniverse_PASS)
    project = Project.find(zooniverse_config.Project_ID)

    # connection to mongodb
    mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                            str(csh_db_config.DB_PORT))
    cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
    # NOTE(review): Database.authenticate() no longer exists in PyMongo 4;
    # confirm which PyMongo version this script runs against.
    cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                            csh_db_config.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]

    # Reuse the project fetched above instead of building it a second time.
    classification_export = project.get_export('classifications')
    classification = classification_export.content.decode('utf-8')

    # keep track of the number of classifications
    num_classifications = 0

    # traverses through each row of classifications and assigns them to
    # appropriate headers
    for row in csv.DictReader(io.StringIO(classification)):
        annotations = json.loads(row['annotations'])
        subject_data = json.loads(row['subject_data'])
        subject_id = str(row['subject_ids'])
        transcription_text_1 = ''
        transcription_text_2 = ''
        # NOTE(review): nothing in this function ever fills in the file
        # name, so the lookup below always uses '' as the _id. Presumably
        # it should come from subject_data / subject_id - confirm.
        transcription_filename = ''

        # parse the JSON output from Zooniverse into individual fields
        for task in annotations:
            task_label = task.get('task_label')
            value = task.get('value')
            if task_label is None or value is None:
                continue
            # Both questions are checked for every task. Previously the
            # second check lived inside the KeyError handler of the first
            # and so could never record an answer.
            if first_question in task_label:
                transcription_text_1 = str(value)
                num_classifications += 1
            elif second_question in task_label:
                transcription_text_2 = str(value)

        # retrieve and update the record from MongoDB
        updateQuery = {
            '$set': {
                'responses': [{
                    'labellerId': row['user_id'],
                    'type': transcription_text_1,
                    'label': transcription_text_2
                }],
                'transcription': {
                    'status': 'done'
                }
            }
        }
        cshCollection.find_one_and_update({'_id': transcription_filename},
                                          updateQuery)

    print('{} classifications retrieved from Zooniverse and records updated in MongoDB'.format(num_classifications))
except:
    # Abort when the configuration could not be read (the matching ``try``
    # is above this fragment).
    print('ERROR: No se ha podido leer el fichero de configuración.')
    sys.exit(1)

# modify path and file name as needed:
manifest_images_file = manifest_path + "manifest_images_" + month + ".csv"
manifest_sounds_file = manifest_path + "manifest_sounds_" + month + ".csv"
image_set_name = 'image_set_' + month
audio_set_name = 'audio_set_' + month

# Connect to Panoptes
Panoptes.connect(username=user, password=passwd)

# The "Sky Sounds" project is associated with identifier 13586.
project = Project('13586')

# ------- Image subject set -------
# Connect to the corresponding subject set, or create a new one if it does
# not exist.
try:
    # Check whether the subject set exists.
    subject_set = SubjectSet.where(project_id=project.id, display_name=image_set_name).next()
except StopIteration:
    # Create a new subject set for the new data and link it to the project.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = image_set_name
    subject_set.save()
logger.info("Imported Manifest file {} with {} records".format( args['manifest'], len(mani.keys()))) # read Zooniverse credentials config = read_config_file(args['password_file']) ################################### # Create Zooniverse Connection # Fetch/Create SubjectSet ################################### # connect to panoptes connect_to_panoptes() # Get Project my_project = Project(args['project_id']) # get or create a subject set if args['subject_set_id'] is not None: my_set = get_subject_set(args['subject_set_id'], args['subject_set_name']) else: my_set = uploader.create_subject_set(my_project, args['subject_set_name']) logger.info("Created new subject set with id {}, name {}".format( my_set.id, my_set.display_name)) ################################### # Create Stats Variables ###################################
# Input files and path settings.
subject_data_file = 'WO399_11Jun2020-trunc.tsv'
subject_file_list = 'wo_399_file_list.txt'
subject_file_root = credentials.subject_file_root
# Compiled only when an old root is configured; otherwise left as ''.
subject_file_old_root_re = ''
if credentials.subject_file_old_root != '':
    subject_file_old_root_re = re.compile(credentials.subject_file_old_root)
file_inventory = defaultdict(list)
project_id = 11982

# we are only interested in the Piece-level records in the catalogue export.
# the following regex matches Piece references (and lower) only.
piece_ref_re = re.compile(r'WO 399/(\d+)')
# Captures the number following 'wo\399\' in a backslash-separated path.
file_path_extraction_re = re.compile(r'wo\\399\\(\d+)')

Panoptes.connect(username=credentials.username, password=credentials.password)
project = Project(project_id)

# Read the subject_data_file to get the names and docrefs of the documents
# Identify the images that belong to a subject set and upload them


def create_subject_set(docref, name):
    """Create and save a subject set named '<docref> - <name>'.

    The set is linked to the module-level ``project`` and returned.
    """
    print("Attempting to create a subject set via the Zooniverse API")
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = docref + " - " + name
    subject_set.save()
    return subject_set


with open(subject_file_list, 'r') as f: