def cli(ctx, endpoint, admin):
    """Load the CLI configuration and, unless configuring, connect to Panoptes.

    Built-in defaults are overlaid with the contents of
    ``~/.panoptes/config.yml`` (when it can be opened) and then with the
    ``endpoint`` argument. The result is stored on ``ctx.config``.

    Args:
        ctx: context object. Receives the ``config_dir``, ``config_file`` and
            ``config`` attributes and must expose ``invoked_subcommand``.
        endpoint: optional endpoint URL that overrides the configured one.
        admin: forwarded to ``Panoptes.connect`` as ``admin``.
    """
    ctx.config_dir = os.path.expanduser('~/.panoptes/')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://www.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # An empty YAML document parses to None, which dict.update()
            # rejects with a TypeError; treat it as "no overrides".
            ctx.config.update(yaml.full_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    # The `configure` subcommand must work before credentials exist.
    if ctx.invoked_subcommand != 'configure':
        Panoptes.connect(
            endpoint=ctx.config['endpoint'],
            username=ctx.config['username'],
            password=ctx.config['password'],
            admin=admin,
        )
def __connect(self):
    """
    Log in to the Panoptes API and look up this instance's project.

    :return: whatever ``Project.find`` returns for ``self.project_id``
    """
    credentials = {'username': self.username, 'password': self.password}
    Panoptes.connect(**credentials)
    project = Project.find(self.project_id)
    return project
def add_to_subject_set(subject_set_id, subject_set_file, username=None, password=None):
    """
    Import a 1 column file of subject_ids to a subject_set.

    Parameters
    ----------
    subject_set_id : str
        subject set ID linked to the web interface

    subject_set_file : str
        one-column file of subject IDs (output of cull_subject_ids)

    username, password : str, str
        if passed, will add subject set ids to the subject set on the web.
        When ``username`` is None the file is still read (so a missing file
        raises) but nothing is uploaded.
    """
    with open(subject_set_file) as subject_ids:
        # One ID per line. Surrounding whitespace and blank lines are not part
        # of any subject ID, so drop them instead of uploading them.
        lines = [line.strip() for line in subject_ids if line.strip()]

    if username is not None:
        try:
            from panoptes_client import Panoptes, SubjectSet
        except ImportError:
            print(
                'Install https://github.com/zooniverse/panoptes-python-client')
            sys.exit(1)

        Panoptes.connect(username=username, password=password)
        subject_set = SubjectSet.find(subject_set_id)
        # Sorted, de-duplicated plain strings: the same ordering np.unique
        # produced, without relying on it to flatten a nested list.
        subject_set.add(sorted(set(lines)))
    return
def find_duplicates():
    """Report galaxies that occur under more than one subject id.

    Connects to Panoptes (the password is read with ``getpass()``), collects
    the subjects of every subject set linked to the
    ``tingard/galaxy-builder`` project and groups subject ids by the
    ``'SDSS dr7 id'`` metadata value. Summary counts and the two subject sets
    selected below are printed.

    :return: numpy array built from one entry per dr7 object id that has more
        than one subject; each entry is the dr7 object id followed by its
        subject ids.
    """
    Panoptes.connect(username='******', password=getpass())
    gzb_project = Project.find(slug='tingard/galaxy-builder')
    subject_sets = []
    for set in gzb_project.links.subject_sets:
        # materialise the subjects of each set once; reused several times below
        subject_sets.append(list(set.subjects))
    # flat list of every subject across all subject sets
    subjects = [j for i in subject_sets for j in i]
    # per-subject-set list of subject ids, parallel to subject_sets
    subject_set_ids = [[np.int64(j.id) for j in i] for i in subject_sets]
    ids = [int(i.id) for i in subjects]
    # a missing 'SDSS dr7 id' falls back to False, i.e. np.int64(0)
    dr7objids = [np.int64(i.metadata.get('SDSS dr7 id', False)) for i in subjects]
    pairings = sorted(zip(ids, dr7objids), key=lambda i: i[0])
    df = pd.DataFrame(pairings, columns=('subject_id', 'dr7objid'))
    # drop the subjects without a dr7 id (0, see above) and keep one row per
    # subject id
    df = df[df['dr7objid'] != 0].groupby('subject_id').max()
    n_sids = len(df)
    n_dr7ids = len(df.groupby('dr7objid'))
    print('{} unique subject ids'.format(n_sids))
    print('{} unique dr7 object ids'.format(n_dr7ids))
    print('{} duplicate galaxies'.format(n_sids - n_dr7ids))
    # one entry per dr7 id with several subjects: [dr7objid, subject_id, ...]
    groups = np.array([np.concatenate(([i[0]], i[1].index.values)) for i in df.groupby('dr7objid') if len(i[1]) > 1])
    # okay, what subject sets are our duplicates?
    # NOTE(review): groups[:, 1] and groups[:, 2] need a 2-D array, which
    # presumably means every duplicated galaxy has exactly two subjects; confirm.
    # np.argmax returns the index of the first subject set whose ids are all in
    # the given column, and 0 when no subject set qualifies.
    s1 = gzb_project.links.subject_sets[
        np.argmax([np.all(np.isin(subject_set_ids[i], groups[:, 1])) for i in range(len(subject_set_ids))])
    ]
    s2 = gzb_project.links.subject_sets[
        np.argmax([np.all(np.isin(subject_set_ids[i], groups[:, 2])) for i in range(len(subject_set_ids))])
    ]
    print(s1, s2)
    return groups
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """Upload images as new subjects and add them to an existing subject set.

    Args:
        imagePaths: iterable of image file paths; the position of a path is
            the key passed to ``ellipseData.get_group``.
        ellipseData: object with a ``get_group(index)`` method whose result
            supports ``reset_index(drop=True)`` (presumably a pandas GroupBy;
            verify against the caller).
        projectData: mapping with the keys ``"user_name"``, ``"password"``
            and ``"subject_set"``.

    Returns:
        None. If the subject set cannot be found the API error is printed and
        nothing is uploaded.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # The target set does not depend on the image: look it up once, before any
    # subject is saved, so a failed lookup cannot leave orphaned subjects.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)

    # Nothing to link when no image was supplied.
    if newSubjects:
        subjectSet.add(newSubjects)
def upload_manifest_to_galaxy_zoo(subject_set_name, manifest, galaxy_zoo_id='5733', n_processes=10):
    """
    Save manifest (set of galaxies with metadata prepared) to Galaxy Zoo

    Args:
        subject_set_name (str): name for subject set. If it contains 'TEST',
            nothing is uploaded.
        manifest (list): containing dicts of form {png_loc: img.png, key_data: {metadata_col: metadata_value}}
        galaxy_zoo_id (str): panoptes project id e.g. '5733' for Galaxy Zoo, '6490' for mobile
        n_processes (int): number of worker threads with which to upload galaxies in parallel

    Returns:
        list: the ``manifest`` argument, unchanged (for debugging only)
    """
    if 'TEST' in subject_set_name:
        logging.warning('Testing mode detected - not uploading!')
        return manifest

    if galaxy_zoo_id == '5733':
        logging.info('Uploading to Galaxy Zoo project 5733')
    elif galaxy_zoo_id == '6490':
        logging.info('Uploading to mobile app project 6490')
    else:
        logging.info('Uploading to unknown project {}'.format(galaxy_zoo_id))

    # Important - don't commit the password!
    zooniverse_login = read_data_from_txt(zooniverse_login_loc)
    Panoptes.connect(**zooniverse_login)

    galaxy_zoo = Project.find(galaxy_zoo_id)

    subject_set = SubjectSet()
    subject_set.links.project = galaxy_zoo
    subject_set.display_name = subject_set_name
    subject_set.save()

    pbar = tqdm(total=len(manifest), unit=' subjects uploaded')
    save_subject_partial = functools.partial(
        save_subject, project=galaxy_zoo, pbar=pbar)
    pool = ThreadPool(n_processes)
    try:
        new_subjects = pool.map(save_subject_partial, manifest)
    finally:
        # Release the progress bar and the worker threads even when one of
        # the uploads raises.
        pbar.close()
        pool.close()
        pool.join()

    subject_set.add(new_subjects)

    return manifest  # for debugging only
def get_panoptes_auth_token():
    """Connect with the stored Zooniverse login and return its bearer token.

    The login is read as JSON from ``api_to_json.ZOONIVERSE_LOGIN_LOC`` and
    passed to ``Panoptes.connect`` as keyword arguments.
    """
    # This token is only valid for ~2 hours. Don't use for long-running downloads.
    # Here, we only need a few calls to get the workflow versions
    # Will ask the devs to expose this nicely with the already-built expiry check.
    login_path = api_to_json.ZOONIVERSE_LOGIN_LOC  # beware sneaky shared global state
    with open(login_path, 'r') as login_file:
        credentials = json.load(login_file)
    Panoptes.connect(**credentials)
    client = Panoptes._local.panoptes_client
    return client.get_bearer_token()
def _retrieve_user(user_id):
    """Return the Panoptes user for ``user_id``, using ``users`` as a cache.

    On a cache miss a connection is opened with the ``PANOPTES_URL``,
    ``PANOPTES_CLIENT_ID`` and ``PANOPTES_CLIENT_SECRET`` environment
    variables and the result of ``User.find`` is stored in ``users``.
    """
    if user_id not in users:
        Panoptes.connect(
            endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
            client_id=getenv('PANOPTES_CLIENT_ID'),
            client_secret=getenv('PANOPTES_CLIENT_SECRET'),
        )
        users[user_id] = User.find(user_id)
    return users[user_id]
def main(production=False):
    """Create a timestamped test subject set and upload the local subjects to it.

    Subject files are read from the ``subjects`` directory next to this file
    and are named ``image_<n>.png``, ``difference_<n>.json``,
    ``model_<n>.json`` and ``metadata_<n>.json``.

    :param production: not used by the code visible here (the only reference
        is in the comment after ``pId``).
    """
    # NOTE(review): the next statement is not valid Python as it stands. Part of
    # the line appears to have been replaced by '******' (credential redaction?).
    # Presumably it read a password and called Panoptes.connect with the staging
    # endpoint and admin=True; restore it from version control.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org', admin=True )
    pId = 5733  # if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    # the current epoch second keeps display names distinct between runs
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()
    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')
    # For each file-name prefix, the sorted numeric ids of the matching files.
    # NOTE(review): the names are unpacked as (images, differences, ...) but the
    # prefixes are ordered ('difference', 'image', ...), so `images` holds the
    # ids of the difference files and vice versa; confirm the intent.
    images, differences, model, metadata = [
        sorted((
            int(re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i).group(1))
            for i in subjects
            if re.match(r'{}_([0-9]+)\.(?:json|png)$'.format(s), i)
        ))
        for s in ('difference', 'image', 'model', 'metadata')
    ]
    # Compares the id lists themselves (not only their lengths); a mismatch is
    # reported but the upload below still runs.
    if not images == differences == model == metadata:
        print(
            'Images, differences, model and metadata ' +
            'must all have same length'
        )

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in images:
        try:
            with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                # rebinds `metadata`, which held the list of metadata ids above
                metadata = json.load(f)
        except IOError:
            # a subject without a readable metadata file is uploaded with none
            metadata = {}
        subject_set = uploadSubjectToSet(
            project, subject_set,
            [[j.format(loc, i) for j in (
                '{}/subjects/image_{}.png',
                '{}/subjects/difference_{}.json',
                '{}/subjects/model_{}.json'
            )]],  # locations
            [metadata],
        )
def create_subject_set(folder_name, set_name='test_subject_set'):
    """Create a subject set in project 5590 from the subjects in a folder.

    Every ``image_<name>.png`` in ``folder_name`` defines a subject and must
    be accompanied by ``difference_<name>.json``, ``model_<name>.json`` and
    ``metadata_<name>.json``. The username and password are read
    interactively.

    :param folder_name: directory containing the subject files
    :param set_name: display name of the new subject set
    :raises AssertionError: if any of the four files of a subject is missing
    """
    # The dot is escaped and the pattern anchored so that only real
    # ``image_*.png`` names match (not e.g. ``image_1.png.bak``).
    image_pattern = re.compile(r'image_(.*?)\.png$')
    subject_names = [
        found.group(1)
        for found in (image_pattern.match(f) for f in os.listdir(folder_name))
        if found is not None
    ]
    files = [
        [
            join(folder_name, file_name)
            for file_name in (
                'image_{}.png'.format(subject_name),
                'difference_{}.json'.format(subject_name),
                'model_{}.json'.format(subject_name),
                'metadata_{}.json'.format(subject_name),
            )
        ]
        for subject_name in subject_names
    ]
    # Raised explicitly (same exception type as before) so that the check is
    # not stripped when Python runs with -O.
    if not all(os.path.exists(j) for i in files for j in i):
        raise AssertionError('Missing files!')

    uname = input('Enter your username: ')
    pwd = getpass.getpass()
    Panoptes.connect(
        username=uname,
        password=pwd,
        admin=True
    )
    pId = 5590
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

    metadata_list = []
    for fs in files:
        try:
            # fs[3] is the metadata file; the first three are the locations
            with open(fs[3]) as metaF:
                metadata = json.load(metaF)
        except IOError:
            metadata = {}
        metadata_list.append(metadata)
    subject_set = uploadSubjectToSet(
        project, subject_set,
        [i[:3] for i in files],
        metadata_list,
    )
def _retrieve_user(user_id):
    """Return the user for ``user_id``, using ``users`` as a cache.

    On a cache miss a connection is opened from the ``PANOPTES_*``
    environment variables and the user is looked up. When the API raises
    ``PanoptesAPIException`` a ``CantFindUser`` placeholder is cached instead.
    """
    if user_id not in users:
        Panoptes.connect(
            endpoint=getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
            client_id=getenv('PANOPTES_CLIENT_ID'),
            client_secret=getenv('PANOPTES_CLIENT_SECRET'),
        )
        try:
            found = User.find(user_id)
        except PanoptesAPIException:
            # some users are not found in panoptes
            # return an empty class with an `id` attribute
            found = CantFindUser(user_id)
        users[user_id] = found
    return users[user_id]
def panoptes_connect():
    """Connect to Panoptes with the credentials stored in ``panoptesuserfile.txt``.

    Returns whatever ``Panoptes.connect`` returns.
    """
    # file with username and password on the first line with a space in between
    with open('panoptesuserfile.txt') as credentials_file:
        first_line = credentials_file.readline()
    fields = first_line.strip().split()
    return Panoptes.connect(username=fields[0], password=fields[1])
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload the subjects of its manifests.

    Every ``*-manifest.txt`` in the directory ``target + id`` is read. The
    first line is a comma-separated record: field 0 is the subject id, fields
    1-3 are the image locations, field 4 the source name and fields 5-7 the
    ra, dec and size. The new subject set is then attached to workflow 11973.

    :param id: name of the field; used as the directory suffix and as the
        display name of the subject set
    :param use_database: when true, the status is recorded with
        ``update_status`` before and after the upload
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='******', password=os.environ['PANOPTES_PASSWORD'])
    # The locations in the manifests are relative to the field directory.
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')

        subject_set = SubjectSet()
        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')

        new_subjects = []
        g = glob.glob('*-manifest.txt')
        for i, f in enumerate(g):
            with open(f) as manifest_file:
                bits = manifest_file.readlines()[0].split(',')
            metadata = {
                'subject_id': int(bits[0]),
                'ra': float(bits[5]),
                'dec': float(bits[6]),
                '#size': float(bits[7]),
                'source_name': bits[4]
            }
            print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)

        workflow = Workflow(11973)
        workflow.links.subject_sets.add(subject_set)
    finally:
        # `wd` was saved for exactly this: do not leave the process in the
        # field directory, whether or not the upload succeeded.
        os.chdir(wd)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
def main(argv=None):
    """Main caller for workflow1to2

    Parses ``argv`` (username, password, ``--overwrite``, ``--add``), logs in
    to Panoptes, then runs ``cull_wf1`` followed by ``cull_subject_ids`` on
    the astronomy-rewind classification files.
    """
    parser = argparse.ArgumentParser(
        description="Add subject sets to workflow 2a,b,c from classifications")
    positionals = (
        ('username', 'zooniverse username'),
        ('password', 'password'),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, type=str, help=help_text)
    for short_flag, long_flag in (('-o', '--overwrite'), ('-a', '--add')):
        parser.add_argument(short_flag, long_flag, action='store_true')
    options = parser.parse_args(argv)

    # log in.
    Panoptes.connect(username=options.username, password=options.password)

    all_classifications = 'astronomy-rewind-classifications.csv'
    workflow1_only = 'astronomy-rewind-classifications_wf1.csv'
    cull_wf1(all_classifications, workflow1_only, overwrite=options.overwrite)
    cull_subject_ids(workflow1_only, overwrite=options.overwrite,
                     add=options.add)
def main(production=False):
    """Create a timestamped test subject set and upload up to 20 local subjects.

    Subjects are read from the ``subjects`` directory next to this file,
    numbered from 0. The upload stops at the first missing
    ``image_<n>.png``.

    :param production: selects project 5590 when true, otherwise 1820
    """
    # NOTE(review): the next statement is not valid Python as it stands. Part of
    # the line appears to have been replaced by '******' (credential redaction?).
    # Presumably it read a password and called Panoptes.connect with the staging
    # endpoint and admin=True; restore it from version control.
    uname = input('Enter your username: '******'https://panoptes-staging.zooniverse.org', admin=True)
    pId = 5590 if production else 1820
    project = Project.find(pId)
    subject_set = SubjectSet()
    subject_set.links.project = project
    # the current epoch second keeps display names distinct between runs
    subject_set.display_name = 'Test_subject_set_' + str(int(time.time()))
    subject_set.save()

    loc = os.path.abspath(os.path.dirname(__file__))
    subjects = os.listdir(loc + '/subjects')

    # TODO: change subject directory structure to be more efficient
    #       (not having 12,000+ files in a folder...)
    for i in range(20):
        if 'image_{}.png'.format(i) in subjects:
            try:
                with open('{}/subjects/metadata_{}.json'.format(loc, i)) as f:
                    metadata = json.load(f)
            except IOError:
                # a subject without a readable metadata file is uploaded with none
                metadata = {}
            subject_set = uploadSubjectToSet(
                project, subject_set,
                [[
                    j.format(loc, i)
                    for j in ('{}/subjects/image_{}.png',
                              '{}/subjects/difference_{}.json',
                              '{}/subjects/model_{}.json')
                ]],  # locations
                [metadata],
            )
        else:
            # subject numbers are expected to be contiguous: stop at the first gap
            break
def cli(ctx, endpoint):
    """Load the CLI configuration and connect to Panoptes.

    Built-in defaults are overlaid with the contents of
    ``~/.panoptes/config.yml`` (when it can be opened) and then with the
    ``endpoint`` argument. The result is stored on ``ctx.config`` and used to
    open the connection.

    Args:
        ctx: context object; receives the ``config_dir``, ``config_file`` and
            ``config`` attributes.
        endpoint: optional endpoint URL that overrides the configured one.
    """
    # expanduser() also works where the HOME variable is not set.
    ctx.config_dir = os.path.join(os.path.expanduser('~'), '.panoptes')
    ctx.config_file = os.path.join(ctx.config_dir, 'config.yml')
    ctx.config = {
        'endpoint': 'https://panoptes.zooniverse.org',
        'username': '',
        'password': '',
    }

    try:
        with open(ctx.config_file) as conf_f:
            # The config is a plain mapping of strings, so the safe loader is
            # sufficient; yaml.load() without a Loader argument is rejected by
            # PyYAML >= 6. An empty file parses to None, hence the `or {}`.
            ctx.config.update(yaml.safe_load(conf_f) or {})
    except IOError:
        # No readable config file: keep the defaults.
        pass

    if endpoint:
        ctx.config['endpoint'] = endpoint

    Panoptes.connect(
        endpoint=ctx.config['endpoint'],
        username=ctx.config['username'],
        password=ctx.config['password']
    )
def create_subjects_and_link_to_project(self, proto_subjects, project_id, workflow_id, subject_set_id):
    """Create subjects, add them to a subject set and link the set to a workflow.

    Credentials are read from the ``PANOPTES_USERNAME`` and
    ``PANOPTES_PASSWORD`` environment variables. Any exception is logged via
    ``self.log.exception`` and not re-raised.

    :param proto_subjects: iterable of mappings with the keys
        ``'location_lc'``, ``'location_ps'`` and ``'metadata'``
    :param project_id: id of the project that owns the subjects
    :param workflow_id: id of the workflow the subject set is added to
    :param subject_set_id: id of an existing subject set, or None to create a
        new one named after the current UTC time
    """
    try:
        USERNAME = os.getenv('PANOPTES_USERNAME')
        PASSWORD = os.getenv('PANOPTES_PASSWORD')
        Panoptes.connect(username=USERNAME,
                         password=PASSWORD,
                         endpoint=self.ENDPOINT)

        project = Project.find(project_id)
        # find() is called on the class; no throwaway instance is needed
        workflow = Workflow.find(workflow_id)

        if subject_set_id is None:
            subject_set = SubjectSet()
            subject_set.display_name = time.strftime(
                "%m-%d-%Y %H:%M:%S", time.gmtime())
            subject_set.links.project = project
            subject_set.save()
        else:
            subject_set = SubjectSet.find(subject_set_id)

        subjects = []
        for proto_subject in proto_subjects:
            subject = Subject()
            subject.links.project = project
            subject.add_location(proto_subject['location_lc'])
            subject.add_location(proto_subject['location_ps'])
            subject.metadata.update(proto_subject['metadata'])
            subject.save()
            subjects.append(subject)

        subject_set.add(subjects)
        workflow.add_subject_sets(subject_set)
    except Exception:
        # Deliberate best-effort boundary: report the failure with its
        # traceback and let the caller continue.
        self.log.exception("Error in create_subjects_and_link_to_project ")
def get_conn(self):
    """Return ``(panoptes_client, project)``, connecting on first use.

    The connection settings come from ``self.get_connection(self._conn_id)``.
    The login doubles as the owner part of the project slug and the schema as
    the project part. An ``auto_disable_ssets`` entry in the JSON ``extra``
    field is stored on ``self._auto_disable_subject_sets``.

    :raises MissingLoginError: the connection has no login
    :raises MissingPasswordError: the connection has no password
    :raises MissingSchemaError: the connection has no schema
    """
    if self._panoptes_client is not None:
        # already connected: reuse the cached client and project
        return self._panoptes_client, self._project

    self.log.info(f"{self.__class__.__name__} version {__version__}")
    self.log.debug(
        f"getting connection information from {self._conn_id}")
    config = self.get_connection(self._conn_id)

    ctyp = config.conn_type or self.DEFAULT_CONN_TYPE
    host = config.host or self.DEFAULT_HOST
    port = config.port or self.DEFAULT_PORT
    slug, login, password = config.schema, config.login, config.password

    if config.extra:
        try:
            extra = json.loads(config.extra)
        except json.decoder.JSONDecodeError:
            # unparsable extras mean the feature stays off
            self._auto_disable_subject_sets = False
        else:
            self._auto_disable_subject_sets = extra.get(
                "auto_disable_ssets", False)

    if not login:
        raise MissingLoginError(self._conn_id)
    if not password:
        raise MissingPasswordError(self._conn_id)
    if not slug:
        raise MissingSchemaError(self._conn_id)

    project_slug = f"{login}/{slug}"
    endpoint = f"{ctyp}://{host}:{port}"
    self._panoptes_client = Panoptes.connect(
        username=login, password=password, endpoint=endpoint)
    self._project = Project.find(slug=project_slug)
    self.log.info(
        f"Searching project by slug {project_slug} found: {self._project}"
    )
    return self._panoptes_client, self._project
def run():
    """
    Query for completed subjects, calculate kmeans vertex centroids, fetch subject images, split
    columns by centroids, row segmentation with Ocropy.

    This function itself loads the classifications of every workflow listed in
    ``settings.COLUMNS_WORKFLOW_METADATA``, aggregates vertex centroids and
    retired subject ids, and enqueues one subject-creation job per retired
    subject that is not already flagged as processed.
    """

    logger = setup_logger(settings.APP_NAME,
                          'log/kmeans_and_enqueue_completed_subjects.log',
                          logging.DEBUG)

    subject_set_csv = SubjectSetCSV()
    workflow_router = SubjectSetWorkflowRouter(subject_set_csv, settings,
                                               logger)
    pages_raw_subject_ids = subject_set_csv.raw_pages_subject_ids()
    logger.debug("Running Wires and Rails Workflow Processor")
    Panoptes.connect(username=settings.PANOPTES_USERNAME,
                     password=settings.PANOPTES_PASSWORD)

    retired_subject_ids = []
    vertices_and_target_subject_sets = []

    # Only the per-workflow metadata is needed; the subject set id key is not.
    for metadata in settings.COLUMNS_WORKFLOW_METADATA.values():
        logger.debug(
            "Loading vertices / subject retirement info for %(debug_name)s subject set "
            "(subject set id: %(subject_set_id)d; workflow id: %(workflow_id)d; task id: "
            " %(task_id)s", metadata)

        classification_kwargs = {
            'scope': 'project',
            'project_id': settings.PROJECT_ID,
            'workflow_id': metadata['workflow_id']
        }
        logger.debug("Loading classifications by params %s",
                     str(classification_kwargs))
        classifications_records = list(
            Classification.where(**classification_kwargs))

        classifications = VertexClassifications(classifications_records,
                                                pages_raw_subject_ids)

        # Aggregate vertex centroids
        centroids_by_subject = classifications.vertex_centroids(
            metadata['task_id'])
        for subject_id, centroids in centroids_by_subject.items():
            # Find target subject set ID, or log and skip the subject
            try:
                target_subject_set_id = workflow_router \
                    .target_subject_set_id(subject_id, classifications_records)
            except UnidentifiedRawSubjectSetException as ex:
                logger.error(ex.args[0])
                continue
            except SharedMajorityException as ex:
                # TODO need add'l monitoring for this, e.g. manual report exception
                logger.error(ex.args[0])
                continue
            vertices_and_target_subject_sets.append(
                [subject_id, centroids, target_subject_set_id])

        # Aggregate retired subjects
        workflow = Workflow.find(metadata['workflow_id'])
        retirement_count = workflow.retirement['options']['count']
        retired_subject_ids += classifications.retired_subject_ids(
            metadata['task_id'], retirement_count)

    logger.debug(
        'Retrieved the following subject centroids for image segmentation: %s',
        str(vertices_and_target_subject_sets))

    logger.debug('For the following retired subject IDs: %s',
                 str(retired_subject_ids))

    # Constant-time membership tests in the loop below; the list is kept for
    # the log message above.
    retired_lookup = set(retired_subject_ids)

    queue = Queue(connection=Redis(host=settings.REDIS_HOST))

    for subject_id, centroids, target_subject_set_id in vertices_and_target_subject_sets:
        if subject_id not in retired_lookup:
            continue
        subject = Subject.find(subject_id)
        if settings.METADATA_KEY_ALREADY_PROCESSED in subject.metadata and \
           subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED]:
            logger.debug('Skipping subject id %d; already processed.',
                         subject_id)
            continue
        logger.debug('Enqueuing subjects id: %d', subject_id)
        queue.enqueue(QueueOperations.queue_new_subject_creation,
                      subject_id,
                      centroids,
                      target_subject_set_id,
                      timeout=2 * 60 * 60)
        # Flag the subject so that a later run does not enqueue it again.
        QueueOperations.flag_subject_as_queued(subject)
size = file_bytes.tell() print('Uploading ', original_file, scale, resized_width, size) # ensure the file pointer is returned to the beginning of the file-like object file_bytes.seek(0, 0) return file_bytes parser = argparse.ArgumentParser(description='Zooniverse Uploader') parser.add_argument('image_dir') parser.add_argument('--subject', '-s', required=True) args = parser.parse_args() set_name = args.subject # connect to zooniverse - requires the User_name and Password to be set up as environmental variables in your OS Panoptes.connect(username=os.environ['ZOONIVERSE_USERNAME'], password=os.environ['ZOONIVERSE_PASSWORD']) # modify the project slug if used for other than Snapshots at Sea project = Project.find(slug='tedcheese/snapshots-at-sea') if not os.path.exists(args.image_dir): print('[%s] does not exist.' % args.image_dir) sys.exit() # load the list of image files found in the directory: # The local file name will be uploaded as metadata with the image file_types = ['jpg', 'jpeg'] subject_metadata = {} for entry in os.listdir(args.image_dir): if entry.partition('.')[2].lower() in file_types: subject_metadata[entry] = {'Filename': entry} print('Found ', len(subject_metadata), ' files to upload in this directory.')
# Project description; in the statements visible here it is only referenced by
# the commented-out project-creation code below.
description = 'Shallow clouds are the nemesis of climate modelers. Help us by detecting cloud organization from satellite images.'
# Display name of the subject set created below
subject_name = 'EUREC4A-ICON-Scenes'

# Read filenames to upload
# import pdb; pdb.set_trace()
# files = sorted(glob.glob(path1+'*.jpeg'))
# files = files + sorted(glob.glob(path2+'*.jpeg'))
# files = files + sorted(glob.glob(path3+'*C02*.jpeg'))
# Active selection: .jpeg files under path1 whose name ends with an even digit
# followed by any two characters.
files = sorted(glob.glob(path1 + '*[02468]??.jpeg'))

# Create metadata
# One entry per file, keyed by its path; 'subject_reference' is the position of
# the file in the sorted list.
subject_metadata = {}
for f, file in enumerate(files):
    subject_metadata[file] = {'file': file, 'subject_reference': f}

Panoptes.connect(username=username, password=password)

# The project already exists and is looked up by id; the commented-out lines
# are the code that would create and configure a new project instead.
# tutorial_project = Project()
tutorial_project = Project.find(7699)

# tutorial_project.display_name = display_name
# tutorial_project.description = description
# tutorial_project.primary_language = 'en'
# tutorial_project.private =True
# tutorial_project.save()

# Create an empty subject set linked to the project
subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()

# Reload the project before printing its linked subject sets
tutorial_project.reload()
print(tutorial_project.links.subject_sets)
B-1180 Brussels BELGIUM phone : +32 (0)2 373.04.19 e-mail : [email protected] web : www.aeronomie.be ________________________________________________ """ filename = "panoptes_test2_export.csv" from panoptes_client import Project, Panoptes from panoptes_client.panoptes import PanoptesAPIException import requests, sys Panoptes.connect(username=username, password=password) project = Project.find(slug='zooniverse/radio-meteor-zoo') #r = project.get_classifications_export(generate=True, wait=True, wait_timeout=1800) wait_timeout = 60 project.generate_classifications_export() for attempt in range(60): print "wait classification export (attempt %d)" % attempt sys.stdout.flush() try: export = project.wait_classifications_export(wait_timeout) except PanoptesAPIException as e: print str(e)[:32] if str(e)[:32] == "classifications_export not ready": continue
#This program takes the list of classifications crowdsourced in Zooniverse and updates the same in MongoDB corresponding to the subjects. #Importing required packages from panoptes_client import SubjectSet, Subject, Project, Panoptes from pymongo import MongoClient import argparse import datetime import itertools import csv import json import io import csh_db_config import zooniverse_config # connect to zooniverse Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME, password=zooniverse_config.Zooniverse_PASS) project = Project.find(zooniverse_config.Project_ID) # connection to mongodb mongoConn = MongoClient(csh_db_config.DB_HOST + ":" + str(csh_db_config.DB_PORT)) cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME] cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER, csh_db_config.TRANSCRIPTION_DB_PASS) cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl] cshSubjectSets = cshTransDB[csh_db_config.TRANS_DB_SubjectSets] classification_export = Project( zooniverse_config.Project_ID).get_export('classifications') classification = classification_export.content.decode('utf-8')
import re import urllib2 import os import getpass import wikipedia # ask user for login and object they want to classify thing = raw_input("What would you like to classify? ") user = raw_input("Zooniverse username: "******"password: "******"lxml")
#!/usr/bin/env python3 """ Un-flag arbitrary subjects as not processed, useful for debugging workflow processing. """ import sys sys.path.insert(0, "..") from panoptes_client import Panoptes, Subject from lib import settings Panoptes.connect(username=settings.PANOPTES_USERNAME, password=settings.PANOPTES_PASSWORD) # SUBJECT_IDS = ['5823821', '5823822'] # SUBJECT_IDS = ['14813279', '14813280', '14813281'] # SUBJECT_IDS = ['15327062','15327056','15327068','15327065'] # Telegraph tests - SUBJECT_IDS = ['15327068', '15327065', '15327062', '15327059', '15327056'] for subject_id in SUBJECT_IDS: subject = Subject.find(subject_id) subject.metadata[settings.METADATA_KEY_ALREADY_PROCESSED] = False subject.save()
import yaml
import os
import urllib

# File with one subject id per line
SUBJECT_ID_FILE = 'subjects.txt'

# config.yaml holds the keyword arguments passed to Panoptes.connect below
with open('config.yaml') as config_f:
    config = yaml.load(config_f, Loader=yaml.FullLoader)

with open(SUBJECT_ID_FILE) as subject_id_f:
    # strip the line endings (and any surrounding whitespace) from each id
    subject_ids = [ s.strip() for s in subject_id_f.readlines() ]

Panoptes.connect(**config)

# Progress bar with one step per subject id
with ChargingBar(
    'Updating',
    max=len(subject_ids),
    suffix='%(percent).1f%% %(eta_td)s'
) as bar:
    with Subject.async_saves():
        for subject_id in subject_ids:
            bar.next()
            subject = Subject.find(subject_id)
            # skip subjects whose metadata already has the '!CERiT' key
            if '!CERiT' in subject.metadata:
                continue
# DEFINE ZOONIVERSE SUBJECT METADATA, corresponds to METADATA FROM ITEM above, update both accordingly segment['metadata'] = { 'APA Citation': apa_citation, 'Date': item_date, 'Library Of Congress Item ID': loc_id, 'Source Collection': source_collection, 'Title': item_title } segments.append(segment) print('Item segments transformation complete.') return segments segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID) Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT) project = Project.find(PROJECT) subject_set = SubjectSet() subject_set.links.project = project subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode subject_set.save() print('Begin Zooniverse subject upload...') for segment in segments: subject = Subject() subject.links.project = project subject.add_location(segment['location'])
def connect_api_client():
    """Open the Panoptes connection from the ``PANOPTES_*`` environment variables.

    ``PANOPTES_URL`` falls back to ``https://panoptes.zooniverse.org/``.
    """
    # connect to the API only once for this function request
    connection_settings = {
        'endpoint': getenv('PANOPTES_URL', 'https://panoptes.zooniverse.org/'),
        'client_id': getenv('PANOPTES_CLIENT_ID'),
        'client_secret': getenv('PANOPTES_CLIENT_SECRET'),
    }
    Panoptes.connect(**connection_settings)
""" This version is written in Python 3.66 This script attempts to retrieve the exif data from existing subject image files and add the datetime to the subject metadata. It requires the project owner credentials to be set up as OS environmental variables, and an appropriate project slug modified on line 11. depending on the camera used to take the original subject image the exif code may be different than that in the code and may need to be modified""" import os from PIL import Image, ExifTags import panoptes_client from panoptes_client import SubjectSet, Project, Panoptes import requests Panoptes.connect(username=os.environ['User_name'], password=os.environ['Password']) project = Project.find(slug='pmason\fossiltrainer') while True: set_id = input('Entry subject set id to update:' + '\n') try: subject_set = SubjectSet.find(set_id) count_subjects = 0 subject_list = [] for subject in subject_set.subjects: count_subjects += 1 if subject.metadata['DateTime'] == '': try: img = Image.open(requests.get(subject.locations[0]['image/jpeg'], stream=True).raw) exif_dict = img._getexif() date_time = exif_dict[306] except (IOError, KeyError): print('Acquiring exif data for ', subject.id, ' failed') continue subject.metadata['DateTime'] = date_time
def upload_chunks(self, chunks: str, project_id: int, set_name: str, zooniverse_login="", zooniverse_pwd="", amount: int = 1000, ignore_errors: bool = False, **kwargs):
    """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

    Chunks that were uploaded are marked as such in the dataframe, which is
    written back to ``chunks``.

    :param chunks: path to the chunk CSV dataframe
    :type chunks: str
    :param project_id: zooniverse project id
    :type project_id: int
    :param set_name: name of the subject set; it is created if the project has no subject set with this name
    :type set_name: str
    :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
    :type zooniverse_login: str, optional
    :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
    :type zooniverse_pwd: str, optional
    :param amount: amount of chunks to upload, defaults to 1000
    :type amount: int, optional
    :param ignore_errors: when a chunk cannot be saved, skip it and carry on instead of halting the upload, defaults to False
    :type ignore_errors: bool, optional
    :raises Exception: if the chunk metadata cannot be read
    """
    self.chunks_file = chunks
    self.get_credentials(zooniverse_login, zooniverse_pwd)

    metadata_location = os.path.join(self.chunks_file)
    try:
        self.chunks = pd.read_csv(metadata_location, index_col="index")
    except Exception as exc:
        # Same exception type and message as before, with the cause attached.
        raise Exception("cannot read chunk metadata from {}.".format(
            metadata_location)) from exc

    assert_dataframe("chunks", self.chunks)
    assert_columns_presence(
        "chunks",
        self.chunks,
        {"recording_filename", "onset", "offset", "uploaded", "mp3"},
    )

    from panoptes_client import Panoptes, Project, Subject, SubjectSet

    Panoptes.connect(username=self.zooniverse_login,
                     password=self.zooniverse_pwd)
    zooniverse_project = Project(project_id)

    subjects_metadata = []

    # Reuse the subject set with this display name if the project has one.
    subject_set = None
    for ss in zooniverse_project.links.subject_sets:
        if ss.display_name == set_name:
            subject_set = ss

    if subject_set is None:
        subject_set = SubjectSet()
        subject_set.links.project = zooniverse_project
        subject_set.display_name = set_name
        subject_set.save()

    subjects = []

    # Element-wise comparison on the pandas column (not an identity test).
    chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
        amount)
    chunks_to_upload = chunks_to_upload.to_dict(orient="index")

    if len(chunks_to_upload) == 0:
        print("nothing left to upload.")
        return

    for chunk_index, chunk in chunks_to_upload.items():
        print("uploading chunk {} ({},{})".format(
            chunk["recording_filename"], chunk["onset"], chunk["offset"]))

        subject = Subject()
        subject.links.project = zooniverse_project
        subject.add_location(
            os.path.join(os.path.dirname(self.chunks_file), "chunks",
                         chunk["mp3"]))
        subject.metadata["date_extracted"] = chunk["date_extracted"]

        try:
            subject.save()
        except Exception as e:
            print("failed to save chunk {}. an exception has occured:\n{}".
                  format(chunk_index, str(e)))
            print(traceback.format_exc())

            # Use the parameter: there is no `args` object in this scope.
            if ignore_errors:
                continue
            else:
                print("subject upload halting here.")
                break

        subjects.append(subject)

        chunk["index"] = chunk_index
        chunk["zooniverse_id"] = str(subject.id)
        chunk["project_id"] = str(project_id)
        chunk["subject_set"] = str(subject_set.display_name)
        chunk["uploaded"] = True
        subjects_metadata.append(chunk)

    if len(subjects) == 0:
        return

    # Subjects saved before a halt are still linked and recorded.
    subject_set.add(subjects)

    self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

    self.chunks.to_csv(self.chunks_file)
# print('Failure whilst processing "' + img.filename + '": ' + str(e)) if os.path.splitext(image)[1] == ".csv": os.rename(image, out_path+'/'+os.path.basename(os.path.normpath(image))) else: f = open(logfile, "a") t = time.localtime() f.write('\nFailure whilst processing "' + image + '": ' + str(e)+ " " + time.strftime("%D:%H:%M:%S",t)+'\n\n') f.close() # move error files into seperate folder os.rename(image, errorfiles + os.path.basename(os.path.normpath(image))) # delete the tmp file after the images have been resized try: Panoptes.connect(username=zcfg.login['user'], password=zcfg.login['pass']) project = Project.find("6307") except Exception as e: f = open(logfile, "a") t = time.localtime() f.write('Unable to connect to Zooniverse: '+time.strftime("%D:%H:%M:%S",t)+'\n') f.close() subject_set = SubjectSet() s = Subject() subject_set.links.project = project subject_set.display_name = 'Tutorial subject set 2'
def retrieve_classifications(self, destination: str, project_id: int, zooniverse_login: str = "", zooniverse_pwd: str = "", chunks: List[str] = [], **kwargs):
    """Download a project's classifications and write them to a CSV file.

    Every (classification, task) pair becomes one row, joined with the label
    of the selected answer. When chunk metadata files are given, the rows are
    additionally joined with them on the subject id, which keeps only the
    classifications that have a matching chunk.

    :param destination: output CSV dataframe destination
    :type destination: str
    :param project_id: zooniverse project id
    :type project_id: int
    :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
    :type zooniverse_login: str, optional
    :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
    :type zooniverse_pwd: str, optional
    :param chunks: the list of chunk metadata files to match the classifications to
    :type chunks: List[str], optional
    """
    self.get_credentials(zooniverse_login, zooniverse_pwd)

    from panoptes_client import Panoptes, Project, Classification

    Panoptes.connect(username=self.zooniverse_login,
                     password=self.zooniverse_pwd)
    project = Project(project_id)

    # (workflow, task, answer position) -> answer label
    answer_rows = []
    for workflow in project.links.workflows:
        workflow_key = str(workflow.id)
        for task_id in workflow.tasks:
            task_answers = workflow.tasks[task_id]["answers"]
            answer_rows.extend(
                {
                    "workflow_id": workflow_key,
                    "task_id": str(task_id),
                    "answer_id": str(position),
                    "answer": answer["label"],
                }
                for position, answer in enumerate(task_answers)
            )
    answers_translation_table = pd.DataFrame(answer_rows)

    raw_records = [
        record.raw
        for record in Classification.where(
            scope="project", page_size=1000, project_id=project_id)
    ]
    classifications = pd.DataFrame(raw_records)

    links = classifications["links"]
    classifications["user_id"] = links.apply(lambda s: s["user"])
    first_subject = links.apply(lambda s: s["subjects"][0])
    classifications["subject_id"] = first_subject.astype(int)
    classifications["workflow_id"] = links.apply(lambda s: s["workflow"])

    # one (task, value) pair per annotation, then one row per pair
    classifications["tasks"] = classifications["annotations"].apply(
        lambda s: [(str(r["task"]), str(r["value"])) for r in s])
    classifications = classifications.explode("tasks")
    classifications["task_id"] = classifications["tasks"].str[0]
    classifications["answer_id"] = classifications["tasks"].str[1]
    classifications = classifications.drop(columns=["tasks"])

    kept_columns = [
        "id", "user_id", "subject_id", "task_id", "answer_id", "workflow_id"
    ]
    classifications = classifications[kept_columns]

    join_keys = ["workflow_id", "task_id", "answer_id"]
    classifications = classifications.merge(
        answers_translation_table,
        left_on=join_keys,
        right_on=join_keys,
    )

    if chunks:
        chunk_frames = [pd.read_csv(f) for f in chunks]
        chunks = pd.concat(chunk_frames)
        classifications = classifications.merge(chunks,
                                                left_on="subject_id",
                                                right_on="zooniverse_id")

    classifications.set_index("id").to_csv(destination)
def get_classifications(save_dir,
                        max_classifications=None,
                        last_id=None,
                        project_id='5733',
                        max_rate_per_sec=40,
                        per_file=5000) -> int:
    """Save as we download line-by-line, to avoid memory issues and ensure results are saved.

    Each classification is fetched individually, its subject id is replaced
    by the subject record returned by ``get_subject``, and the result is
    handed to an ``AtomicFile`` created on ``save_dir``.

    Args:
        save_dir: passed to ``AtomicFile``; must be truthy.
        max_classifications (int, optional): Defaults to None. Stop after
            this many classifications; None means download until the API
            is exhausted.
        last_id (optional): Defaults to None. Forwarded to
            ``Classification.where`` as its ``last_id`` filter.
        project_id (str, optional): Zooniverse project to query.
        max_rate_per_sec (int, optional): upper bound on classifications
            processed per second; the loop sleeps to stay below it.
        per_file (int, optional): not referenced by this function.

    Returns:
        int: last id downloaded (the largest id seen, 0 if none)
    """
    assert save_dir
    with open(ZOONIVERSE_LOGIN_LOC, 'r') as f:
        zooniverse_login = json.load(f)
    Panoptes.connect(**zooniverse_login)

    # TODO specify workflow if possible?
    classifications = Classification.where(scope='project',
                                           project_id=project_id,
                                           last_id=last_id)

    classification_n = 0
    latest_id = 0
    pbar = tqdm(total=max_classifications)
    min_time = timedelta(seconds=(1. / max_rate_per_sec))
    atomic_file = AtomicFile(save_dir)

    # `None` means "no limit"; comparing an int with None raises TypeError
    # on Python 3, so the check has to be explicit.
    while max_classifications is None or classification_n < max_classifications:
        # may possibly be requesting the very first classification twice, not clear how - TODO test
        initial_time = datetime.now()
        try:
            # Only this call may legitimately signal exhaustion; a
            # StopIteration raised anywhere else should not be mistaken
            # for "all retrieved".
            classification = next(classifications).raw  # raw is the actual data
        except StopIteration:  # all retrieved
            logging.info('All classifications retrieved')
            break

        # minor tweaks for convenience
        # replace subject id with subject information from API
        subject_id = classification['links']['subjects'][0]  # only works for single-subject projects
        del classification['links']['subjects']
        subject = get_subject(subject_id)  # assume id is unique, and hence only one match is possible
        classification['links']['subject'] = subject.raw

        atomic_file.add(classification)

        # Throttle so that at most `max_rate_per_sec` items are processed per second.
        time_elapsed = datetime.now() - initial_time
        if time_elapsed < min_time:
            sleep_seconds = (min_time - time_elapsed).total_seconds()
            logging.debug('Sleeping %s seconds', sleep_seconds)
            time.sleep(sleep_seconds)

        latest_id = max(latest_id, int(classification['id']))

        pbar.update()
        classification_n += 1

    atomic_file.end_file()  # write anything left
    pbar.close()
    return latest_id
GZ: specobjid, dr8objid, dr7objid, ra, dec, t01_smooth_or_features_a02_features_or_disk_weighted_fraction, t02_edgeon_a05_no_weighted_fraction, t03_bar_a06_bar_weighted_fraction, t04_spiral_a08_spiral_weighted_fraction 3) Sky match the tables by RA and DEC in topcat, with the MaNGA data as table 1, and GZ as table 2. 4) Remove the second set of RA and DEC columns, and name the first ones 'RA' and 'DEC' 5) Save this file as a csv 6) This only works in python 2 because it uses panoptes_client ''' import numpy as np from panoptes_client import SubjectSet, Subject, Project, Panoptes import os import progressbar as pb myusername = os.environ['PANOPTES_USERNAME'] mypassword = os.environ['PANOPTES_PASSWORD'] Panoptes.connect(username= myusername, password=mypassword) project = Project.find(id='73') fullsample = SubjectSet.find(5326) spirals = SubjectSet.find(5324) bars = SubjectSet.find(5325) progress = pb.ProgressBar(widgets=[pb.Bar(), pb.ETA()]) data = np.genfromtxt('../GZ3D/MatchedData.csv', delimiter = ',', names=True, dtype=[('DEC', float), ('IAUNAME', '|S30'),('IFUTARGETSIZE',int), ('MANGAID', '|S10'),('MANGA_TILEID',int),('NSAID', int), ('PETROTH50',float),('RA',float),('SERSIC_TH50',float), ('Z',float),('specobjid', int),('dr8objid', int), ('dr7objid', int),('t01_smooth_or_features_a02_features_or_disk_weighted_fraction', float), ('t02_edgeon_a05_no_weighted_fraction', float), ('t03_bar_a06_bar_weighted_fraction', float),